mlrun-1.7.0rc6-py3-none-any.whl → mlrun-1.7.0rc8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. (The original page links to more details here.)

Files changed (70)
  1. mlrun/__main__.py +2 -0
  2. mlrun/common/constants.py +6 -0
  3. mlrun/common/schemas/__init__.py +3 -0
  4. mlrun/common/schemas/api_gateway.py +8 -1
  5. mlrun/common/schemas/model_monitoring/__init__.py +4 -0
  6. mlrun/common/schemas/model_monitoring/constants.py +35 -18
  7. mlrun/common/schemas/project.py +1 -0
  8. mlrun/common/types.py +7 -1
  9. mlrun/config.py +34 -10
  10. mlrun/data_types/data_types.py +4 -0
  11. mlrun/datastore/alibaba_oss.py +130 -0
  12. mlrun/datastore/azure_blob.py +4 -5
  13. mlrun/datastore/base.py +22 -16
  14. mlrun/datastore/datastore.py +4 -0
  15. mlrun/datastore/datastore_profile.py +7 -0
  16. mlrun/datastore/google_cloud_storage.py +1 -1
  17. mlrun/datastore/sources.py +2 -3
  18. mlrun/datastore/targets.py +6 -1
  19. mlrun/db/base.py +14 -6
  20. mlrun/db/httpdb.py +61 -56
  21. mlrun/db/nopdb.py +3 -0
  22. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +6 -1
  23. mlrun/frameworks/tf_keras/mlrun_interface.py +20 -8
  24. mlrun/kfpops.py +2 -5
  25. mlrun/model.py +1 -0
  26. mlrun/model_monitoring/__init__.py +1 -1
  27. mlrun/model_monitoring/api.py +104 -295
  28. mlrun/model_monitoring/controller.py +25 -25
  29. mlrun/model_monitoring/db/__init__.py +16 -0
  30. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -34
  31. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  32. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +47 -6
  33. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  34. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +49 -0
  35. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +76 -3
  36. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +68 -0
  37. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/sqlite.py +13 -1
  38. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +662 -0
  39. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  40. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +134 -3
  41. mlrun/model_monitoring/helpers.py +3 -3
  42. mlrun/model_monitoring/stream_processing.py +41 -9
  43. mlrun/model_monitoring/tracking_policy.py +7 -1
  44. mlrun/model_monitoring/writer.py +4 -36
  45. mlrun/projects/pipelines.py +14 -2
  46. mlrun/projects/project.py +118 -103
  47. mlrun/run.py +5 -1
  48. mlrun/runtimes/base.py +6 -0
  49. mlrun/runtimes/nuclio/api_gateway.py +218 -65
  50. mlrun/runtimes/nuclio/function.py +3 -0
  51. mlrun/runtimes/nuclio/serving.py +28 -32
  52. mlrun/runtimes/pod.py +26 -0
  53. mlrun/serving/routers.py +4 -3
  54. mlrun/serving/server.py +4 -6
  55. mlrun/serving/states.py +34 -14
  56. mlrun/serving/v2_serving.py +4 -3
  57. mlrun/utils/helpers.py +34 -0
  58. mlrun/utils/http.py +1 -1
  59. mlrun/utils/retryer.py +1 -0
  60. mlrun/utils/version/version.json +2 -2
  61. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc8.dist-info}/METADATA +25 -16
  62. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc8.dist-info}/RECORD +66 -62
  63. mlrun/model_monitoring/batch.py +0 -933
  64. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  65. mlrun/model_monitoring/stores/models/mysql.py +0 -34
  66. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  67. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc8.dist-info}/LICENSE +0 -0
  68. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc8.dist-info}/WHEEL +0 -0
  69. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc8.dist-info}/entry_points.txt +0 -0
  70. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc8.dist-info}/top_level.txt +0 -0
mlrun/runtimes/nuclio/api_gateway.py CHANGED
@@ -12,15 +12,17 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  import base64
15
+ import typing
15
16
  from typing import Optional, Union
16
17
  from urllib.parse import urljoin
17
18
 
18
19
  import requests
20
+ from requests.auth import HTTPBasicAuth
19
21
 
20
22
  import mlrun
21
23
  import mlrun.common.schemas
22
24
 
23
- from .function import RemoteRuntime
25
+ from .function import RemoteRuntime, get_fullname
24
26
  from .serving import ServingRuntime
25
27
 
26
28
  NUCLIO_API_GATEWAY_AUTHENTICATION_MODE_BASIC_AUTH = "basicAuth"
@@ -28,6 +30,67 @@ NUCLIO_API_GATEWAY_AUTHENTICATION_MODE_NONE = "none"
28
30
  PROJECT_NAME_LABEL = "nuclio.io/project-name"
29
31
 
30
32
 
33
+ class APIGatewayAuthenticator(typing.Protocol):
34
+ @property
35
+ def authentication_mode(self) -> str:
36
+ return NUCLIO_API_GATEWAY_AUTHENTICATION_MODE_NONE
37
+
38
+ @classmethod
39
+ def from_scheme(cls, api_gateway_spec: mlrun.common.schemas.APIGatewaySpec):
40
+ if (
41
+ api_gateway_spec.authenticationMode
42
+ == NUCLIO_API_GATEWAY_AUTHENTICATION_MODE_BASIC_AUTH
43
+ ):
44
+ if api_gateway_spec.authentication:
45
+ return BasicAuth(
46
+ username=api_gateway_spec.authentication.get("username", ""),
47
+ password=api_gateway_spec.authentication.get("password", ""),
48
+ )
49
+ else:
50
+ return BasicAuth()
51
+ else:
52
+ return NoneAuth()
53
+
54
+ def to_scheme(
55
+ self,
56
+ ) -> Optional[dict[str, Optional[mlrun.common.schemas.APIGatewayBasicAuth]]]:
57
+ return None
58
+
59
+
60
+ class NoneAuth(APIGatewayAuthenticator):
61
+ """
62
+ An API gateway authenticator with no authentication.
63
+ """
64
+
65
+ pass
66
+
67
+
68
+ class BasicAuth(APIGatewayAuthenticator):
69
+ """
70
+ An API gateway authenticator with basic authentication.
71
+
72
+ :param username: (str) The username for basic authentication.
73
+ :param password: (str) The password for basic authentication.
74
+ """
75
+
76
+ def __init__(self, username=None, password=None):
77
+ self._username = username
78
+ self._password = password
79
+
80
+ @property
81
+ def authentication_mode(self) -> str:
82
+ return NUCLIO_API_GATEWAY_AUTHENTICATION_MODE_BASIC_AUTH
83
+
84
+ def to_scheme(
85
+ self,
86
+ ) -> Optional[dict[str, Optional[mlrun.common.schemas.APIGatewayBasicAuth]]]:
87
+ return {
88
+ "basicAuth": mlrun.common.schemas.APIGatewayBasicAuth(
89
+ username=self._username, password=self._password
90
+ )
91
+ }
92
+
93
+
31
94
  class APIGateway:
32
95
  def __init__(
33
96
  self,
@@ -47,22 +110,34 @@ class APIGateway:
47
110
  ],
48
111
  description: str = "",
49
112
  path: str = "/",
50
- authentication_mode: Optional[
51
- str
52
- ] = NUCLIO_API_GATEWAY_AUTHENTICATION_MODE_NONE,
113
+ authentication: Optional[APIGatewayAuthenticator] = NoneAuth(),
53
114
  host: Optional[str] = None,
54
115
  canary: Optional[list[int]] = None,
55
- username: Optional[str] = None,
56
- password: Optional[str] = None,
57
116
  ):
117
+ """
118
+ Initialize the APIGateway instance.
119
+
120
+ :param project: The project name
121
+ :param name: The name of the API gateway
122
+ :param functions: The list of functions associated with the API gateway
123
+ Can be a list of function names (["my-func1", "my-func2"])
124
+ or a list or a single entity of
125
+ :py:class:`~mlrun.runtimes.nuclio.function.RemoteRuntime` OR
126
+ :py:class:`~mlrun.runtimes.nuclio.serving.ServingRuntime`
127
+
128
+ :param description: Optional description of the API gateway
129
+ :param path: Optional path of the API gateway, default value is "/"
130
+ :param authentication: The authentication for the API gateway of type
131
+ :py:class:`~mlrun.runtimes.nuclio.api_gateway.BasicAuth`
132
+ :param host: The host of the API gateway (optional). If not set, it will be automatically generated
133
+ :param canary: The canary percents for the API gateway of type list[int]; for instance: [20,80]
134
+ """
58
135
  self.functions = None
59
136
  self._validate(
60
137
  project=project,
61
138
  functions=functions,
62
139
  name=name,
63
140
  canary=canary,
64
- username=username,
65
- password=password,
66
141
  )
67
142
  self.project = project
68
143
  self.name = name
@@ -70,14 +145,9 @@ class APIGateway:
70
145
 
71
146
  self.path = path
72
147
  self.description = description
73
- self.authentication_mode = (
74
- authentication_mode
75
- if authentication_mode
76
- else self._enrich_authentication_mode(username=username, password=password)
77
- )
78
148
  self.canary = canary
79
- self._username = username
80
- self._password = password
149
+ self.authentication = authentication
150
+ self.state = ""
81
151
 
82
152
  def invoke(
83
153
  self,
@@ -86,47 +156,140 @@ class APIGateway:
86
156
  auth: Optional[tuple[str, str]] = None,
87
157
  **kwargs,
88
158
  ):
159
+ """
160
+ Invoke the API gateway.
161
+
162
+ :param method: (str, optional) The HTTP method for the invocation.
163
+ :param headers: (dict, optional) The HTTP headers for the invocation.
164
+ :param auth: (Optional[tuple[str, str]], optional) The authentication creds for the invocation if required.
165
+ :param kwargs: (dict) Additional keyword arguments.
166
+
167
+ :return: The response from the API gateway invocation.
168
+ """
89
169
  if not self.invoke_url:
90
- raise mlrun.errors.MLRunInvalidArgumentError(
91
- "Invocation url is not set. Set up gateway's `invoke_url` attribute."
170
+ # try to resolve invoke_url before fail
171
+ self.sync()
172
+ if not self.invoke_url:
173
+ raise mlrun.errors.MLRunInvalidArgumentError(
174
+ "Invocation url is not set. Set up gateway's `invoke_url` attribute."
175
+ )
176
+ if not self.is_ready():
177
+ raise mlrun.errors.MLRunPreconditionFailedError(
178
+ f"API gateway is not ready. " f"Current state: {self.state}"
92
179
  )
180
+
93
181
  if (
94
- self.authentication_mode
182
+ self.authentication.authentication_mode
95
183
  == NUCLIO_API_GATEWAY_AUTHENTICATION_MODE_BASIC_AUTH
96
184
  and not auth
97
185
  ):
98
186
  raise mlrun.errors.MLRunInvalidArgumentError(
99
187
  "API Gateway invocation requires authentication. Please pass credentials"
100
188
  )
101
- if auth:
102
- headers["Authorization"] = self._generate_basic_auth(*auth)
103
189
  return requests.request(
104
- method=method, url=self.invoke_url, headers=headers, **kwargs
190
+ method=method,
191
+ url=self.invoke_url,
192
+ headers=headers,
193
+ **kwargs,
194
+ auth=HTTPBasicAuth(*auth) if auth else None,
105
195
  )
106
196
 
197
+ def is_ready(self):
198
+ if self.state is not mlrun.common.schemas.api_gateway.APIGatewayState.ready:
199
+ # try to sync the state
200
+ self.sync()
201
+ return self.state == mlrun.common.schemas.api_gateway.APIGatewayState.ready
202
+
203
+ def sync(self):
204
+ """
205
+ Synchronize the API gateway from the server.
206
+ """
207
+ synced_gateway = mlrun.get_run_db().get_api_gateway(self.name, self.project)
208
+ synced_gateway = self.from_scheme(synced_gateway)
209
+
210
+ self.host = synced_gateway.host
211
+ self.path = synced_gateway.path
212
+ self.authentication = synced_gateway.authentication
213
+ self.functions = synced_gateway.functions
214
+ self.canary = synced_gateway.canary
215
+ self.description = synced_gateway.description
216
+ self.state = synced_gateway.state
217
+
218
+ def with_basic_auth(self, username: str, password: str):
219
+ """
220
+ Set basic authentication for the API gateway.
221
+
222
+ :param username: (str) The username for basic authentication.
223
+ :param password: (str) The password for basic authentication.
224
+ """
225
+ self.authentication = BasicAuth(username=username, password=password)
226
+
227
+ def with_canary(
228
+ self,
229
+ functions: Union[
230
+ list[str],
231
+ list[
232
+ Union[
233
+ RemoteRuntime,
234
+ ServingRuntime,
235
+ ]
236
+ ],
237
+ ],
238
+ canary: list[int],
239
+ ):
240
+ """
241
+ Set canary function for the API gateway
242
+
243
+ :param functions: The list of functions associated with the API gateway
244
+ Can be a list of function names (["my-func1", "my-func2"])
245
+ or a list of nuclio functions of types
246
+ :py:class:`~mlrun.runtimes.nuclio.function.RemoteRuntime` OR
247
+ :py:class:`~mlrun.runtimes.nuclio.serving.ServingRuntime`
248
+ :param canary: The canary percents for the API gateway of type list[int]; for instance: [20,80]
249
+
250
+ """
251
+ if len(functions) != 2:
252
+ raise mlrun.errors.MLRunInvalidArgumentError(
253
+ f"Gateway with canary can be created only with two functions, "
254
+ f"the number of functions passed is {len(functions)}"
255
+ )
256
+ self.functions = self._validate_functions(self.project, functions)
257
+ self.canary = self._validate_canary(canary)
258
+
107
259
  @classmethod
108
260
  def from_scheme(cls, api_gateway: mlrun.common.schemas.APIGateway):
109
261
  project = api_gateway.metadata.labels.get(PROJECT_NAME_LABEL)
110
262
  functions, canary = cls._resolve_canary(api_gateway.spec.upstreams)
111
- return cls(
263
+ state = (
264
+ api_gateway.status.state
265
+ if api_gateway.status
266
+ else mlrun.common.schemas.APIGatewayState.none
267
+ )
268
+ api_gateway = cls(
112
269
  project=project,
113
270
  description=api_gateway.spec.description,
114
271
  name=api_gateway.spec.name,
115
272
  host=api_gateway.spec.host,
116
273
  path=api_gateway.spec.path,
117
- authentication_mode=str(api_gateway.spec.authenticationMode),
274
+ authentication=APIGatewayAuthenticator.from_scheme(api_gateway.spec),
118
275
  functions=functions,
119
276
  canary=canary,
120
277
  )
278
+ api_gateway.state = state
279
+ return api_gateway
121
280
 
122
281
  def to_scheme(self) -> mlrun.common.schemas.APIGateway:
123
282
  upstreams = (
124
283
  [
125
284
  mlrun.common.schemas.APIGatewayUpstream(
126
- nucliofunction={"name": function_name},
127
- percentage=percentage,
128
- )
129
- for function_name, percentage in zip(self.functions, self.canary)
285
+ nucliofunction={"name": self.functions[0]},
286
+ percentage=self.canary[0],
287
+ ),
288
+ mlrun.common.schemas.APIGatewayUpstream(
289
+ # do not set percent for the second function,
290
+ # so we can define which function to display as a primary one in UI
291
+ nucliofunction={"name": self.functions[1]},
292
+ ),
130
293
  ]
131
294
  if self.canary
132
295
  else [
@@ -141,26 +304,28 @@ class APIGateway:
141
304
  spec=mlrun.common.schemas.APIGatewaySpec(
142
305
  name=self.name,
143
306
  description=self.description,
307
+ host=self.host,
144
308
  path=self.path,
145
- authentication_mode=mlrun.common.schemas.APIGatewayAuthenticationMode.from_str(
146
- self.authentication_mode
309
+ authenticationMode=mlrun.common.schemas.APIGatewayAuthenticationMode.from_str(
310
+ self.authentication.authentication_mode
147
311
  ),
148
312
  upstreams=upstreams,
149
313
  ),
150
314
  )
151
- if (
152
- self.authentication_mode
153
- is NUCLIO_API_GATEWAY_AUTHENTICATION_MODE_BASIC_AUTH
154
- ):
155
- api_gateway.spec.authentication = mlrun.common.schemas.APIGatewayBasicAuth(
156
- username=self._username, password=self._password
157
- )
315
+ api_gateway.spec.authentication = self.authentication.to_scheme()
158
316
  return api_gateway
159
317
 
160
318
  @property
161
319
  def invoke_url(
162
320
  self,
163
321
  ):
322
+ """
323
+ Get the invoke URL.
324
+
325
+ :return: (str) The invoke URL.
326
+ """
327
+ if not self.host.startswith("http"):
328
+ self.host = f"https://{self.host}"
164
329
  return urljoin(self.host, self.path)
165
330
 
166
331
  def _validate(
@@ -180,8 +345,6 @@ class APIGateway:
180
345
  ],
181
346
  ],
182
347
  canary: Optional[list[int]] = None,
183
- username: Optional[str] = None,
184
- password: Optional[str] = None,
185
348
  ):
186
349
  if not name:
187
350
  raise mlrun.errors.MLRunInvalidArgumentError(
@@ -192,26 +355,23 @@ class APIGateway:
192
355
 
193
356
  # validating canary
194
357
  if canary:
195
- if len(self.functions) != len(canary):
196
- raise mlrun.errors.MLRunInvalidArgumentError(
197
- "Function and canary lists lengths do not match"
198
- )
199
- for canary_percent in canary:
200
- if canary_percent < 0 or canary_percent > 100:
201
- raise mlrun.errors.MLRunInvalidArgumentError(
202
- "The percentage value must be in the range from 0 to 100"
203
- )
204
- if sum(canary) != 100:
358
+ self._validate_canary(canary)
359
+
360
+ def _validate_canary(self, canary: list[int]):
361
+ if len(self.functions) != len(canary):
362
+ raise mlrun.errors.MLRunInvalidArgumentError(
363
+ "Function and canary lists lengths do not match"
364
+ )
365
+ for canary_percent in canary:
366
+ if canary_percent < 0 or canary_percent > 100:
205
367
  raise mlrun.errors.MLRunInvalidArgumentError(
206
- "The sum of canary function percents should be equal to 100"
368
+ "The percentage value must be in the range from 0 to 100"
207
369
  )
208
-
209
- # validating auth
210
- if username and not password:
211
- raise mlrun.errors.MLRunInvalidArgumentError("Password is not specified")
212
-
213
- if password and not username:
214
- raise mlrun.errors.MLRunInvalidArgumentError("Username is not specified")
370
+ if sum(canary) != 100:
371
+ raise mlrun.errors.MLRunInvalidArgumentError(
372
+ "The sum of canary function percents should be equal to 100"
373
+ )
374
+ return canary
215
375
 
216
376
  @staticmethod
217
377
  def _validate_functions(
@@ -257,17 +417,10 @@ class APIGateway:
257
417
  f"input function {function_name} "
258
418
  f"does not belong to this project"
259
419
  )
260
- function_names.append(func.uri)
420
+ nuclio_name = get_fullname(function_name, project, func.metadata.tag)
421
+ function_names.append(nuclio_name)
261
422
  return function_names
262
423
 
263
- @staticmethod
264
- def _enrich_authentication_mode(username, password):
265
- return (
266
- NUCLIO_API_GATEWAY_AUTHENTICATION_MODE_NONE
267
- if username is not None and password is not None
268
- else NUCLIO_API_GATEWAY_AUTHENTICATION_MODE_BASIC_AUTH
269
- )
270
-
271
424
  @staticmethod
272
425
  def _generate_basic_auth(username: str, password: str):
273
426
  token = base64.b64encode(f"{username}:{password}".encode()).decode()
mlrun/runtimes/nuclio/function.py CHANGED
@@ -775,6 +775,9 @@ class RemoteRuntime(KubeResource):
775
775
  ] = self.metadata.credentials.access_key
776
776
  return runtime_env
777
777
 
778
+ def _get_serving_spec(self):
779
+ return None
780
+
778
781
  def _get_nuclio_config_spec_env(self):
779
782
  env_dict = {}
780
783
  external_source_env_dict = {}
mlrun/runtimes/nuclio/serving.py CHANGED
@@ -14,8 +14,9 @@
14
14
 
15
15
  import json
16
16
  import os
17
+ import warnings
17
18
  from copy import deepcopy
18
- from typing import Union
19
+ from typing import TYPE_CHECKING, Optional, Union
19
20
 
20
21
  import nuclio
21
22
  from nuclio import KafkaTrigger
@@ -24,7 +25,6 @@ import mlrun
24
25
  import mlrun.common.schemas
25
26
  from mlrun.datastore import parse_kafka_url
26
27
  from mlrun.model import ObjectList
27
- from mlrun.model_monitoring.tracking_policy import TrackingPolicy
28
28
  from mlrun.runtimes.function_reference import FunctionReference
29
29
  from mlrun.secrets import SecretsStore
30
30
  from mlrun.serving.server import GraphServer, create_graph_server
@@ -43,6 +43,10 @@ from .function import NuclioSpec, RemoteRuntime
43
43
 
44
44
  serving_subkind = "serving_v2"
45
45
 
46
+ if TYPE_CHECKING:
47
+ # remove this block in 1.9.0
48
+ from mlrun.model_monitoring import TrackingPolicy
49
+
46
50
 
47
51
  def new_v2_model_server(
48
52
  name,
@@ -291,7 +295,9 @@ class ServingRuntime(RemoteRuntime):
291
295
  "provided class is not a router step, must provide a router class in router topology"
292
296
  )
293
297
  else:
294
- step = RouterStep(class_name=class_name, class_args=class_args)
298
+ step = RouterStep(
299
+ class_name=class_name, class_args=class_args, engine=engine
300
+ )
295
301
  self.spec.graph = step
296
302
  elif topology == StepKinds.flow:
297
303
  self.spec.graph = RootFlowStep(engine=engine)
@@ -303,12 +309,12 @@ class ServingRuntime(RemoteRuntime):
303
309
 
304
310
  def set_tracking(
305
311
  self,
306
- stream_path: str = None,
307
- batch: int = None,
308
- sample: int = None,
309
- stream_args: dict = None,
310
- tracking_policy: Union[TrackingPolicy, dict] = None,
311
- ):
312
+ stream_path: Optional[str] = None,
313
+ batch: Optional[int] = None,
314
+ sample: Optional[int] = None,
315
+ stream_args: Optional[dict] = None,
316
+ tracking_policy: Optional[Union["TrackingPolicy", dict]] = None,
317
+ ) -> None:
312
318
  """apply on your serving function to monitor a deployed model, including real-time dashboards to detect drift
313
319
  and analyze performance.
314
320
 
@@ -317,31 +323,17 @@ class ServingRuntime(RemoteRuntime):
317
323
  :param batch: Micro batch size (send micro batches of N records at a time).
318
324
  :param sample: Sample size (send only one of N records).
319
325
  :param stream_args: Stream initialization parameters, e.g. shards, retention_in_hours, ..
320
- :param tracking_policy: Tracking policy object or a dictionary that will be converted into a tracking policy
321
- object. By using TrackingPolicy, the user can apply his model monitoring requirements,
322
- such as setting the scheduling policy of the model monitoring batch job or changing
323
- the image of the model monitoring stream.
324
326
 
325
327
  example::
326
328
 
327
329
  # initialize a new serving function
328
330
  serving_fn = mlrun.import_function("hub://v2-model-server", new_name="serving")
329
- # apply model monitoring and set monitoring batch job to run every 3 hours
330
- tracking_policy = {'default_batch_intervals':"0 */3 * * *"}
331
- serving_fn.set_tracking(tracking_policy=tracking_policy)
331
+ # apply model monitoring
332
+ serving_fn.set_tracking()
332
333
 
333
334
  """
334
-
335
335
  # Applying model monitoring configurations
336
336
  self.spec.track_models = True
337
- self.spec.tracking_policy = None
338
- if tracking_policy:
339
- if isinstance(tracking_policy, dict):
340
- # Convert tracking policy dictionary into `model_monitoring.TrackingPolicy` object
341
- self.spec.tracking_policy = TrackingPolicy.from_dict(tracking_policy)
342
- else:
343
- # Tracking_policy is already a `model_monitoring.TrackingPolicy` object
344
- self.spec.tracking_policy = tracking_policy
345
337
 
346
338
  if stream_path:
347
339
  self.spec.parameters["log_stream"] = stream_path
@@ -351,6 +343,14 @@ class ServingRuntime(RemoteRuntime):
351
343
  self.spec.parameters["log_stream_sample"] = sample
352
344
  if stream_args:
353
345
  self.spec.parameters["stream_args"] = stream_args
346
+ if tracking_policy is not None:
347
+ warnings.warn(
348
+ "The `tracking_policy` argument is deprecated from version 1.7.0 "
349
+ "and has no effect. It will be removed in 1.9.0.\n"
350
+ "To set the desired model monitoring time window and schedule, use "
351
+ "the `base_period` argument in `project.enable_model_monitoring()`.",
352
+ FutureWarning,
353
+ )
354
354
 
355
355
  def add_model(
356
356
  self,
@@ -644,8 +644,7 @@ class ServingRuntime(RemoteRuntime):
644
644
  force_build=force_build,
645
645
  )
646
646
 
647
- def _get_runtime_env(self):
648
- env = super()._get_runtime_env()
647
+ def _get_serving_spec(self):
649
648
  function_name_uri_map = {f.name: f.uri(self) for f in self.spec.function_refs}
650
649
 
651
650
  serving_spec = {
@@ -658,9 +657,7 @@ class ServingRuntime(RemoteRuntime):
658
657
  "graph_initializer": self.spec.graph_initializer,
659
658
  "error_stream": self.spec.error_stream,
660
659
  "track_models": self.spec.track_models,
661
- "tracking_policy": self.spec.tracking_policy.to_dict()
662
- if self.spec.tracking_policy
663
- else None,
660
+ "tracking_policy": None,
664
661
  "default_content_type": self.spec.default_content_type,
665
662
  }
666
663
 
@@ -668,8 +665,7 @@ class ServingRuntime(RemoteRuntime):
668
665
  self._secrets = SecretsStore.from_list(self.spec.secret_sources)
669
666
  serving_spec["secret_sources"] = self._secrets.to_serial()
670
667
 
671
- env["SERVING_SPEC_ENV"] = json.dumps(serving_spec)
672
- return env
668
+ return json.dumps(serving_spec)
673
669
 
674
670
  def to_mock_server(
675
671
  self,
mlrun/runtimes/pod.py CHANGED
@@ -1057,6 +1057,32 @@ class KubeResource(BaseRuntime):
1057
1057
  return True
1058
1058
  return False
1059
1059
 
1060
+ def enrich_runtime_spec(
1061
+ self,
1062
+ project_node_selector: dict[str, str],
1063
+ ):
1064
+ """
1065
+ Enriches the runtime spec with the project-level node selector.
1066
+
1067
+ This method merges the project-level node selector with the existing function node_selector.
1068
+ The merge logic used here combines the two dictionaries, giving precedence to
1069
+ the keys in the runtime node_selector. If there are conflicting keys between the
1070
+ two dictionaries, the values from self.spec.node_selector will overwrite the
1071
+ values from project_node_selector.
1072
+
1073
+ Example:
1074
+ Suppose self.spec.node_selector = {"type": "gpu", "zone": "us-east-1"}
1075
+ and project_node_selector = {"type": "cpu", "environment": "production"}.
1076
+ After the merge, the resulting node_selector will be:
1077
+ {"type": "gpu", "zone": "us-east-1", "environment": "production"}
1078
+
1079
+ Note:
1080
+ - The merge uses the ** operator, also known as the "unpacking" operator in Python,
1081
+ combining key-value pairs from each dictionary. Later dictionaries take precedence
1082
+ when there are conflicting keys.
1083
+ """
1084
+ self.spec.node_selector = {**project_node_selector, **self.spec.node_selector}
1085
+
1060
1086
  def _set_env(self, name, value=None, value_from=None):
1061
1087
  new_var = k8s_client.V1EnvVar(name=name, value=value, value_from=value_from)
1062
1088
  i = 0
mlrun/serving/routers.py CHANGED
@@ -28,6 +28,7 @@ import numpy as np
28
28
  import mlrun
29
29
  import mlrun.common.model_monitoring
30
30
  import mlrun.common.schemas.model_monitoring
31
+ from mlrun.errors import err_to_str
31
32
  from mlrun.utils import logger, now_date
32
33
 
33
34
  from ..common.helpers import parse_versioned_object_uri
@@ -1013,7 +1014,7 @@ def _init_endpoint_record(
1013
1014
  graph_server.function_uri
1014
1015
  )
1015
1016
  except Exception as e:
1016
- logger.error("Failed to parse function URI", exc=e)
1017
+ logger.error("Failed to parse function URI", exc=err_to_str(e))
1017
1018
  return None
1018
1019
 
1019
1020
  # Generating version model value based on the model name and model version
@@ -1089,12 +1090,12 @@ def _init_endpoint_record(
1089
1090
  except Exception as exc:
1090
1091
  logger.warning(
1091
1092
  "Failed creating model endpoint record",
1092
- exc=exc,
1093
+ exc=err_to_str(exc),
1093
1094
  traceback=traceback.format_exc(),
1094
1095
  )
1095
1096
 
1096
1097
  except Exception as e:
1097
- logger.error("Failed to retrieve model endpoint object", exc=e)
1098
+ logger.error("Failed to retrieve model endpoint object", exc=err_to_str(e))
1098
1099
 
1099
1100
  return endpoint_uid
1100
1101
 
mlrun/serving/server.py CHANGED
@@ -23,6 +23,7 @@ import uuid
23
23
  from typing import Optional, Union
24
24
 
25
25
  import mlrun
26
+ import mlrun.common.constants
26
27
  import mlrun.common.helpers
27
28
  import mlrun.model_monitoring
28
29
  from mlrun.config import config
@@ -311,11 +312,8 @@ class GraphServer(ModelObj):
311
312
  def v2_serving_init(context, namespace=None):
312
313
  """hook for nuclio init_context()"""
313
314
 
314
- data = os.environ.get("SERVING_SPEC_ENV", "")
315
- if not data:
316
- raise MLRunInvalidArgumentError("failed to find spec env var")
317
- spec = json.loads(data)
318
315
  context.logger.info("Initializing server from spec")
316
+ spec = mlrun.utils.get_serving_spec()
319
317
  server = GraphServer.from_dict(spec)
320
318
  if config.log_level.lower() == "debug":
321
319
  server.verbose = True
@@ -355,7 +353,7 @@ def v2_serving_init(context, namespace=None):
355
353
 
356
354
  async def termination_callback():
357
355
  context.logger.info("Termination callback called")
358
- await server.wait_for_completion()
356
+ server.wait_for_completion()
359
357
  context.logger.info("Termination of async flow is completed")
360
358
 
361
359
  context.platform.set_termination_callback(termination_callback)
@@ -367,7 +365,7 @@ def v2_serving_init(context, namespace=None):
367
365
 
368
366
  async def drain_callback():
369
367
  context.logger.info("Drain callback called")
370
- await server.wait_for_completion()
368
+ server.wait_for_completion()
371
369
  context.logger.info(
372
370
  "Termination of async flow is completed. Rerunning async flow."
373
371
  )