mlrun-1.7.0rc36-py3-none-any.whl → mlrun-1.7.0rc38-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/alerts/alert.py +64 -0
- mlrun/common/schemas/alert.py +2 -2
- mlrun/common/schemas/model_monitoring/constants.py +4 -0
- mlrun/common/schemas/notification.py +26 -7
- mlrun/datastore/azure_blob.py +120 -30
- mlrun/datastore/s3.py +8 -1
- mlrun/feature_store/common.py +6 -11
- mlrun/model.py +5 -0
- mlrun/model_monitoring/api.py +1 -1
- mlrun/model_monitoring/applications/_application_steps.py +9 -4
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +14 -1
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +10 -7
- mlrun/model_monitoring/db/tsdb/base.py +141 -12
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +65 -5
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +23 -1
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +211 -35
- mlrun/model_monitoring/helpers.py +1 -2
- mlrun/model_monitoring/stream_processing.py +67 -25
- mlrun/model_monitoring/writer.py +4 -1
- mlrun/projects/operations.py +4 -0
- mlrun/projects/project.py +11 -1
- mlrun/runtimes/__init__.py +15 -8
- mlrun/runtimes/base.py +3 -0
- mlrun/runtimes/nuclio/application/application.py +98 -17
- mlrun/runtimes/nuclio/function.py +5 -1
- mlrun/runtimes/pod.py +2 -2
- mlrun/runtimes/remotesparkjob.py +2 -5
- mlrun/runtimes/sparkjob/spark3job.py +11 -16
- mlrun/serving/routers.py +1 -4
- mlrun/serving/server.py +4 -7
- mlrun/serving/states.py +1 -1
- mlrun/serving/v2_serving.py +5 -7
- mlrun/track/trackers/mlflow_tracker.py +5 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc36.dist-info → mlrun-1.7.0rc38.dist-info}/METADATA +12 -6
- {mlrun-1.7.0rc36.dist-info → mlrun-1.7.0rc38.dist-info}/RECORD +40 -40
- {mlrun-1.7.0rc36.dist-info → mlrun-1.7.0rc38.dist-info}/WHEEL +1 -1
- {mlrun-1.7.0rc36.dist-info → mlrun-1.7.0rc38.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc36.dist-info → mlrun-1.7.0rc38.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc36.dist-info → mlrun-1.7.0rc38.dist-info}/top_level.txt +0 -0
|
@@ -169,11 +169,40 @@ class EventStreamProcessor:
|
|
|
169
169
|
mlrun.serving.states.RootFlowStep,
|
|
170
170
|
fn.set_topology(mlrun.serving.states.StepKinds.flow),
|
|
171
171
|
)
|
|
172
|
+
graph.add_step(
|
|
173
|
+
"ExtractEndpointID",
|
|
174
|
+
"extract_endpoint",
|
|
175
|
+
full_event=True,
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
# split the graph between event with error vs valid event
|
|
179
|
+
graph.add_step(
|
|
180
|
+
"storey.Filter",
|
|
181
|
+
"FilterError",
|
|
182
|
+
after="extract_endpoint",
|
|
183
|
+
_fn="(event.get('error') is None)",
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
graph.add_step(
|
|
187
|
+
"storey.Filter",
|
|
188
|
+
"ForwardError",
|
|
189
|
+
after="extract_endpoint",
|
|
190
|
+
_fn="(event.get('error') is not None)",
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
|
|
194
|
+
project=self.project, secret_provider=secret_provider
|
|
195
|
+
)
|
|
196
|
+
|
|
197
|
+
tsdb_connector.handle_model_error(
|
|
198
|
+
graph,
|
|
199
|
+
)
|
|
172
200
|
|
|
173
201
|
# Process endpoint event: splitting into sub-events and validate event data
|
|
174
202
|
def apply_process_endpoint_event():
|
|
175
203
|
graph.add_step(
|
|
176
204
|
"ProcessEndpointEvent",
|
|
205
|
+
after="FilterError",
|
|
177
206
|
full_event=True,
|
|
178
207
|
project=self.project,
|
|
179
208
|
)
|
|
@@ -295,9 +324,6 @@ class EventStreamProcessor:
|
|
|
295
324
|
|
|
296
325
|
apply_storey_sample_window()
|
|
297
326
|
|
|
298
|
-
tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
|
|
299
|
-
project=self.project, secret_provider=secret_provider
|
|
300
|
-
)
|
|
301
327
|
tsdb_connector.apply_monitoring_stream_steps(graph=graph)
|
|
302
328
|
|
|
303
329
|
# Parquet branch
|
|
@@ -386,6 +412,38 @@ class ProcessBeforeEndpointUpdate(mlrun.feature_store.steps.MapClass):
|
|
|
386
412
|
return e
|
|
387
413
|
|
|
388
414
|
|
|
415
|
+
class ExtractEndpointID(mlrun.feature_store.steps.MapClass):
|
|
416
|
+
def __init__(self, **kwargs) -> None:
|
|
417
|
+
"""
|
|
418
|
+
Generate the model endpoint ID based on the event parameters and attach it to the event.
|
|
419
|
+
"""
|
|
420
|
+
super().__init__(**kwargs)
|
|
421
|
+
|
|
422
|
+
def do(self, full_event) -> typing.Union[storey.Event, None]:
|
|
423
|
+
# Getting model version and function uri from event
|
|
424
|
+
# and use them for retrieving the endpoint_id
|
|
425
|
+
function_uri = full_event.body.get(EventFieldType.FUNCTION_URI)
|
|
426
|
+
if not is_not_none(function_uri, [EventFieldType.FUNCTION_URI]):
|
|
427
|
+
return None
|
|
428
|
+
|
|
429
|
+
model = full_event.body.get(EventFieldType.MODEL)
|
|
430
|
+
if not is_not_none(model, [EventFieldType.MODEL]):
|
|
431
|
+
return None
|
|
432
|
+
|
|
433
|
+
version = full_event.body.get(EventFieldType.VERSION)
|
|
434
|
+
versioned_model = f"{model}:{version}" if version else f"{model}:latest"
|
|
435
|
+
|
|
436
|
+
endpoint_id = mlrun.common.model_monitoring.create_model_endpoint_uid(
|
|
437
|
+
function_uri=function_uri,
|
|
438
|
+
versioned_model=versioned_model,
|
|
439
|
+
)
|
|
440
|
+
|
|
441
|
+
endpoint_id = str(endpoint_id)
|
|
442
|
+
full_event.body[EventFieldType.ENDPOINT_ID] = endpoint_id
|
|
443
|
+
full_event.body[EventFieldType.VERSIONED_MODEL] = versioned_model
|
|
444
|
+
return full_event
|
|
445
|
+
|
|
446
|
+
|
|
389
447
|
class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
|
|
390
448
|
def __init__(self, **kwargs):
|
|
391
449
|
"""
|
|
@@ -459,28 +517,9 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
|
|
|
459
517
|
def do(self, full_event):
|
|
460
518
|
event = full_event.body
|
|
461
519
|
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
function_uri = event
|
|
465
|
-
if not is_not_none(function_uri, [EventFieldType.FUNCTION_URI]):
|
|
466
|
-
return None
|
|
467
|
-
|
|
468
|
-
model = event.get(EventFieldType.MODEL)
|
|
469
|
-
if not is_not_none(model, [EventFieldType.MODEL]):
|
|
470
|
-
return None
|
|
471
|
-
|
|
472
|
-
version = event.get(EventFieldType.VERSION)
|
|
473
|
-
versioned_model = f"{model}:{version}" if version else f"{model}:latest"
|
|
474
|
-
|
|
475
|
-
endpoint_id = mlrun.common.model_monitoring.create_model_endpoint_uid(
|
|
476
|
-
function_uri=function_uri,
|
|
477
|
-
versioned_model=versioned_model,
|
|
478
|
-
)
|
|
479
|
-
|
|
480
|
-
endpoint_id = str(endpoint_id)
|
|
481
|
-
|
|
482
|
-
event[EventFieldType.VERSIONED_MODEL] = versioned_model
|
|
483
|
-
event[EventFieldType.ENDPOINT_ID] = endpoint_id
|
|
520
|
+
versioned_model = event[EventFieldType.VERSIONED_MODEL]
|
|
521
|
+
endpoint_id = event[EventFieldType.ENDPOINT_ID]
|
|
522
|
+
function_uri = event[EventFieldType.FUNCTION_URI]
|
|
484
523
|
|
|
485
524
|
# In case this process fails, resume state from existing record
|
|
486
525
|
self.resume_state(endpoint_id)
|
|
@@ -598,6 +637,9 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
|
|
|
598
637
|
EventFieldType.PREDICTION: prediction,
|
|
599
638
|
EventFieldType.FIRST_REQUEST: self.first_request[endpoint_id],
|
|
600
639
|
EventFieldType.LAST_REQUEST: self.last_request[endpoint_id],
|
|
640
|
+
EventFieldType.LAST_REQUEST_TIMESTAMP: mlrun.utils.enrich_datetime_with_tz_info(
|
|
641
|
+
self.last_request[endpoint_id]
|
|
642
|
+
).timestamp(),
|
|
601
643
|
EventFieldType.ERROR_COUNT: self.error_count[endpoint_id],
|
|
602
644
|
EventFieldType.LABELS: event.get(EventFieldType.LABELS, {}),
|
|
603
645
|
EventFieldType.METRICS: event.get(EventFieldType.METRICS, {}),
|
mlrun/model_monitoring/writer.py
CHANGED
|
@@ -130,7 +130,6 @@ class ModelMonitoringWriter(StepToDict):
|
|
|
130
130
|
project_name: str,
|
|
131
131
|
result_kind: int,
|
|
132
132
|
) -> None:
|
|
133
|
-
logger.info("Sending an event")
|
|
134
133
|
entity = mlrun.common.schemas.alert.EventEntities(
|
|
135
134
|
kind=alert_objects.EventEntityKind.MODEL_ENDPOINT_RESULT,
|
|
136
135
|
project=project_name,
|
|
@@ -146,7 +145,9 @@ class ModelMonitoringWriter(StepToDict):
|
|
|
146
145
|
entity=entity,
|
|
147
146
|
value_dict=event_value,
|
|
148
147
|
)
|
|
148
|
+
logger.info("Sending a drift event")
|
|
149
149
|
mlrun.get_run_db().generate_event(event_kind, event_data)
|
|
150
|
+
logger.info("Drift event sent successfully")
|
|
150
151
|
|
|
151
152
|
@staticmethod
|
|
152
153
|
def _generate_alert_event_kind(
|
|
@@ -261,3 +262,5 @@ class ModelMonitoringWriter(StepToDict):
|
|
|
261
262
|
endpoint_id=endpoint_id,
|
|
262
263
|
attributes=json.loads(event[ResultData.RESULT_EXTRA_DATA]),
|
|
263
264
|
)
|
|
265
|
+
|
|
266
|
+
logger.info("Model monitoring writer finished handling event")
|
mlrun/projects/operations.py
CHANGED
|
@@ -187,6 +187,10 @@ def run_function(
|
|
|
187
187
|
task.spec.verbose = task.spec.verbose or verbose
|
|
188
188
|
|
|
189
189
|
if engine == "kfp":
|
|
190
|
+
if schedule:
|
|
191
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
192
|
+
"Scheduling jobs is not supported when running a workflow with the kfp engine."
|
|
193
|
+
)
|
|
190
194
|
return function.as_step(
|
|
191
195
|
name=name, runspec=task, workdir=workdir, outputs=outputs, labels=labels
|
|
192
196
|
)
|
mlrun/projects/project.py
CHANGED
|
@@ -600,6 +600,10 @@ def _run_project_setup(
|
|
|
600
600
|
if hasattr(mod, "setup"):
|
|
601
601
|
try:
|
|
602
602
|
project = getattr(mod, "setup")(project)
|
|
603
|
+
if not project or not isinstance(project, mlrun.projects.MlrunProject):
|
|
604
|
+
raise ValueError(
|
|
605
|
+
"MLRun project_setup:setup() must return a project object"
|
|
606
|
+
)
|
|
603
607
|
except Exception as exc:
|
|
604
608
|
logger.error(
|
|
605
609
|
"Failed to run project_setup script",
|
|
@@ -610,7 +614,9 @@ def _run_project_setup(
|
|
|
610
614
|
if save:
|
|
611
615
|
project.save()
|
|
612
616
|
else:
|
|
613
|
-
logger.warn(
|
|
617
|
+
logger.warn(
|
|
618
|
+
f"skipping setup, setup() handler was not found in {path.basename(setup_file_path)}"
|
|
619
|
+
)
|
|
614
620
|
return project
|
|
615
621
|
|
|
616
622
|
|
|
@@ -2967,6 +2973,7 @@ class MlrunProject(ModelObj):
|
|
|
2967
2973
|
source: str = None,
|
|
2968
2974
|
cleanup_ttl: int = None,
|
|
2969
2975
|
notifications: list[mlrun.model.Notification] = None,
|
|
2976
|
+
send_start_notification: bool = True,
|
|
2970
2977
|
) -> _PipelineRunStatus:
|
|
2971
2978
|
"""Run a workflow using kubeflow pipelines
|
|
2972
2979
|
|
|
@@ -3003,6 +3010,8 @@ class MlrunProject(ModelObj):
|
|
|
3003
3010
|
workflow and all its resources are deleted)
|
|
3004
3011
|
:param notifications:
|
|
3005
3012
|
List of notifications to send for workflow completion
|
|
3013
|
+
:param send_start_notification:
|
|
3014
|
+
Send a notification when the workflow starts
|
|
3006
3015
|
|
|
3007
3016
|
:returns: ~py:class:`~mlrun.projects.pipelines._PipelineRunStatus` instance
|
|
3008
3017
|
"""
|
|
@@ -3080,6 +3089,7 @@ class MlrunProject(ModelObj):
|
|
|
3080
3089
|
namespace=namespace,
|
|
3081
3090
|
source=source,
|
|
3082
3091
|
notifications=notifications,
|
|
3092
|
+
send_start_notification=send_start_notification,
|
|
3083
3093
|
)
|
|
3084
3094
|
# run is None when scheduling
|
|
3085
3095
|
if run and run.state == mlrun_pipelines.common.models.RunStatuses.failed:
|
mlrun/runtimes/__init__.py
CHANGED
|
@@ -30,6 +30,8 @@ __all__ = [
|
|
|
30
30
|
"MpiRuntimeV1",
|
|
31
31
|
]
|
|
32
32
|
|
|
33
|
+
import typing
|
|
34
|
+
|
|
33
35
|
from mlrun.runtimes.utils import resolve_spark_operator_version
|
|
34
36
|
|
|
35
37
|
from ..common.runtimes.constants import MPIJobCRDVersions
|
|
@@ -181,7 +183,7 @@ class RuntimeKinds:
|
|
|
181
183
|
]
|
|
182
184
|
|
|
183
185
|
@staticmethod
|
|
184
|
-
def is_log_collectable_runtime(kind: str):
|
|
186
|
+
def is_log_collectable_runtime(kind: typing.Optional[str]):
|
|
185
187
|
"""
|
|
186
188
|
whether log collector can collect logs for that runtime
|
|
187
189
|
:param kind: kind name
|
|
@@ -192,13 +194,18 @@ class RuntimeKinds:
|
|
|
192
194
|
if RuntimeKinds.is_local_runtime(kind):
|
|
193
195
|
return False
|
|
194
196
|
|
|
195
|
-
if
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
197
|
+
if (
|
|
198
|
+
kind
|
|
199
|
+
not in [
|
|
200
|
+
# dask implementation is different from other runtimes, because few runs can be run against the same
|
|
201
|
+
# runtime resource, so collecting logs on that runtime resource won't be correct, the way we collect
|
|
202
|
+
# logs for dask is by using `log_std` on client side after we execute the code against the cluster,
|
|
203
|
+
# as submitting the run with the dask client will return the run stdout.
|
|
204
|
+
# For more information head to `DaskCluster._run`.
|
|
205
|
+
RuntimeKinds.dask
|
|
206
|
+
]
|
|
207
|
+
+ RuntimeKinds.nuclio_runtimes()
|
|
208
|
+
):
|
|
202
209
|
return True
|
|
203
210
|
|
|
204
211
|
return False
|
mlrun/runtimes/base.py
CHANGED
|
@@ -18,6 +18,7 @@ import nuclio
|
|
|
18
18
|
|
|
19
19
|
import mlrun.common.schemas as schemas
|
|
20
20
|
import mlrun.errors
|
|
21
|
+
import mlrun.run
|
|
21
22
|
from mlrun.common.runtimes.constants import NuclioIngressAddTemplatedIngressModes
|
|
22
23
|
from mlrun.runtimes import RemoteRuntime
|
|
23
24
|
from mlrun.runtimes.nuclio import min_nuclio_versions
|
|
@@ -121,6 +122,11 @@ class ApplicationSpec(NuclioSpec):
|
|
|
121
122
|
state_thresholds=state_thresholds,
|
|
122
123
|
disable_default_http_trigger=disable_default_http_trigger,
|
|
123
124
|
)
|
|
125
|
+
|
|
126
|
+
# Override default min/max replicas (don't assume application is stateless)
|
|
127
|
+
self.min_replicas = min_replicas or 1
|
|
128
|
+
self.max_replicas = max_replicas or 1
|
|
129
|
+
|
|
124
130
|
self.internal_application_port = (
|
|
125
131
|
internal_application_port
|
|
126
132
|
or mlrun.mlconf.function.application.default_sidecar_internal_port
|
|
@@ -168,12 +174,13 @@ class ApplicationStatus(NuclioStatus):
|
|
|
168
174
|
self.application_source = application_source or None
|
|
169
175
|
self.sidecar_name = sidecar_name or None
|
|
170
176
|
self.api_gateway_name = api_gateway_name or None
|
|
171
|
-
self.api_gateway = api_gateway or None
|
|
177
|
+
self.api_gateway: typing.Optional[APIGateway] = api_gateway or None
|
|
172
178
|
self.url = url or None
|
|
173
179
|
|
|
174
180
|
|
|
175
181
|
class ApplicationRuntime(RemoteRuntime):
|
|
176
182
|
kind = "application"
|
|
183
|
+
reverse_proxy_image = None
|
|
177
184
|
|
|
178
185
|
@min_nuclio_versions("1.13.1")
|
|
179
186
|
def __init__(self, spec=None, metadata=None):
|
|
@@ -252,6 +259,15 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
252
259
|
"Application sidecar spec must include a command if args are provided"
|
|
253
260
|
)
|
|
254
261
|
|
|
262
|
+
def prepare_image_for_deploy(self):
|
|
263
|
+
if self.spec.build.source and self.spec.build.load_source_on_run:
|
|
264
|
+
logger.warning(
|
|
265
|
+
"Application runtime requires loading the source into the application image. "
|
|
266
|
+
f"Even though {self.spec.build.load_source_on_run=}, loading on build will be forced."
|
|
267
|
+
)
|
|
268
|
+
self.spec.build.load_source_on_run = False
|
|
269
|
+
super().prepare_image_for_deploy()
|
|
270
|
+
|
|
255
271
|
def deploy(
|
|
256
272
|
self,
|
|
257
273
|
project="",
|
|
@@ -273,6 +289,7 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
273
289
|
"""
|
|
274
290
|
Deploy function, builds the application image if required (self.requires_build()) or force_build is True,
|
|
275
291
|
Once the image is built, the function is deployed.
|
|
292
|
+
|
|
276
293
|
:param project: Project name
|
|
277
294
|
:param tag: Function tag
|
|
278
295
|
:param verbose: Set True for verbose logging
|
|
@@ -306,10 +323,11 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
306
323
|
show_on_failure=show_on_failure,
|
|
307
324
|
)
|
|
308
325
|
|
|
309
|
-
self
|
|
326
|
+
# This is a class method that accepts a function instance, so we pass self as the function instance
|
|
327
|
+
self._ensure_reverse_proxy_configurations(self)
|
|
310
328
|
self._configure_application_sidecar()
|
|
311
329
|
|
|
312
|
-
#
|
|
330
|
+
# We only allow accessing the application via the API Gateway
|
|
313
331
|
name_tag = tag or self.metadata.tag
|
|
314
332
|
self.status.api_gateway_name = (
|
|
315
333
|
f"{self.metadata.name}-{name_tag}" if name_tag else self.metadata.name
|
|
@@ -346,9 +364,13 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
346
364
|
)
|
|
347
365
|
|
|
348
366
|
def with_source_archive(
|
|
349
|
-
self,
|
|
367
|
+
self,
|
|
368
|
+
source,
|
|
369
|
+
workdir=None,
|
|
370
|
+
pull_at_runtime: bool = False,
|
|
371
|
+
target_dir: str = None,
|
|
350
372
|
):
|
|
351
|
-
"""load the code from git/tar/zip archive at
|
|
373
|
+
"""load the code from git/tar/zip archive at build
|
|
352
374
|
|
|
353
375
|
:param source: valid absolute path or URL to git, zip, or tar file, e.g.
|
|
354
376
|
git://github.com/mlrun/something.git
|
|
@@ -356,13 +378,20 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
356
378
|
note path source must exist on the image or exist locally when run is local
|
|
357
379
|
(it is recommended to use 'workdir' when source is a filepath instead)
|
|
358
380
|
:param workdir: working dir relative to the archive root (e.g. './subdir') or absolute to the image root
|
|
359
|
-
:param pull_at_runtime:
|
|
381
|
+
:param pull_at_runtime: currently not supported, source must be loaded into the image during the build process
|
|
360
382
|
:param target_dir: target dir on runtime pod or repo clone / archive extraction
|
|
361
383
|
"""
|
|
384
|
+
if pull_at_runtime:
|
|
385
|
+
logger.warning(
|
|
386
|
+
f"{pull_at_runtime=} is currently not supported for application runtime "
|
|
387
|
+
"and will be overridden to False",
|
|
388
|
+
pull_at_runtime=pull_at_runtime,
|
|
389
|
+
)
|
|
390
|
+
|
|
362
391
|
self._configure_mlrun_build_with_source(
|
|
363
392
|
source=source,
|
|
364
393
|
workdir=workdir,
|
|
365
|
-
pull_at_runtime=
|
|
394
|
+
pull_at_runtime=False,
|
|
366
395
|
target_dir=target_dir,
|
|
367
396
|
)
|
|
368
397
|
|
|
@@ -391,8 +420,8 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
391
420
|
"main:Handler",
|
|
392
421
|
)
|
|
393
422
|
|
|
394
|
-
@
|
|
395
|
-
def get_filename_and_handler(
|
|
423
|
+
@staticmethod
|
|
424
|
+
def get_filename_and_handler() -> (str, str):
|
|
396
425
|
reverse_proxy_file_path = pathlib.Path(__file__).parent / "reverse_proxy.go"
|
|
397
426
|
return str(reverse_proxy_file_path), "Handler"
|
|
398
427
|
|
|
@@ -488,6 +517,39 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
488
517
|
**http_client_kwargs,
|
|
489
518
|
)
|
|
490
519
|
|
|
520
|
+
@classmethod
|
|
521
|
+
def deploy_reverse_proxy_image(cls):
|
|
522
|
+
"""
|
|
523
|
+
Build the reverse proxy image and save it.
|
|
524
|
+
The reverse proxy image is used to route requests to the application sidecar.
|
|
525
|
+
This is useful when you want to decrease build time by building the application image only once.
|
|
526
|
+
|
|
527
|
+
:param use_cache: Use the cache when building the image
|
|
528
|
+
"""
|
|
529
|
+
# create a function that includes only the reverse proxy, without the application
|
|
530
|
+
|
|
531
|
+
reverse_proxy_func = mlrun.run.new_function(
|
|
532
|
+
name="reverse-proxy-temp", kind="remote"
|
|
533
|
+
)
|
|
534
|
+
# default max replicas is 4, we only need one replica for the reverse proxy
|
|
535
|
+
reverse_proxy_func.spec.max_replicas = 1
|
|
536
|
+
|
|
537
|
+
# the reverse proxy image should not be based on another image
|
|
538
|
+
reverse_proxy_func.set_config("spec.build.baseImage", None)
|
|
539
|
+
reverse_proxy_func.spec.image = ""
|
|
540
|
+
reverse_proxy_func.spec.build.base_image = ""
|
|
541
|
+
|
|
542
|
+
cls._ensure_reverse_proxy_configurations(reverse_proxy_func)
|
|
543
|
+
reverse_proxy_func.deploy()
|
|
544
|
+
|
|
545
|
+
# save the created container image
|
|
546
|
+
cls.reverse_proxy_image = reverse_proxy_func.status.container_image
|
|
547
|
+
|
|
548
|
+
# delete the function to avoid cluttering the project
|
|
549
|
+
mlrun.get_run_db().delete_function(
|
|
550
|
+
reverse_proxy_func.metadata.name, reverse_proxy_func.metadata.project
|
|
551
|
+
)
|
|
552
|
+
|
|
491
553
|
def _run(self, runobj: "mlrun.RunObject", execution):
|
|
492
554
|
raise mlrun.runtimes.RunError(
|
|
493
555
|
"Application runtime .run() is not yet supported. Use .invoke() instead."
|
|
@@ -515,6 +577,13 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
515
577
|
args=self.spec.args,
|
|
516
578
|
)
|
|
517
579
|
|
|
580
|
+
if self.spec.build.source in [".", "./"]:
|
|
581
|
+
logger.info(
|
|
582
|
+
"The application is configured to use the project's source. "
|
|
583
|
+
"Application runtime requires loading the source into the application image. "
|
|
584
|
+
"Loading on build will be forced regardless of whether 'pull_at_runtime=True' was configured."
|
|
585
|
+
)
|
|
586
|
+
|
|
518
587
|
with_mlrun = self._resolve_build_with_mlrun(with_mlrun)
|
|
519
588
|
return self._build_image(
|
|
520
589
|
builder_env=builder_env,
|
|
@@ -527,21 +596,29 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
527
596
|
with_mlrun=with_mlrun,
|
|
528
597
|
)
|
|
529
598
|
|
|
530
|
-
|
|
531
|
-
|
|
599
|
+
@staticmethod
|
|
600
|
+
def _ensure_reverse_proxy_configurations(function: RemoteRuntime):
|
|
601
|
+
if function.spec.build.functionSourceCode or function.status.container_image:
|
|
532
602
|
return
|
|
533
603
|
|
|
534
604
|
filename, handler = ApplicationRuntime.get_filename_and_handler()
|
|
535
605
|
name, spec, code = nuclio.build_file(
|
|
536
606
|
filename,
|
|
537
|
-
name=
|
|
607
|
+
name=function.metadata.name,
|
|
538
608
|
handler=handler,
|
|
539
609
|
)
|
|
540
|
-
|
|
541
|
-
|
|
610
|
+
function.spec.function_handler = mlrun.utils.get_in(spec, "spec.handler")
|
|
611
|
+
function.spec.build.functionSourceCode = mlrun.utils.get_in(
|
|
542
612
|
spec, "spec.build.functionSourceCode"
|
|
543
613
|
)
|
|
544
|
-
|
|
614
|
+
function.spec.nuclio_runtime = mlrun.utils.get_in(spec, "spec.runtime")
|
|
615
|
+
|
|
616
|
+
# default the reverse proxy logger level to info
|
|
617
|
+
logger_sinks_key = "spec.loggerSinks"
|
|
618
|
+
if not function.spec.config.get(logger_sinks_key):
|
|
619
|
+
function.set_config(
|
|
620
|
+
logger_sinks_key, [{"level": "info", "sink": "myStdoutLoggerSink"}]
|
|
621
|
+
)
|
|
545
622
|
|
|
546
623
|
def _configure_application_sidecar(self):
|
|
547
624
|
# Save the application image in the status to allow overriding it with the reverse proxy entry point
|
|
@@ -552,8 +629,12 @@ class ApplicationRuntime(RemoteRuntime):
|
|
|
552
629
|
self.status.application_image = self.spec.image
|
|
553
630
|
self.spec.image = ""
|
|
554
631
|
|
|
555
|
-
if
|
|
556
|
-
|
|
632
|
+
# reuse the reverse proxy image if it was built before
|
|
633
|
+
if (
|
|
634
|
+
reverse_proxy_image := self.status.container_image
|
|
635
|
+
or self.reverse_proxy_image
|
|
636
|
+
):
|
|
637
|
+
self.from_image(reverse_proxy_image)
|
|
557
638
|
|
|
558
639
|
self.status.sidecar_name = f"{self.metadata.name}-sidecar"
|
|
559
640
|
self.with_sidecar(
|
|
@@ -689,7 +689,7 @@ class RemoteRuntime(KubeResource):
|
|
|
689
689
|
"State thresholds do not apply for nuclio as it has its own function pods healthiness monitoring"
|
|
690
690
|
)
|
|
691
691
|
|
|
692
|
-
@min_nuclio_versions("1.
|
|
692
|
+
@min_nuclio_versions("1.13.1")
|
|
693
693
|
def disable_default_http_trigger(
|
|
694
694
|
self,
|
|
695
695
|
):
|
|
@@ -707,6 +707,10 @@ class RemoteRuntime(KubeResource):
|
|
|
707
707
|
"""
|
|
708
708
|
self.spec.disable_default_http_trigger = False
|
|
709
709
|
|
|
710
|
+
def skip_image_enrichment(self):
|
|
711
|
+
# make sure the API does not enrich the base image if the function is not a python function
|
|
712
|
+
return self.spec.nuclio_runtime and "python" not in self.spec.nuclio_runtime
|
|
713
|
+
|
|
710
714
|
def _get_state(
|
|
711
715
|
self,
|
|
712
716
|
dashboard="",
|
mlrun/runtimes/pod.py
CHANGED
|
@@ -1174,9 +1174,9 @@ class KubeResource(BaseRuntime, KfpAdapterMixin):
|
|
|
1174
1174
|
"""
|
|
1175
1175
|
if node_name:
|
|
1176
1176
|
self.spec.node_name = node_name
|
|
1177
|
-
if node_selector:
|
|
1177
|
+
if node_selector is not None:
|
|
1178
1178
|
self.spec.node_selector = node_selector
|
|
1179
|
-
if affinity:
|
|
1179
|
+
if affinity is not None:
|
|
1180
1180
|
self.spec.affinity = affinity
|
|
1181
1181
|
if tolerations is not None:
|
|
1182
1182
|
self.spec.tolerations = tolerations
|
mlrun/runtimes/remotesparkjob.py
CHANGED
|
@@ -102,16 +102,13 @@ class RemoteSparkRuntime(KubejobRuntime):
|
|
|
102
102
|
|
|
103
103
|
@classmethod
|
|
104
104
|
def deploy_default_image(cls):
|
|
105
|
-
|
|
106
|
-
from mlrun.run import new_function
|
|
107
|
-
|
|
108
|
-
sj = new_function(
|
|
105
|
+
sj = mlrun.new_function(
|
|
109
106
|
kind="remote-spark", name="remote-spark-default-image-deploy-temp"
|
|
110
107
|
)
|
|
111
108
|
sj.spec.build.image = cls.default_image
|
|
112
109
|
sj.with_spark_service(spark_service="dummy-spark")
|
|
113
110
|
sj.deploy()
|
|
114
|
-
get_run_db().delete_function(name=sj.metadata.name)
|
|
111
|
+
mlrun.get_run_db().delete_function(name=sj.metadata.name)
|
|
115
112
|
|
|
116
113
|
def is_deployed(self):
|
|
117
114
|
if (
|
|
@@ -505,13 +505,10 @@ class Spark3Runtime(KubejobRuntime):
|
|
|
505
505
|
raise NotImplementedError(
|
|
506
506
|
"Setting node name is not supported for spark runtime"
|
|
507
507
|
)
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
"Setting affinity is not supported for spark runtime"
|
|
513
|
-
)
|
|
514
|
-
super().with_node_selection(node_name, node_selector, affinity, tolerations)
|
|
508
|
+
self.with_driver_node_selection(node_name, node_selector, affinity, tolerations)
|
|
509
|
+
self.with_executor_node_selection(
|
|
510
|
+
node_name, node_selector, affinity, tolerations
|
|
511
|
+
)
|
|
515
512
|
|
|
516
513
|
def with_driver_node_selection(
|
|
517
514
|
self,
|
|
@@ -537,11 +534,11 @@ class Spark3Runtime(KubejobRuntime):
|
|
|
537
534
|
raise NotImplementedError(
|
|
538
535
|
"Setting node name is not supported for spark runtime"
|
|
539
536
|
)
|
|
540
|
-
if affinity:
|
|
537
|
+
if affinity is not None:
|
|
541
538
|
self.spec.driver_affinity = affinity
|
|
542
|
-
if node_selector:
|
|
539
|
+
if node_selector is not None:
|
|
543
540
|
self.spec.driver_node_selector = node_selector
|
|
544
|
-
if tolerations:
|
|
541
|
+
if tolerations is not None:
|
|
545
542
|
self.spec.driver_tolerations = tolerations
|
|
546
543
|
|
|
547
544
|
def with_executor_node_selection(
|
|
@@ -568,11 +565,11 @@ class Spark3Runtime(KubejobRuntime):
|
|
|
568
565
|
raise NotImplementedError(
|
|
569
566
|
"Setting node name is not supported for spark runtime"
|
|
570
567
|
)
|
|
571
|
-
if affinity:
|
|
568
|
+
if affinity is not None:
|
|
572
569
|
self.spec.executor_affinity = affinity
|
|
573
|
-
if node_selector:
|
|
570
|
+
if node_selector is not None:
|
|
574
571
|
self.spec.executor_node_selector = node_selector
|
|
575
|
-
if tolerations:
|
|
572
|
+
if tolerations is not None:
|
|
576
573
|
self.spec.executor_tolerations = tolerations
|
|
577
574
|
|
|
578
575
|
def with_preemption_mode(
|
|
@@ -811,9 +808,7 @@ class Spark3Runtime(KubejobRuntime):
|
|
|
811
808
|
|
|
812
809
|
@classmethod
|
|
813
810
|
def deploy_default_image(cls, with_gpu=False):
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
sj = new_function(kind=cls.kind, name="spark-default-image-deploy-temp")
|
|
811
|
+
sj = mlrun.new_function(kind=cls.kind, name="spark-default-image-deploy-temp")
|
|
817
812
|
sj.spec.build.image = cls._get_default_deployed_mlrun_image_name(with_gpu)
|
|
818
813
|
|
|
819
814
|
# setting required resources
|
mlrun/serving/routers.py
CHANGED
|
@@ -32,7 +32,6 @@ from mlrun.errors import err_to_str
|
|
|
32
32
|
from mlrun.utils import logger, now_date
|
|
33
33
|
|
|
34
34
|
from ..common.helpers import parse_versioned_object_uri
|
|
35
|
-
from ..config import config
|
|
36
35
|
from .server import GraphServer
|
|
37
36
|
from .utils import RouterToDict, _extract_input_data, _update_result_body
|
|
38
37
|
from .v2_serving import _ModelLogPusher
|
|
@@ -1057,9 +1056,7 @@ def _init_endpoint_record(
|
|
|
1057
1056
|
function_uri=graph_server.function_uri,
|
|
1058
1057
|
model=versioned_model_name,
|
|
1059
1058
|
model_class=voting_ensemble.__class__.__name__,
|
|
1060
|
-
stream_path=
|
|
1061
|
-
project=project, kind="stream"
|
|
1062
|
-
),
|
|
1059
|
+
stream_path=voting_ensemble.context.stream.stream_uri,
|
|
1063
1060
|
active=True,
|
|
1064
1061
|
monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled,
|
|
1065
1062
|
),
|
mlrun/serving/server.py
CHANGED
|
@@ -38,10 +38,7 @@ from ..errors import MLRunInvalidArgumentError
|
|
|
38
38
|
from ..model import ModelObj
|
|
39
39
|
from ..utils import get_caller_globals
|
|
40
40
|
from .states import RootFlowStep, RouterStep, get_function, graph_root_setter
|
|
41
|
-
from .utils import
|
|
42
|
-
event_id_key,
|
|
43
|
-
event_path_key,
|
|
44
|
-
)
|
|
41
|
+
from .utils import event_id_key, event_path_key
|
|
45
42
|
|
|
46
43
|
|
|
47
44
|
class _StreamContext:
|
|
@@ -71,15 +68,15 @@ class _StreamContext:
|
|
|
71
68
|
function_uri, config.default_project
|
|
72
69
|
)
|
|
73
70
|
|
|
74
|
-
stream_uri = mlrun.model_monitoring.get_stream_path(project=project)
|
|
71
|
+
self.stream_uri = mlrun.model_monitoring.get_stream_path(project=project)
|
|
75
72
|
|
|
76
73
|
if log_stream:
|
|
77
74
|
# Update the stream path to the log stream value
|
|
78
|
-
stream_uri = log_stream.format(project=project)
|
|
75
|
+
self.stream_uri = log_stream.format(project=project)
|
|
79
76
|
|
|
80
77
|
stream_args = parameters.get("stream_args", {})
|
|
81
78
|
|
|
82
|
-
self.output_stream = get_stream_pusher(stream_uri, **stream_args)
|
|
79
|
+
self.output_stream = get_stream_pusher(self.stream_uri, **stream_args)
|
|
83
80
|
|
|
84
81
|
|
|
85
82
|
class GraphServer(ModelObj):
|