mlrun 1.7.0rc48__py3-none-any.whl → 1.7.0rc50__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/common/formatters/run.py +3 -0
- mlrun/common/schemas/auth.py +3 -0
- mlrun/common/schemas/model_monitoring/constants.py +0 -7
- mlrun/common/schemas/workflow.py +9 -2
- mlrun/data_types/data_types.py +1 -1
- mlrun/db/httpdb.py +11 -4
- mlrun/execution.py +7 -1
- mlrun/feature_store/retrieval/spark_merger.py +0 -4
- mlrun/model_monitoring/api.py +1 -12
- mlrun/model_monitoring/applications/__init__.py +1 -2
- mlrun/model_monitoring/applications/base.py +2 -182
- mlrun/model_monitoring/applications/context.py +2 -9
- mlrun/model_monitoring/applications/evidently_base.py +0 -74
- mlrun/model_monitoring/applications/histogram_data_drift.py +2 -2
- mlrun/model_monitoring/controller.py +45 -208
- mlrun/projects/operations.py +11 -8
- mlrun/projects/pipelines.py +12 -7
- mlrun/projects/project.py +1 -4
- mlrun/runtimes/nuclio/api_gateway.py +6 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc48.dist-info → mlrun-1.7.0rc50.dist-info}/METADATA +99 -21
- {mlrun-1.7.0rc48.dist-info → mlrun-1.7.0rc50.dist-info}/RECORD +26 -28
- mlrun/model_monitoring/application.py +0 -19
- mlrun/model_monitoring/evidently_application.py +0 -20
- {mlrun-1.7.0rc48.dist-info → mlrun-1.7.0rc50.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc48.dist-info → mlrun-1.7.0rc50.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc48.dist-info → mlrun-1.7.0rc50.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc48.dist-info → mlrun-1.7.0rc50.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/controller.py
CHANGED

@@ -15,28 +15,22 @@
 import concurrent.futures
 import datetime
 import json
-import multiprocessing
 import os
 import re
 from collections.abc import Iterator
-from typing import
+from typing import NamedTuple, Optional, Union, cast
 
 import nuclio
 
 import mlrun
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.data_types.infer
-import mlrun.feature_store as fstore
 import mlrun.model_monitoring.db.stores
-from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
 from mlrun.datastore import get_stream_pusher
-from mlrun.datastore.targets import ParquetTarget
 from mlrun.errors import err_to_str
 from mlrun.model_monitoring.helpers import (
     _BatchDict,
     batch_dict2timedelta,
-    calculate_inputs_statistics,
-    get_monitoring_parquet_path,
     get_stream_path,
 )
 from mlrun.utils import datetime_now, logger
@@ -292,15 +286,9 @@ class MonitoringApplicationController:
         )
 
         self.model_monitoring_access_key = self._get_model_monitoring_access_key()
-        self.
-        self.
-            kind=mm_constants.FileTargetKind.APPS_PARQUET,
+        self.tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
+            project=self.project
         )
-        self.storage_options = None
-        if not mlrun.mlconf.is_ce_mode():
-            self._initialize_v3io_configurations()
-        elif self.parquet_directory.startswith("s3://"):
-            self.storage_options = mlrun.mlconf.get_s3_storage_options()
 
     @staticmethod
     def _get_model_monitoring_access_key() -> Optional[str]:
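For orientation, a minimal sketch of the new initialization flow, using only the names visible in the added lines above (the class is simplified, and nothing beyond this single call to the connector factory is assumed):

    import mlrun
    import mlrun.model_monitoring

    class ControllerInitSketch:
        """Simplified stand-in for MonitoringApplicationController.__init__."""

        def __init__(self, project: str):
            self.project = project
            # The controller now resolves a per-project TSDB connector up front,
            # replacing the removed parquet-directory and storage-options setup.
            # (Instantiating this requires a configured mlrun environment.)
            self.tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
                project=self.project
            )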
@@ -310,12 +298,6 @@ class MonitoringApplicationController:
         access_key = mlrun.mlconf.get_v3io_access_key()
         return access_key
 
-    def _initialize_v3io_configurations(self) -> None:
-        self.storage_options = dict(
-            v3io_access_key=self.model_monitoring_access_key,
-            v3io_api=mlrun.mlconf.v3io_api,
-        )
-
     def run(self) -> None:
         """
         Main method for run all the relevant monitoring applications on each endpoint.
@@ -367,11 +349,8 @@ class MonitoringApplicationController:
             )
             return
         # Initialize a process pool that will be used to run each endpoint applications on a dedicated process
-        with concurrent.futures.ProcessPoolExecutor(
+        with concurrent.futures.ThreadPoolExecutor(
             max_workers=min(len(endpoints), 10),
-            # On Linux, the default is "fork" (this is set to change in Python 3.14), which inherits the current heap
-            # and resources (such as sockets), which is not what we want (ML-7160)
-            mp_context=multiprocessing.get_context("spawn"),
         ) as pool:
             for endpoint in endpoints:
                 if (
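The executor swap is the behavioral core of this hunk. A hedged side-by-side sketch (the `work` callable and the `submit` usage are illustrative, not taken from the diff): a process pool with a "spawn" context avoids inheriting the parent's heap and open sockets on fork (the ML-7160 concern quoted in the removed comment), while a thread pool shares interpreter state and needs no start-method handling at all:

    import concurrent.futures
    import multiprocessing

    def run_with_processes(endpoints, work):
        # Old approach: dedicated processes, explicitly spawned so children
        # start from a clean state instead of a fork of the parent.
        # (Submitted callables must be picklable.)
        with concurrent.futures.ProcessPoolExecutor(
            max_workers=min(len(endpoints), 10),
            mp_context=multiprocessing.get_context("spawn"),
        ) as pool:
            for endpoint in endpoints:
                pool.submit(work, endpoint)

    def run_with_threads(endpoints, work):
        # New approach: threads share the parent's resources by design, so
        # the fork/spawn question disappears (at the cost of the GIL for
        # CPU-bound work).
        with concurrent.futures.ThreadPoolExecutor(
            max_workers=min(len(endpoints), 10),
        ) as pool:
            for endpoint in endpoints:
                pool.submit(work, endpoint)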
@@ -395,13 +374,10 @@ class MonitoringApplicationController:
                     applications_names=applications_names,
                     batch_window_generator=self._batch_window_generator,
                     project=self.project,
-                    parquet_directory=self.parquet_directory,
-                    storage_options=self.storage_options,
                     model_monitoring_access_key=self.model_monitoring_access_key,
+                    tsdb_connector=self.tsdb_connector,
                 )
 
-        self._delete_old_parquet(endpoints=endpoints)
-
     @classmethod
     def model_endpoint_process(
         cls,
@@ -409,9 +385,8 @@ class MonitoringApplicationController:
         applications_names: list[str],
         batch_window_generator: _BatchWindowGenerator,
         project: str,
-        parquet_directory: str,
-        storage_options: dict,
         model_monitoring_access_key: str,
+        tsdb_connector: mlrun.model_monitoring.db.tsdb.TSDBConnector,
     ) -> None:
         """
         Process a model endpoint and trigger the monitoring applications. This function running on different process
@@ -422,16 +397,13 @@ class MonitoringApplicationController:
         :param applications_names: (list[str]) List of application names to push results to.
         :param batch_window_generator: (_BatchWindowGenerator) An object that generates _BatchWindow objects.
         :param project: (str) Project name.
-        :param parquet_directory: (str) Directory to store application parquet files
-        :param storage_options: (dict) Storage options for writing ParquetTarget.
         :param model_monitoring_access_key: (str) Access key to apply the model monitoring process.
+        :param tsdb_connector: (mlrun.model_monitoring.db.tsdb.TSDBConnector) TSDB connector
         """
         endpoint_id = endpoint[mm_constants.EventFieldType.UID]
+        # if false the endpoint represent batch infer step.
+        has_stream = endpoint[mm_constants.EventFieldType.STREAM_PATH] != ""
         try:
-            m_fs = fstore.get_feature_set(
-                endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
-            )
-
             for application in applications_names:
                 batch_window = batch_window_generator.get_batch_window(
                     project=project,
@@ -439,158 +411,70 @@ class MonitoringApplicationController:
                     application=application,
                     first_request=endpoint[mm_constants.EventFieldType.FIRST_REQUEST],
                     last_request=endpoint[mm_constants.EventFieldType.LAST_REQUEST],
-                    has_stream=
+                    has_stream=has_stream,
                 )
 
                 for start_infer_time, end_infer_time in batch_window.get_intervals():
-
-
-
-
-
+                    prediction_metric = tsdb_connector.read_predictions(
+                        endpoint_id=endpoint_id,
+                        start=start_infer_time,
+                        end=end_infer_time,
+                    )
+                    if not prediction_metric.data and has_stream:
+                        logger.info(
+                            "No data found for the given interval",
+                            start=start_infer_time,
+                            end=end_infer_time,
+                            endpoint_id=endpoint_id,
+                        )
+                    else:
+                        logger.info(
+                            "Data found for the given interval",
+                            start=start_infer_time,
+                            end=end_infer_time,
                             endpoint_id=endpoint_id,
+                        )
+                        cls._push_to_applications(
                             start_infer_time=start_infer_time,
                             end_infer_time=end_infer_time,
-
-
-
-
-
-                    df = offline_response.to_dataframe()
-                    parquet_target_path = offline_response.vector.get_target_path()
-
-                    if len(df) == 0:
-                        logger.info(
-                            "During this time window, the endpoint has not received any data",
-                            endpoint=endpoint[mm_constants.EventFieldType.UID],
-                            start_time=start_infer_time,
-                            end_time=end_infer_time,
-                        )
-                        continue
-
-                except FileNotFoundError:
-                    logger.warn(
-                        "No parquets were written yet",
-                        endpoint=endpoint[mm_constants.EventFieldType.UID],
+                            endpoint_id=endpoint_id,
+                            project=project,
+                            applications_names=[application],
+                            model_monitoring_access_key=model_monitoring_access_key,
                         )
-                    continue
-
-                # Get the timestamp of the latest request:
-                latest_request = df[mm_constants.EventFieldType.TIMESTAMP].iloc[-1]
-
-                # Get the feature stats from the model endpoint for reference data
-                feature_stats = json.loads(
-                    endpoint[mm_constants.EventFieldType.FEATURE_STATS]
-                )
-
-                # Pad the original feature stats to accommodate current
-                # data out of the original range (unless already padded)
-                pad_features_hist(FeatureStats(feature_stats))
-
-                # Get the current stats:
-                current_stats = calculate_inputs_statistics(
-                    sample_set_statistics=feature_stats, inputs=df
-                )
-                # end - TODO : delete in 1.9.0 (V1 app deprecation)
-                cls._push_to_applications(
-                    current_stats=current_stats,
-                    feature_stats=feature_stats,
-                    start_infer_time=start_infer_time,
-                    end_infer_time=end_infer_time,
-                    endpoint_id=endpoint_id,
-                    latest_request=latest_request,
-                    project=project,
-                    applications_names=[application],
-                    model_monitoring_access_key=model_monitoring_access_key,
-                    parquet_target_path=parquet_target_path,
-                )
         except Exception:
             logger.exception(
                 "Encountered an exception",
                 endpoint_id=endpoint[mm_constants.EventFieldType.UID],
             )
 
-    def _delete_old_parquet(self, endpoints: list[dict[str, Any]], days: int = 1):
-        """
-        Delete application parquets older than the argument days.
-
-        :param endpoints: A list of dictionaries of model endpoints records.
-        """
-        if self.parquet_directory.startswith("v3io:///"):
-            # create fs with access to the user side (under projects)
-            store, _, _ = mlrun.store_manager.get_or_create_store(
-                self.parquet_directory,
-                {"V3IO_ACCESS_KEY": self.model_monitoring_access_key},
-            )
-            fs = store.filesystem
-
-            # calculate time threshold (keep only files from the last 24 hours)
-            time_to_keep = (
-                datetime.datetime.now(tz=datetime.timezone.utc)
-                - datetime.timedelta(days=days)
-            ).timestamp()
-
-            for endpoint in endpoints:
-                try:
-                    apps_parquet_directories = fs.listdir(
-                        path=f"{self.parquet_directory}"
-                        f"/key={endpoint[mm_constants.EventFieldType.UID]}"
-                    )
-                    for directory in apps_parquet_directories:
-                        if directory["mtime"] < time_to_keep:
-                            # Delete files
-                            fs.rm(path=directory["name"], recursive=True)
-                            # Delete directory
-                            fs.rmdir(path=directory["name"])
-                except FileNotFoundError:
-                    logger.info(
-                        "Application parquet directory is empty, "
-                        "probably parquets have not yet been created for this app",
-                        endpoint=endpoint[mm_constants.EventFieldType.UID],
-                        path=f"{self.parquet_directory}"
-                        f"/key={endpoint[mm_constants.EventFieldType.UID]}",
-                    )
-
     @staticmethod
     def _push_to_applications(
-
-
-
-
-
-
-        project,
-        applications_names,
-        model_monitoring_access_key,
-        parquet_target_path,
+        start_infer_time: datetime.datetime,
+        end_infer_time: datetime.datetime,
+        endpoint_id: str,
+        project: str,
+        applications_names: list[str],
+        model_monitoring_access_key: str,
     ):
         """
         Pushes data to multiple stream applications.
 
-        :param
-        :param
-        :param
-        :param
-        :param
-        :param
-        :param project: mlrun Project name.
-        :param applications_names: List of application names to which data will be pushed.
+        :param start_infer_time: The beginning of the infer interval window.
+        :param end_infer_time: The end of the infer interval window.
+        :param endpoint_id: Identifier for the model endpoint.
+        :param project: mlrun Project name.
+        :param applications_names: List of application names to which data will be pushed.
+        :param model_monitoring_access_key: Access key to apply the model monitoring process.
 
         """
-
         data = {
-            mm_constants.ApplicationEvent.CURRENT_STATS: json.dumps(current_stats),
-            mm_constants.ApplicationEvent.FEATURE_STATS: json.dumps(feature_stats),
-            mm_constants.ApplicationEvent.SAMPLE_PARQUET_PATH: parquet_target_path,
             mm_constants.ApplicationEvent.START_INFER_TIME: start_infer_time.isoformat(
                 sep=" ", timespec="microseconds"
            ),
            mm_constants.ApplicationEvent.END_INFER_TIME: end_infer_time.isoformat(
                sep=" ", timespec="microseconds"
            ),
-            mm_constants.ApplicationEvent.LAST_REQUEST: latest_request.isoformat(
-                sep=" ", timespec="microseconds"
-            ),
            mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
            mm_constants.ApplicationEvent.OUTPUT_STREAM_URI: get_stream_path(
                project=project,
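The hunk above replaces the parquet/feature-set sampling path with a direct TSDB read per interval. A condensed sketch of the new control flow; the `.data` attribute on the read result is taken from the added lines, while `push` and the loop wrapper are illustrative stand-ins:

    def process_intervals(tsdb_connector, batch_window, endpoint_id, has_stream, push):
        for start, end in batch_window.get_intervals():
            prediction_metric = tsdb_connector.read_predictions(
                endpoint_id=endpoint_id, start=start, end=end
            )
            if not prediction_metric.data and has_stream:
                # A streaming endpoint with no recorded predictions in the
                # window: nothing to analyze, skip this interval.
                continue
            push(start_infer_time=start, end_infer_time=end, endpoint_id=endpoint_id)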
@@ -608,53 +492,6 @@ class MonitoringApplicationController:
             [data]
         )
 
-    @staticmethod
-    def _get_sample_df(
-        feature_set: mlrun.common.schemas.FeatureSet,
-        endpoint_id: str,
-        start_infer_time: datetime.datetime,
-        end_infer_time: datetime.datetime,
-        parquet_directory: str,
-        storage_options: dict,
-        application_name: str,
-    ) -> mlrun.feature_store.OfflineVectorResponse:
-        """
-        Retrieves a sample DataFrame of the current input according to the provided infer interval window.
-
-        :param feature_set: The main feature set.
-        :param endpoint_id: Identifier for the model endpoint.
-        :param start_infer_time: The beginning of the infer interval window.
-        :param end_infer_time: The end of the infer interval window.
-        :param parquet_directory: Directory where Parquet files are stored.
-        :param storage_options: Storage options for accessing the data.
-        :param application_name: Current application name.
-
-        :return: OfflineVectorResponse that can be used for generating a sample DataFrame for the specified endpoint.
-
-        """
-        features = [f"{feature_set.metadata.name}.*"]
-        vector = fstore.FeatureVector(
-            name=f"{endpoint_id}_vector",
-            features=features,
-            with_indexes=True,
-        )
-        vector.metadata.tag = application_name
-        vector.feature_set_objects = {feature_set.metadata.name: feature_set}
-
-        # get offline features based on application start and end time.
-        # store the result parquet by partitioning by controller end processing time
-        offline_response = vector.get_offline_features(
-            start_time=start_infer_time,
-            end_time=end_infer_time,
-            timestamp_for_filtering=mm_constants.EventFieldType.TIMESTAMP,
-            target=ParquetTarget(
-                path=parquet_directory
-                + f"/key={endpoint_id}/{int(start_infer_time.timestamp())}/{application_name}.parquet",
-                storage_options=storage_options,
-            ),
-        )
-        return offline_response
-
 
 def handler(context: nuclio.Context, event: nuclio.Event) -> None:
     """
mlrun/projects/operations.py
CHANGED

@@ -15,10 +15,13 @@
 import warnings
 from typing import Optional, Union
 
-
+import mlrun_pipelines.common.models
+import mlrun_pipelines.models
 
 import mlrun
 import mlrun.common.constants as mlrun_constants
+import mlrun.common.schemas.function
+import mlrun.common.schemas.workflow
 from mlrun.utils import hub_prefix
 
 from .pipelines import enrich_function_object, pipeline_context

@@ -49,7 +52,7 @@ def _get_engine_and_function(function, project=None):
     function = enrich_function_object(project, function, copy_function=False)
 
     if not pipeline_context.workflow:
-        return
+        return mlrun.common.schemas.workflow.EngineType.LOCAL, function
 
     return pipeline_context.workflow.engine, function
 

@@ -78,7 +81,7 @@ def run_function(
     returns: Optional[list[Union[str, dict[str, str]]]] = None,
     builder_env: Optional[list] = None,
     reset_on_run: Optional[bool] = None,
-) -> Union[mlrun.model.RunObject, PipelineNodeWrapper]:
+) -> Union[mlrun.model.RunObject, mlrun_pipelines.models.PipelineNodeWrapper]:
     """Run a local or remote task as part of a local/kubeflow pipeline
 
     run_function() allow you to execute a function locally, on a remote cluster, or as part of an automated workflow

@@ -186,7 +189,7 @@ def run_function(
     )
     task.spec.verbose = task.spec.verbose or verbose
 
-    if engine ==
+    if engine == mlrun.common.schemas.workflow.EngineType.KFP:
         if schedule:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Scheduling jobs is not supported when running a workflow with the kfp engine."

@@ -266,7 +269,7 @@ def build_function(
     overwrite_build_params: bool = False,
     extra_args: str = None,
     force_build: bool = False,
-) -> Union[BuildStatus, PipelineNodeWrapper]:
+) -> Union[BuildStatus, mlrun_pipelines.models.PipelineNodeWrapper]:
     """deploy ML function, build container with its dependencies
 
     :param function: Name of the function (in the project) or function object

@@ -302,7 +305,7 @@ def build_function(
         raise mlrun.errors.MLRunInvalidArgumentError(
             "Cannot build use deploy_function()"
         )
-    if engine ==
+    if engine == mlrun.common.schemas.workflow.EngineType.KFP:
         if overwrite_build_params:
             function.spec.build.commands = None
         if requirements or requirements_file:

@@ -375,7 +378,7 @@ def deploy_function(
     builder_env: dict = None,
     project_object=None,
     mock: bool = None,
-) -> Union[DeployStatus, PipelineNodeWrapper]:
+) -> Union[DeployStatus, mlrun_pipelines.models.PipelineNodeWrapper]:
     """deploy real-time (nuclio based) functions
 
     :param function: name of the function (in the project) or function object

@@ -392,7 +395,7 @@ def deploy_function(
         raise mlrun.errors.MLRunInvalidArgumentError(
             "deploy is used with real-time functions, for other kinds use build_function()"
         )
-    if engine ==
+    if engine == mlrun.common.schemas.workflow.EngineType.KFP:
         return function.deploy_step(models=models, env=env, tag=tag, verbose=verbose)
     else:
         if env:
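All of the engine checks above move from (truncated) literal comparisons to `EngineType` members. A sketch of the enum shape this implies; the member values shown are an assumption based on the engine names used elsewhere in this diff ("kfp", "local", "remote"), and a str-valued enum would keep old string call sites working:

    from enum import Enum

    class EngineType(str, Enum):
        # Assumed shape of mlrun.common.schemas.workflow.EngineType.
        KFP = "kfp"
        LOCAL = "local"
        REMOTE = "remote"

    # Because the enum subclasses str, comparisons against plain strings
    # (e.g. an engine kind loaded from a workflow spec) still hold:
    assert EngineType.KFP == "kfp"
    assert "remote" == EngineType.REMOTE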
mlrun/projects/pipelines.py
CHANGED

@@ -27,6 +27,8 @@ import mlrun_pipelines.utils
 import mlrun
 import mlrun.common.runtimes.constants
 import mlrun.common.schemas
+import mlrun.common.schemas.function
+import mlrun.common.schemas.workflow
 import mlrun.utils.notifications
 from mlrun.errors import err_to_str
 from mlrun.utils import (

@@ -44,21 +46,21 @@ from ..runtimes.pod import AutoMountType
 
 def get_workflow_engine(engine_kind, local=False):
     if pipeline_context.is_run_local(local):
-        if engine_kind ==
+        if engine_kind == mlrun.common.schemas.workflow.EngineType.KFP:
             logger.warning(
                 "Running kubeflow pipeline locally, note some ops may not run locally!"
             )
-        elif engine_kind ==
+        elif engine_kind == mlrun.common.schemas.workflow.EngineType.REMOTE:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Cannot run a remote pipeline locally using `kind='remote'` and `local=True`. "
                 "in order to run a local pipeline remotely, please use `engine='remote:local'` instead"
             )
         return _LocalRunner
-    if not engine_kind or engine_kind ==
+    if not engine_kind or engine_kind == mlrun.common.schemas.workflow.EngineType.KFP:
         return _KFPRunner
-    if engine_kind ==
+    if engine_kind == mlrun.common.schemas.workflow.EngineType.LOCAL:
         return _LocalRunner
-    if engine_kind ==
+    if engine_kind == mlrun.common.schemas.workflow.EngineType.REMOTE:
         return _RemoteRunner
     raise mlrun.errors.MLRunInvalidArgumentError(
         f"Provided workflow engine is not supported. engine_kind={engine_kind}"

@@ -313,7 +315,11 @@ def get_db_function(project, key) -> mlrun.runtimes.BaseRuntime:
 
 
 def enrich_function_object(
-    project
+    project: mlrun.common.schemas.Project,
+    function: mlrun.runtimes.BaseRuntime,
+    decorator: typing.Callable = None,
+    copy_function: bool = True,
+    try_auto_mount: bool = True,
 ) -> mlrun.runtimes.BaseRuntime:
     if hasattr(function, "_enriched"):
         return function

@@ -354,7 +360,6 @@
     f.enrich_runtime_spec(
         project.spec.default_function_node_selector,
     )
-
     if try_auto_mount:
         if (
             decorator and AutoMountType.is_auto_modifier(decorator)
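Taken together, `get_workflow_engine` is now a small enum-keyed dispatch. A self-contained replica of that dispatch under the same assumed `EngineType` shape as above; the runner names stand in for the module's own `_KFPRunner`, `_LocalRunner`, and `_RemoteRunner`, and the local-execution check is reduced to a plain flag:

    from enum import Enum

    class EngineType(str, Enum):  # same assumed shape as in the earlier note
        KFP = "kfp"
        LOCAL = "local"
        REMOTE = "remote"

    def pick_runner(engine_kind, run_local=False):
        # Replica of the dispatch above (minus the kfp-local warning and the
        # remote+local error path).
        if run_local:
            return "local-runner"
        if not engine_kind or engine_kind == EngineType.KFP:
            return "kfp-runner"
        if engine_kind == EngineType.LOCAL:
            return "local-runner"
        if engine_kind == EngineType.REMOTE:
            return "remote-runner"
        raise ValueError(f"Provided workflow engine is not supported. engine_kind={engine_kind}")

    assert pick_runner(None) == "kfp-runner"
    assert pick_runner("remote") == "remote-runner"  # plain strings still match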
mlrun/projects/project.py
CHANGED

@@ -708,7 +708,7 @@ def _load_project_from_db(url, secrets, user_project=False):
 
 def _delete_project_from_db(project_name, secrets, deletion_strategy):
     db = mlrun.db.get_run_db(secrets=secrets)
-
+    db.delete_project(project_name, deletion_strategy=deletion_strategy)
 
 
 def _load_project_file(url, name="", secrets=None, allow_cross_project=None):

@@ -1950,7 +1950,6 @@ class MlrunProject(ModelObj):
         application_class: typing.Union[
             str,
             mm_app.ModelMonitoringApplicationBase,
-            mm_app.ModelMonitoringApplicationBaseV2,
         ] = None,
         name: str = None,
         image: str = None,

@@ -2018,7 +2017,6 @@ class MlrunProject(ModelObj):
         application_class: typing.Union[
             str,
             mm_app.ModelMonitoringApplicationBase,
-            mm_app.ModelMonitoringApplicationBaseV2,
         ] = None,
         name: str = None,
         image: str = None,

@@ -2076,7 +2074,6 @@ class MlrunProject(ModelObj):
         application_class: typing.Union[
             str,
             mm_app.ModelMonitoringApplicationBase,
-            mm_app.ModelMonitoringApplicationBaseV2,
             None,
         ] = None,
         name: typing.Optional[str] = None,
mlrun/runtimes/nuclio/api_gateway.py
CHANGED

@@ -22,6 +22,7 @@ from nuclio.auth import AuthKinds as NuclioAuthKinds
 
 import mlrun
 import mlrun.common.constants as mlrun_constants
+import mlrun.common.helpers
 import mlrun.common.schemas as schemas
 import mlrun.common.types
 from mlrun.model import ModelObj

@@ -202,8 +203,13 @@ class APIGatewaySpec(ModelObj):
         self.project = project
         self.ports = ports
 
+        self.enrich()
         self.validate(project=project, functions=functions, canary=canary, ports=ports)
 
+    def enrich(self):
+        if self.path and not self.path.startswith("/"):
+            self.path = f"/{self.path}"
+
     def validate(
         self,
         project: str,
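The new `enrich` step normalizes a relative gateway path before validation runs. A standalone sketch of just that rule; the helper name is hypothetical, and the logic mirrors the added lines:

    def normalize_gateway_path(path: str) -> str:
        # Mirror of APIGatewaySpec.enrich(): ensure a leading slash.
        if path and not path.startswith("/"):
            return f"/{path}"
        return path

    assert normalize_gateway_path("v1/predict") == "/v1/predict"
    assert normalize_gateway_path("/v1/predict") == "/v1/predict"  # idempotent
    assert normalize_gateway_path("") == ""  # empty paths are left alone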
mlrun/utils/version/version.json
CHANGED