mlrun 1.7.1rc10__py3-none-any.whl → 1.8.0rc11__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- mlrun/__init__.py +23 -21
- mlrun/__main__.py +3 -3
- mlrun/alerts/alert.py +148 -14
- mlrun/artifacts/__init__.py +2 -3
- mlrun/artifacts/base.py +55 -12
- mlrun/artifacts/dataset.py +16 -16
- mlrun/artifacts/document.py +378 -0
- mlrun/artifacts/manager.py +26 -17
- mlrun/artifacts/model.py +66 -53
- mlrun/common/constants.py +8 -0
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/feature_set.py +1 -0
- mlrun/common/formatters/function.py +1 -0
- mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} +16 -1
- mlrun/common/formatters/pipeline.py +1 -2
- mlrun/common/formatters/project.py +9 -0
- mlrun/common/model_monitoring/__init__.py +0 -5
- mlrun/common/model_monitoring/helpers.py +1 -29
- mlrun/common/runtimes/constants.py +1 -2
- mlrun/common/schemas/__init__.py +6 -2
- mlrun/common/schemas/alert.py +111 -19
- mlrun/common/schemas/api_gateway.py +3 -3
- mlrun/common/schemas/artifact.py +11 -7
- mlrun/common/schemas/auth.py +6 -4
- mlrun/common/schemas/background_task.py +7 -7
- mlrun/common/schemas/client_spec.py +2 -3
- mlrun/common/schemas/clusterization_spec.py +2 -2
- mlrun/common/schemas/common.py +53 -3
- mlrun/common/schemas/constants.py +15 -0
- mlrun/common/schemas/datastore_profile.py +1 -1
- mlrun/common/schemas/feature_store.py +9 -9
- mlrun/common/schemas/frontend_spec.py +4 -4
- mlrun/common/schemas/function.py +10 -10
- mlrun/common/schemas/hub.py +1 -1
- mlrun/common/schemas/k8s.py +3 -3
- mlrun/common/schemas/memory_reports.py +3 -3
- mlrun/common/schemas/model_monitoring/__init__.py +2 -1
- mlrun/common/schemas/model_monitoring/constants.py +67 -14
- mlrun/common/schemas/model_monitoring/grafana.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +92 -147
- mlrun/common/schemas/notification.py +24 -3
- mlrun/common/schemas/object.py +1 -1
- mlrun/common/schemas/pagination.py +4 -4
- mlrun/common/schemas/partition.py +137 -0
- mlrun/common/schemas/pipeline.py +2 -2
- mlrun/common/schemas/project.py +25 -17
- mlrun/common/schemas/runs.py +2 -2
- mlrun/common/schemas/runtime_resource.py +5 -5
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/secret.py +1 -1
- mlrun/common/schemas/tag.py +3 -3
- mlrun/common/schemas/workflow.py +5 -5
- mlrun/config.py +68 -10
- mlrun/data_types/__init__.py +0 -2
- mlrun/data_types/data_types.py +1 -0
- mlrun/data_types/infer.py +3 -1
- mlrun/data_types/spark.py +5 -3
- mlrun/data_types/to_pandas.py +11 -2
- mlrun/datastore/__init__.py +2 -2
- mlrun/datastore/alibaba_oss.py +4 -1
- mlrun/datastore/azure_blob.py +4 -1
- mlrun/datastore/base.py +12 -4
- mlrun/datastore/datastore.py +9 -3
- mlrun/datastore/datastore_profile.py +79 -20
- mlrun/datastore/dbfs_store.py +4 -1
- mlrun/datastore/filestore.py +4 -1
- mlrun/datastore/google_cloud_storage.py +4 -1
- mlrun/datastore/hdfs.py +4 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +4 -1
- mlrun/datastore/s3.py +4 -1
- mlrun/datastore/sources.py +52 -51
- mlrun/datastore/store_resources.py +7 -4
- mlrun/datastore/targets.py +23 -22
- mlrun/datastore/utils.py +2 -2
- mlrun/datastore/v3io.py +4 -1
- mlrun/datastore/vectorstore.py +229 -0
- mlrun/datastore/wasbfs/fs.py +13 -12
- mlrun/db/base.py +213 -83
- mlrun/db/factory.py +0 -3
- mlrun/db/httpdb.py +1265 -387
- mlrun/db/nopdb.py +205 -74
- mlrun/errors.py +2 -2
- mlrun/execution.py +136 -50
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +41 -40
- mlrun/feature_store/common.py +9 -9
- mlrun/feature_store/feature_set.py +20 -18
- mlrun/feature_store/feature_vector.py +27 -24
- mlrun/feature_store/retrieval/base.py +14 -9
- mlrun/feature_store/retrieval/job.py +2 -1
- mlrun/feature_store/steps.py +2 -2
- mlrun/features.py +30 -13
- mlrun/frameworks/__init__.py +1 -2
- mlrun/frameworks/_common/__init__.py +1 -2
- mlrun/frameworks/_common/artifacts_library.py +2 -2
- mlrun/frameworks/_common/mlrun_interface.py +10 -6
- mlrun/frameworks/_common/model_handler.py +29 -27
- mlrun/frameworks/_common/producer.py +3 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
- mlrun/frameworks/_ml_common/__init__.py +1 -2
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_ml_common/model_handler.py +21 -21
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/auto_mlrun/__init__.py +1 -2
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
- mlrun/frameworks/huggingface/__init__.py +1 -2
- mlrun/frameworks/huggingface/model_server.py +9 -9
- mlrun/frameworks/lgbm/__init__.py +47 -44
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
- mlrun/frameworks/lgbm/model_handler.py +15 -11
- mlrun/frameworks/lgbm/model_server.py +11 -7
- mlrun/frameworks/lgbm/utils.py +2 -2
- mlrun/frameworks/onnx/__init__.py +1 -2
- mlrun/frameworks/onnx/dataset.py +3 -3
- mlrun/frameworks/onnx/mlrun_interface.py +2 -2
- mlrun/frameworks/onnx/model_handler.py +7 -5
- mlrun/frameworks/onnx/model_server.py +8 -6
- mlrun/frameworks/parallel_coordinates.py +11 -11
- mlrun/frameworks/pytorch/__init__.py +22 -23
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
- mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
- mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
- mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
- mlrun/frameworks/pytorch/model_handler.py +21 -17
- mlrun/frameworks/pytorch/model_server.py +13 -9
- mlrun/frameworks/sklearn/__init__.py +19 -18
- mlrun/frameworks/sklearn/estimator.py +2 -2
- mlrun/frameworks/sklearn/metric.py +3 -3
- mlrun/frameworks/sklearn/metrics_library.py +8 -6
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
- mlrun/frameworks/sklearn/model_handler.py +4 -3
- mlrun/frameworks/tf_keras/__init__.py +11 -12
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
- mlrun/frameworks/tf_keras/model_handler.py +17 -13
- mlrun/frameworks/tf_keras/model_server.py +12 -8
- mlrun/frameworks/xgboost/__init__.py +19 -18
- mlrun/frameworks/xgboost/model_handler.py +13 -9
- mlrun/launcher/base.py +3 -4
- mlrun/launcher/local.py +1 -1
- mlrun/launcher/remote.py +1 -1
- mlrun/lists.py +4 -3
- mlrun/model.py +117 -46
- mlrun/model_monitoring/__init__.py +4 -4
- mlrun/model_monitoring/api.py +72 -59
- mlrun/model_monitoring/applications/_application_steps.py +17 -17
- mlrun/model_monitoring/applications/base.py +165 -6
- mlrun/model_monitoring/applications/context.py +88 -37
- mlrun/model_monitoring/applications/evidently_base.py +0 -1
- mlrun/model_monitoring/applications/histogram_data_drift.py +43 -21
- mlrun/model_monitoring/applications/results.py +55 -3
- mlrun/model_monitoring/controller.py +207 -239
- mlrun/model_monitoring/db/__init__.py +0 -2
- mlrun/model_monitoring/db/_schedules.py +156 -0
- mlrun/model_monitoring/db/_stats.py +189 -0
- mlrun/model_monitoring/db/tsdb/base.py +78 -25
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +61 -6
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +255 -29
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +78 -17
- mlrun/model_monitoring/helpers.py +151 -49
- mlrun/model_monitoring/stream_processing.py +99 -283
- mlrun/model_monitoring/tracking_policy.py +10 -3
- mlrun/model_monitoring/writer.py +48 -36
- mlrun/package/__init__.py +3 -6
- mlrun/package/context_handler.py +1 -1
- mlrun/package/packager.py +12 -9
- mlrun/package/packagers/__init__.py +0 -2
- mlrun/package/packagers/default_packager.py +14 -11
- mlrun/package/packagers/numpy_packagers.py +16 -7
- mlrun/package/packagers/pandas_packagers.py +18 -18
- mlrun/package/packagers/python_standard_library_packagers.py +25 -11
- mlrun/package/packagers_manager.py +31 -14
- mlrun/package/utils/__init__.py +0 -3
- mlrun/package/utils/_pickler.py +6 -6
- mlrun/platforms/__init__.py +47 -16
- mlrun/platforms/iguazio.py +4 -1
- mlrun/projects/operations.py +27 -27
- mlrun/projects/pipelines.py +71 -36
- mlrun/projects/project.py +890 -220
- mlrun/run.py +53 -10
- mlrun/runtimes/__init__.py +1 -3
- mlrun/runtimes/base.py +15 -11
- mlrun/runtimes/daskjob.py +9 -9
- mlrun/runtimes/generators.py +2 -1
- mlrun/runtimes/kubejob.py +4 -5
- mlrun/runtimes/mounts.py +572 -0
- mlrun/runtimes/mpijob/__init__.py +0 -2
- mlrun/runtimes/mpijob/abstract.py +7 -6
- mlrun/runtimes/nuclio/api_gateway.py +7 -7
- mlrun/runtimes/nuclio/application/application.py +11 -11
- mlrun/runtimes/nuclio/function.py +19 -17
- mlrun/runtimes/nuclio/serving.py +18 -13
- mlrun/runtimes/pod.py +154 -45
- mlrun/runtimes/remotesparkjob.py +3 -2
- mlrun/runtimes/sparkjob/__init__.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +21 -11
- mlrun/runtimes/utils.py +6 -5
- mlrun/serving/merger.py +6 -4
- mlrun/serving/remote.py +18 -17
- mlrun/serving/routers.py +185 -172
- mlrun/serving/server.py +7 -1
- mlrun/serving/states.py +97 -78
- mlrun/serving/utils.py +13 -2
- mlrun/serving/v1_serving.py +3 -2
- mlrun/serving/v2_serving.py +105 -72
- mlrun/track/__init__.py +1 -1
- mlrun/track/tracker.py +2 -2
- mlrun/track/trackers/mlflow_tracker.py +6 -5
- mlrun/utils/async_http.py +1 -1
- mlrun/utils/clones.py +1 -1
- mlrun/utils/helpers.py +63 -19
- mlrun/utils/logger.py +106 -4
- mlrun/utils/notifications/notification/__init__.py +22 -19
- mlrun/utils/notifications/notification/base.py +33 -14
- mlrun/utils/notifications/notification/console.py +6 -6
- mlrun/utils/notifications/notification/git.py +11 -11
- mlrun/utils/notifications/notification/ipython.py +10 -9
- mlrun/utils/notifications/notification/mail.py +176 -0
- mlrun/utils/notifications/notification/slack.py +6 -6
- mlrun/utils/notifications/notification/webhook.py +6 -6
- mlrun/utils/notifications/notification_pusher.py +86 -44
- mlrun/utils/regex.py +11 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/METADATA +29 -24
- mlrun-1.8.0rc11.dist-info/RECORD +347 -0
- mlrun/model_monitoring/db/stores/__init__.py +0 -136
- mlrun/model_monitoring/db/stores/base/store.py +0 -213
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
- mlrun/model_monitoring/model_endpoint.py +0 -118
- mlrun-1.7.1rc10.dist-info/RECORD +0 -351
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/LICENSE +0 -0
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/WHEEL +0 -0
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/top_level.txt +0 -0
mlrun/execution.py
CHANGED
@@ -15,8 +15,9 @@
 import logging
 import os
 import uuid
+import warnings
 from copy import deepcopy
-from typing import Union
+from typing import Optional, Union, cast
 
 import numpy as np
 import yaml
@@ -25,17 +26,23 @@ from dateutil import parser
 import mlrun
 import mlrun.common.constants as mlrun_constants
 import mlrun.common.formatters
-from mlrun.artifacts import
+from mlrun.artifacts import (
+    Artifact,
+    DatasetArtifact,
+    DocumentArtifact,
+    DocumentLoaderSpec,
+    ModelArtifact,
+)
 from mlrun.datastore.store_resources import get_store_resource
 from mlrun.errors import MLRunInvalidArgumentError
 
-from .artifacts import DatasetArtifact
 from .artifacts.manager import ArtifactManager, dict_to_artifact, extend_artifact_path
 from .datastore import store_manager
 from .features import Feature
 from .model import HyperParamOptions
 from .secrets import SecretsStore
 from .utils import (
+    Logger,
     RunKeys,
     dict_to_json,
     dict_to_yaml,
@@ -152,7 +159,7 @@ class MLClientCtx:
         return self._project
 
     @property
-    def logger(self):
+    def logger(self) -> Logger:
         """Built-in logger interface
 
         Example::
@@ -194,6 +201,11 @@ class MLClientCtx:
         """Dictionary of artifacts (read-only)"""
         return deepcopy(self._artifacts_manager.artifact_list())
 
+    @property
+    def artifact_uris(self):
+        """Dictionary of artifact URIs (read-only)"""
+        return deepcopy(self._artifacts_manager.artifact_uris)
+
     @property
     def in_path(self):
         """Default input path for data objects"""
@@ -296,7 +308,7 @@ class MLClientCtx:
         )
         self._parent.log_iteration_results(self._iteration, None, self.to_dict())
 
-    def get_store_resource(self, url, secrets: dict = None):
+    def get_store_resource(self, url, secrets: Optional[dict] = None):
         """Get mlrun data resource (feature set/vector, artifact, item) from url.
 
         Example::
@@ -317,7 +329,7 @@ class MLClientCtx:
             data_store_secrets=secrets,
         )
 
-    def get_dataitem(self, url, secrets: dict = None):
+    def get_dataitem(self, url, secrets: Optional[dict] = None):
         """Get mlrun dataitem from url
 
         Example::
@@ -425,8 +437,11 @@ class MLClientCtx:
         self._results = status.get("results", self._results)
         for artifact in status.get("artifacts", []):
             artifact_obj = dict_to_artifact(artifact)
-            key =
-
+            self._artifacts_manager.artifact_uris[artifact_obj.key] = (
+                artifact_obj.uri
+            )
+        for key, uri in status.get("artifact_uris", {}).items():
+            self._artifacts_manager.artifact_uris[key] = uri
         self._state = status.get("state", self._state)
 
         # No need to store the run for every worker
@@ -486,11 +501,11 @@ class MLClientCtx:
             return default
         return self._parameters[key]
 
-    def get_project_object(self):
+    def get_project_object(self) -> Optional["mlrun.MlrunProject"]:
         """
         Get the MLRun project object by the project name set in the context.
 
-        :
+        :returns: The project object or None if it couldn't be retrieved.
         """
         return self._load_project_object()
 
@@ -573,22 +588,25 @@ class MLClientCtx:
         """Reserved for internal use"""
 
         if best:
+            # Recreate the best iteration context for the interface of getting its artifacts
+            best_context = MLClientCtx.from_dict(
+                task, store_run=False, include_status=True
+            )
             self._results["best_iteration"] = best
-            for
-                self._results[
-            for
-                self._artifacts_manager.
-
-            )
+            for key, result in best_context.results.items():
+                self._results[key] = result
+            for key, artifact_uri in best_context.artifact_uris.items():
+                self._artifacts_manager.artifact_uris[key] = artifact_uri
+                artifact = best_context.get_artifact(key)
                 self._artifacts_manager.link_artifact(
                     self.project,
                     self.name,
                     self.tag,
-
+                    key,
                     self.iteration,
-                    artifact
+                    artifact.target_path,
+                    db_key=artifact.db_key,
                     link_iteration=best,
-                    db_key=artifact["spec"]["db_key"],
                 )
 
         if summary is not None:
@@ -611,7 +629,7 @@ class MLClientCtx:
         format=None,
         db_key=None,
         **kwargs,
-    ):
+    ) -> Artifact:
         """Log an output artifact and optionally upload it to datastore
 
         Example::
@@ -679,9 +697,9 @@ class MLClientCtx:
         db_key=None,
         target_path="",
         extra_data=None,
-        label_column: str = None,
+        label_column: Optional[str] = None,
         **kwargs,
-    ):
+    ) -> DatasetArtifact:
        """Log a dataset artifact and optionally upload it to datastore
 
        If the dataset exists with the same key and tag, it will be overwritten.
@@ -719,7 +737,7 @@ class MLClientCtx:
         :param db_key: The key to use in the artifact DB table, by default its run name + '_' + key
                        db_key=False will not register it in the artifacts table
 
-        :returns:
+        :returns: Dataset artifact object
         """
         ds = DatasetArtifact(
             key,
@@ -732,16 +750,19 @@ class MLClientCtx:
             **kwargs,
         )
 
-        item =
-
-
-
-
-
-
-
-
-
+        item = cast(
+            DatasetArtifact,
+            self._artifacts_manager.log_artifact(
+                self,
+                ds,
+                local_path=local_path,
+                artifact_path=extend_artifact_path(artifact_path, self.artifact_path),
+                target_path=target_path,
+                tag=tag,
+                upload=upload,
+                db_key=db_key,
+                labels=labels,
+            ),
         )
         self._update_run()
         return item
@@ -760,16 +781,16 @@ class MLClientCtx:
         artifact_path=None,
         upload=True,
         labels=None,
-        inputs: list[Feature] = None,
-        outputs: list[Feature] = None,
-        feature_vector: str = None,
-        feature_weights: list = None,
+        inputs: Optional[list[Feature]] = None,
+        outputs: Optional[list[Feature]] = None,
+        feature_vector: Optional[str] = None,
+        feature_weights: Optional[list] = None,
         training_set=None,
-        label_column: Union[str, list] = None,
+        label_column: Optional[Union[str, list]] = None,
         extra_data=None,
         db_key=None,
         **kwargs,
-    ):
+    ) -> ModelArtifact:
         """Log a model artifact and optionally upload it to datastore
 
         Example::
@@ -788,7 +809,7 @@ class MLClientCtx:
         :param key: Artifact key or artifact class ()
         :param body: Will use the body as the artifact content
         :param model_file: Path to the local model file we upload (see also model_dir)
-                           or to a model file data url (e.g. http://host/path/model.pkl)
+                           or to a model file data url (e.g. `http://host/path/model.pkl`)
         :param model_dir: Path to the local dir holding the model file and extra files
         :param artifact_path: Target artifact path (when not using the default)
                               to define a subpath under the default location use:
@@ -811,7 +832,7 @@ class MLClientCtx:
         :param db_key: The key to use in the artifact DB table, by default its run name + '_' + key
                        db_key=False will not register it in the artifacts table
 
-        :returns:
+        :returns: Model artifact object
         """
 
         if training_set is not None and inputs:
@@ -838,13 +859,63 @@ class MLClientCtx:
         if training_set is not None:
             model.infer_from_df(training_set, label_column)
 
+        item = cast(
+            ModelArtifact,
+            self._artifacts_manager.log_artifact(
+                self,
+                model,
+                artifact_path=extend_artifact_path(artifact_path, self.artifact_path),
+                tag=tag,
+                upload=upload,
+                db_key=db_key,
+                labels=labels,
+            ),
+        )
+        self._update_run()
+        return item
+
+    def log_document(
+        self,
+        key: str,
+        tag: str = "",
+        local_path: str = "",
+        artifact_path: Optional[str] = None,
+        document_loader: DocumentLoaderSpec = DocumentLoaderSpec(),
+        upload: Optional[bool] = False,
+        labels: Optional[dict[str, str]] = None,
+        target_path: Optional[str] = None,
+        **kwargs,
+    ) -> DocumentArtifact:
+        """
+        Log a document as an artifact.
+
+        :param key: Artifact key
+        :param tag: Version tag
+        :param local_path: path to the local file we upload, will also be use
+                           as the destination subpath (under "artifact_path")
+        :param artifact_path: Target artifact path (when not using the default)
+                              to define a subpath under the default location use:
+                              `artifact_path=context.artifact_subpath('data')`
+        :param document_loader: Spec to use to load the artifact as langchain document
+        :param upload: Whether to upload the artifact
+        :param labels: Key-value labels
+        :param target_path: Path to the local file
+        :param kwargs: Additional keyword arguments
+        :return: DocumentArtifact object
+        """
+        doc_artifact = DocumentArtifact(
+            key=key,
+            original_source=local_path or target_path,
+            document_loader=document_loader,
+            **kwargs,
+        )
+
         item = self._artifacts_manager.log_artifact(
             self,
-
+            doc_artifact,
             artifact_path=extend_artifact_path(artifact_path, self.artifact_path),
             tag=tag,
             upload=upload,
-            db_key=db_key,
             labels=labels,
         )
         self._update_run()
@@ -852,10 +923,18 @@ class MLClientCtx:
 
     def get_cached_artifact(self, key):
         """Return a logged artifact from cache (for potential updates)"""
-
+        warnings.warn(
+            "get_cached_artifact is deprecated in 1.8.0 and will be removed in 1.10.0. Use get_artifact instead.",
+            FutureWarning,
+        )
+        return self.get_artifact(key)
+
+    def get_artifact(self, key: str) -> Artifact:
+        artifact_uri = self._artifacts_manager.artifact_uris[key]
+        return self.get_store_resource(artifact_uri)
 
-    def update_artifact(self, artifact_object):
-        """Update an artifact object in the
+    def update_artifact(self, artifact_object: Artifact):
+        """Update an artifact object in the DB and the cached uri"""
         self._artifacts_manager.update_artifact(self, artifact_object)
 
     def commit(self, message: str = "", completed=False):
@@ -885,7 +964,12 @@ class MLClientCtx:
         if completed and not self.iteration:
             mlrun.runtimes.utils.global_context.set(None)
 
-    def set_state(
+    def set_state(
+        self,
+        execution_state: Optional[str] = None,
+        error: Optional[str] = None,
+        commit=True,
+    ):
         """
         Modify and store the execution state or mark an error and update the run state accordingly.
         This method allows to set the run state to 'completed' in the DB which is discouraged.
@@ -1013,7 +1097,7 @@ class MLClientCtx:
         set_if_not_none(struct["status"], "commit", self._commit)
         set_if_not_none(struct["status"], "iterations", self._iteration_results)
 
-        struct["status"][RunKeys.
+        struct["status"][RunKeys.artifact_uris] = self._artifacts_manager.artifact_uris
         self._data_stores.to_dict(struct["spec"])
         return struct
 
@@ -1107,7 +1191,9 @@ class MLClientCtx:
         set_if_not_none(struct, "status.commit", self._commit)
         set_if_not_none(struct, "status.iterations", self._iteration_results)
 
-        struct[f"status.{RunKeys.
+        struct[f"status.{RunKeys.artifact_uris}"] = (
+            self._artifacts_manager.artifact_uris
+        )
         return struct
 
     def _init_dbs(self, rundb):
@@ -1121,7 +1207,7 @@ class MLClientCtx:
         self._data_stores = store_manager.set(self._secrets_manager, db=self._rundb)
         self._artifacts_manager = ArtifactManager(db=self._rundb)
 
-    def _load_project_object(self):
+    def _load_project_object(self) -> Optional["mlrun.MlrunProject"]:
         if not self._project_object:
             if not self._project:
                 self.logger.warning(
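The execution.py hunks above add a `log_document` method, a `get_artifact` method, and an `artifact_uris` property to the run context, and route `get_cached_artifact` through `get_artifact`. A minimal usage sketch, assuming only the signatures visible in this diff (the handler name and file path are illustrative, not taken from the package):

import mlrun


def handler(context: mlrun.MLClientCtx):
    # log_document() wraps a local file in a DocumentArtifact; the default
    # DocumentLoaderSpec is used unless one is passed explicitly.
    doc = context.log_document(
        key="my-doc",
        local_path="./docs/readme.md",  # illustrative path
        upload=False,
    )
    context.logger.info(f"logged document at {doc.uri}")

    # artifact_uris is a read-only mapping of artifact keys to store URIs,
    # and get_artifact() resolves a key back to the stored artifact object.
    uri = context.artifact_uris["my-doc"]
    refreshed = context.get_artifact("my-doc")
    context.logger.info(f"resolved {uri} to a '{refreshed.kind}' artifact")

Per the hunks above, `get_cached_artifact` still works in 1.8.0 but emits a FutureWarning and is slated for removal in 1.10.0, and the run status now carries an `artifact_uris` mapping alongside `artifacts`.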
mlrun/feature_store/__init__.py
CHANGED
mlrun/feature_store/api.py
CHANGED
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import copy
 import importlib.util
 import pathlib
@@ -99,21 +100,21 @@ def _features_to_vector_and_check_permissions(features, update_stats):
 def get_offline_features(
     feature_vector: Union[str, FeatureVector],
     entity_rows=None,
-    entity_timestamp_column: str = None,
+    entity_timestamp_column: Optional[str] = None,
     target: DataTargetBase = None,
     run_config: RunConfig = None,
-    drop_columns: list[str] = None,
-    start_time: Union[str, datetime] = None,
-    end_time: Union[str, datetime] = None,
+    drop_columns: Optional[list[str]] = None,
+    start_time: Optional[Union[str, datetime]] = None,
+    end_time: Optional[Union[str, datetime]] = None,
     with_indexes: bool = False,
     update_stats: bool = False,
-    engine: str = None,
-    engine_args: dict = None,
-    query: str = None,
-    order_by: Union[str, list[str]] = None,
-    spark_service: str = None,
-    timestamp_for_filtering: Union[str, dict[str, str]] = None,
-    additional_filters: list = None,
+    engine: Optional[str] = None,
+    engine_args: Optional[dict] = None,
+    query: Optional[str] = None,
+    order_by: Optional[Union[str, list[str]]] = None,
+    spark_service: Optional[str] = None,
+    timestamp_for_filtering: Optional[Union[str, dict[str, str]]] = None,
+    additional_filters: Optional[list] = None,
 ):
     """retrieve offline feature vector results
 
@@ -209,20 +210,20 @@ def get_offline_features(
 def _get_offline_features(
     feature_vector: Union[str, FeatureVector],
     entity_rows=None,
-    entity_timestamp_column: str = None,
+    entity_timestamp_column: Optional[str] = None,
     target: DataTargetBase = None,
     run_config: RunConfig = None,
-    drop_columns: list[str] = None,
-    start_time: Union[str, datetime] = None,
-    end_time: Union[str, datetime] = None,
+    drop_columns: Optional[list[str]] = None,
+    start_time: Optional[Union[str, datetime]] = None,
+    end_time: Optional[Union[str, datetime]] = None,
     with_indexes: bool = False,
     update_stats: bool = False,
-    engine: str = None,
-    engine_args: dict = None,
-    query: str = None,
-    order_by: Union[str, list[str]] = None,
-    spark_service: str = None,
-    timestamp_for_filtering: Union[str, dict[str, str]] = None,
+    engine: Optional[str] = None,
+    engine_args: Optional[dict] = None,
+    query: Optional[str] = None,
+    order_by: Optional[Union[str, list[str]]] = None,
+    spark_service: Optional[str] = None,
+    timestamp_for_filtering: Optional[Union[str, dict[str, str]]] = None,
     additional_filters=None,
 ) -> Union[OfflineVectorResponse, RemoteVectorResponse]:
     if entity_rows is None and entity_timestamp_column is not None:
@@ -297,9 +298,9 @@ def get_online_feature_service(
     feature_vector: Union[str, FeatureVector],
     run_config: RunConfig = None,
     fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
-    impute_policy: dict = None,
+    impute_policy: Optional[dict] = None,
     update_stats: bool = False,
-    entity_keys: list[str] = None,
+    entity_keys: Optional[list[str]] = None,
 ):
     """initialize and return online feature vector service api,
     returns :py:class:`~mlrun.feature_store.OnlineVectorService`
@@ -378,9 +379,9 @@ def _get_online_feature_service(
     feature_vector: Union[str, FeatureVector],
     run_config: RunConfig = None,
     fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
-    impute_policy: dict = None,
+    impute_policy: Optional[dict] = None,
     update_stats: bool = False,
-    entity_keys: list[str] = None,
+    entity_keys: Optional[list[str]] = None,
 ) -> OnlineVectorService:
     if isinstance(feature_vector, FeatureVector):
         update_stats = True
@@ -450,7 +451,7 @@ def _get_namespace(run_config: RunConfig) -> dict[str, Any]:
 def ingest(
     featureset: Union[FeatureSet, str] = None,
     source=None,
-    targets: list[DataTargetBase] = None,
+    targets: Optional[list[DataTargetBase]] = None,
     namespace=None,
     return_df: bool = True,
     infer_options: InferOptions = InferOptions.default(),
@@ -530,7 +531,7 @@ def ingest(
 def _ingest(
     featureset: Union[FeatureSet, str] = None,
     source=None,
-    targets: list[DataTargetBase] = None,
+    targets: Optional[list[DataTargetBase]] = None,
     namespace=None,
     return_df: bool = True,
     infer_options: InferOptions = InferOptions.default(),
@@ -783,11 +784,11 @@ def _ingest(
 def preview(
     featureset: FeatureSet,
     source,
-    entity_columns: list = None,
+    entity_columns: Optional[list] = None,
     namespace=None,
     options: InferOptions = None,
     verbose: bool = False,
-    sample_size: int = None,
+    sample_size: Optional[int] = None,
 ) -> pd.DataFrame:
     """run the ingestion pipeline with local DataFrame/file data and infer features schema and stats
 
@@ -825,11 +826,11 @@ def preview(
 def _preview(
     featureset: FeatureSet,
     source,
-    entity_columns: list = None,
+    entity_columns: Optional[list] = None,
     namespace=None,
     options: InferOptions = None,
     verbose: bool = False,
-    sample_size: int = None,
+    sample_size: Optional[int] = None,
 ) -> pd.DataFrame:
     if isinstance(source, pd.DataFrame):
         source = _rename_source_dataframe_columns(source)
@@ -895,8 +896,8 @@ def _preview(
 def _run_ingestion_job(
     featureset: Union[FeatureSet, str],
     source: DataSource = None,
-    targets: list[DataTargetBase] = None,
-    name: str = None,
+    targets: Optional[list[DataTargetBase]] = None,
+    name: Optional[str] = None,
     infer_options: InferOptions = InferOptions.default(),
     run_config: RunConfig = None,
 ):
@@ -920,8 +921,8 @@ def _run_ingestion_job(
 def deploy_ingestion_service_v2(
     featureset: Union[FeatureSet, str],
     source: DataSource = None,
-    targets: list[DataTargetBase] = None,
-    name: str = None,
+    targets: Optional[list[DataTargetBase]] = None,
+    name: Optional[str] = None,
     run_config: RunConfig = None,
     verbose=False,
 ) -> tuple[str, BaseRuntime]:
@@ -963,8 +964,8 @@ def deploy_ingestion_service_v2(
 def _deploy_ingestion_service_v2(
     featureset: Union[FeatureSet, str],
     source: DataSource = None,
-    targets: list[DataTargetBase] = None,
-    name: str = None,
+    targets: Optional[list[DataTargetBase]] = None,
+    name: Optional[str] = None,
     run_config: RunConfig = None,
     verbose=False,
 ) -> tuple[str, BaseRuntime]:
@@ -1026,7 +1027,7 @@ def _ingest_with_spark(
     spark=None,
     featureset: Union[FeatureSet, str] = None,
     source: BaseSourceDriver = None,
-    targets: list[BaseStoreTarget] = None,
+    targets: Optional[list[BaseStoreTarget]] = None,
     infer_options: InferOptions = InferOptions.default(),
     mlrun_context=None,
     namespace=None,
@@ -1199,8 +1200,8 @@ def _infer_from_static_df(
 def set_task_params(
     featureset: FeatureSet,
     source: DataSource = None,
-    targets: list[DataTargetBase] = None,
-    parameters: dict = None,
+    targets: Optional[list[DataTargetBase]] = None,
+    parameters: Optional[dict] = None,
     infer_options: InferOptions = InferOptions.Null,
     overwrite=None,
 ):
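The feature_store/api.py hunks are mechanical typing fixes: every parameter that defaults to `None` now carries an explicit `Optional[...]` annotation instead of relying on implicit Optional, which strict type checkers (for example, recent mypy with its default no_implicit_optional setting) reject. Runtime behavior is unchanged. A small sketch of the pattern, using a hypothetical function rather than the mlrun signatures:

from typing import Optional


# Before: the annotation claims `str`, but the default is None, so the
# declared type and the actual default disagree under strict checking.
def describe_old(name: str = None) -> str:
    return name or "unnamed"


# After: the annotation matches the default; callers may pass a str or omit it.
def describe_new(name: Optional[str] = None) -> str:
    return name or "unnamed"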
mlrun/feature_store/common.py
CHANGED
@@ -178,17 +178,17 @@ class RunConfig:
     def __init__(
         self,
         function: typing.Union[str, FunctionReference, BaseRuntime] = None,
-        local: bool = None,
-        image: str = None,
-        kind: str = None,
-        handler: str = None,
-        parameters: dict = None,
-        watch: bool = None,
+        local: typing.Optional[bool] = None,
+        image: typing.Optional[str] = None,
+        kind: typing.Optional[str] = None,
+        handler: typing.Optional[str] = None,
+        parameters: typing.Optional[dict] = None,
+        watch: typing.Optional[bool] = None,
         owner=None,
         credentials: typing.Optional[mlrun.model.Credentials] = None,
-        code: str = None,
-        requirements: typing.Union[str, list[str]] = None,
-        extra_spec: dict = None,
+        code: typing.Optional[str] = None,
+        requirements: typing.Optional[typing.Union[str, list[str]]] = None,
+        extra_spec: typing.Optional[dict] = None,
         auth_info=None,
     ):
         """class for holding function and run specs for jobs and serving functions