mlrun 1.7.0rc5__py3-none-any.whl → 1.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +39 -121
- mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +4 -3
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +39 -254
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +73 -46
- mlrun/artifacts/model.py +30 -158
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +73 -2
- mlrun/common/db/sql_session.py +3 -2
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +46 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +44 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +11 -1
- mlrun/{runtimes → common/runtimes}/constants.py +32 -4
- mlrun/common/schemas/__init__.py +21 -4
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +113 -2
- mlrun/common/schemas/artifact.py +28 -1
- mlrun/common/schemas/auth.py +11 -0
- mlrun/common/schemas/client_spec.py +2 -1
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +58 -28
- mlrun/common/schemas/frontend_spec.py +8 -0
- mlrun/common/schemas/function.py +11 -0
- mlrun/common/schemas/hub.py +7 -9
- mlrun/common/schemas/model_monitoring/__init__.py +21 -4
- mlrun/common/schemas/model_monitoring/constants.py +136 -42
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
- mlrun/common/schemas/notification.py +69 -12
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +7 -0
- mlrun/common/schemas/project.py +67 -16
- mlrun/common/schemas/runs.py +17 -0
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/workflow.py +10 -2
- mlrun/common/types.py +14 -1
- mlrun/config.py +224 -58
- mlrun/data_types/data_types.py +11 -1
- mlrun/data_types/spark.py +5 -4
- mlrun/data_types/to_pandas.py +75 -34
- mlrun/datastore/__init__.py +8 -10
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +131 -43
- mlrun/datastore/base.py +107 -47
- mlrun/datastore/datastore.py +17 -7
- mlrun/datastore/datastore_profile.py +91 -7
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -3
- mlrun/datastore/google_cloud_storage.py +92 -32
- mlrun/datastore/hdfs.py +5 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +3 -2
- mlrun/datastore/s3.py +30 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +274 -59
- mlrun/datastore/spark_utils.py +30 -0
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +374 -102
- mlrun/datastore/utils.py +68 -5
- mlrun/datastore/v3io.py +28 -50
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +231 -22
- mlrun/db/factory.py +1 -4
- mlrun/db/httpdb.py +864 -228
- mlrun/db/nopdb.py +268 -16
- mlrun/errors.py +35 -5
- mlrun/execution.py +111 -38
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +46 -53
- mlrun/feature_store/common.py +6 -11
- mlrun/feature_store/feature_set.py +48 -23
- mlrun/feature_store/feature_vector.py +13 -2
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +38 -19
- mlrun/features.py +6 -14
- mlrun/frameworks/_common/plan.py +3 -3
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +4 -4
- mlrun/frameworks/pytorch/__init__.py +2 -2
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
- mlrun/frameworks/tf_keras/__init__.py +5 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/k8s_utils.py +57 -12
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +6 -5
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +15 -5
- mlrun/launcher/remote.py +10 -3
- mlrun/lists.py +6 -2
- mlrun/model.py +297 -48
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +152 -357
- mlrun/model_monitoring/applications/__init__.py +10 -0
- mlrun/model_monitoring/applications/_application_steps.py +190 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +130 -303
- mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +177 -39
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +165 -398
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/packagers/default_packager.py +2 -2
- mlrun/package/packagers_manager.py +1 -0
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +67 -228
- mlrun/projects/__init__.py +6 -1
- mlrun/projects/operations.py +47 -20
- mlrun/projects/pipelines.py +396 -249
- mlrun/projects/project.py +1125 -414
- mlrun/render.py +28 -22
- mlrun/run.py +207 -180
- mlrun/runtimes/__init__.py +76 -11
- mlrun/runtimes/base.py +40 -14
- mlrun/runtimes/daskjob.py +9 -2
- mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +39 -10
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +8 -8
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/api_gateway.py +646 -177
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/nuclio/function.py +188 -68
- mlrun/runtimes/nuclio/serving.py +57 -60
- mlrun/runtimes/pod.py +191 -58
- mlrun/runtimes/remotesparkjob.py +11 -8
- mlrun/runtimes/sparkjob/spark3job.py +17 -18
- mlrun/runtimes/utils.py +40 -73
- mlrun/secrets.py +6 -2
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -3
- mlrun/serving/routers.py +89 -64
- mlrun/serving/server.py +54 -26
- mlrun/serving/states.py +187 -56
- mlrun/serving/utils.py +19 -11
- mlrun/serving/v2_serving.py +136 -63
- mlrun/track/tracker.py +2 -1
- mlrun/track/trackers/mlflow_tracker.py +5 -0
- mlrun/utils/async_http.py +26 -6
- mlrun/utils/db.py +18 -0
- mlrun/utils/helpers.py +375 -105
- mlrun/utils/http.py +2 -2
- mlrun/utils/logger.py +75 -9
- mlrun/utils/notifications/notification/__init__.py +14 -10
- mlrun/utils/notifications/notification/base.py +48 -0
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +2 -0
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +63 -2
- mlrun/utils/notifications/notification_pusher.py +146 -16
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +3 -2
- mlrun/utils/v3io_clients.py +2 -3
- mlrun/utils/version/version.json +2 -2
- mlrun-1.7.2.dist-info/METADATA +390 -0
- mlrun-1.7.2.dist-info/RECORD +351 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -271
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/controller_handler.py +0 -37
- mlrun/model_monitoring/prometheus.py +0 -216
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- mlrun/platforms/other.py +0 -305
- mlrun-1.7.0rc5.dist-info/METADATA +0 -269
- mlrun-1.7.0rc5.dist-info/RECORD +0 -323
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
mlrun/platforms/iguazio.py
CHANGED
|
@@ -15,12 +15,9 @@
|
|
|
15
15
|
import json
|
|
16
16
|
import os
|
|
17
17
|
import urllib
|
|
18
|
-
from collections import namedtuple
|
|
19
18
|
from urllib.parse import urlparse
|
|
20
19
|
|
|
21
|
-
import kfp.dsl
|
|
22
20
|
import requests
|
|
23
|
-
import semver
|
|
24
21
|
import v3io
|
|
25
22
|
|
|
26
23
|
import mlrun.errors
|
|
@@ -29,203 +26,6 @@ from mlrun.utils import dict_to_json
|
|
|
29
26
|
|
|
30
27
|
_cached_control_session = None
|
|
31
28
|
|
|
32
|
-
VolumeMount = namedtuple("Mount", ["path", "sub_path"])
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def mount_v3io(
|
|
36
|
-
name="v3io",
|
|
37
|
-
remote="",
|
|
38
|
-
access_key="",
|
|
39
|
-
user="",
|
|
40
|
-
secret=None,
|
|
41
|
-
volume_mounts=None,
|
|
42
|
-
):
|
|
43
|
-
"""Modifier function to apply to a Container Op to volume mount a v3io path
|
|
44
|
-
|
|
45
|
-
:param name: the volume name
|
|
46
|
-
:param remote: the v3io path to use for the volume. ~/ prefix will be replaced with /users/<username>/
|
|
47
|
-
:param access_key: the access key used to auth against v3io. if not given V3IO_ACCESS_KEY env var will be used
|
|
48
|
-
:param user: the username used to auth against v3io. if not given V3IO_USERNAME env var will be used
|
|
49
|
-
:param secret: k8s secret name which would be used to get the username and access key to auth against v3io.
|
|
50
|
-
:param volume_mounts: list of VolumeMount. empty volume mounts & remote will default to mount /v3io & /User.
|
|
51
|
-
"""
|
|
52
|
-
volume_mounts, user = _enrich_and_validate_v3io_mounts(
|
|
53
|
-
remote=remote,
|
|
54
|
-
volume_mounts=volume_mounts,
|
|
55
|
-
user=user,
|
|
56
|
-
)
|
|
57
|
-
|
|
58
|
-
def _attach_volume_mounts_and_creds(container_op: kfp.dsl.ContainerOp):
|
|
59
|
-
from kubernetes import client as k8s_client
|
|
60
|
-
|
|
61
|
-
vol = v3io_to_vol(name, remote, access_key, user, secret=secret)
|
|
62
|
-
container_op.add_volume(vol)
|
|
63
|
-
for volume_mount in volume_mounts:
|
|
64
|
-
container_op.container.add_volume_mount(
|
|
65
|
-
k8s_client.V1VolumeMount(
|
|
66
|
-
mount_path=volume_mount.path,
|
|
67
|
-
sub_path=volume_mount.sub_path,
|
|
68
|
-
name=name,
|
|
69
|
-
)
|
|
70
|
-
)
|
|
71
|
-
|
|
72
|
-
if not secret:
|
|
73
|
-
container_op = v3io_cred(access_key=access_key, user=user)(container_op)
|
|
74
|
-
return container_op
|
|
75
|
-
|
|
76
|
-
return _attach_volume_mounts_and_creds
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
def _enrich_and_validate_v3io_mounts(remote="", volume_mounts=None, user=""):
|
|
80
|
-
if remote and not volume_mounts:
|
|
81
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
82
|
-
"volume_mounts must be specified when remote is given"
|
|
83
|
-
)
|
|
84
|
-
|
|
85
|
-
# Empty remote & volume_mounts defaults are volume mounts of /v3io and /User
|
|
86
|
-
if not remote and not volume_mounts:
|
|
87
|
-
user = _resolve_mount_user(user)
|
|
88
|
-
if not user:
|
|
89
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
90
|
-
"user name/env must be specified when using empty remote and volume_mounts"
|
|
91
|
-
)
|
|
92
|
-
volume_mounts = [
|
|
93
|
-
VolumeMount(path="/v3io", sub_path=""),
|
|
94
|
-
VolumeMount(path="/User", sub_path="users/" + user),
|
|
95
|
-
]
|
|
96
|
-
|
|
97
|
-
if not isinstance(volume_mounts, list) and any(
|
|
98
|
-
[not isinstance(x, VolumeMount) for x in volume_mounts]
|
|
99
|
-
):
|
|
100
|
-
raise TypeError("mounts should be a list of Mount")
|
|
101
|
-
|
|
102
|
-
return volume_mounts, user
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
def _resolve_mount_user(user=None):
|
|
106
|
-
return user or os.environ.get("V3IO_USERNAME")
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
def mount_spark_conf():
|
|
110
|
-
def _mount_spark(container_op: kfp.dsl.ContainerOp):
|
|
111
|
-
from kubernetes import client as k8s_client
|
|
112
|
-
|
|
113
|
-
container_op.container.add_volume_mount(
|
|
114
|
-
k8s_client.V1VolumeMount(
|
|
115
|
-
name="spark-master-config", mount_path="/etc/config/spark"
|
|
116
|
-
)
|
|
117
|
-
)
|
|
118
|
-
return container_op
|
|
119
|
-
|
|
120
|
-
return _mount_spark
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
def mount_v3iod(namespace, v3io_config_configmap):
|
|
124
|
-
def _mount_v3iod(container_op: kfp.dsl.ContainerOp):
|
|
125
|
-
from kubernetes import client as k8s_client
|
|
126
|
-
|
|
127
|
-
def add_vol(name, mount_path, host_path):
|
|
128
|
-
vol = k8s_client.V1Volume(
|
|
129
|
-
name=name,
|
|
130
|
-
host_path=k8s_client.V1HostPathVolumeSource(path=host_path, type=""),
|
|
131
|
-
)
|
|
132
|
-
container_op.add_volume(vol)
|
|
133
|
-
container_op.container.add_volume_mount(
|
|
134
|
-
k8s_client.V1VolumeMount(mount_path=mount_path, name=name)
|
|
135
|
-
)
|
|
136
|
-
|
|
137
|
-
# this is a legacy path for the daemon shared memory
|
|
138
|
-
host_path = "/dev/shm/"
|
|
139
|
-
|
|
140
|
-
# path to shared memory for daemon was changed in Iguazio 3.2.3-b1
|
|
141
|
-
igz_version = mlrun.mlconf.get_parsed_igz_version()
|
|
142
|
-
if igz_version and igz_version >= semver.VersionInfo.parse("3.2.3-b1"):
|
|
143
|
-
host_path = "/var/run/iguazio/dayman-shm/"
|
|
144
|
-
add_vol(name="shm", mount_path="/dev/shm", host_path=host_path + namespace)
|
|
145
|
-
|
|
146
|
-
add_vol(
|
|
147
|
-
name="v3iod-comm",
|
|
148
|
-
mount_path="/var/run/iguazio/dayman",
|
|
149
|
-
host_path="/var/run/iguazio/dayman/" + namespace,
|
|
150
|
-
)
|
|
151
|
-
|
|
152
|
-
vol = k8s_client.V1Volume(
|
|
153
|
-
name="daemon-health", empty_dir=k8s_client.V1EmptyDirVolumeSource()
|
|
154
|
-
)
|
|
155
|
-
container_op.add_volume(vol)
|
|
156
|
-
container_op.container.add_volume_mount(
|
|
157
|
-
k8s_client.V1VolumeMount(
|
|
158
|
-
mount_path="/var/run/iguazio/daemon_health", name="daemon-health"
|
|
159
|
-
)
|
|
160
|
-
)
|
|
161
|
-
|
|
162
|
-
vol = k8s_client.V1Volume(
|
|
163
|
-
name="v3io-config",
|
|
164
|
-
config_map=k8s_client.V1ConfigMapVolumeSource(
|
|
165
|
-
name=v3io_config_configmap, default_mode=420
|
|
166
|
-
),
|
|
167
|
-
)
|
|
168
|
-
container_op.add_volume(vol)
|
|
169
|
-
container_op.container.add_volume_mount(
|
|
170
|
-
k8s_client.V1VolumeMount(mount_path="/etc/config/v3io", name="v3io-config")
|
|
171
|
-
)
|
|
172
|
-
|
|
173
|
-
container_op.container.add_env_variable(
|
|
174
|
-
k8s_client.V1EnvVar(
|
|
175
|
-
name="CURRENT_NODE_IP",
|
|
176
|
-
value_from=k8s_client.V1EnvVarSource(
|
|
177
|
-
field_ref=k8s_client.V1ObjectFieldSelector(
|
|
178
|
-
api_version="v1", field_path="status.hostIP"
|
|
179
|
-
)
|
|
180
|
-
),
|
|
181
|
-
)
|
|
182
|
-
)
|
|
183
|
-
container_op.container.add_env_variable(
|
|
184
|
-
k8s_client.V1EnvVar(
|
|
185
|
-
name="IGZ_DATA_CONFIG_FILE", value="/igz/java/conf/v3io.conf"
|
|
186
|
-
)
|
|
187
|
-
)
|
|
188
|
-
|
|
189
|
-
return container_op
|
|
190
|
-
|
|
191
|
-
return _mount_v3iod
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
def v3io_cred(api="", user="", access_key=""):
|
|
195
|
-
"""
|
|
196
|
-
Modifier function to copy local v3io env vars to container
|
|
197
|
-
|
|
198
|
-
Usage::
|
|
199
|
-
|
|
200
|
-
train = train_op(...)
|
|
201
|
-
train.apply(use_v3io_cred())
|
|
202
|
-
"""
|
|
203
|
-
|
|
204
|
-
def _use_v3io_cred(container_op: kfp.dsl.ContainerOp):
|
|
205
|
-
from os import environ
|
|
206
|
-
|
|
207
|
-
from kubernetes import client as k8s_client
|
|
208
|
-
|
|
209
|
-
web_api = api or environ.get("V3IO_API") or mlconf.v3io_api
|
|
210
|
-
_user = user or environ.get("V3IO_USERNAME")
|
|
211
|
-
_access_key = access_key or environ.get("V3IO_ACCESS_KEY")
|
|
212
|
-
v3io_framesd = mlconf.v3io_framesd or environ.get("V3IO_FRAMESD")
|
|
213
|
-
|
|
214
|
-
return (
|
|
215
|
-
container_op.container.add_env_variable(
|
|
216
|
-
k8s_client.V1EnvVar(name="V3IO_API", value=web_api)
|
|
217
|
-
)
|
|
218
|
-
.add_env_variable(k8s_client.V1EnvVar(name="V3IO_USERNAME", value=_user))
|
|
219
|
-
.add_env_variable(
|
|
220
|
-
k8s_client.V1EnvVar(name="V3IO_ACCESS_KEY", value=_access_key)
|
|
221
|
-
)
|
|
222
|
-
.add_env_variable(
|
|
223
|
-
k8s_client.V1EnvVar(name="V3IO_FRAMESD", value=v3io_framesd)
|
|
224
|
-
)
|
|
225
|
-
)
|
|
226
|
-
|
|
227
|
-
return _use_v3io_cred
|
|
228
|
-
|
|
229
29
|
|
|
230
30
|
def split_path(mntpath=""):
|
|
231
31
|
if mntpath[0] == "/":
|
|
@@ -297,34 +97,43 @@ class OutputStream:
|
|
|
297
97
|
|
|
298
98
|
self._v3io_client = v3io.dataplane.Client(**v3io_client_kwargs)
|
|
299
99
|
self._container, self._stream_path = split_path(stream_path)
|
|
100
|
+
self._shards = shards
|
|
101
|
+
self._retention_in_hours = retention_in_hours
|
|
102
|
+
self._create = create
|
|
103
|
+
self._endpoint = endpoint
|
|
300
104
|
self._mock = mock
|
|
301
105
|
self._mock_queue = []
|
|
302
106
|
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
107
|
+
def create_stream(self):
|
|
108
|
+
# this import creates an import loop via the utils module, so putting it in execution path
|
|
109
|
+
from mlrun.utils.helpers import logger
|
|
110
|
+
|
|
111
|
+
logger.debug(
|
|
112
|
+
"Creating output stream",
|
|
113
|
+
endpoint=self._endpoint,
|
|
114
|
+
container=self._container,
|
|
115
|
+
stream_path=self._stream_path,
|
|
116
|
+
shards=self._shards,
|
|
117
|
+
retention_in_hours=self._retention_in_hours,
|
|
118
|
+
)
|
|
119
|
+
response = self._v3io_client.stream.create(
|
|
120
|
+
container=self._container,
|
|
121
|
+
stream_path=self._stream_path,
|
|
122
|
+
shard_count=self._shards or 1,
|
|
123
|
+
retention_period_hours=self._retention_in_hours or 24,
|
|
124
|
+
raise_for_status=v3io.dataplane.RaiseForStatus.never,
|
|
125
|
+
)
|
|
126
|
+
if not (response.status_code == 400 and "ResourceInUse" in str(response.body)):
|
|
127
|
+
response.raise_for_status([409, 204])
|
|
128
|
+
|
|
129
|
+
def _lazy_init(self):
|
|
130
|
+
if self._create and not self._mock:
|
|
131
|
+
self._create = False
|
|
132
|
+
self.create_stream()
|
|
306
133
|
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
endpoint=endpoint,
|
|
310
|
-
container=self._container,
|
|
311
|
-
stream_path=self._stream_path,
|
|
312
|
-
shards=shards,
|
|
313
|
-
retention_in_hours=retention_in_hours,
|
|
314
|
-
)
|
|
315
|
-
response = self._v3io_client.stream.create(
|
|
316
|
-
container=self._container,
|
|
317
|
-
stream_path=self._stream_path,
|
|
318
|
-
shard_count=shards or 1,
|
|
319
|
-
retention_period_hours=retention_in_hours or 24,
|
|
320
|
-
raise_for_status=v3io.dataplane.RaiseForStatus.never,
|
|
321
|
-
)
|
|
322
|
-
if not (
|
|
323
|
-
response.status_code == 400 and "ResourceInUse" in str(response.body)
|
|
324
|
-
):
|
|
325
|
-
response.raise_for_status([409, 204])
|
|
134
|
+
def push(self, data, partition_key=None):
|
|
135
|
+
self._lazy_init()
|
|
326
136
|
|
|
327
|
-
def push(self, data):
|
|
328
137
|
def dump_record(rec):
|
|
329
138
|
if not isinstance(rec, (str, bytes)):
|
|
330
139
|
return dict_to_json(rec)
|
|
@@ -332,7 +141,14 @@ class OutputStream:
|
|
|
332
141
|
|
|
333
142
|
if not isinstance(data, list):
|
|
334
143
|
data = [data]
|
|
335
|
-
|
|
144
|
+
|
|
145
|
+
records = []
|
|
146
|
+
for rec in data:
|
|
147
|
+
record = {"data": dump_record(rec)}
|
|
148
|
+
if partition_key is not None:
|
|
149
|
+
record["partition_key"] = partition_key
|
|
150
|
+
records.append(record)
|
|
151
|
+
|
|
336
152
|
if self._mock:
|
|
337
153
|
# for mock testing
|
|
338
154
|
self._mock_queue.extend(records)
|
|
@@ -405,7 +221,7 @@ class KafkaOutputStream:
|
|
|
405
221
|
|
|
406
222
|
self._initialized = True
|
|
407
223
|
|
|
408
|
-
def push(self, data):
|
|
224
|
+
def push(self, data, partition_key=None):
|
|
409
225
|
self._lazy_init()
|
|
410
226
|
|
|
411
227
|
def dump_record(rec):
|
|
@@ -426,7 +242,11 @@ class KafkaOutputStream:
|
|
|
426
242
|
else:
|
|
427
243
|
for record in data:
|
|
428
244
|
serialized_record = dump_record(record)
|
|
429
|
-
|
|
245
|
+
if isinstance(partition_key, str):
|
|
246
|
+
partition_key = partition_key.encode("UTF-8")
|
|
247
|
+
self._kafka_producer.send(
|
|
248
|
+
self._topic, serialized_record, key=partition_key
|
|
249
|
+
)
|
|
430
250
|
|
|
431
251
|
|
|
432
252
|
class V3ioStreamClient:
|
|
@@ -525,8 +345,8 @@ def add_or_refresh_credentials(
|
|
|
525
345
|
# different access keys for the 2 usages
|
|
526
346
|
token = (
|
|
527
347
|
token
|
|
528
|
-
# can't use mlrun.runtimes.constants.FunctionEnvironmentVariables.auth_session cause this is running
|
|
529
|
-
# import execution path (when we're initializing the run db) and therefore we can't import mlrun.runtimes
|
|
348
|
+
# can't use mlrun.common.runtimes.constants.FunctionEnvironmentVariables.auth_session cause this is running
|
|
349
|
+
# in the import execution path (when we're initializing the run db) and therefore we can't import mlrun.runtimes
|
|
530
350
|
or os.environ.get("MLRUN_AUTH_SESSION")
|
|
531
351
|
or os.environ.get("V3IO_ACCESS_KEY")
|
|
532
352
|
)
|
|
@@ -582,3 +402,22 @@ def sanitize_username(username: str):
|
|
|
582
402
|
So simply replace it with dash
|
|
583
403
|
"""
|
|
584
404
|
return username.replace("_", "-")
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def min_iguazio_versions(*versions):
|
|
408
|
+
def decorator(function):
|
|
409
|
+
def wrapper(*args, **kwargs):
|
|
410
|
+
if mlrun.utils.helpers.validate_component_version_compatibility(
|
|
411
|
+
"iguazio", *versions
|
|
412
|
+
):
|
|
413
|
+
return function(*args, **kwargs)
|
|
414
|
+
|
|
415
|
+
message = (
|
|
416
|
+
f"{function.__name__} is supported since Iguazio {' or '.join(versions)}, currently using "
|
|
417
|
+
f"Iguazio {mlconf.igz_version}."
|
|
418
|
+
)
|
|
419
|
+
raise mlrun.errors.MLRunIncompatibleVersionError(message)
|
|
420
|
+
|
|
421
|
+
return wrapper
|
|
422
|
+
|
|
423
|
+
return decorator
|
mlrun/projects/__init__.py
CHANGED
|
@@ -27,7 +27,12 @@ __all__ = [
|
|
|
27
27
|
]
|
|
28
28
|
|
|
29
29
|
from .operations import build_function, deploy_function, run_function # noqa
|
|
30
|
-
from .pipelines import
|
|
30
|
+
from .pipelines import (
|
|
31
|
+
import_remote_project,
|
|
32
|
+
load_and_run_workflow,
|
|
33
|
+
load_and_run,
|
|
34
|
+
pipeline_context,
|
|
35
|
+
) # noqa
|
|
31
36
|
from .project import (
|
|
32
37
|
MlrunProject,
|
|
33
38
|
ProjectMetadata,
|
mlrun/projects/operations.py
CHANGED
|
@@ -15,9 +15,13 @@
|
|
|
15
15
|
import warnings
|
|
16
16
|
from typing import Optional, Union
|
|
17
17
|
|
|
18
|
-
import
|
|
18
|
+
import mlrun_pipelines.common.models
|
|
19
|
+
import mlrun_pipelines.models
|
|
19
20
|
|
|
20
21
|
import mlrun
|
|
22
|
+
import mlrun.common.constants as mlrun_constants
|
|
23
|
+
import mlrun.common.schemas.function
|
|
24
|
+
import mlrun.common.schemas.workflow
|
|
21
25
|
from mlrun.utils import hub_prefix
|
|
22
26
|
|
|
23
27
|
from .pipelines import enrich_function_object, pipeline_context
|
|
@@ -48,7 +52,7 @@ def _get_engine_and_function(function, project=None):
|
|
|
48
52
|
function = enrich_function_object(project, function, copy_function=False)
|
|
49
53
|
|
|
50
54
|
if not pipeline_context.workflow:
|
|
51
|
-
return
|
|
55
|
+
return mlrun.common.schemas.workflow.EngineType.LOCAL, function
|
|
52
56
|
|
|
53
57
|
return pipeline_context.workflow.engine, function
|
|
54
58
|
|
|
@@ -76,7 +80,8 @@ def run_function(
|
|
|
76
80
|
notifications: list[mlrun.model.Notification] = None,
|
|
77
81
|
returns: Optional[list[Union[str, dict[str, str]]]] = None,
|
|
78
82
|
builder_env: Optional[list] = None,
|
|
79
|
-
|
|
83
|
+
reset_on_run: Optional[bool] = None,
|
|
84
|
+
) -> Union[mlrun.model.RunObject, mlrun_pipelines.models.PipelineNodeWrapper]:
|
|
80
85
|
"""Run a local or remote task as part of a local/kubeflow pipeline
|
|
81
86
|
|
|
82
87
|
run_function() allow you to execute a function locally, on a remote cluster, or as part of an automated workflow
|
|
@@ -86,7 +91,7 @@ def run_function(
|
|
|
86
91
|
when functions run as part of a workflow/pipeline (project.run()) some attributes can be set at the run level,
|
|
87
92
|
e.g. local=True will run all the functions locally, setting artifact_path will direct all outputs to the same path.
|
|
88
93
|
project runs provide additional notifications/reporting and exception handling.
|
|
89
|
-
inside a Kubeflow pipeline (KFP) run_function() generates KFP
|
|
94
|
+
inside a Kubeflow pipeline (KFP) run_function() generates KFP node (see PipelineNodeWrapper) which forms a DAG
|
|
90
95
|
some behavior may differ between regular runs and deferred KFP runs.
|
|
91
96
|
|
|
92
97
|
example (use with function object)::
|
|
@@ -95,8 +100,11 @@ def run_function(
|
|
|
95
100
|
MODEL_CLASS = "sklearn.ensemble.RandomForestClassifier"
|
|
96
101
|
DATA_PATH = "s3://bigdata/data.parquet"
|
|
97
102
|
function = mlrun.import_function("hub://auto-trainer")
|
|
98
|
-
run1 = run_function(
|
|
99
|
-
|
|
103
|
+
run1 = run_function(
|
|
104
|
+
function,
|
|
105
|
+
params={"label_columns": LABELS, "model_class": MODEL_CLASS},
|
|
106
|
+
inputs={"dataset": DATA_PATH},
|
|
107
|
+
)
|
|
100
108
|
|
|
101
109
|
example (use with project)::
|
|
102
110
|
|
|
@@ -115,8 +123,12 @@ def run_function(
|
|
|
115
123
|
@dsl.pipeline(name="test pipeline", description="test")
|
|
116
124
|
def my_pipe(url=""):
|
|
117
125
|
run1 = run_function("loaddata", params={"url": url}, outputs=["data"])
|
|
118
|
-
run2 = run_function(
|
|
119
|
-
|
|
126
|
+
run2 = run_function(
|
|
127
|
+
"train",
|
|
128
|
+
params={"label_columns": LABELS, "model_class": MODEL_CLASS},
|
|
129
|
+
inputs={"dataset": run1.outputs["data"]},
|
|
130
|
+
)
|
|
131
|
+
|
|
120
132
|
|
|
121
133
|
project.run(workflow_handler=my_pipe, arguments={"param1": 7})
|
|
122
134
|
|
|
@@ -159,7 +171,10 @@ def run_function(
|
|
|
159
171
|
artifact type can be given there. The artifact key must appear in the dictionary as
|
|
160
172
|
"key": "the_key".
|
|
161
173
|
:param builder_env: env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
|
|
162
|
-
:
|
|
174
|
+
:param reset_on_run: When True, function python modules would reload prior to code execution.
|
|
175
|
+
This ensures latest code changes are executed. This argument must be used in
|
|
176
|
+
conjunction with the local=True argument.
|
|
177
|
+
:return: MLRun RunObject or PipelineNodeWrapper
|
|
163
178
|
"""
|
|
164
179
|
engine, function = _get_engine_and_function(function, project_object)
|
|
165
180
|
task = mlrun.new_task(
|
|
@@ -174,7 +189,11 @@ def run_function(
|
|
|
174
189
|
)
|
|
175
190
|
task.spec.verbose = task.spec.verbose or verbose
|
|
176
191
|
|
|
177
|
-
if engine ==
|
|
192
|
+
if engine == mlrun.common.schemas.workflow.EngineType.KFP:
|
|
193
|
+
if schedule:
|
|
194
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
195
|
+
"Scheduling jobs is not supported when running a workflow with the kfp engine."
|
|
196
|
+
)
|
|
178
197
|
return function.as_step(
|
|
179
198
|
name=name, runspec=task, workdir=workdir, outputs=outputs, labels=labels
|
|
180
199
|
)
|
|
@@ -183,7 +202,9 @@ def run_function(
|
|
|
183
202
|
local = pipeline_context.is_run_local(local)
|
|
184
203
|
task.metadata.labels = task.metadata.labels or labels or {}
|
|
185
204
|
if pipeline_context.workflow_id:
|
|
186
|
-
task.metadata.labels[
|
|
205
|
+
task.metadata.labels[mlrun_constants.MLRunInternalLabels.workflow] = (
|
|
206
|
+
pipeline_context.workflow_id
|
|
207
|
+
)
|
|
187
208
|
if function.kind == "local":
|
|
188
209
|
command, function = mlrun.run.load_func_code(function)
|
|
189
210
|
function.spec.command = command
|
|
@@ -205,6 +226,7 @@ def run_function(
|
|
|
205
226
|
schedule=schedule,
|
|
206
227
|
notifications=notifications,
|
|
207
228
|
builder_env=builder_env,
|
|
229
|
+
reset_on_run=reset_on_run,
|
|
208
230
|
)
|
|
209
231
|
if run_result:
|
|
210
232
|
run_result._notified = False
|
|
@@ -218,9 +240,9 @@ def run_function(
|
|
|
218
240
|
class BuildStatus:
|
|
219
241
|
"""returned status from build operation"""
|
|
220
242
|
|
|
221
|
-
def __init__(self, ready, outputs=
|
|
243
|
+
def __init__(self, ready, outputs=None, function=None):
|
|
222
244
|
self.ready = ready
|
|
223
|
-
self.outputs = outputs
|
|
245
|
+
self.outputs = outputs or {}
|
|
224
246
|
self.function = function
|
|
225
247
|
|
|
226
248
|
def after(self, step):
|
|
@@ -247,7 +269,7 @@ def build_function(
|
|
|
247
269
|
overwrite_build_params: bool = False,
|
|
248
270
|
extra_args: str = None,
|
|
249
271
|
force_build: bool = False,
|
|
250
|
-
) -> Union[BuildStatus,
|
|
272
|
+
) -> Union[BuildStatus, mlrun_pipelines.models.PipelineNodeWrapper]:
|
|
251
273
|
"""deploy ML function, build container with its dependencies
|
|
252
274
|
|
|
253
275
|
:param function: Name of the function (in the project) or function object
|
|
@@ -283,11 +305,15 @@ def build_function(
|
|
|
283
305
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
284
306
|
"Cannot build use deploy_function()"
|
|
285
307
|
)
|
|
286
|
-
if engine ==
|
|
308
|
+
if engine == mlrun.common.schemas.workflow.EngineType.KFP:
|
|
287
309
|
if overwrite_build_params:
|
|
288
310
|
function.spec.build.commands = None
|
|
289
311
|
if requirements or requirements_file:
|
|
290
|
-
function.with_requirements(
|
|
312
|
+
function.with_requirements(
|
|
313
|
+
requirements=requirements,
|
|
314
|
+
requirements_file=requirements_file,
|
|
315
|
+
overwrite=True,
|
|
316
|
+
)
|
|
291
317
|
if commands:
|
|
292
318
|
function.with_commands(commands)
|
|
293
319
|
return function.deploy_step(
|
|
@@ -311,6 +337,7 @@ def build_function(
|
|
|
311
337
|
commands=commands,
|
|
312
338
|
secret=secret_name,
|
|
313
339
|
requirements=requirements,
|
|
340
|
+
requirements_file=requirements_file,
|
|
314
341
|
overwrite=overwrite_build_params,
|
|
315
342
|
extra_args=extra_args,
|
|
316
343
|
)
|
|
@@ -329,9 +356,9 @@ def build_function(
|
|
|
329
356
|
class DeployStatus:
|
|
330
357
|
"""returned status from deploy operation"""
|
|
331
358
|
|
|
332
|
-
def __init__(self, state, outputs=
|
|
359
|
+
def __init__(self, state, outputs=None, function=None):
|
|
333
360
|
self.state = state
|
|
334
|
-
self.outputs = outputs
|
|
361
|
+
self.outputs = outputs or {}
|
|
335
362
|
self.function = function
|
|
336
363
|
|
|
337
364
|
def after(self, step):
|
|
@@ -351,7 +378,7 @@ def deploy_function(
|
|
|
351
378
|
builder_env: dict = None,
|
|
352
379
|
project_object=None,
|
|
353
380
|
mock: bool = None,
|
|
354
|
-
) -> Union[DeployStatus,
|
|
381
|
+
) -> Union[DeployStatus, mlrun_pipelines.models.PipelineNodeWrapper]:
|
|
355
382
|
"""deploy real-time (nuclio based) functions
|
|
356
383
|
|
|
357
384
|
:param function: name of the function (in the project) or function object
|
|
@@ -368,7 +395,7 @@ def deploy_function(
|
|
|
368
395
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
369
396
|
"deploy is used with real-time functions, for other kinds use build_function()"
|
|
370
397
|
)
|
|
371
|
-
if engine ==
|
|
398
|
+
if engine == mlrun.common.schemas.workflow.EngineType.KFP:
|
|
372
399
|
return function.deploy_step(models=models, env=env, tag=tag, verbose=verbose)
|
|
373
400
|
else:
|
|
374
401
|
if env:
|