mlrun 1.7.0rc4__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (235) hide show
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +39 -121
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +39 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +73 -46
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +73 -1
  13. mlrun/common/db/sql_session.py +3 -2
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +46 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +44 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +11 -1
  23. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  24. mlrun/common/schemas/__init__.py +31 -4
  25. mlrun/common/schemas/alert.py +202 -0
  26. mlrun/common/schemas/api_gateway.py +196 -0
  27. mlrun/common/schemas/artifact.py +28 -1
  28. mlrun/common/schemas/auth.py +13 -2
  29. mlrun/common/schemas/client_spec.py +2 -1
  30. mlrun/common/schemas/common.py +7 -4
  31. mlrun/common/schemas/constants.py +3 -0
  32. mlrun/common/schemas/feature_store.py +58 -28
  33. mlrun/common/schemas/frontend_spec.py +8 -0
  34. mlrun/common/schemas/function.py +11 -0
  35. mlrun/common/schemas/hub.py +7 -9
  36. mlrun/common/schemas/model_monitoring/__init__.py +21 -4
  37. mlrun/common/schemas/model_monitoring/constants.py +136 -42
  38. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  39. mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
  40. mlrun/common/schemas/notification.py +69 -12
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +7 -0
  43. mlrun/common/schemas/project.py +67 -16
  44. mlrun/common/schemas/runs.py +17 -0
  45. mlrun/common/schemas/schedule.py +1 -1
  46. mlrun/common/schemas/workflow.py +10 -2
  47. mlrun/common/types.py +14 -1
  48. mlrun/config.py +233 -58
  49. mlrun/data_types/data_types.py +11 -1
  50. mlrun/data_types/spark.py +5 -4
  51. mlrun/data_types/to_pandas.py +75 -34
  52. mlrun/datastore/__init__.py +8 -10
  53. mlrun/datastore/alibaba_oss.py +131 -0
  54. mlrun/datastore/azure_blob.py +131 -43
  55. mlrun/datastore/base.py +107 -47
  56. mlrun/datastore/datastore.py +17 -7
  57. mlrun/datastore/datastore_profile.py +91 -7
  58. mlrun/datastore/dbfs_store.py +3 -7
  59. mlrun/datastore/filestore.py +1 -3
  60. mlrun/datastore/google_cloud_storage.py +92 -32
  61. mlrun/datastore/hdfs.py +5 -0
  62. mlrun/datastore/inmem.py +6 -3
  63. mlrun/datastore/redis.py +3 -2
  64. mlrun/datastore/s3.py +30 -12
  65. mlrun/datastore/snowflake_utils.py +45 -0
  66. mlrun/datastore/sources.py +274 -59
  67. mlrun/datastore/spark_utils.py +30 -0
  68. mlrun/datastore/store_resources.py +9 -7
  69. mlrun/datastore/storeytargets.py +151 -0
  70. mlrun/datastore/targets.py +387 -119
  71. mlrun/datastore/utils.py +68 -5
  72. mlrun/datastore/v3io.py +28 -50
  73. mlrun/db/auth_utils.py +152 -0
  74. mlrun/db/base.py +245 -20
  75. mlrun/db/factory.py +1 -4
  76. mlrun/db/httpdb.py +909 -231
  77. mlrun/db/nopdb.py +279 -14
  78. mlrun/errors.py +35 -5
  79. mlrun/execution.py +111 -38
  80. mlrun/feature_store/__init__.py +0 -2
  81. mlrun/feature_store/api.py +46 -53
  82. mlrun/feature_store/common.py +6 -11
  83. mlrun/feature_store/feature_set.py +48 -23
  84. mlrun/feature_store/feature_vector.py +13 -2
  85. mlrun/feature_store/ingestion.py +7 -6
  86. mlrun/feature_store/retrieval/base.py +9 -4
  87. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  88. mlrun/feature_store/retrieval/job.py +13 -4
  89. mlrun/feature_store/retrieval/local_merger.py +2 -0
  90. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  91. mlrun/feature_store/steps.py +38 -19
  92. mlrun/features.py +6 -14
  93. mlrun/frameworks/_common/plan.py +3 -3
  94. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  95. mlrun/frameworks/_ml_common/plan.py +1 -1
  96. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  97. mlrun/frameworks/lgbm/__init__.py +1 -1
  98. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  99. mlrun/frameworks/lgbm/model_handler.py +1 -1
  100. mlrun/frameworks/parallel_coordinates.py +4 -4
  101. mlrun/frameworks/pytorch/__init__.py +2 -2
  102. mlrun/frameworks/sklearn/__init__.py +1 -1
  103. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  104. mlrun/frameworks/tf_keras/__init__.py +5 -2
  105. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  106. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  107. mlrun/frameworks/xgboost/__init__.py +1 -1
  108. mlrun/k8s_utils.py +57 -12
  109. mlrun/launcher/__init__.py +1 -1
  110. mlrun/launcher/base.py +6 -5
  111. mlrun/launcher/client.py +13 -11
  112. mlrun/launcher/factory.py +1 -1
  113. mlrun/launcher/local.py +15 -5
  114. mlrun/launcher/remote.py +10 -3
  115. mlrun/lists.py +6 -2
  116. mlrun/model.py +297 -48
  117. mlrun/model_monitoring/__init__.py +1 -1
  118. mlrun/model_monitoring/api.py +152 -357
  119. mlrun/model_monitoring/applications/__init__.py +10 -0
  120. mlrun/model_monitoring/applications/_application_steps.py +190 -0
  121. mlrun/model_monitoring/applications/base.py +108 -0
  122. mlrun/model_monitoring/applications/context.py +341 -0
  123. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  124. mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
  125. mlrun/model_monitoring/applications/results.py +99 -0
  126. mlrun/model_monitoring/controller.py +130 -303
  127. mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
  128. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  129. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  130. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  131. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  132. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  133. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  134. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  135. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  136. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  137. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  138. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  139. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  140. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  141. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  142. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  143. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
  144. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  145. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
  146. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  147. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  148. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  149. mlrun/model_monitoring/features_drift_table.py +34 -22
  150. mlrun/model_monitoring/helpers.py +177 -39
  151. mlrun/model_monitoring/model_endpoint.py +3 -2
  152. mlrun/model_monitoring/stream_processing.py +165 -398
  153. mlrun/model_monitoring/tracking_policy.py +7 -1
  154. mlrun/model_monitoring/writer.py +161 -125
  155. mlrun/package/packagers/default_packager.py +2 -2
  156. mlrun/package/packagers_manager.py +1 -0
  157. mlrun/package/utils/_formatter.py +2 -2
  158. mlrun/platforms/__init__.py +11 -10
  159. mlrun/platforms/iguazio.py +67 -228
  160. mlrun/projects/__init__.py +6 -1
  161. mlrun/projects/operations.py +47 -20
  162. mlrun/projects/pipelines.py +396 -249
  163. mlrun/projects/project.py +1176 -406
  164. mlrun/render.py +28 -22
  165. mlrun/run.py +208 -181
  166. mlrun/runtimes/__init__.py +76 -11
  167. mlrun/runtimes/base.py +54 -24
  168. mlrun/runtimes/daskjob.py +9 -2
  169. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  170. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  171. mlrun/runtimes/funcdoc.py +1 -29
  172. mlrun/runtimes/kubejob.py +34 -128
  173. mlrun/runtimes/local.py +39 -10
  174. mlrun/runtimes/mpijob/__init__.py +0 -20
  175. mlrun/runtimes/mpijob/abstract.py +8 -8
  176. mlrun/runtimes/mpijob/v1.py +1 -1
  177. mlrun/runtimes/nuclio/__init__.py +1 -0
  178. mlrun/runtimes/nuclio/api_gateway.py +769 -0
  179. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  180. mlrun/runtimes/nuclio/application/application.py +758 -0
  181. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  182. mlrun/runtimes/nuclio/function.py +188 -68
  183. mlrun/runtimes/nuclio/serving.py +57 -60
  184. mlrun/runtimes/pod.py +191 -58
  185. mlrun/runtimes/remotesparkjob.py +11 -8
  186. mlrun/runtimes/sparkjob/spark3job.py +17 -18
  187. mlrun/runtimes/utils.py +40 -73
  188. mlrun/secrets.py +6 -2
  189. mlrun/serving/__init__.py +8 -1
  190. mlrun/serving/remote.py +2 -3
  191. mlrun/serving/routers.py +89 -64
  192. mlrun/serving/server.py +54 -26
  193. mlrun/serving/states.py +187 -56
  194. mlrun/serving/utils.py +19 -11
  195. mlrun/serving/v2_serving.py +136 -63
  196. mlrun/track/tracker.py +2 -1
  197. mlrun/track/trackers/mlflow_tracker.py +5 -0
  198. mlrun/utils/async_http.py +26 -6
  199. mlrun/utils/db.py +18 -0
  200. mlrun/utils/helpers.py +375 -105
  201. mlrun/utils/http.py +2 -2
  202. mlrun/utils/logger.py +75 -9
  203. mlrun/utils/notifications/notification/__init__.py +14 -10
  204. mlrun/utils/notifications/notification/base.py +48 -0
  205. mlrun/utils/notifications/notification/console.py +2 -0
  206. mlrun/utils/notifications/notification/git.py +24 -1
  207. mlrun/utils/notifications/notification/ipython.py +2 -0
  208. mlrun/utils/notifications/notification/slack.py +96 -21
  209. mlrun/utils/notifications/notification/webhook.py +63 -2
  210. mlrun/utils/notifications/notification_pusher.py +146 -16
  211. mlrun/utils/regex.py +9 -0
  212. mlrun/utils/retryer.py +3 -2
  213. mlrun/utils/v3io_clients.py +2 -3
  214. mlrun/utils/version/version.json +2 -2
  215. mlrun-1.7.2.dist-info/METADATA +390 -0
  216. mlrun-1.7.2.dist-info/RECORD +351 -0
  217. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
  218. mlrun/feature_store/retrieval/conversion.py +0 -271
  219. mlrun/kfpops.py +0 -868
  220. mlrun/model_monitoring/application.py +0 -310
  221. mlrun/model_monitoring/batch.py +0 -974
  222. mlrun/model_monitoring/controller_handler.py +0 -37
  223. mlrun/model_monitoring/prometheus.py +0 -216
  224. mlrun/model_monitoring/stores/__init__.py +0 -111
  225. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
  226. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
  227. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  228. mlrun/model_monitoring/stores/models/base.py +0 -84
  229. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  230. mlrun/platforms/other.py +0 -305
  231. mlrun-1.7.0rc4.dist-info/METADATA +0 -269
  232. mlrun-1.7.0rc4.dist-info/RECORD +0 -321
  233. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
  234. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
  235. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
mlrun/kfpops.py DELETED
@@ -1,868 +0,0 @@
1
- # Copyright 2023 Iguazio
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- import json
15
- import os
16
- import os.path
17
- from copy import deepcopy
18
- from typing import Union
19
-
20
- import inflection
21
- from kfp import dsl
22
- from kubernetes import client as k8s_client
23
-
24
- import mlrun
25
- from mlrun.errors import err_to_str
26
-
27
- from .config import config
28
- from .model import HyperParamOptions, RunSpec
29
- from .utils import (
30
- dict_to_yaml,
31
- gen_md_table,
32
- get_artifact_target,
33
- get_in,
34
- get_workflow_url,
35
- is_ipython,
36
- is_legacy_artifact,
37
- logger,
38
- run_keys,
39
- version,
40
- )
41
-
42
- # default KFP artifacts and output (ui metadata, metrics etc.)
43
- # directories to /tmp to allow running with security context
44
- KFPMETA_DIR = "/tmp"
45
- KFP_ARTIFACTS_DIR = "/tmp"
46
-
47
- project_annotation = "mlrun/project"
48
- run_annotation = "mlrun/pipeline-step-type"
49
- function_annotation = "mlrun/function-uri"
50
-
51
- dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
52
-
53
-
54
- class PipelineRunType:
55
- run = "run"
56
- build = "build"
57
- deploy = "deploy"
58
-
59
-
60
- def is_num(v):
61
- return isinstance(v, (int, float, complex))
62
-
63
-
64
- def write_kfpmeta(struct):
65
- if "status" not in struct:
66
- return
67
-
68
- results = struct["status"].get("results", {})
69
- metrics = {
70
- "metrics": [
71
- {"name": k, "numberValue": v} for k, v in results.items() if is_num(v)
72
- ],
73
- }
74
- with open(os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"), "w") as f:
75
- json.dump(metrics, f)
76
-
77
- struct = deepcopy(struct)
78
- uid = struct["metadata"].get("uid")
79
- project = struct["metadata"].get("project", config.default_project)
80
- output_artifacts, out_dict = get_kfp_outputs(
81
- struct["status"].get(run_keys.artifacts, []),
82
- struct["metadata"].get("labels", {}),
83
- project,
84
- )
85
-
86
- results["run_id"] = results.get("run_id", "/".join([project, uid]))
87
- for key in struct["spec"].get(run_keys.outputs, []):
88
- val = "None"
89
- if key in out_dict:
90
- val = out_dict[key]
91
- elif key in results:
92
- val = results[key]
93
- try:
94
- # NOTE: if key has "../x", it would fail on path traversal
95
- path = os.path.join(KFP_ARTIFACTS_DIR, key)
96
- if not mlrun.utils.helpers.is_safe_path(KFP_ARTIFACTS_DIR, path):
97
- logger.warning(
98
- "Path traversal is not allowed ignoring", path=path, key=key
99
- )
100
- continue
101
- path = os.path.abspath(path)
102
- logger.info("Writing artifact output", path=path, val=val)
103
- with open(path, "w") as fp:
104
- fp.write(str(val))
105
- except Exception as exc:
106
- logger.warning("Failed writing to temp file. Ignoring", exc=repr(exc))
107
- pass
108
-
109
- text = "# Run Report\n"
110
- if "iterations" in struct["status"]:
111
- del struct["status"]["iterations"]
112
-
113
- text += "## Metadata\n```yaml\n" + dict_to_yaml(struct) + "```\n"
114
-
115
- metadata = {
116
- "outputs": output_artifacts
117
- + [{"type": "markdown", "storage": "inline", "source": text}]
118
- }
119
- with open(os.path.join(KFPMETA_DIR, "mlpipeline-ui-metadata.json"), "w") as f:
120
- json.dump(metadata, f)
121
-
122
-
123
- def get_kfp_outputs(artifacts, labels, project):
124
- outputs = []
125
- out_dict = {}
126
- for output in artifacts:
127
- if is_legacy_artifact(output):
128
- key = output["key"]
129
- # The spec in a legacy artifact is contained in the main object, so using this assignment saves us a lot
130
- # of if/else in the rest of this function.
131
- output_spec = output
132
- else:
133
- key = output.get("metadata")["key"]
134
- output_spec = output.get("spec", {})
135
-
136
- target = output_spec.get("target_path", "")
137
- target = output_spec.get("inline", target)
138
-
139
- out_dict[key] = get_artifact_target(output, project=project)
140
-
141
- if target.startswith("v3io:///"):
142
- target = target.replace("v3io:///", "http://v3io-webapi:8081/")
143
-
144
- user = labels.get("v3io_user", "") or os.environ.get("V3IO_USERNAME", "")
145
- if target.startswith("/User/"):
146
- user = user or "admin"
147
- target = "http://v3io-webapi:8081/users/" + user + target[5:]
148
-
149
- viewer = output_spec.get("viewer", "")
150
- if viewer in ["web-app", "chart"]:
151
- meta = {"type": "web-app", "source": target}
152
- outputs += [meta]
153
-
154
- elif viewer == "table":
155
- header = output_spec.get("header", None)
156
- if header and target.endswith(".csv"):
157
- meta = {
158
- "type": "table",
159
- "format": "csv",
160
- "header": header,
161
- "source": target,
162
- }
163
- outputs += [meta]
164
-
165
- elif output.get("kind") == "dataset":
166
- header = output_spec.get("header")
167
- preview = output_spec.get("preview")
168
- if preview:
169
- tbl_md = gen_md_table(header, preview)
170
- text = f"## Dataset: {key} \n\n" + tbl_md
171
- del output_spec["preview"]
172
-
173
- meta = {"type": "markdown", "storage": "inline", "source": text}
174
- outputs += [meta]
175
-
176
- return outputs, out_dict
177
-
178
-
179
- def mlrun_op(
180
- name: str = "",
181
- project: str = "",
182
- function=None,
183
- func_url=None,
184
- image: str = "",
185
- runobj=None,
186
- command: str = "",
187
- secrets: list = None,
188
- params: dict = None,
189
- job_image=None,
190
- hyperparams: dict = None,
191
- param_file: str = "",
192
- labels: dict = None,
193
- selector: str = "",
194
- inputs: dict = None,
195
- outputs: list = None,
196
- in_path: str = "",
197
- out_path: str = "",
198
- rundb: str = "",
199
- mode: str = "",
200
- handler: str = "",
201
- more_args: list = None,
202
- hyper_param_options=None,
203
- verbose=None,
204
- scrape_metrics=False,
205
- returns: list[Union[str, dict[str, str]]] = None,
206
- auto_build: bool = False,
207
- ):
208
- """mlrun KubeFlow pipelines operator, use to form pipeline steps
209
-
210
- when using kubeflow pipelines, each step is wrapped in an mlrun_op
211
- one step can pass state and data to the next step, see example below.
212
-
213
- :param name: name used for the step
214
- :param project: optional, project name
215
- :param image: optional, run container image (will be executing the step)
216
- the container should host all required packages + code
217
- for the run, alternatively user can mount packages/code via
218
- shared file volumes like v3io (see example below)
219
- :param function: optional, function object
220
- :param func_url: optional, function object url
221
- :param command: exec command (or URL for functions)
222
- :param secrets: extra secrets specs, will be injected into the runtime
223
- e.g. ['file=<filename>', 'env=ENV_KEY1,ENV_KEY2']
224
- :param params: dictionary of run parameters and values
225
- :param hyperparams: dictionary of hyper parameters and list values, each
226
- hyperparam holds a list of values, the run will be
227
- executed for every parameter combination (GridSearch)
228
- :param param_file: a csv/json file with parameter combinations, first csv row hold
229
- the parameter names, following rows hold param values
230
- :param selector: selection criteria for hyperparams e.g. "max.accuracy"
231
- :param hyper_param_options: hyper param options class, see: :py:class:`~mlrun.model.HyperParamOptions`
232
- :param labels: labels to tag the job/run with ({key:val, ..})
233
- :param inputs: dictionary of input objects + optional paths (if path is
234
- omitted the path will be the in_path/key.
235
- :param outputs: dictionary of output objects + optional paths (if path is
236
- omitted the path will be the out_path/key.
237
- :param in_path: default input path/url (prefix) for inputs
238
- :param out_path: default output path/url (prefix) for artifacts
239
- :param rundb: path for rundb (or use 'MLRUN_DBPATH' env instead)
240
- :param mode: run mode, e.g. 'pass' for using the command without mlrun wrapper
241
- :param handler code entry-point/handler name
242
- :param job_image name of the image user for the job
243
- :param verbose: add verbose prints/logs
244
- :param scrape_metrics: whether to add the `mlrun/scrape-metrics` label to this run's resources
245
- :param returns: List of configurations for how to log the returning values from the handler's run (as artifacts or
246
- results). The list's length must be equal to the amount of returning objects. A configuration may be
247
- given as:
248
-
249
- * A string of the key to use to log the returning value as result or as an artifact. To specify
250
- The artifact type, it is possible to pass a string in the following structure:
251
- "<key> : <type>". Available artifact types can be seen in `mlrun.ArtifactType`. If no artifact
252
- type is specified, the object's default artifact type will be used.
253
- * A dictionary of configurations to use when logging. Further info per object type and artifact
254
- type can be given there. The artifact key must appear in the dictionary as "key": "the_key".
255
- :param auto_build: when set to True and the function require build it will be built on the first
256
- function run, use only if you dont plan on changing the build config between runs
257
-
258
- :returns: KFP step operation
259
-
260
- Example:
261
- from kfp import dsl
262
- from mlrun import mlrun_op
263
- from mlrun.platforms import mount_v3io
264
-
265
- def mlrun_train(p1, p2):
266
- return mlrun_op('training',
267
- command = '/User/kubeflow/training.py',
268
- params = {'p1':p1, 'p2':p2},
269
- outputs = {'model.txt':'', 'dataset.csv':''},
270
- out_path ='v3io:///projects/my-proj/mlrun/{{workflow.uid}}/',
271
- rundb = '/User/kubeflow')
272
-
273
- # use data from the first step
274
- def mlrun_validate(modelfile):
275
- return mlrun_op('validation',
276
- command = '/User/kubeflow/validation.py',
277
- inputs = {'model.txt':modelfile},
278
- out_path ='v3io:///projects/my-proj/{{workflow.uid}}/',
279
- rundb = '/User/kubeflow')
280
-
281
- @dsl.pipeline(
282
- name='My MLRUN pipeline', description='Shows how to use mlrun.'
283
- )
284
- def mlrun_pipeline(
285
- p1 = 5 , p2 = '"text"'
286
- ):
287
- # run training, mount_v3io will mount "/User" into the pipeline step
288
- train = mlrun_train(p1, p2).apply(mount_v3io())
289
-
290
- # feed 1st step results into the second step
291
- validate = mlrun_validate(
292
- train.outputs['model-txt']).apply(mount_v3io())
293
-
294
- """
295
- secrets = [] if secrets is None else secrets
296
- params = {} if params is None else params
297
- hyperparams = {} if hyperparams is None else hyperparams
298
- if hyper_param_options and isinstance(hyper_param_options, dict):
299
- hyper_param_options = HyperParamOptions.from_dict(hyper_param_options)
300
- inputs = {} if inputs is None else inputs
301
- returns = [] if returns is None else returns
302
- outputs = [] if outputs is None else outputs
303
- labels = {} if labels is None else labels
304
-
305
- rundb = rundb or mlrun.db.get_or_set_dburl()
306
- cmd = [
307
- "python",
308
- "-m",
309
- "mlrun",
310
- "run",
311
- "--kfp",
312
- "--from-env",
313
- "--workflow",
314
- "{{workflow.uid}}",
315
- ]
316
- file_outputs = {}
317
-
318
- runtime = None
319
- code_env = None
320
- function_name = ""
321
- if function:
322
- if not func_url:
323
- if function.kind in ["", "local"]:
324
- image = image or function.spec.image
325
- command = command or function.spec.command
326
- more_args = more_args or function.spec.args
327
- mode = mode or function.spec.mode
328
- rundb = rundb or function.spec.rundb
329
- code_env = str(function.spec.build.functionSourceCode)
330
- else:
331
- runtime = str(function.to_dict())
332
-
333
- function_name = function.metadata.name
334
- if function.kind == "dask":
335
- image = image or function.spec.kfp_image or config.dask_kfp_image
336
-
337
- image = image or config.kfp_image
338
-
339
- if runobj:
340
- handler = handler or runobj.spec.handler_name
341
- params = params or runobj.spec.parameters
342
- hyperparams = hyperparams or runobj.spec.hyperparams
343
- param_file = (
344
- param_file
345
- or runobj.spec.param_file
346
- or runobj.spec.hyper_param_options.param_file
347
- )
348
- hyper_param_options = hyper_param_options or runobj.spec.hyper_param_options
349
- selector = (
350
- selector or runobj.spec.selector or runobj.spec.hyper_param_options.selector
351
- )
352
- inputs = inputs or runobj.spec.inputs
353
- returns = returns or runobj.spec.returns
354
- outputs = outputs or runobj.spec.outputs
355
- in_path = in_path or runobj.spec.input_path
356
- out_path = out_path or runobj.spec.output_path
357
- secrets = secrets or runobj.spec.secret_sources
358
- project = project or runobj.metadata.project
359
- labels = runobj.metadata.labels or labels
360
- verbose = verbose or runobj.spec.verbose
361
- scrape_metrics = scrape_metrics or runobj.spec.scrape_metrics
362
-
363
- outputs = RunSpec.join_outputs_and_returns(outputs=outputs, returns=returns)
364
-
365
- if not name:
366
- if not function_name:
367
- raise ValueError("name or function object must be specified")
368
- name = function_name
369
- if handler:
370
- short_name = handler
371
- for separator in ["#", "::", "."]:
372
- # drop paths, module or class name from short name
373
- if separator in short_name:
374
- short_name = short_name.split(separator)[-1]
375
- name += "-" + short_name
376
-
377
- if hyperparams or param_file:
378
- outputs.append("iteration_results")
379
- if "run_id" not in outputs:
380
- outputs.append("run_id")
381
-
382
- params = params or {}
383
- hyperparams = hyperparams or {}
384
- inputs = inputs or {}
385
- returns = returns or []
386
- secrets = secrets or []
387
-
388
- mlrun.runtimes.utils.enrich_run_labels(labels)
389
-
390
- if name:
391
- cmd += ["--name", name]
392
- if func_url:
393
- cmd += ["-f", func_url]
394
- for secret in secrets:
395
- cmd += ["-s", f"{secret['kind']}={secret['source']}"]
396
- for param, val in params.items():
397
- cmd += ["-p", f"{param}={val}"]
398
- for xpram, val in hyperparams.items():
399
- cmd += ["-x", f"{xpram}={val}"]
400
- for input_param, val in inputs.items():
401
- cmd += ["-i", f"{input_param}={val}"]
402
- for log_hint in returns:
403
- cmd += [
404
- "--returns",
405
- json.dumps(log_hint) if isinstance(log_hint, dict) else log_hint,
406
- ]
407
- for label, val in labels.items():
408
- cmd += ["--label", f"{label}={val}"]
409
- for output in outputs:
410
- cmd += ["-o", str(output)]
411
- file_outputs[output.replace(".", "_")] = (
412
- f"/tmp/{output}" # not using path.join to avoid windows "\"
413
- )
414
- if project:
415
- cmd += ["--project", project]
416
- if handler:
417
- cmd += ["--handler", handler]
418
- if runtime:
419
- cmd += ["--runtime", runtime]
420
- if in_path:
421
- cmd += ["--in-path", in_path]
422
- if out_path:
423
- cmd += ["--out-path", out_path]
424
- if param_file:
425
- cmd += ["--param-file", param_file]
426
- if hyper_param_options:
427
- cmd += ["--hyper-param-options", hyper_param_options.to_json()]
428
- if selector:
429
- cmd += ["--selector", selector]
430
- if job_image:
431
- cmd += ["--image", job_image]
432
- if mode:
433
- cmd += ["--mode", mode]
434
- if verbose:
435
- cmd += ["--verbose"]
436
- if scrape_metrics:
437
- cmd += ["--scrape-metrics"]
438
- if auto_build:
439
- cmd += ["--auto-build"]
440
- if more_args:
441
- cmd += more_args
442
-
443
- registry = get_default_reg()
444
- if image and image.startswith("."):
445
- if registry:
446
- image = f"{registry}/{image[1:]}"
447
- else:
448
- raise ValueError("local image registry env not found")
449
-
450
- image = mlrun.utils.enrich_image_url(
451
- image, mlrun.get_version(), str(version.Version().get_python_version())
452
- )
453
-
454
- cop = dsl.ContainerOp(
455
- name=name,
456
- image=image,
457
- command=cmd + [command],
458
- file_outputs=file_outputs,
459
- output_artifact_paths={
460
- "mlpipeline-ui-metadata": os.path.join(
461
- KFPMETA_DIR, "mlpipeline-ui-metadata.json"
462
- ),
463
- "mlpipeline-metrics": os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"),
464
- },
465
- )
466
- cop = add_default_function_resources(cop)
467
- cop = add_function_node_selection_attributes(container_op=cop, function=function)
468
-
469
- add_annotations(cop, PipelineRunType.run, function, func_url, project)
470
- add_labels(cop, function, scrape_metrics)
471
- if code_env:
472
- cop.container.add_env_variable(
473
- k8s_client.V1EnvVar(name="MLRUN_EXEC_CODE", value=code_env)
474
- )
475
- if registry:
476
- cop.container.add_env_variable(
477
- k8s_client.V1EnvVar(
478
- name="MLRUN_HTTPDB__BUILDER__DOCKER_REGISTRY", value=registry
479
- )
480
- )
481
-
482
- add_default_env(k8s_client, cop)
483
-
484
- return cop
485
-
486
-
487
- def deploy_op(
488
- name,
489
- function,
490
- func_url=None,
491
- source="",
492
- project="",
493
- models: list = None,
494
- env: dict = None,
495
- tag="",
496
- verbose=False,
497
- ):
498
- cmd = ["python", "-m", "mlrun", "deploy"]
499
- if source:
500
- cmd += ["-s", source]
501
- if tag:
502
- cmd += ["--tag", tag]
503
- if verbose:
504
- cmd += ["--verbose"]
505
- if project:
506
- cmd += ["-p", project]
507
-
508
- if models:
509
- for m in models:
510
- for key in ["key", "model_path", "model_url", "class_name", "model_url"]:
511
- if key in m:
512
- m[key] = str(m[key]) # verify we stringify pipeline params
513
- if function.kind == mlrun.runtimes.RuntimeKinds.serving:
514
- cmd += ["-m", json.dumps(m)]
515
- else:
516
- cmd += ["-m", f"{m['key']}={m['model_path']}"]
517
-
518
- if env:
519
- for key, val in env.items():
520
- cmd += ["--env", f"{key}={val}"]
521
-
522
- if func_url:
523
- cmd += ["-f", func_url]
524
- else:
525
- runtime = f"{function.to_dict()}"
526
- cmd += [runtime]
527
-
528
- cop = dsl.ContainerOp(
529
- name=name,
530
- image=config.kfp_image,
531
- command=cmd,
532
- file_outputs={"endpoint": "/tmp/output", "name": "/tmp/name"},
533
- )
534
- cop = add_default_function_resources(cop)
535
- cop = add_function_node_selection_attributes(container_op=cop, function=function)
536
-
537
- add_annotations(cop, PipelineRunType.deploy, function, func_url)
538
- add_default_env(k8s_client, cop)
539
- return cop
540
-
541
-
542
- def add_env(env=None):
543
- """
544
- Modifier function to add env vars from dict
545
- Usage:
546
- train = train_op(...)
547
- train.apply(add_env({'MY_ENV':'123'}))
548
- """
549
-
550
- env = {} if env is None else env
551
-
552
- def _add_env(task):
553
- for k, v in env.items():
554
- task.add_env_variable(k8s_client.V1EnvVar(name=k, value=v))
555
- return task
556
-
557
- return _add_env
558
-
559
-
560
def build_op(
    name,
    function=None,
    func_url=None,
    image=None,
    base_image=None,
    commands: list = None,
    secret_name="",
    with_mlrun=True,
    skip_deployed=False,
):
    """build Docker image.

    Creates a KFP ContainerOp that runs ``python -m mlrun build --kfp`` for the
    given function runtime object (or function URL) and exposes the build state
    and resulting image name as file outputs.

    :param name:          pipeline step name
    :param function:      mlrun function runtime object (must expose ``to_dict``)
    :param func_url:      function URL, used when no runtime object is given
    :param image:         target image name
    :param base_image:    base image to build from
    :param commands:      extra build commands
    :param secret_name:   k8s secret name holding registry credentials
    :param with_mlrun:    add the mlrun package to the built image
    :param skip_deployed: skip the build if the function is already deployed
    :raises ValueError: when neither a valid function object nor func_url is given
    """

    cmd = ["python", "-m", "mlrun", "build", "--kfp"]
    if function:
        if not hasattr(function, "to_dict"):
            raise ValueError("function must specify a function runtime object")
        cmd += ["-r", str(function.to_dict())]
    elif not func_url:
        raise ValueError("function object or func_url must be specified")

    commands = commands or []
    if image:
        cmd += ["-i", image]
    if base_image:
        cmd += ["-b", base_image]
    if secret_name:
        cmd += ["--secret-name", secret_name]
    if with_mlrun:
        cmd += ["--with-mlrun"]
    if skip_deployed:
        cmd += ["--skip"]
    for c in commands:
        cmd += ["-c", c]
    if func_url and not function:
        cmd += [func_url]

    cop = dsl.ContainerOp(
        name=name,
        image=config.kfp_image,
        command=cmd,
        file_outputs={"state": "/tmp/state", "image": "/tmp/image"},
    )
    cop = add_default_function_resources(cop)
    # bug fix: only apply node-selection attributes when a runtime object exists —
    # the helper reads function.kind and raised AttributeError when only func_url
    # was provided (function=None)
    if function:
        cop = add_function_node_selection_attributes(
            container_op=cop, function=function
        )

    add_annotations(cop, PipelineRunType.build, function, func_url)
    if config.httpdb.builder.docker_registry:
        cop.container.add_env_variable(
            k8s_client.V1EnvVar(
                name="MLRUN_HTTPDB__BUILDER__DOCKER_REGISTRY",
                value=config.httpdb.builder.docker_registry,
            )
        )
    if "IGZ_NAMESPACE_DOMAIN" in os.environ:
        cop.container.add_env_variable(
            k8s_client.V1EnvVar(
                name="IGZ_NAMESPACE_DOMAIN",
                value=os.environ.get("IGZ_NAMESPACE_DOMAIN"),
            )
        )

    # bug fix: guard against function=None before touching function.spec —
    # previously this raised AttributeError in the func_url-only flow
    is_v3io = bool(
        function
        and function.spec.build.source
        and function.spec.build.source.startswith("v3io")
    )
    if "V3IO_ACCESS_KEY" in os.environ and is_v3io:
        cop.container.add_env_variable(
            k8s_client.V1EnvVar(
                name="V3IO_ACCESS_KEY", value=os.environ.get("V3IO_ACCESS_KEY")
            )
        )

    add_default_env(k8s_client, cop)

    return cop
635
-
636
-
637
def add_default_env(k8s_client, cop):
    """Attach the standard MLRun environment variables to a pipeline step.

    Adds the pod namespace (via the downward API), the MLRun API URL, the
    MPIJob CRD version, and the auth session / V3IO access key when available.
    """
    # expose the pod's own namespace through the k8s downward API
    cop.container.add_env_variable(
        k8s_client.V1EnvVar(
            "MLRUN_NAMESPACE",
            value_from=k8s_client.V1EnvVarSource(
                field_ref=k8s_client.V1ObjectFieldSelector(
                    field_path="metadata.namespace"
                )
            ),
        )
    )

    api_url = config.httpdb.api_url
    if api_url:
        cop.container.add_env_variable(
            k8s_client.V1EnvVar(name="MLRUN_DBPATH", value=api_url)
        )

    crd_version = config.mpijob_crd_version
    if crd_version:
        cop.container.add_env_variable(
            k8s_client.V1EnvVar(name="MLRUN_MPIJOB_CRD_VERSION", value=crd_version)
        )

    auth_env_var = mlrun.runtimes.constants.FunctionEnvironmentVariables.auth_session
    # fall back to the V3IO access key when no explicit auth session is set
    if auth_env_var in os.environ or "V3IO_ACCESS_KEY" in os.environ:
        session = os.environ.get(auth_env_var) or os.environ.get("V3IO_ACCESS_KEY")
        cop.container.add_env_variable(
            k8s_client.V1EnvVar(name=auth_env_var, value=session)
        )
669
-
670
-
671
def get_default_reg():
    """Resolve the default docker registry.

    Order of precedence: configured builder registry, then the Iguazio
    namespace-local registry (derived from IGZ_NAMESPACE_DOMAIN), else "".
    """
    configured = config.httpdb.builder.docker_registry
    if configured:
        return configured

    namespace_domain = os.environ.get("IGZ_NAMESPACE_DOMAIN", None)
    if namespace_domain is not None:
        return f"docker-registry.{namespace_domain}:80"

    return ""
678
-
679
-
680
def add_annotations(cop, kind, function, func_url=None, project=None):
    """Annotate a pipeline pod with its run type, project, and function reference."""
    if func_url and func_url.startswith("db://"):
        # strip the scheme, keeping only the function reference
        func_url = func_url[len("db://") :]

    annotations = {
        run_annotation: kind,
        project_annotation: project or function.metadata.project,
        function_annotation: func_url or function.uri,
    }
    for annotation_key, annotation_value in annotations.items():
        cop.add_pod_annotation(annotation_key, annotation_value)
686
-
687
-
688
def add_labels(cop, function, scrape_metrics=False):
    """Label a pipeline pod with the mlrun function identity and scrape flag."""
    prefix = mlrun.runtimes.utils.mlrun_key
    labels = {
        "class": function.kind,
        "function": function.metadata.name,
        "name": cop.human_name,
        "project": function.metadata.project,
        "tag": function.metadata.tag or "latest",
        "scrape-metrics": "True" if scrape_metrics else "False",
    }
    for suffix, value in labels.items():
        cop.add_pod_label(prefix + suffix, value)
696
-
697
-
698
def generate_kfp_dag_and_resolve_project(run, project=None):
    """Build a DAG dict from a KFP run's workflow manifest and resolve its project.

    :param run:     KFP run dict (expects pipeline_runtime.workflow_manifest)
    :param project: project name override; resolved from template annotations
                    when not given
    :returns: (dag, project, status message) — (None, project, None) when the
              run has no workflow manifest
    """
    manifest = run.get("pipeline_runtime", {}).get("workflow_manifest")
    if not manifest:
        return None, project, None
    workflow = json.loads(manifest)

    # map template name -> mlrun run-type / function annotations
    templates = {}
    for template in workflow["spec"]["templates"]:
        if not project:
            project = get_in(
                template, ["metadata", "annotations", project_annotation], ""
            )
        templates[template["name"]] = {
            "run_type": get_in(
                template, ["metadata", "annotations", run_annotation], ""
            ),
            "function": get_in(
                template, ["metadata", "annotations", function_annotation], ""
            ),
        }

    dag = {}
    for node in workflow["status"].get("nodes", {}).values():
        name = node["displayName"]
        record = {
            field: node[field]
            for field in ("phase", "startedAt", "finishedAt", "type", "id")
        }

        # kfp emits camelCase fields; duplicate each under its snake_case name
        # for consistency with mlrun conventions, keeping camelCase for compat
        for field in list(record.keys()):
            record[inflection.underscore(field)] = record[field]

        record["parent"] = node.get("boundaryID", "")
        record["name"] = name
        record["children"] = node.get("children", [])
        if name in templates:
            template_info = templates[name]
            record["function"] = template_info.get("function")
            record["run_type"] = template_info.get("run_type")
        dag[node["id"]] = record

    return dag, project, workflow["status"].get("message", "")
743
-
744
-
745
def format_summary_from_kfp_run(
    kfp_run, project=None, run_db: "mlrun.db.RunDBInterface" = None
):
    """Summarize a KFP run: the workflow DAG enriched with mlrun run info,
    plus the formatted run metadata.

    :param kfp_run: raw KFP run dict
    :param project: project filter ("*" means unfiltered)
    :param run_db:  optional DB handle; passing one keeps the whole flow on the
                    same db session and avoids session isolation issues
    :returns: dict with "graph" (DAG) and "run" (formatted run) entries
    """
    override_project = project if project and project != "*" else None
    dag, project, message = generate_kfp_dag_and_resolve_project(
        kfp_run, override_project
    )
    run_id = get_in(kfp_run, "run.id")
    logger.debug("Formatting summary from KFP run", run_id=run_id, project=project)

    run_db = run_db or mlrun.db.get_run_db()

    # enrich DAG nodes with the mlrun run info of the runs this workflow launched
    for run in run_db.list_runs(project=project, labels=f"workflow={run_id}"):
        pod = get_in(run, ["metadata", "labels", "mlrun/runner-pod"])
        if not pod or pod not in dag:
            continue
        node = dag[pod]
        node["run_uid"] = get_in(run, "metadata.uid")
        node["kind"] = get_in(run, "metadata.labels.kind")
        error = get_in(run, "status.error")
        if error:
            node["error"] = error

    short_run = {
        "graph": dag,
        "run": mlrun.utils.helpers.format_run(kfp_run["run"]),
    }
    short_run["run"]["project"] = project
    short_run["run"]["message"] = message
    logger.debug("Completed summary formatting", run_id=run_id, project=project)
    return short_run
779
-
780
-
781
def show_kfp_run(run, clear_output=False):
    """Render a KFP run summary as a graphviz diagram inside a notebook.

    No-op outside IPython, when graphviz is unavailable, or when the run
    summary has no graph. Rendering failures are logged, never raised.
    """
    phase_to_color = {
        mlrun.run.RunStatuses.failed: "red",
        mlrun.run.RunStatuses.succeeded: "green",
        mlrun.run.RunStatuses.skipped: "white",
    }
    runtype_to_shape = {
        PipelineRunType.run: "ellipse",
        PipelineRunType.build: "box",
        PipelineRunType.deploy: "box3d",
    }
    if not run or "graph" not in run:
        return
    if not is_ipython:
        return
    try:
        from graphviz import Digraph
    except ImportError:
        return

    try:
        graph = run["graph"]
        dag = Digraph("kfp", format="svg")
        dag.attr(compound="true")

        for node_id, node in graph.items():
            # skip the top-level DAG node itself (a DAG with no parent)
            if node["type"] == "DAG" and not node["parent"]:
                continue
            if node.get("run_type"):
                shape = runtype_to_shape.get(node["run_type"], None)
            elif node["phase"] == "Skipped" or (
                node["type"] == "DAG" and node["name"].startswith("condition-")
            ):
                shape = "diamond"
            else:
                shape = "ellipse"
            dag.node(
                node_id,
                label=node["name"],
                fillcolor=phase_to_color.get(node["phase"], None),
                style="filled",
                shape=shape,
                tooltip=node.get("error", None),
            )
            for child in node.get("children") or []:
                dag.edge(node_id, child)

        import IPython

        if clear_output:
            IPython.display.clear_output(wait=True)

        run_id = run["run"]["id"]
        url = get_workflow_url(run["run"]["project"], run_id)
        href = f'<a href="{url}" target="_blank"><b>click here</b></a>'
        html = IPython.display.HTML(
            f"<div>Pipeline running (id={run_id}), {href} to view the details in MLRun UI</div>"
        )
        IPython.display.display(html, dag)
    except Exception as exc:
        logger.warning(f"failed to plot graph, {err_to_str(exc)}")
839
-
840
-
841
def add_default_function_resources(
    container_op: dsl.ContainerOp,
) -> dsl.ContainerOp:
    """Apply the configured default resource requests and limits to a container op."""
    default_resources = config.get_default_function_pod_resources()

    # (config section, container method) pairs for both resource kinds
    appliers = (
        ("requests", container_op.container.add_resource_request),
        ("limits", container_op.container.add_resource_limit),
    )
    for section, apply_resource in appliers:
        for resource_name, resource_value in default_resources[section].items():
            if resource_value:
                apply_resource(resource_name, resource_value)
    return container_op
853
-
854
-
855
def add_function_node_selection_attributes(
    function, container_op: dsl.ContainerOp
) -> dsl.ContainerOp:
    """Copy node scheduling attributes (node selector, tolerations, affinity)
    from a function spec onto a pipeline container op.

    :param function:     mlrun function runtime object; may be None when the
                         step was created from a function URL only, in which
                         case nothing is applied
    :param container_op: the KFP container op to mutate
    :returns: the (possibly mutated) container op
    """
    # bug fix: build_op may call this with function=None (func_url-only flow);
    # previously this raised AttributeError on function.kind
    if function is None:
        return container_op

    # local runtimes don't run on a scheduled pod, so scheduling hints don't apply
    if not mlrun.runtimes.RuntimeKinds.is_local_runtime(function.kind):
        if getattr(function.spec, "node_selector"):
            container_op.node_selector = function.spec.node_selector

        if getattr(function.spec, "tolerations"):
            container_op.tolerations = function.spec.tolerations

        if getattr(function.spec, "affinity"):
            container_op.affinity = function.spec.affinity

    return container_op