mlrun-1.7.0rc14-py3-none-any.whl → mlrun-1.7.0rc16-py3-none-any.whl

This diff compares the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun has been flagged as possibly problematic.

Files changed (107)
  1. mlrun/__init__.py +10 -1
  2. mlrun/__main__.py +18 -109
  3. mlrun/{runtimes/mpijob/v1alpha1.py → alerts/__init__.py} +2 -16
  4. mlrun/alerts/alert.py +141 -0
  5. mlrun/artifacts/__init__.py +8 -3
  6. mlrun/artifacts/base.py +36 -253
  7. mlrun/artifacts/dataset.py +9 -190
  8. mlrun/artifacts/manager.py +20 -41
  9. mlrun/artifacts/model.py +8 -140
  10. mlrun/artifacts/plots.py +14 -375
  11. mlrun/common/schemas/__init__.py +4 -2
  12. mlrun/common/schemas/alert.py +46 -4
  13. mlrun/common/schemas/api_gateway.py +4 -0
  14. mlrun/common/schemas/artifact.py +15 -0
  15. mlrun/common/schemas/auth.py +2 -0
  16. mlrun/common/schemas/model_monitoring/__init__.py +8 -1
  17. mlrun/common/schemas/model_monitoring/constants.py +40 -4
  18. mlrun/common/schemas/model_monitoring/model_endpoints.py +73 -2
  19. mlrun/common/schemas/project.py +2 -0
  20. mlrun/config.py +7 -4
  21. mlrun/data_types/to_pandas.py +4 -4
  22. mlrun/datastore/base.py +41 -9
  23. mlrun/datastore/datastore_profile.py +54 -4
  24. mlrun/datastore/inmem.py +2 -2
  25. mlrun/datastore/sources.py +43 -2
  26. mlrun/datastore/store_resources.py +2 -6
  27. mlrun/datastore/targets.py +106 -39
  28. mlrun/db/base.py +23 -3
  29. mlrun/db/httpdb.py +101 -47
  30. mlrun/db/nopdb.py +20 -2
  31. mlrun/errors.py +5 -0
  32. mlrun/feature_store/__init__.py +0 -2
  33. mlrun/feature_store/api.py +12 -47
  34. mlrun/feature_store/feature_set.py +9 -0
  35. mlrun/feature_store/retrieval/base.py +9 -4
  36. mlrun/feature_store/retrieval/conversion.py +4 -4
  37. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  38. mlrun/feature_store/retrieval/job.py +2 -0
  39. mlrun/feature_store/retrieval/local_merger.py +2 -0
  40. mlrun/feature_store/retrieval/spark_merger.py +5 -0
  41. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
  42. mlrun/launcher/base.py +4 -3
  43. mlrun/launcher/client.py +1 -1
  44. mlrun/lists.py +4 -2
  45. mlrun/model.py +25 -11
  46. mlrun/model_monitoring/__init__.py +1 -1
  47. mlrun/model_monitoring/api.py +41 -18
  48. mlrun/model_monitoring/application.py +5 -305
  49. mlrun/model_monitoring/applications/__init__.py +11 -0
  50. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  51. mlrun/model_monitoring/applications/base.py +282 -0
  52. mlrun/model_monitoring/applications/context.py +214 -0
  53. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  54. mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
  55. mlrun/model_monitoring/applications/results.py +99 -0
  56. mlrun/model_monitoring/controller.py +3 -1
  57. mlrun/model_monitoring/db/__init__.py +2 -0
  58. mlrun/model_monitoring/db/stores/base/store.py +9 -36
  59. mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
  60. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +63 -110
  61. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +104 -187
  62. mlrun/model_monitoring/db/tsdb/__init__.py +71 -0
  63. mlrun/model_monitoring/db/tsdb/base.py +135 -0
  64. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  65. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  66. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +404 -0
  67. mlrun/model_monitoring/db/v3io_tsdb_reader.py +134 -0
  68. mlrun/model_monitoring/evidently_application.py +6 -118
  69. mlrun/model_monitoring/helpers.py +1 -1
  70. mlrun/model_monitoring/model_endpoint.py +3 -2
  71. mlrun/model_monitoring/stream_processing.py +48 -213
  72. mlrun/model_monitoring/writer.py +101 -121
  73. mlrun/platforms/__init__.py +10 -9
  74. mlrun/platforms/iguazio.py +21 -202
  75. mlrun/projects/operations.py +11 -7
  76. mlrun/projects/pipelines.py +13 -76
  77. mlrun/projects/project.py +73 -45
  78. mlrun/render.py +11 -13
  79. mlrun/run.py +6 -41
  80. mlrun/runtimes/__init__.py +3 -3
  81. mlrun/runtimes/base.py +6 -6
  82. mlrun/runtimes/funcdoc.py +0 -28
  83. mlrun/runtimes/kubejob.py +2 -1
  84. mlrun/runtimes/local.py +1 -1
  85. mlrun/runtimes/mpijob/__init__.py +0 -20
  86. mlrun/runtimes/mpijob/v1.py +1 -1
  87. mlrun/runtimes/nuclio/api_gateway.py +75 -9
  88. mlrun/runtimes/nuclio/function.py +9 -35
  89. mlrun/runtimes/pod.py +16 -36
  90. mlrun/runtimes/remotesparkjob.py +1 -1
  91. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  92. mlrun/runtimes/utils.py +1 -39
  93. mlrun/utils/helpers.py +72 -71
  94. mlrun/utils/notifications/notification/base.py +1 -1
  95. mlrun/utils/notifications/notification/slack.py +12 -5
  96. mlrun/utils/notifications/notification/webhook.py +1 -1
  97. mlrun/utils/notifications/notification_pusher.py +134 -14
  98. mlrun/utils/version/version.json +2 -2
  99. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/METADATA +4 -3
  100. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/RECORD +105 -95
  101. mlrun/kfpops.py +0 -865
  102. mlrun/platforms/other.py +0 -305
  103. /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
  104. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/LICENSE +0 -0
  105. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/WHEEL +0 -0
  106. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/entry_points.txt +0 -0
  107. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/top_level.txt +0 -0
mlrun/kfpops.py DELETED
@@ -1,865 +0,0 @@
- # Copyright 2023 Iguazio
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #   http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import json
- import os
- import os.path
- from copy import deepcopy
- from typing import Union
-
- import inflection
- from kfp import dsl
- from kubernetes import client as k8s_client
-
- import mlrun
- from mlrun.errors import err_to_str
-
- from .config import config
- from .model import HyperParamOptions, RunSpec
- from .utils import (
-     dict_to_yaml,
-     gen_md_table,
-     get_artifact_target,
-     get_in,
-     get_workflow_url,
-     is_ipython,
-     is_legacy_artifact,
-     logger,
-     run_keys,
-     version,
- )
-
- # default KFP artifacts and output (ui metadata, metrics etc.)
- # directories to /tmp to allow running with security context
- KFPMETA_DIR = "/tmp"
- KFP_ARTIFACTS_DIR = "/tmp"
-
- project_annotation = "mlrun/project"
- run_annotation = "mlrun/pipeline-step-type"
- function_annotation = "mlrun/function-uri"
-
- dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
-
-
- class PipelineRunType:
-     run = "run"
-     build = "build"
-     deploy = "deploy"
-
-
- def is_num(v):
-     return isinstance(v, (int, float, complex))
-
-
- def write_kfpmeta(struct):
-     if "status" not in struct:
-         return
-
-     results = struct["status"].get("results", {})
-     metrics = {
-         "metrics": [
-             {"name": k, "numberValue": v} for k, v in results.items() if is_num(v)
-         ],
-     }
-     with open(os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"), "w") as f:
-         json.dump(metrics, f)
-
-     struct = deepcopy(struct)
-     uid = struct["metadata"].get("uid")
-     project = struct["metadata"].get("project", config.default_project)
-     output_artifacts, out_dict = get_kfp_outputs(
-         struct["status"].get(run_keys.artifacts, []),
-         struct["metadata"].get("labels", {}),
-         project,
-     )
-
-     results["run_id"] = results.get("run_id", "/".join([project, uid]))
-     for key in struct["spec"].get(run_keys.outputs, []):
-         val = "None"
-         if key in out_dict:
-             val = out_dict[key]
-         elif key in results:
-             val = results[key]
-         try:
-             # NOTE: if key has "../x", it would fail on path traversal
-             path = os.path.join(KFP_ARTIFACTS_DIR, key)
-             if not mlrun.utils.helpers.is_safe_path(KFP_ARTIFACTS_DIR, path):
-                 logger.warning(
-                     "Path traversal is not allowed ignoring", path=path, key=key
-                 )
-                 continue
-             path = os.path.abspath(path)
-             logger.info("Writing artifact output", path=path, val=val)
-             with open(path, "w") as fp:
-                 fp.write(str(val))
-         except Exception as exc:
-             logger.warning("Failed writing to temp file. Ignoring", exc=err_to_str(exc))
-             pass
-
-     text = "# Run Report\n"
-     if "iterations" in struct["status"]:
-         del struct["status"]["iterations"]
-
-     text += "## Metadata\n```yaml\n" + dict_to_yaml(struct) + "```\n"
-
-     metadata = {"outputs": [{"type": "markdown", "storage": "inline", "source": text}]}
-     with open(os.path.join(KFPMETA_DIR, "mlpipeline-ui-metadata.json"), "w") as f:
-         json.dump(metadata, f)
-
-
- def get_kfp_outputs(artifacts, labels, project):
-     outputs = []
-     out_dict = {}
-     for output in artifacts:
-         if is_legacy_artifact(output):
-             key = output["key"]
-             # The spec in a legacy artifact is contained in the main object, so using this assignment saves us a lot
-             # of if/else in the rest of this function.
-             output_spec = output
-         else:
-             key = output.get("metadata")["key"]
-             output_spec = output.get("spec", {})
-
-         target = output_spec.get("target_path", "")
-         target = output_spec.get("inline", target)
-
-         out_dict[key] = get_artifact_target(output, project=project)
-
-         if target.startswith("v3io:///"):
-             target = target.replace("v3io:///", "http://v3io-webapi:8081/")
-
-         user = labels.get("v3io_user", "") or os.environ.get("V3IO_USERNAME", "")
-         if target.startswith("/User/"):
-             user = user or "admin"
-             target = "http://v3io-webapi:8081/users/" + user + target[5:]
-
-         viewer = output_spec.get("viewer", "")
-         if viewer in ["web-app", "chart"]:
-             meta = {"type": "web-app", "source": target}
-             outputs += [meta]
-
-         elif viewer == "table":
-             header = output_spec.get("header", None)
-             if header and target.endswith(".csv"):
-                 meta = {
-                     "type": "table",
-                     "format": "csv",
-                     "header": header,
-                     "source": target,
-                 }
-                 outputs += [meta]
-
-         elif output.get("kind") == "dataset":
-             header = output_spec.get("header")
-             preview = output_spec.get("preview")
-             if preview:
-                 tbl_md = gen_md_table(header, preview)
-                 text = f"## Dataset: {key} \n\n" + tbl_md
-                 del output_spec["preview"]
-
-                 meta = {"type": "markdown", "storage": "inline", "source": text}
-                 outputs += [meta]
-
-     return outputs, out_dict
-
-
- def mlrun_op(
-     name: str = "",
-     project: str = "",
-     function=None,
-     func_url=None,
-     image: str = "",
-     runobj=None,
-     command: str = "",
-     secrets: list = None,
-     params: dict = None,
-     job_image=None,
-     hyperparams: dict = None,
-     param_file: str = "",
-     labels: dict = None,
-     selector: str = "",
-     inputs: dict = None,
-     outputs: list = None,
-     in_path: str = "",
-     out_path: str = "",
-     rundb: str = "",
-     mode: str = "",
-     handler: str = "",
-     more_args: list = None,
-     hyper_param_options=None,
-     verbose=None,
-     scrape_metrics=False,
-     returns: list[Union[str, dict[str, str]]] = None,
-     auto_build: bool = False,
- ):
-     """mlrun KubeFlow pipelines operator, use to form pipeline steps
-
-     when using kubeflow pipelines, each step is wrapped in an mlrun_op
-     one step can pass state and data to the next step, see example below.
-
-     :param name: name used for the step
-     :param project: optional, project name
-     :param image: optional, run container image (will be executing the step)
-         the container should host all required packages + code
-         for the run, alternatively user can mount packages/code via
-         shared file volumes like v3io (see example below)
-     :param function: optional, function object
-     :param func_url: optional, function object url
-     :param command: exec command (or URL for functions)
-     :param secrets: extra secrets specs, will be injected into the runtime
-         e.g. ['file=<filename>', 'env=ENV_KEY1,ENV_KEY2']
-     :param params: dictionary of run parameters and values
-     :param hyperparams: dictionary of hyper parameters and list values, each
-         hyperparam holds a list of values, the run will be
-         executed for every parameter combination (GridSearch)
-     :param param_file: a csv/json file with parameter combinations, first csv row hold
-         the parameter names, following rows hold param values
-     :param selector: selection criteria for hyperparams e.g. "max.accuracy"
-     :param hyper_param_options: hyper param options class, see: :py:class:`~mlrun.model.HyperParamOptions`
-     :param labels: labels to tag the job/run with ({key:val, ..})
-     :param inputs: dictionary of input objects + optional paths (if path is
-         omitted the path will be the in_path/key.
-     :param outputs: dictionary of output objects + optional paths (if path is
-         omitted the path will be the out_path/key.
-     :param in_path: default input path/url (prefix) for inputs
-     :param out_path: default output path/url (prefix) for artifacts
-     :param rundb: path for rundb (or use 'MLRUN_DBPATH' env instead)
-     :param mode: run mode, e.g. 'pass' for using the command without mlrun wrapper
-     :param handler code entry-point/handler name
-     :param job_image name of the image user for the job
-     :param verbose: add verbose prints/logs
-     :param scrape_metrics: whether to add the `mlrun/scrape-metrics` label to this run's resources
-     :param returns: List of configurations for how to log the returning values from the handler's run (as artifacts or
-         results). The list's length must be equal to the amount of returning objects. A configuration may be
-         given as:
-
-         * A string of the key to use to log the returning value as result or as an artifact. To specify
-           The artifact type, it is possible to pass a string in the following structure:
-           "<key> : <type>". Available artifact types can be seen in `mlrun.ArtifactType`. If no artifact
-           type is specified, the object's default artifact type will be used.
-         * A dictionary of configurations to use when logging. Further info per object type and artifact
-           type can be given there. The artifact key must appear in the dictionary as "key": "the_key".
-     :param auto_build: when set to True and the function require build it will be built on the first
-         function run, use only if you dont plan on changing the build config between runs
-
-     :returns: KFP step operation
-
-     Example:
-         from kfp import dsl
-         from mlrun import mlrun_op
-         from mlrun.platforms import mount_v3io
-
-         def mlrun_train(p1, p2):
-             return mlrun_op('training',
-                             command = '/User/kubeflow/training.py',
-                             params = {'p1':p1, 'p2':p2},
-                             outputs = {'model.txt':'', 'dataset.csv':''},
-                             out_path ='v3io:///projects/my-proj/mlrun/{{workflow.uid}}/',
-                             rundb = '/User/kubeflow')
-
-         # use data from the first step
-         def mlrun_validate(modelfile):
-             return mlrun_op('validation',
-                             command = '/User/kubeflow/validation.py',
-                             inputs = {'model.txt':modelfile},
-                             out_path ='v3io:///projects/my-proj/{{workflow.uid}}/',
-                             rundb = '/User/kubeflow')
-
-         @dsl.pipeline(
-             name='My MLRUN pipeline', description='Shows how to use mlrun.'
-         )
-         def mlrun_pipeline(
-             p1 = 5 , p2 = '"text"'
-         ):
-             # run training, mount_v3io will mount "/User" into the pipeline step
-             train = mlrun_train(p1, p2).apply(mount_v3io())
-
-             # feed 1st step results into the second step
-             validate = mlrun_validate(
-                 train.outputs['model-txt']).apply(mount_v3io())
-
-     """
-     secrets = [] if secrets is None else secrets
-     params = {} if params is None else params
-     hyperparams = {} if hyperparams is None else hyperparams
-     if hyper_param_options and isinstance(hyper_param_options, dict):
-         hyper_param_options = HyperParamOptions.from_dict(hyper_param_options)
-     inputs = {} if inputs is None else inputs
-     returns = [] if returns is None else returns
-     outputs = [] if outputs is None else outputs
-     labels = {} if labels is None else labels
-
-     rundb = rundb or mlrun.db.get_or_set_dburl()
-     cmd = [
-         "python",
-         "-m",
-         "mlrun",
-         "run",
-         "--kfp",
-         "--from-env",
-         "--workflow",
-         "{{workflow.uid}}",
-     ]
-     file_outputs = {}
-
-     runtime = None
-     code_env = None
-     function_name = ""
-     if function:
-         if not func_url:
-             if function.kind in ["", "local"]:
-                 image = image or function.spec.image
-                 command = command or function.spec.command
-                 more_args = more_args or function.spec.args
-                 mode = mode or function.spec.mode
-                 rundb = rundb or function.spec.rundb
-                 code_env = str(function.spec.build.functionSourceCode)
-             else:
-                 runtime = str(function.to_dict())
-
-         function_name = function.metadata.name
-         if function.kind == "dask":
-             image = image or function.spec.kfp_image or config.dask_kfp_image
-
-     image = image or config.kfp_image
-
-     if runobj:
-         handler = handler or runobj.spec.handler_name
-         params = params or runobj.spec.parameters
-         hyperparams = hyperparams or runobj.spec.hyperparams
-         param_file = (
-             param_file
-             or runobj.spec.param_file
-             or runobj.spec.hyper_param_options.param_file
-         )
-         hyper_param_options = hyper_param_options or runobj.spec.hyper_param_options
-         selector = (
-             selector or runobj.spec.selector or runobj.spec.hyper_param_options.selector
-         )
-         inputs = inputs or runobj.spec.inputs
-         returns = returns or runobj.spec.returns
-         outputs = outputs or runobj.spec.outputs
-         in_path = in_path or runobj.spec.input_path
-         out_path = out_path or runobj.spec.output_path
-         secrets = secrets or runobj.spec.secret_sources
-         project = project or runobj.metadata.project
-         labels = runobj.metadata.labels or labels
-         verbose = verbose or runobj.spec.verbose
-         scrape_metrics = scrape_metrics or runobj.spec.scrape_metrics
-
-     outputs = RunSpec.join_outputs_and_returns(outputs=outputs, returns=returns)
-
-     if not name:
-         if not function_name:
-             raise ValueError("name or function object must be specified")
-         name = function_name
-         if handler:
-             short_name = handler
-             for separator in ["#", "::", "."]:
-                 # drop paths, module or class name from short name
-                 if separator in short_name:
-                     short_name = short_name.split(separator)[-1]
-             name += "-" + short_name
-
-     if hyperparams or param_file:
-         outputs.append("iteration_results")
-     if "run_id" not in outputs:
-         outputs.append("run_id")
-
-     params = params or {}
-     hyperparams = hyperparams or {}
-     inputs = inputs or {}
-     returns = returns or []
-     secrets = secrets or []
-
-     mlrun.runtimes.utils.enrich_run_labels(labels)
-
-     if name:
-         cmd += ["--name", name]
-     if func_url:
-         cmd += ["-f", func_url]
-     for secret in secrets:
-         cmd += ["-s", f"{secret['kind']}={secret['source']}"]
-     for param, val in params.items():
-         cmd += ["-p", f"{param}={val}"]
-     for xpram, val in hyperparams.items():
-         cmd += ["-x", f"{xpram}={val}"]
-     for input_param, val in inputs.items():
-         cmd += ["-i", f"{input_param}={val}"]
-     for log_hint in returns:
-         cmd += [
-             "--returns",
-             json.dumps(log_hint) if isinstance(log_hint, dict) else log_hint,
-         ]
-     for label, val in labels.items():
-         cmd += ["--label", f"{label}={val}"]
-     for output in outputs:
-         cmd += ["-o", str(output)]
-         file_outputs[output.replace(".", "_")] = (
-             f"/tmp/{output}"  # not using path.join to avoid windows "\"
-         )
-     if project:
-         cmd += ["--project", project]
-     if handler:
-         cmd += ["--handler", handler]
-     if runtime:
-         cmd += ["--runtime", runtime]
-     if in_path:
-         cmd += ["--in-path", in_path]
-     if out_path:
-         cmd += ["--out-path", out_path]
-     if param_file:
-         cmd += ["--param-file", param_file]
-     if hyper_param_options:
-         cmd += ["--hyper-param-options", hyper_param_options.to_json()]
-     if selector:
-         cmd += ["--selector", selector]
-     if job_image:
-         cmd += ["--image", job_image]
-     if mode:
-         cmd += ["--mode", mode]
-     if verbose:
-         cmd += ["--verbose"]
-     if scrape_metrics:
-         cmd += ["--scrape-metrics"]
-     if auto_build:
-         cmd += ["--auto-build"]
-     if more_args:
-         cmd += more_args
-
-     registry = get_default_reg()
-     if image and image.startswith("."):
-         if registry:
-             image = f"{registry}/{image[1:]}"
-         else:
-             raise ValueError("local image registry env not found")
-
-     image = mlrun.utils.enrich_image_url(
-         image, mlrun.get_version(), str(version.Version().get_python_version())
-     )
-
-     cop = dsl.ContainerOp(
-         name=name,
-         image=image,
-         command=cmd + [command],
-         file_outputs=file_outputs,
-         output_artifact_paths={
-             "mlpipeline-ui-metadata": os.path.join(
-                 KFPMETA_DIR, "mlpipeline-ui-metadata.json"
-             ),
-             "mlpipeline-metrics": os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"),
-         },
-     )
-     cop = add_default_function_resources(cop)
-     cop = add_function_node_selection_attributes(container_op=cop, function=function)
-
-     add_annotations(cop, PipelineRunType.run, function, func_url, project)
-     add_labels(cop, function, scrape_metrics)
-     if code_env:
-         cop.container.add_env_variable(
-             k8s_client.V1EnvVar(name="MLRUN_EXEC_CODE", value=code_env)
-         )
-     if registry:
-         cop.container.add_env_variable(
-             k8s_client.V1EnvVar(
-                 name="MLRUN_HTTPDB__BUILDER__DOCKER_REGISTRY", value=registry
-             )
-         )
-
-     add_default_env(k8s_client, cop)
-
-     return cop
-
-
- def deploy_op(
-     name,
-     function,
-     func_url=None,
-     source="",
-     project="",
-     models: list = None,
-     env: dict = None,
-     tag="",
-     verbose=False,
- ):
-     cmd = ["python", "-m", "mlrun", "deploy"]
-     if source:
-         cmd += ["-s", source]
-     if tag:
-         cmd += ["--tag", tag]
-     if verbose:
-         cmd += ["--verbose"]
-     if project:
-         cmd += ["-p", project]
-
-     if models:
-         for m in models:
-             for key in ["key", "model_path", "model_url", "class_name", "model_url"]:
-                 if key in m:
-                     m[key] = str(m[key])  # verify we stringify pipeline params
-             if function.kind == mlrun.runtimes.RuntimeKinds.serving:
-                 cmd += ["-m", json.dumps(m)]
-             else:
-                 cmd += ["-m", f"{m['key']}={m['model_path']}"]
-
-     if env:
-         for key, val in env.items():
-             cmd += ["--env", f"{key}={val}"]
-
-     if func_url:
-         cmd += ["-f", func_url]
-     else:
-         runtime = f"{function.to_dict()}"
-         cmd += [runtime]
-
-     cop = dsl.ContainerOp(
-         name=name,
-         image=config.kfp_image,
-         command=cmd,
-         file_outputs={"endpoint": "/tmp/output", "name": "/tmp/name"},
-     )
-     cop = add_default_function_resources(cop)
-     cop = add_function_node_selection_attributes(container_op=cop, function=function)
-
-     add_annotations(cop, PipelineRunType.deploy, function, func_url)
-     add_default_env(k8s_client, cop)
-     return cop
-
-
- def add_env(env=None):
-     """
-     Modifier function to add env vars from dict
-     Usage:
-         train = train_op(...)
-         train.apply(add_env({'MY_ENV':'123'}))
-     """
-
-     env = {} if env is None else env
-
-     def _add_env(task):
-         for k, v in env.items():
-             task.add_env_variable(k8s_client.V1EnvVar(name=k, value=v))
-         return task
-
-     return _add_env
-
-
- def build_op(
-     name,
-     function=None,
-     func_url=None,
-     image=None,
-     base_image=None,
-     commands: list = None,
-     secret_name="",
-     with_mlrun=True,
-     skip_deployed=False,
- ):
-     """build Docker image."""
-
-     cmd = ["python", "-m", "mlrun", "build", "--kfp"]
-     if function:
-         if not hasattr(function, "to_dict"):
-             raise ValueError("function must specify a function runtime object")
-         cmd += ["-r", str(function.to_dict())]
-     elif not func_url:
-         raise ValueError("function object or func_url must be specified")
-
-     commands = commands or []
-     if image:
-         cmd += ["-i", image]
-     if base_image:
-         cmd += ["-b", base_image]
-     if secret_name:
-         cmd += ["--secret-name", secret_name]
-     if with_mlrun:
-         cmd += ["--with-mlrun"]
-     if skip_deployed:
-         cmd += ["--skip"]
-     for c in commands:
-         cmd += ["-c", c]
-     if func_url and not function:
-         cmd += [func_url]
-
-     cop = dsl.ContainerOp(
-         name=name,
-         image=config.kfp_image,
-         command=cmd,
-         file_outputs={"state": "/tmp/state", "image": "/tmp/image"},
-     )
-     cop = add_default_function_resources(cop)
-     cop = add_function_node_selection_attributes(container_op=cop, function=function)
-
-     add_annotations(cop, PipelineRunType.build, function, func_url)
-     if config.httpdb.builder.docker_registry:
-         cop.container.add_env_variable(
-             k8s_client.V1EnvVar(
-                 name="MLRUN_HTTPDB__BUILDER__DOCKER_REGISTRY",
-                 value=config.httpdb.builder.docker_registry,
-             )
-         )
-     if "IGZ_NAMESPACE_DOMAIN" in os.environ:
-         cop.container.add_env_variable(
-             k8s_client.V1EnvVar(
-                 name="IGZ_NAMESPACE_DOMAIN",
-                 value=os.environ.get("IGZ_NAMESPACE_DOMAIN"),
-             )
-         )
-
-     is_v3io = function.spec.build.source and function.spec.build.source.startswith(
-         "v3io"
-     )
-     if "V3IO_ACCESS_KEY" in os.environ and is_v3io:
-         cop.container.add_env_variable(
-             k8s_client.V1EnvVar(
-                 name="V3IO_ACCESS_KEY", value=os.environ.get("V3IO_ACCESS_KEY")
-             )
-         )
-
-     add_default_env(k8s_client, cop)
-
-     return cop
-
-
- def add_default_env(k8s_client, cop):
-     cop.container.add_env_variable(
-         k8s_client.V1EnvVar(
-             "MLRUN_NAMESPACE",
-             value_from=k8s_client.V1EnvVarSource(
-                 field_ref=k8s_client.V1ObjectFieldSelector(
-                     field_path="metadata.namespace"
-                 )
-             ),
-         )
-     )
-
-     if config.httpdb.api_url:
-         cop.container.add_env_variable(
-             k8s_client.V1EnvVar(name="MLRUN_DBPATH", value=config.httpdb.api_url)
-         )
-
-     if config.mpijob_crd_version:
-         cop.container.add_env_variable(
-             k8s_client.V1EnvVar(
-                 name="MLRUN_MPIJOB_CRD_VERSION", value=config.mpijob_crd_version
-             )
-         )
-
-     auth_env_var = mlrun.runtimes.constants.FunctionEnvironmentVariables.auth_session
-     if auth_env_var in os.environ or "V3IO_ACCESS_KEY" in os.environ:
-         cop.container.add_env_variable(
-             k8s_client.V1EnvVar(
-                 name=auth_env_var,
-                 value=os.environ.get(auth_env_var) or os.environ.get("V3IO_ACCESS_KEY"),
-             )
-         )
-
-
- def get_default_reg():
-     if config.httpdb.builder.docker_registry:
-         return config.httpdb.builder.docker_registry
-     namespace_domain = os.environ.get("IGZ_NAMESPACE_DOMAIN", None)
-     if namespace_domain is not None:
-         return f"docker-registry.{namespace_domain}:80"
-     return ""
-
-
- def add_annotations(cop, kind, function, func_url=None, project=None):
-     if func_url and func_url.startswith("db://"):
-         func_url = func_url[len("db://") :]
-     cop.add_pod_annotation(run_annotation, kind)
-     cop.add_pod_annotation(project_annotation, project or function.metadata.project)
-     cop.add_pod_annotation(function_annotation, func_url or function.uri)
-
-
- def add_labels(cop, function, scrape_metrics=False):
-     prefix = mlrun.runtimes.utils.mlrun_key
-     cop.add_pod_label(prefix + "class", function.kind)
-     cop.add_pod_label(prefix + "function", function.metadata.name)
-     cop.add_pod_label(prefix + "name", cop.human_name)
-     cop.add_pod_label(prefix + "project", function.metadata.project)
-     cop.add_pod_label(prefix + "tag", function.metadata.tag or "latest")
-     cop.add_pod_label(prefix + "scrape-metrics", "True" if scrape_metrics else "False")
-
-
- def generate_kfp_dag_and_resolve_project(run, project=None):
-     workflow = run.get("pipeline_runtime", {}).get("workflow_manifest")
-     if not workflow:
-         return None, project, None
-     workflow = json.loads(workflow)
-
-     templates = {}
-     for template in workflow["spec"]["templates"]:
-         project = project or get_in(
-             template, ["metadata", "annotations", project_annotation], ""
-         )
-         name = template["name"]
-         templates[name] = {
-             "run_type": get_in(
-                 template, ["metadata", "annotations", run_annotation], ""
-             ),
-             "function": get_in(
-                 template, ["metadata", "annotations", function_annotation], ""
-             ),
-         }
-
-     nodes = workflow["status"].get("nodes", {})
-     dag = {}
-     for node in nodes.values():
-         name = node["displayName"]
-         record = {
-             k: node[k] for k in ["phase", "startedAt", "finishedAt", "type", "id"]
-         }
-
-         # snake case
-         # align kfp fields to mlrun snake case convention
-         # create snake_case for consistency.
-         # retain the camelCase for compatibility
-         for key in list(record.keys()):
-             record[inflection.underscore(key)] = record[key]
-
-         record["parent"] = node.get("boundaryID", "")
-         record["name"] = name
-         record["children"] = node.get("children", [])
-         if name in templates:
-             record["function"] = templates[name].get("function")
-             record["run_type"] = templates[name].get("run_type")
-         dag[node["id"]] = record
-
-     return dag, project, workflow["status"].get("message", "")
-
-
- def format_summary_from_kfp_run(
-     kfp_run, project=None, run_db: "mlrun.db.RunDBInterface" = None
- ):
-     override_project = project if project and project != "*" else None
-     dag, project, message = generate_kfp_dag_and_resolve_project(
-         kfp_run, override_project
-     )
-     run_id = get_in(kfp_run, "run.id")
-     logger.debug("Formatting summary from KFP run", run_id=run_id, project=project)
-
-     # run db parameter allows us to use the same db session for the whole flow and avoid session isolation issues
-     if not run_db:
-         run_db = mlrun.db.get_run_db()
-
-     # enrich DAG with mlrun run info
-     runs = run_db.list_runs(project=project, labels=f"workflow={run_id}")
-
-     for run in runs:
-         step = get_in(run, ["metadata", "labels", "mlrun/runner-pod"])
-         if step and step in dag:
-             dag[step]["run_uid"] = get_in(run, "metadata.uid")
-             dag[step]["kind"] = get_in(run, "metadata.labels.kind")
-             error = get_in(run, "status.error")
-             if error:
-                 dag[step]["error"] = error
-
-     short_run = {
-         "graph": dag,
-         "run": mlrun.utils.helpers.format_run(kfp_run["run"]),
-     }
-     short_run["run"]["project"] = project
-     short_run["run"]["message"] = message
-     logger.debug("Completed summary formatting", run_id=run_id, project=project)
-     return short_run
-
-
- def show_kfp_run(run, clear_output=False):
-     phase_to_color = {
-         mlrun.run.RunStatuses.failed: "red",
-         mlrun.run.RunStatuses.succeeded: "green",
-         mlrun.run.RunStatuses.skipped: "white",
-     }
-     runtype_to_shape = {
-         PipelineRunType.run: "ellipse",
-         PipelineRunType.build: "box",
-         PipelineRunType.deploy: "box3d",
-     }
-     if not run or "graph" not in run:
-         return
-     if is_ipython:
-         try:
-             from graphviz import Digraph
-         except ImportError:
-             return
-
-         try:
-             graph = run["graph"]
-             dag = Digraph("kfp", format="svg")
-             dag.attr(compound="true")
-
-             for key, node in graph.items():
-                 if node["type"] != "DAG" or node["parent"]:
-                     shape = "ellipse"
-                     if node.get("run_type"):
-                         shape = runtype_to_shape.get(node["run_type"], None)
-                     elif node["phase"] == "Skipped" or (
-                         node["type"] == "DAG" and node["name"].startswith("condition-")
-                     ):
-                         shape = "diamond"
-                     dag.node(
-                         key,
-                         label=node["name"],
-                         fillcolor=phase_to_color.get(node["phase"], None),
-                         style="filled",
-                         shape=shape,
-                         tooltip=node.get("error", None),
-                     )
-                     for child in node.get("children") or []:
-                         dag.edge(key, child)
-
-             import IPython
-
-             if clear_output:
-                 IPython.display.clear_output(wait=True)
-
-             run_id = run["run"]["id"]
-             url = get_workflow_url(run["run"]["project"], run_id)
-             href = f'<a href="{url}" target="_blank"><b>click here</b></a>'
-             html = IPython.display.HTML(
-                 f"<div>Pipeline running (id={run_id}), {href} to view the details in MLRun UI</div>"
-             )
-             IPython.display.display(html, dag)
-         except Exception as exc:
-             logger.warning(f"failed to plot graph, {err_to_str(exc)}")
-
-
- def add_default_function_resources(
-     container_op: dsl.ContainerOp,
- ) -> dsl.ContainerOp:
-     default_resources = config.get_default_function_pod_resources()
-     for resource_name, resource_value in default_resources["requests"].items():
-         if resource_value:
-             container_op.container.add_resource_request(resource_name, resource_value)
-
-     for resource_name, resource_value in default_resources["limits"].items():
-         if resource_value:
-             container_op.container.add_resource_limit(resource_name, resource_value)
-     return container_op
-
-
- def add_function_node_selection_attributes(
-     function, container_op: dsl.ContainerOp
- ) -> dsl.ContainerOp:
-     if not mlrun.runtimes.RuntimeKinds.is_local_runtime(function.kind):
-         if getattr(function.spec, "node_selector"):
-             container_op.node_selector = function.spec.node_selector
-
-         if getattr(function.spec, "tolerations"):
-             container_op.tolerations = function.spec.tolerations
-
-         if getattr(function.spec, "affinity"):
-             container_op.affinity = function.spec.affinity
-
-     return container_op
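
Because mlrun/kfpops.py is deleted in this release, code that imports its helpers directly (for example `mlrun_op`, used in the docstring example above) will raise ModuleNotFoundError under 1.7.0rc16. A minimal sketch of a guarded import for code that must run against both versions; the helper name and fallback behavior below are illustrative assumptions, not part of mlrun's API:

# Hedged sketch: guard imports of the removed module so the same pipeline
# code can load against both 1.7.0rc14 and 1.7.0rc16.
try:
    from mlrun.kfpops import mlrun_op  # present up to 1.7.0rc14
except ImportError:  # module removed in 1.7.0rc16
    mlrun_op = None


def training_step(p1, p2):
    # Illustrative wrapper around the docstring example above: fail with a
    # clear migration message once the legacy KFP operator is gone, instead
    # of crashing at import time.
    if mlrun_op is None:
        raise RuntimeError(
            "mlrun.kfpops was removed in 1.7.0rc16; migrate this step to the "
            "newer project/run_function APIs"
        )
    return mlrun_op(
        "training",
        command="/User/kubeflow/training.py",
        params={"p1": p1, "p2": p2},
    )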