mlrun 1.7.0rc15-py3-none-any.whl → 1.7.0rc17-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (77)
  1. mlrun/__init__.py +10 -1
  2. mlrun/__main__.py +18 -4
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/artifacts/__init__.py +7 -1
  6. mlrun/artifacts/base.py +28 -3
  7. mlrun/artifacts/dataset.py +8 -0
  8. mlrun/artifacts/manager.py +18 -0
  9. mlrun/artifacts/model.py +8 -1
  10. mlrun/artifacts/plots.py +13 -0
  11. mlrun/common/schemas/__init__.py +10 -2
  12. mlrun/common/schemas/alert.py +64 -5
  13. mlrun/common/schemas/api_gateway.py +4 -0
  14. mlrun/common/schemas/artifact.py +15 -0
  15. mlrun/common/schemas/auth.py +2 -0
  16. mlrun/common/schemas/model_monitoring/__init__.py +4 -1
  17. mlrun/common/schemas/model_monitoring/constants.py +17 -1
  18. mlrun/common/schemas/model_monitoring/model_endpoints.py +60 -1
  19. mlrun/common/schemas/project.py +5 -1
  20. mlrun/config.py +11 -4
  21. mlrun/datastore/datastore_profile.py +10 -7
  22. mlrun/db/base.py +24 -4
  23. mlrun/db/httpdb.py +97 -43
  24. mlrun/db/nopdb.py +25 -4
  25. mlrun/errors.py +5 -0
  26. mlrun/launcher/base.py +3 -2
  27. mlrun/lists.py +4 -0
  28. mlrun/model.py +15 -8
  29. mlrun/model_monitoring/__init__.py +1 -1
  30. mlrun/model_monitoring/applications/_application_steps.py +1 -2
  31. mlrun/model_monitoring/applications/context.py +1 -1
  32. mlrun/model_monitoring/applications/histogram_data_drift.py +64 -38
  33. mlrun/model_monitoring/db/__init__.py +2 -0
  34. mlrun/model_monitoring/db/stores/base/store.py +9 -36
  35. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +63 -110
  36. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +56 -202
  37. mlrun/model_monitoring/db/tsdb/__init__.py +71 -0
  38. mlrun/model_monitoring/db/tsdb/base.py +135 -0
  39. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  40. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  41. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +442 -0
  42. mlrun/model_monitoring/db/v3io_tsdb_reader.py +134 -0
  43. mlrun/model_monitoring/stream_processing.py +46 -210
  44. mlrun/model_monitoring/writer.py +50 -100
  45. mlrun/platforms/__init__.py +10 -9
  46. mlrun/platforms/iguazio.py +19 -200
  47. mlrun/projects/operations.py +11 -7
  48. mlrun/projects/pipelines.py +13 -76
  49. mlrun/projects/project.py +62 -17
  50. mlrun/render.py +9 -3
  51. mlrun/run.py +5 -38
  52. mlrun/runtimes/__init__.py +1 -0
  53. mlrun/runtimes/base.py +3 -3
  54. mlrun/runtimes/kubejob.py +2 -1
  55. mlrun/runtimes/nuclio/api_gateway.py +163 -77
  56. mlrun/runtimes/nuclio/application/application.py +160 -7
  57. mlrun/runtimes/nuclio/function.py +25 -45
  58. mlrun/runtimes/pod.py +16 -36
  59. mlrun/runtimes/remotesparkjob.py +1 -1
  60. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  61. mlrun/runtimes/utils.py +0 -38
  62. mlrun/track/tracker.py +2 -1
  63. mlrun/utils/helpers.py +51 -31
  64. mlrun/utils/logger.py +11 -6
  65. mlrun/utils/notifications/notification/base.py +1 -1
  66. mlrun/utils/notifications/notification/slack.py +9 -4
  67. mlrun/utils/notifications/notification/webhook.py +1 -1
  68. mlrun/utils/notifications/notification_pusher.py +21 -14
  69. mlrun/utils/version/version.json +2 -2
  70. {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc17.dist-info}/METADATA +4 -3
  71. {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc17.dist-info}/RECORD +75 -69
  72. mlrun/kfpops.py +0 -860
  73. mlrun/platforms/other.py +0 -305
  74. {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc17.dist-info}/LICENSE +0 -0
  75. {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc17.dist-info}/WHEEL +0 -0
  76. {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc17.dist-info}/entry_points.txt +0 -0
  77. {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc17.dist-info}/top_level.txt +0 -0
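
The two outright deletions below, mlrun/kfpops.py (860 lines of Kubeflow Pipelines helpers) and mlrun/platforms/other.py (305 lines), mean any downstream code that imports from these modules will break on upgrade. A minimal defensive-import sketch follows; the guard and the fallback error message are illustrative, not part of mlrun:

# Hedged sketch: tolerate the removal of mlrun.kfpops when moving from
# 1.7.0rc15 to 1.7.0rc17; the fallback behavior here is hypothetical.
try:
    from mlrun.kfpops import mlrun_op  # present in 1.7.0rc15, deleted in rc17
except ImportError:
    mlrun_op = None

def training_step(*args, **kwargs):
    # Fail loudly at call time instead of import time.
    if mlrun_op is None:
        raise RuntimeError(
            "mlrun.kfpops was removed; pin mlrun<1.7.0rc17 or migrate off mlrun_op"
        )
    return mlrun_op(*args, **kwargs)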
mlrun/kfpops.py DELETED
@@ -1,860 +0,0 @@
- # Copyright 2023 Iguazio
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #   http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import json
- import os
- import os.path
- from copy import deepcopy
- from typing import Union
-
- import inflection
- from kfp import dsl
- from kubernetes import client as k8s_client
-
- import mlrun
- from mlrun.errors import err_to_str
-
- from .config import config
- from .model import HyperParamOptions, RunSpec
- from .utils import (
-     dict_to_yaml,
-     gen_md_table,
-     get_artifact_target,
-     get_in,
-     get_workflow_url,
-     is_ipython,
-     logger,
-     run_keys,
-     version,
- )
-
- # default KFP artifacts and output (ui metadata, metrics etc.)
- # directories to /tmp to allow running with security context
- KFPMETA_DIR = "/tmp"
- KFP_ARTIFACTS_DIR = "/tmp"
-
- project_annotation = "mlrun/project"
- run_annotation = "mlrun/pipeline-step-type"
- function_annotation = "mlrun/function-uri"
-
- dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
-
-
- class PipelineRunType:
-     run = "run"
-     build = "build"
-     deploy = "deploy"
-
-
- def is_num(v):
-     return isinstance(v, (int, float, complex))
-
-
- def write_kfpmeta(struct):
-     if "status" not in struct:
-         return
-
-     results = struct["status"].get("results", {})
-     metrics = {
-         "metrics": [
-             {"name": k, "numberValue": v} for k, v in results.items() if is_num(v)
-         ],
-     }
-     with open(os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"), "w") as f:
-         json.dump(metrics, f)
-
-     struct = deepcopy(struct)
-     uid = struct["metadata"].get("uid")
-     project = struct["metadata"].get("project", config.default_project)
-     output_artifacts, out_dict = get_kfp_outputs(
-         struct["status"].get(run_keys.artifacts, []),
-         struct["metadata"].get("labels", {}),
-         project,
-     )
-
-     results["run_id"] = results.get("run_id", "/".join([project, uid]))
-     for key in struct["spec"].get(run_keys.outputs, []):
-         val = "None"
-         if key in out_dict:
-             val = out_dict[key]
-         elif key in results:
-             val = results[key]
-         try:
-             # NOTE: if key has "../x", it would fail on path traversal
-             path = os.path.join(KFP_ARTIFACTS_DIR, key)
-             if not mlrun.utils.helpers.is_safe_path(KFP_ARTIFACTS_DIR, path):
-                 logger.warning(
-                     "Path traversal is not allowed ignoring", path=path, key=key
-                 )
-                 continue
-             path = os.path.abspath(path)
-             logger.info("Writing artifact output", path=path, val=val)
-             with open(path, "w") as fp:
-                 fp.write(str(val))
-         except Exception as exc:
-             logger.warning("Failed writing to temp file. Ignoring", exc=err_to_str(exc))
-             pass
-
-     text = "# Run Report\n"
-     if "iterations" in struct["status"]:
-         del struct["status"]["iterations"]
-
-     text += "## Metadata\n```yaml\n" + dict_to_yaml(struct) + "```\n"
-
-     metadata = {"outputs": [{"type": "markdown", "storage": "inline", "source": text}]}
-     with open(os.path.join(KFPMETA_DIR, "mlpipeline-ui-metadata.json"), "w") as f:
-         json.dump(metadata, f)
-
-
- def get_kfp_outputs(artifacts, labels, project):
-     outputs = []
-     out_dict = {}
-     for output in artifacts:
-         key = output.get("metadata")["key"]
-         output_spec = output.get("spec", {})
-
-         target = output_spec.get("target_path", "")
-         target = output_spec.get("inline", target)
-
-         out_dict[key] = get_artifact_target(output, project=project)
-
-         if target.startswith("v3io:///"):
-             target = target.replace("v3io:///", "http://v3io-webapi:8081/")
-
-         user = labels.get("v3io_user", "") or os.environ.get("V3IO_USERNAME", "")
-         if target.startswith("/User/"):
-             user = user or "admin"
-             target = "http://v3io-webapi:8081/users/" + user + target[5:]
-
-         viewer = output_spec.get("viewer", "")
-         if viewer in ["web-app", "chart"]:
-             meta = {"type": "web-app", "source": target}
-             outputs += [meta]
-
-         elif viewer == "table":
-             header = output_spec.get("header", None)
-             if header and target.endswith(".csv"):
-                 meta = {
-                     "type": "table",
-                     "format": "csv",
-                     "header": header,
-                     "source": target,
-                 }
-                 outputs += [meta]
-
-         elif output.get("kind") == "dataset":
-             header = output_spec.get("header")
-             preview = output_spec.get("preview")
-             if preview:
-                 tbl_md = gen_md_table(header, preview)
-                 text = f"## Dataset: {key} \n\n" + tbl_md
-                 del output_spec["preview"]
-
-                 meta = {"type": "markdown", "storage": "inline", "source": text}
-                 outputs += [meta]
-
-     return outputs, out_dict
-
-
- def mlrun_op(
-     name: str = "",
-     project: str = "",
-     function=None,
-     func_url=None,
-     image: str = "",
-     runobj=None,
-     command: str = "",
-     secrets: list = None,
-     params: dict = None,
-     job_image=None,
-     hyperparams: dict = None,
-     param_file: str = "",
-     labels: dict = None,
-     selector: str = "",
-     inputs: dict = None,
-     outputs: list = None,
-     in_path: str = "",
-     out_path: str = "",
-     rundb: str = "",
-     mode: str = "",
-     handler: str = "",
-     more_args: list = None,
-     hyper_param_options=None,
-     verbose=None,
-     scrape_metrics=False,
-     returns: list[Union[str, dict[str, str]]] = None,
-     auto_build: bool = False,
- ):
-     """mlrun KubeFlow pipelines operator, use to form pipeline steps
-
-     when using kubeflow pipelines, each step is wrapped in an mlrun_op
-     one step can pass state and data to the next step, see example below.
-
-     :param name: name used for the step
-     :param project: optional, project name
-     :param image: optional, run container image (will be executing the step)
-         the container should host all required packages + code
-         for the run, alternatively user can mount packages/code via
-         shared file volumes like v3io (see example below)
-     :param function: optional, function object
-     :param func_url: optional, function object url
-     :param command: exec command (or URL for functions)
-     :param secrets: extra secrets specs, will be injected into the runtime
-         e.g. ['file=<filename>', 'env=ENV_KEY1,ENV_KEY2']
-     :param params: dictionary of run parameters and values
-     :param hyperparams: dictionary of hyper parameters and list values, each
-         hyperparam holds a list of values, the run will be
-         executed for every parameter combination (GridSearch)
-     :param param_file: a csv/json file with parameter combinations, first csv row hold
-         the parameter names, following rows hold param values
-     :param selector: selection criteria for hyperparams e.g. "max.accuracy"
-     :param hyper_param_options: hyper param options class, see: :py:class:`~mlrun.model.HyperParamOptions`
-     :param labels: labels to tag the job/run with ({key:val, ..})
-     :param inputs: dictionary of input objects + optional paths (if path is
-         omitted the path will be the in_path/key.
-     :param outputs: dictionary of output objects + optional paths (if path is
-         omitted the path will be the out_path/key.
-     :param in_path: default input path/url (prefix) for inputs
-     :param out_path: default output path/url (prefix) for artifacts
-     :param rundb: path for rundb (or use 'MLRUN_DBPATH' env instead)
-     :param mode: run mode, e.g. 'pass' for using the command without mlrun wrapper
-     :param handler code entry-point/handler name
-     :param job_image name of the image user for the job
-     :param verbose: add verbose prints/logs
-     :param scrape_metrics: whether to add the `mlrun/scrape-metrics` label to this run's resources
-     :param returns: List of configurations for how to log the returning values from the handler's run (as artifacts or
-         results). The list's length must be equal to the amount of returning objects. A configuration may be
-         given as:
-
-         * A string of the key to use to log the returning value as result or as an artifact. To specify
-           The artifact type, it is possible to pass a string in the following structure:
-           "<key> : <type>". Available artifact types can be seen in `mlrun.ArtifactType`. If no artifact
-           type is specified, the object's default artifact type will be used.
-         * A dictionary of configurations to use when logging. Further info per object type and artifact
-           type can be given there. The artifact key must appear in the dictionary as "key": "the_key".
-     :param auto_build: when set to True and the function require build it will be built on the first
-         function run, use only if you dont plan on changing the build config between runs
-
-     :returns: KFP step operation
-
-     Example:
-         from kfp import dsl
-         from mlrun import mlrun_op
-         from mlrun.platforms import mount_v3io
-
-         def mlrun_train(p1, p2):
-             return mlrun_op('training',
-                             command = '/User/kubeflow/training.py',
-                             params = {'p1':p1, 'p2':p2},
-                             outputs = {'model.txt':'', 'dataset.csv':''},
-                             out_path ='v3io:///projects/my-proj/mlrun/{{workflow.uid}}/',
-                             rundb = '/User/kubeflow')
-
-         # use data from the first step
-         def mlrun_validate(modelfile):
-             return mlrun_op('validation',
-                             command = '/User/kubeflow/validation.py',
-                             inputs = {'model.txt':modelfile},
-                             out_path ='v3io:///projects/my-proj/{{workflow.uid}}/',
-                             rundb = '/User/kubeflow')
-
-         @dsl.pipeline(
-             name='My MLRUN pipeline', description='Shows how to use mlrun.'
-         )
-         def mlrun_pipeline(
-             p1 = 5 , p2 = '"text"'
-         ):
-             # run training, mount_v3io will mount "/User" into the pipeline step
-             train = mlrun_train(p1, p2).apply(mount_v3io())
-
-             # feed 1st step results into the second step
-             validate = mlrun_validate(
-                 train.outputs['model-txt']).apply(mount_v3io())
-
-     """
-     secrets = [] if secrets is None else secrets
-     params = {} if params is None else params
-     hyperparams = {} if hyperparams is None else hyperparams
-     if hyper_param_options and isinstance(hyper_param_options, dict):
-         hyper_param_options = HyperParamOptions.from_dict(hyper_param_options)
-     inputs = {} if inputs is None else inputs
-     returns = [] if returns is None else returns
-     outputs = [] if outputs is None else outputs
-     labels = {} if labels is None else labels
-
-     rundb = rundb or mlrun.db.get_or_set_dburl()
-     cmd = [
-         "python",
-         "-m",
-         "mlrun",
-         "run",
-         "--kfp",
-         "--from-env",
-         "--workflow",
-         "{{workflow.uid}}",
-     ]
-     file_outputs = {}
-
-     runtime = None
-     code_env = None
-     function_name = ""
-     if function:
-         if not func_url:
-             if function.kind in ["", "local"]:
-                 image = image or function.spec.image
-                 command = command or function.spec.command
-                 more_args = more_args or function.spec.args
-                 mode = mode or function.spec.mode
-                 rundb = rundb or function.spec.rundb
-                 code_env = str(function.spec.build.functionSourceCode)
-             else:
-                 runtime = str(function.to_dict())
-
-         function_name = function.metadata.name
-         if function.kind == "dask":
-             image = image or function.spec.kfp_image or config.dask_kfp_image
-
-     image = image or config.kfp_image
-
-     if runobj:
-         handler = handler or runobj.spec.handler_name
-         params = params or runobj.spec.parameters
-         hyperparams = hyperparams or runobj.spec.hyperparams
-         param_file = (
-             param_file
-             or runobj.spec.param_file
-             or runobj.spec.hyper_param_options.param_file
-         )
-         hyper_param_options = hyper_param_options or runobj.spec.hyper_param_options
-         selector = (
-             selector or runobj.spec.selector or runobj.spec.hyper_param_options.selector
-         )
-         inputs = inputs or runobj.spec.inputs
-         returns = returns or runobj.spec.returns
-         outputs = outputs or runobj.spec.outputs
-         in_path = in_path or runobj.spec.input_path
-         out_path = out_path or runobj.spec.output_path
-         secrets = secrets or runobj.spec.secret_sources
-         project = project or runobj.metadata.project
-         labels = runobj.metadata.labels or labels
-         verbose = verbose or runobj.spec.verbose
-         scrape_metrics = scrape_metrics or runobj.spec.scrape_metrics
-
-     outputs = RunSpec.join_outputs_and_returns(outputs=outputs, returns=returns)
-
-     if not name:
-         if not function_name:
-             raise ValueError("name or function object must be specified")
-         name = function_name
-         if handler:
-             short_name = handler
-             for separator in ["#", "::", "."]:
-                 # drop paths, module or class name from short name
-                 if separator in short_name:
-                     short_name = short_name.split(separator)[-1]
-             name += "-" + short_name
-
-     if hyperparams or param_file:
-         outputs.append("iteration_results")
-     if "run_id" not in outputs:
-         outputs.append("run_id")
-
-     params = params or {}
-     hyperparams = hyperparams or {}
-     inputs = inputs or {}
-     returns = returns or []
-     secrets = secrets or []
-
-     mlrun.runtimes.utils.enrich_run_labels(labels)
-
-     if name:
-         cmd += ["--name", name]
-     if func_url:
-         cmd += ["-f", func_url]
-     for secret in secrets:
-         cmd += ["-s", f"{secret['kind']}={secret['source']}"]
-     for param, val in params.items():
-         cmd += ["-p", f"{param}={val}"]
-     for xpram, val in hyperparams.items():
-         cmd += ["-x", f"{xpram}={val}"]
-     for input_param, val in inputs.items():
-         cmd += ["-i", f"{input_param}={val}"]
-     for log_hint in returns:
-         cmd += [
-             "--returns",
-             json.dumps(log_hint) if isinstance(log_hint, dict) else log_hint,
-         ]
-     for label, val in labels.items():
-         cmd += ["--label", f"{label}={val}"]
-     for output in outputs:
-         cmd += ["-o", str(output)]
-         file_outputs[output.replace(".", "_")] = (
-             f"/tmp/{output}"  # not using path.join to avoid windows "\"
-         )
-     if project:
-         cmd += ["--project", project]
-     if handler:
-         cmd += ["--handler", handler]
-     if runtime:
-         cmd += ["--runtime", runtime]
-     if in_path:
-         cmd += ["--in-path", in_path]
-     if out_path:
-         cmd += ["--out-path", out_path]
-     if param_file:
-         cmd += ["--param-file", param_file]
-     if hyper_param_options:
-         cmd += ["--hyper-param-options", hyper_param_options.to_json()]
-     if selector:
-         cmd += ["--selector", selector]
-     if job_image:
-         cmd += ["--image", job_image]
-     if mode:
-         cmd += ["--mode", mode]
-     if verbose:
-         cmd += ["--verbose"]
-     if scrape_metrics:
-         cmd += ["--scrape-metrics"]
-     if auto_build:
-         cmd += ["--auto-build"]
-     if more_args:
-         cmd += more_args
-
-     registry = get_default_reg()
-     if image and image.startswith("."):
-         if registry:
-             image = f"{registry}/{image[1:]}"
-         else:
-             raise ValueError("local image registry env not found")
-
-     image = mlrun.utils.enrich_image_url(
-         image, mlrun.get_version(), str(version.Version().get_python_version())
-     )
-
-     cop = dsl.ContainerOp(
-         name=name,
-         image=image,
-         command=cmd + [command],
-         file_outputs=file_outputs,
-         output_artifact_paths={
-             "mlpipeline-ui-metadata": os.path.join(
-                 KFPMETA_DIR, "mlpipeline-ui-metadata.json"
-             ),
-             "mlpipeline-metrics": os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"),
-         },
-     )
-     cop = add_default_function_resources(cop)
-     cop = add_function_node_selection_attributes(container_op=cop, function=function)
-
-     add_annotations(cop, PipelineRunType.run, function, func_url, project)
-     add_labels(cop, function, scrape_metrics)
-     if code_env:
-         cop.container.add_env_variable(
-             k8s_client.V1EnvVar(name="MLRUN_EXEC_CODE", value=code_env)
-         )
-     if registry:
-         cop.container.add_env_variable(
-             k8s_client.V1EnvVar(
-                 name="MLRUN_HTTPDB__BUILDER__DOCKER_REGISTRY", value=registry
-             )
-         )
-
-     add_default_env(k8s_client, cop)
-
-     return cop
-
-
- def deploy_op(
-     name,
-     function,
-     func_url=None,
-     source="",
-     project="",
-     models: list = None,
-     env: dict = None,
-     tag="",
-     verbose=False,
- ):
-     cmd = ["python", "-m", "mlrun", "deploy"]
-     if source:
-         cmd += ["-s", source]
-     if tag:
-         cmd += ["--tag", tag]
-     if verbose:
-         cmd += ["--verbose"]
-     if project:
-         cmd += ["-p", project]
-
-     if models:
-         for m in models:
-             for key in ["key", "model_path", "model_url", "class_name", "model_url"]:
-                 if key in m:
-                     m[key] = str(m[key])  # verify we stringify pipeline params
-             if function.kind == mlrun.runtimes.RuntimeKinds.serving:
-                 cmd += ["-m", json.dumps(m)]
-             else:
-                 cmd += ["-m", f"{m['key']}={m['model_path']}"]
-
-     if env:
-         for key, val in env.items():
-             cmd += ["--env", f"{key}={val}"]
-
-     if func_url:
-         cmd += ["-f", func_url]
-     else:
-         runtime = f"{function.to_dict()}"
-         cmd += [runtime]
-
-     cop = dsl.ContainerOp(
-         name=name,
-         image=config.kfp_image,
-         command=cmd,
-         file_outputs={"endpoint": "/tmp/output", "name": "/tmp/name"},
-     )
-     cop = add_default_function_resources(cop)
-     cop = add_function_node_selection_attributes(container_op=cop, function=function)
-
-     add_annotations(cop, PipelineRunType.deploy, function, func_url)
-     add_default_env(k8s_client, cop)
-     return cop
-
-
- def add_env(env=None):
-     """
-     Modifier function to add env vars from dict
-     Usage:
-         train = train_op(...)
-         train.apply(add_env({'MY_ENV':'123'}))
-     """
-
-     env = {} if env is None else env
-
-     def _add_env(task):
-         for k, v in env.items():
-             task.add_env_variable(k8s_client.V1EnvVar(name=k, value=v))
-         return task
-
-     return _add_env
-
-
- def build_op(
-     name,
-     function=None,
-     func_url=None,
-     image=None,
-     base_image=None,
-     commands: list = None,
-     secret_name="",
-     with_mlrun=True,
-     skip_deployed=False,
- ):
-     """build Docker image."""
-
-     cmd = ["python", "-m", "mlrun", "build", "--kfp"]
-     if function:
-         if not hasattr(function, "to_dict"):
-             raise ValueError("function must specify a function runtime object")
-         cmd += ["-r", str(function.to_dict())]
-     elif not func_url:
-         raise ValueError("function object or func_url must be specified")
-
-     commands = commands or []
-     if image:
-         cmd += ["-i", image]
-     if base_image:
-         cmd += ["-b", base_image]
-     if secret_name:
-         cmd += ["--secret-name", secret_name]
-     if with_mlrun:
-         cmd += ["--with-mlrun"]
-     if skip_deployed:
-         cmd += ["--skip"]
-     for c in commands:
-         cmd += ["-c", c]
-     if func_url and not function:
-         cmd += [func_url]
-
-     cop = dsl.ContainerOp(
-         name=name,
-         image=config.kfp_image,
-         command=cmd,
-         file_outputs={"state": "/tmp/state", "image": "/tmp/image"},
-     )
-     cop = add_default_function_resources(cop)
-     cop = add_function_node_selection_attributes(container_op=cop, function=function)
-
-     add_annotations(cop, PipelineRunType.build, function, func_url)
-     if config.httpdb.builder.docker_registry:
-         cop.container.add_env_variable(
-             k8s_client.V1EnvVar(
-                 name="MLRUN_HTTPDB__BUILDER__DOCKER_REGISTRY",
-                 value=config.httpdb.builder.docker_registry,
-             )
-         )
-     if "IGZ_NAMESPACE_DOMAIN" in os.environ:
-         cop.container.add_env_variable(
-             k8s_client.V1EnvVar(
-                 name="IGZ_NAMESPACE_DOMAIN",
-                 value=os.environ.get("IGZ_NAMESPACE_DOMAIN"),
-             )
-         )
-
-     is_v3io = function.spec.build.source and function.spec.build.source.startswith(
-         "v3io"
-     )
-     if "V3IO_ACCESS_KEY" in os.environ and is_v3io:
-         cop.container.add_env_variable(
-             k8s_client.V1EnvVar(
-                 name="V3IO_ACCESS_KEY", value=os.environ.get("V3IO_ACCESS_KEY")
-             )
-         )
-
-     add_default_env(k8s_client, cop)
-
-     return cop
-
-
- def add_default_env(k8s_client, cop):
-     cop.container.add_env_variable(
-         k8s_client.V1EnvVar(
-             "MLRUN_NAMESPACE",
-             value_from=k8s_client.V1EnvVarSource(
-                 field_ref=k8s_client.V1ObjectFieldSelector(
-                     field_path="metadata.namespace"
-                 )
-             ),
-         )
-     )
-
-     if config.httpdb.api_url:
-         cop.container.add_env_variable(
-             k8s_client.V1EnvVar(name="MLRUN_DBPATH", value=config.httpdb.api_url)
-         )
-
-     if config.mpijob_crd_version:
-         cop.container.add_env_variable(
-             k8s_client.V1EnvVar(
-                 name="MLRUN_MPIJOB_CRD_VERSION", value=config.mpijob_crd_version
-             )
-         )
-
-     auth_env_var = (
-         mlrun.common.runtimes.constants.FunctionEnvironmentVariables.auth_session
-     )
-     if auth_env_var in os.environ or "V3IO_ACCESS_KEY" in os.environ:
-         cop.container.add_env_variable(
-             k8s_client.V1EnvVar(
-                 name=auth_env_var,
-                 value=os.environ.get(auth_env_var) or os.environ.get("V3IO_ACCESS_KEY"),
-             )
-         )
-
-
- def get_default_reg():
-     if config.httpdb.builder.docker_registry:
-         return config.httpdb.builder.docker_registry
-     namespace_domain = os.environ.get("IGZ_NAMESPACE_DOMAIN", None)
-     if namespace_domain is not None:
-         return f"docker-registry.{namespace_domain}:80"
-     return ""
-
-
- def add_annotations(cop, kind, function, func_url=None, project=None):
-     if func_url and func_url.startswith("db://"):
-         func_url = func_url[len("db://") :]
-     cop.add_pod_annotation(run_annotation, kind)
-     cop.add_pod_annotation(project_annotation, project or function.metadata.project)
-     cop.add_pod_annotation(function_annotation, func_url or function.uri)
-
-
- def add_labels(cop, function, scrape_metrics=False):
-     prefix = mlrun.runtimes.utils.mlrun_key
-     cop.add_pod_label(prefix + "class", function.kind)
-     cop.add_pod_label(prefix + "function", function.metadata.name)
-     cop.add_pod_label(prefix + "name", cop.human_name)
-     cop.add_pod_label(prefix + "project", function.metadata.project)
-     cop.add_pod_label(prefix + "tag", function.metadata.tag or "latest")
-     cop.add_pod_label(prefix + "scrape-metrics", "True" if scrape_metrics else "False")
-
-
- def generate_kfp_dag_and_resolve_project(run, project=None):
-     workflow = run.get("pipeline_runtime", {}).get("workflow_manifest")
-     if not workflow:
-         return None, project, None
-     workflow = json.loads(workflow)
-
-     templates = {}
-     for template in workflow["spec"]["templates"]:
-         project = project or get_in(
-             template, ["metadata", "annotations", project_annotation], ""
-         )
-         name = template["name"]
-         templates[name] = {
-             "run_type": get_in(
-                 template, ["metadata", "annotations", run_annotation], ""
-             ),
-             "function": get_in(
-                 template, ["metadata", "annotations", function_annotation], ""
-             ),
-         }
-
-     nodes = workflow["status"].get("nodes", {})
-     dag = {}
-     for node in nodes.values():
-         name = node["displayName"]
-         record = {
-             k: node[k] for k in ["phase", "startedAt", "finishedAt", "type", "id"]
-         }
-
-         # snake case
-         # align kfp fields to mlrun snake case convention
-         # create snake_case for consistency.
-         # retain the camelCase for compatibility
-         for key in list(record.keys()):
-             record[inflection.underscore(key)] = record[key]
-
-         record["parent"] = node.get("boundaryID", "")
-         record["name"] = name
-         record["children"] = node.get("children", [])
-         if name in templates:
-             record["function"] = templates[name].get("function")
-             record["run_type"] = templates[name].get("run_type")
-         dag[node["id"]] = record
-
-     return dag, project, workflow["status"].get("message", "")
-
-
- def format_summary_from_kfp_run(
-     kfp_run, project=None, run_db: "mlrun.db.RunDBInterface" = None
- ):
-     override_project = project if project and project != "*" else None
-     dag, project, message = generate_kfp_dag_and_resolve_project(
-         kfp_run, override_project
-     )
-     run_id = get_in(kfp_run, "run.id")
-     logger.debug("Formatting summary from KFP run", run_id=run_id, project=project)
-
-     # run db parameter allows us to use the same db session for the whole flow and avoid session isolation issues
-     if not run_db:
-         run_db = mlrun.db.get_run_db()
-
-     # enrich DAG with mlrun run info
-     runs = run_db.list_runs(project=project, labels=f"workflow={run_id}")
-
-     for run in runs:
-         step = get_in(run, ["metadata", "labels", "mlrun/runner-pod"])
-         if step and step in dag:
-             dag[step]["run_uid"] = get_in(run, "metadata.uid")
-             dag[step]["kind"] = get_in(run, "metadata.labels.kind")
-             error = get_in(run, "status.error")
-             if error:
-                 dag[step]["error"] = error
-
-     short_run = {
-         "graph": dag,
-         "run": mlrun.utils.helpers.format_run(kfp_run["run"]),
-     }
-     short_run["run"]["project"] = project
-     short_run["run"]["message"] = message
-     logger.debug("Completed summary formatting", run_id=run_id, project=project)
-     return short_run
-
-
- def show_kfp_run(run, clear_output=False):
-     phase_to_color = {
-         mlrun.run.RunStatuses.failed: "red",
-         mlrun.run.RunStatuses.succeeded: "green",
-         mlrun.run.RunStatuses.skipped: "white",
-     }
-     runtype_to_shape = {
-         PipelineRunType.run: "ellipse",
-         PipelineRunType.build: "box",
-         PipelineRunType.deploy: "box3d",
-     }
-     if not run or "graph" not in run:
-         return
-     if is_ipython:
-         try:
-             from graphviz import Digraph
-         except ImportError:
-             return
-
-         try:
-             graph = run["graph"]
-             dag = Digraph("kfp", format="svg")
-             dag.attr(compound="true")
-
-             for key, node in graph.items():
-                 if node["type"] != "DAG" or node["parent"]:
-                     shape = "ellipse"
-                     if node.get("run_type"):
-                         shape = runtype_to_shape.get(node["run_type"], None)
-                     elif node["phase"] == "Skipped" or (
-                         node["type"] == "DAG" and node["name"].startswith("condition-")
-                     ):
-                         shape = "diamond"
-                     dag.node(
-                         key,
-                         label=node["name"],
-                         fillcolor=phase_to_color.get(node["phase"], None),
-                         style="filled",
-                         shape=shape,
-                         tooltip=node.get("error", None),
-                     )
-                     for child in node.get("children") or []:
-                         dag.edge(key, child)
-
-             import IPython
-
-             if clear_output:
-                 IPython.display.clear_output(wait=True)
-
-             run_id = run["run"]["id"]
-             url = get_workflow_url(run["run"]["project"], run_id)
-             href = f'<a href="{url}" target="_blank"><b>click here</b></a>'
-             html = IPython.display.HTML(
-                 f"<div>Pipeline running (id={run_id}), {href} to view the details in MLRun UI</div>"
-             )
-             IPython.display.display(html, dag)
-         except Exception as exc:
-             logger.warning(f"failed to plot graph, {err_to_str(exc)}")
-
-
- def add_default_function_resources(
-     container_op: dsl.ContainerOp,
- ) -> dsl.ContainerOp:
-     default_resources = config.get_default_function_pod_resources()
-     for resource_name, resource_value in default_resources["requests"].items():
-         if resource_value:
-             container_op.container.add_resource_request(resource_name, resource_value)
-
-     for resource_name, resource_value in default_resources["limits"].items():
-         if resource_value:
-             container_op.container.add_resource_limit(resource_name, resource_value)
-     return container_op
-
-
- def add_function_node_selection_attributes(
-     function, container_op: dsl.ContainerOp
- ) -> dsl.ContainerOp:
-     if not mlrun.runtimes.RuntimeKinds.is_local_runtime(function.kind):
-         if getattr(function.spec, "node_selector"):
-             container_op.node_selector = function.spec.node_selector
-
-         if getattr(function.spec, "tolerations"):
-             container_op.tolerations = function.spec.tolerations
-
-         if getattr(function.spec, "affinity"):
-             container_op.affinity = function.spec.affinity
-
-     return container_op
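
For orientation, the deleted write_kfpmeta above emitted the two sidecar files that the KFP UI reads from /tmp. A minimal sketch of the shapes it wrote, with file names and JSON keys taken from the code above and the values purely illustrative:

# Illustrative reconstruction of write_kfpmeta's outputs; values are made up.
import json
import os

KFPMETA_DIR = "/tmp"

# Numeric run results became KFP metrics.
metrics = {"metrics": [{"name": "accuracy", "numberValue": 0.93}]}
with open(os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"), "w") as f:
    json.dump(metrics, f)

# The run report became inline-markdown UI metadata.
metadata = {
    "outputs": [{"type": "markdown", "storage": "inline", "source": "# Run Report\n"}]
}
with open(os.path.join(KFPMETA_DIR, "mlpipeline-ui-metadata.json"), "w") as f:
    json.dump(metadata, f)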