mlrun 1.7.0rc3__py3-none-any.whl → 1.7.0rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic; see the registry's advisory page for more details.

Files changed (76)
  1. mlrun/artifacts/manager.py +6 -1
  2. mlrun/common/constants.py +2 -0
  3. mlrun/common/model_monitoring/helpers.py +12 -6
  4. mlrun/common/schemas/__init__.py +11 -0
  5. mlrun/common/schemas/api_gateway.py +85 -0
  6. mlrun/common/schemas/auth.py +2 -2
  7. mlrun/common/schemas/client_spec.py +1 -0
  8. mlrun/common/schemas/common.py +40 -0
  9. mlrun/common/schemas/model_monitoring/constants.py +4 -1
  10. mlrun/common/schemas/project.py +2 -0
  11. mlrun/config.py +31 -17
  12. mlrun/datastore/azure_blob.py +22 -9
  13. mlrun/datastore/base.py +15 -25
  14. mlrun/datastore/datastore.py +19 -8
  15. mlrun/datastore/datastore_profile.py +47 -5
  16. mlrun/datastore/google_cloud_storage.py +10 -6
  17. mlrun/datastore/hdfs.py +51 -0
  18. mlrun/datastore/redis.py +4 -0
  19. mlrun/datastore/s3.py +4 -0
  20. mlrun/datastore/sources.py +29 -43
  21. mlrun/datastore/targets.py +59 -53
  22. mlrun/datastore/utils.py +2 -49
  23. mlrun/datastore/v3io.py +4 -0
  24. mlrun/db/base.py +50 -0
  25. mlrun/db/httpdb.py +121 -50
  26. mlrun/db/nopdb.py +13 -0
  27. mlrun/execution.py +3 -3
  28. mlrun/feature_store/feature_vector.py +2 -2
  29. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
  30. mlrun/frameworks/tf_keras/model_handler.py +7 -7
  31. mlrun/k8s_utils.py +10 -5
  32. mlrun/kfpops.py +19 -10
  33. mlrun/model.py +5 -0
  34. mlrun/model_monitoring/api.py +3 -3
  35. mlrun/model_monitoring/application.py +1 -1
  36. mlrun/model_monitoring/applications/__init__.py +13 -0
  37. mlrun/model_monitoring/applications/histogram_data_drift.py +218 -0
  38. mlrun/model_monitoring/batch.py +9 -111
  39. mlrun/model_monitoring/controller.py +73 -55
  40. mlrun/model_monitoring/controller_handler.py +13 -5
  41. mlrun/model_monitoring/features_drift_table.py +62 -53
  42. mlrun/model_monitoring/helpers.py +30 -21
  43. mlrun/model_monitoring/metrics/__init__.py +13 -0
  44. mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
  45. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +14 -14
  46. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
  47. mlrun/package/packagers/pandas_packagers.py +3 -3
  48. mlrun/package/utils/_archiver.py +3 -1
  49. mlrun/platforms/iguazio.py +8 -65
  50. mlrun/projects/pipelines.py +21 -11
  51. mlrun/projects/project.py +180 -42
  52. mlrun/run.py +1 -1
  53. mlrun/runtimes/base.py +25 -2
  54. mlrun/runtimes/kubejob.py +5 -3
  55. mlrun/runtimes/local.py +2 -2
  56. mlrun/runtimes/mpijob/abstract.py +6 -6
  57. mlrun/runtimes/nuclio/__init__.py +1 -0
  58. mlrun/runtimes/nuclio/api_gateway.py +300 -0
  59. mlrun/runtimes/nuclio/function.py +9 -9
  60. mlrun/runtimes/nuclio/serving.py +3 -3
  61. mlrun/runtimes/pod.py +3 -3
  62. mlrun/runtimes/sparkjob/spark3job.py +3 -3
  63. mlrun/serving/remote.py +4 -2
  64. mlrun/serving/server.py +2 -8
  65. mlrun/utils/async_http.py +3 -3
  66. mlrun/utils/helpers.py +27 -5
  67. mlrun/utils/http.py +3 -3
  68. mlrun/utils/logger.py +2 -2
  69. mlrun/utils/notifications/notification_pusher.py +6 -6
  70. mlrun/utils/version/version.json +2 -2
  71. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc5.dist-info}/METADATA +13 -16
  72. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc5.dist-info}/RECORD +76 -68
  73. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc5.dist-info}/WHEEL +1 -1
  74. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc5.dist-info}/LICENSE +0 -0
  75. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc5.dist-info}/entry_points.txt +0 -0
  76. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc5.dist-info}/top_level.txt +0 -0
mlrun/db/base.py CHANGED
@@ -616,6 +616,22 @@ class RunDBInterface(ABC):
616
616
  ):
617
617
  pass
618
618
 
619
+ @abstractmethod
620
+ def store_api_gateway(
621
+ self,
622
+ project: str,
623
+ api_gateway: mlrun.common.schemas.APIGateway,
624
+ ):
625
+ pass
626
+
627
+ @abstractmethod
628
+ def list_api_gateways(self, project=None) -> mlrun.common.schemas.APIGatewaysOutput:
629
+ pass
630
+
631
+ @abstractmethod
632
+ def get_api_gateway(self, name, project=None) -> mlrun.common.schemas.APIGateway:
633
+ pass
634
+
619
635
  def get_builder_status(
620
636
  self,
621
637
  func: "mlrun.runtimes.BaseRuntime",
@@ -676,3 +692,37 @@ class RunDBInterface(ABC):
676
692
  self, func_url: str = None, function: "mlrun.runtimes.BaseRuntime" = None
677
693
  ):
678
694
  pass
695
+
696
+ def submit_workflow(
697
+ self,
698
+ project: str,
699
+ name: str,
700
+ workflow_spec: Union[
701
+ "mlrun.projects.pipelines.WorkflowSpec",
702
+ "mlrun.common.schemas.WorkflowSpec",
703
+ dict,
704
+ ],
705
+ arguments: Optional[dict] = None,
706
+ artifact_path: Optional[str] = None,
707
+ source: Optional[str] = None,
708
+ run_name: Optional[str] = None,
709
+ namespace: Optional[str] = None,
710
+ notifications: list["mlrun.model.Notification"] = None,
711
+ ) -> "mlrun.common.schemas.WorkflowResponse":
712
+ pass
713
+
714
+ def update_model_monitoring_controller(
715
+ self,
716
+ project: str,
717
+ base_period: int = 10,
718
+ image: str = "mlrun/mlrun",
719
+ ):
720
+ pass
721
+
722
+ def enable_model_monitoring(
723
+ self,
724
+ project: str,
725
+ base_period: int = 10,
726
+ image: str = "mlrun/mlrun",
727
+ ):
728
+ pass
mlrun/db/httpdb.py CHANGED
@@ -33,6 +33,7 @@ import mlrun.common.schemas
33
33
  import mlrun.model_monitoring.model_endpoint
34
34
  import mlrun.platforms
35
35
  import mlrun.projects
36
+ import mlrun.runtimes.nuclio.api_gateway
36
37
  from mlrun.errors import MLRunInvalidArgumentError, err_to_str
37
38
 
38
39
  from ..artifacts import Artifact
@@ -152,7 +153,7 @@ class HTTPRunDB(RunDBInterface):
152
153
  @staticmethod
153
154
  def get_api_path_prefix(version: str = None) -> str:
154
155
  """
155
- :param version: API version to use, None (the default) will mean to use the default value from mlconf,
156
+ :param version: API version to use, None (the default) will mean to use the default value from mlrun.config,
156
157
  for un-versioned api set an empty string.
157
158
  """
158
159
  if version is not None:
@@ -250,7 +251,11 @@ class HTTPRunDB(RunDBInterface):
250
251
 
251
252
  try:
252
253
  response = self.session.request(
253
- method, url, timeout=timeout, verify=False, **kw
254
+ method,
255
+ url,
256
+ timeout=timeout,
257
+ verify=config.httpdb.http.verify,
258
+ **kw,
254
259
  )
255
260
  except requests.RequestException as exc:
256
261
  error = f"{err_to_str(exc)}: {error}" if error else err_to_str(exc)
@@ -302,11 +307,11 @@ class HTTPRunDB(RunDBInterface):
302
307
 
303
308
  def connect(self, secrets=None):
304
309
  """Connect to the MLRun API server. Must be called prior to executing any other method.
305
- The code utilizes the URL for the API server from the configuration - ``mlconf.dbpath``.
310
+ The code utilizes the URL for the API server from the configuration - ``config.dbpath``.
306
311
 
307
312
  For example::
308
313
 
309
- mlconf.dbpath = mlconf.dbpath or 'http://mlrun-api:8080'
314
+ config.dbpath = config.dbpath or 'http://mlrun-api:8080'
310
315
  db = get_run_db().connect()
311
316
  """
312
317
  # hack to allow unit tests to instantiate HTTPRunDB without a real server behind
@@ -454,6 +459,10 @@ class HTTPRunDB(RunDBInterface):
454
459
  setattr(
455
460
  config.feature_store.data_prefixes, prefix, server_prefix_value
456
461
  )
462
+ config.feature_store.default_targets = (
463
+ server_cfg.get("feature_store_default_targets")
464
+ or config.feature_store.default_targets
465
+ )
457
466
 
458
467
  except Exception as exc:
459
468
  logger.warning(
@@ -500,7 +509,7 @@ class HTTPRunDB(RunDBInterface):
500
509
  if offset < 0:
501
510
  raise MLRunInvalidArgumentError("Offset cannot be negative")
502
511
  if size is None:
503
- size = int(mlrun.mlconf.httpdb.logs.pull_logs_default_size_limit)
512
+ size = int(config.httpdb.logs.pull_logs_default_size_limit)
504
513
  elif size == -1:
505
514
  logger.warning(
506
515
  "Retrieving all logs. This may be inefficient and can result in a large log."
@@ -546,25 +555,23 @@ class HTTPRunDB(RunDBInterface):
546
555
 
547
556
  state, text = self.get_log(uid, project, offset=offset)
548
557
  if text:
549
- print(text.decode(errors=mlrun.mlconf.httpdb.logs.decode.errors))
558
+ print(text.decode(errors=config.httpdb.logs.decode.errors))
550
559
  nil_resp = 0
551
560
  while True:
552
561
  offset += len(text)
553
562
  # if we get 3 nil responses in a row, increase the sleep time to 10 seconds
554
563
  # TODO: refactor this to use a conditional backoff mechanism
555
564
  if nil_resp < 3:
556
- time.sleep(int(mlrun.mlconf.httpdb.logs.pull_logs_default_interval))
565
+ time.sleep(int(config.httpdb.logs.pull_logs_default_interval))
557
566
  else:
558
567
  time.sleep(
559
- int(
560
- mlrun.mlconf.httpdb.logs.pull_logs_backoff_no_logs_default_interval
561
- )
568
+ int(config.httpdb.logs.pull_logs_backoff_no_logs_default_interval)
562
569
  )
563
570
  state, text = self.get_log(uid, project, offset=offset)
564
571
  if text:
565
572
  nil_resp = 0
566
573
  print(
567
- text.decode(errors=mlrun.mlconf.httpdb.logs.decode.errors),
574
+ text.decode(errors=config.httpdb.logs.decode.errors),
568
575
  end="",
569
576
  )
570
577
  else:
@@ -1135,17 +1142,17 @@ class HTTPRunDB(RunDBInterface):
1135
1142
  structured_dict = {}
1136
1143
  for project, job_runtime_resources_map in response.json().items():
1137
1144
  for job_id, runtime_resources in job_runtime_resources_map.items():
1138
- structured_dict.setdefault(project, {})[
1139
- job_id
1140
- ] = mlrun.common.schemas.RuntimeResources(**runtime_resources)
1145
+ structured_dict.setdefault(project, {})[job_id] = (
1146
+ mlrun.common.schemas.RuntimeResources(**runtime_resources)
1147
+ )
1141
1148
  return structured_dict
1142
1149
  elif group_by == mlrun.common.schemas.ListRuntimeResourcesGroupByField.project:
1143
1150
  structured_dict = {}
1144
1151
  for project, kind_runtime_resources_map in response.json().items():
1145
1152
  for kind, runtime_resources in kind_runtime_resources_map.items():
1146
- structured_dict.setdefault(project, {})[
1147
- kind
1148
- ] = mlrun.common.schemas.RuntimeResources(**runtime_resources)
1153
+ structured_dict.setdefault(project, {})[kind] = (
1154
+ mlrun.common.schemas.RuntimeResources(**runtime_resources)
1155
+ )
1149
1156
  return structured_dict
1150
1157
  else:
1151
1158
  raise NotImplementedError(
@@ -1173,7 +1180,8 @@ class HTTPRunDB(RunDBInterface):
1173
1180
  :param force: Force deletion - delete the runtime resource even if it's not in terminal state or if the grace
1174
1181
  period didn't pass.
1175
1182
  :param grace_period: Grace period given to the runtime resource before they are actually removed, counted from
1176
- the moment they moved to terminal state (defaults to mlrun.mlconf.runtime_resources_deletion_grace_period).
1183
+ the moment they moved to terminal state
1184
+ (defaults to mlrun.config.config.runtime_resources_deletion_grace_period).
1177
1185
 
1178
1186
  :returns: :py:class:`~mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput` listing the runtime resources
1179
1187
  that were removed.
@@ -1203,9 +1211,9 @@ class HTTPRunDB(RunDBInterface):
1203
1211
  structured_dict = {}
1204
1212
  for project, kind_runtime_resources_map in response.json().items():
1205
1213
  for kind, runtime_resources in kind_runtime_resources_map.items():
1206
- structured_dict.setdefault(project, {})[
1207
- kind
1208
- ] = mlrun.common.schemas.RuntimeResources(**runtime_resources)
1214
+ structured_dict.setdefault(project, {})[kind] = (
1215
+ mlrun.common.schemas.RuntimeResources(**runtime_resources)
1216
+ )
1209
1217
  return structured_dict
1210
1218
 
1211
1219
  def create_schedule(
@@ -1340,7 +1348,7 @@ class HTTPRunDB(RunDBInterface):
1340
1348
  logger.warning(
1341
1349
  "Building a function image to ECR and loading an S3 source to the image may require conflicting access "
1342
1350
  "keys. Only the permissions granted to the platform's configured secret will take affect "
1343
- "(see mlrun.mlconf.httpdb.builder.docker_registry_secret). "
1351
+ "(see mlrun.config.config.httpdb.builder.docker_registry_secret). "
1344
1352
  "In case the permissions are limited to ECR scope, you may use pull_at_runtime=True instead",
1345
1353
  source=func.spec.build.source,
1346
1354
  load_source_on_run=func.spec.build.load_source_on_run,
@@ -1495,7 +1503,7 @@ class HTTPRunDB(RunDBInterface):
1495
1503
  Retrieve updated information on project background tasks being executed.
1496
1504
  If no filter is provided, will return background tasks from the last week.
1497
1505
 
1498
- :param project: Project name (defaults to mlrun.mlconf.default_project).
1506
+ :param project: Project name (defaults to mlrun.config.config.default_project).
1499
1507
  :param state: List only background tasks whose state is specified.
1500
1508
  :param created_from: Filter by background task created time in ``[created_from, created_to]``.
1501
1509
  :param created_to: Filter by background task created time in ``[created_from, created_to]``.
@@ -3071,36 +3079,58 @@ class HTTPRunDB(RunDBInterface):
3071
3079
  resp = self.api_call(method="POST", path=path, params=params)
3072
3080
  return resp.json()["func"]
3073
3081
 
3074
- def create_model_monitoring_controller(
3082
+ def update_model_monitoring_controller(
3075
3083
  self,
3076
- project: str = "",
3077
- default_controller_image: str = "mlrun/mlrun",
3084
+ project: str,
3078
3085
  base_period: int = 10,
3086
+ image: str = "mlrun/mlrun",
3079
3087
  ):
3080
3088
  """
3081
- Submit model monitoring application controller job along with deploying the model monitoring writer function.
3082
- While the main goal of the controller job is to handle the monitoring processing and triggering applications,
3083
- the goal of the model monitoring writer function is to write all the monitoring application results to the
3084
- databases. Note that the default scheduling policy of the controller job is to run every 10 min.
3089
+ Redeploy model monitoring application controller function.
3085
3090
 
3086
3091
  :param project: Project name.
3087
- :param default_controller_image: The default image of the model monitoring controller job. Note that the writer
3088
- function, which is a real time nuclio functino, will be deployed with the same
3089
- image. By default, the image is mlrun/mlrun.
3090
- :param base_period: Minutes to determine the frequency in which the model monitoring controller job
3091
- is running. By default, the base period is 5 minutes.
3092
- :returns: model monitoring controller job as a dictionary. You can easily convert the returned function into a
3093
- runtime object by calling ~mlrun.new_function.
3092
+ :param base_period: The time period in minutes in which the model monitoring controller function
3093
+ triggers. By default, the base period is 10 minutes.
3094
+ :param image: The image of the model monitoring controller function.
3095
+ By default, the image is mlrun/mlrun.
3094
3096
  """
3095
3097
 
3096
3098
  params = {
3097
- "default_controller_image": default_controller_image,
3099
+ "image": image,
3098
3100
  "base_period": base_period,
3099
3101
  }
3100
- path = f"projects/{project}/jobs/model-monitoring-controller"
3102
+ path = f"projects/{project}/model-monitoring/model-monitoring-controller"
3103
+ self.api_call(method="POST", path=path, params=params)
3101
3104
 
3102
- resp = self.api_call(method="POST", path=path, params=params)
3103
- return resp.json()["func"]
3105
+ def enable_model_monitoring(
3106
+ self,
3107
+ project: str,
3108
+ base_period: int = 10,
3109
+ image: str = "mlrun/mlrun",
3110
+ ):
3111
+ """
3112
+ Deploy model monitoring application controller, writer and stream functions.
3113
+ While the main goal of the controller function is to handle the monitoring processing and triggering
3114
+ applications, the goal of the model monitoring writer function is to write all the monitoring
3115
+ application results to the databases.
3116
+ The stream function goal is to monitor the log of the data stream. It is triggered when a new log entry
3117
+ is detected. It processes the new events into statistics that are then written to statistics databases.
3118
+
3119
+
3120
+ :param project: Project name.
3121
+ :param base_period: The time period in minutes in which the model monitoring controller function
3122
+ triggers. By default, the base period is 10 minutes.
3123
+ :param image: The image of the model monitoring controller, writer & monitoring
3124
+ stream functions, which are real time nuclio functions.
3125
+ By default, the image is mlrun/mlrun.
3126
+ """
3127
+
3128
+ params = {
3129
+ "base_period": base_period,
3130
+ "image": image,
3131
+ }
3132
+ path = f"projects/{project}/model-monitoring/enable-model-monitoring"
3133
+ self.api_call(method="POST", path=path, params=params)
3104
3134
 
3105
3135
  def create_hub_source(
3106
3136
  self, source: Union[dict, mlrun.common.schemas.IndexedHubSource]
@@ -3340,20 +3370,61 @@ class HTTPRunDB(RunDBInterface):
3340
3370
  body=dict_to_json(authorization_verification_input.dict()),
3341
3371
  )
3342
3372
 
3343
- def list_api_gateways(self, project=None):
3373
+ def list_api_gateways(self, project=None) -> mlrun.common.schemas.APIGatewaysOutput:
3344
3374
  """
3345
3375
  Returns a list of Nuclio api gateways
3346
- :param project: optional str parameter to filter by project, if not passed, default Nuclio's value is taken
3376
+ :param project: optional str parameter to filter by project, if not passed, default project value is taken
3347
3377
 
3348
- :return: json with the list of Nuclio Api Gateways
3349
- (json example is here
3350
- https://github.com/nuclio/nuclio/blob/development/docs/reference/api/README.md#listing-all-api-gateways)
3378
+ :return: :py:class:`~mlrun.common.schemas.APIGateways`.
3351
3379
  """
3352
3380
  project = project or config.default_project
3353
3381
  error = "list api gateways"
3354
- endpoint_path = f"projects/{project}/nuclio/api-gateways"
3355
- resp = self.api_call("GET", endpoint_path, error)
3356
- return resp.json()
3382
+ endpoint_path = f"projects/{project}/api-gateways"
3383
+ response = self.api_call("GET", endpoint_path, error)
3384
+ return mlrun.common.schemas.APIGatewaysOutput(**response.json())
3385
+
3386
+ def get_api_gateway(self, name, project=None) -> mlrun.common.schemas.APIGateway:
3387
+ """
3388
+ Returns an API gateway
3389
+ :param name: API gateway name
3390
+ :param project: optional str parameter to filter by project, if not passed, default project value is taken
3391
+
3392
+ :return: :py:class:`~mlrun.common.schemas.APIGateway`.
3393
+ """
3394
+ project = project or config.default_project
3395
+ error = "get api gateway"
3396
+ endpoint_path = f"projects/{project}/api-gateways/{name}"
3397
+ response = self.api_call("GET", endpoint_path, error)
3398
+ return mlrun.common.schemas.APIGateway(**response.json())
3399
+
3400
+ def store_api_gateway(
3401
+ self,
3402
+ api_gateway: Union[
3403
+ mlrun.common.schemas.APIGateway,
3404
+ mlrun.runtimes.nuclio.api_gateway.APIGateway,
3405
+ ],
3406
+ project: Optional[str] = None,
3407
+ ) -> mlrun.common.schemas.APIGateway:
3408
+ """
3409
+ Stores an API Gateway.
3410
+ :param api_gateway :py:class:`~mlrun.runtimes.nuclio.APIGateway`
3411
+ or :py:class:`~mlrun.common.schemas.APIGateway`: API Gateway entity.
3412
+ :param project: project name. Mandatory if api_gateway is mlrun.common.schemas.APIGateway.
3413
+
3414
+ :return: :py:class:`~mlrun.common.schemas.APIGateway`.
3415
+ """
3416
+
3417
+ if isinstance(api_gateway, mlrun.runtimes.nuclio.api_gateway.APIGateway):
3418
+ api_gateway = api_gateway.to_scheme()
3419
+ endpoint_path = f"projects/{project}/api-gateways/{api_gateway.metadata.name}"
3420
+ error = "store api gateways"
3421
+ response = self.api_call(
3422
+ "PUT",
3423
+ endpoint_path,
3424
+ error,
3425
+ json=api_gateway.dict(exclude_unset=True, exclude_none=True),
3426
+ )
3427
+ return mlrun.common.schemas.APIGateway(**response.json())
3357
3428
 
3358
3429
  def trigger_migrations(self) -> Optional[mlrun.common.schemas.BackgroundTask]:
3359
3430
  """Trigger migrations (will do nothing if no migrations are needed) and wait for them to finish if actually
@@ -3451,7 +3522,7 @@ class HTTPRunDB(RunDBInterface):
3451
3522
  run_name: Optional[str] = None,
3452
3523
  namespace: Optional[str] = None,
3453
3524
  notifications: list[mlrun.model.Notification] = None,
3454
- ):
3525
+ ) -> mlrun.common.schemas.WorkflowResponse:
3455
3526
  """
3456
3527
  Submitting workflow for a remote execution.
3457
3528
 
mlrun/db/nopdb.py CHANGED
@@ -506,6 +506,19 @@ class NopDB(RunDBInterface):
506
506
  ):
507
507
  pass
508
508
 
509
+ def store_api_gateway(
510
+ self,
511
+ project: str,
512
+ api_gateway: mlrun.runtimes.nuclio.APIGateway,
513
+ ) -> mlrun.common.schemas.APIGateway:
514
+ pass
515
+
516
+ def list_api_gateways(self, project=None):
517
+ pass
518
+
519
+ def get_api_gateway(self, name, project=None):
520
+ pass
521
+
509
522
  def verify_authorization(
510
523
  self,
511
524
  authorization_verification_input: mlrun.common.schemas.AuthorizationVerificationInput,
mlrun/execution.py CHANGED
@@ -559,9 +559,9 @@ class MLClientCtx:
559
559
  for k, v in get_in(task, ["status", "results"], {}).items():
560
560
  self._results[k] = v
561
561
  for artifact in get_in(task, ["status", run_keys.artifacts], []):
562
- self._artifacts_manager.artifacts[
563
- artifact["metadata"]["key"]
564
- ] = artifact
562
+ self._artifacts_manager.artifacts[artifact["metadata"]["key"]] = (
563
+ artifact
564
+ )
565
565
  self._artifacts_manager.link_artifact(
566
566
  self.project,
567
567
  self.name,
@@ -490,10 +490,10 @@ class FeatureVector(ModelObj):
490
490
  vector = fstore.FeatureVector("my-vec", features)
491
491
 
492
492
  # get the vector as a dataframe
493
- df = fstore.get_offline_features(vector).to_dataframe()
493
+ df = vector.get_offline_features().to_dataframe()
494
494
 
495
495
  # return an online/real-time feature service
496
- svc = fstore.get_online_feature_service(vector, impute_policy={"*": "$mean"})
496
+ svc = vector.get_online_feature_service(impute_policy={"*": "$mean"})
497
497
  resp = svc.get([{"stock": "GOOG"}])
498
498
 
499
499
  :param name: List of names of targets to delete (default: delete all ingested targets)
@@ -389,9 +389,9 @@ class LoggingCallback(Callback):
389
389
  ):
390
390
  try:
391
391
  self._get_hyperparameter(key_chain=learning_rate_key_chain)
392
- self._dynamic_hyperparameters_keys[
393
- learning_rate_key
394
- ] = learning_rate_key_chain
392
+ self._dynamic_hyperparameters_keys[learning_rate_key] = (
393
+ learning_rate_key_chain
394
+ )
395
395
  except (KeyError, IndexError, ValueError):
396
396
  pass
397
397
 
@@ -263,13 +263,13 @@ class TFKerasModelHandler(DLModelHandler):
263
263
  # Update the paths and log artifacts if context is available:
264
264
  if self._weights_file is not None:
265
265
  if self._context is not None:
266
- artifacts[
267
- self._get_weights_file_artifact_name()
268
- ] = self._context.log_artifact(
269
- self._weights_file,
270
- local_path=self._weights_file,
271
- artifact_path=output_path,
272
- db_key=False,
266
+ artifacts[self._get_weights_file_artifact_name()] = (
267
+ self._context.log_artifact(
268
+ self._weights_file,
269
+ local_path=self._weights_file,
270
+ artifact_path=output_path,
271
+ db_key=False,
272
+ )
273
273
  )
274
274
 
275
275
  return artifacts if self._context is not None else None
mlrun/k8s_utils.py CHANGED
@@ -133,13 +133,13 @@ def sanitize_label_value(value: str) -> str:
133
133
  return re.sub(r"([^a-zA-Z0-9_.-]|^[^a-zA-Z0-9]|[^a-zA-Z0-9]$)", "-", value[:63])
134
134
 
135
135
 
136
- def verify_label_key(key):
136
+ def verify_label_key(key: str):
137
+ """
138
+ Verify that the label key is valid for Kubernetes.
139
+ Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
140
+ """
137
141
  if not key:
138
142
  raise mlrun.errors.MLRunInvalidArgumentError("label key cannot be empty")
139
- if key.startswith("k8s.io") or key.startswith("kubernetes.io"):
140
- raise mlrun.errors.MLRunInvalidArgumentError(
141
- "Labels cannot start with 'k8s.io' or 'kubernetes.io'"
142
- )
143
143
 
144
144
  mlrun.utils.helpers.verify_field_regex(
145
145
  f"project.metadata.labels.'{key}'",
@@ -147,6 +147,11 @@ def verify_label_key(key):
147
147
  mlrun.utils.regex.k8s_character_limit,
148
148
  )
149
149
 
150
+ if key.startswith("k8s.io/") or key.startswith("kubernetes.io/"):
151
+ raise mlrun.errors.MLRunInvalidArgumentError(
152
+ "Labels cannot start with 'k8s.io/' or 'kubernetes.io/'"
153
+ )
154
+
150
155
  parts = key.split("/")
151
156
  if len(parts) == 1:
152
157
  name = parts[0]
mlrun/kfpops.py CHANGED
@@ -41,8 +41,8 @@ from .utils import (
41
41
 
42
42
  # default KFP artifacts and output (ui metadata, metrics etc.)
43
43
  # directories to /tmp to allow running with security context
44
- KFPMETA_DIR = os.environ.get("KFPMETA_OUT_DIR", "/tmp")
45
- KFP_ARTIFACTS_DIR = os.environ.get("KFP_ARTIFACTS_DIR", "/tmp")
44
+ KFPMETA_DIR = "/tmp"
45
+ KFP_ARTIFACTS_DIR = "/tmp"
46
46
 
47
47
  project_annotation = "mlrun/project"
48
48
  run_annotation = "mlrun/pipeline-step-type"
@@ -71,7 +71,7 @@ def write_kfpmeta(struct):
71
71
  {"name": k, "numberValue": v} for k, v in results.items() if is_num(v)
72
72
  ],
73
73
  }
74
- with open(KFPMETA_DIR + "/mlpipeline-metrics.json", "w") as f:
74
+ with open(os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"), "w") as f:
75
75
  json.dump(metrics, f)
76
76
 
77
77
  struct = deepcopy(struct)
@@ -91,7 +91,14 @@ def write_kfpmeta(struct):
91
91
  elif key in results:
92
92
  val = results[key]
93
93
  try:
94
- path = "/".join([KFP_ARTIFACTS_DIR, key])
94
+ # NOTE: if key has "../x", it would fail on path traversal
95
+ path = os.path.join(KFP_ARTIFACTS_DIR, key)
96
+ if not mlrun.utils.helpers.is_safe_path(KFP_ARTIFACTS_DIR, path):
97
+ logger.warning(
98
+ "Path traversal is not allowed ignoring", path=path, key=key
99
+ )
100
+ continue
101
+ path = os.path.abspath(path)
95
102
  logger.info("Writing artifact output", path=path, val=val)
96
103
  with open(path, "w") as fp:
97
104
  fp.write(str(val))
@@ -109,7 +116,7 @@ def write_kfpmeta(struct):
109
116
  "outputs": output_artifacts
110
117
  + [{"type": "markdown", "storage": "inline", "source": text}]
111
118
  }
112
- with open(KFPMETA_DIR + "/mlpipeline-ui-metadata.json", "w") as f:
119
+ with open(os.path.join(KFPMETA_DIR, "mlpipeline-ui-metadata.json"), "w") as f:
113
120
  json.dump(metadata, f)
114
121
 
115
122
 
@@ -401,9 +408,9 @@ def mlrun_op(
401
408
  cmd += ["--label", f"{label}={val}"]
402
409
  for output in outputs:
403
410
  cmd += ["-o", str(output)]
404
- file_outputs[
405
- output.replace(".", "_")
406
- ] = f"/tmp/{output}" # not using path.join to avoid windows "\"
411
+ file_outputs[output.replace(".", "_")] = (
412
+ f"/tmp/{output}" # not using path.join to avoid windows "\"
413
+ )
407
414
  if project:
408
415
  cmd += ["--project", project]
409
416
  if handler:
@@ -450,8 +457,10 @@ def mlrun_op(
450
457
  command=cmd + [command],
451
458
  file_outputs=file_outputs,
452
459
  output_artifact_paths={
453
- "mlpipeline-ui-metadata": KFPMETA_DIR + "/mlpipeline-ui-metadata.json",
454
- "mlpipeline-metrics": KFPMETA_DIR + "/mlpipeline-metrics.json",
460
+ "mlpipeline-ui-metadata": os.path.join(
461
+ KFPMETA_DIR, "mlpipeline-ui-metadata.json"
462
+ ),
463
+ "mlpipeline-metrics": os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"),
455
464
  },
456
465
  )
457
466
  cop = add_default_function_resources(cop)
mlrun/model.py CHANGED
@@ -501,6 +501,7 @@ class ImageBuilder(ModelObj):
501
501
  requirements: list = None,
502
502
  extra_args=None,
503
503
  builder_env=None,
504
+ source_code_target_dir=None,
504
505
  ):
505
506
  self.functionSourceCode = functionSourceCode #: functionSourceCode
506
507
  self.codeEntryType = "" #: codeEntryType
@@ -521,6 +522,7 @@ class ImageBuilder(ModelObj):
521
522
  self.auto_build = auto_build #: auto_build
522
523
  self.build_pod = None
523
524
  self.requirements = requirements or [] #: pip requirements
525
+ self.source_code_target_dir = source_code_target_dir or None
524
526
 
525
527
  @property
526
528
  def source(self):
@@ -557,6 +559,7 @@ class ImageBuilder(ModelObj):
557
559
  overwrite=False,
558
560
  builder_env=None,
559
561
  extra_args=None,
562
+ source_code_target_dir=None,
560
563
  ):
561
564
  if image:
562
565
  self.image = image
@@ -582,6 +585,8 @@ class ImageBuilder(ModelObj):
582
585
  self.builder_env = builder_env
583
586
  if extra_args:
584
587
  self.extra_args = extra_args
588
+ if source_code_target_dir:
589
+ self.source_code_target_dir = source_code_target_dir
585
590
 
586
591
  def with_commands(
587
592
  self,
@@ -436,9 +436,9 @@ def _generate_model_endpoint(
436
436
  ] = possible_drift_threshold
437
437
 
438
438
  model_endpoint.spec.monitoring_mode = monitoring_mode
439
- model_endpoint.status.first_request = (
440
- model_endpoint.status.last_request
441
- ) = datetime_now().isoformat()
439
+ model_endpoint.status.first_request = model_endpoint.status.last_request = (
440
+ datetime_now().isoformat()
441
+ )
442
442
  if sample_set_statistics:
443
443
  model_endpoint.status.feature_stats = sample_set_statistics
444
444
 
@@ -267,7 +267,7 @@ class PushToMonitoringWriter(StepToDict):
267
267
  self.project = project
268
268
  self.application_name_to_push = writer_application_name
269
269
  self.stream_uri = stream_uri or get_stream_path(
270
- project=self.project, application_name=self.application_name_to_push
270
+ project=self.project, function_name=self.application_name_to_push
271
271
  )
272
272
  self.output_stream = None
273
273
  self.name = name or "PushToMonitoringWriter"
@@ -0,0 +1,13 @@
1
+ # Copyright 2024 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.