mlrun 1.6.0rc26__py3-none-any.whl → 1.6.3rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (66):
  1. mlrun/artifacts/manager.py +6 -0
  2. mlrun/artifacts/model.py +28 -22
  3. mlrun/common/db/sql_session.py +3 -0
  4. mlrun/common/model_monitoring/helpers.py +4 -2
  5. mlrun/common/schemas/__init__.py +2 -0
  6. mlrun/common/schemas/common.py +40 -0
  7. mlrun/common/schemas/model_monitoring/__init__.py +1 -0
  8. mlrun/common/schemas/model_monitoring/constants.py +21 -5
  9. mlrun/common/schemas/project.py +2 -0
  10. mlrun/config.py +43 -17
  11. mlrun/data_types/data_types.py +4 -0
  12. mlrun/datastore/azure_blob.py +9 -9
  13. mlrun/datastore/base.py +22 -44
  14. mlrun/datastore/datastore.py +7 -3
  15. mlrun/datastore/datastore_profile.py +15 -3
  16. mlrun/datastore/google_cloud_storage.py +7 -7
  17. mlrun/datastore/sources.py +17 -4
  18. mlrun/datastore/targets.py +3 -1
  19. mlrun/datastore/utils.py +11 -1
  20. mlrun/datastore/v3io.py +70 -46
  21. mlrun/db/base.py +18 -0
  22. mlrun/db/httpdb.py +41 -36
  23. mlrun/execution.py +3 -3
  24. mlrun/feature_store/api.py +133 -132
  25. mlrun/feature_store/feature_set.py +89 -0
  26. mlrun/feature_store/feature_vector.py +120 -0
  27. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
  28. mlrun/frameworks/tf_keras/model_handler.py +7 -7
  29. mlrun/k8s_utils.py +56 -0
  30. mlrun/kfpops.py +19 -10
  31. mlrun/model.py +6 -0
  32. mlrun/model_monitoring/api.py +8 -8
  33. mlrun/model_monitoring/batch.py +1 -1
  34. mlrun/model_monitoring/controller.py +0 -7
  35. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +13 -13
  36. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
  37. mlrun/model_monitoring/stream_processing.py +52 -38
  38. mlrun/package/packagers/pandas_packagers.py +3 -3
  39. mlrun/package/utils/_archiver.py +3 -1
  40. mlrun/platforms/iguazio.py +6 -65
  41. mlrun/projects/pipelines.py +29 -12
  42. mlrun/projects/project.py +100 -61
  43. mlrun/run.py +2 -0
  44. mlrun/runtimes/base.py +24 -1
  45. mlrun/runtimes/function.py +14 -15
  46. mlrun/runtimes/kubejob.py +5 -3
  47. mlrun/runtimes/local.py +2 -2
  48. mlrun/runtimes/mpijob/abstract.py +6 -6
  49. mlrun/runtimes/pod.py +3 -3
  50. mlrun/runtimes/serving.py +7 -14
  51. mlrun/runtimes/sparkjob/spark3job.py +3 -3
  52. mlrun/serving/remote.py +4 -2
  53. mlrun/serving/routers.py +14 -8
  54. mlrun/utils/async_http.py +3 -3
  55. mlrun/utils/helpers.py +59 -3
  56. mlrun/utils/http.py +3 -3
  57. mlrun/utils/logger.py +2 -2
  58. mlrun/utils/notifications/notification_pusher.py +6 -6
  59. mlrun/utils/regex.py +5 -1
  60. mlrun/utils/version/version.json +2 -2
  61. {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/METADATA +21 -23
  62. {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/RECORD +66 -65
  63. {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/WHEEL +1 -1
  64. {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/LICENSE +0 -0
  65. {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/entry_points.txt +0 -0
  66. {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/top_level.txt +0 -0
mlrun/k8s_utils.py CHANGED
@@ -18,6 +18,7 @@ import kubernetes.client
 
 import mlrun.common.schemas
 import mlrun.errors
+import mlrun.utils.regex
 
 from .config import config as mlconfig
 
@@ -131,3 +132,58 @@ def sanitize_label_value(value: str) -> str:
     :return: string fully compliant with k8s label value expectations
     """
     return re.sub(r"([^a-zA-Z0-9_.-]|^[^a-zA-Z0-9]|[^a-zA-Z0-9]$)", "-", value[:63])
+
+
+def verify_label_key(key: str):
+    """
+    Verify that the label key is valid for Kubernetes.
+    Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
+    """
+    if not key:
+        raise mlrun.errors.MLRunInvalidArgumentError("label key cannot be empty")
+
+    mlrun.utils.helpers.verify_field_regex(
+        f"project.metadata.labels.'{key}'",
+        key,
+        mlrun.utils.regex.k8s_character_limit,
+    )
+
+    if key.startswith("k8s.io/") or key.startswith("kubernetes.io/"):
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "Labels cannot start with 'k8s.io/' or 'kubernetes.io/'"
+        )
+
+    parts = key.split("/")
+    if len(parts) == 1:
+        name = parts[0]
+    elif len(parts) == 2:
+        prefix, name = parts
+        if len(prefix) == 0:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Label key prefix cannot be empty"
+            )
+
+        # prefix must adhere dns_1123_subdomain
+        mlrun.utils.helpers.verify_field_regex(
+            f"Project.metadata.labels.'{key}'",
+            prefix,
+            mlrun.utils.regex.dns_1123_subdomain,
+        )
+    else:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "Label key can only contain one '/'"
+        )
+
+    mlrun.utils.helpers.verify_field_regex(
+        f"project.metadata.labels.'{key}'",
+        name,
+        mlrun.utils.regex.qualified_name,
+    )
+
+
+def verify_label_value(value, label_key):
+    mlrun.utils.helpers.verify_field_regex(
+        f"project.metadata.labels.'{label_key}'",
+        value,
+        mlrun.utils.regex.label_value,
+    )
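
A minimal sketch of how the new validators might be exercised, assuming mlrun 1.6.3rc1 is installed (the label keys below are made-up examples):

import mlrun.errors
import mlrun.k8s_utils

# a prefixed key: the prefix must be a DNS-1123 subdomain, the name a qualified name
mlrun.k8s_utils.verify_label_key("mlrun/owner")
mlrun.k8s_utils.verify_label_value("admin", label_key="mlrun/owner")

# empty keys, reserved prefixes, and keys with more than one '/' are rejected
for bad_key in ("", "kubernetes.io/arch", "a/b/c"):
    try:
        mlrun.k8s_utils.verify_label_key(bad_key)
    except mlrun.errors.MLRunInvalidArgumentError as err:
        print(f"{bad_key!r} rejected: {err}")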
mlrun/kfpops.py CHANGED
@@ -41,8 +41,8 @@ from .utils import (
 
 # default KFP artifacts and output (ui metadata, metrics etc.)
 # directories to /tmp to allow running with security context
-KFPMETA_DIR = os.environ.get("KFPMETA_OUT_DIR", "/tmp")
-KFP_ARTIFACTS_DIR = os.environ.get("KFP_ARTIFACTS_DIR", "/tmp")
+KFPMETA_DIR = "/tmp"
+KFP_ARTIFACTS_DIR = "/tmp"
 
 project_annotation = "mlrun/project"
 run_annotation = "mlrun/pipeline-step-type"
@@ -71,7 +71,7 @@ def write_kfpmeta(struct):
             {"name": k, "numberValue": v} for k, v in results.items() if is_num(v)
         ],
     }
-    with open(KFPMETA_DIR + "/mlpipeline-metrics.json", "w") as f:
+    with open(os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"), "w") as f:
         json.dump(metrics, f)
 
     struct = deepcopy(struct)
@@ -91,7 +91,14 @@ def write_kfpmeta(struct):
         elif key in results:
             val = results[key]
             try:
-                path = "/".join([KFP_ARTIFACTS_DIR, key])
+                # NOTE: if key has "../x", it would fail on path traversal
+                path = os.path.join(KFP_ARTIFACTS_DIR, key)
+                if not mlrun.utils.helpers.is_safe_path(KFP_ARTIFACTS_DIR, path):
+                    logger.warning(
+                        "Path traversal is not allowed ignoring", path=path, key=key
+                    )
+                    continue
+                path = os.path.abspath(path)
                 logger.info("Writing artifact output", path=path, val=val)
                 with open(path, "w") as fp:
                     fp.write(str(val))
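
The guard above delegates to mlrun.utils.helpers.is_safe_path, one of the helpers added in this release (see mlrun/utils/helpers.py in the file list). A minimal sketch of the containment check such a helper performs; this is an illustration, not the shipped implementation:

import os

def is_safe_path(base: str, path: str) -> bool:
    # a path is safe when, after resolution, it still lives under the base directory
    resolved_base = os.path.abspath(base)
    resolved_path = os.path.abspath(path)
    return os.path.commonpath([resolved_base, resolved_path]) == resolved_base

print(is_safe_path("/tmp", os.path.join("/tmp", "accuracy")))       # True
print(is_safe_path("/tmp", os.path.join("/tmp", "../etc/passwd")))  # False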
@@ -109,7 +116,7 @@ def write_kfpmeta(struct):
         "outputs": output_artifacts
         + [{"type": "markdown", "storage": "inline", "source": text}]
     }
-    with open(KFPMETA_DIR + "/mlpipeline-ui-metadata.json", "w") as f:
+    with open(os.path.join(KFPMETA_DIR, "mlpipeline-ui-metadata.json"), "w") as f:
         json.dump(metadata, f)
 
 
@@ -401,9 +408,9 @@ def mlrun_op(
         cmd += ["--label", f"{label}={val}"]
     for output in outputs:
         cmd += ["-o", str(output)]
-        file_outputs[
-            output.replace(".", "_")
-        ] = f"/tmp/{output}"  # not using path.join to avoid windows "\"
+        file_outputs[output.replace(".", "_")] = (
+            f"/tmp/{output}"  # not using path.join to avoid windows "\"
+        )
     if project:
         cmd += ["--project", project]
     if handler:
@@ -450,8 +457,10 @@ def mlrun_op(
         command=cmd + [command],
         file_outputs=file_outputs,
         output_artifact_paths={
-            "mlpipeline-ui-metadata": KFPMETA_DIR + "/mlpipeline-ui-metadata.json",
-            "mlpipeline-metrics": KFPMETA_DIR + "/mlpipeline-metrics.json",
+            "mlpipeline-ui-metadata": os.path.join(
+                KFPMETA_DIR, "mlpipeline-ui-metadata.json"
+            ),
+            "mlpipeline-metrics": os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"),
         },
     )
     cop = add_default_function_resources(cop)
mlrun/model.py CHANGED
@@ -62,6 +62,7 @@ class ModelObj:
             return new_type.from_dict(param)
         return param
 
+    @mlrun.utils.filter_warnings("ignore", FutureWarning)
     def to_dict(self, fields=None, exclude=None):
         """convert the object to a python dictionary
 
@@ -359,6 +360,7 @@ class ImageBuilder(ModelObj):
         requirements: list = None,
         extra_args=None,
         builder_env=None,
+        source_code_target_dir=None,
     ):
         self.functionSourceCode = functionSourceCode  #: functionSourceCode
         self.codeEntryType = ""  #: codeEntryType
@@ -379,6 +381,7 @@ class ImageBuilder(ModelObj):
         self.auto_build = auto_build  #: auto_build
         self.build_pod = None
         self.requirements = requirements or []  #: pip requirements
+        self.source_code_target_dir = source_code_target_dir or None
 
     @property
     def source(self):
@@ -415,6 +418,7 @@ class ImageBuilder(ModelObj):
         overwrite=False,
         builder_env=None,
         extra_args=None,
+        source_code_target_dir=None,
     ):
         if image:
             self.image = image
@@ -440,6 +444,8 @@ class ImageBuilder(ModelObj):
             self.builder_env = builder_env
         if extra_args:
             self.extra_args = extra_args
+        if source_code_target_dir:
+            self.source_code_target_dir = source_code_target_dir
 
     def with_commands(
         self,
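
The decorator added to ModelObj.to_dict suppresses FutureWarning for the duration of serialization. A rough sketch of what a decorator of this shape typically looks like (the real one ships in mlrun.utils; this is illustrative only):

import functools
import warnings

def filter_warnings(action, category):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # scope the warning filter to this call only
            with warnings.catch_warnings():
                warnings.simplefilter(action, category)
                return func(*args, **kwargs)
        return wrapper
    return decorator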
mlrun/model_monitoring/api.py CHANGED
@@ -436,9 +436,9 @@ def _generate_model_endpoint(
     ] = possible_drift_threshold
 
     model_endpoint.spec.monitoring_mode = monitoring_mode
-    model_endpoint.status.first_request = (
-        model_endpoint.status.last_request
-    ) = datetime_now().isoformat()
+    model_endpoint.status.first_request = model_endpoint.status.last_request = (
+        datetime_now().isoformat()
+    )
     if sample_set_statistics:
         model_endpoint.status.feature_stats = sample_set_statistics
 
@@ -476,11 +476,11 @@ def trigger_drift_batch_job(
     db_session = mlrun.get_run_db()
 
     # Register the monitoring batch job (do nothing if already exist) and get the job function as a dictionary
-    batch_function_dict: typing.Dict[
-        str, typing.Any
-    ] = db_session.deploy_monitoring_batch_job(
-        project=project,
-        default_batch_image=default_batch_image,
+    batch_function_dict: typing.Dict[str, typing.Any] = (
+        db_session.deploy_monitoring_batch_job(
+            project=project,
+            default_batch_image=default_batch_image,
+        )
     )
 
     # Prepare current run params
mlrun/model_monitoring/batch.py CHANGED
@@ -992,7 +992,7 @@ class BatchProcessor:
         """
         stream_http_path = (
             mlrun.mlconf.model_endpoint_monitoring.default_http_sink.format(
-                project=self.project
+                project=self.project, namespace=mlrun.mlconf.namespace
             )
         )
 
mlrun/model_monitoring/controller.py CHANGED
@@ -426,13 +426,6 @@ class MonitoringApplicationController:
             m_fs = fstore.get_feature_set(
                 endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
             )
-            labels = endpoint[mm_constants.EventFieldType.LABEL_NAMES]
-            if labels:
-                if isinstance(labels, str):
-                    labels = json.loads(labels)
-                for label in labels:
-                    if label not in list(m_fs.spec.features.keys()):
-                        m_fs.add_feature(fstore.Feature(name=label, value_type="float"))
 
             for application in applications_names:
                 batch_window = batch_window_generator.get_batch_window(
mlrun/model_monitoring/stores/kv_model_endpoint_store.py CHANGED
@@ -540,24 +540,24 @@ class KVModelEndpointStore(ModelEndpointStore):
             and endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.METRICS]
             == "null"
         ):
-            endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.METRICS
-            ] = json.dumps(
-                {
-                    mlrun.common.schemas.model_monitoring.EventKeyMetrics.GENERIC: {
-                        mlrun.common.schemas.model_monitoring.EventLiveStats.LATENCY_AVG_1H: 0,
-                        mlrun.common.schemas.model_monitoring.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
+            endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.METRICS] = (
+                json.dumps(
+                    {
+                        mlrun.common.schemas.model_monitoring.EventKeyMetrics.GENERIC: {
+                            mlrun.common.schemas.model_monitoring.EventLiveStats.LATENCY_AVG_1H: 0,
+                            mlrun.common.schemas.model_monitoring.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
+                        }
                     }
-                }
+                )
             )
         # Validate key `uid` instead of `endpoint_id`
         # For backwards compatibility reasons, we replace the `endpoint_id` with `uid` which is the updated key name
         if mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID in endpoint:
-            endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.UID
-            ] = endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID
-            ]
+            endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID] = (
+                endpoint[
+                    mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID
+                ]
+            )
 
     @staticmethod
     def _encode_field(field: typing.Union[str, bytes]) -> bytes:
mlrun/model_monitoring/stores/sql_model_endpoint_store.py CHANGED
@@ -31,7 +31,6 @@ from .models import get_model_endpoints_table
 
 
 class SQLModelEndpointStore(ModelEndpointStore):
-
     """
     Handles the DB operations when the DB target is from type SQL. For the SQL operations, we use SQLAlchemy, a Python
     SQL toolkit that handles the communication with the database. When using SQL for storing the model endpoints
mlrun/model_monitoring/stream_processing.py CHANGED
@@ -24,6 +24,7 @@ import mlrun
 import mlrun.common.model_monitoring.helpers
 import mlrun.config
 import mlrun.datastore.targets
+import mlrun.feature_store as fstore
 import mlrun.feature_store.steps
 import mlrun.model_monitoring.prometheus
 import mlrun.serving.states
@@ -49,7 +50,7 @@ class EventStreamProcessor:
         parquet_batching_timeout_secs: int,
         parquet_target: str,
         sample_window: int = 10,
-        aggregate_windows: typing.Optional[typing.List[str]] = None,
+        aggregate_windows: typing.Optional[list[str]] = None,
         aggregate_period: str = "30s",
         model_monitoring_access_key: str = None,
     ):
@@ -180,14 +181,14 @@ class EventStreamProcessor:
 
         apply_event_routing()
 
-        # Step 2 - Filter out events with no '-' in path which indicates that the event is supposed to be processed
+        # Step 2 - Filter out events with '-' in the path basename from going forward
         # through the next steps of the stream graph
         def apply_storey_filter_stream_events():
             # Remove none values from each event
             graph.add_step(
                 "storey.Filter",
                 "filter_stream_event",
-                _fn="('-' not in event.path)",
+                _fn="('-' not in event.path.split('/')[-1])",
                 full_event=True,
             )
 
@@ -587,6 +588,8 @@ class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
         for key in [
             EventFieldType.FEATURES,
             EventFieldType.NAMED_FEATURES,
+            EventFieldType.PREDICTION,
+            EventFieldType.NAMED_PREDICTIONS,
         ]:
             event.pop(key, None)
 
@@ -629,14 +632,14 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         self.project: str = project
 
         # First and last requests timestamps (value) of each endpoint (key)
-        self.first_request: typing.Dict[str, str] = dict()
-        self.last_request: typing.Dict[str, str] = dict()
+        self.first_request: dict[str, str] = dict()
+        self.last_request: dict[str, str] = dict()
 
         # Number of errors (value) per endpoint (key)
-        self.error_count: typing.Dict[str, int] = collections.defaultdict(int)
+        self.error_count: dict[str, int] = collections.defaultdict(int)
 
         # Set of endpoints in the current events
-        self.endpoints: typing.Set[str] = set()
+        self.endpoints: set[str] = set()
 
     def do(self, full_event):
         event = full_event.body
@@ -745,18 +748,12 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         # in list of events. This list will be used as the body for the storey event.
         events = []
         for i, (feature, prediction) in enumerate(zip(features, predictions)):
-            # Validate that inputs are based on numeric values
-            if not self.is_valid(
-                endpoint_id,
-                self.is_list_of_numerics,
-                feature,
-                ["request", "inputs", f"[{i}]"],
-            ):
-                return None
-
             if not isinstance(prediction, list):
                 prediction = [prediction]
 
+            if not isinstance(feature, list):
+                feature = [feature]
+
             events.append(
                 {
                     EventFieldType.FUNCTION_URI: function_uri,
@@ -803,18 +800,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
                 f"{self.last_request[endpoint_id]} - write to TSDB will be rejected"
             )
 
-    @staticmethod
-    def is_list_of_numerics(
-        field: typing.List[typing.Union[int, float, dict, list]],
-        dict_path: typing.List[str],
-    ):
-        if all(isinstance(x, int) or isinstance(x, float) for x in field):
-            return True
-        logger.error(
-            f"List does not consist of only numeric values: {field} [Event -> {','.join(dict_path)}]"
-        )
-        return False
-
     def resume_state(self, endpoint_id):
         # Make sure process is resumable, if process fails for any reason, be able to pick things up close to where we
         # left them
@@ -849,7 +834,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         endpoint_id: str,
         validation_function,
         field: typing.Any,
-        dict_path: typing.List[str],
+        dict_path: list[str],
     ):
         if validation_function(field, dict_path):
             return True
@@ -857,7 +842,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         return False
 
 
-def is_not_none(field: typing.Any, dict_path: typing.List[str]):
+def is_not_none(field: typing.Any, dict_path: list[str]):
     if field is not None:
         return True
     logger.error(
@@ -946,9 +931,11 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
             return self.label_columns[endpoint_id]
         return None
 
-    def do(self, event: typing.Dict):
+    def do(self, event: dict):
         endpoint_id = event[EventFieldType.ENDPOINT_ID]
 
+        feature_values = event[EventFieldType.FEATURES]
+        label_values = event[EventFieldType.PREDICTION]
         # Get feature names and label columns
         if endpoint_id not in self.feature_names:
             endpoint_record = get_endpoint_record(
@@ -984,6 +971,12 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                 },
             )
 
+            update_monitoring_feature_set(
+                endpoint_record=endpoint_record,
+                feature_names=feature_names,
+                feature_values=feature_values,
+            )
+
             # Similar process with label columns
             if not label_columns and self._infer_columns_from_data:
                 label_columns = self._infer_label_columns_from_data(event)
@@ -1002,6 +995,11 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                 endpoint_id=endpoint_id,
                 attributes={EventFieldType.LABEL_NAMES: json.dumps(label_columns)},
             )
+            update_monitoring_feature_set(
+                endpoint_record=endpoint_record,
+                feature_names=label_columns,
+                feature_values=label_values,
+            )
 
             self.label_columns[endpoint_id] = label_columns
             self.feature_names[endpoint_id] = feature_names
@@ -1019,7 +1017,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
 
         # Add feature_name:value pairs along with a mapping dictionary of all of these pairs
         feature_names = self.feature_names[endpoint_id]
-        feature_values = event[EventFieldType.FEATURES]
         self._map_dictionary_values(
             event=event,
             named_iters=feature_names,
@@ -1029,7 +1026,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
 
         # Add label_name:value pairs along with a mapping dictionary of all of these pairs
         label_names = self.label_columns[endpoint_id]
-        label_values = event[EventFieldType.PREDICTION]
         self._map_dictionary_values(
             event=event,
             named_iters=label_names,
@@ -1045,9 +1041,9 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
 
     @staticmethod
     def _map_dictionary_values(
-        event: typing.Dict,
-        named_iters: typing.List,
-        values_iters: typing.List,
+        event: dict,
+        named_iters: list,
+        values_iters: list,
         mapping_dictionary: str,
     ):
         """Adding name-value pairs to event dictionary based on two provided lists of names and values. These pairs
@@ -1082,7 +1078,7 @@ class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
         self.project = project
         self.model_endpoint_store_target = model_endpoint_store_target
 
-    def do(self, event: typing.Dict):
+    def do(self, event: dict):
         update_endpoint_record(
             project=self.project,
             endpoint_id=event.pop(EventFieldType.ENDPOINT_ID),
@@ -1117,7 +1113,7 @@ class InferSchema(mlrun.feature_store.steps.MapClass):
         self.table = table
         self.keys = set()
 
-    def do(self, event: typing.Dict):
+    def do(self, event: dict):
         key_set = set(event.keys())
         if not key_set.issubset(self.keys):
             self.keys.update(key_set)
@@ -1241,3 +1237,21 @@ def get_endpoint_record(project: str, endpoint_id: str):
         project=project,
     )
     return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
+
+
+def update_monitoring_feature_set(
+    endpoint_record: dict[str, typing.Any],
+    feature_names: list[str],
+    feature_values: list[typing.Any],
+):
+    monitoring_feature_set = fstore.get_feature_set(
+        endpoint_record[
+            mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_SET_URI
+        ]
+    )
+    for name, val in zip(feature_names, feature_values):
+        monitoring_feature_set.add_feature(
+            fstore.Feature(name=name, value_type=type(val))
+        )
+
+    monitoring_feature_set.save()
mlrun/package/packagers/pandas_packagers.py CHANGED
@@ -838,9 +838,9 @@ class PandasDataFramePackager(DefaultPackager):
         """
         if isinstance(obj, dict):
             for key, value in obj.items():
-                obj[
-                    PandasDataFramePackager._prepare_result(obj=key)
-                ] = PandasDataFramePackager._prepare_result(obj=value)
+                obj[PandasDataFramePackager._prepare_result(obj=key)] = (
+                    PandasDataFramePackager._prepare_result(obj=value)
+                )
         elif isinstance(obj, list):
             for i, value in enumerate(obj):
                 obj[i] = PandasDataFramePackager._prepare_result(obj=value)
mlrun/package/utils/_archiver.py CHANGED
@@ -179,7 +179,9 @@ class _TarArchiver(_Archiver):
 
         # Extract:
         with tarfile.open(archive_path, f"r:{cls._MODE_STRING}") as tar_file:
-            tar_file.extractall(directory_path)
+            # use 'data' to ensure no security risks are imposed by the archive files
+            # see: https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall
+            tar_file.extractall(directory_path, filter="data")
 
         return str(directory_path)
 
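The 'data' extraction filter (PEP 706; available on Python 3.12+ and backported to recent 3.8-3.11 security releases) rejects archive members that would land outside the destination directory, among other hardening. A self-contained demonstration of the traversal case:

import io
import tarfile
import tempfile

# build an in-memory archive whose single member tries to escape the target dir
buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w:gz") as tar:
    payload = b"owned"
    info = tarfile.TarInfo(name="../escape.txt")  # path traversal attempt
    info.size = len(payload)
    tar.addfile(info, io.BytesIO(payload))

buf.seek(0)
with tempfile.TemporaryDirectory() as dest:
    with tarfile.open(fileobj=buf, mode="r:gz") as tar:
        try:
            tar.extractall(dest, filter="data")
        except tarfile.OutsideDestinationError as err:
            print("blocked:", err)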
mlrun/platforms/iguazio.py CHANGED
@@ -16,19 +16,15 @@ import json
 import os
 import urllib
 from collections import namedtuple
-from datetime import datetime
-from http import HTTPStatus
 from urllib.parse import urlparse
 
 import kfp.dsl
 import requests
 import semver
-import urllib3
 import v3io
 
 import mlrun.errors
 from mlrun.config import config as mlconf
-from mlrun.errors import err_to_str
 from mlrun.utils import dict_to_json
 
 _cached_control_session = None
@@ -488,25 +484,6 @@ class V3ioStreamClient:
         return response.output.records
 
 
-def create_control_session(url, username, password):
-    # for systems without production cert - silence no cert verification WARN
-    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-    if not username or not password:
-        raise ValueError("cannot create session key, missing username or password")
-
-    session = requests.Session()
-    session.auth = (username, password)
-    try:
-        auth = session.post(f"{url}/api/sessions", verify=False)
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
-
-    if not auth.ok:
-        raise OSError(f"failed to create session: {url}, {auth.text}")
-
-    return auth.json()["data"]["id"]
-
-
 def is_iguazio_endpoint(endpoint_url: str) -> bool:
     # TODO: find a better heuristic
     return ".default-tenant." in endpoint_url
@@ -533,21 +510,6 @@ def is_iguazio_session_cookie(session_cookie: str) -> bool:
     return False
 
 
-def is_iguazio_system_2_10_or_above(dashboard_url):
-    # for systems without production cert - silence no cert verification WARN
-    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-    response = requests.get(f"{dashboard_url}/api/external_versions", verify=False)
-
-    if not response.ok:
-        if response.status_code == HTTPStatus.NOT_FOUND.value:
-            # in iguazio systems prior to 2.10 this endpoint didn't exist, so the api returns 404 cause endpoint not
-            # found
-            return False
-        response.raise_for_status()
-
-    return True
-
-
 # we assign the control session or access key to the password since this is iguazio auth scheme
 # (requests should be sent with username:control_session/access_key as auth header)
 def add_or_refresh_credentials(
@@ -577,33 +539,12 @@ def add_or_refresh_credentials(
     # (ideally if we could identify we're in enterprise we would have verify here that token and username have value)
     if not is_iguazio_endpoint(api_url):
         return "", "", token
-    iguazio_dashboard_url = "https://dashboard" + api_url[api_url.find(".") :]
-
-    # in 2.8 mlrun api is protected with control session, from 2.10 it's protected with access key
-    is_access_key_auth = is_iguazio_system_2_10_or_above(iguazio_dashboard_url)
-    if is_access_key_auth:
-        if not username or not token:
-            raise ValueError(
-                "username and access key required to authenticate against iguazio system"
-            )
-        return username, token, ""
-
-    if not username or not password:
-        raise ValueError("username and password needed to create session")
-
-    global _cached_control_session
-    now = datetime.now()
-    if _cached_control_session:
-        if (
-            _cached_control_session[2] == username
-            and _cached_control_session[3] == password
-            and (now - _cached_control_session[1]).seconds < 20 * 60 * 60
-        ):
-            return _cached_control_session[2], _cached_control_session[0], ""
-
-    control_session = create_control_session(iguazio_dashboard_url, username, password)
-    _cached_control_session = (control_session, now, username, password)
-    return username, control_session, ""
+
+    if not username or not token:
+        raise ValueError(
+            "username and access key required to authenticate against iguazio system"
+        )
+    return username, token, ""
 
 
 def parse_path(url, suffix="/"):