PyPI - mlrun - Versions diffs - 1.6.0rc26__py3-none-any.whl → 1.6.3rc1__py3-none-any.whl - Mend

mlrun 1.6.0rc26__py3-none-any.whl → 1.6.3rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (66) hide show

mlrun/artifacts/manager.py +6 -0
mlrun/artifacts/model.py +28 -22
mlrun/common/db/sql_session.py +3 -0
mlrun/common/model_monitoring/helpers.py +4 -2
mlrun/common/schemas/__init__.py +2 -0
mlrun/common/schemas/common.py +40 -0
mlrun/common/schemas/model_monitoring/__init__.py +1 -0
mlrun/common/schemas/model_monitoring/constants.py +21 -5
mlrun/common/schemas/project.py +2 -0
mlrun/config.py +43 -17
mlrun/data_types/data_types.py +4 -0
mlrun/datastore/azure_blob.py +9 -9
mlrun/datastore/base.py +22 -44
mlrun/datastore/datastore.py +7 -3
mlrun/datastore/datastore_profile.py +15 -3
mlrun/datastore/google_cloud_storage.py +7 -7
mlrun/datastore/sources.py +17 -4
mlrun/datastore/targets.py +3 -1
mlrun/datastore/utils.py +11 -1
mlrun/datastore/v3io.py +70 -46
mlrun/db/base.py +18 -0
mlrun/db/httpdb.py +41 -36
mlrun/execution.py +3 -3
mlrun/feature_store/api.py +133 -132
mlrun/feature_store/feature_set.py +89 -0
mlrun/feature_store/feature_vector.py +120 -0
mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
mlrun/frameworks/tf_keras/model_handler.py +7 -7
mlrun/k8s_utils.py +56 -0
mlrun/kfpops.py +19 -10
mlrun/model.py +6 -0
mlrun/model_monitoring/api.py +8 -8
mlrun/model_monitoring/batch.py +1 -1
mlrun/model_monitoring/controller.py +0 -7
mlrun/model_monitoring/stores/kv_model_endpoint_store.py +13 -13
mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
mlrun/model_monitoring/stream_processing.py +52 -38
mlrun/package/packagers/pandas_packagers.py +3 -3
mlrun/package/utils/_archiver.py +3 -1
mlrun/platforms/iguazio.py +6 -65
mlrun/projects/pipelines.py +29 -12
mlrun/projects/project.py +100 -61
mlrun/run.py +2 -0
mlrun/runtimes/base.py +24 -1
mlrun/runtimes/function.py +14 -15
mlrun/runtimes/kubejob.py +5 -3
mlrun/runtimes/local.py +2 -2
mlrun/runtimes/mpijob/abstract.py +6 -6
mlrun/runtimes/pod.py +3 -3
mlrun/runtimes/serving.py +7 -14
mlrun/runtimes/sparkjob/spark3job.py +3 -3
mlrun/serving/remote.py +4 -2
mlrun/serving/routers.py +14 -8
mlrun/utils/async_http.py +3 -3
mlrun/utils/helpers.py +59 -3
mlrun/utils/http.py +3 -3
mlrun/utils/logger.py +2 -2
mlrun/utils/notifications/notification_pusher.py +6 -6
mlrun/utils/regex.py +5 -1
mlrun/utils/version/version.json +2 -2
{mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/METADATA +21 -23
{mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/RECORD +66 -65
{mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/WHEEL +1 -1
{mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/LICENSE +0 -0
{mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/entry_points.txt +0 -0
{mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/top_level.txt +0 -0

mlrun/k8s_utils.py CHANGED Viewed

@@ -18,6 +18,7 @@ import kubernetes.client
 import mlrun.common.schemas
 import mlrun.errors
+import mlrun.utils.regex
 from .config import config as mlconfig
@@ -131,3 +132,58 @@ def sanitize_label_value(value: str) -> str:
     :return:      string fully compliant with k8s label value expectations
     """
     return re.sub(r"([^a-zA-Z0-9_.-]|^[^a-zA-Z0-9]|[^a-zA-Z0-9]$)", "-", value[:63])
+def verify_label_key(key: str):
+    """
+    Verify that the label key is valid for Kubernetes.
+    Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
+    """
+    if not key:
+        raise mlrun.errors.MLRunInvalidArgumentError("label key cannot be empty")
+    mlrun.utils.helpers.verify_field_regex(
+        f"project.metadata.labels.'{key}'",
+        key,
+        mlrun.utils.regex.k8s_character_limit,
+    )
+    if key.startswith("k8s.io/") or key.startswith("kubernetes.io/"):
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "Labels cannot start with 'k8s.io/' or 'kubernetes.io/'"
+        )
+    parts = key.split("/")
+    if len(parts) == 1:
+        name = parts[0]
+    elif len(parts) == 2:
+        prefix, name = parts
+        if len(prefix) == 0:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Label key prefix cannot be empty"
+            )
+        # prefix must adhere dns_1123_subdomain
+        mlrun.utils.helpers.verify_field_regex(
+            f"Project.metadata.labels.'{key}'",
+            prefix,
+            mlrun.utils.regex.dns_1123_subdomain,
+        )
+    else:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "Label key can only contain one '/'"
+        )
+    mlrun.utils.helpers.verify_field_regex(
+        f"project.metadata.labels.'{key}'",
+        name,
+        mlrun.utils.regex.qualified_name,
+    )
+def verify_label_value(value, label_key):
+    mlrun.utils.helpers.verify_field_regex(
+        f"project.metadata.labels.'{label_key}'",
+        value,
+        mlrun.utils.regex.label_value,
+    )

mlrun/kfpops.py CHANGED Viewed

@@ -41,8 +41,8 @@ from .utils import (
 # default KFP artifacts and output (ui metadata, metrics etc.)
 # directories to /tmp to allow running with security context
-KFPMETA_DIR = os.environ.get("KFPMETA_OUT_DIR", "/tmp")
-KFP_ARTIFACTS_DIR = os.environ.get("KFP_ARTIFACTS_DIR", "/tmp")
+KFPMETA_DIR = "/tmp"
+KFP_ARTIFACTS_DIR = "/tmp"
 project_annotation = "mlrun/project"
 run_annotation = "mlrun/pipeline-step-type"
@@ -71,7 +71,7 @@ def write_kfpmeta(struct):
             {"name": k, "numberValue": v} for k, v in results.items() if is_num(v)
         ],
     }
-    with open(KFPMETA_DIR + "/mlpipeline-metrics.json", "w") as f:
+    with open(os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"), "w") as f:
         json.dump(metrics, f)
     struct = deepcopy(struct)
@@ -91,7 +91,14 @@ def write_kfpmeta(struct):
         elif key in results:
             val = results[key]
         try:
-            path = "/".join([KFP_ARTIFACTS_DIR, key])
+            # NOTE: if key has "../x", it would fail on path traversal
+            path = os.path.join(KFP_ARTIFACTS_DIR, key)
+            if not mlrun.utils.helpers.is_safe_path(KFP_ARTIFACTS_DIR, path):
+                logger.warning(
+                    "Path traversal is not allowed ignoring", path=path, key=key
+                )
+                continue
+            path = os.path.abspath(path)
             logger.info("Writing artifact output", path=path, val=val)
             with open(path, "w") as fp:
                 fp.write(str(val))
@@ -109,7 +116,7 @@ def write_kfpmeta(struct):
         "outputs": output_artifacts
         + [{"type": "markdown", "storage": "inline", "source": text}]
     }
-    with open(KFPMETA_DIR + "/mlpipeline-ui-metadata.json", "w") as f:
+    with open(os.path.join(KFPMETA_DIR, "mlpipeline-ui-metadata.json"), "w") as f:
         json.dump(metadata, f)
@@ -401,9 +408,9 @@ def mlrun_op(
         cmd += ["--label", f"{label}={val}"]
     for output in outputs:
         cmd += ["-o", str(output)]
-        file_outputs[
-            output.replace(".", "_")
-        ] = f"/tmp/{output}"  # not using path.join to avoid windows "\"
+        file_outputs[output.replace(".", "_")] = (
+            f"/tmp/{output}"  # not using path.join to avoid windows "\"
+        )
     if project:
         cmd += ["--project", project]
     if handler:
@@ -450,8 +457,10 @@ def mlrun_op(
         command=cmd + [command],
         file_outputs=file_outputs,
         output_artifact_paths={
-            "mlpipeline-ui-metadata": KFPMETA_DIR + "/mlpipeline-ui-metadata.json",
-            "mlpipeline-metrics": KFPMETA_DIR + "/mlpipeline-metrics.json",
+            "mlpipeline-ui-metadata": os.path.join(
+                KFPMETA_DIR, "mlpipeline-ui-metadata.json"
+            ),
+            "mlpipeline-metrics": os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"),
         },
     )
     cop = add_default_function_resources(cop)

mlrun/model.py CHANGED Viewed

@@ -62,6 +62,7 @@ class ModelObj:
             return new_type.from_dict(param)
         return param
+    @mlrun.utils.filter_warnings("ignore", FutureWarning)
     def to_dict(self, fields=None, exclude=None):
         """convert the object to a python dictionary
@@ -359,6 +360,7 @@ class ImageBuilder(ModelObj):
         requirements: list = None,
         extra_args=None,
         builder_env=None,
+        source_code_target_dir=None,
     ):
         self.functionSourceCode = functionSourceCode  #: functionSourceCode
         self.codeEntryType = ""  #: codeEntryType
@@ -379,6 +381,7 @@ class ImageBuilder(ModelObj):
         self.auto_build = auto_build  #: auto_build
         self.build_pod = None
         self.requirements = requirements or []  #: pip requirements
+        self.source_code_target_dir = source_code_target_dir or None
     @property
     def source(self):
@@ -415,6 +418,7 @@ class ImageBuilder(ModelObj):
         overwrite=False,
         builder_env=None,
         extra_args=None,
+        source_code_target_dir=None,
     ):
         if image:
             self.image = image
@@ -440,6 +444,8 @@ class ImageBuilder(ModelObj):
             self.builder_env = builder_env
         if extra_args:
             self.extra_args = extra_args
+        if source_code_target_dir:
+            self.source_code_target_dir = source_code_target_dir
     def with_commands(
         self,

mlrun/model_monitoring/api.py CHANGED Viewed

@@ -436,9 +436,9 @@ def _generate_model_endpoint(
         ] = possible_drift_threshold
     model_endpoint.spec.monitoring_mode = monitoring_mode
-    model_endpoint.status.first_request = (
-        model_endpoint.status.last_request
-    ) = datetime_now().isoformat()
+    model_endpoint.status.first_request = model_endpoint.status.last_request = (
+        datetime_now().isoformat()
+    )
     if sample_set_statistics:
         model_endpoint.status.feature_stats = sample_set_statistics
@@ -476,11 +476,11 @@ def trigger_drift_batch_job(
         db_session = mlrun.get_run_db()
     # Register the monitoring batch job (do nothing if already exist) and get the job function as a dictionary
-    batch_function_dict: typing.Dict[
-        str, typing.Any
-    ] = db_session.deploy_monitoring_batch_job(
-        project=project,
-        default_batch_image=default_batch_image,
+    batch_function_dict: typing.Dict[str, typing.Any] = (
+        db_session.deploy_monitoring_batch_job(
+            project=project,
+            default_batch_image=default_batch_image,
+        )
     )
     # Prepare current run params

mlrun/model_monitoring/batch.py CHANGED Viewed

@@ -992,7 +992,7 @@ class BatchProcessor:
         """
         stream_http_path = (
             mlrun.mlconf.model_endpoint_monitoring.default_http_sink.format(
-                project=self.project
+                project=self.project, namespace=mlrun.mlconf.namespace
             )
         )

mlrun/model_monitoring/controller.py CHANGED Viewed

@@ -426,13 +426,6 @@ class MonitoringApplicationController:
             m_fs = fstore.get_feature_set(
                 endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
             )
-            labels = endpoint[mm_constants.EventFieldType.LABEL_NAMES]
-            if labels:
-                if isinstance(labels, str):
-                    labels = json.loads(labels)
-                for label in labels:
-                    if label not in list(m_fs.spec.features.keys()):
-                        m_fs.add_feature(fstore.Feature(name=label, value_type="float"))
             for application in applications_names:
                 batch_window = batch_window_generator.get_batch_window(

mlrun/model_monitoring/stores/kv_model_endpoint_store.py CHANGED Viewed

@@ -540,24 +540,24 @@ class KVModelEndpointStore(ModelEndpointStore):
             and endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.METRICS]
             == "null"
         ):
-            endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.METRICS
-            ] = json.dumps(
-                {
-                    mlrun.common.schemas.model_monitoring.EventKeyMetrics.GENERIC: {
-                        mlrun.common.schemas.model_monitoring.EventLiveStats.LATENCY_AVG_1H: 0,
-                        mlrun.common.schemas.model_monitoring.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
+            endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.METRICS] = (
+                json.dumps(
+                    {
+                        mlrun.common.schemas.model_monitoring.EventKeyMetrics.GENERIC: {
+                            mlrun.common.schemas.model_monitoring.EventLiveStats.LATENCY_AVG_1H: 0,
+                            mlrun.common.schemas.model_monitoring.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
+                        }
                     }
-                }
+                )
             )
         # Validate key `uid` instead of `endpoint_id`
         # For backwards compatibility reasons, we replace the `endpoint_id` with `uid` which is the updated key name
         if mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID in endpoint:
-            endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.UID
-            ] = endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID
-            ]
+            endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID] = (
+                endpoint[
+                    mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID
+                ]
+            )
     @staticmethod
     def _encode_field(field: typing.Union[str, bytes]) -> bytes:

mlrun/model_monitoring/stores/sql_model_endpoint_store.py CHANGED Viewed

@@ -31,7 +31,6 @@ from .models import get_model_endpoints_table
 class SQLModelEndpointStore(ModelEndpointStore):
     """
     Handles the DB operations when the DB target is from type SQL. For the SQL operations, we use SQLAlchemy, a Python
     SQL toolkit that handles the communication with the database.  When using SQL for storing the model endpoints

mlrun/model_monitoring/stream_processing.py CHANGED Viewed

@@ -24,6 +24,7 @@ import mlrun
 import mlrun.common.model_monitoring.helpers
 import mlrun.config
 import mlrun.datastore.targets
+import mlrun.feature_store as fstore
 import mlrun.feature_store.steps
 import mlrun.model_monitoring.prometheus
 import mlrun.serving.states
@@ -49,7 +50,7 @@ class EventStreamProcessor:
         parquet_batching_timeout_secs: int,
         parquet_target: str,
         sample_window: int = 10,
-        aggregate_windows: typing.Optional[typing.List[str]] = None,
+        aggregate_windows: typing.Optional[list[str]] = None,
         aggregate_period: str = "30s",
         model_monitoring_access_key: str = None,
     ):
@@ -180,14 +181,14 @@ class EventStreamProcessor:
         apply_event_routing()
-        # Step 2 - Filter out events with no '-' in path which indicates that the event is supposed to be processed
+        # Step 2 - Filter out events with '-' in the path basename from going forward
         # through the next steps of the stream graph
         def apply_storey_filter_stream_events():
             # Remove none values from each event
             graph.add_step(
                 "storey.Filter",
                 "filter_stream_event",
-                _fn="('-' not in event.path)",
+                _fn="('-' not in event.path.split('/')[-1])",
                 full_event=True,
             )
@@ -587,6 +588,8 @@ class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
         for key in [
             EventFieldType.FEATURES,
             EventFieldType.NAMED_FEATURES,
+            EventFieldType.PREDICTION,
+            EventFieldType.NAMED_PREDICTIONS,
         ]:
             event.pop(key, None)
@@ -629,14 +632,14 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         self.project: str = project
         # First and last requests timestamps (value) of each endpoint (key)
-        self.first_request: typing.Dict[str, str] = dict()
-        self.last_request: typing.Dict[str, str] = dict()
+        self.first_request: dict[str, str] = dict()
+        self.last_request: dict[str, str] = dict()
         # Number of errors (value) per endpoint (key)
-        self.error_count: typing.Dict[str, int] = collections.defaultdict(int)
+        self.error_count: dict[str, int] = collections.defaultdict(int)
         # Set of endpoints in the current events
-        self.endpoints: typing.Set[str] = set()
+        self.endpoints: set[str] = set()
     def do(self, full_event):
         event = full_event.body
@@ -745,18 +748,12 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         # in list of events. This list will be used as the body for the storey event.
         events = []
         for i, (feature, prediction) in enumerate(zip(features, predictions)):
-            # Validate that inputs are based on numeric values
-            if not self.is_valid(
-                endpoint_id,
-                self.is_list_of_numerics,
-                feature,
-                ["request", "inputs", f"[{i}]"],
-            ):
-                return None
             if not isinstance(prediction, list):
                 prediction = [prediction]
+            if not isinstance(feature, list):
+                feature = [feature]
             events.append(
                 {
                     EventFieldType.FUNCTION_URI: function_uri,
@@ -803,18 +800,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
                 f"{self.last_request[endpoint_id]} - write to TSDB will be rejected"
             )
-    @staticmethod
-    def is_list_of_numerics(
-        field: typing.List[typing.Union[int, float, dict, list]],
-        dict_path: typing.List[str],
-    ):
-        if all(isinstance(x, int) or isinstance(x, float) for x in field):
-            return True
-        logger.error(
-            f"List does not consist of only numeric values: {field} [Event -> {','.join(dict_path)}]"
-        )
-        return False
     def resume_state(self, endpoint_id):
         # Make sure process is resumable, if process fails for any reason, be able to pick things up close to where we
         # left them
@@ -849,7 +834,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         endpoint_id: str,
         validation_function,
         field: typing.Any,
-        dict_path: typing.List[str],
+        dict_path: list[str],
     ):
         if validation_function(field, dict_path):
             return True
@@ -857,7 +842,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         return False
-def is_not_none(field: typing.Any, dict_path: typing.List[str]):
+def is_not_none(field: typing.Any, dict_path: list[str]):
     if field is not None:
         return True
     logger.error(
@@ -946,9 +931,11 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                 return self.label_columns[endpoint_id]
         return None
-    def do(self, event: typing.Dict):
+    def do(self, event: dict):
         endpoint_id = event[EventFieldType.ENDPOINT_ID]
+        feature_values = event[EventFieldType.FEATURES]
+        label_values = event[EventFieldType.PREDICTION]
         # Get feature names and label columns
         if endpoint_id not in self.feature_names:
             endpoint_record = get_endpoint_record(
@@ -984,6 +971,12 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                     },
                 )
+                update_monitoring_feature_set(
+                    endpoint_record=endpoint_record,
+                    feature_names=feature_names,
+                    feature_values=feature_values,
+                )
             # Similar process with label columns
             if not label_columns and self._infer_columns_from_data:
                 label_columns = self._infer_label_columns_from_data(event)
@@ -1002,6 +995,11 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                     endpoint_id=endpoint_id,
                     attributes={EventFieldType.LABEL_NAMES: json.dumps(label_columns)},
                 )
+                update_monitoring_feature_set(
+                    endpoint_record=endpoint_record,
+                    feature_names=label_columns,
+                    feature_values=label_values,
+                )
             self.label_columns[endpoint_id] = label_columns
             self.feature_names[endpoint_id] = feature_names
@@ -1019,7 +1017,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
         # Add feature_name:value pairs along with a mapping dictionary of all of these pairs
         feature_names = self.feature_names[endpoint_id]
-        feature_values = event[EventFieldType.FEATURES]
         self._map_dictionary_values(
             event=event,
             named_iters=feature_names,
@@ -1029,7 +1026,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
         # Add label_name:value pairs along with a mapping dictionary of all of these pairs
         label_names = self.label_columns[endpoint_id]
-        label_values = event[EventFieldType.PREDICTION]
         self._map_dictionary_values(
             event=event,
             named_iters=label_names,
@@ -1045,9 +1041,9 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
     @staticmethod
     def _map_dictionary_values(
-        event: typing.Dict,
-        named_iters: typing.List,
-        values_iters: typing.List,
+        event: dict,
+        named_iters: list,
+        values_iters: list,
         mapping_dictionary: str,
     ):
         """Adding name-value pairs to event dictionary based on two provided lists of names and values. These pairs
@@ -1082,7 +1078,7 @@ class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
         self.project = project
         self.model_endpoint_store_target = model_endpoint_store_target
-    def do(self, event: typing.Dict):
+    def do(self, event: dict):
         update_endpoint_record(
             project=self.project,
             endpoint_id=event.pop(EventFieldType.ENDPOINT_ID),
@@ -1117,7 +1113,7 @@ class InferSchema(mlrun.feature_store.steps.MapClass):
         self.table = table
         self.keys = set()
-    def do(self, event: typing.Dict):
+    def do(self, event: dict):
         key_set = set(event.keys())
         if not key_set.issubset(self.keys):
             self.keys.update(key_set)
@@ -1241,3 +1237,21 @@ def get_endpoint_record(project: str, endpoint_id: str):
         project=project,
     )
     return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
+def update_monitoring_feature_set(
+    endpoint_record: dict[str, typing.Any],
+    feature_names: list[str],
+    feature_values: list[typing.Any],
+):
+    monitoring_feature_set = fstore.get_feature_set(
+        endpoint_record[
+            mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_SET_URI
+        ]
+    )
+    for name, val in zip(feature_names, feature_values):
+        monitoring_feature_set.add_feature(
+            fstore.Feature(name=name, value_type=type(val))
+        )
+    monitoring_feature_set.save()

mlrun/package/packagers/pandas_packagers.py CHANGED Viewed

@@ -838,9 +838,9 @@ class PandasDataFramePackager(DefaultPackager):
         """
         if isinstance(obj, dict):
             for key, value in obj.items():
-                obj[
-                    PandasDataFramePackager._prepare_result(obj=key)
-                ] = PandasDataFramePackager._prepare_result(obj=value)
+                obj[PandasDataFramePackager._prepare_result(obj=key)] = (
+                    PandasDataFramePackager._prepare_result(obj=value)
+                )
         elif isinstance(obj, list):
             for i, value in enumerate(obj):
                 obj[i] = PandasDataFramePackager._prepare_result(obj=value)

mlrun/package/utils/_archiver.py CHANGED Viewed

@@ -179,7 +179,9 @@ class _TarArchiver(_Archiver):
         # Extract:
         with tarfile.open(archive_path, f"r:{cls._MODE_STRING}") as tar_file:
-            tar_file.extractall(directory_path)
+            # use 'data' to ensure no security risks are imposed by the archive files
+            # see: https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall
+            tar_file.extractall(directory_path, filter="data")
         return str(directory_path)

mlrun/platforms/iguazio.py CHANGED Viewed

@@ -16,19 +16,15 @@ import json
 import os
 import urllib
 from collections import namedtuple
-from datetime import datetime
-from http import HTTPStatus
 from urllib.parse import urlparse
 import kfp.dsl
 import requests
 import semver
-import urllib3
 import v3io
 import mlrun.errors
 from mlrun.config import config as mlconf
-from mlrun.errors import err_to_str
 from mlrun.utils import dict_to_json
 _cached_control_session = None
@@ -488,25 +484,6 @@ class V3ioStreamClient:
         return response.output.records
-def create_control_session(url, username, password):
-    # for systems without production cert - silence no cert verification WARN
-    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-    if not username or not password:
-        raise ValueError("cannot create session key, missing username or password")
-    session = requests.Session()
-    session.auth = (username, password)
-    try:
-        auth = session.post(f"{url}/api/sessions", verify=False)
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
-    if not auth.ok:
-        raise OSError(f"failed to create session: {url}, {auth.text}")
-    return auth.json()["data"]["id"]
 def is_iguazio_endpoint(endpoint_url: str) -> bool:
     # TODO: find a better heuristic
     return ".default-tenant." in endpoint_url
@@ -533,21 +510,6 @@ def is_iguazio_session_cookie(session_cookie: str) -> bool:
         return False
-def is_iguazio_system_2_10_or_above(dashboard_url):
-    # for systems without production cert - silence no cert verification WARN
-    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-    response = requests.get(f"{dashboard_url}/api/external_versions", verify=False)
-    if not response.ok:
-        if response.status_code == HTTPStatus.NOT_FOUND.value:
-            # in iguazio systems prior to 2.10 this endpoint didn't exist, so the api returns 404 cause endpoint not
-            # found
-            return False
-        response.raise_for_status()
-    return True
 # we assign the control session or access key to the password since this is iguazio auth scheme
 # (requests should be sent with username:control_session/access_key as auth header)
 def add_or_refresh_credentials(
@@ -577,33 +539,12 @@ def add_or_refresh_credentials(
     # (ideally if we could identify we're in enterprise we would have verify here that token and username have value)
     if not is_iguazio_endpoint(api_url):
         return "", "", token
-    iguazio_dashboard_url = "https://dashboard" + api_url[api_url.find(".") :]
-    # in 2.8 mlrun api is protected with control session, from 2.10 it's protected with access key
-    is_access_key_auth = is_iguazio_system_2_10_or_above(iguazio_dashboard_url)
-    if is_access_key_auth:
-        if not username or not token:
-            raise ValueError(
-                "username and access key required to authenticate against iguazio system"
-            )
-        return username, token, ""
-    if not username or not password:
-        raise ValueError("username and password needed to create session")
-    global _cached_control_session
-    now = datetime.now()
-    if _cached_control_session:
-        if (
-            _cached_control_session[2] == username
-            and _cached_control_session[3] == password
-            and (now - _cached_control_session[1]).seconds < 20 * 60 * 60
-        ):
-            return _cached_control_session[2], _cached_control_session[0], ""
-    control_session = create_control_session(iguazio_dashboard_url, username, password)
-    _cached_control_session = (control_session, now, username, password)
-    return username, control_session, ""
+    if not username or not token:
+        raise ValueError(
+            "username and access key required to authenticate against iguazio system"
+        )
+    return username, token, ""
 def parse_path(url, suffix="/"):

mlrun 1.6.0rc26__py3-none-any.whl → 1.6.3rc1__py3-none-any.whl

Potentially problematic release.

mlrun 1.6.0rc26__py3-none-any.whl → 1.6.3rc1__py3-none-any.whl