mlrun 1.8.0rc34__py3-none-any.whl → 1.8.0rc36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic; consult the package's registry page for more details.

mlrun/artifacts/model.py CHANGED
@@ -279,7 +279,11 @@ class ModelArtifact(Artifact):
279
279
  )
280
280
  if label_columns:
281
281
  inferer.infer_schema(
282
- df[label_columns], self.spec.outputs, {}, options=InferOptions.Features
282
+ df[label_columns],
283
+ self.spec.outputs,
284
+ {},
285
+ options=InferOptions.Features,
286
+ push_at_start=True,
283
287
  )
284
288
  if with_stats:
285
289
  self.spec.feature_stats = inferer.get_stats(
@@ -57,6 +57,15 @@ class ArtifactCategories(mlrun.common.types.StrEnum):
57
57
  return cls(kind)
58
58
  return cls.other
59
59
 
60
+ @staticmethod
61
+ def all():
62
+ """Return all applicable artifact categories"""
63
+ return [
64
+ ArtifactCategories.model,
65
+ ArtifactCategories.dataset,
66
+ ArtifactCategories.document,
67
+ ]
68
+
60
69
 
61
70
  class ArtifactIdentifier(pydantic.v1.BaseModel):
62
71
  # artifact kind
@@ -114,6 +114,8 @@ class AuthorizationVerificationInput(pydantic.v1.BaseModel):
114
114
 
115
115
 
116
116
  class AuthInfo(pydantic.v1.BaseModel):
117
+ # Keep request headers for inter-service communication
118
+ request_headers: typing.Optional[dict[str, str]] = None
117
119
  # Basic + Iguazio auth
118
120
  username: typing.Optional[str] = None
119
121
  # Basic auth
mlrun/config.py CHANGED
@@ -816,6 +816,8 @@ default_config = {
816
816
  "max_criteria_count": 100,
817
817
  # interval for periodic events generation job
818
818
  "events_generation_interval": 30, # seconds
819
+ # number of alerts to delete in each chunk
820
+ "chunk_size_during_project_deletion": 100,
819
821
  # maximum allowed alert config cache size in alert's CRUD
820
822
  # for the best performance, it is recommended to set this value to the maximum number of alerts
821
823
  "max_allowed_cache_size": 20000,
mlrun/data_types/infer.py CHANGED
@@ -20,6 +20,8 @@ import pandas as pd
20
20
  import pyarrow
21
21
  from pandas.io.json._table_schema import convert_pandas_type_to_json_field
22
22
 
23
+ import mlrun.features
24
+ from mlrun.model import ObjectList
23
25
  from mlrun.utils import logger
24
26
 
25
27
  from .data_types import InferOptions, pa_type_to_value_type, pd_schema_to_value_type
@@ -29,17 +31,19 @@ default_num_bins = 20
29
31
 
30
32
  def infer_schema_from_df(
31
33
  df: pd.DataFrame,
32
- features,
34
+ features: ObjectList,
33
35
  entities,
34
36
  timestamp_key: Optional[str] = None,
35
37
  entity_columns=None,
36
38
  options: InferOptions = InferOptions.Null,
39
+ push_at_start: Optional[bool] = False,
37
40
  ):
38
41
  """infer feature set schema from dataframe"""
39
42
  timestamp_fields = []
40
43
  current_entities = list(entities.keys())
41
44
  entity_columns = entity_columns or []
42
45
  index_columns = dict()
46
+ temp_features = ObjectList(mlrun.features.Feature)
43
47
 
44
48
  def upsert_entity(name, value_type):
45
49
  if name in current_entities:
@@ -74,10 +78,14 @@ def infer_schema_from_df(
74
78
  if column in features.keys():
75
79
  features[column].value_type = value_type
76
80
  else:
77
- features[column] = {"name": column, "value_type": value_type}
81
+ temp_features[column] = {"name": column, "value_type": value_type}
78
82
  if value_type == "datetime" and not is_entity:
79
83
  timestamp_fields.append(column)
80
84
 
85
+ features.update_list(
86
+ object_list=temp_features, push_at_start=push_at_start
87
+ ) # Push to start of the Object list
88
+
81
89
  index_type = None
82
90
  if InferOptions.get_common_options(options, InferOptions.Index):
83
91
  # infer types of index fields
@@ -18,7 +18,7 @@ import warnings
18
18
  from base64 import b64encode
19
19
  from copy import copy
20
20
  from datetime import datetime
21
- from typing import Optional, Union
21
+ from typing import Any, Optional, Union
22
22
 
23
23
  import pandas as pd
24
24
  import semver
@@ -34,6 +34,7 @@ from mlrun.datastore.utils import transform_list_filters_to_tuple
34
34
  from mlrun.secrets import SecretsStore
35
35
  from mlrun.utils import logger
36
36
 
37
+ from ..common.schemas.function import Function
37
38
  from ..model import DataSource
38
39
  from ..platforms.iguazio import parse_path
39
40
  from ..utils import get_class, is_explicit_ack_supported
@@ -966,6 +967,26 @@ class OnlineSource(BaseSourceDriver):
966
967
  "This source type is not supported with ingestion service yet"
967
968
  )
968
969
 
970
+ @staticmethod
971
+ def set_explicit_ack_mode(function: Function, **extra_arguments) -> dict[str, Any]:
972
+ extra_arguments = extra_arguments or {}
973
+ engine = "sync"
974
+ if (
975
+ function.spec
976
+ and hasattr(function.spec, "graph")
977
+ and function.spec.graph
978
+ and function.spec.graph.engine
979
+ ):
980
+ engine = function.spec.graph.engine
981
+ if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
982
+ extra_arguments["explicit_ack_mode"] = extra_arguments.get(
983
+ "explicit_ack_mode", "explicitOnly"
984
+ )
985
+ extra_arguments["worker_allocation_mode"] = extra_arguments.get(
986
+ "worker_allocation_mode", "static"
987
+ )
988
+ return extra_arguments
989
+
969
990
 
970
991
  class HttpSource(OnlineSource):
971
992
  kind = "http"
@@ -1028,15 +1049,7 @@ class StreamSource(OnlineSource):
1028
1049
  raise_for_status=v3io.dataplane.RaiseForStatus.never,
1029
1050
  )
1030
1051
  res.raise_for_status([409, 204])
1031
-
1032
- kwargs = {}
1033
- engine = "async"
1034
- if hasattr(function.spec, "graph") and function.spec.graph.engine:
1035
- engine = function.spec.graph.engine
1036
-
1037
- if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
1038
- kwargs["explicit_ack_mode"] = "explicitOnly"
1039
- kwargs["worker_allocation_mode"] = "static"
1052
+ kwargs = self.set_explicit_ack_mode(function=function)
1040
1053
 
1041
1054
  function.add_v3io_stream_trigger(
1042
1055
  url,
@@ -1118,20 +1131,12 @@ class KafkaSource(OnlineSource):
1118
1131
  else:
1119
1132
  extra_attributes = copy(self.attributes)
1120
1133
  partitions = extra_attributes.pop("partitions", None)
1121
- explicit_ack_mode = None
1122
- engine = "async"
1123
- if hasattr(function.spec, "graph") and function.spec.graph.engine:
1124
- engine = function.spec.graph.engine
1125
1134
 
1126
- if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
1127
- explicit_ack_mode = "explicitOnly"
1128
- extra_attributes["workerAllocationMode"] = extra_attributes.get(
1129
- "worker_allocation_mode", "static"
1130
- )
1131
- else:
1132
- extra_attributes["workerAllocationMode"] = extra_attributes.get(
1133
- "worker_allocation_mode", "pool"
1134
- )
1135
+ extra_attributes = self.set_explicit_ack_mode(function, **extra_attributes)
1136
+ explicit_ack_mode = extra_attributes.get("explicit_ack_mode")
1137
+ extra_attributes["workerAllocationMode"] = extra_attributes.get(
1138
+ "worker_allocation_mode", "pool"
1139
+ )
1135
1140
 
1136
1141
  trigger_kwargs = {}
1137
1142
 
mlrun/db/base.py CHANGED
@@ -102,7 +102,6 @@ class RunDBInterface(ABC):
102
102
  ] = None, # Backward compatibility
103
103
  states: Optional[list[mlrun.common.runtimes.constants.RunStates]] = None,
104
104
  sort: bool = True,
105
- last: int = 0,
106
105
  iter: bool = False,
107
106
  start_time_from: Optional[datetime.datetime] = None,
108
107
  start_time_to: Optional[datetime.datetime] = None,
@@ -149,7 +148,13 @@ class RunDBInterface(ABC):
149
148
 
150
149
  @abstractmethod
151
150
  def store_artifact(
152
- self, key, artifact, uid=None, iter=None, tag="", project="", tree=None
151
+ self,
152
+ key,
153
+ artifact,
154
+ iter=None,
155
+ tag="",
156
+ project="",
157
+ tree=None,
153
158
  ):
154
159
  pass
155
160
 
mlrun/db/httpdb.py CHANGED
@@ -350,17 +350,10 @@ class HTTPRunDB(RunDBInterface):
350
350
  version=version,
351
351
  )
352
352
 
353
- page_params = deepcopy(params) or {}
354
-
355
- if page_params.get("page-token") is None and page_params.get("page") is None:
356
- page_params["page"] = 1
357
-
358
- if page_params.get("page-size") is None:
359
- page_params["page-size"] = config.httpdb.pagination.default_page_size
360
-
353
+ page_params = self._resolve_page_params(params)
361
354
  response = _api_call(page_params)
362
355
 
363
- # Yield only a single page of results
356
+ # yields a single page of results
364
357
  yield response
365
358
 
366
359
  if return_all:
@@ -899,7 +892,6 @@ class HTTPRunDB(RunDBInterface):
899
892
  ] = None, # Backward compatibility
900
893
  states: typing.Optional[list[mlrun.common.runtimes.constants.RunStates]] = None,
901
894
  sort: bool = True,
902
- last: int = 0,
903
895
  iter: bool = False,
904
896
  start_time_from: Optional[datetime] = None,
905
897
  start_time_to: Optional[datetime] = None,
@@ -946,7 +938,6 @@ class HTTPRunDB(RunDBInterface):
946
938
  :param states: List only runs whose state is one of the provided states.
947
939
  :param sort: Whether to sort the result according to their start time. Otherwise, results will be
948
940
  returned by their internal order in the DB (order will not be guaranteed).
949
- :param last: Deprecated - currently not used (will be removed in 1.8.0).
950
941
  :param iter: If ``True`` return runs from all iterations. Otherwise, return only runs whose ``iter`` is 0.
951
942
  :param start_time_from: Filter by run start time in ``[start_time_from, start_time_to]``.
952
943
  :param start_time_to: Filter by run start time in ``[start_time_from, start_time_to]``.
@@ -974,7 +965,6 @@ class HTTPRunDB(RunDBInterface):
974
965
  state=state,
975
966
  states=states,
976
967
  sort=sort,
977
- last=last,
978
968
  iter=iter,
979
969
  start_time_from=start_time_from,
980
970
  start_time_to=start_time_to,
@@ -1094,8 +1084,6 @@ class HTTPRunDB(RunDBInterface):
1094
1084
  self,
1095
1085
  key,
1096
1086
  artifact,
1097
- # TODO: deprecated, remove in 1.8.0
1098
- uid=None,
1099
1087
  iter=None,
1100
1088
  tag=None,
1101
1089
  project="",
@@ -1105,8 +1093,6 @@ class HTTPRunDB(RunDBInterface):
1105
1093
 
1106
1094
  :param key: Identifying key of the artifact.
1107
1095
  :param artifact: The :py:class:`~mlrun.artifacts.Artifact` to store.
1108
- :param uid: A unique ID for this specific version of the artifact
1109
- (deprecated, artifact uid is generated in the backend use `tree` instead)
1110
1096
  :param iter: The task iteration which generated this artifact. If ``iter`` is not ``None`` the iteration will
1111
1097
  be added to the key provided to generate a unique key for the artifact of the specific iteration.
1112
1098
  :param tag: Tag of the artifact.
@@ -1114,15 +1100,6 @@ class HTTPRunDB(RunDBInterface):
1114
1100
  :param tree: The tree (producer id) which generated this artifact.
1115
1101
  :returns: The stored artifact dictionary.
1116
1102
  """
1117
- if uid:
1118
- warnings.warn(
1119
- "'uid' is deprecated in 1.6.0 and will be removed in 1.8.0, use 'tree' instead.",
1120
- # TODO: Remove this in 1.8.0
1121
- FutureWarning,
1122
- )
1123
-
1124
- # we do this because previously the 'uid' name was used for the 'tree' parameter
1125
- tree = tree or uid
1126
1103
  project = project or mlrun.mlconf.default_project
1127
1104
  endpoint_path = f"projects/{project}/artifacts/{key}"
1128
1105
 
@@ -1295,7 +1272,7 @@ class HTTPRunDB(RunDBInterface):
1295
1272
  :param rows_per_partition: How many top rows (per sorting defined by `partition_sort_by` and `partition_order`)
1296
1273
  to return per group. Default value is 1.
1297
1274
  :param partition_sort_by: What field to sort the results by, within each partition defined by `partition_by`.
1298
- Currently the only allowed values are `created` and `updated`.
1275
+ Currently, the only allowed values are `created` and `updated`.
1299
1276
  :param partition_order: Order of sorting within partitions - `asc` or `desc`. Default is `desc`.
1300
1277
  """
1301
1278
 
@@ -1318,7 +1295,7 @@ class HTTPRunDB(RunDBInterface):
1318
1295
  rows_per_partition=rows_per_partition,
1319
1296
  partition_sort_by=partition_sort_by,
1320
1297
  partition_order=partition_order,
1321
- return_all=True,
1298
+ return_all=not limit,
1322
1299
  )
1323
1300
  return artifacts
1324
1301
 
@@ -5202,7 +5179,6 @@ class HTTPRunDB(RunDBInterface):
5202
5179
  ] = None, # Backward compatibility
5203
5180
  states: typing.Optional[list[mlrun.common.runtimes.constants.RunStates]] = None,
5204
5181
  sort: bool = True,
5205
- last: int = 0,
5206
5182
  iter: bool = False,
5207
5183
  start_time_from: Optional[datetime] = None,
5208
5184
  start_time_to: Optional[datetime] = None,
@@ -5234,13 +5210,6 @@ class HTTPRunDB(RunDBInterface):
5234
5210
  "using the `with_notifications` flag."
5235
5211
  )
5236
5212
 
5237
- if last:
5238
- # TODO: Remove this in 1.8.0
5239
- warnings.warn(
5240
- "'last' is deprecated and will be removed in 1.8.0.",
5241
- FutureWarning,
5242
- )
5243
-
5244
5213
  if state:
5245
5214
  # TODO: Remove this in 1.9.0
5246
5215
  warnings.warn(
@@ -5256,7 +5225,6 @@ class HTTPRunDB(RunDBInterface):
5256
5225
  and not labels
5257
5226
  and not state
5258
5227
  and not states
5259
- and not last
5260
5228
  and not start_time_from
5261
5229
  and not start_time_to
5262
5230
  and not last_update_time_from
@@ -5378,6 +5346,33 @@ class HTTPRunDB(RunDBInterface):
5378
5346
  )
5379
5347
  return None
5380
5348
 
5349
+ def _resolve_page_params(self, params: typing.Optional[dict]) -> dict:
5350
+ """
5351
+ Resolve the page parameters, setting defaults where necessary.
5352
+ """
5353
+ page_params = deepcopy(params) or {}
5354
+ if page_params.get("page-token") is None and page_params.get("page") is None:
5355
+ page_params["page"] = 1
5356
+ if page_params.get("page-size") is None:
5357
+ page_size = config.httpdb.pagination.default_page_size
5358
+
5359
+ if page_params.get("limit") is not None:
5360
+ page_size = page_params["limit"]
5361
+
5362
+ # limit and page/page size are conflicting
5363
+ page_params.pop("limit")
5364
+ page_params["page-size"] = page_size
5365
+
5366
+ # this may happen only when page-size was explicitly set along with limit
5367
+ # this is to ensure we will not get stopped by API on similar below validation
5368
+ # but rather simply fallback to use page-size.
5369
+ if page_params.get("page-size") and page_params.get("limit"):
5370
+ logger.warning(
5371
+ "Both 'limit' and 'page-size' are provided, using 'page-size'."
5372
+ )
5373
+ page_params.pop("limit")
5374
+ return page_params
5375
+
5381
5376
 
5382
5377
  def _as_json(obj):
5383
5378
  fn = getattr(obj, "to_json", None)
mlrun/db/nopdb.py CHANGED
@@ -132,7 +132,6 @@ class NopDB(RunDBInterface):
132
132
  ] = None, # Backward compatibility
133
133
  states: Optional[list[mlrun.common.runtimes.constants.RunStates]] = None,
134
134
  sort: bool = True,
135
- last: int = 0,
136
135
  iter: bool = False,
137
136
  start_time_from: Optional[datetime.datetime] = None,
138
137
  start_time_to: Optional[datetime.datetime] = None,
@@ -175,7 +174,13 @@ class NopDB(RunDBInterface):
175
174
  pass
176
175
 
177
176
  def store_artifact(
178
- self, key, artifact, uid=None, iter=None, tag="", project="", tree=None
177
+ self,
178
+ key,
179
+ artifact,
180
+ iter=None,
181
+ tag="",
182
+ project="",
183
+ tree=None,
179
184
  ):
180
185
  pass
181
186
 
@@ -13,11 +13,6 @@
13
13
  # limitations under the License.
14
14
 
15
15
  __all__ = [
16
- "get_offline_features",
17
- "get_online_feature_service",
18
- "ingest",
19
- "preview",
20
- "deploy_ingestion_service_v2",
21
16
  "delete_feature_set",
22
17
  "delete_feature_vector",
23
18
  "get_feature_set",
@@ -38,13 +33,8 @@ from ..features import Entity, Feature
38
33
  from .api import (
39
34
  delete_feature_set,
40
35
  delete_feature_vector,
41
- deploy_ingestion_service_v2,
42
36
  get_feature_set,
43
37
  get_feature_vector,
44
- get_offline_features,
45
- get_online_feature_service,
46
- ingest,
47
- preview,
48
38
  )
49
39
  from .common import RunConfig
50
40
  from .feature_set import FeatureSet