mlrun 1.7.0rc39__py3-none-any.whl → 1.7.0rc42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (58)
  1. mlrun/common/constants.py +3 -0
  2. mlrun/common/db/sql_session.py +3 -2
  3. mlrun/common/helpers.py +0 -1
  4. mlrun/common/schemas/api_gateway.py +6 -6
  5. mlrun/common/schemas/common.py +4 -4
  6. mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -1
  7. mlrun/config.py +1 -1
  8. mlrun/data_types/to_pandas.py +12 -12
  9. mlrun/datastore/alibaba_oss.py +1 -0
  10. mlrun/datastore/azure_blob.py +1 -6
  11. mlrun/datastore/base.py +12 -0
  12. mlrun/datastore/dbfs_store.py +1 -5
  13. mlrun/datastore/filestore.py +1 -3
  14. mlrun/datastore/google_cloud_storage.py +1 -9
  15. mlrun/datastore/redis.py +1 -0
  16. mlrun/datastore/s3.py +1 -0
  17. mlrun/datastore/storeytargets.py +147 -0
  18. mlrun/datastore/targets.py +67 -69
  19. mlrun/datastore/v3io.py +1 -0
  20. mlrun/errors.py +7 -4
  21. mlrun/feature_store/feature_vector.py +3 -1
  22. mlrun/feature_store/retrieval/job.py +3 -1
  23. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  24. mlrun/model.py +1 -1
  25. mlrun/model_monitoring/api.py +1 -2
  26. mlrun/model_monitoring/applications/_application_steps.py +25 -43
  27. mlrun/model_monitoring/applications/context.py +206 -70
  28. mlrun/model_monitoring/controller.py +0 -1
  29. mlrun/model_monitoring/db/stores/__init__.py +3 -3
  30. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +17 -8
  31. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +14 -4
  32. mlrun/model_monitoring/db/tsdb/__init__.py +3 -3
  33. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +18 -10
  34. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -23
  35. mlrun/model_monitoring/helpers.py +38 -1
  36. mlrun/model_monitoring/stream_processing.py +8 -26
  37. mlrun/package/packagers/default_packager.py +2 -2
  38. mlrun/projects/project.py +17 -16
  39. mlrun/runtimes/funcdoc.py +1 -1
  40. mlrun/runtimes/nuclio/api_gateway.py +9 -0
  41. mlrun/runtimes/nuclio/application/application.py +131 -55
  42. mlrun/runtimes/nuclio/function.py +4 -10
  43. mlrun/runtimes/nuclio/serving.py +2 -2
  44. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  45. mlrun/runtimes/utils.py +16 -0
  46. mlrun/serving/routers.py +1 -1
  47. mlrun/serving/server.py +19 -5
  48. mlrun/serving/states.py +8 -0
  49. mlrun/serving/v2_serving.py +34 -26
  50. mlrun/utils/helpers.py +12 -2
  51. mlrun/utils/v3io_clients.py +2 -2
  52. mlrun/utils/version/version.json +2 -2
  53. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/METADATA +2 -2
  54. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/RECORD +58 -57
  55. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/WHEEL +1 -1
  56. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/LICENSE +0 -0
  57. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/entry_points.txt +0 -0
  58. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc42.dist-info}/top_level.txt +0 -0
mlrun/common/constants.py CHANGED
@@ -65,6 +65,9 @@ class MLRunInternalLabels:
     task_name = f"{MLRUN_LABEL_PREFIX}task-name"
     resource_name = f"{MLRUN_LABEL_PREFIX}resource_name"
     created = f"{MLRUN_LABEL_PREFIX}created"
+    producer_type = f"{MLRUN_LABEL_PREFIX}producer-type"
+    app_name = f"{MLRUN_LABEL_PREFIX}app-name"
+    endpoint_id = f"{MLRUN_LABEL_PREFIX}endpoint-id"
     host = "host"
     job_type = "job-type"
     kind = "kind"
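For orientation, the new label keys build on the module's MLRUN_LABEL_PREFIX. A quick sketch of the resulting label names, assuming the prefix is "mlrun/" as defined elsewhere in this module:

MLRUN_LABEL_PREFIX = "mlrun/"  # assumed value, mirroring mlrun.common.constants

producer_type = f"{MLRUN_LABEL_PREFIX}producer-type"
app_name = f"{MLRUN_LABEL_PREFIX}app-name"
endpoint_id = f"{MLRUN_LABEL_PREFIX}endpoint-id"
print(producer_type, app_name, endpoint_id)
# mlrun/producer-type mlrun/app-name mlrun/endpoint-id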
mlrun/common/db/sql_session.py CHANGED
@@ -11,13 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#


 from sqlalchemy import create_engine
 from sqlalchemy.engine import Engine
 from sqlalchemy.orm import Session
-from sqlalchemy.orm import sessionmaker as SessionMaker
+from sqlalchemy.orm import (
+    sessionmaker as SessionMaker,  # noqa: N812 - `sessionmaker` is a class
+)

 from mlrun.config import config

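For context, sessionmaker is a class despite its lowercase name, which is why the CamelCase alias now carries an N812 suppression. A generic SQLAlchemy sketch of the factory pattern (not mlrun code; in-memory SQLite assumed):

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker as SessionMaker  # noqa: N812 - `sessionmaker` is a class

# hypothetical in-memory database, just to show the factory usage
engine = create_engine("sqlite:///:memory:")
session_factory = SessionMaker(bind=engine)

session = session_factory()  # each call produces a new Session bound to the engine
session.close()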
mlrun/common/helpers.py CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#


 def parse_versioned_object_uri(
mlrun/common/schemas/api_gateway.py CHANGED
@@ -77,7 +77,7 @@ class APIGatewaySpec(_APIGatewayBaseModel):
     name: str
     description: Optional[str]
     path: Optional[str] = "/"
-    authenticationMode: Optional[APIGatewayAuthenticationMode] = (
+    authenticationMode: Optional[APIGatewayAuthenticationMode] = (  # noqa: N815 - for compatibility with Nuclio https://github.com/nuclio/nuclio/blob/672b8e36f9edd6e42b4685ec1d27cabae3c5f045/pkg/platform/types.go#L476
         APIGatewayAuthenticationMode.none
     )
     upstreams: list[APIGatewayUpstream]
@@ -103,11 +103,11 @@ class APIGateway(_APIGatewayBaseModel):
     ]

     def get_invoke_url(self):
-        return (
-            self.spec.host + self.spec.path
-            if self.spec.path and self.spec.host
-            else self.spec.host
-        ).rstrip("/")
+        if self.spec.host and self.spec.path:
+            return f"{self.spec.host.rstrip('/')}/{self.spec.path.lstrip('/')}".rstrip(
+                "/"
+            )
+        return self.spec.host.rstrip("/")

     def enrich_mlrun_names(self):
         self._enrich_api_gateway_mlrun_name()
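The rewritten get_invoke_url joins host and path with exactly one separator instead of concatenating them verbatim. A minimal standalone sketch of the same logic, using hypothetical values:

def join_invoke_url(host, path):
    # mirror of the updated logic: strip the trailing slash from the host,
    # the leading slash from the path, then drop any trailing slash
    if host and path:
        return f"{host.rstrip('/')}/{path.lstrip('/')}".rstrip("/")
    return host.rstrip("/")

print(join_invoke_url("gateway.example.com/", "/v1/"))  # gateway.example.com/v1
print(join_invoke_url("gateway.example.com", "/"))      # gateway.example.com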
mlrun/common/schemas/common.py CHANGED
@@ -11,16 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+
 import typing

 import pydantic


 class ImageBuilder(pydantic.BaseModel):
-    functionSourceCode: typing.Optional[str] = None
-    codeEntryType: typing.Optional[str] = None
-    codeEntryAttributes: typing.Optional[str] = None
+    functionSourceCode: typing.Optional[str] = None  # noqa: N815
+    codeEntryType: typing.Optional[str] = None  # noqa: N815
+    codeEntryAttributes: typing.Optional[str] = None  # noqa: N815
     source: typing.Optional[str] = None
     code_origin: typing.Optional[str] = None
     origin_filename: typing.Optional[str] = None
mlrun/common/schemas/model_monitoring/model_endpoints.py CHANGED
@@ -21,7 +21,6 @@ from typing import Any, NamedTuple, Optional
 from pydantic import BaseModel, Field, validator
 from pydantic.main import Extra

-import mlrun.common.model_monitoring
 import mlrun.common.types

 from ..object import ObjectKind, ObjectSpec, ObjectStatus
mlrun/config.py CHANGED
@@ -863,7 +863,7 @@ class Config:
                 f"Unable to decode {attribute_path}"
             )
         parsed_attribute_value = json.loads(decoded_attribute_value)
-        if type(parsed_attribute_value) != expected_type:
+        if not isinstance(parsed_attribute_value, expected_type):
             raise mlrun.errors.MLRunInvalidArgumentTypeError(
                 f"Expected type {expected_type}, got {type(parsed_attribute_value)}"
             )
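isinstance also accepts subclasses of the expected type, which the previous exact-type comparison rejected; a small generic illustration (plain Python, not mlrun code):

class MyDict(dict):
    pass

value = MyDict(a=1)
print(type(value) == dict)      # False - exact type comparison ignores subclasses
print(isinstance(value, dict))  # True  - isinstance accepts subclasses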
mlrun/data_types/to_pandas.py CHANGED
@@ -19,9 +19,9 @@ import pandas as pd
 import semver


-def _toPandas(spark_df):
+def _to_pandas(spark_df):
     """
-    Modified version of spark DataFrame.toPandas()
+    Modified version of spark DataFrame.toPandas() -
     https://github.com/apache/spark/blob/v3.2.3/python/pyspark/sql/pandas/conversion.py#L35

     The original code (which is only replaced in pyspark 3.5.0) fails with Pandas 2 installed, with the following error:
@@ -223,21 +223,21 @@ def _to_corrected_pandas_type(dt):
         TimestampType,
     )

-    if type(dt) == ByteType:
+    if isinstance(dt, ByteType):
         return np.int8
-    elif type(dt) == ShortType:
+    elif isinstance(dt, ShortType):
         return np.int16
-    elif type(dt) == IntegerType:
+    elif isinstance(dt, IntegerType):
         return np.int32
-    elif type(dt) == LongType:
+    elif isinstance(dt, LongType):
         return np.int64
-    elif type(dt) == FloatType:
+    elif isinstance(dt, FloatType):
         return np.float32
-    elif type(dt) == DoubleType:
+    elif isinstance(dt, DoubleType):
         return np.float64
-    elif type(dt) == BooleanType:
+    elif isinstance(dt, BooleanType):
         return bool
-    elif type(dt) == TimestampType:
+    elif isinstance(dt, TimestampType):
         return "datetime64[ns]"
     else:
         return None
@@ -262,9 +262,9 @@ def spark_df_to_pandas(spark_df):
                 )
                 type_conversion_dict[field.name] = "datetime64[ns]"

-        df = _toPandas(spark_df)
+        df = _to_pandas(spark_df)
         if type_conversion_dict:
             df = df.astype(type_conversion_dict)
         return df
     else:
-        return _toPandas(spark_df)
+        return _to_pandas(spark_df)
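A hedged usage sketch of the converter around the renamed helper, assuming pyspark is installed and the import path matches the file shown above:

from pyspark.sql import SparkSession

from mlrun.data_types.to_pandas import spark_df_to_pandas  # module path assumed from this diff

spark = SparkSession.builder.master("local[1]").getOrCreate()
spark_df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "label"])

# goes through the patched _to_pandas() path, which stays compatible with pandas 2
pdf = spark_df_to_pandas(spark_df)
print(pdf.dtypes)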
mlrun/datastore/alibaba_oss.py CHANGED
@@ -85,6 +85,7 @@ class OSSStore(DataStore):
         return oss.get_object(key).read()

     def put(self, key, data, append=False):
+        data, _ = self._prepare_put_data(data, append)
         bucket, key = self.get_bucket_and_key(key)
         oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
         oss.put_object(key, data)
mlrun/datastore/azure_blob.py CHANGED
@@ -189,12 +189,7 @@ class AzureBlobStore(DataStore):
                 "Append mode not supported for Azure blob datastore"
             )
         remote_path = self._convert_key_to_remote_path(key)
-        if isinstance(data, bytes):
-            mode = "wb"
-        elif isinstance(data, str):
-            mode = "w"
-        else:
-            raise TypeError("Data type unknown. Unable to put in Azure!")
+        data, mode = self._prepare_put_data(data, append)
         with self.filesystem.open(remote_path, mode) as f:
             f.write(data)

mlrun/datastore/base.py CHANGED
@@ -157,6 +157,18 @@ class DataStore:
     def put(self, key, data, append=False):
         pass

+    def _prepare_put_data(self, data, append=False):
+        mode = "a" if append else "w"
+        if isinstance(data, bytearray):
+            data = bytes(data)
+
+        if isinstance(data, bytes):
+            return data, f"{mode}b"
+        elif isinstance(data, str):
+            return data, mode
+        else:
+            raise TypeError(f"Unable to put a value of type {type(self).__name__}")
+
     def stat(self, key):
         pass

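The new helper normalizes the payload and the open mode in one place, so each datastore's put can delegate to it instead of repeating the type checks. A standalone re-implementation of the mapping, for illustration only (not an mlrun import):

def prepare_put_data(data, append=False):
    # bytearray -> bytes, bytes -> binary mode, str -> text mode, anything else -> TypeError
    mode = "a" if append else "w"
    if isinstance(data, bytearray):
        data = bytes(data)
    if isinstance(data, bytes):
        return data, f"{mode}b"
    if isinstance(data, str):
        return data, mode
    raise TypeError("unsupported data type")

print(prepare_put_data(b"blob"))              # (b'blob', 'wb')
print(prepare_put_data("text", append=True))  # ('text', 'a')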
mlrun/datastore/dbfs_store.py CHANGED
@@ -130,11 +130,7 @@ class DBFSStore(DataStore):
                 "Append mode not supported for Databricks file system"
             )
         # can not use append mode because it overrides data.
-        mode = "w"
-        if isinstance(data, bytes):
-            mode += "b"
-        elif not isinstance(data, str):
-            raise TypeError(f"Unknown data type {type(data)}")
+        data, mode = self._prepare_put_data(data, append)
         with self.filesystem.open(key, mode) as f:
             f.write(data)

mlrun/datastore/filestore.py CHANGED
@@ -66,9 +66,7 @@ class FileStore(DataStore):
         dir_to_create = path.dirname(self._join(key))
         if dir_to_create:
             self._ensure_directory(dir_to_create)
-        mode = "a" if append else "w"
-        if isinstance(data, bytes):
-            mode = mode + "b"
+        data, mode = self._prepare_put_data(data, append)
         with open(self._join(key), mode) as fp:
             fp.write(data)
             fp.close()
mlrun/datastore/google_cloud_storage.py CHANGED
@@ -131,15 +131,7 @@ class GoogleCloudStorageStore(DataStore):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Append mode not supported for Google cloud storage datastore"
             )
-
-        if isinstance(data, bytes):
-            mode = "wb"
-        elif isinstance(data, str):
-            mode = "w"
-        else:
-            raise TypeError(
-                "Data type unknown. Unable to put in Google cloud storage!"
-            )
+        data, mode = self._prepare_put_data(data, append)
         with self.filesystem.open(path, mode) as f:
             f.write(data)

mlrun/datastore/redis.py CHANGED
@@ -126,6 +126,7 @@ class RedisStore(DataStore):

     def put(self, key, data, append=False):
         key = RedisStore.build_redis_key(key)
+        data, _ = self._prepare_put_data(data, append)
         if append:
             self.redis.append(key, data)
         else:
mlrun/datastore/s3.py CHANGED
@@ -183,6 +183,7 @@ class S3Store(DataStore):
         return obj.get()["Body"].read()

     def put(self, key, data, append=False):
+        data, _ = self._prepare_put_data(data, append)
         bucket, key = self.get_bucket_and_key(key)
         self.s3.Object(bucket, key).put(Body=data)

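All of these put implementations are reached through mlrun's data-item abstraction; a hedged usage sketch, assuming mlrun.get_dataitem is available in your environment and the target path is writable:

import mlrun

# a local path here; s3://, gs://, az://, v3io://, dbfs:// URLs route to the
# corresponding DataStore subclass shown in this diff
item = mlrun.get_dataitem("/tmp/example-artifact.txt")

# str payloads open the target in text mode, bytes/bytearray in binary mode,
# as decided by DataStore._prepare_put_data
item.put("hello datastore")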
mlrun/datastore/storeytargets.py ADDED
@@ -0,0 +1,147 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import storey
+from mergedeep import merge
+from storey import V3ioDriver
+
+import mlrun
+import mlrun.model_monitoring.helpers
+from mlrun.datastore.base import DataStore
+
+from .utils import (
+    parse_kafka_url,
+)
+
+"""
+Storey targets expect storage_options, which may contain credentials.
+To avoid passing it openly within the graph, we use wrapper classes.
+"""
+
+
+def get_url_and_storage_options(path, external_storage_options=None):
+    store, resolved_store_path, url = mlrun.store_manager.get_or_create_store(path)
+    storage_options = store.get_storage_options()
+    if storage_options and external_storage_options:
+        # merge external storage options with the store's storage options. storage_options takes precedence
+        storage_options = merge(external_storage_options, storage_options)
+    else:
+        storage_options = storage_options or external_storage_options
+    return url, DataStore._sanitize_storage_options(storage_options)
+
+
+class TDEngineStoreyTarget(storey.TDEngineTarget):
+    def __init__(self, *args, **kwargs):
+        kwargs["url"] = mlrun.model_monitoring.helpers.get_tsdb_connection_string()
+        super().__init__(*args, **kwargs)
+
+
+class StoreyTargetUtils:
+    @staticmethod
+    def process_args_and_kwargs(args, kwargs):
+        args = list(args)
+        path = args[0] if args else kwargs.get("path")
+        external_storage_options = kwargs.get("storage_options")
+
+        url, storage_options = get_url_and_storage_options(
+            path, external_storage_options
+        )
+
+        if storage_options:
+            kwargs["storage_options"] = storage_options
+        if args:
+            args[0] = url
+        if "path" in kwargs:
+            kwargs["path"] = url
+        return args, kwargs
+
+
+class ParquetStoreyTarget(storey.ParquetTarget):
+    def __init__(self, *args, **kwargs):
+        args, kwargs = StoreyTargetUtils.process_args_and_kwargs(args, kwargs)
+        super().__init__(*args, **kwargs)
+
+
+class CSVStoreyTarget(storey.CSVTarget):
+    def __init__(self, *args, **kwargs):
+        args, kwargs = StoreyTargetUtils.process_args_and_kwargs(args, kwargs)
+        super().__init__(*args, **kwargs)
+
+
+class StreamStoreyTarget(storey.StreamTarget):
+    def __init__(self, *args, **kwargs):
+        args = list(args)
+
+        path = args[0] if args else kwargs.get("stream_path")
+        endpoint, storage_options = get_url_and_storage_options(path)
+
+        if not path:
+            raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
+
+        access_key = storage_options.get("v3io_access_key")
+        storage = (
+            V3ioDriver(webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key),
+        )
+
+        if storage_options:
+            kwargs["storage"] = storage
+        if args:
+            args[0] = endpoint
+        if "stream_path" in kwargs:
+            kwargs["stream_path"] = endpoint
+
+        super().__init__(*args, **kwargs)
+
+
+class KafkaStoreyTarget(storey.KafkaTarget):
+    def __init__(self, *args, **kwargs):
+        path = kwargs.pop("path")
+        attributes = kwargs.pop("attributes", None)
+        if path and path.startswith("ds://"):
+            datastore_profile = (
+                mlrun.datastore.datastore_profile.datastore_profile_read(path)
+            )
+            attributes = merge(attributes, datastore_profile.attributes())
+            brokers = attributes.pop(
+                "brokers", attributes.pop("bootstrap_servers", None)
+            )
+            topic = datastore_profile.topic
+        else:
+            brokers = attributes.pop(
+                "brokers", attributes.pop("bootstrap_servers", None)
+            )
+            topic, brokers = parse_kafka_url(path, brokers)
+
+        if not topic:
+            raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a topic")
+        kwargs["brokers"] = brokers
+        kwargs["topic"] = topic
+        super().__init__(*args, **kwargs, **attributes)
+
+
+class NoSqlStoreyTarget(storey.NoSqlTarget):
+    pass
+
+
+class RedisNoSqlStoreyTarget(storey.NoSqlTarget):
+    def __init__(self, *args, **kwargs):
+        path = kwargs.pop("path")
+        endpoint, uri = mlrun.datastore.targets.RedisNoSqlTarget.get_server_endpoint(
+            path
+        )
+        kwargs["path"] = endpoint + "/" + uri
+        super().__init__(*args, **kwargs)
+
+
+class TSDBStoreyTarget(storey.TSDBTarget):
+    pass
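In get_url_and_storage_options, the store's own options win over caller-supplied ones because mergedeep.merge lets later sources override the destination. A small illustration with hypothetical option dicts:

from mergedeep import merge

external = {"key": "caller-key", "client_kwargs": {"region_name": "us-east-1"}}
from_store = {"key": "store-key", "secret": "store-secret"}

# deep merge: from_store values override external where keys collide
merged = merge(external, from_store)
print(merged)
# {'key': 'store-key', 'client_kwargs': {'region_name': 'us-east-1'}, 'secret': 'store-secret'}

The wrapper classes then hand the resolved URL and sanitized options to the underlying storey target, so credentials are looked up when the step is initialized rather than being serialized into the graph spec.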