mlrun 1.7.0rc17__py3-none-any.whl → 1.7.0rc19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__main__.py +5 -2
- mlrun/alerts/alert.py +1 -1
- mlrun/artifacts/manager.py +5 -1
- mlrun/common/constants.py +64 -3
- mlrun/common/formatters/__init__.py +16 -0
- mlrun/common/formatters/base.py +59 -0
- mlrun/common/formatters/function.py +41 -0
- mlrun/common/runtimes/constants.py +32 -4
- mlrun/common/schemas/__init__.py +1 -2
- mlrun/common/schemas/alert.py +31 -9
- mlrun/common/schemas/api_gateway.py +52 -0
- mlrun/common/schemas/client_spec.py +1 -0
- mlrun/common/schemas/frontend_spec.py +1 -0
- mlrun/common/schemas/function.py +4 -0
- mlrun/common/schemas/model_monitoring/__init__.py +9 -4
- mlrun/common/schemas/model_monitoring/constants.py +22 -8
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +17 -6
- mlrun/config.py +9 -2
- mlrun/data_types/to_pandas.py +5 -5
- mlrun/datastore/datastore.py +6 -2
- mlrun/datastore/redis.py +2 -2
- mlrun/datastore/s3.py +5 -0
- mlrun/datastore/sources.py +106 -7
- mlrun/datastore/store_resources.py +5 -1
- mlrun/datastore/targets.py +5 -4
- mlrun/datastore/utils.py +42 -0
- mlrun/db/base.py +5 -1
- mlrun/db/httpdb.py +22 -3
- mlrun/db/nopdb.py +5 -1
- mlrun/errors.py +6 -0
- mlrun/execution.py +16 -6
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/conversion.py +5 -5
- mlrun/feature_store/retrieval/job.py +7 -3
- mlrun/feature_store/retrieval/spark_merger.py +2 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -2
- mlrun/frameworks/parallel_coordinates.py +2 -1
- mlrun/frameworks/tf_keras/__init__.py +4 -1
- mlrun/launcher/client.py +4 -2
- mlrun/launcher/local.py +8 -2
- mlrun/launcher/remote.py +8 -2
- mlrun/model.py +5 -1
- mlrun/model_monitoring/db/stores/__init__.py +0 -2
- mlrun/model_monitoring/db/stores/base/store.py +16 -4
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +32 -2
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +25 -5
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +235 -166
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +190 -91
- mlrun/model_monitoring/db/tsdb/__init__.py +35 -6
- mlrun/model_monitoring/db/tsdb/base.py +232 -38
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +292 -104
- mlrun/model_monitoring/helpers.py +45 -0
- mlrun/model_monitoring/stream_processing.py +7 -4
- mlrun/model_monitoring/writer.py +50 -20
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/projects/operations.py +8 -5
- mlrun/projects/pipelines.py +42 -15
- mlrun/projects/project.py +55 -14
- mlrun/render.py +8 -5
- mlrun/runtimes/base.py +2 -1
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/local.py +4 -1
- mlrun/runtimes/nuclio/api_gateway.py +32 -8
- mlrun/runtimes/nuclio/application/application.py +3 -3
- mlrun/runtimes/nuclio/function.py +1 -4
- mlrun/runtimes/utils.py +5 -6
- mlrun/serving/server.py +2 -1
- mlrun/utils/async_http.py +25 -5
- mlrun/utils/helpers.py +28 -7
- mlrun/utils/logger.py +28 -1
- mlrun/utils/notifications/notification/__init__.py +14 -9
- mlrun/utils/notifications/notification/slack.py +27 -7
- mlrun/utils/notifications/notification_pusher.py +47 -42
- mlrun/utils/v3io_clients.py +0 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/METADATA +9 -4
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/RECORD +89 -82
- mlrun/model_monitoring/db/v3io_tsdb_reader.py +0 -134
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/model_monitoring/grafana.py
CHANGED

@@ -11,12 +11,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 from typing import Optional, Union
 
 from pydantic import BaseModel
 
+import mlrun.common.types
+
+
+class GrafanaColumnType(mlrun.common.types.StrEnum):
+    NUMBER = "number"
+    STRING = "string"
+
 
 class GrafanaColumn(BaseModel):
     text: str
@@ -24,13 +30,11 @@ class GrafanaColumn(BaseModel):
 
 
 class GrafanaNumberColumn(GrafanaColumn):
-
-    type: str = "number"
+    type: str = GrafanaColumnType.NUMBER
 
 
 class GrafanaStringColumn(GrafanaColumn):
-
-    type: str = "string"
+    type: str = GrafanaColumnType.STRING
 
 
 class GrafanaTable(BaseModel):

mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED

@@ -298,6 +298,7 @@ class ModelEndpointList(BaseModel):
 
 class ModelEndpointMonitoringMetricType(mlrun.common.types.StrEnum):
     RESULT = "result"
+    METRIC = "metric"
 
 
 class ModelEndpointMonitoringMetric(BaseModel):
@@ -322,7 +323,7 @@ _FQN_PART_PATTERN = r"[a-zA-Z0-9_-]+"
 _FQN_PATTERN = (
     rf"^(?P<project>{_FQN_PART_PATTERN})\."
    rf"(?P<app>{_FQN_PART_PATTERN})\."
-    rf"(?P<type>{ModelEndpointMonitoringMetricType.RESULT})\."
+    rf"(?P<type>{ModelEndpointMonitoringMetricType.RESULT}|{ModelEndpointMonitoringMetricType.METRIC})\."
     rf"(?P<name>{_FQN_PART_PATTERN})$"
 )
 _FQN_REGEX = re.compile(_FQN_PATTERN)
@@ -337,27 +338,37 @@ def _parse_metric_fqn_to_monitoring_metric(fqn: str) -> ModelEndpointMonitoringMetric
     )
 
 
+class _MetricPoint(NamedTuple):
+    timestamp: datetime
+    value: float
+
+
 class _ResultPoint(NamedTuple):
     timestamp: datetime
     value: float
     status: ResultStatusApp
 
 
-class _ModelEndpointMonitoringResultValuesBase(BaseModel):
+class _ModelEndpointMonitoringMetricValuesBase(BaseModel):
     full_name: str
     type: ModelEndpointMonitoringMetricType
     data: bool
 
 
-class ModelEndpointMonitoringResultValues(
-    _ModelEndpointMonitoringResultValuesBase
-):
+class ModelEndpointMonitoringMetricValues(_ModelEndpointMonitoringMetricValuesBase):
+    type: ModelEndpointMonitoringMetricType = ModelEndpointMonitoringMetricType.METRIC
+    values: list[_MetricPoint]
+    data: bool = True
+
+
+class ModelEndpointMonitoringResultValues(_ModelEndpointMonitoringMetricValuesBase):
+    type: ModelEndpointMonitoringMetricType = ModelEndpointMonitoringMetricType.RESULT
     result_kind: ResultKindApp
     values: list[_ResultPoint]
     data: bool = True
 
 
-class ModelEndpointMonitoringResultNoData(_ModelEndpointMonitoringResultValuesBase):
+class ModelEndpointMonitoringMetricNoData(_ModelEndpointMonitoringMetricValuesBase):
     full_name: str
     type: ModelEndpointMonitoringMetricType
     data: bool = False
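For reference, a minimal sketch of what the widened FQN pattern now accepts. The pattern is copied from the hunk above ("result" and "metric" are the two ModelEndpointMonitoringMetricType values); the sample FQN and assertion are illustrative:

import re

_FQN_PART_PATTERN = r"[a-zA-Z0-9_-]+"
_FQN_PATTERN = (
    rf"^(?P<project>{_FQN_PART_PATTERN})\."
    rf"(?P<app>{_FQN_PART_PATTERN})\."
    rf"(?P<type>result|metric)\."
    rf"(?P<name>{_FQN_PART_PATTERN})$"
)
_FQN_REGEX = re.compile(_FQN_PATTERN)

# A metric FQN now parses; before this change only "...result..." matched.
match = _FQN_REGEX.match("my-project.my-app.metric.latency_avg")
assert match and match.group("type") == "metric"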
mlrun/config.py
CHANGED
@@ -37,6 +37,7 @@ import dotenv
 import semver
 import yaml
 
+import mlrun.common.constants
 import mlrun.common.schemas
 import mlrun.errors
 
@@ -87,7 +88,7 @@ default_config = {
     "mpijob_crd_version": "",  # mpijob crd version (e.g: "v1alpha1". must be in: mlrun.runtime.MPIJobCRDVersions)
     "ipython_widget": True,
     "log_level": "INFO",
-    # log formatter (options: human | json)
+    # log formatter (options: human | human_extended | json)
     "log_formatter": "human",
     "submit_timeout": "180",  # timeout when submitting a new k8s resource
     # runtimes cleanup interval in seconds
@@ -370,7 +371,7 @@ default_config = {
         "add_templated_ingress_host_mode": "never",
         "explicit_ack": "enabled",
         # size of serving spec to move to config maps
-        "serving_spec_env_cutoff": …,
+        "serving_spec_env_cutoff": 0,
     },
     "logs": {
         "decode": {
@@ -521,7 +522,9 @@ default_config = {
         # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
         "store_type": "v3io-nosql",
         "endpoint_store_connection": "",
+        # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
         "tsdb_connector_type": "v3io-tsdb",
+        "tsdb_connection": "",
     },
     "secret_stores": {
         # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
@@ -965,6 +968,10 @@ class Config:
         self.httpdb.clusterization.chief.url = chief_api_url
         return self.httpdb.clusterization.chief.url
 
+    @staticmethod
+    def internal_labels():
+        return mlrun.common.constants.MLRunInternalLabels.all()
+
     @staticmethod
     def get_storage_auto_mount_params():
         auto_mount_params = {}
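A quick sketch of reading the new settings from client code. The attribute paths follow the config keys above; printed values are the defaults shown in this diff, and the return shape of MLRunInternalLabels.all() is assumed to be a list of label names:

import mlrun

# New TSDB settings for model monitoring.
print(mlrun.mlconf.model_endpoint_monitoring.tsdb_connector_type)  # "v3io-tsdb"
print(mlrun.mlconf.model_endpoint_monitoring.tsdb_connection)      # ""

# New helper exposing MLRun-internal label keys.
print(mlrun.mlconf.internal_labels())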
mlrun/data_types/to_pandas.py
CHANGED
@@ -154,10 +154,10 @@ def toPandas(spark_df):
     column_counter = Counter(spark_df.columns)
 
     dtype = [None] * len(spark_df.schema)
-    for fieldIdx, field in enumerate(spark_df.schema):
+    for field_idx, field in enumerate(spark_df.schema):
         # For duplicate column name, we use `iloc` to access it.
         if column_counter[field.name] > 1:
-            pandas_col = pdf.iloc[:, fieldIdx]
+            pandas_col = pdf.iloc[:, field_idx]
         else:
             pandas_col = pdf[field.name]
 
@@ -171,12 +171,12 @@ def toPandas(spark_df):
             and field.nullable
            and pandas_col.isnull().any()
         ):
-            dtype[fieldIdx] = pandas_type
+            dtype[field_idx] = pandas_type
         # Ensure we fall back to nullable numpy types, even when whole column is null:
         if isinstance(field.dataType, IntegralType) and pandas_col.isnull().any():
-            dtype[fieldIdx] = np.float64
+            dtype[field_idx] = np.float64
         if isinstance(field.dataType, BooleanType) and pandas_col.isnull().any():
-            dtype[fieldIdx] = object
+            dtype[field_idx] = object
 
     df = pd.DataFrame()
     for index, t in enumerate(dtype):
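Why the numpy fallbacks above are needed, in a standalone sketch (plain pandas, no Spark required):

import pandas as pd

# An integer column containing nulls cannot stay int64: NaN is a float, so
# the dtype falls back to float64 (and a nullable boolean falls back to object).
print(pd.Series([1, None, 3]).dtype)         # float64
print(pd.Series([True, None, False]).dtype)  # object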
mlrun/datastore/datastore.py
CHANGED
@@ -223,6 +223,11 @@ class StoreManager:
             subpath = url[len("memory://") :]
             return in_memory_store, subpath, url
 
+        elif schema in get_local_file_schema():
+            # parse_url() will drop the windows drive-letter from the path for url like "c:\a\b".
+            # As a workaround, we set subpath to the url.
+            subpath = url.replace("file://", "", 1)
+
         if not schema and endpoint:
             if endpoint in self._stores.keys():
                 return self._stores[endpoint], subpath, url
@@ -241,8 +246,7 @@ class StoreManager:
         )
         if not secrets and not mlrun.config.is_running_as_api():
             self._stores[store_key] = store
-
-        return store, url if store.kind == "file" else subpath, url
+        return store, subpath, url
 
     def reset_secrets(self):
         self._secrets = {}
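The net effect, sketched. get_or_create_store and its 3-tuple return are used elsewhere in this diff; the sample path is illustrative:

import mlrun

# Local file URLs now resolve their subpath up front, so Windows drive
# letters (e.g. "c:\\a\\b") survive instead of being dropped by parse_url().
store, subpath, url = mlrun.store_manager.get_or_create_store("file:///data/train.csv")
print(subpath)  # /data/train.csv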
mlrun/datastore/redis.py
CHANGED
@@ -31,7 +31,7 @@ class RedisStore(DataStore):
     """
 
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
-
+        redis_default_port = "6379"
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self.headers = None
 
@@ -49,7 +49,7 @@ class RedisStore(DataStore):
         user = self._get_secret_or_env("REDIS_USER", "", credentials_prefix)
         password = self._get_secret_or_env("REDIS_PASSWORD", "", credentials_prefix)
         host = parsed_endpoint.hostname
-        port = parsed_endpoint.port if parsed_endpoint.port else "6379"
+        port = parsed_endpoint.port if parsed_endpoint.port else redis_default_port
         schema = parsed_endpoint.scheme
         if user or password:
             endpoint = f"{schema}://{user}:{password}@{host}:{port}"
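What the defaulting does, in isolation (standard library only; the endpoint is illustrative):

from urllib.parse import urlparse

# Endpoints without an explicit port fall back to the Redis default, 6379,
# before the URL is rebuilt with credentials.
parsed = urlparse("redis://user:secret@cache.local")
port = parsed.port if parsed.port else "6379"
print(f"{parsed.scheme}://{parsed.hostname}:{port}")  # redis://cache.local:6379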
mlrun/datastore/s3.py
CHANGED
@@ -198,6 +198,11 @@ class S3Store(DataStore):
         bucket = self.s3.Bucket(bucket)
         return [obj.key[key_length:] for obj in bucket.objects.filter(Prefix=key)]
 
+    def rm(self, path, recursive=False, maxdepth=None):
+        bucket, key = self.get_bucket_and_key(path)
+        path = f"{bucket}/{key}"
+        self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
+
 
 def parse_s3_bucket_and_key(s3_path):
     try:
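Hypothetical usage of the new S3Store.rm(), which normalizes the store path and delegates to the underlying fsspec filesystem. The bucket and prefix below are made up:

import mlrun

# Recursively delete everything under a prefix through the datastore layer.
store, subpath, url = mlrun.store_manager.get_or_create_store("s3://my-bucket/tmp/")
store.rm("s3://my-bucket/tmp/", recursive=True)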
mlrun/datastore/sources.py
CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import json
+import operator
 import os
 import warnings
 from base64 import b64encode
@@ -29,6 +30,7 @@ from nuclio.config import split_path
 import mlrun
 from mlrun.config import config
 from mlrun.datastore.snowflake_utils import get_snowflake_spark_options
+from mlrun.datastore.utils import transform_list_filters_to_tuple
 from mlrun.secrets import SecretsStore
 
 from ..model import DataSource
@@ -178,7 +180,7 @@ class CSVSource(BaseSourceDriver):
         self,
         name: str = "",
         path: str = None,
-        attributes: dict[str, str] = None,
+        attributes: dict[str, object] = None,
         key_field: str = None,
         schedule: str = None,
         parse_dates: Union[None, int, str, list[int], list[str]] = None,
@@ -305,14 +307,19 @@ class ParquetSource(BaseSourceDriver):
         self,
         name: str = "",
         path: str = None,
-        attributes: dict[str, str] = None,
+        attributes: dict[str, object] = None,
         key_field: str = None,
         time_field: str = None,
         schedule: str = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
-        additional_filters: Optional[list[tuple]] = None,
+        additional_filters: Optional[list[Union[tuple, list]]] = None,
     ):
+        if additional_filters:
+            attributes = copy(attributes) or {}
+            additional_filters = transform_list_filters_to_tuple(additional_filters)
+            attributes["additional_filters"] = additional_filters
+
         super().__init__(
             name,
             path,
@@ -323,7 +330,6 @@ class ParquetSource(BaseSourceDriver):
             start_time,
             end_time,
         )
-        self.additional_filters = additional_filters
 
     @property
     def start_time(self):
@@ -341,6 +347,10 @@ class ParquetSource(BaseSourceDriver):
     def end_time(self, end_time):
         self._end_time = self._convert_to_datetime(end_time)
 
+    @property
+    def additional_filters(self):
+        return self.attributes.get("additional_filters")
+
     @staticmethod
     def _convert_to_datetime(time):
         if time and isinstance(time, str):
@@ -361,13 +371,13 @@ class ParquetSource(BaseSourceDriver):
     ):
         import storey
 
-        attributes = self.attributes
+        attributes = copy(self.attributes)
+        attributes.pop("additional_filters", None)
         if context:
             attributes["context"] = context
-
+        additional_filters = transform_list_filters_to_tuple(additional_filters)
         data_item = mlrun.store_manager.object(self.path)
         store, path, url = mlrun.store_manager.get_or_create_store(self.path)
-
         return storey.ParquetSource(
             paths=url,  # unlike self.path, it already has store:// replaced
             key_field=self.key_field or key_field,
@@ -379,6 +389,16 @@ class ParquetSource(BaseSourceDriver):
             **attributes,
         )
 
+    @classmethod
+    def from_dict(cls, struct=None, fields=None, deprecated_fields: dict = None):
+        new_obj = super().from_dict(
+            struct=struct, fields=fields, deprecated_fields=deprecated_fields
+        )
+        new_obj.attributes["additional_filters"] = transform_list_filters_to_tuple(
+            new_obj.additional_filters
+        )
+        return new_obj
+
     def get_spark_options(self):
         store, path, _ = mlrun.store_manager.get_or_create_store(self.path)
         spark_options = store.get_spark_options()
@@ -401,6 +421,7 @@ class ParquetSource(BaseSourceDriver):
         additional_filters=None,
     ):
         reader_args = self.attributes.get("reader_args", {})
+        additional_filters = transform_list_filters_to_tuple(additional_filters)
         return mlrun.store_manager.object(url=self.path).as_df(
             columns=columns,
             df_module=df_module,
@@ -412,6 +433,84 @@ class ParquetSource(BaseSourceDriver):
             **reader_args,
         )
 
+    def _build_spark_additional_filters(self, column_types: dict):
+        if not self.additional_filters:
+            return None
+        from pyspark.sql.functions import col, isnan, lit
+
+        operators = {
+            "==": operator.eq,
+            "=": operator.eq,
+            ">": operator.gt,
+            "<": operator.lt,
+            ">=": operator.ge,
+            "<=": operator.le,
+            "!=": operator.ne,
+        }
+
+        spark_filter = None
+        new_filter = lit(True)
+        for filter_tuple in self.additional_filters:
+            if not filter_tuple:
+                continue
+            col_name, op, value = filter_tuple
+            if op.lower() in ("in", "not in") and isinstance(value, (list, tuple, set)):
+                none_exists = False
+                value = list(value)
+                for sub_value in value:
+                    if sub_value is None:
+                        value.remove(sub_value)
+                        none_exists = True
+                if none_exists:
+                    filter_nan = column_types[col_name] not in ("timestamp", "date")
+                    if value:
+                        if op.lower() == "in":
+                            new_filter = (
+                                col(col_name).isin(value) | col(col_name).isNull()
+                            )
+                            if filter_nan:
+                                new_filter = new_filter | isnan(col(col_name))
+
+                        else:
+                            new_filter = (
+                                ~col(col_name).isin(value) & ~col(col_name).isNull()
+                            )
+                            if filter_nan:
+                                new_filter = new_filter & ~isnan(col(col_name))
+                    else:
+                        if op.lower() == "in":
+                            new_filter = col(col_name).isNull()
+                            if filter_nan:
+                                new_filter = new_filter | isnan(col(col_name))
+                        else:
+                            new_filter = ~col(col_name).isNull()
+                            if filter_nan:
+                                new_filter = new_filter & ~isnan(col(col_name))
+            else:
+                if op.lower() == "in":
+                    new_filter = col(col_name).isin(value)
+                elif op.lower() == "not in":
+                    new_filter = ~col(col_name).isin(value)
+                elif op in operators:
+                    new_filter = operators[op](col(col_name), value)
+                else:
+                    raise mlrun.errors.MLRunInvalidArgumentError(
+                        f"unsupported filter operator: {op}"
+                    )
+            if spark_filter is not None:
+                spark_filter = spark_filter & new_filter
+            else:
+                spark_filter = new_filter
+        return spark_filter
+
+    def _filter_spark_df(self, df, time_field=None, columns=None):
+        spark_additional_filters = self._build_spark_additional_filters(
+            column_types=dict(df.dtypes)
+        )
+        if spark_additional_filters is not None:
+            df = df.filter(spark_additional_filters)
+        return super()._filter_spark_df(df=df, time_field=time_field, columns=columns)
+
 
 class BigQuerySource(BaseSourceDriver):
     """
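A sketch of the new list-style filters on ParquetSource (path and column names are illustrative). Lists are normalized to tuples via transform_list_filters_to_tuple() at construction time and stored under attributes:

from mlrun.datastore.sources import ParquetSource

source = ParquetSource(
    name="events",
    path="s3://my-bucket/events.parquet",  # illustrative
    additional_filters=[
        ["status", "in", ["ok", "late"]],  # lists are accepted ...
        ("score", ">=", 0.5),              # ... alongside tuples
    ],
)
print(source.additional_filters)
# [('status', 'in', ['ok', 'late']), ('score', '>=', 0.5)]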
mlrun/datastore/store_resources.py
CHANGED

@@ -146,7 +146,11 @@ def get_store_resource(
 
     db = db or mlrun.get_run_db(secrets=secrets)
     kind, uri = parse_store_uri(uri)
-    if kind == StorePrefix.FeatureSet:
+    if not kind:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"Cannot get store resource from invalid URI: {uri}"
+        )
+    elif kind == StorePrefix.FeatureSet:
         project, name, tag, uid = parse_versioned_object_uri(
             uri, project or config.default_project
         )
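Expected behavior after the new guard, sketched (the invalid URI is made up; the message text comes from the raise above):

import mlrun.errors
from mlrun.datastore.store_resources import get_store_resource

try:
    get_store_resource("just-a-string")  # no recognizable store prefix
except mlrun.errors.MLRunInvalidArgumentError as exc:
    print(exc)  # Cannot get store resource from invalid URI: ...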
mlrun/datastore/targets.py
CHANGED
@@ -30,6 +30,7 @@ import mlrun
 import mlrun.utils.helpers
 from mlrun.config import config
 from mlrun.datastore.snowflake_utils import get_snowflake_spark_options
+from mlrun.datastore.utils import transform_list_filters_to_tuple
 from mlrun.model import DataSource, DataTarget, DataTargetBase, TargetPathObject
 from mlrun.utils import logger, now_date
 from mlrun.utils.helpers import to_parquet
@@ -757,7 +758,7 @@ class BaseStoreTarget(DataTargetBase):
         # options used in spark.read.load(**options)
         raise NotImplementedError()
 
-    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options={}):
+    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         return df
 
     def get_dask_options(self):
@@ -999,7 +1000,7 @@ class ParquetTarget(BaseStoreTarget):
             start_time=start_time,
             end_time=end_time,
             time_column=time_column,
-            additional_filters=additional_filters,
+            additional_filters=transform_list_filters_to_tuple(additional_filters),
             **kwargs,
         )
         if not columns:
@@ -2134,7 +2135,7 @@ class SQLTarget(BaseStoreTarget):
                 raise ValueError(f"Table named {table_name} is not exist")
 
             elif not table_exists and create_table:
-                TYPE_TO_SQL_TYPE = {
+                type_to_sql_type = {
                     int: sqlalchemy.Integer,
                     str: sqlalchemy.String(self.attributes.get("varchar_len")),
                     datetime.datetime: sqlalchemy.dialects.mysql.DATETIME(fsp=6),
@@ -2147,7 +2148,7 @@ class SQLTarget(BaseStoreTarget):
                 # creat new table with the given name
                 columns = []
                 for col, col_type in self.schema.items():
-                    col_type_sql = TYPE_TO_SQL_TYPE.get(col_type)
+                    col_type_sql = type_to_sql_type.get(col_type)
                     if col_type_sql is None:
                         raise TypeError(
                             f"'{col_type}' unsupported type for column '{col}'"
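The prepare_spark_df() signature change avoids the shared-mutable-default pitfall (the old default is reconstructed here as a mutable dict). A standalone illustration; the function names are illustrative, not mlrun APIs:

def with_mutable_default(options={}):
    # The same dict object is reused across calls, so state leaks between callers.
    options.setdefault("mode", "overwrite")
    return options

def with_none_default(options=None):
    # A fresh dict per call - the pattern the new signature enables.
    options = options if options is not None else {}
    options.setdefault("mode", "overwrite")
    return options

assert with_mutable_default() is with_mutable_default()  # shared!
assert with_none_default() is not with_none_default()    # independent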
mlrun/datastore/utils.py
CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import math
 import tarfile
 import tempfile
 import typing
@@ -180,3 +181,44 @@ def get_kafka_brokers_from_dict(options: dict, pop=False) -> typing.Optional[str]:
         FutureWarning,
     )
     return kafka_bootstrap_servers
+
+
+def transform_list_filters_to_tuple(additional_filters):
+    tuple_filters = []
+    if not additional_filters:
+        return tuple_filters
+    validate_additional_filters(additional_filters)
+    for additional_filter in additional_filters:
+        tuple_filters.append(tuple(additional_filter))
+    return tuple_filters
+
+
+def validate_additional_filters(additional_filters):
+    nan_error_message = "using NaN in additional_filters is not supported"
+    if additional_filters in [None, [], ()]:
+        return
+    for filter_tuple in additional_filters:
+        if filter_tuple == () or filter_tuple == []:
+            continue
+        if not isinstance(filter_tuple, (list, tuple)):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"mlrun supports additional_filters only as a list of tuples."
+                f" Current additional_filters: {additional_filters}"
+            )
+        if isinstance(filter_tuple[0], (list, tuple)):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"additional_filters does not support nested list inside filter tuples except in -in- logic."
+                f" Current filter_tuple: {filter_tuple}."
+            )
+        if len(filter_tuple) != 3:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"illegal filter tuple length, {filter_tuple} in additional filters:"
+                f" {additional_filters}"
+            )
+        col_name, op, value = filter_tuple
+        if isinstance(value, float) and math.isnan(value):
+            raise mlrun.errors.MLRunInvalidArgumentError(nan_error_message)
+        elif isinstance(value, (list, tuple)):
+            for sub_value in value:
+                if isinstance(sub_value, float) and math.isnan(sub_value):
+                    raise mlrun.errors.MLRunInvalidArgumentError(nan_error_message)
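Round-trip behavior of the new helpers, sketched from the definitions above:

from mlrun.datastore.utils import transform_list_filters_to_tuple

print(transform_list_filters_to_tuple([["x", ">", 1]]))  # [('x', '>', 1)]
print(transform_list_filters_to_tuple(None))             # []

# Malformed or NaN-bearing filters are rejected:
try:
    transform_list_filters_to_tuple([("x", "==", float("nan"))])
except Exception as exc:  # mlrun.errors.MLRunInvalidArgumentError
    print(exc)  # using NaN in additional_filters is not supported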
mlrun/db/base.py
CHANGED
@@ -17,6 +17,7 @@ from abc import ABC, abstractmethod
 from typing import Optional, Union
 
 import mlrun.alerts
+import mlrun.common.runtimes.constants
 import mlrun.common.schemas
 import mlrun.model_monitoring
 
@@ -63,7 +64,10 @@ class RunDBInterface(ABC):
         uid: Optional[Union[str, list[str]]] = None,
         project: Optional[str] = None,
         labels: Optional[Union[str, list[str]]] = None,
-        state: Optional[str] = None,
+        state: Optional[
+            mlrun.common.runtimes.constants.RunStates
+        ] = None,  # Backward compatibility
+        states: Optional[list[mlrun.common.runtimes.constants.RunStates]] = None,
         sort: bool = True,
         last: int = 0,
         iter: bool = False,
mlrun/db/httpdb.py
CHANGED
@@ -30,6 +30,7 @@ import semver
 from mlrun_pipelines.utils import compile_pipeline
 
 import mlrun
+import mlrun.common.runtimes
 import mlrun.common.schemas
 import mlrun.common.types
 import mlrun.model_monitoring.model_endpoint
@@ -530,6 +531,10 @@ class HTTPRunDB(RunDBInterface):
             server_cfg.get("model_endpoint_monitoring_endpoint_store_connection")
             or config.model_endpoint_monitoring.endpoint_store_connection
         )
+        config.model_endpoint_monitoring.tsdb_connection = (
+            server_cfg.get("model_monitoring_tsdb_connection")
+            or config.model_endpoint_monitoring.tsdb_connection
+        )
         config.packagers = server_cfg.get("packagers") or config.packagers
         server_data_prefixes = server_cfg.get("feature_store_data_prefixes") or {}
         for prefix in ["default", "nosql", "redisnosql"]:
@@ -752,7 +757,10 @@ class HTTPRunDB(RunDBInterface):
         uid: Optional[Union[str, list[str]]] = None,
         project: Optional[str] = None,
         labels: Optional[Union[str, list[str]]] = None,
-        state: Optional[str] = None,
+        state: Optional[
+            mlrun.common.runtimes.constants.RunStates
+        ] = None,  # Backward compatibility
+        states: typing.Optional[list[mlrun.common.runtimes.constants.RunStates]] = None,
         sort: bool = True,
         last: int = 0,
         iter: bool = False,
@@ -790,7 +798,8 @@ class HTTPRunDB(RunDBInterface):
         :param labels: A list of labels to filter by. Label filters work by either filtering a specific value
             of a label (i.e. list("key=value")) or by looking for the existence of a given
             key (i.e. "key").
-        :param state: List only runs whose state is specified.
+        :param state: Deprecated - List only runs whose state is specified (will be removed in 1.9.0)
+        :param states: List only runs whose state is one of the provided states.
         :param sort: Whether to sort the result according to their start time. Otherwise, results will be
             returned by their internal order in the DB (order will not be guaranteed).
         :param last: Deprecated - currently not used (will be removed in 1.8.0).
@@ -826,11 +835,19 @@ class HTTPRunDB(RunDBInterface):
                 FutureWarning,
             )
 
+        if state:
+            # TODO: Remove this in 1.9.0
+            warnings.warn(
+                "'state' is deprecated and will be removed in 1.9.0. Use 'states' instead.",
+                FutureWarning,
+            )
+
         if (
             not name
             and not uid
             and not labels
             and not state
+            and not states
             and not last
             and not start_time_from
             and not start_time_to
@@ -849,7 +866,9 @@ class HTTPRunDB(RunDBInterface):
             "name": name,
             "uid": uid,
             "label": labels or [],
-            "state": state,
+            "state": mlrun.utils.helpers.as_list(state)
+            if state is not None
+            else states or None,
             "sort": bool2str(sort),
             "iter": bool2str(iter),
             "start_time_from": datetime_to_iso(start_time_from),
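Client-side usage after the change, sketched (the project name is illustrative; "completed" and "error" are standard mlrun run states):

import mlrun

db = mlrun.get_run_db()

# New: filter on several states at once.
runs = db.list_runs(project="my-project", states=["completed", "error"])

# The old single-state filter still works but now emits a FutureWarning:
runs = db.list_runs(project="my-project", state="completed")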
mlrun/db/nopdb.py
CHANGED
@@ -17,6 +17,7 @@ import datetime
 from typing import Optional, Union
 
 import mlrun.alerts
+import mlrun.common.runtimes.constants
 import mlrun.common.schemas
 import mlrun.errors
 
@@ -80,7 +81,10 @@ class NopDB(RunDBInterface):
         uid: Optional[Union[str, list[str]]] = None,
         project: Optional[str] = None,
         labels: Optional[Union[str, list[str]]] = None,
-        state: Optional[str] = None,
+        state: Optional[
+            mlrun.common.runtimes.constants.RunStates
+        ] = None,  # Backward compatibility
+        states: Optional[list[mlrun.common.runtimes.constants.RunStates]] = None,
         sort: bool = True,
         last: int = 0,
         iter: bool = False,
mlrun/errors.py
CHANGED
@@ -155,6 +155,10 @@ class MLRunNotFoundError(MLRunHTTPStatusError):
     error_status_code = HTTPStatus.NOT_FOUND.value
 
 
+class MLRunPaginationEndOfResultsError(MLRunNotFoundError):
+    pass
+
+
 class MLRunBadRequestError(MLRunHTTPStatusError):
     error_status_code = HTTPStatus.BAD_REQUEST.value
 
@@ -240,3 +244,5 @@ STATUS_ERRORS = {
     HTTPStatus.SERVICE_UNAVAILABLE.value: MLRunServiceUnavailableError,
     HTTPStatus.NOT_IMPLEMENTED.value: MLRunNotImplementedServerError,
 }
+
+EXPECTED_ERRORS = (MLRunPaginationEndOfResultsError,)