mlrun 1.7.0rc17__py3-none-any.whl → 1.7.0rc19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (90)
  1. mlrun/__main__.py +5 -2
  2. mlrun/alerts/alert.py +1 -1
  3. mlrun/artifacts/manager.py +5 -1
  4. mlrun/common/constants.py +64 -3
  5. mlrun/common/formatters/__init__.py +16 -0
  6. mlrun/common/formatters/base.py +59 -0
  7. mlrun/common/formatters/function.py +41 -0
  8. mlrun/common/runtimes/constants.py +32 -4
  9. mlrun/common/schemas/__init__.py +1 -2
  10. mlrun/common/schemas/alert.py +31 -9
  11. mlrun/common/schemas/api_gateway.py +52 -0
  12. mlrun/common/schemas/client_spec.py +1 -0
  13. mlrun/common/schemas/frontend_spec.py +1 -0
  14. mlrun/common/schemas/function.py +4 -0
  15. mlrun/common/schemas/model_monitoring/__init__.py +9 -4
  16. mlrun/common/schemas/model_monitoring/constants.py +22 -8
  17. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  18. mlrun/common/schemas/model_monitoring/model_endpoints.py +17 -6
  19. mlrun/config.py +9 -2
  20. mlrun/data_types/to_pandas.py +5 -5
  21. mlrun/datastore/datastore.py +6 -2
  22. mlrun/datastore/redis.py +2 -2
  23. mlrun/datastore/s3.py +5 -0
  24. mlrun/datastore/sources.py +106 -7
  25. mlrun/datastore/store_resources.py +5 -1
  26. mlrun/datastore/targets.py +5 -4
  27. mlrun/datastore/utils.py +42 -0
  28. mlrun/db/base.py +5 -1
  29. mlrun/db/httpdb.py +22 -3
  30. mlrun/db/nopdb.py +5 -1
  31. mlrun/errors.py +6 -0
  32. mlrun/execution.py +16 -6
  33. mlrun/feature_store/ingestion.py +7 -6
  34. mlrun/feature_store/retrieval/conversion.py +5 -5
  35. mlrun/feature_store/retrieval/job.py +7 -3
  36. mlrun/feature_store/retrieval/spark_merger.py +2 -1
  37. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -2
  38. mlrun/frameworks/parallel_coordinates.py +2 -1
  39. mlrun/frameworks/tf_keras/__init__.py +4 -1
  40. mlrun/launcher/client.py +4 -2
  41. mlrun/launcher/local.py +8 -2
  42. mlrun/launcher/remote.py +8 -2
  43. mlrun/model.py +5 -1
  44. mlrun/model_monitoring/db/stores/__init__.py +0 -2
  45. mlrun/model_monitoring/db/stores/base/store.py +16 -4
  46. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
  47. mlrun/model_monitoring/db/stores/sqldb/models/base.py +32 -2
  48. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +25 -5
  49. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
  50. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +235 -166
  51. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +190 -91
  52. mlrun/model_monitoring/db/tsdb/__init__.py +35 -6
  53. mlrun/model_monitoring/db/tsdb/base.py +232 -38
  54. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  55. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  56. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  57. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  58. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  59. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +292 -104
  60. mlrun/model_monitoring/helpers.py +45 -0
  61. mlrun/model_monitoring/stream_processing.py +7 -4
  62. mlrun/model_monitoring/writer.py +50 -20
  63. mlrun/package/utils/_formatter.py +2 -2
  64. mlrun/projects/operations.py +8 -5
  65. mlrun/projects/pipelines.py +42 -15
  66. mlrun/projects/project.py +55 -14
  67. mlrun/render.py +8 -5
  68. mlrun/runtimes/base.py +2 -1
  69. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  70. mlrun/runtimes/local.py +4 -1
  71. mlrun/runtimes/nuclio/api_gateway.py +32 -8
  72. mlrun/runtimes/nuclio/application/application.py +3 -3
  73. mlrun/runtimes/nuclio/function.py +1 -4
  74. mlrun/runtimes/utils.py +5 -6
  75. mlrun/serving/server.py +2 -1
  76. mlrun/utils/async_http.py +25 -5
  77. mlrun/utils/helpers.py +28 -7
  78. mlrun/utils/logger.py +28 -1
  79. mlrun/utils/notifications/notification/__init__.py +14 -9
  80. mlrun/utils/notifications/notification/slack.py +27 -7
  81. mlrun/utils/notifications/notification_pusher.py +47 -42
  82. mlrun/utils/v3io_clients.py +0 -1
  83. mlrun/utils/version/version.json +2 -2
  84. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/METADATA +9 -4
  85. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/RECORD +89 -82
  86. mlrun/model_monitoring/db/v3io_tsdb_reader.py +0 -134
  87. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/LICENSE +0 -0
  88. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/WHEEL +0 -0
  89. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/entry_points.txt +0 -0
  90. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc19.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/model_monitoring/grafana.py CHANGED
@@ -11,12 +11,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 from typing import Optional, Union
 
 from pydantic import BaseModel
 
+import mlrun.common.types
+
+
+class GrafanaColumnType(mlrun.common.types.StrEnum):
+    NUMBER = "number"
+    STRING = "string"
+
 
 class GrafanaColumn(BaseModel):
     text: str
@@ -24,13 +30,11 @@ class GrafanaColumn(BaseModel):
 
 
 class GrafanaNumberColumn(GrafanaColumn):
-    text: str
-    type: str = "number"
+    type: str = GrafanaColumnType.NUMBER
 
 
 class GrafanaStringColumn(GrafanaColumn):
-    text: str
-    type: str = "string"
+    type: str = GrafanaColumnType.STRING
 
 
 class GrafanaTable(BaseModel):
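A minimal usage sketch of the enum-backed column defaults (the import path is assumed from the files-changed list above):

```python
# Hypothetical usage; the columns still accept plain strings because
# GrafanaColumnType is a StrEnum, so the defaults compare equal to "number"/"string".
from mlrun.common.schemas.model_monitoring.grafana import (
    GrafanaNumberColumn,
    GrafanaStringColumn,
)

latency_col = GrafanaNumberColumn(text="latency_avg_1h")
name_col = GrafanaStringColumn(text="endpoint_name")
assert latency_col.type == "number" and name_col.type == "string"
```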
mlrun/common/schemas/model_monitoring/model_endpoints.py CHANGED
@@ -298,6 +298,7 @@ class ModelEndpointList(BaseModel):
 
 class ModelEndpointMonitoringMetricType(mlrun.common.types.StrEnum):
     RESULT = "result"
+    METRIC = "metric"
 
 
 class ModelEndpointMonitoringMetric(BaseModel):
@@ -322,7 +323,7 @@ _FQN_PART_PATTERN = r"[a-zA-Z0-9_-]+"
 _FQN_PATTERN = (
     rf"^(?P<project>{_FQN_PART_PATTERN})\."
     rf"(?P<app>{_FQN_PART_PATTERN})\."
-    rf"(?P<type>{_FQN_PART_PATTERN})\."
+    rf"(?P<type>{ModelEndpointMonitoringMetricType.RESULT}|{ModelEndpointMonitoringMetricType.METRIC})\."
    rf"(?P<name>{_FQN_PART_PATTERN})$"
 )
 _FQN_REGEX = re.compile(_FQN_PATTERN)
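Since the StrEnum members interpolate to their string values, the type group now matches only "result" or "metric". A standalone sketch of the tightened pattern:

```python
import re

# Reconstructed from the hunk above for illustration; the real module builds the
# pattern from ModelEndpointMonitoringMetricType rather than literal strings.
part = r"[a-zA-Z0-9_-]+"
fqn_regex = re.compile(
    rf"^(?P<project>{part})\.(?P<app>{part})\.(?P<type>result|metric)\.(?P<name>{part})$"
)

assert fqn_regex.match("my-project.my-app.metric.latency")       # newly allowed
assert fqn_regex.match("my-project.my-app.result.data-drift")    # still allowed
assert fqn_regex.match("my-project.my-app.something-else.x") is None
```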
@@ -337,27 +338,37 @@ def _parse_metric_fqn_to_monitoring_metric(fqn: str) -> ModelEndpointMonitoringM
     )
 
 
+class _MetricPoint(NamedTuple):
+    timestamp: datetime
+    value: float
+
+
 class _ResultPoint(NamedTuple):
     timestamp: datetime
     value: float
     status: ResultStatusApp
 
 
-class _ModelEndpointMonitoringResultValuesBase(BaseModel):
+class _ModelEndpointMonitoringMetricValuesBase(BaseModel):
     full_name: str
     type: ModelEndpointMonitoringMetricType
     data: bool
 
 
-class ModelEndpointMonitoringResultValues(_ModelEndpointMonitoringResultValuesBase):
-    full_name: str
-    type: ModelEndpointMonitoringMetricType
+class ModelEndpointMonitoringMetricValues(_ModelEndpointMonitoringMetricValuesBase):
+    type: ModelEndpointMonitoringMetricType = ModelEndpointMonitoringMetricType.METRIC
+    values: list[_MetricPoint]
+    data: bool = True
+
+
+class ModelEndpointMonitoringResultValues(_ModelEndpointMonitoringMetricValuesBase):
+    type: ModelEndpointMonitoringMetricType = ModelEndpointMonitoringMetricType.RESULT
     result_kind: ResultKindApp
     values: list[_ResultPoint]
     data: bool = True
 
 
-class ModelEndpointMonitoringResultNoData(_ModelEndpointMonitoringResultValuesBase):
+class ModelEndpointMonitoringMetricNoData(_ModelEndpointMonitoringMetricValuesBase):
     full_name: str
     type: ModelEndpointMonitoringMetricType
     data: bool = False
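A hedged sketch of constructing the new models, e.g. from a TSDB query result (field names come from the hunk above; the values are placeholders):

```python
from datetime import datetime, timezone

# Illustrative only: a metric series relies on the METRIC default, while a
# "no data" marker must state its type explicitly and carries data=False.
now = datetime.now(tz=timezone.utc)

metric_values = ModelEndpointMonitoringMetricValues(
    full_name="my-project.my-app.metric.latency",
    values=[_MetricPoint(timestamp=now, value=12.3)],
)

no_data = ModelEndpointMonitoringMetricNoData(
    full_name="my-project.my-app.result.data-drift",
    type=ModelEndpointMonitoringMetricType.RESULT,
)
```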
mlrun/config.py CHANGED
@@ -37,6 +37,7 @@ import dotenv
 import semver
 import yaml
 
+import mlrun.common.constants
 import mlrun.common.schemas
 import mlrun.errors
 
@@ -87,7 +88,7 @@ default_config = {
     "mpijob_crd_version": "",  # mpijob crd version (e.g: "v1alpha1". must be in: mlrun.runtime.MPIJobCRDVersions)
     "ipython_widget": True,
     "log_level": "INFO",
-    # log formatter (options: human | json)
+    # log formatter (options: human | human_extended | json)
     "log_formatter": "human",
     "submit_timeout": "180",  # timeout when submitting a new k8s resource
     # runtimes cleanup interval in seconds
@@ -370,7 +371,7 @@
         "add_templated_ingress_host_mode": "never",
         "explicit_ack": "enabled",
         # size of serving spec to move to config maps
-        "serving_spec_env_cutoff": 4096,
+        "serving_spec_env_cutoff": 0,
     },
     "logs": {
         "decode": {
@@ -521,7 +522,9 @@
         # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
         "store_type": "v3io-nosql",
         "endpoint_store_connection": "",
+        # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
         "tsdb_connector_type": "v3io-tsdb",
+        "tsdb_connection": "",
     },
     "secret_stores": {
         # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
@@ -965,6 +968,10 @@ class Config:
         self.httpdb.clusterization.chief.url = chief_api_url
         return self.httpdb.clusterization.chief.url
 
+    @staticmethod
+    def internal_labels():
+        return mlrun.common.constants.MLRunInternalLabels.all()
+
     @staticmethod
     def get_storage_auto_mount_params():
         auto_mount_params = {}
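A small sketch of how client code could read or override the new settings, assuming the usual mlrun.mlconf entry point (the values shown are placeholders):

```python
import mlrun

# Model-monitoring TSDB settings added in this release (empty strings by default).
mlrun.mlconf.model_endpoint_monitoring.tsdb_connector_type = "v3io-tsdb"
mlrun.mlconf.model_endpoint_monitoring.tsdb_connection = ""

# New static helper exposing MLRun's internal label names.
print(mlrun.mlconf.internal_labels())
```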
mlrun/data_types/to_pandas.py CHANGED
@@ -154,10 +154,10 @@ def toPandas(spark_df):
     column_counter = Counter(spark_df.columns)
 
     dtype = [None] * len(spark_df.schema)
-    for fieldIdx, field in enumerate(spark_df.schema):
+    for field_idx, field in enumerate(spark_df.schema):
         # For duplicate column name, we use `iloc` to access it.
         if column_counter[field.name] > 1:
-            pandas_col = pdf.iloc[:, fieldIdx]
+            pandas_col = pdf.iloc[:, field_idx]
         else:
             pandas_col = pdf[field.name]
 
@@ -171,12 +171,12 @@ def toPandas(spark_df):
             and field.nullable
             and pandas_col.isnull().any()
         ):
-            dtype[fieldIdx] = pandas_type
+            dtype[field_idx] = pandas_type
         # Ensure we fall back to nullable numpy types, even when whole column is null:
         if isinstance(field.dataType, IntegralType) and pandas_col.isnull().any():
-            dtype[fieldIdx] = np.float64
+            dtype[field_idx] = np.float64
         if isinstance(field.dataType, BooleanType) and pandas_col.isnull().any():
-            dtype[fieldIdx] = object
+            dtype[field_idx] = object
 
     df = pd.DataFrame()
     for index, t in enumerate(dtype):
mlrun/datastore/datastore.py CHANGED
@@ -223,6 +223,11 @@ class StoreManager:
             subpath = url[len("memory://") :]
             return in_memory_store, subpath, url
 
+        elif schema in get_local_file_schema():
+            # parse_url() will drop the windows drive-letter from the path for url like "c:\a\b".
+            # As a workaround, we set subpath to the url.
+            subpath = url.replace("file://", "", 1)
+
         if not schema and endpoint:
             if endpoint in self._stores.keys():
                 return self._stores[endpoint], subpath, url
@@ -241,8 +246,7 @@
             )
         if not secrets and not mlrun.config.is_running_as_api():
             self._stores[store_key] = store
-        # in file stores in windows path like c:\a\b the drive letter is dropped from the path, so we return the url
-        return store, url if store.kind == "file" else subpath, url
+        return store, subpath, url
 
     def reset_secrets(self):
         self._secrets = {}
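The net effect is that get_or_create_store now derives the local-file subpath from the URL itself instead of special-casing it at return time; a sketch (the Windows-style path is purely illustrative):

```python
import mlrun

# For file URLs the returned subpath keeps the drive letter, since it is taken
# from the URL rather than from parse_url().
store, subpath, url = mlrun.store_manager.get_or_create_store(r"file://c:\data\items.parquet")
print(subpath)  # expected: c:\data\items.parquet
```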
mlrun/datastore/redis.py CHANGED
@@ -31,7 +31,7 @@ class RedisStore(DataStore):
     """
 
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
-        REDIS_DEFAULT_PORT = "6379"
+        redis_default_port = "6379"
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self.headers = None
 
@@ -49,7 +49,7 @@
         user = self._get_secret_or_env("REDIS_USER", "", credentials_prefix)
         password = self._get_secret_or_env("REDIS_PASSWORD", "", credentials_prefix)
         host = parsed_endpoint.hostname
-        port = parsed_endpoint.port if parsed_endpoint.port else REDIS_DEFAULT_PORT
+        port = parsed_endpoint.port if parsed_endpoint.port else redis_default_port
         schema = parsed_endpoint.scheme
         if user or password:
             endpoint = f"{schema}://{user}:{password}@{host}:{port}"
mlrun/datastore/s3.py CHANGED
@@ -198,6 +198,11 @@ class S3Store(DataStore):
         bucket = self.s3.Bucket(bucket)
         return [obj.key[key_length:] for obj in bucket.objects.filter(Prefix=key)]
 
+    def rm(self, path, recursive=False, maxdepth=None):
+        bucket, key = self.get_bucket_and_key(path)
+        path = f"{bucket}/{key}"
+        self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
+
 
 def parse_s3_bucket_and_key(s3_path):
     try:
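A hedged usage sketch for the new S3Store.rm, which resolves the bucket/key and delegates to the underlying fsspec filesystem (bucket and key below are placeholders, and the exact plumbing through DataItem may differ):

```python
import mlrun

# Obtain the datastore for an s3 URL, then remove a prefix recursively.
store, subpath, url = mlrun.store_manager.get_or_create_store("s3://my-bucket/tmp/run-output/")
store.rm(subpath, recursive=True)
```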
mlrun/datastore/sources.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import json
+import operator
 import os
 import warnings
 from base64 import b64encode
@@ -29,6 +30,7 @@ from nuclio.config import split_path
 import mlrun
 from mlrun.config import config
 from mlrun.datastore.snowflake_utils import get_snowflake_spark_options
+from mlrun.datastore.utils import transform_list_filters_to_tuple
 from mlrun.secrets import SecretsStore
 
 from ..model import DataSource
@@ -178,7 +180,7 @@ class CSVSource(BaseSourceDriver):
         self,
         name: str = "",
         path: str = None,
-        attributes: dict[str, str] = None,
+        attributes: dict[str, object] = None,
         key_field: str = None,
         schedule: str = None,
         parse_dates: Union[None, int, str, list[int], list[str]] = None,
@@ -305,14 +307,19 @@ class ParquetSource(BaseSourceDriver):
         self,
         name: str = "",
         path: str = None,
-        attributes: dict[str, str] = None,
+        attributes: dict[str, object] = None,
         key_field: str = None,
         time_field: str = None,
         schedule: str = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
-        additional_filters: Optional[list[tuple]] = None,
+        additional_filters: Optional[list[Union[tuple, list]]] = None,
     ):
+        if additional_filters:
+            attributes = copy(attributes) or {}
+            additional_filters = transform_list_filters_to_tuple(additional_filters)
+            attributes["additional_filters"] = additional_filters
+
         super().__init__(
             name,
             path,
@@ -323,7 +330,6 @@
             start_time,
             end_time,
         )
-        self.additional_filters = additional_filters
 
     @property
     def start_time(self):
@@ -341,6 +347,10 @@
     def end_time(self, end_time):
         self._end_time = self._convert_to_datetime(end_time)
 
+    @property
+    def additional_filters(self):
+        return self.attributes.get("additional_filters")
+
     @staticmethod
     def _convert_to_datetime(time):
         if time and isinstance(time, str):
@@ -361,13 +371,13 @@
     ):
         import storey
 
-        attributes = self.attributes or {}
+        attributes = copy(self.attributes)
+        attributes.pop("additional_filters", None)
         if context:
             attributes["context"] = context
-
+        additional_filters = transform_list_filters_to_tuple(additional_filters)
         data_item = mlrun.store_manager.object(self.path)
         store, path, url = mlrun.store_manager.get_or_create_store(self.path)
-
         return storey.ParquetSource(
             paths=url,  # unlike self.path, it already has store:// replaced
             key_field=self.key_field or key_field,
@@ -379,6 +389,16 @@
             **attributes,
         )
 
+    @classmethod
+    def from_dict(cls, struct=None, fields=None, deprecated_fields: dict = None):
+        new_obj = super().from_dict(
+            struct=struct, fields=fields, deprecated_fields=deprecated_fields
+        )
+        new_obj.attributes["additional_filters"] = transform_list_filters_to_tuple(
+            new_obj.additional_filters
+        )
+        return new_obj
+
     def get_spark_options(self):
         store, path, _ = mlrun.store_manager.get_or_create_store(self.path)
         spark_options = store.get_spark_options()
@@ -401,6 +421,7 @@
         additional_filters=None,
     ):
         reader_args = self.attributes.get("reader_args", {})
+        additional_filters = transform_list_filters_to_tuple(additional_filters)
         return mlrun.store_manager.object(url=self.path).as_df(
             columns=columns,
             df_module=df_module,
@@ -412,6 +433,84 @@
             **reader_args,
         )
 
+    def _build_spark_additional_filters(self, column_types: dict):
+        if not self.additional_filters:
+            return None
+        from pyspark.sql.functions import col, isnan, lit
+
+        operators = {
+            "==": operator.eq,
+            "=": operator.eq,
+            ">": operator.gt,
+            "<": operator.lt,
+            ">=": operator.ge,
+            "<=": operator.le,
+            "!=": operator.ne,
+        }
+
+        spark_filter = None
+        new_filter = lit(True)
+        for filter_tuple in self.additional_filters:
+            if not filter_tuple:
+                continue
+            col_name, op, value = filter_tuple
+            if op.lower() in ("in", "not in") and isinstance(value, (list, tuple, set)):
+                none_exists = False
+                value = list(value)
+                for sub_value in value:
+                    if sub_value is None:
+                        value.remove(sub_value)
+                        none_exists = True
+                if none_exists:
+                    filter_nan = column_types[col_name] not in ("timestamp", "date")
+                    if value:
+                        if op.lower() == "in":
+                            new_filter = (
+                                col(col_name).isin(value) | col(col_name).isNull()
+                            )
+                            if filter_nan:
+                                new_filter = new_filter | isnan(col(col_name))
+
+                        else:
+                            new_filter = (
+                                ~col(col_name).isin(value) & ~col(col_name).isNull()
+                            )
+                            if filter_nan:
+                                new_filter = new_filter & ~isnan(col(col_name))
+                    else:
+                        if op.lower() == "in":
+                            new_filter = col(col_name).isNull()
+                            if filter_nan:
+                                new_filter = new_filter | isnan(col(col_name))
+                        else:
+                            new_filter = ~col(col_name).isNull()
+                            if filter_nan:
+                                new_filter = new_filter & ~isnan(col(col_name))
+                else:
+                    if op.lower() == "in":
+                        new_filter = col(col_name).isin(value)
+                    elif op.lower() == "not in":
+                        new_filter = ~col(col_name).isin(value)
+            elif op in operators:
+                new_filter = operators[op](col(col_name), value)
+            else:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"unsupported filter operator: {op}"
+                )
+            if spark_filter is not None:
+                spark_filter = spark_filter & new_filter
+            else:
+                spark_filter = new_filter
+        return spark_filter
+
+    def _filter_spark_df(self, df, time_field=None, columns=None):
+        spark_additional_filters = self._build_spark_additional_filters(
+            column_types=dict(df.dtypes)
+        )
+        if spark_additional_filters is not None:
+            df = df.filter(spark_additional_filters)
+        return super()._filter_spark_df(df=df, time_field=time_field, columns=columns)
+
 
 class BigQuerySource(BaseSourceDriver):
     """
mlrun/datastore/store_resources.py CHANGED
@@ -146,7 +146,11 @@ def get_store_resource(
 
     db = db or mlrun.get_run_db(secrets=secrets)
     kind, uri = parse_store_uri(uri)
-    if kind == StorePrefix.FeatureSet:
+    if not kind:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"Cannot get store resource from invalid URI: {uri}"
+        )
+    elif kind == StorePrefix.FeatureSet:
         project, name, tag, uid = parse_versioned_object_uri(
             uri, project or config.default_project
         )
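With the new guard, an unparsable store URI fails fast instead of falling through the prefix checks. A sketch (this assumes a configured MLRun client, since the function also resolves a run DB):

```python
import mlrun.errors
from mlrun.datastore.store_resources import get_store_resource

try:
    get_store_resource("not-a-store-uri")
except mlrun.errors.MLRunInvalidArgumentError as exc:
    print(f"rejected: {exc}")
```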
mlrun/datastore/targets.py CHANGED
@@ -30,6 +30,7 @@ import mlrun
 import mlrun.utils.helpers
 from mlrun.config import config
 from mlrun.datastore.snowflake_utils import get_snowflake_spark_options
+from mlrun.datastore.utils import transform_list_filters_to_tuple
 from mlrun.model import DataSource, DataTarget, DataTargetBase, TargetPathObject
 from mlrun.utils import logger, now_date
 from mlrun.utils.helpers import to_parquet
@@ -757,7 +758,7 @@ class BaseStoreTarget(DataTargetBase):
         # options used in spark.read.load(**options)
         raise NotImplementedError()
 
-    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options={}):
+    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         return df
 
     def get_dask_options(self):
@@ -999,7 +1000,7 @@ class ParquetTarget(BaseStoreTarget):
             start_time=start_time,
             end_time=end_time,
             time_column=time_column,
-            additional_filters=additional_filters,
+            additional_filters=transform_list_filters_to_tuple(additional_filters),
             **kwargs,
         )
         if not columns:
@@ -2134,7 +2135,7 @@ class SQLTarget(BaseStoreTarget):
                 raise ValueError(f"Table named {table_name} is not exist")
 
             elif not table_exists and create_table:
-                TYPE_TO_SQL_TYPE = {
+                type_to_sql_type = {
                     int: sqlalchemy.Integer,
                     str: sqlalchemy.String(self.attributes.get("varchar_len")),
                     datetime.datetime: sqlalchemy.dialects.mysql.DATETIME(fsp=6),
@@ -2147,7 +2148,7 @@ class SQLTarget(BaseStoreTarget):
                 # creat new table with the given name
                 columns = []
                 for col, col_type in self.schema.items():
-                    col_type_sql = TYPE_TO_SQL_TYPE.get(col_type)
+                    col_type_sql = type_to_sql_type.get(col_type)
                     if col_type_sql is None:
                         raise TypeError(
                             f"'{col_type}' unsupported type for column '{col}'"
mlrun/datastore/utils.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import math
 import tarfile
 import tempfile
 import typing
@@ -180,3 +181,44 @@ def get_kafka_brokers_from_dict(options: dict, pop=False) -> typing.Optional[str
         FutureWarning,
     )
     return kafka_bootstrap_servers
+
+
+def transform_list_filters_to_tuple(additional_filters):
+    tuple_filters = []
+    if not additional_filters:
+        return tuple_filters
+    validate_additional_filters(additional_filters)
+    for additional_filter in additional_filters:
+        tuple_filters.append(tuple(additional_filter))
+    return tuple_filters
+
+
+def validate_additional_filters(additional_filters):
+    nan_error_message = "using NaN in additional_filters is not supported"
+    if additional_filters in [None, [], ()]:
+        return
+    for filter_tuple in additional_filters:
+        if filter_tuple == () or filter_tuple == []:
+            continue
+        if not isinstance(filter_tuple, (list, tuple)):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"mlrun supports additional_filters only as a list of tuples."
+                f" Current additional_filters: {additional_filters}"
+            )
+        if isinstance(filter_tuple[0], (list, tuple)):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"additional_filters does not support nested list inside filter tuples except in -in- logic."
+                f" Current filter_tuple: {filter_tuple}."
+            )
+        if len(filter_tuple) != 3:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"illegal filter tuple length, {filter_tuple} in additional filters:"
+                f" {additional_filters}"
+            )
+        col_name, op, value = filter_tuple
+        if isinstance(value, float) and math.isnan(value):
+            raise mlrun.errors.MLRunInvalidArgumentError(nan_error_message)
+        elif isinstance(value, (list, tuple)):
+            for sub_value in value:
+                if isinstance(sub_value, float) and math.isnan(sub_value):
+                    raise mlrun.errors.MLRunInvalidArgumentError(nan_error_message)
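A quick illustration of the new helpers (a sketch; the printed error text is taken from the code above):

```python
import mlrun.errors
from mlrun.datastore.utils import transform_list_filters_to_tuple

# Filter lists are normalized to tuples so they can be handed to pandas/pyarrow.
print(transform_list_filters_to_tuple([["color", "in", ["blue", "red"]]]))
# [('color', 'in', ['blue', 'red'])]

# NaN values are rejected up front.
try:
    transform_list_filters_to_tuple([("value", ">", float("nan"))])
except mlrun.errors.MLRunInvalidArgumentError as exc:
    print(exc)  # using NaN in additional_filters is not supported
```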
mlrun/db/base.py CHANGED
@@ -17,6 +17,7 @@ from abc import ABC, abstractmethod
 from typing import Optional, Union
 
 import mlrun.alerts
+import mlrun.common.runtimes.constants
 import mlrun.common.schemas
 import mlrun.model_monitoring
 
@@ -63,7 +64,10 @@ class RunDBInterface(ABC):
         uid: Optional[Union[str, list[str]]] = None,
         project: Optional[str] = None,
         labels: Optional[Union[str, list[str]]] = None,
-        state: Optional[str] = None,
+        state: Optional[
+            mlrun.common.runtimes.constants.RunStates
+        ] = None,  # Backward compatibility
+        states: Optional[list[mlrun.common.runtimes.constants.RunStates]] = None,
         sort: bool = True,
         last: int = 0,
         iter: bool = False,
mlrun/db/httpdb.py CHANGED
@@ -30,6 +30,7 @@ import semver
 from mlrun_pipelines.utils import compile_pipeline
 
 import mlrun
+import mlrun.common.runtimes
 import mlrun.common.schemas
 import mlrun.common.types
 import mlrun.model_monitoring.model_endpoint
@@ -530,6 +531,10 @@
             server_cfg.get("model_endpoint_monitoring_endpoint_store_connection")
             or config.model_endpoint_monitoring.endpoint_store_connection
         )
+        config.model_endpoint_monitoring.tsdb_connection = (
+            server_cfg.get("model_monitoring_tsdb_connection")
+            or config.model_endpoint_monitoring.tsdb_connection
+        )
         config.packagers = server_cfg.get("packagers") or config.packagers
         server_data_prefixes = server_cfg.get("feature_store_data_prefixes") or {}
         for prefix in ["default", "nosql", "redisnosql"]:
@@ -752,7 +757,10 @@
         uid: Optional[Union[str, list[str]]] = None,
         project: Optional[str] = None,
         labels: Optional[Union[str, list[str]]] = None,
-        state: Optional[str] = None,
+        state: Optional[
+            mlrun.common.runtimes.constants.RunStates
+        ] = None,  # Backward compatibility
+        states: typing.Optional[list[mlrun.common.runtimes.constants.RunStates]] = None,
         sort: bool = True,
         last: int = 0,
         iter: bool = False,
@@ -790,7 +798,8 @@
         :param labels: A list of labels to filter by. Label filters work by either filtering a specific value
             of a label (i.e. list("key=value")) or by looking for the existence of a given
             key (i.e. "key").
-        :param state: List only runs whose state is specified.
+        :param state: Deprecated - List only runs whose state is specified (will be removed in 1.9.0)
+        :param states: List only runs whose state is one of the provided states.
         :param sort: Whether to sort the result according to their start time. Otherwise, results will be
             returned by their internal order in the DB (order will not be guaranteed).
         :param last: Deprecated - currently not used (will be removed in 1.8.0).
@@ -826,11 +835,19 @@
                 FutureWarning,
             )
 
+        if state:
+            # TODO: Remove this in 1.9.0
+            warnings.warn(
+                "'state' is deprecated and will be removed in 1.9.0. Use 'states' instead.",
+                FutureWarning,
+            )
+
         if (
             not name
             and not uid
             and not labels
             and not state
+            and not states
             and not last
             and not start_time_from
             and not start_time_to
@@ -849,7 +866,9 @@
             "name": name,
             "uid": uid,
             "label": labels or [],
-            "state": state,
+            "state": mlrun.utils.helpers.as_list(state)
+            if state is not None
+            else states or None,
             "sort": bool2str(sort),
             "iter": bool2str(iter),
             "start_time_from": datetime_to_iso(start_time_from),
mlrun/db/nopdb.py CHANGED
@@ -17,6 +17,7 @@ import datetime
 from typing import Optional, Union
 
 import mlrun.alerts
+import mlrun.common.runtimes.constants
 import mlrun.common.schemas
 import mlrun.errors
 
@@ -80,7 +81,10 @@ class NopDB(RunDBInterface):
         uid: Optional[Union[str, list[str]]] = None,
         project: Optional[str] = None,
         labels: Optional[Union[str, list[str]]] = None,
-        state: Optional[str] = None,
+        state: Optional[
+            mlrun.common.runtimes.constants.RunStates
+        ] = None,  # Backward compatibility
+        states: Optional[list[mlrun.common.runtimes.constants.RunStates]] = None,
         sort: bool = True,
         last: int = 0,
         iter: bool = False,
mlrun/errors.py CHANGED
@@ -155,6 +155,10 @@ class MLRunNotFoundError(MLRunHTTPStatusError):
     error_status_code = HTTPStatus.NOT_FOUND.value
 
 
+class MLRunPaginationEndOfResultsError(MLRunNotFoundError):
+    pass
+
+
 class MLRunBadRequestError(MLRunHTTPStatusError):
     error_status_code = HTTPStatus.BAD_REQUEST.value
 
@@ -240,3 +244,5 @@ STATUS_ERRORS = {
     HTTPStatus.SERVICE_UNAVAILABLE.value: MLRunServiceUnavailableError,
     HTTPStatus.NOT_IMPLEMENTED.value: MLRunNotImplementedServerError,
 }
+
+EXPECTED_ERRORS = (MLRunPaginationEndOfResultsError,)
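A sketch of how the new pagination sentinel might be consumed; grouping it under EXPECTED_ERRORS suggests callers can treat it as an anticipated stop condition rather than a failure (the helper below is illustrative, not part of mlrun):

```python
import mlrun.errors

def iter_pages(fetch_page):
    # Illustrative helper: yield pages until the server signals that
    # pagination is exhausted via the new not-found subclass.
    page = 0
    while True:
        try:
            yield fetch_page(page)
        except mlrun.errors.MLRunPaginationEndOfResultsError:
            return
        page += 1
```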