mlrun 1.7.0rc26__py3-none-any.whl → 1.7.0rc29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. See the package's registry page for more details.

Files changed (66)
  1. mlrun/__main__.py +7 -7
  2. mlrun/alerts/alert.py +13 -1
  3. mlrun/artifacts/manager.py +5 -0
  4. mlrun/common/constants.py +2 -2
  5. mlrun/common/formatters/base.py +9 -9
  6. mlrun/common/schemas/alert.py +4 -8
  7. mlrun/common/schemas/api_gateway.py +7 -0
  8. mlrun/common/schemas/constants.py +3 -0
  9. mlrun/common/schemas/model_monitoring/__init__.py +1 -0
  10. mlrun/common/schemas/model_monitoring/constants.py +27 -12
  11. mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -12
  12. mlrun/common/schemas/schedule.py +1 -1
  13. mlrun/config.py +16 -9
  14. mlrun/datastore/azure_blob.py +2 -1
  15. mlrun/datastore/base.py +1 -5
  16. mlrun/datastore/datastore.py +3 -3
  17. mlrun/datastore/inmem.py +1 -1
  18. mlrun/datastore/snowflake_utils.py +3 -1
  19. mlrun/datastore/sources.py +26 -11
  20. mlrun/datastore/store_resources.py +2 -0
  21. mlrun/datastore/targets.py +60 -25
  22. mlrun/db/base.py +10 -0
  23. mlrun/db/httpdb.py +41 -30
  24. mlrun/db/nopdb.py +10 -1
  25. mlrun/errors.py +4 -0
  26. mlrun/execution.py +18 -10
  27. mlrun/feature_store/retrieval/spark_merger.py +2 -1
  28. mlrun/launcher/local.py +2 -2
  29. mlrun/model.py +30 -0
  30. mlrun/model_monitoring/api.py +6 -52
  31. mlrun/model_monitoring/applications/histogram_data_drift.py +4 -1
  32. mlrun/model_monitoring/db/stores/__init__.py +21 -9
  33. mlrun/model_monitoring/db/stores/base/store.py +39 -1
  34. mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
  35. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +4 -2
  36. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +34 -79
  37. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +19 -27
  38. mlrun/model_monitoring/db/tsdb/__init__.py +19 -14
  39. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +4 -2
  40. mlrun/model_monitoring/helpers.py +9 -5
  41. mlrun/model_monitoring/writer.py +1 -5
  42. mlrun/projects/operations.py +1 -0
  43. mlrun/projects/project.py +71 -75
  44. mlrun/render.py +10 -5
  45. mlrun/run.py +2 -2
  46. mlrun/runtimes/daskjob.py +7 -1
  47. mlrun/runtimes/local.py +24 -7
  48. mlrun/runtimes/nuclio/function.py +20 -0
  49. mlrun/runtimes/pod.py +5 -29
  50. mlrun/serving/routers.py +75 -59
  51. mlrun/serving/server.py +1 -0
  52. mlrun/serving/v2_serving.py +8 -1
  53. mlrun/utils/helpers.py +46 -2
  54. mlrun/utils/logger.py +36 -2
  55. mlrun/utils/notifications/notification/base.py +4 -0
  56. mlrun/utils/notifications/notification/git.py +21 -0
  57. mlrun/utils/notifications/notification/slack.py +8 -0
  58. mlrun/utils/notifications/notification/webhook.py +41 -1
  59. mlrun/utils/notifications/notification_pusher.py +2 -2
  60. mlrun/utils/version/version.json +2 -2
  61. {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc29.dist-info}/METADATA +9 -4
  62. {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc29.dist-info}/RECORD +66 -66
  63. {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc29.dist-info}/WHEEL +1 -1
  64. {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc29.dist-info}/LICENSE +0 -0
  65. {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc29.dist-info}/entry_points.txt +0 -0
  66. {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc29.dist-info}/top_level.txt +0 -0
mlrun/__main__.py CHANGED
@@ -50,12 +50,12 @@ from .run import (
50
50
  from .runtimes import RemoteRuntime, RunError, RuntimeKinds, ServingRuntime
51
51
  from .secrets import SecretsStore
52
52
  from .utils import (
53
+ RunKeys,
53
54
  dict_to_yaml,
54
55
  get_in,
55
56
  is_relative_path,
56
57
  list2dict,
57
58
  logger,
58
- run_keys,
59
59
  update_in,
60
60
  )
61
61
  from .utils.version import Version
@@ -380,15 +380,15 @@ def run(
380
380
  set_item(runobj.spec.hyper_param_options, hyper_param_strategy, "strategy")
381
381
  set_item(runobj.spec.hyper_param_options, selector, "selector")
382
382
 
383
- set_item(runobj.spec, inputs, run_keys.inputs, list2dict(inputs))
383
+ set_item(runobj.spec, inputs, RunKeys.inputs, list2dict(inputs))
384
384
  set_item(
385
- runobj.spec, returns, run_keys.returns, [py_eval(value) for value in returns]
385
+ runobj.spec, returns, RunKeys.returns, [py_eval(value) for value in returns]
386
386
  )
387
- set_item(runobj.spec, in_path, run_keys.input_path)
388
- set_item(runobj.spec, out_path, run_keys.output_path)
389
- set_item(runobj.spec, outputs, run_keys.outputs, list(outputs))
387
+ set_item(runobj.spec, in_path, RunKeys.input_path)
388
+ set_item(runobj.spec, out_path, RunKeys.output_path)
389
+ set_item(runobj.spec, outputs, RunKeys.outputs, list(outputs))
390
390
  set_item(
391
- runobj.spec, secrets, run_keys.secrets, line2keylist(secrets, "kind", "source")
391
+ runobj.spec, secrets, RunKeys.secrets, line2keylist(secrets, "kind", "source")
392
392
  )
393
393
  set_item(runobj.spec, verbose, "verbose")
394
394
  set_item(runobj.spec, scrape_metrics, "scrape_metrics")
mlrun/alerts/alert.py CHANGED
@@ -26,7 +26,6 @@ class AlertConfig(ModelObj):
26
26
  "description",
27
27
  "summary",
28
28
  "severity",
29
- "criteria",
30
29
  "reset_policy",
31
30
  "state",
32
31
  ]
@@ -34,6 +33,7 @@ class AlertConfig(ModelObj):
34
33
  "entities",
35
34
  "notifications",
36
35
  "trigger",
36
+ "criteria",
37
37
  ]
38
38
 
39
39
  def __init__(
@@ -104,6 +104,14 @@ class AlertConfig(ModelObj):
104
104
  else self.trigger
105
105
  )
106
106
  return None
107
+ if field_name == "criteria":
108
+ if self.criteria:
109
+ return (
110
+ self.criteria.dict()
111
+ if not isinstance(self.criteria, dict)
112
+ else self.criteria
113
+ )
114
+ return None
107
115
  return super()._serialize_field(struct, field_name, strip)
108
116
 
109
117
  def to_dict(self, fields: list = None, exclude: list = None, strip: bool = False):
@@ -137,6 +145,10 @@ class AlertConfig(ModelObj):
137
145
  trigger_obj = alert_objects.AlertTrigger.parse_obj(trigger_data)
138
146
  new_obj.trigger = trigger_obj
139
147
 
148
+ criteria_data = struct.get("criteria")
149
+ if criteria_data:
150
+ criteria_obj = alert_objects.AlertCriteria.parse_obj(criteria_data)
151
+ new_obj.criteria = criteria_obj
140
152
  return new_obj
141
153
 
142
154
  def with_notifications(self, notifications: list[alert_objects.AlertNotification]):
@@ -100,6 +100,11 @@ class ArtifactProducer:
100
100
 
101
101
  def dict_to_artifact(struct: dict) -> Artifact:
102
102
  kind = struct.get("kind", "")
103
+
104
+ # TODO: remove this in 1.8.0
105
+ if mlrun.utils.is_legacy_artifact(struct):
106
+ return mlrun.artifacts.base.convert_legacy_artifact_to_new_format(struct)
107
+
103
108
  artifact_class = artifact_types[kind]
104
109
  return artifact_class.from_dict(struct)
105
110
 
mlrun/common/constants.py CHANGED
@@ -64,12 +64,12 @@ class MLRunInternalLabels:
64
64
  username = f"{MLRUN_LABEL_PREFIX}username"
65
65
  username_domain = f"{MLRUN_LABEL_PREFIX}username_domain"
66
66
  task_name = f"{MLRUN_LABEL_PREFIX}task-name"
67
+ resource_name = f"{MLRUN_LABEL_PREFIX}resource_name"
68
+ created = f"{MLRUN_LABEL_PREFIX}created"
67
69
  host = "host"
68
70
  job_type = "job-type"
69
71
  kind = "kind"
70
72
  component = "component"
71
- resource_name = "resource_name"
72
- created = "mlrun-created"
73
73
 
74
74
  owner = "owner"
75
75
  v3io_user = "v3io_user"
@@ -28,42 +28,42 @@ class ObjectFormat:
28
28
  full = "full"
29
29
 
30
30
  @staticmethod
31
- def format_method(_format: str) -> typing.Optional[typing.Callable]:
31
+ def format_method(format_: str) -> typing.Optional[typing.Callable]:
32
32
  """
33
33
  Get the formatting method for the provided format.
34
34
  A `None` value signifies a pass-through formatting method (no formatting).
35
- :param _format: The format as a string representation.
35
+ :param format_: The format as a string representation.
36
36
  :return: The formatting method.
37
37
  """
38
38
  return {
39
39
  ObjectFormat.full: None,
40
- }[_format]
40
+ }[format_]
41
41
 
42
42
  @classmethod
43
43
  def format_obj(
44
44
  cls,
45
45
  obj: typing.Any,
46
- _format: str,
46
+ format_: str,
47
47
  exclude_formats: typing.Optional[list[str]] = None,
48
48
  ) -> typing.Any:
49
49
  """
50
50
  Format the provided object based on the provided format.
51
51
  :param obj: The object to format.
52
- :param _format: The format as a string representation.
52
+ :param format_: The format as a string representation.
53
53
  :param exclude_formats: A list of formats to exclude from the formatting process. If the provided format is in
54
54
  this list, an invalid format exception will be raised.
55
55
  """
56
56
  exclude_formats = exclude_formats or []
57
- _format = _format or cls.full
57
+ format_ = format_ or cls.full
58
58
  invalid_format_exc = mlrun.errors.MLRunBadRequestError(
59
- f"Provided format is not supported. format={_format}"
59
+ f"Provided format is not supported. format={format_}"
60
60
  )
61
61
 
62
- if _format in exclude_formats:
62
+ if format_ in exclude_formats:
63
63
  raise invalid_format_exc
64
64
 
65
65
  try:
66
- format_method = cls.format_method(_format)
66
+ format_method = cls.format_method(format_)
67
67
  except KeyError:
68
68
  raise invalid_format_exc
69
69
 
@@ -39,8 +39,8 @@ class EventKind(StrEnum):
39
39
  CONCEPT_DRIFT_SUSPECTED = "concept_drift_suspected"
40
40
  MODEL_PERFORMANCE_DETECTED = "model_performance_detected"
41
41
  MODEL_PERFORMANCE_SUSPECTED = "model_performance_suspected"
42
- MODEL_SERVING_PERFORMANCE_DETECTED = "model_serving_performance_detected"
43
- MODEL_SERVING_PERFORMANCE_SUSPECTED = "model_serving_performance_suspected"
42
+ SYSTEM_PERFORMANCE_DETECTED = "system_performance_detected"
43
+ SYSTEM_PERFORMANCE_SUSPECTED = "system_performance_suspected"
44
44
  MM_APP_ANOMALY_DETECTED = "mm_app_anomaly_detected"
45
45
  MM_APP_ANOMALY_SUSPECTED = "mm_app_anomaly_suspected"
46
46
  FAILED = "failed"
@@ -53,12 +53,8 @@ _event_kind_entity_map = {
53
53
  EventKind.CONCEPT_DRIFT_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
54
54
  EventKind.MODEL_PERFORMANCE_DETECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
55
55
  EventKind.MODEL_PERFORMANCE_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
56
- EventKind.MODEL_SERVING_PERFORMANCE_DETECTED: [
57
- EventEntityKind.MODEL_ENDPOINT_RESULT
58
- ],
59
- EventKind.MODEL_SERVING_PERFORMANCE_SUSPECTED: [
60
- EventEntityKind.MODEL_ENDPOINT_RESULT
61
- ],
56
+ EventKind.SYSTEM_PERFORMANCE_DETECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
57
+ EventKind.SYSTEM_PERFORMANCE_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
62
58
  EventKind.MM_APP_ANOMALY_DETECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
63
59
  EventKind.MM_APP_ANOMALY_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
64
60
  EventKind.FAILED: [EventEntityKind.JOB],
@@ -102,6 +102,13 @@ class APIGateway(_APIGatewayBaseModel):
102
102
  if upstream.nucliofunction.get("name")
103
103
  ]
104
104
 
105
+ def get_invoke_url(self):
106
+ return (
107
+ self.spec.host + self.spec.path
108
+ if self.spec.path and self.spec.host
109
+ else self.spec.host
110
+ )
111
+
105
112
  def enrich_mlrun_names(self):
106
113
  self._enrich_api_gateway_mlrun_name()
107
114
  self._enrich_mlrun_function_names()
@@ -120,10 +120,13 @@ class FeatureStorePartitionByField(mlrun.common.types.StrEnum):
120
120
 
121
121
  class RunPartitionByField(mlrun.common.types.StrEnum):
122
122
  name = "name" # Supported for runs objects
123
+ project_and_name = "project_and_name" # Supported for runs objects
123
124
 
124
125
  def to_partition_by_db_field(self, db_cls):
125
126
  if self.value == RunPartitionByField.name:
126
127
  return db_cls.name
128
+ elif self.value == RunPartitionByField.project_and_name:
129
+ return db_cls.project, db_cls.name
127
130
  else:
128
131
  raise mlrun.errors.MLRunInvalidArgumentError(
129
132
  f"Unknown group by field: {self.value}"
@@ -25,6 +25,7 @@ from .constants import (
25
25
  FunctionURI,
26
26
  MetricData,
27
27
  ModelEndpointTarget,
28
+ ModelEndpointTargetSchemas,
28
29
  ModelMonitoringMode,
29
30
  ModelMonitoringStoreKinds,
30
31
  MonitoringFunctionNames,
@@ -78,8 +78,6 @@ class EventFieldType:
78
78
  FEATURE_SET_URI = "monitoring_feature_set_uri"
79
79
  ALGORITHM = "algorithm"
80
80
  VALUE = "value"
81
- DRIFT_DETECTED_THRESHOLD = "drift_detected_threshold"
82
- POSSIBLE_DRIFT_THRESHOLD = "possible_drift_threshold"
83
81
  SAMPLE_PARQUET_PATH = "sample_parquet_path"
84
82
  TIME = "time"
85
83
  TABLE_COLUMN = "table_column"
@@ -158,19 +156,42 @@ class EventKeyMetrics:
158
156
  REAL_TIME = "real_time"
159
157
 
160
158
 
161
- class ModelEndpointTarget:
159
+ class ModelEndpointTarget(MonitoringStrEnum):
162
160
  V3IO_NOSQL = "v3io-nosql"
163
161
  SQL = "sql"
164
162
 
165
163
 
164
+ class StreamKind(MonitoringStrEnum):
165
+ V3IO_STREAM = "v3io_stream"
166
+ KAFKA = "kafka"
167
+
168
+
169
+ class TSDBTarget(MonitoringStrEnum):
170
+ V3IO_TSDB = "v3io-tsdb"
171
+ TDEngine = "tdengine"
172
+ PROMETHEUS = "prometheus"
173
+
174
+
166
175
  class ProjectSecretKeys:
167
176
  ENDPOINT_STORE_CONNECTION = "MODEL_MONITORING_ENDPOINT_STORE_CONNECTION"
168
177
  ACCESS_KEY = "MODEL_MONITORING_ACCESS_KEY"
169
- PIPELINES_ACCESS_KEY = "MODEL_MONITORING_PIPELINES_ACCESS_KEY"
170
- KAFKA_BROKERS = "KAFKA_BROKERS"
171
178
  STREAM_PATH = "STREAM_PATH"
172
179
  TSDB_CONNECTION = "TSDB_CONNECTION"
173
180
 
181
+ @classmethod
182
+ def mandatory_secrets(cls):
183
+ return [
184
+ cls.ENDPOINT_STORE_CONNECTION,
185
+ cls.STREAM_PATH,
186
+ cls.TSDB_CONNECTION,
187
+ ]
188
+
189
+
190
+ class ModelEndpointTargetSchemas(MonitoringStrEnum):
191
+ V3IO = "v3io"
192
+ MYSQL = "mysql"
193
+ SQLITE = "sqlite"
194
+
174
195
 
175
196
  class ModelMonitoringStoreKinds:
176
197
  ENDPOINTS = "endpoints"
@@ -318,7 +339,7 @@ class ResultKindApp(Enum):
318
339
  concept_drift = 1
319
340
  model_performance = 2
320
341
  system_performance = 3
321
- custom = 4
342
+ mm_app_anomaly = 4
322
343
 
323
344
 
324
345
  class ResultStatusApp(IntEnum):
@@ -344,12 +365,6 @@ class ControllerPolicy:
344
365
  BASE_PERIOD = "base_period"
345
366
 
346
367
 
347
- class TSDBTarget:
348
- V3IO_TSDB = "v3io-tsdb"
349
- TDEngine = "tdengine"
350
- PROMETHEUS = "prometheus"
351
-
352
-
353
368
  class HistogramDataDriftApplicationConstants:
354
369
  NAME = "histogram-data-drift"
355
370
  GENERAL_RESULT_NAME = "general_drift"
@@ -103,18 +103,6 @@ class ModelEndpointSpec(ObjectSpec):
103
103
  json_parse_values=json_parse_values,
104
104
  )
105
105
 
106
- @validator("monitor_configuration")
107
- @classmethod
108
- def set_name(cls, monitor_configuration):
109
- return monitor_configuration or {
110
- EventFieldType.DRIFT_DETECTED_THRESHOLD: (
111
- mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.drift_detected
112
- ),
113
- EventFieldType.POSSIBLE_DRIFT_THRESHOLD: (
114
- mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.possible_drift
115
- ),
116
- }
117
-
118
106
  @validator("model_uri")
119
107
  @classmethod
120
108
  def validate_model_uri(cls, model_uri):
@@ -96,7 +96,7 @@ class ScheduleUpdate(BaseModel):
96
96
  scheduled_object: Optional[Any]
97
97
  cron_trigger: Optional[Union[str, ScheduleCronTrigger]]
98
98
  desired_state: Optional[str]
99
- labels: Optional[dict] = {}
99
+ labels: Optional[dict] = None
100
100
  concurrency_limit: Optional[int]
101
101
  credentials: Credentials = Credentials()
102
102
 
mlrun/config.py CHANGED
@@ -64,11 +64,15 @@ default_config = {
64
64
  "api_base_version": "v1",
65
65
  "version": "", # will be set to current version
66
66
  "images_tag": "", # tag to use with mlrun images e.g. mlrun/mlrun (defaults to version)
67
- "images_registry": "", # registry to use with mlrun images e.g. quay.io/ (defaults to empty, for dockerhub)
67
+ # registry to use with mlrun images that start with "mlrun/" e.g. quay.io/ (defaults to empty, for dockerhub)
68
+ "images_registry": "",
69
+ # registry to use with non-mlrun images (don't start with "mlrun/") specified in 'images_to_enrich_registry'
70
+ # defaults to empty, for dockerhub
71
+ "vendor_images_registry": "",
68
72
  # comma separated list of images that are in the specified images_registry, and therefore will be enriched with this
69
73
  # registry when used. default to mlrun/* which means any image which is of the mlrun repository (mlrun/mlrun,
70
74
  # mlrun/ml-base, etc...)
71
- "images_to_enrich_registry": "^mlrun/*",
75
+ "images_to_enrich_registry": "^mlrun/*,python:3.9",
72
76
  "kfp_url": "",
73
77
  "kfp_ttl": "14400", # KFP ttl in sec, after that completed PODs will be deleted
74
78
  "kfp_image": "mlrun/mlrun", # image to use for KFP runner (defaults to mlrun/mlrun)
@@ -250,7 +254,7 @@ default_config = {
250
254
  "remote": "mlrun/mlrun",
251
255
  "dask": "mlrun/ml-base",
252
256
  "mpijob": "mlrun/mlrun",
253
- "application": "python:3.9-slim",
257
+ "application": "python:3.9",
254
258
  },
255
259
  # see enrich_function_preemption_spec for more info,
256
260
  # and mlrun.common.schemas.function.PreemptionModes for available options
@@ -504,13 +508,12 @@ default_config = {
504
508
  "model_endpoint_monitoring": {
505
509
  "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
506
510
  "application_stream_args": {"shard_count": 1, "retention_period_hours": 24},
507
- "drift_thresholds": {"default": {"possible_drift": 0.5, "drift_detected": 0.7}},
508
511
  # Store prefixes are used to handle model monitoring storing policies based on project and kind, such as events,
509
512
  # stream, and endpoints.
510
513
  "store_prefixes": {
511
514
  "default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
512
515
  "user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
513
- "stream": "",
516
+ "stream": "", # TODO: Delete in 1.9.0
514
517
  "monitoring_application": "v3io:///users/pipelines/{project}/monitoring-apps/",
515
518
  },
516
519
  # Offline storage path can be either relative or a full path. This path is used for general offline data
@@ -523,11 +526,12 @@ default_config = {
523
526
  "parquet_batching_max_events": 10_000,
524
527
  "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
525
528
  # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
526
- "store_type": "v3io-nosql",
529
+ "store_type": "v3io-nosql", # TODO: Delete in 1.9.0
527
530
  "endpoint_store_connection": "",
528
531
  # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
529
- "tsdb_connector_type": "v3io-tsdb",
530
532
  "tsdb_connection": "",
533
+ # See mlrun.common.schemas.model_monitoring.constants.StreamKind for available options
534
+ "stream_connection": "",
531
535
  },
532
536
  "secret_stores": {
533
537
  # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
@@ -660,7 +664,9 @@ default_config = {
660
664
  "failed_runs_grace_period": 3600,
661
665
  "verbose": True,
662
666
  # the number of workers which will be used to trigger the start log collection
663
- "concurrent_start_logs_workers": 15,
667
+ "concurrent_start_logs_workers": 50,
668
+ # the number of runs for which to start logs on api startup
669
+ "start_logs_startup_run_limit": 150,
664
670
  # the time in hours in which to start log collection from.
665
671
  # after upgrade, we might have runs which completed in the mean time or still in non-terminal state and
666
672
  # we want to collect their logs in the new log collection method (sidecar)
@@ -708,6 +714,8 @@ default_config = {
708
714
  # maximum number of alerts we allow to be configured.
709
715
  # user will get an error when exceeding this
710
716
  "max_allowed": 10000,
717
+ # maximum allowed value for count in criteria field inside AlertConfig
718
+ "max_criteria_count": 100,
711
719
  },
712
720
  "auth_with_client_id": {
713
721
  "enabled": False,
@@ -1118,7 +1126,6 @@ class Config:
1118
1126
  if store_prefix_dict.get(kind):
1119
1127
  # Target exist in store prefix and has a valid string value
1120
1128
  return store_prefix_dict[kind].format(project=project, **kwargs)
1121
-
1122
1129
  if (
1123
1130
  function_name
1124
1131
  and function_name
@@ -208,6 +208,7 @@ class AzureBlobStore(DataStore):
208
208
  for key in spark_options:
209
209
  if key.startswith(prefix):
210
210
  account_key = key[len(prefix) :]
211
- url += f"@{account_key}"
211
+ if not url.endswith(account_key):
212
+ url += f"@{account_key}"
212
213
  break
213
214
  return url
mlrun/datastore/base.py CHANGED
@@ -319,11 +319,7 @@ class DataStore:
319
319
  dfs.append(df_module.read_csv(*updated_args, **kwargs))
320
320
  return df_module.concat(dfs)
321
321
 
322
- elif (
323
- file_url.endswith(".parquet")
324
- or file_url.endswith(".pq")
325
- or format == "parquet"
326
- ):
322
+ elif mlrun.utils.helpers.is_parquet_file(file_url, format):
327
323
  if columns:
328
324
  kwargs["columns"] = columns
329
325
 
@@ -21,7 +21,7 @@ from mlrun.datastore.datastore_profile import datastore_profile_read
21
21
  from mlrun.errors import err_to_str
22
22
  from mlrun.utils.helpers import get_local_file_schema
23
23
 
24
- from ..utils import DB_SCHEMA, run_keys
24
+ from ..utils import DB_SCHEMA, RunKeys
25
25
  from .base import DataItem, DataStore, HttpStore
26
26
  from .filestore import FileStore
27
27
  from .inmem import InMemoryStore
@@ -133,7 +133,7 @@ class StoreManager:
133
133
  return self._db
134
134
 
135
135
  def from_dict(self, struct: dict):
136
- stor_list = struct.get(run_keys.data_stores)
136
+ stor_list = struct.get(RunKeys.data_stores)
137
137
  if stor_list and isinstance(stor_list, list):
138
138
  for stor in stor_list:
139
139
  schema, endpoint, parsed_url = parse_url(stor.get("url"))
@@ -145,7 +145,7 @@ class StoreManager:
145
145
  self._stores[stor["name"]] = new_stor
146
146
 
147
147
  def to_dict(self, struct):
148
- struct[run_keys.data_stores] = [
148
+ struct[RunKeys.data_stores] = [
149
149
  stor.to_dict() for stor in self._stores.values() if stor.from_spec
150
150
  ]
151
151
 
mlrun/datastore/inmem.py CHANGED
@@ -72,7 +72,7 @@ class InMemoryStore(DataStore):
72
72
  if columns:
73
73
  kwargs["usecols"] = columns
74
74
  reader = df_module.read_csv
75
- elif url.endswith(".parquet") or url.endswith(".pq") or format == "parquet":
75
+ elif mlrun.utils.helpers.is_parquet_file(url, format):
76
76
  if columns:
77
77
  kwargs["columns"] = columns
78
78
  reader = df_module.read_parquet
@@ -30,13 +30,15 @@ def get_snowflake_password():
30
30
 
31
31
 
32
32
  def get_snowflake_spark_options(attributes):
33
+ if not attributes:
34
+ return {}
33
35
  return {
34
36
  "format": "net.snowflake.spark.snowflake",
35
37
  "sfURL": attributes.get("url"),
36
38
  "sfUser": attributes.get("user"),
37
39
  "sfPassword": get_snowflake_password(),
38
40
  "sfDatabase": attributes.get("database"),
39
- "sfSchema": attributes.get("schema"),
41
+ "sfSchema": attributes.get("db_schema"),
40
42
  "sfWarehouse": attributes.get("warehouse"),
41
43
  "application": "iguazio_platform",
42
44
  "TIMESTAMP_TYPE_MAPPING": "TIMESTAMP_LTZ",
@@ -747,7 +747,7 @@ class SnowflakeSource(BaseSourceDriver):
747
747
  url="...",
748
748
  user="...",
749
749
  database="...",
750
- schema="...",
750
+ db_schema="...",
751
751
  warehouse="...",
752
752
  )
753
753
 
@@ -762,7 +762,8 @@ class SnowflakeSource(BaseSourceDriver):
762
762
  :parameter url: URL of the snowflake cluster
763
763
  :parameter user: snowflake user
764
764
  :parameter database: snowflake database
765
- :parameter schema: snowflake schema
765
+ :parameter schema: snowflake schema - deprecated, use db_schema
766
+ :parameter db_schema: snowflake schema
766
767
  :parameter warehouse: snowflake warehouse
767
768
  """
768
769
 
@@ -774,6 +775,7 @@ class SnowflakeSource(BaseSourceDriver):
774
775
  self,
775
776
  name: str = "",
776
777
  key_field: str = None,
778
+ attributes: dict[str, object] = None,
777
779
  time_field: str = None,
778
780
  schedule: str = None,
779
781
  start_time=None,
@@ -783,21 +785,34 @@ class SnowflakeSource(BaseSourceDriver):
783
785
  user: str = None,
784
786
  database: str = None,
785
787
  schema: str = None,
788
+ db_schema: str = None,
786
789
  warehouse: str = None,
787
790
  **kwargs,
788
791
  ):
789
- attrs = {
790
- "query": query,
791
- "url": url,
792
- "user": user,
793
- "database": database,
794
- "schema": schema,
795
- "warehouse": warehouse,
796
- }
792
+ # TODO: Remove in 1.9.0
793
+ if schema:
794
+ warnings.warn(
795
+ "schema is deprecated in 1.7.0, and will be removed in 1.9.0, please use db_schema"
796
+ )
797
+ db_schema = db_schema or schema # TODO: Remove in 1.9.0
798
+
799
+ attributes = attributes or {}
800
+ if url:
801
+ attributes["url"] = url
802
+ if user:
803
+ attributes["user"] = user
804
+ if database:
805
+ attributes["database"] = database
806
+ if db_schema:
807
+ attributes["db_schema"] = db_schema
808
+ if warehouse:
809
+ attributes["warehouse"] = warehouse
810
+ if query:
811
+ attributes["query"] = query
797
812
 
798
813
  super().__init__(
799
814
  name,
800
- attributes=attrs,
815
+ attributes=attributes,
801
816
  key_field=key_field,
802
817
  time_field=time_field,
803
818
  schedule=schedule,
@@ -27,6 +27,8 @@ from .targets import get_online_target
27
27
 
28
28
  def is_store_uri(url):
29
29
  """detect if the uri starts with the store schema prefix"""
30
+ if not url:
31
+ return False
30
32
  return url.startswith(DB_SCHEMA + "://")
31
33
 
32
34