mlrun 1.8.0rc19__py3-none-any.whl → 1.8.0rc26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)
  1. mlrun/__init__.py +37 -3
  2. mlrun/__main__.py +5 -0
  3. mlrun/alerts/alert.py +1 -0
  4. mlrun/artifacts/document.py +78 -36
  5. mlrun/common/formatters/feature_set.py +1 -0
  6. mlrun/common/runtimes/constants.py +17 -0
  7. mlrun/common/schemas/alert.py +3 -0
  8. mlrun/common/schemas/client_spec.py +0 -1
  9. mlrun/common/schemas/model_monitoring/constants.py +32 -9
  10. mlrun/common/schemas/model_monitoring/model_endpoints.py +2 -0
  11. mlrun/common/schemas/workflow.py +1 -0
  12. mlrun/config.py +39 -6
  13. mlrun/datastore/datastore_profile.py +58 -16
  14. mlrun/datastore/sources.py +7 -1
  15. mlrun/datastore/vectorstore.py +20 -1
  16. mlrun/db/base.py +20 -0
  17. mlrun/db/httpdb.py +97 -10
  18. mlrun/db/nopdb.py +19 -0
  19. mlrun/errors.py +4 -0
  20. mlrun/execution.py +15 -6
  21. mlrun/frameworks/_common/model_handler.py +0 -2
  22. mlrun/launcher/client.py +2 -2
  23. mlrun/launcher/local.py +5 -1
  24. mlrun/model_monitoring/applications/_application_steps.py +3 -1
  25. mlrun/model_monitoring/controller.py +266 -103
  26. mlrun/model_monitoring/db/tsdb/__init__.py +11 -23
  27. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +2 -0
  28. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +20 -21
  29. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -34
  30. mlrun/model_monitoring/helpers.py +16 -10
  31. mlrun/model_monitoring/stream_processing.py +106 -35
  32. mlrun/package/context_handler.py +1 -1
  33. mlrun/package/packagers_manager.py +4 -18
  34. mlrun/projects/pipelines.py +18 -5
  35. mlrun/projects/project.py +156 -39
  36. mlrun/runtimes/nuclio/serving.py +22 -13
  37. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  38. mlrun/secrets.py +1 -1
  39. mlrun/serving/server.py +11 -3
  40. mlrun/serving/states.py +65 -8
  41. mlrun/serving/v2_serving.py +67 -44
  42. mlrun/utils/helpers.py +111 -23
  43. mlrun/utils/notifications/notification/base.py +6 -1
  44. mlrun/utils/notifications/notification/slack.py +5 -1
  45. mlrun/utils/notifications/notification_pusher.py +67 -36
  46. mlrun/utils/version/version.json +2 -2
  47. {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/METADATA +33 -16
  48. {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/RECORD +52 -52
  49. {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/WHEEL +1 -1
  50. {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/LICENSE +0 -0
  51. {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/entry_points.txt +0 -0
  52. {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/top_level.txt +0 -0
mlrun/__init__.py CHANGED
@@ -213,7 +213,41 @@ def set_env_from_file(env_file: str, return_dict: bool = False) -> Optional[dict
     env_vars = dotenv.dotenv_values(env_file)
     if None in env_vars.values():
         raise MLRunInvalidArgumentError("env file lines must be in the form key=value")
-    for key, value in env_vars.items():
-        environ[key] = value  # Load to local environ
+
+    ordered_env_vars = order_env_vars(env_vars)
+    for key, value in ordered_env_vars.items():
+        environ[key] = value
+
     mlconf.reload()  # reload mlrun configuration
-    return env_vars if return_dict else None
+    return ordered_env_vars if return_dict else None
+
+
+def order_env_vars(env_vars: dict[str, str]) -> dict[str, str]:
+    """
+    Order and process environment variables by first handling specific ordered keys,
+    then processing the remaining keys in the given dictionary.
+
+    The function ensures that environment variables defined in the `ordered_keys` list
+    are added to the result dictionary first. Any other environment variables from
+    `env_vars` are then added in the order they appear in the input dictionary.
+
+    :param env_vars: A dictionary where each key is the name of an environment variable (str),
+        and each value is the corresponding environment variable value (str).
+    :return: A dictionary with the processed environment variables, ordered with the specific
+        keys first, followed by the rest in their original order.
+    """
+    ordered_keys = mlconf.get_ordered_keys()
+
+    ordered_env_vars: dict[str, str] = {}
+
+    # First, add the ordered keys to the dictionary
+    for key in ordered_keys:
+        if key in env_vars:
+            ordered_env_vars[key] = env_vars[key]
+
+    # Then, add the remaining keys (those not in ordered_keys)
+    for key, value in env_vars.items():
+        if key not in ordered_keys:
+            ordered_env_vars[key] = value
+
+    return ordered_env_vars
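Together with the new `Config.get_ordered_keys` added in mlrun/config.py (further below), this makes `set_env_from_file` apply selected keys, currently only `MLRUN_HTTPDB__HTTP__VERIFY`, before the remaining variables. A minimal standalone sketch of the resulting ordering, using hypothetical env-file values:

    ordered_keys = ["MLRUN_HTTPDB__HTTP__VERIFY"]  # what mlconf.get_ordered_keys() returns in this release

    env_vars = {
        "MLRUN_DBPATH": "https://mlrun-api.example.com",  # hypothetical value
        "MLRUN_HTTPDB__HTTP__VERIFY": "false",            # hypothetical value
    }

    # Same two-pass ordering as order_env_vars above
    ordered_env_vars = {key: env_vars[key] for key in ordered_keys if key in env_vars}
    ordered_env_vars.update({k: v for k, v in env_vars.items() if k not in ordered_keys})

    print(list(ordered_env_vars))  # ['MLRUN_HTTPDB__HTTP__VERIFY', 'MLRUN_DBPATH']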
mlrun/__main__.py CHANGED
@@ -32,6 +32,7 @@ from tabulate import tabulate
 import mlrun
 import mlrun.common.constants as mlrun_constants
 import mlrun.common.schemas
+import mlrun.utils.helpers
 from mlrun.common.helpers import parse_versioned_object_uri
 from mlrun.runtimes.mounts import auto_mount as auto_mount_modifier
 
@@ -304,6 +305,7 @@ def run(
         update_in(runtime, "spec.build.code_origin", url_file)
     elif runtime:
         runtime = py_eval(runtime)
+        runtime = mlrun.utils.helpers.as_dict(runtime)
     if not isinstance(runtime, dict):
         print(f"Runtime parameter must be a dict, not {type(runtime)}")
         exit(1)
@@ -515,6 +517,7 @@ def build(
 
     if runtime:
         runtime = py_eval(runtime)
+        runtime = mlrun.utils.helpers.as_dict(runtime)
         if not isinstance(runtime, dict):
             print(f"Runtime parameter must be a dict, not {type(runtime)}")
             exit(1)
@@ -662,6 +665,8 @@ def deploy(
         runtime = py_eval(spec)
     else:
         runtime = {}
+
+    runtime = mlrun.utils.helpers.as_dict(runtime)
     if not isinstance(runtime, dict):
         print(f"Runtime parameter must be a dict, not {type(runtime)}")
         exit(1)
mlrun/alerts/alert.py CHANGED
@@ -57,6 +57,7 @@ class AlertConfig(ModelObj):
         created: Optional[str] = None,
         count: Optional[int] = None,
         updated: Optional[str] = None,
+        **kwargs,
     ):
         """Alert config object
 
mlrun/artifacts/document.py CHANGED
@@ -20,10 +20,12 @@ from importlib import import_module
 from typing import Optional, Union
 
 import mlrun
+import mlrun.artifacts
 from mlrun.artifacts import Artifact, ArtifactSpec
 from mlrun.model import ModelObj
 
 from ..utils import generate_artifact_uri
+from .base import ArtifactStatus
 
 
 class DocumentLoaderSpec(ModelObj):
@@ -41,13 +43,13 @@ class DocumentLoaderSpec(ModelObj):
 
     """
 
-    _dict_fields = ["loader_class_name", "src_name", "kwargs"]
+    _dict_fields = ["loader_class_name", "src_name", "download_object", "kwargs"]
 
     def __init__(
         self,
         loader_class_name: str = "langchain_community.document_loaders.TextLoader",
         src_name: str = "file_path",
-        download_object: bool = False,
+        download_object: bool = True,
         kwargs: Optional[dict] = None,
     ):
         """
@@ -191,6 +193,14 @@ class MLRunLoader:
             self.producer = mlrun.get_or_create_project(self.producer)
 
     def lazy_load(self) -> Iterator["Document"]:  # noqa: F821
+        collections = None
+        try:
+            artifact = self.producer.get_artifact(self.artifact_key, self.tag)
+            collections = (
+                artifact.status.collections if artifact else collections
+            )
+        except mlrun.MLRunNotFoundError:
+            pass
         artifact = self.producer.log_document(
             key=self.artifact_key,
             document_loader_spec=self.loader_spec,
@@ -198,6 +208,7 @@ class MLRunLoader:
             upload=self.upload,
             labels=self.labels,
             tag=self.tag,
+            collections=collections,
         )
         res = artifact.to_langchain_documents()
         return res
@@ -217,30 +228,8 @@ class MLRunLoader:
     @staticmethod
     def artifact_key_instance(artifact_key: str, src_path: str) -> str:
         if "%%" in artifact_key:
-            pattern = mlrun.utils.regex.artifact_key[0]
-            # Convert anchored pattern (^...$) to non-anchored version for finditer
-            search_pattern = pattern.strip("^$")
-            result = []
-            current_pos = 0
-
-            # Find all valid sequences
-            for match in re.finditer(search_pattern, src_path):
-                # Add hex values for characters between matches
-                for char in src_path[current_pos : match.start()]:
-                    result.append(hex(ord(char))[2:].zfill(2))
-
-                # Add the valid sequence
-                result.append(match.group())
-                current_pos = match.end()
-
-            # Handle any remaining characters after the last match
-            for char in src_path[current_pos:]:
-                result.append(hex(ord(char))[2:].zfill(2))
-
-            resolved_path = "".join(result)
-
+            resolved_path = DocumentArtifact.key_from_source(src_path)
             artifact_key = artifact_key.replace("%%", resolved_path)
-
         return artifact_key
 
 
@@ -249,29 +238,70 @@ class DocumentArtifact(Artifact):
     A specific artifact class inheriting from generic artifact, used to maintain Document meta-data.
     """
 
+    @staticmethod
+    def key_from_source(src_path: str) -> str:
+        """Convert a source path into a valid artifact key by replacing invalid characters with underscores.
+        Args:
+            src_path (str): The source path to be converted into a valid artifact key
+        Returns:
+            str: A modified version of the source path where all invalid characters are replaced
+            with underscores while preserving valid sequences in their original positions
+        Examples:
+            >>> DocumentArtifact.key_from_source("data/file-name(v1).txt")
+            "data_file-name_v1__txt"
+        """
+        pattern = mlrun.utils.regex.artifact_key[0]
+        # Convert anchored pattern (^...$) to non-anchored version for finditer
+        search_pattern = pattern.strip("^$")
+        result = []
+        current_pos = 0
+
+        # Find all valid sequences
+        for match in re.finditer(search_pattern, src_path):
+            # Add '_' values for characters between matches
+            for char in src_path[current_pos : match.start()]:
+                result.append("_")
+
+            # Add the valid sequence
+            result.append(match.group())
+            current_pos = match.end()
+
+        # Handle any remaining characters after the last match
+        for char in src_path[current_pos:]:
+            result.append("_")
+
+        resolved_path = "".join(result)
+        return resolved_path
+
     class DocumentArtifactSpec(ArtifactSpec):
         _dict_fields = ArtifactSpec._dict_fields + [
             "document_loader",
-            "collections",
             "original_source",
         ]
-        _exclude_fields_from_uid_hash = ArtifactSpec._exclude_fields_from_uid_hash + [
-            "collections",
-        ]
 
         def __init__(
            self,
            *args,
            document_loader: Optional[DocumentLoaderSpec] = None,
-            collections: Optional[dict] = None,
            original_source: Optional[str] = None,
            **kwargs,
        ):
            super().__init__(*args, **kwargs)
            self.document_loader = document_loader
-            self.collections = collections if collections is not None else {}
            self.original_source = original_source
 
+    class DocumentArtifactStatus(ArtifactStatus):
+        _dict_fields = ArtifactStatus._dict_fields + ["collections"]
+
+        def __init__(
+            self,
+            *args,
+            collections: Optional[dict] = None,
+            **kwargs,
+        ):
+            super().__init__(*args, **kwargs)
+            self.collections = collections if collections is not None else {}
+
     kind = "document"
 
     METADATA_SOURCE_KEY = "source"
@@ -286,6 +316,7 @@ class DocumentArtifact(Artifact):
         self,
         original_source: Optional[str] = None,
         document_loader_spec: Optional[DocumentLoaderSpec] = None,
+        collections: Optional[dict] = None,
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -295,6 +326,17 @@ class DocumentArtifact(Artifact):
             else self.spec.document_loader
         )
         self.spec.original_source = original_source or self.spec.original_source
+        self.status = DocumentArtifact.DocumentArtifactStatus(collections=collections)
+
+    @property
+    def status(self) -> DocumentArtifactStatus:
+        return self._status
+
+    @status.setter
+    def status(self, status):
+        self._status = self._verify_dict(
+            status, "status", DocumentArtifact.DocumentArtifactStatus
+        )
 
     @property
     def spec(self) -> DocumentArtifactSpec:
@@ -355,7 +397,7 @@ class DocumentArtifact(Artifact):
         metadata[self.METADATA_ORIGINAL_SOURCE_KEY] = self.spec.original_source
         metadata[self.METADATA_SOURCE_KEY] = self.get_source()
         metadata[self.METADATA_ARTIFACT_TAG] = self.tag or "latest"
-        metadata[self.METADATA_ARTIFACT_KEY] = self.key
+        metadata[self.METADATA_ARTIFACT_KEY] = self.db_key
         metadata[self.METADATA_ARTIFACT_PROJECT] = self.metadata.project
 
         if self.get_target_path():
@@ -386,8 +428,8 @@ class DocumentArtifact(Artifact):
         Args:
             collection_id (str): The ID of the collection to add
         """
-        if collection_id not in self.spec.collections:
-            self.spec.collections[collection_id] = "1"
+        if collection_id not in self.status.collections:
+            self.status.collections[collection_id] = "1"
             return True
         return False
 
@@ -403,7 +445,7 @@ class DocumentArtifact(Artifact):
         Args:
             collection_id (str): The ID of the collection to remove
         """
-        if collection_id in self.spec.collections:
-            self.spec.collections.pop(collection_id)
+        if collection_id in self.status.collections:
+            self.status.collections.pop(collection_id)
             return True
         return False
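The new `DocumentArtifact.key_from_source` keeps the character-filtering approach of the old inline code in `artifact_key_instance`, but substitutes underscores for the previous hex encoding of invalid characters. A self-contained sketch of the same logic, assuming a simplified valid-character pattern in place of `mlrun.utils.regex.artifact_key[0]`:

    import re

    # Assumed stand-in for mlrun.utils.regex.artifact_key[0] with its ^...$ anchors stripped
    SEARCH_PATTERN = r"[A-Za-z0-9_-]+"

    def key_from_source_sketch(src_path: str) -> str:
        result, pos = [], 0
        for match in re.finditer(SEARCH_PATTERN, src_path):
            result.extend("_" * (match.start() - pos))  # invalid characters become '_'
            result.append(match.group())                # valid runs are kept as-is
            pos = match.end()
        result.extend("_" * (len(src_path) - pos))      # trailing invalid characters
        return "".join(result)

    print(key_from_source_sketch("data/file-name(v1).txt"))  # data_file-name_v1__txt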
mlrun/common/formatters/feature_set.py CHANGED
@@ -33,6 +33,7 @@ class FeatureSetFormat(ObjectFormat, mlrun.common.types.StrEnum):
             "metadata.name",
             "metadata.project",
             "metadata.tag",
+            "metadata.updated",
             "metadata.uid",
             "metadata.labels",
             "spec.entities",
mlrun/common/runtimes/constants.py CHANGED
@@ -214,6 +214,23 @@ class RunStates:
             RunStates.skipped: mlrun_pipelines.common.models.RunStatuses.skipped,
         }[run_state]
 
+    @staticmethod
+    def pipeline_run_status_to_run_state(pipeline_run_status):
+        if pipeline_run_status not in mlrun_pipelines.common.models.RunStatuses.all():
+            raise ValueError(f"Invalid pipeline run status: {pipeline_run_status}")
+        return {
+            mlrun_pipelines.common.models.RunStatuses.succeeded: RunStates.completed,
+            mlrun_pipelines.common.models.RunStatuses.failed: RunStates.error,
+            mlrun_pipelines.common.models.RunStatuses.running: RunStates.running,
+            mlrun_pipelines.common.models.RunStatuses.pending: RunStates.pending,
+            mlrun_pipelines.common.models.RunStatuses.canceled: RunStates.aborted,
+            mlrun_pipelines.common.models.RunStatuses.canceling: RunStates.aborting,
+            mlrun_pipelines.common.models.RunStatuses.skipped: RunStates.skipped,
+            mlrun_pipelines.common.models.RunStatuses.runtime_state_unspecified: RunStates.unknown,
+            mlrun_pipelines.common.models.RunStatuses.error: RunStates.error,
+            mlrun_pipelines.common.models.RunStatuses.paused: RunStates.unknown,
+        }[pipeline_run_status]
+
 
 # TODO: remove this class in 1.9.0 - use only MlrunInternalLabels
 class RunLabels(enum.Enum):
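The new `pipeline_run_status_to_run_state` is the inverse of the existing run-state-to-pipeline-status mapping, and it rejects unknown statuses instead of defaulting them. A hedged usage sketch, assuming the import paths shown in the hunk:

    import mlrun_pipelines.common.models as pipeline_models
    from mlrun.common.runtimes.constants import RunStates

    state = RunStates.pipeline_run_status_to_run_state(pipeline_models.RunStatuses.succeeded)
    assert state == RunStates.completed

    # Any status outside RunStatuses.all() raises ValueError rather than mapping to "unknown"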
mlrun/common/schemas/alert.py CHANGED
@@ -160,6 +160,9 @@ class AlertConfig(pydantic.v1.BaseModel):
     count: Optional[int] = 0
     updated: datetime = None
 
+    class Config:
+        extra = pydantic.v1.Extra.allow
+
     def get_raw_notifications(self) -> list[notification_objects.Notification]:
         return [
             alert_notification.notification for alert_notification in self.notifications
mlrun/common/schemas/client_spec.py CHANGED
@@ -57,7 +57,6 @@ class ClientSpec(pydantic.v1.BaseModel):
     redis_url: typing.Optional[str]
     redis_type: typing.Optional[str]
     sql_url: typing.Optional[str]
-    model_monitoring_tsdb_connection: typing.Optional[str]
     ce: typing.Optional[dict]
     # not passing them as one object as it possible client user would like to override only one of the params
     calculate_artifact_hash: typing.Optional[str]
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -61,6 +61,7 @@ class ModelEndpointSchema(MonitoringStrEnum):
     STATE = "state"
     MONITORING_MODE = "monitoring_mode"
     FIRST_REQUEST = "first_request"
+    SAMPLING_PERCENTAGE = "sampling_percentage"
 
     # status - operative
     LAST_REQUEST = "last_request"
@@ -137,6 +138,10 @@ class EventFieldType:
     SAMPLE_PARQUET_PATH = "sample_parquet_path"
     TIME = "time"
     TABLE_COLUMN = "table_column"
+    SAMPLING_PERCENTAGE = "sampling_percentage"
+    SAMPLING_RATE = "sampling_rate"
+    ESTIMATED_PREDICTION_COUNT = "estimated_prediction_count"
+    EFFECTIVE_SAMPLE_COUNT = "effective_sample_count"
 
 
 class FeatureSetFeatures(MonitoringStrEnum):
@@ -178,6 +183,25 @@ class WriterEventKind(MonitoringStrEnum):
     STATS = "stats"
 
 
+class ControllerEvent(MonitoringStrEnum):
+    KIND = "kind"
+    ENDPOINT_ID = "endpoint_id"
+    ENDPOINT_NAME = "endpoint_name"
+    PROJECT = "project"
+    TIMESTAMP = "timestamp"
+    FIRST_REQUEST = "first_request"
+    FEATURE_SET_URI = "feature_set_uri"
+    ENDPOINT_TYPE = "endpoint_type"
+    ENDPOINT_POLICY = "endpoint_policy"
+    # Note: currently under endpoint policy we will have a dictionary including the keys: "application_names"
+    # and "base_period"
+
+
+class ControllerEventKind(MonitoringStrEnum):
+    NOP_EVENT = "nop_event"
+    REGULAR_EVENT = "regular_event"
+
+
 class MetricData(MonitoringStrEnum):
     METRIC_NAME = "metric_name"
     METRIC_VALUE = "metric_value"
@@ -223,28 +247,26 @@ class ModelEndpointTarget(MonitoringStrEnum):
     SQL = "sql"
 
 
-class StreamKind(MonitoringStrEnum):
-    V3IO_STREAM = "v3io_stream"
-    KAFKA = "kafka"
-
-
 class TSDBTarget(MonitoringStrEnum):
     V3IO_TSDB = "v3io-tsdb"
     TDEngine = "tdengine"
 
 
+class DefaultProfileName(StrEnum):
+    STREAM = "mm-infra-stream"
+    TSDB = "mm-infra-tsdb"
+
+
 class ProjectSecretKeys:
     ACCESS_KEY = "MODEL_MONITORING_ACCESS_KEY"
-    STREAM_PATH = "STREAM_PATH"
-    TSDB_CONNECTION = "TSDB_CONNECTION"
     TSDB_PROFILE_NAME = "TSDB_PROFILE_NAME"
     STREAM_PROFILE_NAME = "STREAM_PROFILE_NAME"
 
     @classmethod
     def mandatory_secrets(cls):
         return [
-            cls.STREAM_PATH,
-            cls.TSDB_CONNECTION,
+            cls.STREAM_PROFILE_NAME,
+            cls.TSDB_PROFILE_NAME,
         ]
 
 
@@ -306,6 +328,7 @@ class V3IOTSDBTables(MonitoringStrEnum):
     METRICS = "metrics"
     EVENTS = "events"
     ERRORS = "errors"
+    PREDICTIONS = "predictions"
 
 
 class TDEngineSuperTables(MonitoringStrEnum):
mlrun/common/schemas/model_monitoring/model_endpoints.py CHANGED
@@ -160,6 +160,7 @@ class ModelEndpointStatus(ObjectStatus, ModelEndpointParser):
     state: Optional[str] = "unknown"  # will be updated according to the function state
     first_request: Optional[datetime] = None
     monitoring_mode: Optional[ModelMonitoringMode] = ModelMonitoringMode.disabled
+    sampling_percentage: Optional[float] = 100
 
     # operative
     last_request: Optional[datetime] = None
@@ -177,6 +178,7 @@ class ModelEndpointStatus(ObjectStatus, ModelEndpointParser):
             "monitoring_mode",
             "first_request",
             "last_request",
+            "sampling_percentage",
         ]
 
 
mlrun/common/schemas/workflow.py CHANGED
@@ -62,3 +62,4 @@ class EngineType(StrEnum):
     LOCAL = "local"
     REMOTE = "remote"
     KFP = "kfp"
+    REMOTE_KFP = "remote:kfp"
mlrun/config.py CHANGED
@@ -537,6 +537,8 @@ default_config = {
         },
         "pagination": {
             "default_page_size": 200,
+            "page_limit": 1000000,
+            "page_size_limit": 1000000,
            "pagination_cache": {
                "interval": 60,
                "ttl": 3600,
@@ -594,6 +596,22 @@ default_config = {
                "max_replicas": 1,
            },
        },
+        "controller_stream_args": {
+            "v3io": {
+                "shard_count": 10,
+                "retention_period_hours": 24,
+                "num_workers": 10,
+                "min_replicas": 1,
+                "max_replicas": 1,
+            },
+            "kafka": {
+                "partition_count": 10,
+                "replication_factor": 1,
+                "num_workers": 10,
+                "min_replicas": 1,
+                "max_replicas": 1,
+            },
+        },
        # Store prefixes are used to handle model monitoring storing policies based on project and kind, such as events,
        # stream, and endpoints.
        "store_prefixes": {
@@ -606,10 +624,6 @@ default_config = {
        "offline_storage_path": "model-endpoints/{kind}",
        "parquet_batching_max_events": 10_000,
        "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
-        # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
-        "tsdb_connection": "",
-        # See mlrun.common.schemas.model_monitoring.constants.StreamKind for available options
-        "stream_connection": "",
        "tdengine": {
            "timeout": 10,
            "retries": 1,
@@ -727,6 +741,7 @@ default_config = {
    },
    "workflows": {
        "default_workflow_runner_name": "workflow-runner-{}",
+        "concurrent_delete_worker_count": 20,
        # Default timeout seconds for retrieving workflow id after execution
        # Remote workflow timeout is the maximum between remote and the inner engine timeout
        "timeouts": {"local": 120, "kfp": 60, "remote": 60 * 5},
@@ -799,7 +814,7 @@ default_config = {
        # maximum allowed value for count in criteria field inside AlertConfig
        "max_criteria_count": 100,
        # interval for periodic events generation job
-        "events_generation_interval": "30",
+        "events_generation_interval": 30,  # seconds
    },
    "auth_with_client_id": {
        "enabled": False,
@@ -1282,6 +1297,8 @@ class Config:
            function_name
            and function_name
            != mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.STREAM
+            and function_name
+            != mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.APPLICATION_CONTROLLER
        ):
            return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
                project=project,
@@ -1289,12 +1306,21 @@ class Config:
                if function_name is None
                else f"{kind}-{function_name.lower()}",
            )
-        elif kind == "stream":
+        elif (
+            kind == "stream"
+            and function_name
+            != mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.APPLICATION_CONTROLLER
+        ):
            return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
                project=project,
                kind=kind,
            )
        else:
+            if (
+                function_name
+                == mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.APPLICATION_CONTROLLER
+            ):
+                kind = function_name
            return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
                project=project,
                kind=kind,
@@ -1363,6 +1389,13 @@ class Config:
            >= semver.VersionInfo.parse("1.12.10")
        )
 
+    @staticmethod
+    def get_ordered_keys():
+        # Define the keys to process first
+        return [
+            "MLRUN_HTTPDB__HTTP__VERIFY"  # Ensure this key is processed first for proper connection setup
+        ]
+
 
 # Global configuration
 config = Config.from_dict(default_config)
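The new defaults are reachable through `mlrun.mlconf` like any other config entry. A hedged sketch; the nesting of `controller_stream_args` under `model_endpoint_monitoring` is inferred from the surrounding hunk context rather than stated explicitly in this diff:

    import mlrun

    stream_args = mlrun.mlconf.model_endpoint_monitoring.controller_stream_args
    print(stream_args.v3io.shard_count)       # 10 by default
    print(stream_args.kafka.partition_count)  # 10 by default

    # The ordered-keys hook consumed by set_env_from_file (see mlrun/__init__.py above)
    print(mlrun.mlconf.get_ordered_keys())    # ["MLRUN_HTTPDB__HTTP__VERIFY"]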
mlrun/datastore/datastore_profile.py CHANGED
@@ -17,7 +17,7 @@ import base64
 import json
 import typing
 import warnings
-from urllib.parse import ParseResult, urlparse, urlunparse
+from urllib.parse import ParseResult, urlparse
 
 import pydantic.v1
 from mergedeep import merge
@@ -211,9 +211,10 @@ class DatastoreProfileKafkaSource(DatastoreProfile):
             attributes["partitions"] = self.partitions
         sasl = attributes.pop("sasl", {})
         if self.sasl_user and self.sasl_pass:
-            sasl["enabled"] = True
+            sasl["enable"] = True
             sasl["user"] = self.sasl_user
             sasl["password"] = self.sasl_pass
+            sasl["mechanism"] = "PLAIN"
         if sasl:
             attributes["sasl"] = sasl
         return attributes
@@ -312,7 +313,7 @@ class DatastoreProfileRedis(DatastoreProfile):
            query=parsed_url.query,
            fragment=parsed_url.fragment,
        )
-        return urlunparse(new_parsed_url)
+        return new_parsed_url.geturl()
 
    def secrets(self) -> dict:
        res = {}
@@ -473,6 +474,59 @@ class DatastoreProfileHdfs(DatastoreProfile):
         return f"webhdfs://{self.host}:{self.http_port}{subpath}"
 
 
+class TDEngineDatastoreProfile(DatastoreProfile):
+    """
+    A profile that holds the required parameters for a TDEngine database, with the websocket scheme.
+    https://docs.tdengine.com/developer-guide/connecting-to-tdengine/#websocket-connection
+    """
+
+    type: str = pydantic.v1.Field("taosws")
+    _private_attributes = ["password"]
+    user: str
+    # The password cannot be empty in real world scenarios. It's here just because of the profiles completion design.
+    password: typing.Optional[str]
+    host: str
+    port: int
+
+    def dsn(self) -> str:
+        """Get the Data Source Name of the configured TDEngine profile."""
+        return f"{self.type}://{self.user}:{self.password}@{self.host}:{self.port}"
+
+    @classmethod
+    def from_dsn(cls, dsn: str, profile_name: str) -> "TDEngineDatastoreProfile":
+        """
+        Construct a TDEngine profile from DSN (connection string) and a name for the profile.
+
+        :param dsn: The DSN (Data Source Name) of the TDEngine database, e.g.: ``"taosws://root:taosdata@localhost:6041"``.
+        :param profile_name: The new profile's name.
+        :return: The TDEngine profile.
+        """
+        parsed_url = urlparse(dsn)
+        return cls(
+            name=profile_name,
+            user=parsed_url.username,
+            password=parsed_url.password,
+            host=parsed_url.hostname,
+            port=parsed_url.port,
+        )
+
+
+_DATASTORE_TYPE_TO_PROFILE_CLASS: dict[str, type[DatastoreProfile]] = {
+    "v3io": DatastoreProfileV3io,
+    "s3": DatastoreProfileS3,
+    "redis": DatastoreProfileRedis,
+    "basic": DatastoreProfileBasic,
+    "kafka_target": DatastoreProfileKafkaTarget,
+    "kafka_source": DatastoreProfileKafkaSource,
+    "dbfs": DatastoreProfileDBFS,
+    "gcs": DatastoreProfileGCS,
+    "az": DatastoreProfileAzureBlob,
+    "hdfs": DatastoreProfileHdfs,
+    "taosws": TDEngineDatastoreProfile,
+    "config": ConfigProfile,
+}
+
+
 class DatastoreProfile2Json(pydantic.v1.BaseModel):
     @staticmethod
     def _to_json(attributes):
@@ -523,19 +577,7 @@ class DatastoreProfile2Json(pydantic.v1.BaseModel):
 
         decoded_dict = {k: safe_literal_eval(v) for k, v in decoded_dict.items()}
         datastore_type = decoded_dict.get("type")
-        ds_profile_factory = {
-            "v3io": DatastoreProfileV3io,
-            "s3": DatastoreProfileS3,
-            "redis": DatastoreProfileRedis,
-            "basic": DatastoreProfileBasic,
-            "kafka_target": DatastoreProfileKafkaTarget,
-            "kafka_source": DatastoreProfileKafkaSource,
-            "dbfs": DatastoreProfileDBFS,
-            "gcs": DatastoreProfileGCS,
-            "az": DatastoreProfileAzureBlob,
-            "hdfs": DatastoreProfileHdfs,
-            "config": ConfigProfile,
-        }
+        ds_profile_factory = _DATASTORE_TYPE_TO_PROFILE_CLASS
         if datastore_type in ds_profile_factory:
             return ds_profile_factory[datastore_type].parse_obj(decoded_dict)
         else:
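The new `TDEngineDatastoreProfile` replaces the removed `tsdb_connection` string: the model-monitoring TSDB is now described by a datastore profile and referenced through the `TSDB_PROFILE_NAME` secret (see the `DefaultProfileName` constants above). A hedged usage sketch based only on the methods shown in this hunk:

    from mlrun.datastore.datastore_profile import TDEngineDatastoreProfile

    # DSN taken from the from_dsn docstring example; the profile name follows
    # DefaultProfileName.TSDB, but how the profile is registered with a project
    # and wired into model monitoring is not shown in this diff.
    profile = TDEngineDatastoreProfile.from_dsn(
        dsn="taosws://root:taosdata@localhost:6041",
        profile_name="mm-infra-tsdb",
    )
    assert profile.dsn() == "taosws://root:taosdata@localhost:6041"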