mlrun 1.10.0rc13__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (107)
  1. mlrun/__init__.py +22 -2
  2. mlrun/artifacts/base.py +0 -31
  3. mlrun/artifacts/document.py +6 -1
  4. mlrun/artifacts/llm_prompt.py +123 -25
  5. mlrun/artifacts/manager.py +0 -5
  6. mlrun/artifacts/model.py +3 -3
  7. mlrun/common/constants.py +10 -1
  8. mlrun/common/formatters/artifact.py +1 -0
  9. mlrun/common/model_monitoring/helpers.py +86 -0
  10. mlrun/common/schemas/__init__.py +3 -0
  11. mlrun/common/schemas/auth.py +2 -0
  12. mlrun/common/schemas/function.py +10 -0
  13. mlrun/common/schemas/hub.py +30 -18
  14. mlrun/common/schemas/model_monitoring/__init__.py +3 -0
  15. mlrun/common/schemas/model_monitoring/constants.py +30 -6
  16. mlrun/common/schemas/model_monitoring/functions.py +14 -5
  17. mlrun/common/schemas/model_monitoring/model_endpoints.py +21 -0
  18. mlrun/common/schemas/pipeline.py +1 -1
  19. mlrun/common/schemas/serving.py +3 -0
  20. mlrun/common/schemas/workflow.py +3 -1
  21. mlrun/common/secrets.py +22 -1
  22. mlrun/config.py +33 -11
  23. mlrun/datastore/__init__.py +11 -3
  24. mlrun/datastore/azure_blob.py +162 -47
  25. mlrun/datastore/datastore.py +9 -4
  26. mlrun/datastore/datastore_profile.py +61 -5
  27. mlrun/datastore/model_provider/huggingface_provider.py +363 -0
  28. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  29. mlrun/datastore/model_provider/model_provider.py +230 -65
  30. mlrun/datastore/model_provider/openai_provider.py +295 -42
  31. mlrun/datastore/s3.py +24 -2
  32. mlrun/datastore/storeytargets.py +2 -3
  33. mlrun/datastore/utils.py +15 -3
  34. mlrun/db/base.py +47 -19
  35. mlrun/db/httpdb.py +120 -56
  36. mlrun/db/nopdb.py +38 -10
  37. mlrun/execution.py +70 -19
  38. mlrun/hub/__init__.py +15 -0
  39. mlrun/hub/module.py +181 -0
  40. mlrun/k8s_utils.py +105 -16
  41. mlrun/launcher/base.py +13 -6
  42. mlrun/launcher/local.py +15 -0
  43. mlrun/model.py +24 -3
  44. mlrun/model_monitoring/__init__.py +1 -0
  45. mlrun/model_monitoring/api.py +66 -27
  46. mlrun/model_monitoring/applications/__init__.py +1 -1
  47. mlrun/model_monitoring/applications/base.py +509 -117
  48. mlrun/model_monitoring/applications/context.py +2 -4
  49. mlrun/model_monitoring/applications/results.py +4 -7
  50. mlrun/model_monitoring/controller.py +239 -101
  51. mlrun/model_monitoring/db/_schedules.py +116 -33
  52. mlrun/model_monitoring/db/_stats.py +4 -3
  53. mlrun/model_monitoring/db/tsdb/base.py +100 -9
  54. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +11 -6
  55. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +191 -50
  56. mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
  57. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
  58. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +259 -40
  59. mlrun/model_monitoring/helpers.py +54 -9
  60. mlrun/model_monitoring/stream_processing.py +45 -14
  61. mlrun/model_monitoring/writer.py +220 -1
  62. mlrun/platforms/__init__.py +3 -2
  63. mlrun/platforms/iguazio.py +7 -3
  64. mlrun/projects/operations.py +6 -1
  65. mlrun/projects/pipelines.py +46 -26
  66. mlrun/projects/project.py +166 -58
  67. mlrun/run.py +94 -17
  68. mlrun/runtimes/__init__.py +18 -0
  69. mlrun/runtimes/base.py +14 -6
  70. mlrun/runtimes/daskjob.py +7 -0
  71. mlrun/runtimes/local.py +5 -2
  72. mlrun/runtimes/mounts.py +20 -2
  73. mlrun/runtimes/mpijob/abstract.py +6 -0
  74. mlrun/runtimes/mpijob/v1.py +6 -0
  75. mlrun/runtimes/nuclio/__init__.py +1 -0
  76. mlrun/runtimes/nuclio/application/application.py +149 -17
  77. mlrun/runtimes/nuclio/function.py +76 -27
  78. mlrun/runtimes/nuclio/serving.py +97 -15
  79. mlrun/runtimes/pod.py +234 -21
  80. mlrun/runtimes/remotesparkjob.py +6 -0
  81. mlrun/runtimes/sparkjob/spark3job.py +6 -0
  82. mlrun/runtimes/utils.py +49 -11
  83. mlrun/secrets.py +54 -13
  84. mlrun/serving/__init__.py +2 -0
  85. mlrun/serving/remote.py +79 -6
  86. mlrun/serving/routers.py +23 -41
  87. mlrun/serving/server.py +320 -80
  88. mlrun/serving/states.py +725 -157
  89. mlrun/serving/steps.py +62 -0
  90. mlrun/serving/system_steps.py +200 -119
  91. mlrun/serving/v2_serving.py +9 -10
  92. mlrun/utils/helpers.py +288 -88
  93. mlrun/utils/logger.py +3 -1
  94. mlrun/utils/notifications/notification/base.py +18 -0
  95. mlrun/utils/notifications/notification/git.py +2 -4
  96. mlrun/utils/notifications/notification/slack.py +2 -4
  97. mlrun/utils/notifications/notification/webhook.py +2 -5
  98. mlrun/utils/notifications/notification_pusher.py +1 -1
  99. mlrun/utils/retryer.py +15 -2
  100. mlrun/utils/version/version.json +2 -2
  101. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/METADATA +45 -51
  102. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/RECORD +106 -101
  103. mlrun/api/schemas/__init__.py +0 -259
  104. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/WHEEL +0 -0
  105. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/entry_points.txt +0 -0
  106. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/licenses/LICENSE +0 -0
  107. {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/top_level.txt +0 -0
@@ -15,6 +15,7 @@
15
15
  from datetime import datetime, timezone
16
16
  from typing import Optional
17
17
 
18
+ import deepdiff
18
19
  from pydantic.v1 import BaseModel, Extra, Field
19
20
 
20
21
  import mlrun.common.types
@@ -36,9 +37,9 @@ class HubObjectMetadata(BaseModel):
36
37
  extra = Extra.allow
37
38
 
38
39
 
39
- # Currently only functions are supported. Will add more in the future.
40
40
  class HubSourceType(mlrun.common.types.StrEnum):
41
41
  functions = "functions"
42
+ modules = "modules"
42
43
 
43
44
 
44
45
  # Sources-related objects
@@ -46,7 +47,6 @@ class HubSourceSpec(ObjectSpec):
46
47
  path: str # URL to base directory, should include schema (s3://, etc...)
47
48
  channel: str
48
49
  credentials: Optional[dict] = {}
49
- object_type: HubSourceType = Field(HubSourceType.functions, const=True)
50
50
 
51
51
 
52
52
  class HubSource(BaseModel):
@@ -55,11 +55,11 @@ class HubSource(BaseModel):
55
55
  spec: HubSourceSpec
56
56
  status: Optional[ObjectStatus] = ObjectStatus(state="created")
57
57
 
58
- def get_full_uri(self, relative_path):
59
- return f"{self.spec.path}/{self.spec.object_type}/{self.spec.channel}/{relative_path}"
58
+ def get_full_uri(self, relative_path, object_type):
59
+ return f"{self.spec.path}/{object_type}/{self.spec.channel}/{relative_path}"
60
60
 
61
- def get_catalog_uri(self):
62
- return self.get_full_uri(mlrun.mlconf.hub.catalog_filename)
61
+ def get_catalog_uri(self, object_type):
62
+ return self.get_full_uri(mlrun.mlconf.hub.catalog_filename, object_type)
63
63
 
64
64
  @classmethod
65
65
  def generate_default_source(cls):
@@ -78,11 +78,23 @@ class HubSource(BaseModel):
78
78
  spec=HubSourceSpec(
79
79
  path=mlrun.mlconf.hub.default_source.url,
80
80
  channel=mlrun.mlconf.hub.default_source.channel,
81
- object_type=HubSourceType(mlrun.mlconf.hub.default_source.object_type),
82
81
  ),
83
82
  status=ObjectStatus(state="created"),
84
83
  )
85
84
 
85
+ def diff(self, another_source: "HubSource") -> dict:
86
+ """
87
+ Compare this HubSource with another one.
88
+ Returns a dict of differences (metadata, spec, status).
89
+ """
90
+ exclude_paths = [
91
+ "root['metadata']['updated']",
92
+ "root['metadata']['created']",
93
+ ]
94
+ return deepdiff.DeepDiff(
95
+ self.dict(), another_source.dict(), exclude_paths=exclude_paths
96
+ )
97
+
86
98
 
87
99
  last_source_index = -1
88
100
 
@@ -94,21 +106,16 @@ class IndexedHubSource(BaseModel):
94
106
 
95
107
  # Item-related objects
96
108
  class HubItemMetadata(HubObjectMetadata):
97
- source: HubSourceType = Field(HubSourceType.functions, const=True)
109
+ source: HubSourceType = HubSourceType.functions
98
110
  version: str
99
111
  tag: Optional[str]
100
112
 
101
113
  def get_relative_path(self) -> str:
102
- if self.source == HubSourceType.functions:
103
- # This is needed since the hub deployment script modifies the paths to use _ instead of -.
104
- modified_name = self.name.replace("-", "_")
105
- # Prefer using the tag if exists. Otherwise, use version.
106
- version = self.tag or self.version
107
- return f"{modified_name}/{version}/"
108
- else:
109
- raise mlrun.errors.MLRunInvalidArgumentError(
110
- f"Bad source for hub item - {self.source}"
111
- )
114
+ # This is needed since the hub deployment script modifies the paths to use _ instead of -.
115
+ modified_name = self.name.replace("-", "_")
116
+ # Prefer using the tag if exists. Otherwise, use version.
117
+ version = self.tag or self.version
118
+ return f"{modified_name}/{version}/"
112
119
 
113
120
 
114
121
  class HubItemSpec(ObjectSpec):
@@ -127,3 +134,8 @@ class HubCatalog(BaseModel):
127
134
  kind: ObjectKind = Field(ObjectKind.hub_catalog, const=True)
128
135
  channel: str
129
136
  catalog: list[HubItem]
137
+
138
+
139
+ class HubModuleType(mlrun.common.types.StrEnum):
140
+ generic = "generic"
141
+ monitoring_app = "monitoring_application"
@@ -16,6 +16,7 @@ from .constants import (
16
16
  INTERSECT_DICT_KEYS,
17
17
  ApplicationEvent,
18
18
  DriftStatus,
19
+ EndpointMode,
19
20
  EndpointType,
20
21
  EndpointUID,
21
22
  EventFieldType,
@@ -29,6 +30,7 @@ from .constants import (
29
30
  ModelEndpointMonitoringMetricType,
30
31
  ModelEndpointSchema,
31
32
  ModelMonitoringAppLabel,
33
+ ModelMonitoringInfraLabel,
32
34
  ModelMonitoringMode,
33
35
  MonitoringFunctionNames,
34
36
  PredictionsQueryConstants,
@@ -59,6 +61,7 @@ from .model_endpoints import (
59
61
  Features,
60
62
  FeatureValues,
61
63
  ModelEndpoint,
64
+ ModelEndpointDriftValues,
62
65
  ModelEndpointList,
63
66
  ModelEndpointMetadata,
64
67
  ModelEndpointMonitoringMetric,
@@ -34,6 +34,7 @@ class ModelEndpointSchema(MonitoringStrEnum):
34
34
  UID = "uid"
35
35
  PROJECT = "project"
36
36
  ENDPOINT_TYPE = "endpoint_type"
37
+ MODE = "mode"
37
38
  NAME = "name"
38
39
  CREATED = "created"
39
40
  UPDATED = "updated"
@@ -195,6 +196,10 @@ class WriterEventKind(MonitoringStrEnum):
195
196
  RESULT = "result"
196
197
  STATS = "stats"
197
198
 
199
+ @classmethod
200
+ def user_app_outputs(cls):
201
+ return [cls.METRIC, cls.RESULT]
202
+
198
203
 
199
204
  class ControllerEvent(MonitoringStrEnum):
200
205
  KIND = "kind"
@@ -205,6 +210,11 @@ class ControllerEvent(MonitoringStrEnum):
205
210
  FIRST_REQUEST = "first_request"
206
211
  FEATURE_SET_URI = "feature_set_uri"
207
212
  ENDPOINT_TYPE = "endpoint_type"
213
+
214
+ # first_timestamp and last_timestamp are used to batch completed events
215
+ FIRST_TIMESTAMP = "first_timestamp"
216
+ LAST_TIMESTAMP = "last_timestamp"
217
+
208
218
  ENDPOINT_POLICY = "endpoint_policy"
209
219
  # Note: currently under endpoint policy we will have a dictionary including the keys: "application_names"
210
220
  # "base_period", and "updated_endpoint" stand for when the MEP was updated
@@ -219,6 +229,7 @@ class ControllerEventEndpointPolicy(MonitoringStrEnum):
219
229
  class ControllerEventKind(MonitoringStrEnum):
220
230
  NOP_EVENT = "nop_event"
221
231
  REGULAR_EVENT = "regular_event"
232
+ BATCH_COMPLETE = "batch_complete"
222
233
 
223
234
 
224
235
  class MetricData(MonitoringStrEnum):
@@ -297,6 +308,7 @@ class FileTargetKind:
297
308
  MONITORING_APPLICATION = "monitoring_application"
298
309
  ERRORS = "errors"
299
310
  STATS = "stats"
311
+ PARQUET_STATS = "parquet_stats"
300
312
  LAST_REQUEST = "last_request"
301
313
 
302
314
 
@@ -321,6 +333,12 @@ class EndpointType(IntEnum):
321
333
  return [cls.NODE_EP, cls.ROUTER, cls.BATCH_EP]
322
334
 
323
335
 
336
+ class EndpointMode(IntEnum):
337
+ REAL_TIME = 0
338
+ BATCH = 1
339
+ BATCH_LEGACY = 2 # legacy batch mode, used for endpoints created through the batch inference job
340
+
341
+
324
342
  class MonitoringFunctionNames(MonitoringStrEnum):
325
343
  STREAM = "model-monitoring-stream"
326
344
  APPLICATION_CONTROLLER = "model-monitoring-controller"
@@ -474,19 +492,25 @@ class ModelEndpointMonitoringMetricType(StrEnum):
474
492
  METRIC = "metric"
475
493
 
476
494
 
495
+ # refer to `mlrun.utils.regex.project_name`
496
+ _INNER_PROJECT_PATTERN = r"[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?"
497
+ PROJECT_PATTERN = rf"^{_INNER_PROJECT_PATTERN}$"
498
+
499
+ MODEL_ENDPOINT_ID_PATTERN = r"^[a-zA-Z0-9_-]+$"
500
+
477
501
  _FQN_PART_PATTERN = r"[a-zA-Z0-9_-]+"
502
+ _RESULT_NAME_PATTERN = r"[a-zA-Z_][a-zA-Z0-9_]*"
503
+
478
504
  FQN_PATTERN = (
479
- rf"^(?P<project>{_FQN_PART_PATTERN})\."
505
+ rf"^(?P<project>{_INNER_PROJECT_PATTERN})\."
480
506
  rf"(?P<app>{_FQN_PART_PATTERN})\."
481
507
  rf"(?P<type>{ModelEndpointMonitoringMetricType.RESULT}|{ModelEndpointMonitoringMetricType.METRIC})\."
482
- rf"(?P<name>{_FQN_PART_PATTERN})$"
508
+ rf"(?P<name>{_RESULT_NAME_PATTERN})$"
483
509
  )
484
510
  FQN_REGEX = re.compile(FQN_PATTERN)
511
+ APP_NAME_REGEX = re.compile(_FQN_PART_PATTERN)
512
+ RESULT_NAME_REGEX = re.compile(_RESULT_NAME_PATTERN)
485
513
 
486
- # refer to `mlrun.utils.regex.project_name`
487
- PROJECT_PATTERN = r"^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$"
488
- MODEL_ENDPOINT_ID_PATTERN = r"^[a-zA-Z0-9_-]+$"
489
- RESULT_NAME_PATTERN = r"[a-zA-Z_][a-zA-Z0-9_]*"
490
514
 
491
515
  INTERSECT_DICT_KEYS = {
492
516
  ModelEndpointMonitoringMetricType.METRIC: "intersect_metrics",
@@ -54,15 +54,24 @@ class FunctionSummary(BaseModel):
54
54
 
55
55
  return cls(
56
56
  type=func_type,
57
- name=func_dict["metadata"]["name"],
57
+ name=func_dict["metadata"]["name"]
58
+ if func_type != FunctionsType.APPLICATION
59
+ else func_dict["spec"]
60
+ .get("graph", {})
61
+ .get("steps", {})
62
+ .get("PrepareMonitoringEvent", {})
63
+ .get("class_args", {})
64
+ .get("application_name"),
58
65
  application_class=""
59
66
  if func_type != FunctionsType.APPLICATION
60
- else func_dict["spec"]["graph"]["steps"]["PushToMonitoringWriter"]["after"][
61
- 0
62
- ],
67
+ else func_dict["spec"]
68
+ .get("graph", {})
69
+ .get("steps", {})
70
+ .get("PushToMonitoringWriter", {})
71
+ .get("after", [None])[0],
63
72
  project_name=func_dict["metadata"]["project"],
64
73
  updated_time=func_dict["metadata"].get("updated"),
65
74
  status=func_dict["status"].get("state"),
66
75
  base_period=base_period,
67
- stats=stats,
76
+ stats=stats or {},
68
77
  )
@@ -28,6 +28,7 @@ from .constants import (
28
28
  FQN_REGEX,
29
29
  MODEL_ENDPOINT_ID_PATTERN,
30
30
  PROJECT_PATTERN,
31
+ EndpointMode,
31
32
  EndpointType,
32
33
  ModelEndpointMonitoringMetricType,
33
34
  ModelMonitoringMode,
@@ -118,6 +119,7 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
118
119
  project: constr(regex=PROJECT_PATTERN)
119
120
  endpoint_type: EndpointType = EndpointType.NODE_EP
120
121
  uid: Optional[constr(regex=MODEL_ENDPOINT_ID_PATTERN)]
122
+ mode: Optional[EndpointMode] = None
121
123
 
122
124
  @classmethod
123
125
  def mutable_fields(cls):
@@ -129,6 +131,15 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
129
131
  return str(v)
130
132
  return v
131
133
 
134
+ @validator("mode", pre=True, always=True)
135
+ def _set_mode_based_on_endpoint_type(cls, v, values): # noqa: N805
136
+ if v is None:
137
+ if values.get("endpoint_type") == EndpointType.BATCH_EP:
138
+ return EndpointMode.BATCH_LEGACY
139
+ else:
140
+ return EndpointMode.REAL_TIME
141
+ return v
142
+
132
143
 
133
144
  class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
134
145
  model_class: Optional[str] = ""
@@ -352,6 +363,16 @@ class ApplicationMetricRecord(ApplicationBaseRecord):
352
363
  type: Literal["metric"] = "metric"
353
364
 
354
365
 
366
+ class _DriftBin(NamedTuple):
367
+ timestamp: datetime
368
+ count_suspected: int
369
+ count_detected: int
370
+
371
+
372
+ class ModelEndpointDriftValues(BaseModel):
373
+ values: list[_DriftBin]
374
+
375
+
355
376
  def _mapping_attributes(
356
377
  model_class: type[Model],
357
378
  flattened_dictionary: dict,
@@ -18,7 +18,7 @@ import pydantic.v1
18
18
 
19
19
 
20
20
  class PipelinesPagination(str):
21
- default_page_size = 20
21
+ default_page_size = 200
22
22
  # https://github.com/kubeflow/pipelines/blob/master/backend/src/apiserver/list/list.go#L363
23
23
  max_page_size = 200
24
24
 
@@ -47,3 +47,6 @@ class MonitoringData(StrEnum):
47
47
  class ModelsData(enum.Enum):
48
48
  MODEL_CLASS = 0
49
49
  MODEL_PARAMETERS = 1
50
+
51
+
52
+ MAX_BATCH_JOB_DURATION = "1w"
@@ -49,9 +49,11 @@ class WorkflowRequest(pydantic.v1.BaseModel):
49
49
  class RerunWorkflowRequest(pydantic.v1.BaseModel):
50
50
  run_name: typing.Optional[str] = None
51
51
  run_id: typing.Optional[str] = None
52
- original_workflow_id: typing.Optional[str] = None
53
52
  notifications: typing.Optional[list[Notification]] = None
54
53
  workflow_runner_node_selector: typing.Optional[dict[str, str]] = None
54
+ original_workflow_runner_uid: typing.Optional[str] = None
55
+ original_workflow_name: typing.Optional[str] = None
56
+ rerun_index: typing.Optional[int] = None
55
57
 
56
58
 
57
59
  class WorkflowResponse(pydantic.v1.BaseModel):
mlrun/common/secrets.py CHANGED
@@ -11,10 +11,31 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
-
14
+ import re
15
15
  from abc import ABC, abstractmethod
16
16
 
17
17
  import mlrun.common.schemas
18
+ from mlrun.config import config as mlconf
19
+
20
+ _AUTH_SECRET_NAME_TEMPLATE = re.escape(
21
+ mlconf.secret_stores.kubernetes.auth_secret_name.format(
22
+ hashed_access_key="",
23
+ )
24
+ )
25
+ AUTH_SECRET_PATTERN = re.compile(f"^{_AUTH_SECRET_NAME_TEMPLATE}.*")
26
+
27
+
28
+ def validate_not_forbidden_secret(secret_name: str) -> None:
29
+ """
30
+ Forbid client-supplied references to internal MLRun auth/project secrets.
31
+ No-op when running inside the API server (API enrichments are allowed).
32
+ """
33
+ if not secret_name or mlrun.config.is_running_as_api():
34
+ return
35
+ if AUTH_SECRET_PATTERN.match(secret_name):
36
+ raise mlrun.errors.MLRunInvalidArgumentError(
37
+ f"Forbidden secret '{secret_name}' matches MLRun auth-secret pattern."
38
+ )
18
39
 
19
40
 
20
41
  class SecretProviderInterface(ABC):
mlrun/config.py CHANGED
@@ -107,7 +107,11 @@ default_config = {
107
107
  "submit_timeout": "280", # timeout when submitting a new k8s resource
108
108
  # runtimes cleanup interval in seconds
109
109
  "runtimes_cleanup_interval": "300",
110
- "background_task_cleanup_interval": "86400", # 24 hours in seconds
110
+ # disabled by default due to an internal bug in serving functions
111
+ # relying on a background task to hold the status for its model endpoints
112
+ # TODO: need to refine what/when we can delete the background tasks
113
+ # e.g: use labels or naming convention.
114
+ "background_task_cleanup_interval": "0",
111
115
  "background_task_max_age": "21600", # 6 hours in seconds
112
116
  "monitoring": {
113
117
  "runs": {
@@ -193,7 +197,8 @@ default_config = {
193
197
  },
194
198
  "v3io_framesd": "http://framesd:8080",
195
199
  "model_providers": {
196
- "openai_default_model": "gpt-4",
200
+ "openai_default_model": "gpt-4o",
201
+ "huggingface_default_model": "microsoft/Phi-3-mini-4k-instruct",
197
202
  },
198
203
  # default node selector to be applied to all functions - json string base64 encoded format
199
204
  "default_function_node_selector": "e30=",
@@ -250,7 +255,8 @@ default_config = {
250
255
  },
251
256
  "runtimes": {
252
257
  "dask": "600",
253
- "dask_cluster_start": "300",
258
+ # cluster start might take some time in case k8s needs to spin up new nodes
259
+ "dask_cluster_start": "600",
254
260
  },
255
261
  "push_notifications": "60",
256
262
  },
@@ -298,6 +304,7 @@ default_config = {
298
304
  "application": {
299
305
  "default_sidecar_internal_port": 8050,
300
306
  "default_authentication_mode": mlrun.common.schemas.APIGatewayAuthenticationMode.none,
307
+ "default_worker_number": 10000,
301
308
  },
302
309
  },
303
310
  # TODO: function defaults should be moved to the function spec config above
@@ -406,11 +413,7 @@ default_config = {
406
413
  #
407
414
  # if set to "nil" or "none", nothing would be set
408
415
  "modes": (
409
- "STRICT_TRANS_TABLES"
410
- ",NO_ZERO_IN_DATE"
411
- ",NO_ZERO_DATE"
412
- ",ERROR_FOR_DIVISION_BY_ZERO"
413
- ",NO_ENGINE_SUBSTITUTION",
416
+ "STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION"
414
417
  )
415
418
  },
416
419
  },
@@ -647,6 +650,13 @@ default_config = {
647
650
  "max_replicas": 1,
648
651
  },
649
652
  },
653
+ "writer_graph": {
654
+ "max_events": 1000,
655
+ "flush_after_seconds": 30,
656
+ "writer_version": "v1", # v1 is the sync version while v2 is async
657
+ "parquet_batching_max_events": 10,
658
+ "parquet_batching_timeout_secs": 30,
659
+ },
650
660
  # Store prefixes are used to handle model monitoring storing policies based on project and kind, such as events,
651
661
  # stream, and endpoints.
652
662
  "store_prefixes": {
@@ -717,7 +727,6 @@ default_config = {
717
727
  "name": "default",
718
728
  "description": "MLRun global function hub",
719
729
  "url": "https://mlrun.github.io/marketplace",
720
- "object_type": "functions",
721
730
  "channel": "master",
722
731
  },
723
732
  },
@@ -999,9 +1008,9 @@ class Config:
999
1008
  )
1000
1009
 
1001
1010
  @staticmethod
1002
- def get_default_hub_source() -> str:
1011
+ def get_default_hub_source_url_prefix(object_type) -> str:
1003
1012
  default_source = config.hub.default_source
1004
- return f"{default_source.url}/{default_source.object_type}/{default_source.channel}/"
1013
+ return f"{default_source.url}/{object_type}/{default_source.channel}/"
1005
1014
 
1006
1015
  @staticmethod
1007
1016
  def decode_base64_config_and_load_to_object(
@@ -1242,6 +1251,19 @@ class Config:
1242
1251
  """
1243
1252
  return self.is_running_on_iguazio()
1244
1253
 
1254
+ @staticmethod
1255
+ def get_run_retry_staleness_threshold_timedelta() -> timedelta:
1256
+ """
1257
+ Get the staleness threshold in timedelta for run retries.
1258
+ This is used to determine if a run is stale and should be retried.
1259
+
1260
+ :return: The staleness threshold in timedelta.
1261
+ """
1262
+ staleness_threshold = int(
1263
+ mlrun.mlconf.monitoring.runs.retry.staleness_threshold
1264
+ )
1265
+ return timedelta(minutes=staleness_threshold)
1266
+
1245
1267
  def to_dict(self):
1246
1268
  return copy.deepcopy(self._cfg)
1247
1269
 
@@ -39,10 +39,11 @@ __all__ = [
39
39
  from urllib.parse import urlparse
40
40
 
41
41
  import fsspec
42
+ import storey
42
43
 
43
44
  import mlrun.datastore.wasbfs
44
45
  from mlrun.datastore.datastore_profile import (
45
- DatastoreProfileKafkaSource,
46
+ DatastoreProfileKafkaStream,
46
47
  DatastoreProfileKafkaTarget,
47
48
  DatastoreProfileV3io,
48
49
  )
@@ -122,7 +123,7 @@ def get_stream_pusher(stream_path: str, **kwargs):
122
123
  )
123
124
  if isinstance(
124
125
  datastore_profile,
125
- (DatastoreProfileKafkaSource, DatastoreProfileKafkaTarget),
126
+ (DatastoreProfileKafkaStream, DatastoreProfileKafkaTarget),
126
127
  ):
127
128
  attributes = datastore_profile.attributes()
128
129
  brokers = attributes.pop("brokers", None)
@@ -168,11 +169,12 @@ def get_stream_pusher(stream_path: str, **kwargs):
168
169
  raise ValueError(f"unsupported stream path {stream_path}")
169
170
 
170
171
 
171
- class _DummyStream:
172
+ class _DummyStream(storey.MapClass):
172
173
  """stream emulator for tests and debug"""
173
174
 
174
175
  def __init__(self, event_list=None, **kwargs):
175
176
  self.event_list = event_list or []
177
+ super().__init__(**kwargs)
176
178
 
177
179
  def push(self, data, **kwargs):
178
180
  if not isinstance(data, list):
@@ -180,3 +182,9 @@ class _DummyStream:
180
182
  for item in data:
181
183
  logger.info(f"dummy stream got event: {item}, kwargs={kwargs}")
182
184
  self.event_list.append(item)
185
+
186
+ def do(self, event):
187
+ if not isinstance(event, list):
188
+ event = [event]
189
+ for item in event:
190
+ self.event_list.append(item)