mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (160)
  1. mlrun/__init__.py +10 -1
  2. mlrun/__main__.py +23 -111
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +169 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +36 -253
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +46 -42
  10. mlrun/artifacts/model.py +9 -141
  11. mlrun/artifacts/plots.py +14 -375
  12. mlrun/common/constants.py +65 -3
  13. mlrun/common/formatters/__init__.py +19 -0
  14. mlrun/{runtimes/mpijob/v1alpha1.py → common/formatters/artifact.py} +6 -14
  15. mlrun/common/formatters/base.py +113 -0
  16. mlrun/common/formatters/function.py +46 -0
  17. mlrun/common/formatters/pipeline.py +53 -0
  18. mlrun/common/formatters/project.py +51 -0
  19. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  20. mlrun/common/schemas/__init__.py +10 -5
  21. mlrun/common/schemas/alert.py +92 -11
  22. mlrun/common/schemas/api_gateway.py +56 -0
  23. mlrun/common/schemas/artifact.py +15 -5
  24. mlrun/common/schemas/auth.py +2 -0
  25. mlrun/common/schemas/client_spec.py +1 -0
  26. mlrun/common/schemas/frontend_spec.py +1 -0
  27. mlrun/common/schemas/function.py +4 -0
  28. mlrun/common/schemas/model_monitoring/__init__.py +15 -3
  29. mlrun/common/schemas/model_monitoring/constants.py +58 -7
  30. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  31. mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
  32. mlrun/common/schemas/pipeline.py +0 -9
  33. mlrun/common/schemas/project.py +5 -11
  34. mlrun/common/types.py +1 -0
  35. mlrun/config.py +30 -9
  36. mlrun/data_types/to_pandas.py +9 -9
  37. mlrun/datastore/base.py +41 -9
  38. mlrun/datastore/datastore.py +6 -2
  39. mlrun/datastore/datastore_profile.py +56 -4
  40. mlrun/datastore/inmem.py +2 -2
  41. mlrun/datastore/redis.py +2 -2
  42. mlrun/datastore/s3.py +5 -0
  43. mlrun/datastore/sources.py +147 -7
  44. mlrun/datastore/store_resources.py +7 -7
  45. mlrun/datastore/targets.py +110 -42
  46. mlrun/datastore/utils.py +42 -0
  47. mlrun/db/base.py +54 -10
  48. mlrun/db/httpdb.py +282 -79
  49. mlrun/db/nopdb.py +52 -10
  50. mlrun/errors.py +11 -0
  51. mlrun/execution.py +26 -9
  52. mlrun/feature_store/__init__.py +0 -2
  53. mlrun/feature_store/api.py +12 -47
  54. mlrun/feature_store/feature_set.py +9 -0
  55. mlrun/feature_store/feature_vector.py +8 -0
  56. mlrun/feature_store/ingestion.py +7 -6
  57. mlrun/feature_store/retrieval/base.py +9 -4
  58. mlrun/feature_store/retrieval/conversion.py +9 -9
  59. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  60. mlrun/feature_store/retrieval/job.py +9 -3
  61. mlrun/feature_store/retrieval/local_merger.py +2 -0
  62. mlrun/feature_store/retrieval/spark_merger.py +16 -0
  63. mlrun/frameworks/__init__.py +6 -0
  64. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  65. mlrun/frameworks/parallel_coordinates.py +2 -1
  66. mlrun/frameworks/tf_keras/__init__.py +4 -1
  67. mlrun/k8s_utils.py +10 -11
  68. mlrun/launcher/base.py +4 -3
  69. mlrun/launcher/client.py +5 -3
  70. mlrun/launcher/local.py +12 -2
  71. mlrun/launcher/remote.py +9 -2
  72. mlrun/lists.py +6 -2
  73. mlrun/model.py +47 -21
  74. mlrun/model_monitoring/__init__.py +1 -1
  75. mlrun/model_monitoring/api.py +42 -18
  76. mlrun/model_monitoring/application.py +5 -305
  77. mlrun/model_monitoring/applications/__init__.py +11 -0
  78. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  79. mlrun/model_monitoring/applications/base.py +280 -0
  80. mlrun/model_monitoring/applications/context.py +214 -0
  81. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  82. mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
  83. mlrun/model_monitoring/applications/results.py +99 -0
  84. mlrun/model_monitoring/controller.py +3 -1
  85. mlrun/model_monitoring/db/__init__.py +2 -0
  86. mlrun/model_monitoring/db/stores/__init__.py +0 -2
  87. mlrun/model_monitoring/db/stores/base/store.py +22 -37
  88. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
  89. mlrun/model_monitoring/db/stores/sqldb/models/base.py +39 -8
  90. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +27 -7
  91. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
  92. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +246 -224
  93. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +232 -216
  94. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  95. mlrun/model_monitoring/db/tsdb/base.py +316 -0
  96. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  97. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  98. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  99. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  100. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +401 -0
  101. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  102. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  103. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +658 -0
  104. mlrun/model_monitoring/evidently_application.py +6 -118
  105. mlrun/model_monitoring/helpers.py +63 -1
  106. mlrun/model_monitoring/model_endpoint.py +3 -2
  107. mlrun/model_monitoring/stream_processing.py +57 -216
  108. mlrun/model_monitoring/writer.py +134 -124
  109. mlrun/package/__init__.py +13 -1
  110. mlrun/package/packagers/__init__.py +6 -1
  111. mlrun/package/utils/_formatter.py +2 -2
  112. mlrun/platforms/__init__.py +10 -9
  113. mlrun/platforms/iguazio.py +21 -202
  114. mlrun/projects/operations.py +24 -12
  115. mlrun/projects/pipelines.py +79 -102
  116. mlrun/projects/project.py +271 -103
  117. mlrun/render.py +15 -14
  118. mlrun/run.py +16 -46
  119. mlrun/runtimes/__init__.py +6 -3
  120. mlrun/runtimes/base.py +14 -7
  121. mlrun/runtimes/daskjob.py +1 -0
  122. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  123. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  124. mlrun/runtimes/funcdoc.py +0 -28
  125. mlrun/runtimes/kubejob.py +2 -1
  126. mlrun/runtimes/local.py +12 -3
  127. mlrun/runtimes/mpijob/__init__.py +0 -20
  128. mlrun/runtimes/mpijob/v1.py +1 -1
  129. mlrun/runtimes/nuclio/api_gateway.py +194 -84
  130. mlrun/runtimes/nuclio/application/application.py +170 -8
  131. mlrun/runtimes/nuclio/function.py +39 -49
  132. mlrun/runtimes/pod.py +16 -36
  133. mlrun/runtimes/remotesparkjob.py +9 -3
  134. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  135. mlrun/runtimes/utils.py +6 -45
  136. mlrun/serving/__init__.py +8 -1
  137. mlrun/serving/server.py +2 -1
  138. mlrun/serving/states.py +51 -8
  139. mlrun/serving/utils.py +19 -11
  140. mlrun/serving/v2_serving.py +5 -1
  141. mlrun/track/tracker.py +2 -1
  142. mlrun/utils/async_http.py +25 -5
  143. mlrun/utils/helpers.py +157 -83
  144. mlrun/utils/logger.py +39 -7
  145. mlrun/utils/notifications/notification/__init__.py +14 -9
  146. mlrun/utils/notifications/notification/base.py +1 -1
  147. mlrun/utils/notifications/notification/slack.py +34 -7
  148. mlrun/utils/notifications/notification/webhook.py +1 -1
  149. mlrun/utils/notifications/notification_pusher.py +147 -16
  150. mlrun/utils/regex.py +9 -0
  151. mlrun/utils/v3io_clients.py +0 -1
  152. mlrun/utils/version/version.json +2 -2
  153. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/METADATA +14 -6
  154. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/RECORD +158 -138
  155. mlrun/kfpops.py +0 -865
  156. mlrun/platforms/other.py +0 -305
  157. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/LICENSE +0 -0
  158. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/WHEEL +0 -0
  159. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/entry_points.txt +0 -0
  160. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc22.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -81,6 +81,8 @@ class EventFieldType:
     DRIFT_DETECTED_THRESHOLD = "drift_detected_threshold"
     POSSIBLE_DRIFT_THRESHOLD = "possible_drift_threshold"
     SAMPLE_PARQUET_PATH = "sample_parquet_path"
+    TIME = "time"
+    TABLE_COLUMN = "table_column"
 
 
 class FeatureSetFeatures(MonitoringStrEnum):
@@ -99,14 +101,17 @@ class FeatureSetFeatures(MonitoringStrEnum):
 
 class ApplicationEvent:
     APPLICATION_NAME = "application_name"
-    CURRENT_STATS = "current_stats"
-    FEATURE_STATS = "feature_stats"
-    SAMPLE_PARQUET_PATH = "sample_parquet_path"
     START_INFER_TIME = "start_infer_time"
     END_INFER_TIME = "end_infer_time"
     LAST_REQUEST = "last_request"
     ENDPOINT_ID = "endpoint_id"
     OUTPUT_STREAM_URI = "output_stream_uri"
+    MLRUN_CONTEXT = "mlrun_context"
+
+    # Deprecated fields - TODO : delete in 1.9.0 (V1 app deprecation)
+    SAMPLE_PARQUET_PATH = "sample_parquet_path"
+    CURRENT_STATS = "current_stats"
+    FEATURE_STATS = "feature_stats"
 
 
 class WriterEvent(MonitoringStrEnum):
@@ -114,6 +119,21 @@ class WriterEvent(MonitoringStrEnum):
     ENDPOINT_ID = "endpoint_id"
     START_INFER_TIME = "start_infer_time"
     END_INFER_TIME = "end_infer_time"
+    EVENT_KIND = "event_kind"  # metric or result
+    DATA = "data"
+
+
+class WriterEventKind(MonitoringStrEnum):
+    METRIC = "metric"
+    RESULT = "result"
+
+
+class MetricData(MonitoringStrEnum):
+    METRIC_NAME = "metric_name"
+    METRIC_VALUE = "metric_value"
+
+
+class ResultData(MonitoringStrEnum):
     RESULT_NAME = "result_name"
     RESULT_VALUE = "result_value"
     RESULT_KIND = "result_kind"
@@ -138,10 +158,6 @@ class EventKeyMetrics:
     REAL_TIME = "real_time"
 
 
-class TimeSeriesTarget:
-    TSDB = "tsdb"
-
-
 class ModelEndpointTarget:
     V3IO_NOSQL = "v3io-nosql"
     SQL = "sql"
@@ -153,6 +169,7 @@ class ProjectSecretKeys:
     PIPELINES_ACCESS_KEY = "MODEL_MONITORING_PIPELINES_ACCESS_KEY"
     KAFKA_BROKERS = "KAFKA_BROKERS"
     STREAM_PATH = "STREAM_PATH"
+    TSDB_CONNECTION = "TSDB_CONNECTION"
 
 
 class ModelMonitoringStoreKinds:
@@ -170,12 +187,15 @@ class SchedulingKeys:
 class FileTargetKind:
     ENDPOINTS = "endpoints"
     EVENTS = "events"
+    PREDICTIONS = "predictions"
     STREAM = "stream"
     PARQUET = "parquet"
     APPS_PARQUET = "apps_parquet"
     LOG_STREAM = "log_stream"
     APP_RESULTS = "app_results"
+    APP_METRICS = "app_metrics"
     MONITORING_SCHEDULES = "monitoring_schedules"
+    MONITORING_APPLICATION = "monitoring_application"
 
 
 class ModelMonitoringMode(str, Enum):
@@ -210,6 +230,18 @@ class MonitoringFunctionNames(MonitoringStrEnum):
     WRITER = "model-monitoring-writer"
 
 
+class V3IOTSDBTables(MonitoringStrEnum):
+    APP_RESULTS = "app-results"
+    METRICS = "metrics"
+    EVENTS = "events"
+
+
+class TDEngineSuperTables(MonitoringStrEnum):
+    APP_RESULTS = "app_results"
+    METRICS = "metrics"
+    PREDICTIONS = "predictions"
+
+
 @dataclass
 class FunctionURI:
     project: str
@@ -286,6 +318,7 @@ class ResultKindApp(Enum):
     concept_drift = 1
     model_performance = 2
     system_performance = 3
+    custom = 4
 
 
 class ResultStatusApp(IntEnum):
@@ -303,11 +336,29 @@ class ModelMonitoringAppLabel:
     KEY = "mlrun__type"
     VAL = "mlrun__model-monitoring-application"
 
+    def __str__(self) -> str:
+        return f"{self.KEY}={self.VAL}"
+
 
 class ControllerPolicy:
     BASE_PERIOD = "base_period"
 
 
+class TSDBTarget:
+    V3IO_TSDB = "v3io-tsdb"
+    TDEngine = "tdengine"
+    PROMETHEUS = "prometheus"
+
+
 class HistogramDataDriftApplicationConstants:
     NAME = "histogram-data-drift"
     GENERAL_RESULT_NAME = "general_drift"
+
+
+class PredictionsQueryConstants:
+    DEFAULT_AGGREGATION_GRANULARITY = "10m"
+    INVOCATIONS = "invocations"
+
+
+class SpecialApps:
+    MLRUN_INFRA = "mlrun-infra"
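To give a sense of how the new writer constants fit together, here is a minimal illustrative sketch (not the actual writer protocol; the payload shape is an assumption) that labels a custom application result with the enums added above:

# Illustrative only: the dictionary shape is an assumption, but the keys and
# enum members are the ones introduced in the diff above.
from mlrun.common.schemas.model_monitoring.constants import (
    ResultData,
    ResultKindApp,
    WriterEvent,
    WriterEventKind,
)

event = {
    WriterEvent.ENDPOINT_ID: "my-endpoint",
    WriterEvent.EVENT_KIND: WriterEventKind.RESULT,
    WriterEvent.DATA: {
        ResultData.RESULT_NAME: "general_drift",
        ResultData.RESULT_KIND: ResultKindApp.custom.value,
        ResultData.RESULT_VALUE: 0.42,
    },
}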
mlrun/common/schemas/model_monitoring/grafana.py CHANGED
@@ -11,12 +11,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 from typing import Optional, Union
 
 from pydantic import BaseModel
 
+import mlrun.common.types
+
+
+class GrafanaColumnType(mlrun.common.types.StrEnum):
+    NUMBER = "number"
+    STRING = "string"
+
 
 class GrafanaColumn(BaseModel):
     text: str
@@ -24,13 +30,11 @@ class GrafanaColumn(BaseModel):
 
 
 class GrafanaNumberColumn(GrafanaColumn):
-    text: str
-    type: str = "number"
+    type: str = GrafanaColumnType.NUMBER
 
 
 class GrafanaStringColumn(GrafanaColumn):
-    text: str
-    type: str = "string"
+    type: str = GrafanaColumnType.STRING
 
 
 class GrafanaTable(BaseModel):
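The refactor above replaces the hard-coded column type strings with the new GrafanaColumnType enum; a small usage sketch of the resulting behavior:

# Minimal sketch: GrafanaNumberColumn now defaults its "type" field to the enum value.
from mlrun.common.schemas.model_monitoring.grafana import (
    GrafanaColumnType,
    GrafanaNumberColumn,
)

column = GrafanaNumberColumn(text="drift_value")
assert column.type == GrafanaColumnType.NUMBER == "number"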
mlrun/common/schemas/model_monitoring/model_endpoints.py CHANGED
@@ -11,16 +11,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 import enum
 import json
-from typing import Any, Optional
+import re
+from datetime import datetime
+from typing import Any, NamedTuple, Optional
 
 from pydantic import BaseModel, Field, validator
 from pydantic.main import Extra
 
 import mlrun.common.model_monitoring
+import mlrun.common.types
 
 from ..object import ObjectKind, ObjectSpec, ObjectStatus
 from .constants import (
@@ -29,6 +31,8 @@ from .constants import (
     EventKeyMetrics,
     EventLiveStats,
     ModelMonitoringMode,
+    ResultKindApp,
+    ResultStatusApp,
 )
 
 
@@ -100,6 +104,7 @@ class ModelEndpointSpec(ObjectSpec):
     )
 
     @validator("monitor_configuration")
+    @classmethod
     def set_name(cls, monitor_configuration):
         return monitor_configuration or {
             EventFieldType.DRIFT_DETECTED_THRESHOLD: (
@@ -111,6 +116,7 @@ class ModelEndpointSpec(ObjectSpec):
         }
 
     @validator("model_uri")
+    @classmethod
     def validate_model_uri(cls, model_uri):
         """Validate that the model uri includes the required prefix"""
         prefix, uri = mlrun.datastore.parse_store_uri(model_uri)
@@ -292,6 +298,84 @@ class ModelEndpointList(BaseModel):
     endpoints: list[ModelEndpoint] = []
 
 
+class ModelEndpointMonitoringMetricType(mlrun.common.types.StrEnum):
+    RESULT = "result"
+    METRIC = "metric"
+
+
+class ModelEndpointMonitoringMetric(BaseModel):
+    project: str
+    app: str
+    type: ModelEndpointMonitoringMetricType
+    name: str
+    full_name: str
+
+
+def _compose_full_name(
+    *,
+    project: str,
+    app: str,
+    name: str,
+    type: ModelEndpointMonitoringMetricType = ModelEndpointMonitoringMetricType.RESULT,
+) -> str:
+    return ".".join([project, app, type, name])
+
+
+_FQN_PART_PATTERN = r"[a-zA-Z0-9_-]+"
+_FQN_PATTERN = (
+    rf"^(?P<project>{_FQN_PART_PATTERN})\."
+    rf"(?P<app>{_FQN_PART_PATTERN})\."
+    rf"(?P<type>{ModelEndpointMonitoringMetricType.RESULT}|{ModelEndpointMonitoringMetricType.METRIC})\."
+    rf"(?P<name>{_FQN_PART_PATTERN})$"
+)
+_FQN_REGEX = re.compile(_FQN_PATTERN)
+
+
+def _parse_metric_fqn_to_monitoring_metric(fqn: str) -> ModelEndpointMonitoringMetric:
+    match = _FQN_REGEX.fullmatch(fqn)
+    if match is None:
+        raise ValueError("The fully qualified name is not in the expected format")
+    return ModelEndpointMonitoringMetric.parse_obj(
+        match.groupdict() | {"full_name": fqn}
+    )
+
+
+class _MetricPoint(NamedTuple):
+    timestamp: datetime
+    value: float
+
+
+class _ResultPoint(NamedTuple):
+    timestamp: datetime
+    value: float
+    status: ResultStatusApp
+
+
+class _ModelEndpointMonitoringMetricValuesBase(BaseModel):
+    full_name: str
+    type: ModelEndpointMonitoringMetricType
+    data: bool
+
+
+class ModelEndpointMonitoringMetricValues(_ModelEndpointMonitoringMetricValuesBase):
+    type: ModelEndpointMonitoringMetricType = ModelEndpointMonitoringMetricType.METRIC
+    values: list[_MetricPoint]
+    data: bool = True
+
+
+class ModelEndpointMonitoringResultValues(_ModelEndpointMonitoringMetricValuesBase):
+    type: ModelEndpointMonitoringMetricType = ModelEndpointMonitoringMetricType.RESULT
+    result_kind: ResultKindApp
+    values: list[_ResultPoint]
+    data: bool = True
+
+
+class ModelEndpointMonitoringMetricNoData(_ModelEndpointMonitoringMetricValuesBase):
+    full_name: str
+    type: ModelEndpointMonitoringMetricType
+    data: bool = False
+
+
 def _mapping_attributes(
     base_model: BaseModel,
     flattened_dictionary: dict,
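The helpers added above define a dot-separated fully qualified metric name of the form project.app.type.name. A round-trip sketch (these are module-private helpers, imported here only to illustrate the format):

# Round-trip sketch for the new FQN helpers; the leading-underscore names are
# module-private and used only to show the "project.app.type.name" convention.
from mlrun.common.schemas.model_monitoring.model_endpoints import (
    ModelEndpointMonitoringMetricType,
    _compose_full_name,
    _parse_metric_fqn_to_monitoring_metric,
)

fqn = _compose_full_name(
    project="my-project",
    app="histogram-data-drift",
    name="general_drift",
    type=ModelEndpointMonitoringMetricType.RESULT,
)
# -> "my-project.histogram-data-drift.result.general_drift"
metric = _parse_metric_fqn_to_monitoring_metric(fqn)
assert metric.app == "histogram-data-drift" and metric.type == "result"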
mlrun/common/schemas/pipeline.py CHANGED
@@ -16,15 +16,6 @@ import typing
 
 import pydantic
 
-import mlrun.common.types
-
-
-class PipelinesFormat(mlrun.common.types.StrEnum):
-    full = "full"
-    metadata_only = "metadata_only"
-    summary = "summary"
-    name_only = "name_only"
-
 
 class PipelinesPagination(str):
     default_page_size = 20
mlrun/common/schemas/project.py CHANGED
@@ -23,16 +23,6 @@ from .common import ImageBuilder
 from .object import ObjectKind, ObjectStatus
 
 
-class ProjectsFormat(mlrun.common.types.StrEnum):
-    full = "full"
-    name_only = "name_only"
-    # minimal format removes large fields from the response (e.g. functions, workflows, artifacts)
-    # and is used for faster response times (in the UI)
-    minimal = "minimal"
-    # internal - allowed only in follower mode, only for the leader for upgrade purposes
-    leader = "leader"
-
-
 class ProjectMetadata(pydantic.BaseModel):
     name: str
     created: typing.Optional[datetime.datetime] = None
@@ -113,7 +103,11 @@ class ProjectSummary(pydantic.BaseModel):
     runs_completed_recent_count: int
     runs_failed_recent_count: int
     runs_running_count: int
-    schedules_count: int
+    distinct_schedules_count: int
+    distinct_scheduled_jobs_pending_count: int
+    distinct_scheduled_pipelines_pending_count: int
+    pipelines_completed_recent_count: typing.Optional[int] = None
+    pipelines_failed_recent_count: typing.Optional[int] = None
     pipelines_running_count: typing.Optional[int] = None
 
 
mlrun/common/types.py CHANGED
@@ -29,3 +29,4 @@ class StrEnum(str, enum.Enum):
 class HTTPMethod(StrEnum):
     GET = "GET"
     POST = "POST"
+    DELETE = "DELETE"
mlrun/config.py CHANGED
@@ -37,6 +37,7 @@ import dotenv
 import semver
 import yaml
 
+import mlrun.common.constants
 import mlrun.common.schemas
 import mlrun.errors
 
@@ -87,7 +88,7 @@ default_config = {
     "mpijob_crd_version": "", # mpijob crd version (e.g: "v1alpha1". must be in: mlrun.runtime.MPIJobCRDVersions)
     "ipython_widget": True,
     "log_level": "INFO",
-    # log formatter (options: human | json)
+    # log formatter (options: human | human_extended | json)
     "log_formatter": "human",
     "submit_timeout": "180", # timeout when submitting a new k8s resource
     # runtimes cleanup interval in seconds
@@ -228,10 +229,17 @@ default_config = {
     "executing": "24h",
     }
     },
+    # When the module is reloaded, the maximum depth recursion configuration for the recursive reload
+    # function is used to prevent infinite loop
+    "reload_max_recursion_depth": 100,
     },
     "databricks": {
     "artifact_directory_path": "/mlrun_databricks_runtime/artifacts_dictionaries"
     },
+    "application": {
+    "default_sidecar_internal_port": 8050,
+    "default_authentication_mode": "accessKey",
+    },
     },
     # TODO: function defaults should be moved to the function spec config above
     "function_defaults": {
@@ -361,12 +369,12 @@ default_config = {
     # is set to ClusterIP
     # ---------------------------------------------------------------------
     # Note: adding a mode requires special handling on
-    # - mlrun.runtimes.constants.NuclioIngressAddTemplatedIngressModes
+    # - mlrun.common.runtimes.constants.NuclioIngressAddTemplatedIngressModes
     # - mlrun.runtimes.nuclio.function.enrich_function_with_ingress
     "add_templated_ingress_host_mode": "never",
     "explicit_ack": "enabled",
     # size of serving spec to move to config maps
-    "serving_spec_env_cutoff": 4096,
+    "serving_spec_env_cutoff": 0,
     },
     "logs": {
     "decode": {
@@ -503,6 +511,7 @@ default_config = {
     "default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
     "user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
     "stream": "",
+    "monitoring_application": "v3io:///users/pipelines/{project}/monitoring-apps/",
     },
     # Offline storage path can be either relative or a full path. This path is used for general offline data
     # storage such as the parquet file which is generated from the monitoring stream function for the drift analysis
@@ -516,6 +525,9 @@ default_config = {
     # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
     "store_type": "v3io-nosql",
     "endpoint_store_connection": "",
+    # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
+    "tsdb_connector_type": "v3io-tsdb",
+    "tsdb_connection": "",
     },
     "secret_stores": {
     # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
@@ -554,7 +566,7 @@ default_config = {
     "nosql": "v3io:///projects/{project}/FeatureStore/{name}/nosql",
     # "authority" is optional and generalizes [userinfo "@"] host [":" port]
     "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/nosql",
-    "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/nosql",
+    "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}",
     },
     "default_targets": "parquet,nosql",
     "default_job_image": "mlrun/mlrun",
@@ -692,7 +704,10 @@ default_config = {
     "grafana_url": "",
     "alerts": {
         # supported modes: "enabled", "disabled".
-        "mode": "disabled"
+        "mode": "enabled",
+        # maximum number of alerts we allow to be configured.
+        # user will get an error when exceeding this
+        "max_allowed": 1000,
     },
     "auth_with_client_id": {
         "enabled": False,
@@ -797,6 +812,7 @@ class Config:
     ):
         """
         decodes and loads the config attribute to expected type
+
         :param attribute_path: the path in the default_config e.g. preemptible_nodes.node_selector
         :param expected_type: the object type valid values are : `dict`, `list` etc...
         :return: the expected type instance
@@ -959,6 +975,10 @@ class Config:
         self.httpdb.clusterization.chief.url = chief_api_url
         return self.httpdb.clusterization.chief.url
 
+    @staticmethod
+    def internal_labels():
+        return mlrun.common.constants.MLRunInternalLabels.all()
+
     @staticmethod
     def get_storage_auto_mount_params():
         auto_mount_params = {}
@@ -1088,6 +1108,7 @@ class Config:
         target: str = "online",
         artifact_path: str = None,
         function_name: str = None,
+        **kwargs,
     ) -> typing.Union[str, list[str]]:
         """Get the full path from the configuration based on the provided project and kind.
 
@@ -1114,7 +1135,7 @@ class Config:
         )
         if store_prefix_dict.get(kind):
             # Target exist in store prefix and has a valid string value
-            return store_prefix_dict[kind].format(project=project)
+            return store_prefix_dict[kind].format(project=project, **kwargs)
 
         if (
             function_name
@@ -1399,14 +1420,14 @@ def read_env(env=None, prefix=env_prefix):
     if log_formatter_name := config.get("log_formatter"):
         import mlrun.utils.logger
 
-        log_formatter = mlrun.utils.create_formatter_instance(
+        log_formatter = mlrun.utils.resolve_formatter_by_kind(
            mlrun.utils.FormatterKinds(log_formatter_name)
        )
        current_handler = mlrun.utils.logger.get_handler("default")
        current_formatter_name = current_handler.formatter.__class__.__name__
-       desired_formatter_name = log_formatter.__class__.__name__
+       desired_formatter_name = log_formatter.__name__
        if current_formatter_name != desired_formatter_name:
-           current_handler.setFormatter(log_formatter)
+           current_handler.setFormatter(log_formatter())
 
 
 # The default function pod resource values are of type str; however, when reading from environment variable numbers,
mlrun/data_types/to_pandas.py CHANGED
@@ -65,10 +65,10 @@ def toPandas(spark_df):
             msg = (
                 "toPandas attempted Arrow optimization because "
                 "'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, "
-                "failed by the reason below:\n %s\n"
+                f"failed by the reason below:\n {e}\n"
                 "Attempting non-optimization as "
                 "'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to "
-                "true." % str(e)
+                "true."
             )
             warnings.warn(msg)
             use_arrow = False
@@ -78,7 +78,7 @@ def toPandas(spark_df):
                 "'spark.sql.execution.arrow.pyspark.enabled' is set to true, but has "
                 "reached the error below and will not continue because automatic fallback "
                 "with 'spark.sql.execution.arrow.pyspark.fallback.enabled' has been set to "
-                "false.\n %s" % str(e)
+                f"false.\n {e}"
             )
             warnings.warn(msg)
             raise
@@ -144,7 +144,7 @@ def toPandas(spark_df):
                 "reached the error below and can not continue. Note that "
                 "'spark.sql.execution.arrow.pyspark.fallback.enabled' does not have an "
                 "effect on failures in the middle of "
-                "computation.\n %s" % str(e)
+                f"computation.\n {e}"
             )
             warnings.warn(msg)
             raise
@@ -154,10 +154,10 @@ def toPandas(spark_df):
     column_counter = Counter(spark_df.columns)
 
     dtype = [None] * len(spark_df.schema)
-    for fieldIdx, field in enumerate(spark_df.schema):
+    for field_idx, field in enumerate(spark_df.schema):
         # For duplicate column name, we use `iloc` to access it.
         if column_counter[field.name] > 1:
-            pandas_col = pdf.iloc[:, fieldIdx]
+            pandas_col = pdf.iloc[:, field_idx]
         else:
             pandas_col = pdf[field.name]
 
@@ -171,12 +171,12 @@ def toPandas(spark_df):
             and field.nullable
             and pandas_col.isnull().any()
         ):
-            dtype[fieldIdx] = pandas_type
+            dtype[field_idx] = pandas_type
         # Ensure we fall back to nullable numpy types, even when whole column is null:
         if isinstance(field.dataType, IntegralType) and pandas_col.isnull().any():
-            dtype[fieldIdx] = np.float64
+            dtype[field_idx] = np.float64
         if isinstance(field.dataType, BooleanType) and pandas_col.isnull().any():
-            dtype[fieldIdx] = object
+            dtype[field_idx] = object
 
     df = pd.DataFrame()
     for index, t in enumerate(dtype):
mlrun/datastore/base.py CHANGED
@@ -179,11 +179,23 @@ class DataStore:
         return {}
 
     @staticmethod
-    def _parquet_reader(df_module, url, file_system, time_column, start_time, end_time):
+    def _parquet_reader(
+        df_module,
+        url,
+        file_system,
+        time_column,
+        start_time,
+        end_time,
+        additional_filters,
+    ):
         from storey.utils import find_filters, find_partitions
 
         def set_filters(
-            partitions_time_attributes, start_time_inner, end_time_inner, kwargs
+            partitions_time_attributes,
+            start_time_inner,
+            end_time_inner,
+            filters_inner,
+            kwargs,
         ):
             filters = []
             find_filters(
@@ -193,20 +205,23 @@ class DataStore:
                 filters,
                 time_column,
             )
+            if filters and filters_inner:
+                filters[0] += filters_inner
+
             kwargs["filters"] = filters
 
         def reader(*args, **kwargs):
-            if start_time or end_time:
-                if time_column is None:
-                    raise mlrun.errors.MLRunInvalidArgumentError(
-                        "When providing start_time or end_time, must provide time_column"
-                    )
-
+            if time_column is None and (start_time or end_time):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "When providing start_time or end_time, must provide time_column"
+                )
+            if start_time or end_time or additional_filters:
                 partitions_time_attributes = find_partitions(url, file_system)
                 set_filters(
                     partitions_time_attributes,
                     start_time,
                     end_time,
+                    additional_filters,
                     kwargs,
                 )
             try:
@@ -217,6 +232,7 @@
             ):
                 raise ex
 
+            # TODO: fix timezone issue (ML-6308)
             if start_time.tzinfo:
                 start_time_inner = start_time.replace(tzinfo=None)
                 end_time_inner = end_time.replace(tzinfo=None)
@@ -228,6 +244,7 @@
                 partitions_time_attributes,
                 start_time_inner,
                 end_time_inner,
+                additional_filters,
                 kwargs,
             )
             return df_module.read_parquet(*args, **kwargs)
@@ -246,6 +263,7 @@
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         df_module = df_module or pd
@@ -310,7 +328,13 @@
             kwargs["columns"] = columns
 
             reader = self._parquet_reader(
-                df_module, url, file_system, time_column, start_time, end_time
+                df_module,
+                url,
+                file_system,
+                time_column,
+                start_time,
+                end_time,
+                additional_filters,
             )
 
         elif file_url.endswith(".json") or format == "json":
@@ -539,6 +563,7 @@ class DataItem:
         time_column=None,
         start_time=None,
         end_time=None,
+        additional_filters=None,
        **kwargs,
     ):
         """return a dataframe object (generated from the dataitem).
@@ -550,6 +575,12 @@
         :param end_time: filters out data after this time
         :param time_column: Store timestamp_key will be used if None.
                             The results will be filtered by this column and start_time & end_time.
+        :param additional_filters: List of additional_filter conditions as tuples.
+                            Each tuple should be in the format (column_name, operator, value).
+                            Supported operators: "=", ">=", "<=", ">", "<".
+                            Example: [("Product", "=", "Computer")]
+                            For all supported filters, please see:
+                            https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
         """
         df = self._store.as_df(
             self._url,
@@ -560,6 +591,7 @@
             time_column=time_column,
             start_time=start_time,
            end_time=end_time,
+            additional_filters=additional_filters,
             **kwargs,
         )
         return df
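The new additional_filters argument is threaded through _parquet_reader down to read_parquet as pyarrow-style filter tuples, as the docstring above describes. A short usage sketch (the store path is illustrative):

# Usage sketch for DataItem.as_df(additional_filters=...); the path is illustrative.
import mlrun

item = mlrun.get_dataitem("v3io:///projects/demo/data/sales.parquet")
df = item.as_df(additional_filters=[("Product", "=", "Computer")])  # (column, operator, value)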