mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc21__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (152)
  1. mlrun/__init__.py +10 -1
  2. mlrun/__main__.py +23 -111
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +36 -253
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +46 -42
  10. mlrun/artifacts/model.py +9 -141
  11. mlrun/artifacts/plots.py +14 -375
  12. mlrun/common/constants.py +65 -3
  13. mlrun/common/formatters/__init__.py +19 -0
  14. mlrun/{runtimes/mpijob/v1alpha1.py → common/formatters/artifact.py} +6 -14
  15. mlrun/common/formatters/base.py +113 -0
  16. mlrun/common/formatters/function.py +46 -0
  17. mlrun/common/formatters/pipeline.py +53 -0
  18. mlrun/common/formatters/project.py +51 -0
  19. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  20. mlrun/common/schemas/__init__.py +10 -5
  21. mlrun/common/schemas/alert.py +92 -11
  22. mlrun/common/schemas/api_gateway.py +56 -0
  23. mlrun/common/schemas/artifact.py +15 -5
  24. mlrun/common/schemas/auth.py +2 -0
  25. mlrun/common/schemas/client_spec.py +1 -0
  26. mlrun/common/schemas/frontend_spec.py +1 -0
  27. mlrun/common/schemas/function.py +4 -0
  28. mlrun/common/schemas/model_monitoring/__init__.py +15 -3
  29. mlrun/common/schemas/model_monitoring/constants.py +58 -7
  30. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  31. mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
  32. mlrun/common/schemas/pipeline.py +0 -9
  33. mlrun/common/schemas/project.py +5 -11
  34. mlrun/common/types.py +1 -0
  35. mlrun/config.py +27 -9
  36. mlrun/data_types/to_pandas.py +9 -9
  37. mlrun/datastore/base.py +41 -9
  38. mlrun/datastore/datastore.py +6 -2
  39. mlrun/datastore/datastore_profile.py +56 -4
  40. mlrun/datastore/inmem.py +2 -2
  41. mlrun/datastore/redis.py +2 -2
  42. mlrun/datastore/s3.py +5 -0
  43. mlrun/datastore/sources.py +147 -7
  44. mlrun/datastore/store_resources.py +7 -7
  45. mlrun/datastore/targets.py +110 -42
  46. mlrun/datastore/utils.py +42 -0
  47. mlrun/db/base.py +54 -10
  48. mlrun/db/httpdb.py +282 -79
  49. mlrun/db/nopdb.py +52 -10
  50. mlrun/errors.py +11 -0
  51. mlrun/execution.py +24 -9
  52. mlrun/feature_store/__init__.py +0 -2
  53. mlrun/feature_store/api.py +12 -47
  54. mlrun/feature_store/feature_set.py +9 -0
  55. mlrun/feature_store/feature_vector.py +8 -0
  56. mlrun/feature_store/ingestion.py +7 -6
  57. mlrun/feature_store/retrieval/base.py +9 -4
  58. mlrun/feature_store/retrieval/conversion.py +9 -9
  59. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  60. mlrun/feature_store/retrieval/job.py +9 -3
  61. mlrun/feature_store/retrieval/local_merger.py +2 -0
  62. mlrun/feature_store/retrieval/spark_merger.py +16 -0
  63. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  64. mlrun/frameworks/parallel_coordinates.py +2 -1
  65. mlrun/frameworks/tf_keras/__init__.py +4 -1
  66. mlrun/k8s_utils.py +10 -11
  67. mlrun/launcher/base.py +4 -3
  68. mlrun/launcher/client.py +5 -3
  69. mlrun/launcher/local.py +8 -2
  70. mlrun/launcher/remote.py +8 -2
  71. mlrun/lists.py +6 -2
  72. mlrun/model.py +45 -21
  73. mlrun/model_monitoring/__init__.py +1 -1
  74. mlrun/model_monitoring/api.py +41 -18
  75. mlrun/model_monitoring/application.py +5 -305
  76. mlrun/model_monitoring/applications/__init__.py +11 -0
  77. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  78. mlrun/model_monitoring/applications/base.py +280 -0
  79. mlrun/model_monitoring/applications/context.py +214 -0
  80. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  81. mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
  82. mlrun/model_monitoring/applications/results.py +99 -0
  83. mlrun/model_monitoring/controller.py +3 -1
  84. mlrun/model_monitoring/db/__init__.py +2 -0
  85. mlrun/model_monitoring/db/stores/__init__.py +0 -2
  86. mlrun/model_monitoring/db/stores/base/store.py +22 -37
  87. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
  88. mlrun/model_monitoring/db/stores/sqldb/models/base.py +39 -8
  89. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +27 -7
  90. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
  91. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +246 -224
  92. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +232 -216
  93. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  94. mlrun/model_monitoring/db/tsdb/base.py +329 -0
  95. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  96. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  97. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  98. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  99. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  100. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  101. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  102. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +636 -0
  103. mlrun/model_monitoring/evidently_application.py +6 -118
  104. mlrun/model_monitoring/helpers.py +46 -1
  105. mlrun/model_monitoring/model_endpoint.py +3 -2
  106. mlrun/model_monitoring/stream_processing.py +57 -216
  107. mlrun/model_monitoring/writer.py +134 -124
  108. mlrun/package/utils/_formatter.py +2 -2
  109. mlrun/platforms/__init__.py +10 -9
  110. mlrun/platforms/iguazio.py +21 -202
  111. mlrun/projects/operations.py +19 -12
  112. mlrun/projects/pipelines.py +79 -102
  113. mlrun/projects/project.py +265 -103
  114. mlrun/render.py +15 -14
  115. mlrun/run.py +16 -46
  116. mlrun/runtimes/__init__.py +6 -3
  117. mlrun/runtimes/base.py +8 -7
  118. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  119. mlrun/runtimes/funcdoc.py +0 -28
  120. mlrun/runtimes/kubejob.py +2 -1
  121. mlrun/runtimes/local.py +5 -2
  122. mlrun/runtimes/mpijob/__init__.py +0 -20
  123. mlrun/runtimes/mpijob/v1.py +1 -1
  124. mlrun/runtimes/nuclio/api_gateway.py +194 -84
  125. mlrun/runtimes/nuclio/application/application.py +170 -8
  126. mlrun/runtimes/nuclio/function.py +39 -49
  127. mlrun/runtimes/pod.py +16 -36
  128. mlrun/runtimes/remotesparkjob.py +9 -3
  129. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  130. mlrun/runtimes/utils.py +6 -45
  131. mlrun/serving/server.py +2 -1
  132. mlrun/serving/v2_serving.py +5 -1
  133. mlrun/track/tracker.py +2 -1
  134. mlrun/utils/async_http.py +25 -5
  135. mlrun/utils/helpers.py +107 -75
  136. mlrun/utils/logger.py +39 -7
  137. mlrun/utils/notifications/notification/__init__.py +14 -9
  138. mlrun/utils/notifications/notification/base.py +1 -1
  139. mlrun/utils/notifications/notification/slack.py +34 -7
  140. mlrun/utils/notifications/notification/webhook.py +1 -1
  141. mlrun/utils/notifications/notification_pusher.py +147 -16
  142. mlrun/utils/regex.py +9 -0
  143. mlrun/utils/v3io_clients.py +0 -1
  144. mlrun/utils/version/version.json +2 -2
  145. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/METADATA +14 -6
  146. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/RECORD +150 -130
  147. mlrun/kfpops.py +0 -865
  148. mlrun/platforms/other.py +0 -305
  149. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/LICENSE +0 -0
  150. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/WHEEL +0 -0
  151. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/entry_points.txt +0 -0
  152. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -81,6 +81,8 @@ class EventFieldType:
     DRIFT_DETECTED_THRESHOLD = "drift_detected_threshold"
     POSSIBLE_DRIFT_THRESHOLD = "possible_drift_threshold"
     SAMPLE_PARQUET_PATH = "sample_parquet_path"
+    TIME = "time"
+    TABLE_COLUMN = "table_column"
 
 
 class FeatureSetFeatures(MonitoringStrEnum):
@@ -99,14 +101,17 @@ class FeatureSetFeatures(MonitoringStrEnum):
 
 
 class ApplicationEvent:
     APPLICATION_NAME = "application_name"
-    CURRENT_STATS = "current_stats"
-    FEATURE_STATS = "feature_stats"
-    SAMPLE_PARQUET_PATH = "sample_parquet_path"
     START_INFER_TIME = "start_infer_time"
     END_INFER_TIME = "end_infer_time"
     LAST_REQUEST = "last_request"
     ENDPOINT_ID = "endpoint_id"
     OUTPUT_STREAM_URI = "output_stream_uri"
+    MLRUN_CONTEXT = "mlrun_context"
+
+    # Deprecated fields - TODO : delete in 1.9.0 (V1 app deprecation)
+    SAMPLE_PARQUET_PATH = "sample_parquet_path"
+    CURRENT_STATS = "current_stats"
+    FEATURE_STATS = "feature_stats"
 
 
 class WriterEvent(MonitoringStrEnum):
@@ -114,6 +119,21 @@ class WriterEvent(MonitoringStrEnum):
     ENDPOINT_ID = "endpoint_id"
     START_INFER_TIME = "start_infer_time"
     END_INFER_TIME = "end_infer_time"
+    EVENT_KIND = "event_kind"  # metric or result
+    DATA = "data"
+
+
+class WriterEventKind(MonitoringStrEnum):
+    METRIC = "metric"
+    RESULT = "result"
+
+
+class MetricData(MonitoringStrEnum):
+    METRIC_NAME = "metric_name"
+    METRIC_VALUE = "metric_value"
+
+
+class ResultData(MonitoringStrEnum):
     RESULT_NAME = "result_name"
     RESULT_VALUE = "result_value"
     RESULT_KIND = "result_kind"
@@ -138,10 +158,6 @@ class EventKeyMetrics:
     REAL_TIME = "real_time"
 
 
-class TimeSeriesTarget:
-    TSDB = "tsdb"
-
-
 class ModelEndpointTarget:
     V3IO_NOSQL = "v3io-nosql"
     SQL = "sql"
@@ -153,6 +169,7 @@ class ProjectSecretKeys:
     PIPELINES_ACCESS_KEY = "MODEL_MONITORING_PIPELINES_ACCESS_KEY"
     KAFKA_BROKERS = "KAFKA_BROKERS"
     STREAM_PATH = "STREAM_PATH"
+    TSDB_CONNECTION = "TSDB_CONNECTION"
 
 
 class ModelMonitoringStoreKinds:
@@ -170,12 +187,15 @@ class SchedulingKeys:
 class FileTargetKind:
     ENDPOINTS = "endpoints"
     EVENTS = "events"
+    PREDICTIONS = "predictions"
     STREAM = "stream"
     PARQUET = "parquet"
     APPS_PARQUET = "apps_parquet"
     LOG_STREAM = "log_stream"
     APP_RESULTS = "app_results"
+    APP_METRICS = "app_metrics"
     MONITORING_SCHEDULES = "monitoring_schedules"
+    MONITORING_APPLICATION = "monitoring_application"
 
 
 class ModelMonitoringMode(str, Enum):
@@ -210,6 +230,18 @@ class MonitoringFunctionNames(MonitoringStrEnum):
     WRITER = "model-monitoring-writer"
 
 
+class V3IOTSDBTables(MonitoringStrEnum):
+    APP_RESULTS = "app-results"
+    METRICS = "metrics"
+    EVENTS = "events"
+
+
+class TDEngineSuperTables(MonitoringStrEnum):
+    APP_RESULTS = "app_results"
+    METRICS = "metrics"
+    PREDICTIONS = "predictions"
+
+
 @dataclass
 class FunctionURI:
     project: str
@@ -286,6 +318,7 @@ class ResultKindApp(Enum):
     concept_drift = 1
     model_performance = 2
     system_performance = 3
+    custom = 4
 
 
 class ResultStatusApp(IntEnum):
@@ -303,11 +336,29 @@ class ModelMonitoringAppLabel:
     KEY = "mlrun__type"
     VAL = "mlrun__model-monitoring-application"
 
+    def __str__(self) -> str:
+        return f"{self.KEY}={self.VAL}"
+
 
 class ControllerPolicy:
     BASE_PERIOD = "base_period"
 
 
+class TSDBTarget:
+    V3IO_TSDB = "v3io-tsdb"
+    TDEngine = "tdengine"
+    PROMETHEUS = "prometheus"
+
+
 class HistogramDataDriftApplicationConstants:
     NAME = "histogram-data-drift"
     GENERAL_RESULT_NAME = "general_drift"
+
+
+class PredictionsQueryConstants:
+    DEFAULT_AGGREGATION_GRANULARITY = "10m"
+    INVOCATIONS = "invocations"
+
+
+class SpecialApps:
+    MLRUN_INFRA = "mlrun-infra"
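The new __str__ lets the application label be rendered directly as a label selector; based on the KEY and VAL constants above:

    >>> str(ModelMonitoringAppLabel())
    'mlrun__type=mlrun__model-monitoring-application'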
mlrun/common/schemas/model_monitoring/grafana.py CHANGED
@@ -11,12 +11,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 from typing import Optional, Union
 
 from pydantic import BaseModel
 
+import mlrun.common.types
+
+
+class GrafanaColumnType(mlrun.common.types.StrEnum):
+    NUMBER = "number"
+    STRING = "string"
+
 
 class GrafanaColumn(BaseModel):
     text: str
@@ -24,13 +30,11 @@ class GrafanaColumn(BaseModel):
 
 
 class GrafanaNumberColumn(GrafanaColumn):
-    text: str
-    type: str = "number"
+    type: str = GrafanaColumnType.NUMBER
 
 
 class GrafanaStringColumn(GrafanaColumn):
-    text: str
-    type: str = "string"
+    type: str = GrafanaColumnType.STRING
 
 
 class GrafanaTable(BaseModel):
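A minimal usage sketch of the refactored Grafana columns, assuming they are imported from mlrun.common.schemas.model_monitoring.grafana; the defaults are unchanged, only sourced from the new GrafanaColumnType enum:

    from mlrun.common.schemas.model_monitoring.grafana import (
        GrafanaNumberColumn,
        GrafanaStringColumn,
    )

    value_col = GrafanaNumberColumn(text="drift_value")
    name_col = GrafanaStringColumn(text="endpoint_name")
    # GrafanaColumnType members are str enums, so they compare equal to the old literals
    assert value_col.type == "number" and name_col.type == "string"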
mlrun/common/schemas/model_monitoring/model_endpoints.py CHANGED
@@ -11,16 +11,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 import enum
 import json
-from typing import Any, Optional
+import re
+from datetime import datetime
+from typing import Any, NamedTuple, Optional
 
 from pydantic import BaseModel, Field, validator
 from pydantic.main import Extra
 
 import mlrun.common.model_monitoring
+import mlrun.common.types
 
 from ..object import ObjectKind, ObjectSpec, ObjectStatus
 from .constants import (
@@ -29,6 +31,8 @@ from .constants import (
     EventKeyMetrics,
     EventLiveStats,
     ModelMonitoringMode,
+    ResultKindApp,
+    ResultStatusApp,
 )
 
 
@@ -100,6 +104,7 @@ class ModelEndpointSpec(ObjectSpec):
     )
 
     @validator("monitor_configuration")
+    @classmethod
     def set_name(cls, monitor_configuration):
         return monitor_configuration or {
             EventFieldType.DRIFT_DETECTED_THRESHOLD: (
@@ -111,6 +116,7 @@
         }
 
     @validator("model_uri")
+    @classmethod
     def validate_model_uri(cls, model_uri):
         """Validate that the model uri includes the required prefix"""
         prefix, uri = mlrun.datastore.parse_store_uri(model_uri)
@@ -292,6 +298,84 @@ class ModelEndpointList(BaseModel):
     endpoints: list[ModelEndpoint] = []
 
 
+class ModelEndpointMonitoringMetricType(mlrun.common.types.StrEnum):
+    RESULT = "result"
+    METRIC = "metric"
+
+
+class ModelEndpointMonitoringMetric(BaseModel):
+    project: str
+    app: str
+    type: ModelEndpointMonitoringMetricType
+    name: str
+    full_name: str
+
+
+def _compose_full_name(
+    *,
+    project: str,
+    app: str,
+    name: str,
+    type: ModelEndpointMonitoringMetricType = ModelEndpointMonitoringMetricType.RESULT,
+) -> str:
+    return ".".join([project, app, type, name])
+
+
+_FQN_PART_PATTERN = r"[a-zA-Z0-9_-]+"
+_FQN_PATTERN = (
+    rf"^(?P<project>{_FQN_PART_PATTERN})\."
+    rf"(?P<app>{_FQN_PART_PATTERN})\."
+    rf"(?P<type>{ModelEndpointMonitoringMetricType.RESULT}|{ModelEndpointMonitoringMetricType.METRIC})\."
+    rf"(?P<name>{_FQN_PART_PATTERN})$"
+)
+_FQN_REGEX = re.compile(_FQN_PATTERN)
+
+
+def _parse_metric_fqn_to_monitoring_metric(fqn: str) -> ModelEndpointMonitoringMetric:
+    match = _FQN_REGEX.fullmatch(fqn)
+    if match is None:
+        raise ValueError("The fully qualified name is not in the expected format")
+    return ModelEndpointMonitoringMetric.parse_obj(
+        match.groupdict() | {"full_name": fqn}
+    )
+
+
+class _MetricPoint(NamedTuple):
+    timestamp: datetime
+    value: float
+
+
+class _ResultPoint(NamedTuple):
+    timestamp: datetime
+    value: float
+    status: ResultStatusApp
+
+
+class _ModelEndpointMonitoringMetricValuesBase(BaseModel):
+    full_name: str
+    type: ModelEndpointMonitoringMetricType
+    data: bool
+
+
+class ModelEndpointMonitoringMetricValues(_ModelEndpointMonitoringMetricValuesBase):
+    type: ModelEndpointMonitoringMetricType = ModelEndpointMonitoringMetricType.METRIC
+    values: list[_MetricPoint]
+    data: bool = True
+
+
+class ModelEndpointMonitoringResultValues(_ModelEndpointMonitoringMetricValuesBase):
+    type: ModelEndpointMonitoringMetricType = ModelEndpointMonitoringMetricType.RESULT
+    result_kind: ResultKindApp
+    values: list[_ResultPoint]
+    data: bool = True
+
+
+class ModelEndpointMonitoringMetricNoData(_ModelEndpointMonitoringMetricValuesBase):
+    full_name: str
+    type: ModelEndpointMonitoringMetricType
+    data: bool = False
+
+
 def _mapping_attributes(
     base_model: BaseModel,
     flattened_dictionary: dict,
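The helpers above define the fully qualified metric name as "<project>.<app>.<type>.<name>"; a short round-trip sketch with made-up names:

    fqn = _compose_full_name(
        project="fraud-detection", app="histogram-data-drift", name="general_drift"
    )
    # -> "fraud-detection.histogram-data-drift.result.general_drift" (type defaults to RESULT)

    metric = _parse_metric_fqn_to_monitoring_metric(fqn)
    # -> ModelEndpointMonitoringMetric(project="fraud-detection", app="histogram-data-drift",
    #    type=ModelEndpointMonitoringMetricType.RESULT, name="general_drift", full_name=fqn)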
mlrun/common/schemas/pipeline.py CHANGED
@@ -16,15 +16,6 @@ import typing
 
 import pydantic
 
-import mlrun.common.types
-
-
-class PipelinesFormat(mlrun.common.types.StrEnum):
-    full = "full"
-    metadata_only = "metadata_only"
-    summary = "summary"
-    name_only = "name_only"
-
 
 class PipelinesPagination(str):
     default_page_size = 20
mlrun/common/schemas/project.py CHANGED
@@ -23,16 +23,6 @@ from .common import ImageBuilder
 from .object import ObjectKind, ObjectStatus
 
 
-class ProjectsFormat(mlrun.common.types.StrEnum):
-    full = "full"
-    name_only = "name_only"
-    # minimal format removes large fields from the response (e.g. functions, workflows, artifacts)
-    # and is used for faster response times (in the UI)
-    minimal = "minimal"
-    # internal - allowed only in follower mode, only for the leader for upgrade purposes
-    leader = "leader"
-
-
 class ProjectMetadata(pydantic.BaseModel):
     name: str
     created: typing.Optional[datetime.datetime] = None
@@ -113,7 +103,11 @@ class ProjectSummary(pydantic.BaseModel):
     runs_completed_recent_count: int
     runs_failed_recent_count: int
     runs_running_count: int
-    schedules_count: int
+    distinct_schedules_count: int
+    distinct_scheduled_jobs_pending_count: int
+    distinct_scheduled_pipelines_pending_count: int
+    pipelines_completed_recent_count: typing.Optional[int] = None
+    pipelines_failed_recent_count: typing.Optional[int] = None
     pipelines_running_count: typing.Optional[int] = None
 
 
mlrun/common/types.py CHANGED
@@ -29,3 +29,4 @@ class StrEnum(str, enum.Enum):
 class HTTPMethod(StrEnum):
     GET = "GET"
     POST = "POST"
+    DELETE = "DELETE"
mlrun/config.py CHANGED
@@ -37,6 +37,7 @@ import dotenv
 import semver
 import yaml
 
+import mlrun.common.constants
 import mlrun.common.schemas
 import mlrun.errors
 
@@ -87,7 +88,7 @@ default_config = {
     "mpijob_crd_version": "",  # mpijob crd version (e.g: "v1alpha1". must be in: mlrun.runtime.MPIJobCRDVersions)
     "ipython_widget": True,
     "log_level": "INFO",
-    # log formatter (options: human | json)
+    # log formatter (options: human | human_extended | json)
     "log_formatter": "human",
     "submit_timeout": "180",  # timeout when submitting a new k8s resource
     # runtimes cleanup interval in seconds
@@ -232,6 +233,10 @@ default_config = {
         "databricks": {
             "artifact_directory_path": "/mlrun_databricks_runtime/artifacts_dictionaries"
         },
+        "application": {
+            "default_sidecar_internal_port": 8050,
+            "default_authentication_mode": "accessKey",
+        },
     },
     # TODO: function defaults should be moved to the function spec config above
     "function_defaults": {
@@ -361,12 +366,12 @@ default_config = {
         # is set to ClusterIP
         # ---------------------------------------------------------------------
         # Note: adding a mode requires special handling on
-        # - mlrun.runtimes.constants.NuclioIngressAddTemplatedIngressModes
+        # - mlrun.common.runtimes.constants.NuclioIngressAddTemplatedIngressModes
         # - mlrun.runtimes.nuclio.function.enrich_function_with_ingress
         "add_templated_ingress_host_mode": "never",
         "explicit_ack": "enabled",
         # size of serving spec to move to config maps
-        "serving_spec_env_cutoff": 4096,
+        "serving_spec_env_cutoff": 0,
     },
     "logs": {
         "decode": {
@@ -503,6 +508,7 @@ default_config = {
             "default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
             "user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
             "stream": "",
+            "monitoring_application": "v3io:///users/pipelines/{project}/monitoring-apps/",
         },
         # Offline storage path can be either relative or a full path. This path is used for general offline data
        # storage such as the parquet file which is generated from the monitoring stream function for the drift analysis
@@ -516,6 +522,9 @@ default_config = {
         # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
         "store_type": "v3io-nosql",
         "endpoint_store_connection": "",
+        # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
+        "tsdb_connector_type": "v3io-tsdb",
+        "tsdb_connection": "",
     },
     "secret_stores": {
         # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
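The new tsdb_connector_type / tsdb_connection keys extend the model-endpoint monitoring block of the default config. Assuming MLRun's usual environment-variable override convention (MLRUN_ prefix, double underscores for nesting), pointing the monitoring stack at a different TSDB could look roughly like this; the connection string is a placeholder:

    import os

    # Assumed override paths - the keys above appear to live under model_endpoint_monitoring
    os.environ["MLRUN_MODEL_ENDPOINT_MONITORING__TSDB_CONNECTOR_TYPE"] = "tdengine"
    os.environ["MLRUN_MODEL_ENDPOINT_MONITORING__TSDB_CONNECTION"] = "<tsdb-connection-string>"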
@@ -554,7 +563,7 @@ default_config = {
             "nosql": "v3io:///projects/{project}/FeatureStore/{name}/nosql",
             # "authority" is optional and generalizes [userinfo "@"] host [":" port]
             "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/nosql",
-            "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/nosql",
+            "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}",
         },
         "default_targets": "parquet,nosql",
         "default_job_image": "mlrun/mlrun",
@@ -692,7 +701,10 @@ default_config = {
     "grafana_url": "",
     "alerts": {
         # supported modes: "enabled", "disabled".
-        "mode": "disabled"
+        "mode": "enabled",
+        # maximum number of alerts we allow to be configured.
+        # user will get an error when exceeding this
+        "max_allowed": 1000,
     },
     "auth_with_client_id": {
         "enabled": False,
@@ -797,6 +809,7 @@ class Config:
     ):
         """
         decodes and loads the config attribute to expected type
+
         :param attribute_path: the path in the default_config e.g. preemptible_nodes.node_selector
         :param expected_type: the object type valid values are : `dict`, `list` etc...
         :return: the expected type instance
@@ -959,6 +972,10 @@ class Config:
         self.httpdb.clusterization.chief.url = chief_api_url
         return self.httpdb.clusterization.chief.url
 
+    @staticmethod
+    def internal_labels():
+        return mlrun.common.constants.MLRunInternalLabels.all()
+
     @staticmethod
     def get_storage_auto_mount_params():
         auto_mount_params = {}
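A small sketch of reaching the new helper through the global config object (mlrun.mlconf is the usual alias for this Config singleton):

    import mlrun

    # Label keys MLRun reserves for internal use, per mlrun.common.constants.MLRunInternalLabels.all()
    reserved_labels = mlrun.mlconf.internal_labels()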
@@ -1088,6 +1105,7 @@ class Config:
         target: str = "online",
         artifact_path: str = None,
         function_name: str = None,
+        **kwargs,
     ) -> typing.Union[str, list[str]]:
         """Get the full path from the configuration based on the provided project and kind.
 
@@ -1114,7 +1132,7 @@ class Config:
         )
         if store_prefix_dict.get(kind):
             # Target exist in store prefix and has a valid string value
-            return store_prefix_dict[kind].format(project=project)
+            return store_prefix_dict[kind].format(project=project, **kwargs)
 
         if (
             function_name
@@ -1399,14 +1417,14 @@ def read_env(env=None, prefix=env_prefix):
     if log_formatter_name := config.get("log_formatter"):
         import mlrun.utils.logger
 
-        log_formatter = mlrun.utils.create_formatter_instance(
+        log_formatter = mlrun.utils.resolve_formatter_by_kind(
             mlrun.utils.FormatterKinds(log_formatter_name)
         )
         current_handler = mlrun.utils.logger.get_handler("default")
         current_formatter_name = current_handler.formatter.__class__.__name__
-        desired_formatter_name = log_formatter.__class__.__name__
+        desired_formatter_name = log_formatter.__name__
         if current_formatter_name != desired_formatter_name:
-            current_handler.setFormatter(log_formatter)
+            current_handler.setFormatter(log_formatter())
 
     # The default function pod resource values are of type str; however, when reading from environment variable numbers,
     # it converts them to type int if contains only number, so we want to convert them to str.
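The comment change above advertises a new human_extended formatter. Assuming the standard MLRUN_-prefixed environment override for the log_formatter key, selecting it from Python could look like:

    import os

    os.environ["MLRUN_LOG_FORMATTER"] = "human_extended"  # must be set before mlrun loads its config
    import mlrun  # noqa: E402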
mlrun/data_types/to_pandas.py CHANGED
@@ -65,10 +65,10 @@ def toPandas(spark_df):
             msg = (
                 "toPandas attempted Arrow optimization because "
                 "'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, "
-                "failed by the reason below:\n %s\n"
+                f"failed by the reason below:\n {e}\n"
                 "Attempting non-optimization as "
                 "'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to "
-                "true." % str(e)
+                "true."
             )
             warnings.warn(msg)
             use_arrow = False
@@ -78,7 +78,7 @@ def toPandas(spark_df):
                 "'spark.sql.execution.arrow.pyspark.enabled' is set to true, but has "
                 "reached the error below and will not continue because automatic fallback "
                 "with 'spark.sql.execution.arrow.pyspark.fallback.enabled' has been set to "
-                "false.\n %s" % str(e)
+                f"false.\n {e}"
             )
             warnings.warn(msg)
             raise
@@ -144,7 +144,7 @@ def toPandas(spark_df):
                     "reached the error below and can not continue. Note that "
                     "'spark.sql.execution.arrow.pyspark.fallback.enabled' does not have an "
                     "effect on failures in the middle of "
-                    "computation.\n %s" % str(e)
+                    f"computation.\n {e}"
                 )
                 warnings.warn(msg)
                 raise
@@ -154,10 +154,10 @@ def toPandas(spark_df):
     column_counter = Counter(spark_df.columns)
 
     dtype = [None] * len(spark_df.schema)
-    for fieldIdx, field in enumerate(spark_df.schema):
+    for field_idx, field in enumerate(spark_df.schema):
         # For duplicate column name, we use `iloc` to access it.
         if column_counter[field.name] > 1:
-            pandas_col = pdf.iloc[:, fieldIdx]
+            pandas_col = pdf.iloc[:, field_idx]
         else:
             pandas_col = pdf[field.name]
 
@@ -171,12 +171,12 @@ def toPandas(spark_df):
             and field.nullable
             and pandas_col.isnull().any()
         ):
-            dtype[fieldIdx] = pandas_type
+            dtype[field_idx] = pandas_type
         # Ensure we fall back to nullable numpy types, even when whole column is null:
         if isinstance(field.dataType, IntegralType) and pandas_col.isnull().any():
-            dtype[fieldIdx] = np.float64
+            dtype[field_idx] = np.float64
         if isinstance(field.dataType, BooleanType) and pandas_col.isnull().any():
-            dtype[fieldIdx] = object
+            dtype[field_idx] = object
 
     df = pd.DataFrame()
     for index, t in enumerate(dtype):
mlrun/datastore/base.py CHANGED
@@ -179,11 +179,23 @@ class DataStore:
         return {}
 
     @staticmethod
-    def _parquet_reader(df_module, url, file_system, time_column, start_time, end_time):
+    def _parquet_reader(
+        df_module,
+        url,
+        file_system,
+        time_column,
+        start_time,
+        end_time,
+        additional_filters,
+    ):
         from storey.utils import find_filters, find_partitions
 
         def set_filters(
-            partitions_time_attributes, start_time_inner, end_time_inner, kwargs
+            partitions_time_attributes,
+            start_time_inner,
+            end_time_inner,
+            filters_inner,
+            kwargs,
         ):
             filters = []
             find_filters(
@@ -193,20 +205,23 @@ class DataStore:
                 filters,
                 time_column,
             )
+            if filters and filters_inner:
+                filters[0] += filters_inner
+
             kwargs["filters"] = filters
 
         def reader(*args, **kwargs):
-            if start_time or end_time:
-                if time_column is None:
-                    raise mlrun.errors.MLRunInvalidArgumentError(
-                        "When providing start_time or end_time, must provide time_column"
-                    )
-
+            if time_column is None and (start_time or end_time):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "When providing start_time or end_time, must provide time_column"
+                )
+            if start_time or end_time or additional_filters:
                 partitions_time_attributes = find_partitions(url, file_system)
                 set_filters(
                     partitions_time_attributes,
                     start_time,
                     end_time,
+                    additional_filters,
                     kwargs,
                 )
             try:
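After find_filters fills in the time-range conditions, the additional filters are appended to the first condition group, i.e. they are ANDed with the time range in the filters argument passed to read_parquet. An illustration of the resulting shape, with made-up column names and values:

    # Illustrative shape of kwargs["filters"] after set_filters runs
    filters = [
        [
            ("timestamp", ">=", start_time),  # added by find_filters
            ("timestamp", "<=", end_time),    # added by find_filters
            ("Product", "=", "Computer"),     # appended from additional_filters
        ]
    ]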
@@ -217,6 +232,7 @@
                 ):
                     raise ex
 
+                # TODO: fix timezone issue (ML-6308)
                 if start_time.tzinfo:
                     start_time_inner = start_time.replace(tzinfo=None)
                     end_time_inner = end_time.replace(tzinfo=None)
@@ -228,6 +244,7 @@
                     partitions_time_attributes,
                     start_time_inner,
                     end_time_inner,
+                    additional_filters,
                     kwargs,
                 )
                 return df_module.read_parquet(*args, **kwargs)
@@ -246,6 +263,7 @@
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         df_module = df_module or pd
@@ -310,7 +328,13 @@
             kwargs["columns"] = columns
 
             reader = self._parquet_reader(
-                df_module, url, file_system, time_column, start_time, end_time
+                df_module,
+                url,
+                file_system,
+                time_column,
+                start_time,
+                end_time,
+                additional_filters,
             )
 
         elif file_url.endswith(".json") or format == "json":
@@ -539,6 +563,7 @@ class DataItem:
         time_column=None,
         start_time=None,
        end_time=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return a dataframe object (generated from the dataitem).
@@ -550,6 +575,12 @@
         :param end_time: filters out data after this time
         :param time_column: Store timestamp_key will be used if None.
                             The results will be filtered by this column and start_time & end_time.
+        :param additional_filters: List of additional_filter conditions as tuples.
+                            Each tuple should be in the format (column_name, operator, value).
+                            Supported operators: "=", ">=", "<=", ">", "<".
+                            Example: [("Product", "=", "Computer")]
+                            For all supported filters, please see:
+                            https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
         """
         df = self._store.as_df(
             self._url,
@@ -560,6 +591,7 @@
             time_column=time_column,
             start_time=start_time,
             end_time=end_time,
+            additional_filters=additional_filters,
             **kwargs,
         )
         return df
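Taken together with the docstring above, a minimal usage sketch of the new parameter on a DataItem; the dataset URL and filter values are made up:

    import mlrun

    item = mlrun.get_dataitem("s3://my-bucket/sales.parquet")
    df = item.as_df(additional_filters=[("Product", "=", "Computer")])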