mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (107)
  1. mlrun/__init__.py +10 -1
  2. mlrun/__main__.py +18 -109
  3. mlrun/{runtimes/mpijob/v1alpha1.py → alerts/__init__.py} +2 -16
  4. mlrun/alerts/alert.py +141 -0
  5. mlrun/artifacts/__init__.py +8 -3
  6. mlrun/artifacts/base.py +36 -253
  7. mlrun/artifacts/dataset.py +9 -190
  8. mlrun/artifacts/manager.py +20 -41
  9. mlrun/artifacts/model.py +8 -140
  10. mlrun/artifacts/plots.py +14 -375
  11. mlrun/common/schemas/__init__.py +4 -2
  12. mlrun/common/schemas/alert.py +46 -4
  13. mlrun/common/schemas/api_gateway.py +4 -0
  14. mlrun/common/schemas/artifact.py +15 -0
  15. mlrun/common/schemas/auth.py +2 -0
  16. mlrun/common/schemas/model_monitoring/__init__.py +8 -1
  17. mlrun/common/schemas/model_monitoring/constants.py +40 -4
  18. mlrun/common/schemas/model_monitoring/model_endpoints.py +73 -2
  19. mlrun/common/schemas/project.py +2 -0
  20. mlrun/config.py +7 -4
  21. mlrun/data_types/to_pandas.py +4 -4
  22. mlrun/datastore/base.py +41 -9
  23. mlrun/datastore/datastore_profile.py +54 -4
  24. mlrun/datastore/inmem.py +2 -2
  25. mlrun/datastore/sources.py +43 -2
  26. mlrun/datastore/store_resources.py +2 -6
  27. mlrun/datastore/targets.py +106 -39
  28. mlrun/db/base.py +23 -3
  29. mlrun/db/httpdb.py +101 -47
  30. mlrun/db/nopdb.py +20 -2
  31. mlrun/errors.py +5 -0
  32. mlrun/feature_store/__init__.py +0 -2
  33. mlrun/feature_store/api.py +12 -47
  34. mlrun/feature_store/feature_set.py +9 -0
  35. mlrun/feature_store/retrieval/base.py +9 -4
  36. mlrun/feature_store/retrieval/conversion.py +4 -4
  37. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  38. mlrun/feature_store/retrieval/job.py +2 -0
  39. mlrun/feature_store/retrieval/local_merger.py +2 -0
  40. mlrun/feature_store/retrieval/spark_merger.py +5 -0
  41. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
  42. mlrun/launcher/base.py +4 -3
  43. mlrun/launcher/client.py +1 -1
  44. mlrun/lists.py +4 -2
  45. mlrun/model.py +25 -11
  46. mlrun/model_monitoring/__init__.py +1 -1
  47. mlrun/model_monitoring/api.py +41 -18
  48. mlrun/model_monitoring/application.py +5 -305
  49. mlrun/model_monitoring/applications/__init__.py +11 -0
  50. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  51. mlrun/model_monitoring/applications/base.py +282 -0
  52. mlrun/model_monitoring/applications/context.py +214 -0
  53. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  54. mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
  55. mlrun/model_monitoring/applications/results.py +99 -0
  56. mlrun/model_monitoring/controller.py +3 -1
  57. mlrun/model_monitoring/db/__init__.py +2 -0
  58. mlrun/model_monitoring/db/stores/base/store.py +9 -36
  59. mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
  60. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +63 -110
  61. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +104 -187
  62. mlrun/model_monitoring/db/tsdb/__init__.py +71 -0
  63. mlrun/model_monitoring/db/tsdb/base.py +135 -0
  64. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  65. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  66. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +404 -0
  67. mlrun/model_monitoring/db/v3io_tsdb_reader.py +134 -0
  68. mlrun/model_monitoring/evidently_application.py +6 -118
  69. mlrun/model_monitoring/helpers.py +1 -1
  70. mlrun/model_monitoring/model_endpoint.py +3 -2
  71. mlrun/model_monitoring/stream_processing.py +48 -213
  72. mlrun/model_monitoring/writer.py +101 -121
  73. mlrun/platforms/__init__.py +10 -9
  74. mlrun/platforms/iguazio.py +21 -202
  75. mlrun/projects/operations.py +11 -7
  76. mlrun/projects/pipelines.py +13 -76
  77. mlrun/projects/project.py +73 -45
  78. mlrun/render.py +11 -13
  79. mlrun/run.py +6 -41
  80. mlrun/runtimes/__init__.py +3 -3
  81. mlrun/runtimes/base.py +6 -6
  82. mlrun/runtimes/funcdoc.py +0 -28
  83. mlrun/runtimes/kubejob.py +2 -1
  84. mlrun/runtimes/local.py +1 -1
  85. mlrun/runtimes/mpijob/__init__.py +0 -20
  86. mlrun/runtimes/mpijob/v1.py +1 -1
  87. mlrun/runtimes/nuclio/api_gateway.py +75 -9
  88. mlrun/runtimes/nuclio/function.py +9 -35
  89. mlrun/runtimes/pod.py +16 -36
  90. mlrun/runtimes/remotesparkjob.py +1 -1
  91. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  92. mlrun/runtimes/utils.py +1 -39
  93. mlrun/utils/helpers.py +72 -71
  94. mlrun/utils/notifications/notification/base.py +1 -1
  95. mlrun/utils/notifications/notification/slack.py +12 -5
  96. mlrun/utils/notifications/notification/webhook.py +1 -1
  97. mlrun/utils/notifications/notification_pusher.py +134 -14
  98. mlrun/utils/version/version.json +2 -2
  99. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/METADATA +4 -3
  100. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/RECORD +105 -95
  101. mlrun/kfpops.py +0 -865
  102. mlrun/platforms/other.py +0 -305
  103. /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
  104. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/LICENSE +0 -0
  105. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/WHEEL +0 -0
  106. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/entry_points.txt +0 -0
  107. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/alert.py CHANGED
@@ -26,10 +26,10 @@ class EventEntityKind(StrEnum):
     JOB = "job"
 
 
-class EventEntity(pydantic.BaseModel):
+class EventEntities(pydantic.BaseModel):
     kind: EventEntityKind
     project: str
-    id: str
+    ids: pydantic.conlist(str, min_items=1, max_items=1)
 
 
 class EventKind(StrEnum):
@@ -48,7 +48,7 @@ _event_kind_entity_map = {
 class Event(pydantic.BaseModel):
     kind: EventKind
     timestamp: Union[str, datetime] = None  # occurrence time
-    entity: EventEntity
+    entity: EventEntities
     value_dict: Optional[dict] = pydantic.Field(default_factory=dict)
 
     def is_valid(self):
@@ -71,6 +71,12 @@ class AlertTrigger(pydantic.BaseModel):
     events: list[EventKind] = []
     prometheus_alert: str = None
 
+    def __eq__(self, other):
+        return (
+            self.prometheus_alert == other.prometheus_alert
+            and self.events == other.events
+        )
+
 
 class AlertCriteria(pydantic.BaseModel):
     count: Annotated[
@@ -86,6 +92,9 @@ class AlertCriteria(pydantic.BaseModel):
         ),
     ] = None
 
+    def __eq__(self, other):
+        return self.count == other.count and self.period == other.period
+
 
 class ResetPolicy(StrEnum):
     MANUAL = "manual"
@@ -108,7 +117,7 @@ class AlertConfig(pydantic.BaseModel):
     ]
     created: Union[str, datetime] = None
     severity: AlertSeverity
-    entity: EventEntity
+    entities: EventEntities
     trigger: AlertTrigger
     criteria: Optional[AlertCriteria]
     reset_policy: ResetPolicy = ResetPolicy.MANUAL
@@ -120,3 +129,36 @@
 class AlertsModes(StrEnum):
     enabled = "enabled"
     disabled = "disabled"
+
+
+class AlertTemplate(
+    pydantic.BaseModel
+):  # Template fields that are not shared with created configs
+    template_id: int = None
+    template_name: str
+    template_description: Optional[str] = (
+        "String explaining the purpose of this template"
+    )
+
+    # A property that identifies templates that were created by the system and cannot be modified/deleted by the user
+    system_generated: bool = False
+
+    # AlertConfig fields that are pre-defined
+    description: Optional[str] = (
+        "String to be sent in the generated notifications e.g. 'Model {{ $project }}/{{ $entity }} is drifting.'"
+    )
+    severity: AlertSeverity
+    trigger: AlertTrigger
+    criteria: Optional[AlertCriteria]
+    reset_policy: ResetPolicy = ResetPolicy.MANUAL
+
+    # This is slightly different than __eq__ as it doesn't compare everything
+    def templates_differ(self, other):
+        return (
+            self.template_description != other.template_description
+            or self.description != other.description
+            or self.severity != other.severity
+            or self.trigger != other.trigger
+            or self.reset_policy != other.reset_policy
+            or self.criteria != other.criteria
+        )
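
Illustrative sketch (not part of the diff): with the schema change above, the former single `id: str` field becomes `ids`, a list constrained to exactly one element, and `AlertConfig.entity` becomes `entities`. Assuming the models are imported from `mlrun.common.schemas.alert` (the file this hunk belongs to), constructing the new entity object might look like:

    import mlrun.common.schemas.alert as alert_schemas

    # conlist(str, min_items=1, max_items=1) accepts exactly one id in the list
    entities = alert_schemas.EventEntities(
        kind=alert_schemas.EventEntityKind.JOB,
        project="my-project",
        ids=["my-job"],
    )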
mlrun/common/schemas/api_gateway.py CHANGED
@@ -23,6 +23,7 @@ import mlrun.common.types
 class APIGatewayAuthenticationMode(mlrun.common.types.StrEnum):
     basic = "basicAuth"
     none = "none"
+    access_key = "accessKey"
 
     @classmethod
     def from_str(cls, authentication_mode: str):
@@ -30,6 +31,8 @@ class APIGatewayAuthenticationMode(mlrun.common.types.StrEnum):
             return cls.none
         elif authentication_mode == "basicAuth":
             return cls.basic
+        elif authentication_mode == "accessKey":
+            return cls.access_key
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"Authentication mode `{authentication_mode}` is not supported",
@@ -63,6 +66,7 @@ class APIGatewayUpstream(_APIGatewayBaseModel):
     kind: Optional[str] = "nucliofunction"
     nucliofunction: dict[str, str]
     percentage: Optional[int] = 0
+    port: Optional[int] = 0
 
 
 class APIGatewaySpec(_APIGatewayBaseModel):
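
The new authentication mode is reachable both as an enum member and through `from_str`, which maps the Nuclio-style string added in this hunk. A minimal sketch, assuming the class is imported from `mlrun.common.schemas.api_gateway`:

    from mlrun.common.schemas.api_gateway import APIGatewayAuthenticationMode

    # "accessKey" is now recognized alongside "none" and "basicAuth"
    mode = APIGatewayAuthenticationMode.from_str("accessKey")
    assert mode == APIGatewayAuthenticationMode.access_key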
mlrun/common/schemas/artifact.py CHANGED
@@ -93,3 +93,18 @@ class Artifact(pydantic.BaseModel):
     metadata: ArtifactMetadata
     spec: ArtifactSpec
     status: ObjectStatus
+
+
+class ArtifactsDeletionStrategies(mlrun.common.types.StrEnum):
+    """Artifacts deletion strategies types."""
+
+    metadata_only = "metadata-only"
+    """Only removes the artifact db record, leaving all related artifact data in-place"""
+
+    data_optional = "data-optional"
+    """Delete the artifact data of the artifact as a best-effort.
+    If artifact data deletion fails still try to delete the artifact db record"""
+
+    data_force = "data-force"
+    """Delete the artifact data, and if cannot delete it fail the deletion
+    and don't delete the artifact db record"""
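
Because the strategies are string-valued enum members, callers can select one by attribute or look it up by value. A small hedged sketch, assuming the enum is imported from `mlrun.common.schemas.artifact`:

    from mlrun.common.schemas.artifact import ArtifactsDeletionStrategies

    # metadata-only is the least destructive option: only the db record is removed
    strategy = ArtifactsDeletionStrategies.metadata_only
    assert strategy.value == "metadata-only"
    assert ArtifactsDeletionStrategies("data-force") is ArtifactsDeletionStrategies.data_force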
mlrun/common/schemas/auth.py CHANGED
@@ -59,6 +59,7 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
     hub_source = "hub-source"
     workflow = "workflow"
     alert = "alert"
+    alert_templates = "alert-templates"
     event = "event"
     datastore_profile = "datastore-profile"
     api_gateway = "api-gateway"
@@ -87,6 +88,7 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
     AuthorizationResourceTypes.run: "/projects/{project_name}/runs/{resource_name}",
     AuthorizationResourceTypes.event: "/projects/{project_name}/events/{resource_name}",
     AuthorizationResourceTypes.alert: "/projects/{project_name}/alerts/{resource_name}",
+    AuthorizationResourceTypes.alert_templates: "/alert-templates/{resource_name}",
     # runtime resource doesn't have an identifier, we don't need any auth granularity behind project level
     AuthorizationResourceTypes.runtime_resource: "/projects/{project_name}/runtime-resources",
     AuthorizationResourceTypes.model_endpoint: "/projects/{project_name}/model-endpoints/{resource_name}",
mlrun/common/schemas/model_monitoring/__init__.py CHANGED
@@ -25,17 +25,22 @@ from .constants import (
     FeatureSetFeatures,
     FileTargetKind,
     FunctionURI,
+    MetricData,
     ModelEndpointTarget,
     ModelMonitoringMode,
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
+    MonitoringTSDBTables,
     ProjectSecretKeys,
     PrometheusEndpoints,
     PrometheusMetric,
+    ResultData,
     SchedulingKeys,
-    TimeSeriesTarget,
+    TimeSeriesConnector,
+    TSDBTarget,
     VersionedModel,
     WriterEvent,
+    WriterEventKind,
 )
 from .grafana import (
     GrafanaColumn,
@@ -51,6 +56,8 @@ from .model_endpoints import (
     ModelEndpoint,
     ModelEndpointList,
     ModelEndpointMetadata,
+    ModelEndpointMonitoringMetric,
+    ModelEndpointMonitoringMetricType,
     ModelEndpointSpec,
     ModelEndpointStatus,
 )
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -99,14 +99,17 @@ class FeatureSetFeatures(MonitoringStrEnum):
 
 class ApplicationEvent:
     APPLICATION_NAME = "application_name"
-    CURRENT_STATS = "current_stats"
-    FEATURE_STATS = "feature_stats"
-    SAMPLE_PARQUET_PATH = "sample_parquet_path"
     START_INFER_TIME = "start_infer_time"
     END_INFER_TIME = "end_infer_time"
     LAST_REQUEST = "last_request"
     ENDPOINT_ID = "endpoint_id"
     OUTPUT_STREAM_URI = "output_stream_uri"
+    MLRUN_CONTEXT = "mlrun_context"
+
+    # Deprecated fields - TODO : delete in 1.9.0 (V1 app deprecation)
+    SAMPLE_PARQUET_PATH = "sample_parquet_path"
+    CURRENT_STATS = "current_stats"
+    FEATURE_STATS = "feature_stats"
 
 
 class WriterEvent(MonitoringStrEnum):
@@ -114,6 +117,21 @@ class WriterEvent(MonitoringStrEnum):
     ENDPOINT_ID = "endpoint_id"
     START_INFER_TIME = "start_infer_time"
     END_INFER_TIME = "end_infer_time"
+    EVENT_KIND = "event_kind"  # metric or result
+    DATA = "data"
+
+
+class WriterEventKind(MonitoringStrEnum):
+    METRIC = "metric"
+    RESULT = "result"
+
+
+class MetricData(MonitoringStrEnum):
+    METRIC_NAME = "metric_name"
+    METRIC_VALUE = "metric_value"
+
+
+class ResultData(MonitoringStrEnum):
     RESULT_NAME = "result_name"
     RESULT_VALUE = "result_value"
     RESULT_KIND = "result_kind"
@@ -138,7 +156,7 @@ class EventKeyMetrics:
     REAL_TIME = "real_time"
 
 
-class TimeSeriesTarget:
+class TimeSeriesConnector:
     TSDB = "tsdb"
 
 
@@ -176,6 +194,7 @@ class FileTargetKind:
     LOG_STREAM = "log_stream"
     APP_RESULTS = "app_results"
     MONITORING_SCHEDULES = "monitoring_schedules"
+    MONITORING_APPLICATION = "monitoring_application"
 
 
 class ModelMonitoringMode(str, Enum):
@@ -210,6 +229,12 @@ class MonitoringFunctionNames(MonitoringStrEnum):
     WRITER = "model-monitoring-writer"
 
 
+class MonitoringTSDBTables(MonitoringStrEnum):
+    APP_RESULTS = "app-results"
+    METRICS = "metrics"
+    EVENTS = "events"
+
+
 @dataclass
 class FunctionURI:
     project: str
@@ -303,11 +328,22 @@ class ModelMonitoringAppLabel:
     KEY = "mlrun__type"
     VAL = "mlrun__model-monitoring-application"
 
+    def __str__(self) -> str:
+        return f"{self.KEY}={self.VAL}"
+
 
 class ControllerPolicy:
     BASE_PERIOD = "base_period"
 
 
+class TSDBTarget:
+    V3IO_TSDB = "v3io-tsdb"
+    PROMETHEUS = "prometheus"
+    APP_RESULTS_TABLE = "app-results"
+    V3IO_BE = "tsdb"
+    V3IO_RATE = "1/s"
+
+
 class HistogramDataDriftApplicationConstants:
     NAME = "histogram-data-drift"
     GENERAL_RESULT_NAME = "general_drift"
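
The `__str__` added to ModelMonitoringAppLabel renders the label pair as a Kubernetes-style `key=value` selector. A quick sketch, assuming the class is imported from `mlrun.common.schemas.model_monitoring.constants`:

    from mlrun.common.schemas.model_monitoring.constants import ModelMonitoringAppLabel

    # KEY and VAL are the class attributes shown in the hunk above
    label_selector = str(ModelMonitoringAppLabel())
    assert label_selector == "mlrun__type=mlrun__model-monitoring-application"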
mlrun/common/schemas/model_monitoring/model_endpoints.py CHANGED
@@ -11,16 +11,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 import enum
 import json
-from typing import Any, Optional
+import re
+from datetime import datetime
+from typing import Any, NamedTuple, Optional
 
 from pydantic import BaseModel, Field, validator
 from pydantic.main import Extra
 
 import mlrun.common.model_monitoring
+import mlrun.common.types
 
 from ..object import ObjectKind, ObjectSpec, ObjectStatus
 from .constants import (
@@ -29,6 +31,8 @@ from .constants import (
     EventKeyMetrics,
     EventLiveStats,
     ModelMonitoringMode,
+    ResultKindApp,
+    ResultStatusApp,
 )
 
 
@@ -292,6 +296,73 @@ class ModelEndpointList(BaseModel):
     endpoints: list[ModelEndpoint] = []
 
 
+class ModelEndpointMonitoringMetricType(mlrun.common.types.StrEnum):
+    RESULT = "result"
+
+
+class ModelEndpointMonitoringMetric(BaseModel):
+    project: str
+    app: str
+    type: ModelEndpointMonitoringMetricType
+    name: str
+    full_name: str
+
+
+def _compose_full_name(
+    *,
+    project: str,
+    app: str,
+    name: str,
+    type: ModelEndpointMonitoringMetricType = ModelEndpointMonitoringMetricType.RESULT,
+) -> str:
+    return ".".join([project, app, type, name])
+
+
+_FQN_PART_PATTERN = r"[a-zA-Z0-9_-]+"
+_FQN_PATTERN = (
+    rf"^(?P<project>{_FQN_PART_PATTERN})\."
+    rf"(?P<app>{_FQN_PART_PATTERN})\."
+    rf"(?P<type>{_FQN_PART_PATTERN})\."
+    rf"(?P<name>{_FQN_PART_PATTERN})$"
+)
+_FQN_REGEX = re.compile(_FQN_PATTERN)
+
+
+def _parse_metric_fqn_to_monitoring_metric(fqn: str) -> ModelEndpointMonitoringMetric:
+    match = _FQN_REGEX.fullmatch(fqn)
+    if match is None:
+        raise ValueError("The fully qualified name is not in the expected format")
+    return ModelEndpointMonitoringMetric.parse_obj(
+        match.groupdict() | {"full_name": fqn}
+    )
+
+
+class _ResultPoint(NamedTuple):
+    timestamp: datetime
+    value: float
+    status: ResultStatusApp
+
+
+class _ModelEndpointMonitoringResultValuesBase(BaseModel):
+    full_name: str
+    type: ModelEndpointMonitoringMetricType
+    data: bool
+
+
+class ModelEndpointMonitoringResultValues(_ModelEndpointMonitoringResultValuesBase):
+    full_name: str
+    type: ModelEndpointMonitoringMetricType
+    result_kind: ResultKindApp
+    values: list[_ResultPoint]
+    data: bool = True
+
+
+class ModelEndpointMonitoringResultNoData(_ModelEndpointMonitoringResultValuesBase):
+    full_name: str
+    type: ModelEndpointMonitoringMetricType
+    data: bool = False
+
+
 def _mapping_attributes(
     base_model: BaseModel,
     flattened_dictionary: dict,
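
The new metric full name is a dot-separated FQN of the form <project>.<app>.<type>.<name>, where each part matches `[a-zA-Z0-9_-]+`. Parsing goes through the module-private helper shown above; the snippet below is illustration only:

    from mlrun.common.schemas.model_monitoring.model_endpoints import (
        _parse_metric_fqn_to_monitoring_metric,
    )

    metric = _parse_metric_fqn_to_monitoring_metric("my-project.my-app.result.drift")
    assert metric.project == "my-project"
    assert metric.app == "my-app"
    assert metric.name == "drift"
    assert metric.full_name == "my-project.my-app.result.drift"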
mlrun/common/schemas/project.py CHANGED
@@ -114,6 +114,8 @@ class ProjectSummary(pydantic.BaseModel):
     runs_failed_recent_count: int
     runs_running_count: int
     schedules_count: int
+    pipelines_completed_recent_count: typing.Optional[int] = None
+    pipelines_failed_recent_count: typing.Optional[int] = None
     pipelines_running_count: typing.Optional[int] = None
mlrun/config.py CHANGED
@@ -361,7 +361,7 @@ default_config = {
     # is set to ClusterIP
     # ---------------------------------------------------------------------
     # Note: adding a mode requires special handling on
-    # - mlrun.runtimes.constants.NuclioIngressAddTemplatedIngressModes
+    # - mlrun.common.runtimes.constants.NuclioIngressAddTemplatedIngressModes
     # - mlrun.runtimes.nuclio.function.enrich_function_with_ingress
     "add_templated_ingress_host_mode": "never",
     "explicit_ack": "enabled",
@@ -503,6 +503,7 @@ default_config = {
     "default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
     "user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
     "stream": "",
+    "monitoring_application": "v3io:///users/pipelines/{project}/monitoring-apps/",
 },
 # Offline storage path can be either relative or a full path. This path is used for general offline data
 # storage such as the parquet file which is generated from the monitoring stream function for the drift analysis
@@ -516,6 +517,7 @@ default_config = {
     # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
     "store_type": "v3io-nosql",
     "endpoint_store_connection": "",
+    "tsdb_connector_type": "v3io-tsdb",
 },
 "secret_stores": {
     # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
@@ -554,7 +556,7 @@ default_config = {
     "nosql": "v3io:///projects/{project}/FeatureStore/{name}/nosql",
     # "authority" is optional and generalizes [userinfo "@"] host [":" port]
     "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/nosql",
-    "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/nosql",
+    "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}",
 },
 "default_targets": "parquet,nosql",
 "default_job_image": "mlrun/mlrun",
@@ -692,7 +694,7 @@ default_config = {
 "grafana_url": "",
 "alerts": {
     # supported modes: "enabled", "disabled".
-    "mode": "disabled"
+    "mode": "enabled"
 },
 "auth_with_client_id": {
     "enabled": False,
@@ -1088,6 +1090,7 @@ class Config:
         target: str = "online",
         artifact_path: str = None,
        function_name: str = None,
+        **kwargs,
     ) -> typing.Union[str, list[str]]:
         """Get the full path from the configuration based on the provided project and kind.
 
@@ -1114,7 +1117,7 @@ class Config:
         )
         if store_prefix_dict.get(kind):
             # Target exist in store prefix and has a valid string value
-            return store_prefix_dict[kind].format(project=project)
+            return store_prefix_dict[kind].format(project=project, **kwargs)
 
         if (
             function_name
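
The store path entries are plain str.format templates, so forwarding `**kwargs` into `.format()` lets a template carry placeholders beyond `{project}`, such as the `{kind}` placeholder added to the `dsnosql` prefix above. A generic sketch of the formatting mechanism only (hypothetical values, not mlrun API):

    # Only the template string comes from the diff above; the values are made up
    template = "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}"
    path = template.format(
        ds_profile_name="my-profile", project="my-project", name="my-set", kind="nosql"
    )
    # -> ds://my-profile/projects/my-project/FeatureStore/my-set/nosql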
mlrun/data_types/to_pandas.py CHANGED
@@ -65,10 +65,10 @@ def toPandas(spark_df):
     msg = (
         "toPandas attempted Arrow optimization because "
         "'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, "
-        "failed by the reason below:\n %s\n"
+        f"failed by the reason below:\n {e}\n"
         "Attempting non-optimization as "
         "'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to "
-        "true." % str(e)
+        "true."
     )
     warnings.warn(msg)
     use_arrow = False
@@ -78,7 +78,7 @@ def toPandas(spark_df):
     "'spark.sql.execution.arrow.pyspark.enabled' is set to true, but has "
     "reached the error below and will not continue because automatic fallback "
     "with 'spark.sql.execution.arrow.pyspark.fallback.enabled' has been set to "
-    "false.\n %s" % str(e)
+    f"false.\n {e}"
 )
 warnings.warn(msg)
 raise
@@ -144,7 +144,7 @@ def toPandas(spark_df):
     "reached the error below and can not continue. Note that "
     "'spark.sql.execution.arrow.pyspark.fallback.enabled' does not have an "
     "effect on failures in the middle of "
-    "computation.\n %s" % str(e)
+    f"computation.\n {e}"
 )
 warnings.warn(msg)
 raise
mlrun/datastore/base.py CHANGED
@@ -179,11 +179,23 @@ class DataStore:
         return {}
 
     @staticmethod
-    def _parquet_reader(df_module, url, file_system, time_column, start_time, end_time):
+    def _parquet_reader(
+        df_module,
+        url,
+        file_system,
+        time_column,
+        start_time,
+        end_time,
+        additional_filters,
+    ):
         from storey.utils import find_filters, find_partitions
 
         def set_filters(
-            partitions_time_attributes, start_time_inner, end_time_inner, kwargs
+            partitions_time_attributes,
+            start_time_inner,
+            end_time_inner,
+            filters_inner,
+            kwargs,
         ):
             filters = []
             find_filters(
@@ -193,20 +205,23 @@ class DataStore:
                 filters,
                 time_column,
             )
+            if filters and filters_inner:
+                filters[0] += filters_inner
+
             kwargs["filters"] = filters
 
         def reader(*args, **kwargs):
-            if start_time or end_time:
-                if time_column is None:
-                    raise mlrun.errors.MLRunInvalidArgumentError(
-                        "When providing start_time or end_time, must provide time_column"
-                    )
-
+            if time_column is None and (start_time or end_time):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "When providing start_time or end_time, must provide time_column"
+                )
+            if start_time or end_time or additional_filters:
                 partitions_time_attributes = find_partitions(url, file_system)
                 set_filters(
                     partitions_time_attributes,
                     start_time,
                     end_time,
+                    additional_filters,
                     kwargs,
                 )
                 try:
@@ -217,6 +232,7 @@ class DataStore:
                 ):
                     raise ex
 
+                # TODO: fix timezone issue (ML-6308)
                 if start_time.tzinfo:
                     start_time_inner = start_time.replace(tzinfo=None)
                     end_time_inner = end_time.replace(tzinfo=None)
@@ -228,6 +244,7 @@ class DataStore:
                     partitions_time_attributes,
                     start_time_inner,
                     end_time_inner,
+                    additional_filters,
                     kwargs,
                 )
             return df_module.read_parquet(*args, **kwargs)
@@ -246,6 +263,7 @@ class DataStore:
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         df_module = df_module or pd
@@ -310,7 +328,13 @@ class DataStore:
             kwargs["columns"] = columns
 
             reader = self._parquet_reader(
-                df_module, url, file_system, time_column, start_time, end_time
+                df_module,
+                url,
+                file_system,
+                time_column,
+                start_time,
+                end_time,
+                additional_filters,
             )
 
         elif file_url.endswith(".json") or format == "json":
@@ -539,6 +563,7 @@ class DataItem:
         time_column=None,
         start_time=None,
        end_time=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return a dataframe object (generated from the dataitem).
@@ -550,6 +575,12 @@ class DataItem:
         :param end_time:    filters out data after this time
         :param time_column: Store timestamp_key will be used if None.
                             The results will be filtered by this column and start_time & end_time.
+        :param additional_filters: List of additional_filter conditions as tuples.
+                                   Each tuple should be in the format (column_name, operator, value).
+                                   Supported operators: "=", ">=", "<=", ">", "<".
+                                   Example: [("Product", "=", "Computer")]
+                                   For all supported filters, please see:
+                                   https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
         """
         df = self._store.as_df(
             self._url,
@@ -560,6 +591,7 @@ class DataItem:
             time_column=time_column,
             start_time=start_time,
             end_time=end_time,
+            additional_filters=additional_filters,
             **kwargs,
         )
         return df
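
End to end, the new `additional_filters` argument travels from `DataItem.as_df` through `DataStore.as_df` into the parquet reader's pyarrow filters. A hedged usage sketch (the parquet URL is hypothetical; `mlrun.get_dataitem` is the usual way to obtain a DataItem):

    import mlrun

    item = mlrun.get_dataitem("s3://my-bucket/sales.parquet")  # hypothetical path
    # (column_name, operator, value) tuples, as documented in the docstring above
    df = item.as_df(additional_filters=[("Product", "=", "Computer")])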