mlrun 1.10.0rc40__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (150)
  1. mlrun/__init__.py +3 -2
  2. mlrun/__main__.py +0 -4
  3. mlrun/artifacts/dataset.py +2 -2
  4. mlrun/artifacts/plots.py +1 -1
  5. mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
  6. mlrun/auth/nuclio.py +89 -0
  7. mlrun/auth/providers.py +429 -0
  8. mlrun/auth/utils.py +415 -0
  9. mlrun/common/constants.py +7 -0
  10. mlrun/common/model_monitoring/helpers.py +41 -4
  11. mlrun/common/runtimes/constants.py +28 -0
  12. mlrun/common/schemas/__init__.py +13 -3
  13. mlrun/common/schemas/alert.py +2 -2
  14. mlrun/common/schemas/api_gateway.py +3 -0
  15. mlrun/common/schemas/auth.py +10 -10
  16. mlrun/common/schemas/client_spec.py +4 -0
  17. mlrun/common/schemas/constants.py +25 -0
  18. mlrun/common/schemas/frontend_spec.py +1 -8
  19. mlrun/common/schemas/function.py +24 -0
  20. mlrun/common/schemas/hub.py +3 -2
  21. mlrun/common/schemas/model_monitoring/__init__.py +1 -1
  22. mlrun/common/schemas/model_monitoring/constants.py +2 -2
  23. mlrun/common/schemas/secret.py +17 -2
  24. mlrun/common/secrets.py +95 -1
  25. mlrun/common/types.py +10 -10
  26. mlrun/config.py +53 -15
  27. mlrun/data_types/infer.py +2 -2
  28. mlrun/datastore/__init__.py +2 -3
  29. mlrun/datastore/base.py +274 -10
  30. mlrun/datastore/datastore.py +1 -1
  31. mlrun/datastore/datastore_profile.py +49 -17
  32. mlrun/datastore/model_provider/huggingface_provider.py +6 -2
  33. mlrun/datastore/model_provider/model_provider.py +2 -2
  34. mlrun/datastore/model_provider/openai_provider.py +2 -2
  35. mlrun/datastore/s3.py +15 -16
  36. mlrun/datastore/sources.py +1 -1
  37. mlrun/datastore/store_resources.py +4 -4
  38. mlrun/datastore/storeytargets.py +16 -10
  39. mlrun/datastore/targets.py +1 -1
  40. mlrun/datastore/utils.py +16 -3
  41. mlrun/datastore/v3io.py +1 -1
  42. mlrun/db/base.py +36 -12
  43. mlrun/db/httpdb.py +316 -101
  44. mlrun/db/nopdb.py +29 -11
  45. mlrun/errors.py +4 -2
  46. mlrun/execution.py +11 -12
  47. mlrun/feature_store/api.py +1 -1
  48. mlrun/feature_store/common.py +1 -1
  49. mlrun/feature_store/feature_vector_utils.py +1 -1
  50. mlrun/feature_store/steps.py +8 -6
  51. mlrun/frameworks/_common/utils.py +3 -3
  52. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  53. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
  54. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  55. mlrun/frameworks/_ml_common/utils.py +2 -1
  56. mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
  57. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
  58. mlrun/frameworks/onnx/dataset.py +2 -1
  59. mlrun/frameworks/onnx/mlrun_interface.py +2 -1
  60. mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
  61. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
  62. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
  63. mlrun/frameworks/pytorch/utils.py +2 -1
  64. mlrun/frameworks/sklearn/metric.py +2 -1
  65. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
  66. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
  67. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
  68. mlrun/hub/__init__.py +37 -0
  69. mlrun/hub/base.py +142 -0
  70. mlrun/hub/module.py +67 -76
  71. mlrun/hub/step.py +113 -0
  72. mlrun/launcher/base.py +2 -1
  73. mlrun/launcher/local.py +2 -1
  74. mlrun/model.py +12 -2
  75. mlrun/model_monitoring/__init__.py +0 -1
  76. mlrun/model_monitoring/api.py +2 -2
  77. mlrun/model_monitoring/applications/base.py +20 -6
  78. mlrun/model_monitoring/applications/context.py +1 -0
  79. mlrun/model_monitoring/controller.py +7 -17
  80. mlrun/model_monitoring/db/_schedules.py +2 -16
  81. mlrun/model_monitoring/db/_stats.py +2 -13
  82. mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
  83. mlrun/model_monitoring/db/tsdb/base.py +2 -4
  84. mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
  85. mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
  86. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
  87. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
  88. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
  89. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
  90. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
  91. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
  92. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
  93. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
  94. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
  95. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
  96. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
  97. mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
  98. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +4 -6
  99. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +147 -79
  100. mlrun/model_monitoring/features_drift_table.py +2 -1
  101. mlrun/model_monitoring/helpers.py +2 -1
  102. mlrun/model_monitoring/stream_processing.py +18 -16
  103. mlrun/model_monitoring/writer.py +4 -3
  104. mlrun/package/__init__.py +2 -1
  105. mlrun/platforms/__init__.py +0 -44
  106. mlrun/platforms/iguazio.py +1 -1
  107. mlrun/projects/operations.py +11 -10
  108. mlrun/projects/project.py +81 -82
  109. mlrun/run.py +4 -7
  110. mlrun/runtimes/__init__.py +2 -204
  111. mlrun/runtimes/base.py +89 -21
  112. mlrun/runtimes/constants.py +225 -0
  113. mlrun/runtimes/daskjob.py +4 -2
  114. mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
  115. mlrun/runtimes/mounts.py +5 -0
  116. mlrun/runtimes/nuclio/__init__.py +12 -8
  117. mlrun/runtimes/nuclio/api_gateway.py +36 -6
  118. mlrun/runtimes/nuclio/application/application.py +200 -32
  119. mlrun/runtimes/nuclio/function.py +154 -49
  120. mlrun/runtimes/nuclio/serving.py +55 -42
  121. mlrun/runtimes/pod.py +59 -10
  122. mlrun/secrets.py +46 -2
  123. mlrun/serving/__init__.py +2 -0
  124. mlrun/serving/remote.py +5 -5
  125. mlrun/serving/routers.py +3 -3
  126. mlrun/serving/server.py +46 -43
  127. mlrun/serving/serving_wrapper.py +6 -2
  128. mlrun/serving/states.py +554 -207
  129. mlrun/serving/steps.py +1 -1
  130. mlrun/serving/system_steps.py +42 -33
  131. mlrun/track/trackers/mlflow_tracker.py +29 -31
  132. mlrun/utils/helpers.py +89 -16
  133. mlrun/utils/http.py +9 -2
  134. mlrun/utils/notifications/notification/git.py +1 -1
  135. mlrun/utils/notifications/notification/mail.py +39 -16
  136. mlrun/utils/notifications/notification_pusher.py +2 -2
  137. mlrun/utils/version/version.json +2 -2
  138. mlrun/utils/version/version.py +3 -4
  139. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +39 -49
  140. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +144 -130
  141. mlrun/db/auth_utils.py +0 -152
  142. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -343
  143. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
  144. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
  145. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1368
  146. mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +0 -51
  147. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
  148. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
  149. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
  150. {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py (new file)
@@ -0,0 +1,585 @@
+# Copyright 2025 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+from datetime import datetime, timedelta
+from typing import TYPE_CHECKING, Optional, Union
+
+import mlrun.common.schemas.model_monitoring as mm_schemas
+import mlrun.errors
+
+if TYPE_CHECKING:
+    import pandas as pd
+
+# TimescaleDB interval pattern for parsing intervals like "1h", "10m", "1d", "1w", "1M"
+_TIMESCALEDB_INTERVAL_PATTERN = re.compile(r"(\d+)([mhdwM])")
+
+
+class TimescaleDBQueryBuilder:
+    """Utility class for building common SQL query components."""
+
+    @staticmethod
+    def build_endpoint_filter(endpoint_ids: Optional[Union[str, list[str]]]) -> str:
+        """
+        Generate SQL filter for endpoint IDs.
+
+        :param endpoint_ids: Single endpoint ID, list of endpoint IDs, or None for no filtering
+        :return: SQL WHERE clause fragment for endpoint filtering, or empty string if None
+        """
+        if endpoint_ids is None:
+            return ""
+        if isinstance(endpoint_ids, str):
+            return f"{mm_schemas.WriterEvent.ENDPOINT_ID}='{endpoint_ids}'"
+        elif isinstance(endpoint_ids, list):
+            endpoint_list = "', '".join(endpoint_ids)
+            return f"{mm_schemas.WriterEvent.ENDPOINT_ID} IN ('{endpoint_list}')"
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Invalid 'endpoint_ids' filter: must be a string or a list of strings"
+            )
+
+    @staticmethod
+    def build_time_range_filter(
+        start: datetime, end: datetime, time_column: str
+    ) -> str:
+        """
+        Generate SQL filter for time range.
+
+        :param start: Start datetime
+        :param end: End datetime
+        :param time_column: Name of the time column to filter on
+        :return: SQL WHERE clause fragment for time filtering
+        """
+        return f"{time_column} >= '{start}' AND {time_column} <= '{end}'"
+
+    @staticmethod
+    def build_application_filter(app_names: Union[str, list[str]]) -> str:
+        """
+        Generate SQL filter for application names.
+
+        :param app_names: Single application name or list of application names
+        :return: SQL WHERE clause fragment for application filtering
+        """
+        if isinstance(app_names, str):
+            return f"{mm_schemas.WriterEvent.APPLICATION_NAME} = '{app_names}'"
+        elif isinstance(app_names, list):
+            app_list = "', '".join(app_names)
+            return f"{mm_schemas.WriterEvent.APPLICATION_NAME} IN ('{app_list}')"
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Invalid 'app_names' filter: must be either a string or a list of strings"
+            )
+
+    @staticmethod
+    def build_metrics_filter(
+        metrics: Optional[list[mm_schemas.ModelEndpointMonitoringMetric]],
+    ) -> str:
+        """
+        Generate SQL filter for metrics using both application_name and metric_name columns.
+
+        :param metrics: List of ModelEndpointMonitoringMetric objects, or None for no filtering
+        :return: SQL WHERE clause fragment for metrics filtering, or empty string if None
+        """
+        if metrics is None:
+            return ""
+        if not metrics:
+            raise mlrun.errors.MLRunInvalidArgumentError("Metrics list cannot be empty")
+
+        # Build filter that includes both application_name and metric_name
+        # Format: (application_name = 'app1' AND metric_name = 'name1') OR
+        #         (application_name = 'app2' AND metric_name = 'name2')
+        conditions = []
+        for metric in metrics:
+            condition = (
+                f"({mm_schemas.WriterEvent.APPLICATION_NAME} = '{metric.app}' "
+                f"AND {mm_schemas.MetricData.METRIC_NAME} = '{metric.name}')"
+            )
+            conditions.append(condition)
+
+        if len(conditions) == 1:
+            return conditions[0]
+        return " OR ".join(conditions)
+
+    @staticmethod
+    def build_results_filter(
+        metrics: Optional[list[mm_schemas.ModelEndpointMonitoringMetric]],
+    ) -> str:
+        """
+        Generate SQL filter for results using both application_name and result_name columns.
+        :param metrics: List of ModelEndpointMonitoringMetric objects, or None for no filtering
+        :return: SQL WHERE clause fragment for results filtering, or empty string if None
+        """
+        if metrics is None:
+            return ""
+        if not metrics:
+            raise mlrun.errors.MLRunInvalidArgumentError("Metrics list cannot be empty")
+
+        # Build filter that includes both application_name and result_name
+        # Format: (application_name = 'app1' AND result_name = 'name1') OR
+        #         (application_name = 'app2' AND result_name = 'name2')
+        conditions = []
+        for metric in metrics:
+            condition = (
+                f"({mm_schemas.WriterEvent.APPLICATION_NAME} = '{metric.app}' "
+                f"AND {mm_schemas.ResultData.RESULT_NAME} = '{metric.name}')"
+            )
+            conditions.append(condition)
+
+        if len(conditions) == 1:
+            return conditions[0]
+        return " OR ".join(conditions)
+
+    @staticmethod
+    def build_metrics_filter_from_names(metric_names: list[str]) -> str:
+        """
+        Generate SQL filter for metrics by name.
+
+        :param metric_names: List of metric names
+        :return: SQL WHERE clause fragment for metrics filtering
+        """
+        if not metric_names:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Metric names list cannot be empty"
+            )
+
+        if len(metric_names) == 1:
+            return f"{mm_schemas.MetricData.METRIC_NAME} = '{metric_names[0]}'"
+        metric_list = "', '".join(metric_names)
+        return f"{mm_schemas.MetricData.METRIC_NAME} IN ('{metric_list}')"
+
+    @staticmethod
+    def combine_filters(filters: list[str]) -> Optional[str]:
+        """
+        Combine multiple filter conditions with AND operator.
+
+        :param filters: List of filter condition strings
+        :return: Combined filter string or None if no filters
+        """
+        if valid_filters := [f.strip() for f in filters if f.strip()]:
+            return (
+                valid_filters[0]
+                if len(valid_filters) == 1
+                else " AND ".join(valid_filters)
+            )
+        else:
+            return None
+
+    @staticmethod
+    def interval_to_minutes(interval: str) -> Optional[int]:
+        """
+        Convert TimescaleDB interval string to minutes.
+
+        Uses PostgreSQL/TimescaleDB fixed duration assumptions:
+        - 1 month = 30 days = 43,200 minutes
+        - 1 year = 365.25 days = 525,960 minutes
+
+        This matches TimescaleDB's INTERVAL arithmetic behavior and is appropriate
+        for duration calculations and optimal interval selection.
+
+        :param interval: Interval string like "1h", "10m", "1d", "1w", "1M"
+        :return: Duration in minutes, or None if invalid format
+        """
+        match = _TIMESCALEDB_INTERVAL_PATTERN.match(interval)
+        if not match:
+            return None
+
+        amount, unit = int(match.group(1)), match.group(2)
+
+        if unit == "m":  # minutes
+            return amount
+        elif unit == "h":  # hours
+            return amount * 60
+        elif unit == "d":  # days
+            return amount * 1440
+        elif unit == "w":  # weeks
+            return amount * 10080
+        elif unit == "M":  # months (PostgreSQL: 30 days)
+            return amount * 43200
+        else:
+            return None
+
+    @staticmethod
+    def determine_optimal_interval(start: datetime, end: datetime) -> str:
+        """
+        Determine optimal interval for time-based aggregation based on time range.
+
+        This method selects appropriate interval from a comprehensive list of
+        standard TimescaleDB intervals rather than simple time-based thresholds.
+        This provides better balance between query performance
+        and data granularity by targeting optimal data point counts.
+
+        :param start: Start time
+        :param end: End time
+        :return: Optimal interval string (in Python format like "1h", "1d")
+        """
+        # Comprehensive list of standard TimescaleDB intervals
+        standard_intervals = [
+            "1m",
+            "5m",
+            "10m",
+            "15m",
+            "30m",
+            "1h",
+            "2h",
+            "6h",
+            "12h",
+            "1d",
+            "3d",
+            "1w",
+            "1M",
+        ]
+
+        optimal = TimescaleDBQueryBuilder.determine_optimal_from_available(
+            start, end, standard_intervals
+        )
+
+        # Fallback for edge cases where algorithm doesn't find a suitable match
+        # Simple binary choice: smallest interval for short ranges, largest for long ranges
+        if optimal is None:
+            time_diff = end - start
+            return "1m" if time_diff <= timedelta(days=30) else "1M"
+        return optimal
+
+    @staticmethod
+    def determine_optimal_from_available(
+        start: datetime, end: datetime, available_intervals: list[str]
+    ) -> Optional[str]:
+        """
+        Determine optimal interval from available pre-aggregate intervals.
+
+        Uses a formula-based approach to select intervals that provide reasonable data points
+        (~50-200 range) for optimal visualization and query performance.
+
+        :param start: Start time
+        :param end: End time
+        :param available_intervals: List of available interval strings (e.g., ["10m", "1h", "6h", "1d"])
+        :return: Optimal interval string or None if no suitable intervals available
+        """
+        if not available_intervals:
+            return None
+
+        # Convert available intervals to (name, minutes) tuples using our centralized parsing
+        available_with_minutes = []
+        for interval in available_intervals:
+            minutes = TimescaleDBQueryBuilder.interval_to_minutes(interval)
+            if minutes is not None:
+                available_with_minutes.append((interval, minutes))
+
+        if not available_with_minutes:
+            return None
+
+        # Sort by duration (ascending)
+        available_with_minutes.sort(key=lambda x: x[1])
+
+        # Calculate time range in minutes
+        time_diff_minutes = (end - start).total_seconds() / 60
+
+        # Target ~100 data points for optimal visualization balance
+        # Accept intervals that give 20-500 data points (wider reasonable range)
+        target_points = 100
+        min_acceptable_points = 20
+        max_acceptable_points = 500
+
+        optimal_interval_minutes = time_diff_minutes / target_points
+        min_interval_minutes = time_diff_minutes / max_acceptable_points
+        max_interval_minutes = time_diff_minutes / min_acceptable_points
+
+        # Find the best matching interval within acceptable range
+        best_interval = None
+        best_score = float("inf")
+
+        for interval_name, interval_minutes in available_with_minutes:
+            # Check if this interval is within acceptable range
+            if min_interval_minutes <= interval_minutes <= max_interval_minutes:
+                # Score by distance from optimal (closer to optimal = better)
+                score = abs(interval_minutes - optimal_interval_minutes)
+                if score < best_score:
+                    best_score = score
+                    best_interval = interval_name
+
+        return best_interval
+
+    @staticmethod
+    def build_read_data_with_fallback(
+        connection,
+        pre_aggregate_manager,
+        table_schema,
+        start: "datetime",  # Use string to avoid import cycle
+        end: "datetime",
+        columns: list[str],
+        filter_query: Optional[str],
+        name_column: str,
+        value_column: str,
+        debug_name: str = "read_data",
+        timestamp_column: Optional[str] = None,
+    ) -> "pd.DataFrame":  # Use string to avoid import cycle
+        """
+        Build and execute read data query with pre-aggregate fallback pattern.
+
+        This method deduplicates the common pattern used in both metrics and results
+        queries for reading data with pre-aggregate optimization and fallback.
+
+        :param connection: Database connection instance
+        :param pre_aggregate_manager: Pre-aggregate handler for optimization
+        :param table_schema: Table schema for query building
+        :param start: Start datetime for query
+        :param end: End datetime for query
+        :param columns: List of columns to select
+        :param filter_query: WHERE clause conditions
+        :param name_column: Name of the metric/result name column
+        :param value_column: Name of the metric/result value column
+        :param debug_name: Name for debugging purposes
+        :param timestamp_column: Optional timestamp column to use for time filtering
+        :return: DataFrame with query results
+        """
+
+        def build_pre_agg_query():
+            return table_schema._get_records_query(
+                start=start,
+                end=end,
+                columns_to_filter=columns,
+                filter_query=filter_query,
+                use_pre_aggregates=True,
+                timestamp_column=timestamp_column,
+            )
+
+        def build_raw_query():
+            return table_schema._get_records_query(
+                start=start,
+                end=end,
+                columns_to_filter=columns,
+                filter_query=filter_query,
+                timestamp_column=timestamp_column,
+            )
+
+        # Column mapping rules for pre-aggregate results
+        import mlrun.common.schemas.model_monitoring as mm_schemas
+
+        column_mapping_rules = {
+            name_column: [name_column],
+            value_column: [value_column],
+            table_schema.time_column: [table_schema.time_column],
+            mm_schemas.WriterEvent.APPLICATION_NAME: [
+                mm_schemas.WriterEvent.APPLICATION_NAME
+            ],
+        }
+
+        return connection.execute_with_fallback(
+            pre_aggregate_manager,
+            build_pre_agg_query,
+            build_raw_query,
+            interval=None,  # No specific interval for this query
+            agg_funcs=None,
+            column_mapping_rules=column_mapping_rules,
+            debug_name=debug_name,
+        )
+
+    @staticmethod
+    def prepare_time_range_and_interval(
+        pre_aggregate_manager,
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
+        interval: Optional[str] = None,
+        auto_determine_interval: bool = True,
+    ) -> tuple[datetime, datetime, str]:
+        """
+        Standardized time range and interval preparation for TimescaleDB queries.
+
+        This helper eliminates the common pattern of:
+        1. get_start_end()
+        2. determine_optimal_interval() (optional)
+        3. align_time_range()
+
+        :param pre_aggregate_manager: PreAggregateManager instance
+        :param start: Start datetime (optional)
+        :param end: End datetime (optional)
+        :param interval: Time interval (optional, auto-determined if None and auto_determine_interval=True)
+        :param auto_determine_interval: Whether to auto-determine interval if not provided
+        :return: Tuple of (aligned_start, aligned_end, interval) - interval is guaranteed to be valid
+        """
+        # Step 1: Get start/end times with defaults
+        start, end = pre_aggregate_manager.get_start_end(start, end)
+
+        # Step 2: Auto-determine optimal interval if requested and not provided
+        if interval is None and auto_determine_interval:
+            # First, try to use available pre-aggregate intervals if they exist
+            available_intervals = (
+                pre_aggregate_manager.config.aggregate_intervals
+                if pre_aggregate_manager.config
+                else None
+            )
+
+            if available_intervals:
+                if optimal_from_preaggregate := (
+                    TimescaleDBQueryBuilder.determine_optimal_from_available(
+                        start, end, available_intervals
+                    )
+                ):
+                    interval = optimal_from_preaggregate
+
+            # If no suitable pre-aggregate interval found, use formula-based approach
+            if interval is None:
+                interval = TimescaleDBQueryBuilder.determine_optimal_interval(
+                    start, end
+                )
+
+        # Step 3: Align times to interval boundaries
+        start, end = pre_aggregate_manager.align_time_range(start, end, interval)
+
+        return start, end, interval
+
+    @staticmethod
+    def prepare_time_range_with_validation(
+        pre_aggregate_manager,
+        start_iso: str,
+        end_iso: str,
+        interval: Optional[str] = None,
+        agg_function: Optional[str] = None,
+    ) -> tuple[datetime, datetime, Optional[str]]:
+        """
+        Specialized helper for time preparation with validation and ISO string conversion.
+
+        This helper eliminates the pattern of:
+        1. validate_interval_and_function()
+        2. datetime.fromisoformat() conversion
+        3. align_time_range()
+
+        :param pre_aggregate_manager: PreAggregateManager instance
+        :param start_iso: Start time as ISO string
+        :param end_iso: End time as ISO string
+        :param interval: Time interval (optional)
+        :param agg_function: Aggregation function (optional)
+        :return: Tuple of (aligned_start_dt, aligned_end_dt, interval)
+        """
+        # Step 1: Validate parameters using the pre-aggregate handler
+        pre_aggregate_manager.validate_interval_and_function(interval, agg_function)
+
+        # Step 2: Convert ISO strings to datetime objects
+        start_dt, end_dt = (
+            datetime.fromisoformat(start_iso),
+            datetime.fromisoformat(end_iso),
+        )
+
+        # Step 3: Align times if interval is provided
+        start_dt, end_dt = pre_aggregate_manager.align_time_range(
+            start_dt, end_dt, interval
+        )
+
+        return start_dt, end_dt, interval
+
+    @staticmethod
+    def build_endpoint_aggregation_query(
+        subquery: str,
+        aggregation_columns: dict[str, str],
+        group_by_column: str = mm_schemas.WriterEvent.ENDPOINT_ID,
+        order_by_column: str = mm_schemas.WriterEvent.ENDPOINT_ID,
+    ) -> str:
+        """
+        Build standardized outer query for endpoint-level aggregation over time buckets.
+
+        This helper eliminates the repeated pattern of:
+        SELECT endpoint_id, AGG(column) FROM (subquery) GROUP BY endpoint_id ORDER BY endpoint_id
+
+        :param subquery: Inner query that provides time-bucketed data
+        :param aggregation_columns: Dict of {result_column: "AGG(source_column)"} mappings
+        :param group_by_column: Column to group by (default: endpoint_id)
+        :param order_by_column: Column to order by (default: endpoint_id)
+        :return: Complete SQL query string
+        """
+        # Build the SELECT columns list
+        select_columns = [group_by_column] + [
+            f"{agg_expr} AS {result_col}"
+            for result_col, agg_expr in aggregation_columns.items()
+        ]
+
+        return f"""
+            SELECT
+                {', '.join(select_columns)}
+            FROM ({subquery}) AS time_buckets
+            GROUP BY {group_by_column}
+            ORDER BY {order_by_column}
+        """
+
+
+class TimescaleDBNaming:
+    """Utility class for TimescaleDB table and view naming conventions."""
+
+    @staticmethod
+    def get_agg_table_name(base_name: str, interval: str) -> str:
+        """
+        Generate aggregate table name with interval.
+
+        :param base_name: Base table name
+        :param interval: Time interval (e.g., '1h', '1d')
+        :return: Aggregate table name (e.g., 'metrics_agg_1h')
+        """
+        return f"{base_name}_agg_{interval}"
+
+    @staticmethod
+    def get_cagg_view_name(base_name: str, interval: str) -> str:
+        """
+        Generate continuous aggregate view name with interval.
+
+        :param base_name: Base table name
+        :param interval: Time interval (e.g., '1h', '1d')
+        :return: Continuous aggregate view name (e.g., 'metrics_cagg_1h')
+        """
+        return f"{base_name}_cagg_{interval}"
+
+    @staticmethod
+    def get_agg_pattern(base_pattern: str) -> str:
+        """
+        Generate SQL LIKE pattern for aggregate tables.
+
+        :param base_pattern: Base pattern (e.g., 'metrics')
+        :return: SQL LIKE pattern (e.g., 'metrics_agg_%')
+        """
+        return f"{base_pattern}_agg_%"
+
+    @staticmethod
+    def get_cagg_pattern(base_pattern: str) -> str:
+        """
+        Generate SQL LIKE pattern for continuous aggregate views.
+
+        :param base_pattern: Base pattern (e.g., 'metrics')
+        :return: SQL LIKE pattern (e.g., 'metrics_cagg_%')
+        """
+        return f"{base_pattern}_cagg_%"
+
+    @staticmethod
+    def get_all_aggregate_patterns(base_pattern: str) -> list[str]:
+        """
+        Generate both aggregate table and continuous aggregate view patterns.
+
+        :param base_pattern: Base pattern (e.g., 'metrics')
+        :return: List of patterns ['metrics_agg_%', 'metrics_cagg_%']
+        """
+        return [
+            TimescaleDBNaming.get_agg_pattern(base_pattern),
+            TimescaleDBNaming.get_cagg_pattern(base_pattern),
+        ]

+    @staticmethod
+    def get_deletion_patterns(base_pattern: str) -> list[str]:
+        """
+        Generate all patterns needed for table deletion operations.
+
+        :param base_pattern: Base pattern (e.g., 'metrics')
+        :return: List of patterns [base_pattern, 'metrics_agg_%', 'metrics_cagg_%']
+        """
+        return [
+            base_pattern,
+            TimescaleDBNaming.get_agg_pattern(base_pattern),
+            TimescaleDBNaming.get_cagg_pattern(base_pattern),
+        ]
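
Editor's note: the helpers in this new module are pure functions over strings and datetimes, so their behavior can be sanity-checked without a database. The following is a minimal sketch, not part of the diff, assuming mlrun 1.11.0rc16 is installed so the module path from the file list resolves; the expected values follow directly from the fixed-duration arithmetic in interval_to_minutes and the scoring in determine_optimal_from_available.

    from datetime import datetime, timedelta

    from mlrun.model_monitoring.db.tsdb.timescaledb.utils.timescaledb_query_builder import (
        TimescaleDBNaming,
        TimescaleDBQueryBuilder,
    )

    # Fixed-duration parsing: 1 week = 7 * 24 * 60 = 10080 minutes
    assert TimescaleDBQueryBuilder.interval_to_minutes("1w") == 10080
    assert TimescaleDBQueryBuilder.interval_to_minutes("90x") is None  # unknown unit

    # A 7-day window (10080 minutes) targets ~100 points, i.e. ~100.8-minute
    # buckets; among these candidates only "1h" and "6h" fall in the 20-500
    # point acceptance band, and "1h" scores closer to the target
    end = datetime(2025, 6, 8)
    start = end - timedelta(days=7)
    assert (
        TimescaleDBQueryBuilder.determine_optimal_from_available(
            start, end, ["10m", "1h", "6h", "1d"]
        )
        == "1h"
    )

    # Empty fragments are dropped before AND-joining
    assert (
        TimescaleDBQueryBuilder.combine_filters(["a = 1", "", "b = 2"])
        == "a = 1 AND b = 2"
    )

    # Naming conventions for aggregate tables and continuous aggregate views
    assert TimescaleDBNaming.get_agg_table_name("metrics", "1h") == "metrics_agg_1h"
    assert TimescaleDBNaming.get_cagg_pattern("metrics") == "metrics_cagg_%"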
mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py (new file)
@@ -0,0 +1,73 @@
+# Copyright 2025 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+from datetime import datetime
+
+import mlrun.common.schemas.model_monitoring as mm_schemas
+import mlrun.feature_store.steps
+from mlrun.utils import logger
+
+
+class ProcessBeforeTimescaleDBWriter(mlrun.feature_store.steps.MapClass):
+    """
+    Process the data before writing to TimescaleDB via the new async writer.
+
+    This step combines functionality from the existing stream processor
+    to create appropriate table names and format data for TimescaleDB writer targets.
+
+    :returns: Event as a dictionary which will be written into the TimescaleDB Metrics/App Results tables.
+    """
+
+    def do(self, event):
+        logger.info("Process event before writing to TimescaleDB writer", event=event)
+
+        # Extract project from function URI (existing TimescaleDB pattern)
+        if mm_schemas.EventFieldType.FUNCTION_URI in event:
+            event[mm_schemas.EventFieldType.PROJECT] = event[
+                mm_schemas.EventFieldType.FUNCTION_URI
+            ].split("/")[0]
+
+        # Handle custom metrics serialization (existing TimescaleDB pattern)
+        event[mm_schemas.EventKeyMetrics.CUSTOM_METRICS] = json.dumps(
+            event.get(mm_schemas.EventFieldType.METRICS, {})
+        )
+
+        # Handle time mapping (existing TimescaleDB pattern)
+        # Map WHEN field to END_INFER_TIME for predictions data from model serving
+        if mm_schemas.StreamProcessingEvent.WHEN in event:
+            event[mm_schemas.WriterEvent.END_INFER_TIME] = event[
+                mm_schemas.StreamProcessingEvent.WHEN
+            ]
+        # For non-prediction events, use timestamp as END_INFER_TIME to maintain consistency
+        elif mm_schemas.EventFieldType.TIMESTAMP in event:
+            event[mm_schemas.WriterEvent.END_INFER_TIME] = event[
+                mm_schemas.EventFieldType.TIMESTAMP
+            ]
+
+        # Handle START_INFER_TIME conversion
+        if mm_schemas.WriterEvent.START_INFER_TIME in event and isinstance(
+            event[mm_schemas.WriterEvent.START_INFER_TIME], str
+        ):
+            event[mm_schemas.WriterEvent.START_INFER_TIME] = datetime.fromisoformat(
+                event[mm_schemas.WriterEvent.START_INFER_TIME]
+            )
+
+        # Create table column identifier
+        # TimescaleDB uses endpoint-based table organization
+        event[mm_schemas.EventFieldType.TABLE_COLUMN] = (
+            f"_{event.get(mm_schemas.EventFieldType.ENDPOINT_ID)}"
+        )
+
+        return event
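
Editor's note: for orientation, here is a hedged sketch, not part of the diff, of what this step does to a single event. It assumes the step can be instantiated with no arguments and exercised synchronously via do(), as mlrun's MapClass-based steps generally allow; the field names are the mm_schemas constants the step itself references, and the event values are invented for illustration.

    import mlrun.common.schemas.model_monitoring as mm_schemas
    from mlrun.model_monitoring.db.tsdb.timescaledb.writer_graph_steps import (
        ProcessBeforeTimescaleDBWriter,
    )

    # Hypothetical event: no WHEN field, so TIMESTAMP feeds END_INFER_TIME
    event = {
        mm_schemas.EventFieldType.FUNCTION_URI: "my-project/serving-fn",
        mm_schemas.EventFieldType.TIMESTAMP: "2025-06-01T12:00:00",
        mm_schemas.EventFieldType.ENDPOINT_ID: "abc123",
        mm_schemas.EventFieldType.METRICS: {"latency": 3.2},
    }

    out = ProcessBeforeTimescaleDBWriter().do(event)

    # PROJECT is the first segment of FUNCTION_URI, METRICS is JSON-serialized
    # into CUSTOM_METRICS, and TABLE_COLUMN is the underscore-prefixed endpoint id
    assert out[mm_schemas.EventFieldType.PROJECT] == "my-project"
    assert out[mm_schemas.EventFieldType.TABLE_COLUMN] == "_abc123"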
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py
@@ -44,7 +44,7 @@ def _normalize_dict_for_v3io_frames(event: dict[str, Any]) -> dict[str, Any]:
         items = {}
         for k, v in d.items():
             new_key = norm_key(k)
-            full_key = f"{parent_key}.{new_key}" if parent_key else new_key
+            full_key = f"{parent_key}:{new_key}" if parent_key else new_key
             if isinstance(v, dict):
                 items.update(flatten_dict(v, full_key))
             else:
@@ -151,11 +151,9 @@ class FilterAndUnpackKeys(mlrun.feature_store.steps.MapClass):
 
 
 class ErrorExtractor(mlrun.feature_store.steps.MapClass):
-    def __init__(self, **kwargs):
-        """
-        Prepare the event for insertion into the errors TSDB table.
-        """
-        super().__init__(**kwargs)
+    """
+    Prepare the event for insertion into the errors TSDB table.
+    """
 
     def do(self, event):
         error = event.get("error")
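
Editor's note: the one-character change in the first hunk swaps the separator for flattened nested keys from "." to ":" before events reach v3io frames. Below is a standalone sketch of the effect; this local flatten_dict is a hypothetical stand-in that mirrors the nested helper's separator logic but omits the norm_key normalization applied in the real code.

    def flatten_dict(d: dict, parent_key: str = "") -> dict:
        # Mirrors the helper nested in _normalize_dict_for_v3io_frames,
        # with ":" now joining parent and child keys instead of "."
        items = {}
        for k, v in d.items():
            full_key = f"{parent_key}:{k}" if parent_key else k
            if isinstance(v, dict):
                items.update(flatten_dict(v, full_key))
            else:
                items[full_key] = v
        return items

    assert flatten_dict({"inputs": {"f1": 0.5, "f2": 7}}) == {
        "inputs:f1": 0.5,
        "inputs:f2": 7,
    }  # previously flattened as "inputs.f1" / "inputs.f2"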