mlrun 1.7.0rc38__py3-none-any.whl → 1.7.0rc41__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.

Note: this release of mlrun has been flagged as potentially problematic.

Files changed (59)
  1. mlrun/alerts/alert.py +30 -27
  2. mlrun/common/constants.py +3 -0
  3. mlrun/common/helpers.py +0 -1
  4. mlrun/common/schemas/alert.py +3 -0
  5. mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -1
  6. mlrun/common/schemas/notification.py +1 -0
  7. mlrun/config.py +1 -1
  8. mlrun/data_types/to_pandas.py +9 -9
  9. mlrun/datastore/alibaba_oss.py +3 -2
  10. mlrun/datastore/azure_blob.py +7 -9
  11. mlrun/datastore/base.py +13 -1
  12. mlrun/datastore/dbfs_store.py +3 -7
  13. mlrun/datastore/filestore.py +1 -3
  14. mlrun/datastore/google_cloud_storage.py +84 -29
  15. mlrun/datastore/redis.py +1 -0
  16. mlrun/datastore/s3.py +3 -2
  17. mlrun/datastore/sources.py +54 -0
  18. mlrun/datastore/storeytargets.py +147 -0
  19. mlrun/datastore/targets.py +76 -122
  20. mlrun/datastore/v3io.py +1 -0
  21. mlrun/db/httpdb.py +6 -1
  22. mlrun/errors.py +8 -0
  23. mlrun/execution.py +7 -0
  24. mlrun/feature_store/api.py +5 -0
  25. mlrun/feature_store/retrieval/job.py +1 -0
  26. mlrun/model.py +24 -3
  27. mlrun/model_monitoring/api.py +10 -2
  28. mlrun/model_monitoring/applications/_application_steps.py +52 -34
  29. mlrun/model_monitoring/applications/context.py +206 -70
  30. mlrun/model_monitoring/applications/histogram_data_drift.py +15 -13
  31. mlrun/model_monitoring/controller.py +15 -12
  32. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +17 -8
  33. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +19 -9
  34. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +85 -47
  35. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +46 -10
  36. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +38 -24
  37. mlrun/model_monitoring/helpers.py +54 -18
  38. mlrun/model_monitoring/stream_processing.py +10 -29
  39. mlrun/projects/pipelines.py +19 -30
  40. mlrun/projects/project.py +86 -67
  41. mlrun/run.py +8 -6
  42. mlrun/runtimes/__init__.py +4 -0
  43. mlrun/runtimes/nuclio/api_gateway.py +18 -0
  44. mlrun/runtimes/nuclio/application/application.py +150 -59
  45. mlrun/runtimes/nuclio/function.py +5 -11
  46. mlrun/runtimes/nuclio/serving.py +2 -2
  47. mlrun/runtimes/utils.py +16 -0
  48. mlrun/serving/routers.py +1 -1
  49. mlrun/serving/server.py +19 -5
  50. mlrun/serving/states.py +8 -0
  51. mlrun/serving/v2_serving.py +34 -26
  52. mlrun/utils/helpers.py +33 -2
  53. mlrun/utils/version/version.json +2 -2
  54. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/METADATA +9 -12
  55. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/RECORD +59 -58
  56. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/WHEEL +1 -1
  57. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/LICENSE +0 -0
  58. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/entry_points.txt +0 -0
  59. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/top_level.txt +0 -0
--- a/mlrun/model_monitoring/db/stores/sqldb/sql_store.py
+++ b/mlrun/model_monitoring/db/stores/sqldb/sql_store.py
@@ -20,7 +20,7 @@ import pandas as pd
 import sqlalchemy
 import sqlalchemy.exc
 import sqlalchemy.orm
-from sqlalchemy.engine import make_url
+from sqlalchemy.engine import Engine, make_url
 from sqlalchemy.sql.elements import BinaryExpression
 
 import mlrun.common.model_monitoring.helpers
@@ -61,9 +61,15 @@ class SQLStoreBase(StoreBase):
         )
 
         self._sql_connection_string = kwargs.get("store_connection_string")
-        self._engine = get_engine(dsn=self._sql_connection_string)
+        self._engine = None
         self._init_tables()
 
+    @property
+    def engine(self) -> Engine:
+        if not self._engine:
+            self._engine = get_engine(dsn=self._sql_connection_string)
+        return self._engine
+
     def create_tables(self):
         self._create_tables_if_not_exist()
 
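This hunk introduces the pattern that recurs throughout this release: eagerly created engines and clients become lazily initialized properties, so constructing a store no longer opens a connection. A minimal standalone sketch of the pattern (names here are illustrative stand-ins, not mlrun APIs):

# Minimal sketch of the lazy-initialization property pattern adopted above.
# `LazyStore` and `_make_engine` are illustrative, not mlrun APIs.
from typing import Optional


class LazyStore:
    def __init__(self, dsn: str) -> None:
        self._dsn = dsn
        self._engine: Optional[object] = None  # no connection at construction time

    @property
    def engine(self) -> object:
        # Created on first access, cached for every access after that.
        if self._engine is None:
            self._engine = self._make_engine(self._dsn)
        return self._engine

    def _make_engine(self, dsn: str) -> object:
        print(f"connecting to {dsn}")  # stand-in for real connection setup
        return object()


store = LazyStore("sqlite://")  # cheap: nothing is connected yet
_ = store.engine                # first access creates the engine
_ = store.engine                # reuses the cached engine

Deferring creation this way presumably keeps store construction cheap in processes that never touch the database, and avoids holding live connection objects on freshly constructed instances.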
@@ -116,7 +122,7 @@ class SQLStoreBase(StoreBase):
         :param table_name: Target table name.
         :param event: Event dictionary that will be written into the DB.
         """
-        with self._engine.connect() as connection:
+        with self.engine.connect() as connection:
             # Convert the result into a pandas Dataframe and write it into the database
             event_df = pd.DataFrame([event])
             event_df.to_sql(table_name, con=connection, index=False, if_exists="append")
@@ -177,7 +183,7 @@ class SQLStoreBase(StoreBase):
         param table: SQLAlchemy declarative table.
         :param criteria: A list of binary expressions that filter the query.
         """
-        if not self._engine.has_table(table.__tablename__):
+        if not self.engine.has_table(table.__tablename__):
             logger.debug(
                 f"Table {table.__tablename__} does not exist in the database. Skipping deletion."
             )
@@ -524,9 +530,9 @@ class SQLStoreBase(StoreBase):
         for table in self._tables:
             # Create table if not exist. The `metadata` contains the `ModelEndpointsTable`
             db_name = make_url(self._sql_connection_string).database
-            if not self._engine.has_table(table):
+            if not self.engine.has_table(table):
                 logger.info(f"Creating table {table} on {db_name} db.")
-                self._tables[table].metadata.create_all(bind=self._engine)
+                self._tables[table].metadata.create_all(bind=self.engine)
             else:
                 logger.info(f"Table {table} already exists on {db_name} db.")
 
@@ -574,8 +580,11 @@ class SQLStoreBase(StoreBase):
         """
         Delete all the model monitoring resources of the project in the SQL tables.
         """
+        logger.debug(
+            "Deleting model monitoring endpoints resources from the SQL tables",
+            project=self.project,
+        )
         endpoints = self.list_model_endpoints()
-        logger.debug("Deleting model monitoring resources", project=self.project)
 
         for endpoint_dict in endpoints:
             endpoint_id = endpoint_dict[mm_schemas.EventFieldType.UID]
@@ -612,7 +621,7 @@ class SQLStoreBase(StoreBase):
 
         # Note: the block below does not use self._get, as we need here all the
         # results, not only `one_or_none`.
-        with sqlalchemy.orm.Session(self._engine) as session:
+        with sqlalchemy.orm.Session(self.engine) as session:
             metric_rows = (
                 session.query(table)  # pyright: ignore[reportOptionalCall]
                 .filter(table.endpoint_id == endpoint_id)
--- a/mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py
+++ b/mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py
@@ -20,6 +20,7 @@ from http import HTTPStatus
 import v3io.dataplane
 import v3io.dataplane.output
 import v3io.dataplane.response
+from v3io.dataplane import Client as V3IOClient
 
 import mlrun.common.model_monitoring.helpers
 import mlrun.common.schemas.model_monitoring as mm_schemas
@@ -34,11 +35,11 @@ fields_to_encode_decode = [
 ]
 
 _METRIC_FIELDS: list[str] = [
-    mm_schemas.WriterEvent.APPLICATION_NAME,
-    mm_schemas.MetricData.METRIC_NAME,
-    mm_schemas.MetricData.METRIC_VALUE,
-    mm_schemas.WriterEvent.START_INFER_TIME,
-    mm_schemas.WriterEvent.END_INFER_TIME,
+    mm_schemas.WriterEvent.APPLICATION_NAME.value,
+    mm_schemas.MetricData.METRIC_NAME.value,
+    mm_schemas.MetricData.METRIC_VALUE.value,
+    mm_schemas.WriterEvent.START_INFER_TIME.value,
+    mm_schemas.WriterEvent.END_INFER_TIME.value,
 ]
 
 
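Appending `.value` makes the list hold plain strings instead of enum members. The distinction is easy to miss because string-backed enums compare equal to their values; a small stdlib illustration (mlrun's `StrEnum` members behave like these):

# Why `.value` matters: a str-backed enum member equals its value but is not
# a plain str, which can surprise serializers and strict type checks.
import enum


class WriterEvent(str, enum.Enum):
    APPLICATION_NAME = "application_name"


member = WriterEvent.APPLICATION_NAME
print(member == "application_name")  # True: compares equal to the plain string
print(type(member) is str)           # False: it is still an enum member
print(repr(member))                  # <WriterEvent.APPLICATION_NAME: 'application_name'>
print(repr(member.value))            # 'application_name'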
@@ -100,13 +101,18 @@ class KVStoreBase(StoreBase):
         project: str,
     ) -> None:
         super().__init__(project=project)
-        # Initialize a V3IO client instance
-        self.client = mlrun.utils.v3io_clients.get_v3io_client(
-            endpoint=mlrun.mlconf.v3io_api,
-        )
+        self._client = None
         # Get the KV table path and container
         self.path, self.container = self._get_path_and_container()
 
+    @property
+    def client(self) -> V3IOClient:
+        if not self._client:
+            self._client = mlrun.utils.v3io_clients.get_v3io_client(
+                endpoint=mlrun.mlconf.v3io_api,
+            )
+        return self._client
+
     def write_model_endpoint(self, endpoint: dict[str, typing.Any]):
         """
         Create a new endpoint record in the KV table.
@@ -285,6 +291,10 @@ class KVStoreBase(StoreBase):
         """
         Delete all model endpoints resources in V3IO KV.
         """
+        logger.debug(
+            "Deleting model monitoring endpoints resources in V3IO KV",
+            project=self.project,
+        )
 
         endpoints = self.list_model_endpoints()
 
--- a/mlrun/model_monitoring/db/tsdb/tdengine/schemas.py
+++ b/mlrun/model_monitoring/db/tsdb/tdengine/schemas.py
@@ -17,6 +17,8 @@ from dataclasses import dataclass
 from io import StringIO
 from typing import Optional, Union
 
+import taosws
+
 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.common.types
 
@@ -28,6 +30,9 @@ class _TDEngineColumnType:
         self.data_type = data_type
         self.length = length
 
+    def values_to_column(self, values):
+        raise NotImplementedError()
+
     def __str__(self):
         if self.length is not None:
             return f"{self.data_type}({self.length})"
@@ -44,6 +49,26 @@ class _TDEngineColumn(mlrun.common.types.StrEnum):
     BINARY_10000 = _TDEngineColumnType("BINARY", 10000)
 
 
+def values_to_column(values, column_type):
+    if column_type == _TDEngineColumn.TIMESTAMP:
+        timestamps = [round(timestamp.timestamp() * 1000) for timestamp in values]
+        return taosws.millis_timestamps_to_column(timestamps)
+    if column_type == _TDEngineColumn.FLOAT:
+        return taosws.floats_to_column(values)
+    if column_type == _TDEngineColumn.INT:
+        return taosws.ints_to_column(values)
+    if column_type == _TDEngineColumn.BINARY_40:
+        return taosws.binary_to_column(values)
+    if column_type == _TDEngineColumn.BINARY_64:
+        return taosws.binary_to_column(values)
+    if column_type == _TDEngineColumn.BINARY_10000:
+        return taosws.binary_to_column(values)
+
+    raise mlrun.errors.MLRunInvalidArgumentError(
+        f"unsupported column type '{column_type}'"
+    )
+
+
 @dataclass
 class TDEngineSchema:
     """
@@ -55,13 +80,14 @@ class TDEngineSchema:
     def __init__(
         self,
         super_table: str,
-        columns: dict[str, str],
+        columns: dict[str, _TDEngineColumn],
         tags: dict[str, str],
+        database: Optional[str] = None,
     ):
         self.super_table = super_table
         self.columns = columns
         self.tags = tags
-        self.database = _MODEL_MONITORING_DATABASE
+        self.database = database or _MODEL_MONITORING_DATABASE
 
     def _create_super_table_query(self) -> str:
         columns = ", ".join(f"{col} {val}" for col, val in self.columns.items())
@@ -83,11 +109,23 @@ class TDEngineSchema:
 
     def _insert_subtable_query(
         self,
+        connection: taosws.Connection,
         subtable: str,
         values: dict[str, Union[str, int, float, datetime.datetime]],
-    ) -> str:
-        values = ", ".join(f"'{values[val]}'" for val in self.columns)
-        return f"INSERT INTO {self.database}.{subtable} VALUES ({values});"
+    ) -> taosws.TaosStmt:
+        stmt = connection.statement()
+        question_marks = ", ".join("?" * len(self.columns))
+        stmt.prepare(f"INSERT INTO ? VALUES ({question_marks});")
+        stmt.set_tbname_tags(subtable, [])
+
+        bind_params = []
+
+        for col_name, col_type in self.columns.items():
+            val = values[col_name]
+            bind_params.append(values_to_column([val], col_type))
+
+        stmt.bind_param(bind_params)
+        return stmt
 
     def _delete_subtable_query(
         self,
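Combined with the connector changes further down, inserts now go through taosws prepared statements with typed column binding instead of interpolated SQL strings. A hedged end-to-end sketch of that flow, assuming the `taosws` package and a reachable TDengine server (the DSN and table name are illustrative):

# Sketch of the taosws parameter-binding flow used by the new
# _insert_subtable_query; every call below mirrors one in the diff.
import taosws

conn = taosws.connect("taosws://root:taosdata@localhost:6041")  # illustrative DSN
conn.execute("CREATE DATABASE IF NOT EXISTS demo")
conn.execute("USE demo")
conn.execute("CREATE TABLE IF NOT EXISTS readings (ts TIMESTAMP, v FLOAT)")

stmt = conn.statement()
stmt.prepare("INSERT INTO ? VALUES (?, ?);")  # one placeholder per column
stmt.set_tbname_tags("readings", [])          # bind the target (sub)table name
stmt.bind_param(
    [
        taosws.millis_timestamps_to_column([1_700_000_000_000]),
        taosws.floats_to_column([21.5]),
    ]
)
stmt.add_batch()  # stage the bound row
stmt.execute()    # send the batch to the server

Binding typed columns also sidesteps the old query's habit of quoting every value as a string.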
@@ -188,53 +226,53 @@ class TDEngineSchema:
 
 @dataclass
 class AppResultTable(TDEngineSchema):
-    super_table = mm_schemas.TDEngineSuperTables.APP_RESULTS
-    columns = {
-        mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
-        mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
-        mm_schemas.ResultData.RESULT_VALUE: _TDEngineColumn.FLOAT,
-        mm_schemas.ResultData.RESULT_STATUS: _TDEngineColumn.INT,
-        mm_schemas.ResultData.CURRENT_STATS: _TDEngineColumn.BINARY_10000,
-    }
-
-    tags = {
-        mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
-        mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
-        mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
-        mm_schemas.ResultData.RESULT_NAME: _TDEngineColumn.BINARY_64,
-        mm_schemas.ResultData.RESULT_KIND: _TDEngineColumn.INT,
-    }
-    database = _MODEL_MONITORING_DATABASE
+    def __init__(self, database: Optional[str] = None):
+        super_table = mm_schemas.TDEngineSuperTables.APP_RESULTS
+        columns = {
+            mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.ResultData.RESULT_VALUE: _TDEngineColumn.FLOAT,
+            mm_schemas.ResultData.RESULT_STATUS: _TDEngineColumn.INT,
+        }
+        tags = {
+            mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
+            mm_schemas.ResultData.RESULT_NAME: _TDEngineColumn.BINARY_64,
+            mm_schemas.ResultData.RESULT_KIND: _TDEngineColumn.INT,
+        }
+        super().__init__(super_table, columns, tags, database)
 
 
 @dataclass
 class Metrics(TDEngineSchema):
-    super_table = mm_schemas.TDEngineSuperTables.METRICS
-    columns = {
-        mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
-        mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
-        mm_schemas.MetricData.METRIC_VALUE: _TDEngineColumn.FLOAT,
-    }
-
-    tags = {
-        mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
-        mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
-        mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
-        mm_schemas.MetricData.METRIC_NAME: _TDEngineColumn.BINARY_64,
-    }
-    database = _MODEL_MONITORING_DATABASE
+    def __init__(self, database: Optional[str] = None):
+        super_table = mm_schemas.TDEngineSuperTables.METRICS
+        columns = {
+            mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.WriterEvent.START_INFER_TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.MetricData.METRIC_VALUE: _TDEngineColumn.FLOAT,
+        }
+        tags = {
+            mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
+            mm_schemas.MetricData.METRIC_NAME: _TDEngineColumn.BINARY_64,
+        }
+        super().__init__(super_table, columns, tags, database)
 
 
 @dataclass
 class Predictions(TDEngineSchema):
-    super_table = mm_schemas.TDEngineSuperTables.PREDICTIONS
-    columns = {
-        mm_schemas.EventFieldType.TIME: _TDEngineColumn.TIMESTAMP,
-        mm_schemas.EventFieldType.LATENCY: _TDEngineColumn.FLOAT,
-        mm_schemas.EventKeyMetrics.CUSTOM_METRICS: _TDEngineColumn.BINARY_10000,
-    }
-    tags = {
-        mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
-        mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
-    }
-    database = _MODEL_MONITORING_DATABASE
+    def __init__(self, database: Optional[str] = None):
+        super_table = mm_schemas.TDEngineSuperTables.PREDICTIONS
+        columns = {
+            mm_schemas.EventFieldType.TIME: _TDEngineColumn.TIMESTAMP,
+            mm_schemas.EventFieldType.LATENCY: _TDEngineColumn.FLOAT,
+            mm_schemas.EventKeyMetrics.CUSTOM_METRICS: _TDEngineColumn.BINARY_10000,
+        }
+        tags = {
+            mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
+            mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
+        }
+        super().__init__(super_table, columns, tags, database)
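The net effect of this rewrite is that the database is now a constructor argument rather than a hard-coded module constant; note the rewritten AppResultTable also drops the CURRENT_STATS column, matching the event.pop(CURRENT_STATS, None) calls added in the connector hunks below. A hypothetical usage sketch (the import path is inferred from the file's location in this diff):

# Hypothetical usage: schema classes now accept an optional database name and
# fall back to the module default when none is given.
from mlrun.model_monitoring.db.tsdb.tdengine import schemas as tdengine_schemas

app_results = tdengine_schemas.AppResultTable(database="custom_mm_db")
metrics = tdengine_schemas.Metrics()  # falls back to _MODEL_MONITORING_DATABASE

print(app_results.database)  # custom_mm_db
print(metrics.database)      # the module default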
--- a/mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py
+++ b/mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py
@@ -47,10 +47,17 @@ class TDEngineConnector(TSDBConnector):
         )
         self._tdengine_connection_string = kwargs.get("connection_string")
         self.database = database
-        self._connection = self._create_connection()
+
+        self._connection = None
         self._init_super_tables()
 
-    def _create_connection(self):
+    @property
+    def connection(self) -> taosws.Connection:
+        if not self._connection:
+            self._connection = self._create_connection()
+        return self._connection
+
+    def _create_connection(self) -> taosws.Connection:
         """Establish a connection to the TSDB server."""
         conn = taosws.connect(self._tdengine_connection_string)
         try:
@@ -58,15 +65,26 @@ class TDEngineConnector(TSDBConnector):
         except taosws.QueryError:
             # Database already exists
             pass
-        conn.execute(f"USE {self.database}")
+        try:
+            conn.execute(f"USE {self.database}")
+        except taosws.QueryError as e:
+            raise mlrun.errors.MLRunTSDBConnectionFailure(
+                f"Failed to use TDEngine database {self.database}, {mlrun.errors.err_to_str(e)}"
+            )
         return conn
 
     def _init_super_tables(self):
         """Initialize the super tables for the TSDB."""
         self.tables = {
-            mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(),
-            mm_schemas.TDEngineSuperTables.METRICS: tdengine_schemas.Metrics(),
-            mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(),
+            mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(
+                self.database
+            ),
+            mm_schemas.TDEngineSuperTables.METRICS: tdengine_schemas.Metrics(
+                self.database
+            ),
+            mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(
+                self.database
+            ),
         }
 
     def create_tables(self):
@@ -97,6 +115,7 @@ class TDEngineConnector(TSDBConnector):
             table_name = (
                 f"{table_name}_" f"{event[mm_schemas.ResultData.RESULT_NAME]}"
             ).replace("-", "_")
+            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
 
         else:
             # Write a new metric
@@ -105,14 +124,30 @@ class TDEngineConnector(TSDBConnector):
                 f"{table_name}_" f"{event[mm_schemas.MetricData.METRIC_NAME]}"
             ).replace("-", "_")
 
+        # Convert the datetime strings to datetime objects
+        event[mm_schemas.WriterEvent.END_INFER_TIME] = self._convert_to_datetime(
+            val=event[mm_schemas.WriterEvent.END_INFER_TIME]
+        )
+        event[mm_schemas.WriterEvent.START_INFER_TIME] = self._convert_to_datetime(
+            val=event[mm_schemas.WriterEvent.START_INFER_TIME]
+        )
+
         create_table_query = table._create_subtable_query(
             subtable=table_name, values=event
         )
         self._connection.execute(create_table_query)
-        insert_table_query = table._insert_subtable_query(
-            subtable=table_name, values=event
+
+        insert_statement = table._insert_subtable_query(
+            self._connection,
+            subtable=table_name,
+            values=event,
         )
-        self._connection.execute(insert_table_query)
+        insert_statement.add_batch()
+        insert_statement.execute()
+
+    @staticmethod
+    def _convert_to_datetime(val: typing.Union[str, datetime]) -> datetime:
+        return datetime.fromisoformat(val) if isinstance(val, str) else val
 
     def apply_monitoring_stream_steps(self, graph):
         """
@@ -148,7 +183,8 @@ class TDEngineConnector(TSDBConnector):
                 mm_schemas.EventFieldType.PROJECT,
                 mm_schemas.EventFieldType.ENDPOINT_ID,
             ],
-            max_events=10,
+            max_events=1000,
+            flush_after_seconds=30,
         )
 
         apply_process_before_tsdb()
--- a/mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py
+++ b/mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py
@@ -24,6 +24,7 @@ import mlrun.common.model_monitoring
 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.feature_store.steps
 import mlrun.utils.v3io_clients
+from mlrun.common.schemas import EventFieldType
 from mlrun.model_monitoring.db import TSDBConnector
 from mlrun.model_monitoring.helpers import get_invocations_fqn
 from mlrun.utils import logger
@@ -64,14 +65,17 @@ class V3IOTSDBConnector(TSDBConnector):
         self.container = container
 
         self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
-        self._frames_client: v3io_frames.client.ClientBase = (
-            self._get_v3io_frames_client(self.container)
-        )
-
+        self._frames_client: Optional[v3io_frames.client.ClientBase] = None
         self._init_tables_path()
+        self._create_table = create_table
 
-        if create_table:
-            self.create_tables()
+    @property
+    def frames_client(self) -> v3io_frames.client.ClientBase:
+        if not self._frames_client:
+            self._frames_client = self._get_v3io_frames_client(self.container)
+            if self._create_table:
+                self.create_tables()
+        return self._frames_client
 
     def _init_tables_path(self):
         self.tables = {}
@@ -151,7 +155,7 @@ class V3IOTSDBConnector(TSDBConnector):
         for table_name in application_tables:
             logger.info("Creating table in V3IO TSDB", table_name=table_name)
             table = self.tables[table_name]
-            self._frames_client.create(
+            self.frames_client.create(
                 backend=_TSDB_BE,
                 table=table,
                 if_exists=v3io_frames.IGNORE,
@@ -161,8 +165,9 @@ class V3IOTSDBConnector(TSDBConnector):
     def apply_monitoring_stream_steps(
         self,
         graph,
-        tsdb_batching_max_events: int = 10,
-        tsdb_batching_timeout_secs: int = 300,
+        tsdb_batching_max_events: int = 1000,
+        tsdb_batching_timeout_secs: int = 30,
+        sample_window: int = 10,
     ):
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
@@ -173,6 +178,7 @@ class V3IOTSDBConnector(TSDBConnector):
         - endpoint_features (Prediction and feature names and values)
         - custom_metrics (user-defined metrics)
         """
+
         # Write latency per prediction, labeled by endpoint ID only
         graph.add_step(
             "storey.TSDBTarget",
@@ -197,17 +203,23 @@ class V3IOTSDBConnector(TSDBConnector):
             key=mm_schemas.EventFieldType.ENDPOINT_ID,
         )
 
+        # Emits the event in window size of events based on sample_window size (10 by default)
+        graph.add_step(
+            "storey.steps.SampleWindow",
+            name="sample",
+            after="Rename",
+            window_size=sample_window,
+            key=EventFieldType.ENDPOINT_ID,
+        )
+
         # Before writing data to TSDB, create dictionary of 2-3 dictionaries that contains
         # stats and details about the events
 
-        def apply_process_before_tsdb():
-            graph.add_step(
-                "mlrun.model_monitoring.db.tsdb.v3io.stream_graph_steps.ProcessBeforeTSDB",
-                name="ProcessBeforeTSDB",
-                after="sample",
-            )
-
-        apply_process_before_tsdb()
+        graph.add_step(
+            "mlrun.model_monitoring.db.tsdb.v3io.stream_graph_steps.ProcessBeforeTSDB",
+            name="ProcessBeforeTSDB",
+            after="sample",
+        )
 
         # Unpacked keys from each dictionary and write to TSDB target
         def apply_filter_and_unpacked_keys(name, keys):
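The new `storey.steps.SampleWindow` step thins the stream before the TSDB writes: per endpoint ID, one event per window of `sample_window` events flows on. A rough pure-Python illustration of that per-key sampling semantics (not the storey implementation, which is more featureful, e.g. configurable emit policies):

# Rough illustration of per-key sample-window semantics.
from collections import defaultdict


def sample_window(events, window_size=10, key="endpoint_id"):
    counts = defaultdict(int)
    for event in events:
        if counts[event[key]] % window_size == 0:  # first event of each window
            yield event
        counts[event[key]] += 1


events = [{"endpoint_id": "ep1", "n": i} for i in range(25)]
print([e["n"] for e in sample_window(events)])  # [0, 10, 20]

Together with the max_events/flush_after_seconds changes in the surrounding hunks, this trades some per-event granularity for far fewer TSDB writes.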
@@ -273,8 +285,8 @@ class V3IOTSDBConnector(TSDBConnector):
     def handle_model_error(
         self,
         graph,
-        tsdb_batching_max_events: int = 10,
-        tsdb_batching_timeout_secs: int = 60,
+        tsdb_batching_max_events: int = 1000,
+        tsdb_batching_timeout_secs: int = 30,
         **kwargs,
     ) -> None:
         graph.add_step(
@@ -326,12 +338,14 @@ class V3IOTSDBConnector(TSDBConnector):
         elif kind == mm_schemas.WriterEventKind.RESULT:
             table = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
             index_cols = index_cols_base + [mm_schemas.ResultData.RESULT_NAME]
-            del event[mm_schemas.ResultData.RESULT_EXTRA_DATA]
+            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
+            # TODO: remove this when extra data is supported (ML-7460)
+            event.pop(mm_schemas.ResultData.RESULT_EXTRA_DATA, None)
         else:
             raise ValueError(f"Invalid {kind = }")
 
         try:
-            self._frames_client.write(
+            self.frames_client.write(
                 backend=_TSDB_BE,
                 table=table,
                 dfs=pd.DataFrame.from_records([event]),
@@ -358,7 +372,7 @@ class V3IOTSDBConnector(TSDBConnector):
         tables = mm_schemas.V3IOTSDBTables.list()
         for table_to_delete in tables:
             try:
-                self._frames_client.delete(backend=_TSDB_BE, table=table_to_delete)
+                self.frames_client.delete(backend=_TSDB_BE, table=table_to_delete)
             except v3io_frames.DeleteError as e:
                 logger.warning(
                     f"Failed to delete TSDB table '{table}'",
@@ -474,7 +488,7 @@ class V3IOTSDBConnector(TSDBConnector):
         aggregators = ",".join(agg_funcs) if agg_funcs else None
         table_path = self.tables[table]
         try:
-            df = self._frames_client.read(
+            df = self.frames_client.read(
                 backend=_TSDB_BE,
                 table=table_path,
                 start=start,
@@ -577,7 +591,7 @@ class V3IOTSDBConnector(TSDBConnector):
 
         logger.debug("Querying V3IO TSDB", query=query)
 
-        df: pd.DataFrame = self._frames_client.read(
+        df: pd.DataFrame = self.frames_client.read(
             backend=_TSDB_BE,
             start=start,
             end=end,