mlrun 1.7.0rc57__py3-none-any.whl → 1.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/common/formatters/feature_set.py +12 -1
- mlrun/config.py +54 -3
- mlrun/datastore/__init__.py +2 -2
- mlrun/db/httpdb.py +3 -1
- mlrun/features.py +2 -1
- mlrun/model_monitoring/applications/_application_steps.py +12 -10
- mlrun/model_monitoring/applications/evidently_base.py +1 -1
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +30 -11
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +45 -30
- mlrun/platforms/iguazio.py +46 -26
- mlrun/projects/__init__.py +6 -1
- mlrun/projects/pipelines.py +184 -55
- mlrun/projects/project.py +15 -0
- mlrun/runtimes/nuclio/serving.py +1 -1
- mlrun/serving/routers.py +10 -1
- mlrun/serving/states.py +4 -2
- mlrun/serving/v2_serving.py +59 -23
- mlrun/utils/helpers.py +12 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc57.dist-info → mlrun-1.7.1.dist-info}/METADATA +186 -186
- {mlrun-1.7.0rc57.dist-info → mlrun-1.7.1.dist-info}/RECORD +26 -26
- {mlrun-1.7.0rc57.dist-info → mlrun-1.7.1.dist-info}/WHEEL +1 -1
- {mlrun-1.7.0rc57.dist-info → mlrun-1.7.1.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc57.dist-info → mlrun-1.7.1.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc57.dist-info → mlrun-1.7.1.dist-info}/top_level.txt +0 -0
|
@@ -28,6 +28,17 @@ class FeatureSetFormat(ObjectFormat, mlrun.common.types.StrEnum):
|
|
|
28
28
|
return {
|
|
29
29
|
FeatureSetFormat.full: None,
|
|
30
30
|
FeatureSetFormat.minimal: FeatureSetFormat.filter_obj_method(
|
|
31
|
-
[
|
|
31
|
+
[
|
|
32
|
+
"metadata.name",
|
|
33
|
+
"metadata.project",
|
|
34
|
+
"metadata.tag",
|
|
35
|
+
"metadata.uid",
|
|
36
|
+
"metadata.labels",
|
|
37
|
+
"spec.entities",
|
|
38
|
+
"spec.description",
|
|
39
|
+
"spec.targets",
|
|
40
|
+
"spec.engine", # It's not needed by the UI, but we override it anyway to storey if empty
|
|
41
|
+
"status.state",
|
|
42
|
+
]
|
|
32
43
|
),
|
|
33
44
|
}[_format]
|
mlrun/config.py
CHANGED
|
@@ -49,6 +49,7 @@ _load_lock = Lock()
|
|
|
49
49
|
_none_type = type(None)
|
|
50
50
|
default_env_file = os.getenv("MLRUN_DEFAULT_ENV_FILE", "~/.mlrun.env")
|
|
51
51
|
|
|
52
|
+
|
|
52
53
|
default_config = {
|
|
53
54
|
"namespace": "", # default kubernetes namespace
|
|
54
55
|
"kubernetes": {
|
|
@@ -532,8 +533,55 @@ default_config = {
|
|
|
532
533
|
},
|
|
533
534
|
},
|
|
534
535
|
"model_endpoint_monitoring": {
|
|
535
|
-
"
|
|
536
|
-
|
|
536
|
+
"serving_stream": {
|
|
537
|
+
"v3io": {
|
|
538
|
+
"shard_count": 2,
|
|
539
|
+
"retention_period_hours": 24,
|
|
540
|
+
"num_workers": 1,
|
|
541
|
+
"min_replicas": 2,
|
|
542
|
+
"max_replicas": 2,
|
|
543
|
+
},
|
|
544
|
+
"kafka": {
|
|
545
|
+
"partition_count": 8,
|
|
546
|
+
"replication_factor": 1,
|
|
547
|
+
"num_workers": 2,
|
|
548
|
+
"min_replicas": 1,
|
|
549
|
+
"max_replicas": 4,
|
|
550
|
+
},
|
|
551
|
+
},
|
|
552
|
+
"application_stream_args": {
|
|
553
|
+
"v3io": {
|
|
554
|
+
"shard_count": 1,
|
|
555
|
+
"retention_period_hours": 24,
|
|
556
|
+
"num_workers": 1,
|
|
557
|
+
"min_replicas": 1,
|
|
558
|
+
"max_replicas": 1,
|
|
559
|
+
},
|
|
560
|
+
"kafka": {
|
|
561
|
+
"partition_count": 1,
|
|
562
|
+
"replication_factor": 1,
|
|
563
|
+
"num_workers": 1,
|
|
564
|
+
"min_replicas": 1,
|
|
565
|
+
"max_replicas": 1,
|
|
566
|
+
},
|
|
567
|
+
},
|
|
568
|
+
"writer_stream_args": {
|
|
569
|
+
"v3io": {
|
|
570
|
+
"shard_count": 1,
|
|
571
|
+
"retention_period_hours": 24,
|
|
572
|
+
"num_workers": 1,
|
|
573
|
+
"min_replicas": 1,
|
|
574
|
+
"max_replicas": 1,
|
|
575
|
+
},
|
|
576
|
+
"kafka": {
|
|
577
|
+
"partition_count": 1,
|
|
578
|
+
# TODO: add retention period configuration
|
|
579
|
+
"replication_factor": 1,
|
|
580
|
+
"num_workers": 1,
|
|
581
|
+
"min_replicas": 1,
|
|
582
|
+
"max_replicas": 1,
|
|
583
|
+
},
|
|
584
|
+
},
|
|
537
585
|
# Store prefixes are used to handle model monitoring storing policies based on project and kind, such as events,
|
|
538
586
|
# stream, and endpoints.
|
|
539
587
|
"store_prefixes": {
|
|
@@ -556,6 +604,10 @@ default_config = {
|
|
|
556
604
|
"tsdb_connection": "",
|
|
557
605
|
# See mlrun.common.schemas.model_monitoring.constants.StreamKind for available options
|
|
558
606
|
"stream_connection": "",
|
|
607
|
+
"tdengine": {
|
|
608
|
+
"timeout": 10,
|
|
609
|
+
"retries": 1,
|
|
610
|
+
},
|
|
559
611
|
},
|
|
560
612
|
"secret_stores": {
|
|
561
613
|
# Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
|
|
@@ -746,7 +798,6 @@ default_config = {
|
|
|
746
798
|
"request_timeout": 5,
|
|
747
799
|
},
|
|
748
800
|
}
|
|
749
|
-
|
|
750
801
|
_is_running_as_api = None
|
|
751
802
|
|
|
752
803
|
|
mlrun/datastore/__init__.py
CHANGED
|
@@ -131,9 +131,9 @@ class _DummyStream:
|
|
|
131
131
|
def __init__(self, event_list=None, **kwargs):
|
|
132
132
|
self.event_list = event_list or []
|
|
133
133
|
|
|
134
|
-
def push(self, data):
|
|
134
|
+
def push(self, data, **kwargs):
|
|
135
135
|
if not isinstance(data, list):
|
|
136
136
|
data = [data]
|
|
137
137
|
for item in data:
|
|
138
|
-
logger.info(f"dummy stream got event: {item}")
|
|
138
|
+
logger.info(f"dummy stream got event: {item}, kwargs={kwargs}")
|
|
139
139
|
self.event_list.append(item)
|
mlrun/db/httpdb.py
CHANGED
|
@@ -1075,7 +1075,9 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1075
1075
|
category: Union[str, mlrun.common.schemas.ArtifactCategories] = None,
|
|
1076
1076
|
tree: str = None,
|
|
1077
1077
|
producer_uri: str = None,
|
|
1078
|
-
format_:
|
|
1078
|
+
format_: Optional[
|
|
1079
|
+
mlrun.common.formatters.ArtifactFormat
|
|
1080
|
+
] = mlrun.common.formatters.ArtifactFormat.full,
|
|
1079
1081
|
limit: int = None,
|
|
1080
1082
|
) -> ArtifactList:
|
|
1081
1083
|
"""List artifacts filtered by various parameters.
|
mlrun/features.py
CHANGED
|
@@ -100,7 +100,8 @@ class Feature(ModelObj):
|
|
|
100
100
|
:param name: name of the feature
|
|
101
101
|
:param validator: feature validation policy
|
|
102
102
|
:param default: default value
|
|
103
|
-
:param labels: a set of key/value labels (tags)
|
|
103
|
+
:param labels: a set of key/value labels (tags). Labels can be used to filter features, for example,
|
|
104
|
+
in the UI Feature store page.
|
|
104
105
|
"""
|
|
105
106
|
self.name = name or ""
|
|
106
107
|
if isinstance(value_type, ValueType):
|
|
@@ -162,10 +162,17 @@ class _ApplicationErrorHandler(StepToDict):
|
|
|
162
162
|
:param event: Application event.
|
|
163
163
|
"""
|
|
164
164
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
165
|
+
error_data = {
|
|
166
|
+
"Endpoint ID": event.body.endpoint_id,
|
|
167
|
+
"Application Class": event.body.application_name,
|
|
168
|
+
"Error": "".join(
|
|
169
|
+
traceback.format_exception(None, event.error, event.error.__traceback__)
|
|
170
|
+
),
|
|
171
|
+
"Timestamp": event.timestamp,
|
|
172
|
+
}
|
|
173
|
+
logger.error("Error in application step", **error_data)
|
|
174
|
+
|
|
175
|
+
error_data["Error"] = event.error
|
|
169
176
|
|
|
170
177
|
event_data = alert_objects.Event(
|
|
171
178
|
kind=alert_objects.EventKind.MM_APP_FAILED,
|
|
@@ -174,12 +181,7 @@ class _ApplicationErrorHandler(StepToDict):
|
|
|
174
181
|
project=self.project,
|
|
175
182
|
ids=[f"{self.project}_{event.body.application_name}"],
|
|
176
183
|
),
|
|
177
|
-
value_dict=
|
|
178
|
-
"Error": event.error,
|
|
179
|
-
"Timestamp": event.timestamp,
|
|
180
|
-
"Application Class": event.body.application_name,
|
|
181
|
-
"Endpoint ID": event.body.endpoint_id,
|
|
182
|
-
},
|
|
184
|
+
value_dict=error_data,
|
|
183
185
|
)
|
|
184
186
|
|
|
185
187
|
mlrun.get_run_db().generate_event(
|
|
@@ -23,7 +23,7 @@ import mlrun.model_monitoring.applications.base as mm_base
|
|
|
23
23
|
import mlrun.model_monitoring.applications.context as mm_context
|
|
24
24
|
from mlrun.errors import MLRunIncompatibleVersionError
|
|
25
25
|
|
|
26
|
-
SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.4.
|
|
26
|
+
SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.4.39")
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
def _check_evidently_version(*, cur: semver.Version, ref: semver.Version) -> None:
|
|
@@ -82,9 +82,10 @@ class TDEngineSchema:
|
|
|
82
82
|
super_table: str,
|
|
83
83
|
columns: dict[str, _TDEngineColumn],
|
|
84
84
|
tags: dict[str, str],
|
|
85
|
+
project: str,
|
|
85
86
|
database: Optional[str] = None,
|
|
86
87
|
):
|
|
87
|
-
self.super_table = super_table
|
|
88
|
+
self.super_table = f"{super_table}_{project.replace('-', '_')}"
|
|
88
89
|
self.columns = columns
|
|
89
90
|
self.tags = tags
|
|
90
91
|
self.database = database or _MODEL_MONITORING_DATABASE
|
|
@@ -148,6 +149,9 @@ class TDEngineSchema:
|
|
|
148
149
|
) -> str:
|
|
149
150
|
return f"DROP TABLE if EXISTS {self.database}.{subtable};"
|
|
150
151
|
|
|
152
|
+
def drop_supertable_query(self) -> str:
|
|
153
|
+
return f"DROP STABLE if EXISTS {self.database}.{self.super_table};"
|
|
154
|
+
|
|
151
155
|
def _get_subtables_query(
|
|
152
156
|
self,
|
|
153
157
|
values: dict[str, Union[str, int, float, datetime.datetime]],
|
|
@@ -159,7 +163,7 @@ class TDEngineSchema:
|
|
|
159
163
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
160
164
|
f"values must contain at least one tag: {self.tags.keys()}"
|
|
161
165
|
)
|
|
162
|
-
return f"SELECT tbname FROM {self.database}.{self.super_table} WHERE {values};"
|
|
166
|
+
return f"SELECT DISTINCT tbname FROM {self.database}.{self.super_table} WHERE {values};"
|
|
163
167
|
|
|
164
168
|
@staticmethod
|
|
165
169
|
def _get_records_query(
|
|
@@ -227,7 +231,7 @@ class TDEngineSchema:
|
|
|
227
231
|
|
|
228
232
|
@dataclass
|
|
229
233
|
class AppResultTable(TDEngineSchema):
|
|
230
|
-
def __init__(self, database: Optional[str] = None):
|
|
234
|
+
def __init__(self, project: str, database: Optional[str] = None):
|
|
231
235
|
super_table = mm_schemas.TDEngineSuperTables.APP_RESULTS
|
|
232
236
|
columns = {
|
|
233
237
|
mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
|
|
@@ -236,18 +240,23 @@ class AppResultTable(TDEngineSchema):
|
|
|
236
240
|
mm_schemas.ResultData.RESULT_STATUS: _TDEngineColumn.INT,
|
|
237
241
|
}
|
|
238
242
|
tags = {
|
|
239
|
-
mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
|
|
240
243
|
mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
|
|
241
244
|
mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
|
|
242
245
|
mm_schemas.ResultData.RESULT_NAME: _TDEngineColumn.BINARY_64,
|
|
243
246
|
mm_schemas.ResultData.RESULT_KIND: _TDEngineColumn.INT,
|
|
244
247
|
}
|
|
245
|
-
super().__init__(
|
|
248
|
+
super().__init__(
|
|
249
|
+
super_table=super_table,
|
|
250
|
+
columns=columns,
|
|
251
|
+
tags=tags,
|
|
252
|
+
database=database,
|
|
253
|
+
project=project,
|
|
254
|
+
)
|
|
246
255
|
|
|
247
256
|
|
|
248
257
|
@dataclass
|
|
249
258
|
class Metrics(TDEngineSchema):
|
|
250
|
-
def __init__(self, database: Optional[str] = None):
|
|
259
|
+
def __init__(self, project: str, database: Optional[str] = None):
|
|
251
260
|
super_table = mm_schemas.TDEngineSuperTables.METRICS
|
|
252
261
|
columns = {
|
|
253
262
|
mm_schemas.WriterEvent.END_INFER_TIME: _TDEngineColumn.TIMESTAMP,
|
|
@@ -255,17 +264,22 @@ class Metrics(TDEngineSchema):
|
|
|
255
264
|
mm_schemas.MetricData.METRIC_VALUE: _TDEngineColumn.FLOAT,
|
|
256
265
|
}
|
|
257
266
|
tags = {
|
|
258
|
-
mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
|
|
259
267
|
mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
|
|
260
268
|
mm_schemas.WriterEvent.APPLICATION_NAME: _TDEngineColumn.BINARY_64,
|
|
261
269
|
mm_schemas.MetricData.METRIC_NAME: _TDEngineColumn.BINARY_64,
|
|
262
270
|
}
|
|
263
|
-
super().__init__(
|
|
271
|
+
super().__init__(
|
|
272
|
+
super_table=super_table,
|
|
273
|
+
columns=columns,
|
|
274
|
+
tags=tags,
|
|
275
|
+
database=database,
|
|
276
|
+
project=project,
|
|
277
|
+
)
|
|
264
278
|
|
|
265
279
|
|
|
266
280
|
@dataclass
|
|
267
281
|
class Predictions(TDEngineSchema):
|
|
268
|
-
def __init__(self, database: Optional[str] = None):
|
|
282
|
+
def __init__(self, project: str, database: Optional[str] = None):
|
|
269
283
|
super_table = mm_schemas.TDEngineSuperTables.PREDICTIONS
|
|
270
284
|
columns = {
|
|
271
285
|
mm_schemas.EventFieldType.TIME: _TDEngineColumn.TIMESTAMP,
|
|
@@ -273,7 +287,12 @@ class Predictions(TDEngineSchema):
|
|
|
273
287
|
mm_schemas.EventKeyMetrics.CUSTOM_METRICS: _TDEngineColumn.BINARY_10000,
|
|
274
288
|
}
|
|
275
289
|
tags = {
|
|
276
|
-
mm_schemas.EventFieldType.PROJECT: _TDEngineColumn.BINARY_64,
|
|
277
290
|
mm_schemas.WriterEvent.ENDPOINT_ID: _TDEngineColumn.BINARY_64,
|
|
278
291
|
}
|
|
279
|
-
super().__init__(
|
|
292
|
+
super().__init__(
|
|
293
|
+
super_table=super_table,
|
|
294
|
+
columns=columns,
|
|
295
|
+
tags=tags,
|
|
296
|
+
database=database,
|
|
297
|
+
project=project,
|
|
298
|
+
)
|
|
@@ -56,6 +56,9 @@ class TDEngineConnector(TSDBConnector):
|
|
|
56
56
|
self._connection = None
|
|
57
57
|
self._init_super_tables()
|
|
58
58
|
|
|
59
|
+
self._timeout = mlrun.mlconf.model_endpoint_monitoring.tdengine.timeout
|
|
60
|
+
self._retries = mlrun.mlconf.model_endpoint_monitoring.tdengine.retries
|
|
61
|
+
|
|
59
62
|
@property
|
|
60
63
|
def connection(self) -> TDEngineConnection:
|
|
61
64
|
if not self._connection:
|
|
@@ -66,7 +69,11 @@ class TDEngineConnector(TSDBConnector):
|
|
|
66
69
|
"""Establish a connection to the TSDB server."""
|
|
67
70
|
logger.debug("Creating a new connection to TDEngine", project=self.project)
|
|
68
71
|
conn = TDEngineConnection(self._tdengine_connection_string)
|
|
69
|
-
conn.run(
|
|
72
|
+
conn.run(
|
|
73
|
+
statements=f"CREATE DATABASE IF NOT EXISTS {self.database}",
|
|
74
|
+
timeout=self._timeout,
|
|
75
|
+
retries=self._retries,
|
|
76
|
+
)
|
|
70
77
|
conn.prefix_statements = [f"USE {self.database}"]
|
|
71
78
|
logger.debug("Connected to TDEngine", project=self.project)
|
|
72
79
|
return conn
|
|
@@ -75,13 +82,13 @@ class TDEngineConnector(TSDBConnector):
|
|
|
75
82
|
"""Initialize the super tables for the TSDB."""
|
|
76
83
|
self.tables = {
|
|
77
84
|
mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(
|
|
78
|
-
self.database
|
|
85
|
+
project=self.project, database=self.database
|
|
79
86
|
),
|
|
80
87
|
mm_schemas.TDEngineSuperTables.METRICS: tdengine_schemas.Metrics(
|
|
81
|
-
self.database
|
|
88
|
+
project=self.project, database=self.database
|
|
82
89
|
),
|
|
83
90
|
mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(
|
|
84
|
-
self.database
|
|
91
|
+
project=self.project, database=self.database
|
|
85
92
|
),
|
|
86
93
|
}
|
|
87
94
|
|
|
@@ -89,7 +96,11 @@ class TDEngineConnector(TSDBConnector):
|
|
|
89
96
|
"""Create TDEngine supertables."""
|
|
90
97
|
for table in self.tables:
|
|
91
98
|
create_table_query = self.tables[table]._create_super_table_query()
|
|
92
|
-
self.connection.run(
|
|
99
|
+
self.connection.run(
|
|
100
|
+
statements=create_table_query,
|
|
101
|
+
timeout=self._timeout,
|
|
102
|
+
retries=self._retries,
|
|
103
|
+
)
|
|
93
104
|
|
|
94
105
|
def write_application_event(
|
|
95
106
|
self,
|
|
@@ -101,11 +112,9 @@ class TDEngineConnector(TSDBConnector):
|
|
|
101
112
|
"""
|
|
102
113
|
|
|
103
114
|
table_name = (
|
|
104
|
-
f"{self.project}_"
|
|
105
115
|
f"{event[mm_schemas.WriterEvent.ENDPOINT_ID]}_"
|
|
106
|
-
f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}
|
|
116
|
+
f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}"
|
|
107
117
|
)
|
|
108
|
-
event[mm_schemas.EventFieldType.PROJECT] = self.project
|
|
109
118
|
|
|
110
119
|
if kind == mm_schemas.WriterEventKind.RESULT:
|
|
111
120
|
# Write a new result
|
|
@@ -145,7 +154,9 @@ class TDEngineConnector(TSDBConnector):
|
|
|
145
154
|
statements=[
|
|
146
155
|
create_table_sql,
|
|
147
156
|
insert_statement,
|
|
148
|
-
]
|
|
157
|
+
],
|
|
158
|
+
timeout=self._timeout,
|
|
159
|
+
retries=self._retries,
|
|
149
160
|
)
|
|
150
161
|
|
|
151
162
|
@staticmethod
|
|
@@ -174,7 +185,9 @@ class TDEngineConnector(TSDBConnector):
|
|
|
174
185
|
name=name,
|
|
175
186
|
after=after,
|
|
176
187
|
url=self._tdengine_connection_string,
|
|
177
|
-
supertable=
|
|
188
|
+
supertable=self.tables[
|
|
189
|
+
mm_schemas.TDEngineSuperTables.PREDICTIONS
|
|
190
|
+
].super_table,
|
|
178
191
|
table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
|
|
179
192
|
time_col=mm_schemas.EventFieldType.TIME,
|
|
180
193
|
database=self.database,
|
|
@@ -207,17 +220,24 @@ class TDEngineConnector(TSDBConnector):
|
|
|
207
220
|
"Deleting all project resources using the TDEngine connector",
|
|
208
221
|
project=self.project,
|
|
209
222
|
)
|
|
223
|
+
drop_statements = []
|
|
210
224
|
for table in self.tables:
|
|
211
|
-
|
|
212
|
-
|
|
225
|
+
drop_statements.append(self.tables[table].drop_supertable_query())
|
|
226
|
+
|
|
227
|
+
try:
|
|
228
|
+
self.connection.run(
|
|
229
|
+
statements=drop_statements,
|
|
230
|
+
timeout=self._timeout,
|
|
231
|
+
retries=self._retries,
|
|
232
|
+
)
|
|
233
|
+
except Exception as e:
|
|
234
|
+
logger.warning(
|
|
235
|
+
"Failed to drop TDEngine tables. You may need to drop them manually. "
|
|
236
|
+
"These can be found under the following supertables: app_results, "
|
|
237
|
+
"metrics, and predictions.",
|
|
238
|
+
project=self.project,
|
|
239
|
+
error=mlrun.errors.err_to_str(e),
|
|
213
240
|
)
|
|
214
|
-
subtables = self.connection.run(query=get_subtable_names_query).data
|
|
215
|
-
drop_statements = []
|
|
216
|
-
for subtable in subtables:
|
|
217
|
-
drop_statements.append(
|
|
218
|
-
self.tables[table]._drop_subtable_query(subtable=subtable[0])
|
|
219
|
-
)
|
|
220
|
-
self.connection.run(statements=drop_statements)
|
|
221
241
|
logger.debug(
|
|
222
242
|
"Deleted all project resources using the TDEngine connector",
|
|
223
243
|
project=self.project,
|
|
@@ -269,13 +289,6 @@ class TDEngineConnector(TSDBConnector):
|
|
|
269
289
|
:raise: MLRunInvalidArgumentError if query the provided table failed.
|
|
270
290
|
"""
|
|
271
291
|
|
|
272
|
-
project_condition = f"project = '{self.project}'"
|
|
273
|
-
filter_query = (
|
|
274
|
-
f"({filter_query}) AND ({project_condition})"
|
|
275
|
-
if filter_query
|
|
276
|
-
else project_condition
|
|
277
|
-
)
|
|
278
|
-
|
|
279
292
|
full_query = tdengine_schemas.TDEngineSchema._get_records_query(
|
|
280
293
|
table=table,
|
|
281
294
|
start=start,
|
|
@@ -291,7 +304,9 @@ class TDEngineConnector(TSDBConnector):
|
|
|
291
304
|
)
|
|
292
305
|
logger.debug("Querying TDEngine", query=full_query)
|
|
293
306
|
try:
|
|
294
|
-
query_result = self.connection.run(
|
|
307
|
+
query_result = self.connection.run(
|
|
308
|
+
query=full_query, timeout=self._timeout, retries=self._retries
|
|
309
|
+
)
|
|
295
310
|
except taosws.QueryError as e:
|
|
296
311
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
297
312
|
f"Failed to query table {table} in database {self.database}, {str(e)}"
|
|
@@ -325,12 +340,12 @@ class TDEngineConnector(TSDBConnector):
|
|
|
325
340
|
timestamp_column = mm_schemas.WriterEvent.END_INFER_TIME
|
|
326
341
|
columns = [timestamp_column, mm_schemas.WriterEvent.APPLICATION_NAME]
|
|
327
342
|
if type == "metrics":
|
|
328
|
-
table = mm_schemas.TDEngineSuperTables.METRICS
|
|
343
|
+
table = self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table
|
|
329
344
|
name = mm_schemas.MetricData.METRIC_NAME
|
|
330
345
|
columns += [name, mm_schemas.MetricData.METRIC_VALUE]
|
|
331
346
|
df_handler = self.df_to_metrics_values
|
|
332
347
|
elif type == "results":
|
|
333
|
-
table = mm_schemas.TDEngineSuperTables.APP_RESULTS
|
|
348
|
+
table = self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table
|
|
334
349
|
name = mm_schemas.ResultData.RESULT_NAME
|
|
335
350
|
columns += [
|
|
336
351
|
name,
|
|
@@ -396,7 +411,7 @@ class TDEngineConnector(TSDBConnector):
|
|
|
396
411
|
"both or neither of `aggregation_window` and `agg_funcs` must be provided"
|
|
397
412
|
)
|
|
398
413
|
df = self._get_records(
|
|
399
|
-
table=mm_schemas.TDEngineSuperTables.PREDICTIONS,
|
|
414
|
+
table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
|
|
400
415
|
start=start,
|
|
401
416
|
end=end,
|
|
402
417
|
columns=[mm_schemas.EventFieldType.LATENCY],
|
mlrun/platforms/iguazio.py
CHANGED
|
@@ -97,34 +97,43 @@ class OutputStream:
|
|
|
97
97
|
|
|
98
98
|
self._v3io_client = v3io.dataplane.Client(**v3io_client_kwargs)
|
|
99
99
|
self._container, self._stream_path = split_path(stream_path)
|
|
100
|
+
self._shards = shards
|
|
101
|
+
self._retention_in_hours = retention_in_hours
|
|
102
|
+
self._create = create
|
|
103
|
+
self._endpoint = endpoint
|
|
100
104
|
self._mock = mock
|
|
101
105
|
self._mock_queue = []
|
|
102
106
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
107
|
+
def create_stream(self):
|
|
108
|
+
# this import creates an import loop via the utils module, so putting it in execution path
|
|
109
|
+
from mlrun.utils.helpers import logger
|
|
110
|
+
|
|
111
|
+
logger.debug(
|
|
112
|
+
"Creating output stream",
|
|
113
|
+
endpoint=self._endpoint,
|
|
114
|
+
container=self._container,
|
|
115
|
+
stream_path=self._stream_path,
|
|
116
|
+
shards=self._shards,
|
|
117
|
+
retention_in_hours=self._retention_in_hours,
|
|
118
|
+
)
|
|
119
|
+
response = self._v3io_client.stream.create(
|
|
120
|
+
container=self._container,
|
|
121
|
+
stream_path=self._stream_path,
|
|
122
|
+
shard_count=self._shards or 1,
|
|
123
|
+
retention_period_hours=self._retention_in_hours or 24,
|
|
124
|
+
raise_for_status=v3io.dataplane.RaiseForStatus.never,
|
|
125
|
+
)
|
|
126
|
+
if not (response.status_code == 400 and "ResourceInUse" in str(response.body)):
|
|
127
|
+
response.raise_for_status([409, 204])
|
|
106
128
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
)
|
|
115
|
-
response = self._v3io_client.stream.create(
|
|
116
|
-
container=self._container,
|
|
117
|
-
stream_path=self._stream_path,
|
|
118
|
-
shard_count=shards or 1,
|
|
119
|
-
retention_period_hours=retention_in_hours or 24,
|
|
120
|
-
raise_for_status=v3io.dataplane.RaiseForStatus.never,
|
|
121
|
-
)
|
|
122
|
-
if not (
|
|
123
|
-
response.status_code == 400 and "ResourceInUse" in str(response.body)
|
|
124
|
-
):
|
|
125
|
-
response.raise_for_status([409, 204])
|
|
129
|
+
def _lazy_init(self):
|
|
130
|
+
if self._create and not self._mock:
|
|
131
|
+
self._create = False
|
|
132
|
+
self.create_stream()
|
|
133
|
+
|
|
134
|
+
def push(self, data, partition_key=None):
|
|
135
|
+
self._lazy_init()
|
|
126
136
|
|
|
127
|
-
def push(self, data):
|
|
128
137
|
def dump_record(rec):
|
|
129
138
|
if not isinstance(rec, (str, bytes)):
|
|
130
139
|
return dict_to_json(rec)
|
|
@@ -132,7 +141,14 @@ class OutputStream:
|
|
|
132
141
|
|
|
133
142
|
if not isinstance(data, list):
|
|
134
143
|
data = [data]
|
|
135
|
-
|
|
144
|
+
|
|
145
|
+
records = []
|
|
146
|
+
for rec in data:
|
|
147
|
+
record = {"data": dump_record(rec)}
|
|
148
|
+
if partition_key is not None:
|
|
149
|
+
record["partition_key"] = partition_key
|
|
150
|
+
records.append(record)
|
|
151
|
+
|
|
136
152
|
if self._mock:
|
|
137
153
|
# for mock testing
|
|
138
154
|
self._mock_queue.extend(records)
|
|
@@ -205,7 +221,7 @@ class KafkaOutputStream:
|
|
|
205
221
|
|
|
206
222
|
self._initialized = True
|
|
207
223
|
|
|
208
|
-
def push(self, data):
|
|
224
|
+
def push(self, data, partition_key=None):
|
|
209
225
|
self._lazy_init()
|
|
210
226
|
|
|
211
227
|
def dump_record(rec):
|
|
@@ -226,7 +242,11 @@ class KafkaOutputStream:
|
|
|
226
242
|
else:
|
|
227
243
|
for record in data:
|
|
228
244
|
serialized_record = dump_record(record)
|
|
229
|
-
|
|
245
|
+
if isinstance(partition_key, str):
|
|
246
|
+
partition_key = partition_key.encode("UTF-8")
|
|
247
|
+
self._kafka_producer.send(
|
|
248
|
+
self._topic, serialized_record, key=partition_key
|
|
249
|
+
)
|
|
230
250
|
|
|
231
251
|
|
|
232
252
|
class V3ioStreamClient:
|
mlrun/projects/__init__.py
CHANGED
|
@@ -27,7 +27,12 @@ __all__ = [
|
|
|
27
27
|
]
|
|
28
28
|
|
|
29
29
|
from .operations import build_function, deploy_function, run_function # noqa
|
|
30
|
-
from .pipelines import
|
|
30
|
+
from .pipelines import (
|
|
31
|
+
import_remote_project,
|
|
32
|
+
load_and_run_workflow,
|
|
33
|
+
load_and_run,
|
|
34
|
+
pipeline_context,
|
|
35
|
+
) # noqa
|
|
31
36
|
from .project import (
|
|
32
37
|
MlrunProject,
|
|
33
38
|
ProjectMetadata,
|