mlrun 1.10.0rc5__py3-none-any.whl → 1.10.0rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__main__.py +47 -4
- mlrun/artifacts/base.py +0 -27
- mlrun/artifacts/dataset.py +0 -8
- mlrun/artifacts/model.py +3 -10
- mlrun/artifacts/plots.py +0 -13
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +14 -2
- mlrun/common/schemas/model_monitoring/functions.py +66 -0
- mlrun/common/schemas/project.py +3 -0
- mlrun/config.py +3 -3
- mlrun/db/base.py +13 -20
- mlrun/db/httpdb.py +48 -65
- mlrun/db/nopdb.py +12 -13
- mlrun/launcher/base.py +1 -0
- mlrun/launcher/client.py +24 -0
- mlrun/launcher/local.py +4 -0
- mlrun/model_monitoring/applications/_application_steps.py +23 -39
- mlrun/model_monitoring/applications/base.py +167 -32
- mlrun/model_monitoring/db/tsdb/base.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +118 -50
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +117 -24
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +106 -15
- mlrun/model_monitoring/helpers.py +0 -3
- mlrun/projects/operations.py +11 -24
- mlrun/projects/project.py +81 -83
- mlrun/runtimes/base.py +0 -27
- mlrun/runtimes/daskjob.py +6 -4
- mlrun/runtimes/databricks_job/databricks_runtime.py +0 -2
- mlrun/runtimes/kubejob.py +5 -8
- mlrun/runtimes/mpijob/abstract.py +2 -2
- mlrun/runtimes/mpijob/v1.py +2 -2
- mlrun/runtimes/nuclio/application/application.py +0 -5
- mlrun/runtimes/nuclio/function.py +2 -11
- mlrun/runtimes/nuclio/serving.py +46 -6
- mlrun/runtimes/pod.py +4 -3
- mlrun/runtimes/remotesparkjob.py +2 -2
- mlrun/runtimes/sparkjob/spark3job.py +2 -2
- mlrun/serving/server.py +97 -3
- mlrun/serving/states.py +16 -18
- mlrun/utils/helpers.py +15 -4
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc5.dist-info → mlrun-1.10.0rc7.dist-info}/METADATA +3 -2
- {mlrun-1.10.0rc5.dist-info → mlrun-1.10.0rc7.dist-info}/RECORD +47 -46
- {mlrun-1.10.0rc5.dist-info → mlrun-1.10.0rc7.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc5.dist-info → mlrun-1.10.0rc7.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc5.dist-info → mlrun-1.10.0rc7.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc5.dist-info → mlrun-1.10.0rc7.dist-info}/top_level.txt +0 -0
|
@@ -11,8 +11,7 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
|
|
15
|
-
import traceback
|
|
14
|
+
import time
|
|
16
15
|
from collections.abc import Callable
|
|
17
16
|
from enum import Enum
|
|
18
17
|
from typing import Any, Final, Optional, Union
|
|
@@ -20,6 +19,9 @@ from typing import Any, Final, Optional, Union
|
|
|
20
19
|
import taosws
|
|
21
20
|
from taosws import TaosStmt
|
|
22
21
|
|
|
22
|
+
import mlrun
|
|
23
|
+
from mlrun.utils import logger
|
|
24
|
+
|
|
23
25
|
|
|
24
26
|
class _StrEnum(str, Enum):
|
|
25
27
|
pass
|
|
@@ -137,40 +139,99 @@ class Statement:
|
|
|
137
139
|
return statement
|
|
138
140
|
|
|
139
141
|
|
|
140
|
-
def _run(connection_string, prefix_statements, q, statements, query):
|
|
141
|
-
try:
|
|
142
|
-
conn = taosws.connect(connection_string)
|
|
143
|
-
|
|
144
|
-
for statement in prefix_statements + statements:
|
|
145
|
-
if isinstance(statement, Statement):
|
|
146
|
-
prepared_statement = statement.prepare(conn.statement())
|
|
147
|
-
prepared_statement.execute()
|
|
148
|
-
else:
|
|
149
|
-
conn.execute(statement)
|
|
150
|
-
|
|
151
|
-
if not query:
|
|
152
|
-
q.put(None)
|
|
153
|
-
return
|
|
154
|
-
|
|
155
|
-
res = conn.query(query)
|
|
156
|
-
|
|
157
|
-
# taosws.TaosField is not serializable
|
|
158
|
-
fields = [
|
|
159
|
-
Field(field.name(), field.type(), field.bytes()) for field in res.fields
|
|
160
|
-
]
|
|
161
|
-
|
|
162
|
-
q.put(QueryResult(list(res), fields))
|
|
163
|
-
except Exception as e:
|
|
164
|
-
tb = traceback.format_exc()
|
|
165
|
-
q.put(ErrorResult(tb, e))
|
|
166
|
-
|
|
167
|
-
|
|
168
142
|
class TDEngineConnection:
|
|
169
|
-
def __init__(self, connection_string):
|
|
143
|
+
def __init__(self, connection_string, max_retries=3, retry_delay=0.5):
|
|
170
144
|
self._connection_string = connection_string
|
|
171
145
|
self.prefix_statements = []
|
|
146
|
+
self._max_retries = max_retries
|
|
147
|
+
self._retry_delay = retry_delay
|
|
172
148
|
|
|
173
|
-
self._conn =
|
|
149
|
+
self._conn = self._create_connection()
|
|
150
|
+
|
|
151
|
+
def _create_connection(self):
|
|
152
|
+
"""Create a new TDEngine connection."""
|
|
153
|
+
return taosws.connect(self._connection_string)
|
|
154
|
+
|
|
155
|
+
def _reconnect(self):
|
|
156
|
+
"""Close current connection and create a new one."""
|
|
157
|
+
try:
|
|
158
|
+
if hasattr(self, "_conn") and self._conn:
|
|
159
|
+
self._conn.close()
|
|
160
|
+
except Exception as e:
|
|
161
|
+
logger.warning(f"Error closing connection during reconnect: {e}")
|
|
162
|
+
|
|
163
|
+
self._conn = self._create_connection()
|
|
164
|
+
logger.info("Successfully reconnected to TDEngine")
|
|
165
|
+
|
|
166
|
+
def _execute_with_retry(self, operation, operation_name, *args, **kwargs):
|
|
167
|
+
"""
|
|
168
|
+
Execute an operation with retry logic for connection failures.
|
|
169
|
+
|
|
170
|
+
:param operation: The function to execute
|
|
171
|
+
:param operation_name: Name of the operation for logging
|
|
172
|
+
:param args: Arguments to pass to the operation
|
|
173
|
+
:param kwargs: Keyword arguments to pass to the operation
|
|
174
|
+
:return: Result of the operation
|
|
175
|
+
"""
|
|
176
|
+
last_exception = None
|
|
177
|
+
|
|
178
|
+
for attempt in range(self._max_retries + 1): # +1 for initial attempt
|
|
179
|
+
try:
|
|
180
|
+
return operation(*args, **kwargs)
|
|
181
|
+
|
|
182
|
+
except taosws.Error as e:
|
|
183
|
+
last_exception = e
|
|
184
|
+
|
|
185
|
+
if attempt < self._max_retries:
|
|
186
|
+
logger.warning(
|
|
187
|
+
f"Connection error during {operation_name} "
|
|
188
|
+
f"(attempt {attempt + 1}/{self._max_retries + 1}): {e}. "
|
|
189
|
+
f"Retrying in {self._retry_delay} seconds..."
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
# Wait before retrying
|
|
193
|
+
time.sleep(self._retry_delay)
|
|
194
|
+
|
|
195
|
+
# Reconnect
|
|
196
|
+
try:
|
|
197
|
+
self._reconnect()
|
|
198
|
+
except Exception as reconnect_error:
|
|
199
|
+
logger.error(f"Failed to reconnect: {reconnect_error}")
|
|
200
|
+
if attempt == self._max_retries - 1:
|
|
201
|
+
# Last attempt, raise the reconnection error
|
|
202
|
+
raise TDEngineError(
|
|
203
|
+
f"Failed to reconnect after {operation_name} failure: {reconnect_error}"
|
|
204
|
+
) from reconnect_error
|
|
205
|
+
continue
|
|
206
|
+
else:
|
|
207
|
+
# Max retries exceeded
|
|
208
|
+
logger.error(
|
|
209
|
+
f"Max retries ({self._max_retries}) exceeded for {operation_name}"
|
|
210
|
+
)
|
|
211
|
+
break
|
|
212
|
+
|
|
213
|
+
except Exception as e:
|
|
214
|
+
# Non-TDEngine error, don't retry
|
|
215
|
+
raise TDEngineError(
|
|
216
|
+
f"Unexpected error during {operation_name}: {e}"
|
|
217
|
+
) from e
|
|
218
|
+
|
|
219
|
+
# If we get here, all retries failed
|
|
220
|
+
raise TDEngineError(
|
|
221
|
+
f"Failed to {operation_name} after {self._max_retries} retries: {last_exception}"
|
|
222
|
+
) from last_exception
|
|
223
|
+
|
|
224
|
+
def _execute_statement(self, statement):
|
|
225
|
+
"""Execute a single statement (string or Statement object)."""
|
|
226
|
+
if isinstance(statement, Statement):
|
|
227
|
+
prepared_statement = statement.prepare(self._conn.statement())
|
|
228
|
+
prepared_statement.execute()
|
|
229
|
+
else:
|
|
230
|
+
self._conn.execute(statement)
|
|
231
|
+
|
|
232
|
+
def _execute_query(self, query):
|
|
233
|
+
"""Execute a query and return the result."""
|
|
234
|
+
return self._conn.query(query)
|
|
174
235
|
|
|
175
236
|
def run(
|
|
176
237
|
self,
|
|
@@ -181,33 +242,40 @@ class TDEngineConnection:
|
|
|
181
242
|
if not isinstance(statements, list):
|
|
182
243
|
statements = [statements]
|
|
183
244
|
|
|
184
|
-
|
|
245
|
+
# Execute all statements with retry logic
|
|
246
|
+
all_statements = self.prefix_statements + statements
|
|
247
|
+
for i, statement in enumerate(all_statements):
|
|
248
|
+
operation_name = f"execute statement {i + 1}/{len(all_statements)}"
|
|
185
249
|
if isinstance(statement, Statement):
|
|
186
|
-
|
|
187
|
-
prepared_statement = statement.prepare(self._conn.statement())
|
|
188
|
-
prepared_statement.execute()
|
|
189
|
-
except taosws.Error as e:
|
|
190
|
-
raise TDEngineError(
|
|
191
|
-
f"Failed to run prepared statement `{self._conn.statement()}`: {e}"
|
|
192
|
-
) from e
|
|
250
|
+
operation_name += " (prepared)"
|
|
193
251
|
else:
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
raise TDEngineError(
|
|
198
|
-
f"Failed to run statement `{statement}`: {e}"
|
|
199
|
-
) from e
|
|
252
|
+
operation_name += f" `{statement}`"
|
|
253
|
+
|
|
254
|
+
self._execute_with_retry(self._execute_statement, operation_name, statement)
|
|
200
255
|
|
|
201
256
|
if not query:
|
|
202
257
|
return None
|
|
203
258
|
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
259
|
+
# Execute query with retry logic
|
|
260
|
+
res = self._execute_with_retry(
|
|
261
|
+
self._execute_query, f"execute query `{query}`", query
|
|
262
|
+
)
|
|
208
263
|
|
|
264
|
+
# Process results
|
|
209
265
|
fields = [
|
|
210
266
|
Field(field.name(), field.type(), field.bytes()) for field in res.fields
|
|
211
267
|
]
|
|
212
268
|
|
|
213
269
|
return QueryResult(list(res), fields)
|
|
270
|
+
|
|
271
|
+
def close(self):
|
|
272
|
+
"""Close the connection."""
|
|
273
|
+
try:
|
|
274
|
+
if self._conn:
|
|
275
|
+
self._conn.close()
|
|
276
|
+
logger.debug("TDEngine connection closed")
|
|
277
|
+
self._conn = None
|
|
278
|
+
except Exception as e:
|
|
279
|
+
logger.warning(
|
|
280
|
+
f"Error closing TDEngine connection: {mlrun.errors.err_to_str(e)}"
|
|
281
|
+
)
|
|
@@ -12,8 +12,8 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import threading
|
|
15
16
|
from datetime import datetime, timedelta
|
|
16
|
-
from threading import Lock
|
|
17
17
|
from typing import Callable, Final, Literal, Optional, Union
|
|
18
18
|
|
|
19
19
|
import pandas as pd
|
|
@@ -32,8 +32,8 @@ from mlrun.model_monitoring.db.tsdb.tdengine.tdengine_connection import (
|
|
|
32
32
|
from mlrun.model_monitoring.helpers import get_invocations_fqn
|
|
33
33
|
from mlrun.utils import logger
|
|
34
34
|
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
# Thread-local storage for connections
|
|
36
|
+
_thread_local = threading.local()
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
class TDEngineTimestampPrecision(mlrun.common.types.StrEnum):
|
|
@@ -76,16 +76,15 @@ class TDEngineConnector(TSDBConnector):
|
|
|
76
76
|
|
|
77
77
|
@property
|
|
78
78
|
def connection(self) -> TDEngineConnection:
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
return _connection
|
|
79
|
+
if not hasattr(_thread_local, "connection"):
|
|
80
|
+
_thread_local.connection = self._create_connection()
|
|
81
|
+
logger.debug(
|
|
82
|
+
"Created new TDEngine connection for thread",
|
|
83
|
+
project=self.project,
|
|
84
|
+
thread_name=threading.current_thread().name,
|
|
85
|
+
thread_id=threading.get_ident(),
|
|
86
|
+
)
|
|
87
|
+
return _thread_local.connection
|
|
89
88
|
|
|
90
89
|
def _create_connection(self) -> TDEngineConnection:
|
|
91
90
|
"""Establish a connection to the TSDB server."""
|
|
@@ -204,14 +203,27 @@ class TDEngineConnector(TSDBConnector):
|
|
|
204
203
|
return datetime.fromisoformat(val) if isinstance(val, str) else val
|
|
205
204
|
|
|
206
205
|
@staticmethod
|
|
207
|
-
def
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
206
|
+
def _generate_filter_query(
|
|
207
|
+
filter_column: str, filter_values: Union[str, list[Union[str, int]]]
|
|
208
|
+
) -> Optional[str]:
|
|
209
|
+
"""
|
|
210
|
+
Generate a filter query for TDEngine based on the provided column and values.
|
|
211
|
+
|
|
212
|
+
:param filter_column: The column to filter by.
|
|
213
|
+
:param filter_values: A single value or a list of values to filter by.
|
|
214
|
+
|
|
215
|
+
:return: A string representing the filter query.
|
|
216
|
+
:raise: MLRunInvalidArgumentError if the filter values are not of type string or list.
|
|
217
|
+
"""
|
|
218
|
+
|
|
219
|
+
if isinstance(filter_values, str):
|
|
220
|
+
return f"{filter_column}='{filter_values}'"
|
|
221
|
+
elif isinstance(filter_values, list):
|
|
222
|
+
return f"{filter_column} IN ({', '.join(repr(v) for v in filter_values)}) "
|
|
212
223
|
else:
|
|
213
224
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
214
|
-
"Invalid
|
|
225
|
+
f"Invalid filter values {filter_values}: must be a string or a list, "
|
|
226
|
+
f"got {type(filter_values).__name__}; filter values: {filter_values}"
|
|
215
227
|
)
|
|
216
228
|
|
|
217
229
|
def _drop_database_query(self) -> str:
|
|
@@ -673,7 +685,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
673
685
|
start: Optional[datetime] = None,
|
|
674
686
|
end: Optional[datetime] = None,
|
|
675
687
|
) -> pd.DataFrame:
|
|
676
|
-
filter_query = self.
|
|
688
|
+
filter_query = self._generate_filter_query(
|
|
689
|
+
filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
690
|
+
filter_values=endpoint_ids,
|
|
691
|
+
)
|
|
677
692
|
start, end = self._get_start_end(start, end)
|
|
678
693
|
df = self._get_records(
|
|
679
694
|
table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
|
|
@@ -714,7 +729,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
714
729
|
end: Optional[datetime] = None,
|
|
715
730
|
get_raw: bool = False,
|
|
716
731
|
) -> pd.DataFrame:
|
|
717
|
-
filter_query = self.
|
|
732
|
+
filter_query = self._generate_filter_query(
|
|
733
|
+
filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
734
|
+
filter_values=endpoint_ids,
|
|
735
|
+
)
|
|
718
736
|
start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
|
|
719
737
|
start, end = self._get_start_end(start, end)
|
|
720
738
|
df = self._get_records(
|
|
@@ -741,6 +759,72 @@ class TDEngineConnector(TSDBConnector):
|
|
|
741
759
|
df.dropna(inplace=True)
|
|
742
760
|
return df
|
|
743
761
|
|
|
762
|
+
def count_results_by_status(
|
|
763
|
+
self,
|
|
764
|
+
start: Optional[Union[datetime, str]] = None,
|
|
765
|
+
end: Optional[Union[datetime, str]] = None,
|
|
766
|
+
endpoint_ids: Optional[Union[str, list[str]]] = None,
|
|
767
|
+
application_names: Optional[Union[str, list[str]]] = None,
|
|
768
|
+
result_status_list: Optional[list[int]] = None,
|
|
769
|
+
) -> dict[tuple[str, int], int]:
|
|
770
|
+
filter_query = ""
|
|
771
|
+
now = mlrun.utils.datetime_now()
|
|
772
|
+
start = start or (now - timedelta(hours=24))
|
|
773
|
+
end = end or now
|
|
774
|
+
if endpoint_ids:
|
|
775
|
+
filter_query = self._generate_filter_query(
|
|
776
|
+
filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
777
|
+
filter_values=endpoint_ids,
|
|
778
|
+
)
|
|
779
|
+
if application_names:
|
|
780
|
+
app_filter_query = self._generate_filter_query(
|
|
781
|
+
filter_column=mm_schemas.ApplicationEvent.APPLICATION_NAME,
|
|
782
|
+
filter_values=application_names,
|
|
783
|
+
)
|
|
784
|
+
if filter_query:
|
|
785
|
+
filter_query += f" AND {app_filter_query}"
|
|
786
|
+
else:
|
|
787
|
+
filter_query = app_filter_query
|
|
788
|
+
if result_status_list:
|
|
789
|
+
status_filter_query = self._generate_filter_query(
|
|
790
|
+
filter_column=mm_schemas.ResultData.RESULT_STATUS,
|
|
791
|
+
filter_values=result_status_list,
|
|
792
|
+
)
|
|
793
|
+
if filter_query:
|
|
794
|
+
filter_query += f" AND {status_filter_query}"
|
|
795
|
+
else:
|
|
796
|
+
filter_query = status_filter_query
|
|
797
|
+
|
|
798
|
+
df = self._get_records(
|
|
799
|
+
table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
|
|
800
|
+
start=start,
|
|
801
|
+
end=end,
|
|
802
|
+
columns=[
|
|
803
|
+
mm_schemas.WriterEvent.APPLICATION_NAME,
|
|
804
|
+
mm_schemas.ResultData.RESULT_STATUS,
|
|
805
|
+
mm_schemas.ResultData.RESULT_VALUE,
|
|
806
|
+
],
|
|
807
|
+
filter_query=filter_query,
|
|
808
|
+
timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
|
|
809
|
+
group_by=[
|
|
810
|
+
mm_schemas.WriterEvent.APPLICATION_NAME,
|
|
811
|
+
mm_schemas.ResultData.RESULT_STATUS,
|
|
812
|
+
],
|
|
813
|
+
agg_funcs=["count"],
|
|
814
|
+
preform_agg_columns=[mm_schemas.ResultData.RESULT_VALUE],
|
|
815
|
+
)
|
|
816
|
+
if df.empty:
|
|
817
|
+
return {}
|
|
818
|
+
|
|
819
|
+
# Convert DataFrame to a dictionary
|
|
820
|
+
return {
|
|
821
|
+
(
|
|
822
|
+
row[mm_schemas.WriterEvent.APPLICATION_NAME],
|
|
823
|
+
row[mm_schemas.ResultData.RESULT_STATUS],
|
|
824
|
+
): row["count(result_value)"]
|
|
825
|
+
for _, row in df.iterrows()
|
|
826
|
+
}
|
|
827
|
+
|
|
744
828
|
def get_metrics_metadata(
|
|
745
829
|
self,
|
|
746
830
|
endpoint_id: Union[str, list[str]],
|
|
@@ -757,7 +841,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
757
841
|
mm_schemas.MetricData.METRIC_NAME,
|
|
758
842
|
mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
759
843
|
],
|
|
760
|
-
filter_query=self.
|
|
844
|
+
filter_query=self._generate_filter_query(
|
|
845
|
+
filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
846
|
+
filter_values=endpoint_id,
|
|
847
|
+
),
|
|
761
848
|
timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
|
|
762
849
|
group_by=[
|
|
763
850
|
mm_schemas.WriterEvent.APPLICATION_NAME,
|
|
@@ -795,7 +882,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
795
882
|
mm_schemas.ResultData.RESULT_KIND,
|
|
796
883
|
mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
797
884
|
],
|
|
798
|
-
filter_query=self.
|
|
885
|
+
filter_query=self._generate_filter_query(
|
|
886
|
+
filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
887
|
+
filter_values=endpoint_id,
|
|
888
|
+
),
|
|
799
889
|
timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
|
|
800
890
|
group_by=[
|
|
801
891
|
mm_schemas.WriterEvent.APPLICATION_NAME,
|
|
@@ -824,7 +914,10 @@ class TDEngineConnector(TSDBConnector):
|
|
|
824
914
|
end: Optional[datetime] = None,
|
|
825
915
|
get_raw: bool = False,
|
|
826
916
|
) -> pd.DataFrame:
|
|
827
|
-
filter_query = self.
|
|
917
|
+
filter_query = self._generate_filter_query(
|
|
918
|
+
filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
|
|
919
|
+
filter_values=endpoint_ids,
|
|
920
|
+
)
|
|
828
921
|
filter_query += f"AND {mm_schemas.EventFieldType.ERROR_TYPE} = '{mm_schemas.EventFieldType.INFER_ERROR}'"
|
|
829
922
|
start, end = self._get_start_end(start, end)
|
|
830
923
|
df = self._get_records(
|
|
@@ -417,6 +417,7 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
417
417
|
mm_schemas.WriterEvent.END_INFER_TIME,
|
|
418
418
|
mm_schemas.WriterEvent.ENDPOINT_ID,
|
|
419
419
|
mm_schemas.WriterEvent.APPLICATION_NAME,
|
|
420
|
+
mm_schemas.WriterEvent.ENDPOINT_NAME,
|
|
420
421
|
]
|
|
421
422
|
|
|
422
423
|
if kind == mm_schemas.WriterEventKind.METRIC:
|
|
@@ -694,22 +695,26 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
694
695
|
)
|
|
695
696
|
|
|
696
697
|
@staticmethod
|
|
697
|
-
def
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
698
|
+
def _generate_filter_query(
|
|
699
|
+
filter_key: str, filter_values: Union[str, list[str]]
|
|
700
|
+
) -> Optional[str]:
|
|
701
|
+
if isinstance(filter_values, str):
|
|
702
|
+
return f"{filter_key}=='{filter_values}'"
|
|
703
|
+
elif isinstance(filter_values, list):
|
|
704
|
+
if len(filter_values) > V3IO_FRAMESD_MEPS_LIMIT:
|
|
702
705
|
logger.info(
|
|
703
|
-
"The number of
|
|
704
|
-
"retrieving all the
|
|
706
|
+
"The number of filter values exceeds the v3io-engine filter-expression limit, "
|
|
707
|
+
"retrieving all the values from the db.",
|
|
708
|
+
filter_key=filter_key,
|
|
705
709
|
limit=V3IO_FRAMESD_MEPS_LIMIT,
|
|
706
|
-
amount=len(
|
|
710
|
+
amount=len(filter_values),
|
|
707
711
|
)
|
|
708
712
|
return None
|
|
709
|
-
return f"
|
|
713
|
+
return f"{filter_key} IN ({', '.join(repr(v) for v in filter_values)}) "
|
|
710
714
|
else:
|
|
711
715
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
712
|
-
f"Invalid
|
|
716
|
+
f"Invalid filter key {filter_key}: must be a string or a list, got {type(filter_values).__name__}; "
|
|
717
|
+
f"filter values: {filter_values}"
|
|
713
718
|
)
|
|
714
719
|
|
|
715
720
|
def read_metrics_data(
|
|
@@ -946,7 +951,11 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
946
951
|
end: Optional[datetime] = None,
|
|
947
952
|
get_raw: bool = False,
|
|
948
953
|
) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
|
|
949
|
-
filter_query = self.
|
|
954
|
+
filter_query = self._generate_filter_query(
|
|
955
|
+
filter_key=mm_schemas.ApplicationEvent.ENDPOINT_ID,
|
|
956
|
+
filter_values=endpoint_ids,
|
|
957
|
+
)
|
|
958
|
+
|
|
950
959
|
start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
|
|
951
960
|
start, end = self._get_start_end(start, end)
|
|
952
961
|
res = self._get_records(
|
|
@@ -976,7 +985,10 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
976
985
|
end: Optional[datetime] = None,
|
|
977
986
|
) -> pd.DataFrame:
|
|
978
987
|
start, end = self._get_start_end(start, end)
|
|
979
|
-
filter_query = self.
|
|
988
|
+
filter_query = self._generate_filter_query(
|
|
989
|
+
filter_key=mm_schemas.ApplicationEvent.ENDPOINT_ID,
|
|
990
|
+
filter_values=endpoint_id,
|
|
991
|
+
)
|
|
980
992
|
df = self._get_records(
|
|
981
993
|
table=mm_schemas.V3IOTSDBTables.METRICS,
|
|
982
994
|
start=start,
|
|
@@ -998,7 +1010,10 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
998
1010
|
end: Optional[datetime] = None,
|
|
999
1011
|
) -> pd.DataFrame:
|
|
1000
1012
|
start, end = self._get_start_end(start, end)
|
|
1001
|
-
filter_query = self.
|
|
1013
|
+
filter_query = self._generate_filter_query(
|
|
1014
|
+
filter_key=mm_schemas.ApplicationEvent.ENDPOINT_ID,
|
|
1015
|
+
filter_values=endpoint_id,
|
|
1016
|
+
)
|
|
1002
1017
|
df = self._get_records(
|
|
1003
1018
|
table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
|
|
1004
1019
|
start=start,
|
|
@@ -1025,7 +1040,10 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
1025
1040
|
end: Optional[datetime] = None,
|
|
1026
1041
|
get_raw: bool = False,
|
|
1027
1042
|
) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
|
|
1028
|
-
filter_query = self.
|
|
1043
|
+
filter_query = self._generate_filter_query(
|
|
1044
|
+
filter_key=mm_schemas.ApplicationEvent.ENDPOINT_ID,
|
|
1045
|
+
filter_values=endpoint_ids,
|
|
1046
|
+
)
|
|
1029
1047
|
if filter_query:
|
|
1030
1048
|
filter_query += f"AND {mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}'"
|
|
1031
1049
|
else:
|
|
@@ -1062,7 +1080,10 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
1062
1080
|
end: Optional[datetime] = None,
|
|
1063
1081
|
get_raw: bool = False,
|
|
1064
1082
|
) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
|
|
1065
|
-
filter_query = self.
|
|
1083
|
+
filter_query = self._generate_filter_query(
|
|
1084
|
+
filter_key=mm_schemas.ApplicationEvent.ENDPOINT_ID,
|
|
1085
|
+
filter_values=endpoint_ids,
|
|
1086
|
+
)
|
|
1066
1087
|
start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
|
|
1067
1088
|
start, end = self._get_start_end(start, end)
|
|
1068
1089
|
res = self._get_records(
|
|
@@ -1177,3 +1198,73 @@ class V3IOTSDBConnector(TSDBConnector):
|
|
|
1177
1198
|
mep.status.last_request = last_request_dictionary.get(
|
|
1178
1199
|
uid, mep.status.last_request
|
|
1179
1200
|
)
|
|
1201
|
+
|
|
1202
|
+
def count_results_by_status(
|
|
1203
|
+
self,
|
|
1204
|
+
start: Optional[Union[datetime, str]] = None,
|
|
1205
|
+
end: Optional[Union[datetime, str]] = None,
|
|
1206
|
+
endpoint_ids: Optional[Union[str, list[str]]] = None,
|
|
1207
|
+
application_names: Optional[Union[str, list[str]]] = None,
|
|
1208
|
+
result_status_list: Optional[list[int]] = None,
|
|
1209
|
+
) -> dict[tuple[str, int], int]:
|
|
1210
|
+
now = mlrun.utils.datetime_now()
|
|
1211
|
+
start = start or (now - timedelta(hours=24))
|
|
1212
|
+
end = end or now
|
|
1213
|
+
filter_query = ""
|
|
1214
|
+
if endpoint_ids:
|
|
1215
|
+
filter_query = self._generate_filter_query(
|
|
1216
|
+
filter_key=mm_schemas.ApplicationEvent.ENDPOINT_ID,
|
|
1217
|
+
filter_values=endpoint_ids,
|
|
1218
|
+
)
|
|
1219
|
+
if application_names:
|
|
1220
|
+
app_filter_query = self._generate_filter_query(
|
|
1221
|
+
filter_key=mm_schemas.ApplicationEvent.APPLICATION_NAME,
|
|
1222
|
+
filter_values=application_names,
|
|
1223
|
+
)
|
|
1224
|
+
if filter_query:
|
|
1225
|
+
filter_query += f" AND {app_filter_query}"
|
|
1226
|
+
else:
|
|
1227
|
+
filter_query = app_filter_query
|
|
1228
|
+
|
|
1229
|
+
df = self._get_records(
|
|
1230
|
+
table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
|
|
1231
|
+
start=start,
|
|
1232
|
+
end=end,
|
|
1233
|
+
columns=[
|
|
1234
|
+
mm_schemas.ResultData.RESULT_VALUE,
|
|
1235
|
+
mm_schemas.ResultData.RESULT_STATUS,
|
|
1236
|
+
],
|
|
1237
|
+
filter_query=filter_query,
|
|
1238
|
+
)
|
|
1239
|
+
|
|
1240
|
+
# filter result status
|
|
1241
|
+
if result_status_list and not df.empty:
|
|
1242
|
+
df = df[df[mm_schemas.ResultData.RESULT_STATUS].isin(result_status_list)]
|
|
1243
|
+
|
|
1244
|
+
if df.empty:
|
|
1245
|
+
return {}
|
|
1246
|
+
else:
|
|
1247
|
+
# convert application name to lower case
|
|
1248
|
+
df[mm_schemas.ApplicationEvent.APPLICATION_NAME] = df[
|
|
1249
|
+
mm_schemas.ApplicationEvent.APPLICATION_NAME
|
|
1250
|
+
].str.lower()
|
|
1251
|
+
|
|
1252
|
+
df = (
|
|
1253
|
+
df[
|
|
1254
|
+
[
|
|
1255
|
+
mm_schemas.ApplicationEvent.APPLICATION_NAME,
|
|
1256
|
+
mm_schemas.ResultData.RESULT_STATUS,
|
|
1257
|
+
mm_schemas.ResultData.RESULT_VALUE,
|
|
1258
|
+
]
|
|
1259
|
+
]
|
|
1260
|
+
.groupby(
|
|
1261
|
+
[
|
|
1262
|
+
mm_schemas.ApplicationEvent.APPLICATION_NAME,
|
|
1263
|
+
mm_schemas.ResultData.RESULT_STATUS,
|
|
1264
|
+
],
|
|
1265
|
+
observed=True,
|
|
1266
|
+
)
|
|
1267
|
+
.count()
|
|
1268
|
+
)
|
|
1269
|
+
|
|
1270
|
+
return df[mm_schemas.ResultData.RESULT_VALUE].to_dict()
|
|
@@ -22,14 +22,11 @@ import numpy as np
|
|
|
22
22
|
import pandas as pd
|
|
23
23
|
|
|
24
24
|
import mlrun
|
|
25
|
-
import mlrun.artifacts
|
|
26
25
|
import mlrun.common.model_monitoring.helpers
|
|
27
26
|
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
28
27
|
import mlrun.data_types.infer
|
|
29
28
|
import mlrun.datastore.datastore_profile
|
|
30
|
-
import mlrun.model_monitoring
|
|
31
29
|
import mlrun.platforms.iguazio
|
|
32
|
-
import mlrun.utils.helpers
|
|
33
30
|
from mlrun.common.schemas import ModelEndpoint
|
|
34
31
|
from mlrun.common.schemas.model_monitoring.model_endpoints import (
|
|
35
32
|
ModelEndpointMonitoringMetric,
|
mlrun/projects/operations.py
CHANGED
|
@@ -281,7 +281,7 @@ def build_function(
|
|
|
281
281
|
mlrun_version_specifier=None,
|
|
282
282
|
builder_env: Optional[dict] = None,
|
|
283
283
|
project_object=None,
|
|
284
|
-
overwrite_build_params: bool =
|
|
284
|
+
overwrite_build_params: bool = True,
|
|
285
285
|
extra_args: Optional[str] = None,
|
|
286
286
|
force_build: bool = False,
|
|
287
287
|
) -> Union[BuildStatus, mlrun_pipelines.models.PipelineNodeWrapper]:
|
|
@@ -308,13 +308,6 @@ def build_function(
|
|
|
308
308
|
e.g. extra_args="--skip-tls-verify --build-arg A=val"
|
|
309
309
|
:param force_build: Force building the image, even when no changes were made
|
|
310
310
|
"""
|
|
311
|
-
if not overwrite_build_params:
|
|
312
|
-
# TODO: change overwrite_build_params default to True in 1.10.0
|
|
313
|
-
warnings.warn(
|
|
314
|
-
"The `overwrite_build_params` parameter default will change from 'False' to 'True' in 1.10.0.",
|
|
315
|
-
mlrun.utils.OverwriteBuildParamsWarning,
|
|
316
|
-
)
|
|
317
|
-
|
|
318
311
|
engine, function = _get_engine_and_function(function, project_object)
|
|
319
312
|
if function.kind in mlrun.runtimes.RuntimeKinds.nuclio_runtimes():
|
|
320
313
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
@@ -340,22 +333,16 @@ def build_function(
|
|
|
340
333
|
skip_deployed=skip_deployed,
|
|
341
334
|
)
|
|
342
335
|
else:
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
secret=secret_name,
|
|
354
|
-
requirements=requirements,
|
|
355
|
-
requirements_file=requirements_file,
|
|
356
|
-
overwrite=overwrite_build_params,
|
|
357
|
-
extra_args=extra_args,
|
|
358
|
-
)
|
|
336
|
+
function.build_config(
|
|
337
|
+
image=image,
|
|
338
|
+
base_image=base_image,
|
|
339
|
+
commands=commands,
|
|
340
|
+
secret=secret_name,
|
|
341
|
+
requirements=requirements,
|
|
342
|
+
requirements_file=requirements_file,
|
|
343
|
+
overwrite=overwrite_build_params,
|
|
344
|
+
extra_args=extra_args,
|
|
345
|
+
)
|
|
359
346
|
ready = function.deploy(
|
|
360
347
|
watch=True,
|
|
361
348
|
with_mlrun=with_mlrun,
|