mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/__init__.py +26 -22
- mlrun/__main__.py +15 -16
- mlrun/alerts/alert.py +150 -15
- mlrun/api/schemas/__init__.py +1 -9
- mlrun/artifacts/__init__.py +2 -3
- mlrun/artifacts/base.py +62 -19
- mlrun/artifacts/dataset.py +17 -17
- mlrun/artifacts/document.py +454 -0
- mlrun/artifacts/manager.py +28 -18
- mlrun/artifacts/model.py +91 -59
- mlrun/artifacts/plots.py +2 -2
- mlrun/common/constants.py +8 -0
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/artifact.py +1 -1
- mlrun/common/formatters/feature_set.py +2 -0
- mlrun/common/formatters/function.py +1 -0
- mlrun/{model_monitoring/db/stores/v3io_kv/__init__.py → common/formatters/model_endpoint.py} +17 -0
- mlrun/common/formatters/pipeline.py +1 -2
- mlrun/common/formatters/project.py +9 -0
- mlrun/common/model_monitoring/__init__.py +0 -5
- mlrun/common/model_monitoring/helpers.py +12 -62
- mlrun/common/runtimes/constants.py +25 -4
- mlrun/common/schemas/__init__.py +9 -5
- mlrun/common/schemas/alert.py +114 -19
- mlrun/common/schemas/api_gateway.py +3 -3
- mlrun/common/schemas/artifact.py +22 -9
- mlrun/common/schemas/auth.py +8 -4
- mlrun/common/schemas/background_task.py +7 -7
- mlrun/common/schemas/client_spec.py +4 -4
- mlrun/common/schemas/clusterization_spec.py +2 -2
- mlrun/common/schemas/common.py +53 -3
- mlrun/common/schemas/constants.py +15 -0
- mlrun/common/schemas/datastore_profile.py +1 -1
- mlrun/common/schemas/feature_store.py +9 -9
- mlrun/common/schemas/frontend_spec.py +4 -4
- mlrun/common/schemas/function.py +10 -10
- mlrun/common/schemas/hub.py +1 -1
- mlrun/common/schemas/k8s.py +3 -3
- mlrun/common/schemas/memory_reports.py +3 -3
- mlrun/common/schemas/model_monitoring/__init__.py +4 -8
- mlrun/common/schemas/model_monitoring/constants.py +127 -46
- mlrun/common/schemas/model_monitoring/grafana.py +18 -12
- mlrun/common/schemas/model_monitoring/model_endpoints.py +154 -160
- mlrun/common/schemas/notification.py +24 -3
- mlrun/common/schemas/object.py +1 -1
- mlrun/common/schemas/pagination.py +4 -4
- mlrun/common/schemas/partition.py +142 -0
- mlrun/common/schemas/pipeline.py +3 -3
- mlrun/common/schemas/project.py +26 -18
- mlrun/common/schemas/runs.py +3 -3
- mlrun/common/schemas/runtime_resource.py +5 -5
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/secret.py +1 -1
- mlrun/{model_monitoring/db/stores/sqldb/__init__.py → common/schemas/serving.py} +10 -1
- mlrun/common/schemas/tag.py +3 -3
- mlrun/common/schemas/workflow.py +6 -5
- mlrun/common/types.py +1 -0
- mlrun/config.py +157 -89
- mlrun/data_types/__init__.py +5 -3
- mlrun/data_types/infer.py +13 -3
- mlrun/data_types/spark.py +2 -1
- mlrun/datastore/__init__.py +59 -18
- mlrun/datastore/alibaba_oss.py +4 -1
- mlrun/datastore/azure_blob.py +4 -1
- mlrun/datastore/base.py +19 -24
- mlrun/datastore/datastore.py +10 -4
- mlrun/datastore/datastore_profile.py +178 -45
- mlrun/datastore/dbfs_store.py +4 -1
- mlrun/datastore/filestore.py +4 -1
- mlrun/datastore/google_cloud_storage.py +4 -1
- mlrun/datastore/hdfs.py +4 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +4 -1
- mlrun/datastore/s3.py +14 -3
- mlrun/datastore/sources.py +89 -92
- mlrun/datastore/store_resources.py +7 -4
- mlrun/datastore/storeytargets.py +51 -16
- mlrun/datastore/targets.py +38 -31
- mlrun/datastore/utils.py +87 -4
- mlrun/datastore/v3io.py +4 -1
- mlrun/datastore/vectorstore.py +291 -0
- mlrun/datastore/wasbfs/fs.py +13 -12
- mlrun/db/base.py +286 -100
- mlrun/db/httpdb.py +1562 -490
- mlrun/db/nopdb.py +250 -83
- mlrun/errors.py +6 -2
- mlrun/execution.py +194 -50
- mlrun/feature_store/__init__.py +2 -10
- mlrun/feature_store/api.py +20 -458
- mlrun/feature_store/common.py +9 -9
- mlrun/feature_store/feature_set.py +20 -18
- mlrun/feature_store/feature_vector.py +105 -479
- mlrun/feature_store/feature_vector_utils.py +466 -0
- mlrun/feature_store/retrieval/base.py +15 -11
- mlrun/feature_store/retrieval/job.py +2 -1
- mlrun/feature_store/retrieval/storey_merger.py +1 -1
- mlrun/feature_store/steps.py +3 -3
- mlrun/features.py +30 -13
- mlrun/frameworks/__init__.py +1 -2
- mlrun/frameworks/_common/__init__.py +1 -2
- mlrun/frameworks/_common/artifacts_library.py +2 -2
- mlrun/frameworks/_common/mlrun_interface.py +10 -6
- mlrun/frameworks/_common/model_handler.py +31 -31
- mlrun/frameworks/_common/producer.py +3 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
- mlrun/frameworks/_ml_common/__init__.py +1 -2
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_ml_common/model_handler.py +21 -21
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/auto_mlrun/__init__.py +1 -2
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
- mlrun/frameworks/huggingface/__init__.py +1 -2
- mlrun/frameworks/huggingface/model_server.py +9 -9
- mlrun/frameworks/lgbm/__init__.py +47 -44
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
- mlrun/frameworks/lgbm/model_handler.py +15 -11
- mlrun/frameworks/lgbm/model_server.py +11 -7
- mlrun/frameworks/lgbm/utils.py +2 -2
- mlrun/frameworks/onnx/__init__.py +1 -2
- mlrun/frameworks/onnx/dataset.py +3 -3
- mlrun/frameworks/onnx/mlrun_interface.py +2 -2
- mlrun/frameworks/onnx/model_handler.py +7 -5
- mlrun/frameworks/onnx/model_server.py +8 -6
- mlrun/frameworks/parallel_coordinates.py +11 -11
- mlrun/frameworks/pytorch/__init__.py +22 -23
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
- mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
- mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
- mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
- mlrun/frameworks/pytorch/model_handler.py +21 -17
- mlrun/frameworks/pytorch/model_server.py +13 -9
- mlrun/frameworks/sklearn/__init__.py +19 -18
- mlrun/frameworks/sklearn/estimator.py +2 -2
- mlrun/frameworks/sklearn/metric.py +3 -3
- mlrun/frameworks/sklearn/metrics_library.py +8 -6
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
- mlrun/frameworks/sklearn/model_handler.py +4 -3
- mlrun/frameworks/tf_keras/__init__.py +11 -12
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
- mlrun/frameworks/tf_keras/model_handler.py +17 -13
- mlrun/frameworks/tf_keras/model_server.py +12 -8
- mlrun/frameworks/xgboost/__init__.py +19 -18
- mlrun/frameworks/xgboost/model_handler.py +13 -9
- mlrun/k8s_utils.py +2 -5
- mlrun/launcher/base.py +3 -4
- mlrun/launcher/client.py +2 -2
- mlrun/launcher/local.py +6 -2
- mlrun/launcher/remote.py +1 -1
- mlrun/lists.py +8 -4
- mlrun/model.py +132 -46
- mlrun/model_monitoring/__init__.py +3 -5
- mlrun/model_monitoring/api.py +113 -98
- mlrun/model_monitoring/applications/__init__.py +0 -5
- mlrun/model_monitoring/applications/_application_steps.py +81 -50
- mlrun/model_monitoring/applications/base.py +467 -14
- mlrun/model_monitoring/applications/context.py +212 -134
- mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py +6 -2
- mlrun/model_monitoring/applications/evidently/base.py +146 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +89 -56
- mlrun/model_monitoring/applications/results.py +67 -15
- mlrun/model_monitoring/controller.py +701 -315
- mlrun/model_monitoring/db/__init__.py +0 -2
- mlrun/model_monitoring/db/_schedules.py +242 -0
- mlrun/model_monitoring/db/_stats.py +189 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +33 -22
- mlrun/model_monitoring/db/tsdb/base.py +243 -49
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +76 -36
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +534 -88
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +436 -106
- mlrun/model_monitoring/helpers.py +356 -114
- mlrun/model_monitoring/stream_processing.py +190 -345
- mlrun/model_monitoring/tracking_policy.py +11 -4
- mlrun/model_monitoring/writer.py +49 -90
- mlrun/package/__init__.py +3 -6
- mlrun/package/context_handler.py +2 -2
- mlrun/package/packager.py +12 -9
- mlrun/package/packagers/__init__.py +0 -2
- mlrun/package/packagers/default_packager.py +14 -11
- mlrun/package/packagers/numpy_packagers.py +16 -7
- mlrun/package/packagers/pandas_packagers.py +18 -18
- mlrun/package/packagers/python_standard_library_packagers.py +25 -11
- mlrun/package/packagers_manager.py +35 -32
- mlrun/package/utils/__init__.py +0 -3
- mlrun/package/utils/_pickler.py +6 -6
- mlrun/platforms/__init__.py +47 -16
- mlrun/platforms/iguazio.py +4 -1
- mlrun/projects/operations.py +30 -30
- mlrun/projects/pipelines.py +116 -47
- mlrun/projects/project.py +1292 -329
- mlrun/render.py +5 -9
- mlrun/run.py +57 -14
- mlrun/runtimes/__init__.py +1 -3
- mlrun/runtimes/base.py +30 -22
- mlrun/runtimes/daskjob.py +9 -9
- mlrun/runtimes/databricks_job/databricks_runtime.py +6 -5
- mlrun/runtimes/function_reference.py +5 -2
- mlrun/runtimes/generators.py +3 -2
- mlrun/runtimes/kubejob.py +6 -7
- mlrun/runtimes/mounts.py +574 -0
- mlrun/runtimes/mpijob/__init__.py +0 -2
- mlrun/runtimes/mpijob/abstract.py +7 -6
- mlrun/runtimes/nuclio/api_gateway.py +7 -7
- mlrun/runtimes/nuclio/application/application.py +11 -13
- mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
- mlrun/runtimes/nuclio/function.py +127 -70
- mlrun/runtimes/nuclio/serving.py +105 -37
- mlrun/runtimes/pod.py +159 -54
- mlrun/runtimes/remotesparkjob.py +3 -2
- mlrun/runtimes/sparkjob/__init__.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +22 -12
- mlrun/runtimes/utils.py +7 -6
- mlrun/secrets.py +2 -2
- mlrun/serving/__init__.py +8 -0
- mlrun/serving/merger.py +7 -5
- mlrun/serving/remote.py +35 -22
- mlrun/serving/routers.py +186 -240
- mlrun/serving/server.py +41 -10
- mlrun/serving/states.py +432 -118
- mlrun/serving/utils.py +13 -2
- mlrun/serving/v1_serving.py +3 -2
- mlrun/serving/v2_serving.py +161 -203
- mlrun/track/__init__.py +1 -1
- mlrun/track/tracker.py +2 -2
- mlrun/track/trackers/mlflow_tracker.py +6 -5
- mlrun/utils/async_http.py +35 -22
- mlrun/utils/clones.py +7 -4
- mlrun/utils/helpers.py +511 -58
- mlrun/utils/logger.py +119 -13
- mlrun/utils/notifications/notification/__init__.py +22 -19
- mlrun/utils/notifications/notification/base.py +39 -15
- mlrun/utils/notifications/notification/console.py +6 -6
- mlrun/utils/notifications/notification/git.py +11 -11
- mlrun/utils/notifications/notification/ipython.py +10 -9
- mlrun/utils/notifications/notification/mail.py +176 -0
- mlrun/utils/notifications/notification/slack.py +16 -8
- mlrun/utils/notifications/notification/webhook.py +24 -8
- mlrun/utils/notifications/notification_pusher.py +191 -200
- mlrun/utils/regex.py +12 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/METADATA +81 -54
- mlrun-1.8.0.dist-info/RECORD +351 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/WHEEL +1 -1
- mlrun/model_monitoring/applications/evidently_base.py +0 -137
- mlrun/model_monitoring/db/stores/__init__.py +0 -136
- mlrun/model_monitoring/db/stores/base/store.py +0 -213
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
- mlrun/model_monitoring/model_endpoint.py +0 -118
- mlrun-1.7.2rc3.dist-info/RECORD +0 -351
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info/licenses}/LICENSE +0 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py

@@ -12,25 +12,42 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import
-from
-from typing import Union
+from datetime import datetime, timedelta
+from threading import Lock
+from typing import Callable, Final, Literal, Optional, Union

 import pandas as pd
 import taosws
-from taoswswrap.tdengine_connection import (
-    Statement,
-    TDEngineConnection,
-)

 import mlrun.common.schemas.model_monitoring as mm_schemas
+import mlrun.common.types
 import mlrun.model_monitoring.db.tsdb.tdengine.schemas as tdengine_schemas
 import mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps
+from mlrun.datastore.datastore_profile import DatastoreProfile
 from mlrun.model_monitoring.db import TSDBConnector
-from mlrun.model_monitoring.db.tsdb.tdengine.
+from mlrun.model_monitoring.db.tsdb.tdengine.tdengine_connection import (
+    Statement,
+    TDEngineConnection,
+)
 from mlrun.model_monitoring.helpers import get_invocations_fqn
 from mlrun.utils import logger

+_connection = None
+_connection_lock = Lock()
+
+
+class TDEngineTimestampPrecision(mlrun.common.types.StrEnum):
+    """
+    The timestamp precision for the TDEngine database.
+    For more information, see:
+    https://docs.tdengine.com/tdengine-reference/sql-manual/data-types/#timestamp
+    https://docs.tdengine.com/tdengine-reference/sql-manual/manage-databases/#create-database
+    """
+
+    MILLISECOND = "ms"  # TDEngine's default
+    MICROSECOND = "us"  # MLRun's default
+    NANOSECOND = "ns"
+

 class TDEngineConnector(TSDBConnector):
     """
@@ -38,44 +55,46 @@ class TDEngineConnector(TSDBConnector):
     """

     type: str = mm_schemas.TSDBTarget.TDEngine
+    database = f"{tdengine_schemas._MODEL_MONITORING_DATABASE}_{mlrun.mlconf.system_id}"

     def __init__(
         self,
         project: str,
-
+        profile: DatastoreProfile,
+        timestamp_precision: TDEngineTimestampPrecision = TDEngineTimestampPrecision.MICROSECOND,
         **kwargs,
     ):
         super().__init__(project=project)
-        if "connection_string" not in kwargs:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "connection_string is a required parameter for TDEngineConnector."
-            )
-        self._tdengine_connection_string = kwargs.get("connection_string")
-        self.database = database

-        self.
-        self._init_super_tables()
+        self._tdengine_connection_profile = profile

-        self.
-
+        self._timestamp_precision: Final = (  # cannot be changed after initialization
+            timestamp_precision
+        )
+
+        self._init_super_tables()

     @property
     def connection(self) -> TDEngineConnection:
-
-
-
+        global _connection
+
+        if _connection:
+            return _connection
+
+        with _connection_lock:
+            if not _connection:
+                _connection = self._create_connection()
+
+        return _connection

     def _create_connection(self) -> TDEngineConnection:
         """Establish a connection to the TSDB server."""
         logger.debug("Creating a new connection to TDEngine", project=self.project)
-        conn = TDEngineConnection(
-
-            statements=f"CREATE DATABASE IF NOT EXISTS {self.database}",
-            timeout=self._timeout,
-            retries=self._retries,
+        conn = TDEngineConnection(
+            self._tdengine_connection_profile.dsn(),
         )
         conn.prefix_statements = [f"USE {self.database}"]
-
+
         return conn

     def _init_super_tables(self):
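The hunk above replaces the per-instance connection (previously built from a raw `connection_string`) with a single module-level `TDEngineConnection` guarded by the `Lock` imported at the top of the file. The `connection` property uses double-checked locking: a lock-free fast path once the singleton exists, and a locked creation path so concurrent first callers create exactly one connection. A minimal, self-contained sketch of that pattern — the `factory` argument stands in for `_create_connection` and is not mlrun API:

```python
# Runnable sketch of the double-checked locking used by the new `connection`
# property. `factory` stands in for TDEngineConnector._create_connection().
from threading import Lock

_connection = None
_connection_lock = Lock()


def get_connection(factory):
    global _connection
    if _connection:  # fast path: no lock once the singleton exists
        return _connection
    with _connection_lock:  # slow path: only one thread creates it
        if not _connection:
            _connection = factory()
    return _connection


if __name__ == "__main__":
    first = get_connection(object)
    assert first is get_connection(object)  # cached on subsequent calls
```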
@@ -90,16 +109,35 @@ class TDEngineConnector(TSDBConnector):
             mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(
                 project=self.project, database=self.database
             ),
+            mm_schemas.TDEngineSuperTables.ERRORS: tdengine_schemas.Errors(
+                project=self.project, database=self.database
+            ),
         }

+    def _create_db_if_not_exists(self):
+        """Create the database if it does not exist."""
+        self.connection.prefix_statements = []
+        self.connection.run(
+            statements=f"CREATE DATABASE IF NOT EXISTS {self.database} PRECISION '{self._timestamp_precision}'",
+        )
+        self.connection.prefix_statements = [f"USE {self.database}"]
+        logger.debug(
+            "The TDEngine database is currently in use",
+            project=self.project,
+            database=self.database,
+        )
+
     def create_tables(self):
         """Create TDEngine supertables."""
+
+        # Create the database if it does not exist
+        self._create_db_if_not_exists()
+
         for table in self.tables:
             create_table_query = self.tables[table]._create_super_table_query()
-            self.connection
+            conn = self.connection
+            conn.run(
                 statements=create_table_query,
-                timeout=self._timeout,
-                retries=self._retries,
             )

     def write_application_event(
@@ -122,7 +160,6 @@ class TDEngineConnector(TSDBConnector):
             table_name = (
                 f"{table_name}_{event[mm_schemas.ResultData.RESULT_NAME]}"
             ).replace("-", "_")
-            event.pop(mm_schemas.ResultData.CURRENT_STATS, None)

         else:
             # Write a new metric
@@ -145,9 +182,14 @@ class TDEngineConnector(TSDBConnector):

         create_table_sql = table._create_subtable_sql(subtable=table_name, values=event)

+        # we need the string values to be sent to the connection, not the enum
+        columns = {str(key): str(val) for key, val in table.columns.items()}
+
         insert_statement = Statement(
-
-
+            columns=columns,
+            subtable=table_name,
+            values=event,
+            timestamp_precision=self._timestamp_precision,
         )

         self.connection.run(
@@ -155,15 +197,30 @@ class TDEngineConnector(TSDBConnector):
                 create_table_sql,
                 insert_statement,
             ],
-            timeout=self._timeout,
-            retries=self._retries,
         )

     @staticmethod
-    def _convert_to_datetime(val:
+    def _convert_to_datetime(val: Union[str, datetime]) -> datetime:
         return datetime.fromisoformat(val) if isinstance(val, str) else val

-
+    @staticmethod
+    def _get_endpoint_filter(endpoint_id: Union[str, list[str]]) -> str:
+        if isinstance(endpoint_id, str):
+            return f"endpoint_id='{endpoint_id}'"
+        elif isinstance(endpoint_id, list):
+            return f"endpoint_id IN({str(endpoint_id)[1:-1]}) "
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Invalid 'endpoint_id' filter: must be a string or a list."
+            )
+
+    def _drop_database_query(self) -> str:
+        return f"DROP DATABASE IF EXISTS {self.database};"
+
+    def _get_table_name_query(self) -> str:
+        return f"SELECT table_name FROM information_schema.ins_tables where db_name='{self.database}' LIMIT 1;"
+
+    def apply_monitoring_stream_steps(self, graph, **kwarg):
         """
         Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
         different key metric dictionaries. This data is being used by the monitoring dashboards in
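The new `_get_endpoint_filter` helper renders the WHERE-clause fragment that the reader methods later in this diff pass as `filter_query`. A standalone reproduction (same logic, with a plain `ValueError` standing in for `MLRunInvalidArgumentError`), showing the output for both accepted input types; note the list form relies on Python's `repr` of the list and keeps a trailing space:

```python
# Standalone reproduction of _get_endpoint_filter, to show the SQL fragments
# it renders for a single endpoint vs. a list of endpoints.
from typing import Union


def get_endpoint_filter(endpoint_id: Union[str, list[str]]) -> str:
    if isinstance(endpoint_id, str):
        return f"endpoint_id='{endpoint_id}'"
    if isinstance(endpoint_id, list):
        # str(["a", "b"])[1:-1] -> "'a', 'b'"
        return f"endpoint_id IN({str(endpoint_id)[1:-1]}) "
    raise ValueError("Invalid 'endpoint_id' filter: must be a string or a list.")


print(get_endpoint_filter("ep-1"))            # endpoint_id='ep-1'
print(get_endpoint_filter(["ep-1", "ep-2"]))  # endpoint_id IN('ep-1', 'ep-2')
```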
@@ -176,15 +233,15 @@ class TDEngineConnector(TSDBConnector):
         graph.add_step(
             "mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps.ProcessBeforeTDEngine",
             name="ProcessBeforeTDEngine",
-            after="
+            after="FilterNOP",
         )

         def apply_tdengine_target(name, after):
             graph.add_step(
-                "
+                "mlrun.datastore.storeytargets.TDEngineStoreyTarget",
                 name=name,
                 after=after,
-                url=self.
+                url=f"ds://{self._tdengine_connection_profile.name}",
                 supertable=self.tables[
                     mm_schemas.TDEngineSuperTables.PREDICTIONS
                 ].super_table,
@@ -194,9 +251,10 @@ class TDEngineConnector(TSDBConnector):
                 columns=[
                     mm_schemas.EventFieldType.LATENCY,
                     mm_schemas.EventKeyMetrics.CUSTOM_METRICS,
+                    mm_schemas.EventFieldType.ESTIMATED_PREDICTION_COUNT,
+                    mm_schemas.EventFieldType.EFFECTIVE_SAMPLE_COUNT,
                 ],
                 tag_cols=[
-                    mm_schemas.EventFieldType.PROJECT,
                     mm_schemas.EventFieldType.ENDPOINT_ID,
                 ],
                 max_events=1000,
@@ -209,8 +267,95 @@ class TDEngineConnector(TSDBConnector):
             after="ProcessBeforeTDEngine",
         )

-    def handle_model_error(
-
+    def handle_model_error(
+        self,
+        graph,
+        tsdb_batching_max_events: int = 1000,
+        tsdb_batching_timeout_secs: int = 30,
+        **kwargs,
+    ) -> None:
+        graph.add_step(
+            "mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps.ErrorExtractor",
+            name="error_extractor",
+            after="ForwardError",
+        )
+        graph.add_step(
+            "mlrun.datastore.storeytargets.TDEngineStoreyTarget",
+            name="tsdb_error",
+            after="error_extractor",
+            url=f"ds://{self._tdengine_connection_profile.name}",
+            supertable=self.tables[mm_schemas.TDEngineSuperTables.ERRORS].super_table,
+            table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
+            time_col=mm_schemas.EventFieldType.TIME,
+            database=self.database,
+            columns=[
+                mm_schemas.EventFieldType.MODEL_ERROR,
+            ],
+            tag_cols=[
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.ERROR_TYPE,
+            ],
+            max_events=tsdb_batching_max_events,
+            flush_after_seconds=tsdb_batching_timeout_secs,
+        )
+
+    def delete_tsdb_records(
+        self,
+        endpoint_ids: list[str],
+    ):
+        """
+        To delete subtables within TDEngine, we first query the subtables names with the provided endpoint_ids.
+        Then, we drop each subtable.
+        """
+        logger.debug(
+            "Deleting model endpoint resources using the TDEngine connector",
+            project=self.project,
+            number_of_endpoints_to_delete=len(endpoint_ids),
+        )
+
+        # Get all subtables with the provided endpoint_ids
+        subtables = []
+        try:
+            for table in self.tables:
+                get_subtable_query = self.tables[table]._get_subtables_query_by_tag(
+                    filter_tag="endpoint_id", filter_values=endpoint_ids
+                )
+                subtables_result = self.connection.run(
+                    query=get_subtable_query,
+                )
+                subtables.extend([subtable[0] for subtable in subtables_result.data])
+        except Exception as e:
+            logger.warning(
+                "Failed to get subtables for deletion. You may need to delete them manually."
+                "These can be found under the following supertables: app_results, "
+                "metrics, errors, and predictions.",
+                project=self.project,
+                error=mlrun.errors.err_to_str(e),
+            )
+
+        # Prepare the drop statements
+        drop_statements = []
+        for subtable in subtables:
+            drop_statements.append(
+                self.tables[table].drop_subtable_query(subtable=subtable)
+            )
+        try:
+            self.connection.run(
+                statements=drop_statements,
+            )
+        except Exception as e:
+            logger.warning(
+                "Failed to delete model endpoint resources. You may need to delete them manually. "
+                "These can be found under the following supertables: app_results, "
+                "metrics, errors, and predictions.",
+                project=self.project,
+                error=mlrun.errors.err_to_str(e),
+            )
+        logger.debug(
+            "Deleted all model endpoint resources using the TDEngine connector",
+            project=self.project,
+            number_of_endpoints_to_delete=len(endpoint_ids),
+        )

     def delete_tsdb_resources(self):
         """
@@ -227,14 +372,12 @@ class TDEngineConnector(TSDBConnector):
         try:
             self.connection.run(
                 statements=drop_statements,
-                timeout=self._timeout,
-                retries=self._retries,
             )
         except Exception as e:
             logger.warning(
                 "Failed to drop TDEngine tables. You may need to drop them manually. "
                 "These can be found under the following supertables: app_results, "
-                "metrics, and predictions.",
+                "metrics, errors, and predictions.",
                 project=self.project,
                 error=mlrun.errors.err_to_str(e),
             )
@@ -243,6 +386,51 @@ class TDEngineConnector(TSDBConnector):
             project=self.project,
         )

+        # Check if database is empty and if so, drop it
+        self._drop_database_if_empty()
+
+    def _drop_database_if_empty(self):
+        query_random_table_name = self._get_table_name_query()
+        drop_database = False
+        try:
+            table_name = self.connection.run(
+                query=query_random_table_name,
+            )
+            if len(table_name.data) == 0:
+                # no tables were found under the database
+                drop_database = True
+
+        except Exception as e:
+            logger.warning(
+                "Failed to query tables in the database. You may need to drop the database manually if it is empty.",
+                project=self.project,
+                error=mlrun.errors.err_to_str(e),
+            )
+
+        if drop_database:
+            logger.debug(
+                "Going to drop the TDEngine database",
+                project=self.project,
+                database=self.database,
+            )
+            drop_database_query = self._drop_database_query()
+            try:
+                self.connection.run(
+                    statements=drop_database_query,
+                )
+                logger.debug(
+                    "The TDEngine database has been successfully dropped",
+                    project=self.project,
+                    database=self.database,
+                )
+
+            except Exception as e:
+                logger.warning(
+                    "Failed to drop the database. You may need to drop it manually if it is empty.",
+                    project=self.project,
+                    error=mlrun.errors.err_to_str(e),
+                )
+
     def get_model_endpoint_real_time_metrics(
         self,
         endpoint_id: str,
@@ -258,13 +446,17 @@ class TDEngineConnector(TSDBConnector):
         table: str,
         start: datetime,
         end: datetime,
-        columns:
-        filter_query:
-        interval:
-        agg_funcs:
-        limit:
-        sliding_window_step:
+        columns: Optional[list[str]] = None,
+        filter_query: Optional[str] = None,
+        interval: Optional[str] = None,
+        agg_funcs: Optional[list] = None,
+        limit: Optional[int] = None,
+        sliding_window_step: Optional[str] = None,
         timestamp_column: str = mm_schemas.EventFieldType.TIME,
+        group_by: Optional[Union[list[str], str]] = None,
+        preform_agg_columns: Optional[list] = None,
+        order_by: Optional[str] = None,
+        desc: Optional[bool] = None,
     ) -> pd.DataFrame:
         """
         Getting records from TSDB data collection.
@@ -284,6 +476,14 @@ class TDEngineConnector(TSDBConnector):
                                  `sliding_window_step` is provided, interval must be provided as well. Provided
                                  as a string in the format of '1m', '1h', etc.
         :param timestamp_column: The column name that holds the timestamp index.
+        :param group_by:         The column name to group by. Note that if `group_by` is provided,
+                                 aggregation functions must be provided as well.
+        :param preform_agg_columns: The columns to perform aggregation on.
+                                 Note that every provided aggregation function is applied to these columns.
+                                 If not provided, aggregation defaults to all of `columns`;
+                                 if an empty list is provided, no aggregation is performed.
+        :param order_by:         The column or alias to order the query results by.
+        :param desc:             Whether to sort the results in descending order.

         :return: DataFrame with the provided attributes from the data collection.
         :raise: MLRunInvalidArgumentError if query the provided table failed.
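The `group_by`/`agg_funcs` parameters documented above produce result columns that TDEngine names `f"{agg_func}({column})"` — for example `last(time)`, `max(result_status)`, `avg(latency)`, and `count(model_error)` in the helpers below — which the connector then renames back to schema fields. A small runnable sketch of that naming convention; the rename target is taken from `get_last_request` later in this diff:

```python
# Runnable sketch of the aggregated-column naming the reader helpers rely on:
# TDEngine returns columns named "<agg_func>(<column>)", which the connector
# renames to schema fields (e.g. "last(time)" -> "last_request").
agg_funcs = ["last"]
preform_agg_columns = ["time"]

result_columns = [f"{agg}({col})" for agg in agg_funcs for col in preform_agg_columns]
renames = {"last(time)": "last_request"}  # mirrors get_last_request below

print(result_columns)                                      # ['last(time)']
print([renames.get(col, col) for col in result_columns])   # ['last_request']
```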
@@ -301,11 +501,15 @@ class TDEngineConnector(TSDBConnector):
             sliding_window_step=sliding_window_step,
             timestamp_column=timestamp_column,
             database=self.database,
+            group_by=group_by,
+            preform_agg_funcs_columns=preform_agg_columns,
+            order_by=order_by,
+            desc=desc,
         )
         logger.debug("Querying TDEngine", query=full_query)
         try:
             query_result = self.connection.run(
-                query=full_query,
+                query=full_query,
             )
         except taosws.QueryError as e:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -322,16 +526,17 @@ class TDEngineConnector(TSDBConnector):
         start: datetime,
         end: datetime,
         metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
-        type:
-
+        type: Literal["metrics", "results"],
+        with_result_extra_data: bool = False,
+    ) -> Union[
         list[
-
+            Union[
                 mm_schemas.ModelEndpointMonitoringResultValues,
                 mm_schemas.ModelEndpointMonitoringMetricNoData,
             ],
         ],
         list[
-
+            Union[
                 mm_schemas.ModelEndpointMonitoringMetricValues,
                 mm_schemas.ModelEndpointMonitoringMetricNoData,
             ],
@@ -340,6 +545,12 @@ class TDEngineConnector(TSDBConnector):
         timestamp_column = mm_schemas.WriterEvent.END_INFER_TIME
         columns = [timestamp_column, mm_schemas.WriterEvent.APPLICATION_NAME]
         if type == "metrics":
+            if with_result_extra_data:
+                logger.warning(
+                    "The 'with_result_extra_data' parameter is not supported for metrics, just for results",
+                    project=self.project,
+                    endpoint_id=endpoint_id,
+                )
             table = self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table
             name = mm_schemas.MetricData.METRIC_NAME
             columns += [name, mm_schemas.MetricData.METRIC_VALUE]
@@ -353,6 +564,8 @@ class TDEngineConnector(TSDBConnector):
                 mm_schemas.ResultData.RESULT_STATUS,
                 mm_schemas.ResultData.RESULT_KIND,
             ]
+            if with_result_extra_data:
+                columns.append(mm_schemas.ResultData.RESULT_EXTRA_DATA)
             df_handler = self.df_to_results_values
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -389,6 +602,10 @@ class TDEngineConnector(TSDBConnector):
             is_empty=df.empty,
         )

+        if not with_result_extra_data and type == "results":
+            # Set the extra data to an empty string if it's not requested
+            df[mm_schemas.ResultData.RESULT_EXTRA_DATA] = ""
+
         return df_handler(df=df, metrics=metrics, project=self.project)

     def read_predictions(
@@ -397,10 +614,10 @@ class TDEngineConnector(TSDBConnector):
         endpoint_id: str,
         start: datetime,
         end: datetime,
-        aggregation_window:
-        agg_funcs:
-        limit:
-    ) ->
+        aggregation_window: Optional[str] = None,
+        agg_funcs: Optional[list] = None,
+        limit: Optional[int] = None,
+    ) -> Union[
         mm_schemas.ModelEndpointMonitoringMetricValues,
         mm_schemas.ModelEndpointMonitoringMetricNoData,
     ]:
@@ -414,7 +631,7 @@ class TDEngineConnector(TSDBConnector):
             table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
             start=start,
             end=end,
-            columns=[mm_schemas.EventFieldType.
+            columns=[mm_schemas.EventFieldType.ESTIMATED_PREDICTION_COUNT],
             filter_query=f"endpoint_id='{endpoint_id}'",
             agg_funcs=agg_funcs,
             interval=aggregation_window,
@@ -434,10 +651,10 @@ class TDEngineConnector(TSDBConnector):
         df["_wend"] = pd.to_datetime(df["_wend"])
         df.set_index("_wend", inplace=True)

-
-            f"{agg_funcs[0]}({mm_schemas.EventFieldType.
+        estimated_prediction_count = (
+            f"{agg_funcs[0]}({mm_schemas.EventFieldType.ESTIMATED_PREDICTION_COUNT})"
             if agg_funcs
-            else mm_schemas.EventFieldType.
+            else mm_schemas.EventFieldType.ESTIMATED_PREDICTION_COUNT
         )

         return mm_schemas.ModelEndpointMonitoringMetricValues(
@@ -445,7 +662,7 @@ class TDEngineConnector(TSDBConnector):
             values=list(
                 zip(
                     df.index,
-                    df[
+                    df[estimated_prediction_count],
                 )
             ),  # pyright: ignore[reportArgumentType]
         )
@@ -453,56 +670,285 @@ class TDEngineConnector(TSDBConnector):
     def get_last_request(
         self,
         endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
-
+        filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+                mm_schemas.EventFieldType.TIME,
+                mm_schemas.EventFieldType.LATENCY,
+            ],
+            filter_query=filter_query,
+            timestamp_column=mm_schemas.EventFieldType.TIME,
+            agg_funcs=["last"],
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.TIME],
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+            df.rename(
+                columns={
+                    f"last({mm_schemas.EventFieldType.TIME})": mm_schemas.EventFieldType.LAST_REQUEST,
+                    f"{mm_schemas.EventFieldType.LATENCY}": "last_latency",
+                },
+                inplace=True,
+            )
+            df[mm_schemas.EventFieldType.LAST_REQUEST] = pd.to_datetime(
+                df[mm_schemas.EventFieldType.LAST_REQUEST],
+                errors="coerce",
+                format="ISO8601",
+                utc=True,
+            )
+        return df

     def get_drift_status(
         self,
         endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
+        get_raw: bool = False,
     ) -> pd.DataFrame:
-
+        filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ResultData.RESULT_STATUS,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=filter_query,
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            agg_funcs=["max"],
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.ResultData.RESULT_STATUS],
+        )
+        df.rename(
+            columns={
+                f"max({mm_schemas.ResultData.RESULT_STATUS})": mm_schemas.ResultData.RESULT_STATUS
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df

     def get_metrics_metadata(
         self,
-        endpoint_id: str,
-        start:
-        end:
+        endpoint_id: Union[str, list[str]],
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
-
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                mm_schemas.MetricData.METRIC_NAME,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=self._get_endpoint_filter(endpoint_id=endpoint_id),
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            group_by=[
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.MetricData.METRIC_NAME,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            agg_funcs=["last"],
+        )
+        df.rename(
+            columns={
+                f"last({mm_schemas.ApplicationEvent.APPLICATION_NAME})": mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                f"last({mm_schemas.MetricData.METRIC_NAME})": mm_schemas.MetricData.METRIC_NAME,
+                f"last({mm_schemas.EventFieldType.ENDPOINT_ID})": mm_schemas.EventFieldType.ENDPOINT_ID,
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df

     def get_results_metadata(
         self,
-        endpoint_id: str,
-        start:
-        end:
+        endpoint_id: Union[str, list[str]],
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
     ) -> pd.DataFrame:
-
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                mm_schemas.ResultData.RESULT_NAME,
+                mm_schemas.ResultData.RESULT_KIND,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            filter_query=self._get_endpoint_filter(endpoint_id=endpoint_id),
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            group_by=[
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.ResultData.RESULT_NAME,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            agg_funcs=["last"],
+        )
+        df.rename(
+            columns={
+                f"last({mm_schemas.ApplicationEvent.APPLICATION_NAME})": mm_schemas.ApplicationEvent.APPLICATION_NAME,
+                f"last({mm_schemas.ResultData.RESULT_NAME})": mm_schemas.ResultData.RESULT_NAME,
+                f"last({mm_schemas.ResultData.RESULT_KIND})": mm_schemas.ResultData.RESULT_KIND,
+                f"last({mm_schemas.EventFieldType.ENDPOINT_ID})": mm_schemas.EventFieldType.ENDPOINT_ID,
+            },
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df

     def get_error_count(
         self,
         endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
+        get_raw: bool = False,
     ) -> pd.DataFrame:
-
+        filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
+        filter_query += f"AND {mm_schemas.EventFieldType.ERROR_TYPE} = '{mm_schemas.EventFieldType.INFER_ERROR}'"
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.ERRORS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.MODEL_ERROR,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            agg_funcs=["count"],
+            filter_query=filter_query,
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.MODEL_ERROR],
+        )
+        df.rename(
+            columns={f"count({mm_schemas.EventFieldType.MODEL_ERROR})": "error_count"},
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df

     def get_avg_latency(
         self,
         endpoint_ids: Union[str, list[str]],
-        start:
-        end:
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
+        get_raw: bool = False,
     ) -> pd.DataFrame:
-
+        endpoint_ids = (
+            endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
+        )
+        start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
+        start, end = self._get_start_end(start, end)
+        df = self._get_records(
+            table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
+            start=start,
+            end=end,
+            columns=[
+                mm_schemas.EventFieldType.LATENCY,
+                mm_schemas.EventFieldType.ENDPOINT_ID,
+            ],
+            agg_funcs=["avg"],
+            filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
+            group_by=mm_schemas.EventFieldType.ENDPOINT_ID,
+            preform_agg_columns=[mm_schemas.EventFieldType.LATENCY],
+        )
+        df.rename(
+            columns={f"avg({mm_schemas.EventFieldType.LATENCY})": "avg_latency"},
+            inplace=True,
+        )
+        if not df.empty:
+            df.dropna(inplace=True)
+        return df
+
+    async def add_basic_metrics(
+        self,
+        model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
+        project: str,
+        run_in_threadpool: Callable,
+        metric_list: Optional[list[str]] = None,
+    ) -> list[mlrun.common.schemas.ModelEndpoint]:
+        """
+        Add basic metrics to the model endpoint object.
+
+        :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
+                                       be filled with the relevant basic metrics.
+        :param project:                The name of the project.
+        :param run_in_threadpool:      A function that runs another function in a thread pool.
+        :param metric_list:            List of metrics to include from the time series DB. Defaults to all metrics.
+
+        :return: A list of `ModelEndpointMonitoringMetric` objects.
+        """
+
+        uids = [mep.metadata.uid for mep in model_endpoint_objects]
+
+        metric_name_to_function = {
+            "error_count": self.get_error_count,
+            "last_request": self.get_last_request,
+            "avg_latency": self.get_avg_latency,
+            "result_status": self.get_drift_status,
+        }
+        if metric_list is not None:
+            for metric_name in list(metric_name_to_function):
+                if metric_name not in metric_list:
+                    del metric_name_to_function[metric_name]
+
+        metric_name_to_df = {
+            metric_name: function(endpoint_ids=uids)
+            for metric_name, function in metric_name_to_function.items()
+        }
+
+        def add_metrics(
+            mep: mlrun.common.schemas.ModelEndpoint,
+            df_dictionary: dict[str, pd.DataFrame],
+        ):
+            for metric in df_dictionary.keys():
+                df = df_dictionary.get(metric, pd.DataFrame())
+                if not df.empty:
+                    line = df[df["endpoint_id"] == mep.metadata.uid]
+                    if not line.empty and metric in line:
+                        value = line[metric].item()
+                        if isinstance(value, pd.Timestamp):
+                            value = value.to_pydatetime()
+                        setattr(mep.status, metric, value)
+
+            return mep
+
+        return list(
+            map(
+                lambda mep: add_metrics(
+                    mep=mep,
+                    df_dictionary=metric_name_to_df,
+                ),
+                model_endpoint_objects,
+            )
+        )

     # Note: this function serves as a reference for checking the TSDB for the existence of a metric.
     #
     # def read_prediction_metric_for_endpoint_if_exists(
     #     self, endpoint_id: str
-    # ) ->
+    # ) -> Optional[mm_schemas.ModelEndpointMonitoringMetric]:
     #     """
     #     Read the "invocations" metric for the provided model endpoint, and return the metric object
     #     if it exists.