mlrun 1.6.4rc2__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +26 -112
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +144 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +46 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +47 -48
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +69 -0
- mlrun/common/db/sql_session.py +2 -3
- mlrun/common/formatters/__init__.py +19 -0
- mlrun/common/formatters/artifact.py +21 -0
- mlrun/common/formatters/base.py +78 -0
- mlrun/common/formatters/function.py +41 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/helpers.py +1 -2
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +24 -4
- mlrun/common/schemas/alert.py +203 -0
- mlrun/common/schemas/api_gateway.py +148 -0
- mlrun/common/schemas/artifact.py +18 -8
- mlrun/common/schemas/auth.py +11 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -1
- mlrun/common/schemas/feature_store.py +16 -16
- mlrun/common/schemas/frontend_spec.py +8 -7
- mlrun/common/schemas/function.py +5 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +18 -3
- mlrun/common/schemas/model_monitoring/constants.py +83 -26
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +99 -16
- mlrun/common/schemas/notification.py +4 -4
- mlrun/common/schemas/object.py +2 -2
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +1 -10
- mlrun/common/schemas/project.py +24 -23
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +3 -3
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +2 -2
- mlrun/common/types.py +7 -1
- mlrun/config.py +54 -17
- mlrun/data_types/to_pandas.py +10 -12
- mlrun/datastore/__init__.py +5 -8
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +17 -5
- mlrun/datastore/base.py +62 -39
- mlrun/datastore/datastore.py +28 -9
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/filestore.py +0 -1
- mlrun/datastore/google_cloud_storage.py +6 -2
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/redis.py +6 -2
- mlrun/datastore/s3.py +9 -0
- mlrun/datastore/snowflake_utils.py +43 -0
- mlrun/datastore/sources.py +201 -96
- mlrun/datastore/spark_utils.py +1 -2
- mlrun/datastore/store_resources.py +7 -7
- mlrun/datastore/targets.py +358 -104
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +5 -1
- mlrun/db/base.py +185 -35
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +614 -179
- mlrun/db/nopdb.py +210 -26
- mlrun/errors.py +12 -1
- mlrun/execution.py +41 -24
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +40 -72
- mlrun/feature_store/common.py +1 -1
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +28 -30
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/conversion.py +11 -13
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +9 -3
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +34 -24
- mlrun/feature_store/steps.py +37 -34
- mlrun/features.py +9 -20
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +2 -3
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +4 -3
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -6
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +14 -16
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +8 -6
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +17 -11
- mlrun/launcher/remote.py +16 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +238 -73
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +138 -315
- mlrun/model_monitoring/application.py +5 -296
- mlrun/model_monitoring/applications/__init__.py +24 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +349 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +104 -84
- mlrun/model_monitoring/controller_handler.py +13 -5
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +64 -40
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +310 -165
- mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
- mlrun/model_monitoring/db/tsdb/base.py +329 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +127 -28
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/prometheus.py +1 -4
- mlrun/model_monitoring/stream_processing.py +62 -231
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +152 -124
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +6 -6
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +35 -21
- mlrun/projects/pipelines.py +68 -99
- mlrun/projects/project.py +830 -266
- mlrun/render.py +3 -11
- mlrun/run.py +162 -166
- mlrun/runtimes/__init__.py +62 -7
- mlrun/runtimes/base.py +39 -32
- mlrun/runtimes/daskjob.py +8 -8
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +7 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +28 -122
- mlrun/runtimes/local.py +6 -3
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +709 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +523 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +112 -73
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +45 -51
- mlrun/runtimes/pod.py +286 -88
- mlrun/runtimes/remotesparkjob.py +2 -2
- mlrun/runtimes/sparkjob/spark3job.py +51 -34
- mlrun/runtimes/utils.py +7 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +13 -10
- mlrun/serving/server.py +22 -26
- mlrun/serving/states.py +99 -25
- mlrun/serving/utils.py +3 -3
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +59 -20
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +1 -2
- mlrun/utils/async_http.py +5 -7
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +3 -3
- mlrun/utils/helpers.py +183 -197
- mlrun/utils/http.py +2 -5
- mlrun/utils/logger.py +76 -14
- mlrun/utils/notifications/notification/__init__.py +17 -12
- mlrun/utils/notifications/notification/base.py +14 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +3 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +101 -21
- mlrun/utils/notifications/notification/webhook.py +11 -1
- mlrun/utils/notifications/notification_pusher.py +155 -30
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +2 -4
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +31 -19
- mlrun-1.7.0rc20.dist-info/RECORD +353 -0
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc2.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
|
@@ -11,36 +11,91 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
#
|
|
15
14
|
|
|
16
15
|
import json
|
|
17
16
|
import os
|
|
18
17
|
import typing
|
|
18
|
+
from dataclasses import dataclass
|
|
19
|
+
from http import HTTPStatus
|
|
19
20
|
|
|
20
21
|
import v3io.dataplane
|
|
21
|
-
import
|
|
22
|
+
import v3io.dataplane.output
|
|
23
|
+
import v3io.dataplane.response
|
|
22
24
|
|
|
23
25
|
import mlrun.common.model_monitoring.helpers
|
|
24
|
-
import mlrun.common.schemas.model_monitoring
|
|
26
|
+
import mlrun.common.schemas.model_monitoring as mm_schemas
|
|
27
|
+
import mlrun.model_monitoring.db
|
|
25
28
|
import mlrun.utils.v3io_clients
|
|
26
29
|
from mlrun.utils import logger
|
|
27
30
|
|
|
28
|
-
from .model_endpoint_store import ModelEndpointStore
|
|
29
|
-
|
|
30
31
|
# Fields to encode before storing in the KV table or to decode after retrieving
|
|
31
32
|
fields_to_encode_decode = [
|
|
32
|
-
|
|
33
|
-
|
|
33
|
+
mm_schemas.EventFieldType.FEATURE_STATS,
|
|
34
|
+
mm_schemas.EventFieldType.CURRENT_STATS,
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
_METRIC_FIELDS: list[str] = [
|
|
38
|
+
mm_schemas.WriterEvent.APPLICATION_NAME,
|
|
39
|
+
mm_schemas.MetricData.METRIC_NAME,
|
|
40
|
+
mm_schemas.MetricData.METRIC_VALUE,
|
|
41
|
+
mm_schemas.WriterEvent.START_INFER_TIME,
|
|
42
|
+
mm_schemas.WriterEvent.END_INFER_TIME,
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class SchemaField(typing.TypedDict):
|
|
47
|
+
name: str
|
|
48
|
+
type: str
|
|
49
|
+
nullable: bool
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass
|
|
53
|
+
class SchemaParams:
|
|
54
|
+
key: str
|
|
55
|
+
fields: list[SchemaField]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
_RESULT_SCHEMA: list[SchemaField] = [
|
|
59
|
+
SchemaField(
|
|
60
|
+
name=mm_schemas.ResultData.RESULT_NAME,
|
|
61
|
+
type=mm_schemas.GrafanaColumnType.STRING,
|
|
62
|
+
nullable=False,
|
|
63
|
+
)
|
|
34
64
|
]
|
|
35
65
|
|
|
66
|
+
_METRIC_SCHEMA: list[SchemaField] = [
|
|
67
|
+
SchemaField(
|
|
68
|
+
name=mm_schemas.WriterEvent.APPLICATION_NAME,
|
|
69
|
+
type=mm_schemas.GrafanaColumnType.STRING,
|
|
70
|
+
nullable=False,
|
|
71
|
+
),
|
|
72
|
+
SchemaField(
|
|
73
|
+
name=mm_schemas.MetricData.METRIC_NAME,
|
|
74
|
+
type=mm_schemas.GrafanaColumnType.STRING,
|
|
75
|
+
nullable=False,
|
|
76
|
+
),
|
|
77
|
+
]
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
_KIND_TO_SCHEMA_PARAMS: dict[mm_schemas.WriterEventKind, SchemaParams] = {
|
|
81
|
+
mm_schemas.WriterEventKind.RESULT: SchemaParams(
|
|
82
|
+
key=mm_schemas.WriterEvent.APPLICATION_NAME, fields=_RESULT_SCHEMA
|
|
83
|
+
),
|
|
84
|
+
mm_schemas.WriterEventKind.METRIC: SchemaParams(
|
|
85
|
+
key="metric_id", fields=_METRIC_SCHEMA
|
|
86
|
+
),
|
|
87
|
+
}
|
|
36
88
|
|
|
37
|
-
|
|
89
|
+
_EXCLUDE_SCHEMA_FILTER_EXPRESSION = '__name!=".#schema"'
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class KVStoreBase(mlrun.model_monitoring.db.StoreBase):
|
|
38
93
|
"""
|
|
39
94
|
Handles the DB operations when the DB target is from type KV. For the KV operations, we use an instance of V3IO
|
|
40
95
|
client and usually the KV table can be found under v3io:///users/pipelines/project-name/model-endpoints/endpoints/.
|
|
41
96
|
"""
|
|
42
97
|
|
|
43
|
-
def __init__(self, project: str, access_key: str):
|
|
98
|
+
def __init__(self, project: str, access_key: typing.Optional[str] = None) -> None:
|
|
44
99
|
super().__init__(project=project)
|
|
45
100
|
# Initialize a V3IO client instance
|
|
46
101
|
self.access_key = access_key or os.environ.get("V3IO_ACCESS_KEY")
|
|
@@ -50,7 +105,7 @@ class KVModelEndpointStore(ModelEndpointStore):
|
|
|
50
105
|
# Get the KV table path and container
|
|
51
106
|
self.path, self.container = self._get_path_and_container()
|
|
52
107
|
|
|
53
|
-
def write_model_endpoint(self, endpoint:
|
|
108
|
+
def write_model_endpoint(self, endpoint: dict[str, typing.Any]):
|
|
54
109
|
"""
|
|
55
110
|
Create a new endpoint record in the KV table.
|
|
56
111
|
|
|
@@ -65,14 +120,14 @@ class KVModelEndpointStore(ModelEndpointStore):
|
|
|
65
120
|
self.client.kv.put(
|
|
66
121
|
container=self.container,
|
|
67
122
|
table_path=self.path,
|
|
68
|
-
key=endpoint[
|
|
123
|
+
key=endpoint[mm_schemas.EventFieldType.UID],
|
|
69
124
|
attributes=endpoint,
|
|
70
125
|
)
|
|
71
126
|
|
|
72
127
|
self._infer_kv_schema()
|
|
73
128
|
|
|
74
129
|
def update_model_endpoint(
|
|
75
|
-
self, endpoint_id: str, attributes:
|
|
130
|
+
self, endpoint_id: str, attributes: dict[str, typing.Any]
|
|
76
131
|
):
|
|
77
132
|
"""
|
|
78
133
|
Update a model endpoint record with a given attributes.
|
|
@@ -114,7 +169,7 @@ class KVModelEndpointStore(ModelEndpointStore):
|
|
|
114
169
|
def get_model_endpoint(
|
|
115
170
|
self,
|
|
116
171
|
endpoint_id: str,
|
|
117
|
-
) ->
|
|
172
|
+
) -> dict[str, typing.Any]:
|
|
118
173
|
"""
|
|
119
174
|
Get a single model endpoint record.
|
|
120
175
|
|
|
@@ -152,7 +207,7 @@ class KVModelEndpointStore(ModelEndpointStore):
|
|
|
152
207
|
"""Getting path and container based on the model monitoring configurations"""
|
|
153
208
|
path = mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
|
|
154
209
|
project=self.project,
|
|
155
|
-
kind=
|
|
210
|
+
kind=mm_schemas.ModelMonitoringStoreKinds.ENDPOINTS,
|
|
156
211
|
)
|
|
157
212
|
(
|
|
158
213
|
_,
|
|
@@ -167,10 +222,10 @@ class KVModelEndpointStore(ModelEndpointStore):
|
|
|
167
222
|
self,
|
|
168
223
|
model: str = None,
|
|
169
224
|
function: str = None,
|
|
170
|
-
labels:
|
|
225
|
+
labels: list[str] = None,
|
|
171
226
|
top_level: bool = None,
|
|
172
|
-
uids:
|
|
173
|
-
) ->
|
|
227
|
+
uids: list = None,
|
|
228
|
+
) -> list[dict[str, typing.Any]]:
|
|
174
229
|
"""
|
|
175
230
|
Returns a list of model endpoint dictionaries, supports filtering by model, function, labels or top level.
|
|
176
231
|
By default, when no filters are applied, all available model endpoints for the given project will
|
|
@@ -218,17 +273,11 @@ class KVModelEndpointStore(ModelEndpointStore):
|
|
|
218
273
|
if uids is None:
|
|
219
274
|
uids = []
|
|
220
275
|
for item in items:
|
|
221
|
-
if
|
|
276
|
+
if mm_schemas.EventFieldType.UID not in item:
|
|
222
277
|
# This is kept for backwards compatibility - in old versions the key column named endpoint_id
|
|
223
|
-
uids.append(
|
|
224
|
-
item[
|
|
225
|
-
mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID
|
|
226
|
-
]
|
|
227
|
-
)
|
|
278
|
+
uids.append(item[mm_schemas.EventFieldType.ENDPOINT_ID])
|
|
228
279
|
else:
|
|
229
|
-
uids.append(
|
|
230
|
-
item[mlrun.common.schemas.model_monitoring.EventFieldType.UID]
|
|
231
|
-
)
|
|
280
|
+
uids.append(item[mm_schemas.EventFieldType.UID])
|
|
232
281
|
|
|
233
282
|
# Add each relevant model endpoint to the model endpoints list
|
|
234
283
|
for endpoint_id in uids:
|
|
@@ -239,29 +288,20 @@ class KVModelEndpointStore(ModelEndpointStore):
|
|
|
239
288
|
|
|
240
289
|
return endpoint_list
|
|
241
290
|
|
|
242
|
-
def delete_model_endpoints_resources(
|
|
243
|
-
self, endpoints: typing.List[typing.Dict[str, typing.Any]]
|
|
244
|
-
):
|
|
291
|
+
def delete_model_endpoints_resources(self):
|
|
245
292
|
"""
|
|
246
|
-
Delete all model endpoints resources in
|
|
247
|
-
|
|
248
|
-
:param endpoints: A list of model endpoints flattened dictionaries.
|
|
293
|
+
Delete all model endpoints resources in V3IO KV.
|
|
249
294
|
"""
|
|
250
295
|
|
|
296
|
+
endpoints = self.list_model_endpoints()
|
|
297
|
+
|
|
251
298
|
# Delete model endpoint record from KV table
|
|
252
299
|
for endpoint_dict in endpoints:
|
|
253
|
-
if
|
|
254
|
-
mlrun.common.schemas.model_monitoring.EventFieldType.UID
|
|
255
|
-
not in endpoint_dict
|
|
256
|
-
):
|
|
300
|
+
if mm_schemas.EventFieldType.UID not in endpoint_dict:
|
|
257
301
|
# This is kept for backwards compatibility - in old versions the key column named endpoint_id
|
|
258
|
-
endpoint_id = endpoint_dict[
|
|
259
|
-
mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID
|
|
260
|
-
]
|
|
302
|
+
endpoint_id = endpoint_dict[mm_schemas.EventFieldType.ENDPOINT_ID]
|
|
261
303
|
else:
|
|
262
|
-
endpoint_id = endpoint_dict[
|
|
263
|
-
mlrun.common.schemas.model_monitoring.EventFieldType.UID
|
|
264
|
-
]
|
|
304
|
+
endpoint_id = endpoint_dict[mm_schemas.EventFieldType.UID]
|
|
265
305
|
self.delete_model_endpoint(
|
|
266
306
|
endpoint_id,
|
|
267
307
|
)
|
|
@@ -284,119 +324,133 @@ class KVModelEndpointStore(ModelEndpointStore):
|
|
|
284
324
|
raise_for_status=v3io.dataplane.RaiseForStatus.never,
|
|
285
325
|
)
|
|
286
326
|
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
# Generate the required tsdb paths
|
|
291
|
-
tsdb_path, filtered_path = self._generate_tsdb_paths()
|
|
327
|
+
@staticmethod
|
|
328
|
+
def _get_results_table_path(endpoint_id: str) -> str:
|
|
329
|
+
return endpoint_id
|
|
292
330
|
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
backend=mlrun.common.schemas.model_monitoring.TimeSeriesTarget.TSDB,
|
|
297
|
-
table=filtered_path,
|
|
298
|
-
)
|
|
299
|
-
except v3io_frames.errors.DeleteError as e:
|
|
300
|
-
if "No TSDB schema file found" not in str(e):
|
|
301
|
-
logger.warning(
|
|
302
|
-
f"Failed to delete TSDB table '{filtered_path}'",
|
|
303
|
-
err=mlrun.errors.err_to_str(e),
|
|
304
|
-
)
|
|
305
|
-
# Final cleanup of tsdb path
|
|
306
|
-
tsdb_path.replace("://u", ":///u")
|
|
307
|
-
store, _ = mlrun.store_manager.get_or_create_store(tsdb_path)
|
|
308
|
-
store.rm(tsdb_path, recursive=True)
|
|
331
|
+
@staticmethod
|
|
332
|
+
def _get_metrics_table_path(endpoint_id: str) -> str:
|
|
333
|
+
return f"{endpoint_id}_metrics"
|
|
309
334
|
|
|
310
|
-
def
|
|
335
|
+
def write_application_event(
|
|
311
336
|
self,
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
end: str = "now",
|
|
316
|
-
access_key: str = None,
|
|
317
|
-
) -> typing.Dict[str, typing.List[typing.Tuple[str, float]]]:
|
|
337
|
+
event: dict[str, typing.Any],
|
|
338
|
+
kind: mm_schemas.WriterEventKind = mm_schemas.WriterEventKind.RESULT,
|
|
339
|
+
) -> None:
|
|
318
340
|
"""
|
|
319
|
-
|
|
320
|
-
`predictions_per_second` and `latency_avg_5m` but also custom metrics defined by the user.
|
|
341
|
+
Write a new application event in the target table.
|
|
321
342
|
|
|
322
|
-
:param
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
`'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the
|
|
327
|
-
earliest time.
|
|
328
|
-
:param end: The end time of the metrics. Can be represented by a string containing an RFC 3339
|
|
329
|
-
time, a Unix timestamp in milliseconds, a relative time (`'now'` or
|
|
330
|
-
`'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the
|
|
331
|
-
earliest time.
|
|
332
|
-
:param access_key: V3IO access key that will be used for generating Frames client object. If not
|
|
333
|
-
provided, the access key will be retrieved from the environment variables.
|
|
334
|
-
|
|
335
|
-
:return: A dictionary of metrics in which the key is a metric name and the value is a list of tuples that
|
|
336
|
-
includes timestamps and the values.
|
|
343
|
+
:param event: An event dictionary that represents the application result, should be corresponded to the
|
|
344
|
+
schema defined in the :py:class:`~mlrun.common.schemas.model_monitoring.constants.WriterEvent`
|
|
345
|
+
object.
|
|
346
|
+
:param kind: The type of the event, can be either "result" or "metric".
|
|
337
347
|
"""
|
|
338
348
|
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
if
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
+
container = self.get_v3io_monitoring_apps_container(project_name=self.project)
|
|
350
|
+
endpoint_id = event.pop(mm_schemas.WriterEvent.ENDPOINT_ID)
|
|
351
|
+
|
|
352
|
+
if kind == mm_schemas.WriterEventKind.METRIC:
|
|
353
|
+
table_path = self._get_metrics_table_path(endpoint_id)
|
|
354
|
+
key = f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}.{event[mm_schemas.MetricData.METRIC_NAME]}"
|
|
355
|
+
attributes = {event_key: event[event_key] for event_key in _METRIC_FIELDS}
|
|
356
|
+
elif kind == mm_schemas.WriterEventKind.RESULT:
|
|
357
|
+
table_path = self._get_results_table_path(endpoint_id)
|
|
358
|
+
key = event.pop(mm_schemas.WriterEvent.APPLICATION_NAME)
|
|
359
|
+
metric_name = event.pop(mm_schemas.ResultData.RESULT_NAME)
|
|
360
|
+
attributes = {metric_name: json.dumps(event)}
|
|
361
|
+
else:
|
|
362
|
+
raise ValueError(f"Invalid {kind = }")
|
|
349
363
|
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
)
|
|
356
|
-
)
|
|
357
|
-
(
|
|
358
|
-
_,
|
|
359
|
-
container,
|
|
360
|
-
events_path,
|
|
361
|
-
) = mlrun.common.model_monitoring.helpers.parse_model_endpoint_store_prefix(
|
|
362
|
-
events_path
|
|
364
|
+
self.client.kv.update(
|
|
365
|
+
container=container,
|
|
366
|
+
table_path=table_path,
|
|
367
|
+
key=key,
|
|
368
|
+
attributes=attributes,
|
|
363
369
|
)
|
|
364
370
|
|
|
365
|
-
|
|
366
|
-
frames_client = mlrun.utils.v3io_clients.get_frames_client(
|
|
367
|
-
token=access_key,
|
|
368
|
-
address=mlrun.mlconf.v3io_framesd,
|
|
371
|
+
schema_file = self.client.kv.new_cursor(
|
|
369
372
|
container=container,
|
|
373
|
+
table_path=table_path,
|
|
374
|
+
filter_expression='__name==".#schema"',
|
|
370
375
|
)
|
|
371
376
|
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
377
|
+
if not schema_file.all():
|
|
378
|
+
logger.info(
|
|
379
|
+
"Generating a new V3IO KV schema file",
|
|
380
|
+
container=container,
|
|
381
|
+
table_path=table_path,
|
|
382
|
+
)
|
|
383
|
+
self._generate_kv_schema(
|
|
384
|
+
container=container, table_path=table_path, kind=kind
|
|
380
385
|
)
|
|
386
|
+
logger.info("Updated V3IO KV successfully", key=key)
|
|
387
|
+
|
|
388
|
+
def _generate_kv_schema(
|
|
389
|
+
self, *, container: str, table_path: str, kind: mm_schemas.WriterEventKind
|
|
390
|
+
) -> None:
|
|
391
|
+
"""Generate V3IO KV schema file which will be used by the model monitoring applications dashboard in Grafana."""
|
|
392
|
+
schema_params = _KIND_TO_SCHEMA_PARAMS[kind]
|
|
393
|
+
res = self.client.kv.create_schema(
|
|
394
|
+
container=container,
|
|
395
|
+
table_path=table_path,
|
|
396
|
+
key=schema_params.key,
|
|
397
|
+
fields=schema_params.fields,
|
|
398
|
+
)
|
|
399
|
+
if res.status_code != HTTPStatus.OK:
|
|
400
|
+
raise mlrun.errors.MLRunBadRequestError(
|
|
401
|
+
f"Couldn't infer schema for endpoint {table_path} which is required for Grafana dashboards"
|
|
402
|
+
)
|
|
403
|
+
else:
|
|
404
|
+
logger.info("Generated V3IO KV schema successfully", table_path=table_path)
|
|
405
|
+
|
|
406
|
+
def get_last_analyzed(self, endpoint_id: str, application_name: str) -> int:
|
|
407
|
+
"""
|
|
408
|
+
Get the last analyzed time for the provided model endpoint and application.
|
|
381
409
|
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
for metric in metrics:
|
|
385
|
-
metric_data = data_dict.get(metric)
|
|
386
|
-
if metric_data is None:
|
|
387
|
-
continue
|
|
410
|
+
:param endpoint_id: The unique id of the model endpoint.
|
|
411
|
+
:param application_name: Registered application name.
|
|
388
412
|
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
]
|
|
392
|
-
metrics_mapping[metric] = values
|
|
413
|
+
:return: Timestamp as a Unix time.
|
|
414
|
+
:raise: MLRunNotFoundError if last analyzed value is not found.
|
|
393
415
|
|
|
394
|
-
|
|
395
|
-
|
|
416
|
+
"""
|
|
417
|
+
try:
|
|
418
|
+
data = self.client.kv.get(
|
|
419
|
+
container=self._get_monitoring_schedules_container(
|
|
420
|
+
project_name=self.project
|
|
421
|
+
),
|
|
422
|
+
table_path=endpoint_id,
|
|
423
|
+
key=application_name,
|
|
424
|
+
)
|
|
425
|
+
return data.output.item[mm_schemas.SchedulingKeys.LAST_ANALYZED]
|
|
426
|
+
except v3io.dataplane.response.HttpResponseError as err:
|
|
427
|
+
logger.debug("Error while getting last analyzed time", err=err)
|
|
428
|
+
raise mlrun.errors.MLRunNotFoundError(
|
|
429
|
+
f"No last analyzed value has been found for {application_name} "
|
|
430
|
+
f"that processes model endpoint {endpoint_id}",
|
|
431
|
+
)
|
|
396
432
|
|
|
397
|
-
|
|
433
|
+
def update_last_analyzed(
|
|
434
|
+
self, endpoint_id: str, application_name: str, last_analyzed: int
|
|
435
|
+
):
|
|
436
|
+
"""
|
|
437
|
+
Update the last analyzed time for the provided model endpoint and application.
|
|
398
438
|
|
|
399
|
-
|
|
439
|
+
:param endpoint_id: The unique id of the model endpoint.
|
|
440
|
+
:param application_name: Registered application name.
|
|
441
|
+
:param last_analyzed: Timestamp as a Unix time that represents the last analyzed time of a certain
|
|
442
|
+
application and model endpoint.
|
|
443
|
+
"""
|
|
444
|
+
self.client.kv.put(
|
|
445
|
+
container=self._get_monitoring_schedules_container(
|
|
446
|
+
project_name=self.project
|
|
447
|
+
),
|
|
448
|
+
table_path=endpoint_id,
|
|
449
|
+
key=application_name,
|
|
450
|
+
attributes={mm_schemas.SchedulingKeys.LAST_ANALYZED: last_analyzed},
|
|
451
|
+
)
|
|
452
|
+
|
|
453
|
+
def _generate_tsdb_paths(self) -> tuple[str, str]:
|
|
400
454
|
"""Generate a short path to the TSDB resources and a filtered path for the frames object
|
|
401
455
|
:return: A tuple of:
|
|
402
456
|
[0] = Short path to the TSDB resources
|
|
@@ -406,7 +460,7 @@ class KVModelEndpointStore(ModelEndpointStore):
|
|
|
406
460
|
full_path = (
|
|
407
461
|
mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
|
|
408
462
|
project=self.project,
|
|
409
|
-
kind=
|
|
463
|
+
kind=mm_schemas.ModelMonitoringStoreKinds.EVENTS,
|
|
410
464
|
)
|
|
411
465
|
)
|
|
412
466
|
|
|
@@ -455,7 +509,7 @@ class KVModelEndpointStore(ModelEndpointStore):
|
|
|
455
509
|
project: str,
|
|
456
510
|
function: str = None,
|
|
457
511
|
model: str = None,
|
|
458
|
-
labels:
|
|
512
|
+
labels: list[str] = None,
|
|
459
513
|
top_level: bool = False,
|
|
460
514
|
) -> str:
|
|
461
515
|
"""
|
|
@@ -502,8 +556,8 @@ class KVModelEndpointStore(ModelEndpointStore):
|
|
|
502
556
|
# Apply top_level filter (remove endpoints that considered a child of a router)
|
|
503
557
|
if top_level:
|
|
504
558
|
filter_expression.append(
|
|
505
|
-
f"(endpoint_type=='{str(
|
|
506
|
-
f"OR endpoint_type=='{str(
|
|
559
|
+
f"(endpoint_type=='{str(mm_schemas.EndpointType.NODE_EP.value)}' "
|
|
560
|
+
f"OR endpoint_type=='{str(mm_schemas.EndpointType.ROUTER.value)}')"
|
|
507
561
|
)
|
|
508
562
|
|
|
509
563
|
return " AND ".join(filter_expression)
|
|
@@ -523,41 +577,31 @@ class KVModelEndpointStore(ModelEndpointStore):
|
|
|
523
577
|
# Validate default value for `error_count`
|
|
524
578
|
# For backwards compatibility reasons, we validate that the model endpoint includes the `error_count` key
|
|
525
579
|
if (
|
|
526
|
-
|
|
527
|
-
and endpoint[
|
|
528
|
-
mlrun.common.schemas.model_monitoring.EventFieldType.ERROR_COUNT
|
|
529
|
-
]
|
|
530
|
-
== "null"
|
|
580
|
+
mm_schemas.EventFieldType.ERROR_COUNT in endpoint
|
|
581
|
+
and endpoint[mm_schemas.EventFieldType.ERROR_COUNT] == "null"
|
|
531
582
|
):
|
|
532
|
-
endpoint[
|
|
533
|
-
mlrun.common.schemas.model_monitoring.EventFieldType.ERROR_COUNT
|
|
534
|
-
] = "0"
|
|
583
|
+
endpoint[mm_schemas.EventFieldType.ERROR_COUNT] = "0"
|
|
535
584
|
|
|
536
585
|
# Validate default value for `metrics`
|
|
537
586
|
# For backwards compatibility reasons, we validate that the model endpoint includes the `metrics` key
|
|
538
587
|
if (
|
|
539
|
-
|
|
540
|
-
and endpoint[
|
|
541
|
-
== "null"
|
|
588
|
+
mm_schemas.EventFieldType.METRICS in endpoint
|
|
589
|
+
and endpoint[mm_schemas.EventFieldType.METRICS] == "null"
|
|
542
590
|
):
|
|
543
|
-
endpoint[
|
|
544
|
-
|
|
545
|
-
{
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
mlrun.common.schemas.model_monitoring.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
|
|
549
|
-
}
|
|
591
|
+
endpoint[mm_schemas.EventFieldType.METRICS] = json.dumps(
|
|
592
|
+
{
|
|
593
|
+
mm_schemas.EventKeyMetrics.GENERIC: {
|
|
594
|
+
mm_schemas.EventLiveStats.LATENCY_AVG_1H: 0,
|
|
595
|
+
mm_schemas.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
|
|
550
596
|
}
|
|
551
|
-
|
|
597
|
+
}
|
|
552
598
|
)
|
|
553
599
|
# Validate key `uid` instead of `endpoint_id`
|
|
554
600
|
# For backwards compatibility reasons, we replace the `endpoint_id` with `uid` which is the updated key name
|
|
555
|
-
if
|
|
556
|
-
endpoint[
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
]
|
|
560
|
-
)
|
|
601
|
+
if mm_schemas.EventFieldType.ENDPOINT_ID in endpoint:
|
|
602
|
+
endpoint[mm_schemas.EventFieldType.UID] = endpoint[
|
|
603
|
+
mm_schemas.EventFieldType.ENDPOINT_ID
|
|
604
|
+
]
|
|
561
605
|
|
|
562
606
|
@staticmethod
|
|
563
607
|
def _encode_field(field: typing.Union[str, bytes]) -> bytes:
|
|
@@ -574,3 +618,104 @@ class KVModelEndpointStore(ModelEndpointStore):
|
|
|
574
618
|
if isinstance(field, bytes):
|
|
575
619
|
return field.decode()
|
|
576
620
|
return field
|
|
621
|
+
|
|
622
|
+
@staticmethod
|
|
623
|
+
def get_v3io_monitoring_apps_container(project_name: str) -> str:
|
|
624
|
+
return f"users/pipelines/{project_name}/monitoring-apps"
|
|
625
|
+
|
|
626
|
+
@staticmethod
|
|
627
|
+
def _get_monitoring_schedules_container(project_name: str) -> str:
|
|
628
|
+
return f"users/pipelines/{project_name}/monitoring-schedules/functions"
|
|
629
|
+
|
|
630
|
+
def _extract_results_from_items(
|
|
631
|
+
self, app_items: list[dict[str, str]]
|
|
632
|
+
) -> list[mm_schemas.ModelEndpointMonitoringMetric]:
|
|
633
|
+
"""Assuming .#schema items are filtered out"""
|
|
634
|
+
metrics: list[mm_schemas.ModelEndpointMonitoringMetric] = []
|
|
635
|
+
for app_item in app_items:
|
|
636
|
+
app_name = app_item.pop("__name")
|
|
637
|
+
for result_name in app_item:
|
|
638
|
+
metrics.append(
|
|
639
|
+
mm_schemas.ModelEndpointMonitoringMetric(
|
|
640
|
+
project=self.project,
|
|
641
|
+
app=app_name,
|
|
642
|
+
type=mm_schemas.ModelEndpointMonitoringMetricType.RESULT,
|
|
643
|
+
name=result_name,
|
|
644
|
+
full_name=mm_schemas.model_endpoints._compose_full_name(
|
|
645
|
+
project=self.project, app=app_name, name=result_name
|
|
646
|
+
),
|
|
647
|
+
)
|
|
648
|
+
)
|
|
649
|
+
return metrics
|
|
650
|
+
|
|
651
|
+
def _extract_metrics_from_items(
|
|
652
|
+
self, result_items: list[dict[str, str]]
|
|
653
|
+
) -> list[mm_schemas.ModelEndpointMonitoringMetric]:
|
|
654
|
+
metrics: list[mm_schemas.ModelEndpointMonitoringMetric] = []
|
|
655
|
+
logger.debug("Result items", result_items=result_items)
|
|
656
|
+
for result_item in result_items:
|
|
657
|
+
app = result_item[mm_schemas.WriterEvent.APPLICATION_NAME]
|
|
658
|
+
name = result_item[mm_schemas.MetricData.METRIC_NAME]
|
|
659
|
+
metrics.append(
|
|
660
|
+
mm_schemas.ModelEndpointMonitoringMetric(
|
|
661
|
+
project=self.project,
|
|
662
|
+
app=app,
|
|
663
|
+
type=mm_schemas.ModelEndpointMonitoringMetricType.METRIC,
|
|
664
|
+
name=name,
|
|
665
|
+
full_name=mm_schemas.model_endpoints._compose_full_name(
|
|
666
|
+
project=self.project,
|
|
667
|
+
app=app,
|
|
668
|
+
name=name,
|
|
669
|
+
type=mm_schemas.ModelEndpointMonitoringMetricType.METRIC,
|
|
670
|
+
),
|
|
671
|
+
)
|
|
672
|
+
)
|
|
673
|
+
return metrics
|
|
674
|
+
|
|
675
|
+
def get_model_endpoint_metrics(
|
|
676
|
+
self, endpoint_id: str, type: mm_schemas.ModelEndpointMonitoringMetricType
|
|
677
|
+
) -> list[mm_schemas.ModelEndpointMonitoringMetric]:
|
|
678
|
+
"""Get model monitoring results and metrics on the endpoint"""
|
|
679
|
+
metrics: list[mm_schemas.ModelEndpointMonitoringMetric] = []
|
|
680
|
+
container = self.get_v3io_monitoring_apps_container(self.project)
|
|
681
|
+
if type == mm_schemas.ModelEndpointMonitoringMetricType.METRIC:
|
|
682
|
+
table_path = self._get_metrics_table_path(endpoint_id)
|
|
683
|
+
items_extractor = self._extract_metrics_from_items
|
|
684
|
+
elif type == mm_schemas.ModelEndpointMonitoringMetricType.RESULT:
|
|
685
|
+
table_path = self._get_results_table_path(endpoint_id)
|
|
686
|
+
items_extractor = self._extract_results_from_items
|
|
687
|
+
else:
|
|
688
|
+
raise ValueError(f"Invalid metric {type = }")
|
|
689
|
+
|
|
690
|
+
def scan(
|
|
691
|
+
marker: typing.Optional[str] = None,
|
|
692
|
+
) -> v3io.dataplane.response.Response:
|
|
693
|
+
# TODO: Use AIO client: `v3io.aio.dataplane.client.Client`
|
|
694
|
+
return self.client.kv.scan(
|
|
695
|
+
container=container,
|
|
696
|
+
table_path=table_path,
|
|
697
|
+
marker=marker,
|
|
698
|
+
filter_expression=_EXCLUDE_SCHEMA_FILTER_EXPRESSION,
|
|
699
|
+
)
|
|
700
|
+
|
|
701
|
+
try:
|
|
702
|
+
response = scan()
|
|
703
|
+
except v3io.dataplane.response.HttpResponseError as err:
|
|
704
|
+
if err.status_code == HTTPStatus.NOT_FOUND:
|
|
705
|
+
logger.warning(
|
|
706
|
+
f"Attempt getting {type}s - no data. Check the "
|
|
707
|
+
"project name, endpoint, or wait for the applications to start.",
|
|
708
|
+
container=container,
|
|
709
|
+
table_path=table_path,
|
|
710
|
+
)
|
|
711
|
+
return []
|
|
712
|
+
raise
|
|
713
|
+
|
|
714
|
+
while True:
|
|
715
|
+
output = typing.cast(v3io.dataplane.output.GetItemsOutput, response.output)
|
|
716
|
+
metrics.extend(items_extractor(output.items))
|
|
717
|
+
if output.last:
|
|
718
|
+
break
|
|
719
|
+
response = scan(marker=output.next_marker)
|
|
720
|
+
|
|
721
|
+
return metrics
|