mlrun 1.7.0rc48__py3-none-any.whl → 1.7.0rc52__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/common/formatters/run.py +3 -0
- mlrun/common/schemas/auth.py +3 -0
- mlrun/common/schemas/model_monitoring/constants.py +0 -7
- mlrun/common/schemas/workflow.py +9 -2
- mlrun/data_types/data_types.py +1 -1
- mlrun/db/httpdb.py +11 -4
- mlrun/execution.py +37 -6
- mlrun/feature_store/retrieval/spark_merger.py +0 -4
- mlrun/model.py +17 -0
- mlrun/model_monitoring/api.py +1 -12
- mlrun/model_monitoring/applications/__init__.py +1 -2
- mlrun/model_monitoring/applications/base.py +2 -182
- mlrun/model_monitoring/applications/context.py +2 -9
- mlrun/model_monitoring/applications/evidently_base.py +0 -74
- mlrun/model_monitoring/applications/histogram_data_drift.py +2 -2
- mlrun/model_monitoring/controller.py +45 -208
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +10 -9
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +38 -29
- mlrun/projects/operations.py +11 -8
- mlrun/projects/pipelines.py +16 -11
- mlrun/projects/project.py +1 -4
- mlrun/runtimes/nuclio/api_gateway.py +6 -0
- mlrun/utils/helpers.py +40 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc48.dist-info → mlrun-1.7.0rc52.dist-info}/METADATA +107 -25
- {mlrun-1.7.0rc48.dist-info → mlrun-1.7.0rc52.dist-info}/RECORD +30 -32
- mlrun/model_monitoring/application.py +0 -19
- mlrun/model_monitoring/evidently_application.py +0 -20
- {mlrun-1.7.0rc48.dist-info → mlrun-1.7.0rc52.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc48.dist-info → mlrun-1.7.0rc52.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc48.dist-info → mlrun-1.7.0rc52.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc48.dist-info → mlrun-1.7.0rc52.dist-info}/top_level.txt +0 -0
|
@@ -15,28 +15,22 @@
|
|
|
15
15
|
import concurrent.futures
|
|
16
16
|
import datetime
|
|
17
17
|
import json
|
|
18
|
-
import multiprocessing
|
|
19
18
|
import os
|
|
20
19
|
import re
|
|
21
20
|
from collections.abc import Iterator
|
|
22
|
-
from typing import
|
|
21
|
+
from typing import NamedTuple, Optional, Union, cast
|
|
23
22
|
|
|
24
23
|
import nuclio
|
|
25
24
|
|
|
26
25
|
import mlrun
|
|
27
26
|
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
28
27
|
import mlrun.data_types.infer
|
|
29
|
-
import mlrun.feature_store as fstore
|
|
30
28
|
import mlrun.model_monitoring.db.stores
|
|
31
|
-
from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_hist
|
|
32
29
|
from mlrun.datastore import get_stream_pusher
|
|
33
|
-
from mlrun.datastore.targets import ParquetTarget
|
|
34
30
|
from mlrun.errors import err_to_str
|
|
35
31
|
from mlrun.model_monitoring.helpers import (
|
|
36
32
|
_BatchDict,
|
|
37
33
|
batch_dict2timedelta,
|
|
38
|
-
calculate_inputs_statistics,
|
|
39
|
-
get_monitoring_parquet_path,
|
|
40
34
|
get_stream_path,
|
|
41
35
|
)
|
|
42
36
|
from mlrun.utils import datetime_now, logger
|
|
@@ -292,15 +286,9 @@ class MonitoringApplicationController:
|
|
|
292
286
|
)
|
|
293
287
|
|
|
294
288
|
self.model_monitoring_access_key = self._get_model_monitoring_access_key()
|
|
295
|
-
self.
|
|
296
|
-
self.
|
|
297
|
-
kind=mm_constants.FileTargetKind.APPS_PARQUET,
|
|
289
|
+
self.tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
|
|
290
|
+
project=self.project
|
|
298
291
|
)
|
|
299
|
-
self.storage_options = None
|
|
300
|
-
if not mlrun.mlconf.is_ce_mode():
|
|
301
|
-
self._initialize_v3io_configurations()
|
|
302
|
-
elif self.parquet_directory.startswith("s3://"):
|
|
303
|
-
self.storage_options = mlrun.mlconf.get_s3_storage_options()
|
|
304
292
|
|
|
305
293
|
@staticmethod
|
|
306
294
|
def _get_model_monitoring_access_key() -> Optional[str]:
|
|
@@ -310,12 +298,6 @@ class MonitoringApplicationController:
|
|
|
310
298
|
access_key = mlrun.mlconf.get_v3io_access_key()
|
|
311
299
|
return access_key
|
|
312
300
|
|
|
313
|
-
def _initialize_v3io_configurations(self) -> None:
|
|
314
|
-
self.storage_options = dict(
|
|
315
|
-
v3io_access_key=self.model_monitoring_access_key,
|
|
316
|
-
v3io_api=mlrun.mlconf.v3io_api,
|
|
317
|
-
)
|
|
318
|
-
|
|
319
301
|
def run(self) -> None:
|
|
320
302
|
"""
|
|
321
303
|
Main method for running all the relevant monitoring applications on each endpoint.
|
|
@@ -367,11 +349,8 @@ class MonitoringApplicationController:
|
|
|
367
349
|
)
|
|
368
350
|
return
|
|
369
351
|
# Initialize an executor pool that will be used to run each endpoint's applications on a dedicated worker
|
|
370
|
-
with concurrent.futures.
|
|
352
|
+
with concurrent.futures.ThreadPoolExecutor(
|
|
371
353
|
max_workers=min(len(endpoints), 10),
|
|
372
|
-
# On Linux, the default is "fork" (this is set to change in Python 3.14), which inherits the current heap
|
|
373
|
-
# and resources (such as sockets), which is not what we want (ML-7160)
|
|
374
|
-
mp_context=multiprocessing.get_context("spawn"),
|
|
375
354
|
) as pool:
|
|
376
355
|
for endpoint in endpoints:
|
|
377
356
|
if (
|
|
@@ -395,13 +374,10 @@ class MonitoringApplicationController:
|
|
|
395
374
|
applications_names=applications_names,
|
|
396
375
|
batch_window_generator=self._batch_window_generator,
|
|
397
376
|
project=self.project,
|
|
398
|
-
parquet_directory=self.parquet_directory,
|
|
399
|
-
storage_options=self.storage_options,
|
|
400
377
|
model_monitoring_access_key=self.model_monitoring_access_key,
|
|
378
|
+
tsdb_connector=self.tsdb_connector,
|
|
401
379
|
)
|
|
402
380
|
|
|
403
|
-
self._delete_old_parquet(endpoints=endpoints)
|
|
404
|
-
|
|
405
381
|
@classmethod
|
|
406
382
|
def model_endpoint_process(
|
|
407
383
|
cls,
|
|
@@ -409,9 +385,8 @@ class MonitoringApplicationController:
|
|
|
409
385
|
applications_names: list[str],
|
|
410
386
|
batch_window_generator: _BatchWindowGenerator,
|
|
411
387
|
project: str,
|
|
412
|
-
parquet_directory: str,
|
|
413
|
-
storage_options: dict,
|
|
414
388
|
model_monitoring_access_key: str,
|
|
389
|
+
tsdb_connector: mlrun.model_monitoring.db.tsdb.TSDBConnector,
|
|
415
390
|
) -> None:
|
|
416
391
|
"""
|
|
417
392
|
Process a model endpoint and trigger the monitoring applications. This function runs in a separate worker
|
|
@@ -422,16 +397,13 @@ class MonitoringApplicationController:
|
|
|
422
397
|
:param applications_names: (list[str]) List of application names to push results to.
|
|
423
398
|
:param batch_window_generator: (_BatchWindowGenerator) An object that generates _BatchWindow objects.
|
|
424
399
|
:param project: (str) Project name.
|
|
425
|
-
:param parquet_directory: (str) Directory to store application parquet files
|
|
426
|
-
:param storage_options: (dict) Storage options for writing ParquetTarget.
|
|
427
400
|
:param model_monitoring_access_key: (str) Access key to apply the model monitoring process.
|
|
401
|
+
:param tsdb_connector: (mlrun.model_monitoring.db.tsdb.TSDBConnector) TSDB connector
|
|
428
402
|
"""
|
|
429
403
|
endpoint_id = endpoint[mm_constants.EventFieldType.UID]
|
|
404
|
+
# If false, the endpoint represents a batch inference step.
|
|
405
|
+
has_stream = endpoint[mm_constants.EventFieldType.STREAM_PATH] != ""
|
|
430
406
|
try:
|
|
431
|
-
m_fs = fstore.get_feature_set(
|
|
432
|
-
endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
|
|
433
|
-
)
|
|
434
|
-
|
|
435
407
|
for application in applications_names:
|
|
436
408
|
batch_window = batch_window_generator.get_batch_window(
|
|
437
409
|
project=project,
|
|
@@ -439,158 +411,70 @@ class MonitoringApplicationController:
|
|
|
439
411
|
application=application,
|
|
440
412
|
first_request=endpoint[mm_constants.EventFieldType.FIRST_REQUEST],
|
|
441
413
|
last_request=endpoint[mm_constants.EventFieldType.LAST_REQUEST],
|
|
442
|
-
has_stream=
|
|
414
|
+
has_stream=has_stream,
|
|
443
415
|
)
|
|
444
416
|
|
|
445
417
|
for start_infer_time, end_infer_time in batch_window.get_intervals():
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
418
|
+
prediction_metric = tsdb_connector.read_predictions(
|
|
419
|
+
endpoint_id=endpoint_id,
|
|
420
|
+
start=start_infer_time,
|
|
421
|
+
end=end_infer_time,
|
|
422
|
+
)
|
|
423
|
+
if not prediction_metric.data and has_stream:
|
|
424
|
+
logger.info(
|
|
425
|
+
"No data found for the given interval",
|
|
426
|
+
start=start_infer_time,
|
|
427
|
+
end=end_infer_time,
|
|
428
|
+
endpoint_id=endpoint_id,
|
|
429
|
+
)
|
|
430
|
+
else:
|
|
431
|
+
logger.info(
|
|
432
|
+
"Data found for the given interval",
|
|
433
|
+
start=start_infer_time,
|
|
434
|
+
end=end_infer_time,
|
|
451
435
|
endpoint_id=endpoint_id,
|
|
436
|
+
)
|
|
437
|
+
cls._push_to_applications(
|
|
452
438
|
start_infer_time=start_infer_time,
|
|
453
439
|
end_infer_time=end_infer_time,
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
df = offline_response.to_dataframe()
|
|
460
|
-
parquet_target_path = offline_response.vector.get_target_path()
|
|
461
|
-
|
|
462
|
-
if len(df) == 0:
|
|
463
|
-
logger.info(
|
|
464
|
-
"During this time window, the endpoint has not received any data",
|
|
465
|
-
endpoint=endpoint[mm_constants.EventFieldType.UID],
|
|
466
|
-
start_time=start_infer_time,
|
|
467
|
-
end_time=end_infer_time,
|
|
468
|
-
)
|
|
469
|
-
continue
|
|
470
|
-
|
|
471
|
-
except FileNotFoundError:
|
|
472
|
-
logger.warn(
|
|
473
|
-
"No parquets were written yet",
|
|
474
|
-
endpoint=endpoint[mm_constants.EventFieldType.UID],
|
|
440
|
+
endpoint_id=endpoint_id,
|
|
441
|
+
project=project,
|
|
442
|
+
applications_names=[application],
|
|
443
|
+
model_monitoring_access_key=model_monitoring_access_key,
|
|
475
444
|
)
|
|
476
|
-
continue
|
|
477
|
-
|
|
478
|
-
# Get the timestamp of the latest request:
|
|
479
|
-
latest_request = df[mm_constants.EventFieldType.TIMESTAMP].iloc[-1]
|
|
480
|
-
|
|
481
|
-
# Get the feature stats from the model endpoint for reference data
|
|
482
|
-
feature_stats = json.loads(
|
|
483
|
-
endpoint[mm_constants.EventFieldType.FEATURE_STATS]
|
|
484
|
-
)
|
|
485
|
-
|
|
486
|
-
# Pad the original feature stats to accommodate current
|
|
487
|
-
# data out of the original range (unless already padded)
|
|
488
|
-
pad_features_hist(FeatureStats(feature_stats))
|
|
489
|
-
|
|
490
|
-
# Get the current stats:
|
|
491
|
-
current_stats = calculate_inputs_statistics(
|
|
492
|
-
sample_set_statistics=feature_stats, inputs=df
|
|
493
|
-
)
|
|
494
|
-
# end - TODO : delete in 1.9.0 (V1 app deprecation)
|
|
495
|
-
cls._push_to_applications(
|
|
496
|
-
current_stats=current_stats,
|
|
497
|
-
feature_stats=feature_stats,
|
|
498
|
-
start_infer_time=start_infer_time,
|
|
499
|
-
end_infer_time=end_infer_time,
|
|
500
|
-
endpoint_id=endpoint_id,
|
|
501
|
-
latest_request=latest_request,
|
|
502
|
-
project=project,
|
|
503
|
-
applications_names=[application],
|
|
504
|
-
model_monitoring_access_key=model_monitoring_access_key,
|
|
505
|
-
parquet_target_path=parquet_target_path,
|
|
506
|
-
)
|
|
507
445
|
except Exception:
|
|
508
446
|
logger.exception(
|
|
509
447
|
"Encountered an exception",
|
|
510
448
|
endpoint_id=endpoint[mm_constants.EventFieldType.UID],
|
|
511
449
|
)
|
|
512
450
|
|
|
513
|
-
def _delete_old_parquet(self, endpoints: list[dict[str, Any]], days: int = 1):
|
|
514
|
-
"""
|
|
515
|
-
Delete application parquets older than the argument days.
|
|
516
|
-
|
|
517
|
-
:param endpoints: A list of dictionaries of model endpoints records.
|
|
518
|
-
"""
|
|
519
|
-
if self.parquet_directory.startswith("v3io:///"):
|
|
520
|
-
# create fs with access to the user side (under projects)
|
|
521
|
-
store, _, _ = mlrun.store_manager.get_or_create_store(
|
|
522
|
-
self.parquet_directory,
|
|
523
|
-
{"V3IO_ACCESS_KEY": self.model_monitoring_access_key},
|
|
524
|
-
)
|
|
525
|
-
fs = store.filesystem
|
|
526
|
-
|
|
527
|
-
# calculate time threshold (keep only files from the last 24 hours)
|
|
528
|
-
time_to_keep = (
|
|
529
|
-
datetime.datetime.now(tz=datetime.timezone.utc)
|
|
530
|
-
- datetime.timedelta(days=days)
|
|
531
|
-
).timestamp()
|
|
532
|
-
|
|
533
|
-
for endpoint in endpoints:
|
|
534
|
-
try:
|
|
535
|
-
apps_parquet_directories = fs.listdir(
|
|
536
|
-
path=f"{self.parquet_directory}"
|
|
537
|
-
f"/key={endpoint[mm_constants.EventFieldType.UID]}"
|
|
538
|
-
)
|
|
539
|
-
for directory in apps_parquet_directories:
|
|
540
|
-
if directory["mtime"] < time_to_keep:
|
|
541
|
-
# Delete files
|
|
542
|
-
fs.rm(path=directory["name"], recursive=True)
|
|
543
|
-
# Delete directory
|
|
544
|
-
fs.rmdir(path=directory["name"])
|
|
545
|
-
except FileNotFoundError:
|
|
546
|
-
logger.info(
|
|
547
|
-
"Application parquet directory is empty, "
|
|
548
|
-
"probably parquets have not yet been created for this app",
|
|
549
|
-
endpoint=endpoint[mm_constants.EventFieldType.UID],
|
|
550
|
-
path=f"{self.parquet_directory}"
|
|
551
|
-
f"/key={endpoint[mm_constants.EventFieldType.UID]}",
|
|
552
|
-
)
|
|
553
|
-
|
|
554
451
|
@staticmethod
|
|
555
452
|
def _push_to_applications(
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
project,
|
|
563
|
-
applications_names,
|
|
564
|
-
model_monitoring_access_key,
|
|
565
|
-
parquet_target_path,
|
|
453
|
+
start_infer_time: datetime.datetime,
|
|
454
|
+
end_infer_time: datetime.datetime,
|
|
455
|
+
endpoint_id: str,
|
|
456
|
+
project: str,
|
|
457
|
+
applications_names: list[str],
|
|
458
|
+
model_monitoring_access_key: str,
|
|
566
459
|
):
|
|
567
460
|
"""
|
|
568
461
|
Pushes data to multiple stream applications.
|
|
569
462
|
|
|
570
|
-
:param
|
|
571
|
-
:param
|
|
572
|
-
:param
|
|
573
|
-
:param
|
|
574
|
-
:param
|
|
575
|
-
:param
|
|
576
|
-
:param project: mlrun Project name.
|
|
577
|
-
:param applications_names: List of application names to which data will be pushed.
|
|
463
|
+
:param start_infer_time: The beginning of the infer interval window.
|
|
464
|
+
:param end_infer_time: The end of the infer interval window.
|
|
465
|
+
:param endpoint_id: Identifier for the model endpoint.
|
|
466
|
+
:param project: mlrun Project name.
|
|
467
|
+
:param applications_names: List of application names to which data will be pushed.
|
|
468
|
+
:param model_monitoring_access_key: Access key to apply the model monitoring process.
|
|
578
469
|
|
|
579
470
|
"""
|
|
580
|
-
|
|
581
471
|
data = {
|
|
582
|
-
mm_constants.ApplicationEvent.CURRENT_STATS: json.dumps(current_stats),
|
|
583
|
-
mm_constants.ApplicationEvent.FEATURE_STATS: json.dumps(feature_stats),
|
|
584
|
-
mm_constants.ApplicationEvent.SAMPLE_PARQUET_PATH: parquet_target_path,
|
|
585
472
|
mm_constants.ApplicationEvent.START_INFER_TIME: start_infer_time.isoformat(
|
|
586
473
|
sep=" ", timespec="microseconds"
|
|
587
474
|
),
|
|
588
475
|
mm_constants.ApplicationEvent.END_INFER_TIME: end_infer_time.isoformat(
|
|
589
476
|
sep=" ", timespec="microseconds"
|
|
590
477
|
),
|
|
591
|
-
mm_constants.ApplicationEvent.LAST_REQUEST: latest_request.isoformat(
|
|
592
|
-
sep=" ", timespec="microseconds"
|
|
593
|
-
),
|
|
594
478
|
mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
|
|
595
479
|
mm_constants.ApplicationEvent.OUTPUT_STREAM_URI: get_stream_path(
|
|
596
480
|
project=project,
|
|
@@ -608,53 +492,6 @@ class MonitoringApplicationController:
|
|
|
608
492
|
[data]
|
|
609
493
|
)
|
|
610
494
|
|
|
611
|
-
@staticmethod
|
|
612
|
-
def _get_sample_df(
|
|
613
|
-
feature_set: mlrun.common.schemas.FeatureSet,
|
|
614
|
-
endpoint_id: str,
|
|
615
|
-
start_infer_time: datetime.datetime,
|
|
616
|
-
end_infer_time: datetime.datetime,
|
|
617
|
-
parquet_directory: str,
|
|
618
|
-
storage_options: dict,
|
|
619
|
-
application_name: str,
|
|
620
|
-
) -> mlrun.feature_store.OfflineVectorResponse:
|
|
621
|
-
"""
|
|
622
|
-
Retrieves a sample DataFrame of the current input according to the provided infer interval window.
|
|
623
|
-
|
|
624
|
-
:param feature_set: The main feature set.
|
|
625
|
-
:param endpoint_id: Identifier for the model endpoint.
|
|
626
|
-
:param start_infer_time: The beginning of the infer interval window.
|
|
627
|
-
:param end_infer_time: The end of the infer interval window.
|
|
628
|
-
:param parquet_directory: Directory where Parquet files are stored.
|
|
629
|
-
:param storage_options: Storage options for accessing the data.
|
|
630
|
-
:param application_name: Current application name.
|
|
631
|
-
|
|
632
|
-
:return: OfflineVectorResponse that can be used for generating a sample DataFrame for the specified endpoint.
|
|
633
|
-
|
|
634
|
-
"""
|
|
635
|
-
features = [f"{feature_set.metadata.name}.*"]
|
|
636
|
-
vector = fstore.FeatureVector(
|
|
637
|
-
name=f"{endpoint_id}_vector",
|
|
638
|
-
features=features,
|
|
639
|
-
with_indexes=True,
|
|
640
|
-
)
|
|
641
|
-
vector.metadata.tag = application_name
|
|
642
|
-
vector.feature_set_objects = {feature_set.metadata.name: feature_set}
|
|
643
|
-
|
|
644
|
-
# get offline features based on application start and end time.
|
|
645
|
-
# store the result parquet by partitioning by controller end processing time
|
|
646
|
-
offline_response = vector.get_offline_features(
|
|
647
|
-
start_time=start_infer_time,
|
|
648
|
-
end_time=end_infer_time,
|
|
649
|
-
timestamp_for_filtering=mm_constants.EventFieldType.TIMESTAMP,
|
|
650
|
-
target=ParquetTarget(
|
|
651
|
-
path=parquet_directory
|
|
652
|
-
+ f"/key={endpoint_id}/{int(start_infer_time.timestamp())}/{application_name}.parquet",
|
|
653
|
-
storage_options=storage_options,
|
|
654
|
-
),
|
|
655
|
-
)
|
|
656
|
-
return offline_response
|
|
657
|
-
|
|
658
495
|
|
|
659
496
|
def handler(context: nuclio.Context, event: nuclio.Event) -> None:
|
|
660
497
|
"""
|
|
@@ -107,25 +107,26 @@ class TDEngineSchema:
|
|
|
107
107
|
)
|
|
108
108
|
return f"CREATE TABLE if NOT EXISTS {self.database}.{subtable} USING {self.super_table} TAGS ({tags});"
|
|
109
109
|
|
|
110
|
+
@staticmethod
|
|
110
111
|
def _insert_subtable_stmt(
|
|
111
|
-
|
|
112
|
-
|
|
112
|
+
statement: taosws.TaosStmt,
|
|
113
|
+
columns: dict[str, _TDEngineColumn],
|
|
113
114
|
subtable: str,
|
|
114
115
|
values: dict[str, Union[str, int, float, datetime.datetime]],
|
|
115
116
|
) -> taosws.TaosStmt:
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
stmt.set_tbname(subtable)
|
|
117
|
+
question_marks = ", ".join("?" * len(columns))
|
|
118
|
+
statement.prepare(f"INSERT INTO ? VALUES ({question_marks});")
|
|
119
|
+
statement.set_tbname(subtable)
|
|
120
120
|
|
|
121
121
|
bind_params = []
|
|
122
122
|
|
|
123
|
-
for col_name, col_type in
|
|
123
|
+
for col_name, col_type in columns.items():
|
|
124
124
|
val = values[col_name]
|
|
125
125
|
bind_params.append(values_to_column([val], col_type))
|
|
126
126
|
|
|
127
|
-
|
|
128
|
-
|
|
127
|
+
statement.bind_param(bind_params)
|
|
128
|
+
statement.add_batch()
|
|
129
|
+
return statement
|
|
129
130
|
|
|
130
131
|
def _delete_subtable_query(
|
|
131
132
|
self,
|
|
@@ -18,11 +18,16 @@ from typing import Union
|
|
|
18
18
|
|
|
19
19
|
import pandas as pd
|
|
20
20
|
import taosws
|
|
21
|
+
from taoswswrap.tdengine_connection import (
|
|
22
|
+
Statement,
|
|
23
|
+
TDEngineConnection,
|
|
24
|
+
)
|
|
21
25
|
|
|
22
26
|
import mlrun.common.schemas.model_monitoring as mm_schemas
|
|
23
27
|
import mlrun.model_monitoring.db.tsdb.tdengine.schemas as tdengine_schemas
|
|
24
28
|
import mlrun.model_monitoring.db.tsdb.tdengine.stream_graph_steps
|
|
25
29
|
from mlrun.model_monitoring.db import TSDBConnector
|
|
30
|
+
from mlrun.model_monitoring.db.tsdb.tdengine.schemas import TDEngineSchema
|
|
26
31
|
from mlrun.model_monitoring.helpers import get_invocations_fqn
|
|
27
32
|
from mlrun.utils import logger
|
|
28
33
|
|
|
@@ -52,25 +57,18 @@ class TDEngineConnector(TSDBConnector):
|
|
|
52
57
|
self._init_super_tables()
|
|
53
58
|
|
|
54
59
|
@property
|
|
55
|
-
def connection(self) ->
|
|
60
|
+
def connection(self) -> TDEngineConnection:
|
|
56
61
|
if not self._connection:
|
|
57
62
|
self._connection = self._create_connection()
|
|
58
63
|
return self._connection
|
|
59
64
|
|
|
60
|
-
def _create_connection(self) ->
|
|
65
|
+
def _create_connection(self) -> TDEngineConnection:
|
|
61
66
|
"""Establish a connection to the TSDB server."""
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
pass
|
|
68
|
-
try:
|
|
69
|
-
conn.execute(f"USE {self.database}")
|
|
70
|
-
except taosws.QueryError as e:
|
|
71
|
-
raise mlrun.errors.MLRunTSDBConnectionFailureError(
|
|
72
|
-
f"Failed to use TDEngine database {self.database}, {mlrun.errors.err_to_str(e)}"
|
|
73
|
-
)
|
|
67
|
+
logger.debug("Creating a new connection to TDEngine", project=self.project)
|
|
68
|
+
conn = TDEngineConnection(self._tdengine_connection_string)
|
|
69
|
+
conn.run(statements=f"CREATE DATABASE IF NOT EXISTS {self.database}")
|
|
70
|
+
conn.prefix_statements = [f"USE {self.database}"]
|
|
71
|
+
logger.debug("Connected to TDEngine", project=self.project)
|
|
74
72
|
return conn
|
|
75
73
|
|
|
76
74
|
def _init_super_tables(self):
|
|
@@ -91,7 +89,7 @@ class TDEngineConnector(TSDBConnector):
|
|
|
91
89
|
"""Create TDEngine supertables."""
|
|
92
90
|
for table in self.tables:
|
|
93
91
|
create_table_query = self.tables[table]._create_super_table_query()
|
|
94
|
-
self.connection.
|
|
92
|
+
self.connection.run(statements=create_table_query)
|
|
95
93
|
|
|
96
94
|
def write_application_event(
|
|
97
95
|
self,
|
|
@@ -137,13 +135,18 @@ class TDEngineConnector(TSDBConnector):
|
|
|
137
135
|
)
|
|
138
136
|
|
|
139
137
|
create_table_sql = table._create_subtable_sql(subtable=table_name, values=event)
|
|
140
|
-
self.connection.execute(create_table_sql)
|
|
141
138
|
|
|
142
|
-
insert_statement =
|
|
143
|
-
|
|
139
|
+
insert_statement = Statement(
|
|
140
|
+
TDEngineSchema._insert_subtable_stmt,
|
|
141
|
+
dict(columns=table.columns, subtable=table_name, values=event),
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
self.connection.run(
|
|
145
|
+
statements=[
|
|
146
|
+
create_table_sql,
|
|
147
|
+
insert_statement,
|
|
148
|
+
]
|
|
144
149
|
)
|
|
145
|
-
insert_statement.add_batch()
|
|
146
|
-
insert_statement.execute()
|
|
147
150
|
|
|
148
151
|
@staticmethod
|
|
149
152
|
def _convert_to_datetime(val: typing.Union[str, datetime]) -> datetime:
|
|
@@ -200,18 +203,24 @@ class TDEngineConnector(TSDBConnector):
|
|
|
200
203
|
"""
|
|
201
204
|
Delete all project resources in the TSDB connector, such as model endpoints data and drift results.
|
|
202
205
|
"""
|
|
206
|
+
logger.debug(
|
|
207
|
+
"Deleting all project resources using the TDEngine connector",
|
|
208
|
+
project=self.project,
|
|
209
|
+
)
|
|
203
210
|
for table in self.tables:
|
|
204
211
|
get_subtable_names_query = self.tables[table]._get_subtables_query(
|
|
205
212
|
values={mm_schemas.EventFieldType.PROJECT: self.project}
|
|
206
213
|
)
|
|
207
|
-
subtables = self.connection.query
|
|
214
|
+
subtables = self.connection.run(query=get_subtable_names_query).data
|
|
215
|
+
drop_statements = []
|
|
208
216
|
for subtable in subtables:
|
|
209
|
-
|
|
210
|
-
subtable=subtable[0]
|
|
217
|
+
drop_statements.append(
|
|
218
|
+
self.tables[table]._drop_subtable_query(subtable=subtable[0])
|
|
211
219
|
)
|
|
212
|
-
|
|
213
|
-
logger.
|
|
214
|
-
|
|
220
|
+
self.connection.run(statements=[drop_statements])
|
|
221
|
+
logger.debug(
|
|
222
|
+
"Deleted all project resources using the TDEngine connector",
|
|
223
|
+
project=self.project,
|
|
215
224
|
)
|
|
216
225
|
|
|
217
226
|
def get_model_endpoint_real_time_metrics(
|
|
@@ -282,14 +291,14 @@ class TDEngineConnector(TSDBConnector):
|
|
|
282
291
|
)
|
|
283
292
|
logger.debug("Querying TDEngine", query=full_query)
|
|
284
293
|
try:
|
|
285
|
-
query_result = self.connection.query
|
|
294
|
+
query_result = self.connection.run(query=full_query)
|
|
286
295
|
except taosws.QueryError as e:
|
|
287
296
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
288
297
|
f"Failed to query table {table} in database {self.database}, {str(e)}"
|
|
289
298
|
)
|
|
290
299
|
|
|
291
|
-
df_columns = [field.name
|
|
292
|
-
return pd.DataFrame(query_result, columns=df_columns)
|
|
300
|
+
df_columns = [field.name for field in query_result.fields]
|
|
301
|
+
return pd.DataFrame(query_result.data, columns=df_columns)
|
|
293
302
|
|
|
294
303
|
def read_metrics_data(
|
|
295
304
|
self,
|
mlrun/projects/operations.py
CHANGED
|
@@ -15,10 +15,13 @@
|
|
|
15
15
|
import warnings
|
|
16
16
|
from typing import Optional, Union
|
|
17
17
|
|
|
18
|
-
|
|
18
|
+
import mlrun_pipelines.common.models
|
|
19
|
+
import mlrun_pipelines.models
|
|
19
20
|
|
|
20
21
|
import mlrun
|
|
21
22
|
import mlrun.common.constants as mlrun_constants
|
|
23
|
+
import mlrun.common.schemas.function
|
|
24
|
+
import mlrun.common.schemas.workflow
|
|
22
25
|
from mlrun.utils import hub_prefix
|
|
23
26
|
|
|
24
27
|
from .pipelines import enrich_function_object, pipeline_context
|
|
@@ -49,7 +52,7 @@ def _get_engine_and_function(function, project=None):
|
|
|
49
52
|
function = enrich_function_object(project, function, copy_function=False)
|
|
50
53
|
|
|
51
54
|
if not pipeline_context.workflow:
|
|
52
|
-
return
|
|
55
|
+
return mlrun.common.schemas.workflow.EngineType.LOCAL, function
|
|
53
56
|
|
|
54
57
|
return pipeline_context.workflow.engine, function
|
|
55
58
|
|
|
@@ -78,7 +81,7 @@ def run_function(
|
|
|
78
81
|
returns: Optional[list[Union[str, dict[str, str]]]] = None,
|
|
79
82
|
builder_env: Optional[list] = None,
|
|
80
83
|
reset_on_run: Optional[bool] = None,
|
|
81
|
-
) -> Union[mlrun.model.RunObject, PipelineNodeWrapper]:
|
|
84
|
+
) -> Union[mlrun.model.RunObject, mlrun_pipelines.models.PipelineNodeWrapper]:
|
|
82
85
|
"""Run a local or remote task as part of a local/kubeflow pipeline
|
|
83
86
|
|
|
84
87
|
run_function() allows you to execute a function locally, on a remote cluster, or as part of an automated workflow
|
|
@@ -186,7 +189,7 @@ def run_function(
|
|
|
186
189
|
)
|
|
187
190
|
task.spec.verbose = task.spec.verbose or verbose
|
|
188
191
|
|
|
189
|
-
if engine ==
|
|
192
|
+
if engine == mlrun.common.schemas.workflow.EngineType.KFP:
|
|
190
193
|
if schedule:
|
|
191
194
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
192
195
|
"Scheduling jobs is not supported when running a workflow with the kfp engine."
|
|
@@ -266,7 +269,7 @@ def build_function(
|
|
|
266
269
|
overwrite_build_params: bool = False,
|
|
267
270
|
extra_args: str = None,
|
|
268
271
|
force_build: bool = False,
|
|
269
|
-
) -> Union[BuildStatus, PipelineNodeWrapper]:
|
|
272
|
+
) -> Union[BuildStatus, mlrun_pipelines.models.PipelineNodeWrapper]:
|
|
270
273
|
"""deploy ML function, build container with its dependencies
|
|
271
274
|
|
|
272
275
|
:param function: Name of the function (in the project) or function object
|
|
@@ -302,7 +305,7 @@ def build_function(
|
|
|
302
305
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
303
306
|
"Cannot build use deploy_function()"
|
|
304
307
|
)
|
|
305
|
-
if engine ==
|
|
308
|
+
if engine == mlrun.common.schemas.workflow.EngineType.KFP:
|
|
306
309
|
if overwrite_build_params:
|
|
307
310
|
function.spec.build.commands = None
|
|
308
311
|
if requirements or requirements_file:
|
|
@@ -375,7 +378,7 @@ def deploy_function(
|
|
|
375
378
|
builder_env: dict = None,
|
|
376
379
|
project_object=None,
|
|
377
380
|
mock: bool = None,
|
|
378
|
-
) -> Union[DeployStatus, PipelineNodeWrapper]:
|
|
381
|
+
) -> Union[DeployStatus, mlrun_pipelines.models.PipelineNodeWrapper]:
|
|
379
382
|
"""deploy real-time (nuclio based) functions
|
|
380
383
|
|
|
381
384
|
:param function: name of the function (in the project) or function object
|
|
@@ -392,7 +395,7 @@ def deploy_function(
|
|
|
392
395
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
393
396
|
"deploy is used with real-time functions, for other kinds use build_function()"
|
|
394
397
|
)
|
|
395
|
-
if engine ==
|
|
398
|
+
if engine == mlrun.common.schemas.workflow.EngineType.KFP:
|
|
396
399
|
return function.deploy_step(models=models, env=env, tag=tag, verbose=verbose)
|
|
397
400
|
else:
|
|
398
401
|
if env:
|