mlrun 1.8.0rc1__py3-none-any.whl → 1.8.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/__init__.py +5 -7
- mlrun/__main__.py +1 -1
- mlrun/common/formatters/project.py +9 -0
- mlrun/common/schemas/__init__.py +3 -0
- mlrun/common/schemas/alert.py +31 -18
- mlrun/common/schemas/api_gateway.py +3 -3
- mlrun/common/schemas/artifact.py +7 -7
- mlrun/common/schemas/auth.py +6 -4
- mlrun/common/schemas/background_task.py +7 -7
- mlrun/common/schemas/client_spec.py +2 -2
- mlrun/common/schemas/clusterization_spec.py +2 -2
- mlrun/common/schemas/common.py +5 -5
- mlrun/common/schemas/datastore_profile.py +1 -1
- mlrun/common/schemas/feature_store.py +9 -9
- mlrun/common/schemas/frontend_spec.py +4 -4
- mlrun/common/schemas/function.py +10 -10
- mlrun/common/schemas/hub.py +1 -1
- mlrun/common/schemas/k8s.py +3 -3
- mlrun/common/schemas/memory_reports.py +3 -3
- mlrun/common/schemas/model_monitoring/grafana.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +1 -1
- mlrun/common/schemas/notification.py +18 -3
- mlrun/common/schemas/object.py +1 -1
- mlrun/common/schemas/pagination.py +4 -4
- mlrun/common/schemas/partition.py +16 -1
- mlrun/common/schemas/pipeline.py +2 -2
- mlrun/common/schemas/project.py +22 -17
- mlrun/common/schemas/runs.py +2 -2
- mlrun/common/schemas/runtime_resource.py +5 -5
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/secret.py +1 -1
- mlrun/common/schemas/tag.py +3 -3
- mlrun/common/schemas/workflow.py +5 -5
- mlrun/config.py +22 -0
- mlrun/datastore/datastore_profile.py +19 -19
- mlrun/db/base.py +48 -6
- mlrun/db/httpdb.py +221 -9
- mlrun/db/nopdb.py +34 -5
- mlrun/model.py +2 -2
- mlrun/model_monitoring/applications/results.py +2 -2
- mlrun/model_monitoring/db/tsdb/base.py +2 -2
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +37 -13
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +32 -40
- mlrun/model_monitoring/helpers.py +4 -10
- mlrun/model_monitoring/stream_processing.py +14 -11
- mlrun/platforms/__init__.py +0 -13
- mlrun/projects/__init__.py +6 -1
- mlrun/projects/pipelines.py +184 -55
- mlrun/projects/project.py +95 -28
- mlrun/run.py +4 -1
- mlrun/runtimes/base.py +2 -1
- mlrun/runtimes/mounts.py +572 -0
- mlrun/runtimes/nuclio/function.py +1 -2
- mlrun/runtimes/pod.py +82 -18
- mlrun/runtimes/remotesparkjob.py +1 -1
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/utils/helpers.py +12 -2
- mlrun/utils/logger.py +2 -2
- mlrun/utils/notifications/notification/__init__.py +22 -19
- mlrun/utils/notifications/notification/base.py +12 -12
- mlrun/utils/notifications/notification/console.py +6 -6
- mlrun/utils/notifications/notification/git.py +6 -6
- mlrun/utils/notifications/notification/ipython.py +6 -6
- mlrun/utils/notifications/notification/mail.py +149 -0
- mlrun/utils/notifications/notification/slack.py +6 -6
- mlrun/utils/notifications/notification/webhook.py +6 -6
- mlrun/utils/notifications/notification_pusher.py +20 -12
- mlrun/utils/regex.py +2 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc1.dist-info → mlrun-1.8.0rc2.dist-info}/METADATA +190 -186
- {mlrun-1.8.0rc1.dist-info → mlrun-1.8.0rc2.dist-info}/RECORD +76 -74
- {mlrun-1.8.0rc1.dist-info → mlrun-1.8.0rc2.dist-info}/WHEEL +1 -1
- {mlrun-1.8.0rc1.dist-info → mlrun-1.8.0rc2.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc1.dist-info → mlrun-1.8.0rc2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc1.dist-info → mlrun-1.8.0rc2.dist-info}/top_level.txt +0 -0
@@ -81,16 +81,16 @@ class TDEngineConnector(TSDBConnector):
         """Initialize the super tables for the TSDB."""
         self.tables = {
             mm_schemas.TDEngineSuperTables.APP_RESULTS: tdengine_schemas.AppResultTable(
-                self.database
+                project=self.project, database=self.database
             ),
             mm_schemas.TDEngineSuperTables.METRICS: tdengine_schemas.Metrics(
-                self.database
+                project=self.project, database=self.database
             ),
             mm_schemas.TDEngineSuperTables.PREDICTIONS: tdengine_schemas.Predictions(
-                self.database
+                project=self.project, database=self.database
             ),
             mm_schemas.TDEngineSuperTables.ERRORS: tdengine_schemas.Errors(
-                self.database
+                project=self.project, database=self.database
             ),
         }

@@ -114,11 +114,9 @@ class TDEngineConnector(TSDBConnector):
         """

         table_name = (
-            f"{self.project}_"
             f"{event[mm_schemas.WriterEvent.ENDPOINT_ID]}_"
-            f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}"
+            f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}"
         )
-        event[mm_schemas.EventFieldType.PROJECT] = self.project

         if kind == mm_schemas.WriterEventKind.RESULT:
             # Write a new result
@@ -188,7 +186,9 @@ class TDEngineConnector(TSDBConnector):
             name=name,
             after=after,
             url=self._tdengine_connection_string,
-            supertable=mm_schemas.TDEngineSuperTables.PREDICTIONS,
+            supertable=self.tables[
+                mm_schemas.TDEngineSuperTables.PREDICTIONS
+            ].super_table,
             table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
             time_col=mm_schemas.EventFieldType.TIME,
             database=self.database,
@@ -197,7 +197,6 @@ class TDEngineConnector(TSDBConnector):
                 mm_schemas.EventKeyMetrics.CUSTOM_METRICS,
             ],
             tag_cols=[
-                mm_schemas.EventFieldType.PROJECT,
                 mm_schemas.EventFieldType.ENDPOINT_ID,
             ],
             max_events=1000,
@@ -227,7 +226,7 @@ class TDEngineConnector(TSDBConnector):
             name="tsdb_error",
             after="error_extractor",
             url=self._tdengine_connection_string,
-            supertable=mm_schemas.TDEngineSuperTables.ERRORS,
+            supertable=self.tables[mm_schemas.TDEngineSuperTables.ERRORS].super_table,
             table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
             time_col=mm_schemas.EventFieldType.TIME,
             database=self.database,
@@ -235,7 +234,6 @@ class TDEngineConnector(TSDBConnector):
                 mm_schemas.EventFieldType.MODEL_ERROR,
             ],
             tag_cols=[
-                mm_schemas.EventFieldType.PROJECT,
                 mm_schemas.EventFieldType.ENDPOINT_ID,
                 mm_schemas.EventFieldType.ERROR_TYPE,
             ],
@@ -251,22 +249,23 @@ class TDEngineConnector(TSDBConnector):
             "Deleting all project resources using the TDEngine connector",
             project=self.project,
         )
+        drop_statements = []
         for table in self.tables:
-
-
-
-
-
+            drop_statements.append(self.tables[table].drop_supertable_query())
+
+        try:
+            self.connection.run(
+                statements=drop_statements,
                 timeout=self._timeout,
                 retries=self._retries,
-            )
-
-
-
-
-
-
-
+            )
+        except Exception as e:
+            logger.warning(
+                "Failed to drop TDEngine tables. You may need to drop them manually. "
+                "These can be found under the following supertables: app_results, "
+                "metrics, and predictions.",
+                project=self.project,
+                error=mlrun.errors.err_to_str(e),
             )
         logger.debug(
             "Deleted all project resources using the TDEngine connector",
@@ -331,13 +330,6 @@ class TDEngineConnector(TSDBConnector):
         :raise: MLRunInvalidArgumentError if query the provided table failed.
         """

-        project_condition = f"project = '{self.project}'"
-        filter_query = (
-            f"({filter_query}) AND ({project_condition})"
-            if filter_query
-            else project_condition
-        )
-
         full_query = tdengine_schemas.TDEngineSchema._get_records_query(
             table=table,
             start=start,
@@ -400,12 +392,12 @@ class TDEngineConnector(TSDBConnector):
                 project=self.project,
                 endpoint_id=endpoint_id,
             )
-            table = mm_schemas.TDEngineSuperTables.METRICS
+            table = self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table
             name = mm_schemas.MetricData.METRIC_NAME
             columns += [name, mm_schemas.MetricData.METRIC_VALUE]
             df_handler = self.df_to_metrics_values
         elif type == "results":
-            table = mm_schemas.TDEngineSuperTables.APP_RESULTS
+            table = self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table
             name = mm_schemas.ResultData.RESULT_NAME
             columns += [
                 name,
@@ -477,7 +469,7 @@ class TDEngineConnector(TSDBConnector):
                 "both or neither of `aggregation_window` and `agg_funcs` must be provided"
             )
         df = self._get_records(
-            table=mm_schemas.TDEngineSuperTables.PREDICTIONS,
+            table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
             start=start,
             end=end,
             columns=[mm_schemas.EventFieldType.LATENCY],
@@ -527,7 +519,7 @@ class TDEngineConnector(TSDBConnector):
         )
         start, end = self._get_start_end(start, end)
         df = self._get_records(
-            table=mm_schemas.TDEngineSuperTables.PREDICTIONS,
+            table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
             start=start,
             end=end,
             columns=[
@@ -571,7 +563,7 @@ class TDEngineConnector(TSDBConnector):
         start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
         start, end = self._get_start_end(start, end)
         df = self._get_records(
-            table=mm_schemas.TDEngineSuperTables.APP_RESULTS,
+            table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
             start=start,
             end=end,
             columns=[
@@ -602,7 +594,7 @@ class TDEngineConnector(TSDBConnector):
     ) -> pd.DataFrame:
         start, end = self._get_start_end(start, end)
         df = self._get_records(
-            table=mm_schemas.TDEngineSuperTables.METRICS,
+            table=self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table,
             start=start,
             end=end,
             columns=[
@@ -638,7 +630,7 @@ class TDEngineConnector(TSDBConnector):
     ) -> pd.DataFrame:
         start, end = self._get_start_end(start, end)
         df = self._get_records(
-            table=mm_schemas.TDEngineSuperTables.APP_RESULTS,
+            table=self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table,
             start=start,
             end=end,
             columns=[
@@ -679,7 +671,7 @@ class TDEngineConnector(TSDBConnector):
         )
         start, end = self._get_start_end(start, end)
         df = self._get_records(
-            table=mm_schemas.TDEngineSuperTables.ERRORS,
+            table=self.tables[mm_schemas.TDEngineSuperTables.ERRORS].super_table,
             start=start,
             end=end,
             columns=[
@@ -711,7 +703,7 @@ class TDEngineConnector(TSDBConnector):
         )
         start, end = self._get_start_end(start, end)
         df = self._get_records(
-            table=mm_schemas.TDEngineSuperTables.PREDICTIONS,
+            table=self.tables[mm_schemas.TDEngineSuperTables.PREDICTIONS].super_table,
             start=start,
             end=end,
             columns=[
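The hunks above change how the connector scopes data to a project: the project is baked into each supertable definition (passed to the `tdengine_schemas` table classes) instead of being written as a `project` tag column, and every query now resolves the table name through `self.tables[...].super_table`. A minimal sketch of that lookup pattern, using a hypothetical `Table` class rather than mlrun's real schema classes:

```python
# Hypothetical sketch of the per-project supertable pattern used above.
# The `Table` class and its naming scheme are illustrative assumptions,
# not mlrun's actual tdengine_schemas API.
class Table:
    def __init__(self, project: str, database: str, name: str):
        # The supertable name now carries the project, so rows no longer
        # need a separate `project` tag column.
        self.super_table = f"{database}.{name}_{project}"

    def drop_supertable_query(self) -> str:
        return f"DROP STABLE IF EXISTS {self.super_table};"


tables = {
    "app_results": Table(project="my-project", database="mm", name="app_results"),
    "predictions": Table(project="my-project", database="mm", name="predictions"),
}

# Steps and queries reference the resolved name instead of the enum value:
predictions_supertable = tables["predictions"].super_table

# delete_tsdb_resources now batches the drop statements into a single run:
drop_statements = [table.drop_supertable_query() for table in tables.values()]
```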
mlrun/model_monitoring/helpers.py
CHANGED

@@ -20,6 +20,7 @@ import numpy as np
 import pandas as pd

 if typing.TYPE_CHECKING:
+    from mlrun.datastore import DataItem
     from mlrun.db.base import RunDBInterface
     from mlrun.projects import MlrunProject

@@ -28,7 +29,6 @@ import mlrun.artifacts
 import mlrun.common.model_monitoring.helpers
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.data_types.infer
-import mlrun.datastore
 import mlrun.model_monitoring
 import mlrun.utils.helpers
 from mlrun.common.schemas.model_monitoring.model_endpoints import (
@@ -134,9 +134,7 @@ def _get_monitoring_drift_measures_file_path(project: str, endpoint_id: str) ->
     )


-def get_monitoring_current_stats_data(
-    project: str, endpoint_id: str
-) -> mlrun.datastore.DataItem:
+def get_monitoring_current_stats_data(project: str, endpoint_id: str) -> "DataItem":
     """
     getter for data item of current stats for project and endpoint
     :param project: project name str
@@ -150,9 +148,7 @@ def get_monitoring_current_stats_data(
     )


-def get_monitoring_drift_measures_data(
-    project: str, endpoint_id: str
-) -> mlrun.datastore.DataItem:
+def get_monitoring_drift_measures_data(project: str, endpoint_id: str) -> "DataItem":
     """
     getter for data item of drift measures for project and endpoint
     :param project: project name str
@@ -437,9 +433,7 @@ def _get_monitoring_schedules_file_path(*, project: str, endpoint_id: str) -> st
     )


-def get_monitoring_schedules_data(
-    *, project: str, endpoint_id: str
-) -> mlrun.datastore.DataItem:
+def get_monitoring_schedules_data(*, project: str, endpoint_id: str) -> "DataItem":
     """
     Get the model monitoring schedules' data item of the project's model endpoint.
     """
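This change replaces the runtime `import mlrun.datastore` with a `typing.TYPE_CHECKING` import and quoted ("forward reference") annotations, so the `DataItem` type stays visible to type checkers without importing the datastore package at module load time. A generic sketch of the pattern; the function body here is illustrative only:

```python
import typing

if typing.TYPE_CHECKING:
    # Only evaluated by static type checkers, never at runtime.
    from mlrun.datastore import DataItem


def get_monitoring_data(project: str, endpoint_id: str) -> "DataItem":
    # The quoted annotation is resolved lazily, so mlrun.datastore is not
    # imported when this module is first loaded.
    import mlrun

    return mlrun.get_dataitem(f"some/store/path/{project}/{endpoint_id}.json")
```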
mlrun/model_monitoring/stream_processing.py
CHANGED

@@ -30,6 +30,7 @@ import mlrun.model_monitoring.db
 import mlrun.serving.states
 import mlrun.utils
 from mlrun.common.schemas.model_monitoring.constants import (
+    EndpointType,
     EventFieldType,
     EventKeyMetrics,
     EventLiveStats,
@@ -783,6 +784,7 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
             if not feature_names and self._infer_columns_from_data:
                 feature_names = self._infer_feature_names_from_data(event)

+            endpoint_type = int(endpoint_record.get(EventFieldType.ENDPOINT_TYPE))
             if not feature_names:
                 logger.warn(
                     "Feature names are not initialized, they will be automatically generated",
@@ -801,11 +803,12 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                     },
                 )

-
-
-
-
-
+                if endpoint_type != EndpointType.ROUTER.value:
+                    update_monitoring_feature_set(
+                        endpoint_record=endpoint_record,
+                        feature_names=feature_names,
+                        feature_values=feature_values,
+                    )

             # Similar process with label columns
             if not label_columns and self._infer_columns_from_data:
@@ -825,11 +828,12 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                     endpoint_id=endpoint_id,
                     attributes={EventFieldType.LABEL_NAMES: json.dumps(label_columns)},
                 )
-
-
-
-
-
+                if endpoint_type != EndpointType.ROUTER.value:
+                    update_monitoring_feature_set(
+                        endpoint_record=endpoint_record,
+                        feature_names=label_columns,
+                        feature_values=label_values,
+                    )

             self.label_columns[endpoint_id] = label_columns
             self.feature_names[endpoint_id] = feature_names
@@ -842,7 +846,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
             )

             # Update the endpoint type within the endpoint types dictionary
-            endpoint_type = int(endpoint_record.get(EventFieldType.ENDPOINT_TYPE))
             self.endpoint_type[endpoint_id] = endpoint_type

             # Add feature_name:value pairs along with a mapping dictionary of all of these pairs
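The stream-processing hunks compute `endpoint_type` once, before the feature and label handling, and skip `update_monitoring_feature_set` for router endpoints. A self-contained sketch of that guard; the enum values and the stub updater are illustrative assumptions, the real definitions live in mlrun's model-monitoring constants:

```python
from enum import IntEnum


class EndpointType(IntEnum):
    # Illustrative values; the real enum is defined in
    # mlrun.common.schemas.model_monitoring.constants.
    NODE_EP = 1
    ROUTER = 2
    LEAF_EP = 3


def update_monitoring_feature_set(endpoint_record, feature_names, feature_values):
    print(f"updating feature set with {feature_names}")  # stub for illustration


def maybe_update_feature_set(endpoint_record: dict, feature_names, feature_values):
    endpoint_type = int(endpoint_record.get("endpoint_type"))
    # Router endpoints only fan traffic out to children, so their feature
    # set is left untouched.
    if endpoint_type != EndpointType.ROUTER.value:
        update_monitoring_feature_set(endpoint_record, feature_names, feature_values)


maybe_update_feature_set({"endpoint_type": 2}, ["f0"], [0.1])  # router: skipped
maybe_update_feature_set({"endpoint_type": 3}, ["f0"], [0.1])  # leaf: updated
```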
mlrun/platforms/__init__.py
CHANGED

@@ -17,19 +17,6 @@ from pprint import pprint
 from time import sleep
 from typing import Optional

-from mlrun_pipelines.common.mounts import VolumeMount
-from mlrun_pipelines.mounts import (
-    auto_mount,
-    mount_configmap,
-    mount_hostpath,
-    mount_pvc,
-    mount_s3,
-    mount_secret,
-    mount_v3io,
-    set_env_variables,
-    v3io_cred,
-)
-
 from .iguazio import (
     V3ioStreamClient,
     add_or_refresh_credentials,
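With this hunk, `mlrun.platforms` stops re-exporting the mount helpers (`auto_mount`, `mount_v3io`, `v3io_cred`, and friends). The file list above also shows a new `mlrun/runtimes/mounts.py` module, which appears to be where these helpers now live. A hedged sketch of updating an import, assuming `mlrun.runtimes.mounts` exposes `auto_mount` in 1.8.0rc2:

```python
import mlrun

# Before: the helpers were re-exported through mlrun.platforms.
# from mlrun.platforms import auto_mount

# After (assumed new location, based on the mlrun/runtimes/mounts.py module
# added in this release):
from mlrun.runtimes.mounts import auto_mount

fn = mlrun.new_function("trainer", kind="job", image="mlrun/mlrun")
fn.apply(auto_mount())  # attach the platform's default storage mount
```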
mlrun/projects/__init__.py
CHANGED

@@ -27,7 +27,12 @@ __all__ = [
 ]

 from .operations import build_function, deploy_function, run_function  # noqa
-from .pipelines import
+from .pipelines import (
+    import_remote_project,
+    load_and_run_workflow,
+    load_and_run,
+    pipeline_context,
+)  # noqa
 from .project import (
     MlrunProject,
     ProjectMetadata,
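After this change the new pipeline entry points can be imported directly from `mlrun.projects`. A small usage sketch; the URL and names are placeholders, and both helpers expect an MLRun execution context as their first argument (for example when used as job handlers):

```python
from mlrun.projects import import_remote_project, load_and_run_workflow


def loader(context):
    # Load a project from a remote source and (optionally) save it to the DB.
    import_remote_project(
        context,
        url="git://github.com/some-org/some-repo.git#main",  # placeholder
        project_name="demo-project",
    )


def runner(context):
    # Load the project and run one of its workflows.
    load_and_run_workflow(
        context,
        url="git://github.com/some-org/some-repo.git#main",  # placeholder
        project_name="demo-project",
        workflow_name="main",
    )
```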
mlrun/projects/pipelines.py
CHANGED

@@ -983,14 +983,25 @@ def github_webhook(request):
     return {"msg": "pushed"}


-def load_and_run(
+def load_and_run(context, *args, **kwargs):
+    """
+    This function serves as an alias to `load_and_run_workflow`,
+    allowing to continue using `load_and_run` without modifying existing workflows or exported runs.
+    This approach ensures backward compatibility,
+    while directing all new calls to the updated `load_and_run_workflow` function.
+    """
+    kwargs.pop("load_only", None)
+    kwargs.pop("save", None)
+    load_and_run_workflow(context, *args, **kwargs)
+
+
+def load_and_run_workflow(
     context: mlrun.execution.MLClientCtx,
     url: typing.Optional[str] = None,
     project_name: str = "",
     init_git: typing.Optional[bool] = None,
     subpath: typing.Optional[str] = None,
     clone: bool = False,
-    save: bool = True,
     workflow_name: typing.Optional[str] = None,
     workflow_path: typing.Optional[str] = None,
     workflow_arguments: typing.Optional[dict[str, typing.Any]] = None,
@@ -1003,14 +1014,12 @@ def load_and_run(
     local: typing.Optional[bool] = None,
     schedule: typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger] = None,
     cleanup_ttl: typing.Optional[int] = None,
-    load_only: bool = False,
     wait_for_completion: bool = False,
     project_context: typing.Optional[str] = None,
 ):
     """
     Auxiliary function that the RemoteRunner run once or run every schedule.
     This function loads a project from a given remote source and then runs the workflow.
-
     :param context: mlrun context.
     :param url: remote url that represents the project's source.
                 See 'mlrun.load_project()' for details
@@ -1018,7 +1027,6 @@
     :param init_git: if True, will git init the context dir
     :param subpath: project subpath (within the archive)
     :param clone: if True, always clone (delete any existing content)
-    :param save: whether to save the created project and artifact in the DB
     :param workflow_name: name of the workflow
     :param workflow_path: url to a workflow file, if not a project workflow
     :param workflow_arguments: kubeflow pipelines arguments (parameters)
@@ -1034,48 +1042,31 @@
     :param schedule: ScheduleCronTrigger class instance or a standard crontab expression string
     :param cleanup_ttl: pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
                         workflow and all its resources are deleted)
-    :param load_only: for just loading the project, inner use.
     :param wait_for_completion: wait for workflow completion before returning
     :param project_context: project context path (used for loading the project)
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            ["slack"]
-        )
-        url = get_ui_url(project_name, context.uid)
-        link = f"<{url}|*view workflow job details*>"
-        message = (
-            f":x: Failed to run scheduled workflow {workflow_name} in Project {project_name} !\n"
-            f"error: ```{error}```\n{link}"
-        )
-        # Sending Slack Notification without losing the original error:
-        try:
-            notification_pusher.push(
-                message=message,
-                severity=mlrun.common.schemas.NotificationSeverity.ERROR,
-            )
-
-        except Exception as exc:
-            logger.error("Failed to send slack notification", exc=err_to_str(exc))
-
-        raise error
-
-    context.logger.info(f"Loaded project {project.name} successfully")
+    project_context = project_context or f"./{project_name}"
+
+    # Load the project to fetch files which the runner needs, such as remote source files
+    pull_remote_project_files(
+        context=context,
+        project_context=project_context,
+        url=url,
+        project_name=project_name,
+        init_git=init_git,
+        subpath=subpath,
+        clone=clone,
+        schedule=schedule,
+        workflow_name=workflow_name,
+    )

-
-
+    # Retrieve the project object:
+    # - If the project exists in the MLRun database, it will be loaded from there.
+    # - If it doesn't exist in the database, it will be created from the previously loaded local directory.
+    project = mlrun.get_or_create_project(
+        context=project_context or f"./{project_name}",
+        name=project_name,
+    )

     # extract "start" notification if exists
     start_notifications = [
@@ -1108,18 +1099,156 @@
         raise RuntimeError(f"Workflow {workflow_log_message} failed") from run.exc

     if wait_for_completion:
+        handle_workflow_completion(
+            run=run,
+            project=project,
+            context=context,
+            workflow_log_message=workflow_log_message,
+        )
+
+
+def pull_remote_project_files(
+    context: mlrun.execution.MLClientCtx,
+    project_context: str,
+    url: str,
+    project_name: str,
+    init_git: typing.Optional[bool],
+    subpath: typing.Optional[str],
+    clone: bool,
+    schedule: typing.Optional[
+        typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger]
+    ],
+    workflow_name: typing.Optional[str],
+) -> None:
+    """
+    Load the project to clone remote files if they exist.
+    If an exception occurs during project loading, send a notification if the workflow is scheduled.
+
+    :param context: MLRun execution context.
+    :param project_context: Path to the project context.
+    :param url: URL of the project repository.
+    :param project_name: Name of the project.
+    :param init_git: Initialize a git repository.
+    :param subpath: Project subpath within the repository.
+    :param clone: Whether to clone the repository.
+    :param schedule: Schedule for running the workflow.
+    :param workflow_name: Name of the workflow to run.
+    """
+    try:
+        # Load the project to clone remote files if they exist.
+        # Using save=False to avoid overriding changes from the database if it already exists.
+        mlrun.load_project(
+            context=project_context,
+            url=url,
+            name=project_name,
+            init_git=init_git,
+            subpath=subpath,
+            clone=clone,
+            save=False,
+        )
+    except Exception as error:
+        notify_scheduled_workflow_failure(
+            schedule=schedule,
+            project_name=project_name,
+            workflow_name=workflow_name,
+            error=error,
+            context_uid=context.uid,
+        )
+        raise error
+
+
+def notify_scheduled_workflow_failure(
+    schedule,
+    project_name: str,
+    workflow_name: str,
+    error: Exception,
+    context_uid: str,
+) -> None:
+    if schedule:
+        notification_pusher = mlrun.utils.notifications.CustomNotificationPusher(
+            ["slack"]
+        )
+        url = get_ui_url(project_name, context_uid)
+        link = f"<{url}|*view workflow job details*>"
+        message = (
+            f":x: Failed to run scheduled workflow {workflow_name} "
+            f"in Project {project_name}!\n"
+            f"Error: ```{err_to_str(error)}```\n{link}"
+        )
+        # Sending Slack Notification without losing the original error:
         try:
-
-
-
-                "Failed waiting for workflow completion",
-                workflow=workflow_log_message,
-                exc=err_to_str(exc),
+            notification_pusher.push(
+                message=message,
+                severity=mlrun.common.schemas.NotificationSeverity.ERROR,
             )

-
-
-
-
-
-
+        except Exception as exc:
+            logger.error("Failed to send slack notification", exc=err_to_str(exc))
+
+
+def handle_workflow_completion(
+    run: _PipelineRunStatus,
+    project,
+    context: mlrun.execution.MLClientCtx,
+    workflow_log_message: str,
+) -> None:
+    """
+    Handle workflow completion by waiting for it to finish and logging the final state.
+
+    :param run: Run object containing workflow execution details.
+    :param project: MLRun project object.
+    :param context: MLRun execution context.
+    :param workflow_log_message: Message used for logging.
+    """
+    try:
+        run.wait_for_completion()
+    except Exception as exc:
+        mlrun.utils.logger.error(
+            "Failed waiting for workflow completion",
+            workflow=workflow_log_message,
+            exc=err_to_str(exc),
+        )
+
+    pipeline_state, _, _ = project.get_run_status(run)
+    context.log_result(key="workflow_state", value=pipeline_state, commit=True)
+    if pipeline_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
+        raise RuntimeError(
+            f"Workflow {workflow_log_message} failed, state={pipeline_state}"
+        )
+
+
+def import_remote_project(
+    context: mlrun.execution.MLClientCtx,
+    url: typing.Optional[str] = None,
+    project_name: str = "",
+    init_git: typing.Optional[bool] = None,
+    subpath: typing.Optional[str] = None,
+    clone: bool = False,
+    save: bool = True,
+    project_context: typing.Optional[str] = None,
+):
+    """
+    This function loads a project from a given remote source.
+
+    :param context: mlrun context.
+    :param url: remote url that represents the project's source.
+                See 'mlrun.load_project()' for details
+    :param project_name: project name
+    :param init_git: if True, will git init the context dir
+    :param subpath: project subpath (within the archive)
+    :param clone: if True, always clone (delete any existing content)
+    :param save: whether to save the created project and artifact in the DB
+    :param project_context: project context path (used for loading the project)
+    """
+    project = mlrun.load_project(
+        context=project_context or f"./{project_name}",
+        url=url,
+        name=project_name,
+        init_git=init_git,
+        subpath=subpath,
+        clone=clone,
+        save=save,
+        sync_functions=True,
+    )
+
+    context.logger.info(f"Loaded project {project.name} successfully")