mlrun 1.10.0rc38__py3-none-any.whl → 1.10.0rc41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/artifacts/document.py +6 -1
- mlrun/common/constants.py +6 -0
- mlrun/common/model_monitoring/helpers.py +1 -1
- mlrun/common/schemas/model_monitoring/constants.py +0 -2
- mlrun/common/secrets.py +22 -1
- mlrun/launcher/local.py +2 -0
- mlrun/model.py +7 -1
- mlrun/model_monitoring/api.py +3 -2
- mlrun/model_monitoring/applications/base.py +6 -3
- mlrun/model_monitoring/applications/context.py +1 -0
- mlrun/model_monitoring/db/tsdb/base.py +2 -4
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +8 -9
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +154 -76
- mlrun/projects/project.py +15 -2
- mlrun/run.py +7 -0
- mlrun/runtimes/__init__.py +18 -0
- mlrun/runtimes/base.py +3 -0
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mounts.py +5 -0
- mlrun/runtimes/nuclio/application/application.py +2 -0
- mlrun/runtimes/nuclio/function.py +2 -0
- mlrun/runtimes/nuclio/serving.py +67 -4
- mlrun/runtimes/pod.py +59 -10
- mlrun/serving/states.py +45 -21
- mlrun/utils/helpers.py +77 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc38.dist-info → mlrun-1.10.0rc41.dist-info}/METADATA +3 -3
- {mlrun-1.10.0rc38.dist-info → mlrun-1.10.0rc41.dist-info}/RECORD +32 -32
- {mlrun-1.10.0rc38.dist-info → mlrun-1.10.0rc41.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc38.dist-info → mlrun-1.10.0rc41.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc38.dist-info → mlrun-1.10.0rc41.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc38.dist-info → mlrun-1.10.0rc41.dist-info}/top_level.txt +0 -0
mlrun/artifacts/document.py
CHANGED
@@ -359,7 +359,12 @@ class DocumentArtifact(Artifact):
         self,
         splitter: Optional["TextSplitter"] = None,  # noqa: F821
     ) -> list["Document"]:  # noqa: F821
-
+        # Try new langchain 1.0+ import path first
+        try:
+            from langchain_core.documents import Document
+        except ImportError:
+            # Fall back to old langchain <1.0 import path
+            from langchain.schema import Document
 
         """
         Create LC documents from the artifact
mlrun/common/constants.py
CHANGED
@@ -27,6 +27,12 @@ DASK_LABEL_PREFIX = "dask.org/"
 NUCLIO_LABEL_PREFIX = "nuclio.io/"
 RESERVED_TAG_NAME_LATEST = "latest"
 
+# Kubernetes DNS-1123 label name length limit
+K8S_DNS_1123_LABEL_MAX_LENGTH = 63
+
+
+RESERVED_BATCH_JOB_SUFFIX = "-batch"
+
 JOB_TYPE_WORKFLOW_RUNNER = "workflow-runner"
 JOB_TYPE_PROJECT_LOADER = "project-loader"
 JOB_TYPE_RERUN_WORKFLOW_RUNNER = "rerun-workflow-runner"
mlrun/common/model_monitoring/helpers.py
CHANGED
@@ -170,6 +170,6 @@ def log_background_task_state(
         f"Model endpoint creation task is still in progress with the current state: "
         f"{background_task_state}. Events will not be monitored for the next "
         f"{mlrun.mlconf.model_endpoint_monitoring.model_endpoint_creation_check_period} seconds",
-        function_name=server.
+        function_name=server.function_name,
         background_task_check_timestamp=background_task_check_timestamp.isoformat(),
     )
mlrun/common/schemas/model_monitoring/constants.py
CHANGED
@@ -486,8 +486,6 @@ class ModelMonitoringLabels:
 
 _RESERVED_FUNCTION_NAMES = MonitoringFunctionNames.list() + [SpecialApps.MLRUN_INFRA]
 
-_RESERVED_EVALUATE_FUNCTION_SUFFIX = "-batch"
-
 
 class ModelEndpointMonitoringMetricType(StrEnum):
     RESULT = "result"
mlrun/common/secrets.py
CHANGED
@@ -11,10 +11,31 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import re
 from abc import ABC, abstractmethod
 
 import mlrun.common.schemas
+from mlrun.config import config as mlconf
+
+_AUTH_SECRET_NAME_TEMPLATE = re.escape(
+    mlconf.secret_stores.kubernetes.auth_secret_name.format(
+        hashed_access_key="",
+    )
+)
+AUTH_SECRET_PATTERN = re.compile(f"^{_AUTH_SECRET_NAME_TEMPLATE}.*")
+
+
+def validate_not_forbidden_secret(secret_name: str) -> None:
+    """
+    Forbid client-supplied references to internal MLRun auth/project secrets.
+    No-op when running inside the API server (API enrichments are allowed).
+    """
+    if not secret_name or mlrun.config.is_running_as_api():
+        return
+    if AUTH_SECRET_PATTERN.match(secret_name):
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"Forbidden secret '{secret_name}' matches MLRun auth-secret pattern."
+        )
 
 
 class SecretProviderInterface(ABC):
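The new validate_not_forbidden_secret helper above is a client-side guard: it rejects secret names that match MLRun's internal auth-secret template and silently accepts everything else. A hedged usage sketch (the forbidden name below assumes the default template resolves to an "mlrun-auth-secrets." prefix; the actual prefix comes from mlconf.secret_stores.kubernetes.auth_secret_name at runtime):

import mlrun.common.secrets
import mlrun.errors

try:
    # Assumed-forbidden name, for illustration; it matches the default auth-secret prefix.
    mlrun.common.secrets.validate_not_forbidden_secret("mlrun-auth-secrets.abc123")
except mlrun.errors.MLRunInvalidArgumentError as exc:
    print(f"rejected: {exc}")

# A regular user secret passes through; the function simply returns None.
mlrun.common.secrets.validate_not_forbidden_secret("my-azure-vault-credentials")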
mlrun/launcher/local.py
CHANGED
@@ -243,6 +243,8 @@ class ClientLocalLauncher(launcher.ClientBaseLauncher):
 
         # if the handler has module prefix force "local" (vs "handler") runtime
         kind = "local" if isinstance(handler, str) and "." in handler else ""
+
+        # Create temporary local function for execution
         fn = mlrun.new_function(meta.name, command=command, args=args, kind=kind)
         fn.metadata = meta
         setattr(fn, "_is_run_local", True)
mlrun/model.py
CHANGED
@@ -29,6 +29,7 @@ import pydantic.v1.error_wrappers
 import mlrun
 import mlrun.common.constants as mlrun_constants
 import mlrun.common.schemas.notification
+import mlrun.common.secrets
 import mlrun.utils.regex
 
 from .utils import (
@@ -1616,7 +1617,12 @@ class RunTemplate(ModelObj):
 
         :returns: The RunTemplate object
         """
-
+        if kind == "azure_vault" and isinstance(source, dict):
+            candidate_secret_name = (source.get("k8s_secret") or "").strip()
+            if candidate_secret_name:
+                mlrun.common.secrets.validate_not_forbidden_secret(
+                    candidate_secret_name
+                )
         if kind == "vault" and isinstance(source, list):
             source = {"project": self.metadata.project, "secrets": source}
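With the hunk above, RunTemplate.with_secrets applies the same guard when an azure_vault source names a Kubernetes secret. A sketch of how a caller would reach it, assuming the usual azure_vault source layout (the vault and secret names are hypothetical):

import mlrun

task = mlrun.new_task(name="example-task")
task.with_secrets(
    "azure_vault",
    {
        "name": "my-key-vault",
        # A user-owned secret passes; a name matching MLRun's auth-secret pattern
        # would now raise MLRunInvalidArgumentError on the client side.
        "k8s_secret": "my-azure-credentials",
        "secrets": ["db-password"],
    },
)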
mlrun/model_monitoring/api.py
CHANGED
@@ -563,9 +563,10 @@ def _create_model_monitoring_function_base(
             "An application cannot have the following names: "
             f"{mm_constants._RESERVED_FUNCTION_NAMES}"
         )
-
+    _, has_valid_suffix, suffix = mlrun.utils.helpers.ensure_batch_job_suffix(name)
+    if name and not has_valid_suffix:
         raise mlrun.errors.MLRunValueError(
-            "Model monitoring application names cannot end with
+            f"Model monitoring application names cannot end with `{suffix}`"
         )
     if func is None:
         func = ""
mlrun/model_monitoring/applications/base.py
CHANGED
@@ -799,10 +799,13 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
             f"`{mm_constants.APP_NAME_REGEX.pattern}`. "
             "Please choose another `func_name`."
         )
-
-        job_name
+        job_name, was_renamed, suffix = mlrun.utils.helpers.ensure_batch_job_suffix(
+            job_name
+        )
+        if was_renamed:
             mlrun.utils.logger.info(
-                'Changing function name - adding `"
+                f'Changing function name - adding `"{suffix}"` suffix',
+                func_name=job_name,
             )
 
         return job_name
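Both call sites above unpack a three-element tuple from mlrun.utils.helpers.ensure_batch_job_suffix, whose implementation is not shown in this section (mlrun/utils/helpers.py is also changed in this release). A hypothetical reconstruction of its contract, inferred only from these call sites: it appends the reserved "-batch" suffix when missing and reports whether it did so, which lets application names that already end with the suffix be rejected while the evaluate job name gets the suffix added and logged.

# Hypothetical sketch, inferred from the call sites; the real helper may differ.
RESERVED_BATCH_JOB_SUFFIX = "-batch"  # mirrors mlrun.common.constants

def ensure_batch_job_suffix(name: str) -> tuple[str, bool, str]:
    """Return (name with the suffix ensured, whether the suffix was added, the suffix)."""
    if name.endswith(RESERVED_BATCH_JOB_SUFFIX):
        return name, False, RESERVED_BATCH_JOB_SUFFIX
    return name + RESERVED_BATCH_JOB_SUFFIX, True, RESERVED_BATCH_JOB_SUFFIX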
mlrun/model_monitoring/db/tsdb/base.py
CHANGED
@@ -14,7 +14,7 @@
 
 from abc import ABC, abstractmethod
 from datetime import datetime, timedelta
-from typing import
+from typing import ClassVar, Literal, Optional, Union
 
 import pandas as pd
 import pydantic.v1
@@ -444,11 +444,9 @@ class TSDBConnector(ABC):
     ]
     """
 
-
+    def add_basic_metrics(
         self,
         model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
-        project: str,
-        run_in_threadpool: Callable,
         metric_list: Optional[list[str]] = None,
     ) -> list[mlrun.common.schemas.ModelEndpoint]:
         raise NotImplementedError()
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py
CHANGED
@@ -14,7 +14,7 @@
 
 import threading
 from datetime import datetime, timedelta
-from typing import
+from typing import Final, Literal, Optional, Union
 
 import pandas as pd
 import taosws
@@ -1057,7 +1057,6 @@ class TDEngineConnector(TSDBConnector):
             ]
         ):
             metric_objects = []
-
            if not df_results.empty:
                 df_results.rename(
                     columns={
@@ -1069,7 +1068,9 @@ class TDEngineConnector(TSDBConnector):
                 metric_objects.append(
                     mm_schemas.ApplicationResultRecord(
                         time=datetime.fromisoformat(
-                            row[mm_schemas.WriterEvent.END_INFER_TIME]
+                            row[mm_schemas.WriterEvent.END_INFER_TIME].replace(
+                                " +", "+"
+                            )
                         ),
                         result_name=row[mm_schemas.ResultData.RESULT_NAME],
                         kind=row[mm_schemas.ResultData.RESULT_KIND],
@@ -1089,7 +1090,9 @@ class TDEngineConnector(TSDBConnector):
                 metric_objects.append(
                     mm_schemas.ApplicationMetricRecord(
                         time=datetime.fromisoformat(
-                            row[mm_schemas.WriterEvent.END_INFER_TIME]
+                            row[mm_schemas.WriterEvent.END_INFER_TIME].replace(
+                                " +", "+"
+                            )
                         ),
                         metric_name=row[mm_schemas.MetricData.METRIC_NAME],
                         value=row[mm_schemas.MetricData.METRIC_VALUE],
@@ -1248,11 +1251,9 @@ class TDEngineConnector(TSDBConnector):
         df.dropna(inplace=True)
         return df
 
-
+    def add_basic_metrics(
         self,
         model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
-        project: str,
-        run_in_threadpool: Callable,
         metric_list: Optional[list[str]] = None,
     ) -> list[mlrun.common.schemas.ModelEndpoint]:
         """
@@ -1260,8 +1261,6 @@ class TDEngineConnector(TSDBConnector):
 
         :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
                                        be filled with the relevant basic metrics.
-        :param project: The name of the project.
-        :param run_in_threadpool: A function that runs another function in a thread pool.
         :param metric_list: List of metrics to include from the time series DB. Defaults to all metrics.
 
         :return: A list of `ModelEndpointMonitoringMetric` objects.
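The two END_INFER_TIME hunks above share one fix: the timestamp string coming back from TDEngine apparently carries a space before the UTC offset, which datetime.fromisoformat rejects, so the space is collapsed before parsing. A small illustration with a made-up timestamp of that shape:

from datetime import datetime

raw = "2024-01-01 12:00:00.000 +00:00"  # illustrative value with a space before the offset

try:
    datetime.fromisoformat(raw)  # raises ValueError: invalid isoformat string
except ValueError as exc:
    print(f"unparsable as-is: {exc}")

fixed = datetime.fromisoformat(raw.replace(" +", "+"))
print(fixed.isoformat())  # 2024-01-01T12:00:00+00:00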
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py
CHANGED
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import math
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from io import StringIO
-from typing import
+from typing import Literal, Optional, Union
 
 import pandas as pd
 import v3io_frames
@@ -1230,11 +1230,9 @@ class V3IOTSDBConnector(TSDBConnector):
         )
         return df.reset_index(drop=True)
 
-
+    def add_basic_metrics(
         self,
         model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
-        project: str,
-        run_in_threadpool: Callable,
         metric_list: Optional[list[str]] = None,
     ) -> list[mlrun.common.schemas.ModelEndpoint]:
         """
@@ -1242,8 +1240,6 @@ class V3IOTSDBConnector(TSDBConnector):
 
         :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
                                        be filled with the relevant basic metrics.
-        :param project: The name of the project.
-        :param run_in_threadpool: A function that runs another function in a thread pool.
         :param metric_list: List of metrics to include from the time series DB. Defaults to all metrics.
 
         :return: A list of `ModelEndpointMonitoringMetric` objects.
@@ -1272,8 +1268,7 @@ class V3IOTSDBConnector(TSDBConnector):
             function,
             _,
         ) in metric_name_to_function_and_column_name.items():
-            metric_name_to_result[metric_name] =
-                function,
+            metric_name_to_result[metric_name] = function(
                 endpoint_ids=uids,
                 get_raw=True,
             )
@@ -1344,7 +1339,7 @@ class V3IOTSDBConnector(TSDBConnector):
         else:
             filter_query = app_filter_query
 
-
+        raw_frames: list[v3io_frames.client.RawFrame] = self._get_records(
             table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
             start=start,
             end=end,
@@ -1353,39 +1348,33 @@ class V3IOTSDBConnector(TSDBConnector):
                 mm_schemas.ResultData.RESULT_STATUS,
             ],
             filter_query=filter_query,
+            get_raw=True,
         )
 
-
-        if result_status_list and not df.empty:
-            df = df[df[mm_schemas.ResultData.RESULT_STATUS].isin(result_status_list)]
-
-        if df.empty:
+        if not raw_frames:
             return {}
-        else:
-            # convert application name to lower case
-            df[mm_schemas.ApplicationEvent.APPLICATION_NAME] = df[
-                mm_schemas.ApplicationEvent.APPLICATION_NAME
-            ].str.lower()
-
-            df = (
-                df[
-                    [
-                        mm_schemas.ApplicationEvent.APPLICATION_NAME,
-                        mm_schemas.ResultData.RESULT_STATUS,
-                        mm_schemas.ResultData.RESULT_VALUE,
-                    ]
-                ]
-                .groupby(
-                    [
-                        mm_schemas.ApplicationEvent.APPLICATION_NAME,
-                        mm_schemas.ResultData.RESULT_STATUS,
-                    ],
-                    observed=True,
-                )
-                .count()
-            )
 
-
+        # Count occurrences by (application_name, result_status) from RawFrame objects
+        count_dict = {}
+
+        for frame in raw_frames:
+            # Extract column data from each RawFrame
+            app_name = frame.column_data(mm_schemas.ApplicationEvent.APPLICATION_NAME)[
+                0
+            ]
+            statuses = frame.column_data(mm_schemas.ResultData.RESULT_STATUS)
+
+            for status in statuses:
+                # Filter by result status if specified
+                if result_status_list and status not in result_status_list:
+                    continue
+
+                # Convert application name to lower case
+                key = (app_name.lower(), status)
+
+                # Update the count in the dictionary
+                count_dict[key] = count_dict.get(key, 0) + 1
+        return count_dict
 
     def count_processed_model_endpoints(
         self,
@@ -1543,51 +1532,140 @@ class V3IOTSDBConnector(TSDBConnector):
     ) -> mm_schemas.ModelEndpointDriftValues:
         table = mm_schemas.V3IOTSDBTables.APP_RESULTS
         start, end, interval = self._prepare_aligned_start_end(start, end)
-
+        raw_frames: list[v3io_frames.client.RawFrame] = self._get_records(
             table=table,
             start=start,
            end=end,
             columns=[mm_schemas.ResultData.RESULT_STATUS],
+            get_raw=True,
         )
-
-        if
+
+        if not raw_frames:
+            return mm_schemas.ModelEndpointDriftValues(values=[])
+
+        aggregated_data = self._aggregate_raw_drift_data(
+            raw_frames=raw_frames, start=start, end=end, interval=interval
+        )
+        if not aggregated_data:
+            return mm_schemas.ModelEndpointDriftValues(values=[])
+
+        # Filter to only include entries with max result_status >= 1
+        filtered_data = [
+            (endpoint_id, timestamp, max_status)
+            for endpoint_id, timestamp, max_status in aggregated_data
+            if max_status >= 1
+        ]
+
+        if not filtered_data:
             return mm_schemas.ModelEndpointDriftValues(values=[])
-
-        return self.
+
+        return self._convert_drift_data_to_values(aggregated_data=filtered_data)
 
     @staticmethod
     def _aggregate_raw_drift_data(
-        [12 removed lines not captured in this diff view]
+        raw_frames: list[v3io_frames.client.RawFrame],
+        start: datetime,
+        end: datetime,
+        interval: str,
+    ) -> list[tuple[str, datetime, float]]:
+        """
+        Aggregate raw drift data from RawFrame objects.
+
+        :param raw_frames: List of RawFrame objects containing drift data.
+        :param start:      Start datetime for filtering data.
+        :param end:        End datetime for filtering data.
+        :param interval:   Time interval string (e.g., '5min') for aggregation
+
+        :returns: list of tuples: (endpoint_id, timestamp, max_result_status)
+        """
+        if not raw_frames:
+            return []
+
+        # Parse interval to get timedelta
+        interval_td = pd.Timedelta(interval)
+
+        # Collect all data points from RawFrame objects
+        data_points = []
+        for frame in raw_frames:
+            endpoint_id = frame.column_data(EventFieldType.ENDPOINT_ID)[0]
+            result_statuses = frame.column_data(mm_schemas.ResultData.RESULT_STATUS)
+            timestamps = frame.indices()[0].times
+
+            # Combine data from this frame
+            for i, (status, timestamp) in enumerate(zip(result_statuses, timestamps)):
+                # V3IO TSDB returns timestamps in nanoseconds
+                timestamp_dt = pd.Timestamp(
+                    timestamp, unit="ns", tzinfo=timezone.utc
+                ).to_pydatetime()
+
+                # Filter by time window
+                if start <= timestamp_dt < end:
+                    data_points.append((endpoint_id, timestamp_dt, status))
+
+        if not data_points:
+            return []
+
+        # Group by endpoint_id and time intervals, then find max status
+        # Create time buckets aligned to start
+        grouped_data = {}
+        for endpoint_id, timestamp, status in data_points:
+            # Calculate which interval bucket this timestamp falls into
+            time_diff = timestamp - start
+            bucket_index = int(time_diff / interval_td)
+            bucket_start = start + (bucket_index * interval_td)
+
+            key = (endpoint_id, bucket_start)
+            if key not in grouped_data:
+                grouped_data[key] = status
+            else:
+                # Keep the maximum status value
+                grouped_data[key] = max(grouped_data[key], status)
+
+        # Convert to list of tuples
+        result = [
+            (endpoint_id, timestamp, max_status)
+            for (endpoint_id, timestamp), max_status in grouped_data.items()
         ]
-        [15 removed lines not captured in this diff view]
+
+        return result
+
+    @staticmethod
+    def _convert_drift_data_to_values(
+        aggregated_data: list[tuple[str, datetime, float]],
+    ) -> mm_schemas.ModelEndpointDriftValues:
+        """
+        Convert aggregated drift data to ModelEndpointDriftValues format.
+
+        :param aggregated_data: List of tuples (endpoint_id, timestamp, max_result_status)
+        :return: ModelEndpointDriftValues with counts of suspected and detected per timestamp
+        """
+        suspected_val = mm_schemas.constants.ResultStatusApp.potential_detection.value
+        detected_val = mm_schemas.constants.ResultStatusApp.detected.value
+
+        # Group by timestamp and result status, then count occurrences
+        timestamp_status_counts = {}
+        for _, timestamp, max_status in aggregated_data:
+            key = (timestamp, max_status)
+            timestamp_status_counts[key] = timestamp_status_counts.get(key, 0) + 1
+
+        # Organize by timestamp with counts for suspected and detected
+        timestamp_counts = {}
+        for (timestamp, status), count in timestamp_status_counts.items():
+            if timestamp not in timestamp_counts:
+                timestamp_counts[timestamp] = {
+                    "count_suspected": 0,
+                    "count_detected": 0,
                 }
-        [5 removed lines not captured in this diff view]
+
+            if status == suspected_val:
+                timestamp_counts[timestamp]["count_suspected"] = count
+            elif status == detected_val:
+                timestamp_counts[timestamp]["count_detected"] = count
+
+        # Convert to the expected format: list of (timestamp, count_suspected, count_detected)
+        values = [
+            (timestamp, counts["count_suspected"], counts["count_detected"])
+            for timestamp, counts in sorted(timestamp_counts.items())
+        ]
+
+        return mm_schemas.ModelEndpointDriftValues(values=values)
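The bucketing arithmetic in _aggregate_raw_drift_data assigns each sample to a fixed-width window aligned to the prepared start time: the elapsed time is integer-divided by the interval to get a bucket index, and the bucket start is recovered from that index. A small numeric illustration with made-up values:

from datetime import datetime, timezone

import pandas as pd

start = datetime(2024, 1, 1, tzinfo=timezone.utc)
interval_td = pd.Timedelta("10min")

timestamp = datetime(2024, 1, 1, 0, 27, 30, tzinfo=timezone.utc)
bucket_index = int((timestamp - start) / interval_td)  # 27.5 min // 10 min -> 2
bucket_start = start + bucket_index * interval_td      # 2024-01-01 00:20:00+00:00
print(bucket_index, bucket_start)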
mlrun/projects/project.py
CHANGED
@@ -45,6 +45,7 @@ import mlrun.common.runtimes.constants
 import mlrun.common.schemas.alert
 import mlrun.common.schemas.artifact
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
+import mlrun.common.secrets
 import mlrun.datastore.datastore_profile
 import mlrun.db
 import mlrun.errors
@@ -3418,7 +3419,12 @@ class MlrunProject(ModelObj):
         self._initialized = True
         return self.spec._function_objects
 
-    def with_secrets(
+    def with_secrets(
+        self,
+        kind,
+        source,
+        prefix="",
+    ):
         """register a secrets source (file, env or dict)
 
         read secrets from a source provider to be used in workflows, example::
@@ -3440,12 +3446,19 @@ class MlrunProject(ModelObj):
 
         This will enable access to all secrets in vault registered to the current project.
 
-        :param kind:   secret type (file, inline, env, vault)
+        :param kind:   secret type (file, inline, env, vault, azure_vault)
         :param source: secret data or link (see example)
         :param prefix: add a prefix to the keys in this source
 
         :returns: project object
         """
+        # Block using mlrun-auth-secrets.* via azure_vault's k8s_secret param (client-side only)
+        if kind == "azure_vault" and isinstance(source, dict):
+            candidate_secret_name = (source.get("k8s_secret") or "").strip()
+            if candidate_secret_name:
+                mlrun.common.secrets.validate_not_forbidden_secret(
+                    candidate_secret_name
+                )
 
         if kind == "vault" and isinstance(source, list):
             source = {"project": self.metadata.name, "secrets": source}
mlrun/run.py
CHANGED
@@ -555,6 +555,7 @@ def new_function(
 
     # make sure function name is valid
     name = mlrun.utils.helpers.normalize_name(name)
+    mlrun.utils.helpers.validate_function_name(name)
 
     runner.metadata.name = name
     runner.metadata.project = (
@@ -594,6 +595,7 @@ def new_function(
     )
 
     runner.prepare_image_for_deploy()
+
     return runner
 
 
@@ -798,6 +800,9 @@ def code_to_function(
             kind=sub_kind,
             ignored_tags=ignored_tags,
         )
+
+        mlrun.utils.helpers.validate_function_name(name)
+
         spec["spec"]["env"].append(
             {
                 "name": "MLRUN_HTTPDB__NUCLIO__EXPLICIT_ACK",
@@ -850,6 +855,7 @@ def code_to_function(
         runtime.spec.build.code_origin = code_origin
         runtime.spec.build.origin_filename = filename or (name + ".ipynb")
         update_common(runtime, spec)
+
         return runtime
 
     if kind is None or kind in ["", "Function"]:
@@ -863,6 +869,7 @@ def code_to_function(
 
     if not name:
         raise ValueError("name must be specified")
+
    h = get_in(spec, "spec.handler", "").split(":")
    runtime.handler = h[0] if len(h) <= 1 else h[1]
    runtime.metadata = get_in(spec, "spec.metadata")
mlrun/runtimes/__init__.py
CHANGED
@@ -221,6 +221,24 @@ class RuntimeKinds:
             return True
         return False
 
+    @staticmethod
+    def requires_k8s_name_validation(kind: str) -> bool:
+        """
+        Returns True if the runtime kind creates Kubernetes resources that use the function name.
+
+        Function names for k8s-deployed runtimes must conform to DNS-1123 label requirements:
+        - Lowercase alphanumeric characters or '-'
+        - Start and end with an alphanumeric character
+        - Maximum 63 characters
+
+        Local runtimes (local, handler) run on the local machine and don't create k8s resources,
+        so they don't require k8s naming validation.
+
+        :param kind: Runtime kind string (job, spark, serving, local, etc.)
+        :return: True if function name needs k8s DNS-1123 validation, False otherwise
+        """
+        return not RuntimeKinds.is_local_runtime(kind)
+
     @staticmethod
     def requires_absolute_artifacts_path(kind):
         """
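requires_k8s_name_validation only reports whether the DNS-1123 rules apply; the actual check is delegated to mlrun.utils.helpers.validate_function_name, which is part of this release but not shown in this section. A hypothetical standalone version of such a check, mirroring the rules quoted in the docstring above (mlrun's own implementation may differ):

import re

K8S_DNS_1123_LABEL_MAX_LENGTH = 63  # mirrors mlrun.common.constants
DNS_1123_LABEL_PATTERN = re.compile(r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?$")

def is_valid_k8s_function_name(name: str) -> bool:
    return (
        len(name) <= K8S_DNS_1123_LABEL_MAX_LENGTH
        and DNS_1123_LABEL_PATTERN.match(name) is not None
    )

print(is_valid_k8s_function_name("my-trainer"))  # True
print(is_valid_k8s_function_name("My_Trainer"))  # False: uppercase and underscore
print(is_valid_k8s_function_name("x" * 64))      # False: longer than 63 characters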
mlrun/runtimes/base.py
CHANGED
@@ -393,6 +393,9 @@ class BaseRuntime(ModelObj):
             FutureWarning,
         )
         output_path = output_path or out_path or artifact_path
+
+        mlrun.utils.helpers.validate_function_name(self.metadata.name)
+
         launcher = mlrun.launcher.factory.LauncherFactory().create_launcher(
             self._is_remote, local=local, **launcher_kwargs
         )