mlrun 1.7.0rc7__py3-none-any.whl → 1.7.0rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/__main__.py +2 -0
- mlrun/common/schemas/__init__.py +3 -0
- mlrun/common/schemas/api_gateway.py +8 -1
- mlrun/common/schemas/hub.py +7 -9
- mlrun/common/schemas/model_monitoring/constants.py +1 -1
- mlrun/common/schemas/pagination.py +26 -0
- mlrun/common/schemas/project.py +15 -10
- mlrun/config.py +28 -10
- mlrun/datastore/__init__.py +3 -7
- mlrun/datastore/datastore_profile.py +19 -1
- mlrun/datastore/snowflake_utils.py +43 -0
- mlrun/datastore/sources.py +9 -26
- mlrun/datastore/targets.py +131 -11
- mlrun/datastore/utils.py +10 -5
- mlrun/db/base.py +44 -0
- mlrun/db/httpdb.py +122 -21
- mlrun/db/nopdb.py +107 -0
- mlrun/feature_store/api.py +3 -2
- mlrun/feature_store/retrieval/spark_merger.py +27 -23
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
- mlrun/kfpops.py +2 -5
- mlrun/launcher/base.py +1 -1
- mlrun/launcher/client.py +2 -2
- mlrun/model_monitoring/helpers.py +3 -1
- mlrun/projects/pipelines.py +1 -1
- mlrun/projects/project.py +32 -21
- mlrun/run.py +5 -1
- mlrun/runtimes/__init__.py +16 -0
- mlrun/runtimes/base.py +4 -1
- mlrun/runtimes/kubejob.py +26 -121
- mlrun/runtimes/nuclio/api_gateway.py +58 -8
- mlrun/runtimes/nuclio/application/application.py +79 -1
- mlrun/runtimes/nuclio/application/reverse_proxy.go +9 -1
- mlrun/runtimes/nuclio/function.py +11 -8
- mlrun/runtimes/nuclio/serving.py +2 -2
- mlrun/runtimes/pod.py +145 -0
- mlrun/runtimes/utils.py +0 -28
- mlrun/serving/remote.py +2 -3
- mlrun/serving/routers.py +4 -3
- mlrun/serving/server.py +1 -1
- mlrun/serving/states.py +6 -9
- mlrun/serving/v2_serving.py +4 -3
- mlrun/utils/http.py +1 -1
- mlrun/utils/retryer.py +1 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc9.dist-info}/METADATA +15 -15
- {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc9.dist-info}/RECORD +52 -50
- {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc9.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc9.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc9.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc9.dist-info}/top_level.txt +0 -0
mlrun/datastore/utils.py
CHANGED

@@ -23,24 +23,29 @@ import semver
 import mlrun.datastore
 
 
-def parse_kafka_url(url: str, bootstrap_servers: list = None) -> tuple[str, list]:
+def parse_kafka_url(
+    url: str, brokers: typing.Union[list, str] = None
+) -> tuple[str, list]:
     """Generating Kafka topic and adjusting a list of bootstrap servers.
 
     :param url: URL path to parse using urllib.parse.urlparse.
-    :param
+    :param brokers: List of kafka brokers.
 
     :return: A tuple of:
          [0] = Kafka topic value
          [1] = List of bootstrap servers
     """
-
+    brokers = brokers or []
+
+    if isinstance(brokers, str):
+        brokers = brokers.split(",")
 
     # Parse the provided URL into six components according to the general structure of a URL
     url = urlparse(url)
 
     # Add the network location to the bootstrap servers list
     if url.netloc:
-
+        brokers = [url.netloc] + brokers
 
     # Get the topic value from the parsed url
     query_dict = parse_qs(url.query)

@@ -49,7 +54,7 @@ def parse_kafka_url(url: str, bootstrap_servers: list = None) -> tuple[str, list
     else:
         topic = url.path
         topic = topic.lstrip("/")
-    return topic,
+    return topic, brokers
 
 
 def upload_tarball(source_dir, target, secrets=None):
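In effect, parse_kafka_url now takes brokers as either a list or a comma-separated string and prepends the URL's network location to the broker list. A minimal usage sketch of the new signature (broker addresses and topic are made up):

    from mlrun.datastore.utils import parse_kafka_url

    topic, brokers = parse_kafka_url(
        "kafka://broker-0:9092/my-topic",
        brokers="broker-1:9092,broker-2:9092",  # a list works too
    )
    print(topic)    # my-topic
    print(brokers)  # ['broker-0:9092', 'broker-1:9092', 'broker-2:9092']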
mlrun/db/base.py
CHANGED

@@ -634,6 +634,27 @@ class RunDBInterface(ABC):
     def delete_api_gateway(self, name, project=None):
         pass
 
+    @abstractmethod
+    def remote_builder(
+        self,
+        func: "mlrun.runtimes.BaseRuntime",
+        with_mlrun: bool,
+        mlrun_version_specifier: Optional[str] = None,
+        skip_deployed: bool = False,
+        builder_env: Optional[dict] = None,
+        force_build: bool = False,
+    ):
+        pass
+
+    @abstractmethod
+    def deploy_nuclio_function(
+        self,
+        func: "mlrun.runtimes.RemoteRuntime",
+        builder_env: Optional[dict] = None,
+    ):
+        pass
+
+    @abstractmethod
     def get_builder_status(
         self,
         func: "mlrun.runtimes.BaseRuntime",

@@ -644,6 +665,16 @@ class RunDBInterface(ABC):
     ):
         pass
 
+    @abstractmethod
+    def get_nuclio_deploy_status(
+        self,
+        func: "mlrun.runtimes.RemoteRuntime",
+        last_log_timestamp: float = 0.0,
+        verbose: bool = False,
+    ):
+        pass
+
+    @abstractmethod
     def set_run_notifications(
         self,
         project: str,

@@ -652,6 +683,7 @@ class RunDBInterface(ABC):
     ):
         pass
 
+    @abstractmethod
     def store_run_notifications(
         self,
         notification_objects: list[mlrun.model.Notification],

@@ -661,40 +693,49 @@ class RunDBInterface(ABC):
     ):
         pass
 
+    @abstractmethod
     def get_log_size(self, uid, project=""):
         pass
 
+    @abstractmethod
     def watch_log(self, uid, project="", watch=True, offset=0):
         pass
 
+    @abstractmethod
     def get_datastore_profile(
         self, name: str, project: str
     ) -> Optional[mlrun.common.schemas.DatastoreProfile]:
         pass
 
+    @abstractmethod
     def delete_datastore_profile(
         self, name: str, project: str
     ) -> mlrun.common.schemas.DatastoreProfile:
         pass
 
+    @abstractmethod
     def list_datastore_profiles(
         self, project: str
     ) -> list[mlrun.common.schemas.DatastoreProfile]:
         pass
 
+    @abstractmethod
     def store_datastore_profile(
         self, profile: mlrun.common.schemas.DatastoreProfile, project: str
     ):
         pass
 
+    @abstractmethod
     def function_status(self, project, name, kind, selector):
         pass
 
+    @abstractmethod
     def start_function(
         self, func_url: str = None, function: "mlrun.runtimes.BaseRuntime" = None
     ):
         pass
 
+    @abstractmethod
     def submit_workflow(
         self,
         project: str,

@@ -713,6 +754,7 @@ class RunDBInterface(ABC):
     ) -> "mlrun.common.schemas.WorkflowResponse":
         pass
 
+    @abstractmethod
     def update_model_monitoring_controller(
         self,
         project: str,

@@ -721,6 +763,7 @@ class RunDBInterface(ABC):
     ):
         pass
 
+    @abstractmethod
     def enable_model_monitoring(
         self,
         project: str,

@@ -730,6 +773,7 @@ class RunDBInterface(ABC):
     ) -> None:
         raise NotImplementedError
 
+    @abstractmethod
     def deploy_histogram_data_drift_app(
         self, project: str, image: str = "mlrun/mlrun"
     ) -> None:
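The practical effect of the new @abstractmethod markers is that concrete RunDBInterface backends can no longer be instantiated without implementing these methods. A generic sketch of the mechanism (standalone Python, not mlrun code):

    import abc

    class RunDB(abc.ABC):
        @abc.abstractmethod
        def remote_builder(self, func, with_mlrun, **kwargs):
            ...  # subclasses must provide a real implementation

    class IncompleteDB(RunDB):
        pass  # remote_builder is missing

    try:
        IncompleteDB()
    except TypeError as exc:
        print(exc)  # can't instantiate abstract class IncompleteDB ...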
mlrun/db/httpdb.py
CHANGED

@@ -1184,7 +1184,7 @@ class HTTPRunDB(RunDBInterface):
             period didn't pass.
         :param grace_period: Grace period given to the runtime resource before they are actually removed, counted from
             the moment they moved to terminal state
-            (defaults to mlrun.
+            (defaults to mlrun.mlconf.runtime_resources_deletion_grace_period).
 
         :returns: :py:class:`~mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput` listing the runtime resources
             that were removed.

@@ -1343,21 +1343,7 @@ class HTTPRunDB(RunDBInterface):
         :param builder_env: Kaniko builder pod env vars dict (for config/credentials)
         :param force_build: Force building the image, even when no changes were made
         """
-        is_s3_source = func.spec.build.source and func.spec.build.source.startswith(
-            "s3://"
-        )
-        is_ecr_image = mlrun.utils.is_ecr_url(config.httpdb.builder.docker_registry)
-        if not func.spec.build.load_source_on_run and is_s3_source and is_ecr_image:
-            logger.warning(
-                "Building a function image to ECR and loading an S3 source to the image may require conflicting access "
-                "keys. Only the permissions granted to the platform's configured secret will take affect "
-                "(see mlrun.config.config.httpdb.builder.docker_registry_secret). "
-                "In case the permissions are limited to ECR scope, you may use pull_at_runtime=True instead",
-                source=func.spec.build.source,
-                load_source_on_run=func.spec.build.load_source_on_run,
-                default_docker_registry=config.httpdb.builder.docker_registry,
-            )
-
+        self.warn_on_s3_and_ecr_permissions_conflict(func)
         try:
             req = {
                 "function": func.to_dict(),

@@ -1376,10 +1362,103 @@
 
         if not resp.ok:
             logger.error(f"bad resp!!\n{resp.text}")
-            raise ValueError("bad
+            raise ValueError("bad submit build response")
+
+        return resp.json()
+
+    def deploy_nuclio_function(
+        self,
+        func: mlrun.runtimes.RemoteRuntime,
+        builder_env: Optional[dict] = None,
+    ):
+        """
+        Deploy a Nuclio function.
+        :param func:        Function to build.
+        :param builder_env: Kaniko builder pod env vars dict (for config/credentials)
+        """
+        func.metadata.project = func.metadata.project or config.default_project
+        self.warn_on_s3_and_ecr_permissions_conflict(func)
+        try:
+            req = {
+                "function": func.to_dict(),
+            }
+            if builder_env:
+                req["builder_env"] = builder_env
+            _path = (
+                f"projects/{func.metadata.project}/nuclio/{func.metadata.name}/deploy"
+            )
+            resp = self.api_call("POST", _path, json=req)
+        except OSError as err:
+            logger.error(f"error submitting nuclio deploy task: {err_to_str(err)}")
+            raise OSError(f"error: cannot submit deploy, {err_to_str(err)}")
+
+        if not resp.ok:
+            logger.error(f"deploy nuclio - bad response:\n{resp.text}")
+            raise ValueError("bad nuclio deploy response")
 
         return resp.json()
 
+    def get_nuclio_deploy_status(
+        self,
+        func: mlrun.runtimes.RemoteRuntime,
+        last_log_timestamp: float = 0.0,
+        verbose: bool = False,
+    ):
+        """Retrieve the status of a deploy operation currently in progress.
+
+        :param func:               Function object that is being built.
+        :param last_log_timestamp: Last timestamp of logs that were already retrieved. Function will return only logs
+                                   later than this parameter.
+        :param verbose:            Add verbose logs into the output.
+
+        :returns: The following parameters:
+
+            - Text of builder logs.
+            - Timestamp of last log retrieved, to be used in subsequent calls to this function.
+        """
+
+        try:
+            params = {
+                "name": normalize_name(func.metadata.name),
+                "project": func.metadata.project,
+                "tag": func.metadata.tag,
+                "last_log_timestamp": str(last_log_timestamp),
+                "verbose": bool2str(verbose),
+            }
+            _path = (
+                f"projects/{func.metadata.project}/nuclio/{func.metadata.name}/deploy"
+            )
+            resp = self.api_call("GET", _path, params=params)
+        except OSError as err:
+            logger.error(f"error getting deploy status: {err_to_str(err)}")
+            raise OSError(f"error: cannot get deploy status, {err_to_str(err)}")
+
+        if not resp.ok:
+            logger.warning(f"failed resp, {resp.text}")
+            raise RunDBError("bad function build response")
+
+        if resp.headers:
+            func.status.state = resp.headers.get("x-mlrun-function-status", "")
+            last_log_timestamp = float(
+                resp.headers.get("x-mlrun-last-timestamp", "0.0")
+            )
+            func.status.address = resp.headers.get("x-mlrun-address", "")
+            func.status.nuclio_name = resp.headers.get("x-mlrun-name", "")
+            func.status.internal_invocation_urls = resp.headers.get(
+                "x-mlrun-internal-invocation-urls", ""
+            ).split(",")
+            func.status.external_invocation_urls = resp.headers.get(
+                "x-mlrun-external-invocation-urls", ""
+            ).split(",")
+            func.status.container_image = resp.headers.get(
+                "x-mlrun-container-image", ""
+            )
+
+        text = ""
+        if resp.content:
+            text = resp.content.decode()
+        return text, last_log_timestamp
+
     def get_builder_status(
         self,
         func: BaseRuntime,

@@ -1441,9 +1520,14 @@ class HTTPRunDB(RunDBInterface):
             func.status.container_image = resp.headers.get(
                 "x-mlrun-container-image", ""
             )
-
-
-
+
+            builder_pod = resp.headers.get("builder_pod", "")
+            if builder_pod:
+                func.status.build_pod = builder_pod
+
+            function_image = resp.headers.get("function_image", "")
+            if function_image:
+                func.spec.image = function_image
 
         text = ""
         if resp.content:

@@ -1506,7 +1590,7 @@ class HTTPRunDB(RunDBInterface):
         Retrieve updated information on project background tasks being executed.
         If no filter is provided, will return background tasks from the last week.
 
-        :param project: Project name (defaults to mlrun.
+        :param project: Project name (defaults to mlrun.mlconf.default_project).
         :param state: List only background tasks whose state is specified.
         :param created_from: Filter by background task created time in ``[created_from, created_to]``.
         :param created_to: Filter by background task created time in ``[created_from, created_to]``.

@@ -3703,6 +3787,23 @@ class HTTPRunDB(RunDBInterface):
 
         self.api_call(method="PUT", path=_path, json=profile.dict())
 
+    @staticmethod
+    def warn_on_s3_and_ecr_permissions_conflict(func):
+        is_s3_source = func.spec.build.source and func.spec.build.source.startswith(
+            "s3://"
+        )
+        is_ecr_image = mlrun.utils.is_ecr_url(config.httpdb.builder.docker_registry)
+        if not func.spec.build.load_source_on_run and is_s3_source and is_ecr_image:
+            logger.warning(
+                "Building a function image to ECR and loading an S3 source to the image may require conflicting access "
+                "keys. Only the permissions granted to the platform's configured secret will take affect "
+                "(see mlrun.config.config.httpdb.builder.docker_registry_secret). "
+                "In case the permissions are limited to ECR scope, you may use pull_at_runtime=True instead",
+                source=func.spec.build.source,
+                load_source_on_run=func.spec.build.load_source_on_run,
+                default_docker_registry=config.httpdb.builder.docker_registry,
+            )
+
 
 def _as_json(obj):
     fn = getattr(obj, "to_json", None)
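Taken together, the new client methods support a deploy-then-poll flow. A hedged sketch of such a loop, based only on the signatures above; the function setup and the terminal state names are assumptions, not part of this diff:

    import time

    import mlrun

    db = mlrun.get_run_db()
    # Hypothetical function; handler.py is assumed to exist locally.
    fn = mlrun.code_to_function("echo", filename="handler.py", kind="nuclio")

    db.deploy_nuclio_function(fn)

    last_ts = 0.0
    while fn.status.state not in ("ready", "error"):  # assumed terminal states
        text, last_ts = db.get_nuclio_deploy_status(fn, last_log_timestamp=last_ts)
        if text:
            print(text, end="")
        time.sleep(2)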
mlrun/db/nopdb.py
CHANGED

@@ -528,6 +528,65 @@ class NopDB(RunDBInterface):
     ):
         pass
 
+    def remote_builder(
+        self,
+        func: "mlrun.runtimes.BaseRuntime",
+        with_mlrun: bool,
+        mlrun_version_specifier: Optional[str] = None,
+        skip_deployed: bool = False,
+        builder_env: Optional[dict] = None,
+        force_build: bool = False,
+    ):
+        pass
+
+    def deploy_nuclio_function(
+        self,
+        func: "mlrun.runtimes.RemoteRuntime",
+        builder_env: Optional[dict] = None,
+    ):
+        pass
+
+    def get_builder_status(
+        self,
+        func: "mlrun.runtimes.BaseRuntime",
+        offset: int = 0,
+        logs: bool = True,
+        last_log_timestamp: float = 0.0,
+        verbose: bool = False,
+    ):
+        pass
+
+    def get_nuclio_deploy_status(
+        self,
+        func: "mlrun.runtimes.RemoteRuntime",
+        last_log_timestamp: float = 0.0,
+        verbose: bool = False,
+    ):
+        pass
+
+    def set_run_notifications(
+        self,
+        project: str,
+        runs: list[mlrun.model.RunObject],
+        notifications: list[mlrun.model.Notification],
+    ):
+        pass
+
+    def store_run_notifications(
+        self,
+        notification_objects: list[mlrun.model.Notification],
+        run_uid: str,
+        project: str = None,
+        mask_params: bool = True,
+    ):
+        pass
+
+    def get_log_size(self, uid, project=""):
+        pass
+
+    def watch_log(self, uid, project="", watch=True, offset=0):
+        pass
+
     def get_datastore_profile(
         self, name: str, project: str
     ) -> Optional[mlrun.common.schemas.DatastoreProfile]:

@@ -545,3 +604,51 @@
         self, profile: mlrun.common.schemas.DatastoreProfile, project: str
     ):
         pass
+
+    def function_status(self, project, name, kind, selector):
+        pass
+
+    def start_function(
+        self, func_url: str = None, function: "mlrun.runtimes.BaseRuntime" = None
+    ):
+        pass
+
+    def submit_workflow(
+        self,
+        project: str,
+        name: str,
+        workflow_spec: Union[
+            "mlrun.projects.pipelines.WorkflowSpec",
+            "mlrun.common.schemas.WorkflowSpec",
+            dict,
+        ],
+        arguments: Optional[dict] = None,
+        artifact_path: Optional[str] = None,
+        source: Optional[str] = None,
+        run_name: Optional[str] = None,
+        namespace: Optional[str] = None,
+        notifications: list["mlrun.model.Notification"] = None,
+    ) -> "mlrun.common.schemas.WorkflowResponse":
+        pass
+
+    def update_model_monitoring_controller(
+        self,
+        project: str,
+        base_period: int = 10,
+        image: str = "mlrun/mlrun",
+    ):
+        pass
+
+    def enable_model_monitoring(
+        self,
+        project: str,
+        base_period: int = 10,
+        image: str = "mlrun/mlrun",
+        deploy_histogram_data_drift_app: bool = True,
+    ) -> None:
+        raise NotImplementedError
+
+    def deploy_histogram_data_drift_app(
+        self, project: str, image: str = "mlrun/mlrun"
+    ) -> None:
+        raise NotImplementedError
mlrun/feature_store/api.py
CHANGED

@@ -1121,9 +1121,10 @@ def _ingest_with_spark(
         df_to_write = target.prepare_spark_df(
             df_to_write, key_columns, timestamp_key, spark_options
         )
+        write_format = spark_options.pop("format", None)
         if overwrite:
             write_spark_dataframe_with_options(
-                spark_options, df_to_write, "overwrite"
+                spark_options, df_to_write, "overwrite", write_format=write_format
             )
         else:
             # appending an empty dataframe may cause an empty file to be created (e.g. when writing to parquet)

@@ -1131,7 +1132,7 @@
             df_to_write.persist()
             if df_to_write.count() > 0:
                 write_spark_dataframe_with_options(
-                    spark_options, df_to_write, "append"
+                    spark_options, df_to_write, "append", write_format=write_format
                 )
             target.update_resource_status("ready")
 
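The "format" key is now popped out of spark_options and forwarded as an explicit write_format argument, presumably so it can be routed to DataFrameWriter.format() rather than being passed as an ordinary option. A hypothetical sketch of what such a helper might look like (the real write_spark_dataframe_with_options may differ):

    def write_spark_dataframe_with_options(spark_options, df, mode, write_format=None):
        # Sketch only: apply the remaining options, then the write format, then save.
        writer = df.write.mode(mode).options(**spark_options)
        if write_format:
            writer = writer.format(write_format)  # e.g. "parquet", "csv"
        writer.save()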
mlrun/feature_store/retrieval/spark_merger.py
CHANGED

@@ -24,6 +24,32 @@ from .base import BaseMerger
 from .conversion import PandasConversionMixin
 
 
+def spark_df_to_pandas(spark_df):
+    # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
+    # when we upgrade pyspark, we should check whether this workaround is still necessary
+    # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
+    if semver.parse(pd.__version__)["major"] >= 2:
+        import pyspark.sql.functions as pyspark_functions
+
+        type_conversion_dict = {}
+        for field in spark_df.schema.fields:
+            if str(field.dataType) == "TimestampType":
+                spark_df = spark_df.withColumn(
+                    field.name,
+                    pyspark_functions.date_format(
+                        pyspark_functions.to_timestamp(field.name),
+                        "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS",
+                    ),
+                )
+                type_conversion_dict[field.name] = "datetime64[ns]"
+        df = PandasConversionMixin.toPandas(spark_df)
+        if type_conversion_dict:
+            df = df.astype(type_conversion_dict)
+        return df
+    else:
+        return PandasConversionMixin.toPandas(spark_df)
+
+
 class SparkFeatureMerger(BaseMerger):
     engine = "spark"
     support_offline = True

@@ -166,29 +192,7 @@ class SparkFeatureMerger(BaseMerger):
     def get_df(self, to_pandas=True):
         if to_pandas:
             if self._pandas_df is None:
-                df = self._result_df
-                # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
-                # when we upgrade pyspark, we should check whether this workaround is still necessary
-                # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
-                if semver.parse(pd.__version__)["major"] >= 2:
-                    import pyspark.sql.functions as pyspark_functions
-
-                    type_conversion_dict = {}
-                    for field in df.schema.fields:
-                        if str(field.dataType) == "TimestampType":
-                            df = df.withColumn(
-                                field.name,
-                                pyspark_functions.date_format(
-                                    pyspark_functions.to_timestamp(field.name),
-                                    "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS",
-                                ),
-                            )
-                            type_conversion_dict[field.name] = "datetime64[ns]"
-                    df = PandasConversionMixin.toPandas(df)
-                    if type_conversion_dict:
-                        df = df.astype(type_conversion_dict)
-                else:
-                    df = PandasConversionMixin.toPandas(df)
+                df = spark_df_to_pandas(self._result_df)
                 self._pandas_df = df
                 self._set_indexes(self._pandas_df)
             return self._pandas_df
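With the conversion logic extracted into the module-level spark_df_to_pandas, it can be exercised outside SparkFeatureMerger. A usage sketch assuming a local SparkSession:

    from pyspark.sql import SparkSession

    from mlrun.feature_store.retrieval.spark_merger import spark_df_to_pandas

    spark = SparkSession.builder.master("local[1]").getOrCreate()
    sdf = spark.sql("SELECT CAST('2024-01-01 00:00:00' AS TIMESTAMP) AS ts")

    pdf = spark_df_to_pandas(sdf)
    print(pdf.dtypes)  # ts ends up as datetime64[ns] on both pandas 1.x and 2.x paths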
mlrun/frameworks/tf_keras/mlrun_interface.py
CHANGED

@@ -19,7 +19,8 @@ from typing import Union
 
 import tensorflow as tf
 from tensorflow import keras
-from tensorflow.keras.callbacks import (
+from tensorflow.keras.optimizers import Optimizer
+from tensorflow.python.keras.callbacks import (
     BaseLogger,
     Callback,
     CSVLogger,

@@ -27,7 +28,6 @@ from tensorflow.keras.callbacks import (
     ProgbarLogger,
     TensorBoard,
 )
-from tensorflow.keras.optimizers import Optimizer
 
 import mlrun
 
mlrun/kfpops.py
CHANGED

@@ -103,7 +103,7 @@ def write_kfpmeta(struct):
         with open(path, "w") as fp:
             fp.write(str(val))
     except Exception as exc:
-        logger.warning("Failed writing to temp file. Ignoring", exc=
+        logger.warning("Failed writing to temp file. Ignoring", exc=err_to_str(exc))
         pass
 
     text = "# Run Report\n"

@@ -112,10 +112,7 @@ def write_kfpmeta(struct):
 
     text += "## Metadata\n```yaml\n" + dict_to_yaml(struct) + "```\n"
 
-    metadata = {
-        "outputs": output_artifacts
-        + [{"type": "markdown", "storage": "inline", "source": text}]
-    }
+    metadata = {"outputs": [{"type": "markdown", "storage": "inline", "source": text}]}
     with open(os.path.join(KFPMETA_DIR, "mlpipeline-ui-metadata.json"), "w") as f:
         json.dump(metadata, f)
 
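After this change the KFP UI metadata file carries only the inline markdown report, and output_artifacts no longer feed into it. The resulting file shape (report text illustrative):

    import json

    metadata = {
        "outputs": [{"type": "markdown", "storage": "inline", "source": "# Run Report\n..."}]
    }
    print(json.dumps(metadata, indent=2))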
mlrun/launcher/base.py
CHANGED

@@ -353,7 +353,7 @@ class BaseLauncher(abc.ABC):
             or {}
         )
         state_thresholds = (
-            mlrun.
+            mlrun.mlconf.function.spec.state_thresholds.default.to_dict()
             | state_thresholds
         )
         run.spec.state_thresholds = state_thresholds or run.spec.state_thresholds
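The configured defaults are merged with the run's own thresholds via the dict union operator (PEP 584), so run-specific keys win. An illustration with hypothetical threshold values:

    defaults = {"pending_scheduled": "1h", "executing": "24h"}  # made-up defaults
    run_specific = {"executing": "2h"}
    print(defaults | run_specific)  # {'pending_scheduled': '1h', 'executing': '2h'}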
mlrun/launcher/client.py
CHANGED

@@ -47,7 +47,7 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
         If build is needed, set the image as the base_image for the build.
         If image is not given set the default one.
         """
-        if runtime.kind in mlrun.runtimes.RuntimeKinds.
+        if runtime.kind in mlrun.runtimes.RuntimeKinds.pure_nuclio_deployed_runtimes():
             return
 
         require_build = runtime.requires_build()

@@ -129,7 +129,7 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
         logger.info("no returned result (job may still be in progress)")
         results_tbl.append(run.to_dict())
 
-        if mlrun.utils.is_ipython and mlrun.
+        if mlrun.utils.is_ipython and mlrun.mlconf.ipython_widget:
             results_tbl.show()
             print()
             ui_url = mlrun.utils.get_ui_url(project, uid)
mlrun/model_monitoring/helpers.py
CHANGED

@@ -42,7 +42,7 @@ class _BatchDict(typing.TypedDict):
 def get_stream_path(
     project: str = None,
     function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
-):
+) -> str:
     """
     Get stream path from the project secret. If wasn't set, take it from the system configurations
 

@@ -61,6 +61,8 @@ def get_stream_path(
         function_name=function_name,
     )
 
+    if isinstance(stream_uri, list):  # ML-6043 - user side gets only the new stream uri
+        stream_uri = stream_uri[1]  # get new stream path, under projects
     return mlrun.common.model_monitoring.helpers.parse_monitoring_stream_path(
         stream_uri=stream_uri, project=project, function_name=function_name
     )
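The ML-6043 guard lets get_stream_path receive a two-element list and keep only the second, per-project stream path. A minimal illustration with hypothetical values:

    stream_uri = [
        "v3io:///bigdata/legacy-stream",                 # old path (hypothetical)
        "v3io:///projects/demo/model-endpoints/stream",  # new per-project path
    ]
    if isinstance(stream_uri, list):
        stream_uri = stream_uri[1]
    print(stream_uri)  # v3io:///projects/demo/model-endpoints/stream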
mlrun/projects/pipelines.py
CHANGED