mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +10 -1
- mlrun/__main__.py +18 -109
- mlrun/{runtimes/mpijob/v1alpha1.py → alerts/__init__.py} +2 -16
- mlrun/alerts/alert.py +141 -0
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +36 -253
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +20 -41
- mlrun/artifacts/model.py +8 -140
- mlrun/artifacts/plots.py +14 -375
- mlrun/common/schemas/__init__.py +4 -2
- mlrun/common/schemas/alert.py +46 -4
- mlrun/common/schemas/api_gateway.py +4 -0
- mlrun/common/schemas/artifact.py +15 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/model_monitoring/__init__.py +8 -1
- mlrun/common/schemas/model_monitoring/constants.py +40 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +73 -2
- mlrun/common/schemas/project.py +2 -0
- mlrun/config.py +7 -4
- mlrun/data_types/to_pandas.py +4 -4
- mlrun/datastore/base.py +41 -9
- mlrun/datastore/datastore_profile.py +54 -4
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/sources.py +43 -2
- mlrun/datastore/store_resources.py +2 -6
- mlrun/datastore/targets.py +106 -39
- mlrun/db/base.py +23 -3
- mlrun/db/httpdb.py +101 -47
- mlrun/db/nopdb.py +20 -2
- mlrun/errors.py +5 -0
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +12 -47
- mlrun/feature_store/feature_set.py +9 -0
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +4 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +2 -0
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +5 -0
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
- mlrun/launcher/base.py +4 -3
- mlrun/launcher/client.py +1 -1
- mlrun/lists.py +4 -2
- mlrun/model.py +25 -11
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +41 -18
- mlrun/model_monitoring/application.py +5 -305
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +3 -1
- mlrun/model_monitoring/db/__init__.py +2 -0
- mlrun/model_monitoring/db/stores/base/store.py +9 -36
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +63 -110
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +104 -187
- mlrun/model_monitoring/db/tsdb/__init__.py +71 -0
- mlrun/model_monitoring/db/tsdb/base.py +135 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +404 -0
- mlrun/model_monitoring/db/v3io_tsdb_reader.py +134 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/helpers.py +1 -1
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +48 -213
- mlrun/model_monitoring/writer.py +101 -121
- mlrun/platforms/__init__.py +10 -9
- mlrun/platforms/iguazio.py +21 -202
- mlrun/projects/operations.py +11 -7
- mlrun/projects/pipelines.py +13 -76
- mlrun/projects/project.py +73 -45
- mlrun/render.py +11 -13
- mlrun/run.py +6 -41
- mlrun/runtimes/__init__.py +3 -3
- mlrun/runtimes/base.py +6 -6
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/kubejob.py +2 -1
- mlrun/runtimes/local.py +1 -1
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/api_gateway.py +75 -9
- mlrun/runtimes/nuclio/function.py +9 -35
- mlrun/runtimes/pod.py +16 -36
- mlrun/runtimes/remotesparkjob.py +1 -1
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +1 -39
- mlrun/utils/helpers.py +72 -71
- mlrun/utils/notifications/notification/base.py +1 -1
- mlrun/utils/notifications/notification/slack.py +12 -5
- mlrun/utils/notifications/notification/webhook.py +1 -1
- mlrun/utils/notifications/notification_pusher.py +134 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/METADATA +4 -3
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/RECORD +105 -95
- mlrun/kfpops.py +0 -865
- mlrun/platforms/other.py +0 -305
- /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/top_level.txt +0 -0
mlrun/artifacts/base.py
CHANGED
|
@@ -20,7 +20,6 @@ import warnings
|
|
|
20
20
|
import zipfile
|
|
21
21
|
|
|
22
22
|
import yaml
|
|
23
|
-
from deprecated import deprecated
|
|
24
23
|
|
|
25
24
|
import mlrun
|
|
26
25
|
import mlrun.artifacts
|
|
@@ -192,12 +191,30 @@ class Artifact(ModelObj):
|
|
|
192
191
|
format=None,
|
|
193
192
|
size=None,
|
|
194
193
|
target_path=None,
|
|
195
|
-
# All params up until here are legacy params for compatibility with legacy artifacts.
|
|
196
194
|
project=None,
|
|
195
|
+
src_path: str = None,
|
|
196
|
+
# All params up until here are legacy params for compatibility with legacy artifacts.
|
|
197
|
+
# TODO: remove them in 1.9.0.
|
|
197
198
|
metadata: ArtifactMetadata = None,
|
|
198
199
|
spec: ArtifactSpec = None,
|
|
199
|
-
src_path: str = None,
|
|
200
200
|
):
|
|
201
|
+
if (
|
|
202
|
+
key
|
|
203
|
+
or body
|
|
204
|
+
or viewer
|
|
205
|
+
or is_inline
|
|
206
|
+
or format
|
|
207
|
+
or size
|
|
208
|
+
or target_path
|
|
209
|
+
or project
|
|
210
|
+
or src_path
|
|
211
|
+
):
|
|
212
|
+
warnings.warn(
|
|
213
|
+
"Artifact constructor parameters are deprecated and will be removed in 1.9.0. "
|
|
214
|
+
"Use the metadata and spec parameters instead.",
|
|
215
|
+
DeprecationWarning,
|
|
216
|
+
)
|
|
217
|
+
|
|
201
218
|
self._metadata = None
|
|
202
219
|
self.metadata = metadata
|
|
203
220
|
self._spec = None
|
|
@@ -699,11 +716,18 @@ class LinkArtifact(Artifact):
|
|
|
699
716
|
link_iteration=None,
|
|
700
717
|
link_key=None,
|
|
701
718
|
link_tree=None,
|
|
702
|
-
# All params up until here are legacy params for compatibility with legacy artifacts.
|
|
703
719
|
project=None,
|
|
720
|
+
# All params up until here are legacy params for compatibility with legacy artifacts.
|
|
721
|
+
# TODO: remove them in 1.9.0.
|
|
704
722
|
metadata: ArtifactMetadata = None,
|
|
705
723
|
spec: LinkArtifactSpec = None,
|
|
706
724
|
):
|
|
725
|
+
if key or target_path or link_iteration or link_key or link_tree or project:
|
|
726
|
+
warnings.warn(
|
|
727
|
+
"Artifact constructor parameters are deprecated and will be removed in 1.9.0. "
|
|
728
|
+
"Use the metadata and spec parameters instead.",
|
|
729
|
+
DeprecationWarning,
|
|
730
|
+
)
|
|
707
731
|
super().__init__(
|
|
708
732
|
key, target_path=target_path, project=project, metadata=metadata, spec=spec
|
|
709
733
|
)
|
|
@@ -720,238 +744,6 @@ class LinkArtifact(Artifact):
|
|
|
720
744
|
self._spec = self._verify_dict(spec, "spec", LinkArtifactSpec)
|
|
721
745
|
|
|
722
746
|
|
|
723
|
-
# TODO: remove in 1.7.0
|
|
724
|
-
@deprecated(
|
|
725
|
-
version="1.3.0",
|
|
726
|
-
reason="'LegacyArtifact' will be removed in 1.7.0, use 'Artifact' instead",
|
|
727
|
-
category=FutureWarning,
|
|
728
|
-
)
|
|
729
|
-
class LegacyArtifact(ModelObj):
|
|
730
|
-
_dict_fields = [
|
|
731
|
-
"key",
|
|
732
|
-
"kind",
|
|
733
|
-
"iter",
|
|
734
|
-
"tree",
|
|
735
|
-
"src_path",
|
|
736
|
-
"target_path",
|
|
737
|
-
"hash",
|
|
738
|
-
"description",
|
|
739
|
-
"viewer",
|
|
740
|
-
"inline",
|
|
741
|
-
"format",
|
|
742
|
-
"size",
|
|
743
|
-
"db_key",
|
|
744
|
-
"extra_data",
|
|
745
|
-
"tag",
|
|
746
|
-
]
|
|
747
|
-
kind = ""
|
|
748
|
-
_store_prefix = StorePrefix.Artifact
|
|
749
|
-
|
|
750
|
-
def __init__(
|
|
751
|
-
self,
|
|
752
|
-
key=None,
|
|
753
|
-
body=None,
|
|
754
|
-
viewer=None,
|
|
755
|
-
is_inline=False,
|
|
756
|
-
format=None,
|
|
757
|
-
size=None,
|
|
758
|
-
target_path=None,
|
|
759
|
-
):
|
|
760
|
-
self.key = key
|
|
761
|
-
self.project = ""
|
|
762
|
-
self.db_key = None
|
|
763
|
-
self.size = size
|
|
764
|
-
self.iter = None
|
|
765
|
-
self.tree = None
|
|
766
|
-
self.updated = None
|
|
767
|
-
self.target_path = target_path
|
|
768
|
-
self.src_path = None
|
|
769
|
-
self._body = body
|
|
770
|
-
self.format = format
|
|
771
|
-
self.description = None
|
|
772
|
-
self.viewer = viewer
|
|
773
|
-
self.encoding = None
|
|
774
|
-
self.labels = {}
|
|
775
|
-
self.annotations = None
|
|
776
|
-
self.sources = []
|
|
777
|
-
self.producer = None
|
|
778
|
-
self.hash = None
|
|
779
|
-
self._inline = is_inline
|
|
780
|
-
self.license = ""
|
|
781
|
-
self.extra_data = {}
|
|
782
|
-
self.tag = None # temp store of the tag
|
|
783
|
-
|
|
784
|
-
def before_log(self):
|
|
785
|
-
for key, item in self.extra_data.items():
|
|
786
|
-
if hasattr(item, "target_path"):
|
|
787
|
-
self.extra_data[key] = item.target_path
|
|
788
|
-
|
|
789
|
-
def is_inline(self):
|
|
790
|
-
return self._inline
|
|
791
|
-
|
|
792
|
-
@property
|
|
793
|
-
def is_dir(self):
|
|
794
|
-
"""this is a directory"""
|
|
795
|
-
return False
|
|
796
|
-
|
|
797
|
-
@property
|
|
798
|
-
def inline(self):
|
|
799
|
-
"""inline data (body)"""
|
|
800
|
-
if self._inline:
|
|
801
|
-
return self.get_body()
|
|
802
|
-
return None
|
|
803
|
-
|
|
804
|
-
@inline.setter
|
|
805
|
-
def inline(self, body):
|
|
806
|
-
self._body = body
|
|
807
|
-
if body:
|
|
808
|
-
self._inline = True
|
|
809
|
-
|
|
810
|
-
@property
|
|
811
|
-
def uri(self):
|
|
812
|
-
"""return artifact uri (store://..)"""
|
|
813
|
-
return self.get_store_url()
|
|
814
|
-
|
|
815
|
-
def to_dataitem(self):
|
|
816
|
-
"""return a DataItem object (if available) representing the artifact content"""
|
|
817
|
-
uri = self.get_store_url()
|
|
818
|
-
if uri:
|
|
819
|
-
return mlrun.get_dataitem(uri)
|
|
820
|
-
|
|
821
|
-
def get_body(self):
|
|
822
|
-
"""get the artifact body when inline"""
|
|
823
|
-
return self._body
|
|
824
|
-
|
|
825
|
-
def get_target_path(self):
|
|
826
|
-
"""get the absolute target path for the artifact"""
|
|
827
|
-
return self.target_path
|
|
828
|
-
|
|
829
|
-
def get_store_url(self, with_tag=True, project=None):
|
|
830
|
-
"""get the artifact uri (store://..) with optional parameters"""
|
|
831
|
-
tag = self.tree if with_tag else None
|
|
832
|
-
uri = generate_artifact_uri(
|
|
833
|
-
project or self.project, self.db_key, tag, self.iter
|
|
834
|
-
)
|
|
835
|
-
return mlrun.datastore.get_store_uri(self._store_prefix, uri)
|
|
836
|
-
|
|
837
|
-
def base_dict(self):
|
|
838
|
-
"""return short dict form of the artifact"""
|
|
839
|
-
return super().to_dict()
|
|
840
|
-
|
|
841
|
-
def to_dict(self, fields: list = None, exclude: list = None, strip: bool = False):
|
|
842
|
-
"""return long dict form of the artifact"""
|
|
843
|
-
return super().to_dict(
|
|
844
|
-
self._dict_fields
|
|
845
|
-
+ ["updated", "labels", "annotations", "producer", "sources", "project"],
|
|
846
|
-
strip=strip,
|
|
847
|
-
)
|
|
848
|
-
|
|
849
|
-
@classmethod
|
|
850
|
-
def from_dict(cls, struct=None, fields=None):
|
|
851
|
-
fields = fields or cls._dict_fields + [
|
|
852
|
-
"updated",
|
|
853
|
-
"labels",
|
|
854
|
-
"annotations",
|
|
855
|
-
"producer",
|
|
856
|
-
"sources",
|
|
857
|
-
"project",
|
|
858
|
-
]
|
|
859
|
-
return super().from_dict(struct, fields=fields)
|
|
860
|
-
|
|
861
|
-
def upload(self):
|
|
862
|
-
"""internal, upload to target store"""
|
|
863
|
-
src_path = self.src_path
|
|
864
|
-
body = self.get_body()
|
|
865
|
-
if body:
|
|
866
|
-
self._upload_body(body)
|
|
867
|
-
else:
|
|
868
|
-
if src_path and os.path.isfile(src_path):
|
|
869
|
-
self._upload_file(src_path)
|
|
870
|
-
|
|
871
|
-
def _upload_body(self, body, target=None):
|
|
872
|
-
if mlrun.mlconf.artifacts.calculate_hash:
|
|
873
|
-
self.hash = calculate_blob_hash(body)
|
|
874
|
-
self.size = len(body)
|
|
875
|
-
mlrun.datastore.store_manager.object(url=target or self.target_path).put(body)
|
|
876
|
-
|
|
877
|
-
def _upload_file(self, src, target=None):
|
|
878
|
-
if mlrun.mlconf.artifacts.calculate_hash:
|
|
879
|
-
self.hash = calculate_local_file_hash(src)
|
|
880
|
-
self.size = os.stat(src).st_size
|
|
881
|
-
mlrun.datastore.store_manager.object(url=target or self.target_path).upload(src)
|
|
882
|
-
|
|
883
|
-
def artifact_kind(self):
|
|
884
|
-
return self.kind
|
|
885
|
-
|
|
886
|
-
def generate_target_path(self, artifact_path, producer):
|
|
887
|
-
return generate_target_path(self, artifact_path, producer)
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
# TODO: remove in 1.7.0
|
|
891
|
-
@deprecated(
|
|
892
|
-
version="1.3.0",
|
|
893
|
-
reason="'LegacyDirArtifact' will be removed in 1.7.0, use 'DirArtifact' instead",
|
|
894
|
-
category=FutureWarning,
|
|
895
|
-
)
|
|
896
|
-
class LegacyDirArtifact(LegacyArtifact):
|
|
897
|
-
_dict_fields = [
|
|
898
|
-
"key",
|
|
899
|
-
"kind",
|
|
900
|
-
"iter",
|
|
901
|
-
"tree",
|
|
902
|
-
"src_path",
|
|
903
|
-
"target_path",
|
|
904
|
-
"description",
|
|
905
|
-
"db_key",
|
|
906
|
-
]
|
|
907
|
-
kind = "dir"
|
|
908
|
-
|
|
909
|
-
@property
|
|
910
|
-
def is_dir(self):
|
|
911
|
-
return True
|
|
912
|
-
|
|
913
|
-
def upload(self):
|
|
914
|
-
if not self.src_path:
|
|
915
|
-
raise ValueError("local/source path not specified")
|
|
916
|
-
|
|
917
|
-
files = os.listdir(self.src_path)
|
|
918
|
-
for f in files:
|
|
919
|
-
file_path = os.path.join(self.src_path, f)
|
|
920
|
-
if not os.path.isfile(file_path):
|
|
921
|
-
raise ValueError(f"file {file_path} not found, cant upload")
|
|
922
|
-
target = os.path.join(self.target_path, f)
|
|
923
|
-
mlrun.datastore.store_manager.object(url=target).upload(file_path)
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
# TODO: remove in 1.7.0
|
|
927
|
-
@deprecated(
|
|
928
|
-
version="1.3.0",
|
|
929
|
-
reason="'LegacyLinkArtifact' will be removed in 1.7.0, use 'LinkArtifact' instead",
|
|
930
|
-
category=FutureWarning,
|
|
931
|
-
)
|
|
932
|
-
class LegacyLinkArtifact(LegacyArtifact):
|
|
933
|
-
_dict_fields = LegacyArtifact._dict_fields + [
|
|
934
|
-
"link_iteration",
|
|
935
|
-
"link_key",
|
|
936
|
-
"link_tree",
|
|
937
|
-
]
|
|
938
|
-
kind = "link"
|
|
939
|
-
|
|
940
|
-
def __init__(
|
|
941
|
-
self,
|
|
942
|
-
key=None,
|
|
943
|
-
target_path="",
|
|
944
|
-
link_iteration=None,
|
|
945
|
-
link_key=None,
|
|
946
|
-
link_tree=None,
|
|
947
|
-
):
|
|
948
|
-
super().__init__(key)
|
|
949
|
-
self.target_path = target_path
|
|
950
|
-
self.link_iteration = link_iteration
|
|
951
|
-
self.link_key = link_key
|
|
952
|
-
self.link_tree = link_tree
|
|
953
|
-
|
|
954
|
-
|
|
955
747
|
def calculate_blob_hash(data):
|
|
956
748
|
if isinstance(data, str):
|
|
957
749
|
data = data.encode()
|
|
@@ -1057,25 +849,16 @@ def generate_target_path(item: Artifact, artifact_path, producer):
|
|
|
1057
849
|
return f"{artifact_path}{item.key}{suffix}"
|
|
1058
850
|
|
|
1059
851
|
|
|
852
|
+
# TODO: left to support data migration from legacy artifacts to new artifacts. Remove in 1.8.0.
|
|
1060
853
|
def convert_legacy_artifact_to_new_format(
|
|
1061
|
-
legacy_artifact: typing.Union[LegacyArtifact, dict],
|
|
854
|
+
legacy_artifact: dict,
|
|
1062
855
|
) -> Artifact:
|
|
1063
856
|
"""Converts a legacy artifact to a new format.
|
|
1064
|
-
|
|
1065
857
|
:param legacy_artifact: The legacy artifact to convert.
|
|
1066
858
|
:return: The converted artifact.
|
|
1067
859
|
"""
|
|
1068
|
-
if isinstance(legacy_artifact, LegacyArtifact):
|
1069
|
-
legacy_artifact_dict = legacy_artifact.to_dict()
|
1070
|
-
elif isinstance(legacy_artifact, dict):
|
|
1071
|
-
legacy_artifact_dict = legacy_artifact
|
|
1072
|
-
else:
|
|
1073
|
-
raise TypeError(
|
|
1074
|
-
f"Unsupported type '{type(legacy_artifact)}' for legacy artifact"
|
|
1075
|
-
)
|
|
1076
|
-
|
|
1077
|
-
artifact_key = legacy_artifact_dict.get("key", "")
|
|
1078
|
-
artifact_tag = legacy_artifact_dict.get("tag", "")
|
|
860
|
+
artifact_key = legacy_artifact.get("key", "")
|
|
861
|
+
artifact_tag = legacy_artifact.get("tag", "")
|
|
1079
862
|
if artifact_tag:
|
|
1080
863
|
artifact_key = f"{artifact_key}:{artifact_tag}"
|
|
1081
864
|
# TODO: remove in 1.8.0
|
|
@@ -1086,12 +869,12 @@ def convert_legacy_artifact_to_new_format(
|
|
|
1086
869
|
)
|
|
1087
870
|
|
|
1088
871
|
artifact = mlrun.artifacts.artifact_types.get(
|
|
1089
|
-
legacy_artifact_dict.get("kind", "artifact"), mlrun.artifacts.Artifact
|
872
|
+
legacy_artifact.get("kind", "artifact"), mlrun.artifacts.Artifact
|
|
1090
873
|
)()
|
|
1091
874
|
|
|
1092
|
-
artifact.metadata = artifact.metadata.from_dict(legacy_artifact_dict)
|
|
1093
|
-
artifact.spec = artifact.spec.from_dict(legacy_artifact_dict)
|
|
1094
|
-
artifact.status = artifact.status.from_dict(legacy_artifact_dict)
|
|
875
|
+
artifact.metadata = artifact.metadata.from_dict(legacy_artifact)
|
|
876
|
+
artifact.spec = artifact.spec.from_dict(legacy_artifact)
|
|
877
|
+
artifact.status = artifact.status.from_dict(legacy_artifact)
|
|
1095
878
|
|
|
1096
879
|
return artifact
|
|
1097
880
|
|
mlrun/artifacts/dataset.py
CHANGED
|
@@ -13,12 +13,12 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import os
|
|
15
15
|
import pathlib
|
|
16
|
+
import warnings
|
|
16
17
|
from io import StringIO
|
|
17
18
|
from typing import Optional
|
|
18
19
|
|
|
19
20
|
import numpy as np
|
|
20
21
|
import pandas as pd
|
|
21
|
-
from deprecated import deprecated
|
|
22
22
|
from pandas.io.json import build_table_schema
|
|
23
23
|
|
|
24
24
|
import mlrun
|
|
@@ -27,7 +27,7 @@ import mlrun.datastore
|
|
|
27
27
|
import mlrun.utils.helpers
|
|
28
28
|
from mlrun.config import config as mlconf
|
|
29
29
|
|
|
30
|
-
from .base import Artifact, ArtifactSpec, LegacyArtifact, StorePrefix
|
|
30
|
+
from .base import Artifact, ArtifactSpec, StorePrefix
|
|
31
31
|
|
|
32
32
|
default_preview_rows_length = 20
|
|
33
33
|
max_preview_columns = mlconf.artifacts.datasets.max_preview_columns
|
|
@@ -161,6 +161,13 @@ class DatasetArtifact(Artifact):
|
|
|
161
161
|
label_column: str = None,
|
|
162
162
|
**kwargs,
|
|
163
163
|
):
|
|
164
|
+
if key or format or target_path:
|
|
165
|
+
warnings.warn(
|
|
166
|
+
"Artifact constructor parameters are deprecated and will be removed in 1.9.0. "
|
|
167
|
+
"Use the metadata and spec parameters instead.",
|
|
168
|
+
DeprecationWarning,
|
|
169
|
+
)
|
|
170
|
+
|
|
164
171
|
format = (format or "").lower()
|
|
165
172
|
super().__init__(key, None, format=format, target_path=target_path)
|
|
166
173
|
if format and format not in self.SUPPORTED_FORMATS:
|
|
@@ -360,194 +367,6 @@ class DatasetArtifact(Artifact):
|
|
|
360
367
|
self.status.stats = stats
|
|
361
368
|
|
|
362
369
|
|
|
363
|
-
# TODO: remove in 1.7.0
|
|
364
|
-
@deprecated(
|
|
365
|
-
version="1.3.0",
|
|
366
|
-
reason="'LegacyTableArtifact' will be removed in 1.7.0, use 'TableArtifact' instead",
|
|
367
|
-
category=FutureWarning,
|
|
368
|
-
)
|
|
369
|
-
class LegacyTableArtifact(LegacyArtifact):
|
|
370
|
-
_dict_fields = LegacyArtifact._dict_fields + ["schema", "header"]
|
|
371
|
-
kind = "table"
|
|
372
|
-
|
|
373
|
-
def __init__(
|
|
374
|
-
self,
|
|
375
|
-
key=None,
|
|
376
|
-
body=None,
|
|
377
|
-
df=None,
|
|
378
|
-
viewer=None,
|
|
379
|
-
visible=False,
|
|
380
|
-
inline=False,
|
|
381
|
-
format=None,
|
|
382
|
-
header=None,
|
|
383
|
-
schema=None,
|
|
384
|
-
):
|
|
385
|
-
if key:
|
|
386
|
-
key_suffix = pathlib.Path(key).suffix
|
|
387
|
-
if not format and key_suffix:
|
|
388
|
-
format = key_suffix[1:]
|
|
389
|
-
super().__init__(key, body, viewer=viewer, is_inline=inline, format=format)
|
|
390
|
-
|
|
391
|
-
if df is not None:
|
|
392
|
-
self._is_df = True
|
|
393
|
-
self.header = df.reset_index(drop=True).columns.values.tolist()
|
|
394
|
-
self.format = "csv" # todo other formats
|
|
395
|
-
# if visible and not key_suffix:
|
|
396
|
-
# key += '.csv'
|
|
397
|
-
self._body = df
|
|
398
|
-
else:
|
|
399
|
-
self._is_df = False
|
|
400
|
-
self.header = header
|
|
401
|
-
|
|
402
|
-
self.schema = schema
|
|
403
|
-
if not viewer:
|
|
404
|
-
viewer = "table" if visible else None
|
|
405
|
-
self.viewer = viewer
|
|
406
|
-
|
|
407
|
-
def get_body(self):
|
|
408
|
-
if not self._is_df:
|
|
409
|
-
return self._body
|
|
410
|
-
csv_buffer = StringIO()
|
|
411
|
-
self._body.to_csv(
|
|
412
|
-
csv_buffer,
|
|
413
|
-
encoding="utf-8",
|
|
414
|
-
**mlrun.utils.line_terminator_kwargs(),
|
|
415
|
-
)
|
|
416
|
-
return csv_buffer.getvalue()
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
# TODO: remove in 1.7.0
|
|
420
|
-
@deprecated(
|
|
421
|
-
version="1.3.0",
|
|
422
|
-
reason="'LegacyDatasetArtifact' will be removed in 1.7.0, use 'DatasetArtifact' instead",
|
|
423
|
-
category=FutureWarning,
|
|
424
|
-
)
|
|
425
|
-
class LegacyDatasetArtifact(LegacyArtifact):
|
|
426
|
-
# List of all the supported saving formats of a DataFrame:
|
|
427
|
-
SUPPORTED_FORMATS = ["csv", "parquet", "pq", "tsdb", "kv"]
|
|
428
|
-
|
|
429
|
-
_dict_fields = LegacyArtifact._dict_fields + [
|
|
430
|
-
"schema",
|
|
431
|
-
"header",
|
|
432
|
-
"length",
|
|
433
|
-
"preview",
|
|
434
|
-
"stats",
|
|
435
|
-
"extra_data",
|
|
436
|
-
"column_metadata",
|
|
437
|
-
]
|
|
438
|
-
kind = "dataset"
|
|
439
|
-
|
|
440
|
-
def __init__(
|
|
441
|
-
self,
|
|
442
|
-
key: str = None,
|
|
443
|
-
df=None,
|
|
444
|
-
preview: int = None,
|
|
445
|
-
format: str = "", # TODO: should be changed to 'fmt'.
|
|
446
|
-
stats: bool = None,
|
|
447
|
-
target_path: str = None,
|
|
448
|
-
extra_data: dict = None,
|
|
449
|
-
column_metadata: dict = None,
|
|
450
|
-
ignore_preview_limits: bool = False,
|
|
451
|
-
**kwargs,
|
|
452
|
-
):
|
|
453
|
-
format = (format or "").lower()
|
|
454
|
-
super().__init__(key, None, format=format, target_path=target_path)
|
|
455
|
-
if format and format not in self.SUPPORTED_FORMATS:
|
|
456
|
-
raise ValueError(
|
|
457
|
-
f"unsupported format {format} use one of {'|'.join(self.SUPPORTED_FORMATS)}"
|
|
458
|
-
)
|
|
459
|
-
|
|
460
|
-
if format == "pq":
|
|
461
|
-
format = "parquet"
|
|
462
|
-
self.format = format
|
|
463
|
-
self.stats = None
|
|
464
|
-
self.extra_data = extra_data or {}
|
|
465
|
-
self.column_metadata = column_metadata or {}
|
|
466
|
-
|
|
467
|
-
if df is not None:
|
|
468
|
-
if hasattr(df, "dask"):
|
|
469
|
-
# If df is a Dask DataFrame, and it's small in-memory, convert to Pandas
|
|
470
|
-
if (df.memory_usage(deep=True).sum().compute() / 1e9) < max_ddf_size:
|
|
471
|
-
df = df.compute()
|
|
472
|
-
self.update_preview_fields_from_df(
|
|
473
|
-
self, df, stats, preview, ignore_preview_limits
|
|
474
|
-
)
|
|
475
|
-
|
|
476
|
-
self._df = df
|
|
477
|
-
self._kw = kwargs
|
|
478
|
-
|
|
479
|
-
def upload(self):
|
|
480
|
-
suffix = pathlib.Path(self.target_path).suffix
|
|
481
|
-
format = self.format
|
|
482
|
-
if not format:
|
|
483
|
-
if suffix and suffix in [".csv", ".parquet", ".pq"]:
|
|
484
|
-
format = "csv" if suffix == ".csv" else "parquet"
|
|
485
|
-
else:
|
|
486
|
-
format = "parquet"
|
|
487
|
-
if not suffix and not self.target_path.startswith("memory://"):
|
|
488
|
-
self.target_path = self.target_path + "." + format
|
|
489
|
-
|
|
490
|
-
self.size, self.hash = upload_dataframe(
|
|
491
|
-
self._df,
|
|
492
|
-
self.target_path,
|
|
493
|
-
format=format,
|
|
494
|
-
src_path=self.src_path,
|
|
495
|
-
**self._kw,
|
|
496
|
-
)
|
|
497
|
-
|
|
498
|
-
@property
|
|
499
|
-
def df(self) -> pd.DataFrame:
|
|
500
|
-
"""
|
|
501
|
-
Get the dataset in this artifact.
|
|
502
|
-
|
|
503
|
-
:return: The dataset as a DataFrame.
|
|
504
|
-
"""
|
|
505
|
-
return self._df
|
|
506
|
-
|
|
507
|
-
@staticmethod
|
|
508
|
-
def is_format_supported(fmt: str) -> bool:
|
|
509
|
-
"""
|
|
510
|
-
Check whether the given dataset format is supported by the DatasetArtifact.
|
|
511
|
-
|
|
512
|
-
:param fmt: The format string to check.
|
|
513
|
-
|
|
514
|
-
:return: True if the format is supported and False if not.
|
|
515
|
-
"""
|
|
516
|
-
return fmt in DatasetArtifact.SUPPORTED_FORMATS
|
|
517
|
-
|
|
518
|
-
@staticmethod
|
|
519
|
-
def update_preview_fields_from_df(
|
|
520
|
-
artifact, df, stats=None, preview_rows_length=None, ignore_preview_limits=False
|
|
521
|
-
):
|
|
522
|
-
preview_rows_length = preview_rows_length or default_preview_rows_length
|
|
523
|
-
if hasattr(df, "dask"):
|
|
524
|
-
artifact.length = df.shape[0].compute()
|
|
525
|
-
preview_df = df.sample(frac=ddf_sample_pct).compute()
|
|
526
|
-
else:
|
|
527
|
-
artifact.length = df.shape[0]
|
|
528
|
-
preview_df = df
|
|
529
|
-
|
|
530
|
-
if artifact.length > preview_rows_length and not ignore_preview_limits:
|
|
531
|
-
preview_df = df.head(preview_rows_length)
|
|
532
|
-
|
|
533
|
-
preview_df = preview_df.reset_index()
|
|
534
|
-
if len(preview_df.columns) > max_preview_columns and not ignore_preview_limits:
|
|
535
|
-
preview_df = preview_df.iloc[:, :max_preview_columns]
|
|
536
|
-
artifact.header = preview_df.columns.values.tolist()
|
|
537
|
-
artifact.preview = preview_df.values.tolist()
|
|
538
|
-
# Table schema parsing doesn't require a column named "index"
|
|
539
|
-
# to align its output with previously generated header and preview data
|
|
540
|
-
if "index" in preview_df.columns:
|
|
541
|
-
preview_df.drop("index", axis=1, inplace=True)
|
|
542
|
-
artifact.schema = build_table_schema(preview_df)
|
|
543
|
-
if (
|
|
544
|
-
stats
|
|
545
|
-
or (artifact.length < max_csv and len(df.columns) < max_preview_columns)
|
|
546
|
-
or ignore_preview_limits
|
|
547
|
-
):
|
|
548
|
-
artifact.stats = get_df_stats(df)
|
|
549
|
-
|
|
550
|
-
|
|
551
370
|
def get_df_stats(df):
|
|
552
371
|
if hasattr(df, "dask"):
|
|
553
372
|
df = df.sample(frac=ddf_sample_pct).compute()
|
mlrun/artifacts/manager.py
CHANGED
|
@@ -16,6 +16,7 @@ import typing
|
|
|
16
16
|
from os.path import exists, isdir
|
|
17
17
|
from urllib.parse import urlparse
|
|
18
18
|
|
|
19
|
+
import mlrun.common.schemas.artifact
|
|
19
20
|
import mlrun.config
|
|
20
21
|
from mlrun.utils.helpers import (
|
|
21
22
|
get_local_file_schema,
|
|
@@ -24,7 +25,6 @@ from mlrun.utils.helpers import (
|
|
|
24
25
|
)
|
|
25
26
|
|
|
26
27
|
from ..utils import (
|
|
27
|
-
is_legacy_artifact,
|
|
28
28
|
is_relative_path,
|
|
29
29
|
logger,
|
|
30
30
|
validate_artifact_key_name,
|
|
@@ -33,56 +33,28 @@ from ..utils import (
|
|
|
33
33
|
from .base import (
|
|
34
34
|
Artifact,
|
|
35
35
|
DirArtifact,
|
|
36
|
-
LegacyArtifact,
|
|
37
|
-
LegacyDirArtifact,
|
|
38
|
-
LegacyLinkArtifact,
|
|
39
36
|
LinkArtifact,
|
|
40
37
|
)
|
|
41
38
|
from .dataset import (
|
|
42
39
|
DatasetArtifact,
|
|
43
|
-
LegacyDatasetArtifact,
|
|
44
|
-
LegacyTableArtifact,
|
|
45
40
|
TableArtifact,
|
|
46
41
|
)
|
|
47
|
-
from .model import LegacyModelArtifact, ModelArtifact
|
|
42
|
+
from .model import ModelArtifact
|
|
48
43
|
from .plots import (
|
|
49
|
-
BokehArtifact,
|
|
50
|
-
ChartArtifact,
|
|
51
|
-
LegacyBokehArtifact,
|
|
52
|
-
LegacyChartArtifact,
|
|
53
|
-
LegacyPlotArtifact,
|
|
54
|
-
LegacyPlotlyArtifact,
|
|
55
44
|
PlotArtifact,
|
|
56
45
|
PlotlyArtifact,
|
|
57
46
|
)
|
|
58
47
|
|
|
59
|
-
# TODO - Remove deprecated types when deleted in 1.7.0
|
|
60
48
|
artifact_types = {
|
|
61
49
|
"": Artifact,
|
|
62
50
|
"artifact": Artifact,
|
|
63
51
|
"dir": DirArtifact,
|
|
64
52
|
"link": LinkArtifact,
|
|
65
53
|
"plot": PlotArtifact,
|
|
66
|
-
"chart": ChartArtifact,
|
|
67
54
|
"table": TableArtifact,
|
|
68
55
|
"model": ModelArtifact,
|
|
69
56
|
"dataset": DatasetArtifact,
|
|
70
57
|
"plotly": PlotlyArtifact,
|
|
71
|
-
"bokeh": BokehArtifact,
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
# TODO - Remove this when legacy types are deleted in 1.7.0
|
|
75
|
-
legacy_artifact_types = {
|
|
76
|
-
"": LegacyArtifact,
|
|
77
|
-
"dir": LegacyDirArtifact,
|
|
78
|
-
"link": LegacyLinkArtifact,
|
|
79
|
-
"plot": LegacyPlotArtifact,
|
|
80
|
-
"chart": LegacyChartArtifact,
|
|
81
|
-
"table": LegacyTableArtifact,
|
|
82
|
-
"model": LegacyModelArtifact,
|
|
83
|
-
"dataset": LegacyDatasetArtifact,
|
|
84
|
-
"plotly": LegacyPlotlyArtifact,
|
|
85
|
-
"bokeh": LegacyBokehArtifact,
|
|
86
58
|
}
|
|
87
59
|
|
|
88
60
|
|
|
@@ -102,15 +74,8 @@ class ArtifactProducer:
|
|
|
102
74
|
|
|
103
75
|
|
|
104
76
|
def dict_to_artifact(struct: dict) -> Artifact:
|
|
105
|
-
# Need to distinguish between LegacyArtifact classes and Artifact classes. Use existence of the "metadata"
|
|
106
|
-
# property to make this distinction
|
|
107
77
|
kind = struct.get("kind", "")
|
|
108
|
-
|
|
109
|
-
if is_legacy_artifact(struct):
|
|
110
|
-
return mlrun.artifacts.base.convert_legacy_artifact_to_new_format(struct)
|
|
111
|
-
|
|
112
78
|
artifact_class = artifact_types[kind]
|
|
113
|
-
|
|
114
79
|
return artifact_class.from_dict(struct)
|
|
115
80
|
|
|
116
81
|
|
|
@@ -308,10 +273,7 @@ class ArtifactManager:
|
|
|
308
273
|
# before uploading the item, we want to ensure that its tags are valid,
|
|
309
274
|
# so that we don't upload something that won't be stored later
|
|
310
275
|
validate_tag_name(item.metadata.tag, "artifact.metadata.tag")
|
|
311
|
-
if is_legacy_artifact(item):
|
312
|
-
item.upload()
|
|
313
|
-
else:
|
|
314
|
-
item.upload(artifact_path=artifact_path)
|
|
276
|
+
item.upload(artifact_path=artifact_path)
|
|
315
277
|
|
|
316
278
|
if db_key:
|
|
317
279
|
self._log_to_db(db_key, project, producer.inputs, item)
|
|
@@ -382,6 +344,23 @@ class ArtifactManager:
|
|
|
382
344
|
project=project,
|
|
383
345
|
)
|
|
384
346
|
|
|
347
|
+
def delete_artifact(
|
|
348
|
+
self,
|
|
349
|
+
item: Artifact,
|
|
350
|
+
deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
|
|
351
|
+
mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
|
|
352
|
+
),
|
|
353
|
+
secrets: dict = None,
|
|
354
|
+
):
|
|
355
|
+
self.artifact_db.del_artifact(
|
|
356
|
+
key=item.db_key,
|
|
357
|
+
project=item.project,
|
|
358
|
+
tag=item.tag,
|
|
359
|
+
tree=item.tree,
|
|
360
|
+
deletion_strategy=deletion_strategy,
|
|
361
|
+
secrets=secrets,
|
|
362
|
+
)
|
|
363
|
+
|
|
385
364
|
|
|
386
365
|
def extend_artifact_path(artifact_path: str, default_artifact_path: str):
|
|
387
366
|
artifact_path = str(artifact_path or "")
|