mlrun 1.8.0rc1__py3-none-any.whl → 1.8.0rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +5 -7
- mlrun/__main__.py +1 -1
- mlrun/artifacts/__init__.py +1 -0
- mlrun/artifacts/document.py +313 -0
- mlrun/artifacts/manager.py +2 -0
- mlrun/common/formatters/project.py +9 -0
- mlrun/common/schemas/__init__.py +4 -0
- mlrun/common/schemas/alert.py +31 -18
- mlrun/common/schemas/api_gateway.py +3 -3
- mlrun/common/schemas/artifact.py +7 -7
- mlrun/common/schemas/auth.py +6 -4
- mlrun/common/schemas/background_task.py +7 -7
- mlrun/common/schemas/client_spec.py +2 -2
- mlrun/common/schemas/clusterization_spec.py +2 -2
- mlrun/common/schemas/common.py +5 -5
- mlrun/common/schemas/constants.py +15 -0
- mlrun/common/schemas/datastore_profile.py +1 -1
- mlrun/common/schemas/feature_store.py +9 -9
- mlrun/common/schemas/frontend_spec.py +4 -4
- mlrun/common/schemas/function.py +10 -10
- mlrun/common/schemas/hub.py +1 -1
- mlrun/common/schemas/k8s.py +3 -3
- mlrun/common/schemas/memory_reports.py +3 -3
- mlrun/common/schemas/model_monitoring/grafana.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +1 -1
- mlrun/common/schemas/notification.py +18 -3
- mlrun/common/schemas/object.py +1 -1
- mlrun/common/schemas/pagination.py +4 -4
- mlrun/common/schemas/partition.py +16 -1
- mlrun/common/schemas/pipeline.py +2 -2
- mlrun/common/schemas/project.py +22 -17
- mlrun/common/schemas/runs.py +2 -2
- mlrun/common/schemas/runtime_resource.py +5 -5
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/secret.py +1 -1
- mlrun/common/schemas/tag.py +3 -3
- mlrun/common/schemas/workflow.py +5 -5
- mlrun/config.py +23 -1
- mlrun/datastore/datastore_profile.py +38 -19
- mlrun/datastore/vectorstore.py +186 -0
- mlrun/db/base.py +58 -6
- mlrun/db/httpdb.py +267 -15
- mlrun/db/nopdb.py +44 -5
- mlrun/execution.py +47 -1
- mlrun/model.py +2 -2
- mlrun/model_monitoring/applications/results.py +2 -2
- mlrun/model_monitoring/db/tsdb/base.py +2 -2
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +37 -13
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +32 -40
- mlrun/model_monitoring/helpers.py +4 -10
- mlrun/model_monitoring/stream_processing.py +14 -11
- mlrun/platforms/__init__.py +44 -13
- mlrun/projects/__init__.py +6 -1
- mlrun/projects/pipelines.py +184 -55
- mlrun/projects/project.py +309 -33
- mlrun/run.py +4 -1
- mlrun/runtimes/base.py +2 -1
- mlrun/runtimes/mounts.py +572 -0
- mlrun/runtimes/nuclio/function.py +1 -2
- mlrun/runtimes/pod.py +82 -18
- mlrun/runtimes/remotesparkjob.py +1 -1
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/utils/clones.py +1 -1
- mlrun/utils/helpers.py +12 -2
- mlrun/utils/logger.py +2 -2
- mlrun/utils/notifications/notification/__init__.py +22 -19
- mlrun/utils/notifications/notification/base.py +12 -12
- mlrun/utils/notifications/notification/console.py +6 -6
- mlrun/utils/notifications/notification/git.py +6 -6
- mlrun/utils/notifications/notification/ipython.py +6 -6
- mlrun/utils/notifications/notification/mail.py +149 -0
- mlrun/utils/notifications/notification/slack.py +6 -6
- mlrun/utils/notifications/notification/webhook.py +6 -6
- mlrun/utils/notifications/notification_pusher.py +20 -12
- mlrun/utils/regex.py +2 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc1.dist-info → mlrun-1.8.0rc3.dist-info}/METADATA +190 -186
- {mlrun-1.8.0rc1.dist-info → mlrun-1.8.0rc3.dist-info}/RECORD +83 -79
- {mlrun-1.8.0rc1.dist-info → mlrun-1.8.0rc3.dist-info}/WHEEL +1 -1
- {mlrun-1.8.0rc1.dist-info → mlrun-1.8.0rc3.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc1.dist-info → mlrun-1.8.0rc3.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc1.dist-info → mlrun-1.8.0rc3.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/project.py
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
import datetime
|
|
16
16
|
import typing
|
|
17
17
|
|
|
18
|
-
import pydantic
|
|
18
|
+
import pydantic.v1
|
|
19
19
|
from deprecated import deprecated
|
|
20
20
|
|
|
21
21
|
import mlrun.common.types
|
|
@@ -40,14 +40,14 @@ class ProjectsFormat(mlrun.common.types.StrEnum):
|
|
|
40
40
|
leader = "leader"
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
class ProjectMetadata(pydantic.BaseModel):
|
|
43
|
+
class ProjectMetadata(pydantic.v1.BaseModel):
|
|
44
44
|
name: str
|
|
45
45
|
created: typing.Optional[datetime.datetime] = None
|
|
46
46
|
labels: typing.Optional[dict] = {}
|
|
47
47
|
annotations: typing.Optional[dict] = {}
|
|
48
48
|
|
|
49
49
|
class Config:
|
|
50
|
-
extra = pydantic.Extra.allow
|
|
50
|
+
extra = pydantic.v1.Extra.allow
|
|
51
51
|
|
|
52
52
|
|
|
53
53
|
class ProjectDesiredState(mlrun.common.types.StrEnum):
|
|
@@ -77,7 +77,7 @@ class ProjectStatus(ObjectStatus):
|
|
|
77
77
|
state: typing.Optional[ProjectState]
|
|
78
78
|
|
|
79
79
|
|
|
80
|
-
class ProjectSpec(pydantic.BaseModel):
|
|
80
|
+
class ProjectSpec(pydantic.v1.BaseModel):
|
|
81
81
|
description: typing.Optional[str] = None
|
|
82
82
|
owner: typing.Optional[str] = None
|
|
83
83
|
goals: typing.Optional[str] = None
|
|
@@ -97,10 +97,10 @@ class ProjectSpec(pydantic.BaseModel):
|
|
|
97
97
|
default_function_node_selector: typing.Optional[dict] = {}
|
|
98
98
|
|
|
99
99
|
class Config:
|
|
100
|
-
extra = pydantic.Extra.allow
|
|
100
|
+
extra = pydantic.v1.Extra.allow
|
|
101
101
|
|
|
102
102
|
|
|
103
|
-
class ProjectSpecOut(pydantic.BaseModel):
|
|
103
|
+
class ProjectSpecOut(pydantic.v1.BaseModel):
|
|
104
104
|
description: typing.Optional[str] = None
|
|
105
105
|
owner: typing.Optional[str] = None
|
|
106
106
|
goals: typing.Optional[str] = None
|
|
@@ -120,11 +120,11 @@ class ProjectSpecOut(pydantic.BaseModel):
|
|
|
120
120
|
default_function_node_selector: typing.Optional[dict] = {}
|
|
121
121
|
|
|
122
122
|
class Config:
|
|
123
|
-
extra = pydantic.Extra.allow
|
|
123
|
+
extra = pydantic.v1.Extra.allow
|
|
124
124
|
|
|
125
125
|
|
|
126
|
-
class Project(pydantic.BaseModel):
|
|
127
|
-
kind: ObjectKind = pydantic.Field(ObjectKind.project, const=True)
|
|
126
|
+
class Project(pydantic.v1.BaseModel):
|
|
127
|
+
kind: ObjectKind = pydantic.v1.Field(ObjectKind.project, const=True)
|
|
128
128
|
metadata: ProjectMetadata
|
|
129
129
|
spec: ProjectSpec = ProjectSpec()
|
|
130
130
|
status: ObjectStatus = ObjectStatus()
|
|
@@ -132,19 +132,19 @@ class Project(pydantic.BaseModel):
|
|
|
132
132
|
|
|
133
133
|
# The reason we have a different schema for the response model is that we don't want to validate project.spec.build in
|
|
134
134
|
# the response as the validation was added late and there may be corrupted values in the DB.
|
|
135
|
-
class ProjectOut(pydantic.BaseModel):
|
|
136
|
-
kind: ObjectKind = pydantic.Field(ObjectKind.project, const=True)
|
|
135
|
+
class ProjectOut(pydantic.v1.BaseModel):
|
|
136
|
+
kind: ObjectKind = pydantic.v1.Field(ObjectKind.project, const=True)
|
|
137
137
|
metadata: ProjectMetadata
|
|
138
138
|
spec: ProjectSpecOut = ProjectSpecOut()
|
|
139
139
|
status: ObjectStatus = ObjectStatus()
|
|
140
140
|
|
|
141
141
|
|
|
142
|
-
class ProjectOwner(pydantic.BaseModel):
|
|
142
|
+
class ProjectOwner(pydantic.v1.BaseModel):
|
|
143
143
|
username: str
|
|
144
144
|
access_key: str
|
|
145
145
|
|
|
146
146
|
|
|
147
|
-
class ProjectSummary(pydantic.BaseModel):
|
|
147
|
+
class ProjectSummary(pydantic.v1.BaseModel):
|
|
148
148
|
name: str
|
|
149
149
|
files_count: int = 0
|
|
150
150
|
feature_sets_count: int = 0
|
|
@@ -161,7 +161,7 @@ class ProjectSummary(pydantic.BaseModel):
|
|
|
161
161
|
updated: typing.Optional[datetime.datetime] = None
|
|
162
162
|
|
|
163
163
|
|
|
164
|
-
class IguazioProject(pydantic.BaseModel):
|
|
164
|
+
class IguazioProject(pydantic.v1.BaseModel):
|
|
165
165
|
data: dict
|
|
166
166
|
|
|
167
167
|
|
|
@@ -175,13 +175,18 @@ class IguazioProject(pydantic.BaseModel):
|
|
|
175
175
|
# to add a specific classes for them. it's frustrating but couldn't find other workaround, see:
|
|
176
176
|
# https://github.com/samuelcolvin/pydantic/issues/1423, https://github.com/samuelcolvin/pydantic/issues/619
|
|
177
177
|
ProjectOutput = typing.TypeVar(
|
|
178
|
-
"ProjectOutput",
|
|
178
|
+
"ProjectOutput",
|
|
179
|
+
ProjectOut,
|
|
180
|
+
str,
|
|
181
|
+
ProjectSummary,
|
|
182
|
+
IguazioProject,
|
|
183
|
+
tuple[str, datetime.datetime],
|
|
179
184
|
)
|
|
180
185
|
|
|
181
186
|
|
|
182
|
-
class ProjectsOutput(pydantic.BaseModel):
|
|
187
|
+
class ProjectsOutput(pydantic.v1.BaseModel):
|
|
183
188
|
projects: list[ProjectOutput]
|
|
184
189
|
|
|
185
190
|
|
|
186
|
-
class ProjectSummariesOutput(pydantic.BaseModel):
|
|
191
|
+
class ProjectSummariesOutput(pydantic.v1.BaseModel):
|
|
187
192
|
project_summaries: list[ProjectSummary]
|
mlrun/common/schemas/runs.py
CHANGED
|
@@ -14,13 +14,13 @@
|
|
|
14
14
|
|
|
15
15
|
import typing
|
|
16
16
|
|
|
17
|
-
import pydantic
|
|
17
|
+
import pydantic.v1
|
|
18
18
|
from deprecated import deprecated
|
|
19
19
|
|
|
20
20
|
import mlrun.common.types
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
class RunIdentifier(pydantic.BaseModel):
|
|
23
|
+
class RunIdentifier(pydantic.v1.BaseModel):
|
|
24
24
|
kind: typing.Literal["run"] = "run"
|
|
25
25
|
uid: typing.Optional[str]
|
|
26
26
|
iter: typing.Optional[int]
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
#
|
|
15
15
|
import typing
|
|
16
16
|
|
|
17
|
-
import pydantic
|
|
17
|
+
import pydantic.v1
|
|
18
18
|
|
|
19
19
|
import mlrun.common.types
|
|
20
20
|
|
|
@@ -24,23 +24,23 @@ class ListRuntimeResourcesGroupByField(mlrun.common.types.StrEnum):
|
|
|
24
24
|
project = "project"
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
class RuntimeResource(pydantic.BaseModel):
|
|
27
|
+
class RuntimeResource(pydantic.v1.BaseModel):
|
|
28
28
|
name: str
|
|
29
29
|
labels: dict[str, str] = {}
|
|
30
30
|
status: typing.Optional[dict]
|
|
31
31
|
|
|
32
32
|
|
|
33
|
-
class RuntimeResources(pydantic.BaseModel):
|
|
33
|
+
class RuntimeResources(pydantic.v1.BaseModel):
|
|
34
34
|
crd_resources: list[RuntimeResource] = []
|
|
35
35
|
pod_resources: list[RuntimeResource] = []
|
|
36
36
|
# only for dask runtime
|
|
37
37
|
service_resources: typing.Optional[list[RuntimeResource]] = None
|
|
38
38
|
|
|
39
39
|
class Config:
|
|
40
|
-
extra = pydantic.Extra.allow
|
|
40
|
+
extra = pydantic.v1.Extra.allow
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
class KindRuntimeResources(pydantic.BaseModel):
|
|
43
|
+
class KindRuntimeResources(pydantic.v1.BaseModel):
|
|
44
44
|
kind: str
|
|
45
45
|
resources: RuntimeResources
|
|
46
46
|
|
mlrun/common/schemas/schedule.py
CHANGED
mlrun/common/schemas/secret.py
CHANGED
mlrun/common/schemas/tag.py
CHANGED
|
@@ -13,17 +13,17 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
15
|
|
|
16
|
-
import pydantic
|
|
16
|
+
import pydantic.v1
|
|
17
17
|
|
|
18
18
|
from .artifact import ArtifactIdentifier
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
class Tag(pydantic.BaseModel):
|
|
21
|
+
class Tag(pydantic.v1.BaseModel):
|
|
22
22
|
name: str
|
|
23
23
|
project: str
|
|
24
24
|
|
|
25
25
|
|
|
26
|
-
class TagObjects(pydantic.BaseModel):
|
|
26
|
+
class TagObjects(pydantic.v1.BaseModel):
|
|
27
27
|
"""Tag object"""
|
|
28
28
|
|
|
29
29
|
kind: str
|
mlrun/common/schemas/workflow.py
CHANGED
|
@@ -14,14 +14,14 @@
|
|
|
14
14
|
#
|
|
15
15
|
import typing
|
|
16
16
|
|
|
17
|
-
import pydantic
|
|
17
|
+
import pydantic.v1
|
|
18
18
|
|
|
19
19
|
from mlrun.common.schemas.notification import Notification
|
|
20
20
|
from mlrun.common.schemas.schedule import ScheduleCronTrigger
|
|
21
21
|
from mlrun.common.types import StrEnum
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
class WorkflowSpec(pydantic.BaseModel):
|
|
24
|
+
class WorkflowSpec(pydantic.v1.BaseModel):
|
|
25
25
|
name: str
|
|
26
26
|
engine: typing.Optional[str] = None
|
|
27
27
|
code: typing.Optional[str] = None
|
|
@@ -36,7 +36,7 @@ class WorkflowSpec(pydantic.BaseModel):
|
|
|
36
36
|
workflow_runner_node_selector: typing.Optional[dict[str, str]] = None
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
class WorkflowRequest(pydantic.BaseModel):
|
|
39
|
+
class WorkflowRequest(pydantic.v1.BaseModel):
|
|
40
40
|
spec: typing.Optional[WorkflowSpec] = None
|
|
41
41
|
arguments: typing.Optional[dict] = None
|
|
42
42
|
artifact_path: typing.Optional[str] = None
|
|
@@ -46,7 +46,7 @@ class WorkflowRequest(pydantic.BaseModel):
|
|
|
46
46
|
notifications: typing.Optional[list[Notification]] = None
|
|
47
47
|
|
|
48
48
|
|
|
49
|
-
class WorkflowResponse(pydantic.BaseModel):
|
|
49
|
+
class WorkflowResponse(pydantic.v1.BaseModel):
|
|
50
50
|
project: str = None
|
|
51
51
|
name: str = None
|
|
52
52
|
status: str = None
|
|
@@ -54,7 +54,7 @@ class WorkflowResponse(pydantic.BaseModel):
|
|
|
54
54
|
schedule: typing.Union[str, ScheduleCronTrigger] = None
|
|
55
55
|
|
|
56
56
|
|
|
57
|
-
class GetWorkflowResponse(pydantic.BaseModel):
|
|
57
|
+
class GetWorkflowResponse(pydantic.v1.BaseModel):
|
|
58
58
|
workflow_id: str = None
|
|
59
59
|
|
|
60
60
|
|
mlrun/config.py
CHANGED
|
@@ -138,6 +138,9 @@ default_config = {
|
|
|
138
138
|
"object_retentions": {
|
|
139
139
|
"alert_activation": 14 * 7, # days
|
|
140
140
|
},
|
|
141
|
+
# A safety margin to account for delays
|
|
142
|
+
# This ensures that extra partitions are available beyond the specified retention period
|
|
143
|
+
"partitions_buffer_multiplier": 3,
|
|
141
144
|
# the grace period (in seconds) that will be given to runtime resources (after they're in terminal state)
|
|
142
145
|
# before deleting them (4 hours)
|
|
143
146
|
"runtime_resources_deletion_grace_period": "14400",
|
|
@@ -530,7 +533,7 @@ default_config = {
|
|
|
530
533
|
"verbose": True,
|
|
531
534
|
},
|
|
532
535
|
"pagination": {
|
|
533
|
-
"default_page_size":
|
|
536
|
+
"default_page_size": 200,
|
|
534
537
|
"pagination_cache": {
|
|
535
538
|
"interval": 60,
|
|
536
539
|
"ttl": 3600,
|
|
@@ -798,11 +801,30 @@ default_config = {
|
|
|
798
801
|
"max_allowed": 10000,
|
|
799
802
|
# maximum allowed value for count in criteria field inside AlertConfig
|
|
800
803
|
"max_criteria_count": 100,
|
|
804
|
+
# interval for periodic events generation job
|
|
805
|
+
"events_generation_interval": "30",
|
|
801
806
|
},
|
|
802
807
|
"auth_with_client_id": {
|
|
803
808
|
"enabled": False,
|
|
804
809
|
"request_timeout": 5,
|
|
805
810
|
},
|
|
811
|
+
"services": {
|
|
812
|
+
# The running service name. One of: "api", "alerts"
|
|
813
|
+
"service_name": "api",
|
|
814
|
+
"hydra": {
|
|
815
|
+
# Comma separated list of services to run on the instance.
|
|
816
|
+
# Currently, this is only considered when the service_name is "api".
|
|
817
|
+
# "*" starts all services on the same instance,
|
|
818
|
+
# other options are considered as running only the api service.
|
|
819
|
+
"services": "*",
|
|
820
|
+
},
|
|
821
|
+
},
|
|
822
|
+
"notifications": {
|
|
823
|
+
"smtp": {
|
|
824
|
+
"config_secret_name": "mlrun-smtp-config",
|
|
825
|
+
"refresh_interval": "30",
|
|
826
|
+
}
|
|
827
|
+
},
|
|
806
828
|
}
|
|
807
829
|
_is_running_as_api = None
|
|
808
830
|
|
|
@@ -19,7 +19,7 @@ import typing
|
|
|
19
19
|
import warnings
|
|
20
20
|
from urllib.parse import ParseResult, urlparse, urlunparse
|
|
21
21
|
|
|
22
|
-
import pydantic
|
|
22
|
+
import pydantic.v1
|
|
23
23
|
from mergedeep import merge
|
|
24
24
|
|
|
25
25
|
import mlrun
|
|
@@ -28,15 +28,15 @@ import mlrun.errors
|
|
|
28
28
|
from ..secrets import get_secret_or_env
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
class DatastoreProfile(pydantic.BaseModel):
|
|
31
|
+
class DatastoreProfile(pydantic.v1.BaseModel):
|
|
32
32
|
type: str
|
|
33
33
|
name: str
|
|
34
34
|
_private_attributes: list = ()
|
|
35
35
|
|
|
36
36
|
class Config:
|
|
37
|
-
extra = pydantic.Extra.forbid
|
|
37
|
+
extra = pydantic.v1.Extra.forbid
|
|
38
38
|
|
|
39
|
-
@pydantic.validator("name")
|
|
39
|
+
@pydantic.v1.validator("name")
|
|
40
40
|
@classmethod
|
|
41
41
|
def lower_case(cls, v):
|
|
42
42
|
return v.lower()
|
|
@@ -75,14 +75,32 @@ class TemporaryClientDatastoreProfiles(metaclass=mlrun.utils.singleton.Singleton
|
|
|
75
75
|
|
|
76
76
|
|
|
77
77
|
class DatastoreProfileBasic(DatastoreProfile):
|
|
78
|
-
type: str = pydantic.Field("basic")
|
|
78
|
+
type: str = pydantic.v1.Field("basic")
|
|
79
79
|
_private_attributes = "private"
|
|
80
80
|
public: str
|
|
81
81
|
private: typing.Optional[str] = None
|
|
82
82
|
|
|
83
83
|
|
|
84
|
+
class VectorStoreProfile(DatastoreProfile):
    """Datastore profile that describes how to construct a vector store.

    Fields:
        vector_store_class: Dotted path of the vector store class to use.
        kwargs_public: Optional keyword arguments stored as public attributes.
        kwargs_private: Optional keyword arguments listed in
            ``_private_attributes``.
    """

    # Use the pydantic.v1 namespace like every other profile in this module:
    # this class derives from a pydantic.v1 model, so the field must be
    # declared with the v1 ``Field`` helper to be recognised as field metadata.
    type: str = pydantic.v1.Field("vector")
    _private_attributes = ("kwargs_private",)
    vector_store_class: str
    kwargs_public: typing.Optional[dict] = None
    kwargs_private: typing.Optional[dict] = None

    def attributes(self, kwargs=None):
        """Return the merged keyword arguments for the vector store.

        The layers are merged in this order: ``kwargs_public``, then
        ``kwargs_private``, then the caller supplied ``kwargs``. Empty or
        missing layers are skipped.

        Args:
            kwargs: Optional extra keyword arguments merged last.

        Returns:
            dict: A new dictionary holding the merged attributes.
        """
        merged = {}
        for layer in (self.kwargs_public, self.kwargs_private, kwargs):
            if layer:
                # ``merge`` updates ``merged`` in place and returns it.
                merged = merge(merged, layer)
        return merged
|
|
100
|
+
|
|
101
|
+
|
|
84
102
|
class DatastoreProfileKafkaTarget(DatastoreProfile):
|
|
85
|
-
type: str = pydantic.Field("kafka_target")
|
|
103
|
+
type: str = pydantic.v1.Field("kafka_target")
|
|
86
104
|
_private_attributes = "kwargs_private"
|
|
87
105
|
bootstrap_servers: typing.Optional[str] = None
|
|
88
106
|
brokers: typing.Optional[str] = None
|
|
@@ -123,7 +141,7 @@ class DatastoreProfileKafkaTarget(DatastoreProfile):
|
|
|
123
141
|
|
|
124
142
|
|
|
125
143
|
class DatastoreProfileKafkaSource(DatastoreProfile):
|
|
126
|
-
type: str = pydantic.Field("kafka_source")
|
|
144
|
+
type: str = pydantic.v1.Field("kafka_source")
|
|
127
145
|
_private_attributes = ("kwargs_private", "sasl_user", "sasl_pass")
|
|
128
146
|
brokers: typing.Union[str, list[str]]
|
|
129
147
|
topics: typing.Union[str, list[str]]
|
|
@@ -162,7 +180,7 @@ class DatastoreProfileKafkaSource(DatastoreProfile):
|
|
|
162
180
|
|
|
163
181
|
|
|
164
182
|
class DatastoreProfileV3io(DatastoreProfile):
|
|
165
|
-
type: str = pydantic.Field("v3io")
|
|
183
|
+
type: str = pydantic.v1.Field("v3io")
|
|
166
184
|
v3io_access_key: typing.Optional[str] = None
|
|
167
185
|
_private_attributes = "v3io_access_key"
|
|
168
186
|
|
|
@@ -178,7 +196,7 @@ class DatastoreProfileV3io(DatastoreProfile):
|
|
|
178
196
|
|
|
179
197
|
|
|
180
198
|
class DatastoreProfileS3(DatastoreProfile):
|
|
181
|
-
type: str = pydantic.Field("s3")
|
|
199
|
+
type: str = pydantic.v1.Field("s3")
|
|
182
200
|
_private_attributes = ("access_key_id", "secret_key")
|
|
183
201
|
endpoint_url: typing.Optional[str] = None
|
|
184
202
|
force_non_anonymous: typing.Optional[str] = None
|
|
@@ -188,7 +206,7 @@ class DatastoreProfileS3(DatastoreProfile):
|
|
|
188
206
|
secret_key: typing.Optional[str] = None
|
|
189
207
|
bucket: typing.Optional[str] = None
|
|
190
208
|
|
|
191
|
-
@pydantic.validator("bucket")
|
|
209
|
+
@pydantic.v1.validator("bucket")
|
|
192
210
|
@classmethod
|
|
193
211
|
def check_bucket(cls, v):
|
|
194
212
|
if not v:
|
|
@@ -226,7 +244,7 @@ class DatastoreProfileS3(DatastoreProfile):
|
|
|
226
244
|
|
|
227
245
|
|
|
228
246
|
class DatastoreProfileRedis(DatastoreProfile):
|
|
229
|
-
type: str = pydantic.Field("redis")
|
|
247
|
+
type: str = pydantic.v1.Field("redis")
|
|
230
248
|
_private_attributes = ("username", "password")
|
|
231
249
|
endpoint_url: str
|
|
232
250
|
username: typing.Optional[str] = None
|
|
@@ -269,7 +287,7 @@ class DatastoreProfileRedis(DatastoreProfile):
|
|
|
269
287
|
|
|
270
288
|
|
|
271
289
|
class DatastoreProfileDBFS(DatastoreProfile):
|
|
272
|
-
type: str = pydantic.Field("dbfs")
|
|
290
|
+
type: str = pydantic.v1.Field("dbfs")
|
|
273
291
|
_private_attributes = ("token",)
|
|
274
292
|
endpoint_url: typing.Optional[str] = None # host
|
|
275
293
|
token: typing.Optional[str] = None
|
|
@@ -287,13 +305,13 @@ class DatastoreProfileDBFS(DatastoreProfile):
|
|
|
287
305
|
|
|
288
306
|
|
|
289
307
|
class DatastoreProfileGCS(DatastoreProfile):
|
|
290
|
-
type: str = pydantic.Field("gcs")
|
|
308
|
+
type: str = pydantic.v1.Field("gcs")
|
|
291
309
|
_private_attributes = ("gcp_credentials",)
|
|
292
310
|
credentials_path: typing.Optional[str] = None # path to file.
|
|
293
311
|
gcp_credentials: typing.Optional[typing.Union[str, dict]] = None
|
|
294
312
|
bucket: typing.Optional[str] = None
|
|
295
313
|
|
|
296
|
-
@pydantic.validator("bucket")
|
|
314
|
+
@pydantic.v1.validator("bucket")
|
|
297
315
|
@classmethod
|
|
298
316
|
def check_bucket(cls, v):
|
|
299
317
|
if not v:
|
|
@@ -304,7 +322,7 @@ class DatastoreProfileGCS(DatastoreProfile):
|
|
|
304
322
|
)
|
|
305
323
|
return v
|
|
306
324
|
|
|
307
|
-
@pydantic.validator("gcp_credentials", pre=True, always=True)
|
|
325
|
+
@pydantic.v1.validator("gcp_credentials", pre=True, always=True)
|
|
308
326
|
@classmethod
|
|
309
327
|
def convert_dict_to_json(cls, v):
|
|
310
328
|
if isinstance(v, dict):
|
|
@@ -332,7 +350,7 @@ class DatastoreProfileGCS(DatastoreProfile):
|
|
|
332
350
|
|
|
333
351
|
|
|
334
352
|
class DatastoreProfileAzureBlob(DatastoreProfile):
|
|
335
|
-
type: str = pydantic.Field("az")
|
|
353
|
+
type: str = pydantic.v1.Field("az")
|
|
336
354
|
_private_attributes = (
|
|
337
355
|
"connection_string",
|
|
338
356
|
"account_key",
|
|
@@ -350,7 +368,7 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
|
|
|
350
368
|
credential: typing.Optional[str] = None
|
|
351
369
|
container: typing.Optional[str] = None
|
|
352
370
|
|
|
353
|
-
@pydantic.validator("container")
|
|
371
|
+
@pydantic.v1.validator("container")
|
|
354
372
|
@classmethod
|
|
355
373
|
def check_container(cls, v):
|
|
356
374
|
if not v:
|
|
@@ -392,7 +410,7 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
|
|
|
392
410
|
|
|
393
411
|
|
|
394
412
|
class DatastoreProfileHdfs(DatastoreProfile):
|
|
395
|
-
type: str = pydantic.Field("hdfs")
|
|
413
|
+
type: str = pydantic.v1.Field("hdfs")
|
|
396
414
|
_private_attributes = "token"
|
|
397
415
|
host: typing.Optional[str] = None
|
|
398
416
|
port: typing.Optional[int] = None
|
|
@@ -415,7 +433,7 @@ class DatastoreProfileHdfs(DatastoreProfile):
|
|
|
415
433
|
return f"webhdfs://{self.host}:{self.http_port}{subpath}"
|
|
416
434
|
|
|
417
435
|
|
|
418
|
-
class DatastoreProfile2Json(pydantic.BaseModel):
|
|
436
|
+
class DatastoreProfile2Json(pydantic.v1.BaseModel):
|
|
419
437
|
@staticmethod
|
|
420
438
|
def _to_json(attributes):
|
|
421
439
|
# First, base64 encode the values
|
|
@@ -476,6 +494,7 @@ class DatastoreProfile2Json(pydantic.BaseModel):
|
|
|
476
494
|
"gcs": DatastoreProfileGCS,
|
|
477
495
|
"az": DatastoreProfileAzureBlob,
|
|
478
496
|
"hdfs": DatastoreProfileHdfs,
|
|
497
|
+
"vector": VectorStoreProfile,
|
|
479
498
|
}
|
|
480
499
|
if datastore_type in ds_profile_factory:
|
|
481
500
|
return ds_profile_factory[datastore_type].parse_obj(decoded_dict)
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
# Copyright 2024 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import inspect
|
|
16
|
+
from importlib import import_module
|
|
17
|
+
from typing import Union
|
|
18
|
+
|
|
19
|
+
from mlrun.artifacts import DocumentArtifact
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class VectorStoreCollection:
    """
    VectorStoreCollection wraps a vector store instance and keeps the MLRun document artifacts that were
    loaded into it linked to the collection.

    Attribute access that is not resolved on the wrapper itself is forwarded to the wrapped vector store.

    Attributes:
        _collection_impl (object): The underlying collection implementation.
        _mlrun_context (Union[MlrunProject, MLClientCtx]): The MLRun context associated with the collection.
        collection_name (str): The name of the collection.
        id (str): The unique identifier of the collection, composed of the datastore profile and collection name.

    Methods:
        add_documents(documents: list["Document"], **kwargs):
            Adds a list of documents to the collection and updates the MLRun artifacts associated with the documents
            if an MLRun context is present.

        add_artifacts(artifacts: list[DocumentArtifact], splitter=None, **kwargs):
            Adds a list of DocumentArtifact objects to the collection, optionally using a splitter to convert
            artifacts to documents.

        remove_itself_from_artifact(artifact: DocumentArtifact):
            Removes the current object from the given artifact's collection and updates the artifact.

        delete_artifacts(artifacts: list[DocumentArtifact]):
            Deletes a list of DocumentArtifact objects from the collection and updates the MLRun context.
            Raises NotImplementedError if the delete operation is not supported for the collection implementation.
    """

    def __init__(
        self,
        vector_store_class: str,
        mlrun_context: Union["MlrunProject", "MLClientCtx"],  # noqa: F821
        datastore_profile: str,
        collection_name: str,
        **kwargs,
    ):
        """
        Instantiate the vector store and bind it to an MLRun context.

        Args:
            vector_store_class (str): Dotted path ("package.module.ClassName") of the vector store class.
            mlrun_context: The MLRun project or run context used to update artifacts.
            datastore_profile (str): Datastore profile name, used as the prefix of ``id``.
            collection_name (str): Name of the collection, used as the suffix of ``id``.
            **kwargs: Keyword arguments passed to the vector store constructor.
        """
        # Import the vector store class dynamically
        module_name, class_name = vector_store_class.rsplit(".", 1)
        module = import_module(module_name)
        vector_store_class = getattr(module, class_name)

        signature = inspect.signature(vector_store_class)

        # Pass the collection name only to constructors that declare such a parameter
        if "collection_name" in signature.parameters:
            vector_store = vector_store_class(collection_name=collection_name, **kwargs)
        else:
            vector_store = vector_store_class(**kwargs)

        self._collection_impl = vector_store
        self._mlrun_context = mlrun_context
        # NOTE: neither name is in the instance __dict__ at this point, so __setattr__ below forwards
        # these two assignments to the wrapped store; later reads come back through __getattr__.
        self.collection_name = collection_name
        self.id = datastore_profile + "/" + collection_name

    def __getattr__(self, name):
        # This method is called when an attribute is not found in the usual places.
        # If the wrapped store itself is missing (e.g. the instance was created without running
        # __init__), fail with AttributeError instead of recursing into this method forever.
        if name == "_collection_impl":
            raise AttributeError(name)
        # Forward the attribute access to _collection_impl
        return getattr(self._collection_impl, name)

    def __setattr__(self, name, value):
        if name in ["_collection_impl", "_mlrun_context"] or name in self.__dict__:
            # Use the base class method to avoid recursion
            super().__setattr__(name, value)
        else:
            # Forward the attribute setting to _collection_impl
            setattr(self._collection_impl, name, value)

    def add_documents(
        self,
        documents: list["Document"],  # noqa: F821
        **kwargs,
    ):
        """
        Add a list of documents to the collection.

        If the instance has an MLRun context, it will update the MLRun artifacts
        associated with the documents.

        Args:
            documents (list[Document]): A list of Document objects to be added.
            **kwargs: Additional keyword arguments to be passed to the underlying
                collection implementation.

        Returns:
            The result of the underlying collection implementation's add_documents method.
        """
        if self._mlrun_context:
            for document in documents:
                mlrun_uri = document.metadata.get(
                    DocumentArtifact.METADATA_ARTIFACT_URI_KEY
                )
                # Only documents that carry an artifact URI in their metadata are linked
                if mlrun_uri:
                    artifact = self._mlrun_context.get_store_resource(mlrun_uri)
                    artifact.collection_add(self.id)
                    self._mlrun_context.update_artifact(artifact)
        return self._collection_impl.add_documents(documents, **kwargs)

    def add_artifacts(self, artifacts: list[DocumentArtifact], splitter=None, **kwargs):
        """
        Add a list of DocumentArtifact objects to the collection.

        Args:
            artifacts (list[DocumentArtifact]): A list of DocumentArtifact objects to be added.
            splitter (optional): An optional splitter to be used when converting artifacts to documents.
            **kwargs: Additional keyword arguments to be passed to the collection's add_documents method.

        Returns:
            list: A list of IDs of the added documents.
        """
        all_ids = []
        for artifact in artifacts:
            documents = artifact.to_langchain_documents(splitter)
            artifact.collection_add(self.id)
            # Same guard as add_documents: persist the link only when a context is available
            if self._mlrun_context:
                self._mlrun_context.update_artifact(artifact)
            ids = self._collection_impl.add_documents(documents, **kwargs)
            # Tolerate implementations that return nothing
            if ids:
                all_ids.extend(ids)
        return all_ids

    def remove_itself_from_artifact(self, artifact: DocumentArtifact):
        """
        Remove the current object from the given artifact's collection and update the artifact.

        Args:
            artifact (DocumentArtifact): The artifact from which the current object should be removed.
        """
        artifact.collection_remove(self.id)
        if self._mlrun_context:
            self._mlrun_context.update_artifact(artifact)

    def _delete_kwargs(self, source):
        """
        Build the keyword arguments that select ``source`` for the wrapped store's ``delete`` method.

        Raises:
            NotImplementedError: If the delete operation is not supported for the collection implementation.
        """
        store_class = self._collection_impl.__class__.__name__.lower()
        key = DocumentArtifact.METADATA_SOURCE_KEY
        if store_class == "milvus":
            # NOTE(review): the source is interpolated unescaped; a quote inside it would break the expression
            return {"expr": f"{key} == '{source}'"}
        if store_class == "chroma":
            return {"where": {key: source}}
        if (
            hasattr(self._collection_impl, "delete")
            and "filter" in inspect.signature(self._collection_impl.delete).parameters
        ):
            return {"filter": {"metadata": {key: source}}}
        raise NotImplementedError(
            f"delete_artifacts() operation not supported for {store_class}"
        )

    def delete_artifacts(self, artifacts: list[DocumentArtifact]):
        """
        Delete a list of DocumentArtifact objects from the collection.

        This method removes every one of the specified artifacts from the collection and updates the
        MLRun context when one is present.
        The deletion process varies depending on the type of the underlying collection implementation.

        Args:
            artifacts (list[DocumentArtifact]): A list of DocumentArtifact objects to be deleted.

        Returns:
            The result of the underlying delete call for the last artifact processed, or None when
            ``artifacts`` is empty. For a single artifact this is the value returned previously.

        Raises:
            NotImplementedError: If the delete operation is not supported for the collection implementation.
        """
        result = None
        for artifact in artifacts:
            # Resolve the store-specific arguments first, so an unsupported store raises
            # before the artifact is unlinked and persisted.
            delete_kwargs = self._delete_kwargs(artifact.source)
            artifact.collection_remove(self.id)
            if self._mlrun_context:
                self._mlrun_context.update_artifact(artifact)
            # Do not return here: every artifact in the list has to be processed
            result = self._collection_impl.delete(**delete_kwargs)
        return result
|