mlrun 1.8.0rc1__py3-none-any.whl → 1.8.0rc3__py3-none-any.whl

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.

Potentially problematic release.

Files changed (83)
  1. mlrun/__init__.py +5 -7
  2. mlrun/__main__.py +1 -1
  3. mlrun/artifacts/__init__.py +1 -0
  4. mlrun/artifacts/document.py +313 -0
  5. mlrun/artifacts/manager.py +2 -0
  6. mlrun/common/formatters/project.py +9 -0
  7. mlrun/common/schemas/__init__.py +4 -0
  8. mlrun/common/schemas/alert.py +31 -18
  9. mlrun/common/schemas/api_gateway.py +3 -3
  10. mlrun/common/schemas/artifact.py +7 -7
  11. mlrun/common/schemas/auth.py +6 -4
  12. mlrun/common/schemas/background_task.py +7 -7
  13. mlrun/common/schemas/client_spec.py +2 -2
  14. mlrun/common/schemas/clusterization_spec.py +2 -2
  15. mlrun/common/schemas/common.py +5 -5
  16. mlrun/common/schemas/constants.py +15 -0
  17. mlrun/common/schemas/datastore_profile.py +1 -1
  18. mlrun/common/schemas/feature_store.py +9 -9
  19. mlrun/common/schemas/frontend_spec.py +4 -4
  20. mlrun/common/schemas/function.py +10 -10
  21. mlrun/common/schemas/hub.py +1 -1
  22. mlrun/common/schemas/k8s.py +3 -3
  23. mlrun/common/schemas/memory_reports.py +3 -3
  24. mlrun/common/schemas/model_monitoring/grafana.py +1 -1
  25. mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +1 -1
  26. mlrun/common/schemas/model_monitoring/model_endpoints.py +1 -1
  27. mlrun/common/schemas/notification.py +18 -3
  28. mlrun/common/schemas/object.py +1 -1
  29. mlrun/common/schemas/pagination.py +4 -4
  30. mlrun/common/schemas/partition.py +16 -1
  31. mlrun/common/schemas/pipeline.py +2 -2
  32. mlrun/common/schemas/project.py +22 -17
  33. mlrun/common/schemas/runs.py +2 -2
  34. mlrun/common/schemas/runtime_resource.py +5 -5
  35. mlrun/common/schemas/schedule.py +1 -1
  36. mlrun/common/schemas/secret.py +1 -1
  37. mlrun/common/schemas/tag.py +3 -3
  38. mlrun/common/schemas/workflow.py +5 -5
  39. mlrun/config.py +23 -1
  40. mlrun/datastore/datastore_profile.py +38 -19
  41. mlrun/datastore/vectorstore.py +186 -0
  42. mlrun/db/base.py +58 -6
  43. mlrun/db/httpdb.py +267 -15
  44. mlrun/db/nopdb.py +44 -5
  45. mlrun/execution.py +47 -1
  46. mlrun/model.py +2 -2
  47. mlrun/model_monitoring/applications/results.py +2 -2
  48. mlrun/model_monitoring/db/tsdb/base.py +2 -2
  49. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +37 -13
  50. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +32 -40
  51. mlrun/model_monitoring/helpers.py +4 -10
  52. mlrun/model_monitoring/stream_processing.py +14 -11
  53. mlrun/platforms/__init__.py +44 -13
  54. mlrun/projects/__init__.py +6 -1
  55. mlrun/projects/pipelines.py +184 -55
  56. mlrun/projects/project.py +309 -33
  57. mlrun/run.py +4 -1
  58. mlrun/runtimes/base.py +2 -1
  59. mlrun/runtimes/mounts.py +572 -0
  60. mlrun/runtimes/nuclio/function.py +1 -2
  61. mlrun/runtimes/pod.py +82 -18
  62. mlrun/runtimes/remotesparkjob.py +1 -1
  63. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  64. mlrun/utils/clones.py +1 -1
  65. mlrun/utils/helpers.py +12 -2
  66. mlrun/utils/logger.py +2 -2
  67. mlrun/utils/notifications/notification/__init__.py +22 -19
  68. mlrun/utils/notifications/notification/base.py +12 -12
  69. mlrun/utils/notifications/notification/console.py +6 -6
  70. mlrun/utils/notifications/notification/git.py +6 -6
  71. mlrun/utils/notifications/notification/ipython.py +6 -6
  72. mlrun/utils/notifications/notification/mail.py +149 -0
  73. mlrun/utils/notifications/notification/slack.py +6 -6
  74. mlrun/utils/notifications/notification/webhook.py +6 -6
  75. mlrun/utils/notifications/notification_pusher.py +20 -12
  76. mlrun/utils/regex.py +2 -0
  77. mlrun/utils/version/version.json +2 -2
  78. {mlrun-1.8.0rc1.dist-info → mlrun-1.8.0rc3.dist-info}/METADATA +190 -186
  79. {mlrun-1.8.0rc1.dist-info → mlrun-1.8.0rc3.dist-info}/RECORD +83 -79
  80. {mlrun-1.8.0rc1.dist-info → mlrun-1.8.0rc3.dist-info}/WHEEL +1 -1
  81. {mlrun-1.8.0rc1.dist-info → mlrun-1.8.0rc3.dist-info}/LICENSE +0 -0
  82. {mlrun-1.8.0rc1.dist-info → mlrun-1.8.0rc3.dist-info}/entry_points.txt +0 -0
  83. {mlrun-1.8.0rc1.dist-info → mlrun-1.8.0rc3.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/project.py CHANGED

@@ -15,7 +15,7 @@
  import datetime
  import typing

- import pydantic
+ import pydantic.v1
  from deprecated import deprecated

  import mlrun.common.types
@@ -40,14 +40,14 @@ class ProjectsFormat(mlrun.common.types.StrEnum):
      leader = "leader"


- class ProjectMetadata(pydantic.BaseModel):
+ class ProjectMetadata(pydantic.v1.BaseModel):
      name: str
      created: typing.Optional[datetime.datetime] = None
      labels: typing.Optional[dict] = {}
      annotations: typing.Optional[dict] = {}

      class Config:
-         extra = pydantic.Extra.allow
+         extra = pydantic.v1.Extra.allow


  class ProjectDesiredState(mlrun.common.types.StrEnum):
@@ -77,7 +77,7 @@ class ProjectStatus(ObjectStatus):
      state: typing.Optional[ProjectState]


- class ProjectSpec(pydantic.BaseModel):
+ class ProjectSpec(pydantic.v1.BaseModel):
      description: typing.Optional[str] = None
      owner: typing.Optional[str] = None
      goals: typing.Optional[str] = None
@@ -97,10 +97,10 @@ class ProjectSpec(pydantic.BaseModel):
      default_function_node_selector: typing.Optional[dict] = {}

      class Config:
-         extra = pydantic.Extra.allow
+         extra = pydantic.v1.Extra.allow


- class ProjectSpecOut(pydantic.BaseModel):
+ class ProjectSpecOut(pydantic.v1.BaseModel):
      description: typing.Optional[str] = None
      owner: typing.Optional[str] = None
      goals: typing.Optional[str] = None
@@ -120,11 +120,11 @@ class ProjectSpecOut(pydantic.BaseModel):
      default_function_node_selector: typing.Optional[dict] = {}

      class Config:
-         extra = pydantic.Extra.allow
+         extra = pydantic.v1.Extra.allow


- class Project(pydantic.BaseModel):
-     kind: ObjectKind = pydantic.Field(ObjectKind.project, const=True)
+ class Project(pydantic.v1.BaseModel):
+     kind: ObjectKind = pydantic.v1.Field(ObjectKind.project, const=True)
      metadata: ProjectMetadata
      spec: ProjectSpec = ProjectSpec()
      status: ObjectStatus = ObjectStatus()
@@ -132,19 +132,19 @@ class Project(pydantic.BaseModel):

  # The reason we have a different schema for the response model is that we don't want to validate project.spec.build in
  # the response as the validation was added late and there may be corrupted values in the DB.
- class ProjectOut(pydantic.BaseModel):
-     kind: ObjectKind = pydantic.Field(ObjectKind.project, const=True)
+ class ProjectOut(pydantic.v1.BaseModel):
+     kind: ObjectKind = pydantic.v1.Field(ObjectKind.project, const=True)
      metadata: ProjectMetadata
      spec: ProjectSpecOut = ProjectSpecOut()
      status: ObjectStatus = ObjectStatus()


- class ProjectOwner(pydantic.BaseModel):
+ class ProjectOwner(pydantic.v1.BaseModel):
      username: str
      access_key: str


- class ProjectSummary(pydantic.BaseModel):
+ class ProjectSummary(pydantic.v1.BaseModel):
      name: str
      files_count: int = 0
      feature_sets_count: int = 0
@@ -161,7 +161,7 @@ class ProjectSummary(pydantic.BaseModel):
      updated: typing.Optional[datetime.datetime] = None


- class IguazioProject(pydantic.BaseModel):
+ class IguazioProject(pydantic.v1.BaseModel):
      data: dict


@@ -175,13 +175,18 @@ class IguazioProject(pydantic.BaseModel):
  # to add a specific classes for them. it's frustrating but couldn't find other workaround, see:
  # https://github.com/samuelcolvin/pydantic/issues/1423, https://github.com/samuelcolvin/pydantic/issues/619
  ProjectOutput = typing.TypeVar(
-     "ProjectOutput", ProjectOut, str, ProjectSummary, IguazioProject
+     "ProjectOutput",
+     ProjectOut,
+     str,
+     ProjectSummary,
+     IguazioProject,
+     tuple[str, datetime.datetime],
  )


- class ProjectsOutput(pydantic.BaseModel):
+ class ProjectsOutput(pydantic.v1.BaseModel):
      projects: list[ProjectOutput]


- class ProjectSummariesOutput(pydantic.BaseModel):
+ class ProjectSummariesOutput(pydantic.v1.BaseModel):
      project_summaries: list[ProjectSummary]
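
The change running through all of the schema files in this release is mechanical: every pydantic.X reference becomes pydantic.v1.X. Pydantic 2 ships the legacy 1.x API under the pydantic.v1 namespace, so the move keeps v1 semantics (class-based Config, Extra.allow, Field(..., const=True), @validator) while letting mlrun be installed alongside pydantic 2. A minimal sketch of the pattern, assuming pydantic>=2 is installed (the model below is illustrative, not an mlrun schema):

    import pydantic.v1  # pydantic 2 bundles the legacy 1.x API under this namespace


    class ExampleMetadata(pydantic.v1.BaseModel):
        name: str
        labels: dict = {}

        class Config:
            # v1-style config; pydantic 2 replaced this with model_config
            extra = pydantic.v1.Extra.allow


    m = ExampleMetadata(name="demo", unexpected="kept")  # extra keys are allowed
    print(m.dict())  # v1 API: .dict() rather than v2's .model_dump()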

mlrun/common/schemas/runs.py CHANGED

@@ -14,13 +14,13 @@

  import typing

- import pydantic
+ import pydantic.v1
  from deprecated import deprecated

  import mlrun.common.types


- class RunIdentifier(pydantic.BaseModel):
+ class RunIdentifier(pydantic.v1.BaseModel):
      kind: typing.Literal["run"] = "run"
      uid: typing.Optional[str]
      iter: typing.Optional[int]

mlrun/common/schemas/runtime_resource.py CHANGED

@@ -14,7 +14,7 @@
  #
  import typing

- import pydantic
+ import pydantic.v1

  import mlrun.common.types

@@ -24,23 +24,23 @@ class ListRuntimeResourcesGroupByField(mlrun.common.types.StrEnum):
      project = "project"


- class RuntimeResource(pydantic.BaseModel):
+ class RuntimeResource(pydantic.v1.BaseModel):
      name: str
      labels: dict[str, str] = {}
      status: typing.Optional[dict]


- class RuntimeResources(pydantic.BaseModel):
+ class RuntimeResources(pydantic.v1.BaseModel):
      crd_resources: list[RuntimeResource] = []
      pod_resources: list[RuntimeResource] = []
      # only for dask runtime
      service_resources: typing.Optional[list[RuntimeResource]] = None

      class Config:
-         extra = pydantic.Extra.allow
+         extra = pydantic.v1.Extra.allow


- class KindRuntimeResources(pydantic.BaseModel):
+ class KindRuntimeResources(pydantic.v1.BaseModel):
      kind: str
      resources: RuntimeResources

mlrun/common/schemas/schedule.py CHANGED

@@ -15,7 +15,7 @@
  from datetime import datetime
  from typing import Any, Literal, Optional, Union

- from pydantic import BaseModel
+ from pydantic.v1 import BaseModel

  import mlrun.common.types
  from mlrun.common.schemas.auth import Credentials

@@ -14,7 +14,7 @@
  #
  from typing import Optional

- from pydantic import BaseModel, Field
+ from pydantic.v1 import BaseModel, Field

  import mlrun.common.types

mlrun/common/schemas/tag.py CHANGED

@@ -13,17 +13,17 @@
  # limitations under the License.
  #

- import pydantic
+ import pydantic.v1

  from .artifact import ArtifactIdentifier


- class Tag(pydantic.BaseModel):
+ class Tag(pydantic.v1.BaseModel):
      name: str
      project: str


- class TagObjects(pydantic.BaseModel):
+ class TagObjects(pydantic.v1.BaseModel):
      """Tag object"""

      kind: str

mlrun/common/schemas/workflow.py CHANGED

@@ -14,14 +14,14 @@
  #
  import typing

- import pydantic
+ import pydantic.v1

  from mlrun.common.schemas.notification import Notification
  from mlrun.common.schemas.schedule import ScheduleCronTrigger
  from mlrun.common.types import StrEnum


- class WorkflowSpec(pydantic.BaseModel):
+ class WorkflowSpec(pydantic.v1.BaseModel):
      name: str
      engine: typing.Optional[str] = None
      code: typing.Optional[str] = None
@@ -36,7 +36,7 @@ class WorkflowSpec(pydantic.BaseModel):
      workflow_runner_node_selector: typing.Optional[dict[str, str]] = None


- class WorkflowRequest(pydantic.BaseModel):
+ class WorkflowRequest(pydantic.v1.BaseModel):
      spec: typing.Optional[WorkflowSpec] = None
      arguments: typing.Optional[dict] = None
      artifact_path: typing.Optional[str] = None
@@ -46,7 +46,7 @@ class WorkflowRequest(pydantic.BaseModel):
      notifications: typing.Optional[list[Notification]] = None


- class WorkflowResponse(pydantic.BaseModel):
+ class WorkflowResponse(pydantic.v1.BaseModel):
      project: str = None
      name: str = None
      status: str = None
@@ -54,7 +54,7 @@ class WorkflowResponse(pydantic.BaseModel):
      schedule: typing.Union[str, ScheduleCronTrigger] = None


- class GetWorkflowResponse(pydantic.BaseModel):
+ class GetWorkflowResponse(pydantic.v1.BaseModel):
      workflow_id: str = None
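
The workflow schemas above remain plain pydantic v1 request models, so they can be constructed and serialized with the v1 API. A hedged usage sketch (all field values are illustrative):

    from mlrun.common.schemas.workflow import WorkflowRequest, WorkflowSpec

    spec = WorkflowSpec(name="main", engine="kfp")  # engine value is an example
    request = WorkflowRequest(
        spec=spec,
        arguments={"model_name": "churn"},
        artifact_path="s3://my-bucket/artifacts",  # assumed path
    )
    print(request.dict(exclude_none=True))  # v1-style serialization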
 
mlrun/config.py CHANGED
@@ -138,6 +138,9 @@ default_config = {
      "object_retentions": {
          "alert_activation": 14 * 7,  # days
      },
+     # A safety margin to account for delays
+     # This ensures that extra partitions are available beyond the specified retention period
+     "partitions_buffer_multiplier": 3,
      # the grace period (in seconds) that will be given to runtime resources (after they're in terminal state)
      # before deleting them (4 hours)
      "runtime_resources_deletion_grace_period": "14400",
@@ -530,7 +533,7 @@ default_config = {
          "verbose": True,
      },
      "pagination": {
-         "default_page_size": 20,
+         "default_page_size": 200,
          "pagination_cache": {
              "interval": 60,
              "ttl": 3600,
@@ -798,11 +801,30 @@ default_config = {
          "max_allowed": 10000,
          # maximum allowed value for count in criteria field inside AlertConfig
          "max_criteria_count": 100,
+         # interval for periodic events generation job
+         "events_generation_interval": "30",
      },
      "auth_with_client_id": {
          "enabled": False,
          "request_timeout": 5,
      },
+     "services": {
+         # The running service name. One of: "api", "alerts"
+         "service_name": "api",
+         "hydra": {
+             # Comma separated list of services to run on the instance.
+             # Currently, this is only considered when the service_name is "api".
+             # "*" starts all services on the same instance,
+             # other options are considered as running only the api service.
+             "services": "*",
+         },
+     },
+     "notifications": {
+         "smtp": {
+             "config_secret_name": "mlrun-smtp-config",
+             "refresh_interval": "30",
+         }
+     },
  }
  _is_running_as_api = None
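Three things change in config.py: partition retention gains a safety buffer (partitions_buffer_multiplier), the default page size jumps from 20 to 200, and new "services" (including the hydra run-everything mode) and "notifications.smtp" sections are introduced. mlrun's config loader maps MLRUN_-prefixed environment variables onto this nested default_config tree, with a double underscore per nesting level, so any of these defaults can be overridden per deployment. A hedged sketch, assuming pagination sits under the httpdb section as the hunk context suggests:

    import os

    # Must be set before mlrun is imported, since the config is read at import time.
    os.environ["MLRUN_HTTPDB__PAGINATION__DEFAULT_PAGE_SIZE"] = "50"

    import mlrun

    print(mlrun.mlconf.httpdb.pagination.default_page_size)  # -> 50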
 
mlrun/datastore/datastore_profile.py CHANGED

@@ -19,7 +19,7 @@ import typing
  import warnings
  from urllib.parse import ParseResult, urlparse, urlunparse

- import pydantic
+ import pydantic.v1
  from mergedeep import merge

  import mlrun
@@ -28,15 +28,15 @@ import mlrun.errors
  from ..secrets import get_secret_or_env


- class DatastoreProfile(pydantic.BaseModel):
+ class DatastoreProfile(pydantic.v1.BaseModel):
      type: str
      name: str
      _private_attributes: list = ()

      class Config:
-         extra = pydantic.Extra.forbid
+         extra = pydantic.v1.Extra.forbid

-     @pydantic.validator("name")
+     @pydantic.v1.validator("name")
      @classmethod
      def lower_case(cls, v):
          return v.lower()
@@ -75,14 +75,32 @@ class TemporaryClientDatastoreProfiles(metaclass=mlrun.utils.singleton.Singleton


  class DatastoreProfileBasic(DatastoreProfile):
-     type: str = pydantic.Field("basic")
+     type: str = pydantic.v1.Field("basic")
      _private_attributes = "private"
      public: str
      private: typing.Optional[str] = None


+ class VectorStoreProfile(DatastoreProfile):
+     type: str = pydantic.Field("vector")
+     _private_attributes = ("kwargs_private",)
+     vector_store_class: str
+     kwargs_public: typing.Optional[dict] = None
+     kwargs_private: typing.Optional[dict] = None
+
+     def attributes(self, kwargs=None):
+         attributes = {}
+         if self.kwargs_public:
+             attributes = merge(attributes, self.kwargs_public)
+         if self.kwargs_private:
+             attributes = merge(attributes, self.kwargs_private)
+         if kwargs:
+             attributes = merge(attributes, kwargs)
+         return attributes
+
+
  class DatastoreProfileKafkaTarget(DatastoreProfile):
-     type: str = pydantic.Field("kafka_target")
+     type: str = pydantic.v1.Field("kafka_target")
      _private_attributes = "kwargs_private"
      bootstrap_servers: typing.Optional[str] = None
      brokers: typing.Optional[str] = None
@@ -123,7 +141,7 @@ class DatastoreProfileKafkaTarget(DatastoreProfile):


  class DatastoreProfileKafkaSource(DatastoreProfile):
-     type: str = pydantic.Field("kafka_source")
+     type: str = pydantic.v1.Field("kafka_source")
      _private_attributes = ("kwargs_private", "sasl_user", "sasl_pass")
      brokers: typing.Union[str, list[str]]
      topics: typing.Union[str, list[str]]
@@ -162,7 +180,7 @@


  class DatastoreProfileV3io(DatastoreProfile):
-     type: str = pydantic.Field("v3io")
+     type: str = pydantic.v1.Field("v3io")
      v3io_access_key: typing.Optional[str] = None
      _private_attributes = "v3io_access_key"

@@ -178,7 +196,7 @@


  class DatastoreProfileS3(DatastoreProfile):
-     type: str = pydantic.Field("s3")
+     type: str = pydantic.v1.Field("s3")
      _private_attributes = ("access_key_id", "secret_key")
      endpoint_url: typing.Optional[str] = None
      force_non_anonymous: typing.Optional[str] = None
@@ -188,7 +206,7 @@ class DatastoreProfileS3(DatastoreProfile):
      secret_key: typing.Optional[str] = None
      bucket: typing.Optional[str] = None

-     @pydantic.validator("bucket")
+     @pydantic.v1.validator("bucket")
      @classmethod
      def check_bucket(cls, v):
          if not v:
@@ -226,7 +244,7 @@


  class DatastoreProfileRedis(DatastoreProfile):
-     type: str = pydantic.Field("redis")
+     type: str = pydantic.v1.Field("redis")
      _private_attributes = ("username", "password")
      endpoint_url: str
      username: typing.Optional[str] = None
@@ -269,7 +287,7 @@


  class DatastoreProfileDBFS(DatastoreProfile):
-     type: str = pydantic.Field("dbfs")
+     type: str = pydantic.v1.Field("dbfs")
      _private_attributes = ("token",)
      endpoint_url: typing.Optional[str] = None  # host
      token: typing.Optional[str] = None
@@ -287,13 +305,13 @@


  class DatastoreProfileGCS(DatastoreProfile):
-     type: str = pydantic.Field("gcs")
+     type: str = pydantic.v1.Field("gcs")
      _private_attributes = ("gcp_credentials",)
      credentials_path: typing.Optional[str] = None  # path to file.
      gcp_credentials: typing.Optional[typing.Union[str, dict]] = None
      bucket: typing.Optional[str] = None

-     @pydantic.validator("bucket")
+     @pydantic.v1.validator("bucket")
      @classmethod
      def check_bucket(cls, v):
          if not v:
@@ -304,7 +322,7 @@ class DatastoreProfileGCS(DatastoreProfile):
          )
          return v

-     @pydantic.validator("gcp_credentials", pre=True, always=True)
+     @pydantic.v1.validator("gcp_credentials", pre=True, always=True)
      @classmethod
      def convert_dict_to_json(cls, v):
          if isinstance(v, dict):
@@ -332,7 +350,7 @@ class DatastoreProfileGCS(DatastoreProfile):


  class DatastoreProfileAzureBlob(DatastoreProfile):
-     type: str = pydantic.Field("az")
+     type: str = pydantic.v1.Field("az")
      _private_attributes = (
          "connection_string",
          "account_key",
@@ -350,7 +368,7 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
      credential: typing.Optional[str] = None
      container: typing.Optional[str] = None

-     @pydantic.validator("container")
+     @pydantic.v1.validator("container")
      @classmethod
      def check_container(cls, v):
          if not v:
@@ -392,7 +410,7 @@


  class DatastoreProfileHdfs(DatastoreProfile):
-     type: str = pydantic.Field("hdfs")
+     type: str = pydantic.v1.Field("hdfs")
      _private_attributes = "token"
      host: typing.Optional[str] = None
      port: typing.Optional[int] = None
@@ -415,7 +433,7 @@ class DatastoreProfileHdfs(DatastoreProfile):
          return f"webhdfs://{self.host}:{self.http_port}{subpath}"


- class DatastoreProfile2Json(pydantic.BaseModel):
+ class DatastoreProfile2Json(pydantic.v1.BaseModel):
      @staticmethod
      def _to_json(attributes):
          # First, base64 encode the values
@@ -476,6 +494,7 @@ class DatastoreProfile2Json(pydantic.BaseModel):
          "gcs": DatastoreProfileGCS,
          "az": DatastoreProfileAzureBlob,
          "hdfs": DatastoreProfileHdfs,
+         "vector": VectorStoreProfile,
      }
      if datastore_type in ds_profile_factory:
          return ds_profile_factory[datastore_type].parse_obj(decoded_dict)
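
The new VectorStoreProfile follows the established datastore-profile split: kwargs_public travels in the clear, kwargs_private is treated as a secret, and attributes() deep-merges public, private, and call-time kwargs, with later sources winning. A hedged usage sketch; the profile name, class path, and kwargs are illustrative:

    from mlrun.datastore.datastore_profile import (
        VectorStoreProfile,
        register_temporary_client_datastore_profile,
    )

    profile = VectorStoreProfile(
        name="my-vector-profile",
        vector_store_class="langchain_community.vectorstores.Chroma",  # example class path
        kwargs_public={"persist_directory": "/tmp/chroma"},
        kwargs_private={"api_key": "secret"},  # kept out of public listings
    )

    # Call-time kwargs override the stored ones via mergedeep.merge.
    print(profile.attributes({"persist_directory": "/data/chroma"}))
    # -> {'persist_directory': '/data/chroma', 'api_key': 'secret'}

    # Make the profile available to the local client session.
    register_temporary_client_datastore_profile(profile)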

mlrun/datastore/vectorstore.py ADDED

@@ -0,0 +1,186 @@
+ # Copyright 2024 Iguazio
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import inspect
+ from importlib import import_module
+ from typing import Union
+
+ from mlrun.artifacts import DocumentArtifact
+
+
+ class VectorStoreCollection:
+     """
+     VectorStoreCollection is a class that manages a collection of vector stores, providing methods to add and delete
+     documents and artifacts, and to interact with an MLRun context.
+
+     Attributes:
+         _collection_impl (object): The underlying collection implementation.
+         _mlrun_context (Union[MlrunProject, MLClientCtx]): The MLRun context associated with the collection.
+         collection_name (str): The name of the collection.
+         id (str): The unique identifier of the collection, composed of the datastore profile and collection name.
+
+     Methods:
+         add_documents(documents: list["Document"], **kwargs):
+             Adds a list of documents to the collection and updates the MLRun artifacts associated with the documents
+             if an MLRun context is present.
+
+         add_artifacts(artifacts: list[DocumentArtifact], splitter=None, **kwargs):
+             Adds a list of DocumentArtifact objects to the collection, optionally using a splitter to convert
+             artifacts to documents.
+
+         remove_itself_from_artifact(artifact: DocumentArtifact):
+             Removes the current object from the given artifact's collection and updates the artifact.
+
+         delete_artifacts(artifacts: list[DocumentArtifact]):
+             Deletes a list of DocumentArtifact objects from the collection and updates the MLRun context.
+             Raises NotImplementedError if the delete operation is not supported for the collection implementation.
+     """
+
+     def __init__(
+         self,
+         vector_store_class: str,
+         mlrun_context: Union["MlrunProject", "MLClientCtx"],  # noqa: F821
+         datastore_profile: str,
+         collection_name: str,
+         **kwargs,
+     ):
+         # Import the vector store class dynamically
+         module_name, class_name = vector_store_class.rsplit(".", 1)
+         module = import_module(module_name)
+         vector_store_class = getattr(module, class_name)
+
+         signature = inspect.signature(vector_store_class)
+
+         # Create the vector store instance
+         if "collection_name" in signature.parameters.keys():
+             vector_store = vector_store_class(collection_name=collection_name, **kwargs)
+         else:
+             vector_store = vector_store_class(**kwargs)
+
+         self._collection_impl = vector_store
+         self._mlrun_context = mlrun_context
+         self.collection_name = collection_name
+         self.id = datastore_profile + "/" + collection_name
+
+     def __getattr__(self, name):
+         # This method is called when an attribute is not found in the usual places
+         # Forward the attribute access to _collection_impl
+         return getattr(self._collection_impl, name)
+
+     def __setattr__(self, name, value):
+         if name in ["_collection_impl", "_mlrun_context"] or name in self.__dict__:
+             # Use the base class method to avoid recursion
+             super().__setattr__(name, value)
+         else:
+             # Forward the attribute setting to _collection_impl
+             setattr(self._collection_impl, name, value)
+
+     def add_documents(
+         self,
+         documents: list["Document"],  # noqa: F821
+         **kwargs,
+     ):
+         """
+         Add a list of documents to the collection.
+
+         If the instance has an MLRun context, it will update the MLRun artifacts
+         associated with the documents.
+
+         Args:
+             documents (list[Document]): A list of Document objects to be added.
+             **kwargs: Additional keyword arguments to be passed to the underlying
+                 collection implementation.
+
+         Returns:
+             The result of the underlying collection implementation's add_documents method.
+         """
+         if self._mlrun_context:
+             for document in documents:
+                 mlrun_uri = document.metadata.get(
+                     DocumentArtifact.METADATA_ARTIFACT_URI_KEY
+                 )
+                 if mlrun_uri:
+                     artifact = self._mlrun_context.get_store_resource(mlrun_uri)
+                     artifact.collection_add(self.id)
+                     self._mlrun_context.update_artifact(artifact)
+         return self._collection_impl.add_documents(documents, **kwargs)
+
+     def add_artifacts(self, artifacts: list[DocumentArtifact], splitter=None, **kwargs):
+         """
+         Add a list of DocumentArtifact objects to the collection.
+
+         Args:
+             artifacts (list[DocumentArtifact]): A list of DocumentArtifact objects to be added.
+             splitter (optional): An optional splitter to be used when converting artifacts to documents.
+             **kwargs: Additional keyword arguments to be passed to the collection's add_documents method.
+
+         Returns:
+             list: A list of IDs of the added documents.
+         """
+         all_ids = []
+         for artifact in artifacts:
+             documents = artifact.to_langchain_documents(splitter)
+             artifact.collection_add(self.id)
+             self._mlrun_context.update_artifact(artifact)
+             ids = self._collection_impl.add_documents(documents, **kwargs)
+             all_ids.extend(ids)
+         return all_ids
+
+     def remove_itself_from_artifact(self, artifact: DocumentArtifact):
+         """
+         Remove the current object from the given artifact's collection and update the artifact.
+
+         Args:
+             artifact (DocumentArtifact): The artifact from which the current object should be removed.
+         """
+         artifact.collection_remove(self.id)
+         self._mlrun_context.update_artifact(artifact)
+
+     def delete_artifacts(self, artifacts: list[DocumentArtifact]):
+         """
+         Delete a list of DocumentArtifact objects from the collection.
+
+         This method removes the specified artifacts from the collection and updates the MLRun context.
+         The deletion process varies depending on the type of the underlying collection implementation.
+
+         Args:
+             artifacts (list[DocumentArtifact]): A list of DocumentArtifact objects to be deleted.
+
+         Raises:
+             NotImplementedError: If the delete operation is not supported for the collection implementation.
+         """
+         store_class = self._collection_impl.__class__.__name__.lower()
+         for artifact in artifacts:
+             artifact.collection_remove(self.id)
+             self._mlrun_context.update_artifact(artifact)
+             if store_class == "milvus":
+                 expr = f"{DocumentArtifact.METADATA_SOURCE_KEY} == '{artifact.source}'"
+                 return self._collection_impl.delete(expr=expr)
+             elif store_class == "chroma":
+                 where = {DocumentArtifact.METADATA_SOURCE_KEY: artifact.source}
+                 return self._collection_impl.delete(where=where)
+
+             elif (
+                 hasattr(self._collection_impl, "delete")
+                 and "filter"
+                 in inspect.signature(self._collection_impl.delete).parameters
+             ):
+                 filter = {
+                     "metadata": {DocumentArtifact.METADATA_SOURCE_KEY: artifact.source}
+                 }
+                 return self._collection_impl.delete(filter=filter)
+             else:
+                 raise NotImplementedError(
+                     f"delete_artifacts() operation not supported for {store_class}"
+                 )