select-ai 1.2.0rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,642 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) 2025, Oracle and/or its affiliates.
3
+ #
4
+ # Licensed under the Universal Permissive License v 1.0 as shown at
5
+ # http://oss.oracle.com/licenses/upl.
6
+ # -----------------------------------------------------------------------------
7
+
8
+ import json
9
+ from abc import ABC
10
+ from dataclasses import dataclass
11
+ from typing import AsyncGenerator, Iterator, Optional, Union
12
+
13
+ import oracledb
14
+
15
+ from select_ai import BaseProfile
16
+ from select_ai._abc import SelectAIDataClass
17
+ from select_ai._enums import StrEnum
18
+ from select_ai.async_profile import AsyncProfile
19
+ from select_ai.db import async_cursor, cursor
20
+ from select_ai.errors import ProfileNotFoundError, VectorIndexNotFoundError
21
+ from select_ai.profile import Profile
22
+ from select_ai.sql import (
23
+ GET_USER_VECTOR_INDEX_ATTRIBUTES,
24
+ LIST_USER_VECTOR_INDEXES,
25
+ )
26
+
27
# Names of vector index attributes that, going by this constant's name, are
# not meant to be changed once the index exists. Nothing in this module reads
# the tuple, so enforcement presumably lives in a caller elsewhere in the
# package -- TODO confirm.
UNMODIFIABLE_VECTOR_INDEX_ATTRIBUTES = (
    "location",
    "chunk_size",
    "chunk_overlap",
    "pipeline_name",
    "vector_dimension",
    "vector_table_name",
    "vector_distance_metric",
)
36
+
37
+
38
class VectorDBProvider(StrEnum):
    """Vector database providers that a vector index can be backed by."""

    # Only provider declared in this module; OracleVectorIndexAttributes uses
    # it as the default for its ``vector_db_provider`` field.
    ORACLE = "oracle"
40
+
41
+
42
class VectorDistanceMetric(StrEnum):
    """Distance calculations that can be used to compare vectors.

    Used as the type of ``VectorIndexAttributes.vector_distance_metric``.
    Each member's value is identical to its name.
    """

    EUCLIDEAN = "EUCLIDEAN"
    L2_SQUARED = "L2_SQUARED"
    COSINE = "COSINE"
    DOT = "DOT"
    MANHATTAN = "MANHATTAN"
    HAMMING = "HAMMING"
49
+
50
+
51
@dataclass
class VectorIndexAttributes(SelectAIDataClass):
    """
    Attributes of a vector index help to manage and configure the behavior of
    the vector index.

    :param int chunk_size: Text size of chunking the input data.
    :param int chunk_overlap: Specifies the amount of overlapping
     characters between adjacent chunks of text.
    :param str location: Location of the object store.
    :param int match_limit: Specifies the maximum number of results to return
     in a vector search query
    :param str object_storage_credential_name: Name of the credentials for
     accessing object storage.
    :param str profile_name: Name of the AI profile which is used for
     embedding source data and user prompts.
    :param int refresh_rate: Interval of updating data in the vector store.
     The unit is minutes.
    :param float similarity_threshold: Defines the minimum level of similarity
     required for two items to be considered a match
    :param VectorDistanceMetric vector_distance_metric: Specifies the type of
     distance calculation used to compare vectors in a database
    :param VectorDBProvider vector_db_provider: Name of the Vector database
     provider. Default value is "oracle"
    :param str vector_db_endpoint: Endpoint to access the Vector database
    :param str vector_db_credential_name: Name of the credentials for accessing
     Vector database
    :param int vector_dimension: Specifies the number of elements in each
     vector within the vector store
    :param str vector_table_name: Specifies the name of the table or collection
     to store vector embeddings and chunked data
    :param str pipeline_name: Presumably the name of the pipeline that loads
     data into the vector store -- TODO confirm against the DBMS_CLOUD_AI
     documentation
    """

    chunk_size: Optional[int] = None
    chunk_overlap: Optional[int] = None
    location: Optional[str] = None
    match_limit: Optional[int] = None
    object_storage_credential_name: Optional[str] = None
    profile_name: Optional[str] = None
    refresh_rate: Optional[int] = None
    similarity_threshold: Optional[float] = None
    vector_distance_metric: Optional[VectorDistanceMetric] = None
    vector_db_endpoint: Optional[str] = None
    vector_db_credential_name: Optional[str] = None
    vector_db_provider: Optional[VectorDBProvider] = None
    vector_dimension: Optional[int] = None
    vector_table_name: Optional[str] = None
    pipeline_name: Optional[str] = None

    @classmethod
    def create(cls, *, vector_db_provider: Optional[str] = None, **kwargs):
        """Build the attributes object that matches ``vector_db_provider``.

        The direct subclasses of this class are searched for one whose
        class-level ``vector_db_provider`` default equals the requested
        provider; the first match is instantiated with ``kwargs``. When no
        subclass matches, ``cls`` itself is instantiated.

        :param str vector_db_provider: Provider used to select the subclass
        :param kwargs: Remaining attribute values, passed to the constructor
        :return: An instance of the matching subclass, or of ``cls``
        """
        for subclass in cls.__subclasses__():
            # The dataclass default is also the class attribute, so it can be
            # compared without creating an instance.
            if subclass.vector_db_provider == vector_db_provider:
                return subclass(**kwargs)
        # Keep a provider no subclass claims instead of silently dropping it.
        # ``None`` is not forwarded so that it cannot overwrite the default
        # of a subclass on which ``create`` was invoked directly.
        if vector_db_provider is not None:
            kwargs["vector_db_provider"] = vector_db_provider
        return cls(**kwargs)
106
+
107
+
108
@dataclass
class OracleVectorIndexAttributes(VectorIndexAttributes):
    """Oracle specific vector index attributes"""

    # Overrides the inherited ``None`` default. VectorIndexAttributes.create()
    # compares this class-level value with the requested provider to decide
    # whether to instantiate this subclass.
    vector_db_provider: Optional[VectorDBProvider] = VectorDBProvider.ORACLE
113
+
114
+
115
class _BaseVectorIndex(ABC):
    """State and validation shared by VectorIndex and AsyncVectorIndex.

    Stores the profile, index name, description and attributes; this class
    itself performs no database access.
    """

    def __init__(
        self,
        profile: Optional[BaseProfile] = None,
        index_name: Optional[str] = None,
        description: Optional[str] = None,
        attributes: Optional[VectorIndexAttributes] = None,
    ):
        """Initialize a Vector Index

        :param BaseProfile profile: Profile linked to the vector index
        :param str index_name: The name of the vector index
        :param str description: The description of the vector index
        :param VectorIndexAttributes attributes: The attributes of the
         vector index
        :raises TypeError: if ``attributes`` or ``profile`` is given but has
         the wrong type
        """
        # Test against None rather than truthiness so that falsy values of
        # the wrong type (``{}``, ``""``, ``0``) are rejected as well.
        if attributes is not None and not isinstance(
            attributes, VectorIndexAttributes
        ):
            raise TypeError(
                "'attributes' must be an object of type "
                "select_ai.VectorIndexAttributes"
            )
        if profile is not None and not isinstance(profile, BaseProfile):
            raise TypeError(
                "'profile' must be an object of type "
                "select_ai.Profile or select_ai.AsyncProfile"
            )
        self.profile = profile
        self.index_name = index_name
        self.attributes = attributes
        self.description = description

    def __repr__(self):
        return (
            f"{self.__class__.__name__}(profile={self.profile}, "
            f"index_name={self.index_name}, "
            f"attributes={self.attributes}, description={self.description})"
        )
146
+
147
+
148
class VectorIndex(_BaseVectorIndex):
    """
    VectorIndex objects let you manage vector indexes

    :param select_ai.Profile profile: Profile linked to the vector index
    :param str index_name: The name of the vector index
    :param str description: The description of the vector index
    :param select_ai.VectorIndexAttributes attributes: The attributes of the
     vector index
    """

    @staticmethod
    def _get_attributes(index_name: str) -> VectorIndexAttributes:
        """Get attributes of a vector index

        :param str index_name: The name of the vector index. It is
         upper-cased before being bound to the query.
        :return: select_ai.VectorIndexAttributes
        :raises: AttributeError if ``index_name`` is None
        :raises: VectorIndexNotFoundError
        """
        if index_name is None:
            raise AttributeError("'index_name' is required")
        with cursor() as cr:
            cr.execute(
                GET_USER_VECTOR_INDEX_ATTRIBUTES, index_name=index_name.upper()
            )
            attributes = cr.fetchall()
            if not attributes:
                raise VectorIndexNotFoundError(index_name=index_name)
            # Rows are (name, value) pairs; LOB values are read into plain
            # Python values while the cursor is still open.
            post_processed_attributes = {
                k: v.read() if isinstance(v, oracledb.LOB) else v
                for k, v in attributes
            }
            return VectorIndexAttributes.create(**post_processed_attributes)

    def create(self, replace: Optional[bool] = False):
        """Create a vector index in the database and populates the index
        with data from an object store bucket using an async scheduler job

        If ``attributes.profile_name`` is not set, it is taken from the
        linked profile. After the index is created, the profile's
        ``vector_index_name`` attribute is set to this index.

        :param bool replace: Replace vector index if it exists
        :return: None
        :raises: AttributeError if ``attributes`` or ``profile`` is missing
        :raises: oracledb.DatabaseError
        """
        # Validate before touching the database so that a missing profile
        # cannot leave behind an index that was created but never linked.
        if self.attributes is None:
            raise AttributeError("'attributes' is required")
        if self.profile is None:
            raise AttributeError("'profile' is required")

        if self.attributes.profile_name is None:
            self.attributes.profile_name = self.profile.profile_name

        parameters = {
            "index_name": self.index_name,
            "attributes": self.attributes.json(),
        }

        if self.description:
            parameters["description"] = self.description

        with cursor() as cr:
            try:
                cr.callproc(
                    "DBMS_CLOUD_AI.CREATE_VECTOR_INDEX",
                    keyword_parameters=parameters,
                )
            except oracledb.DatabaseError as e:
                (error,) = e.args
                # If already exists and replace is True then drop and recreate
                if error.code == 20048 and replace:
                    self.delete(force=True)
                    cr.callproc(
                        "DBMS_CLOUD_AI.CREATE_VECTOR_INDEX",
                        keyword_parameters=parameters,
                    )
                else:
                    raise
        self.profile.set_attribute("vector_index_name", self.index_name)

    def delete(
        self,
        include_data: Optional[bool] = True,
        force: Optional[bool] = False,
    ):
        """This procedure removes a vector store index

        :param bool include_data: Indicates whether to delete
         both the customer's vector store and vector index
         along with the vector index object
        :param bool force: Indicates whether to ignore errors
         that occur if the vector index does not exist
        :return: None
        :raises: oracledb.DatabaseError
        """
        with cursor() as cr:
            cr.callproc(
                "DBMS_CLOUD_AI.DROP_VECTOR_INDEX",
                keyword_parameters={
                    "index_name": self.index_name,
                    "include_data": include_data,
                    "force": force,
                },
            )

    def enable(self):
        """This procedure enables or activates a previously disabled vector
        index object. Generally, when you create a vector index, by default
        it is enabled such that the AI profile can use it to perform indexing
        and searching.

        Enabling an index that is already enabled is not treated as an error.

        :return: None
        :raises: oracledb.DatabaseError

        """
        with cursor() as cr:
            try:
                cr.callproc(
                    "DBMS_CLOUD_AI.ENABLE_VECTOR_INDEX",
                    keyword_parameters={"index_name": self.index_name},
                )
            except oracledb.DatabaseError as e:
                (error,) = e.args
                # ORA-20000: Vector Index is already in the desired status
                if error.code != 20000:
                    raise

    def disable(self):
        """This procedure disables a vector index object in the current
        database. When disabled, an AI profile cannot use the vector index,
        and the system does not load data into the vector store as new data
        is added to the object store and does not perform indexing, searching
        or querying based on the index.

        Disabling an index that is already disabled is not treated as an
        error.

        :return: None
        :raises: oracledb.DatabaseError
        """
        with cursor() as cr:
            try:
                cr.callproc(
                    "DBMS_CLOUD_AI.DISABLE_VECTOR_INDEX",
                    keyword_parameters={"index_name": self.index_name},
                )
            except oracledb.DatabaseError as e:
                (error,) = e.args
                # ORA-20000: Vector Index is already in the desired status
                if error.code != 20000:
                    raise

    def set_attribute(
        self,
        attribute_name: str,
        attribute_value: Union[str, int, float],
    ):
        """
        This procedure updates an existing vector store index with a specified
        value of the vector index attribute.

        The locally held ``attributes`` object, if any, is updated only after
        the database accepted the change.

        :param str attribute_name: Custom attribute name
        :param Union[str, int, float] attribute_value: Attribute Value
        :return: None
        :raises: oracledb.DatabaseError

        """
        parameters = {
            "index_name": self.index_name,
            "attribute_name": attribute_name,
            "attribute_value": attribute_value,
        }
        with cursor() as cr:
            cr.callproc(
                "DBMS_CLOUD_AI.UPDATE_VECTOR_INDEX",
                keyword_parameters=parameters,
            )
        # Mirror the change locally only once the database call succeeded, so
        # a rejected update cannot leave this object out of sync. An index
        # handle created without attributes has nothing to mirror.
        if self.attributes is not None:
            setattr(self.attributes, attribute_name, attribute_value)

    def set_attributes(
        self,
        attributes: Optional[VectorIndexAttributes] = None,
    ):
        """
        This procedure updates an existing vector store index with a specified
        value of the vector index attributes. Specify multiple attributes by
        passing an object of type :class `VectorIndexAttributes`

        After the update, ``self.attributes`` is reloaded from the database.

        :param select_ai.VectorIndexAttributes attributes: Use this to
         update multiple attribute values
        :return: None
        :raises: AttributeError if ``attributes`` is None
        :raises: oracledb.DatabaseError
        """
        # The parameter defaults to None for backward compatibility, but the
        # call cannot proceed without it; fail with an explicit message.
        if attributes is None:
            raise AttributeError("'attributes' is required")
        parameters = {
            "index_name": self.index_name,
            "attributes": attributes.json(),
        }
        with cursor() as cr:
            cr.callproc(
                "DBMS_CLOUD_AI.UPDATE_VECTOR_INDEX",
                keyword_parameters=parameters,
            )
        self.attributes = self.get_attributes()

    def get_attributes(self) -> VectorIndexAttributes:
        """Get attributes of this vector index

        :return: select_ai.VectorIndexAttributes
        :raises: VectorIndexNotFoundError
        """
        return self._get_attributes(self.index_name)

    def get_profile(self) -> Profile:
        """Get Profile object linked to this vector index

        The profile name is read from the index attributes stored in the
        database, not from ``self.profile``.

        :return: select_ai.Profile
        :raises: ProfileNotFoundError
        """
        attributes = self._get_attributes(index_name=self.index_name)
        return Profile(profile_name=attributes.profile_name)

    @classmethod
    def list(cls, index_name_pattern: str = ".*") -> Iterator["VectorIndex"]:
        """List Vector Indexes

        For every matching index, its attributes are fetched with a separate
        query and its profile is looked up; an index whose profile cannot be
        found is yielded with ``profile=None``.

        :param str index_name_pattern: Regular expressions can be used
         to specify a pattern. Function REGEXP_LIKE is used to perform the
         match. Default value is ".*" i.e. match all vector indexes.

        :return: Iterator[VectorIndex]
        """
        with cursor() as cr:
            cr.execute(
                LIST_USER_VECTOR_INDEXES,
                index_name_pattern=index_name_pattern,
            )
            for row in cr.fetchall():
                index_name = row[0]
                # row[1] is the description LOB, or a falsy value when the
                # index has no description.
                description = row[1].read() if row[1] else None
                attributes = cls._get_attributes(index_name=index_name)
                try:
                    profile = Profile(profile_name=attributes.profile_name)
                except ProfileNotFoundError:
                    profile = None
                yield cls(
                    index_name=index_name,
                    description=description,
                    attributes=attributes,
                    profile=profile,
                )
395
+
396
+
397
class AsyncVectorIndex(_BaseVectorIndex):
    """
    AsyncVectorIndex objects let you manage vector indexes
    using async APIs. Use this for non-blocking concurrent
    requests

    :param select_ai.AsyncProfile profile: Profile linked to the vector index
    :param str index_name: The name of the vector index
    :param str description: The description of the vector index
    :param VectorIndexAttributes attributes: The attributes of the vector index
    """

    @staticmethod
    async def _get_attributes(index_name: str) -> VectorIndexAttributes:
        """Get attributes of a vector index

        :param str index_name: The name of the vector index. It is
         upper-cased before being bound to the query.
        :return: select_ai.VectorIndexAttributes
        :raises: AttributeError if ``index_name`` is None
        :raises: VectorIndexNotFoundError
        """
        # Same explicit check as VectorIndex._get_attributes, instead of an
        # opaque failure on ``None.upper()``.
        if index_name is None:
            raise AttributeError("'index_name' is required")
        async with async_cursor() as cr:
            await cr.execute(
                GET_USER_VECTOR_INDEX_ATTRIBUTES, index_name=index_name.upper()
            )
            attributes = await cr.fetchall()
            if not attributes:
                raise VectorIndexNotFoundError(index_name=index_name)
            # Rows are (name, value) pairs; AsyncLOB values are read into
            # plain Python values while the cursor is still open.
            post_processed_attributes = {}
            for k, v in attributes:
                if isinstance(v, oracledb.AsyncLOB):
                    post_processed_attributes[k] = await v.read()
                else:
                    post_processed_attributes[k] = v
            return VectorIndexAttributes.create(**post_processed_attributes)

    async def create(self, replace: Optional[bool] = False) -> None:
        """Create a vector index in the database and populates it with data
        from an object store bucket using an async scheduler job

        If ``attributes.profile_name`` is not set, it is taken from the
        linked profile. After the index is created, the profile's
        ``vector_index_name`` attribute is set to this index.

        :param bool replace: True to replace existing vector index
        :return: None
        :raises: AttributeError if ``attributes`` or ``profile`` is missing
        :raises: oracledb.DatabaseError

        """
        # Validate before touching the database so that a missing profile
        # cannot leave behind an index that was created but never linked.
        if self.attributes is None:
            raise AttributeError("'attributes' is required")
        if self.profile is None:
            raise AttributeError("'profile' is required")

        if self.attributes.profile_name is None:
            self.attributes.profile_name = self.profile.profile_name
        parameters = {
            "index_name": self.index_name,
            "attributes": self.attributes.json(),
        }
        if self.description:
            parameters["description"] = self.description
        async with async_cursor() as cr:
            try:
                await cr.callproc(
                    "DBMS_CLOUD_AI.CREATE_VECTOR_INDEX",
                    keyword_parameters=parameters,
                )
            except oracledb.DatabaseError as e:
                (error,) = e.args
                # If already exists and replace is True then drop and recreate
                if error.code == 20048 and replace:
                    await self.delete(force=True)
                    await cr.callproc(
                        "DBMS_CLOUD_AI.CREATE_VECTOR_INDEX",
                        keyword_parameters=parameters,
                    )
                else:
                    raise

        await self.profile.set_attribute("vector_index_name", self.index_name)

    async def delete(
        self,
        include_data: Optional[bool] = True,
        force: Optional[bool] = False,
    ) -> None:
        """This procedure removes a vector store index.

        :param bool include_data: Indicates whether to delete
         both the customer's vector store and vector index
         along with the vector index object.
        :param bool force: Indicates whether to ignore errors
         that occur if the vector index does not exist.
        :return: None
        :raises: oracledb.DatabaseError

        """
        async with async_cursor() as cr:
            await cr.callproc(
                "DBMS_CLOUD_AI.DROP_VECTOR_INDEX",
                keyword_parameters={
                    "index_name": self.index_name,
                    "include_data": include_data,
                    "force": force,
                },
            )

    async def enable(self) -> None:
        """This procedure enables or activates a previously disabled vector
        index object. Generally, when you create a vector index, by default
        it is enabled such that the AI profile can use it to perform indexing
        and searching.

        Enabling an index that is already enabled is not treated as an error.

        :return: None
        :raises: oracledb.DatabaseError

        """
        async with async_cursor() as cr:
            try:
                await cr.callproc(
                    "DBMS_CLOUD_AI.ENABLE_VECTOR_INDEX",
                    keyword_parameters={"index_name": self.index_name},
                )
            except oracledb.DatabaseError as e:
                (error,) = e.args
                # ORA-20000: Vector Index is already in the desired status
                if error.code != 20000:
                    raise

    async def disable(self) -> None:
        """This procedure disables a vector index object in the current
        database. When disabled, an AI profile cannot use the vector index,
        and the system does not load data into the vector store as new data
        is added to the object store and does not perform indexing, searching
        or querying based on the index.

        Disabling an index that is already disabled is not treated as an
        error.

        :return: None
        :raises: oracledb.DatabaseError
        """
        async with async_cursor() as cr:
            try:
                await cr.callproc(
                    "DBMS_CLOUD_AI.DISABLE_VECTOR_INDEX",
                    keyword_parameters={"index_name": self.index_name},
                )
            except oracledb.DatabaseError as e:
                (error,) = e.args
                # ORA-20000: Vector Index is already in the desired status
                if error.code != 20000:
                    raise

    async def set_attribute(
        self, attribute_name: str, attribute_value: Union[str, int, float]
    ) -> None:
        """
        This procedure updates an existing vector store index with a specified
        value of the vector index attribute.

        The locally held ``attributes`` object, if any, is updated only after
        the database accepted the change.

        :param str attribute_name: Custom attribute name
        :param Union[str, int, float] attribute_value: Attribute Value
        :return: None
        :raises: oracledb.DatabaseError

        """
        parameters = {
            "index_name": self.index_name,
            "attribute_name": attribute_name,
            "attribute_value": attribute_value,
        }
        async with async_cursor() as cr:
            await cr.callproc(
                "DBMS_CLOUD_AI.UPDATE_VECTOR_INDEX",
                keyword_parameters=parameters,
            )
        # Mirror the change locally only once the database call succeeded, so
        # a rejected update cannot leave this object out of sync. An index
        # handle created without attributes has nothing to mirror.
        if self.attributes is not None:
            setattr(self.attributes, attribute_name, attribute_value)

    async def set_attributes(self, attributes: VectorIndexAttributes) -> None:
        """
        This procedure updates an existing vector store index with a specified
        value of the vector index attributes. Specify multiple attributes by
        passing an object of type :class `VectorIndexAttributes`

        After the update, ``self.attributes`` is reloaded from the database.

        :param select_ai.VectorIndexAttributes attributes: Use this to
         update multiple attribute values
        :return: None
        :raises: AttributeError if ``attributes`` is None
        :raises: oracledb.DatabaseError
        """
        # Fail with an explicit message rather than on ``None.json()``.
        if attributes is None:
            raise AttributeError("'attributes' is required")
        parameters = {
            "index_name": self.index_name,
            "attributes": attributes.json(),
        }
        async with async_cursor() as cr:
            await cr.callproc(
                "DBMS_CLOUD_AI.UPDATE_VECTOR_INDEX",
                keyword_parameters=parameters,
            )
        self.attributes = await self.get_attributes()

    async def get_attributes(self) -> VectorIndexAttributes:
        """Get attributes of a vector index

        :return: select_ai.VectorIndexAttributes
        :raises: VectorIndexNotFoundError
        """
        return await self._get_attributes(index_name=self.index_name)

    async def get_profile(self) -> AsyncProfile:
        """Get AsyncProfile object linked to this vector index

        The profile name is read from the index attributes stored in the
        database, not from ``self.profile``.

        :return: select_ai.AsyncProfile
        :raises: ProfileNotFoundError
        """
        attributes = await self._get_attributes(index_name=self.index_name)
        profile = await AsyncProfile(profile_name=attributes.profile_name)
        return profile

    @classmethod
    async def list(
        cls, index_name_pattern: str = ".*"
    ) -> AsyncGenerator["AsyncVectorIndex", None]:
        """List Vector Indexes.

        For every matching index, its attributes are fetched with a separate
        query and its profile is looked up; an index whose profile cannot be
        found is yielded with ``profile=None``.

        :param str index_name_pattern: Regular expressions can be used
         to specify a pattern. Function REGEXP_LIKE is used to perform the
         match. Default value is ".*" i.e. match all vector indexes.

        :return: AsyncGenerator[AsyncVectorIndex]

        """
        async with async_cursor() as cr:
            await cr.execute(
                LIST_USER_VECTOR_INDEXES,
                index_name_pattern=index_name_pattern,
            )
            rows = await cr.fetchall()
            for row in rows:
                index_name = row[0]
                # row[1] is the description AsyncLOB, or a falsy value when
                # the index has no description.
                if row[1]:
                    description = await row[1].read()
                else:
                    description = None
                attributes = await cls._get_attributes(index_name=index_name)
                try:
                    profile = await AsyncProfile(
                        profile_name=attributes.profile_name,
                    )
                except ProfileNotFoundError:
                    profile = None
                # Yield instances of this async class (as VectorIndex.list
                # does with ``cls``) so the results expose awaitable methods
                # that match the AsyncProfile they carry.
                yield cls(
                    index_name=index_name,
                    description=description,
                    attributes=attributes,
                    profile=profile,
                )
select_ai/version.py ADDED
@@ -0,0 +1,8 @@
1
+ # -----------------------------------------------------------------------------
2
+ # Copyright (c) 2025, Oracle and/or its affiliates.
3
+ #
4
+ # Licensed under the Universal Permissive License v 1.0 as shown at
5
+ # http://oss.oracle.com/licenses/upl.
6
+ # -----------------------------------------------------------------------------
7
+
8
# Version string of the package; the "rc3" suffix marks a release candidate.
__version__ = "1.2.0rc3"