gllm-datastore-binary 0.5.45__cp311-cp311-macosx_13_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gllm-datastore-binary might be problematic. Click here for more details.

Files changed (108) hide show
  1. gllm_datastore/__init__.pyi +0 -0
  2. gllm_datastore/cache/__init__.pyi +4 -0
  3. gllm_datastore/cache/base.pyi +84 -0
  4. gllm_datastore/cache/cache.pyi +137 -0
  5. gllm_datastore/cache/hybrid_cache/__init__.pyi +5 -0
  6. gllm_datastore/cache/hybrid_cache/file_system_hybrid_cache.pyi +50 -0
  7. gllm_datastore/cache/hybrid_cache/hybrid_cache.pyi +115 -0
  8. gllm_datastore/cache/hybrid_cache/in_memory_hybrid_cache.pyi +29 -0
  9. gllm_datastore/cache/hybrid_cache/key_matcher/__init__.pyi +5 -0
  10. gllm_datastore/cache/hybrid_cache/key_matcher/exact_key_matcher.pyi +44 -0
  11. gllm_datastore/cache/hybrid_cache/key_matcher/fuzzy_key_matcher.pyi +70 -0
  12. gllm_datastore/cache/hybrid_cache/key_matcher/key_matcher.pyi +60 -0
  13. gllm_datastore/cache/hybrid_cache/key_matcher/semantic_key_matcher.pyi +93 -0
  14. gllm_datastore/cache/hybrid_cache/redis_hybrid_cache.pyi +34 -0
  15. gllm_datastore/cache/hybrid_cache/utils.pyi +36 -0
  16. gllm_datastore/cache/utils.pyi +34 -0
  17. gllm_datastore/cache/vector_cache/__init__.pyi +0 -0
  18. gllm_datastore/cache/vector_cache/eviction_manager/__init__.pyi +0 -0
  19. gllm_datastore/cache/vector_cache/eviction_manager/asyncio_eviction_manager.pyi +48 -0
  20. gllm_datastore/cache/vector_cache/eviction_manager/eviction_manager.pyi +38 -0
  21. gllm_datastore/cache/vector_cache/eviction_strategy/__init__.pyi +0 -0
  22. gllm_datastore/cache/vector_cache/eviction_strategy/eviction_strategy.pyi +34 -0
  23. gllm_datastore/cache/vector_cache/eviction_strategy/ttl_eviction_strategy.pyi +34 -0
  24. gllm_datastore/cache/vector_cache/vector_cache.pyi +99 -0
  25. gllm_datastore/constants.pyi +66 -0
  26. gllm_datastore/core/__init__.pyi +7 -0
  27. gllm_datastore/core/capabilities/__init__.pyi +5 -0
  28. gllm_datastore/core/capabilities/fulltext_capability.pyi +73 -0
  29. gllm_datastore/core/capabilities/graph_capability.pyi +70 -0
  30. gllm_datastore/core/capabilities/vector_capability.pyi +90 -0
  31. gllm_datastore/core/filters/__init__.pyi +4 -0
  32. gllm_datastore/core/filters/filter.pyi +340 -0
  33. gllm_datastore/core/filters/schema.pyi +149 -0
  34. gllm_datastore/data_store/__init__.pyi +7 -0
  35. gllm_datastore/data_store/base.pyi +138 -0
  36. gllm_datastore/data_store/chroma/__init__.pyi +4 -0
  37. gllm_datastore/data_store/chroma/_chroma_import.pyi +13 -0
  38. gllm_datastore/data_store/chroma/data_store.pyi +202 -0
  39. gllm_datastore/data_store/chroma/fulltext.pyi +134 -0
  40. gllm_datastore/data_store/chroma/query.pyi +266 -0
  41. gllm_datastore/data_store/chroma/query_translator.pyi +41 -0
  42. gllm_datastore/data_store/chroma/vector.pyi +197 -0
  43. gllm_datastore/data_store/elasticsearch/__init__.pyi +5 -0
  44. gllm_datastore/data_store/elasticsearch/data_store.pyi +119 -0
  45. gllm_datastore/data_store/elasticsearch/fulltext.pyi +237 -0
  46. gllm_datastore/data_store/elasticsearch/query.pyi +114 -0
  47. gllm_datastore/data_store/elasticsearch/vector.pyi +179 -0
  48. gllm_datastore/data_store/exceptions.pyi +35 -0
  49. gllm_datastore/data_store/in_memory/__init__.pyi +5 -0
  50. gllm_datastore/data_store/in_memory/data_store.pyi +71 -0
  51. gllm_datastore/data_store/in_memory/fulltext.pyi +131 -0
  52. gllm_datastore/data_store/in_memory/query.pyi +175 -0
  53. gllm_datastore/data_store/in_memory/vector.pyi +174 -0
  54. gllm_datastore/data_store/redis/__init__.pyi +5 -0
  55. gllm_datastore/data_store/redis/data_store.pyi +154 -0
  56. gllm_datastore/data_store/redis/fulltext.pyi +128 -0
  57. gllm_datastore/data_store/redis/query.pyi +428 -0
  58. gllm_datastore/data_store/redis/query_translator.pyi +37 -0
  59. gllm_datastore/data_store/redis/vector.pyi +131 -0
  60. gllm_datastore/encryptor/__init__.pyi +4 -0
  61. gllm_datastore/encryptor/aes_gcm_encryptor.pyi +45 -0
  62. gllm_datastore/encryptor/encryptor.pyi +52 -0
  63. gllm_datastore/encryptor/key_ring/__init__.pyi +3 -0
  64. gllm_datastore/encryptor/key_ring/in_memory_key_ring.pyi +52 -0
  65. gllm_datastore/encryptor/key_ring/key_ring.pyi +45 -0
  66. gllm_datastore/encryptor/key_rotating_encryptor.pyi +60 -0
  67. gllm_datastore/graph_data_store/__init__.pyi +6 -0
  68. gllm_datastore/graph_data_store/graph_data_store.pyi +151 -0
  69. gllm_datastore/graph_data_store/graph_rag_data_store.pyi +29 -0
  70. gllm_datastore/graph_data_store/light_rag_data_store.pyi +93 -0
  71. gllm_datastore/graph_data_store/light_rag_postgres_data_store.pyi +96 -0
  72. gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +49 -0
  73. gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +78 -0
  74. gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +206 -0
  75. gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +182 -0
  76. gllm_datastore/graph_data_store/utils/__init__.pyi +6 -0
  77. gllm_datastore/graph_data_store/utils/constants.pyi +21 -0
  78. gllm_datastore/graph_data_store/utils/light_rag_em_invoker_adapter.pyi +56 -0
  79. gllm_datastore/graph_data_store/utils/light_rag_lm_invoker_adapter.pyi +43 -0
  80. gllm_datastore/graph_data_store/utils/llama_index_em_invoker_adapter.pyi +45 -0
  81. gllm_datastore/graph_data_store/utils/llama_index_lm_invoker_adapter.pyi +169 -0
  82. gllm_datastore/sql_data_store/__init__.pyi +4 -0
  83. gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
  84. gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +38 -0
  85. gllm_datastore/sql_data_store/constants.pyi +6 -0
  86. gllm_datastore/sql_data_store/sql_data_store.pyi +86 -0
  87. gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +216 -0
  88. gllm_datastore/sql_data_store/types.pyi +31 -0
  89. gllm_datastore/utils/__init__.pyi +6 -0
  90. gllm_datastore/utils/converter.pyi +51 -0
  91. gllm_datastore/utils/dict.pyi +21 -0
  92. gllm_datastore/utils/ttl.pyi +25 -0
  93. gllm_datastore/utils/types.pyi +32 -0
  94. gllm_datastore/vector_data_store/__init__.pyi +6 -0
  95. gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +259 -0
  96. gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +357 -0
  97. gllm_datastore/vector_data_store/in_memory_vector_data_store.pyi +179 -0
  98. gllm_datastore/vector_data_store/mixin/__init__.pyi +0 -0
  99. gllm_datastore/vector_data_store/mixin/cache_compatible_mixin.pyi +145 -0
  100. gllm_datastore/vector_data_store/redis_vector_data_store.pyi +191 -0
  101. gllm_datastore/vector_data_store/vector_data_store.pyi +146 -0
  102. gllm_datastore.build/.gitignore +1 -0
  103. gllm_datastore.cpython-311-darwin.so +0 -0
  104. gllm_datastore.pyi +156 -0
  105. gllm_datastore_binary-0.5.45.dist-info/METADATA +178 -0
  106. gllm_datastore_binary-0.5.45.dist-info/RECORD +108 -0
  107. gllm_datastore_binary-0.5.45.dist-info/WHEEL +5 -0
  108. gllm_datastore_binary-0.5.45.dist-info/top_level.txt +1 -0
@@ -0,0 +1,138 @@
1
+ from abc import ABC, abstractmethod
2
+ from enum import StrEnum
3
+ from gllm_datastore.cache import Cache as Cache
4
+ from gllm_datastore.core.capabilities import FulltextCapability as FulltextCapability, GraphCapability as GraphCapability, VectorCapability as VectorCapability
5
+ from gllm_datastore.core.filters.schema import FilterClause as FilterClause, QueryFilter as QueryFilter
6
+ from gllm_datastore.data_store.exceptions import NotRegisteredException as NotRegisteredException, NotSupportedException as NotSupportedException
7
+ from gllm_inference.em_invoker.em_invoker import BaseEMInvoker
8
+ from typing import Any
9
+
10
+ class CapabilityType(StrEnum):
11
+ """Enumeration of supported capability types."""
12
+ FULLTEXT: str
13
+ GRAPH: str
14
+ VECTOR: str
15
+
16
+ class BaseDataStore(ABC):
17
+ """Base class for datastores with multiple capabilities.
18
+
19
+ This class provides the infrastructure for capability composition and
20
+ delegation. Datastores inherit from this class and register capability
21
+ handlers based on their configuration.
22
+ """
23
+ def __init__(self) -> None:
24
+ """Initialize the datastore with specified capabilities."""
25
+ @property
26
+ @abstractmethod
27
+ def supported_capabilities(self) -> list[CapabilityType]:
28
+ """Return list of currently supported capabilities.
29
+
30
+ A data store might have more capabilities than the ones that are currently registered.
31
+ Each data store should implement this method to return the list of supported capabilities.
32
+
33
+ Returns:
34
+ list[CapabilityType]: List of capabilities that are supported.
35
+
36
+ Raises:
37
+ NotImplementedError: If the method is not implemented by subclass.
38
+ """
39
+ @property
40
+ def registered_capabilities(self) -> list[CapabilityType]:
41
+ """Return list of currently registered capabilities.
42
+
43
+ Returns:
44
+ list[CapabilityType]: List of capabilities that are registered and available.
45
+ """
46
+ @property
47
+ def fulltext(self) -> FulltextCapability:
48
+ """Access fulltext capability if supported.
49
+
50
+ Returns:
51
+ FulltextCapability: Fulltext capability handler.
52
+
53
+ Raises:
54
+ NotSupportedException: If fulltext capability is not supported.
55
+ """
56
+ @property
57
+ def vector(self) -> VectorCapability:
58
+ """Access vector capability if supported.
59
+
60
+ Returns:
61
+ VectorCapability: Vector capability handler.
62
+
63
+ Raises:
64
+ NotSupportedException: If vector capability is not supported.
65
+ """
66
+ @property
67
+ def graph(self) -> GraphCapability:
68
+ """Access graph capability if supported.
69
+
70
+ Returns:
71
+ GraphCapability: Graph capability handler.
72
+
73
+ Raises:
74
+ NotSupportedException: If graph capability is not supported.
75
+ """
76
+ def with_fulltext(self, **kwargs) -> Self:
77
+ """Configure fulltext capability and return datastore instance.
78
+
79
+ Args:
80
+ **kwargs: Fulltext capability configuration parameters.
81
+
82
+ Returns:
83
+ Self: Self for method chaining.
84
+ """
85
+ def with_vector(self, em_invoker: BaseEMInvoker, **kwargs) -> Self:
86
+ """Configure vector capability and return datastore instance.
87
+
88
+ Args:
89
+ em_invoker (BaseEMInvoker): Embedding model invoker (required).
90
+ **kwargs: Vector capability configuration parameters.
91
+
92
+ Returns:
93
+ Self: Self for method chaining.
94
+ """
95
+ def with_graph(self, **kwargs) -> Self:
96
+ """Configure graph capability and return datastore instance.
97
+
98
+ Args:
99
+ **kwargs: Graph capability configuration parameters.
100
+
101
+ Returns:
102
+ Self: Self for method chaining.
103
+ """
104
+ def as_cache(self, eviction_manager: Any | None = None, matching_strategy: Any = None) -> Cache:
105
+ """Create a Cache instance from this datastore.
106
+
107
+ Args:
108
+ eviction_manager (Any | None, optional): Optional eviction manager for cache eviction.
109
+ Defaults to None.
110
+ matching_strategy (Any, optional): Default matching strategy for cache retrieval.
111
+ Defaults to None.
112
+
113
+ Returns:
114
+ Cache: Instance wrapping this datastore.
115
+
116
+ Raises:
117
+ ValueError: If the required capabilities are not registered.
118
+ """
119
+ @classmethod
120
+ def translate_query_filter(cls, query_filter: FilterClause | QueryFilter | None) -> Any:
121
+ """Translate QueryFilter or FilterClause to datastore's native filter syntax.
122
+
123
+ This method provides a public interface for converting the GLLM DataStore's
124
+ QueryFilter DSL into each datastore's native filter format. Subclasses must
125
+ implement this method to provide their specific translation logic.
126
+
127
+ Args:
128
+ query_filter (FilterClause | QueryFilter | None): The filter to translate.
129
+ Can be a single FilterClause, a QueryFilter with multiple clauses,
130
+ or None for empty filters.
131
+
132
+ Returns:
133
+ Any: The translated filter in the datastore's native format.
134
+ Returns None for empty filters.
135
+
136
+ Raises:
137
+ NotImplementedError: If not implemented by subclass.
138
+ """
@@ -0,0 +1,4 @@
1
+ from gllm_datastore.data_store.chroma.data_store import ChromaDataStore as ChromaDataStore
2
+ from gllm_datastore.data_store.chroma.fulltext import ChromaFulltextCapability as ChromaFulltextCapability
3
+
4
+ __all__ = ['ChromaDataStore', 'ChromaFulltextCapability']
@@ -0,0 +1,13 @@
1
+ import chromadb
2
+
3
+ def safe_import_chromadb() -> chromadb:
4
+ """Import and return the `chromadb` module with SQLite fallback.
5
+
6
+ This function centralizes the logic to import `chromadb`, applying the
7
+ `pysqlite3` fallback for environments where the built-in sqlite3 causes
8
+ issues. Other modules should use `safe_import_chromadb()` to
9
+ avoid duplication.
10
+
11
+ Returns:
12
+ ModuleType: The imported `chromadb` module.
13
+ """
@@ -0,0 +1,202 @@
1
+ from _typeshed import Incomplete
2
+ from enum import StrEnum
3
+ from gllm_datastore.core.filters.schema import FilterClause as FilterClause, QueryFilter as QueryFilter
4
+ from gllm_datastore.data_store.base import BaseDataStore as BaseDataStore, CapabilityType as CapabilityType
5
+ from gllm_datastore.data_store.chroma._chroma_import import safe_import_chromadb as safe_import_chromadb
6
+ from gllm_datastore.data_store.chroma.fulltext import ChromaFulltextCapability as ChromaFulltextCapability
7
+ from gllm_datastore.data_store.chroma.query import DEFAULT_NUM_CANDIDATES as DEFAULT_NUM_CANDIDATES
8
+ from gllm_datastore.data_store.chroma.query_translator import ChromaQueryTranslator as ChromaQueryTranslator
9
+ from gllm_datastore.data_store.chroma.vector import ChromaVectorCapability as ChromaVectorCapability
10
+ from gllm_inference.em_invoker.em_invoker import BaseEMInvoker
11
+ from typing import Any
12
+
13
+ chromadb: Incomplete
14
+
15
+ class ChromaClientType(StrEnum):
16
+ """Enum for different types of ChromaDB clients."""
17
+ MEMORY: str
18
+ PERSISTENT: str
19
+ HTTP: str
20
+
21
+ class ChromaDataStore(BaseDataStore):
22
+ """ChromaDB data store with multiple capability support.
23
+
24
+ Attributes:
25
+ collection_name (str): The name of the ChromaDB collection.
26
+ client (chromadb.ClientAPI): The ChromaDB client instance.
27
+ """
28
+ collection_name: Incomplete
29
+ client: Incomplete
30
+ def __init__(self, collection_name: str, client_type: ChromaClientType = ..., persist_directory: str | None = None, host: str | None = None, port: int | None = None, headers: dict | None = None, client_settings: dict | None = None) -> None:
31
+ """Initialize the ChromaDB data store.
32
+
33
+ Args:
34
+ collection_name (str): The name of the ChromaDB collection.
35
+ client_type (ChromaClientType, optional): Type of ChromaDB client to use.
36
+ Defaults to ChromaClientType.MEMORY.
37
+ persist_directory (str | None, optional): Directory to persist vector store data.
38
+ Required for PERSISTENT client type. Defaults to None.
39
+ host (str | None, optional): Host address for ChromaDB server.
40
+ Required for HTTP client type. Defaults to None.
41
+ port (int | None, optional): Port for ChromaDB server.
42
+ Required for HTTP client type. Defaults to None.
43
+ headers (dict | None, optional): A dictionary of headers to send to the Chroma server.
44
+ Used for authentication with the Chroma server for HTTP client type. Defaults to None.
45
+ client_settings (dict | None, optional): A dictionary of additional settings for the Chroma client.
46
+ Defaults to None.
47
+ """
48
+ @property
49
+ def supported_capabilities(self) -> list[str]:
50
+ """Return list of currently supported capabilities.
51
+
52
+ Returns:
53
+ list[str]: List of capability names that are supported.
54
+ """
55
+ @property
56
+ def fulltext(self) -> ChromaFulltextCapability:
57
+ """Access fulltext capability if supported.
58
+
59
+ This method uses the logic of its parent class to return the fulltext capability handler.
60
+ This method overrides the parent class to return the ChromaFulltextCapability handler for better
61
+ type hinting.
62
+
63
+ Returns:
64
+ ChromaFulltextCapability: Fulltext capability handler.
65
+
66
+ Raises:
67
+ NotSupportedException: If fulltext capability is not supported.
68
+ """
69
+ @property
70
+ def vector(self) -> ChromaVectorCapability:
71
+ """Access vector capability if supported.
72
+
73
+ This method uses the logic of its parent class to return the vector capability handler.
74
+ This method overrides the parent class to return the ChromaVectorCapability handler for better
75
+ type hinting.
76
+
77
+ Returns:
78
+ ChromaVectorCapability: Vector capability handler.
79
+
80
+ Raises:
81
+ NotSupportedException: If vector capability is not supported.
82
+ """
83
+ def with_fulltext(self, collection_name: str | None = None, num_candidates: int = ...) -> ChromaDataStore:
84
+ """Configure fulltext capability and return datastore instance.
85
+
86
+ This method uses the logic of its parent class to configure the fulltext capability.
87
+ This method overrides the parent class for better type hinting.
88
+
89
+ Args:
90
+ collection_name (str | None, optional): Name of the collection to use in ChromaDB. Defaults to None,
91
+ in which case the default class attribute will be utilized.
92
+ num_candidates (int, optional): Maximum number of candidates to consider during search.
93
+ Defaults to DEFAULT_NUM_CANDIDATES.
94
+
95
+ Returns:
96
+ Self: Self for method chaining.
97
+ """
98
+ def with_vector(self, em_invoker: BaseEMInvoker, collection_name: str | None = None, num_candidates: int = ...) -> ChromaDataStore:
99
+ """Configure vector capability and return datastore instance.
100
+
101
+ This method uses the logic of its parent class to configure the vector capability.
102
+ This method overrides the parent class for better type hinting.
103
+
104
+ Args:
105
+ em_invoker (BaseEMInvoker): The embedding model to perform vectorization.
106
+ collection_name (str | None, optional): Name of the collection to use in ChromaDB. Defaults to None,
107
+ in which case the default class attribute will be utilized.
108
+ num_candidates (int, optional): Maximum number of candidates to consider during search.
109
+ Defaults to DEFAULT_NUM_CANDIDATES.
110
+
111
+ Returns:
112
+ Self: Self for method chaining.
113
+ """
114
+ @classmethod
115
+ def translate_query_filter(cls, query_filter: FilterClause | QueryFilter | None = None) -> dict[str, Any] | None:
116
+ '''Translate QueryFilter or FilterClause to ChromaDB native filter syntax.
117
+
118
+ This method uses ChromaQueryTranslator to translate filters and returns
119
+ the result as a dictionary.
120
+
121
+ Examples:
122
+ 1. Translate a simple FilterClause:
123
+ ```python
124
+ from gllm_datastore.core.filters import filter as F
125
+
126
+ filter_clause = F.eq("metadata.status", "active")
127
+ result = ChromaDataStore.translate_query_filter(filter_clause)
128
+ # result -> {"where": {"status": "active"}}
129
+ ```
130
+
131
+ 2. Translate QueryFilter with metadata filters:
132
+ ```python
133
+ from gllm_datastore.core.filters import filter as F
134
+
135
+ filters = F.and_(
136
+ F.eq("metadata.category", "tech"),
137
+ F.gte("metadata.price", 10),
138
+ )
139
+ result = ChromaDataStore.translate_query_filter(filters)
140
+ # result ->
141
+ # {
142
+ # "where": {
143
+ # "$and": [
144
+ # {"category": "tech"},
145
+ # {"price": {"$gte": 10}}
146
+ # ]
147
+ # }
148
+ # }
149
+ ```
150
+
151
+ 3. Translate QueryFilter with content filters:
152
+ ```python
153
+ from gllm_datastore.core.filters import filter as F
154
+
155
+ filters = F.text_contains("content", "python")
156
+ result = ChromaDataStore.translate_query_filter(filters)
157
+ # result -> {"where_document": {"$contains": "python"}}
158
+ ```
159
+
160
+ 4. Translate QueryFilter with id filters:
161
+ ```python
162
+ from gllm_datastore.core.filters import filter as F
163
+
164
+ filters = F.in_("id", ["chunk_1", "chunk_2"])
165
+ result = ChromaDataStore.translate_query_filter(filters)
166
+ # result -> {"ids": ["chunk_1", "chunk_2"]}
167
+ ```
168
+
169
+ 5. Translate complex nested QueryFilter:
170
+ ```python
171
+ from gllm_datastore.core.filters import filter as F
172
+
173
+ filters = F.and_(
174
+ F.or_(
175
+ F.eq("metadata.status", "active"),
176
+ F.eq("metadata.status", "pending"),
177
+ ),
178
+ F.text_contains("content", "machine learning"),
179
+ F.in_("id", ["chunk_1", "chunk_2"]),
180
+ )
181
+ result = ChromaDataStore.translate_query_filter(filters)
182
+ # result ->
183
+ # {
184
+ # "where": {
185
+ # "$or": [
186
+ # {"status": "active"},
187
+ # {"status": "pending"}
188
+ # ]
189
+ # },
190
+ # "where_document": {"$contains": "machine learning"},
191
+ # "ids": ["chunk_1", "chunk_2"]
192
+ # }
193
+ ```
194
+
195
+ Args:
196
+ query_filter (FilterClause | QueryFilter | None, optional): The filter to translate.
197
+ Can be a single FilterClause or a QueryFilter with multiple clauses. Defaults to None.
198
+
199
+ Returns:
200
+ dict[str, Any] | None: The translated filter as a ChromaDB query dict.
201
+ Returns None for empty filters.
202
+ '''
@@ -0,0 +1,134 @@
1
+ from _typeshed import Incomplete
2
+ from chromadb import ClientAPI
3
+ from gllm_core.schema import Chunk
4
+ from gllm_datastore.constants import CHUNK_KEYS as CHUNK_KEYS, METADATA_KEYS as METADATA_KEYS
5
+ from gllm_datastore.core.filters import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
6
+ from gllm_datastore.data_store.chroma._chroma_import import safe_import_chromadb as safe_import_chromadb
7
+ from gllm_datastore.data_store.chroma.query import ChromaCollectionKeys as ChromaCollectionKeys, DEFAULT_NUM_CANDIDATES as DEFAULT_NUM_CANDIDATES, build_chroma_delete_kwargs as build_chroma_delete_kwargs, build_chroma_get_kwargs as build_chroma_get_kwargs, sanitize_metadata as sanitize_metadata
8
+ from gllm_datastore.data_store.chroma.query_translator import ChromaQueryTranslator as ChromaQueryTranslator
9
+ from typing import Any
10
+
11
+ chromadb: Incomplete
12
+
13
+ class ChromaFulltextCapability:
14
+ """ChromaDB implementation of FulltextCapability protocol.
15
+
16
+ This class provides document CRUD operations and text search using ChromaDB.
17
+
18
+ Attributes:
19
+ collection_name (str): The name of the ChromaDB collection.
20
+ client (ClientAPI): ChromaDB client instance.
21
+ collection: ChromaDB collection instance.
22
+ num_candidates (int): Maximum number of candidates to consider during search.
23
+ """
24
+ collection_name: Incomplete
25
+ client: Incomplete
26
+ collection: Incomplete
27
+ num_candidates: Incomplete
28
+ def __init__(self, collection_name: str, client: ClientAPI, num_candidates: int = ...) -> None:
29
+ """Initialize the ChromaDB fulltext capability.
30
+
31
+ Args:
32
+ collection_name (str): The name of the ChromaDB collection.
33
+ client (ClientAPI): ChromaDB client instance.
34
+ num_candidates (int, optional): Maximum number of candidates to consider during search.
35
+ Defaults to DEFAULT_NUM_CANDIDATES.
36
+ """
37
+ def get_size(self) -> int:
38
+ """Returns the total number of documents in the collection.
39
+
40
+ Returns:
41
+ int: The total number of documents.
42
+ """
43
+ async def create(self, data: Chunk | list[Chunk], **kwargs: Any) -> None:
44
+ """Create new records in the datastore.
45
+
46
+ Args:
47
+ data (Chunk | list[Chunk]): Data to create (single item or collection).
48
+ **kwargs: Backend-specific parameters.
49
+
50
+ Raises:
51
+ ValueError: If data structure is invalid.
52
+ """
53
+ async def retrieve(self, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, **kwargs: Any) -> list[Chunk]:
54
+ '''Read records from the datastore with optional filtering.
55
+
56
+ Usage Example:
57
+ ```python
58
+ from gllm_datastore.core.filters import filter as F
59
+
60
+ # Direct FilterClause usage
61
+ results = await fulltext_capability.retrieve(filters=F.eq("metadata.category", "tech"))
62
+
63
+ # Multiple filters
64
+ results = await fulltext_capability.retrieve(
65
+ filters=F.and_(F.eq("metadata.category", "tech"), F.eq("metadata.status", "active"))
66
+ )
67
+ ```
68
+
69
+ Args:
70
+ filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
71
+ FilterClause objects are automatically converted to QueryFilter internally.
72
+ Defaults to None.
73
+ options (QueryOptions | None, optional): Query options (sorting, pagination, etc.).
74
+ Defaults to None.
75
+ **kwargs: Backend-specific parameters.
76
+
77
+ Returns:
78
+ list[Chunk]: Query results.
79
+
80
+ Raises:
81
+ NotImplementedError: If unsupported operators are used for id or content filters.
82
+ '''
83
+ async def retrieve_fuzzy(self, query: str, max_distance: int = 2, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, **kwargs: Any) -> list[Chunk]:
84
+ """Find records that fuzzy match the query within distance threshold.
85
+
86
+ Args:
87
+ query (str): Text to fuzzy match against.
88
+ max_distance (int, optional): Maximum edit distance for matches. Defaults to 2.
89
+ filters (FilterClause | QueryFilter | None, optional): Optional metadata filters to apply.
90
+ FilterClause objects are automatically converted to QueryFilter internally.
91
+ Defaults to None.
92
+ options (QueryOptions | None, optional): Query options (sorting, limit, etc.). Defaults to None.
93
+ **kwargs: Backend-specific parameters.
94
+
95
+ Returns:
96
+ list[Chunk]: Matched chunks ordered by distance (ascending) or by options.order_by if specified.
97
+ """
98
+ async def update(self, update_values: dict[str, Any], filters: FilterClause | QueryFilter | None = None) -> None:
99
+ '''Update existing records in the datastore.
100
+
101
+ Examples:
102
+ Set the content of the chunk with the id "unique_id" to "updated_content" and its
103
+ metadata "status" field to "published".
104
+ ```python
105
+ from gllm_datastore.core.filters import filter as F
106
+
107
+ await fulltext_capability.update(
108
+ update_values={"content": "updated_content", "metadata": {"status": "published"}},
109
+ filters=F.eq("id", "unique_id"),
110
+ )
111
+ ```
112
+
113
+ Args:
114
+ update_values (dict[str, Any]): Values to update. Supports "content" for updating document content
115
+ and "metadata" for updating metadata. Other keys are treated as direct metadata updates.
116
+ filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
117
+ FilterClause objects are automatically converted to QueryFilter internally.
118
+ Defaults to None.
119
+
120
+ Note:
121
+ ChromaDB does not support direct update operations. This method will
122
+ retrieve matching records, update them, and re-add them to the collection.
123
+ '''
124
+ async def delete(self, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> None:
125
+ """Delete records from the datastore.
126
+
127
+ Args:
128
+ filters (FilterClause | QueryFilter | None, optional): Filters to select records to delete.
129
+ FilterClause objects are automatically converted to QueryFilter internally.
130
+ Defaults to None, in which case no operation is performed (no-op).
131
+ options (QueryOptions | None, optional): Query options for sorting and limiting deletions. Defaults to None.
132
+ """
133
+ async def clear(self) -> None:
134
+ """Clear all records from the datastore."""