gllm-datastore-binary 0.5.50__cp312-cp312-macosx_13_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. gllm_datastore/__init__.pyi +0 -0
  2. gllm_datastore/cache/__init__.pyi +4 -0
  3. gllm_datastore/cache/base.pyi +84 -0
  4. gllm_datastore/cache/cache.pyi +137 -0
  5. gllm_datastore/cache/hybrid_cache/__init__.pyi +5 -0
  6. gllm_datastore/cache/hybrid_cache/file_system_hybrid_cache.pyi +50 -0
  7. gllm_datastore/cache/hybrid_cache/hybrid_cache.pyi +115 -0
  8. gllm_datastore/cache/hybrid_cache/in_memory_hybrid_cache.pyi +29 -0
  9. gllm_datastore/cache/hybrid_cache/key_matcher/__init__.pyi +5 -0
  10. gllm_datastore/cache/hybrid_cache/key_matcher/exact_key_matcher.pyi +44 -0
  11. gllm_datastore/cache/hybrid_cache/key_matcher/fuzzy_key_matcher.pyi +70 -0
  12. gllm_datastore/cache/hybrid_cache/key_matcher/key_matcher.pyi +60 -0
  13. gllm_datastore/cache/hybrid_cache/key_matcher/semantic_key_matcher.pyi +93 -0
  14. gllm_datastore/cache/hybrid_cache/redis_hybrid_cache.pyi +34 -0
  15. gllm_datastore/cache/hybrid_cache/utils.pyi +36 -0
  16. gllm_datastore/cache/utils.pyi +34 -0
  17. gllm_datastore/cache/vector_cache/__init__.pyi +0 -0
  18. gllm_datastore/cache/vector_cache/eviction_manager/__init__.pyi +0 -0
  19. gllm_datastore/cache/vector_cache/eviction_manager/asyncio_eviction_manager.pyi +48 -0
  20. gllm_datastore/cache/vector_cache/eviction_manager/eviction_manager.pyi +38 -0
  21. gllm_datastore/cache/vector_cache/eviction_strategy/__init__.pyi +0 -0
  22. gllm_datastore/cache/vector_cache/eviction_strategy/eviction_strategy.pyi +34 -0
  23. gllm_datastore/cache/vector_cache/eviction_strategy/ttl_eviction_strategy.pyi +34 -0
  24. gllm_datastore/cache/vector_cache/vector_cache.pyi +99 -0
  25. gllm_datastore/constants.pyi +66 -0
  26. gllm_datastore/core/__init__.pyi +7 -0
  27. gllm_datastore/core/capabilities/__init__.pyi +7 -0
  28. gllm_datastore/core/capabilities/encryption_capability.pyi +21 -0
  29. gllm_datastore/core/capabilities/fulltext_capability.pyi +73 -0
  30. gllm_datastore/core/capabilities/graph_capability.pyi +70 -0
  31. gllm_datastore/core/capabilities/hybrid_capability.pyi +184 -0
  32. gllm_datastore/core/capabilities/vector_capability.pyi +90 -0
  33. gllm_datastore/core/filters/__init__.pyi +4 -0
  34. gllm_datastore/core/filters/filter.pyi +340 -0
  35. gllm_datastore/core/filters/schema.pyi +149 -0
  36. gllm_datastore/data_store/__init__.pyi +8 -0
  37. gllm_datastore/data_store/_elastic_core/__init__.pyi +0 -0
  38. gllm_datastore/data_store/_elastic_core/client_factory.pyi +66 -0
  39. gllm_datastore/data_store/_elastic_core/constants.pyi +27 -0
  40. gllm_datastore/data_store/_elastic_core/elastic_like_core.pyi +115 -0
  41. gllm_datastore/data_store/_elastic_core/index_manager.pyi +37 -0
  42. gllm_datastore/data_store/_elastic_core/query_translator.pyi +89 -0
  43. gllm_datastore/data_store/base.pyi +176 -0
  44. gllm_datastore/data_store/chroma/__init__.pyi +4 -0
  45. gllm_datastore/data_store/chroma/_chroma_import.pyi +13 -0
  46. gllm_datastore/data_store/chroma/data_store.pyi +201 -0
  47. gllm_datastore/data_store/chroma/fulltext.pyi +134 -0
  48. gllm_datastore/data_store/chroma/query.pyi +266 -0
  49. gllm_datastore/data_store/chroma/query_translator.pyi +41 -0
  50. gllm_datastore/data_store/chroma/vector.pyi +197 -0
  51. gllm_datastore/data_store/elasticsearch/__init__.pyi +5 -0
  52. gllm_datastore/data_store/elasticsearch/data_store.pyi +147 -0
  53. gllm_datastore/data_store/elasticsearch/fulltext.pyi +238 -0
  54. gllm_datastore/data_store/elasticsearch/query.pyi +118 -0
  55. gllm_datastore/data_store/elasticsearch/query_translator.pyi +18 -0
  56. gllm_datastore/data_store/elasticsearch/vector.pyi +180 -0
  57. gllm_datastore/data_store/exceptions.pyi +35 -0
  58. gllm_datastore/data_store/in_memory/__init__.pyi +5 -0
  59. gllm_datastore/data_store/in_memory/data_store.pyi +71 -0
  60. gllm_datastore/data_store/in_memory/fulltext.pyi +131 -0
  61. gllm_datastore/data_store/in_memory/query.pyi +175 -0
  62. gllm_datastore/data_store/in_memory/vector.pyi +174 -0
  63. gllm_datastore/data_store/opensearch/__init__.pyi +5 -0
  64. gllm_datastore/data_store/opensearch/data_store.pyi +160 -0
  65. gllm_datastore/data_store/opensearch/fulltext.pyi +240 -0
  66. gllm_datastore/data_store/opensearch/query.pyi +89 -0
  67. gllm_datastore/data_store/opensearch/query_translator.pyi +18 -0
  68. gllm_datastore/data_store/opensearch/vector.pyi +211 -0
  69. gllm_datastore/data_store/redis/__init__.pyi +5 -0
  70. gllm_datastore/data_store/redis/data_store.pyi +153 -0
  71. gllm_datastore/data_store/redis/fulltext.pyi +128 -0
  72. gllm_datastore/data_store/redis/query.pyi +428 -0
  73. gllm_datastore/data_store/redis/query_translator.pyi +37 -0
  74. gllm_datastore/data_store/redis/vector.pyi +131 -0
  75. gllm_datastore/data_store/sql/__init__.pyi +4 -0
  76. gllm_datastore/data_store/sql/constants.pyi +5 -0
  77. gllm_datastore/data_store/sql/data_store.pyi +201 -0
  78. gllm_datastore/data_store/sql/fulltext.pyi +164 -0
  79. gllm_datastore/data_store/sql/query.pyi +81 -0
  80. gllm_datastore/data_store/sql/query_translator.pyi +51 -0
  81. gllm_datastore/data_store/sql/schema.pyi +16 -0
  82. gllm_datastore/encryptor/__init__.pyi +4 -0
  83. gllm_datastore/encryptor/aes_gcm_encryptor.pyi +45 -0
  84. gllm_datastore/encryptor/capability/__init__.pyi +3 -0
  85. gllm_datastore/encryptor/capability/mixin.pyi +32 -0
  86. gllm_datastore/encryptor/encryptor.pyi +52 -0
  87. gllm_datastore/encryptor/key_ring/__init__.pyi +3 -0
  88. gllm_datastore/encryptor/key_ring/in_memory_key_ring.pyi +52 -0
  89. gllm_datastore/encryptor/key_ring/key_ring.pyi +45 -0
  90. gllm_datastore/encryptor/key_rotating_encryptor.pyi +60 -0
  91. gllm_datastore/graph_data_store/__init__.pyi +6 -0
  92. gllm_datastore/graph_data_store/graph_data_store.pyi +151 -0
  93. gllm_datastore/graph_data_store/graph_rag_data_store.pyi +29 -0
  94. gllm_datastore/graph_data_store/light_rag_data_store.pyi +93 -0
  95. gllm_datastore/graph_data_store/light_rag_postgres_data_store.pyi +96 -0
  96. gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +49 -0
  97. gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +78 -0
  98. gllm_datastore/graph_data_store/mixins/__init__.pyi +3 -0
  99. gllm_datastore/graph_data_store/mixins/agentic_graph_tools_mixin.pyi +175 -0
  100. gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +206 -0
  101. gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +182 -0
  102. gllm_datastore/graph_data_store/schema.pyi +27 -0
  103. gllm_datastore/graph_data_store/utils/__init__.pyi +6 -0
  104. gllm_datastore/graph_data_store/utils/constants.pyi +21 -0
  105. gllm_datastore/graph_data_store/utils/light_rag_em_invoker_adapter.pyi +56 -0
  106. gllm_datastore/graph_data_store/utils/light_rag_lm_invoker_adapter.pyi +43 -0
  107. gllm_datastore/graph_data_store/utils/llama_index_em_invoker_adapter.pyi +45 -0
  108. gllm_datastore/graph_data_store/utils/llama_index_lm_invoker_adapter.pyi +169 -0
  109. gllm_datastore/signature/__init__.pyi +0 -0
  110. gllm_datastore/signature/webhook_signature.pyi +31 -0
  111. gllm_datastore/sql_data_store/__init__.pyi +4 -0
  112. gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
  113. gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +38 -0
  114. gllm_datastore/sql_data_store/constants.pyi +6 -0
  115. gllm_datastore/sql_data_store/sql_data_store.pyi +86 -0
  116. gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +216 -0
  117. gllm_datastore/sql_data_store/types.pyi +31 -0
  118. gllm_datastore/utils/__init__.pyi +6 -0
  119. gllm_datastore/utils/converter.pyi +51 -0
  120. gllm_datastore/utils/dict.pyi +21 -0
  121. gllm_datastore/utils/ttl.pyi +25 -0
  122. gllm_datastore/utils/types.pyi +32 -0
  123. gllm_datastore/vector_data_store/__init__.pyi +6 -0
  124. gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +259 -0
  125. gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +357 -0
  126. gllm_datastore/vector_data_store/in_memory_vector_data_store.pyi +179 -0
  127. gllm_datastore/vector_data_store/mixin/__init__.pyi +0 -0
  128. gllm_datastore/vector_data_store/mixin/cache_compatible_mixin.pyi +145 -0
  129. gllm_datastore/vector_data_store/redis_vector_data_store.pyi +191 -0
  130. gllm_datastore/vector_data_store/vector_data_store.pyi +146 -0
  131. gllm_datastore.build/.gitignore +1 -0
  132. gllm_datastore.cpython-312-darwin.so +0 -0
  133. gllm_datastore.pyi +178 -0
  134. gllm_datastore_binary-0.5.50.dist-info/METADATA +185 -0
  135. gllm_datastore_binary-0.5.50.dist-info/RECORD +137 -0
  136. gllm_datastore_binary-0.5.50.dist-info/WHEEL +5 -0
  137. gllm_datastore_binary-0.5.50.dist-info/top_level.txt +1 -0
@@ -0,0 +1,4 @@
1
+ from gllm_datastore.data_store.chroma.data_store import ChromaDataStore as ChromaDataStore
2
+ from gllm_datastore.data_store.chroma.fulltext import ChromaFulltextCapability as ChromaFulltextCapability
3
+
4
+ __all__ = ['ChromaDataStore', 'ChromaFulltextCapability']
@@ -0,0 +1,13 @@
1
+ import chromadb
2
+
3
+ def safe_import_chromadb() -> chromadb:
4
+ """Import and return the `chromadb` module with SQLite fallback.
5
+
6
+ This function centralizes the logic to import `chromadb`, applying the
7
+ `pysqlite3` fallback for environments where the built-in sqlite3 causes
8
+ issues. Other modules should use `safe_import_chromadb()` to
9
+ avoid duplication.
10
+
11
+ Returns:
12
+ ModuleType: The imported `chromadb` module.
13
+ """
@@ -0,0 +1,201 @@
1
+ from _typeshed import Incomplete
2
+ from enum import StrEnum
3
+ from gllm_datastore.core.filters.schema import FilterClause as FilterClause, QueryFilter as QueryFilter
4
+ from gllm_datastore.data_store.base import BaseDataStore as BaseDataStore, CapabilityType as CapabilityType
5
+ from gllm_datastore.data_store.chroma._chroma_import import safe_import_chromadb as safe_import_chromadb
6
+ from gllm_datastore.data_store.chroma.fulltext import ChromaFulltextCapability as ChromaFulltextCapability
7
+ from gllm_datastore.data_store.chroma.query import DEFAULT_NUM_CANDIDATES as DEFAULT_NUM_CANDIDATES
8
+ from gllm_datastore.data_store.chroma.query_translator import ChromaQueryTranslator as ChromaQueryTranslator
9
+ from gllm_datastore.data_store.chroma.vector import ChromaVectorCapability as ChromaVectorCapability
10
+ from gllm_inference.em_invoker.em_invoker import BaseEMInvoker
11
+ from typing import Any
12
+
13
+ chromadb: Incomplete
14
+
15
+ class ChromaClientType(StrEnum):
16
+ """Enum for different types of ChromaDB clients."""
17
+ MEMORY: str
18
+ PERSISTENT: str
19
+ HTTP: str
20
+
21
+ class ChromaDataStore(BaseDataStore):
22
+ """ChromaDB data store with multiple capability support.
23
+
24
+ Attributes:
25
+ collection_name (str): The name of the ChromaDB collection.
26
+ client (chromadb.ClientAPI): The ChromaDB client instance.
27
+ """
28
+ collection_name: Incomplete
29
+ client: Incomplete
30
+ def __init__(self, collection_name: str, client_type: ChromaClientType = ..., persist_directory: str | None = None, host: str | None = None, port: int | None = None, headers: dict | None = None, client_settings: dict | None = None) -> None:
31
+ """Initialize the ChromaDB data store.
32
+
33
+ Args:
34
+ collection_name (str): The name of the ChromaDB collection.
35
+ client_type (ChromaClientType, optional): Type of ChromaDB client to use.
36
+ Defaults to ChromaClientType.MEMORY.
37
+ persist_directory (str | None, optional): Directory to persist vector store data.
38
+ Required for PERSISTENT client type. Defaults to None.
39
+ host (str | None, optional): Host address for ChromaDB server.
40
+ Required for HTTP client type. Defaults to None.
41
+ port (int | None, optional): Port for ChromaDB server.
42
+ Required for HTTP client type. Defaults to None.
43
+ headers (dict | None, optional): A dictionary of headers to send to the Chroma server.
44
+ Used for authentication with the Chroma server for HTTP client type. Defaults to None.
45
+ client_settings (dict | None, optional): A dictionary of additional settings for the Chroma client.
46
+ Defaults to None.
47
+ """
48
+ @property
49
+ def supported_capabilities(self) -> list[str]:
50
+ """Return list of currently supported capabilities.
51
+
52
+ Returns:
53
+ list[str]: List of capability names that are supported.
54
+ """
55
+ @property
56
+ def fulltext(self) -> ChromaFulltextCapability:
57
+ """Access fulltext capability if supported.
58
+
59
+ This method uses the logic of its parent class to return the fulltext capability handler.
60
+ This method overrides the parent class to return the ChromaFulltextCapability handler for better
61
+ type hinting.
62
+
63
+ Returns:
64
+ ChromaFulltextCapability: Fulltext capability handler.
65
+
66
+ Raises:
67
+ NotSupportedException: If fulltext capability is not supported.
68
+ """
69
+ @property
70
+ def vector(self) -> ChromaVectorCapability:
71
+ """Access vector capability if supported.
72
+
73
+ This method uses the logic of its parent class to return the vector capability handler.
74
+ This method overrides the parent class to return the ChromaVectorCapability handler for better
75
+ type hinting.
76
+
77
+ Returns:
78
+ ChromaVectorCapability: Vector capability handler.
79
+
80
+ Raises:
81
+ NotSupportedException: If vector capability is not supported.
82
+ """
83
+ def with_fulltext(self, collection_name: str | None = None, num_candidates: int = ...) -> ChromaDataStore:
84
+ """Configure fulltext capability and return datastore instance.
85
+
86
+ This method uses the logic of its parent class to configure the fulltext capability.
87
+ This method overrides the parent class for better type hinting.
88
+
89
+ Args:
90
+ collection_name (str | None, optional): Name of the collection to use in ChromaDB. Defaults to None,
91
+ in which case the default class attribute will be utilized.
92
+ num_candidates (int, optional): Maximum number of candidates to consider during search.
93
+ Defaults to DEFAULT_NUM_CANDIDATES.
94
+
95
+ Returns:
96
+ Self: Self for method chaining.
97
+ """
98
+ def with_vector(self, em_invoker: BaseEMInvoker, collection_name: str | None = None, num_candidates: int = ...) -> ChromaDataStore:
99
+ """Configure vector capability and return datastore instance.
100
+
101
+ This method uses the logic of its parent class to configure the vector capability.
102
+ This method overrides the parent class for better type hinting.
103
+
104
+ Args:
105
+ em_invoker (BaseEMInvoker): The embedding model to perform vectorization.
106
+ collection_name (str | None, optional): Name of the collection to use in ChromaDB. Defaults to None,
107
+ in which case the default class attribute will be utilized.
108
+ num_candidates (int, optional): Maximum number of candidates to consider during search.
109
+ Defaults to DEFAULT_NUM_CANDIDATES.
110
+
111
+ Returns:
112
+ Self: Self for method chaining.
113
+ """
114
+ @classmethod
115
+ def translate_query_filter(cls, query_filter: FilterClause | QueryFilter) -> dict[str, Any]:
116
+ '''Translate QueryFilter or FilterClause to ChromaDB native filter syntax.
117
+
118
+ This method uses ChromaQueryTranslator to translate filters and returns
119
+ the result as a dictionary.
120
+
121
+ Examples:
122
+ 1. Translate a simple FilterClause:
123
+ ```python
124
+ from gllm_datastore.core.filters import filter as F
125
+
126
+ filter_clause = F.eq("metadata.status", "active")
127
+ result = ChromaDataStore.translate_query_filter(filter_clause)
128
+ # result -> {"where": {"status": "active"}}
129
+ ```
130
+
131
+ 2. Translate QueryFilter with metadata filters:
132
+ ```python
133
+ from gllm_datastore.core.filters import filter as F
134
+
135
+ filters = F.and_(
136
+ F.eq("metadata.category", "tech"),
137
+ F.gte("metadata.price", 10),
138
+ )
139
+ result = ChromaDataStore.translate_query_filter(filters)
140
+ # result ->
141
+ # {
142
+ # "where": {
143
+ # "$and": [
144
+ # {"category": "tech"},
145
+ # {"price": {"$gte": 10}}
146
+ # ]
147
+ # }
148
+ # }
149
+ ```
150
+
151
+ 3. Translate QueryFilter with content filters:
152
+ ```python
153
+ from gllm_datastore.core.filters import filter as F
154
+
155
+ filters = F.text_contains("content", "python")
156
+ result = ChromaDataStore.translate_query_filter(filters)
157
+ # result -> {"where_document": {"$contains": "python"}}
158
+ ```
159
+
160
+ 4. Translate QueryFilter with id filters:
161
+ ```python
162
+ from gllm_datastore.core.filters import filter as F
163
+
164
+ filters = F.in_("id", ["chunk_1", "chunk_2"])
165
+ result = ChromaDataStore.translate_query_filter(filters)
166
+ # result -> {"ids": ["chunk_1", "chunk_2"]}
167
+ ```
168
+
169
+ 5. Translate complex nested QueryFilter:
170
+ ```python
171
+ from gllm_datastore.core.filters import filter as F
172
+
173
+ filters = F.and_(
174
+ F.or_(
175
+ F.eq("metadata.status", "active"),
176
+ F.eq("metadata.status", "pending"),
177
+ ),
178
+ F.text_contains("content", "machine learning"),
179
+ F.in_("id", ["chunk_1", "chunk_2"]),
180
+ )
181
+ result = ChromaDataStore.translate_query_filter(filters)
182
+ # result ->
183
+ # {
184
+ # "where": {
185
+ # "$or": [
186
+ # {"status": "active"},
187
+ # {"status": "pending"}
188
+ # ]
189
+ # },
190
+ # "where_document": {"$contains": "machine learning"},
191
+ # "ids": ["chunk_1", "chunk_2"]
192
+ # }
193
+ ```
194
+
195
+ Args:
196
+ query_filter (FilterClause | QueryFilter): The filter to translate.
197
+ Can be a single FilterClause or a QueryFilter with multiple clauses.
198
+
199
+ Returns:
200
+ dict[str, Any] | None: The translated filter as a ChromaDB query dict.
201
+ '''
@@ -0,0 +1,134 @@
1
+ from _typeshed import Incomplete
2
+ from chromadb import ClientAPI
3
+ from gllm_core.schema import Chunk
4
+ from gllm_datastore.constants import CHUNK_KEYS as CHUNK_KEYS, METADATA_KEYS as METADATA_KEYS
5
+ from gllm_datastore.core.filters import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
6
+ from gllm_datastore.data_store.chroma._chroma_import import safe_import_chromadb as safe_import_chromadb
7
+ from gllm_datastore.data_store.chroma.query import ChromaCollectionKeys as ChromaCollectionKeys, DEFAULT_NUM_CANDIDATES as DEFAULT_NUM_CANDIDATES, build_chroma_delete_kwargs as build_chroma_delete_kwargs, build_chroma_get_kwargs as build_chroma_get_kwargs, sanitize_metadata as sanitize_metadata
8
+ from gllm_datastore.data_store.chroma.query_translator import ChromaQueryTranslator as ChromaQueryTranslator
9
+ from typing import Any
10
+
11
+ chromadb: Incomplete
12
+
13
+ class ChromaFulltextCapability:
14
+ """ChromaDB implementation of FulltextCapability protocol.
15
+
16
+ This class provides document CRUD operations and text search using ChromaDB.
17
+
18
+ Attributes:
19
+ collection_name (str): The name of the ChromaDB collection.
20
+ client (ClientAPI): ChromaDB client instance.
21
+ collection: ChromaDB collection instance.
22
+ num_candidates (int): Maximum number of candidates to consider during search.
23
+ """
24
+ collection_name: Incomplete
25
+ client: Incomplete
26
+ collection: Incomplete
27
+ num_candidates: Incomplete
28
+ def __init__(self, collection_name: str, client: ClientAPI, num_candidates: int = ...) -> None:
29
+ """Initialize the ChromaDB fulltext capability.
30
+
31
+ Args:
32
+ collection_name (str): The name of the ChromaDB collection.
33
+ client (ClientAPI): ChromaDB client instance.
34
+ num_candidates (int, optional): Maximum number of candidates to consider during search.
35
+ Defaults to DEFAULT_NUM_CANDIDATES.
36
+ """
37
+ def get_size(self) -> int:
38
+ """Returns the total number of documents in the collection.
39
+
40
+ Returns:
41
+ int: The total number of documents.
42
+ """
43
+ async def create(self, data: Chunk | list[Chunk], **kwargs: Any) -> None:
44
+ """Create new records in the datastore.
45
+
46
+ Args:
47
+ data (Chunk | list[Chunk]): Data to create (single item or collection).
48
+ **kwargs: Backend-specific parameters.
49
+
50
+ Raises:
51
+ ValueError: If data structure is invalid.
52
+ """
53
+ async def retrieve(self, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, **kwargs: Any) -> list[Chunk]:
54
+ '''Read records from the datastore with optional filtering.
55
+
56
+ Usage Example:
57
+ ```python
58
+ from gllm_datastore.core.filters import filter as F
59
+
60
+ # Direct FilterClause usage
61
+ results = await fulltext_capability.retrieve(filters=F.eq("metadata.category", "tech"))
62
+
63
+ # Multiple filters
64
+ results = await fulltext_capability.retrieve(
65
+ filters=F.and_(F.eq("metadata.category", "tech"), F.eq("metadata.status", "active"))
66
+ )
67
+ ```
68
+
69
+ Args:
70
+ filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
71
+ FilterClause objects are automatically converted to QueryFilter internally.
72
+ Defaults to None.
73
+ options (QueryOptions | None, optional): Query options (sorting, pagination, etc.).
74
+ Defaults to None.
75
+ **kwargs: Backend-specific parameters.
76
+
77
+ Returns:
78
+ list[Chunk]: Query results.
79
+
80
+ Raises:
81
+ NotImplementedError: If unsupported operators are used for id or content filters.
82
+ '''
83
+ async def retrieve_fuzzy(self, query: str, max_distance: int = 2, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, **kwargs: Any) -> list[Chunk]:
84
+ """Find records that fuzzy match the query within distance threshold.
85
+
86
+ Args:
87
+ query (str): Text to fuzzy match against.
88
+ max_distance (int, optional): Maximum edit distance for matches. Defaults to 2.
89
+ filters (FilterClause | QueryFilter | None, optional): Optional metadata filters to apply.
90
+ FilterClause objects are automatically converted to QueryFilter internally.
91
+ Defaults to None.
92
+ options (QueryOptions | None, optional): Query options (sorting, limit, etc.). Defaults to None.
93
+ **kwargs: Backend-specific parameters.
94
+
95
+ Returns:
96
+ list[Chunk]: Matched chunks ordered by distance (ascending) or by options.order_by if specified.
97
+ """
98
+ async def update(self, update_values: dict[str, Any], filters: FilterClause | QueryFilter | None = None) -> None:
99
+ '''Update existing records in the datastore.
100
+
101
+ Examples:
102
+ Update the content of the chunk with the id "unique_id" to "updated_content"
103
+ and set its metadata "status" to "published".
104
+ ```python
105
+ from gllm_datastore.core.filters import filter as F
106
+
107
+ await fulltext_capability.update(
108
+ update_values={"content": "updated_content", "metadata": {"status": "published"}},
109
+ filters=F.eq("id", "unique_id"),
110
+ )
111
+ ```
112
+
113
+ Args:
114
+ update_values (dict[str, Any]): Values to update. Supports "content" for updating document content
115
+ and "metadata" for updating metadata. Other keys are treated as direct metadata updates.
116
+ filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
117
+ FilterClause objects are automatically converted to QueryFilter internally.
118
+ Defaults to None.
119
+
120
+ Note:
121
+ ChromaDB doesn\'t support direct update operations. This method will
122
+ retrieve matching records, update them, and re-add them to the collection.
123
+ '''
124
+ async def delete(self, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> None:
125
+ """Delete records from the datastore.
126
+
127
+ Args:
128
+ filters (FilterClause | QueryFilter | None, optional): Filters to select records to delete.
129
+ FilterClause objects are automatically converted to QueryFilter internally.
130
+ Defaults to None, in which case no operation is performed (no-op).
131
+ options (QueryOptions | None, optional): Query options for sorting and limiting deletions. Defaults to None.
132
+ """
133
+ async def clear(self) -> None:
134
+ """Clear all records from the datastore."""
@@ -0,0 +1,266 @@
1
+ import logging
2
+ from chromadb.types import Where, WhereDocument
3
+ from dataclasses import dataclass
4
+ from enum import StrEnum
5
+ from gllm_datastore.core.filters.schema import FilterOperator as FilterOperator, QueryFilter as QueryFilter
6
+ from gllm_datastore.data_store.chroma.query_translator import ChromaQueryTranslator as ChromaQueryTranslator
7
+ from typing import Any
8
+
9
+ DEFAULT_NUM_CANDIDATES: int
10
+
11
+ class ChromaCollectionKeys:
12
+ """Constants for ChromaDB collection method keyword arguments.
13
+
14
+ This class provides constants for all string literals used in ChromaDB
15
+ collection method calls (get, delete, query, etc.) to avoid magic strings
16
+ and improve maintainability.
17
+
18
+ Attributes:
19
+ WHERE (str): Keyword for metadata filtering condition.
20
+ WHERE_DOCUMENT (str): Keyword for document content filtering condition.
21
+ IDS (str): Keyword for filtering by document IDs.
22
+ INCLUDE (str): Keyword for specifying fields to include in results.
23
+ LIMIT (str): Keyword for limiting the number of results.
24
+ METADATA_PREFIX (str): Prefix for metadata field keys.
25
+ """
26
+ WHERE: str
27
+ WHERE_DOCUMENT: str
28
+ IDS: str
29
+ INCLUDE: str
30
+ LIMIT: str
31
+ METADATA_PREFIX: str
32
+
33
+ class ChromaOperators(StrEnum):
34
+ """Constants for ChromaDB query operators.
35
+
36
+ This class provides constants for all operator string literals used in
37
+ ChromaDB query expressions to avoid magic strings and improve maintainability.
38
+
39
+ Attributes:
40
+ AND (str): Logical AND operator for combining filters.
41
+ OR (str): Logical OR operator for combining filters.
42
+ NE (str): Not equal comparison operator.
43
+ GT (str): Greater than comparison operator.
44
+ LT (str): Less than comparison operator.
45
+ GTE (str): Greater than or equal comparison operator.
46
+ LTE (str): Less than or equal comparison operator.
47
+ IN (str): Array membership operator (value in list).
48
+ NIN (str): Array non-membership operator (value not in list).
49
+ TEXT_CONTAINS (str): Document content substring match operator.
50
+ NOT_CONTAINS (str): Document content substring exclusion operator.
51
+ """
52
+ AND: str
53
+ OR: str
54
+ NE: str
55
+ GT: str
56
+ LT: str
57
+ GTE: str
58
+ LTE: str
59
+ IN: str
60
+ NIN: str
61
+ TEXT_CONTAINS: str
62
+ NOT_CONTAINS: str
63
+
64
+ class ChromaOperatorMapper:
65
+ """Maps FilterOperator to ChromaDB operators and provides inverse operator mappings.
66
+
67
+ This class encapsulates operator translation logic.
68
+
69
+ Attributes:
70
+ OPERATOR_TO_CHROMA (dict[FilterOperator, str]): Mapping from FilterOperator to ChromaDB operators.
71
+ OPERATOR_INVERSE (dict[FilterOperator, FilterOperator]): Mapping from FilterOperator to its inverse operator.
72
+ """
73
+ OPERATOR_TO_CHROMA: dict[FilterOperator, str]
74
+ OPERATOR_INVERSE: dict[FilterOperator, FilterOperator]
75
+ @classmethod
76
+ def get_inverse_operator(cls, operator: FilterOperator) -> FilterOperator | None:
77
+ """Get the inverse operator for a given FilterOperator.
78
+
79
+ Args:
80
+ operator (FilterOperator): The operator to get the inverse for.
81
+
82
+ Returns:
83
+ FilterOperator | None: The inverse operator, or None if no inverse exists.
84
+ """
85
+ @classmethod
86
+ def has_inverse(cls, operator: FilterOperator) -> bool:
87
+ """Check if an operator has an inverse mapping.
88
+
89
+ Args:
90
+ operator (FilterOperator): The operator to check.
91
+
92
+ Returns:
93
+ bool: True if the operator has an inverse, False otherwise.
94
+ """
95
+
96
+ @dataclass
97
+ class ChromaQueryComponents:
98
+ """ChromaDB query components extracted from a QueryFilter.
99
+
100
+ Attributes:
101
+ where_condition (Where | None): Where clause for metadata filters, or None.
102
+ where_document (WhereDocument | None): WhereDocument clause for content filters, or None.
103
+ id_values (list[str] | None): List of IDs for id filters, or None.
104
+ """
105
+ where_condition: Where | None
106
+ where_document: WhereDocument | None
107
+ id_values: list[str] | None
108
+ def to_dict(self) -> dict[str, Any] | None:
109
+ """Convert to ChromaDB kwargs dict, omitting None values.
110
+
111
+ Returns:
112
+ dict[str, Any] | None: Dictionary with non-None components,
113
+ or None if all components are None/empty.
114
+ """
115
+
116
+ def sanitize_metadata(metadata: dict[str, Any] | None, logger: logging.Logger) -> dict[str, Any]:
117
+ '''Sanitize metadata by removing list values that ChromaDB doesn\'t support.
118
+
119
+ ChromaDB only supports str, int, float, or bool as metadata values.
120
+ This function filters out list values and logs warnings for each removed key.
121
+
122
+ Examples:
123
+ 1. Remove list values:
124
+ ```python
125
+ logger = logging.getLogger(__name__)
126
+ input_meta = {"status": "active", "tags": ["a", "b"], "age": 30}
127
+ out = sanitize_metadata(input_meta, logger)
128
+ # out -> {"status": "active", "age": 30}
129
+ ```
130
+
131
+ 2. Handle None input:
132
+ ```python
133
+ out = sanitize_metadata(None, logging.getLogger(__name__))
134
+ # out -> {}
135
+ ```
136
+
137
+ Args:
138
+ metadata (dict[str, Any] | None): Metadata dictionary to sanitize.
139
+ logger (logging.Logger): Logger instance for warning messages.
140
+
141
+ Returns:
142
+ dict[str, Any]: Sanitized metadata with list values removed.
143
+ '''
144
+ def build_chroma_get_kwargs(filters: QueryFilter | None, query_translator: ChromaQueryTranslator, include: list[str] | None = None, limit: int | None = None, **additional_kwargs: Any) -> dict[str, Any]:
145
+ '''Build kwargs dictionary for ChromaDB collection.get() operations.
146
+
147
+ This function processes filters and builds a kwargs dictionary that includes
148
+ where, where_document, ids, include, and limit parameters as needed.
149
+
150
+ Examples:
151
+ 1. Build kwargs with metadata and content filters:
152
+ ```python
153
+ from gllm_datastore.core.filters import filter as F
154
+ from gllm_datastore.data_store.chroma.query_translator import ChromaQueryTranslator
155
+
156
+ translator = ChromaQueryTranslator()
157
+ filters = F.and_(
158
+ F.eq("metadata.status", "active"),
159
+ F.text_contains("content", "python"),
160
+ )
161
+
162
+ out = build_chroma_get_kwargs(filters, translator, include=["documents"], limit=10)
163
+ # out ->
164
+ # {
165
+ # "where": {"status": "active"},
166
+ # "where_document": {"$contains": "python"},
167
+ # "include": ["documents"],
168
+ # "limit": 10
169
+ # }
170
+ ```
171
+
172
+ 2. Build kwargs using id filters:
173
+ ```python
174
+ from gllm_datastore.core.filters import filter as F
175
+ from gllm_datastore.data_store.chroma.query_translator import ChromaQueryTranslator
176
+
177
+ translator = ChromaQueryTranslator()
178
+ filters = F.or_(F.eq("id", "123"), F.in_("id", ["a", "b"]))
179
+ out = build_chroma_get_kwargs(filters, translator)
180
+ # out -> {"ids": ["123", "a", "b"]}
181
+ ```
182
+
183
+ Args:
184
+ filters (QueryFilter | None): QueryFilter to process.
185
+ query_translator (ChromaQueryTranslator): Query translator instance to use.
186
+ include (list[str] | None, optional): List of fields to include in results.
187
+ Defaults to None.
188
+ limit (int | None, optional): Maximum number of results to return.
189
+ Defaults to None.
190
+ **additional_kwargs: Additional kwargs to include in the result.
191
+
192
+ Returns:
193
+ dict[str, Any]: Dictionary of kwargs ready for ChromaDB collection.get() call.
194
+ '''
195
+ def build_chroma_delete_kwargs(filters: QueryFilter | None, query_translator: ChromaQueryTranslator, **additional_kwargs: Any) -> dict[str, Any]:
196
+ '''Build kwargs dictionary for ChromaDB collection.delete() operations.
197
+
198
+ This function processes filters and builds a kwargs dictionary that includes
199
+ where, where_document, and ids parameters as needed.
200
+
201
+ Examples:
202
+ 1. Delete by ids combined with a metadata (where) condition:
203
+ ```python
204
+ from gllm_datastore.core.filters import filter as F
205
+ from gllm_datastore.data_store.chroma.query_translator import ChromaQueryTranslator
206
+
207
+ translator = ChromaQueryTranslator()
208
+ filters = F.and_(
209
+ F.in_("id", ["x1", "x2"]),
210
+ F.eq("metadata.status", "inactive"),
211
+ )
212
+ out = build_chroma_delete_kwargs(filters, translator)
213
+ # out ->
214
+ # {
215
+ # "ids": ["x1", "x2"],
216
+ # "where": {"status": "inactive"}
217
+ # }
218
+ ```
219
+
220
+ Args:
221
+ filters (QueryFilter | None): QueryFilter to process.
222
+ query_translator (ChromaQueryTranslator): Query translator instance to use.
223
+ **additional_kwargs: Additional kwargs to include in the result.
224
+
225
+ Returns:
226
+ dict[str, Any]: Dictionary of kwargs ready for ChromaDB collection.delete() call.
227
+ '''
228
+ def extract_chroma_query_components(filters: QueryFilter | None) -> ChromaQueryComponents:
229
+ '''Prepare all ChromaDB query parameters from a QueryFilter.
230
+
231
+ This function processes a QueryFilter and extracts:
232
+ 1. Metadata filters -> Where clause
233
+ 2. Content filters -> WhereDocument clause
234
+ 3. id filters -> ids parameter
235
+
236
+ Only operators natively supported by ChromaDB are allowed:
237
+ 1. id: EQ, IN (using ids parameter)
238
+ 2. content: TEXT_CONTAINS (substring match in document content, maps to $contains)
239
+ 3. metadata: EQ, NE, GT, LT, GTE, LTE, IN, NIN (using where clause)
240
+ 4. metadata: ARRAY_CONTAINS (array membership, not supported by ChromaDB - raises NotImplementedError)
241
+
242
+ Examples:
243
+ 1. Extract all components from a mixed filter:
244
+ ```python
245
+ from gllm_datastore.core.filters import filter as F
246
+
247
+ filters = F.and_(
248
+ F.eq("metadata.status", "active"),
249
+ F.text_contains("content", "python"),
250
+ F.in_("id", ["a", "b"]),
251
+ )
252
+ components = extract_chroma_query_components(filters)
253
+ # components.where_condition -> dict
254
+ # components.where_document -> dict
255
+ # components.id_values -> ["a", "b"]
256
+ ```
257
+
258
+ Args:
259
+ filters (QueryFilter | None): QueryFilter to process.
260
+
261
+ Returns:
262
+ ChromaQueryComponents: Dataclass containing where_condition, where_document, and id_values.
263
+
264
+ Raises:
265
+ NotImplementedError: If unsupported operators are used for id or content filters, or if ARRAY_CONTAINS is used for metadata filters.
266
+ '''
@@ -0,0 +1,41 @@
1
+ from dataclasses import dataclass
2
+ from gllm_datastore.constants import CHUNK_KEYS as CHUNK_KEYS
3
+ from gllm_datastore.core.filters.schema import FilterClause as FilterClause, FilterCondition as FilterCondition, FilterOperator as FilterOperator, QueryFilter as QueryFilter
4
+ from gllm_datastore.data_store.chroma.query import ChromaCollectionKeys as ChromaCollectionKeys, ChromaOperatorMapper as ChromaOperatorMapper, ChromaOperators as ChromaOperators, ChromaQueryComponents as ChromaQueryComponents
5
+
6
+ @dataclass
7
+ class FilterSeparationResult:
8
+ """Intermediate result of splitting the special filters (id, content) apart from the metadata filters.
9
+
10
+ Attributes:
11
+ id_values (list[str] | None): ID values extracted from ID filters, or None if no ID filters were found.
12
+ document_filters (list[FilterClause | QueryFilter]): Content filters destined for where_document.
13
+ Each item is a FilterClause, or a QueryFilter when a NOT condition has to be represented.
14
+ metadata_filters (list[FilterClause | QueryFilter]): Metadata filters destined for the where clause.
15
+ condition (FilterCondition): The FilterCondition taken from the original QueryFilter.
16
+ """
17
+ id_values: list[str] | None
18
+ document_filters: list[FilterClause | QueryFilter]
19
+ metadata_filters: list[FilterClause | QueryFilter]
20
+ condition: FilterCondition
21
+
22
+ class ChromaQueryTranslator:
23
+ """Translates QueryFilter and FilterClause objects to ChromaDB native filter syntax.
24
+
25
+ This class encapsulates all query translation logic for ChromaDB, converting
26
+ structured FilterClause and QueryFilter objects into ChromaDB's where, where_document,
27
+ and ids parameters.
28
+ """
29
+ def translate(self, filters: QueryFilter | None = None) -> ChromaQueryComponents:
30
+ """Translate QueryFilter to ChromaDB query components.
31
+
32
+ This is the main entry point for query translation. It handles None filters
33
+ and orchestrates filter separation and translation.
34
+
35
+ Args:
36
+ filters (QueryFilter | None, optional): Structured QueryFilter to translate. Defaults to None.
37
+
38
+ Returns:
39
+ ChromaQueryComponents: ChromaDB query components containing where_condition,
40
+ where_document, and id_values. NOTE(review): earlier wording said None is returned when no filters are provided, but the annotated return type is not optional - confirm which is intended.
41
+ """