gllm-datastore-binary 0.5.50__cp312-cp312-macosx_13_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. gllm_datastore/__init__.pyi +0 -0
  2. gllm_datastore/cache/__init__.pyi +4 -0
  3. gllm_datastore/cache/base.pyi +84 -0
  4. gllm_datastore/cache/cache.pyi +137 -0
  5. gllm_datastore/cache/hybrid_cache/__init__.pyi +5 -0
  6. gllm_datastore/cache/hybrid_cache/file_system_hybrid_cache.pyi +50 -0
  7. gllm_datastore/cache/hybrid_cache/hybrid_cache.pyi +115 -0
  8. gllm_datastore/cache/hybrid_cache/in_memory_hybrid_cache.pyi +29 -0
  9. gllm_datastore/cache/hybrid_cache/key_matcher/__init__.pyi +5 -0
  10. gllm_datastore/cache/hybrid_cache/key_matcher/exact_key_matcher.pyi +44 -0
  11. gllm_datastore/cache/hybrid_cache/key_matcher/fuzzy_key_matcher.pyi +70 -0
  12. gllm_datastore/cache/hybrid_cache/key_matcher/key_matcher.pyi +60 -0
  13. gllm_datastore/cache/hybrid_cache/key_matcher/semantic_key_matcher.pyi +93 -0
  14. gllm_datastore/cache/hybrid_cache/redis_hybrid_cache.pyi +34 -0
  15. gllm_datastore/cache/hybrid_cache/utils.pyi +36 -0
  16. gllm_datastore/cache/utils.pyi +34 -0
  17. gllm_datastore/cache/vector_cache/__init__.pyi +0 -0
  18. gllm_datastore/cache/vector_cache/eviction_manager/__init__.pyi +0 -0
  19. gllm_datastore/cache/vector_cache/eviction_manager/asyncio_eviction_manager.pyi +48 -0
  20. gllm_datastore/cache/vector_cache/eviction_manager/eviction_manager.pyi +38 -0
  21. gllm_datastore/cache/vector_cache/eviction_strategy/__init__.pyi +0 -0
  22. gllm_datastore/cache/vector_cache/eviction_strategy/eviction_strategy.pyi +34 -0
  23. gllm_datastore/cache/vector_cache/eviction_strategy/ttl_eviction_strategy.pyi +34 -0
  24. gllm_datastore/cache/vector_cache/vector_cache.pyi +99 -0
  25. gllm_datastore/constants.pyi +66 -0
  26. gllm_datastore/core/__init__.pyi +7 -0
  27. gllm_datastore/core/capabilities/__init__.pyi +7 -0
  28. gllm_datastore/core/capabilities/encryption_capability.pyi +21 -0
  29. gllm_datastore/core/capabilities/fulltext_capability.pyi +73 -0
  30. gllm_datastore/core/capabilities/graph_capability.pyi +70 -0
  31. gllm_datastore/core/capabilities/hybrid_capability.pyi +184 -0
  32. gllm_datastore/core/capabilities/vector_capability.pyi +90 -0
  33. gllm_datastore/core/filters/__init__.pyi +4 -0
  34. gllm_datastore/core/filters/filter.pyi +340 -0
  35. gllm_datastore/core/filters/schema.pyi +149 -0
  36. gllm_datastore/data_store/__init__.pyi +8 -0
  37. gllm_datastore/data_store/_elastic_core/__init__.pyi +0 -0
  38. gllm_datastore/data_store/_elastic_core/client_factory.pyi +66 -0
  39. gllm_datastore/data_store/_elastic_core/constants.pyi +27 -0
  40. gllm_datastore/data_store/_elastic_core/elastic_like_core.pyi +115 -0
  41. gllm_datastore/data_store/_elastic_core/index_manager.pyi +37 -0
  42. gllm_datastore/data_store/_elastic_core/query_translator.pyi +89 -0
  43. gllm_datastore/data_store/base.pyi +176 -0
  44. gllm_datastore/data_store/chroma/__init__.pyi +4 -0
  45. gllm_datastore/data_store/chroma/_chroma_import.pyi +13 -0
  46. gllm_datastore/data_store/chroma/data_store.pyi +201 -0
  47. gllm_datastore/data_store/chroma/fulltext.pyi +134 -0
  48. gllm_datastore/data_store/chroma/query.pyi +266 -0
  49. gllm_datastore/data_store/chroma/query_translator.pyi +41 -0
  50. gllm_datastore/data_store/chroma/vector.pyi +197 -0
  51. gllm_datastore/data_store/elasticsearch/__init__.pyi +5 -0
  52. gllm_datastore/data_store/elasticsearch/data_store.pyi +147 -0
  53. gllm_datastore/data_store/elasticsearch/fulltext.pyi +238 -0
  54. gllm_datastore/data_store/elasticsearch/query.pyi +118 -0
  55. gllm_datastore/data_store/elasticsearch/query_translator.pyi +18 -0
  56. gllm_datastore/data_store/elasticsearch/vector.pyi +180 -0
  57. gllm_datastore/data_store/exceptions.pyi +35 -0
  58. gllm_datastore/data_store/in_memory/__init__.pyi +5 -0
  59. gllm_datastore/data_store/in_memory/data_store.pyi +71 -0
  60. gllm_datastore/data_store/in_memory/fulltext.pyi +131 -0
  61. gllm_datastore/data_store/in_memory/query.pyi +175 -0
  62. gllm_datastore/data_store/in_memory/vector.pyi +174 -0
  63. gllm_datastore/data_store/opensearch/__init__.pyi +5 -0
  64. gllm_datastore/data_store/opensearch/data_store.pyi +160 -0
  65. gllm_datastore/data_store/opensearch/fulltext.pyi +240 -0
  66. gllm_datastore/data_store/opensearch/query.pyi +89 -0
  67. gllm_datastore/data_store/opensearch/query_translator.pyi +18 -0
  68. gllm_datastore/data_store/opensearch/vector.pyi +211 -0
  69. gllm_datastore/data_store/redis/__init__.pyi +5 -0
  70. gllm_datastore/data_store/redis/data_store.pyi +153 -0
  71. gllm_datastore/data_store/redis/fulltext.pyi +128 -0
  72. gllm_datastore/data_store/redis/query.pyi +428 -0
  73. gllm_datastore/data_store/redis/query_translator.pyi +37 -0
  74. gllm_datastore/data_store/redis/vector.pyi +131 -0
  75. gllm_datastore/data_store/sql/__init__.pyi +4 -0
  76. gllm_datastore/data_store/sql/constants.pyi +5 -0
  77. gllm_datastore/data_store/sql/data_store.pyi +201 -0
  78. gllm_datastore/data_store/sql/fulltext.pyi +164 -0
  79. gllm_datastore/data_store/sql/query.pyi +81 -0
  80. gllm_datastore/data_store/sql/query_translator.pyi +51 -0
  81. gllm_datastore/data_store/sql/schema.pyi +16 -0
  82. gllm_datastore/encryptor/__init__.pyi +4 -0
  83. gllm_datastore/encryptor/aes_gcm_encryptor.pyi +45 -0
  84. gllm_datastore/encryptor/capability/__init__.pyi +3 -0
  85. gllm_datastore/encryptor/capability/mixin.pyi +32 -0
  86. gllm_datastore/encryptor/encryptor.pyi +52 -0
  87. gllm_datastore/encryptor/key_ring/__init__.pyi +3 -0
  88. gllm_datastore/encryptor/key_ring/in_memory_key_ring.pyi +52 -0
  89. gllm_datastore/encryptor/key_ring/key_ring.pyi +45 -0
  90. gllm_datastore/encryptor/key_rotating_encryptor.pyi +60 -0
  91. gllm_datastore/graph_data_store/__init__.pyi +6 -0
  92. gllm_datastore/graph_data_store/graph_data_store.pyi +151 -0
  93. gllm_datastore/graph_data_store/graph_rag_data_store.pyi +29 -0
  94. gllm_datastore/graph_data_store/light_rag_data_store.pyi +93 -0
  95. gllm_datastore/graph_data_store/light_rag_postgres_data_store.pyi +96 -0
  96. gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +49 -0
  97. gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +78 -0
  98. gllm_datastore/graph_data_store/mixins/__init__.pyi +3 -0
  99. gllm_datastore/graph_data_store/mixins/agentic_graph_tools_mixin.pyi +175 -0
  100. gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +206 -0
  101. gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +182 -0
  102. gllm_datastore/graph_data_store/schema.pyi +27 -0
  103. gllm_datastore/graph_data_store/utils/__init__.pyi +6 -0
  104. gllm_datastore/graph_data_store/utils/constants.pyi +21 -0
  105. gllm_datastore/graph_data_store/utils/light_rag_em_invoker_adapter.pyi +56 -0
  106. gllm_datastore/graph_data_store/utils/light_rag_lm_invoker_adapter.pyi +43 -0
  107. gllm_datastore/graph_data_store/utils/llama_index_em_invoker_adapter.pyi +45 -0
  108. gllm_datastore/graph_data_store/utils/llama_index_lm_invoker_adapter.pyi +169 -0
  109. gllm_datastore/signature/__init__.pyi +0 -0
  110. gllm_datastore/signature/webhook_signature.pyi +31 -0
  111. gllm_datastore/sql_data_store/__init__.pyi +4 -0
  112. gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
  113. gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +38 -0
  114. gllm_datastore/sql_data_store/constants.pyi +6 -0
  115. gllm_datastore/sql_data_store/sql_data_store.pyi +86 -0
  116. gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +216 -0
  117. gllm_datastore/sql_data_store/types.pyi +31 -0
  118. gllm_datastore/utils/__init__.pyi +6 -0
  119. gllm_datastore/utils/converter.pyi +51 -0
  120. gllm_datastore/utils/dict.pyi +21 -0
  121. gllm_datastore/utils/ttl.pyi +25 -0
  122. gllm_datastore/utils/types.pyi +32 -0
  123. gllm_datastore/vector_data_store/__init__.pyi +6 -0
  124. gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +259 -0
  125. gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +357 -0
  126. gllm_datastore/vector_data_store/in_memory_vector_data_store.pyi +179 -0
  127. gllm_datastore/vector_data_store/mixin/__init__.pyi +0 -0
  128. gllm_datastore/vector_data_store/mixin/cache_compatible_mixin.pyi +145 -0
  129. gllm_datastore/vector_data_store/redis_vector_data_store.pyi +191 -0
  130. gllm_datastore/vector_data_store/vector_data_store.pyi +146 -0
  131. gllm_datastore.build/.gitignore +1 -0
  132. gllm_datastore.cpython-312-darwin.so +0 -0
  133. gllm_datastore.pyi +178 -0
  134. gllm_datastore_binary-0.5.50.dist-info/METADATA +185 -0
  135. gllm_datastore_binary-0.5.50.dist-info/RECORD +137 -0
  136. gllm_datastore_binary-0.5.50.dist-info/WHEEL +5 -0
  137. gllm_datastore_binary-0.5.50.dist-info/top_level.txt +1 -0
@@ -0,0 +1,175 @@
1
+ from gllm_core.schema.chunk import Chunk
2
+ from gllm_datastore.constants import CHUNK_KEYS as CHUNK_KEYS
3
+ from gllm_datastore.core.filters import FilterClause as FilterClause, FilterCondition as FilterCondition, FilterOperator as FilterOperator, QueryFilter as QueryFilter, QueryOptions as QueryOptions
4
+ from gllm_inference.schema import Vector
5
+ from typing import Any
6
+
7
+ def apply_filters(chunks: list[Chunk], filters: FilterClause | QueryFilter) -> list[Chunk]:
8
+ '''Apply filters to chunks.
9
+
10
+ Usage Example:
11
+ ```python
12
+ from gllm_datastore.core.filters import filter as F
13
+
14
+ chunks = [
15
+ Chunk(id="1", content="Chunk 1", metadata={"category": "test"}),
16
+ Chunk(id="2", content="Chunk 2", metadata={"category": "test"}),
17
+ Chunk(id="3", content="Chunk 3", metadata={"category": "test"}),
18
+ ]
19
+ # Direct FilterClause usage
20
+ filters = F.eq("metadata.category", "test")
21
+ filtered_chunks = apply_filters(chunks, filters)
22
+
23
+ # Multiple filters
24
+ filters = F.and_(F.eq("metadata.category", "test"), F.eq("metadata.status", "active"))
25
+ filtered_chunks = apply_filters(chunks, filters)
26
+ ```
27
+
28
+ Args:
29
+ chunks (list[Chunk]): List of chunks to filter.
30
+ filters (FilterClause | QueryFilter): Filter criteria to apply.
31
+ FilterClause objects are automatically converted to QueryFilter internally.
32
+
33
+ Returns:
34
+ list[Chunk]: Filtered list of chunks.
35
+ '''
36
+ def apply_options(chunks: list[Chunk], options: QueryOptions) -> list[Chunk]:
37
+ """Apply query options (sorting, pagination).
38
+
39
+ Note: columns filtering is not applicable to Chunk objects since they have a fixed structure
40
+ and we can only filter on id, content, score, and metadata.
41
+
42
+ Args:
43
+ chunks (list[Chunk]): List of chunks to process.
44
+ options (QueryOptions): Query options to apply.
45
+
46
+ Returns:
47
+ list[Chunk]: Processed list of chunks.
48
+ """
49
+ def get_nested_value(obj: dict[str, Any], key_path: str) -> Any:
50
+ '''Get a nested value from a dictionary using dot notation.
51
+
52
+ Args:
53
+ obj (dict[str, Any]): Dictionary to traverse.
54
+ key_path (str): Dot-separated path to the value (e.g., "user.profile.name").
55
+
56
+ Returns:
57
+ Any: The value at the specified path, or None if not found.
58
+ '''
59
+ def get_sort_value(chunk: Chunk, order_by: str) -> Any:
60
+ """Get the value to sort by.
61
+
62
+ Args:
63
+ chunk (Chunk): Chunk to get the value from.
64
+ order_by (str): The field to sort by.
65
+
66
+ Returns:
67
+ Any: The value to sort by.
68
+ """
69
+ def validate_cache_key(key: str) -> None:
70
+ """Validate cache key format and content.
71
+
72
+ Args:
73
+ key (str): Cache key to validate.
74
+
75
+ Raises:
76
+ TypeError: If key is not a string.
77
+ ValueError: If key is empty or whitespace-only.
78
+ """
79
+ def get_chunks_from_store(store: dict[str, Chunk], filters: QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
80
+ """Get chunks from a store as a list with optional filters and options.
81
+
82
+ Args:
83
+ store (dict[str, Chunk]): Store containing chunks.
84
+ filters (QueryFilter | None, optional): Filter criteria to apply. Defaults to None.
85
+ options (QueryOptions | None, optional): Query options to apply. Defaults to None.
86
+
87
+ Returns:
88
+ list[Chunk]: List of chunks from the store, after any given filters and options are applied.
89
+ """
90
+ def apply_filters_and_options(chunks: list[Chunk], filters: QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
91
+ """Apply filters and options to a list of chunks.
92
+
93
+ Args:
94
+ chunks (list[Chunk]): List of chunks to process.
95
+ filters (QueryFilter | None, optional): Filter criteria to apply. Defaults to None.
96
+ options (QueryOptions | None, optional): Query options to apply. Defaults to None.
97
+
98
+ Returns:
99
+ list[Chunk]: Processed list of chunks.
100
+ """
101
+ def create_updated_chunk(existing_chunk: Chunk, update_values: dict[str, Any]) -> Chunk:
102
+ """Create an updated chunk with new values.
103
+
104
+ Args:
105
+ existing_chunk (Chunk): The existing chunk to update.
106
+ update_values (dict[str, Any]): Values to update.
107
+
108
+ Returns:
109
+ Chunk: Updated chunk with new values.
110
+ """
111
+ def delete_chunks_by_filters(store: dict[str, Chunk], filters: QueryFilter | None = None) -> int:
112
+ """Delete chunks from store based on filters.
113
+
114
+ Args:
115
+ store (dict[str, Chunk]): Store containing chunks.
116
+ filters (QueryFilter | None, optional): Filters to select chunks to delete. Defaults to None.
117
+
118
+ Returns:
119
+ int: Number of chunks deleted.
120
+ """
121
+ def find_matching_chunk_ids(store: dict[str, Chunk], filters: QueryFilter) -> list[str]:
122
+ """Find chunk IDs that match the given filters.
123
+
124
+ Args:
125
+ store (dict[str, Chunk]): Store containing chunks.
126
+ filters (QueryFilter): The filters to apply.
127
+
128
+ Returns:
129
+ list[str]: List of chunk IDs that match the filters.
130
+ """
131
+ def similarity_search(query_vector: Vector, store: dict[str, Chunk], filters: QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
132
+ """Retrieve chunks by vector similarity from a store.
133
+
134
+ This function only returns chunks that have a vector in their metadata.
135
+ It will also apply the filters and options to the chunks.
136
+
137
+ Args:
138
+ query_vector (Vector): Query embedding vector.
139
+ store (dict[str, Chunk]): Store containing chunks.
140
+ filters (QueryFilter | None, optional): Query filters to apply. Defaults to None.
141
+ options (QueryOptions | None, optional): Query options to apply. Defaults to None.
142
+
143
+ Returns:
144
+ list[Chunk]: List of chunks ordered by similarity score.
145
+ """
146
+ def evaluate_filter(chunk: Chunk, filters: QueryFilter) -> bool:
147
+ '''Evaluate if a chunk matches the given filters.
148
+
149
+ Examples:
150
+ ```python
151
+ from gllm_datastore.core.filters import filter as F
152
+
153
+ # Simple filter
154
+ filters = F.and_(F.eq("metadata.category", "tech"))
155
+ result = evaluate_filter(chunk, filters)
156
+
157
+ # Complex nested filter
158
+ filters = F.and_(
159
+ F.gte("metadata.price", 10),
160
+ F.lte("metadata.price", 100),
161
+ F.or_(
162
+ F.eq("metadata.status", "active"),
163
+ F.eq("metadata.status", "pending")
164
+ )
165
+ )
166
+ result = evaluate_filter(chunk, filters)
167
+ ```
168
+
169
+ Args:
170
+ chunk (Chunk): The chunk to evaluate.
171
+ filters (QueryFilter): The filters to apply.
172
+
173
+ Returns:
174
+ bool: True if the chunk matches all filters, False otherwise.
175
+ '''
@@ -0,0 +1,174 @@
1
+ from gllm_core.schema.chunk import Chunk
2
+ from gllm_datastore.constants import CHUNK_KEYS as CHUNK_KEYS
3
+ from gllm_datastore.core.filters import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
4
+ from gllm_datastore.data_store.in_memory.query import create_updated_chunk as create_updated_chunk, delete_chunks_by_filters as delete_chunks_by_filters, get_chunks_from_store as get_chunks_from_store, similarity_search as similarity_search
5
+ from gllm_inference.em_invoker.em_invoker import BaseEMInvoker
6
+ from gllm_inference.schema import Vector
7
+ from typing import Any
8
+
9
+ class InMemoryVectorCapability:
10
+ """In-memory implementation of VectorCapability protocol.
11
+
12
+ This class provides vector similarity search operations using pure Python
13
+ data structures optimized for development and testing.
14
+
15
+ Attributes:
16
+ store (dict[str, Chunk]): Dictionary storing Chunk objects with their IDs as keys.
17
+ em_invoker (BaseEMInvoker): The embedding model to perform vectorization.
18
+ """
19
+ store: dict[str, Chunk]
20
+ def __init__(self, em_invoker: BaseEMInvoker, store: dict[str, Any] | None = None) -> None:
21
+ """Initialize the in-memory vector capability.
22
+
23
+ Args:
24
+ em_invoker (BaseEMInvoker): Embedding model invoker used for text-to-vector conversion.
25
+ store (dict[str, Any] | None, optional): Dictionary storing Chunk objects with their IDs as keys.
26
+ Defaults to None.
27
+ """
28
+ @property
29
+ def em_invoker(self) -> BaseEMInvoker:
30
+ """Returns the EM Invoker instance.
31
+
32
+ Returns:
33
+ BaseEMInvoker: The EM Invoker instance.
34
+ """
35
+ async def ensure_index(self) -> None:
36
+ """Ensure in-memory vector store exists, initializing it if necessary.
37
+
38
+ This method is idempotent - if the store already exists, it will skip
39
+ initialization and return early.
40
+ """
41
+ async def create(self, data: Chunk | list[Chunk]) -> None:
42
+ """Add chunks to the vector store with automatic embedding generation.
43
+
44
+ Args:
45
+ data (Chunk | list[Chunk]): Single chunk or list of chunks to add.
46
+ """
47
+ async def create_from_vector(self, chunk_vectors: list[tuple[Chunk, Vector]]) -> None:
48
+ """Add pre-computed vectors directly.
49
+
50
+ Args:
51
+ chunk_vectors (list[tuple[Chunk, Vector]]): List of tuples containing chunks and their
52
+ corresponding vectors.
53
+ """
54
+ async def retrieve(self, query: str, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
55
+ '''Read records from the datastore using text-based similarity search with optional filtering.
56
+
57
+ Usage Example:
58
+ ```python
59
+ from gllm_datastore.core.filters import filter as F
60
+
61
+ # Direct FilterClause usage
62
+ await vector_capability.retrieve(
63
+ query="What is the capital of France?",
64
+ filters=F.eq("metadata.category", "tech"),
65
+ options=QueryOptions(limit=2),
66
+ )
67
+
68
+ # Multiple filters
69
+ filters = F.and_(F.eq("metadata.source", "wikipedia"), F.eq("metadata.category", "tech"))
70
+ await vector_capability.retrieve(
71
+ query="What is the capital of France?",
72
+ filters=filters,
73
+ options=QueryOptions(limit=2),
74
+ )
75
+ ```
76
+ This will retrieve the top 2 chunks by similarity score from the vector store
77
+ that match the query and the filters. The chunks will be sorted by score in descending order.
78
+
79
+ Args:
80
+ query (str): Input text to embed and search with.
81
+ filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
82
+ FilterClause objects are automatically converted to QueryFilter internally.
83
+ Defaults to None.
84
+ options (QueryOptions | None, optional): Query options like limit and sorting.
85
+ Defaults to None, in which case, no sorting is applied and top 10 chunks are returned.
86
+
87
+ Returns:
88
+ list[Chunk]: Top ranked chunks by similarity score.
89
+ '''
90
+ async def retrieve_by_vector(self, vector: Vector, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None) -> list[Chunk]:
91
+ """Direct vector similarity search.
92
+
93
+ Args:
94
+ vector (Vector): Query embedding vector.
95
+ filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
96
+ FilterClause objects are automatically converted to QueryFilter internally.
97
+ Defaults to None.
98
+ options (QueryOptions | None, optional): Query options like limit and sorting.
99
+ Defaults to None, in which case, no sorting is applied and top 10 chunks are returned.
100
+
101
+ Returns:
102
+ list[Chunk]: List of chunks ordered by similarity score.
103
+ """
104
+ async def update(self, update_values: dict[str, Any], filters: FilterClause | QueryFilter | None = None, **kwargs: Any) -> None:
105
+ '''Update existing records in the datastore.
106
+
107
+ Examples:
108
+ 1. Update certain metadata of a chunk with specific filters.
109
+ ```python
110
+ from gllm_datastore.core.filters import filter as F
111
+
112
+ # Direct FilterClause usage
113
+ await vector_capability.update(
114
+ update_values={"metadata": {"status": "published"}},
115
+ filters=F.eq("metadata.category", "tech"),
116
+ )
117
+
118
+ # Multiple filters
119
+ await vector_capability.update(
120
+ update_values={"metadata": {"status": "published"}},
121
+ filters=F.and_(F.eq("metadata.status", "draft"), F.eq("metadata.category", "tech")),
122
+ )
123
+ ```
124
+
125
+ 2. Update certain content of a chunk with specific id.
126
+ This will also regenerate the vector of the chunk.
127
+ ```python
128
+ # Direct FilterClause usage
129
+ await vector_capability.update(
130
+ update_values={"content": "new_content"},
131
+ filters=F.eq("id", "unique_id"),
132
+ )
133
+
134
+ # Multiple filters
135
+ await vector_capability.update(
136
+ update_values={"content": "new_content"},
137
+ filters=F.and_(F.eq("id", "unique_id"), F.eq("metadata.category", "tech")),
138
+ )
139
+ ```
140
+
141
+ Args:
142
+ update_values (dict[str, Any]): Values to update.
143
+ filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
144
+ FilterClause objects are automatically converted to QueryFilter internally.
145
+ Defaults to None, in which case no operation is performed (no-op).
146
+ **kwargs: Datastore-specific parameters.
147
+
148
+ Raises:
149
+ ValueError: If content is empty.
150
+ '''
151
+ async def delete(self, filters: FilterClause | QueryFilter | None = None) -> None:
152
+ '''Delete records from the datastore.
153
+
154
+ Usage Example:
155
+ ```python
156
+ from gllm_datastore.core.filters import filter as F
157
+
158
+ # Direct FilterClause usage
159
+ await vector_capability.delete(filters=F.eq("metadata.category", "AI"))
160
+
161
+ # Multiple filters
162
+ await vector_capability.delete(
163
+ filters=F.and_(F.eq("metadata.category", "AI"), F.eq("metadata.status", "published")),
164
+ )
165
+ ```
166
+ This will delete all chunks from the vector store that match the filters.
167
+
168
+ Args:
169
+ filters (FilterClause | QueryFilter | None, optional): Filters to select records to delete.
170
+ FilterClause objects are automatically converted to QueryFilter internally.
171
+ Defaults to None, in which case no operation is performed (no-op).
172
+ '''
173
+ async def clear(self) -> None:
174
+ """Clear all vectors from the store."""
@@ -0,0 +1,5 @@
1
+ from gllm_datastore.data_store.opensearch.data_store import OpenSearchDataStore as OpenSearchDataStore
2
+ from gllm_datastore.data_store.opensearch.fulltext import OpenSearchFulltextCapability as OpenSearchFulltextCapability
3
+ from gllm_datastore.data_store.opensearch.vector import OpenSearchVectorCapability as OpenSearchVectorCapability
4
+
5
+ __all__ = ['OpenSearchDataStore', 'OpenSearchFulltextCapability', 'OpenSearchVectorCapability']
@@ -0,0 +1,160 @@
1
+ from _typeshed import Incomplete
2
+ from gllm_datastore.constants import DEFAULT_REQUEST_TIMEOUT as DEFAULT_REQUEST_TIMEOUT
3
+ from gllm_datastore.core.filters.schema import FilterClause as FilterClause, QueryFilter as QueryFilter
4
+ from gllm_datastore.data_store._elastic_core.client_factory import EngineType as EngineType, create_client as create_client
5
+ from gllm_datastore.data_store.base import BaseDataStore as BaseDataStore, CapabilityType as CapabilityType
6
+ from gllm_datastore.data_store.opensearch.fulltext import OpenSearchFulltextCapability as OpenSearchFulltextCapability
7
+ from gllm_datastore.data_store.opensearch.query_translator import OpenSearchQueryTranslator as OpenSearchQueryTranslator
8
+ from gllm_datastore.data_store.opensearch.vector import OpenSearchVectorCapability as OpenSearchVectorCapability
9
+ from gllm_inference.em_invoker.em_invoker import BaseEMInvoker
10
+ from opensearchpy import AsyncOpenSearch
11
+ from typing import Any
12
+
13
+ class OpenSearchDataStore(BaseDataStore):
14
+ '''OpenSearch data store with multiple capability support.
15
+
16
+ This is the explicit public API for OpenSearch. Users know they\'re
17
+ using OpenSearch, not a generic "elastic-like" datastore.
18
+
19
+ Attributes:
20
+ engine (str): Always "opensearch" for explicit identification.
21
+ This attribute ensures users know they\'re using OpenSearch, not a generic
22
+ "elastic-like" datastore.
23
+ index_name (str): The name of the OpenSearch index.
24
+ client (AsyncOpenSearch): AsyncOpenSearch client.
25
+ '''
26
+ engine: str
27
+ client: Incomplete
28
+ index_name: Incomplete
29
+ def __init__(self, index_name: str, client: AsyncOpenSearch | None = None, url: str | None = None, cloud_id: str | None = None, api_key: str | None = None, username: str | None = None, password: str | None = None, request_timeout: int = ..., connection_params: dict[str, Any] | None = None) -> None:
30
+ '''Initialize the OpenSearch data store.
31
+
32
+ Args:
33
+ index_name (str): The name of the OpenSearch index to use for operations.
34
+ This index name will be used for all queries and operations.
35
+ client (AsyncOpenSearch | None, optional): Pre-configured OpenSearch client instance.
36
+ If provided, it will be used instead of creating a new client from url/cloud_id.
37
+ Must be an instance of AsyncOpenSearch. Defaults to None.
38
+ url (str | None, optional): The URL of the OpenSearch server.
39
+ For example, "http://localhost:9200". Either url or cloud_id must be provided
40
+ if client is None. Defaults to None.
41
+ cloud_id (str | None, optional): The cloud ID of the OpenSearch cluster.
42
+ Used for OpenSearch Service connections. Either url or cloud_id must be provided
43
+ if client is None. Defaults to None.
44
+ api_key (str | None, optional): The API key for authentication.
45
+ If provided, will be used for authentication. Mutually exclusive with username/password.
46
+ Defaults to None.
47
+ username (str | None, optional): The username for basic authentication.
48
+ Must be provided together with password. Mutually exclusive with api_key.
49
+ Defaults to None.
50
+ password (str | None, optional): The password for basic authentication.
51
+ Must be provided together with username. Mutually exclusive with api_key.
52
+ Defaults to None.
53
+ request_timeout (int, optional): The request timeout in seconds.
54
+ Defaults to DEFAULT_REQUEST_TIMEOUT.
55
+ connection_params (dict[str, Any] | None, optional): Additional connection parameters
56
+ for OpenSearch client. These will be merged with automatically detected parameters
57
+ (authentication, SSL settings). User-provided params take precedence. Defaults to None.
58
+ Available parameters include:
59
+ 1. http_auth (tuple[str, str] | None): HTTP authentication tuple (username, password).
60
+ 2. use_ssl (bool): Whether to use SSL/TLS. Defaults to True for HTTPS URLs.
61
+ 3. verify_certs (bool): Whether to verify SSL certificates. Defaults to True for HTTPS URLs.
62
+ Set to False to use self-signed certificates (not recommended for production).
63
+ 4. ssl_show_warn (bool): Whether to show SSL warnings. Defaults to True for HTTPS URLs.
64
+ 5. ssl_assert_hostname (str | None): SSL hostname assertion. Defaults to None.
65
+ 6. max_retries (int): Maximum number of retries for requests. Defaults to 3.
66
+ 7. retry_on_timeout (bool): Whether to retry on timeouts. Defaults to True.
67
+ 8. client_cert (str | None): Path to the client certificate file. Defaults to None.
68
+ 9. client_key (str | None): Path to the client private key file. Defaults to None.
69
+ 10. root_cert (str | None): Path to the root certificate file. Defaults to None.
70
+ 11. Additional kwargs: Any other parameters accepted by OpenSearch client constructor.
71
+
72
+ Raises:
73
+ ValueError: If neither url nor cloud_id is provided when client is None.
74
+ TypeError: If client is provided but is not an instance of AsyncOpenSearch.
75
+ '''
76
+ @property
77
+ def supported_capabilities(self) -> list[str]:
78
+ """Return list of currently supported capabilities.
79
+
80
+ Returns:
81
+ list[str]: List of capability names that are supported.
82
+ Currently returns [CapabilityType.FULLTEXT, CapabilityType.VECTOR].
83
+ """
84
+ @property
85
+ def fulltext(self) -> OpenSearchFulltextCapability:
86
+ """Access fulltext capability if supported.
87
+
88
+ This property reuses the parent class logic to return the fulltext capability handler.
89
+ It overrides the parent property so that the return type is OpenSearchFulltextCapability, for better
90
+ type hinting.
91
+
92
+ Returns:
93
+ OpenSearchFulltextCapability: Fulltext capability handler.
94
+
95
+ Raises:
96
+ NotSupportedException: If fulltext capability is not supported.
97
+ """
98
+ @property
99
+ def vector(self) -> OpenSearchVectorCapability:
100
+ """Access vector capability if supported.
101
+
102
+ This property reuses the parent class logic to return the vector capability handler.
103
+ It overrides the parent property so that the return type is OpenSearchVectorCapability, for better
104
+ type hinting.
105
+
106
+ Returns:
107
+ OpenSearchVectorCapability: Vector capability handler.
108
+
109
+ Raises:
110
+ NotSupportedException: If vector capability is not supported.
111
+ """
112
+ def with_fulltext(self, index_name: str | None = None, query_field: str = 'text') -> OpenSearchDataStore:
113
+ '''Configure fulltext capability and return datastore instance.
114
+
115
+ Overrides parent for better type hinting.
116
+
117
+ Args:
118
+ index_name (str | None, optional): Index name for fulltext operations.
119
+ Uses datastore\'s default if None. Defaults to None.
120
+ query_field (str, optional): Field name for text queries. Defaults to "text".
121
+
122
+ Returns:
123
+ OpenSearchDataStore: Self for method chaining.
124
+ '''
125
+ def with_vector(self, em_invoker: BaseEMInvoker, index_name: str | None = None, query_field: str = 'text', vector_query_field: str = 'vector', retrieval_strategy: Any = None, distance_strategy: str | None = None) -> OpenSearchDataStore:
126
+ '''Configure vector capability and return datastore instance.
127
+
128
+ Overrides parent for better type hinting.
129
+
130
+ Args:
131
+ em_invoker (BaseEMInvoker): Embedding model for vectorization.
132
+ index_name (str | None, optional): Index name. Uses datastore\'s default if None. Defaults to None.
133
+ query_field (str, optional): Field name for text queries. Defaults to "text".
134
+ vector_query_field (str, optional): Field name for vector queries. Defaults to "vector".
135
+ retrieval_strategy (Any, optional): Not used (kept for API compatibility). Defaults to None.
136
+ distance_strategy (str | None, optional): Distance strategy (e.g., "l2", "cosine"). Defaults to None.
137
+
138
+ Returns:
139
+ OpenSearchDataStore: Self for method chaining.
140
+
141
+ Note:
142
+ Connection parameters are configured at the data store level during initialization.
143
+ See OpenSearchDataStore.__init__ for connection_params details.
144
+ '''
145
+ @classmethod
146
+ def translate_query_filter(cls, query_filter: FilterClause | QueryFilter | None) -> dict[str, Any] | None:
147
+ """Translate QueryFilter or FilterClause to OpenSearch native filter syntax.
148
+
149
+ This method delegates to the OpenSearchQueryTranslator and returns the result as a dictionary.
150
+
151
+ Args:
152
+ query_filter (FilterClause | QueryFilter | None): The filter to translate.
153
+ Can be a single FilterClause, a QueryFilter with multiple clauses and logical conditions,
154
+ or None for empty filters. FilterClause objects are automatically converted to QueryFilter.
155
+
156
+ Returns:
157
+ dict[str, Any] | None: The translated filter as an OpenSearch DSL dictionary.
158
+ Returns None for empty filters or when query_filter is None.
159
+ The dictionary format matches OpenSearch Query DSL syntax.
160
+ """