gllm-datastore-binary 0.5.50__cp312-cp312-macosx_13_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. gllm_datastore/__init__.pyi +0 -0
  2. gllm_datastore/cache/__init__.pyi +4 -0
  3. gllm_datastore/cache/base.pyi +84 -0
  4. gllm_datastore/cache/cache.pyi +137 -0
  5. gllm_datastore/cache/hybrid_cache/__init__.pyi +5 -0
  6. gllm_datastore/cache/hybrid_cache/file_system_hybrid_cache.pyi +50 -0
  7. gllm_datastore/cache/hybrid_cache/hybrid_cache.pyi +115 -0
  8. gllm_datastore/cache/hybrid_cache/in_memory_hybrid_cache.pyi +29 -0
  9. gllm_datastore/cache/hybrid_cache/key_matcher/__init__.pyi +5 -0
  10. gllm_datastore/cache/hybrid_cache/key_matcher/exact_key_matcher.pyi +44 -0
  11. gllm_datastore/cache/hybrid_cache/key_matcher/fuzzy_key_matcher.pyi +70 -0
  12. gllm_datastore/cache/hybrid_cache/key_matcher/key_matcher.pyi +60 -0
  13. gllm_datastore/cache/hybrid_cache/key_matcher/semantic_key_matcher.pyi +93 -0
  14. gllm_datastore/cache/hybrid_cache/redis_hybrid_cache.pyi +34 -0
  15. gllm_datastore/cache/hybrid_cache/utils.pyi +36 -0
  16. gllm_datastore/cache/utils.pyi +34 -0
  17. gllm_datastore/cache/vector_cache/__init__.pyi +0 -0
  18. gllm_datastore/cache/vector_cache/eviction_manager/__init__.pyi +0 -0
  19. gllm_datastore/cache/vector_cache/eviction_manager/asyncio_eviction_manager.pyi +48 -0
  20. gllm_datastore/cache/vector_cache/eviction_manager/eviction_manager.pyi +38 -0
  21. gllm_datastore/cache/vector_cache/eviction_strategy/__init__.pyi +0 -0
  22. gllm_datastore/cache/vector_cache/eviction_strategy/eviction_strategy.pyi +34 -0
  23. gllm_datastore/cache/vector_cache/eviction_strategy/ttl_eviction_strategy.pyi +34 -0
  24. gllm_datastore/cache/vector_cache/vector_cache.pyi +99 -0
  25. gllm_datastore/constants.pyi +66 -0
  26. gllm_datastore/core/__init__.pyi +7 -0
  27. gllm_datastore/core/capabilities/__init__.pyi +7 -0
  28. gllm_datastore/core/capabilities/encryption_capability.pyi +21 -0
  29. gllm_datastore/core/capabilities/fulltext_capability.pyi +73 -0
  30. gllm_datastore/core/capabilities/graph_capability.pyi +70 -0
  31. gllm_datastore/core/capabilities/hybrid_capability.pyi +184 -0
  32. gllm_datastore/core/capabilities/vector_capability.pyi +90 -0
  33. gllm_datastore/core/filters/__init__.pyi +4 -0
  34. gllm_datastore/core/filters/filter.pyi +340 -0
  35. gllm_datastore/core/filters/schema.pyi +149 -0
  36. gllm_datastore/data_store/__init__.pyi +8 -0
  37. gllm_datastore/data_store/_elastic_core/__init__.pyi +0 -0
  38. gllm_datastore/data_store/_elastic_core/client_factory.pyi +66 -0
  39. gllm_datastore/data_store/_elastic_core/constants.pyi +27 -0
  40. gllm_datastore/data_store/_elastic_core/elastic_like_core.pyi +115 -0
  41. gllm_datastore/data_store/_elastic_core/index_manager.pyi +37 -0
  42. gllm_datastore/data_store/_elastic_core/query_translator.pyi +89 -0
  43. gllm_datastore/data_store/base.pyi +176 -0
  44. gllm_datastore/data_store/chroma/__init__.pyi +4 -0
  45. gllm_datastore/data_store/chroma/_chroma_import.pyi +13 -0
  46. gllm_datastore/data_store/chroma/data_store.pyi +201 -0
  47. gllm_datastore/data_store/chroma/fulltext.pyi +134 -0
  48. gllm_datastore/data_store/chroma/query.pyi +266 -0
  49. gllm_datastore/data_store/chroma/query_translator.pyi +41 -0
  50. gllm_datastore/data_store/chroma/vector.pyi +197 -0
  51. gllm_datastore/data_store/elasticsearch/__init__.pyi +5 -0
  52. gllm_datastore/data_store/elasticsearch/data_store.pyi +147 -0
  53. gllm_datastore/data_store/elasticsearch/fulltext.pyi +238 -0
  54. gllm_datastore/data_store/elasticsearch/query.pyi +118 -0
  55. gllm_datastore/data_store/elasticsearch/query_translator.pyi +18 -0
  56. gllm_datastore/data_store/elasticsearch/vector.pyi +180 -0
  57. gllm_datastore/data_store/exceptions.pyi +35 -0
  58. gllm_datastore/data_store/in_memory/__init__.pyi +5 -0
  59. gllm_datastore/data_store/in_memory/data_store.pyi +71 -0
  60. gllm_datastore/data_store/in_memory/fulltext.pyi +131 -0
  61. gllm_datastore/data_store/in_memory/query.pyi +175 -0
  62. gllm_datastore/data_store/in_memory/vector.pyi +174 -0
  63. gllm_datastore/data_store/opensearch/__init__.pyi +5 -0
  64. gllm_datastore/data_store/opensearch/data_store.pyi +160 -0
  65. gllm_datastore/data_store/opensearch/fulltext.pyi +240 -0
  66. gllm_datastore/data_store/opensearch/query.pyi +89 -0
  67. gllm_datastore/data_store/opensearch/query_translator.pyi +18 -0
  68. gllm_datastore/data_store/opensearch/vector.pyi +211 -0
  69. gllm_datastore/data_store/redis/__init__.pyi +5 -0
  70. gllm_datastore/data_store/redis/data_store.pyi +153 -0
  71. gllm_datastore/data_store/redis/fulltext.pyi +128 -0
  72. gllm_datastore/data_store/redis/query.pyi +428 -0
  73. gllm_datastore/data_store/redis/query_translator.pyi +37 -0
  74. gllm_datastore/data_store/redis/vector.pyi +131 -0
  75. gllm_datastore/data_store/sql/__init__.pyi +4 -0
  76. gllm_datastore/data_store/sql/constants.pyi +5 -0
  77. gllm_datastore/data_store/sql/data_store.pyi +201 -0
  78. gllm_datastore/data_store/sql/fulltext.pyi +164 -0
  79. gllm_datastore/data_store/sql/query.pyi +81 -0
  80. gllm_datastore/data_store/sql/query_translator.pyi +51 -0
  81. gllm_datastore/data_store/sql/schema.pyi +16 -0
  82. gllm_datastore/encryptor/__init__.pyi +4 -0
  83. gllm_datastore/encryptor/aes_gcm_encryptor.pyi +45 -0
  84. gllm_datastore/encryptor/capability/__init__.pyi +3 -0
  85. gllm_datastore/encryptor/capability/mixin.pyi +32 -0
  86. gllm_datastore/encryptor/encryptor.pyi +52 -0
  87. gllm_datastore/encryptor/key_ring/__init__.pyi +3 -0
  88. gllm_datastore/encryptor/key_ring/in_memory_key_ring.pyi +52 -0
  89. gllm_datastore/encryptor/key_ring/key_ring.pyi +45 -0
  90. gllm_datastore/encryptor/key_rotating_encryptor.pyi +60 -0
  91. gllm_datastore/graph_data_store/__init__.pyi +6 -0
  92. gllm_datastore/graph_data_store/graph_data_store.pyi +151 -0
  93. gllm_datastore/graph_data_store/graph_rag_data_store.pyi +29 -0
  94. gllm_datastore/graph_data_store/light_rag_data_store.pyi +93 -0
  95. gllm_datastore/graph_data_store/light_rag_postgres_data_store.pyi +96 -0
  96. gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +49 -0
  97. gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +78 -0
  98. gllm_datastore/graph_data_store/mixins/__init__.pyi +3 -0
  99. gllm_datastore/graph_data_store/mixins/agentic_graph_tools_mixin.pyi +175 -0
  100. gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +206 -0
  101. gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +182 -0
  102. gllm_datastore/graph_data_store/schema.pyi +27 -0
  103. gllm_datastore/graph_data_store/utils/__init__.pyi +6 -0
  104. gllm_datastore/graph_data_store/utils/constants.pyi +21 -0
  105. gllm_datastore/graph_data_store/utils/light_rag_em_invoker_adapter.pyi +56 -0
  106. gllm_datastore/graph_data_store/utils/light_rag_lm_invoker_adapter.pyi +43 -0
  107. gllm_datastore/graph_data_store/utils/llama_index_em_invoker_adapter.pyi +45 -0
  108. gllm_datastore/graph_data_store/utils/llama_index_lm_invoker_adapter.pyi +169 -0
  109. gllm_datastore/signature/__init__.pyi +0 -0
  110. gllm_datastore/signature/webhook_signature.pyi +31 -0
  111. gllm_datastore/sql_data_store/__init__.pyi +4 -0
  112. gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
  113. gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +38 -0
  114. gllm_datastore/sql_data_store/constants.pyi +6 -0
  115. gllm_datastore/sql_data_store/sql_data_store.pyi +86 -0
  116. gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +216 -0
  117. gllm_datastore/sql_data_store/types.pyi +31 -0
  118. gllm_datastore/utils/__init__.pyi +6 -0
  119. gllm_datastore/utils/converter.pyi +51 -0
  120. gllm_datastore/utils/dict.pyi +21 -0
  121. gllm_datastore/utils/ttl.pyi +25 -0
  122. gllm_datastore/utils/types.pyi +32 -0
  123. gllm_datastore/vector_data_store/__init__.pyi +6 -0
  124. gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +259 -0
  125. gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +357 -0
  126. gllm_datastore/vector_data_store/in_memory_vector_data_store.pyi +179 -0
  127. gllm_datastore/vector_data_store/mixin/__init__.pyi +0 -0
  128. gllm_datastore/vector_data_store/mixin/cache_compatible_mixin.pyi +145 -0
  129. gllm_datastore/vector_data_store/redis_vector_data_store.pyi +191 -0
  130. gllm_datastore/vector_data_store/vector_data_store.pyi +146 -0
  131. gllm_datastore.build/.gitignore +1 -0
  132. gllm_datastore.cpython-312-darwin.so +0 -0
  133. gllm_datastore.pyi +178 -0
  134. gllm_datastore_binary-0.5.50.dist-info/METADATA +185 -0
  135. gllm_datastore_binary-0.5.50.dist-info/RECORD +137 -0
  136. gllm_datastore_binary-0.5.50.dist-info/WHEEL +5 -0
  137. gllm_datastore_binary-0.5.50.dist-info/top_level.txt +1 -0
@@ -0,0 +1,149 @@
1
+ from enum import StrEnum
2
+ from pydantic import BaseModel
3
+ from typing import Any, Sequence
4
+
5
+ class FilterOperator(StrEnum):
6
+ """Operators for comparing field values."""
7
+ EQ: str
8
+ NE: str
9
+ GT: str
10
+ LT: str
11
+ GTE: str
12
+ LTE: str
13
+ IN: str
14
+ NIN: str
15
+ ANY: str
16
+ ALL: str
17
+ ARRAY_CONTAINS: str
18
+ TEXT_CONTAINS: str
19
+
20
+ class FilterCondition(StrEnum):
21
+ """Logical conditions for combining filters."""
22
+ AND: str
23
+ OR: str
24
+ NOT: str
25
+
26
+ class FilterClause(BaseModel):
27
+ '''Single filter criterion with operator support.
28
+
29
+ Examples:
30
+ ```python
31
+ FilterClause(key="metadata.age", value=25, operator=FilterOperator.GT)
32
+ FilterClause(key="metadata.status", value=["active", "pending"], operator=FilterOperator.IN)
33
+ ```
34
+
35
+ Attributes:
36
+ key (str): The field path to filter on (supports dot notation for nested fields).
37
+ value (int | float | str | bool | list[str] | list[float] | list[int] | list[bool] | None):
38
+ The value to compare against.
39
+ operator (FilterOperator): The comparison operator.
40
+ '''
41
+ key: str
42
+ value: bool | int | float | str | list[str] | list[float] | list[int] | list[bool] | None
43
+ operator: FilterOperator
44
+ def to_query_filter(self) -> QueryFilter:
45
+ '''Convert FilterClause to QueryFilter.
46
+
47
+ This method enables automatic conversion of FilterClause to QueryFilter.
48
+
49
+ Example:
50
+ ```python
51
+ clause = FilterClause(key="metadata.status", value="active", operator=FilterOperator.EQ)
52
+ query_filter = clause.to_query_filter()
53
+ # Results in: QueryFilter(filters=[clause], condition=FilterCondition.AND)
54
+ ```
55
+
56
+ Returns:
57
+ QueryFilter: A QueryFilter wrapping this FilterClause with AND condition.
58
+ '''
59
+
60
+ class QueryFilter(BaseModel):
61
+ '''Composite filter supporting multiple conditions and logical operators.
62
+
63
+ Attributes:
64
+ filters (list[FilterClause | QueryFilter]): List of filters to combine.
65
+ Can include nested QueryFilter for complex logic.
66
+ condition (FilterCondition): Logical operator to combine filters. Defaults to AND.
67
+
68
+ Examples:
69
+ 1. Simple AND: age > 25 AND status == "active"
70
+ ```python
71
+ QueryFilter(
72
+ filters=[
73
+ FilterClause(key="metadata.age", value=25, operator=FilterOperator.GT),
74
+ FilterClause(key="metadata.status", value="active", operator=FilterOperator.EQ)
75
+ ],
76
+ condition=FilterCondition.AND
77
+ )
78
+ ```
79
+
80
+ 2. Complex OR: (status == "active" OR status == "pending") AND age >= 18
81
+ ```python
82
+ QueryFilter(
83
+ filters=[
84
+ QueryFilter(
85
+ filters=[
86
+ FilterClause(key="metadata.status", value="active"),
87
+ FilterClause(key="metadata.status", value="pending")
88
+ ],
89
+ condition=FilterCondition.OR
90
+ ),
91
+ FilterClause(key="metadata.age", value=18, operator=FilterOperator.GTE)
92
+ ],
93
+ condition=FilterCondition.AND
94
+ )
95
+ ```
96
+
97
+ 3. NOT: NOT (status == "deleted")
98
+ ```python
99
+ QueryFilter(
100
+ filters=[
101
+ FilterClause(key="metadata.status", value="deleted")
102
+ ],
103
+ condition=FilterCondition.NOT
104
+ )
105
+ ```
106
+ '''
107
+ filters: list[FilterClause | QueryFilter]
108
+ condition: FilterCondition
109
+ @classmethod
110
+ def from_dicts(cls, filter_dicts: list[dict[str, Any]], condition: FilterCondition = ...) -> QueryFilter:
111
+ '''Create QueryFilter from list of filter dictionaries.
112
+
113
+ Example:
114
+ ```python
115
+ QueryFilter.from_dicts(
116
+ [
117
+ {"key": "metadata.age", "value": 25, "operator": ">"},
118
+ {"key": "metadata.status", "value": "active"}
119
+ ],
120
+ condition=FilterCondition.AND
121
+ )
122
+ ```
123
+
124
+ Args:
125
+ filter_dicts (list[dict[str, Any]]): List of filter dictionaries. Contains the key, value, and operator.
126
+ condition (FilterCondition, optional): Logical operator to combine filters. Defaults to AND.
127
+
128
+ Returns:
129
+ QueryFilter: Composite filter instance.
130
+ '''
131
+
132
+ class QueryOptions(BaseModel):
133
+ '''Model for query options.
134
+
135
+ Attributes:
136
+ include_fields (Sequence[str] | None): The fields to include in the query result. Defaults to None.
137
+ order_by (str | None): The column to order the query result by. Defaults to None.
138
+ order_desc (bool): Whether to order the query result in descending order. Defaults to False.
139
+ limit (int | None): The maximum number of rows to return. Must be >= 0. Defaults to None.
140
+
141
+ Example:
142
+ ```python
143
+ QueryOptions(include_fields=["field1", "field2"], order_by="column1", order_desc=True, limit=10)
144
+ ```
145
+ '''
146
+ include_fields: Sequence[str] | None
147
+ order_by: str | None
148
+ order_desc: bool
149
+ limit: int | None
@@ -0,0 +1,8 @@
1
+ from gllm_datastore.data_store.chroma import ChromaDataStore as ChromaDataStore
2
+ from gllm_datastore.data_store.elasticsearch import ElasticsearchDataStore as ElasticsearchDataStore
3
+ from gllm_datastore.data_store.exceptions import NotRegisteredException as NotRegisteredException, NotSupportedException as NotSupportedException
4
+ from gllm_datastore.data_store.in_memory import InMemoryDataStore as InMemoryDataStore
5
+ from gllm_datastore.data_store.opensearch import OpenSearchDataStore as OpenSearchDataStore
6
+ from gllm_datastore.data_store.redis import RedisDataStore as RedisDataStore
7
+
8
+ __all__ = ['ChromaDataStore', 'ElasticsearchDataStore', 'InMemoryDataStore', 'NotRegisteredException', 'NotSupportedException', 'OpenSearchDataStore', 'RedisDataStore']
File without changes
@@ -0,0 +1,66 @@
1
+ from elasticsearch import AsyncElasticsearch
2
+ from enum import StrEnum
3
+ from gllm_datastore.constants import DEFAULT_REQUEST_TIMEOUT as DEFAULT_REQUEST_TIMEOUT
4
+ from opensearchpy import AsyncOpenSearch
5
+ from typing import Any
6
+
7
+ ElasticLikeClient = AsyncElasticsearch | AsyncOpenSearch
8
+
9
+ class EngineType(StrEnum):
10
+ """Engine type for Elasticsearch-like clients."""
11
+ ELASTICSEARCH: str
12
+ OPENSEARCH: str
13
+
14
+ def create_client(engine: EngineType, client: ElasticLikeClient | None = None, url: str | None = None, cloud_id: str | None = None, api_key: str | None = None, username: str | None = None, password: str | None = None, request_timeout: int = ..., connection_params: dict[str, Any] | None = None) -> ElasticLikeClient:
15
+ '''Create Elasticsearch or OpenSearch client (internal use only).
16
+
17
+ This function is used internally by ElasticsearchDataStore and OpenSearchDataStore.
18
+ It is not part of the public API.
19
+
20
+ Args:
21
+ engine (EngineType): Engine type ("elasticsearch" or "opensearch").
22
+ Determines which client library to use for connection.
23
+ client (ElasticLikeClient | None, optional): Pre-configured client instance.
24
+ If provided, will be validated and returned as-is without creating a new client.
25
+ Must match the engine type (AsyncElasticsearch for "elasticsearch",
26
+ AsyncOpenSearch for "opensearch"). Defaults to None.
27
+ url (str | None, optional): The URL of the Elasticsearch or OpenSearch server.
28
+ For example, "http://localhost:9200" or "https://localhost:9200".
29
+ If URL starts with "https://", SSL/TLS will be automatically enabled with
30
+ certificate verification enabled by default. To use self-signed certificates,
31
+ set verify_certs=False in connection_params.
32
+ Defaults to None. Either url or cloud_id must be provided if client is None.
33
+ cloud_id (str | None, optional): The cloud ID of the Elasticsearch cluster.
34
+ Used for Elastic Cloud connections. Defaults to None.
35
+ Either url or cloud_id must be provided if client is None.
36
+ NOTE: Not supported for OpenSearch engine. Will raise ValueError if provided with engine="opensearch".
37
+ api_key (str | None, optional): The API key for authentication.
38
+ If provided, will be used for authentication. Mutually exclusive with username/password.
39
+ Defaults to None.
40
+ NOTE: Not supported for OpenSearch engine. Will raise ValueError if provided with engine="opensearch".
41
+ For OpenSearch, use username/password with http_auth instead.
42
+ username (str | None, optional): The username for basic authentication.
43
+ Must be provided together with password. Mutually exclusive with api_key.
44
+ Defaults to None.
45
+ password (str | None, optional): The password for basic authentication.
46
+ Must be provided together with username. Mutually exclusive with api_key.
47
+ Defaults to None.
48
+ request_timeout (int, optional): The request timeout in seconds.
49
+ Defaults to DEFAULT_REQUEST_TIMEOUT.
50
+ connection_params (dict[str, Any] | None, optional): Additional connection parameters
51
+ to override defaults. These will be merged with automatically detected parameters
52
+ (authentication, SSL settings). User-provided params take precedence. Defaults to None.
53
+ Available parameters include use_ssl, verify_certs, ssl_show_warn, max_retries,
54
+ retry_on_timeout, client_cert, client_key, root_cert, etc.
55
+
56
+ Returns:
57
+ The configured client instance.
58
+ Returns AsyncElasticsearch if engine is "elasticsearch",
59
+ AsyncOpenSearch if engine is "opensearch".
60
+
61
+ Raises:
62
+ ValueError: If neither url nor cloud_id is provided when client is None.
63
+ If cloud_id is provided for OpenSearch engine (not supported).
64
+ If api_key is provided for OpenSearch engine (not supported).
65
+ TypeError: If client is provided but has wrong type for the specified engine.
66
+ '''
@@ -0,0 +1,27 @@
1
+ class ELASTIC_RESPONSE_KEYS:
2
+ """Keys used in Elasticsearch/OpenSearch response dictionaries.
3
+
4
+ Attributes:
5
+ SUGGEST (str): Key for suggestions in the response.
6
+ AGGREGATIONS (str): Key for aggregations in the response.
7
+ HITS (str): Key for hits in the response.
8
+ HIGHLIGHT (str): Key for highlights in hit objects.
9
+ OPTIONS (str): Key for options in suggestion objects.
10
+ BUCKETS (str): Key for buckets in aggregation objects.
11
+ TEXT (str): Key for text in suggestion option objects.
12
+ SOURCE (str): Key for source document in hit objects.
13
+ ID (str): Key for document ID in hit objects.
14
+ COUNT (str): Key for count in count response.
15
+ INDEX (str): Key for index name in bulk operations.
16
+ """
17
+ SUGGEST: str
18
+ AGGREGATIONS: str
19
+ HITS: str
20
+ HIGHLIGHT: str
21
+ OPTIONS: str
22
+ BUCKETS: str
23
+ TEXT: str
24
+ SOURCE: str
25
+ ID: str
26
+ COUNT: str
27
+ INDEX: str
@@ -0,0 +1,115 @@
1
+ from _typeshed import Incomplete
2
+ from elasticsearch import AsyncElasticsearch
3
+ from elasticsearch.dsl import AttrDict as ESAttrDict
4
+ from gllm_core.schema import Chunk
5
+ from gllm_datastore.constants import METADATA_KEYS as METADATA_KEYS
6
+ from gllm_datastore.data_store._elastic_core.constants import ELASTIC_RESPONSE_KEYS as ELASTIC_RESPONSE_KEYS
7
+ from opensearchpy import AsyncOpenSearch
8
+ from opensearchpy.helpers.utils import AttrDict as OSAttrDict
9
+ from typing import Any
10
+
11
+ AttrDict = ESAttrDict | OSAttrDict
12
+
13
+ class ElasticLikeCore:
14
+ """Shared core implementation for Elasticsearch-like datastores.
15
+
16
+ This class contains the common logic shared between Elasticsearch and OpenSearch.
17
+ Product-specific datastores delegate to this core and override methods where needed.
18
+
19
+ Attributes:
20
+ index_name (str): The name of the index used for all operations.
21
+ client (AsyncElasticsearch | AsyncOpenSearch): The Elasticsearch or OpenSearch client.
22
+ Used for all index and document operations.
23
+ _logger (Logger): Logger instance for this core. Used for logging operations and errors.
24
+ """
25
+ index_name: Incomplete
26
+ client: Incomplete
27
+ def __init__(self, index_name: str, client: AsyncElasticsearch | AsyncOpenSearch) -> None:
28
+ """Initialize the shared core.
29
+
30
+ Args:
31
+ index_name (str): The name of the index to use for operations.
32
+ This index name will be used for all queries and operations.
33
+ client (AsyncElasticsearch | AsyncOpenSearch): The Elasticsearch or OpenSearch client.
34
+ Must be a properly configured async client instance.
35
+ """
36
+ async def check_index_exists(self) -> bool:
37
+ """Check if index exists.
38
+
39
+ Returns:
40
+ bool: True if the index exists, False otherwise.
41
+ """
42
+ async def get_index_count(self) -> int:
43
+ """Get document count for the index.
44
+
45
+ Returns:
46
+ int: The total number of documents in the index.
47
+ """
48
+ async def create_chunks(self, data: Chunk | list[Chunk], query_field: str = 'text', **kwargs: Any) -> None:
49
+ '''Create new records in the datastore using bulk API.
50
+
51
+ Args:
52
+ data (Chunk | list[Chunk]): Data to create (single item or collection).
53
+ query_field (str, optional): The field name to use for text content. Defaults to "text".
54
+ **kwargs: Backend-specific parameters forwarded to bulk API.
55
+
56
+ Raises:
57
+ ValueError: If data structure is invalid.
58
+ '''
59
+ def create_chunks_from_hits(self, hits: list[AttrDict], query_field: str = 'text') -> list[Chunk]:
60
+ '''Create Chunk objects from Elasticsearch/OpenSearch hits.
61
+
62
+ This method processes hits from Elasticsearch/OpenSearch DSL responses where hits are AttrDict
63
+ objects (from elasticsearch.dsl or opensearchpy.helpers.utils). The _source field is accessed via
64
+ attribute access and is always an AttrDict (nested dicts are automatically wrapped by _wrap function).
65
+
66
+ Args:
67
+ hits (list[AttrDict]): List of Elasticsearch/OpenSearch hits as AttrDict objects.
68
+ query_field (str, optional): The field name to use for text content. Defaults to "text".
69
+
70
+ Returns:
71
+ list[Chunk]: List of Chunk objects.
72
+ '''
73
+ @staticmethod
74
+ def extract_response_suggestions(response: dict[str, Any], suggestion_key: str) -> list[str]:
75
+ """Extract suggestions from Elasticsearch/OpenSearch autocomplete response.
76
+
77
+ Args:
78
+ response (dict[str, Any]): Elasticsearch/OpenSearch response.
79
+ suggestion_key (str): The suggestion key in the response.
80
+
81
+ Returns:
82
+ list[str]: List of suggestions.
83
+ """
84
+ @staticmethod
85
+ def extract_aggregation_buckets(response: dict[str, Any], aggregation_name: str) -> list[str]:
86
+ """Extract bucket keys from Elasticsearch/OpenSearch aggregation response.
87
+
88
+ Args:
89
+ response (dict[str, Any]): Elasticsearch/OpenSearch response.
90
+ aggregation_name (str): The aggregation name in the response.
91
+
92
+ Returns:
93
+ list[str]: List of bucket keys.
94
+ """
95
+ @staticmethod
96
+ def extract_highlighted_text(response: dict[str, Any], field: str) -> list[str]:
97
+ """Extract highlighted text from Elasticsearch/OpenSearch response.
98
+
99
+ Args:
100
+ response (dict[str, Any]): Elasticsearch/OpenSearch response.
101
+ field (str): The field name to extract highlights from.
102
+
103
+ Returns:
104
+ list[str]: List of unique highlighted text snippets.
105
+ """
106
+ def validate_bm25_parameters(self, k1: float | None, b: float | None) -> bool:
107
+ """Validate BM25 parameters.
108
+
109
+ Args:
110
+ k1 (float | None): BM25 parameter controlling term frequency saturation.
111
+ b (float | None): BM25 parameter controlling document length normalization.
112
+
113
+ Returns:
114
+ bool: True if parameters are valid, False otherwise.
115
+ """
@@ -0,0 +1,37 @@
1
+ from elasticsearch import AsyncElasticsearch
2
+ from opensearchpy import AsyncOpenSearch
3
+ from typing import Any
4
+
5
+ async def create_index_if_not_exists(client: AsyncElasticsearch | AsyncOpenSearch, index_name: str, mapping: dict[str, Any] | None = None, settings: dict[str, Any] | None = None) -> None:
6
+ """Create index if it doesn't exist (shared implementation).
7
+
8
+ This function checks if the index exists, and if not, creates it with the provided
9
+ mapping and settings. If the index already exists, the function returns without error.
10
+
11
+ Args:
12
+ client (AsyncElasticsearch | AsyncOpenSearch): The Elasticsearch or OpenSearch client.
13
+ Used to check index existence and create the index.
14
+ index_name (str): The name of the index to create.
15
+ Must be a valid index name according to Elasticsearch/OpenSearch naming rules.
16
+ mapping (dict[str, Any] | None, optional): Optional index mapping dictionary.
17
+ Defines the schema for fields in the index. If None, no custom mapping is applied.
18
+ Defaults to None.
19
+ settings (dict[str, Any] | None, optional): Optional index settings dictionary.
20
+ Defines index-level settings like number of shards, replicas, etc.
21
+ If None, no custom settings are applied. Defaults to None.
22
+
23
+ Raises:
24
+ RuntimeError: If index creation fails after checking existence.
25
+ """
26
+ async def delete_index_if_exists(client: AsyncElasticsearch | AsyncOpenSearch, index_name: str) -> None:
27
+ """Delete index if it exists (shared implementation).
28
+
29
+ This function checks if the index exists, and if it does, deletes it.
30
+ If the index does not exist, the function returns without error.
31
+
32
+ Args:
33
+ client (AsyncElasticsearch | AsyncOpenSearch): The Elasticsearch or OpenSearch client.
34
+ Used to check index existence and delete the index.
35
+ index_name (str): The name of the index to delete.
36
+ Must be a valid index name according to Elasticsearch/OpenSearch naming rules.
37
+ """
@@ -0,0 +1,89 @@
1
+ from abc import ABC
2
+ from elasticsearch.dsl import AsyncSearch as ESAsyncSearch
3
+ from elasticsearch.dsl.query import Query as ESQuery
4
+ from gllm_datastore.core.filters.schema import FilterClause as FilterClause, FilterCondition as FilterCondition, FilterOperator as FilterOperator, QueryFilter as QueryFilter, QueryOptions as QueryOptions
5
+ from opensearchpy._async.helpers.search import AsyncSearch as OSAsyncSearch
6
+ from opensearchpy.helpers.query import Query as OSQuery
7
+
8
+ AsyncSearchType = ESAsyncSearch | OSAsyncSearch
9
+ QueryType = ESQuery | OSQuery
10
+
11
+ def convert_filter_clause(filters: FilterClause | QueryFilter | None) -> QueryFilter | None:
12
+ """Convert FilterClause to QueryFilter if needed.
13
+
14
+ Args:
15
+ filters (FilterClause | QueryFilter | None): The filter to convert.
16
+
17
+ Returns:
18
+ QueryFilter | None: The converted QueryFilter or None if input is None.
19
+ """
20
+
21
+ class ElasticLikeQueryTranslator(ABC):
22
+ """Base class for Elasticsearch-like query translators.
23
+
24
+ This class provides shared translation logic for converting FilterClause and
25
+ QueryFilter objects to product-specific Query DSL objects. Subclasses must
26
+ implement abstract methods to create Query objects using their DSL API.
27
+
28
+ Attributes:
29
+ _logger: Logger instance for error messages.
30
+ """
31
+ def __init__(self) -> None:
32
+ """Initialize the query translator."""
33
+ def translate(self, filters: QueryFilter | None) -> QueryType | None:
34
+ """Translate a structured QueryFilter into a Query DSL object.
35
+
36
+ The translation supports comparison operators (EQ, NE, GT, LT, GTE, LTE),
37
+ array operators (IN, NIN, ARRAY_CONTAINS, ANY, ALL), text operators (TEXT_CONTAINS),
38
+ and logical conditions (AND, OR, NOT), including nested filters.
39
+
40
+ Args:
41
+ filters (QueryFilter | None): Structured QueryFilter containing filter clauses
42
+ and logical conditions. If None or empty, returns None.
43
+
44
+ Returns:
45
+ QueryType | None: Query DSL object representing the translated filters.
46
+ Returns None if no filters are provided or filters are empty.
47
+ The actual Query type depends on the product-specific implementation
48
+ (elasticsearch.dsl.query.Query or opensearchpy.helpers.query.Query).
49
+
50
+ Raises:
51
+ ValueError: When the filter structure is invalid or translation fails.
52
+ """
53
+ def apply_options(self, search: AsyncSearchType, options: QueryOptions | None) -> AsyncSearchType:
54
+ """Apply QueryOptions to an Elasticsearch/OpenSearch search object.
55
+
56
+ This method applies query options including limit, field inclusion, and sorting
57
+ to a search object. Both Elasticsearch and OpenSearch AsyncSearch objects
58
+ support the same API for these operations.
59
+
60
+ Args:
61
+ search (AsyncSearchType): Elasticsearch or OpenSearch search object to modify.
62
+ The search object will be modified in-place and returned.
63
+ options (QueryOptions | None): Query options including limit, sort, and fields.
64
+ If None, the search object is returned unchanged. Defaults to None.
65
+
66
+ Returns:
67
+ AsyncSearchType: Modified search object with options applied.
68
+ Returns the same search object instance with modifications.
69
+ """
70
+ def apply_filters_and_options(self, search: AsyncSearchType, filters: QueryFilter | None = None, options: QueryOptions | None = None) -> AsyncSearchType:
71
+ """Apply both filters and options to an Elasticsearch/OpenSearch search object.
72
+
73
+ This method applies filters first (if provided), then applies options.
74
+ Both operations modify the search object in-place.
75
+
76
+ Args:
77
+ search (AsyncSearchType): Elasticsearch or OpenSearch search object to modify.
78
+ The search object will be modified in-place and returned.
79
+ filters (QueryFilter | None, optional): QueryFilter with filters and logical condition.
80
+ If provided, filters will be translated and applied to the search query.
81
+ Defaults to None.
82
+ options (QueryOptions | None, optional): Query options including limit, sort, and fields.
83
+ If provided, options will be applied to the search object.
84
+ Defaults to None.
85
+
86
+ Returns:
87
+ AsyncSearchType: Modified search object with filters and options applied.
88
+ Returns the same search object instance with modifications.
89
+ """
@@ -0,0 +1,176 @@
1
+ from abc import ABC, abstractmethod
2
+ from enum import StrEnum
3
+ from gllm_datastore.cache import Cache as Cache
4
+ from gllm_datastore.core.capabilities import EncryptionCapability as EncryptionCapability, FulltextCapability as FulltextCapability, GraphCapability as GraphCapability, HybridCapability as HybridCapability, SearchConfig as SearchConfig, VectorCapability as VectorCapability
5
+ from gllm_datastore.core.filters.schema import FilterClause as FilterClause, QueryFilter as QueryFilter
6
+ from gllm_datastore.data_store.exceptions import NotRegisteredException as NotRegisteredException, NotSupportedException as NotSupportedException
7
+ from gllm_datastore.encryptor.encryptor import BaseEncryptor as BaseEncryptor
8
+ from gllm_inference.em_invoker.em_invoker import BaseEMInvoker
9
+ from typing import Any, Self
10
+
11
class CapabilityType(StrEnum):
    """Enumeration of supported capability types."""
    # Each member names one capability a datastore may support/register;
    # values are strings (StrEnum), usable wherever a plain str is expected.
    FULLTEXT: str
    GRAPH: str
    HYBRID: str
    VECTOR: str
18
class BaseDataStore(ABC):
    """Base class for datastores with multiple capabilities.

    This class provides the infrastructure for capability composition and
    delegation. Datastores inherit from this class and register capability
    handlers based on their configuration.
    """
    def __init__(self) -> None:
        """Initialize the datastore with specified capabilities."""
    @property
    @abstractmethod
    def supported_capabilities(self) -> list[CapabilityType]:
        """Return list of currently supported capabilities.

        A data store might have more capabilities than the ones that are currently registered.
        Each data store should implement this method to return the list of supported capabilities.

        Returns:
            list[CapabilityType]: List of capability types that are supported.

        Raises:
            NotImplementedError: If the method is not implemented by subclass.
        """
    @property
    def registered_capabilities(self) -> list[CapabilityType]:
        """Return list of currently registered capabilities.

        Returns:
            list[CapabilityType]: List of capability types that are registered and available.
        """
    @property
    def fulltext(self) -> FulltextCapability:
        """Access fulltext capability if supported.

        Returns:
            FulltextCapability: Fulltext capability handler.

        Raises:
            NotSupportedException: If fulltext capability is not supported.
        """
    @property
    def vector(self) -> VectorCapability:
        """Access vector capability if supported.

        Returns:
            VectorCapability: Vector capability handler.

        Raises:
            NotSupportedException: If vector capability is not supported.
        """
    @property
    def graph(self) -> GraphCapability:
        """Access graph capability if supported.

        Returns:
            GraphCapability: Graph capability handler.

        Raises:
            NotSupportedException: If graph capability is not supported.
        """
    @property
    def hybrid(self) -> HybridCapability:
        """Access hybrid capability if supported.

        Returns:
            HybridCapability: Hybrid capability handler.

        Raises:
            NotSupportedException: If hybrid capability is not supported.
            NotRegisteredException: If hybrid capability is not registered.
        """
    def with_fulltext(self, **kwargs) -> Self:
        """Configure fulltext capability and return datastore instance.

        Args:
            **kwargs: Fulltext capability configuration parameters.

        Returns:
            Self: Self for method chaining.
        """
    def with_vector(self, em_invoker: BaseEMInvoker, **kwargs) -> Self:
        """Configure vector capability and return datastore instance.

        Args:
            em_invoker (BaseEMInvoker): Embedding model invoker (required).
            **kwargs: Vector capability configuration parameters.

        Returns:
            Self: Self for method chaining.
        """
    def with_graph(self, **kwargs) -> Self:
        """Configure graph capability and return datastore instance.

        Args:
            **kwargs: Graph capability configuration parameters.

        Returns:
            Self: Self for method chaining.
        """
    def with_encryption(self, encryptor: BaseEncryptor, fields: set[str] | list[str]) -> Self:
        """Enable encryption for specified fields.

        Encryption works transparently - users don't need to access it directly.
        It's automatically used by fulltext and vector capabilities.

        Args:
            encryptor (BaseEncryptor): The encryptor instance to use. Must not be None.
            fields (set[str] | list[str]): Set or list of field names to encrypt. Must not be empty.

        Returns:
            Self: Self for method chaining.

        Raises:
            ValueError: If encryptor is None or fields is empty.
        """
    def with_hybrid(self, config: list[SearchConfig], **kwargs) -> Self:
        """Configure hybrid capability and return datastore instance.

        Args:
            config (list[SearchConfig]): List of search configurations for hybrid search.
            **kwargs: Additional hybrid capability configuration parameters.

        Returns:
            Self: Self for method chaining.
        """
    def as_cache(self, eviction_manager: Any | None = None, matching_strategy: Any = None) -> Cache:
        """Create a Cache instance from this datastore.

        Args:
            eviction_manager (Any | None, optional): Optional eviction manager for cache eviction.
                Defaults to None.
            matching_strategy (Any, optional): Default matching strategy for cache retrieval.
                Defaults to None.

        Returns:
            Cache: Instance wrapping this datastore.

        Raises:
            ValueError: If required capabilities not registered.
        """
    @classmethod
    def translate_query_filter(cls, query_filter: FilterClause | QueryFilter, **kwargs) -> Any:
        """Translate QueryFilter or FilterClause to datastore's native filter syntax.

        This method provides a public interface for converting the GLLM DataStore's
        QueryFilter DSL into each datastore's native filter format. Subclasses must
        implement this method to provide their specific translation logic.

        Args:
            query_filter (FilterClause | QueryFilter): The filter to translate.
                Can be a single FilterClause or a QueryFilter with multiple clauses.
            **kwargs: Additional keyword arguments for the datastore's native filter syntax.

        Returns:
            Any: The translated filter in the datastore's native format.

        Raises:
            NotImplementedError: If not implemented by subclass.
        """