gllm-datastore-binary 0.5.50__cp312-cp312-macosx_13_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. gllm_datastore/__init__.pyi +0 -0
  2. gllm_datastore/cache/__init__.pyi +4 -0
  3. gllm_datastore/cache/base.pyi +84 -0
  4. gllm_datastore/cache/cache.pyi +137 -0
  5. gllm_datastore/cache/hybrid_cache/__init__.pyi +5 -0
  6. gllm_datastore/cache/hybrid_cache/file_system_hybrid_cache.pyi +50 -0
  7. gllm_datastore/cache/hybrid_cache/hybrid_cache.pyi +115 -0
  8. gllm_datastore/cache/hybrid_cache/in_memory_hybrid_cache.pyi +29 -0
  9. gllm_datastore/cache/hybrid_cache/key_matcher/__init__.pyi +5 -0
  10. gllm_datastore/cache/hybrid_cache/key_matcher/exact_key_matcher.pyi +44 -0
  11. gllm_datastore/cache/hybrid_cache/key_matcher/fuzzy_key_matcher.pyi +70 -0
  12. gllm_datastore/cache/hybrid_cache/key_matcher/key_matcher.pyi +60 -0
  13. gllm_datastore/cache/hybrid_cache/key_matcher/semantic_key_matcher.pyi +93 -0
  14. gllm_datastore/cache/hybrid_cache/redis_hybrid_cache.pyi +34 -0
  15. gllm_datastore/cache/hybrid_cache/utils.pyi +36 -0
  16. gllm_datastore/cache/utils.pyi +34 -0
  17. gllm_datastore/cache/vector_cache/__init__.pyi +0 -0
  18. gllm_datastore/cache/vector_cache/eviction_manager/__init__.pyi +0 -0
  19. gllm_datastore/cache/vector_cache/eviction_manager/asyncio_eviction_manager.pyi +48 -0
  20. gllm_datastore/cache/vector_cache/eviction_manager/eviction_manager.pyi +38 -0
  21. gllm_datastore/cache/vector_cache/eviction_strategy/__init__.pyi +0 -0
  22. gllm_datastore/cache/vector_cache/eviction_strategy/eviction_strategy.pyi +34 -0
  23. gllm_datastore/cache/vector_cache/eviction_strategy/ttl_eviction_strategy.pyi +34 -0
  24. gllm_datastore/cache/vector_cache/vector_cache.pyi +99 -0
  25. gllm_datastore/constants.pyi +66 -0
  26. gllm_datastore/core/__init__.pyi +7 -0
  27. gllm_datastore/core/capabilities/__init__.pyi +7 -0
  28. gllm_datastore/core/capabilities/encryption_capability.pyi +21 -0
  29. gllm_datastore/core/capabilities/fulltext_capability.pyi +73 -0
  30. gllm_datastore/core/capabilities/graph_capability.pyi +70 -0
  31. gllm_datastore/core/capabilities/hybrid_capability.pyi +184 -0
  32. gllm_datastore/core/capabilities/vector_capability.pyi +90 -0
  33. gllm_datastore/core/filters/__init__.pyi +4 -0
  34. gllm_datastore/core/filters/filter.pyi +340 -0
  35. gllm_datastore/core/filters/schema.pyi +149 -0
  36. gllm_datastore/data_store/__init__.pyi +8 -0
  37. gllm_datastore/data_store/_elastic_core/__init__.pyi +0 -0
  38. gllm_datastore/data_store/_elastic_core/client_factory.pyi +66 -0
  39. gllm_datastore/data_store/_elastic_core/constants.pyi +27 -0
  40. gllm_datastore/data_store/_elastic_core/elastic_like_core.pyi +115 -0
  41. gllm_datastore/data_store/_elastic_core/index_manager.pyi +37 -0
  42. gllm_datastore/data_store/_elastic_core/query_translator.pyi +89 -0
  43. gllm_datastore/data_store/base.pyi +176 -0
  44. gllm_datastore/data_store/chroma/__init__.pyi +4 -0
  45. gllm_datastore/data_store/chroma/_chroma_import.pyi +13 -0
  46. gllm_datastore/data_store/chroma/data_store.pyi +201 -0
  47. gllm_datastore/data_store/chroma/fulltext.pyi +134 -0
  48. gllm_datastore/data_store/chroma/query.pyi +266 -0
  49. gllm_datastore/data_store/chroma/query_translator.pyi +41 -0
  50. gllm_datastore/data_store/chroma/vector.pyi +197 -0
  51. gllm_datastore/data_store/elasticsearch/__init__.pyi +5 -0
  52. gllm_datastore/data_store/elasticsearch/data_store.pyi +147 -0
  53. gllm_datastore/data_store/elasticsearch/fulltext.pyi +238 -0
  54. gllm_datastore/data_store/elasticsearch/query.pyi +118 -0
  55. gllm_datastore/data_store/elasticsearch/query_translator.pyi +18 -0
  56. gllm_datastore/data_store/elasticsearch/vector.pyi +180 -0
  57. gllm_datastore/data_store/exceptions.pyi +35 -0
  58. gllm_datastore/data_store/in_memory/__init__.pyi +5 -0
  59. gllm_datastore/data_store/in_memory/data_store.pyi +71 -0
  60. gllm_datastore/data_store/in_memory/fulltext.pyi +131 -0
  61. gllm_datastore/data_store/in_memory/query.pyi +175 -0
  62. gllm_datastore/data_store/in_memory/vector.pyi +174 -0
  63. gllm_datastore/data_store/opensearch/__init__.pyi +5 -0
  64. gllm_datastore/data_store/opensearch/data_store.pyi +160 -0
  65. gllm_datastore/data_store/opensearch/fulltext.pyi +240 -0
  66. gllm_datastore/data_store/opensearch/query.pyi +89 -0
  67. gllm_datastore/data_store/opensearch/query_translator.pyi +18 -0
  68. gllm_datastore/data_store/opensearch/vector.pyi +211 -0
  69. gllm_datastore/data_store/redis/__init__.pyi +5 -0
  70. gllm_datastore/data_store/redis/data_store.pyi +153 -0
  71. gllm_datastore/data_store/redis/fulltext.pyi +128 -0
  72. gllm_datastore/data_store/redis/query.pyi +428 -0
  73. gllm_datastore/data_store/redis/query_translator.pyi +37 -0
  74. gllm_datastore/data_store/redis/vector.pyi +131 -0
  75. gllm_datastore/data_store/sql/__init__.pyi +4 -0
  76. gllm_datastore/data_store/sql/constants.pyi +5 -0
  77. gllm_datastore/data_store/sql/data_store.pyi +201 -0
  78. gllm_datastore/data_store/sql/fulltext.pyi +164 -0
  79. gllm_datastore/data_store/sql/query.pyi +81 -0
  80. gllm_datastore/data_store/sql/query_translator.pyi +51 -0
  81. gllm_datastore/data_store/sql/schema.pyi +16 -0
  82. gllm_datastore/encryptor/__init__.pyi +4 -0
  83. gllm_datastore/encryptor/aes_gcm_encryptor.pyi +45 -0
  84. gllm_datastore/encryptor/capability/__init__.pyi +3 -0
  85. gllm_datastore/encryptor/capability/mixin.pyi +32 -0
  86. gllm_datastore/encryptor/encryptor.pyi +52 -0
  87. gllm_datastore/encryptor/key_ring/__init__.pyi +3 -0
  88. gllm_datastore/encryptor/key_ring/in_memory_key_ring.pyi +52 -0
  89. gllm_datastore/encryptor/key_ring/key_ring.pyi +45 -0
  90. gllm_datastore/encryptor/key_rotating_encryptor.pyi +60 -0
  91. gllm_datastore/graph_data_store/__init__.pyi +6 -0
  92. gllm_datastore/graph_data_store/graph_data_store.pyi +151 -0
  93. gllm_datastore/graph_data_store/graph_rag_data_store.pyi +29 -0
  94. gllm_datastore/graph_data_store/light_rag_data_store.pyi +93 -0
  95. gllm_datastore/graph_data_store/light_rag_postgres_data_store.pyi +96 -0
  96. gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +49 -0
  97. gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +78 -0
  98. gllm_datastore/graph_data_store/mixins/__init__.pyi +3 -0
  99. gllm_datastore/graph_data_store/mixins/agentic_graph_tools_mixin.pyi +175 -0
  100. gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +206 -0
  101. gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +182 -0
  102. gllm_datastore/graph_data_store/schema.pyi +27 -0
  103. gllm_datastore/graph_data_store/utils/__init__.pyi +6 -0
  104. gllm_datastore/graph_data_store/utils/constants.pyi +21 -0
  105. gllm_datastore/graph_data_store/utils/light_rag_em_invoker_adapter.pyi +56 -0
  106. gllm_datastore/graph_data_store/utils/light_rag_lm_invoker_adapter.pyi +43 -0
  107. gllm_datastore/graph_data_store/utils/llama_index_em_invoker_adapter.pyi +45 -0
  108. gllm_datastore/graph_data_store/utils/llama_index_lm_invoker_adapter.pyi +169 -0
  109. gllm_datastore/signature/__init__.pyi +0 -0
  110. gllm_datastore/signature/webhook_signature.pyi +31 -0
  111. gllm_datastore/sql_data_store/__init__.pyi +4 -0
  112. gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
  113. gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +38 -0
  114. gllm_datastore/sql_data_store/constants.pyi +6 -0
  115. gllm_datastore/sql_data_store/sql_data_store.pyi +86 -0
  116. gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +216 -0
  117. gllm_datastore/sql_data_store/types.pyi +31 -0
  118. gllm_datastore/utils/__init__.pyi +6 -0
  119. gllm_datastore/utils/converter.pyi +51 -0
  120. gllm_datastore/utils/dict.pyi +21 -0
  121. gllm_datastore/utils/ttl.pyi +25 -0
  122. gllm_datastore/utils/types.pyi +32 -0
  123. gllm_datastore/vector_data_store/__init__.pyi +6 -0
  124. gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +259 -0
  125. gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +357 -0
  126. gllm_datastore/vector_data_store/in_memory_vector_data_store.pyi +179 -0
  127. gllm_datastore/vector_data_store/mixin/__init__.pyi +0 -0
  128. gllm_datastore/vector_data_store/mixin/cache_compatible_mixin.pyi +145 -0
  129. gllm_datastore/vector_data_store/redis_vector_data_store.pyi +191 -0
  130. gllm_datastore/vector_data_store/vector_data_store.pyi +146 -0
  131. gllm_datastore.build/.gitignore +1 -0
  132. gllm_datastore.cpython-312-darwin.so +0 -0
  133. gllm_datastore.pyi +178 -0
  134. gllm_datastore_binary-0.5.50.dist-info/METADATA +185 -0
  135. gllm_datastore_binary-0.5.50.dist-info/RECORD +137 -0
  136. gllm_datastore_binary-0.5.50.dist-info/WHEEL +5 -0
  137. gllm_datastore_binary-0.5.50.dist-info/top_level.txt +1 -0
@@ -0,0 +1,145 @@
1
+ from abc import ABC, abstractmethod
2
+ from datetime import datetime
3
+ from gllm_datastore.cache.cache import MatchingStrategy as MatchingStrategy
4
+ from gllm_datastore.cache.vector_cache.eviction_manager.eviction_manager import BaseEvictionManager as BaseEvictionManager
5
+ from gllm_datastore.cache.vector_cache.vector_cache import VectorCache as VectorCache
6
+ from gllm_datastore.constants import METADATA_KEYS as METADATA_KEYS
7
+ from typing import Any
8
+
9
class CacheCompatibleMixin(ABC):
    """Mixin that equips vector datastores with cache-oriented operations.

    A vector datastore that mixes in this class can be wrapped as a
    ``VectorCache`` via :meth:`as_cache`. The mixin declares the exact,
    fuzzy, and semantic matching hooks plus the eviction hooks that the
    cache layer depends on, so that only cache-capable datastores need
    to implement them.
    """

    async def store_cache(self, key: str, value: Any, metadata: dict[str, Any] | None = None) -> None:
        """Persist a cache entry in the underlying storage.

        Args:
            key (str): Key under which the cache data is stored.
            value (Any): Cache payload to persist.
            metadata (dict[str, Any] | None, optional): Extra metadata stored
                alongside the entry. Defaults to None.
        """

    @abstractmethod
    async def exact_match(self, key: str, **kwargs) -> Any | None:
        """Return chunks whose stored key is identical to ``key``.

        Subclasses must provide the concrete implementation.

        Args:
            key (str): The key to match.
            **kwargs (Any): Backend-specific matching options.

        Returns:
            Any: Chunks that exactly match the key.

        Raises:
            NotImplementedError: If the subclass does not implement this.
        """

    @abstractmethod
    async def fuzzy_match(self, key: str, max_distance: int = 2, **kwargs) -> Any | None:
        """Return chunks that approximately match ``key`` via fuzzy matching.

        Subclasses must provide the concrete implementation.

        Args:
            key (str): The key to match.
            max_distance (int): Maximum Levenshtein distance allowed for a
                match; lower values are stricter. Defaults to 2.
            **kwargs (Any): Backend-specific matching options.

        Returns:
            Any: Chunks that fuzzy-match the key within the threshold.

        Raises:
            NotImplementedError: If the subclass does not implement this.
        """

    @abstractmethod
    async def semantic_match(self, key: str, min_similarity: float = 0.8, metadata: dict[str, Any] | None = None, **kwargs) -> Any | None:
        """Return chunks that semantically match ``key`` via vector similarity.

        Subclasses must provide the concrete implementation.

        Args:
            key (str): The key to match.
            min_similarity (float): Minimum similarity score in [0, 1];
                higher values are stricter. Defaults to 0.8.
            metadata (dict[str, Any] | None, optional): Metadata filter applied
                to the search. Defaults to None.
            **kwargs (Any): Backend-specific matching options.

        Returns:
            Any: Chunks whose similarity to the key exceeds the threshold.

        Raises:
            NotImplementedError: If the subclass does not implement this.
        """

    @abstractmethod
    async def delete_expired_entries(self, now: datetime, max_size: int = 10000) -> None:
        """Remove entries that have expired (TTL-based eviction).

        Subclasses must provide the concrete implementation.

        Args:
            now (datetime): Reference time used to decide expiry.
            max_size (int): Upper bound on entries processed. Defaults to 10000.

        Raises:
            NotImplementedError: If the subclass does not implement this.
        """

    @abstractmethod
    async def delete_least_frequently_used_entries(self, num_entries: int) -> None:
        """Remove the least frequently used entries (LFU eviction).

        Subclasses must provide the concrete implementation.

        Args:
            num_entries (int): Number of entries to evict.

        Raises:
            NotImplementedError: If the subclass does not implement this.
        """

    @abstractmethod
    async def delete_least_recently_used_entries(self, num_entries: int) -> None:
        """Remove the least recently used entries (LRU eviction).

        Subclasses must provide the concrete implementation.

        Args:
            num_entries (int): Number of entries to evict.

        Raises:
            NotImplementedError: If the subclass does not implement this.
        """

    @abstractmethod
    async def delete_entries_by_key(self, key: str | list[str], metadata: dict[str, Any] | None = None) -> None:
        '''Remove the entries stored under the given key(s).

        Subclasses must provide the concrete implementation.

        Args:
            key (str): The key whose entries are deleted.
            metadata (dict[str, Any] | None, optional): Metadata filter applied
                to the search, e.g. `{"key": "value"}`. Defaults to None.

        Raises:
            NotImplementedError: If the subclass does not implement this.
        '''

    def as_cache(self, eviction_manager: BaseEvictionManager | None = None, matching_strategy: MatchingStrategy = 'exact', matching_config: dict[str, Any] | None = None, saving_config: dict[str, Any] | None = None) -> VectorCache:
        """Wrap this datastore as a cache for storing and retrieving data.

        Args:
            eviction_manager (Optional[BaseEvictionManager], optional): Manager
                driving cache eviction. Defaults to None, meaning no eviction.
            matching_strategy (MatchingStrategy, optional): Key-matching
                strategy. Defaults to MatchingStrategy.EXACT.
            matching_config (dict[str, Any] | None, optional): Parameters for
                the matching strategy. Defaults to None (no configuration).
            saving_config (dict[str, Any] | None, optional): Parameters for
                the saving strategy. Defaults to None (no configuration).

        Returns:
            VectorCache: A cache instance backed by this datastore.
        """
@@ -0,0 +1,191 @@
1
+ from _typeshed import Incomplete
2
+ from datetime import datetime
3
+ from gllm_core.schema.chunk import Chunk
4
+ from gllm_datastore.constants import DEFAULT_TOP_K as DEFAULT_TOP_K, METADATA_KEYS as METADATA_KEYS
5
+ from gllm_datastore.utils.converter import cosine_distance_to_similarity_score as cosine_distance_to_similarity_score, similarity_score_to_cosine_distance as similarity_score_to_cosine_distance
6
+ from gllm_datastore.vector_data_store.mixin.cache_compatible_mixin import CacheCompatibleMixin as CacheCompatibleMixin
7
+ from gllm_datastore.vector_data_store.vector_data_store import BaseVectorDataStore as BaseVectorDataStore
8
+ from gllm_inference.em_invoker.em_invoker import BaseEMInvoker
9
+ from redis import Redis as Redis
10
+ from redisvl.query.filter import FilterExpression as FilterExpression
11
+ from typing import Any
12
+
13
# Redis Search caps fuzzy matching at a small Levenshtein distance; this
# module-level constant records the configured maximum.
FUZZY_MATCH_MAX_DISTANCE: int

class RedisVectorDataStore(BaseVectorDataStore, CacheCompatibleMixin):
    """Redis-backed vector data store built on RedisVL vector search.

    Provides vector storage and retrieval against Redis using the Redis
    Vector Search capabilities exposed through RedisVL and langchain-redis.

    Attributes:
        redis_url (str): URL for Redis connection.
        index_name (str): Name for the vector index.
        search_index (SearchIndex): RedisVL SearchIndex instance.
        cache_store (SemanticCache): RedisVL SemanticCache instance.
        embedding (BaseEMInvoker | None): The embedding model to perform vectorization.
    """

    index_name: Incomplete
    url: Incomplete
    client: Incomplete
    filterable_fields: Incomplete
    cache_store: Incomplete

    def __init__(self, index_name: str, url: str | None = None, client: Redis | None = None, embedding: BaseEMInvoker | None = None, additional_filter_fields: list[dict[str, Any]] | None = None) -> None:
        '''Set up the Redis vector store via RedisVL and langchain-redis.

        Args:
            index_name (str): Name of the index to use.
            url (str): URL for Redis connection.
            client (Redis | None, optional): Redis client to use for vectorization.
            embedding (BaseEMInvoker | None, optional): Embedding function used
                for vectorization. Defaults to None, in which case the default
                embedding model (redis/langcache-embed-v1) is used.
            additional_filter_fields (list[dict[str, Any]] | None, optional): Extra
                filterable fields added to the index, e.g. pass
                `[{"name": "entry_id", "type": "text"}]` to make `entry_id`
                filterable. Defaults to None.

        Notes:
            In addition to `additional_filter_fields`, these default filterable
            fields are always created:
            1. prompt: TEXT (default from redisvl).
            2. response: TEXT (default from redisvl).
            3. prompt_vector: VECTOR (default from redisvl).
            4. chunk_id: TEXT (default additional_filter_fields).

        Raises:
            TypeError: If `embedding` is not an instance of `BaseEMInvoker`.
        '''

    async def get_size(self) -> int:
        """Count the vectors currently held in the index.

        Returns 0 when the index has not been initialized.

        Returns:
            int: The total number of vectors.
        """

    async def query(self, query: str, top_k: int = ..., retrieval_params: dict[str, Any] | None = None) -> list[Chunk]:
        """Search for semantically similar documents, including similarity scores.

        Args:
            query (str): The query text to search for.
            top_k (int): Number of top results to return.
            retrieval_params (dict[str, Any] | None, optional): Extra query
                parameters such as:
                - filter: Redis filter expression to narrow results following
                  RedisVL FilterExpression.

        Returns:
            list[Chunk]: List of chunks semantically similar to the query
        """

    async def query_by_id(self, id_: str | list[str]) -> list[Chunk]:
        """Fetch chunks by their IDs.

        Args:
            id_ (str | list[str]): A single ID or list of chunk IDs to retrieve

        Returns:
            list[Chunk]: List of retrieved chunks
        """

    async def add_chunks(self, chunks: Chunk | list[Chunk], **kwargs) -> list[str]:
        """Insert chunks into the vector store.

        Args:
            chunks (Chunk | list[Chunk]): A single chunk or a list of chunks to add
            **kwargs: Additional parameters for adding chunks

        Returns:
            list[str]: List of IDs of the added chunks
        """

    async def delete_chunks(self, query: str, **kwargs: Any) -> None:
        '''Delete chunks matched by a filter/query. Not supported for the Redis backend.

        Args:
            query (str): The query to delete chunks by. For example, "user_*" would match keys
                like "user_1", "user_2", etc.
            **kwargs: Additional keyword arguments.
        '''

    async def delete_chunks_by_ids(self, ids: str | list[str], **kwargs: Any) -> None:
        """Delete chunks from the vector store given their IDs.

        Args:
            ids (str | list[str]): A single ID or a list of IDs to delete.
            **kwargs: Additional keyword arguments.
        """

    async def exact_match(self, key: str, metadata: dict[str, Any] | None = None) -> Any | None:
        '''Look up chunks whose stored prompt equals the given prompt exactly.

        Args:
            key (str): The prompt to match.
            metadata (dict[str, Any] | None, optional): Metadata filter applied
                to the search, e.g. `{"key": "value"}`. Defaults to None.

        Returns:
            Any: The value stored with the matching prompt, or None if no match is found.
        '''

    async def fuzzy_match(self, key: str, max_distance: int = 2, metadata: dict[str, Any] | None = None) -> Any | None:
        '''Look up chunks approximately matching the key via fuzzy matching.

        Args:
            key (str): The key to match
            max_distance (int): Maximum allowed distance for fuzzy matching
                (higher values allow for more differences). Maximum is 3. Defaults to 2.
            metadata (dict[str, Any] | None, optional): Metadata filter applied
                to the search, e.g. `{"key": "value"}`. Defaults to None.

        Note:
            Maximum fuzzy distance is 3. This is a limitation of the Redis Vector Search and the Redis Search module.
            See [5] for more details.

        Returns:
            Any: The value with the closest fuzzy match, or None if no match is found
        '''

    async def semantic_match(self, key: str, min_similarity: float = 0.8, metadata: dict[str, Any] | None = None) -> Any | None:
        '''Look up chunks semantically matching the key via vector similarity.

        Compares the vector embedding of the search key against the embeddings
        of stored keys to locate semantically similar matches.

        Args:
            key (str): The key to match
            min_similarity (float, optional): Minimum similarity score in
                [0, 1]; higher values are stricter. Defaults to 0.8.
            metadata (dict[str, Any] | None, optional): Metadata filter applied
                to the search, e.g. `{"key": "value"}`. Defaults to None.

        Returns:
            Any: The semantically closest value, or None if no match meets the threshold
        '''

    async def delete_expired_entries(self, now: datetime, max_size: int = 10000) -> None:
        """Remove expired entries (TTL eviction).

        Args:
            now (datetime): Reference time used to decide expiry.
            max_size (int): Upper bound on entries processed. Defaults to 10000.

        Raises:
            NotImplementedError: Currently, app-level eviction is not supported for RedisVectorDataStore.
        """

    async def delete_least_frequently_used_entries(self, num_entries: int) -> None:
        """Remove the least frequently used entries (LFU eviction).

        Args:
            num_entries (int): Number of entries to evict.

        Raises:
            NotImplementedError: Currently, app-level eviction is not supported for RedisVectorDataStore.
        """

    async def delete_least_recently_used_entries(self, num_entries: int) -> None:
        """Remove the least recently used entries (LRU eviction).

        Args:
            num_entries (int): Number of entries to evict.

        Raises:
            NotImplementedError: Currently, app-level eviction is not supported for RedisVectorDataStore.
        """

    async def delete_entries_by_key(self, key: str | list[str], metadata: dict[str, Any] | None = None) -> None:
        '''Remove the entries stored under the given key(s).

        Args:
            key (str | list[str]): The key or list of keys to delete entries for.
            metadata (dict[str, Any] | None, optional): Metadata filter applied
                to the search, e.g. `{"key": "value"}`. Defaults to None.
        '''

    async def clear(self) -> None:
        """Clear all entries in the storage."""
@@ -0,0 +1,146 @@
1
+ from abc import ABC, abstractmethod
2
+ from gllm_core.schema.chunk import Chunk
3
+ from gllm_datastore.constants import DEFAULT_TOP_K as DEFAULT_TOP_K
4
+ from gllm_inference.em_invoker.em_invoker import BaseEMInvoker
5
+ from langchain_core.embeddings import Embeddings
6
+ from typing import Any
7
+
8
class BaseVectorDataStore(ABC):
    """Abstract base class for vector data stores in the retrieval system.

    Defines the contract every vector data store implementation must honor.
    Concrete subclasses are required to implement `query` and `query_by_id`
    (plus the other abstract methods below).
    """

    @property
    def embedding(self) -> BaseEMInvoker | Embeddings | None:
        """The embedding model bound to this data store.

        Returns:
            BaseEMInvoker | Embeddings | None: The embedding model.
        """

    async def get_size(self) -> int:
        """Count the vectors currently held in the index.

        Returns 0 when the index has not been initialized.

        Returns:
            int: The total number of vectors.
        """

    @abstractmethod
    async def query(self, query: str, top_k: int = ..., retrieval_params: dict[str, Any] | None = None) -> list[Chunk]:
        """Run a query against the data store.

        Subclasses must provide the concrete implementation.

        Args:
            query (str): The query string to execute.
            top_k (int, optional): Maximum number of results. Defaults to DEFAULT_TOP_K.
            retrieval_params (dict[str, Any] | None, optional): Extra query
                parameters. Defaults to None.

        Returns:
            list[Chunk]: A list of query results.

        Raises:
            NotImplementedError: If the subclass does not implement this.
        """

    @abstractmethod
    async def query_by_id(self, id_: str | list[str]) -> list[Chunk]:
        """Fetch chunks by their IDs.

        Subclasses must provide the concrete implementation.

        Args:
            id_ (str | list[str]): A single ID or a list of IDs to retrieve.

        Returns:
            list[Chunk]: A list of retrieved chunks.

        Raises:
            NotImplementedError: If the subclass does not implement this.
        """

    @abstractmethod
    async def add_chunks(self, chunk: Chunk | list[Chunk], **kwargs) -> list[str]:
        """Index a chunk or a list of chunks in the data store.

        Subclasses must provide the concrete implementation.

        Args:
            chunk (Chunk | list[Chunk]): A single chunk or a list of chunks to index.
            **kwargs: Additional keyword arguments to pass to the method.

        Returns:
            list[str]: Unique identifiers (IDs) assigned to the added chunks.

        Raises:
            NotImplementedError: If the subclass does not implement this.
        """

    @abstractmethod
    async def delete_chunks(self, **kwargs: Any) -> None:
        """Remove chunks selected by a filter or query.

        Subclasses must provide the concrete implementation.

        Args:
            **kwargs: Keyword arguments describing the filter or query for
                deletion; exact parameters depend on the backend implementation.

        Raises:
            NotImplementedError: If the subclass does not implement this.
        """

    @abstractmethod
    async def delete_chunks_by_ids(self, ids: str | list[str], **kwargs: Any) -> None:
        """Remove a chunk or a list of chunks given their IDs.

        Subclasses must provide the concrete implementation.

        Args:
            ids (str | list[str]): A single ID or a list of IDs to delete.
            **kwargs: Additional keyword arguments.

        Raises:
            NotImplementedError: If the subclass does not implement this.
        """

    async def clear(self) -> None:
        """Clear all entries in the storage.

        Subclasses should provide the concrete implementation.
        """

    async def query_by_field(self, retrieval_params: dict[str, Any], limit: int | None = None, **kwargs) -> list[Chunk]:
        """Fetch documents whose metadata satisfies specific constraints.

        Filters stored chunks on metadata values rather than vector
        similarity — useful for structured lookups such as all chunks from a
        given source, carrying a certain label, or authored by a given user.

        Unlike semantic search, `query_by_field` works purely on each
        document's metadata fields, enabling precise key/value filtering.

        Args:
            retrieval_params (dict[str, Any]): A dictionary defining filter criteria.
            limit (int | None, optional): Maximum number of results; when None,
                all matching documents are returned.
            **kwargs: Additional arguments for datastore-specific behavior or
                filtering logic.

        Returns:
            list[Chunk]: `Chunk` objects that satisfy the metadata criteria.

        Raises:
            NotImplementedError: If not implemented in the subclass.
        """

    async def query_by_vector(self, vector: list[float], top_k: int = ..., min_similarity: float = 0.8, retrieval_params: dict | None = None) -> list[Chunk]:
        """Search for documents similar to a given embedding vector.

        Args:
            vector (list[float]): The query embedding vector to compare against stored vectors.
            top_k (int, optional): Number of top results to return. Defaults to DEFAULT_TOP_K.
            min_similarity (float): Minimum similarity score for vector similarity.
            retrieval_params (dict | None, optional): Filter parameters to narrow the search:
                - filter (Where): Metadata-based filter.
                - where_document (WhereDocument): Content-based filter.
                Defaults to None.

        Returns:
            list[Chunk]: Chunk objects with similarity scores based on the input vector.
        """
@@ -0,0 +1 @@
1
+ *
Binary file
gllm_datastore.pyi ADDED
@@ -0,0 +1,178 @@
1
+ # This file was generated by Nuitka
2
+
3
+ # Stubs included by default
4
+
5
+
6
+ __name__ = ...
7
+
8
+
9
+
10
+ # Modules used internally, to allow implicit dependencies to be seen:
11
+ import os
12
+ import abc
13
+ import enum
14
+ import typing
15
+ import asyncio
16
+ import functools
17
+ import json
18
+ import collections
19
+ import collections.OrderedDict
20
+ import datetime
21
+ import gllm_core
22
+ import gllm_core.schema
23
+ import gllm_core.schema.chunk
24
+ import gllm_core.utils
25
+ import gllm_datastore.core.filters.FilterClause
26
+ import gllm_datastore.core.filters.QueryFilter
27
+ import gllm_datastore.core.filters.QueryOptions
28
+ import gzip
29
+ import pickle
30
+ import shutil
31
+ import time
32
+ import gllm_core.utils.logger_manager
33
+ import posixpath
34
+ import gllm_datastore.cache.hybrid_cache.key_matcher.ExactKeyMatcher
35
+ import gllm_datastore.utils.convert_ttl_to_seconds
36
+ import gllm_core.utils.imports
37
+ import Levenshtein
38
+ import gllm_datastore.vector_data_store.ElasticsearchVectorDataStore
39
+ import redis
40
+ import hashlib
41
+ import gllm_datastore.core.filters.FilterCondition
42
+ import gllm_datastore.core.filters.FilterOperator
43
+ import __future__
44
+ import gllm_inference
45
+ import gllm_inference.em_invoker
46
+ import gllm_inference.em_invoker.em_invoker
47
+ import gllm_inference.schema
48
+ import pydantic
49
+ import gllm_datastore.data_store.chroma.ChromaDataStore
50
+ import gllm_datastore.data_store.elasticsearch.ElasticsearchDataStore
51
+ import gllm_datastore.data_store.in_memory.InMemoryDataStore
52
+ import gllm_datastore.data_store.opensearch.OpenSearchDataStore
53
+ import gllm_datastore.data_store.redis.RedisDataStore
54
+ import elasticsearch
55
+ import opensearchpy
56
+ import gllm_datastore.core.capabilities.EncryptionCapability
57
+ import gllm_datastore.core.capabilities.FulltextCapability
58
+ import gllm_datastore.core.capabilities.GraphCapability
59
+ import gllm_datastore.core.capabilities.HybridCapability
60
+ import gllm_datastore.core.capabilities.SearchConfig
61
+ import gllm_datastore.core.capabilities.VectorCapability
62
+ import gllm_datastore.cache.Cache
63
+ import gllm_datastore.cache.MatchingStrategy
64
+ import chromadb
65
+ import sys
66
+ import pysqlite3
67
+ import rapidfuzz
68
+ import rapidfuzz.distance
69
+ import logging
70
+ import re
71
+ import dataclasses
72
+ import gllm_inference.em_invoker.langchain
73
+ import langchain_core
74
+ import langchain_core.runnables
75
+ import langchain_chroma
76
+ import elasticsearch.dsl
77
+ import elasticsearch.dsl.query
78
+ import gllm_core.utils.retry
79
+ import gllm_datastore.utils.flatten_dict
80
+ import langchain_elasticsearch
81
+ import langchain_elasticsearch.vectorstores
82
+ import collections.abc
83
+ import gllm_core.utils.similarity
84
+ import opensearchpy._async
85
+ import opensearchpy._async.helpers
86
+ import opensearchpy._async.helpers.search
87
+ import opensearchpy.helpers
88
+ import opensearchpy.helpers.query
89
+ import opensearchpy._async.helpers.update_by_query
90
+ import opensearchpy.exceptions
91
+ import opensearchpy.helpers.update_by_query
92
+ import gllm_core.utils.concurrency
93
+ import langchain_community
94
+ import langchain_community.vectorstores
95
+ import redis.asyncio
96
+ import redis.asyncio.client
97
+ import redis.exceptions
98
+ import redis.commands
99
+ import redis.commands.search
100
+ import redis.commands.search.query
101
+ import redis.commands.search.field
102
+ import redis.commands.search.indexDefinition
103
+ import numpy
104
+ import redisvl
105
+ import redisvl.redis
106
+ import redisvl.redis.utils
107
+ import redisvl.index
108
+ import redisvl.query
109
+ import redisvl.schema
110
+ import sqlalchemy
111
+ import sqlalchemy.ext
112
+ import sqlalchemy.ext.asyncio
113
+ import sqlalchemy.exc
114
+ import sqlalchemy.orm
115
+ import sqlalchemy.engine
116
+ import sqlalchemy.sql
117
+ import sqlalchemy.sql.expression
118
+ import base64
119
+ import cryptography
120
+ import cryptography.hazmat
121
+ import cryptography.hazmat.primitives
122
+ import cryptography.hazmat.primitives.ciphers
123
+ import cryptography.hazmat.primitives.ciphers.aead
124
+ import threading
125
+ import lightrag
126
+ import tempfile
127
+ import contextlib
128
+ import gllm_inference.lm_invoker
129
+ import gllm_inference.lm_invoker.lm_invoker
130
+ import lightrag.kg
131
+ import lightrag.kg.shared_storage
132
+ import llama_index
133
+ import llama_index.core
134
+ import llama_index.core.base
135
+ import llama_index.core.base.embeddings
136
+ import llama_index.core.base.embeddings.base
137
+ import llama_index.core.graph_stores
138
+ import llama_index.core.graph_stores.types
139
+ import llama_index.core.llms
140
+ import gllm_datastore.graph_data_store.utils.LlamaIndexEMInvokerAdapter
141
+ import gllm_datastore.graph_data_store.utils.LlamaIndexLMInvokerAdapter
142
+ import llama_index.graph_stores
143
+ import llama_index.graph_stores.neo4j
144
+ import nebula3
145
+ import nebula3.Config
146
+ import nebula3.data
147
+ import nebula3.data.DataObject
148
+ import nebula3.gclient
149
+ import nebula3.gclient.net
150
+ import textwrap
151
+ import neo4j
152
+ import neo4j.exceptions
153
+ import gllm_core.schema.graph
154
+ import lightrag.base
155
+ import nest_asyncio
156
+ import llama_index.core.bridge
157
+ import llama_index.core.bridge.pydantic
158
+ import llama_index.core.constants
159
+ import llama_index.core.base.llms
160
+ import llama_index.core.base.llms.types
161
+ import hmac
162
+ import pandas
163
+ import concurrent
164
+ import concurrent.futures
165
+ import concurrent.futures.Future
166
+ import concurrent.futures.ThreadPoolExecutor
167
+ import uuid
168
+ import langchain_core.documents
169
+ import langchain_core.embeddings
170
+ import langchain_core.runnables.config
171
+ import chromadb.types
172
+ import inspect
173
+ import redisvl.extensions
174
+ import redisvl.extensions.cache
175
+ import redisvl.extensions.cache.llm
176
+ import redisvl.utils
177
+ import redisvl.utils.vectorize
178
+ import redisvl.query.filter