gllm-datastore-binary 0.5.50__cp312-cp312-macosx_13_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. gllm_datastore/__init__.pyi +0 -0
  2. gllm_datastore/cache/__init__.pyi +4 -0
  3. gllm_datastore/cache/base.pyi +84 -0
  4. gllm_datastore/cache/cache.pyi +137 -0
  5. gllm_datastore/cache/hybrid_cache/__init__.pyi +5 -0
  6. gllm_datastore/cache/hybrid_cache/file_system_hybrid_cache.pyi +50 -0
  7. gllm_datastore/cache/hybrid_cache/hybrid_cache.pyi +115 -0
  8. gllm_datastore/cache/hybrid_cache/in_memory_hybrid_cache.pyi +29 -0
  9. gllm_datastore/cache/hybrid_cache/key_matcher/__init__.pyi +5 -0
  10. gllm_datastore/cache/hybrid_cache/key_matcher/exact_key_matcher.pyi +44 -0
  11. gllm_datastore/cache/hybrid_cache/key_matcher/fuzzy_key_matcher.pyi +70 -0
  12. gllm_datastore/cache/hybrid_cache/key_matcher/key_matcher.pyi +60 -0
  13. gllm_datastore/cache/hybrid_cache/key_matcher/semantic_key_matcher.pyi +93 -0
  14. gllm_datastore/cache/hybrid_cache/redis_hybrid_cache.pyi +34 -0
  15. gllm_datastore/cache/hybrid_cache/utils.pyi +36 -0
  16. gllm_datastore/cache/utils.pyi +34 -0
  17. gllm_datastore/cache/vector_cache/__init__.pyi +0 -0
  18. gllm_datastore/cache/vector_cache/eviction_manager/__init__.pyi +0 -0
  19. gllm_datastore/cache/vector_cache/eviction_manager/asyncio_eviction_manager.pyi +48 -0
  20. gllm_datastore/cache/vector_cache/eviction_manager/eviction_manager.pyi +38 -0
  21. gllm_datastore/cache/vector_cache/eviction_strategy/__init__.pyi +0 -0
  22. gllm_datastore/cache/vector_cache/eviction_strategy/eviction_strategy.pyi +34 -0
  23. gllm_datastore/cache/vector_cache/eviction_strategy/ttl_eviction_strategy.pyi +34 -0
  24. gllm_datastore/cache/vector_cache/vector_cache.pyi +99 -0
  25. gllm_datastore/constants.pyi +66 -0
  26. gllm_datastore/core/__init__.pyi +7 -0
  27. gllm_datastore/core/capabilities/__init__.pyi +7 -0
  28. gllm_datastore/core/capabilities/encryption_capability.pyi +21 -0
  29. gllm_datastore/core/capabilities/fulltext_capability.pyi +73 -0
  30. gllm_datastore/core/capabilities/graph_capability.pyi +70 -0
  31. gllm_datastore/core/capabilities/hybrid_capability.pyi +184 -0
  32. gllm_datastore/core/capabilities/vector_capability.pyi +90 -0
  33. gllm_datastore/core/filters/__init__.pyi +4 -0
  34. gllm_datastore/core/filters/filter.pyi +340 -0
  35. gllm_datastore/core/filters/schema.pyi +149 -0
  36. gllm_datastore/data_store/__init__.pyi +8 -0
  37. gllm_datastore/data_store/_elastic_core/__init__.pyi +0 -0
  38. gllm_datastore/data_store/_elastic_core/client_factory.pyi +66 -0
  39. gllm_datastore/data_store/_elastic_core/constants.pyi +27 -0
  40. gllm_datastore/data_store/_elastic_core/elastic_like_core.pyi +115 -0
  41. gllm_datastore/data_store/_elastic_core/index_manager.pyi +37 -0
  42. gllm_datastore/data_store/_elastic_core/query_translator.pyi +89 -0
  43. gllm_datastore/data_store/base.pyi +176 -0
  44. gllm_datastore/data_store/chroma/__init__.pyi +4 -0
  45. gllm_datastore/data_store/chroma/_chroma_import.pyi +13 -0
  46. gllm_datastore/data_store/chroma/data_store.pyi +201 -0
  47. gllm_datastore/data_store/chroma/fulltext.pyi +134 -0
  48. gllm_datastore/data_store/chroma/query.pyi +266 -0
  49. gllm_datastore/data_store/chroma/query_translator.pyi +41 -0
  50. gllm_datastore/data_store/chroma/vector.pyi +197 -0
  51. gllm_datastore/data_store/elasticsearch/__init__.pyi +5 -0
  52. gllm_datastore/data_store/elasticsearch/data_store.pyi +147 -0
  53. gllm_datastore/data_store/elasticsearch/fulltext.pyi +238 -0
  54. gllm_datastore/data_store/elasticsearch/query.pyi +118 -0
  55. gllm_datastore/data_store/elasticsearch/query_translator.pyi +18 -0
  56. gllm_datastore/data_store/elasticsearch/vector.pyi +180 -0
  57. gllm_datastore/data_store/exceptions.pyi +35 -0
  58. gllm_datastore/data_store/in_memory/__init__.pyi +5 -0
  59. gllm_datastore/data_store/in_memory/data_store.pyi +71 -0
  60. gllm_datastore/data_store/in_memory/fulltext.pyi +131 -0
  61. gllm_datastore/data_store/in_memory/query.pyi +175 -0
  62. gllm_datastore/data_store/in_memory/vector.pyi +174 -0
  63. gllm_datastore/data_store/opensearch/__init__.pyi +5 -0
  64. gllm_datastore/data_store/opensearch/data_store.pyi +160 -0
  65. gllm_datastore/data_store/opensearch/fulltext.pyi +240 -0
  66. gllm_datastore/data_store/opensearch/query.pyi +89 -0
  67. gllm_datastore/data_store/opensearch/query_translator.pyi +18 -0
  68. gllm_datastore/data_store/opensearch/vector.pyi +211 -0
  69. gllm_datastore/data_store/redis/__init__.pyi +5 -0
  70. gllm_datastore/data_store/redis/data_store.pyi +153 -0
  71. gllm_datastore/data_store/redis/fulltext.pyi +128 -0
  72. gllm_datastore/data_store/redis/query.pyi +428 -0
  73. gllm_datastore/data_store/redis/query_translator.pyi +37 -0
  74. gllm_datastore/data_store/redis/vector.pyi +131 -0
  75. gllm_datastore/data_store/sql/__init__.pyi +4 -0
  76. gllm_datastore/data_store/sql/constants.pyi +5 -0
  77. gllm_datastore/data_store/sql/data_store.pyi +201 -0
  78. gllm_datastore/data_store/sql/fulltext.pyi +164 -0
  79. gllm_datastore/data_store/sql/query.pyi +81 -0
  80. gllm_datastore/data_store/sql/query_translator.pyi +51 -0
  81. gllm_datastore/data_store/sql/schema.pyi +16 -0
  82. gllm_datastore/encryptor/__init__.pyi +4 -0
  83. gllm_datastore/encryptor/aes_gcm_encryptor.pyi +45 -0
  84. gllm_datastore/encryptor/capability/__init__.pyi +3 -0
  85. gllm_datastore/encryptor/capability/mixin.pyi +32 -0
  86. gllm_datastore/encryptor/encryptor.pyi +52 -0
  87. gllm_datastore/encryptor/key_ring/__init__.pyi +3 -0
  88. gllm_datastore/encryptor/key_ring/in_memory_key_ring.pyi +52 -0
  89. gllm_datastore/encryptor/key_ring/key_ring.pyi +45 -0
  90. gllm_datastore/encryptor/key_rotating_encryptor.pyi +60 -0
  91. gllm_datastore/graph_data_store/__init__.pyi +6 -0
  92. gllm_datastore/graph_data_store/graph_data_store.pyi +151 -0
  93. gllm_datastore/graph_data_store/graph_rag_data_store.pyi +29 -0
  94. gllm_datastore/graph_data_store/light_rag_data_store.pyi +93 -0
  95. gllm_datastore/graph_data_store/light_rag_postgres_data_store.pyi +96 -0
  96. gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +49 -0
  97. gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +78 -0
  98. gllm_datastore/graph_data_store/mixins/__init__.pyi +3 -0
  99. gllm_datastore/graph_data_store/mixins/agentic_graph_tools_mixin.pyi +175 -0
  100. gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +206 -0
  101. gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +182 -0
  102. gllm_datastore/graph_data_store/schema.pyi +27 -0
  103. gllm_datastore/graph_data_store/utils/__init__.pyi +6 -0
  104. gllm_datastore/graph_data_store/utils/constants.pyi +21 -0
  105. gllm_datastore/graph_data_store/utils/light_rag_em_invoker_adapter.pyi +56 -0
  106. gllm_datastore/graph_data_store/utils/light_rag_lm_invoker_adapter.pyi +43 -0
  107. gllm_datastore/graph_data_store/utils/llama_index_em_invoker_adapter.pyi +45 -0
  108. gllm_datastore/graph_data_store/utils/llama_index_lm_invoker_adapter.pyi +169 -0
  109. gllm_datastore/signature/__init__.pyi +0 -0
  110. gllm_datastore/signature/webhook_signature.pyi +31 -0
  111. gllm_datastore/sql_data_store/__init__.pyi +4 -0
  112. gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
  113. gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +38 -0
  114. gllm_datastore/sql_data_store/constants.pyi +6 -0
  115. gllm_datastore/sql_data_store/sql_data_store.pyi +86 -0
  116. gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +216 -0
  117. gllm_datastore/sql_data_store/types.pyi +31 -0
  118. gllm_datastore/utils/__init__.pyi +6 -0
  119. gllm_datastore/utils/converter.pyi +51 -0
  120. gllm_datastore/utils/dict.pyi +21 -0
  121. gllm_datastore/utils/ttl.pyi +25 -0
  122. gllm_datastore/utils/types.pyi +32 -0
  123. gllm_datastore/vector_data_store/__init__.pyi +6 -0
  124. gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +259 -0
  125. gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +357 -0
  126. gllm_datastore/vector_data_store/in_memory_vector_data_store.pyi +179 -0
  127. gllm_datastore/vector_data_store/mixin/__init__.pyi +0 -0
  128. gllm_datastore/vector_data_store/mixin/cache_compatible_mixin.pyi +145 -0
  129. gllm_datastore/vector_data_store/redis_vector_data_store.pyi +191 -0
  130. gllm_datastore/vector_data_store/vector_data_store.pyi +146 -0
  131. gllm_datastore.build/.gitignore +1 -0
  132. gllm_datastore.cpython-312-darwin.so +0 -0
  133. gllm_datastore.pyi +178 -0
  134. gllm_datastore_binary-0.5.50.dist-info/METADATA +185 -0
  135. gllm_datastore_binary-0.5.50.dist-info/RECORD +137 -0
  136. gllm_datastore_binary-0.5.50.dist-info/WHEEL +5 -0
  137. gllm_datastore_binary-0.5.50.dist-info/top_level.txt +1 -0
@@ -0,0 +1,93 @@
+ from _typeshed import Incomplete
+ from gllm_datastore.cache.hybrid_cache.key_matcher.key_matcher import BaseKeyMatcher as BaseKeyMatcher
+ from gllm_datastore.vector_data_store import ElasticsearchVectorDataStore as ElasticsearchVectorDataStore
+ from gllm_datastore.vector_data_store.vector_data_store import BaseVectorDataStore as BaseVectorDataStore
+
+ class SemanticKeyMatcher(BaseKeyMatcher):
+     """A key matcher that performs a semantic matching strategy.
+
+     This implementation uses semantic matching to find the closest match between the input key
+     and the cached keys. The similarity is calculated using a vector data store instance.
+
+     Attributes:
+         vector_data_store (BaseVectorDataStore): The vector data store to be used for semantic matching.
+         max_distance_ratio (float): The ratio of key length to use as the maximum Levenshtein distance
+             for a key to match with the cached keys (e.g., 0.05 means 5% of key length).
+
+     Note:
+         Since the semantic matching heavily depends on the semantic similarity between the key and the cached
+         key, it should only be used when the key is a plain string. Semantic matching SHOULD NOT be used when the key
+         is a hash / encryption of the input data.
+
+         Additionally, the semantic matching currently has the following tech debts:
+         1. The distance evaluated against the threshold is calculated using the Levenshtein distance.
+            This will be updated to use the semantic score once the vector data store supports retrieval with scores.
+         2. The vector data store currently only supports `ElasticsearchVectorDataStore`.
+            This should be updated once the vector data store supports a general interface to delete and clear all
+            chunks from the vector data store.
+     """
+     vector_data_store: Incomplete
+     max_distance_ratio: Incomplete
+     def __init__(self, vector_data_store: BaseVectorDataStore, max_distance_ratio: float = 0.05) -> None:
+         """Initialize a new instance of the `SemanticKeyMatcher` class.
+
+         Args:
+             vector_data_store (BaseVectorDataStore): The vector data store to be used for semantic matching.
+             max_distance_ratio (float, optional): The ratio of key length to use as the maximum Levenshtein distance
+                 for a key to match with the cached keys (e.g., 0.05 means 5% of key length). Must be between 0 and 1.
+                 Defaults to 0.05.
+
+         Raises:
+             ValueError: If the max distance ratio is not between 0 and 1.
+             ValueError: If the vector data store is not an instance of `ElasticsearchVectorDataStore`.
+         """
+     async def store(self, key: str) -> None:
+         """Store the key as additional information during the matching process.
+
+         This method adds the key to the vector data store.
+
+         Args:
+             key (str): The key to be stored.
+         """
+     async def retrieve(self, key: str, cached_keys: set[str]) -> str | None:
+         """Retrieve the key with the semantic matching strategy.
+
+         This method performs semantic matching as follows:
+         1. Retrieve the most similar key from the vector data store.
+         2. Calculate the distance between the input key and the retrieved key.
+         3. Calculate the maximum distance as a ratio of the input key length.
+         4. If the distance is less than or equal to the maximum distance and the retrieved key exists in cached_keys,
+            return the retrieved key. Otherwise, return None.
+
+         Note:
+             As of now, the distance evaluated against the threshold is calculated using the Levenshtein distance.
+             This will be updated to use the semantic score once the vector data store supports retrieval with scores.
+
+         Args:
+             key (str): The input key to be matched.
+             cached_keys (set[str]): The set of cached keys to be matched.
+
+         Returns:
+             str | None: The matched key if it exists in cached_keys, otherwise None.
+         """
+     async def delete(self, key: str | list[str]) -> None:
+         """Delete the key stored as additional information during the matching process.
+
+         This method deletes the key from the vector data store.
+
+         Note:
+             As of now, this is only compatible with the `ElasticsearchVectorDataStore` implementation.
+             This should be updated once the vector data store supports deletion of chunks by query.
+
+         Args:
+             key (str | list[str]): The key(s) to be deleted.
+         """
+     async def clear(self) -> None:
+         """Clear all the keys that are stored as additional information during the matching process.
+
+         Note:
+             As of now, this is only compatible with the `ElasticsearchVectorDataStore` implementation.
+             This should be updated once the vector data store supports deletion of chunks by query.
+
+         This method deletes all keys from the vector data store.
+         """
@@ -0,0 +1,34 @@
+ from _typeshed import Incomplete
+ from gllm_datastore.cache.hybrid_cache.hybrid_cache import BaseHybridCache as BaseHybridCache
+ from gllm_datastore.cache.hybrid_cache.key_matcher.key_matcher import BaseKeyMatcher as BaseKeyMatcher
+
+ class RedisHybridCache(BaseHybridCache):
+     """A hybrid cache that stores data in Redis.
+
+     The `RedisHybridCache` class utilizes Redis to store the cache data.
+
+     Attributes:
+         client (StrictRedis): The Redis client.
+         key_matcher (BaseKeyMatcher): The key matcher that defines the cache key matching strategy.
+     """
+     client: Incomplete
+     def __init__(self, host: str, port: int, password: str, db: int = 0, ssl: bool = False, key_matcher: BaseKeyMatcher | None = None) -> None:
+         """Initializes a new instance of the RedisHybridCache class.
+
+         Args:
+             host (str): The host of the Redis server.
+             port (int): The port of the Redis server.
+             password (str): The password for the Redis server.
+             db (int, optional): The database number. Defaults to 0.
+             ssl (bool, optional): Whether to use SSL. Defaults to False.
+             key_matcher (BaseKeyMatcher, optional): The key matcher to use. Defaults to None, in which case the
+                 `ExactKeyMatcher` will be used.
+         """
+     async def retrieve_all_keys(self) -> set[str]:
+         """Retrieves all keys from the storage.
+
+         This method filters out and deletes any expired keys before returning the set.
+
+         Returns:
+             set[str]: A set of all keys in the storage.
+         """
@@ -0,0 +1,36 @@
+ from _typeshed import Incomplete
+ from typing import Any
+
+ logger: Incomplete
+
+ def generate_cache_key(input_: str, key_prefix: str = '') -> str:
+     '''Generate a cache key from the input string.
+
+     This function generates a cache key from the input string.
+     If the input is a valid file path, the function will hash the file.
+     If the input is not a valid file path, the function will hash the input string.
+
+     Args:
+         input_ (str): The input string to generate the cache key from.
+         key_prefix (str, optional): The prefix of the cache key. Defaults to "".
+
+     Returns:
+         str: The generated cache key.
+
+     Raises:
+         Exception: If the input file path exists but cannot be read.
+     '''
+ def generate_key_from_func(func_name: str, *args: Any, **kwargs: Any) -> str:
+     """Generate a cache key based on the function name and arguments.
+
+     The key is created by hashing the function name, positional arguments, and keyword arguments.
+     If the function name, positional arguments, or keyword arguments are modified, the cache key will change.
+
+     Args:
+         func_name (str): Name of the function being cached.
+         *args (Any): Positional arguments passed to the function.
+         **kwargs (Any): Keyword arguments passed to the function.
+
+     Returns:
+         str: SHA-256 hash digest to be used as the cache key.
+     """
@@ -0,0 +1,34 @@
+ from typing import Any
+
+ def generate_key_from_func(func_name: str, *args, **kwargs) -> str:
+     """Generate a cache key based on function name and arguments.
+
+     Args:
+         func_name (str): The name of the function.
+         *args: Positional arguments passed to the function.
+         **kwargs: Keyword arguments passed to the function.
+
+     Returns:
+         str: A string key for caching.
+     """
+ def generate_cache_id(key: str) -> str:
+     """Generate a cache entry ID from a key.
+
+     Args:
+         key (str): The cache key.
+
+     Returns:
+         str: A cache entry ID.
+     """
+ def serialize_pydantic(obj: Any) -> dict[str, Any]:
+     """Custom JSON serializer for Pydantic models and other objects.
+
+     Args:
+         obj (Any): The object to serialize.
+
+     Returns:
+         dict[str, Any]: The serialized object.
+
+     Raises:
+         TypeError: If the object cannot be serialized.
+     """
File without changes
@@ -0,0 +1,48 @@
+ from _typeshed import Incomplete
+ from gllm_datastore.cache.vector_cache.eviction_manager.eviction_manager import BaseEvictionManager as BaseEvictionManager
+ from gllm_datastore.cache.vector_cache.eviction_strategy.eviction_strategy import BaseEvictionStrategy as BaseEvictionStrategy
+ from gllm_datastore.data_store.base import BaseDataStore as BaseDataStore
+ from gllm_datastore.vector_data_store.vector_data_store import BaseVectorDataStore as BaseVectorDataStore
+
+ class AsyncIOEvictionManager(BaseEvictionManager):
+     """Eviction manager using asyncio for background tasks.
+
+     The `AsyncIOEvictionManager` is responsible for:
+     1. Starting and stopping the background task that performs the eviction check and eviction process.
+     2. Providing the eviction strategy to use for the eviction process.
+
+     This eviction manager should be used in the application that is using the cache, not in the database itself.
+     It is specifically designed to handle vector datastores that do not have their own eviction policies or a
+     specific eviction strategy.
+
+     The `AsyncIOEvictionManager` is typically used as follows:
+     1. When the `VectorCache` is initialized, it starts the background task.
+     2. When the `VectorCache` is shut down, it stops the background task.
+     """
+     vector_store: Incomplete
+     eviction_strategy: Incomplete
+     check_interval: Incomplete
+     task: Incomplete
+     running: bool
+     def __init__(self, vector_store: BaseVectorDataStore | BaseDataStore, eviction_strategy: BaseEvictionStrategy, check_interval: int = 60) -> None:
+         """Initialize the asyncio eviction manager.
+
+         Args:
+             vector_store (BaseVectorDataStore | BaseDataStore): The vector datastore to manage evictions for.
+             eviction_strategy (BaseEvictionStrategy): The eviction strategy to use.
+             check_interval (int, optional): How often to check for entries to evict (in seconds). Defaults to 60.
+         """
+     def start(self) -> None:
+         """Start the background task for evicting entries.
+
+         This method starts the background task that periodically checks for entries to evict from the vector
+         datastore and evicts them if necessary using the specified eviction strategy.
+
+         If a task already exists and is not done or cancelled, a new one will not be started.
+         """
+     def stop(self) -> None:
+         """Stop the background task for evicting entries.
+
+         This method stops the background task that periodically checks for entries to evict from the vector
+         datastore and evicts them if necessary using the specified eviction strategy.
+         """
@@ -0,0 +1,38 @@
+ from _typeshed import Incomplete
+ from abc import ABC, abstractmethod
+ from gllm_datastore.cache.vector_cache.eviction_strategy.eviction_strategy import BaseEvictionStrategy as BaseEvictionStrategy
+ from gllm_datastore.data_store.base import BaseDataStore as BaseDataStore
+ from gllm_datastore.vector_data_store.vector_data_store import BaseVectorDataStore as BaseVectorDataStore
+
+ class BaseEvictionManager(ABC):
+     """Base class for eviction managers that handle the eviction process."""
+     vector_store: Incomplete
+     eviction_strategy: Incomplete
+     check_interval: Incomplete
+     def __init__(self, vector_store: BaseVectorDataStore | BaseDataStore, eviction_strategy: BaseEvictionStrategy, check_interval: int = 60) -> None:
+         """Initialize the eviction manager.
+
+         Args:
+             vector_store (BaseVectorDataStore | BaseDataStore): The datastore that will be managed by the
+                 eviction manager.
+             eviction_strategy (BaseEvictionStrategy): The eviction strategy to use.
+             check_interval (int, optional): How often to check for entries to evict (seconds). Defaults to 60.
+         """
+     @abstractmethod
+     def start(self) -> None:
+         """Start the eviction checking process.
+
+         This method should be implemented by subclasses.
+
+         Raises:
+             NotImplementedError: If the method is not implemented by the subclass.
+         """
+     @abstractmethod
+     def stop(self) -> None:
+         """Stop the eviction checking process.
+
+         This method should be implemented by subclasses.
+
+         Raises:
+             NotImplementedError: If the method is not implemented by the subclass.
+         """
@@ -0,0 +1,34 @@
+ from abc import ABC, abstractmethod
+ from gllm_datastore.data_store.base import BaseDataStore as BaseDataStore
+ from gllm_datastore.vector_data_store.mixin.cache_compatible_mixin import CacheCompatibleMixin as CacheCompatibleMixin
+ from typing import Any
+
+ class BaseEvictionStrategy(ABC):
+     """Base class for eviction strategies."""
+     @abstractmethod
+     async def prepare_metadata(self, **kwargs) -> dict[str, Any]:
+         """Prepare metadata for a new cache entry.
+
+         This method should be implemented by subclasses to define how metadata should be prepared
+         for a new cache entry.
+
+         Args:
+             **kwargs: Additional keyword arguments to pass to the eviction strategy.
+
+         Returns:
+             dict[str, Any]: A dictionary containing metadata for the new entry.
+
+         Raises:
+             NotImplementedError: If the method is not implemented by the subclass.
+         """
+     @abstractmethod
+     async def evict(self, vector_store: CacheCompatibleMixin | BaseDataStore) -> None:
+         """Evict entries based on the eviction policy.
+
+         This method should be implemented by subclasses to define how entries should be selected
+         for eviction.
+
+         Args:
+             vector_store (CacheCompatibleMixin | BaseDataStore): The cache store to use for eviction.
+
+         Raises:
+             NotImplementedError: If the method is not implemented by the subclass.
+         """
@@ -0,0 +1,34 @@
+ from _typeshed import Incomplete
+ from gllm_datastore.cache.vector_cache.eviction_strategy.eviction_strategy import BaseEvictionStrategy as BaseEvictionStrategy
+ from gllm_datastore.constants import METADATA_KEYS as METADATA_KEYS
+ from gllm_datastore.data_store.base import BaseDataStore as BaseDataStore
+ from gllm_datastore.utils import convert_ttl_to_seconds as convert_ttl_to_seconds
+ from gllm_datastore.vector_data_store.mixin.cache_compatible_mixin import CacheCompatibleMixin as CacheCompatibleMixin
+ from typing import Any
+
+ class TTLEvictionStrategy(BaseEvictionStrategy):
+     """Eviction strategy based on time-to-live."""
+     ttl: Incomplete
+     def __init__(self, ttl: int | str) -> None:
+         '''Initialize the TTL eviction strategy.
+
+         Args:
+             ttl (int | str): The time-to-live for the cache. This can be an integer (in seconds)
+                 or a string (e.g., "1h", "30m").
+         '''
+     async def prepare_metadata(self, ttl: int | str | None = None) -> dict[str, Any]:
+         '''Prepare metadata with an expiration time if a TTL is provided.
+
+         Args:
+             ttl (int | str | None, optional): The time-to-live for the cache, as an integer in seconds or a
+                 string (e.g., "1h"). If -1 is passed, the cache will not expire. Defaults to None, in which
+                 case the class-defined TTL will be used.
+
+         Returns:
+             dict[str, Any]: Metadata dictionary containing the creation time and expiration time.
+         '''
+     async def evict(self, vector_store: CacheCompatibleMixin | BaseDataStore) -> None:
+         """Evict expired entries based on the time-to-live eviction policy.
+
+         Args:
+             vector_store (CacheCompatibleMixin | BaseDataStore): The cache store to use for eviction.
+         """
@@ -0,0 +1,99 @@
+ from _typeshed import Incomplete
+ from gllm_datastore.cache.cache import BaseCache as BaseCache, MatchingStrategy as MatchingStrategy
+ from gllm_datastore.cache.utils import generate_key_from_func as generate_key_from_func, serialize_pydantic as serialize_pydantic
+ from gllm_datastore.cache.vector_cache.eviction_manager.eviction_manager import BaseEvictionManager as BaseEvictionManager
+ from gllm_datastore.vector_data_store.mixin.cache_compatible_mixin import CacheCompatibleMixin as CacheCompatibleMixin
+ from typing import Any, Callable
+
+ class VectorCache(BaseCache):
+     """Cache interface that uses a vector datastore for storage and retrieval."""
+     vector_store: Incomplete
+     eviction_manager: Incomplete
+     eviction_strategy: Incomplete
+     matching_strategy: Incomplete
+     matching_config: Incomplete
+     saving_config: Incomplete
+     def __init__(self, vector_store: CacheCompatibleMixin, eviction_manager: BaseEvictionManager | None = None, matching_strategy: MatchingStrategy = ..., matching_config: dict[str, Any] | None = None, saving_config: dict[str, Any] | None = None) -> None:
+         """Initialize the vector cache.
+
+         Args:
+             vector_store (CacheCompatibleMixin): The vector datastore to use for storage.
+                 Must inherit both CacheCompatibleMixin and BaseVectorDataStore.
+             eviction_manager (BaseEvictionManager | None, optional): The eviction manager to use for cache eviction.
+                 Defaults to None, in which case no eviction will be performed.
+             matching_strategy (MatchingStrategy, optional): The strategy to use for matching keys.
+                 Defaults to MatchingStrategy.EXACT.
+             matching_config (dict[str, Any] | None, optional): Configuration parameters for matching strategies.
+                 Defaults to None, which means no specific configuration is provided.
+             saving_config (dict[str, Any] | None, optional): Configuration parameters for saving strategies.
+                 Defaults to None, which means no specific configuration is provided.
+         """
+     def cache(self, key_func: Callable | None = None, name: str = '', matching_strategy: MatchingStrategy | None = None, matching_config: dict[str, Any] | None = None, saving_config: dict[str, Any] | None = None) -> Callable:
+         '''Decorator for caching function results.
+
+         This decorator caches the results of the decorated function using this cache storage.
+         The cache key is generated using the provided key function or a default key generation
+         based on the function name and arguments.
+
+         Both synchronous and asynchronous functions are supported.
+
+         Args:
+             key_func (Callable | None, optional): A function to generate the cache key.
+                 If None, a default key generation will be used.
+             name (str, optional): The name of the cache. This can be used to identify the cache in logs or metrics.
+                 Defaults to an empty string.
+             matching_strategy (MatchingStrategy | None, optional): The strategy to use for matching keys.
+                 This can be one of the values from the MatchingStrategy enum. Defaults to None, in which case
+                 the class-level matching strategy will be used.
+             matching_config (dict[str, Any] | None, optional): Configuration parameters for matching strategies.
+                 Defaults to None, in which case the class-level matching config will be used.
+             saving_config (dict[str, Any] | None, optional): Configuration parameters for saving strategies.
+                 Defaults to None, in which case the class-level saving config will be used.
+
+         Example:
+             ```python
+             def get_user_cache_key(user_id: int) -> str:
+                 return f"user:{user_id}"
+
+             @cache_store.cache(key_func=get_user_cache_key, ttl="1h")
+             async def get_user(user_id: int) -> User:
+                 return await db.get_user(user_id)
+
+             # will use/store cache with key "user:1", expiring after 1 hour
+             user1 = await get_user(1)
+             ```
+
+         Returns:
+             Callable: A decorator function.
+         '''
+     async def retrieve(self, key: str, matching_strategy: MatchingStrategy, matching_config: dict[str, Any] | None = None) -> Any | None:
+         """Retrieve the cached result based on the key and matching strategy.
+
+         Args:
+             key (str): The cache key to retrieve.
+             matching_strategy (MatchingStrategy): The strategy to use for matching keys.
+             matching_config (dict[str, Any] | None, optional): Configuration parameters for the matching strategy.
+                 Defaults to None.
+
+         Returns:
+             Any | None: The cached result if found, otherwise None.
+         """
+     async def store(self, key: str, value: Any, metadata: dict[str, Any] | None = None, **kwargs) -> None:
+         """Store the cached result based on the key and matching strategy.
+
+         Args:
+             key (str): The cache key to store.
+             value (Any): The value to store in the cache.
+             metadata (dict[str, Any] | None, optional): Metadata to store with the cache.
+                 Defaults to None.
+             **kwargs: Additional keyword arguments to pass to the eviction strategy.
+         """
+     async def delete(self, key: str | list[str], metadata: dict[str, Any] | None = None) -> None:
+         '''Delete the cached result based on the key and matching strategy.
+
+         Args:
+             key (str | list[str]): The cache key(s) to delete.
+             metadata (dict[str, Any] | None, optional): Optional metadata filter to apply to the search.
+                 For example, `{"key": "value"}`. Defaults to None.
+         '''
+     async def clear(self) -> None:
+         """Clear all cached results based on the matching strategy."""
@@ -0,0 +1,66 @@
+ from enum import Enum
+
+ DEFAULT_TOP_K: int
+ DEFAULT_FETCH_K: int
+ DEFAULT_REQUEST_TIMEOUT: int
+ SIMILARITY_SCORE: str
+ DEFAULT_FUZZY_MATCH_MAX_DISTANCE: int
+ DEFAULT_REDIS_QUERY_BATCH_SIZE: int
+ REDIS_DEFAULT_HOST: str
+ REDIS_DEFAULT_PORT: int
+ REDIS_DEFAULT_DB: int
+ FIELD_CONFIG_NAME: str
+ FIELD_CONFIG_TYPE: str
+ BOOL_TRUE_STR: str
+ BOOL_FALSE_STR: str
+ METADATA_PREFIX: str
+ METADATA_SEPARATOR: str
+ LIST_SEPARATOR: str
+
+ class FieldType(str, Enum):
+     """Redis Search field types for filterable fields.
+
+     Attributes:
+         NUMERIC: Numeric field type for range queries.
+         TAG: Tag field type for exact matching and filtering.
+         TEXT: Text field type for full-text search.
+     """
+     NUMERIC: str
+     TAG: str
+     TEXT: str
+
+ class CHUNK_KEYS:
+     """Dictionary-like keys used internally for in-memory chunk representation."""
+     ID: str
+     TEXT: str
+     CONTENT: str
+     METADATA: str
+     VECTOR: str
+     SCORE: str
+
+ class METADATA_KEYS:
+     """Metadata keys used in the cache-compatible vector data store.
+
+     Attributes:
+         EMBEDDINGS (str): Key for the embeddings in the cache.
+         DOCUMENTS (str): Key for the documents in the cache.
+         METADATA (str): Key for the metadata in the cache.
+         ORIGINAL_KEY (str): Key to store the original key value.
+         CACHE_VALUE (str): Key for the cached value.
+         CACHE_CREATED (str): Key for the timestamp when the cache was created.
+         TTL (str): Key for the time-to-live of the cache.
+         EXPIRE_AT (str): Key for the expiration time of the cache.
+         LAST_USED_AT (str): Key for the last used time of the cache.
+         ACCESS_COUNT (str): Key for the access count of the cache.
+     """
+     EMBEDDINGS: str
+     DOCUMENTS: str
+     METADATAS: str
+     METADATA: str
+     ORIGINAL_KEY: str
+     CACHE_VALUE: str
+     CACHE_CREATED: str
+     TTL: str
+     EXPIRE_AT: str
+     LAST_USED_AT: str
+     ACCESS_COUNT: str
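
A small sketch of how the `FieldType` members might be consumed when declaring filterable Redis fields. The field-config shape below ("name"/"type" keys) is an assumption suggested by `FIELD_CONFIG_NAME` and `FIELD_CONFIG_TYPE`, not confirmed by this stub.

```python
from gllm_datastore.constants import FieldType

# Assumed field-config shape; the actual key strings live in FIELD_CONFIG_NAME / FIELD_CONFIG_TYPE.
filterable_fields = [
    {"name": "year", "type": FieldType.NUMERIC},   # range queries
    {"name": "category", "type": FieldType.TAG},   # exact matching / filtering
    {"name": "body", "type": FieldType.TEXT},      # full-text search
]

for field in filterable_fields:
    print(field["name"], field["type"].value)
```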
@@ -0,0 +1,7 @@
+ from gllm_datastore.core.capabilities.fulltext_capability import FulltextCapability as FulltextCapability
+ from gllm_datastore.core.capabilities.graph_capability import GraphCapability as GraphCapability
+ from gllm_datastore.core.capabilities.vector_capability import VectorCapability as VectorCapability
+ from gllm_datastore.core.filters import FilterClause as FilterClause, FilterCondition as FilterCondition, FilterOperator as FilterOperator, QueryFilter as QueryFilter, QueryOptions as QueryOptions
+ from gllm_datastore.core.filters.filter import all_ as all_, and_ as and_, any_ as any_, array_contains as array_contains, eq as eq, gt as gt, gte as gte, in_ as in_, lt as lt, lte as lte, ne as ne, nin as nin, not_ as not_, or_ as or_, text_contains as text_contains
+
+ __all__ = ['FilterCondition', 'FilterOperator', 'FilterClause', 'QueryFilter', 'QueryOptions', 'FulltextCapability', 'GraphCapability', 'VectorCapability', 'all_', 'and_', 'any_', 'array_contains', 'eq', 'gt', 'gte', 'in_', 'lt', 'lte', 'ne', 'nin', 'not_', 'or_', 'text_contains']
@@ -0,0 +1,7 @@
+ from gllm_datastore.core.capabilities.encryption_capability import EncryptionCapability as EncryptionCapability
+ from gllm_datastore.core.capabilities.fulltext_capability import FulltextCapability as FulltextCapability
+ from gllm_datastore.core.capabilities.graph_capability import GraphCapability as GraphCapability
+ from gllm_datastore.core.capabilities.hybrid_capability import HybridCapability as HybridCapability, HybridSearchType as HybridSearchType, SearchConfig as SearchConfig
+ from gllm_datastore.core.capabilities.vector_capability import VectorCapability as VectorCapability
+
+ __all__ = ['EncryptionCapability', 'FulltextCapability', 'GraphCapability', 'HybridCapability', 'SearchConfig', 'HybridSearchType', 'VectorCapability']
@@ -0,0 +1,21 @@
+ from typing import Protocol
+
+ class EncryptionCapability(Protocol):
+     """Protocol defining the encryption capability interface.
+
+     This protocol defines the contract that all encryption implementations must satisfy.
+     The EncryptionCapabilityMixin class provides a concrete implementation, but custom
+     implementations can also satisfy this protocol without inheriting from the mixin.
+
+     Note:
+         Encryption is an internal-only capability. Unlike fulltext and vector capabilities,
+         which users access via properties, encryption works transparently in the background.
+         Users cannot access store.encryption - it's not exposed as a public property.
+     """
+     @property
+     def encryption_config(self) -> set[str] | None:
+         """Get the current encryption configuration.
+
+         Returns:
+             set[str] | None: Set of encrypted field names if encryption is enabled, None otherwise.
+         """
@@ -0,0 +1,73 @@
+ from gllm_core.schema.chunk import Chunk
+ from gllm_datastore.core.filters import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
+ from typing import Any, Protocol
+
+ class FulltextCapability(Protocol):
+     """Protocol for full-text search and document operations.
+
+     This protocol defines the interface for datastores that support CRUD operations
+     and flexible querying mechanisms for document data.
+     """
+     async def create(self, data: Chunk | list[Chunk], **kwargs) -> None:
+         """Create new records in the datastore.
+
+         Args:
+             data (Chunk | list[Chunk]): Data to create (single item or collection).
+             **kwargs: Datastore-specific parameters.
+         """
+     async def retrieve(self, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, **kwargs) -> list[Chunk]:
+         """Read records from the datastore with optional filtering.
+
+         Args:
+             filters (FilterClause | QueryFilter | None, optional): Query filters to apply.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None.
+             options (QueryOptions | None, optional): Query options like limit and sorting.
+                 Defaults to None.
+             **kwargs: Datastore-specific parameters.
+
+         Returns:
+             list[Chunk]: Query results.
+         """
+     async def retrieve_fuzzy(self, query: str, max_distance: int = 2, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, **kwargs) -> list[Chunk]:
+         """Find records that fuzzy match the query within a distance threshold.
+
+         Args:
+             query (str): Text to fuzzy match against.
+             max_distance (int): Maximum edit distance for matches (Levenshtein distance). Defaults to 2.
+             filters (FilterClause | QueryFilter | None, optional): Optional metadata filters to apply.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None.
+             options (QueryOptions | None, optional): Query options (limit, sorting, etc.). Defaults to None.
+             **kwargs: Datastore-specific parameters.
+
+         Returns:
+             list[Chunk]: Matched chunks ordered by relevance/distance.
+         """
+     async def update(self, update_values: dict[str, Any], filters: FilterClause | QueryFilter | None = None, **kwargs) -> None:
+         """Update existing records in the datastore.
+
+         Args:
+             update_values (dict[str, Any]): Values to update.
+             filters (FilterClause | QueryFilter | None, optional): Filters to select records to update.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None.
+             **kwargs: Datastore-specific parameters.
+         """
+     async def delete(self, filters: FilterClause | QueryFilter | None = None, options: QueryOptions | None = None, **kwargs) -> None:
+         """Delete records from the datastore.
+
+         Args:
+             filters (FilterClause | QueryFilter | None, optional): Filters to select records to delete.
+                 FilterClause objects are automatically converted to QueryFilter internally.
+                 Defaults to None, in which case no operation is performed (no-op).
+             options (QueryOptions | None, optional): Query options for sorting and limiting deletions.
+                 Defaults to None.
+             **kwargs: Datastore-specific parameters.
+         """
+     async def clear(self, **kwargs) -> None:
+         """Clear all records from the datastore.
+
+         Args:
+             **kwargs: Datastore-specific parameters.
+         """