gllm-datastore-binary 0.5.45__cp311-cp311-macosx_13_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gllm-datastore-binary might be problematic. Click here for more details.
- gllm_datastore/__init__.pyi +0 -0
- gllm_datastore/cache/__init__.pyi +4 -0
- gllm_datastore/cache/base.pyi +84 -0
- gllm_datastore/cache/cache.pyi +137 -0
- gllm_datastore/cache/hybrid_cache/__init__.pyi +5 -0
- gllm_datastore/cache/hybrid_cache/file_system_hybrid_cache.pyi +50 -0
- gllm_datastore/cache/hybrid_cache/hybrid_cache.pyi +115 -0
- gllm_datastore/cache/hybrid_cache/in_memory_hybrid_cache.pyi +29 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/__init__.pyi +5 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/exact_key_matcher.pyi +44 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/fuzzy_key_matcher.pyi +70 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/key_matcher.pyi +60 -0
- gllm_datastore/cache/hybrid_cache/key_matcher/semantic_key_matcher.pyi +93 -0
- gllm_datastore/cache/hybrid_cache/redis_hybrid_cache.pyi +34 -0
- gllm_datastore/cache/hybrid_cache/utils.pyi +36 -0
- gllm_datastore/cache/utils.pyi +34 -0
- gllm_datastore/cache/vector_cache/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_manager/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_manager/asyncio_eviction_manager.pyi +48 -0
- gllm_datastore/cache/vector_cache/eviction_manager/eviction_manager.pyi +38 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/__init__.pyi +0 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/eviction_strategy.pyi +34 -0
- gllm_datastore/cache/vector_cache/eviction_strategy/ttl_eviction_strategy.pyi +34 -0
- gllm_datastore/cache/vector_cache/vector_cache.pyi +99 -0
- gllm_datastore/constants.pyi +66 -0
- gllm_datastore/core/__init__.pyi +7 -0
- gllm_datastore/core/capabilities/__init__.pyi +5 -0
- gllm_datastore/core/capabilities/fulltext_capability.pyi +73 -0
- gllm_datastore/core/capabilities/graph_capability.pyi +70 -0
- gllm_datastore/core/capabilities/vector_capability.pyi +90 -0
- gllm_datastore/core/filters/__init__.pyi +4 -0
- gllm_datastore/core/filters/filter.pyi +340 -0
- gllm_datastore/core/filters/schema.pyi +149 -0
- gllm_datastore/data_store/__init__.pyi +7 -0
- gllm_datastore/data_store/base.pyi +138 -0
- gllm_datastore/data_store/chroma/__init__.pyi +4 -0
- gllm_datastore/data_store/chroma/_chroma_import.pyi +13 -0
- gllm_datastore/data_store/chroma/data_store.pyi +202 -0
- gllm_datastore/data_store/chroma/fulltext.pyi +134 -0
- gllm_datastore/data_store/chroma/query.pyi +266 -0
- gllm_datastore/data_store/chroma/query_translator.pyi +41 -0
- gllm_datastore/data_store/chroma/vector.pyi +197 -0
- gllm_datastore/data_store/elasticsearch/__init__.pyi +5 -0
- gllm_datastore/data_store/elasticsearch/data_store.pyi +119 -0
- gllm_datastore/data_store/elasticsearch/fulltext.pyi +237 -0
- gllm_datastore/data_store/elasticsearch/query.pyi +114 -0
- gllm_datastore/data_store/elasticsearch/vector.pyi +179 -0
- gllm_datastore/data_store/exceptions.pyi +35 -0
- gllm_datastore/data_store/in_memory/__init__.pyi +5 -0
- gllm_datastore/data_store/in_memory/data_store.pyi +71 -0
- gllm_datastore/data_store/in_memory/fulltext.pyi +131 -0
- gllm_datastore/data_store/in_memory/query.pyi +175 -0
- gllm_datastore/data_store/in_memory/vector.pyi +174 -0
- gllm_datastore/data_store/redis/__init__.pyi +5 -0
- gllm_datastore/data_store/redis/data_store.pyi +154 -0
- gllm_datastore/data_store/redis/fulltext.pyi +128 -0
- gllm_datastore/data_store/redis/query.pyi +428 -0
- gllm_datastore/data_store/redis/query_translator.pyi +37 -0
- gllm_datastore/data_store/redis/vector.pyi +131 -0
- gllm_datastore/encryptor/__init__.pyi +4 -0
- gllm_datastore/encryptor/aes_gcm_encryptor.pyi +45 -0
- gllm_datastore/encryptor/encryptor.pyi +52 -0
- gllm_datastore/encryptor/key_ring/__init__.pyi +3 -0
- gllm_datastore/encryptor/key_ring/in_memory_key_ring.pyi +52 -0
- gllm_datastore/encryptor/key_ring/key_ring.pyi +45 -0
- gllm_datastore/encryptor/key_rotating_encryptor.pyi +60 -0
- gllm_datastore/graph_data_store/__init__.pyi +6 -0
- gllm_datastore/graph_data_store/graph_data_store.pyi +151 -0
- gllm_datastore/graph_data_store/graph_rag_data_store.pyi +29 -0
- gllm_datastore/graph_data_store/light_rag_data_store.pyi +93 -0
- gllm_datastore/graph_data_store/light_rag_postgres_data_store.pyi +96 -0
- gllm_datastore/graph_data_store/llama_index_graph_rag_data_store.pyi +49 -0
- gllm_datastore/graph_data_store/llama_index_neo4j_graph_rag_data_store.pyi +78 -0
- gllm_datastore/graph_data_store/nebula_graph_data_store.pyi +206 -0
- gllm_datastore/graph_data_store/neo4j_graph_data_store.pyi +182 -0
- gllm_datastore/graph_data_store/utils/__init__.pyi +6 -0
- gllm_datastore/graph_data_store/utils/constants.pyi +21 -0
- gllm_datastore/graph_data_store/utils/light_rag_em_invoker_adapter.pyi +56 -0
- gllm_datastore/graph_data_store/utils/light_rag_lm_invoker_adapter.pyi +43 -0
- gllm_datastore/graph_data_store/utils/llama_index_em_invoker_adapter.pyi +45 -0
- gllm_datastore/graph_data_store/utils/llama_index_lm_invoker_adapter.pyi +169 -0
- gllm_datastore/sql_data_store/__init__.pyi +4 -0
- gllm_datastore/sql_data_store/adapter/__init__.pyi +0 -0
- gllm_datastore/sql_data_store/adapter/sqlalchemy_adapter.pyi +38 -0
- gllm_datastore/sql_data_store/constants.pyi +6 -0
- gllm_datastore/sql_data_store/sql_data_store.pyi +86 -0
- gllm_datastore/sql_data_store/sqlalchemy_sql_data_store.pyi +216 -0
- gllm_datastore/sql_data_store/types.pyi +31 -0
- gllm_datastore/utils/__init__.pyi +6 -0
- gllm_datastore/utils/converter.pyi +51 -0
- gllm_datastore/utils/dict.pyi +21 -0
- gllm_datastore/utils/ttl.pyi +25 -0
- gllm_datastore/utils/types.pyi +32 -0
- gllm_datastore/vector_data_store/__init__.pyi +6 -0
- gllm_datastore/vector_data_store/chroma_vector_data_store.pyi +259 -0
- gllm_datastore/vector_data_store/elasticsearch_vector_data_store.pyi +357 -0
- gllm_datastore/vector_data_store/in_memory_vector_data_store.pyi +179 -0
- gllm_datastore/vector_data_store/mixin/__init__.pyi +0 -0
- gllm_datastore/vector_data_store/mixin/cache_compatible_mixin.pyi +145 -0
- gllm_datastore/vector_data_store/redis_vector_data_store.pyi +191 -0
- gllm_datastore/vector_data_store/vector_data_store.pyi +146 -0
- gllm_datastore.build/.gitignore +1 -0
- gllm_datastore.cpython-311-darwin.so +0 -0
- gllm_datastore.pyi +156 -0
- gllm_datastore_binary-0.5.45.dist-info/METADATA +178 -0
- gllm_datastore_binary-0.5.45.dist-info/RECORD +108 -0
- gllm_datastore_binary-0.5.45.dist-info/WHEEL +5 -0
- gllm_datastore_binary-0.5.45.dist-info/top_level.txt +1 -0
|
File without changes
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from enum import StrEnum
|
|
3
|
+
from typing import Any, Callable
|
|
4
|
+
|
|
5
|
+
class MatchingStrategy(StrEnum):
|
|
6
|
+
"""Defines how keys should be matched during retrieval."""
|
|
7
|
+
EXACT: str
|
|
8
|
+
FUZZY: str
|
|
9
|
+
SEMANTIC: str
|
|
10
|
+
|
|
11
|
+
class BaseCache(ABC):
|
|
12
|
+
"""Base class for cache using data store."""
|
|
13
|
+
@abstractmethod
|
|
14
|
+
def cache(self, key_func: Callable | None = None, name: str = '', matching_strategy: MatchingStrategy = ..., matching_config: dict[str, Any] | None = None, **kwargs) -> Callable:
|
|
15
|
+
"""Decorator to cache the result of a function.
|
|
16
|
+
|
|
17
|
+
This method should be implemented by subclasses to provide the caching functionality.
|
|
18
|
+
|
|
19
|
+
Args:
|
|
20
|
+
key_func (Callable | None, optional): Function to generate the cache key. Defaults to None.
|
|
21
|
+
name (str, optional): Name of the cache. Defaults to an empty string.
|
|
22
|
+
matching_strategy (MatchingStrategy, optional): The strategy to use for matching keys.
|
|
23
|
+
This can be one of the values from the MatchingStrategy enum. Defaults to exact matching.
|
|
24
|
+
matching_config (dict[str, Any] | None, optional): Configuration parameters for matching strategies.
|
|
25
|
+
Defaults to None.
|
|
26
|
+
**kwargs: Additional parameters specific to the caching method.
|
|
27
|
+
|
|
28
|
+
Returns:
|
|
29
|
+
Callable: A decorator that can be applied to a function to cache its result.
|
|
30
|
+
|
|
31
|
+
Raises:
|
|
32
|
+
NotImplementedError: If the method is not implemented.
|
|
33
|
+
"""
|
|
34
|
+
@abstractmethod
|
|
35
|
+
def retrieve(self, key: str, **kwargs) -> Any | None:
|
|
36
|
+
"""Retrieve the cached result.
|
|
37
|
+
|
|
38
|
+
This method should be implemented by subclasses to provide the retrieval functionality.
|
|
39
|
+
|
|
40
|
+
Args:
|
|
41
|
+
key (str): The cache key to retrieve.
|
|
42
|
+
**kwargs: Additional parameters specific to the retrieval method.
|
|
43
|
+
|
|
44
|
+
Returns:
|
|
45
|
+
Any | None: The cached result if found, otherwise None.
|
|
46
|
+
|
|
47
|
+
Raises:
|
|
48
|
+
NotImplementedError: If the method is not implemented.
|
|
49
|
+
"""
|
|
50
|
+
@abstractmethod
|
|
51
|
+
def store(self, key: str, value: Any, **kwargs) -> None:
|
|
52
|
+
"""Store the cached result.
|
|
53
|
+
|
|
54
|
+
This method should be implemented by subclasses to provide the storage functionality.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
key (str): The cache key to store.
|
|
58
|
+
value (Any): The value to store in the cache.
|
|
59
|
+
**kwargs: Additional parameters specific to the storage method.
|
|
60
|
+
|
|
61
|
+
Raises:
|
|
62
|
+
NotImplementedError: If the method is not implemented.
|
|
63
|
+
"""
|
|
64
|
+
@abstractmethod
|
|
65
|
+
def delete(self, key: str | list[str]) -> None:
|
|
66
|
+
"""Delete the cached result.
|
|
67
|
+
|
|
68
|
+
This method should be implemented by subclasses to provide the deletion functionality.
|
|
69
|
+
|
|
70
|
+
Args:
|
|
71
|
+
key (str | list[str]): The cache key, or list of cache keys, to delete.
|
|
72
|
+
|
|
73
|
+
Raises:
|
|
74
|
+
NotImplementedError: If the method is not implemented.
|
|
75
|
+
"""
|
|
76
|
+
@abstractmethod
|
|
77
|
+
def clear(self) -> None:
|
|
78
|
+
"""Clear all cached results.
|
|
79
|
+
|
|
80
|
+
This method should be implemented by subclasses to provide the clearing functionality.
|
|
81
|
+
|
|
82
|
+
Raises:
|
|
83
|
+
NotImplementedError: If the method is not implemented.
|
|
84
|
+
"""
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
from gllm_datastore.cache.base import BaseCache as BaseCache, MatchingStrategy as MatchingStrategy
|
|
3
|
+
from gllm_datastore.cache.utils import generate_cache_id as generate_cache_id, generate_key_from_func as generate_key_from_func, serialize_pydantic as serialize_pydantic
|
|
4
|
+
from gllm_datastore.cache.vector_cache.eviction_manager.eviction_manager import BaseEvictionManager as BaseEvictionManager
|
|
5
|
+
from gllm_datastore.constants import METADATA_KEYS as METADATA_KEYS
|
|
6
|
+
from gllm_datastore.core.filters import FilterClause as FilterClause, QueryFilter as QueryFilter, QueryOptions as QueryOptions
|
|
7
|
+
from gllm_datastore.data_store.base import BaseDataStore as BaseDataStore, CapabilityType as CapabilityType
|
|
8
|
+
from typing import Any, Callable, Literal, overload
|
|
9
|
+
|
|
10
|
+
class Cache(BaseCache):
|
|
11
|
+
"""Cache interface that uses a data store for storage and retrieval.
|
|
12
|
+
|
|
13
|
+
Attributes:
|
|
14
|
+
data_store (BaseDataStore): The data store to use for storage.
|
|
15
|
+
eviction_manager (BaseEvictionManager | None): The eviction manager to use for cache eviction.
|
|
16
|
+
matching_strategy (MatchingStrategy): The strategy to use for matching keys.
|
|
17
|
+
eviction_config (dict[str, Any] | None): Configuration parameters for eviction strategies.
|
|
18
|
+
max_locks (int): Maximum number of locks to keep in memory for race condition mitigation.
|
|
19
|
+
"""
|
|
20
|
+
data_store: Incomplete
|
|
21
|
+
eviction_manager: Incomplete
|
|
22
|
+
eviction_strategy: Incomplete
|
|
23
|
+
matching_strategy: Incomplete
|
|
24
|
+
eviction_config: Incomplete
|
|
25
|
+
def __init__(self, data_store: BaseDataStore, eviction_manager: BaseEvictionManager | None = None, matching_strategy: MatchingStrategy = ..., eviction_config: dict[str, Any] | None = None, max_locks: int = 100) -> None:
|
|
26
|
+
"""Initialize the data store cache.
|
|
27
|
+
|
|
28
|
+
Args:
|
|
29
|
+
data_store (BaseDataStore): The data store to use for storage.
|
|
30
|
+
Must have fulltext capability registered.
|
|
31
|
+
Vector capability required only for semantic matching.
|
|
32
|
+
eviction_manager (BaseEvictionManager | None, optional): The eviction manager to use for cache eviction.
|
|
33
|
+
Defaults to None. If None, no eviction will be performed.
|
|
34
|
+
matching_strategy (MatchingStrategy, optional): The strategy to use for matching keys.
|
|
35
|
+
Defaults to MatchingStrategy.EXACT.
|
|
36
|
+
eviction_config (dict[str, Any] | None, optional): Configuration parameters for eviction strategies.
|
|
37
|
+
Defaults to None, which means no specific configuration is provided.
|
|
38
|
+
max_locks (int, optional): Maximum number of locks to keep in memory. When exceeded,
|
|
39
|
+
least recently used locks are automatically evicted. Defaults to 100.
|
|
40
|
+
|
|
41
|
+
Raises:
|
|
42
|
+
ValueError: If data_store doesn't have fulltext capability.
|
|
43
|
+
ValueError: If semantic matching requested without vector capability.
|
|
44
|
+
"""
|
|
45
|
+
def cache(self, key_func: Callable | None = None, name: str = '', matching_strategy: MatchingStrategy | None = None, eviction_config: dict[str, Any] | None = None) -> Callable:
|
|
46
|
+
'''Decorator for caching function results.
|
|
47
|
+
|
|
48
|
+
This decorator caches the results of the decorated function using this cache storage.
|
|
49
|
+
The cache key is generated using the provided key function or a default key generation
|
|
50
|
+
based on the function name and arguments.
|
|
51
|
+
|
|
52
|
+
Synchronous and asynchronous functions are supported.
|
|
53
|
+
|
|
54
|
+
Example:
|
|
55
|
+
1. Basic usage:
|
|
56
|
+
```python
|
|
57
|
+
def get_user_cache_key(user_id: int) -> str:
|
|
58
|
+
return f"user:{user_id}"
|
|
59
|
+
|
|
60
|
+
@cache_store.cache(key_func=get_user_cache_key)
|
|
61
|
+
async def get_user(user_id: int) -> User:
|
|
62
|
+
return await db.get_user(user_id)
|
|
63
|
+
|
|
64
|
+
# will use/store cache with key "user:1"
|
|
65
|
+
user1 = await get_user(1)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
2. Using eviction config:
|
|
69
|
+
```python
|
|
70
|
+
@cache_store.cache(eviction_config={"ttl": "1h"})
|
|
71
|
+
async def get_user(user_id: int) -> User:
|
|
72
|
+
return await db.get_user(user_id)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Args:
|
|
76
|
+
key_func (Callable | None, optional): A function to generate the cache key.
|
|
77
|
+
Defaults to None, in which case the function name and arguments will be used to generate the cache key.
|
|
78
|
+
name (str, optional): The name of the cache. This can be used to identify the cache in logs or metrics.
|
|
79
|
+
Defaults to an empty string.
|
|
80
|
+
matching_strategy (MatchingStrategy | None, optional): The strategy to use for matching keys.
|
|
81
|
+
Defaults to None, in which case the class-level matching strategy will be used.
|
|
82
|
+
eviction_config (dict[str, Any] | None, optional): Configuration parameters for eviction strategies.
|
|
83
|
+
Defaults to None, in which case the class-level eviction config will be used.
|
|
84
|
+
|
|
85
|
+
Returns:
|
|
86
|
+
Callable: A decorator function.
|
|
87
|
+
'''
|
|
88
|
+
async def store(self, key: str, value: str, metadata: dict[str, Any] | None = None, **kwargs) -> None:
|
|
89
|
+
'''Store the cached result based on the key and matching strategy.
|
|
90
|
+
|
|
91
|
+
Example:
|
|
92
|
+
```python
|
|
93
|
+
await cache.store("my_key", "my_value", metadata={"category": "ML", "subcategory": "AI"}, ttl="1h")
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Args:
|
|
97
|
+
key (str): The cache key to store.
|
|
98
|
+
value (str): The value to store in the cache.
|
|
99
|
+
metadata (dict[str, Any] | None, optional): Metadata to store with the cache.
|
|
100
|
+
Defaults to None.
|
|
101
|
+
**kwargs: Additional keyword arguments to pass to the eviction strategy (e.g. ttl).
|
|
102
|
+
'''
|
|
103
|
+
@overload
|
|
104
|
+
async def retrieve(self, key: str, matching_strategy: Literal[MatchingStrategy.EXACT], filters: FilterClause | QueryFilter | None = None) -> Any | None: ...
|
|
105
|
+
@overload
|
|
106
|
+
async def retrieve(self, key: str, matching_strategy: Literal[MatchingStrategy.FUZZY], max_distance: int = 2, filters: FilterClause | QueryFilter | None = None) -> Any | None: ...
|
|
107
|
+
@overload
|
|
108
|
+
async def retrieve(self, key: str, matching_strategy: Literal[MatchingStrategy.SEMANTIC], min_similarity: float = 0.8, filters: FilterClause | QueryFilter | None = None) -> Any | None: ...
|
|
109
|
+
async def delete(self, key: str | list[str], filters: FilterClause | QueryFilter | None = None) -> None:
|
|
110
|
+
'''Delete the cached result based on the key and matching strategy.
|
|
111
|
+
|
|
112
|
+
Example:
|
|
113
|
+
```python
|
|
114
|
+
# Using QueryFilter for multiple conditions
|
|
115
|
+
await cache.delete(
|
|
116
|
+
"my_key",
|
|
117
|
+
filters=F.and_(F.eq("metadata.category", "ML"), F.eq("metadata.subcategory", "AI"))
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
# Using FilterClause directly
|
|
121
|
+
await cache.delete("my_key", filters=F.eq("metadata.category", "ML"))
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Args:
|
|
125
|
+
key (str | list[str]): The cache key, or list of cache keys, to delete.
|
|
126
|
+
filters (FilterClause | QueryFilter | None, optional): Optional filters to apply to the search.
|
|
127
|
+
FilterClause objects are automatically converted to QueryFilter internally.
|
|
128
|
+
Defaults to None.
|
|
129
|
+
'''
|
|
130
|
+
async def clear(self) -> None:
|
|
131
|
+
"""Clear all cached results based on the matching strategy.
|
|
132
|
+
|
|
133
|
+
Example:
|
|
134
|
+
```python
|
|
135
|
+
await cache.clear()
|
|
136
|
+
```
|
|
137
|
+
"""
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
from gllm_datastore.cache.hybrid_cache.file_system_hybrid_cache import FileSystemHybridCache as FileSystemHybridCache
|
|
2
|
+
from gllm_datastore.cache.hybrid_cache.in_memory_hybrid_cache import InMemoryHybridCache as InMemoryHybridCache
|
|
3
|
+
from gllm_datastore.cache.hybrid_cache.redis_hybrid_cache import RedisHybridCache as RedisHybridCache
|
|
4
|
+
|
|
5
|
+
__all__ = ['FileSystemHybridCache', 'InMemoryHybridCache', 'RedisHybridCache']
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
from gllm_datastore.cache.hybrid_cache.hybrid_cache import BaseHybridCache as BaseHybridCache
|
|
3
|
+
from gllm_datastore.cache.hybrid_cache.key_matcher.key_matcher import BaseKeyMatcher as BaseKeyMatcher
|
|
4
|
+
|
|
5
|
+
class FileSystemHybridCache(BaseHybridCache):
|
|
6
|
+
'''A cache that stores data in the file system.
|
|
7
|
+
|
|
8
|
+
The `FileSystemHybridCache` class utilizes the file system to store cache data.
|
|
9
|
+
|
|
10
|
+
Attributes:
|
|
11
|
+
cache_dir (str): The directory to store the cache data.
|
|
12
|
+
cache_version (str): The version of the cache data.
|
|
13
|
+
current_version_dir (str): The directory to store the cache data for the current version.
|
|
14
|
+
metadata_dir (str): The directory to store the metadata for the cache data.
|
|
15
|
+
serialization_format (str): The serialization format to use for storing the cache data.
|
|
16
|
+
The supported serialization formats are "json" and "pickle".
|
|
17
|
+
compression_extension (str): The extension to use for the compression of the cache data. The supported
|
|
18
|
+
compression extensions are "json.gz" and "pkl.gz".
|
|
19
|
+
logger (Logger): The logger to use for logging.
|
|
20
|
+
key_matcher (BaseKeyMatcher): The key matcher to use that defines the cache key matching strategy.
|
|
21
|
+
'''
|
|
22
|
+
logger: Incomplete
|
|
23
|
+
cache_dir: Incomplete
|
|
24
|
+
cache_version: Incomplete
|
|
25
|
+
current_version_dir: Incomplete
|
|
26
|
+
metadata_dir: Incomplete
|
|
27
|
+
serialization_format: Incomplete
|
|
28
|
+
compression_extension: Incomplete
|
|
29
|
+
def __init__(self, cache_dir: str, cache_version: str = '1.0.0', serialization_format: str = 'json', key_matcher: BaseKeyMatcher | None = None) -> None:
|
|
30
|
+
'''Initializes a new instance of the FileSystemHybridCache class.
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
cache_dir (str): The directory to store the cache data.
|
|
34
|
+
cache_version (str, optional): The version of the cache data. Defaults to "1.0.0".
|
|
35
|
+
serialization_format (str, optional): The serialization format to use for storing the cache data.
|
|
36
|
+
The supported serialization formats are "json" and "pickle". Defaults to "json".
|
|
37
|
+
key_matcher (BaseKeyMatcher | None, optional): The key matcher to use that defines the cache key
|
|
38
|
+
matching strategy. Defaults to None, in which case the `ExactKeyMatcher` will be used.
|
|
39
|
+
|
|
40
|
+
Raises:
|
|
41
|
+
ValueError: If the serialization format is not supported.
|
|
42
|
+
'''
|
|
43
|
+
async def retrieve_all_keys(self) -> set[str]:
|
|
44
|
+
"""Retrieves all keys from the file system cache.
|
|
45
|
+
|
|
46
|
+
This method filters out and deletes any expired keys before returning the set.
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
set[str]: A set of all keys in the file system cache.
|
|
50
|
+
"""
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from gllm_datastore.cache.cache import BaseCache as BaseCache
|
|
4
|
+
from gllm_datastore.cache.hybrid_cache.key_matcher import ExactKeyMatcher as ExactKeyMatcher
|
|
5
|
+
from gllm_datastore.cache.hybrid_cache.key_matcher.key_matcher import BaseKeyMatcher as BaseKeyMatcher
|
|
6
|
+
from gllm_datastore.cache.hybrid_cache.utils import generate_key_from_func as generate_key_from_func
|
|
7
|
+
from gllm_datastore.utils import convert_ttl_to_seconds as convert_ttl_to_seconds
|
|
8
|
+
from typing import Any, Callable, ParamSpec, TypeVar
|
|
9
|
+
|
|
10
|
+
P = ParamSpec('P')
|
|
11
|
+
T = TypeVar('T')
|
|
12
|
+
|
|
13
|
+
class BaseHybridCache(BaseCache, ABC):
|
|
14
|
+
"""A base class for hybrid cache used in Gen AI applications.
|
|
15
|
+
|
|
16
|
+
The `BaseHybridCache` class provides a framework for storing and retrieving cache data.
|
|
17
|
+
|
|
18
|
+
Attributes:
|
|
19
|
+
key_matcher (BaseKeyMatcher): The key matcher that defines the cache key matching strategy.
|
|
20
|
+
"""
|
|
21
|
+
key_matcher: Incomplete
|
|
22
|
+
def __init__(self, key_matcher: BaseKeyMatcher | None = None) -> None:
|
|
23
|
+
"""Initialize a new instance of the `BaseHybridCache` class.
|
|
24
|
+
|
|
25
|
+
Args:
|
|
26
|
+
key_matcher (BaseKeyMatcher | None, optional): The key matcher that defines the cache key matching
|
|
27
|
+
strategy. Defaults to None, in which case the `ExactKeyMatcher` will be used.
|
|
28
|
+
"""
|
|
29
|
+
def cache(self, key_func: Callable[P, str] | None = None, name: str = '', ttl: int | str | None = None) -> Callable[[Callable[P, T]], Callable[P, T]]:
|
|
30
|
+
'''Decorator for caching function results.
|
|
31
|
+
|
|
32
|
+
This decorator caches the results of the decorated function using this cache storage.
|
|
33
|
+
The cache key is generated using the provided key function or a default key generation
|
|
34
|
+
based on the function name and arguments.
|
|
35
|
+
|
|
36
|
+
Synchronous and asynchronous functions are supported.
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
key_func (Callable[P, str] | None, optional): Function to generate cache keys.
|
|
40
|
+
Must accept the same parameters as the decorated function.
|
|
41
|
+
name (str, optional): Name to use in the default key generation if key_func is None.
|
|
42
|
+
ttl (int | str | None, optional): The time-to-live for the cached data. Can be an integer
|
|
43
|
+
in seconds or a string (e.g. "1h", "1d", "1w", "1y"). If None, the cache data will not expire.
|
|
44
|
+
Defaults to None.
|
|
45
|
+
matching_strategy (MatchingStrategy, optional): The strategy to use for matching keys.
|
|
46
|
+
This can be one of the values from the MatchingStrategy enum. Defaults to exact matching.
|
|
47
|
+
matching_config (dict[str, Any], optional): Configuration parameters for matching strategies.
|
|
48
|
+
Defaults to None.
|
|
49
|
+
|
|
50
|
+
Example:
|
|
51
|
+
```python
|
|
52
|
+
def get_user_cache_key(user_id: int) -> str:
|
|
53
|
+
return f"user:{user_id}"
|
|
54
|
+
|
|
55
|
+
@cache_store.cache(key_func=get_user_cache_key, ttl="1h")
|
|
56
|
+
async def get_user(user_id: int) -> User:
|
|
57
|
+
return await db.get_user(user_id)
|
|
58
|
+
|
|
59
|
+
# will use/store cache with key "user:1", expiring after 1 hour
|
|
60
|
+
user1 = await get_user(1)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Returns:
|
|
64
|
+
Callable: A decorator function.
|
|
65
|
+
'''
|
|
66
|
+
async def store(self, key: str, value: Any, ttl: int | str | None = None) -> None:
|
|
67
|
+
'''Stores cache data in the storage.
|
|
68
|
+
|
|
69
|
+
This method preprocesses the TTL (time-to-live) value to seconds if provided, and then calls both
|
|
70
|
+
the `key_matcher.store` and `_store` methods to store the cache data in the storage.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
key (str): The key under which the cache data is stored.
|
|
74
|
+
value (Any): The cache data to store.
|
|
75
|
+
ttl (int | str | None): The time-to-live (TTL) for the cache data. Must either be an integer in seconds
|
|
76
|
+
or a string (e.g. "1h", "1d", "1w", "1y"). If None, the cache data will not expire.
|
|
77
|
+
'''
|
|
78
|
+
async def retrieve(self, key: str) -> Any:
|
|
79
|
+
"""Retrieves cache data from the storage.
|
|
80
|
+
|
|
81
|
+
This method first retrieves the key using the strategy defined in the `key_matcher`. If a matching key is
|
|
82
|
+
found, the method will retrieve the cache data from the storage using the `_retrieve` method. Otherwise,
|
|
83
|
+
the method will return None.
|
|
84
|
+
|
|
85
|
+
Args:
|
|
86
|
+
key (str): The key whose cache data should be retrieved.
|
|
87
|
+
|
|
88
|
+
Returns:
|
|
89
|
+
Any: The retrieved cache data, or None if no matching key is found.
|
|
90
|
+
"""
|
|
91
|
+
async def delete(self, key: str | list[str]) -> None:
|
|
92
|
+
"""Deletes cache data from the storage.
|
|
93
|
+
|
|
94
|
+
This method deletes the key by calling both the `key_matcher.delete` and `_delete` methods.
|
|
95
|
+
|
|
96
|
+
Args:
|
|
97
|
+
key (str | list[str]): The key(s) whose cache data should be deleted.
|
|
98
|
+
"""
|
|
99
|
+
async def clear(self) -> None:
|
|
100
|
+
"""Clears all cache data from the storage.
|
|
101
|
+
|
|
102
|
+
This method clears all cache data from the storage by calling both the `key_matcher.clear` and `_clear` methods.
|
|
103
|
+
"""
|
|
104
|
+
@abstractmethod
|
|
105
|
+
async def retrieve_all_keys(self) -> set[str]:
|
|
106
|
+
"""Retrieves all keys from the storage.
|
|
107
|
+
|
|
108
|
+
This method must be implemented by the subclasses to define the logic for retrieving all keys from the storage.
|
|
109
|
+
|
|
110
|
+
Returns:
|
|
111
|
+
set[str]: A set of all keys in the storage.
|
|
112
|
+
|
|
113
|
+
Raises:
|
|
114
|
+
NotImplementedError: If the method is not implemented.
|
|
115
|
+
"""
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
from gllm_datastore.cache.hybrid_cache.hybrid_cache import BaseHybridCache as BaseHybridCache
|
|
3
|
+
from gllm_datastore.cache.hybrid_cache.key_matcher.key_matcher import BaseKeyMatcher as BaseKeyMatcher
|
|
4
|
+
|
|
5
|
+
class InMemoryHybridCache(BaseHybridCache):
|
|
6
|
+
"""A hybrid cache that stores data in an in-memory dictionary.
|
|
7
|
+
|
|
8
|
+
The `InMemoryHybridCache` class utilizes an in-memory dictionary to store the cache data.
|
|
9
|
+
|
|
10
|
+
Attributes:
|
|
11
|
+
in_memory_cache (dict[str, Any]): An in-memory dictionary to store the cache data.
|
|
12
|
+
key_matcher (BaseKeyMatcher): The key matcher to use that defines the cache key matching strategy.
|
|
13
|
+
"""
|
|
14
|
+
in_memory_cache: Incomplete
|
|
15
|
+
def __init__(self, key_matcher: BaseKeyMatcher | None = None) -> None:
|
|
16
|
+
"""Initializes a new instance of the InMemoryHybridCache class.
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
key_matcher (BaseKeyMatcher | None, optional): The key matcher to use that defines the cache key
|
|
20
|
+
matching strategy. Defaults to None, in which case the `ExactKeyMatcher` will be used.
|
|
21
|
+
"""
|
|
22
|
+
async def retrieve_all_keys(self) -> set[str]:
|
|
23
|
+
"""Retrieves all keys from the storage.
|
|
24
|
+
|
|
25
|
+
This method filters out and deletes any expired keys before returning the set.
|
|
26
|
+
|
|
27
|
+
Returns:
|
|
28
|
+
set[str]: A set of all keys in the storage.
|
|
29
|
+
"""
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
from gllm_datastore.cache.hybrid_cache.key_matcher.exact_key_matcher import ExactKeyMatcher as ExactKeyMatcher
|
|
2
|
+
from gllm_datastore.cache.hybrid_cache.key_matcher.fuzzy_key_matcher import FuzzyKeyMatcher as FuzzyKeyMatcher
|
|
3
|
+
from gllm_datastore.cache.hybrid_cache.key_matcher.semantic_key_matcher import SemanticKeyMatcher as SemanticKeyMatcher
|
|
4
|
+
|
|
5
|
+
__all__ = ['ExactKeyMatcher', 'FuzzyKeyMatcher', 'SemanticKeyMatcher']
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from gllm_datastore.cache.hybrid_cache.key_matcher.key_matcher import BaseKeyMatcher as BaseKeyMatcher
|
|
2
|
+
|
|
3
|
+
class ExactKeyMatcher(BaseKeyMatcher):
|
|
4
|
+
"""A key matcher that performs exact matching strategy.
|
|
5
|
+
|
|
6
|
+
This implementation simply checks if the input key exists in the set of cached keys
|
|
7
|
+
and returns it if found, otherwise returns None. The `store` method is a no-op.
|
|
8
|
+
"""
|
|
9
|
+
async def store(self, key: str) -> None:
|
|
10
|
+
"""Store the key as additional information during the matching process.
|
|
11
|
+
|
|
12
|
+
This method does nothing as exact matching doesn't require storing additional information.
|
|
13
|
+
|
|
14
|
+
Args:
|
|
15
|
+
key (str): The key to be stored.
|
|
16
|
+
"""
|
|
17
|
+
async def retrieve(self, key: str, cached_keys: set[str]) -> str | None:
|
|
18
|
+
"""Retrieve the key with exact matching strategy.
|
|
19
|
+
|
|
20
|
+
This method performs exact matching as follows:
|
|
21
|
+
1. Check if the input key exists in the set of cached keys.
|
|
22
|
+
2. If it does, return the input key.
|
|
23
|
+
3. Otherwise, return None.
|
|
24
|
+
|
|
25
|
+
Args:
|
|
26
|
+
key (str): The input key to be matched.
|
|
27
|
+
cached_keys (set[str]): The set of cached keys to be matched.
|
|
28
|
+
|
|
29
|
+
Returns:
|
|
30
|
+
str | None: The key if it exists in cached_keys, otherwise None.
|
|
31
|
+
"""
|
|
32
|
+
async def delete(self, key: str | list[str]) -> None:
|
|
33
|
+
"""Delete the key stored as additional information during the matching process.
|
|
34
|
+
|
|
35
|
+
This method does nothing as exact matching doesn't require deleting additional information.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
key (str | list[str]): The key(s) to be deleted.
|
|
39
|
+
"""
|
|
40
|
+
async def clear(self) -> None:
|
|
41
|
+
"""Clear all the keys that are stored as additional information during the matching process.
|
|
42
|
+
|
|
43
|
+
This method does nothing as exact matching doesn't require clearing additional information.
|
|
44
|
+
"""
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
from gllm_datastore.cache.hybrid_cache.key_matcher.key_matcher import BaseKeyMatcher as BaseKeyMatcher
|
|
3
|
+
|
|
4
|
+
class FuzzyKeyMatcher(BaseKeyMatcher):
    """A key matcher that performs fuzzy matching strategy.

    This implementation uses fuzzy matching to find the closest match between the input key
    and the cached keys. The distance is calculated using the Levenshtein distance.

    Attributes:
        max_distance_ratio (float): The ratio of key length to use as maximum Levenshtein distance
            for a key to match with the cached keys (e.g., 0.05 means 5% of key length).

    Note:
        Since the fuzzy matching heavily depends on the syntactic similarity between the key and the cached
        key, it should only be used when the key is a plain string. Fuzzy matching SHOULD NOT be used when the key
        is a hash / encryption of the input data.
    """

    # Typed as float to agree with the constructor parameter it is documented to mirror.
    max_distance_ratio: float

    def __init__(self, max_distance_ratio: float = 0.05) -> None:
        """Initialize a new instance of the `FuzzyKeyMatcher` class.

        Args:
            max_distance_ratio (float, optional): The ratio of key length to use as maximum Levenshtein distance
                for a key to match with the cached keys (e.g., 0.05 means 5% of key length). Must be between 0 and 1.
                Defaults to 0.05.

        Raises:
            ValueError: If `max_distance_ratio` is not between 0 and 1.
        """

    async def store(self, key: str) -> None:
        """Store the key as additional information during the matching process.

        This method does nothing as fuzzy matching doesn't require storing additional information.

        Args:
            key (str): The key to be stored.
        """

    async def retrieve(self, key: str, cached_keys: set[str]) -> str | None:
        """Retrieve the key with fuzzy matching strategy.

        This method performs fuzzy matching as follows:
        1. Iterate through all cached keys and calculate the Levenshtein distance between the key and the cached key
           to get the key with the smallest distance.
        2. If a cached key with the distance of 0 is found, that cached key is returned immediately.
        3. If the smallest distance is less than the maximum distance allowed by `max_distance_ratio`,
           the cached key with the smallest distance is returned.
        4. Otherwise, None is returned.

        Args:
            key (str): The input key to be matched.
            cached_keys (set[str]): The set of cached keys to be matched.

        Returns:
            str | None: The cached key with the smallest Levenshtein distance, if that distance is less than
                the maximum distance allowed by `max_distance_ratio`, otherwise None.
        """

    async def delete(self, key: str | list[str]) -> None:
        """Delete the key(s) stored as additional information during the matching process.

        This method does nothing as fuzzy matching doesn't require deleting additional information.

        Args:
            key (str | list[str]): The key(s) to be deleted.
        """

    async def clear(self) -> None:
        """Clear all the keys that are stored as additional information during the matching process.

        This method does nothing as fuzzy matching doesn't require clearing additional information.
        """
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
|
|
3
|
+
class BaseKeyMatcher(ABC):
|
|
4
|
+
"""A base class for key matcher classes used in hybrid caches.
|
|
5
|
+
|
|
6
|
+
The key matcher is a framework that can be used by hybrid caches to retrieve the key that matches
|
|
7
|
+
the input key using different strategies as defined in the subclasses.
|
|
8
|
+
"""
|
|
9
|
+
@abstractmethod
|
|
10
|
+
async def store(self, key: str) -> None:
|
|
11
|
+
"""Store the key as additional information during the matching process.
|
|
12
|
+
|
|
13
|
+
This method must be implemented by the subclasses to define the logic for storing the key as additional
|
|
14
|
+
information during the matching process.
|
|
15
|
+
|
|
16
|
+
Args:
|
|
17
|
+
key (str): The key to be stored as additional information during the matching process.
|
|
18
|
+
|
|
19
|
+
Raises:
|
|
20
|
+
NotImplementedError: If the method is not implemented.
|
|
21
|
+
"""
|
|
22
|
+
@abstractmethod
|
|
23
|
+
async def retrieve(self, key: str, cached_keys: set[str]) -> str | None:
|
|
24
|
+
"""Retrieve the key that matches the input key.
|
|
25
|
+
|
|
26
|
+
This method must be implemented by the subclasses to define the logic for retrieving the matched key.
|
|
27
|
+
|
|
28
|
+
Args:
|
|
29
|
+
key (str): The input key to be matched.
|
|
30
|
+
cached_keys (set[str]): The set of cached keys to be matched.
|
|
31
|
+
|
|
32
|
+
Returns:
|
|
33
|
+
str | None: The matched key, if found. Otherwise, None.
|
|
34
|
+
|
|
35
|
+
Raises:
|
|
36
|
+
NotImplementedError: If the method is not implemented.
|
|
37
|
+
"""
|
|
38
|
+
@abstractmethod
|
|
39
|
+
async def delete(self, key: str | list[str]) -> None:
|
|
40
|
+
"""Delete the key stored as additional information during the matching process.
|
|
41
|
+
|
|
42
|
+
This method must be implemented by the subclasses to define the logic for deleting the key stored as additional
|
|
43
|
+
information during the matching process.
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
key (str | list[str]): The key(s) to be deleted.
|
|
47
|
+
|
|
48
|
+
Raises:
|
|
49
|
+
NotImplementedError: If the method is not implemented.
|
|
50
|
+
"""
|
|
51
|
+
@abstractmethod
|
|
52
|
+
async def clear(self) -> None:
|
|
53
|
+
"""Clear all the keys that are stored as additional information during the matching process.
|
|
54
|
+
|
|
55
|
+
This method must be implemented by the subclasses to define the logic for clearing all the keys that are
|
|
56
|
+
stored as additional information during the matching process.
|
|
57
|
+
|
|
58
|
+
Raises:
|
|
59
|
+
NotImplementedError: If the method is not implemented.
|
|
60
|
+
"""
|