graphrag-cache 3.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graphrag_cache-3.0.0/.gitignore +65 -0
- graphrag_cache-3.0.0/LICENSE +21 -0
- graphrag_cache-3.0.0/PKG-INFO +127 -0
- graphrag_cache-3.0.0/README.md +109 -0
- graphrag_cache-3.0.0/graphrag_cache/__init__.py +20 -0
- graphrag_cache-3.0.0/graphrag_cache/cache.py +74 -0
- graphrag_cache-3.0.0/graphrag_cache/cache_config.py +26 -0
- graphrag_cache-3.0.0/graphrag_cache/cache_factory.py +95 -0
- graphrag_cache-3.0.0/graphrag_cache/cache_key.py +36 -0
- graphrag_cache-3.0.0/graphrag_cache/cache_type.py +15 -0
- graphrag_cache-3.0.0/graphrag_cache/json_cache.py +72 -0
- graphrag_cache-3.0.0/graphrag_cache/memory_cache.py +69 -0
- graphrag_cache-3.0.0/graphrag_cache/noop_cache.py +68 -0
- graphrag_cache-3.0.0/graphrag_cache/py.typed +0 -0
- graphrag_cache-3.0.0/pyproject.toml +43 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# Python Artifacts
|
|
2
|
+
python/*/lib/
|
|
3
|
+
dist/
|
|
4
|
+
build/
|
|
5
|
+
*.egg-info/
|
|
6
|
+
|
|
7
|
+
# Test Output
|
|
8
|
+
.coverage
|
|
9
|
+
coverage/
|
|
10
|
+
licenses.txt
|
|
11
|
+
examples_notebooks/*/data
|
|
12
|
+
tests/fixtures/cache
|
|
13
|
+
tests/fixtures/*/cache
|
|
14
|
+
tests/fixtures/*/output
|
|
15
|
+
output/lancedb
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Random
|
|
19
|
+
.DS_Store
|
|
20
|
+
*.log*
|
|
21
|
+
.venv
|
|
22
|
+
venv/
|
|
23
|
+
.conda
|
|
24
|
+
.tmp
|
|
25
|
+
packages/graphrag-llm/notebooks/metrics
|
|
26
|
+
packages/graphrag-llm/notebooks/cache
|
|
27
|
+
|
|
28
|
+
.env
|
|
29
|
+
build.zip
|
|
30
|
+
|
|
31
|
+
.turbo
|
|
32
|
+
|
|
33
|
+
__pycache__
|
|
34
|
+
|
|
35
|
+
.pipeline
|
|
36
|
+
|
|
37
|
+
# Azurite
|
|
38
|
+
temp_azurite/
|
|
39
|
+
__azurite*.json
|
|
40
|
+
__blobstorage*.json
|
|
41
|
+
__blobstorage__/
|
|
42
|
+
|
|
43
|
+
# Getting started example
|
|
44
|
+
ragtest/
|
|
45
|
+
.ragtest/
|
|
46
|
+
.pipelines
|
|
47
|
+
.pipeline
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# mkdocs
|
|
51
|
+
site/
|
|
52
|
+
|
|
53
|
+
# Docs migration
|
|
54
|
+
docsite/
|
|
55
|
+
.yarn/
|
|
56
|
+
.pnp*
|
|
57
|
+
|
|
58
|
+
# PyCharm
|
|
59
|
+
.idea/
|
|
60
|
+
|
|
61
|
+
# Jupyter notebook
|
|
62
|
+
.ipynb_checkpoints/
|
|
63
|
+
|
|
64
|
+
# Root build assets
|
|
65
|
+
packages/*/LICENSE
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) Microsoft Corporation.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: graphrag-cache
|
|
3
|
+
Version: 3.0.0
|
|
4
|
+
Summary: GraphRAG cache package.
|
|
5
|
+
Project-URL: Source, https://github.com/microsoft/graphrag
|
|
6
|
+
Author: Mónica Carvajal
|
|
7
|
+
Author-email: Alonso Guevara Fernández <alonsog@microsoft.com>, Andrés Morales Esquivel <andresmor@microsoft.com>, Chris Trevino <chtrevin@microsoft.com>, David Tittsworth <datittsw@microsoft.com>, Dayenne de Souza <ddesouza@microsoft.com>, Derek Worthen <deworthe@microsoft.com>, Gaudy Blanco Meneses <gaudyb@microsoft.com>, Ha Trinh <trinhha@microsoft.com>, Jonathan Larson <jolarso@microsoft.com>, Josh Bradley <joshbradley@microsoft.com>, Kate Lytvynets <kalytv@microsoft.com>, Kenny Zhang <zhangken@microsoft.com>, Nathan Evans <naevans@microsoft.com>, Rodrigo Racanicci <rracanicci@microsoft.com>, Sarah Smith <smithsarah@microsoft.com>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
+
Requires-Python: <3.14,>=3.11
|
|
15
|
+
Requires-Dist: graphrag-common==3.0.0
|
|
16
|
+
Requires-Dist: graphrag-storage==3.0.0
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
# GraphRAG Cache
|
|
20
|
+
|
|
21
|
+
This package contains a collection of utilities to handle GraphRAG caching implementation.
|
|
22
|
+
|
|
23
|
+
### Basic
|
|
24
|
+
|
|
25
|
+
This example shows how to create a JSON cache with file storage using the GraphRAG cache package's configuration system.
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
import asyncio
|
|
29
|
+
from graphrag_storage import StorageConfig, create_storage, StorageType
|
|
30
|
+
from graphrag_cache import CacheConfig, create_cache, CacheType, create_cache_key
|
|
31
|
+
|
|
32
|
+
async def run():
|
|
33
|
+
cache = create_cache()
|
|
34
|
+
|
|
35
|
+
# The above is equivalent to the following:
|
|
36
|
+
cache = create_cache(
|
|
37
|
+
CacheConfig(
|
|
38
|
+
type=CacheType.Json,
|
|
39
|
+
storage=StorageConfig(
|
|
40
|
+
type=StorageType.File,
|
|
41
|
+
base_dir="cache"
|
|
42
|
+
)
|
|
43
|
+
),
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
await cache.set("my_key", {"some": "object to cache"})
|
|
47
|
+
print(await cache.get("my_key"))
|
|
48
|
+
|
|
49
|
+
# create cache key from data dict.
|
|
50
|
+
cache_key = create_cache_key({
|
|
51
|
+
"some_arg": "some_value",
|
|
52
|
+
"something_else": 5
|
|
53
|
+
})
|
|
54
|
+
await cache.set(cache_key, {"some": "object to cache"})
|
|
55
|
+
print(await cache.get(cache_key))
|
|
56
|
+
|
|
57
|
+
if __name__ == "__main__":
|
|
58
|
+
asyncio.run(run())
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Custom Cache
|
|
62
|
+
|
|
63
|
+
This demonstrates how to create a custom cache implementation by extending the base Cache class and registering it with the GraphRAG cache system. Once registered, the custom cache can be instantiated through the factory pattern using either CacheConfig or directly via cache_factory, allowing for extensible caching solutions tailored to specific needs.
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
import asyncio
|
|
67
|
+
from typing import Any
|
|
68
|
+
from graphrag_storage import Storage
|
|
69
|
+
from graphrag_cache import Cache, CacheConfig, create_cache, register_cache
|
|
70
|
+
|
|
71
|
+
class MyCache(Cache):
|
|
72
|
+
def __init__(self, some_setting: str, optional_setting: str = "default setting", **kwargs: Any):
|
|
73
|
+
# Validate settings and initialize
|
|
74
|
+
# View the JsonCache implementation to see how to create a cache that relies on a Storage provider.
|
|
75
|
+
...
|
|
76
|
+
|
|
77
|
+
#Implement rest of interface
|
|
78
|
+
...
|
|
79
|
+
|
|
80
|
+
register_cache("MyCache", MyCache)
|
|
81
|
+
|
|
82
|
+
async def run():
|
|
83
|
+
cache = create_cache(
|
|
84
|
+
CacheConfig(
|
|
85
|
+
type="MyCache",
|
|
86
|
+
some_setting="My Setting"
|
|
87
|
+
)
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
# Or use the factory directly to instantiate with a dict instead of using
|
|
91
|
+
# CacheConfig + create_cache
|
|
92
|
+
# from graphrag_cache.cache_factory import cache_factory
|
|
93
|
+
# cache = cache_factory.create(strategy="MyCache", init_args={"some_setting": "My Setting"})
|
|
94
|
+
|
|
95
|
+
await cache.set("my_key", {"some": "object to cache"})
|
|
96
|
+
print(await cache.get("my_key"))
|
|
97
|
+
|
|
98
|
+
if __name__ == "__main__":
|
|
99
|
+
asyncio.run(run())
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
#### Details
|
|
103
|
+
|
|
104
|
+
By default, `create_cache` comes with the following cache providers preregistered, corresponding to the entries in the `CacheType` enum.
|
|
105
|
+
|
|
106
|
+
- `JsonCache`
|
|
107
|
+
- `MemoryCache`
|
|
108
|
+
- `NoopCache`
|
|
109
|
+
|
|
110
|
+
The preregistration happens dynamically, e.g., `JsonCache` is only imported and registered if you request a `JsonCache` with `create_cache(CacheConfig(type=CacheType.Json, ...))`. There is no need to manually import and register builtin cache providers when using `create_cache`.
|
|
111
|
+
|
|
112
|
+
If you want a clean factory with no preregistered cache providers then directly import `cache_factory` and bypass using `create_cache`. The downside is that `cache_factory.create` uses a dict for init args instead of the strongly typed `CacheConfig` used with `create_cache`.
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
from graphrag_cache.cache_factory import cache_factory
|
|
116
|
+
from graphrag_cache.json_cache import JsonCache
|
|
117
|
+
|
|
118
|
+
# cache_factory has no preregistered providers so you must register any
|
|
119
|
+
# providers you plan on using.
|
|
120
|
+
# May also register a custom implementation, see above for example.
|
|
121
|
+
cache_factory.register("my_cache_impl", JsonCache)
|
|
122
|
+
|
|
123
|
+
cache = cache_factory.create(strategy="my_cache_impl", init_args={"some_setting": "..."})
|
|
124
|
+
|
|
125
|
+
...
|
|
126
|
+
|
|
127
|
+
```
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# GraphRAG Cache
|
|
2
|
+
|
|
3
|
+
This package contains a collection of utilities to handle GraphRAG caching implementation.
|
|
4
|
+
|
|
5
|
+
### Basic
|
|
6
|
+
|
|
7
|
+
This example shows how to create a JSON cache with file storage using the GraphRAG cache package's configuration system.
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
import asyncio
|
|
11
|
+
from graphrag_storage import StorageConfig, create_storage, StorageType
|
|
12
|
+
from graphrag_cache import CacheConfig, create_cache, CacheType, create_cache_key
|
|
13
|
+
|
|
14
|
+
async def run():
|
|
15
|
+
cache = create_cache()
|
|
16
|
+
|
|
17
|
+
# The above is equivalent to the following:
|
|
18
|
+
cache = create_cache(
|
|
19
|
+
CacheConfig(
|
|
20
|
+
type=CacheType.Json,
|
|
21
|
+
storage=StorageConfig(
|
|
22
|
+
type=StorageType.File,
|
|
23
|
+
base_dir="cache"
|
|
24
|
+
)
|
|
25
|
+
),
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
await cache.set("my_key", {"some": "object to cache"})
|
|
29
|
+
print(await cache.get("my_key"))
|
|
30
|
+
|
|
31
|
+
# create cache key from data dict.
|
|
32
|
+
cache_key = create_cache_key({
|
|
33
|
+
"some_arg": "some_value",
|
|
34
|
+
"something_else": 5
|
|
35
|
+
})
|
|
36
|
+
await cache.set(cache_key, {"some": "object to cache"})
|
|
37
|
+
print(await cache.get(cache_key))
|
|
38
|
+
|
|
39
|
+
if __name__ == "__main__":
|
|
40
|
+
asyncio.run(run())
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### Custom Cache
|
|
44
|
+
|
|
45
|
+
This demonstrates how to create a custom cache implementation by extending the base Cache class and registering it with the GraphRAG cache system. Once registered, the custom cache can be instantiated through the factory pattern using either CacheConfig or directly via cache_factory, allowing for extensible caching solutions tailored to specific needs.
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
import asyncio
|
|
49
|
+
from typing import Any
|
|
50
|
+
from graphrag_storage import Storage
|
|
51
|
+
from graphrag_cache import Cache, CacheConfig, create_cache, register_cache
|
|
52
|
+
|
|
53
|
+
class MyCache(Cache):
|
|
54
|
+
def __init__(self, some_setting: str, optional_setting: str = "default setting", **kwargs: Any):
|
|
55
|
+
# Validate settings and initialize
|
|
56
|
+
# View the JsonCache implementation to see how to create a cache that relies on a Storage provider.
|
|
57
|
+
...
|
|
58
|
+
|
|
59
|
+
#Implement rest of interface
|
|
60
|
+
...
|
|
61
|
+
|
|
62
|
+
register_cache("MyCache", MyCache)
|
|
63
|
+
|
|
64
|
+
async def run():
|
|
65
|
+
cache = create_cache(
|
|
66
|
+
CacheConfig(
|
|
67
|
+
type="MyCache",
|
|
68
|
+
some_setting="My Setting"
|
|
69
|
+
)
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
# Or use the factory directly to instantiate with a dict instead of using
|
|
73
|
+
# CacheConfig + create_cache
|
|
74
|
+
# from graphrag_cache.cache_factory import cache_factory
|
|
75
|
+
# cache = cache_factory.create(strategy="MyCache", init_args={"some_setting": "My Setting"})
|
|
76
|
+
|
|
77
|
+
await cache.set("my_key", {"some": "object to cache"})
|
|
78
|
+
print(await cache.get("my_key"))
|
|
79
|
+
|
|
80
|
+
if __name__ == "__main__":
|
|
81
|
+
asyncio.run(run())
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
#### Details
|
|
85
|
+
|
|
86
|
+
By default, `create_cache` comes with the following cache providers preregistered, corresponding to the entries in the `CacheType` enum.
|
|
87
|
+
|
|
88
|
+
- `JsonCache`
|
|
89
|
+
- `MemoryCache`
|
|
90
|
+
- `NoopCache`
|
|
91
|
+
|
|
92
|
+
The preregistration happens dynamically, e.g., `JsonCache` is only imported and registered if you request a `JsonCache` with `create_cache(CacheConfig(type=CacheType.Json, ...))`. There is no need to manually import and register builtin cache providers when using `create_cache`.
|
|
93
|
+
|
|
94
|
+
If you want a clean factory with no preregistered cache providers then directly import `cache_factory` and bypass using `create_cache`. The downside is that `cache_factory.create` uses a dict for init args instead of the strongly typed `CacheConfig` used with `create_cache`.
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from graphrag_cache.cache_factory import cache_factory
|
|
98
|
+
from graphrag_cache.json_cache import JsonCache
|
|
99
|
+
|
|
100
|
+
# cache_factory has no preregistered providers so you must register any
|
|
101
|
+
# providers you plan on using.
|
|
102
|
+
# May also register a custom implementation, see above for example.
|
|
103
|
+
cache_factory.register("my_cache_impl", JsonCache)
|
|
104
|
+
|
|
105
|
+
cache = cache_factory.create(strategy="my_cache_impl", init_args={"some_setting": "..."})
|
|
106
|
+
|
|
107
|
+
...
|
|
108
|
+
|
|
109
|
+
```
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Copyright (c) 2024 Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License
|
|
3
|
+
|
|
4
|
+
"""The GraphRAG Cache package."""
|
|
5
|
+
|
|
6
|
+
from graphrag_cache.cache import Cache
|
|
7
|
+
from graphrag_cache.cache_config import CacheConfig
|
|
8
|
+
from graphrag_cache.cache_factory import create_cache, register_cache
|
|
9
|
+
from graphrag_cache.cache_key import CacheKeyCreator, create_cache_key
|
|
10
|
+
from graphrag_cache.cache_type import CacheType
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"Cache",
|
|
14
|
+
"CacheConfig",
|
|
15
|
+
"CacheKeyCreator",
|
|
16
|
+
"CacheType",
|
|
17
|
+
"create_cache",
|
|
18
|
+
"create_cache_key",
|
|
19
|
+
"register_cache",
|
|
20
|
+
]
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Copyright (c) 2024 Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License
|
|
3
|
+
|
|
4
|
+
"""Abstract base class for cache."""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
from typing import TYPE_CHECKING, Any
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from graphrag_storage import Storage
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Cache(ABC):
|
|
16
|
+
"""Provide a cache interface for the pipeline."""
|
|
17
|
+
|
|
18
|
+
@abstractmethod
|
|
19
|
+
def __init__(self, *, storage: Storage | None, **kwargs: Any) -> None:
|
|
20
|
+
"""Create a cache instance."""
|
|
21
|
+
|
|
22
|
+
@abstractmethod
|
|
23
|
+
async def get(self, key: str) -> Any:
|
|
24
|
+
"""Get the value for the given key.
|
|
25
|
+
|
|
26
|
+
Args:
|
|
27
|
+
- key - The key to get the value for.
|
|
28
|
+
- as_bytes - Whether or not to return the value as bytes.
|
|
29
|
+
|
|
30
|
+
Returns
|
|
31
|
+
-------
|
|
32
|
+
- output - The value for the given key.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
@abstractmethod
|
|
36
|
+
async def set(self, key: str, value: Any, debug_data: dict | None = None) -> None:
|
|
37
|
+
"""Set the value for the given key.
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
- key - The key to set the value for.
|
|
41
|
+
- value - The value to set.
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
@abstractmethod
|
|
45
|
+
async def has(self, key: str) -> bool:
|
|
46
|
+
"""Return True if the given key exists in the cache.
|
|
47
|
+
|
|
48
|
+
Args:
|
|
49
|
+
- key - The key to check for.
|
|
50
|
+
|
|
51
|
+
Returns
|
|
52
|
+
-------
|
|
53
|
+
- output - True if the key exists in the cache, False otherwise.
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
@abstractmethod
|
|
57
|
+
async def delete(self, key: str) -> None:
|
|
58
|
+
"""Delete the given key from the cache.
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
- key - The key to delete.
|
|
62
|
+
"""
|
|
63
|
+
|
|
64
|
+
@abstractmethod
|
|
65
|
+
async def clear(self) -> None:
|
|
66
|
+
"""Clear the cache."""
|
|
67
|
+
|
|
68
|
+
@abstractmethod
|
|
69
|
+
def child(self, name: str) -> Cache:
|
|
70
|
+
"""Create a child cache with the given name.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
- name - The name to create the sub cache with.
|
|
74
|
+
"""
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Copyright (c) 2024 Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License
|
|
3
|
+
|
|
4
|
+
"""Cache configuration model."""
|
|
5
|
+
|
|
6
|
+
from graphrag_storage import StorageConfig, StorageType
|
|
7
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
8
|
+
|
|
9
|
+
from graphrag_cache.cache_type import CacheType
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class CacheConfig(BaseModel):
    """The configuration section for cache.

    ``type`` selects the cache implementation and ``storage`` optionally
    describes the storage backend to build for it. Unknown extra fields are
    accepted so that custom cache implementations can carry their own
    settings.
    """

    model_config = ConfigDict(extra="allow")
    """Allow extra fields to support custom cache implementations."""

    # The builtin identifiers are the CacheType values, which are lowercase
    # ('json', 'memory', 'none'); the description must quote them exactly
    # because `type` is a plain string.
    type: str = Field(
        description="The cache type to use. Builtin types include 'json', 'memory', and 'none'.",
        default=CacheType.Json,
    )

    # Defaults to file storage rooted at "cache"; may be set to None.
    storage: StorageConfig | None = Field(
        description="The storage configuration to use for file-based caches such as 'json'.",
        default_factory=lambda: StorageConfig(type=StorageType.File, base_dir="cache"),
    )
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# Copyright (c) 2024 Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
"""Cache factory implementation."""
|
|
6
|
+
|
|
7
|
+
from collections.abc import Callable
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
from graphrag_common.factory import Factory
|
|
11
|
+
from graphrag_storage import create_storage
|
|
12
|
+
|
|
13
|
+
from graphrag_cache.cache_config import CacheConfig
|
|
14
|
+
from graphrag_cache.cache_type import CacheType
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from graphrag_common.factory import ServiceScope
|
|
18
|
+
from graphrag_storage import Storage
|
|
19
|
+
|
|
20
|
+
from graphrag_cache.cache import Cache
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class CacheFactory(Factory["Cache"]):
    """Factory specialization that produces ``Cache`` implementations."""


# Module-level factory instance that `register_cache` and `create_cache`
# operate on.
cache_factory = CacheFactory()
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def register_cache(
    cache_type: str,
    cache_initializer: Callable[..., "Cache"],
    scope: "ServiceScope" = "transient",
) -> None:
    """Register a cache implementation with the module-level cache factory.

    Args
    ----
        - cache_type: str
            The cache id to register.
        - cache_initializer: Callable[..., Cache]
            The cache initializer to register.
        - scope: ServiceScope
            The service scope forwarded unchanged to the factory
            registration. Defaults to "transient".
    """
    cache_factory.register(cache_type, cache_initializer, scope)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def create_cache(
|
|
48
|
+
config: CacheConfig | None = None, storage: "Storage | None" = None
|
|
49
|
+
) -> "Cache":
|
|
50
|
+
"""Create a cache implementation based on the given configuration.
|
|
51
|
+
|
|
52
|
+
Args
|
|
53
|
+
----
|
|
54
|
+
- config: CacheConfig
|
|
55
|
+
The cache configuration to use.
|
|
56
|
+
- storage: Storage | None
|
|
57
|
+
The storage implementation to use for file-based caches such as 'Json'.
|
|
58
|
+
|
|
59
|
+
Returns
|
|
60
|
+
-------
|
|
61
|
+
Cache
|
|
62
|
+
The created cache implementation.
|
|
63
|
+
"""
|
|
64
|
+
config = config or CacheConfig()
|
|
65
|
+
config_model = config.model_dump()
|
|
66
|
+
cache_strategy = config.type
|
|
67
|
+
|
|
68
|
+
if not storage and config.storage:
|
|
69
|
+
storage = create_storage(config.storage)
|
|
70
|
+
|
|
71
|
+
if cache_strategy not in cache_factory:
|
|
72
|
+
match cache_strategy:
|
|
73
|
+
case CacheType.Json:
|
|
74
|
+
from graphrag_cache.json_cache import JsonCache
|
|
75
|
+
|
|
76
|
+
register_cache(CacheType.Json, JsonCache)
|
|
77
|
+
|
|
78
|
+
case CacheType.Memory:
|
|
79
|
+
from graphrag_cache.memory_cache import MemoryCache
|
|
80
|
+
|
|
81
|
+
register_cache(CacheType.Memory, MemoryCache)
|
|
82
|
+
|
|
83
|
+
case CacheType.Noop:
|
|
84
|
+
from graphrag_cache.noop_cache import NoopCache
|
|
85
|
+
|
|
86
|
+
register_cache(CacheType.Noop, NoopCache)
|
|
87
|
+
|
|
88
|
+
case _:
|
|
89
|
+
msg = f"CacheConfig.type '{cache_strategy}' is not registered in the CacheFactory. Registered types: {', '.join(cache_factory.keys())}."
|
|
90
|
+
raise ValueError(msg)
|
|
91
|
+
|
|
92
|
+
if storage:
|
|
93
|
+
config_model["storage"] = storage
|
|
94
|
+
|
|
95
|
+
return cache_factory.create(strategy=cache_strategy, init_args=config_model)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Copyright (c) 2024 Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License
|
|
3
|
+
|
|
4
|
+
"""Create cache key."""
|
|
5
|
+
|
|
6
|
+
from typing import Any, Protocol, runtime_checkable
|
|
7
|
+
|
|
8
|
+
from graphrag_common.hasher import hash_data
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@runtime_checkable
class CacheKeyCreator(Protocol):
    """Protocol for callables that turn input arguments into a cache key.

    Args
    ----
        input_args: dict[str, Any]
            The input arguments for creating the cache key.

    Returns
    -------
        str
            The generated cache key.
    """

    def __call__(self, input_args: dict[str, Any]) -> str:
        """Create cache key."""
        ...
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def create_cache_key(input_args: dict[str, Any]) -> str:
    """Return the cache key obtained by hashing the input arguments."""
    cache_key = hash_data(input_args)
    return cache_key
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright (c) 2024 Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
"""Builtin cache implementation types."""
|
|
6
|
+
|
|
7
|
+
from enum import StrEnum
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CacheType(StrEnum):
|
|
11
|
+
"""Enum for cache types."""
|
|
12
|
+
|
|
13
|
+
Json = "json"
|
|
14
|
+
Memory = "memory"
|
|
15
|
+
Noop = "none"
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# Copyright (c) 2024 Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License
|
|
3
|
+
|
|
4
|
+
"""A module containing 'JsonCache' model."""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from graphrag_storage import Storage, StorageConfig, create_storage
|
|
10
|
+
|
|
11
|
+
from graphrag_cache.cache import Cache
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class JsonCache(Cache):
    """Cache that stores each entry as a JSON document in a ``Storage``.

    Every entry is serialized as a JSON object whose ``"result"`` member
    holds the cached value; any debug data is stored as additional members
    of the same object.
    """

    _storage: Storage

    def __init__(
        self,
        storage: Storage | dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        """Create a JSON cache on top of the given storage.

        Args
        ----
            - storage: Storage | dict[str, Any] | None
                Either a ready ``Storage`` instance, or a dict of
                ``StorageConfig`` fields from which a storage is created.
            - kwargs: Any
                Accepted for factory compatibility; not used.

        Raises
        ------
            ValueError
                If no storage is provided.
        """
        if storage is None:
            msg = "JsonCache requires either a Storage instance to be provided or a StorageConfig to create one."
            raise ValueError(msg)
        if isinstance(storage, Storage):
            self._storage = storage
        else:
            self._storage = create_storage(StorageConfig(**storage))

    async def get(self, key: str) -> Any | None:
        """Return the cached value for ``key``, or None if unavailable.

        An entry that cannot be decoded, is not valid JSON, or is not a JSON
        object is treated as corrupt: it is deleted from storage and None is
        returned.
        """
        if not await self.has(key):
            return None

        try:
            raw = await self._storage.get(key)
            payload = json.loads(raw)
        except (UnicodeDecodeError, json.JSONDecodeError):
            await self._storage.delete(key)
            return None

        # Entries written by `set` are always JSON objects; anything else
        # cannot carry a "result" member and is handled like corrupt data
        # instead of failing with AttributeError on `.get`.
        if not isinstance(payload, dict):
            await self._storage.delete(key)
            return None

        return payload.get("result")

    async def set(self, key: str, value: Any, debug_data: dict | None = None) -> None:
        """Store ``value`` under ``key``; a None value is not stored.

        Args
        ----
            - key: str
                The key to set the value for.
            - value: Any
                The value to cache. Must be JSON serializable.
            - debug_data: dict | None
                Optional extra members written next to the result.
        """
        if value is None:
            return
        data = {"result": value, **(debug_data or {})}
        # The cached value must win if debug_data happens to contain its own
        # "result" key; re-assigning keeps "result" first in the output.
        data["result"] = value
        await self._storage.set(key, json.dumps(data, ensure_ascii=False))

    async def has(self, key: str) -> bool:
        """Return True if the underlying storage has an entry for ``key``."""
        return await self._storage.has(key)

    async def delete(self, key: str) -> None:
        """Delete ``key`` from the underlying storage if it is present."""
        if await self.has(key):
            await self._storage.delete(key)

    async def clear(self) -> None:
        """Clear the underlying storage."""
        await self._storage.clear()

    def child(self, name: str) -> "Cache":
        """Return a JsonCache backed by the ``name`` child of this storage."""
        return JsonCache(storage=self._storage.child(name))
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# Copyright (c) 2024 Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License
|
|
3
|
+
|
|
4
|
+
"""MemoryCache implementation."""
|
|
5
|
+
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from graphrag_cache.cache import Cache
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MemoryCache(Cache):
    """Cache that keeps its entries in an in-process dictionary."""

    _cache: dict[str, Any]
    _name: str

    def __init__(self, **kwargs: Any) -> None:
        """Create an empty in-memory cache.

        Args
        ----
            - kwargs: Any
                Accepted for factory compatibility; not used.
        """
        self._cache = {}

    async def get(self, key: str) -> Any:
        """Get the value for the given key.

        Args
        ----
            - key: str
                The key to get the value for.

        Returns
        -------
            Any
                The value for the given key, or None if the key is absent.
        """
        return self._cache.get(key)

    async def set(self, key: str, value: Any, debug_data: dict | None = None) -> None:
        """Set the value for the given key.

        Args
        ----
            - key: str
                The key to set the value for.
            - value: Any
                The value to set.
            - debug_data: dict | None
                Accepted for interface compatibility; not stored.
        """
        self._cache[key] = value

    async def has(self, key: str) -> bool:
        """Return True if the given key exists in the cache.

        Args
        ----
            - key: str
                The key to check for.

        Returns
        -------
            bool
                True if the key exists in the cache, False otherwise.
        """
        return key in self._cache

    async def delete(self, key: str) -> None:
        """Delete the given key from the cache.

        Deleting a key that is not present is a no-op, matching the other
        builtin caches.

        Args
        ----
            - key: str
                The key to delete.
        """
        # pop with a default avoids the KeyError that `del` raises for a
        # missing key.
        self._cache.pop(key, None)

    async def clear(self) -> None:
        """Clear the cache."""
        self._cache.clear()

    def child(self, name: str) -> "Cache":
        """Create a sub cache with the given name.

        The child is a new, empty MemoryCache; ``name`` is not used and no
        entries are shared with this cache.
        """
        return MemoryCache()
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# Copyright (c) 2024 Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License
|
|
3
|
+
|
|
4
|
+
"""NoopCache implementation."""
|
|
5
|
+
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from graphrag_cache.cache import Cache
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class NoopCache(Cache):
    """A no-op implementation of Cache, usually useful for testing.

    Nothing is ever stored: reads return None, ``has`` returns False and all
    mutating operations do nothing.
    """

    def __init__(self, **kwargs: Any) -> None:
        """Create a no-op cache.

        Args
        ----
            - kwargs: Any
                Accepted for factory compatibility; not used.
        """

    async def get(self, key: str) -> Any:
        """Get the value for the given key.

        Args
        ----
            - key: str
                The key to get the value for.

        Returns
        -------
            Any
                Always None, since nothing is stored.
        """
        return None

    async def set(self, key: str, value: Any, debug_data: dict | None = None) -> None:
        """Accept a value for the given key and discard it.

        The ``value`` parameter is typed ``Any`` to match the ``Cache``
        interface this class implements.

        Args
        ----
            - key: str
                The key to set the value for.
            - value: Any
                The value to set.
            - debug_data: dict | None
                Accepted for interface compatibility; not used.
        """

    async def has(self, key: str) -> bool:
        """Return True if the given key exists in the cache.

        Args
        ----
            - key: str
                The key to check for.

        Returns
        -------
            bool
                Always False, since nothing is stored.
        """
        return False

    async def delete(self, key: str) -> None:
        """Delete the given key from the cache (no-op).

        Args
        ----
            - key: str
                The key to delete.
        """

    async def clear(self) -> None:
        """Clear the cache (no-op)."""

    def child(self, name: str) -> "Cache":
        """Create a child cache with the given name.

        Args
        ----
            - name: str
                The name to create the sub cache with.

        Returns
        -------
            Cache
                This same no-op cache instance.
        """
        return self
|
|
File without changes
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "graphrag-cache"
|
|
3
|
+
version = "3.0.0"
|
|
4
|
+
description = "GraphRAG cache package."
|
|
5
|
+
authors = [
|
|
6
|
+
{name = "Alonso Guevara Fernández", email = "alonsog@microsoft.com"},
|
|
7
|
+
{name = "Andrés Morales Esquivel", email = "andresmor@microsoft.com"},
|
|
8
|
+
{name = "Chris Trevino", email = "chtrevin@microsoft.com"},
|
|
9
|
+
{name = "David Tittsworth", email = "datittsw@microsoft.com"},
|
|
10
|
+
{name = "Dayenne de Souza", email = "ddesouza@microsoft.com"},
|
|
11
|
+
{name = "Derek Worthen", email = "deworthe@microsoft.com"},
|
|
12
|
+
{name = "Gaudy Blanco Meneses", email = "gaudyb@microsoft.com"},
|
|
13
|
+
{name = "Ha Trinh", email = "trinhha@microsoft.com"},
|
|
14
|
+
{name = "Jonathan Larson", email = "jolarso@microsoft.com"},
|
|
15
|
+
{name = "Josh Bradley", email = "joshbradley@microsoft.com"},
|
|
16
|
+
{name = "Kate Lytvynets", email = "kalytv@microsoft.com"},
|
|
17
|
+
{name = "Kenny Zhang", email = "zhangken@microsoft.com"},
|
|
18
|
+
{name = "Mónica Carvajal"},
|
|
19
|
+
{name = "Nathan Evans", email = "naevans@microsoft.com"},
|
|
20
|
+
{name = "Rodrigo Racanicci", email = "rracanicci@microsoft.com"},
|
|
21
|
+
{name = "Sarah Smith", email = "smithsarah@microsoft.com"},
|
|
22
|
+
]
|
|
23
|
+
license = "MIT"
|
|
24
|
+
readme = "README.md"
|
|
25
|
+
license-files = ["LICENSE"]
|
|
26
|
+
requires-python = ">=3.11,<3.14"
|
|
27
|
+
classifiers = [
|
|
28
|
+
"Programming Language :: Python :: 3",
|
|
29
|
+
"Programming Language :: Python :: 3.11",
|
|
30
|
+
"Programming Language :: Python :: 3.12",
|
|
31
|
+
"Programming Language :: Python :: 3.13",
|
|
32
|
+
]
|
|
33
|
+
dependencies = [
|
|
34
|
+
"graphrag-common==3.0.0",
|
|
35
|
+
"graphrag-storage==3.0.0",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[project.urls]
|
|
39
|
+
Source = "https://github.com/microsoft/graphrag"
|
|
40
|
+
|
|
41
|
+
[build-system]
|
|
42
|
+
requires = ["hatchling>=1.27.0,<2.0.0"]
|
|
43
|
+
build-backend = "hatchling.build"
|