naas-abi-core 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- assets/favicon.ico +0 -0
- assets/logo.png +0 -0
- naas_abi_core/__init__.py +1 -0
- naas_abi_core/apps/api/api.py +245 -0
- naas_abi_core/apps/api/api_test.py +281 -0
- naas_abi_core/apps/api/openapi_doc.py +144 -0
- naas_abi_core/apps/mcp/Dockerfile.mcp +35 -0
- naas_abi_core/apps/mcp/mcp_server.py +243 -0
- naas_abi_core/apps/mcp/mcp_server_test.py +163 -0
- naas_abi_core/apps/terminal_agent/main.py +555 -0
- naas_abi_core/apps/terminal_agent/terminal_style.py +175 -0
- naas_abi_core/engine/Engine.py +87 -0
- naas_abi_core/engine/EngineProxy.py +109 -0
- naas_abi_core/engine/Engine_test.py +6 -0
- naas_abi_core/engine/IEngine.py +91 -0
- naas_abi_core/engine/conftest.py +45 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration.py +216 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_Deploy.py +7 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_GenericLoader.py +49 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_ObjectStorageService.py +159 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_ObjectStorageService_test.py +26 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_SecretService.py +138 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_SecretService_test.py +74 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_TripleStoreService.py +224 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_TripleStoreService_test.py +109 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_VectorStoreService.py +76 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_VectorStoreService_test.py +33 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_test.py +9 -0
- naas_abi_core/engine/engine_configuration/utils/PydanticModelValidator.py +15 -0
- naas_abi_core/engine/engine_loaders/EngineModuleLoader.py +302 -0
- naas_abi_core/engine/engine_loaders/EngineOntologyLoader.py +16 -0
- naas_abi_core/engine/engine_loaders/EngineServiceLoader.py +47 -0
- naas_abi_core/integration/__init__.py +7 -0
- naas_abi_core/integration/integration.py +28 -0
- naas_abi_core/models/Model.py +198 -0
- naas_abi_core/models/OpenRouter.py +18 -0
- naas_abi_core/models/OpenRouter_test.py +36 -0
- naas_abi_core/module/Module.py +252 -0
- naas_abi_core/module/ModuleAgentLoader.py +50 -0
- naas_abi_core/module/ModuleUtils.py +20 -0
- naas_abi_core/modules/templatablesparqlquery/README.md +196 -0
- naas_abi_core/modules/templatablesparqlquery/__init__.py +39 -0
- naas_abi_core/modules/templatablesparqlquery/ontologies/TemplatableSparqlQueryOntology.ttl +116 -0
- naas_abi_core/modules/templatablesparqlquery/workflows/GenericWorkflow.py +48 -0
- naas_abi_core/modules/templatablesparqlquery/workflows/TemplatableSparqlQueryLoader.py +192 -0
- naas_abi_core/pipeline/__init__.py +6 -0
- naas_abi_core/pipeline/pipeline.py +70 -0
- naas_abi_core/services/__init__.py +0 -0
- naas_abi_core/services/agent/Agent.py +1619 -0
- naas_abi_core/services/agent/AgentMemory_test.py +28 -0
- naas_abi_core/services/agent/Agent_test.py +214 -0
- naas_abi_core/services/agent/IntentAgent.py +1179 -0
- naas_abi_core/services/agent/IntentAgent_test.py +139 -0
- naas_abi_core/services/agent/beta/Embeddings.py +181 -0
- naas_abi_core/services/agent/beta/IntentMapper.py +120 -0
- naas_abi_core/services/agent/beta/LocalModel.py +88 -0
- naas_abi_core/services/agent/beta/VectorStore.py +89 -0
- naas_abi_core/services/agent/test_agent_memory.py +278 -0
- naas_abi_core/services/agent/test_postgres_integration.py +145 -0
- naas_abi_core/services/cache/CacheFactory.py +31 -0
- naas_abi_core/services/cache/CachePort.py +63 -0
- naas_abi_core/services/cache/CacheService.py +246 -0
- naas_abi_core/services/cache/CacheService_test.py +85 -0
- naas_abi_core/services/cache/adapters/secondary/CacheFSAdapter.py +39 -0
- naas_abi_core/services/object_storage/ObjectStorageFactory.py +57 -0
- naas_abi_core/services/object_storage/ObjectStoragePort.py +47 -0
- naas_abi_core/services/object_storage/ObjectStorageService.py +41 -0
- naas_abi_core/services/object_storage/adapters/secondary/ObjectStorageSecondaryAdapterFS.py +52 -0
- naas_abi_core/services/object_storage/adapters/secondary/ObjectStorageSecondaryAdapterNaas.py +131 -0
- naas_abi_core/services/object_storage/adapters/secondary/ObjectStorageSecondaryAdapterS3.py +171 -0
- naas_abi_core/services/ontology/OntologyPorts.py +36 -0
- naas_abi_core/services/ontology/OntologyService.py +17 -0
- naas_abi_core/services/ontology/adaptors/secondary/OntologyService_SecondaryAdaptor_NERPort.py +37 -0
- naas_abi_core/services/secret/Secret.py +138 -0
- naas_abi_core/services/secret/SecretPorts.py +45 -0
- naas_abi_core/services/secret/Secret_test.py +65 -0
- naas_abi_core/services/secret/adaptors/secondary/Base64Secret.py +57 -0
- naas_abi_core/services/secret/adaptors/secondary/Base64Secret_test.py +39 -0
- naas_abi_core/services/secret/adaptors/secondary/NaasSecret.py +88 -0
- naas_abi_core/services/secret/adaptors/secondary/NaasSecret_test.py +25 -0
- naas_abi_core/services/secret/adaptors/secondary/dotenv_secret_secondaryadaptor.py +29 -0
- naas_abi_core/services/triple_store/TripleStoreFactory.py +116 -0
- naas_abi_core/services/triple_store/TripleStorePorts.py +223 -0
- naas_abi_core/services/triple_store/TripleStoreService.py +419 -0
- naas_abi_core/services/triple_store/adaptors/secondary/AWSNeptune.py +1300 -0
- naas_abi_core/services/triple_store/adaptors/secondary/AWSNeptune_test.py +284 -0
- naas_abi_core/services/triple_store/adaptors/secondary/Oxigraph.py +597 -0
- naas_abi_core/services/triple_store/adaptors/secondary/Oxigraph_test.py +1474 -0
- naas_abi_core/services/triple_store/adaptors/secondary/TripleStoreService__SecondaryAdaptor__Filesystem.py +223 -0
- naas_abi_core/services/triple_store/adaptors/secondary/TripleStoreService__SecondaryAdaptor__ObjectStorage.py +234 -0
- naas_abi_core/services/triple_store/adaptors/secondary/base/TripleStoreService__SecondaryAdaptor__FileBase.py +18 -0
- naas_abi_core/services/vector_store/IVectorStorePort.py +101 -0
- naas_abi_core/services/vector_store/IVectorStorePort_test.py +189 -0
- naas_abi_core/services/vector_store/VectorStoreFactory.py +47 -0
- naas_abi_core/services/vector_store/VectorStoreService.py +171 -0
- naas_abi_core/services/vector_store/VectorStoreService_test.py +185 -0
- naas_abi_core/services/vector_store/__init__.py +13 -0
- naas_abi_core/services/vector_store/adapters/QdrantAdapter.py +251 -0
- naas_abi_core/services/vector_store/adapters/QdrantAdapter_test.py +57 -0
- naas_abi_core/tests/test_services_imports.py +69 -0
- naas_abi_core/utils/Expose.py +55 -0
- naas_abi_core/utils/Graph.py +182 -0
- naas_abi_core/utils/JSON.py +49 -0
- naas_abi_core/utils/LazyLoader.py +44 -0
- naas_abi_core/utils/Logger.py +12 -0
- naas_abi_core/utils/OntologyReasoner.py +141 -0
- naas_abi_core/utils/OntologyYaml.py +681 -0
- naas_abi_core/utils/SPARQL.py +256 -0
- naas_abi_core/utils/Storage.py +33 -0
- naas_abi_core/utils/StorageUtils.py +398 -0
- naas_abi_core/utils/String.py +52 -0
- naas_abi_core/utils/Workers.py +114 -0
- naas_abi_core/utils/__init__.py +0 -0
- naas_abi_core/utils/onto2py/README.md +0 -0
- naas_abi_core/utils/onto2py/__init__.py +10 -0
- naas_abi_core/utils/onto2py/__main__.py +29 -0
- naas_abi_core/utils/onto2py/onto2py.py +611 -0
- naas_abi_core/utils/onto2py/tests/ttl2py_test.py +271 -0
- naas_abi_core/workflow/__init__.py +5 -0
- naas_abi_core/workflow/workflow.py +48 -0
- naas_abi_core-1.4.1.dist-info/METADATA +630 -0
- naas_abi_core-1.4.1.dist-info/RECORD +124 -0
- naas_abi_core-1.4.1.dist-info/WHEEL +4 -0
- naas_abi_core-1.4.1.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from threading import Lock
|
|
3
|
+
from typing import Any, Dict, List, Tuple
|
|
4
|
+
|
|
5
|
+
import rdflib
|
|
6
|
+
from naas_abi_core.services.triple_store.adaptors.secondary.base.TripleStoreService__SecondaryAdaptor__FileBase import (
|
|
7
|
+
TripleStoreService__SecondaryAdaptor__FileBase,
|
|
8
|
+
)
|
|
9
|
+
from naas_abi_core.services.triple_store.TripleStorePorts import (
|
|
10
|
+
Exceptions,
|
|
11
|
+
ITripleStorePort,
|
|
12
|
+
OntologyEvent,
|
|
13
|
+
)
|
|
14
|
+
from rdflib import RDFS, BNode, Graph, URIRef, query
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TripleStoreService__SecondaryAdaptor__Filesystem(
    ITripleStorePort, TripleStoreService__SecondaryAdaptor__FileBase
):
    """Triple-store adaptor that persists triples on the local filesystem.

    Layout: one Turtle file per subject, named by the SHA-256 hash of the
    subject IRI, under ``<store_path>/<triples_path>/``.  "Views" are
    directories of symlinks under ``<store_path>/views/`` pointing back at
    the subject files.

    An in-memory aggregate graph (``__live_graph``) mirrors the on-disk
    state for fast reads and queries; mutations update both under a lock.
    """

    __store_path: str  # root directory of the store
    __triples_path: str  # sub-directory containing per-subject .ttl files

    __live_graph: Graph  # in-memory union of every persisted subject graph

    __lock: Lock  # serializes file writes and live-graph mutation

    def __init__(self, store_path: str, triples_path: str = "triples"):
        """Create the triples directory if needed and load existing triples."""
        self.__store_path = store_path
        self.__triples_path = triples_path
        self.__lock = Lock()

        os.makedirs(os.path.join(self.__store_path, self.__triples_path), exist_ok=True)

        self.__live_graph = self.load()

    def __merge_graphs(self, graphs: List[Graph]) -> Graph:
        """Return a new graph that is the union of *graphs*."""
        merged_graph = Graph()
        for graph in graphs:
            merged_graph += graph

        return merged_graph

    def hash_triples_path(self, hash_value: str) -> str:
        """Return the file path for a subject hash, appending ``.ttl`` if absent.

        Fixed: previously hard-coded the ``"triples"`` directory name, which
        silently ignored a custom ``triples_path`` passed to ``__init__``.
        """
        return os.path.join(
            self.__store_path,
            self.__triples_path,
            f"{hash_value}.ttl" if not hash_value.endswith(".ttl") else hash_value,
        )

    ## File System Methods

    def insert(self, triples: Graph):
        """Add *triples* to the per-subject files and the live graph."""
        with self.__lock:
            triples_by_subject: Dict[Any, List[Tuple[Any, Any]]] = (
                self.triples_by_subject(triples)
            )

            for subject in triples_by_subject:
                subject_hash = self.iri_hash(subject)

                if not os.path.exists(self.hash_triples_path(subject_hash)):
                    # New subject: start an empty graph carrying the incoming
                    # namespace bindings so the serialized Turtle stays readable.
                    graph = Graph()

                    for prefix, namespace in triples.namespaces():
                        graph.bind(prefix, namespace)
                else:
                    graph = Graph().parse(
                        self.hash_triples_path(subject_hash), format="turtle"
                    )

                for p, o in triples_by_subject[subject]:
                    graph.add((subject, p, o))

                graph.serialize(
                    destination=self.hash_triples_path(subject_hash), format="turtle"
                )

            for prefix, namespace in triples.namespaces():
                self.__live_graph.bind(prefix, namespace)

            # Update the live graph
            self.__live_graph += triples

    def remove(self, triples: Graph):
        """Remove *triples* from the per-subject files and the live graph."""
        with self.__lock:
            triples_by_subject: Dict[Any, List[Tuple[Any, Any]]] = (
                self.triples_by_subject(triples)
            )

            for subject in triples_by_subject:
                subject_hash = self.iri_hash(subject)

                # A missing file means there is nothing to remove for this subject.
                if os.path.exists(self.hash_triples_path(subject_hash)):
                    graph = Graph().parse(
                        self.hash_triples_path(subject_hash), format="turtle"
                    )

                    for p, o in triples_by_subject[subject]:
                        graph.remove((subject, p, o))

                    graph.serialize(
                        destination=self.hash_triples_path(subject_hash),
                        format="turtle",
                    )

            # Update the live graph
            self.__live_graph -= triples

    ## Ontology Methods

    def get(self) -> Graph:
        """Return the in-memory aggregate graph (not a copy)."""
        return self.__live_graph

    def get_subject_graph(self, subject: URIRef) -> Graph:
        """Return the persisted graph for *subject*.

        Raises:
            Exceptions.SubjectNotFoundError: if no file exists for the subject.
        """
        subject_hash = self.iri_hash(subject)

        if os.path.exists(self.hash_triples_path(subject_hash)):
            return Graph().parse(self.hash_triples_path(subject_hash), format="turtle")

        raise Exceptions.SubjectNotFoundError(f"Subject {subject} not found")

    def load(self) -> Graph:
        """Parse every subject file into a single aggregate graph."""
        with self.__lock:
            triples = Graph()

            # Fixed: list the configured triples directory instead of the
            # hard-coded "triples" name.
            for file in os.listdir(
                os.path.join(self.__store_path, self.__triples_path)
            ):
                try:
                    g = Graph().parse(self.hash_triples_path(file), format="turtle")
                except Exception as e:
                    from naas_abi_core import logger

                    logger.error(
                        f"Error loading triples from {self.hash_triples_path(file)}: {e}"
                    )
                    raise e

                for prefix, namespace in g.namespaces():
                    triples.bind(prefix, namespace)

                triples += g

            return triples

    def query(self, query: str) -> query.Result:
        """Run a SPARQL query against the live aggregate graph."""
        with self.__lock:
            aggregate_graph = self.get()

            return aggregate_graph.query(query)

    def query_view(self, view: str, query: str) -> rdflib.query.Result:
        """Run a SPARQL query over only the subject files linked into *view*.

        Raises:
            Exceptions.ViewNotFoundError: if the view directory does not exist.
        """
        with self.__lock:
            if os.path.exists(os.path.join(self.__store_path, "views", view)):
                aggregate_graph = Graph()

                for file in os.listdir(os.path.join(self.__store_path, "views", view)):
                    g = Graph().parse(
                        os.path.join(self.__store_path, "views", view, file),
                        format="turtle",
                    )

                    for prefix, namespace in g.namespaces():
                        aggregate_graph.bind(prefix, namespace)

                    aggregate_graph += g

                return aggregate_graph.query(query)
            else:
                raise Exceptions.ViewNotFoundError(f"View {view} not found")

    def handle_view_event(
        self,
        view: Tuple[URIRef | None, URIRef | None, URIRef | None],
        event: OntologyEvent,
        triple: Tuple[URIRef | None, URIRef | None, URIRef | None],
    ):
        """Maintain view symlinks when a triple matching a view changes.

        The view directory is named ``<label>_<object_id>`` after the triple's
        object; each member is a symlink back to the subject's triples file.
        """
        s, _, o = triple

        assert isinstance(s, BNode) or isinstance(s, URIRef), type(s)
        assert isinstance(o, BNode) or isinstance(o, URIRef), type(o)

        partition_hash = self.iri_hash(o)

        if os.path.exists(self.hash_triples_path(partition_hash)):
            graph = Graph().parse(
                self.hash_triples_path(partition_hash), format="turtle"
            )

            label = graph.value(subject=o, predicate=RDFS.label)
            object_id = str(o).split("/")[-1].split("#")[-1]

            dir_name = f"{label}_{object_id}"

            os.makedirs(
                os.path.join(self.__store_path, "views", dir_name), exist_ok=True
            )

            if event == OntologyEvent.INSERT:
                try:
                    # Create symbolic link (relative path climbing two levels
                    # from views/<dir_name>/ back to the triples directory).
                    # Fixed: honour the configured triples_path instead of the
                    # hard-coded "triples".
                    os.symlink(
                        os.path.join(
                            "..",
                            "..",
                            self.__triples_path,
                            f"{self.iri_hash(s)}.ttl",
                        ),
                        os.path.join(
                            self.__store_path,
                            "views",
                            dir_name,
                            f"{self.iri_hash(s)}.ttl",
                        ),
                    )
                except FileExistsError:
                    # Link already present: insert is idempotent.
                    pass
            elif event == OntologyEvent.DELETE:
                # Remove symbolic link
                try:
                    os.remove(
                        os.path.join(
                            self.__store_path,
                            "views",
                            dir_name,
                            f"{self.iri_hash(s)}.ttl",
                        )
                    )
                except FileNotFoundError:
                    # Link already gone: delete is idempotent.
                    pass
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
import queue
|
|
2
|
+
import time
|
|
3
|
+
from threading import Lock, Thread
|
|
4
|
+
from typing import Any, Dict, List, Tuple
|
|
5
|
+
|
|
6
|
+
import rdflib
|
|
7
|
+
from naas_abi_core import logger
|
|
8
|
+
from naas_abi_core.services.object_storage.ObjectStoragePort import (
|
|
9
|
+
Exceptions as ObjectStorageExceptions,
|
|
10
|
+
)
|
|
11
|
+
from naas_abi_core.services.object_storage.ObjectStorageService import (
|
|
12
|
+
ObjectStorageService,
|
|
13
|
+
)
|
|
14
|
+
from naas_abi_core.services.triple_store.adaptors.secondary.base.TripleStoreService__SecondaryAdaptor__FileBase import (
|
|
15
|
+
TripleStoreService__SecondaryAdaptor__FileBase,
|
|
16
|
+
)
|
|
17
|
+
from naas_abi_core.services.triple_store.TripleStorePorts import (
|
|
18
|
+
Exceptions,
|
|
19
|
+
ITripleStorePort,
|
|
20
|
+
OntologyEvent,
|
|
21
|
+
)
|
|
22
|
+
from naas_abi_core.utils.Workers import Job, WorkerPool
|
|
23
|
+
from rdflib import Graph, Node, URIRef, query
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class TripleStoreService__SecondaryAdaptor__ObjectStorage(
    ITripleStorePort, TripleStoreService__SecondaryAdaptor__FileBase
):
    """Triple-store adaptor persisting one Turtle object per subject in object storage.

    Objects are keyed ``<triples_prefix>/<sha256(subject)>.ttl``.  A full
    in-memory copy (``__live_graph``) serves reads and queries; per-subject
    writes are fanned out to a worker pool.
    """

    __object_storage_service: ObjectStorageService  # backing object store

    __triples_prefix: str  # key prefix under which subject objects live

    __live_graph: Graph  # in-memory union of all persisted triples

    __lock: Lock  # serializes mutations of storage and the live graph

    def __init__(
        self,
        object_storage_service: ObjectStorageService,
        triples_prefix: str = "triples",
    ):
        """Set up the storage backend and load all persisted triples."""
        logger.debug("Initializing TripleStoreService__SecondaryAdaptor__ObjectStorage")
        self.__object_storage_service = object_storage_service
        self.__triples_prefix = triples_prefix

        self.__lock = Lock()

        self.__insert_pool = WorkerPool(num_workers=50)

        self.__live_graph = self.load()

    def load_triples(self, subject_hash: str) -> Graph:
        """Fetch and parse the Turtle object stored for *subject_hash*."""
        obj: bytes = self.__object_storage_service.get_object(
            prefix=self.__triples_prefix, key=f"{subject_hash}.ttl"
        )

        content: str = obj.decode("utf-8")

        return Graph().parse(data=str(content), format="turtle")

    def store(self, name: str, triples: Graph):
        """Serialize *triples* as Turtle and upload them under ``<name>.ttl``."""
        serialized_triples = triples.serialize(format="turtle").encode("utf-8")
        self.__object_storage_service.put_object(
            prefix=self.__triples_prefix, key=f"{name}.ttl", content=serialized_triples
        )

    def insert(self, triples: Graph):
        """Merge *triples* into per-subject objects (in parallel) and the live graph."""
        with self.__lock:
            triples_by_subject: Dict[Node, List[Tuple[Node, Node]]] = (
                self.triples_by_subject(triples)
            )

            def __insert(
                subject: URIRef, triples_by_subject: Dict[Node, List[Tuple[Node, Node]]]
            ):
                # Read-modify-write of one subject's stored object.
                subject_hash = self.iri_hash(subject)

                try:
                    graph = self.load_triples(subject_hash)
                except ObjectStorageExceptions.ObjectNotFound:
                    graph = Graph()
                    for prefix, namespace in triples.namespaces():
                        graph.bind(prefix, namespace)

                for p, o in triples_by_subject[subject]:
                    graph.add((subject, p, o))

                self.store(subject_hash, graph)

            jobs: List[Job] = [
                Job(
                    queue=None,
                    func=__insert,
                    subject=subject,
                    triples_by_subject=triples_by_subject,
                )
                for subject in triples_by_subject
            ]

            result_queue: queue.Queue[Job] = self.__insert_pool.submit_all(jobs)

            # NOTE(review): this wait assumes submit_all returns a bounded
            # queue with maxsize == len(jobs); with an unbounded queue
            # (maxsize=0) the loop would not wait at all — confirm against
            # WorkerPool.submit_all.
            while result_queue.qsize() < result_queue.maxsize:
                logger.debug(f"Inserting {result_queue.qsize()}/{result_queue.maxsize}")
                time.sleep(0.1)

            for prefix, namespace in triples.namespaces():
                self.__live_graph.bind(prefix, namespace)

            self.__live_graph += triples

    def remove(self, triples: Graph):
        """Delete *triples* from per-subject objects and the live graph."""
        with self.__lock:
            triples_by_subject: Dict[Any, List[Tuple[Any, Any]]] = (
                self.triples_by_subject(triples)
            )

            for subject in triples_by_subject:
                subject_hash = self.iri_hash(subject)

                try:
                    graph = self.load_triples(subject_hash)
                    for p, o in triples_by_subject[subject]:
                        # BUGFIX: was graph.add(...), which re-inserted the
                        # triples being removed into the stored object (the
                        # filesystem adaptor correctly calls graph.remove).
                        graph.remove((subject, p, o))

                    self.store(subject_hash, graph)
                except ObjectStorageExceptions.ObjectNotFound:
                    # Nothing stored for this subject: nothing to remove.
                    pass

            for prefix, namespace in triples.namespaces():
                self.__live_graph.bind(prefix, namespace)

            self.__live_graph -= triples

    def get_subject_graph(self, subject: str | URIRef) -> Graph:
        """Return the persisted graph for *subject* (plain strings are coerced).

        Raises:
            Exceptions.SubjectNotFoundError: if no object exists for the subject.
        """
        subject_hash = (
            self.iri_hash(URIRef(subject))
            if isinstance(subject, str)
            else self.iri_hash(subject)
        )

        try:
            graph = self.load_triples(subject_hash)
            return graph
        except ObjectStorageExceptions.ObjectNotFound:
            raise Exceptions.SubjectNotFoundError(f"Subject {subject} not found")

    def load(self) -> Graph:
        """Stream object keys and parse them in parallel into one aggregate graph."""
        with self.__lock:
            logger.debug("Loading triples from object storage")
            triples = Graph()

            # Queue to stream files as they are discovered
            files_queue: queue.Queue[str] = queue.Queue()
            worker_pool = WorkerPool(num_workers=50)
            result_queue: queue.Queue[Job] = queue.Queue()
            job_nbr = 0

            def list_objects_worker(files_queue: queue.Queue):
                # An empty prefix raises ObjectNotFound; treat it as "no triples yet".
                try:
                    self.__object_storage_service.list_objects(
                        prefix=self.__triples_prefix, queue=files_queue
                    )
                except ObjectStorageExceptions.ObjectNotFound:
                    pass

            list_object_thread = Thread(
                target=list_objects_worker, args=(files_queue,), daemon=True
            )
            list_object_thread.start()

            # Process files as they are discovered
            while not files_queue.empty() or list_object_thread.is_alive():
                try:
                    file: str = files_queue.get(timeout=1.0)

                    file_hash = file.split("/")[-1].split(".")[0]
                    worker_pool.submit(
                        Job(
                            queue=result_queue,
                            func=self.load_triples,
                            subject_hash=file_hash,
                        )
                    )
                    job_nbr += 1
                except queue.Empty:
                    continue

            # Consume completed jobs.
            completed_jobs = 0
            while completed_jobs < job_nbr:
                logger.debug(f"{completed_jobs}/{job_nbr}")
                try:
                    job = result_queue.get(timeout=1.0)
                    if job.is_completed():
                        result_graph = job.get_result()
                        triples += result_graph

                        for prefix, namespace in result_graph.namespaces():
                            triples.bind(prefix, namespace)

                        completed_jobs += 1
                except queue.Empty:
                    continue

            logger.debug(f"Loaded {len(triples)} triples")

            logger.debug("Joining list objects thread")
            list_object_thread.join()

            logger.debug("Shutting down worker pool")
            worker_pool.shutdown()

            logger.debug("Done")

            return triples

    def get(self) -> Graph:
        """Return the in-memory aggregate graph (not a copy)."""
        return self.__live_graph

    def query(self, query: str) -> query.Result:
        """Run a SPARQL query against the live aggregate graph."""
        with self.__lock:
            return self.get().query(query)

    def query_view(self, view: str, query: str) -> rdflib.query.Result:
        """Run a SPARQL query; *view* is currently ignored.

        NOTE(review): unlike the filesystem adaptor this does not scope the
        query to the view — confirm whether views are unsupported for the
        object-storage backend.
        """
        with self.__lock:
            return self.get().query(query)

    def handle_view_event(
        self,
        view: Tuple[URIRef | None, URIRef | None, URIRef | None],
        event: OntologyEvent,
        triple: Tuple[URIRef | None, URIRef | None, URIRef | None],
    ):
        """View maintenance is a no-op for the object-storage backend."""
        pass
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
from rdflib import Graph, Node, URIRef
|
|
3
|
+
from typing import Dict, List, Tuple
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TripleStoreService__SecondaryAdaptor__FileBase:
    """Helpers shared by file-based triple-store adaptors.

    Provides the subject-IRI → file-name hashing scheme and a utility to
    partition a graph's triples by subject.
    """

    def iri_hash(self, iri: URIRef) -> str:
        """Return the hex SHA-256 digest of *iri*, used as a stable file name."""
        digest = hashlib.sha256(iri.encode("utf-8"))
        return digest.hexdigest()

    def triples_by_subject(self, triples: Graph) -> Dict[Node, List[Tuple[Node, Node]]]:
        """Group every (predicate, object) pair in *triples* under its subject."""
        grouped: Dict[Node, List[Tuple[Node, Node]]] = {}

        for subj, pred, obj in triples.triples((None, None, None)):
            grouped.setdefault(subj, []).append((pred, obj))

        return grouped
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import List, Dict, Any, Optional
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class VectorDocument:
    """A vector plus its identifier and attached data, as stored in a vector store."""

    id: str  # unique identifier of the document
    vector: np.ndarray  # embedding vector
    metadata: Dict[str, Any]  # arbitrary metadata attached to the vector
    payload: Optional[Dict[str, Any]] = None  # optional extra data stored alongside


@dataclass
class SearchResult:
    """One hit from a similarity search; optional fields depend on include_* flags."""

    id: str  # identifier of the matched document
    score: float  # similarity score for the match
    vector: Optional[np.ndarray] = None  # the stored vector, when requested
    metadata: Optional[Dict[str, Any]] = None  # stored metadata, when requested
    payload: Optional[Dict[str, Any]] = None  # stored payload, when present


class IVectorStorePort(ABC):
    """Port (backend interface) for vector-store implementations."""

    @abstractmethod
    def initialize(self) -> None:
        """Prepare the backend (clients, connections) before first use."""
        ...

    @abstractmethod
    def create_collection(
        self,
        collection_name: str,
        dimension: int,
        distance_metric: str = "cosine",
        **kwargs
    ) -> None:
        """Create a collection of vectors of the given dimension and metric."""
        ...

    @abstractmethod
    def delete_collection(self, collection_name: str) -> None:
        """Drop a collection and all vectors it contains."""
        ...

    @abstractmethod
    def list_collections(self) -> List[str]:
        """Return the names of all existing collections."""
        ...

    @abstractmethod
    def store_vectors(
        self,
        collection_name: str,
        documents: List[VectorDocument]
    ) -> None:
        """Upsert the given documents into a collection."""
        ...

    @abstractmethod
    def search(
        self,
        collection_name: str,
        query_vector: np.ndarray,
        k: int = 10,
        filter: Optional[Dict[str, Any]] = None,
        include_vectors: bool = False,
        include_metadata: bool = True
    ) -> List[SearchResult]:
        """Return the *k* nearest documents to *query_vector*, optionally filtered."""
        ...

    @abstractmethod
    def get_vector(
        self,
        collection_name: str,
        vector_id: str,
        include_vector: bool = True
    ) -> Optional[VectorDocument]:
        """Fetch one document by id, or None when it does not exist."""
        ...

    @abstractmethod
    def update_vector(
        self,
        collection_name: str,
        vector_id: str,
        vector: Optional[np.ndarray] = None,
        metadata: Optional[Dict[str, Any]] = None,
        payload: Optional[Dict[str, Any]] = None
    ) -> None:
        """Update any subset of a stored document's vector, metadata, or payload."""
        ...

    @abstractmethod
    def delete_vectors(
        self,
        collection_name: str,
        vector_ids: List[str]
    ) -> None:
        """Delete the documents with the given ids from a collection."""
        ...

    @abstractmethod
    def count_vectors(self, collection_name: str) -> int:
        """Return the number of vectors stored in a collection."""
        ...

    @abstractmethod
    def close(self) -> None:
        """Release backend resources (connections, file handles)."""
        ...