naas-abi-core 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- assets/favicon.ico +0 -0
- assets/logo.png +0 -0
- naas_abi_core/__init__.py +1 -0
- naas_abi_core/apps/api/api.py +245 -0
- naas_abi_core/apps/api/api_test.py +281 -0
- naas_abi_core/apps/api/openapi_doc.py +144 -0
- naas_abi_core/apps/mcp/Dockerfile.mcp +35 -0
- naas_abi_core/apps/mcp/mcp_server.py +243 -0
- naas_abi_core/apps/mcp/mcp_server_test.py +163 -0
- naas_abi_core/apps/terminal_agent/main.py +555 -0
- naas_abi_core/apps/terminal_agent/terminal_style.py +175 -0
- naas_abi_core/engine/Engine.py +87 -0
- naas_abi_core/engine/EngineProxy.py +109 -0
- naas_abi_core/engine/Engine_test.py +6 -0
- naas_abi_core/engine/IEngine.py +91 -0
- naas_abi_core/engine/conftest.py +45 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration.py +216 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_Deploy.py +7 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_GenericLoader.py +49 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_ObjectStorageService.py +159 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_ObjectStorageService_test.py +26 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_SecretService.py +138 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_SecretService_test.py +74 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_TripleStoreService.py +224 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_TripleStoreService_test.py +109 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_VectorStoreService.py +76 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_VectorStoreService_test.py +33 -0
- naas_abi_core/engine/engine_configuration/EngineConfiguration_test.py +9 -0
- naas_abi_core/engine/engine_configuration/utils/PydanticModelValidator.py +15 -0
- naas_abi_core/engine/engine_loaders/EngineModuleLoader.py +302 -0
- naas_abi_core/engine/engine_loaders/EngineOntologyLoader.py +16 -0
- naas_abi_core/engine/engine_loaders/EngineServiceLoader.py +47 -0
- naas_abi_core/integration/__init__.py +7 -0
- naas_abi_core/integration/integration.py +28 -0
- naas_abi_core/models/Model.py +198 -0
- naas_abi_core/models/OpenRouter.py +18 -0
- naas_abi_core/models/OpenRouter_test.py +36 -0
- naas_abi_core/module/Module.py +252 -0
- naas_abi_core/module/ModuleAgentLoader.py +50 -0
- naas_abi_core/module/ModuleUtils.py +20 -0
- naas_abi_core/modules/templatablesparqlquery/README.md +196 -0
- naas_abi_core/modules/templatablesparqlquery/__init__.py +39 -0
- naas_abi_core/modules/templatablesparqlquery/ontologies/TemplatableSparqlQueryOntology.ttl +116 -0
- naas_abi_core/modules/templatablesparqlquery/workflows/GenericWorkflow.py +48 -0
- naas_abi_core/modules/templatablesparqlquery/workflows/TemplatableSparqlQueryLoader.py +192 -0
- naas_abi_core/pipeline/__init__.py +6 -0
- naas_abi_core/pipeline/pipeline.py +70 -0
- naas_abi_core/services/__init__.py +0 -0
- naas_abi_core/services/agent/Agent.py +1619 -0
- naas_abi_core/services/agent/AgentMemory_test.py +28 -0
- naas_abi_core/services/agent/Agent_test.py +214 -0
- naas_abi_core/services/agent/IntentAgent.py +1179 -0
- naas_abi_core/services/agent/IntentAgent_test.py +139 -0
- naas_abi_core/services/agent/beta/Embeddings.py +181 -0
- naas_abi_core/services/agent/beta/IntentMapper.py +120 -0
- naas_abi_core/services/agent/beta/LocalModel.py +88 -0
- naas_abi_core/services/agent/beta/VectorStore.py +89 -0
- naas_abi_core/services/agent/test_agent_memory.py +278 -0
- naas_abi_core/services/agent/test_postgres_integration.py +145 -0
- naas_abi_core/services/cache/CacheFactory.py +31 -0
- naas_abi_core/services/cache/CachePort.py +63 -0
- naas_abi_core/services/cache/CacheService.py +246 -0
- naas_abi_core/services/cache/CacheService_test.py +85 -0
- naas_abi_core/services/cache/adapters/secondary/CacheFSAdapter.py +39 -0
- naas_abi_core/services/object_storage/ObjectStorageFactory.py +57 -0
- naas_abi_core/services/object_storage/ObjectStoragePort.py +47 -0
- naas_abi_core/services/object_storage/ObjectStorageService.py +41 -0
- naas_abi_core/services/object_storage/adapters/secondary/ObjectStorageSecondaryAdapterFS.py +52 -0
- naas_abi_core/services/object_storage/adapters/secondary/ObjectStorageSecondaryAdapterNaas.py +131 -0
- naas_abi_core/services/object_storage/adapters/secondary/ObjectStorageSecondaryAdapterS3.py +171 -0
- naas_abi_core/services/ontology/OntologyPorts.py +36 -0
- naas_abi_core/services/ontology/OntologyService.py +17 -0
- naas_abi_core/services/ontology/adaptors/secondary/OntologyService_SecondaryAdaptor_NERPort.py +37 -0
- naas_abi_core/services/secret/Secret.py +138 -0
- naas_abi_core/services/secret/SecretPorts.py +45 -0
- naas_abi_core/services/secret/Secret_test.py +65 -0
- naas_abi_core/services/secret/adaptors/secondary/Base64Secret.py +57 -0
- naas_abi_core/services/secret/adaptors/secondary/Base64Secret_test.py +39 -0
- naas_abi_core/services/secret/adaptors/secondary/NaasSecret.py +88 -0
- naas_abi_core/services/secret/adaptors/secondary/NaasSecret_test.py +25 -0
- naas_abi_core/services/secret/adaptors/secondary/dotenv_secret_secondaryadaptor.py +29 -0
- naas_abi_core/services/triple_store/TripleStoreFactory.py +116 -0
- naas_abi_core/services/triple_store/TripleStorePorts.py +223 -0
- naas_abi_core/services/triple_store/TripleStoreService.py +419 -0
- naas_abi_core/services/triple_store/adaptors/secondary/AWSNeptune.py +1300 -0
- naas_abi_core/services/triple_store/adaptors/secondary/AWSNeptune_test.py +284 -0
- naas_abi_core/services/triple_store/adaptors/secondary/Oxigraph.py +597 -0
- naas_abi_core/services/triple_store/adaptors/secondary/Oxigraph_test.py +1474 -0
- naas_abi_core/services/triple_store/adaptors/secondary/TripleStoreService__SecondaryAdaptor__Filesystem.py +223 -0
- naas_abi_core/services/triple_store/adaptors/secondary/TripleStoreService__SecondaryAdaptor__ObjectStorage.py +234 -0
- naas_abi_core/services/triple_store/adaptors/secondary/base/TripleStoreService__SecondaryAdaptor__FileBase.py +18 -0
- naas_abi_core/services/vector_store/IVectorStorePort.py +101 -0
- naas_abi_core/services/vector_store/IVectorStorePort_test.py +189 -0
- naas_abi_core/services/vector_store/VectorStoreFactory.py +47 -0
- naas_abi_core/services/vector_store/VectorStoreService.py +171 -0
- naas_abi_core/services/vector_store/VectorStoreService_test.py +185 -0
- naas_abi_core/services/vector_store/__init__.py +13 -0
- naas_abi_core/services/vector_store/adapters/QdrantAdapter.py +251 -0
- naas_abi_core/services/vector_store/adapters/QdrantAdapter_test.py +57 -0
- naas_abi_core/tests/test_services_imports.py +69 -0
- naas_abi_core/utils/Expose.py +55 -0
- naas_abi_core/utils/Graph.py +182 -0
- naas_abi_core/utils/JSON.py +49 -0
- naas_abi_core/utils/LazyLoader.py +44 -0
- naas_abi_core/utils/Logger.py +12 -0
- naas_abi_core/utils/OntologyReasoner.py +141 -0
- naas_abi_core/utils/OntologyYaml.py +681 -0
- naas_abi_core/utils/SPARQL.py +256 -0
- naas_abi_core/utils/Storage.py +33 -0
- naas_abi_core/utils/StorageUtils.py +398 -0
- naas_abi_core/utils/String.py +52 -0
- naas_abi_core/utils/Workers.py +114 -0
- naas_abi_core/utils/__init__.py +0 -0
- naas_abi_core/utils/onto2py/README.md +0 -0
- naas_abi_core/utils/onto2py/__init__.py +10 -0
- naas_abi_core/utils/onto2py/__main__.py +29 -0
- naas_abi_core/utils/onto2py/onto2py.py +611 -0
- naas_abi_core/utils/onto2py/tests/ttl2py_test.py +271 -0
- naas_abi_core/workflow/__init__.py +5 -0
- naas_abi_core/workflow/workflow.py +48 -0
- naas_abi_core-1.4.1.dist-info/METADATA +630 -0
- naas_abi_core-1.4.1.dist-info/RECORD +124 -0
- naas_abi_core-1.4.1.dist-info/WHEEL +4 -0
- naas_abi_core-1.4.1.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
from typing import Dict, List, Optional
|
|
2
|
+
|
|
3
|
+
import rdflib
|
|
4
|
+
from rdflib import DCTERMS, OWL, RDF, RDFS, XSD, Graph, URIRef, query
|
|
5
|
+
|
|
6
|
+
from naas_abi_core import logger
|
|
7
|
+
from naas_abi_core.services.triple_store.TripleStorePorts import ITripleStoreService
|
|
8
|
+
from naas_abi_core.utils.Graph import ABI, BFO, CCO, TEST
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SPARQLUtils:
    """SPARQL helper utilities built on top of an ITripleStoreService.

    Provides result shaping (rows to dicts) and common lookups: the class of
    an individual, its rdfs:label, identifier-to-URI resolution, and
    extraction of a subject-centred subgraph.
    """

    # Underlying triple store service; all queries are routed through it.
    __triple_store_service: ITripleStoreService

    def __init__(self, triple_store_service: ITripleStoreService):
        self.__triple_store_service = triple_store_service

    @property
    def triple_store_service(self) -> ITripleStoreService:
        """The triple store service used for all SPARQL queries."""
        return self.__triple_store_service

    def results_to_list(self, results: rdflib.query.Result) -> Optional[List[Dict]]:
        """
        Transform SPARQL query results to a list of dictionaries using the labels as keys.

        Args:
            results (query.Result): The SPARQL query results to transform

        Returns:
            Optional[List[Dict]]: List of dictionaries with query results, or None if no results
        """
        data = []
        for row in results:
            assert isinstance(row, query.ResultRow)
            logger.debug(f"==> Row: {row}")
            data_dict = {}
            for key in row.labels:
                # Unbound/falsy bindings are surfaced as None, not the string "None".
                data_dict[key] = str(row[key]) if row[key] else None
            data.append(data_dict)
        return data if len(data) > 0 else None

    def get_class_uri_from_individual_uri(
        self,
        uri: str | URIRef,
    ) -> Optional[URIRef]:
        """
        Get the class URI for a given individual URI from the triple store.

        Args:
            uri (str | URIRef): The individual URI to look up

        Returns:
            Optional[URIRef]: The class URI if found, None otherwise.
                (Annotation fixed: the method returns a URIRef, not a plain str.)
        """
        # owl:NamedIndividual is filtered out so the "real" class is returned.
        sparql_query = f"""
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>
        SELECT ?type
        WHERE {{
            <{uri}> rdf:type ?type .
            FILTER(?type != owl:NamedIndividual)
        }}
        LIMIT 1
        """

        try:
            results = self.triple_store_service.query(sparql_query)
            for row in results:
                assert isinstance(row, query.ResultRow)
                return URIRef(str(row.type))
            return None
        except Exception as e:
            logger.error(f"Error getting class URI for {uri}: {e}")
            return None

    def get_rdfs_label_from_individual_uri(
        self,
        uri: str | URIRef,
    ) -> Optional[str]:
        """
        Get the RDFS label for a given individual URI from the triple store.

        Args:
            uri (str | URIRef): The individual URI to look up

        Returns:
            Optional[str]: The RDFS label if found, None otherwise
        """
        sparql_query = f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        SELECT ?label
        WHERE {{
            <{uri}> rdfs:label ?label .
        }}
        LIMIT 1
        """

        try:
            results = self.triple_store_service.query(sparql_query)
            for row in results:
                assert isinstance(row, query.ResultRow)
                return str(row.label)
            return None
        except Exception as e:
            logger.error(f"Error getting label for {uri}: {e}")
            return None

    def get_identifier(
        self,
        identifier: str,
        type: URIRef = URIRef("http://ontology.naas.ai/abi/unique_id"),
        graph: Optional[Graph] = None,
    ) -> Optional[URIRef]:
        """
        Get the URI for a given identifier from the triple store or provided graph.

        Args:
            identifier (str): The identifier string to look up
            type (URIRef, optional): The predicate type to use for the lookup.
                Defaults to "http://ontology.naas.ai/abi/unique_id"
            graph (Graph, optional): Optional RDFlib Graph to query instead of
                the triple store. Defaults to None (query the triple store).
                Bug fix: the previous default of `Graph()` was a shared mutable
                default argument.

        Returns:
            Optional[URIRef]: The URI if found, None otherwise
        """
        # NOTE(review): identifier is interpolated directly into the query
        # string; a value containing '"' would break or alter the query.
        # Confirm identifiers are trusted/sanitised upstream.
        sparql_query = f"""
        SELECT ?s
        WHERE {{
            ?s <{str(type)}> "{identifier}" .
        }}
        LIMIT 1
        """
        try:
            if graph is not None and len(graph) > 0:
                results = graph.query(sparql_query)
            else:
                # Bug fix: was `self.query(...)`, which does not exist on this
                # class (hence the original `# type: ignore`); route through
                # the triple store service instead.
                results = self.triple_store_service.query(sparql_query)

            for row in results:
                assert isinstance(row, query.ResultRow)
                # Use existing URI if found
                return URIRef(str(row.s))
        except Exception as e:
            logger.error(f"Error getting identifier for {identifier}: {e}")
            return None
        return None

    def get_identifiers(
        self,
        property_uri: URIRef = URIRef("http://ontology.naas.ai/abi/unique_id"),
        class_uri: Optional[URIRef] = None,
    ) -> dict[str, URIRef]:
        """
        Get a mapping of all identifiers to their URIs from the triple store.

        Args:
            property_uri (URIRef, optional): The predicate URI to use for the lookup.
                Defaults to "http://ontology.naas.ai/abi/unique_id"
            class_uri (URIRef, optional): Optional class URI to filter results.
                Only return identifiers for instances of this class.
                Defaults to None.

        Returns:
            dict[str, URIRef]: Dictionary mapping identifiers to their URIs
                (empty dict on failure).
        """
        sparql_query = f"""
        SELECT ?s ?id
        WHERE {{
            ?s <{str(property_uri)}> ?id .
            {f"?s a <{str(class_uri)}> ." if class_uri else ""}
        }}
        """
        try:
            # Bug fix: was `self.query(...)`, which does not exist on this
            # class; route through the triple store service instead.
            results = self.triple_store_service.query(sparql_query)

            id_map = {}
            for row in results:
                assert isinstance(row, query.ResultRow)
                id_map[str(row.id)] = URIRef(str(row.s))
            return id_map
        except Exception as e:
            logger.error(f"Error getting identifiers map: {e}")
            return {}

    def get_subject_graph(self, uri: str | URIRef, depth: int = 1) -> Graph:
        """
        Get a graph for a given URI with a specified depth of relationships.
        This recursively follows relationships to build a more detailed subgraph.
        The resulting graph includes all triples where the given URI is the subject,
        and optionally follows object URIs to include their relationships up to the specified depth.

        Args:
            uri (str | URIRef): The URI to build the graph around
            depth (int): How many levels deep to traverse relationships. A depth of 0 returns an empty graph,
                1 returns direct relationships, 2 includes relationships of related objects, etc.
                Defaults to 1.

        Returns:
            Graph: RDFlib Graph containing all triples within the specified depth, with standard namespace
                prefixes bound (rdfs, rdf, owl, xsd, dcterms, abi, bfo, cco, test)
        """
        if depth <= 0:
            return Graph()

        # Build the CONSTRUCT query dynamically based on depth: level 0 anchors
        # on the given URI; deeper levels chain off the previous level's object
        # (OPTIONAL so missing deeper links don't drop the shallower triples,
        # and FILTER(isURI(...)) so literals are not followed).
        construct_clauses = []
        where_clauses = []

        for i in range(depth):
            if i == 0:
                construct_clauses.append(f"<{str(uri)}> ?p{i} ?o{i} .")
                where_clauses.append(f"<{str(uri)}> ?p{i} ?o{i} .")
            else:
                construct_clauses.append(f"?o{i - 1} ?p{i} ?o{i} .")
                where_clauses.append(
                    f"OPTIONAL {{ ?o{i - 1} ?p{i} ?o{i} . FILTER(isURI(?o{i - 1})) }}"
                )

        sparql_query = f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX abi: <http://ontology.naas.ai/abi/>
        PREFIX bfo: <http://purl.obolibrary.org/obo/>
        PREFIX cco: <https://www.commoncoreontologies.org/>
        CONSTRUCT {{
            {" ".join(construct_clauses)}
        }}
        WHERE {{
            {" ".join(where_clauses)}
        }}
        """
        try:
            results = self.triple_store_service.query(sparql_query)
        except Exception as e:
            logger.error(f"Error getting subject graph for {uri}: {e}")
            return Graph()

        graph = Graph()
        graph.bind("rdfs", RDFS)
        graph.bind("rdf", RDF)
        graph.bind("owl", OWL)
        graph.bind("xsd", XSD)
        graph.bind("dcterms", DCTERMS)
        graph.bind("abi", ABI)
        graph.bind("bfo", BFO)
        graph.bind("cco", CCO)
        graph.bind("test", TEST)
        for triple in results:
            # CONSTRUCT queries return triples directly, no need for ResultRow handling
            graph.add(triple)  # type: ignore

        return graph
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
class NoStorageFolderFound(Exception):
    """Raised when no matching storage folder exists up to the filesystem root."""

    pass
|
|
5
|
+
|
|
6
|
+
# Look for a "storage" folder, walking up the directory tree until the root.
def find_storage_folder(base_path: str, needle: str = "storage") -> str:
    """Return the path of the nearest *needle* directory at or above base_path.

    Args:
        base_path: Directory to start searching from.
        needle: Name of the directory to look for. Defaults to "storage".

    Returns:
        str: Path of the first matching directory found.

    Raises:
        NoStorageFolderFound: If the filesystem root (or a path whose parent
            is itself) is reached without finding *needle*.
    """
    candidate = os.path.join(base_path, needle)
    if os.path.exists(candidate):
        return candidate

    parent = os.path.dirname(base_path)
    # Stop at "/" as before, but also at any fixpoint of dirname (e.g. ""
    # for relative paths, or drive roots on Windows) — previously those
    # recursed forever.
    if base_path == "/" or parent == base_path:
        raise NoStorageFolderFound("No storage folder found")

    # Bug fix: propagate the caller's needle on the recursive call instead
    # of silently reverting to the default "storage".
    return find_storage_folder(parent, needle)
|
|
15
|
+
|
|
16
|
+
def ensure_data_directory(module_name: str, component: str) -> str:
    """Create (if needed) and return the data directory for a module component.

    Following Code-Data Symmetry, the directory is resolved relative to the
    current working directory as
    storage/datastore/core/modules/<module_name>/<component>.

    Args:
        module_name: Name of the module (e.g., "__demo__", "your_module")
        component: Component type (e.g., "orchestration", "pipelines", "workflows")

    Returns:
        str: The absolute path to the created directory

    Example:
        data_dir = ensure_data_directory("__demo__", "orchestration")
        # Returns: <cwd>/storage/datastore/core/modules/__demo__/orchestration
    """
    segments = ("storage", "datastore", "core", "modules", module_name, component)
    target = os.path.join(*segments)
    os.makedirs(target, exist_ok=True)
    return os.path.abspath(target)
|
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from io import BytesIO
|
|
4
|
+
from typing import Dict, Tuple
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
import yaml
|
|
8
|
+
from rdflib import Graph
|
|
9
|
+
|
|
10
|
+
from naas_abi_core import logger
|
|
11
|
+
from naas_abi_core.services.object_storage.ObjectStoragePort import IObjectStorageDomain
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class StorageUtils:
    """High-level read/write helpers over an IObjectStorageDomain.

    Every `get_*` helper returns a neutral empty value (None, empty
    DataFrame, {}, empty Graph, empty BytesIO) on failure instead of
    raising; every `save_*` helper optionally writes a timestamped backup
    copy and always returns the (dir_path, file_name) pair it was given,
    even on failure. All failures are logged.
    """

    # Underlying object-storage service used for all get/put operations.
    __storage_service: IObjectStorageDomain

    def __init__(self, storage_service: IObjectStorageDomain):
        self.__storage_service = storage_service

    def __make_copy(
        self, dir_path: str, file_name: str, content: bytes
    ) -> Tuple[str, str]:
        """
        Make a copy of a file in storage with timestamp in the name.

        The copy is stored alongside the original, prefixed with
        YYYYMMDDTHHMMSS so copies sort chronologically. Failures are logged
        and swallowed (the primary write has already succeeded).
        """
        try:
            file_name = f"{datetime.now().strftime('%Y%m%dT%H%M%S')}_{file_name}"
            self.__storage_service.put_object(
                prefix=dir_path, key=file_name, content=content
            )
            return dir_path, file_name
        except Exception as e:
            logger.error(f"Error making copy in {dir_path}: {e}")
            return dir_path, file_name

    def get_text(
        self, dir_path: str, file_name: str, encoding: str = "utf-8"
    ) -> str | None:
        """
        Get a text file from storage.

        Args:
            dir_path: Directory (prefix) of the file in storage.
            file_name: Name of the file.
            encoding: Encoding used to decode the bytes. Defaults to "utf-8".

        Returns:
            str | None: Decoded content, or None on any failure.
        """
        try:
            content = self.__storage_service.get_object(dir_path, file_name)
            return content.decode(encoding)
        except Exception as e:
            logger.warning(f"Error getting text from {dir_path}: {e}")
            return None

    def save_text(
        self,
        text: str,
        dir_path: str,
        file_name: str,
        encoding: str = "utf-8",
        copy: bool = True,
    ) -> Tuple[str, str]:
        """
        Save a text file to storage.

        Args:
            text: Text content to save.
            dir_path: Directory (prefix) to write to.
            file_name: Name of the file.
            encoding: Encoding used to encode the text. Defaults to "utf-8".
            copy: Whether to also store a timestamped backup copy.

        Returns:
            Tuple[str, str]: (dir_path, file_name), even on failure.
        """
        try:
            content = text.encode(encoding)
            self.__storage_service.put_object(
                prefix=dir_path, key=file_name, content=content
            )
            if copy:
                self.__make_copy(dir_path, file_name, content)
            logger.debug(
                f"[save_text] File successfully written to storage: {dir_path}/{file_name}"
            )
            return dir_path, file_name
        except Exception as e:
            logger.error(f"Error saving text to {dir_path}: {e}")
            return dir_path, file_name

    def get_image(self, dir_path: str, file_name: str) -> bytes | None:
        """
        Get an image from storage.

        Returns:
            bytes | None: Raw image bytes, or None on failure.
        """
        try:
            return self.__storage_service.get_object(dir_path, file_name)
        except Exception as e:
            logger.warning(f"Error getting image from {dir_path}: {e}")
            return None

    def save_image(
        self, image: bytes, dir_path: str, file_name: str, copy: bool = True
    ) -> Tuple[str, str]:
        """
        Save an image to storage.

        Args:
            image: Raw image bytes.
            dir_path: Directory (prefix) to write to.
            file_name: Name of the file.
            copy: Whether to also store a timestamped backup copy.

        Returns:
            Tuple[str, str]: (dir_path, file_name), even on failure.
        """
        try:
            self.__storage_service.put_object(
                prefix=dir_path, key=file_name, content=image
            )
            if copy:
                self.__make_copy(dir_path, file_name, image)
            logger.debug(
                f"[save_image] File successfully written to storage: {dir_path}/{file_name}"
            )
            return dir_path, file_name
        except Exception as e:
            logger.error(f"Error saving image to {dir_path}: {e}")
            return dir_path, file_name

    def get_csv(
        self,
        dir_path: str,
        file_name: str,
        sep: str = ";",
        decimal: str = ",",
        encoding: str = "utf-8",
    ) -> pd.DataFrame:
        """
        Get a CSV file from storage.

        Args:
            dir_path: Directory (prefix) of the file in storage.
            file_name: Name of the file.
            sep: Field separator. Defaults to ";".
            decimal: Decimal marker. Defaults to ",".
            encoding: Text encoding. Defaults to "utf-8".

        Returns:
            pd.DataFrame: Parsed data, or an empty DataFrame on failure.
        """
        try:
            file_content = self.__storage_service.get_object(dir_path, file_name)
            # Create a BytesIO object to avoid file name length issues.
            # (Uses the module-level BytesIO import; the redundant local
            # `from io import BytesIO` has been removed.)
            csv_buffer = BytesIO(file_content)
            return pd.read_csv(csv_buffer, sep=sep, decimal=decimal, encoding=encoding)
        except Exception as e:
            logger.warning(f"Error getting CSV file from {dir_path}: {e}")
            return pd.DataFrame()

    def save_csv(
        self,
        data: pd.DataFrame,
        dir_path: str,
        file_name: str,
        sep: str = ";",
        decimal: str = ",",
        encoding: str = "utf-8",
        copy: bool = True,
    ) -> Tuple[str, str]:
        """
        Save a CSV file to storage.

        Args:
            data: DataFrame to serialize.
            dir_path: Directory (prefix) to write to.
            file_name: Name of the file.
            sep: Field separator. Defaults to ";".
            decimal: Decimal marker. Defaults to ",".
            encoding: Text encoding. Defaults to "utf-8".
            copy: Whether to also store a timestamped backup copy.

        Returns:
            Tuple[str, str]: (dir_path, file_name), even on failure.
        """
        try:
            # Serialize once and reuse for both the primary write and the
            # backup copy (previously the DataFrame was serialized twice).
            content = data.to_csv(
                index=False, encoding=encoding, sep=sep, decimal=decimal
            ).encode(encoding)
            self.__storage_service.put_object(
                prefix=dir_path, key=file_name, content=content
            )
            if copy:
                self.__make_copy(dir_path, file_name, content)
            logger.debug(
                f"[save_csv] File successfully written to storage: {dir_path}/{file_name}"
            )
            return dir_path, file_name
        except Exception as e:
            logger.error(f"Error saving CSV file to {dir_path}: {e}")
            return dir_path, file_name

    def get_excel(
        self,
        dir_path: str,
        file_name: str,
        sheet_name: str,
        skiprows: int = 0,
        usecols: list | None = None,
    ) -> pd.DataFrame:
        """
        Get an Excel file from storage.

        Args:
            dir_path: Directory (prefix) of the file in storage.
            file_name: Name of the file.
            sheet_name: Sheet to read.
            skiprows: Number of leading rows to skip. Defaults to 0.
            usecols: Optional column subset to read.

        Returns:
            pd.DataFrame: Parsed data, or an empty DataFrame on failure.
        """
        try:
            file_content = BytesIO(
                self.__storage_service.get_object(dir_path, file_name)
            )
            return pd.read_excel(
                file_content, sheet_name=sheet_name, skiprows=skiprows, usecols=usecols
            )
        except Exception as e:
            logger.warning(f"Error getting Excel file from {dir_path}: {e}")
            return pd.DataFrame()

    def save_excel(
        self,
        data: pd.DataFrame,
        dir_path: str,
        file_name: str,
        sheet_name: str,
        copy: bool = True,
    ) -> Tuple[str, str]:
        """
        Save an Excel file to storage.

        Args:
            data: DataFrame to serialize.
            dir_path: Directory (prefix) to write to.
            file_name: Name of the file.
            sheet_name: Sheet name to write the data into.
            copy: Whether to also store a timestamped backup copy.

        Returns:
            Tuple[str, str]: (dir_path, file_name), even on failure.
        """
        try:
            excel_buffer = BytesIO()
            data.to_excel(excel_buffer, index=False, sheet_name=sheet_name)
            excel_buffer.seek(0)
            content = excel_buffer.getvalue()
            self.__storage_service.put_object(
                prefix=dir_path, key=file_name, content=content
            )
            if copy:
                self.__make_copy(dir_path, file_name, content)
            logger.debug(
                f"[save_excel] File successfully written to storage: {dir_path}/{file_name}"
            )
            return dir_path, file_name
        except Exception as e:
            logger.error(f"Error saving Excel file to {dir_path}: {e}")
            return dir_path, file_name

    def get_json(self, dir_path: str, file_name: str) -> Dict:
        """
        Get JSON data from storage.

        Returns:
            Dict: Parsed JSON data, or {} on failure.
        """
        # NOTE(review): if the stored document is a JSON array, json.loads
        # returns a list despite the Dict annotation — confirm callers
        # handle that.
        try:
            file_content = self.__storage_service.get_object(
                dir_path, file_name
            ).decode("utf-8")
            data = json.loads(file_content)
            return data
        except Exception as e:
            logger.warning(f"Error getting JSON data from {dir_path}: {e}")
            return {}

    def save_json(
        self, data: dict | list, dir_path: str, file_name: str, copy: bool = True
    ) -> Tuple[str, str]:
        """
        Save JSON data to storage.

        Args:
            data: JSON-serializable dict or list.
            dir_path: Directory (prefix) to write to.
            file_name: Name of the file.
            copy: Whether to also store a timestamped backup copy.

        Returns:
            Tuple[str, str]: (dir_path, file_name), even on failure.
        """
        try:
            # Serialize once and reuse for both writes (previously the data
            # was dumped twice).
            content = json.dumps(data, indent=4, ensure_ascii=False).encode("utf-8")
            self.__storage_service.put_object(
                prefix=dir_path, key=file_name, content=content
            )
            if copy:
                # Only create a timestamped copy in storage, not local filesystem
                self.__make_copy(dir_path, file_name, content)
            logger.debug(
                f"[save_json] File successfully written to storage: {dir_path}/{file_name}"
            )
            return dir_path, file_name
        except Exception as e:
            logger.error(f"Error saving JSON data to {dir_path}: {e}")
            return dir_path, file_name

    def get_yaml(self, dir_path: str, file_name: str) -> Dict:
        """
        Get YAML data from storage.

        Returns:
            Dict: Parsed YAML data; {} if the document is empty or on failure.
        """
        try:
            file_content = self.__storage_service.get_object(
                dir_path, file_name
            ).decode("utf-8")
            data = yaml.safe_load(file_content)
            # yaml.safe_load returns None for an empty document.
            return data if data is not None else {}
        except Exception as e:
            logger.debug(f"Error getting YAML data from {dir_path}: {e}")
            return {}

    def save_yaml(
        self, data: dict | list, dir_path: str, file_name: str, copy: bool = True
    ) -> Tuple[str, str]:
        """
        Save YAML data to storage.

        Args:
            data: YAML-serializable dict or list.
            dir_path: Directory (prefix) to write to.
            file_name: Name of the file.
            copy: Whether to also store a timestamped backup copy.

        Returns:
            Tuple[str, str]: (dir_path, file_name), even on failure.
        """
        try:
            # Serialize once and reuse for both writes (previously the data
            # was dumped twice).
            content = yaml.dump(
                data, default_flow_style=False, allow_unicode=True, sort_keys=False
            ).encode("utf-8")
            self.__storage_service.put_object(
                prefix=dir_path, key=file_name, content=content
            )
            if copy:
                self.__make_copy(dir_path, file_name, content)
            logger.debug(
                f"[save_yaml] File successfully written to storage: {dir_path}/{file_name}"
            )
            return dir_path, file_name
        except Exception as e:
            logger.error(f"Error saving YAML data to {dir_path}: {e}")
            return dir_path, file_name

    def get_triples(
        self, dir_path: str, file_name: str, format: str = "turtle"
    ) -> Graph:
        """
        Get Turtle data from storage and return as RDFlib Graph.

        Args:
            dir_path: Directory (prefix) of the file in storage.
            file_name: Name of the file.
            format: RDF serialization format. Defaults to "turtle".

        Returns:
            Graph: Parsed graph, or an empty Graph on failure.
        """
        try:
            file_content = self.__storage_service.get_object(
                dir_path, file_name
            ).decode("utf-8")
            graph = Graph()
            graph.parse(data=file_content, format=format)
            return graph
        except Exception as e:
            logger.warning(f"Error getting triples from {dir_path}: {e}")
            return Graph()

    def save_triples(
        self,
        graph: Graph,
        dir_path: str,
        file_name: str,
        format: str = "turtle",
        copy: bool = True,
    ) -> Tuple[str, str]:
        """
        Save RDFlib Graph data to storage in Turtle format.

        Args:
            graph: Graph to serialize.
            dir_path: Directory (prefix) to write to.
            file_name: Name of the file.
            format: RDF serialization format. Defaults to "turtle".
            copy: Whether to also store a timestamped backup copy.

        Returns:
            Tuple[str, str]: (dir_path, file_name), even on failure.
        """
        try:
            # NOTE(review): `sort` is forwarded to the underlying serializer
            # via **args — confirm the installed rdflib serializer for this
            # format accepts it.
            turtle_content = graph.serialize(format=format, sort=False)
            # Encode once and reuse for both writes.
            content = turtle_content.encode("utf-8")
            self.__storage_service.put_object(
                prefix=dir_path, key=file_name, content=content
            )
            if copy:
                self.__make_copy(dir_path, file_name, content)
            logger.debug(
                f"[save_triples] File successfully written to storage: {dir_path}/{file_name}"
            )
            return dir_path, file_name
        except Exception as e:
            logger.error(f"Error saving triples to {dir_path}: {e}")
            return dir_path, file_name

    def get_powerpoint_presentation(self, dir_path: str, file_name: str) -> BytesIO:
        """
        Get a PowerPoint presentation from storage.

        Args:
            dir_path: The directory where the presentation is stored.
            file_name: The name of the presentation file.

        Returns:
            BytesIO: A BytesIO object containing the presentation data
                (empty on failure).
        """
        try:
            data = self.__storage_service.get_object(dir_path, file_name)
            byte_stream = BytesIO(data)
            byte_stream.seek(0)
            return byte_stream
        except Exception as e:
            logger.warning(
                f"Error getting PowerPoint presentation from {dir_path}: {e}"
            )
            return BytesIO()

    def save_powerpoint_presentation(
        self, presentation, dir_path: str, file_name: str, copy: bool = True
    ) -> Tuple[str, str]:
        """
        Save a PowerPoint presentation to storage.

        Args:
            presentation: The presentation to save. Must expose a
                `save(stream)` method (e.g. a python-pptx Presentation —
                TODO confirm the expected type with callers).
            dir_path: The directory to save the presentation to.
            file_name: The name of the file to save the presentation to.
            copy: Whether to also store a timestamped backup copy.

        Returns:
            Tuple[str, str]: (dir_path, file_name), even on failure.
            (Docstring fixed: the previous version documented non-existent
            parameters and claimed a URL was returned.)
        """
        try:
            # Create byte stream
            byte_stream = BytesIO()
            presentation.save(byte_stream)
            byte_stream.seek(0)
            content = byte_stream.getvalue()
            self.__storage_service.put_object(
                prefix=dir_path, key=file_name, content=content
            )
            if copy:
                self.__make_copy(dir_path, file_name, content)
            logger.debug(
                f"[save_powerpoint_presentation] File successfully written to storage: {dir_path}/{file_name}"
            )
            return dir_path, file_name
        except Exception as e:
            logger.error(f"Error saving PowerPoint presentation to {dir_path}: {e}")
            return dir_path, file_name
|