naas-abi 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- naas_abi/__init__.py +35 -0
- naas_abi/agents/AbiAgent.py +442 -0
- naas_abi/agents/AbiAgent_test.py +157 -0
- naas_abi/agents/EntitytoSPARQLAgent.py +952 -0
- naas_abi/agents/EntitytoSPARQLAgent_test.py +66 -0
- naas_abi/agents/KnowledgeGraphBuilderAgent.py +321 -0
- naas_abi/agents/KnowledgeGraphBuilderAgent_test.py +86 -0
- naas_abi/agents/OntologyEngineerAgent.py +115 -0
- naas_abi/agents/OntologyEngineerAgent_test.py +42 -0
- naas_abi/apps/oxigraph_admin/main.py +392 -0
- naas_abi/apps/oxigraph_admin/terminal_style.py +151 -0
- naas_abi/apps/sparql_terminal/main.py +68 -0
- naas_abi/apps/sparql_terminal/terminal_style.py +236 -0
- naas_abi/apps/terminal_agent/main.py +553 -0
- naas_abi/apps/terminal_agent/terminal_style.py +175 -0
- naas_abi/cli.py +714 -0
- naas_abi/mappings.py +83 -0
- naas_abi/models/airgap_gemma.py +220 -0
- naas_abi/models/airgap_qwen.py +24 -0
- naas_abi/models/default.py +23 -0
- naas_abi/models/gpt_4_1.py +25 -0
- naas_abi/pipelines/AIAgentOntologyGenerationPipeline.py +635 -0
- naas_abi/pipelines/AIAgentOntologyGenerationPipeline_test.py +133 -0
- naas_abi/pipelines/AddIndividualPipeline.py +215 -0
- naas_abi/pipelines/AddIndividualPipeline_test.py +66 -0
- naas_abi/pipelines/InsertDataSPARQLPipeline.py +197 -0
- naas_abi/pipelines/InsertDataSPARQLPipeline_test.py +96 -0
- naas_abi/pipelines/MergeIndividualsPipeline.py +245 -0
- naas_abi/pipelines/MergeIndividualsPipeline_test.py +98 -0
- naas_abi/pipelines/RemoveIndividualPipeline.py +166 -0
- naas_abi/pipelines/RemoveIndividualPipeline_test.py +58 -0
- naas_abi/pipelines/UpdateCommercialOrganizationPipeline.py +198 -0
- naas_abi/pipelines/UpdateDataPropertyPipeline.py +175 -0
- naas_abi/pipelines/UpdateLegalNamePipeline.py +107 -0
- naas_abi/pipelines/UpdateLinkedInPagePipeline.py +179 -0
- naas_abi/pipelines/UpdatePersonPipeline.py +184 -0
- naas_abi/pipelines/UpdateSkillPipeline.py +118 -0
- naas_abi/pipelines/UpdateTickerPipeline.py +104 -0
- naas_abi/pipelines/UpdateWebsitePipeline.py +106 -0
- naas_abi/triggers.py +131 -0
- naas_abi/workflows/AgentRecommendationWorkflow.py +321 -0
- naas_abi/workflows/AgentRecommendationWorkflow_test.py +160 -0
- naas_abi/workflows/ArtificialAnalysisWorkflow.py +337 -0
- naas_abi/workflows/ArtificialAnalysisWorkflow_test.py +57 -0
- naas_abi/workflows/ConvertOntologyGraphToYamlWorkflow.py +210 -0
- naas_abi/workflows/ConvertOntologyGraphToYamlWorkflow_test.py +78 -0
- naas_abi/workflows/CreateClassOntologyYamlWorkflow.py +208 -0
- naas_abi/workflows/CreateClassOntologyYamlWorkflow_test.py +65 -0
- naas_abi/workflows/CreateIndividualOntologyYamlWorkflow.py +183 -0
- naas_abi/workflows/CreateIndividualOntologyYamlWorkflow_test.py +86 -0
- naas_abi/workflows/ExportGraphInstancesToExcelWorkflow.py +450 -0
- naas_abi/workflows/ExportGraphInstancesToExcelWorkflow_test.py +33 -0
- naas_abi/workflows/GetObjectPropertiesFromClassWorkflow.py +385 -0
- naas_abi/workflows/GetObjectPropertiesFromClassWorkflow_test.py +57 -0
- naas_abi/workflows/GetSubjectGraphWorkflow.py +84 -0
- naas_abi/workflows/GetSubjectGraphWorkflow_test.py +71 -0
- naas_abi/workflows/SearchIndividualWorkflow.py +190 -0
- naas_abi/workflows/SearchIndividualWorkflow_test.py +98 -0
- naas_abi-1.0.0.dist-info/METADATA +9 -0
- naas_abi-1.0.0.dist-info/RECORD +62 -0
- naas_abi-1.0.0.dist-info/WHEEL +5 -0
- naas_abi-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from naas_abi import services
|
|
3
|
+
from naas_abi.pipelines.InsertDataSPARQLPipeline import (
|
|
4
|
+
InsertDataSPARQLPipeline,
|
|
5
|
+
InsertDataSPARQLPipelineConfiguration,
|
|
6
|
+
InsertDataSPARQLPipelineParameters,
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@pytest.fixture
def pipeline() -> InsertDataSPARQLPipeline:
    """Provide an InsertDataSPARQLPipeline backed by the shared triple store service."""
    configuration = InsertDataSPARQLPipelineConfiguration(
        triple_store=services.triple_store_service
    )
    return InsertDataSPARQLPipeline(configuration=configuration)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_insert_data_sparql_pipeline(pipeline: InsertDataSPARQLPipeline):
    """Insert two individuals through the pipeline, then remove them again.

    Checks that the graph returned by ``pipeline.run`` holds the ``abi:name``
    triple of both inserted individuals, removes that graph from the triple
    store (even when a content assertion fails), and finally verifies that no
    triple with ``abi:john`` or ``abi:jane`` as subject is left in the store.
    """
    from naas_abi.utils.SPARQL import results_to_list
    from rdflib import Literal, Namespace

    sparql_statement = """
    PREFIX abi: <http://ontology.naas.ai/abi/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX cco: <https://www.commoncoreontologies.org/>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>

    INSERT DATA {
        abi:john a cco:ont00001262, owl:NamedIndividual ;
                 abi:name "John Doe" ;
                 abi:age 30 ;
                 abi:email "john.doe@example.com" .

        abi:jane a cco:ont00001262, owl:NamedIndividual ;
                 abi:name "Jane Smith" ;
                 abi:age 28 ;
                 abi:email "jane.smith@example.com" .
    }
    """
    graph = pipeline.run(
        InsertDataSPARQLPipelineParameters(sparql_statement=sparql_statement)
    )
    ABI = Namespace("http://ontology.naas.ai/abi/")

    # Plain message on purpose: serializing the graph here would raise
    # AttributeError in the very case this assertion is meant to report.
    assert graph is not None, "pipeline.run returned None"

    try:
        # A graph is a set of triples, so membership is the same check as
        # "exactly one matching triple" for a fully specified pattern.
        assert (ABI.john, ABI.name, Literal("John Doe")) in graph, graph.serialize(
            format="turtle"
        )
        assert (ABI.jane, ABI.name, Literal("Jane Smith")) in graph, graph.serialize(
            format="turtle"
        )
    finally:
        # Remove graph, also when an assertion above failed, so the inserted
        # test triples do not stay behind in the shared store.
        services.triple_store_service.remove(graph)

    # Check triples are removed from the triple store
    sparql_query = """
    PREFIX abi: <http://ontology.naas.ai/abi/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX cco: <https://www.commoncoreontologies.org/>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>

    SELECT ?s ?p ?o
    WHERE {
        ?s ?p ?o .
        FILTER(?s = abi:john || ?s = abi:jane)
    }
    """
    results = services.triple_store_service.query(sparql_query)
    results_list = results_to_list(results)
    # NOTE(review): relies on results_to_list returning None (not []) for an
    # empty result set - confirm against the helper.
    assert results_list is None, results_list
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def test_extract_sparql_from_text(pipeline: InsertDataSPARQLPipeline):
    """Extract an INSERT DATA statement wrapped in a fenced ``sparql`` block.

    ``get_sparql_from_text`` reports failure with a sentinel string rather than
    ``None`` (see the companion "no_sparql" test), so a bare ``is not None``
    check cannot fail; the sentinel is therefore rejected explicitly as well.
    """
    not_found_message = "No SPARQL INSERT DATA statement found"
    text = """
    ```sparql
    INSERT DATA {
        <http://ontology.naas.ai/abi/john> <http://www.w3.org/2000/01/rdf-schema#label> "John Doe" .
    }
    ```
    """
    sparql_statement = pipeline.get_sparql_from_text(
        InsertDataSPARQLPipelineParameters(sparql_statement=text)
    )
    assert sparql_statement is not None, sparql_statement
    assert sparql_statement != not_found_message, sparql_statement
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def test_extract_sparql_from_text_no_sparql(pipeline: InsertDataSPARQLPipeline):
    """Text without an INSERT DATA statement yields the "not found" message."""
    parameters = InsertDataSPARQLPipelineParameters(
        sparql_statement="This is a text without a SPARQL INSERT DATA statement"
    )
    extracted = pipeline.get_sparql_from_text(parameters)
    assert extracted == "No SPARQL INSERT DATA statement found", extracted
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from typing import Annotated
|
|
4
|
+
|
|
5
|
+
from fastapi import APIRouter
|
|
6
|
+
from langchain_core.tools import BaseTool, StructuredTool
|
|
7
|
+
from naas_abi import ABIModule
|
|
8
|
+
from naas_abi_core import logger
|
|
9
|
+
from naas_abi_core.pipeline import Pipeline, PipelineConfiguration, PipelineParameters
|
|
10
|
+
from naas_abi_core.services.triple_store.TripleStorePorts import ITripleStoreService
|
|
11
|
+
from naas_abi_core.utils.Graph import URI_REGEX
|
|
12
|
+
from naas_abi_core.utils.SPARQL import SPARQLUtils
|
|
13
|
+
from naas_abi_core.utils.StorageUtils import StorageUtils
|
|
14
|
+
from pydantic import Field
|
|
15
|
+
from rdflib import RDFS, SKOS, Graph, Literal, Namespace, URIRef
|
|
16
|
+
|
|
17
|
+
# Define namespaces
|
|
18
|
+
BFO = Namespace("http://purl.obolibrary.org/obo/")
|
|
19
|
+
CCO = Namespace("https://www.commoncoreontologies.org/")
|
|
20
|
+
ABI = Namespace("http://ontology.naas.ai/abi/")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class MergeIndividualsPipelineConfiguration(PipelineConfiguration):
    """Configuration for MergeIndividualsPipeline.

    Attributes:
        triple_store (ITripleStoreService): The triple store service to use
        datastore_path (str): Output directory handed to the storage helper
            when the pipeline saves the inserted and removed triples as
            Turtle files
    """

    triple_store: ITripleStoreService
    datastore_path: str = "datastore/ontology/merged_individual"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Input parameters for MergeIndividualsPipeline: the two individuals involved
# in a merge. Both values are strings constrained by URI_REGEX.
class MergeIndividualsPipelineParameters(PipelineParameters):
    # Individual that survives the merge.
    uri_to_keep: Annotated[
        str,
        Field(
            description="The URI that will remain and receive the merged triples",
            pattern=URI_REGEX,
        ),
    ]
    # Individual whose triples are transferred to uri_to_keep and then removed.
    uri_to_merge: Annotated[
        str,
        Field(
            description="The URI that will be merged into uri_to_keep and then removed",
            pattern=URI_REGEX,
        ),
    ]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class MergeIndividualsPipeline(Pipeline):
    """Pipeline for merging two individuals in the ontology.

    Every triple that mentions ``uri_to_merge`` is rewritten to mention
    ``uri_to_keep`` instead and inserted (unless the surviving individual
    already has it), after which the original triples are removed. The
    ``rdfs:label`` / ``abi:universal_name`` values of the merged individual
    are kept on the surviving individual as ``skos:altLabel``.
    """

    __configuration: MergeIndividualsPipelineConfiguration
    __sparql_utils: SPARQLUtils
    __storage_utils: StorageUtils

    def __init__(self, configuration: MergeIndividualsPipelineConfiguration):
        super().__init__(configuration)
        self.__configuration = configuration
        # NOTE(review): the final read-back uses the engine-wide triple store,
        # while queries and writes use configuration.triple_store - confirm
        # both always refer to the same store.
        self.__sparql_utils: SPARQLUtils = SPARQLUtils(
            ABIModule.get_instance().engine.services.triple_store
        )
        self.__storage_utils: StorageUtils = StorageUtils(
            ABIModule.get_instance().engine.services.object_storage
        )

    @staticmethod
    def _check_iri(uri: str) -> None:
        """Raise ValueError if *uri* cannot be written as ``<uri>`` in SPARQL."""
        import re

        # Characters the SPARQL grammar forbids inside an IRIREF; letting them
        # through would allow the value to break out of the angle brackets.
        if re.search(r'[\x00-\x20<>"{}|^`\\]', uri):
            raise ValueError(f"Invalid URI for SPARQL query: {uri!r}")

    @staticmethod
    def _rows_to_graph(rows) -> Graph:
        """Build an in-memory graph from ``(s, p, o)`` result rows."""
        graph = Graph()
        for s, p, o in rows:
            graph.add((s, p, o))
        return graph

    @staticmethod
    def _filename_part(label) -> str:
        """Return *label* as text that cannot add path segments to a file name."""
        return str(label).replace("/", "_").replace("\\", "_")

    def get_all_triples_for_uri(self, uri: str):
        """
        Retrieve all triples where the given URI appears as either a subject or object.

        Args:
            uri (str): The URI to search for

        Returns:
            The result of ``triple_store.query`` for a ``SELECT ?s ?p ?o`` query;
            each row is one triple in which the URI is the subject or the object

        Raises:
            ValueError: If the URI contains characters that are not allowed
                inside ``<...>`` in a SPARQL query
        """
        self._check_iri(uri)
        sparql_query = f"""
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>

        SELECT ?s ?p ?o
        WHERE {{
            {{
                # Find triples where the URI is the subject
                <{uri}> ?p ?o .
                BIND(<{uri}> AS ?s)
            }}
            UNION
            {{
                # Find triples where the URI is the object
                ?s ?p <{uri}> .
                BIND(<{uri}> AS ?o)
            }}
        }}
        """

        return self.__configuration.triple_store.query(sparql_query)

    def run(self, parameters: PipelineParameters) -> Graph:
        """Merge ``uri_to_merge`` into ``uri_to_keep``.

        Args:
            parameters: Must be a ``MergeIndividualsPipelineParameters``.

        Returns:
            Graph: The subject graph of ``uri_to_keep`` read back after the merge.

        Raises:
            ValueError: If ``parameters`` has the wrong type, or if both URIs
                are identical (merging an individual into itself would delete
                its triples).
        """
        if not isinstance(parameters, MergeIndividualsPipelineParameters):
            raise ValueError(
                "Parameters must be of type MergeIndividualsPipelineParameters"
            )
        if parameters.uri_to_keep == parameters.uri_to_merge:
            raise ValueError("uri_to_keep and uri_to_merge must be different URIs")

        output_dir = self.__configuration.datastore_path
        uri_to_keep_ref = URIRef(parameters.uri_to_keep)
        uri_to_merge_ref = URIRef(parameters.uri_to_merge)

        # Get all triples for both URIs. Rows are materialized once so they can
        # be counted and traversed again regardless of the result type.
        keep_rows = list(self.get_all_triples_for_uri(parameters.uri_to_keep))
        keep_graph = self._rows_to_graph(keep_rows)
        logger.info(
            f"Found {len(keep_rows)} triples for URI to keep: {parameters.uri_to_keep}"
        )

        merge_rows = list(self.get_all_triples_for_uri(parameters.uri_to_merge))
        merge_graph = self._rows_to_graph(merge_rows)
        logger.info(
            f"Found {len(merge_rows)} triples for URI to merge: {parameters.uri_to_merge}"
        )

        graph_insert = Graph()
        graph_insert.bind("bfo", BFO)
        graph_insert.bind("cco", CCO)
        graph_insert.bind("abi", ABI)
        graph_remove = Graph()
        uri_to_keep_label = keep_graph.value(uri_to_keep_ref, RDFS.label)
        uri_to_merge_label = merge_graph.value(uri_to_merge_ref, RDFS.label)

        # Process triples from uri_to_merge
        logger.info(
            f"Merging '{uri_to_merge_label}' ({parameters.uri_to_merge}) into '{uri_to_keep_label}' ({parameters.uri_to_keep})"
        )
        label_predicates = (RDFS.label, ABI.universal_name)
        for s, p, o in merge_rows:
            if s == uri_to_merge_ref:
                if p in label_predicates:
                    # Names of the merged individual survive as alternative labels.
                    alt_label = o if isinstance(o, Literal) else Literal(str(o))
                    graph_insert.add((uri_to_keep_ref, SKOS.altLabel, alt_label))
                else:
                    # A self-reference must follow the merge too, otherwise the
                    # new triple would point at the individual being removed.
                    target = uri_to_keep_ref if o == uri_to_merge_ref else o
                    # Only URI and literal objects are transferred.
                    # NOTE(review): blank-node objects are removed below without
                    # being transferred - confirm this is intended.
                    if (
                        isinstance(target, (URIRef, Literal))
                        and (uri_to_keep_ref, p, target) not in keep_graph
                    ):
                        graph_insert.add((uri_to_keep_ref, p, target))
            elif o == uri_to_merge_ref:
                # Incoming reference: re-point it at the surviving individual.
                if (s, p, uri_to_keep_ref) not in keep_graph:
                    graph_insert.add((s, p, uri_to_keep_ref))

            # Always add original triple for removal
            graph_remove.add((s, p, o))

        if len(graph_insert) > 0:
            logger.info(f"✅ Inserting {len(graph_insert)} triples")
            logger.info(graph_insert.serialize(format="turtle"))
            self.__storage_utils.save_triples(
                graph_insert,
                output_dir,
                f"{self._filename_part(uri_to_keep_label)}_{parameters.uri_to_keep.split('/')[-1]}_merged.ttl",
            )
            self.__configuration.triple_store.insert(graph_insert)
        if len(graph_remove) > 0:
            logger.info(f"✅ Removing {len(graph_remove)} triples")
            logger.info(graph_remove.serialize(format="turtle"))
            self.__storage_utils.save_triples(
                graph_remove,
                output_dir,
                f"{self._filename_part(uri_to_merge_label)}_{parameters.uri_to_merge.split('/')[-1]}_removed.ttl",
            )
            self.__configuration.triple_store.remove(graph_remove)

        return self.__sparql_utils.get_subject_graph(parameters.uri_to_keep)

    def as_tools(self) -> list[BaseTool]:
        """Expose the pipeline as a single ``merge_individuals`` structured tool."""
        return [
            StructuredTool(
                name="merge_individuals",
                description="Merge two individuals in the triplestore by transferring all triples from one to another",
                func=lambda **kwargs: self.run(
                    MergeIndividualsPipelineParameters(**kwargs)
                ),
                args_schema=MergeIndividualsPipelineParameters,
            )
        ]

    def as_api(
        self,
        router: APIRouter,
        route_name: str = "",
        name: str = "",
        description: str = "",
        description_stream: str = "",
        tags: list[str | Enum] | None = None,
    ) -> None:
        """Accept the API-registration arguments but register no route."""
        return None
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
if __name__ == "__main__":
    from naas_abi import services

    # Manual run: merge one hard-coded individual into another and log the
    # resulting subject graph as Turtle.
    configuration = MergeIndividualsPipelineConfiguration(
        triple_store=services.triple_store_service
    )
    pipeline = MergeIndividualsPipeline(configuration)

    parameters = MergeIndividualsPipelineParameters(
        # URI that will remain
        uri_to_keep="http://ontology.naas.ai/abi/69a231b9-e87a-4503-8f80-a530ed8eaa4b",
        # URI that will be merged and removed
        uri_to_merge="http://ontology.naas.ai/abi/4f92bbdd-e710-4e43-9480-9b6cd6d9af80",
    )
    graph = pipeline.run(parameters)
    logger.info(graph.serialize(format="turtle"))
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from naas_abi import services
|
|
3
|
+
from naas_abi.pipelines.MergeIndividualsPipeline import (
|
|
4
|
+
MergeIndividualsPipeline,
|
|
5
|
+
MergeIndividualsPipelineConfiguration,
|
|
6
|
+
MergeIndividualsPipelineParameters,
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@pytest.fixture
def pipeline() -> MergeIndividualsPipeline:
    """Provide a MergeIndividualsPipeline backed by the shared triple store service."""
    configuration = MergeIndividualsPipelineConfiguration(
        triple_store=services.triple_store_service
    )
    return MergeIndividualsPipeline(configuration)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_merge_individuals_pipeline(pipeline: MergeIndividualsPipeline):
    """Merge two freshly inserted individuals and verify the outcome.

    Two individuals with random URIs, the same type triples, the same logo and
    different labels are inserted. After the merge the surviving individual
    must keep its label, carry the other label as ``skos:altLabel``, have the
    logo exactly once, and the merged individual must be gone from the store.
    The test triples are removed from the store even when the pipeline or an
    assertion fails.
    """
    import time
    from uuid import uuid4

    from naas_abi.utils.SPARQL import get_subject_graph

    # NOTE(review): this rebinds ``services`` to the naas_abi_core object for
    # the rest of the test, while the fixture uses the one from naas_abi -
    # confirm both expose the same triple store.
    from naas_abi_core import logger, services
    from rdflib import OWL, RDF, RDFS, SKOS, Graph, Literal, Namespace, URIRef

    ABI = Namespace("http://ontology.naas.ai/abi/")
    individual_class = URIRef("https://www.commoncoreontologies.org/ont00000443")
    logo = Literal(
        "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ9gXMaBLQZ39W6Pk53PRuzFjUvv_6lLRWPoQ&s"
    )

    uri_to_keep = ABI[str(uuid4())]
    uri_to_merge = ABI[str(uuid4())]

    # Both individuals get the same shape of data; only the label differs.
    seed_graph = Graph()
    for uri, label in ((uri_to_keep, "Naas.ai"), (uri_to_merge, "Naas.ai 2")):
        seed_graph.add((uri, RDF.type, individual_class))
        seed_graph.add((uri, RDF.type, OWL.NamedIndividual))
        seed_graph.add((uri, RDFS.label, Literal(label)))
        seed_graph.add((uri, ABI.logo, logo))

    logger.info("Inserting triples to triplestore")
    services.triple_store_service.insert(seed_graph)
    time.sleep(3)

    graph_merged = None
    try:
        # Run pipeline to merge individuals
        graph_merged = pipeline.run(
            MergeIndividualsPipelineParameters(
                uri_to_keep=str(uri_to_keep),
                uri_to_merge=str(uri_to_merge),
            )
        )
        # Plain message on purpose: serializing here would raise
        # AttributeError in the very case this assertion is meant to report.
        assert graph_merged is not None, "pipeline.run returned None"
        assert str(graph_merged.value(uri_to_keep, RDFS.label)) == "Naas.ai", (
            graph_merged.serialize(format="turtle")
        )
        assert str(graph_merged.value(uri_to_keep, SKOS.altLabel)) == "Naas.ai 2", (
            graph_merged.serialize(format="turtle")
        )
        assert len(list(graph_merged.triples((uri_to_keep, ABI.logo, None)))) == 1, (
            graph_merged.serialize(format="turtle")
        )

        # Check if uri_to_merge is removed in triplestore
        remaining = get_subject_graph(str(uri_to_merge), 1)
        assert len(remaining) == 0, remaining.serialize(format="turtle")
    finally:
        # Remove graphs. When the pipeline never returned a graph, fall back
        # to the seed triples so a failed run does not leave them behind.
        services.triple_store_service.remove(
            graph_merged if graph_merged is not None else seed_graph
        )
        time.sleep(3)
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from typing import Annotated, List
|
|
4
|
+
|
|
5
|
+
from fastapi import APIRouter
|
|
6
|
+
from langchain_core.tools import BaseTool, StructuredTool
|
|
7
|
+
from naas_abi import ABIModule, logger
|
|
8
|
+
from naas_abi_core.pipeline import Pipeline, PipelineConfiguration, PipelineParameters
|
|
9
|
+
from naas_abi_core.services.triple_store.TripleStorePorts import ITripleStoreService
|
|
10
|
+
from naas_abi_core.utils.StorageUtils import StorageUtils
|
|
11
|
+
from pydantic import Field
|
|
12
|
+
from rdflib import Graph, Namespace
|
|
13
|
+
|
|
14
|
+
# Define namespaces
|
|
15
|
+
BFO = Namespace("http://purl.obolibrary.org/obo/")
|
|
16
|
+
CCO = Namespace("https://www.commoncoreontologies.org/")
|
|
17
|
+
ABI = Namespace("http://ontology.naas.ai/abi/")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class RemoveIndividualPipelineConfiguration(PipelineConfiguration):
    """Configuration for RemoveIndividualPipeline.

    Attributes:
        triple_store (ITripleStoreService): The triple store service to use
        datastore_path (str): Output directory handed to the storage helper
            when the pipeline saves the removed triples as Turtle files
    """

    triple_store: ITripleStoreService
    datastore_path: str = "datastore/ontology/removed_individual"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Input parameters for RemoveIndividualPipeline: the URIs whose triples are
# deleted. At least one URI is required.
class RemoveIndividualPipelineParameters(PipelineParameters):
    uris_to_remove: Annotated[
        List[str],
        # ``min_length`` is the current spelling of the list-size constraint;
        # ``min_items`` is the deprecated pydantic v1 name.
        Field(description="List of URIs to remove from the ontology", min_length=1),
    ]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class RemoveIndividualPipeline(Pipeline):
    """Pipeline for removing individuals from the ontology.

    For every requested URI, all triples in which it appears as subject or
    object are saved as a Turtle file and then removed from the triple store.
    """

    __configuration: RemoveIndividualPipelineConfiguration
    __storage_utils: StorageUtils

    def __init__(self, configuration: RemoveIndividualPipelineConfiguration):
        super().__init__(configuration)
        self.__configuration = configuration
        self.__storage_utils: StorageUtils = StorageUtils(
            ABIModule.get_instance().engine.services.object_storage
        )

    @staticmethod
    def _check_iri(uri: str) -> None:
        """Raise ValueError if *uri* cannot be written as ``<uri>`` in SPARQL."""
        import re

        # Characters the SPARQL grammar forbids inside an IRIREF; letting them
        # through would allow the value to break out of the angle brackets.
        if re.search(r'[\x00-\x20<>"{}|^`\\]', uri):
            raise ValueError(f"Invalid URI for SPARQL query: {uri!r}")

    def get_all_triples_for_uri(self, uri: str):
        """
        Retrieve all triples where the given URI appears as either a subject or object.

        Args:
            uri (str): The URI to search for

        Returns:
            The result of ``triple_store.query`` for a ``SELECT ?s ?p ?o`` query;
            each row is one triple in which the URI is the subject or the object

        Raises:
            ValueError: If the URI contains characters that are not allowed
                inside ``<...>`` in a SPARQL query
        """
        self._check_iri(uri)
        sparql_query = f"""
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>

        SELECT ?s ?p ?o
        WHERE {{
            {{
                # Find triples where the URI is the subject
                <{uri}> ?p ?o .
                BIND(<{uri}> AS ?s)
            }}
            UNION
            {{
                # Find triples where the URI is the object
                ?s ?p <{uri}> .
                BIND(<{uri}> AS ?o)
            }}
        }}
        """

        return self.__configuration.triple_store.query(sparql_query)

    def run(self, parameters: PipelineParameters) -> Graph:
        """Remove every triple that mentions one of the given URIs.

        Args:
            parameters: Must be a ``RemoveIndividualPipelineParameters``.

        Returns:
            Graph: All triples that were removed, across all URIs.

        Raises:
            ValueError: If ``parameters`` has the wrong type, or if any URI
                contains characters that are not allowed in a SPARQL IRI.
        """
        if not isinstance(parameters, RemoveIndividualPipelineParameters):
            raise ValueError(
                "Parameters must be of type RemoveIndividualPipelineParameters"
            )

        # Reject bad input before touching the store, so a malformed URI late
        # in the list cannot leave the removal half done.
        for uri in parameters.uris_to_remove:
            self._check_iri(uri)

        output_dir = self.__configuration.datastore_path
        removed_graph = Graph()
        removed_graph.bind("bfo", BFO)
        removed_graph.bind("cco", CCO)
        removed_graph.bind("abi", ABI)

        for uri in parameters.uris_to_remove:
            logger.info(f"Getting triples for URI: {uri}")
            graph_remove = Graph()
            for s, p, o in self.get_all_triples_for_uri(uri):
                graph_remove.add((s, p, o))

            if len(graph_remove) == 0:
                logger.info(f"No triples found for {uri}")
                continue

            logger.info(f"✅ Removing {len(graph_remove)} triples for URI: {uri}")
            logger.info(graph_remove.serialize(format="turtle"))
            # Keep a Turtle copy of what is about to be deleted.
            self.__storage_utils.save_triples(
                graph_remove, output_dir, f"{uri.split('/')[-1]}.ttl"
            )
            self.__configuration.triple_store.remove(graph_remove)

            # Add to the combined removed graph for return
            removed_graph += graph_remove

        return removed_graph

    def as_tools(self) -> list[BaseTool]:
        """Expose the pipeline as a single ``remove_individuals`` structured tool."""
        return [
            StructuredTool(
                name="remove_individuals",
                description="Remove individuals from the triplestore by deleting all their associated triples",
                func=lambda **kwargs: self.run(
                    RemoveIndividualPipelineParameters(**kwargs)
                ),
                args_schema=RemoveIndividualPipelineParameters,
            )
        ]

    def as_api(
        self,
        router: APIRouter,
        route_name: str = "",
        name: str = "",
        description: str = "",
        description_stream: str = "",
        tags: list[str | Enum] | None = None,
    ) -> None:
        """Accept the API-registration arguments but register no route."""
        return None
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
if __name__ == "__main__":
    from naas_abi import services

    # Manual run: remove two example individuals and log the removed triples
    # as Turtle.
    configuration = RemoveIndividualPipelineConfiguration(
        triple_store=services.triple_store_service
    )
    pipeline = RemoveIndividualPipeline(configuration)

    parameters = RemoveIndividualPipelineParameters(
        uris_to_remove=[
            "http://ontology.naas.ai/abi/example-uri-1",
            "http://ontology.naas.ai/abi/example-uri-2",
        ],
    )
    graph = pipeline.run(parameters)
    logger.info(graph.serialize(format="turtle"))
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from naas_abi import services
|
|
3
|
+
from naas_abi.pipelines.RemoveIndividualPipeline import (
|
|
4
|
+
RemoveIndividualPipeline,
|
|
5
|
+
RemoveIndividualPipelineConfiguration,
|
|
6
|
+
RemoveIndividualPipelineParameters,
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@pytest.fixture
def pipeline() -> RemoveIndividualPipeline:
    """Provide a RemoveIndividualPipeline backed by the shared triple store service."""
    configuration = RemoveIndividualPipelineConfiguration(
        triple_store=services.triple_store_service
    )
    return RemoveIndividualPipeline(configuration)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_remove_individual_pipeline(pipeline: RemoveIndividualPipeline):
    """Insert one individual, remove it through the pipeline, verify it is gone.

    If the pipeline or an assertion fails, the inserted test triples are
    removed from the store before the failure is re-raised.
    """
    import time
    from uuid import uuid4

    from naas_abi.utils.SPARQL import get_subject_graph
    from rdflib import OWL, RDF, RDFS, Graph, Literal, Namespace, URIRef

    ABI = Namespace("http://ontology.naas.ai/abi/")

    seed_graph = Graph()
    uri = ABI[str(uuid4())]
    seed_graph.add(
        (uri, RDF.type, URIRef("https://www.commoncoreontologies.org/ont00000443"))
    )
    seed_graph.add((uri, RDF.type, OWL.NamedIndividual))
    seed_graph.add((uri, RDFS.label, Literal("Naas.ai")))
    seed_graph.add(
        (
            uri,
            ABI.logo,
            Literal(
                "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ9gXMaBLQZ39W6Pk53PRuzFjUvv_6lLRWPoQ&s"
            ),
        )
    )
    services.triple_store_service.insert(seed_graph)
    time.sleep(3)

    try:
        # Run pipeline to remove triples
        removed = pipeline.run(
            RemoveIndividualPipelineParameters(
                uris_to_remove=[str(uri)],
            )
        )
        # Plain message on purpose: serializing here would raise
        # AttributeError in the very case this assertion is meant to report.
        assert removed is not None, "pipeline.run returned None"

        # Check if uri is removed in triplestore
        remaining = get_subject_graph(str(uri), 1)
        assert len(remaining) == 0, remaining.serialize(format="turtle")
    except Exception:
        # Failure path only: the seed triples may still be in the store.
        services.triple_store_service.remove(seed_graph)
        raise
|