iolanta 2.1.11__py3-none-any.whl → 2.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iolanta/cli/main.py +200 -80
- iolanta/facets/facet.py +15 -9
- iolanta/facets/mermaid_roadmap/__init__.py +0 -0
- iolanta/facets/mermaid_roadmap/facet.py +133 -0
- iolanta/facets/mermaid_roadmap/inference/blocks.sparql +13 -0
- iolanta/facets/mermaid_roadmap/inference/has-task-default-type.sparql +16 -0
- iolanta/facets/mermaid_roadmap/inference/task.sparql +26 -0
- iolanta/facets/mermaid_roadmap/inference/unblocked.sparql +21 -0
- iolanta/facets/mermaid_roadmap/mermaid_roadmap.yamlld +59 -0
- iolanta/facets/mermaid_roadmap/sparql/edges.sparql +25 -0
- iolanta/facets/mermaid_roadmap/sparql/nodes.sparql +17 -0
- iolanta/facets/query/__init__.py +0 -0
- iolanta/facets/query/ask_result_csv.py +23 -0
- iolanta/facets/query/ask_result_json.py +24 -0
- iolanta/facets/query/ask_result_table.py +23 -0
- iolanta/facets/query/construct_result_csv.py +34 -0
- iolanta/facets/query/construct_result_json.py +32 -0
- iolanta/facets/query/construct_result_table.py +55 -0
- iolanta/facets/query/data/query_result.yamlld +102 -0
- iolanta/facets/query/select_result_csv.py +36 -0
- iolanta/facets/query/select_result_json.py +24 -0
- iolanta/facets/query/select_result_table.py +48 -0
- iolanta/iolanta.py +146 -55
- iolanta/mcp/cli.py +16 -3
- iolanta/mermaid/models.py +74 -40
- iolanta/sparqlspace/processor.py +232 -179
- {iolanta-2.1.11.dist-info → iolanta-2.1.13.dist-info}/METADATA +2 -2
- {iolanta-2.1.11.dist-info → iolanta-2.1.13.dist-info}/RECORD +30 -10
- {iolanta-2.1.11.dist-info → iolanta-2.1.13.dist-info}/WHEEL +1 -1
- {iolanta-2.1.11.dist-info → iolanta-2.1.13.dist-info}/entry_points.txt +10 -0
iolanta/sparqlspace/processor.py
CHANGED
@@ -5,10 +5,8 @@ from pathlib import Path
 from threading import Lock
 from typing import Any, Iterable, Mapping
 
-import diskcache
 import funcy
 import loguru
-import platformdirs
 import requests
 import yaml_ld
 from nanopub import NanopubClient
@@ -44,31 +42,24 @@ from iolanta.sparqlspace.redirects import apply_redirect
 REASONING_ENABLED = True
 OWL_REASONING_ENABLED = False
 
-INFERENCE_DIR = Path(__file__).parent / 'inference'
+INFERENCE_DIR = Path(__file__).parent / "inference"
 INDICES = [  # noqa: WPS407
-    URIRef('https://iolanta.tech/visualizations/index.yaml'),
+    URIRef("https://iolanta.tech/visualizations/index.yaml"),
 ]
 
 
-@diskcache.Cache(
-    directory=str(
-        platformdirs.user_cache_path(
-            appname='iolanta',
-        ) / 'find_retractions_for',
-    ),
-).memoize(expire=datetime.timedelta(days=8).total_seconds())
 def find_retractions_for(nanopublication: URIRef) -> set[URIRef]:
     """Find nanopublications that retract the given one."""
     # See https://github.com/fair-workflows/nanopub/issues/168 for
     # context of this dirty hack.
-    use_server = 'http://grlc.nanopubs.lod.labs.vu.nl/api/local/local/'
+    use_server = "http://grlc.nanopubs.lod.labs.vu.nl/api/local/local/"
 
     client = NanopubClient(use_server=use_server)
     client.grlc_urls = [use_server]
 
     http_url = str(nanopublication).replace(
-        'https://',
-        'http://',
+        "https://",
+        "http://",
     )
 
     try:
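
The decorator removed above had persisted `find_retractions_for` results on disk for eight days; with it gone, each call performs the lookup anew. For context, a minimal sketch of that dropped caching layer, using the same `diskcache` and `platformdirs` calls that appear in the removed lines (the function body is elided here):

```python
import datetime

import diskcache
import platformdirs

# An on-disk cache in the per-user cache directory, as in the removed decorator.
cache = diskcache.Cache(
    directory=str(
        platformdirs.user_cache_path(appname="iolanta") / "find_retractions_for",
    ),
)


@cache.memoize(expire=datetime.timedelta(days=8).total_seconds())
def find_retractions_for(nanopublication):
    """Results of the network lookup were reused for up to 8 days."""
    ...
```
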
@@ -79,60 +70,72 @@ def find_retractions_for(nanopublication: URIRef) -> set[URIRef]:
     return {URIRef(retraction) for retraction in retractions}
 
 
-def _extract_from_mapping(  # noqa: WPS213
+def _extract_from_mapping(  # noqa: WPS213, WPS231
     algebra: Mapping[str, Any],
 ) -> Iterable[URIRef | Variable]:
     match algebra.name:  # noqa: WPS242
-        case 'SelectQuery' | 'AskQuery' | 'Project' | 'Distinct' | 'Slice':
-            yield from extract_mentioned_urls(algebra['p'])
+        case "SelectQuery" | "AskQuery" | "Project" | "Distinct" | "Slice":
+            yield from extract_mentioned_urls(algebra["p"])  # noqa: WPS226
 
-        case 'BGP':
-            yield from [
+        case "BGP":
+            yield from [  # noqa: WPS353, WPS221
                 term
-                for triple in algebra['triples']
+                for triple in algebra["triples"]
                 for term in triple
                 if isinstance(term, (URIRef, Variable))
             ]
 
-        case 'Filter' | 'UnaryNot' | 'OrderCondition':
-            yield from extract_mentioned_urls(algebra['expr'])
+        case "Filter" | "UnaryNot" | "OrderCondition":
+            yield from extract_mentioned_urls(algebra["expr"])  # noqa: WPS204, WPS226
 
-        case 'Builtin_EXISTS':
-            yield from extract_mentioned_urls(algebra['arg'])
+        case "Builtin_EXISTS":
+            # Builtin_EXISTS uses 'graph' instead of 'arg'
+            yield from extract_mentioned_urls(algebra["graph"])
 
-        case
-
-
+        case built_in if built_in.startswith("Builtin_"):
+            # Some built-ins may not have an 'arg' key
+            arg_value = algebra.get("arg")
+            if arg_value is not None:
+                yield from extract_mentioned_urls(arg_value)
 
-        case 'LeftJoin':
-            yield from extract_mentioned_urls(algebra['p1'])
-            yield from extract_mentioned_urls(algebra['p2'])
-            yield from extract_mentioned_urls(algebra['expr'])
+        case "RelationalExpression":
+            yield from extract_mentioned_urls(algebra["expr"])
+            yield from extract_mentioned_urls(algebra["other"])
 
-        case
-            yield from extract_mentioned_urls(algebra[
-            yield from extract_mentioned_urls(algebra[
+        case "LeftJoin":
+            yield from extract_mentioned_urls(algebra["p1"])
+            yield from extract_mentioned_urls(algebra["p2"])
+            yield from extract_mentioned_urls(algebra["expr"])
 
-        case
-            yield from extract_mentioned_urls(algebra[
-            yield from extract_mentioned_urls(algebra[
+        case "Join" | "Union":
+            yield from extract_mentioned_urls(algebra["p1"])
+            yield from extract_mentioned_urls(algebra["p2"])
 
-        case 'Extend':
-            yield from extract_mentioned_urls(algebra['p'])
-            yield from extract_mentioned_urls(algebra['expr'])
+        case "Extend":
+            # Extend is used for BIND expressions - process pattern and expression
+            yield from extract_mentioned_urls(algebra["p"])
+            yield from extract_mentioned_urls(algebra["expr"])
 
-        case 'TrueFilter':
+        case "ConditionalOrExpression" | "ConditionalAndExpression":
+            yield from extract_mentioned_urls(algebra["expr"])
+            yield from extract_mentioned_urls(algebra["other"])
+
+        case "OrderBy":
+            yield from extract_mentioned_urls(algebra["p"])
+            yield from extract_mentioned_urls(algebra["expr"])
+
+        case "TrueFilter":
             return
 
-        case 'Graph':
-            yield from extract_mentioned_urls(algebra['p'])
-            yield from extract_mentioned_urls(algebra['term'])
+        case "Graph":
+            yield from extract_mentioned_urls(algebra["p"])
+            yield from extract_mentioned_urls(algebra["term"])
 
         case unknown_name:
-            formatted_keys = ', '.join(algebra.keys())
+            formatted_keys = ", ".join(algebra.keys())
             loguru.logger.info(
-                'Unknown SPARQL expression '
-                f'{unknown_name}({formatted_keys}): {algebra}',
+                "Unknown SPARQL expression "
+                f"{unknown_name}({formatted_keys}): {algebra}",
             )
             return
 
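
These `match` arms walk the algebra tree that rdflib builds for a parsed SPARQL query: every node is a `CompValue` whose `name` identifies the operator and whose keys (`p`, `p1`, `p2`, `expr`, `triples`, and so on) hold its children. A minimal sketch of obtaining and inspecting such a tree with rdflib (the query text is illustrative):

```python
from rdflib.plugins.sparql.algebra import translateQuery
from rdflib.plugins.sparql.parser import parseQuery

parse_tree = parseQuery("SELECT ?s WHERE { ?s a <https://example.com/Class> }")
query = translateQuery(parse_tree)

print(query.algebra.name)        # SelectQuery
print(query.algebra["p"].name)   # Project, the next operator down
```

Note that unknown operators fall through to the logging arm instead of raising, so an unhandled node from a newer rdflib release degrades URL extraction rather than breaking the query outright.
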
@@ -158,7 +161,7 @@ def extract_mentioned_urls(
         case unknown_algebra:
             algebra_type = type(unknown_algebra)
             raise ValueError(
-                f'Algebra of unknown type {algebra_type}: {unknown_algebra}',
+                f"Algebra of unknown type {algebra_type}: {unknown_algebra}",
             )
 
 
@@ -176,7 +179,7 @@ def normalize_term(term: Node) -> Node:
     """
     if isinstance(term, URIRef):
         return apply_redirect(term)
-
+
     return term
 
 
@@ -192,10 +195,7 @@ def resolve_variables(
 
         case Variable() as query_variable:
             variable_value = bindings.get(str(query_variable))
-            if (
-                variable_value is not None
-                and isinstance(variable_value, URIRef)
-            ):
+            if variable_value is not None and isinstance(variable_value, URIRef):
                 yield variable_value
 
 
@@ -231,34 +231,37 @@ def _extract_nanopublication_uris(  # noqa: WPS231
     algebra: CompValue,
 ) -> Iterable[URIRef]:
     """Extract nanopublications to get retracting information for."""
-    match algebra.name:
-        case 'SelectQuery' | 'AskQuery' | 'Project' | 'Distinct' | 'Graph':
-            yield from _extract_nanopublication_uris(algebra['p'])
-        case 'ConstructQuery':
+    match algebra.name:  # noqa: WPS242
+        case "SelectQuery" | "AskQuery" | "Project" | "Distinct" | "Graph":
+            yield from _extract_nanopublication_uris(algebra["p"])
+        case "ConstructQuery":
             # CONSTRUCT queries don't have nanopublication URIs in bindings
             return
 
-        case 'Slice':
-            yield from _extract_nanopublication_uris(algebra['p'])
+        case "Slice":
+            yield from _extract_nanopublication_uris(algebra["p"])
 
-        case 'BGP':
-            for retractor, retracts, retractee in algebra['triples']:
+        case "BGP":
+            for retractor, retracts, retractee in algebra["triples"]:
                 if retracts == URIRef(
-                    'https://purl.org/nanopub/x/retracts',
+                    "https://purl.org/nanopub/x/retracts",
                 ) and isinstance(retractor, Variable):
                     yield retractee
 
-        case 'LeftJoin' | 'Join' | 'Union':
-            yield from _extract_nanopublication_uris(algebra['p1'])
-            yield from _extract_nanopublication_uris(algebra['p2'])
+        case "LeftJoin" | "Join" | "Union":
+            yield from _extract_nanopublication_uris(algebra["p1"])
+            yield from _extract_nanopublication_uris(algebra["p2"])
+
+        case "Extend":
+            # Extend is used for BIND expressions - process the pattern recursively
+            yield from _extract_nanopublication_uris(algebra["p"])
 
-        case 'Filter' | 'OrderBy':
+        case "Filter" | "OrderBy":
             return
 
         case unknown_name:
             raise ValueError(
-                f'Unknown algebra name: {unknown_name}, '
-                f'content: {algebra}',
+                f"Unknown algebra name: {unknown_name}, content: {algebra}",
             )
 
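
The `BGP` arm above fires on triple patterns whose predicate is `npx:retracts` and whose subject is a variable. A query in the following shape (the nanopublication URI is illustrative) would make the processor fetch retraction records before evaluation:

```python
RETRACTIONS_QUERY = """
SELECT ?retraction WHERE {
    ?retraction <https://purl.org/nanopub/x/retracts>
        <https://w3id.org/np/RAexample> .
}
"""
```
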
@@ -266,7 +269,7 @@ def extract_triples(algebra: CompValue) -> Iterable[tuple[Node, Node, Node]]:
     """Extract triples from a SPARQL query algebra instance."""
     if isinstance(algebra, CompValue):
         for key, value in algebra.items():  # noqa: WPS110
-            if key == 'triples':
+            if key == "triples":
                 yield from value
 
             else:
@@ -302,13 +305,13 @@ class NanopubQueryPlugin:
 
         FIXME: Can we cache this?
         """
-        response = requests.post(
-            'https://query.knowledgepixels.com/repo/full',
+        response = requests.post(  # noqa: S113
+            "https://query.knowledgepixels.com/repo/full",
             data={
-                'query': 'CONSTRUCT WHERE { ?instance a <%s> }' % class_uri,
+                "query": "CONSTRUCT WHERE { ?instance a <%s> }" % class_uri,
             },
             headers={
-                'Accept': 'application/ld+json',
+                "Accept": "application/ld+json",
             },
         )
 
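
The new `# noqa: S113` acknowledges the flake8-bandit warning about calling `requests.post` without a `timeout`. Note the query body is built with plain `%` interpolation rather than a SPARQL parameter; for a hypothetical class URI the posted form body is:

```python
class_uri = "https://example.com/Person"  # hypothetical

query = "CONSTRUCT WHERE { ?instance a <%s> }" % class_uri
print(query)
# CONSTRUCT WHERE { ?instance a <https://example.com/Person> }
```
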
@@ -316,21 +319,21 @@ class NanopubQueryPlugin:
 
         self.graph.get_context(BNode()).parse(
             data=response.text,
-            format='json-ld',
+            format="json-ld",
         )
 
     def _is_from_nanopubs(self, class_uri: URIRef) -> bool:
         if not isinstance(class_uri, URIRef):
-            raise ValueError(f'Not a URIRef: {class_uri}')
+            raise ValueError(f"Not a URIRef: {class_uri}")
 
-        return self.graph.query(
+        return self.graph.query(  # noqa: WPS462
             """
             ASK WHERE {
                 ?_whatever <https://purl.org/nanopub/x/introduces> $class
             }
             """,
             initBindings={
-                'class': class_uri,
+                "class": class_uri,
             },
         ).askAnswer
 
@@ -360,7 +363,7 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
 
         self.graph._indices_loaded = True
 
-    def query(
+    def query(  # noqa: WPS211, WPS210, WPS231, WPS213, C901
         self,
         strOrQuery,
         initBindings=None,
@@ -377,7 +380,7 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
 
         initBindings = initBindings or {}
         initNs = initNs or {}
-
+
         if isinstance(strOrQuery, Query):
             query = strOrQuery
 
@@ -386,7 +389,7 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
             query = translateQuery(parse_tree, base, initNs)
 
         # Only extract nanopublications from SELECT/ASK queries, not CONSTRUCT
-        if query.algebra.name != 'ConstructQuery':
+        if query.algebra.name != "ConstructQuery":
             self.load_retracting_nanopublications_by_query(
                 query=query,
                 bindings=initBindings,
@@ -402,22 +405,24 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
         )
 
         # Filter out inference graph names (they're not URLs to load)
-        urls = {url for url in urls if not str(url).startswith('inference:')}
+        urls = {url for url in urls if not str(url).startswith("inference:")}
 
         for url in urls:
             try:
                 self.load(url)
             except Exception as err:
-                self.logger.exception(f'Failed to load {url}: {err}', url, err)
+                self.logger.exception(f"Failed to load {url}: {err}", url, err)
 
         # Run inference if there's new data since last inference run
         # (after URLs are loaded so inference can use the loaded data)
         if self.graph.last_not_inferred_source is not None:  # noqa: WPS504
             last_source = self.graph.last_not_inferred_source
-            self.logger.debug(f'Running inference, last_not_inferred_source: {last_source}')
+            self.logger.debug(
+                f"Running inference, last_not_inferred_source: {last_source}"
+            )  # noqa: WPS237
             self._run_inference()
         else:
-            self.logger.debug('Skipping inference, last_not_inferred_source is None')
+            self.logger.debug("Skipping inference, last_not_inferred_source is None")
 
         NanopubQueryPlugin(graph=self.graph)(query, bindings=initBindings)
 
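
Graph names minted by the inference step (see `_run_inference_from_directory` below) share the URL set with genuinely fetchable documents, so they are filtered out by prefix before loading. A small illustration with made-up entries:

```python
from rdflib import URIRef

urls = {
    URIRef("https://example.com/data.yamlld"),  # illustrative document URL
    URIRef("inference:unblocked"),              # local inference graph name
}

urls = {url for url in urls if not str(url).startswith("inference:")}
print(urls)  # only the https:// document remains to be fetched
```
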
@@ -428,7 +433,7 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
         query_result = evalQuery(self.graph, query, initBindings, base)
 
         try:
-            bindings = list(query_result['bindings'])
+            bindings = list(query_result["bindings"])
         except KeyError:
             # This was probably an ASK query
             return query_result
@@ -436,37 +441,43 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
         for row in bindings:
             break
         for _, maybe_iri in row.items():  # noqa: WPS427
-            if (
-                isinstance(maybe_iri, URIRef)
-                and isinstance(self.load(maybe_iri), Loaded)
+            if isinstance(maybe_iri, URIRef) and isinstance(
+                self.load(maybe_iri), Loaded
             ):
-                is_anything_loaded = True
-                self.logger.info(
-                    'Newly loaded: {uri}',
+                is_anything_loaded = True  # noqa: WPS220
+                self.logger.info(  # noqa: WPS220
+                    "Newly loaded: {uri}",
                     uri=maybe_iri,
                 )
 
-        query_result['bindings'] = bindings
+        query_result["bindings"] = bindings
         return query_result
 
     def _is_loaded(self, uri: URIRef) -> bool:
         """Find out if this URI in the graph already."""
-        return funcy.first(
-            self.graph.quads((
-                uri,
-                IOLANTA['last-loaded-time'],
-                None,
-                META,
-            )),
-        ) is not None
+        return (
+            funcy.first(
+                self.graph.quads(
+                    (
+                        uri,
+                        IOLANTA["last-loaded-time"],
+                        None,
+                        META,
+                    )
+                ),
+            )
+            is not None
+        )
 
     def _mark_as_loaded(self, uri: URIRef):
-        self.graph.add((
-            uri,
-            IOLANTA['last-loaded-time'],
-            Literal(datetime.datetime.now()),
-            META,
-        ))
+        self.graph.add(
+            (
+                uri,
+                IOLANTA["last-loaded-time"],
+                Literal(datetime.datetime.now()),
+                META,
+            )
+        )
 
     def _follow_is_visualized_with_links(self, uri: URIRef):
         """Follow `dcterms:isReferencedBy` links."""
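
`funcy.first` returns the first item of an iterable, or `None` when it is empty, so the rewritten `_is_loaded` pulls at most one quad from the `quads()` generator instead of materializing every match:

```python
import funcy

print(funcy.first([]))                              # None
print(funcy.first(n for n in range(10) if n > 4))   # 5
```
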
@@ -475,7 +486,7 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
             if isinstance(visualization, URIRef):
                 self.load(visualization)
 
-    def load(
+    def load(  # noqa: C901, WPS210, WPS212, WPS213, WPS231
         self,
         source: URIRef,
     ) -> LoadResult:
@@ -487,15 +498,15 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
         # Blank nodes cannot be loaded from URLs
         if isinstance(source, BNode):
             return Skipped()
-
+
         # Also check if URIRef represents a blank node (can happen if BNode
         # was serialized to string and converted to URIRef)
-        if isinstance(source, URIRef) and str(source).startswith('_:'):
-            raise ValueError(
-                'This is actually a blank node but masked as a URIREF')
+        if isinstance(source, URIRef) and str(source).startswith("_:"):
+            raise ValueError("This is actually a blank node but masked as a URIREF")
+
         url = URL(source)
 
-        if url.scheme in {'file', 'python', 'local', 'urn', 'doi'}:
+        if url.scheme in {"file", "python", "local", "urn", "doi"}:
             # FIXME temporary fix. `yaml-ld` doesn't read `context.*` files and
             # fails.
             return Skipped()
@@ -506,14 +517,14 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
             # TODO: It works differently for JSON-LD documents AFAIK. Need to
             # double check that.
             url = url.with_fragment(None)
-            source = URIRef(str(f'{url}#'))
+            source = URIRef(str(f"{url}#"))
 
         self._follow_is_visualized_with_links(source)
 
         new_source = apply_redirect(source)
         if new_source != source:
             self.logger.info(
-                'Rewriting: {source} → {new_source}',
+                "Rewriting: {source} → {new_source}",
                 source=source,
                 new_source=new_source,
             )
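
The fragment handling above strips the hash for fetching and then restores a trailing `#`, so a hash-namespace IRI keeps its identity. Assuming the `URL` class imported elsewhere in this module is `yarl.URL`, the behavior is:

```python
from yarl import URL  # assumption: URL in processor.py is yarl.URL

url = URL("https://example.com/vocab#Term").with_fragment(None)
print(url)        # https://example.com/vocab
print(f"{url}#")  # https://example.com/vocab# is the IRI the loader records
```
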
@@ -526,45 +537,49 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
         # FIXME This is definitely inefficient. However, python-yaml-ld caches
         # the document, so the performance overhead is not super high.
         try:
-            resolved_source = yaml_ld.load_document(source)['documentUrl']
+            resolved_source = yaml_ld.load_document(source)["documentUrl"]
         except NotFound as not_found:
-            self.logger.info(f'{not_found.path} | 404 Not Found')
+            self.logger.info(f"{not_found.path} | 404 Not Found")
             namespaces = [RDF, RDFS, OWL, FOAF, DC, VANN]
 
             for namespace in namespaces:
                 if not_found.path.startswith(str(namespace)):
                     self.load(URIRef(namespace))
                     self.logger.info(
-                        'Redirecting %s → namespace %s',
+                        "Redirecting %s → namespace %s",
                         not_found.path,
                         namespace,
                     )
                     return Loaded()
 
             self.logger.info(
-                '{path} | Cannot find a matching namespace',
+                "{path} | Cannot find a matching namespace",
                 path=not_found.path,
             )
 
-            self.graph.add((
-                source_uri,
-                RDF.type,
-                IOLANTA['not-found'],
-                source_uri,
-            ))
+            self.graph.add(
+                (
+                    source_uri,
+                    RDF.type,
+                    IOLANTA["not-found"],
+                    source_uri,
+                )
+            )
 
             self._mark_as_loaded(source_uri)
 
             return Loaded()
 
         except Exception as err:
-            self.logger.info(f'{source} | Failed: {err}')
-            self.graph.add((
-                URIRef(source),
-                RDF.type,
-                IOLANTA['failed'],
-                source_uri,
-            ))
+            self.logger.info(f"{source} | Failed: {err}")
+            self.graph.add(
+                (
+                    URIRef(source),
+                    RDF.type,
+                    IOLANTA["failed"],
+                    source_uri,
+                )
+            )
             self._mark_as_loaded(source_uri)
 
             return Loaded()
@@ -572,11 +587,13 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
         if resolved_source:
             resolved_source_uri_ref = URIRef(resolved_source)
             if resolved_source_uri_ref != URIRef(source):
-                self.graph.add((
-                    source_uri,
-                    IOLANTA['redirects-to'],
-                    resolved_source_uri_ref,
-                ))
+                self.graph.add(
+                    (
+                        source_uri,
+                        IOLANTA["redirects-to"],
+                        resolved_source_uri_ref,
+                    )
+                )
                 source = resolved_source
 
         self._mark_as_loaded(source_uri)
@@ -585,19 +602,19 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
             ld_rdf = yaml_ld.to_rdf(source)
         except ConnectionError as name_resolution_error:
             self.logger.info(
-                '%s | name resolution error: %s',
+                "%s | name resolution error: %s",
                 source,
                 str(name_resolution_error),
             )
             return Loaded()
         except ParserNotFound as parser_not_found:
-            self.logger.info(f'{source} | {parser_not_found}')
+            self.logger.info(f"{source} | {parser_not_found}")
             return Loaded()
         except YAMLLDError as yaml_ld_error:
-            self.logger.error(f'{source} | {yaml_ld_error}')
+            self.logger.error(f"{source} | {yaml_ld_error}")
             return Loaded()
         except HTTPError as http_error:
-            self.logger.warning(f'{source} | HTTP error: {http_error}')
+            self.logger.warning(f"{source} | HTTP error: {http_error}")
             return Loaded()
 
         try:
@@ -616,18 +633,15 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
         )
 
         if not quads:
-            self.logger.info('{source} | No data found', source=source)
+            self.logger.info("{source} | No data found", source=source)
             return Loaded()
 
         self.graph.addN(quads)
         self.graph.last_not_inferred_source = source
 
-        into_graphs = ', '.join({
-            quad.graph
-            for quad in quads
-        })
+        into_graphs = ", ".join({quad.graph for quad in quads})
         self.logger.info(
-            f'{source} | loaded {len(quads)} triples into graphs: {into_graphs}',
+            f"{source} | loaded {len(quads)} triples into graphs: {into_graphs}",
         )
 
         return Loaded()
@@ -642,6 +656,74 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
 
         return term
 
+    def _run_inference_from_directory(  # noqa: WPS231, WPS220, WPS210
+        self,
+        inference_dir: Path,
+        graph_prefix: str = "inference",
+    ):
+        """
+        Run inference queries from a given inference directory.
+
+        For each SPARQL file in the inference directory:
+        1. Truncate the named graph `local:{graph_prefix}-{filename}`
+        2. Execute the CONSTRUCT query
+        3. Insert the resulting triples into that graph
+
+        Args:
+            inference_dir: Directory containing inference SPARQL files
+            graph_prefix: Prefix for inference graph names
+            return_count: Whether to return the count of inferred triples
+
+        Returns the total number of triples inferred.
+        """
+        if not inference_dir.exists():
+            return 0
+
+        total_inferred = 0
+        for inference_file in inference_dir.glob("*.sparql"):
+            filename = inference_file.stem  # filename without .sparql extension
+            inference_graph = URIRef(f"{graph_prefix}:{filename}")
+
+            # Truncate the inference graph
+            context = self.graph.get_context(inference_graph)
+            context.remove((None, None, None))
+
+            # Read and execute the CONSTRUCT query
+            query_text = inference_file.read_text()
+            query_result = self.graph.query(query_text)  # noqa: WPS110
+
+            # CONSTRUCT queries return a SPARQLResult with a graph attribute
+            result_graph = (
+                query_result.get("graph")
+                if isinstance(query_result, dict)
+                else query_result.graph
+            )
+            self.logger.debug(
+                f"Inference {filename}: result_graph is {result_graph}, type: {type(result_graph)}"
+            )
+            if result_graph is not None:  # noqa: WPS504
+                inferred_quads = [
+                    (s, p, o, inference_graph)  # noqa: WPS111
+                    for s, p, o in result_graph  # noqa: WPS111
+                ]
+                self.logger.debug(
+                    f"Inference {filename}: generated {len(inferred_quads)} quads"
+                )
+
+                if inferred_quads:
+                    self.graph.addN(inferred_quads)  # noqa: WPS220
+                    inferred_count = len(inferred_quads)
+                    total_inferred += inferred_count
+                    self.logger.info(  # noqa: WPS220
+                        "Inference {filename}: added {count} triples",
+                        filename=filename,
+                        count=inferred_count,
+                    )
+            else:
+                self.logger.debug(f"Inference {filename}: result_graph is None")
+
+        return total_inferred
+
     def _run_inference(self):  # noqa: WPS231, WPS220, WPS210
         """
         Run inference queries from the inference directory.
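
The new method materializes each CONSTRUCT result into a named graph keyed by the query's filename. A minimal self-contained sketch of the same rdflib pattern (graph name, triple, and query are illustrative):

```python
from rdflib import Dataset, Literal, URIRef

dataset = Dataset()
dataset.add((
    URIRef("https://example.com/task"),
    URIRef("https://example.com/title"),
    Literal("Ship the release"),
))

# For CONSTRUCT queries, rdflib exposes the built triples as result.graph.
result = dataset.query(
    "CONSTRUCT { ?s a <https://example.com/Task> } WHERE { ?s ?p ?o }",
)

# Copy the constructed triples into a dedicated named graph.
inference_graph = dataset.graph(URIRef("inference:example"))
for triple in result.graph:
    inference_graph.add(triple)

print(len(inference_graph))  # 1
```

Truncating the named graph before each run keeps the inference results idempotent: re-running a query replaces its previous conclusions instead of accumulating duplicates.
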
@@ -652,42 +734,13 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
         3. Insert the resulting triples into that graph
         """
         with self.inference_lock:
-            for inference_file in INFERENCE_DIR.glob('*.sparql'):
-                filename = inference_file.stem  # filename without .sparql extension
-                inference_graph = URIRef(f'inference:{filename}')
-
-                # Truncate the inference graph
-                context = self.graph.get_context(inference_graph)
-                context.remove((None, None, None))
-
-                # Read and execute the CONSTRUCT query
-                query_text = inference_file.read_text()
-                query_result = self.graph.query(query_text)  # noqa: WPS110
-
-                # CONSTRUCT queries return a SPARQLResult with a graph attribute
-                result_graph = query_result.get('graph') if isinstance(query_result, dict) else query_result.graph
-                self.logger.debug(f'Inference {filename}: result_graph is {result_graph}, type: {type(result_graph)}')
-                if result_graph is not None:  # noqa: WPS504
-                    inferred_quads = [
-                        (s, p, o, inference_graph)  # noqa: WPS111
-                        for s, p, o in result_graph  # noqa: WPS111
-                    ]
-                    self.logger.debug(f'Inference {filename}: generated {len(inferred_quads)} quads')
-
-                    if inferred_quads:
-                        self.graph.addN(inferred_quads)  # noqa: WPS220
-                        self.logger.info(  # noqa: WPS220
-                            'Inference {filename}: added {count} triples',
-                            filename=filename,
-                            count=len(inferred_quads),
-                        )
-                    else:
-                        self.logger.debug(f'Inference {filename}: result_graph is None')
-
+            # Run global inference (deprecated, will be removed later)
+            self._run_inference_from_directory(INFERENCE_DIR, graph_prefix="inference")
+
             # Clear the flag after running inference
             self.graph.last_not_inferred_source = None
 
-    def load_retracting_nanopublications_by_query(
+    def load_retracting_nanopublications_by_query(  # noqa: WPS231
         self,
         query: Query,
         bindings: dict[str, Node],