iolanta 2.1.10__py3-none-any.whl → 2.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iolanta/facets/facet.py +15 -9
- iolanta/facets/mermaid_roadmap/__init__.py +0 -0
- iolanta/facets/mermaid_roadmap/facet.py +133 -0
- iolanta/facets/mermaid_roadmap/inference/blocks.sparql +13 -0
- iolanta/facets/mermaid_roadmap/inference/has-task-default-type.sparql +16 -0
- iolanta/facets/mermaid_roadmap/inference/task.sparql +26 -0
- iolanta/facets/mermaid_roadmap/inference/unblocked.sparql +21 -0
- iolanta/facets/mermaid_roadmap/mermaid_roadmap.yamlld +59 -0
- iolanta/facets/mermaid_roadmap/sparql/edges.sparql +25 -0
- iolanta/facets/mermaid_roadmap/sparql/nodes.sparql +17 -0
- iolanta/iolanta.py +146 -55
- iolanta/mcp/cli.py +1 -17
- iolanta/mermaid/models.py +61 -36
- iolanta/parse_quads.py +18 -15
- iolanta/sparqlspace/processor.py +250 -255
- iolanta/sparqlspace/redirects.py +79 -0
- {iolanta-2.1.10.dist-info → iolanta-2.1.12.dist-info}/METADATA +2 -2
- {iolanta-2.1.10.dist-info → iolanta-2.1.12.dist-info}/RECORD +20 -12
- {iolanta-2.1.10.dist-info → iolanta-2.1.12.dist-info}/entry_points.txt +1 -0
- iolanta/mcp/prompts/nanopublication_assertion_authoring_rules.md +0 -63
- iolanta/mcp/prompts/rules.md +0 -83
- {iolanta-2.1.10.dist-info → iolanta-2.1.12.dist-info}/WHEEL +0 -0
iolanta/sparqlspace/processor.py
CHANGED
@@ -1,17 +1,12 @@
+# noqa: WPS201, WPS202, WPS402
 import dataclasses
 import datetime
-import re
-import time
 from pathlib import Path
 from threading import Lock
-from types import MappingProxyType
 from typing import Any, Iterable, Mapping

-import diskcache
 import funcy
 import loguru
-import platformdirs
-import reasonable
 import requests
 import yaml_ld
 from nanopub import NanopubClient
@@ -35,74 +30,36 @@ from iolanta.namespaces import (  # noqa: WPS235
     DCTERMS,
     FOAF,
     IOLANTA,
-    LOCAL,
     META,
     OWL,
-    PROV,
     RDF,
     RDFS,
     VANN,
 )
-from iolanta.parse_quads import
+from iolanta.parse_quads import parse_quads
+from iolanta.sparqlspace.redirects import apply_redirect

 REASONING_ENABLED = True
 OWL_REASONING_ENABLED = False

-INFERENCE_DIR = Path(__file__).parent /
-INDICES = [
-    URIRef(
+INFERENCE_DIR = Path(__file__).parent / "inference"
+INDICES = [  # noqa: WPS407
+    URIRef("https://iolanta.tech/visualizations/index.yaml"),
 ]


-REDIRECTS = MappingProxyType({
-    # FIXME This is presently hardcoded; we need to
-    # - either find a way to resolve these URLs automatically,
-    # - or create a repository of those redirects online.
-    'http://purl.org/vocab/vann/': URIRef(
-        'https://vocab.org/vann/vann-vocab-20100607.rdf',
-    ),
-    URIRef(DC): URIRef(DCTERMS),
-    URIRef(RDF): URIRef(RDF),
-    URIRef(RDFS): URIRef(RDFS),
-    URIRef(OWL): URIRef(OWL),
-
-    # Redirect FOAF namespace to GitHub mirror
-    URIRef('https?://xmlns.com/foaf/0.1/.+'): URIRef(
-        'https://raw.githubusercontent.com/foaf/foaf/refs/heads/master/xmlns.com/htdocs/foaf/0.1/index.rdf',
-    ),
-    URIRef('https://www.nanopub.org/nschema'): URIRef(
-        'https://www.nanopub.net/nschema#',
-    ),
-    URIRef('https://nanopub.org/nschema'): URIRef(
-        'https://nanopub.net/nschema#',
-    ),
-    URIRef(PROV): URIRef('https://www.w3.org/ns/prov-o'),
-
-    # Convert lexvo.org/id URLs to lexvo.org/data URLs
-    r'https://lexvo\.org/id/(.+)': r'http://lexvo.org/data/\1',
-    r'https://www\.lexinfo\.net/(.+)': r'http://www.lexinfo.net/\1',
-})
-
-
-@diskcache.Cache(
-    directory=str(
-        platformdirs.user_cache_path(
-            appname='iolanta',
-        ) / 'find_retractions_for',
-    ),
-).memoize(expire=datetime.timedelta(days=8).total_seconds())
 def find_retractions_for(nanopublication: URIRef) -> set[URIRef]:
     """Find nanopublications that retract the given one."""
     # See https://github.com/fair-workflows/nanopub/issues/168 for
     # context of this dirty hack.
-    use_server =
+    use_server = "http://grlc.nanopubs.lod.labs.vu.nl/api/local/local/"

     client = NanopubClient(use_server=use_server)
     client.grlc_urls = [use_server]

     http_url = str(nanopublication).replace(
-
-
+        "https://",
+        "http://",
     )

     try:
@@ -113,60 +70,72 @@ def find_retractions_for(nanopublication: URIRef) -> set[URIRef]:
     return {URIRef(retraction) for retraction in retractions}


-def _extract_from_mapping(  # noqa: WPS213
+def _extract_from_mapping(  # noqa: WPS213, WPS231
     algebra: Mapping[str, Any],
 ) -> Iterable[URIRef | Variable]:
-    match algebra.name:
-        case
-            yield from extract_mentioned_urls(algebra[
+    match algebra.name:  # noqa: WPS242
+        case "SelectQuery" | "AskQuery" | "Project" | "Distinct" | "Slice":
+            yield from extract_mentioned_urls(algebra["p"])  # noqa: WPS226

-        case
-            yield from [
+        case "BGP":
+            yield from [  # noqa: WPS353, WPS221
                 term
-                for triple in algebra[
+                for triple in algebra["triples"]
                 for term in triple
-                if isinstance(term, URIRef)
+                if isinstance(term, (URIRef, Variable))
             ]

-        case
-            yield from extract_mentioned_urls(algebra[
+        case "Filter" | "UnaryNot" | "OrderCondition":
+            yield from extract_mentioned_urls(algebra["expr"])  # noqa: WPS204, WPS226

-        case
-
+        case "Builtin_EXISTS":
+            # Builtin_EXISTS uses 'graph' instead of 'arg'
+            yield from extract_mentioned_urls(algebra["graph"])

-        case
-
-
+        case built_in if built_in.startswith("Builtin_"):
+            # Some built-ins may not have an 'arg' key
+            arg_value = algebra.get("arg")
+            if arg_value is not None:
+                yield from extract_mentioned_urls(arg_value)

-        case
-            yield from extract_mentioned_urls(algebra[
-            yield from extract_mentioned_urls(algebra[
-            yield from extract_mentioned_urls(algebra['expr'])
+        case "RelationalExpression":
+            yield from extract_mentioned_urls(algebra["expr"])
+            yield from extract_mentioned_urls(algebra["other"])

-        case
-            yield from extract_mentioned_urls(algebra[
-            yield from extract_mentioned_urls(algebra[
+        case "LeftJoin":
+            yield from extract_mentioned_urls(algebra["p1"])
+            yield from extract_mentioned_urls(algebra["p2"])
+            yield from extract_mentioned_urls(algebra["expr"])

-        case
-            yield from extract_mentioned_urls(algebra[
-            yield from extract_mentioned_urls(algebra[
+        case "Join" | "Union":
+            yield from extract_mentioned_urls(algebra["p1"])
+            yield from extract_mentioned_urls(algebra["p2"])

-        case
-
-            yield from extract_mentioned_urls(algebra[
+        case "Extend":
+            # Extend is used for BIND expressions - process pattern and expression
+            yield from extract_mentioned_urls(algebra["p"])
+            yield from extract_mentioned_urls(algebra["expr"])

-        case
+        case "ConditionalOrExpression" | "ConditionalAndExpression":
+            yield from extract_mentioned_urls(algebra["expr"])
+            yield from extract_mentioned_urls(algebra["other"])
+
+        case "OrderBy":
+            yield from extract_mentioned_urls(algebra["p"])
+            yield from extract_mentioned_urls(algebra["expr"])
+
+        case "TrueFilter":
             return

-        case
-            yield from extract_mentioned_urls(algebra[
-            yield from extract_mentioned_urls(algebra[
+        case "Graph":
+            yield from extract_mentioned_urls(algebra["p"])
+            yield from extract_mentioned_urls(algebra["term"])

         case unknown_name:
-            formatted_keys =
+            formatted_keys = ", ".join(algebra.keys())
             loguru.logger.info(
-
-                f
+                "Unknown SPARQL expression "
+                f"{unknown_name}({formatted_keys}): {algebra}",
             )
             return
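The `"SelectQuery"`, `"Project"`, and `"BGP"` names matched above are node names from rdflib's SPARQL algebra: `translateQuery(parseQuery(...))` yields a tree of `CompValue` mappings, each carrying a `.name` and nested patterns under keys such as `p`. A minimal sketch of inspecting that tree, using public rdflib APIs; the `walk` helper is illustrative and not part of the package:

```python
# Inspect the CompValue tree that _extract_from_mapping() walks.
from rdflib.plugins.sparql.algebra import translateQuery
from rdflib.plugins.sparql.parser import parseQuery

query = translateQuery(parseQuery(
    "SELECT ?o WHERE { <https://example.com/s> <https://example.com/p> ?o }",
))


def walk(node, depth=0):
    """Print nested algebra node names, one level per line."""
    print("  " * depth + node.name)
    child = node.get("p")  # nested pattern, when present
    if child is not None:
        walk(child, depth + 1)


walk(query.algebra)
# SelectQuery
#   Project
#     BGP
```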
@@ -192,7 +161,7 @@ def extract_mentioned_urls(
         case unknown_algebra:
             algebra_type = type(unknown_algebra)
             raise ValueError(
-                f
+                f"Algebra of unknown type {algebra_type}: {unknown_algebra}",
             )


@@ -208,7 +177,10 @@ def normalize_term(term: Node) -> Node:
     * A dirty hack;
     * Based on hard code.
     """
-
+    if isinstance(term, URIRef):
+        return apply_redirect(term)
+
+    return term


 def resolve_variables(
@@ -223,10 +195,7 @@ def resolve_variables(

         case Variable() as query_variable:
             variable_value = bindings.get(str(query_variable))
-            if (
-                variable_value is not None
-                and isinstance(variable_value, URIRef)
-            ):
+            if variable_value is not None and isinstance(variable_value, URIRef):
                 yield variable_value


@@ -258,74 +227,49 @@ class Skipped:
 LoadResult = Loaded | Skipped


-def _extract_nanopublication_uris(
+def _extract_nanopublication_uris(  # noqa: WPS231
     algebra: CompValue,
 ) -> Iterable[URIRef]:
     """Extract nanopublications to get retracting information for."""
-    match algebra.name:
-        case
-            yield from _extract_nanopublication_uris(algebra[
-        case
+    match algebra.name:  # noqa: WPS242
+        case "SelectQuery" | "AskQuery" | "Project" | "Distinct" | "Graph":
+            yield from _extract_nanopublication_uris(algebra["p"])
+        case "ConstructQuery":
             # CONSTRUCT queries don't have nanopublication URIs in bindings
             return

-        case
-            yield from _extract_nanopublication_uris(algebra[
+        case "Slice":
+            yield from _extract_nanopublication_uris(algebra["p"])

-        case
-            for retractor, retracts, retractee in algebra[
+        case "BGP":
+            for retractor, retracts, retractee in algebra["triples"]:
                 if retracts == URIRef(
-
+                    "https://purl.org/nanopub/x/retracts",
                 ) and isinstance(retractor, Variable):
                     yield retractee

-        case
-            yield from _extract_nanopublication_uris(algebra[
-            yield from _extract_nanopublication_uris(algebra[
+        case "LeftJoin" | "Join" | "Union":
+            yield from _extract_nanopublication_uris(algebra["p1"])
+            yield from _extract_nanopublication_uris(algebra["p2"])
+
+        case "Extend":
+            # Extend is used for BIND expressions - process the pattern recursively
+            yield from _extract_nanopublication_uris(algebra["p"])

-        case
+        case "Filter" | "OrderBy":
             return

         case unknown_name:
             raise ValueError(
-                f
-                f'content: {algebra}',
+                f"Unknown algebra name: {unknown_name}, content: {algebra}",
             )


-def apply_redirect(source: URIRef) -> URIRef:  # noqa: WPS210
-    """
-    Rewrite the URL using regex patterns and group substitutions.
-
-    For each pattern in REDIRECTS:
-    - If the pattern matches the source URI
-    - Replace the source with the destination, substituting any regex groups
-    """
-    source_str = str(source)
-
-    for pattern, destination in REDIRECTS.items():
-        pattern_str = str(pattern)
-        destination_str = str(destination)
-
-        match = re.match(pattern_str, source_str)
-        if match:
-            # Replace any group references in the destination
-            # (like \1, \2, etc.)
-            redirected_uri = re.sub(
-                pattern_str,
-                destination_str,
-                source_str,
-            )
-            return URIRef(redirected_uri)
-
-    return source
-
-
 def extract_triples(algebra: CompValue) -> Iterable[tuple[Node, Node, Node]]:
     """Extract triples from a SPARQL query algebra instance."""
     if isinstance(algebra, CompValue):
         for key, value in algebra.items():  # noqa: WPS110
-            if key ==
+            if key == "triples":
                 yield from value

             else:
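The `apply_redirect` helper and the hardcoded `REDIRECTS` table deleted above moved into the new `iolanta/sparqlspace/redirects.py` module (+79 lines in the file list); that file's diff is not shown here. A sketch of the same regex-rewrite logic, reconstructed from the removed code; the released module may differ:

```python
# Sketch of iolanta/sparqlspace/redirects.py, reconstructed from the
# apply_redirect() implementation removed above; not the released source.
import re
from types import MappingProxyType

from rdflib import URIRef

REDIRECTS = MappingProxyType({
    # pattern -> destination; \1, \2, ... substitute captured groups
    r'https://lexvo\.org/id/(.+)': r'http://lexvo.org/data/\1',
})


def apply_redirect(source: URIRef) -> URIRef:
    """Rewrite a URL using the first matching redirect pattern."""
    source_str = str(source)
    for pattern, destination in REDIRECTS.items():
        if re.match(str(pattern), source_str):
            return URIRef(re.sub(str(pattern), str(destination), source_str))
    return source
```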
@@ -361,13 +305,13 @@ class NanopubQueryPlugin:

         FIXME: Can we cache this?
         """
-        response = requests.post(
-
+        response = requests.post(  # noqa: S113
+            "https://query.knowledgepixels.com/repo/full",
             data={
-
+                "query": "CONSTRUCT WHERE { ?instance a <%s> }" % class_uri,
             },
             headers={
-
+                "Accept": "application/ld+json",
             },
         )

@@ -375,21 +319,21 @@ class NanopubQueryPlugin:

         self.graph.get_context(BNode()).parse(
             data=response.text,
-            format=
+            format="json-ld",
         )

     def _is_from_nanopubs(self, class_uri: URIRef) -> bool:
         if not isinstance(class_uri, URIRef):
-            raise ValueError(f
+            raise ValueError(f"Not a URIRef: {class_uri}")

-        return self.graph.query(
+        return self.graph.query(  # noqa: WPS462
            """
            ASK WHERE {
                ?_whatever <https://purl.org/nanopub/x/introduces> $class
            }
            """,
            initBindings={
-
+                "class": class_uri,
            },
        ).askAnswer

@@ -419,7 +363,7 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214

         self.graph._indices_loaded = True

-    def query(
+    def query(  # noqa: WPS211, WPS210, WPS231, WPS213, C901
         self,
         strOrQuery,
         initBindings=None,
@@ -436,7 +380,7 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214

         initBindings = initBindings or {}
         initNs = initNs or {}
-
+
         if isinstance(strOrQuery, Query):
             query = strOrQuery

@@ -445,7 +389,7 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
             query = translateQuery(parse_tree, base, initNs)

         # Only extract nanopublications from SELECT/ASK queries, not CONSTRUCT
-        if query.algebra.name !=
+        if query.algebra.name != "ConstructQuery":
             self.load_retracting_nanopublications_by_query(
                 query=query,
                 bindings=initBindings,
@@ -461,21 +405,24 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
         )

         # Filter out inference graph names (they're not URLs to load)
-        urls = {url for url in urls if not str(url).startswith(
+        urls = {url for url in urls if not str(url).startswith("inference:")}

         for url in urls:
             try:
                 self.load(url)
             except Exception as err:
-                self.logger.exception(f
+                self.logger.exception(f"Failed to load {url}: {err}", url, err)

         # Run inference if there's new data since last inference run
         # (after URLs are loaded so inference can use the loaded data)
-        if self.graph.last_not_inferred_source is not None:
-
+        if self.graph.last_not_inferred_source is not None:  # noqa: WPS504
+            last_source = self.graph.last_not_inferred_source
+            self.logger.debug(
+                f"Running inference, last_not_inferred_source: {last_source}"
+            )  # noqa: WPS237
             self._run_inference()
         else:
-            self.logger.debug(
+            self.logger.debug("Skipping inference, last_not_inferred_source is None")

         NanopubQueryPlugin(graph=self.graph)(query, bindings=initBindings)

@@ -486,45 +433,51 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
         query_result = evalQuery(self.graph, query, initBindings, base)

         try:
-            bindings = list(query_result[
+            bindings = list(query_result["bindings"])
         except KeyError:
             # This was probably an ASK query
             return query_result

         for row in bindings:
             break
-        for _, maybe_iri in row.items():
-            if (
-
-                and isinstance(self.load(maybe_iri), Loaded)
+        for _, maybe_iri in row.items():  # noqa: WPS427
+            if isinstance(maybe_iri, URIRef) and isinstance(
+                self.load(maybe_iri), Loaded
             ):
-                is_anything_loaded = True
-                self.logger.info(
-
+                is_anything_loaded = True  # noqa: WPS220
+                self.logger.info(  # noqa: WPS220
+                    "Newly loaded: {uri}",
                     uri=maybe_iri,
                 )

-        query_result[
+        query_result["bindings"] = bindings
         return query_result

     def _is_loaded(self, uri: URIRef) -> bool:
         """Find out if this URI in the graph already."""
-        return
-
-
-
-
-
-
-
+        return (
+            funcy.first(
+                self.graph.quads(
+                    (
+                        uri,
+                        IOLANTA["last-loaded-time"],
+                        None,
+                        META,
+                    )
+                ),
+            )
+            is not None
+        )

     def _mark_as_loaded(self, uri: URIRef):
-        self.graph.add(
-
-
-
-
-
+        self.graph.add(
+            (
+                uri,
+                IOLANTA["last-loaded-time"],
+                Literal(datetime.datetime.now()),
+                META,
+            )
+        )

     def _follow_is_visualized_with_links(self, uri: URIRef):
         """Follow `dcterms:isReferencedBy` links."""
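`_is_loaded` and `_mark_as_loaded` above implement per-URI load bookkeeping: every loaded URI gets an `iolanta:last-loaded-time` quad in the META graph, and a URI counts as loaded if any such quad exists. A standalone sketch of the pattern; the `IOLANTA` and `META` values here are assumptions, not taken from the package:

```python
# Standalone sketch of the last-loaded-time bookkeeping pattern.
import datetime

from rdflib import Dataset, Literal, Namespace, URIRef

IOLANTA = Namespace("https://iolanta.tech/")  # assumed namespace
META = URIRef("https://iolanta.tech/meta")  # assumed bookkeeping graph name

graph = Dataset()


def mark_as_loaded(uri: URIRef) -> None:
    """Store the load timestamp as a quad in the META graph."""
    graph.add(
        (uri, IOLANTA["last-loaded-time"], Literal(datetime.datetime.now()), META),
    )


def is_loaded(uri: URIRef) -> bool:
    """A URI is loaded if any last-loaded-time quad exists for it."""
    quads = graph.quads((uri, IOLANTA["last-loaded-time"], None, META))
    return next(iter(quads), None) is not None
```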
@@ -533,7 +486,7 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
             if isinstance(visualization, URIRef):
                 self.load(visualization)

-    def load(
+    def load(  # noqa: C901, WPS210, WPS212, WPS213, WPS231
         self,
         source: URIRef,
     ) -> LoadResult:
@@ -545,15 +498,15 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
         # Blank nodes cannot be loaded from URLs
         if isinstance(source, BNode):
             return Skipped()
-
+
         # Also check if URIRef represents a blank node (can happen if BNode
         # was serialized to string and converted to URIRef)
-        if isinstance(source, URIRef) and str(source).startswith(
-            raise ValueError(
-
+        if isinstance(source, URIRef) and str(source).startswith("_:"):
+            raise ValueError("This is actually a blank node but masked as a URIREF")
+
         url = URL(source)

-        if url.scheme in {
+        if url.scheme in {"file", "python", "local", "urn", "doi"}:
             # FIXME temporary fix. `yaml-ld` doesn't read `context.*` files and
             # fails.
             return Skipped()
@@ -564,14 +517,14 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
             # TODO: It works differently for JSON-LD documents AFAIK. Need to
             # double check that.
             url = url.with_fragment(None)
-            source = URIRef(str(f
+            source = URIRef(str(f"{url}#"))

         self._follow_is_visualized_with_links(source)

         new_source = apply_redirect(source)
         if new_source != source:
             self.logger.info(
-
+                "Rewriting: {source} → {new_source}",
                 source=source,
                 new_source=new_source,
             )
@@ -584,58 +537,64 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
         # FIXME This is definitely inefficient. However, python-yaml-ld caches
         # the document, so the performance overhead is not super high.
         try:
-
+            resolved_source = yaml_ld.load_document(source)["documentUrl"]
         except NotFound as not_found:
-            self.logger.info(f
+            self.logger.info(f"{not_found.path} | 404 Not Found")
             namespaces = [RDF, RDFS, OWL, FOAF, DC, VANN]

             for namespace in namespaces:
                 if not_found.path.startswith(str(namespace)):
                     self.load(URIRef(namespace))
                     self.logger.info(
-
+                        "Redirecting %s → namespace %s",
                         not_found.path,
                         namespace,
                     )
                     return Loaded()

             self.logger.info(
-
+                "{path} | Cannot find a matching namespace",
                 path=not_found.path,
             )

-            self.graph.add(
-
-
-
-
-
+            self.graph.add(
+                (
+                    source_uri,
+                    RDF.type,
+                    IOLANTA["not-found"],
+                    source_uri,
+                )
+            )

             self._mark_as_loaded(source_uri)

             return Loaded()

         except Exception as err:
-            self.logger.info(f
-            self.graph.add(
-
-
-
-
-
+            self.logger.info(f"{source} | Failed: {err}")
+            self.graph.add(
+                (
+                    URIRef(source),
+                    RDF.type,
+                    IOLANTA["failed"],
+                    source_uri,
+                )
+            )
             self._mark_as_loaded(source_uri)

             return Loaded()

-        if
-
-        if
-            self.graph.add(
-
-
-
-
-
+        if resolved_source:
+            resolved_source_uri_ref = URIRef(resolved_source)
+            if resolved_source_uri_ref != URIRef(source):
+                self.graph.add(
+                    (
+                        source_uri,
+                        IOLANTA["redirects-to"],
+                        resolved_source_uri_ref,
+                    )
+                )
+            source = resolved_source

         self._mark_as_loaded(source_uri)

@@ -643,19 +602,19 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
             ld_rdf = yaml_ld.to_rdf(source)
         except ConnectionError as name_resolution_error:
             self.logger.info(
-
+                "%s | name resolution error: %s",
                 source,
                 str(name_resolution_error),
             )
             return Loaded()
         except ParserNotFound as parser_not_found:
-            self.logger.info(f
+            self.logger.info(f"{source} | {parser_not_found}")
             return Loaded()
         except YAMLLDError as yaml_ld_error:
-            self.logger.error(f
+            self.logger.error(f"{source} | {yaml_ld_error}")
             return Loaded()
         except HTTPError as http_error:
-            self.logger.warning(f
+            self.logger.warning(f"{source} | HTTP error: {http_error}")
             return Loaded()

         try:
@@ -674,18 +633,15 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
         )

         if not quads:
-            self.logger.info(
+            self.logger.info("{source} | No data found", source=source)
             return Loaded()

         self.graph.addN(quads)
         self.graph.last_not_inferred_source = source

-        into_graphs =
-            quad.graph
-            for quad in quads
-        })
+        into_graphs = ", ".join({quad.graph for quad in quads})
         self.logger.info(
-            f
+            f"{source} | loaded {len(quads)} triples into graphs: {into_graphs}",
         )

         return Loaded()
@@ -700,7 +656,75 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214

         return term

-    def
+    def _run_inference_from_directory(  # noqa: WPS231, WPS220, WPS210
+        self,
+        inference_dir: Path,
+        graph_prefix: str = "inference",
+    ):
+        """
+        Run inference queries from a given inference directory.
+
+        For each SPARQL file in the inference directory:
+        1. Truncate the named graph `local:{graph_prefix}-{filename}`
+        2. Execute the CONSTRUCT query
+        3. Insert the resulting triples into that graph
+
+        Args:
+            inference_dir: Directory containing inference SPARQL files
+            graph_prefix: Prefix for inference graph names
+            return_count: Whether to return the count of inferred triples
+
+        Returns the total number of triples inferred.
+        """
+        if not inference_dir.exists():
+            return 0
+
+        total_inferred = 0
+        for inference_file in inference_dir.glob("*.sparql"):
+            filename = inference_file.stem  # filename without .sparql extension
+            inference_graph = URIRef(f"{graph_prefix}:{filename}")
+
+            # Truncate the inference graph
+            context = self.graph.get_context(inference_graph)
+            context.remove((None, None, None))
+
+            # Read and execute the CONSTRUCT query
+            query_text = inference_file.read_text()
+            query_result = self.graph.query(query_text)  # noqa: WPS110
+
+            # CONSTRUCT queries return a SPARQLResult with a graph attribute
+            result_graph = (
+                query_result.get("graph")
+                if isinstance(query_result, dict)
+                else query_result.graph
+            )
+            self.logger.debug(
+                f"Inference {filename}: result_graph is {result_graph}, type: {type(result_graph)}"
+            )
+            if result_graph is not None:  # noqa: WPS504
+                inferred_quads = [
+                    (s, p, o, inference_graph)  # noqa: WPS111
+                    for s, p, o in result_graph  # noqa: WPS111
+                ]
+                self.logger.debug(
+                    f"Inference {filename}: generated {len(inferred_quads)} quads"
+                )
+
+                if inferred_quads:
+                    self.graph.addN(inferred_quads)  # noqa: WPS220
+                    inferred_count = len(inferred_quads)
+                    total_inferred += inferred_count
+                    self.logger.info(  # noqa: WPS220
+                        "Inference {filename}: added {count} triples",
+                        filename=filename,
+                        count=inferred_count,
+                    )
+            else:
+                self.logger.debug(f"Inference {filename}: result_graph is None")
+
+        return total_inferred
+
+    def _run_inference(self):  # noqa: WPS231, WPS220, WPS210
         """
         Run inference queries from the inference directory.

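The new `_run_inference_from_directory` lets each facet ship its own `inference/*.sparql` rules (see the `mermaid_roadmap/inference/` files in the list at the top): each rule file maps to one named graph that is truncated and repopulated on every run. A sketch of that mapping; the query body below is an illustrative guess, not the shipped `unblocked.sparql`:

```python
# How one rule file maps to one named graph, per the method above.
from pathlib import Path

from rdflib import URIRef

rule = Path("inference/unblocked.sparql")
# The file holds a CONSTRUCT query; a made-up example:
#   CONSTRUCT { ?task a <https://iolanta.tech/Unblocked> }
#   WHERE {
#       ?task a <https://iolanta.tech/Task> .
#       FILTER NOT EXISTS { ?blocker <https://iolanta.tech/blocks> ?task }
#   }

graph_prefix = "inference"
inference_graph = URIRef(f"{graph_prefix}:{rule.stem}")  # inference:unblocked
# Each run truncates inference_graph, re-executes the CONSTRUCT query, and
# adds the produced triples back as quads, so inferred data is rebuilt
# rather than accumulated.
```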
@@ -710,42 +734,13 @@ class GlobalSPARQLProcessor(Processor):  # noqa: WPS338, WPS214
         3. Insert the resulting triples into that graph
         """
         with self.inference_lock:
-
-
-
-
-                # Truncate the inference graph
-                context = self.graph.get_context(inference_graph)
-                context.remove((None, None, None))
-
-                # Read and execute the CONSTRUCT query
-                query_text = inference_file.read_text()
-                result = self.graph.query(query_text)
-
-                # CONSTRUCT queries return a SPARQLResult with a graph attribute
-                result_graph = result.get('graph') if isinstance(result, dict) else result.graph
-                self.logger.debug(f'Inference {filename}: result_graph is {result_graph}, type: {type(result_graph)}')
-                if result_graph is not None:
-                    inferred_quads = [
-                        (s, p, o, inference_graph)
-                        for s, p, o in result_graph
-                    ]
-                    self.logger.debug(f'Inference {filename}: generated {len(inferred_quads)} quads')
-
-                    if inferred_quads:
-                        self.graph.addN(inferred_quads)
-                        self.logger.info(
-                            'Inference {filename}: added {count} triples',
-                            filename=filename,
-                            count=len(inferred_quads),
-                        )
-                else:
-                    self.logger.debug(f'Inference {filename}: result_graph is None')
-
+            # Run global inference (deprecated, will be removed later)
+            self._run_inference_from_directory(INFERENCE_DIR, graph_prefix="inference")
+
             # Clear the flag after running inference
             self.graph.last_not_inferred_source = None

-    def load_retracting_nanopublications_by_query(
+    def load_retracting_nanopublications_by_query(  # noqa: WPS231
         self,
         query: Query,
         bindings: dict[str, Node],