iolanta-2.1.10-py3-none-any.whl → iolanta-2.1.12-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,17 +1,12 @@
+ # noqa: WPS201, WPS202, WPS402
  import dataclasses
  import datetime
- import re
- import time
  from pathlib import Path
  from threading import Lock
- from types import MappingProxyType
  from typing import Any, Iterable, Mapping

- import diskcache
  import funcy
  import loguru
- import platformdirs
- import reasonable
  import requests
  import yaml_ld
  from nanopub import NanopubClient
@@ -35,74 +30,36 @@ from iolanta.namespaces import ( # noqa: WPS235
      DCTERMS,
      FOAF,
      IOLANTA,
-     LOCAL,
      META,
      OWL,
-     PROV,
      RDF,
      RDFS,
      VANN,
  )
- from iolanta.parse_quads import NORMALIZE_TERMS_MAP, parse_quads
+ from iolanta.parse_quads import parse_quads
+ from iolanta.sparqlspace.redirects import apply_redirect

  REASONING_ENABLED = True
  OWL_REASONING_ENABLED = False

- INFERENCE_DIR = Path(__file__).parent / 'inference'
- INDICES = [
-     URIRef('https://iolanta.tech/visualizations/index.yaml'),
+ INFERENCE_DIR = Path(__file__).parent / "inference"
+ INDICES = [  # noqa: WPS407
+     URIRef("https://iolanta.tech/visualizations/index.yaml"),
  ]


- REDIRECTS = MappingProxyType({
-     # FIXME This is presently hardcoded; we need to
-     #   - either find a way to resolve these URLs automatically,
-     #   - or create a repository of those redirects online.
-     'http://purl.org/vocab/vann/': URIRef(
-         'https://vocab.org/vann/vann-vocab-20100607.rdf',
-     ),
-     URIRef(DC): URIRef(DCTERMS),
-     URIRef(RDF): URIRef(RDF),
-     URIRef(RDFS): URIRef(RDFS),
-     URIRef(OWL): URIRef(OWL),
-
-     # Redirect FOAF namespace to GitHub mirror
-     URIRef('https?://xmlns.com/foaf/0.1/.+'): URIRef(
-         'https://raw.githubusercontent.com/foaf/foaf/refs/heads/master/xmlns.com/htdocs/foaf/0.1/index.rdf',
-     ),
-     URIRef('https://www.nanopub.org/nschema'): URIRef(
-         'https://www.nanopub.net/nschema#',
-     ),
-     URIRef('https://nanopub.org/nschema'): URIRef(
-         'https://nanopub.net/nschema#',
-     ),
-     URIRef(PROV): URIRef('https://www.w3.org/ns/prov-o'),
-
-     # Convert lexvo.org/id URLs to lexvo.org/data URLs
-     r'https://lexvo\.org/id/(.+)': r'http://lexvo.org/data/\1',
-     r'https://www\.lexinfo\.net/(.+)': r'http://www.lexinfo.net/\1',
- })
-
-
- @diskcache.Cache(
-     directory=str(
-         platformdirs.user_cache_path(
-             appname='iolanta',
-         ) / 'find_retractions_for',
-     ),
- ).memoize(expire=datetime.timedelta(days=8).total_seconds())
  def find_retractions_for(nanopublication: URIRef) -> set[URIRef]:
      """Find nanopublications that retract the given one."""
      # See https://github.com/fair-workflows/nanopub/issues/168 for
      # context of this dirty hack.
-     use_server = 'http://grlc.nanopubs.lod.labs.vu.nl/api/local/local/'
+     use_server = "http://grlc.nanopubs.lod.labs.vu.nl/api/local/local/"

      client = NanopubClient(use_server=use_server)
      client.grlc_urls = [use_server]

      http_url = str(nanopublication).replace(
-         'https://',
-         'http://',
+         "https://",
+         "http://",
      )

      try:
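Note that the hardcoded redirect table and its regex rewriting have not vanished: the import block above now pulls `apply_redirect` from the new `iolanta.sparqlspace.redirects` module. That module is not included in this diff; a minimal sketch of its presumable shape, reusing the logic of the `apply_redirect` function removed further down, with illustrative table entries:

import re
from types import MappingProxyType

from rdflib import URIRef

# Assumed shape of iolanta/sparqlspace/redirects.py (not shown in this diff).
# The entries are illustrative; the removed code mapped, for example,
# lexvo.org/id URLs to lexvo.org/data URLs.
REDIRECTS = MappingProxyType({
    r'https://lexvo\.org/id/(.+)': r'http://lexvo.org/data/\1',
})


def apply_redirect(source: URIRef) -> URIRef:
    """Rewrite a URI by the first REDIRECTS pattern that matches it."""
    source_str = str(source)
    for pattern, destination in REDIRECTS.items():
        if re.match(str(pattern), source_str):
            # Substitute regex groups (\1, \2, ...) into the destination
            return URIRef(re.sub(str(pattern), str(destination), source_str))
    return source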
@@ -113,60 +70,72 @@ def find_retractions_for(nanopublication: URIRef) -> set[URIRef]:
      return {URIRef(retraction) for retraction in retractions}


- def _extract_from_mapping( # noqa: WPS213
+ def _extract_from_mapping(  # noqa: WPS213, WPS231
      algebra: Mapping[str, Any],
  ) -> Iterable[URIRef | Variable]:
-     match algebra.name:
-         case 'SelectQuery' | 'AskQuery' | 'Project' | 'Distinct' | 'Slice':
-             yield from extract_mentioned_urls(algebra['p'])
+     match algebra.name:  # noqa: WPS242
+         case "SelectQuery" | "AskQuery" | "Project" | "Distinct" | "Slice":
+             yield from extract_mentioned_urls(algebra["p"])  # noqa: WPS226

-         case 'BGP':
-             yield from [ # noqa: WPS353, WPS221
+         case "BGP":
+             yield from [  # noqa: WPS353, WPS221
                  term
-                 for triple in algebra['triples']
+                 for triple in algebra["triples"]
                  for term in triple
-                 if isinstance(term, URIRef)
+                 if isinstance(term, (URIRef, Variable))
              ]

-         case 'Filter' | 'UnaryNot' | 'OrderCondition':
-             yield from extract_mentioned_urls(algebra['expr']) # noqa: WPS204
+         case "Filter" | "UnaryNot" | "OrderCondition":
+             yield from extract_mentioned_urls(algebra["expr"])  # noqa: WPS204, WPS226

-         case built_in if built_in.startswith('Builtin_'):
-             yield from extract_mentioned_urls(algebra['arg'])
+         case "Builtin_EXISTS":
+             # Builtin_EXISTS uses 'graph' instead of 'arg'
+             yield from extract_mentioned_urls(algebra["graph"])

-         case 'RelationalExpression':
-             yield from extract_mentioned_urls(algebra['expr'])
-             yield from extract_mentioned_urls(algebra['other'])
+         case built_in if built_in.startswith("Builtin_"):
+             # Some built-ins may not have an 'arg' key
+             arg_value = algebra.get("arg")
+             if arg_value is not None:
+                 yield from extract_mentioned_urls(arg_value)

-         case 'LeftJoin':
-             yield from extract_mentioned_urls(algebra['p1'])
-             yield from extract_mentioned_urls(algebra['p2'])
-             yield from extract_mentioned_urls(algebra['expr'])
+         case "RelationalExpression":
+             yield from extract_mentioned_urls(algebra["expr"])
+             yield from extract_mentioned_urls(algebra["other"])

-         case 'Join':
-             yield from extract_mentioned_urls(algebra['p1'])
-             yield from extract_mentioned_urls(algebra['p2'])
+         case "LeftJoin":
+             yield from extract_mentioned_urls(algebra["p1"])
+             yield from extract_mentioned_urls(algebra["p2"])
+             yield from extract_mentioned_urls(algebra["expr"])

-         case 'ConditionalOrExpression' | 'ConditionalAndExpression':
-             yield from extract_mentioned_urls(algebra['expr'])
-             yield from extract_mentioned_urls(algebra['other'])
+         case "Join" | "Union":
+             yield from extract_mentioned_urls(algebra["p1"])
+             yield from extract_mentioned_urls(algebra["p2"])

-         case 'OrderBy':
-             yield from extract_mentioned_urls(algebra['p'])
-             yield from extract_mentioned_urls(algebra['expr'])
+         case "Extend":
+             # Extend is used for BIND expressions - process pattern and expression
+             yield from extract_mentioned_urls(algebra["p"])
+             yield from extract_mentioned_urls(algebra["expr"])

-         case 'TrueFilter':
+         case "ConditionalOrExpression" | "ConditionalAndExpression":
+             yield from extract_mentioned_urls(algebra["expr"])
+             yield from extract_mentioned_urls(algebra["other"])
+
+         case "OrderBy":
+             yield from extract_mentioned_urls(algebra["p"])
+             yield from extract_mentioned_urls(algebra["expr"])
+
+         case "TrueFilter":
              return

-         case 'Graph':
-             yield from extract_mentioned_urls(algebra['p'])
-             yield from extract_mentioned_urls(algebra['term'])
+         case "Graph":
+             yield from extract_mentioned_urls(algebra["p"])
+             yield from extract_mentioned_urls(algebra["term"])

          case unknown_name:
-             formatted_keys = ', '.join(algebra.keys())
+             formatted_keys = ", ".join(algebra.keys())
              loguru.logger.info(
-                 'Unknown SPARQL expression '
-                 f'{unknown_name}({formatted_keys}): {algebra}',
+                 "Unknown SPARQL expression "
+                 f"{unknown_name}({formatted_keys}): {algebra}",
              )
              return

@@ -192,7 +161,7 @@ def extract_mentioned_urls(
          case unknown_algebra:
              algebra_type = type(unknown_algebra)
              raise ValueError(
-                 f'Algebra of unknown type {algebra_type}: {unknown_algebra}',
+                 f"Algebra of unknown type {algebra_type}: {unknown_algebra}",
              )


@@ -208,7 +177,10 @@ def normalize_term(term: Node) -> Node:
      * A dirty hack;
      * Based on hard code.
      """
-     return NORMALIZE_TERMS_MAP.get(term, term)
+     if isinstance(term, URIRef):
+         return apply_redirect(term)
+
+     return term


  def resolve_variables(
@@ -223,10 +195,7 @@ def resolve_variables(

          case Variable() as query_variable:
              variable_value = bindings.get(str(query_variable))
-             if (
-                 variable_value is not None
-                 and isinstance(variable_value, URIRef)
-             ):
+             if variable_value is not None and isinstance(variable_value, URIRef):
                  yield variable_value


@@ -258,74 +227,49 @@ class Skipped:
  LoadResult = Loaded | Skipped


- def _extract_nanopublication_uris(
+ def _extract_nanopublication_uris(  # noqa: WPS231
      algebra: CompValue,
  ) -> Iterable[URIRef]:
      """Extract nanopublications to get retracting information for."""
-     match algebra.name:
-         case 'SelectQuery' | 'AskQuery' | 'Project' | 'Distinct' | 'Graph':
-             yield from _extract_nanopublication_uris(algebra['p'])
-         case 'ConstructQuery':
+     match algebra.name:  # noqa: WPS242
+         case "SelectQuery" | "AskQuery" | "Project" | "Distinct" | "Graph":
+             yield from _extract_nanopublication_uris(algebra["p"])
+         case "ConstructQuery":
              # CONSTRUCT queries don't have nanopublication URIs in bindings
              return

-         case 'Slice':
-             yield from _extract_nanopublication_uris(algebra['p'])
+         case "Slice":
+             yield from _extract_nanopublication_uris(algebra["p"])

-         case 'BGP':
-             for retractor, retracts, retractee in algebra['triples']:
+         case "BGP":
+             for retractor, retracts, retractee in algebra["triples"]:
                  if retracts == URIRef(
-                     'https://purl.org/nanopub/x/retracts',
+                     "https://purl.org/nanopub/x/retracts",
                  ) and isinstance(retractor, Variable):
                      yield retractee

-         case 'LeftJoin' | 'Join':
-             yield from _extract_nanopublication_uris(algebra['p1'])
-             yield from _extract_nanopublication_uris(algebra['p2'])
+         case "LeftJoin" | "Join" | "Union":
+             yield from _extract_nanopublication_uris(algebra["p1"])
+             yield from _extract_nanopublication_uris(algebra["p2"])
+
+         case "Extend":
+             # Extend is used for BIND expressions - process the pattern recursively
+             yield from _extract_nanopublication_uris(algebra["p"])

-         case 'Filter' | 'OrderBy':
+         case "Filter" | "OrderBy":
              return

          case unknown_name:
              raise ValueError(
-                 f'Unknown algebra name: {unknown_name}, '
-                 f'content: {algebra}',
+                 f"Unknown algebra name: {unknown_name}, content: {algebra}",
              )


- def apply_redirect(source: URIRef) -> URIRef: # noqa: WPS210
-     """
-     Rewrite the URL using regex patterns and group substitutions.
-
-     For each pattern in REDIRECTS:
-     - If the pattern matches the source URI
-     - Replace the source with the destination, substituting any regex groups
-     """
-     source_str = str(source)
-
-     for pattern, destination in REDIRECTS.items():
-         pattern_str = str(pattern)
-         destination_str = str(destination)
-
-         match = re.match(pattern_str, source_str)
-         if match:
-             # Replace any group references in the destination
-             # (like \1, \2, etc.)
-             redirected_uri = re.sub(
-                 pattern_str,
-                 destination_str,
-                 source_str,
-             )
-             return URIRef(redirected_uri)
-
-     return source
-
-
  def extract_triples(algebra: CompValue) -> Iterable[tuple[Node, Node, Node]]:
      """Extract triples from a SPARQL query algebra instance."""
      if isinstance(algebra, CompValue):
          for key, value in algebra.items():  # noqa: WPS110
-             if key == 'triples':
+             if key == "triples":
                  yield from value

          else:
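Both `_extract_from_mapping` and `_extract_nanopublication_uris` walk the algebra tree that rdflib builds from a SPARQL query. A minimal standalone sketch of how such a tree is produced (standard rdflib API; the query text is illustrative):

from rdflib.plugins.sparql.algebra import translateQuery
from rdflib.plugins.sparql.parser import parseQuery

# Parse and translate a query the same way GlobalSPARQLProcessor.query() does.
query = translateQuery(
    parseQuery('SELECT ?s WHERE { ?s a <https://example.com/Thing> }'),
)

# query.algebra is a CompValue tree; its .name drives the match/case dispatch.
print(query.algebra.name)       # SelectQuery
print(query.algebra['p'].name)  # the nested pattern, e.g. Project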
@@ -361,13 +305,13 @@ class NanopubQueryPlugin:

          FIXME: Can we cache this?
          """
-         response = requests.post( # noqa: S113
-             'https://query.knowledgepixels.com/repo/full',
+         response = requests.post(  # noqa: S113
+             "https://query.knowledgepixels.com/repo/full",
              data={
-                 'query': 'CONSTRUCT WHERE { ?instance a <%s> }' % class_uri,
+                 "query": "CONSTRUCT WHERE { ?instance a <%s> }" % class_uri,
              },
              headers={
-                 'Accept': 'application/ld+json',
+                 "Accept": "application/ld+json",
              },
          )

@@ -375,21 +319,21 @@ class NanopubQueryPlugin:

          self.graph.get_context(BNode()).parse(
              data=response.text,
-             format='json-ld',
+             format="json-ld",
          )

      def _is_from_nanopubs(self, class_uri: URIRef) -> bool:
          if not isinstance(class_uri, URIRef):
-             raise ValueError(f'Not a URIRef: {class_uri}')
+             raise ValueError(f"Not a URIRef: {class_uri}")

-         return self.graph.query( # noqa: WPS462
+         return self.graph.query(  # noqa: WPS462
              """
              ASK WHERE {
                  ?_whatever <https://purl.org/nanopub/x/introduces> $class
              }
              """,
              initBindings={
-                 'class': class_uri,
+                 "class": class_uri,
              },
          ).askAnswer

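The `_is_from_nanopubs` check above leans on rdflib's support for pre-binding variables in ASK queries. A self-contained illustration (the class URI is made up):

from rdflib import Graph, URIRef

graph = Graph()
answer = graph.query(
    'ASK WHERE { ?_whatever <https://purl.org/nanopub/x/introduces> $class }',
    initBindings={'class': URIRef('https://example.com/SomeClass')},
).askAnswer
print(answer)  # False: the empty graph introduces nothing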
@@ -419,7 +363,7 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214

          self.graph._indices_loaded = True

-     def query( # noqa: WPS211, WPS210, WPS231, C901
+     def query(  # noqa: WPS211, WPS210, WPS231, WPS213, C901
          self,
          strOrQuery,
          initBindings=None,
@@ -436,7 +380,7 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214

          initBindings = initBindings or {}
          initNs = initNs or {}
-
+
          if isinstance(strOrQuery, Query):
              query = strOrQuery

@@ -445,7 +389,7 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
              query = translateQuery(parse_tree, base, initNs)

          # Only extract nanopublications from SELECT/ASK queries, not CONSTRUCT
-         if query.algebra.name != 'ConstructQuery':
+         if query.algebra.name != "ConstructQuery":
              self.load_retracting_nanopublications_by_query(
                  query=query,
                  bindings=initBindings,
@@ -461,21 +405,24 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
          )

          # Filter out inference graph names (they're not URLs to load)
-         urls = {url for url in urls if not str(url).startswith('inference:')}
+         urls = {url for url in urls if not str(url).startswith("inference:")}

          for url in urls:
              try:
                  self.load(url)
              except Exception as err:
-                 self.logger.exception(f'Failed to load {url}: {err}', url, err)
+                 self.logger.exception(f"Failed to load {url}: {err}", url, err)

          # Run inference if there's new data since last inference run
          # (after URLs are loaded so inference can use the loaded data)
-         if self.graph.last_not_inferred_source is not None:
-             self.logger.debug(f'Running inference, last_not_inferred_source: {self.graph.last_not_inferred_source}')
+         if self.graph.last_not_inferred_source is not None:  # noqa: WPS504
+             last_source = self.graph.last_not_inferred_source
+             self.logger.debug(
+                 f"Running inference, last_not_inferred_source: {last_source}"
+             )  # noqa: WPS237
              self._run_inference()
          else:
-             self.logger.debug('Skipping inference, last_not_inferred_source is None')
+             self.logger.debug("Skipping inference, last_not_inferred_source is None")

          NanopubQueryPlugin(graph=self.graph)(query, bindings=initBindings)

@@ -486,45 +433,51 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
          query_result = evalQuery(self.graph, query, initBindings, base)

          try:
-             bindings = list(query_result['bindings'])
+             bindings = list(query_result["bindings"])
          except KeyError:
              # This was probably an ASK query
              return query_result

          for row in bindings:
              break
-         for _, maybe_iri in row.items():
-             if (
-                 isinstance(maybe_iri, URIRef)
-                 and isinstance(self.load(maybe_iri), Loaded)
+         for _, maybe_iri in row.items():  # noqa: WPS427
+             if isinstance(maybe_iri, URIRef) and isinstance(
+                 self.load(maybe_iri), Loaded
              ):
-                 is_anything_loaded = True # noqa: WPS220
-                 self.logger.info( # noqa: WPS220
-                     'Newly loaded: {uri}',
+                 is_anything_loaded = True  # noqa: WPS220
+                 self.logger.info(  # noqa: WPS220
+                     "Newly loaded: {uri}",
                      uri=maybe_iri,
                  )

-         query_result['bindings'] = bindings
+         query_result["bindings"] = bindings
          return query_result

      def _is_loaded(self, uri: URIRef) -> bool:
          """Find out if this URI in the graph already."""
-         return funcy.first(
-             self.graph.quads((
-                 uri,
-                 IOLANTA['last-loaded-time'],
-                 None,
-                 META,
-             )),
-         ) is not None
+         return (
+             funcy.first(
+                 self.graph.quads(
+                     (
+                         uri,
+                         IOLANTA["last-loaded-time"],
+                         None,
+                         META,
+                     )
+                 ),
+             )
+             is not None
+         )

      def _mark_as_loaded(self, uri: URIRef):
-         self.graph.add((
-             uri,
-             IOLANTA['last-loaded-time'],
-             Literal(datetime.datetime.now()),
-             META,
-         ))
+         self.graph.add(
+             (
+                 uri,
+                 IOLANTA["last-loaded-time"],
+                 Literal(datetime.datetime.now()),
+                 META,
+             )
+         )

      def _follow_is_visualized_with_links(self, uri: URIRef):
          """Follow `dcterms:isReferencedBy` links."""
@@ -533,7 +486,7 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
          if isinstance(visualization, URIRef):
              self.load(visualization)

-     def load( # noqa: C901, WPS210, WPS212, WPS213, WPS231
+     def load(  # noqa: C901, WPS210, WPS212, WPS213, WPS231
          self,
          source: URIRef,
      ) -> LoadResult:
@@ -545,15 +498,15 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
          # Blank nodes cannot be loaded from URLs
          if isinstance(source, BNode):
              return Skipped()
-
+
          # Also check if URIRef represents a blank node (can happen if BNode
          # was serialized to string and converted to URIRef)
-         if isinstance(source, URIRef) and str(source).startswith('_:'):
-             raise ValueError('This is actually a blank node but masked as a URIREF')
-
+         if isinstance(source, URIRef) and str(source).startswith("_:"):
+             raise ValueError("This is actually a blank node but masked as a URIREF")
+
          url = URL(source)

-         if url.scheme in {'file', 'python', 'local', 'urn', 'doi'}:
+         if url.scheme in {"file", "python", "local", "urn", "doi"}:
              # FIXME temporary fix. `yaml-ld` doesn't read `context.*` files and
              # fails.
              return Skipped()
@@ -564,14 +517,14 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
              # TODO: It works differently for JSON-LD documents AFAIK. Need to
              # double check that.
              url = url.with_fragment(None)
-             source = URIRef(str(f'{url}#'))
+             source = URIRef(str(f"{url}#"))

          self._follow_is_visualized_with_links(source)

          new_source = apply_redirect(source)
          if new_source != source:
              self.logger.info(
-                 'Rewriting: {source} → {new_source}',
+                 "Rewriting: {source} → {new_source}",
                  source=source,
                  new_source=new_source,
              )
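For context: the fragment handling in this hunk drops whatever fragment the URI carried and re-appends a bare `#`. A sketch, under the assumption that `URL` in this module is `yarl.URL`:

from rdflib import URIRef
from yarl import URL  # assumption: the URL class used by load()

url = URL('https://example.com/doc#section').with_fragment(None)
source = URIRef(f'{url}#')
print(source)  # https://example.com/doc#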
@@ -584,58 +537,64 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
          # FIXME This is definitely inefficient. However, python-yaml-ld caches
          # the document, so the performance overhead is not super high.
          try:
-             _resolved_source = yaml_ld.load_document(source)['documentUrl']
+             resolved_source = yaml_ld.load_document(source)["documentUrl"]
          except NotFound as not_found:
-             self.logger.info(f'{not_found.path} | 404 Not Found')
+             self.logger.info(f"{not_found.path} | 404 Not Found")
              namespaces = [RDF, RDFS, OWL, FOAF, DC, VANN]

              for namespace in namespaces:
                  if not_found.path.startswith(str(namespace)):
                      self.load(URIRef(namespace))
                      self.logger.info(
-                         'Redirecting %s → namespace %s',
+                         "Redirecting %s → namespace %s",
                          not_found.path,
                          namespace,
                      )
                      return Loaded()

              self.logger.info(
-                 '{path} | Cannot find a matching namespace',
+                 "{path} | Cannot find a matching namespace",
                  path=not_found.path,
              )

-             self.graph.add((
-                 source_uri,
-                 RDF.type,
-                 IOLANTA['not-found'],
-                 source_uri,
-             ))
+             self.graph.add(
+                 (
+                     source_uri,
+                     RDF.type,
+                     IOLANTA["not-found"],
+                     source_uri,
+                 )
+             )

              self._mark_as_loaded(source_uri)

              return Loaded()

          except Exception as err:
-             self.logger.info(f'{source} | Failed: {err}')
-             self.graph.add((
-                 URIRef(source),
-                 RDF.type,
-                 IOLANTA['failed'],
-                 source_uri,
-             ))
+             self.logger.info(f"{source} | Failed: {err}")
+             self.graph.add(
+                 (
+                     URIRef(source),
+                     RDF.type,
+                     IOLANTA["failed"],
+                     source_uri,
+                 )
+             )
              self._mark_as_loaded(source_uri)

              return Loaded()

-         if _resolved_source:
-             _resolved_source_uri_ref = URIRef(_resolved_source)
-             if _resolved_source_uri_ref != URIRef(source):
-                 self.graph.add((
-                     source_uri,
-                     IOLANTA['redirects-to'],
-                     _resolved_source_uri_ref,
-                 ))
-             source = _resolved_source
+         if resolved_source:
+             resolved_source_uri_ref = URIRef(resolved_source)
+             if resolved_source_uri_ref != URIRef(source):
+                 self.graph.add(
+                     (
+                         source_uri,
+                         IOLANTA["redirects-to"],
+                         resolved_source_uri_ref,
+                     )
+                 )
+             source = resolved_source

          self._mark_as_loaded(source_uri)

@@ -643,19 +602,19 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
              ld_rdf = yaml_ld.to_rdf(source)
          except ConnectionError as name_resolution_error:
              self.logger.info(
-                 '%s | name resolution error: %s',
+                 "%s | name resolution error: %s",
                  source,
                  str(name_resolution_error),
              )
              return Loaded()
          except ParserNotFound as parser_not_found:
-             self.logger.info(f'{source} | {parser_not_found}')
+             self.logger.info(f"{source} | {parser_not_found}")
              return Loaded()
          except YAMLLDError as yaml_ld_error:
-             self.logger.error(f'{source} | {yaml_ld_error}')
+             self.logger.error(f"{source} | {yaml_ld_error}")
              return Loaded()
          except HTTPError as http_error:
-             self.logger.warning(f'{source} | HTTP error: {http_error}')
+             self.logger.warning(f"{source} | HTTP error: {http_error}")
              return Loaded()

          try:
@@ -674,18 +633,15 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
          )

          if not quads:
-             self.logger.info('{source} | No data found', source=source)
+             self.logger.info("{source} | No data found", source=source)
              return Loaded()

          self.graph.addN(quads)
          self.graph.last_not_inferred_source = source

-         into_graphs = ', '.join({
-             quad.graph
-             for quad in quads
-         })
+         into_graphs = ", ".join({quad.graph for quad in quads})
          self.logger.info(
-             f'{source} | loaded {len(quads)} triples into graphs: {into_graphs}',
+             f"{source} | loaded {len(quads)} triples into graphs: {into_graphs}",
          )

          return Loaded()
@@ -700,7 +656,75 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214

          return term

-     def _run_inference(self): # noqa: WPS231
+     def _run_inference_from_directory(  # noqa: WPS231, WPS220, WPS210
+         self,
+         inference_dir: Path,
+         graph_prefix: str = "inference",
+     ):
+         """
+         Run inference queries from a given inference directory.
+
+         For each SPARQL file in the inference directory:
+         1. Truncate the named graph `local:{graph_prefix}-{filename}`
+         2. Execute the CONSTRUCT query
+         3. Insert the resulting triples into that graph
+
+         Args:
+             inference_dir: Directory containing inference SPARQL files
+             graph_prefix: Prefix for inference graph names
+             return_count: Whether to return the count of inferred triples
+
+         Returns the total number of triples inferred.
+         """
+         if not inference_dir.exists():
+             return 0
+
+         total_inferred = 0
+         for inference_file in inference_dir.glob("*.sparql"):
+             filename = inference_file.stem  # filename without .sparql extension
+             inference_graph = URIRef(f"{graph_prefix}:{filename}")
+
+             # Truncate the inference graph
+             context = self.graph.get_context(inference_graph)
+             context.remove((None, None, None))
+
+             # Read and execute the CONSTRUCT query
+             query_text = inference_file.read_text()
+             query_result = self.graph.query(query_text)  # noqa: WPS110
+
+             # CONSTRUCT queries return a SPARQLResult with a graph attribute
+             result_graph = (
+                 query_result.get("graph")
+                 if isinstance(query_result, dict)
+                 else query_result.graph
+             )
+             self.logger.debug(
+                 f"Inference {filename}: result_graph is {result_graph}, type: {type(result_graph)}"
+             )
+             if result_graph is not None:  # noqa: WPS504
+                 inferred_quads = [
+                     (s, p, o, inference_graph)  # noqa: WPS111
+                     for s, p, o in result_graph  # noqa: WPS111
+                 ]
+                 self.logger.debug(
+                     f"Inference {filename}: generated {len(inferred_quads)} quads"
+                 )
+
+                 if inferred_quads:
+                     self.graph.addN(inferred_quads)  # noqa: WPS220
+                     inferred_count = len(inferred_quads)
+                     total_inferred += inferred_count
+                     self.logger.info(  # noqa: WPS220
+                         "Inference {filename}: added {count} triples",
+                         filename=filename,
+                         count=inferred_count,
+                     )
+             else:
+                 self.logger.debug(f"Inference {filename}: result_graph is None")
+
+         return total_inferred
+
+     def _run_inference(self):  # noqa: WPS231, WPS220, WPS210
          """
          Run inference queries from the inference directory.

@@ -710,42 +734,13 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
          3. Insert the resulting triples into that graph
          """
          with self.inference_lock:
-             for inference_file in INFERENCE_DIR.glob('*.sparql'):
-                 filename = inference_file.stem # filename without .sparql extension
-                 inference_graph = URIRef(f'inference:{filename}')
-
-                 # Truncate the inference graph
-                 context = self.graph.get_context(inference_graph)
-                 context.remove((None, None, None))
-
-                 # Read and execute the CONSTRUCT query
-                 query_text = inference_file.read_text()
-                 result = self.graph.query(query_text)
-
-                 # CONSTRUCT queries return a SPARQLResult with a graph attribute
-                 result_graph = result.get('graph') if isinstance(result, dict) else result.graph
-                 self.logger.debug(f'Inference {filename}: result_graph is {result_graph}, type: {type(result_graph)}')
-                 if result_graph is not None:
-                     inferred_quads = [
-                         (s, p, o, inference_graph)
-                         for s, p, o in result_graph
-                     ]
-                     self.logger.debug(f'Inference {filename}: generated {len(inferred_quads)} quads')
-
-                     if inferred_quads:
-                         self.graph.addN(inferred_quads)
-                         self.logger.info(
-                             'Inference {filename}: added {count} triples',
-                             filename=filename,
-                             count=len(inferred_quads),
-                         )
-                 else:
-                     self.logger.debug(f'Inference {filename}: result_graph is None')
-
+             # Run global inference (deprecated, will be removed later)
+             self._run_inference_from_directory(INFERENCE_DIR, graph_prefix="inference")
+
              # Clear the flag after running inference
              self.graph.last_not_inferred_source = None

-     def load_retracting_nanopublications_by_query( # noqa: WPS231
+     def load_retracting_nanopublications_by_query(  # noqa: WPS231
          self,
          query: Query,
          bindings: dict[str, Node],
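With this final hunk, `_run_inference` reduces to a lock-guarded call into the new helper. A hypothetical invocation, where `processor` stands for a `GlobalSPARQLProcessor` instance and the path is illustrative:

from pathlib import Path

inferred = processor._run_inference_from_directory(
    inference_dir=Path('iolanta') / 'inference',  # directory of *.sparql CONSTRUCT files
    graph_prefix='inference',  # quads land in graphs named inference:<file stem>
)
print(f'{inferred} triples inferred')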