iolanta 2.1.11__py3-none-any.whl → 2.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,10 +5,8 @@ from pathlib import Path
  from threading import Lock
  from typing import Any, Iterable, Mapping

- import diskcache
  import funcy
  import loguru
- import platformdirs
  import requests
  import yaml_ld
  from nanopub import NanopubClient
@@ -44,31 +42,24 @@ from iolanta.sparqlspace.redirects import apply_redirect
  REASONING_ENABLED = True
  OWL_REASONING_ENABLED = False

- INFERENCE_DIR = Path(__file__).parent / 'inference'
+ INFERENCE_DIR = Path(__file__).parent / "inference"
  INDICES = [ # noqa: WPS407
-     URIRef('https://iolanta.tech/visualizations/index.yaml'),
+     URIRef("https://iolanta.tech/visualizations/index.yaml"),
  ]


- @diskcache.Cache(
-     directory=str(
-         platformdirs.user_cache_path(
-             appname='iolanta',
-         ) / 'find_retractions_for',
-     ),
- ).memoize(expire=datetime.timedelta(days=8).total_seconds())
  def find_retractions_for(nanopublication: URIRef) -> set[URIRef]:
      """Find nanopublications that retract the given one."""
      # See https://github.com/fair-workflows/nanopub/issues/168 for
      # context of this dirty hack.
-     use_server = 'http://grlc.nanopubs.lod.labs.vu.nl/api/local/local/'
+     use_server = "http://grlc.nanopubs.lod.labs.vu.nl/api/local/local/"

      client = NanopubClient(use_server=use_server)
      client.grlc_urls = [use_server]

      http_url = str(nanopublication).replace(
-         'https://',
-         'http://',
+         "https://",
+         "http://",
      )

      try:
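Note on the two hunks above: 2.1.12 drops the eight-day on-disk memoization of `find_retractions_for`, together with the now-unused `diskcache` and `platformdirs` imports, so every call queries the nanopub server directly. (Remove/add pairs that read identically in this view differ only in whitespace; the page collapses repeated spaces.) For reference, the removed decorator was the stock `diskcache` memoization pattern; a minimal sketch, with an illustrative cache directory and function:

    import datetime

    import diskcache
    import platformdirs

    cache = diskcache.Cache(
        directory=str(
            platformdirs.user_cache_path(appname='iolanta') / 'demo-cache',
        ),
    )

    @cache.memoize(expire=datetime.timedelta(days=8).total_seconds())
    def expensive_lookup(key: str) -> str:
        # The result is persisted on disk and shared across processes
        # until the entry expires (eight days here).
        return key.upper()

The trade-off: retraction data can no longer go stale, at the cost of a network round-trip on every lookup.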
@@ -79,60 +70,72 @@ def find_retractions_for(nanopublication: URIRef) -> set[URIRef]:
      return {URIRef(retraction) for retraction in retractions}


- def _extract_from_mapping( # noqa: WPS213
+ def _extract_from_mapping( # noqa: WPS213, WPS231
      algebra: Mapping[str, Any],
  ) -> Iterable[URIRef | Variable]:
      match algebra.name: # noqa: WPS242
-         case 'SelectQuery' | 'AskQuery' | 'Project' | 'Distinct' | 'Slice':
-             yield from extract_mentioned_urls(algebra['p']) # noqa: WPS226
+         case "SelectQuery" | "AskQuery" | "Project" | "Distinct" | "Slice":
+             yield from extract_mentioned_urls(algebra["p"]) # noqa: WPS226

-         case 'BGP':
-             yield from [ # noqa: WPS353, WPS221
+         case "BGP":
+             yield from [ # noqa: WPS353, WPS221
                  term
-                 for triple in algebra['triples']
+                 for triple in algebra["triples"]
                  for term in triple
                  if isinstance(term, (URIRef, Variable))
              ]

-         case 'Filter' | 'UnaryNot' | 'OrderCondition':
-             yield from extract_mentioned_urls(algebra['expr']) # noqa: WPS204, WPS226
+         case "Filter" | "UnaryNot" | "OrderCondition":
+             yield from extract_mentioned_urls(algebra["expr"]) # noqa: WPS204, WPS226

-         case built_in if built_in.startswith('Builtin_'):
-             yield from extract_mentioned_urls(algebra['arg'])
+         case "Builtin_EXISTS":
+             # Builtin_EXISTS uses 'graph' instead of 'arg'
+             yield from extract_mentioned_urls(algebra["graph"])

-         case 'RelationalExpression':
-             yield from extract_mentioned_urls(algebra['expr'])
-             yield from extract_mentioned_urls(algebra['other'])
+         case built_in if built_in.startswith("Builtin_"):
+             # Some built-ins may not have an 'arg' key
+             arg_value = algebra.get("arg")
+             if arg_value is not None:
+                 yield from extract_mentioned_urls(arg_value)

-         case 'LeftJoin':
-             yield from extract_mentioned_urls(algebra['p1'])
-             yield from extract_mentioned_urls(algebra['p2'])
-             yield from extract_mentioned_urls(algebra['expr'])
+         case "RelationalExpression":
+             yield from extract_mentioned_urls(algebra["expr"])
+             yield from extract_mentioned_urls(algebra["other"])

-         case 'Join':
-             yield from extract_mentioned_urls(algebra['p1'])
-             yield from extract_mentioned_urls(algebra['p2'])
+         case "LeftJoin":
+             yield from extract_mentioned_urls(algebra["p1"])
+             yield from extract_mentioned_urls(algebra["p2"])
+             yield from extract_mentioned_urls(algebra["expr"])

-         case 'ConditionalOrExpression' | 'ConditionalAndExpression':
-             yield from extract_mentioned_urls(algebra['expr'])
-             yield from extract_mentioned_urls(algebra['other'])
+         case "Join" | "Union":
+             yield from extract_mentioned_urls(algebra["p1"])
+             yield from extract_mentioned_urls(algebra["p2"])

-         case 'OrderBy':
-             yield from extract_mentioned_urls(algebra['p'])
-             yield from extract_mentioned_urls(algebra['expr'])
+         case "Extend":
+             # Extend is used for BIND expressions - process pattern and expression
+             yield from extract_mentioned_urls(algebra["p"])
+             yield from extract_mentioned_urls(algebra["expr"])

-         case 'TrueFilter':
+         case "ConditionalOrExpression" | "ConditionalAndExpression":
+             yield from extract_mentioned_urls(algebra["expr"])
+             yield from extract_mentioned_urls(algebra["other"])
+
+         case "OrderBy":
+             yield from extract_mentioned_urls(algebra["p"])
+             yield from extract_mentioned_urls(algebra["expr"])
+
+         case "TrueFilter":
              return

-         case 'Graph':
-             yield from extract_mentioned_urls(algebra['p'])
-             yield from extract_mentioned_urls(algebra['term'])
+         case "Graph":
+             yield from extract_mentioned_urls(algebra["p"])
+             yield from extract_mentioned_urls(algebra["term"])

          case unknown_name:
-             formatted_keys = ', '.join(algebra.keys())
+             formatted_keys = ", ".join(algebra.keys())
              loguru.logger.info(
-                 'Unknown SPARQL expression '
-                 f'{unknown_name}({formatted_keys}): {algebra}',
+                 "Unknown SPARQL expression "
+                 f"{unknown_name}({formatted_keys}): {algebra}",
              )
              return

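Note: the new `Union`, `Extend`, and `Builtin_EXISTS` cases above follow the node names rdflib assigns in its SPARQL algebra: `Extend` wraps `BIND`, `Union` wraps `UNION`, and `Builtin_EXISTS` keeps its operand under the `graph` key rather than `arg`. A sketch of how to inspect which `algebra.name` values and child keys (`p`, `p1`/`p2`, `expr`, `triples`) a query actually produces, using rdflib's public API:

    from rdflib.plugins.sparql import prepareQuery
    from rdflib.plugins.sparql.parserutils import CompValue

    query = prepareQuery('''
        SELECT ?s WHERE {
            ?s ?p ?o .
            BIND(STR(?o) AS ?label)
        }
    ''')

    def walk(node, depth=0):
        # Print each algebra node's name and child keys, recursively.
        if not isinstance(node, CompValue):
            return
        print('  ' * depth + node.name, list(node.keys()))
        for child in node.values():
            walk(child, depth + 1)

    walk(query.algebra)
    # Expected output, roughly: SelectQuery → Project → Extend → BGP.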
@@ -158,7 +161,7 @@ def extract_mentioned_urls(
          case unknown_algebra:
              algebra_type = type(unknown_algebra)
              raise ValueError(
-                 f'Algebra of unknown type {algebra_type}: {unknown_algebra}',
+                 f"Algebra of unknown type {algebra_type}: {unknown_algebra}",
              )


@@ -176,7 +179,7 @@ def normalize_term(term: Node) -> Node:
      """
      if isinstance(term, URIRef):
          return apply_redirect(term)
-
+
      return term


@@ -192,10 +195,7 @@ def resolve_variables(

          case Variable() as query_variable:
              variable_value = bindings.get(str(query_variable))
-             if (
-                 variable_value is not None
-                 and isinstance(variable_value, URIRef)
-             ):
+             if variable_value is not None and isinstance(variable_value, URIRef):
                  yield variable_value


@@ -231,34 +231,37 @@ def _extract_nanopublication_uris( # noqa: WPS231
      algebra: CompValue,
  ) -> Iterable[URIRef]:
      """Extract nanopublications to get retracting information for."""
-     match algebra.name:
-         case 'SelectQuery' | 'AskQuery' | 'Project' | 'Distinct' | 'Graph':
-             yield from _extract_nanopublication_uris(algebra['p'])
-         case 'ConstructQuery':
+     match algebra.name: # noqa: WPS242
+         case "SelectQuery" | "AskQuery" | "Project" | "Distinct" | "Graph":
+             yield from _extract_nanopublication_uris(algebra["p"])
+         case "ConstructQuery":
              # CONSTRUCT queries don't have nanopublication URIs in bindings
              return

-         case 'Slice':
-             yield from _extract_nanopublication_uris(algebra['p'])
+         case "Slice":
+             yield from _extract_nanopublication_uris(algebra["p"])

-         case 'BGP':
-             for retractor, retracts, retractee in algebra['triples']:
+         case "BGP":
+             for retractor, retracts, retractee in algebra["triples"]:
                  if retracts == URIRef(
-                     'https://purl.org/nanopub/x/retracts',
+                     "https://purl.org/nanopub/x/retracts",
                  ) and isinstance(retractor, Variable):
                      yield retractee

-         case 'LeftJoin' | 'Join':
-             yield from _extract_nanopublication_uris(algebra['p1'])
-             yield from _extract_nanopublication_uris(algebra['p2'])
+         case "LeftJoin" | "Join" | "Union":
+             yield from _extract_nanopublication_uris(algebra["p1"])
+             yield from _extract_nanopublication_uris(algebra["p2"])
+
+         case "Extend":
+             # Extend is used for BIND expressions - process the pattern recursively
+             yield from _extract_nanopublication_uris(algebra["p"])

-         case 'Filter' | 'OrderBy':
+         case "Filter" | "OrderBy":
              return

          case unknown_name:
              raise ValueError(
-                 f'Unknown algebra name: {unknown_name}, '
-                 f'content: {algebra}',
+                 f"Unknown algebra name: {unknown_name}, content: {algebra}",
              )


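Note: the `BGP` case above matches triple patterns whose predicate is `https://purl.org/nanopub/x/retracts` and whose subject is a variable, yielding the object: the nanopublication whose retractions must be fetched before evaluation. A query shape that would trigger it, with an illustrative nanopub URI:

    retraction_query = '''
        SELECT ?retractor WHERE {
            ?retractor <https://purl.org/nanopub/x/retracts>
                <https://w3id.org/np/example> .
        }
    '''
    # The parsed BGP contains the triple
    # (?retractor, x:retracts, <https://w3id.org/np/example>);
    # _extract_nanopublication_uris yields the object term, and
    # find_retractions_for is then invoked for it.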
@@ -266,7 +269,7 @@ def extract_triples(algebra: CompValue) -> Iterable[tuple[Node, Node, Node]]:
      """Extract triples from a SPARQL query algebra instance."""
      if isinstance(algebra, CompValue):
          for key, value in algebra.items(): # noqa: WPS110
-             if key == 'triples':
+             if key == "triples":
                  yield from value

              else:
@@ -302,13 +305,13 @@ class NanopubQueryPlugin:

          FIXME: Can we cache this?
          """
-         response = requests.post( # noqa: S113
-             'https://query.knowledgepixels.com/repo/full',
+         response = requests.post( # noqa: S113
+             "https://query.knowledgepixels.com/repo/full",
              data={
-                 'query': 'CONSTRUCT WHERE { ?instance a <%s> }' % class_uri,
+                 "query": "CONSTRUCT WHERE { ?instance a <%s> }" % class_uri,
              },
              headers={
-                 'Accept': 'application/ld+json',
+                 "Accept": "application/ld+json",
              },
          )

@@ -316,21 +319,21 @@ class NanopubQueryPlugin:

          self.graph.get_context(BNode()).parse(
              data=response.text,
-             format='json-ld',
+             format="json-ld",
          )

      def _is_from_nanopubs(self, class_uri: URIRef) -> bool:
          if not isinstance(class_uri, URIRef):
-             raise ValueError(f'Not a URIRef: {class_uri}')
+             raise ValueError(f"Not a URIRef: {class_uri}")

-         return self.graph.query( # noqa: WPS462
+         return self.graph.query( # noqa: WPS462
              """
              ASK WHERE {
                  ?_whatever <https://purl.org/nanopub/x/introduces> $class
              }
              """,
              initBindings={
-                 'class': class_uri,
+                 "class": class_uri,
              },
          ).askAnswer

@@ -360,7 +363,7 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214

          self.graph._indices_loaded = True

-     def query( # noqa: WPS211, WPS210, WPS231, WPS213, C901
+     def query( # noqa: WPS211, WPS210, WPS231, WPS213, C901
          self,
          strOrQuery,
          initBindings=None,
@@ -377,7 +380,7 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214

          initBindings = initBindings or {}
          initNs = initNs or {}
-
+
          if isinstance(strOrQuery, Query):
              query = strOrQuery

@@ -386,7 +389,7 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
              query = translateQuery(parse_tree, base, initNs)

          # Only extract nanopublications from SELECT/ASK queries, not CONSTRUCT
-         if query.algebra.name != 'ConstructQuery':
+         if query.algebra.name != "ConstructQuery":
              self.load_retracting_nanopublications_by_query(
                  query=query,
                  bindings=initBindings,
@@ -402,22 +405,24 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
              )

          # Filter out inference graph names (they're not URLs to load)
-         urls = {url for url in urls if not str(url).startswith('inference:')}
+         urls = {url for url in urls if not str(url).startswith("inference:")}

          for url in urls:
              try:
                  self.load(url)
              except Exception as err:
-                 self.logger.exception(f'Failed to load {url}: {err}', url, err)
+                 self.logger.exception(f"Failed to load {url}: {err}", url, err)

          # Run inference if there's new data since last inference run
          # (after URLs are loaded so inference can use the loaded data)
          if self.graph.last_not_inferred_source is not None: # noqa: WPS504
              last_source = self.graph.last_not_inferred_source
-             self.logger.debug(f'Running inference, last_not_inferred_source: {last_source}') # noqa: WPS237
+             self.logger.debug(
+                 f"Running inference, last_not_inferred_source: {last_source}"
+             ) # noqa: WPS237
              self._run_inference()
          else:
-             self.logger.debug('Skipping inference, last_not_inferred_source is None')
+             self.logger.debug("Skipping inference, last_not_inferred_source is None")

          NanopubQueryPlugin(graph=self.graph)(query, bindings=initBindings)

@@ -428,7 +433,7 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
          query_result = evalQuery(self.graph, query, initBindings, base)

          try:
-             bindings = list(query_result['bindings'])
+             bindings = list(query_result["bindings"])
          except KeyError:
              # This was probably an ASK query
              return query_result
@@ -436,37 +441,43 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
          for row in bindings:
              break
          for _, maybe_iri in row.items(): # noqa: WPS427
-             if (
-                 isinstance(maybe_iri, URIRef)
-                 and isinstance(self.load(maybe_iri), Loaded)
+             if isinstance(maybe_iri, URIRef) and isinstance(
+                 self.load(maybe_iri), Loaded
              ):
-                 is_anything_loaded = True # noqa: WPS220
-                 self.logger.info( # noqa: WPS220
-                     'Newly loaded: {uri}',
+                 is_anything_loaded = True # noqa: WPS220
+                 self.logger.info( # noqa: WPS220
+                     "Newly loaded: {uri}",
                      uri=maybe_iri,
                  )

-         query_result['bindings'] = bindings
+         query_result["bindings"] = bindings
          return query_result

      def _is_loaded(self, uri: URIRef) -> bool:
          """Find out if this URI is in the graph already."""
-         return funcy.first(
-             self.graph.quads((
-                 uri,
-                 IOLANTA['last-loaded-time'],
-                 None,
-                 META,
-             )),
-         ) is not None
+         return (
+             funcy.first(
+                 self.graph.quads(
+                     (
+                         uri,
+                         IOLANTA["last-loaded-time"],
+                         None,
+                         META,
+                     )
+                 ),
+             )
+             is not None
+         )

      def _mark_as_loaded(self, uri: URIRef):
-         self.graph.add((
-             uri,
-             IOLANTA['last-loaded-time'],
-             Literal(datetime.datetime.now()),
-             META,
-         ))
+         self.graph.add(
+             (
+                 uri,
+                 IOLANTA["last-loaded-time"],
+                 Literal(datetime.datetime.now()),
+                 META,
+             )
+         )

      def _follow_is_visualized_with_links(self, uri: URIRef):
          """Follow `dcterms:isReferencedBy` links."""
@@ -475,7 +486,7 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
              if isinstance(visualization, URIRef):
                  self.load(visualization)

-     def load( # noqa: C901, WPS210, WPS212, WPS213, WPS231
+     def load( # noqa: C901, WPS210, WPS212, WPS213, WPS231
          self,
          source: URIRef,
      ) -> LoadResult:
@@ -487,15 +498,15 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
          # Blank nodes cannot be loaded from URLs
          if isinstance(source, BNode):
              return Skipped()
-
+
          # Also check if URIRef represents a blank node (can happen if BNode
          # was serialized to string and converted to URIRef)
-         if isinstance(source, URIRef) and str(source).startswith('_:'):
-             raise ValueError('This is actually a blank node but masked as a URIREF')
-
+         if isinstance(source, URIRef) and str(source).startswith("_:"):
+             raise ValueError("This is actually a blank node but masked as a URIREF")
+
          url = URL(source)

-         if url.scheme in {'file', 'python', 'local', 'urn', 'doi'}:
+         if url.scheme in {"file", "python", "local", "urn", "doi"}:
              # FIXME temporary fix. `yaml-ld` doesn't read `context.*` files and
              # fails.
              return Skipped()
@@ -506,14 +517,14 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
              # TODO: It works differently for JSON-LD documents AFAIK. Need to
              # double check that.
              url = url.with_fragment(None)
-             source = URIRef(str(f'{url}#'))
+             source = URIRef(str(f"{url}#"))

          self._follow_is_visualized_with_links(source)

          new_source = apply_redirect(source)
          if new_source != source:
              self.logger.info(
-                 'Rewriting: {source} → {new_source}',
+                 "Rewriting: {source} → {new_source}",
                  source=source,
                  new_source=new_source,
              )
@@ -526,45 +537,49 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
          # FIXME This is definitely inefficient. However, python-yaml-ld caches
          # the document, so the performance overhead is not super high.
          try:
-             resolved_source = yaml_ld.load_document(source)['documentUrl']
+             resolved_source = yaml_ld.load_document(source)["documentUrl"]
          except NotFound as not_found:
-             self.logger.info(f'{not_found.path} | 404 Not Found')
+             self.logger.info(f"{not_found.path} | 404 Not Found")
              namespaces = [RDF, RDFS, OWL, FOAF, DC, VANN]

              for namespace in namespaces:
                  if not_found.path.startswith(str(namespace)):
                      self.load(URIRef(namespace))
                      self.logger.info(
-                         'Redirecting %s → namespace %s',
+                         "Redirecting %s → namespace %s",
                          not_found.path,
                          namespace,
                      )
                      return Loaded()

              self.logger.info(
-                 '{path} | Cannot find a matching namespace',
+                 "{path} | Cannot find a matching namespace",
                  path=not_found.path,
              )

-             self.graph.add((
-                 source_uri,
-                 RDF.type,
-                 IOLANTA['not-found'],
-                 source_uri,
-             ))
+             self.graph.add(
+                 (
+                     source_uri,
+                     RDF.type,
+                     IOLANTA["not-found"],
+                     source_uri,
+                 )
+             )

              self._mark_as_loaded(source_uri)

              return Loaded()

          except Exception as err:
-             self.logger.info(f'{source} | Failed: {err}')
-             self.graph.add((
-                 URIRef(source),
-                 RDF.type,
-                 IOLANTA['failed'],
-                 source_uri,
-             ))
+             self.logger.info(f"{source} | Failed: {err}")
+             self.graph.add(
+                 (
+                     URIRef(source),
+                     RDF.type,
+                     IOLANTA["failed"],
+                     source_uri,
+                 )
+             )
              self._mark_as_loaded(source_uri)

              return Loaded()
@@ -572,11 +587,13 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
          if resolved_source:
              resolved_source_uri_ref = URIRef(resolved_source)
              if resolved_source_uri_ref != URIRef(source):
-                 self.graph.add((
-                     source_uri,
-                     IOLANTA['redirects-to'],
-                     resolved_source_uri_ref,
-                 ))
+                 self.graph.add(
+                     (
+                         source_uri,
+                         IOLANTA["redirects-to"],
+                         resolved_source_uri_ref,
+                     )
+                 )
              source = resolved_source

          self._mark_as_loaded(source_uri)
@@ -585,19 +602,19 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
              ld_rdf = yaml_ld.to_rdf(source)
          except ConnectionError as name_resolution_error:
              self.logger.info(
-                 '%s | name resolution error: %s',
+                 "%s | name resolution error: %s",
                  source,
                  str(name_resolution_error),
              )
              return Loaded()
          except ParserNotFound as parser_not_found:
-             self.logger.info(f'{source} | {parser_not_found}')
+             self.logger.info(f"{source} | {parser_not_found}")
              return Loaded()
          except YAMLLDError as yaml_ld_error:
-             self.logger.error(f'{source} | {yaml_ld_error}')
+             self.logger.error(f"{source} | {yaml_ld_error}")
              return Loaded()
          except HTTPError as http_error:
-             self.logger.warning(f'{source} | HTTP error: {http_error}')
+             self.logger.warning(f"{source} | HTTP error: {http_error}")
              return Loaded()

          try:
@@ -616,18 +633,15 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
          )

          if not quads:
-             self.logger.info('{source} | No data found', source=source)
+             self.logger.info("{source} | No data found", source=source)
              return Loaded()

          self.graph.addN(quads)
          self.graph.last_not_inferred_source = source

-         into_graphs = ', '.join({
-             quad.graph
-             for quad in quads
-         })
+         into_graphs = ", ".join({quad.graph for quad in quads})
          self.logger.info(
-             f'{source} | loaded {len(quads)} triples into graphs: {into_graphs}',
+             f"{source} | loaded {len(quads)} triples into graphs: {into_graphs}",
          )

          return Loaded()
@@ -642,6 +656,73 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214

          return term

+     def _run_inference_from_directory( # noqa: WPS231, WPS220, WPS210
+         self,
+         inference_dir: Path,
+         graph_prefix: str = "inference",
+     ):
+         """
+         Run inference queries from a given inference directory.
+
+         For each SPARQL file in the inference directory:
+         1. Truncate the named graph `{graph_prefix}:{filename}`
+         2. Execute the CONSTRUCT query
+         3. Insert the resulting triples into that graph
+
+         Args:
+             inference_dir: Directory containing inference SPARQL files
+             graph_prefix: Prefix for inference graph names
+
+         Returns the total number of triples inferred.
+         """
+         if not inference_dir.exists():
+             return 0
+
+         total_inferred = 0
+         for inference_file in inference_dir.glob("*.sparql"):
+             filename = inference_file.stem # filename without .sparql extension
+             inference_graph = URIRef(f"{graph_prefix}:{filename}")
+
+             # Truncate the inference graph
+             context = self.graph.get_context(inference_graph)
+             context.remove((None, None, None))
+
+             # Read and execute the CONSTRUCT query
+             query_text = inference_file.read_text()
+             query_result = self.graph.query(query_text) # noqa: WPS110
+
+             # CONSTRUCT queries return a SPARQLResult with a graph attribute
+             result_graph = (
+                 query_result.get("graph")
+                 if isinstance(query_result, dict)
+                 else query_result.graph
+             )
+             self.logger.debug(
+                 f"Inference {filename}: result_graph is {result_graph}, type: {type(result_graph)}"
+             )
+             if result_graph is not None: # noqa: WPS504
+                 inferred_quads = [
+                     (s, p, o, inference_graph) # noqa: WPS111
+                     for s, p, o in result_graph # noqa: WPS111
+                 ]
+                 self.logger.debug(
+                     f"Inference {filename}: generated {len(inferred_quads)} quads"
+                 )
+
+                 if inferred_quads:
+                     self.graph.addN(inferred_quads) # noqa: WPS220
+                     inferred_count = len(inferred_quads)
+                     total_inferred += inferred_count
+                     self.logger.info( # noqa: WPS220
+                         "Inference {filename}: added {count} triples",
+                         filename=filename,
+                         count=inferred_count,
+                     )
+             else:
+                 self.logger.debug(f"Inference {filename}: result_graph is None")
+
+         return total_inferred
+
      def _run_inference(self): # noqa: WPS231, WPS220, WPS210
          """
          Run inference queries from the inference directory.
@@ -652,42 +734,13 @@ class GlobalSPARQLProcessor(Processor): # noqa: WPS338, WPS214
          3. Insert the resulting triples into that graph
          """
          with self.inference_lock:
-             for inference_file in INFERENCE_DIR.glob('*.sparql'):
-                 filename = inference_file.stem # filename without .sparql extension
-                 inference_graph = URIRef(f'inference:{filename}')
-
-                 # Truncate the inference graph
-                 context = self.graph.get_context(inference_graph)
-                 context.remove((None, None, None))
-
-                 # Read and execute the CONSTRUCT query
-                 query_text = inference_file.read_text()
-                 query_result = self.graph.query(query_text) # noqa: WPS110
-
-                 # CONSTRUCT queries return a SPARQLResult with a graph attribute
-                 result_graph = query_result.get('graph') if isinstance(query_result, dict) else query_result.graph
-                 self.logger.debug(f'Inference {filename}: result_graph is {result_graph}, type: {type(result_graph)}')
-                 if result_graph is not None: # noqa: WPS504
-                     inferred_quads = [
-                         (s, p, o, inference_graph) # noqa: WPS111
-                         for s, p, o in result_graph # noqa: WPS111
-                     ]
-                     self.logger.debug(f'Inference {filename}: generated {len(inferred_quads)} quads')
-
-                     if inferred_quads:
-                         self.graph.addN(inferred_quads) # noqa: WPS220
-                         self.logger.info( # noqa: WPS220
-                             'Inference {filename}: added {count} triples',
-                             filename=filename,
-                             count=len(inferred_quads),
-                         )
-                 else:
-                     self.logger.debug(f'Inference {filename}: result_graph is None')
-
+             # Run global inference (deprecated, will be removed later)
+             self._run_inference_from_directory(INFERENCE_DIR, graph_prefix="inference")
+
              # Clear the flag after running inference
              self.graph.last_not_inferred_source = None

-     def load_retracting_nanopublications_by_query( # noqa: WPS231
+     def load_retracting_nanopublications_by_query( # noqa: WPS231
          self,
          query: Query,
          bindings: dict[str, Node],
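Note: the main structural change in this release is the extraction of `_run_inference`'s body into the reusable `_run_inference_from_directory(inference_dir, graph_prefix)` helper, which additionally skips missing directories and returns the number of inferred triples; the new comment marks the global call as deprecated, suggesting per-source inference directories are planned. The truncate/construct/insert cycle itself is unchanged; a sketch of one cycle in plain rdflib, with a hypothetical rule file:

    from pathlib import Path

    from rdflib import ConjunctiveGraph, URIRef

    graph = ConjunctiveGraph()
    rule_file = Path('inference/subclass.sparql')  # hypothetical

    # 1. Truncate the named graph dedicated to this rule.
    inference_graph = URIRef(f'inference:{rule_file.stem}')
    graph.get_context(inference_graph).remove((None, None, None))

    # 2. Execute the CONSTRUCT query stored in the rule file.
    query_result = graph.query(rule_file.read_text())

    # 3. Insert the inferred triples into the rule's named graph.
    if query_result.graph is not None:
        graph.addN(
            (s, p, o, inference_graph)
            for s, p, o in query_result.graph
        )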