rdf-starbase 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1035 @@
+ """
+ RDFS and OWL Reasoning Engine.
+
+ Implements forward-chaining RDFS and OWL 2 RL entailment for RDF-StarBase.
+ Materializes inferred triples into the FactStore with INFERRED flag.
+
+ Supported RDFS entailment rules:
+ - rdfs2: Domain inference (x p y) + (p rdfs:domain C) => (x rdf:type C)
+ - rdfs3: Range inference (x p y) + (p rdfs:range C) => (y rdf:type C)
+ - rdfs5: Transitive subPropertyOf (p1 subProp p2) + (p2 subProp p3) => (p1 subProp p3)
+ - rdfs7: Property inheritance (x p1 y) + (p1 subProp p2) => (x p2 y)
+ - rdfs9: Type inheritance (x type C1) + (C1 subClass C2) => (x type C2)
+ - rdfs11: Transitive subClassOf (C1 subClass C2) + (C2 subClass C3) => (C1 subClass C3)
+
+ Supported OWL 2 RL entailment rules:
+ - owl:sameAs symmetry and transitivity
+ - owl:equivalentClass => mutual rdfs:subClassOf
+ - owl:equivalentProperty => mutual rdfs:subPropertyOf
+ - owl:inverseOf (x p y) + (p inverseOf q) => (y q x)
+ - owl:TransitiveProperty (x p y) + (y p z) => (x p z)
+ - owl:SymmetricProperty (x p y) => (y p x)
+ - owl:FunctionalProperty (x p y1) + (x p y2) => (y1 owl:sameAs y2)
+ - owl:InverseFunctionalProperty (x1 p y) + (x2 p y) => (x1 owl:sameAs x2)
+ - owl:hasValue + owl:onProperty class membership inference
+ - owl:someValuesFrom and owl:intersectionOf: vocabulary terms are resolved, but
+   their entailment rules are not yet materialized by this module
+
+ Implementation approach: Forward-chaining with fixed-point iteration.
+ """
+
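
# --- Illustrative sketch: not part of the packaged module -------------------
# A minimal, self-contained model of the fixed-point loop described above,
# using plain string triples instead of the TermId/FactStore machinery below.
# It applies only rdfs9 and rdfs11 until no new triple appears.
def naive_rdfs_closure(triples: set[tuple[str, str, str]]) -> set[tuple[str, str, str]]:
    SUBCLASS, TYPE = "rdfs:subClassOf", "rdf:type"
    facts = set(triples)
    while True:
        sub = {(s, o) for s, p, o in facts if p == SUBCLASS}
        types = {(s, o) for s, p, o in facts if p == TYPE}
        new = set()
        # rdfs11: (C1 subClassOf C2) + (C2 subClassOf C3) => (C1 subClassOf C3)
        new |= {(c1, SUBCLASS, c3) for c1, c2 in sub for c2b, c3 in sub if c2 == c2b}
        # rdfs9: (x type C1) + (C1 subClassOf C2) => (x type C2)
        new |= {(x, TYPE, c2) for x, c1 in types for c1b, c2 in sub if c1 == c1b}
        if new <= facts:
            return facts
        facts |= new

# naive_rdfs_closure({("alice", "rdf:type", "Student"),
#                     ("Student", "rdfs:subClassOf", "Person"),
#                     ("Person", "rdfs:subClassOf", "Agent")})
# additionally yields (alice, rdf:type, Person), (alice, rdf:type, Agent)
# and (Student, rdfs:subClassOf, Agent).
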
+ from typing import Set, Tuple, Optional, List
+ from dataclasses import dataclass, field
+
+ import polars as pl
+
+ from rdf_starbase.storage.terms import TermDict, TermId, Term, TermKind
+ from rdf_starbase.storage.facts import FactStore, FactFlags, DEFAULT_GRAPH_ID
+
+
+ # RDFS vocabulary IRIs
+ RDFS_NS = "http://www.w3.org/2000/01/rdf-schema#"
+ RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ OWL_NS = "http://www.w3.org/2002/07/owl#"
+
+ RDFS_SUBCLASS_OF = RDFS_NS + "subClassOf"
+ RDFS_SUBPROPERTY_OF = RDFS_NS + "subPropertyOf"
+ RDFS_DOMAIN = RDFS_NS + "domain"
+ RDFS_RANGE = RDFS_NS + "range"
+ RDF_TYPE = RDF_NS + "type"
+
+ # OWL vocabulary IRIs
+ OWL_SAME_AS = OWL_NS + "sameAs"
+ OWL_EQUIVALENT_CLASS = OWL_NS + "equivalentClass"
+ OWL_EQUIVALENT_PROPERTY = OWL_NS + "equivalentProperty"
+ OWL_INVERSE_OF = OWL_NS + "inverseOf"
+ OWL_TRANSITIVE_PROPERTY = OWL_NS + "TransitiveProperty"
+ OWL_SYMMETRIC_PROPERTY = OWL_NS + "SymmetricProperty"
+ OWL_FUNCTIONAL_PROPERTY = OWL_NS + "FunctionalProperty"
+ OWL_INVERSE_FUNCTIONAL_PROPERTY = OWL_NS + "InverseFunctionalProperty"
+ OWL_HAS_VALUE = OWL_NS + "hasValue"
+ OWL_ON_PROPERTY = OWL_NS + "onProperty"
+ OWL_SOME_VALUES_FROM = OWL_NS + "someValuesFrom"
+ OWL_ALL_VALUES_FROM = OWL_NS + "allValuesFrom"
+ OWL_INTERSECTION_OF = OWL_NS + "intersectionOf"
+ RDF_FIRST = RDF_NS + "first"
+ RDF_REST = RDF_NS + "rest"
+ RDF_NIL = RDF_NS + "nil"
+
+
+ @dataclass
+ class ReasoningStats:
+     """Statistics from a reasoning run."""
+     iterations: int
+     triples_inferred: int
+     rdfs2_inferences: int  # domain
+     rdfs3_inferences: int  # range
+     rdfs5_inferences: int  # subPropertyOf transitivity
+     rdfs7_inferences: int  # property inheritance
+     rdfs9_inferences: int  # type inheritance
+     rdfs11_inferences: int  # subClassOf transitivity
+     # OWL statistics
+     owl_same_as_inferences: int = 0
+     owl_equivalent_class_inferences: int = 0
+     owl_equivalent_property_inferences: int = 0
+     owl_inverse_of_inferences: int = 0
+     owl_transitive_inferences: int = 0
+     owl_symmetric_inferences: int = 0
+     owl_functional_inferences: int = 0
+     owl_inverse_functional_inferences: int = 0
+     owl_has_value_inferences: int = 0
+
+
+ class RDFSReasoner:
+     """
+     Forward-chaining RDFS and OWL 2 RL reasoner.
+
+     Materializes RDFS and OWL entailments into the FactStore.
+     Uses fixed-point iteration to compute transitive closure.
+     """
+
+     def __init__(
+         self,
+         term_dict: TermDict,
+         fact_store: FactStore,
+         max_iterations: int = 100,
+         enable_owl: bool = True,
+     ):
+         """
+         Initialize the reasoner.
+
+         Args:
+             term_dict: TermDict for term lookup
+             fact_store: FactStore containing the facts
+             max_iterations: Maximum iterations for fixed-point (default 100)
+             enable_owl: Enable OWL reasoning (default True)
+         """
+         self._term_dict = term_dict
+         self._fact_store = fact_store
+         self._max_iterations = max_iterations
+         self._enable_owl = enable_owl
+
+         # Cache vocabulary term IDs
+         self._vocab_ids: Optional[dict] = None
+
+     def _ensure_vocab_ids(self) -> dict:
+         """Ensure vocabulary term IDs are cached."""
+         if self._vocab_ids is not None:
+             return self._vocab_ids
+
+         # Get IDs for RDFS vocabulary terms (only if they exist in data)
+         vocab_terms = [
+             (RDFS_SUBCLASS_OF, "subClassOf"),
+             (RDFS_SUBPROPERTY_OF, "subPropertyOf"),
+             (RDFS_DOMAIN, "domain"),
+             (RDFS_RANGE, "range"),
+         ]
+
+         # OWL vocabulary terms
+         owl_vocab_terms = [
+             (OWL_SAME_AS, "sameAs"),
+             (OWL_EQUIVALENT_CLASS, "equivalentClass"),
+             (OWL_EQUIVALENT_PROPERTY, "equivalentProperty"),
+             (OWL_INVERSE_OF, "inverseOf"),
+             (OWL_TRANSITIVE_PROPERTY, "TransitiveProperty"),
+             (OWL_SYMMETRIC_PROPERTY, "SymmetricProperty"),
+             (OWL_FUNCTIONAL_PROPERTY, "FunctionalProperty"),
+             (OWL_INVERSE_FUNCTIONAL_PROPERTY, "InverseFunctionalProperty"),
+             (OWL_HAS_VALUE, "hasValue"),
+             (OWL_ON_PROPERTY, "onProperty"),
+             (OWL_SOME_VALUES_FROM, "someValuesFrom"),
+             (OWL_INTERSECTION_OF, "intersectionOf"),
+             (RDF_FIRST, "first"),
+             (RDF_REST, "rest"),
+             (RDF_NIL, "nil"),
+         ]
+
+         self._vocab_ids = {}
+         for iri, name in vocab_terms:
+             term = Term(kind=TermKind.IRI, lex=iri)
+             term_id = self._term_dict.get_id(term)
+             if term_id is not None:
+                 self._vocab_ids[name] = term_id
+
+         # Add OWL vocabulary if enabled
+         if self._enable_owl:
+             for iri, name in owl_vocab_terms:
+                 term = Term(kind=TermKind.IRI, lex=iri)
+                 term_id = self._term_dict.get_id(term)
+                 if term_id is not None:
+                     self._vocab_ids[name] = term_id
+
+         # Check if we need to create vocabulary terms for inference output
+         needs_type = (
+             "domain" in self._vocab_ids or
+             "range" in self._vocab_ids or
+             "subClassOf" in self._vocab_ids or
+             "hasValue" in self._vocab_ids
+         )
+         needs_same_as = (
+             "FunctionalProperty" in self._vocab_ids or
+             "InverseFunctionalProperty" in self._vocab_ids
+         )
+         needs_subclass = "equivalentClass" in self._vocab_ids
+         needs_subprop = "equivalentProperty" in self._vocab_ids
+
+         # Create vocabulary terms that will be used in inferred triples
+         if needs_type:
+             type_term = Term(kind=TermKind.IRI, lex=RDF_TYPE)
+             self._vocab_ids["type"] = self._term_dict.get_or_create(type_term)
+         else:
+             type_term = Term(kind=TermKind.IRI, lex=RDF_TYPE)
+             type_id = self._term_dict.get_id(type_term)
+             if type_id is not None:
+                 self._vocab_ids["type"] = type_id
+
+         if needs_same_as and "sameAs" not in self._vocab_ids:
+             same_term = Term(kind=TermKind.IRI, lex=OWL_SAME_AS)
+             self._vocab_ids["sameAs"] = self._term_dict.get_or_create(same_term)
+
+         if needs_subclass and "subClassOf" not in self._vocab_ids:
+             subclass_term = Term(kind=TermKind.IRI, lex=RDFS_SUBCLASS_OF)
+             self._vocab_ids["subClassOf"] = self._term_dict.get_or_create(subclass_term)
+
+         if needs_subprop and "subPropertyOf" not in self._vocab_ids:
+             subprop_term = Term(kind=TermKind.IRI, lex=RDFS_SUBPROPERTY_OF)
+             self._vocab_ids["subPropertyOf"] = self._term_dict.get_or_create(subprop_term)
+
+         return self._vocab_ids
+
+     def reason(self, graph_id: TermId = DEFAULT_GRAPH_ID) -> ReasoningStats:
+         """
+         Run RDFS and OWL forward-chaining inference.
+
+         Materializes all entailments into the FactStore.
+
+         Args:
+             graph_id: Graph to reason over (default: default graph)
+
+         Returns:
+             ReasoningStats with counts of inferred triples
+         """
+         vocab = self._ensure_vocab_ids()
+
+         # If no vocabulary in the data, nothing to infer
+         if not vocab:
+             return ReasoningStats(0, 0, 0, 0, 0, 0, 0, 0)
+
+         stats = ReasoningStats(
+             iterations=0,
+             triples_inferred=0,
+             rdfs2_inferences=0,
+             rdfs3_inferences=0,
+             rdfs5_inferences=0,
+             rdfs7_inferences=0,
+             rdfs9_inferences=0,
+             rdfs11_inferences=0,
+         )
+
+         # Track existing facts to avoid duplicates
+         existing_facts: Set[Tuple[TermId, TermId, TermId, TermId]] = set()
+         df = self._fact_store.scan_facts()
+         for row in df.iter_rows(named=True):
+             existing_facts.add((row["g"], row["s"], row["p"], row["o"]))
+
+         # Fixed-point iteration
+         for iteration in range(self._max_iterations):
+             stats.iterations = iteration + 1
+             new_facts: List[Tuple[TermId, TermId, TermId, TermId]] = []
+
+             # RDFS rules (always applied)
+             new_facts.extend(self._apply_rdfs11(vocab, existing_facts, graph_id, stats))
+             new_facts.extend(self._apply_rdfs5(vocab, existing_facts, graph_id, stats))
+             new_facts.extend(self._apply_rdfs9(vocab, existing_facts, graph_id, stats))
+             new_facts.extend(self._apply_rdfs7(vocab, existing_facts, graph_id, stats))
+             new_facts.extend(self._apply_rdfs2(vocab, existing_facts, graph_id, stats))
+             new_facts.extend(self._apply_rdfs3(vocab, existing_facts, graph_id, stats))
+
+             # OWL rules (if enabled)
+             if self._enable_owl:
+                 new_facts.extend(self._apply_owl_same_as(vocab, existing_facts, graph_id, stats))
+                 new_facts.extend(self._apply_owl_equivalent_class(vocab, existing_facts, graph_id, stats))
+                 new_facts.extend(self._apply_owl_equivalent_property(vocab, existing_facts, graph_id, stats))
+                 new_facts.extend(self._apply_owl_inverse_of(vocab, existing_facts, graph_id, stats))
+                 new_facts.extend(self._apply_owl_transitive(vocab, existing_facts, graph_id, stats))
+                 new_facts.extend(self._apply_owl_symmetric(vocab, existing_facts, graph_id, stats))
+                 new_facts.extend(self._apply_owl_functional(vocab, existing_facts, graph_id, stats))
+                 new_facts.extend(self._apply_owl_inverse_functional(vocab, existing_facts, graph_id, stats))
+                 new_facts.extend(self._apply_owl_has_value(vocab, existing_facts, graph_id, stats))
+
+             if not new_facts:
+                 # Fixed point reached
+                 break
+
+             # Add new facts to store and existing set
+             self._fact_store.add_facts_batch(
+                 new_facts,
+                 flags=FactFlags.INFERRED,
+                 process=vocab.get("type"),  # Provenance marker: reuses the rdf:type term id when available
+             )
+
+             for fact in new_facts:
+                 existing_facts.add(fact)
+
+             stats.triples_inferred += len(new_facts)
+
+         return stats
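
# --- Illustrative sketch: not part of the packaged module -------------------
# reason() re-applies every rule to the full fact set on each pass.  A common
# refinement is semi-naive (delta-driven) evaluation: each pass joins only the
# facts derived in the previous pass against the rest.  A sketch over plain
# (s, p, o) string triples, with the rule supplied as a callable:
from collections.abc import Callable, Iterable

Triple = tuple[str, str, str]

def semi_naive(
    base: set[Triple],
    rule: Callable[[set[Triple], set[Triple]], Iterable[Triple]],
) -> set[Triple]:
    """Fixed point of `rule`, feeding only the newest facts back in."""
    facts = set(base)
    delta = set(base)
    while delta:
        derived = {t for t in rule(facts, delta) if t not in facts}
        facts |= derived
        delta = derived
    return facts

def rdfs11_delta(facts: set[Triple], delta: set[Triple]) -> Iterable[Triple]:
    # (C1 subClassOf C2) + (C2 subClassOf C3) => (C1 subClassOf C3),
    # where at least one premise comes from the latest delta.
    sub = {(s, o) for s, p, o in facts if p == "rdfs:subClassOf"}
    for a, b in ((s, o) for s, p, o in delta if p == "rdfs:subClassOf"):
        yield from ((a, "rdfs:subClassOf", c) for b2, c in sub if b2 == b)
        yield from ((x, "rdfs:subClassOf", b) for x, a2 in sub if a2 == a)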
+
+     def _get_facts_with_predicate(
+         self,
+         predicate_id: TermId,
+         graph_id: TermId,
+     ) -> List[Tuple[TermId, TermId]]:
+         """Get all (subject, object) pairs for a given predicate."""
+         df = self._fact_store.scan_facts()
+
+         filtered = df.filter(
+             (pl.col("p") == predicate_id) &
+             (pl.col("g") == graph_id) &
+             (~(pl.col("flags").cast(pl.Int32) & int(FactFlags.DELETED)).cast(pl.Boolean))
+         )
+
+         return [
+             (row["s"], row["o"])
+             for row in filtered.select(["s", "o"]).iter_rows(named=True)
+         ]
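
# --- Illustrative sketch: not part of the packaged module -------------------
# What the flags filter above keeps: rows whose DELETED bit is unset.  A toy
# frame using an assumed bit value of 2 purely for illustration (the real bit
# lives in FactFlags); the expression mirrors the module's own filter.
import polars as pl

DELETED = 2  # assumed value, for illustration only
toy = pl.DataFrame({"s": [1, 2, 3], "flags": [0, 2, 3]})
kept = toy.filter(~(pl.col("flags").cast(pl.Int32) & DELETED).cast(pl.Boolean))
# Only the row with s == 1 survives: flags 2 and 3 both have the 0b10 bit set.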
+
+     def _apply_rdfs11(
+         self,
+         vocab: dict,
+         existing: Set[Tuple[TermId, TermId, TermId, TermId]],
+         graph_id: TermId,
+         stats: ReasoningStats,
+     ) -> List[Tuple[TermId, TermId, TermId, TermId]]:
+         """
+         RDFS11: Transitive subClassOf.
+
+         (C1 subClassOf C2) + (C2 subClassOf C3) => (C1 subClassOf C3)
+         """
+         subclass_id = vocab.get("subClassOf")
+         if subclass_id is None:
+             return []
+
+         # Get all subClassOf facts
+         subclass_pairs = self._get_facts_with_predicate(subclass_id, graph_id)
+         if not subclass_pairs:
+             return []
+
+         # Build adjacency map: C1 -> [C2, C3, ...]
+         subclass_of: dict[TermId, Set[TermId]] = {}
+         for c1, c2 in subclass_pairs:
+             if c1 not in subclass_of:
+                 subclass_of[c1] = set()
+             subclass_of[c1].add(c2)
+
+         # Compose one hop; the outer fixed-point loop completes the closure
+         new_facts = []
+         for c1, direct_supers in subclass_of.items():
+             for c2 in list(direct_supers):
+                 if c2 in subclass_of:
+                     for c3 in subclass_of[c2]:
+                         fact = (graph_id, c1, subclass_id, c3)
+                         if fact not in existing and c1 != c3:
+                             new_facts.append(fact)
+                             stats.rdfs11_inferences += 1
+
+         return new_facts
+
+     def _apply_rdfs5(
+         self,
+         vocab: dict,
+         existing: Set[Tuple[TermId, TermId, TermId, TermId]],
+         graph_id: TermId,
+         stats: ReasoningStats,
+     ) -> List[Tuple[TermId, TermId, TermId, TermId]]:
+         """
+         RDFS5: Transitive subPropertyOf.
+
+         (p1 subProp p2) + (p2 subProp p3) => (p1 subProp p3)
+         """
+         subprop_id = vocab.get("subPropertyOf")
+         if subprop_id is None:
+             return []
+
+         # Get all subPropertyOf facts
+         subprop_pairs = self._get_facts_with_predicate(subprop_id, graph_id)
+         if not subprop_pairs:
+             return []
+
+         # Build adjacency map
+         subprop_of: dict[TermId, Set[TermId]] = {}
+         for p1, p2 in subprop_pairs:
+             if p1 not in subprop_of:
+                 subprop_of[p1] = set()
+             subprop_of[p1].add(p2)
+
+         # Compose one hop; the outer fixed-point loop completes the closure
+         new_facts = []
+         for p1, direct_supers in subprop_of.items():
+             for p2 in list(direct_supers):
+                 if p2 in subprop_of:
+                     for p3 in subprop_of[p2]:
+                         fact = (graph_id, p1, subprop_id, p3)
+                         if fact not in existing and p1 != p3:
+                             new_facts.append(fact)
+                             stats.rdfs5_inferences += 1
+
+         return new_facts
+
+     def _apply_rdfs9(
+         self,
+         vocab: dict,
+         existing: Set[Tuple[TermId, TermId, TermId, TermId]],
+         graph_id: TermId,
+         stats: ReasoningStats,
+     ) -> List[Tuple[TermId, TermId, TermId, TermId]]:
+         """
+         RDFS9: Type inheritance through subClassOf.
+
+         (x rdf:type C1) + (C1 subClassOf C2) => (x rdf:type C2)
+         """
+         type_id = vocab.get("type")
+         subclass_id = vocab.get("subClassOf")
+         if type_id is None or subclass_id is None:
+             return []
+
+         # Get type assertions
+         type_pairs = self._get_facts_with_predicate(type_id, graph_id)
+         if not type_pairs:
+             return []
+
+         # Get subClassOf hierarchy
+         subclass_pairs = self._get_facts_with_predicate(subclass_id, graph_id)
+         if not subclass_pairs:
+             return []
+
+         # Build subClassOf map: C1 -> [C2, ...]
+         subclass_of: dict[TermId, Set[TermId]] = {}
+         for c1, c2 in subclass_pairs:
+             if c1 not in subclass_of:
+                 subclass_of[c1] = set()
+             subclass_of[c1].add(c2)
+
+         # Infer types
+         new_facts = []
+         for x, c1 in type_pairs:
+             if c1 in subclass_of:
+                 for c2 in subclass_of[c1]:
+                     fact = (graph_id, x, type_id, c2)
+                     if fact not in existing:
+                         new_facts.append(fact)
+                         stats.rdfs9_inferences += 1
+
+         return new_facts
+
+     def _apply_rdfs7(
+         self,
+         vocab: dict,
+         existing: Set[Tuple[TermId, TermId, TermId, TermId]],
+         graph_id: TermId,
+         stats: ReasoningStats,
+     ) -> List[Tuple[TermId, TermId, TermId, TermId]]:
+         """
+         RDFS7: Property inheritance through subPropertyOf.
+
+         (x p1 y) + (p1 subProp p2) => (x p2 y)
+         """
+         subprop_id = vocab.get("subPropertyOf")
+         if subprop_id is None:
+             return []
+
+         # Get subPropertyOf hierarchy
+         subprop_pairs = self._get_facts_with_predicate(subprop_id, graph_id)
+         if not subprop_pairs:
+             return []
+
+         # Build subPropertyOf map: p1 -> [p2, ...]
+         subprop_of: dict[TermId, Set[TermId]] = {}
+         for p1, p2 in subprop_pairs:
+             if p1 not in subprop_of:
+                 subprop_of[p1] = set()
+             subprop_of[p1].add(p2)
+
+         # Get all facts and apply property inheritance
+         df = self._fact_store.scan_facts()
+         filtered = df.filter(
+             (pl.col("g") == graph_id) &
+             (~(pl.col("flags").cast(pl.Int32) & int(FactFlags.DELETED)).cast(pl.Boolean))
+         )
+
+         new_facts = []
+         for row in filtered.iter_rows(named=True):
+             p1 = row["p"]
+             if p1 in subprop_of:
+                 for p2 in subprop_of[p1]:
+                     fact = (graph_id, row["s"], p2, row["o"])
+                     if fact not in existing:
+                         new_facts.append(fact)
+                         stats.rdfs7_inferences += 1
+
+         return new_facts
+
+     def _apply_rdfs2(
+         self,
+         vocab: dict,
+         existing: Set[Tuple[TermId, TermId, TermId, TermId]],
+         graph_id: TermId,
+         stats: ReasoningStats,
+     ) -> List[Tuple[TermId, TermId, TermId, TermId]]:
+         """
+         RDFS2: Domain inference.
+
+         (x p y) + (p rdfs:domain C) => (x rdf:type C)
+         """
+         domain_id = vocab.get("domain")
+         type_id = vocab.get("type")
+         if domain_id is None or type_id is None:
+             return []
+
+         # Get domain declarations
+         domain_pairs = self._get_facts_with_predicate(domain_id, graph_id)
+         if not domain_pairs:
+             return []
+
+         # Build domain map: p -> C (if a property declares several domains, only the last survives here)
+         domain_of: dict[TermId, TermId] = {}
+         for p, c in domain_pairs:
+             domain_of[p] = c
+
+         # Get all facts and apply domain inference
+         df = self._fact_store.scan_facts()
+         filtered = df.filter(
+             (pl.col("g") == graph_id) &
+             (~(pl.col("flags").cast(pl.Int32) & int(FactFlags.DELETED)).cast(pl.Boolean))
+         )
+
+         new_facts = []
+         for row in filtered.iter_rows(named=True):
+             p = row["p"]
+             if p in domain_of:
+                 c = domain_of[p]
+                 fact = (graph_id, row["s"], type_id, c)
+                 if fact not in existing:
+                     new_facts.append(fact)
+                     stats.rdfs2_inferences += 1
+
+         return new_facts
+
+     def _apply_rdfs3(
+         self,
+         vocab: dict,
+         existing: Set[Tuple[TermId, TermId, TermId, TermId]],
+         graph_id: TermId,
+         stats: ReasoningStats,
+     ) -> List[Tuple[TermId, TermId, TermId, TermId]]:
+         """
+         RDFS3: Range inference.
+
+         (x p y) + (p rdfs:range C) => (y rdf:type C)
+         """
+         range_id = vocab.get("range")
+         type_id = vocab.get("type")
+         if range_id is None or type_id is None:
+             return []
+
+         # Get range declarations
+         range_pairs = self._get_facts_with_predicate(range_id, graph_id)
+         if not range_pairs:
+             return []
+
+         # Build range map: p -> C (if a property declares several ranges, only the last survives here)
+         range_of: dict[TermId, TermId] = {}
+         for p, c in range_pairs:
+             range_of[p] = c
+
+         # Get all facts and apply range inference
+         df = self._fact_store.scan_facts()
+         filtered = df.filter(
+             (pl.col("g") == graph_id) &
+             (~(pl.col("flags").cast(pl.Int32) & int(FactFlags.DELETED)).cast(pl.Boolean))
+         )
+
+         new_facts = []
+         for row in filtered.iter_rows(named=True):
+             p = row["p"]
+             if p in range_of:
+                 c = range_of[p]
+                 fact = (graph_id, row["o"], type_id, c)
+                 if fact not in existing:
+                     new_facts.append(fact)
+                     stats.rdfs3_inferences += 1
+
+         return new_facts
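
# --- Illustrative sketch: not part of the packaged module -------------------
# rdfs2/rdfs3 on toy string triples.  Given (ex:advises rdfs:domain ex:Professor)
# and (ex:advises rdfs:range ex:Student), the single assertion
# (ex:kim ex:advises ex:lee) yields (ex:kim rdf:type ex:Professor) and
# (ex:lee rdf:type ex:Student).
def domain_range_types(triples):
    domains = {s: o for s, p, o in triples if p == "rdfs:domain"}
    ranges = {s: o for s, p, o in triples if p == "rdfs:range"}
    inferred = set()
    for s, p, o in triples:
        if p in domains:
            inferred.add((s, "rdf:type", domains[p]))
        if p in ranges:
            inferred.add((o, "rdf:type", ranges[p]))
    return inferred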
+
+     # =========================================================================
+     # OWL Entailment Rules
+     # =========================================================================
+
+     def _apply_owl_same_as(
+         self,
+         vocab: dict,
+         existing: Set[Tuple[TermId, TermId, TermId, TermId]],
+         graph_id: TermId,
+         stats: ReasoningStats,
+     ) -> List[Tuple[TermId, TermId, TermId, TermId]]:
+         """
+         owl:sameAs symmetry and transitivity.
+
+         Symmetry: (x sameAs y) => (y sameAs x)
+         Transitivity: (x sameAs y) + (y sameAs z) => (x sameAs z)
+         """
+         same_as_id = vocab.get("sameAs")
+         if same_as_id is None:
+             return []
+
+         # Get all sameAs pairs
+         same_pairs = self._get_facts_with_predicate(same_as_id, graph_id)
+         if not same_pairs:
+             return []
+
+         new_facts = []
+
+         # Symmetry: (x sameAs y) => (y sameAs x)
+         for x, y in same_pairs:
+             fact = (graph_id, y, same_as_id, x)
+             if fact not in existing and x != y:
+                 new_facts.append(fact)
+                 stats.owl_same_as_inferences += 1
+
+         # Build adjacency for transitivity
+         same_as_map: dict[TermId, Set[TermId]] = {}
+         for x, y in same_pairs:
+             if x not in same_as_map:
+                 same_as_map[x] = set()
+             same_as_map[x].add(y)
+
+         # Transitivity: (x sameAs y) + (y sameAs z) => (x sameAs z)
+         for x, ys in same_as_map.items():
+             for y in list(ys):
+                 if y in same_as_map:
+                     for z in same_as_map[y]:
+                         fact = (graph_id, x, same_as_id, z)
+                         if fact not in existing and x != z:
+                             new_facts.append(fact)
+                             stats.owl_same_as_inferences += 1
+
+         return new_facts
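
# --- Illustrative sketch: not part of the packaged module -------------------
# Pairwise symmetry/transitivity materializes O(n^2) sameAs triples per clique
# of equal individuals.  An alternative is to collapse owl:sameAs into
# equivalence classes with union-find and keep one canonical member per class:
def same_as_classes(pairs):
    parent = {}

    def find(x):
        parent.setdefault(x, x)
        while parent[x] != x:
            parent[x] = parent[parent[x]]  # path halving
            x = parent[x]
        return x

    def union(a, b):
        parent[find(a)] = find(b)

    for a, b in pairs:
        union(a, b)
    classes = {}
    for x in list(parent):
        classes.setdefault(find(x), set()).add(x)
    return classes

# same_as_classes([(1, 2), (2, 3), (7, 8)]) groups {1, 2, 3} and {7, 8}.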
+
+     def _apply_owl_equivalent_class(
+         self,
+         vocab: dict,
+         existing: Set[Tuple[TermId, TermId, TermId, TermId]],
+         graph_id: TermId,
+         stats: ReasoningStats,
+     ) -> List[Tuple[TermId, TermId, TermId, TermId]]:
+         """
+         owl:equivalentClass => mutual rdfs:subClassOf.
+
+         (C1 equivalentClass C2) => (C1 subClassOf C2) + (C2 subClassOf C1)
+         """
+         equiv_id = vocab.get("equivalentClass")
+         subclass_id = vocab.get("subClassOf")
+         if equiv_id is None or subclass_id is None:
+             return []
+
+         equiv_pairs = self._get_facts_with_predicate(equiv_id, graph_id)
+         if not equiv_pairs:
+             return []
+
+         new_facts = []
+         for c1, c2 in equiv_pairs:
+             # C1 subClassOf C2
+             fact1 = (graph_id, c1, subclass_id, c2)
+             if fact1 not in existing:
+                 new_facts.append(fact1)
+                 stats.owl_equivalent_class_inferences += 1
+
+             # C2 subClassOf C1
+             fact2 = (graph_id, c2, subclass_id, c1)
+             if fact2 not in existing:
+                 new_facts.append(fact2)
+                 stats.owl_equivalent_class_inferences += 1
+
+         return new_facts
+
+     def _apply_owl_equivalent_property(
+         self,
+         vocab: dict,
+         existing: Set[Tuple[TermId, TermId, TermId, TermId]],
+         graph_id: TermId,
+         stats: ReasoningStats,
+     ) -> List[Tuple[TermId, TermId, TermId, TermId]]:
+         """
+         owl:equivalentProperty => mutual rdfs:subPropertyOf.
+
+         (p1 equivalentProperty p2) => (p1 subPropertyOf p2) + (p2 subPropertyOf p1)
+         """
+         equiv_id = vocab.get("equivalentProperty")
+         subprop_id = vocab.get("subPropertyOf")
+         if equiv_id is None or subprop_id is None:
+             return []
+
+         equiv_pairs = self._get_facts_with_predicate(equiv_id, graph_id)
+         if not equiv_pairs:
+             return []
+
+         new_facts = []
+         for p1, p2 in equiv_pairs:
+             # p1 subPropertyOf p2
+             fact1 = (graph_id, p1, subprop_id, p2)
+             if fact1 not in existing:
+                 new_facts.append(fact1)
+                 stats.owl_equivalent_property_inferences += 1
+
+             # p2 subPropertyOf p1
+             fact2 = (graph_id, p2, subprop_id, p1)
+             if fact2 not in existing:
+                 new_facts.append(fact2)
+                 stats.owl_equivalent_property_inferences += 1
+
+         return new_facts
+
+     def _apply_owl_inverse_of(
+         self,
+         vocab: dict,
+         existing: Set[Tuple[TermId, TermId, TermId, TermId]],
+         graph_id: TermId,
+         stats: ReasoningStats,
+     ) -> List[Tuple[TermId, TermId, TermId, TermId]]:
+         """
+         owl:inverseOf property inversion.
+
+         (x p y) + (p inverseOf q) => (y q x)
+         """
+         inverse_id = vocab.get("inverseOf")
+         if inverse_id is None:
+             return []
+
+         # Get inverse declarations
+         inverse_pairs = self._get_facts_with_predicate(inverse_id, graph_id)
+         if not inverse_pairs:
+             return []
+
+         # Build inverse map: p -> q
+         inverse_of: dict[TermId, TermId] = {}
+         for p, q in inverse_pairs:
+             inverse_of[p] = q
+             # inverseOf is symmetric
+             inverse_of[q] = p
+
+         # Get all facts and apply inverse inference
+         df = self._fact_store.scan_facts()
+         filtered = df.filter(
+             (pl.col("g") == graph_id) &
+             (~(pl.col("flags").cast(pl.Int32) & int(FactFlags.DELETED)).cast(pl.Boolean))
+         )
+
+         new_facts = []
+         for row in filtered.iter_rows(named=True):
+             p = row["p"]
+             if p in inverse_of:
+                 q = inverse_of[p]
+                 # (x p y) => (y q x)
+                 fact = (graph_id, row["o"], q, row["s"])
+                 if fact not in existing:
+                     new_facts.append(fact)
+                     stats.owl_inverse_of_inferences += 1
+
+         return new_facts
+
+     def _apply_owl_transitive(
+         self,
+         vocab: dict,
+         existing: Set[Tuple[TermId, TermId, TermId, TermId]],
+         graph_id: TermId,
+         stats: ReasoningStats,
+     ) -> List[Tuple[TermId, TermId, TermId, TermId]]:
+         """
+         owl:TransitiveProperty transitive closure.
+
+         (p rdf:type TransitiveProperty) + (x p y) + (y p z) => (x p z)
+         """
+         trans_type_id = vocab.get("TransitiveProperty")
+         type_id = vocab.get("type")
+         if trans_type_id is None or type_id is None:
+             return []
+
+         # Find all transitive properties
+         type_pairs = self._get_facts_with_predicate(type_id, graph_id)
+         transitive_props: Set[TermId] = set()
+         for s, o in type_pairs:
+             if o == trans_type_id:
+                 transitive_props.add(s)
+
+         if not transitive_props:
+             return []
+
+         new_facts = []
+
+         # For each transitive property, compose one step of its closure
+         for prop_id in transitive_props:
+             prop_pairs = self._get_facts_with_predicate(prop_id, graph_id)
+             if not prop_pairs:
+                 continue
+
+             # Build adjacency map
+             adjacency: dict[TermId, Set[TermId]] = {}
+             for x, y in prop_pairs:
+                 if x not in adjacency:
+                     adjacency[x] = set()
+                 adjacency[x].add(y)
+
+             # Compose one hop; the outer fixed-point loop completes the closure
+             for x, ys in adjacency.items():
+                 for y in list(ys):
+                     if y in adjacency:
+                         for z in adjacency[y]:
+                             fact = (graph_id, x, prop_id, z)
+                             if fact not in existing and x != z:
+                                 new_facts.append(fact)
+                                 stats.owl_transitive_inferences += 1
+
+         return new_facts
+
+     def _apply_owl_symmetric(
+         self,
+         vocab: dict,
+         existing: Set[Tuple[TermId, TermId, TermId, TermId]],
+         graph_id: TermId,
+         stats: ReasoningStats,
+     ) -> List[Tuple[TermId, TermId, TermId, TermId]]:
+         """
+         owl:SymmetricProperty symmetry.
+
+         (p rdf:type SymmetricProperty) + (x p y) => (y p x)
+         """
+         sym_type_id = vocab.get("SymmetricProperty")
+         type_id = vocab.get("type")
+         if sym_type_id is None or type_id is None:
+             return []
+
+         # Find all symmetric properties
+         type_pairs = self._get_facts_with_predicate(type_id, graph_id)
+         symmetric_props: Set[TermId] = set()
+         for s, o in type_pairs:
+             if o == sym_type_id:
+                 symmetric_props.add(s)
+
+         if not symmetric_props:
+             return []
+
+         new_facts = []
+
+         # For each symmetric property, add inverse facts
+         for prop_id in symmetric_props:
+             prop_pairs = self._get_facts_with_predicate(prop_id, graph_id)
+             for x, y in prop_pairs:
+                 fact = (graph_id, y, prop_id, x)
+                 if fact not in existing and x != y:
+                     new_facts.append(fact)
+                     stats.owl_symmetric_inferences += 1
+
+         return new_facts
+
+     def _apply_owl_functional(
+         self,
+         vocab: dict,
+         existing: Set[Tuple[TermId, TermId, TermId, TermId]],
+         graph_id: TermId,
+         stats: ReasoningStats,
+     ) -> List[Tuple[TermId, TermId, TermId, TermId]]:
+         """
+         owl:FunctionalProperty sameAs inference.
+
+         (p rdf:type FunctionalProperty) + (x p y1) + (x p y2) => (y1 sameAs y2)
+         """
+         func_type_id = vocab.get("FunctionalProperty")
+         type_id = vocab.get("type")
+         same_as_id = vocab.get("sameAs")
+         if func_type_id is None or type_id is None or same_as_id is None:
+             return []
+
+         # Find all functional properties
+         type_pairs = self._get_facts_with_predicate(type_id, graph_id)
+         functional_props: Set[TermId] = set()
+         for s, o in type_pairs:
+             if o == func_type_id:
+                 functional_props.add(s)
+
+         if not functional_props:
+             return []
+
+         new_facts = []
+
+         # For each functional property, find conflicting values
+         for prop_id in functional_props:
+             prop_pairs = self._get_facts_with_predicate(prop_id, graph_id)
+
+             # Group by subject
+             by_subject: dict[TermId, List[TermId]] = {}
+             for x, y in prop_pairs:
+                 if x not in by_subject:
+                     by_subject[x] = []
+                 by_subject[x].append(y)
+
+             # If multiple values, they must be sameAs
+             for x, ys in by_subject.items():
+                 if len(ys) > 1:
+                     for i, y1 in enumerate(ys):
+                         for y2 in ys[i + 1:]:
+                             fact = (graph_id, y1, same_as_id, y2)
+                             if fact not in existing and y1 != y2:
+                                 new_facts.append(fact)
+                                 stats.owl_functional_inferences += 1
+
+         return new_facts
+
+     def _apply_owl_inverse_functional(
+         self,
+         vocab: dict,
+         existing: Set[Tuple[TermId, TermId, TermId, TermId]],
+         graph_id: TermId,
+         stats: ReasoningStats,
+     ) -> List[Tuple[TermId, TermId, TermId, TermId]]:
+         """
+         owl:InverseFunctionalProperty sameAs inference.
+
+         (p rdf:type InverseFunctionalProperty) + (x1 p y) + (x2 p y) => (x1 sameAs x2)
+         """
+         inv_func_type_id = vocab.get("InverseFunctionalProperty")
+         type_id = vocab.get("type")
+         same_as_id = vocab.get("sameAs")
+         if inv_func_type_id is None or type_id is None or same_as_id is None:
+             return []
+
+         # Find all inverse functional properties
+         type_pairs = self._get_facts_with_predicate(type_id, graph_id)
+         inv_functional_props: Set[TermId] = set()
+         for s, o in type_pairs:
+             if o == inv_func_type_id:
+                 inv_functional_props.add(s)
+
+         if not inv_functional_props:
+             return []
+
+         new_facts = []
+
+         # For each inverse functional property, find conflicting subjects
+         for prop_id in inv_functional_props:
+             prop_pairs = self._get_facts_with_predicate(prop_id, graph_id)
+
+             # Group by object
+             by_object: dict[TermId, List[TermId]] = {}
+             for x, y in prop_pairs:
+                 if y not in by_object:
+                     by_object[y] = []
+                 by_object[y].append(x)
+
+             # If multiple subjects, they must be sameAs
+             for y, xs in by_object.items():
+                 if len(xs) > 1:
+                     for i, x1 in enumerate(xs):
+                         for x2 in xs[i + 1:]:
+                             fact = (graph_id, x1, same_as_id, x2)
+                             if fact not in existing and x1 != x2:
+                                 new_facts.append(fact)
+                                 stats.owl_inverse_functional_inferences += 1
+
+         return new_facts
+
+     def _apply_owl_has_value(
+         self,
+         vocab: dict,
+         existing: Set[Tuple[TermId, TermId, TermId, TermId]],
+         graph_id: TermId,
+         stats: ReasoningStats,
+     ) -> List[Tuple[TermId, TermId, TermId, TermId]]:
+         """
+         owl:hasValue restriction inference.
+
+         (C owl:onProperty p) + (C owl:hasValue v) + (x rdf:type C) => (x p v)
+         Also: (C owl:onProperty p) + (C owl:hasValue v) + (x p v) => (x rdf:type C)
+         """
+         has_value_id = vocab.get("hasValue")
+         on_property_id = vocab.get("onProperty")
+         type_id = vocab.get("type")
+         if has_value_id is None or on_property_id is None or type_id is None:
+             return []
+
+         # Get hasValue and onProperty declarations
+         has_value_pairs = self._get_facts_with_predicate(has_value_id, graph_id)
+         on_property_pairs = self._get_facts_with_predicate(on_property_id, graph_id)
+
+         if not has_value_pairs or not on_property_pairs:
+             return []
+
+         # Build restriction maps: C -> (p, v)
+         restrictions: dict[TermId, Tuple[TermId, TermId]] = {}
+
+         # Map C -> p
+         c_to_prop: dict[TermId, TermId] = {}
+         for c, p in on_property_pairs:
+             c_to_prop[c] = p
+
+         # Map C -> v and combine
+         for c, v in has_value_pairs:
+             if c in c_to_prop:
+                 restrictions[c] = (c_to_prop[c], v)
+
+         if not restrictions:
+             return []
+
+         new_facts = []
+
+         # Get type assertions
+         type_pairs = self._get_facts_with_predicate(type_id, graph_id)
+
+         # Forward: (x type C) => (x p v)
+         for x, c in type_pairs:
+             if c in restrictions:
+                 p, v = restrictions[c]
+                 fact = (graph_id, x, p, v)
+                 if fact not in existing:
+                     new_facts.append(fact)
+                     stats.owl_has_value_inferences += 1
+
+         # Backward: (x p v) => (x type C)
+         df = self._fact_store.scan_facts()
+         filtered = df.filter(
+             (pl.col("g") == graph_id) &
+             (~(pl.col("flags").cast(pl.Int32) & int(FactFlags.DELETED)).cast(pl.Boolean))
+         )
+
+         # Build reverse lookup: (p, v) -> C
+         pv_to_class: dict[Tuple[TermId, TermId], TermId] = {}
+         for c, (p, v) in restrictions.items():
+             pv_to_class[(p, v)] = c
+
+         for row in filtered.iter_rows(named=True):
+             key = (row["p"], row["o"])
+             if key in pv_to_class:
+                 c = pv_to_class[key]
+                 fact = (graph_id, row["s"], type_id, c)
+                 if fact not in existing:
+                     new_facts.append(fact)
+                     stats.owl_has_value_inferences += 1
+
+         return new_facts
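
# --- Illustrative sketch: not part of the packaged module -------------------
# Both directions of the hasValue rule on toy string triples.  The restriction
# class ex:Dane is described by (owl:onProperty ex:country, owl:hasValue ex:DK).
restriction = {"onProperty": "ex:country", "hasValue": "ex:DK"}
facts = {("ex:ida", "rdf:type", "ex:Dane"), ("ex:nils", "ex:country", "ex:DK")}
# Forward: members of the restriction class carry the value.
forward = {(x, restriction["onProperty"], restriction["hasValue"])
           for x, p, c in facts if p == "rdf:type" and c == "ex:Dane"}
# Backward: anything holding the value is typed into the restriction class.
backward = {(x, "rdf:type", "ex:Dane")
            for x, p, v in facts
            if p == restriction["onProperty"] and v == restriction["hasValue"]}
# forward == {("ex:ida", "ex:country", "ex:DK")}
# backward == {("ex:nils", "rdf:type", "ex:Dane")}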
+
+     def get_inferred_count(self, graph_id: TermId = DEFAULT_GRAPH_ID) -> int:
+         """Count the number of inferred facts in the store."""
+         df = self._fact_store.scan_facts()
+         return df.filter(
+             (pl.col("g") == graph_id) &
+             ((pl.col("flags").cast(pl.Int32) & int(FactFlags.INFERRED)).cast(pl.Boolean))
+         ).height
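
# --- Illustrative usage sketch: not part of the packaged module -------------
# How the pieces above fit together.  Only get_or_create() and add_facts_batch()
# are visible in this module; the no-argument constructors and the exact
# add_facts_batch() signature for asserted facts are assumptions made for
# illustration only.
from rdf_starbase.storage.terms import TermDict, Term, TermKind
from rdf_starbase.storage.facts import FactStore, DEFAULT_GRAPH_ID

term_dict = TermDict()    # assumed constructor
fact_store = FactStore()  # assumed constructor

def iri(value):
    return term_dict.get_or_create(Term(kind=TermKind.IRI, lex=value))

alice = iri("http://example.org/alice")
student = iri("http://example.org/Student")
person = iri("http://example.org/Person")

fact_store.add_facts_batch([
    (DEFAULT_GRAPH_ID, alice, iri(RDF_TYPE), student),
    (DEFAULT_GRAPH_ID, student, iri(RDFS_SUBCLASS_OF), person),
])

stats = RDFSReasoner(term_dict, fact_store).reason()
# Expected: stats.rdfs9_inferences == 1, i.e. (alice rdf:type Person) is
# materialized into the store with the INFERRED flag.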