rdf-starbase 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdf_starbase/__init__.py +57 -0
- rdf_starbase/ai_grounding.py +728 -0
- rdf_starbase/compat/__init__.py +26 -0
- rdf_starbase/compat/rdflib.py +1104 -0
- rdf_starbase/formats/__init__.py +29 -0
- rdf_starbase/formats/jsonld.py +488 -0
- rdf_starbase/formats/ntriples.py +419 -0
- rdf_starbase/formats/rdfxml.py +434 -0
- rdf_starbase/formats/turtle.py +882 -0
- rdf_starbase/models.py +92 -0
- rdf_starbase/registry.py +540 -0
- rdf_starbase/repositories.py +407 -0
- rdf_starbase/repository_api.py +739 -0
- rdf_starbase/sparql/__init__.py +35 -0
- rdf_starbase/sparql/ast.py +910 -0
- rdf_starbase/sparql/executor.py +1925 -0
- rdf_starbase/sparql/parser.py +1716 -0
- rdf_starbase/storage/__init__.py +44 -0
- rdf_starbase/storage/executor.py +1914 -0
- rdf_starbase/storage/facts.py +850 -0
- rdf_starbase/storage/lsm.py +531 -0
- rdf_starbase/storage/persistence.py +338 -0
- rdf_starbase/storage/quoted_triples.py +292 -0
- rdf_starbase/storage/reasoner.py +1035 -0
- rdf_starbase/storage/terms.py +628 -0
- rdf_starbase/store.py +1049 -0
- rdf_starbase/store_legacy.py +748 -0
- rdf_starbase/web.py +568 -0
- rdf_starbase-0.1.0.dist-info/METADATA +706 -0
- rdf_starbase-0.1.0.dist-info/RECORD +31 -0
- rdf_starbase-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,1035 @@
|
|
|
1
|
+
"""
|
|
2
|
+
RDFS and OWL Reasoning Engine.
|
|
3
|
+
|
|
4
|
+
Implements forward-chaining RDFS and OWL 2 RL entailment for RDF-StarBase.
|
|
5
|
+
Materializes inferred triples into the FactStore with INFERRED flag.
|
|
6
|
+
|
|
7
|
+
Supported RDFS entailment rules:
|
|
8
|
+
- rdfs2: Domain inference (x p y) + (p rdfs:domain C) => (x rdf:type C)
|
|
9
|
+
- rdfs3: Range inference (x p y) + (p rdfs:range C) => (y rdf:type C)
|
|
10
|
+
- rdfs5: Transitive subPropertyOf (p1 subProp p2) + (p2 subProp p3) => (p1 subProp p3)
|
|
11
|
+
- rdfs7: Property inheritance (x p1 y) + (p1 subProp p2) => (x p2 y)
|
|
12
|
+
- rdfs9: Type inheritance (x type C1) + (C1 subClass C2) => (x type C2)
|
|
13
|
+
- rdfs11: Transitive subClassOf (C1 subClass C2) + (C2 subClass C3) => (C1 subClass C3)
|
|
14
|
+
|
|
15
|
+
Supported OWL 2 RL entailment rules:
|
|
16
|
+
- owl:sameAs symmetry and transitivity
|
|
17
|
+
- owl:equivalentClass => mutual rdfs:subClassOf
|
|
18
|
+
- owl:equivalentProperty => mutual rdfs:subPropertyOf
|
|
19
|
+
- owl:inverseOf (x p y) + (p inverseOf q) => (y q x)
|
|
20
|
+
- owl:TransitiveProperty (x p y) + (y p z) => (x p z)
|
|
21
|
+
- owl:SymmetricProperty (x p y) => (y p x)
|
|
22
|
+
- owl:FunctionalProperty (x p y1) + (x p y2) => (y1 owl:sameAs y2)
|
|
23
|
+
- owl:InverseFunctionalProperty (x1 p y) + (x2 p y) => (x1 owl:sameAs x2)
|
|
24
|
+
- owl:hasValue + owl:onProperty class membership inference
|
|
25
|
+
- owl:someValuesFrom existence inference
|
|
26
|
+
- owl:intersectionOf class membership
|
|
27
|
+
|
|
28
|
+
Implementation approach: Forward-chaining with fixed-point iteration.
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
from typing import Set, Tuple, Optional, List
|
|
32
|
+
from dataclasses import dataclass, field
|
|
33
|
+
|
|
34
|
+
import polars as pl
|
|
35
|
+
|
|
36
|
+
from rdf_starbase.storage.terms import TermDict, TermId, Term, TermKind
|
|
37
|
+
from rdf_starbase.storage.facts import FactStore, FactFlags, DEFAULT_GRAPH_ID
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# RDFS vocabulary IRIs
RDFS_NS = "http://www.w3.org/2000/01/rdf-schema#"
RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
OWL_NS = "http://www.w3.org/2002/07/owl#"

# Schema-level predicates the RDFS rules (rdfs2/3/5/7/9/11) key on.
RDFS_SUBCLASS_OF = RDFS_NS + "subClassOf"
RDFS_SUBPROPERTY_OF = RDFS_NS + "subPropertyOf"
RDFS_DOMAIN = RDFS_NS + "domain"
RDFS_RANGE = RDFS_NS + "range"
RDF_TYPE = RDF_NS + "type"

# OWL vocabulary IRIs (used only when OWL reasoning is enabled).
OWL_SAME_AS = OWL_NS + "sameAs"
OWL_EQUIVALENT_CLASS = OWL_NS + "equivalentClass"
OWL_EQUIVALENT_PROPERTY = OWL_NS + "equivalentProperty"
OWL_INVERSE_OF = OWL_NS + "inverseOf"
OWL_TRANSITIVE_PROPERTY = OWL_NS + "TransitiveProperty"
OWL_SYMMETRIC_PROPERTY = OWL_NS + "SymmetricProperty"
OWL_FUNCTIONAL_PROPERTY = OWL_NS + "FunctionalProperty"
OWL_INVERSE_FUNCTIONAL_PROPERTY = OWL_NS + "InverseFunctionalProperty"
OWL_HAS_VALUE = OWL_NS + "hasValue"
OWL_ON_PROPERTY = OWL_NS + "onProperty"
OWL_SOME_VALUES_FROM = OWL_NS + "someValuesFrom"
OWL_ALL_VALUES_FROM = OWL_NS + "allValuesFrom"
OWL_INTERSECTION_OF = OWL_NS + "intersectionOf"
# rdf:List vocabulary — presumably needed to walk owl:intersectionOf
# class lists (see module docstring); confirm against the list-walking code.
RDF_FIRST = RDF_NS + "first"
RDF_REST = RDF_NS + "rest"
RDF_NIL = RDF_NS + "nil"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass
class ReasoningStats:
    """Statistics from a reasoning run.

    Every counter now defaults to 0, so an "empty" result can be built as
    ``ReasoningStats()`` instead of spelling out eight zeros; existing
    positional and keyword construction keeps working unchanged.
    """

    # Fixed-point iterations executed before convergence (or the cutoff).
    iterations: int = 0
    # Total new triples materialized across all rules and iterations.
    triples_inferred: int = 0
    # Per-rule RDFS counters
    rdfs2_inferences: int = 0   # domain
    rdfs3_inferences: int = 0   # range
    rdfs5_inferences: int = 0   # subPropertyOf transitivity
    rdfs7_inferences: int = 0   # property inheritance
    rdfs9_inferences: int = 0   # type inheritance
    rdfs11_inferences: int = 0  # subClassOf transitivity
    # Per-rule OWL counters
    owl_same_as_inferences: int = 0
    owl_equivalent_class_inferences: int = 0
    owl_equivalent_property_inferences: int = 0
    owl_inverse_of_inferences: int = 0
    owl_transitive_inferences: int = 0
    owl_symmetric_inferences: int = 0
    owl_functional_inferences: int = 0
    owl_inverse_functional_inferences: int = 0
    owl_has_value_inferences: int = 0
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class RDFSReasoner:
|
|
94
|
+
"""
|
|
95
|
+
Forward-chaining RDFS and OWL 2 RL reasoner.
|
|
96
|
+
|
|
97
|
+
Materializes RDFS and OWL entailments into the FactStore.
|
|
98
|
+
Uses fixed-point iteration to compute transitive closure.
|
|
99
|
+
"""
|
|
100
|
+
|
|
101
|
+
def __init__(
|
|
102
|
+
self,
|
|
103
|
+
term_dict: TermDict,
|
|
104
|
+
fact_store: FactStore,
|
|
105
|
+
max_iterations: int = 100,
|
|
106
|
+
enable_owl: bool = True,
|
|
107
|
+
):
|
|
108
|
+
"""
|
|
109
|
+
Initialize the reasoner.
|
|
110
|
+
|
|
111
|
+
Args:
|
|
112
|
+
term_dict: TermDict for term lookup
|
|
113
|
+
fact_store: FactStore containing the facts
|
|
114
|
+
max_iterations: Maximum iterations for fixed-point (default 100)
|
|
115
|
+
enable_owl: Enable OWL reasoning (default True)
|
|
116
|
+
"""
|
|
117
|
+
self._term_dict = term_dict
|
|
118
|
+
self._fact_store = fact_store
|
|
119
|
+
self._max_iterations = max_iterations
|
|
120
|
+
self._enable_owl = enable_owl
|
|
121
|
+
|
|
122
|
+
# Cache vocabulary term IDs
|
|
123
|
+
self._vocab_ids: Optional[dict] = None
|
|
124
|
+
|
|
125
|
+
def _ensure_vocab_ids(self) -> dict:
|
|
126
|
+
"""Ensure vocabulary term IDs are cached."""
|
|
127
|
+
if self._vocab_ids is not None:
|
|
128
|
+
return self._vocab_ids
|
|
129
|
+
|
|
130
|
+
# Get IDs for RDFS vocabulary terms (only if they exist in data)
|
|
131
|
+
vocab_terms = [
|
|
132
|
+
(RDFS_SUBCLASS_OF, "subClassOf"),
|
|
133
|
+
(RDFS_SUBPROPERTY_OF, "subPropertyOf"),
|
|
134
|
+
(RDFS_DOMAIN, "domain"),
|
|
135
|
+
(RDFS_RANGE, "range"),
|
|
136
|
+
]
|
|
137
|
+
|
|
138
|
+
# OWL vocabulary terms
|
|
139
|
+
owl_vocab_terms = [
|
|
140
|
+
(OWL_SAME_AS, "sameAs"),
|
|
141
|
+
(OWL_EQUIVALENT_CLASS, "equivalentClass"),
|
|
142
|
+
(OWL_EQUIVALENT_PROPERTY, "equivalentProperty"),
|
|
143
|
+
(OWL_INVERSE_OF, "inverseOf"),
|
|
144
|
+
(OWL_TRANSITIVE_PROPERTY, "TransitiveProperty"),
|
|
145
|
+
(OWL_SYMMETRIC_PROPERTY, "SymmetricProperty"),
|
|
146
|
+
(OWL_FUNCTIONAL_PROPERTY, "FunctionalProperty"),
|
|
147
|
+
(OWL_INVERSE_FUNCTIONAL_PROPERTY, "InverseFunctionalProperty"),
|
|
148
|
+
(OWL_HAS_VALUE, "hasValue"),
|
|
149
|
+
(OWL_ON_PROPERTY, "onProperty"),
|
|
150
|
+
(OWL_SOME_VALUES_FROM, "someValuesFrom"),
|
|
151
|
+
(OWL_INTERSECTION_OF, "intersectionOf"),
|
|
152
|
+
(RDF_FIRST, "first"),
|
|
153
|
+
(RDF_REST, "rest"),
|
|
154
|
+
(RDF_NIL, "nil"),
|
|
155
|
+
]
|
|
156
|
+
|
|
157
|
+
self._vocab_ids = {}
|
|
158
|
+
for iri, name in vocab_terms:
|
|
159
|
+
term = Term(kind=TermKind.IRI, lex=iri)
|
|
160
|
+
term_id = self._term_dict.get_id(term)
|
|
161
|
+
if term_id is not None:
|
|
162
|
+
self._vocab_ids[name] = term_id
|
|
163
|
+
|
|
164
|
+
# Add OWL vocabulary if enabled
|
|
165
|
+
if self._enable_owl:
|
|
166
|
+
for iri, name in owl_vocab_terms:
|
|
167
|
+
term = Term(kind=TermKind.IRI, lex=iri)
|
|
168
|
+
term_id = self._term_dict.get_id(term)
|
|
169
|
+
if term_id is not None:
|
|
170
|
+
self._vocab_ids[name] = term_id
|
|
171
|
+
|
|
172
|
+
# Check if we need to create vocabulary terms for inference output
|
|
173
|
+
needs_type = (
|
|
174
|
+
"domain" in self._vocab_ids or
|
|
175
|
+
"range" in self._vocab_ids or
|
|
176
|
+
"subClassOf" in self._vocab_ids or
|
|
177
|
+
"hasValue" in self._vocab_ids
|
|
178
|
+
)
|
|
179
|
+
needs_same_as = (
|
|
180
|
+
"FunctionalProperty" in self._vocab_ids or
|
|
181
|
+
"InverseFunctionalProperty" in self._vocab_ids
|
|
182
|
+
)
|
|
183
|
+
needs_subclass = "equivalentClass" in self._vocab_ids
|
|
184
|
+
needs_subprop = "equivalentProperty" in self._vocab_ids
|
|
185
|
+
|
|
186
|
+
# Create vocabulary terms that will be used in inferred triples
|
|
187
|
+
if needs_type:
|
|
188
|
+
type_term = Term(kind=TermKind.IRI, lex=RDF_TYPE)
|
|
189
|
+
self._vocab_ids["type"] = self._term_dict.get_or_create(type_term)
|
|
190
|
+
else:
|
|
191
|
+
type_term = Term(kind=TermKind.IRI, lex=RDF_TYPE)
|
|
192
|
+
type_id = self._term_dict.get_id(type_term)
|
|
193
|
+
if type_id is not None:
|
|
194
|
+
self._vocab_ids["type"] = type_id
|
|
195
|
+
|
|
196
|
+
if needs_same_as and "sameAs" not in self._vocab_ids:
|
|
197
|
+
same_term = Term(kind=TermKind.IRI, lex=OWL_SAME_AS)
|
|
198
|
+
self._vocab_ids["sameAs"] = self._term_dict.get_or_create(same_term)
|
|
199
|
+
|
|
200
|
+
if needs_subclass and "subClassOf" not in self._vocab_ids:
|
|
201
|
+
subclass_term = Term(kind=TermKind.IRI, lex=RDFS_SUBCLASS_OF)
|
|
202
|
+
self._vocab_ids["subClassOf"] = self._term_dict.get_or_create(subclass_term)
|
|
203
|
+
|
|
204
|
+
if needs_subprop and "subPropertyOf" not in self._vocab_ids:
|
|
205
|
+
subprop_term = Term(kind=TermKind.IRI, lex=RDFS_SUBPROPERTY_OF)
|
|
206
|
+
self._vocab_ids["subPropertyOf"] = self._term_dict.get_or_create(subprop_term)
|
|
207
|
+
|
|
208
|
+
return self._vocab_ids
|
|
209
|
+
|
|
210
|
+
    def reason(self, graph_id: TermId = DEFAULT_GRAPH_ID) -> ReasoningStats:
        """
        Run RDFS and OWL forward-chaining inference.

        Materializes all entailments into the FactStore.

        Args:
            graph_id: Graph to reason over (default: default graph)

        Returns:
            ReasoningStats with counts of inferred triples
        """
        vocab = self._ensure_vocab_ids()

        # If no vocabulary in the data, nothing to infer
        if not vocab:
            return ReasoningStats(0, 0, 0, 0, 0, 0, 0, 0)

        stats = ReasoningStats(
            iterations=0,
            triples_inferred=0,
            rdfs2_inferences=0,
            rdfs3_inferences=0,
            rdfs5_inferences=0,
            rdfs7_inferences=0,
            rdfs9_inferences=0,
            rdfs11_inferences=0,
        )

        # Track existing facts to avoid duplicates.
        # NOTE(review): this scan is NOT filtered by graph or DELETED flag
        # (unlike the per-rule scans), so tombstoned facts also suppress
        # re-inference — confirm that is the intended semantics.
        existing_facts: Set[Tuple[TermId, TermId, TermId, TermId]] = set()
        df = self._fact_store.scan_facts()
        for row in df.iter_rows(named=True):
            existing_facts.add((row["g"], row["s"], row["p"], row["o"]))

        # Fixed-point iteration: apply every rule, persist whatever is new,
        # repeat until a full pass yields nothing (or max_iterations hit).
        for iteration in range(self._max_iterations):
            stats.iterations = iteration + 1
            new_facts: List[Tuple[TermId, TermId, TermId, TermId]] = []

            # RDFS rules (always applied). Schema-level rules (rdfs11/5)
            # run before instance-level ones so later rules in the same
            # pass can already see some of this pass's schema entailments.
            new_facts.extend(self._apply_rdfs11(vocab, existing_facts, graph_id, stats))
            new_facts.extend(self._apply_rdfs5(vocab, existing_facts, graph_id, stats))
            new_facts.extend(self._apply_rdfs9(vocab, existing_facts, graph_id, stats))
            new_facts.extend(self._apply_rdfs7(vocab, existing_facts, graph_id, stats))
            new_facts.extend(self._apply_rdfs2(vocab, existing_facts, graph_id, stats))
            new_facts.extend(self._apply_rdfs3(vocab, existing_facts, graph_id, stats))

            # OWL rules (if enabled)
            if self._enable_owl:
                new_facts.extend(self._apply_owl_same_as(vocab, existing_facts, graph_id, stats))
                new_facts.extend(self._apply_owl_equivalent_class(vocab, existing_facts, graph_id, stats))
                new_facts.extend(self._apply_owl_equivalent_property(vocab, existing_facts, graph_id, stats))
                new_facts.extend(self._apply_owl_inverse_of(vocab, existing_facts, graph_id, stats))
                new_facts.extend(self._apply_owl_transitive(vocab, existing_facts, graph_id, stats))
                new_facts.extend(self._apply_owl_symmetric(vocab, existing_facts, graph_id, stats))
                new_facts.extend(self._apply_owl_functional(vocab, existing_facts, graph_id, stats))
                new_facts.extend(self._apply_owl_inverse_functional(vocab, existing_facts, graph_id, stats))
                new_facts.extend(self._apply_owl_has_value(vocab, existing_facts, graph_id, stats))

            if not new_facts:
                # Fixed point reached
                break

            # Add new facts to store and existing set
            self._fact_store.add_facts_batch(
                new_facts,
                flags=FactFlags.INFERRED,
                # NOTE(review): this passes the rdf:type TermId as the
                # provenance "process" marker — looks accidental; confirm
                # a dedicated reasoner-process term wasn't intended.
                process=vocab.get("type"),  # Mark with process if available
            )

            for fact in new_facts:
                existing_facts.add(fact)

            stats.triples_inferred += len(new_facts)

        return stats
|
|
287
|
+
|
|
288
|
+
def _get_facts_with_predicate(
|
|
289
|
+
self,
|
|
290
|
+
predicate_id: TermId,
|
|
291
|
+
graph_id: TermId,
|
|
292
|
+
) -> List[Tuple[TermId, TermId]]:
|
|
293
|
+
"""Get all (subject, object) pairs for a given predicate."""
|
|
294
|
+
df = self._fact_store.scan_facts()
|
|
295
|
+
|
|
296
|
+
filtered = df.filter(
|
|
297
|
+
(pl.col("p") == predicate_id) &
|
|
298
|
+
(pl.col("g") == graph_id) &
|
|
299
|
+
(~(pl.col("flags").cast(pl.Int32) & int(FactFlags.DELETED)).cast(pl.Boolean))
|
|
300
|
+
)
|
|
301
|
+
|
|
302
|
+
return [
|
|
303
|
+
(row["s"], row["o"])
|
|
304
|
+
for row in filtered.select(["s", "o"]).iter_rows(named=True)
|
|
305
|
+
]
|
|
306
|
+
|
|
307
|
+
def _apply_rdfs11(
|
|
308
|
+
self,
|
|
309
|
+
vocab: dict,
|
|
310
|
+
existing: Set[Tuple[TermId, TermId, TermId, TermId]],
|
|
311
|
+
graph_id: TermId,
|
|
312
|
+
stats: ReasoningStats,
|
|
313
|
+
) -> List[Tuple[TermId, TermId, TermId, TermId]]:
|
|
314
|
+
"""
|
|
315
|
+
RDFS11: Transitive subClassOf.
|
|
316
|
+
|
|
317
|
+
(C1 subClassOf C2) + (C2 subClassOf C3) => (C1 subClassOf C3)
|
|
318
|
+
"""
|
|
319
|
+
subclass_id = vocab.get("subClassOf")
|
|
320
|
+
if subclass_id is None:
|
|
321
|
+
return []
|
|
322
|
+
|
|
323
|
+
# Get all subClassOf facts
|
|
324
|
+
subclass_pairs = self._get_facts_with_predicate(subclass_id, graph_id)
|
|
325
|
+
if not subclass_pairs:
|
|
326
|
+
return []
|
|
327
|
+
|
|
328
|
+
# Build adjacency map: C1 -> [C2, C3, ...]
|
|
329
|
+
subclass_of: dict[TermId, Set[TermId]] = {}
|
|
330
|
+
for c1, c2 in subclass_pairs:
|
|
331
|
+
if c1 not in subclass_of:
|
|
332
|
+
subclass_of[c1] = set()
|
|
333
|
+
subclass_of[c1].add(c2)
|
|
334
|
+
|
|
335
|
+
# Find transitive closures
|
|
336
|
+
new_facts = []
|
|
337
|
+
for c1, direct_supers in subclass_of.items():
|
|
338
|
+
for c2 in list(direct_supers):
|
|
339
|
+
if c2 in subclass_of:
|
|
340
|
+
for c3 in subclass_of[c2]:
|
|
341
|
+
fact = (graph_id, c1, subclass_id, c3)
|
|
342
|
+
if fact not in existing and c1 != c3:
|
|
343
|
+
new_facts.append(fact)
|
|
344
|
+
stats.rdfs11_inferences += 1
|
|
345
|
+
|
|
346
|
+
return new_facts
|
|
347
|
+
|
|
348
|
+
def _apply_rdfs5(
|
|
349
|
+
self,
|
|
350
|
+
vocab: dict,
|
|
351
|
+
existing: Set[Tuple[TermId, TermId, TermId, TermId]],
|
|
352
|
+
graph_id: TermId,
|
|
353
|
+
stats: ReasoningStats,
|
|
354
|
+
) -> List[Tuple[TermId, TermId, TermId, TermId]]:
|
|
355
|
+
"""
|
|
356
|
+
RDFS5: Transitive subPropertyOf.
|
|
357
|
+
|
|
358
|
+
(p1 subProp p2) + (p2 subProp p3) => (p1 subProp p3)
|
|
359
|
+
"""
|
|
360
|
+
subprop_id = vocab.get("subPropertyOf")
|
|
361
|
+
if subprop_id is None:
|
|
362
|
+
return []
|
|
363
|
+
|
|
364
|
+
# Get all subPropertyOf facts
|
|
365
|
+
subprop_pairs = self._get_facts_with_predicate(subprop_id, graph_id)
|
|
366
|
+
if not subprop_pairs:
|
|
367
|
+
return []
|
|
368
|
+
|
|
369
|
+
# Build adjacency map
|
|
370
|
+
subprop_of: dict[TermId, Set[TermId]] = {}
|
|
371
|
+
for p1, p2 in subprop_pairs:
|
|
372
|
+
if p1 not in subprop_of:
|
|
373
|
+
subprop_of[p1] = set()
|
|
374
|
+
subprop_of[p1].add(p2)
|
|
375
|
+
|
|
376
|
+
# Find transitive closures
|
|
377
|
+
new_facts = []
|
|
378
|
+
for p1, direct_supers in subprop_of.items():
|
|
379
|
+
for p2 in list(direct_supers):
|
|
380
|
+
if p2 in subprop_of:
|
|
381
|
+
for p3 in subprop_of[p2]:
|
|
382
|
+
fact = (graph_id, p1, subprop_id, p3)
|
|
383
|
+
if fact not in existing and p1 != p3:
|
|
384
|
+
new_facts.append(fact)
|
|
385
|
+
stats.rdfs5_inferences += 1
|
|
386
|
+
|
|
387
|
+
return new_facts
|
|
388
|
+
|
|
389
|
+
def _apply_rdfs9(
|
|
390
|
+
self,
|
|
391
|
+
vocab: dict,
|
|
392
|
+
existing: Set[Tuple[TermId, TermId, TermId, TermId]],
|
|
393
|
+
graph_id: TermId,
|
|
394
|
+
stats: ReasoningStats,
|
|
395
|
+
) -> List[Tuple[TermId, TermId, TermId, TermId]]:
|
|
396
|
+
"""
|
|
397
|
+
RDFS9: Type inheritance through subClassOf.
|
|
398
|
+
|
|
399
|
+
(x rdf:type C1) + (C1 subClassOf C2) => (x rdf:type C2)
|
|
400
|
+
"""
|
|
401
|
+
type_id = vocab.get("type")
|
|
402
|
+
subclass_id = vocab.get("subClassOf")
|
|
403
|
+
if type_id is None or subclass_id is None:
|
|
404
|
+
return []
|
|
405
|
+
|
|
406
|
+
# Get type assertions
|
|
407
|
+
type_pairs = self._get_facts_with_predicate(type_id, graph_id)
|
|
408
|
+
if not type_pairs:
|
|
409
|
+
return []
|
|
410
|
+
|
|
411
|
+
# Get subClassOf hierarchy
|
|
412
|
+
subclass_pairs = self._get_facts_with_predicate(subclass_id, graph_id)
|
|
413
|
+
if not subclass_pairs:
|
|
414
|
+
return []
|
|
415
|
+
|
|
416
|
+
# Build subClassOf map: C1 -> [C2, ...]
|
|
417
|
+
subclass_of: dict[TermId, Set[TermId]] = {}
|
|
418
|
+
for c1, c2 in subclass_pairs:
|
|
419
|
+
if c1 not in subclass_of:
|
|
420
|
+
subclass_of[c1] = set()
|
|
421
|
+
subclass_of[c1].add(c2)
|
|
422
|
+
|
|
423
|
+
# Infer types
|
|
424
|
+
new_facts = []
|
|
425
|
+
for x, c1 in type_pairs:
|
|
426
|
+
if c1 in subclass_of:
|
|
427
|
+
for c2 in subclass_of[c1]:
|
|
428
|
+
fact = (graph_id, x, type_id, c2)
|
|
429
|
+
if fact not in existing:
|
|
430
|
+
new_facts.append(fact)
|
|
431
|
+
stats.rdfs9_inferences += 1
|
|
432
|
+
|
|
433
|
+
return new_facts
|
|
434
|
+
|
|
435
|
+
def _apply_rdfs7(
|
|
436
|
+
self,
|
|
437
|
+
vocab: dict,
|
|
438
|
+
existing: Set[Tuple[TermId, TermId, TermId, TermId]],
|
|
439
|
+
graph_id: TermId,
|
|
440
|
+
stats: ReasoningStats,
|
|
441
|
+
) -> List[Tuple[TermId, TermId, TermId, TermId]]:
|
|
442
|
+
"""
|
|
443
|
+
RDFS7: Property inheritance through subPropertyOf.
|
|
444
|
+
|
|
445
|
+
(x p1 y) + (p1 subProp p2) => (x p2 y)
|
|
446
|
+
"""
|
|
447
|
+
subprop_id = vocab.get("subPropertyOf")
|
|
448
|
+
if subprop_id is None:
|
|
449
|
+
return []
|
|
450
|
+
|
|
451
|
+
# Get subPropertyOf hierarchy
|
|
452
|
+
subprop_pairs = self._get_facts_with_predicate(subprop_id, graph_id)
|
|
453
|
+
if not subprop_pairs:
|
|
454
|
+
return []
|
|
455
|
+
|
|
456
|
+
# Build subPropertyOf map: p1 -> [p2, ...]
|
|
457
|
+
subprop_of: dict[TermId, Set[TermId]] = {}
|
|
458
|
+
for p1, p2 in subprop_pairs:
|
|
459
|
+
if p1 not in subprop_of:
|
|
460
|
+
subprop_of[p1] = set()
|
|
461
|
+
subprop_of[p1].add(p2)
|
|
462
|
+
|
|
463
|
+
# Get all facts and apply property inheritance
|
|
464
|
+
df = self._fact_store.scan_facts()
|
|
465
|
+
filtered = df.filter(
|
|
466
|
+
(pl.col("g") == graph_id) &
|
|
467
|
+
(~(pl.col("flags").cast(pl.Int32) & int(FactFlags.DELETED)).cast(pl.Boolean))
|
|
468
|
+
)
|
|
469
|
+
|
|
470
|
+
new_facts = []
|
|
471
|
+
for row in filtered.iter_rows(named=True):
|
|
472
|
+
p1 = row["p"]
|
|
473
|
+
if p1 in subprop_of:
|
|
474
|
+
for p2 in subprop_of[p1]:
|
|
475
|
+
fact = (graph_id, row["s"], p2, row["o"])
|
|
476
|
+
if fact not in existing:
|
|
477
|
+
new_facts.append(fact)
|
|
478
|
+
stats.rdfs7_inferences += 1
|
|
479
|
+
|
|
480
|
+
return new_facts
|
|
481
|
+
|
|
482
|
+
def _apply_rdfs2(
|
|
483
|
+
self,
|
|
484
|
+
vocab: dict,
|
|
485
|
+
existing: Set[Tuple[TermId, TermId, TermId, TermId]],
|
|
486
|
+
graph_id: TermId,
|
|
487
|
+
stats: ReasoningStats,
|
|
488
|
+
) -> List[Tuple[TermId, TermId, TermId, TermId]]:
|
|
489
|
+
"""
|
|
490
|
+
RDFS2: Domain inference.
|
|
491
|
+
|
|
492
|
+
(x p y) + (p rdfs:domain C) => (x rdf:type C)
|
|
493
|
+
"""
|
|
494
|
+
domain_id = vocab.get("domain")
|
|
495
|
+
type_id = vocab.get("type")
|
|
496
|
+
if domain_id is None or type_id is None:
|
|
497
|
+
return []
|
|
498
|
+
|
|
499
|
+
# Get domain declarations
|
|
500
|
+
domain_pairs = self._get_facts_with_predicate(domain_id, graph_id)
|
|
501
|
+
if not domain_pairs:
|
|
502
|
+
return []
|
|
503
|
+
|
|
504
|
+
# Build domain map: p -> C
|
|
505
|
+
domain_of: dict[TermId, TermId] = {}
|
|
506
|
+
for p, c in domain_pairs:
|
|
507
|
+
domain_of[p] = c
|
|
508
|
+
|
|
509
|
+
# Get all facts and apply domain inference
|
|
510
|
+
df = self._fact_store.scan_facts()
|
|
511
|
+
filtered = df.filter(
|
|
512
|
+
(pl.col("g") == graph_id) &
|
|
513
|
+
(~(pl.col("flags").cast(pl.Int32) & int(FactFlags.DELETED)).cast(pl.Boolean))
|
|
514
|
+
)
|
|
515
|
+
|
|
516
|
+
new_facts = []
|
|
517
|
+
for row in filtered.iter_rows(named=True):
|
|
518
|
+
p = row["p"]
|
|
519
|
+
if p in domain_of:
|
|
520
|
+
c = domain_of[p]
|
|
521
|
+
fact = (graph_id, row["s"], type_id, c)
|
|
522
|
+
if fact not in existing:
|
|
523
|
+
new_facts.append(fact)
|
|
524
|
+
stats.rdfs2_inferences += 1
|
|
525
|
+
|
|
526
|
+
return new_facts
|
|
527
|
+
|
|
528
|
+
def _apply_rdfs3(
|
|
529
|
+
self,
|
|
530
|
+
vocab: dict,
|
|
531
|
+
existing: Set[Tuple[TermId, TermId, TermId, TermId]],
|
|
532
|
+
graph_id: TermId,
|
|
533
|
+
stats: ReasoningStats,
|
|
534
|
+
) -> List[Tuple[TermId, TermId, TermId, TermId]]:
|
|
535
|
+
"""
|
|
536
|
+
RDFS3: Range inference.
|
|
537
|
+
|
|
538
|
+
(x p y) + (p rdfs:range C) => (y rdf:type C)
|
|
539
|
+
"""
|
|
540
|
+
range_id = vocab.get("range")
|
|
541
|
+
type_id = vocab.get("type")
|
|
542
|
+
if range_id is None or type_id is None:
|
|
543
|
+
return []
|
|
544
|
+
|
|
545
|
+
# Get range declarations
|
|
546
|
+
range_pairs = self._get_facts_with_predicate(range_id, graph_id)
|
|
547
|
+
if not range_pairs:
|
|
548
|
+
return []
|
|
549
|
+
|
|
550
|
+
# Build range map: p -> C
|
|
551
|
+
range_of: dict[TermId, TermId] = {}
|
|
552
|
+
for p, c in range_pairs:
|
|
553
|
+
range_of[p] = c
|
|
554
|
+
|
|
555
|
+
# Get all facts and apply range inference
|
|
556
|
+
df = self._fact_store.scan_facts()
|
|
557
|
+
filtered = df.filter(
|
|
558
|
+
(pl.col("g") == graph_id) &
|
|
559
|
+
(~(pl.col("flags").cast(pl.Int32) & int(FactFlags.DELETED)).cast(pl.Boolean))
|
|
560
|
+
)
|
|
561
|
+
|
|
562
|
+
new_facts = []
|
|
563
|
+
for row in filtered.iter_rows(named=True):
|
|
564
|
+
p = row["p"]
|
|
565
|
+
if p in range_of:
|
|
566
|
+
c = range_of[p]
|
|
567
|
+
fact = (graph_id, row["o"], type_id, c)
|
|
568
|
+
if fact not in existing:
|
|
569
|
+
new_facts.append(fact)
|
|
570
|
+
stats.rdfs3_inferences += 1
|
|
571
|
+
|
|
572
|
+
return new_facts
|
|
573
|
+
|
|
574
|
+
# =========================================================================
|
|
575
|
+
# OWL Entailment Rules
|
|
576
|
+
# =========================================================================
|
|
577
|
+
|
|
578
|
+
def _apply_owl_same_as(
|
|
579
|
+
self,
|
|
580
|
+
vocab: dict,
|
|
581
|
+
existing: Set[Tuple[TermId, TermId, TermId, TermId]],
|
|
582
|
+
graph_id: TermId,
|
|
583
|
+
stats: ReasoningStats,
|
|
584
|
+
) -> List[Tuple[TermId, TermId, TermId, TermId]]:
|
|
585
|
+
"""
|
|
586
|
+
owl:sameAs symmetry and transitivity.
|
|
587
|
+
|
|
588
|
+
Symmetry: (x sameAs y) => (y sameAs x)
|
|
589
|
+
Transitivity: (x sameAs y) + (y sameAs z) => (x sameAs z)
|
|
590
|
+
"""
|
|
591
|
+
same_as_id = vocab.get("sameAs")
|
|
592
|
+
if same_as_id is None:
|
|
593
|
+
return []
|
|
594
|
+
|
|
595
|
+
# Get all sameAs pairs
|
|
596
|
+
same_pairs = self._get_facts_with_predicate(same_as_id, graph_id)
|
|
597
|
+
if not same_pairs:
|
|
598
|
+
return []
|
|
599
|
+
|
|
600
|
+
new_facts = []
|
|
601
|
+
|
|
602
|
+
# Symmetry: (x sameAs y) => (y sameAs x)
|
|
603
|
+
for x, y in same_pairs:
|
|
604
|
+
fact = (graph_id, y, same_as_id, x)
|
|
605
|
+
if fact not in existing and x != y:
|
|
606
|
+
new_facts.append(fact)
|
|
607
|
+
stats.owl_same_as_inferences += 1
|
|
608
|
+
|
|
609
|
+
# Build adjacency for transitivity
|
|
610
|
+
same_as_map: dict[TermId, Set[TermId]] = {}
|
|
611
|
+
for x, y in same_pairs:
|
|
612
|
+
if x not in same_as_map:
|
|
613
|
+
same_as_map[x] = set()
|
|
614
|
+
same_as_map[x].add(y)
|
|
615
|
+
|
|
616
|
+
# Transitivity: (x sameAs y) + (y sameAs z) => (x sameAs z)
|
|
617
|
+
for x, ys in same_as_map.items():
|
|
618
|
+
for y in list(ys):
|
|
619
|
+
if y in same_as_map:
|
|
620
|
+
for z in same_as_map[y]:
|
|
621
|
+
fact = (graph_id, x, same_as_id, z)
|
|
622
|
+
if fact not in existing and x != z:
|
|
623
|
+
new_facts.append(fact)
|
|
624
|
+
stats.owl_same_as_inferences += 1
|
|
625
|
+
|
|
626
|
+
return new_facts
|
|
627
|
+
|
|
628
|
+
def _apply_owl_equivalent_class(
|
|
629
|
+
self,
|
|
630
|
+
vocab: dict,
|
|
631
|
+
existing: Set[Tuple[TermId, TermId, TermId, TermId]],
|
|
632
|
+
graph_id: TermId,
|
|
633
|
+
stats: ReasoningStats,
|
|
634
|
+
) -> List[Tuple[TermId, TermId, TermId, TermId]]:
|
|
635
|
+
"""
|
|
636
|
+
owl:equivalentClass => mutual rdfs:subClassOf.
|
|
637
|
+
|
|
638
|
+
(C1 equivalentClass C2) => (C1 subClassOf C2) + (C2 subClassOf C1)
|
|
639
|
+
"""
|
|
640
|
+
equiv_id = vocab.get("equivalentClass")
|
|
641
|
+
subclass_id = vocab.get("subClassOf")
|
|
642
|
+
if equiv_id is None or subclass_id is None:
|
|
643
|
+
return []
|
|
644
|
+
|
|
645
|
+
equiv_pairs = self._get_facts_with_predicate(equiv_id, graph_id)
|
|
646
|
+
if not equiv_pairs:
|
|
647
|
+
return []
|
|
648
|
+
|
|
649
|
+
new_facts = []
|
|
650
|
+
for c1, c2 in equiv_pairs:
|
|
651
|
+
# C1 subClassOf C2
|
|
652
|
+
fact1 = (graph_id, c1, subclass_id, c2)
|
|
653
|
+
if fact1 not in existing:
|
|
654
|
+
new_facts.append(fact1)
|
|
655
|
+
stats.owl_equivalent_class_inferences += 1
|
|
656
|
+
|
|
657
|
+
# C2 subClassOf C1
|
|
658
|
+
fact2 = (graph_id, c2, subclass_id, c1)
|
|
659
|
+
if fact2 not in existing:
|
|
660
|
+
new_facts.append(fact2)
|
|
661
|
+
stats.owl_equivalent_class_inferences += 1
|
|
662
|
+
|
|
663
|
+
return new_facts
|
|
664
|
+
|
|
665
|
+
def _apply_owl_equivalent_property(
|
|
666
|
+
self,
|
|
667
|
+
vocab: dict,
|
|
668
|
+
existing: Set[Tuple[TermId, TermId, TermId, TermId]],
|
|
669
|
+
graph_id: TermId,
|
|
670
|
+
stats: ReasoningStats,
|
|
671
|
+
) -> List[Tuple[TermId, TermId, TermId, TermId]]:
|
|
672
|
+
"""
|
|
673
|
+
owl:equivalentProperty => mutual rdfs:subPropertyOf.
|
|
674
|
+
|
|
675
|
+
(p1 equivalentProperty p2) => (p1 subPropertyOf p2) + (p2 subPropertyOf p1)
|
|
676
|
+
"""
|
|
677
|
+
equiv_id = vocab.get("equivalentProperty")
|
|
678
|
+
subprop_id = vocab.get("subPropertyOf")
|
|
679
|
+
if equiv_id is None or subprop_id is None:
|
|
680
|
+
return []
|
|
681
|
+
|
|
682
|
+
equiv_pairs = self._get_facts_with_predicate(equiv_id, graph_id)
|
|
683
|
+
if not equiv_pairs:
|
|
684
|
+
return []
|
|
685
|
+
|
|
686
|
+
new_facts = []
|
|
687
|
+
for p1, p2 in equiv_pairs:
|
|
688
|
+
# p1 subPropertyOf p2
|
|
689
|
+
fact1 = (graph_id, p1, subprop_id, p2)
|
|
690
|
+
if fact1 not in existing:
|
|
691
|
+
new_facts.append(fact1)
|
|
692
|
+
stats.owl_equivalent_property_inferences += 1
|
|
693
|
+
|
|
694
|
+
# p2 subPropertyOf p1
|
|
695
|
+
fact2 = (graph_id, p2, subprop_id, p1)
|
|
696
|
+
if fact2 not in existing:
|
|
697
|
+
new_facts.append(fact2)
|
|
698
|
+
stats.owl_equivalent_property_inferences += 1
|
|
699
|
+
|
|
700
|
+
return new_facts
|
|
701
|
+
|
|
702
|
+
def _apply_owl_inverse_of(
|
|
703
|
+
self,
|
|
704
|
+
vocab: dict,
|
|
705
|
+
existing: Set[Tuple[TermId, TermId, TermId, TermId]],
|
|
706
|
+
graph_id: TermId,
|
|
707
|
+
stats: ReasoningStats,
|
|
708
|
+
) -> List[Tuple[TermId, TermId, TermId, TermId]]:
|
|
709
|
+
"""
|
|
710
|
+
owl:inverseOf property inversion.
|
|
711
|
+
|
|
712
|
+
(x p y) + (p inverseOf q) => (y q x)
|
|
713
|
+
"""
|
|
714
|
+
inverse_id = vocab.get("inverseOf")
|
|
715
|
+
if inverse_id is None:
|
|
716
|
+
return []
|
|
717
|
+
|
|
718
|
+
# Get inverse declarations
|
|
719
|
+
inverse_pairs = self._get_facts_with_predicate(inverse_id, graph_id)
|
|
720
|
+
if not inverse_pairs:
|
|
721
|
+
return []
|
|
722
|
+
|
|
723
|
+
# Build inverse map: p -> q
|
|
724
|
+
inverse_of: dict[TermId, TermId] = {}
|
|
725
|
+
for p, q in inverse_pairs:
|
|
726
|
+
inverse_of[p] = q
|
|
727
|
+
# inverseOf is symmetric
|
|
728
|
+
inverse_of[q] = p
|
|
729
|
+
|
|
730
|
+
# Get all facts and apply inverse inference
|
|
731
|
+
df = self._fact_store.scan_facts()
|
|
732
|
+
filtered = df.filter(
|
|
733
|
+
(pl.col("g") == graph_id) &
|
|
734
|
+
(~(pl.col("flags").cast(pl.Int32) & int(FactFlags.DELETED)).cast(pl.Boolean))
|
|
735
|
+
)
|
|
736
|
+
|
|
737
|
+
new_facts = []
|
|
738
|
+
for row in filtered.iter_rows(named=True):
|
|
739
|
+
p = row["p"]
|
|
740
|
+
if p in inverse_of:
|
|
741
|
+
q = inverse_of[p]
|
|
742
|
+
# (x p y) => (y q x)
|
|
743
|
+
fact = (graph_id, row["o"], q, row["s"])
|
|
744
|
+
if fact not in existing:
|
|
745
|
+
new_facts.append(fact)
|
|
746
|
+
stats.owl_inverse_of_inferences += 1
|
|
747
|
+
|
|
748
|
+
return new_facts
|
|
749
|
+
|
|
750
|
+
def _apply_owl_transitive(
    self,
    vocab: dict,
    existing: Set[Tuple[TermId, TermId, TermId, TermId]],
    graph_id: TermId,
    stats: ReasoningStats,
) -> List[Tuple[TermId, TermId, TermId, TermId]]:
    """
    owl:TransitiveProperty transitive closure (one step per call; the
    surrounding fixed-point loop is expected to re-invoke until no new
    facts appear).

    (p rdf:type TransitiveProperty) + (x p y) + (y p z) => (x p z)

    Fix over the previous version: the same closure fact (x, p, z) can be
    reachable through several intermediates y; it is now emitted (and
    counted in stats) only once per pass.
    """
    trans_type_id = vocab.get("TransitiveProperty")
    type_id = vocab.get("type")
    if trans_type_id is None or type_id is None:
        return []

    # Find all properties typed owl:TransitiveProperty
    type_pairs = self._get_facts_with_predicate(type_id, graph_id)
    transitive_props: Set[TermId] = {s for s, o in type_pairs if o == trans_type_id}
    if not transitive_props:
        return []

    new_facts = []
    # Facts produced during this pass, to suppress duplicates arising
    # from multiple y-paths between the same x and z.
    emitted: Set[Tuple[TermId, TermId, TermId, TermId]] = set()

    # For each transitive property, join its edge relation with itself
    for prop_id in transitive_props:
        prop_pairs = self._get_facts_with_predicate(prop_id, graph_id)
        if not prop_pairs:
            continue

        # Build adjacency map: x -> {y | (x p y)}
        adjacency: dict[TermId, Set[TermId]] = {}
        for x, y in prop_pairs:
            adjacency.setdefault(x, set()).add(y)

        # One composition step: x -> y -> z yields x -> z
        for x, ys in adjacency.items():
            for y in list(ys):
                if y not in adjacency:
                    continue
                for z in adjacency[y]:
                    if x == z:
                        # Skip self-loops introduced by cycles
                        continue
                    fact = (graph_id, x, prop_id, z)
                    if fact not in existing and fact not in emitted:
                        emitted.add(fact)
                        new_facts.append(fact)
                        stats.owl_transitive_inferences += 1

    return new_facts
|
|
803
|
+
|
|
804
|
+
def _apply_owl_symmetric(
    self,
    vocab: dict,
    existing: Set[Tuple[TermId, TermId, TermId, TermId]],
    graph_id: TermId,
    stats: ReasoningStats,
) -> List[Tuple[TermId, TermId, TermId, TermId]]:
    """
    owl:SymmetricProperty symmetry.

    (p rdf:type SymmetricProperty) + (x p y) => (y p x)
    """
    sym_type_id = vocab.get("SymmetricProperty")
    type_id = vocab.get("type")
    if sym_type_id is None or type_id is None:
        return []

    # Collect every property asserted to be owl:SymmetricProperty.
    type_assertions = self._get_facts_with_predicate(type_id, graph_id)
    symmetric_props = {subj for subj, obj in type_assertions if obj == sym_type_id}
    if not symmetric_props:
        return []

    inferred: List[Tuple[TermId, TermId, TermId, TermId]] = []

    # Mirror every edge of each symmetric property.
    for prop in symmetric_props:
        for subj, obj in self._get_facts_with_predicate(prop, graph_id):
            if subj == obj:
                # A reflexive fact is its own mirror image — nothing to add.
                continue
            mirrored = (graph_id, obj, prop, subj)
            if mirrored not in existing:
                inferred.append(mirrored)
                stats.owl_symmetric_inferences += 1

    return inferred
|
|
843
|
+
|
|
844
|
+
def _apply_owl_functional(
    self,
    vocab: dict,
    existing: Set[Tuple[TermId, TermId, TermId, TermId]],
    graph_id: TermId,
    stats: ReasoningStats,
) -> List[Tuple[TermId, TermId, TermId, TermId]]:
    """
    owl:FunctionalProperty sameAs inference.

    (p rdf:type FunctionalProperty) + (x p y1) + (x p y2) => (y1 sameAs y2)
    """
    func_type_id = vocab.get("FunctionalProperty")
    type_id = vocab.get("type")
    same_as_id = vocab.get("sameAs")
    if func_type_id is None or type_id is None or same_as_id is None:
        return []

    # Every property asserted to be owl:FunctionalProperty.
    type_assertions = self._get_facts_with_predicate(type_id, graph_id)
    functional_props = {subj for subj, obj in type_assertions if obj == func_type_id}
    if not functional_props:
        return []

    inferred: List[Tuple[TermId, TermId, TermId, TermId]] = []

    for prop in functional_props:
        # Group the property's objects under each subject; a functional
        # property admits at most one value, so distinct values for one
        # subject must denote the same individual.
        values_by_subject: dict[TermId, List[TermId]] = {}
        for subj, obj in self._get_facts_with_predicate(prop, graph_id):
            values_by_subject.setdefault(subj, []).append(obj)

        for vals in values_by_subject.values():
            if len(vals) < 2:
                continue
            # Emit sameAs for every unordered pair of differing values.
            for i in range(len(vals) - 1):
                for j in range(i + 1, len(vals)):
                    y1, y2 = vals[i], vals[j]
                    if y1 == y2:
                        continue
                    fact = (graph_id, y1, same_as_id, y2)
                    if fact not in existing:
                        inferred.append(fact)
                        stats.owl_functional_inferences += 1

    return inferred
|
|
896
|
+
|
|
897
|
+
def _apply_owl_inverse_functional(
    self,
    vocab: dict,
    existing: Set[Tuple[TermId, TermId, TermId, TermId]],
    graph_id: TermId,
    stats: ReasoningStats,
) -> List[Tuple[TermId, TermId, TermId, TermId]]:
    """
    owl:InverseFunctionalProperty sameAs inference.

    (p rdf:type InverseFunctionalProperty) + (x1 p y) + (x2 p y) => (x1 sameAs x2)
    """
    inv_func_type_id = vocab.get("InverseFunctionalProperty")
    type_id = vocab.get("type")
    same_as_id = vocab.get("sameAs")
    if inv_func_type_id is None or type_id is None or same_as_id is None:
        return []

    # Every property asserted to be owl:InverseFunctionalProperty.
    type_assertions = self._get_facts_with_predicate(type_id, graph_id)
    inv_functional_props = {subj for subj, obj in type_assertions if obj == inv_func_type_id}
    if not inv_functional_props:
        return []

    inferred: List[Tuple[TermId, TermId, TermId, TermId]] = []

    for prop in inv_functional_props:
        # Group the property's subjects under each object; an inverse
        # functional property identifies its subject, so two subjects
        # sharing one object must denote the same individual.
        subjects_by_object: dict[TermId, List[TermId]] = {}
        for subj, obj in self._get_facts_with_predicate(prop, graph_id):
            subjects_by_object.setdefault(obj, []).append(subj)

        for subjects in subjects_by_object.values():
            if len(subjects) < 2:
                continue
            # Emit sameAs for every unordered pair of differing subjects.
            for i in range(len(subjects) - 1):
                for j in range(i + 1, len(subjects)):
                    x1, x2 = subjects[i], subjects[j]
                    if x1 == x2:
                        continue
                    fact = (graph_id, x1, same_as_id, x2)
                    if fact not in existing:
                        inferred.append(fact)
                        stats.owl_inverse_functional_inferences += 1

    return inferred
|
|
949
|
+
|
|
950
|
+
def _apply_owl_has_value(
    self,
    vocab: dict,
    existing: Set[Tuple[TermId, TermId, TermId, TermId]],
    graph_id: TermId,
    stats: ReasoningStats,
) -> List[Tuple[TermId, TermId, TermId, TermId]]:
    """
    owl:hasValue restriction inference.

    (C owl:onProperty p) + (C owl:hasValue v) + (x rdf:type C) => (x p v)
    Also: (C owl:onProperty p) + (C owl:hasValue v) + (x p v) => (x rdf:type C)

    Fix over the previous version: the backward lookup mapped each (p, v)
    pair to a single class, so when two restriction classes shared the
    same property/value only one of them received type inferences.  Both
    directions now use multi-valued maps, and each inferred fact is
    emitted at most once per pass.
    """
    has_value_id = vocab.get("hasValue")
    on_property_id = vocab.get("onProperty")
    type_id = vocab.get("type")
    if has_value_id is None or on_property_id is None or type_id is None:
        return []

    # Get hasValue and onProperty declarations
    has_value_pairs = self._get_facts_with_predicate(has_value_id, graph_id)
    on_property_pairs = self._get_facts_with_predicate(on_property_id, graph_id)

    if not has_value_pairs or not on_property_pairs:
        return []

    # Map C -> p (a restriction node carries one owl:onProperty;
    # if several are asserted the last one wins, as before)
    c_to_prop: dict[TermId, TermId] = {}
    for c, p in on_property_pairs:
        c_to_prop[c] = p

    # Restriction map: C -> {(p, v), ...}.  A set keeps every hasValue
    # assertion on the class instead of only the last one seen.
    restrictions: dict[TermId, Set[Tuple[TermId, TermId]]] = {}
    for c, v in has_value_pairs:
        if c in c_to_prop:
            restrictions.setdefault(c, set()).add((c_to_prop[c], v))

    if not restrictions:
        return []

    new_facts = []
    # Facts produced during this pass, to avoid duplicate emission when
    # several restrictions derive the same triple.
    emitted: Set[Tuple[TermId, TermId, TermId, TermId]] = set()

    # Get type assertions
    type_pairs = self._get_facts_with_predicate(type_id, graph_id)

    # Forward: (x type C) => (x p v)
    for x, c in type_pairs:
        for p, v in restrictions.get(c, ()):
            fact = (graph_id, x, p, v)
            if fact not in existing and fact not in emitted:
                emitted.add(fact)
                new_facts.append(fact)
                stats.owl_has_value_inferences += 1

    # Backward: (x p v) => (x type C)
    # Reverse lookup: (p, v) -> {C, ...} — multi-valued so that classes
    # sharing a property/value restriction all receive inferences.
    pv_to_classes: dict[Tuple[TermId, TermId], Set[TermId]] = {}
    for c, pvs in restrictions.items():
        for pv in pvs:
            pv_to_classes.setdefault(pv, set()).add(c)

    df = self._fact_store.scan_facts()
    filtered = df.filter(
        (pl.col("g") == graph_id) &
        (~(pl.col("flags").cast(pl.Int32) & int(FactFlags.DELETED)).cast(pl.Boolean))
    )

    for row in filtered.iter_rows(named=True):
        for c in pv_to_classes.get((row["p"], row["o"]), ()):
            fact = (graph_id, row["s"], type_id, c)
            if fact not in existing and fact not in emitted:
                emitted.add(fact)
                new_facts.append(fact)
                stats.owl_has_value_inferences += 1

    return new_facts
|
|
1028
|
+
|
|
1029
|
+
def get_inferred_count(self, graph_id: TermId = DEFAULT_GRAPH_ID) -> int:
    """Count the number of inferred facts in the store."""
    facts = self._fact_store.scan_facts()
    # Named sub-expressions for the two filter conditions.
    in_graph = pl.col("g") == graph_id
    is_inferred = (pl.col("flags").cast(pl.Int32) & int(FactFlags.INFERRED)).cast(pl.Boolean)
    return facts.filter(in_graph & is_inferred).height
|