rdf-starbase 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdf_starbase/__init__.py +57 -0
- rdf_starbase/ai_grounding.py +728 -0
- rdf_starbase/compat/__init__.py +26 -0
- rdf_starbase/compat/rdflib.py +1104 -0
- rdf_starbase/formats/__init__.py +29 -0
- rdf_starbase/formats/jsonld.py +488 -0
- rdf_starbase/formats/ntriples.py +419 -0
- rdf_starbase/formats/rdfxml.py +434 -0
- rdf_starbase/formats/turtle.py +882 -0
- rdf_starbase/models.py +92 -0
- rdf_starbase/registry.py +540 -0
- rdf_starbase/repositories.py +407 -0
- rdf_starbase/repository_api.py +739 -0
- rdf_starbase/sparql/__init__.py +35 -0
- rdf_starbase/sparql/ast.py +910 -0
- rdf_starbase/sparql/executor.py +1925 -0
- rdf_starbase/sparql/parser.py +1716 -0
- rdf_starbase/storage/__init__.py +44 -0
- rdf_starbase/storage/executor.py +1914 -0
- rdf_starbase/storage/facts.py +850 -0
- rdf_starbase/storage/lsm.py +531 -0
- rdf_starbase/storage/persistence.py +338 -0
- rdf_starbase/storage/quoted_triples.py +292 -0
- rdf_starbase/storage/reasoner.py +1035 -0
- rdf_starbase/storage/terms.py +628 -0
- rdf_starbase/store.py +1049 -0
- rdf_starbase/store_legacy.py +748 -0
- rdf_starbase/web.py +568 -0
- rdf_starbase-0.1.0.dist-info/METADATA +706 -0
- rdf_starbase-0.1.0.dist-info/RECORD +31 -0
- rdf_starbase-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,1104 @@
|
|
|
1
|
+
"""
|
|
2
|
+
rdflib Compatibility Layer.
|
|
3
|
+
|
|
4
|
+
Provides a drop-in replacement for rdflib's Graph class, backed by RDF-StarBase's
|
|
5
|
+
high-performance Polars engine.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
# Instead of:
|
|
9
|
+
# from rdflib import Graph, URIRef, Literal, Namespace
|
|
10
|
+
|
|
11
|
+
# Use:
|
|
12
|
+
from rdf_starbase.compat.rdflib import Graph, URIRef, Literal, Namespace
|
|
13
|
+
|
|
14
|
+
g = Graph()
|
|
15
|
+
g.parse("data.ttl", format="turtle")
|
|
16
|
+
|
|
17
|
+
for s, p, o in g.triples((None, RDF.type, None)):
|
|
18
|
+
print(s)
|
|
19
|
+
|
|
20
|
+
g.serialize(destination="out.ttl", format="turtle")
|
|
21
|
+
|
|
22
|
+
This module aims to be API-compatible with rdflib while providing:
|
|
23
|
+
- 10-50x faster parsing
|
|
24
|
+
- 10-100x faster queries
|
|
25
|
+
- Native RDF-Star support
|
|
26
|
+
- Built-in provenance tracking
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from typing import Optional, Iterator, Tuple, Any, Union, IO
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
from io import StringIO, BytesIO
|
|
32
|
+
import re
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# =============================================================================
|
|
36
|
+
# RDF Term Classes (rdflib-compatible)
|
|
37
|
+
# =============================================================================
|
|
38
|
+
|
|
39
|
+
class Identifier:
    """Common ancestor of every RDF term type defined in this module.

    The class carries no state; the empty ``__slots__`` means the base
    itself never forces a per-instance ``__dict__`` onto its subclasses.
    """

    __slots__ = ()
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class URIRef(Identifier, str):
    """
    An RDF URI Reference.

    Compatible with rdflib.URIRef. Instances are plain ``str`` values, so
    they compare and hash exactly like the text of the URI they hold.
    """
    __slots__ = ()

    def __new__(cls, value: str, base: Optional[str] = None):
        """Create a URI reference, optionally resolving *value* against *base*.

        Resolution is deliberately simple (no ``..`` handling):

        * a *value* that already carries a URI scheme (``http:``, ``urn:``,
          ``mailto:``, ``file:`` ...) is kept untouched;
        * a fragment-only (``#x``) or query-only (``?x``) reference is
          appended directly to *base*;
        * anything else is appended to *base*, separated by exactly one ``/``.
        """
        if base is not None:
            # RFC 3986 scheme syntax: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) ":"
            is_absolute = re.match(r"[A-Za-z][A-Za-z0-9+.\-]*:", value) is not None
            if not is_absolute:
                if value.startswith(('#', '?')) or base.endswith('/'):
                    # No separator: "#frag" must not become "/#frag".
                    value = base + value
                else:
                    value = base + '/' + value
        return str.__new__(cls, value)

    def __repr__(self):
        return f"URIRef({super().__repr__()})"

    def __hash__(self):
        # Defining __eq__ clears the inherited hash, so restore str's.
        return str.__hash__(self)

    def __eq__(self, other):
        # Plain string equality; the comparison does not depend on whether
        # *other* is a URIRef or an ordinary str.
        return str.__eq__(self, other)

    def n3(self, namespace_manager=None) -> str:
        """Return N3/Turtle representation (always the full ``<uri>`` form)."""
        # TODO: Use namespace_manager for prefix compression
        return f"<{self}>"

    def toPython(self) -> str:
        """Return the URI as a plain Python string."""
        return str(self)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class Literal(Identifier):
    """
    An RDF Literal.

    Compatible with rdflib.Literal. A literal holds a Python value plus
    either a language tag or a datatype URI (never both).
    """
    __slots__ = ('_value', '_datatype', '_language')

    def __init__(
        self,
        value: Any,
        lang: Optional[str] = None,
        datatype: Optional[URIRef] = None
    ):
        """Build a literal.

        Args:
            value: The Python value (or lexical string) of the literal.
            lang: Optional language tag; stored lower-cased.
            datatype: Optional datatype URI. When neither *lang* nor
                *datatype* is given the datatype is inferred from the Python
                type of *value* (bool, int, float, otherwise xsd:string).

        Raises:
            TypeError: If both *lang* and *datatype* are supplied.
        """
        if lang is not None and datatype is not None:
            raise TypeError("Literal cannot have both lang and datatype")

        self._value = value
        self._language = lang.lower() if lang else None

        if datatype is not None:
            self._datatype = URIRef(datatype) if not isinstance(datatype, URIRef) else datatype
        elif self._language is not None:
            # Language-tagged literals have no datatype. Testing the
            # normalised tag means an empty ``lang=""`` falls through to the
            # inference below instead of yielding a literal with neither a
            # language nor a datatype.
            self._datatype = None
        elif isinstance(value, bool):
            # bool must be tested before int: bool is a subclass of int.
            self._datatype = XSD.boolean
        elif isinstance(value, int):
            self._datatype = XSD.integer
        elif isinstance(value, float):
            self._datatype = XSD.double
        else:
            self._datatype = XSD.string

    @property
    def value(self) -> Any:
        return self._value

    @property
    def language(self) -> Optional[str]:
        return self._language

    @property
    def datatype(self) -> Optional[URIRef]:
        return self._datatype

    def __str__(self):
        return str(self._value)

    def __repr__(self):
        if self._language:
            return f"Literal({self._value!r}, lang={self._language!r})"
        elif self._datatype and self._datatype != XSD.string:
            return f"Literal({self._value!r}, datatype={self._datatype!r})"
        return f"Literal({self._value!r})"

    def __hash__(self):
        return hash((str(self._value), self._language, self._datatype))

    def __eq__(self, other):
        if isinstance(other, Literal):
            return (
                str(self._value) == str(other._value) and
                self._language == other._language and
                self._datatype == other._datatype
            )
        # NOTE(review): comparing against a non-Literal only looks at the
        # string form, so Literal("a") == "a" is True although the hashes
        # differ. Kept as-is because callers may rely on it.
        return str(self._value) == str(other)

    def n3(self, namespace_manager=None) -> str:
        """Return N3/Turtle representation.

        Backslash, double quote, newline, carriage return and tab are
        escaped so the result is always a valid short-quoted string.
        Python booleans typed as xsd:boolean use the lexical forms
        ``true`` / ``false``.
        """
        if isinstance(self._value, bool) and self._datatype == XSD.boolean:
            value_str = "true" if self._value else "false"
        else:
            value_str = str(self._value)
        # One pass over the text; a raw line break inside "..." is illegal.
        value_str = value_str.translate(str.maketrans({
            '\\': '\\\\',
            '"': '\\"',
            '\n': '\\n',
            '\r': '\\r',
            '\t': '\\t',
        }))

        if self._language:
            return f'"{value_str}"@{self._language}'
        elif self._datatype and self._datatype != XSD.string:
            return f'"{value_str}"^^<{self._datatype}>'
        return f'"{value_str}"'

    def toPython(self) -> Any:
        """Convert to a Python native type based on the datatype.

        xsd:integer maps to ``int``, xsd:double and xsd:decimal to ``float``,
        xsd:boolean to ``bool``; everything else is returned as ``str``.
        """
        if self._datatype == XSD.integer:
            return int(self._value)
        elif self._datatype == XSD.double or self._datatype == XSD.decimal:
            return float(self._value)
        elif self._datatype == XSD.boolean:
            return str(self._value).lower() in ('true', '1')
        return str(self._value)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class BNode(Identifier):
    """
    An RDF blank node, identified by a label.

    Compatible with rdflib.BNode. Nodes created without an explicit label
    receive ``N1``, ``N2``, ... from a counter shared by the whole class.
    """
    __slots__ = ('_id',)
    _next_id = 0  # last automatically assigned label number

    def __init__(self, value: Optional[str] = None):
        if value is not None:
            self._id = value
            return
        # No label supplied: take the next number from the class counter.
        BNode._next_id += 1
        self._id = f"N{BNode._next_id}"

    def __str__(self):
        return self._id

    def __repr__(self):
        return "BNode({!r})".format(self._id)

    def __hash__(self):
        return hash(self._id)

    def __eq__(self, other):
        # Blank nodes are only ever equal to other blank nodes.
        if not isinstance(other, BNode):
            return False
        return self._id == other._id

    def n3(self, namespace_manager=None) -> str:
        """Return N3/Turtle representation (``_:label``)."""
        return "_:{}".format(self._id)

    def toPython(self) -> str:
        return self._id
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
# =============================================================================
|
|
213
|
+
# Namespace Support
|
|
214
|
+
# =============================================================================
|
|
215
|
+
|
|
216
|
+
class Namespace(URIRef):
    """
    An RDF namespace: a URI prefix from which terms are minted.

    Terms can be produced by attribute access, indexing, or ``term()``:
        RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
        RDF.type     # URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
        RDF["type"]  # same term
    """
    __slots__ = ()

    def __new__(cls, value: str):
        return URIRef.__new__(cls, value)

    def __getattr__(self, name: str) -> URIRef:
        # Only reached when normal attribute lookup fails. Underscore names
        # are refused so they are never mistaken for vocabulary terms.
        if not name.startswith('_'):
            return URIRef("{}{}".format(self, name))
        raise AttributeError(f"'{type(self).__name__}' has no attribute '{name}'")

    def __getitem__(self, key: str) -> URIRef:
        # Indexing mints a term rather than slicing the underlying string.
        return URIRef("{}{}".format(self, key))

    def term(self, name: str) -> URIRef:
        """Return the URIRef formed by appending *name* to this namespace."""
        return URIRef("{}{}".format(self, name))
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
class ClosedNamespace(Namespace):
    """A namespace with a fixed set of terms.

    Every access path enforces the term list: attribute access raises
    ``AttributeError`` for an unknown term, while ``ns[name]`` and
    ``ns.term(name)`` raise ``KeyError``.
    """

    def __new__(cls, uri: str, terms: list):
        inst = Namespace.__new__(cls, uri)
        # frozenset: constant-time membership tests, immutable afterwards.
        inst._terms = frozenset(terms)
        return inst

    def term(self, name: str) -> URIRef:
        """Return the URIRef for *name*.

        Raises:
            KeyError: If *name* is not one of the declared terms.
        """
        if name not in self._terms:
            raise KeyError(f"term '{name}' not in namespace")
        return URIRef(f"{self}{name}")

    def __getitem__(self, key: str) -> URIRef:
        # Item access goes through the same check as term().
        return self.term(key)

    def __getattr__(self, name: str) -> URIRef:
        if name.startswith('_'):
            raise AttributeError(f"'{type(self).__name__}' has no attribute '{name}'")
        if name not in self._terms:
            raise AttributeError(f"term '{name}' not in namespace")
        return URIRef(f"{self}{name}")
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
# Commonly used vocabularies, available as ready-made Namespace objects.
# W3C core vocabularies
RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
OWL = Namespace("http://www.w3.org/2002/07/owl#")
XSD = Namespace("http://www.w3.org/2001/XMLSchema#")
# Friend of a Friend
FOAF = Namespace("http://xmlns.com/foaf/0.1/")
# Dublin Core (elements and terms)
DC = Namespace("http://purl.org/dc/elements/1.1/")
DCTERMS = Namespace("http://purl.org/dc/terms/")
# SKOS and PROV
SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")
PROV = Namespace("http://www.w3.org/ns/prov#")
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
# =============================================================================
|
|
270
|
+
# Graph Class (Main API)
|
|
271
|
+
# =============================================================================
|
|
272
|
+
|
|
273
|
+
# Shape of one statement as yielded by Graph: (subject, predicate, object).
Triple = Tuple[Identifier, URIRef, Identifier]
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
class Graph:
|
|
278
|
+
"""
|
|
279
|
+
An RDF Graph backed by RDF-StarBase.
|
|
280
|
+
|
|
281
|
+
API-compatible with rdflib.Graph while providing superior performance.
|
|
282
|
+
|
|
283
|
+
Example:
|
|
284
|
+
g = Graph()
|
|
285
|
+
g.parse("data.ttl", format="turtle")
|
|
286
|
+
|
|
287
|
+
for s, p, o in g.triples((None, RDF.type, None)):
|
|
288
|
+
print(f"{s} is a {o}")
|
|
289
|
+
|
|
290
|
+
g.add((URIRef("http://example.org/s"), RDF.type, FOAF.Person))
|
|
291
|
+
g.serialize(destination="out.ttl", format="turtle")
|
|
292
|
+
"""
|
|
293
|
+
|
|
294
|
+
def __init__(self, store=None, identifier=None):
    """
    Set up an empty graph.

    Args:
        store: Accepted for rdflib signature compatibility only; it is
            ignored and a fresh TripleStore is always created.
        identifier: Graph identifier; a new BNode is used when this is
            missing or falsy.
    """
    # Imported lazily so that loading this module does not trigger a
    # circular import with the rdf_starbase package.
    from rdf_starbase import TripleStore

    self._store = TripleStore()
    self._identifier = identifier if identifier else BNode()
    self._namespace_manager = NamespaceManager(self)
|
|
308
|
+
|
|
309
|
+
@property
def store(self):
    """The TripleStore instance that holds this graph's data."""
    return self._store
|
|
313
|
+
|
|
314
|
+
@property
def identifier(self):
    """The identifier given at construction (or the generated BNode)."""
    return self._identifier
|
|
318
|
+
|
|
319
|
+
@property
def namespace_manager(self):
    """The NamespaceManager that tracks this graph's prefix bindings."""
    return self._namespace_manager
|
|
323
|
+
|
|
324
|
+
def __len__(self) -> int:
|
|
325
|
+
"""Return the number of triples in the graph."""
|
|
326
|
+
return len(self._store)
|
|
327
|
+
|
|
328
|
+
def __iter__(self) -> Iterator[Triple]:
|
|
329
|
+
"""Iterate over all triples."""
|
|
330
|
+
return self.triples((None, None, None))
|
|
331
|
+
|
|
332
|
+
def __contains__(self, triple: Triple) -> bool:
|
|
333
|
+
"""Check if a triple is in the graph."""
|
|
334
|
+
s, p, o = triple
|
|
335
|
+
for _ in self.triples((s, p, o)):
|
|
336
|
+
return True
|
|
337
|
+
return False
|
|
338
|
+
|
|
339
|
+
def add(self, triple: Triple) -> "Graph":
    """
    Insert one triple into the backing store.

    The assertion is recorded with source ``"rdflib_compat"`` and a
    confidence of 1.0.

    Args:
        triple: A (subject, predicate, object) tuple

    Returns:
        self for chaining
    """
    from rdf_starbase.models import ProvenanceContext

    subject_term, predicate_term, object_term = triple
    provenance = ProvenanceContext(source="rdflib_compat", confidence=1.0)
    # Subject and predicate are stored as plain strings; the object is
    # converted by _term_to_value.
    self._store.add_triple(
        subject=str(subject_term),
        predicate=str(predicate_term),
        obj=self._term_to_value(object_term),
        provenance=provenance,
    )
    return self
|
|
360
|
+
|
|
361
|
+
def remove(self, triple: Triple) -> "Graph":
    """
    Remove matching triples from the graph.

    Nothing is deleted physically: every matching row that carries an
    ``assertion_id`` is passed to the store's ``deprecate()``. Rows whose
    id cannot be turned into a UUID, or whose deprecation raises
    ``KeyError``, are skipped (best effort).

    Args:
        triple: A (subject, predicate, object) tuple with optional None wildcards

    Returns:
        self for chaining
    """
    from uuid import UUID

    s, p, o = triple
    # Get matching triples and deprecate them
    matches = self._store.get_triples(
        subject=str(s) if s is not None else None,
        predicate=str(p) if p is not None else None,
        obj=self._term_to_value(o) if o is not None else None,
    )

    for row in matches.iter_rows(named=True):
        if 'assertion_id' not in row:
            continue
        raw_id = row['assertion_id']
        try:
            # The id may already be a UUID object; UUID(<UUID>) would raise
            # AttributeError, so only parse values that are not UUIDs yet.
            assertion_id = raw_id if isinstance(raw_id, UUID) else UUID(str(raw_id))
            self._store.deprecate(assertion_id)
        except (ValueError, KeyError):
            # Malformed or unknown id: leave this row untouched.
            continue
    return self
|
|
388
|
+
|
|
389
|
+
def triples(
    self,
    pattern: Tuple[Optional[Identifier], Optional[URIRef], Optional[Identifier]]
) -> Iterator[Triple]:
    """
    Generate every stored triple that matches *pattern*.

    Args:
        pattern: (subject, predicate, object) with None as wildcard

    Yields:
        Matching (subject, predicate, object) tuples, rebuilt as term
        objects from the store's row values
    """
    s, p, o = pattern

    # Translate bound terms to the store's representation; None stays None.
    subject_filter = None if s is None else str(s)
    predicate_filter = None if p is None else str(p)
    object_filter = None if o is None else self._term_to_value(o)

    results = self._store.get_triples(
        subject=subject_filter,
        predicate=predicate_filter,
        obj=object_filter,
    )

    for row in results.iter_rows(named=True):
        subject_term = self._value_to_term(row['subject'], is_uri=True)
        predicate_term = URIRef(row['predicate'])
        object_term = self._value_to_term(row['object'])
        yield (subject_term, predicate_term, object_term)
|
|
416
|
+
|
|
417
|
+
def subjects(
    self,
    predicate: Optional[URIRef] = None,
    object: Optional[Identifier] = None,
    unique: bool = True
) -> Iterator[Identifier]:
    """Yield the subject of each triple matching (predicate, object).

    With ``unique`` true, each distinct subject is yielded once, in order
    of first appearance.
    """
    candidates = (subj for subj, _, _ in self.triples((None, predicate, object)))
    if not unique:
        yield from candidates
        return
    already_yielded = set()
    for subj in candidates:
        if subj not in already_yielded:
            already_yielded.add(subj)
            yield subj
|
|
431
|
+
|
|
432
|
+
def predicates(
    self,
    subject: Optional[Identifier] = None,
    object: Optional[Identifier] = None,
    unique: bool = True
) -> Iterator[URIRef]:
    """Yield the predicate of each triple matching (subject, object).

    With ``unique`` true, repeated predicates are suppressed.
    """
    emitted = set()
    for _, pred, _ in self.triples((subject, None, object)):
        if unique:
            if pred in emitted:
                continue
            emitted.add(pred)
        yield pred
|
|
446
|
+
|
|
447
|
+
def objects(
    self,
    subject: Optional[Identifier] = None,
    predicate: Optional[URIRef] = None,
    unique: bool = True
) -> Iterator[Identifier]:
    """Yield the object of each triple matching (subject, predicate).

    With ``unique`` true, an object already yielded is not yielded again.
    """
    matching = self.triples((subject, predicate, None))
    if unique:
        known = set()
        for _, _, obj in matching:
            if obj in known:
                continue
            known.add(obj)
            yield obj
    else:
        for _, _, obj in matching:
            yield obj
|
|
461
|
+
|
|
462
|
+
def subject_objects(
    self,
    predicate: Optional[URIRef] = None,
    unique: bool = True
) -> Iterator[Tuple[Identifier, Identifier]]:
    """Yield (subject, object) for each triple with the given predicate.

    With ``unique`` true, duplicate pairs are dropped.
    """
    emitted_pairs = set()
    for subj, _, obj in self.triples((None, predicate, None)):
        so_pair = (subj, obj)
        if unique:
            if so_pair in emitted_pairs:
                continue
            emitted_pairs.add(so_pair)
        yield so_pair
|
|
476
|
+
|
|
477
|
+
def subject_predicates(
    self,
    object: Optional[Identifier] = None,
    unique: bool = True
) -> Iterator[Tuple[Identifier, URIRef]]:
    """Yield (subject, predicate) for each triple with the given object.

    With ``unique`` true, duplicate pairs are dropped.
    """
    sp_pairs = ((subj, pred) for subj, pred, _ in self.triples((None, None, object)))
    if not unique:
        yield from sp_pairs
        return
    reported = set()
    for sp_pair in sp_pairs:
        if sp_pair not in reported:
            reported.add(sp_pair)
            yield sp_pair
|
|
491
|
+
|
|
492
|
+
def predicate_objects(
    self,
    subject: Optional[Identifier] = None,
    unique: bool = True
) -> Iterator[Tuple[URIRef, Identifier]]:
    """Yield (predicate, object) for each triple with the given subject.

    With ``unique`` true, duplicate pairs are dropped.
    """
    described = self.triples((subject, None, None))
    if unique:
        visited = set()
        for _, pred, obj in described:
            po_pair = (pred, obj)
            if po_pair in visited:
                continue
            visited.add(po_pair)
            yield po_pair
    else:
        for _, pred, obj in described:
            yield (pred, obj)
|
|
506
|
+
|
|
507
|
+
def value(
    self,
    subject: Optional[Identifier] = None,
    predicate: Optional[URIRef] = None,
    object: Optional[Identifier] = None,
    default: Any = None,
    any: bool = True
) -> Optional[Identifier]:
    """Return one component of the first triple matching the pattern.

    The component returned is the first unbound one, checked in the order
    subject, predicate; if both are bound the object is returned. When
    nothing matches, *default* is returned. The ``any`` flag is accepted
    but not consulted.
    """
    no_match = object_sentinel = []  # unique marker; a real row is never this list
    first = next(iter(self.triples((subject, predicate, object))), no_match)
    if first is object_sentinel:
        return default
    s, p, o = first
    if subject is None:
        return s
    if predicate is None:
        return p
    return o
|
|
524
|
+
|
|
525
|
+
def parse(
    self,
    source: Optional[Union[str, Path, IO]] = None,
    publicID: Optional[str] = None,
    format: Optional[str] = None,
    location: Optional[str] = None,
    file: Optional[IO] = None,
    data: Optional[Union[str, bytes]] = None,
    **kwargs
) -> "Graph":
    """
    Parse RDF data into this graph.

    Exactly one input is used, chosen in this order of precedence:
    ``data``, ``file``, ``source``, ``location``.

    Args:
        source: File path or file-like object
        publicID: The logical URI of the graph (currently unused)
        format: Format hint (turtle, xml, n3, nt, json-ld). When omitted it
            is guessed from the path of ``source`` if possible, otherwise
            Turtle is assumed.
        location: URL to fetch (not implemented)
        file: File-like object
        data: Raw string/bytes data

    Returns:
        self for chaining

    Raises:
        FileNotFoundError: ``source`` is a path that does not exist.
        TypeError: ``source`` is neither a path nor a readable object.
        NotImplementedError: Only ``location`` was given.
        ValueError: No input was given, or ``format`` is not recognised.
    """
    def _as_text(raw):
        # Bytes are assumed to be UTF-8; anything else is passed through.
        if isinstance(raw, (bytes, bytearray)):
            return raw.decode('utf-8')
        return raw

    # Determine the content to parse
    if data is not None:
        content = _as_text(data)
    elif file is not None:
        content = _as_text(file.read())
    elif source is not None:
        if isinstance(source, (str, Path)):
            path = Path(source)
            if not path.exists():
                # Might be a URL, but fetching is not supported here.
                raise FileNotFoundError(f"File not found: {source}")
            content = path.read_text(encoding='utf-8')
            if format is None:
                format = self._guess_format(path)
        elif hasattr(source, 'read'):
            content = _as_text(source.read())
        else:
            # Fail here instead of handing ``None`` to a parser.
            raise TypeError(
                f"Unsupported source type: {type(source).__name__}"
            )
    elif location is not None:
        raise NotImplementedError("URL fetching not implemented yet")
    else:
        raise ValueError("No input source provided")

    # Determine format
    if format is None:
        format = 'turtle'  # Default

    format = format.lower()
    # Each accepted spelling maps to the parsing method that handles it.
    parser_by_format = {
        'ttl': '_parse_turtle',
        'turtle': '_parse_turtle',
        'n3': '_parse_turtle',
        'nt': '_parse_ntriples',
        'ntriples': '_parse_ntriples',
        'n-triples': '_parse_ntriples',
        'xml': '_parse_rdfxml',
        'rdf/xml': '_parse_rdfxml',
        'rdfxml': '_parse_rdfxml',
        'application/rdf+xml': '_parse_rdfxml',
        'json-ld': '_parse_jsonld',
        'jsonld': '_parse_jsonld',
    }
    parser_name = parser_by_format.get(format)
    if parser_name is None:
        raise ValueError(f"Unknown format: {format}")
    getattr(self, parser_name)(content)

    return self
|
|
595
|
+
|
|
596
|
+
def serialize(
    self,
    destination: Optional[Union[str, Path, IO]] = None,
    format: str = "turtle",
    base: Optional[str] = None,
    encoding: Optional[str] = None,
    **kwargs
) -> Optional[str]:
    """
    Serialize the graph to RDF.

    Args:
        destination: File path or file-like object (None for string return)
        format: Output format (turtle, xml, nt, json-ld)
        base: Base URI (currently unused)
        encoding: Character encoding used when writing to a path or to a
            binary stream; defaults to UTF-8

    Returns:
        Serialized string if destination is None, otherwise None

    Raises:
        ValueError: If ``format`` is not recognised.
    """
    format = format.lower()

    if format in ('ttl', 'turtle', 'n3'):
        content = self._serialize_turtle()
    elif format in ('nt', 'ntriples', 'n-triples'):
        content = self._serialize_ntriples()
    elif format in ('xml', 'rdf/xml', 'rdfxml', 'pretty-xml'):
        content = self._serialize_rdfxml()
    elif format in ('json-ld', 'jsonld'):
        content = self._serialize_jsonld()
    else:
        raise ValueError(f"Unknown format: {format}")

    if destination is None:
        return content

    if isinstance(destination, (str, Path)):
        Path(destination).write_text(content, encoding=encoding or 'utf-8')
        return None

    import io

    # Decide whether the stream wants bytes. The io base classes are checked
    # first because in-memory streams such as BytesIO have no ``mode``.
    if isinstance(destination, io.TextIOBase):
        wants_bytes = False
    elif isinstance(destination, (io.RawIOBase, io.BufferedIOBase)):
        wants_bytes = True
    else:
        # Unknown file-like object: fall back to its declared mode, if any.
        mode = getattr(destination, 'mode', '')
        wants_bytes = isinstance(mode, str) and 'b' in mode

    destination.write(content.encode(encoding or 'utf-8') if wants_bytes else content)
    return None
|
|
638
|
+
|
|
639
|
+
def bind(self, prefix: str, namespace: Union[str, URIRef, Namespace], override: bool = True, replace: bool = False):
    """Register *prefix* for *namespace* with the namespace manager.

    All four arguments are forwarded unchanged; the graph itself is
    returned so calls can be chained.
    """
    manager = self._namespace_manager
    manager.bind(prefix, namespace, override, replace)
    return self
|
|
643
|
+
|
|
644
|
+
def namespaces(self) -> Iterator[Tuple[str, URIRef]]:
    """Return an iterator over whatever the namespace manager reports as bound."""
    bound = self._namespace_manager.namespaces()
    return iter(bound)
|
|
647
|
+
|
|
648
|
+
def query(self, query: str, initBindings=None, initNs=None, **kwargs):
    """
    Run a SPARQL query against this graph's store.

    Args:
        query: SPARQL query string
        initBindings: Initial variable bindings, handed to the result wrapper
        initNs: Mapping of prefix to namespace; each entry is prepended to
            the query text as a ``PREFIX`` declaration

    Returns:
        A QueryResult wrapping the executor's output
    """
    from rdf_starbase import execute_sparql

    if initNs:
        # One PREFIX line per mapping, in the mapping's iteration order.
        declarations = "".join(
            f"PREFIX {prefix}: <{ns}>\n" for prefix, ns in initNs.items()
        )
        query = declarations + query

    raw_result = execute_sparql(self._store, query)
    return QueryResult(raw_result, initBindings)
|
|
671
|
+
|
|
672
|
+
def update(self, update_query: str, initBindings=None, initNs=None, **kwargs):
    """Run a SPARQL Update request against this graph's store.

    Entries of *initNs* are prepended to the request as ``PREFIX``
    declarations. The executor's return value is passed back unchanged.
    """
    from rdf_starbase import execute_sparql

    if initNs:
        header_lines = []
        for prefix, ns in initNs.items():
            header_lines.append(f"PREFIX {prefix}: <{ns}>\n")
        update_query = "".join(header_lines) + update_query

    return execute_sparql(self._store, update_query)
|
|
683
|
+
|
|
684
|
+
# =========================================================================
|
|
685
|
+
# Internal parsing methods
|
|
686
|
+
# =========================================================================
|
|
687
|
+
|
|
688
|
+
def _parse_turtle(self, content: str):
    """Parse Turtle text and bulk-load the resulting triples into the store."""
    from rdf_starbase.formats.turtle import parse_turtle

    parsed = parse_turtle(content).triples

    # The store takes one list per column, so split the triples up before
    # handing them over in a single columnar insert.
    subject_column = [triple.subject for triple in parsed]
    predicate_column = [triple.predicate for triple in parsed]
    object_column = [triple.object for triple in parsed]

    self._store.add_triples_columnar(
        subjects=subject_column,
        predicates=predicate_column,
        objects=object_column,
        source="turtle_parse",
        confidence=1.0,
    )
|
|
707
|
+
|
|
708
|
+
def _parse_ntriples(self, content: str):
    """Parse N-Triples text and bulk-load the resulting triples into the store."""
    from rdf_starbase.formats.ntriples import parse_ntriples

    statements = parse_ntriples(content).triples

    columns = {
        "subjects": [st.subject for st in statements],
        "predicates": [st.predicate for st in statements],
        "objects": [st.object for st in statements],
    }
    # Loaded rows are tagged with their origin and full confidence.
    self._store.add_triples_columnar(
        source="ntriples_parse",
        confidence=1.0,
        **columns,
    )
|
|
725
|
+
|
|
726
|
+
def _parse_rdfxml(self, content: str):
    """Parse RDF/XML text and bulk-load the resulting triples into the store."""
    from rdf_starbase.formats.rdfxml import parse_rdfxml

    document = parse_rdfxml(content)
    items = document.triples

    # Build the three parallel columns expected by the columnar insert.
    subs = [item.subject for item in items]
    preds = [item.predicate for item in items]
    objs = [item.object for item in items]

    self._store.add_triples_columnar(
        subjects=subs,
        predicates=preds,
        objects=objs,
        source="rdfxml_parse",
        confidence=1.0,
    )
|
|
743
|
+
|
|
744
|
+
def _parse_jsonld(self, content: str):
    """Parse JSON-LD text and bulk-load the resulting triples into the store."""
    from rdf_starbase.formats.jsonld import parse_jsonld

    triples_found = parse_jsonld(content).triples

    # Inserted in one columnar call, tagged with the JSON-LD origin.
    self._store.add_triples_columnar(
        subjects=[entry.subject for entry in triples_found],
        predicates=[entry.predicate for entry in triples_found],
        objects=[entry.object for entry in triples_found],
        source="jsonld_parse",
        confidence=1.0,
    )
|
|
761
|
+
|
|
762
|
+
# =========================================================================
|
|
763
|
+
# Internal serialization methods
|
|
764
|
+
# =========================================================================
|
|
765
|
+
|
|
766
|
+
def _serialize_turtle(self) -> str:
|
|
767
|
+
"""Serialize to Turtle."""
|
|
768
|
+
lines = []
|
|
769
|
+
|
|
770
|
+
# Convert namespaces to dict and write prefix declarations
|
|
771
|
+
prefixes = {prefix: str(ns) for prefix, ns in self._namespace_manager.namespaces()}
|
|
772
|
+
for prefix, namespace in sorted(prefixes.items()):
|
|
773
|
+
lines.append(f"@prefix {prefix}: <{namespace}> .")
|
|
774
|
+
|
|
775
|
+
if prefixes:
|
|
776
|
+
lines.append("")
|
|
777
|
+
|
|
778
|
+
# Build reverse prefix lookup for compression
|
|
779
|
+
reverse_prefixes = {v: k for k, v in prefixes.items()}
|
|
780
|
+
|
|
781
|
+
def compress_uri(uri: str) -> str:
|
|
782
|
+
"""Try to compress URI with prefix."""
|
|
783
|
+
for ns, prefix in sorted(reverse_prefixes.items(), key=lambda x: -len(x[0])):
|
|
784
|
+
if uri.startswith(ns):
|
|
785
|
+
local = uri[len(ns):]
|
|
786
|
+
# Only use prefix if local part is valid
|
|
787
|
+
if local and local[0].isalpha() and all(c.isalnum() or c == '_' for c in local):
|
|
788
|
+
return f"{prefix}:{local}"
|
|
789
|
+
return f"<{uri}>"
|
|
790
|
+
|
|
791
|
+
def format_value(v) -> str:
|
|
792
|
+
"""Format a value as Turtle."""
|
|
793
|
+
if isinstance(v, str):
|
|
794
|
+
if v.startswith(('http://', 'https://', 'urn:')):
|
|
795
|
+
return compress_uri(v)
|
|
796
|
+
elif v.startswith('_:'):
|
|
797
|
+
return v
|
|
798
|
+
else:
|
|
799
|
+
# Escape and quote literal
|
|
800
|
+
escaped = v.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
|
|
801
|
+
return f'"{escaped}"'
|
|
802
|
+
else:
|
|
803
|
+
return f'"{v}"'
|
|
804
|
+
|
|
805
|
+
# Group by subject for prettier output
|
|
806
|
+
by_subject = {}
|
|
807
|
+
for row in self._store._df.iter_rows(named=True):
|
|
808
|
+
if row.get('deprecated', False):
|
|
809
|
+
continue
|
|
810
|
+
s = row['subject']
|
|
811
|
+
if s not in by_subject:
|
|
812
|
+
by_subject[s] = []
|
|
813
|
+
by_subject[s].append((row['predicate'], row['object']))
|
|
814
|
+
|
|
815
|
+
# Write grouped triples
|
|
816
|
+
for subject, po_list in by_subject.items():
|
|
817
|
+
s_str = compress_uri(subject) if subject.startswith(('http://', 'https://')) else subject
|
|
818
|
+
lines.append(f"{s_str}")
|
|
819
|
+
|
|
820
|
+
for i, (pred, obj) in enumerate(po_list):
|
|
821
|
+
p_str = compress_uri(pred)
|
|
822
|
+
o_str = format_value(obj)
|
|
823
|
+
sep = " ;" if i < len(po_list) - 1 else " ."
|
|
824
|
+
lines.append(f" {p_str} {o_str}{sep}")
|
|
825
|
+
|
|
826
|
+
lines.append("")
|
|
827
|
+
|
|
828
|
+
return '\n'.join(lines)
|
|
829
|
+
|
|
830
|
+
def _serialize_ntriples(self) -> str:
    """Serialize the non-deprecated rows of the store to N-Triples.

    Subjects are written as ``<iri>`` unless they start with ``_:``;
    predicates are always written as ``<iri>``.  String objects starting
    with ``http://``, ``https://`` or ``urn:`` are written as IRIs, those
    starting with ``_:`` as blank nodes, and everything else as a plain
    quoted literal built from ``str(object)``.

    Returns:
        One ``s p o .`` statement per line, joined with ``\\n``.
    """
    # Characters that may not appear raw inside an N-Triples string literal.
    literal_escapes = str.maketrans({
        '\\': '\\\\',
        '"': '\\"',
        '\n': '\\n',
        '\r': '\\r',
    })
    iri_prefixes = ('http://', 'https://', 'urn:')

    lines = []
    for row in self._store._df.iter_rows(named=True):
        if row.get('deprecated', False):
            continue
        s = row['subject']
        p = row['predicate']
        o = row['object']

        s_str = s if s.startswith('_:') else f"<{s}>"
        p_str = f"<{p}>"

        if isinstance(o, str) and o.startswith(iri_prefixes):
            o_str = f"<{o}>"
        elif isinstance(o, str) and o.startswith('_:'):
            o_str = o
        else:
            # Escape so quotes, backslashes and line breaks in the value
            # cannot terminate or split the literal.
            o_str = '"' + str(o).translate(literal_escapes) + '"'

        lines.append(f"{s_str} {p_str} {o_str} .")

    return '\n'.join(lines)
|
|
855
|
+
|
|
856
|
+
def _serialize_rdfxml(self) -> str:
    """Serialize the non-deprecated rows of the store to RDF/XML.

    Every triple is written as its own ``rdf:Description`` element.  The
    predicate IRI is split into a namespace and an XML-name-safe local part;
    predicates in the RDF namespace use the ``rdf:`` prefix, all others use
    ``ns0:`` with the namespace declared on the property element itself.
    Subjects/objects starting with ``_:`` are written with ``rdf:nodeID``;
    string objects starting with ``http://``, ``https://`` or ``urn:`` are
    written with ``rdf:resource``; everything else becomes escaped element
    text built from ``str(object)``.

    Returns:
        The document as a string, lines joined with ``\\n``.

    Raises:
        ValueError: if a predicate IRI cannot be split into a non-empty
            namespace and a non-empty local name, since such a predicate
            has no RDF/XML element form.
    """
    from xml.sax.saxutils import escape, quoteattr

    rdf_ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"

    def node_ref(attr: str, value: str) -> str:
        """Return the attribute naming an IRI (``rdf:<attr>``) or blank node."""
        if value.startswith('_:'):
            return f"rdf:nodeID={quoteattr(value[2:])}"
        return f"rdf:{attr}={quoteattr(value)}"

    def qualify(predicate: str):
        """Return ``(qualified_name, xmlns_attribute)`` for a predicate IRI."""
        # Take the longest trailing run of name characters ...
        cut = len(predicate)
        while cut > 0 and (predicate[cut - 1].isalnum() or predicate[cut - 1] in '_-.'):
            cut -= 1
        # ... then trim it so the local name starts with a letter or '_'.
        while cut < len(predicate) and not (predicate[cut].isalpha() or predicate[cut] == '_'):
            cut += 1
        if cut == 0 or cut == len(predicate):
            raise ValueError(
                f"Cannot split predicate into namespace and local name: {predicate}"
            )
        namespace, local = predicate[:cut], predicate[cut:]
        if namespace == rdf_ns:
            # Already declared on the root element.
            return f"rdf:{local}", ""
        return f"ns0:{local}", f" xmlns:ns0={quoteattr(namespace)}"

    lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">',
    ]

    for row in self._store._df.iter_rows(named=True):
        if row.get('deprecated', False):
            continue
        s = row['subject']
        p = row['predicate']
        o = row['object']

        qname, xmlns = qualify(p)
        lines.append(f'  <rdf:Description {node_ref("about", s)}>')

        if isinstance(o, str) and o.startswith(('_:', 'http://', 'https://', 'urn:')):
            lines.append(f'    <{qname}{xmlns} {node_ref("resource", o)}/>')
        else:
            lines.append(f'    <{qname}{xmlns}>{escape(str(o))}</{qname}>')

        lines.append('  </rdf:Description>')

    lines.append('</rdf:RDF>')
    return '\n'.join(lines)
|
|
883
|
+
|
|
884
|
+
def _serialize_jsonld(self) -> str:
    """Serialize the non-deprecated rows of the store to a JSON-LD string.

    Rows are grouped by subject into one object per subject, keyed by
    ``@id``.  Objects of ``rdf:type`` statements are collected under
    ``@type``; every other predicate is used verbatim as a key whose value
    is the list of its objects in row order.

    Returns:
        The list of subject objects dumped as JSON with an indent of 2.
    """
    import json

    rdf_type = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"

    nodes = {}
    for record in self._store._df.iter_rows(named=True):
        if record.get('deprecated', False):
            continue
        subject = record['subject']
        predicate = record['predicate']
        value = record['object']

        node = nodes.setdefault(subject, {"@id": subject})
        key = "@type" if predicate == rdf_type else predicate
        node.setdefault(key, []).append(value)

    return json.dumps(list(nodes.values()), indent=2)
|
|
910
|
+
|
|
911
|
+
# =========================================================================
|
|
912
|
+
# Helper methods
|
|
913
|
+
# =========================================================================
|
|
914
|
+
|
|
915
|
+
def _guess_format(self, path: Path) -> str:
    """Pick a serialization format name from the extension of *path*.

    The comparison is case-insensitive and uses only the final suffix.
    Unrecognised (or missing) extensions fall back to ``'turtle'``.
    """
    extension = path.suffix.lower()
    formats = (
        ('turtle', ('.ttl', '.turtle')),
        ('n3', ('.n3',)),
        ('nt', ('.nt', '.ntriples')),
        ('xml', ('.rdf', '.xml', '.owl')),
        ('json-ld', ('.jsonld', '.json')),
    )
    for format_name, extensions in formats:
        if extension in extensions:
            return format_name
    return 'turtle'
|
|
930
|
+
|
|
931
|
+
def _term_to_value(self, term: Identifier) -> Any:
    """Turn an RDF term into the plain value kept in the store.

    ``URIRef`` becomes its string form, ``Literal`` becomes the result of
    ``toPython()``, ``BNode`` becomes ``_:`` followed by its ``_id``, and any
    other term falls back to ``str(term)``.
    """
    if isinstance(term, URIRef):
        return str(term)
    if isinstance(term, Literal):
        return term.toPython()
    if isinstance(term, BNode):
        # Blank nodes are stored with the "_:" label prefix.
        return f"_:{term._id}"
    return str(term)
|
|
941
|
+
|
|
942
|
+
def _value_to_term(self, value: Any, is_uri: bool = False) -> Identifier:
    """Turn a stored value back into an RDF term.

    Strings starting with ``_:`` become ``BNode`` (label prefix removed);
    strings that start with ``http://``, ``https://``, ``urn:`` or
    ``file://`` -- or any string when *is_uri* is true -- become ``URIRef``;
    remaining strings become ``Literal``.  ``int``/``float``/``bool`` values
    are wrapped in ``Literal`` directly, anything else via ``str(value)``.
    *is_uri* has no effect on non-string values.
    """
    if not isinstance(value, str):
        if isinstance(value, (int, float, bool)):
            return Literal(value)
        return Literal(str(value))

    if value.startswith('_:'):
        return BNode(value[2:])

    has_uri_scheme = value.startswith(('http://', 'https://', 'urn:', 'file://'))
    if has_uri_scheme or is_uri:
        return URIRef(value)
    return Literal(value)
|
|
955
|
+
|
|
956
|
+
|
|
957
|
+
# =============================================================================
|
|
958
|
+
# Namespace Manager
|
|
959
|
+
# =============================================================================
|
|
960
|
+
|
|
961
|
+
class NamespaceManager:
    """Manages namespace prefix bindings for a graph.

    Two maps are kept in step: ``_bindings`` (prefix -> namespace) and
    ``_reverse`` (namespace string -> a prefix currently bound to it).
    """

    def __init__(self, graph: Optional[Graph] = None):
        """Create a manager for *graph* with rdf, rdfs, owl and xsd pre-bound."""
        self._graph = graph
        self._bindings: dict[str, URIRef] = {}
        self._reverse: dict[str, str] = {}

        # Default bindings
        self.bind("rdf", RDF)
        self.bind("rdfs", RDFS)
        self.bind("owl", OWL)
        self.bind("xsd", XSD)

    def bind(
        self,
        prefix: str,
        namespace: Union[str, URIRef, Namespace],
        override: bool = True,
        replace: bool = False
    ):
        """Bind *prefix* to *namespace*.

        Args:
            prefix: Prefix to bind (may be the empty string).
            namespace: Namespace IRI; wrapped in ``URIRef`` if it is not one.
            override: When false and *prefix* is already bound, do nothing.
            replace: When true, first remove the prefix that the reverse map
                currently records for *namespace*.
        """
        ns = URIRef(namespace) if not isinstance(namespace, URIRef) else namespace
        ns_key = str(ns)

        if not override and prefix in self._bindings:
            return

        if replace:
            # Remove old binding for this namespace.  Compare against None so
            # an empty (default) prefix is replaced too, and pop() so a
            # missing entry cannot raise.
            old_prefix = self._reverse.get(ns_key)
            if old_prefix is not None:
                self._bindings.pop(old_prefix, None)

        previous = self._bindings.get(prefix)
        self._bindings[prefix] = ns
        if previous is not None and str(previous) != ns_key:
            # The prefix used to name another namespace; without this the
            # reverse map would keep claiming that namespace -> prefix.
            self._release_reverse(str(previous), prefix)
        self._reverse[ns_key] = prefix

    def _release_reverse(self, ns_key: str, stale_prefix: str) -> None:
        """Repoint or drop the reverse entry *ns_key* -> *stale_prefix*."""
        if self._reverse.get(ns_key) != stale_prefix:
            return
        for other_prefix, bound in self._bindings.items():
            if str(bound) == ns_key:
                self._reverse[ns_key] = other_prefix
                return
        del self._reverse[ns_key]

    def _unused_prefix(self) -> str:
        """Return the first ``ns<N>`` prefix, N >= number of bindings, not yet bound."""
        counter = len(self._bindings)
        while f"ns{counter}" in self._bindings:
            counter += 1
        return f"ns{counter}"

    def namespaces(self) -> Iterator[Tuple[str, URIRef]]:
        """Iterate over (prefix, namespace) pairs."""
        return iter(self._bindings.items())

    def compute_qname(self, uri: str, generate: bool = True) -> Tuple[str, str, str]:
        """Compute a qname (prefix, namespace, local) for a URI.

        Bound namespaces are tried longest first.  If none is a prefix of
        *uri* and *generate* is true, the URI is split after the last
        occurrence of the first separator it contains (checked in the order
        ``#``, ``/``, ``:``) and a fresh ``ns<N>`` prefix is bound to that
        namespace.

        Raises:
            ValueError: if no binding matches and no prefix is generated
                (``generate`` is false or *uri* contains no separator).
        """
        for prefix, ns in sorted(self._bindings.items(), key=lambda x: -len(x[1])):
            ns_str = str(ns)
            if uri.startswith(ns_str):
                return (prefix, ns_str, uri[len(ns_str):])

        # Try to generate a prefix
        if generate:
            for sep in ('#', '/', ':'):
                if sep not in uri:
                    continue
                # Split URI into namespace and local
                idx = uri.rfind(sep)
                ns = uri[:idx + 1]
                local = uri[idx + 1:]
                if ns in self._reverse:
                    return (self._reverse[ns], ns, local)
                # Generate a prefix that cannot clobber an existing binding.
                prefix = self._unused_prefix()
                self.bind(prefix, ns)
                return (prefix, ns, local)

        raise ValueError(f"Cannot compute qname for {uri}")
|
|
1025
|
+
|
|
1026
|
+
|
|
1027
|
+
# =============================================================================
|
|
1028
|
+
# Query Results
|
|
1029
|
+
# =============================================================================
|
|
1030
|
+
|
|
1031
|
+
class QueryResult:
    """Wrapper for SPARQL query results."""

    def __init__(self, result, bindings=None):
        """Store the raw *result*; a falsy *bindings* is replaced by ``{}``."""
        self._result = result
        self._bindings = bindings or {}

    def __iter__(self):
        """Iterate over result rows.

        A polars ``DataFrame`` yields one ``QueryRow`` per row; any other
        result (including a boolean) is yielded once, as-is.
        """
        import polars as pl
        if not isinstance(self._result, pl.DataFrame):
            yield self._result
            return
        for record in self._result.iter_rows(named=True):
            yield QueryRow(record)

    def __bool__(self):
        """For ASK queries: the boolean itself, otherwise whether the result is non-empty."""
        outcome = self._result
        return outcome if isinstance(outcome, bool) else len(outcome) > 0
|
|
1054
|
+
|
|
1055
|
+
|
|
1056
|
+
class QueryRow:
    """A single result row from a SPARQL query."""

    def __init__(self, row: dict):
        """Keep *row* and remember its keys in order for positional access."""
        self._row = row
        self._keys = list(row.keys())

    def __getitem__(self, key):
        """Return the value for a column name or 0-based position as an RDF term.

        ``None`` (including a missing column name) is returned as ``None``.
        Strings starting with ``_:`` become ``BNode``, strings starting with
        ``http://``, ``https://`` or ``urn:`` become ``URIRef``; every other
        value is wrapped in ``Literal``.

        Raises:
            IndexError: for an integer key outside ``0 .. len(row) - 1``.
        """
        # Support both string keys and integer indices
        if isinstance(key, int):
            if not 0 <= key < len(self._keys):
                raise IndexError(f"Row index out of range: {key}")
            key = self._keys[key]

        raw = self._row.get(key)
        if raw is None:
            return None
        if not isinstance(raw, str):
            return Literal(raw)
        if raw.startswith('_:'):
            return BNode(raw[2:])
        if raw.startswith(('http://', 'https://', 'urn:')):
            return URIRef(raw)
        return Literal(raw)

    def __iter__(self):
        """Iterate over the raw (unconverted) values of the row."""
        return iter(self._row.values())

    def asdict(self):
        """Return a shallow copy of the underlying row mapping."""
        return dict(self._row)
|
|
1087
|
+
|
|
1088
|
+
|
|
1089
|
+
# =============================================================================
|
|
1090
|
+
# Exports
|
|
1091
|
+
# =============================================================================
|
|
1092
|
+
|
|
1093
|
+
__all__ = [
|
|
1094
|
+
# Term classes
|
|
1095
|
+
'URIRef', 'Literal', 'BNode', 'Identifier',
|
|
1096
|
+
# Namespace
|
|
1097
|
+
'Namespace', 'ClosedNamespace', 'NamespaceManager',
|
|
1098
|
+
# Well-known namespaces
|
|
1099
|
+
'RDF', 'RDFS', 'OWL', 'XSD', 'FOAF', 'DC', 'DCTERMS', 'SKOS', 'PROV',
|
|
1100
|
+
# Graph
|
|
1101
|
+
'Graph',
|
|
1102
|
+
# Query
|
|
1103
|
+
'QueryResult', 'QueryRow',
|
|
1104
|
+
]
|