rdf-starbase 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdf_starbase/__init__.py +57 -0
- rdf_starbase/ai_grounding.py +728 -0
- rdf_starbase/compat/__init__.py +26 -0
- rdf_starbase/compat/rdflib.py +1104 -0
- rdf_starbase/formats/__init__.py +29 -0
- rdf_starbase/formats/jsonld.py +488 -0
- rdf_starbase/formats/ntriples.py +419 -0
- rdf_starbase/formats/rdfxml.py +434 -0
- rdf_starbase/formats/turtle.py +882 -0
- rdf_starbase/models.py +92 -0
- rdf_starbase/registry.py +540 -0
- rdf_starbase/repositories.py +407 -0
- rdf_starbase/repository_api.py +739 -0
- rdf_starbase/sparql/__init__.py +35 -0
- rdf_starbase/sparql/ast.py +910 -0
- rdf_starbase/sparql/executor.py +1925 -0
- rdf_starbase/sparql/parser.py +1716 -0
- rdf_starbase/storage/__init__.py +44 -0
- rdf_starbase/storage/executor.py +1914 -0
- rdf_starbase/storage/facts.py +850 -0
- rdf_starbase/storage/lsm.py +531 -0
- rdf_starbase/storage/persistence.py +338 -0
- rdf_starbase/storage/quoted_triples.py +292 -0
- rdf_starbase/storage/reasoner.py +1035 -0
- rdf_starbase/storage/terms.py +628 -0
- rdf_starbase/store.py +1049 -0
- rdf_starbase/store_legacy.py +748 -0
- rdf_starbase/web.py +568 -0
- rdf_starbase-0.1.0.dist-info/METADATA +706 -0
- rdf_starbase-0.1.0.dist-info/RECORD +31 -0
- rdf_starbase-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,910 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Abstract Syntax Tree (AST) nodes for SPARQL-Star queries.
|
|
3
|
+
|
|
4
|
+
These classes represent the parsed structure of a SPARQL-Star query,
|
|
5
|
+
enabling type-safe query manipulation and execution.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from typing import Union, Optional, Any
|
|
10
|
+
from enum import Enum, auto
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ComparisonOp(Enum):
    """Relational operators usable inside a FILTER expression."""

    EQ = auto()  # =
    NE = auto()  # !=
    LT = auto()  # <
    LE = auto()  # <=
    GT = auto()  # >
    GE = auto()  # >=

    @classmethod
    def from_str(cls, op: str) -> "ComparisonOp":
        """Translate an operator token into the matching enum member.

        ``==`` is accepted as an alias of ``=`` and ``<>`` as an alias of
        ``!=``.

        Raises:
            KeyError: if ``op`` is not one of the recognised spellings.
        """
        spellings = (
            (cls.EQ, ("=", "==")),
            (cls.NE, ("!=", "<>")),
            (cls.LT, ("<",)),
            (cls.LE, ("<=",)),
            (cls.GT, (">",)),
            (cls.GE, (">=",)),
        )
        lookup = {
            token: member
            for member, tokens in spellings
            for token in tokens
        }
        return lookup[op]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class LogicalOp(Enum):
    """Boolean connectives used to combine FILTER expressions."""

    AND = auto()  # rendered as "&&" by LogicalExpression
    OR = auto()  # rendered as "||" by LogicalExpression
    NOT = auto()  # rendered as "!(...)" by LogicalExpression
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# =============================================================================
|
|
42
|
+
# Term Types (subjects, predicates, objects)
|
|
43
|
+
# =============================================================================
|
|
44
|
+
|
|
45
|
+
@dataclass(frozen=True)
class Variable:
    """
    A SPARQL query variable such as ``?name`` or ``$person``.

    Only the bare name is stored; the string form always uses the ``?``
    sigil.
    """

    name: str

    def __str__(self) -> str:
        return "?{}".format(self.name)

    def __hash__(self) -> int:
        # Hash on the bare name, consistent with the generated __eq__.
        return hash(self.name)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass(frozen=True)
class IRI:
    """
    An Internationalized Resource Identifier.

    ``value`` holds the identifier text (a full IRI or a prefixed name such
    as ``foaf:name``); the string form wraps it in angle brackets.
    """

    value: str

    def __str__(self) -> str:
        return "<{}>".format(self.value)

    def __hash__(self) -> int:
        # Hash on the raw text, consistent with the generated __eq__.
        return hash(self.value)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass(frozen=True)
class Literal:
    """
    An RDF literal value.

    A literal may carry an optional language tag (``@en``) or an optional
    datatype IRI (``^^<...>``). When both are set, the language tag wins in
    the string form.
    """

    value: Any
    language: Optional[str] = None
    datatype: Optional[str] = None

    def __str__(self) -> str:
        """Render the literal as a double-quoted SPARQL string literal.

        Backslash, double quote, line feed and carriage return may not
        appear raw inside a double-quoted literal, so they are
        backslash-escaped. Without this, a value such as ``say "hi"`` would
        produce text that cannot be parsed back.
        """
        lexical = (
            str(self.value)
            .replace("\\", "\\\\")  # must run first so later escapes survive
            .replace('"', '\\"')
            .replace("\n", "\\n")
            .replace("\r", "\\r")
        )
        base = f'"{lexical}"'
        if self.language:
            return f"{base}@{self.language}"
        if self.datatype:
            return f"{base}^^<{self.datatype}>"
        return base

    def __hash__(self) -> int:
        # Hash over all three fields, consistent with the generated __eq__.
        return hash((self.value, self.language, self.datatype))
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@dataclass(frozen=True)
class BlankNode:
    """An anonymous resource, identified only by a label."""

    label: str

    def __str__(self) -> str:
        # Blank nodes are written with the "_:" prefix.
        return "_:{}".format(self.label)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# =============================================================================
|
|
110
|
+
# Property Paths (SPARQL 1.1)
|
|
111
|
+
# =============================================================================
|
|
112
|
+
|
|
113
|
+
class PropertyPathModifier(Enum):
    """Repetition suffixes that can follow a property path element."""

    ZERO_OR_MORE = auto()  # path*
    ONE_OR_MORE = auto()  # path+
    ZERO_OR_ONE = auto()  # path?
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@dataclass(frozen=True)
class PropertyPath:
    """
    Common base for every property path expression node.

    The class itself has no fields; it exists so that path nodes can be
    recognised with a single ``isinstance`` check. Supported path forms:
    - foaf:knows/foaf:name (sequence)
    - foaf:knows|foaf:friend (alternative)
    - foaf:knows* (zero or more)
    - foaf:knows+ (one or more)
    - foaf:knows? (zero or one)
    - ^foaf:knows (inverse)
    - !(foaf:knows|foaf:hates) (negated property set)
    """
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@dataclass(frozen=True)
class PathIRI(PropertyPath):
    """Leaf path element: a single IRI used as one step of a path."""

    iri: IRI

    def __str__(self) -> str:
        # Delegates to the wrapped IRI's own string form.
        return "{}".format(self.iri)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
@dataclass(frozen=True)
class PathSequence(PropertyPath):
    """A sequence of property paths (path1/path2/...)."""

    paths: tuple[PropertyPath, ...]

    def __str__(self) -> str:
        """Join the steps with "/", parenthesizing alternatives.

        "/" binds tighter than "|", so an alternative used as a step must be
        wrapped in parentheses; otherwise ``(a|b)/c`` would be rendered as
        ``a|b/c``, which denotes a different path.
        """
        return "/".join(
            f"({step})" if isinstance(step, PathAlternative) else str(step)
            for step in self.paths
        )
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
@dataclass(frozen=True)
class PathAlternative(PropertyPath):
    """A choice between property paths (path1|path2|...)."""

    paths: tuple[PropertyPath, ...]

    def __str__(self) -> str:
        rendered = [str(branch) for branch in self.paths]
        return "|".join(rendered)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
@dataclass(frozen=True)
class PathInverse(PropertyPath):
    """An inverse property path (^path)."""

    path: PropertyPath

    def __str__(self) -> str:
        """Render as ``^path``, parenthesizing composite operands.

        "^" applies to a single path element, so a sequence, an alternative
        or another inverse must be wrapped in parentheses to keep the whole
        operand inverted (``^(a/b)`` rather than ``^a/b``).
        """
        if isinstance(self.path, (PathSequence, PathAlternative, PathInverse)):
            return f"^({self.path})"
        return f"^{self.path}"
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
@dataclass(frozen=True)
class PathMod(PropertyPath):
    """A modified property path (path*, path+, path?)."""

    path: PropertyPath
    modifier: PropertyPathModifier

    def __str__(self) -> str:
        """Render the operand followed by its repetition suffix.

        A modifier applies to a single primary element. Only a plain IRI or
        a negated property set is written bare; any other operand is
        parenthesized so the suffix covers all of it (``(a/b)*`` rather than
        ``a/b*``).

        Raises:
            KeyError: if ``modifier`` is not a known PropertyPathModifier.
        """
        mod = {
            PropertyPathModifier.ZERO_OR_MORE: "*",
            PropertyPathModifier.ONE_OR_MORE: "+",
            PropertyPathModifier.ZERO_OR_ONE: "?",
        }[self.modifier]
        if isinstance(self.path, (PathIRI, PathNegatedPropertySet)):
            return f"{self.path}{mod}"
        return f"({self.path}){mod}"
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
@dataclass(frozen=True)
class PathNegatedPropertySet(PropertyPath):
    """A negated property set: any predicate except the listed IRIs."""

    iris: tuple[IRI, ...]

    def __str__(self) -> str:
        # Always parenthesized, even for a single IRI: !(iri1|iri2|...)
        excluded = [str(iri) for iri in self.iris]
        return "!(" + "|".join(excluded) + ")"
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
# Type alias for predicate which can be an IRI, Variable, or PropertyPath
|
|
199
|
+
# Accepted predicate forms: a plain IRI, a variable, or a property path node.
# NOTE(review): TriplePattern.predicate is annotated Union[Term, PropertyPath]
# rather than with this alias - confirm which one is intended.
PredicatePath = Union[IRI, Variable, PropertyPath]
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# Type alias for any term that can appear in a triple pattern
|
|
203
|
+
# "QuotedTriplePattern" is a string forward reference because that class is
# defined further down in this module.
Term = Union[Variable, IRI, Literal, BlankNode, "QuotedTriplePattern"]
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
# =============================================================================
|
|
207
|
+
# Triple Patterns
|
|
208
|
+
# =============================================================================
|
|
209
|
+
|
|
210
|
+
@dataclass(frozen=True)
class TriplePattern:
    """
    One subject/predicate/object pattern of a basic graph pattern.

    Any position may hold a variable or a concrete term. The predicate may
    additionally be a PropertyPath expression.
    """

    subject: Term
    predicate: Union[Term, PropertyPath]  # Can be a property path
    object: Term

    def __str__(self) -> str:
        # The rendered pattern already carries its own " ." terminator.
        rendered = " ".join(
            str(part) for part in (self.subject, self.predicate, self.object)
        )
        return rendered + " ."

    def get_variables(self) -> set[Variable]:
        """Collect the variables in this pattern, including those nested
        inside quoted triple patterns."""
        found = set()
        for position in (self.subject, self.predicate, self.object):
            if isinstance(position, Variable):
                found.add(position)
                continue
            if isinstance(position, QuotedTriplePattern):
                found.update(position.get_variables())
        return found

    def has_property_path(self) -> bool:
        """Tell whether the predicate position holds a PropertyPath node."""
        return isinstance(self.predicate, PropertyPath)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
@dataclass(frozen=True)
class QuotedTriplePattern:
    """
    An RDF-Star quoted triple pattern (<< s p o >>).

    A quoted triple can itself appear as a term of another pattern, which
    is what allows statements about statements to be matched.
    """

    subject: Term
    predicate: Term
    object: Term

    def __str__(self) -> str:
        inner = " ".join(
            str(part) for part in (self.subject, self.predicate, self.object)
        )
        return "<< " + inner + " >>"

    def get_variables(self) -> set[Variable]:
        """Collect the variables in this quoted pattern, recursing into any
        quoted triple patterns nested inside it."""
        found = set()
        for position in (self.subject, self.predicate, self.object):
            if isinstance(position, Variable):
                found.add(position)
                continue
            if isinstance(position, QuotedTriplePattern):
                found.update(position.get_variables())
        return found
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
# =============================================================================
|
|
267
|
+
# Filter Expressions
|
|
268
|
+
# =============================================================================
|
|
269
|
+
|
|
270
|
+
@dataclass
class Comparison:
    """A binary comparison between two operands (e.g., ?age > 30)."""

    left: Union[Variable, Literal, IRI, "FunctionCall"]
    operator: ComparisonOp
    right: Union[Variable, Literal, IRI, "FunctionCall"]

    def __str__(self) -> str:
        # Equality and inequality always print as "=" and "!=", whichever
        # alias the operator was parsed from.
        symbols = {
            ComparisonOp.EQ: "=",
            ComparisonOp.NE: "!=",
            ComparisonOp.LT: "<",
            ComparisonOp.LE: "<=",
            ComparisonOp.GT: ">",
            ComparisonOp.GE: ">=",
        }
        symbol = symbols[self.operator]
        return " ".join((str(self.left), symbol, str(self.right)))
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
@dataclass
class LogicalExpression:
    """Expressions combined with a logical connective (AND, OR, NOT)."""

    operator: LogicalOp
    operands: list[Union["Comparison", "LogicalExpression", "FunctionCall"]]

    def __str__(self) -> str:
        if self.operator == LogicalOp.NOT:
            # Negation is unary: only the first operand is rendered.
            negated = self.operands[0]
            return f"!({negated})"
        if self.operator == LogicalOp.AND:
            glue = " && "
        else:
            glue = " || "
        return "(" + glue.join(str(operand) for operand in self.operands) + ")"
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
@dataclass
class FunctionCall:
    """A call to a SPARQL function (e.g., BOUND(?x), STR(?y))."""

    name: str
    arguments: list[Union[Variable, Literal, IRI, "FunctionCall"]]

    def __str__(self) -> str:
        rendered_args = [str(argument) for argument in self.arguments]
        return self.name + "(" + ", ".join(rendered_args) + ")"
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
@dataclass
class AggregateExpression:
    """
    An aggregate function call (COUNT, SUM, AVG, MIN, MAX, GROUP_CONCAT, SAMPLE).

    Examples:
        COUNT(?x)
        COUNT(DISTINCT ?x)
        SUM(?price)
        GROUP_CONCAT(?name; separator=", ")
    """

    function: str  # COUNT, SUM, AVG, MIN, MAX, GROUP_CONCAT, SAMPLE
    argument: Optional[Union[Variable, Literal, IRI, "FunctionCall"]]  # None for COUNT(*)
    distinct: bool = False
    separator: Optional[str] = None  # For GROUP_CONCAT
    alias: Optional[Variable] = None  # AS ?varname

    def __str__(self) -> str:
        """Render the aggregate, wrapped as ``(... AS ?alias)`` when aliased.

        DISTINCT is kept for the ``*`` form too (``COUNT(DISTINCT *)``), and
        an explicitly empty separator is still written, because it differs
        from leaving the separator unspecified.
        """
        arg_str = "*" if self.argument is None else str(self.argument)
        if self.distinct:
            arg_str = f"DISTINCT {arg_str}"

        if self.separator is not None and self.function == "GROUP_CONCAT":
            result = f'{self.function}({arg_str}; separator="{self.separator}")'
        else:
            result = f"{self.function}({arg_str})"

        if self.alias is not None:
            result = f"({result} AS {self.alias})"
        return result
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
# Type alias for SELECT expressions (can be variables or aggregates)
|
|
346
|
+
# Element type of SelectQuery.variables: a projected variable or an aggregate.
SelectExpression = Union[Variable, AggregateExpression]
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
@dataclass
class Filter:
    """A FILTER clause wrapping one boolean expression."""

    expression: Union[Comparison, LogicalExpression, FunctionCall]

    def __str__(self) -> str:
        return "FILTER(" + str(self.expression) + ")"
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
@dataclass
class ProvenanceFilter:
    """
    A filter expression tagged with the provenance field it refers to.

    Used internally to optimize queries like:
        FILTER(?confidence > 0.8)
    when ?confidence is bound to a provenance column.
    """

    expression: Union[Comparison, LogicalExpression, FunctionCall]
    provenance_field: str  # e.g., "confidence", "source", "timestamp"

    def __str__(self) -> str:
        # Internal notation: FILTER_PROVENANCE is not a SPARQL keyword.
        inner = ", ".join((str(self.provenance_field), str(self.expression)))
        return "FILTER_PROVENANCE(" + inner + ")"
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
@dataclass
class Bind:
    """
    A BIND clause: evaluates an expression and assigns it to a variable.

    BIND(?price * ?quantity AS ?total)
    BIND("default" AS ?value)
    """

    expression: Union[Variable, Literal, IRI, Comparison, FunctionCall]
    variable: Variable

    def __str__(self) -> str:
        return "BIND(" + str(self.expression) + " AS " + str(self.variable) + ")"
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
@dataclass
class ValuesClause:
    """
    A VALUES clause providing inline data.

    VALUES ?x { 1 2 3 }
    VALUES (?x ?y) { (1 2) (3 4) }
    """

    variables: list[Variable]
    bindings: list[list[Union[Literal, IRI, None]]]  # None for UNDEF

    @staticmethod
    def _cell(value) -> str:
        """Render one binding cell; ``None`` stands for an unbound value."""
        # Test against None explicitly rather than by truthiness, so only a
        # genuinely missing value is written as UNDEF.
        return "UNDEF" if value is None else str(value)

    def __str__(self) -> str:
        """Render the short single-variable form when exactly one variable
        is declared, and the parenthesized row form otherwise."""
        if len(self.variables) == 1:
            vals = " ".join(self._cell(row[0]) for row in self.bindings)
            return f"VALUES {self.variables[0]} {{ {vals} }}"

        vars_str = " ".join(str(v) for v in self.variables)
        rows = " ".join(
            "(" + " ".join(self._cell(v) for v in row) + ")"
            for row in self.bindings
        )
        return f"VALUES ({vars_str}) {{ {rows} }}"
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
# =============================================================================
|
|
414
|
+
# Graph Patterns (OPTIONAL, UNION, etc.)
|
|
415
|
+
# =============================================================================
|
|
416
|
+
|
|
417
|
+
@dataclass
class OptionalPattern:
    """
    An OPTIONAL graph pattern.

    OPTIONAL { ?s ?p ?o }

    Results include rows even if the optional pattern doesn't match.
    """

    patterns: list[Union[TriplePattern, QuotedTriplePattern, "OptionalPattern", "UnionPattern"]] = field(default_factory=list)
    filters: list[Filter] = field(default_factory=list)

    def __str__(self) -> str:
        """Render the group including its filters.

        The filters belong to the optional group, so they are written inside
        the braces after the patterns; leaving them out would print a
        different query from the one this node represents.
        """
        members = [str(p) for p in self.patterns]
        members.extend(str(f) for f in self.filters)
        inner = " ".join(members)
        return f"OPTIONAL {{ {inner} }}"

    def get_variables(self) -> set[Variable]:
        """Return the variables of every contained pattern (filters are not
        inspected)."""
        found = set()
        for pattern in self.patterns:
            found.update(pattern.get_variables())
        return found
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
@dataclass
class UnionPattern:
    """
    A UNION of graph patterns.

    { ?s ?p ?o } UNION { ?s ?q ?r }
    """

    # NOTE(review): both methods below iterate over each alternative, so every
    # alternative is presumably a sequence of patterns rather than a single
    # GraphPattern instance - confirm against the parser.
    alternatives: list["GraphPattern"] = field(default_factory=list)

    def __str__(self) -> str:
        groups = []
        for alt in self.alternatives:
            body = " ".join(str(member) for member in alt)
            groups.append("{ " + body + " }")
        return " UNION ".join(groups)

    def get_variables(self) -> set[Variable]:
        """Return the variables of every member of every alternative.

        Members without a ``get_variables`` method are skipped.
        """
        found = set()
        for alt in self.alternatives:
            for member in alt:
                if not hasattr(member, 'get_variables'):
                    continue
                found.update(member.get_variables())
        return found
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
@dataclass
class GraphPattern:
    """A named graph pattern: GRAPH <uri> { ... }"""

    graph: Union[Variable, IRI]
    patterns: list[Union[TriplePattern, QuotedTriplePattern]] = field(default_factory=list)

    def __str__(self) -> str:
        """Render as ``GRAPH <name> { ... }``, in the same shape as the
        OPTIONAL and MINUS patterns.

        Without this method the node would print as a dataclass repr when
        embedded in rendered query text.
        """
        inner = " ".join(str(p) for p in self.patterns)
        return f"GRAPH {self.graph} {{ {inner} }}"

    def get_variables(self) -> set[Variable]:
        """Return the graph variable (if the graph name is a variable) plus
        the variables of every contained pattern."""
        found = set()
        if isinstance(self.graph, Variable):
            found.add(self.graph)
        for pattern in self.patterns:
            found.update(pattern.get_variables())
        return found
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
@dataclass
class MinusPattern:
    """
    A MINUS graph pattern for set difference.

    MINUS { ?s ?p ?o }

    Removes solutions where the MINUS pattern matches.
    """

    patterns: list[Union[TriplePattern, QuotedTriplePattern, "OptionalPattern", "UnionPattern"]] = field(default_factory=list)
    filters: list[Filter] = field(default_factory=list)

    def __str__(self) -> str:
        """Render the group including its filters.

        The filters belong to the MINUS group, so they are written inside
        the braces after the patterns instead of being silently dropped.
        """
        members = [str(p) for p in self.patterns]
        members.extend(str(f) for f in self.filters)
        inner = " ".join(members)
        return f"MINUS {{ {inner} }}"

    def get_variables(self) -> set[Variable]:
        """Return the variables of every contained pattern (filters are not
        inspected)."""
        found = set()
        for pattern in self.patterns:
            found.update(pattern.get_variables())
        return found
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
# Type alias for patterns that can appear in WHERE
|
|
501
|
+
# Element type of WhereClause.patterns. Filter is not part of this union;
# WhereClause keeps its filters in a separate list.
WherePattern = Union[TriplePattern, QuotedTriplePattern, OptionalPattern, UnionPattern, GraphPattern, Bind, ValuesClause, MinusPattern]
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
# =============================================================================
|
|
505
|
+
# Query Structure
|
|
506
|
+
# =============================================================================
|
|
507
|
+
|
|
508
|
+
@dataclass
class WhereClause:
    """Container for everything that appears inside a WHERE block."""

    patterns: list[WherePattern] = field(default_factory=list)
    filters: list[Filter] = field(default_factory=list)
    optional_patterns: list[OptionalPattern] = field(default_factory=list)
    union_patterns: list[UnionPattern] = field(default_factory=list)
    minus_patterns: list[MinusPattern] = field(default_factory=list)
    binds: list[Bind] = field(default_factory=list)
    values: Optional[ValuesClause] = None
    graph_patterns: list[GraphPattern] = field(default_factory=list)

    def get_all_variables(self) -> set[Variable]:
        """Gather the variables reported by the pattern lists.

        Entries of ``patterns`` that have no ``get_variables`` method are
        skipped. ``filters``, ``binds`` and ``values`` are not consulted.
        """
        collected = set()
        for entry in self.patterns:
            if hasattr(entry, 'get_variables'):
                collected.update(entry.get_variables())

        # Every element of the dedicated lists exposes get_variables().
        dedicated = (
            self.optional_patterns,
            self.union_patterns,
            self.minus_patterns,
            self.graph_patterns,
        )
        for group in dedicated:
            for member in group:
                collected.update(member.get_variables())
        return collected
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
@dataclass
class Query:
    """Root of the query class hierarchy; holds the PREFIX declarations."""

    # Maps a prefix label to its namespace IRI text.
    prefixes: dict[str, str] = field(default_factory=dict)
|
|
541
|
+
|
|
542
|
+
|
|
543
|
+
@dataclass
class SelectQuery(Query):
    """
    A SELECT query returning variable bindings.

    SELECT ?s ?p ?o
    WHERE { ?s ?p ?o }

    SELECT ?s ?p ?o
    FROM <http://example.org/graph1>
    WHERE { ?s ?p ?o }

    SELECT ?s (COUNT(?p) AS ?count)
    WHERE { ?s ?p ?o }
    GROUP BY ?s
    HAVING (COUNT(?p) > 5)

    Time-travel query:
    SELECT ?s ?p ?o
    WHERE { ?s ?p ?o }
    AS OF "2025-01-15T00:00:00Z"
    """

    variables: list[SelectExpression] = field(default_factory=list)  # Empty list means SELECT *
    where: WhereClause = field(default_factory=WhereClause)
    distinct: bool = False
    limit: Optional[int] = None
    offset: Optional[int] = None
    order_by: list[tuple[Variable, bool]] = field(default_factory=list)  # (var, ascending)
    group_by: list[Variable] = field(default_factory=list)
    having: Optional[Union[Comparison, LogicalExpression, FunctionCall]] = None
    as_of: Optional[datetime] = None  # Time-travel: query as of this timestamp
    from_graphs: list[IRI] = field(default_factory=list)  # FROM <graph> clauses
    from_named_graphs: list[IRI] = field(default_factory=list)  # FROM NAMED <graph> clauses

    def is_select_all(self) -> bool:
        """Check if this is a SELECT * query."""
        return len(self.variables) == 0

    def has_aggregates(self) -> bool:
        """Check if any SELECT expression is an aggregate."""
        return any(isinstance(v, AggregateExpression) for v in self.variables)

    def __str__(self) -> str:
        """Render the query as SPARQL text.

        Written, in order: PREFIX lines, the SELECT clause, FROM and
        FROM NAMED clauses, the WHERE block (``where.patterns`` followed by
        ``where.filters``), then GROUP BY, HAVING, ORDER BY, LIMIT and
        OFFSET. The ``as_of`` timestamp and the other lists kept on the
        WHERE clause are not rendered.
        """
        parts = [f"PREFIX {prefix}: <{uri}>" for prefix, uri in self.prefixes.items()]

        # SELECT clause
        distinct_str = "DISTINCT " if self.distinct else ""
        if self.is_select_all():
            parts.append(f"SELECT {distinct_str}*")
        else:
            vars_str = " ".join(str(v) for v in self.variables)
            parts.append(f"SELECT {distinct_str}{vars_str}")

        # Dataset clauses
        parts.extend(f"FROM {graph}" for graph in self.from_graphs)
        parts.extend(f"FROM NAMED {graph}" for graph in self.from_named_graphs)

        # WHERE clause
        parts.append("WHERE {")
        parts.extend(f" {pattern}" for pattern in self.where.patterns)
        parts.extend(f" {flt}" for flt in self.where.filters)
        parts.append("}")

        # Solution modifiers, in grammar order
        if self.group_by:
            parts.append(f"GROUP BY {' '.join(str(v) for v in self.group_by)}")

        if self.having is not None:
            parts.append(f"HAVING ({self.having})")

        if self.order_by:
            order_parts = [
                str(var) if asc else f"DESC({var})"
                for var, asc in self.order_by
            ]
            parts.append(f"ORDER BY {' '.join(order_parts)}")

        # LIMIT 0 is meaningful (it requests no rows), so test against None
        # instead of relying on truthiness.
        if self.limit is not None:
            parts.append(f"LIMIT {self.limit}")

        # OFFSET 0 is a no-op, so it is left out.
        if self.offset:
            parts.append(f"OFFSET {self.offset}")

        return "\n".join(parts)
|
|
627
|
+
|
|
628
|
+
|
|
629
|
+
@dataclass
class AskQuery(Query):
    """An ASK query: reports whether the WHERE clause has any solution."""

    where: WhereClause = field(default_factory=WhereClause)
    # Time-travel: when set, the query is evaluated as of this timestamp.
    as_of: Optional[datetime] = None
|
|
634
|
+
|
|
635
|
+
|
|
636
|
+
@dataclass
class ConstructQuery(Query):
    """A CONSTRUCT query: builds a new graph from a triple template."""

    # Triple patterns forming the output template.
    template: list[TriplePattern] = field(default_factory=list)
    where: WhereClause = field(default_factory=WhereClause)
    # Time-travel: when set, the query is evaluated as of this timestamp.
    as_of: Optional[datetime] = None
|
|
642
|
+
|
|
643
|
+
|
|
644
|
+
@dataclass
class DescribeQuery(Query):
    """A DESCRIBE query: returns information about the listed resources."""

    # Resources to describe, given as variables or IRIs.
    resources: list[Union[Variable, IRI]] = field(default_factory=list)
    # Unlike the other query forms, the WHERE clause defaults to None.
    where: Optional[WhereClause] = None
    # Time-travel: when set, the query is evaluated as of this timestamp.
    as_of: Optional[datetime] = None
|
|
650
|
+
|
|
651
|
+
|
|
652
|
+
@dataclass
class InsertDataQuery(Query):
    """
    An INSERT DATA update operation.

    INSERT DATA {
        <s1> <p1> <o1> .
        <s2> <p2> "literal" .
    }

    Note: INSERT DATA does not allow variables - all terms must be ground.
    """

    triples: list[TriplePattern] = field(default_factory=list)
    graph: Optional[IRI] = None  # Optional GRAPH clause

    @staticmethod
    def _terminated(triple) -> str:
        """Return the triple's text ending in exactly one " ." terminator."""
        # TriplePattern.__str__ already ends with " ."; appending another
        # one unconditionally would produce "s p o . .".
        text = str(triple)
        return text if text.endswith(" .") else f"{text} ."

    def __str__(self) -> str:
        """Render the update, wrapping the triples in a GRAPH block when a
        target graph is set."""
        parts = [f"PREFIX {prefix}: <{uri}>" for prefix, uri in self.prefixes.items()]

        parts.append("INSERT DATA {")
        if self.graph is not None:
            parts.append(f" GRAPH <{self.graph.value}> {{")
            parts.extend(f" {self._terminated(triple)}" for triple in self.triples)
            parts.append(" }")
        else:
            parts.extend(f" {self._terminated(triple)}" for triple in self.triples)
        parts.append("}")
        return "\n".join(parts)
|
|
683
|
+
|
|
684
|
+
|
|
685
|
+
@dataclass
class DeleteDataQuery(Query):
    """
    A DELETE DATA update operation.

    DELETE DATA {
        <s1> <p1> <o1> .
    }
    """

    triples: list[TriplePattern] = field(default_factory=list)
    graph: Optional[IRI] = None  # Optional GRAPH clause

    @staticmethod
    def _terminated(triple) -> str:
        """Return the triple's text ending in exactly one " ." terminator."""
        # TriplePattern.__str__ already ends with " .".
        text = str(triple)
        return text if text.endswith(" .") else f"{text} ."

    def __str__(self) -> str:
        """Render the update in the same layout as InsertDataQuery, wrapping
        the triples in a GRAPH block when a target graph is set."""
        parts = [f"PREFIX {prefix}: <{uri}>" for prefix, uri in self.prefixes.items()]

        parts.append("DELETE DATA {")
        if self.graph is not None:
            parts.append(f" GRAPH <{self.graph.value}> {{")
            parts.extend(f" {self._terminated(triple)}" for triple in self.triples)
            parts.append(" }")
        else:
            parts.extend(f" {self._terminated(triple)}" for triple in self.triples)
        parts.append("}")
        return "\n".join(parts)
|
|
696
|
+
|
|
697
|
+
|
|
698
|
+
@dataclass
class UpdateQuery(Query):
    """Marker base class for SPARQL UPDATE operations; adds no fields."""

    # NOTE(review): the update classes visible in this module derive from
    # Query directly rather than from this class - confirm whether that is
    # intended.
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
@dataclass
class DeleteWhereQuery(Query):
    """
    A DELETE WHERE update operation.

    DELETE WHERE {
        ?s ?p ?o .
    }

    The pattern in the WHERE clause is also used as the delete template.
    """

    where: WhereClause = field(default_factory=WhereClause)
    graph: Optional[IRI] = None

    @staticmethod
    def _terminated(pattern) -> str:
        """Return the pattern's text ending in exactly one " ." terminator."""
        # TriplePattern.__str__ already ends with " ."; other pattern kinds
        # do not, so the terminator is added only when it is missing.
        text = str(pattern)
        return text if text.endswith(" .") else f"{text} ."

    def __str__(self) -> str:
        """Render the update from ``where.patterns``; the ``graph`` field is
        not rendered."""
        parts = [f"PREFIX {prefix}: <{uri}>" for prefix, uri in self.prefixes.items()]
        parts.append("DELETE WHERE {")
        parts.extend(f" {self._terminated(pattern)}" for pattern in self.where.patterns)
        parts.append("}")
        return "\n".join(parts)
|
|
727
|
+
|
|
728
|
+
|
|
729
|
+
@dataclass
class ModifyQuery(Query):
    """
    A DELETE/INSERT WHERE update operation.

    DELETE { <delete patterns> }
    INSERT { <insert patterns> }
    WHERE { <where patterns> }

    Can have either DELETE, INSERT, or both.
    Variables in the templates are bound from the WHERE clause.
    """

    delete_patterns: list[TriplePattern] = field(default_factory=list)
    insert_patterns: list[TriplePattern] = field(default_factory=list)
    where: WhereClause = field(default_factory=WhereClause)
    graph: Optional[IRI] = None

    @staticmethod
    def _terminated(pattern) -> str:
        """Return the pattern's text ending in exactly one " ." terminator."""
        # TriplePattern.__str__ already ends with " ."; appending another
        # one unconditionally would produce "s p o . .".
        text = str(pattern)
        return text if text.endswith(" .") else f"{text} ."

    def _section(self, header: str, patterns) -> list[str]:
        """Build the lines of one ``KEYWORD { ... }`` section."""
        lines = [header]
        lines.extend(f" {self._terminated(pattern)}" for pattern in patterns)
        lines.append("}")
        return lines

    def __str__(self) -> str:
        """Render the update; the DELETE and INSERT sections are written only
        when they have patterns, and the ``graph`` field is not rendered."""
        parts = [f"PREFIX {prefix}: <{uri}>" for prefix, uri in self.prefixes.items()]

        if self.delete_patterns:
            parts.extend(self._section("DELETE {", self.delete_patterns))

        if self.insert_patterns:
            parts.extend(self._section("INSERT {", self.insert_patterns))

        parts.extend(self._section("WHERE {", self.where.patterns))
        return "\n".join(parts)
|
|
768
|
+
|
|
769
|
+
|
|
770
|
+
# =============================================================================
|
|
771
|
+
# Graph Management Queries
|
|
772
|
+
# =============================================================================
|
|
773
|
+
|
|
774
|
+
@dataclass
class CreateGraphQuery(Query):
    """
    CREATE GRAPH <uri>

    Creates a new empty named graph.
    """

    # The default is None because the inherited ``prefixes`` field already
    # has a default, which forces every later field to have one as well.
    graph_uri: Optional[IRI] = None
    silent: bool = False

    def __str__(self) -> str:
        """Render the statement.

        Raises:
            AttributeError: if ``graph_uri`` was left as None.
        """
        silent_str = "SILENT " if self.silent else ""
        return f"CREATE {silent_str}GRAPH <{self.graph_uri.value}>"
|
|
787
|
+
|
|
788
|
+
|
|
789
|
+
@dataclass
class DropGraphQuery(Query):
    """
    DROP GRAPH <uri>
    DROP DEFAULT
    DROP NAMED
    DROP ALL

    Removes a graph from the graph store.
    """

    graph_uri: Optional[IRI] = None
    target: str = "graph"  # "graph", "default", "named", "all"
    silent: bool = False

    def __str__(self) -> str:
        prefix = "DROP SILENT " if self.silent else "DROP "
        # The three keyword targets print as their upper-case name; any
        # other target value is treated as a specific named graph.
        if self.target in ("default", "named", "all"):
            return prefix + self.target.upper()
        return prefix + "GRAPH <" + str(self.graph_uri.value) + ">"
|
|
813
|
+
|
|
814
|
+
|
|
815
|
+
@dataclass
class ClearGraphQuery(Query):
    """
    CLEAR GRAPH <uri>
    CLEAR DEFAULT
    CLEAR NAMED
    CLEAR ALL

    Removes all triples from a graph but keeps the graph.
    """

    graph_uri: Optional[IRI] = None
    target: str = "graph"  # "graph", "default", "named", "all"
    silent: bool = False

    def __str__(self) -> str:
        prefix = "CLEAR SILENT " if self.silent else "CLEAR "
        # The three keyword targets print as their upper-case name; any
        # other target value is treated as a specific named graph.
        if self.target in ("default", "named", "all"):
            return prefix + self.target.upper()
        return prefix + "GRAPH <" + str(self.graph_uri.value) + ">"
|
|
839
|
+
|
|
840
|
+
|
|
841
|
+
@dataclass
class LoadQuery(Query):
    """
    LOAD <source> INTO GRAPH <dest>

    Loads RDF from a source URI into a graph.
    """

    # The default is None because the inherited ``prefixes`` field already
    # has a default, which forces every later field to have one as well.
    source_uri: Optional[IRI] = None
    graph_uri: Optional[IRI] = None  # None = default graph
    silent: bool = False

    def __str__(self) -> str:
        """Render the statement; the INTO GRAPH part is written only when a
        destination graph is set.

        Raises:
            AttributeError: if ``source_uri`` was left as None.
        """
        silent_str = "SILENT " if self.silent else ""
        statement = f"LOAD {silent_str}<{self.source_uri.value}>"
        if self.graph_uri is not None:
            statement += f" INTO GRAPH <{self.graph_uri.value}>"
        return statement
|
|
857
|
+
|
|
858
|
+
|
|
859
|
+
@dataclass
class CopyGraphQuery(Query):
    """
    COPY <source> TO <dest>

    Copies all triples from source to destination (clears dest first).
    """

    source_graph: Optional[IRI] = None  # None = DEFAULT
    # The default is None only because earlier fields have defaults.
    dest_graph: Optional[IRI] = None
    silent: bool = False
    source_is_default: bool = False

    def __str__(self) -> str:
        """Render the statement.

        The source prints as DEFAULT when ``source_is_default`` is set or
        when ``source_graph`` is None, matching the "None = DEFAULT"
        convention noted on the field.

        Raises:
            AttributeError: if ``dest_graph`` was left as None.
        """
        silent_str = "SILENT " if self.silent else ""
        if self.source_is_default or self.source_graph is None:
            src = "DEFAULT"
        else:
            src = f"GRAPH <{self.source_graph.value}>"
        return f"COPY {silent_str}{src} TO GRAPH <{self.dest_graph.value}>"
|
|
875
|
+
|
|
876
|
+
|
|
877
|
+
@dataclass
class MoveGraphQuery(Query):
    """
    MOVE <source> TO <dest>

    Moves all triples from source to destination (clears both source and dest).
    """

    source_graph: Optional[IRI] = None  # None = DEFAULT
    # The default is None only because earlier fields have defaults.
    dest_graph: Optional[IRI] = None
    silent: bool = False
    source_is_default: bool = False

    def __str__(self) -> str:
        """Render the statement.

        The source prints as DEFAULT when ``source_is_default`` is set or
        when ``source_graph`` is None, matching the "None = DEFAULT"
        convention noted on the field.

        Raises:
            AttributeError: if ``dest_graph`` was left as None.
        """
        silent_str = "SILENT " if self.silent else ""
        if self.source_is_default or self.source_graph is None:
            src = "DEFAULT"
        else:
            src = f"GRAPH <{self.source_graph.value}>"
        return f"MOVE {silent_str}{src} TO GRAPH <{self.dest_graph.value}>"
|
|
893
|
+
|
|
894
|
+
|
|
895
|
+
@dataclass
class AddGraphQuery(Query):
    """
    ADD <source> TO <dest>

    Adds all triples from source to destination (doesn't clear dest).
    """

    source_graph: Optional[IRI] = None  # None = DEFAULT
    # The default is None only because earlier fields have defaults.
    dest_graph: Optional[IRI] = None
    silent: bool = False
    source_is_default: bool = False

    def __str__(self) -> str:
        """Render the statement.

        The source prints as DEFAULT when ``source_is_default`` is set or
        when ``source_graph`` is None, matching the "None = DEFAULT"
        convention noted on the field.

        Raises:
            AttributeError: if ``dest_graph`` was left as None.
        """
        silent_str = "SILENT " if self.silent else ""
        if self.source_is_default or self.source_graph is None:
            src = "DEFAULT"
        else:
            src = f"GRAPH <{self.source_graph.value}>"
        return f"ADD {silent_str}{src} TO GRAPH <{self.dest_graph.value}>"
|