rdf-starbase 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdf_starbase/__init__.py +57 -0
- rdf_starbase/ai_grounding.py +728 -0
- rdf_starbase/compat/__init__.py +26 -0
- rdf_starbase/compat/rdflib.py +1104 -0
- rdf_starbase/formats/__init__.py +29 -0
- rdf_starbase/formats/jsonld.py +488 -0
- rdf_starbase/formats/ntriples.py +419 -0
- rdf_starbase/formats/rdfxml.py +434 -0
- rdf_starbase/formats/turtle.py +882 -0
- rdf_starbase/models.py +92 -0
- rdf_starbase/registry.py +540 -0
- rdf_starbase/repositories.py +407 -0
- rdf_starbase/repository_api.py +739 -0
- rdf_starbase/sparql/__init__.py +35 -0
- rdf_starbase/sparql/ast.py +910 -0
- rdf_starbase/sparql/executor.py +1925 -0
- rdf_starbase/sparql/parser.py +1716 -0
- rdf_starbase/storage/__init__.py +44 -0
- rdf_starbase/storage/executor.py +1914 -0
- rdf_starbase/storage/facts.py +850 -0
- rdf_starbase/storage/lsm.py +531 -0
- rdf_starbase/storage/persistence.py +338 -0
- rdf_starbase/storage/quoted_triples.py +292 -0
- rdf_starbase/storage/reasoner.py +1035 -0
- rdf_starbase/storage/terms.py +628 -0
- rdf_starbase/store.py +1049 -0
- rdf_starbase/store_legacy.py +748 -0
- rdf_starbase/web.py +568 -0
- rdf_starbase-0.1.0.dist-info/METADATA +706 -0
- rdf_starbase-0.1.0.dist-info/RECORD +31 -0
- rdf_starbase-0.1.0.dist-info/WHEEL +4 -0
rdf_starbase/sparql/parser.py
@@ -0,0 +1,1716 @@
"""
SPARQL-Star Parser using pyparsing.

Implements parsing of SPARQL-Star queries following the W3C specification.
"""

from typing import Any, Optional
import pyparsing as pp
from pyparsing import (
    Keyword, Literal as Lit, Word, Regex, QuotedString,
    Suppress, Group, Optional as Opt, ZeroOrMore, OneOrMore,
    Forward, alphas, alphanums, nums, pyparsing_common,
    CaselessKeyword, Combine,
    DelimitedList,
)

from rdf_starbase.sparql.ast import (
    Query, SelectQuery, AskQuery, InsertDataQuery, DeleteDataQuery,
    DeleteWhereQuery, ModifyQuery,
    DescribeQuery, ConstructQuery,
    TriplePattern, QuotedTriplePattern,
    OptionalPattern, UnionPattern, GraphPattern, MinusPattern,
    Variable, IRI, Literal, BlankNode,
    Filter, Comparison, LogicalExpression, FunctionCall,
    AggregateExpression, Bind, ValuesClause,
    ComparisonOp, LogicalOp,
    WhereClause,
    Term,
    # Property Path types
    PropertyPath, PathIRI, PathSequence, PathAlternative,
    PathInverse, PathMod, PathNegatedPropertySet,
    PropertyPathModifier,
    # Graph management
    CreateGraphQuery, DropGraphQuery, ClearGraphQuery,
    LoadQuery, CopyGraphQuery, MoveGraphQuery, AddGraphQuery,
)


class SPARQLStarParser:
    """
    Parser for SPARQL-Star queries.

    Supports:
    - Standard SPARQL SELECT, ASK queries
    - RDF-Star quoted triple patterns (<< s p o >>)
    - FILTER expressions with comparisons and functions
    - RDF-StarBase provenance extensions
    """

    def __init__(self):
        self._build_grammar()

    def _build_grammar(self):
        """Build the pyparsing grammar for SPARQL-Star."""

        # Enable packrat parsing for performance
        pp.ParserElement.enable_packrat()
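        # (Packrat memoization caches intermediate parse results; it mainly
        # helps the recursive quoted-triple and property-path rules below,
        # which otherwise backtrack heavily.)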

        # =================================================================
        # Lexical tokens
        # =================================================================

        # Keywords (case-insensitive)
        SELECT = CaselessKeyword("SELECT")
        ASK = CaselessKeyword("ASK")
        WHERE = CaselessKeyword("WHERE")
        FILTER = CaselessKeyword("FILTER")
        PREFIX = CaselessKeyword("PREFIX")
        DISTINCT = CaselessKeyword("DISTINCT")
        LIMIT = CaselessKeyword("LIMIT")
        OFFSET = CaselessKeyword("OFFSET")
        ORDER = CaselessKeyword("ORDER")
        BY = CaselessKeyword("BY")
        ASC = CaselessKeyword("ASC")
        DESC = CaselessKeyword("DESC")
        AND = CaselessKeyword("AND") | Lit("&&")
        OR = CaselessKeyword("OR") | Lit("||")
        NOT = CaselessKeyword("NOT") | Lit("!")
        BOUND = CaselessKeyword("BOUND")
        ISIRI = CaselessKeyword("ISIRI") | CaselessKeyword("ISURI")
        ISBLANK = CaselessKeyword("ISBLANK")
        ISLITERAL = CaselessKeyword("ISLITERAL")
        STR = CaselessKeyword("STR")
        LANG = CaselessKeyword("LANG")
        DATATYPE = CaselessKeyword("DATATYPE")

        # SPARQL Update keywords
        INSERT = CaselessKeyword("INSERT")
        DELETE = CaselessKeyword("DELETE")
        DATA = CaselessKeyword("DATA")
        GRAPH = CaselessKeyword("GRAPH")

        # Graph management keywords
        CREATE = CaselessKeyword("CREATE")
        DROP = CaselessKeyword("DROP")
        CLEAR = CaselessKeyword("CLEAR")
        LOAD = CaselessKeyword("LOAD")
        COPY = CaselessKeyword("COPY")
        MOVE = CaselessKeyword("MOVE")
        ADD = CaselessKeyword("ADD")
        TO = CaselessKeyword("TO")
        INTO = CaselessKeyword("INTO")
        DEFAULT = CaselessKeyword("DEFAULT")
        NAMED = CaselessKeyword("NAMED")
        ALL = CaselessKeyword("ALL")
        SILENT = CaselessKeyword("SILENT")
        FROM = CaselessKeyword("FROM")

        # Additional SPARQL keywords
        OPTIONAL = CaselessKeyword("OPTIONAL")
        UNION = CaselessKeyword("UNION")
        MINUS = CaselessKeyword("MINUS")
        DESCRIBE = CaselessKeyword("DESCRIBE")
        CONSTRUCT = CaselessKeyword("CONSTRUCT")

        # GROUP BY and HAVING keywords
        GROUP = CaselessKeyword("GROUP")
        HAVING = CaselessKeyword("HAVING")
        AS = CaselessKeyword("AS")

        # BIND and VALUES keywords
        BIND = CaselessKeyword("BIND")
        VALUES = CaselessKeyword("VALUES")
        UNDEF = CaselessKeyword("UNDEF")

        # Time-travel keyword
        OF = CaselessKeyword("OF")  # AS is already defined, we combine AS + OF

        # Aggregate function keywords
        COUNT = CaselessKeyword("COUNT")
        SUM = CaselessKeyword("SUM")
        AVG = CaselessKeyword("AVG")
        MIN = CaselessKeyword("MIN")
        MAX = CaselessKeyword("MAX")
        GROUP_CONCAT = CaselessKeyword("GROUP_CONCAT")
        SAMPLE = CaselessKeyword("SAMPLE")
        SEPARATOR = CaselessKeyword("SEPARATOR")

        # Punctuation
        LBRACE = Suppress(Lit("{"))
        RBRACE = Suppress(Lit("}"))
        LPAREN = Suppress(Lit("("))
        RPAREN = Suppress(Lit(")"))
        DOT = Suppress(Lit("."))
        COMMA = Suppress(Lit(","))
        STAR = Lit("*")
        LQUOTE = Suppress(Lit("<<"))
        RQUOTE = Suppress(Lit(">>"))

        # Comparison operators
        comp_op = (
            Lit("<=") | Lit(">=") | Lit("!=") | Lit("<>") |
            Lit("=") | Lit("<") | Lit(">")
        )

        # =================================================================
        # Terms
        # =================================================================

        # Variable: ?name or $name
        def make_variable(tokens):
            return Variable(tokens[0][1:])

        variable = Combine(
            (Lit("?") | Lit("$")) + Word(alphas + "_", alphanums + "_")
        ).set_parse_action(make_variable)

        # IRI: <http://...> or prefix:localname
        def make_full_iri(tokens):
            return IRI(tokens[0][1:-1])

        full_iri = Combine(
            Lit("<") + Regex(r'[^<>]+') + Lit(">")
        ).set_parse_action(make_full_iri)

        # Prefixed name: prefix:local
        # Note: Forward slashes NOT allowed here (they are path separators in property paths)
        # Use full IRIs for path-like local names, e.g., <http://example.org/customer/123>
        pname_ns = Combine(Opt(Word(alphas, alphanums + "_")) + Lit(":"))
        pname_local = Word(alphanums + "_.-")

        def make_prefixed_name(tokens):
            return IRI(tokens[0])

        prefixed_name = Combine(pname_ns + Opt(pname_local)).set_parse_action(make_prefixed_name)

        iri = full_iri | prefixed_name

        # 'a' keyword as shorthand for rdf:type (SPARQL standard)
        RDF_TYPE_IRI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"

        def make_a_keyword(tokens):
            return IRI(RDF_TYPE_IRI)

        a_keyword = Keyword("a").set_parse_action(make_a_keyword)

        # IRI or 'a' keyword (for predicates)
        iri_or_a = iri | a_keyword

        # Literals
        string_literal = (
            QuotedString('"', esc_char='\\', multiline=True) |
            QuotedString("'", esc_char='\\', multiline=True)
        )

        # Language tag: @en, @en-US
        lang_tag = Combine(Lit("@") + Word(alphas + "-"))

        # Datatype: ^^<type> or ^^prefix:type
        datatype = Suppress(Lit("^^")) + iri

        # Full literal with optional language or datatype
        def make_literal(tokens):
            value = tokens[0]
            lang = None
            dtype = None
            if len(tokens) > 1:
                if isinstance(tokens[1], str) and tokens[1].startswith("@"):
                    lang = tokens[1][1:]
                elif isinstance(tokens[1], IRI):
                    dtype = tokens[1].value
            return Literal(value, language=lang, datatype=dtype)

        literal = (string_literal + Opt(lang_tag | datatype)).set_parse_action(make_literal)

        # Numeric literals
        def make_int_literal(tokens):
            return Literal(tokens[0], datatype="http://www.w3.org/2001/XMLSchema#integer")

        def make_float_literal(tokens):
            return Literal(tokens[0], datatype="http://www.w3.org/2001/XMLSchema#decimal")

        integer_literal = pyparsing_common.signed_integer.copy().set_parse_action(make_int_literal)
        float_literal = pyparsing_common.real.copy().set_parse_action(make_float_literal)

        # Boolean literals
        def make_true(tokens):
            return Literal(True)

        def make_false(tokens):
            return Literal(False)

        boolean_literal = (
            CaselessKeyword("true").set_parse_action(make_true) |
            CaselessKeyword("false").set_parse_action(make_false)
        )

        # =================================================================
        # AS OF Clause (Time-travel queries)
        # =================================================================

        from datetime import datetime, timezone

        # ISO 8601 datetime string: "2025-01-15T00:00:00Z" or "2025-01-15"
        def parse_datetime(tokens):
            dt_str = tokens[0]
            # Try various ISO formats
            for fmt in [
                "%Y-%m-%dT%H:%M:%SZ",
                "%Y-%m-%dT%H:%M:%S%z",
                "%Y-%m-%dT%H:%M:%S",
                "%Y-%m-%d",
            ]:
                try:
                    dt = datetime.strptime(dt_str, fmt)
                    # Ensure UTC if no timezone
                    if dt.tzinfo is None:
                        dt = dt.replace(tzinfo=timezone.utc)
                    return dt
                except ValueError:
                    continue
            raise ValueError(f"Cannot parse datetime: {dt_str}")

        datetime_literal = QuotedString('"', esc_char='\\').copy().set_parse_action(parse_datetime)

        as_of_clause = (Suppress(AS) + Suppress(OF) + datetime_literal)
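        # Example (RDF-StarBase time-travel extension; the clause is attached
        # after the solution modifiers in SELECT/ASK below):
        #   SELECT ?s WHERE { ?s ?p ?o } AS OF "2025-01-15T00:00:00Z"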

        # Blank node
        def make_blank_node(tokens):
            return BlankNode(tokens[0][2:])

        blank_node = Combine(
            Lit("_:") + Word(alphanums + "_")
        ).set_parse_action(make_blank_node)

        # =================================================================
        # Quoted Triple Pattern (RDF-Star)
        # =================================================================

        # Forward declaration for recursive quoted triples
        quoted_triple = Forward()

        # Term that can appear in a triple (including nested quoted triples)
        graph_term = variable | iri | literal | float_literal | integer_literal | boolean_literal | blank_node | quoted_triple

        # Quoted triple: << subject predicate object >>
        def make_quoted_triple(tokens):
            return QuotedTriplePattern(
                subject=tokens[0],
                predicate=tokens[1],
                object=tokens[2]
            )

        quoted_triple <<= (
            LQUOTE + graph_term + graph_term + graph_term + RQUOTE
        ).set_parse_action(make_quoted_triple)
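        # Example: << ?s :knows ?o >> :certainty ?c matches statements about
        # statements; and because graph_term includes quoted_triple, nesting
        # such as << << :a :b :c >> :saidBy :d >> also parses.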

        # Term including quoted triples
        term = graph_term

        # =================================================================
        # Property Paths (SPARQL 1.1)
        # =================================================================
        #
        # Property paths are recognized by explicit path operators:
        # - ^ (inverse) at the start
        # - ! (negated) at the start
        # - *, +, ? after an IRI
        # - / between IRIs (sequence)
        # - | between paths (alternative)
        #
        # A plain IRI like foaf:knows is NOT a property path.
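        # For example, the rules below parse: foaf:knows+ (one or more hops),
        # ^foaf:knows (inverse), foaf:knows/foaf:name (sequence), and
        # foaf:knows|foaf:member (alternative).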

        # Path modifiers - must NOT be followed by alphanumeric (to avoid ?var collision)
        from pyparsing import NotAny, Regex as PpRegex

        PATH_STAR = Lit("*") + NotAny(Word(alphanums))
        PATH_PLUS = Lit("+") + NotAny(Word(alphanums))
        PATH_QUESTION = Lit("?") + NotAny(Word(alphanums + "_"))  # ?name is a variable, not a modifier
        PATH_CARET = Lit("^")
        PATH_SLASH = Lit("/")
        PATH_PIPE = Lit("|")
        PATH_EXCLAIM = Lit("!")

        # Forward declaration
        path_expression = Forward()

        def make_path_iri(iri_val):
            if isinstance(iri_val, IRI):
                return PathIRI(iri=iri_val)
            return PathIRI(iri=IRI(str(iri_val)))

        # Grouped path: ( path_expression )
        path_group = (Suppress(LPAREN) + path_expression + Suppress(RPAREN))

        # Inverse path: ^iri or ^(path)
        def make_path_inverse(tokens):
            inner = tokens[0]
            if isinstance(inner, IRI):
                inner = make_path_iri(inner)
            return PathInverse(path=inner)

        path_inverse = (
            Suppress(PATH_CARET) + (iri | path_group)
        ).set_parse_action(make_path_inverse)

        # Negated property set: !(iri|iri|...) or !iri
        def make_path_negated(tokens):
            iris = []
            for t in tokens:
                if isinstance(t, IRI):
                    iris.append(t)
                elif isinstance(t, PathIRI):
                    iris.append(t.iri)
            return PathNegatedPropertySet(iris=tuple(iris))

        path_negated = (
            Suppress(PATH_EXCLAIM) +
            (
                (Suppress(LPAREN) + DelimitedList(iri, delim="|") + Suppress(RPAREN)) |
                iri
            )
        ).set_parse_action(make_path_negated)

        # Modified IRI: iri+ or iri* or iri?
        def make_path_mod(tokens):
            iri_val = tokens[0]
            mod_str = tokens[1]
            path = make_path_iri(iri_val)
            if mod_str == "*":
                return PathMod(path=path, modifier=PropertyPathModifier.ZERO_OR_MORE)
            elif mod_str == "+":
                return PathMod(path=path, modifier=PropertyPathModifier.ONE_OR_MORE)
            elif mod_str == "?":
                return PathMod(path=path, modifier=PropertyPathModifier.ZERO_OR_ONE)
            return path

        path_iri_modified = (
            iri + (PATH_STAR | PATH_PLUS | PATH_QUESTION)
        ).set_parse_action(make_path_mod)

        # A path element: inverse, negated, modified IRI, or grouped
        path_element = path_inverse | path_negated | path_iri_modified | path_group

        # A path step (for sequences): path element or plain IRI
        def wrap_path_step(tokens):
            t = tokens[0]
            if isinstance(t, IRI):
                return make_path_iri(t)
            return t

        path_step = (path_element | iri.copy().set_parse_action(wrap_path_step))

        # Sequence path: path1/path2/... (requires at least one /)
        def make_path_sequence(tokens):
            paths = list(tokens)
            if len(paths) == 1:
                return paths[0]
            return PathSequence(paths=tuple(paths))

        path_sequence = (
            path_step + OneOrMore(Suppress(PATH_SLASH) + path_step)
        ).set_parse_action(make_path_sequence)

        # Alternative path: path1|path2|... (requires at least one |)
        def make_path_alternative(tokens):
            paths = list(tokens)
            if len(paths) == 1:
                return paths[0]
            return PathAlternative(paths=tuple(paths))

        # Atomic path for alternatives: sequence, element, or plain IRI wrapped
        # We include path_step here to allow plain IRIs in alternatives
        path_atomic = path_sequence | path_element | path_step

        path_alternative = (
            path_atomic + OneOrMore(Suppress(PATH_PIPE) + path_atomic)
        ).set_parse_action(make_path_alternative)

        # Complete path expression
        path_expression <<= path_alternative | path_atomic

        # Predicate: try 'a' keyword, path expression, or term
        predicate_path = a_keyword | path_expression | term

        # =================================================================
        # Triple Patterns (with property list and object list support)
        # =================================================================

        # SPARQL property lists use:
        # - ; (semicolon) = same subject, different predicate-object pair
        # - , (comma) = same subject and predicate, different object

        SEMICOLON = Suppress(Lit(";"))

        def make_single_triple(tokens):
            """Create a single triple pattern."""
            pred = tokens[1]
            if isinstance(pred, PathIRI):
                pred = pred.iri
            return TriplePattern(
                subject=tokens[0],
                predicate=pred,
                object=tokens[2]
            )

        # Simple triple without property/object lists
        simple_triple = (
            term + predicate_path + term
        ).set_parse_action(make_single_triple)

        # Object list: same subject and predicate, multiple objects
        # <s> <p> <o1> , <o2> , <o3>
        object_list = term + ZeroOrMore(COMMA + term)

        # Predicate-object pair: predicate followed by object(s)
        predicate_object = predicate_path + object_list

        # Predicate-object list: multiple predicate-object pairs separated by ;
        # <p1> <o1> ; <p2> <o2> ; <p3> <o3>
        predicate_object_list = predicate_object + ZeroOrMore(SEMICOLON + predicate_object)

        def is_predicate_type(token):
            """Check if a token is a valid predicate type."""
            # Path expressions are always predicates
            if isinstance(token, (PathIRI, PathSequence, PathAlternative, PathInverse, PathMod, PathNegatedPropertySet)):
                return True
            # Plain IRIs can be predicates
            if isinstance(token, IRI):
                return True
            # Variables can be predicates (e.g., ?s ?p ?o)
            if isinstance(token, Variable):
                return True
            return False

        def is_object_type(token):
            """Check if a token is a valid object type."""
            return isinstance(token, (IRI, Variable, Literal, BlankNode, QuotedTriplePattern))

        def normalize_predicate(pred):
            """Normalize predicate - unwrap PathIRI to IRI for simple cases."""
            if isinstance(pred, PathIRI):
                return pred.iri
            return pred

        def make_triple_block(tokens):
            """Parse a triple block with optional property/object lists.

            Tokens come as a flat list after semicolons/commas are suppressed:
            [subject, pred1, obj1, pred2, obj2, ...]

            For property paths like <foaf:knows>+, the predicate is a PathMod.

            Expands:
            - ?s <p1> <o1> ; <p2> <o2> . → [(?s, <p1>, <o1>), (?s, <p2>, <o2>)]
            - ?s <p> <o1> , <o2> . → [(?s, <p>, <o1>), (?s, <p>, <o2>)]
            """
            tokens_list = list(tokens)
            if not tokens_list:
                return []

            subject = tokens_list[0]
            triples = []

            # Process remaining tokens as alternating predicate-objects
            # Since semicolons are suppressed, we get: [pred1, obj1, pred2, obj2, ...]
            i = 1
            while i < len(tokens_list):
                # Get predicate - can be IRI, PathIRI, or other path expressions
                pred = tokens_list[i]

                if not is_predicate_type(pred):
                    # Skip non-predicates (shouldn't happen but defensive)
                    i += 1
                    continue

                # Normalize simple PathIRI to IRI
                pred = normalize_predicate(pred)

                i += 1
                if i >= len(tokens_list):
                    break

                # Get object(s) - handle object lists with comma
                while i < len(tokens_list):
                    obj = tokens_list[i]

                    # If it's an object type, create triple
                    if is_object_type(obj):
                        triples.append(TriplePattern(
                            subject=subject,
                            predicate=pred,
                            object=obj
                        ))
                        i += 1

                        # Check if next token is also an object (comma was suppressed)
                        if i < len(tokens_list):
                            next_tok = tokens_list[i]
                            # If next is a path expression (not just IRI), it's a predicate
                            if isinstance(next_tok, (PathIRI, PathSequence, PathAlternative, PathInverse, PathMod, PathNegatedPropertySet)):
                                break
                            # If next is IRI, need to peek further to determine if predicate or object
                            # Heuristic: if it's followed by something that could be an object, it's a predicate
                            if isinstance(next_tok, IRI):
                                # Look ahead to see if there's an object after this
                                if i + 1 < len(tokens_list) and is_object_type(tokens_list[i + 1]):
                                    break  # It's a predicate
                            # If next is still an object type (Variable, Literal, etc), continue object list
                            if isinstance(next_tok, (Variable, Literal, BlankNode)):
                                continue  # Continue in object list
                            break
                    else:
                        break

            return triples if triples else []

        # Full triple block: subject + predicate-object list + optional dot
        triple_block = (
            term + predicate_object_list + Opt(DOT)
        ).set_parse_action(make_triple_block)

        # triple_pattern now returns a list of TriplePatterns
        triple_pattern = triple_block

        # =================================================================
        # FILTER Expressions
        # =================================================================

        # Expression forward declaration
        expression = Forward()

        # Function call
        func_name = (
            BOUND | ISIRI | ISBLANK | ISLITERAL | STR | LANG | DATATYPE |
            Word(alphas, alphanums + "_")
        )

        def make_function_call(tokens):
            return FunctionCall(name=str(tokens[0]).upper(), arguments=list(tokens[1:]))

        function_call = (
            func_name + LPAREN + Opt(DelimitedList(expression)) + RPAREN
        ).set_parse_action(make_function_call)

        # Primary expression
        primary_expr = (
            function_call |
            variable |
            literal |
            float_literal |
            integer_literal |
            boolean_literal |
            iri |
            (LPAREN + expression + RPAREN)
        )

        # Comparison expression
        def make_comparison(tokens):
            if len(tokens) == 3:
                return Comparison(
                    left=tokens[0],
                    operator=ComparisonOp.from_str(tokens[1]),
                    right=tokens[2]
                )
            return tokens[0]

        comparison_expr = (
            primary_expr + Opt(comp_op + primary_expr)
        ).set_parse_action(make_comparison)

        # NOT expression
        def make_not(tokens):
            if len(tokens) == 2:  # Has NOT
                return LogicalExpression(LogicalOp.NOT, [tokens[1]])
            return tokens[0]

        not_expr = (
            Opt(NOT) + comparison_expr
        ).set_parse_action(make_not)

        # AND expression
        def make_and(tokens):
            tokens = list(tokens)
            if len(tokens) == 1:
                return tokens[0]
            return LogicalExpression(LogicalOp.AND, tokens)

        and_expr = (
            not_expr + ZeroOrMore(Suppress(AND) + not_expr)
        ).set_parse_action(make_and)

        # OR expression (lowest precedence)
        def make_or(tokens):
            tokens = list(tokens)
            if len(tokens) == 1:
                return tokens[0]
            return LogicalExpression(LogicalOp.OR, tokens)

        expression <<= (
            and_expr + ZeroOrMore(Suppress(OR) + and_expr)
        ).set_parse_action(make_or)

        # Standard FILTER
        def make_filter(tokens):
            return Filter(expression=tokens[0])

        filter_clause = (
            Suppress(FILTER) + LPAREN + expression + RPAREN
        ).set_parse_action(make_filter)
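        # Example: FILTER(?age >= 21 && BOUND(?name)) parses to a
        # LogicalExpression(AND) over a Comparison and a FunctionCall.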

        # =================================================================
        # OPTIONAL and UNION Patterns
        # =================================================================

        # Forward declaration for nested group patterns
        group_graph_pattern = Forward()

        # OPTIONAL { ... }
        def make_optional(tokens):
            patterns = []
            filters = []
            for token in tokens:
                if isinstance(token, (TriplePattern, QuotedTriplePattern)):
                    patterns.append(token)
                elif isinstance(token, list):
                    for item in token:
                        if isinstance(item, (TriplePattern, QuotedTriplePattern)):
                            patterns.append(item)
                elif isinstance(token, Filter):
                    filters.append(token)
                elif isinstance(token, OptionalPattern):
                    patterns.append(token)
            return OptionalPattern(patterns=patterns, filters=filters)

        optional_pattern = (
            Suppress(OPTIONAL) + LBRACE + ZeroOrMore(triple_pattern | filter_clause) + RBRACE
        ).set_parse_action(make_optional)

        # UNION { ... } UNION { ... }
        # A group graph pattern that can participate in UNION
        def make_group_pattern(tokens):
            """Convert a list of patterns/filters into a tuple for UNION alternatives."""
            patterns = []
            filters = []
            for token in tokens:
                if isinstance(token, (TriplePattern, QuotedTriplePattern)):
                    patterns.append(token)
                elif isinstance(token, list):
                    for item in token:
                        if isinstance(item, (TriplePattern, QuotedTriplePattern)):
                            patterns.append(item)
                elif isinstance(token, Filter):
                    filters.append(token)
                elif isinstance(token, OptionalPattern):
                    patterns.append(token)
            return (patterns, filters)

        union_alternative = (
            LBRACE + ZeroOrMore(triple_pattern | filter_clause | optional_pattern) + RBRACE
        ).set_parse_action(make_group_pattern)

        def make_union(tokens):
            """Combine UNION alternatives into UnionPattern."""
            alternatives = []
            for token in tokens:
                if isinstance(token, tuple) and len(token) == 2:
                    patterns, filters = token
                    alternatives.append(patterns)
            return UnionPattern(alternatives=alternatives)

        union_pattern = (
            union_alternative + OneOrMore(Suppress(UNION) + union_alternative)
        ).set_parse_action(make_union)
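        # Example: { ?s :email ?c } UNION { ?s :phone ?c } produces a
        # UnionPattern whose alternatives are the two pattern lists.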

        # =================================================================
        # MINUS Pattern
        # =================================================================

        def make_minus(tokens):
            """Create a MINUS pattern for set difference."""
            patterns = []
            filters = []
            for token in tokens:
                if isinstance(token, (TriplePattern, QuotedTriplePattern)):
                    patterns.append(token)
                elif isinstance(token, list):
                    for item in token:
                        if isinstance(item, (TriplePattern, QuotedTriplePattern)):
                            patterns.append(item)
                elif isinstance(token, Filter):
                    filters.append(token)
                elif isinstance(token, OptionalPattern):
                    patterns.append(token)
            return MinusPattern(patterns=patterns, filters=filters)

        minus_pattern = (
            Suppress(MINUS) + LBRACE + ZeroOrMore(triple_pattern | filter_clause) + RBRACE
        ).set_parse_action(make_minus)
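        # Example: a WHERE body such as ?s a :Person . MINUS { ?s :hidden true }
        # drops solutions that also match the MINUS group.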

        # =================================================================
        # BIND Clause
        # =================================================================

        bind_variable = Combine(
            (Lit("?") | Lit("$")) + Word(alphas + "_", alphanums + "_")
        ).set_parse_action(make_variable)

        def make_bind(tokens):
            # BIND(expr AS ?var)
            expr = tokens[0]
            var = tokens[1]
            return Bind(expression=expr, variable=var)

        bind_clause = (
            Suppress(BIND) + LPAREN +
            (expression | literal | float_literal | integer_literal | variable | iri) +
            Suppress(AS) + bind_variable +
            RPAREN
        ).set_parse_action(make_bind)
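        # Example: BIND(STR(?iri) AS ?label) binds the value of an expression
        # (here a function call) to a fresh variable.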

        # =================================================================
        # VALUES Clause
        # =================================================================

        values_variable = Combine(
            (Lit("?") | Lit("$")) + Word(alphas + "_", alphanums + "_")
        ).set_parse_action(make_variable)

        # Value term (can be UNDEF or a value)
        def make_undef(tokens):
            return None

        value_term = (
            UNDEF.set_parse_action(make_undef) |
            iri | literal | float_literal | integer_literal | boolean_literal
        )

        # Single variable VALUES: VALUES ?x { 1 2 3 }
        def make_single_values(tokens):
            var = tokens[0]
            bindings = [[v] for v in tokens[1:]]
            return ValuesClause(variables=[var], bindings=bindings)

        single_values = (
            Suppress(VALUES) + values_variable + LBRACE + ZeroOrMore(value_term) + RBRACE
        ).set_parse_action(make_single_values)

        # Multi-variable VALUES: VALUES (?x ?y) { (1 2) (3 4) }
        def make_value_row(tokens):
            return list(tokens)

        value_row = (LPAREN + ZeroOrMore(value_term) + RPAREN).set_parse_action(make_value_row)

        def make_multi_values(tokens):
            # First tokens are variables, rest are rows
            vars_list = []
            rows = []
            for token in tokens:
                if isinstance(token, Variable):
                    vars_list.append(token)
                elif isinstance(token, list):
                    rows.append(token)
            return ValuesClause(variables=vars_list, bindings=rows)

        multi_values = (
            Suppress(VALUES) + LPAREN + OneOrMore(values_variable) + RPAREN +
            LBRACE + ZeroOrMore(value_row) + RBRACE
        ).set_parse_action(make_multi_values)

        values_clause = multi_values | single_values

        # =================================================================
        # GRAPH Pattern
        # =================================================================

        # Forward declaration for graph_pattern since where_pattern needs it
        graph_pattern = Forward()

        def make_graph_pattern(tokens):
            graph_ref = tokens[0]
            patterns = []
            for token in tokens[1:]:
                if isinstance(token, (TriplePattern, QuotedTriplePattern)):
                    patterns.append(token)
                elif isinstance(token, list):
                    for item in token:
                        if isinstance(item, (TriplePattern, QuotedTriplePattern)):
                            patterns.append(item)
            return GraphPattern(graph=graph_ref, patterns=patterns)

        graph_pattern <<= (
            Suppress(GRAPH) + (variable | iri) + LBRACE + ZeroOrMore(triple_pattern) + RBRACE
        ).set_parse_action(make_graph_pattern)

        # =================================================================
        # WHERE Clause
        # =================================================================

        where_pattern = triple_pattern | filter_clause | optional_pattern | union_pattern | minus_pattern | bind_clause | values_clause | graph_pattern

        def make_where_clause(tokens):
            patterns = []
            filters = []
            optional_patterns = []
            union_patterns = []
            minus_patterns = []
            binds = []
            values = None
            graph_patterns = []
            for token in tokens:
                if isinstance(token, (TriplePattern, QuotedTriplePattern)):
                    patterns.append(token)
                elif isinstance(token, list):
                    # Handle expanded triple blocks (from property/object lists)
                    for item in token:
                        if isinstance(item, (TriplePattern, QuotedTriplePattern)):
                            patterns.append(item)
                elif isinstance(token, Filter):
                    filters.append(token)
                elif isinstance(token, OptionalPattern):
                    optional_patterns.append(token)
                elif isinstance(token, UnionPattern):
                    union_patterns.append(token)
                elif isinstance(token, MinusPattern):
                    minus_patterns.append(token)
                elif isinstance(token, Bind):
                    binds.append(token)
                elif isinstance(token, ValuesClause):
                    values = token
                elif isinstance(token, GraphPattern):
                    graph_patterns.append(token)
            return WhereClause(
                patterns=patterns,
                filters=filters,
                optional_patterns=optional_patterns,
                union_patterns=union_patterns,
                minus_patterns=minus_patterns,
                binds=binds,
                values=values,
                graph_patterns=graph_patterns
            )

        # WHERE clause with optional WHERE keyword (for ASK queries)
        where_clause = (
            Suppress(Opt(WHERE)) + LBRACE + ZeroOrMore(where_pattern) + RBRACE
        ).set_parse_action(make_where_clause)

        # =================================================================
        # PREFIX Declarations
        # =================================================================

        def make_prefix(tokens):
            prefix = tokens[0][:-1]  # Remove trailing colon
            uri = tokens[1].value
            return (prefix, uri)

        prefix_decl = (
            Suppress(PREFIX) + pname_ns + full_iri
        ).set_parse_action(make_prefix)

        # =================================================================
        # SELECT Query
        # =================================================================

        # Use a fresh copy of variable for select to avoid parse action interference
        select_variable = Combine(
            (Lit("?") | Lit("$")) + Word(alphas + "_", alphanums + "_")
        ).set_parse_action(make_variable)

        # Aggregate functions
        aggregate_name = COUNT | SUM | AVG | MIN | MAX | GROUP_CONCAT | SAMPLE

        # Separator for GROUP_CONCAT
        separator_clause = Suppress(Lit(";")) + Suppress(SEPARATOR) + Suppress(Lit("=")) + (
            QuotedString('"', esc_char='\\') | QuotedString("'", esc_char='\\')
        )

        def make_aggregate(tokens):
            func_name = str(tokens[0]).upper()
            distinct = False
            arg = None
            separator = None

            for i, t in enumerate(tokens[1:], 1):
                if str(t).upper() == "DISTINCT":
                    distinct = True
                elif t == "*":
                    arg = None  # COUNT(*)
                elif isinstance(t, Variable):
                    arg = t
                elif isinstance(t, str) and t not in ("DISTINCT", "*"):
                    separator = t

            return AggregateExpression(
                function=func_name,
                argument=arg,
                distinct=distinct,
                separator=separator
            )

        # COUNT(*) or COUNT(DISTINCT ?var) or COUNT(?var)
        aggregate_arg = (
            Opt(DISTINCT) + (STAR | select_variable) + Opt(separator_clause)
        )

        aggregate_expr = (
            aggregate_name + LPAREN + aggregate_arg + RPAREN
        ).set_parse_action(make_aggregate)
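        # Examples: COUNT(*), COUNT(DISTINCT ?x), and
        # GROUP_CONCAT(?name ; SEPARATOR = ", ") each parse to an
        # AggregateExpression; the separator applies only to GROUP_CONCAT.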

        # Aggregate with alias: (COUNT(?x) AS ?count)
        def make_aggregate_with_alias(tokens):
            agg = tokens[0]
            if len(tokens) > 1 and isinstance(tokens[1], Variable):
                agg.alias = tokens[1]
            return agg

        aliased_aggregate = (
            LPAREN + aggregate_expr + Suppress(AS) + select_variable + RPAREN
        ).set_parse_action(make_aggregate_with_alias)

        # Select expression: variable or (aggregate AS ?alias)
        select_expr = aliased_aggregate | aggregate_expr | select_variable

        # Variable list or *
        def make_star(tokens):
            return []

        select_vars = (
            STAR.set_parse_action(make_star) |
            OneOrMore(select_expr)
        )

        # GROUP BY clause
        group_by_variable = Combine(
            (Lit("?") | Lit("$")) + Word(alphas + "_", alphanums + "_")
        ).set_parse_action(make_variable)

        def make_group_by_marker(tokens):
            """Mark this as a GROUP BY list."""
            return ("GROUP_BY", list(tokens))

        group_by_clause = (
            Suppress(GROUP) + Suppress(BY) + OneOrMore(group_by_variable)
        ).set_parse_action(make_group_by_marker)

        # HAVING clause
        having_clause = Suppress(HAVING) + LPAREN + expression + RPAREN

        # ORDER BY clause - use fresh copy
        order_variable = Combine(
            (Lit("?") | Lit("$")) + Word(alphas + "_", alphanums + "_")
        ).set_parse_action(make_variable)

        def make_order_desc(tokens):
            return (tokens[0], False)

        def make_order_asc(tokens):
            return (tokens[0], True)

        # Plain variable for order by (no ASC/DESC) needs special handling
        def make_plain_order(tokens):
            # tokens[0] is the raw string like "?name", need to convert to Variable
            var_name = tokens[0][1:]  # Remove the ? or $
            return (Variable(var_name), True)  # Default to ascending

        plain_order_var = Combine(
            (Lit("?") | Lit("$")) + Word(alphas + "_", alphanums + "_")
        ).set_parse_action(make_plain_order)

        order_condition = (
            (Suppress(DESC) + LPAREN + order_variable + RPAREN).set_parse_action(make_order_desc) |
            (Suppress(ASC) + LPAREN + order_variable + RPAREN).set_parse_action(make_order_asc) |
            plain_order_var
        )

        order_clause = Suppress(ORDER) + Suppress(BY) + OneOrMore(order_condition)

        # LIMIT and OFFSET
        limit_clause = Suppress(LIMIT) + pyparsing_common.integer
        offset_clause = Suppress(OFFSET) + pyparsing_common.integer

        # FROM clause for dataset specification
        from_clause = Suppress(FROM) + iri
        from_named_clause = Suppress(FROM) + Suppress(NAMED) + iri

        def make_select_query(tokens):
            prefixes = {}
            variables = []
            distinct = False
            where = WhereClause()
            limit = None
            offset = None
            order_by = []
            group_by = []
            having = None
            as_of = None
            from_graphs = []
            from_named_graphs = []

            for token in tokens:
                if isinstance(token, datetime):
                    as_of = token
                elif isinstance(token, tuple) and len(token) == 2:
                    if token[0] == "GROUP_BY":
                        # This is a GROUP BY clause
                        group_by = token[1]
                    elif token[0] == "FROM":
                        # This is a FROM clause
                        from_graphs.append(token[1])
                    elif token[0] == "FROM_NAMED":
                        # This is a FROM NAMED clause
                        from_named_graphs.append(token[1])
                    elif isinstance(token[0], str) and isinstance(token[1], str):
                        # This is a prefix declaration
                        prefixes[token[0]] = token[1]
                    elif isinstance(token[0], Variable):
                        # This is an order by condition
                        order_by.append(token)
                elif token == "DISTINCT":
                    distinct = True
                elif isinstance(token, AggregateExpression):
                    variables.append(token)
                elif isinstance(token, Variable):
                    variables.append(token)
                elif isinstance(token, (Comparison, LogicalExpression, FunctionCall)):
                    # HAVING expression
                    having = token
                elif isinstance(token, pp.ParseResults) or isinstance(token, list):
                    # Check what's in the list
                    token_list = list(token)
                    if token_list and isinstance(token_list[0], (Variable, AggregateExpression)):
                        variables = token_list
                    elif token_list and isinstance(token_list[0], tuple):
                        # Could be order_by or group_by marker
                        if token_list[0][0] == "GROUP_BY":
                            group_by = token_list[0][1]
                        else:
                            order_by = token_list
                    elif token_list == []:
                        pass  # SELECT *
                elif isinstance(token, WhereClause):
                    where = token
                elif isinstance(token, int):
                    if limit is None:
                        limit = token
                    else:
                        offset = token

            return SelectQuery(
                prefixes=prefixes,
                variables=variables,
                where=where,
                distinct=distinct,
                limit=limit,
                offset=offset,
                order_by=order_by,
                group_by=group_by,
                having=having,
                as_of=as_of,
                from_graphs=from_graphs,
                from_named_graphs=from_named_graphs,
            )

        def make_distinct(tokens):
            return "DISTINCT"

        def make_from_clause(tokens):
            return ("FROM", tokens[0])

        def make_from_named_clause(tokens):
            return ("FROM_NAMED", tokens[0])

        select_query = (
            ZeroOrMore(prefix_decl) +
            Suppress(SELECT) +
            Opt(DISTINCT.set_parse_action(make_distinct)) +
            Group(select_vars) +
            ZeroOrMore(from_named_clause.set_parse_action(make_from_named_clause) | from_clause.set_parse_action(make_from_clause)) +
            where_clause +
            Opt(Group(group_by_clause)) +
            Opt(having_clause) +
            Opt(Group(order_clause)) +
            Opt(limit_clause) +
            Opt(offset_clause) +
            Opt(as_of_clause)
        ).set_parse_action(make_select_query)
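        # Clause order accepted by this grammar:
        #   PREFIX ... SELECT [DISTINCT] vars|* FROM / FROM NAMED ...
        #   WHERE { ... } GROUP BY ... HAVING (...) ORDER BY ...
        #   LIMIT n OFFSET m AS OF "<ISO datetime>"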

        # =================================================================
        # ASK Query
        # =================================================================

        def make_ask_query(tokens):
            prefixes = {}
            where = WhereClause()
            as_of = None
            for token in tokens:
                if isinstance(token, datetime):
                    as_of = token
                elif isinstance(token, tuple) and len(token) == 2 and isinstance(token[0], str):
                    prefixes[token[0]] = token[1]
                elif isinstance(token, WhereClause):
                    where = token
            return AskQuery(prefixes=prefixes, where=where, as_of=as_of)

        ask_query = (
            ZeroOrMore(prefix_decl) +
            Suppress(ASK) +
            where_clause +
            Opt(as_of_clause)
        ).set_parse_action(make_ask_query)

        # =================================================================
        # INSERT DATA Update (with full Turtle syntax support)
        # =================================================================

        # Special prefixed name for Turtle/INSERT DATA that allows slashes
        # (unlike the main prefixed_name which doesn't, to avoid conflicts with property paths)
        turtle_pname_local = Word(alphanums + "_.-/")

        def make_turtle_prefixed_name(tokens):
            return IRI(tokens[0])

        turtle_prefixed_name = Combine(pname_ns + Opt(turtle_pname_local)).set_parse_action(make_turtle_prefixed_name)
        turtle_iri = full_iri | turtle_prefixed_name
        turtle_iri_or_a = turtle_iri | a_keyword

        # Ground term for INSERT DATA (no variables, allows path-like prefixed names)
        # Also includes quoted_triple for RDF-Star annotation support
        ground_term = quoted_triple | turtle_iri | literal | float_literal | integer_literal | boolean_literal | blank_node

        # Turtle-style triple parsing with semicolons and commas
        # Semicolon (;) = same subject, new predicate-object pair
        # Comma (,) = same subject and predicate, new object
        # Dot (.) = end of triple block

        SEMICOLON = Suppress(Lit(";"))

        def parse_turtle_triples(tokens):
            """Parse Turtle-style triples into a list of TriplePattern objects.

            Handles:
            - Simple triples: <s> <p> <o> .
            - Property lists: <s> <p1> <o1> ; <p2> <o2> .
            - Object lists: <s> <p> <o1> , <o2> , <o3> .
            - Combined: <s> <p1> <o1> , <o2> ; <p2> <o3> .
            - RDF-Star: << s p o >> <annotation_pred> <value> .
            """
            triples = []
            token_list = list(tokens)

            i = 0
            current_subject = None
            current_predicate = None

            while i < len(token_list):
                token = token_list[i]

                # Skip punctuation strings if they slip through
                if isinstance(token, str) and token in '.;,':
                    i += 1
                    continue

                # If we have a ground term and no subject yet, or after a dot
                if current_subject is None:
                    if isinstance(token, (IRI, Literal, BlankNode, QuotedTriplePattern)):
                        current_subject = token
                        current_predicate = None
                        i += 1
                        continue

                # If we have subject but no predicate
                if current_subject is not None and current_predicate is None:
                    if isinstance(token, IRI):
                        current_predicate = token
                        i += 1
                        continue

                # If we have subject and predicate, next is object
                if current_subject is not None and current_predicate is not None:
                    if isinstance(token, (IRI, Literal, BlankNode, QuotedTriplePattern)):
                        triples.append(TriplePattern(
                            subject=current_subject,
                            predicate=current_predicate,
                            object=token
                        ))
                        i += 1

                        # Check what comes next
                        if i < len(token_list):
                            next_token = token_list[i]
                            if isinstance(next_token, str):
                                if next_token == ',':
                                    # Same subject and predicate, new object
                                    i += 1
                                    continue
                                elif next_token == ';':
                                    # Same subject, new predicate
                                    current_predicate = None
                                    i += 1
                                    continue
                                elif next_token == '.':
                                    # End of this subject block
                                    current_subject = None
                                    current_predicate = None
                                    i += 1
                                    continue
                        continue

                i += 1

            return triples

        # Object list: <o1> , <o2> , <o3>
        turtle_object = ground_term
        turtle_object_list = turtle_object + ZeroOrMore(Lit(",") + turtle_object)

        # Predicate-object: <p> <o1> , <o2> (use turtle_iri_or_a for path-like prefixed names)
        turtle_predicate = turtle_iri_or_a
        turtle_predicate_object = turtle_predicate + turtle_object_list
        turtle_predicate_object_list = turtle_predicate_object + ZeroOrMore(Lit(";") + Opt(turtle_predicate_object))

        # Full triple block: <s> <p1> <o1> ; <p2> <o2> , <o3> .
        turtle_triple_block = ground_term + turtle_predicate_object_list + Opt(Lit("."))

        # Multiple triple blocks
        turtle_triples = ZeroOrMore(turtle_triple_block)
        turtle_triples.set_parse_action(parse_turtle_triples)
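        # Example INSERT DATA body these rules accept, including an RDF-Star
        # annotation:
        #   :alice :name "Alice" ; :knows :bob , :carol .
        #   << :alice :knows :bob >> :certainty 0.9 .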
|
|
1279
|
+
|
|
1280
|
+
# Ground triple for INSERT DATA (simple form - backward compatibility)
|
|
1281
|
+
def make_ground_triple(tokens):
|
|
1282
|
+
return TriplePattern(
|
|
1283
|
+
subject=tokens[0],
|
|
1284
|
+
predicate=tokens[1],
|
|
1285
|
+
object=tokens[2],
|
|
1286
|
+
)
|
|
1287
|
+
|
|
1288
|
+
ground_triple = (
|
|
1289
|
+
ground_term + ground_term + ground_term + Opt(DOT)
|
|
1290
|
+
).set_parse_action(make_ground_triple)
|
|
1291
|
+
|
|
1292
|
+
# INSERT DATA { triples } - supports full Turtle syntax
|
|
1293
|
+
def make_insert_data_query(tokens):
|
|
1294
|
+
prefixes = {}
|
|
1295
|
+
triples = []
|
|
1296
|
+
graph = None
|
|
1297
|
+
|
|
1298
|
+
for token in tokens:
|
|
1299
|
+
if isinstance(token, tuple) and len(token) == 2 and isinstance(token[0], str):
|
|
1300
|
+
prefixes[token[0]] = token[1]
|
|
1301
|
+
elif isinstance(token, TriplePattern):
|
|
1302
|
+
triples.append(token)
|
|
1303
|
+
elif isinstance(token, IRI):
|
|
1304
|
+
graph = token
|
|
1305
|
+
elif isinstance(token, list) or isinstance(token, pp.ParseResults):
|
|
1306
|
+
for item in token:
|
|
1307
|
+
if isinstance(item, TriplePattern):
|
|
1308
|
+
triples.append(item)
|
|
1309
|
+
|
|
1310
|
+
return InsertDataQuery(prefixes=prefixes, triples=triples, graph=graph)
|
|
1311
|
+
|
|
1312
|
+
# Use turtle_triples for full Turtle syntax support
|
|
1313
|
+
insert_data_body = LBRACE + turtle_triples + RBRACE
|
|
1314
|
+
|
|
1315
|
+
insert_data_query = (
|
|
1316
|
+
ZeroOrMore(prefix_decl) +
|
|
1317
|
+
Suppress(INSERT) +
|
|
1318
|
+
Suppress(DATA) +
|
|
1319
|
+
insert_data_body
|
|
1320
|
+
).set_parse_action(make_insert_data_query)
|
|
1321
|
+
|
|
1322
|
+
# =================================================================
|
|
1323
|
+
# DELETE DATA Update
|
|
1324
|
+
# =================================================================
|
|
1325
|
+
|
|
1326
|
+
def make_delete_data_query(tokens):
|
|
1327
|
+
prefixes = {}
|
|
1328
|
+
triples = []
|
|
1329
|
+
graph = None
|
|
1330
|
+
|
|
1331
|
+
for token in tokens:
|
|
1332
|
+
if isinstance(token, tuple) and len(token) == 2 and isinstance(token[0], str):
|
|
1333
|
+
prefixes[token[0]] = token[1]
|
|
1334
|
+
elif isinstance(token, TriplePattern):
|
|
1335
|
+
triples.append(token)
|
|
1336
|
+
elif isinstance(token, list) or isinstance(token, pp.ParseResults):
|
|
1337
|
+
for item in token:
|
|
1338
|
+
if isinstance(item, TriplePattern):
|
|
1339
|
+
triples.append(item)
|
|
1340
|
+
|
|
1341
|
+
return DeleteDataQuery(prefixes=prefixes, triples=triples, graph=graph)
|
|
1342
|
+
|
|
1343
|
+
delete_data_query = (
|
|
1344
|
+
ZeroOrMore(prefix_decl) +
|
|
1345
|
+
Suppress(DELETE) +
|
|
1346
|
+
Suppress(DATA) +
|
|
1347
|
+
insert_data_body
|
|
1348
|
+
).set_parse_action(make_delete_data_query)
|
|
1349
|
+
|
|
1350
|
+
        # =================================================================
        # DELETE WHERE Update
        # =================================================================

        def make_delete_where_query(tokens):
            prefixes = {}
            where = WhereClause()
            graph = None

            for token in tokens:
                if isinstance(token, tuple) and len(token) == 2 and isinstance(token[0], str):
                    prefixes[token[0]] = token[1]
                elif isinstance(token, WhereClause):
                    where = token

            return DeleteWhereQuery(prefixes=prefixes, where=where, graph=graph)

        delete_where_query = (
            ZeroOrMore(prefix_decl) +
            Suppress(DELETE) +
            where_clause
        ).set_parse_action(make_delete_where_query)

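        # Illustrative example (editorial addition): DELETE WHERE removes
        # every solution of the pattern. The ex: namespace is hypothetical.
        #
        #   PREFIX ex: <http://example.org/>
        #   DELETE WHERE { ?s ex:obsolete ?o . }
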
        # =================================================================
        # DELETE/INSERT WHERE (Modify) Update
        # =================================================================

        # Template for DELETE/INSERT patterns (can contain variables)
        template_triple = (
            term + term + term + Opt(DOT)
        ).set_parse_action(make_ground_triple)

        # DELETE { patterns } clause (patterns in braces, no WHERE keyword).
        # Note: delete_template/insert_template are not referenced by
        # modify_query below, which rebuilds the braced sections inline so it
        # can tag them with section markers.
        delete_template = LBRACE + ZeroOrMore(template_triple) + RBRACE

        # INSERT { patterns } clause
        insert_template = LBRACE + ZeroOrMore(template_triple) + RBRACE

        def make_modify_query(tokens):
            prefixes = {}
            delete_patterns = []
            insert_patterns = []
            where = WhereClause()

            # Track which section we're in.
            # Tokens arrive as: [prefixes...], [delete patterns...],
            # [insert patterns...], WhereClause
            section = "prefixes"

            for token in tokens:
                if isinstance(token, tuple) and len(token) == 2 and isinstance(token[0], str):
                    prefixes[token[0]] = token[1]
                elif token == "DELETE_SECTION":
                    section = "delete"
                elif token == "INSERT_SECTION":
                    section = "insert"
                elif isinstance(token, WhereClause):
                    where = token
                elif isinstance(token, TriplePattern):
                    if section == "delete":
                        delete_patterns.append(token)
                    elif section == "insert":
                        insert_patterns.append(token)
                elif isinstance(token, pp.ParseResults):
                    for item in token:
                        if isinstance(item, TriplePattern):
                            if section == "delete":
                                delete_patterns.append(item)
                            elif section == "insert":
                                insert_patterns.append(item)

            return ModifyQuery(
                prefixes=prefixes,
                delete_patterns=delete_patterns,
                insert_patterns=insert_patterns,
                where=where
            )

        # DELETE { } INSERT { } WHERE { } - full modify query.
        # Markers distinguish delete patterns from insert patterns: the parse
        # action's return value replaces the suppressed "{" token, so the
        # marker string survives into the token stream.
        delete_section = (
            Suppress(DELETE) +
            pp.Literal("{").suppress().set_parse_action(lambda: "DELETE_SECTION") +
            ZeroOrMore(template_triple) +
            Suppress(RBRACE)
        )

        insert_section = (
            Suppress(INSERT) +
            pp.Literal("{").suppress().set_parse_action(lambda: "INSERT_SECTION") +
            ZeroOrMore(template_triple) +
            Suppress(RBRACE)
        )

        # Modify query with both DELETE and INSERT (or just one).
        # At least one of DELETE or INSERT is expected before WHERE, though
        # both sections are optional in the grammar itself.
        modify_query = (
            ZeroOrMore(prefix_decl) +
            Opt(delete_section) +
            Opt(insert_section) +
            where_clause
        ).set_parse_action(make_modify_query)

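        # Illustrative example (editorial addition): a combined DELETE/INSERT
        # update this production should accept; names are hypothetical.
        #
        #   PREFIX ex: <http://example.org/>
        #   DELETE { ?s ex:status "active" }
        #   INSERT { ?s ex:status "archived" }
        #   WHERE  { ?s ex:status "active" }
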
        # =================================================================
        # DESCRIBE Query
        # =================================================================

        describe_resource = iri | variable

        def make_describe_query(tokens):
            prefixes = {}
            resources = []
            where = None

            for token in tokens:
                if isinstance(token, tuple) and len(token) == 2 and isinstance(token[0], str):
                    prefixes[token[0]] = token[1]
                elif isinstance(token, (IRI, Variable)):
                    resources.append(token)
                elif isinstance(token, WhereClause):
                    where = token

            return DescribeQuery(prefixes=prefixes, resources=resources, where=where)

        describe_query = (
            ZeroOrMore(prefix_decl) +
            Suppress(DESCRIBE) +
            OneOrMore(describe_resource) +
            Opt(where_clause)
        ).set_parse_action(make_describe_query)

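        # Illustrative examples (editorial addition): both forms the grammar
        # above admits; the IRIs are hypothetical.
        #
        #   DESCRIBE <http://example.org/alice>
        #   DESCRIBE ?s WHERE { ?s ?p "needle" }
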
        # =================================================================
        # CONSTRUCT Query
        # =================================================================

        construct_template = LBRACE + ZeroOrMore(triple_pattern) + RBRACE

        def make_construct_query(tokens):
            prefixes = {}
            template = []
            where = WhereClause()

            for token in tokens:
                if isinstance(token, tuple) and len(token) == 2 and isinstance(token[0], str):
                    prefixes[token[0]] = token[1]
                elif isinstance(token, TriplePattern):
                    template.append(token)
                elif isinstance(token, WhereClause):
                    where = token

            return ConstructQuery(prefixes=prefixes, template=template, where=where)

        construct_query = (
            ZeroOrMore(prefix_decl) +
            Suppress(CONSTRUCT) +
            construct_template +
            where_clause
        ).set_parse_action(make_construct_query)

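        # Illustrative example (editorial addition): the template is emitted
        # once per WHERE solution; the ex: namespace is hypothetical.
        #
        #   PREFIX ex: <http://example.org/>
        #   CONSTRUCT { ?a ex:connectedTo ?b }
        #   WHERE { ?a ex:knows ?b }
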
        # =================================================================
        # Graph Management Queries
        # =================================================================

        # CREATE [SILENT] GRAPH <uri>
        def make_create_graph(tokens):
            silent = False
            graph_uri = None
            for token in tokens:
                if token == "SILENT":
                    silent = True
                elif isinstance(token, IRI):
                    graph_uri = token
            return CreateGraphQuery(prefixes={}, graph_uri=graph_uri, silent=silent)

        create_graph_query = (
            Suppress(CREATE) + Opt(SILENT.set_parse_action(lambda: "SILENT")) +
            Suppress(GRAPH) + iri
        ).set_parse_action(make_create_graph)

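        # Illustrative example (editorial addition; hypothetical graph IRI):
        #
        #   CREATE SILENT GRAPH <http://example.org/graphs/g1>
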
        # DROP [SILENT] (GRAPH <uri> | DEFAULT | NAMED | ALL)
        def make_drop_graph(tokens):
            silent = False
            graph_uri = None
            target = "graph"
            for token in tokens:
                if token == "SILENT":
                    silent = True
                elif token == "DEFAULT":
                    target = "default"
                elif token == "NAMED":
                    target = "named"
                elif token == "ALL":
                    target = "all"
                elif isinstance(token, IRI):
                    graph_uri = token
            return DropGraphQuery(prefixes={}, graph_uri=graph_uri, target=target, silent=silent)

        drop_target = (
            (Suppress(GRAPH) + iri) |
            DEFAULT.set_parse_action(lambda: "DEFAULT") |
            NAMED.set_parse_action(lambda: "NAMED") |
            ALL.set_parse_action(lambda: "ALL")
        )
        drop_graph_query = (
            Suppress(DROP) + Opt(SILENT.set_parse_action(lambda: "SILENT")) + drop_target
        ).set_parse_action(make_drop_graph)

        # CLEAR [SILENT] (GRAPH <uri> | DEFAULT | NAMED | ALL)
        def make_clear_graph(tokens):
            silent = False
            graph_uri = None
            target = "graph"
            for token in tokens:
                if token == "SILENT":
                    silent = True
                elif token == "DEFAULT":
                    target = "default"
                elif token == "NAMED":
                    target = "named"
                elif token == "ALL":
                    target = "all"
                elif isinstance(token, IRI):
                    graph_uri = token
            return ClearGraphQuery(prefixes={}, graph_uri=graph_uri, target=target, silent=silent)

        clear_graph_query = (
            Suppress(CLEAR) + Opt(SILENT.set_parse_action(lambda: "SILENT")) + drop_target
        ).set_parse_action(make_clear_graph)

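        # Illustrative examples (editorial addition): the same drop_target
        # grammar serves both operations; graph IRIs are hypothetical.
        #
        #   DROP GRAPH <http://example.org/graphs/g1>
        #   DROP SILENT ALL
        #   CLEAR DEFAULT
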
        # LOAD [SILENT] <source> [INTO GRAPH <dest>]
        def make_load(tokens):
            silent = False
            source_uri = None
            graph_uri = None
            for token in tokens:
                if token == "SILENT":
                    silent = True
                elif isinstance(token, IRI):
                    if source_uri is None:
                        source_uri = token
                    else:
                        graph_uri = token
            return LoadQuery(prefixes={}, source_uri=source_uri, graph_uri=graph_uri, silent=silent)

        load_query = (
            Suppress(LOAD) + Opt(SILENT.set_parse_action(lambda: "SILENT")) +
            iri + Opt(Suppress(INTO) + Suppress(GRAPH) + iri)
        ).set_parse_action(make_load)

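        # Illustrative example (editorial addition; hypothetical IRIs).
        # Without INTO GRAPH, graph_uri stays None, which presumably denotes
        # the default graph.
        #
        #   LOAD <http://example.org/data.ttl> INTO GRAPH <http://example.org/graphs/g1>
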
        # COPY/MOVE/ADD [SILENT] (DEFAULT | GRAPH <uri>) TO (DEFAULT | GRAPH <uri>)
        def make_graph_transfer(operation):
            def action(tokens):
                silent = False
                source_graph = None
                dest_graph = None
                source_is_default = False

                token_list = list(tokens)
                i = 0
                while i < len(token_list):
                    token = token_list[i]
                    if token == "SILENT":
                        silent = True
                    elif token == "DEFAULT":
                        if source_graph is None and not source_is_default:
                            source_is_default = True
                        # A DEFAULT destination is represented by dest_graph
                        # remaining None.
                    elif isinstance(token, IRI):
                        if source_graph is None and not source_is_default:
                            source_graph = token
                        else:
                            dest_graph = token
                    i += 1

                if operation == "COPY":
                    return CopyGraphQuery(
                        prefixes={}, source_graph=source_graph, dest_graph=dest_graph,
                        silent=silent, source_is_default=source_is_default
                    )
                elif operation == "MOVE":
                    return MoveGraphQuery(
                        prefixes={}, source_graph=source_graph, dest_graph=dest_graph,
                        silent=silent, source_is_default=source_is_default
                    )
                else:  # ADD
                    return AddGraphQuery(
                        prefixes={}, source_graph=source_graph, dest_graph=dest_graph,
                        silent=silent, source_is_default=source_is_default
                    )
            return action

        graph_ref = (
            DEFAULT.set_parse_action(lambda: "DEFAULT") |
            (Suppress(GRAPH) + iri)
        )

        copy_query = (
            Suppress(COPY) + Opt(SILENT.set_parse_action(lambda: "SILENT")) +
            graph_ref + Suppress(TO) + graph_ref
        ).set_parse_action(make_graph_transfer("COPY"))

        move_query = (
            Suppress(MOVE) + Opt(SILENT.set_parse_action(lambda: "SILENT")) +
            graph_ref + Suppress(TO) + graph_ref
        ).set_parse_action(make_graph_transfer("MOVE"))

        add_query = (
            Suppress(ADD) + Opt(SILENT.set_parse_action(lambda: "SILENT")) +
            graph_ref + Suppress(TO) + graph_ref
        ).set_parse_action(make_graph_transfer("ADD"))

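        # Illustrative examples (editorial addition; hypothetical graph IRIs).
        # DEFAULT on either side maps to source_is_default / a None dest_graph
        # in the action above.
        #
        #   COPY DEFAULT TO GRAPH <http://example.org/graphs/backup>
        #   MOVE GRAPH <http://example.org/graphs/g1> TO GRAPH <http://example.org/graphs/g2>
        #   ADD GRAPH <http://example.org/graphs/g1> TO DEFAULT
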
        # =================================================================
        # Top-level Query
        # =================================================================

        # Note: Order matters - more specific patterns must come first.
        # modify_query must come before delete_where_query because:
        #   DELETE { ... } WHERE { ... } should match modify_query
        #   DELETE WHERE { ... } should match delete_where_query
        # delete_data_query must come before delete_where_query (the DATA
        # keyword distinguishes them).
        self.query = (
            select_query | ask_query | describe_query | construct_query |
            insert_data_query | delete_data_query | modify_query | delete_where_query |
            create_graph_query | drop_graph_query | clear_graph_query |
            load_query | copy_query | move_query | add_query
        )

        # Ignore comments (the second registration duplicates
        # pythonStyleComment's "#"-to-end-of-line rule, but is harmless)
        self.query.ignore(pp.pythonStyleComment)
        self.query.ignore(Lit("#") + pp.restOfLine)

    def parse(self, query_string: str) -> Query:
        """
        Parse a SPARQL-Star query string into an AST.

        Args:
            query_string: The SPARQL-Star query to parse

        Returns:
            Parsed Query AST

        Raises:
            ParseException: If the query is malformed
        """
        result = self.query.parse_string(query_string, parse_all=True)
        return result[0]


# Module-level parser instance for convenience
_parser: Optional[SPARQLStarParser] = None


def parse_query(query_string: str) -> Query:
    """
    Parse a SPARQL-Star query string.

    This is a convenience function that uses a cached parser instance.

    Args:
        query_string: The SPARQL-Star query to parse

    Returns:
        Parsed Query AST
    """
    global _parser
    if _parser is None:
        _parser = SPARQLStarParser()
    return _parser.parse(query_string)
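

# Illustrative usage (editorial addition, not part of the released module):
# a minimal smoke test of the convenience function, assuming the grammar
# above accepts these standard forms and the AST classes behave as their
# names suggest.
if __name__ == "__main__":
    q = parse_query("SELECT ?s WHERE { ?s ?p ?o }")
    print(type(q).__name__)  # expected: SelectQuery

    u = parse_query(
        'INSERT DATA { <http://example.org/a> <http://example.org/p> "v" . }'
    )
    print(type(u).__name__)  # expected: InsertDataQuery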