rdf-starbase 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1716 @@
+ """
+ SPARQL-Star Parser using pyparsing.
+
+ Implements parsing of SPARQL-Star queries following the W3C specification.
+ """
+
+ from typing import Optional
+ import pyparsing as pp
+ from pyparsing import (
+     Keyword, Literal as Lit, Word, Regex, QuotedString,
+     Suppress, Group, Optional as Opt, ZeroOrMore, OneOrMore,
+     Forward, alphas, alphanums, pyparsing_common,
+     CaselessKeyword, Combine,
+     DelimitedList,
+ )
+
+ from rdf_starbase.sparql.ast import (
+     Query, SelectQuery, AskQuery, InsertDataQuery, DeleteDataQuery,
+     DeleteWhereQuery, ModifyQuery,
+     DescribeQuery, ConstructQuery,
+     TriplePattern, QuotedTriplePattern,
+     OptionalPattern, UnionPattern, GraphPattern, MinusPattern,
+     Variable, IRI, Literal, BlankNode,
+     Filter, Comparison, LogicalExpression, FunctionCall,
+     AggregateExpression, Bind, ValuesClause,
+     ComparisonOp, LogicalOp,
+     WhereClause,
+     Term,
+     # Property Path types
+     PropertyPath, PathIRI, PathSequence, PathAlternative,
+     PathInverse, PathMod, PathNegatedPropertySet,
+     PropertyPathModifier,
+     # Graph management
+     CreateGraphQuery, DropGraphQuery, ClearGraphQuery,
+     LoadQuery, CopyGraphQuery, MoveGraphQuery, AddGraphQuery,
+ )
+
+
+ class SPARQLStarParser:
+     """
+     Parser for SPARQL-Star queries.
+
+     Supports:
+     - Standard SPARQL SELECT, ASK queries
+     - RDF-Star quoted triple patterns (<< s p o >>)
+     - FILTER expressions with comparisons and functions
+     - RDF-StarBase provenance extensions
+     """
+
+     def __init__(self):
+         self._build_grammar()
+
+     def _build_grammar(self):
+         """Build the pyparsing grammar for SPARQL-Star."""
+
+         # Enable packrat parsing for performance
+         pp.ParserElement.enable_packrat()
+
+         # =================================================================
+         # Lexical tokens
+         # =================================================================
+
+         # Keywords (case-insensitive)
+         SELECT = CaselessKeyword("SELECT")
+         ASK = CaselessKeyword("ASK")
+         WHERE = CaselessKeyword("WHERE")
+         FILTER = CaselessKeyword("FILTER")
+         PREFIX = CaselessKeyword("PREFIX")
+         DISTINCT = CaselessKeyword("DISTINCT")
+         LIMIT = CaselessKeyword("LIMIT")
+         OFFSET = CaselessKeyword("OFFSET")
+         ORDER = CaselessKeyword("ORDER")
+         BY = CaselessKeyword("BY")
+         ASC = CaselessKeyword("ASC")
+         DESC = CaselessKeyword("DESC")
+         AND = CaselessKeyword("AND") | Lit("&&")
+         OR = CaselessKeyword("OR") | Lit("||")
+         NOT = CaselessKeyword("NOT") | Lit("!")
+         BOUND = CaselessKeyword("BOUND")
+         ISIRI = CaselessKeyword("ISIRI") | CaselessKeyword("ISURI")
+         ISBLANK = CaselessKeyword("ISBLANK")
+         ISLITERAL = CaselessKeyword("ISLITERAL")
+         STR = CaselessKeyword("STR")
+         LANG = CaselessKeyword("LANG")
+         DATATYPE = CaselessKeyword("DATATYPE")
+
+         # SPARQL Update keywords
+         INSERT = CaselessKeyword("INSERT")
+         DELETE = CaselessKeyword("DELETE")
+         DATA = CaselessKeyword("DATA")
+         GRAPH = CaselessKeyword("GRAPH")
+
+         # Graph management keywords
+         CREATE = CaselessKeyword("CREATE")
+         DROP = CaselessKeyword("DROP")
+         CLEAR = CaselessKeyword("CLEAR")
+         LOAD = CaselessKeyword("LOAD")
+         COPY = CaselessKeyword("COPY")
+         MOVE = CaselessKeyword("MOVE")
+         ADD = CaselessKeyword("ADD")
+         TO = CaselessKeyword("TO")
+         INTO = CaselessKeyword("INTO")
+         DEFAULT = CaselessKeyword("DEFAULT")
+         NAMED = CaselessKeyword("NAMED")
+         ALL = CaselessKeyword("ALL")
+         SILENT = CaselessKeyword("SILENT")
+         FROM = CaselessKeyword("FROM")
+
+         # Additional SPARQL keywords
+         OPTIONAL = CaselessKeyword("OPTIONAL")
+         UNION = CaselessKeyword("UNION")
+         MINUS = CaselessKeyword("MINUS")
+         DESCRIBE = CaselessKeyword("DESCRIBE")
+         CONSTRUCT = CaselessKeyword("CONSTRUCT")
+
+         # GROUP BY and HAVING keywords
+         GROUP = CaselessKeyword("GROUP")
+         HAVING = CaselessKeyword("HAVING")
+         AS = CaselessKeyword("AS")
+
+         # BIND and VALUES keywords
+         BIND = CaselessKeyword("BIND")
+         VALUES = CaselessKeyword("VALUES")
+         UNDEF = CaselessKeyword("UNDEF")
+
+         # Time-travel keyword
+         OF = CaselessKeyword("OF")  # AS is already defined; we combine AS + OF
+
+         # Aggregate function keywords
+         COUNT = CaselessKeyword("COUNT")
+         SUM = CaselessKeyword("SUM")
+         AVG = CaselessKeyword("AVG")
+         MIN = CaselessKeyword("MIN")
+         MAX = CaselessKeyword("MAX")
+         GROUP_CONCAT = CaselessKeyword("GROUP_CONCAT")
+         SAMPLE = CaselessKeyword("SAMPLE")
+         SEPARATOR = CaselessKeyword("SEPARATOR")
+
+         # Punctuation
+         LBRACE = Suppress(Lit("{"))
+         RBRACE = Suppress(Lit("}"))
+         LPAREN = Suppress(Lit("("))
+         RPAREN = Suppress(Lit(")"))
+         DOT = Suppress(Lit("."))
+         COMMA = Suppress(Lit(","))
+         STAR = Lit("*")
+         LQUOTE = Suppress(Lit("<<"))
+         RQUOTE = Suppress(Lit(">>"))
+
+         # Comparison operators
+         comp_op = (
+             Lit("<=") | Lit(">=") | Lit("!=") | Lit("<>") |
+             Lit("=") | Lit("<") | Lit(">")
+         )
+
+         # =================================================================
+         # Terms
+         # =================================================================
+
+         # Variable: ?name or $name
+         def make_variable(tokens):
+             return Variable(tokens[0][1:])
+
+         variable = Combine(
+             (Lit("?") | Lit("$")) + Word(alphas + "_", alphanums + "_")
+         ).set_parse_action(make_variable)
+
+         # IRI: <http://...> or prefix:localname
+         def make_full_iri(tokens):
+             return IRI(tokens[0][1:-1])
+
+         full_iri = Combine(
+             Lit("<") + Regex(r'[^<>]+') + Lit(">")
+         ).set_parse_action(make_full_iri)
+
+         # Prefixed name: prefix:local
+         # Note: Forward slashes NOT allowed here (they are path separators in property paths)
+         # Use full IRIs for path-like local names, e.g., <http://example.org/customer/123>
+         pname_ns = Combine(Opt(Word(alphas, alphanums + "_")) + Lit(":"))
+         pname_local = Word(alphanums + "_.-")
+
+         def make_prefixed_name(tokens):
+             return IRI(tokens[0])
+
+         prefixed_name = Combine(pname_ns + Opt(pname_local)).set_parse_action(make_prefixed_name)
+
+         iri = full_iri | prefixed_name
+
+         # 'a' keyword as shorthand for rdf:type (SPARQL standard)
+         RDF_TYPE_IRI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
+
+         def make_a_keyword(tokens):
+             return IRI(RDF_TYPE_IRI)
+
+         a_keyword = Keyword("a").set_parse_action(make_a_keyword)
+
+         # IRI or 'a' keyword (for predicates)
+         iri_or_a = iri | a_keyword
+
+         # Literals
+         string_literal = (
+             QuotedString('"', esc_char='\\', multiline=True) |
+             QuotedString("'", esc_char='\\', multiline=True)
+         )
+
+         # Language tag: @en, @en-US
+         lang_tag = Combine(Lit("@") + Word(alphas + "-"))
+
+         # Datatype: ^^<type> or ^^prefix:type
+         datatype = Suppress(Lit("^^")) + iri
+
+         # Full literal with optional language or datatype
+         def make_literal(tokens):
+             value = tokens[0]
+             lang = None
+             dtype = None
+             if len(tokens) > 1:
+                 if isinstance(tokens[1], str) and tokens[1].startswith("@"):
+                     lang = tokens[1][1:]
+                 elif isinstance(tokens[1], IRI):
+                     dtype = tokens[1].value
+             return Literal(value, language=lang, datatype=dtype)
+
+         literal = (string_literal + Opt(lang_tag | datatype)).set_parse_action(make_literal)
+
+         # Numeric literals
+         def make_int_literal(tokens):
+             return Literal(tokens[0], datatype="http://www.w3.org/2001/XMLSchema#integer")
+
+         def make_float_literal(tokens):
+             return Literal(tokens[0], datatype="http://www.w3.org/2001/XMLSchema#decimal")
+
+         integer_literal = pyparsing_common.signed_integer.copy().set_parse_action(make_int_literal)
+         float_literal = pyparsing_common.real.copy().set_parse_action(make_float_literal)
+
+         # Boolean literals
+         def make_true(tokens):
+             return Literal(True)
+
+         def make_false(tokens):
+             return Literal(False)
+
+         boolean_literal = (
+             CaselessKeyword("true").set_parse_action(make_true) |
+             CaselessKeyword("false").set_parse_action(make_false)
+         )
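+
+         # Illustrative sketch (not part of the grammar): given the constructors
+         # above, the literal rules map the usual SPARQL forms as follows.
+         # Note the datatype prefix is stored unexpanded by this parser.
+         #   "chat"@en          -> Literal("chat", language="en")
+         #   "42"^^xsd:integer  -> Literal("42", datatype="xsd:integer")
+         #   3.14               -> Literal(3.14, datatype=".../XMLSchema#decimal")
+         #   true               -> Literal(True)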
+
+         # =================================================================
+         # AS OF Clause (Time-travel queries)
+         # =================================================================
+
+         from datetime import datetime, timezone
+
+         # ISO 8601 datetime string: "2025-01-15T00:00:00Z" or "2025-01-15"
+         def parse_datetime(tokens):
+             dt_str = tokens[0]
+             # Try various ISO formats
+             for fmt in [
+                 "%Y-%m-%dT%H:%M:%SZ",
+                 "%Y-%m-%dT%H:%M:%S%z",
+                 "%Y-%m-%dT%H:%M:%S",
+                 "%Y-%m-%d",
+             ]:
+                 try:
+                     dt = datetime.strptime(dt_str, fmt)
+                     # Ensure UTC if no timezone
+                     if dt.tzinfo is None:
+                         dt = dt.replace(tzinfo=timezone.utc)
+                     return dt
+                 except ValueError:
+                     continue
+             raise ValueError(f"Cannot parse datetime: {dt_str}")
+
+         datetime_literal = QuotedString('"', esc_char='\\').copy().set_parse_action(parse_datetime)
+
+         as_of_clause = (Suppress(AS) + Suppress(OF) + datetime_literal)
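+
+         # Usage sketch (hypothetical data): AS OF pins a query to a point in
+         # time, e.g.
+         #   SELECT ?o WHERE { ?s ?p ?o } AS OF "2025-01-15T00:00:00Z"
+         # parse_datetime() turns the quoted string into a timezone-aware
+         # datetime, which make_select_query() below stores on SelectQuery.as_of.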
+
+         # Blank node
+         def make_blank_node(tokens):
+             return BlankNode(tokens[0][2:])
+
+         blank_node = Combine(
+             Lit("_:") + Word(alphanums + "_")
+         ).set_parse_action(make_blank_node)
+
+         # =================================================================
+         # Quoted Triple Pattern (RDF-Star)
+         # =================================================================
+
+         # Forward declaration for recursive quoted triples
+         quoted_triple = Forward()
+
+         # Term that can appear in a triple (including nested quoted triples)
+         graph_term = variable | iri | literal | float_literal | integer_literal | boolean_literal | blank_node | quoted_triple
+
+         # Quoted triple: << subject predicate object >>
+         def make_quoted_triple(tokens):
+             return QuotedTriplePattern(
+                 subject=tokens[0],
+                 predicate=tokens[1],
+                 object=tokens[2]
+             )
+
+         quoted_triple <<= (
+             LQUOTE + graph_term + graph_term + graph_term + RQUOTE
+         ).set_parse_action(make_quoted_triple)
+
+         # Term including quoted triples
+         term = graph_term
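+
+         # Illustrative sketch: because graph_term includes quoted_triple, quoted
+         # triples nest, so a pattern such as (hypothetical prefixes)
+         #   << << :alice :knows :bob >> :certainty ?c >>
+         # parses to a QuotedTriplePattern whose subject is itself a
+         # QuotedTriplePattern for the :alice/:knows/:bob statement.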
+
+         # =================================================================
+         # Property Paths (SPARQL 1.1)
+         # =================================================================
+         #
+         # Property paths are recognized by explicit path operators:
+         #   - ^ (inverse) at the start
+         #   - ! (negated) at the start
+         #   - *, +, ? after an IRI
+         #   - / between IRIs (sequence)
+         #   - | between paths (alternative)
+         #
+         # A plain IRI like foaf:knows is NOT a property path.
+
+         # Path modifiers - must NOT be followed by alphanumeric (to avoid ?var collision)
+         from pyparsing import NotAny
+
+         PATH_STAR = Lit("*") + NotAny(Word(alphanums))
+         PATH_PLUS = Lit("+") + NotAny(Word(alphanums))
+         PATH_QUESTION = Lit("?") + NotAny(Word(alphanums + "_"))  # ?name is a variable, not a modifier
+         PATH_CARET = Lit("^")
+         PATH_SLASH = Lit("/")
+         PATH_PIPE = Lit("|")
+         PATH_EXCLAIM = Lit("!")
+
+         # Forward declaration
+         path_expression = Forward()
+
+         def make_path_iri(iri_val):
+             if isinstance(iri_val, IRI):
+                 return PathIRI(iri=iri_val)
+             return PathIRI(iri=IRI(str(iri_val)))
+
+         # Grouped path: ( path_expression )
+         path_group = (Suppress(LPAREN) + path_expression + Suppress(RPAREN))
+
+         # Inverse path: ^iri or ^(path)
+         def make_path_inverse(tokens):
+             inner = tokens[0]
+             if isinstance(inner, IRI):
+                 inner = make_path_iri(inner)
+             return PathInverse(path=inner)
+
+         path_inverse = (
+             Suppress(PATH_CARET) + (iri | path_group)
+         ).set_parse_action(make_path_inverse)
+
+         # Negated property set: !(iri|iri|...) or !iri
+         def make_path_negated(tokens):
+             iris = []
+             for t in tokens:
+                 if isinstance(t, IRI):
+                     iris.append(t)
+                 elif isinstance(t, PathIRI):
+                     iris.append(t.iri)
+             return PathNegatedPropertySet(iris=tuple(iris))
+
+         path_negated = (
+             Suppress(PATH_EXCLAIM) +
+             (
+                 (Suppress(LPAREN) + DelimitedList(iri, delim="|") + Suppress(RPAREN)) |
+                 iri
+             )
+         ).set_parse_action(make_path_negated)
+
+         # Modified IRI: iri+ or iri* or iri?
+         def make_path_mod(tokens):
+             iri_val = tokens[0]
+             mod_str = tokens[1]
+             path = make_path_iri(iri_val)
+             if mod_str == "*":
+                 return PathMod(path=path, modifier=PropertyPathModifier.ZERO_OR_MORE)
+             elif mod_str == "+":
+                 return PathMod(path=path, modifier=PropertyPathModifier.ONE_OR_MORE)
+             elif mod_str == "?":
+                 return PathMod(path=path, modifier=PropertyPathModifier.ZERO_OR_ONE)
+             return path
+
+         path_iri_modified = (
+             iri + (PATH_STAR | PATH_PLUS | PATH_QUESTION)
+         ).set_parse_action(make_path_mod)
+
+         # A path element: inverse, negated, modified IRI, or grouped
+         path_element = path_inverse | path_negated | path_iri_modified | path_group
+
+         # A path step (for sequences): path element or plain IRI
+         def wrap_path_step(tokens):
+             t = tokens[0]
+             if isinstance(t, IRI):
+                 return make_path_iri(t)
+             return t
+
+         path_step = (path_element | iri.copy().set_parse_action(wrap_path_step))
+
+         # Sequence path: path1/path2/... (requires at least one /)
+         def make_path_sequence(tokens):
+             paths = list(tokens)
+             if len(paths) == 1:
+                 return paths[0]
+             return PathSequence(paths=tuple(paths))
+
+         path_sequence = (
+             path_step + OneOrMore(Suppress(PATH_SLASH) + path_step)
+         ).set_parse_action(make_path_sequence)
+
+         # Alternative path: path1|path2|... (requires at least one |)
+         def make_path_alternative(tokens):
+             paths = list(tokens)
+             if len(paths) == 1:
+                 return paths[0]
+             return PathAlternative(paths=tuple(paths))
+
+         # Atomic path for alternatives: sequence, element, or plain IRI wrapped
+         # We include path_step here to allow plain IRIs in alternatives
+         path_atomic = path_sequence | path_element | path_step
+
+         path_alternative = (
+             path_atomic + OneOrMore(Suppress(PATH_PIPE) + path_atomic)
+         ).set_parse_action(make_path_alternative)
+
+         # Complete path expression
+         path_expression <<= path_alternative | path_atomic
+
+         # Predicate: try 'a' keyword, path expression, or term
+         predicate_path = a_keyword | path_expression | term
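+
+         # AST sketch (assuming the Path* constructors behave as used above):
+         #   foaf:knows+             -> PathMod(PathIRI(foaf:knows), ONE_OR_MORE)
+         #   ^foaf:knows             -> PathInverse(PathIRI(foaf:knows))
+         #   foaf:knows/foaf:name    -> PathSequence((PathIRI(...), PathIRI(...)))
+         #   foaf:knows|foaf:member  -> PathAlternative((PathIRI(...), PathIRI(...)))
+         #   !(rdf:type|rdfs:label)  -> PathNegatedPropertySet((IRI(...), IRI(...)))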
+
+         # =================================================================
+         # Triple Patterns (with property list and object list support)
+         # =================================================================
+
+         # SPARQL property lists use:
+         #   - ; (semicolon) = same subject, different predicate-object pair
+         #   - , (comma) = same subject and predicate, different object
+
+         SEMICOLON = Suppress(Lit(";"))
+
+         def make_single_triple(tokens):
+             """Create a single triple pattern."""
+             pred = tokens[1]
+             if isinstance(pred, PathIRI):
+                 pred = pred.iri
+             return TriplePattern(
+                 subject=tokens[0],
+                 predicate=pred,
+                 object=tokens[2]
+             )
+
+         # Simple triple without property/object lists
+         simple_triple = (
+             term + predicate_path + term
+         ).set_parse_action(make_single_triple)
+
+         # Object list: same subject and predicate, multiple objects
+         #   <s> <p> <o1> , <o2> , <o3>
+         object_list = term + ZeroOrMore(COMMA + term)
+
+         # Predicate-object pair: predicate followed by object(s)
+         predicate_object = predicate_path + object_list
+
+         # Predicate-object list: multiple predicate-object pairs separated by ;
+         #   <p1> <o1> ; <p2> <o2> ; <p3> <o3>
+         predicate_object_list = predicate_object + ZeroOrMore(SEMICOLON + predicate_object)
+
+         def is_predicate_type(token):
+             """Check if a token is a valid predicate type."""
+             # Path expressions are always predicates
+             if isinstance(token, (PathIRI, PathSequence, PathAlternative, PathInverse, PathMod, PathNegatedPropertySet)):
+                 return True
+             # Plain IRIs can be predicates
+             if isinstance(token, IRI):
+                 return True
+             # Variables can be predicates (e.g., ?s ?p ?o)
+             if isinstance(token, Variable):
+                 return True
+             return False
+
+         def is_object_type(token):
+             """Check if a token is a valid object type."""
+             return isinstance(token, (IRI, Variable, Literal, BlankNode, QuotedTriplePattern))
+
+         def normalize_predicate(pred):
+             """Normalize predicate - unwrap PathIRI to IRI for simple cases."""
+             if isinstance(pred, PathIRI):
+                 return pred.iri
+             return pred
+
+         def make_triple_block(tokens):
+             """Parse a triple block with optional property/object lists.
+
+             Tokens come as a flat list after semicolons/commas are suppressed:
+                 [subject, pred1, obj1, pred2, obj2, ...]
+
+             For property paths like <foaf:knows>+, the predicate is a PathMod.
+
+             Expands:
+                 - ?s <p1> <o1> ; <p2> <o2> . → [(?s, <p1>, <o1>), (?s, <p2>, <o2>)]
+                 - ?s <p> <o1> , <o2> .      → [(?s, <p>, <o1>), (?s, <p>, <o2>)]
+             """
+             tokens_list = list(tokens)
+             if not tokens_list:
+                 return []
+
+             subject = tokens_list[0]
+             triples = []
+
+             # Process remaining tokens as alternating predicate-objects.
+             # Since semicolons are suppressed, we get: [pred1, obj1, pred2, obj2, ...]
+             i = 1
+             while i < len(tokens_list):
+                 # Get predicate - can be IRI, PathIRI, or other path expressions
+                 pred = tokens_list[i]
+
+                 if not is_predicate_type(pred):
+                     # Skip non-predicates (shouldn't happen, but defensive)
+                     i += 1
+                     continue
+
+                 # Normalize simple PathIRI to IRI
+                 pred = normalize_predicate(pred)
+
+                 i += 1
+                 if i >= len(tokens_list):
+                     break
+
+                 # Get object(s) - handle object lists with comma
+                 while i < len(tokens_list):
+                     obj = tokens_list[i]
+
+                     # If it's an object type, create a triple
+                     if is_object_type(obj):
+                         triples.append(TriplePattern(
+                             subject=subject,
+                             predicate=pred,
+                             object=obj
+                         ))
+                         i += 1
+
+                         # Check if the next token is also an object (comma was suppressed)
+                         if i < len(tokens_list):
+                             next_tok = tokens_list[i]
+                             # If next is a path expression (not just IRI), it's a predicate
+                             if isinstance(next_tok, (PathIRI, PathSequence, PathAlternative, PathInverse, PathMod, PathNegatedPropertySet)):
+                                 break
+                             # If next is an IRI, peek further to decide predicate vs object.
+                             # Heuristic: if it's followed by something that could be an object, it's a predicate
+                             if isinstance(next_tok, IRI):
+                                 # Look ahead to see if there's an object after this
+                                 if i + 1 < len(tokens_list) and is_object_type(tokens_list[i + 1]):
+                                     break  # It's a predicate
+                             # If next is still an object type (Variable, Literal, etc.), continue the object list
+                             if isinstance(next_tok, (Variable, Literal, BlankNode)):
+                                 continue  # Continue in object list
+                             break
+                     else:
+                         break
+
+             return triples if triples else []
+
+         # Full triple block: subject + predicate-object list + optional dot
+         triple_block = (
+             term + predicate_object_list + Opt(DOT)
+         ).set_parse_action(make_triple_block)
+
+         # triple_pattern now returns a list of TriplePatterns
+         triple_pattern = triple_block
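+
+         # Expansion sketch: make_triple_block() flattens Turtle-style lists into
+         # individual TriplePatterns, so (hypothetical prefixes assumed)
+         #   ?s foaf:name "Alice" ; foaf:knows ?o1 , ?o2 .
+         # yields three patterns:
+         #   (?s, foaf:name, "Alice"), (?s, foaf:knows, ?o1), (?s, foaf:knows, ?o2)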
+
+         # =================================================================
+         # FILTER Expressions
+         # =================================================================
+
+         # Expression forward declaration
+         expression = Forward()
+
+         # Function call
+         func_name = (
+             BOUND | ISIRI | ISBLANK | ISLITERAL | STR | LANG | DATATYPE |
+             Word(alphas, alphanums + "_")
+         )
+
+         def make_function_call(tokens):
+             return FunctionCall(name=str(tokens[0]).upper(), arguments=list(tokens[1:]))
+
+         function_call = (
+             func_name + LPAREN + Opt(DelimitedList(expression)) + RPAREN
+         ).set_parse_action(make_function_call)
+
+         # Primary expression
+         primary_expr = (
+             function_call |
+             variable |
+             literal |
+             float_literal |
+             integer_literal |
+             boolean_literal |
+             iri |
+             (LPAREN + expression + RPAREN)
+         )
+
+         # Comparison expression
+         def make_comparison(tokens):
+             if len(tokens) == 3:
+                 return Comparison(
+                     left=tokens[0],
+                     operator=ComparisonOp.from_str(tokens[1]),
+                     right=tokens[2]
+                 )
+             return tokens[0]
+
+         comparison_expr = (
+             primary_expr + Opt(comp_op + primary_expr)
+         ).set_parse_action(make_comparison)
+
+         # NOT expression
+         def make_not(tokens):
+             if len(tokens) == 2:  # Has NOT
+                 return LogicalExpression(LogicalOp.NOT, [tokens[1]])
+             return tokens[0]
+
+         not_expr = (
+             Opt(NOT) + comparison_expr
+         ).set_parse_action(make_not)
+
+         # AND expression
+         def make_and(tokens):
+             tokens = list(tokens)
+             if len(tokens) == 1:
+                 return tokens[0]
+             return LogicalExpression(LogicalOp.AND, tokens)
+
+         and_expr = (
+             not_expr + ZeroOrMore(Suppress(AND) + not_expr)
+         ).set_parse_action(make_and)
+
+         # OR expression (lowest precedence)
+         def make_or(tokens):
+             tokens = list(tokens)
+             if len(tokens) == 1:
+                 return tokens[0]
+             return LogicalExpression(LogicalOp.OR, tokens)
+
+         expression <<= (
+             and_expr + ZeroOrMore(Suppress(OR) + and_expr)
+         ).set_parse_action(make_or)
+
+         # Standard FILTER
+         def make_filter(tokens):
+             return Filter(expression=tokens[0])
+
+         filter_clause = (
+             Suppress(FILTER) + LPAREN + expression + RPAREN
+         ).set_parse_action(make_filter)
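+
+         # Precedence sketch: NOT binds tightest, then AND, then OR, so
+         #   FILTER(!BOUND(?x) && ?age >= 18 || ?vip = true)
+         # parses as OR(AND(NOT(BOUND(?x)), ?age >= 18), ?vip = true),
+         # mirroring SPARQL's ! / && / || precedence.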
+
+         # =================================================================
+         # OPTIONAL and UNION Patterns
+         # =================================================================
+
+         # Forward declaration for nested group patterns
+         group_graph_pattern = Forward()
+
+         # OPTIONAL { ... }
+         def make_optional(tokens):
+             patterns = []
+             filters = []
+             for token in tokens:
+                 if isinstance(token, (TriplePattern, QuotedTriplePattern)):
+                     patterns.append(token)
+                 elif isinstance(token, list):
+                     for item in token:
+                         if isinstance(item, (TriplePattern, QuotedTriplePattern)):
+                             patterns.append(item)
+                 elif isinstance(token, Filter):
+                     filters.append(token)
+                 elif isinstance(token, OptionalPattern):
+                     patterns.append(token)
+             return OptionalPattern(patterns=patterns, filters=filters)
+
+         optional_pattern = (
+             Suppress(OPTIONAL) + LBRACE + ZeroOrMore(triple_pattern | filter_clause) + RBRACE
+         ).set_parse_action(make_optional)
+
+         # UNION: { ... } UNION { ... }
+         # A group graph pattern that can participate in UNION
+         def make_group_pattern(tokens):
+             """Convert a list of patterns/filters into a tuple for UNION alternatives."""
+             patterns = []
+             filters = []
+             for token in tokens:
+                 if isinstance(token, (TriplePattern, QuotedTriplePattern)):
+                     patterns.append(token)
+                 elif isinstance(token, list):
+                     for item in token:
+                         if isinstance(item, (TriplePattern, QuotedTriplePattern)):
+                             patterns.append(item)
+                 elif isinstance(token, Filter):
+                     filters.append(token)
+                 elif isinstance(token, OptionalPattern):
+                     patterns.append(token)
+             return (patterns, filters)
+
+         union_alternative = (
+             LBRACE + ZeroOrMore(triple_pattern | filter_clause | optional_pattern) + RBRACE
+         ).set_parse_action(make_group_pattern)
+
+         def make_union(tokens):
+             """Combine UNION alternatives into a UnionPattern."""
+             alternatives = []
+             for token in tokens:
+                 if isinstance(token, tuple) and len(token) == 2:
+                     patterns, filters = token
+                     alternatives.append(patterns)
+             return UnionPattern(alternatives=alternatives)
+
+         union_pattern = (
+             union_alternative + OneOrMore(Suppress(UNION) + union_alternative)
+         ).set_parse_action(make_union)
+
+         # =================================================================
+         # MINUS Pattern
+         # =================================================================
+
+         def make_minus(tokens):
+             """Create a MINUS pattern for set difference."""
+             patterns = []
+             filters = []
+             for token in tokens:
+                 if isinstance(token, (TriplePattern, QuotedTriplePattern)):
+                     patterns.append(token)
+                 elif isinstance(token, list):
+                     for item in token:
+                         if isinstance(item, (TriplePattern, QuotedTriplePattern)):
+                             patterns.append(item)
+                 elif isinstance(token, Filter):
+                     filters.append(token)
+                 elif isinstance(token, OptionalPattern):
+                     patterns.append(token)
+             return MinusPattern(patterns=patterns, filters=filters)
+
+         minus_pattern = (
+             Suppress(MINUS) + LBRACE + ZeroOrMore(triple_pattern | filter_clause) + RBRACE
+         ).set_parse_action(make_minus)
+
+         # =================================================================
+         # BIND Clause
+         # =================================================================
+
+         bind_variable = Combine(
+             (Lit("?") | Lit("$")) + Word(alphas + "_", alphanums + "_")
+         ).set_parse_action(make_variable)
+
+         def make_bind(tokens):
+             # BIND(expr AS ?var)
+             expr = tokens[0]
+             var = tokens[1]
+             return Bind(expression=expr, variable=var)
+
+         bind_clause = (
+             Suppress(BIND) + LPAREN +
+             (expression | literal | float_literal | integer_literal | variable | iri) +
+             Suppress(AS) + bind_variable +
+             RPAREN
+         ).set_parse_action(make_bind)
+
+         # =================================================================
+         # VALUES Clause
+         # =================================================================
+
+         values_variable = Combine(
+             (Lit("?") | Lit("$")) + Word(alphas + "_", alphanums + "_")
+         ).set_parse_action(make_variable)
+
+         # Value term (can be UNDEF or a value)
+         def make_undef(tokens):
+             # Return [None] (not a bare None) so the UNDEF keyword is replaced
+             # by a None token; returning None from a parse action would leave
+             # the "UNDEF" string in the results unchanged.
+             return [None]
+
+         value_term = (
+             UNDEF.set_parse_action(make_undef) |
+             iri | literal | float_literal | integer_literal | boolean_literal
+         )
+
+         # Single-variable VALUES: VALUES ?x { 1 2 3 }
+         def make_single_values(tokens):
+             var = tokens[0]
+             bindings = [[v] for v in tokens[1:]]
+             return ValuesClause(variables=[var], bindings=bindings)
+
+         single_values = (
+             Suppress(VALUES) + values_variable + LBRACE + ZeroOrMore(value_term) + RBRACE
+         ).set_parse_action(make_single_values)
+
+         # Multi-variable VALUES: VALUES (?x ?y) { (1 2) (3 4) }
+         # Each row is Grouped so it arrives as one nested token instead of
+         # being flattened into the surrounding results.
+         value_row = Group(LPAREN + ZeroOrMore(value_term) + RPAREN)
+
+         def make_multi_values(tokens):
+             # First tokens are variables, the rest are grouped rows
+             vars_list = []
+             rows = []
+             for token in tokens:
+                 if isinstance(token, Variable):
+                     vars_list.append(token)
+                 elif isinstance(token, (list, pp.ParseResults)):
+                     rows.append(list(token))
+             return ValuesClause(variables=vars_list, bindings=rows)
+
+         multi_values = (
+             Suppress(VALUES) + LPAREN + OneOrMore(values_variable) + RPAREN +
+             LBRACE + ZeroOrMore(value_row) + RBRACE
+         ).set_parse_action(make_multi_values)
+
+         values_clause = multi_values | single_values
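+
+         # Bindings sketch: VALUES ?x { 1 2 } yields variables=[?x] and one
+         # single-element row per value (each value arriving as a Literal term),
+         # while VALUES (?x ?y) { (1 UNDEF) } yields variables=[?x, ?y] and a
+         # row in which UNDEF appears as None.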
+
+         # =================================================================
+         # GRAPH Pattern
+         # =================================================================
+
+         # Forward declaration for graph_pattern since where_pattern needs it
+         graph_pattern = Forward()
+
+         def make_graph_pattern(tokens):
+             graph_ref = tokens[0]
+             patterns = []
+             for token in tokens[1:]:
+                 if isinstance(token, (TriplePattern, QuotedTriplePattern)):
+                     patterns.append(token)
+                 elif isinstance(token, list):
+                     for item in token:
+                         if isinstance(item, (TriplePattern, QuotedTriplePattern)):
+                             patterns.append(item)
+             return GraphPattern(graph=graph_ref, patterns=patterns)
+
+         graph_pattern <<= (
+             Suppress(GRAPH) + (variable | iri) + LBRACE + ZeroOrMore(triple_pattern) + RBRACE
+         ).set_parse_action(make_graph_pattern)
+
+         # =================================================================
+         # WHERE Clause
+         # =================================================================
+
+         where_pattern = triple_pattern | filter_clause | optional_pattern | union_pattern | minus_pattern | bind_clause | values_clause | graph_pattern
+
+         def make_where_clause(tokens):
+             patterns = []
+             filters = []
+             optional_patterns = []
+             union_patterns = []
+             minus_patterns = []
+             binds = []
+             values = None
+             graph_patterns = []
+             for token in tokens:
+                 if isinstance(token, (TriplePattern, QuotedTriplePattern)):
+                     patterns.append(token)
+                 elif isinstance(token, list):
+                     # Handle expanded triple blocks (from property/object lists)
+                     for item in token:
+                         if isinstance(item, (TriplePattern, QuotedTriplePattern)):
+                             patterns.append(item)
+                 elif isinstance(token, Filter):
+                     filters.append(token)
+                 elif isinstance(token, OptionalPattern):
+                     optional_patterns.append(token)
+                 elif isinstance(token, UnionPattern):
+                     union_patterns.append(token)
+                 elif isinstance(token, MinusPattern):
+                     minus_patterns.append(token)
+                 elif isinstance(token, Bind):
+                     binds.append(token)
+                 elif isinstance(token, ValuesClause):
+                     values = token
+                 elif isinstance(token, GraphPattern):
+                     graph_patterns.append(token)
+             return WhereClause(
+                 patterns=patterns,
+                 filters=filters,
+                 optional_patterns=optional_patterns,
+                 union_patterns=union_patterns,
+                 minus_patterns=minus_patterns,
+                 binds=binds,
+                 values=values,
+                 graph_patterns=graph_patterns
+             )
+
+         # WHERE clause with optional WHERE keyword (for ASK queries)
+         where_clause = (
+             Suppress(Opt(WHERE)) + LBRACE + ZeroOrMore(where_pattern) + RBRACE
+         ).set_parse_action(make_where_clause)
+
+         # =================================================================
+         # PREFIX Declarations
+         # =================================================================
+
+         def make_prefix(tokens):
+             prefix = tokens[0][:-1]  # Remove trailing colon
+             uri = tokens[1].value
+             return (prefix, uri)
+
+         prefix_decl = (
+             Suppress(PREFIX) + pname_ns + full_iri
+         ).set_parse_action(make_prefix)
+
+         # =================================================================
+         # SELECT Query
+         # =================================================================
+
+         # Use a fresh copy of variable for select to avoid parse action interference
+         select_variable = Combine(
+             (Lit("?") | Lit("$")) + Word(alphas + "_", alphanums + "_")
+         ).set_parse_action(make_variable)
+
+         # Aggregate functions
+         aggregate_name = COUNT | SUM | AVG | MIN | MAX | GROUP_CONCAT | SAMPLE
+
+         # Separator for GROUP_CONCAT
+         separator_clause = Suppress(Lit(";")) + Suppress(SEPARATOR) + Suppress(Lit("=")) + (
+             QuotedString('"', esc_char='\\') | QuotedString("'", esc_char='\\')
+         )
+
+         def make_aggregate(tokens):
+             func_name = str(tokens[0]).upper()
+             distinct = False
+             arg = None
+             separator = None
+
+             for t in tokens[1:]:
+                 if str(t).upper() == "DISTINCT":
+                     distinct = True
+                 elif t == "*":
+                     arg = None  # COUNT(*)
+                 elif isinstance(t, Variable):
+                     arg = t
+                 elif isinstance(t, str) and t not in ("DISTINCT", "*"):
+                     separator = t
+
+             return AggregateExpression(
+                 function=func_name,
+                 argument=arg,
+                 distinct=distinct,
+                 separator=separator
+             )
+
+         # COUNT(*) or COUNT(DISTINCT ?var) or COUNT(?var)
+         aggregate_arg = (
+             Opt(DISTINCT) + (STAR | select_variable) + Opt(separator_clause)
+         )
+
+         aggregate_expr = (
+             aggregate_name + LPAREN + aggregate_arg + RPAREN
+         ).set_parse_action(make_aggregate)
+
+         # Aggregate with alias: (COUNT(?x) AS ?count)
+         def make_aggregate_with_alias(tokens):
+             agg = tokens[0]
+             if len(tokens) > 1 and isinstance(tokens[1], Variable):
+                 agg.alias = tokens[1]
+             return agg
+
+         aliased_aggregate = (
+             LPAREN + aggregate_expr + Suppress(AS) + select_variable + RPAREN
+         ).set_parse_action(make_aggregate_with_alias)
+
+         # Select expression: variable or (aggregate AS ?alias)
+         select_expr = aliased_aggregate | aggregate_expr | select_variable
+
+         # Variable list or *
+         def make_star(tokens):
+             return []
+
+         select_vars = (
+             STAR.set_parse_action(make_star) |
+             OneOrMore(select_expr)
+         )
+
+         # GROUP BY clause
+         group_by_variable = Combine(
+             (Lit("?") | Lit("$")) + Word(alphas + "_", alphanums + "_")
+         ).set_parse_action(make_variable)
+
+         def make_group_by_marker(tokens):
+             """Mark this as a GROUP BY list."""
+             return ("GROUP_BY", list(tokens))
+
+         group_by_clause = (
+             Suppress(GROUP) + Suppress(BY) + OneOrMore(group_by_variable)
+         ).set_parse_action(make_group_by_marker)
+
+         # HAVING clause
+         having_clause = Suppress(HAVING) + LPAREN + expression + RPAREN
+
+         # ORDER BY clause - use a fresh copy
+         order_variable = Combine(
+             (Lit("?") | Lit("$")) + Word(alphas + "_", alphanums + "_")
+         ).set_parse_action(make_variable)
+
+         def make_order_desc(tokens):
+             return (tokens[0], False)
+
+         def make_order_asc(tokens):
+             return (tokens[0], True)
+
+         # Plain variable for ORDER BY (no ASC/DESC) needs special handling
+         def make_plain_order(tokens):
+             # tokens[0] is the raw string like "?name"; convert it to a Variable
+             var_name = tokens[0][1:]  # Remove the ? or $
+             return (Variable(var_name), True)  # Default to ascending
+
+         plain_order_var = Combine(
+             (Lit("?") | Lit("$")) + Word(alphas + "_", alphanums + "_")
+         ).set_parse_action(make_plain_order)
+
+         order_condition = (
+             (Suppress(DESC) + LPAREN + order_variable + RPAREN).set_parse_action(make_order_desc) |
+             (Suppress(ASC) + LPAREN + order_variable + RPAREN).set_parse_action(make_order_asc) |
+             plain_order_var
+         )
+
+         order_clause = Suppress(ORDER) + Suppress(BY) + OneOrMore(order_condition)
+
+         # LIMIT and OFFSET
+         # Tag each value so an OFFSET without a LIMIT cannot be mistaken for one.
+         limit_clause = (Suppress(LIMIT) + pyparsing_common.integer).set_parse_action(
+             lambda t: ("LIMIT", t[0])
+         )
+         offset_clause = (Suppress(OFFSET) + pyparsing_common.integer).set_parse_action(
+             lambda t: ("OFFSET", t[0])
+         )
+
+         # FROM clause for dataset specification
+         from_clause = Suppress(FROM) + iri
+         from_named_clause = Suppress(FROM) + Suppress(NAMED) + iri
+
+         def make_select_query(tokens):
+             prefixes = {}
+             variables = []
+             distinct = False
+             where = WhereClause()
+             limit = None
+             offset = None
+             order_by = []
+             group_by = []
+             having = None
+             as_of = None
+             from_graphs = []
+             from_named_graphs = []
+
+             for token in tokens:
+                 if isinstance(token, datetime):
+                     as_of = token
+                 elif isinstance(token, tuple) and len(token) == 2:
+                     if token[0] == "GROUP_BY":
+                         # This is a GROUP BY clause
+                         group_by = token[1]
+                     elif token[0] == "FROM":
+                         # This is a FROM clause
+                         from_graphs.append(token[1])
+                     elif token[0] == "FROM_NAMED":
+                         # This is a FROM NAMED clause
+                         from_named_graphs.append(token[1])
+                     elif token[0] == "LIMIT":
+                         limit = token[1]
+                     elif token[0] == "OFFSET":
+                         offset = token[1]
+                     elif isinstance(token[0], str) and isinstance(token[1], str):
+                         # This is a prefix declaration
+                         prefixes[token[0]] = token[1]
+                     elif isinstance(token[0], Variable):
+                         # This is an ORDER BY condition
+                         order_by.append(token)
+                 elif token == "DISTINCT":
+                     distinct = True
+                 elif isinstance(token, AggregateExpression):
+                     variables.append(token)
+                 elif isinstance(token, Variable):
+                     variables.append(token)
+                 elif isinstance(token, (Comparison, LogicalExpression, FunctionCall)):
+                     # HAVING expression
+                     having = token
+                 elif isinstance(token, (pp.ParseResults, list)):
+                     # Check what's in the list
+                     token_list = list(token)
+                     if token_list and isinstance(token_list[0], (Variable, AggregateExpression)):
+                         variables = token_list
+                     elif token_list and isinstance(token_list[0], tuple):
+                         # Could be an order_by or group_by marker
+                         if token_list[0][0] == "GROUP_BY":
+                             group_by = token_list[0][1]
+                         else:
+                             order_by = token_list
+                     elif token_list == []:
+                         pass  # SELECT *
+                 elif isinstance(token, WhereClause):
+                     where = token
+
+             return SelectQuery(
+                 prefixes=prefixes,
+                 variables=variables,
+                 where=where,
+                 distinct=distinct,
+                 limit=limit,
+                 offset=offset,
+                 order_by=order_by,
+                 group_by=group_by,
+                 having=having,
+                 as_of=as_of,
+                 from_graphs=from_graphs,
+                 from_named_graphs=from_named_graphs,
+             )
+
+         def make_distinct(tokens):
+             return "DISTINCT"
+
+         def make_from_clause(tokens):
+             return ("FROM", tokens[0])
+
+         def make_from_named_clause(tokens):
+             return ("FROM_NAMED", tokens[0])
+
+         select_query = (
+             ZeroOrMore(prefix_decl) +
+             Suppress(SELECT) +
+             Opt(DISTINCT.set_parse_action(make_distinct)) +
+             Group(select_vars) +
+             ZeroOrMore(from_named_clause.set_parse_action(make_from_named_clause) | from_clause.set_parse_action(make_from_clause)) +
+             where_clause +
+             Opt(Group(group_by_clause)) +
+             Opt(having_clause) +
+             Opt(Group(order_clause)) +
+             Opt(limit_clause) +
+             Opt(offset_clause) +
+             Opt(as_of_clause)
+         ).set_parse_action(make_select_query)
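+
+         # End-to-end sketch (hypothetical prefixes/data):
+         #   PREFIX foaf: <http://xmlns.com/foaf/0.1/>
+         #   SELECT DISTINCT ?name (COUNT(?friend) AS ?n)
+         #   WHERE { ?p foaf:name ?name ; foaf:knows ?friend }
+         #   GROUP BY ?name ORDER BY DESC(?n) LIMIT 10
+         # parses to a SelectQuery with distinct=True, one Variable and one
+         # AggregateExpression in .variables, group_by=[?name],
+         # order_by=[(?n, False)] (False meaning descending), and limit=10.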
+
+         # =================================================================
+         # ASK Query
+         # =================================================================
+
+         def make_ask_query(tokens):
+             prefixes = {}
+             where = WhereClause()
+             as_of = None
+             for token in tokens:
+                 if isinstance(token, datetime):
+                     as_of = token
+                 elif isinstance(token, tuple) and len(token) == 2 and isinstance(token[0], str):
+                     prefixes[token[0]] = token[1]
+                 elif isinstance(token, WhereClause):
+                     where = token
+             return AskQuery(prefixes=prefixes, where=where, as_of=as_of)
+
+         ask_query = (
+             ZeroOrMore(prefix_decl) +
+             Suppress(ASK) +
+             where_clause +
+             Opt(as_of_clause)
+         ).set_parse_action(make_ask_query)
+
+         # =================================================================
+         # INSERT DATA Update (with full Turtle syntax support)
+         # =================================================================
+
+         # Special prefixed name for Turtle/INSERT DATA that allows slashes
+         # (unlike the main prefixed_name, which doesn't, to avoid conflicts with property paths)
+         turtle_pname_local = Word(alphanums + "_.-/")
+
+         def make_turtle_prefixed_name(tokens):
+             return IRI(tokens[0])
+
+         turtle_prefixed_name = Combine(pname_ns + Opt(turtle_pname_local)).set_parse_action(make_turtle_prefixed_name)
+         turtle_iri = full_iri | turtle_prefixed_name
+         turtle_iri_or_a = turtle_iri | a_keyword
+
+         # Ground term for INSERT DATA (no variables, allows path-like prefixed names)
+         # Also includes quoted_triple for RDF-Star annotation support
+         ground_term = quoted_triple | turtle_iri | literal | float_literal | integer_literal | boolean_literal | blank_node
+
+         # Turtle-style triple parsing with semicolons and commas:
+         #   Semicolon (;) = same subject, new predicate-object pair
+         #   Comma (,) = same subject and predicate, new object
+         #   Dot (.) = end of triple block
+
+         SEMICOLON = Suppress(Lit(";"))
+
+         def parse_turtle_triples(tokens):
+             """Parse Turtle-style triples into a list of TriplePattern objects.
+
+             Handles:
+                 - Simple triples: <s> <p> <o> .
+                 - Property lists: <s> <p1> <o1> ; <p2> <o2> .
+                 - Object lists: <s> <p> <o1> , <o2> , <o3> .
+                 - Combined: <s> <p1> <o1> , <o2> ; <p2> <o3> .
+                 - RDF-Star: << s p o >> <annotation_pred> <value> .
+             """
+             triples = []
+             token_list = list(tokens)
+
+             i = 0
+             current_subject = None
+             current_predicate = None
+
+             while i < len(token_list):
+                 token = token_list[i]
+
+                 # Skip punctuation strings if they slip through
+                 if isinstance(token, str) and token in '.;,':
+                     i += 1
+                     continue
+
+                 # If we have a ground term and no subject yet, or after a dot
+                 if current_subject is None:
+                     if isinstance(token, (IRI, Literal, BlankNode, QuotedTriplePattern)):
+                         current_subject = token
+                         current_predicate = None
+                         i += 1
+                         continue
+
+                 # If we have a subject but no predicate
+                 if current_subject is not None and current_predicate is None:
+                     if isinstance(token, IRI):
+                         current_predicate = token
+                         i += 1
+                         continue
+
+                 # If we have subject and predicate, the next term is an object
+                 if current_subject is not None and current_predicate is not None:
+                     if isinstance(token, (IRI, Literal, BlankNode, QuotedTriplePattern)):
+                         triples.append(TriplePattern(
+                             subject=current_subject,
+                             predicate=current_predicate,
+                             object=token
+                         ))
+                         i += 1
+
+                         # Check what comes next
+                         if i < len(token_list):
+                             next_token = token_list[i]
+                             if isinstance(next_token, str):
+                                 if next_token == ',':
+                                     # Same subject and predicate, new object
+                                     i += 1
+                                     continue
+                                 elif next_token == ';':
+                                     # Same subject, new predicate
+                                     current_predicate = None
+                                     i += 1
+                                     continue
+                                 elif next_token == '.':
+                                     # End of this subject block
+                                     current_subject = None
+                                     current_predicate = None
+                                     i += 1
+                                     continue
+                         continue
+
+                 i += 1
+
+             return triples
+
+         # Object list: <o1> , <o2> , <o3>
+         turtle_object = ground_term
+         turtle_object_list = turtle_object + ZeroOrMore(Lit(",") + turtle_object)
+
+         # Predicate-object: <p> <o1> , <o2> (use turtle_iri_or_a for path-like prefixed names)
+         turtle_predicate = turtle_iri_or_a
+         turtle_predicate_object = turtle_predicate + turtle_object_list
+         turtle_predicate_object_list = turtle_predicate_object + ZeroOrMore(Lit(";") + Opt(turtle_predicate_object))
+
+         # Full triple block: <s> <p1> <o1> ; <p2> <o2> , <o3> .
+         turtle_triple_block = ground_term + turtle_predicate_object_list + Opt(Lit("."))
+
+         # Multiple triple blocks
+         turtle_triples = ZeroOrMore(turtle_triple_block)
+         turtle_triples.set_parse_action(parse_turtle_triples)
+
+         # Ground triple for INSERT DATA (simple form - backward compatibility)
+         def make_ground_triple(tokens):
+             return TriplePattern(
+                 subject=tokens[0],
+                 predicate=tokens[1],
+                 object=tokens[2],
+             )
+
+         ground_triple = (
+             ground_term + ground_term + ground_term + Opt(DOT)
+         ).set_parse_action(make_ground_triple)
+
+         # INSERT DATA { triples } - supports full Turtle syntax
+         def make_insert_data_query(tokens):
+             prefixes = {}
+             triples = []
+             graph = None
+
+             for token in tokens:
+                 if isinstance(token, tuple) and len(token) == 2 and isinstance(token[0], str):
+                     prefixes[token[0]] = token[1]
+                 elif isinstance(token, TriplePattern):
+                     triples.append(token)
+                 elif isinstance(token, IRI):
+                     graph = token
+                 elif isinstance(token, (list, pp.ParseResults)):
+                     for item in token:
+                         if isinstance(item, TriplePattern):
+                             triples.append(item)
+
+             return InsertDataQuery(prefixes=prefixes, triples=triples, graph=graph)
+
+         # Use turtle_triples for full Turtle syntax support
+         insert_data_body = LBRACE + turtle_triples + RBRACE
+
+         insert_data_query = (
+             ZeroOrMore(prefix_decl) +
+             Suppress(INSERT) +
+             Suppress(DATA) +
+             insert_data_body
+         ).set_parse_action(make_insert_data_query)
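+
+         # RDF-Star annotation sketch (hypothetical prefixes): an update such as
+         #   INSERT DATA { << :alice :age 30 >> :source <http://example.org/hr> . }
+         # produces one TriplePattern whose subject is a QuotedTriplePattern,
+         # i.e. provenance attached to the :alice/:age/30 statement itself.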
+
+         # =================================================================
+         # DELETE DATA Update
+         # =================================================================
+
+         def make_delete_data_query(tokens):
+             prefixes = {}
+             triples = []
+             graph = None
+
+             for token in tokens:
+                 if isinstance(token, tuple) and len(token) == 2 and isinstance(token[0], str):
+                     prefixes[token[0]] = token[1]
+                 elif isinstance(token, TriplePattern):
+                     triples.append(token)
+                 elif isinstance(token, (list, pp.ParseResults)):
+                     for item in token:
+                         if isinstance(item, TriplePattern):
+                             triples.append(item)
+
+             return DeleteDataQuery(prefixes=prefixes, triples=triples, graph=graph)
+
+         delete_data_query = (
+             ZeroOrMore(prefix_decl) +
+             Suppress(DELETE) +
+             Suppress(DATA) +
+             insert_data_body
+         ).set_parse_action(make_delete_data_query)
+
+         # =================================================================
+         # DELETE WHERE Update
+         # =================================================================
+
+         def make_delete_where_query(tokens):
+             prefixes = {}
+             where = WhereClause()
+             graph = None
+
+             for token in tokens:
+                 if isinstance(token, tuple) and len(token) == 2 and isinstance(token[0], str):
+                     prefixes[token[0]] = token[1]
+                 elif isinstance(token, WhereClause):
+                     where = token
+
+             return DeleteWhereQuery(prefixes=prefixes, where=where, graph=graph)
+
+         delete_where_query = (
+             ZeroOrMore(prefix_decl) +
+             Suppress(DELETE) +
+             where_clause
+         ).set_parse_action(make_delete_where_query)
+
+         # =================================================================
+         # DELETE/INSERT WHERE (Modify) Update
+         # =================================================================
+
+         # Template for DELETE/INSERT patterns (can contain variables)
+         template_triple = (
+             term + term + term + Opt(DOT)
+         ).set_parse_action(make_ground_triple)
+
+         # DELETE { patterns } clause (patterns in braces, not the WHERE keyword)
+         delete_template = LBRACE + ZeroOrMore(template_triple) + RBRACE
+
+         # INSERT { patterns } clause
+         insert_template = LBRACE + ZeroOrMore(template_triple) + RBRACE
+
+         def make_modify_query(tokens):
+             prefixes = {}
+             delete_patterns = []
+             insert_patterns = []
+             where = WhereClause()
+
+             # Track which section we're in.
+             # Tokens are structured as: [prefixes...], [delete_patterns...], [insert_patterns...], WhereClause
+             section = "prefixes"
+
+             for token in tokens:
+                 if isinstance(token, tuple) and len(token) == 2 and isinstance(token[0], str):
+                     prefixes[token[0]] = token[1]
+                 elif token == "DELETE_SECTION":
+                     section = "delete"
+                 elif token == "INSERT_SECTION":
+                     section = "insert"
+                 elif isinstance(token, WhereClause):
+                     where = token
+                 elif isinstance(token, TriplePattern):
+                     if section == "delete":
+                         delete_patterns.append(token)
+                     elif section == "insert":
+                         insert_patterns.append(token)
+                 elif isinstance(token, pp.ParseResults):
+                     for item in token:
+                         if isinstance(item, TriplePattern):
+                             if section == "delete":
+                                 delete_patterns.append(item)
+                             elif section == "insert":
+                                 insert_patterns.append(item)
+
+             return ModifyQuery(
+                 prefixes=prefixes,
+                 delete_patterns=delete_patterns,
+                 insert_patterns=insert_patterns,
+                 where=where
+             )
+
+         # DELETE { } INSERT { } WHERE { } - full modify query
+         # We need markers to distinguish delete vs insert patterns
+         delete_section = (
+             Suppress(DELETE) +
+             pp.Literal("{").suppress().set_parse_action(lambda: "DELETE_SECTION") +
+             ZeroOrMore(template_triple) +
+             Suppress(RBRACE)
+         )
+
+         insert_section = (
+             Suppress(INSERT) +
+             pp.Literal("{").suppress().set_parse_action(lambda: "INSERT_SECTION") +
+             ZeroOrMore(template_triple) +
+             Suppress(RBRACE)
+         )
+
+         # Modify query with both DELETE and INSERT (or just one)
+         # Must have at least one of DELETE or INSERT followed by WHERE
+         modify_query = (
+             ZeroOrMore(prefix_decl) +
+             Opt(delete_section) +
+             Opt(insert_section) +
+             where_clause
+         ).set_parse_action(make_modify_query)
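+
+         # Modify sketch (hypothetical prefixes): a rename update such as
+         #   DELETE { ?p foaf:name ?old } INSERT { ?p foaf:name "Bob" }
+         #   WHERE  { ?p foaf:name ?old . FILTER(?old = "Robert") }
+         # yields a ModifyQuery whose delete_patterns/insert_patterns are routed
+         # by the section markers emitted from the "{" parse actions above.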
+
+         # =================================================================
+         # DESCRIBE Query
+         # =================================================================
+
+         describe_resource = iri | variable
+
+         def make_describe_query(tokens):
+             prefixes = {}
+             resources = []
+             where = None
+
+             for token in tokens:
+                 if isinstance(token, tuple) and len(token) == 2 and isinstance(token[0], str):
+                     prefixes[token[0]] = token[1]
+                 elif isinstance(token, (IRI, Variable)):
+                     resources.append(token)
+                 elif isinstance(token, WhereClause):
+                     where = token
+
+             return DescribeQuery(prefixes=prefixes, resources=resources, where=where)
+
+         describe_query = (
+             ZeroOrMore(prefix_decl) +
+             Suppress(DESCRIBE) +
+             OneOrMore(describe_resource) +
+             Opt(where_clause)
+         ).set_parse_action(make_describe_query)
+
+         # =================================================================
+         # CONSTRUCT Query
+         # =================================================================
+
+         construct_template = LBRACE + ZeroOrMore(triple_pattern) + RBRACE
+
+         def make_construct_query(tokens):
+             prefixes = {}
+             template = []
+             where = WhereClause()
+
+             for token in tokens:
+                 if isinstance(token, tuple) and len(token) == 2 and isinstance(token[0], str):
+                     prefixes[token[0]] = token[1]
+                 elif isinstance(token, TriplePattern):
+                     template.append(token)
+                 elif isinstance(token, WhereClause):
+                     where = token
+
+             return ConstructQuery(prefixes=prefixes, template=template, where=where)
+
+         construct_query = (
+             ZeroOrMore(prefix_decl) +
+             Suppress(CONSTRUCT) +
+             construct_template +
+             where_clause
+         ).set_parse_action(make_construct_query)
+
+         # =================================================================
+         # Graph Management Queries
+         # =================================================================
+
+         # CREATE [SILENT] GRAPH <uri>
+         def make_create_graph(tokens):
+             silent = False
+             graph_uri = None
+             for token in tokens:
+                 if token == "SILENT":
+                     silent = True
+                 elif isinstance(token, IRI):
+                     graph_uri = token
+             return CreateGraphQuery(prefixes={}, graph_uri=graph_uri, silent=silent)
+
+         create_graph_query = (
+             Suppress(CREATE) + Opt(SILENT.set_parse_action(lambda: "SILENT")) +
+             Suppress(GRAPH) + iri
+         ).set_parse_action(make_create_graph)
+
+         # DROP [SILENT] (GRAPH <uri> | DEFAULT | NAMED | ALL)
+         def make_drop_graph(tokens):
+             silent = False
+             graph_uri = None
+             target = "graph"
+             for token in tokens:
+                 if token == "SILENT":
+                     silent = True
+                 elif token == "DEFAULT":
+                     target = "default"
+                 elif token == "NAMED":
+                     target = "named"
+                 elif token == "ALL":
+                     target = "all"
+                 elif isinstance(token, IRI):
+                     graph_uri = token
+             return DropGraphQuery(prefixes={}, graph_uri=graph_uri, target=target, silent=silent)
+
+         drop_target = (
+             (Suppress(GRAPH) + iri) |
+             DEFAULT.set_parse_action(lambda: "DEFAULT") |
+             NAMED.set_parse_action(lambda: "NAMED") |
+             ALL.set_parse_action(lambda: "ALL")
+         )
+         drop_graph_query = (
+             Suppress(DROP) + Opt(SILENT.set_parse_action(lambda: "SILENT")) + drop_target
+         ).set_parse_action(make_drop_graph)
+
+         # CLEAR [SILENT] (GRAPH <uri> | DEFAULT | NAMED | ALL)
+         def make_clear_graph(tokens):
+             silent = False
+             graph_uri = None
+             target = "graph"
+             for token in tokens:
+                 if token == "SILENT":
+                     silent = True
+                 elif token == "DEFAULT":
+                     target = "default"
+                 elif token == "NAMED":
+                     target = "named"
+                 elif token == "ALL":
+                     target = "all"
+                 elif isinstance(token, IRI):
+                     graph_uri = token
+             return ClearGraphQuery(prefixes={}, graph_uri=graph_uri, target=target, silent=silent)
+
+         clear_graph_query = (
+             Suppress(CLEAR) + Opt(SILENT.set_parse_action(lambda: "SILENT")) + drop_target
+         ).set_parse_action(make_clear_graph)
+
+         # LOAD [SILENT] <source> [INTO GRAPH <dest>]
+         def make_load(tokens):
+             silent = False
+             source_uri = None
+             graph_uri = None
+             for token in tokens:
+                 if token == "SILENT":
+                     silent = True
+                 elif isinstance(token, IRI):
+                     if source_uri is None:
+                         source_uri = token
+                     else:
+                         graph_uri = token
+             return LoadQuery(prefixes={}, source_uri=source_uri, graph_uri=graph_uri, silent=silent)
+
+         load_query = (
+             Suppress(LOAD) + Opt(SILENT.set_parse_action(lambda: "SILENT")) +
+             iri + Opt(Suppress(INTO) + Suppress(GRAPH) + iri)
+         ).set_parse_action(make_load)
+
+         # COPY/MOVE/ADD [SILENT] (DEFAULT | GRAPH <uri>) TO (DEFAULT | GRAPH <uri>)
+         def make_graph_transfer(operation):
+             def action(tokens):
+                 silent = False
+                 source_graph = None
+                 dest_graph = None
+                 source_is_default = False
+
+                 token_list = list(tokens)
+                 i = 0
+                 while i < len(token_list):
+                     token = token_list[i]
+                     if token == "SILENT":
+                         silent = True
+                     elif token == "DEFAULT":
+                         if source_graph is None and not source_is_default:
+                             source_is_default = True
+                         # dest_graph would be set via IRI
+                     elif isinstance(token, IRI):
+                         if source_graph is None and not source_is_default:
+                             source_graph = token
+                         else:
+                             dest_graph = token
+                     i += 1
+
+                 if operation == "COPY":
+                     return CopyGraphQuery(
+                         prefixes={}, source_graph=source_graph, dest_graph=dest_graph,
+                         silent=silent, source_is_default=source_is_default
+                     )
+                 elif operation == "MOVE":
+                     return MoveGraphQuery(
+                         prefixes={}, source_graph=source_graph, dest_graph=dest_graph,
+                         silent=silent, source_is_default=source_is_default
+                     )
+                 else:  # ADD
+                     return AddGraphQuery(
+                         prefixes={}, source_graph=source_graph, dest_graph=dest_graph,
+                         silent=silent, source_is_default=source_is_default
+                     )
+             return action
+
+         graph_ref = (
+             DEFAULT.set_parse_action(lambda: "DEFAULT") |
+             (Suppress(GRAPH) + iri)
+         )
+
+         copy_query = (
+             Suppress(COPY) + Opt(SILENT.set_parse_action(lambda: "SILENT")) +
+             graph_ref + Suppress(TO) + graph_ref
+         ).set_parse_action(make_graph_transfer("COPY"))
+
+         move_query = (
+             Suppress(MOVE) + Opt(SILENT.set_parse_action(lambda: "SILENT")) +
+             graph_ref + Suppress(TO) + graph_ref
+         ).set_parse_action(make_graph_transfer("MOVE"))
+
+         add_query = (
+             Suppress(ADD) + Opt(SILENT.set_parse_action(lambda: "SILENT")) +
+             graph_ref + Suppress(TO) + graph_ref
+         ).set_parse_action(make_graph_transfer("ADD"))
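+
+         # Transfer sketch (hypothetical graph names):
+         #   COPY GRAPH <http://example.org/g1> TO GRAPH <http://example.org/g2>
+         # yields CopyGraphQuery(source_graph=<g1>, dest_graph=<g2>), while
+         #   COPY DEFAULT TO GRAPH <http://example.org/g2>
+         # sets source_is_default=True instead of a source IRI.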
+
+         # =================================================================
+         # Top-level Query
+         # =================================================================
+
+         # Note: Order matters - more specific patterns must come first.
+         # modify_query must come before delete_where_query because:
+         #   DELETE { ... } WHERE { ... } should match modify_query
+         #   DELETE WHERE { ... } should match delete_where_query
+         # delete_data_query must come before delete_where_query (the DATA keyword distinguishes them)
+         self.query = (
+             select_query | ask_query | describe_query | construct_query |
+             insert_data_query | delete_data_query | modify_query | delete_where_query |
+             create_graph_query | drop_graph_query | clear_graph_query |
+             load_query | copy_query | move_query | add_query
+         )
+
+         # Ignore comments (# to end of line, as in SPARQL)
+         self.query.ignore(Lit("#") + pp.restOfLine)
+
+     def parse(self, query_string: str) -> Query:
+         """
+         Parse a SPARQL-Star query string into an AST.
+
+         Args:
+             query_string: The SPARQL-Star query to parse
+
+         Returns:
+             Parsed Query AST
+
+         Raises:
+             ParseException: If the query is malformed
+         """
+         result = self.query.parse_string(query_string, parse_all=True)
+         return result[0]
+
+
+ # Module-level parser instance for convenience
+ _parser: Optional[SPARQLStarParser] = None
+
+
+ def parse_query(query_string: str) -> Query:
+     """
+     Parse a SPARQL-Star query string.
+
+     This is a convenience function that uses a cached parser instance.
+
+     Args:
+         query_string: The SPARQL-Star query to parse
+
+     Returns:
+         Parsed Query AST
+     """
+     global _parser
+     if _parser is None:
+         _parser = SPARQLStarParser()
+     return _parser.parse(query_string)
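+
+
+ # Minimal usage sketch: round-trip a SPARQL-Star query through the cached
+ # module-level parser. The query text is hypothetical example data.
+ if __name__ == "__main__":
+     q = parse_query(
+         'SELECT ?c WHERE { << ?s ?p ?o >> <http://example.org/certainty> ?c } LIMIT 5'
+     )
+     # q is a SelectQuery; its WHERE clause holds one TriplePattern whose
+     # subject is a QuotedTriplePattern.
+     print(type(q).__name__, q.limit)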