rdf-starbase 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1925 @@
1
+
2
+ """
3
+ SPARQL-Star Query Executor using Polars.
4
+
5
+ Translates the SPARQL-Star AST into Polars operations for fast, vectorized execution.
6
+
7
+ Includes internal optimizations for provenance queries that map standard
8
+ SPARQL-Star patterns like << ?s ?p ?o >> prov:value ?conf to efficient
9
+ columnar access.
10
+
11
+ Supported provenance vocabularies:
12
+ - PROV-O: W3C Provenance Ontology (prov:wasAttributedTo, prov:value, etc.)
13
+ - DQV: Data Quality Vocabulary (dqv:hasQualityMeasurement)
14
+ - PAV: Provenance, Authoring and Versioning (pav:createdBy, pav:authoredBy)
15
+ - DCAT: Data Catalog Vocabulary (dcat:accessURL, etc.)
16
+
17
+ When inserting RDF-Star annotations like:
18
+ << ex:s ex:p ex:o >> prov:wasAttributedTo "IMDb" .
19
+ << ex:s ex:p ex:o >> prov:value 0.95 .
20
+
21
+ The executor recognizes these predicates and maps them to internal assertion
22
+ metadata (source, confidence) rather than creating separate triples.
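+
+ Reading annotations back works the same way. For example (illustrative
+ query; prefix handling depends on the declared PREFIXes):
+
+ SELECT ?conf WHERE { << ex:s ex:p ex:o >> prov:value ?conf }
+
+ is answered directly from the internal confidence column rather than by
+ joining against separately stored annotation triples.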
23
+ """
24
+
25
+ from typing import Any, Optional, Union, TYPE_CHECKING
26
+ from datetime import datetime
27
+
28
+ import polars as pl
29
+
30
+ from rdf_starbase.sparql.ast import (
31
+ Query, SelectQuery, AskQuery, InsertDataQuery, DeleteDataQuery,
32
+ DeleteWhereQuery, ModifyQuery,
33
+ DescribeQuery, ConstructQuery,
34
+ CreateGraphQuery, DropGraphQuery, ClearGraphQuery,
35
+ LoadQuery, CopyGraphQuery, MoveGraphQuery, AddGraphQuery,
36
+ TriplePattern, QuotedTriplePattern,
37
+ OptionalPattern, UnionPattern, GraphPattern,
38
+ Variable, IRI, Literal, BlankNode,
39
+ Filter, Comparison, LogicalExpression, FunctionCall,
40
+ AggregateExpression, Bind, ValuesClause,
41
+ ComparisonOp, LogicalOp,
42
+ WhereClause,
43
+ Term,
44
+ )
45
+ from rdf_starbase.models import ProvenanceContext
46
+
47
+ if TYPE_CHECKING:
48
+ from rdf_starbase.store import TripleStore
49
+
50
+
51
+ # =============================================================================
52
+ # Provenance Predicate Mappings
53
+ # =============================================================================
54
+ # These predicates, when used in RDF-Star annotations, are recognized and
55
+ # mapped to internal assertion metadata fields rather than stored as
56
+ # separate triples.
57
+
58
+ # Sets of predicate IRIs, grouped by the internal assertion metadata field they map to
59
+ PROVENANCE_SOURCE_PREDICATES = {
60
+ # PROV-O - W3C Provenance Ontology
61
+ "http://www.w3.org/ns/prov#wasAttributedTo",
62
+ "http://www.w3.org/ns/prov#wasDerivedFrom",
63
+ "http://www.w3.org/ns/prov#wasGeneratedBy",
64
+ "http://www.w3.org/ns/prov#hadPrimarySource",
65
+ # PAV - Provenance, Authoring and Versioning
66
+ "http://purl.org/pav/createdBy",
67
+ "http://purl.org/pav/authoredBy",
68
+ "http://purl.org/pav/importedFrom",
69
+ "http://purl.org/pav/retrievedFrom",
70
+ "http://purl.org/pav/sourceAccessedAt",
71
+ # Dublin Core
72
+ "http://purl.org/dc/terms/source",
73
+ "http://purl.org/dc/elements/1.1/source",
74
+ # Schema.org
75
+ "http://schema.org/isBasedOn",
76
+ "http://schema.org/citation",
77
+ # Custom RDF-StarBase
78
+ "http://rdf-starbase.io/source",
79
+ "source", # Short form
80
+ }
81
+
82
+ PROVENANCE_CONFIDENCE_PREDICATES = {
83
+ # PROV-O
84
+ "http://www.w3.org/ns/prov#value",
85
+ # DQV - Data Quality Vocabulary
86
+ "http://www.w3.org/ns/dqv#hasQualityMeasurement",
87
+ "http://www.w3.org/ns/dqv#value",
88
+ # Schema.org
89
+ "http://schema.org/ratingValue",
90
+ # Custom RDF-StarBase
91
+ "http://rdf-starbase.io/confidence",
92
+ "confidence", # Short form
93
+ }
94
+
95
+ PROVENANCE_TIMESTAMP_PREDICATES = {
96
+ # PROV-O
97
+ "http://www.w3.org/ns/prov#generatedAtTime",
98
+ "http://www.w3.org/ns/prov#invalidatedAtTime",
99
+ # PAV
100
+ "http://purl.org/pav/createdOn",
101
+ "http://purl.org/pav/authoredOn",
102
+ "http://purl.org/pav/lastRefreshedOn",
103
+ # Dublin Core
104
+ "http://purl.org/dc/terms/created",
105
+ "http://purl.org/dc/terms/modified",
106
+ # Custom
107
+ "http://rdf-starbase.io/timestamp",
108
+ "timestamp",
109
+ }
110
+
111
+ # Legacy map for query optimization (reading provenance)
112
+ PROV_PREDICATE_MAP = {
113
+ "http://www.w3.org/ns/prov#value": "confidence",
114
+ "http://www.w3.org/ns/prov#wasDerivedFrom": "source",
115
+ "http://www.w3.org/ns/prov#generatedAtTime": "timestamp",
116
+ "http://www.w3.org/ns/prov#wasGeneratedBy": "process",
117
+ "prov:value": "confidence",
118
+ "prov:wasDerivedFrom": "source",
119
+ "prov:generatedAtTime": "timestamp",
120
+ "prov:wasGeneratedBy": "process",
121
+ }
122
+
123
+
124
+ class SPARQLExecutor:
125
+ """
126
+ Executes SPARQL-Star queries against a TripleStore.
127
+
128
+ Translation strategy:
129
+ - Each TriplePattern becomes a filtered view of the DataFrame
130
+ - Variables become column selections
131
+ - Joins are performed for patterns sharing variables
132
+ - Filters become Polars filter expressions
133
+ - Uses lazy evaluation for query optimization
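+
+ Example (sketch; `parse` stands in for whatever function in this package
+ turns query text into a Query AST, and store setup is abbreviated):
+
+ store = TripleStore()
+ executor = SPARQLExecutor(store)
+ df = executor.execute(parse("SELECT ?s WHERE { ?s ?p ?o }"))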
134
+ """
135
+
136
+ def __init__(self, store: "TripleStore"):
137
+ """
138
+ Initialize executor with a triple store.
139
+
140
+ Args:
141
+ store: The TripleStore to query
142
+ """
143
+ self.store = store
144
+ self._var_counter = 0
145
+
146
+ def execute(
147
+ self,
148
+ query: Query,
149
+ provenance: Optional[ProvenanceContext] = None
150
+ ) -> Union[pl.DataFrame, bool, dict]:
151
+ """
152
+ Execute a SPARQL-Star query.
153
+
154
+ Args:
155
+ query: Parsed Query AST
156
+ provenance: Optional provenance context for INSERT/DELETE operations
157
+
158
+ Returns:
159
+ DataFrame for SELECT queries, bool for ASK queries,
160
+ dict with count for INSERT/DELETE operations
161
+ """
162
+ if isinstance(query, SelectQuery):
163
+ return self._execute_select(query)
164
+ elif isinstance(query, AskQuery):
165
+ return self._execute_ask(query)
166
+ elif isinstance(query, DescribeQuery):
167
+ return self._execute_describe(query)
168
+ elif isinstance(query, ConstructQuery):
169
+ return self._execute_construct(query)
170
+ elif isinstance(query, InsertDataQuery):
171
+ return self._execute_insert_data(query, provenance)
172
+ elif isinstance(query, DeleteDataQuery):
173
+ return self._execute_delete_data(query)
174
+ elif isinstance(query, DeleteWhereQuery):
175
+ return self._execute_delete_where(query)
176
+ elif isinstance(query, ModifyQuery):
177
+ return self._execute_modify(query, provenance)
178
+ elif isinstance(query, CreateGraphQuery):
179
+ return self._execute_create_graph(query)
180
+ elif isinstance(query, DropGraphQuery):
181
+ return self._execute_drop_graph(query)
182
+ elif isinstance(query, ClearGraphQuery):
183
+ return self._execute_clear_graph(query)
184
+ elif isinstance(query, LoadQuery):
185
+ return self._execute_load(query, provenance)
186
+ elif isinstance(query, CopyGraphQuery):
187
+ return self._execute_copy_graph(query)
188
+ elif isinstance(query, MoveGraphQuery):
189
+ return self._execute_move_graph(query)
190
+ elif isinstance(query, AddGraphQuery):
191
+ return self._execute_add_graph(query)
192
+ else:
193
+ raise NotImplementedError(f"Query type {type(query)} not yet supported")
194
+
195
+ def _execute_select(self, query: SelectQuery) -> pl.DataFrame:
196
+ """Execute a SELECT query."""
197
+ # Handle FROM clause - restrict to specified graphs
198
+ from_graphs = None
199
+ if query.from_graphs:
200
+ # Merge all FROM graphs into default graph behavior
201
+ from_graphs = [g.value for g in query.from_graphs]
202
+
203
+ # Evaluate the WHERE clause to get the initial set of variable bindings
204
+ df = self._execute_where(
205
+ query.where,
206
+ query.prefixes,
207
+ as_of=query.as_of,
208
+ from_graphs=from_graphs
209
+ )
210
+
211
+ # Bind provenance variables if requested (source, confidence, timestamp, process)
212
+ # These are special variable names that map to assertion metadata
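+ # e.g. SELECT ?s ?confidence WHERE { ?s ?p ?o } binds ?confidence from the
+ # first pattern's _prov_*_confidence column (illustrative example)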
213
+ provenance_var_mapping = {
214
+ "source": "source",
215
+ "confidence": "confidence",
216
+ "timestamp": "timestamp",
217
+ "process": "process",
218
+ }
219
+
220
+ for var in query.variables:
221
+ if isinstance(var, Variable) and var.name in provenance_var_mapping:
222
+ prov_col = provenance_var_mapping[var.name]
223
+ # Find the first pattern's provenance column
224
+ for col in df.columns:
225
+ if col.startswith("_prov_") and col.endswith(f"_{prov_col}"):
226
+ df = df.with_columns(pl.col(col).alias(var.name))
227
+ break
228
+
229
+ # Determine columns to select before DISTINCT (DISTINCT should only apply to output columns)
230
+ select_cols = None
231
+ if not query.is_select_all():
232
+ select_cols = []
233
+ for v in query.variables:
234
+ if isinstance(v, Variable) and v.name in df.columns:
235
+ select_cols.append(v.name)
236
+ elif isinstance(v, AggregateExpression) and v.alias and v.alias.name in df.columns:
237
+ select_cols.append(v.alias.name)
238
+
239
+ # Handle GROUP BY and aggregates
240
+ if query.group_by or query.has_aggregates():
241
+ df = self._apply_group_by_aggregates(df, query)
242
+ else:
243
+ # Apply DISTINCT if requested (non-aggregate)
244
+ # Must apply DISTINCT only on the projected columns, not internal _prov_* columns
245
+ if query.distinct:
246
+ if select_cols:
247
+ df = df.unique(subset=select_cols)
248
+ else:
249
+ # SELECT * - apply unique to all non-internal columns
250
+ non_internal = [c for c in df.columns if not c.startswith("_prov_")]
251
+ df = df.unique(subset=non_internal if non_internal else None)
252
+
253
+ # Apply HAVING (filter after grouping)
254
+ if query.having:
255
+ df = self._apply_filter(df, Filter(expression=query.having))
256
+
257
+ # Apply ORDER BY
258
+ if query.order_by:
259
+ order_cols = []
260
+ descending = []
261
+ for var, asc in query.order_by:
262
+ if var.name in df.columns:
263
+ order_cols.append(var.name)
264
+ descending.append(not asc)
265
+ if order_cols:
266
+ df = df.sort(order_cols, descending=descending)
267
+
268
+ # Apply LIMIT and OFFSET
269
+ if query.offset:
270
+ df = df.slice(query.offset, query.limit or len(df))
271
+ elif query.limit:
272
+ df = df.head(query.limit)
273
+
274
+ # Select only requested variables (or all if SELECT *)
275
+ if not query.is_select_all():
276
+ select_cols = []
277
+ for v in query.variables:
278
+ if isinstance(v, Variable) and v.name in df.columns:
279
+ select_cols.append(v.name)
280
+ elif isinstance(v, AggregateExpression) and v.alias and v.alias.name in df.columns:
281
+ select_cols.append(v.alias.name)
282
+ if select_cols:
283
+ df = df.select(select_cols)
284
+
285
+ return df
286
+
287
+ def _apply_group_by_aggregates(
288
+ self,
289
+ df: pl.DataFrame,
290
+ query: SelectQuery
291
+ ) -> pl.DataFrame:
292
+ """
293
+ Apply GROUP BY and aggregate functions to a DataFrame.
294
+
295
+ Supports: COUNT, SUM, AVG, MIN, MAX, GROUP_CONCAT, SAMPLE
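+
+ For example, SELECT ?s (COUNT(?o) AS ?n) ... GROUP BY ?s translates roughly
+ to df.group_by("s").agg(pl.col("o").count().alias("n")) (illustrative
+ sketch only).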
296
+ """
297
+ if len(df) == 0:
298
+ return df
299
+
300
+ # Build aggregation expressions
301
+ agg_exprs = []
302
+
303
+ for var in query.variables:
304
+ if isinstance(var, AggregateExpression):
305
+ agg_expr = self._build_aggregate_expr(var)
306
+ if agg_expr is not None:
307
+ agg_exprs.append(agg_expr)
308
+
309
+ # If we have GROUP BY, use it; otherwise aggregate entire result
310
+ if query.group_by:
311
+ group_cols = [v.name for v in query.group_by if v.name in df.columns]
312
+ if group_cols and agg_exprs:
313
+ df = df.group_by(group_cols).agg(agg_exprs)
314
+ elif group_cols:
315
+ # GROUP BY without aggregates - just unique combinations
316
+ df = df.select(group_cols).unique()
317
+ elif agg_exprs:
318
+ # Aggregates without GROUP BY - aggregate entire result
319
+ df = df.select(agg_exprs)
320
+
321
+ return df
322
+
323
+ def _build_aggregate_expr(self, agg: AggregateExpression) -> Optional[pl.Expr]:
324
+ """Build a Polars aggregation expression from an AggregateExpression AST."""
325
+ # Get the column to aggregate
326
+ if agg.argument is None:
327
+ # COUNT(*) - count all rows
328
+ col_name = None
329
+ elif isinstance(agg.argument, Variable):
330
+ col_name = agg.argument.name
331
+ else:
332
+ return None
333
+
334
+ # Determine alias
335
+ alias = agg.alias.name if agg.alias else agg.function.lower()
336
+
337
+ # Build the aggregation
338
+ if agg.function == "COUNT":
339
+ if col_name is None:
340
+ expr = pl.len().alias(alias)
341
+ elif agg.distinct:
342
+ expr = pl.col(col_name).n_unique().alias(alias)
343
+ else:
344
+ expr = pl.col(col_name).count().alias(alias)
345
+ elif agg.function == "SUM":
346
+ if col_name:
347
+ expr = pl.col(col_name).cast(pl.Float64).sum().alias(alias)
348
+ else:
349
+ return None
350
+ elif agg.function == "AVG":
351
+ if col_name:
352
+ expr = pl.col(col_name).cast(pl.Float64).mean().alias(alias)
353
+ else:
354
+ return None
355
+ elif agg.function == "MIN":
356
+ if col_name:
357
+ expr = pl.col(col_name).min().alias(alias)
358
+ else:
359
+ return None
360
+ elif agg.function == "MAX":
361
+ if col_name:
362
+ expr = pl.col(col_name).max().alias(alias)
363
+ else:
364
+ return None
365
+ elif agg.function == "GROUP_CONCAT":
366
+ if col_name:
367
+ sep = agg.separator or " "
368
+ expr = pl.col(col_name).cast(pl.Utf8).str.concat(sep).alias(alias)
369
+ else:
370
+ return None
371
+ elif agg.function == "SAMPLE":
372
+ if col_name:
373
+ expr = pl.col(col_name).first().alias(alias)
374
+ else:
375
+ return None
376
+ else:
377
+ return None
378
+
379
+ return expr
380
+
381
+ def _execute_ask(self, query: AskQuery) -> bool:
382
+ """Execute an ASK query."""
383
+ df = self._execute_where(query.where, query.prefixes, as_of=query.as_of)
384
+ return len(df) > 0
385
+
386
+ def _execute_describe(self, query: DescribeQuery) -> pl.DataFrame:
387
+ """
388
+ Execute a DESCRIBE query.
389
+
390
+ Returns all triples where the resource appears as subject or object.
391
+ """
392
+ prefixes = query.prefixes
393
+
394
+ # Get resource URIs to describe
395
+ if query.where:
396
+ # Execute WHERE clause to get bindings
397
+ bindings = self._execute_where(query.where, prefixes, as_of=query.as_of)
398
+ resources = set()
399
+ for resource in query.resources:
400
+ if isinstance(resource, Variable) and resource.name in bindings.columns:
401
+ resources.update(bindings[resource.name].unique().to_list())
402
+ elif isinstance(resource, IRI):
403
+ resources.add(self._expand_iri(resource.value, prefixes))
404
+ else:
405
+ resources = {
406
+ self._expand_iri(r.value, prefixes) if isinstance(r, IRI) else str(r)
407
+ for r in query.resources
408
+ }
409
+
410
+ # Get all triples where resource is subject or object
411
+ df = self.store._df
412
+
413
+ # Apply time-travel filter if specified
414
+ if query.as_of:
415
+ df = df.filter(pl.col("timestamp") <= query.as_of)
416
+
417
+ if len(df) == 0:
418
+ return df
419
+
420
+ resource_list = list(resources)
421
+ result = df.filter(
422
+ pl.col("subject").is_in(resource_list) |
423
+ pl.col("object").is_in(resource_list)
424
+ )
425
+
426
+ return result
427
+
428
+ def _execute_construct(self, query: ConstructQuery) -> pl.DataFrame:
429
+ """
430
+ Execute a CONSTRUCT query.
431
+
432
+ Returns triples constructed from the template using WHERE bindings.
433
+ """
434
+ prefixes = query.prefixes
435
+ bindings = self._execute_where(query.where, prefixes, as_of=query.as_of)
436
+
437
+ if len(bindings) == 0:
438
+ return pl.DataFrame({"subject": [], "predicate": [], "object": []})
439
+
440
+ # Build result triples from template
441
+ result_triples = []
442
+
443
+ for row in bindings.iter_rows(named=True):
444
+ for pattern in query.template:
445
+ # Substitute variables with bound values
446
+ subject = self._substitute_term(pattern.subject, row, prefixes)
447
+ predicate = self._substitute_term(pattern.predicate, row, prefixes)
448
+ obj = self._substitute_term(pattern.object, row, prefixes)
449
+
450
+ if subject is not None and predicate is not None and obj is not None:
451
+ result_triples.append({
452
+ "subject": subject,
453
+ "predicate": predicate,
454
+ "object": obj,
455
+ })
456
+
457
+ return pl.DataFrame(result_triples) if result_triples else pl.DataFrame({"subject": [], "predicate": [], "object": []})
458
+
459
+ def _substitute_term(self, term: Term, row: dict, prefixes: dict) -> Optional[str]:
460
+ """Substitute a term with a value from bindings."""
461
+ if isinstance(term, Variable):
462
+ return row.get(term.name)
463
+ elif isinstance(term, IRI):
464
+ return self._expand_iri(term.value, prefixes)
465
+ elif isinstance(term, Literal):
466
+ return term.value
467
+ elif isinstance(term, BlankNode):
468
+ return f"_:{term.label}"
469
+ return str(term)
470
+
471
+ def _expand_iri(self, iri: str, prefixes: dict) -> str:
472
+ """Expand a prefixed IRI using prefix declarations."""
473
+ if ":" in iri and not iri.startswith("http"):
474
+ parts = iri.split(":", 1)
475
+ if len(parts) == 2 and parts[0] in prefixes:
476
+ return prefixes[parts[0]] + parts[1]
477
+ return iri
478
+
479
+ def _try_optimize_provenance_pattern(
480
+ self,
481
+ pattern: TriplePattern,
482
+ prefixes: dict[str, str],
483
+ pattern_idx: int
484
+ ) -> Optional[tuple[str, str, QuotedTriplePattern, Optional[str]]]:
485
+ """
486
+ Try to optimize a provenance pattern to direct column access.
487
+
488
+ Detects patterns like:
489
+ << ?s ?p ?o >> prov:value ?conf (specific predicate)
490
+ << ?s ?p ?o >> ?mp ?mo (variable predicate - get ALL)
491
+
492
+ And maps them to the corresponding columnar provenance data
493
+ (confidence, source, timestamp, process).
494
+
495
+ Returns:
496
+ Tuple of (object_var_name, column_name_or_"*", inner_pattern, predicate_var_name)
497
+ - column_name is "*" when predicate is a variable (return all provenance)
498
+ - predicate_var_name is set when predicate is a variable
499
+ None if not a provenance pattern.
500
+ """
501
+ # Must be a triple pattern with a quoted triple as subject
502
+ if not isinstance(pattern.subject, QuotedTriplePattern):
503
+ return None
504
+
505
+ # Object must be a variable to bind the provenance value
506
+ if not isinstance(pattern.object, Variable):
507
+ return None
508
+
509
+ # Check if predicate is a variable - if so, return ALL provenance
510
+ if isinstance(pattern.predicate, Variable):
511
+ return (pattern.object.name, "*", pattern.subject, pattern.predicate.name)
512
+
513
+ # Predicate must be a known provenance predicate IRI
514
+ if not isinstance(pattern.predicate, IRI):
515
+ return None
516
+
517
+ pred_iri = self._expand_iri(pattern.predicate.value, prefixes)
518
+
519
+ # Check if it's a provenance predicate we can optimize
520
+ column_name = PROV_PREDICATE_MAP.get(pred_iri)
521
+ if not column_name:
522
+ # Also check without expansion
523
+ column_name = PROV_PREDICATE_MAP.get(pattern.predicate.value)
524
+
525
+ if not column_name:
526
+ return None
527
+
528
+ return (pattern.object.name, column_name, pattern.subject, None)
529
+
530
+ def _execute_where(
531
+ self,
532
+ where: WhereClause,
533
+ prefixes: dict[str, str],
534
+ as_of: Optional[datetime] = None,
535
+ from_graphs: Optional[list[str]] = None,
536
+ ) -> pl.DataFrame:
537
+ """
538
+ Execute a WHERE clause and return matching bindings.
539
+
540
+ Args:
541
+ where: The WHERE clause to execute
542
+ prefixes: Prefix mappings
543
+ as_of: Optional timestamp for time-travel queries
544
+ from_graphs: Optional list of graph URIs to restrict query to
545
+
546
+ Includes internal optimization for provenance patterns:
547
+ When detecting patterns like << ?s ?p ?o >> prov:value ?conf,
548
+ we map directly to the confidence column instead of doing a join.
549
+ Also handles << ?s ?p ?o >> ?mp ?mo to return ALL provenance.
550
+ """
551
+ # Empty WHERE clause (no triple, UNION, or GRAPH patterns) - nothing to match
552
+ if not where.patterns and not where.union_patterns and not where.graph_patterns:
553
+ return pl.DataFrame()
554
+
555
+ # Separate regular patterns from optimizable provenance patterns
556
+ # For provenance patterns, we execute the inner pattern and bind provenance columns
557
+ patterns_to_execute = [] # List of (idx, pattern, prov_bindings)
558
+
559
+ for i, pattern in enumerate(where.patterns):
560
+ opt_result = self._try_optimize_provenance_pattern(pattern, prefixes, i)
561
+ if opt_result:
562
+ # This is a provenance pattern - execute inner pattern and bind column
563
+ obj_var_name, col_name, inner_pattern, pred_var_name = opt_result
564
+ # Create a TriplePattern from the inner QuotedTriplePattern
565
+ inner_triple = TriplePattern(
566
+ subject=inner_pattern.subject,
567
+ predicate=inner_pattern.predicate,
568
+ object=inner_pattern.object
569
+ )
570
+ patterns_to_execute.append((i, inner_triple, (obj_var_name, col_name, pred_var_name)))
571
+ else:
572
+ patterns_to_execute.append((i, pattern, None))
573
+
574
+ # Execute patterns and join results
575
+ result_df: Optional[pl.DataFrame] = None
576
+
577
+ for i, pattern, prov_binding in patterns_to_execute:
578
+ pattern_df = self._execute_pattern(pattern, prefixes, i, as_of=as_of, from_graphs=from_graphs)
579
+
580
+ # If this pattern has a provenance binding, add it as a column alias
581
+ if prov_binding:
582
+ obj_var_name, col_name, pred_var_name = prov_binding
583
+
584
+ if col_name == "*":
585
+ # Variable predicate - unpivot ALL provenance columns into rows
586
+ # Map column names to their prov predicates
587
+ prov_col_to_pred = {
588
+ "source": "<http://www.w3.org/ns/prov#wasDerivedFrom>",
589
+ "confidence": "<http://www.w3.org/ns/prov#value>",
590
+ "timestamp": "<http://www.w3.org/ns/prov#generatedAtTime>",
591
+ "process": "<http://www.w3.org/ns/prov#wasGeneratedBy>",
592
+ }
593
+
594
+ # Find all _prov_ columns for this pattern
595
+ prov_cols = [c for c in pattern_df.columns if c.startswith(f"_prov_{i}_")]
596
+
597
+ if prov_cols:
598
+ # Build unpivoted dataframe - one row per provenance value
599
+ unpivoted_dfs = []
600
+ base_cols = [c for c in pattern_df.columns if not c.startswith("_prov_")]
601
+
602
+ for prov_col in prov_cols:
603
+ # Extract column type from _prov_{idx}_{type}
604
+ col_type = prov_col.split("_")[-1] # e.g., "source", "confidence"
605
+ pred_uri = prov_col_to_pred.get(col_type)
606
+
607
+ if pred_uri:
608
+ # Create a df with this provenance column as the object
609
+ row_df = pattern_df.select(base_cols + [prov_col])
610
+ # Filter out nulls
611
+ row_df = row_df.filter(pl.col(prov_col).is_not_null())
612
+
613
+ if len(row_df) > 0:
614
+ # Add predicate and rename object column
615
+ row_df = row_df.with_columns([
616
+ pl.lit(pred_uri).alias(pred_var_name),
617
+ pl.col(prov_col).cast(pl.Utf8).alias(obj_var_name)
618
+ ]).drop(prov_col)
619
+ unpivoted_dfs.append(row_df)
620
+
621
+ if unpivoted_dfs:
622
+ pattern_df = pl.concat(unpivoted_dfs)
623
+ else:
624
+ # No provenance data - return empty with correct columns
625
+ pattern_df = pattern_df.select(base_cols).with_columns([
626
+ pl.lit(None).cast(pl.Utf8).alias(pred_var_name),
627
+ pl.lit(None).cast(pl.Utf8).alias(obj_var_name)
628
+ ]).head(0)
629
+ else:
630
+ # Specific predicate - just alias the column
631
+ prov_col = f"_prov_{i}_{col_name}"
632
+ if prov_col in pattern_df.columns:
633
+ pattern_df = pattern_df.with_columns(
634
+ pl.col(prov_col).alias(obj_var_name)
635
+ )
636
+
637
+ if result_df is None:
638
+ result_df = pattern_df
639
+ else:
640
+ # Find shared variables to join on
641
+ shared_cols = set(result_df.columns) & set(pattern_df.columns)
642
+ shared_cols -= {"_pattern_idx"} # Don't join on internal columns
643
+ # Also exclude provenance internal columns from join keys
644
+ shared_cols = {c for c in shared_cols if not c.startswith("_prov_")}
645
+
646
+ if shared_cols:
647
+ result_df = result_df.join(
648
+ pattern_df,
649
+ on=list(shared_cols),
650
+ how="inner"
651
+ )
652
+ else:
653
+ # Cross join if no shared variables
654
+ result_df = result_df.join(pattern_df, how="cross")
655
+
656
+ # Handle GRAPH patterns
657
+ if where.graph_patterns:
658
+ for graph_pattern in where.graph_patterns:
659
+ graph_df = self._execute_graph_pattern(graph_pattern, prefixes, as_of=as_of)
660
+ if result_df is None:
661
+ result_df = graph_df
662
+ elif len(graph_df) > 0:
663
+ # Join with existing results
664
+ shared_cols = set(result_df.columns) & set(graph_df.columns)
665
+ shared_cols -= {"_pattern_idx"}
666
+ shared_cols = {c for c in shared_cols if not c.startswith("_prov_")}
667
+
668
+ if shared_cols:
669
+ result_df = result_df.join(graph_df, on=list(shared_cols), how="inner")
670
+ else:
671
+ result_df = result_df.join(graph_df, how="cross")
672
+
673
+ # Handle UNION patterns - these can be standalone or combined with other patterns
674
+ if where.union_patterns:
675
+ for union in where.union_patterns:
676
+ if result_df is None or len(result_df) == 0:
677
+ # UNION is the primary pattern - execute it directly
678
+ result_df = self._execute_union_standalone(union, prefixes)
679
+ else:
680
+ # Combine UNION results with existing patterns
681
+ result_df = self._apply_union(result_df, union, prefixes)
682
+
683
+ if result_df is None:
684
+ return pl.DataFrame()
685
+
686
+ # Apply standard FILTER clauses
687
+ for filter_clause in where.filters:
688
+ result_df = self._apply_filter(result_df, filter_clause)
689
+
690
+ # Apply OPTIONAL patterns with left outer joins
691
+ for optional in where.optional_patterns:
692
+ result_df = self._apply_optional(result_df, optional, prefixes)
693
+
694
+ # Apply BIND clauses - add new columns with computed values
695
+ for bind in where.binds:
696
+ result_df = self._apply_bind(result_df, bind, prefixes)
697
+
698
+ # Apply VALUES clause - filter/join with inline data
699
+ if where.values:
700
+ result_df = self._apply_values(result_df, where.values, prefixes)
701
+
702
+ # Check if we have matches before removing internal columns
703
+ has_matches = len(result_df) > 0
704
+
705
+ # Remove internal columns EXCEPT provenance columns (keep _prov_*)
706
+ internal_cols = [c for c in result_df.columns if c.startswith("_") and not c.startswith("_prov_")]
707
+ if internal_cols:
708
+ result_df = result_df.drop(internal_cols)
709
+
710
+ # If we had matches but now have no columns (all terms were concrete),
711
+ # return a DataFrame with a single row to indicate a match exists
712
+ if has_matches and len(result_df.columns) == 0:
713
+ result_df = pl.DataFrame({"_matched": [True]})
716
+
717
+ return result_df
718
+
719
+ def _execute_pattern(
720
+ self,
721
+ pattern: TriplePattern,
722
+ prefixes: dict[str, str],
723
+ pattern_idx: int,
724
+ as_of: Optional[datetime] = None,
725
+ from_graphs: Optional[list[str]] = None,
726
+ ) -> pl.DataFrame:
727
+ """
728
+ Execute a single triple pattern against the store.
729
+
730
+ Args:
731
+ pattern: The triple pattern to match
732
+ prefixes: Prefix mappings
733
+ pattern_idx: Index of this pattern (for internal column naming)
734
+ as_of: Optional timestamp for time-travel queries
735
+ from_graphs: Optional list of graph URIs to restrict query to
736
+
737
+ Returns a DataFrame with columns for each variable in the pattern.
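+
+ For example, matching ?s ex:knows ?o yields columns "s" and "o" (plus an
+ "o_value" column when the store provides typed object values) along with
+ the internal _prov_{pattern_idx}_source/confidence/timestamp/process columns.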
738
+ """
739
+ # Start with all assertions
740
+ df = self.store._df.lazy()
741
+
742
+ # Apply time-travel filter if specified
743
+ if as_of is not None:
744
+ df = df.filter(pl.col("timestamp") <= as_of)
745
+
746
+ # Apply FROM graph restriction
747
+ if from_graphs is not None:
748
+ # Match triples in specified graphs (None for default graph)
749
+ graph_conditions = []
750
+ for g in from_graphs:
751
+ if g is None or g == "":
752
+ graph_conditions.append(pl.col("graph").is_null())
753
+ else:
754
+ graph_conditions.append(pl.col("graph") == g)
755
+ if graph_conditions:
756
+ combined = graph_conditions[0]
757
+ for cond in graph_conditions[1:]:
758
+ combined = combined | cond
759
+ df = df.filter(combined)
760
+
765
+ # Apply filters for concrete terms
766
+ if not isinstance(pattern.subject, Variable):
767
+ value = self._resolve_term(pattern.subject, prefixes)
768
+ # Match both with and without angle brackets for URIs
769
+ if value.startswith("http"):
770
+ df = df.filter(
771
+ (pl.col("subject") == value) |
772
+ (pl.col("subject") == f"<{value}>")
773
+ )
774
+ else:
775
+ df = df.filter(pl.col("subject") == value)
776
+
777
+ if not isinstance(pattern.predicate, Variable):
778
+ value = self._resolve_term(pattern.predicate, prefixes)
779
+ # Match both with and without angle brackets for URIs
780
+ if value.startswith("http"):
781
+ df = df.filter(
782
+ (pl.col("predicate") == value) |
783
+ (pl.col("predicate") == f"<{value}>")
784
+ )
785
+ else:
786
+ df = df.filter(pl.col("predicate") == value)
787
+
788
+ if not isinstance(pattern.object, (Variable, QuotedTriplePattern)):
789
+ value = self._resolve_term(pattern.object, prefixes)
790
+ str_value = str(value)
791
+ # Match both with and without angle brackets for URIs
792
+ if str_value.startswith("http"):
793
+ df = df.filter(
794
+ (pl.col("object") == str_value) |
795
+ (pl.col("object") == f"<{str_value}>")
796
+ )
797
+ else:
798
+ df = df.filter(pl.col("object") == str_value)
799
+
800
+ # Exclude deprecated by default
801
+ df = df.filter(~pl.col("deprecated"))
802
+
803
+ # Collect results
804
+ result = df.collect()
805
+
806
+ # Rename columns to variable names and select relevant columns
807
+ renames = {}
808
+ select_cols = []
809
+
810
+ if isinstance(pattern.subject, Variable):
811
+ renames["subject"] = pattern.subject.name
812
+ select_cols.append("subject")
813
+
814
+ if isinstance(pattern.predicate, Variable):
815
+ renames["predicate"] = pattern.predicate.name
816
+ select_cols.append("predicate")
817
+
818
+ if isinstance(pattern.object, Variable):
819
+ renames["object"] = pattern.object.name
820
+ select_cols.append("object")
821
+ # Also include typed object_value for numeric FILTER comparisons
822
+ # Rename it to the variable name with "_value" suffix
823
+ if "object_value" in result.columns:
824
+ renames["object_value"] = f"{pattern.object.name}_value"
825
+ select_cols.append("object_value")
826
+
827
+ # Always include provenance columns for provenance filters
828
+ provenance_cols = ["source", "confidence", "timestamp", "process"]
829
+ for col in provenance_cols:
830
+ renames[col] = f"_prov_{pattern_idx}_{col}"
831
+ select_cols.append(col)
832
+
833
+ # Select and rename
834
+ if select_cols:
835
+ result = result.select(select_cols)
836
+ result = result.rename(renames)
837
+ else:
838
+ # Pattern has no variables - just return count
839
+ result = pl.DataFrame({"_match": [True] * len(result)})
840
+
841
+ return result
842
+
843
+ def _execute_graph_pattern(
844
+ self,
845
+ graph_pattern: "GraphPattern",
846
+ prefixes: dict[str, str],
847
+ as_of: Optional[datetime] = None,
848
+ ) -> pl.DataFrame:
849
+ """
850
+ Execute a GRAPH pattern: GRAPH <uri> { patterns }.
851
+
852
+ Args:
853
+ graph_pattern: The GRAPH pattern to execute
854
+ prefixes: Prefix mappings
855
+ as_of: Optional timestamp for time-travel queries
856
+
857
+ Returns:
858
+ DataFrame with matching bindings from the specified graph
859
+ """
860
+ # Resolve the graph reference
861
+ if isinstance(graph_pattern.graph, IRI):
862
+ graph_uri = self._resolve_term(graph_pattern.graph, prefixes)
863
+ graph_filter = [graph_uri]
864
+ elif isinstance(graph_pattern.graph, Variable):
865
+ # Variable graph - match all named graphs and bind the variable
866
+ graph_filter = None # Will filter manually
867
+ graph_var_name = graph_pattern.graph.name
868
+ else:
869
+ return pl.DataFrame()
870
+
871
+ # Execute each pattern in the graph
872
+ result_df: Optional[pl.DataFrame] = None
873
+
874
+ for i, pattern in enumerate(graph_pattern.patterns):
875
+ pattern_df = self._execute_pattern(
876
+ pattern,
877
+ prefixes,
878
+ 1000 + i, # Use high pattern idx to avoid conflicts
879
+ as_of=as_of,
880
+ from_graphs=graph_filter
881
+ )
882
+
883
+ # If graph is a variable, add the graph column as a binding
884
+ if isinstance(graph_pattern.graph, Variable):
885
+ # Need to also get graph column from store
893
+ graph_df = self._execute_pattern_with_graph(
894
+ pattern, prefixes, 1000 + i, as_of=as_of
895
+ )
896
+ if graph_var_name not in graph_df.columns and "graph" in graph_df.columns:
897
+ graph_df = graph_df.rename({"graph": graph_var_name})
898
+ pattern_df = graph_df
899
+
900
+ if result_df is None:
901
+ result_df = pattern_df
902
+ else:
903
+ # Join on shared variables
904
+ shared_cols = set(result_df.columns) & set(pattern_df.columns)
905
+ shared_cols = {c for c in shared_cols if not c.startswith("_prov_")}
906
+ if shared_cols:
907
+ result_df = result_df.join(pattern_df, on=list(shared_cols), how="inner")
908
+ else:
909
+ result_df = result_df.join(pattern_df, how="cross")
910
+
911
+ return result_df if result_df is not None else pl.DataFrame()
912
+
913
+ def _execute_pattern_with_graph(
914
+ self,
915
+ pattern: TriplePattern,
916
+ prefixes: dict[str, str],
917
+ pattern_idx: int,
918
+ as_of: Optional[datetime] = None,
919
+ ) -> pl.DataFrame:
920
+ """Execute a pattern and include the graph column in results."""
921
+ # Start with all assertions
922
+ df = self.store._df.lazy()
923
+
924
+ if as_of is not None:
925
+ df = df.filter(pl.col("timestamp") <= as_of)
926
+
927
+ # Only named graphs
928
+ df = df.filter(pl.col("graph").is_not_null())
929
+
930
+ # Apply filters for concrete terms
931
+ if not isinstance(pattern.subject, Variable):
932
+ value = self._resolve_term(pattern.subject, prefixes)
933
+ if value.startswith("http"):
934
+ df = df.filter(
935
+ (pl.col("subject") == value) |
936
+ (pl.col("subject") == f"<{value}>")
937
+ )
938
+ else:
939
+ df = df.filter(pl.col("subject") == value)
940
+
941
+ if not isinstance(pattern.predicate, Variable):
942
+ value = self._resolve_term(pattern.predicate, prefixes)
943
+ if value.startswith("http"):
944
+ df = df.filter(
945
+ (pl.col("predicate") == value) |
946
+ (pl.col("predicate") == f"<{value}>")
947
+ )
948
+ else:
949
+ df = df.filter(pl.col("predicate") == value)
950
+
951
+ if not isinstance(pattern.object, (Variable, QuotedTriplePattern)):
952
+ value = self._resolve_term(pattern.object, prefixes)
953
+ str_value = str(value)
954
+ if str_value.startswith("http"):
955
+ df = df.filter(
956
+ (pl.col("object") == str_value) |
957
+ (pl.col("object") == f"<{str_value}>")
958
+ )
959
+ else:
960
+ df = df.filter(pl.col("object") == str_value)
961
+
962
+ df = df.filter(~pl.col("deprecated"))
963
+ result = df.collect()
964
+
965
+ # Rename and select columns
966
+ renames = {}
967
+ select_cols = ["graph"] # Always include graph
968
+
969
+ if isinstance(pattern.subject, Variable):
970
+ renames["subject"] = pattern.subject.name
971
+ select_cols.append("subject")
972
+
973
+ if isinstance(pattern.predicate, Variable):
974
+ renames["predicate"] = pattern.predicate.name
975
+ select_cols.append("predicate")
976
+
977
+ if isinstance(pattern.object, Variable):
978
+ renames["object"] = pattern.object.name
979
+ select_cols.append("object")
980
+
981
+ if select_cols:
982
+ result = result.select(select_cols)
983
+ result = result.rename(renames)
984
+
985
+ return result
986
+
987
+ def _resolve_term(self, term: Term, prefixes: dict[str, str]) -> str:
988
+ """Resolve a term to its string value for matching against store."""
989
+ if isinstance(term, IRI):
990
+ value = term.value
991
+ # Expand prefixed names
992
+ if ":" in value and not value.startswith("http"):
993
+ prefix, local = value.split(":", 1)
994
+ if prefix in prefixes:
995
+ value = prefixes[prefix] + local
996
+ # Return without angle brackets - store has mixed formats
997
+ # The _execute_pattern will try both with/without brackets
998
+ return value
999
+ elif isinstance(term, Literal):
1000
+ return str(term.value)
1001
+ elif isinstance(term, BlankNode):
1002
+ return f"_:{term.label}"
1003
+ else:
1004
+ return str(term)
1005
+
1006
+ def _apply_filter(self, df: pl.DataFrame, filter_clause: Filter) -> pl.DataFrame:
1007
+ """Apply a standard FILTER to the DataFrame."""
1008
+ expr = self._build_filter_expression(filter_clause.expression)
1009
+ if expr is not None:
1010
+ return df.filter(expr)
1011
+ return df
1012
+
1013
+ def _apply_optional(
1014
+ self,
1015
+ df: pl.DataFrame,
1016
+ optional: OptionalPattern,
1017
+ prefixes: dict[str, str]
1018
+ ) -> pl.DataFrame:
1019
+ """
1020
+ Apply an OPTIONAL pattern using left outer join.
1021
+
1022
+ OPTIONAL { ... } patterns add bindings when matched but keep
1023
+ rows even when no match exists (with NULL for optional columns).
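+
+ For example, { ?s ex:name ?n OPTIONAL { ?s ex:email ?e } } keeps every
+ ?s/?n row and leaves ?e null where no email triple matches (illustrative).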
1024
+ """
1025
+ # Execute the optional patterns
1026
+ optional_df: Optional[pl.DataFrame] = None
1027
+
1028
+ for i, pattern in enumerate(optional.patterns):
1029
+ if isinstance(pattern, (TriplePattern, QuotedTriplePattern)):
1030
+ pattern_df = self._execute_pattern(pattern, prefixes, 1000 + i)
1031
+
1032
+ if optional_df is None:
1033
+ optional_df = pattern_df
1034
+ else:
1035
+ shared_cols = set(optional_df.columns) & set(pattern_df.columns)
1036
+ shared_cols -= {"_pattern_idx"}
1037
+
1038
+ if shared_cols:
1039
+ optional_df = optional_df.join(pattern_df, on=list(shared_cols), how="inner")
1040
+ else:
1041
+ optional_df = optional_df.join(pattern_df, how="cross")
1042
+
1043
+ if optional_df is None or len(optional_df) == 0:
1044
+ return df
1045
+
1046
+ # Apply filters within the optional block
1047
+ for filter_clause in optional.filters:
1048
+ optional_df = self._apply_filter(optional_df, filter_clause)
1049
+
1050
+ # Remove internal columns from optional_df
1051
+ internal_cols = [c for c in optional_df.columns if c.startswith("_")]
1052
+ if internal_cols:
1053
+ optional_df = optional_df.drop(internal_cols)
1054
+
1055
+ # Find shared columns for the join
1056
+ shared_cols = set(df.columns) & set(optional_df.columns)
1057
+
1058
+ if shared_cols:
1059
+ # Left outer join - keep all rows from df, add optional columns where matched
1060
+ return df.join(optional_df, on=list(shared_cols), how="left")
1061
+ else:
1062
+ # No shared columns - unusual for OPTIONAL; keep existing rows unchanged
1063
+ return df
1064
+
1065
+ def _apply_union(
1066
+ self,
1067
+ df: pl.DataFrame,
1068
+ union: UnionPattern,
1069
+ prefixes: dict[str, str]
1070
+ ) -> pl.DataFrame:
1071
+ """
1072
+ Apply a UNION pattern by combining results from alternatives.
1073
+
1074
+ UNION combines results from multiple pattern groups:
1075
+ { ?s ?p ?o } UNION { ?s ?q ?r }
1076
+
1077
+ Returns all rows matching ANY of the alternatives.
1078
+ """
1079
+ union_results = []
1080
+
1081
+ for i, alternative in enumerate(union.alternatives):
1082
+ # Execute each alternative as a mini WHERE clause
1083
+ alt_where = WhereClause(patterns=alternative)
1084
+ alt_df = self._execute_where(alt_where, prefixes)
1085
+
1086
+ if len(alt_df) > 0:
1087
+ union_results.append(alt_df)
1088
+
1089
+ if not union_results:
1090
+ return df
1091
+
1092
+ # Combine all union results
1093
+ if len(union_results) == 1:
1094
+ union_df = union_results[0]
1095
+ else:
1096
+ # Align schemas - add missing columns with null values
1097
+ all_columns = set()
1098
+ for r in union_results:
1099
+ all_columns.update(r.columns)
1100
+
1101
+ aligned_results = []
1102
+ for r in union_results:
1103
+ missing = all_columns - set(r.columns)
1104
+ if missing:
1105
+ for col in missing:
1106
+ r = r.with_columns(pl.lit(None).alias(col))
1107
+ aligned_results.append(r.select(sorted(all_columns)))
1108
+
1109
+ union_df = pl.concat(aligned_results, how="vertical")
1110
+
1111
+ # If we have existing results, join with them
1112
+ if len(df) > 0 and len(df.columns) > 0:
1113
+ shared_cols = set(df.columns) & set(union_df.columns)
1114
+ if shared_cols:
1115
+ return df.join(union_df, on=list(shared_cols), how="inner")
1116
+ else:
1117
+ return df.join(union_df, how="cross")
1118
+
1119
+ return union_df
1120
+
1121
+ def _execute_union_standalone(
1122
+ self,
1123
+ union: UnionPattern,
1124
+ prefixes: dict[str, str]
1125
+ ) -> pl.DataFrame:
1126
+ """
1127
+ Execute a UNION pattern as a standalone query (no prior patterns).
1128
+
1129
+ Returns combined results from all alternatives.
1130
+ """
1131
+ union_results = []
1132
+
1133
+ for alternative in union.alternatives:
1134
+ # Execute each alternative as a mini WHERE clause
1135
+ alt_where = WhereClause(patterns=alternative)
1136
+ alt_df = self._execute_where(alt_where, prefixes)
1137
+
1138
+ if len(alt_df) > 0:
1139
+ union_results.append(alt_df)
1140
+
1141
+ if not union_results:
1142
+ return pl.DataFrame()
1143
+
1144
+ # Combine all union results
1145
+ if len(union_results) == 1:
1146
+ return union_results[0]
1147
+
1148
+ # Align schemas - add missing columns with null values
1149
+ all_columns = set()
1150
+ for r in union_results:
1151
+ all_columns.update(r.columns)
1152
+
1153
+ aligned_results = []
1154
+ for r in union_results:
1155
+ missing = all_columns - set(r.columns)
1156
+ if missing:
1157
+ for col in missing:
1158
+ r = r.with_columns(pl.lit(None).alias(col))
1159
+ aligned_results.append(r.select(sorted(all_columns)))
1160
+
1161
+ return pl.concat(aligned_results, how="vertical")
1162
+
1163
+ def _apply_bind(
1164
+ self,
1165
+ df: pl.DataFrame,
1166
+ bind: Bind,
1167
+ prefixes: dict[str, str]
1168
+ ) -> pl.DataFrame:
1169
+ """
1170
+ Apply a BIND clause, adding a new column with the computed value.
1171
+
1172
+ BIND(?price * 1.1 AS ?taxed_price)
1173
+ BIND("default" AS ?label)
1174
+ """
1175
+ var_name = bind.variable.name
1176
+
1177
+ # Handle different expression types
1178
+ if isinstance(bind.expression, Variable):
1179
+ # BIND(?x AS ?y) - copy column
1180
+ src_name = bind.expression.name
1181
+ if src_name in df.columns:
1182
+ df = df.with_columns(pl.col(src_name).alias(var_name))
1183
+ elif isinstance(bind.expression, Literal):
1184
+ # BIND("value" AS ?var) - add constant
1185
+ df = df.with_columns(pl.lit(bind.expression.value).alias(var_name))
1186
+ elif isinstance(bind.expression, IRI):
1187
+ # BIND(<uri> AS ?var) - add constant IRI
1188
+ value = self._resolve_term(bind.expression, prefixes)
1189
+ df = df.with_columns(pl.lit(value).alias(var_name))
1190
+ elif isinstance(bind.expression, Comparison):
1191
+ # BIND(?x > 5 AS ?flag) - boolean expression
1192
+ expr = self._build_filter_expression(bind.expression)
1193
+ if expr is not None:
1194
+ df = df.with_columns(expr.alias(var_name))
1195
+ elif isinstance(bind.expression, FunctionCall):
1196
+ # BIND(CONCAT(?a, ?b) AS ?c) - function call
1197
+ expr = self._build_function_call(bind.expression)
1198
+ if expr is not None:
1199
+ df = df.with_columns(expr.alias(var_name))
1200
+
1201
+ return df
1202
+
1203
+ def _apply_values(
1204
+ self,
1205
+ df: pl.DataFrame,
1206
+ values: ValuesClause,
1207
+ prefixes: dict[str, str]
1208
+ ) -> pl.DataFrame:
1209
+ """
1210
+ Apply a VALUES clause, joining with inline data.
1211
+
1212
+ VALUES ?x { 1 2 3 }
1213
+ VALUES (?x ?y) { (1 2) (3 4) }
1214
+ """
1215
+ # Build a DataFrame from the VALUES data
1216
+ var_names = [v.name for v in values.variables]
1217
+
1218
+ # Convert bindings to column data
1219
+ columns = {name: [] for name in var_names}
1220
+
1221
+ for row in values.bindings:
1222
+ for i, val in enumerate(row):
1223
+ if i < len(var_names):
1224
+ if val is None:
1225
+ columns[var_names[i]].append(None)
1226
+ elif isinstance(val, Literal):
1227
+ columns[var_names[i]].append(val.value)
1228
+ elif isinstance(val, IRI):
1229
+ columns[var_names[i]].append(self._resolve_term(val, prefixes))
1230
+ else:
1231
+ columns[var_names[i]].append(str(val))
1232
+
1233
+ values_df = pl.DataFrame(columns)
1234
+
1235
+ if len(df) == 0 or len(df.columns) == 0:
1236
+ # VALUES is the only source - return it directly
1237
+ return values_df
1238
+
1239
+ # Join with existing results
1240
+ shared_cols = set(df.columns) & set(values_df.columns)
1241
+
1242
+ if shared_cols:
1243
+ # Inner join on shared columns - filter to matching values
1244
+ return df.join(values_df, on=list(shared_cols), how="inner")
1245
+ else:
1246
+ # Cross join - add all value combinations
1247
+ return df.join(values_df, how="cross")
1248
+
1249
+ def _build_filter_expression(
1250
+ self,
1251
+ expr: Union[Comparison, LogicalExpression, FunctionCall]
1252
+ ) -> Optional[pl.Expr]:
1253
+ """Build a Polars filter expression from SPARQL filter AST."""
1254
+
1255
+ if isinstance(expr, Comparison):
1256
+ # Handle type coercion for variable vs literal comparisons
1257
+ left, right = self._build_comparison_operands(expr.left, expr.right)
1258
+
1259
+ if left is None or right is None:
1260
+ return None
1261
+
1262
+ op_map = {
1263
+ ComparisonOp.EQ: lambda l, r: l == r,
1264
+ ComparisonOp.NE: lambda l, r: l != r,
1265
+ ComparisonOp.LT: lambda l, r: l < r,
1266
+ ComparisonOp.LE: lambda l, r: l <= r,
1267
+ ComparisonOp.GT: lambda l, r: l > r,
1268
+ ComparisonOp.GE: lambda l, r: l >= r,
1269
+ }
1270
+
1271
+ return op_map[expr.operator](left, right)
1272
+
1273
+ elif isinstance(expr, LogicalExpression):
1274
+ operand_exprs = [
1275
+ self._build_filter_expression(op) for op in expr.operands
1276
+ ]
1277
+ operand_exprs = [e for e in operand_exprs if e is not None]
1278
+
1279
+ if not operand_exprs:
1280
+ return None
1281
+
1282
+ if expr.operator == LogicalOp.NOT:
1283
+ return ~operand_exprs[0]
1284
+ elif expr.operator == LogicalOp.AND:
1285
+ result = operand_exprs[0]
1286
+ for e in operand_exprs[1:]:
1287
+ result = result & e
1288
+ return result
1289
+ elif expr.operator == LogicalOp.OR:
1290
+ result = operand_exprs[0]
1291
+ for e in operand_exprs[1:]:
1292
+ result = result | e
1293
+ return result
1294
+
1295
+ elif isinstance(expr, FunctionCall):
1296
+ return self._build_function_call(expr)
1297
+
1298
+ return None
1299
+
1300
+ def _build_comparison_operands(
1301
+ self,
1302
+ left_term: Union[Variable, Literal, IRI, FunctionCall],
1303
+ right_term: Union[Variable, Literal, IRI, FunctionCall]
1304
+ ) -> tuple[Optional[pl.Expr], Optional[pl.Expr]]:
1305
+ """
1306
+ Build comparison operands with proper type coercion.
1307
+
1308
+ When comparing a variable (column) with a typed literal, uses the
1309
+ pre-computed typed value column (e.g., age_value) if available.
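+
+ For example, FILTER(?age > 30) with an xsd:integer literal compares
+ pl.col("age_value") against 30 rather than the string "age" column
+ (illustrative).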
1310
+ """
1311
+ left = self._term_to_expr(left_term)
1312
+ right = self._term_to_expr(right_term)
1313
+
1314
+ if left is None or right is None:
1315
+ return left, right
1316
+
1317
+ # Use typed _value column for numeric comparisons with variables
1318
+ if isinstance(left_term, Variable) and isinstance(right_term, Literal):
1319
+ if right_term.datatype and self._is_numeric_datatype(right_term.datatype):
1320
+ # Use the pre-computed typed value column
1321
+ left = pl.col(f"{left_term.name}_value")
1322
+ elif isinstance(right_term, Variable) and isinstance(left_term, Literal):
1323
+ if left_term.datatype and self._is_numeric_datatype(left_term.datatype):
1324
+ # Use the pre-computed typed value column
1325
+ right = pl.col(f"{right_term.name}_value")
1326
+
1327
+ return left, right
1328
+
1329
+ def _is_numeric_datatype(self, datatype: str) -> bool:
1330
+ """Check if a datatype is numeric (integer, decimal, double, float, boolean)."""
1331
+ numeric_indicators = ["integer", "int", "decimal", "float", "double", "boolean"]
1332
+ datatype_lower = datatype.lower()
1333
+ return any(ind in datatype_lower for ind in numeric_indicators)
1334
+
1335
+ def _cast_column_for_comparison(self, col_expr: pl.Expr, datatype: str) -> pl.Expr:
1336
+ """Cast a column expression based on the datatype of the comparison literal."""
1337
+ if "integer" in datatype or "int" in datatype:
1338
+ return col_expr.cast(pl.Int64, strict=False)
1339
+ elif "decimal" in datatype or "float" in datatype or "double" in datatype:
1340
+ return col_expr.cast(pl.Float64, strict=False)
1341
+ elif "boolean" in datatype:
1342
+ return col_expr.cast(pl.Boolean, strict=False)
1343
+ return col_expr
1344
+
1345
+ def _term_to_expr(
1346
+ self,
1347
+ term: Union[Variable, Literal, IRI, FunctionCall]
1348
+ ) -> Optional[pl.Expr]:
1349
+ """Convert a term to a Polars expression."""
1350
+ if isinstance(term, Variable):
1351
+ return pl.col(term.name)
1352
+ elif isinstance(term, Literal):
1353
+ # Convert typed literals to appropriate Python types
1354
+ value = term.value
1355
+ if term.datatype:
1356
+ value = self._convert_typed_value(value, term.datatype)
1357
+ return pl.lit(value)
1358
+ elif isinstance(term, IRI):
1359
+ return pl.lit(term.value)
1360
+ elif isinstance(term, FunctionCall):
1361
+ return self._build_function_call(term)
1362
+ return None
1363
+
1364
+ def _convert_typed_value(self, value: Any, datatype: str) -> Any:
1365
+ """Convert a literal value based on its XSD datatype."""
1366
+ if isinstance(value, (int, float, bool)):
1367
+ return value # Already native type
1368
+
1369
+ # XSD numeric types
1370
+ if "integer" in datatype or "int" in datatype:
1371
+ try:
1372
+ return int(value)
1373
+ except (ValueError, TypeError):
1374
+ return value
1375
+ elif "decimal" in datatype or "float" in datatype or "double" in datatype:
1376
+ try:
1377
+ return float(value)
1378
+ except (ValueError, TypeError):
1379
+ return value
1380
+ elif "boolean" in datatype:
1381
+ if isinstance(value, str):
1382
+ return value.lower() == "true"
1383
+ return bool(value)
1384
+
1385
+ return value
1386
+
1387
+ def _build_function_call(self, func: FunctionCall) -> Optional[pl.Expr]:
1388
+ """Build a Polars expression for a SPARQL function."""
1389
+ name = func.name.upper()
1390
+
1391
+ if name == "BOUND":
1392
+ if func.arguments and isinstance(func.arguments[0], Variable):
1393
+ return pl.col(func.arguments[0].name).is_not_null()
1394
+
1395
+ elif name in ("ISIRI", "ISURI"):
1396
+ if func.arguments and isinstance(func.arguments[0], Variable):
1397
+ col = pl.col(func.arguments[0].name)
1398
+ return col.str.starts_with("http")
1399
+
1400
+ elif name == "ISLITERAL":
1401
+ if func.arguments and isinstance(func.arguments[0], Variable):
1402
+ col = pl.col(func.arguments[0].name)
1403
+ return ~col.str.starts_with("http") & ~col.str.starts_with("_:")
1404
+
1405
+ elif name == "ISBLANK":
1406
+ if func.arguments and isinstance(func.arguments[0], Variable):
1407
+ col = pl.col(func.arguments[0].name)
1408
+ return col.str.starts_with("_:")
1409
+
1410
+ elif name == "STR":
1411
+ if func.arguments and isinstance(func.arguments[0], Variable):
1412
+ return pl.col(func.arguments[0].name).cast(pl.Utf8)
1413
+
1414
+ # Add more functions as needed
1415
+
1416
+ return None
1417
+
1418
+ def _execute_insert_data(
1419
+ self,
1420
+ query: InsertDataQuery,
1421
+ provenance: Optional[ProvenanceContext] = None
1422
+ ) -> dict:
1423
+ """
1424
+ Execute an INSERT DATA query with RDF-Star provenance recognition.
1425
+
1426
+ This method intelligently handles RDF-Star annotations:
1427
+ - Regular triples are inserted with default provenance
1428
+ - Quoted triple annotations like << s p o >> prov:wasAttributedTo "source"
1429
+ are recognized and applied to the base triple's metadata
1430
+
1431
+ Args:
1432
+ query: The InsertDataQuery AST
1433
+ provenance: Optional default provenance context
1434
+
1435
+ Returns:
1436
+ Dict with 'count' of inserted triples
1437
+ """
1438
+ if provenance is None:
1439
+ provenance = ProvenanceContext(source="SPARQL_INSERT", confidence=1.0)
1440
+
1441
+ prefixes = query.prefixes
1442
+
1443
+ # First pass: collect provenance annotations for quoted triples
1444
+ # Key: (subject, predicate, object) tuple of the base triple
1445
+ # Value: dict with 'source', 'confidence', 'timestamp' overrides
1446
+ provenance_annotations: dict[tuple[str, str, str], dict[str, Any]] = {}
1447
+
1448
+ # Separate regular triples from provenance annotations
1449
+ regular_triples = []
1450
+
1451
+ for triple in query.triples:
1452
+ # Check if this is a provenance annotation (subject is a quoted triple)
1453
+ if isinstance(triple.subject, QuotedTriplePattern):
1454
+ # This is an RDF-Star annotation like:
1455
+ # << ex:s ex:p ex:o >> prov:wasAttributedTo "IMDb" .
1456
+ quoted = triple.subject
1457
+ predicate_iri = self._resolve_term_value(triple.predicate, prefixes)
1458
+ obj_value = self._resolve_term_value(triple.object, prefixes)
1459
+
1460
+ # Get the base triple key
1461
+ base_s = self._resolve_term_value(quoted.subject, prefixes)
1462
+ base_p = self._resolve_term_value(quoted.predicate, prefixes)
1463
+ base_o = self._resolve_term_value(quoted.object, prefixes)
1464
+ base_key = (base_s, base_p, base_o)
1465
+
1466
+ # Initialize annotations dict for this triple if needed
1467
+ if base_key not in provenance_annotations:
1468
+ provenance_annotations[base_key] = {}
1469
+
1470
+ # Check if this predicate maps to a provenance field
1471
+ if predicate_iri in PROVENANCE_SOURCE_PREDICATES:
1472
+ provenance_annotations[base_key]['source'] = str(obj_value)
1473
+ elif predicate_iri in PROVENANCE_CONFIDENCE_PREDICATES:
1474
+ try:
1475
+ conf_val = float(obj_value)
1476
+ provenance_annotations[base_key]['confidence'] = conf_val
1477
+ except (ValueError, TypeError):
1478
+ # If the value can't be parsed as a float, the annotation is ignored
1479
+ pass
1480
+ elif predicate_iri in PROVENANCE_TIMESTAMP_PREDICATES:
1481
+ provenance_annotations[base_key]['timestamp'] = str(obj_value)
1482
+ else:
1483
+ # Not a recognized provenance predicate - treat as regular triple
1484
+ # (This creates an actual RDF-Star triple about the quoted triple)
1485
+ regular_triples.append(triple)
1486
+ else:
1487
+ # Regular triple
1488
+ regular_triples.append(triple)
1489
+
1490
+ # Second pass: insert regular triples with their provenance
1491
+ count = 0
1492
+
1493
+ for triple in regular_triples:
1494
+ subject = self._resolve_term_value(triple.subject, prefixes)
1495
+ predicate = self._resolve_term_value(triple.predicate, prefixes)
1496
+ obj = self._resolve_term_value(triple.object, prefixes)
1497
+
1498
+ # Check if we have provenance annotations for this triple
1499
+ triple_key = (subject, predicate, obj)
1500
+ if triple_key in provenance_annotations:
1501
+ annotations = provenance_annotations[triple_key]
1502
+ # Create provenance context with overrides
1503
+ triple_prov = ProvenanceContext(
1504
+ source=annotations.get('source', provenance.source),
1505
+ confidence=annotations.get('confidence', provenance.confidence),
1506
+ timestamp=provenance.timestamp,
1507
+ )
1508
+ else:
1509
+ triple_prov = provenance
1510
+
1511
+ self.store.add_triple(subject, predicate, obj, triple_prov)
1512
+ count += 1
1513
+
1514
+ # Also insert any base triples that only had annotations (no regular triple)
1515
+ # This handles the case where annotations come first:
1516
+ # << ex:s ex:p ex:o >> prov:wasAttributedTo "source" .
1517
+ # (but no explicit ex:s ex:p ex:o . triple)
1518
+ inserted_keys = {
1519
+ (self._resolve_term_value(t.subject, prefixes),
1520
+ self._resolve_term_value(t.predicate, prefixes),
1521
+ self._resolve_term_value(t.object, prefixes))
1522
+ for t in regular_triples
1523
+ if not isinstance(t.subject, QuotedTriplePattern)
1524
+ }
1525
+
1526
+ for base_key, annotations in provenance_annotations.items():
1527
+ if base_key not in inserted_keys:
1528
+ # This triple was only defined via annotations, insert it
1529
+ subject, predicate, obj = base_key
1530
+ triple_prov = ProvenanceContext(
1531
+ source=annotations.get('source', provenance.source),
1532
+ confidence=annotations.get('confidence', provenance.confidence),
1533
+ timestamp=provenance.timestamp,
1534
+ )
1535
+ self.store.add_triple(subject, predicate, obj, triple_prov)
1536
+ count += 1
1537
+
1538
+ return {"count": count, "operation": "INSERT DATA"}
1539
+
1540
+ def _execute_delete_data(self, query: DeleteDataQuery) -> dict:
1541
+ """
1542
+ Execute a DELETE DATA query.
1543
+
1544
+ DELETE DATA {
1545
+ <subject> <predicate> <object> .
1546
+ }
1547
+
1548
+ Deletes the specified concrete triples from the store.
1549
+ """
1550
+ prefixes = query.prefixes
1551
+ count = 0
1552
+
1553
+ for triple in query.triples:
1554
+ subject = self._resolve_term_value(triple.subject, prefixes)
1555
+ predicate = self._resolve_term_value(triple.predicate, prefixes)
1556
+ obj = self._resolve_term_value(triple.object, prefixes)
1557
+
1558
+ # Mark the triple as deleted
1559
+ deleted = self.store.mark_deleted(s=subject, p=predicate, o=obj)
1560
+ count += deleted
1561
+
1562
+ return {"count": count, "operation": "DELETE DATA"}
1563
+
1564
+ def _execute_delete_where(self, query: DeleteWhereQuery) -> dict:
1565
+ """
1566
+ Execute a DELETE WHERE query.
1567
+
1568
+ DELETE WHERE { ?s ?p ?o }
1569
+
1570
+ Finds all matching triples and deletes them.
1571
+ """
1572
+ # First, execute the WHERE clause to find matching bindings
1573
+ where = query.where
1574
+ prefixes = query.prefixes
1575
+
1576
+ if not where.patterns:
1577
+ return {"count": 0, "operation": "DELETE WHERE", "error": "No patterns in WHERE clause"}
1578
+
1579
+ # Execute WHERE to get bindings
1580
+ bindings = self._execute_where(where, prefixes)
1581
+
1582
+ if bindings is None or bindings.height == 0:
1583
+ return {"count": 0, "operation": "DELETE WHERE"}
1584
+
1585
+ # Build delete patterns from WHERE patterns
1586
+ count = 0
1587
+ for i in range(bindings.height):
1588
+ row = bindings.row(i, named=True)
1589
+ for pattern in where.patterns:
1590
+ if isinstance(pattern, TriplePattern):
1591
+ # Resolve each component using bindings
1592
+ subject = self._resolve_pattern_term(pattern.subject, row, query.prefixes)
1593
+ predicate = self._resolve_pattern_term(pattern.predicate, row, query.prefixes)
1594
+ obj = self._resolve_pattern_term(pattern.object, row, query.prefixes)
1595
+
1596
+ if subject and predicate and obj:
1597
+ # Mark as deleted
1598
+ deleted = self.store.mark_deleted(s=subject, p=predicate, o=obj)
1599
+ count += deleted
1600
+
1601
+ return {"count": count, "operation": "DELETE WHERE"}
1602
+
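+ # A sketch of the binding-driven deletion above, with assumed data and an
+ # assumed example.org vocabulary:
+ #
+ #     DELETE WHERE { ?s <http://example.org/status> "obsolete" }
+ #
+ # The WHERE clause binds ?s once per matching triple; each binding is then
+ # substituted back into the pattern and the resulting concrete triple is
+ # passed to store.mark_deleted().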
1603
+ def _execute_modify(
1604
+ self,
1605
+ query: ModifyQuery,
1606
+ provenance: Optional[ProvenanceContext] = None
1607
+ ) -> dict:
1608
+ """
1609
+ Execute a DELETE/INSERT WHERE (modify) query.
1610
+
1611
+ DELETE { <patterns> }
1612
+ INSERT { <patterns> }
1613
+ WHERE { <patterns> }
1614
+
1615
+ 1. Execute WHERE to get variable bindings
1616
+ 2. For each binding, delete matching patterns from DELETE clause
1617
+ 3. For each binding, insert patterns from INSERT clause
1618
+ """
1619
+ where = query.where
1620
+ prefixes = query.prefixes
1621
+
1622
+ # Execute WHERE to get bindings
1623
+ bindings = self._execute_where(where, prefixes)
1624
+
1625
+ if bindings is None or bindings.height == 0:
1626
+ # No matches - nothing to delete or insert
1627
+ return {
1628
+ "deleted": 0,
1629
+ "inserted": 0,
1630
+ "operation": "MODIFY"
1631
+ }
1632
+
1633
+ deleted_count = 0
1634
+ inserted_count = 0
1635
+
1636
+ # Process each row of bindings
1637
+ for i in range(bindings.height):
1638
+ row = bindings.row(i, named=True)
1639
+
1640
+ # Delete patterns
1641
+ for pattern in query.delete_patterns:
1642
+ subject = self._resolve_pattern_term(pattern.subject, row, query.prefixes)
1643
+ predicate = self._resolve_pattern_term(pattern.predicate, row, query.prefixes)
1644
+ obj = self._resolve_pattern_term(pattern.object, row, query.prefixes)
1645
+
1646
+ if subject and predicate and obj:
1647
+ deleted = self.store.mark_deleted(s=subject, p=predicate, o=obj)
1648
+ deleted_count += deleted
1649
+
1650
+ # Insert patterns
1651
+ for pattern in query.insert_patterns:
1652
+ subject = self._resolve_pattern_term(pattern.subject, row, query.prefixes)
1653
+ predicate = self._resolve_pattern_term(pattern.predicate, row, query.prefixes)
1654
+ obj = self._resolve_pattern_term(pattern.object, row, query.prefixes)
1655
+
1656
+ if subject and predicate and obj:
1657
+ prov = provenance or ProvenanceContext(source="SPARQL_UPDATE", confidence=1.0)
1658
+ self.store.add_triple(subject, predicate, obj, prov)
1659
+ inserted_count += 1
1660
+
1661
+ return {
1662
+ "deleted": deleted_count,
1663
+ "inserted": inserted_count,
1664
+ "operation": "MODIFY"
1665
+ }
1666
+
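+ # A sketch of the three-step flow above, with assumed data and an assumed
+ # ex: prefix:
+ #
+ #     PREFIX ex: <http://example.org/>
+ #     DELETE { ?s ex:status "draft" }
+ #     INSERT { ?s ex:status "published" }
+ #     WHERE  { ?s ex:status "draft" }
+ #
+ # Each WHERE binding of ?s drives one delete (the old "draft" triple) and
+ # one insert (the new "published" triple), the insert tagged with the
+ # supplied provenance or the default ProvenanceContext(source="SPARQL_UPDATE").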
1667
+ # =================================================================
1668
+ # Graph Management Execution Methods
1669
+ # =================================================================
1670
+
1671
+ def _execute_create_graph(self, query: CreateGraphQuery) -> dict:
1672
+ """Execute a CREATE GRAPH query."""
1673
+ graph_uri = self._resolve_term_value(query.graph_uri, query.prefixes)
1674
+ try:
1675
+ self.store.create_graph(graph_uri)
1676
+ return {"operation": "CREATE GRAPH", "graph": graph_uri, "success": True}
1677
+ except ValueError as e:
1678
+ if query.silent:
1679
+ return {"operation": "CREATE GRAPH", "graph": graph_uri, "success": False, "reason": str(e)}
1680
+ raise
1681
+
1682
+ def _execute_drop_graph(self, query: DropGraphQuery) -> dict:
1683
+ """Execute a DROP GRAPH query."""
1684
+ if query.target == "default":
1685
+ # Drop the default graph (clear triples with empty graph)
1686
+ self.store.clear_graph(None, silent=query.silent)
1687
+ return {"operation": "DROP", "target": "DEFAULT", "success": True}
1688
+ elif query.target == "named":
1689
+ # Drop all named graphs
1690
+ graphs = self.store.list_graphs()
1691
+ for g in graphs:
1692
+ if g: # Skip default graph
1693
+ self.store.drop_graph(g, silent=query.silent)
1694
+ return {"operation": "DROP", "target": "NAMED", "graphs_dropped": len([g for g in graphs if g]), "success": True}
1695
+ elif query.target == "all":
1696
+ # Drop all graphs including default
1697
+ graphs = self.store.list_graphs()
1698
+ for g in graphs:
1699
+ if g:
1700
+ self.store.drop_graph(g, silent=query.silent)
1701
+ self.store.clear_graph(None, silent=query.silent)
1702
+ return {"operation": "DROP", "target": "ALL", "success": True}
1703
+ else:
1704
+ # Drop specific graph
1705
+ graph_uri = self._resolve_term_value(query.graph_uri, query.prefixes)
1706
+ try:
1707
+ self.store.drop_graph(graph_uri, silent=query.silent)
1708
+ return {"operation": "DROP GRAPH", "graph": graph_uri, "success": True}
1709
+ except ValueError as e:
1710
+ if query.silent:
1711
+ return {"operation": "DROP GRAPH", "graph": graph_uri, "success": False, "reason": str(e)}
1712
+ raise
1713
+
1714
+ def _execute_clear_graph(self, query: ClearGraphQuery) -> dict:
1715
+ """Execute a CLEAR GRAPH query."""
1716
+ if query.target == "default":
1717
+ count = self.store.clear_graph(None, silent=query.silent)
1718
+ return {"operation": "CLEAR", "target": "DEFAULT", "triples_cleared": count, "success": True}
1719
+ elif query.target == "named":
1720
+ total_cleared = 0
1721
+ graphs = self.store.list_graphs()
1722
+ for g in graphs:
1723
+ if g: # Skip default graph
1724
+ count = self.store.clear_graph(g, silent=query.silent)
1725
+ total_cleared += count
1726
+ return {"operation": "CLEAR", "target": "NAMED", "triples_cleared": total_cleared, "success": True}
1727
+ elif query.target == "all":
1728
+ total_cleared = 0
1729
+ graphs = self.store.list_graphs()
1730
+ for g in graphs:
1731
+ count = self.store.clear_graph(g if g else None, silent=query.silent)
1732
+ total_cleared += count
1733
+ return {"operation": "CLEAR", "target": "ALL", "triples_cleared": total_cleared, "success": True}
1734
+ else:
1735
+ # Clear specific graph
1736
+ graph_uri = self._resolve_term_value(query.graph_uri, query.prefixes)
1737
+ try:
1738
+ count = self.store.clear_graph(graph_uri, silent=query.silent)
1739
+ return {"operation": "CLEAR GRAPH", "graph": graph_uri, "triples_cleared": count, "success": True}
1740
+ except ValueError as e:
1741
+ if query.silent:
1742
+ return {"operation": "CLEAR GRAPH", "graph": graph_uri, "success": False, "reason": str(e)}
1743
+ raise
1744
+
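+ # Sketches of the target handling above, assuming the parser accepts the
+ # standard SPARQL 1.1 Update forms (graph IRIs are placeholders):
+ #
+ #     CLEAR GRAPH <http://example.org/g1>  -> clears one named graph
+ #     CLEAR DEFAULT                        -> clears only the default graph
+ #     CLEAR NAMED                          -> clears every named graph
+ #     CLEAR ALL                            -> clears named graphs and default
+ #
+ # With SILENT (e.g. CLEAR SILENT GRAPH <...>), an error such as an unknown
+ # graph is reported as success=False in the result dict instead of raising.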
1745
+ def _execute_load(self, query: LoadQuery, provenance: Optional[ProvenanceContext] = None) -> dict:
1746
+ """Execute a LOAD query."""
1747
+ source_uri = self._resolve_term_value(query.source_uri, query.prefixes)
1748
+ graph_uri = None
1749
+ if query.graph_uri:
1750
+ graph_uri = self._resolve_term_value(query.graph_uri, query.prefixes)
1751
+
1752
+ try:
1753
+ count = self.store.load_graph(source_uri, graph_uri, silent=query.silent)
1754
+ return {
1755
+ "operation": "LOAD",
1756
+ "source": source_uri,
1757
+ "graph": graph_uri,
1758
+ "triples_loaded": count,
1759
+ "success": True
1760
+ }
1761
+ except Exception as e:
1762
+ if query.silent:
1763
+ return {
1764
+ "operation": "LOAD",
1765
+ "source": source_uri,
1766
+ "graph": graph_uri,
1767
+ "success": False,
1768
+ "reason": str(e)
1769
+ }
1770
+ raise
1771
+
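+ # LOAD sketch; the source URL and target graph are placeholders:
+ #
+ #     LOAD <http://example.org/data.ttl> INTO GRAPH <http://example.org/imports>
+ #
+ # The INTO GRAPH part is optional; without it graph_uri stays None and the
+ # triples go to the default graph. LOAD SILENT turns fetch/parse errors into
+ # a success=False result instead of raising.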
1772
+ def _execute_copy_graph(self, query: CopyGraphQuery) -> dict:
1773
+ """Execute a COPY graph query."""
1774
+ source = None
1775
+ if not query.source_is_default and query.source_graph:
1776
+ source = self._resolve_term_value(query.source_graph, query.prefixes)
1777
+
1778
+ dest = None
1779
+ if query.dest_graph:
1780
+ dest = self._resolve_term_value(query.dest_graph, query.prefixes)
1781
+
1782
+ try:
1783
+ count = self.store.copy_graph(source, dest, silent=query.silent)
1784
+ return {
1785
+ "operation": "COPY",
1786
+ "source": source or "DEFAULT",
1787
+ "destination": dest or "DEFAULT",
1788
+ "triples_copied": count,
1789
+ "success": True
1790
+ }
1791
+ except ValueError as e:
1792
+ if query.silent:
1793
+ return {
1794
+ "operation": "COPY",
1795
+ "source": source or "DEFAULT",
1796
+ "destination": dest or "DEFAULT",
1797
+ "success": False,
1798
+ "reason": str(e)
1799
+ }
1800
+ raise
1801
+
1802
+ def _execute_move_graph(self, query: MoveGraphQuery) -> dict:
1803
+ """Execute a MOVE graph query."""
1804
+ source = None
1805
+ if not query.source_is_default and query.source_graph:
1806
+ source = self._resolve_term_value(query.source_graph, query.prefixes)
1807
+
1808
+ dest = None
1809
+ if query.dest_graph:
1810
+ dest = self._resolve_term_value(query.dest_graph, query.prefixes)
1811
+
1812
+ try:
1813
+ count = self.store.move_graph(source, dest, silent=query.silent)
1814
+ return {
1815
+ "operation": "MOVE",
1816
+ "source": source or "DEFAULT",
1817
+ "destination": dest or "DEFAULT",
1818
+ "triples_moved": count,
1819
+ "success": True
1820
+ }
1821
+ except ValueError as e:
1822
+ if query.silent:
1823
+ return {
1824
+ "operation": "MOVE",
1825
+ "source": source or "DEFAULT",
1826
+ "destination": dest or "DEFAULT",
1827
+ "success": False,
1828
+ "reason": str(e)
1829
+ }
1830
+ raise
1831
+
1832
+ def _execute_add_graph(self, query: AddGraphQuery) -> dict:
1833
+ """Execute an ADD graph query."""
1834
+ source = None
1835
+ if not query.source_is_default and query.source_graph:
1836
+ source = self._resolve_term_value(query.source_graph, query.prefixes)
1837
+
1838
+ dest = None
1839
+ if query.dest_graph:
1840
+ dest = self._resolve_term_value(query.dest_graph, query.prefixes)
1841
+
1842
+ try:
1843
+ count = self.store.add_graph(source, dest, silent=query.silent)
1844
+ return {
1845
+ "operation": "ADD",
1846
+ "source": source or "DEFAULT",
1847
+ "destination": dest or "DEFAULT",
1848
+ "triples_added": count,
1849
+ "success": True
1850
+ }
1851
+ except ValueError as e:
1852
+ if query.silent:
1853
+ return {
1854
+ "operation": "ADD",
1855
+ "source": source or "DEFAULT",
1856
+ "destination": dest or "DEFAULT",
1857
+ "success": False,
1858
+ "reason": str(e)
1859
+ }
1860
+ raise
1861
+
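+ # The three graph-to-graph operations above differ only in what happens to
+ # the existing data, assuming store.copy_graph/move_graph/add_graph follow
+ # the usual SPARQL 1.1 Update semantics (graph IRIs are placeholders):
+ #
+ #     COPY <http://example.org/src> TO <http://example.org/dst>  -> dst replaced by a copy of src
+ #     MOVE <http://example.org/src> TO <http://example.org/dst>  -> as COPY, then src is removed
+ #     ADD  <http://example.org/src> TO <http://example.org/dst>  -> src triples merged into dst
+ #
+ # Omitting a graph (source_is_default, or dest_graph left unset) targets the
+ # default graph, reported as "DEFAULT" in the result dict.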
1862
+ def _resolve_pattern_term(
1863
+ self,
1864
+ term: Term,
1865
+ bindings: dict[str, Any],
1866
+ prefixes: dict[str, str]
1867
+ ) -> Optional[str]:
1868
+ """
1869
+ Resolve a pattern term using variable bindings.
1870
+
1871
+ Args:
1872
+ term: The term (Variable, IRI, Literal, etc.)
1873
+ bindings: Variable bindings from WHERE execution
1874
+ prefixes: Prefix mappings
1875
+
1876
+ Returns:
1877
+ The resolved value or None if variable not bound
1878
+ """
1879
+ if isinstance(term, Variable):
1880
+ value = bindings.get(term.name)
1881
+ if value is None:
1882
+ return None
1883
+ return str(value)
1884
+ else:
1885
+ return self._resolve_term_value(term, prefixes)
1886
+
1887
+ def _resolve_term_value(self, term: Term, prefixes: dict[str, str]) -> Any:
1888
+ """Resolve a term to its actual value, expanding prefixes."""
1889
+ if isinstance(term, IRI):
1890
+ iri = term.value
1891
+ # Heuristic: values containing ':' that do not start with 'http' are treated
+ # as prefixed names; absolute IRIs in other schemes (e.g. urn:) simply fall
+ # through unexpanded below
1892
+ if ":" in iri and not iri.startswith("http"):
1893
+ prefix, local = iri.split(":", 1)
1894
+ if prefix in prefixes:
1895
+ return prefixes[prefix] + local
1896
+ return iri
1897
+ elif isinstance(term, Literal):
1898
+ return term.value
1899
+ elif isinstance(term, BlankNode):
1900
+ return f"_:{term.id}"
1901
+ else:
1902
+ return str(term)
1903
+
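+ # Resolution sketch for the helpers above, assuming
+ # prefixes = {"ex": "http://example.org/"}:
+ #
+ #     an IRI "ex:name"              -> "http://example.org/name"
+ #     an IRI "http://example.org/x" -> returned unchanged (absolute IRI)
+ #     a Literal "Alice"             -> "Alice"
+ #     a BlankNode with id "b0"      -> "_:b0"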
1904
+
1905
+ def execute_sparql(
1906
+ store: "TripleStore",
1907
+ query_string: str,
1908
+ provenance: Optional[ProvenanceContext] = None
1909
+ ) -> Union[pl.DataFrame, bool, dict]:
1910
+ """
1911
+ Convenience function to parse and execute a SPARQL-Star query.
1912
+
1913
+ Args:
1914
+ store: The TripleStore to query
1915
+ query_string: SPARQL-Star query string
1916
+ provenance: Optional provenance for INSERT/DELETE operations
1917
+
1918
+ Returns:
1919
+ Query results (DataFrame for SELECT, bool for ASK, dict for UPDATE)
1920
+ """
1921
+ from rdf_starbase.sparql.parser import parse_query
1922
+
1923
+ query = parse_query(query_string)
1924
+ executor = SPARQLExecutor(store)
1925
+ return executor.execute(query, provenance)
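+ 
+ 
+ # End-to-end usage sketch. The no-argument TripleStore() construction is an
+ # assumption for illustration; adjust to the actual constructor:
+ #
+ #     from rdf_starbase.store import TripleStore
+ #
+ #     store = TripleStore()
+ #     execute_sparql(
+ #         store,
+ #         'INSERT DATA { <http://example.org/s> <http://example.org/p> "o" }',
+ #     )
+ #     df = execute_sparql(store, "SELECT ?s ?p ?o WHERE { ?s ?p ?o }")
+ #     print(df)  # a Polars DataFrame of bindings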