rdf-starbase 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,748 @@
1
+ """
2
+ Core TripleStore implementation using Polars.
3
+
4
+ The TripleStore is the heart of RDF-StarBase, leveraging Polars DataFrames
5
+ for fast, vectorized RDF-Star operations.
6
+ """
7
+
8
+ import json
+ from datetime import datetime, timezone
9
+ from typing import Optional, Any
10
+ from uuid import UUID, uuid4
11
+ from pathlib import Path
12
+
13
+ import polars as pl
14
+
15
+ from rdf_starbase.models import Triple, QuotedTriple, Assertion, ProvenanceContext
16
+
17
+
18
+ class TripleStore:
19
+ """
20
+ A high-performance RDF-Star triple store backed by Polars DataFrames.
21
+
22
+ Key design decisions:
23
+ - Each assertion is a row in a Polars DataFrame
24
+ - Quoted triples are stored with unique IDs for reference
25
+ - Provenance columns are first-class (not metadata)
26
+ - Uses Polars lazy evaluation for query optimization
27
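+
+ Example (illustrative sketch; the URIs are placeholders and
+ ProvenanceContext is assumed to default its optional fields):
+
+ >>> store = TripleStore()
+ >>> prov = ProvenanceContext(source="manual-entry", confidence=0.9)
+ >>> _ = store.add_triple(
+ ...     "http://example.org/alice",
+ ...     "http://example.org/knows",
+ ...     "http://example.org/bob",
+ ...     provenance=prov,
+ ... )
+ >>> len(store)
+ 1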
+ """
28
+
29
+ def __init__(self):
30
+ """Initialize an empty triple store."""
31
+ self._df = self._create_empty_dataframe()
32
+ self._quoted_triples: dict[UUID, QuotedTriple] = {}
33
+
34
+ @staticmethod
35
+ def _create_empty_dataframe() -> pl.DataFrame:
36
+ """Create the schema for the assertion DataFrame."""
37
+ return pl.DataFrame({
38
+ "assertion_id": pl.Series([], dtype=pl.Utf8),
39
+ "subject": pl.Series([], dtype=pl.Utf8),
40
+ "predicate": pl.Series([], dtype=pl.Utf8),
41
+ "object": pl.Series([], dtype=pl.Utf8),
42
+ "object_type": pl.Series([], dtype=pl.Utf8), # uri, literal, int, float, bool
43
+ "graph": pl.Series([], dtype=pl.Utf8),
44
+ "quoted_triple_id": pl.Series([], dtype=pl.Utf8), # If subject/object is a quoted triple
45
+ # Provenance columns
46
+ "source": pl.Series([], dtype=pl.Utf8),
47
+ "timestamp": pl.Series([], dtype=pl.Datetime("us", "UTC")),
48
+ "confidence": pl.Series([], dtype=pl.Float64),
49
+ "process": pl.Series([], dtype=pl.Utf8),
50
+ "version": pl.Series([], dtype=pl.Utf8),
51
+ "metadata": pl.Series([], dtype=pl.Utf8), # JSON string
52
+ # Status
53
+ "superseded_by": pl.Series([], dtype=pl.Utf8),
54
+ "deprecated": pl.Series([], dtype=pl.Boolean),
55
+ })
56
+
57
+ def add_triple(
58
+ self,
59
+ subject: str,
60
+ predicate: str,
61
+ obj: Any,
62
+ provenance: ProvenanceContext,
63
+ graph: Optional[str] = None,
64
+ ) -> UUID:
65
+ """
66
+ Add a triple with provenance to the store.
67
+
68
+ Args:
69
+ subject: Subject URI or blank node
70
+ predicate: Predicate URI
71
+ obj: Object (URI, literal, or value)
72
+ provenance: Provenance context for this assertion
73
+ graph: Optional named graph
74
+
75
+ Returns:
76
+ UUID of the created assertion
77
+ """
78
+ assertion_id = uuid4()
79
+
80
+ # Determine object type
81
+ if isinstance(obj, str) and obj.startswith("http"):
82
+ obj_type = "uri"
83
+ elif isinstance(obj, str):
84
+ obj_type = "literal"
85
+ elif isinstance(obj, bool):
86
+ obj_type = "bool"
87
+ elif isinstance(obj, int):
88
+ obj_type = "int"
89
+ elif isinstance(obj, float):
90
+ obj_type = "float"
91
+ else:
92
+ obj_type = "literal"
93
+ obj = str(obj)
94
+
95
+ # Create new row
96
+ new_row = pl.DataFrame({
97
+ "assertion_id": [str(assertion_id)],
98
+ "subject": [subject],
99
+ "predicate": [predicate],
100
+ "object": [str(obj)],
101
+ "object_type": [obj_type],
102
+ "graph": [graph],
103
+ "quoted_triple_id": [None],
104
+ "source": [provenance.source],
105
+ "timestamp": [provenance.timestamp],
106
+ "confidence": [provenance.confidence],
107
+ "process": [provenance.process],
108
+ "version": [provenance.version],
109
+ "metadata": [str(provenance.metadata)],
110
+ "superseded_by": [None],
111
+ "deprecated": [False],
112
+ }, schema=self._df.schema)  # match the store schema so all-null columns keep their dtypes
113
+
114
+ # Append to main dataframe
115
+ self._df = pl.concat([self._df, new_row], how="vertical")
116
+
117
+ return assertion_id
118
+
119
+ def add_assertion(self, assertion: Assertion) -> UUID:
120
+ """Add a complete assertion object to the store."""
121
+ return self.add_triple(
122
+ subject=assertion.triple.subject,
123
+ predicate=assertion.triple.predicate,
124
+ obj=assertion.triple.object,
125
+ provenance=assertion.provenance,
126
+ graph=assertion.triple.graph,
127
+ )
128
+
129
+ def add_triples_batch(
130
+ self,
131
+ triples: list[dict],
132
+ ) -> int:
133
+ """
134
+ Add multiple triples in a single batch operation.
135
+
136
+ This is MUCH faster than calling add_triple() repeatedly because:
137
+ - Single DataFrame concatenation instead of N concatenations
138
+ - Batch UUID generation
139
+ - No per-triple intermediate DataFrame construction
140
+
141
+ Args:
142
+ triples: List of dicts with keys:
143
+ - subject: str
144
+ - predicate: str
145
+ - object: Any
146
+ - source: str
147
+ - confidence: float (optional, default 1.0)
148
+ - process: str (optional)
149
+ - timestamp: datetime (optional)
150
+ - graph: str (optional)
151
+
152
+ Returns:
153
+ Number of triples added
154
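+
+ Example (illustrative sketch; the values are placeholders):
+
+ >>> store.add_triples_batch([
+ ...     {"subject": "http://example.org/alice",
+ ...      "predicate": "http://example.org/age",
+ ...      "object": 42,
+ ...      "source": "census-import",
+ ...      "confidence": 0.8},
+ ... ])
+ 1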
+ """
155
+ if not triples:
156
+ return 0
157
+
158
+ # Prepare batch data
159
+ rows = {
160
+ "assertion_id": [],
161
+ "subject": [],
162
+ "predicate": [],
163
+ "object": [],
164
+ "object_type": [],
165
+ "graph": [],
166
+ "quoted_triple_id": [],
167
+ "source": [],
168
+ "timestamp": [],
169
+ "confidence": [],
170
+ "process": [],
171
+ "version": [],
172
+ "metadata": [],
173
+ "superseded_by": [],
174
+ "deprecated": [],
175
+ }
176
+
177
+ now = datetime.now(timezone.utc)  # keep timestamps tz-aware to match the UTC schema column
178
+
179
+ for t in triples:
180
+ obj = t.get("object", "")
181
+
182
+ # Determine object type
183
+ if isinstance(obj, str) and obj.startswith("http"):
184
+ obj_type = "uri"
185
+ elif isinstance(obj, str):
186
+ obj_type = "literal"
187
+ elif isinstance(obj, bool):
188
+ obj_type = "bool"
189
+ elif isinstance(obj, int):
190
+ obj_type = "int"
191
+ elif isinstance(obj, float):
192
+ obj_type = "float"
193
+ else:
194
+ obj_type = "literal"
195
+ obj = str(obj)
196
+
197
+ rows["assertion_id"].append(str(uuid4()))
198
+ rows["subject"].append(t["subject"])
199
+ rows["predicate"].append(t["predicate"])
200
+ rows["object"].append(str(obj))
201
+ rows["object_type"].append(obj_type)
202
+ rows["graph"].append(t.get("graph"))
203
+ rows["quoted_triple_id"].append(None)
204
+ rows["source"].append(t.get("source", "unknown"))
205
+ rows["timestamp"].append(t.get("timestamp", now))
206
+ rows["confidence"].append(t.get("confidence", 1.0))
207
+ rows["process"].append(t.get("process"))
208
+ rows["version"].append(t.get("version"))
209
+ rows["metadata"].append(str(t.get("metadata", {})))
210
+ rows["superseded_by"].append(None)
211
+ rows["deprecated"].append(False)
212
+
213
+ # Create batch DataFrame
214
+ batch_df = pl.DataFrame(rows, schema=self._df.schema)
215
+
216
+ # Single concatenation
217
+ self._df = pl.concat([self._df, batch_df], how="vertical")
218
+
219
+ return len(triples)
220
+
221
+ def get_triples(
222
+ self,
223
+ subject: Optional[str] = None,
224
+ predicate: Optional[str] = None,
225
+ obj: Optional[str] = None,
226
+ graph: Optional[str] = None,
227
+ source: Optional[str] = None,
228
+ min_confidence: float = 0.0,
229
+ include_deprecated: bool = False,
230
+ ) -> pl.DataFrame:
231
+ """
232
+ Query triples with optional filters.
233
+
234
+ This is a basic pattern matching query - the foundation of SPARQL.
235
+ Uses Polars' lazy evaluation for optimization.
236
+
237
+ Args:
238
+ subject: Filter by subject (None = wildcard)
239
+ predicate: Filter by predicate (None = wildcard)
240
+ obj: Filter by object (None = wildcard)
241
+ graph: Filter by graph (None = wildcard)
242
+ source: Filter by provenance source
243
+ min_confidence: Minimum confidence threshold
244
+ include_deprecated: Whether to include deprecated assertions
245
+
246
+ Returns:
247
+ Filtered DataFrame of matching assertions
248
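+
+ Example (illustrative sketch; assumes assertions from a source named
+ "census-import" are already in the store):
+
+ >>> matches = store.get_triples(
+ ...     predicate="http://example.org/age",
+ ...     source="census-import",
+ ...     min_confidence=0.7,
+ ... )
+ >>> matches.columns[:3]
+ ['assertion_id', 'subject', 'predicate']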
+ """
249
+ df = self._df.lazy()
250
+
251
+ # Apply filters
252
+ if subject is not None:
253
+ df = df.filter(pl.col("subject") == subject)
254
+ if predicate is not None:
255
+ df = df.filter(pl.col("predicate") == predicate)
256
+ if obj is not None:
257
+ df = df.filter(pl.col("object") == str(obj))
258
+ if graph is not None:
259
+ df = df.filter(pl.col("graph") == graph)
260
+ if source is not None:
261
+ df = df.filter(pl.col("source") == source)
262
+
263
+ if min_confidence is not None:
264
+ df = df.filter(pl.col("confidence") >= min_confidence)
265
+
266
+ if not include_deprecated:
267
+ df = df.filter(~pl.col("deprecated"))
268
+
269
+ return df.collect()
270
+
271
+ def get_competing_claims(
272
+ self,
273
+ subject: str,
274
+ predicate: str,
275
+ ) -> pl.DataFrame:
276
+ """
277
+ Find competing assertions about the same subject-predicate pair.
278
+
279
+ This implements the "Competing Claims View" primitive from the manifesto.
280
+
281
+ Returns assertions sorted by confidence (desc) and recency (desc).
282
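+
+ Example (illustrative sketch; assumes several sources have asserted
+ values for the same subject-predicate pair):
+
+ >>> claims = store.get_competing_claims(
+ ...     "http://example.org/alice", "http://example.org/age"
+ ... )
+ >>> ranked = claims.select(["object", "source", "confidence"])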
+ """
283
+ df = self.get_triples(subject=subject, predicate=predicate, include_deprecated=False)
284
+
285
+ # Sort by confidence (descending) then timestamp (descending)
286
+ df = df.sort(["confidence", "timestamp"], descending=[True, True])
287
+
288
+ return df
289
+
290
+ def deprecate_assertion(self, assertion_id: UUID, superseded_by: Optional[UUID] = None) -> None:
291
+ """
292
+ Mark an assertion as deprecated, optionally linking to superseding assertion.
293
+
294
+ Args:
295
+ assertion_id: ID of assertion to deprecate
296
+ superseded_by: Optional ID of the assertion that supersedes this one
297
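+
+ Example (illustrative sketch; `old_id` and `new_id` stand for UUIDs
+ returned by earlier add_triple() calls):
+
+ >>> store.deprecate_assertion(old_id, superseded_by=new_id)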
+ """
298
+ self._df = self._df.with_columns([
299
+ pl.when(pl.col("assertion_id") == str(assertion_id))
300
+ .then(True)
301
+ .otherwise(pl.col("deprecated"))
302
+ .alias("deprecated"),
303
+
304
+ pl.when(pl.col("assertion_id") == str(assertion_id))
305
+ .then(pl.lit(str(superseded_by)) if superseded_by else None)  # pl.lit so the UUID is a literal, not a column name
306
+ .otherwise(pl.col("superseded_by"))
307
+ .alias("superseded_by"),
308
+ ])
309
+
310
+ def get_provenance_timeline(self, subject: str, predicate: str) -> pl.DataFrame:
311
+ """
312
+ Get the full history of assertions about a subject-predicate pair.
313
+
314
+ This implements the "Provenance Timeline" primitive from the manifesto.
315
+ Shows the evolution of knowledge over time, including deprecated assertions.
316
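+
+ Example (illustrative sketch; the URIs are placeholders):
+
+ >>> history = store.get_provenance_timeline(
+ ...     "http://example.org/alice", "http://example.org/age"
+ ... )
+ >>> timeline = history.select(["timestamp", "object", "source", "deprecated"])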
+ """
317
+ df = self.get_triples(
318
+ subject=subject,
319
+ predicate=predicate,
320
+ include_deprecated=True
321
+ )
322
+
323
+ # Sort by timestamp
324
+ df = df.sort("timestamp")
325
+
326
+ return df
327
+
328
+ def mark_deleted(
329
+ self,
330
+ s: Optional[str] = None,
331
+ p: Optional[str] = None,
332
+ o: Optional[str] = None
333
+ ) -> int:
334
+ """
335
+ Mark matching triples as deprecated (soft delete).
336
+
337
+ Args:
338
+ s: Subject filter (optional)
339
+ p: Predicate filter (optional)
340
+ o: Object filter (optional)
341
+
342
+ Returns:
343
+ Number of triples marked as deleted
344
+ """
345
+ # Build filter condition
346
+ condition = pl.lit(True)
347
+ if s is not None:
348
+ condition = condition & (pl.col("subject") == s)
349
+ if p is not None:
350
+ condition = condition & (pl.col("predicate") == p)
351
+ if o is not None:
352
+ condition = condition & (pl.col("object") == o)
353
+
354
+ # Count matching rows
355
+ count = self._df.filter(condition & ~pl.col("deprecated")).height
356
+
357
+ # Mark matching rows as deprecated
358
+ self._df = self._df.with_columns([
359
+ pl.when(condition)
360
+ .then(True)
361
+ .otherwise(pl.col("deprecated"))
362
+ .alias("deprecated"),
363
+ ])
364
+
365
+ return count
366
+
367
+ def save(self, path: Path | str) -> None:
368
+ """
369
+ Save the triple store to disk using Parquet format.
370
+
371
+ Parquet is Polars' native format and provides excellent compression
372
+ and query performance.
373
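+
+ Example (illustrative sketch; the path is a placeholder):
+
+ >>> store.save("data/knowledge_base.parquet")
+ >>> restored = TripleStore.load("data/knowledge_base.parquet")
+ >>> len(restored) == len(store)
+ True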
+ """
374
+ path = Path(path)
375
+ path.parent.mkdir(parents=True, exist_ok=True)
376
+ self._df.write_parquet(path)
377
+
378
+ @classmethod
379
+ def load(cls, path: Path | str) -> "TripleStore":
380
+ """
381
+ Load a triple store from disk.
382
+
383
+ Args:
384
+ path: Path to the Parquet file
385
+
386
+ Returns:
387
+ Loaded TripleStore instance
388
+ """
389
+ store = cls()
390
+ store._df = pl.read_parquet(path)
391
+ return store
392
+
393
+ def stats(self) -> dict[str, Any]:
394
+ """Get statistics about the triple store."""
395
+ total = len(self._df)
396
+ active = len(self._df.filter(~pl.col("deprecated")))
397
+ deprecated = total - active
398
+
399
+ sources = self._df.select("source").unique().height
400
+
401
+ return {
402
+ "total_assertions": total,
403
+ "active_assertions": active,
404
+ "deprecated_assertions": deprecated,
405
+ "unique_sources": sources,
406
+ "unique_subjects": self._df.select("subject").unique().height,
407
+ "unique_predicates": self._df.select("predicate").unique().height,
408
+ }
409
+
410
+ def __len__(self) -> int:
411
+ """Return the number of active assertions."""
412
+ return len(self._df.filter(~pl.col("deprecated")))
413
+
414
+ def __repr__(self) -> str:
415
+ stats = self.stats()
416
+ return (
417
+ f"TripleStore("
418
+ f"assertions={stats['active_assertions']}, "
419
+ f"sources={stats['unique_sources']}, "
420
+ f"subjects={stats['unique_subjects']})"
421
+ )
422
+
423
+ # =========================================================================
424
+ # Named Graph Management
425
+ # =========================================================================
426
+
427
+ def list_graphs(self) -> list[str]:
428
+ """
429
+ List all named graphs in the store.
430
+
431
+ Returns:
432
+ List of graph URIs (excluding None/default graph)
433
+ """
434
+ graphs = (
435
+ self._df
436
+ .filter(pl.col("graph").is_not_null() & ~pl.col("deprecated"))
437
+ .select("graph")
438
+ .unique()
439
+ .to_series()
440
+ .to_list()
441
+ )
442
+ return sorted(graphs)
443
+
444
+ def create_graph(self, graph_uri: str) -> bool:
445
+ """
446
+ Create an empty named graph.
447
+
448
+ In RDF-StarBase, graphs are created implicitly when triples are added.
449
+ This method is provided for SPARQL compatibility and returns True
450
+ if the graph did not already exist, or False if it did.
451
+
452
+ Args:
453
+ graph_uri: The IRI of the graph to create
454
+
455
+ Returns:
456
+ True if graph was created, False if it already existed
457
+ """
458
+ existing = self._df.filter(
459
+ (pl.col("graph") == graph_uri) & ~pl.col("deprecated")
460
+ ).height
461
+ return existing == 0
462
+
463
+ def drop_graph(self, graph_uri: str, silent: bool = False) -> int:
464
+ """
465
+ Drop (delete) a named graph and all its triples.
466
+
467
+ Args:
468
+ graph_uri: The IRI of the graph to drop
469
+ silent: If True, don't raise error if graph doesn't exist
470
+
471
+ Returns:
472
+ Number of triples removed
473
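+
+ Example (illustrative sketch; the graph IRI is a placeholder):
+
+ >>> removed = store.drop_graph("http://example.org/graphs/staging")
+ >>> "http://example.org/graphs/staging" in store.list_graphs()
+ False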
+ """
474
+ condition = (pl.col("graph") == graph_uri) & ~pl.col("deprecated")
475
+ count = self._df.filter(condition).height
476
+
477
+ if count == 0:
478
+ # Graph doesn't exist; nothing to drop
479
+ return 0
480
+
481
+ # Mark all triples in the graph as deprecated
482
+ self._df = self._df.with_columns([
483
+ pl.when(condition)
484
+ .then(True)
485
+ .otherwise(pl.col("deprecated"))
486
+ .alias("deprecated"),
487
+ ])
488
+
489
+ return count
490
+
491
+ def clear_graph(self, graph_uri: Optional[str] = None, silent: bool = False) -> int:
492
+ """
493
+ Clear all triples from a graph (or default graph if None).
494
+
495
+ Unlike DROP, CLEAR keeps the graph existing but empty.
496
+ For the default graph (None), removes all triples not in named graphs.
497
+
498
+ Args:
499
+ graph_uri: The IRI of the graph to clear, or None for default graph
500
+ silent: If True, don't raise error if graph doesn't exist
501
+
502
+ Returns:
503
+ Number of triples removed
504
+ """
505
+ if graph_uri is None:
506
+ # Clear default graph (where graph column is null)
507
+ condition = pl.col("graph").is_null() & ~pl.col("deprecated")
508
+ else:
509
+ condition = (pl.col("graph") == graph_uri) & ~pl.col("deprecated")
510
+
511
+ count = self._df.filter(condition).height
512
+
513
+ # Mark matching triples as deprecated
514
+ self._df = self._df.with_columns([
515
+ pl.when(condition)
516
+ .then(True)
517
+ .otherwise(pl.col("deprecated"))
518
+ .alias("deprecated"),
519
+ ])
520
+
521
+ return count
522
+
523
+ def copy_graph(
524
+ self,
525
+ source_graph: Optional[str],
526
+ dest_graph: str,
527
+ silent: bool = False,
528
+ ) -> int:
529
+ """
530
+ Copy all triples from source graph to destination graph.
531
+
532
+ The destination graph is cleared first, then populated with
533
+ copies of all triples from the source graph.
534
+
535
+ Args:
536
+ source_graph: Source graph IRI (None for default graph)
537
+ dest_graph: Destination graph IRI
538
+ silent: If True, don't fail if source doesn't exist
539
+
540
+ Returns:
541
+ Number of triples copied
542
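+
+ Example (illustrative sketch; the graph IRIs are placeholders):
+
+ >>> copied = store.copy_graph(
+ ...     "http://example.org/graphs/staging",
+ ...     "http://example.org/graphs/production",
+ ... )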
+ """
543
+ # Clear destination first
544
+ self.clear_graph(dest_graph, silent=True)
545
+
546
+ # Get source triples
547
+ if source_graph is None:
548
+ source_df = self._df.filter(
549
+ pl.col("graph").is_null() & ~pl.col("deprecated")
550
+ )
551
+ else:
552
+ source_df = self._df.filter(
553
+ (pl.col("graph") == source_graph) & ~pl.col("deprecated")
554
+ )
555
+
556
+ if source_df.height == 0:
557
+ return 0
558
+
559
+ # Create copies with new assertion IDs and target graph
560
+ from uuid import uuid4
561
+
562
+ new_rows = source_df.with_columns([
563
+ pl.Series("assertion_id", [str(uuid4()) for _ in range(source_df.height)]),  # fresh ID per copied row
564
+ pl.lit(dest_graph).alias("graph"),
565
+ pl.lit(datetime.now(timezone.utc)).alias("timestamp"),
566
+ ])
567
+
568
+ self._df = pl.concat([self._df, new_rows])
569
+ return new_rows.height
570
+
571
+ def move_graph(
572
+ self,
573
+ source_graph: Optional[str],
574
+ dest_graph: str,
575
+ silent: bool = False,
576
+ ) -> int:
577
+ """
578
+ Move all triples from source graph to destination graph.
579
+
580
+ Like COPY but also removes triples from source graph.
581
+
582
+ Args:
583
+ source_graph: Source graph IRI (None for default graph)
584
+ dest_graph: Destination graph IRI
585
+ silent: If True, don't fail if source doesn't exist
586
+
587
+ Returns:
588
+ Number of triples moved
589
+ """
590
+ count = self.copy_graph(source_graph, dest_graph, silent)
591
+
592
+ # Clear source
593
+ self.clear_graph(source_graph, silent=True)
597
+
598
+ return count
599
+
600
+ def add_graph(
601
+ self,
602
+ source_graph: Optional[str],
603
+ dest_graph: str,
604
+ silent: bool = False,
605
+ ) -> int:
606
+ """
607
+ Add all triples from source graph to destination graph.
608
+
609
+ Unlike COPY, doesn't clear destination first - adds to existing triples.
610
+
611
+ Args:
612
+ source_graph: Source graph IRI (None for default graph)
613
+ dest_graph: Destination graph IRI
614
+ silent: If True, don't fail if source doesn't exist
615
+
616
+ Returns:
617
+ Number of triples added
618
+ """
619
+ # Get source triples
620
+ if source_graph is None:
621
+ source_df = self._df.filter(
622
+ pl.col("graph").is_null() & ~pl.col("deprecated")
623
+ )
624
+ else:
625
+ source_df = self._df.filter(
626
+ (pl.col("graph") == source_graph) & ~pl.col("deprecated")
627
+ )
628
+
629
+ if source_df.height == 0:
630
+ return 0
631
+
632
+ # Create copies with new assertion IDs and target graph
633
+ from uuid import uuid4
634
+
635
+ new_rows = source_df.with_columns([
636
+ pl.Series("assertion_id", [str(uuid4()) for _ in range(source_df.height)]),  # fresh ID per copied row
637
+ pl.lit(dest_graph).alias("graph"),
638
+ pl.lit(datetime.now(timezone.utc)).alias("timestamp"),
639
+ ])
640
+
641
+ self._df = pl.concat([self._df, new_rows])
642
+ return new_rows.height
643
+
644
+ def load_graph(
645
+ self,
646
+ source_uri: str,
647
+ graph_uri: Optional[str] = None,
648
+ silent: bool = False,
649
+ ) -> int:
650
+ """
651
+ Load RDF data from a URI into a graph.
652
+
653
+ Supports loading from:
654
+ - Local files (file:// or plain paths)
655
+ - HTTP/HTTPS URLs
656
+ - Formats: Turtle, N-Triples, RDF/XML, JSON-LD (detected from the file extension)
657
+
658
+ Args:
659
+ source_uri: URI to load data from
660
+ graph_uri: Target graph (None for default graph)
661
+ silent: If True, don't fail on errors
662
+
663
+ Returns:
664
+ Number of triples loaded
665
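+
+ Example (illustrative sketch; the file path and graph IRI are placeholders):
+
+ >>> n = store.load_graph(
+ ...     "file:///data/people.ttl",
+ ...     graph_uri="http://example.org/graphs/people",
+ ... )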
+ """
666
+ from pathlib import Path
667
+ from urllib.parse import urlparse, unquote
668
+ from rdf_starbase.models import ProvenanceContext
669
+
670
+ # Determine file path
671
+ if source_uri.startswith("file://"):
672
+ # Properly parse file:// URI
673
+ parsed = urlparse(source_uri)
674
+ # unquote handles percent-encoded characters
675
+ file_path_str = unquote(parsed.path)
676
+ # On Windows, file:///C:/path becomes /C:/path, remove leading /
677
+ if len(file_path_str) > 2 and file_path_str[0] == '/' and file_path_str[2] == ':':
678
+ file_path_str = file_path_str[1:]
679
+ file_path = Path(file_path_str)
680
+ elif source_uri.startswith(("http://", "https://")):
681
+ # Download to temp file
682
+ import tempfile
683
+ import urllib.request
684
+ try:
685
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".ttl") as f:
686
+ urllib.request.urlretrieve(source_uri, f.name)
687
+ file_path = Path(f.name)
688
+ except Exception as e:
689
+ if silent:
690
+ return 0
691
+ raise ValueError(f"Failed to download {source_uri}: {e}")
692
+ else:
693
+ file_path = Path(source_uri)
694
+
695
+ if not file_path.exists():
696
+ if silent:
697
+ return 0
698
+ raise FileNotFoundError(f"Source file not found: {file_path}")
699
+
700
+ # Determine format from extension
701
+ suffix = file_path.suffix.lower()
702
+
703
+ try:
704
+ if suffix in (".ttl", ".turtle"):
705
+ from rdf_starbase.formats.turtle import parse_turtle
706
+ parsed = parse_turtle(file_path.read_text())
707
+ triples = parsed.triples
708
+ elif suffix in (".nt", ".ntriples"):
709
+ from rdf_starbase.formats.ntriples import parse_ntriples
710
+ parsed = parse_ntriples(file_path.read_text())
711
+ triples = parsed.triples
712
+ elif suffix in (".rdf", ".xml"):
713
+ from rdf_starbase.formats.rdfxml import parse_rdfxml
714
+ parsed = parse_rdfxml(file_path.read_text())
715
+ triples = parsed.triples
716
+ elif suffix in (".jsonld", ".json"):
717
+ from rdf_starbase.formats.jsonld import parse_jsonld
718
+ parsed = parse_jsonld(file_path.read_text())
719
+ triples = parsed.triples
720
+ else:
721
+ # Default to Turtle
722
+ from rdf_starbase.formats.turtle import parse_turtle
723
+ parsed = parse_turtle(file_path.read_text())
724
+ triples = parsed.triples
725
+ except Exception as e:
726
+ if silent:
727
+ return 0
728
+ raise ValueError(f"Failed to parse {file_path}: {e}")
729
+
730
+ # Add triples to the graph
731
+ prov = ProvenanceContext(
732
+ source=source_uri,
733
+ confidence=1.0,
734
+ process="LOAD",
735
+ )
736
+
737
+ count = 0
738
+ for triple in triples:
739
+ self.add_triple(
740
+ subject=triple.subject,
741
+ predicate=triple.predicate,
742
+ obj=triple.object,
743
+ provenance=prov,
744
+ graph=graph_uri,
745
+ )
746
+ count += 1
747
+
748
+ return count