rdf-starbase 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,29 @@
1
+ """
2
+ RDF Format Parsers and Serializers.
3
+
4
+ Supports:
5
+ - Turtle (.ttl) with Turtle-Star extensions
6
+ - N-Triples (.nt)
7
+ - JSON-LD (.jsonld)
8
+ - RDF/XML (.rdf, .xml)
9
+ """
10
+
11
+ from rdf_starbase.formats.turtle import TurtleParser, TurtleSerializer
12
+ from rdf_starbase.formats.ntriples import NTriplesParser, NTriplesSerializer
13
+ from rdf_starbase.formats.jsonld import JSONLDParser, JSONLDSerializer, parse_jsonld, serialize_jsonld
14
+ from rdf_starbase.formats.rdfxml import RDFXMLParser, RDFXMLSerializer, parse_rdfxml, serialize_rdfxml
15
+
16
# Public names re-exported by this package, grouped by serialization format.
__all__ = [
    # Turtle / Turtle-Star
    "TurtleParser", "TurtleSerializer",
    # N-Triples
    "NTriplesParser", "NTriplesSerializer",
    # JSON-LD
    "JSONLDParser", "JSONLDSerializer", "parse_jsonld", "serialize_jsonld",
    # RDF/XML
    "RDFXMLParser", "RDFXMLSerializer", "parse_rdfxml", "serialize_rdfxml",
]
@@ -0,0 +1,488 @@
1
+ """
2
+ JSON-LD parser and serializer for RDF-StarBase.
3
+
4
+ JSON-LD (JSON for Linked Data) is a W3C standard for embedding RDF
5
+ data in JSON. It's web-friendly and widely used for structured data
6
+ on the web (Schema.org, etc.).
7
+
8
+ Key features:
9
+ - @context: Defines prefixes and term mappings
10
+ - @id: Identifies resources (IRIs)
11
+ - @type: Specifies rdf:type
12
+ - @value/@language/@type: Literal representation
13
+ - @graph: Named graphs
14
+ - Quoted triples via annotation syntax for RDF-Star
15
+
16
+ Example JSON-LD:
17
+ {
18
+ "@context": {
19
+ "foaf": "http://xmlns.com/foaf/0.1/",
20
+ "name": "foaf:name",
21
+ "knows": {"@id": "foaf:knows", "@type": "@id"}
22
+ },
23
+ "@id": "http://example.org/alice",
24
+ "@type": "foaf:Person",
25
+ "name": "Alice",
26
+ "knows": "http://example.org/bob"
27
+ }
28
+ """
29
+
30
+ import json
31
+ from dataclasses import dataclass, field
32
+ from typing import Any, Optional, Union
33
+ import re
34
+
35
+
36
@dataclass
class Triple:
    """
    Lightweight subject/predicate/object record produced while parsing.

    All three positions are stored as plain strings.  The two optional
    fields can carry a nested ``Triple`` for the subject or object position
    (presumably for RDF-Star quoted triples -- nothing in this module
    populates them); both default to ``None``.
    """
    subject: str
    predicate: str
    object: str
    subject_triple: Optional["Triple"] = field(default=None)
    object_triple: Optional["Triple"] = field(default=None)
44
+
45
+
46
@dataclass
class JSONLDDocument:
    """
    Result of parsing a JSON-LD document.

    Every field has a default, so an empty document can be built with no
    arguments; the mutable fields get a fresh container per instance.
    """
    # Term/prefix definitions collected from @context.
    context: dict[str, Any] = field(default_factory=dict)
    # Triples extracted from the document, in extraction order.
    triples: list[Triple] = field(default_factory=list)
    # Base IRI in effect for the document, if any.
    base: Optional[str] = field(default=None)
52
+
53
+
54
# Well-known namespace prefixes, used as a fallback when a document's own
# @context does not define the prefix.
STANDARD_PREFIXES = dict(
    rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    rdfs="http://www.w3.org/2000/01/rdf-schema#",
    xsd="http://www.w3.org/2001/XMLSchema#",
    owl="http://www.w3.org/2002/07/owl#",
    foaf="http://xmlns.com/foaf/0.1/",
    dc="http://purl.org/dc/elements/1.1/",
    dcterms="http://purl.org/dc/terms/",
    schema="http://schema.org/",
)
65
+
66
+
67
+ class JSONLDParser:
68
+ """
69
+ Parser for JSON-LD documents.
70
+
71
+ Supports:
72
+ - @context with prefix definitions
73
+ - @id for resource identification
74
+ - @type for rdf:type
75
+ - @value, @language, @type for literals
76
+ - @list for RDF lists
77
+ - Nested objects
78
+ - RDF-Star annotation syntax (experimental)
79
+ """
80
+
81
+ def __init__(self):
82
+ self.context = {}
83
+ self.base = None
84
+ self.triples = []
85
+ self.blank_counter = 0
86
+
87
+ def parse(self, source: Union[str, dict]) -> JSONLDDocument:
88
+ """Parse JSON-LD from string or dict."""
89
+ if isinstance(source, str):
90
+ data = json.loads(source)
91
+ else:
92
+ data = source
93
+
94
+ self.context = {}
95
+ self.base = None
96
+ self.triples = []
97
+ self.blank_counter = 0
98
+
99
+ # Process @context
100
+ if "@context" in data:
101
+ self._process_context(data["@context"])
102
+
103
+ # Process @base
104
+ if "@base" in data:
105
+ self.base = data["@base"]
106
+
107
+ # Process the document
108
+ if "@graph" in data:
109
+ # Multiple resources in @graph
110
+ for item in data["@graph"]:
111
+ self._process_node(item)
112
+ else:
113
+ # Single resource at top level
114
+ self._process_node(data)
115
+
116
+ return JSONLDDocument(
117
+ context=self.context.copy(),
118
+ triples=self.triples.copy(),
119
+ base=self.base
120
+ )
121
+
122
+ def _process_context(self, ctx: Union[str, dict, list]):
123
+ """Process @context to extract prefixes and term mappings."""
124
+ if isinstance(ctx, str):
125
+ # Remote context URL - not fully supported yet
126
+ return
127
+
128
+ if isinstance(ctx, list):
129
+ for item in ctx:
130
+ self._process_context(item)
131
+ return
132
+
133
+ if isinstance(ctx, dict):
134
+ for key, value in ctx.items():
135
+ if key.startswith("@"):
136
+ continue # Skip keywords
137
+
138
+ if isinstance(value, str):
139
+ # Simple prefix: "foaf": "http://..."
140
+ self.context[key] = {"@id": value}
141
+ elif isinstance(value, dict):
142
+ # Complex term: "name": {"@id": "foaf:name", "@type": "@id"}
143
+ self.context[key] = value
144
+
145
+ def _expand_iri(self, value: str) -> str:
146
+ """Expand a compact IRI or term to full IRI."""
147
+ if not value or value.startswith("@"):
148
+ return value
149
+
150
+ # Already a full IRI
151
+ if value.startswith("http://") or value.startswith("https://") or value.startswith("urn:"):
152
+ return value
153
+
154
+ # Check for term mapping first (e.g., "name" -> "foaf:name")
155
+ if value in self.context:
156
+ term = self.context[value]
157
+ if isinstance(term, dict):
158
+ expanded = term.get("@id", value)
159
+ else:
160
+ expanded = str(term)
161
+ # Recursively expand in case it's a prefixed name
162
+ if expanded != value:
163
+ return self._expand_iri(expanded)
164
+ return expanded
165
+
166
+ # Check for prefix:localName
167
+ if ":" in value:
168
+ prefix, local = value.split(":", 1)
169
+ if prefix in self.context:
170
+ term = self.context[prefix]
171
+ if isinstance(term, dict):
172
+ base = term.get("@id", "")
173
+ else:
174
+ base = str(term)
175
+ return base + local
176
+ if prefix in STANDARD_PREFIXES:
177
+ return STANDARD_PREFIXES[prefix] + local
178
+
179
+ # Apply base if available
180
+ if self.base and not value.startswith("_:"):
181
+ return self.base + value
182
+
183
+ return value
184
+
185
+ def _gen_blank_node(self) -> str:
186
+ """Generate a new blank node ID."""
187
+ self.blank_counter += 1
188
+ return f"_:b{self.blank_counter}"
189
+
190
+ def _process_node(self, node: dict, subject: Optional[str] = None) -> str:
191
+ """Process a JSON-LD node and extract triples."""
192
+ if not isinstance(node, dict):
193
+ return str(node)
194
+
195
+ # Get or generate subject
196
+ if "@id" in node:
197
+ subject = self._expand_iri(node["@id"])
198
+ elif subject is None:
199
+ subject = self._gen_blank_node()
200
+
201
+ for key, value in node.items():
202
+ if key.startswith("@"):
203
+ if key == "@type":
204
+ # Handle @type
205
+ self._process_type(subject, value)
206
+ # Skip other keywords
207
+ continue
208
+
209
+ # Get predicate
210
+ predicate = self._expand_iri(key)
211
+
212
+ # Get term definition for type coercion
213
+ term_def = self.context.get(key, {})
214
+ if isinstance(term_def, str):
215
+ term_def = {"@id": term_def}
216
+
217
+ # Process value(s)
218
+ if isinstance(value, list):
219
+ for item in value:
220
+ self._process_value(subject, predicate, item, term_def)
221
+ else:
222
+ self._process_value(subject, predicate, value, term_def)
223
+
224
+ return subject
225
+
226
+ def _process_type(self, subject: str, type_value: Union[str, list]):
227
+ """Process @type values."""
228
+ rdf_type = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
229
+
230
+ if isinstance(type_value, list):
231
+ for t in type_value:
232
+ type_iri = self._expand_iri(t)
233
+ self.triples.append(Triple(subject, rdf_type, type_iri))
234
+ else:
235
+ type_iri = self._expand_iri(type_value)
236
+ self.triples.append(Triple(subject, rdf_type, type_iri))
237
+
238
+ def _process_value(self, subject: str, predicate: str, value: Any, term_def: dict):
239
+ """Process a property value and create triples."""
240
+ type_coercion = term_def.get("@type")
241
+
242
+ if isinstance(value, dict):
243
+ # Check for value object
244
+ if "@value" in value:
245
+ obj = self._make_literal(value)
246
+ elif "@list" in value:
247
+ obj = self._process_list(value["@list"])
248
+ elif "@set" in value:
249
+ # @set is just syntactic sugar - process items
250
+ for item in value["@set"]:
251
+ self._process_value(subject, predicate, item, term_def)
252
+ return
253
+ else:
254
+ # Nested node
255
+ obj = self._process_node(value)
256
+ elif isinstance(value, bool):
257
+ obj = f'"{str(value).lower()}"^^<http://www.w3.org/2001/XMLSchema#boolean>'
258
+ elif isinstance(value, int):
259
+ obj = f'"{value}"^^<http://www.w3.org/2001/XMLSchema#integer>'
260
+ elif isinstance(value, float):
261
+ obj = f'"{value}"^^<http://www.w3.org/2001/XMLSchema#double>'
262
+ elif type_coercion == "@id":
263
+ # IRI reference
264
+ obj = self._expand_iri(value)
265
+ else:
266
+ # String literal
267
+ obj = f'"{self._escape_string(value)}"'
268
+
269
+ self.triples.append(Triple(subject, predicate, obj))
270
+
271
+ def _make_literal(self, value_obj: dict) -> str:
272
+ """Create a literal from a value object."""
273
+ val = value_obj["@value"]
274
+ escaped = self._escape_string(str(val))
275
+
276
+ if "@language" in value_obj:
277
+ return f'"{escaped}"@{value_obj["@language"]}'
278
+ elif "@type" in value_obj:
279
+ datatype = self._expand_iri(value_obj["@type"])
280
+ return f'"{escaped}"^^<{datatype}>'
281
+ else:
282
+ return f'"{escaped}"'
283
+
284
+ def _process_list(self, items: list) -> str:
285
+ """Process an @list and create RDF list structure."""
286
+ rdf_first = "http://www.w3.org/1999/02/22-rdf-syntax-ns#first"
287
+ rdf_rest = "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"
288
+ rdf_nil = "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil"
289
+
290
+ if not items:
291
+ return rdf_nil
292
+
293
+ head = self._gen_blank_node()
294
+ current = head
295
+
296
+ for i, item in enumerate(items):
297
+ # Process the item
298
+ if isinstance(item, dict):
299
+ if "@value" in item:
300
+ item_value = self._make_literal(item)
301
+ else:
302
+ item_value = self._process_node(item)
303
+ elif isinstance(item, str):
304
+ item_value = f'"{self._escape_string(item)}"'
305
+ else:
306
+ item_value = str(item)
307
+
308
+ self.triples.append(Triple(current, rdf_first, item_value))
309
+
310
+ if i < len(items) - 1:
311
+ next_node = self._gen_blank_node()
312
+ self.triples.append(Triple(current, rdf_rest, next_node))
313
+ current = next_node
314
+ else:
315
+ self.triples.append(Triple(current, rdf_rest, rdf_nil))
316
+
317
+ return head
318
+
319
+ def _escape_string(self, s: str) -> str:
320
+ """Escape special characters in a string."""
321
+ s = s.replace("\\", "\\\\")
322
+ s = s.replace('"', '\\"')
323
+ s = s.replace("\n", "\\n")
324
+ s = s.replace("\r", "\\r")
325
+ s = s.replace("\t", "\\t")
326
+ return s
327
+
328
+
329
class JSONLDSerializer:
    """
    Serializer for JSON-LD output.

    Converts triples to compact JSON-LD format. Triples are expected to carry
    string terms: literals start with a double quote (N-Triples-like form),
    blank nodes start with ``_:`` and everything else is treated as an IRI.

    IRI and blank-node objects are written as ``{"@id": ...}`` references so
    they are not confused with string literals, unless the property's term
    definition in the supplied context coerces its values with
    ``"@type": "@id"`` (or ``"@vocab"``). Any standard prefix used for
    compaction is declared in the emitted ``@context``.
    """

    _RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
    _XSD = "http://www.w3.org/2001/XMLSchema#"

    # "lexical" optionally followed by @lang or ^^<datatype>.
    _LITERAL_RE = re.compile(
        r'"((?:[^"\\]|\\.)*)"\s*'
        r'(?:@([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)|\^\^<([^>]+)>)?',
        re.S,
    )
    _ESCAPE_RE = re.compile(r"\\(.)", re.S)
    _UNESCAPES = {"n": "\n", "t": "\t", "r": "\r", '"': '"', "\\": "\\"}

    def __init__(self, context: Optional[dict] = None):
        self.context = context or {}
        self.inverse_context = {}
        # Standard prefixes actually used by the current serialization.
        self._declared_prefixes = {}
        # Blank nodes that appear in object position and so need an @id.
        self._referenced_blank_nodes = set()
        self._build_inverse_context()

    def _build_inverse_context(self):
        """Build the IRI -> term map used for exact-match compaction."""
        for term, value in self.context.items():
            if term.startswith("@"):
                continue  # Keywords such as @vocab/@base are not terms.
            if isinstance(value, str):
                self.inverse_context[value] = term
            elif isinstance(value, dict) and "@id" in value:
                self.inverse_context[value["@id"]] = term

    def serialize(self, triples: list["Triple"], pretty: bool = True) -> str:
        """
        Serialize triples to a JSON-LD string.

        Args:
            triples: Objects exposing ``subject``, ``predicate`` and
                ``object`` string attributes.
            pretty: Indent the JSON output by two spaces when true.

        Returns:
            JSON text. A single subject is emitted as one node object (with
            an @context when one is needed); otherwise nodes go into @graph.
        """
        triples = list(triples)
        self._declared_prefixes = {}
        self._referenced_blank_nodes = {
            t.object
            for t in triples
            if isinstance(t.object, str) and t.object.startswith("_:")
        }

        # Group triples by subject, preserving first-seen order.
        subjects = {}
        for triple in triples:
            subjects.setdefault(triple.subject, []).append(triple)

        nodes = [
            self._build_node(subject, subject_triples)
            for subject, subject_triples in subjects.items()
        ]

        # Built after the nodes so it includes every prefix compaction used.
        context = self._build_context()

        if len(nodes) == 1 and not context:
            result = nodes[0]
        elif len(nodes) == 1:
            result = {"@context": context, **nodes[0]}
        else:
            result = {"@context": context, "@graph": nodes}

        return json.dumps(result, indent=2 if pretty else None, ensure_ascii=False)

    def _build_context(self) -> dict:
        """Build @context: used standard prefixes, overridden by the caller's context."""
        ctx = dict(self._declared_prefixes)
        ctx.update(self.context)
        return ctx

    def _build_node(self, subject: str, triples: list["Triple"]) -> dict:
        """
        Build a JSON-LD node object from the triples of one subject.

        Blank-node subjects get an @id only when another triple refers to
        them; rdf:type triples become @type (a string when there is exactly
        one); repeated properties become arrays.
        """
        node = {}

        if not subject.startswith("_:") or subject in self._referenced_blank_nodes:
            node["@id"] = self._compact_iri(subject)

        for triple in triples:
            if triple.predicate == self._RDF_TYPE:
                node.setdefault("@type", []).append(self._compact_iri(triple.object))
                continue

            key = self._compact_iri(triple.predicate)
            value = self._compact_value(triple.object, key)

            if key not in node:
                node[key] = value
            else:
                # Multiple values - make array
                if not isinstance(node[key], list):
                    node[key] = [node[key]]
                node[key].append(value)

        # Simplify single @type
        if "@type" in node and len(node["@type"]) == 1:
            node["@type"] = node["@type"][0]

        return node

    def _prefix_available(self, prefix: str, namespace: str) -> bool:
        """Return True unless the caller's context binds ``prefix`` to something else."""
        if prefix not in self.context:
            return True
        declared = self.context[prefix]
        if isinstance(declared, dict):
            declared = declared.get("@id")
        return declared == namespace

    def _compact_iri(self, iri: str) -> str:
        """
        Compact an IRI.

        Order of preference: exact term match from the context, the longest
        matching namespace prefix declared as a plain string in the context,
        then a standard prefix (which is recorded so it gets declared in the
        output @context). Falls back to the IRI unchanged.
        """
        if iri in self.inverse_context:
            return self.inverse_context[iri]

        # Prefixes declared by the caller; only string-valued entries ending
        # in a namespace delimiter are treated as prefixes.
        best_term = None
        best_ns = ""
        for term, value in self.context.items():
            if term.startswith("@") or not isinstance(value, str):
                continue
            if (
                value.endswith(("/", "#", ":"))
                and len(value) > len(best_ns)
                and len(iri) > len(value)
                and iri.startswith(value)
            ):
                best_term, best_ns = term, value
        if best_term is not None:
            return f"{best_term}:{iri[len(best_ns):]}"

        for prefix, ns in STANDARD_PREFIXES.items():
            if iri.startswith(ns) and self._prefix_available(prefix, ns):
                self._declared_prefixes[prefix] = ns
                return f"{prefix}:{iri[len(ns):]}"

        return iri

    def _compact_value(self, value: str, term: Optional[str] = None) -> Any:
        """
        Compact an object term into its JSON-LD value.

        Args:
            value: Literal (starts with a double quote), IRI or blank node.
            term: The compacted property key, used to look up @id coercion
                in the context.

        Returns:
            A native value or value object for literals; for IRIs a plain
            string when the term coerces to @id, else ``{"@id": ...}``.
        """
        if value.startswith('"'):
            return self._parse_literal(value)

        compacted = self._compact_iri(value)
        term_def = self.context.get(term) if term is not None else None
        if isinstance(term_def, dict) and term_def.get("@type") in ("@id", "@vocab"):
            return compacted
        # A bare string would read back as a string literal.
        return {"@id": compacted}

    def _parse_literal(self, lit: str) -> Any:
        """
        Convert an encoded literal into a JSON-LD value.

        xsd:integer / xsd:double / xsd:decimal / xsd:boolean literals become
        native JSON values (malformed numbers stay typed value objects);
        language-tagged and other typed literals become value objects; plain
        literals become strings. Input that does not look like a literal is
        returned unchanged.
        """
        match = self._LITERAL_RE.match(lit)
        if not match:
            return lit

        raw, lang, datatype = match.groups()
        # Single pass so that an escaped backslash followed by "n" is not
        # mistaken for an escaped newline.
        value = self._ESCAPE_RE.sub(
            lambda m: self._UNESCAPES.get(m.group(1), m.group(0)), raw
        )

        if lang:
            return {"@value": value, "@language": lang}

        if datatype:
            try:
                if datatype == f"{self._XSD}integer":
                    return int(value)
                if datatype in (f"{self._XSD}double", f"{self._XSD}decimal"):
                    return float(value)
            except ValueError:
                # Keep ill-formed numbers as typed strings rather than fail.
                return {"@value": value, "@type": self._compact_iri(datatype)}
            if datatype == f"{self._XSD}boolean":
                return value.strip().lower() in ("true", "1")
            return {"@value": value, "@type": self._compact_iri(datatype)}

        return value
473
+
474
+
475
def parse_jsonld(source: Union[str, dict]) -> JSONLDDocument:
    """Parse ``source`` (JSON text or a decoded dict) with a fresh JSONLDParser."""
    return JSONLDParser().parse(source)
479
+
480
+
481
def serialize_jsonld(
    triples: list[Triple],
    context: Optional[dict] = None,
    pretty: bool = True
) -> str:
    """
    Serialize ``triples`` to a JSON-LD string with a one-off JSONLDSerializer.

    ``context`` is handed to the serializer's constructor and ``pretty`` is
    forwarded to its ``serialize`` call.
    """
    return JSONLDSerializer(context).serialize(triples, pretty)