pyjelly 0.7.1__cp311-cp311-macosx_11_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. cb523b6bada1c6eba8b4__mypyc.cpython-311-darwin.so +0 -0
  2. pyjelly/__init__.py +0 -0
  3. pyjelly/_proto/grpc.proto +33 -0
  4. pyjelly/_proto/patch.proto +165 -0
  5. pyjelly/_proto/rdf.proto +384 -0
  6. pyjelly/errors.py +10 -0
  7. pyjelly/integrations/__init__.py +0 -0
  8. pyjelly/integrations/generic/__init__.py +0 -0
  9. pyjelly/integrations/generic/generic_sink.py +202 -0
  10. pyjelly/integrations/generic/parse.py +412 -0
  11. pyjelly/integrations/generic/serialize.cpython-311-darwin.so +0 -0
  12. pyjelly/integrations/generic/serialize.py +402 -0
  13. pyjelly/integrations/rdflib/__init__.py +24 -0
  14. pyjelly/integrations/rdflib/parse.py +560 -0
  15. pyjelly/integrations/rdflib/serialize.py +408 -0
  16. pyjelly/jelly/__init__.py +5 -0
  17. pyjelly/jelly/rdf_pb2.py +70 -0
  18. pyjelly/jelly/rdf_pb2.pyi +231 -0
  19. pyjelly/options.py +141 -0
  20. pyjelly/parse/__init__.py +0 -0
  21. pyjelly/parse/decode.cpython-311-darwin.so +0 -0
  22. pyjelly/parse/decode.py +447 -0
  23. pyjelly/parse/ioutils.cpython-311-darwin.so +0 -0
  24. pyjelly/parse/ioutils.py +115 -0
  25. pyjelly/parse/lookup.cpython-311-darwin.so +0 -0
  26. pyjelly/parse/lookup.py +70 -0
  27. pyjelly/serialize/__init__.py +0 -0
  28. pyjelly/serialize/encode.cpython-311-darwin.so +0 -0
  29. pyjelly/serialize/encode.py +397 -0
  30. pyjelly/serialize/flows.py +196 -0
  31. pyjelly/serialize/ioutils.cpython-311-darwin.so +0 -0
  32. pyjelly/serialize/ioutils.py +13 -0
  33. pyjelly/serialize/lookup.cpython-311-darwin.so +0 -0
  34. pyjelly/serialize/lookup.py +137 -0
  35. pyjelly/serialize/streams.cpython-311-darwin.so +0 -0
  36. pyjelly/serialize/streams.py +281 -0
  37. pyjelly-0.7.1.dist-info/METADATA +114 -0
  38. pyjelly-0.7.1.dist-info/RECORD +41 -0
  39. pyjelly-0.7.1.dist-info/WHEEL +6 -0
  40. pyjelly-0.7.1.dist-info/entry_points.txt +7 -0
  41. pyjelly-0.7.1.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,560 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable, Generator, Iterable, MutableMapping
4
+ from contextvars import ContextVar
5
+ from itertools import chain
6
+ from typing import IO, Any, TypeAlias, cast
7
+ from typing_extensions import Never, Self, override
8
+
9
+ import rdflib
10
+ from rdflib import BNode, Node, URIRef
11
+ from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Dataset, Graph
12
+ from rdflib.parser import InputSource
13
+ from rdflib.parser import Parser as RDFLibParser
14
+
15
+ from pyjelly import jelly
16
+ from pyjelly.errors import JellyConformanceError
17
+ from pyjelly.options import StreamTypes
18
+ from pyjelly.parse.decode import Adapter, Decoder, ParserOptions
19
+ from pyjelly.parse.ioutils import get_options_and_frames
20
+
21
# Name of a graph as carried by a Quad: a URIRef, a BNode, or a plain string.
GraphName: TypeAlias = URIRef | BNode | str
22
+
23
+
24
class Triple(tuple[Node, Node, Node]):
    """
    Immutable RDFLib triple backed by a plain 3-tuple.

    Args:
        s (Node): subject term.
        p (Node): predicate term.
        o (Node): object term.

    Returns:
        Triple: the (s, p, o) tuple with named accessors.

    """

    # No per-instance __dict__: the tuple itself holds all the data.
    __slots__ = ()

    def __new__(cls, s: Node, p: Node, o: Node) -> Self:
        terms = (s, p, o)
        return super().__new__(cls, terms)

    @property
    def s(self) -> Node:
        """Subject (element 0)."""
        return self[0]

    @property
    def p(self) -> Node:
        """Predicate (element 1)."""
        return self[1]

    @property
    def o(self) -> Node:
        """Object (element 2)."""
        return self[2]
52
+
53
+
54
class Quad(tuple[Node, Node, Node, GraphName]):
    """
    Immutable RDFLib quad backed by a plain 4-tuple.

    Args:
        s (Node): subject term.
        p (Node): predicate term.
        o (Node): object term.
        g (GraphName): name of the graph the statement belongs to.

    Returns:
        Quad: the (s, p, o, g) tuple with named accessors.

    """

    # No per-instance __dict__: the tuple itself holds all the data.
    __slots__ = ()

    def __new__(cls, s: Node, p: Node, o: Node, g: GraphName) -> Self:
        terms = (s, p, o, g)
        return super().__new__(cls, terms)

    @property
    def s(self) -> Node:
        """Subject (element 0)."""
        return self[0]

    @property
    def p(self) -> Node:
        """Predicate (element 1)."""
        return self[1]

    @property
    def o(self) -> Node:
        """Object (element 2)."""
        return self[2]

    @property
    def g(self) -> GraphName:
        """Graph name (element 3)."""
        return self[3]
87
+
88
+
89
# A single parsed statement: either a Triple or a Quad.
Statement = Triple | Quad
90
+
91
+
92
class Prefix(tuple[str, rdflib.URIRef]):
    """
    RDF prefix (i.e. namespace declaration) backed by a plain 2-tuple.

    Args:
        prefix (str): the short prefix label.
        iri (rdflib.URIRef): the full namespace IRI the label stands for.

    Returns:
        Prefix: the (prefix, iri) tuple with named accessors.

    """

    # No per-instance __dict__: the tuple itself holds all the data.
    __slots__ = ()

    def __new__(cls, prefix: str, iri: rdflib.URIRef) -> Self:
        pair = (prefix, iri)
        return super().__new__(cls, pair)

    @property
    def prefix(self) -> str:
        """Prefix label (element 0)."""
        return self[0]

    @property
    def iri(self) -> rdflib.URIRef:
        """Namespace IRI (element 1)."""
        return self[1]
117
+
118
+
119
class RDFLibAdapter(Adapter):
    """
    Base adapter that turns decoded terms into RDFLib objects.

    Extended by the triples, quads and graphs adapters in this module.

    Args:
        Adapter (): abstract adapter class

    """

    @override
    def iri(self, iri: str) -> rdflib.URIRef:
        """Wrap an IRI string in a URIRef."""
        return URIRef(iri)

    @override
    def bnode(self, bnode: str) -> rdflib.BNode:
        """Wrap a blank-node identifier in a BNode."""
        return BNode(bnode)

    @override
    def default_graph(self) -> rdflib.URIRef:
        """Return RDFLib's identifier for the dataset default graph."""
        return DATASET_DEFAULT_GRAPH_ID

    @override
    def literal(
        self,
        lex: str,
        language: str | None = None,
        datatype: str | None = None,
    ) -> rdflib.Literal:
        """Build a Literal from its lexical form, language tag and datatype."""
        literal_kwargs = {"lang": language, "datatype": datatype}
        return rdflib.Literal(lex, **literal_kwargs)

    @override
    def namespace_declaration(self, name: str, iri: str) -> Prefix:
        """Build a Prefix whose IRI is produced by this adapter's ``iri``."""
        namespace_iri = self.iri(iri)
        return Prefix(name, namespace_iri)
152
+
153
+
154
def _adapter_missing(feature: str, *, stream_types: StreamTypes) -> Never:
    """
    Report that an adapter lacks a piece of functionality.

    Args:
        feature (str): name of the function that is not implemented
        stream_types (StreamTypes): the physical/logical type combination
            that triggered the error

    Raises:
        NotImplementedError: always; the message names the missing feature
            and both stream type names

    Returns:
        Never: this function only raises

    """
    physical = jelly.PhysicalStreamType.Name(stream_types.physical_type)
    logical = jelly.LogicalStreamType.Name(stream_types.logical_type)
    raise NotImplementedError(
        f"adapter with {physical} and {logical} does not implement {feature}"
    )
178
+
179
+
180
class RDFLibTriplesAdapter(RDFLibAdapter):
    """
    RDFLib adapter for the TRIPLES physical type.

    Notes: each decoded triple is returned immediately as a Triple; nothing
    is buffered here.
    """

    def __init__(self, options: ParserOptions) -> None:
        super().__init__(options=options)

    @override
    def triple(self, terms: Iterable[Any]) -> Triple:
        """Pack the decoded terms (expected s, p, o) into a Triple."""
        triple = Triple(*terms)
        return triple
196
+
197
+
198
class RDFLibQuadsBaseAdapter(RDFLibAdapter):
    """Common base of the adapters for the QUADS and GRAPHS physical types."""

    def __init__(self, options: ParserOptions) -> None:
        super().__init__(options=options)
201
+
202
+
203
class RDFLibQuadsAdapter(RDFLibQuadsBaseAdapter):
    """
    RDFLib adapter for the QUADS physical type.

    Args:
        RDFLibQuadsBaseAdapter (RDFLibAdapter): base quads adapter
            (shared with graphs physical type)

    """

    @override
    def quad(self, terms: Iterable[Any]) -> Quad:
        """Pack the decoded terms (expected s, p, o, g) into a Quad."""
        quad = Quad(*terms)
        return quad
216
+
217
+
218
class RDFLibGraphsAdapter(RDFLibQuadsBaseAdapter):
    """
    Extension of RDFLibQuadsBaseAdapter for the GRAPHS physical type.

    Notes: tracks the graph opened by ``graph_start`` and attaches it to
    every triple, so triples are emitted as Quads. The current graph is
    cleared again by ``graph_end``.

    Args:
        RDFLibQuadsBaseAdapter (RDFLibAdapter): base adapter for quads management.

    Raises:
        JellyConformanceError: if a triple arrives (or ``graph`` is read)
            while no graph is open, i.e. no graph_start was encountered

    """

    # Identifier of the graph currently open; None outside graph_start/graph_end.
    _graph_id: str | None

    def __init__(
        self,
        options: ParserOptions,
    ) -> None:
        super().__init__(options=options)
        self._graph_id = None

    def _require_graph(self) -> None:
        """Raise JellyConformanceError unless a graph is currently open."""
        if self._graph_id is None:
            msg = "new graph was not started"
            raise JellyConformanceError(msg)

    @property
    def graph(self) -> None:
        """Validate that a graph is open; the property itself yields None."""
        self._require_graph()

    @override
    def graph_start(self, graph_id: str) -> None:
        """Remember ``graph_id`` as the graph for the triples that follow."""
        self._graph_id = graph_id

    @override
    def triple(self, terms: Iterable[Any]) -> Quad:
        """
        Turn a decoded triple into a Quad in the currently open graph.

        Raises:
            JellyConformanceError: if no graph is open. Without this check
                the Quad would silently carry ``None`` as its graph name.

        """
        self._require_graph()
        return Quad(*chain(terms, [self._graph_id]))

    @override
    def graph_end(self) -> None:
        """Close the current graph."""
        self._graph_id = None
258
+
259
+
260
def parse_triples_stream(
    frames: Iterable[jelly.RdfStreamFrame],
    options: ParserOptions,
    frame_metadata: ContextVar[MutableMapping[str, bytes]] | None = None,
) -> Generator[Iterable[Triple | Prefix]]:
    """
    Decode a triples stream frame by frame.

    Args:
        frames (Iterable[jelly.RdfStreamFrame]): iterator over stream frames
        options (ParserOptions): stream options
        frame_metadata: (ContextVar[ScalarMap[str, bytes]]): optional context
            variable; when given, it is set to the metadata of each frame
            (or to an empty dict if the frame has none) before that frame's
            rows are yielded

    Yields:
        Generator[Iterable[Triple | Prefix]]:
            one iterable of Triple or Prefix objects per frame.

    """
    decoder = Decoder(adapter=RDFLibTriplesAdapter(options))
    for frame in frames:
        if frame_metadata is not None:
            # Empty metadata is falsy, so it is replaced by a fresh dict.
            frame_metadata.set(frame.metadata or {})
        yield decoder.iter_rows(frame)
289
+
290
+
291
def parse_quads_stream(
    frames: Iterable[jelly.RdfStreamFrame],
    options: ParserOptions,
    frame_metadata: ContextVar[MutableMapping[str, bytes]] | None = None,
) -> Generator[Iterable[Quad | Prefix]]:
    """
    Decode a quads or graphs stream frame by frame.

    The QUADS physical type is decoded with RDFLibQuadsAdapter; any other
    physical type passed here is decoded with RDFLibGraphsAdapter.

    Args:
        frames (Iterable[jelly.RdfStreamFrame]): iterator over stream frames
        options (ParserOptions): stream options
        frame_metadata: (ContextVar[ScalarMap[str, bytes]]): optional context
            variable; when given, it is set to the metadata of each frame
            (or to an empty dict if the frame has none) before that frame's
            rows are yielded

    Yields:
        Generator[Iterable[Quad | Prefix]]:
            one iterable of Quad or Prefix objects per frame.

    """
    is_quads = options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_QUADS
    adapter_class: type[RDFLibQuadsBaseAdapter] = (
        RDFLibQuadsAdapter if is_quads else RDFLibGraphsAdapter
    )
    decoder = Decoder(adapter=adapter_class(options=options))
    for frame in frames:
        if frame_metadata is not None:
            # Empty metadata is falsy, so it is replaced by a fresh dict.
            frame_metadata.set(frame.metadata or {})
        yield decoder.iter_rows(frame)
325
+
326
+
327
def parse_jelly_grouped(
    inp: IO[bytes],
    graph_factory: Callable[[], Graph] = lambda: Graph(),
    dataset_factory: Callable[[], Dataset] = lambda: Dataset(),
    *,
    logical_type_strict: bool = False,
    frame_metadata: ContextVar[MutableMapping[str, bytes]] | None = None,
) -> Generator[Graph] | Generator[Dataset]:
    """
    Parse a jelly file into one Graph or Dataset per frame.

    The physical stream type decides the output: TRIPLES frames are loaded
    into Graphs, QUADS and GRAPHS frames into Datasets.

    Args:
        inp (IO[bytes]): input jelly buffered binary stream
        graph_factory (Callable): callable building the Graph for each frame.
            By default creates an empty in-memory Graph,
            but you can pass something else here.
        dataset_factory (Callable): callable building the Dataset for each
            frame. By default creates an empty in-memory Dataset,
            but you can pass something else here.
        logical_type_strict (bool): If True, validate the *logical* type in
            stream options and require a grouped logical type. Otherwise, only
            the physical type is used to route parsing.
        frame_metadata: (ContextVar[ScalarMap[str, bytes]]): context variable
            used for extracting frame metadata

    Raises:
        JellyConformanceError: in strict mode, if stream types are missing or
            the logical type is unspecified or flat
        NotImplementedError: is raised if a physical type is not implemented

    Yields:
        Generator[Graph] | Generator[Dataset]:
            graphs or datasets, depending on the physical type of the input

    """
    options, frames = get_options_and_frames(inp)

    st = getattr(options, "stream_types", None)
    if logical_type_strict:
        if st is None:
            raise JellyConformanceError(
                "strict logical type check requires options.stream_types"
            )
        if st.logical_type == jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED or st.flat:
            lt_name = jelly.LogicalStreamType.Name(st.logical_type)
            raise JellyConformanceError(
                f"expected GROUPED logical type, got {lt_name}"
            )

    physical_type = options.stream_types.physical_type

    if physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
        triple_frames = parse_triples_stream(
            frames=frames,
            options=options,
            frame_metadata=frame_metadata,
        )
        for frame_rows in triple_frames:
            graph = graph_factory()
            for row in frame_rows:
                if isinstance(row, Prefix):
                    graph.bind(row.prefix, row.iri)
                else:
                    graph.add(row)
            yield graph
        return

    if physical_type in (
        jelly.PHYSICAL_STREAM_TYPE_QUADS,
        jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
    ):
        quad_frames = parse_quads_stream(
            frames=frames, options=options, frame_metadata=frame_metadata
        )
        for frame_rows in quad_frames:
            dataset = dataset_factory()
            for row in frame_rows:
                if isinstance(row, Prefix):
                    dataset.bind(row.prefix, row.iri)
                else:
                    subj, pred, obj, graph_name = row
                    # Statements are added to the named context of the dataset.
                    context = dataset.get_context(graph_name)
                    dataset.add((subj, pred, obj, context))
            yield dataset
        return

    physical_type_name = jelly.PhysicalStreamType.Name(physical_type)
    raise NotImplementedError(
        f"the stream type {physical_type_name} is not supported "
    )
422
+
423
+
424
def parse_jelly_to_graph(
    inp: IO[bytes],
    graph_factory: Callable[[], Graph] = lambda: Graph(),
    dataset_factory: Callable[[], Dataset] = lambda: Dataset(),
) -> Graph | Dataset:
    """
    Load a whole jelly stream into a single Graph or Dataset.

    A TRIPLES stream is loaded into a Graph from ``graph_factory``; a QUADS
    or GRAPHS stream is loaded into a Dataset from ``dataset_factory``.

    Args:
        inp (IO[bytes]): input jelly stream.
        graph_factory (Callable[[], Graph]): factory to create Graph.
            By default creates an empty in-memory Graph,
            but you can pass something else here.
        dataset_factory (Callable[[], Dataset]): factory to create Dataset.
            By default creates an empty in-memory Dataset,
            but you can pass something else here.

    Returns:
        Dataset | Graph: Dataset or Graph with statements.

    """
    options, frames = get_options_and_frames(inp)
    physical_type = options.stream_types.physical_type

    if physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
        sink = graph_factory()
    elif physical_type in (
        jelly.PHYSICAL_STREAM_TYPE_QUADS,
        jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
    ):
        quad_sink = dataset_factory()
        sink = quad_sink

    # Options and frames were already read above, so they are handed over
    # instead of letting parse_jelly_flat read them from ``inp`` again.
    events = parse_jelly_flat(inp=inp, frames=frames, options=options)
    for event in events:
        if isinstance(event, Prefix):
            sink.bind(event.prefix, event.iri)
        elif isinstance(event, Triple):
            sink.add(event)
        elif isinstance(event, Quad):
            subj, pred, obj, graph_name = event
            context = quad_sink.get_context(graph_name)
            quad_sink.add((subj, pred, obj, context))
    return sink
466
+
467
+
468
def parse_jelly_flat(
    inp: IO[bytes],
    frames: Iterable[jelly.RdfStreamFrame] | None = None,
    options: ParserOptions | None = None,
    *,
    logical_type_strict: bool = False,
) -> Generator[Statement | Prefix]:
    """
    Parse a jelly file into one flat Generator of stream events.

    Args:
        inp (IO[bytes]): input jelly buffered binary stream.
        frames (Iterable[jelly.RdfStreamFrame] | None):
            jelly frames if read before.
        options (ParserOptions | None): stream options
            if read before.
        logical_type_strict (bool): If True, validate the *logical* type in
            stream options and require FLAT_(TRIPLES|QUADS). Otherwise, only
            the physical type is used to route parsing.

    Raises:
        JellyConformanceError: in strict mode, if stream types are missing or
            the logical type is not flat
        NotImplementedError: if physical type is not supported

    Yields:
        Generator[Statement | Prefix]: Generator of stream events

    """
    # Both must be supplied to be reused; otherwise they are read from ``inp``.
    if frames is None or options is None:
        options, frames = get_options_and_frames(inp)

    st = getattr(options, "stream_types", None)
    if logical_type_strict:
        if st is None:
            raise JellyConformanceError(
                "strict logical type check requires options.stream_types"
            )
        if not st.flat:
            lt_name = jelly.LogicalStreamType.Name(st.logical_type)
            raise JellyConformanceError(
                f"expected FLAT logical type (TRIPLES/QUADS), got {lt_name}"
            )

    physical_type = options.stream_types.physical_type

    if physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
        for frame_rows in parse_triples_stream(frames=frames, options=options):
            yield from frame_rows
        return

    if physical_type in (
        jelly.PHYSICAL_STREAM_TYPE_QUADS,
        jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
    ):
        for frame_rows in parse_quads_stream(frames=frames, options=options):
            yield from frame_rows
        return

    physical_type_name = jelly.PhysicalStreamType.Name(physical_type)
    raise NotImplementedError(
        f"the stream type {physical_type_name} is not supported "
    )
528
+
529
+
530
class RDFLibJellyParser(RDFLibParser):
    """RDFLib parser implementation that reads jelly data into a Graph."""

    def parse(
        self,
        source: InputSource,
        sink: Graph,
    ) -> None:
        """
        Parse jelly file into provided RDFLib Graph.

        Args:
            source (InputSource): jelly file as buffered binary stream InputSource obj
            sink (Graph): RDFLib Graph

        Raises:
            TypeError: raises error if invalid input

        """
        byte_stream = source.getByteStream()
        if byte_stream is None:
            msg = "expected source to be a stream of bytes"
            raise TypeError(msg)

        # cast() only informs the type checker and returns its argument
        # unchanged, so the None check above already covers ``inp``.
        inp = cast(IO[bytes], byte_stream)

        # Both factories build their Graph/Dataset on top of the sink's store,
        # which is why the return value is not needed here.
        parse_jelly_to_graph(
            inp,
            graph_factory=lambda: Graph(store=sink.store, identifier=sink.identifier),
            dataset_factory=lambda: Dataset(store=sink.store),
        )