pyjelly 0.7.1__cp311-cp311-macosx_11_0_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cb523b6bada1c6eba8b4__mypyc.cpython-311-darwin.so +0 -0
- pyjelly/__init__.py +0 -0
- pyjelly/_proto/grpc.proto +33 -0
- pyjelly/_proto/patch.proto +165 -0
- pyjelly/_proto/rdf.proto +384 -0
- pyjelly/errors.py +10 -0
- pyjelly/integrations/__init__.py +0 -0
- pyjelly/integrations/generic/__init__.py +0 -0
- pyjelly/integrations/generic/generic_sink.py +202 -0
- pyjelly/integrations/generic/parse.py +412 -0
- pyjelly/integrations/generic/serialize.cpython-311-darwin.so +0 -0
- pyjelly/integrations/generic/serialize.py +402 -0
- pyjelly/integrations/rdflib/__init__.py +24 -0
- pyjelly/integrations/rdflib/parse.py +560 -0
- pyjelly/integrations/rdflib/serialize.py +408 -0
- pyjelly/jelly/__init__.py +5 -0
- pyjelly/jelly/rdf_pb2.py +70 -0
- pyjelly/jelly/rdf_pb2.pyi +231 -0
- pyjelly/options.py +141 -0
- pyjelly/parse/__init__.py +0 -0
- pyjelly/parse/decode.cpython-311-darwin.so +0 -0
- pyjelly/parse/decode.py +447 -0
- pyjelly/parse/ioutils.cpython-311-darwin.so +0 -0
- pyjelly/parse/ioutils.py +115 -0
- pyjelly/parse/lookup.cpython-311-darwin.so +0 -0
- pyjelly/parse/lookup.py +70 -0
- pyjelly/serialize/__init__.py +0 -0
- pyjelly/serialize/encode.cpython-311-darwin.so +0 -0
- pyjelly/serialize/encode.py +397 -0
- pyjelly/serialize/flows.py +196 -0
- pyjelly/serialize/ioutils.cpython-311-darwin.so +0 -0
- pyjelly/serialize/ioutils.py +13 -0
- pyjelly/serialize/lookup.cpython-311-darwin.so +0 -0
- pyjelly/serialize/lookup.py +137 -0
- pyjelly/serialize/streams.cpython-311-darwin.so +0 -0
- pyjelly/serialize/streams.py +281 -0
- pyjelly-0.7.1.dist-info/METADATA +114 -0
- pyjelly-0.7.1.dist-info/RECORD +41 -0
- pyjelly-0.7.1.dist-info/WHEEL +6 -0
- pyjelly-0.7.1.dist-info/entry_points.txt +7 -0
- pyjelly-0.7.1.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,560 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable, Generator, Iterable, MutableMapping
|
|
4
|
+
from contextvars import ContextVar
|
|
5
|
+
from itertools import chain
|
|
6
|
+
from typing import IO, Any, TypeAlias, cast
|
|
7
|
+
from typing_extensions import Never, Self, override
|
|
8
|
+
|
|
9
|
+
import rdflib
|
|
10
|
+
from rdflib import BNode, Node, URIRef
|
|
11
|
+
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Dataset, Graph
|
|
12
|
+
from rdflib.parser import InputSource
|
|
13
|
+
from rdflib.parser import Parser as RDFLibParser
|
|
14
|
+
|
|
15
|
+
from pyjelly import jelly
|
|
16
|
+
from pyjelly.errors import JellyConformanceError
|
|
17
|
+
from pyjelly.options import StreamTypes
|
|
18
|
+
from pyjelly.parse.decode import Adapter, Decoder, ParserOptions
|
|
19
|
+
from pyjelly.parse.ioutils import get_options_and_frames
|
|
20
|
+
|
|
21
|
+
# Type of the graph component carried as the fourth element of a Quad:
# an RDFLib URIRef or BNode, or a plain string.
GraphName: TypeAlias = URIRef | BNode | str
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Triple(tuple[Node, Node, Node]):
    """
    Immutable RDFLib triple stored as a plain 3-tuple.

    The positional layout is ``(s, p, o)``; the properties below are
    read-only, named views onto those positions.

    Args:
        s (Node): subject term.
        p (Node): predicate term.
        o (Node): object term.

    """

    # No per-instance __dict__: the tuple storage is the only state.
    __slots__ = ()

    def __new__(cls, s: Node, p: Node, o: Node) -> Self:
        """Pack the three terms into the underlying tuple."""
        return super().__new__(cls, (s, p, o))

    @property
    def s(self) -> Node:
        """Subject term (position 0)."""
        return self[0]

    @property
    def p(self) -> Node:
        """Predicate term (position 1)."""
        return self[1]

    @property
    def o(self) -> Node:
        """Object term (position 2)."""
        return self[2]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class Quad(tuple[Node, Node, Node, GraphName]):
    """
    Immutable RDFLib quad stored as a plain 4-tuple.

    The positional layout is ``(s, p, o, g)``; the properties below are
    read-only, named views onto those positions.

    Args:
        s (Node): subject term.
        p (Node): predicate term.
        o (Node): object term.
        g (GraphName): name of the graph the statement belongs to.

    """

    # No per-instance __dict__: the tuple storage is the only state.
    __slots__ = ()

    def __new__(cls, s: Node, p: Node, o: Node, g: GraphName) -> Self:
        """Pack the three terms and the graph name into the underlying tuple."""
        return super().__new__(cls, (s, p, o, g))

    @property
    def s(self) -> Node:
        """Subject term (position 0)."""
        return self[0]

    @property
    def p(self) -> Node:
        """Predicate term (position 1)."""
        return self[1]

    @property
    def o(self) -> Node:
        """Object term (position 2)."""
        return self[2]

    @property
    def g(self) -> GraphName:
        """Graph name (position 3)."""
        return self[3]
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# Either kind of parsed statement: a Triple or a Quad.
Statement = Triple | Quad
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class Prefix(tuple[str, rdflib.URIRef]):
    """
    RDF prefix (i.e. a namespace declaration) stored as a plain 2-tuple.

    The positional layout is ``(prefix, iri)``.

    Args:
        prefix (str): short prefix label.
        iri (rdflib.URIRef): full namespace IRI the prefix stands for.

    """

    # No per-instance __dict__: the tuple storage is the only state.
    __slots__ = ()

    def __new__(cls, prefix: str, iri: rdflib.URIRef) -> Self:
        """Pack the prefix label and namespace IRI into the underlying tuple."""
        return super().__new__(cls, (prefix, iri))

    @property
    def prefix(self) -> str:
        """Prefix label (position 0)."""
        return self[0]

    @property
    def iri(self) -> rdflib.URIRef:
        """Namespace IRI (position 1)."""
        return self[1]
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class RDFLibAdapter(Adapter):
    """
    Term factory shared by the RDFLib triples and quads adapters.

    Each method turns decoded string values into the corresponding RDFLib
    object (or, for namespace declarations, into a ``Prefix`` tuple).

    Args:
        Adapter (): abstract adapter class

    """

    @override
    def iri(self, iri: str) -> URIRef:
        """Wrap an IRI string in an RDFLib ``URIRef``."""
        return URIRef(iri)

    @override
    def bnode(self, bnode: str) -> BNode:
        """Wrap a blank-node identifier in an RDFLib ``BNode``."""
        return BNode(bnode)

    @override
    def default_graph(self) -> rdflib.URIRef:
        """Return RDFLib's ``DATASET_DEFAULT_GRAPH_ID`` constant."""
        return DATASET_DEFAULT_GRAPH_ID

    @override
    def literal(
        self,
        lex: str,
        language: str | None = None,
        datatype: str | None = None,
    ) -> rdflib.Literal:
        """Build an RDFLib ``Literal`` from a lexical form, language and datatype."""
        # The adapter's "language" is RDFLib's "lang" keyword.
        return rdflib.Literal(lex, lang=language, datatype=datatype)

    @override
    def namespace_declaration(self, name: str, iri: str) -> Prefix:
        """Build a ``Prefix`` whose IRI has been converted through ``self.iri``."""
        namespace = self.iri(iri)
        return Prefix(name, namespace)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _adapter_missing(feature: str, *, stream_types: StreamTypes) -> Never:
    """
    Report that an adapter lacks a feature; this function never returns.

    Args:
        feature (str): name of the functionality that is not implemented
        stream_types (StreamTypes): physical/logical type combination whose
            enum names are included in the error message

    Raises:
        NotImplementedError: always, with a message naming both stream types
            and the missing feature

    """
    physical = jelly.PhysicalStreamType.Name(stream_types.physical_type)
    logical = jelly.LogicalStreamType.Name(stream_types.logical_type)
    raise NotImplementedError(
        f"adapter with {physical} and {logical} does not implement {feature}"
    )
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
class RDFLibTriplesAdapter(RDFLibAdapter):
    """
    RDFLib adapter that produces ``Triple`` objects.

    Notes: each set of decoded terms is turned into a ``Triple`` immediately;
    nothing is buffered on the adapter.
    """

    def __init__(self, options: ParserOptions) -> None:
        """Forward the parser options to the base adapter."""
        super().__init__(options=options)

    @override
    def triple(self, terms: Iterable[Any]) -> Triple:
        """Build a ``Triple`` from the terms, unpacked as ``s, p, o``."""
        return Triple(*terms)
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
class RDFLibQuadsBaseAdapter(RDFLibAdapter):
    """Common base of the adapters that produce ``Quad`` objects."""

    def __init__(self, options: ParserOptions) -> None:
        """Forward the parser options to the base adapter."""
        super().__init__(options=options)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
class RDFLibQuadsAdapter(RDFLibQuadsBaseAdapter):
    """
    RDFLib adapter for the QUADS physical type.

    Args:
        RDFLibQuadsBaseAdapter (RDFLibAdapter): base quads adapter
            (shared with graphs physical type)

    """

    @override
    def quad(self, terms: Iterable[Any]) -> Quad:
        """Build a ``Quad`` from the terms, unpacked as ``s, p, o, g``."""
        return Quad(*terms)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
class RDFLibGraphsAdapter(RDFLibQuadsBaseAdapter):
    """
    RDFLib adapter for the GRAPHS physical type.

    Notes: remembers the graph id between ``graph_start`` and ``graph_end``
    and attaches it as the fourth element of every ``Quad`` built by
    ``triple``.

    Args:
        RDFLibQuadsBaseAdapter (RDFLibAdapter): base adapter for quads management.

    Raises:
        JellyConformanceError: from the ``graph`` property when no graph has
            been started. NOTE(review): ``triple`` does not perform this
            check itself, so a triple received outside a graph gets ``None``
            as its graph name.

    """

    # Id of the graph currently being read; None outside graph_start/graph_end.
    _graph_id: str | None

    def __init__(self, options: ParserOptions) -> None:
        """Forward the parser options and start with no open graph."""
        super().__init__(options=options)
        self._graph_id = None

    @property
    def graph(self) -> None:
        """Raise ``JellyConformanceError`` unless a graph is currently open."""
        if self._graph_id is None:
            msg = "new graph was not started"
            raise JellyConformanceError(msg)

    @override
    def graph_start(self, graph_id: str) -> None:
        """Record ``graph_id`` as the graph for subsequent triples."""
        self._graph_id = graph_id

    @override
    def triple(self, terms: Iterable[Any]) -> Quad:
        """Build a ``Quad`` from the terms plus the current graph id."""
        return Quad(*terms, self._graph_id)

    @override
    def graph_end(self) -> None:
        """Forget the current graph id."""
        self._graph_id = None
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def parse_triples_stream(
    frames: Iterable[jelly.RdfStreamFrame],
    options: ParserOptions,
    frame_metadata: ContextVar[MutableMapping[str, bytes]] | None = None,
) -> Generator[Iterable[Triple | Prefix]]:
    """
    Decode a triples stream frame by frame.

    Args:
        frames (Iterable[jelly.RdfStreamFrame]): iterator over stream frames
        options (ParserOptions): stream options
        frame_metadata: (ContextVar[ScalarMap[str, bytes]]): optional context
            variable; when given, it is set to each frame's metadata (or to
            an empty dict when the frame has none) before that frame's rows
            are yielded

    Yields:
        Generator[Iterable[Triple | Prefix]]:
            one iterable of Triple or Prefix objects per frame, as returned
            by the decoder's ``iter_rows``.

    """
    decoder = Decoder(adapter=RDFLibTriplesAdapter(options))
    for frame in frames:
        if frame_metadata is not None:
            metadata = frame.metadata
            # Falsy (empty) metadata is published as a fresh empty dict.
            frame_metadata.set(metadata if metadata else {})
        yield decoder.iter_rows(frame)
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def parse_quads_stream(
    frames: Iterable[jelly.RdfStreamFrame],
    options: ParserOptions,
    frame_metadata: ContextVar[MutableMapping[str, bytes]] | None = None,
) -> Generator[Iterable[Quad | Prefix]]:
    """
    Decode a quads or graphs stream frame by frame.

    The QUADS physical type is decoded with ``RDFLibQuadsAdapter``; any other
    physical type reaching this function is decoded with
    ``RDFLibGraphsAdapter``.

    Args:
        frames (Iterable[jelly.RdfStreamFrame]): iterator over stream frames
        options (ParserOptions): stream options
        frame_metadata: (ContextVar[ScalarMap[str, bytes]]): optional context
            variable; when given, it is set to each frame's metadata (or to
            an empty dict when the frame has none) before that frame's rows
            are yielded

    Yields:
        Generator[Iterable[Quad | Prefix]]:
            one iterable of Quad or Prefix objects per frame, as returned
            by the decoder's ``iter_rows``.

    """
    is_quads = options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_QUADS
    adapter_class: type[RDFLibQuadsBaseAdapter] = (
        RDFLibQuadsAdapter if is_quads else RDFLibGraphsAdapter
    )
    decoder = Decoder(adapter=adapter_class(options=options))
    for frame in frames:
        if frame_metadata is not None:
            metadata = frame.metadata
            # Falsy (empty) metadata is published as a fresh empty dict.
            frame_metadata.set(metadata if metadata else {})
        yield decoder.iter_rows(frame)
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def parse_jelly_grouped(
    inp: IO[bytes],
    graph_factory: Callable[[], Graph] = lambda: Graph(),
    dataset_factory: Callable[[], Dataset] = lambda: Dataset(),
    *,
    logical_type_strict: bool = False,
    frame_metadata: ContextVar[MutableMapping[str, bytes]] | None = None,
) -> Generator[Graph] | Generator[Dataset]:
    """
    Parse a jelly stream into one Graph or Dataset per frame.

    The physical type found in the stream options selects the output: a
    TRIPLES stream yields graphs, QUADS and GRAPHS streams yield datasets.
    Each frame is fully loaded into a freshly created sink before that sink
    is yielded.

    Args:
        inp (IO[bytes]): input jelly buffered binary stream
        graph_factory (Callable): called once per frame to create the Graph
            for a TRIPLES stream. By default creates an empty in-memory Graph,
            but you can pass something else here.
        dataset_factory (Callable): called once per frame to create the
            Dataset for a QUADS/GRAPHS stream. By default creates an empty
            in-memory Dataset, but you can pass something else here.
        logical_type_strict (bool): If True, validate the *logical* type in
            stream options and require a grouped logical type. Otherwise, only
            the physical type is used to route parsing.
        frame_metadata: (ContextVar[ScalarMap[str, bytes]]): context variable
            used for extracting frame metadata; forwarded to the stream parser

    Raises:
        JellyConformanceError: in strict mode, if the options carry no stream
            types, or the logical type is unspecified or flat
        NotImplementedError: is raised if a physical type is not implemented

    Yields:
        Generator[Graph] | Generator[Dataset]:
            graphs or datasets, depending on the physical type of the input

    """
    options, frames = get_options_and_frames(inp)

    st = getattr(options, "stream_types", None)
    if logical_type_strict:
        if st is None:
            msg = "strict logical type check requires options.stream_types"
            raise JellyConformanceError(msg)
        if st.logical_type == jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED or st.flat:
            lt_name = jelly.LogicalStreamType.Name(st.logical_type)
            msg = f"expected GROUPED logical type, got {lt_name}"
            raise JellyConformanceError(msg)

    physical_type = options.stream_types.physical_type

    if physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
        triple_frames = parse_triples_stream(
            frames=frames,
            options=options,
            frame_metadata=frame_metadata,
        )
        for rows in triple_frames:
            graph_sink = graph_factory()
            for row in rows:
                if isinstance(row, Prefix):
                    graph_sink.bind(row.prefix, row.iri)
                else:
                    graph_sink.add(row)
            yield graph_sink
        return

    if physical_type in (
        jelly.PHYSICAL_STREAM_TYPE_QUADS,
        jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
    ):
        quad_frames = parse_quads_stream(
            frames=frames,
            options=options,
            frame_metadata=frame_metadata,
        )
        for rows in quad_frames:
            dataset_sink = dataset_factory()
            for row in rows:
                if isinstance(row, Prefix):
                    dataset_sink.bind(row.prefix, row.iri)
                    continue
                s, p, o, graph_name = row
                # Resolve the graph name to the dataset's context, then add
                # the statement to that context.
                context = dataset_sink.get_context(graph_name)
                dataset_sink.add((s, p, o, context))
            yield dataset_sink
        return

    physical_type_name = jelly.PhysicalStreamType.Name(physical_type)
    msg = f"the stream type {physical_type_name} is not supported "
    raise NotImplementedError(msg)
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def parse_jelly_to_graph(
    inp: IO[bytes],
    graph_factory: Callable[[], Graph] = lambda: Graph(),
    dataset_factory: Callable[[], Dataset] = lambda: Dataset(),
) -> Graph | Dataset:
    """
    Load an entire jelly stream into a single Graph or Dataset.

    A TRIPLES stream is loaded into the object made by ``graph_factory``;
    QUADS and GRAPHS streams are loaded into the object made by
    ``dataset_factory``. Statements and prefixes are read through
    ``parse_jelly_flat``.

    Args:
        inp (IO[bytes]): input jelly stream.
        graph_factory (Callable[[], Graph]): factory to create Graph.
            By default creates an empty in-memory Graph,
            but you can pass something else here.
        dataset_factory (Callable[[], Dataset]): factory to create Dataset.
            By default creates an empty in-memory Dataset,
            but you can pass something else here.

    Returns:
        Dataset | Graph: Dataset or Graph with statements.

    """
    options, frames = get_options_and_frames(inp)
    physical_type = options.stream_types.physical_type

    if physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
        sink = graph_factory()
    if physical_type in (
        jelly.PHYSICAL_STREAM_TYPE_QUADS,
        jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
    ):
        quad_sink = dataset_factory()
        sink = quad_sink

    for item in parse_jelly_flat(inp=inp, frames=frames, options=options):
        # Prefix, Triple and Quad are unrelated tuple subclasses, so at most
        # one of these branches applies to any item.
        if isinstance(item, Prefix):
            sink.bind(item.prefix, item.iri)
        elif isinstance(item, Triple):
            sink.add(item)
        elif isinstance(item, Quad):
            s, p, o, graph_name = item
            context = quad_sink.get_context(graph_name)
            quad_sink.add((s, p, o, context))
    return sink
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def parse_jelly_flat(
    inp: IO[bytes],
    frames: Iterable[jelly.RdfStreamFrame] | None = None,
    options: ParserOptions | None = None,
    *,
    logical_type_strict: bool = False,
) -> Generator[Statement | Prefix]:
    """
    Parse a jelly stream into a flat Generator of stream events.

    Frame boundaries are not preserved: the rows of every frame are yielded
    one after another.

    Args:
        inp (IO[bytes]): input jelly buffered binary stream.
        frames (Iterable[jelly.RdfStreamFrame] | None):
            jelly frames if read before.
        options (ParserOptions | None): stream options
            if read before. Unless both ``frames`` and ``options`` are
            given, both are read from ``inp``.
        logical_type_strict (bool): If True, validate the *logical* type in
            stream options and require FLAT_(TRIPLES|QUADS). Otherwise, only
            the physical type is used to route parsing.

    Raises:
        JellyConformanceError: in strict mode, if the options carry no stream
            types or the logical type is not flat
        NotImplementedError: if physical type is not supported

    Yields:
        Generator[Statement | Prefix]: Generator of stream events

    """
    if frames is None or options is None:
        options, frames = get_options_and_frames(inp)

    st = getattr(options, "stream_types", None)
    if logical_type_strict:
        if st is None:
            msg = "strict logical type check requires options.stream_types"
            raise JellyConformanceError(msg)
        if not st.flat:
            lt_name = jelly.LogicalStreamType.Name(st.logical_type)
            msg = f"expected FLAT logical type (TRIPLES/QUADS), got {lt_name}"
            raise JellyConformanceError(msg)

    physical_type = options.stream_types.physical_type

    if physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
        for frame_rows in parse_triples_stream(frames=frames, options=options):
            yield from frame_rows
        return

    if physical_type in (
        jelly.PHYSICAL_STREAM_TYPE_QUADS,
        jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
    ):
        for frame_rows in parse_quads_stream(frames=frames, options=options):
            yield from frame_rows
        return

    physical_type_name = jelly.PhysicalStreamType.Name(physical_type)
    msg = f"the stream type {physical_type_name} is not supported "
    raise NotImplementedError(msg)
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
class RDFLibJellyParser(RDFLibParser):
    """RDFLib ``Parser`` implementation that reads jelly input."""

    def parse(
        self,
        source: InputSource,
        sink: Graph,
    ) -> None:
        """
        Parse jelly file into provided RDFLib Graph.

        The statements are loaded through ``parse_jelly_to_graph`` into a
        Graph (sharing ``sink``'s store and identifier) or a Dataset (sharing
        ``sink``'s store), so they end up in the store backing ``sink``.

        Args:
            source (InputSource): jelly file as buffered binary stream InputSource obj
            sink (Graph): RDFLib Graph

        Raises:
            TypeError: if ``source.getByteStream()`` returns None

        """
        byte_stream = source.getByteStream()
        if byte_stream is None:
            msg = "expected source to be a stream of bytes"
            raise TypeError(msg)

        # cast() only informs the type checker and returns its argument
        # unchanged, so the None check above is the only one needed.
        inp = cast(IO[bytes], byte_stream)
        parse_jelly_to_graph(
            inp,
            graph_factory=lambda: Graph(store=sink.store, identifier=sink.identifier),
            dataset_factory=lambda: Dataset(store=sink.store),
        )
|