pyjelly 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyjelly might be problematic. Click here for more details.
- pyjelly/integrations/generic/__init__.py +0 -0
- pyjelly/integrations/generic/generic_sink.py +163 -0
- pyjelly/integrations/generic/parse.py +339 -0
- pyjelly/integrations/generic/serialize.py +361 -0
- pyjelly/integrations/rdflib/parse.py +235 -156
- pyjelly/integrations/rdflib/serialize.py +189 -60
- pyjelly/jelly/rdf_pb2.py +3 -3
- pyjelly/jelly/rdf_pb2.pyi +2 -1
- pyjelly/options.py +9 -0
- pyjelly/parse/decode.py +32 -10
- pyjelly/parse/ioutils.py +10 -4
- pyjelly/serialize/encode.py +30 -3
- pyjelly/serialize/flows.py +24 -14
- pyjelly/serialize/streams.py +5 -2
- {pyjelly-0.3.0.dist-info → pyjelly-0.5.0.dist-info}/METADATA +10 -9
- pyjelly-0.5.0.dist-info/RECORD +32 -0
- pyjelly-0.3.0.dist-info/RECORD +0 -28
- {pyjelly-0.3.0.dist-info → pyjelly-0.5.0.dist-info}/WHEEL +0 -0
- {pyjelly-0.3.0.dist-info → pyjelly-0.5.0.dist-info}/entry_points.txt +0 -0
- {pyjelly-0.3.0.dist-info → pyjelly-0.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from collections.abc import Generator, Iterable
|
|
4
|
-
from
|
|
5
|
-
from
|
|
4
|
+
from itertools import chain
|
|
5
|
+
from typing import IO, Any, Callable, Union
|
|
6
|
+
from typing_extensions import Never, Self, override
|
|
6
7
|
|
|
7
8
|
import rdflib
|
|
9
|
+
from rdflib import Node
|
|
8
10
|
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Dataset, Graph
|
|
9
11
|
from rdflib.parser import InputSource
|
|
10
12
|
from rdflib.parser import Parser as RDFLibParser
|
|
@@ -12,16 +14,113 @@ from rdflib.parser import Parser as RDFLibParser
|
|
|
12
14
|
from pyjelly import jelly
|
|
13
15
|
from pyjelly.errors import JellyConformanceError
|
|
14
16
|
from pyjelly.options import StreamTypes
|
|
15
|
-
from pyjelly.parse.decode import Adapter, Decoder, ParserOptions
|
|
17
|
+
from pyjelly.parse.decode import Adapter, Decoder, ParserOptions
|
|
16
18
|
from pyjelly.parse.ioutils import get_options_and_frames
|
|
17
19
|
|
|
20
|
+
GraphName = Union[rdflib.URIRef, rdflib.BNode, str]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Triple(tuple[Node, Node, Node]):
|
|
24
|
+
"""
|
|
25
|
+
Describe RDFLib triple.
|
|
26
|
+
|
|
27
|
+
Args:
|
|
28
|
+
tuple (Node, Node, Node): s/p/o tuple of RDFLib Nodes.
|
|
29
|
+
|
|
30
|
+
Returns:
|
|
31
|
+
Triple: triple as tuple.
|
|
32
|
+
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
__slots__ = ()
|
|
36
|
+
|
|
37
|
+
def __new__(cls, s: Node, p: Node, o: Node) -> Self:
|
|
38
|
+
return tuple.__new__(cls, (s, p, o))
|
|
39
|
+
|
|
40
|
+
@property
|
|
41
|
+
def s(self) -> Node:
|
|
42
|
+
return self[0]
|
|
43
|
+
|
|
44
|
+
@property
|
|
45
|
+
def p(self) -> Node:
|
|
46
|
+
return self[1]
|
|
47
|
+
|
|
48
|
+
@property
|
|
49
|
+
def o(self) -> Node:
|
|
50
|
+
return self[2]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class Quad(tuple[Node, Node, Node, GraphName]):
|
|
54
|
+
"""
|
|
55
|
+
Describe RDFLib quad.
|
|
56
|
+
|
|
57
|
+
Args:
|
|
58
|
+
tuple (Node, Node, Node, GraphName):
|
|
59
|
+
s/p/o/g as a tuple of RDFLib nodes and a GraphName,
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
Quad: quad as tuple.
|
|
63
|
+
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
__slots__ = ()
|
|
67
|
+
|
|
68
|
+
def __new__(cls, s: Node, p: Node, o: Node, g: GraphName) -> Self:
|
|
69
|
+
return tuple.__new__(cls, (s, p, o, g))
|
|
70
|
+
|
|
71
|
+
@property
|
|
72
|
+
def s(self) -> Node:
|
|
73
|
+
return self[0]
|
|
74
|
+
|
|
75
|
+
@property
|
|
76
|
+
def p(self) -> Node:
|
|
77
|
+
return self[1]
|
|
78
|
+
|
|
79
|
+
@property
|
|
80
|
+
def o(self) -> Node:
|
|
81
|
+
return self[2]
|
|
82
|
+
|
|
83
|
+
@property
|
|
84
|
+
def g(self) -> GraphName:
|
|
85
|
+
return self[3]
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
Statement = Union[Triple, Quad]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class Prefix(tuple[str, rdflib.URIRef]):
|
|
92
|
+
"""
|
|
93
|
+
Describe RDF Prefix(i.e, namespace declaration).
|
|
94
|
+
|
|
95
|
+
Args:
|
|
96
|
+
tuple (str, rdflib.URIRef): expects prefix as a string,
|
|
97
|
+
and full namespace URI as Rdflib.URIRef.
|
|
98
|
+
|
|
99
|
+
Returns:
|
|
100
|
+
Prefix: prefix as tuple(prefix, iri).
|
|
101
|
+
|
|
102
|
+
"""
|
|
103
|
+
|
|
104
|
+
__slots__ = ()
|
|
105
|
+
|
|
106
|
+
def __new__(cls, prefix: str, iri: rdflib.URIRef) -> Self:
|
|
107
|
+
return tuple.__new__(cls, (prefix, iri))
|
|
108
|
+
|
|
109
|
+
@property
|
|
110
|
+
def prefix(self) -> str:
|
|
111
|
+
return self[0]
|
|
112
|
+
|
|
113
|
+
@property
|
|
114
|
+
def iri(self) -> rdflib.URIRef:
|
|
115
|
+
return self[1]
|
|
116
|
+
|
|
18
117
|
|
|
19
118
|
class RDFLibAdapter(Adapter):
|
|
20
119
|
"""
|
|
21
120
|
RDFLib adapter class, is extended by triples and quads implementations.
|
|
22
121
|
|
|
23
122
|
Args:
|
|
24
|
-
Adapter (
|
|
123
|
+
Adapter (): abstract adapter class
|
|
25
124
|
|
|
26
125
|
"""
|
|
27
126
|
|
|
@@ -46,13 +145,15 @@ class RDFLibAdapter(Adapter):
|
|
|
46
145
|
) -> rdflib.Literal:
|
|
47
146
|
return rdflib.Literal(lex, lang=language, datatype=datatype)
|
|
48
147
|
|
|
148
|
+
@override
|
|
149
|
+
def namespace_declaration(self, name: str, iri: str) -> Prefix:
|
|
150
|
+
return Prefix(name, self.iri(iri))
|
|
151
|
+
|
|
49
152
|
|
|
50
153
|
def _adapter_missing(feature: str, *, stream_types: StreamTypes) -> Never:
|
|
51
154
|
"""
|
|
52
155
|
Raise error if functionality is missing in adapter.
|
|
53
156
|
|
|
54
|
-
TODO: currently not used anywhere due to logical types being removed
|
|
55
|
-
|
|
56
157
|
Args:
|
|
57
158
|
feature (str): function which is not implemented
|
|
58
159
|
stream_types (StreamTypes): what combination of physical/logical types
|
|
@@ -79,93 +180,48 @@ class RDFLibTriplesAdapter(RDFLibAdapter):
|
|
|
79
180
|
"""
|
|
80
181
|
Triples adapter RDFLib implementation.
|
|
81
182
|
|
|
82
|
-
Notes:
|
|
83
|
-
triples and namespaces and can get flushed between frames.
|
|
183
|
+
Notes: returns triple/namespace declaration as soon as receives them.
|
|
84
184
|
"""
|
|
85
185
|
|
|
86
186
|
def __init__(
|
|
87
187
|
self,
|
|
88
188
|
options: ParserOptions,
|
|
89
|
-
graph_factory: Callable[[], Graph],
|
|
90
|
-
parsing_mode: ParsingMode = ParsingMode.FLAT,
|
|
91
189
|
) -> None:
|
|
92
|
-
super().__init__(options=options
|
|
93
|
-
self.graph = graph_factory()
|
|
94
|
-
self.graph_factory = graph_factory
|
|
95
|
-
self.parsing_mode = parsing_mode
|
|
190
|
+
super().__init__(options=options)
|
|
96
191
|
|
|
97
192
|
@override
|
|
98
|
-
def triple(self, terms: Iterable[Any]) ->
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
@override
|
|
102
|
-
def namespace_declaration(self, name: str, iri: str) -> None:
|
|
103
|
-
self.graph.bind(name, self.iri(iri))
|
|
104
|
-
|
|
105
|
-
def frame(self) -> Graph:
|
|
106
|
-
"""
|
|
107
|
-
Finalize one frame in triples stream.
|
|
108
|
-
|
|
109
|
-
Returns:
|
|
110
|
-
Graph: frame content as a separate Graph
|
|
111
|
-
and starts a new Graph
|
|
112
|
-
|
|
113
|
-
"""
|
|
114
|
-
this_graph = self.graph
|
|
115
|
-
self.graph = self.graph_factory()
|
|
116
|
-
return this_graph
|
|
193
|
+
def triple(self, terms: Iterable[Any]) -> Triple:
|
|
194
|
+
return Triple(*terms)
|
|
117
195
|
|
|
118
196
|
|
|
119
197
|
class RDFLibQuadsBaseAdapter(RDFLibAdapter):
|
|
120
|
-
def __init__(
|
|
121
|
-
|
|
122
|
-
options: ParserOptions,
|
|
123
|
-
dataset_factory: Callable[[], Dataset],
|
|
124
|
-
parsing_mode: ParsingMode = ParsingMode.FLAT,
|
|
125
|
-
) -> None:
|
|
126
|
-
super().__init__(options=options, parsing_mode=parsing_mode)
|
|
127
|
-
self.dataset = dataset_factory()
|
|
128
|
-
self.dataset_factory = dataset_factory
|
|
129
|
-
|
|
130
|
-
@override
|
|
131
|
-
def frame(self) -> Dataset:
|
|
132
|
-
current_dataset = self.dataset
|
|
133
|
-
self.dataset = self.dataset_factory()
|
|
134
|
-
return current_dataset
|
|
198
|
+
def __init__(self, options: ParserOptions) -> None:
|
|
199
|
+
super().__init__(options=options)
|
|
135
200
|
|
|
136
201
|
|
|
137
202
|
class RDFLibQuadsAdapter(RDFLibQuadsBaseAdapter):
|
|
138
203
|
"""
|
|
139
204
|
Extended RDFLib adapter for the QUADS physical type.
|
|
140
205
|
|
|
141
|
-
Notes:
|
|
142
|
-
Adds triples and namespaces directly to
|
|
143
|
-
dataset, so RDFLib handles the rest.
|
|
144
|
-
|
|
145
206
|
Args:
|
|
146
|
-
RDFLibQuadsBaseAdapter (
|
|
207
|
+
RDFLibQuadsBaseAdapter (RDFLibAdapter): base quads adapter
|
|
147
208
|
(shared with graphs physical type)
|
|
148
209
|
|
|
149
210
|
"""
|
|
150
211
|
|
|
151
212
|
@override
|
|
152
|
-
def
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
@override
|
|
156
|
-
def quad(self, terms: Iterable[Any]) -> Any:
|
|
157
|
-
self.dataset.add(tuple(terms))
|
|
213
|
+
def quad(self, terms: Iterable[Any]) -> Quad:
|
|
214
|
+
return Quad(*terms)
|
|
158
215
|
|
|
159
216
|
|
|
160
217
|
class RDFLibGraphsAdapter(RDFLibQuadsBaseAdapter):
|
|
161
218
|
"""
|
|
162
219
|
Extension of RDFLibQuadsBaseAdapter for the GRAPHS physical type.
|
|
163
220
|
|
|
164
|
-
Notes: introduces graph start/end, checks if graph exists
|
|
165
|
-
dataset store management.
|
|
221
|
+
Notes: introduces graph start/end, checks if graph exists.
|
|
166
222
|
|
|
167
223
|
Args:
|
|
168
|
-
RDFLibQuadsBaseAdapter (
|
|
224
|
+
RDFLibQuadsBaseAdapter (RDFLibAdapter): base adapter for quads management.
|
|
169
225
|
|
|
170
226
|
Raises:
|
|
171
227
|
JellyConformanceError: if no graph_start was encountered
|
|
@@ -177,14 +233,8 @@ class RDFLibGraphsAdapter(RDFLibQuadsBaseAdapter):
|
|
|
177
233
|
def __init__(
|
|
178
234
|
self,
|
|
179
235
|
options: ParserOptions,
|
|
180
|
-
dataset_factory: Callable[[], Dataset],
|
|
181
|
-
parsing_mode: ParsingMode = ParsingMode.FLAT,
|
|
182
236
|
) -> None:
|
|
183
|
-
super().__init__(
|
|
184
|
-
options=options,
|
|
185
|
-
dataset_factory=dataset_factory,
|
|
186
|
-
parsing_mode=parsing_mode,
|
|
187
|
-
)
|
|
237
|
+
super().__init__(options=options)
|
|
188
238
|
self._graph_id = None
|
|
189
239
|
|
|
190
240
|
@property
|
|
@@ -198,12 +248,8 @@ class RDFLibGraphsAdapter(RDFLibQuadsBaseAdapter):
|
|
|
198
248
|
self._graph_id = graph_id
|
|
199
249
|
|
|
200
250
|
@override
|
|
201
|
-
def
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
@override
|
|
205
|
-
def triple(self, terms: Iterable[Any]) -> None:
|
|
206
|
-
self.dataset.add((*terms, self._graph_id))
|
|
251
|
+
def triple(self, terms: Iterable[Any]) -> Quad:
|
|
252
|
+
return Quad(*chain(terms, [self._graph_id]))
|
|
207
253
|
|
|
208
254
|
@override
|
|
209
255
|
def graph_end(self) -> None:
|
|
@@ -213,54 +259,42 @@ class RDFLibGraphsAdapter(RDFLibQuadsBaseAdapter):
|
|
|
213
259
|
def parse_triples_stream(
|
|
214
260
|
frames: Iterable[jelly.RdfStreamFrame],
|
|
215
261
|
options: ParserOptions,
|
|
216
|
-
|
|
217
|
-
parsing_mode: ParsingMode = ParsingMode.FLAT,
|
|
218
|
-
) -> Generator[Graph]:
|
|
262
|
+
) -> Generator[Iterable[Triple | Prefix]]:
|
|
219
263
|
"""
|
|
220
264
|
Parse flat triple stream.
|
|
221
265
|
|
|
222
266
|
Args:
|
|
223
267
|
frames (Iterable[jelly.RdfStreamFrame]): iterator over stream frames
|
|
224
268
|
options (ParserOptions): stream options
|
|
225
|
-
graph_factory (Callable): Lambda to construct a graph
|
|
226
|
-
parsing_mode (ParsingMode): specifies whether this is
|
|
227
|
-
a flat or grouped parsing.
|
|
228
269
|
|
|
229
270
|
Yields:
|
|
230
|
-
Generator[
|
|
271
|
+
Generator[Iterable[Triple | Prefix]]:
|
|
272
|
+
Generator of iterables of Triple or Prefix objects,
|
|
273
|
+
one iterable per frame.
|
|
231
274
|
|
|
232
275
|
"""
|
|
233
|
-
adapter = RDFLibTriplesAdapter(
|
|
234
|
-
options, graph_factory=graph_factory, parsing_mode=parsing_mode
|
|
235
|
-
)
|
|
276
|
+
adapter = RDFLibTriplesAdapter(options)
|
|
236
277
|
decoder = Decoder(adapter=adapter)
|
|
237
278
|
for frame in frames:
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
yield g
|
|
241
|
-
|
|
242
|
-
if parsing_mode is ParsingMode.FLAT:
|
|
243
|
-
yield adapter.graph
|
|
279
|
+
yield decoder.iter_rows(frame)
|
|
280
|
+
return
|
|
244
281
|
|
|
245
282
|
|
|
246
283
|
def parse_quads_stream(
|
|
247
284
|
frames: Iterable[jelly.RdfStreamFrame],
|
|
248
285
|
options: ParserOptions,
|
|
249
|
-
|
|
250
|
-
parsing_mode: ParsingMode = ParsingMode.FLAT,
|
|
251
|
-
) -> Generator[Dataset]:
|
|
286
|
+
) -> Generator[Iterable[Quad | Prefix]]:
|
|
252
287
|
"""
|
|
253
288
|
Parse flat quads stream.
|
|
254
289
|
|
|
255
290
|
Args:
|
|
256
291
|
frames (Iterable[jelly.RdfStreamFrame]): iterator over stream frames
|
|
257
292
|
options (ParserOptions): stream options
|
|
258
|
-
dataset_factory (Callable): Lambda to construct a dataset
|
|
259
|
-
parsing_mode (ParsingMode): specifies whether this is
|
|
260
|
-
a flat or grouped parsing.
|
|
261
293
|
|
|
262
294
|
Yields:
|
|
263
|
-
Generator[
|
|
295
|
+
Generator[Iterable[Quad | Prefix]]:
|
|
296
|
+
Generator of iterables of Quad or Prefix objects,
|
|
297
|
+
one iterable per frame.
|
|
264
298
|
|
|
265
299
|
"""
|
|
266
300
|
adapter_class: type[RDFLibQuadsBaseAdapter]
|
|
@@ -268,65 +302,71 @@ def parse_quads_stream(
|
|
|
268
302
|
adapter_class = RDFLibQuadsAdapter
|
|
269
303
|
else:
|
|
270
304
|
adapter_class = RDFLibGraphsAdapter
|
|
271
|
-
adapter = adapter_class(
|
|
272
|
-
options=options,
|
|
273
|
-
dataset_factory=dataset_factory,
|
|
274
|
-
parsing_mode=parsing_mode,
|
|
275
|
-
)
|
|
305
|
+
adapter = adapter_class(options=options)
|
|
276
306
|
decoder = Decoder(adapter=adapter)
|
|
277
307
|
for frame in frames:
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
yield ds
|
|
281
|
-
|
|
282
|
-
if parsing_mode is ParsingMode.FLAT:
|
|
283
|
-
yield adapter.dataset
|
|
308
|
+
yield decoder.iter_rows(frame)
|
|
309
|
+
return
|
|
284
310
|
|
|
285
311
|
|
|
286
312
|
def parse_jelly_grouped(
|
|
287
313
|
inp: IO[bytes],
|
|
288
|
-
graph_factory: Callable[[], Graph],
|
|
289
|
-
dataset_factory: Callable[[], Dataset],
|
|
290
|
-
) -> Generator[
|
|
314
|
+
graph_factory: Callable[[], Graph] = lambda: Graph(),
|
|
315
|
+
dataset_factory: Callable[[], Dataset] = lambda: Dataset(),
|
|
316
|
+
) -> Generator[Graph] | Generator[Dataset]:
|
|
291
317
|
"""
|
|
292
|
-
Take jelly file and return generators based on the detected
|
|
318
|
+
Take jelly file and return generators based on the detected physical type.
|
|
293
319
|
|
|
294
320
|
Yields one graph/dataset per frame.
|
|
295
321
|
|
|
296
322
|
Args:
|
|
297
323
|
inp (IO[bytes]): input jelly buffered binary stream
|
|
298
|
-
graph_factory (Callable): lambda to construct a Graph
|
|
299
|
-
|
|
324
|
+
graph_factory (Callable): lambda to construct a Graph.
|
|
325
|
+
By default creates an empty in-memory Graph,
|
|
326
|
+
but you can pass something else here.
|
|
327
|
+
dataset_factory (Callable): lambda to construct a Dataset.
|
|
328
|
+
By default creates an empty in-memory Dataset,
|
|
329
|
+
but you can pass something else here.
|
|
300
330
|
|
|
301
331
|
Raises:
|
|
302
|
-
NotImplementedError: is raised if a
|
|
332
|
+
NotImplementedError: is raised if a physical type is not implemented
|
|
303
333
|
|
|
304
334
|
Yields:
|
|
305
|
-
Generator[
|
|
335
|
+
Generator[Graph] | Generator[Dataset]:
|
|
306
336
|
returns generators for graphs/datasets based on the type of input
|
|
307
337
|
|
|
308
338
|
"""
|
|
309
339
|
options, frames = get_options_and_frames(inp)
|
|
310
|
-
|
|
311
340
|
if options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
|
|
312
|
-
|
|
341
|
+
for graph in parse_triples_stream(
|
|
313
342
|
frames=frames,
|
|
314
343
|
options=options,
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
344
|
+
):
|
|
345
|
+
sink = graph_factory()
|
|
346
|
+
for graph_item in graph:
|
|
347
|
+
if isinstance(graph_item, Prefix):
|
|
348
|
+
sink.bind(graph_item.prefix, graph_item.iri)
|
|
349
|
+
else:
|
|
350
|
+
sink.add(graph_item)
|
|
351
|
+
yield sink
|
|
318
352
|
return
|
|
319
|
-
|
|
320
|
-
if options.stream_types.physical_type in (
|
|
353
|
+
elif options.stream_types.physical_type in (
|
|
321
354
|
jelly.PHYSICAL_STREAM_TYPE_QUADS,
|
|
322
355
|
jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
|
|
323
356
|
):
|
|
324
|
-
|
|
357
|
+
for dataset in parse_quads_stream(
|
|
325
358
|
frames=frames,
|
|
326
359
|
options=options,
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
360
|
+
):
|
|
361
|
+
sink = dataset_factory()
|
|
362
|
+
for item in dataset:
|
|
363
|
+
if isinstance(item, Prefix):
|
|
364
|
+
sink.bind(item.prefix, item.iri)
|
|
365
|
+
else:
|
|
366
|
+
s, p, o, graph_name = item
|
|
367
|
+
context = sink.get_context(graph_name)
|
|
368
|
+
sink.add((s, p, o, context))
|
|
369
|
+
yield sink
|
|
330
370
|
return
|
|
331
371
|
|
|
332
372
|
physical_type_name = jelly.PhysicalStreamType.Name(
|
|
@@ -336,50 +376,89 @@ def parse_jelly_grouped(
|
|
|
336
376
|
raise NotImplementedError(msg)
|
|
337
377
|
|
|
338
378
|
|
|
379
|
+
def parse_jelly_to_graph(
|
|
380
|
+
inp: IO[bytes],
|
|
381
|
+
graph_factory: Callable[[], Graph] = lambda: Graph(),
|
|
382
|
+
dataset_factory: Callable[[], Dataset] = lambda: Dataset(),
|
|
383
|
+
) -> Graph | Dataset:
|
|
384
|
+
"""
|
|
385
|
+
Add statements from Generator to provided Graph/Dataset.
|
|
386
|
+
|
|
387
|
+
Args:
|
|
388
|
+
inp (IO[bytes]): input jelly stream.
|
|
389
|
+
graph_factory (Callable[[], Graph]): factory to create Graph.
|
|
390
|
+
By default creates an empty in-memory Graph,
|
|
391
|
+
but you can pass something else here.
|
|
392
|
+
dataset_factory (Callable[[], Dataset]): factory to create Dataset.
|
|
393
|
+
By default creates an empty in-memory Dataset,
|
|
394
|
+
but you can pass something else here.
|
|
395
|
+
|
|
396
|
+
Returns:
|
|
397
|
+
Dataset | Graph: Dataset or Graph with statements.
|
|
398
|
+
|
|
399
|
+
"""
|
|
400
|
+
options, frames = get_options_and_frames(inp)
|
|
401
|
+
|
|
402
|
+
if options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
|
|
403
|
+
sink = graph_factory()
|
|
404
|
+
if options.stream_types.physical_type in (
|
|
405
|
+
jelly.PHYSICAL_STREAM_TYPE_QUADS,
|
|
406
|
+
jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
|
|
407
|
+
):
|
|
408
|
+
quad_sink = dataset_factory()
|
|
409
|
+
sink = quad_sink
|
|
410
|
+
|
|
411
|
+
for item in parse_jelly_flat(inp=inp, frames=frames, options=options):
|
|
412
|
+
if isinstance(item, Prefix):
|
|
413
|
+
sink.bind(item.prefix, item.iri)
|
|
414
|
+
if isinstance(item, Triple):
|
|
415
|
+
sink.add(item)
|
|
416
|
+
if isinstance(item, Quad):
|
|
417
|
+
s, p, o, graph_name = item
|
|
418
|
+
context = quad_sink.get_context(graph_name)
|
|
419
|
+
quad_sink.add((s, p, o, context))
|
|
420
|
+
return sink
|
|
421
|
+
|
|
422
|
+
|
|
339
423
|
def parse_jelly_flat(
|
|
340
424
|
inp: IO[bytes],
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
) ->
|
|
425
|
+
frames: Iterable[jelly.RdfStreamFrame] | None = None,
|
|
426
|
+
options: ParserOptions | None = None,
|
|
427
|
+
) -> Generator[Statement | Prefix]:
|
|
344
428
|
"""
|
|
345
|
-
Parse jelly file with FLAT
|
|
429
|
+
Parse jelly file with FLAT logical type into a Generator of stream events.
|
|
346
430
|
|
|
347
431
|
Args:
|
|
348
|
-
inp (IO[bytes]): input jelly buffered binary stream
|
|
349
|
-
|
|
350
|
-
|
|
432
|
+
inp (IO[bytes]): input jelly buffered binary stream.
|
|
433
|
+
frames (Iterable[jelly.RdfStreamFrame | None):
|
|
434
|
+
jelly frames if read before.
|
|
435
|
+
options (ParserOptions | None): stream options
|
|
436
|
+
if read before.
|
|
351
437
|
|
|
352
438
|
Raises:
|
|
353
439
|
NotImplementedError: if physical type is not supported
|
|
354
440
|
|
|
355
|
-
|
|
356
|
-
|
|
441
|
+
Yields:
|
|
442
|
+
Generator[Statement | Prefix]: Generator of stream events
|
|
357
443
|
|
|
358
444
|
"""
|
|
359
|
-
|
|
445
|
+
if not frames or not options:
|
|
446
|
+
options, frames = get_options_and_frames(inp)
|
|
360
447
|
|
|
361
448
|
if options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
options=options,
|
|
366
|
-
graph_factory=graph_factory,
|
|
367
|
-
parsing_mode=ParsingMode.FLAT,
|
|
368
|
-
)
|
|
369
|
-
)
|
|
370
|
-
|
|
449
|
+
for triples in parse_triples_stream(frames=frames, options=options):
|
|
450
|
+
yield from triples
|
|
451
|
+
return
|
|
371
452
|
if options.stream_types.physical_type in (
|
|
372
453
|
jelly.PHYSICAL_STREAM_TYPE_QUADS,
|
|
373
454
|
jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
|
|
374
455
|
):
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
)
|
|
382
|
-
)
|
|
456
|
+
for quads in parse_quads_stream(
|
|
457
|
+
frames=frames,
|
|
458
|
+
options=options,
|
|
459
|
+
):
|
|
460
|
+
yield from quads
|
|
461
|
+
return
|
|
383
462
|
physical_type_name = jelly.PhysicalStreamType.Name(
|
|
384
463
|
options.stream_types.physical_type
|
|
385
464
|
)
|
|
@@ -404,7 +483,7 @@ class RDFLibJellyParser(RDFLibParser):
|
|
|
404
483
|
if inp is None:
|
|
405
484
|
msg = "expected source to be a stream of bytes"
|
|
406
485
|
raise TypeError(msg)
|
|
407
|
-
|
|
486
|
+
parse_jelly_to_graph(
|
|
408
487
|
inp,
|
|
409
488
|
graph_factory=lambda: Graph(store=sink.store, identifier=sink.identifier),
|
|
410
489
|
dataset_factory=lambda: Dataset(store=sink.store),
|