pyjelly 0.2.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyjelly might be problematic. Click here for more details.
- pyjelly/integrations/rdflib/parse.py +364 -166
- pyjelly/integrations/rdflib/serialize.py +168 -43
- pyjelly/jelly/rdf_pb2.py +3 -3
- pyjelly/jelly/rdf_pb2.pyi +2 -1
- pyjelly/parse/decode.py +166 -5
- pyjelly/parse/ioutils.py +26 -4
- pyjelly/serialize/encode.py +117 -0
- pyjelly/serialize/flows.py +70 -13
- pyjelly/serialize/streams.py +87 -3
- {pyjelly-0.2.3.dist-info → pyjelly-0.4.0.dist-info}/METADATA +8 -8
- {pyjelly-0.2.3.dist-info → pyjelly-0.4.0.dist-info}/RECORD +14 -14
- {pyjelly-0.2.3.dist-info → pyjelly-0.4.0.dist-info}/WHEEL +0 -0
- {pyjelly-0.2.3.dist-info → pyjelly-0.4.0.dist-info}/entry_points.txt +0 -0
- {pyjelly-0.2.3.dist-info → pyjelly-0.4.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from collections.abc import Generator, Iterable
|
|
4
|
-
from
|
|
5
|
-
from
|
|
4
|
+
from itertools import chain
|
|
5
|
+
from typing import IO, Any, Callable, Union
|
|
6
|
+
from typing_extensions import Never, Self, override
|
|
6
7
|
|
|
7
8
|
import rdflib
|
|
9
|
+
from rdflib import Node
|
|
8
10
|
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Dataset, Graph
|
|
9
11
|
from rdflib.parser import InputSource
|
|
10
12
|
from rdflib.parser import Parser as RDFLibParser
|
|
11
|
-
from rdflib.store import Store
|
|
12
13
|
|
|
13
14
|
from pyjelly import jelly
|
|
14
15
|
from pyjelly.errors import JellyConformanceError
|
|
@@ -16,8 +17,113 @@ from pyjelly.options import StreamTypes
|
|
|
16
17
|
from pyjelly.parse.decode import Adapter, Decoder, ParserOptions
|
|
17
18
|
from pyjelly.parse.ioutils import get_options_and_frames
|
|
18
19
|
|
|
20
|
+
# Type of the graph component (``g``) of a Quad: an IRI, a blank node,
# or a plain string.
GraphName = Union[rdflib.URIRef, rdflib.BNode, str]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Triple(tuple[Node, Node, Node]):
    """
    Immutable RDFLib triple stored as a plain ``(s, p, o)`` tuple.

    The class adds no per-instance storage (``__slots__`` is empty); it only
    gives the three positions read-only names, so an instance can be used
    wherever an ordinary 3-tuple is accepted.

    Args:
        s (Node): subject.
        p (Node): predicate.
        o (Node): object.

    """

    __slots__ = ()

    def __new__(cls, s: Node, p: Node, o: Node) -> Self:
        terms = (s, p, o)
        return super().__new__(cls, terms)

    @property
    def s(self) -> Node:
        """Subject: the first element of the tuple."""
        return self[0]

    @property
    def p(self) -> Node:
        """Predicate: the second element of the tuple."""
        return self[1]

    @property
    def o(self) -> Node:
        """Object: the third element of the tuple."""
        return self[2]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class Quad(tuple[Node, Node, Node, GraphName]):
    """
    Immutable RDFLib quad stored as a plain ``(s, p, o, g)`` tuple.

    The class adds no per-instance storage (``__slots__`` is empty); it only
    gives the four positions read-only names, so an instance can still be
    unpacked like an ordinary 4-tuple.

    Args:
        s (Node): subject.
        p (Node): predicate.
        o (Node): object.
        g (GraphName): name of the graph the statement belongs to.

    """

    __slots__ = ()

    def __new__(cls, s: Node, p: Node, o: Node, g: GraphName) -> Self:
        terms = (s, p, o, g)
        return super().__new__(cls, terms)

    @property
    def s(self) -> Node:
        """Subject: the first element of the tuple."""
        return self[0]

    @property
    def p(self) -> Node:
        """Predicate: the second element of the tuple."""
        return self[1]

    @property
    def o(self) -> Node:
        """Object: the third element of the tuple."""
        return self[2]

    @property
    def g(self) -> GraphName:
        """Graph name: the fourth element of the tuple."""
        return self[3]
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# A parsed RDF statement: either a Triple or a Quad.
Statement = Union[Triple, Quad]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class Prefix(tuple[str, rdflib.URIRef]):
    """
    RDF prefix (i.e., a namespace declaration) stored as ``(prefix, iri)``.

    The class adds no per-instance storage (``__slots__`` is empty); it only
    gives the two positions read-only names.

    Args:
        prefix (str): the prefix label.
        iri (rdflib.URIRef): full namespace IRI the prefix stands for.

    """

    __slots__ = ()

    def __new__(cls, prefix: str, iri: rdflib.URIRef) -> Self:
        pair = (prefix, iri)
        return super().__new__(cls, pair)

    @property
    def prefix(self) -> str:
        """Prefix label: the first element of the tuple."""
        return self[0]

    @property
    def iri(self) -> rdflib.URIRef:
        """Namespace IRI: the second element of the tuple."""
        return self[1]
|
|
116
|
+
|
|
19
117
|
|
|
20
118
|
class RDFLibAdapter(Adapter):
|
|
119
|
+
"""
|
|
120
|
+
RDFLib adapter class, is extended by triples and quads implementations.
|
|
121
|
+
|
|
122
|
+
Args:
|
|
123
|
+
Adapter (): abstract adapter class
|
|
124
|
+
|
|
125
|
+
"""
|
|
126
|
+
|
|
21
127
|
@override
|
|
22
128
|
def iri(self, iri: str) -> rdflib.URIRef:
|
|
23
129
|
return rdflib.URIRef(iri)
|
|
@@ -39,8 +145,28 @@ class RDFLibAdapter(Adapter):
|
|
|
39
145
|
) -> rdflib.Literal:
|
|
40
146
|
return rdflib.Literal(lex, lang=language, datatype=datatype)
|
|
41
147
|
|
|
148
|
+
@override
|
|
149
|
+
def namespace_declaration(self, name: str, iri: str) -> Prefix:
|
|
150
|
+
return Prefix(name, self.iri(iri))
|
|
151
|
+
|
|
42
152
|
|
|
43
153
|
def _adapter_missing(feature: str, *, stream_types: StreamTypes) -> Never:
|
|
154
|
+
"""
|
|
155
|
+
Raise error if functionality is missing in adapter.
|
|
156
|
+
|
|
157
|
+
Args:
|
|
158
|
+
feature (str): function which is not implemented
|
|
159
|
+
stream_types (StreamTypes): what combination of physical/logical types
|
|
160
|
+
triggered the error
|
|
161
|
+
|
|
162
|
+
Raises:
|
|
163
|
+
NotImplementedError: raises error with message with missing functionality
|
|
164
|
+
and types encountered
|
|
165
|
+
|
|
166
|
+
Returns:
|
|
167
|
+
Never: only raises errors
|
|
168
|
+
|
|
169
|
+
"""
|
|
44
170
|
physical_type_name = jelly.PhysicalStreamType.Name(stream_types.physical_type)
|
|
45
171
|
logical_type_name = jelly.LogicalStreamType.Name(stream_types.logical_type)
|
|
46
172
|
msg = (
|
|
@@ -51,242 +177,314 @@ def _adapter_missing(feature: str, *, stream_types: StreamTypes) -> Never:
|
|
|
51
177
|
|
|
52
178
|
|
|
53
179
|
class RDFLibTriplesAdapter(RDFLibAdapter):
    """
    RDFLib adapter that turns decoded triples into ``Triple`` objects.

    Notes: nothing is accumulated in the adapter; every triple is handed
    back to the caller as soon as it is received.
    """

    def __init__(self, options: ParserOptions) -> None:
        super().__init__(options=options)

    @override
    def triple(self, terms: Iterable[Any]) -> Triple:
        """Wrap the decoded ``s, p, o`` terms into a ``Triple``."""
        spo = tuple(terms)
        return Triple(*spo)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class RDFLibQuadsBaseAdapter(RDFLibAdapter):
    """
    Common base class of the RDFLib adapters that produce quads.

    Adds no behavior of its own; ``RDFLibQuadsAdapter`` and
    ``RDFLibGraphsAdapter`` derive from it.
    """

    def __init__(self, options: ParserOptions) -> None:
        # Only forwards the stream options to the parent adapter.
        super().__init__(options=options)
|
|
111
200
|
|
|
112
201
|
|
|
113
202
|
class RDFLibQuadsAdapter(RDFLibQuadsBaseAdapter):
    """
    RDFLib adapter for the QUADS physical type.

    Shares its base class, ``RDFLibQuadsBaseAdapter``, with the adapter for
    the GRAPHS physical type.
    """

    @override
    def quad(self, terms: Iterable[Any]) -> Quad:
        """Wrap the decoded ``s, p, o, g`` terms into a ``Quad``."""
        spog = tuple(terms)
        return Quad(*spog)
|
|
121
215
|
|
|
122
216
|
|
|
123
217
|
class RDFLibGraphsAdapter(RDFLibQuadsBaseAdapter):
    """
    RDFLib adapter for the GRAPHS physical type.

    Triples of a GRAPHS stream arrive between ``graph_start`` and
    ``graph_end`` events. The adapter remembers the identifier of the graph
    that is currently open and attaches it to every triple, so callers
    receive ``Quad`` objects.

    Raises:
        JellyConformanceError: if a triple arrives (or ``graph`` is read)
            while no graph is open.

    """

    # Identifier given to the latest graph_start; None while no graph is open.
    _graph_id: str | None

    def __init__(
        self,
        options: ParserOptions,
    ) -> None:
        super().__init__(options=options)
        self._graph_id = None

    def _open_graph_id(self) -> str:
        """Return the id of the open graph, raising if there is none."""
        graph_id = self._graph_id
        if graph_id is None:
            msg = "new graph was not started"
            raise JellyConformanceError(msg)
        return graph_id

    @property
    def graph(self) -> None:
        """
        Check that a graph is currently open.

        Kept with its original contract: evaluates to ``None`` when a graph
        is open.

        Raises:
            JellyConformanceError: if no graph is open.

        """
        self._open_graph_id()

    @override
    def graph_start(self, graph_id: str) -> None:
        """Open a graph; following triples are attributed to ``graph_id``."""
        self._graph_id = graph_id

    @override
    def triple(self, terms: Iterable[Any]) -> Quad:
        """
        Wrap the decoded ``s, p, o`` terms and the open graph into a ``Quad``.

        Raises:
            JellyConformanceError: if no graph is open.

        """
        # Validate before touching ``terms``: without this check a triple
        # outside graph_start/graph_end would silently become a quad whose
        # graph name is None.
        graph_id = self._open_graph_id()
        return Quad(*terms, graph_id)

    @override
    def graph_end(self) -> None:
        """Close the current graph."""
        self._graph_id = None
|
|
157
257
|
|
|
158
|
-
def frame(self) -> Dataset | None:
|
|
159
|
-
if self.options.stream_types.logical_type == jelly.LOGICAL_STREAM_TYPE_DATASETS:
|
|
160
|
-
this_dataset = self.dataset
|
|
161
|
-
self._graph = None
|
|
162
|
-
self.dataset = self.new_dataset()
|
|
163
|
-
return this_dataset
|
|
164
|
-
return super().frame()
|
|
165
258
|
|
|
166
|
-
|
|
167
|
-
def parse_flat_triples_stream(
|
|
259
|
+
def parse_triples_stream(
    frames: Iterable[jelly.RdfStreamFrame],
    options: ParserOptions,
) -> Generator[Iterable[Triple | Prefix]]:
    """
    Decode a triples stream frame by frame.

    Args:
        frames (Iterable[jelly.RdfStreamFrame]): iterator over stream frames
        options (ParserOptions): stream options

    Yields:
        Iterable[Triple | Prefix]: for every frame, the iterable returned
            by ``Decoder.iter_rows`` for that frame.

    """
    # A single decoder, backed by a single adapter, serves the whole stream.
    # NOTE(review): presumably the decoder keeps state between frames, so
    # each per-frame iterable should be consumed before the next one is
    # requested; confirm against Decoder.iter_rows.
    decoder = Decoder(adapter=RDFLibTriplesAdapter(options))
    for stream_frame in frames:
        rows = decoder.iter_rows(stream_frame)
        yield rows
|
|
181
281
|
|
|
182
282
|
|
|
183
|
-
def
|
|
283
|
+
def parse_quads_stream(
    frames: Iterable[jelly.RdfStreamFrame],
    options: ParserOptions,
) -> Generator[Iterable[Quad | Prefix]]:
    """
    Decode a quads stream frame by frame.

    ``RDFLibQuadsAdapter`` is used when the physical type is QUADS; any
    other physical type is handled by ``RDFLibGraphsAdapter``.

    Args:
        frames (Iterable[jelly.RdfStreamFrame]): iterator over stream frames
        options (ParserOptions): stream options

    Yields:
        Iterable[Quad | Prefix]: for every frame, the iterable returned
            by ``Decoder.iter_rows`` for that frame.

    """
    is_quads = options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_QUADS
    adapter_class: type[RDFLibQuadsBaseAdapter] = (
        RDFLibQuadsAdapter if is_quads else RDFLibGraphsAdapter
    )
    # A single decoder, backed by a single adapter, serves the whole stream.
    decoder = Decoder(adapter=adapter_class(options=options))
    for stream_frame in frames:
        rows = decoder.iter_rows(stream_frame)
        yield rows
|
|
213
310
|
|
|
214
311
|
|
|
215
|
-
def
|
|
312
|
+
def parse_jelly_grouped(
    inp: IO[bytes],
    graph_factory: Callable[[], Graph] = lambda: Graph(),
    dataset_factory: Callable[[], Dataset] = lambda: Dataset(),
) -> Generator[Graph] | Generator[Dataset]:
    """
    Parse a jelly stream into one Graph or Dataset per frame.

    The container type follows the physical type of the stream: TRIPLES
    streams yield graphs, QUADS and GRAPHS streams yield datasets. This is a
    generator function, so nothing is read from ``inp`` before iteration
    starts.

    Args:
        inp (IO[bytes]): input jelly buffered binary stream
        graph_factory (Callable): called once per frame to create the Graph
            that receives the frame's triples.
            By default creates an empty in-memory Graph.
        dataset_factory (Callable): called once per frame to create the
            Dataset that receives the frame's quads.
            By default creates an empty in-memory Dataset.

    Raises:
        NotImplementedError: if the physical type is not TRIPLES, QUADS
            or GRAPHS

    Yields:
        Graph | Dataset: the filled container for each frame

    """
    options, frames = get_options_and_frames(inp)
    physical_type = options.stream_types.physical_type

    if physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
        for frame_rows in parse_triples_stream(frames=frames, options=options):
            graph = graph_factory()
            for row in frame_rows:
                if not isinstance(row, Prefix):
                    graph.add(row)
                    continue
                graph.bind(row.prefix, row.iri)
            yield graph
        return

    quad_types = (
        jelly.PHYSICAL_STREAM_TYPE_QUADS,
        jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
    )
    if physical_type not in quad_types:
        physical_type_name = jelly.PhysicalStreamType.Name(physical_type)
        msg = f"the stream type {physical_type_name} is not supported "
        raise NotImplementedError(msg)

    for frame_rows in parse_quads_stream(frames=frames, options=options):
        dataset = dataset_factory()
        for row in frame_rows:
            if isinstance(row, Prefix):
                dataset.bind(row.prefix, row.iri)
                continue
            s, p, o, graph_name = row
            # Each quad is added to the dataset's context for its graph name.
            target = dataset.get_context(graph_name)
            dataset.add((s, p, o, target))
        yield dataset
|
|
236
377
|
|
|
237
378
|
|
|
238
|
-
def
|
|
379
|
+
def parse_jelly_to_graph(
    inp: IO[bytes],
    graph_factory: Callable[[], Graph] = lambda: Graph(),
    dataset_factory: Callable[[], Dataset] = lambda: Dataset(),
) -> Graph | Dataset:
    """
    Load a whole jelly stream into a single Graph or Dataset.

    A TRIPLES stream is collected into the Graph made by ``graph_factory``;
    a QUADS or GRAPHS stream is collected into the Dataset made by
    ``dataset_factory``.

    Args:
        inp (IO[bytes]): input jelly stream.
        graph_factory (Callable[[], Graph]): factory to create Graph.
            By default creates an empty in-memory Graph.
        dataset_factory (Callable[[], Dataset]): factory to create Dataset.
            By default creates an empty in-memory Dataset.

    Raises:
        NotImplementedError: propagated from ``parse_jelly_flat`` when the
            physical type is not TRIPLES, QUADS or GRAPHS.

    Returns:
        Dataset | Graph: Dataset or Graph with statements.

    """
    options, frames = get_options_and_frames(inp)
    physical_type = options.stream_types.physical_type

    if physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
        sink = graph_factory()
    elif physical_type in (
        jelly.PHYSICAL_STREAM_TYPE_QUADS,
        jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
    ):
        # Kept under a second name because quads need Dataset.get_context.
        quad_sink = dataset_factory()
        sink = quad_sink

    # Options and frames were already read above; passing them on keeps
    # parse_jelly_flat from reading ``inp`` a second time.
    for item in parse_jelly_flat(inp=inp, frames=frames, options=options):
        if isinstance(item, Prefix):
            sink.bind(item.prefix, item.iri)
        elif isinstance(item, Triple):
            sink.add(item)
        elif isinstance(item, Quad):
            s, p, o, graph_name = item
            target = quad_sink.get_context(graph_name)
            quad_sink.add((s, p, o, target))
    return sink
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
def parse_jelly_flat(
    inp: IO[bytes],
    frames: Iterable[jelly.RdfStreamFrame] | None = None,
    options: ParserOptions | None = None,
) -> Generator[Statement | Prefix]:
    """
    Parse a jelly stream into a flat Generator of stream events.

    Frame boundaries are not preserved: the statements and namespace
    declarations of all frames are yielded one after another. This is a
    generator function, so nothing is read before iteration starts.

    Args:
        inp (IO[bytes]): input jelly buffered binary stream. Only read when
            ``frames`` or ``options`` is not supplied.
        frames (Iterable[jelly.RdfStreamFrame] | None):
            jelly frames if read before.
        options (ParserOptions | None): stream options
            if read before.

    Raises:
        NotImplementedError: if physical type is not supported

    Yields:
        Statement | Prefix: stream events (Triple, Quad or Prefix)

    """
    # Compare against None rather than truthiness: a supplied but empty
    # ``frames`` sequence is falsy and must not trigger a second read.
    if frames is None or options is None:
        options, frames = get_options_and_frames(inp)

    physical_type = options.stream_types.physical_type
    if physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
        for triples in parse_triples_stream(frames=frames, options=options):
            yield from triples
        return
    if physical_type in (
        jelly.PHYSICAL_STREAM_TYPE_QUADS,
        jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
    ):
        for quads in parse_quads_stream(frames=frames, options=options):
            yield from quads
        return

    physical_type_name = jelly.PhysicalStreamType.Name(physical_type)
    msg = f"the stream type {physical_type_name} is not supported "
    raise NotImplementedError(msg)
|
|
280
467
|
|
|
281
468
|
|
|
282
469
|
class RDFLibJellyParser(RDFLibParser):
    """RDFLib ``Parser`` implementation for jelly input."""

    def parse(self, source: InputSource, sink: Graph) -> None:
        """
        Parse jelly data into the store of the provided RDFLib Graph.

        Args:
            source (InputSource): jelly file as buffered binary stream
                InputSource obj
            sink (Graph): RDFLib Graph whose store receives the statements

        Raises:
            TypeError: if ``source`` does not provide a byte stream

        """
        inp = source.getByteStream()  # type: ignore[no-untyped-call]
        if inp is None:
            msg = "expected source to be a stream of bytes"
            raise TypeError(msg)

        # The factories build new containers on top of ``sink.store``, so the
        # parsed statements are written to the sink's store; the object
        # returned by parse_jelly_to_graph is not used.
        def graph_on_sink_store() -> Graph:
            return Graph(store=sink.store, identifier=sink.identifier)

        def dataset_on_sink_store() -> Dataset:
            return Dataset(store=sink.store)

        parse_jelly_to_graph(
            inp,
            graph_factory=graph_on_sink_store,
            dataset_factory=dataset_on_sink_store,
        )
|