pyjelly 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyjelly might be problematic. Click here for more details.
- pyjelly/integrations/rdflib/parse.py +261 -142
- pyjelly/integrations/rdflib/serialize.py +68 -0
- pyjelly/parse/decode.py +163 -2
- pyjelly/parse/ioutils.py +16 -0
- pyjelly/serialize/encode.py +117 -0
- pyjelly/serialize/flows.py +48 -1
- pyjelly/serialize/streams.py +82 -1
- {pyjelly-0.2.3.dist-info → pyjelly-0.3.0.dist-info}/METADATA +1 -1
- {pyjelly-0.2.3.dist-info → pyjelly-0.3.0.dist-info}/RECORD +12 -12
- {pyjelly-0.2.3.dist-info → pyjelly-0.3.0.dist-info}/WHEEL +0 -0
- {pyjelly-0.2.3.dist-info → pyjelly-0.3.0.dist-info}/entry_points.txt +0 -0
- {pyjelly-0.2.3.dist-info → pyjelly-0.3.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,23 +1,30 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from collections.abc import Generator, Iterable
|
|
4
|
-
from typing import IO, Any
|
|
4
|
+
from typing import IO, Any, Callable
|
|
5
5
|
from typing_extensions import Never, override
|
|
6
6
|
|
|
7
7
|
import rdflib
|
|
8
8
|
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Dataset, Graph
|
|
9
9
|
from rdflib.parser import InputSource
|
|
10
10
|
from rdflib.parser import Parser as RDFLibParser
|
|
11
|
-
from rdflib.store import Store
|
|
12
11
|
|
|
13
12
|
from pyjelly import jelly
|
|
14
13
|
from pyjelly.errors import JellyConformanceError
|
|
15
14
|
from pyjelly.options import StreamTypes
|
|
16
|
-
from pyjelly.parse.decode import Adapter, Decoder, ParserOptions
|
|
15
|
+
from pyjelly.parse.decode import Adapter, Decoder, ParserOptions, ParsingMode
|
|
17
16
|
from pyjelly.parse.ioutils import get_options_and_frames
|
|
18
17
|
|
|
19
18
|
|
|
20
19
|
class RDFLibAdapter(Adapter):
|
|
20
|
+
"""
|
|
21
|
+
RDFLib adapter class, is extended by triples and quads implementations.
|
|
22
|
+
|
|
23
|
+
Args:
|
|
24
|
+
Adapter (_type_): abstract adapter class
|
|
25
|
+
|
|
26
|
+
"""
|
|
27
|
+
|
|
21
28
|
@override
|
|
22
29
|
def iri(self, iri: str) -> rdflib.URIRef:
|
|
23
30
|
return rdflib.URIRef(iri)
|
|
@@ -41,6 +48,24 @@ class RDFLibAdapter(Adapter):
|
|
|
41
48
|
|
|
42
49
|
|
|
43
50
|
def _adapter_missing(feature: str, *, stream_types: StreamTypes) -> Never:
|
|
51
|
+
"""
|
|
52
|
+
Raise error if functionality is missing in adapter.
|
|
53
|
+
|
|
54
|
+
TODO: currently not used anywhere due to logical types being removed
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
feature (str): function which is not implemented
|
|
58
|
+
stream_types (StreamTypes): what combination of physical/logical types
|
|
59
|
+
triggered the error
|
|
60
|
+
|
|
61
|
+
Raises:
|
|
62
|
+
NotImplementedError: raises error with message with missing functionality
|
|
63
|
+
and types encountered
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
Never: only raises errors
|
|
67
|
+
|
|
68
|
+
"""
|
|
44
69
|
physical_type_name = jelly.PhysicalStreamType.Name(stream_types.physical_type)
|
|
45
70
|
logical_type_name = jelly.LogicalStreamType.Name(stream_types.logical_type)
|
|
46
71
|
msg = (
|
|
@@ -51,242 +76,336 @@ def _adapter_missing(feature: str, *, stream_types: StreamTypes) -> Never:
|
|
|
51
76
|
|
|
52
77
|
|
|
53
78
|
class RDFLibTriplesAdapter(RDFLibAdapter):
|
|
54
|
-
|
|
79
|
+
"""
|
|
80
|
+
Triples adapter RDFLib implementation.
|
|
81
|
+
|
|
82
|
+
Notes: has internal graph object which tracks
|
|
83
|
+
triples and namespaces and can get flushed between frames.
|
|
84
|
+
"""
|
|
55
85
|
|
|
56
|
-
def __init__(
|
|
57
|
-
|
|
58
|
-
|
|
86
|
+
def __init__(
|
|
87
|
+
self,
|
|
88
|
+
options: ParserOptions,
|
|
89
|
+
graph_factory: Callable[[], Graph],
|
|
90
|
+
parsing_mode: ParsingMode = ParsingMode.FLAT,
|
|
91
|
+
) -> None:
|
|
92
|
+
super().__init__(options=options, parsing_mode=parsing_mode)
|
|
93
|
+
self.graph = graph_factory()
|
|
94
|
+
self.graph_factory = graph_factory
|
|
95
|
+
self.parsing_mode = parsing_mode
|
|
59
96
|
|
|
60
97
|
@override
|
|
61
98
|
def triple(self, terms: Iterable[Any]) -> Any:
|
|
62
|
-
self.graph.add(terms)
|
|
99
|
+
self.graph.add(tuple(terms))
|
|
63
100
|
|
|
64
101
|
@override
|
|
65
102
|
def namespace_declaration(self, name: str, iri: str) -> None:
|
|
66
103
|
self.graph.bind(name, self.iri(iri))
|
|
67
104
|
|
|
68
|
-
def frame(self) -> Graph
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
stream_types=self.options.stream_types,
|
|
81
|
-
)
|
|
105
|
+
def frame(self) -> Graph:
|
|
106
|
+
"""
|
|
107
|
+
Finalize one frame in triples stream.
|
|
108
|
+
|
|
109
|
+
Returns:
|
|
110
|
+
Graph: frame content as a separate Graph
|
|
111
|
+
and starts a new Graph
|
|
112
|
+
|
|
113
|
+
"""
|
|
114
|
+
this_graph = self.graph
|
|
115
|
+
self.graph = self.graph_factory()
|
|
116
|
+
return this_graph
|
|
82
117
|
|
|
83
118
|
|
|
84
119
|
class RDFLibQuadsBaseAdapter(RDFLibAdapter):
|
|
85
120
|
def __init__(
|
|
86
121
|
self,
|
|
87
122
|
options: ParserOptions,
|
|
88
|
-
|
|
123
|
+
dataset_factory: Callable[[], Dataset],
|
|
124
|
+
parsing_mode: ParsingMode = ParsingMode.FLAT,
|
|
89
125
|
) -> None:
|
|
90
|
-
super().__init__(options=options)
|
|
91
|
-
self.
|
|
92
|
-
self.
|
|
93
|
-
|
|
94
|
-
def new_dataset(self) -> Dataset:
|
|
95
|
-
return Dataset(store=self.store, default_union=True)
|
|
126
|
+
super().__init__(options=options, parsing_mode=parsing_mode)
|
|
127
|
+
self.dataset = dataset_factory()
|
|
128
|
+
self.dataset_factory = dataset_factory
|
|
96
129
|
|
|
97
130
|
@override
|
|
98
|
-
def frame(self) -> Dataset
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
return this_dataset
|
|
103
|
-
if self.options.stream_types.logical_type in (
|
|
104
|
-
jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED,
|
|
105
|
-
jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS,
|
|
106
|
-
):
|
|
107
|
-
return None
|
|
108
|
-
return _adapter_missing(
|
|
109
|
-
"interpreting frames", stream_types=self.options.stream_types
|
|
110
|
-
)
|
|
131
|
+
def frame(self) -> Dataset:
|
|
132
|
+
current_dataset = self.dataset
|
|
133
|
+
self.dataset = self.dataset_factory()
|
|
134
|
+
return current_dataset
|
|
111
135
|
|
|
112
136
|
|
|
113
137
|
class RDFLibQuadsAdapter(RDFLibQuadsBaseAdapter):
|
|
138
|
+
"""
|
|
139
|
+
Extended RDFLib adapter for the QUADS physical type.
|
|
140
|
+
|
|
141
|
+
Notes:
|
|
142
|
+
Adds triples and namespaces directly to
|
|
143
|
+
dataset, so RDFLib handles the rest.
|
|
144
|
+
|
|
145
|
+
Args:
|
|
146
|
+
RDFLibQuadsBaseAdapter (_type_): base quads adapter
|
|
147
|
+
(shared with graphs physical type)
|
|
148
|
+
|
|
149
|
+
"""
|
|
150
|
+
|
|
114
151
|
@override
|
|
115
152
|
def namespace_declaration(self, name: str, iri: str) -> None:
|
|
116
153
|
self.dataset.bind(name, self.iri(iri))
|
|
117
154
|
|
|
118
155
|
@override
|
|
119
156
|
def quad(self, terms: Iterable[Any]) -> Any:
|
|
120
|
-
self.dataset.add(terms)
|
|
157
|
+
self.dataset.add(tuple(terms))
|
|
121
158
|
|
|
122
159
|
|
|
123
160
|
class RDFLibGraphsAdapter(RDFLibQuadsBaseAdapter):
|
|
124
|
-
|
|
161
|
+
"""
|
|
162
|
+
Extension of RDFLibQuadsBaseAdapter for the GRAPHS physical type.
|
|
163
|
+
|
|
164
|
+
Notes: introduces graph start/end, checks if graph exists,
|
|
165
|
+
dataset store management.
|
|
166
|
+
|
|
167
|
+
Args:
|
|
168
|
+
RDFLibQuadsBaseAdapter (_type_): base adapter for quads management.
|
|
169
|
+
|
|
170
|
+
Raises:
|
|
171
|
+
JellyConformanceError: if no graph_start was encountered
|
|
172
|
+
|
|
173
|
+
"""
|
|
174
|
+
|
|
175
|
+
_graph_id: str | None
|
|
125
176
|
|
|
126
177
|
def __init__(
|
|
127
178
|
self,
|
|
128
179
|
options: ParserOptions,
|
|
129
|
-
|
|
180
|
+
dataset_factory: Callable[[], Dataset],
|
|
181
|
+
parsing_mode: ParsingMode = ParsingMode.FLAT,
|
|
130
182
|
) -> None:
|
|
131
|
-
super().__init__(
|
|
132
|
-
|
|
183
|
+
super().__init__(
|
|
184
|
+
options=options,
|
|
185
|
+
dataset_factory=dataset_factory,
|
|
186
|
+
parsing_mode=parsing_mode,
|
|
187
|
+
)
|
|
188
|
+
self._graph_id = None
|
|
133
189
|
|
|
134
190
|
@property
|
|
135
|
-
def graph(self) ->
|
|
136
|
-
if self.
|
|
191
|
+
def graph(self) -> None:
|
|
192
|
+
if self._graph_id is None:
|
|
137
193
|
msg = "new graph was not started"
|
|
138
194
|
raise JellyConformanceError(msg)
|
|
139
|
-
return self._graph
|
|
140
195
|
|
|
141
196
|
@override
|
|
142
197
|
def graph_start(self, graph_id: str) -> None:
|
|
143
|
-
self.
|
|
198
|
+
self._graph_id = graph_id
|
|
144
199
|
|
|
145
200
|
@override
|
|
146
201
|
def namespace_declaration(self, name: str, iri: str) -> None:
|
|
147
|
-
self.
|
|
202
|
+
self.dataset.bind(name, self.iri(iri))
|
|
148
203
|
|
|
149
204
|
@override
|
|
150
205
|
def triple(self, terms: Iterable[Any]) -> None:
|
|
151
|
-
self.
|
|
206
|
+
self.dataset.add((*terms, self._graph_id))
|
|
152
207
|
|
|
153
208
|
@override
|
|
154
209
|
def graph_end(self) -> None:
|
|
155
|
-
self.
|
|
156
|
-
self._graph = None
|
|
210
|
+
self._graph_id = None
|
|
157
211
|
|
|
158
|
-
def frame(self) -> Dataset | None:
|
|
159
|
-
if self.options.stream_types.logical_type == jelly.LOGICAL_STREAM_TYPE_DATASETS:
|
|
160
|
-
this_dataset = self.dataset
|
|
161
|
-
self._graph = None
|
|
162
|
-
self.dataset = self.new_dataset()
|
|
163
|
-
return this_dataset
|
|
164
|
-
return super().frame()
|
|
165
212
|
|
|
166
|
-
|
|
167
|
-
def parse_flat_triples_stream(
|
|
213
|
+
def parse_triples_stream(
|
|
168
214
|
frames: Iterable[jelly.RdfStreamFrame],
|
|
169
215
|
options: ParserOptions,
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
) ->
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
216
|
+
graph_factory: Callable[[], Graph],
|
|
217
|
+
parsing_mode: ParsingMode = ParsingMode.FLAT,
|
|
218
|
+
) -> Generator[Graph]:
|
|
219
|
+
"""
|
|
220
|
+
Parse flat triple stream.
|
|
221
|
+
|
|
222
|
+
Args:
|
|
223
|
+
frames (Iterable[jelly.RdfStreamFrame]): iterator over stream frames
|
|
224
|
+
options (ParserOptions): stream options
|
|
225
|
+
graph_factory (Callable): Lambda to construct a graph
|
|
226
|
+
parsing_mode (ParsingMode): specifies whether this is
|
|
227
|
+
a flat or grouped parsing.
|
|
228
|
+
|
|
229
|
+
Yields:
|
|
230
|
+
Generator[Graph]: RDFLib Graph(s)
|
|
231
|
+
|
|
232
|
+
"""
|
|
233
|
+
adapter = RDFLibTriplesAdapter(
|
|
234
|
+
options, graph_factory=graph_factory, parsing_mode=parsing_mode
|
|
235
|
+
)
|
|
177
236
|
decoder = Decoder(adapter=adapter)
|
|
178
237
|
for frame in frames:
|
|
179
|
-
decoder.decode_frame(frame
|
|
180
|
-
|
|
238
|
+
g = decoder.decode_frame(frame)
|
|
239
|
+
if g is not None:
|
|
240
|
+
yield g
|
|
241
|
+
|
|
242
|
+
if parsing_mode is ParsingMode.FLAT:
|
|
243
|
+
yield adapter.graph
|
|
181
244
|
|
|
182
245
|
|
|
183
|
-
def
|
|
246
|
+
def parse_quads_stream(
|
|
184
247
|
frames: Iterable[jelly.RdfStreamFrame],
|
|
185
248
|
options: ParserOptions,
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
) -> Dataset:
|
|
189
|
-
|
|
249
|
+
dataset_factory: Callable[[], Dataset],
|
|
250
|
+
parsing_mode: ParsingMode = ParsingMode.FLAT,
|
|
251
|
+
) -> Generator[Dataset]:
|
|
252
|
+
"""
|
|
253
|
+
Parse flat quads stream.
|
|
254
|
+
|
|
255
|
+
Args:
|
|
256
|
+
frames (Iterable[jelly.RdfStreamFrame]): iterator over stream frames
|
|
257
|
+
options (ParserOptions): stream options
|
|
258
|
+
dataset_factory (Callable): Lambda to construct a dataset
|
|
259
|
+
parsing_mode (ParsingMode): specifies whether this is
|
|
260
|
+
a flat or grouped parsing.
|
|
261
|
+
|
|
262
|
+
Yields:
|
|
263
|
+
Generator[Dataset]: RDFLib dataset(s)
|
|
264
|
+
|
|
265
|
+
"""
|
|
190
266
|
adapter_class: type[RDFLibQuadsBaseAdapter]
|
|
191
267
|
if options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_QUADS:
|
|
192
268
|
adapter_class = RDFLibQuadsAdapter
|
|
193
|
-
else:
|
|
269
|
+
else:
|
|
194
270
|
adapter_class = RDFLibGraphsAdapter
|
|
195
|
-
adapter = adapter_class(
|
|
196
|
-
|
|
271
|
+
adapter = adapter_class(
|
|
272
|
+
options=options,
|
|
273
|
+
dataset_factory=dataset_factory,
|
|
274
|
+
parsing_mode=parsing_mode,
|
|
275
|
+
)
|
|
197
276
|
decoder = Decoder(adapter=adapter)
|
|
198
277
|
for frame in frames:
|
|
199
|
-
decoder.decode_frame(frame
|
|
200
|
-
|
|
278
|
+
ds = decoder.decode_frame(frame)
|
|
279
|
+
if ds is not None:
|
|
280
|
+
yield ds
|
|
201
281
|
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
frames: Iterable[jelly.RdfStreamFrame],
|
|
205
|
-
options: ParserOptions,
|
|
206
|
-
store: Store | str = "default",
|
|
207
|
-
) -> Generator[Graph]:
|
|
208
|
-
assert options.stream_types.logical_type == jelly.LOGICAL_STREAM_TYPE_GRAPHS
|
|
209
|
-
adapter = RDFLibTriplesAdapter(options, store=store)
|
|
210
|
-
decoder = Decoder(adapter=adapter)
|
|
211
|
-
for frame in frames:
|
|
212
|
-
yield decoder.decode_frame(frame=frame)
|
|
282
|
+
if parsing_mode is ParsingMode.FLAT:
|
|
283
|
+
yield adapter.dataset
|
|
213
284
|
|
|
214
285
|
|
|
215
|
-
def
|
|
286
|
+
def parse_jelly_grouped(
|
|
216
287
|
inp: IO[bytes],
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
yield parse_flat_triples_stream(frames=frames, options=options, store=store)
|
|
223
|
-
return
|
|
288
|
+
graph_factory: Callable[[], Graph],
|
|
289
|
+
dataset_factory: Callable[[], Dataset],
|
|
290
|
+
) -> Generator[Any] | Generator[Graph] | Generator[Dataset]:
|
|
291
|
+
"""
|
|
292
|
+
Take jelly file and return generators based on the detected logical type.
|
|
224
293
|
|
|
225
|
-
|
|
226
|
-
yield parse_flat_quads_stream(frames=frames, options=options, store=store)
|
|
227
|
-
return
|
|
294
|
+
Yields one graph/dataset per frame.
|
|
228
295
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
296
|
+
Args:
|
|
297
|
+
inp (IO[bytes]): input jelly buffered binary stream
|
|
298
|
+
graph_factory (Callable): lambda to construct a Graph
|
|
299
|
+
dataset_factory (Callable): lambda to construct a Dataset
|
|
232
300
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
raise NotImplementedError(msg)
|
|
301
|
+
Raises:
|
|
302
|
+
NotImplementedError: is raised if a logical type is not implemented
|
|
236
303
|
|
|
304
|
+
Yields:
|
|
305
|
+
Generator[Any] | Generator[Dataset] | Generator[Graph]:
|
|
306
|
+
returns generators for graphs/datasets based on the type of input
|
|
237
307
|
|
|
238
|
-
|
|
239
|
-
inp: IO[bytes],
|
|
240
|
-
store: Store | str = "default",
|
|
241
|
-
identifier: str | None = None,
|
|
242
|
-
) -> Any | Dataset | Graph:
|
|
308
|
+
"""
|
|
243
309
|
options, frames = get_options_and_frames(inp)
|
|
244
310
|
|
|
245
|
-
if options.stream_types.
|
|
246
|
-
|
|
247
|
-
"the stream contains multiple datasets and cannot be parsed into "
|
|
248
|
-
"a single dataset"
|
|
249
|
-
)
|
|
250
|
-
raise NotImplementedError(msg)
|
|
251
|
-
|
|
252
|
-
if options.stream_types.logical_type == jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES:
|
|
253
|
-
return parse_flat_triples_stream(
|
|
311
|
+
if options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
|
|
312
|
+
yield from parse_triples_stream(
|
|
254
313
|
frames=frames,
|
|
255
314
|
options=options,
|
|
256
|
-
|
|
257
|
-
|
|
315
|
+
graph_factory=graph_factory,
|
|
316
|
+
parsing_mode=ParsingMode.GROUPED,
|
|
258
317
|
)
|
|
318
|
+
return
|
|
259
319
|
|
|
260
|
-
if options.stream_types.
|
|
261
|
-
|
|
320
|
+
if options.stream_types.physical_type in (
|
|
321
|
+
jelly.PHYSICAL_STREAM_TYPE_QUADS,
|
|
322
|
+
jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
|
|
323
|
+
):
|
|
324
|
+
yield from parse_quads_stream(
|
|
262
325
|
frames=frames,
|
|
263
326
|
options=options,
|
|
264
|
-
|
|
265
|
-
|
|
327
|
+
dataset_factory=dataset_factory,
|
|
328
|
+
parsing_mode=ParsingMode.GROUPED,
|
|
266
329
|
)
|
|
330
|
+
return
|
|
267
331
|
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
332
|
+
physical_type_name = jelly.PhysicalStreamType.Name(
|
|
333
|
+
options.stream_types.physical_type
|
|
334
|
+
)
|
|
335
|
+
msg = f"the stream type {physical_type_name} is not supported "
|
|
336
|
+
raise NotImplementedError(msg)
|
|
271
337
|
|
|
272
|
-
for graph in parse_graph_stream(frames=frames, options=options, store=store):
|
|
273
|
-
ds.add_graph(graph)
|
|
274
338
|
|
|
275
|
-
|
|
339
|
+
def parse_jelly_flat(
|
|
340
|
+
inp: IO[bytes],
|
|
341
|
+
graph_factory: Callable[[], Graph],
|
|
342
|
+
dataset_factory: Callable[[], Dataset],
|
|
343
|
+
) -> Any | Dataset | Graph:
|
|
344
|
+
"""
|
|
345
|
+
Parse jelly file with FLAT physical type into one Graph/Dataset.
|
|
276
346
|
|
|
277
|
-
|
|
278
|
-
|
|
347
|
+
Args:
|
|
348
|
+
inp (IO[bytes]): input jelly buffered binary stream
|
|
349
|
+
graph_factory (Callable): lambda to construct a Graph
|
|
350
|
+
dataset_factory (Callable): lambda to construct a Dataset
|
|
351
|
+
|
|
352
|
+
Raises:
|
|
353
|
+
NotImplementedError: if physical type is not supported
|
|
354
|
+
|
|
355
|
+
Returns:
|
|
356
|
+
RDFLib Graph or Dataset
|
|
357
|
+
|
|
358
|
+
"""
|
|
359
|
+
options, frames = get_options_and_frames(inp)
|
|
360
|
+
|
|
361
|
+
if options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
|
|
362
|
+
return next(
|
|
363
|
+
parse_triples_stream(
|
|
364
|
+
frames=frames,
|
|
365
|
+
options=options,
|
|
366
|
+
graph_factory=graph_factory,
|
|
367
|
+
parsing_mode=ParsingMode.FLAT,
|
|
368
|
+
)
|
|
369
|
+
)
|
|
370
|
+
|
|
371
|
+
if options.stream_types.physical_type in (
|
|
372
|
+
jelly.PHYSICAL_STREAM_TYPE_QUADS,
|
|
373
|
+
jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
|
|
374
|
+
):
|
|
375
|
+
return next(
|
|
376
|
+
parse_quads_stream(
|
|
377
|
+
frames=frames,
|
|
378
|
+
options=options,
|
|
379
|
+
dataset_factory=dataset_factory,
|
|
380
|
+
parsing_mode=ParsingMode.FLAT,
|
|
381
|
+
)
|
|
382
|
+
)
|
|
383
|
+
physical_type_name = jelly.PhysicalStreamType.Name(
|
|
384
|
+
options.stream_types.physical_type
|
|
385
|
+
)
|
|
386
|
+
msg = f"the stream type {physical_type_name} is not supported "
|
|
279
387
|
raise NotImplementedError(msg)
|
|
280
388
|
|
|
281
389
|
|
|
282
390
|
class RDFLibJellyParser(RDFLibParser):
|
|
283
391
|
def parse(self, source: InputSource, sink: Graph) -> None:
|
|
392
|
+
"""
|
|
393
|
+
Parse jelly file into provided RDFLib Graph.
|
|
394
|
+
|
|
395
|
+
Args:
|
|
396
|
+
source (InputSource): jelly file as buffered binary stream InputSource obj
|
|
397
|
+
sink (Graph): RDFLib Graph
|
|
398
|
+
|
|
399
|
+
Raises:
|
|
400
|
+
TypeError: raises error if invalid input
|
|
401
|
+
|
|
402
|
+
"""
|
|
284
403
|
inp = source.getByteStream() # type: ignore[no-untyped-call]
|
|
285
404
|
if inp is None:
|
|
286
405
|
msg = "expected source to be a stream of bytes"
|
|
287
406
|
raise TypeError(msg)
|
|
288
|
-
|
|
407
|
+
parse_jelly_flat(
|
|
289
408
|
inp,
|
|
290
|
-
identifier=sink.identifier,
|
|
291
|
-
store=sink.store,
|
|
409
|
+
graph_factory=lambda: Graph(store=sink.store, identifier=sink.identifier),
|
|
410
|
+
dataset_factory=lambda: Dataset(store=sink.store),
|
|
292
411
|
)
|
|
@@ -23,6 +23,17 @@ from pyjelly.serialize.streams import (
|
|
|
23
23
|
|
|
24
24
|
class RDFLibTermEncoder(TermEncoder):
|
|
25
25
|
def encode_any(self, term: object, slot: Slot) -> RowsAndTerm:
|
|
26
|
+
"""
|
|
27
|
+
Encode term based on its RDFLib object.
|
|
28
|
+
|
|
29
|
+
Args:
|
|
30
|
+
term (object): term to encode
|
|
31
|
+
slot (Slot): its place in statement.
|
|
32
|
+
|
|
33
|
+
Returns:
|
|
34
|
+
RowsAndTerm: encoded extra rows and a jelly term to encode
|
|
35
|
+
|
|
36
|
+
"""
|
|
26
37
|
if slot is Slot.graph and term == DATASET_DEFAULT_GRAPH_ID:
|
|
27
38
|
return self.encode_default_graph()
|
|
28
39
|
|
|
@@ -60,6 +71,21 @@ def triples_stream_frames(
|
|
|
60
71
|
stream: TripleStream,
|
|
61
72
|
data: Graph | Dataset,
|
|
62
73
|
) -> Generator[jelly.RdfStreamFrame]:
|
|
74
|
+
"""
|
|
75
|
+
Serialize a Graph/Dataset into jelly frames.
|
|
76
|
+
|
|
77
|
+
Args:
|
|
78
|
+
stream (TripleStream): stream that specifies triples processing
|
|
79
|
+
data (Graph | Dataset): Graph/Dataset to serialize.
|
|
80
|
+
|
|
81
|
+
Notes:
|
|
82
|
+
if Dataset is given, its graphs are unpacked and iterated over
|
|
83
|
+
if flow is GraphsFrameFlow, emits a frame per graph.
|
|
84
|
+
|
|
85
|
+
Yields:
|
|
86
|
+
Generator[jelly.RdfStreamFrame]: jelly frames.
|
|
87
|
+
|
|
88
|
+
"""
|
|
63
89
|
stream.enroll()
|
|
64
90
|
if stream.options.params.namespace_declarations:
|
|
65
91
|
namespace_declarations(data, stream)
|
|
@@ -68,6 +94,7 @@ def triples_stream_frames(
|
|
|
68
94
|
for terms in graph:
|
|
69
95
|
if frame := stream.triple(terms):
|
|
70
96
|
yield frame
|
|
97
|
+
# this part turns each graph to a frame for graphs logical type
|
|
71
98
|
if frame := stream.flow.frame_from_graph():
|
|
72
99
|
yield frame
|
|
73
100
|
if stream.stream_types.flat and (frame := stream.flow.to_stream_frame()):
|
|
@@ -79,6 +106,20 @@ def quads_stream_frames(
|
|
|
79
106
|
stream: QuadStream,
|
|
80
107
|
data: Dataset,
|
|
81
108
|
) -> Generator[jelly.RdfStreamFrame]:
|
|
109
|
+
"""
|
|
110
|
+
Serialize a Dataset into jelly frames.
|
|
111
|
+
|
|
112
|
+
Notes:
|
|
113
|
+
Emits one frame per dataset if flow is of DatasetsFrameFlow.
|
|
114
|
+
|
|
115
|
+
Args:
|
|
116
|
+
stream (QuadStream): stream that specifies quads processing
|
|
117
|
+
data (Dataset): Dataset to serialize.
|
|
118
|
+
|
|
119
|
+
Yields:
|
|
120
|
+
Generator[jelly.RdfStreamFrame]: jelly frames
|
|
121
|
+
|
|
122
|
+
"""
|
|
82
123
|
assert isinstance(data, Dataset)
|
|
83
124
|
stream.enroll()
|
|
84
125
|
if stream.options.params.namespace_declarations:
|
|
@@ -97,6 +138,21 @@ def graphs_stream_frames(
|
|
|
97
138
|
stream: GraphStream,
|
|
98
139
|
data: Dataset,
|
|
99
140
|
) -> Generator[jelly.RdfStreamFrame]:
|
|
141
|
+
"""
|
|
142
|
+
Serialize a Dataset into jelly frames as a stream of graphs.
|
|
143
|
+
|
|
144
|
+
Notes:
|
|
145
|
+
If flow of DatasetsFrameFlow type, the whole dataset
|
|
146
|
+
will be encoded into one frame.
|
|
147
|
+
|
|
148
|
+
Args:
|
|
149
|
+
stream (GraphStream): stream that specifies graphs processing
|
|
150
|
+
data (Dataset): Dataset to serialize.
|
|
151
|
+
|
|
152
|
+
Yields:
|
|
153
|
+
Generator[jelly.RdfStreamFrame]: jelly frames
|
|
154
|
+
|
|
155
|
+
"""
|
|
100
156
|
assert isinstance(data, Dataset)
|
|
101
157
|
stream.enroll()
|
|
102
158
|
if stream.options.params.namespace_declarations:
|
|
@@ -171,6 +227,18 @@ class RDFLibJellySerializer(RDFLibSerializer):
|
|
|
171
227
|
options: SerializerOptions | None = None,
|
|
172
228
|
**unused: Any,
|
|
173
229
|
) -> None:
|
|
230
|
+
"""
|
|
231
|
+
Serialize self.store content to Jelly format.
|
|
232
|
+
|
|
233
|
+
Args:
|
|
234
|
+
out (IO[bytes]): output buffered writer
|
|
235
|
+
stream (Stream | None, optional): Jelly stream object. Defaults to None.
|
|
236
|
+
options (SerializerOptions | None, optional): Serializer options
|
|
237
|
+
if defined beforehand, e.g., read from a separate file.
|
|
238
|
+
Defaults to None.
|
|
239
|
+
**unused(Any): unused args for RDFLib serialize
|
|
240
|
+
|
|
241
|
+
"""
|
|
174
242
|
if options is None:
|
|
175
243
|
options = self.guess_options()
|
|
176
244
|
if stream is None:
|