pyjelly 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyjelly might be problematic. Click here for more details.
- pyjelly/integrations/rdflib/parse.py +233 -154
- pyjelly/integrations/rdflib/serialize.py +100 -43
- pyjelly/jelly/rdf_pb2.py +3 -3
- pyjelly/jelly/rdf_pb2.pyi +2 -1
- pyjelly/parse/decode.py +10 -10
- pyjelly/parse/ioutils.py +10 -4
- pyjelly/serialize/flows.py +24 -14
- pyjelly/serialize/streams.py +5 -2
- {pyjelly-0.3.0.dist-info → pyjelly-0.4.0.dist-info}/METADATA +8 -8
- {pyjelly-0.3.0.dist-info → pyjelly-0.4.0.dist-info}/RECORD +13 -13
- {pyjelly-0.3.0.dist-info → pyjelly-0.4.0.dist-info}/WHEEL +0 -0
- {pyjelly-0.3.0.dist-info → pyjelly-0.4.0.dist-info}/entry_points.txt +0 -0
- {pyjelly-0.3.0.dist-info → pyjelly-0.4.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from collections.abc import Generator, Iterable
|
|
4
|
-
from
|
|
5
|
-
from
|
|
4
|
+
from itertools import chain
|
|
5
|
+
from typing import IO, Any, Callable, Union
|
|
6
|
+
from typing_extensions import Never, Self, override
|
|
6
7
|
|
|
7
8
|
import rdflib
|
|
9
|
+
from rdflib import Node
|
|
8
10
|
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Dataset, Graph
|
|
9
11
|
from rdflib.parser import InputSource
|
|
10
12
|
from rdflib.parser import Parser as RDFLibParser
|
|
@@ -12,16 +14,113 @@ from rdflib.parser import Parser as RDFLibParser
|
|
|
12
14
|
from pyjelly import jelly
|
|
13
15
|
from pyjelly.errors import JellyConformanceError
|
|
14
16
|
from pyjelly.options import StreamTypes
|
|
15
|
-
from pyjelly.parse.decode import Adapter, Decoder, ParserOptions
|
|
17
|
+
from pyjelly.parse.decode import Adapter, Decoder, ParserOptions
|
|
16
18
|
from pyjelly.parse.ioutils import get_options_and_frames
|
|
17
19
|
|
|
20
|
+
GraphName = Union[rdflib.URIRef, rdflib.BNode, str]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Triple(tuple[Node, Node, Node]):
|
|
24
|
+
"""
|
|
25
|
+
Describe RDFLib triple.
|
|
26
|
+
|
|
27
|
+
Args:
|
|
28
|
+
tuple (Node, Node, Node): s/p/o tuple of RDFLib Nodes.
|
|
29
|
+
|
|
30
|
+
Returns:
|
|
31
|
+
Triple: triple as tuple.
|
|
32
|
+
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
__slots__ = ()
|
|
36
|
+
|
|
37
|
+
def __new__(cls, s: Node, p: Node, o: Node) -> Self:
|
|
38
|
+
return tuple.__new__(cls, (s, p, o))
|
|
39
|
+
|
|
40
|
+
@property
|
|
41
|
+
def s(self) -> Node:
|
|
42
|
+
return self[0]
|
|
43
|
+
|
|
44
|
+
@property
|
|
45
|
+
def p(self) -> Node:
|
|
46
|
+
return self[1]
|
|
47
|
+
|
|
48
|
+
@property
|
|
49
|
+
def o(self) -> Node:
|
|
50
|
+
return self[2]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class Quad(tuple[Node, Node, Node, GraphName]):
|
|
54
|
+
"""
|
|
55
|
+
Describe RDFLib quad.
|
|
56
|
+
|
|
57
|
+
Args:
|
|
58
|
+
tuple (Node, Node, Node, GraphName):
|
|
59
|
+
s/p/o/g as a tuple of RDFLib nodes and a GraphName,
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
Quad: quad as tuple.
|
|
63
|
+
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
__slots__ = ()
|
|
67
|
+
|
|
68
|
+
def __new__(cls, s: Node, p: Node, o: Node, g: GraphName) -> Self:
|
|
69
|
+
return tuple.__new__(cls, (s, p, o, g))
|
|
70
|
+
|
|
71
|
+
@property
|
|
72
|
+
def s(self) -> Node:
|
|
73
|
+
return self[0]
|
|
74
|
+
|
|
75
|
+
@property
|
|
76
|
+
def p(self) -> Node:
|
|
77
|
+
return self[1]
|
|
78
|
+
|
|
79
|
+
@property
|
|
80
|
+
def o(self) -> Node:
|
|
81
|
+
return self[2]
|
|
82
|
+
|
|
83
|
+
@property
|
|
84
|
+
def g(self) -> GraphName:
|
|
85
|
+
return self[3]
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
Statement = Union[Triple, Quad]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class Prefix(tuple[str, rdflib.URIRef]):
|
|
92
|
+
"""
|
|
93
|
+
Describe RDF Prefix(i.e, namespace declaration).
|
|
94
|
+
|
|
95
|
+
Args:
|
|
96
|
+
tuple (str, rdflib.URIRef): expects prefix as a string,
|
|
97
|
+
and full namespace URI as Rdflib.URIRef.
|
|
98
|
+
|
|
99
|
+
Returns:
|
|
100
|
+
Prefix: prefix as tuple(prefix, iri).
|
|
101
|
+
|
|
102
|
+
"""
|
|
103
|
+
|
|
104
|
+
__slots__ = ()
|
|
105
|
+
|
|
106
|
+
def __new__(cls, prefix: str, iri: rdflib.URIRef) -> Self:
|
|
107
|
+
return tuple.__new__(cls, (prefix, iri))
|
|
108
|
+
|
|
109
|
+
@property
|
|
110
|
+
def prefix(self) -> str:
|
|
111
|
+
return self[0]
|
|
112
|
+
|
|
113
|
+
@property
|
|
114
|
+
def iri(self) -> rdflib.URIRef:
|
|
115
|
+
return self[1]
|
|
116
|
+
|
|
18
117
|
|
|
19
118
|
class RDFLibAdapter(Adapter):
|
|
20
119
|
"""
|
|
21
120
|
RDFLib adapter class, is extended by triples and quads implementations.
|
|
22
121
|
|
|
23
122
|
Args:
|
|
24
|
-
Adapter (
|
|
123
|
+
Adapter (): abstract adapter class
|
|
25
124
|
|
|
26
125
|
"""
|
|
27
126
|
|
|
@@ -46,13 +145,15 @@ class RDFLibAdapter(Adapter):
|
|
|
46
145
|
) -> rdflib.Literal:
|
|
47
146
|
return rdflib.Literal(lex, lang=language, datatype=datatype)
|
|
48
147
|
|
|
148
|
+
@override
|
|
149
|
+
def namespace_declaration(self, name: str, iri: str) -> Prefix:
|
|
150
|
+
return Prefix(name, self.iri(iri))
|
|
151
|
+
|
|
49
152
|
|
|
50
153
|
def _adapter_missing(feature: str, *, stream_types: StreamTypes) -> Never:
|
|
51
154
|
"""
|
|
52
155
|
Raise error if functionality is missing in adapter.
|
|
53
156
|
|
|
54
|
-
TODO: currently not used anywhere due to logical types being removed
|
|
55
|
-
|
|
56
157
|
Args:
|
|
57
158
|
feature (str): function which is not implemented
|
|
58
159
|
stream_types (StreamTypes): what combination of physical/logical types
|
|
@@ -79,93 +180,48 @@ class RDFLibTriplesAdapter(RDFLibAdapter):
|
|
|
79
180
|
"""
|
|
80
181
|
Triples adapter RDFLib implementation.
|
|
81
182
|
|
|
82
|
-
Notes:
|
|
83
|
-
triples and namespaces and can get flushed between frames.
|
|
183
|
+
Notes: returns triple/namespace declaration as soon as receives them.
|
|
84
184
|
"""
|
|
85
185
|
|
|
86
186
|
def __init__(
|
|
87
187
|
self,
|
|
88
188
|
options: ParserOptions,
|
|
89
|
-
graph_factory: Callable[[], Graph],
|
|
90
|
-
parsing_mode: ParsingMode = ParsingMode.FLAT,
|
|
91
189
|
) -> None:
|
|
92
|
-
super().__init__(options=options
|
|
93
|
-
self.graph = graph_factory()
|
|
94
|
-
self.graph_factory = graph_factory
|
|
95
|
-
self.parsing_mode = parsing_mode
|
|
190
|
+
super().__init__(options=options)
|
|
96
191
|
|
|
97
192
|
@override
|
|
98
|
-
def triple(self, terms: Iterable[Any]) ->
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
@override
|
|
102
|
-
def namespace_declaration(self, name: str, iri: str) -> None:
|
|
103
|
-
self.graph.bind(name, self.iri(iri))
|
|
104
|
-
|
|
105
|
-
def frame(self) -> Graph:
|
|
106
|
-
"""
|
|
107
|
-
Finalize one frame in triples stream.
|
|
108
|
-
|
|
109
|
-
Returns:
|
|
110
|
-
Graph: frame content as a separate Graph
|
|
111
|
-
and starts a new Graph
|
|
112
|
-
|
|
113
|
-
"""
|
|
114
|
-
this_graph = self.graph
|
|
115
|
-
self.graph = self.graph_factory()
|
|
116
|
-
return this_graph
|
|
193
|
+
def triple(self, terms: Iterable[Any]) -> Triple:
|
|
194
|
+
return Triple(*terms)
|
|
117
195
|
|
|
118
196
|
|
|
119
197
|
class RDFLibQuadsBaseAdapter(RDFLibAdapter):
|
|
120
|
-
def __init__(
|
|
121
|
-
|
|
122
|
-
options: ParserOptions,
|
|
123
|
-
dataset_factory: Callable[[], Dataset],
|
|
124
|
-
parsing_mode: ParsingMode = ParsingMode.FLAT,
|
|
125
|
-
) -> None:
|
|
126
|
-
super().__init__(options=options, parsing_mode=parsing_mode)
|
|
127
|
-
self.dataset = dataset_factory()
|
|
128
|
-
self.dataset_factory = dataset_factory
|
|
129
|
-
|
|
130
|
-
@override
|
|
131
|
-
def frame(self) -> Dataset:
|
|
132
|
-
current_dataset = self.dataset
|
|
133
|
-
self.dataset = self.dataset_factory()
|
|
134
|
-
return current_dataset
|
|
198
|
+
def __init__(self, options: ParserOptions) -> None:
|
|
199
|
+
super().__init__(options=options)
|
|
135
200
|
|
|
136
201
|
|
|
137
202
|
class RDFLibQuadsAdapter(RDFLibQuadsBaseAdapter):
|
|
138
203
|
"""
|
|
139
204
|
Extended RDFLib adapter for the QUADS physical type.
|
|
140
205
|
|
|
141
|
-
Notes:
|
|
142
|
-
Adds triples and namespaces directly to
|
|
143
|
-
dataset, so RDFLib handles the rest.
|
|
144
|
-
|
|
145
206
|
Args:
|
|
146
|
-
RDFLibQuadsBaseAdapter (
|
|
207
|
+
RDFLibQuadsBaseAdapter (RDFLibAdapter): base quads adapter
|
|
147
208
|
(shared with graphs physical type)
|
|
148
209
|
|
|
149
210
|
"""
|
|
150
211
|
|
|
151
212
|
@override
|
|
152
|
-
def
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
@override
|
|
156
|
-
def quad(self, terms: Iterable[Any]) -> Any:
|
|
157
|
-
self.dataset.add(tuple(terms))
|
|
213
|
+
def quad(self, terms: Iterable[Any]) -> Quad:
|
|
214
|
+
return Quad(*terms)
|
|
158
215
|
|
|
159
216
|
|
|
160
217
|
class RDFLibGraphsAdapter(RDFLibQuadsBaseAdapter):
|
|
161
218
|
"""
|
|
162
219
|
Extension of RDFLibQuadsBaseAdapter for the GRAPHS physical type.
|
|
163
220
|
|
|
164
|
-
Notes: introduces graph start/end, checks if graph exists
|
|
165
|
-
dataset store management.
|
|
221
|
+
Notes: introduces graph start/end, checks if graph exists.
|
|
166
222
|
|
|
167
223
|
Args:
|
|
168
|
-
RDFLibQuadsBaseAdapter (
|
|
224
|
+
RDFLibQuadsBaseAdapter (RDFLibAdapter): base adapter for quads management.
|
|
169
225
|
|
|
170
226
|
Raises:
|
|
171
227
|
JellyConformanceError: if no graph_start was encountered
|
|
@@ -177,14 +233,8 @@ class RDFLibGraphsAdapter(RDFLibQuadsBaseAdapter):
|
|
|
177
233
|
def __init__(
|
|
178
234
|
self,
|
|
179
235
|
options: ParserOptions,
|
|
180
|
-
dataset_factory: Callable[[], Dataset],
|
|
181
|
-
parsing_mode: ParsingMode = ParsingMode.FLAT,
|
|
182
236
|
) -> None:
|
|
183
|
-
super().__init__(
|
|
184
|
-
options=options,
|
|
185
|
-
dataset_factory=dataset_factory,
|
|
186
|
-
parsing_mode=parsing_mode,
|
|
187
|
-
)
|
|
237
|
+
super().__init__(options=options)
|
|
188
238
|
self._graph_id = None
|
|
189
239
|
|
|
190
240
|
@property
|
|
@@ -198,12 +248,8 @@ class RDFLibGraphsAdapter(RDFLibQuadsBaseAdapter):
|
|
|
198
248
|
self._graph_id = graph_id
|
|
199
249
|
|
|
200
250
|
@override
|
|
201
|
-
def
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
@override
|
|
205
|
-
def triple(self, terms: Iterable[Any]) -> None:
|
|
206
|
-
self.dataset.add((*terms, self._graph_id))
|
|
251
|
+
def triple(self, terms: Iterable[Any]) -> Quad:
|
|
252
|
+
return Quad(*chain(terms, [self._graph_id]))
|
|
207
253
|
|
|
208
254
|
@override
|
|
209
255
|
def graph_end(self) -> None:
|
|
@@ -213,54 +259,42 @@ class RDFLibGraphsAdapter(RDFLibQuadsBaseAdapter):
|
|
|
213
259
|
def parse_triples_stream(
|
|
214
260
|
frames: Iterable[jelly.RdfStreamFrame],
|
|
215
261
|
options: ParserOptions,
|
|
216
|
-
|
|
217
|
-
parsing_mode: ParsingMode = ParsingMode.FLAT,
|
|
218
|
-
) -> Generator[Graph]:
|
|
262
|
+
) -> Generator[Iterable[Triple | Prefix]]:
|
|
219
263
|
"""
|
|
220
264
|
Parse flat triple stream.
|
|
221
265
|
|
|
222
266
|
Args:
|
|
223
267
|
frames (Iterable[jelly.RdfStreamFrame]): iterator over stream frames
|
|
224
268
|
options (ParserOptions): stream options
|
|
225
|
-
graph_factory (Callable): Lambda to construct a graph
|
|
226
|
-
parsing_mode (ParsingMode): specifies whether this is
|
|
227
|
-
a flat or grouped parsing.
|
|
228
269
|
|
|
229
270
|
Yields:
|
|
230
|
-
Generator[
|
|
271
|
+
Generator[Iterable[Triple | Prefix]]:
|
|
272
|
+
Generator of iterables of Triple or Prefix objects,
|
|
273
|
+
one iterable per frame.
|
|
231
274
|
|
|
232
275
|
"""
|
|
233
|
-
adapter = RDFLibTriplesAdapter(
|
|
234
|
-
options, graph_factory=graph_factory, parsing_mode=parsing_mode
|
|
235
|
-
)
|
|
276
|
+
adapter = RDFLibTriplesAdapter(options)
|
|
236
277
|
decoder = Decoder(adapter=adapter)
|
|
237
278
|
for frame in frames:
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
yield g
|
|
241
|
-
|
|
242
|
-
if parsing_mode is ParsingMode.FLAT:
|
|
243
|
-
yield adapter.graph
|
|
279
|
+
yield decoder.iter_rows(frame)
|
|
280
|
+
return
|
|
244
281
|
|
|
245
282
|
|
|
246
283
|
def parse_quads_stream(
|
|
247
284
|
frames: Iterable[jelly.RdfStreamFrame],
|
|
248
285
|
options: ParserOptions,
|
|
249
|
-
|
|
250
|
-
parsing_mode: ParsingMode = ParsingMode.FLAT,
|
|
251
|
-
) -> Generator[Dataset]:
|
|
286
|
+
) -> Generator[Iterable[Quad | Prefix]]:
|
|
252
287
|
"""
|
|
253
288
|
Parse flat quads stream.
|
|
254
289
|
|
|
255
290
|
Args:
|
|
256
291
|
frames (Iterable[jelly.RdfStreamFrame]): iterator over stream frames
|
|
257
292
|
options (ParserOptions): stream options
|
|
258
|
-
dataset_factory (Callable): Lambda to construct a dataset
|
|
259
|
-
parsing_mode (ParsingMode): specifies whether this is
|
|
260
|
-
a flat or grouped parsing.
|
|
261
293
|
|
|
262
294
|
Yields:
|
|
263
|
-
Generator[
|
|
295
|
+
Generator[Iterable[Quad | Prefix]]:
|
|
296
|
+
Generator of iterables of Quad or Prefix objects,
|
|
297
|
+
one iterable per frame.
|
|
264
298
|
|
|
265
299
|
"""
|
|
266
300
|
adapter_class: type[RDFLibQuadsBaseAdapter]
|
|
@@ -268,26 +302,18 @@ def parse_quads_stream(
|
|
|
268
302
|
adapter_class = RDFLibQuadsAdapter
|
|
269
303
|
else:
|
|
270
304
|
adapter_class = RDFLibGraphsAdapter
|
|
271
|
-
adapter = adapter_class(
|
|
272
|
-
options=options,
|
|
273
|
-
dataset_factory=dataset_factory,
|
|
274
|
-
parsing_mode=parsing_mode,
|
|
275
|
-
)
|
|
305
|
+
adapter = adapter_class(options=options)
|
|
276
306
|
decoder = Decoder(adapter=adapter)
|
|
277
307
|
for frame in frames:
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
yield ds
|
|
281
|
-
|
|
282
|
-
if parsing_mode is ParsingMode.FLAT:
|
|
283
|
-
yield adapter.dataset
|
|
308
|
+
yield decoder.iter_rows(frame)
|
|
309
|
+
return
|
|
284
310
|
|
|
285
311
|
|
|
286
312
|
def parse_jelly_grouped(
|
|
287
313
|
inp: IO[bytes],
|
|
288
|
-
graph_factory: Callable[[], Graph],
|
|
289
|
-
dataset_factory: Callable[[], Dataset],
|
|
290
|
-
) -> Generator[
|
|
314
|
+
graph_factory: Callable[[], Graph] = lambda: Graph(),
|
|
315
|
+
dataset_factory: Callable[[], Dataset] = lambda: Dataset(),
|
|
316
|
+
) -> Generator[Graph] | Generator[Dataset]:
|
|
291
317
|
"""
|
|
292
318
|
Take jelly file and return generators based on the detected logical type.
|
|
293
319
|
|
|
@@ -295,38 +321,52 @@ def parse_jelly_grouped(
|
|
|
295
321
|
|
|
296
322
|
Args:
|
|
297
323
|
inp (IO[bytes]): input jelly buffered binary stream
|
|
298
|
-
graph_factory (Callable): lambda to construct a Graph
|
|
299
|
-
|
|
324
|
+
graph_factory (Callable): lambda to construct a Graph.
|
|
325
|
+
By default creates an empty in-memory Graph,
|
|
326
|
+
but you can pass something else here.
|
|
327
|
+
dataset_factory (Callable): lambda to construct a Dataset.
|
|
328
|
+
By default creates an empty in-memory Dataset,
|
|
329
|
+
but you can pass something else here.
|
|
300
330
|
|
|
301
331
|
Raises:
|
|
302
332
|
NotImplementedError: is raised if a logical type is not implemented
|
|
303
333
|
|
|
304
334
|
Yields:
|
|
305
|
-
Generator[
|
|
335
|
+
Generator[Graph] | Generator[Dataset]:
|
|
306
336
|
returns generators for graphs/datasets based on the type of input
|
|
307
337
|
|
|
308
338
|
"""
|
|
309
339
|
options, frames = get_options_and_frames(inp)
|
|
310
|
-
|
|
311
340
|
if options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
|
|
312
|
-
|
|
341
|
+
for graph in parse_triples_stream(
|
|
313
342
|
frames=frames,
|
|
314
343
|
options=options,
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
344
|
+
):
|
|
345
|
+
sink = graph_factory()
|
|
346
|
+
for graph_item in graph:
|
|
347
|
+
if isinstance(graph_item, Prefix):
|
|
348
|
+
sink.bind(graph_item.prefix, graph_item.iri)
|
|
349
|
+
else:
|
|
350
|
+
sink.add(graph_item)
|
|
351
|
+
yield sink
|
|
318
352
|
return
|
|
319
|
-
|
|
320
|
-
if options.stream_types.physical_type in (
|
|
353
|
+
elif options.stream_types.physical_type in (
|
|
321
354
|
jelly.PHYSICAL_STREAM_TYPE_QUADS,
|
|
322
355
|
jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
|
|
323
356
|
):
|
|
324
|
-
|
|
357
|
+
for dataset in parse_quads_stream(
|
|
325
358
|
frames=frames,
|
|
326
359
|
options=options,
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
360
|
+
):
|
|
361
|
+
sink = dataset_factory()
|
|
362
|
+
for item in dataset:
|
|
363
|
+
if isinstance(item, Prefix):
|
|
364
|
+
sink.bind(item.prefix, item.iri)
|
|
365
|
+
else:
|
|
366
|
+
s, p, o, graph_name = item
|
|
367
|
+
context = sink.get_context(graph_name)
|
|
368
|
+
sink.add((s, p, o, context))
|
|
369
|
+
yield sink
|
|
330
370
|
return
|
|
331
371
|
|
|
332
372
|
physical_type_name = jelly.PhysicalStreamType.Name(
|
|
@@ -336,50 +376,89 @@ def parse_jelly_grouped(
|
|
|
336
376
|
raise NotImplementedError(msg)
|
|
337
377
|
|
|
338
378
|
|
|
379
|
+
def parse_jelly_to_graph(
|
|
380
|
+
inp: IO[bytes],
|
|
381
|
+
graph_factory: Callable[[], Graph] = lambda: Graph(),
|
|
382
|
+
dataset_factory: Callable[[], Dataset] = lambda: Dataset(),
|
|
383
|
+
) -> Graph | Dataset:
|
|
384
|
+
"""
|
|
385
|
+
Add statements from Generator to provided Graph/Dataset.
|
|
386
|
+
|
|
387
|
+
Args:
|
|
388
|
+
inp (IO[bytes]): input jelly stream.
|
|
389
|
+
graph_factory (Callable[[], Graph]): factory to create Graph.
|
|
390
|
+
By default creates an empty in-memory Graph,
|
|
391
|
+
but you can pass something else here.
|
|
392
|
+
dataset_factory (Callable[[], Dataset]): factory to create Dataset.
|
|
393
|
+
By default creates an empty in-memory Dataset,
|
|
394
|
+
but you can pass something else here.
|
|
395
|
+
|
|
396
|
+
Returns:
|
|
397
|
+
Dataset | Graph: Dataset or Graph with statements.
|
|
398
|
+
|
|
399
|
+
"""
|
|
400
|
+
options, frames = get_options_and_frames(inp)
|
|
401
|
+
|
|
402
|
+
if options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
|
|
403
|
+
sink = graph_factory()
|
|
404
|
+
if options.stream_types.physical_type in (
|
|
405
|
+
jelly.PHYSICAL_STREAM_TYPE_QUADS,
|
|
406
|
+
jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
|
|
407
|
+
):
|
|
408
|
+
quad_sink = dataset_factory()
|
|
409
|
+
sink = quad_sink
|
|
410
|
+
|
|
411
|
+
for item in parse_jelly_flat(inp=inp, frames=frames, options=options):
|
|
412
|
+
if isinstance(item, Prefix):
|
|
413
|
+
sink.bind(item.prefix, item.iri)
|
|
414
|
+
if isinstance(item, Triple):
|
|
415
|
+
sink.add(item)
|
|
416
|
+
if isinstance(item, Quad):
|
|
417
|
+
s, p, o, graph_name = item
|
|
418
|
+
context = quad_sink.get_context(graph_name)
|
|
419
|
+
quad_sink.add((s, p, o, context))
|
|
420
|
+
return sink
|
|
421
|
+
|
|
422
|
+
|
|
339
423
|
def parse_jelly_flat(
|
|
340
424
|
inp: IO[bytes],
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
) ->
|
|
425
|
+
frames: Iterable[jelly.RdfStreamFrame] | None = None,
|
|
426
|
+
options: ParserOptions | None = None,
|
|
427
|
+
) -> Generator[Statement | Prefix]:
|
|
344
428
|
"""
|
|
345
|
-
Parse jelly file with FLAT physical type into
|
|
429
|
+
Parse jelly file with FLAT physical type into a Generator of stream events.
|
|
346
430
|
|
|
347
431
|
Args:
|
|
348
|
-
inp (IO[bytes]): input jelly buffered binary stream
|
|
349
|
-
|
|
350
|
-
|
|
432
|
+
inp (IO[bytes]): input jelly buffered binary stream.
|
|
433
|
+
frames (Iterable[jelly.RdfStreamFrame | None):
|
|
434
|
+
jelly frames if read before.
|
|
435
|
+
options (ParserOptions | None): stream options
|
|
436
|
+
if read before.
|
|
351
437
|
|
|
352
438
|
Raises:
|
|
353
439
|
NotImplementedError: if physical type is not supported
|
|
354
440
|
|
|
355
|
-
|
|
356
|
-
|
|
441
|
+
Yields:
|
|
442
|
+
Generator[Statement | Prefix]: Generator of stream events
|
|
357
443
|
|
|
358
444
|
"""
|
|
359
|
-
|
|
445
|
+
if not frames or not options:
|
|
446
|
+
options, frames = get_options_and_frames(inp)
|
|
360
447
|
|
|
361
448
|
if options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
options=options,
|
|
366
|
-
graph_factory=graph_factory,
|
|
367
|
-
parsing_mode=ParsingMode.FLAT,
|
|
368
|
-
)
|
|
369
|
-
)
|
|
370
|
-
|
|
449
|
+
for triples in parse_triples_stream(frames=frames, options=options):
|
|
450
|
+
yield from triples
|
|
451
|
+
return
|
|
371
452
|
if options.stream_types.physical_type in (
|
|
372
453
|
jelly.PHYSICAL_STREAM_TYPE_QUADS,
|
|
373
454
|
jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
|
|
374
455
|
):
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
)
|
|
382
|
-
)
|
|
456
|
+
for quads in parse_quads_stream(
|
|
457
|
+
frames=frames,
|
|
458
|
+
options=options,
|
|
459
|
+
):
|
|
460
|
+
yield from quads
|
|
461
|
+
return
|
|
383
462
|
physical_type_name = jelly.PhysicalStreamType.Name(
|
|
384
463
|
options.stream_types.physical_type
|
|
385
464
|
)
|
|
@@ -404,7 +483,7 @@ class RDFLibJellyParser(RDFLibParser):
|
|
|
404
483
|
if inp is None:
|
|
405
484
|
msg = "expected source to be a stream of bytes"
|
|
406
485
|
raise TypeError(msg)
|
|
407
|
-
|
|
486
|
+
parse_jelly_to_graph(
|
|
408
487
|
inp,
|
|
409
488
|
graph_factory=lambda: Graph(store=sink.store, identifier=sink.identifier),
|
|
410
489
|
dataset_factory=lambda: Dataset(store=sink.store),
|
|
@@ -1,12 +1,14 @@
|
|
|
1
|
+
# ruff: noqa: I001
|
|
1
2
|
from __future__ import annotations
|
|
2
3
|
|
|
3
4
|
from collections.abc import Generator
|
|
4
5
|
from functools import singledispatch
|
|
5
|
-
from typing import
|
|
6
|
+
from typing import Any, IO
|
|
6
7
|
from typing_extensions import override
|
|
7
8
|
|
|
8
9
|
import rdflib
|
|
9
|
-
from rdflib
|
|
10
|
+
from rdflib import Graph
|
|
11
|
+
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Dataset, QuotedGraph
|
|
10
12
|
from rdflib.serializer import Serializer as RDFLibSerializer
|
|
11
13
|
|
|
12
14
|
from pyjelly import jelly
|
|
@@ -18,7 +20,7 @@ from pyjelly.serialize.streams import (
|
|
|
18
20
|
SerializerOptions,
|
|
19
21
|
Stream,
|
|
20
22
|
TripleStream,
|
|
21
|
-
)
|
|
23
|
+
) # ruff: enable
|
|
22
24
|
|
|
23
25
|
|
|
24
26
|
class RDFLibTermEncoder(TermEncoder):
|
|
@@ -61,7 +63,10 @@ def namespace_declarations(store: Graph, stream: Stream) -> None:
|
|
|
61
63
|
|
|
62
64
|
|
|
63
65
|
@singledispatch
|
|
64
|
-
def stream_frames(
|
|
66
|
+
def stream_frames(
|
|
67
|
+
stream: Stream,
|
|
68
|
+
data: Graph, # noqa: ARG001
|
|
69
|
+
) -> Generator[jelly.RdfStreamFrame]:
|
|
65
70
|
msg = f"invalid stream implementation {stream}"
|
|
66
71
|
raise TypeError(msg)
|
|
67
72
|
|
|
@@ -165,6 +170,48 @@ def graphs_stream_frames(
|
|
|
165
170
|
yield frame
|
|
166
171
|
|
|
167
172
|
|
|
173
|
+
def guess_options(sink: Graph | Dataset) -> SerializerOptions:
|
|
174
|
+
"""
|
|
175
|
+
Guess the serializer options based on the store type.
|
|
176
|
+
|
|
177
|
+
>>> guess_options(Graph()).logical_type
|
|
178
|
+
1
|
|
179
|
+
>>> guess_options(Dataset()).logical_type
|
|
180
|
+
2
|
|
181
|
+
"""
|
|
182
|
+
logical_type = (
|
|
183
|
+
jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS
|
|
184
|
+
if isinstance(sink, Dataset)
|
|
185
|
+
else jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES
|
|
186
|
+
)
|
|
187
|
+
return SerializerOptions(logical_type=logical_type)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def guess_stream(options: SerializerOptions, sink: Graph | Dataset) -> Stream:
|
|
191
|
+
"""
|
|
192
|
+
Return an appropriate stream implementation for the given options.
|
|
193
|
+
|
|
194
|
+
Notes: if base(!) logical type is GRAPHS and Dataset is given,
|
|
195
|
+
initializes TripleStream
|
|
196
|
+
|
|
197
|
+
>>> graph_ser = RDFLibJellySerializer(Graph())
|
|
198
|
+
>>> ds_ser = RDFLibJellySerializer(Dataset())
|
|
199
|
+
|
|
200
|
+
>>> type(guess_stream(guess_options(graph_ser.store), graph_ser.store))
|
|
201
|
+
<class 'pyjelly.serialize.streams.TripleStream'>
|
|
202
|
+
>>> type(guess_stream(guess_options(ds_ser.store), ds_ser.store))
|
|
203
|
+
<class 'pyjelly.serialize.streams.QuadStream'>
|
|
204
|
+
"""
|
|
205
|
+
stream_cls: type[Stream]
|
|
206
|
+
if (options.logical_type % 10) != jelly.LOGICAL_STREAM_TYPE_GRAPHS and isinstance(
|
|
207
|
+
sink, Dataset
|
|
208
|
+
):
|
|
209
|
+
stream_cls = QuadStream
|
|
210
|
+
else:
|
|
211
|
+
stream_cls = TripleStream
|
|
212
|
+
return stream_cls.for_rdflib(options=options)
|
|
213
|
+
|
|
214
|
+
|
|
168
215
|
class RDFLibJellySerializer(RDFLibSerializer):
|
|
169
216
|
"""
|
|
170
217
|
RDFLib serializer for writing graphs in Jelly RDF stream format.
|
|
@@ -180,43 +227,6 @@ class RDFLibJellySerializer(RDFLibSerializer):
|
|
|
180
227
|
raise NotImplementedError(msg)
|
|
181
228
|
super().__init__(store)
|
|
182
229
|
|
|
183
|
-
def guess_options(self) -> SerializerOptions:
|
|
184
|
-
"""
|
|
185
|
-
Guess the serializer options based on the store type.
|
|
186
|
-
|
|
187
|
-
>>> RDFLibJellySerializer(Graph()).guess_options().logical_type
|
|
188
|
-
1
|
|
189
|
-
>>> RDFLibJellySerializer(Dataset()).guess_options().logical_type
|
|
190
|
-
2
|
|
191
|
-
"""
|
|
192
|
-
logical_type = (
|
|
193
|
-
jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS
|
|
194
|
-
if isinstance(self.store, Dataset)
|
|
195
|
-
else jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES
|
|
196
|
-
)
|
|
197
|
-
return SerializerOptions(logical_type=logical_type)
|
|
198
|
-
|
|
199
|
-
def guess_stream(self, options: SerializerOptions) -> Stream:
|
|
200
|
-
"""
|
|
201
|
-
Return an appropriate stream implementation for the given options.
|
|
202
|
-
|
|
203
|
-
>>> graph_ser = RDFLibJellySerializer(Graph())
|
|
204
|
-
>>> ds_ser = RDFLibJellySerializer(Dataset())
|
|
205
|
-
|
|
206
|
-
>>> type(graph_ser.guess_stream(graph_ser.guess_options()))
|
|
207
|
-
<class 'pyjelly.serialize.streams.TripleStream'>
|
|
208
|
-
>>> type(ds_ser.guess_stream(ds_ser.guess_options()))
|
|
209
|
-
<class 'pyjelly.serialize.streams.QuadStream'>
|
|
210
|
-
"""
|
|
211
|
-
stream_cls: type[Stream]
|
|
212
|
-
if options.logical_type != jelly.LOGICAL_STREAM_TYPE_GRAPHS and isinstance(
|
|
213
|
-
self.store, Dataset
|
|
214
|
-
):
|
|
215
|
-
stream_cls = QuadStream
|
|
216
|
-
else:
|
|
217
|
-
stream_cls = TripleStream
|
|
218
|
-
return stream_cls.for_rdflib(options=options)
|
|
219
|
-
|
|
220
230
|
@override
|
|
221
231
|
def serialize( # type: ignore[override]
|
|
222
232
|
self,
|
|
@@ -240,9 +250,56 @@ class RDFLibJellySerializer(RDFLibSerializer):
|
|
|
240
250
|
|
|
241
251
|
"""
|
|
242
252
|
if options is None:
|
|
243
|
-
options = self.
|
|
253
|
+
options = guess_options(self.store)
|
|
244
254
|
if stream is None:
|
|
245
|
-
stream =
|
|
255
|
+
stream = guess_stream(options, self.store)
|
|
246
256
|
write = write_delimited if stream.options.params.delimited else write_single
|
|
247
257
|
for stream_frame in stream_frames(stream, self.store):
|
|
248
258
|
write(stream_frame, out)
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def grouped_stream_to_frames(
|
|
262
|
+
sink_generator: Generator[Graph] | Generator[Dataset],
|
|
263
|
+
options: SerializerOptions | None = None,
|
|
264
|
+
) -> Generator[jelly.RdfStreamFrame]:
|
|
265
|
+
"""
|
|
266
|
+
Transform Graphs/Datasets into Jelly frames, one frame per Graph/Dataset.
|
|
267
|
+
|
|
268
|
+
Note: options are guessed if not provided.
|
|
269
|
+
|
|
270
|
+
Args:
|
|
271
|
+
sink_generator (Generator[Graph] | Generator[Dataset]): Generator of
|
|
272
|
+
Graphs/Dataset to transform.
|
|
273
|
+
options (SerializerOptions | None, optional): stream options to use.
|
|
274
|
+
Options are guessed based on the sink store type. Defaults to None.
|
|
275
|
+
|
|
276
|
+
Yields:
|
|
277
|
+
Generator[jelly.RdfStreamFrame]: produced Jelly frames
|
|
278
|
+
|
|
279
|
+
"""
|
|
280
|
+
stream = None
|
|
281
|
+
for sink in sink_generator:
|
|
282
|
+
if not stream:
|
|
283
|
+
if options is None:
|
|
284
|
+
options = guess_options(sink)
|
|
285
|
+
stream = guess_stream(options, sink)
|
|
286
|
+
yield from stream_frames(stream, sink)
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def grouped_stream_to_file(
|
|
290
|
+
stream: Generator[Graph] | Generator[Dataset],
|
|
291
|
+
output_file: IO[bytes],
|
|
292
|
+
**kwargs: Any,
|
|
293
|
+
) -> None:
|
|
294
|
+
"""
|
|
295
|
+
Write stream of Graphs/Datasets to a binary file.
|
|
296
|
+
|
|
297
|
+
Args:
|
|
298
|
+
stream (Generator[Graph] | Generator[Dataset]): Generator of
|
|
299
|
+
Graphs/Dataset to transform.
|
|
300
|
+
output_file (IO[bytes]): opened output file.
|
|
301
|
+
**kwargs (Any): options to pass to stream.
|
|
302
|
+
|
|
303
|
+
"""
|
|
304
|
+
for frame in grouped_stream_to_frames(stream, **kwargs):
|
|
305
|
+
write_delimited(frame, output_file)
|
pyjelly/jelly/rdf_pb2.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
|
3
3
|
# NO CHECKED-IN PROTOBUF GENCODE
|
|
4
4
|
# source: rdf.proto
|
|
5
|
-
# Protobuf Python Version:
|
|
5
|
+
# Protobuf Python Version: 6.31.0
|
|
6
6
|
"""Generated protocol buffer code."""
|
|
7
7
|
from google.protobuf import descriptor as _descriptor
|
|
8
8
|
from google.protobuf import descriptor_pool as _descriptor_pool
|
|
@@ -11,8 +11,8 @@ from google.protobuf import symbol_database as _symbol_database
|
|
|
11
11
|
from google.protobuf.internal import builder as _builder
|
|
12
12
|
_runtime_version.ValidateProtobufRuntimeVersion(
|
|
13
13
|
_runtime_version.Domain.PUBLIC,
|
|
14
|
-
|
|
15
|
-
|
|
14
|
+
6,
|
|
15
|
+
31,
|
|
16
16
|
0,
|
|
17
17
|
'',
|
|
18
18
|
'rdf.proto'
|
pyjelly/jelly/rdf_pb2.pyi
CHANGED
|
@@ -2,7 +2,8 @@ from google.protobuf.internal import containers as _containers
|
|
|
2
2
|
from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
|
|
3
3
|
from google.protobuf import descriptor as _descriptor
|
|
4
4
|
from google.protobuf import message as _message
|
|
5
|
-
from
|
|
5
|
+
from collections.abc import Iterable as _Iterable, Mapping as _Mapping
|
|
6
|
+
from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union
|
|
6
7
|
|
|
7
8
|
DESCRIPTOR: _descriptor.FileDescriptor
|
|
8
9
|
|
pyjelly/parse/decode.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from abc import ABCMeta, abstractmethod
|
|
4
|
-
from collections.abc import Iterable, Sequence
|
|
4
|
+
from collections.abc import Iterable, Iterator, Sequence
|
|
5
5
|
from enum import Enum, auto
|
|
6
6
|
from typing import Any, ClassVar, NamedTuple
|
|
7
7
|
from typing_extensions import Never
|
|
@@ -166,23 +166,23 @@ class Decoder:
|
|
|
166
166
|
def options(self) -> ParserOptions:
|
|
167
167
|
return self.adapter.options
|
|
168
168
|
|
|
169
|
-
def
|
|
169
|
+
def iter_rows(self, frame: jelly.RdfStreamFrame) -> Iterator[Any]:
|
|
170
170
|
"""
|
|
171
|
-
|
|
171
|
+
Iterate through rows in the frame.
|
|
172
172
|
|
|
173
173
|
Args:
|
|
174
174
|
frame (jelly.RdfStreamFrame): jelly frame
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
Any: custom obj based on adapter logic
|
|
175
|
+
Yields:
|
|
176
|
+
Iterator[Any]: decoded rows
|
|
178
177
|
|
|
179
178
|
"""
|
|
180
179
|
for row_owner in frame.rows:
|
|
181
180
|
row = getattr(row_owner, row_owner.WhichOneof("row"))
|
|
182
|
-
self.decode_row(row)
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
181
|
+
decoded_row = self.decode_row(row)
|
|
182
|
+
if isinstance(
|
|
183
|
+
row, (jelly.RdfTriple, jelly.RdfQuad, jelly.RdfNamespaceDeclaration)
|
|
184
|
+
):
|
|
185
|
+
yield decoded_row
|
|
186
186
|
|
|
187
187
|
def decode_row(self, row: Any) -> Any | None:
|
|
188
188
|
"""
|
pyjelly/parse/ioutils.py
CHANGED
|
@@ -55,8 +55,7 @@ def delimited_jelly_hint(header: bytes) -> bool:
|
|
|
55
55
|
|
|
56
56
|
def frame_iterator(inp: IO[bytes]) -> Generator[jelly.RdfStreamFrame]:
|
|
57
57
|
while frame := parse_length_prefixed(jelly.RdfStreamFrame, inp):
|
|
58
|
-
|
|
59
|
-
yield frame
|
|
58
|
+
yield frame
|
|
60
59
|
|
|
61
60
|
|
|
62
61
|
def get_options_and_frames(
|
|
@@ -82,14 +81,21 @@ def get_options_and_frames(
|
|
|
82
81
|
inp.seek(-len(bytes_read), os.SEEK_CUR)
|
|
83
82
|
|
|
84
83
|
if is_delimited:
|
|
84
|
+
first_frame = None
|
|
85
|
+
skipped_frames = []
|
|
85
86
|
frames = frame_iterator(inp)
|
|
86
|
-
|
|
87
|
+
for frame in frames:
|
|
88
|
+
if not frame.rows:
|
|
89
|
+
skipped_frames.append(frame)
|
|
90
|
+
else:
|
|
91
|
+
first_frame = frame
|
|
92
|
+
break
|
|
87
93
|
if first_frame is None:
|
|
88
94
|
msg = "No non-empty frames found in the stream"
|
|
89
95
|
raise JellyConformanceError(msg)
|
|
90
96
|
|
|
91
97
|
options = options_from_frame(first_frame, delimited=True)
|
|
92
|
-
return options, chain((first_frame,), frames)
|
|
98
|
+
return options, chain(skipped_frames, (first_frame,), frames)
|
|
93
99
|
|
|
94
100
|
frame = parse(jelly.RdfStreamFrame, inp.read())
|
|
95
101
|
|
pyjelly/serialize/flows.py
CHANGED
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
from collections import UserList
|
|
4
4
|
from collections.abc import Iterable
|
|
5
5
|
from dataclasses import dataclass
|
|
6
|
-
from typing import ClassVar
|
|
6
|
+
from typing import Any, ClassVar
|
|
7
7
|
from typing_extensions import override
|
|
8
8
|
|
|
9
9
|
from pyjelly import jelly
|
|
@@ -16,11 +16,24 @@ class FrameFlow(UserList[jelly.RdfStreamRow]):
|
|
|
16
16
|
Abstract base class for producing Jelly frames from RDF stream rows.
|
|
17
17
|
|
|
18
18
|
Collects stream rows and assembles them into RdfStreamFrame objects when ready.
|
|
19
|
+
|
|
20
|
+
Allows for passing LogicalStreamType, required for
|
|
21
|
+
logical subtypes and non-delimited streams.
|
|
19
22
|
"""
|
|
20
23
|
|
|
21
24
|
logical_type: jelly.LogicalStreamType
|
|
22
25
|
registry: ClassVar[dict[jelly.LogicalStreamType, type[FrameFlow]]] = {}
|
|
23
26
|
|
|
27
|
+
def __init__(
|
|
28
|
+
self,
|
|
29
|
+
initlist: Iterable[jelly.RdfStreamRow] | None = None,
|
|
30
|
+
*,
|
|
31
|
+
logical_type: jelly.LogicalStreamType | None = None,
|
|
32
|
+
**__kwargs: Any,
|
|
33
|
+
) -> None:
|
|
34
|
+
super().__init__(initlist)
|
|
35
|
+
self.logical_type = logical_type or self.__class__.logical_type
|
|
36
|
+
|
|
24
37
|
def frame_from_graph(self) -> jelly.RdfStreamFrame | None:
|
|
25
38
|
"""
|
|
26
39
|
Treat the current rows as a graph and produce a frame.
|
|
@@ -71,15 +84,6 @@ class ManualFrameFlow(FrameFlow):
|
|
|
71
84
|
|
|
72
85
|
logical_type = jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED
|
|
73
86
|
|
|
74
|
-
def __init__(
|
|
75
|
-
self,
|
|
76
|
-
initlist: Iterable[jelly.RdfStreamRow] | None = None,
|
|
77
|
-
*,
|
|
78
|
-
logical_type: jelly.LogicalStreamType = jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED,
|
|
79
|
-
) -> None:
|
|
80
|
-
super().__init__(initlist)
|
|
81
|
-
self.logical_type = logical_type
|
|
82
|
-
|
|
83
87
|
|
|
84
88
|
@dataclass
|
|
85
89
|
class BoundedFrameFlow(FrameFlow):
|
|
@@ -92,13 +96,15 @@ class BoundedFrameFlow(FrameFlow):
|
|
|
92
96
|
logical_type = jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED
|
|
93
97
|
frame_size: int
|
|
94
98
|
|
|
99
|
+
@override
|
|
95
100
|
def __init__(
|
|
96
101
|
self,
|
|
97
102
|
initlist: Iterable[jelly.RdfStreamRow] | None = None,
|
|
103
|
+
logical_type: jelly.LogicalStreamType | None = None,
|
|
98
104
|
*,
|
|
99
105
|
frame_size: int | None = None,
|
|
100
106
|
) -> None:
|
|
101
|
-
super().__init__(initlist)
|
|
107
|
+
super().__init__(initlist, logical_type=logical_type)
|
|
102
108
|
self.frame_size = frame_size or DEFAULT_FRAME_SIZE
|
|
103
109
|
|
|
104
110
|
@override
|
|
@@ -153,7 +159,6 @@ class DatasetsFrameFlow(FrameFlow):
|
|
|
153
159
|
return self.to_stream_frame()
|
|
154
160
|
|
|
155
161
|
|
|
156
|
-
# TODO(Nastya): issue #184
|
|
157
162
|
FLOW_DISPATCH: dict[jelly.LogicalStreamType, type[FrameFlow]] = {
|
|
158
163
|
jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES: FlatTriplesFrameFlow,
|
|
159
164
|
jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS: FlatQuadsFrameFlow,
|
|
@@ -166,18 +171,23 @@ def flow_for_type(logical_type: jelly.LogicalStreamType) -> type[FrameFlow]:
|
|
|
166
171
|
"""
|
|
167
172
|
Return flow based on logical type requested.
|
|
168
173
|
|
|
174
|
+
Note: uses base logical type for subtypes (i.e., SUBJECT_GRAPHS uses
|
|
175
|
+
the same flow as its base type GRAPHS).
|
|
176
|
+
|
|
169
177
|
Args:
|
|
170
178
|
logical_type (jelly.LogicalStreamType): logical type requested.
|
|
171
179
|
|
|
172
180
|
Raises:
|
|
173
|
-
NotImplementedError: if logical type not supported.
|
|
181
|
+
NotImplementedError: if (base) logical stream type is not supported.
|
|
174
182
|
|
|
175
183
|
Returns:
|
|
176
184
|
type[FrameFlow]: FrameFlow for respective logical type.
|
|
177
185
|
|
|
178
186
|
"""
|
|
179
187
|
try:
|
|
180
|
-
|
|
188
|
+
base_logical_type_value = logical_type % 10
|
|
189
|
+
base_name = jelly.LogicalStreamType.Name(base_logical_type_value)
|
|
190
|
+
return FLOW_DISPATCH[getattr(jelly.LogicalStreamType, base_name)]
|
|
181
191
|
except KeyError:
|
|
182
192
|
msg = (
|
|
183
193
|
"unsupported logical stream type: "
|
pyjelly/serialize/streams.py
CHANGED
|
@@ -78,9 +78,12 @@ class Stream:
|
|
|
78
78
|
jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES,
|
|
79
79
|
jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS,
|
|
80
80
|
):
|
|
81
|
-
flow = flow_class(
|
|
81
|
+
flow = flow_class(
|
|
82
|
+
logical_type=self.options.logical_type,
|
|
83
|
+
frame_size=self.options.frame_size,
|
|
84
|
+
)
|
|
82
85
|
else:
|
|
83
|
-
flow = flow_class()
|
|
86
|
+
flow = flow_class(logical_type=self.options.logical_type)
|
|
84
87
|
else:
|
|
85
88
|
flow = ManualFrameFlow(logical_type=self.options.logical_type)
|
|
86
89
|
return flow
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pyjelly
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Jelly-RDF implementation for Python
|
|
5
5
|
Project-URL: Homepage, https://w3id.org/jelly/pyjelly
|
|
6
6
|
Project-URL: Documentation, https://w3id.org/jelly/pyjelly
|
|
@@ -28,7 +28,7 @@ Classifier: Topic :: File Formats
|
|
|
28
28
|
Classifier: Topic :: Software Development :: Libraries
|
|
29
29
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
30
30
|
Requires-Python: >=3.9
|
|
31
|
-
Requires-Dist: protobuf>=
|
|
31
|
+
Requires-Dist: protobuf>=6.30.0
|
|
32
32
|
Requires-Dist: typing-extensions>=4.12.2
|
|
33
33
|
Provides-Extra: rdflib
|
|
34
34
|
Requires-Dist: rdflib>=7.1.4; extra == 'rdflib'
|
|
@@ -50,13 +50,13 @@ Description-Content-Type: text/markdown
|
|
|
50
50
|
|
|
51
51
|
## Getting started
|
|
52
52
|
|
|
53
|
-
|
|
53
|
+
Install pyjelly from **[PyPI](https://pypi.org/project/pyjelly/)**:
|
|
54
54
|
|
|
55
55
|
```bash
|
|
56
56
|
pip install pyjelly[rdflib]
|
|
57
57
|
```
|
|
58
58
|
|
|
59
|
-
To write an RDF graph to a Jelly file
|
|
59
|
+
To write an RDF graph to a Jelly file:
|
|
60
60
|
|
|
61
61
|
```python
|
|
62
62
|
from rdflib import Graph
|
|
@@ -66,7 +66,7 @@ g.parse("http://xmlns.com/foaf/spec/index.rdf")
|
|
|
66
66
|
g.serialize(destination="foaf.jelly", format="jelly")
|
|
67
67
|
```
|
|
68
68
|
|
|
69
|
-
To read a Jelly file and convert it to an rdflib Graph
|
|
69
|
+
To read a Jelly file and convert it to an rdflib `Graph`:
|
|
70
70
|
|
|
71
71
|
```python
|
|
72
72
|
from rdflib import Graph
|
|
@@ -75,19 +75,19 @@ g = Graph()
|
|
|
75
75
|
g.parse("foaf.jelly", format="jelly")
|
|
76
76
|
```
|
|
77
77
|
|
|
78
|
-
**See
|
|
78
|
+
**See [our documentation](https://w3id.org/jelly/pyjelly) for [further examples](https://w3id.org/jelly/pyjelly/dev/getting-started/), a full [API reference](https://w3id.org/jelly/pyjelly/dev/api), and more.**
|
|
79
79
|
|
|
80
80
|
## Contributing and support
|
|
81
81
|
|
|
82
82
|
This project is being actively developed – you can stay tuned by [watching this repository](https://docs.github.com/en/account-and-profile/managing-subscriptions-and-notifications-on-github/setting-up-notifications/about-notifications#subscription-options).
|
|
83
83
|
|
|
84
|
-
|
|
84
|
+
Join the **[Jelly Discord chat](https://discord.gg/A8sN5XwVa5)** to ask questions about pyjelly and to be up-to-date with the development activities.
|
|
85
85
|
|
|
86
86
|
### Commercial support
|
|
87
87
|
|
|
88
88
|
**[NeverBlink](https://neverblink.eu)** provides commercial support services for Jelly, including implementing custom features, system integrations, implementations for new frameworks, benchmarking, and more.
|
|
89
89
|
|
|
90
|
-
|
|
90
|
+
### Contributing
|
|
91
91
|
|
|
92
92
|
If you'd like to contribute, check out our [contributing guidelines](CONTRIBUTING.md).
|
|
93
93
|
|
|
@@ -6,23 +6,23 @@ pyjelly/_proto/patch.proto,sha256=gASUm0xDG9J1advNoq_cCsJYxudTbQaiZQBq4oW3kw4,52
|
|
|
6
6
|
pyjelly/_proto/rdf.proto,sha256=EKxyG421B4m0Wx5-6jjojdga_hA3jpZfF6-T3lMc0hI,12763
|
|
7
7
|
pyjelly/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
pyjelly/integrations/rdflib/__init__.py,sha256=lpIz6iildMf5bDvj3aBqZJ7kgKFrTx_tsqSb6PkLis0,552
|
|
9
|
-
pyjelly/integrations/rdflib/parse.py,sha256=
|
|
10
|
-
pyjelly/integrations/rdflib/serialize.py,sha256=
|
|
9
|
+
pyjelly/integrations/rdflib/parse.py,sha256=_H4f5qUU0IvMPaq1Tnm2UF1aXX-DY1qPn_7GmoIa4V0,13717
|
|
10
|
+
pyjelly/integrations/rdflib/serialize.py,sha256=etFY10HjmsjmuR6BjvWw32f2zBThW26mTGFn67jcOdc,9524
|
|
11
11
|
pyjelly/jelly/__init__.py,sha256=9kacwn8Ew_1fcgj1abz6miEz-AtUdPT2ltFWaRIE5VE,126
|
|
12
|
-
pyjelly/jelly/rdf_pb2.py,sha256=
|
|
13
|
-
pyjelly/jelly/rdf_pb2.pyi,sha256=-
|
|
12
|
+
pyjelly/jelly/rdf_pb2.py,sha256=qjgS3kQnCJqoOmgzvgk1BeYxGbeDX2zygJPc2vDjRts,8952
|
|
13
|
+
pyjelly/jelly/rdf_pb2.pyi,sha256=-gxZO-r2wyN68l83XomySz60c82SZmoPKh1HxamBjZs,11816
|
|
14
14
|
pyjelly/parse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
-
pyjelly/parse/decode.py,sha256=
|
|
16
|
-
pyjelly/parse/ioutils.py,sha256=
|
|
15
|
+
pyjelly/parse/decode.py,sha256=ERgCLzBvbkJWTlg3RT0p-h8J60KNqpQ4mj_-JHoD7kg,13299
|
|
16
|
+
pyjelly/parse/ioutils.py,sha256=O3wRtL5tf1WyIZ1LTfHjHwjKEGrhIWqFisOWjYmspNg,3434
|
|
17
17
|
pyjelly/parse/lookup.py,sha256=1AbdZEycLC4tRfh3fgF5hv5PrhwhdWvCUC53iHt-E4c,2193
|
|
18
18
|
pyjelly/serialize/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
19
|
pyjelly/serialize/encode.py,sha256=WSeqxyBtxpMrWrefhnmNLf8ummzlT0rY7NKoUzFY8NQ,9498
|
|
20
|
-
pyjelly/serialize/flows.py,sha256=
|
|
20
|
+
pyjelly/serialize/flows.py,sha256=0C2soigJKyHr3xoR-7v0kc1RL8COwnuCRd4iVZpukFU,5524
|
|
21
21
|
pyjelly/serialize/ioutils.py,sha256=2_NaadLfHO3jKR1ZV7aK6jQ09sPKBar9iLFHYwourz8,400
|
|
22
22
|
pyjelly/serialize/lookup.py,sha256=h0lYFjdB6CIuN2DzAW6EE4ILJFUuto3paAK6DG1DZYg,4091
|
|
23
|
-
pyjelly/serialize/streams.py,sha256=
|
|
24
|
-
pyjelly-0.
|
|
25
|
-
pyjelly-0.
|
|
26
|
-
pyjelly-0.
|
|
27
|
-
pyjelly-0.
|
|
28
|
-
pyjelly-0.
|
|
23
|
+
pyjelly/serialize/streams.py,sha256=F_T3k9yLSPtUW2ZaL99hmjlPKmgG4nYNeNXUiee3jEY,8421
|
|
24
|
+
pyjelly-0.4.0.dist-info/METADATA,sha256=iLIzLhRWIN9fj1Fua45fBosWbLS1bQOnLZmVsGbfFQw,4607
|
|
25
|
+
pyjelly-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
26
|
+
pyjelly-0.4.0.dist-info/entry_points.txt,sha256=kUG0p9zso7HpitdMaQaXEj_KSqgOGsL0Ky9ARbecN1g,339
|
|
27
|
+
pyjelly-0.4.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
28
|
+
pyjelly-0.4.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|