pyjelly 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyjelly might be problematic. Click here for more details.
- pyjelly/integrations/rdflib/parse.py +261 -142
- pyjelly/integrations/rdflib/serialize.py +68 -0
- pyjelly/parse/decode.py +163 -2
- pyjelly/parse/ioutils.py +16 -0
- pyjelly/serialize/encode.py +117 -0
- pyjelly/serialize/flows.py +48 -1
- pyjelly/serialize/streams.py +82 -1
- {pyjelly-0.2.3.dist-info → pyjelly-0.3.0.dist-info}/METADATA +1 -1
- {pyjelly-0.2.3.dist-info → pyjelly-0.3.0.dist-info}/RECORD +12 -12
- {pyjelly-0.2.3.dist-info → pyjelly-0.3.0.dist-info}/WHEEL +0 -0
- {pyjelly-0.2.3.dist-info → pyjelly-0.3.0.dist-info}/entry_points.txt +0 -0
- {pyjelly-0.2.3.dist-info → pyjelly-0.3.0.dist-info}/licenses/LICENSE +0 -0
pyjelly/parse/decode.py
CHANGED
|
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from abc import ABCMeta, abstractmethod
|
|
4
4
|
from collections.abc import Iterable, Sequence
|
|
5
|
+
from enum import Enum, auto
|
|
5
6
|
from typing import Any, ClassVar, NamedTuple
|
|
6
7
|
from typing_extensions import Never
|
|
7
8
|
|
|
@@ -10,6 +11,21 @@ from pyjelly.options import LookupPreset, StreamParameters, StreamTypes
|
|
|
10
11
|
from pyjelly.parse.lookup import LookupDecoder
|
|
11
12
|
|
|
12
13
|
|
|
14
|
+
class ParsingMode(Enum):
|
|
15
|
+
"""
|
|
16
|
+
Specifies how jelly frames should be treated.
|
|
17
|
+
|
|
18
|
+
Modes:
|
|
19
|
+
FLAT
|
|
20
|
+
Yield all frames as one Graph or Dataset.
|
|
21
|
+
GROUPED
|
|
22
|
+
Yield one Graph/Dataset per frame (grouped parsing).
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
FLAT = auto()
|
|
26
|
+
GROUPED = auto()
|
|
27
|
+
|
|
28
|
+
|
|
13
29
|
class ParserOptions(NamedTuple):
|
|
14
30
|
stream_types: StreamTypes
|
|
15
31
|
lookup_preset: LookupPreset
|
|
@@ -21,6 +37,21 @@ def options_from_frame(
|
|
|
21
37
|
*,
|
|
22
38
|
delimited: bool,
|
|
23
39
|
) -> ParserOptions:
|
|
40
|
+
"""
|
|
41
|
+
Fill stream options based on the options row.
|
|
42
|
+
|
|
43
|
+
Notes:
|
|
44
|
+
generalized_statements, rdf_star, and namespace declarations
|
|
45
|
+
are set to false by default
|
|
46
|
+
|
|
47
|
+
Args:
|
|
48
|
+
frame (jelly.RdfStreamFrame): first non-empty frame from the stream
|
|
49
|
+
delimited (bool): derived delimited flag
|
|
50
|
+
|
|
51
|
+
Returns:
|
|
52
|
+
ParserOptions: filled options with types/lookups/stream parameters information
|
|
53
|
+
|
|
54
|
+
"""
|
|
24
55
|
row = frame.rows[0]
|
|
25
56
|
options = row.options
|
|
26
57
|
return ParserOptions(
|
|
@@ -52,8 +83,11 @@ def _adapter_missing(feature: str, *, stream_types: StreamTypes) -> Never:
|
|
|
52
83
|
|
|
53
84
|
|
|
54
85
|
class Adapter(metaclass=ABCMeta):
|
|
55
|
-
def __init__(
|
|
86
|
+
def __init__(
|
|
87
|
+
self, options: ParserOptions, parsing_mode: ParsingMode = ParsingMode.FLAT
|
|
88
|
+
) -> None:
|
|
56
89
|
self.options = options
|
|
90
|
+
self.parsing_mode = parsing_mode
|
|
57
91
|
|
|
58
92
|
# Obligatory abstract methods--all adapters must implement these
|
|
59
93
|
@abstractmethod
|
|
@@ -106,6 +140,18 @@ class Adapter(metaclass=ABCMeta):
|
|
|
106
140
|
|
|
107
141
|
class Decoder:
|
|
108
142
|
def __init__(self, adapter: Adapter) -> None:
|
|
143
|
+
"""
|
|
144
|
+
Initialize decoder.
|
|
145
|
+
|
|
146
|
+
Initializes decoder with lookup tables of preset sizes,
|
|
147
|
+
integration-dependent adapter and empty repeated terms dictionary.
|
|
148
|
+
|
|
149
|
+
Args:
|
|
150
|
+
adapter (Adapter): integration-dependent adapter that specifies terms
|
|
151
|
+
conversion to specific objects, framing,
|
|
152
|
+
namespace declarations, and graphs/datasets forming.
|
|
153
|
+
|
|
154
|
+
"""
|
|
109
155
|
self.adapter = adapter
|
|
110
156
|
self.names = LookupDecoder(lookup_size=self.options.lookup_preset.max_names)
|
|
111
157
|
self.prefixes = LookupDecoder(
|
|
@@ -121,12 +167,42 @@ class Decoder:
|
|
|
121
167
|
return self.adapter.options
|
|
122
168
|
|
|
123
169
|
def decode_frame(self, frame: jelly.RdfStreamFrame) -> Any:
|
|
170
|
+
"""
|
|
171
|
+
Decode a frame to custom object based on adapter implementation.
|
|
172
|
+
|
|
173
|
+
Args:
|
|
174
|
+
frame (jelly.RdfStreamFrame): jelly frame
|
|
175
|
+
|
|
176
|
+
Returns:
|
|
177
|
+
Any: object built by the adapter's frame() in GROUPED parsing mode; None otherwise
|
|
178
|
+
|
|
179
|
+
"""
|
|
124
180
|
for row_owner in frame.rows:
|
|
125
181
|
row = getattr(row_owner, row_owner.WhichOneof("row"))
|
|
126
182
|
self.decode_row(row)
|
|
127
|
-
|
|
183
|
+
if self.adapter.parsing_mode is ParsingMode.GROUPED:
|
|
184
|
+
return self.adapter.frame()
|
|
185
|
+
return None
|
|
128
186
|
|
|
129
187
|
def decode_row(self, row: Any) -> Any | None:
|
|
188
|
+
"""
|
|
189
|
+
Decode a row based on its type.
|
|
190
|
+
|
|
191
|
+
Notes: uses custom adapters to decode triples/quads, namespace declarations,
|
|
192
|
+
graph start/end.
|
|
193
|
+
|
|
194
|
+
Args:
|
|
195
|
+
row (Any): protobuf row message
|
|
196
|
+
|
|
197
|
+
Raises:
|
|
198
|
+
TypeError: raises error if this type of protobuf message does not have
|
|
199
|
+
a respective handler
|
|
200
|
+
|
|
201
|
+
Returns:
|
|
202
|
+
Any | None: decoded row -
|
|
203
|
+
result of calling the handler registered for the row's type
|
|
204
|
+
|
|
205
|
+
"""
|
|
130
206
|
try:
|
|
131
207
|
decode_row = self.row_handlers[type(row)]
|
|
132
208
|
except KeyError:
|
|
@@ -145,15 +221,51 @@ class Decoder:
|
|
|
145
221
|
assert lookup_preset.max_names == options.max_name_table_size
|
|
146
222
|
|
|
147
223
|
def ingest_prefix_entry(self, entry: jelly.RdfPrefixEntry) -> None:
|
|
224
|
+
"""
|
|
225
|
+
Update prefix lookup table based on the table entry.
|
|
226
|
+
|
|
227
|
+
Args:
|
|
228
|
+
entry (jelly.RdfPrefixEntry): prefix message, containing id and value
|
|
229
|
+
|
|
230
|
+
"""
|
|
148
231
|
self.prefixes.assign_entry(index=entry.id, value=entry.value)
|
|
149
232
|
|
|
150
233
|
def ingest_name_entry(self, entry: jelly.RdfNameEntry) -> None:
|
|
234
|
+
"""
|
|
235
|
+
Update name lookup table based on the table entry.
|
|
236
|
+
|
|
237
|
+
Args:
|
|
238
|
+
entry (jelly.RdfNameEntry): name message, containing id and value
|
|
239
|
+
|
|
240
|
+
"""
|
|
151
241
|
self.names.assign_entry(index=entry.id, value=entry.value)
|
|
152
242
|
|
|
153
243
|
def ingest_datatype_entry(self, entry: jelly.RdfDatatypeEntry) -> None:
|
|
244
|
+
"""
|
|
245
|
+
Update datatype lookup table based on the table entry.
|
|
246
|
+
|
|
247
|
+
Args:
|
|
248
|
+
entry (jelly.RdfDatatypeEntry): datatype message, containing id and value
|
|
249
|
+
|
|
250
|
+
"""
|
|
154
251
|
self.datatypes.assign_entry(index=entry.id, value=entry.value)
|
|
155
252
|
|
|
156
253
|
def decode_term(self, term: Any) -> Any:
|
|
254
|
+
"""
|
|
255
|
+
Decode a term based on its type: IRI/literal/BN/default graph.
|
|
256
|
+
|
|
257
|
+
Notes: requires a custom adapter with implemented methods for terms decoding.
|
|
258
|
+
|
|
259
|
+
Args:
|
|
260
|
+
term (Any): IRI/literal/BN(string)/Default graph message
|
|
261
|
+
|
|
262
|
+
Raises:
|
|
263
|
+
TypeError: raises error if no handler for the term is found
|
|
264
|
+
|
|
265
|
+
Returns:
|
|
266
|
+
Any: decoded term (currently, rdflib objects, e.g., rdflib.term.URIRef)
|
|
267
|
+
|
|
268
|
+
"""
|
|
157
269
|
try:
|
|
158
270
|
decode_term = self.term_handlers[type(term)]
|
|
159
271
|
except KeyError:
|
|
@@ -162,6 +274,16 @@ class Decoder:
|
|
|
162
274
|
return decode_term(self, term)
|
|
163
275
|
|
|
164
276
|
def decode_iri(self, iri: jelly.RdfIri) -> Any:
|
|
277
|
+
"""
|
|
278
|
+
Decode RdfIri message to IRI using a custom adapter.
|
|
279
|
+
|
|
280
|
+
Args:
|
|
281
|
+
iri (jelly.RdfIri): RdfIri message
|
|
282
|
+
|
|
283
|
+
Returns:
|
|
284
|
+
Any: IRI, based on adapter implementation, e.g., rdflib.term.URIRef
|
|
285
|
+
|
|
286
|
+
"""
|
|
165
287
|
name = self.names.decode_name_term_index(iri.name_id)
|
|
166
288
|
prefix = self.prefixes.decode_prefix_term_index(iri.prefix_id)
|
|
167
289
|
return self.adapter.iri(iri=prefix + name)
|
|
@@ -170,9 +292,32 @@ class Decoder:
|
|
|
170
292
|
return self.adapter.default_graph()
|
|
171
293
|
|
|
172
294
|
def decode_bnode(self, bnode: str) -> Any:
|
|
295
|
+
"""
|
|
296
|
+
Decode string message to blank node (BN) using a custom adapter.
|
|
297
|
+
|
|
298
|
+
Args:
|
|
299
|
+
bnode (str): blank node id
|
|
300
|
+
|
|
301
|
+
Returns:
|
|
302
|
+
Any: blank node object from the custom adapter
|
|
303
|
+
|
|
304
|
+
"""
|
|
173
305
|
return self.adapter.bnode(bnode)
|
|
174
306
|
|
|
175
307
|
def decode_literal(self, literal: jelly.RdfLiteral) -> Any:
|
|
308
|
+
"""
|
|
309
|
+
Decode RdfLiteral to literal based on custom adapter implementation.
|
|
310
|
+
|
|
311
|
+
Notes: checks for langtag existence;
|
|
312
|
+
for datatype, checks for non-zero table size and datatype field presence
|
|
313
|
+
|
|
314
|
+
Args:
|
|
315
|
+
literal (jelly.RdfLiteral): RdfLiteral message
|
|
316
|
+
|
|
317
|
+
Returns:
|
|
318
|
+
Any: literal returned by the custom adapter
|
|
319
|
+
|
|
320
|
+
"""
|
|
176
321
|
language = datatype = None
|
|
177
322
|
if literal.langtag:
|
|
178
323
|
language = literal.langtag
|
|
@@ -203,6 +348,22 @@ class Decoder:
|
|
|
203
348
|
statement: jelly.RdfTriple | jelly.RdfQuad,
|
|
204
349
|
oneofs: Sequence[str],
|
|
205
350
|
) -> Any:
|
|
351
|
+
"""
|
|
352
|
+
Decode a triple/quad message.
|
|
353
|
+
|
|
354
|
+
Notes: also updates repeated terms dictionary
|
|
355
|
+
|
|
356
|
+
Args:
|
|
357
|
+
statement (jelly.RdfTriple | jelly.RdfQuad): triple/quad message
|
|
358
|
+
oneofs (Sequence[str]): terms s/p/o/g(if quads)
|
|
359
|
+
|
|
360
|
+
Raises:
|
|
361
|
+
ValueError: if a missing repeated term is encountered
|
|
362
|
+
|
|
363
|
+
Returns:
|
|
364
|
+
Any: a list of decoded terms
|
|
365
|
+
|
|
366
|
+
"""
|
|
206
367
|
terms = []
|
|
207
368
|
for oneof in oneofs:
|
|
208
369
|
field = statement.WhichOneof(oneof)
|
pyjelly/parse/ioutils.py
CHANGED
|
@@ -62,6 +62,22 @@ def frame_iterator(inp: IO[bytes]) -> Generator[jelly.RdfStreamFrame]:
|
|
|
62
62
|
def get_options_and_frames(
|
|
63
63
|
inp: IO[bytes],
|
|
64
64
|
) -> tuple[ParserOptions, Iterator[jelly.RdfStreamFrame]]:
|
|
65
|
+
"""
|
|
66
|
+
Return stream options and frames from the buffered binary stream.
|
|
67
|
+
|
|
68
|
+
Args:
|
|
69
|
+
inp (IO[bytes]): jelly buffered binary stream
|
|
70
|
+
|
|
71
|
+
Raises:
|
|
72
|
+
JellyConformanceError: if no non-empty frames are detected in the delimited stream
|
|
73
|
+
JellyConformanceError: if the stream is non-delimited and
|
|
75
|
+
no rows are detected (empty frame)
|
|
75
|
+
|
|
76
|
+
Returns:
|
|
77
|
+
tuple[ParserOptions, Iterator[jelly.RdfStreamFrame]]: ParserOptions holds:
|
|
78
|
+
stream types, lookup presets and other stream options
|
|
79
|
+
|
|
80
|
+
"""
|
|
65
81
|
is_delimited = delimited_jelly_hint(bytes_read := inp.read(3))
|
|
66
82
|
inp.seek(-len(bytes_read), os.SEEK_CUR)
|
|
67
83
|
|
pyjelly/serialize/encode.py
CHANGED
|
@@ -11,6 +11,16 @@ from pyjelly.serialize.lookup import LookupEncoder
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def split_iri(iri_string: str) -> tuple[str, str]:
|
|
14
|
+
"""
|
|
15
|
+
Split iri into prefix and name.
|
|
16
|
+
|
|
17
|
+
Args:
|
|
18
|
+
iri_string (str): full iri string.
|
|
19
|
+
|
|
20
|
+
Returns:
|
|
21
|
+
tuple[str, str]: iri's prefix and name.
|
|
22
|
+
|
|
23
|
+
"""
|
|
14
24
|
name = iri_string
|
|
15
25
|
prefix = ""
|
|
16
26
|
for sep in "#", "/":
|
|
@@ -47,6 +57,16 @@ class TermEncoder:
|
|
|
47
57
|
self.datatypes = LookupEncoder(lookup_size=lookup_preset.max_datatypes)
|
|
48
58
|
|
|
49
59
|
def encode_iri(self, iri_string: str) -> RowsAnd[jelly.RdfIri]:
|
|
60
|
+
"""
|
|
61
|
+
Encode iri.
|
|
62
|
+
|
|
63
|
+
Args:
|
|
64
|
+
iri_string (str): full iri in string format.
|
|
65
|
+
|
|
66
|
+
Returns:
|
|
67
|
+
RowsAnd[jelly.RdfIri]: extra rows and protobuf RdfIri message.
|
|
68
|
+
|
|
69
|
+
"""
|
|
50
70
|
prefix, name = split_iri(iri_string)
|
|
51
71
|
if self.prefixes.lookup.max_size:
|
|
52
72
|
prefix_entry_index = self.prefixes.encode_entry_index(prefix)
|
|
@@ -70,9 +90,27 @@ class TermEncoder:
|
|
|
70
90
|
return term_rows, jelly.RdfIri(prefix_id=prefix_index, name_id=name_index)
|
|
71
91
|
|
|
72
92
|
def encode_default_graph(self) -> RowsAnd[jelly.RdfDefaultGraph]:
|
|
93
|
+
"""
|
|
94
|
+
Encode default graph.
|
|
95
|
+
|
|
96
|
+
Returns:
|
|
97
|
+
RowsAnd[jelly.RdfDefaultGraph]: empty extra rows and
|
|
98
|
+
default graph message.
|
|
99
|
+
|
|
100
|
+
"""
|
|
73
101
|
return (), jelly.RdfDefaultGraph()
|
|
74
102
|
|
|
75
103
|
def encode_bnode(self, bnode: str) -> RowsAnd[str]:
|
|
104
|
+
"""
|
|
105
|
+
Encode blank node (BN).
|
|
106
|
+
|
|
107
|
+
Args:
|
|
108
|
+
bnode (str): BN internal identifier in string format.
|
|
109
|
+
|
|
110
|
+
Returns:
|
|
111
|
+
RowsAnd[str]: empty extra rows and original BN string.
|
|
112
|
+
|
|
113
|
+
"""
|
|
76
114
|
return (), bnode
|
|
77
115
|
|
|
78
116
|
def encode_literal(
|
|
@@ -82,6 +120,24 @@ class TermEncoder:
|
|
|
82
120
|
language: str | None = None,
|
|
83
121
|
datatype: str | None = None,
|
|
84
122
|
) -> RowsAnd[jelly.RdfLiteral]:
|
|
123
|
+
"""
|
|
124
|
+
Encode literal.
|
|
125
|
+
|
|
126
|
+
Args:
|
|
127
|
+
lex (str): lexical form/literal value
|
|
128
|
+
language (str | None, optional): langtag. Defaults to None.
|
|
129
|
+
datatype (str | None, optional): data type if
|
|
130
|
+
it is a typed literal. Defaults to None.
|
|
131
|
+
|
|
132
|
+
Raises:
|
|
133
|
+
JellyConformanceError: if a datatype is specified while
|
|
134
|
+
the datatype lookup table is not used.
|
|
135
|
+
|
|
136
|
+
Returns:
|
|
137
|
+
RowsAnd[jelly.RdfLiteral]: extra rows (i.e., datatype entries)
|
|
138
|
+
and RdfLiteral message.
|
|
139
|
+
|
|
140
|
+
"""
|
|
85
141
|
datatype_id = None
|
|
86
142
|
term_rows: tuple[()] | tuple[jelly.RdfStreamRow] = ()
|
|
87
143
|
|
|
@@ -129,6 +185,19 @@ def encode_statement(
|
|
|
129
185
|
term_encoder: TermEncoder,
|
|
130
186
|
repeated_terms: dict[Slot, object],
|
|
131
187
|
) -> tuple[list[jelly.RdfStreamRow], dict[str, Any]]:
|
|
188
|
+
"""
|
|
189
|
+
Encode a statement.
|
|
190
|
+
|
|
191
|
+
Args:
|
|
192
|
+
terms (Iterable[object]): original terms to encode
|
|
193
|
+
term_encoder (TermEncoder): encoder with lookup tables
|
|
194
|
+
repeated_terms (dict[Slot, object]): dictionary of repeated terms.
|
|
195
|
+
|
|
196
|
+
Returns:
|
|
197
|
+
tuple[list[jelly.RdfStreamRow], dict[str, Any]]:
|
|
198
|
+
extra rows to append and jelly terms.
|
|
199
|
+
|
|
200
|
+
"""
|
|
132
201
|
statement: dict[str, object] = {}
|
|
133
202
|
rows: list[jelly.RdfStreamRow] = []
|
|
134
203
|
for slot, term in zip(Slot, terms):
|
|
@@ -147,6 +216,18 @@ def encode_triple(
|
|
|
147
216
|
term_encoder: TermEncoder,
|
|
148
217
|
repeated_terms: dict[Slot, object],
|
|
149
218
|
) -> list[jelly.RdfStreamRow]:
|
|
219
|
+
"""
|
|
220
|
+
Encode one triple.
|
|
221
|
+
|
|
222
|
+
Args:
|
|
223
|
+
terms (Iterable[object]): original terms to encode
|
|
224
|
+
term_encoder (TermEncoder): current encoder with lookup tables
|
|
225
|
+
repeated_terms (dict[Slot, object]): dictionary of repeated terms.
|
|
226
|
+
|
|
227
|
+
Returns:
|
|
228
|
+
list[jelly.RdfStreamRow]: list of rows to add to the current flow.
|
|
229
|
+
|
|
230
|
+
"""
|
|
150
231
|
rows, statement = encode_statement(terms, term_encoder, repeated_terms)
|
|
151
232
|
row = jelly.RdfStreamRow(triple=jelly.RdfTriple(**statement))
|
|
152
233
|
rows.append(row)
|
|
@@ -158,6 +239,18 @@ def encode_quad(
|
|
|
158
239
|
term_encoder: TermEncoder,
|
|
159
240
|
repeated_terms: dict[Slot, object],
|
|
160
241
|
) -> list[jelly.RdfStreamRow]:
|
|
242
|
+
"""
|
|
243
|
+
Encode one quad.
|
|
244
|
+
|
|
245
|
+
Args:
|
|
246
|
+
terms (Iterable[object]): original terms to encode
|
|
247
|
+
term_encoder (TermEncoder): current encoder with lookup tables
|
|
248
|
+
repeated_terms (dict[Slot, object]): dictionary of repeated terms.
|
|
249
|
+
|
|
250
|
+
Returns:
|
|
251
|
+
list[jelly.RdfStreamRow]: list of messages to append to current flow.
|
|
252
|
+
|
|
253
|
+
"""
|
|
161
254
|
rows, statement = encode_statement(terms, term_encoder, repeated_terms)
|
|
162
255
|
row = jelly.RdfStreamRow(quad=jelly.RdfQuad(**statement))
|
|
163
256
|
rows.append(row)
|
|
@@ -169,6 +262,18 @@ def encode_namespace_declaration(
|
|
|
169
262
|
value: str,
|
|
170
263
|
term_encoder: TermEncoder,
|
|
171
264
|
) -> list[jelly.RdfStreamRow]:
|
|
265
|
+
"""
|
|
266
|
+
Encode namespace declaration.
|
|
267
|
+
|
|
268
|
+
Args:
|
|
269
|
+
name (str): namespace prefix label
|
|
270
|
+
value (str): namespace iri
|
|
271
|
+
term_encoder (TermEncoder): current encoder
|
|
272
|
+
|
|
273
|
+
Returns:
|
|
274
|
+
list[jelly.RdfStreamRow]: list of messages to append to current flow.
|
|
275
|
+
|
|
276
|
+
"""
|
|
172
277
|
[*rows], iri = term_encoder.encode_iri(value)
|
|
173
278
|
declaration = jelly.RdfNamespaceDeclaration(name=name, value=iri)
|
|
174
279
|
row = jelly.RdfStreamRow(namespace=declaration)
|
|
@@ -181,6 +286,18 @@ def encode_options(
|
|
|
181
286
|
stream_types: options.StreamTypes,
|
|
182
287
|
params: options.StreamParameters,
|
|
183
288
|
) -> jelly.RdfStreamRow:
|
|
289
|
+
"""
|
|
290
|
+
Encode stream options to ProtoBuf message.
|
|
291
|
+
|
|
292
|
+
Args:
|
|
293
|
+
lookup_preset (options.LookupPreset): lookup tables options
|
|
294
|
+
stream_types (options.StreamTypes): physical and logical types
|
|
295
|
+
params (options.StreamParameters): other params.
|
|
296
|
+
|
|
297
|
+
Returns:
|
|
298
|
+
jelly.RdfStreamRow: encoded stream options row
|
|
299
|
+
|
|
300
|
+
"""
|
|
184
301
|
return jelly.RdfStreamRow(
|
|
185
302
|
options=jelly.RdfStreamOptions(
|
|
186
303
|
stream_name=params.stream_name,
|
pyjelly/serialize/flows.py
CHANGED
|
@@ -41,6 +41,16 @@ class FrameFlow(UserList[jelly.RdfStreamRow]):
|
|
|
41
41
|
return None
|
|
42
42
|
|
|
43
43
|
def to_stream_frame(self) -> jelly.RdfStreamFrame | None:
|
|
44
|
+
"""
|
|
45
|
+
Create stream frame from flow content.
|
|
46
|
+
|
|
47
|
+
Notes:
|
|
48
|
+
Clears flow content after creating the frame.
|
|
49
|
+
|
|
50
|
+
Returns:
|
|
51
|
+
jelly.RdfStreamFrame | None: stream frame, or None if the flow is empty
|
|
52
|
+
|
|
53
|
+
"""
|
|
44
54
|
if not self:
|
|
45
55
|
return None
|
|
46
56
|
frame = jelly.RdfStreamFrame(rows=self)
|
|
@@ -74,7 +84,7 @@ class ManualFrameFlow(FrameFlow):
|
|
|
74
84
|
@dataclass
|
|
75
85
|
class BoundedFrameFlow(FrameFlow):
|
|
76
86
|
"""
|
|
77
|
-
|
|
87
|
+
Produce frames automatically when a fixed number of rows is reached.
|
|
78
88
|
|
|
79
89
|
Used for delimited encoding (default mode).
|
|
80
90
|
"""
|
|
@@ -93,6 +103,13 @@ class BoundedFrameFlow(FrameFlow):
|
|
|
93
103
|
|
|
94
104
|
@override
|
|
95
105
|
def frame_from_bounds(self) -> jelly.RdfStreamFrame | None:
|
|
106
|
+
"""
|
|
107
|
+
Emit frame from flow if full.
|
|
108
|
+
|
|
109
|
+
Returns:
|
|
110
|
+
jelly.RdfStreamFrame | None: stream frame, or None if the flow holds fewer than frame_size rows
|
|
111
|
+
|
|
112
|
+
"""
|
|
96
113
|
if len(self) >= self.frame_size:
|
|
97
114
|
return self.to_stream_frame()
|
|
98
115
|
return None
|
|
@@ -110,6 +127,14 @@ class GraphsFrameFlow(FrameFlow):
|
|
|
110
127
|
logical_type = jelly.LOGICAL_STREAM_TYPE_GRAPHS
|
|
111
128
|
|
|
112
129
|
def frame_from_graph(self) -> jelly.RdfStreamFrame | None:
|
|
130
|
+
"""
|
|
131
|
+
Emit current flow content (one graph) as jelly frame.
|
|
132
|
+
|
|
133
|
+
Returns:
|
|
134
|
+
jelly.RdfStreamFrame | None: jelly frame or none if
|
|
135
|
+
flow is empty.
|
|
136
|
+
|
|
137
|
+
"""
|
|
113
138
|
return self.to_stream_frame()
|
|
114
139
|
|
|
115
140
|
|
|
@@ -117,9 +142,18 @@ class DatasetsFrameFlow(FrameFlow):
|
|
|
117
142
|
logical_type = jelly.LOGICAL_STREAM_TYPE_DATASETS
|
|
118
143
|
|
|
119
144
|
def frame_from_dataset(self) -> jelly.RdfStreamFrame | None:
|
|
145
|
+
"""
|
|
146
|
+
Emit current flow content (dataset) as jelly frame.
|
|
147
|
+
|
|
148
|
+
Returns:
|
|
149
|
+
jelly.RdfStreamFrame | None: jelly frame or none if
|
|
150
|
+
flow is empty.
|
|
151
|
+
|
|
152
|
+
"""
|
|
120
153
|
return self.to_stream_frame()
|
|
121
154
|
|
|
122
155
|
|
|
156
|
+
# TODO(Nastya): issue #184
|
|
123
157
|
FLOW_DISPATCH: dict[jelly.LogicalStreamType, type[FrameFlow]] = {
|
|
124
158
|
jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES: FlatTriplesFrameFlow,
|
|
125
159
|
jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS: FlatQuadsFrameFlow,
|
|
@@ -129,6 +163,19 @@ FLOW_DISPATCH: dict[jelly.LogicalStreamType, type[FrameFlow]] = {
|
|
|
129
163
|
|
|
130
164
|
|
|
131
165
|
def flow_for_type(logical_type: jelly.LogicalStreamType) -> type[FrameFlow]:
|
|
166
|
+
"""
|
|
167
|
+
Return flow based on logical type requested.
|
|
168
|
+
|
|
169
|
+
Args:
|
|
170
|
+
logical_type (jelly.LogicalStreamType): logical type requested.
|
|
171
|
+
|
|
172
|
+
Raises:
|
|
173
|
+
NotImplementedError: if logical type not supported.
|
|
174
|
+
|
|
175
|
+
Returns:
|
|
176
|
+
type[FrameFlow]: FrameFlow for respective logical type.
|
|
177
|
+
|
|
178
|
+
"""
|
|
132
179
|
try:
|
|
133
180
|
return FLOW_DISPATCH[logical_type]
|
|
134
181
|
except KeyError:
|
pyjelly/serialize/streams.py
CHANGED
|
@@ -60,6 +60,13 @@ class Stream:
|
|
|
60
60
|
)
|
|
61
61
|
|
|
62
62
|
def infer_flow(self) -> FrameFlow:
|
|
63
|
+
"""
|
|
64
|
+
Return flow based on the stream options provided.
|
|
65
|
+
|
|
66
|
+
Returns:
|
|
67
|
+
FrameFlow: initialised FrameFlow object.
|
|
68
|
+
|
|
69
|
+
"""
|
|
63
70
|
flow: FrameFlow
|
|
64
71
|
if self.options.params.delimited:
|
|
65
72
|
if self.options.logical_type != jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED:
|
|
@@ -79,11 +86,13 @@ class Stream:
|
|
|
79
86
|
return flow
|
|
80
87
|
|
|
81
88
|
def enroll(self) -> None:
|
|
89
|
+
"""Initialize start of the stream."""
|
|
82
90
|
if not self.enrolled:
|
|
83
91
|
self.stream_options()
|
|
84
92
|
self.enrolled = True
|
|
85
93
|
|
|
86
94
|
def stream_options(self) -> None:
|
|
95
|
+
"""Encode and append stream options row to the current flow."""
|
|
87
96
|
self.flow.append(
|
|
88
97
|
encode_options(
|
|
89
98
|
stream_types=self.stream_types,
|
|
@@ -93,6 +102,14 @@ class Stream:
|
|
|
93
102
|
)
|
|
94
103
|
|
|
95
104
|
def namespace_declaration(self, name: str, iri: str) -> None:
|
|
105
|
+
"""
|
|
106
|
+
Add namespace declaration to jelly stream.
|
|
107
|
+
|
|
108
|
+
Args:
|
|
109
|
+
name (str): namespace prefix label
|
|
110
|
+
iri (str): namespace iri
|
|
111
|
+
|
|
112
|
+
"""
|
|
96
113
|
rows = encode_namespace_declaration(
|
|
97
114
|
name=name,
|
|
98
115
|
value=iri,
|
|
@@ -102,6 +119,20 @@ class Stream:
|
|
|
102
119
|
|
|
103
120
|
@classmethod
|
|
104
121
|
def for_rdflib(cls, options: SerializerOptions | None = None) -> Stream:
|
|
122
|
+
"""
|
|
123
|
+
Initialize stream with RDFLib encoder.
|
|
124
|
+
|
|
125
|
+
Args:
|
|
126
|
+
options (SerializerOptions | None, optional): Stream options.
|
|
127
|
+
Defaults to None.
|
|
128
|
+
|
|
129
|
+
Raises:
|
|
130
|
+
TypeError: if called on the abstract Stream base class instead of a subclass.
|
|
131
|
+
|
|
132
|
+
Returns:
|
|
133
|
+
Stream: initialized stream with RDFLib encoder.
|
|
134
|
+
|
|
135
|
+
"""
|
|
105
136
|
if cls is Stream:
|
|
106
137
|
msg = "Stream is an abstract base class, use a subclass instead"
|
|
107
138
|
raise TypeError(msg)
|
|
@@ -117,6 +148,19 @@ class Stream:
|
|
|
117
148
|
|
|
118
149
|
|
|
119
150
|
def stream_for_type(physical_type: jelly.PhysicalStreamType) -> type[Stream]:
|
|
151
|
+
"""
|
|
152
|
+
Return the Stream class for the specified physical type.
|
|
153
|
+
|
|
154
|
+
Args:
|
|
155
|
+
physical_type (jelly.PhysicalStreamType): jelly stream physical type.
|
|
156
|
+
|
|
157
|
+
Raises:
|
|
158
|
+
NotImplementedError: if no stream for requested physical type is available.
|
|
159
|
+
|
|
160
|
+
Returns:
|
|
161
|
+
type[Stream]: jelly stream
|
|
162
|
+
|
|
163
|
+
"""
|
|
120
164
|
try:
|
|
121
165
|
stream_cls = STREAM_DISPATCH[physical_type]
|
|
122
166
|
except KeyError:
|
|
@@ -133,6 +177,21 @@ class TripleStream(Stream):
|
|
|
133
177
|
default_delimited_flow_class: ClassVar = FlatTriplesFrameFlow
|
|
134
178
|
|
|
135
179
|
def triple(self, terms: Iterable[object]) -> jelly.RdfStreamFrame | None:
|
|
180
|
+
"""
|
|
181
|
+
Process one triple to Protobuf messages.
|
|
182
|
+
|
|
183
|
+
Note:
|
|
184
|
+
Adds new rows to the current flow and returns StreamFrame if
|
|
185
|
+
frame size conditions are met.
|
|
186
|
+
|
|
187
|
+
Args:
|
|
188
|
+
terms (Iterable[object]): RDF terms to encode.
|
|
189
|
+
|
|
190
|
+
Returns:
|
|
191
|
+
jelly.RdfStreamFrame | None: stream frame if
|
|
192
|
+
flow supports frames slicing and current flow is full
|
|
193
|
+
|
|
194
|
+
"""
|
|
136
195
|
new_rows = encode_triple(
|
|
137
196
|
terms,
|
|
138
197
|
term_encoder=self.encoder,
|
|
@@ -147,6 +206,17 @@ class QuadStream(Stream):
|
|
|
147
206
|
default_delimited_flow_class: ClassVar = FlatQuadsFrameFlow
|
|
148
207
|
|
|
149
208
|
def quad(self, terms: Iterable[object]) -> jelly.RdfStreamFrame | None:
|
|
209
|
+
"""
|
|
210
|
+
Process one quad to Protobuf messages.
|
|
211
|
+
|
|
212
|
+
Args:
|
|
213
|
+
terms (Iterable[object]): terms to encode.
|
|
214
|
+
|
|
215
|
+
Returns:
|
|
216
|
+
jelly.RdfStreamFrame | None: stream frame if
|
|
217
|
+
flow supports frames slicing and current flow is full
|
|
218
|
+
|
|
219
|
+
"""
|
|
150
220
|
new_rows = encode_quad(
|
|
151
221
|
terms,
|
|
152
222
|
term_encoder=self.encoder,
|
|
@@ -165,6 +235,17 @@ class GraphStream(TripleStream):
|
|
|
165
235
|
graph_id: object,
|
|
166
236
|
graph: Iterable[Iterable[object]],
|
|
167
237
|
) -> Generator[jelly.RdfStreamFrame]:
|
|
238
|
+
"""
|
|
239
|
+
Process one graph into a sequence of jelly frames.
|
|
240
|
+
|
|
241
|
+
Args:
|
|
242
|
+
graph_id (object): graph id (BN, Literal, iri, default)
|
|
243
|
+
graph (Iterable[Iterable[object]]): iterable of triples (graph's content)
|
|
244
|
+
|
|
245
|
+
Yields:
|
|
246
|
+
Generator[jelly.RdfStreamFrame]: jelly frames.
|
|
247
|
+
|
|
248
|
+
"""
|
|
168
249
|
[*graph_rows], graph_node = self.encoder.encode_any(graph_id, Slot.graph)
|
|
169
250
|
kw_name = f"{Slot.graph}_{self.encoder.TERM_ONEOF_NAMES[type(graph_node)]}"
|
|
170
251
|
kws: dict[Any, Any] = {kw_name: graph_node}
|
|
@@ -172,7 +253,7 @@ class GraphStream(TripleStream):
|
|
|
172
253
|
graph_rows.append(start_row)
|
|
173
254
|
self.flow.extend(graph_rows)
|
|
174
255
|
for triple in graph:
|
|
175
|
-
if frame := self.triple(triple):
|
|
256
|
+
if frame := self.triple(triple): # has frame slicing inside
|
|
176
257
|
yield frame
|
|
177
258
|
end_row = jelly.RdfStreamRow(graph_end=jelly.RdfGraphEnd())
|
|
178
259
|
self.flow.append(end_row)
|