pyjelly 0.5.3__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyjelly might be problematic. Click here for more details.
- pyjelly/integrations/generic/generic_sink.py +36 -6
- pyjelly/integrations/generic/parse.py +53 -6
- pyjelly/integrations/generic/serialize.py +55 -14
- pyjelly/integrations/rdflib/parse.py +48 -1
- pyjelly/integrations/rdflib/serialize.py +40 -11
- pyjelly/parse/decode.py +5 -1
- pyjelly/parse/ioutils.py +11 -3
- pyjelly/serialize/encode.py +143 -88
- pyjelly/serialize/streams.py +6 -6
- {pyjelly-0.5.3.dist-info → pyjelly-0.6.1.dist-info}/METADATA +1 -1
- {pyjelly-0.5.3.dist-info → pyjelly-0.6.1.dist-info}/RECORD +14 -14
- {pyjelly-0.5.3.dist-info → pyjelly-0.6.1.dist-info}/WHEEL +0 -0
- {pyjelly-0.5.3.dist-info → pyjelly-0.6.1.dist-info}/entry_points.txt +0 -0
- {pyjelly-0.5.3.dist-info → pyjelly-0.6.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -4,7 +4,13 @@ from collections import deque
|
|
|
4
4
|
from collections.abc import Generator
|
|
5
5
|
from typing import IO, NamedTuple, Union
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
|
|
8
|
+
class _DefaultGraph:
|
|
9
|
+
def __repr__(self) -> str:
|
|
10
|
+
return ""
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
DefaultGraph = _DefaultGraph()
|
|
8
14
|
|
|
9
15
|
|
|
10
16
|
class BlankNode:
|
|
@@ -19,6 +25,14 @@ class BlankNode:
|
|
|
19
25
|
def __repr__(self) -> str:
|
|
20
26
|
return f"BlankNode(identifier={self._identifier})"
|
|
21
27
|
|
|
28
|
+
def __eq__(self, other: object) -> bool:
|
|
29
|
+
if isinstance(other, BlankNode):
|
|
30
|
+
return self._identifier == other._identifier
|
|
31
|
+
return False
|
|
32
|
+
|
|
33
|
+
def __hash__(self) -> int:
|
|
34
|
+
return hash(self._identifier)
|
|
35
|
+
|
|
22
36
|
|
|
23
37
|
class IRI:
|
|
24
38
|
"""Class for IRIs, storing IRI as a string."""
|
|
@@ -37,6 +51,9 @@ class IRI:
|
|
|
37
51
|
return self._iri == other._iri
|
|
38
52
|
return False
|
|
39
53
|
|
|
54
|
+
def __hash__(self) -> int:
|
|
55
|
+
return hash(self._iri)
|
|
56
|
+
|
|
40
57
|
|
|
41
58
|
class Literal:
|
|
42
59
|
"""
|
|
@@ -69,8 +86,21 @@ class Literal:
|
|
|
69
86
|
f"datatype={self._datatype!r})"
|
|
70
87
|
)
|
|
71
88
|
|
|
89
|
+
def __eq__(self, other: object) -> bool:
|
|
90
|
+
if isinstance(other, Literal):
|
|
91
|
+
return (
|
|
92
|
+
self._lex == other._lex
|
|
93
|
+
and self._langtag == other._langtag
|
|
94
|
+
and self._datatype == other._datatype
|
|
95
|
+
)
|
|
96
|
+
return False
|
|
97
|
+
|
|
98
|
+
def __hash__(self) -> int:
|
|
99
|
+
return hash((self._lex, self._langtag, self._datatype))
|
|
100
|
+
|
|
72
101
|
|
|
73
|
-
Node = Union[BlankNode, IRI, Literal, "Triple"
|
|
102
|
+
Node = Union[BlankNode, IRI, Literal, "Triple"]
|
|
103
|
+
GraphName = Union[Node, _DefaultGraph]
|
|
74
104
|
|
|
75
105
|
|
|
76
106
|
TRIPLE_ARITY = 3
|
|
@@ -90,7 +120,7 @@ class Quad(NamedTuple):
|
|
|
90
120
|
s: Node
|
|
91
121
|
p: Node
|
|
92
122
|
o: Node
|
|
93
|
-
g:
|
|
123
|
+
g: GraphName
|
|
94
124
|
|
|
95
125
|
|
|
96
126
|
class Prefix(NamedTuple):
|
|
@@ -103,7 +133,7 @@ class Prefix(NamedTuple):
|
|
|
103
133
|
class GenericStatementSink:
|
|
104
134
|
_store: deque[Triple | Quad]
|
|
105
135
|
|
|
106
|
-
def __init__(self, identifier:
|
|
136
|
+
def __init__(self, identifier: GraphName = DefaultGraph) -> None:
|
|
107
137
|
"""
|
|
108
138
|
Initialize statements storage, namespaces dictionary, and parser.
|
|
109
139
|
|
|
@@ -112,7 +142,7 @@ class GenericStatementSink:
|
|
|
112
142
|
|
|
113
143
|
Args:
|
|
114
144
|
identifier (str, optional): Identifier for a sink.
|
|
115
|
-
Defaults to
|
|
145
|
+
Defaults to DefaultGraph.
|
|
116
146
|
|
|
117
147
|
"""
|
|
118
148
|
self._store: deque[Triple | Quad] = deque()
|
|
@@ -136,7 +166,7 @@ class GenericStatementSink:
|
|
|
136
166
|
yield from self._namespaces.items()
|
|
137
167
|
|
|
138
168
|
@property
|
|
139
|
-
def identifier(self) ->
|
|
169
|
+
def identifier(self) -> GraphName:
|
|
140
170
|
return self._identifier
|
|
141
171
|
|
|
142
172
|
@property
|
|
@@ -10,7 +10,9 @@ from pyjelly.errors import JellyConformanceError
|
|
|
10
10
|
from pyjelly.integrations.generic.generic_sink import (
|
|
11
11
|
IRI,
|
|
12
12
|
BlankNode,
|
|
13
|
+
DefaultGraph,
|
|
13
14
|
GenericStatementSink,
|
|
15
|
+
GraphName,
|
|
14
16
|
Literal,
|
|
15
17
|
Prefix,
|
|
16
18
|
Quad,
|
|
@@ -44,8 +46,8 @@ class GenericStatementSinkAdapter(Adapter):
|
|
|
44
46
|
return BlankNode(bnode)
|
|
45
47
|
|
|
46
48
|
@override
|
|
47
|
-
def default_graph(self) ->
|
|
48
|
-
return
|
|
49
|
+
def default_graph(self) -> GraphName:
|
|
50
|
+
return DefaultGraph
|
|
49
51
|
|
|
50
52
|
@override
|
|
51
53
|
def literal(
|
|
@@ -122,7 +124,7 @@ class GenericGraphsAdapter(GenericQuadsBaseAdapter):
|
|
|
122
124
|
|
|
123
125
|
"""
|
|
124
126
|
|
|
125
|
-
_graph_id:
|
|
127
|
+
_graph_id: GraphName | None
|
|
126
128
|
|
|
127
129
|
def __init__(
|
|
128
130
|
self,
|
|
@@ -138,7 +140,7 @@ class GenericGraphsAdapter(GenericQuadsBaseAdapter):
|
|
|
138
140
|
raise JellyConformanceError(msg)
|
|
139
141
|
|
|
140
142
|
@override
|
|
141
|
-
def graph_start(self, graph_id:
|
|
143
|
+
def graph_start(self, graph_id: GraphName) -> None:
|
|
142
144
|
self._graph_id = graph_id
|
|
143
145
|
|
|
144
146
|
@override
|
|
@@ -206,6 +208,8 @@ def parse_quads_stream(
|
|
|
206
208
|
def parse_jelly_grouped(
|
|
207
209
|
inp: IO[bytes],
|
|
208
210
|
sink_factory: Callable[[], GenericStatementSink] = lambda: GenericStatementSink(),
|
|
211
|
+
*,
|
|
212
|
+
logical_type_strict: bool = False,
|
|
209
213
|
) -> Generator[GenericStatementSink]:
|
|
210
214
|
"""
|
|
211
215
|
Take a jelly file and return generators of generic statements sinks.
|
|
@@ -215,7 +219,10 @@ def parse_jelly_grouped(
|
|
|
215
219
|
Args:
|
|
216
220
|
inp (IO[bytes]): input jelly buffered binary stream
|
|
217
221
|
sink_factory (Callable): lambda to construct a statement sink.
|
|
218
|
-
By default creates an empty in-memory GenericStatementSink.
|
|
222
|
+
By default, creates an empty in-memory GenericStatementSink.
|
|
223
|
+
logical_type_strict (bool): If True, validate the *logical* type
|
|
224
|
+
in stream options and require a grouped logical type.
|
|
225
|
+
Otherwise, only the physical type is used to route parsing.
|
|
219
226
|
|
|
220
227
|
Raises:
|
|
221
228
|
NotImplementedError: is raised if a physical type is not implemented
|
|
@@ -226,6 +233,26 @@ def parse_jelly_grouped(
|
|
|
226
233
|
|
|
227
234
|
"""
|
|
228
235
|
options, frames = get_options_and_frames(inp)
|
|
236
|
+
|
|
237
|
+
st = getattr(options, "stream_types", None)
|
|
238
|
+
if logical_type_strict and (
|
|
239
|
+
st is None
|
|
240
|
+
or st.logical_type == jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED
|
|
241
|
+
or st.flat
|
|
242
|
+
):
|
|
243
|
+
lt_name = (
|
|
244
|
+
"UNSPECIFIED"
|
|
245
|
+
if st is None
|
|
246
|
+
else jelly.LogicalStreamType.Name(st.logical_type)
|
|
247
|
+
)
|
|
248
|
+
|
|
249
|
+
msg = (
|
|
250
|
+
"strict logical type check requires options.stream_types"
|
|
251
|
+
if st is None
|
|
252
|
+
else f"expected GROUPED logical type, got {lt_name}"
|
|
253
|
+
)
|
|
254
|
+
raise JellyConformanceError(msg)
|
|
255
|
+
|
|
229
256
|
if options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
|
|
230
257
|
for graph in parse_triples_stream(
|
|
231
258
|
frames=frames,
|
|
@@ -297,6 +324,8 @@ def parse_jelly_flat(
|
|
|
297
324
|
inp: IO[bytes],
|
|
298
325
|
frames: Iterable[jelly.RdfStreamFrame] | None = None,
|
|
299
326
|
options: ParserOptions | None = None,
|
|
327
|
+
*,
|
|
328
|
+
logical_type_strict: bool = False,
|
|
300
329
|
) -> Generator[Statement | Prefix]:
|
|
301
330
|
"""
|
|
302
331
|
Parse jelly file with FLAT logical type into a Generator of stream events.
|
|
@@ -307,6 +336,9 @@ def parse_jelly_flat(
|
|
|
307
336
|
jelly frames if read before.
|
|
308
337
|
options (ParserOptions | None): stream options
|
|
309
338
|
if read before.
|
|
339
|
+
logical_type_strict (bool): If True, validate the *logical* type
|
|
340
|
+
in stream options and require FLAT (TRIPLES/QUADS).
|
|
341
|
+
Otherwise, only the physical type is used to route parsing.
|
|
310
342
|
|
|
311
343
|
Raises:
|
|
312
344
|
NotImplementedError: if physical type is not supported
|
|
@@ -315,9 +347,24 @@ def parse_jelly_flat(
|
|
|
315
347
|
Generator[Statement | Prefix]: Generator of stream events
|
|
316
348
|
|
|
317
349
|
"""
|
|
318
|
-
if
|
|
350
|
+
if frames is None or options is None:
|
|
319
351
|
options, frames = get_options_and_frames(inp)
|
|
320
352
|
|
|
353
|
+
st = getattr(options, "stream_types", None)
|
|
354
|
+
if logical_type_strict and (st is None or not st.flat):
|
|
355
|
+
lt_name = (
|
|
356
|
+
"UNSPECIFIED"
|
|
357
|
+
if st is None
|
|
358
|
+
else jelly.LogicalStreamType.Name(st.logical_type)
|
|
359
|
+
)
|
|
360
|
+
|
|
361
|
+
msg = (
|
|
362
|
+
"strict logical type check requires options.stream_types"
|
|
363
|
+
if st is None
|
|
364
|
+
else f"expected FLAT logical type (TRIPLES/QUADS), got {lt_name}"
|
|
365
|
+
)
|
|
366
|
+
raise JellyConformanceError(msg)
|
|
367
|
+
|
|
321
368
|
if options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
|
|
322
369
|
for triples in parse_triples_stream(frames=frames, options=options):
|
|
323
370
|
yield from triples
|
|
@@ -5,19 +5,20 @@ from collections.abc import Generator
|
|
|
5
5
|
from functools import singledispatch
|
|
6
6
|
from typing import Any, IO
|
|
7
7
|
from itertools import chain
|
|
8
|
+
from pyjelly.options import StreamParameters
|
|
8
9
|
from pyjelly.integrations.generic.generic_sink import (
|
|
9
10
|
GenericStatementSink,
|
|
10
11
|
Quad,
|
|
11
12
|
Triple,
|
|
12
|
-
|
|
13
|
+
DefaultGraph,
|
|
14
|
+
GraphName,
|
|
13
15
|
IRI,
|
|
14
16
|
BlankNode,
|
|
15
17
|
Literal,
|
|
16
|
-
Node,
|
|
17
18
|
)
|
|
18
19
|
|
|
19
20
|
from pyjelly import jelly
|
|
20
|
-
from pyjelly.serialize.encode import
|
|
21
|
+
from pyjelly.serialize.encode import Rows, Slot, TermEncoder, HasGraph, Statement
|
|
21
22
|
from pyjelly.serialize.ioutils import write_delimited
|
|
22
23
|
from pyjelly.serialize.streams import (
|
|
23
24
|
GraphStream,
|
|
@@ -31,38 +32,75 @@ QUAD_ARITY = 4
|
|
|
31
32
|
|
|
32
33
|
|
|
33
34
|
class GenericSinkTermEncoder(TermEncoder):
|
|
34
|
-
def
|
|
35
|
+
def encode_spo(self, term: object, slot: Slot, statement: Statement) -> Rows:
|
|
35
36
|
"""
|
|
36
37
|
Encode term based on its GenericSink object.
|
|
37
38
|
|
|
38
39
|
Args:
|
|
39
40
|
term (object): term to encode
|
|
40
41
|
slot (Slot): its place in statement.
|
|
42
|
+
statement (Statement): Triple/Quad/GraphStart message to fill with terms.
|
|
41
43
|
|
|
42
44
|
Returns:
|
|
43
|
-
|
|
45
|
+
Rows: encoded extra rows
|
|
44
46
|
|
|
45
47
|
"""
|
|
46
|
-
if slot is Slot.graph and term == DEFAULT_GRAPH_IDENTIFIER:
|
|
47
|
-
return self.encode_default_graph()
|
|
48
|
-
|
|
49
48
|
if isinstance(term, IRI):
|
|
50
|
-
|
|
49
|
+
iri = self.get_iri_field(statement, slot)
|
|
50
|
+
return self.encode_iri(term._iri, iri)
|
|
51
51
|
|
|
52
52
|
if isinstance(term, Literal):
|
|
53
|
+
literal = self.get_literal_field(statement, slot)
|
|
53
54
|
return self.encode_literal(
|
|
54
55
|
lex=term._lex,
|
|
55
56
|
language=term._langtag,
|
|
56
57
|
datatype=term._datatype,
|
|
58
|
+
literal=literal,
|
|
57
59
|
)
|
|
58
60
|
|
|
59
61
|
if isinstance(term, BlankNode):
|
|
60
|
-
|
|
62
|
+
self.set_bnode_field(
|
|
63
|
+
statement,
|
|
64
|
+
slot,
|
|
65
|
+
term._identifier,
|
|
66
|
+
)
|
|
67
|
+
return ()
|
|
61
68
|
|
|
62
69
|
if isinstance(term, Triple):
|
|
63
|
-
|
|
70
|
+
quoted_statement = self.get_triple_field(statement, slot)
|
|
71
|
+
return self.encode_quoted_triple(term, quoted_statement)
|
|
72
|
+
|
|
73
|
+
return super().encode_spo(term, slot, statement) # error if not handled
|
|
64
74
|
|
|
65
|
-
|
|
75
|
+
def encode_graph(self, term: object, statement: HasGraph) -> Rows:
|
|
76
|
+
"""
|
|
77
|
+
Encode graph term based on its GenericSink object.
|
|
78
|
+
|
|
79
|
+
Args:
|
|
80
|
+
term (object): term to encode
|
|
81
|
+
statement (HasGraph): Quad/GraphStart message to fill g_{} in.
|
|
82
|
+
|
|
83
|
+
Returns:
|
|
84
|
+
Rows: encoded extra rows
|
|
85
|
+
|
|
86
|
+
"""
|
|
87
|
+
if term == DefaultGraph:
|
|
88
|
+
return self.encode_default_graph(statement.g_default_graph)
|
|
89
|
+
if isinstance(term, IRI):
|
|
90
|
+
return self.encode_iri(term._iri, statement.g_iri)
|
|
91
|
+
|
|
92
|
+
if isinstance(term, Literal):
|
|
93
|
+
return self.encode_literal(
|
|
94
|
+
lex=term._lex,
|
|
95
|
+
language=term._langtag,
|
|
96
|
+
datatype=term._datatype,
|
|
97
|
+
literal=statement.g_literal,
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
if isinstance(term, BlankNode):
|
|
101
|
+
statement.g_bnode = term._identifier
|
|
102
|
+
return ()
|
|
103
|
+
return super().encode_graph(term, statement) # error if not handled
|
|
66
104
|
|
|
67
105
|
|
|
68
106
|
def namespace_declarations(store: GenericStatementSink, stream: Stream) -> None:
|
|
@@ -210,7 +248,7 @@ def split_to_graphs(data: Generator[Quad]) -> Generator[GenericStatementSink]:
|
|
|
210
248
|
each having triples in store and identifier set.
|
|
211
249
|
|
|
212
250
|
"""
|
|
213
|
-
current_g:
|
|
251
|
+
current_g: GraphName | None = None
|
|
214
252
|
current_sink: GenericStatementSink | None = None
|
|
215
253
|
for statement in data:
|
|
216
254
|
if current_g != statement.g:
|
|
@@ -234,7 +272,10 @@ def guess_options(sink: GenericStatementSink) -> SerializerOptions:
|
|
|
234
272
|
if sink.is_triples_sink
|
|
235
273
|
else jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS
|
|
236
274
|
)
|
|
237
|
-
|
|
275
|
+
# Generic sink supports both RDF-star and generalized statements by default
|
|
276
|
+
# as it can handle any term types including quoted triples and generalized RDF terms
|
|
277
|
+
params = StreamParameters(generalized_statements=True, rdf_star=True)
|
|
278
|
+
return SerializerOptions(logical_type=logical_type, params=params)
|
|
238
279
|
|
|
239
280
|
|
|
240
281
|
def guess_stream(options: SerializerOptions, sink: GenericStatementSink) -> Stream:
|
|
@@ -313,6 +313,8 @@ def parse_jelly_grouped(
|
|
|
313
313
|
inp: IO[bytes],
|
|
314
314
|
graph_factory: Callable[[], Graph] = lambda: Graph(),
|
|
315
315
|
dataset_factory: Callable[[], Dataset] = lambda: Dataset(),
|
|
316
|
+
*,
|
|
317
|
+
logical_type_strict: bool = False,
|
|
316
318
|
) -> Generator[Graph] | Generator[Dataset]:
|
|
317
319
|
"""
|
|
318
320
|
Take jelly file and return generators based on the detected physical type.
|
|
@@ -327,6 +329,11 @@ def parse_jelly_grouped(
|
|
|
327
329
|
dataset_factory (Callable): lambda to construct a Dataset.
|
|
328
330
|
By default creates an empty in-memory Dataset,
|
|
329
331
|
but you can pass something else here.
|
|
332
|
+
logical_type_strict (bool): If True, validate the *logical* type in
|
|
333
|
+
stream options and require a grouped logical type. Otherwise, only the
|
|
334
|
+
physical type is used to route parsing.
|
|
335
|
+
|
|
336
|
+
|
|
330
337
|
|
|
331
338
|
Raises:
|
|
332
339
|
NotImplementedError: is raised if a physical type is not implemented
|
|
@@ -337,6 +344,26 @@ def parse_jelly_grouped(
|
|
|
337
344
|
|
|
338
345
|
"""
|
|
339
346
|
options, frames = get_options_and_frames(inp)
|
|
347
|
+
|
|
348
|
+
st = getattr(options, "stream_types", None)
|
|
349
|
+
if logical_type_strict and (
|
|
350
|
+
st is None
|
|
351
|
+
or st.logical_type == jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED
|
|
352
|
+
or st.flat
|
|
353
|
+
):
|
|
354
|
+
lt_name = (
|
|
355
|
+
"UNSPECIFIED"
|
|
356
|
+
if st is None
|
|
357
|
+
else jelly.LogicalStreamType.Name(st.logical_type)
|
|
358
|
+
)
|
|
359
|
+
|
|
360
|
+
msg = (
|
|
361
|
+
"strict logical type check requires options.stream_types"
|
|
362
|
+
if st is None
|
|
363
|
+
else f"expected GROUPED logical type, got {lt_name}"
|
|
364
|
+
)
|
|
365
|
+
raise JellyConformanceError(msg)
|
|
366
|
+
|
|
340
367
|
if options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
|
|
341
368
|
for graph in parse_triples_stream(
|
|
342
369
|
frames=frames,
|
|
@@ -424,6 +451,8 @@ def parse_jelly_flat(
|
|
|
424
451
|
inp: IO[bytes],
|
|
425
452
|
frames: Iterable[jelly.RdfStreamFrame] | None = None,
|
|
426
453
|
options: ParserOptions | None = None,
|
|
454
|
+
*,
|
|
455
|
+
logical_type_strict: bool = False,
|
|
427
456
|
) -> Generator[Statement | Prefix]:
|
|
428
457
|
"""
|
|
429
458
|
Parse jelly file with FLAT logical type into a Generator of stream events.
|
|
@@ -434,6 +463,10 @@ def parse_jelly_flat(
|
|
|
434
463
|
jelly frames if read before.
|
|
435
464
|
options (ParserOptions | None): stream options
|
|
436
465
|
if read before.
|
|
466
|
+
logical_type_strict (bool): If True, validate the *logical* type in
|
|
467
|
+
stream options and require FLAT_(TRIPLES|QUADS). Otherwise, only the
|
|
468
|
+
physical type is used to route parsing.
|
|
469
|
+
|
|
437
470
|
|
|
438
471
|
Raises:
|
|
439
472
|
NotImplementedError: if physical type is not supported
|
|
@@ -442,9 +475,23 @@ def parse_jelly_flat(
|
|
|
442
475
|
Generator[Statement | Prefix]: Generator of stream events
|
|
443
476
|
|
|
444
477
|
"""
|
|
445
|
-
if
|
|
478
|
+
if frames is None or options is None:
|
|
446
479
|
options, frames = get_options_and_frames(inp)
|
|
447
480
|
|
|
481
|
+
st = getattr(options, "stream_types", None)
|
|
482
|
+
if logical_type_strict and (st is None or not st.flat):
|
|
483
|
+
lt_name = (
|
|
484
|
+
"UNSPECIFIED"
|
|
485
|
+
if st is None
|
|
486
|
+
else jelly.LogicalStreamType.Name(st.logical_type)
|
|
487
|
+
)
|
|
488
|
+
msg = (
|
|
489
|
+
"strict logical type check requires options.stream_types"
|
|
490
|
+
if st is None
|
|
491
|
+
else f"expected FLAT logical type (TRIPLES/QUADS), got {lt_name}"
|
|
492
|
+
)
|
|
493
|
+
raise JellyConformanceError(msg)
|
|
494
|
+
|
|
448
495
|
if options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
|
|
449
496
|
for triples in parse_triples_stream(frames=frames, options=options):
|
|
450
497
|
yield from triples
|
|
@@ -7,6 +7,7 @@ from typing import Any, IO
|
|
|
7
7
|
from typing_extensions import override
|
|
8
8
|
from itertools import chain
|
|
9
9
|
from pyjelly.integrations.rdflib.parse import Quad, Triple
|
|
10
|
+
from pyjelly.options import StreamParameters
|
|
10
11
|
|
|
11
12
|
import rdflib
|
|
12
13
|
from rdflib import Graph
|
|
@@ -14,7 +15,7 @@ from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Dataset, QuotedGraph
|
|
|
14
15
|
from rdflib.serializer import Serializer as RDFLibSerializer
|
|
15
16
|
|
|
16
17
|
from pyjelly import jelly
|
|
17
|
-
from pyjelly.serialize.encode import
|
|
18
|
+
from pyjelly.serialize.encode import Rows, Slot, TermEncoder, Statement, HasGraph
|
|
18
19
|
from pyjelly.serialize.ioutils import write_delimited, write_single
|
|
19
20
|
from pyjelly.serialize.streams import (
|
|
20
21
|
GraphStream,
|
|
@@ -28,37 +29,62 @@ QUAD_ARITY = 4
|
|
|
28
29
|
|
|
29
30
|
|
|
30
31
|
class RDFLibTermEncoder(TermEncoder):
|
|
31
|
-
def
|
|
32
|
+
def encode_spo(self, term: object, slot: Slot, statement: Statement) -> Rows:
|
|
32
33
|
"""
|
|
33
|
-
Encode term based on its RDFLib object.
|
|
34
|
+
Encode s/p/o term based on its RDFLib object.
|
|
34
35
|
|
|
35
36
|
Args:
|
|
36
37
|
term (object): term to encode
|
|
37
38
|
slot (Slot): its place in statement.
|
|
39
|
+
statement (Statement): Triple/Quad message to fill with s/p/o terms.
|
|
38
40
|
|
|
39
41
|
Returns:
|
|
40
|
-
|
|
42
|
+
Rows: encoded extra rows
|
|
41
43
|
|
|
42
44
|
"""
|
|
43
|
-
if slot is Slot.graph and term == DATASET_DEFAULT_GRAPH_ID:
|
|
44
|
-
return self.encode_default_graph()
|
|
45
|
-
|
|
46
45
|
if isinstance(term, rdflib.URIRef):
|
|
47
|
-
|
|
46
|
+
iri = self.get_iri_field(statement, slot)
|
|
47
|
+
return self.encode_iri(term, iri)
|
|
48
48
|
|
|
49
49
|
if isinstance(term, rdflib.Literal):
|
|
50
|
+
literal = self.get_literal_field(statement, slot)
|
|
50
51
|
return self.encode_literal(
|
|
51
52
|
lex=str(term),
|
|
52
53
|
language=term.language,
|
|
53
54
|
# `datatype` is cast to `str` explicitly because
|
|
54
55
|
# `URIRef.__eq__` overrides `str.__eq__` in an incompatible manner
|
|
55
56
|
datatype=term.datatype and str(term.datatype),
|
|
57
|
+
literal=literal,
|
|
56
58
|
)
|
|
57
59
|
|
|
58
60
|
if isinstance(term, rdflib.BNode):
|
|
59
|
-
|
|
61
|
+
self.set_bnode_field(statement, slot, str(term))
|
|
62
|
+
return ()
|
|
63
|
+
|
|
64
|
+
return super().encode_spo(term, slot, statement) # error if not handled
|
|
65
|
+
|
|
66
|
+
def encode_graph(self, term: object, statement: HasGraph) -> Rows:
|
|
67
|
+
"""
|
|
68
|
+
Encode graph name term based on its RDFLib object.
|
|
69
|
+
|
|
70
|
+
Args:
|
|
71
|
+
term (object): term to encode
|
|
72
|
+
statement (HasGraph): Quad/GraphStart message to fill g_{} in.
|
|
60
73
|
|
|
61
|
-
|
|
74
|
+
Returns:
|
|
75
|
+
Rows: encoded extra rows
|
|
76
|
+
|
|
77
|
+
"""
|
|
78
|
+
if term == DATASET_DEFAULT_GRAPH_ID:
|
|
79
|
+
return self.encode_default_graph(statement.g_default_graph)
|
|
80
|
+
|
|
81
|
+
if isinstance(term, rdflib.URIRef):
|
|
82
|
+
return self.encode_iri(term, statement.g_iri)
|
|
83
|
+
|
|
84
|
+
if isinstance(term, rdflib.BNode):
|
|
85
|
+
statement.g_bnode = str(term)
|
|
86
|
+
return ()
|
|
87
|
+
return super().encode_graph(term, statement) # error if not handled
|
|
62
88
|
|
|
63
89
|
|
|
64
90
|
def namespace_declarations(store: Graph, stream: Stream) -> None:
|
|
@@ -205,7 +231,10 @@ def guess_options(sink: Graph | Dataset) -> SerializerOptions:
|
|
|
205
231
|
if isinstance(sink, Dataset)
|
|
206
232
|
else jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES
|
|
207
233
|
)
|
|
208
|
-
|
|
234
|
+
# RDFLib doesn't support RDF-star and generalized statements by default
|
|
235
|
+
# as it requires specific handling for quoted triples and non-standard RDF terms
|
|
236
|
+
params = StreamParameters(generalized_statements=False, rdf_star=False)
|
|
237
|
+
return SerializerOptions(logical_type=logical_type, params=params)
|
|
209
238
|
|
|
210
239
|
|
|
211
240
|
def guess_stream(options: SerializerOptions, sink: Graph | Dataset) -> Stream:
|
pyjelly/parse/decode.py
CHANGED
|
@@ -7,7 +7,7 @@ from typing import Any, ClassVar, NamedTuple
|
|
|
7
7
|
from typing_extensions import Never
|
|
8
8
|
|
|
9
9
|
from pyjelly import jelly
|
|
10
|
-
from pyjelly.options import LookupPreset, StreamParameters, StreamTypes
|
|
10
|
+
from pyjelly.options import MAX_VERSION, LookupPreset, StreamParameters, StreamTypes
|
|
11
11
|
from pyjelly.parse.lookup import LookupDecoder
|
|
12
12
|
|
|
13
13
|
|
|
@@ -54,6 +54,9 @@ def options_from_frame(
|
|
|
54
54
|
"""
|
|
55
55
|
row = frame.rows[0]
|
|
56
56
|
options = row.options
|
|
57
|
+
nd = getattr(options, "namespace_declarations", False) or (
|
|
58
|
+
options.version >= MAX_VERSION
|
|
59
|
+
)
|
|
57
60
|
return ParserOptions(
|
|
58
61
|
stream_types=StreamTypes(
|
|
59
62
|
physical_type=options.physical_type,
|
|
@@ -70,6 +73,7 @@ def options_from_frame(
|
|
|
70
73
|
rdf_star=options.rdf_star,
|
|
71
74
|
version=options.version,
|
|
72
75
|
delimited=delimited,
|
|
76
|
+
namespace_declarations=nd,
|
|
73
77
|
),
|
|
74
78
|
)
|
|
75
79
|
|
pyjelly/parse/ioutils.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import io
|
|
1
2
|
import os
|
|
2
3
|
from collections.abc import Generator, Iterator
|
|
3
4
|
from itertools import chain
|
|
@@ -48,7 +49,7 @@ def delimited_jelly_hint(header: bytes) -> bool:
|
|
|
48
49
|
False
|
|
49
50
|
"""
|
|
50
51
|
magic = 0x0A
|
|
51
|
-
return len(header)
|
|
52
|
+
return len(header) >= 3 and ( # noqa: PLR2004
|
|
52
53
|
header[0] != magic or (header[1] == magic and header[2] != magic)
|
|
53
54
|
)
|
|
54
55
|
|
|
@@ -77,8 +78,15 @@ def get_options_and_frames(
|
|
|
77
78
|
stream types, lookup presets and other stream options
|
|
78
79
|
|
|
79
80
|
"""
|
|
80
|
-
|
|
81
|
-
|
|
81
|
+
if not inp.seekable():
|
|
82
|
+
# Input may not be seekable (e.g. a network stream) -- then we need to buffer
|
|
83
|
+
# it to determine if it's delimited.
|
|
84
|
+
# See also: https://github.com/Jelly-RDF/pyjelly/issues/298
|
|
85
|
+
inp = io.BufferedReader(inp) # type: ignore[arg-type]
|
|
86
|
+
is_delimited = delimited_jelly_hint(inp.peek(3))
|
|
87
|
+
else:
|
|
88
|
+
is_delimited = delimited_jelly_hint(bytes_read := inp.read(3))
|
|
89
|
+
inp.seek(-len(bytes_read), os.SEEK_CUR)
|
|
82
90
|
|
|
83
91
|
if is_delimited:
|
|
84
92
|
first_frame = None
|
pyjelly/serialize/encode.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from collections.abc import Iterable, Sequence
|
|
4
|
-
from enum import
|
|
5
|
-
from typing import
|
|
3
|
+
from collections.abc import Iterable, Iterator, Sequence
|
|
4
|
+
from enum import IntEnum
|
|
5
|
+
from typing import TypeVar, Union
|
|
6
6
|
from typing_extensions import TypeAlias
|
|
7
7
|
|
|
8
8
|
from pyjelly import jelly, options
|
|
@@ -31,20 +31,15 @@ def split_iri(iri_string: str) -> tuple[str, str]:
|
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
T = TypeVar("T")
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
34
|
+
Rows: TypeAlias = Sequence[jelly.RdfStreamRow]
|
|
35
|
+
Statement: TypeAlias = Union[jelly.RdfQuad, jelly.RdfTriple]
|
|
36
|
+
HasGraph: TypeAlias = Union[jelly.RdfQuad, jelly.RdfGraphStart]
|
|
37
|
+
Terms: TypeAlias = Union[
|
|
38
|
+
jelly.RdfIri, jelly.RdfLiteral, str, jelly.RdfDefaultGraph, jelly.RdfTriple
|
|
39
|
+
]
|
|
37
40
|
|
|
38
41
|
|
|
39
42
|
class TermEncoder:
|
|
40
|
-
TERM_ONEOF_NAMES: ClassVar = {
|
|
41
|
-
jelly.RdfIri: "iri",
|
|
42
|
-
jelly.RdfLiteral: "literal",
|
|
43
|
-
str: "bnode",
|
|
44
|
-
jelly.RdfDefaultGraph: "default_graph",
|
|
45
|
-
jelly.RdfTriple: "triple_term",
|
|
46
|
-
}
|
|
47
|
-
|
|
48
43
|
def __init__(
|
|
49
44
|
self,
|
|
50
45
|
lookup_preset: options.LookupPreset | None = None,
|
|
@@ -56,15 +51,16 @@ class TermEncoder:
|
|
|
56
51
|
self.prefixes = LookupEncoder(lookup_size=lookup_preset.max_prefixes)
|
|
57
52
|
self.datatypes = LookupEncoder(lookup_size=lookup_preset.max_datatypes)
|
|
58
53
|
|
|
59
|
-
def
|
|
54
|
+
def encode_iri_indices(self, iri_string: str) -> tuple[Rows, int, int]:
|
|
60
55
|
"""
|
|
61
|
-
Encode
|
|
56
|
+
Encode lookup indices for IRI.
|
|
62
57
|
|
|
63
58
|
Args:
|
|
64
59
|
iri_string (str): full iri in string format.
|
|
65
60
|
|
|
66
61
|
Returns:
|
|
67
|
-
|
|
62
|
+
tuple[Rows, int, int]: additional rows (if any) and
|
|
63
|
+
indices in prefix and name tables.
|
|
68
64
|
|
|
69
65
|
"""
|
|
70
66
|
prefix, name = split_iri(iri_string)
|
|
@@ -87,31 +83,35 @@ class TermEncoder:
|
|
|
87
83
|
|
|
88
84
|
prefix_index = self.prefixes.encode_prefix_term_index(prefix)
|
|
89
85
|
name_index = self.names.encode_name_term_index(name)
|
|
90
|
-
return term_rows,
|
|
86
|
+
return term_rows, prefix_index, name_index
|
|
91
87
|
|
|
92
|
-
def
|
|
88
|
+
def encode_iri(self, iri_string: str, iri: jelly.RdfIri) -> Rows:
|
|
93
89
|
"""
|
|
94
|
-
Encode
|
|
90
|
+
Encode iri.
|
|
91
|
+
|
|
92
|
+
Args:
|
|
93
|
+
iri_string (str): full iri in string format.
|
|
94
|
+
iri (jelly.RdfIri): iri to fill
|
|
95
95
|
|
|
96
96
|
Returns:
|
|
97
|
-
|
|
98
|
-
default graph message.
|
|
97
|
+
Rows: extra rows for prefix and name tables, if any.
|
|
99
98
|
|
|
100
99
|
"""
|
|
101
|
-
|
|
100
|
+
term_rows, prefix_index, name_index = self.encode_iri_indices(iri_string)
|
|
101
|
+
iri.prefix_id = prefix_index
|
|
102
|
+
iri.name_id = name_index
|
|
103
|
+
return term_rows
|
|
102
104
|
|
|
103
|
-
def
|
|
105
|
+
def encode_default_graph(self, g_default_graph: jelly.RdfDefaultGraph) -> Rows:
|
|
104
106
|
"""
|
|
105
|
-
Encode
|
|
106
|
-
|
|
107
|
-
Args:
|
|
108
|
-
bnode (str): BN internal identifier in string format.
|
|
107
|
+
Encode default graph.
|
|
109
108
|
|
|
110
109
|
Returns:
|
|
111
|
-
|
|
110
|
+
Rows: empty extra rows (for API consistency)
|
|
112
111
|
|
|
113
112
|
"""
|
|
114
|
-
|
|
113
|
+
g_default_graph.CopyFrom(jelly.RdfDefaultGraph())
|
|
114
|
+
return ()
|
|
115
115
|
|
|
116
116
|
def encode_literal(
|
|
117
117
|
self,
|
|
@@ -119,7 +119,8 @@ class TermEncoder:
|
|
|
119
119
|
lex: str,
|
|
120
120
|
language: str | None = None,
|
|
121
121
|
datatype: str | None = None,
|
|
122
|
-
|
|
122
|
+
literal: jelly.RdfLiteral,
|
|
123
|
+
) -> Rows:
|
|
123
124
|
"""
|
|
124
125
|
Encode literal.
|
|
125
126
|
|
|
@@ -128,14 +129,14 @@ class TermEncoder:
|
|
|
128
129
|
language (str | None, optional): langtag. Defaults to None.
|
|
129
130
|
datatype (str | None, optional): data type if
|
|
130
131
|
it is a typed literal. Defaults to None.
|
|
132
|
+
literal (jelly.RdfLiteral): literal to fill.
|
|
131
133
|
|
|
132
134
|
Raises:
|
|
133
135
|
JellyConformanceError: if datatype specified while
|
|
134
136
|
datatable is not used.
|
|
135
137
|
|
|
136
138
|
Returns:
|
|
137
|
-
|
|
138
|
-
and RdfLiteral message.
|
|
139
|
+
Rows: extra rows (i.e., datatype entries).
|
|
139
140
|
|
|
140
141
|
"""
|
|
141
142
|
datatype_id = None
|
|
@@ -157,13 +158,16 @@ class TermEncoder:
|
|
|
157
158
|
|
|
158
159
|
datatype_id = self.datatypes.encode_datatype_term_index(datatype)
|
|
159
160
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
langtag=language
|
|
163
|
-
|
|
164
|
-
|
|
161
|
+
literal.lex = lex
|
|
162
|
+
if language:
|
|
163
|
+
literal.langtag = language
|
|
164
|
+
if datatype_id:
|
|
165
|
+
literal.datatype = datatype_id
|
|
166
|
+
return term_rows
|
|
165
167
|
|
|
166
|
-
def encode_quoted_triple(
|
|
168
|
+
def encode_quoted_triple(
|
|
169
|
+
self, terms: Iterable[object], quoted_statement: jelly.RdfTriple
|
|
170
|
+
) -> Rows:
|
|
167
171
|
"""
|
|
168
172
|
Encode a quoted triple.
|
|
169
173
|
|
|
@@ -173,75 +177,116 @@ class TermEncoder:
|
|
|
173
177
|
|
|
174
178
|
Args:
|
|
175
179
|
terms (Iterable[object]): triple terms to encode.
|
|
180
|
+
quoted_statement (jelly.RdfTriple): quoted triple to fill.
|
|
176
181
|
|
|
177
182
|
Returns:
|
|
178
|
-
|
|
179
|
-
information (prefixes, names, datatypes rows, if any)
|
|
180
|
-
and the encoded triple row.
|
|
183
|
+
Rows: additional stream rows with preceeding
|
|
184
|
+
information (prefixes, names, datatypes rows, if any).
|
|
181
185
|
|
|
182
186
|
"""
|
|
183
|
-
statement: dict[str, Any] = {}
|
|
184
187
|
rows: list[jelly.RdfStreamRow] = []
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
188
|
+
terms = iter(terms)
|
|
189
|
+
extra_rows = self.encode_spo(next(terms), Slot.subject, quoted_statement)
|
|
190
|
+
rows.extend(extra_rows)
|
|
191
|
+
extra_rows = self.encode_spo(next(terms), Slot.predicate, quoted_statement)
|
|
192
|
+
rows.extend(extra_rows)
|
|
193
|
+
extra_rows = self.encode_spo(next(terms), Slot.object, quoted_statement)
|
|
194
|
+
rows.extend(extra_rows)
|
|
195
|
+
return rows
|
|
196
|
+
|
|
197
|
+
def encode_spo(self, term: object, slot: Slot, statement: Statement) -> Rows:
|
|
194
198
|
msg = f"unsupported term type: {type(term)}"
|
|
195
199
|
raise NotImplementedError(msg)
|
|
196
200
|
|
|
201
|
+
def encode_graph(self, term: object, statement: HasGraph) -> Rows:
|
|
202
|
+
msg = f"unsupported term type: {type(term)}"
|
|
203
|
+
raise NotImplementedError(msg)
|
|
204
|
+
|
|
205
|
+
def get_iri_field(self, statement: Statement, slot: Slot) -> jelly.RdfIri:
|
|
206
|
+
"""Get IRI field directly based on slot."""
|
|
207
|
+
if slot == Slot.subject:
|
|
208
|
+
return statement.s_iri
|
|
209
|
+
if slot == Slot.predicate:
|
|
210
|
+
return statement.p_iri
|
|
211
|
+
return statement.o_iri
|
|
212
|
+
|
|
213
|
+
def get_literal_field(self, statement: Statement, slot: Slot) -> jelly.RdfLiteral:
|
|
214
|
+
"""Get literal field directly based on slot."""
|
|
215
|
+
if slot == Slot.subject:
|
|
216
|
+
return statement.s_literal
|
|
217
|
+
if slot == Slot.predicate:
|
|
218
|
+
return statement.p_literal
|
|
219
|
+
return statement.o_literal
|
|
220
|
+
|
|
221
|
+
def set_bnode_field(
|
|
222
|
+
self, statement: Statement, slot: Slot, identifier: str
|
|
223
|
+
) -> None:
|
|
224
|
+
"""Set bnode field directly based on slot."""
|
|
225
|
+
if slot == Slot.subject:
|
|
226
|
+
statement.s_bnode = identifier
|
|
227
|
+
elif slot == Slot.predicate:
|
|
228
|
+
statement.p_bnode = identifier
|
|
229
|
+
else:
|
|
230
|
+
statement.o_bnode = identifier
|
|
197
231
|
|
|
198
|
-
|
|
199
|
-
|
|
232
|
+
def get_triple_field(self, statement: Statement, slot: Slot) -> jelly.RdfTriple:
|
|
233
|
+
"""Get triple term field directly based on slot."""
|
|
234
|
+
if slot == Slot.subject:
|
|
235
|
+
return statement.s_triple_term
|
|
236
|
+
if slot == Slot.predicate:
|
|
237
|
+
return statement.p_triple_term
|
|
238
|
+
return statement.o_triple_term
|
|
200
239
|
|
|
201
|
-
subject = "s"
|
|
202
|
-
predicate = "p"
|
|
203
|
-
object = "o"
|
|
204
|
-
graph = "g"
|
|
205
240
|
|
|
206
|
-
|
|
207
|
-
|
|
241
|
+
class Slot(IntEnum):
|
|
242
|
+
subject = 0
|
|
243
|
+
predicate = 1
|
|
244
|
+
object = 2
|
|
245
|
+
graph = 3
|
|
208
246
|
|
|
209
247
|
|
|
210
|
-
def
|
|
211
|
-
terms:
|
|
248
|
+
def encode_spo(
|
|
249
|
+
terms: Iterator[object],
|
|
212
250
|
term_encoder: TermEncoder,
|
|
213
|
-
repeated_terms:
|
|
214
|
-
|
|
251
|
+
repeated_terms: list[object | None],
|
|
252
|
+
statement: Statement,
|
|
253
|
+
) -> list[jelly.RdfStreamRow]:
|
|
215
254
|
"""
|
|
216
|
-
Encode a statement.
|
|
255
|
+
Encode the s/p/o of a statement.
|
|
217
256
|
|
|
218
257
|
Args:
|
|
219
|
-
terms (
|
|
258
|
+
terms (Iterator[object]): iterator for original terms to encode
|
|
220
259
|
term_encoder (TermEncoder): encoder with lookup tables
|
|
221
|
-
repeated_terms (
|
|
260
|
+
repeated_terms (list[object | None]): list of repeated terms.
|
|
261
|
+
statement (Statement): Triple/Quad to fill.
|
|
222
262
|
|
|
223
263
|
Returns:
|
|
224
|
-
|
|
225
|
-
extra rows to append and jelly terms.
|
|
264
|
+
list[jelly.RdfStreamRow]: extra rows to append.
|
|
226
265
|
|
|
227
266
|
"""
|
|
228
|
-
statement: dict[str, object] = {}
|
|
229
267
|
rows: list[jelly.RdfStreamRow] = []
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
268
|
+
s = next(terms)
|
|
269
|
+
if repeated_terms[Slot.subject] != s:
|
|
270
|
+
extra_rows = term_encoder.encode_spo(s, Slot.subject, statement)
|
|
271
|
+
rows.extend(extra_rows)
|
|
272
|
+
repeated_terms[Slot.subject] = s
|
|
273
|
+
p = next(terms)
|
|
274
|
+
if repeated_terms[Slot.predicate] != p:
|
|
275
|
+
extra_rows = term_encoder.encode_spo(p, Slot.predicate, statement)
|
|
276
|
+
rows.extend(extra_rows)
|
|
277
|
+
repeated_terms[Slot.predicate] = p
|
|
278
|
+
o = next(terms)
|
|
279
|
+
if repeated_terms[Slot.object] != o:
|
|
280
|
+
extra_rows = term_encoder.encode_spo(o, Slot.object, statement)
|
|
281
|
+
rows.extend(extra_rows)
|
|
282
|
+
repeated_terms[Slot.object] = o
|
|
283
|
+
return rows
|
|
239
284
|
|
|
240
285
|
|
|
241
286
|
def encode_triple(
|
|
242
287
|
terms: Iterable[object],
|
|
243
288
|
term_encoder: TermEncoder,
|
|
244
|
-
repeated_terms:
|
|
289
|
+
repeated_terms: list[object | None],
|
|
245
290
|
) -> list[jelly.RdfStreamRow]:
|
|
246
291
|
"""
|
|
247
292
|
Encode one triple.
|
|
@@ -249,14 +294,16 @@ def encode_triple(
|
|
|
249
294
|
Args:
|
|
250
295
|
terms (Iterable[object]): original terms to encode
|
|
251
296
|
term_encoder (TermEncoder): current encoder with lookup tables
|
|
252
|
-
repeated_terms (
|
|
297
|
+
repeated_terms (list[object | None]): list of repeated terms.
|
|
253
298
|
|
|
254
299
|
Returns:
|
|
255
300
|
list[jelly.RdfStreamRow]: list of rows to add to the current flow.
|
|
256
301
|
|
|
257
302
|
"""
|
|
258
|
-
|
|
259
|
-
|
|
303
|
+
triple = jelly.RdfTriple()
|
|
304
|
+
terms = iter(terms)
|
|
305
|
+
rows = encode_spo(terms, term_encoder, repeated_terms, triple)
|
|
306
|
+
row = jelly.RdfStreamRow(triple=triple)
|
|
260
307
|
rows.append(row)
|
|
261
308
|
return rows
|
|
262
309
|
|
|
@@ -264,7 +311,7 @@ def encode_triple(
|
|
|
264
311
|
def encode_quad(
|
|
265
312
|
terms: Iterable[object],
|
|
266
313
|
term_encoder: TermEncoder,
|
|
267
|
-
repeated_terms:
|
|
314
|
+
repeated_terms: list[object | None],
|
|
268
315
|
) -> list[jelly.RdfStreamRow]:
|
|
269
316
|
"""
|
|
270
317
|
Encode one quad.
|
|
@@ -272,14 +319,21 @@ def encode_quad(
|
|
|
272
319
|
Args:
|
|
273
320
|
terms (Iterable[object]): original terms to encode
|
|
274
321
|
term_encoder (TermEncoder): current encoder with lookup tables
|
|
275
|
-
repeated_terms (
|
|
322
|
+
repeated_terms (list[object | None]): list of repeated terms.
|
|
276
323
|
|
|
277
324
|
Returns:
|
|
278
325
|
list[jelly.RdfStreamRow]: list of messages to append to current flow.
|
|
279
326
|
|
|
280
327
|
"""
|
|
281
|
-
|
|
282
|
-
|
|
328
|
+
terms = iter(terms)
|
|
329
|
+
quad = jelly.RdfQuad()
|
|
330
|
+
rows = encode_spo(terms, term_encoder, repeated_terms, quad)
|
|
331
|
+
g = next(terms)
|
|
332
|
+
if repeated_terms[Slot.graph] != g:
|
|
333
|
+
extra_rows = term_encoder.encode_graph(g, quad)
|
|
334
|
+
rows.extend(extra_rows)
|
|
335
|
+
repeated_terms[Slot.graph] = g
|
|
336
|
+
row = jelly.RdfStreamRow(quad=quad)
|
|
283
337
|
rows.append(row)
|
|
284
338
|
return rows
|
|
285
339
|
|
|
@@ -301,7 +355,8 @@ def encode_namespace_declaration(
|
|
|
301
355
|
list[jelly.RdfStreamRow]: list of messages to append to current flow.
|
|
302
356
|
|
|
303
357
|
"""
|
|
304
|
-
|
|
358
|
+
iri = jelly.RdfIri()
|
|
359
|
+
[*rows] = term_encoder.encode_iri(value, iri=iri)
|
|
305
360
|
declaration = jelly.RdfNamespaceDeclaration(name=name, value=iri)
|
|
306
361
|
row = jelly.RdfStreamRow(namespace=declaration)
|
|
307
362
|
rows.append(row)
|
pyjelly/serialize/streams.py
CHANGED
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from collections.abc import Generator, Iterable
|
|
4
4
|
from dataclasses import dataclass, field
|
|
5
|
-
from typing import
|
|
5
|
+
from typing import ClassVar
|
|
6
6
|
|
|
7
7
|
from pyjelly import jelly
|
|
8
8
|
from pyjelly.options import LookupPreset, StreamParameters, StreamTypes
|
|
@@ -37,6 +37,7 @@ class SerializerOptions:
|
|
|
37
37
|
class Stream:
|
|
38
38
|
physical_type: ClassVar[jelly.PhysicalStreamType]
|
|
39
39
|
default_delimited_flow_class: ClassVar[type[BoundedFrameFlow]]
|
|
40
|
+
repeated_terms: list[object | None]
|
|
40
41
|
|
|
41
42
|
def __init__(
|
|
42
43
|
self,
|
|
@@ -52,7 +53,7 @@ class Stream:
|
|
|
52
53
|
if flow is None:
|
|
53
54
|
flow = self.infer_flow()
|
|
54
55
|
self.flow = flow
|
|
55
|
-
self.repeated_terms =
|
|
56
|
+
self.repeated_terms = [None] * len(Slot)
|
|
56
57
|
self.enrolled = False
|
|
57
58
|
self.stream_types = StreamTypes(
|
|
58
59
|
physical_type=self.physical_type,
|
|
@@ -249,10 +250,9 @@ class GraphStream(TripleStream):
|
|
|
249
250
|
Generator[jelly.RdfStreamFrame]: jelly frames.
|
|
250
251
|
|
|
251
252
|
"""
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
start_row = jelly.RdfStreamRow(graph_start=jelly.RdfGraphStart(**kws))
|
|
253
|
+
graph_start = jelly.RdfGraphStart()
|
|
254
|
+
[*graph_rows] = self.encoder.encode_graph(graph_id, graph_start)
|
|
255
|
+
start_row = jelly.RdfStreamRow(graph_start=graph_start)
|
|
256
256
|
graph_rows.append(start_row)
|
|
257
257
|
self.flow.extend(graph_rows)
|
|
258
258
|
for triple in graph:
|
|
@@ -6,27 +6,27 @@ pyjelly/_proto/patch.proto,sha256=gASUm0xDG9J1advNoq_cCsJYxudTbQaiZQBq4oW3kw4,52
|
|
|
6
6
|
pyjelly/_proto/rdf.proto,sha256=EKxyG421B4m0Wx5-6jjojdga_hA3jpZfF6-T3lMc0hI,12763
|
|
7
7
|
pyjelly/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
pyjelly/integrations/generic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
pyjelly/integrations/generic/generic_sink.py,sha256
|
|
10
|
-
pyjelly/integrations/generic/parse.py,sha256=
|
|
11
|
-
pyjelly/integrations/generic/serialize.py,sha256=
|
|
9
|
+
pyjelly/integrations/generic/generic_sink.py,sha256=-53sANwfCT3ygb6FrvFSa0Ri8Hb63Cpi8AfR7igJgcQ,4949
|
|
10
|
+
pyjelly/integrations/generic/parse.py,sha256=ScbBUo8kc4ztJjD9Nr7YdfSFZmNgWmIxmizBvXpy9kw,11212
|
|
11
|
+
pyjelly/integrations/generic/serialize.py,sha256=eJYHTtBKvLl82DMPZBn0Ojp1sQrhmTttY9KPuYwYnl4,12494
|
|
12
12
|
pyjelly/integrations/rdflib/__init__.py,sha256=lpIz6iildMf5bDvj3aBqZJ7kgKFrTx_tsqSb6PkLis0,552
|
|
13
|
-
pyjelly/integrations/rdflib/parse.py,sha256=
|
|
14
|
-
pyjelly/integrations/rdflib/serialize.py,sha256=
|
|
13
|
+
pyjelly/integrations/rdflib/parse.py,sha256=J5WsuAFXc2MNOaaMAknxQRub89RhcOWVWarcl5jb3PI,15306
|
|
14
|
+
pyjelly/integrations/rdflib/serialize.py,sha256=G6W-NpaIk4nDb50D_g-S2WcDqG9FlXfUOer0UYKYIyA,12886
|
|
15
15
|
pyjelly/jelly/__init__.py,sha256=9kacwn8Ew_1fcgj1abz6miEz-AtUdPT2ltFWaRIE5VE,126
|
|
16
16
|
pyjelly/jelly/rdf_pb2.py,sha256=qjgS3kQnCJqoOmgzvgk1BeYxGbeDX2zygJPc2vDjRts,8952
|
|
17
17
|
pyjelly/jelly/rdf_pb2.pyi,sha256=-gxZO-r2wyN68l83XomySz60c82SZmoPKh1HxamBjZs,11816
|
|
18
18
|
pyjelly/parse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
|
-
pyjelly/parse/decode.py,sha256=
|
|
20
|
-
pyjelly/parse/ioutils.py,sha256=
|
|
19
|
+
pyjelly/parse/decode.py,sha256=ze1u7xDz_ySxs4tTqaNN_mpd7GCRN6_Tuz9-xZOwCt8,14381
|
|
20
|
+
pyjelly/parse/ioutils.py,sha256=0eqLmY1lpxRT5PZjx93g8FGfvpfDsXOkZBq2Ag8vptw,3808
|
|
21
21
|
pyjelly/parse/lookup.py,sha256=1AbdZEycLC4tRfh3fgF5hv5PrhwhdWvCUC53iHt-E4c,2193
|
|
22
22
|
pyjelly/serialize/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
|
-
pyjelly/serialize/encode.py,sha256=
|
|
23
|
+
pyjelly/serialize/encode.py,sha256=_ACdaxYAoh7z5ND77ceJikKF_AGlZ-P69XPsnl7Fmsg,12664
|
|
24
24
|
pyjelly/serialize/flows.py,sha256=0C2soigJKyHr3xoR-7v0kc1RL8COwnuCRd4iVZpukFU,5524
|
|
25
25
|
pyjelly/serialize/ioutils.py,sha256=2_NaadLfHO3jKR1ZV7aK6jQ09sPKBar9iLFHYwourz8,400
|
|
26
26
|
pyjelly/serialize/lookup.py,sha256=h0lYFjdB6CIuN2DzAW6EE4ILJFUuto3paAK6DG1DZYg,4091
|
|
27
|
-
pyjelly/serialize/streams.py,sha256=
|
|
28
|
-
pyjelly-0.
|
|
29
|
-
pyjelly-0.
|
|
30
|
-
pyjelly-0.
|
|
31
|
-
pyjelly-0.
|
|
32
|
-
pyjelly-0.
|
|
27
|
+
pyjelly/serialize/streams.py,sha256=p4RTQ750C4mT64vDxiK0KcUwD0qaAy-rZ7vwwvV_Cy8,8339
|
|
28
|
+
pyjelly-0.6.1.dist-info/METADATA,sha256=scN1jhC-58MfUscg9VcgWRCBPW4Y9h2_RjlOsxuS6uw,4753
|
|
29
|
+
pyjelly-0.6.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
30
|
+
pyjelly-0.6.1.dist-info/entry_points.txt,sha256=kUG0p9zso7HpitdMaQaXEj_KSqgOGsL0Ky9ARbecN1g,339
|
|
31
|
+
pyjelly-0.6.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
32
|
+
pyjelly-0.6.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|