pyjelly 0.7.1__cp311-cp311-macosx_11_0_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cb523b6bada1c6eba8b4__mypyc.cpython-311-darwin.so +0 -0
- pyjelly/__init__.py +0 -0
- pyjelly/_proto/grpc.proto +33 -0
- pyjelly/_proto/patch.proto +165 -0
- pyjelly/_proto/rdf.proto +384 -0
- pyjelly/errors.py +10 -0
- pyjelly/integrations/__init__.py +0 -0
- pyjelly/integrations/generic/__init__.py +0 -0
- pyjelly/integrations/generic/generic_sink.py +202 -0
- pyjelly/integrations/generic/parse.py +412 -0
- pyjelly/integrations/generic/serialize.cpython-311-darwin.so +0 -0
- pyjelly/integrations/generic/serialize.py +402 -0
- pyjelly/integrations/rdflib/__init__.py +24 -0
- pyjelly/integrations/rdflib/parse.py +560 -0
- pyjelly/integrations/rdflib/serialize.py +408 -0
- pyjelly/jelly/__init__.py +5 -0
- pyjelly/jelly/rdf_pb2.py +70 -0
- pyjelly/jelly/rdf_pb2.pyi +231 -0
- pyjelly/options.py +141 -0
- pyjelly/parse/__init__.py +0 -0
- pyjelly/parse/decode.cpython-311-darwin.so +0 -0
- pyjelly/parse/decode.py +447 -0
- pyjelly/parse/ioutils.cpython-311-darwin.so +0 -0
- pyjelly/parse/ioutils.py +115 -0
- pyjelly/parse/lookup.cpython-311-darwin.so +0 -0
- pyjelly/parse/lookup.py +70 -0
- pyjelly/serialize/__init__.py +0 -0
- pyjelly/serialize/encode.cpython-311-darwin.so +0 -0
- pyjelly/serialize/encode.py +397 -0
- pyjelly/serialize/flows.py +196 -0
- pyjelly/serialize/ioutils.cpython-311-darwin.so +0 -0
- pyjelly/serialize/ioutils.py +13 -0
- pyjelly/serialize/lookup.cpython-311-darwin.so +0 -0
- pyjelly/serialize/lookup.py +137 -0
- pyjelly/serialize/streams.cpython-311-darwin.so +0 -0
- pyjelly/serialize/streams.py +281 -0
- pyjelly-0.7.1.dist-info/METADATA +114 -0
- pyjelly-0.7.1.dist-info/RECORD +41 -0
- pyjelly-0.7.1.dist-info/WHEEL +6 -0
- pyjelly-0.7.1.dist-info/entry_points.txt +7 -0
- pyjelly-0.7.1.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import deque
|
|
4
|
+
from collections.abc import Generator
|
|
5
|
+
from typing import IO, NamedTuple, Union
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class _DefaultGraph:
    """Type of the module-level ``DefaultGraph`` marker object."""

    def __repr__(self) -> str:
        # A non-empty repr keeps the marker visible in logs and inside the
        # repr of any tuple that carries it.
        return "<DefaultGraph>"


# Single shared marker instance; used as the default sink identifier.
DefaultGraph = _DefaultGraph()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BlankNode:
    """RDF blank node; the node label is kept as a plain string."""

    def __init__(self, identifier: str) -> None:
        self._identifier: str = identifier

    def __str__(self) -> str:
        # Rendered with the "_:" prefix in front of the label.
        return "_:{}".format(self._identifier)

    def __repr__(self) -> str:
        return "BlankNode(identifier={})".format(self._identifier)

    def __eq__(self, other: object) -> bool:
        # Only another BlankNode with the same label compares equal.
        if not isinstance(other, BlankNode):
            return False
        return self._identifier == other._identifier

    def __hash__(self) -> int:
        # Hash follows the label so equal nodes hash alike.
        return hash(self._identifier)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class IRI:
    """IRI term; the IRI text is kept as a plain string."""

    def __init__(self, iri: str) -> None:
        self._iri: str = iri

    def __str__(self) -> str:
        # Rendered wrapped in angle brackets.
        return "<{}>".format(self._iri)

    def __repr__(self) -> str:
        return "IRI({})".format(self._iri)

    def __eq__(self, other: object) -> bool:
        # Only another IRI with the same text compares equal.
        if not isinstance(other, IRI):
            return False
        return self._iri == other._iri

    def __hash__(self) -> int:
        # Hash follows the IRI text so equal IRIs hash alike.
        return hash(self._iri)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class Literal:
    """
    RDF literal term.

    Notes:
        Holds a lexical form plus an optional language tag and an optional
        datatype. Every part is stored as a string (or None when absent).

    """

    def __init__(
        self, lex: str, langtag: str | None = None, datatype: str | None = None
    ) -> None:
        self._lex: str = lex
        self._langtag: str | None = langtag
        self._datatype: str | None = datatype

    def __str__(self) -> str:
        # The language tag wins over the datatype when both are set.
        if self._langtag:
            tail = f"@{self._langtag}"
        elif self._datatype:
            tail = f"^^<{self._datatype}>"
        else:
            tail = ""
        return f'"{self._lex}"{tail}'

    def __repr__(self) -> str:
        return "Literal({!r}, langtag={!r}, datatype={!r})".format(
            self._lex, self._langtag, self._datatype
        )

    def __eq__(self, other: object) -> bool:
        # Equal only to another Literal whose three parts all match.
        if not isinstance(other, Literal):
            return False
        mine = (self._lex, self._langtag, self._datatype)
        theirs = (other._lex, other._langtag, other._datatype)
        return mine == theirs

    def __hash__(self) -> int:
        # Hash over the same three parts that equality compares.
        return hash((self._lex, self._langtag, self._datatype))
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
Node = Union[BlankNode, IRI, Literal, "Triple"]
|
|
103
|
+
GraphName = Node | _DefaultGraph
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
TRIPLE_ARITY = 3
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class Triple(NamedTuple):
    """RDF triple as a three-field named tuple: ``s``, ``p``, ``o``."""

    # Fields are positional, in this order.
    s: Node
    p: Node
    o: Node
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class Quad(NamedTuple):
    """RDF quad as a four-field named tuple: ``s``, ``p``, ``o``, ``g``."""

    # Fields are positional, in this order; ``g`` is the graph name.
    s: Node
    p: Node
    o: Node
    g: GraphName
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class Prefix(NamedTuple):
    """Namespace declaration: a prefix label paired with its IRI."""

    # Fields are positional, in this order.
    prefix: str
    iri: IRI
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class GenericStatementSink:
    """In-memory container of statements together with prefix bindings."""

    _store: deque[Triple | Quad]

    def __init__(self, identifier: GraphName = DefaultGraph) -> None:
        """
        Create an empty sink.

        Notes:
            Statements live in a deque, so insertion order is preserved.

        Args:
            identifier (GraphName, optional): Identifier for a sink.
                Defaults to DefaultGraph.

        """
        self._identifier = identifier
        self._namespaces: dict[str, IRI] = {}
        self._store: deque[Triple | Quad] = deque()

    def add(self, statement: Triple | Quad) -> None:
        """Append a statement at the end of the store."""
        self._store.append(statement)

    def bind(self, prefix: str, namespace: IRI) -> None:
        """Register (or overwrite) the namespace IRI for a prefix."""
        self._namespaces[prefix] = namespace

    def __iter__(self) -> Generator[Triple | Quad]:
        for statement in self._store:
            yield statement

    def __len__(self) -> int:
        return len(self._store)

    @property
    def namespaces(self) -> Generator[tuple[str, IRI]]:
        """Yield ``(prefix, iri)`` pairs for every bound namespace."""
        for prefix, namespace in self._namespaces.items():
            yield prefix, namespace

    @property
    def identifier(self) -> GraphName:
        """Identifier this sink was created with (or received via parse)."""
        return self._identifier

    @property
    def store(self) -> Generator[Triple | Quad]:
        """Yield the stored statements in insertion order."""
        for statement in self._store:
            yield statement

    @property
    def is_triples_sink(self) -> bool:
        """
        Check if the sink contains triples or quads.

        Only the first stored statement is inspected; an empty sink
        reports False.

        Returns:
            bool: true, if length of the first statement is 3.

        """
        if not self._store:
            return False
        return len(self._store[0]) == TRIPLE_ARITY

    def parse(self, input_file: IO[bytes]) -> None:
        """
        Load a jelly stream into this sink.

        The sink's statements, namespaces and identifier are replaced by
        those of the freshly parsed sink.

        Args:
            input_file (IO[bytes]): input jelly binary stream.

        """
        from pyjelly.integrations.generic.parse import (  # noqa: PLC0415
            parse_jelly_to_graph,
        )

        loaded = parse_jelly_to_graph(input_file)
        self._store = loaded._store
        self._namespaces = loaded._namespaces
        self._identifier = loaded._identifier

    def serialize(self, output_file: IO[bytes]) -> None:
        """
        Write this sink to ``output_file`` as a one-sink grouped stream.

        Args:
            output_file (IO[bytes]): output binary stream.

        """
        from pyjelly.integrations.generic.serialize import (  # noqa: PLC0415
            grouped_stream_to_file,
        )

        only_this_sink = (sink for sink in [self])
        grouped_stream_to_file(only_this_sink, output_file)
|
|
@@ -0,0 +1,412 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable, Generator, Iterable, MutableMapping
|
|
4
|
+
from contextvars import ContextVar
|
|
5
|
+
from itertools import chain
|
|
6
|
+
from typing import IO, Any
|
|
7
|
+
from typing_extensions import override
|
|
8
|
+
|
|
9
|
+
from mypy_extensions import mypyc_attr
|
|
10
|
+
|
|
11
|
+
from pyjelly import jelly
|
|
12
|
+
from pyjelly.errors import JellyConformanceError
|
|
13
|
+
from pyjelly.integrations.generic.generic_sink import (
|
|
14
|
+
IRI,
|
|
15
|
+
BlankNode,
|
|
16
|
+
DefaultGraph,
|
|
17
|
+
GenericStatementSink,
|
|
18
|
+
GraphName,
|
|
19
|
+
Literal,
|
|
20
|
+
Prefix,
|
|
21
|
+
Quad,
|
|
22
|
+
Triple,
|
|
23
|
+
)
|
|
24
|
+
from pyjelly.parse.decode import Adapter, Decoder, ParserOptions
|
|
25
|
+
from pyjelly.parse.ioutils import get_options_and_frames
|
|
26
|
+
|
|
27
|
+
Statement = Triple | Quad
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@mypyc_attr(allow_interpreted_subclasses=True)
class GenericStatementSinkAdapter(Adapter):
    """
    Adapter producing the generic term classes.

    Notes:
        Builds the IRI, BlankNode, Literal, Prefix and Triple objects
        used by GenericStatementSink, covering terms, namespace
        declarations and quoted triples.

    Args:
        Adapter (_type_): base Adapter class

    """

    @override
    def iri(self, iri: str) -> IRI:
        return IRI(iri)

    @override
    def bnode(self, bnode: str) -> BlankNode:
        return BlankNode(bnode)

    @override
    def default_graph(self) -> GraphName:
        # The shared module-level marker object, not a new instance.
        return DefaultGraph

    @override
    def literal(
        self,
        lex: str,
        language: str | None = None,
        datatype: str | None = None,
    ) -> Literal:
        return Literal(lex, langtag=language, datatype=datatype)

    @override
    def namespace_declaration(self, name: str, iri: str) -> Prefix:
        # The IRI string is wrapped through self.iri so subclasses that
        # override iri() affect prefixes as well.
        namespace = self.iri(iri)
        return Prefix(name, namespace)

    @override
    def quoted_triple(self, terms: Iterable[Any]) -> Triple:
        return Triple(*terms)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@mypyc_attr(allow_interpreted_subclasses=True)
class GenericTriplesAdapter(GenericStatementSinkAdapter):
    """
    Adapter that turns decoded triple terms into Triple objects.

    Args:
        GenericStatementSinkAdapter (_type_): base GenericStatementSink
            adapter implementation that handles terms and namespaces.

    """

    def __init__(self, options: ParserOptions) -> None:
        super().__init__(options=options)

    @override
    def triple(self, terms: Iterable[Any]) -> Triple:
        # terms is unpacked positionally into (s, p, o).
        return Triple(*terms)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@mypyc_attr(allow_interpreted_subclasses=True)
class GenericQuadsBaseAdapter(GenericStatementSinkAdapter):
    """Common base of the QUADS and GRAPHS adapters; adds no behavior."""

    def __init__(self, options: ParserOptions) -> None:
        super().__init__(options=options)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@mypyc_attr(allow_interpreted_subclasses=True)
class GenericQuadsAdapter(GenericQuadsBaseAdapter):
    """
    Adapter for the QUADS physical type.

    Args:
        GenericQuadsBaseAdapter (_type_): quads adapter that handles
            base quads processing.

    """

    @override
    def quad(self, terms: Iterable[Any]) -> Quad:
        # terms is unpacked positionally into (s, p, o, g).
        return Quad(*terms)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@mypyc_attr(allow_interpreted_subclasses=True)
class GenericGraphsAdapter(GenericQuadsBaseAdapter):
    """
    Extends GenericQuadsBaseAdapter for GRAPHS physical type.

    Notes:
        Tracks the graph opened by graph_start until graph_end and
        attaches it as the fourth component of every triple received
        in between.

    Args:
        GenericQuadsBaseAdapter (_type_): quads adapter that handles
            base quads processing.

    Raises:
        JellyConformanceError: raised if graph start message was not received.

    """

    # Graph currently open; None while no graph is open.
    _graph_id: GraphName | None

    def __init__(
        self,
        options: ParserOptions,
    ) -> None:
        super().__init__(options=options)
        self._graph_id = None

    @property
    def graph(self) -> None:
        """Raise JellyConformanceError when no graph is currently open."""
        if self._graph_id is None:
            msg = "new graph was not started"
            raise JellyConformanceError(msg)

    @override
    def graph_start(self, graph_id: GraphName) -> None:
        self._graph_id = graph_id

    @override
    def triple(self, terms: Iterable[Any]) -> Quad:
        """
        Build a Quad from triple terms and the currently open graph.

        Raises:
            JellyConformanceError: if no graph has been started, instead
                of emitting a quad whose graph component is None.

        """
        graph_id = self._graph_id
        if graph_id is None:
            msg = "new graph was not started"
            raise JellyConformanceError(msg)
        return Quad(*chain(terms, [graph_id]))

    @override
    def graph_end(self) -> None:
        self._graph_id = None
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def parse_triples_stream(
    frames: Iterable[jelly.RdfStreamFrame],
    options: ParserOptions,
    frame_metadata: ContextVar[MutableMapping[str, bytes]] | None = None,
) -> Generator[Iterable[Triple | Prefix]]:
    """
    Decode a triples stream frame by frame.

    Args:
        frames (Iterable[jelly.RdfStreamFrame]): iterator over stream frames
        options (ParserOptions): stream options
        frame_metadata: (ContextVar[ScalarMap[str, bytes]]): context variable
            that receives each frame's metadata (an empty dict when the
            frame has none) before that frame's rows are yielded

    Yields:
        Generator[Iterable[Triple | Prefix]]:
            Generator of iterables of Triple or Prefix objects,
            one iterable per frame.

    """
    # One decoder is shared by all frames of the stream.
    decoder = Decoder(adapter=GenericTriplesAdapter(options))
    for frame in frames:
        if frame_metadata is not None:
            if frame.metadata:
                frame_metadata.set(frame.metadata)
            else:
                frame_metadata.set({})
        yield decoder.iter_rows(frame)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def parse_quads_stream(
    frames: Iterable[jelly.RdfStreamFrame],
    options: ParserOptions,
    frame_metadata: ContextVar[MutableMapping[str, bytes]] | None = None,
) -> Generator[Iterable[Quad | Prefix]]:
    """
    Decode a quads or graphs stream frame by frame.

    Args:
        frames (Iterable[jelly.RdfStreamFrame]): iterator over stream frames
        options (ParserOptions): stream options
        frame_metadata: (ContextVar[ScalarMap[str, bytes]]): context variable
            that receives each frame's metadata (an empty dict when the
            frame has none) before that frame's rows are yielded

    Yields:
        Generator[Iterable[Quad | Prefix]]:
            Generator of iterables of Quad or Prefix objects,
            one iterable per frame.

    """
    # QUADS gets its own adapter; any other physical type is handled by
    # the graphs adapter.
    is_quads = options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_QUADS
    adapter_class: type[GenericQuadsBaseAdapter] = (
        GenericQuadsAdapter if is_quads else GenericGraphsAdapter
    )
    # One decoder is shared by all frames of the stream.
    decoder = Decoder(adapter=adapter_class(options=options))
    for frame in frames:
        if frame_metadata is not None:
            if frame.metadata:
                frame_metadata.set(frame.metadata)
            else:
                frame_metadata.set({})
        yield decoder.iter_rows(frame)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def parse_jelly_grouped(
    inp: IO[bytes],
    sink_factory: Callable[[], GenericStatementSink] = lambda: GenericStatementSink(),
    *,
    logical_type_strict: bool = False,
    frame_metadata: ContextVar[MutableMapping[str, bytes]] | None = None,
) -> Generator[GenericStatementSink]:
    """
    Take a jelly file and yield generic statement sinks.

    Yields one generic statements sink per frame.

    Args:
        inp (IO[bytes]): input jelly buffered binary stream
        sink_factory (Callable): lambda to construct a statement sink.
            By default, creates an empty in-memory GenericStatementSink.
        logical_type_strict (bool): If True, validate the *logical* type
            in stream options and require a grouped logical type.
            Otherwise, only the physical type is used to route parsing.
        frame_metadata: (ContextVar[ScalarMap[str, bytes]]): context variable
            used for extracting frame metadata

    Raises:
        JellyConformanceError: in strict mode, if stream types are missing
            or the logical type is unspecified or flat
        NotImplementedError: is raised if a physical type is not implemented

    Yields:
        Generator[GenericStatementSink]:
            one GenericStatementSink per frame, regardless of stream type.

    """
    options, frames = get_options_and_frames(inp)

    st = getattr(options, "stream_types", None)
    if logical_type_strict and (
        st is None
        or st.logical_type == jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED
        or st.flat
    ):
        if st is None:
            msg = "strict logical type check requires options.stream_types"
        else:
            lt_name = jelly.LogicalStreamType.Name(st.logical_type)
            msg = f"expected GROUPED logical type, got {lt_name}"
        raise JellyConformanceError(msg)

    # Route on the physical type once; the per-frame handling below is
    # the same for triples and quads.
    physical_type = options.stream_types.physical_type
    parse_stream: Callable[..., Generator[Iterable[Statement | Prefix]]]
    if physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
        parse_stream = parse_triples_stream
    elif physical_type in (
        jelly.PHYSICAL_STREAM_TYPE_QUADS,
        jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
    ):
        parse_stream = parse_quads_stream
    else:
        physical_type_name = jelly.PhysicalStreamType.Name(physical_type)
        msg = f"the stream type {physical_type_name} is not supported "
        raise NotImplementedError(msg)

    # Both stream parsers default frame_metadata to None, so it can be
    # forwarded as-is.
    for frame_items in parse_stream(
        frames=frames,
        options=options,
        frame_metadata=frame_metadata,
    ):
        sink = sink_factory()
        for item in frame_items:
            if isinstance(item, Prefix):
                sink.bind(item.prefix, item.iri)
            else:
                sink.add(item)
        yield sink
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def parse_jelly_to_graph(
    inp: IO[bytes],
    sink_factory: Callable[[], GenericStatementSink] = lambda: GenericStatementSink(),
) -> GenericStatementSink:
    """
    Read a whole jelly stream into a single GenericStatementSink.

    Args:
        inp (IO[bytes]): input jelly stream.
        sink_factory (Callable[[], GenericStatementSink]): factory to create
            statement sink.
            By default creates an empty in-memory GenericStatementSink.
            Has no division for datasets/graphs,
            utilizes the same underlying data structures.

    Returns:
        GenericStatementSink: GenericStatementSink with statements.

    """
    options, frames = get_options_and_frames(inp)
    sink = sink_factory()

    # Options and frames are already read, so they are handed over to the
    # flat parser together with the stream.
    events = parse_jelly_flat(
        inp=inp, frames=frames, options=options, logical_type_strict=False
    )
    for event in events:
        if not isinstance(event, Prefix):
            sink.add(event)
            continue
        sink.bind(event.prefix, event.iri)  # type: ignore[union-attr, unused-ignore]
    return sink
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def parse_jelly_flat(
    inp: IO[bytes],
    frames: Iterable[jelly.RdfStreamFrame] | None = None,
    options: ParserOptions | None = None,
    *,
    logical_type_strict: bool = False,
) -> Generator[Statement | Prefix]:  # type: ignore[valid-type, unused-ignore]
    """
    Parse a jelly stream into one flat Generator of stream events.

    Args:
        inp (IO[bytes]): input jelly buffered binary stream.
        frames (Iterable[jelly.RdfStreamFrame] | None):
            jelly frames if read before.
        options (ParserOptions | None): stream options
            if read before.
        logical_type_strict (bool): If True, validate the *logical* type
            in stream options and require FLAT (TRIPLES/QUADS).
            Otherwise, only the physical type is used to route parsing.

    Raises:
        JellyConformanceError: in strict mode, if stream types are missing
            or the logical type is not flat
        NotImplementedError: if physical type is not supported

    Yields:
        Generator[Statement | Prefix]: Generator of stream events

    """
    # Unless both were supplied, options and frames are read from inp.
    if frames is None or options is None:
        options, frames = get_options_and_frames(inp)

    st = getattr(options, "stream_types", None)
    if logical_type_strict and (st is None or not st.flat):
        if st is None:
            msg = "strict logical type check requires options.stream_types"
        else:
            lt_name = jelly.LogicalStreamType.Name(st.logical_type)
            msg = f"expected FLAT logical type (TRIPLES/QUADS), got {lt_name}"
        raise JellyConformanceError(msg)

    physical_type = options.stream_types.physical_type

    if physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
        # Per-frame iterables are flattened into one event stream.
        for frame_rows in parse_triples_stream(frames=frames, options=options):
            yield from frame_rows
        return

    if physical_type in (
        jelly.PHYSICAL_STREAM_TYPE_QUADS,
        jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
    ):
        for frame_rows in parse_quads_stream(frames=frames, options=options):
            yield from frame_rows
        return

    physical_type_name = jelly.PhysicalStreamType.Name(physical_type)
    msg = f"the stream type {physical_type_name} is not supported "
    raise NotImplementedError(msg)
|
|
Binary file
|