pyjelly 0.7.1__cp311-cp311-macosx_11_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. cb523b6bada1c6eba8b4__mypyc.cpython-311-darwin.so +0 -0
  2. pyjelly/__init__.py +0 -0
  3. pyjelly/_proto/grpc.proto +33 -0
  4. pyjelly/_proto/patch.proto +165 -0
  5. pyjelly/_proto/rdf.proto +384 -0
  6. pyjelly/errors.py +10 -0
  7. pyjelly/integrations/__init__.py +0 -0
  8. pyjelly/integrations/generic/__init__.py +0 -0
  9. pyjelly/integrations/generic/generic_sink.py +202 -0
  10. pyjelly/integrations/generic/parse.py +412 -0
  11. pyjelly/integrations/generic/serialize.cpython-311-darwin.so +0 -0
  12. pyjelly/integrations/generic/serialize.py +402 -0
  13. pyjelly/integrations/rdflib/__init__.py +24 -0
  14. pyjelly/integrations/rdflib/parse.py +560 -0
  15. pyjelly/integrations/rdflib/serialize.py +408 -0
  16. pyjelly/jelly/__init__.py +5 -0
  17. pyjelly/jelly/rdf_pb2.py +70 -0
  18. pyjelly/jelly/rdf_pb2.pyi +231 -0
  19. pyjelly/options.py +141 -0
  20. pyjelly/parse/__init__.py +0 -0
  21. pyjelly/parse/decode.cpython-311-darwin.so +0 -0
  22. pyjelly/parse/decode.py +447 -0
  23. pyjelly/parse/ioutils.cpython-311-darwin.so +0 -0
  24. pyjelly/parse/ioutils.py +115 -0
  25. pyjelly/parse/lookup.cpython-311-darwin.so +0 -0
  26. pyjelly/parse/lookup.py +70 -0
  27. pyjelly/serialize/__init__.py +0 -0
  28. pyjelly/serialize/encode.cpython-311-darwin.so +0 -0
  29. pyjelly/serialize/encode.py +397 -0
  30. pyjelly/serialize/flows.py +196 -0
  31. pyjelly/serialize/ioutils.cpython-311-darwin.so +0 -0
  32. pyjelly/serialize/ioutils.py +13 -0
  33. pyjelly/serialize/lookup.cpython-311-darwin.so +0 -0
  34. pyjelly/serialize/lookup.py +137 -0
  35. pyjelly/serialize/streams.cpython-311-darwin.so +0 -0
  36. pyjelly/serialize/streams.py +281 -0
  37. pyjelly-0.7.1.dist-info/METADATA +114 -0
  38. pyjelly-0.7.1.dist-info/RECORD +41 -0
  39. pyjelly-0.7.1.dist-info/WHEEL +6 -0
  40. pyjelly-0.7.1.dist-info/entry_points.txt +7 -0
  41. pyjelly-0.7.1.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,447 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABCMeta, abstractmethod
4
+ from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence
5
+ from enum import Enum, auto
6
+ from typing import Any, ClassVar, NamedTuple
7
+ from typing_extensions import Never
8
+
9
+ from mypy_extensions import mypyc_attr
10
+
11
+ from pyjelly import jelly
12
+ from pyjelly.options import MAX_VERSION, LookupPreset, StreamParameters, StreamTypes
13
+ from pyjelly.parse.lookup import LookupDecoder
14
+
15
+ RowHandler = Callable[[Any], Any | None]
16
+ TermHandler = Callable[[Any], Any | None]
17
+ RdfStreamOptions = jelly.RdfStreamOptions
18
+
19
+
20
class ParsingMode(Enum):
    """
    Select how the frames of a Jelly stream are grouped on output.

    Modes:
        FLAT
            Merge all frames into a single Graph or Dataset.
        GROUPED
            Produce a separate Graph/Dataset for every frame (grouped parsing).
    """

    FLAT = auto()
    GROUPED = auto()
33
+
34
+
35
@mypyc_attr(allow_interpreted_subclasses=True)
class ParserOptions(NamedTuple):
    """Immutable triple of settings the parser derives from a stream's options."""

    # Physical/logical stream type pair.
    stream_types: StreamTypes
    # Maximum sizes of the name, prefix and datatype lookup tables.
    lookup_preset: LookupPreset
    # Remaining stream parameters (stream name, version, feature flags, ...).
    params: StreamParameters
40
+
41
+
42
def options_from_frame(
    frame: jelly.RdfStreamFrame,
    *,
    delimited: bool,
) -> ParserOptions:
    """
    Build parser options from the options carried by the frame's first row.

    Notes:
        Namespace declarations are reported as enabled when the options
        message exposes a truthy ``namespace_declarations`` attribute or when
        the stream version is at least ``MAX_VERSION``.

    Args:
        frame (jelly.RdfStreamFrame): first non-empty frame from the stream
        delimited (bool): derived delimited flag

    Returns:
        ParserOptions: stream types, lookup sizes and stream parameters

    """
    stream_options = frame.rows[0].options
    declares_namespaces = getattr(
        stream_options, "namespace_declarations", False
    ) or (stream_options.version >= MAX_VERSION)

    types = StreamTypes(
        physical_type=stream_options.physical_type,
        logical_type=stream_options.logical_type,
    )
    lookups = LookupPreset(
        max_names=stream_options.max_name_table_size,
        max_prefixes=stream_options.max_prefix_table_size,
        max_datatypes=stream_options.max_datatype_table_size,
    )
    parameters = StreamParameters(
        stream_name=stream_options.stream_name,
        generalized_statements=stream_options.generalized_statements,
        rdf_star=stream_options.rdf_star,
        version=stream_options.version,
        delimited=delimited,
        namespace_declarations=declares_namespaces,
    )
    return ParserOptions(
        stream_types=types,
        lookup_preset=lookups,
        params=parameters,
    )
86
+
87
+
88
def _adapter_missing(feature: str, *, stream_types: StreamTypes) -> Never:
    """
    Raise ``NotImplementedError`` for a feature the adapter does not provide.

    The message names the physical and logical stream types (resolved to their
    protobuf enum names) together with the missing ``feature``.
    """
    physical = jelly.PhysicalStreamType.Name(stream_types.physical_type)
    logical = jelly.LogicalStreamType.Name(stream_types.logical_type)
    msg = f"adapter with {physical} and {logical} does not implement {feature}"
    raise NotImplementedError(msg)
96
+
97
+
98
@mypyc_attr(allow_interpreted_subclasses=True)
class Adapter(metaclass=ABCMeta):
    """
    Base class that turns decoded Jelly terms into integration objects.

    Subclasses must implement the term factories (``iri``, ``default_graph``,
    ``bnode``, ``literal``). The statement-level hooks are optional: their
    default implementations raise ``NotImplementedError`` through
    ``_adapter_missing``.
    """

    def __init__(
        self, options: ParserOptions, parsing_mode: ParsingMode = ParsingMode.FLAT
    ) -> None:
        """Store the parser options and the frame grouping mode."""
        self.options = options
        self.parsing_mode = parsing_mode

    # Required hooks -- every adapter has to provide these.
    @abstractmethod
    def iri(self, iri: str) -> Any:
        """Create the integration's IRI object from a full IRI string."""
        raise NotImplementedError

    @abstractmethod
    def default_graph(self) -> Any:
        """Return the integration's representation of the default graph."""
        raise NotImplementedError

    @abstractmethod
    def bnode(self, bnode: str) -> Any:
        """Create the integration's blank node object from its identifier."""
        raise NotImplementedError

    @abstractmethod
    def literal(
        self,
        lex: str,
        language: str | None = None,
        datatype: str | None = None,
    ) -> Any:
        """Create a literal from its lexical form, language tag and datatype."""
        raise NotImplementedError

    # Optional hooks -- adapters override only what their stream type needs.
    def triple(self, terms: Iterable[Any]) -> Any:  # noqa: ARG002
        """Handle a decoded triple; not implemented by default."""
        stream_types = self.options.stream_types
        _adapter_missing("decoding triples", stream_types=stream_types)

    def quad(self, terms: Iterable[Any]) -> Any:  # noqa: ARG002
        """Handle a decoded quad; not implemented by default."""
        stream_types = self.options.stream_types
        _adapter_missing("decoding quads", stream_types=stream_types)

    def graph_start(self, graph_id: Any) -> Any:  # noqa: ARG002
        """Handle a graph start marker; not implemented by default."""
        stream_types = self.options.stream_types
        _adapter_missing("decoding graph start markers", stream_types=stream_types)

    def graph_end(self) -> Any:
        """Handle a graph end marker; not implemented by default."""
        stream_types = self.options.stream_types
        _adapter_missing("decoding graph end markers", stream_types=stream_types)

    def namespace_declaration(self, name: str, iri: str) -> Any:  # noqa: ARG002
        """Handle a namespace declaration; not implemented by default."""
        stream_types = self.options.stream_types
        _adapter_missing(
            "decoding namespace declarations",
            stream_types=stream_types,
        )

    def quoted_triple(self, terms: Iterable[Any]) -> Any:  # noqa: ARG002
        """Handle a quoted triple; not implemented by default."""
        stream_types = self.options.stream_types
        _adapter_missing("decoding quoted triple", stream_types=stream_types)

    def frame(self) -> Any:
        """Frame hook; the base implementation returns ``None``."""
        return None
158
+
159
+
160
@mypyc_attr(allow_interpreted_subclasses=True)
class Decoder:
    """
    Decode Jelly protobuf rows and terms through an integration adapter.

    The decoder owns three lookup tables (names, prefixes, datatypes), a
    per-position cache of the most recently decoded statement terms, and two
    dispatch tables mapping protobuf message types to bound handler methods.
    """

    # Row message type -> name of the method that handles it.
    _ROW_HANDLER_NAMES: ClassVar[Mapping[type[Any], str]] = {
        jelly.RdfStreamOptions: "validate_stream_options",
        jelly.RdfPrefixEntry: "ingest_prefix_entry",
        jelly.RdfNameEntry: "ingest_name_entry",
        jelly.RdfDatatypeEntry: "ingest_datatype_entry",
        jelly.RdfTriple: "decode_triple",
        jelly.RdfQuad: "decode_quad",
        jelly.RdfGraphStart: "decode_graph_start",
        jelly.RdfGraphEnd: "decode_graph_end",
        jelly.RdfNamespaceDeclaration: "decode_namespace_declaration",
    }

    # Term type -> name of the method that handles it. A plain ``str`` term is
    # a blank node; an ``RdfTriple`` in term position is a quoted triple.
    _TERM_HANDLER_NAMES: ClassVar[Mapping[type[Any], str]] = {
        jelly.RdfIri: "decode_iri",
        str: "decode_bnode",
        jelly.RdfLiteral: "decode_literal",
        jelly.RdfDefaultGraph: "decode_default_graph",
        jelly.RdfTriple: "decode_quoted_triple",
    }

    def __init__(self, adapter: Adapter) -> None:
        """
        Initialize decoder.

        Creates the lookup tables with the sizes given by the adapter's
        options, an empty repeated-terms dictionary, and the bound handler
        dispatch tables.

        Args:
            adapter (Adapter): integration-dependent adapter that specifies terms
                conversion to specific objects, framing,
                namespace declarations, and graphs/datasets forming.

        """
        # ``self.options`` is a property reading from the adapter, so the
        # adapter has to be stored before any lookup table is sized.
        self.adapter = adapter
        self.names = LookupDecoder(lookup_size=self.options.lookup_preset.max_names)
        self.prefixes = LookupDecoder(
            lookup_size=self.options.lookup_preset.max_prefixes
        )
        self.datatypes = LookupDecoder(
            lookup_size=self.options.lookup_preset.max_datatypes
        )
        # Maps a statement position ("subject", "predicate", ...) to the last
        # *decoded* term seen there, i.e. whatever the adapter returned.
        self.repeated_terms: dict[str, Any] = {}

        self.row_handlers: dict[type[Any], RowHandler] = {
            t: getattr(self, name) for t, name in self._ROW_HANDLER_NAMES.items()
        }
        self.term_handlers: dict[type[Any], TermHandler] = {
            t: getattr(self, name) for t, name in self._TERM_HANDLER_NAMES.items()
        }

    @property
    def options(self) -> ParserOptions:
        """Parser options, taken from the adapter."""
        return self.adapter.options

    def iter_rows(self, frame: jelly.RdfStreamFrame) -> Iterator[Any]:
        """
        Iterate through rows in the frame.

        Every row is decoded (so lookup entries and graph markers take
        effect), but only triples, quads and namespace declarations are
        yielded.

        Args:
            frame (jelly.RdfStreamFrame): jelly frame
        Yields:
            Iterator[Any]: decoded rows

        """
        for row_owner in frame.rows:
            row = getattr(row_owner, row_owner.WhichOneof("row"))
            decoded_row = self.decode_row(row)
            if isinstance(
                row, (jelly.RdfTriple, jelly.RdfQuad, jelly.RdfNamespaceDeclaration)
            ):
                yield decoded_row

    def decode_row(self, row: Any) -> Any | None:
        """
        Decode a row based on its type.

        Notes: uses custom adapters to decode triples/quads, namespace declarations,
               graph start/end.

        Args:
            row (Any): protobuf row message

        Raises:
            TypeError: raises error if this type of protobuf message does not have
                a respective handler

        Returns:
            Any | None: result of the handler registered for the row's type

        """
        handler = self.row_handlers.get(type(row))
        if handler is None:
            msg = f"decoder not implemented for {type(row)}"
            raise TypeError(msg) from None
        return handler(row)

    def validate_stream_options(self, options: jelly.RdfStreamOptions) -> None:
        """
        Check an options row against the options the decoder was created with.

        Types, stream name and lookup sizes must match exactly; the row's
        version must not exceed the configured version.

        Raises:
            AssertionError: if any of the checks fails

        """
        # NOTE(review): these are ``assert`` statements, so the interpreted
        # module performs no validation when Python runs with ``-O``. Kept
        # as-is so the raised exception type stays ``AssertionError``.
        stream_types, lookup_preset, params = self.options
        assert stream_types.physical_type == options.physical_type
        assert stream_types.logical_type == options.logical_type
        assert params.stream_name == options.stream_name
        assert params.version >= options.version
        assert lookup_preset.max_prefixes == options.max_prefix_table_size
        assert lookup_preset.max_datatypes == options.max_datatype_table_size
        assert lookup_preset.max_names == options.max_name_table_size

    def ingest_prefix_entry(self, entry: jelly.RdfPrefixEntry) -> None:
        """
        Update prefix lookup table based on the table entry.

        Args:
            entry (jelly.RdfPrefixEntry): prefix message, containing id and value

        """
        self.prefixes.assign_entry(index=entry.id, value=entry.value)

    def ingest_name_entry(self, entry: jelly.RdfNameEntry) -> None:
        """
        Update name lookup table based on the table entry.

        Args:
            entry (jelly.RdfNameEntry): name message, containing id and value

        """
        self.names.assign_entry(index=entry.id, value=entry.value)

    def ingest_datatype_entry(self, entry: jelly.RdfDatatypeEntry) -> None:
        """
        Update datatype lookup table based on the table entry.

        Args:
            entry (jelly.RdfDatatypeEntry): datatype message, containing id and value

        """
        self.datatypes.assign_entry(index=entry.id, value=entry.value)

    def decode_term(self, term: Any) -> Any:
        """
        Decode a term based on its type.

        Supported terms are IRI, literal, blank node (plain string), default
        graph and quoted triple.

        Notes: requires a custom adapter with implemented methods for terms decoding.

        Args:
            term (Any): IRI/literal/BN(string)/default graph/triple message

        Raises:
            TypeError: raises error if no handler for the term is found

        Returns:
            Any: decoded term, as produced by the adapter

        """
        decode_term = self.term_handlers.get(type(term))
        if decode_term is None:
            msg = f"decoder not implemented for {type(term)}"
            raise TypeError(msg) from None
        return decode_term(term)

    def decode_iri(self, iri: jelly.RdfIri) -> Any:
        """
        Decode RdfIri message to IRI using a custom adapter.

        The name is resolved before the prefix; the two strings are
        concatenated as ``prefix + name``.

        Args:
            iri (jelly.RdfIri): RdfIri message

        Returns:
            Any: IRI, based on adapter implementation

        """
        name = self.names.decode_name_term_index(iri.name_id)
        prefix = self.prefixes.decode_prefix_term_index(iri.prefix_id)
        return self.adapter.iri(iri=prefix + name)

    def decode_default_graph(self, _: jelly.RdfDefaultGraph) -> Any:
        """Return the adapter's default graph; the message carries no data."""
        return self.adapter.default_graph()

    def decode_bnode(self, bnode: str) -> Any:
        """
        Decode string message to blank node (BN) using a custom adapter.

        Args:
            bnode (str): blank node id

        Returns:
            Any: blank node object from the custom adapter

        """
        return self.adapter.bnode(bnode)

    def decode_literal(self, literal: jelly.RdfLiteral) -> Any:
        """
        Decode RdfLiteral to literal based on custom adapter implementation.

        Notes: a non-empty langtag takes precedence; the datatype is resolved
               only when the datatype table has a non-zero size and the
               datatype field is present.

        Args:
            literal (jelly.RdfLiteral): RdfLiteral message

        Returns:
            Any: literal returned by the custom adapter

        """
        language = datatype = None
        if literal.langtag:
            language = literal.langtag
        elif self.datatypes.lookup_size and literal.HasField("datatype"):
            datatype = self.datatypes.decode_datatype_term_index(literal.datatype)
        return self.adapter.literal(
            lex=literal.lex,
            language=language,
            datatype=datatype,
        )

    def decode_namespace_declaration(
        self,
        declaration: jelly.RdfNamespaceDeclaration,
    ) -> Any:
        """Decode the declaration's IRI and pass it, with the name, to the adapter."""
        iri = self.decode_iri(declaration.value)
        return self.adapter.namespace_declaration(declaration.name, iri)

    def decode_graph_start(self, graph_start: jelly.RdfGraphStart) -> Any:
        """Decode the graph term of a graph start marker and notify the adapter."""
        term = getattr(graph_start, graph_start.WhichOneof("graph"))
        return self.adapter.graph_start(self.decode_term(term))

    def decode_graph_end(self, _: jelly.RdfGraphEnd) -> Any:
        """Notify the adapter of a graph end marker."""
        return self.adapter.graph_end()

    def decode_statement(
        self,
        statement: jelly.RdfTriple | jelly.RdfQuad,
        oneofs: Sequence[str],
    ) -> Any:
        """
        Decode a triple/quad message.

        Notes: also updates repeated terms dictionary

        Args:
            statement (jelly.RdfTriple | jelly.RdfQuad): triple/quad message
            oneofs (Sequence[str]): terms s/p/o/g(if quads)

        Raises:
            ValueError: if a missing repeated term is encountered

        Returns:
            Any: a list of decoded terms

        """
        terms = []
        for oneof in oneofs:
            field = statement.WhichOneof(oneof)
            if field:
                jelly_term = getattr(statement, field)
                decoded_term = self.decode_term(jelly_term)
                self.repeated_terms[oneof] = decoded_term
            else:
                # The position is unset, so the previous statement's term is
                # reused. ``.get`` is required here: the cache starts empty,
                # and plain indexing would raise KeyError instead of the
                # documented ValueError.
                decoded_term = self.repeated_terms.get(oneof)
                if decoded_term is None:
                    msg = f"missing repeated term {oneof}"
                    raise ValueError(msg)
            terms.append(decoded_term)
        return terms

    def decode_triple(self, triple: jelly.RdfTriple) -> Any:
        """Decode subject/predicate/object and hand them to the adapter."""
        terms = self.decode_statement(triple, ("subject", "predicate", "object"))
        return self.adapter.triple(terms)

    def decode_quoted_triple(self, triple: jelly.RdfTriple) -> Any:
        """
        Decode a triple used as a term (quoted triple).

        Unlike ``decode_statement`` this neither reads nor updates the
        repeated-terms cache: every position must be set explicitly.

        Raises:
            ValueError: if any of subject/predicate/object is unset

        """
        oneofs: Sequence[str] = ("subject", "predicate", "object")
        terms = []
        for oneof in oneofs:
            field = triple.WhichOneof(oneof)
            if field:
                jelly_term = getattr(triple, field)
                decoded_term = self.decode_term(jelly_term)
            else:
                msg = "repeated terms are not allowed in quoted triples"
                raise ValueError(msg)
            terms.append(decoded_term)
        return self.adapter.quoted_triple(terms)

    def decode_quad(self, quad: jelly.RdfQuad) -> Any:
        """Decode subject/predicate/object/graph and hand them to the adapter."""
        terms = self.decode_statement(quad, ("subject", "predicate", "object", "graph"))
        return self.adapter.quad(terms)
@@ -0,0 +1,115 @@
1
+ import io
2
+ import os
3
+ from collections.abc import Generator, Iterator
4
+ from itertools import chain
5
+ from typing import IO
6
+
7
+ from google.protobuf.proto import parse, parse_length_prefixed
8
+
9
+ from pyjelly import jelly
10
+ from pyjelly.errors import JellyConformanceError
11
+ from pyjelly.parse.decode import ParserOptions, options_from_frame
12
+
13
+
14
def delimited_jelly_hint(header: bytes) -> bool:
    """
    Guess from the first 3 bytes whether a Jelly file is delimited.

    A header shorter than 3 bytes is reported as non-delimited.

    Truth table (notation: `0A` = `0x0A`, `NN` = `not 0x0A`, `??` = _don't care_):

    | Byte 1 | Byte 2 | Byte 3 | Result                                   |
    |--------|--------|--------|------------------------------------------|
    | `NN`   |  `??`  |  `??`  | Delimited                                |
    | `0A`   |  `NN`  |  `??`  | Non-delimited                            |
    | `0A`   |  `0A`  |  `NN`  | Delimited (size = 10)                    |
    | `0A`   |  `0A`  |  `0A`  | Non-delimited (stream options size = 10) |

    >>> delimited_jelly_hint(bytes([0x00, 0x00, 0x00]))
    True

    >>> delimited_jelly_hint(bytes([0x00, 0x00, 0x0A]))
    True

    >>> delimited_jelly_hint(bytes([0x00, 0x0A, 0x00]))
    True

    >>> delimited_jelly_hint(bytes([0x00, 0x0A, 0x0A]))
    True

    >>> delimited_jelly_hint(bytes([0x0A, 0x00, 0x00]))
    False

    >>> delimited_jelly_hint(bytes([0x0A, 0x00, 0x0A]))
    False

    >>> delimited_jelly_hint(bytes([0x0A, 0x0A, 0x00]))
    True

    >>> delimited_jelly_hint(bytes([0x0A, 0x0A, 0x0A]))
    False
    """
    marker = 0x0A
    if len(header) < 3:  # noqa: PLR2004
        return False
    first, second, third = header[:3]
    if first != marker:
        # Rows 1 of the table: anything not starting with 0x0A is delimited.
        return True
    return second == marker and third != marker
55
+
56
+
57
def frame_iterator(inp: IO[bytes]) -> Generator[jelly.RdfStreamFrame]:
    """Yield length-prefixed frames from ``inp`` until the parser returns a falsy value."""
    while True:
        frame = parse_length_prefixed(jelly.RdfStreamFrame, inp)
        if not frame:
            return
        yield frame
60
+
61
+
62
def get_options_and_frames(
    inp: IO[bytes],
) -> tuple[ParserOptions, Iterator[jelly.RdfStreamFrame]]:
    """
    Read the stream options and return them with an iterator over all frames.

    The first 3 bytes are inspected (without consuming them) to decide whether
    the stream is delimited. A delimited stream is read frame by frame; a
    non-delimited one is read completely and parsed as a single frame.

    Args:
        inp (IO[bytes]): jelly buffered binary stream

    Raises:
        JellyConformanceError: if no non-empty frames detected in the delimited stream
        JellyConformanceError: if non-delimited,
            error is raised if no rows are detected (empty frame)

    Returns:
        tuple[ParserOptions, Iterator[jelly.RdfStreamFrame]]: ParserOptions holds:
            stream types, lookup presets and other stream options

    """
    if inp.seekable():
        # Read the header, then rewind by exactly the number of bytes read.
        header = inp.read(3)
        inp.seek(-len(header), os.SEEK_CUR)
        is_delimited = delimited_jelly_hint(header)
    else:
        # Input may not be seekable (e.g. a network stream) -- then we need to buffer
        # it to determine if it's delimited.
        # See also: https://github.com/Jelly-RDF/pyjelly/issues/298
        inp = io.BufferedReader(inp)  # type: ignore[arg-type, type-var, unused-ignore]
        is_delimited = delimited_jelly_hint(inp.peek(3))

    if not is_delimited:
        single_frame = parse(jelly.RdfStreamFrame, inp.read())
        if not single_frame.rows:
            msg = "The stream is corrupted (only contains an empty frame)"
            raise JellyConformanceError(msg)
        options = options_from_frame(single_frame, delimited=False)
        return options, iter((single_frame,))

    # Delimited: collect leading empty frames until the first frame with rows,
    # which is the one the options are read from.
    remaining = frame_iterator(inp)
    leading_empty = []
    first_frame = None
    for candidate in remaining:
        if candidate.rows:
            first_frame = candidate
            break
        leading_empty.append(candidate)

    if first_frame is None:
        msg = "No non-empty frames found in the stream"
        raise JellyConformanceError(msg)

    options = options_from_frame(first_frame, delimited=True)
    # Replay everything already consumed, then continue with the live iterator.
    return options, chain(leading_empty, (first_frame,), remaining)
@@ -0,0 +1,70 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import deque
4
+ from dataclasses import dataclass
5
+
6
+ from pyjelly.errors import JellyAssertionError, JellyConformanceError
7
+ from pyjelly.options import MAX_LOOKUP_SIZE
8
+
9
+
10
+ @dataclass
11
+ class LookupDecoder:
12
+ """
13
+ Shared base for RDF lookup encoders using Jelly compression.
14
+
15
+ Tracks the last assigned and last reused index.
16
+
17
+ Parameters
18
+ ----------
19
+ lookup_size
20
+ Maximum lookup size.
21
+
22
+ """
23
+
24
+ last_assigned_index: int
25
+ last_reused_index: int
26
+
27
+ def __init__(self, *, lookup_size: int) -> None:
28
+ if lookup_size > MAX_LOOKUP_SIZE:
29
+ msg = f"lookup size cannot be larger than {MAX_LOOKUP_SIZE}"
30
+ raise JellyAssertionError(msg)
31
+ self.lookup_size = lookup_size
32
+ placeholders = (None,) * lookup_size
33
+ self.data: deque[str | None] = deque(placeholders, maxlen=lookup_size)
34
+ self.last_assigned_index = 0
35
+ self.last_reused_index = 0
36
+
37
+ def assign_entry(self, index: int, value: str) -> None:
38
+ previous_index = self.last_assigned_index
39
+ if index == 0:
40
+ index = previous_index + 1
41
+ assert index > 0
42
+ self.data[index - 1] = value
43
+ self.last_assigned_index = index
44
+
45
+ def at(self, index: int) -> str:
46
+ self.last_reused_index = index
47
+ value = self.data[index - 1]
48
+ if value is None:
49
+ msg = f"invalid resolved index {index}"
50
+ raise IndexError(msg)
51
+ return value
52
+
53
+ def decode_prefix_term_index(self, index: int) -> str:
54
+ actual_index = index or self.last_reused_index
55
+ if actual_index == 0:
56
+ return ""
57
+ return self.at(actual_index)
58
+
59
+ def decode_name_term_index(self, index: int) -> str:
60
+ actual_index = index or self.last_reused_index + 1
61
+ if actual_index == 0:
62
+ msg = "0 is not a valid name term index"
63
+ raise JellyConformanceError(msg)
64
+ return self.at(actual_index)
65
+
66
+ def decode_datatype_term_index(self, index: int) -> str | None:
67
+ if index == 0:
68
+ msg = "0 is not a valid datatype term index"
69
+ raise JellyConformanceError(msg)
70
+ return self.at(index)
File without changes