pyjelly 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyjelly might be problematic. Click here for more details.

@@ -1,12 +1,16 @@
1
+ # ruff: noqa: I001
1
2
  from __future__ import annotations
2
-
3
+ from typing import cast
3
4
  from collections.abc import Generator
4
5
  from functools import singledispatch
5
- from typing import IO, Any
6
+ from typing import Any, IO
6
7
  from typing_extensions import override
8
+ from itertools import chain
9
+ from pyjelly.integrations.rdflib.parse import Quad, Triple
7
10
 
8
11
  import rdflib
9
- from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Dataset, Graph, QuotedGraph
12
+ from rdflib import Graph
13
+ from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Dataset, QuotedGraph
10
14
  from rdflib.serializer import Serializer as RDFLibSerializer
11
15
 
12
16
  from pyjelly import jelly
@@ -18,7 +22,9 @@ from pyjelly.serialize.streams import (
18
22
  SerializerOptions,
19
23
  Stream,
20
24
  TripleStream,
21
- )
25
+ ) # ruff: enable
26
+
27
+ QUAD_ARITY = 4
22
28
 
23
29
 
24
30
  class RDFLibTermEncoder(TermEncoder):
@@ -61,7 +67,10 @@ def namespace_declarations(store: Graph, stream: Stream) -> None:
61
67
 
62
68
 
63
69
  @singledispatch
64
- def stream_frames(stream: Stream, data: Graph) -> Generator[jelly.RdfStreamFrame]: # noqa: ARG001
70
+ def stream_frames(
71
+ stream: Stream,
72
+ data: Graph | Generator[Quad | Triple], # noqa: ARG001
73
+ ) -> Generator[jelly.RdfStreamFrame]:
65
74
  msg = f"invalid stream implementation {stream}"
66
75
  raise TypeError(msg)
67
76
 
@@ -69,14 +78,15 @@ def stream_frames(stream: Stream, data: Graph) -> Generator[jelly.RdfStreamFrame
69
78
  @stream_frames.register(TripleStream)
70
79
  def triples_stream_frames(
71
80
  stream: TripleStream,
72
- data: Graph | Dataset,
81
+ data: Graph | Dataset | Generator[Triple],
73
82
  ) -> Generator[jelly.RdfStreamFrame]:
74
83
  """
75
84
  Serialize a Graph/Dataset into jelly frames.
76
85
 
77
86
  Args:
78
87
  stream (TripleStream): stream that specifies triples processing
79
- data (Graph | Dataset): Graph/Dataset to serialize.
88
+ data (Graph | Dataset | Generator[Triple]):
89
+ Graph/Dataset/Statements to serialize.
80
90
 
81
91
  Notes:
82
92
  if Dataset is given, its graphs are unpacked and iterated over
@@ -87,24 +97,24 @@ def triples_stream_frames(
87
97
 
88
98
  """
89
99
  stream.enroll()
90
- if stream.options.params.namespace_declarations:
100
+ if isinstance(data, Graph) and stream.options.params.namespace_declarations:
91
101
  namespace_declarations(data, stream)
102
+
92
103
  graphs = (data,) if not isinstance(data, Dataset) else data.graphs()
93
104
  for graph in graphs:
94
105
  for terms in graph:
95
106
  if frame := stream.triple(terms):
96
107
  yield frame
97
- # this part turns each graph to a frame for graphs logical type
98
108
  if frame := stream.flow.frame_from_graph():
99
109
  yield frame
100
110
  if stream.stream_types.flat and (frame := stream.flow.to_stream_frame()):
101
111
  yield frame
102
112
 
103
113
 
104
- @stream_frames.register
114
+ @stream_frames.register(QuadStream)
105
115
  def quads_stream_frames(
106
116
  stream: QuadStream,
107
- data: Dataset,
117
+ data: Dataset | Generator[Quad],
108
118
  ) -> Generator[jelly.RdfStreamFrame]:
109
119
  """
110
120
  Serialize a Dataset into jelly frames.
@@ -114,17 +124,23 @@ def quads_stream_frames(
114
124
 
115
125
  Args:
116
126
  stream (QuadStream): stream that specifies quads processing
117
- data (Dataset): Dataset to serialize.
127
+ data (Dataset | Generator[Quad]): Dataset to serialize.
118
128
 
119
129
  Yields:
120
130
  Generator[jelly.RdfStreamFrame]: jelly frames
121
131
 
122
132
  """
123
- assert isinstance(data, Dataset)
124
133
  stream.enroll()
125
134
  if stream.options.params.namespace_declarations:
126
- namespace_declarations(data, stream)
127
- for terms in data.quads():
135
+ namespace_declarations(data, stream) # type: ignore[arg-type]
136
+
137
+ iterator: Generator[Quad, None, None]
138
+ if isinstance(data, Dataset):
139
+ iterator = cast(Generator[Quad, None, None], data.quads())
140
+ else:
141
+ iterator = data
142
+
143
+ for terms in iterator:
128
144
  if frame := stream.quad(terms):
129
145
  yield frame
130
146
  if frame := stream.flow.frame_from_dataset():
@@ -133,10 +149,10 @@ def quads_stream_frames(
133
149
  yield frame
134
150
 
135
151
 
136
- @stream_frames.register
152
+ @stream_frames.register(GraphStream)
137
153
  def graphs_stream_frames(
138
154
  stream: GraphStream,
139
- data: Dataset,
155
+ data: Dataset | Generator[Quad],
140
156
  ) -> Generator[jelly.RdfStreamFrame]:
141
157
  """
142
158
  Serialize a Dataset into jelly frames as a stream of graphs.
@@ -147,24 +163,76 @@ def graphs_stream_frames(
147
163
 
148
164
  Args:
149
165
  stream (GraphStream): stream that specifies graphs processing
150
- data (Dataset): Dataset to serialize.
166
+ data (Dataset | Generator[Quad]): Dataset to serialize.
151
167
 
152
168
  Yields:
153
169
  Generator[jelly.RdfStreamFrame]: jelly frames
154
170
 
155
171
  """
156
- assert isinstance(data, Dataset)
157
172
  stream.enroll()
158
173
  if stream.options.params.namespace_declarations:
159
- namespace_declarations(data, stream)
160
- for graph in data.graphs():
174
+ namespace_declarations(data, stream) # type: ignore[arg-type]
175
+
176
+ if isinstance(data, Dataset):
177
+ graphs = data.graphs()
178
+ else:
179
+ ds = Dataset()
180
+ for quad in data:
181
+ ctx = ds.get_context(quad.g)
182
+ ctx.add((quad.s, quad.p, quad.o))
183
+ graphs = ds.graphs()
184
+
185
+ for graph in graphs:
161
186
  yield from stream.graph(graph_id=graph.identifier, graph=graph)
187
+
162
188
  if frame := stream.flow.frame_from_dataset():
163
189
  yield frame
164
190
  if stream.stream_types.flat and (frame := stream.flow.to_stream_frame()):
165
191
  yield frame
166
192
 
167
193
 
194
+ def guess_options(sink: Graph | Dataset) -> SerializerOptions:
195
+ """
196
+ Guess the serializer options based on the store type.
197
+
198
+ >>> guess_options(Graph()).logical_type
199
+ 1
200
+ >>> guess_options(Dataset()).logical_type
201
+ 2
202
+ """
203
+ logical_type = (
204
+ jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS
205
+ if isinstance(sink, Dataset)
206
+ else jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES
207
+ )
208
+ return SerializerOptions(logical_type=logical_type)
209
+
210
+
211
+ def guess_stream(options: SerializerOptions, sink: Graph | Dataset) -> Stream:
212
+ """
213
+ Return an appropriate stream implementation for the given options.
214
+
215
+ Notes: if base(!) logical type is GRAPHS and Dataset is given,
216
+ initializes TripleStream
217
+
218
+ >>> graph_ser = RDFLibJellySerializer(Graph())
219
+ >>> ds_ser = RDFLibJellySerializer(Dataset())
220
+
221
+ >>> type(guess_stream(guess_options(graph_ser.store), graph_ser.store))
222
+ <class 'pyjelly.serialize.streams.TripleStream'>
223
+ >>> type(guess_stream(guess_options(ds_ser.store), ds_ser.store))
224
+ <class 'pyjelly.serialize.streams.QuadStream'>
225
+ """
226
+ stream_cls: type[Stream]
227
+ if (options.logical_type % 10) != jelly.LOGICAL_STREAM_TYPE_GRAPHS and isinstance(
228
+ sink, Dataset
229
+ ):
230
+ stream_cls = QuadStream
231
+ else:
232
+ stream_cls = TripleStream
233
+ return stream_cls.for_rdflib(options=options)
234
+
235
+
168
236
  class RDFLibJellySerializer(RDFLibSerializer):
169
237
  """
170
238
  RDFLib serializer for writing graphs in Jelly RDF stream format.
@@ -180,43 +248,6 @@ class RDFLibJellySerializer(RDFLibSerializer):
180
248
  raise NotImplementedError(msg)
181
249
  super().__init__(store)
182
250
 
183
- def guess_options(self) -> SerializerOptions:
184
- """
185
- Guess the serializer options based on the store type.
186
-
187
- >>> RDFLibJellySerializer(Graph()).guess_options().logical_type
188
- 1
189
- >>> RDFLibJellySerializer(Dataset()).guess_options().logical_type
190
- 2
191
- """
192
- logical_type = (
193
- jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS
194
- if isinstance(self.store, Dataset)
195
- else jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES
196
- )
197
- return SerializerOptions(logical_type=logical_type)
198
-
199
- def guess_stream(self, options: SerializerOptions) -> Stream:
200
- """
201
- Return an appropriate stream implementation for the given options.
202
-
203
- >>> graph_ser = RDFLibJellySerializer(Graph())
204
- >>> ds_ser = RDFLibJellySerializer(Dataset())
205
-
206
- >>> type(graph_ser.guess_stream(graph_ser.guess_options()))
207
- <class 'pyjelly.serialize.streams.TripleStream'>
208
- >>> type(ds_ser.guess_stream(ds_ser.guess_options()))
209
- <class 'pyjelly.serialize.streams.QuadStream'>
210
- """
211
- stream_cls: type[Stream]
212
- if options.logical_type != jelly.LOGICAL_STREAM_TYPE_GRAPHS and isinstance(
213
- self.store, Dataset
214
- ):
215
- stream_cls = QuadStream
216
- else:
217
- stream_cls = TripleStream
218
- return stream_cls.for_rdflib(options=options)
219
-
220
251
  @override
221
252
  def serialize( # type: ignore[override]
222
253
  self,
@@ -240,9 +271,107 @@ class RDFLibJellySerializer(RDFLibSerializer):
240
271
 
241
272
  """
242
273
  if options is None:
243
- options = self.guess_options()
274
+ options = guess_options(self.store)
244
275
  if stream is None:
245
- stream = self.guess_stream(options)
276
+ stream = guess_stream(options, self.store)
246
277
  write = write_delimited if stream.options.params.delimited else write_single
247
278
  for stream_frame in stream_frames(stream, self.store):
248
279
  write(stream_frame, out)
280
+
281
+
282
+ def grouped_stream_to_frames(
283
+ sink_generator: Generator[Graph] | Generator[Dataset],
284
+ options: SerializerOptions | None = None,
285
+ ) -> Generator[jelly.RdfStreamFrame]:
286
+ """
287
+ Transform Graphs/Datasets into Jelly frames, one frame per Graph/Dataset.
288
+
289
+ Note: options are guessed if not provided.
290
+
291
+ Args:
292
+ sink_generator (Generator[Graph] | Generator[Dataset]): Generator of
293
+ Graphs/Dataset to transform.
294
+ options (SerializerOptions | None, optional): stream options to use.
295
+ Options are guessed based on the sink store type. Defaults to None.
296
+
297
+ Yields:
298
+ Generator[jelly.RdfStreamFrame]: produced Jelly frames
299
+
300
+ """
301
+ stream = None
302
+ for sink in sink_generator:
303
+ if not stream:
304
+ if options is None:
305
+ options = guess_options(sink)
306
+ stream = guess_stream(options, sink)
307
+ yield from stream_frames(stream, sink)
308
+
309
+
310
+ def grouped_stream_to_file(
311
+ stream: Generator[Graph] | Generator[Dataset],
312
+ output_file: IO[bytes],
313
+ **kwargs: Any,
314
+ ) -> None:
315
+ """
316
+ Write stream of Graphs/Datasets to a binary file.
317
+
318
+ Args:
319
+ stream (Generator[Graph] | Generator[Dataset]): Generator of
320
+ Graphs/Dataset to transform.
321
+ output_file (IO[bytes]): output buffered writer.
322
+ **kwargs (Any): options to pass to stream.
323
+
324
+ """
325
+ for frame in grouped_stream_to_frames(stream, **kwargs):
326
+ write_delimited(frame, output_file)
327
+
328
+
329
+ def flat_stream_to_frames(
330
+ statements: Generator[Triple | Quad],
331
+ options: SerializerOptions | None = None,
332
+ ) -> Generator[jelly.RdfStreamFrame]:
333
+ """
334
+ Serialize a stream of raw triples or quads into Jelly frames.
335
+
336
+ Args:
337
+ statements (Generator[Triple | Quad]):
338
+ s/p/o triples or s/p/o/g quads to serialize.
339
+ options (SerializerOptions | None, optional):
340
+ if omitted, guessed based on the first tuple.
341
+
342
+ Yields:
343
+ Generator[jelly.RdfStreamFrame]: generated frames.
344
+
345
+ """
346
+ first = next(statements, None)
347
+ if first is None:
348
+ return
349
+
350
+ sink = Dataset() if len(first) == QUAD_ARITY else Graph()
351
+ if options is None:
352
+ options = guess_options(sink)
353
+ stream = guess_stream(options, sink)
354
+
355
+ combined: Generator[Triple | Quad] | Graph = (
356
+ item for item in chain([first], statements)
357
+ )
358
+
359
+ yield from stream_frames(stream, combined)
360
+
361
+
362
+ def flat_stream_to_file(
363
+ statements: Generator[Triple | Quad],
364
+ output_file: IO[bytes],
365
+ options: SerializerOptions | None = None,
366
+ ) -> None:
367
+ """
368
+ Write Triple or Quad events to a binary file in Jelly flat format.
369
+
370
+ Args:
371
+ statements (Generator[Triple | Quad]): statements to serialize.
372
+ output_file (IO[bytes]): output buffered writer.
373
+ options (SerializerOptions | None, optional): stream options.
374
+
375
+ """
376
+ for frame in flat_stream_to_frames(statements, options):
377
+ write_delimited(frame, output_file)
pyjelly/jelly/rdf_pb2.py CHANGED
@@ -2,7 +2,7 @@
2
2
  # Generated by the protocol buffer compiler. DO NOT EDIT!
3
3
  # NO CHECKED-IN PROTOBUF GENCODE
4
4
  # source: rdf.proto
5
- # Protobuf Python Version: 5.29.0
5
+ # Protobuf Python Version: 6.31.0
6
6
  """Generated protocol buffer code."""
7
7
  from google.protobuf import descriptor as _descriptor
8
8
  from google.protobuf import descriptor_pool as _descriptor_pool
@@ -11,8 +11,8 @@ from google.protobuf import symbol_database as _symbol_database
11
11
  from google.protobuf.internal import builder as _builder
12
12
  _runtime_version.ValidateProtobufRuntimeVersion(
13
13
  _runtime_version.Domain.PUBLIC,
14
- 5,
15
- 29,
14
+ 6,
15
+ 31,
16
16
  0,
17
17
  '',
18
18
  'rdf.proto'
pyjelly/jelly/rdf_pb2.pyi CHANGED
@@ -2,7 +2,8 @@ from google.protobuf.internal import containers as _containers
2
2
  from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
3
3
  from google.protobuf import descriptor as _descriptor
4
4
  from google.protobuf import message as _message
5
- from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union
5
+ from collections.abc import Iterable as _Iterable, Mapping as _Mapping
6
+ from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union
6
7
 
7
8
  DESCRIPTOR: _descriptor.FileDescriptor
8
9
 
pyjelly/options.py CHANGED
@@ -15,6 +15,7 @@ from pyjelly.errors import (
15
15
  MIN_NAME_LOOKUP_SIZE: Final[int] = 8
16
16
 
17
17
  MAX_LOOKUP_SIZE: Final[int] = 4096
18
+ MIN_VERSION: Final[int] = 1
18
19
  MAX_VERSION: Final[int] = 2
19
20
 
20
21
  DEFAULT_NAME_LOOKUP_SIZE: Final[int] = 4000
@@ -97,6 +98,14 @@ class StreamParameters:
97
98
  namespace_declarations: bool = False
98
99
  stream_name: str = ""
99
100
 
101
+ def __post_init__(self) -> None:
102
+ selected = MAX_VERSION if self.namespace_declarations else MIN_VERSION
103
+ if not (MIN_VERSION <= selected <= MAX_VERSION):
104
+ msg = f"""Error occured while settin up the Stream options.
105
+ Version must be between {MIN_VERSION} and {MAX_VERSION}."""
106
+ raise JellyConformanceError(msg)
107
+ object.__setattr__(self, "version", selected)
108
+
100
109
 
101
110
  TRIPLES_ONLY_LOGICAL_TYPES = {
102
111
  jelly.LOGICAL_STREAM_TYPE_GRAPHS,
pyjelly/parse/decode.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from abc import ABCMeta, abstractmethod
4
- from collections.abc import Iterable, Sequence
4
+ from collections.abc import Iterable, Iterator, Sequence
5
5
  from enum import Enum, auto
6
6
  from typing import Any, ClassVar, NamedTuple
7
7
  from typing_extensions import Never
@@ -66,6 +66,8 @@ def options_from_frame(
66
66
  ),
67
67
  params=StreamParameters(
68
68
  stream_name=options.stream_name,
69
+ generalized_statements=options.generalized_statements,
70
+ rdf_star=options.rdf_star,
69
71
  version=options.version,
70
72
  delimited=delimited,
71
73
  ),
@@ -134,6 +136,11 @@ class Adapter(metaclass=ABCMeta):
134
136
  stream_types=self.options.stream_types,
135
137
  )
136
138
 
139
+ def quoted_triple(self, terms: Iterable[Any]) -> Any: # noqa: ARG002
140
+ _adapter_missing(
141
+ "decoding quoted triple", stream_types=self.options.stream_types
142
+ )
143
+
137
144
  def frame(self) -> Any:
138
145
  return None
139
146
 
@@ -166,23 +173,23 @@ class Decoder:
166
173
  def options(self) -> ParserOptions:
167
174
  return self.adapter.options
168
175
 
169
- def decode_frame(self, frame: jelly.RdfStreamFrame) -> Any:
176
+ def iter_rows(self, frame: jelly.RdfStreamFrame) -> Iterator[Any]:
170
177
  """
171
- Decode a frame to custom object based on adapter implementation.
178
+ Iterate through rows in the frame.
172
179
 
173
180
  Args:
174
181
  frame (jelly.RdfStreamFrame): jelly frame
175
-
176
- Returns:
177
- Any: custom obj based on adapter logic
182
+ Yields:
183
+ Iterator[Any]: decoded rows
178
184
 
179
185
  """
180
186
  for row_owner in frame.rows:
181
187
  row = getattr(row_owner, row_owner.WhichOneof("row"))
182
- self.decode_row(row)
183
- if self.adapter.parsing_mode is ParsingMode.GROUPED:
184
- return self.adapter.frame()
185
- return None
188
+ decoded_row = self.decode_row(row)
189
+ if isinstance(
190
+ row, (jelly.RdfTriple, jelly.RdfQuad, jelly.RdfNamespaceDeclaration)
191
+ ):
192
+ yield decoded_row
186
193
 
187
194
  def decode_row(self, row: Any) -> Any | None:
188
195
  """
@@ -383,6 +390,20 @@ class Decoder:
383
390
  terms = self.decode_statement(triple, ("subject", "predicate", "object"))
384
391
  return self.adapter.triple(terms)
385
392
 
393
+ def decode_quoted_triple(self, triple: jelly.RdfTriple) -> Any:
394
+ oneofs: Sequence[str] = ("subject", "predicate", "object")
395
+ terms = []
396
+ for oneof in oneofs:
397
+ field = triple.WhichOneof(oneof)
398
+ if field:
399
+ jelly_term = getattr(triple, field)
400
+ decoded_term = self.decode_term(jelly_term)
401
+ else:
402
+ msg = "repeated terms are not allowed in quoted triples"
403
+ raise ValueError(msg)
404
+ terms.append(decoded_term)
405
+ return self.adapter.quoted_triple(terms)
406
+
386
407
  def decode_quad(self, quad: jelly.RdfQuad) -> Any:
387
408
  terms = self.decode_statement(quad, ("subject", "predicate", "object", "graph"))
388
409
  return self.adapter.quad(terms)
@@ -405,4 +426,5 @@ class Decoder:
405
426
  str: decode_bnode,
406
427
  jelly.RdfLiteral: decode_literal,
407
428
  jelly.RdfDefaultGraph: decode_default_graph,
429
+ jelly.RdfTriple: decode_quoted_triple,
408
430
  }
pyjelly/parse/ioutils.py CHANGED
@@ -55,8 +55,7 @@ def delimited_jelly_hint(header: bytes) -> bool:
55
55
 
56
56
  def frame_iterator(inp: IO[bytes]) -> Generator[jelly.RdfStreamFrame]:
57
57
  while frame := parse_length_prefixed(jelly.RdfStreamFrame, inp):
58
- if frame.rows:
59
- yield frame
58
+ yield frame
60
59
 
61
60
 
62
61
  def get_options_and_frames(
@@ -82,14 +81,21 @@ def get_options_and_frames(
82
81
  inp.seek(-len(bytes_read), os.SEEK_CUR)
83
82
 
84
83
  if is_delimited:
84
+ first_frame = None
85
+ skipped_frames = []
85
86
  frames = frame_iterator(inp)
86
- first_frame = next(frames, None)
87
+ for frame in frames:
88
+ if not frame.rows:
89
+ skipped_frames.append(frame)
90
+ else:
91
+ first_frame = frame
92
+ break
87
93
  if first_frame is None:
88
94
  msg = "No non-empty frames found in the stream"
89
95
  raise JellyConformanceError(msg)
90
96
 
91
97
  options = options_from_frame(first_frame, delimited=True)
92
- return options, chain((first_frame,), frames)
98
+ return options, chain(skipped_frames, (first_frame,), frames)
93
99
 
94
100
  frame = parse(jelly.RdfStreamFrame, inp.read())
95
101
 
@@ -32,9 +32,8 @@ def split_iri(iri_string: str) -> tuple[str, str]:
32
32
 
33
33
  T = TypeVar("T")
34
34
  RowsAnd: TypeAlias = tuple[Sequence[jelly.RdfStreamRow], T]
35
- RowsAndTerm: TypeAlias = (
36
- "RowsAnd[jelly.RdfIri | jelly.RdfLiteral | str | jelly.RdfDefaultGraph]"
37
- )
35
+ RowsAndTerm: TypeAlias = "RowsAnd[jelly.RdfIri | jelly.RdfLiteral | str | \
36
+ jelly.RdfDefaultGraph | jelly.RdfTriple]"
38
37
 
39
38
 
40
39
  class TermEncoder:
@@ -43,6 +42,7 @@ class TermEncoder:
43
42
  jelly.RdfLiteral: "literal",
44
43
  str: "bnode",
45
44
  jelly.RdfDefaultGraph: "default_graph",
45
+ jelly.RdfTriple: "triple_term",
46
46
  }
47
47
 
48
48
  def __init__(
@@ -163,6 +163,33 @@ class TermEncoder:
163
163
  datatype=datatype_id,
164
164
  )
165
165
 
166
+ def encode_quoted_triple(self, terms: Iterable[object]) -> RowsAndTerm:
167
+ """
168
+ Encode a quoted triple.
169
+
170
+ Notes:
171
+ Although a triple, it is treated as a part of a statement.
172
+ Repeated terms are not used when encoding quoted triples.
173
+
174
+ Args:
175
+ terms (Iterable[object]): triple terms to encode.
176
+
177
+ Returns:
178
+ RowsAndTerm: additional stream rows with preceeding
179
+ information (prefixes, names, datatypes rows, if any)
180
+ and the encoded triple row.
181
+
182
+ """
183
+ statement: dict[str, Any] = {}
184
+ rows: list[jelly.RdfStreamRow] = []
185
+ for slot, term in zip(Slot, terms):
186
+ extra_rows, value = self.encode_any(term, slot)
187
+ oneof = self.TERM_ONEOF_NAMES[type(value)]
188
+ rows.extend(extra_rows)
189
+ field = f"{slot}_{oneof}"
190
+ statement[field] = value
191
+ return rows, jelly.RdfTriple(**statement)
192
+
166
193
  def encode_any(self, term: object, slot: Slot) -> RowsAndTerm:
167
194
  msg = f"unsupported term type: {type(term)}"
168
195
  raise NotImplementedError(msg)
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  from collections import UserList
4
4
  from collections.abc import Iterable
5
5
  from dataclasses import dataclass
6
- from typing import ClassVar
6
+ from typing import Any, ClassVar
7
7
  from typing_extensions import override
8
8
 
9
9
  from pyjelly import jelly
@@ -16,11 +16,24 @@ class FrameFlow(UserList[jelly.RdfStreamRow]):
16
16
  Abstract base class for producing Jelly frames from RDF stream rows.
17
17
 
18
18
  Collects stream rows and assembles them into RdfStreamFrame objects when ready.
19
+
20
+ Allows for passing LogicalStreamType, required for
21
+ logical subtypes and non-delimited streams.
19
22
  """
20
23
 
21
24
  logical_type: jelly.LogicalStreamType
22
25
  registry: ClassVar[dict[jelly.LogicalStreamType, type[FrameFlow]]] = {}
23
26
 
27
+ def __init__(
28
+ self,
29
+ initlist: Iterable[jelly.RdfStreamRow] | None = None,
30
+ *,
31
+ logical_type: jelly.LogicalStreamType | None = None,
32
+ **__kwargs: Any,
33
+ ) -> None:
34
+ super().__init__(initlist)
35
+ self.logical_type = logical_type or self.__class__.logical_type
36
+
24
37
  def frame_from_graph(self) -> jelly.RdfStreamFrame | None:
25
38
  """
26
39
  Treat the current rows as a graph and produce a frame.
@@ -71,15 +84,6 @@ class ManualFrameFlow(FrameFlow):
71
84
 
72
85
  logical_type = jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED
73
86
 
74
- def __init__(
75
- self,
76
- initlist: Iterable[jelly.RdfStreamRow] | None = None,
77
- *,
78
- logical_type: jelly.LogicalStreamType = jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED,
79
- ) -> None:
80
- super().__init__(initlist)
81
- self.logical_type = logical_type
82
-
83
87
 
84
88
  @dataclass
85
89
  class BoundedFrameFlow(FrameFlow):
@@ -92,13 +96,15 @@ class BoundedFrameFlow(FrameFlow):
92
96
  logical_type = jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED
93
97
  frame_size: int
94
98
 
99
+ @override
95
100
  def __init__(
96
101
  self,
97
102
  initlist: Iterable[jelly.RdfStreamRow] | None = None,
103
+ logical_type: jelly.LogicalStreamType | None = None,
98
104
  *,
99
105
  frame_size: int | None = None,
100
106
  ) -> None:
101
- super().__init__(initlist)
107
+ super().__init__(initlist, logical_type=logical_type)
102
108
  self.frame_size = frame_size or DEFAULT_FRAME_SIZE
103
109
 
104
110
  @override
@@ -153,7 +159,6 @@ class DatasetsFrameFlow(FrameFlow):
153
159
  return self.to_stream_frame()
154
160
 
155
161
 
156
- # TODO(Nastya): issue #184
157
162
  FLOW_DISPATCH: dict[jelly.LogicalStreamType, type[FrameFlow]] = {
158
163
  jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES: FlatTriplesFrameFlow,
159
164
  jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS: FlatQuadsFrameFlow,
@@ -166,18 +171,23 @@ def flow_for_type(logical_type: jelly.LogicalStreamType) -> type[FrameFlow]:
166
171
  """
167
172
  Return flow based on logical type requested.
168
173
 
174
+ Note: uses base logical type for subtypes (i.e., SUBJECT_GRAPHS uses
175
+ the same flow as its base type GRAPHS).
176
+
169
177
  Args:
170
178
  logical_type (jelly.LogicalStreamType): logical type requested.
171
179
 
172
180
  Raises:
173
- NotImplementedError: if logical type not supported.
181
+ NotImplementedError: if (base) logical stream type is not supported.
174
182
 
175
183
  Returns:
176
184
  type[FrameFlow]: FrameFlow for respective logical type.
177
185
 
178
186
  """
179
187
  try:
180
- return FLOW_DISPATCH[logical_type]
188
+ base_logical_type_value = logical_type % 10
189
+ base_name = jelly.LogicalStreamType.Name(base_logical_type_value)
190
+ return FLOW_DISPATCH[getattr(jelly.LogicalStreamType, base_name)]
181
191
  except KeyError:
182
192
  msg = (
183
193
  "unsupported logical stream type: "