pyjelly 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of pyjelly might be problematic. More details were linked from the original page.

@@ -12,8 +12,8 @@ from rdflib.store import Store
12
12
 
13
13
  from pyjelly import jelly
14
14
  from pyjelly.errors import JellyConformanceError
15
- from pyjelly.options import StreamOptions
16
- from pyjelly.parse.decode import Adapter, Decoder
15
+ from pyjelly.options import StreamTypes
16
+ from pyjelly.parse.decode import Adapter, Decoder, ParserOptions
17
17
  from pyjelly.parse.ioutils import get_options_and_frames
18
18
 
19
19
 
@@ -40,11 +40,9 @@ class RDFLibAdapter(Adapter):
40
40
  return rdflib.Literal(lex, lang=language, datatype=datatype)
41
41
 
42
42
 
43
- def _adapter_missing(feature: str, *, options: StreamOptions) -> Never:
44
- physical_type_name = jelly.PhysicalStreamType.Name(
45
- options.stream_types.physical_type
46
- )
47
- logical_type_name = jelly.LogicalStreamType.Name(options.stream_types.logical_type)
43
+ def _adapter_missing(feature: str, *, stream_types: StreamTypes) -> Never:
44
+ physical_type_name = jelly.PhysicalStreamType.Name(stream_types.physical_type)
45
+ logical_type_name = jelly.LogicalStreamType.Name(stream_types.logical_type)
48
46
  msg = (
49
47
  f"adapter with {physical_type_name} and {logical_type_name} "
50
48
  f"does not implement {feature}"
@@ -55,9 +53,9 @@ def _adapter_missing(feature: str, *, options: StreamOptions) -> Never:
55
53
  class RDFLibTriplesAdapter(RDFLibAdapter):
56
54
  graph: Graph
57
55
 
58
- def __init__(self, graph: Graph, options: StreamOptions) -> None:
56
+ def __init__(self, options: ParserOptions, store: Store | str = "default") -> None:
59
57
  super().__init__(options=options)
60
- self.graph = graph
58
+ self.graph = Graph(store=store)
61
59
 
62
60
  @override
63
61
  def triple(self, terms: Iterable[Any]) -> Any:
@@ -77,26 +75,39 @@ class RDFLibTriplesAdapter(RDFLibAdapter):
77
75
  jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES,
78
76
  ):
79
77
  return None
80
- return _adapter_missing("interpreting frames", options=self.options)
78
+ return _adapter_missing(
79
+ "interpreting frames",
80
+ stream_types=self.options.stream_types,
81
+ )
81
82
 
82
83
 
83
84
  class RDFLibQuadsBaseAdapter(RDFLibAdapter):
84
- def __init__(self, dataset: Dataset, options: StreamOptions) -> None:
85
+ def __init__(
86
+ self,
87
+ options: ParserOptions,
88
+ store: Store | str,
89
+ ) -> None:
85
90
  super().__init__(options=options)
86
- self.dataset = dataset
91
+ self.store = store
92
+ self.dataset = self.new_dataset()
93
+
94
+ def new_dataset(self) -> Dataset:
95
+ return Dataset(store=self.store, default_union=True)
87
96
 
88
97
  @override
89
98
  def frame(self) -> Dataset | None:
90
99
  if self.options.stream_types.logical_type == jelly.LOGICAL_STREAM_TYPE_DATASETS:
91
100
  this_dataset = self.dataset
92
- self.dataset = Dataset(store=self.dataset.store)
101
+ self.dataset = self.new_dataset()
93
102
  return this_dataset
94
103
  if self.options.stream_types.logical_type in (
95
104
  jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED,
96
105
  jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS,
97
106
  ):
98
107
  return None
99
- return _adapter_missing("interpreting frames", options=self.options)
108
+ return _adapter_missing(
109
+ "interpreting frames", stream_types=self.options.stream_types
110
+ )
100
111
 
101
112
 
102
113
  class RDFLibQuadsAdapter(RDFLibQuadsBaseAdapter):
@@ -112,8 +123,12 @@ class RDFLibQuadsAdapter(RDFLibQuadsBaseAdapter):
112
123
  class RDFLibGraphsAdapter(RDFLibQuadsBaseAdapter):
113
124
  _graph: Graph | None = None
114
125
 
115
- def __init__(self, dataset: Dataset, options: StreamOptions) -> None:
116
- super().__init__(dataset=dataset, options=options)
126
+ def __init__(
127
+ self,
128
+ options: ParserOptions,
129
+ store: Store | str,
130
+ ) -> None:
131
+ super().__init__(options=options, store=store)
117
132
  self._graph = None
118
133
 
119
134
  @property
@@ -144,90 +159,134 @@ class RDFLibGraphsAdapter(RDFLibQuadsBaseAdapter):
144
159
  if self.options.stream_types.logical_type == jelly.LOGICAL_STREAM_TYPE_DATASETS:
145
160
  this_dataset = self.dataset
146
161
  self._graph = None
147
- self.dataset = Dataset(store=self.dataset.store)
162
+ self.dataset = self.new_dataset()
148
163
  return this_dataset
149
164
  return super().frame()
150
165
 
151
166
 
152
- def parse_flat_stream(
167
+ def parse_flat_triples_stream(
153
168
  frames: Iterable[jelly.RdfStreamFrame],
154
- sink: Graph,
155
- options: StreamOptions,
169
+ options: ParserOptions,
170
+ store: Store | str = "default",
171
+ identifier: str | None = None,
156
172
  ) -> Dataset | Graph:
157
- assert options.stream_types.flat
158
- ds = None
159
-
160
- adapter: Adapter
161
- if options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
162
- adapter = RDFLibTriplesAdapter(graph=sink, options=options)
163
- else:
164
- ds = Dataset(store=sink.store, default_union=True)
165
- ds.default_context = sink
166
-
167
- if options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_QUADS:
168
- adapter = RDFLibQuadsAdapter(dataset=ds, options=options)
169
-
170
- else: # jelly.PHYSICAL_STREAM_TYPE_GRAPHS
171
- adapter = RDFLibGraphsAdapter(dataset=ds, options=options)
173
+ assert options.stream_types.logical_type == jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES
174
+ adapter = RDFLibTriplesAdapter(options, store=store)
175
+ if identifier is not None:
176
+ adapter.graph = Graph(identifier=identifier, store=store)
172
177
  decoder = Decoder(adapter=adapter)
173
178
  for frame in frames:
174
179
  decoder.decode_frame(frame=frame)
175
- return ds or sink
180
+ return adapter.graph
176
181
 
177
182
 
178
- def parse_grouped_graph_stream(
183
+ def parse_flat_quads_stream(
179
184
  frames: Iterable[jelly.RdfStreamFrame],
180
- sink: Graph,
181
- options: StreamOptions,
185
+ options: ParserOptions,
186
+ store: Store | str = "default",
187
+ identifier: str | None = None,
182
188
  ) -> Dataset:
183
- adapter = RDFLibTriplesAdapter(graph=sink, options=options)
184
- ds = Dataset(store=sink.store, default_union=True)
185
- ds.default_context = sink
189
+ assert options.stream_types.logical_type == jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS
190
+ adapter_class: type[RDFLibQuadsBaseAdapter]
191
+ if options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_QUADS:
192
+ adapter_class = RDFLibQuadsAdapter
193
+ else: # jelly.PHYSICAL_STREAM_TYPE_GRAPHS
194
+ adapter_class = RDFLibGraphsAdapter
195
+ adapter = adapter_class(options=options, store=store)
196
+ adapter.dataset.default_context = Graph(identifier=identifier, store=store)
186
197
  decoder = Decoder(adapter=adapter)
187
198
  for frame in frames:
188
- graph = decoder.decode_frame(frame=frame)
189
- ds.add_graph(graph)
190
- return ds
199
+ decoder.decode_frame(frame=frame)
200
+ return adapter.dataset
191
201
 
192
202
 
193
- def parse_grouped_dataset_stream(
203
+ def parse_graph_stream(
194
204
  frames: Iterable[jelly.RdfStreamFrame],
195
- options: StreamOptions,
205
+ options: ParserOptions,
196
206
  store: Store | str = "default",
197
- ) -> Generator[Dataset]:
198
- adapter = RDFLibGraphsAdapter(dataset=Dataset(store=store), options=options)
207
+ ) -> Generator[Graph]:
208
+ assert options.stream_types.logical_type == jelly.LOGICAL_STREAM_TYPE_GRAPHS
209
+ adapter = RDFLibTriplesAdapter(options, store=store)
199
210
  decoder = Decoder(adapter=adapter)
200
211
  for frame in frames:
201
212
  yield decoder.decode_frame(frame=frame)
202
213
 
203
214
 
204
- def graph_or_dataset_from_jelly(
215
+ def graphs_from_jelly(
205
216
  inp: IO[bytes],
206
- sink: Graph,
207
- ) -> Dataset | Graph:
217
+ store: Store | str = "default",
218
+ ) -> Generator[Any] | Generator[Dataset] | Generator[Graph]:
208
219
  options, frames = get_options_and_frames(inp)
209
220
 
210
- if options.stream_types.flat:
211
- return parse_flat_stream(frames=frames, sink=sink, options=options)
221
+ if options.stream_types.logical_type == jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES:
222
+ yield parse_flat_triples_stream(frames=frames, options=options, store=store)
223
+ return
212
224
 
213
- if options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
214
- return parse_grouped_graph_stream(frames=frames, sink=sink, options=options)
225
+ if options.stream_types.logical_type == jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS:
226
+ yield parse_flat_quads_stream(frames=frames, options=options, store=store)
227
+ return
215
228
 
216
- msg = (
217
- "the stream contains multiple datasets and cannot be parsed into "
218
- "a single dataset"
219
- )
229
+ if options.stream_types.logical_type == jelly.LOGICAL_STREAM_TYPE_GRAPHS:
230
+ yield from parse_graph_stream(frames=frames, options=options, store=store)
231
+ return
232
+
233
+ logical_type_name = jelly.LogicalStreamType.Name(options.stream_types.logical_type)
234
+ msg = f"the stream type {logical_type_name} is not supported "
235
+ raise NotImplementedError(msg)
236
+
237
+
238
+ def graph_from_jelly(
239
+ inp: IO[bytes],
240
+ store: Store | str = "default",
241
+ identifier: str | None = None,
242
+ ) -> Any | Dataset | Graph:
243
+ options, frames = get_options_and_frames(inp)
244
+
245
+ if options.stream_types.logical_type == jelly.LOGICAL_STREAM_TYPE_DATASETS:
246
+ msg = (
247
+ "the stream contains multiple datasets and cannot be parsed into "
248
+ "a single dataset"
249
+ )
250
+ raise NotImplementedError(msg)
251
+
252
+ if options.stream_types.logical_type == jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES:
253
+ return parse_flat_triples_stream(
254
+ frames=frames,
255
+ options=options,
256
+ store=store,
257
+ identifier=identifier,
258
+ )
259
+
260
+ if options.stream_types.logical_type == jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS:
261
+ return parse_flat_quads_stream(
262
+ frames=frames,
263
+ options=options,
264
+ store=store,
265
+ identifier=identifier,
266
+ )
267
+
268
+ if options.stream_types.logical_type == jelly.LOGICAL_STREAM_TYPE_GRAPHS:
269
+ ds = Dataset(store=store, default_union=True)
270
+ ds.default_context = Graph(identifier=identifier, store=store)
271
+
272
+ for graph in parse_graph_stream(frames=frames, options=options, store=store):
273
+ ds.add_graph(graph)
274
+
275
+ return ds
276
+
277
+ logical_type_name = jelly.LogicalStreamType.Name(options.stream_types.logical_type)
278
+ msg = f"the stream type {logical_type_name} is not supported "
220
279
  raise NotImplementedError(msg)
221
280
 
222
281
 
223
282
  class RDFLibJellyParser(RDFLibParser):
224
- def parse(
225
- self,
226
- source: InputSource,
227
- sink: Graph,
228
- ) -> None:
283
+ def parse(self, source: InputSource, sink: Graph) -> None:
229
284
  inp = source.getByteStream() # type: ignore[no-untyped-call]
230
285
  if inp is None:
231
286
  msg = "expected source to be a stream of bytes"
232
287
  raise TypeError(msg)
233
- graph_or_dataset_from_jelly(inp, sink=sink)
288
+ graph_from_jelly(
289
+ inp,
290
+ identifier=sink.identifier,
291
+ store=sink.store,
292
+ )
@@ -12,7 +12,13 @@ from rdflib.serializer import Serializer as RDFLibSerializer
12
12
  from pyjelly import jelly
13
13
  from pyjelly.serialize.encode import RowsAndTerm, Slot, TermEncoder
14
14
  from pyjelly.serialize.ioutils import write_delimited, write_single
15
- from pyjelly.serialize.streams import GraphStream, QuadStream, Stream, TripleStream
15
+ from pyjelly.serialize.streams import (
16
+ GraphStream,
17
+ QuadStream,
18
+ SerializerOptions,
19
+ Stream,
20
+ TripleStream,
21
+ )
16
22
 
17
23
 
18
24
  class RDFLibTermEncoder(TermEncoder):
@@ -49,44 +55,57 @@ def stream_frames(stream: Stream, data: Graph) -> Generator[jelly.RdfStreamFrame
49
55
  raise TypeError(msg)
50
56
 
51
57
 
52
- @stream_frames.register
53
- def triples_stream(
58
+ @stream_frames.register(TripleStream)
59
+ def triples_stream_frames(
54
60
  stream: TripleStream,
55
- data: Graph,
61
+ data: Graph | Dataset,
56
62
  ) -> Generator[jelly.RdfStreamFrame]:
57
- assert not isinstance(data, Dataset)
58
63
  stream.enroll()
59
- if stream.options.namespace_declarations:
64
+ if stream.options.params.namespace_declarations:
60
65
  namespace_declarations(data, stream)
61
- for terms in data:
62
- if frame := stream.triple(terms):
66
+ graphs = (data,) if not isinstance(data, Dataset) else data.graphs()
67
+ for graph in graphs:
68
+ for terms in graph:
69
+ if frame := stream.triple(terms):
70
+ yield frame
71
+ if frame := stream.flow.frame_from_graph():
63
72
  yield frame
64
- if frame := stream.flow.to_stream_frame():
73
+ if stream.stream_types.flat and (frame := stream.flow.to_stream_frame()):
65
74
  yield frame
66
75
 
67
76
 
68
77
  @stream_frames.register
69
- def quads_stream(stream: QuadStream, data: Graph) -> Generator[jelly.RdfStreamFrame]:
78
+ def quads_stream_frames(
79
+ stream: QuadStream,
80
+ data: Dataset,
81
+ ) -> Generator[jelly.RdfStreamFrame]:
70
82
  assert isinstance(data, Dataset)
71
83
  stream.enroll()
72
- if stream.options.namespace_declarations:
84
+ if stream.options.params.namespace_declarations:
73
85
  namespace_declarations(data, stream)
74
86
  for terms in data.quads():
75
87
  if frame := stream.quad(terms):
76
88
  yield frame
77
- if frame := stream.flow.to_stream_frame():
89
+ if frame := stream.flow.frame_from_dataset():
90
+ yield frame
91
+ if stream.stream_types.flat and (frame := stream.flow.to_stream_frame()):
78
92
  yield frame
79
93
 
80
94
 
81
95
  @stream_frames.register
82
- def graphs_stream(stream: GraphStream, data: Graph) -> Generator[jelly.RdfStreamFrame]:
96
+ def graphs_stream_frames(
97
+ stream: GraphStream,
98
+ data: Dataset,
99
+ ) -> Generator[jelly.RdfStreamFrame]:
83
100
  assert isinstance(data, Dataset)
84
101
  stream.enroll()
85
- if stream.options.namespace_declarations:
102
+ if stream.options.params.namespace_declarations:
86
103
  namespace_declarations(data, stream)
87
104
  for graph in data.graphs():
88
105
  yield from stream.graph(graph_id=graph.identifier, graph=graph)
89
- if frame := stream.flow.to_stream_frame():
106
+ if frame := stream.flow.frame_from_dataset():
107
+ yield frame
108
+ if stream.stream_types.flat and (frame := stream.flow.to_stream_frame()):
90
109
  yield frame
91
110
 
92
111
 
@@ -105,15 +124,57 @@ class RDFLibJellySerializer(RDFLibSerializer):
105
124
  raise NotImplementedError(msg)
106
125
  super().__init__(store)
107
126
 
127
+ def guess_options(self) -> SerializerOptions:
128
+ """
129
+ Guess the serializer options based on the store type.
130
+
131
+ >>> RDFLibJellySerializer(Graph()).guess_options().logical_type
132
+ 1
133
+ >>> RDFLibJellySerializer(Dataset()).guess_options().logical_type
134
+ 2
135
+ """
136
+ logical_type = (
137
+ jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS
138
+ if isinstance(self.store, Dataset)
139
+ else jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES
140
+ )
141
+ return SerializerOptions(logical_type=logical_type)
142
+
143
+ def guess_stream(self, options: SerializerOptions) -> Stream:
144
+ """
145
+ Return an appropriate stream implementation for the given options.
146
+
147
+ >>> graph_ser = RDFLibJellySerializer(Graph())
148
+ >>> ds_ser = RDFLibJellySerializer(Dataset())
149
+
150
+ >>> type(graph_ser.guess_stream(graph_ser.guess_options()))
151
+ <class 'pyjelly.serialize.streams.TripleStream'>
152
+ >>> type(ds_ser.guess_stream(ds_ser.guess_options()))
153
+ <class 'pyjelly.serialize.streams.QuadStream'>
154
+ """
155
+ stream_cls: type[Stream]
156
+ if options.logical_type != jelly.LOGICAL_STREAM_TYPE_GRAPHS and isinstance(
157
+ self.store, Dataset
158
+ ):
159
+ stream_cls = QuadStream
160
+ else:
161
+ stream_cls = TripleStream
162
+ return stream_cls.for_rdflib(options=options)
163
+
108
164
  @override
109
165
  def serialize( # type: ignore[override]
110
166
  self,
111
167
  out: IO[bytes],
112
168
  /,
113
169
  *,
114
- stream: Stream,
170
+ stream: Stream | None = None,
171
+ options: SerializerOptions | None = None,
115
172
  **unused: Any,
116
173
  ) -> None:
117
- write = write_delimited if stream.options.delimited else write_single
174
+ if options is None:
175
+ options = self.guess_options()
176
+ if stream is None:
177
+ stream = self.guess_stream(options)
178
+ write = write_delimited if stream.options.params.delimited else write_single
118
179
  for stream_frame in stream_frames(stream, self.store):
119
180
  write(stream_frame, out)
pyjelly/options.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import mimetypes
4
4
  from contextlib import suppress
5
- from dataclasses import dataclass, field
5
+ from dataclasses import dataclass
6
6
  from typing import Final
7
7
  from typing_extensions import Self
8
8
 
@@ -10,7 +10,6 @@ from pyjelly import jelly
10
10
  from pyjelly.errors import (
11
11
  JellyAssertionError,
12
12
  JellyConformanceError,
13
- JellyNotImplementedError,
14
13
  )
15
14
 
16
15
  MIN_NAME_LOOKUP_SIZE: Final[int] = 8
@@ -65,7 +64,7 @@ class LookupPreset:
65
64
 
66
65
  @dataclass(frozen=True)
67
66
  class StreamTypes:
68
- physical_type: jelly.PhysicalStreamType
67
+ physical_type: jelly.PhysicalStreamType = jelly.PHYSICAL_STREAM_TYPE_UNSPECIFIED
69
68
  logical_type: jelly.LogicalStreamType = jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED
70
69
 
71
70
  @property
@@ -83,9 +82,6 @@ class StreamTypes:
83
82
  return f"StreamTypes({self.physical_type}, {self.logical_type})"
84
83
 
85
84
  def __post_init__(self) -> None:
86
- if self.physical_type == jelly.PHYSICAL_STREAM_TYPE_UNSPECIFIED:
87
- msg = "physical type must be specified"
88
- raise JellyNotImplementedError(msg)
89
85
  validate_type_compatibility(
90
86
  physical_type=self.physical_type,
91
87
  logical_type=self.logical_type,
@@ -93,9 +89,7 @@ class StreamTypes:
93
89
 
94
90
 
95
91
  @dataclass(frozen=True)
96
- class StreamOptions:
97
- stream_types: StreamTypes
98
- lookup_preset: LookupPreset = field(default_factory=LookupPreset)
92
+ class StreamParameters:
99
93
  generalized_statements: bool = False
100
94
  rdf_star: bool = False
101
95
  version: int = MAX_VERSION
@@ -115,7 +109,10 @@ def validate_type_compatibility(
115
109
  physical_type: jelly.PhysicalStreamType,
116
110
  logical_type: jelly.LogicalStreamType,
117
111
  ) -> None:
118
- if logical_type == jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED:
112
+ if (
113
+ physical_type == jelly.PHYSICAL_STREAM_TYPE_UNSPECIFIED
114
+ or logical_type == jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED
115
+ ):
119
116
  return
120
117
  triples_physical_type = physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES
121
118
  triples_logical_type = logical_type in TRIPLES_ONLY_LOGICAL_TYPES
pyjelly/parse/decode.py CHANGED
@@ -2,22 +2,28 @@ from __future__ import annotations
2
2
 
3
3
  from abc import ABCMeta, abstractmethod
4
4
  from collections.abc import Iterable, Sequence
5
- from typing import Any, ClassVar
5
+ from typing import Any, ClassVar, NamedTuple
6
6
  from typing_extensions import Never
7
7
 
8
8
  from pyjelly import jelly
9
- from pyjelly.options import LookupPreset, StreamOptions, StreamTypes
9
+ from pyjelly.options import LookupPreset, StreamParameters, StreamTypes
10
10
  from pyjelly.parse.lookup import LookupDecoder
11
11
 
12
12
 
13
+ class ParserOptions(NamedTuple):
14
+ stream_types: StreamTypes
15
+ lookup_preset: LookupPreset
16
+ params: StreamParameters
17
+
18
+
13
19
  def options_from_frame(
14
20
  frame: jelly.RdfStreamFrame,
15
21
  *,
16
22
  delimited: bool,
17
- ) -> StreamOptions:
23
+ ) -> ParserOptions:
18
24
  row = frame.rows[0]
19
25
  options = row.options
20
- return StreamOptions(
26
+ return ParserOptions(
21
27
  stream_types=StreamTypes(
22
28
  physical_type=options.physical_type,
23
29
  logical_type=options.logical_type,
@@ -27,17 +33,17 @@ def options_from_frame(
27
33
  max_prefixes=options.max_prefix_table_size,
28
34
  max_datatypes=options.max_datatype_table_size,
29
35
  ),
30
- stream_name=options.stream_name,
31
- version=options.version,
32
- delimited=delimited,
36
+ params=StreamParameters(
37
+ stream_name=options.stream_name,
38
+ version=options.version,
39
+ delimited=delimited,
40
+ ),
33
41
  )
34
42
 
35
43
 
36
- def _adapter_missing(feature: str, *, options: StreamOptions) -> Never:
37
- physical_type_name = jelly.PhysicalStreamType.Name(
38
- options.stream_types.physical_type
39
- )
40
- logical_type_name = jelly.LogicalStreamType.Name(options.stream_types.logical_type)
44
+ def _adapter_missing(feature: str, *, stream_types: StreamTypes) -> Never:
45
+ physical_type_name = jelly.PhysicalStreamType.Name(stream_types.physical_type)
46
+ logical_type_name = jelly.LogicalStreamType.Name(stream_types.logical_type)
41
47
  msg = (
42
48
  f"adapter with {physical_type_name} and {logical_type_name} "
43
49
  f"does not implement {feature}"
@@ -46,7 +52,7 @@ def _adapter_missing(feature: str, *, options: StreamOptions) -> Never:
46
52
 
47
53
 
48
54
  class Adapter(metaclass=ABCMeta):
49
- def __init__(self, options: StreamOptions) -> None:
55
+ def __init__(self, options: ParserOptions) -> None:
50
56
  self.options = options
51
57
 
52
58
  # Obligatory abstract methods--all adapters must implement these
@@ -73,19 +79,26 @@ class Adapter(metaclass=ABCMeta):
73
79
 
74
80
  # Optional abstract methods--not required to be implemented by all adapters
75
81
  def triple(self, terms: Iterable[Any]) -> Any: # noqa: ARG002
76
- _adapter_missing("decoding triples", options=self.options)
82
+ _adapter_missing("decoding triples", stream_types=self.options.stream_types)
77
83
 
78
84
  def quad(self, terms: Iterable[Any]) -> Any: # noqa: ARG002
79
- _adapter_missing("decoding quads", options=self.options)
85
+ _adapter_missing("decoding quads", stream_types=self.options.stream_types)
80
86
 
81
87
  def graph_start(self, graph_id: Any) -> Any: # noqa: ARG002
82
- _adapter_missing("decoding graph start markers", options=self.options)
88
+ _adapter_missing(
89
+ "decoding graph start markers", stream_types=self.options.stream_types
90
+ )
83
91
 
84
92
  def graph_end(self) -> Any:
85
- _adapter_missing("decoding graph end markers", options=self.options)
93
+ _adapter_missing(
94
+ "decoding graph end markers", stream_types=self.options.stream_types
95
+ )
86
96
 
87
97
  def namespace_declaration(self, name: str, iri: str) -> Any: # noqa: ARG002
88
- _adapter_missing("decoding namespace declarations", options=self.options)
98
+ _adapter_missing(
99
+ "decoding namespace declarations",
100
+ stream_types=self.options.stream_types,
101
+ )
89
102
 
90
103
  def frame(self) -> Any:
91
104
  return None
@@ -104,7 +117,7 @@ class Decoder:
104
117
  self.repeated_terms: dict[str, jelly.RdfIri | str | jelly.RdfLiteral] = {}
105
118
 
106
119
  @property
107
- def options(self) -> StreamOptions:
120
+ def options(self) -> ParserOptions:
108
121
  return self.adapter.options
109
122
 
110
123
  def decode_frame(self, frame: jelly.RdfStreamFrame) -> Any:
@@ -122,13 +135,14 @@ class Decoder:
122
135
  return decode_row(self, row)
123
136
 
124
137
  def validate_stream_options(self, options: jelly.RdfStreamOptions) -> None:
125
- assert self.options.stream_name == options.stream_name
126
- assert self.options.version >= options.version
127
- assert self.options.lookup_preset.max_prefixes == options.max_prefix_table_size
128
- assert (
129
- self.options.lookup_preset.max_datatypes == options.max_datatype_table_size
130
- )
131
- assert self.options.lookup_preset.max_names == options.max_name_table_size
138
+ stream_types, lookup_preset, params = self.options
139
+ assert stream_types.physical_type == options.physical_type
140
+ assert stream_types.logical_type == options.logical_type
141
+ assert params.stream_name == options.stream_name
142
+ assert params.version >= options.version
143
+ assert lookup_preset.max_prefixes == options.max_prefix_table_size
144
+ assert lookup_preset.max_datatypes == options.max_datatype_table_size
145
+ assert lookup_preset.max_names == options.max_name_table_size
132
146
 
133
147
  def ingest_prefix_entry(self, entry: jelly.RdfPrefixEntry) -> None:
134
148
  self.prefixes.assign_entry(index=entry.id, value=entry.value)
pyjelly/parse/ioutils.py CHANGED
@@ -7,8 +7,7 @@ from google.protobuf.proto import parse, parse_length_prefixed
7
7
 
8
8
  from pyjelly import jelly
9
9
  from pyjelly.errors import JellyConformanceError
10
- from pyjelly.options import StreamOptions
11
- from pyjelly.parse.decode import options_from_frame
10
+ from pyjelly.parse.decode import ParserOptions, options_from_frame
12
11
 
13
12
 
14
13
  def delimited_jelly_hint(header: bytes) -> bool:
@@ -62,7 +61,7 @@ def frame_iterator(inp: IO[bytes]) -> Generator[jelly.RdfStreamFrame]:
62
61
 
63
62
  def get_options_and_frames(
64
63
  inp: IO[bytes],
65
- ) -> tuple[StreamOptions, Iterator[jelly.RdfStreamFrame]]:
64
+ ) -> tuple[ParserOptions, Iterator[jelly.RdfStreamFrame]]:
66
65
  is_delimited = delimited_jelly_hint(bytes_read := inp.read(3))
67
66
  inp.seek(-len(bytes_read), os.SEEK_CUR)
68
67
 
pyjelly/serialize/encode.py CHANGED
@@ -37,24 +37,24 @@ class TermEncoder:
37
37
 
38
38
  def __init__(
39
39
  self,
40
- max_names: int = options.DEFAULT_NAME_LOOKUP_SIZE,
41
- max_prefixes: int = options.DEFAULT_PREFIX_LOOKUP_SIZE,
42
- max_datatypes: int = options.DEFAULT_DATATYPE_LOOKUP_SIZE,
40
+ lookup_preset: options.LookupPreset | None = None,
43
41
  ) -> None:
44
- self.names = LookupEncoder(lookup_size=max_names)
45
- self.prefixes = LookupEncoder(lookup_size=max_prefixes)
46
- self.datatypes = LookupEncoder(lookup_size=max_datatypes)
42
+ if lookup_preset is None:
43
+ lookup_preset = options.LookupPreset()
44
+ self.lookup_preset = lookup_preset
45
+ self.names = LookupEncoder(lookup_size=lookup_preset.max_names)
46
+ self.prefixes = LookupEncoder(lookup_size=lookup_preset.max_prefixes)
47
+ self.datatypes = LookupEncoder(lookup_size=lookup_preset.max_datatypes)
47
48
 
48
49
  def encode_iri(self, iri_string: str) -> RowsAnd[jelly.RdfIri]:
49
50
  prefix, name = split_iri(iri_string)
50
- if prefix and self.prefixes.lookup.max_size:
51
+ if self.prefixes.lookup.max_size:
51
52
  prefix_entry_index = self.prefixes.encode_entry_index(prefix)
52
53
  else:
53
54
  name = iri_string
54
55
  prefix_entry_index = None
55
56
 
56
57
  name_entry_index = self.names.encode_entry_index(name)
57
-
58
58
  term_rows = []
59
59
 
60
60
  if prefix_entry_index is not None:
@@ -124,11 +124,6 @@ class Slot(str, Enum):
124
124
  return self.value
125
125
 
126
126
 
127
- def new_repeated_terms() -> dict[Slot, object]:
128
- """Create a new dictionary for repeated terms."""
129
- return dict.fromkeys(Slot)
130
-
131
-
132
127
  def encode_statement(
133
128
  terms: Iterable[object],
134
129
  term_encoder: TermEncoder,
@@ -181,17 +176,21 @@ def encode_namespace_declaration(
181
176
  return rows
182
177
 
183
178
 
184
- def encode_options(options: options.StreamOptions) -> jelly.RdfStreamRow:
179
+ def encode_options(
180
+ lookup_preset: options.LookupPreset,
181
+ stream_types: options.StreamTypes,
182
+ params: options.StreamParameters,
183
+ ) -> jelly.RdfStreamRow:
185
184
  return jelly.RdfStreamRow(
186
185
  options=jelly.RdfStreamOptions(
187
- stream_name=options.stream_name,
188
- physical_type=options.stream_types.physical_type,
189
- generalized_statements=options.generalized_statements,
190
- rdf_star=options.rdf_star,
191
- max_name_table_size=options.lookup_preset.max_names,
192
- max_prefix_table_size=options.lookup_preset.max_prefixes,
193
- max_datatype_table_size=options.lookup_preset.max_datatypes,
194
- logical_type=options.stream_types.logical_type,
195
- version=options.version,
186
+ stream_name=params.stream_name,
187
+ physical_type=stream_types.physical_type,
188
+ generalized_statements=params.generalized_statements,
189
+ rdf_star=params.rdf_star,
190
+ max_name_table_size=lookup_preset.max_names,
191
+ max_prefix_table_size=lookup_preset.max_prefixes,
192
+ max_datatype_table_size=lookup_preset.max_datatypes,
193
+ logical_type=stream_types.logical_type,
194
+ version=params.version,
196
195
  )
197
196
  )
pyjelly/serialize/flows.py CHANGED
@@ -8,6 +8,8 @@ from typing_extensions import override
8
8
 
9
9
  from pyjelly import jelly
10
10
 
11
+ DEFAULT_FRAME_SIZE = 250
12
+
11
13
 
12
14
  class FrameFlow(UserList[jelly.RdfStreamRow]):
13
15
  """
@@ -16,9 +18,25 @@ class FrameFlow(UserList[jelly.RdfStreamRow]):
16
18
  Collects stream rows and assembles them into RdfStreamFrame objects when ready.
17
19
  """
18
20
 
19
- logical_type: ClassVar[jelly.LogicalStreamType]
21
+ logical_type: jelly.LogicalStreamType
20
22
  registry: ClassVar[dict[jelly.LogicalStreamType, type[FrameFlow]]] = {}
21
23
 
24
+ def frame_from_graph(self) -> jelly.RdfStreamFrame | None:
25
+ """
26
+ Treat the current rows as a graph and produce a frame.
27
+
28
+ Default implementation returns None.
29
+ """
30
+ return None
31
+
32
+ def frame_from_dataset(self) -> jelly.RdfStreamFrame | None:
33
+ """
34
+ Treat the current rows as a dataset and produce a frame.
35
+
36
+ Default implementation returns None.
37
+ """
38
+ return None
39
+
22
40
  def frame_from_bounds(self) -> jelly.RdfStreamFrame | None:
23
41
  return None
24
42
 
@@ -29,15 +47,6 @@ class FrameFlow(UserList[jelly.RdfStreamRow]):
29
47
  self.clear()
30
48
  return frame
31
49
 
32
- def __init_subclass__(cls) -> None:
33
- """
34
- Register subclasses of FrameFlow with their logical stream type.
35
-
36
- This allows for dynamic dispatch based on the logical stream type.
37
- """
38
- if cls.logical_type != jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED:
39
- cls.registry[cls.logical_type] = cls
40
-
41
50
 
42
51
  class ManualFrameFlow(FrameFlow):
43
52
  """
@@ -52,6 +61,15 @@ class ManualFrameFlow(FrameFlow):
52
61
 
53
62
  logical_type = jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED
54
63
 
64
+ def __init__(
65
+ self,
66
+ initlist: Iterable[jelly.RdfStreamRow] | None = None,
67
+ *,
68
+ logical_type: jelly.LogicalStreamType = jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED,
69
+ ) -> None:
70
+ super().__init__(initlist)
71
+ self.logical_type = logical_type
72
+
55
73
 
56
74
  @dataclass
57
75
  class BoundedFrameFlow(FrameFlow):
@@ -62,9 +80,7 @@ class BoundedFrameFlow(FrameFlow):
62
80
  """
63
81
 
64
82
  logical_type = jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED
65
-
66
83
  frame_size: int
67
- default_frame_size: ClassVar[int] = 250
68
84
 
69
85
  def __init__(
70
86
  self,
@@ -73,7 +89,7 @@ class BoundedFrameFlow(FrameFlow):
73
89
  frame_size: int | None = None,
74
90
  ) -> None:
75
91
  super().__init__(initlist)
76
- self.frame_size = frame_size or self.default_frame_size
92
+ self.frame_size = frame_size or DEFAULT_FRAME_SIZE
77
93
 
78
94
  @override
79
95
  def frame_from_bounds(self) -> jelly.RdfStreamFrame | None:
@@ -82,13 +98,42 @@ class BoundedFrameFlow(FrameFlow):
82
98
  return None
83
99
 
84
100
 
85
- # Fallback for unspecified logical types
86
- FrameFlow.registry[jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED] = BoundedFrameFlow
87
-
88
-
89
101
  class FlatTriplesFrameFlow(BoundedFrameFlow):
90
102
  logical_type = jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES
91
103
 
92
104
 
93
105
  class FlatQuadsFrameFlow(BoundedFrameFlow):
94
106
  logical_type = jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS
107
+
108
+
109
+ class GraphsFrameFlow(FrameFlow):
110
+ logical_type = jelly.LOGICAL_STREAM_TYPE_GRAPHS
111
+
112
+ def frame_from_graph(self) -> jelly.RdfStreamFrame | None:
113
+ return self.to_stream_frame()
114
+
115
+
116
+ class DatasetsFrameFlow(FrameFlow):
117
+ logical_type = jelly.LOGICAL_STREAM_TYPE_DATASETS
118
+
119
+ def frame_from_dataset(self) -> jelly.RdfStreamFrame | None:
120
+ return self.to_stream_frame()
121
+
122
+
123
+ FLOW_DISPATCH: dict[jelly.LogicalStreamType, type[FrameFlow]] = {
124
+ jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES: FlatTriplesFrameFlow,
125
+ jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS: FlatQuadsFrameFlow,
126
+ jelly.LOGICAL_STREAM_TYPE_GRAPHS: GraphsFrameFlow,
127
+ jelly.LOGICAL_STREAM_TYPE_DATASETS: DatasetsFrameFlow,
128
+ }
129
+
130
+
131
+ def flow_for_type(logical_type: jelly.LogicalStreamType) -> type[FrameFlow]:
132
+ try:
133
+ return FLOW_DISPATCH[logical_type]
134
+ except KeyError:
135
+ msg = (
136
+ "unsupported logical stream type: "
137
+ f"{jelly.LogicalStreamType.Name(logical_type)}"
138
+ )
139
+ raise NotImplementedError(msg) from None
@@ -108,11 +108,13 @@ class LookupEncoder:
108
108
  return current_index
109
109
 
110
110
  def encode_prefix_term_index(self, value: str) -> int:
111
- if not value or self.lookup.max_size == 0:
111
+ if self.lookup.max_size == 0:
112
112
  return 0
113
113
  previous_index = self.last_reused_index
114
+ if not value and previous_index == 0:
115
+ return 0
114
116
  current_index = self.encode_term_index(value)
115
- if value and previous_index == 0:
117
+ if previous_index == 0:
116
118
  return current_index
117
119
  if current_index == previous_index:
118
120
  return 0
@@ -1,10 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from collections.abc import Generator, Iterable
4
+ from dataclasses import dataclass, field
4
5
  from typing import Any, ClassVar
5
6
 
6
7
  from pyjelly import jelly
7
- from pyjelly.options import StreamOptions
8
+ from pyjelly.options import LookupPreset, StreamParameters, StreamTypes
8
9
  from pyjelly.serialize.encode import (
9
10
  Slot,
10
11
  TermEncoder,
@@ -12,61 +13,84 @@ from pyjelly.serialize.encode import (
12
13
  encode_options,
13
14
  encode_quad,
14
15
  encode_triple,
15
- new_repeated_terms,
16
16
  )
17
- from pyjelly.serialize.flows import FrameFlow, ManualFrameFlow
17
+ from pyjelly.serialize.flows import (
18
+ DEFAULT_FRAME_SIZE,
19
+ BoundedFrameFlow,
20
+ FlatQuadsFrameFlow,
21
+ FlatTriplesFrameFlow,
22
+ FrameFlow,
23
+ ManualFrameFlow,
24
+ flow_for_type,
25
+ )
26
+
27
+
28
+ @dataclass
29
+ class SerializerOptions:
30
+ flow: FrameFlow | None = None
31
+ frame_size: int = DEFAULT_FRAME_SIZE
32
+ logical_type: jelly.LogicalStreamType = jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED
33
+ params: StreamParameters = field(default_factory=StreamParameters)
34
+ lookup_preset: LookupPreset = field(default_factory=LookupPreset)
18
35
 
19
36
 
20
37
  class Stream:
21
38
  physical_type: ClassVar[jelly.PhysicalStreamType]
22
- registry: ClassVar[dict[jelly.PhysicalStreamType, type[Stream]]] = {}
23
- flow: FrameFlow
39
+ default_delimited_flow_class: ClassVar[type[BoundedFrameFlow]]
24
40
 
25
41
  def __init__(
26
42
  self,
27
43
  *,
28
- options: StreamOptions,
29
- encoder_class: type[TermEncoder],
30
- **flow_args: Any,
44
+ encoder: TermEncoder,
45
+ options: SerializerOptions | None = None,
31
46
  ) -> None:
32
- assert options.stream_types.physical_type == self.physical_type
47
+ self.encoder = encoder
48
+ if options is None:
49
+ options = SerializerOptions()
33
50
  self.options = options
34
- self.encoder = encoder_class(
35
- max_prefixes=options.lookup_preset.max_prefixes,
36
- max_names=options.lookup_preset.max_names,
37
- max_datatypes=options.lookup_preset.max_datatypes,
38
- )
39
- flow_class = FrameFlow.registry[self.options.stream_types.logical_type]
40
- if not options.delimited:
41
- flow_class = ManualFrameFlow
42
- self.flow = flow_class(**flow_args)
43
- self.repeated_terms = new_repeated_terms()
51
+ flow = options.flow
52
+ if flow is None:
53
+ flow = self.infer_flow()
54
+ self.flow = flow
55
+ self.repeated_terms = dict.fromkeys(Slot)
44
56
  self.enrolled = False
45
-
46
- @staticmethod
47
- def from_options(
48
- options: StreamOptions,
49
- encoder_class: type[TermEncoder] | None = None,
50
- **flow_args: Any,
51
- ) -> Any:
52
- if encoder_class is None:
53
- from pyjelly.integrations.rdflib.serialize import RDFLibTermEncoder
54
-
55
- encoder_class = RDFLibTermEncoder
56
- stream_class = Stream.registry[options.stream_types.physical_type]
57
- return stream_class(
58
- options=options,
59
- encoder_class=encoder_class,
60
- **flow_args,
57
+ self.stream_types = StreamTypes(
58
+ physical_type=self.physical_type,
59
+ logical_type=self.flow.logical_type,
61
60
  )
62
61
 
62
+ def infer_flow(self) -> FrameFlow:
63
+ flow: FrameFlow
64
+ if self.options.params.delimited:
65
+ if self.options.logical_type != jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED:
66
+ flow_class = flow_for_type(self.options.logical_type)
67
+ else:
68
+ flow_class = self.default_delimited_flow_class
69
+
70
+ if self.options.logical_type in (
71
+ jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES,
72
+ jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS,
73
+ ):
74
+ flow = flow_class(frame_size=self.options.frame_size) # type: ignore[call-overload]
75
+ else:
76
+ flow = flow_class()
77
+ else:
78
+ flow = ManualFrameFlow(logical_type=self.options.logical_type)
79
+ return flow
80
+
63
81
  def enroll(self) -> None:
64
82
  if not self.enrolled:
65
83
  self.stream_options()
66
84
  self.enrolled = True
67
85
 
68
86
  def stream_options(self) -> None:
69
- self.flow.append(encode_options(self.options))
87
+ self.flow.append(
88
+ encode_options(
89
+ stream_types=self.stream_types,
90
+ params=self.options.params,
91
+ lookup_preset=self.options.lookup_preset,
92
+ )
93
+ )
70
94
 
71
95
  def namespace_declaration(self, name: str, iri: str) -> None:
72
96
  rows = encode_namespace_declaration(
@@ -76,12 +100,37 @@ class Stream:
76
100
  )
77
101
  self.flow.extend(rows)
78
102
 
79
- def __init_subclass__(cls) -> None:
80
- cls.registry[cls.physical_type] = cls
103
+ @classmethod
104
+ def for_rdflib(cls, options: SerializerOptions | None = None) -> Stream:
105
+ if cls is Stream:
106
+ msg = "Stream is an abstract base class, use a subclass instead"
107
+ raise TypeError(msg)
108
+ from pyjelly.integrations.rdflib.serialize import RDFLibTermEncoder
109
+
110
+ lookup_preset: LookupPreset | None = None
111
+ if options is not None:
112
+ lookup_preset = options.lookup_preset
113
+ return cls(
114
+ encoder=RDFLibTermEncoder(lookup_preset=lookup_preset),
115
+ options=options,
116
+ )
117
+
118
+
119
+ def stream_for_type(physical_type: jelly.PhysicalStreamType) -> type[Stream]:
120
+ try:
121
+ stream_cls = STREAM_DISPATCH[physical_type]
122
+ except KeyError:
123
+ msg = (
124
+ "no stream class for physical type "
125
+ f"{jelly.PhysicalStreamType.Name(physical_type)}"
126
+ )
127
+ raise NotImplementedError(msg) from None
128
+ return stream_cls
81
129
 
82
130
 
83
131
  class TripleStream(Stream):
84
132
  physical_type = jelly.PHYSICAL_STREAM_TYPE_TRIPLES
133
+ default_delimited_flow_class: ClassVar = FlatTriplesFrameFlow
85
134
 
86
135
  def triple(self, terms: Iterable[object]) -> jelly.RdfStreamFrame | None:
87
136
  new_rows = encode_triple(
@@ -90,13 +139,12 @@ class TripleStream(Stream):
90
139
  repeated_terms=self.repeated_terms,
91
140
  )
92
141
  self.flow.extend(new_rows)
93
- if frame := self.flow.frame_from_bounds():
94
- return frame
95
- return None
142
+ return self.flow.frame_from_bounds()
96
143
 
97
144
 
98
145
  class QuadStream(Stream):
99
146
  physical_type = jelly.PHYSICAL_STREAM_TYPE_QUADS
147
+ default_delimited_flow_class: ClassVar = FlatQuadsFrameFlow
100
148
 
101
149
  def quad(self, terms: Iterable[object]) -> jelly.RdfStreamFrame | None:
102
150
  new_rows = encode_quad(
@@ -105,13 +153,12 @@ class QuadStream(Stream):
105
153
  repeated_terms=self.repeated_terms,
106
154
  )
107
155
  self.flow.extend(new_rows)
108
- if frame := self.flow.frame_from_bounds():
109
- return frame
110
- return None
156
+ return self.flow.frame_from_bounds()
111
157
 
112
158
 
113
159
  class GraphStream(TripleStream):
114
160
  physical_type = jelly.PHYSICAL_STREAM_TYPE_GRAPHS
161
+ default_delimited_flow_class: ClassVar = FlatQuadsFrameFlow
115
162
 
116
163
  def graph(
117
164
  self,
@@ -129,5 +176,12 @@ class GraphStream(TripleStream):
129
176
  yield frame
130
177
  end_row = jelly.RdfStreamRow(graph_end=jelly.RdfGraphEnd())
131
178
  self.flow.append(end_row)
132
- if self.flow.frame_from_bounds():
133
- yield self.flow.to_stream_frame() # type: ignore[misc]
179
+ if frame := self.flow.frame_from_bounds():
180
+ yield frame
181
+
182
+
183
+ STREAM_DISPATCH: dict[jelly.PhysicalStreamType, type[Stream]] = {
184
+ jelly.PHYSICAL_STREAM_TYPE_TRIPLES: TripleStream,
185
+ jelly.PHYSICAL_STREAM_TYPE_QUADS: QuadStream,
186
+ jelly.PHYSICAL_STREAM_TYPE_GRAPHS: GraphStream,
187
+ }
@@ -0,0 +1,100 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyjelly
3
+ Version: 0.2.1
4
+ Summary: Jelly-RDF implementation for Python
5
+ Project-URL: Homepage, https://w3id.org/jelly/pyjelly
6
+ Project-URL: Documentation, https://w3id.org/jelly/pyjelly
7
+ Project-URL: Repository, https://github.com/Jelly-RDF/pyjelly
8
+ Project-URL: Issues, https://github.com/Jelly-RDF/pyjelly/issues
9
+ Project-URL: Changelog, https://github.com/Jelly-RDF/pyjelly/releases
10
+ Author-email: "NeverBlink et al." <contact@neverblink.eu>
11
+ License-Expression: Apache-2.0
12
+ License-File: LICENSE
13
+ Keywords: Jelly,Knowledge graph,RDF,Serialization format
14
+ Classifier: Development Status :: 4 - Beta
15
+ Classifier: Environment :: Console
16
+ Classifier: Environment :: Plugins
17
+ Classifier: Intended Audience :: Developers
18
+ Classifier: Intended Audience :: Information Technology
19
+ Classifier: License :: OSI Approved :: Apache Software License
20
+ Classifier: Programming Language :: Python :: 3
21
+ Classifier: Programming Language :: Python :: 3.9
22
+ Classifier: Programming Language :: Python :: 3.10
23
+ Classifier: Programming Language :: Python :: 3.11
24
+ Classifier: Programming Language :: Python :: 3.12
25
+ Classifier: Programming Language :: Python :: 3.13
26
+ Classifier: Topic :: Database
27
+ Classifier: Topic :: File Formats
28
+ Classifier: Topic :: Software Development :: Libraries
29
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
30
+ Requires-Python: >=3.9
31
+ Requires-Dist: protobuf>=5.29.3
32
+ Requires-Dist: typing-extensions>=4.12.2
33
+ Provides-Extra: rdflib
34
+ Requires-Dist: rdflib>=7.1.4; extra == 'rdflib'
35
+ Description-Content-Type: text/markdown
36
+
37
+ [![Documentation](https://img.shields.io/website?url=https%3A%2F%2Fw3id.org%2Fjelly%2Fpyjelly&label=Documentation)](https://w3id.org/jelly/pyjelly) [![PyPI – Version](https://img.shields.io/pypi/v/pyjelly)](https://pypi.org/project/pyjelly/) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pyjelly)](https://pypi.org/project/pyjelly/) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![CI status](https://github.com/Jelly-RDF/pyjelly/actions/workflows/ci.yml/badge.svg)](https://github.com/Jelly-RDF/pyjelly/actions/workflows/ci.yml) [![Discord](https://img.shields.io/discord/1333391881404420179?label=Discord%20chat)](https://discord.gg/A8sN5XwVa5)
38
+
39
+ # pyjelly
40
+
41
+ **pyjelly** is a Python implementation of [Jelly](http://w3id.org/jelly), a high-performance binary serialization format and streaming protocol for RDF knowledge graphs.
42
+
43
+ **Documentation, usage guide and more: https://w3id.org/jelly/pyjelly**
44
+
45
+ ## Features
46
+
47
+ - **Fast reading and writing** of RDF knowledge graphs in the [Jelly format](http://w3id.org/jelly)
48
+ - **Seamless integration with [rdflib](https://rdflib.readthedocs.io/)**
49
+ - **Stream processing support** for large datasets or streams of many RDF graphs/datasets
50
+
51
+ ## Getting started
52
+
53
+ To get started with pyjelly, you can install it via pip:
54
+
55
+ ```bash
56
+ pip install pyjelly[rdflib]
57
+ ```
58
+
59
+ To write an RDF graph to a Jelly file, you can use the following code:
60
+
61
+ ```python
62
+ from rdflib import Graph
63
+
64
+ g = Graph()
65
+ g.parse("http://xmlns.com/foaf/spec/index.rdf")
66
+ g.serialize(destination="foaf.jelly", format="jelly")
67
+ ```
68
+
69
+ To read a Jelly file and convert it to an rdflib Graph, you can use:
70
+
71
+ ```python
72
+ from rdflib import Graph
73
+
74
+ g = Graph()
75
+ g.parse("foaf.jelly", format="jelly")
76
+ ```
77
+
78
+ **See more examples, [API reference](https://w3id.org/jelly/pyjelly/dev/api), and more in the [documentation](https://w3id.org/jelly/pyjelly).**
79
+
80
+ ## Contributing and support
81
+
82
+ This project is being actively developed – you can stay tuned by [watching this repository](https://docs.github.com/en/account-and-profile/managing-subscriptions-and-notifications-on-github/setting-up-notifications/about-notifications#subscription-options).
83
+
84
+ You can also join the **[Jelly Discord chat](https://discord.gg/A8sN5XwVa5)** to ask questions about pyjelly and to be up-to-date with the development activities.
85
+
86
+ ### Commercial support
87
+
88
+ **[NeverBlink](https://neverblink.eu)** provides commercial support services for Jelly, including implementing custom features, system integrations, implementations for new frameworks, benchmarking, and more.
89
+
90
+ ## Contributing
91
+
92
+ If you'd like to contribute, check out our [contributing guidelines](CONTRIBUTING.md).
93
+
94
+ ## License
95
+
96
+ The pyjelly library is licensed under the [Apache 2.0 license](https://www.apache.org/licenses/LICENSE-2.0).
97
+
98
+ ----
99
+
100
+ The development of the Jelly protocol, its implementations, and supporting tooling was co-funded by the European Union. **[More details](https://w3id.org/jelly/dev/licensing/projects)**.
@@ -1,28 +1,28 @@
1
1
  pyjelly/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  pyjelly/errors.py,sha256=R-xRB4a9S19J9dzAL4a5MCaBwb9ev_kvphGFkQJX6ZU,332
3
- pyjelly/options.py,sha256=QyGLpHOyNvBSVYYtbLD2rW43gHXRY580NAA17G9dhHs,4045
3
+ pyjelly/options.py,sha256=jYVPNdaMTh76Oqtdk8kaJwG5gv8dUjlMkW6nXWohCn4,3862
4
4
  pyjelly/_proto/grpc.proto,sha256=3PfcZWqKhUSzP_T-xT-80raUYERr_dXWd8rITzXIqek,1188
5
5
  pyjelly/_proto/patch.proto,sha256=gASUm0xDG9J1advNoq_cCsJYxudTbQaiZQBq4oW3kw4,5291
6
6
  pyjelly/_proto/rdf.proto,sha256=EKxyG421B4m0Wx5-6jjojdga_hA3jpZfF6-T3lMc0hI,12763
7
7
  pyjelly/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  pyjelly/integrations/rdflib/__init__.py,sha256=lpIz6iildMf5bDvj3aBqZJ7kgKFrTx_tsqSb6PkLis0,552
9
- pyjelly/integrations/rdflib/parse.py,sha256=k7cNSFgFXK0_4792eZ-lDRzzSqLI7DFMZmbsPD9SLyE,7474
10
- pyjelly/integrations/rdflib/serialize.py,sha256=YNwKBD_a4oKNktUQa092UXvmdcu9JYAJDkYRfki2p-w,3940
9
+ pyjelly/integrations/rdflib/parse.py,sha256=ZjFQt72U64jpVSuyVHq28H9cSNISl8O4_W_HlyqtZXQ,9738
10
+ pyjelly/integrations/rdflib/serialize.py,sha256=5M17BN5LKSR5bPX2V97AjKNRF9UKQHwj1CMreILCUE4,6044
11
11
  pyjelly/jelly/__init__.py,sha256=9kacwn8Ew_1fcgj1abz6miEz-AtUdPT2ltFWaRIE5VE,126
12
12
  pyjelly/jelly/rdf_pb2.py,sha256=L_fPtDaURFCpLIMqVdl4RwiWyVgEFOwtB4-If3MpoSg,8952
13
13
  pyjelly/jelly/rdf_pb2.pyi,sha256=-Vv2HlpUWhaKPEb0YXOTx21cIKoqoBmTY8U6HPMUcLw,11789
14
14
  pyjelly/parse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- pyjelly/parse/decode.py,sha256=7yfpZ4w8HeXTqWyDROsqfXSaZcCsdnYNhYoRtxRaWEs,8455
16
- pyjelly/parse/ioutils.py,sha256=FnQNPiDAWLk0IXxUkmVxjsVEjC1y-dBTKKk6lf224SM,2747
15
+ pyjelly/parse/decode.py,sha256=LJHWBXqYP0Bk4_3FIxeTNXPCRVbWMU3YzFObrzvcIzE,8910
16
+ pyjelly/parse/ioutils.py,sha256=m7Kxqw0lAhM7swFuDWPRBE3ngjkiYRVUoM2O1QOVyT4,2720
17
17
  pyjelly/parse/lookup.py,sha256=1AbdZEycLC4tRfh3fgF5hv5PrhwhdWvCUC53iHt-E4c,2193
18
18
  pyjelly/serialize/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- pyjelly/serialize/encode.py,sha256=ev8Z7B-ptvch1Xu173lDO3JW3egW-QyzOngDfLDzKsI,6548
20
- pyjelly/serialize/flows.py,sha256=vezvYeYYumEH0IceebogW4QwM2d1GeOv1yASrJWTTHc,2665
19
+ pyjelly/serialize/encode.py,sha256=U3B2VkEO3J0bABTF18rvC8FCzqsi4Xo1RqtHCMbaub0,6485
20
+ pyjelly/serialize/flows.py,sha256=mCGJnR7UyuAPn4EArXaBt3llpZ3_iwLq90bQwRTbhpg,3933
21
21
  pyjelly/serialize/ioutils.py,sha256=2_NaadLfHO3jKR1ZV7aK6jQ09sPKBar9iLFHYwourz8,400
22
- pyjelly/serialize/lookup.py,sha256=vH21uzs7gvjk-Yc0hoSC3_LPsVff86YHuUhikP9djYo,4047
23
- pyjelly/serialize/streams.py,sha256=0csixgSnGprXhoHOoVimtoPFgt4mqU4Lgv_l6d0EW6g,4247
24
- pyjelly-0.1.0.dist-info/METADATA,sha256=Kh6HOcK8aLfkSyszqaKOejBbqaI1JrR1gzJ85xfWlr8,348
25
- pyjelly-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
26
- pyjelly-0.1.0.dist-info/entry_points.txt,sha256=kUG0p9zso7HpitdMaQaXEj_KSqgOGsL0Ky9ARbecN1g,339
27
- pyjelly-0.1.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
28
- pyjelly-0.1.0.dist-info/RECORD,,
22
+ pyjelly/serialize/lookup.py,sha256=h0lYFjdB6CIuN2DzAW6EE4ILJFUuto3paAK6DG1DZYg,4091
23
+ pyjelly/serialize/streams.py,sha256=JiYHZc2YkyWuSspsLBybGimBpTOPZJSrzC8fRVYQ2p8,6117
24
+ pyjelly-0.2.1.dist-info/METADATA,sha256=L-9nN34XpAcL3WHsLg1yZIW40DSif9iw1WC4x-sILrw,4585
25
+ pyjelly-0.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
26
+ pyjelly-0.2.1.dist-info/entry_points.txt,sha256=kUG0p9zso7HpitdMaQaXEj_KSqgOGsL0Ky9ARbecN1g,339
27
+ pyjelly-0.2.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
28
+ pyjelly-0.2.1.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: pyjelly
3
- Version: 0.1.0
4
- Summary: Jelly RDF implementation for Python
5
- Author-email: Bartosz Sławecki <bartosz@neverblink.eu>, Anastasiya Danilenka <anastasiya@neverblink.eu>
6
- License-File: LICENSE
7
- Requires-Python: >=3.9
8
- Requires-Dist: protobuf>=5.29.3
9
- Requires-Dist: rdflib>=7.1.4
10
- Requires-Dist: typing-extensions>=4.12.2