pyjelly 0.7.1__cp311-cp311-macosx_11_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. cb523b6bada1c6eba8b4__mypyc.cpython-311-darwin.so +0 -0
  2. pyjelly/__init__.py +0 -0
  3. pyjelly/_proto/grpc.proto +33 -0
  4. pyjelly/_proto/patch.proto +165 -0
  5. pyjelly/_proto/rdf.proto +384 -0
  6. pyjelly/errors.py +10 -0
  7. pyjelly/integrations/__init__.py +0 -0
  8. pyjelly/integrations/generic/__init__.py +0 -0
  9. pyjelly/integrations/generic/generic_sink.py +202 -0
  10. pyjelly/integrations/generic/parse.py +412 -0
  11. pyjelly/integrations/generic/serialize.cpython-311-darwin.so +0 -0
  12. pyjelly/integrations/generic/serialize.py +402 -0
  13. pyjelly/integrations/rdflib/__init__.py +24 -0
  14. pyjelly/integrations/rdflib/parse.py +560 -0
  15. pyjelly/integrations/rdflib/serialize.py +408 -0
  16. pyjelly/jelly/__init__.py +5 -0
  17. pyjelly/jelly/rdf_pb2.py +70 -0
  18. pyjelly/jelly/rdf_pb2.pyi +231 -0
  19. pyjelly/options.py +141 -0
  20. pyjelly/parse/__init__.py +0 -0
  21. pyjelly/parse/decode.cpython-311-darwin.so +0 -0
  22. pyjelly/parse/decode.py +447 -0
  23. pyjelly/parse/ioutils.cpython-311-darwin.so +0 -0
  24. pyjelly/parse/ioutils.py +115 -0
  25. pyjelly/parse/lookup.cpython-311-darwin.so +0 -0
  26. pyjelly/parse/lookup.py +70 -0
  27. pyjelly/serialize/__init__.py +0 -0
  28. pyjelly/serialize/encode.cpython-311-darwin.so +0 -0
  29. pyjelly/serialize/encode.py +397 -0
  30. pyjelly/serialize/flows.py +196 -0
  31. pyjelly/serialize/ioutils.cpython-311-darwin.so +0 -0
  32. pyjelly/serialize/ioutils.py +13 -0
  33. pyjelly/serialize/lookup.cpython-311-darwin.so +0 -0
  34. pyjelly/serialize/lookup.py +137 -0
  35. pyjelly/serialize/streams.cpython-311-darwin.so +0 -0
  36. pyjelly/serialize/streams.py +281 -0
  37. pyjelly-0.7.1.dist-info/METADATA +114 -0
  38. pyjelly-0.7.1.dist-info/RECORD +41 -0
  39. pyjelly-0.7.1.dist-info/WHEEL +6 -0
  40. pyjelly-0.7.1.dist-info/entry_points.txt +7 -0
  41. pyjelly-0.7.1.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,402 @@
1
+ # ruff: noqa: I001
2
+ from __future__ import annotations
3
+ from typing import cast
4
+ from collections.abc import Generator
5
+ from functools import singledispatch
6
+ from typing import Any, IO
7
+ from itertools import chain
8
+ from pyjelly.options import StreamParameters
9
+ from pyjelly.integrations.generic.generic_sink import (
10
+ GenericStatementSink,
11
+ Quad,
12
+ Triple,
13
+ DefaultGraph,
14
+ GraphName,
15
+ IRI,
16
+ BlankNode,
17
+ Literal,
18
+ )
19
+
20
+ from pyjelly import jelly
21
+ from pyjelly.serialize.encode import Rows, Slot, TermEncoder, HasGraph, Statement
22
+ from pyjelly.serialize.ioutils import write_delimited
23
+ from pyjelly.serialize.streams import (
24
+ GraphStream,
25
+ QuadStream,
26
+ SerializerOptions,
27
+ Stream,
28
+ TripleStream,
29
+ ) # ruff: enable
30
+
31
# Number of terms in a quad statement (s, p, o, g).
# NOTE(review): not referenced anywhere in the visible part of this module.
QUAD_ARITY = 4
32
+
33
+
34
class GenericSinkTermEncoder(TermEncoder):
    """Term encoder for the term classes used by ``GenericStatementSink``."""

    def encode_spo(self, term: object, slot: Slot, statement: Statement) -> Rows:
        """
        Encode a subject/predicate/object term into the given statement.

        The term's class decides which field of ``statement`` is filled:
        ``IRI``, ``Literal``, ``BlankNode`` or a quoted ``Triple``.

        Args:
            term (object): term to encode.
            slot (Slot): position of the term inside the statement.
            statement (Statement): Triple/Quad/GraphStart message to fill.

        Returns:
            Rows: extra rows produced while encoding the term
                (empty for blank nodes).

        """
        if isinstance(term, IRI):
            iri_field = self.get_iri_field(statement, slot)
            return self.encode_iri(term._iri, iri_field)

        if isinstance(term, Literal):
            return self.encode_literal(
                literal=self.get_literal_field(statement, slot),
                lex=term._lex,
                language=term._langtag,
                datatype=term._datatype,
            )

        if isinstance(term, BlankNode):
            # Blank nodes are written straight into the statement; no rows needed.
            self.set_bnode_field(statement, slot, term._identifier)
            return ()

        if isinstance(term, Triple):
            quoted_field = self.get_triple_field(statement, slot)
            return self.encode_quoted_triple(term, quoted_field)

        # Anything else is delegated to the base encoder, which is expected
        # to report the unsupported term.
        return super().encode_spo(term, slot, statement)

    def encode_graph(self, term: object, statement: HasGraph) -> Rows:
        """
        Encode a graph-name term into the ``g_*`` field of the statement.

        Args:
            term (object): graph term to encode.
            statement (HasGraph): Quad/GraphStart message whose graph
                field is filled.

        Returns:
            Rows: extra rows produced while encoding the term
                (empty for blank nodes).

        """
        if term == DefaultGraph:
            return self.encode_default_graph(statement.g_default_graph)

        if isinstance(term, IRI):
            return self.encode_iri(term._iri, statement.g_iri)

        if isinstance(term, Literal):
            return self.encode_literal(
                lex=term._lex,
                language=term._langtag,
                datatype=term._datatype,
                literal=statement.g_literal,
            )

        if isinstance(term, BlankNode):
            # Blank node graph names are assigned directly; no rows needed.
            statement.g_bnode = term._identifier
            return ()

        # Anything else is delegated to the base encoder, which is expected
        # to report the unsupported term.
        return super().encode_graph(term, statement)
104
+
105
+
106
def namespace_declarations(store: GenericStatementSink, stream: Stream) -> None:
    """
    Declare every namespace bound in the sink on the given stream.

    Args:
        store (GenericStatementSink): sink whose ``namespaces`` pairs
            (prefix, namespace) are read.
        stream (Stream): stream receiving one declaration per pair.

    """
    for name, iri in store.namespaces:
        # The namespace may be any object; the stream gets its string form.
        stream.namespace_declaration(name=name, iri=str(iri))
109
+
110
+
111
@singledispatch
def stream_frames(
    stream: Stream,
    data: GenericStatementSink | Generator[Quad | Triple],  # noqa: ARG001
) -> Generator[jelly.RdfStreamFrame]:
    """
    Dispatch serialization on the concrete type of ``stream``.

    This is the fallback used when no implementation is registered for the
    stream's type; it always fails.

    Args:
        stream (Stream): stream implementation to dispatch on.
        data (GenericStatementSink | Generator[Quad | Triple]): unused here.

    Raises:
        TypeError: always, because the stream type is not supported.

    """
    raise TypeError(f"invalid stream implementation {stream}")
118
+
119
+
120
@stream_frames.register(TripleStream)
def triples_stream_frames(
    stream: TripleStream,
    data: GenericStatementSink | Generator[Triple],
) -> Generator[jelly.RdfStreamFrame]:
    """
    Serialize triples into Jelly frames using a triples physical stream.

    Args:
        stream (TripleStream): stream that specifies triples processing.
        data (GenericStatementSink | Generator[Triple]):
            sink or generator of triples to serialize.

    Yields:
        Generator[jelly.RdfStreamFrame]: jelly frames.

    """
    stream.enroll()

    # Only a sink carries namespace bindings; a bare generator has none.
    if isinstance(data, GenericStatementSink):
        if stream.options.params.namespace_declarations:
            namespace_declarations(data, stream)

    for triple in data:
        frame = stream.triple(triple)
        if frame:
            yield frame

    # Give the flow a chance to emit a frame once the whole input is consumed.
    graph_frame = stream.flow.frame_from_graph()
    if graph_frame:
        yield graph_frame

    # Flat streams may still hold buffered rows that need a final frame.
    if stream.stream_types.flat:
        final_frame = stream.flow.to_stream_frame()
        if final_frame:
            yield final_frame
153
+
154
+
155
@stream_frames.register(QuadStream)
def quads_stream_frames(
    stream: QuadStream,
    data: GenericStatementSink | Generator[Quad],
) -> Generator[jelly.RdfStreamFrame]:
    """
    Serialize quads into Jelly frames using a quads physical stream.

    Args:
        stream (QuadStream): stream that specifies quads processing.
        data (GenericStatementSink | Generator[Quad]): sink or generator of
            quads to serialize.

    Yields:
        Generator[jelly.RdfStreamFrame]: jelly frames.

    """
    stream.enroll()

    # Namespace bindings exist only on a sink. A plain generator has no
    # ``namespaces`` attribute, so declaring them unconditionally would raise
    # AttributeError whenever namespace declarations are enabled.
    if (
        isinstance(data, GenericStatementSink)
        and stream.options.params.namespace_declarations
    ):
        namespace_declarations(data, stream)

    iterator: Generator[Quad]
    if isinstance(data, GenericStatementSink):
        iterator = cast(Generator[Quad], data.store)
    else:
        iterator = data

    for terms in iterator:
        if frame := stream.quad(terms):
            yield frame

    # Give the flow a chance to emit a frame once the whole input is consumed.
    if frame := stream.flow.frame_from_dataset():
        yield frame

    # Flat streams may still hold buffered rows that need a final frame.
    if stream.stream_types.flat and (frame := stream.flow.to_stream_frame()):
        yield frame
188
+
189
+
190
@stream_frames.register(GraphStream)
def graphs_stream_frames(
    stream: GraphStream,
    data: GenericStatementSink | Generator[Quad],
) -> Generator[jelly.RdfStreamFrame]:
    """
    Serialize quads into Jelly frames as a stream of graphs.

    Notes:
        Graphs are produced by ``split_to_graphs``: one new
        GenericStatementSink per run of consecutive quads sharing the
        same g term.
        NOTE(review): the original documentation states that with a
        DatasetsFrameFlow the whole dataset is encoded into one frame;
        that depends on the flow implementation and is not visible here.

    Args:
        stream (GraphStream): stream that specifies graphs processing.
        data (GenericStatementSink | Generator[Quad]): sink or generator of
            quads to serialize.

    Yields:
        Generator[jelly.RdfStreamFrame]: jelly frames.

    """
    stream.enroll()

    # Namespace bindings exist only on a sink. A plain generator has no
    # ``namespaces`` attribute, so declaring them unconditionally would raise
    # AttributeError whenever namespace declarations are enabled.
    if (
        isinstance(data, GenericStatementSink)
        and stream.options.params.namespace_declarations
    ):
        namespace_declarations(data, stream)

    statements: Generator[Quad]
    if isinstance(data, GenericStatementSink):
        statements = cast(Generator[Quad], data.store)
    else:
        # Anything that is not a sink is treated as an iterable of quads.
        statements = data

    for graph in split_to_graphs(statements):
        yield from stream.graph(graph_id=graph.identifier, graph=graph)

    # Give the flow a chance to emit a frame once the whole input is consumed.
    if frame := stream.flow.frame_from_dataset():
        yield frame

    # Flat streams may still hold buffered rows that need a final frame.
    if stream.stream_types.flat and (frame := stream.flow.to_stream_frame()):
        yield frame
232
+
233
+
234
def split_to_graphs(data: Generator[Quad]) -> Generator[GenericStatementSink]:
    """
    Group consecutive quads with the same graph term into sinks.

    Notes:
        Quads are not sorted or regrouped: a graph term that reappears
        after a different one starts a new sink.

    Args:
        data (Generator[Quad]): generator of quads.

    Yields:
        Generator[GenericStatementSink]: one sink per run of quads,
            with ``identifier`` set to the run's g term and the quads'
            s/p/o stored as triples.

    """
    graph_name: GraphName | None = None
    sink: GenericStatementSink | None = None

    for quad in data:
        starts_new_graph = graph_name != quad.g
        if starts_new_graph:
            # Flush the finished run before opening the next one.
            if sink is not None:
                yield sink
            graph_name = quad.g
            sink = GenericStatementSink(identifier=graph_name)

        assert sink is not None
        sink.add(Triple(quad.s, quad.p, quad.o))

    # Flush the last run, if the input was not empty.
    if sink is not None:
        yield sink
266
+
267
+
268
def guess_options(sink: GenericStatementSink) -> SerializerOptions:
    """
    Derive serializer options from the kind of statements held by the sink.

    Args:
        sink (GenericStatementSink): sink whose ``is_triples_sink`` flag
            selects between flat triples and flat quads.

    Returns:
        SerializerOptions: options with the chosen logical type and with
            generalized statements and RDF-star both enabled.

    """
    if sink.is_triples_sink:
        logical_type = jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES
    else:
        logical_type = jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS

    # The generic sink accepts any term type, including quoted triples and
    # generalized RDF terms, so both features are switched on by default.
    return SerializerOptions(
        logical_type=logical_type,
        params=StreamParameters(generalized_statements=True, rdf_star=True),
    )
279
+
280
+
281
def guess_stream(options: SerializerOptions, sink: GenericStatementSink) -> Stream:
    """
    Return an appropriate stream implementation for the given options.

    Notes:
        QuadStream is chosen only when the base logical type
        (``options.logical_type % 10``) is not GRAPHS and the sink does not
        hold triples. In every other case, including a GRAPHS base type
        with a quads sink, TripleStream is used.

    Args:
        options (SerializerOptions): serializer options; must not be None.
        sink (GenericStatementSink): sink whose ``is_triples_sink`` flag
            is consulted.

    Returns:
        Stream: stream built with a ``GenericSinkTermEncoder`` that uses
            ``options.lookup_preset``.

    """
    base_logical_type = options.logical_type % 10

    stream_cls: type[Stream]
    if (
        base_logical_type != jelly.LOGICAL_STREAM_TYPE_GRAPHS
        and not sink.is_triples_sink
    ):
        stream_cls = QuadStream
    else:
        stream_cls = TripleStream

    # ``options`` has already been dereferenced above, so it cannot be None
    # here; the lookup preset is read unconditionally.
    return stream_cls(
        encoder=GenericSinkTermEncoder(lookup_preset=options.lookup_preset),
        options=options,
    )
301
+
302
+
303
def grouped_stream_to_frames(
    sink_generator: Generator[GenericStatementSink],
    options: SerializerOptions | None = None,
) -> Generator[jelly.RdfStreamFrame]:
    """
    Transform multiple GenericStatementSinks into Jelly frames.

    Notes:
        The stream is created once, from the first sink, and reused for
        every following sink.
        NOTE(review): the original documentation states one frame is
        produced per sink; the actual framing is decided by the stream's
        flow, which is not visible here.

    Args:
        sink_generator (Generator[GenericStatementSink]): generator of
            GenericStatementSink to transform.
        options (SerializerOptions | None, optional): stream options to use.
            When None, they are guessed from the first sink. Defaults to None.

    Yields:
        Generator[jelly.RdfStreamFrame]: produced Jelly frames.

    """
    stream: Stream | None = None
    for sink in sink_generator:
        # Explicit None check: the stream must be built exactly once,
        # independent of how a Stream object evaluates in a boolean context.
        if stream is None:
            if options is None:
                options = guess_options(sink)
            stream = guess_stream(options, sink)
        yield from stream_frames(stream, sink)
332
+
333
+
334
def grouped_stream_to_file(
    stream: Generator[GenericStatementSink],
    output_file: IO[bytes],
    **kwargs: Any,
) -> None:
    """
    Serialize a stream of GenericStatementSink into a binary file.

    Args:
        stream (Generator[GenericStatementSink]): generator of
            GenericStatementSink to serialize.
        output_file (IO[bytes]): output buffered writer.
        **kwargs (Any): forwarded to ``grouped_stream_to_frames``.

    """
    frames = grouped_stream_to_frames(stream, **kwargs)
    for jelly_frame in frames:
        write_delimited(jelly_frame, output_file)
351
+
352
+
353
def flat_stream_to_frames(
    statements: Generator[Triple | Quad],
    options: SerializerOptions | None = None,
) -> Generator[jelly.RdfStreamFrame]:
    """
    Serialize a stream of raw triples or quads into Jelly frames.

    Notes:
        The first statement is consumed up front to pick the stream type;
        it is put back in front of the remaining statements before
        serialization. An empty input yields no frames.
        Besides generators, any iterable (for example a list) is accepted
        at runtime.

    Args:
        statements (Generator[Triple | Quad]):
            s/p/o triples or s/p/o/g quads to serialize.
        options (SerializerOptions | None, optional):
            if omitted, guessed based on the first statement.

    Yields:
        Generator[jelly.RdfStreamFrame]: generated frames.

    """
    # ``next`` needs an iterator; ``iter`` is a no-op for generators and makes
    # plain iterables work too.
    iterator = iter(statements)
    first = next(iterator, None)
    if first is None:
        return

    # A throwaway sink holding only the first statement is enough to decide
    # between triples and quads.
    sink = GenericStatementSink()
    sink.add(first)
    if options is None:
        options = guess_options(sink)
    stream = guess_stream(options, sink)

    # Wrapped in a generator expression so the value matches the declared
    # Generator type expected by ``stream_frames``.
    combined: Generator[Triple | Quad] | GenericStatementSink = (
        item for item in chain([first], iterator)
    )

    yield from stream_frames(stream, combined)
385
+
386
+
387
def flat_stream_to_file(
    statements: Generator[Triple | Quad],
    output_file: IO[bytes],
    options: SerializerOptions | None = None,
) -> None:
    """
    Serialize Triple or Quad statements into a binary file.

    Args:
        statements (Generator[Triple | Quad]): statements to serialize.
        output_file (IO[bytes]): output buffered writer.
        options (SerializerOptions | None, optional): stream options,
            forwarded to ``flat_stream_to_frames``.

    """
    frames = flat_stream_to_frames(statements, options)
    for jelly_frame in frames:
        write_delimited(jelly_frame, output_file)
@@ -0,0 +1,24 @@
1
+ import rdflib
2
+ import rdflib.util
3
+
4
+ from pyjelly import options
5
+
6
+
7
def register_extension_to_rdflib(extension: str = ".jelly") -> None:
    """
    Make [rdflib.util.guess_format][] discover Jelly format.

    The extension is stored in ``rdflib.util.SUFFIX_FORMAT_MAP`` without
    its leading dot and mapped to the ``"jelly"`` format name.

    >>> rdflib.util.guess_format("foo.jelly")
    >>> register_extension_to_rdflib()
    >>> rdflib.util.guess_format("foo.jelly")
    'jelly'
    """
    suffix = extension.removeprefix(".")
    rdflib.util.SUFFIX_FORMAT_MAP[suffix] = "jelly"
17
+
18
+
19
def _side_effects() -> None:
    """Apply the import-time side effects of the rdflib integration."""
    register_extension_to_rdflib()
21
+
22
+
23
# Import-time registration runs only when the package-level
# INTEGRATION_SIDE_EFFECTS switch is truthy.
if options.INTEGRATION_SIDE_EFFECTS:
    _side_effects()