pyjelly 0.7.1__cp311-cp311-macosx_11_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. cb523b6bada1c6eba8b4__mypyc.cpython-311-darwin.so +0 -0
  2. pyjelly/__init__.py +0 -0
  3. pyjelly/_proto/grpc.proto +33 -0
  4. pyjelly/_proto/patch.proto +165 -0
  5. pyjelly/_proto/rdf.proto +384 -0
  6. pyjelly/errors.py +10 -0
  7. pyjelly/integrations/__init__.py +0 -0
  8. pyjelly/integrations/generic/__init__.py +0 -0
  9. pyjelly/integrations/generic/generic_sink.py +202 -0
  10. pyjelly/integrations/generic/parse.py +412 -0
  11. pyjelly/integrations/generic/serialize.cpython-311-darwin.so +0 -0
  12. pyjelly/integrations/generic/serialize.py +402 -0
  13. pyjelly/integrations/rdflib/__init__.py +24 -0
  14. pyjelly/integrations/rdflib/parse.py +560 -0
  15. pyjelly/integrations/rdflib/serialize.py +408 -0
  16. pyjelly/jelly/__init__.py +5 -0
  17. pyjelly/jelly/rdf_pb2.py +70 -0
  18. pyjelly/jelly/rdf_pb2.pyi +231 -0
  19. pyjelly/options.py +141 -0
  20. pyjelly/parse/__init__.py +0 -0
  21. pyjelly/parse/decode.cpython-311-darwin.so +0 -0
  22. pyjelly/parse/decode.py +447 -0
  23. pyjelly/parse/ioutils.cpython-311-darwin.so +0 -0
  24. pyjelly/parse/ioutils.py +115 -0
  25. pyjelly/parse/lookup.cpython-311-darwin.so +0 -0
  26. pyjelly/parse/lookup.py +70 -0
  27. pyjelly/serialize/__init__.py +0 -0
  28. pyjelly/serialize/encode.cpython-311-darwin.so +0 -0
  29. pyjelly/serialize/encode.py +397 -0
  30. pyjelly/serialize/flows.py +196 -0
  31. pyjelly/serialize/ioutils.cpython-311-darwin.so +0 -0
  32. pyjelly/serialize/ioutils.py +13 -0
  33. pyjelly/serialize/lookup.cpython-311-darwin.so +0 -0
  34. pyjelly/serialize/lookup.py +137 -0
  35. pyjelly/serialize/streams.cpython-311-darwin.so +0 -0
  36. pyjelly/serialize/streams.py +281 -0
  37. pyjelly-0.7.1.dist-info/METADATA +114 -0
  38. pyjelly-0.7.1.dist-info/RECORD +41 -0
  39. pyjelly-0.7.1.dist-info/WHEEL +6 -0
  40. pyjelly-0.7.1.dist-info/entry_points.txt +7 -0
  41. pyjelly-0.7.1.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,202 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import deque
4
+ from collections.abc import Generator
5
+ from typing import IO, NamedTuple, Union
6
+
7
+
8
class _DefaultGraph:
    """Marker type whose single instance stands for the default graph."""

    def __repr__(self) -> str:
        # The marker renders as an empty string; str() falls back to this too.
        return ""


# Module-level singleton used wherever "the default graph" must be named.
DefaultGraph = _DefaultGraph()
14
+
15
+
16
class BlankNode:
    """
    RDF blank node.

    Notes:
        Only the blank node identifier is kept, as a string.
        Equality and hashing are both derived from that identifier.

    """

    def __init__(self, identifier: str) -> None:
        self._identifier: str = identifier

    def __str__(self) -> str:
        # Rendered with the "_:" prefix in front of the identifier.
        return f"_:{self._identifier}"

    def __repr__(self) -> str:
        return f"BlankNode(identifier={self._identifier})"

    def __eq__(self, other: object) -> bool:
        # Anything that is not a BlankNode never compares equal.
        if not isinstance(other, BlankNode):
            return False
        return self._identifier == other._identifier

    def __hash__(self) -> int:
        # Kept consistent with __eq__: equal identifiers hash equally.
        return hash(self._identifier)
35
+
36
+
37
class IRI:
    """
    RDF IRI.

    Notes:
        The IRI is kept as a plain string.
        Equality and hashing are both derived from that string.

    """

    def __init__(self, iri: str) -> None:
        self._iri: str = iri

    def __str__(self) -> str:
        # Rendered wrapped in angle brackets.
        return f"<{self._iri}>"

    def __repr__(self) -> str:
        return f"IRI({self._iri})"

    def __eq__(self, other: object) -> bool:
        # Anything that is not an IRI never compares equal.
        if not isinstance(other, IRI):
            return False
        return self._iri == other._iri

    def __hash__(self) -> int:
        # Kept consistent with __eq__: equal IRI strings hash equally.
        return hash(self._iri)
56
+
57
+
58
class Literal:
    """
    RDF literal.

    Notes:
        A literal has a lexical form plus an optional language tag and an
        optional datatype. Every part is kept as a string (or None when
        absent). Equality and hashing take all three parts into account.

    """

    def __init__(
        self, lex: str, langtag: str | None = None, datatype: str | None = None
    ) -> None:
        self._lex: str = lex
        self._langtag: str | None = langtag
        self._datatype: str | None = datatype

    def __str__(self) -> str:
        quoted = f'"{self._lex}"'
        # A language tag takes precedence: the datatype is only rendered
        # when no (non-empty) language tag is present.
        if self._langtag:
            return f"{quoted}@{self._langtag}"
        if self._datatype:
            return f"{quoted}^^<{self._datatype}>"
        return quoted

    def __repr__(self) -> str:
        return (
            f"Literal({self._lex!r}, langtag={self._langtag!r}, "
            f"datatype={self._datatype!r})"
        )

    def __eq__(self, other: object) -> bool:
        # Anything that is not a Literal never compares equal.
        if not isinstance(other, Literal):
            return False
        mine = (self._lex, self._langtag, self._datatype)
        theirs = (other._lex, other._langtag, other._datatype)
        return mine == theirs

    def __hash__(self) -> int:
        # Hash over the same three parts that __eq__ compares.
        return hash((self._lex, self._langtag, self._datatype))
100
+
101
+
102
# Any RDF term usable inside a statement. "Triple" is given as a string
# because the Triple class is only defined further down in this module.
Node = Union[BlankNode, IRI, Literal, "Triple"]
# A graph name is either a regular term or the default-graph marker.
GraphName = Node | _DefaultGraph


# Number of terms in a triple; used to tell triples and quads apart by length.
TRIPLE_ARITY = 3
107
+
108
+
109
class Triple(NamedTuple):
    """RDF triple: subject, predicate and object, in that order."""

    s: Node  # subject
    p: Node  # predicate
    o: Node  # object
115
+
116
+
117
class Quad(NamedTuple):
    """RDF quad: subject, predicate, object and graph name, in that order."""

    s: Node  # subject
    p: Node  # predicate
    o: Node  # object
    g: GraphName  # graph the statement belongs to
124
+
125
+
126
class Prefix(NamedTuple):
    """Generic namespace declaration: a prefix label and the IRI it maps to."""

    prefix: str  # prefix label
    iri: IRI  # namespace IRI bound to the label
131
+
132
+
133
class GenericStatementSink:
    """
    In-memory container for statements and namespace declarations.

    Notes:
        Statements (triples or quads) are kept in insertion order.
        Namespaces are kept as a prefix -> IRI mapping; binding a prefix
        again overwrites the earlier IRI.

    """

    _store: deque[Triple | Quad]

    def __init__(self, identifier: GraphName = DefaultGraph) -> None:
        """
        Initialize statements storage, namespaces dictionary, and identifier.

        Notes:
            _store preserves the order of statements.

        Args:
            identifier (GraphName, optional): Identifier for a sink.
                Defaults to DefaultGraph.

        """
        self._identifier = identifier
        self._namespaces: dict[str, IRI] = {}
        self._store: deque[Triple | Quad] = deque()

    def add(self, statement: Triple | Quad) -> None:
        """Append a statement at the end of the store."""
        self._store.append(statement)

    def bind(self, prefix: str, namespace: IRI) -> None:
        """Map a prefix to a namespace IRI, replacing any earlier binding."""
        self._namespaces[prefix] = namespace

    def __iter__(self) -> Generator[Triple | Quad]:
        """Yield the stored statements in insertion order."""
        yield from self._store

    def __len__(self) -> int:
        """Return the number of stored statements."""
        return len(self._store)

    @property
    def namespaces(self) -> Generator[tuple[str, IRI]]:
        """Yield (prefix, IRI) pairs for every bound namespace."""
        yield from self._namespaces.items()

    @property
    def identifier(self) -> GraphName:
        """Return the identifier this sink was given."""
        return self._identifier

    @property
    def store(self) -> Generator[Triple | Quad]:
        """Yield the stored statements in insertion order."""
        yield from self._store

    @property
    def is_triples_sink(self) -> bool:
        """
        Check if the sink contains triples or quads.

        Notes:
            Only the first stored statement is inspected.
            An empty sink is reported as not being a triples sink.

        Returns:
            bool: true, if length of the first statement is 3.

        """
        if not self._store:
            return False
        first_statement = self._store[0]
        return len(first_statement) == TRIPLE_ARITY

    def parse(self, input_file: IO[bytes]) -> None:
        """
        Load a jelly stream and replace this sink's contents with it.

        Notes:
            The store, namespaces and identifier of this sink are all
            overwritten with those of the sink produced by the parser.

        Args:
            input_file (IO[bytes]): binary stream handed to the parser.

        """
        # Imported here rather than at module level: the parse module itself
        # imports from this module.
        from pyjelly.integrations.generic.parse import (  # noqa: PLC0415
            parse_jelly_to_graph,
        )

        loaded = parse_jelly_to_graph(input_file)
        self._store = loaded._store
        self._namespaces = loaded._namespaces
        self._identifier = loaded._identifier

    def serialize(self, output_file: IO[bytes]) -> None:
        """
        Write this sink to a binary stream.

        Args:
            output_file (IO[bytes]): binary stream handed to the serializer.

        """
        from pyjelly.integrations.generic.serialize import (  # noqa: PLC0415
            grouped_stream_to_file,
        )

        # The serializer is handed a generator that yields just this sink.
        only_this_sink = (sink for sink in (self,))
        grouped_stream_to_file(only_this_sink, output_file)
@@ -0,0 +1,412 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable, Generator, Iterable, MutableMapping
4
+ from contextvars import ContextVar
5
+ from itertools import chain
6
+ from typing import IO, Any
7
+ from typing_extensions import override
8
+
9
+ from mypy_extensions import mypyc_attr
10
+
11
+ from pyjelly import jelly
12
+ from pyjelly.errors import JellyConformanceError
13
+ from pyjelly.integrations.generic.generic_sink import (
14
+ IRI,
15
+ BlankNode,
16
+ DefaultGraph,
17
+ GenericStatementSink,
18
+ GraphName,
19
+ Literal,
20
+ Prefix,
21
+ Quad,
22
+ Triple,
23
+ )
24
+ from pyjelly.parse.decode import Adapter, Decoder, ParserOptions
25
+ from pyjelly.parse.ioutils import get_options_and_frames
26
+
27
# A statement handled by this module is either a Triple or a Quad.
Statement = Triple | Quad
28
+
29
+
30
@mypyc_attr(allow_interpreted_subclasses=True)
class GenericStatementSinkAdapter(Adapter):
    """
    Implement Adapter for generic statements.

    Notes:
        Builds the custom RDF terms expected by GenericStatementSink
        (IRI, BlankNode, Literal, DefaultGraph), and also covers
        namespace declarations and quoted triples.

    Args:
        Adapter (_type_): base Adapter class

    """

    @override
    def iri(self, iri: str) -> IRI:
        """Wrap an IRI string in an IRI term."""
        return IRI(iri)

    @override
    def bnode(self, bnode: str) -> BlankNode:
        """Wrap a blank node identifier in a BlankNode term."""
        return BlankNode(bnode)

    @override
    def default_graph(self) -> GraphName:
        """Return the DefaultGraph marker."""
        return DefaultGraph

    @override
    def literal(
        self,
        lex: str,
        language: str | None = None,
        datatype: str | None = None,
    ) -> Literal:
        """Build a Literal from its lexical form, language tag and datatype."""
        return Literal(lex, langtag=language, datatype=datatype)

    @override
    def namespace_declaration(self, name: str, iri: str) -> Prefix:
        """Build a Prefix whose IRI is created through self.iri."""
        namespace = self.iri(iri)
        return Prefix(prefix=name, iri=namespace)

    @override
    def quoted_triple(self, terms: Iterable[Any]) -> Triple:
        """Build a Triple from the given terms, to be used as a term itself."""
        return Triple(*terms)
72
+
73
+
74
@mypyc_attr(allow_interpreted_subclasses=True)
class GenericTriplesAdapter(GenericStatementSinkAdapter):
    """
    Triples adapter implementation for GenericStatementSink.

    Notes:
        Adds triple construction on top of the term and namespace
        handling inherited from GenericStatementSinkAdapter.

    Args:
        GenericStatementSinkAdapter (_type_): base GenericStatementSink
            adapter implementation that handles terms and namespaces.

    """

    def __init__(self, options: ParserOptions) -> None:
        super().__init__(options=options)

    @override
    def triple(self, terms: Iterable[Any]) -> Triple:
        """Build a Triple from the given terms."""
        return Triple(*terms)
94
+
95
+
96
@mypyc_attr(allow_interpreted_subclasses=True)
class GenericQuadsBaseAdapter(GenericStatementSinkAdapter):
    """Common base class of the adapters selected by parse_quads_stream."""

    def __init__(self, options: ParserOptions) -> None:
        super().__init__(options=options)
100
+
101
+
102
@mypyc_attr(allow_interpreted_subclasses=True)
class GenericQuadsAdapter(GenericQuadsBaseAdapter):
    """
    Extends GenericQuadsBaseAdapter for QUADS physical type.

    Notes:
        Adds quad construction from the given terms.

    Args:
        GenericQuadsBaseAdapter (_type_): quads adapter that handles
            base quads processing.

    """

    @override
    def quad(self, terms: Iterable[Any]) -> Quad:
        """Build a Quad from the given terms."""
        return Quad(*terms)
116
+
117
+
118
@mypyc_attr(allow_interpreted_subclasses=True)
class GenericGraphsAdapter(GenericQuadsBaseAdapter):
    """
    Extends GenericQuadsBaseAdapter for GRAPHS physical type.

    Notes:
        introduces graph start/end, checks if graph exists.
        The graph given to graph_start is remembered until graph_end and
        attached to every triple, so each triple is returned as a Quad.

    Args:
        GenericQuadsBaseAdapter (_type_): quads adapter that handles
            base quads processing.

    Raises:
        JellyConformanceError: raised if graph start message was not received.

    """

    # Graph currently open; None while no graph has been started.
    _graph_id: GraphName | None

    def __init__(
        self,
        options: ParserOptions,
    ) -> None:
        super().__init__(options=options)
        self._graph_id = None

    def _require_graph(self) -> GraphName:
        """Return the open graph, or raise if no graph was started."""
        graph_id = self._graph_id
        if graph_id is None:
            msg = "new graph was not started"
            raise JellyConformanceError(msg)
        return graph_id

    @property
    def graph(self) -> None:
        """Raise JellyConformanceError if no graph is currently open."""
        self._require_graph()

    @override
    def graph_start(self, graph_id: GraphName) -> None:
        """Remember graph_id as the graph subsequent triples belong to."""
        self._graph_id = graph_id

    @override
    def triple(self, terms: Iterable[Any]) -> Quad:
        """
        Build a Quad from the triple terms and the currently open graph.

        Raises:
            JellyConformanceError: if no graph was started beforehand.

        """
        # Validate before consuming `terms`, so a triple outside of any graph
        # is rejected instead of silently yielding a Quad with g=None.
        graph_id = self._require_graph()
        return Quad(*terms, graph_id)

    @override
    def graph_end(self) -> None:
        """Forget the open graph."""
        self._graph_id = None
161
+
162
+
163
def parse_triples_stream(
    frames: Iterable[jelly.RdfStreamFrame],
    options: ParserOptions,
    frame_metadata: ContextVar[MutableMapping[str, bytes]] | None = None,
) -> Generator[Iterable[Triple | Prefix]]:
    """
    Parse flat triple stream.

    Notes:
        A single GenericTriplesAdapter/Decoder pair is shared by all frames.
        When frame_metadata is given, it is set for every frame before that
        frame's rows are yielded: to the frame's metadata if it is non-empty,
        otherwise to an empty dict.

    Args:
        frames (Iterable[jelly.RdfStreamFrame]): iterator over stream frames
        options (ParserOptions): stream options
        frame_metadata: (ContextVar[ScalarMap[str, bytes]]): context variable
            used for extracting frame metadata

    Yields:
        Generator[Iterable[Triple | Prefix]]:
            Generator of iterables of Triple or Prefix objects,
            one iterable per frame.

    """
    decoder = Decoder(adapter=GenericTriplesAdapter(options))
    for frame in frames:
        if frame_metadata is not None:
            if frame.metadata:
                frame_metadata.set(frame.metadata)
            else:
                frame_metadata.set({})
        yield decoder.iter_rows(frame)
192
+
193
+
194
def parse_quads_stream(
    frames: Iterable[jelly.RdfStreamFrame],
    options: ParserOptions,
    frame_metadata: ContextVar[MutableMapping[str, bytes]] | None = None,
) -> Generator[Iterable[Quad | Prefix]]:
    """
    Parse flat quads stream.

    Notes:
        GenericQuadsAdapter is used when the physical type is QUADS; any
        other physical type is handled with GenericGraphsAdapter.
        When frame_metadata is given, it is set for every frame before that
        frame's rows are yielded: to the frame's metadata if it is non-empty,
        otherwise to an empty dict.

    Args:
        frames (Iterable[jelly.RdfStreamFrame]): iterator over stream frames
        options (ParserOptions): stream options
        frame_metadata: (ContextVar[ScalarMap[str, bytes]]): context variable
            used for extracting frame metadata

    Yields:
        Generator[Iterable[Quad | Prefix]]:
            Generator of iterables of Quad or Prefix objects,
            one iterable per frame.

    """
    is_quads_stream = (
        options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_QUADS
    )
    adapter_class: type[GenericQuadsBaseAdapter] = (
        GenericQuadsAdapter if is_quads_stream else GenericGraphsAdapter
    )
    decoder = Decoder(adapter=adapter_class(options=options))
    for frame in frames:
        if frame_metadata is not None:
            if frame.metadata:
                frame_metadata.set(frame.metadata)
            else:
                frame_metadata.set({})
        yield decoder.iter_rows(frame)
228
+
229
+
230
def parse_jelly_grouped(
    inp: IO[bytes],
    sink_factory: Callable[[], GenericStatementSink] = lambda: GenericStatementSink(),
    *,
    logical_type_strict: bool = False,
    frame_metadata: ContextVar[MutableMapping[str, bytes]] | None = None,
) -> Generator[GenericStatementSink]:
    """
    Take a jelly file and yield generic statement sinks.

    Yields one generic statements sink per frame. Each sink is created with
    sink_factory and receives the frame's namespace declarations (bound) and
    statements (added).

    Args:
        inp (IO[bytes]): input jelly buffered binary stream
        sink_factory (Callable): lambda to construct a statement sink.
            By default, creates an empty in-memory GenericStatementSink.
        logical_type_strict (bool): If True, validate the *logical* type
            in stream options and require a grouped logical type.
            Otherwise, only the physical type is used to route parsing.
        frame_metadata: (ContextVar[ScalarMap[str, bytes]]): context variable
            used for extracting frame metadata

    Raises:
        JellyConformanceError: in strict mode, if the stream options carry no
            stream types, or the logical type is unspecified or flat
        NotImplementedError: is raised if a physical type is not implemented

    Yields:
        Generator[GenericStatementSink]:
            returns generators for GenericStatementSink, regardless of stream type.

    """
    options, frames = get_options_and_frames(inp)

    stream_types = getattr(options, "stream_types", None)
    if logical_type_strict:
        if stream_types is None:
            msg = "strict logical type check requires options.stream_types"
            raise JellyConformanceError(msg)
        if (
            stream_types.logical_type == jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED
            or stream_types.flat
        ):
            lt_name = jelly.LogicalStreamType.Name(stream_types.logical_type)
            msg = f"expected GROUPED logical type, got {lt_name}"
            raise JellyConformanceError(msg)

    # Route on the physical type only; both routes feed the same sink loop.
    physical_type = options.stream_types.physical_type
    frame_batches: Generator[Iterable[Statement | Prefix]]
    if physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
        frame_batches = parse_triples_stream(
            frames=frames,
            options=options,
            frame_metadata=frame_metadata,
        )
    elif physical_type in (
        jelly.PHYSICAL_STREAM_TYPE_QUADS,
        jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
    ):
        frame_batches = parse_quads_stream(
            frames=frames,
            options=options,
            frame_metadata=frame_metadata,
        )
    else:
        physical_type_name = jelly.PhysicalStreamType.Name(physical_type)
        msg = f"the stream type {physical_type_name} is not supported "
        raise NotImplementedError(msg)

    for batch in frame_batches:
        sink = sink_factory()
        for item in batch:
            if isinstance(item, Prefix):
                sink.bind(item.prefix, item.iri)
            else:
                sink.add(item)
        yield sink
318
+
319
+
320
def parse_jelly_to_graph(
    inp: IO[bytes],
    sink_factory: Callable[[], GenericStatementSink] = lambda: GenericStatementSink(),
) -> GenericStatementSink:
    """
    Collect every event of a jelly stream into one GenericStatementSink.

    Notes:
        The stream is read through parse_jelly_flat without the strict
        logical type check. Namespace declarations are bound on the sink,
        everything else is added as a statement.

    Args:
        inp (IO[bytes]): input jelly stream.
        sink_factory (Callable[[], GenericStatementSink]): factory to create
            statement sink.
            By default creates an empty in-memory GenericStatementSink.
            Has no division for datasets/graphs,
            utilizes the same underlying data structures.

    Returns:
        GenericStatementSink: GenericStatementSink with statements.

    """
    options, frames = get_options_and_frames(inp)
    sink = sink_factory()

    events = parse_jelly_flat(
        inp=inp, frames=frames, options=options, logical_type_strict=False
    )
    for event in events:
        if isinstance(event, Prefix):
            sink.bind(event.prefix, event.iri)  # type: ignore[union-attr, unused-ignore]
            continue
        sink.add(event)
    return sink
350
+
351
+
352
def parse_jelly_flat(
    inp: IO[bytes],
    frames: Iterable[jelly.RdfStreamFrame] | None = None,
    options: ParserOptions | None = None,
    *,
    logical_type_strict: bool = False,
) -> Generator[Statement | Prefix]:  # type: ignore[valid-type, unused-ignore]
    """
    Parse jelly file with FLAT logical type into a Generator of stream events.

    Notes:
        Frames are flattened: the events of every frame are yielded one
        after another. If either frames or options is missing, both are
        read from inp.

    Args:
        inp (IO[bytes]): input jelly buffered binary stream.
        frames (Iterable[jelly.RdfStreamFrame] | None):
            jelly frames if read before.
        options (ParserOptions | None): stream options
            if read before.
        logical_type_strict (bool): If True, validate the *logical* type
            in stream options and require FLAT (TRIPLES/QUADS).
            Otherwise, only the physical type is used to route parsing.

    Raises:
        JellyConformanceError: in strict mode, if the stream options carry no
            stream types or the logical type is not flat
        NotImplementedError: if physical type is not supported

    Yields:
        Generator[Statement | Prefix]: Generator of stream events

    """
    if frames is None or options is None:
        options, frames = get_options_and_frames(inp)

    stream_types = getattr(options, "stream_types", None)
    if logical_type_strict:
        if stream_types is None:
            msg = "strict logical type check requires options.stream_types"
            raise JellyConformanceError(msg)
        if not stream_types.flat:
            lt_name = jelly.LogicalStreamType.Name(stream_types.logical_type)
            msg = f"expected FLAT logical type (TRIPLES/QUADS), got {lt_name}"
            raise JellyConformanceError(msg)

    # Route on the physical type only; both routes are flattened the same way.
    physical_type = options.stream_types.physical_type
    frame_batches: Generator[Iterable[Statement | Prefix]]
    if physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
        frame_batches = parse_triples_stream(frames=frames, options=options)
    elif physical_type in (
        jelly.PHYSICAL_STREAM_TYPE_QUADS,
        jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
    ):
        frame_batches = parse_quads_stream(frames=frames, options=options)
    else:
        physical_type_name = jelly.PhysicalStreamType.Name(physical_type)
        msg = f"the stream type {physical_type_name} is not supported "
        raise NotImplementedError(msg)

    for batch in frame_batches:
        yield from batch