pyjelly 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyjelly might be problematic. Click here for more details.

@@ -1,10 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from collections.abc import Generator, Iterable
4
- from typing import IO, Any, Callable
5
- from typing_extensions import Never, override
4
+ from itertools import chain
5
+ from typing import IO, Any, Callable, Union
6
+ from typing_extensions import Never, Self, override
6
7
 
7
8
  import rdflib
9
+ from rdflib import Node
8
10
  from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Dataset, Graph
9
11
  from rdflib.parser import InputSource
10
12
  from rdflib.parser import Parser as RDFLibParser
@@ -12,16 +14,113 @@ from rdflib.parser import Parser as RDFLibParser
12
14
  from pyjelly import jelly
13
15
  from pyjelly.errors import JellyConformanceError
14
16
  from pyjelly.options import StreamTypes
15
- from pyjelly.parse.decode import Adapter, Decoder, ParserOptions, ParsingMode
17
+ from pyjelly.parse.decode import Adapter, Decoder, ParserOptions
16
18
  from pyjelly.parse.ioutils import get_options_and_frames
17
19
 
20
+ GraphName = Union[rdflib.URIRef, rdflib.BNode, str]
21
+
22
+
23
class Triple(tuple[Node, Node, Node]):
    """
    Immutable RDFLib triple with named access to its terms.

    An instance is an ordinary 3-tuple ``(s, p, o)``; the ``s``, ``p`` and
    ``o`` properties are read-only aliases for positions 0, 1 and 2.

    Args:
        s (Node): subject term, stored at index 0.
        p (Node): predicate term, stored at index 1.
        o (Node): object term, stored at index 2.

    """

    # No per-instance ``__dict__``: the tuple payload is the only state.
    __slots__ = ()

    def __new__(cls, s: Node, p: Node, o: Node) -> Self:
        terms = (s, p, o)
        return super().__new__(cls, terms)

    @property
    def s(self) -> Node:
        """Subject term (index 0)."""
        return self[0]

    @property
    def p(self) -> Node:
        """Predicate term (index 1)."""
        return self[1]

    @property
    def o(self) -> Node:
        """Object term (index 2)."""
        return self[2]
51
+
52
+
53
class Quad(tuple[Node, Node, Node, GraphName]):
    """
    Immutable RDFLib quad with named access to its terms.

    An instance is an ordinary 4-tuple ``(s, p, o, g)``; the ``s``, ``p``,
    ``o`` and ``g`` properties are read-only aliases for positions 0 to 3.

    Args:
        s (Node): subject term, stored at index 0.
        p (Node): predicate term, stored at index 1.
        o (Node): object term, stored at index 2.
        g (GraphName): name of the graph the statement belongs to,
            stored at index 3.

    """

    # No per-instance ``__dict__``: the tuple payload is the only state.
    __slots__ = ()

    def __new__(cls, s: Node, p: Node, o: Node, g: GraphName) -> Self:
        terms = (s, p, o, g)
        return super().__new__(cls, terms)

    @property
    def s(self) -> Node:
        """Subject term (index 0)."""
        return self[0]

    @property
    def p(self) -> Node:
        """Predicate term (index 1)."""
        return self[1]

    @property
    def o(self) -> Node:
        """Object term (index 2)."""
        return self[2]

    @property
    def g(self) -> GraphName:
        """Graph name (index 3)."""
        return self[3]
86
+
87
+
88
+ Statement = Union[Triple, Quad]
89
+
90
+
91
class Prefix(tuple[str, rdflib.URIRef]):
    """
    Immutable RDF prefix, i.e. a namespace declaration.

    An instance is an ordinary 2-tuple ``(prefix, iri)``; the ``prefix`` and
    ``iri`` properties are read-only aliases for positions 0 and 1.

    Args:
        prefix (str): short prefix label, stored at index 0.
        iri (rdflib.URIRef): full namespace IRI, stored at index 1.

    """

    # No per-instance ``__dict__``: the tuple payload is the only state.
    __slots__ = ()

    def __new__(cls, prefix: str, iri: rdflib.URIRef) -> Self:
        pair = (prefix, iri)
        return super().__new__(cls, pair)

    @property
    def prefix(self) -> str:
        """Prefix label (index 0)."""
        return self[0]

    @property
    def iri(self) -> rdflib.URIRef:
        """Namespace IRI (index 1)."""
        return self[1]
116
+
18
117
 
19
118
  class RDFLibAdapter(Adapter):
20
119
  """
21
120
  RDFLib adapter class, is extended by triples and quads implementations.
22
121
 
23
122
  Args:
24
- Adapter (_type_): abstract adapter class
123
+ Adapter (): abstract adapter class
25
124
 
26
125
  """
27
126
 
@@ -46,13 +145,15 @@ class RDFLibAdapter(Adapter):
46
145
  ) -> rdflib.Literal:
47
146
  return rdflib.Literal(lex, lang=language, datatype=datatype)
48
147
 
148
@override
def namespace_declaration(self, name: str, iri: str) -> Prefix:
    """
    Build a ``Prefix`` event from a namespace declaration.

    Args:
        name (str): prefix label.
        iri (str): namespace IRI as a string; converted with ``self.iri``.

    Returns:
        Prefix: ``(name, converted IRI)`` pair.

    """
    namespace_iri = self.iri(iri)
    return Prefix(name, namespace_iri)
151
+
49
152
 
50
153
  def _adapter_missing(feature: str, *, stream_types: StreamTypes) -> Never:
51
154
  """
52
155
  Raise error if functionality is missing in adapter.
53
156
 
54
- TODO: currently not used anywhere due to logical types being removed
55
-
56
157
  Args:
57
158
  feature (str): function which is not implemented
58
159
  stream_types (StreamTypes): what combination of physical/logical types
@@ -79,93 +180,48 @@ class RDFLibTriplesAdapter(RDFLibAdapter):
79
180
  """
80
181
  Triples adapter RDFLib implementation.
81
182
 
82
- Notes: has internal graph object which tracks
83
- triples and namespaces and can get flushed between frames.
183
+ Notes: returns triple/namespace declaration as soon as receives them.
84
184
  """
85
185
 
86
186
  def __init__(
87
187
  self,
88
188
  options: ParserOptions,
89
- graph_factory: Callable[[], Graph],
90
- parsing_mode: ParsingMode = ParsingMode.FLAT,
91
189
  ) -> None:
92
- super().__init__(options=options, parsing_mode=parsing_mode)
93
- self.graph = graph_factory()
94
- self.graph_factory = graph_factory
95
- self.parsing_mode = parsing_mode
190
+ super().__init__(options=options)
96
191
 
97
192
  @override
98
- def triple(self, terms: Iterable[Any]) -> Any:
99
- self.graph.add(tuple(terms))
100
-
101
- @override
102
- def namespace_declaration(self, name: str, iri: str) -> None:
103
- self.graph.bind(name, self.iri(iri))
104
-
105
- def frame(self) -> Graph:
106
- """
107
- Finalize one frame in triples stream.
108
-
109
- Returns:
110
- Graph: frame content as a separate Graph
111
- and starts a new Graph
112
-
113
- """
114
- this_graph = self.graph
115
- self.graph = self.graph_factory()
116
- return this_graph
193
+ def triple(self, terms: Iterable[Any]) -> Triple:
194
+ return Triple(*terms)
117
195
 
118
196
 
119
197
  class RDFLibQuadsBaseAdapter(RDFLibAdapter):
120
- def __init__(
121
- self,
122
- options: ParserOptions,
123
- dataset_factory: Callable[[], Dataset],
124
- parsing_mode: ParsingMode = ParsingMode.FLAT,
125
- ) -> None:
126
- super().__init__(options=options, parsing_mode=parsing_mode)
127
- self.dataset = dataset_factory()
128
- self.dataset_factory = dataset_factory
129
-
130
- @override
131
- def frame(self) -> Dataset:
132
- current_dataset = self.dataset
133
- self.dataset = self.dataset_factory()
134
- return current_dataset
198
+ def __init__(self, options: ParserOptions) -> None:
199
+ super().__init__(options=options)
135
200
 
136
201
 
137
202
  class RDFLibQuadsAdapter(RDFLibQuadsBaseAdapter):
138
203
  """
139
204
  Extended RDFLib adapter for the QUADS physical type.
140
205
 
141
- Notes:
142
- Adds triples and namespaces directly to
143
- dataset, so RDFLib handles the rest.
144
-
145
206
  Args:
146
- RDFLibQuadsBaseAdapter (_type_): base quads adapter
207
+ RDFLibQuadsBaseAdapter (RDFLibAdapter): base quads adapter
147
208
  (shared with graphs physical type)
148
209
 
149
210
  """
150
211
 
151
212
  @override
152
- def namespace_declaration(self, name: str, iri: str) -> None:
153
- self.dataset.bind(name, self.iri(iri))
154
-
155
- @override
156
- def quad(self, terms: Iterable[Any]) -> Any:
157
- self.dataset.add(tuple(terms))
213
+ def quad(self, terms: Iterable[Any]) -> Quad:
214
+ return Quad(*terms)
158
215
 
159
216
 
160
217
  class RDFLibGraphsAdapter(RDFLibQuadsBaseAdapter):
161
218
  """
162
219
  Extension of RDFLibQuadsBaseAdapter for the GRAPHS physical type.
163
220
 
164
- Notes: introduces graph start/end, checks if graph exists,
165
- dataset store management.
221
+ Notes: introduces graph start/end, checks if graph exists.
166
222
 
167
223
  Args:
168
- RDFLibQuadsBaseAdapter (_type_): base adapter for quads management.
224
+ RDFLibQuadsBaseAdapter (RDFLibAdapter): base adapter for quads management.
169
225
 
170
226
  Raises:
171
227
  JellyConformanceError: if no graph_start was encountered
@@ -177,14 +233,8 @@ class RDFLibGraphsAdapter(RDFLibQuadsBaseAdapter):
177
233
  def __init__(
178
234
  self,
179
235
  options: ParserOptions,
180
- dataset_factory: Callable[[], Dataset],
181
- parsing_mode: ParsingMode = ParsingMode.FLAT,
182
236
  ) -> None:
183
- super().__init__(
184
- options=options,
185
- dataset_factory=dataset_factory,
186
- parsing_mode=parsing_mode,
187
- )
237
+ super().__init__(options=options)
188
238
  self._graph_id = None
189
239
 
190
240
  @property
@@ -198,12 +248,8 @@ class RDFLibGraphsAdapter(RDFLibQuadsBaseAdapter):
198
248
  self._graph_id = graph_id
199
249
 
200
250
  @override
201
- def namespace_declaration(self, name: str, iri: str) -> None:
202
- self.dataset.bind(name, self.iri(iri))
203
-
204
- @override
205
- def triple(self, terms: Iterable[Any]) -> None:
206
- self.dataset.add((*terms, self._graph_id))
251
+ def triple(self, terms: Iterable[Any]) -> Quad:
252
+ return Quad(*chain(terms, [self._graph_id]))
207
253
 
208
254
  @override
209
255
  def graph_end(self) -> None:
@@ -213,54 +259,42 @@ class RDFLibGraphsAdapter(RDFLibQuadsBaseAdapter):
213
259
  def parse_triples_stream(
214
260
  frames: Iterable[jelly.RdfStreamFrame],
215
261
  options: ParserOptions,
216
- graph_factory: Callable[[], Graph],
217
- parsing_mode: ParsingMode = ParsingMode.FLAT,
218
- ) -> Generator[Graph]:
262
+ ) -> Generator[Iterable[Triple | Prefix]]:
219
263
  """
220
264
  Parse flat triple stream.
221
265
 
222
266
  Args:
223
267
  frames (Iterable[jelly.RdfStreamFrame]): iterator over stream frames
224
268
  options (ParserOptions): stream options
225
- graph_factory (Callable): Lambda to construct a graph
226
- parsing_mode (ParsingMode): specifies whether this is
227
- a flat or grouped parsing.
228
269
 
229
270
  Yields:
230
- Generator[Graph]: RDFLib Graph(s)
271
+ Generator[Iterable[Triple | Prefix]]:
272
+ Generator of iterables of Triple or Prefix objects,
273
+ one iterable per frame.
231
274
 
232
275
  """
233
- adapter = RDFLibTriplesAdapter(
234
- options, graph_factory=graph_factory, parsing_mode=parsing_mode
235
- )
276
+ adapter = RDFLibTriplesAdapter(options)
236
277
  decoder = Decoder(adapter=adapter)
237
278
  for frame in frames:
238
- g = decoder.decode_frame(frame)
239
- if g is not None:
240
- yield g
241
-
242
- if parsing_mode is ParsingMode.FLAT:
243
- yield adapter.graph
279
+ yield decoder.iter_rows(frame)
280
+ return
244
281
 
245
282
 
246
283
  def parse_quads_stream(
247
284
  frames: Iterable[jelly.RdfStreamFrame],
248
285
  options: ParserOptions,
249
- dataset_factory: Callable[[], Dataset],
250
- parsing_mode: ParsingMode = ParsingMode.FLAT,
251
- ) -> Generator[Dataset]:
286
+ ) -> Generator[Iterable[Quad | Prefix]]:
252
287
  """
253
288
  Parse flat quads stream.
254
289
 
255
290
  Args:
256
291
  frames (Iterable[jelly.RdfStreamFrame]): iterator over stream frames
257
292
  options (ParserOptions): stream options
258
- dataset_factory (Callable): Lambda to construct a dataset
259
- parsing_mode (ParsingMode): specifies whether this is
260
- a flat or grouped parsing.
261
293
 
262
294
  Yields:
263
- Generator[Dataset]: RDFLib dataset(s)
295
+ Generator[Iterable[Quad | Prefix]]:
296
+ Generator of iterables of Quad or Prefix objects,
297
+ one iterable per frame.
264
298
 
265
299
  """
266
300
  adapter_class: type[RDFLibQuadsBaseAdapter]
@@ -268,65 +302,71 @@ def parse_quads_stream(
268
302
  adapter_class = RDFLibQuadsAdapter
269
303
  else:
270
304
  adapter_class = RDFLibGraphsAdapter
271
- adapter = adapter_class(
272
- options=options,
273
- dataset_factory=dataset_factory,
274
- parsing_mode=parsing_mode,
275
- )
305
+ adapter = adapter_class(options=options)
276
306
  decoder = Decoder(adapter=adapter)
277
307
  for frame in frames:
278
- ds = decoder.decode_frame(frame)
279
- if ds is not None:
280
- yield ds
281
-
282
- if parsing_mode is ParsingMode.FLAT:
283
- yield adapter.dataset
308
+ yield decoder.iter_rows(frame)
309
+ return
284
310
 
285
311
 
286
312
  def parse_jelly_grouped(
287
313
  inp: IO[bytes],
288
- graph_factory: Callable[[], Graph],
289
- dataset_factory: Callable[[], Dataset],
290
- ) -> Generator[Any] | Generator[Graph] | Generator[Dataset]:
314
+ graph_factory: Callable[[], Graph] = lambda: Graph(),
315
+ dataset_factory: Callable[[], Dataset] = lambda: Dataset(),
316
+ ) -> Generator[Graph] | Generator[Dataset]:
291
317
  """
292
- Take jelly file and return generators based on the detected logical type.
318
+ Take jelly file and return generators based on the detected physical type.
293
319
 
294
320
  Yields one graph/dataset per frame.
295
321
 
296
322
  Args:
297
323
  inp (IO[bytes]): input jelly buffered binary stream
298
- graph_factory (Callable): lambda to construct a Graph
299
- dataset_factory (Callable): lambda to construct a Dataset
324
+ graph_factory (Callable): lambda to construct a Graph.
325
+ By default creates an empty in-memory Graph,
326
+ but you can pass something else here.
327
+ dataset_factory (Callable): lambda to construct a Dataset.
328
+ By default creates an empty in-memory Dataset,
329
+ but you can pass something else here.
300
330
 
301
331
  Raises:
302
- NotImplementedError: is raised if a logical type is not implemented
332
+ NotImplementedError: is raised if a physical type is not implemented
303
333
 
304
334
  Yields:
305
- Generator[Any] | Generator[Dataset] | Generator[Graph]:
335
+ Generator[Graph] | Generator[Dataset]:
306
336
  returns generators for graphs/datasets based on the type of input
307
337
 
308
338
  """
309
339
  options, frames = get_options_and_frames(inp)
310
-
311
340
  if options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
312
- yield from parse_triples_stream(
341
+ for graph in parse_triples_stream(
313
342
  frames=frames,
314
343
  options=options,
315
- graph_factory=graph_factory,
316
- parsing_mode=ParsingMode.GROUPED,
317
- )
344
+ ):
345
+ sink = graph_factory()
346
+ for graph_item in graph:
347
+ if isinstance(graph_item, Prefix):
348
+ sink.bind(graph_item.prefix, graph_item.iri)
349
+ else:
350
+ sink.add(graph_item)
351
+ yield sink
318
352
  return
319
-
320
- if options.stream_types.physical_type in (
353
+ elif options.stream_types.physical_type in (
321
354
  jelly.PHYSICAL_STREAM_TYPE_QUADS,
322
355
  jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
323
356
  ):
324
- yield from parse_quads_stream(
357
+ for dataset in parse_quads_stream(
325
358
  frames=frames,
326
359
  options=options,
327
- dataset_factory=dataset_factory,
328
- parsing_mode=ParsingMode.GROUPED,
329
- )
360
+ ):
361
+ sink = dataset_factory()
362
+ for item in dataset:
363
+ if isinstance(item, Prefix):
364
+ sink.bind(item.prefix, item.iri)
365
+ else:
366
+ s, p, o, graph_name = item
367
+ context = sink.get_context(graph_name)
368
+ sink.add((s, p, o, context))
369
+ yield sink
330
370
  return
331
371
 
332
372
  physical_type_name = jelly.PhysicalStreamType.Name(
@@ -336,50 +376,89 @@ def parse_jelly_grouped(
336
376
  raise NotImplementedError(msg)
337
377
 
338
378
 
379
def parse_jelly_to_graph(
    inp: IO[bytes],
    graph_factory: Callable[[], Graph] = lambda: Graph(),
    dataset_factory: Callable[[], Dataset] = lambda: Dataset(),
) -> Graph | Dataset:
    """
    Load a whole jelly stream into a single Graph or Dataset.

    The stream options are read first to pick the container: a Graph from
    ``graph_factory`` for the TRIPLES physical type, a Dataset from
    ``dataset_factory`` for the QUADS and GRAPHS physical types. The flat
    event stream is then replayed into that container.

    Args:
        inp (IO[bytes]): input jelly stream.
        graph_factory (Callable[[], Graph]): factory to create Graph.
            By default creates an empty in-memory Graph,
            but you can pass something else here.
        dataset_factory (Callable[[], Dataset]): factory to create Dataset.
            By default creates an empty in-memory Dataset,
            but you can pass something else here.

    Raises:
        NotImplementedError: presumably propagated from parse_jelly_flat
            when the physical type is not supported (per its docstring).

    Returns:
        Dataset | Graph: Dataset or Graph with statements.

    """
    options, frames = get_options_and_frames(inp)
    physical_type = options.stream_types.physical_type

    if physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
        target = graph_factory()
    elif physical_type in (
        jelly.PHYSICAL_STREAM_TYPE_QUADS,
        jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
    ):
        target = dataset_factory()

    # Options and frames were already consumed from ``inp`` above, so they
    # are handed over instead of being read a second time.
    for event in parse_jelly_flat(inp=inp, frames=frames, options=options):
        if isinstance(event, Prefix):
            target.bind(event.prefix, event.iri)
        elif isinstance(event, Triple):
            target.add(event)
        elif isinstance(event, Quad):
            subj, pred, obj, graph_name = event
            # Quads are added through the context of their named graph.
            context = target.get_context(graph_name)
            target.add((subj, pred, obj, context))
    return target
421
+
422
+
339
423
  def parse_jelly_flat(
340
424
  inp: IO[bytes],
341
- graph_factory: Callable[[], Graph],
342
- dataset_factory: Callable[[], Dataset],
343
- ) -> Any | Dataset | Graph:
425
+ frames: Iterable[jelly.RdfStreamFrame] | None = None,
426
+ options: ParserOptions | None = None,
427
+ ) -> Generator[Statement | Prefix]:
344
428
  """
345
- Parse jelly file with FLAT physical type into one Graph/Dataset.
429
+ Parse jelly file with FLAT logical type into a Generator of stream events.
346
430
 
347
431
  Args:
348
- inp (IO[bytes]): input jelly buffered binary stream
349
- graph_factory (Callable): lambda to construct a Graph
350
- dataset_factory (Callable): lambda to construct a Dataset
432
+ inp (IO[bytes]): input jelly buffered binary stream.
433
+ frames (Iterable[jelly.RdfStreamFrame | None):
434
+ jelly frames if read before.
435
+ options (ParserOptions | None): stream options
436
+ if read before.
351
437
 
352
438
  Raises:
353
439
  NotImplementedError: if physical type is not supported
354
440
 
355
- Returns:
356
- RDFLib Graph or Dataset
441
+ Yields:
442
+ Generator[Statement | Prefix]: Generator of stream events
357
443
 
358
444
  """
359
- options, frames = get_options_and_frames(inp)
445
+ if not frames or not options:
446
+ options, frames = get_options_and_frames(inp)
360
447
 
361
448
  if options.stream_types.physical_type == jelly.PHYSICAL_STREAM_TYPE_TRIPLES:
362
- return next(
363
- parse_triples_stream(
364
- frames=frames,
365
- options=options,
366
- graph_factory=graph_factory,
367
- parsing_mode=ParsingMode.FLAT,
368
- )
369
- )
370
-
449
+ for triples in parse_triples_stream(frames=frames, options=options):
450
+ yield from triples
451
+ return
371
452
  if options.stream_types.physical_type in (
372
453
  jelly.PHYSICAL_STREAM_TYPE_QUADS,
373
454
  jelly.PHYSICAL_STREAM_TYPE_GRAPHS,
374
455
  ):
375
- return next(
376
- parse_quads_stream(
377
- frames=frames,
378
- options=options,
379
- dataset_factory=dataset_factory,
380
- parsing_mode=ParsingMode.FLAT,
381
- )
382
- )
456
+ for quads in parse_quads_stream(
457
+ frames=frames,
458
+ options=options,
459
+ ):
460
+ yield from quads
461
+ return
383
462
  physical_type_name = jelly.PhysicalStreamType.Name(
384
463
  options.stream_types.physical_type
385
464
  )
@@ -404,7 +483,7 @@ class RDFLibJellyParser(RDFLibParser):
404
483
  if inp is None:
405
484
  msg = "expected source to be a stream of bytes"
406
485
  raise TypeError(msg)
407
- parse_jelly_flat(
486
+ parse_jelly_to_graph(
408
487
  inp,
409
488
  graph_factory=lambda: Graph(store=sink.store, identifier=sink.identifier),
410
489
  dataset_factory=lambda: Dataset(store=sink.store),