pyjelly 0.7.1__cp311-cp311-macosx_11_0_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cb523b6bada1c6eba8b4__mypyc.cpython-311-darwin.so +0 -0
- pyjelly/__init__.py +0 -0
- pyjelly/_proto/grpc.proto +33 -0
- pyjelly/_proto/patch.proto +165 -0
- pyjelly/_proto/rdf.proto +384 -0
- pyjelly/errors.py +10 -0
- pyjelly/integrations/__init__.py +0 -0
- pyjelly/integrations/generic/__init__.py +0 -0
- pyjelly/integrations/generic/generic_sink.py +202 -0
- pyjelly/integrations/generic/parse.py +412 -0
- pyjelly/integrations/generic/serialize.cpython-311-darwin.so +0 -0
- pyjelly/integrations/generic/serialize.py +402 -0
- pyjelly/integrations/rdflib/__init__.py +24 -0
- pyjelly/integrations/rdflib/parse.py +560 -0
- pyjelly/integrations/rdflib/serialize.py +408 -0
- pyjelly/jelly/__init__.py +5 -0
- pyjelly/jelly/rdf_pb2.py +70 -0
- pyjelly/jelly/rdf_pb2.pyi +231 -0
- pyjelly/options.py +141 -0
- pyjelly/parse/__init__.py +0 -0
- pyjelly/parse/decode.cpython-311-darwin.so +0 -0
- pyjelly/parse/decode.py +447 -0
- pyjelly/parse/ioutils.cpython-311-darwin.so +0 -0
- pyjelly/parse/ioutils.py +115 -0
- pyjelly/parse/lookup.cpython-311-darwin.so +0 -0
- pyjelly/parse/lookup.py +70 -0
- pyjelly/serialize/__init__.py +0 -0
- pyjelly/serialize/encode.cpython-311-darwin.so +0 -0
- pyjelly/serialize/encode.py +397 -0
- pyjelly/serialize/flows.py +196 -0
- pyjelly/serialize/ioutils.cpython-311-darwin.so +0 -0
- pyjelly/serialize/ioutils.py +13 -0
- pyjelly/serialize/lookup.cpython-311-darwin.so +0 -0
- pyjelly/serialize/lookup.py +137 -0
- pyjelly/serialize/streams.cpython-311-darwin.so +0 -0
- pyjelly/serialize/streams.py +281 -0
- pyjelly-0.7.1.dist-info/METADATA +114 -0
- pyjelly-0.7.1.dist-info/RECORD +41 -0
- pyjelly-0.7.1.dist-info/WHEEL +6 -0
- pyjelly-0.7.1.dist-info/entry_points.txt +7 -0
- pyjelly-0.7.1.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import OrderedDict
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import final
|
|
6
|
+
|
|
7
|
+
from mypy_extensions import mypyc_attr
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@mypyc_attr(allow_interpreted_subclasses=True)
@final
class Lookup:
    """
    Fixed-size 1-based string-to-index mapping with LRU eviction.

    - Assigns incrementing indices starting from 1.
    - After reaching the maximum size, reuses the existing indices from evicting
      the least-recently-used entries.
    - Index 0 is reserved for delta encoding in Jelly streams.

    To check if a key exists, use `.make_last_to_evict(key)` and catch `KeyError`.
    If `KeyError` is raised, the key can be inserted with `.insert(key)`.

    Parameters
    ----------
    max_size
        Maximum number of entries. Zero disables lookup.

    """

    def __init__(self, max_size: int) -> None:
        # The dict order doubles as the eviction order: the first item is the
        # least recently used entry, the last item the most recently used one.
        self.data = OrderedDict[str, int]()
        self.max_size = max_size
        # Flipped to True by `insert` once index `max_size` has been assigned.
        self._evicting = False

    def make_last_to_evict(self, key: str) -> None:
        """
        Mark `key` as the most recently used entry.

        Raises
        ------
        KeyError
            If `key` is not present in the lookup.

        """
        self.data.move_to_end(key)

    def insert(self, key: str) -> int:
        """
        Insert a new key and return the index assigned to it.

        While the lookup is not full, the next sequential index is assigned.
        Once it is full, the least-recently-used entry is evicted and its index
        is reused for `key`.

        Raises
        ------
        IndexError
            If the lookup was created with `max_size` equal to zero.
        AssertionError
            If `key` is already present.

        """
        if not self.max_size:
            msg = "lookup is zero, cannot insert"
            raise IndexError(msg)
        if key in self.data:
            # Raised explicitly instead of via `assert` so the guard survives
            # `python -O`: without it, a duplicate insert while evicting would
            # pop an unrelated entry and overwrite this key's index.
            msg = f"key {key!r} already present"
            raise AssertionError(msg)
        if self._evicting:
            # Reuse the index of the least-recently-used entry.
            _, index = self.data.popitem(last=False)
            self.data[key] = index
        else:
            index = len(self.data) + 1
            self.data[key] = index
            # From the next insert on, indices have to be recycled.
            self._evicting = index == self.max_size
        return index

    def __repr__(self) -> str:
        max_size, data = self.max_size, self.data
        return f"Lookup({max_size=!r}, {data=!r})"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@mypyc_attr(allow_interpreted_subclasses=True)
@dataclass
class LookupEncoder:
    """
    Common base of the RDF lookup encoders used for Jelly compression.

    Keeps one `Lookup` plus the index most recently assigned to a new entry
    and the index most recently reused for a term.

    Parameters
    ----------
    lookup_size
        Maximum lookup size.

    """

    last_assigned_index: int
    last_reused_index: int

    def __init__(self, *, lookup_size: int) -> None:
        self.lookup = Lookup(max_size=lookup_size)
        # 0 means "nothing assigned / reused yet"; real indices start at 1.
        self.last_assigned_index = 0
        self.last_reused_index = 0

    def encode_entry_index(self, key: str) -> int | None:
        """
        Look up `key`, inserting it when missing, and return the entry index.

        Returns
        -------
        int or None
            - None if the key is already in the lookup (nothing to emit)
            - 0 if the newly assigned index equals `last_assigned_index + 1`
            - the assigned/reused index otherwise

        Any integer result, 0 included, means the entry is new and should be
        emitted.

        """
        try:
            self.lookup.make_last_to_evict(key)
        except KeyError:
            # Unknown key: insert it and remember the index it received.
            before = self.last_assigned_index
            assigned = self.lookup.insert(key)
            self.last_assigned_index = assigned
            return 0 if assigned == before + 1 else assigned
        else:
            return None

    def encode_term_index(self, value: str) -> int:
        """
        Return the index of an existing entry and record it as last reused.

        The entry is also marked as most recently used. Raises `KeyError` if
        `value` is not in the lookup.
        """
        lookup = self.lookup
        lookup.make_last_to_evict(value)
        found = lookup.data[value]
        self.last_reused_index = found
        return found

    def encode_prefix_term_index(self, value: str) -> int:
        """
        Return the index to write for a prefix term.

        0 is returned when the lookup is disabled, when `value` is empty and
        no index has been reused yet, or when the index is the same as the
        previously reused one; otherwise the entry's index is returned.
        """
        if self.lookup.max_size == 0:
            return 0
        prior = self.last_reused_index
        if prior == 0 and not value:
            return 0
        found = self.encode_term_index(value)
        # A repeat is only collapsed to 0 once some index has been reused.
        if prior != 0 and found == prior:
            return 0
        return found

    def encode_name_term_index(self, value: str) -> int:
        """
        Return the index to write for a name term.

        0 is returned when the index directly follows the previously reused
        one; otherwise the entry's index is returned.
        """
        prior = self.last_reused_index
        found = self.encode_term_index(value)
        return 0 if found == prior + 1 else found

    def encode_datatype_term_index(self, value: str) -> int:
        """Return the entry's index, or 0 when the lookup is disabled."""
        if self.lookup.max_size != 0:
            return self.encode_term_index(value)
        return 0
|
|
Binary file
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Generator, Iterable
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import TYPE_CHECKING, ClassVar
|
|
6
|
+
|
|
7
|
+
from mypy_extensions import mypyc_attr
|
|
8
|
+
|
|
9
|
+
from pyjelly import jelly
|
|
10
|
+
from pyjelly.options import LookupPreset, StreamParameters, StreamTypes
|
|
11
|
+
from pyjelly.serialize.encode import (
|
|
12
|
+
Slot,
|
|
13
|
+
TermEncoder,
|
|
14
|
+
encode_namespace_declaration,
|
|
15
|
+
encode_options,
|
|
16
|
+
encode_quad,
|
|
17
|
+
encode_triple,
|
|
18
|
+
)
|
|
19
|
+
from pyjelly.serialize.flows import (
|
|
20
|
+
DEFAULT_FRAME_SIZE,
|
|
21
|
+
BoundedFrameFlow,
|
|
22
|
+
FlatQuadsFrameFlow,
|
|
23
|
+
FlatTriplesFrameFlow,
|
|
24
|
+
FrameFlow,
|
|
25
|
+
ManualFrameFlow,
|
|
26
|
+
flow_for_type,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
if TYPE_CHECKING:
|
|
30
|
+
from jelly import LogicalStreamType # type: ignore[import-not-found]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class SerializerOptions:
    """Bundle of settings consumed by `Stream` when serializing."""

    # Explicit flow to use; when None, `Stream.infer_flow()` picks one.
    flow: FrameFlow | None = field(default=None)
    # Passed to the flow as `frame_size` for flat triples/quads streams.
    frame_size: int = field(default=DEFAULT_FRAME_SIZE)
    logical_type: LogicalStreamType = field(
        default=jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED,
    )
    # Fresh instances per options object, hence the factories.
    params: StreamParameters = field(default_factory=StreamParameters)
    lookup_preset: LookupPreset = field(default_factory=LookupPreset)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@mypyc_attr(allow_interpreted_subclasses=True)
class Stream:
    physical_type: ClassVar[jelly.PhysicalStreamType]
    default_delimited_flow_class: ClassVar[type[BoundedFrameFlow]]
    repeated_terms: list[object | None]

    def __init__(
        self,
        *,
        encoder: TermEncoder,
        options: SerializerOptions | None = None,
    ) -> None:
        """
        Set up the stream state.

        Args:
            encoder (TermEncoder): encoder used for terms of this stream.
            options (SerializerOptions | None, optional): stream options.
                Defaults to None, in which case default options are created.

        """
        self.encoder = encoder
        self.options = SerializerOptions() if options is None else options
        # infer_flow() reads self.options, so it has to run after the
        # assignment above.
        chosen_flow = self.options.flow
        self.flow = self.infer_flow() if chosen_flow is None else chosen_flow
        # One slot per `Slot` member, all empty at the start.
        self.repeated_terms = [None] * len(Slot)
        self.enrolled = False
        self.stream_types = StreamTypes(
            physical_type=self.physical_type,
            logical_type=self.flow.logical_type,
        )

    def infer_flow(self) -> FrameFlow:
        """
        Pick and build a flow from the stream options.

        Returns:
            FrameFlow: initialised FrameFlow object.

        """
        opts = self.options
        if not opts.params.delimited:
            # Non-delimited streams always use the manual flow.
            return ManualFrameFlow(logical_type=opts.logical_type)

        logical_type = opts.logical_type
        if logical_type != jelly.LOGICAL_STREAM_TYPE_UNSPECIFIED:
            flow_class = flow_for_type(logical_type)
        else:
            flow_class = self.default_delimited_flow_class

        flat_types = (
            jelly.LOGICAL_STREAM_TYPE_FLAT_TRIPLES,
            jelly.LOGICAL_STREAM_TYPE_FLAT_QUADS,
        )
        if logical_type in flat_types:
            # Only the flat logical types are given the frame size.
            return flow_class(
                logical_type=logical_type,
                frame_size=opts.frame_size,
            )
        return flow_class(logical_type=logical_type)

    def enroll(self) -> None:
        """Emit the stream options row once; later calls do nothing."""
        if self.enrolled:
            return
        self.stream_options()
        self.enrolled = True

    def stream_options(self) -> None:
        """Encode the stream options and append the row to the current flow."""
        options_row = encode_options(
            stream_types=self.stream_types,
            params=self.options.params,
            lookup_preset=self.options.lookup_preset,
        )
        self.flow.append(options_row)

    def namespace_declaration(self, name: str, iri: str) -> None:
        """
        Append a namespace declaration to the jelly stream.

        Args:
            name (str): namespace prefix label
            iri (str): namespace iri

        """
        self.flow.extend(
            encode_namespace_declaration(
                name=name,
                value=iri,
                term_encoder=self.encoder,
            )
        )

    @classmethod
    def for_rdflib(cls, options: SerializerOptions | None = None) -> Stream:
        """
        Build a stream of this class backed by the RDFLib term encoder.

        Args:
            options (SerializerOptions | None, optional): Stream options.
                Defaults to None.

        Raises:
            TypeError: if called on Stream itself rather than on a subclass
                for a specific physical type.

        Returns:
            Stream: initialized stream with RDFLib encoder.

        """
        if cls is Stream:
            msg = "Stream is an abstract base class, use a subclass instead"
            raise TypeError(msg)
        # Imported here, after the check, rather than at module import time.
        from pyjelly.integrations.rdflib.serialize import (  # noqa: PLC0415
            RDFLibTermEncoder,
        )

        preset: LookupPreset | None = (
            options.lookup_preset if options is not None else None
        )
        return cls(
            encoder=RDFLibTermEncoder(lookup_preset=preset),
            options=options,
        )
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def stream_for_type(physical_type: jelly.PhysicalStreamType) -> type[Stream]:
    """
    Look up the Stream class registered for a physical stream type.

    Args:
        physical_type (jelly.PhysicalStreamType): jelly stream physical type.

    Raises:
        NotImplementedError: if no stream for requested physical type is available.

    Returns:
        type[Stream]: jelly stream

    """
    try:
        return STREAM_DISPATCH[physical_type]
    except KeyError:
        type_name = jelly.PhysicalStreamType.Name(physical_type)
        msg = "no stream class for physical type " + f"{type_name}"
        # `from None` hides the internal KeyError from the traceback.
        raise NotImplementedError(msg) from None
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class TripleStream(Stream):
    physical_type = jelly.PHYSICAL_STREAM_TYPE_TRIPLES
    default_delimited_flow_class: ClassVar[type[BoundedFrameFlow]] = (
        FlatTriplesFrameFlow
    )

    def triple(self, terms: Iterable[object]) -> jelly.RdfStreamFrame | None:
        """
        Encode a single triple and push its rows onto the current flow.

        Note:
            The return value is whatever the flow's `frame_from_bounds()`
            gives back after the new rows were added.

        Args:
            terms (Iterable[object]): RDF terms to encode.

        Returns:
            jelly.RdfStreamFrame | None: stream frame if
                flow supports frames slicing and current flow is full

        """
        encoded_rows = encode_triple(
            terms,
            term_encoder=self.encoder,
            repeated_terms=self.repeated_terms,
        )
        flow = self.flow
        flow.extend(encoded_rows)
        return flow.frame_from_bounds()
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
class QuadStream(Stream):
    physical_type = jelly.PHYSICAL_STREAM_TYPE_QUADS
    # Annotated like the base class and the sibling stream classes.
    default_delimited_flow_class: ClassVar[type[BoundedFrameFlow]] = (
        FlatQuadsFrameFlow
    )

    def quad(self, terms: Iterable[object]) -> jelly.RdfStreamFrame | None:
        """
        Process one quad to Protobuf messages.

        Note:
            Adds new rows to the current flow and returns StreamFrame if
            frame size conditions are met.

        Args:
            terms (Iterable[object]): terms to encode.

        Returns:
            jelly.RdfStreamFrame | None: stream frame if
                flow supports frames slicing and current flow is full

        """
        new_rows = encode_quad(
            terms,
            term_encoder=self.encoder,
            repeated_terms=self.repeated_terms,
        )
        self.flow.extend(new_rows)
        return self.flow.frame_from_bounds()
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
class GraphStream(TripleStream):
    physical_type = jelly.PHYSICAL_STREAM_TYPE_GRAPHS
    default_delimited_flow_class: ClassVar[type[BoundedFrameFlow]] = FlatQuadsFrameFlow

    def graph(
        self,
        graph_id: object,
        graph: Iterable[Iterable[object]],
    ) -> Generator[jelly.RdfStreamFrame]:
        """
        Encode one graph and yield the jelly frames produced along the way.

        Rows are added in this order: the rows produced while encoding the
        graph id, the graph-start row, the rows of every triple, and finally
        the graph-end row.

        Args:
            graph_id (object): graph id (BN, Literal, iri, default)
            graph (Iterable[Iterable[object]]): iterable of triples (graph's content)

        Yields:
            Generator[jelly.RdfStreamFrame]: jelly frames.

        """
        start_message = jelly.RdfGraphStart()
        # Drain the encoder output before building the start row.
        leading_rows = list(self.encoder.encode_graph(graph_id, start_message))
        leading_rows.append(jelly.RdfStreamRow(graph_start=start_message))
        self.flow.extend(leading_rows)

        for statement in graph:
            produced = self.triple(statement)  # has frame slicing inside
            if produced:
                yield produced

        self.flow.append(jelly.RdfStreamRow(graph_end=jelly.RdfGraphEnd()))
        trailing = self.flow.frame_from_bounds()
        if trailing:
            yield trailing
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
# Maps each physical stream type to the Stream subclass that declares it.
STREAM_DISPATCH: dict[jelly.PhysicalStreamType, type[Stream]] = {
    stream_class.physical_type: stream_class
    for stream_class in (TripleStream, QuadStream, GraphStream)
}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pyjelly
|
|
3
|
+
Version: 0.7.1
|
|
4
|
+
Summary: Jelly-RDF implementation for Python
|
|
5
|
+
Project-URL: Homepage, https://w3id.org/jelly/pyjelly
|
|
6
|
+
Project-URL: Documentation, https://w3id.org/jelly/pyjelly
|
|
7
|
+
Project-URL: Repository, https://github.com/Jelly-RDF/pyjelly
|
|
8
|
+
Project-URL: Issues, https://github.com/Jelly-RDF/pyjelly/issues
|
|
9
|
+
Project-URL: Changelog, https://github.com/Jelly-RDF/pyjelly/releases
|
|
10
|
+
Author-email: "NeverBlink et al." <contact@neverblink.eu>
|
|
11
|
+
License-Expression: Apache-2.0
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Keywords: Jelly,Knowledge graph,RDF,Serialization format
|
|
14
|
+
Classifier: Development Status :: 4 - Beta
|
|
15
|
+
Classifier: Environment :: Console
|
|
16
|
+
Classifier: Environment :: Plugins
|
|
17
|
+
Classifier: Intended Audience :: Developers
|
|
18
|
+
Classifier: Intended Audience :: Information Technology
|
|
19
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
20
|
+
Classifier: Programming Language :: Python :: 3
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
26
|
+
Classifier: Topic :: Database
|
|
27
|
+
Classifier: Topic :: File Formats
|
|
28
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
29
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
30
|
+
Requires-Python: <3.15,>=3.10
|
|
31
|
+
Requires-Dist: mypy-extensions>=1.0.0
|
|
32
|
+
Requires-Dist: protobuf>=6.30.0
|
|
33
|
+
Requires-Dist: typing-extensions>=4.12.2
|
|
34
|
+
Provides-Extra: rdflib
|
|
35
|
+
Requires-Dist: rdflib>=7.1.4; extra == 'rdflib'
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
[](https://w3id.org/jelly/pyjelly) [](https://pypi.org/project/pyjelly/) [](https://pypi.org/project/pyjelly/) [](https://opensource.org/licenses/Apache-2.0) [](https://github.com/Jelly-RDF/pyjelly/actions/workflows/ci.yml) [](https://codecov.io/gh/Jelly-RDF/pyjelly) [](https://discord.gg/A8sN5XwVa5)
|
|
39
|
+
|
|
40
|
+
# pyjelly
|
|
41
|
+
|
|
42
|
+
**pyjelly** is a Python implementation of [Jelly](http://w3id.org/jelly), a high-performance binary serialization format and streaming protocol for RDF knowledge graphs.
|
|
43
|
+
|
|
44
|
+
**Documentation, usage guide and more: https://w3id.org/jelly/pyjelly**
|
|
45
|
+
|
|
46
|
+
## Features
|
|
47
|
+
|
|
48
|
+
- **Fast reading and writing** of RDF knowledge graphs in the [Jelly format](http://w3id.org/jelly).
|
|
49
|
+
- **Standalone [generic API](https://w3id.org/jelly/pyjelly/dev/generic-sink)** with no third-party dependencies, allowing for:
|
|
50
|
+
- Serialization and parsing of statements to and from Jelly files.
|
|
51
|
+
- Parsing and serializing streams of graphs and statements.
|
|
52
|
+
- Precise control over **serialization options, framing and compression**.
|
|
53
|
+
- **Seamless** integration with:
|
|
54
|
+
- **[rdflib](https://w3id.org/jelly/pyjelly/dev/getting-started)**
|
|
55
|
+
- **[RDFLib-Neo4j](https://w3id.org/jelly/pyjelly/dev/rdflib-neo4j-integration)**
|
|
56
|
+
- **[NetworkX](https://w3id.org/jelly/pyjelly/dev/networkx-integration)**
|
|
57
|
+
- **Stream processing support** for large datasets or streams of all [physical stream types](https://w3id.org/jelly/dev/specification/reference/#physicalstreamtype).
|
|
58
|
+
|
|
59
|
+
**pyjelly** is useful when dealing with (see [full description](https://w3id.org/jelly/pyjelly/dev/overview/#use-cases)):
|
|
60
|
+
|
|
61
|
+
- Dumping and loading **large RDF datasets**.
|
|
62
|
+
- **Client-server communication**.
|
|
63
|
+
- Workflows where **streaming** is required.
|
|
64
|
+
|
|
65
|
+
## Getting started
|
|
66
|
+
|
|
67
|
+
Install pyjelly from **[PyPI](https://pypi.org/project/pyjelly/)**:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pip install pyjelly[rdflib]
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
To write an RDF graph to a Jelly file:
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
from rdflib import Graph
|
|
77
|
+
|
|
78
|
+
g = Graph()
|
|
79
|
+
g.parse("http://xmlns.com/foaf/spec/index.rdf")
|
|
80
|
+
g.serialize(destination="foaf.jelly", format="jelly")
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
To read a Jelly file and convert it to an rdflib `Graph`:
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from rdflib import Graph
|
|
87
|
+
|
|
88
|
+
g = Graph()
|
|
89
|
+
g.parse("foaf.jelly", format="jelly")
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
**See [our documentation](https://w3id.org/jelly/pyjelly) for [further examples](https://w3id.org/jelly/pyjelly/dev/getting-started/), a full [API reference](https://w3id.org/jelly/pyjelly/dev/api), and more.**
|
|
93
|
+
|
|
94
|
+
## Contributing and support
|
|
95
|
+
|
|
96
|
+
This project is being actively developed – you can stay tuned by [watching this repository](https://docs.github.com/en/account-and-profile/managing-subscriptions-and-notifications-on-github/setting-up-notifications/about-notifications#subscription-options).
|
|
97
|
+
|
|
98
|
+
Join the **[Jelly Discord chat](https://discord.gg/A8sN5XwVa5)** to ask questions about pyjelly and to stay up to date with development activities.
|
|
99
|
+
|
|
100
|
+
### Commercial support
|
|
101
|
+
|
|
102
|
+
**[NeverBlink](https://neverblink.eu)** provides commercial support services for Jelly, including implementing custom features, system integrations, implementations for new frameworks, benchmarking, and more.
|
|
103
|
+
|
|
104
|
+
### Contributing
|
|
105
|
+
|
|
106
|
+
If you'd like to contribute, check out our [contributing guidelines](CONTRIBUTING.md).
|
|
107
|
+
|
|
108
|
+
## License
|
|
109
|
+
|
|
110
|
+
The pyjelly library is licensed under the [Apache 2.0 license](https://www.apache.org/licenses/LICENSE-2.0).
|
|
111
|
+
|
|
112
|
+
----
|
|
113
|
+
|
|
114
|
+
The development of the Jelly protocol, its implementations, and supporting tooling was co-funded by the European Union. **[More details](https://w3id.org/jelly/dev/licensing/projects)**.
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
cb523b6bada1c6eba8b4__mypyc.cpython-311-darwin.so,sha256=_X6MmZHdZpy0q2WOCYZ4Iej_nTeQPgzl2bbr33x8v6U,846280
|
|
2
|
+
pyjelly/options.py,sha256=u1JseR5HsvSewPe8CI26xDu-v7CM0rpAU5F5kwR8qtA,4545
|
|
3
|
+
pyjelly/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
pyjelly/errors.py,sha256=R-xRB4a9S19J9dzAL4a5MCaBwb9ev_kvphGFkQJX6ZU,332
|
|
5
|
+
pyjelly/_proto/patch.proto,sha256=gASUm0xDG9J1advNoq_cCsJYxudTbQaiZQBq4oW3kw4,5291
|
|
6
|
+
pyjelly/_proto/rdf.proto,sha256=EKxyG421B4m0Wx5-6jjojdga_hA3jpZfF6-T3lMc0hI,12763
|
|
7
|
+
pyjelly/_proto/grpc.proto,sha256=3PfcZWqKhUSzP_T-xT-80raUYERr_dXWd8rITzXIqek,1188
|
|
8
|
+
pyjelly/parse/lookup.py,sha256=_E9sg4p5X8INeeqAZqmLH_HENpONr3IF-GqWVV6TeJc,2197
|
|
9
|
+
pyjelly/parse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
pyjelly/parse/decode.cpython-311-darwin.so,sha256=e6mMijg2HhP9Cza1Wj0N-lV7sXBnqfBysz1WGLR4zTI,13288
|
|
11
|
+
pyjelly/parse/ioutils.cpython-311-darwin.so,sha256=TGoTfj2UbdqO_SOTfzZlHIKi_0rKFGFJzMfKvpuB4sw,13288
|
|
12
|
+
pyjelly/parse/lookup.cpython-311-darwin.so,sha256=tIOlNRsyy3oLdMtFe1ziTDK2OCHdzK9B_aANSFklCEQ,13288
|
|
13
|
+
pyjelly/parse/decode.py,sha256=IkXc__ulPnS33FHBzhQqeQJCv35TSnClGMx5-WF6BCw,15017
|
|
14
|
+
pyjelly/parse/ioutils.py,sha256=w7REdMkwlfWu10dxZZ7LdvDNBJksQoRUCVUAACIgiRs,3833
|
|
15
|
+
pyjelly/jelly/rdf_pb2.pyi,sha256=-gxZO-r2wyN68l83XomySz60c82SZmoPKh1HxamBjZs,11816
|
|
16
|
+
pyjelly/jelly/__init__.py,sha256=9kacwn8Ew_1fcgj1abz6miEz-AtUdPT2ltFWaRIE5VE,126
|
|
17
|
+
pyjelly/jelly/rdf_pb2.py,sha256=qjgS3kQnCJqoOmgzvgk1BeYxGbeDX2zygJPc2vDjRts,8952
|
|
18
|
+
pyjelly/serialize/streams.py,sha256=GUfYC5Z8iyXNndgigp6qD5NNBqCpQ-5CVJaJrYVspuk,8634
|
|
19
|
+
pyjelly/serialize/encode.py,sha256=CaI-R4HJXPiL8FLv0uGUy4-mzIgyLb1WCZqhZj_OtF0,12702
|
|
20
|
+
pyjelly/serialize/encode.cpython-311-darwin.so,sha256=B8zRh3cX3fCsQycnWRL2Ddy2z86Fy1yaby32ykkDcT0,13296
|
|
21
|
+
pyjelly/serialize/lookup.py,sha256=4QlekQ-0idEtr8qyYoC6WBKRVk_bwiVAAWrQGtxBgTQ,4225
|
|
22
|
+
pyjelly/serialize/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
|
+
pyjelly/serialize/streams.cpython-311-darwin.so,sha256=QuE18MqWUP-6PSAlyC-GP9696IxP7GOde_WtYxp2sk4,13296
|
|
24
|
+
pyjelly/serialize/ioutils.cpython-311-darwin.so,sha256=ABwDwlKxcdTsHUcNGRGWlgxwAqEi9JGFALzHtmcOB5k,13296
|
|
25
|
+
pyjelly/serialize/flows.py,sha256=0C2soigJKyHr3xoR-7v0kc1RL8COwnuCRd4iVZpukFU,5524
|
|
26
|
+
pyjelly/serialize/lookup.cpython-311-darwin.so,sha256=w5u7xrb8saZFeLetGe-IKu2DKz7eBQdWb9vedFOXcZc,13296
|
|
27
|
+
pyjelly/serialize/ioutils.py,sha256=2_NaadLfHO3jKR1ZV7aK6jQ09sPKBar9iLFHYwourz8,400
|
|
28
|
+
pyjelly/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
|
+
pyjelly/integrations/rdflib/serialize.py,sha256=lS2K3UQy1SfKk8x9eX_EA0ubmAg-RDf_JXJ_cBNJj4M,12972
|
|
30
|
+
pyjelly/integrations/rdflib/__init__.py,sha256=lpIz6iildMf5bDvj3aBqZJ7kgKFrTx_tsqSb6PkLis0,552
|
|
31
|
+
pyjelly/integrations/rdflib/parse.py,sha256=9ajBRpl4OJslkcsP2XX3jMG_57IixoOcSEDxp4pMexU,16489
|
|
32
|
+
pyjelly/integrations/generic/serialize.py,sha256=eJYHTtBKvLl82DMPZBn0Ojp1sQrhmTttY9KPuYwYnl4,12494
|
|
33
|
+
pyjelly/integrations/generic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
|
+
pyjelly/integrations/generic/serialize.cpython-311-darwin.so,sha256=RMVGrvE5ZJrHMePCjeCdPuXJHsUWDrWcMXNfEPZW2qo,13328
|
|
35
|
+
pyjelly/integrations/generic/generic_sink.py,sha256=vfc2EKN4cKd3VYLulFThrQ_1ThLKrp2XrG0Afdrnckw,5049
|
|
36
|
+
pyjelly/integrations/generic/parse.py,sha256=tkttb9KUW89n7SrB1SlxhCcROthFoq78TZhb0FAN1wo,12726
|
|
37
|
+
pyjelly-0.7.1.dist-info/RECORD,,
|
|
38
|
+
pyjelly-0.7.1.dist-info/WHEEL,sha256=FF98O_-RqTE3_UpKT5A1SOUKGkcpJJGqmOIOcWT6xWc,134
|
|
39
|
+
pyjelly-0.7.1.dist-info/entry_points.txt,sha256=kUG0p9zso7HpitdMaQaXEj_KSqgOGsL0Ky9ARbecN1g,339
|
|
40
|
+
pyjelly-0.7.1.dist-info/METADATA,sha256=nywNaQz0wEhOx0tS0_sgPW-4N2ksplYbAQHVcWt_L4k,5661
|
|
41
|
+
pyjelly-0.7.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
[rdf.plugins.parser]
|
|
2
|
+
application/x-jelly-rdf = pyjelly.integrations.rdflib.parse:RDFLibJellyParser
|
|
3
|
+
jelly = pyjelly.integrations.rdflib.parse:RDFLibJellyParser
|
|
4
|
+
|
|
5
|
+
[rdf.plugins.serializer]
|
|
6
|
+
application/x-jelly-rdf = pyjelly.integrations.rdflib.serialize:RDFLibJellySerializer
|
|
7
|
+
jelly = pyjelly.integrations.rdflib.serialize:RDFLibJellySerializer
|