pypinch 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pypinch-0.0.1/LICENSE ADDED
@@ -0,0 +1,17 @@
1
+ MIT License
2
+ Copyright (c) 2026 Aharon Sambol
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+ The above copyright notice and this permission notice shall be included in all
10
+ copies or substantial portions of the Software.
11
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17
+ SOFTWARE.
pypinch-0.0.1/PKG-INFO ADDED
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.4
2
+ Name: pypinch
3
+ Version: 0.0.1
4
+ Summary: Schemaless binary serialization format without limitations
5
+ Author-email: Aharon Sambol <aharon.sambol@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/AharonSambol/pypinch
8
+ Project-URL: Issues, https://github.com/AharonSambol/pypinch/issues
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Topic :: Utilities
12
+ Classifier: Topic :: File Formats
13
+ Classifier: Typing :: Typed
14
+ Classifier: Intended Audience :: Developers
15
+ Requires-Python: >=3.8
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Dynamic: license-file
File without changes
@@ -0,0 +1,31 @@
1
+ [build-system]
2
+ requires = ["setuptools >= 77.0.3"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "pypinch"
7
+ version = "0.0.1"
8
+ authors = [
9
+ { name="Aharon Sambol", email="aharon.sambol@gmail.com" },
10
+ ]
11
+ description = "Schemaless binary serialization format without limitations"
12
+ readme = "README.md"
13
+ requires-python = ">=3.8"
14
+ dynamic = ["dependencies"]
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "Operating System :: OS Independent",
18
+ "Topic :: Utilities",
19
+ "Topic :: File Formats",
20
+ "Typing :: Typed",
21
+ "Intended Audience :: Developers",
22
+ ]
23
+ license = "MIT"
24
+ license-files = ["LICEN[CS]E*"]
25
+
26
+ [project.urls]
27
+ Homepage = "https://github.com/AharonSambol/pypinch"
28
+ Issues = "https://github.com/AharonSambol/pypinch/issues"
29
+
30
+ [tool.setuptools.dynamic]
31
+ dependencies = {file = ["requirements.txt"]}
File without changes
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
File without changes
@@ -0,0 +1,5 @@
1
"""Package entry points: serialize with ``dump_bytes`` and deserialize with ``load_bytes``."""
from .serialize.serialize import dump_bytes
from .deserialize.deserialize import load_bytes

# Short aliases bound to the very same functions.
pinch = dump_bytes
unpinch = load_bytes
File without changes
@@ -0,0 +1,301 @@
1
+ import gc
2
+ import math
3
+ import struct
4
+ import typing
5
+ from typing import Tuple, Optional, List
6
+
7
+ from consts import NUMBER_BASE, ObjType, POSITIVE_INT_FLAG, FALSE_FLAG, TRUE_FLAG, NULL_FLAG, BYTES_FLAG, \
8
+ LIST_FLAG, \
9
+ DICT_FLAG, STR_KEY_DICT_FLAG, FLOAT_FLAG, STR_FLAG, NEGATIVE_INT_FLAG, EMPTY_STR_FLAG, EMPTY_BYTES_FLAG, \
10
+ EMPTY_LIST_FLAG, EMPTY_DICT_FLAG, CONSISTENT_TYPE_LIST_FLAG, INT_FLAG, BOOL_FLAG, POINTER_FLAG, \
11
+ ByteLike, HEADER, CONSISTENT_TYPE_DICT_FLAG, REVERSE_SMALL_INTS, BIG_ENDIAN_DOUBLE_FORMAT, NUMBER_OF_BITS_IN_BYTE, \
12
+ LEFTMOST_BIT_MASK, BYTES_IN_DOUBLE, NEGATIVE_NUMBER_SIGN
13
+
14
+
15
+ from exceptions import DecodingError
16
+ from src.pinch_python.deserialize.settings import Settings
17
+ from src.pinch_python.deserialize.utils import decode_number
18
+
19
+
20
+
21
def load_bytes(
    buffer: ByteLike,
    *,
    modify_input: bool = False,
    encoding: Optional[str] = None,
    use_tuples: bool = False,
    use_pointers: bool = True,
    stop_gc: bool = False,
) -> ObjType:
    """Decode a serialized payload back into a Python object.

    Args:
        buffer: The encoded payload; the first ``len(HEADER)`` bytes are skipped.
        modify_input: When true and ``buffer`` is exactly a ``bytearray``, the
            buffer is consumed (bytes are deleted from it) while decoding.
        encoding: Text encoding for strings; the ``bytes.decode`` default is
            used when it is falsy.
        use_tuples: Requests tuples instead of lists for decoded sequences.
        use_pointers: Keep a table of decoded strings so pointer entries can
            be resolved.
        stop_gc: Wrap the decode in ``gc.freeze()`` / ``gc.unfreeze()``.
            NOTE(review): ``gc.freeze`` does not disable collection; confirm
            this is the intended meaning of "stop".

    Returns:
        The decoded object.
    """
    try:
        if stop_gc:
            gc.freeze()

        pointer_table = {} if use_pointers else None
        settings = Settings(
            encoding=encoding,
            use_tuples=use_tuples,  # TODO
            use_pointers=use_pointers,
            pointers=pointer_table,
        )
        header_size = len(HEADER)
        consume_in_place = modify_input and type(buffer) is bytearray
        if not consume_in_place:
            # Read-only walk: an index is threaded through the decoder.
            return deserialize_object(buffer, header_size, settings)[0]

        # Destructive walk: remember the full size first so absolute offsets
        # can still be computed after bytes have been deleted.
        total_length = len(buffer)
        del buffer[:header_size]
        return deserialize_object_from_bytearray(buffer, total_length, settings)
    finally:
        if stop_gc:
            gc.unfreeze()
49
+
50
+
51
def deserialize_object_from_bytearray(buffer: bytearray, original_buffer_len: int, settings: Settings) -> ObjType:
    """Decode one value from the front of ``buffer``, deleting every byte it consumes.

    Args:
        buffer: Remaining encoded bytes, starting at the flag byte of the value
            to decode. Mutated in place.
        original_buffer_len: Length of the buffer before anything was deleted;
            used to compute absolute offsets for the string pointer table.
        settings: Decoding options (text encoding, tuple/list choice, pointers).

    Returns:
        The decoded object. Sequences are tuples when ``settings.use_tuples``
        is set, lists otherwise.

    Raises:
        DecodingError: For a flag that is not valid at this position, an
            unknown element type in a typed list, a truncated boolean list, or
            a pointer entry while pointers are disabled.
        NotImplementedError: For consistent-type dicts.
    """
    flag = buffer[0]
    del buffer[0]
    if flag == POSITIVE_INT_FLAG:
        num, end = decode_number(buffer, 0)
        del buffer[:end]
        return num
    elif flag == NEGATIVE_INT_FLAG:
        num, end = decode_number(buffer, 0)
        del buffer[:end]
        return -num
    elif flag == FLOAT_FLAG:
        num = struct.unpack(BIG_ENDIAN_DOUBLE_FORMAT, buffer[:BYTES_IN_DOUBLE])[0]
        del buffer[:BYTES_IN_DOUBLE]
        return num
    elif flag == STR_FLAG:
        return deserialize_str_from_bytearray(buffer, original_buffer_len, settings)
    elif flag == EMPTY_STR_FLAG:
        return ""
    elif flag == BYTES_FLAG:
        length, pointer = decode_number(buffer, 0)
        byts = buffer[pointer:pointer + length]
        del buffer[:pointer + length]
        return bytes(byts)
    elif flag == EMPTY_BYTES_FLAG:
        return b""
    elif flag == TRUE_FLAG:
        return True
    elif flag == FALSE_FLAG:
        return False
    elif flag == NULL_FLAG:
        return None
    elif flag == LIST_FLAG:
        length, pointer = decode_number(buffer, 0)
        del buffer[:pointer]
        if settings.use_tuples:
            return tuple(deserialize_object_from_bytearray(buffer, original_buffer_len, settings) for _ in range(length))
        return [deserialize_object_from_bytearray(buffer, original_buffer_len, settings) for _ in range(length)]
    elif flag == EMPTY_LIST_FLAG:
        return tuple() if settings.use_tuples else []
    elif flag == CONSISTENT_TYPE_LIST_FLAG:
        # Layout: element-type flag, element count, then the untagged elements.
        typ_flag = buffer[0]
        length, pointer = decode_number(buffer, 1)
        del buffer[:pointer]
        if typ_flag == NULL_FLAG:
            return ((None,) if settings.use_tuples else [None]) * length
        elif typ_flag == INT_FLAG:
            def extract_number(_buffer: bytearray) -> int:
                # A leading sign byte means the magnitude that follows is negated.
                if _buffer[0] == NEGATIVE_NUMBER_SIGN:
                    _num, _pointer = decode_number(_buffer, 1, base=NUMBER_BASE - 1)
                    del _buffer[:_pointer]
                    return -_num
                _num, _pointer = decode_number(_buffer, 0, base=NUMBER_BASE - 1)
                del _buffer[:_pointer]
                return _num

            items = [extract_number(buffer) for _ in range(length)]
        elif typ_flag == BOOL_FLAG:
            # Booleans are bit-packed, most significant bit first.
            length_in_bytes = math.ceil(length / NUMBER_OF_BITS_IN_BYTE)
            packed = buffer[:length_in_bytes]
            if len(packed) < length_in_bytes:
                raise DecodingError("Unexpected end of data while reading a list of booleans")
            # Consume the packed bytes so whatever follows this list is decoded
            # from the right position.
            del buffer[:length_in_bytes]
            items = [
                ((byte << shift) & LEFTMOST_BIT_MASK) == LEFTMOST_BIT_MASK
                for byte in packed
                for shift in range(NUMBER_OF_BITS_IN_BYTE)
            ]
            # Drop the padding bits of the last byte.
            del items[length:]
        elif typ_flag == BYTES_FLAG:
            items = []
            for _ in range(length):
                item_length, pointer = decode_number(buffer, 0)
                items.append(bytes(buffer[pointer:pointer + item_length]))
                del buffer[:pointer + item_length]
        elif typ_flag == STR_FLAG:
            items = [deserialize_str_from_bytearray(buffer, original_buffer_len, settings) for _ in range(length)]
        elif typ_flag == FLOAT_FLAG:
            items = []
            for _ in range(length):
                items.append(struct.unpack(BIG_ENDIAN_DOUBLE_FORMAT, buffer[:BYTES_IN_DOUBLE])[0])
                del buffer[:BYTES_IN_DOUBLE]
        else:
            raise DecodingError(f"Unexpected type flag: {typ_flag}")
        return tuple(items) if settings.use_tuples else items
    elif flag == DICT_FLAG:
        length, pointer = decode_number(buffer, 0)
        del buffer[:pointer]
        # The key expression is evaluated before the value expression, which
        # matches the key-then-value order of the encoded data.
        return {
            deserialize_object_from_bytearray(buffer, original_buffer_len, settings): deserialize_object_from_bytearray(buffer, original_buffer_len, settings)
            for _ in range(length)
        }
    elif flag == EMPTY_DICT_FLAG:
        return {}
    elif flag == STR_KEY_DICT_FLAG:
        length, pointer = decode_number(buffer, 0)
        del buffer[:pointer]
        return {
            deserialize_str_from_bytearray(buffer, original_buffer_len, settings): deserialize_object_from_bytearray(buffer, original_buffer_len, settings)
            for _ in range(length)
        }
    elif flag == CONSISTENT_TYPE_DICT_FLAG:
        raise NotImplementedError("not implemented yet")  # todo
    elif flag == POINTER_FLAG:
        position, pointer = decode_number(buffer, 0)
        del buffer[:pointer]
        if settings.pointers is None:
            raise DecodingError("Encountered a pointer while use_pointers is disabled")
        return settings.pointers[position]
    elif flag == BOOL_FLAG:
        raise DecodingError("unexpected flag: BOOL")
    elif flag == INT_FLAG:
        raise DecodingError("unexpected flag: INT")
    else:
        # Any other flag byte is looked up as a directly encoded small integer.
        return REVERSE_SMALL_INTS[flag]
168
+
169
+
170
def deserialize_str_from_bytearray(buffer: bytearray, original_buffer_len: int, settings: Settings) -> str:
    """Decode a length-prefixed string from the front of ``buffer`` and consume it.

    The string's absolute offset (computed from how much of the original
    buffer has already been deleted) is recorded in ``settings.pointers`` when
    pointers are enabled.
    """
    start_offset = original_buffer_len - len(buffer)
    byte_count, prefix_end = decode_number(buffer, 0)
    payload_end = prefix_end + byte_count
    raw = buffer[prefix_end:payload_end]
    del buffer[:payload_end]
    if settings.encoding:
        text = raw.decode(encoding=settings.encoding)
    else:
        text = raw.decode()
    if settings.use_pointers:
        settings.pointers[start_offset] = text
    return text
179
+
180
+
181
def deserialize_object(buffer: bytes, pointer: int, settings: Settings) -> Tuple[ObjType, int]:
    """Decode the value whose flag byte is at ``buffer[pointer]`` without mutating ``buffer``.

    Args:
        buffer: The complete encoded payload.
        pointer: Index of the flag byte of the value to decode.
        settings: Decoding options (text encoding, tuple/list choice, pointers).

    Returns:
        A ``(value, next_pointer)`` pair, where ``next_pointer`` is the index of
        the first byte after the decoded value. Sequences are tuples when
        ``settings.use_tuples`` is set, lists otherwise.

    Raises:
        DecodingError: For a flag that is not valid at this position, an
            unknown element type in a typed list, a truncated boolean list, or
            a pointer entry while pointers are disabled.
        NotImplementedError: For consistent-type dicts.
    """
    flag = buffer[pointer]
    pointer += 1
    if flag == POSITIVE_INT_FLAG:
        return decode_number(buffer, pointer)
    elif flag == NEGATIVE_INT_FLAG:
        num, pointer = decode_number(buffer, pointer)
        return -num, pointer
    elif flag == INT_FLAG:
        raise DecodingError("unexpected flag")
    elif flag == FLOAT_FLAG:
        num = struct.unpack(BIG_ENDIAN_DOUBLE_FORMAT, buffer[pointer:pointer + BYTES_IN_DOUBLE])[0]
        return num, pointer + BYTES_IN_DOUBLE
    elif flag == STR_FLAG:
        return deserialize_str(buffer, pointer, settings)
    elif flag == EMPTY_STR_FLAG:
        return "", pointer
    elif flag == BYTES_FLAG:
        length, pointer = decode_number(buffer, pointer)
        return bytes(buffer[pointer:pointer + length]), pointer + length
    elif flag == EMPTY_BYTES_FLAG:
        return b"", pointer
    elif flag == BOOL_FLAG:
        raise DecodingError("unexpected flag")
    elif flag == TRUE_FLAG:
        return True, pointer
    elif flag == FALSE_FLAG:
        return False, pointer
    elif flag == NULL_FLAG:
        return None, pointer
    elif flag == LIST_FLAG:
        length, pointer = decode_number(buffer, pointer)
        res_list = [None] * length
        for i in range(length):
            res_list[i], pointer = deserialize_object(buffer, pointer, settings)
        return (tuple(res_list) if settings.use_tuples else res_list), pointer
    elif flag == EMPTY_LIST_FLAG:
        return (tuple() if settings.use_tuples else []), pointer
    elif flag == CONSISTENT_TYPE_LIST_FLAG:
        # Layout: element-type flag, element count, then the untagged elements.
        typ_flag = buffer[pointer]
        length, pointer = decode_number(buffer, pointer + 1)
        if typ_flag == NULL_FLAG:
            return ((None,) if settings.use_tuples else [None]) * length, pointer
        elif typ_flag == INT_FLAG:
            items = typing.cast(List[int], [None] * length)
            for i in range(length):
                # A leading sign byte means the magnitude that follows is negated.
                if buffer[pointer] == NUMBER_BASE - 1:
                    num, pointer = decode_number(buffer, pointer + 1, base=NUMBER_BASE - 1)
                    items[i] = -num
                else:
                    num, pointer = decode_number(buffer, pointer, base=NUMBER_BASE - 1)
                    items[i] = num
        elif typ_flag == BOOL_FLAG:
            # Booleans are bit-packed, most significant bit first.
            length_in_bytes = math.ceil(length / NUMBER_OF_BITS_IN_BYTE)
            packed = buffer[pointer:pointer + length_in_bytes]
            if len(packed) < length_in_bytes:
                raise DecodingError("Unexpected end of data while reading a list of booleans")
            items = [
                ((byte << shift) & LEFTMOST_BIT_MASK) == LEFTMOST_BIT_MASK
                for byte in packed
                for shift in range(NUMBER_OF_BITS_IN_BYTE)
            ]
            # Drop the padding bits of the last byte.
            del items[length:]
            pointer += length_in_bytes
        elif typ_flag == BYTES_FLAG:
            items = typing.cast(List[bytes], [None] * length)
            for i in range(length):
                bytes_length, pointer = decode_number(buffer, pointer)
                items[i] = bytes(buffer[pointer:pointer + bytes_length])
                pointer += bytes_length
        elif typ_flag == STR_FLAG:
            items = typing.cast(List[str], [None] * length)
            for i in range(length):
                items[i], pointer = deserialize_str(buffer, pointer, settings)
        elif typ_flag == FLOAT_FLAG:
            items = typing.cast(List[float], [None] * length)
            for i in range(length):
                items[i] = struct.unpack(BIG_ENDIAN_DOUBLE_FORMAT, buffer[pointer:pointer + BYTES_IN_DOUBLE])[0]
                pointer += BYTES_IN_DOUBLE
        else:
            raise DecodingError(f"Unexpected type flag: {typ_flag}")
        return (tuple(items) if settings.use_tuples else items), pointer
    elif flag == DICT_FLAG:
        length, pointer = decode_number(buffer, pointer)
        res_dict = {}
        for _ in range(length):
            k, pointer = deserialize_object(buffer, pointer, settings)
            v, pointer = deserialize_object(buffer, pointer, settings)
            res_dict[k] = v
        return res_dict, pointer
    elif flag == EMPTY_DICT_FLAG:
        return {}, pointer
    elif flag == STR_KEY_DICT_FLAG:
        length, pointer = decode_number(buffer, pointer)
        res_dict = {}
        for _ in range(length):
            # Keys are stored without a type flag because they are all strings.
            k, pointer = deserialize_str(buffer, pointer, settings)
            v, pointer = deserialize_object(buffer, pointer, settings)
            res_dict[k] = v
        return res_dict, pointer
    elif flag == CONSISTENT_TYPE_DICT_FLAG:
        raise NotImplementedError("not implemented yet")  # todo
    elif flag == POINTER_FLAG:
        position, pointer = decode_number(buffer, pointer)
        if settings.pointers is None:
            raise DecodingError("Encountered a pointer while use_pointers is disabled")
        return settings.pointers[position], pointer
    else:
        # Any other flag byte is looked up as a directly encoded small integer.
        return REVERSE_SMALL_INTS[flag], pointer
292
+
293
+
294
def deserialize_str(buffer: bytes, pointer: int, settings: Settings) -> Tuple[str, int]:
    """Decode the length-prefixed string starting at ``buffer[pointer]``.

    Returns the string together with the index of the first byte after it.
    When pointers are enabled the string is registered in
    ``settings.pointers`` under the offset of its length prefix.
    """
    prefix_offset = pointer
    byte_count, payload_start = decode_number(buffer, pointer)
    payload_end = payload_start + byte_count
    raw = buffer[payload_start:payload_end]
    if settings.encoding:
        text = raw.decode(encoding=settings.encoding)
    else:
        text = raw.decode()
    if settings.use_pointers:
        settings.pointers[prefix_offset] = text
    return text, payload_end
@@ -0,0 +1,10 @@
1
+ from dataclasses import dataclass
2
+ from typing import Dict, Optional
3
+
4
+
5
@dataclass
class Settings:
    # Options shared by the decoding functions for one load call.

    # Text encoding for strings; a falsy value means the bytes.decode default.
    encoding: Optional[str]
    # Whether decoded sequences should be tuples instead of lists.
    use_tuples: bool
    # Whether decoded strings are recorded so pointer entries can be resolved.
    use_pointers: bool
    # Maps the offset of a string's length prefix to the decoded string;
    # the loader passes None here when use_pointers is false.
    pointers: Optional[Dict[int, str]]
@@ -0,0 +1,15 @@
1
+ from typing import Tuple
2
+
3
+ from consts import ByteLike, NUMBER_BASE, ENDING_FLAG
4
+
5
+
6
def decode_number(num: ByteLike, pointer: int, base: int = NUMBER_BASE) -> Tuple[int, int]:
    """Decode a non-negative integer starting at ``num[pointer]``.

    A value is stored either as a single byte, or as ``ENDING_FLAG``, its
    digits in ``base`` (least significant first), and a closing ``ENDING_FLAG``.

    Args:
        num: The encoded bytes.
        pointer: Index of the first byte of the number.
        base: Radix of the multi-byte digit form.

    Returns:
        ``(value, next_pointer)`` where ``next_pointer`` is the index just past
        the encoded number.

    Raises:
        IndexError: If the data ends before the number is complete.
    """
    first = num[pointer]
    if first != ENDING_FLAG:
        return first, pointer + 1
    res = 0
    # Running place value; avoids recomputing ``base ** power`` for every digit.
    multiplier = 1
    pointer += 1
    while True:
        digit = num[pointer]
        pointer += 1
        if digit == ENDING_FLAG:
            return res, pointer
        res += digit * multiplier
        multiplier *= base
File without changes
@@ -0,0 +1,222 @@
1
+ import struct
2
+ from datetime import datetime
3
+ from typing import Union, List, Tuple
4
+
5
+ from consts import NUMBER_BASE, ObjType, POSITIVE_INT_FLAG, FALSE_FLAG, TRUE_FLAG, NULL_FLAG, BYTES_FLAG, \
6
+ LIST_FLAG, \
7
+ DICT_FLAG, STR_KEY_DICT_FLAG, FLOAT_FLAG, STR_FLAG, NEGATIVE_INT_FLAG, EMPTY_STR_FLAG, EMPTY_BYTES_FLAG, \
8
+ EMPTY_LIST_FLAG, EMPTY_DICT_FLAG, SMALL_INTS, CONSISTENT_TYPE_LIST_FLAG, INT_FLAG, BOOL_FLAG, POINTER_FLAG, HEADER, \
9
+ BIG_ENDIAN_DOUBLE_FORMAT, NUMBER_OF_BITS_IN_BYTE
10
+ from exceptions import EncodingError
11
+ from src.pinch_python.serialize.settings import Settings
12
+ from src.pinch_python.serialize.utils import encode_number
13
+
14
# Pre-compiled packer for floats, bound once at import time so the format
# string is not re-parsed on every call.
_pack_double = struct.Struct(BIG_ENDIAN_DOUBLE_FORMAT).pack
15
+
16
+
17
def dump_bytes(obj: ObjType, *, allow_non_string_keys: bool = True, modify_input: bool = False, encoding: str = None,
               use_pointers: bool = False, serialize_dates: bool = True) -> bytearray:
    """Serialize ``obj`` into a new buffer that starts with ``HEADER``.

    Args:
        obj: The object to serialize.
        allow_non_string_keys: When false, a dict with a non-string key raises
            ``EncodingError``.
        modify_input: Stored in the settings; not acted upon yet (TODO).
        encoding: Text encoding for strings; the ``str.encode`` default is used
            when it is falsy.
        use_pointers: Write a repeated string as a reference to its first
            occurrence when the reference is not longer than the string.
        serialize_dates: Write ``datetime`` objects as their ISO-format string.

    Returns:
        The encoded payload.

    Raises:
        EncodingError: If ``obj`` contains a value that cannot be serialized.
    """
    settings = Settings(
        allow_non_string_keys=allow_non_string_keys,
        modify_input=modify_input,  # TODO
        encoding=encoding,
        # Forward the caller's choice; the flag and the pointer table below
        # must agree or the table is never consulted.
        use_pointers=use_pointers,
        pointers={} if use_pointers else None,
        serialize_dates=serialize_dates,
    )
    buffer = bytearray(HEADER)
    serialize_object_with_type(buffer, obj, settings)
    return buffer
30
+
31
+
32
def serialize_object_with_type(buffer: bytearray, obj: ObjType, settings: Settings) -> None:
    """Append ``obj`` to ``buffer``, preceded by the flag byte(s) identifying its type.

    Args:
        buffer: Output buffer, extended in place.
        obj: The value to encode.
        settings: Encoding options (text encoding, pointers, key and date
            policies).

    Raises:
        EncodingError: For an unsupported type, or for a non-string dict key
            while ``settings.allow_non_string_keys`` is false.
    """
    typ = type(obj)
    if typ is str:
        encode_normally = True
        if len(obj) == 0:
            buffer.append(EMPTY_STR_FLAG)
            encode_normally = False
        # todo python 3.9
        elif settings.use_pointers and (prev_pos := settings.pointers.get(obj)):
            temp_buffer = bytearray()
            temp_buffer.append(POINTER_FLAG)
            encode_number(temp_buffer, prev_pos)
            # Only use the reference when it is not longer than the string.
            if len(temp_buffer) <= len(obj) + 1:
                buffer.extend(temp_buffer)
                encode_normally = False
        if encode_normally:
            buffer.append(STR_FLAG)
            if settings.use_pointers:
                # Remember where the length prefix starts (just after the flag).
                settings.pointers[obj] = len(buffer)
            encoded_str = obj.encode(encoding=settings.encoding) if settings.encoding else obj.encode()
            encode_number(buffer, len(encoded_str))
            buffer.extend(encoded_str)
    elif typ is int:
        if num_byte := SMALL_INTS.get(obj):
            buffer.append(num_byte)
        elif obj > 0:
            buffer.append(POSITIVE_INT_FLAG)
            encode_number(buffer, obj)
        else:
            buffer.append(NEGATIVE_INT_FLAG)
            encode_number(buffer, -obj)
    elif typ is bool:
        buffer.append(TRUE_FLAG if obj else FALSE_FLAG)
    elif obj is None:
        buffer.append(NULL_FLAG)
    elif typ is list or typ is tuple:
        if len(obj) == 0:
            buffer.append(EMPTY_LIST_FLAG)
        elif is_consistent_type_list(obj, settings):
            first_type = type(obj[0])
            if first_type is str and settings.use_pointers:
                serialize_normal_list(buffer, obj, settings)
            elif obj[0] is None:
                # Only the count is needed for a list of nulls.
                buffer.append(CONSISTENT_TYPE_LIST_FLAG)
                buffer.append(NULL_FLAG)
                encode_number(buffer, len(obj))
            elif first_type is int:
                # no longer have the flag to distinguish between positive and negative numbers so do this instead
                buffer.append(CONSISTENT_TYPE_LIST_FLAG)
                buffer.append(INT_FLAG)
                encode_number(buffer, len(obj))
                for item in obj:
                    if item <= 0:
                        # Sign marker byte, then the magnitude in the reduced base.
                        buffer.append(NUMBER_BASE - 1)
                        encode_number(buffer, -item, base=NUMBER_BASE - 1)
                    else:
                        encode_number(buffer, item, base=NUMBER_BASE - 1)
            elif first_type is bool:
                buffer.append(CONSISTENT_TYPE_LIST_FLAG)
                buffer.append(BOOL_FLAG)
                encode_number(buffer, len(obj))
                # Pack the booleans into bytes, most significant bit first.
                byte = number_of_bits = 0
                for item in obj:
                    byte = (byte << 1) | item
                    number_of_bits += 1
                    if number_of_bits == NUMBER_OF_BITS_IN_BYTE:
                        buffer.append(byte)
                        byte = number_of_bits = 0
                if number_of_bits:
                    # Left-align the final partial byte; the low bits are padding.
                    buffer.append(byte << (NUMBER_OF_BITS_IN_BYTE - number_of_bits))
            else:
                buffer.append(CONSISTENT_TYPE_LIST_FLAG)
                try:
                    buffer.append({str: STR_FLAG, bytes: BYTES_FLAG, float: FLOAT_FLAG, datetime: STR_FLAG}[first_type])
                except KeyError:
                    raise EncodingError(f"Unexpected type: {first_type}") from None

                encode_number(buffer, len(obj))
                for item in obj:
                    serialize_object_without_type(buffer, item, settings)
        else:
            serialize_normal_list(buffer, obj, settings)
    elif typ is dict:
        if len(obj) == 0:
            buffer.append(EMPTY_DICT_FLAG)
        else:
            all_str_keys = all(type(k) is str for k in obj)
            # Enforce the key policy regardless of which dict layout is used.
            if not all_str_keys and not settings.allow_non_string_keys:
                raise EncodingError("Encountered a non string key while allow_non_string_keys is False")
            if all_str_keys and not settings.use_pointers:
                # Keys are written without a type flag.
                buffer.append(STR_KEY_DICT_FLAG)
                encode_number(buffer, len(obj))
                for k, v in obj.items():
                    serialize_object_without_type(buffer, k, settings)
                    serialize_object_with_type(buffer, v, settings)
            else:
                buffer.append(DICT_FLAG)
                encode_number(buffer, len(obj))
                for k, v in obj.items():
                    serialize_object_with_type(buffer, k, settings)
                    serialize_object_with_type(buffer, v, settings)
    elif typ is float:
        buffer.append(FLOAT_FLAG)
        buffer.extend(_pack_double(obj))
    elif typ is bytes:
        if len(obj) == 0:
            buffer.append(EMPTY_BYTES_FLAG)
        else:
            buffer.append(BYTES_FLAG)
            encode_number(buffer, len(obj))
            buffer.extend(obj)
    elif typ is datetime and settings.serialize_dates:
        # Dates are written as their ISO-format string.
        return serialize_object_with_type(buffer, obj.isoformat(), settings)
    else:
        if typ is datetime:
            raise EncodingError("Unexpected type: datetime, with flag serialize_dates disabled")
        raise EncodingError(f"Unexpected type: {typ}")
153
+
154
+
155
def serialize_normal_list(buffer: bytearray, obj: Union[List, Tuple], settings: Settings) -> None:
    """Write ``obj`` as a generic list: list flag, element count, then each element with its own type flag."""
    buffer.append(LIST_FLAG)
    element_count = len(obj)
    encode_number(buffer, element_count)
    for element in obj:
        serialize_object_with_type(buffer, element, settings)
160
+
161
+
162
def is_consistent_type_list(obj: Union[List, Tuple], settings: Settings) -> bool:
    """Tell whether every element of ``obj`` has exactly the same type as the first.

    Sequences with fewer than two elements, or whose first element is a
    list, dict or tuple, never qualify. With pointers enabled, a sequence of
    strings qualifies only if none of them is already in the pointer table.
    """
    if len(obj) < 2:
        return False
    expected = type(obj[0])
    if expected in (list, dict, tuple):
        return False
    check_pointer_table = expected is str and settings.use_pointers
    for element in obj:
        if type(element) is not expected:
            return False
        if check_pointer_table and element in settings.pointers:
            return False
    return True
171
+
172
+
173
def serialize_object_without_type(buffer: bytearray, obj: ObjType, settings: Settings) -> None:
    """Append ``obj`` to ``buffer`` without a leading type flag.

    Used where the reader already knows the type: the elements of a
    consistent-type list and the keys of a string-keyed dict.

    Raises:
        EncodingError: For an unsupported type, or for a non-string dict key
            while ``settings.allow_non_string_keys`` is false.
    """
    typ = type(obj)
    if typ is int:
        # NOTE(review): only the magnitude is written here, so the sign of a
        # negative value is not recoverable - confirm no caller relies on it.
        encode_number(buffer, obj if obj > 0 else -obj)
    elif typ is bool:
        buffer.append(TRUE_FLAG if obj else FALSE_FLAG)
    elif obj is None:
        buffer.append(NULL_FLAG)
    elif typ is bytes:
        encode_number(buffer, len(obj))
        buffer.extend(obj)
    elif typ is list or typ is tuple:
        # Element count, then each element with its own type flag.
        encode_number(buffer, len(obj))
        for item in obj:
            serialize_object_with_type(buffer, item, settings)
    elif typ is dict:
        # NOTE(review): this branch still writes dict flag bytes even though
        # the function is the "without type" variant - verify against the reader.
        if len(obj) == 0:
            buffer.append(EMPTY_DICT_FLAG)
        elif not settings.use_pointers and not settings.allow_non_string_keys:
            buffer.append(STR_KEY_DICT_FLAG)
            encode_number(buffer, len(obj))
            for k, v in obj.items():
                if type(k) is not str:
                    raise EncodingError("Encountered a non string key while allow_non_string_keys is False")
                serialize_object_without_type(buffer, k, settings)
                serialize_object_with_type(buffer, v, settings)
        elif not settings.use_pointers and all(type(x) is str for x in obj.keys()):
            buffer.append(STR_KEY_DICT_FLAG)
            encode_number(buffer, len(obj))
            for k, v in obj.items():
                serialize_object_without_type(buffer, k, settings)
                serialize_object_with_type(buffer, v, settings)
        else:
            buffer.append(DICT_FLAG)
            encode_number(buffer, len(obj))
            for k, v in obj.items():
                serialize_object_with_type(buffer, k, settings)
                serialize_object_with_type(buffer, v, settings)
    elif typ is float:
        buffer.extend(_pack_double(obj))
    elif typ is str:
        encoded_str = obj.encode(encoding=settings.encoding) if settings.encoding else obj.encode()
        if settings.use_pointers:
            # Record the offset of the length prefix written on the next line.
            settings.pointers[obj] = len(buffer)
        encode_number(buffer, len(encoded_str))
        buffer.extend(encoded_str)
    elif typ is datetime and settings.serialize_dates:
        # Dates are written as their ISO-format string.
        return serialize_object_without_type(buffer, obj.isoformat(), settings)
    else:
        raise EncodingError(f"Unexpected type: {typ}")
@@ -0,0 +1,12 @@
1
+ from dataclasses import dataclass
2
+ from typing import Dict
3
+
4
+
5
@dataclass
class Settings:
    # Options shared by the encoding functions for one dump call.

    # When false, a dict with a non-string key is rejected.
    allow_non_string_keys: bool
    # Stored but not read by the serialization functions shown in this module set.
    modify_input: bool
    # Text encoding for strings; a falsy value (the dump default is None)
    # means the str.encode default.
    encoding: str
    # Whether repeated strings may be written as references.
    use_pointers: bool
    # Maps an already written string to the buffer offset of its length
    # prefix; None is passed when pointers were not requested.
    pointers: Dict
    # Whether datetime objects are written as their ISO-format string.
    serialize_dates: bool
@@ -0,0 +1,12 @@
1
+ from consts import NUMBER_BASE, ENDING_FLAG
2
+
3
+
4
def encode_number(buffer: bytearray, num: int, base: int = NUMBER_BASE) -> None:
    """Append the non-negative integer ``num`` to ``buffer``.

    Values below ``base`` take a single byte. Larger values are written as
    ``ENDING_FLAG``, the digits in ``base`` (least significant first), and a
    closing ``ENDING_FLAG``.
    """
    if num < base:
        buffer.append(num)
        return

    buffer.append(ENDING_FLAG)
    remaining = num
    while remaining:
        remaining, digit = divmod(remaining, base)
        buffer.append(digit)
    buffer.append(ENDING_FLAG)
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.4
2
+ Name: pypinch
3
+ Version: 0.0.1
4
+ Summary: Schemaless binary serialization format without limitations
5
+ Author-email: Aharon Sambol <aharon.sambol@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/AharonSambol/pypinch
8
+ Project-URL: Issues, https://github.com/AharonSambol/pypinch/issues
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Topic :: Utilities
12
+ Classifier: Topic :: File Formats
13
+ Classifier: Typing :: Typed
14
+ Classifier: Intended Audience :: Developers
15
+ Requires-Python: >=3.8
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Dynamic: license-file
@@ -0,0 +1,19 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ requirements.txt
5
+ src/__init__.py
6
+ src/pinch_python/__init__.py
7
+ src/pinch_python/deserialize/__init__.py
8
+ src/pinch_python/deserialize/deserialize.py
9
+ src/pinch_python/deserialize/settings.py
10
+ src/pinch_python/deserialize/utils.py
11
+ src/pinch_python/serialize/__init__.py
12
+ src/pinch_python/serialize/serialize.py
13
+ src/pinch_python/serialize/settings.py
14
+ src/pinch_python/serialize/utils.py
15
+ src/pypinch.egg-info/PKG-INFO
16
+ src/pypinch.egg-info/SOURCES.txt
17
+ src/pypinch.egg-info/dependency_links.txt
18
+ src/pypinch.egg-info/top_level.txt
19
+ tests/test__serialize_deserialize.py
@@ -0,0 +1,2 @@
1
+ __init__
2
+ pinch_python
@@ -0,0 +1,255 @@
1
+ import copy
2
+ import datetime
3
+ import math
4
+ import sys
5
+
6
+ if sys.version_info.major == 3 and sys.version_info.minor < 9:
7
+ from backports.zoneinfo import ZoneInfo
8
+ else:
9
+ from zoneinfo import ZoneInfo
10
+
11
+ import pytest
12
+
13
+ from src import pinch_python
14
+
15
# Round-trip fixtures shared by the parametrized tests below. Each entry is a
# one-element argument tuple (or a pytest.param with an id) holding the object.
ALL_TYPES_OF_OBJECTS = [
    # Integers: small, large, zero and negative.
    (1231,),
    (1,),
    (332634,),
    (5437890568343289547384,),
    (0,),
    (-1231,),
    (-1,),
    (-332634,),
    (-5437890568343289547384,),

    # Floats.
    (1.000000000001,),
    (100000000000000000000000000000000000000000.1,),
    (23423523.543262346234,),
    (4.4,),
    (-1.000000000001,),
    (-100000000000000000000000000000000000000000.1,),
    (-23423523.543262346234,),
    (-4.4,),

    (math.inf,),
    (-math.inf,),

    # Strings.
    ("afsag",),
    pytest.param("092u384oiwjrklsgmfoisgjldkxfmoweij;lksgzwaoi;elgjskznwoi;jetlaksfdnv" * 1_000_000, id="long string"),
    pytest.param("".join(chr(i) for i in range(10000)), id="lots of unicode"),
    ("",),

    # Bytes.
    (b"1234",),
    (b"abcdefghijklmnopqrstuvwxyz",),
    (b"",),
    # NOTE(review): bytes(i) is i zero bytes, so this is a long run of zeros
    # rather than many distinct byte values - confirm that is intended.
    pytest.param(b"".join(bytes(i) for i in range(10000)), id="lots of bytes"),

    # Singletons.
    (None,),
    (True,),
    (False,),

    # Lists: single-type and mixed.
    ([None] * 10,),
    ([b"1234", b"asgsa", b"sgaeg4we"],),
    ([0.1, 0.2, 0.3, 0.4],),
    ([-91, 0, 1, 2, 3, 4, 5, 6, 7, 8],),
    (list(range(50, 1000)),),
    (["aaaa", "aaaa", "aaaa"],),
    ([1, "asdg", b"234sa", 4.5, [1, 2, 3, 4, 5], False, [], None],),

    # Dicts: string keys, then mixed key types.
    ({"a": "sdgaeiogn", "waegw": 123, "sdagweg": list(range(10)), "aegsag": {"asdg": 235, "Asg": b"asg"}},),
    ({1: "afdbda", "ar": "23wesd", False: 23453, 1234: 12324356, "": {"sgdfn32rwefsdvre": 34}},),

    # Dict carrying a very large bytes value.
    ({"a": "sdgaeiogn", "content": b"1243567" * 1024 * 1024 * 50, "sdagweg": list(range(10)),
      "aegsag": {"asdg": 235, "Asg": b"asg"}},),
    pytest.param([True, False, False] * 1000, id="list of booleans"),
]
67
+
68
+
69
@pytest.mark.parametrize(
    ["obj"],
    ALL_TYPES_OF_OBJECTS
)
def test__serialize_deserialize__modify_input(obj):
    """Round-trip every fixture with ``modify_input=True`` passed to the serializer."""
    # Arrange
    # Deep copy so the comparison does not depend on obj staying untouched.
    original_obj = copy.deepcopy(obj)

    # Act
    serialized = pinch_python.dump_bytes(obj, modify_input=True)
    unserialized = pinch_python.load_bytes(serialized)

    # Assert
    assert unserialized == original_obj
83
+
84
+
85
def test__serialize_deserialize__nan():
    """NaN survives a round trip; checked with ``math.isnan`` because NaN != NaN."""
    # Act
    serialized = pinch_python.dump_bytes(float("nan"))
    unserialized = pinch_python.load_bytes(serialized)

    # Assert
    assert math.isnan(unserialized)
92
+
93
@pytest.mark.parametrize(
    ["input_tuple", "expected"],
    [
        (tuple(), []),
        ((1, 2, 3), [1, 2, 3]),
        ((((),),), [[[]]]),
        ((1, None, 2.3, "rtjg", b"5y4rthf", [], {}, tuple()), [1, None, 2.3, "rtjg", b"5y4rthf", [], {}, []]),
    ]
)
def test__tuples_serialize_deserialize__into_list(input_tuple, expected):
    """Tuples are serialized like lists and come back as lists by default."""
    # Act
    # Exercise the parametrized input itself, not an unrelated NaN value.
    serialized = pinch_python.dump_bytes(input_tuple)
    unserialized = pinch_python.load_bytes(serialized)

    # Assert
    assert unserialized == expected
109
+
110
@pytest.mark.parametrize(
    ["obj", "expected"],
    [
        (datetime.datetime(2026, 10, 4, 23, 2, 9, 53, tzinfo=datetime.timezone.utc), "2026-10-04T23:02:09.000053+00:00"),
        (
            [
                datetime.datetime(2026, 10, 4, 23, 2, 9, 53, tzinfo=datetime.timezone.utc),
                datetime.datetime(1995, 1, 2, 6, 3, 18, tzinfo=ZoneInfo("America/Los_Angeles")),
                datetime.datetime(2050, 4, 1, tzinfo=ZoneInfo("Asia/Kolkata")),
            ],
            [
                "2026-10-04T23:02:09.000053+00:00",
                "1995-01-02T06:03:18-08:00",
                "2050-04-01T00:00:00+05:30",
            ]
        ),
    ]
)
def test__serialize_unknown_types(obj, expected):
    """Datetimes (alone and in a list) are expected to decode as ISO-format strings."""
    # Act
    serialized = pinch_python.dump_bytes(obj)
    unserialized = pinch_python.load_bytes(serialized)

    # Assert
    assert unserialized == expected
135
+
136
+
137
@pytest.mark.parametrize(
    ["obj"],
    ALL_TYPES_OF_OBJECTS
)
def test__serialize_deserialize__with_pointers(obj):
    """Round-trip every fixture with ``use_pointers=True`` passed to the serializer."""
    # Arrange
    original_obj = copy.deepcopy(obj)

    # Act
    serialized = pinch_python.dump_bytes(obj, use_pointers=True)
    unserialized = pinch_python.load_bytes(serialized)

    # Assert
    assert unserialized == original_obj
151
+
152
+
153
@pytest.mark.parametrize(
    ["obj"],
    ALL_TYPES_OF_OBJECTS
)
def test__serialize_deserialize__dont_modify_input(obj):
    """With ``modify_input=False`` the round trip succeeds and ``obj`` is left unchanged."""
    # Arrange
    original_obj = copy.deepcopy(obj)

    # Act
    serialized = pinch_python.dump_bytes(obj, modify_input=False)
    unserialized = pinch_python.load_bytes(serialized)

    # Assert
    assert unserialized == original_obj
    # The serializer must not have mutated its input.
    assert obj == original_obj
168
+
169
+
170
@pytest.mark.parametrize(
    ["obj"],
    ALL_TYPES_OF_OBJECTS
)
def test__serialize_deserialize__dont_modify_serialized_data(obj):
    """Loading with ``modify_input=False`` leaves the serialized buffer byte-for-byte intact."""
    # Arrange
    original_obj = copy.deepcopy(obj)
    serialized = pinch_python.dump_bytes(obj)
    original_serialized = copy.deepcopy(serialized)

    # Act
    unserialized = pinch_python.load_bytes(serialized, modify_input=False)

    # Assert
    assert unserialized == original_obj
    assert serialized == original_serialized
186
+
187
+
188
@pytest.mark.parametrize(
    ["obj"],
    ALL_TYPES_OF_OBJECTS
)
def test__serialize_deserialize__modify_serialized_data(obj):
    """Round-trip every fixture through the loader path that consumes a bytearray in place."""
    # Arrange
    original_obj = copy.deepcopy(obj)
    serialized = pinch_python.dump_bytes(obj)

    # Act
    unserialized = pinch_python.load_bytes(serialized, modify_input=True)

    # Assert
    assert unserialized == original_obj
202
+
203
+
204
@pytest.mark.parametrize(
    ["obj"],
    ALL_TYPES_OF_OBJECTS
)
def test__serialize_deserialize__bytes_serialized_data(obj):
    """Loading from immutable ``bytes`` works even when ``modify_input=True`` is requested."""
    # Arrange
    original_obj = copy.deepcopy(obj)
    # Convert to bytes so the loader cannot consume the buffer in place.
    serialized = bytes(pinch_python.dump_bytes(obj))

    # Act
    unserialized = pinch_python.load_bytes(serialized, modify_input=True)

    # Assert
    assert unserialized == original_obj
218
+
219
+
220
@pytest.mark.parametrize(
    ["obj", "expected"],
    [
        ([1, 2, 3], (1, 2, 3)),
        ({"a": [], "b": [[1], ["f"]]}, {"a": tuple(), "b": ((1,), ("f",))}),
    ]
)
def test__serialize_deserialize__use_tuples(obj, expected):
    """With ``use_tuples=True`` lists at every nesting level decode as tuples."""
    # Act
    serialized = pinch_python.dump_bytes(obj)
    unserialized = pinch_python.load_bytes(serialized, use_tuples=True, modify_input=True)

    # Assert
    assert unserialized == expected
234
+
235
+
236
@pytest.mark.parametrize(
    ["obj", "encoding"],
    [
        ("abcdef", "utf-16"),
        ("abcdef", "utf-32-le"),
        ("abcdef", "ascii"),
        ("abcdef", "cp775"),
        ("abcdef", "windows-1256"),
    ]
)
def test__serialize_deserialize__with_encoding(obj, encoding):
    """A string round-trips when the same ``encoding`` is given to both dump and load."""
    # Arrange
    original_object = copy.deepcopy(obj)

    # Act
    serialized = pinch_python.dump_bytes(obj, encoding=encoding)
    unserialized = pinch_python.load_bytes(serialized, modify_input=True, encoding=encoding)

    # Assert
    assert unserialized == original_object