fastbencode 0.1__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fastbencode-0.3.1/PKG-INFO +59 -0
- {fastbencode-0.1 → fastbencode-0.3.1}/README.md +6 -0
- {fastbencode-0.1 → fastbencode-0.3.1}/fastbencode/__init__.py +6 -7
- fastbencode-0.3.1/fastbencode/_bencode_py.py +186 -0
- {fastbencode-0.1 → fastbencode-0.3.1}/fastbencode/_bencode_pyx.pyi +1 -1
- {fastbencode-0.1 → fastbencode-0.3.1}/fastbencode/_bencode_pyx.pyx +62 -12
- {fastbencode-0.1 → fastbencode-0.3.1}/fastbencode/tests/test_bencode.py +89 -17
- fastbencode-0.3.1/fastbencode.egg-info/PKG-INFO +59 -0
- {fastbencode-0.1 → fastbencode-0.3.1}/fastbencode.egg-info/SOURCES.txt +0 -8
- {fastbencode-0.1 → fastbencode-0.3.1}/fastbencode.egg-info/requires.txt +3 -0
- fastbencode-0.3.1/pyproject.toml +75 -0
- fastbencode-0.3.1/setup.cfg +4 -0
- {fastbencode-0.1 → fastbencode-0.3.1}/setup.py +6 -4
- fastbencode-0.1/.github/workflows/disperse.yml +0 -24
- fastbencode-0.1/.github/workflows/pythonpackage.yml +0 -32
- fastbencode-0.1/.github/workflows/pythonpublish.yml +0 -58
- fastbencode-0.1/.gitignore +0 -8
- fastbencode-0.1/CODE_OF_CONDUCT.md +0 -76
- fastbencode-0.1/PKG-INFO +0 -40
- fastbencode-0.1/SECURITY.md +0 -5
- fastbencode-0.1/disperse.conf +0 -8
- fastbencode-0.1/fastbencode/_bencode_py.py +0 -162
- fastbencode-0.1/fastbencode.egg-info/PKG-INFO +0 -40
- fastbencode-0.1/pyproject.toml +0 -3
- fastbencode-0.1/setup.cfg +0 -24
- {fastbencode-0.1 → fastbencode-0.3.1}/COPYING +0 -0
- {fastbencode-0.1 → fastbencode-0.3.1}/MANIFEST.in +0 -0
- {fastbencode-0.1 → fastbencode-0.3.1}/fastbencode/_bencode_pyx.h +0 -0
- {fastbencode-0.1 → fastbencode-0.3.1}/fastbencode/py.typed +0 -0
- {fastbencode-0.1 → fastbencode-0.3.1}/fastbencode/python-compat.h +0 -0
- {fastbencode-0.1 → fastbencode-0.3.1}/fastbencode/tests/__init__.py +0 -0
- {fastbencode-0.1 → fastbencode-0.3.1}/fastbencode.egg-info/dependency_links.txt +0 -0
- {fastbencode-0.1 → fastbencode-0.3.1}/fastbencode.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: fastbencode
|
|
3
|
+
Version: 0.3.1
|
|
4
|
+
Summary: Implementation of bencode with optional fast C extensions
|
|
5
|
+
Maintainer-email: Breezy Developers <breezy-core@googlegroups.com>
|
|
6
|
+
License: GPLv2 or later
|
|
7
|
+
Project-URL: Homepage, https://github.com/breezy-team/fastbencode
|
|
8
|
+
Project-URL: GitHub, https://github.com/breezy-team/fastbencode
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
15
|
+
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
16
|
+
Classifier: Operating System :: POSIX
|
|
17
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
18
|
+
Requires-Python: >=3.8
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: COPYING
|
|
21
|
+
Provides-Extra: cext
|
|
22
|
+
Requires-Dist: cython>=0.29; extra == "cext"
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: ruff==0.4.3; extra == "dev"
|
|
25
|
+
|
|
26
|
+
fastbencode
|
|
27
|
+
===========
|
|
28
|
+
|
|
29
|
+
fastbencode is an implementation of the bencode serialization format originally
|
|
30
|
+
used by BitTorrent.
|
|
31
|
+
|
|
32
|
+
The package includes both a pure-Python version and an optional C extension
|
|
33
|
+
based on Cython. Both provide the same functionality, but the C extension
|
|
34
|
+
provides significantly better performance.
|
|
35
|
+
|
|
36
|
+
Example:
|
|
37
|
+
|
|
38
|
+
>>> from fastbencode import bencode, bdecode
|
|
39
|
+
>>> bencode([1, 2, b'a', {b'd': 3}])
|
|
40
|
+
b'li1ei2e1:ad1:di3eee'
|
|
41
|
+
>>> bdecode(bencode([1, 2, b'a', {b'd': 3}]))
|
|
42
|
+
[1, 2, b'a', {b'd': 3}]
|
|
43
|
+
|
|
44
|
+
The default ``bencode``/``bdecode`` functions just operate on
|
|
45
|
+
bytestrings. Use ``bencode_utf8`` / ``bdecode_utf8`` to
|
|
46
|
+
serialize/deserialize all plain strings as UTF-8 bytestrings.
|
|
47
|
+
Note that for performance reasons, all dictionary keys still have to be
|
|
48
|
+
bytestrings.
|
|
49
|
+
|
|
50
|
+
License
|
|
51
|
+
=======
|
|
52
|
+
fastbencode is available under the GNU GPL, version 2 or later.
|
|
53
|
+
|
|
54
|
+
Copyright
|
|
55
|
+
=========
|
|
56
|
+
|
|
57
|
+
* Original Pure-Python bencoder (c) Petru Paler
|
|
58
|
+
* Cython version and modifications (c) Canonical Ltd
|
|
59
|
+
* Split out from Bazaar/Breezy by Jelmer Vernooij
|
|
@@ -16,6 +16,12 @@ Example:
|
|
|
16
16
|
>>> bdecode(bencode([1, 2, b'a', {b'd': 3}]))
|
|
17
17
|
[1, 2, b'a', {b'd': 3}]
|
|
18
18
|
|
|
19
|
+
The default ``bencode``/``bdecode`` functions just operate on
|
|
20
|
+
bytestrings. Use ``bencode_utf8`` / ``bdecode_utf8`` to
|
|
21
|
+
serialize/deserialize all plain strings as UTF-8 bytestrings.
|
|
22
|
+
Note that for performance reasons, all dictionary keys still have to be
|
|
23
|
+
bytestrings.
|
|
24
|
+
|
|
19
25
|
License
|
|
20
26
|
=======
|
|
21
27
|
fastbencode is available under the GNU GPL, version 2 or later.
|
|
@@ -14,12 +14,11 @@
|
|
|
14
14
|
# along with this program; if not, write to the Free Software
|
|
15
15
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16
16
|
|
|
17
|
-
"""Wrapper around the bencode cython and python implementation"""
|
|
17
|
+
"""Wrapper around the bencode cython and python implementation."""
|
|
18
18
|
|
|
19
19
|
from typing import Type
|
|
20
20
|
|
|
21
|
-
|
|
22
|
-
__version__ = (0, 1)
|
|
21
|
+
__version__ = (0, 3, 1)
|
|
23
22
|
|
|
24
23
|
|
|
25
24
|
_extension_load_failures = []
|
|
@@ -50,7 +49,7 @@ def failed_to_load_extension(exception):
|
|
|
50
49
|
if exception_str not in _extension_load_failures:
|
|
51
50
|
import warnings
|
|
52
51
|
warnings.warn(
|
|
53
|
-
'failed to load compiled extension:
|
|
52
|
+
f'failed to load compiled extension: {exception_str}',
|
|
54
53
|
UserWarning)
|
|
55
54
|
_extension_load_failures.append(exception_str)
|
|
56
55
|
|
|
@@ -58,12 +57,12 @@ def failed_to_load_extension(exception):
|
|
|
58
57
|
Bencached: Type
|
|
59
58
|
|
|
60
59
|
try:
|
|
61
|
-
from ._bencode_pyx import bdecode, bdecode_as_tuple, bencode
|
|
60
|
+
from ._bencode_pyx import Bencached, bdecode, bdecode_as_tuple, bencode
|
|
62
61
|
except ImportError as e:
|
|
63
62
|
failed_to_load_extension(e)
|
|
64
63
|
from ._bencode_py import ( # noqa: F401
|
|
64
|
+
Bencached,
|
|
65
65
|
bdecode,
|
|
66
66
|
bdecode_as_tuple,
|
|
67
67
|
bencode,
|
|
68
|
-
|
|
69
|
-
)
|
|
68
|
+
)
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
# bencode structured encoding
|
|
2
|
+
#
|
|
3
|
+
# Written by Petru Paler
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person
|
|
6
|
+
# obtaining a copy of this software and associated documentation files
|
|
7
|
+
# (the "Software"), to deal in the Software without restriction,
|
|
8
|
+
# including without limitation the rights to use, copy, modify, merge,
|
|
9
|
+
# publish, distribute, sublicense, and/or sell copies of the Software,
|
|
10
|
+
# and to permit persons to whom the Software is furnished to do so,
|
|
11
|
+
# subject to the following conditions:
|
|
12
|
+
#
|
|
13
|
+
# The above copyright notice and this permission notice shall be
|
|
14
|
+
# included in all copies or substantial portions of the Software.
|
|
15
|
+
#
|
|
16
|
+
# Modifications copyright (C) 2008 Canonical Ltd
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
from typing import Callable, Dict, List, Type
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class BDecoder:
    """Pure-Python bencode decoder.

    Every ``decode_*`` method takes the complete input ``x`` (bytes) and the
    offset ``f`` of the element to decode, and returns a
    ``(value, next_offset)`` pair.
    """

    def __init__(self, yield_tuples=False, bytestring_encoding=None) -> None:
        """Constructor.

        :param yield_tuples: if true, decode "l" elements as tuples rather than
            lists.
        :param bytestring_encoding: if set, every bytestring (dictionary keys
            included) is decoded to ``str`` with this encoding; otherwise
            bytestrings are returned as ``bytes``.
        """
        self.yield_tuples = yield_tuples
        self.bytestring_encoding = bytestring_encoding
        # Dispatch table keyed on the first byte of an element.
        decode_func = {
            b'l': self.decode_list,
            b'd': self.decode_dict,
            b'i': self.decode_int,
        }
        # A bytestring starts with its decimal length, i.e. with a digit.
        for digit in range(10):
            decode_func[b'%d' % digit] = self.decode_bytes
        self.decode_func = decode_func

    def decode_int(self, x, f):
        """Decode the ``i<digits>e`` element starting at offset ``f``.

        :raises ValueError: if the terminating ``e`` is missing, or the
            literal is not canonical (anything other than an optional ``-``
            followed by ASCII digits, ``-0``, or leading zeros).
        """
        f += 1
        newf = x.index(b'e', f)
        literal = x[f:newf]
        digits = literal[1:] if literal[:1] == b'-' else literal
        # int() alone is too lenient: it also accepts surrounding whitespace,
        # a leading '+' and '_' digit separators.
        if not digits.isdigit():
            raise ValueError('invalid integer literal: %r' % (literal,))
        if literal[:2] == b'-0':
            raise ValueError('negative zero is not allowed')
        if literal[:1] == b'0' and len(literal) != 1:
            raise ValueError('leading zeros are not allowed')
        return (int(literal), newf + 1)

    def decode_bytes(self, x, f):
        """Decode the ``<length>:<data>`` element starting at offset ``f``.

        :raises ValueError: if the ``:`` is missing, the length is not made
            of ASCII digits only, the length has leading zeros, or fewer than
            ``length`` bytes are available.
        :raises UnicodeDecodeError: if ``bytestring_encoding`` is set and the
            data is not valid in that encoding.
        """
        colon = x.index(b':', f)
        length = x[f:colon]
        # decode_dict calls this method directly, so the first byte has not
        # necessarily been checked by the dispatch table.
        if not length.isdigit():
            raise ValueError('invalid string length: %r' % (length,))
        if length[:1] == b'0' and len(length) != 1:
            raise ValueError('leading zeros are not allowed')
        start = colon + 1
        end = start + int(length)
        if end > len(x):
            raise ValueError('stream underflow')
        d = x[start:end]
        if self.bytestring_encoding:
            d = d.decode(self.bytestring_encoding)
        return (d, end)

    def decode_list(self, x, f):
        """Decode the ``l...e`` element starting at offset ``f``.

        Returns a tuple instead of a list when ``yield_tuples`` is true.
        """
        r, f = [], f + 1
        while x[f:f + 1] != b'e':
            v, f = self.decode_func[x[f:f + 1]](x, f)
            r.append(v)
        if self.yield_tuples:
            r = tuple(r)
        return (r, f + 1)

    def decode_dict(self, x, f):
        """Decode the ``d...e`` element starting at offset ``f``.

        :raises ValueError: if a key is not a bytestring, or keys are not in
            strictly increasing order.
        """
        r, f = {}, f + 1
        lastkey = None
        while x[f:f + 1] != b'e':
            k, f = self.decode_bytes(x, f)
            # Strictly increasing keys also rules out duplicates.
            if lastkey is not None and lastkey >= k:
                raise ValueError('dict keys disordered')
            lastkey = k
            r[k], f = self.decode_func[x[f:f + 1]](x, f)
        return (r, f + 1)

    def bdecode(self, x):
        """Decode the bencoded bytes ``x`` into a Python object.

        :raises TypeError: if ``x`` is not ``bytes``.
        :raises ValueError: if ``x`` is malformed or has trailing data.
        """
        if not isinstance(x, bytes):
            raise TypeError
        try:
            r, l = self.decode_func[x[:1]](x, 0)  # noqa: E741
        except (IndexError, KeyError, OverflowError) as e:
            # An unknown element prefix (including end of input) shows up as
            # a KeyError from the dispatch table.
            raise ValueError(str(e)) from e
        if l != len(x):  # noqa: E741
            raise ValueError
        return r


_decoder = BDecoder()
bdecode = _decoder.bdecode

_tuple_decoder = BDecoder(True)
bdecode_as_tuple = _tuple_decoder.bdecode

_utf8_decoder = BDecoder(bytestring_encoding='utf-8')
bdecode_utf8 = _utf8_decoder.bdecode
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class Bencached:
    """Wrapper for data that is already bencoded.

    The encoder appends ``bencoded`` to its output verbatim.
    """

    __slots__ = ['bencoded']

    def __init__(self, s) -> None:
        self.bencoded = s


class BEncoder:
    """Pure-Python bencode encoder.

    Every ``encode_*`` method appends the encoded pieces of ``x`` to the list
    ``r``; the caller joins the pieces into the final bytestring.
    """

    def __init__(self, bytestring_encoding=None):
        """Constructor.

        :param bytestring_encoding: encoding used to serialize ``str``
            values; if None, encountering a ``str`` raises TypeError.
        """
        self.bytestring_encoding = bytestring_encoding
        self.encode_func: Dict[Type, Callable[[object, List[bytes]], None]] = {
            Bencached: self.encode_bencached,
            int: self.encode_int,
            bytes: self.encode_bytes,
            list: self.encode_list,
            tuple: self.encode_list,
            dict: self.encode_dict,
            bool: self.encode_bool,
            str: self.encode_str,
        }

    def encode_bencached(self, x, r):
        """Append the pre-encoded payload of a Bencached unchanged."""
        r.append(x.bencoded)

    def encode_bool(self, x, r):
        """Encode a bool as the integer 0 or 1."""
        self.encode_int(int(x), r)

    def encode_int(self, x, r):
        """Encode an integer as ``i<digits>e``."""
        r.extend((b'i', int_to_bytes(x), b'e'))

    def encode_bytes(self, x, r):
        """Encode a bytestring as ``<length>:<data>``."""
        r.extend((int_to_bytes(len(x)), b':', x))

    def encode_list(self, x, r):
        """Encode a list or tuple as ``l<items>e``."""
        r.append(b'l')
        for i in x:
            self.encode(i, r)
        r.append(b'e')

    def encode_dict(self, x, r):
        """Encode a dict as ``d<key><value>...e`` with keys in sorted order.

        :raises TypeError: if any key is not a bytestring. Keys must be
            bytes even when ``bytestring_encoding`` is set.
        """
        r.append(b'd')
        # Reject bad keys up front; otherwise a str key would only fail later
        # in b''.join() with an error that does not mention the dict.
        for k in x:
            if not isinstance(k, bytes):
                raise TypeError('key in dict should be string')
        for k in sorted(x):
            self.encode_bytes(k, r)
            self.encode(x[k], r)
        r.append(b'e')

    def encode_str(self, x, r):
        """Encode a ``str`` as a bytestring using ``bytestring_encoding``.

        :raises TypeError: if no encoding was configured.
        """
        if self.bytestring_encoding is None:
            raise TypeError("string found but no encoding specified. "
                            "Use bencode_utf8 rather bencode?")
        return self.encode_bytes(x.encode(self.bytestring_encoding), r)

    def encode(self, x, r):
        """Append the encoding of ``x`` to ``r``.

        Dispatch is on the exact type of ``x``; a type that is not in the
        table raises KeyError.
        """
        self.encode_func[type(x)](x, r)


def int_to_bytes(n):
    """Return the decimal representation of ``n`` as bytes."""
    return b'%d' % n


def _encode(x, bytestring_encoding=None):
    """Encode ``x`` with a fresh BEncoder and join the pieces."""
    r = []
    BEncoder(bytestring_encoding=bytestring_encoding).encode(x, r)
    return b''.join(r)


def bencode(x):
    """Encode Python object x to a bytestring; ``str`` values are rejected."""
    return _encode(x)


def bencode_utf8(x):
    """Encode Python object x to a bytestring, encoding ``str`` as UTF-8."""
    return _encode(x, bytestring_encoding='utf-8')
|
|
@@ -46,15 +46,22 @@ from cpython.mem cimport (
|
|
|
46
46
|
PyMem_Malloc,
|
|
47
47
|
PyMem_Realloc,
|
|
48
48
|
)
|
|
49
|
+
from cpython.unicode cimport (
|
|
50
|
+
PyUnicode_FromEncodedObject,
|
|
51
|
+
PyUnicode_FromStringAndSize,
|
|
52
|
+
PyUnicode_Check,
|
|
53
|
+
)
|
|
49
54
|
from cpython.tuple cimport (
|
|
50
55
|
PyTuple_CheckExact,
|
|
51
56
|
)
|
|
52
57
|
|
|
53
58
|
from libc.stdlib cimport (
|
|
54
59
|
strtol,
|
|
60
|
+
free,
|
|
55
61
|
)
|
|
56
62
|
from libc.string cimport (
|
|
57
63
|
memcpy,
|
|
64
|
+
strdup,
|
|
58
65
|
)
|
|
59
66
|
|
|
60
67
|
cdef extern from "python-compat.h":
|
|
@@ -79,9 +86,10 @@ cdef class Decoder:
|
|
|
79
86
|
cdef readonly char *tail
|
|
80
87
|
cdef readonly int size
|
|
81
88
|
cdef readonly int _yield_tuples
|
|
89
|
+
cdef readonly char *_bytestring_encoding
|
|
82
90
|
cdef object text
|
|
83
91
|
|
|
84
|
-
def __init__(self, s, yield_tuples=0):
|
|
92
|
+
def __init__(self, s, yield_tuples=0, str bytestring_encoding=None):
|
|
85
93
|
"""Initialize decoder engine.
|
|
86
94
|
@param s: Python string.
|
|
87
95
|
"""
|
|
@@ -92,6 +100,13 @@ cdef class Decoder:
|
|
|
92
100
|
self.tail = PyBytes_AS_STRING(s)
|
|
93
101
|
self.size = PyBytes_GET_SIZE(s)
|
|
94
102
|
self._yield_tuples = int(yield_tuples)
|
|
103
|
+
if bytestring_encoding is None:
|
|
104
|
+
self._bytestring_encoding = NULL
|
|
105
|
+
else:
|
|
106
|
+
self._bytestring_encoding = strdup(bytestring_encoding.encode('utf-8'))
|
|
107
|
+
|
|
108
|
+
def __dealloc__(self):
|
|
109
|
+
free(self._bytestring_encoding)
|
|
95
110
|
|
|
96
111
|
def decode(self):
|
|
97
112
|
result = self._decode_object()
|
|
@@ -112,7 +127,7 @@ cdef class Decoder:
|
|
|
112
127
|
try:
|
|
113
128
|
ch = self.tail[0]
|
|
114
129
|
if c'0' <= ch <= c'9':
|
|
115
|
-
return self.
|
|
130
|
+
return self._decode_bytes()
|
|
116
131
|
elif ch == c'l':
|
|
117
132
|
D_UPDATE_TAIL(self, 1)
|
|
118
133
|
return self._decode_list()
|
|
@@ -155,12 +170,12 @@ cdef class Decoder:
|
|
|
155
170
|
D_UPDATE_TAIL(self, i+1)
|
|
156
171
|
return ret
|
|
157
172
|
|
|
158
|
-
cdef object
|
|
173
|
+
cdef object _decode_bytes(self):
|
|
159
174
|
cdef int n
|
|
160
175
|
cdef char *next_tail
|
|
161
176
|
# strtol allows leading whitespace, negatives, and leading zeros
|
|
162
177
|
# however, all callers have already checked that '0' <= tail[0] <= '9'
|
|
163
|
-
# or they wouldn't have called
|
|
178
|
+
# or they wouldn't have called _decode_bytes
|
|
164
179
|
# strtol will stop at trailing whitespace, etc
|
|
165
180
|
n = strtol(self.tail, &next_tail, 10)
|
|
166
181
|
if next_tail == NULL or next_tail[0] != c':':
|
|
@@ -171,13 +186,22 @@ cdef class Decoder:
|
|
|
171
186
|
raise ValueError('leading zeros are not allowed')
|
|
172
187
|
D_UPDATE_TAIL(self, next_tail - self.tail + 1)
|
|
173
188
|
if n == 0:
|
|
174
|
-
|
|
189
|
+
if self._bytestring_encoding == NULL:
|
|
190
|
+
return b''
|
|
191
|
+
else:
|
|
192
|
+
return ''
|
|
175
193
|
if n > self.size:
|
|
176
194
|
raise ValueError('stream underflow')
|
|
177
195
|
if n < 0:
|
|
178
196
|
raise ValueError('string size below zero: %d' % n)
|
|
179
197
|
|
|
180
|
-
|
|
198
|
+
if self._bytestring_encoding == NULL:
|
|
199
|
+
result = PyBytes_FromStringAndSize(self.tail, n)
|
|
200
|
+
elif self._bytestring_encoding == b'utf-8':
|
|
201
|
+
result = PyUnicode_FromStringAndSize(self.tail, n)
|
|
202
|
+
else:
|
|
203
|
+
result = PyBytes_FromStringAndSize(self.tail, n)
|
|
204
|
+
result = PyUnicode_FromEncodedObject(result, self._bytestring_encoding, NULL)
|
|
181
205
|
D_UPDATE_TAIL(self, n)
|
|
182
206
|
return result
|
|
183
207
|
|
|
@@ -214,7 +238,7 @@ cdef class Decoder:
|
|
|
214
238
|
# keys should be strings only
|
|
215
239
|
if self.tail[0] < c'0' or self.tail[0] > c'9':
|
|
216
240
|
raise ValueError('key was not a simple string.')
|
|
217
|
-
key = self.
|
|
241
|
+
key = self._decode_bytes()
|
|
218
242
|
if lastkey is not None and lastkey >= key:
|
|
219
243
|
raise ValueError('dict keys disordered')
|
|
220
244
|
else:
|
|
@@ -235,6 +259,11 @@ def bdecode_as_tuple(object s):
|
|
|
235
259
|
return Decoder(s, True).decode()
|
|
236
260
|
|
|
237
261
|
|
|
262
|
+
def bdecode_utf8(object s):
|
|
263
|
+
"""Decode string x to Python object, decoding bytestrings as UTF8 strings."""
|
|
264
|
+
return Decoder(s, bytestring_encoding='utf-8').decode()
|
|
265
|
+
|
|
266
|
+
|
|
238
267
|
class Bencached(object):
|
|
239
268
|
__slots__ = ['bencoded']
|
|
240
269
|
|
|
@@ -254,8 +283,9 @@ cdef class Encoder:
|
|
|
254
283
|
cdef readonly int size
|
|
255
284
|
cdef readonly char *buffer
|
|
256
285
|
cdef readonly int maxsize
|
|
286
|
+
cdef readonly object _bytestring_encoding
|
|
257
287
|
|
|
258
|
-
def __init__(self, int maxsize=INITSIZE):
|
|
288
|
+
def __init__(self, int maxsize=INITSIZE, str bytestring_encoding=None):
|
|
259
289
|
"""Initialize encoder engine
|
|
260
290
|
@param maxsize: initial size of internal char buffer
|
|
261
291
|
"""
|
|
@@ -273,6 +303,8 @@ cdef class Encoder:
|
|
|
273
303
|
self.maxsize = maxsize
|
|
274
304
|
self.tail = p
|
|
275
305
|
|
|
306
|
+
self._bytestring_encoding = bytestring_encoding
|
|
307
|
+
|
|
276
308
|
def __dealloc__(self):
|
|
277
309
|
PyMem_Free(self.buffer)
|
|
278
310
|
self.buffer = NULL
|
|
@@ -329,7 +361,7 @@ cdef class Encoder:
|
|
|
329
361
|
E_UPDATE_TAIL(self, n)
|
|
330
362
|
return 1
|
|
331
363
|
|
|
332
|
-
cdef int
|
|
364
|
+
cdef int _encode_bytes(self, x) except 0:
|
|
333
365
|
cdef int n
|
|
334
366
|
cdef Py_ssize_t x_len
|
|
335
367
|
x_len = PyBytes_GET_SIZE(x)
|
|
@@ -341,6 +373,12 @@ cdef class Encoder:
|
|
|
341
373
|
E_UPDATE_TAIL(self, n + x_len)
|
|
342
374
|
return 1
|
|
343
375
|
|
|
376
|
+
cdef int _encode_string(self, x) except 0:
|
|
377
|
+
if self._bytestring_encoding is None:
|
|
378
|
+
raise TypeError("string found but no encoding specified. "
|
|
379
|
+
"Use bencode_utf8 rather bencode?")
|
|
380
|
+
return self._encode_bytes(x.encode(self._bytestring_encoding))
|
|
381
|
+
|
|
344
382
|
cdef int _encode_list(self, x) except 0:
|
|
345
383
|
self._ensure_buffer(1)
|
|
346
384
|
self.tail[0] = c'l'
|
|
@@ -362,7 +400,7 @@ cdef class Encoder:
|
|
|
362
400
|
for k in sorted(x):
|
|
363
401
|
if not PyBytes_CheckExact(k):
|
|
364
402
|
raise TypeError('key in dict should be string')
|
|
365
|
-
self.
|
|
403
|
+
self._encode_bytes(k)
|
|
366
404
|
self.process(x[k])
|
|
367
405
|
|
|
368
406
|
self._ensure_buffer(1)
|
|
@@ -374,7 +412,7 @@ cdef class Encoder:
|
|
|
374
412
|
BrzPy_EnterRecursiveCall(" while bencode encoding")
|
|
375
413
|
try:
|
|
376
414
|
if PyBytes_CheckExact(x):
|
|
377
|
-
self.
|
|
415
|
+
self._encode_bytes(x)
|
|
378
416
|
elif PyInt_CheckExact(x) and x.bit_length() < 32:
|
|
379
417
|
self._encode_int(x)
|
|
380
418
|
elif PyLong_CheckExact(x):
|
|
@@ -385,6 +423,8 @@ cdef class Encoder:
|
|
|
385
423
|
self._encode_dict(x)
|
|
386
424
|
elif PyBool_Check(x):
|
|
387
425
|
self._encode_int(int(x))
|
|
426
|
+
elif PyUnicode_Check(x):
|
|
427
|
+
self._encode_string(x)
|
|
388
428
|
elif isinstance(x, Bencached):
|
|
389
429
|
self._append_string(x.bencoded)
|
|
390
430
|
else:
|
|
@@ -394,7 +434,17 @@ cdef class Encoder:
|
|
|
394
434
|
|
|
395
435
|
|
|
396
436
|
def bencode(x):
|
|
397
|
-
"""Encode Python object x to
|
|
437
|
+
"""Encode Python object x to bytestring"""
|
|
398
438
|
encoder = Encoder()
|
|
399
439
|
encoder.process(x)
|
|
400
440
|
return encoder.to_bytes()
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
def bencode_utf8(x):
|
|
444
|
+
"""Encode Python object x to bytestring.
|
|
445
|
+
|
|
446
|
+
Encode any strings as UTF8
|
|
447
|
+
"""
|
|
448
|
+
encoder = Encoder(bytestring_encoding='utf-8')
|
|
449
|
+
encoder.process(x)
|
|
450
|
+
return encoder.to_bytes()
|
|
@@ -14,11 +14,10 @@
|
|
|
14
14
|
# along with this program; if not, write to the Free Software
|
|
15
15
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16
16
|
|
|
17
|
-
"""Tests for bencode structured encoding"""
|
|
17
|
+
"""Tests for bencode structured encoding."""
|
|
18
18
|
|
|
19
19
|
import copy
|
|
20
20
|
import sys
|
|
21
|
-
|
|
22
21
|
from unittest import TestCase, TestSuite
|
|
23
22
|
|
|
24
23
|
|
|
@@ -62,16 +61,14 @@ def get_named_object(module_name, member_name=None):
|
|
|
62
61
|
|
|
63
62
|
|
|
64
63
|
def iter_suite_tests(suite):
|
|
65
|
-
"""Return all tests in a suite, recursing through nested suites"""
|
|
64
|
+
"""Return all tests in a suite, recursing through nested suites."""
|
|
66
65
|
if isinstance(suite, TestCase):
|
|
67
66
|
yield suite
|
|
68
67
|
elif isinstance(suite, TestSuite):
|
|
69
68
|
for item in suite:
|
|
70
|
-
|
|
71
|
-
yield r
|
|
69
|
+
yield from iter_suite_tests(item)
|
|
72
70
|
else:
|
|
73
|
-
raise Exception('unknown type
|
|
74
|
-
% (type(suite), suite))
|
|
71
|
+
raise Exception(f'unknown type {type(suite)!r} for object {suite!r}')
|
|
75
72
|
|
|
76
73
|
|
|
77
74
|
def clone_test(test, new_id):
|
|
@@ -108,7 +105,7 @@ def apply_scenario(test, scenario):
|
|
|
108
105
|
test.
|
|
109
106
|
:return: The adapted test.
|
|
110
107
|
"""
|
|
111
|
-
new_id = "
|
|
108
|
+
new_id = f"{test.id()}({scenario[0]})"
|
|
112
109
|
new_test = clone_test(test, new_id)
|
|
113
110
|
for name, value in scenario[1].items():
|
|
114
111
|
setattr(new_test, name, value)
|
|
@@ -189,7 +186,6 @@ def permute_tests_for_extension(standard_tests, loader, py_module_name,
|
|
|
189
186
|
tests. feature is the Feature object that can be used to determine if
|
|
190
187
|
the module is available.
|
|
191
188
|
"""
|
|
192
|
-
|
|
193
189
|
py_module = get_named_object(py_module_name)
|
|
194
190
|
scenarios = [
|
|
195
191
|
('python', {'module': py_module}),
|
|
@@ -211,10 +207,10 @@ def load_tests(loader, standard_tests, pattern):
|
|
|
211
207
|
'fastbencode._bencode_pyx')
|
|
212
208
|
|
|
213
209
|
|
|
214
|
-
class RecursionLimit
|
|
210
|
+
class RecursionLimit:
|
|
215
211
|
"""Context manager that lowers recursion limit for testing."""
|
|
216
212
|
|
|
217
|
-
def __init__(self, limit=100):
|
|
213
|
+
def __init__(self, limit=100) -> None:
|
|
218
214
|
self._new_limit = limit
|
|
219
215
|
self._old_limit = sys.getrecursionlimit()
|
|
220
216
|
|
|
@@ -291,10 +287,16 @@ class TestBencodeDecode(TestCase):
|
|
|
291
287
|
|
|
292
288
|
def test_list_deepnested(self):
|
|
293
289
|
import platform
|
|
294
|
-
if platform.python_implementation() == 'PyPy'
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
290
|
+
if (platform.python_implementation() == 'PyPy'
|
|
291
|
+
or sys.version_info[:2] >= (3, 12)):
|
|
292
|
+
expected = []
|
|
293
|
+
for i in range(99):
|
|
294
|
+
expected = [expected]
|
|
295
|
+
self._check(expected, (b"l" * 100) + (b"e" * 100))
|
|
296
|
+
else:
|
|
297
|
+
with RecursionLimit():
|
|
298
|
+
self._run_check_error(
|
|
299
|
+
RuntimeError, (b"l" * 100) + (b"e" * 100))
|
|
298
300
|
|
|
299
301
|
def test_malformed_list(self):
|
|
300
302
|
self._run_check_error(ValueError, b'l')
|
|
@@ -339,16 +341,57 @@ class TestBencodeDecode(TestCase):
|
|
|
339
341
|
self.assertRaises(ValueError, self.module.bdecode, b'relwjhrlewjh')
|
|
340
342
|
|
|
341
343
|
def test_unsupported_type(self):
|
|
342
|
-
self._run_check_error(TypeError,
|
|
344
|
+
self._run_check_error(TypeError, 1.5)
|
|
343
345
|
self._run_check_error(TypeError, None)
|
|
344
346
|
self._run_check_error(TypeError, lambda x: x)
|
|
345
347
|
self._run_check_error(TypeError, object)
|
|
346
|
-
self._run_check_error(TypeError,
|
|
348
|
+
self._run_check_error(TypeError, "ie")
|
|
347
349
|
|
|
348
350
|
def test_decoder_type_error(self):
|
|
349
351
|
self.assertRaises(TypeError, self.module.bdecode, 1)
|
|
350
352
|
|
|
351
353
|
|
|
354
|
+
class TestBdecodeUtf8(TestCase):
    """Tests for ``bdecode_utf8`` of the implementation bound to ``module``."""

    # Set to None here; the tests read ``bdecode_utf8`` from it.
    module = None

    def _check(self, expected, source):
        """Assert that decoding ``source`` yields ``expected``."""
        decoded = self.module.bdecode_utf8(source)
        self.assertEqual(expected, decoded)

    def _run_check_error(self, exc, bad):
        """Check that bdecoding a string raises a particular exception."""
        with self.assertRaises(exc):
            self.module.bdecode_utf8(bad)

    def test_string(self):
        cases = (
            ('', b'0:'),
            ('aäc', b'4:a\xc3\xa4c'),
            ('1234567890', b'10:1234567890'),
        )
        for expected, source in cases:
            self._check(expected, source)

    def test_large_string(self):
        self._run_check_error(ValueError, b"2147483639:foo")

    def test_malformed_string(self):
        malformed = (
            b'10:x',
            b'10:',
            b'10',
            b'01:x',
            b'00:',
            b'35208734823ljdahflajhdf',
            b'432432432432432:foo',
            b' 1:x',  # leading whitespace
            b'-1:x',  # negative
            b'1 x',  # space vs colon
            b'1x',  # missing colon
            (b'1' * 1000) + b':',
        )
        for bad in malformed:
            self._run_check_error(ValueError, bad)

    def test_empty_string(self):
        self._run_check_error(ValueError, b'')

    def test_invalid_utf8(self):
        self._run_check_error(UnicodeDecodeError, b'3:\xff\xfe\xfd')
|
|
393
|
+
|
|
394
|
+
|
|
352
395
|
class TestBencodeEncode(TestCase):
|
|
353
396
|
|
|
354
397
|
module = None
|
|
@@ -412,3 +455,32 @@ class TestBencodeEncode(TestCase):
|
|
|
412
455
|
def test_bool(self):
|
|
413
456
|
self._check(b'i1e', True)
|
|
414
457
|
self._check(b'i0e', False)
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
class TestBencodeEncodeUtf8(TestCase):
    """Tests for ``bencode_utf8`` of the implementation bound to ``module``."""

    # Set to None here; the tests read ``bencode_utf8`` from it.
    module = None

    def _check(self, expected, source):
        """Assert that encoding ``source`` yields ``expected``."""
        encoded = self.module.bencode_utf8(source)
        self.assertEqual(expected, encoded)

    def _check_all(self, cases):
        """Run ``_check`` for each ``(expected, source)`` pair, in order."""
        for expected, source in cases:
            self._check(expected, source)

    def test_string(self):
        self._check_all((
            (b'0:', ''),
            (b'3:abc', 'abc'),
            (b'10:1234567890', '1234567890'),
        ))

    def test_list(self):
        self._check_all((
            (b'le', []),
            (b'li1ei2ei3ee', [1, 2, 3]),
            (b'll5:Alice3:Bobeli2ei3eee', [['Alice', 'Bob'], [2, 3]]),
        ))

    def test_list_as_tuple(self):
        self._check_all((
            (b'le', ()),
            (b'li1ei2ei3ee', (1, 2, 3)),
            (b'll5:Alice3:Bobeli2ei3eee', (('Alice', 'Bob'), (2, 3))),
        ))

    def test_dict(self):
        self._check_all((
            (b'de', {}),
            (b'd3:agei25e4:eyes4:bluee', {b'age': 25, b'eyes': 'blue'}),
            (b'd8:spam.mp3d6:author5:Alice6:lengthi100000eee',
             {b'spam.mp3': {b'author': b'Alice', b'length': 100000}}),
        ))
|