qs-codec 1.5.2__tar.gz → 1.6.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {qs_codec-1.5.2 → qs_codec-1.6.1}/CHANGELOG.md +10 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/PKG-INFO +176 -16
- {qs_codec-1.5.2 → qs_codec-1.6.1}/README.rst +175 -15
- {qs_codec-1.5.2 → qs_codec-1.6.1}/docs/README.rst +80 -8
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/__init__.py +1 -3
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/decode.py +13 -16
- qs_codec-1.6.1/src/qs_codec/enums/sentinel.py +50 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/models/decode_options.py +7 -3
- qs_codec-1.6.1/src/qs_codec/models/undefined.py +62 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/utils/utils.py +83 -84
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/comparison/package.json +2 -2
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/comparison/pnpm-lock.yaml +10 -9
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/unit/decode_test.py +184 -1
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/unit/example_test.py +0 -3
- qs_codec-1.6.1/tests/unit/package_test.py +18 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/unit/utils_test.py +115 -6
- qs_codec-1.5.2/src/qs_codec/enums/sentinel.py +0 -50
- qs_codec-1.5.2/src/qs_codec/models/undefined.py +0 -73
- {qs_codec-1.5.2 → qs_codec-1.6.1}/.gitignore +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/CODE-OF-CONDUCT.md +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/LICENSE +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/pyproject.toml +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/requirements_dev.txt +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/constants/__init__.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/constants/encode_constants.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/encode.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/enums/__init__.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/enums/charset.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/enums/decode_kind.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/enums/duplicates.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/enums/format.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/enums/list_format.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/models/__init__.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/models/cycle_state.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/models/encode_frame.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/models/encode_options.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/models/key_path_node.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/models/overflow_dict.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/models/structured_key_scan.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/models/weak_wrapper.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/py.typed +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/utils/__init__.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/utils/decode_utils.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/src/qs_codec/utils/encode_utils.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/comparison/.gitignore +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/comparison/__init__.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/comparison/compare_outputs.sh +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/comparison/pnpm-workspace.yaml +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/comparison/qs.js +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/comparison/qs.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/comparison/test_cases.json +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/e2e/__init__.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/e2e/e2e_test.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/unit/__init__.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/unit/decode_options_test.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/unit/encode_internal_helpers_test.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/unit/encode_options_test.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/unit/encode_test.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/unit/fixed_qs_issues_test.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/unit/key_path_node_test.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/unit/list_format_test.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/unit/thread_safety_test.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/unit/weakref_test.py +0 -0
- {qs_codec-1.5.2 → qs_codec-1.6.1}/tests/unit/wpt_urlencoded_parser_test.py +0 -0
|
@@ -1,3 +1,13 @@
|
|
|
1
|
+
## 1.6.1
|
|
2
|
+
|
|
3
|
+
* [FIX] match Node `qs` 6.15.3 cumulative list-limit enforcement across duplicate-key combinations and mixed list merges
|
|
4
|
+
* [FIX] reject oversized flat comma values before allocating their split lists or decoding their values when `raise_on_limit_exceeded` is enabled
|
|
5
|
+
* [CHORE] add Node `qs` 6.15.3 regression coverage for unbalanced bracket keys and cyclic compaction
|
|
6
|
+
|
|
7
|
+
## 1.6.0
|
|
8
|
+
|
|
9
|
+
* [CHORE] make `Undefined` internal by removing `qs_codec.Undefined` and its public documentation
|
|
10
|
+
|
|
1
11
|
## 1.5.2
|
|
2
12
|
|
|
3
13
|
* [CHORE] simplify mapping and key-iteration checks in encode/decode internals
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: qs-codec
|
|
3
|
-
Version: 1.5.2
|
|
3
|
+
Version: 1.6.1
|
|
4
4
|
Summary: A query string encoding and decoding library for Python. Ported from qs for JavaScript.
|
|
5
5
|
Project-URL: Homepage, https://techouse.github.io/qs_codec/
|
|
6
6
|
Project-URL: Documentation, https://techouse.github.io/qs_codec/
|
|
@@ -88,7 +88,7 @@ Highlights
|
|
|
88
88
|
- Pluggable hooks: custom ``encoder``/``decoder`` callables; options to sort keys, filter output, and control percent-encoding (keys-only, values-only).
|
|
89
89
|
- Nulls & empties: ``strict_null_handling`` and ``skip_nulls``; support for empty lists/arrays when desired.
|
|
90
90
|
- Dates: ``serialize_date`` for ISO 8601 or custom (e.g., UNIX timestamp).
|
|
91
|
-
- Safety limits: configurable decode depth and encode max depth, parameter limit, and list
|
|
91
|
+
- Safety limits: configurable decode depth and encode max depth, parameter limit, and list element limit; optional strict-depth errors; duplicate-key strategies (combine/first/last).
|
|
92
92
|
- Extras: numeric entity decoding (e.g. ``&#9786;`` → ☺), alternate delimiters/regex, and query-prefix helpers.
|
|
93
93
|
|
|
94
94
|
Compatibility
|
|
@@ -112,6 +112,157 @@ A simple usage example:
|
|
|
112
112
|
# Decoding
|
|
113
113
|
assert qs.decode('a=b') == {'a': 'b'}
|
|
114
114
|
|
|
115
|
+
Compared with ``urllib.parse``
|
|
116
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
117
|
+
|
|
118
|
+
The standard library's
|
|
119
|
+
`urlencode <https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode>`__,
|
|
120
|
+
`parse_qs <https://docs.python.org/3/library/urllib.parse.html#urllib.parse.parse_qs>`__,
|
|
121
|
+
and
|
|
122
|
+
`parse_qsl <https://docs.python.org/3/library/urllib.parse.html#urllib.parse.parse_qsl>`__
|
|
123
|
+
are designed for conventional flat ``application/x-www-form-urlencoded``
|
|
124
|
+
data. Use ``qs_codec`` when the query represents nested dictionaries or lists,
|
|
125
|
+
must interoperate with Node ``qs``, or needs configurable list, duplicate, null,
|
|
126
|
+
or resource-limit behavior.
|
|
127
|
+
|
|
128
|
+
``urlencode`` can expand a flat sequence into repeated keys with
|
|
129
|
+
``doseq=True``, which corresponds to ``ListFormat.REPEAT``. It does not
|
|
130
|
+
recursively encode nested mappings; ``qs.encode`` uses bracket or dot paths
|
|
131
|
+
instead:
|
|
132
|
+
|
|
133
|
+
.. code:: python
|
|
134
|
+
|
|
135
|
+
from urllib.parse import urlencode
|
|
136
|
+
|
|
137
|
+
import qs_codec as qs
|
|
138
|
+
|
|
139
|
+
assert urlencode({'tags': ['a', 'b']}, doseq=True) == 'tags=a&tags=b'
|
|
140
|
+
assert qs.encode(
|
|
141
|
+
{'tags': ['a', 'b']},
|
|
142
|
+
qs.EncodeOptions(list_format=qs.ListFormat.REPEAT),
|
|
143
|
+
) == 'tags=a&tags=b'
|
|
144
|
+
assert qs.encode(
|
|
145
|
+
{'filter': {'name': 'Jane'}},
|
|
146
|
+
) == 'filter%5Bname%5D=Jane'
|
|
147
|
+
|
|
148
|
+
The encoding defaults also differ: ``urlencode`` emits spaces as ``+`` and
|
|
149
|
+
uses Python scalar spellings such as ``True`` and ``None``; ``qs.encode`` uses
|
|
150
|
+
``%20``, lowercase booleans, and an empty value for ``None`` by default.
|
|
151
|
+
|
|
152
|
+
On decode, ``parse_qs`` returns a dictionary whose values are always lists,
|
|
153
|
+
while ``parse_qsl`` returns an ordered list of name/value pairs and preserves
|
|
154
|
+
interleaved duplicate keys. Both treat bracket expressions as literal key
|
|
155
|
+
names, drop blank values unless ``keep_blank_values=True``, and collapse a
|
|
156
|
+
name-only token and an explicit empty value to the same empty string.
|
|
157
|
+
``qs.decode`` normally returns a scalar for one value, reconstructs bracket
|
|
158
|
+
paths, and can preserve that null distinction:
|
|
159
|
+
|
|
160
|
+
.. code:: python
|
|
161
|
+
|
|
162
|
+
from urllib.parse import parse_qs, parse_qsl
|
|
163
|
+
|
|
164
|
+
import qs_codec as qs
|
|
165
|
+
|
|
166
|
+
query = 'a=1&b=2&a=3&filter%5Bname%5D=Jane&flag&empty='
|
|
167
|
+
|
|
168
|
+
assert parse_qs(query, keep_blank_values=True) == {
|
|
169
|
+
'a': ['1', '3'],
|
|
170
|
+
'b': ['2'],
|
|
171
|
+
'filter[name]': ['Jane'],
|
|
172
|
+
'flag': [''],
|
|
173
|
+
'empty': [''],
|
|
174
|
+
}
|
|
175
|
+
assert parse_qsl(query, keep_blank_values=True) == [
|
|
176
|
+
('a', '1'),
|
|
177
|
+
('b', '2'),
|
|
178
|
+
('a', '3'),
|
|
179
|
+
('filter[name]', 'Jane'),
|
|
180
|
+
('flag', ''),
|
|
181
|
+
('empty', ''),
|
|
182
|
+
]
|
|
183
|
+
assert qs.decode(
|
|
184
|
+
query,
|
|
185
|
+
qs.DecodeOptions(strict_null_handling=True),
|
|
186
|
+
) == {
|
|
187
|
+
'a': ['1', '3'],
|
|
188
|
+
'b': '2',
|
|
189
|
+
'filter': {'name': 'Jane'},
|
|
190
|
+
'flag': None,
|
|
191
|
+
'empty': '',
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
All three decoders leave primitive values as strings. ``parse_qs`` and
|
|
195
|
+
``qs.decode`` combine repeated flat keys under their default behavior;
|
|
196
|
+
``parse_qsl`` instead retains each pair in input order. The standard-library
|
|
197
|
+
parsers offer ``max_num_fields``; ``qs.decode`` additionally provides default
|
|
198
|
+
parameter, nesting-depth, and list limits plus configurable duplicate handling.
|
|
199
|
+
|
|
200
|
+
Use ``parse_qsl`` when flat pair order or duplicate interleaving matters, but
|
|
201
|
+
not as a raw-query round-trip format: it percent-decodes names and values,
|
|
202
|
+
normalizes ``+`` and ``%20`` to the same space, and cannot retain the distinction
|
|
203
|
+
between a name-only token and an explicit empty value.
|
|
204
|
+
|
|
205
|
+
Working with URLs
|
|
206
|
+
~~~~~~~~~~~~~~~~~
|
|
207
|
+
|
|
208
|
+
Use `urllib.parse.urlsplit <https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlsplit>`__
|
|
209
|
+
to keep URL parsing separate from query-string decoding. Pass the encoded
|
|
210
|
+
``query`` component directly to ``qs.decode`` without calling ``unquote``,
|
|
211
|
+
``unquote_plus``, ``parse_qs``, or ``parse_qsl`` first:
|
|
212
|
+
|
|
213
|
+
.. code:: python
|
|
214
|
+
|
|
215
|
+
from urllib.parse import urlsplit
|
|
216
|
+
|
|
217
|
+
import qs_codec as qs
|
|
218
|
+
|
|
219
|
+
parts = urlsplit(
|
|
220
|
+
'https://example.com/search?filter%5Bname%5D=Jane%20Doe&flag#results'
|
|
221
|
+
)
|
|
222
|
+
params = qs.decode(
|
|
223
|
+
parts.query,
|
|
224
|
+
qs.DecodeOptions(strict_null_handling=True),
|
|
225
|
+
)
|
|
226
|
+
|
|
227
|
+
assert params == {
|
|
228
|
+
'filter': {'name': 'Jane Doe'},
|
|
229
|
+
'flag': None,
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
Passing the encoded component unchanged ensures that escaped delimiters such as
|
|
233
|
+
``%26``, escaped percent signs such as ``%2525``, and encoded bracket syntax
|
|
234
|
+
reach ``qs.decode`` without being decoded twice.
|
|
235
|
+
|
|
236
|
+
To replace a URL query, encode fresh data and assign it to the split result's
|
|
237
|
+
``query`` component:
|
|
238
|
+
|
|
239
|
+
.. code:: python
|
|
240
|
+
|
|
241
|
+
updated = parts._replace(
|
|
242
|
+
query=qs.encode({
|
|
243
|
+
'filter': {'name': 'John Doe'},
|
|
244
|
+
'tags': ['a', 'b'],
|
|
245
|
+
}),
|
|
246
|
+
).geturl()
|
|
247
|
+
|
|
248
|
+
assert updated == (
|
|
249
|
+
'https://example.com/search?'
|
|
250
|
+
'filter%5Bname%5D=John%20Doe&tags%5B0%5D=a&tags%5B1%5D=b'
|
|
251
|
+
'#results'
|
|
252
|
+
)
|
|
253
|
+
|
|
254
|
+
Keep `EncodeOptions.add_query_prefix <https://techouse.github.io/qs_codec/qs_codec.models.html#qs_codec.models.encode_options.EncodeOptions.add_query_prefix>`__
|
|
255
|
+
set to ``False`` (the default) when assigning to ``SplitResult.query``. Options
|
|
256
|
+
such as ``encode=False``, ``encode_values_only=True``, or a custom encoder can
|
|
257
|
+
emit raw URL-structural characters, so callers using them must ensure the result
|
|
258
|
+
is safe query-component text.
|
|
259
|
+
|
|
260
|
+
This pattern replaces the existing query; it does not append or merge it.
|
|
261
|
+
Appending or decoding and re-encoding an arbitrary query can change delimiter,
|
|
262
|
+
duplicate-key, name-only, list-format, ordering, and percent-encoding semantics.
|
|
263
|
+
``SplitResult.geturl()`` may also normalize URL spelling and removes an explicit
|
|
264
|
+
empty ``?`` delimiter.
|
|
265
|
+
|
|
115
266
|
Decoding
|
|
116
267
|
~~~~~~~~
|
|
117
268
|
|
|
@@ -409,12 +560,11 @@ Note that an empty ``str``\ing is also a value, and will be preserved:
|
|
|
409
560
|
|
|
410
561
|
assert qs.decode('a[0]=b&a[1]=&a[2]=c') == {'a': ['b', '', 'c']}
|
|
411
562
|
|
|
412
|
-
`decode <https://techouse.github.io/qs_codec/qs_codec.html#module-qs_codec.decode>`__
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
the index as the key. This
|
|
416
|
-
|
|
417
|
-
over this huge ``list``.
|
|
563
|
+
`decode <https://techouse.github.io/qs_codec/qs_codec.html#module-qs_codec.decode>`__ also limits each ``list`` to a
|
|
564
|
+
maximum element count of ``20``. Index ``19`` is the last index that can create
|
|
565
|
+
a default ``list``; index ``20`` and higher are converted to a ``dict`` with
|
|
566
|
+
the index as the key. This prevents inputs such as ``a[999999999]`` from
|
|
567
|
+
creating massive sparse lists.
|
|
418
568
|
|
|
419
569
|
.. code:: python
|
|
420
570
|
|
|
@@ -434,6 +584,24 @@ option:
|
|
|
434
584
|
qs.DecodeOptions(list_limit=0),
|
|
435
585
|
) == {'a': {'1': 'b'}}
|
|
436
586
|
|
|
587
|
+
The same limit is enforced cumulatively when duplicate keys, mixed list
|
|
588
|
+
notation, or comma-separated values grow a list. A result exactly at the limit
|
|
589
|
+
remains a ``list``. Above the limit, decoding uses a numeric-keyed ``dict`` by
|
|
590
|
+
default, or raises ``ValueError`` when ``raise_on_limit_exceeded=True``.
|
|
591
|
+
|
|
592
|
+
.. code:: python
|
|
593
|
+
|
|
594
|
+
import qs_codec as qs
|
|
595
|
+
|
|
596
|
+
assert qs.decode(
|
|
597
|
+
'a=x&a=y',
|
|
598
|
+
qs.DecodeOptions(list_limit=1),
|
|
599
|
+
) == {'a': {'0': 'x', '1': 'y'}}
|
|
600
|
+
|
|
601
|
+
With ``comma=True``, a flat comma value is subject to the same limit. A value
|
|
602
|
+
assigned through ``[]=`` counts as one outer list element, so its inner
|
|
603
|
+
comma-separated group may contain more values than ``list_limit``.
|
|
604
|
+
|
|
437
605
|
To disable ``list`` parsing entirely, set `parse_lists <https://techouse.github.io/qs_codec/qs_codec.models.html#qs_codec.models.decode_options.DecodeOptions.parse_lists>`__
|
|
438
606
|
to ``False``.
|
|
439
607
|
|
|
@@ -752,14 +920,6 @@ Keys with no values (such as an empty ``dict`` or ``list``) will return nothing:
|
|
|
752
920
|
|
|
753
921
|
assert qs.encode({'a': {'b': {}}}) == ''
|
|
754
922
|
|
|
755
|
-
`Undefined <https://techouse.github.io/qs_codec/qs_codec.models.html#qs_codec.models.undefined.Undefined>`__ properties will be omitted entirely:
|
|
756
|
-
|
|
757
|
-
.. code:: python
|
|
758
|
-
|
|
759
|
-
import qs_codec as qs
|
|
760
|
-
|
|
761
|
-
assert qs.encode({'a': None, 'b': qs.Undefined()}) == 'a='
|
|
762
|
-
|
|
763
923
|
The query string may optionally be prepended with a question mark (``?``) by setting
|
|
764
924
|
`add_query_prefix <https://techouse.github.io/qs_codec/qs_codec.models.html#qs_codec.models.encode_options.EncodeOptions.add_query_prefix>`__ to ``True``:
|
|
765
925
|
|
|
@@ -25,7 +25,7 @@ Highlights
|
|
|
25
25
|
- Pluggable hooks: custom ``encoder``/``decoder`` callables; options to sort keys, filter output, and control percent-encoding (keys-only, values-only).
|
|
26
26
|
- Nulls & empties: ``strict_null_handling`` and ``skip_nulls``; support for empty lists/arrays when desired.
|
|
27
27
|
- Dates: ``serialize_date`` for ISO 8601 or custom (e.g., UNIX timestamp).
|
|
28
|
-
- Safety limits: configurable decode depth and encode max depth, parameter limit, and list
|
|
28
|
+
- Safety limits: configurable decode depth and encode max depth, parameter limit, and list element limit; optional strict-depth errors; duplicate-key strategies (combine/first/last).
|
|
29
29
|
- Extras: numeric entity decoding (e.g. ``&#9786;`` → ☺), alternate delimiters/regex, and query-prefix helpers.
|
|
30
30
|
|
|
31
31
|
Compatibility
|
|
@@ -49,6 +49,157 @@ A simple usage example:
|
|
|
49
49
|
# Decoding
|
|
50
50
|
assert qs.decode('a=b') == {'a': 'b'}
|
|
51
51
|
|
|
52
|
+
Compared with ``urllib.parse``
|
|
53
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
54
|
+
|
|
55
|
+
The standard library's
|
|
56
|
+
`urlencode <https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode>`__,
|
|
57
|
+
`parse_qs <https://docs.python.org/3/library/urllib.parse.html#urllib.parse.parse_qs>`__,
|
|
58
|
+
and
|
|
59
|
+
`parse_qsl <https://docs.python.org/3/library/urllib.parse.html#urllib.parse.parse_qsl>`__
|
|
60
|
+
are designed for conventional flat ``application/x-www-form-urlencoded``
|
|
61
|
+
data. Use ``qs_codec`` when the query represents nested dictionaries or lists,
|
|
62
|
+
must interoperate with Node ``qs``, or needs configurable list, duplicate, null,
|
|
63
|
+
or resource-limit behavior.
|
|
64
|
+
|
|
65
|
+
``urlencode`` can expand a flat sequence into repeated keys with
|
|
66
|
+
``doseq=True``, which corresponds to ``ListFormat.REPEAT``. It does not
|
|
67
|
+
recursively encode nested mappings; ``qs.encode`` uses bracket or dot paths
|
|
68
|
+
instead:
|
|
69
|
+
|
|
70
|
+
.. code:: python
|
|
71
|
+
|
|
72
|
+
from urllib.parse import urlencode
|
|
73
|
+
|
|
74
|
+
import qs_codec as qs
|
|
75
|
+
|
|
76
|
+
assert urlencode({'tags': ['a', 'b']}, doseq=True) == 'tags=a&tags=b'
|
|
77
|
+
assert qs.encode(
|
|
78
|
+
{'tags': ['a', 'b']},
|
|
79
|
+
qs.EncodeOptions(list_format=qs.ListFormat.REPEAT),
|
|
80
|
+
) == 'tags=a&tags=b'
|
|
81
|
+
assert qs.encode(
|
|
82
|
+
{'filter': {'name': 'Jane'}},
|
|
83
|
+
) == 'filter%5Bname%5D=Jane'
|
|
84
|
+
|
|
85
|
+
The encoding defaults also differ: ``urlencode`` emits spaces as ``+`` and
|
|
86
|
+
uses Python scalar spellings such as ``True`` and ``None``; ``qs.encode`` uses
|
|
87
|
+
``%20``, lowercase booleans, and an empty value for ``None`` by default.
|
|
88
|
+
|
|
89
|
+
On decode, ``parse_qs`` returns a dictionary whose values are always lists,
|
|
90
|
+
while ``parse_qsl`` returns an ordered list of name/value pairs and preserves
|
|
91
|
+
interleaved duplicate keys. Both treat bracket expressions as literal key
|
|
92
|
+
names, drop blank values unless ``keep_blank_values=True``, and collapse a
|
|
93
|
+
name-only token and an explicit empty value to the same empty string.
|
|
94
|
+
``qs.decode`` normally returns a scalar for one value, reconstructs bracket
|
|
95
|
+
paths, and can preserve that null distinction:
|
|
96
|
+
|
|
97
|
+
.. code:: python
|
|
98
|
+
|
|
99
|
+
from urllib.parse import parse_qs, parse_qsl
|
|
100
|
+
|
|
101
|
+
import qs_codec as qs
|
|
102
|
+
|
|
103
|
+
query = 'a=1&b=2&a=3&filter%5Bname%5D=Jane&flag&empty='
|
|
104
|
+
|
|
105
|
+
assert parse_qs(query, keep_blank_values=True) == {
|
|
106
|
+
'a': ['1', '3'],
|
|
107
|
+
'b': ['2'],
|
|
108
|
+
'filter[name]': ['Jane'],
|
|
109
|
+
'flag': [''],
|
|
110
|
+
'empty': [''],
|
|
111
|
+
}
|
|
112
|
+
assert parse_qsl(query, keep_blank_values=True) == [
|
|
113
|
+
('a', '1'),
|
|
114
|
+
('b', '2'),
|
|
115
|
+
('a', '3'),
|
|
116
|
+
('filter[name]', 'Jane'),
|
|
117
|
+
('flag', ''),
|
|
118
|
+
('empty', ''),
|
|
119
|
+
]
|
|
120
|
+
assert qs.decode(
|
|
121
|
+
query,
|
|
122
|
+
qs.DecodeOptions(strict_null_handling=True),
|
|
123
|
+
) == {
|
|
124
|
+
'a': ['1', '3'],
|
|
125
|
+
'b': '2',
|
|
126
|
+
'filter': {'name': 'Jane'},
|
|
127
|
+
'flag': None,
|
|
128
|
+
'empty': '',
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
All three decoders leave primitive values as strings. ``parse_qs`` and
|
|
132
|
+
``qs.decode`` combine repeated flat keys under their default behavior;
|
|
133
|
+
``parse_qsl`` instead retains each pair in input order. The standard-library
|
|
134
|
+
parsers offer ``max_num_fields``; ``qs.decode`` additionally provides default
|
|
135
|
+
parameter, nesting-depth, and list limits plus configurable duplicate handling.
|
|
136
|
+
|
|
137
|
+
Use ``parse_qsl`` when flat pair order or duplicate interleaving matters, but
|
|
138
|
+
not as a raw-query round-trip format: it percent-decodes names and values,
|
|
139
|
+
normalizes ``+`` and ``%20`` to the same space, and cannot retain the distinction
|
|
140
|
+
between a name-only token and an explicit empty value.
|
|
141
|
+
|
|
142
|
+
Working with URLs
|
|
143
|
+
~~~~~~~~~~~~~~~~~
|
|
144
|
+
|
|
145
|
+
Use `urllib.parse.urlsplit <https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlsplit>`__
|
|
146
|
+
to keep URL parsing separate from query-string decoding. Pass the encoded
|
|
147
|
+
``query`` component directly to ``qs.decode`` without calling ``unquote``,
|
|
148
|
+
``unquote_plus``, ``parse_qs``, or ``parse_qsl`` first:
|
|
149
|
+
|
|
150
|
+
.. code:: python
|
|
151
|
+
|
|
152
|
+
from urllib.parse import urlsplit
|
|
153
|
+
|
|
154
|
+
import qs_codec as qs
|
|
155
|
+
|
|
156
|
+
parts = urlsplit(
|
|
157
|
+
'https://example.com/search?filter%5Bname%5D=Jane%20Doe&flag#results'
|
|
158
|
+
)
|
|
159
|
+
params = qs.decode(
|
|
160
|
+
parts.query,
|
|
161
|
+
qs.DecodeOptions(strict_null_handling=True),
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
assert params == {
|
|
165
|
+
'filter': {'name': 'Jane Doe'},
|
|
166
|
+
'flag': None,
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
Passing the encoded component unchanged ensures that escaped delimiters such as
|
|
170
|
+
``%26``, escaped percent signs such as ``%2525``, and encoded bracket syntax
|
|
171
|
+
reach ``qs.decode`` without being decoded twice.
|
|
172
|
+
|
|
173
|
+
To replace a URL query, encode fresh data and assign it to the split result's
|
|
174
|
+
``query`` component:
|
|
175
|
+
|
|
176
|
+
.. code:: python
|
|
177
|
+
|
|
178
|
+
updated = parts._replace(
|
|
179
|
+
query=qs.encode({
|
|
180
|
+
'filter': {'name': 'John Doe'},
|
|
181
|
+
'tags': ['a', 'b'],
|
|
182
|
+
}),
|
|
183
|
+
).geturl()
|
|
184
|
+
|
|
185
|
+
assert updated == (
|
|
186
|
+
'https://example.com/search?'
|
|
187
|
+
'filter%5Bname%5D=John%20Doe&tags%5B0%5D=a&tags%5B1%5D=b'
|
|
188
|
+
'#results'
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
Keep `EncodeOptions.add_query_prefix <https://techouse.github.io/qs_codec/qs_codec.models.html#qs_codec.models.encode_options.EncodeOptions.add_query_prefix>`__
|
|
192
|
+
set to ``False`` (the default) when assigning to ``SplitResult.query``. Options
|
|
193
|
+
such as ``encode=False``, ``encode_values_only=True``, or a custom encoder can
|
|
194
|
+
emit raw URL-structural characters, so callers using them must ensure the result
|
|
195
|
+
is safe query-component text.
|
|
196
|
+
|
|
197
|
+
This pattern replaces the existing query; it does not append or merge it.
|
|
198
|
+
Appending or decoding and re-encoding an arbitrary query can change delimiter,
|
|
199
|
+
duplicate-key, name-only, list-format, ordering, and percent-encoding semantics.
|
|
200
|
+
``SplitResult.geturl()`` may also normalize URL spelling and removes an explicit
|
|
201
|
+
empty ``?`` delimiter.
|
|
202
|
+
|
|
52
203
|
Decoding
|
|
53
204
|
~~~~~~~~
|
|
54
205
|
|
|
@@ -346,12 +497,11 @@ Note that an empty ``str``\ing is also a value, and will be preserved:
|
|
|
346
497
|
|
|
347
498
|
assert qs.decode('a[0]=b&a[1]=&a[2]=c') == {'a': ['b', '', 'c']}
|
|
348
499
|
|
|
349
|
-
`decode <https://techouse.github.io/qs_codec/qs_codec.html#module-qs_codec.decode>`__
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
the index as the key. This
|
|
353
|
-
|
|
354
|
-
over this huge ``list``.
|
|
500
|
+
`decode <https://techouse.github.io/qs_codec/qs_codec.html#module-qs_codec.decode>`__ also limits each ``list`` to a
|
|
501
|
+
maximum element count of ``20``. Index ``19`` is the last index that can create
|
|
502
|
+
a default ``list``; index ``20`` and higher are converted to a ``dict`` with
|
|
503
|
+
the index as the key. This prevents inputs such as ``a[999999999]`` from
|
|
504
|
+
creating massive sparse lists.
|
|
355
505
|
|
|
356
506
|
.. code:: python
|
|
357
507
|
|
|
@@ -371,6 +521,24 @@ option:
|
|
|
371
521
|
qs.DecodeOptions(list_limit=0),
|
|
372
522
|
) == {'a': {'1': 'b'}}
|
|
373
523
|
|
|
524
|
+
The same limit is enforced cumulatively when duplicate keys, mixed list
|
|
525
|
+
notation, or comma-separated values grow a list. A result exactly at the limit
|
|
526
|
+
remains a ``list``. Above the limit, decoding uses a numeric-keyed ``dict`` by
|
|
527
|
+
default, or raises ``ValueError`` when ``raise_on_limit_exceeded=True``.
|
|
528
|
+
|
|
529
|
+
.. code:: python
|
|
530
|
+
|
|
531
|
+
import qs_codec as qs
|
|
532
|
+
|
|
533
|
+
assert qs.decode(
|
|
534
|
+
'a=x&a=y',
|
|
535
|
+
qs.DecodeOptions(list_limit=1),
|
|
536
|
+
) == {'a': {'0': 'x', '1': 'y'}}
|
|
537
|
+
|
|
538
|
+
With ``comma=True``, a flat comma value is subject to the same limit. A value
|
|
539
|
+
assigned through ``[]=`` counts as one outer list element, so its inner
|
|
540
|
+
comma-separated group may contain more values than ``list_limit``.
|
|
541
|
+
|
|
374
542
|
To disable ``list`` parsing entirely, set `parse_lists <https://techouse.github.io/qs_codec/qs_codec.models.html#qs_codec.models.decode_options.DecodeOptions.parse_lists>`__
|
|
375
543
|
to ``False``.
|
|
376
544
|
|
|
@@ -689,14 +857,6 @@ Keys with no values (such as an empty ``dict`` or ``list``) will return nothing:
|
|
|
689
857
|
|
|
690
858
|
assert qs.encode({'a': {'b': {}}}) == ''
|
|
691
859
|
|
|
692
|
-
`Undefined <https://techouse.github.io/qs_codec/qs_codec.models.html#qs_codec.models.undefined.Undefined>`__ properties will be omitted entirely:
|
|
693
|
-
|
|
694
|
-
.. code:: python
|
|
695
|
-
|
|
696
|
-
import qs_codec as qs
|
|
697
|
-
|
|
698
|
-
assert qs.encode({'a': None, 'b': qs.Undefined()}) == 'a='
|
|
699
|
-
|
|
700
860
|
The query string may optionally be prepended with a question mark (``?``) by setting
|
|
701
861
|
`add_query_prefix <https://techouse.github.io/qs_codec/qs_codec.models.html#qs_codec.models.encode_options.EncodeOptions.add_query_prefix>`__ to ``True``:
|
|
702
862
|
|
|
@@ -10,6 +10,67 @@ Do not mutate caller-owned input containers or shared callback state while an
|
|
|
10
10
|
free-threaded CPython build, which is supported and covered by the thread-safety
|
|
11
11
|
test suite without changing these mutation guarantees.
|
|
12
12
|
|
|
13
|
+
Working with URLs
|
|
14
|
+
~~~~~~~~~~~~~~~~~
|
|
15
|
+
|
|
16
|
+
Use `urllib.parse.urlsplit <https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlsplit>`_
|
|
17
|
+
to keep URL parsing separate from query-string decoding. Pass the encoded
|
|
18
|
+
``query`` component directly to :py:attr:`qs_codec.decode` without calling
|
|
19
|
+
``unquote``, ``unquote_plus``, ``parse_qs``, or ``parse_qsl`` first:
|
|
20
|
+
|
|
21
|
+
.. code:: python
|
|
22
|
+
|
|
23
|
+
from urllib.parse import urlsplit
|
|
24
|
+
|
|
25
|
+
import qs_codec as qs
|
|
26
|
+
|
|
27
|
+
parts = urlsplit(
|
|
28
|
+
'https://example.com/search?filter%5Bname%5D=Jane%20Doe&flag#results'
|
|
29
|
+
)
|
|
30
|
+
params = qs.decode(
|
|
31
|
+
parts.query,
|
|
32
|
+
qs.DecodeOptions(strict_null_handling=True),
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
assert params == {
|
|
36
|
+
'filter': {'name': 'Jane Doe'},
|
|
37
|
+
'flag': None,
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
Passing the encoded component unchanged ensures that escaped delimiters such as
|
|
41
|
+
``%26``, escaped percent signs such as ``%2525``, and encoded bracket syntax
|
|
42
|
+
reach :py:attr:`qs_codec.decode` without being decoded twice.
|
|
43
|
+
|
|
44
|
+
To replace a URL query, encode fresh data and assign it to the split result's
|
|
45
|
+
``query`` component:
|
|
46
|
+
|
|
47
|
+
.. code:: python
|
|
48
|
+
|
|
49
|
+
updated = parts._replace(
|
|
50
|
+
query=qs.encode({
|
|
51
|
+
'filter': {'name': 'John Doe'},
|
|
52
|
+
'tags': ['a', 'b'],
|
|
53
|
+
}),
|
|
54
|
+
).geturl()
|
|
55
|
+
|
|
56
|
+
assert updated == (
|
|
57
|
+
'https://example.com/search?'
|
|
58
|
+
'filter%5Bname%5D=John%20Doe&tags%5B0%5D=a&tags%5B1%5D=b'
|
|
59
|
+
'#results'
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
Keep :py:attr:`add_query_prefix <qs_codec.models.encode_options.EncodeOptions.add_query_prefix>`
|
|
63
|
+
set to ``False`` (the default) when assigning to ``SplitResult.query``. Options
|
|
64
|
+
such as ``encode=False``, ``encode_values_only=True``, or a custom encoder can
|
|
65
|
+
emit raw URL-structural characters, so callers using them must ensure the result
|
|
66
|
+
is safe query-component text.
|
|
67
|
+
|
|
68
|
+
This pattern replaces the existing query; it does not append or merge it.
|
|
69
|
+
Appending or decoding and re-encoding an arbitrary query can change delimiter,
|
|
70
|
+
duplicate-key, name-only, list-format, ordering, and percent-encoding semantics.
|
|
71
|
+
``SplitResult.geturl()`` may also normalize URL spelling and removes an explicit
|
|
72
|
+
empty ``?`` delimiter.
|
|
73
|
+
|
|
13
74
|
Decoding
|
|
14
75
|
~~~~~~~~
|
|
15
76
|
|
|
@@ -366,6 +427,25 @@ option:
|
|
|
366
427
|
qs.DecodeOptions(list_limit=0),
|
|
367
428
|
) == {'a': {'1': 'b'}}
|
|
368
429
|
|
|
430
|
+
The same limit is enforced cumulatively when duplicate keys, mixed list
|
|
431
|
+
notation, or comma-separated values grow a list. A result exactly at the limit
|
|
432
|
+
remains a ``list``. Above the limit, decoding uses a numeric-keyed ``dict`` by
|
|
433
|
+
default, or raises ``ValueError`` when
|
|
434
|
+
:py:attr:`raise_on_limit_exceeded <qs_codec.models.decode_options.DecodeOptions.raise_on_limit_exceeded>` is ``True``.
|
|
435
|
+
|
|
436
|
+
.. code:: python
|
|
437
|
+
|
|
438
|
+
import qs_codec as qs
|
|
439
|
+
|
|
440
|
+
assert qs.decode(
|
|
441
|
+
'a=x&a=y',
|
|
442
|
+
qs.DecodeOptions(list_limit=1),
|
|
443
|
+
) == {'a': {'0': 'x', '1': 'y'}}
|
|
444
|
+
|
|
445
|
+
With ``comma=True``, a flat comma value is subject to the same limit. A value
|
|
446
|
+
assigned through ``[]=`` counts as one outer list element, so its inner
|
|
447
|
+
comma-separated group may contain more values than ``list_limit``.
|
|
448
|
+
|
|
369
449
|
To disable ``list`` parsing entirely, set :py:attr:`parse_lists <qs_codec.models.decode_options.DecodeOptions.parse_lists>`
|
|
370
450
|
to ``False``.
|
|
371
451
|
|
|
@@ -683,14 +763,6 @@ Keys with no values (such as an empty ``dict`` or ``list``) will return nothing:
|
|
|
683
763
|
|
|
684
764
|
assert qs.encode({'a': {'b': {}}}) == ''
|
|
685
765
|
|
|
686
|
-
:py:attr:`Undefined <qs_codec.models.undefined.Undefined>` properties will be omitted entirely:
|
|
687
|
-
|
|
688
|
-
.. code:: python
|
|
689
|
-
|
|
690
|
-
import qs_codec as qs
|
|
691
|
-
|
|
692
|
-
assert qs.encode({'a': None, 'b': qs.Undefined()}) == 'a='
|
|
693
|
-
|
|
694
766
|
The query string may optionally be prepended with a question mark (``?``) by setting
|
|
695
767
|
:py:attr:`add_query_prefix <qs_codec.models.encode_options.EncodeOptions.add_query_prefix>` to ``True``:
|
|
696
768
|
|
|
@@ -14,7 +14,7 @@ The package root re-exports the most commonly used functions and enums so you ca
|
|
|
14
14
|
"""
|
|
15
15
|
|
|
16
16
|
# Package version (PEP 440). Bump in lockstep with distribution metadata.
|
|
17
|
-
__version__ = "1.
|
|
17
|
+
__version__ = "1.6.1"
|
|
18
18
|
|
|
19
19
|
# Public API surface re-exported at the package root.
|
|
20
20
|
__all__ = [
|
|
@@ -31,7 +31,6 @@ __all__ = [
|
|
|
31
31
|
"Sentinel",
|
|
32
32
|
"DecodeOptions",
|
|
33
33
|
"EncodeOptions",
|
|
34
|
-
"Undefined",
|
|
35
34
|
]
|
|
36
35
|
|
|
37
36
|
from .decode import decode, load, loads
|
|
@@ -44,4 +43,3 @@ from .enums.list_format import ListFormat
|
|
|
44
43
|
from .enums.sentinel import Sentinel
|
|
45
44
|
from .models.decode_options import DecodeOptions
|
|
46
45
|
from .models.encode_options import EncodeOptions
|
|
47
|
-
from .models.undefined import Undefined
|
|
@@ -63,7 +63,8 @@ def decode(
|
|
|
63
63
|
Notes
|
|
64
64
|
-----
|
|
65
65
|
- Empty/falsey ``value`` returns an empty dict.
|
|
66
|
-
-
|
|
66
|
+
- ``parse_lists`` is honored directly throughout decoding. ``list_limit`` is enforced while constructing and
|
|
67
|
+
merging lists without changing the configured list-parsing mode.
|
|
67
68
|
"""
|
|
68
69
|
obj: t.Dict[str, t.Any] = {}
|
|
69
70
|
|
|
@@ -78,17 +79,6 @@ def decode(
|
|
|
78
79
|
str_value: str = t.cast(str, value) if decode_from_string else ""
|
|
79
80
|
mapping_value: t.Mapping[str, t.Any] = t.cast(t.Mapping[str, t.Any], value) if not decode_from_string else {}
|
|
80
81
|
|
|
81
|
-
parse_lists_effective: bool = opts.parse_lists
|
|
82
|
-
if decode_from_string and parse_lists_effective:
|
|
83
|
-
# Keep caller options immutable: compute a local parse_lists switch only for this invocation.
|
|
84
|
-
query = str_value.replace("?", "", 1) if opts.ignore_query_prefix else str_value
|
|
85
|
-
if isinstance(opts.delimiter, re.Pattern):
|
|
86
|
-
parts_count = len(re.split(opts.delimiter, query)) if query else 0
|
|
87
|
-
else:
|
|
88
|
-
parts_count = (query.count(opts.delimiter) + 1) if query else 0
|
|
89
|
-
if 0 < opts.list_limit < parts_count:
|
|
90
|
-
parse_lists_effective = False
|
|
91
|
-
|
|
92
82
|
if decode_from_string:
|
|
93
83
|
temp_obj: t.Optional[t.Dict[str, t.Any]] = _parse_query_string_values(str_value, opts)
|
|
94
84
|
else:
|
|
@@ -118,7 +108,7 @@ def decode(
|
|
|
118
108
|
obj[key] = val
|
|
119
109
|
continue
|
|
120
110
|
|
|
121
|
-
new_obj: t.Any = _parse_keys(key, val, opts, decode_from_string, parse_lists=
|
|
111
|
+
new_obj: t.Any = _parse_keys(key, val, opts, decode_from_string, parse_lists=opts.parse_lists)
|
|
122
112
|
|
|
123
113
|
if not obj and isinstance(new_obj, dict):
|
|
124
114
|
obj = new_obj
|
|
@@ -253,10 +243,17 @@ def _parse_array_value(
|
|
|
253
243
|
Either the original value or a list of values, without decoding (that happens later).
|
|
254
244
|
"""
|
|
255
245
|
if isinstance(value, str) and value and options.comma and "," in value:
|
|
246
|
+
if enforce_comma_limit and options.raise_on_limit_exceeded:
|
|
247
|
+
comma_count = 0
|
|
248
|
+
comma_index = value.find(",")
|
|
249
|
+
while comma_index >= 0:
|
|
250
|
+
comma_count += 1
|
|
251
|
+
if comma_count >= options.list_limit:
|
|
252
|
+
raise ValueError(_list_limit_exceeded_message(options.list_limit))
|
|
253
|
+
comma_index = value.find(",", comma_index + 1)
|
|
254
|
+
|
|
256
255
|
split_val: t.List[str] = value.split(",")
|
|
257
256
|
if enforce_comma_limit and len(split_val) > options.list_limit:
|
|
258
|
-
if options.raise_on_limit_exceeded:
|
|
259
|
-
raise ValueError(_list_limit_exceeded_message(options.list_limit))
|
|
260
257
|
return CommaOverflowDict({str(i): item for i, item in enumerate(split_val)})
|
|
261
258
|
return split_val
|
|
262
259
|
|
|
@@ -384,7 +381,7 @@ def _parse_query_string_values(value: str, options: DecodeOptions) -> t.Dict[str
|
|
|
384
381
|
part[pos + 1 :],
|
|
385
382
|
options,
|
|
386
383
|
len(obj[key]) if key in obj and isinstance(obj[key], (list, tuple)) else 0,
|
|
387
|
-
enforce_comma_limit=
|
|
384
|
+
enforce_comma_limit=not bracket_array_assignment,
|
|
388
385
|
)
|
|
389
386
|
list_limit_exceeded = isinstance(parsed_value, (list, tuple)) and len(parsed_value) > options.list_limit
|
|
390
387
|
if isinstance(parsed_value, (list, tuple)):
|