asgi-tools 1.2.0__cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,34 @@
1
+ # cython: language_level=3
2
+
3
+
4
# Declarations for the parser base class: it owns the callback table and
# declares the entry points that the concrete parsers override.
cdef class BaseParser:

    # Maps a callback name (e.g. "field_name", "part_data") to a callable.
    cdef dict callbacks

    # Looks up ``name`` in ``callbacks`` and, when present, calls it with
    # ``(data, start, end)``.  ``except *`` lets Python exceptions propagate
    # out of this void function.
    cdef void callback(self, str name, bytes data, int start, int end) except *

    # Feeds one chunk of input to the parser; exceptions propagate.
    cpdef void write(self, bytes data) except *

    # Signals that no more input will be written.
    cpdef void finalize(self)
13
+
14
+
15
# Typed attributes of the streaming query-string parser.
cdef class QueryStringParser(BaseParser):

    # Number of bytes accepted by write() so far.
    cdef unsigned int cursize
    # Upper bound on the number of bytes to parse; 0 disables the limit.
    cdef unsigned int max_size
    # Current state-machine state (one of the STATE_* values of the module).
    cdef unsigned char state
20
+
21
+
22
# Typed attributes of the streaming multipart parser.
cdef class MultipartParser(BaseParser):

    # Number of bytes accepted by write() so far.
    cdef unsigned int cursize
    # Upper bound on the number of bytes to parse; 0 disables the limit.
    cdef unsigned int max_size
    # Current state-machine state (one of the STATE_* values of the module).
    cdef unsigned char state
    # Progress counter inside the boundary / header currently being matched.
    cdef unsigned int index
    # Bit set built from FLAG_PART_BOUNDARY / FLAG_LAST_BOUNDARY.
    cdef short flags
    # Start offsets ("marks") of the header field, header value and part data
    # inside the chunk being processed; -1 means "no mark set".
    cdef int header_field_pos
    cdef int header_value_pos
    cdef int part_data_pos
    # b"\r\n--" followed by the boundary given to the constructor.
    cdef bytes boundary
    # Set of the byte values that occur in ``boundary``.
    cdef frozenset boundary_chars
    # Bytes tentatively matched against the boundary, kept so they can be
    # emitted as part data if the match fails.
    cdef list lookbehind
@@ -0,0 +1,589 @@
1
+ """The code is based on the great work of Andrew Dunham
2
+ (https://github.com/andrew-d/python-multipart) and has been changed to improve speed.
3
+
4
+ The original code is licensed under the Apache 2 license.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from contextlib import suppress
10
+
11
+ # Flags for the multipart parser.
12
+ from typing import Final
13
+
14
# Bit flags used by the multipart parser while it checks what follows a
# matched boundary.
FLAG_PART_BOUNDARY: Final = 1
FLAG_LAST_BOUNDARY: Final = 2

# Indexing or iterating a ``bytes`` object yields integers, so the special
# characters the parsers compare against are stored as integer byte values.
AMPERSAND: Final = ord("&")
COLON: Final = ord(":")
CR: Final = ord("\r")
EQUAL: Final = ord("=")
HYPHEN: Final = ord("-")
LF: Final = ord("\n")
SEMICOLON: Final = ord(";")
SPACE: Final = ord(" ")
EMPTY: Final = ord("\x00")
29
+
30
+
31
class BaseParser:
    """Base class for all parsers; it stores the callbacks and dispatches to them.

    Callbacks come in two flavours. "Notification callbacks" signal that
    something happened - for example, that a new part of a multipart message
    was encountered. "Data callbacks" deliver a piece of data - for example,
    part of the body of a multipart chunk. Both flavours are invoked with the
    same three positional arguments::

        callback(data, start, end)

    For a data callback, "data" is a bytestring and "start" / "end" are integer
    indexes into it, so that the slice `data[start:end]` is the data the
    callback is "interested in". The callback is not passed a copy of the data,
    since copying severely hurts performance. Notification callbacks receive
    the placeholder arguments ``(b"", 0, 0)``.

    :param callbacks: A dictionary mapping callback names to callables. Names
                      that are missing from the dictionary are simply skipped.
    """

    __slots__ = ("callbacks",)

    def __init__(self, callbacks: dict):
        self.callbacks = callbacks

    def callback(self, name: str, data: bytes, start: int, end: int):
        """Invoke the callback registered under *name*, if there is one.

        Only the lookup is guarded: an exception raised by the callback itself
        (including a ``KeyError``) propagates to the caller instead of being
        mistaken for "no such callback".
        """
        try:
            func = self.callbacks[name]
        except KeyError:
            return

        func(data, start, end)

    def write(self, _: bytes):
        """Consume a chunk of data. The base implementation does nothing."""

    def finalize(self):
        """Signal the end of the input. The base implementation does nothing."""
66
+
67
+
68
# States of the query-string parser, numbered consecutively from 0.
STATE_BEFORE_FIELD, STATE_FIELD_NAME, STATE_FIELD_DATA = range(3)
71
+
72
+
73
class QueryStringParser(BaseParser):
    """This is a streaming querystring parser. It will consume data, and call
    the callbacks given when it has data.

    Every callback is invoked as ``callback(data, start, end)``; the
    notification callbacks receive the placeholder arguments ``(b"", 0, 0)``.

    .. list-table::
       :widths: 15 10 30
       :header-rows: 1

       * - Callback Name
         - Parameters
         - Description
       * - field_start
         - ``b""``, 0, 0
         - Called when a new field is encountered.
       * - field_name
         - data, start, end
         - Called when a portion of a field's name is encountered.
       * - field_data
         - data, start, end
         - Called when a portion of a field's data is encountered.
       * - field_end
         - ``b""``, 0, 0
         - Called when the end of a field is encountered.
       * - end
         - ``b""``, 0, 0
         - Called when the parser is finished parsing all data.

    :param callbacks: A dictionary of callbacks. See the documentation for
                      :class:`BaseParser`.

    :param max_size: The maximum size of body to parse. Defaults to 0, which
                     disables the limit.
    """

    # "callbacks" is already a slot of BaseParser and must not be redeclared.
    __slots__ = "cursize", "max_size", "state"

    def __init__(self, callbacks: dict, max_size: int = 0):
        super().__init__(callbacks)
        self.cursize = 0
        self.max_size = max_size

        self.state = STATE_BEFORE_FIELD

    def write(self, data: bytes):  # noqa: C901, PLR0912
        """Parse one chunk of the query string and fire the matching callbacks.

        Only the first ``data_len`` bytes of *data* are looked at, where
        ``data_len`` is the chunk length after applying ``max_size``.

        :raises ValueError: if the parser is in an unknown state.
        """
        data_len = prune_data(len(data), self.cursize, self.max_size)

        idx = 0
        state = self.state

        while idx < data_len:
            ch = data[idx]
            if state == STATE_BEFORE_FIELD:
                # Skip separators; any other byte starts a new field and is
                # re-processed as the first byte of the field name.
                if ch not in (AMPERSAND, SEMICOLON):
                    self.callback("field_start", b"", 0, 0)
                    idx -= 1
                    state = STATE_FIELD_NAME

            elif state == STATE_FIELD_NAME:
                # All searches stop at data_len so that bytes beyond the size
                # limit are never handed to a callback.
                # NOTE: "&" is searched first and ";" is only used as a
                # fallback when the rest of the chunk contains no "&".
                sep_pos = data.find(AMPERSAND, idx, data_len)
                if sep_pos == -1:
                    sep_pos = data.find(SEMICOLON, idx, data_len)

                # An "=" only belongs to this field if it precedes the separator.
                if sep_pos != -1:
                    equals_pos = data.find(EQUAL, idx, sep_pos)
                else:
                    equals_pos = data.find(EQUAL, idx, data_len)

                if equals_pos != -1:
                    self.callback("field_name", data, idx, equals_pos)
                    idx = equals_pos
                    state = STATE_FIELD_DATA

                elif sep_pos == -1:
                    # Neither "=" nor a separator: the name may continue in
                    # the next chunk.
                    self.callback("field_name", data, idx, data_len)
                    idx = data_len

                else:
                    # A name without a value, terminated by a separator.
                    self.callback("field_name", data, idx, sep_pos)
                    self.callback("field_end", b"", 0, 0)
                    idx = sep_pos - 1
                    state = STATE_BEFORE_FIELD

            elif state == STATE_FIELD_DATA:
                sep_pos = data.find(AMPERSAND, idx, data_len)
                if sep_pos == -1:
                    sep_pos = data.find(SEMICOLON, idx, data_len)

                if sep_pos == -1:
                    # No separator: the value may continue in the next chunk.
                    self.callback("field_data", data, idx, data_len)
                    idx = data_len

                else:
                    self.callback("field_data", data, idx, sep_pos)
                    self.callback("field_end", b"", 0, 0)

                    idx = sep_pos - 1
                    state = STATE_BEFORE_FIELD

            else:
                raise ValueError(f"Reached an unknown state {state} at {idx}")

            idx += 1

        self.state = state
        self.cursize += data_len

    def finalize(self):
        """Finalize this parser, which signals that we are finished parsing.

        If we're still in the middle of a field, a field_end callback is
        fired first, and then the end callback.
        """
        # A field is open both while its name and while its data is being
        # read: field_start has been fired, but field_end has not.
        if self.state in (STATE_FIELD_NAME, STATE_FIELD_DATA):
            self.callback("field_end", b"", 0, 0)
        self.callback("end", b"", 0, 0)
187
+
188
+
189
# States of the multipart parser, numbered consecutively from 0.
(
    STATE_START,
    STATE_START_BOUNDARY,
    STATE_HEADER_FIELD_START,
    STATE_HEADER_FIELD,
    STATE_HEADER_VALUE_START,
    STATE_HEADER_VALUE,
    STATE_HEADER_VALUE_ALMOST_DONE,
    STATE_HEADERS_ALMOST_DONE,
    STATE_PART_DATA_START,
    STATE_PART_DATA,
    STATE_PART_DATA_END,
    STATE_END,
) = range(12)
201
+
202
+
203
class MultipartParser(BaseParser):
    """This class is a streaming multipart/form-data parser.

    Every callback is invoked as ``callback(data, start, end)``; the
    notification callbacks receive the placeholder arguments ``(b"", 0, 0)``.

    .. list-table::
       :widths: 15 10 30
       :header-rows: 1

       * - Callback Name
         - Parameters
         - Description
       * - part_begin
         - ``b""``, 0, 0
         - Called when a new part of the multipart message is encountered.
       * - part_data
         - data, start, end
         - Called when a portion of a part's data is encountered.
       * - part_end
         - ``b""``, 0, 0
         - Called when the end of a part is reached.
       * - header_begin
         - ``b""``, 0, 0
         - Called when we've found a new header in a part of a multipart
           message
       * - header_field
         - data, start, end
         - Called each time an additional portion of a header is read (i.e. the
           part of the header that is before the colon; the "Foo" in
           "Foo: Bar").
       * - header_value
         - data, start, end
         - Called when we get data for a header.
       * - header_end
         - ``b""``, 0, 0
         - Called when the current header is finished - i.e. we've reached the
           newline at the end of the header.
       * - headers_finished
         - ``b""``, 0, 0
         - Called when all headers are finished, and before the part data
           starts.
       * - end
         - ``b""``, 0, 0
         - Called when the parser is finished parsing all data.


    :param boundary: The multipart boundary. This is required, and must match
                     what is given in the HTTP request - usually in the
                     Content-Type header. A ``str`` is encoded as latin-1.

    :param callbacks: A dictionary of callbacks. See the documentation for
                      :class:`BaseParser`.

    :param max_size: The maximum size of body to parse. Defaults to 0, which
                     disables the limit.

    """

    # "callbacks" is already a slot of BaseParser and must not be redeclared.
    __slots__ = (
        "boundary",
        "boundary_chars",
        "cursize",
        "flags",
        "header_field_pos",
        "header_value_pos",
        "index",
        "lookbehind",
        "max_size",
        "part_data_pos",
        "state",
    )

    def __init__(self, boundary, callbacks: dict, max_size: int = 0):
        super().__init__(callbacks)
        self.cursize = 0
        self.max_size = max_size
        self.state = STATE_START
        self.index = self.flags = 0

        # Marks: offset in the current chunk where the header field, header
        # value or part data started; -1 means "no mark set".
        self.header_field_pos = -1
        self.header_value_pos = -1
        self.part_data_pos = -1

        if isinstance(boundary, str):
            boundary = boundary.encode("latin-1")

        self.boundary = b"\r\n--" + boundary

        # Get a set of characters that belong to our boundary.
        self.boundary_chars = frozenset(self.boundary)

        # We also create a lookbehind list.
        # Note: the +8 is since we can have, at maximum, "\r\n--" + boundary +
        # "--\r\n" at the final boundary, and the length of '\r\n--' and
        # '--\r\n' is 8 bytes.
        self.lookbehind = [EMPTY] * (len(boundary) + 8)

    def write(self, data: bytes):  # noqa: C901, PLR0912, PLR0915
        """Parse one chunk of the multipart body and fire the matching callbacks.

        Only the first ``data_len`` bytes of *data* are looked at, where
        ``data_len`` is the chunk length after applying ``max_size``.

        :raises ValueError: if the data is malformed (bad boundary, missing
                            line feed, 0-length header) or the parser is in an
                            unknown state.
        """
        data_len = prune_data(len(data), self.cursize, self.max_size)

        idx = 0
        index = self.index
        state = self.state
        flags = self.flags
        boundary = self.boundary
        boundary_len = len(boundary)

        while idx < data_len:
            ch = data[idx]

            if state == STATE_START_BOUNDARY:
                # Check to ensure that the last 2 characters in our boundary
                # are CRLF.
                if index == boundary_len - 2:
                    if ch != CR:
                        raise ValueError(f"Did not find \\r at end of boundary ({idx})")
                    index += 1

                elif index == boundary_len - 2 + 1:
                    if ch != LF:
                        raise ValueError(f"Did not find \\n at end of boundary ({idx})")

                    state = STATE_HEADER_FIELD_START
                    self.callback("part_begin", b"", 0, 0)

                # Check to ensure our boundary matches. The first boundary has
                # no leading CRLF, hence the offset of 2 into self.boundary.
                elif ch == boundary[index + 2]:
                    # Increment index into boundary and continue.
                    index += 1

                else:
                    raise ValueError(
                        f"Did not find boundary character {ch:c} at index {idx}",
                    )

            elif state == STATE_HEADER_FIELD_START:
                # Mark the start of a header field here, reset the index, and
                # continue parsing our header field.
                index = 0
                self.header_field_pos = idx
                idx -= 1
                state = STATE_HEADER_FIELD

            elif state == STATE_HEADER_FIELD:
                # If we've reached a CR at the beginning of a header, it means
                # that we've reached the second of 2 newlines, and so there are
                # no more headers to parse.
                if ch == CR:
                    self.header_field_pos = -1
                    state = STATE_HEADERS_ALMOST_DONE
                    idx += 1
                    continue

                index += 1

                # If we've reached a colon, we're done with this header.
                if ch == COLON:
                    # A 0-length header is an error.
                    if index == 1:
                        raise ValueError(f"Found 0-length header at {idx}")

                    # Call our callback with the header field.
                    if self.header_field_pos != -1:
                        self.callback("header_field", data, self.header_field_pos, idx)
                        self.header_field_pos = -1

                    # Move to parsing the header value.
                    state = STATE_HEADER_VALUE_START

            elif state == STATE_HEADER_VALUE_START:
                # Skip leading spaces.
                if ch != SPACE:
                    # Mark the start of the header value.
                    self.header_value_pos = idx
                    idx -= 1
                    # Move to the header-value state, reprocessing this character.
                    state = STATE_HEADER_VALUE

            elif state == STATE_HEADER_VALUE:
                # If we've got a CR, we're nearly done our headers. Otherwise,
                # we do nothing and just move past this character.
                if ch == CR:
                    if self.header_value_pos != -1:
                        self.callback("header_value", data, self.header_value_pos, idx)
                        self.header_value_pos = -1

                    self.callback("header_end", b"", 0, 0)
                    state = STATE_HEADER_VALUE_ALMOST_DONE

            elif state == STATE_HEADER_VALUE_ALMOST_DONE:
                # The last character should be a LF. If not, it's an error.
                if ch != LF:
                    raise ValueError(
                        f"Did not find \\n at end of header (found {ch:c})",
                    )

                # Move back to the start of another header. Note that if that
                # state detects ANOTHER newline, it'll trigger the end of our
                # headers.
                state = STATE_HEADER_FIELD_START

            elif state == STATE_HEADERS_ALMOST_DONE:
                # We're almost done our headers. This is reached when we parse
                # a CR at the beginning of a header, so our next character
                # should be a LF, or it's an error.
                if ch != LF:
                    raise ValueError(
                        f"Did not find \\n at end of headers (found {ch:c})",
                    )

                self.callback("headers_finished", b"", 0, 0)
                # Mark the start of our part data.
                self.part_data_pos = idx + 1
                state = STATE_PART_DATA

            elif state == STATE_PART_DATA:
                # We're processing our part data right now. During this, we
                # need to efficiently search for our boundary, since any data
                # on any number of lines can be a part of the current data.
                # We use the Boyer-Moore-Horspool algorithm to efficiently
                # search through the remainder of the buffer looking for our
                # boundary.

                # Save the current value of our index. We use this in case we
                # find part of a boundary, but it doesn't match fully.
                prev_index = index

                # Set up variables.
                boundary_end = boundary_len - 1
                boundary_chars = self.boundary_chars

                # If our index is 0, we're starting a new part, so start our
                # search.
                if index == 0:
                    # Search forward until we either hit the end of our buffer,
                    # or reach a character that's in our boundary.
                    idx += boundary_end
                    while idx < data_len - 1 and data[idx] not in boundary_chars:
                        idx += boundary_len

                    # Reset idx back the length of our boundary, which is the
                    # earliest possible location that could be our match (i.e.
                    # if we've just broken out of our loop since we saw the
                    # last character in our boundary)
                    idx -= boundary_end
                    ch = data[idx]

                # Now, we have a couple of cases here. If our index is before
                # the end of the boundary...
                if index < boundary_len:
                    # If the character matches...
                    if boundary[index] == ch:
                        # If we found a match for our boundary, we send the
                        # existing data.
                        if index == 0 and self.part_data_pos != -1:
                            self.callback("part_data", data, self.part_data_pos, idx)
                            self.part_data_pos = -1

                        # The current character matches, so continue!
                        index += 1
                    else:
                        index = 0

                # Our index is equal to the length of our boundary!
                elif index == boundary_len:
                    # First we increment it.
                    index += 1

                    # Now, if we've reached a newline, we need to set this as
                    # the potential end of our boundary.
                    if ch == CR:
                        flags |= FLAG_PART_BOUNDARY

                    # Otherwise, if this is a hyphen, we might be at the last
                    # of all boundaries.
                    elif ch == HYPHEN:
                        flags |= FLAG_LAST_BOUNDARY

                    # Otherwise, we reset our index, since this isn't either a
                    # newline or a hyphen.
                    else:
                        index = 0

                # Our index is right after the part boundary, which should be
                # a LF.
                elif index == boundary_len + 1:
                    # If we're at a part boundary (i.e. we've seen a CR
                    # character already)...
                    if flags & FLAG_PART_BOUNDARY:
                        # We need a LF character next.
                        if ch == LF:
                            # Unset the part boundary flag.
                            flags &= ~FLAG_PART_BOUNDARY

                            # Callback indicating that we've reached the end of
                            # a part, and are starting a new one.
                            self.callback("part_end", b"", 0, 0)
                            self.callback("part_begin", b"", 0, 0)

                            # Move to parsing new headers.
                            index = 0
                            state = STATE_HEADER_FIELD_START
                            idx += 1
                            continue

                        # We didn't find an LF character, so no match. Reset
                        # our index and clear our flag.
                        index = 0
                        flags &= ~FLAG_PART_BOUNDARY

                    # Otherwise, if we're at the last boundary (i.e. we've
                    # seen a hyphen already)...
                    elif flags & FLAG_LAST_BOUNDARY:
                        # We need a second hyphen here.
                        if ch == HYPHEN:
                            # Callback to end the current part, and then the
                            # message.
                            self.callback("part_end", b"", 0, 0)
                            self.callback("end", b"", 0, 0)
                            state = STATE_END
                        else:
                            # No match, so reset index.
                            index = 0

                # If we have an index, we need to keep this byte for later, in
                # case we can't match the full boundary.
                if index > 0:
                    self.lookbehind[index - 1] = ch

                # Otherwise, our index is 0. If the previous index is not, it
                # means we reset something, and we need to take the data we
                # thought was part of our boundary and send it along as actual
                # data.
                elif prev_index > 0:
                    # Callback to write the saved data.
                    lb_data = bytes(self.lookbehind)
                    self.callback("part_data", lb_data, 0, prev_index)

                    # Overwrite our previous index.
                    prev_index = 0

                    # Re-set our mark for part data.
                    self.part_data_pos = idx

                    # Re-consider the current character, since this could be
                    # the start of the boundary itself.
                    idx -= 1

            elif state == STATE_START:
                # Skip leading newlines
                if ch not in (CR, LF):
                    # Move to the next state, but decrement idx so that we
                    # re-process this character.
                    idx -= 1
                    state = STATE_START_BOUNDARY

            elif state == STATE_END:
                # Do nothing and just consume a byte in the end state.
                pass

            else:
                raise ValueError(f"Reached an unknown state {state} at {idx}")

            # Move to the next byte.
            idx += 1

        # Flush whatever is still marked at the end of the chunk. The mark is
        # then moved to 0 because the item continues at the start of the next
        # chunk.
        if self.header_field_pos != -1:
            self.callback("header_field", data, self.header_field_pos, data_len)
            self.header_field_pos = 0

        if self.header_value_pos != -1:
            self.callback("header_value", data, self.header_value_pos, data_len)
            self.header_value_pos = 0

        if self.part_data_pos != -1:
            self.callback("part_data", data, self.part_data_pos, data_len)
            self.part_data_pos = 0

        self.index = index
        self.state = state
        self.flags = flags
        self.cursize += data_len
583
+
584
+
585
def prune_data(data_len: int, cursize: int, max_size: int) -> int:
    """Return how many bytes of a new chunk may still be parsed.

    :param data_len: Length of the chunk that is about to be parsed.
    :param cursize: Number of bytes parsed so far.
    :param max_size: Maximum total number of bytes to parse; a falsy value
                     (0) disables the limit.
    :return: ``data_len`` when the chunk fits (or there is no limit),
             otherwise the remaining budget, never less than 0.
    """
    if max_size and (cursize + data_len) > max_size:
        # Clamp at 0: a negative length would shrink the caller's running
        # total and be misread as a negative slice end.
        return max(0, max_size - cursize)

    return data_len