asgi-tools 1.2.0__cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,565 @@
1
+ """The code is based on a great work of Andrew Dunham
2
+ (https://github.com/andrew-d/python-multipart) and has been changed to improve speed.
3
+
4
+ The original code is licensed by Apache2 license.
5
+ """
6
# Bit flags the multipart parser sets while it is deciding what follows a
# fully matched boundary string.
cdef:
    unsigned char FLAG_PART_BOUNDARY = 1
    unsigned char FLAG_LAST_BOUNDARY = 2

# Single-byte constants. The parsers read the input one C ``char`` at a time,
# so the delimiters are stored as ``char`` values to allow direct comparison.
cdef:
    char AMPERSAND = b'&'
    char COLON = b':'
    char CR = b'\r'
    char EQUAL = b'='
    char HYPHEN = b'-'
    char LF = b'\n'
    char SEMICOLON = b';'
    char SPACE = b' '
    char EMPTY = b'\x00'
22
+
23
+
24
cdef class BaseParser:
    """Base class for all parsers: stores the callbacks and dispatches to them.

    Callbacks are looked up by name in the ``callbacks`` dictionary; names
    that are not registered are silently skipped.

    There are two kinds of callbacks. "Notification callbacks" signal that
    something happened - for example, that a new part of a multipart message
    was encountered. "Data callbacks" deliver a portion of data - for example,
    part of the body of a multipart chunk. Every callback is invoked with the
    same three positional arguments::

        callback(data, start, end)

    For a data callback, "data" is a bytestring and "start" and "end" are
    integer indexes into it, so the slice `data[start:end]` is the data the
    callback is "interested in". The callback is not passed a copy of the
    data, since copying severely hurts performance. For a notification
    callback the arguments carry no information: they are ``b''``, ``0``
    and ``0``.

    :param callbacks: A dictionary mapping callback names to callables.
    """

    def __init__(self, dict callbacks):
        self.callbacks = callbacks

    cdef void callback(self, str name, bytes data, int start, int end) except *:
        """Invoke the callback registered under ``name``, if there is one.

        Any exception raised by the callback itself propagates to the caller.
        """
        # Keep the ``try`` limited to the lookup: only a missing callback is
        # ignored. A KeyError raised *inside* the callback must not be
        # swallowed.
        try:
            func = self.callbacks[name]
        except KeyError:
            return

        func(data, start, end)

    cpdef void write(self, bytes data) except *:
        """Feed a chunk of data to the parser. The base class does nothing."""
        pass

    cpdef void finalize(self):
        """Signal the end of the input. The base class does nothing."""
        pass
59
+
60
+
61
# States of the query-string parser.
cdef:
    unsigned char STATE_BEFORE_FIELD = 0
    unsigned char STATE_FIELD_NAME = 1
    unsigned char STATE_FIELD_DATA = 2
64
+
65
+
66
cdef class QueryStringParser(BaseParser):
    """This is a streaming querystring parser. It will consume data, and call
    the callbacks given when it has data.

    Every callback is invoked as ``callback(data, start, end)``. Callbacks
    listed with "None" parameters receive ``(b'', 0, 0)``.

    .. list-table::
       :widths: 15 10 30
       :header-rows: 1

       * - Callback Name
         - Parameters
         - Description
       * - field_start
         - None
         - Called when a new field is encountered.
       * - field_name
         - data, start, end
         - Called when a portion of a field's name is encountered.
       * - field_data
         - data, start, end
         - Called when a portion of a field's data is encountered.
       * - field_end
         - None
         - Called when the end of a field is encountered.
       * - end
         - None
         - Called when the parser is finished parsing all data.

    :param callbacks: A dictionary of callbacks. See the documentation for
                      :class:`BaseParser`.

    :param max_size: The maximum size of body to parse. Defaults to 0, which
                     disables the limit.
    """

    def __init__(self, dict callbacks, unsigned int max_size=0):
        self.callbacks = callbacks
        self.cursize = 0
        self.max_size = max_size
        self.state = STATE_BEFORE_FIELD

    cpdef void write(self, bytes data) except *:
        """Parse one chunk of the query string and fire the callbacks.

        Only the first ``data_len`` bytes are considered, where ``data_len``
        is the chunk length truncated so that the total consumed size does
        not exceed ``max_size``.

        :param data: The next chunk of the query string.
        :raises ValueError: If the parser is in an unknown state.
        """
        cdef int data_len = prune_data(len(data), self.cursize, self.max_size)
        cdef int idx = 0
        cdef unsigned char state = self.state
        cdef char ch
        cdef int sep_pos, equals_pos

        while idx < data_len:
            ch = data[idx]
            if state == STATE_BEFORE_FIELD:

                # Skip separators; anything else starts a field and is
                # re-processed as the first byte of its name.
                if not (ch == AMPERSAND or ch == SEMICOLON):
                    self.callback('field_start', b'', 0, 0)
                    idx -= 1
                    state = STATE_FIELD_NAME

            elif state == STATE_FIELD_NAME:
                # All searches are bounded by data_len so that bytes beyond
                # the max_size limit are never inspected or reported.
                sep_pos = data.find(AMPERSAND, idx, data_len)
                if sep_pos == -1:
                    sep_pos = data.find(SEMICOLON, idx, data_len)

                # Look for '=' only within the current field.
                if sep_pos != -1:
                    equals_pos = data.find(EQUAL, idx, sep_pos)
                else:
                    equals_pos = data.find(EQUAL, idx, data_len)

                if equals_pos != -1:
                    # Name is complete; the data starts after the '='.
                    self.callback('field_name', data, idx, equals_pos)
                    idx = equals_pos
                    state = STATE_FIELD_DATA

                elif sep_pos == -1:
                    # Neither '=' nor a separator: the rest of the chunk is
                    # a portion of the name.
                    self.callback('field_name', data, idx, data_len)
                    idx = data_len

                else:
                    # A field that has a name but no '=' and no data.
                    self.callback('field_name', data, idx, sep_pos)
                    self.callback('field_end', b'', 0, 0)
                    idx = sep_pos - 1
                    state = STATE_BEFORE_FIELD

            elif state == STATE_FIELD_DATA:
                sep_pos = data.find(AMPERSAND, idx, data_len)
                if sep_pos == -1:
                    sep_pos = data.find(SEMICOLON, idx, data_len)

                if sep_pos == -1:
                    # No separator: the rest of the chunk is field data.
                    self.callback('field_data', data, idx, data_len)
                    idx = data_len

                else:
                    self.callback('field_data', data, idx, sep_pos)
                    self.callback('field_end', b'', 0, 0)

                    # Step back so the separator itself is seen in
                    # STATE_BEFORE_FIELD after the increment below.
                    idx = sep_pos - 1
                    state = STATE_BEFORE_FIELD

            else:
                raise ValueError(f"Reached an unknown state {state} at {idx}")

            idx += 1

        self.state = state
        self.cursize += data_len

    cpdef void finalize(self):
        """Finalize this parser, which signals that we are finished parsing.

        If we are still in the middle of a field's data, the ``field_end``
        callback is called first; then the ``end`` callback is called.
        """
        # If we're currently in the middle of a field, we finish it.
        # NOTE(review): a trailing field that has only a name (no '=') leaves
        # the parser in STATE_FIELD_NAME and gets no 'field_end' here -
        # confirm whether that is intended.
        if self.state == STATE_FIELD_DATA:
            self.callback('field_end', b'', 0, 0)
        self.callback('end', b'', 0, 0)
179
+
180
+
181
# States of the multipart parser.
cdef:
    unsigned char STATE_START = 0
    unsigned char STATE_START_BOUNDARY = 1
    unsigned char STATE_HEADER_FIELD_START = 2
    unsigned char STATE_HEADER_FIELD = 3
    unsigned char STATE_HEADER_VALUE_START = 4
    unsigned char STATE_HEADER_VALUE = 5
    unsigned char STATE_HEADER_VALUE_ALMOST_DONE = 6
    unsigned char STATE_HEADERS_ALMOST_DONE = 7
    unsigned char STATE_PART_DATA_START = 8
    unsigned char STATE_PART_DATA = 9
    unsigned char STATE_PART_DATA_END = 10
    unsigned char STATE_END = 11
193
+
194
+
195
cdef class MultipartParser(BaseParser):
    """This class is a streaming multipart/form-data parser.

    Every callback is invoked as ``callback(data, start, end)``. Callbacks
    listed with "None" parameters receive ``(b'', 0, 0)``.

    .. list-table::
       :widths: 15 10 30
       :header-rows: 1

       * - Callback Name
         - Parameters
         - Description
       * - part_begin
         - None
         - Called when a new part of the multipart message is encountered.
       * - part_data
         - data, start, end
         - Called when a portion of a part's data is encountered.
       * - part_end
         - None
         - Called when the end of a part is reached.
       * - header_begin
         - None
         - Called when we've found a new header in a part of a multipart
           message
       * - header_field
         - data, start, end
         - Called each time an additional portion of a header is read (i.e. the
           part of the header that is before the colon; the "Foo" in
           "Foo: Bar").
       * - header_value
         - data, start, end
         - Called when we get data for a header.
       * - header_end
         - None
         - Called when the current header is finished - i.e. we've reached the
           newline at the end of the header.
       * - headers_finished
         - None
         - Called when all headers are finished, and before the part data
           starts.
       * - end
         - None
         - Called when the parser is finished parsing all data.


    :param boundary: The multipart boundary. This is required, and must match
                     what is given in the HTTP request - usually in the
                     Content-Type header. A ``str`` is encoded as latin-1.

    :param callbacks: A dictionary of callbacks. See the documentation for
                      :class:`BaseParser`.

    :param max_size: The maximum size of body to parse. Defaults to 0, which
                     disables the limit.
    """

    def __init__(self, object boundary, dict callbacks, unsigned int max_size=0):
        self.callbacks = callbacks
        self.cursize = 0
        self.max_size = max_size
        self.state = STATE_START
        self.index = self.flags = 0

        # -1 means "no mark set" for each of the three data positions.
        self.header_field_pos = -1
        self.header_value_pos = -1
        self.part_data_pos = -1

        if isinstance(boundary, str):
            boundary = boundary.encode('latin-1')

        # Inside the body every boundary is preceded by CRLF and two hyphens.
        self.boundary = b'\r\n--' + boundary

        # Get a set of characters that belong to our boundary.
        self.boundary_chars = frozenset(self.boundary)

        # We also create a lookbehind list.
        # Note: the +8 is since we can have, at maximum, "\r\n--" + boundary +
        # "--\r\n" at the final boundary, and the length of '\r\n--' and
        # '--\r\n' is 8 bytes.
        self.lookbehind = [EMPTY] * (len(boundary) + 8)

    cpdef void write(self, bytes data) except *:  # noqa
        """Parse one chunk of the multipart body and fire the callbacks.

        Only the first ``data_len`` bytes are considered, where ``data_len``
        is the chunk length truncated so that the total consumed size does
        not exceed ``max_size``. Header or part data still open at the end of
        the chunk is reported up to ``data_len`` and continued on the next
        call.

        :param data: The next chunk of the body.
        :raises ValueError: On a malformed boundary or header, or if the
                            parser is in an unknown state.
        """
        cdef int data_len = prune_data(len(data), self.cursize, self.max_size)

        cdef int idx = 0
        cdef unsigned int index = self.index
        cdef unsigned char state = self.state
        cdef short flags = self.flags
        cdef bytes boundary = self.boundary
        cdef unsigned int boundary_len = len(boundary)
        cdef char ch
        cdef int boundary_end, prev_index

        # Loop invariants used by the part-data search.
        boundary_end = boundary_len - 1
        boundary_chars = self.boundary_chars

        # NOTE: ``ch`` is a plain C char, which is signed on some platforms.
        # Wherever it is handed to Python as a byte value (chr(), lookbehind)
        # it is masked with 0xFF so the value is always within 0..255.

        while idx < data_len:
            ch = data[idx]

            if state == STATE_START_BOUNDARY:
                # Check to ensure that the last 2 characters in our boundary
                # are CRLF.
                if index == boundary_len - 2:
                    if ch != CR:
                        raise ValueError(f"Did not find \\r at end of boundary ({idx})")
                    index += 1

                elif index == boundary_len - 2 + 1:
                    if ch != LF:
                        raise ValueError(f"Did not find \\n at end of boundary ({idx})")

                    state = STATE_HEADER_FIELD_START
                    self.callback('part_begin', b'', 0, 0)

                # Check to ensure our boundary matches. The +2 skips the
                # leading CRLF, which the first boundary does not have.
                elif ch == boundary[index + 2]:
                    # Increment index into boundary and continue.
                    index += 1

                else:
                    raise ValueError(f"Did not find boundary character {chr(ch & 0xFF)} at index {idx}")

            elif state == STATE_HEADER_FIELD_START:
                # Mark the start of a header field here, reset the index, and
                # continue parsing our header field.
                index = 0
                self.header_field_pos = idx
                idx -= 1
                state = STATE_HEADER_FIELD

            elif state == STATE_HEADER_FIELD:
                # If we've reached a CR at the beginning of a header, it means
                # that we've reached the second of 2 newlines, and so there are
                # no more headers to parse.
                if ch == CR:
                    self.header_field_pos = -1
                    state = STATE_HEADERS_ALMOST_DONE
                    idx += 1
                    continue

                index += 1

                # If we've reached a colon, we're done with this header.
                if ch == COLON:
                    # A 0-length header is an error.
                    if index == 1:
                        raise ValueError(f"Found 0-length header at {idx}")

                    # Call our callback with the header field.
                    if self.header_field_pos != -1:
                        self.callback('header_field', data, self.header_field_pos, idx)
                        self.header_field_pos = -1

                    # Move to parsing the header value.
                    state = STATE_HEADER_VALUE_START

            elif state == STATE_HEADER_VALUE_START:
                # Skip leading spaces.
                if ch != SPACE:
                    # Mark the start of the header value.
                    self.header_value_pos = idx
                    idx -= 1
                    # Move to the header-value state, reprocessing this character.
                    state = STATE_HEADER_VALUE

            elif state == STATE_HEADER_VALUE:
                # If we've got a CR, we're nearly done our headers. Otherwise,
                # we do nothing and just move past this character.
                if ch == CR:
                    if self.header_value_pos != -1:
                        self.callback('header_value', data, self.header_value_pos, idx)
                        self.header_value_pos = -1

                    self.callback('header_end', b'', 0, 0)
                    state = STATE_HEADER_VALUE_ALMOST_DONE

            elif state == STATE_HEADER_VALUE_ALMOST_DONE:
                # The last character should be a LF. If not, it's an error.
                if ch != LF:
                    raise ValueError(f"Did not find \\n at end of header (found {chr(ch & 0xFF)})")

                # Move back to the start of another header. Note that if that
                # state detects ANOTHER newline, it'll trigger the end of our
                # headers.
                state = STATE_HEADER_FIELD_START

            elif state == STATE_HEADERS_ALMOST_DONE:
                # We're almost done our headers. This is reached when we parse
                # a CR at the beginning of a header, so our next character
                # should be a LF, or it's an error.
                if ch != LF:
                    raise ValueError(f"Did not find \\n at end of headers (found {chr(ch & 0xFF)})")

                self.callback('headers_finished', b'', 0, 0)
                # Mark the start of our part data.
                self.part_data_pos = idx + 1
                state = STATE_PART_DATA

            elif state == STATE_PART_DATA:
                # We're processing our part data right now. During this, we
                # need to efficiently search for our boundary, since any data
                # on any number of lines can be a part of the current data.
                # We use the Boyer-Moore-Horspool algorithm to efficiently
                # search through the remainder of the buffer looking for our
                # boundary.

                # Save the current value of our index. We use this in case we
                # find part of a boundary, but it doesn't match fully.
                prev_index = index

                # If our index is 0, we're starting a new part, so start our
                # search.
                if index == 0:
                    # Search forward until we either hit the end of our buffer,
                    # or reach a character that's in our boundary.
                    idx += boundary_end
                    while idx < data_len - 1 and data[idx] not in boundary_chars:
                        idx += boundary_len

                    # Reset i back the length of our boundary, which is the
                    # earliest possible location that could be our match (i.e.
                    # if we've just broken out of our loop since we saw the
                    # last character in our boundary)
                    idx -= boundary_end
                    ch = data[idx]

                # Now, we have a couple of cases here. If our index is before
                # the end of the boundary...
                if index < boundary_len:
                    # If the character matches...
                    if boundary[index] == ch:
                        # If we found a match for our boundary, we send the
                        # existing data.
                        if index == 0 and self.part_data_pos != -1:
                            self.callback('part_data', data, self.part_data_pos, idx)
                            self.part_data_pos = -1

                        # The current character matches, so continue!
                        index += 1
                    else:
                        index = 0

                # Our index is equal to the length of our boundary!
                elif index == boundary_len:
                    # First we increment it.
                    index += 1

                    # Now, if we've reached a newline, we need to set this as
                    # the potential end of our boundary.
                    if ch == CR:
                        flags |= FLAG_PART_BOUNDARY

                    # Otherwise, if this is a hyphen, we might be at the last
                    # of all boundaries.
                    elif ch == HYPHEN:
                        flags |= FLAG_LAST_BOUNDARY

                    # Otherwise, we reset our index, since this isn't either a
                    # newline or a hyphen.
                    else:
                        index = 0

                # Our index is right after the part boundary, which should be
                # a LF.
                elif index == boundary_len + 1:
                    # If we're at a part boundary (i.e. we've seen a CR
                    # character already)...
                    if flags & FLAG_PART_BOUNDARY:
                        # We need a LF character next.
                        if ch == LF:
                            # Unset the part boundary flag.
                            flags &= (~FLAG_PART_BOUNDARY)

                            # Callback indicating that we've reached the end of
                            # a part, and are starting a new one.
                            self.callback('part_end', b'', 0, 0)
                            self.callback('part_begin', b'', 0, 0)

                            # Move to parsing new headers.
                            index = 0
                            state = STATE_HEADER_FIELD_START
                            idx += 1
                            continue

                        # We didn't find an LF character, so no match. Reset
                        # our index and clear our flag.
                        index = 0
                        flags &= (~FLAG_PART_BOUNDARY)

                    # Otherwise, if we're at the last boundary (i.e. we've
                    # seen a hyphen already)...
                    elif flags & FLAG_LAST_BOUNDARY:
                        # We need a second hyphen here.
                        if ch == HYPHEN:
                            # Callback to end the current part, and then the
                            # message.
                            self.callback('part_end', b'', 0, 0)
                            self.callback('end', b'', 0, 0)
                            state = STATE_END
                        else:
                            # No match, so reset index.
                            index = 0

                # If we have an index, we need to keep this byte for later, in
                # case we can't match the full boundary.
                if index > 0:
                    self.lookbehind[index - 1] = ch & 0xFF

                # Otherwise, our index is 0. If the previous index is not, it
                # means we reset something, and we need to take the data we
                # thought was part of our boundary and send it along as actual
                # data.
                elif prev_index > 0:
                    # Callback to write the saved data.
                    lb_data = bytes(self.lookbehind)
                    self.callback('part_data', lb_data, 0, prev_index)

                    # Overwrite our previous index.
                    prev_index = 0

                    # Re-set our mark for part data.
                    self.part_data_pos = idx

                    # Re-consider the current character, since this could be
                    # the start of the boundary itself.
                    idx -= 1

            elif state == STATE_START:
                # Skip leading newlines
                if not (ch == CR or ch == LF):

                    # Move to the next state, but decrement i so that we re-process
                    # this character.
                    idx -= 1
                    state = STATE_START_BOUNDARY

            elif state == STATE_END:
                # Do nothing and just consume a byte in the end state.
                pass

            else:
                raise ValueError(f"Reached an unknown state {state} at {idx}")

            # Move to the next byte.
            idx += 1

        # Report whatever is still open at the end of this chunk. The mark is
        # then moved to 0 so that the next chunk continues from its first byte.
        if self.header_field_pos != -1:
            self.callback('header_field', data, self.header_field_pos, data_len)
            self.header_field_pos = 0

        if self.header_value_pos != -1:
            self.callback('header_value', data, self.header_value_pos, data_len)
            self.header_value_pos = 0

        if self.part_data_pos != -1:
            self.callback('part_data', data, self.part_data_pos, data_len)
            self.part_data_pos = 0

        self.index = index
        self.state = state
        self.flags = flags
        self.cursize += data_len
556
+
557
+
558
cdef int prune_data(int data_len, int cursize, int max_size):
    """Return how many bytes of a new chunk may still be consumed.

    ``data_len`` is the size of the incoming chunk, ``cursize`` the number of
    bytes consumed so far and ``max_size`` the overall limit. A ``max_size``
    of 0 means "no limit", in which case ``data_len`` is returned unchanged.
    """
    cdef int remaining

    # No limit configured: the whole chunk is accepted.
    if not max_size:
        return data_len

    # Otherwise accept at most what is left of the budget.
    remaining = max_size - cursize
    if data_len > remaining:
        return remaining

    return data_len
563
+
564
+
565
+ # pylama:ignore=D,E221
asgi_tools/py.typed ADDED
File without changes