noderb-http 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1632 @@
1
+ /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
+ *
3
+ * Additional changes are licensed under the same terms as NGINX and
4
+ * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ * of this software and associated documentation files (the "Software"), to
8
+ * deal in the Software without restriction, including without limitation the
9
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10
+ * sell copies of the Software, and to permit persons to whom the Software is
11
+ * furnished to do so, subject to the following conditions:
12
+ *
13
+ * The above copyright notice and this permission notice shall be included in
14
+ * all copies or substantial portions of the Software.
15
+ *
16
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
+ * IN THE SOFTWARE.
23
+ */
24
+ #include <http_parser.h>
25
+ #include <assert.h>
26
+ #include <stddef.h>
27
+
28
+
29
/* Smaller of two values. Each argument is expanded twice, so pass only
 * side-effect-free expressions. A MIN that is already defined is kept. */
#if !defined(MIN)
# define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif
32
+
33
+
34
/* Invoke the notification callback settings->on_<FOR>(parser) when one is
 * installed. If it returns non-zero, the enclosing function returns at once
 * with the offset of `p` from `data`.
 * Expects `settings`, `parser`, `p` and `data` in scope where it is expanded.
 */
#define CALLBACK2(FOR)                                                 \
do {                                                                   \
  if (settings->on_##FOR && 0 != settings->on_##FOR(parser))           \
    return (p - data);                                                 \
} while (0)
40
+
41
+
42
/* Record the current read position `p` in the local pointer <FOR>_mark,
 * i.e. remember where the <FOR> span begins. */
#define MARK(FOR) do { FOR##_mark = p; } while (0)
46
+
47
/* Data callbacks.
 *
 * CALLBACK_NOCLEAR(FOR): when <FOR>_mark is set and settings->on_<FOR> is
 * installed, call it with the span [<FOR>_mark, p). A non-zero result makes
 * the enclosing function return the offset of `p` from `data`. The mark is
 * left untouched.
 *
 * CALLBACK(FOR): same, then resets <FOR>_mark to NULL.
 *
 * Both expect `settings`, `parser`, `p`, `data` and <FOR>_mark in scope.
 */
#define CALLBACK_NOCLEAR(FOR)                                          \
do {                                                                   \
  if (FOR##_mark && settings->on_##FOR                                 \
      && 0 != settings->on_##FOR(parser, FOR##_mark, p - FOR##_mark))  \
    return (p - data);                                                 \
} while (0)


#define CALLBACK(FOR)                                                  \
do {                                                                   \
  CALLBACK_NOCLEAR(FOR);                                               \
  FOR##_mark = NULL;                                                   \
} while (0)
67
+
68
+
69
/* Lower-case header names the parser recognises specially. */
#define PROXY_CONNECTION  "proxy-connection"
#define CONNECTION        "connection"
#define CONTENT_LENGTH    "content-length"
#define TRANSFER_ENCODING "transfer-encoding"
#define UPGRADE           "upgrade"

/* Lower-case header values the parser recognises specially. */
#define CHUNKED           "chunked"
#define KEEP_ALIVE        "keep-alive"
#define CLOSE             "close"
77
+
78
+
79
/* Request-method names. The request-line matcher indexes this table with
 * parser->method, so the order presumably mirrors enum http_method in
 * http_parser.h (not visible here -- keep the two in sync). */
static const char *method_strings[] = {
  "DELETE",
  "GET",
  "HEAD",
  "POST",
  "PUT",
  "CONNECT",
  "OPTIONS",
  "TRACE",
  "COPY",
  "LOCK",
  "MKCOL",
  "MOVE",
  "PROPFIND",
  "PROPPATCH",
  "UNLOCK",
  "REPORT",
  "MKACTIVITY",
  "CHECKOUT",
  "MERGE",
  "M-SEARCH",
  "NOTIFY",
  "SUBSCRIBE",
  "UNSUBSCRIBE",
  "PATCH"
};
105
+
106
+
107
/* Tokens as defined by rfc 2616. Also lowercases them.
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * tokens[c] is 0 when byte c may not appear in a token, otherwise it is the
 * lower-cased form of c. Only the first 128 entries are listed; the remaining
 * entries (bytes 0x80-0xFF) are zero-initialised and therefore rejected.
 *
 * The separators <">, "/" and "}" map to 0, as the grammar above requires.
 * SP is intentionally still mapped to itself: the header-field matcher in this
 * file tests `ch != ' '` only for characters that pass this table, so that a
 * recognised header name may be followed by spaces before the ':'.
 */
static const char tokens[256] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
       ' ',     '!',      0,      '#',     '$',     '%',     '&',    '\'',
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
        0,       0,      '*',     '+',      0,      '-',     '.',      0,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
       '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
       '8',     '9',      0,       0,       0,       0,       0,       0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
        0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
       'x',     'y',     'z',      0,       0,       0,      '^',     '_',
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
       '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
       'x',     'y',     'z',      0,      '|',      0,      '~',      0 };
147
+
148
+
149
/* Hex-digit decoding table: unhex[c] is the value (0-15) of the ASCII hex
 * digit c, accepting both 'a'-'f' and 'A'-'F', and -1 for every other byte.
 *
 * All 256 entries are spelled out. With only the ASCII half listed, the
 * remaining entries would be zero-initialised, so bytes 0x80-0xFF would
 * decode as the digit 0 instead of being rejected.
 * Index with an unsigned char value.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x00 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x10 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x20 */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1   /* 0x30: '0'-'9' */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x40: 'A'-'F' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x50 */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x60: 'a'-'f' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x70 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x80 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x90 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xA0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xB0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xC0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xD0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xE0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xF0 */
  };
159
+
160
+
161
/* normal_url_char[c] is 1 when ASCII byte c may continue a URL component
 * without special handling and 0 otherwise. The zero entries are the control
 * characters, SP, '#', '?' and DEL. Only the ASCII half is listed; entries for
 * bytes 0x80-0xFF are zero-initialised. */
static const uint8_t normal_url_char[256] = {
  /* 0x00-0x0F: control characters */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10-0x1F: control characters */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20-0x2F: sp ! " # $ % & '   ( ) * + , - . / */
  0, 1, 1, 0, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30-0x3F: 0 1 2 3 4 5 6 7    8 9 : ; < = > ? */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 0,
  /* 0x40-0x4F: @ A B C D E F G    H I J K L M N O */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50-0x5F: P Q R S T U V W    X Y Z [ \ ] ^ _ */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60-0x6F: ` a b c d e f g    h i j k l m n o */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70-0x7F: p q r s t u v w    x y z { | } ~ del */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 0
};
194
+
195
+
196
/* Parser states. The numeric order matters: PARSING_HEADER() compares against
 * s_headers_almost_done, so every state up to and including it counts toward
 * the header-size limit and every state after it is a body state. */
enum state {
  s_dead = 1, /* important that this is > 0 */

  /* message type not yet known */
  s_start_req_or_res,
  s_res_or_resp_H,

  /* response status line */
  s_start_res,
  s_res_H,
  s_res_HT,
  s_res_HTT,
  s_res_HTTP,
  s_res_first_http_major,
  s_res_http_major,
  s_res_first_http_minor,
  s_res_http_minor,
  s_res_first_status_code,
  s_res_status_code,
  s_res_status,
  s_res_line_almost_done,

  /* request line */
  s_start_req,

  s_req_method,
  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_host,
  s_req_port,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_req_http_start,
  s_req_http_H,
  s_req_http_HT,
  s_req_http_HTT,
  s_req_http_HTTP,
  s_req_first_http_major,
  s_req_http_major,
  s_req_first_http_minor,
  s_req_http_minor,
  s_req_line_almost_done,

  /* header lines */
  s_header_field_start,
  s_header_field,
  s_header_value_start,
  s_header_value,

  s_header_almost_done,

  s_chunk_size_start,
  s_chunk_size,
  s_chunk_parameters,
  s_chunk_size_almost_done,

  s_headers_almost_done,
  /* Important: 's_headers_almost_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  s_chunk_data,
  s_chunk_data_almost_done,
  s_chunk_data_done,

  s_body_identity,
  s_body_identity_eof
};
265
+
266
+
267
/* True while `state` is a header state, i.e. at or before
 * s_headers_almost_done in enum state. The argument is parenthesised so that
 * compound expressions (e.g. a conditional) are compared as a whole. */
#define PARSING_HEADER(state) ((state) <= s_headers_almost_done)
268
+
269
+
270
/* Sub-states used while scanning a header line to spot the handful of header
 * names and values the parser treats specially. */
enum header_states {
  h_general = 0, /* nothing special (so far) about this header */

  /* shared prefix of "connection" and "content-length" */
  h_C,
  h_CO,
  h_CON,

  /* partway through a recognised header name */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* whole header name recognised */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* partway through a recognised header value */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_keep_alive,
  h_matching_connection_close,

  /* whole header value recognised */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close
};
295
+
296
+
297
/* Macros for character classes; depends on strict-mode.
 *
 * Every macro parenthesises its argument and its expansion so it can be
 * handed any expression. Arguments may be evaluated more than once, so they
 * must be free of side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* Sets bit 0x20: maps 'A'-'Z' onto 'a'-'z'. Other bytes may change too, so
 * the result is only meaningful when followed by a range check. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
/* Lower-cased token character, or 0 when c is not allowed in a token. */
#define TOKEN(c)            (tokens[(unsigned char)(c)])
/* Lower-case letters only; combine with LOWER() to accept both cases. */
#define IS_ALPHA(c)         ((c) >= 'a' && (c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))

#if HTTP_PARSER_STRICT
#define IS_URL_CHAR(c)      (normal_url_char[(unsigned char) (c)])
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Non-strict mode also accepts any byte with the high bit set in URLs and
 * '_' in host names. */
#define IS_URL_CHAR(c)                                                         \
  (normal_url_char[(unsigned char) (c)] || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif
315
+
316
+
317
/* State in which parsing of a message begins, chosen from parser->type.
 * Expects `parser` in scope. */
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)


/* STRICT_CHECK(cond): in strict mode, jump to the enclosing function's
 * `error` label when cond holds; in non-strict mode it does nothing and cond
 * is not evaluated. Both forms are a single do/while statement, so the macro
 * is safe as the unbraced body of an if/else.
 *
 * NEW_MESSAGE(): state to enter once a message is finished. Strict mode
 * consults http_should_keep_alive(parser) and yields s_dead when it returns
 * false; non-strict mode always restarts at start_state.
 */
#if HTTP_PARSER_STRICT
# define STRICT_CHECK(cond) do { if (cond) goto error; } while (0)
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#else
# define STRICT_CHECK(cond) do { } while (0)
# define NEW_MESSAGE() start_state
#endif
327
+
328
+
329
+ size_t http_parser_execute (http_parser *parser,
330
+ const http_parser_settings *settings,
331
+ const char *data,
332
+ size_t len)
333
+ {
334
+ char c, ch;
335
+ const char *p = data, *pe;
336
+ int64_t to_read;
337
+
338
+ enum state state = (enum state) parser->state;
339
+ enum header_states header_state = (enum header_states) parser->header_state;
340
+ uint64_t index = parser->index;
341
+ uint64_t nread = parser->nread;
342
+
343
+ if (len == 0) {
344
+ switch (state) {
345
+ case s_body_identity_eof:
346
+ CALLBACK2(message_complete);
347
+ return 0;
348
+
349
+ case s_dead:
350
+ case s_start_req_or_res:
351
+ case s_start_res:
352
+ case s_start_req:
353
+ return 0;
354
+
355
+ default:
356
+ return 1; // error
357
+ }
358
+ }
359
+
360
+ /* technically we could combine all of these (except for url_mark) into one
361
+ variable, saving stack space, but it seems more clear to have them
362
+ separated. */
363
+ const char *header_field_mark = 0;
364
+ const char *header_value_mark = 0;
365
+ const char *fragment_mark = 0;
366
+ const char *query_string_mark = 0;
367
+ const char *path_mark = 0;
368
+ const char *url_mark = 0;
369
+
370
+ if (state == s_header_field)
371
+ header_field_mark = data;
372
+ if (state == s_header_value)
373
+ header_value_mark = data;
374
+ if (state == s_req_fragment)
375
+ fragment_mark = data;
376
+ if (state == s_req_query_string)
377
+ query_string_mark = data;
378
+ if (state == s_req_path)
379
+ path_mark = data;
380
+ if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
381
+ || state == s_req_schema_slash_slash || state == s_req_port
382
+ || state == s_req_query_string_start || state == s_req_query_string
383
+ || state == s_req_host
384
+ || state == s_req_fragment_start || state == s_req_fragment)
385
+ url_mark = data;
386
+
387
+ for (p=data, pe=data+len; p != pe; p++) {
388
+ ch = *p;
389
+
390
+ if (PARSING_HEADER(state)) {
391
+ ++nread;
392
+ /* Buffer overflow attack */
393
+ if (nread > HTTP_MAX_HEADER_SIZE) goto error;
394
+ }
395
+
396
+ switch (state) {
397
+
398
+ case s_dead:
399
+ /* this state is used after a 'Connection: close' message
400
+ * the parser will error out if it reads another message
401
+ */
402
+ goto error;
403
+
404
+ case s_start_req_or_res:
405
+ {
406
+ if (ch == CR || ch == LF)
407
+ break;
408
+ parser->flags = 0;
409
+ parser->content_length = -1;
410
+
411
+ CALLBACK2(message_begin);
412
+
413
+ if (ch == 'H')
414
+ state = s_res_or_resp_H;
415
+ else {
416
+ parser->type = HTTP_REQUEST;
417
+ goto start_req_method_assign;
418
+ }
419
+ break;
420
+ }
421
+
422
+ case s_res_or_resp_H:
423
+ if (ch == 'T') {
424
+ parser->type = HTTP_RESPONSE;
425
+ state = s_res_HT;
426
+ } else {
427
+ if (ch != 'E') goto error;
428
+ parser->type = HTTP_REQUEST;
429
+ parser->method = HTTP_HEAD;
430
+ index = 2;
431
+ state = s_req_method;
432
+ }
433
+ break;
434
+
435
+ case s_start_res:
436
+ {
437
+ parser->flags = 0;
438
+ parser->content_length = -1;
439
+
440
+ CALLBACK2(message_begin);
441
+
442
+ switch (ch) {
443
+ case 'H':
444
+ state = s_res_H;
445
+ break;
446
+
447
+ case CR:
448
+ case LF:
449
+ break;
450
+
451
+ default:
452
+ goto error;
453
+ }
454
+ break;
455
+ }
456
+
457
+ case s_res_H:
458
+ STRICT_CHECK(ch != 'T');
459
+ state = s_res_HT;
460
+ break;
461
+
462
+ case s_res_HT:
463
+ STRICT_CHECK(ch != 'T');
464
+ state = s_res_HTT;
465
+ break;
466
+
467
+ case s_res_HTT:
468
+ STRICT_CHECK(ch != 'P');
469
+ state = s_res_HTTP;
470
+ break;
471
+
472
+ case s_res_HTTP:
473
+ STRICT_CHECK(ch != '/');
474
+ state = s_res_first_http_major;
475
+ break;
476
+
477
+ case s_res_first_http_major:
478
+ if (ch < '1' || ch > '9') goto error;
479
+ parser->http_major = ch - '0';
480
+ state = s_res_http_major;
481
+ break;
482
+
483
+ /* major HTTP version or dot */
484
+ case s_res_http_major:
485
+ {
486
+ if (ch == '.') {
487
+ state = s_res_first_http_minor;
488
+ break;
489
+ }
490
+
491
+ if (!IS_NUM(ch)) goto error;
492
+
493
+ parser->http_major *= 10;
494
+ parser->http_major += ch - '0';
495
+
496
+ if (parser->http_major > 999) goto error;
497
+ break;
498
+ }
499
+
500
+ /* first digit of minor HTTP version */
501
+ case s_res_first_http_minor:
502
+ if (!IS_NUM(ch)) goto error;
503
+ parser->http_minor = ch - '0';
504
+ state = s_res_http_minor;
505
+ break;
506
+
507
+ /* minor HTTP version or end of request line */
508
+ case s_res_http_minor:
509
+ {
510
+ if (ch == ' ') {
511
+ state = s_res_first_status_code;
512
+ break;
513
+ }
514
+
515
+ if (!IS_NUM(ch)) goto error;
516
+
517
+ parser->http_minor *= 10;
518
+ parser->http_minor += ch - '0';
519
+
520
+ if (parser->http_minor > 999) goto error;
521
+ break;
522
+ }
523
+
524
+ case s_res_first_status_code:
525
+ {
526
+ if (!IS_NUM(ch)) {
527
+ if (ch == ' ') {
528
+ break;
529
+ }
530
+ goto error;
531
+ }
532
+ parser->status_code = ch - '0';
533
+ state = s_res_status_code;
534
+ break;
535
+ }
536
+
537
+ case s_res_status_code:
538
+ {
539
+ if (!IS_NUM(ch)) {
540
+ switch (ch) {
541
+ case ' ':
542
+ state = s_res_status;
543
+ break;
544
+ case CR:
545
+ state = s_res_line_almost_done;
546
+ break;
547
+ case LF:
548
+ state = s_header_field_start;
549
+ break;
550
+ default:
551
+ goto error;
552
+ }
553
+ break;
554
+ }
555
+
556
+ parser->status_code *= 10;
557
+ parser->status_code += ch - '0';
558
+
559
+ if (parser->status_code > 999) goto error;
560
+ break;
561
+ }
562
+
563
+ case s_res_status:
564
+ /* the human readable status. e.g. "NOT FOUND"
565
+ * we are not humans so just ignore this */
566
+ if (ch == CR) {
567
+ state = s_res_line_almost_done;
568
+ break;
569
+ }
570
+
571
+ if (ch == LF) {
572
+ state = s_header_field_start;
573
+ break;
574
+ }
575
+ break;
576
+
577
+ case s_res_line_almost_done:
578
+ STRICT_CHECK(ch != LF);
579
+ state = s_header_field_start;
580
+ break;
581
+
582
+ case s_start_req:
583
+ {
584
+ if (ch == CR || ch == LF)
585
+ break;
586
+ parser->flags = 0;
587
+ parser->content_length = -1;
588
+
589
+ CALLBACK2(message_begin);
590
+
591
+ if (!IS_ALPHA(LOWER(ch))) goto error;
592
+
593
+ start_req_method_assign:
594
+ parser->method = (enum http_method) 0;
595
+ index = 1;
596
+ switch (ch) {
597
+ case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
598
+ case 'D': parser->method = HTTP_DELETE; break;
599
+ case 'G': parser->method = HTTP_GET; break;
600
+ case 'H': parser->method = HTTP_HEAD; break;
601
+ case 'L': parser->method = HTTP_LOCK; break;
602
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
603
+ case 'N': parser->method = HTTP_NOTIFY; break;
604
+ case 'O': parser->method = HTTP_OPTIONS; break;
605
+ case 'P': parser->method = HTTP_POST;
606
+ /* or PROPFIND or PROPPATCH or PUT or PATCH */
607
+ break;
608
+ case 'R': parser->method = HTTP_REPORT; break;
609
+ case 'S': parser->method = HTTP_SUBSCRIBE; break;
610
+ case 'T': parser->method = HTTP_TRACE; break;
611
+ case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
612
+ default: goto error;
613
+ }
614
+ state = s_req_method;
615
+ break;
616
+ }
617
+
618
+ case s_req_method:
619
+ {
620
+ if (ch == '\0')
621
+ goto error;
622
+
623
+ const char *matcher = method_strings[parser->method];
624
+ if (ch == ' ' && matcher[index] == '\0') {
625
+ state = s_req_spaces_before_url;
626
+ } else if (ch == matcher[index]) {
627
+ ; /* nada */
628
+ } else if (parser->method == HTTP_CONNECT) {
629
+ if (index == 1 && ch == 'H') {
630
+ parser->method = HTTP_CHECKOUT;
631
+ } else if (index == 2 && ch == 'P') {
632
+ parser->method = HTTP_COPY;
633
+ }
634
+ } else if (parser->method == HTTP_MKCOL) {
635
+ if (index == 1 && ch == 'O') {
636
+ parser->method = HTTP_MOVE;
637
+ } else if (index == 1 && ch == 'E') {
638
+ parser->method = HTTP_MERGE;
639
+ } else if (index == 1 && ch == '-') {
640
+ parser->method = HTTP_MSEARCH;
641
+ } else if (index == 2 && ch == 'A') {
642
+ parser->method = HTTP_MKACTIVITY;
643
+ }
644
+ } else if (index == 1 && parser->method == HTTP_POST && ch == 'R') {
645
+ parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
646
+ } else if (index == 1 && parser->method == HTTP_POST && ch == 'U') {
647
+ parser->method = HTTP_PUT;
648
+ } else if (index == 1 && parser->method == HTTP_POST && ch == 'A') {
649
+ parser->method = HTTP_PATCH;
650
+ } else if (index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') {
651
+ parser->method = HTTP_UNSUBSCRIBE;
652
+ } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
653
+ parser->method = HTTP_PROPPATCH;
654
+ } else {
655
+ goto error;
656
+ }
657
+
658
+ ++index;
659
+ break;
660
+ }
661
+ case s_req_spaces_before_url:
662
+ {
663
+ if (ch == ' ') break;
664
+
665
+ if (ch == '/' || ch == '*') {
666
+ MARK(url);
667
+ MARK(path);
668
+ state = s_req_path;
669
+ break;
670
+ }
671
+
672
+ c = LOWER(ch);
673
+
674
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
675
+ * CONNECT is followed by a hostname, which begins with alphanum.
676
+ * All other methods are followed by '/' or '*' (handled above).
677
+ */
678
+ if (IS_ALPHA(ch) || (parser->method == HTTP_CONNECT && IS_NUM(ch))) {
679
+ MARK(url);
680
+ state = (parser->method == HTTP_CONNECT) ? s_req_host : s_req_schema;
681
+ break;
682
+ }
683
+
684
+ goto error;
685
+ }
686
+
687
+ case s_req_schema:
688
+ {
689
+ c = LOWER(ch);
690
+
691
+ if (IS_ALPHA(c)) break;
692
+
693
+ if (ch == ':') {
694
+ state = s_req_schema_slash;
695
+ break;
696
+ }
697
+
698
+ goto error;
699
+ }
700
+
701
+ case s_req_schema_slash:
702
+ STRICT_CHECK(ch != '/');
703
+ state = s_req_schema_slash_slash;
704
+ break;
705
+
706
+ case s_req_schema_slash_slash:
707
+ STRICT_CHECK(ch != '/');
708
+ state = s_req_host;
709
+ break;
710
+
711
+ case s_req_host:
712
+ {
713
+ c = LOWER(ch);
714
+ if (IS_HOST_CHAR(ch)) break;
715
+ switch (ch) {
716
+ case ':':
717
+ state = s_req_port;
718
+ break;
719
+ case '/':
720
+ MARK(path);
721
+ state = s_req_path;
722
+ break;
723
+ case ' ':
724
+ /* The request line looks like:
725
+ * "GET http://foo.bar.com HTTP/1.1"
726
+ * That is, there is no path.
727
+ */
728
+ CALLBACK(url);
729
+ state = s_req_http_start;
730
+ break;
731
+ case '?':
732
+ state = s_req_query_string_start;
733
+ break;
734
+ default:
735
+ goto error;
736
+ }
737
+ break;
738
+ }
739
+
740
+ case s_req_port:
741
+ {
742
+ if (IS_NUM(ch)) break;
743
+ switch (ch) {
744
+ case '/':
745
+ MARK(path);
746
+ state = s_req_path;
747
+ break;
748
+ case ' ':
749
+ /* The request line looks like:
750
+ * "GET http://foo.bar.com:1234 HTTP/1.1"
751
+ * That is, there is no path.
752
+ */
753
+ CALLBACK(url);
754
+ state = s_req_http_start;
755
+ break;
756
+ case '?':
757
+ state = s_req_query_string_start;
758
+ break;
759
+ default:
760
+ goto error;
761
+ }
762
+ break;
763
+ }
764
+
765
+ case s_req_path:
766
+ {
767
+ if (IS_URL_CHAR(ch)) break;
768
+
769
+ switch (ch) {
770
+ case ' ':
771
+ CALLBACK(url);
772
+ CALLBACK(path);
773
+ state = s_req_http_start;
774
+ break;
775
+ case CR:
776
+ CALLBACK(url);
777
+ CALLBACK(path);
778
+ parser->http_major = 0;
779
+ parser->http_minor = 9;
780
+ state = s_req_line_almost_done;
781
+ break;
782
+ case LF:
783
+ CALLBACK(url);
784
+ CALLBACK(path);
785
+ parser->http_major = 0;
786
+ parser->http_minor = 9;
787
+ state = s_header_field_start;
788
+ break;
789
+ case '?':
790
+ CALLBACK(path);
791
+ state = s_req_query_string_start;
792
+ break;
793
+ case '#':
794
+ CALLBACK(path);
795
+ state = s_req_fragment_start;
796
+ break;
797
+ default:
798
+ goto error;
799
+ }
800
+ break;
801
+ }
802
+
803
+ case s_req_query_string_start:
804
+ {
805
+ if (IS_URL_CHAR(ch)) {
806
+ MARK(query_string);
807
+ state = s_req_query_string;
808
+ break;
809
+ }
810
+
811
+ switch (ch) {
812
+ case '?':
813
+ break; /* XXX ignore extra '?' ... is this right? */
814
+ case ' ':
815
+ CALLBACK(url);
816
+ state = s_req_http_start;
817
+ break;
818
+ case CR:
819
+ CALLBACK(url);
820
+ parser->http_major = 0;
821
+ parser->http_minor = 9;
822
+ state = s_req_line_almost_done;
823
+ break;
824
+ case LF:
825
+ CALLBACK(url);
826
+ parser->http_major = 0;
827
+ parser->http_minor = 9;
828
+ state = s_header_field_start;
829
+ break;
830
+ case '#':
831
+ state = s_req_fragment_start;
832
+ break;
833
+ default:
834
+ goto error;
835
+ }
836
+ break;
837
+ }
838
+
839
+ case s_req_query_string:
840
+ {
841
+ if (IS_URL_CHAR(ch)) break;
842
+
843
+ switch (ch) {
844
+ case '?':
845
+ /* allow extra '?' in query string */
846
+ break;
847
+ case ' ':
848
+ CALLBACK(url);
849
+ CALLBACK(query_string);
850
+ state = s_req_http_start;
851
+ break;
852
+ case CR:
853
+ CALLBACK(url);
854
+ CALLBACK(query_string);
855
+ parser->http_major = 0;
856
+ parser->http_minor = 9;
857
+ state = s_req_line_almost_done;
858
+ break;
859
+ case LF:
860
+ CALLBACK(url);
861
+ CALLBACK(query_string);
862
+ parser->http_major = 0;
863
+ parser->http_minor = 9;
864
+ state = s_header_field_start;
865
+ break;
866
+ case '#':
867
+ CALLBACK(query_string);
868
+ state = s_req_fragment_start;
869
+ break;
870
+ default:
871
+ goto error;
872
+ }
873
+ break;
874
+ }
875
+
876
+ case s_req_fragment_start:
877
+ {
878
+ if (IS_URL_CHAR(ch)) {
879
+ MARK(fragment);
880
+ state = s_req_fragment;
881
+ break;
882
+ }
883
+
884
+ switch (ch) {
885
+ case ' ':
886
+ CALLBACK(url);
887
+ state = s_req_http_start;
888
+ break;
889
+ case CR:
890
+ CALLBACK(url);
891
+ parser->http_major = 0;
892
+ parser->http_minor = 9;
893
+ state = s_req_line_almost_done;
894
+ break;
895
+ case LF:
896
+ CALLBACK(url);
897
+ parser->http_major = 0;
898
+ parser->http_minor = 9;
899
+ state = s_header_field_start;
900
+ break;
901
+ case '?':
902
+ MARK(fragment);
903
+ state = s_req_fragment;
904
+ break;
905
+ case '#':
906
+ break;
907
+ default:
908
+ goto error;
909
+ }
910
+ break;
911
+ }
912
+
913
+ case s_req_fragment:
914
+ {
915
+ if (IS_URL_CHAR(ch)) break;
916
+
917
+ switch (ch) {
918
+ case ' ':
919
+ CALLBACK(url);
920
+ CALLBACK(fragment);
921
+ state = s_req_http_start;
922
+ break;
923
+ case CR:
924
+ CALLBACK(url);
925
+ CALLBACK(fragment);
926
+ parser->http_major = 0;
927
+ parser->http_minor = 9;
928
+ state = s_req_line_almost_done;
929
+ break;
930
+ case LF:
931
+ CALLBACK(url);
932
+ CALLBACK(fragment);
933
+ parser->http_major = 0;
934
+ parser->http_minor = 9;
935
+ state = s_header_field_start;
936
+ break;
937
+ case '?':
938
+ case '#':
939
+ break;
940
+ default:
941
+ goto error;
942
+ }
943
+ break;
944
+ }
945
+
946
+ case s_req_http_start:
947
+ switch (ch) {
948
+ case 'H':
949
+ state = s_req_http_H;
950
+ break;
951
+ case ' ':
952
+ break;
953
+ default:
954
+ goto error;
955
+ }
956
+ break;
957
+
958
+ case s_req_http_H:
959
+ STRICT_CHECK(ch != 'T');
960
+ state = s_req_http_HT;
961
+ break;
962
+
963
+ case s_req_http_HT:
964
+ STRICT_CHECK(ch != 'T');
965
+ state = s_req_http_HTT;
966
+ break;
967
+
968
+ case s_req_http_HTT:
969
+ STRICT_CHECK(ch != 'P');
970
+ state = s_req_http_HTTP;
971
+ break;
972
+
973
+ case s_req_http_HTTP:
974
+ STRICT_CHECK(ch != '/');
975
+ state = s_req_first_http_major;
976
+ break;
977
+
978
+ /* first digit of major HTTP version */
979
+ case s_req_first_http_major:
980
+ if (ch < '1' || ch > '9') goto error;
981
+ parser->http_major = ch - '0';
982
+ state = s_req_http_major;
983
+ break;
984
+
985
+ /* major HTTP version or dot */
986
+ case s_req_http_major:
987
+ {
988
+ if (ch == '.') {
989
+ state = s_req_first_http_minor;
990
+ break;
991
+ }
992
+
993
+ if (!IS_NUM(ch)) goto error;
994
+
995
+ parser->http_major *= 10;
996
+ parser->http_major += ch - '0';
997
+
998
+ if (parser->http_major > 999) goto error;
999
+ break;
1000
+ }
1001
+
1002
+ /* first digit of minor HTTP version */
1003
+ case s_req_first_http_minor:
1004
+ if (!IS_NUM(ch)) goto error;
1005
+ parser->http_minor = ch - '0';
1006
+ state = s_req_http_minor;
1007
+ break;
1008
+
1009
+ /* minor HTTP version or end of request line */
1010
+ case s_req_http_minor:
1011
+ {
1012
+ if (ch == CR) {
1013
+ state = s_req_line_almost_done;
1014
+ break;
1015
+ }
1016
+
1017
+ if (ch == LF) {
1018
+ state = s_header_field_start;
1019
+ break;
1020
+ }
1021
+
1022
+ /* XXX allow spaces after digit? */
1023
+
1024
+ if (!IS_NUM(ch)) goto error;
1025
+
1026
+ parser->http_minor *= 10;
1027
+ parser->http_minor += ch - '0';
1028
+
1029
+ if (parser->http_minor > 999) goto error;
1030
+ break;
1031
+ }
1032
+
1033
+ /* end of request line */
1034
+ case s_req_line_almost_done:
1035
+ {
1036
+ if (ch != LF) goto error;
1037
+ state = s_header_field_start;
1038
+ break;
1039
+ }
1040
+
1041
+ case s_header_field_start:
1042
+ {
1043
+ if (ch == CR) {
1044
+ state = s_headers_almost_done;
1045
+ break;
1046
+ }
1047
+
1048
+ if (ch == LF) {
1049
+ /* they might be just sending \n instead of \r\n so this would be
1050
+ * the second \n to denote the end of headers*/
1051
+ state = s_headers_almost_done;
1052
+ goto headers_almost_done;
1053
+ }
1054
+
1055
+ c = TOKEN(ch);
1056
+
1057
+ if (!c) goto error;
1058
+
1059
+ MARK(header_field);
1060
+
1061
+ index = 0;
1062
+ state = s_header_field;
1063
+
1064
+ switch (c) {
1065
+ case 'c':
1066
+ header_state = h_C;
1067
+ break;
1068
+
1069
+ case 'p':
1070
+ header_state = h_matching_proxy_connection;
1071
+ break;
1072
+
1073
+ case 't':
1074
+ header_state = h_matching_transfer_encoding;
1075
+ break;
1076
+
1077
+ case 'u':
1078
+ header_state = h_matching_upgrade;
1079
+ break;
1080
+
1081
+ default:
1082
+ header_state = h_general;
1083
+ break;
1084
+ }
1085
+ break;
1086
+ }
1087
+
1088
+ case s_header_field:
1089
+ {
1090
+ c = TOKEN(ch);
1091
+
1092
+ if (c) {
1093
+ switch (header_state) {
1094
+ case h_general:
1095
+ break;
1096
+
1097
+ case h_C:
1098
+ index++;
1099
+ header_state = (c == 'o' ? h_CO : h_general);
1100
+ break;
1101
+
1102
+ case h_CO:
1103
+ index++;
1104
+ header_state = (c == 'n' ? h_CON : h_general);
1105
+ break;
1106
+
1107
+ case h_CON:
1108
+ index++;
1109
+ switch (c) {
1110
+ case 'n':
1111
+ header_state = h_matching_connection;
1112
+ break;
1113
+ case 't':
1114
+ header_state = h_matching_content_length;
1115
+ break;
1116
+ default:
1117
+ header_state = h_general;
1118
+ break;
1119
+ }
1120
+ break;
1121
+
1122
+ /* connection */
1123
+
1124
+ case h_matching_connection:
1125
+ index++;
1126
+ if (index > sizeof(CONNECTION)-1
1127
+ || c != CONNECTION[index]) {
1128
+ header_state = h_general;
1129
+ } else if (index == sizeof(CONNECTION)-2) {
1130
+ header_state = h_connection;
1131
+ }
1132
+ break;
1133
+
1134
+ /* proxy-connection */
1135
+
1136
+ case h_matching_proxy_connection:
1137
+ index++;
1138
+ if (index > sizeof(PROXY_CONNECTION)-1
1139
+ || c != PROXY_CONNECTION[index]) {
1140
+ header_state = h_general;
1141
+ } else if (index == sizeof(PROXY_CONNECTION)-2) {
1142
+ header_state = h_connection;
1143
+ }
1144
+ break;
1145
+
1146
+ /* content-length */
1147
+
1148
+ case h_matching_content_length:
1149
+ index++;
1150
+ if (index > sizeof(CONTENT_LENGTH)-1
1151
+ || c != CONTENT_LENGTH[index]) {
1152
+ header_state = h_general;
1153
+ } else if (index == sizeof(CONTENT_LENGTH)-2) {
1154
+ header_state = h_content_length;
1155
+ }
1156
+ break;
1157
+
1158
+ /* transfer-encoding */
1159
+
1160
+ case h_matching_transfer_encoding:
1161
+ index++;
1162
+ if (index > sizeof(TRANSFER_ENCODING)-1
1163
+ || c != TRANSFER_ENCODING[index]) {
1164
+ header_state = h_general;
1165
+ } else if (index == sizeof(TRANSFER_ENCODING)-2) {
1166
+ header_state = h_transfer_encoding;
1167
+ }
1168
+ break;
1169
+
1170
+ /* upgrade */
1171
+
1172
+ case h_matching_upgrade:
1173
+ index++;
1174
+ if (index > sizeof(UPGRADE)-1
1175
+ || c != UPGRADE[index]) {
1176
+ header_state = h_general;
1177
+ } else if (index == sizeof(UPGRADE)-2) {
1178
+ header_state = h_upgrade;
1179
+ }
1180
+ break;
1181
+
1182
+ case h_connection:
1183
+ case h_content_length:
1184
+ case h_transfer_encoding:
1185
+ case h_upgrade:
1186
+ if (ch != ' ') header_state = h_general;
1187
+ break;
1188
+
1189
+ default:
1190
+ assert(0 && "Unknown header_state");
1191
+ break;
1192
+ }
1193
+ break;
1194
+ }
1195
+
1196
+ if (ch == ':') {
1197
+ CALLBACK(header_field);
1198
+ state = s_header_value_start;
1199
+ break;
1200
+ }
1201
+
1202
+ if (ch == CR) {
1203
+ state = s_header_almost_done;
1204
+ CALLBACK(header_field);
1205
+ break;
1206
+ }
1207
+
1208
+ if (ch == LF) {
1209
+ CALLBACK(header_field);
1210
+ state = s_header_field_start;
1211
+ break;
1212
+ }
1213
+
1214
+ goto error;
1215
+ }
1216
+
1217
+ case s_header_value_start:
1218
+ {
1219
+ if (ch == ' ') break;
1220
+
1221
+ MARK(header_value);
1222
+
1223
+ state = s_header_value;
1224
+ index = 0;
1225
+
1226
+ c = LOWER(ch);
1227
+
1228
+ if (ch == CR) {
1229
+ CALLBACK(header_value);
1230
+ header_state = h_general;
1231
+ state = s_header_almost_done;
1232
+ break;
1233
+ }
1234
+
1235
+ if (ch == LF) {
1236
+ CALLBACK(header_value);
1237
+ state = s_header_field_start;
1238
+ break;
1239
+ }
1240
+
1241
+ switch (header_state) {
1242
+ case h_upgrade:
1243
+ parser->flags |= F_UPGRADE;
1244
+ header_state = h_general;
1245
+ break;
1246
+
1247
+ case h_transfer_encoding:
1248
+ /* looking for 'Transfer-Encoding: chunked' */
1249
+ if ('c' == c) {
1250
+ header_state = h_matching_transfer_encoding_chunked;
1251
+ } else {
1252
+ header_state = h_general;
1253
+ }
1254
+ break;
1255
+
1256
+ case h_content_length:
1257
+ if (!IS_NUM(ch)) goto error;
1258
+ parser->content_length = ch - '0';
1259
+ break;
1260
+
1261
+ case h_connection:
1262
+ /* looking for 'Connection: keep-alive' */
1263
+ if (c == 'k') {
1264
+ header_state = h_matching_connection_keep_alive;
1265
+ /* looking for 'Connection: close' */
1266
+ } else if (c == 'c') {
1267
+ header_state = h_matching_connection_close;
1268
+ } else {
1269
+ header_state = h_general;
1270
+ }
1271
+ break;
1272
+
1273
+ default:
1274
+ header_state = h_general;
1275
+ break;
1276
+ }
1277
+ break;
1278
+ }
1279
+
1280
+ case s_header_value:
1281
+ {
1282
+ c = LOWER(ch);
1283
+
1284
+ if (ch == CR) {
1285
+ CALLBACK(header_value);
1286
+ state = s_header_almost_done;
1287
+ break;
1288
+ }
1289
+
1290
+ if (ch == LF) {
1291
+ CALLBACK(header_value);
1292
+ goto header_almost_done;
1293
+ }
1294
+
1295
+ switch (header_state) {
1296
+ case h_general:
1297
+ break;
1298
+
1299
+ case h_connection:
1300
+ case h_transfer_encoding:
1301
+ assert(0 && "Shouldn't get here.");
1302
+ break;
1303
+
1304
+ case h_content_length:
1305
+ if (ch == ' ') break;
1306
+ if (!IS_NUM(ch)) goto error;
1307
+ parser->content_length *= 10;
1308
+ parser->content_length += ch - '0';
1309
+ break;
1310
+
1311
+ /* Transfer-Encoding: chunked */
1312
+ case h_matching_transfer_encoding_chunked:
1313
+ index++;
1314
+ if (index > sizeof(CHUNKED)-1
1315
+ || c != CHUNKED[index]) {
1316
+ header_state = h_general;
1317
+ } else if (index == sizeof(CHUNKED)-2) {
1318
+ header_state = h_transfer_encoding_chunked;
1319
+ }
1320
+ break;
1321
+
1322
+ /* looking for 'Connection: keep-alive' */
1323
+ case h_matching_connection_keep_alive:
1324
+ index++;
1325
+ if (index > sizeof(KEEP_ALIVE)-1
1326
+ || c != KEEP_ALIVE[index]) {
1327
+ header_state = h_general;
1328
+ } else if (index == sizeof(KEEP_ALIVE)-2) {
1329
+ header_state = h_connection_keep_alive;
1330
+ }
1331
+ break;
1332
+
1333
+ /* looking for 'Connection: close' */
1334
+ case h_matching_connection_close:
1335
+ index++;
1336
+ if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
1337
+ header_state = h_general;
1338
+ } else if (index == sizeof(CLOSE)-2) {
1339
+ header_state = h_connection_close;
1340
+ }
1341
+ break;
1342
+
1343
+ case h_transfer_encoding_chunked:
1344
+ case h_connection_keep_alive:
1345
+ case h_connection_close:
1346
+ if (ch != ' ') header_state = h_general;
1347
+ break;
1348
+
1349
+ default:
1350
+ state = s_header_value;
1351
+ header_state = h_general;
1352
+ break;
1353
+ }
1354
+ break;
1355
+ }
1356
+
1357
+ case s_header_almost_done:
1358
+ header_almost_done:
1359
+ {
1360
+ STRICT_CHECK(ch != LF);
1361
+
1362
+ state = s_header_field_start;
1363
+
1364
+ switch (header_state) {
1365
+ case h_connection_keep_alive:
1366
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1367
+ break;
1368
+ case h_connection_close:
1369
+ parser->flags |= F_CONNECTION_CLOSE;
1370
+ break;
1371
+ case h_transfer_encoding_chunked:
1372
+ parser->flags |= F_CHUNKED;
1373
+ break;
1374
+ default:
1375
+ break;
1376
+ }
1377
+ break;
1378
+ }
1379
+
1380
+ case s_headers_almost_done:
1381
+ headers_almost_done:
1382
+ {
1383
+ STRICT_CHECK(ch != LF);
1384
+
1385
+ if (parser->flags & F_TRAILING) {
1386
+ /* End of a chunked request */
1387
+ CALLBACK2(message_complete);
1388
+ state = NEW_MESSAGE();
1389
+ break;
1390
+ }
1391
+
1392
+ nread = 0;
1393
+
1394
+ if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) {
1395
+ parser->upgrade = 1;
1396
+ }
1397
+
1398
+ /* Here we call the headers_complete callback. This is somewhat
1399
+ * different than other callbacks because if the user returns 1, we
1400
+ * will interpret that as saying that this message has no body. This
1401
+ * is needed for the annoying case of receiving a response to a HEAD
1402
+ * request.
1403
+ */
1404
+ if (settings->on_headers_complete) {
1405
+ switch (settings->on_headers_complete(parser)) {
1406
+ case 0:
1407
+ break;
1408
+
1409
+ case 1:
1410
+ parser->flags |= F_SKIPBODY;
1411
+ break;
1412
+
1413
+ default:
1414
+ parser->state = state;
1415
+ return p - data; /* Error */
1416
+ }
1417
+ }
1418
+
1419
+ /* Exit, the rest of the connect is in a different protocol. */
1420
+ if (parser->upgrade) {
1421
+ CALLBACK2(message_complete);
1422
+ return (p - data);
1423
+ }
1424
+
1425
+ if (parser->flags & F_SKIPBODY) {
1426
+ CALLBACK2(message_complete);
1427
+ state = NEW_MESSAGE();
1428
+ } else if (parser->flags & F_CHUNKED) {
1429
+ /* chunked encoding - ignore Content-Length header */
1430
+ state = s_chunk_size_start;
1431
+ } else {
1432
+ if (parser->content_length == 0) {
1433
+ /* Content-Length header given but zero: Content-Length: 0\r\n */
1434
+ CALLBACK2(message_complete);
1435
+ state = NEW_MESSAGE();
1436
+ } else if (parser->content_length > 0) {
1437
+ /* Content-Length header given and non-zero */
1438
+ state = s_body_identity;
1439
+ } else {
1440
+ if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) {
1441
+ /* Assume content-length 0 - read the next */
1442
+ CALLBACK2(message_complete);
1443
+ state = NEW_MESSAGE();
1444
+ } else {
1445
+ /* Read body until EOF */
1446
+ state = s_body_identity_eof;
1447
+ }
1448
+ }
1449
+ }
1450
+
1451
+ break;
1452
+ }
1453
+
1454
+ case s_body_identity:
1455
+ to_read = MIN(pe - p, (int64_t)parser->content_length);
1456
+ if (to_read > 0) {
1457
+ if (settings->on_body) settings->on_body(parser, p, to_read);
1458
+ p += to_read - 1;
1459
+ parser->content_length -= to_read;
1460
+ if (parser->content_length == 0) {
1461
+ CALLBACK2(message_complete);
1462
+ state = NEW_MESSAGE();
1463
+ }
1464
+ }
1465
+ break;
1466
+
1467
+ /* read until EOF */
1468
+ case s_body_identity_eof:
1469
+ to_read = pe - p;
1470
+ if (to_read > 0) {
1471
+ if (settings->on_body) settings->on_body(parser, p, to_read);
1472
+ p += to_read - 1;
1473
+ }
1474
+ break;
1475
+
1476
+ case s_chunk_size_start:
1477
+ {
1478
+ assert(nread == 1);
1479
+ assert(parser->flags & F_CHUNKED);
1480
+
1481
+ c = unhex[(unsigned char)ch];
1482
+ if (c == -1) goto error;
1483
+ parser->content_length = c;
1484
+ state = s_chunk_size;
1485
+ break;
1486
+ }
1487
+
1488
+ case s_chunk_size:
1489
+ {
1490
+ assert(parser->flags & F_CHUNKED);
1491
+
1492
+ if (ch == CR) {
1493
+ state = s_chunk_size_almost_done;
1494
+ break;
1495
+ }
1496
+
1497
+ c = unhex[(unsigned char)ch];
1498
+
1499
+ if (c == -1) {
1500
+ if (ch == ';' || ch == ' ') {
1501
+ state = s_chunk_parameters;
1502
+ break;
1503
+ }
1504
+ goto error;
1505
+ }
1506
+
1507
+ parser->content_length *= 16;
1508
+ parser->content_length += c;
1509
+ break;
1510
+ }
1511
+
1512
+ case s_chunk_parameters:
1513
+ {
1514
+ assert(parser->flags & F_CHUNKED);
1515
+ /* Chunk parameters are skipped without being interpreted. TODO check for overflow */
1516
+ if (ch == CR) {
1517
+ state = s_chunk_size_almost_done;
1518
+ break;
1519
+ }
1520
+ break;
1521
+ }
1522
+
1523
+ case s_chunk_size_almost_done:
1524
+ {
1525
+ assert(parser->flags & F_CHUNKED);
1526
+ STRICT_CHECK(ch != LF);
1527
+
1528
+ nread = 0;
1529
+
1530
+ if (parser->content_length == 0) {
1531
+ parser->flags |= F_TRAILING;
1532
+ state = s_header_field_start;
1533
+ } else {
1534
+ state = s_chunk_data;
1535
+ }
1536
+ break;
1537
+ }
1538
+
1539
+ case s_chunk_data:
1540
+ {
1541
+ assert(parser->flags & F_CHUNKED);
1542
+
1543
+ to_read = MIN(pe - p, (int64_t)(parser->content_length));
1544
+
1545
+ if (to_read > 0) {
1546
+ if (settings->on_body) settings->on_body(parser, p, to_read);
1547
+ p += to_read - 1;
1548
+ }
1549
+
1550
+ if (to_read == parser->content_length) {
1551
+ state = s_chunk_data_almost_done;
1552
+ }
1553
+
1554
+ parser->content_length -= to_read;
1555
+ break;
1556
+ }
1557
+
1558
+ case s_chunk_data_almost_done:
1559
+ assert(parser->flags & F_CHUNKED);
1560
+ STRICT_CHECK(ch != CR);
1561
+ state = s_chunk_data_done;
1562
+ break;
1563
+
1564
+ case s_chunk_data_done:
1565
+ assert(parser->flags & F_CHUNKED);
1566
+ STRICT_CHECK(ch != LF);
1567
+ state = s_chunk_size_start;
1568
+ break;
1569
+
1570
+ default:
1571
+ assert(0 && "unhandled state");
1572
+ goto error;
1573
+ }
1574
+ }
1575
+
1576
+ CALLBACK_NOCLEAR(header_field);
1577
+ CALLBACK_NOCLEAR(header_value);
1578
+ CALLBACK_NOCLEAR(fragment);
1579
+ CALLBACK_NOCLEAR(query_string);
1580
+ CALLBACK_NOCLEAR(path);
1581
+ CALLBACK_NOCLEAR(url);
1582
+
1583
+ parser->state = state;
1584
+ parser->header_state = header_state;
1585
+ parser->index = index;
1586
+ parser->nread = nread;
1587
+
1588
+ return len;
1589
+
1590
+ error:
1591
+ parser->state = s_dead;
1592
+ return (p - data);
1593
+ }
1594
+
1595
+
1596
+ int
1597
+ http_should_keep_alive (http_parser *parser)
1598
+ {
1599
+ if (parser->http_major > 0 && parser->http_minor > 0) {
1600
+ /* HTTP/1.1 */
1601
+ if (parser->flags & F_CONNECTION_CLOSE) {
1602
+ return 0;
1603
+ } else {
1604
+ return 1;
1605
+ }
1606
+ } else {
1607
+ /* HTTP/1.0 or earlier */
1608
+ if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
1609
+ return 1;
1610
+ } else {
1611
+ return 0;
1612
+ }
1613
+ }
1614
+ }
1615
+
1616
+
1617
+ const char * http_method_str (enum http_method m)
1618
+ {
1619
+ return method_strings[m];
1620
+ }
1621
+
1622
+
1623
+ void
1624
+ http_parser_init (http_parser *parser, enum http_parser_type t)
1625
+ {
1626
+ parser->type = t;
1627
+ parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1628
+ parser->nread = 0;
1629
+ parser->upgrade = 0;
1630
+ parser->flags = 0;
1631
+ parser->method = 0;
1632
+ }