noderb-http 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1632 @@
1
+ /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
+ *
3
+ * Additional changes are licensed under the same terms as NGINX and
4
+ * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ * of this software and associated documentation files (the "Software"), to
8
+ * deal in the Software without restriction, including without limitation the
9
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10
+ * sell copies of the Software, and to permit persons to whom the Software is
11
+ * furnished to do so, subject to the following conditions:
12
+ *
13
+ * The above copyright notice and this permission notice shall be included in
14
+ * all copies or substantial portions of the Software.
15
+ *
16
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
+ * IN THE SOFTWARE.
23
+ */
24
+ #include <http_parser.h>
25
+ #include <assert.h>
26
+ #include <stddef.h>
27
+
28
+
29
/* Smaller of two values. Only defined here when no earlier definition of MIN
 * exists. Each argument may be evaluated twice, so do not pass expressions
 * with side effects. */
#if !defined(MIN)
# define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
32
+
33
+
34
/* Fire the notification callback on_FOR, which receives only the parser
 * (e.g. message_begin, message_complete).
 *
 * Relies on `settings`, `parser`, `p` and `data` being in scope where the
 * macro is expanded. Nothing happens when the callback is not set. When the
 * callback returns non-zero, the enclosing function returns the offset of
 * `p` from `data`.
 */
#define CALLBACK2(FOR)                                                 \
do {                                                                   \
  if (settings->on_##FOR && settings->on_##FOR(parser) != 0) {         \
    return (p - data);                                                 \
  }                                                                    \
} while (0)
40
+
41
+
42
/* Record the current position `p` in the local pointer FOR_mark
 * (e.g. MARK(url) assigns url_mark). Both `p` and FOR_mark must be in scope
 * where the macro is expanded. */
#define MARK(FOR)                                                      \
do {                                                                   \
  FOR##_mark = p;                                                      \
} while (0)
46
+
47
/* Hand the bytes in [FOR_mark, p) to the data callback on_FOR.
 *
 * Relies on `settings`, `parser`, `p`, `data` and FOR_mark being in scope
 * where the macro is expanded. Nothing happens when FOR_mark is null or the
 * callback is not set. When the callback returns non-zero, the enclosing
 * function returns the offset of `p` from `data`. FOR_mark is left as is.
 */
#define CALLBACK_NOCLEAR(FOR)                                          \
do {                                                                   \
  if (FOR##_mark && settings->on_##FOR &&                              \
      settings->on_##FOR(parser, FOR##_mark, p - FOR##_mark) != 0) {   \
    return (p - data);                                                 \
  }                                                                    \
} while (0)


/* CALLBACK_NOCLEAR followed by resetting FOR_mark to NULL. If the callback
 * makes the enclosing function return, the reset is never reached. */
#define CALLBACK(FOR)                                                  \
do {                                                                   \
  CALLBACK_NOCLEAR(FOR);                                               \
  FOR##_mark = NULL;                                                   \
} while (0)
67
+
68
+
69
/* Header names and values the parser recognises.
 *
 * They are kept as string-literal macros because the matching code takes
 * sizeof() of them and indexes into them one character at a time. The
 * spelling is lowercase because the input character is lowercased before
 * it is compared. */
#define PROXY_CONNECTION    "proxy-connection"
#define CONNECTION          "connection"
#define CONTENT_LENGTH      "content-length"
#define TRANSFER_ENCODING   "transfer-encoding"
#define UPGRADE             "upgrade"
#define CHUNKED             "chunked"
#define KEEP_ALIVE          "keep-alive"
#define CLOSE               "close"
77
+
78
+
79
/* Request method names, indexed by parser->method while the request line is
 * being matched.
 * NOTE(review): the order presumably has to mirror enum http_method in
 * http_parser.h -- confirm before adding or reordering entries. */
static const char *method_strings[] = {
  "DELETE",
  "GET",
  "HEAD",
  "POST",
  "PUT",
  "CONNECT",
  "OPTIONS",
  "TRACE",
  "COPY",
  "LOCK",
  "MKCOL",
  "MOVE",
  "PROPFIND",
  "PROPPATCH",
  "UNLOCK",
  "REPORT",
  "MKACTIVITY",
  "CHECKOUT",
  "MERGE",
  "M-SEARCH",
  "NOTIFY",
  "SUBSCRIBE",
  "UNSUBSCRIBE",
  "PATCH"
};
105
+
106
+
107
/* Tokens as defined by rfc 2616. Also lowercases them.
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * Lookup table indexed by byte value: 0 means "not a token character",
 * anything else is the lowercased form of that character. Only the first
 * 128 entries are listed; bytes 0x80-0xFF are zero-initialised and therefore
 * never count as token characters.
 *
 * The double quote (34), "/" (47) and "}" (125) are separators and map to 0.
 * SP (32) is deliberately still accepted even though it is a separator: the
 * header-field matching code compares against ' ' after a known header name
 * has been matched.
 */
static const char tokens[256] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us   */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '   */
       ' ',     '!',      0,      '#',     '$',     '%',     '&',    '\'',
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /   */
        0,       0,      '*',     '+',      0,      '-',     '.',      0,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7   */
       '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?   */
       '8',     '9',      0,       0,       0,       0,       0,       0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G   */
        0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O   */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W   */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _   */
       'x',     'y',     'z',      0,       0,       0,      '^',     '_',
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g   */
       '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o   */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w   */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del  */
       'x',     'y',     'z',      0,      '|',      0,      '~',      0 };
147
+
148
+
149
/* Hex digit lookup indexed by byte value: '0'-'9' give 0-9, 'A'-'F' and
 * 'a'-'f' give 10-15, and every other byte gives -1.
 *
 * All 256 entries are spelled out. Leaving the upper half to implicit
 * zero-initialisation would make bytes 0x80-0xFF look like the digit 0.
 */
static const int8_t unhex[256] = {
  /* 0x00 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0x10 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0x20 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0x30 */  0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
  /* 0x40 */ -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0x50 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0x60 */ -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0x70 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0x80 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0x90 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0xA0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0xB0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0xC0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0xD0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0xE0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0xF0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
};
159
+
160
+
161
/* Per-byte flag: 1 when the byte may appear inside a URL component without
 * ending it, 0 otherwise. Control characters, SP, '#', '?' and DEL are 0.
 * Only the first 128 entries are listed; bytes 0x80-0xFF are zero here. */
static const uint8_t normal_url_char[256] = {
  /* 0x00  nul .. si  */ 0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10  dle .. us  */ 0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20  sp ! " # $ % & '   ( ) * + , - . /     */
                         0, 1, 1, 0, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30  0 1 2 3 4 5 6 7   8 9 : ; < = > ?      */
                         1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 0,
  /* 0x40  @ A B C D E F G   H I J K L M N O      */
                         1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50  P Q R S T U V W   X Y Z [ \ ] ^ _      */
                         1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60  ` a b c d e f g   h i j k l m n o      */
                         1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70  p q r s t u v w   x y z { | } ~ del    */
                         1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 0
};
194
+
195
+
196
/* Parser states. The numeric value is what gets stored in parser->state
 * between calls, so the order of the enumerators must not change. */
enum state {
  s_dead = 1, /* important that this is > 0 */

  /* message type not known yet, then response status line */
  s_start_req_or_res,
  s_res_or_resp_H,
  s_start_res,
  s_res_H,
  s_res_HT,
  s_res_HTT,
  s_res_HTTP,
  s_res_first_http_major,
  s_res_http_major,
  s_res_first_http_minor,
  s_res_http_minor,
  s_res_first_status_code,
  s_res_status_code,
  s_res_status,
  s_res_line_almost_done,

  /* request line */
  s_start_req,

  s_req_method,
  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_host,
  s_req_port,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_req_http_start,
  s_req_http_H,
  s_req_http_HT,
  s_req_http_HTT,
  s_req_http_HTTP,
  s_req_first_http_major,
  s_req_http_major,
  s_req_first_http_minor,
  s_req_http_minor,
  s_req_line_almost_done,

  /* header lines */
  s_header_field_start,
  s_header_field,
  s_header_value_start,
  s_header_value,

  s_header_almost_done,

  /* chunk size line */
  s_chunk_size_start,
  s_chunk_size,
  s_chunk_parameters,
  s_chunk_size_almost_done,

  s_headers_almost_done,
  /* Important: 's_headers_almost_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  s_chunk_data,
  s_chunk_data_almost_done,
  s_chunk_data_done,

  s_body_identity,
  s_body_identity_eof
};


/* True while `state` is one of the states counted against the header size
 * limit, i.e. anything up to and including s_headers_almost_done. The
 * argument is parenthesised so that compound expressions (for example a
 * conditional expression) are compared as a whole. */
#define PARSING_HEADER(state) ((state) <= s_headers_almost_done)
268
+
269
+
270
/* Sub-state used while scanning a header line to spot the few headers the
 * parser itself cares about. The numeric value is what gets stored in
 * parser->header_state between calls, so the order must not change. */
enum header_states {
  /* nothing special about this header */
  h_general = 0,

  /* shared prefix of "connection" and "content-length" */
  h_C,
  h_CO,
  h_CON,

  /* part-way through matching a known header name */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* a known header name has been matched in full */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* part-way through matching a known header value */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_keep_alive,
  h_matching_connection_close,

  /* a known header value has been matched in full */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close
};
295
+
296
+
297
/* Macros for character classes; depends on strict-mode.
 *
 * Every macro parenthesises its argument so that compound expressions can be
 * passed safely. Several of them evaluate the argument more than once, so do
 * not pass expressions with side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* Sets bit 0x20, which turns 'A'-'Z' into 'a'-'z'. Other bytes are changed
 * too, so the result is only meaningful for alphabetic comparisons. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
/* Lowercased token character, or 0 when c is not a token character. */
#define TOKEN(c)            (tokens[(unsigned char)(c)])
/* Matches lowercase letters only; apply LOWER() first to accept both cases. */
#define IS_ALPHA(c)         ((c) >= 'a' && (c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))

#if HTTP_PARSER_STRICT
#define IS_URL_CHAR(c)      (normal_url_char[(unsigned char) (c)])
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Non-strict mode also accepts any byte with the high bit set in URLs and
 * '_' in host names. */
#define IS_URL_CHAR(c)                                                         \
  (normal_url_char[(unsigned char) (c)] || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif
315
+
316
+
317
/* State in which a fresh message begins, chosen from the parser's type.
 * Expects `parser` to be in scope where the macro is expanded. */
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)


/* STRICT_CHECK(cond): in strict mode, jump to the local `error` label when
 *   cond is true; in non-strict mode it expands to nothing. The strict form
 *   is wrapped in do/while(0) so that it behaves as a single statement and
 *   cannot capture a following `else`.
 * NEW_MESSAGE(): state to enter once a message is finished. Strict mode
 *   goes to s_dead unless http_should_keep_alive(parser) is true;
 *   non-strict mode always starts a new message.
 */
#if HTTP_PARSER_STRICT
# define STRICT_CHECK(cond)                                          \
do {                                                                 \
  if (cond) goto error;                                              \
} while (0)
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#else
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#endif
327
+
328
+
329
+ size_t http_parser_execute (http_parser *parser,
330
+ const http_parser_settings *settings,
331
+ const char *data,
332
+ size_t len)
333
+ {
334
+ char c, ch;
335
+ const char *p = data, *pe;
336
+ int64_t to_read;
337
+
338
+ enum state state = (enum state) parser->state;
339
+ enum header_states header_state = (enum header_states) parser->header_state;
340
+ uint64_t index = parser->index;
341
+ uint64_t nread = parser->nread;
342
+
343
+ if (len == 0) {
344
+ switch (state) {
345
+ case s_body_identity_eof:
346
+ CALLBACK2(message_complete);
347
+ return 0;
348
+
349
+ case s_dead:
350
+ case s_start_req_or_res:
351
+ case s_start_res:
352
+ case s_start_req:
353
+ return 0;
354
+
355
+ default:
356
+ return 1; // error
357
+ }
358
+ }
359
+
360
+ /* technically we could combine all of these (except for url_mark) into one
361
+ variable, saving stack space, but it seems more clear to have them
362
+ separated. */
363
+ const char *header_field_mark = 0;
364
+ const char *header_value_mark = 0;
365
+ const char *fragment_mark = 0;
366
+ const char *query_string_mark = 0;
367
+ const char *path_mark = 0;
368
+ const char *url_mark = 0;
369
+
370
+ if (state == s_header_field)
371
+ header_field_mark = data;
372
+ if (state == s_header_value)
373
+ header_value_mark = data;
374
+ if (state == s_req_fragment)
375
+ fragment_mark = data;
376
+ if (state == s_req_query_string)
377
+ query_string_mark = data;
378
+ if (state == s_req_path)
379
+ path_mark = data;
380
+ if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
381
+ || state == s_req_schema_slash_slash || state == s_req_port
382
+ || state == s_req_query_string_start || state == s_req_query_string
383
+ || state == s_req_host
384
+ || state == s_req_fragment_start || state == s_req_fragment)
385
+ url_mark = data;
386
+
387
+ for (p=data, pe=data+len; p != pe; p++) {
388
+ ch = *p;
389
+
390
+ if (PARSING_HEADER(state)) {
391
+ ++nread;
392
+ /* Buffer overflow attack */
393
+ if (nread > HTTP_MAX_HEADER_SIZE) goto error;
394
+ }
395
+
396
+ switch (state) {
397
+
398
+ case s_dead:
399
+ /* this state is used after a 'Connection: close' message
400
+ * the parser will error out if it reads another message
401
+ */
402
+ goto error;
403
+
404
+ case s_start_req_or_res:
405
+ {
406
+ if (ch == CR || ch == LF)
407
+ break;
408
+ parser->flags = 0;
409
+ parser->content_length = -1;
410
+
411
+ CALLBACK2(message_begin);
412
+
413
+ if (ch == 'H')
414
+ state = s_res_or_resp_H;
415
+ else {
416
+ parser->type = HTTP_REQUEST;
417
+ goto start_req_method_assign;
418
+ }
419
+ break;
420
+ }
421
+
422
+ case s_res_or_resp_H:
423
+ if (ch == 'T') {
424
+ parser->type = HTTP_RESPONSE;
425
+ state = s_res_HT;
426
+ } else {
427
+ if (ch != 'E') goto error;
428
+ parser->type = HTTP_REQUEST;
429
+ parser->method = HTTP_HEAD;
430
+ index = 2;
431
+ state = s_req_method;
432
+ }
433
+ break;
434
+
435
+ case s_start_res:
436
+ {
437
+ parser->flags = 0;
438
+ parser->content_length = -1;
439
+
440
+ CALLBACK2(message_begin);
441
+
442
+ switch (ch) {
443
+ case 'H':
444
+ state = s_res_H;
445
+ break;
446
+
447
+ case CR:
448
+ case LF:
449
+ break;
450
+
451
+ default:
452
+ goto error;
453
+ }
454
+ break;
455
+ }
456
+
457
+ case s_res_H:
458
+ STRICT_CHECK(ch != 'T');
459
+ state = s_res_HT;
460
+ break;
461
+
462
+ case s_res_HT:
463
+ STRICT_CHECK(ch != 'T');
464
+ state = s_res_HTT;
465
+ break;
466
+
467
+ case s_res_HTT:
468
+ STRICT_CHECK(ch != 'P');
469
+ state = s_res_HTTP;
470
+ break;
471
+
472
+ case s_res_HTTP:
473
+ STRICT_CHECK(ch != '/');
474
+ state = s_res_first_http_major;
475
+ break;
476
+
477
+ case s_res_first_http_major:
478
+ if (ch < '1' || ch > '9') goto error;
479
+ parser->http_major = ch - '0';
480
+ state = s_res_http_major;
481
+ break;
482
+
483
+ /* major HTTP version or dot */
484
+ case s_res_http_major:
485
+ {
486
+ if (ch == '.') {
487
+ state = s_res_first_http_minor;
488
+ break;
489
+ }
490
+
491
+ if (!IS_NUM(ch)) goto error;
492
+
493
+ parser->http_major *= 10;
494
+ parser->http_major += ch - '0';
495
+
496
+ if (parser->http_major > 999) goto error;
497
+ break;
498
+ }
499
+
500
+ /* first digit of minor HTTP version */
501
+ case s_res_first_http_minor:
502
+ if (!IS_NUM(ch)) goto error;
503
+ parser->http_minor = ch - '0';
504
+ state = s_res_http_minor;
505
+ break;
506
+
507
+ /* minor HTTP version or end of request line */
508
+ case s_res_http_minor:
509
+ {
510
+ if (ch == ' ') {
511
+ state = s_res_first_status_code;
512
+ break;
513
+ }
514
+
515
+ if (!IS_NUM(ch)) goto error;
516
+
517
+ parser->http_minor *= 10;
518
+ parser->http_minor += ch - '0';
519
+
520
+ if (parser->http_minor > 999) goto error;
521
+ break;
522
+ }
523
+
524
+ case s_res_first_status_code:
525
+ {
526
+ if (!IS_NUM(ch)) {
527
+ if (ch == ' ') {
528
+ break;
529
+ }
530
+ goto error;
531
+ }
532
+ parser->status_code = ch - '0';
533
+ state = s_res_status_code;
534
+ break;
535
+ }
536
+
537
+ case s_res_status_code:
538
+ {
539
+ if (!IS_NUM(ch)) {
540
+ switch (ch) {
541
+ case ' ':
542
+ state = s_res_status;
543
+ break;
544
+ case CR:
545
+ state = s_res_line_almost_done;
546
+ break;
547
+ case LF:
548
+ state = s_header_field_start;
549
+ break;
550
+ default:
551
+ goto error;
552
+ }
553
+ break;
554
+ }
555
+
556
+ parser->status_code *= 10;
557
+ parser->status_code += ch - '0';
558
+
559
+ if (parser->status_code > 999) goto error;
560
+ break;
561
+ }
562
+
563
+ case s_res_status:
564
+ /* the human readable status. e.g. "NOT FOUND"
565
+ * we are not humans so just ignore this */
566
+ if (ch == CR) {
567
+ state = s_res_line_almost_done;
568
+ break;
569
+ }
570
+
571
+ if (ch == LF) {
572
+ state = s_header_field_start;
573
+ break;
574
+ }
575
+ break;
576
+
577
+ case s_res_line_almost_done:
578
+ STRICT_CHECK(ch != LF);
579
+ state = s_header_field_start;
580
+ break;
581
+
582
+ case s_start_req:
583
+ {
584
+ if (ch == CR || ch == LF)
585
+ break;
586
+ parser->flags = 0;
587
+ parser->content_length = -1;
588
+
589
+ CALLBACK2(message_begin);
590
+
591
+ if (!IS_ALPHA(LOWER(ch))) goto error;
592
+
593
+ start_req_method_assign:
594
+ parser->method = (enum http_method) 0;
595
+ index = 1;
596
+ switch (ch) {
597
+ case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
598
+ case 'D': parser->method = HTTP_DELETE; break;
599
+ case 'G': parser->method = HTTP_GET; break;
600
+ case 'H': parser->method = HTTP_HEAD; break;
601
+ case 'L': parser->method = HTTP_LOCK; break;
602
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
603
+ case 'N': parser->method = HTTP_NOTIFY; break;
604
+ case 'O': parser->method = HTTP_OPTIONS; break;
605
+ case 'P': parser->method = HTTP_POST;
606
+ /* or PROPFIND or PROPPATCH or PUT or PATCH */
607
+ break;
608
+ case 'R': parser->method = HTTP_REPORT; break;
609
+ case 'S': parser->method = HTTP_SUBSCRIBE; break;
610
+ case 'T': parser->method = HTTP_TRACE; break;
611
+ case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
612
+ default: goto error;
613
+ }
614
+ state = s_req_method;
615
+ break;
616
+ }
617
+
618
+ case s_req_method:
619
+ {
620
+ if (ch == '\0')
621
+ goto error;
622
+
623
+ const char *matcher = method_strings[parser->method];
624
+ if (ch == ' ' && matcher[index] == '\0') {
625
+ state = s_req_spaces_before_url;
626
+ } else if (ch == matcher[index]) {
627
+ ; /* nada */
628
+ } else if (parser->method == HTTP_CONNECT) {
629
+ if (index == 1 && ch == 'H') {
630
+ parser->method = HTTP_CHECKOUT;
631
+ } else if (index == 2 && ch == 'P') {
632
+ parser->method = HTTP_COPY;
633
+ }
634
+ } else if (parser->method == HTTP_MKCOL) {
635
+ if (index == 1 && ch == 'O') {
636
+ parser->method = HTTP_MOVE;
637
+ } else if (index == 1 && ch == 'E') {
638
+ parser->method = HTTP_MERGE;
639
+ } else if (index == 1 && ch == '-') {
640
+ parser->method = HTTP_MSEARCH;
641
+ } else if (index == 2 && ch == 'A') {
642
+ parser->method = HTTP_MKACTIVITY;
643
+ }
644
+ } else if (index == 1 && parser->method == HTTP_POST && ch == 'R') {
645
+ parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
646
+ } else if (index == 1 && parser->method == HTTP_POST && ch == 'U') {
647
+ parser->method = HTTP_PUT;
648
+ } else if (index == 1 && parser->method == HTTP_POST && ch == 'A') {
649
+ parser->method = HTTP_PATCH;
650
+ } else if (index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') {
651
+ parser->method = HTTP_UNSUBSCRIBE;
652
+ } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
653
+ parser->method = HTTP_PROPPATCH;
654
+ } else {
655
+ goto error;
656
+ }
657
+
658
+ ++index;
659
+ break;
660
+ }
661
+ case s_req_spaces_before_url:
662
+ {
663
+ if (ch == ' ') break;
664
+
665
+ if (ch == '/' || ch == '*') {
666
+ MARK(url);
667
+ MARK(path);
668
+ state = s_req_path;
669
+ break;
670
+ }
671
+
672
+ c = LOWER(ch);
673
+
674
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
675
+ * CONNECT is followed by a hostname, which begins with alphanum.
676
+ * All other methods are followed by '/' or '*' (handled above).
677
+ */
678
+ if (IS_ALPHA(ch) || (parser->method == HTTP_CONNECT && IS_NUM(ch))) {
679
+ MARK(url);
680
+ state = (parser->method == HTTP_CONNECT) ? s_req_host : s_req_schema;
681
+ break;
682
+ }
683
+
684
+ goto error;
685
+ }
686
+
687
+ case s_req_schema:
688
+ {
689
+ c = LOWER(ch);
690
+
691
+ if (IS_ALPHA(c)) break;
692
+
693
+ if (ch == ':') {
694
+ state = s_req_schema_slash;
695
+ break;
696
+ }
697
+
698
+ goto error;
699
+ }
700
+
701
+ case s_req_schema_slash:
702
+ STRICT_CHECK(ch != '/');
703
+ state = s_req_schema_slash_slash;
704
+ break;
705
+
706
+ case s_req_schema_slash_slash:
707
+ STRICT_CHECK(ch != '/');
708
+ state = s_req_host;
709
+ break;
710
+
711
+ case s_req_host:
712
+ {
713
+ c = LOWER(ch);
714
+ if (IS_HOST_CHAR(ch)) break;
715
+ switch (ch) {
716
+ case ':':
717
+ state = s_req_port;
718
+ break;
719
+ case '/':
720
+ MARK(path);
721
+ state = s_req_path;
722
+ break;
723
+ case ' ':
724
+ /* The request line looks like:
725
+ * "GET http://foo.bar.com HTTP/1.1"
726
+ * That is, there is no path.
727
+ */
728
+ CALLBACK(url);
729
+ state = s_req_http_start;
730
+ break;
731
+ case '?':
732
+ state = s_req_query_string_start;
733
+ break;
734
+ default:
735
+ goto error;
736
+ }
737
+ break;
738
+ }
739
+
740
+ case s_req_port:
741
+ {
742
+ if (IS_NUM(ch)) break;
743
+ switch (ch) {
744
+ case '/':
745
+ MARK(path);
746
+ state = s_req_path;
747
+ break;
748
+ case ' ':
749
+ /* The request line looks like:
750
+ * "GET http://foo.bar.com:1234 HTTP/1.1"
751
+ * That is, there is no path.
752
+ */
753
+ CALLBACK(url);
754
+ state = s_req_http_start;
755
+ break;
756
+ case '?':
757
+ state = s_req_query_string_start;
758
+ break;
759
+ default:
760
+ goto error;
761
+ }
762
+ break;
763
+ }
764
+
765
+ case s_req_path:
766
+ {
767
+ if (IS_URL_CHAR(ch)) break;
768
+
769
+ switch (ch) {
770
+ case ' ':
771
+ CALLBACK(url);
772
+ CALLBACK(path);
773
+ state = s_req_http_start;
774
+ break;
775
+ case CR:
776
+ CALLBACK(url);
777
+ CALLBACK(path);
778
+ parser->http_major = 0;
779
+ parser->http_minor = 9;
780
+ state = s_req_line_almost_done;
781
+ break;
782
+ case LF:
783
+ CALLBACK(url);
784
+ CALLBACK(path);
785
+ parser->http_major = 0;
786
+ parser->http_minor = 9;
787
+ state = s_header_field_start;
788
+ break;
789
+ case '?':
790
+ CALLBACK(path);
791
+ state = s_req_query_string_start;
792
+ break;
793
+ case '#':
794
+ CALLBACK(path);
795
+ state = s_req_fragment_start;
796
+ break;
797
+ default:
798
+ goto error;
799
+ }
800
+ break;
801
+ }
802
+
803
+ case s_req_query_string_start:
804
+ {
805
+ if (IS_URL_CHAR(ch)) {
806
+ MARK(query_string);
807
+ state = s_req_query_string;
808
+ break;
809
+ }
810
+
811
+ switch (ch) {
812
+ case '?':
813
+ break; /* XXX ignore extra '?' ... is this right? */
814
+ case ' ':
815
+ CALLBACK(url);
816
+ state = s_req_http_start;
817
+ break;
818
+ case CR:
819
+ CALLBACK(url);
820
+ parser->http_major = 0;
821
+ parser->http_minor = 9;
822
+ state = s_req_line_almost_done;
823
+ break;
824
+ case LF:
825
+ CALLBACK(url);
826
+ parser->http_major = 0;
827
+ parser->http_minor = 9;
828
+ state = s_header_field_start;
829
+ break;
830
+ case '#':
831
+ state = s_req_fragment_start;
832
+ break;
833
+ default:
834
+ goto error;
835
+ }
836
+ break;
837
+ }
838
+
839
+ case s_req_query_string:
840
+ {
841
+ if (IS_URL_CHAR(ch)) break;
842
+
843
+ switch (ch) {
844
+ case '?':
845
+ /* allow extra '?' in query string */
846
+ break;
847
+ case ' ':
848
+ CALLBACK(url);
849
+ CALLBACK(query_string);
850
+ state = s_req_http_start;
851
+ break;
852
+ case CR:
853
+ CALLBACK(url);
854
+ CALLBACK(query_string);
855
+ parser->http_major = 0;
856
+ parser->http_minor = 9;
857
+ state = s_req_line_almost_done;
858
+ break;
859
+ case LF:
860
+ CALLBACK(url);
861
+ CALLBACK(query_string);
862
+ parser->http_major = 0;
863
+ parser->http_minor = 9;
864
+ state = s_header_field_start;
865
+ break;
866
+ case '#':
867
+ CALLBACK(query_string);
868
+ state = s_req_fragment_start;
869
+ break;
870
+ default:
871
+ goto error;
872
+ }
873
+ break;
874
+ }
875
+
876
+ case s_req_fragment_start:
877
+ {
878
+ if (IS_URL_CHAR(ch)) {
879
+ MARK(fragment);
880
+ state = s_req_fragment;
881
+ break;
882
+ }
883
+
884
+ switch (ch) {
885
+ case ' ':
886
+ CALLBACK(url);
887
+ state = s_req_http_start;
888
+ break;
889
+ case CR:
890
+ CALLBACK(url);
891
+ parser->http_major = 0;
892
+ parser->http_minor = 9;
893
+ state = s_req_line_almost_done;
894
+ break;
895
+ case LF:
896
+ CALLBACK(url);
897
+ parser->http_major = 0;
898
+ parser->http_minor = 9;
899
+ state = s_header_field_start;
900
+ break;
901
+ case '?':
902
+ MARK(fragment);
903
+ state = s_req_fragment;
904
+ break;
905
+ case '#':
906
+ break;
907
+ default:
908
+ goto error;
909
+ }
910
+ break;
911
+ }
912
+
913
+ case s_req_fragment:
914
+ {
915
+ if (IS_URL_CHAR(ch)) break;
916
+
917
+ switch (ch) {
918
+ case ' ':
919
+ CALLBACK(url);
920
+ CALLBACK(fragment);
921
+ state = s_req_http_start;
922
+ break;
923
+ case CR:
924
+ CALLBACK(url);
925
+ CALLBACK(fragment);
926
+ parser->http_major = 0;
927
+ parser->http_minor = 9;
928
+ state = s_req_line_almost_done;
929
+ break;
930
+ case LF:
931
+ CALLBACK(url);
932
+ CALLBACK(fragment);
933
+ parser->http_major = 0;
934
+ parser->http_minor = 9;
935
+ state = s_header_field_start;
936
+ break;
937
+ case '?':
938
+ case '#':
939
+ break;
940
+ default:
941
+ goto error;
942
+ }
943
+ break;
944
+ }
945
+
946
+ case s_req_http_start:
947
+ switch (ch) {
948
+ case 'H':
949
+ state = s_req_http_H;
950
+ break;
951
+ case ' ':
952
+ break;
953
+ default:
954
+ goto error;
955
+ }
956
+ break;
957
+
958
+ case s_req_http_H:
959
+ STRICT_CHECK(ch != 'T');
960
+ state = s_req_http_HT;
961
+ break;
962
+
963
+ case s_req_http_HT:
964
+ STRICT_CHECK(ch != 'T');
965
+ state = s_req_http_HTT;
966
+ break;
967
+
968
+ case s_req_http_HTT:
969
+ STRICT_CHECK(ch != 'P');
970
+ state = s_req_http_HTTP;
971
+ break;
972
+
973
+ case s_req_http_HTTP:
974
+ STRICT_CHECK(ch != '/');
975
+ state = s_req_first_http_major;
976
+ break;
977
+
978
+ /* first digit of major HTTP version */
979
+ case s_req_first_http_major:
980
+ if (ch < '1' || ch > '9') goto error;
981
+ parser->http_major = ch - '0';
982
+ state = s_req_http_major;
983
+ break;
984
+
985
+ /* major HTTP version or dot */
986
+ case s_req_http_major:
987
+ {
988
+ if (ch == '.') {
989
+ state = s_req_first_http_minor;
990
+ break;
991
+ }
992
+
993
+ if (!IS_NUM(ch)) goto error;
994
+
995
+ parser->http_major *= 10;
996
+ parser->http_major += ch - '0';
997
+
998
+ if (parser->http_major > 999) goto error;
999
+ break;
1000
+ }
1001
+
1002
+ /* first digit of minor HTTP version */
1003
+ case s_req_first_http_minor:
1004
+ if (!IS_NUM(ch)) goto error;
1005
+ parser->http_minor = ch - '0';
1006
+ state = s_req_http_minor;
1007
+ break;
1008
+
1009
+ /* minor HTTP version or end of request line */
1010
+ case s_req_http_minor:
1011
+ {
1012
+ if (ch == CR) {
1013
+ state = s_req_line_almost_done;
1014
+ break;
1015
+ }
1016
+
1017
+ if (ch == LF) {
1018
+ state = s_header_field_start;
1019
+ break;
1020
+ }
1021
+
1022
+ /* XXX allow spaces after digit? */
1023
+
1024
+ if (!IS_NUM(ch)) goto error;
1025
+
1026
+ parser->http_minor *= 10;
1027
+ parser->http_minor += ch - '0';
1028
+
1029
+ if (parser->http_minor > 999) goto error;
1030
+ break;
1031
+ }
1032
+
1033
+ /* end of request line */
1034
+ case s_req_line_almost_done:
1035
+ {
1036
+ if (ch != LF) goto error;
1037
+ state = s_header_field_start;
1038
+ break;
1039
+ }
1040
+
1041
+ case s_header_field_start:
1042
+ {
1043
+ if (ch == CR) {
1044
+ state = s_headers_almost_done;
1045
+ break;
1046
+ }
1047
+
1048
+ if (ch == LF) {
1049
+ /* they might be just sending \n instead of \r\n so this would be
1050
+ * the second \n to denote the end of headers*/
1051
+ state = s_headers_almost_done;
1052
+ goto headers_almost_done;
1053
+ }
1054
+
1055
+ c = TOKEN(ch);
1056
+
1057
+ if (!c) goto error;
1058
+
1059
+ MARK(header_field);
1060
+
1061
+ index = 0;
1062
+ state = s_header_field;
1063
+
1064
+ switch (c) {
1065
+ case 'c':
1066
+ header_state = h_C;
1067
+ break;
1068
+
1069
+ case 'p':
1070
+ header_state = h_matching_proxy_connection;
1071
+ break;
1072
+
1073
+ case 't':
1074
+ header_state = h_matching_transfer_encoding;
1075
+ break;
1076
+
1077
+ case 'u':
1078
+ header_state = h_matching_upgrade;
1079
+ break;
1080
+
1081
+ default:
1082
+ header_state = h_general;
1083
+ break;
1084
+ }
1085
+ break;
1086
+ }
1087
+
1088
+ case s_header_field:
1089
+ {
1090
+ c = TOKEN(ch);
1091
+
1092
+ if (c) {
1093
+ switch (header_state) {
1094
+ case h_general:
1095
+ break;
1096
+
1097
+ case h_C:
1098
+ index++;
1099
+ header_state = (c == 'o' ? h_CO : h_general);
1100
+ break;
1101
+
1102
+ case h_CO:
1103
+ index++;
1104
+ header_state = (c == 'n' ? h_CON : h_general);
1105
+ break;
1106
+
1107
+ case h_CON:
1108
+ index++;
1109
+ switch (c) {
1110
+ case 'n':
1111
+ header_state = h_matching_connection;
1112
+ break;
1113
+ case 't':
1114
+ header_state = h_matching_content_length;
1115
+ break;
1116
+ default:
1117
+ header_state = h_general;
1118
+ break;
1119
+ }
1120
+ break;
1121
+
1122
+ /* connection */
1123
+
1124
+ case h_matching_connection:
1125
+ index++;
1126
+ if (index > sizeof(CONNECTION)-1
1127
+ || c != CONNECTION[index]) {
1128
+ header_state = h_general;
1129
+ } else if (index == sizeof(CONNECTION)-2) {
1130
+ header_state = h_connection;
1131
+ }
1132
+ break;
1133
+
1134
+ /* proxy-connection */
1135
+
1136
+ case h_matching_proxy_connection:
1137
+ index++;
1138
+ if (index > sizeof(PROXY_CONNECTION)-1
1139
+ || c != PROXY_CONNECTION[index]) {
1140
+ header_state = h_general;
1141
+ } else if (index == sizeof(PROXY_CONNECTION)-2) {
1142
+ header_state = h_connection;
1143
+ }
1144
+ break;
1145
+
1146
+ /* content-length */
1147
+
1148
+ case h_matching_content_length:
1149
+ index++;
1150
+ if (index > sizeof(CONTENT_LENGTH)-1
1151
+ || c != CONTENT_LENGTH[index]) {
1152
+ header_state = h_general;
1153
+ } else if (index == sizeof(CONTENT_LENGTH)-2) {
1154
+ header_state = h_content_length;
1155
+ }
1156
+ break;
1157
+
1158
+ /* transfer-encoding */
1159
+
1160
+ case h_matching_transfer_encoding:
1161
+ index++;
1162
+ if (index > sizeof(TRANSFER_ENCODING)-1
1163
+ || c != TRANSFER_ENCODING[index]) {
1164
+ header_state = h_general;
1165
+ } else if (index == sizeof(TRANSFER_ENCODING)-2) {
1166
+ header_state = h_transfer_encoding;
1167
+ }
1168
+ break;
1169
+
1170
+ /* upgrade */
1171
+
1172
+ case h_matching_upgrade:
1173
+ index++;
1174
+ if (index > sizeof(UPGRADE)-1
1175
+ || c != UPGRADE[index]) {
1176
+ header_state = h_general;
1177
+ } else if (index == sizeof(UPGRADE)-2) {
1178
+ header_state = h_upgrade;
1179
+ }
1180
+ break;
1181
+
1182
+ case h_connection:
1183
+ case h_content_length:
1184
+ case h_transfer_encoding:
1185
+ case h_upgrade:
1186
+ if (ch != ' ') header_state = h_general;
1187
+ break;
1188
+
1189
+ default:
1190
+ assert(0 && "Unknown header_state");
1191
+ break;
1192
+ }
1193
+ break;
1194
+ }
1195
+
1196
+ if (ch == ':') {
1197
+ CALLBACK(header_field);
1198
+ state = s_header_value_start;
1199
+ break;
1200
+ }
1201
+
1202
+ if (ch == CR) {
1203
+ state = s_header_almost_done;
1204
+ CALLBACK(header_field);
1205
+ break;
1206
+ }
1207
+
1208
+ if (ch == LF) {
1209
+ CALLBACK(header_field);
1210
+ state = s_header_field_start;
1211
+ break;
1212
+ }
1213
+
1214
+ goto error;
1215
+ }
1216
+
1217
+ case s_header_value_start:
1218
+ {
1219
+ if (ch == ' ') break;
1220
+
1221
+ MARK(header_value);
1222
+
1223
+ state = s_header_value;
1224
+ index = 0;
1225
+
1226
+ c = LOWER(ch);
1227
+
1228
+ if (ch == CR) {
1229
+ CALLBACK(header_value);
1230
+ header_state = h_general;
1231
+ state = s_header_almost_done;
1232
+ break;
1233
+ }
1234
+
1235
+ if (ch == LF) {
1236
+ CALLBACK(header_value);
1237
+ state = s_header_field_start;
1238
+ break;
1239
+ }
1240
+
1241
+ switch (header_state) {
1242
+ case h_upgrade:
1243
+ parser->flags |= F_UPGRADE;
1244
+ header_state = h_general;
1245
+ break;
1246
+
1247
+ case h_transfer_encoding:
1248
+ /* looking for 'Transfer-Encoding: chunked' */
1249
+ if ('c' == c) {
1250
+ header_state = h_matching_transfer_encoding_chunked;
1251
+ } else {
1252
+ header_state = h_general;
1253
+ }
1254
+ break;
1255
+
1256
+ case h_content_length:
1257
+ if (!IS_NUM(ch)) goto error;
1258
+ parser->content_length = ch - '0';
1259
+ break;
1260
+
1261
+ case h_connection:
1262
+ /* looking for 'Connection: keep-alive' */
1263
+ if (c == 'k') {
1264
+ header_state = h_matching_connection_keep_alive;
1265
+ /* looking for 'Connection: close' */
1266
+ } else if (c == 'c') {
1267
+ header_state = h_matching_connection_close;
1268
+ } else {
1269
+ header_state = h_general;
1270
+ }
1271
+ break;
1272
+
1273
+ default:
1274
+ header_state = h_general;
1275
+ break;
1276
+ }
1277
+ break;
1278
+ }
1279
+
1280
+ case s_header_value:
1281
+ {
1282
+ c = LOWER(ch);
1283
+
1284
+ if (ch == CR) {
1285
+ CALLBACK(header_value);
1286
+ state = s_header_almost_done;
1287
+ break;
1288
+ }
1289
+
1290
+ if (ch == LF) {
1291
+ CALLBACK(header_value);
1292
+ goto header_almost_done;
1293
+ }
1294
+
1295
+ switch (header_state) {
1296
+ case h_general:
1297
+ break;
1298
+
1299
+ case h_connection:
1300
+ case h_transfer_encoding:
1301
+ assert(0 && "Shouldn't get here.");
1302
+ break;
1303
+
1304
+ case h_content_length:
1305
+ if (ch == ' ') break;
1306
+ if (!IS_NUM(ch)) goto error;
1307
+ parser->content_length *= 10;
1308
+ parser->content_length += ch - '0';
1309
+ break;
1310
+
1311
+ /* Transfer-Encoding: chunked */
1312
+ case h_matching_transfer_encoding_chunked:
1313
+ index++;
1314
+ if (index > sizeof(CHUNKED)-1
1315
+ || c != CHUNKED[index]) {
1316
+ header_state = h_general;
1317
+ } else if (index == sizeof(CHUNKED)-2) {
1318
+ header_state = h_transfer_encoding_chunked;
1319
+ }
1320
+ break;
1321
+
1322
+ /* looking for 'Connection: keep-alive' */
1323
+ case h_matching_connection_keep_alive:
1324
+ index++;
1325
+ if (index > sizeof(KEEP_ALIVE)-1
1326
+ || c != KEEP_ALIVE[index]) {
1327
+ header_state = h_general;
1328
+ } else if (index == sizeof(KEEP_ALIVE)-2) {
1329
+ header_state = h_connection_keep_alive;
1330
+ }
1331
+ break;
1332
+
1333
+ /* looking for 'Connection: close' */
1334
+ case h_matching_connection_close:
1335
+ index++;
1336
+ if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
1337
+ header_state = h_general;
1338
+ } else if (index == sizeof(CLOSE)-2) {
1339
+ header_state = h_connection_close;
1340
+ }
1341
+ break;
1342
+
1343
+ case h_transfer_encoding_chunked:
1344
+ case h_connection_keep_alive:
1345
+ case h_connection_close:
1346
+ if (ch != ' ') header_state = h_general;
1347
+ break;
1348
+
1349
+ default:
1350
+ state = s_header_value;
1351
+ header_state = h_general;
1352
+ break;
1353
+ }
1354
+ break;
1355
+ }
1356
+
1357
+ case s_header_almost_done:
1358
+ header_almost_done:
1359
+ {
1360
+ STRICT_CHECK(ch != LF);
1361
+
1362
+ state = s_header_field_start;
1363
+
1364
+ switch (header_state) {
1365
+ case h_connection_keep_alive:
1366
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1367
+ break;
1368
+ case h_connection_close:
1369
+ parser->flags |= F_CONNECTION_CLOSE;
1370
+ break;
1371
+ case h_transfer_encoding_chunked:
1372
+ parser->flags |= F_CHUNKED;
1373
+ break;
1374
+ default:
1375
+ break;
1376
+ }
1377
+ break;
1378
+ }
1379
+
1380
+ case s_headers_almost_done:
1381
+ headers_almost_done:
1382
+ {
1383
+ STRICT_CHECK(ch != LF);
1384
+
1385
+ if (parser->flags & F_TRAILING) {
1386
+ /* End of a chunked request */
1387
+ CALLBACK2(message_complete);
1388
+ state = NEW_MESSAGE();
1389
+ break;
1390
+ }
1391
+
1392
+ nread = 0;
1393
+
1394
+ if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) {
1395
+ parser->upgrade = 1;
1396
+ }
1397
+
1398
+ /* Here we call the headers_complete callback. This is somewhat
1399
+ * different than other callbacks because if the user returns 1, we
1400
+ * will interpret that as saying that this message has no body. This
1401
+ * is needed for the annoying case of receiving a response to a HEAD
1402
+ * request.
1403
+ */
1404
+ if (settings->on_headers_complete) {
1405
+ switch (settings->on_headers_complete(parser)) {
1406
+ case 0:
1407
+ break;
1408
+
1409
+ case 1:
1410
+ parser->flags |= F_SKIPBODY;
1411
+ break;
1412
+
1413
+ default:
1414
+ parser->state = state;
1415
+ return p - data; /* Error */
1416
+ }
1417
+ }
1418
+
1419
+ /* Exit, the rest of the connect is in a different protocol. */
1420
+ if (parser->upgrade) {
1421
+ CALLBACK2(message_complete);
1422
+ return (p - data);
1423
+ }
1424
+
1425
+ if (parser->flags & F_SKIPBODY) {
1426
+ CALLBACK2(message_complete);
1427
+ state = NEW_MESSAGE();
1428
+ } else if (parser->flags & F_CHUNKED) {
1429
+ /* chunked encoding - ignore Content-Length header */
1430
+ state = s_chunk_size_start;
1431
+ } else {
1432
+ if (parser->content_length == 0) {
1433
+ /* Content-Length header given but zero: Content-Length: 0\r\n */
1434
+ CALLBACK2(message_complete);
1435
+ state = NEW_MESSAGE();
1436
+ } else if (parser->content_length > 0) {
1437
+ /* Content-Length header given and non-zero */
1438
+ state = s_body_identity;
1439
+ } else {
1440
+ if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) {
1441
+ /* Assume content-length 0 - read the next */
1442
+ CALLBACK2(message_complete);
1443
+ state = NEW_MESSAGE();
1444
+ } else {
1445
+ /* Read body until EOF */
1446
+ state = s_body_identity_eof;
1447
+ }
1448
+ }
1449
+ }
1450
+
1451
+ break;
1452
+ }
1453
+
1454
+ case s_body_identity:
1455
+ to_read = MIN(pe - p, (int64_t)parser->content_length);
1456
+ if (to_read > 0) {
1457
+ if (settings->on_body) settings->on_body(parser, p, to_read);
1458
+ p += to_read - 1;
1459
+ parser->content_length -= to_read;
1460
+ if (parser->content_length == 0) {
1461
+ CALLBACK2(message_complete);
1462
+ state = NEW_MESSAGE();
1463
+ }
1464
+ }
1465
+ break;
1466
+
1467
+ /* read until EOF */
1468
+ case s_body_identity_eof:
1469
+ to_read = pe - p;
1470
+ if (to_read > 0) {
1471
+ if (settings->on_body) settings->on_body(parser, p, to_read);
1472
+ p += to_read - 1;
1473
+ }
1474
+ break;
1475
+
1476
+ case s_chunk_size_start:
1477
+ {
1478
+ assert(nread == 1);
1479
+ assert(parser->flags & F_CHUNKED);
1480
+
1481
+ c = unhex[(unsigned char)ch];
1482
+ if (c == -1) goto error;
1483
+ parser->content_length = c;
1484
+ state = s_chunk_size;
1485
+ break;
1486
+ }
1487
+
1488
+ case s_chunk_size:
1489
+ {
1490
+ assert(parser->flags & F_CHUNKED);
1491
+
1492
+ if (ch == CR) {
1493
+ state = s_chunk_size_almost_done;
1494
+ break;
1495
+ }
1496
+
1497
+ c = unhex[(unsigned char)ch];
1498
+
1499
+ if (c == -1) {
1500
+ if (ch == ';' || ch == ' ') {
1501
+ state = s_chunk_parameters;
1502
+ break;
1503
+ }
1504
+ goto error;
1505
+ }
1506
+
1507
+ parser->content_length *= 16;
1508
+ parser->content_length += c;
1509
+ break;
1510
+ }
1511
+
1512
+ case s_chunk_parameters:
1513
+ {
1514
+ assert(parser->flags & F_CHUNKED);
1515
+ /* just ignore this shit. TODO check for overflow */
1516
+ if (ch == CR) {
1517
+ state = s_chunk_size_almost_done;
1518
+ break;
1519
+ }
1520
+ break;
1521
+ }
1522
+
1523
+ case s_chunk_size_almost_done:
1524
+ {
1525
+ assert(parser->flags & F_CHUNKED);
1526
+ STRICT_CHECK(ch != LF);
1527
+
1528
+ nread = 0;
1529
+
1530
+ if (parser->content_length == 0) {
1531
+ parser->flags |= F_TRAILING;
1532
+ state = s_header_field_start;
1533
+ } else {
1534
+ state = s_chunk_data;
1535
+ }
1536
+ break;
1537
+ }
1538
+
1539
+ case s_chunk_data:
1540
+ {
1541
+ assert(parser->flags & F_CHUNKED);
1542
+
1543
+ to_read = MIN(pe - p, (int64_t)(parser->content_length));
1544
+
1545
+ if (to_read > 0) {
1546
+ if (settings->on_body) settings->on_body(parser, p, to_read);
1547
+ p += to_read - 1;
1548
+ }
1549
+
1550
+ if (to_read == parser->content_length) {
1551
+ state = s_chunk_data_almost_done;
1552
+ }
1553
+
1554
+ parser->content_length -= to_read;
1555
+ break;
1556
+ }
1557
+
1558
+ case s_chunk_data_almost_done:
1559
+ assert(parser->flags & F_CHUNKED);
1560
+ STRICT_CHECK(ch != CR);
1561
+ state = s_chunk_data_done;
1562
+ break;
1563
+
1564
+ case s_chunk_data_done:
1565
+ assert(parser->flags & F_CHUNKED);
1566
+ STRICT_CHECK(ch != LF);
1567
+ state = s_chunk_size_start;
1568
+ break;
1569
+
1570
+ default:
1571
+ assert(0 && "unhandled state");
1572
+ goto error;
1573
+ }
1574
+ }
1575
+
1576
+ CALLBACK_NOCLEAR(header_field);
1577
+ CALLBACK_NOCLEAR(header_value);
1578
+ CALLBACK_NOCLEAR(fragment);
1579
+ CALLBACK_NOCLEAR(query_string);
1580
+ CALLBACK_NOCLEAR(path);
1581
+ CALLBACK_NOCLEAR(url);
1582
+
1583
+ parser->state = state;
1584
+ parser->header_state = header_state;
1585
+ parser->index = index;
1586
+ parser->nread = nread;
1587
+
1588
+ return len;
1589
+
1590
+ error:
1591
+ parser->state = s_dead;
1592
+ return (p - data);
1593
+ }
1594
+
1595
+
1596
+ int
1597
+ http_should_keep_alive (http_parser *parser)
1598
+ {
1599
+ if (parser->http_major > 0 && parser->http_minor > 0) {
1600
+ /* HTTP/1.1 */
1601
+ if (parser->flags & F_CONNECTION_CLOSE) {
1602
+ return 0;
1603
+ } else {
1604
+ return 1;
1605
+ }
1606
+ } else {
1607
+ /* HTTP/1.0 or earlier */
1608
+ if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
1609
+ return 1;
1610
+ } else {
1611
+ return 0;
1612
+ }
1613
+ }
1614
+ }
1615
+
1616
+
1617
+ const char * http_method_str (enum http_method m)
1618
+ {
1619
+ return method_strings[m];
1620
+ }
1621
+
1622
+
1623
+ void
1624
+ http_parser_init (http_parser *parser, enum http_parser_type t)
1625
+ {
1626
+ parser->type = t;
1627
+ parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1628
+ parser->nread = 0;
1629
+ parser->upgrade = 0;
1630
+ parser->flags = 0;
1631
+ parser->method = 0;
1632
+ }