yakischloba-http-parser 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,502 @@
1
+ /* Copyright (c) 2008, 2009 Ryan Dahl (ry@tinyclouds.org)
2
+ * Based on Zed Shaw's Mongrel, copyright (c) Zed A. Shaw
3
+ *
4
+ * All rights reserved.
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person obtaining
7
+ * a copy of this software and associated documentation files (the
8
+ * "Software"), to deal in the Software without restriction, including
9
+ * without limitation the rights to use, copy, modify, merge, publish,
10
+ * distribute, sublicense, and/or sell copies of the Software, and to
11
+ * permit persons to whom the Software is furnished to do so, subject to
12
+ * the following conditions:
13
+ *
14
+ * The above copyright notice and this permission notice shall be
15
+ * included in all copies or substantial portions of the Software.
16
+ *
17
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
+ */
25
+ #include "http_parser.h"
26
+ #include <limits.h>
27
+ #include <assert.h>
28
+
29
+ static const int unhex[] = {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 /* 0x00-0x0f */
30
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 /* 0x10-0x1f */
31
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 /* 0x20-0x2f */
32
+ , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1 /* 0x30-0x3f: '0'-'9' */
33
+ ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 /* 0x40-0x4f: 'A'-'F' */
34
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 /* 0x50-0x5f */
35
+ ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 /* 0x60-0x6f: 'a'-'f' */
36
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 /* 0x70-0x7f */
37
+ }; /* unhex[c] is the numeric value of the hex digit with character code c, or -1 if c is not a hex digit; only codes 0..127 are covered, so index it with validated input only */
38
+ #define TRUE 1 /* boolean values stored in parser flags such as error and eating */
39
+ #define FALSE 0
40
+ #define MIN(a,b) ((a) < (b) ? (a) : (b)) /* smaller of a and b; evaluates its arguments twice, so pass side-effect-free expressions */
41
+ #define NULL ((void*)0) /* NOTE(review): redefines NULL if an included header already defines it */
42
+
43
+ #define MAX_FIELD_SIZE (80*1024) /* largest accumulated size, in bytes, that CALLBACK accepts for one field */
44
+
45
+ #define REMAINING ((unsigned long)(pe - p)) /* distance from p to pe; both must be in scope at the use site */
46
+ #define CALLBACK(FOR) /* report the bytes between parser->FOR_mark and p through parser->on_FOR; expects parser, p and callback_return_value in the caller's scope */ \
47
+ do { \
48
+ if (parser->FOR##_mark) { /* nothing to report unless a mark is set */ \
49
+ parser->FOR##_size += p - parser->FOR##_mark; /* running total for this field */ \
50
+ if (parser->FOR##_size > MAX_FIELD_SIZE) { /* field too large: flag the error and return 0 from the enclosing function */ \
51
+ parser->error = TRUE; \
52
+ return 0; \
53
+ } \
54
+ if (parser->on_##FOR) { /* the callback is optional */ \
55
+ callback_return_value = parser->on_##FOR(parser, \
56
+ parser->FOR##_mark, \
57
+ p - parser->FOR##_mark); /* length of the span being reported */ \
58
+ } \
59
+ } \
60
+ } while(0)
61
+
62
+ #define RESET_PARSER(parser) do { /* put the per-message fields listed below back to their initial values; nothing else in the parser is touched */ \
63
+   (parser)->chunk_size = 0; \
64
+   (parser)->eating = 0; \
65
+   (parser)->header_field_mark = NULL; \
66
+   (parser)->header_value_mark = NULL; \
67
+   (parser)->query_string_mark = NULL; \
68
+   (parser)->path_mark = NULL; \
69
+   (parser)->uri_mark = NULL; \
70
+   (parser)->fragment_mark = NULL; \
71
+   (parser)->status_code = 0; \
72
+   (parser)->method = 0; \
73
+   (parser)->transfer_encoding = HTTP_IDENTITY; \
74
+   (parser)->version_major = 0; \
75
+   (parser)->version_minor = 0; \
76
+   (parser)->keep_alive = -1; /* -1: neither set_keep_alive nor set_not_keep_alive has run */ \
77
+   (parser)->content_length = 0; \
78
+   (parser)->body_read = 0; } while (0)
79
+
80
+ #define END_REQUEST /* a message is complete: notify on_message_complete (if set), then reset the per-message state; expects parser and callback_return_value in scope */ \
81
+ do { \
82
+ if (parser->on_message_complete) { /* the callback is optional */ \
83
+ callback_return_value = \
84
+ parser->on_message_complete(parser); \
85
+ } \
86
+ RESET_PARSER(parser); /* runs whether or not a callback was invoked */ \
87
+ } while (0)
88
+
89
+ #define SKIP_BODY(nskip) /* pass nskip body bytes at p to on_body and advance past them; expects parser, p, tmp and callback_return_value in scope */ \
90
+ do { \
91
+ tmp = (nskip); /* evaluate the argument once into the caller's scratch variable */ \
92
+ if (parser->on_body && tmp > 0) { /* no callback for an empty span */ \
93
+ callback_return_value = parser->on_body(parser, p, tmp); \
94
+ } \
95
+ if (callback_return_value == 0) { /* state advances only when no callback has reported failure */ \
96
+ p += tmp; \
97
+ parser->body_read += tmp; \
98
+ parser->chunk_size -= tmp; \
99
+ if (0 == parser->chunk_size) { /* nothing left of this chunk or body */ \
100
+ parser->eating = FALSE; \
101
+ if (parser->transfer_encoding == HTTP_IDENTITY) { /* identity body: the message ends here */ \
102
+ END_REQUEST; \
103
+ } \
104
+ } else { \
105
+ parser->eating = TRUE; /* more body outstanding; http_parser_execute checks this flag on entry */ \
106
+ } \
107
+ } \
108
+ } while (0)
109
+
110
+ %%{
111
+ machine http_parser;
112
+
113
+ action mark_header_field { /* a header field name starts at p: restart its byte count and remember the position */
114
+   parser->header_field_size = 0;
115
+   parser->header_field_mark = p;
116
+ }
117
+
118
+ action mark_header_value { /* a header value starts at p */
119
+   parser->header_value_size = 0;
120
+   parser->header_value_mark = p;
121
+ }
122
+
123
+ action mark_fragment { /* a URI fragment starts at p */
124
+   parser->fragment_size = 0;
125
+   parser->fragment_mark = p;
126
+ }
127
+
128
+ action mark_query_string { /* a query string starts at p */
129
+   parser->query_string_size = 0;
130
+   parser->query_string_mark = p;
131
+ }
132
+
133
+ action mark_request_path { /* a request path starts at p */
134
+   parser->path_size = 0;
135
+   parser->path_mark = p;
136
+ }
137
+
138
+ action mark_request_uri { /* a request URI starts at p */
139
+   parser->uri_size = 0;
140
+   parser->uri_mark = p;
141
+ }
142
+
143
+ action header_field { /* header field name ended: report it, then clear its mark and byte count */
144
+   CALLBACK(header_field);
145
+   if (callback_return_value) { /* a non-zero callback result aborts parsing */
146
+     parser->error = TRUE;
147
+     return 0;
148
+   }
149
+   parser->header_field_size = 0;
150
+   parser->header_field_mark = NULL;
151
+ }
152
+
153
+ action header_value { /* header value ended */
154
+   CALLBACK(header_value);
155
+   if (callback_return_value) {
156
+     parser->error = TRUE;
157
+     return 0;
158
+   }
159
+   parser->header_value_size = 0;
160
+   parser->header_value_mark = NULL;
161
+ }
162
+
163
+ action request_uri { /* request URI ended */
164
+   CALLBACK(uri);
165
+   if (callback_return_value) {
166
+     parser->error = TRUE;
167
+     return 0;
168
+   }
169
+   parser->uri_size = 0;
170
+   parser->uri_mark = NULL;
171
+ }
172
+
173
+ action fragment { /* URI fragment ended */
174
+   CALLBACK(fragment);
175
+   if (callback_return_value) {
176
+     parser->error = TRUE;
177
+     return 0;
178
+   }
179
+   parser->fragment_size = 0;
180
+   parser->fragment_mark = NULL;
181
+ }
182
+
183
+ action query_string { /* query string ended */
184
+   CALLBACK(query_string);
185
+   if (callback_return_value) {
186
+     parser->error = TRUE;
187
+     return 0;
188
+   }
189
+   parser->query_string_size = 0;
190
+   parser->query_string_mark = NULL;
191
+ }
192
+
193
+ action request_path { /* request path ended */
194
+   CALLBACK(path);
195
+   if (callback_return_value) {
196
+     parser->error = TRUE;
197
+     return 0;
198
+   }
199
+   parser->path_size = 0;
200
+   parser->path_mark = NULL;
201
+ }
202
+
203
+ action headers_complete { /* invoke the optional on_headers_complete callback; a non-zero result aborts parsing */
204
+   if (parser->on_headers_complete != NULL) {
205
+     callback_return_value = parser->on_headers_complete(parser);
206
+     if (callback_return_value) {
207
+       parser->error = TRUE;
208
+       return 0;
209
+     }
210
+   }
211
+ }
212
+
213
+ action begin_message { /* invoke the optional on_message_begin callback; a non-zero result aborts parsing */
214
+   if (parser->on_message_begin != NULL) {
215
+     callback_return_value = parser->on_message_begin(parser);
216
+     if (callback_return_value) {
217
+       parser->error = TRUE;
218
+       return 0;
219
+     }
220
+   }
221
+ }
222
+
223
+ action content_length { /* fold the decimal digit at p into parser->content_length, refusing values above INT_MAX */
224
+   if (parser->content_length > (INT_MAX - (*p - '0')) / 10) { /* checked before updating: the new value would exceed INT_MAX */
225
+     parser->error = TRUE;
226
+     return 0;
227
+   }
228
+   parser->content_length *= 10;
229
+   parser->content_length += *p - '0';
230
+ }
231
+
232
+ action status_code { /* append the decimal digit at p to the status code */
233
+   parser->status_code = parser->status_code * 10
234
+                       + (*p - '0');
235
+ }
236
+
237
+ action use_identity_encoding { /* body is not chunked */ parser->transfer_encoding = HTTP_IDENTITY; }
238
+ action use_chunked_encoding { /* body is chunked */ parser->transfer_encoding = HTTP_CHUNKED; }
239
+
240
+ action set_keep_alive { /* Connection: Keep-Alive */ parser->keep_alive = TRUE; }
241
+ action set_not_keep_alive { /* Connection: close */ parser->keep_alive = FALSE; }
242
+
243
+ action version_major { /* append the decimal digit at p to the major version */
244
+   parser->version_major = parser->version_major * 10
245
+                         + (*p - '0');
246
+ }
247
+
248
+ action version_minor { /* append the decimal digit at p to the minor version */
249
+   parser->version_minor = parser->version_minor * 10
250
+                         + (*p - '0');
251
+ }
252
+
253
+ action add_to_chunk_size { /* append the hex digit at p to the chunk size; NOTE(review): no overflow guard on long digit runs */
254
+   parser->chunk_size = parser->chunk_size * 16
255
+                      + unhex[(int)*p];
256
+ }
257
+
258
+ action skip_chunk_data { /* consume as much of the current chunk as this buffer holds */
259
+ SKIP_BODY(MIN(parser->chunk_size, REMAINING));
260
+ if (callback_return_value != 0) { /* on_body (or on_message_complete) reported failure */
261
+ parser->error = TRUE;
262
+ return 0;
263
+ }
264
+
265
+ fhold; /* Ragel statement: step p back by one */
266
+ if (parser->chunk_size > REMAINING) { /* chunk data still outstanding: stop scanning this buffer */
267
+ fbreak;
268
+ } else {
269
+ fgoto chunk_end; /* continue with the CRLF that terminates the chunk */
270
+ }
271
+ }
272
+
273
+ action end_chunked_body { /* chunked message finished: signal completion, then select the top-level machine matching the parser type */
274
+   END_REQUEST;
275
+   if (parser->type != HTTP_REQUEST) {
276
+     fnext Responses;
277
+   } else {
278
+     fnext Requests;
279
+   }
280
+ }
281
+
282
+ action body_logic { /* runs when the header section ends: choose how the body is read */
283
+ if (parser->transfer_encoding == HTTP_CHUNKED) {
284
+ fnext ChunkedBody; /* chunked body: switch to the chunk grammar */
285
+ } else {
286
+ /* this is pretty stupid. i'd prefer to combine this with skip_chunk_data */
287
+ parser->chunk_size = parser->content_length; /* treat the identity body as one chunk of content_length bytes */
288
+ p += 1; /* step past the current character so SKIP_BODY starts at the following byte */
289
+
290
+ SKIP_BODY(MIN(REMAINING, parser->content_length));
291
+
292
+ if (callback_return_value != 0) {
293
+ parser->error = TRUE;
294
+ return 0;
295
+ }
296
+
297
+ fhold; /* Ragel statement: step p back by one */
298
+ if(parser->chunk_size > REMAINING) { /* body continues beyond this buffer: stop scanning */
299
+ fbreak;
300
+ }
301
+ }
302
+ }
303
+
304
+ CRLF = "\r\n"; # line terminator used throughout the grammar
305
+
306
+ # character types
307
+ CTL = (cntrl | 127);
308
+ safe = ("$" | "-" | "_" | ".");
309
+ extra = ("!" | "*" | "'" | "(" | ")" | ",");
310
+ reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+");
311
+ unsafe = (CTL | " " | "\"" | "#" | "%" | "<" | ">");
312
+ national = any -- (alpha | digit | reserved | extra | safe | unsafe); # every character not in one of the classes above
313
+ unreserved = (alpha | digit | safe | extra | national);
314
+ escape = ("%" xdigit xdigit); # percent sign followed by two hex digits
315
+ uchar = (unreserved | escape);
316
+ pchar = (uchar | ":" | "@" | "&" | "=" | "+");
317
+ tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\""
318
+ | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");
319
+
320
+ # elements
321
+ token = (ascii -- (CTL | tspecials)); # one ASCII character that is neither a control nor a tspecial
322
+ quote = "\"";
323
+ # qdtext = token -- "\"";
324
+ # quoted_pair = "\" ascii;
325
+ # quoted_string = "\"" (qdtext | quoted_pair )* "\"";
326
+
327
+ # request line elements
328
+
329
+ Method = ( "COPY" %{ parser->method = HTTP_COPY; }
330
+ | "DELETE" %{ parser->method = HTTP_DELETE; }
331
+ | "GET" %{ parser->method = HTTP_GET; }
332
+ | "HEAD" %{ parser->method = HTTP_HEAD; }
333
+ | "LOCK" %{ parser->method = HTTP_LOCK; }
334
+ | "MKCOL" %{ parser->method = HTTP_MKCOL; }
335
+ | "MOVE" %{ parser->method = HTTP_MOVE; }
336
+ | "OPTIONS" %{ parser->method = HTTP_OPTIONS; }
337
+ | "POST" %{ parser->method = HTTP_POST; }
338
+ | "PROPFIND" %{ parser->method = HTTP_PROPFIND; }
339
+ | "PROPPATCH" %{ parser->method = HTTP_PROPPATCH; }
340
+ | "PUT" %{ parser->method = HTTP_PUT; }
341
+ | "TRACE" %{ parser->method = HTTP_TRACE; }
342
+ | "UNLOCK" %{ parser->method = HTTP_UNLOCK; }
343
+ ); # Not allowing extension methods
344
+
345
+ HTTP_Version = "HTTP/" digit $version_major "." digit $version_minor; # exactly one digit each for major and minor
346
+
347
+ scheme = ( alpha | digit | "+" | "-" | "." )* ;
348
+ absolute_uri = (scheme ":" (uchar | reserved )*);
349
+ path = ( pchar+ ( "/" pchar* )* ) ;
350
+ query = ( uchar | reserved )* >mark_query_string %query_string ; # reported through the query_string callback
351
+ param = ( pchar | "/" )* ;
352
+ params = ( param ( ";" param )* ) ;
353
+ rel_path = ( path? (";" params)? ) ;
354
+ absolute_path = ( "/"+ rel_path ) >mark_request_path %request_path ("?" query)?; # the path callback covers the part before "?"
355
+ Request_URI = ( "*" | absolute_uri | absolute_path ) >mark_request_uri %request_uri; # the uri callback covers the whole URI
356
+ Fragment = ( uchar | reserved )* >mark_fragment %fragment;
357
+
358
+ field_name = ( token -- ":" )+; # one or more token characters other than ":"
359
+ Field_Name = field_name >mark_header_field %header_field; # same, reported through the header_field callback
360
+
361
+ field_value = ((any - " ") any*)?; # possibly empty; must not start with a space
362
+ Field_Value = field_value >mark_header_value %header_value; # same, reported through the header_value callback
363
+
364
+ hsep = ":" " "*;
365
+ header = (field_name hsep field_value) :> CRLF; # header line without callbacks (used for trailing headers)
366
+ Header = ( ("Content-Length"i hsep digit+ $content_length)
367
+          | ("Connection"i hsep
368
+              ( "Keep-Alive"i %set_keep_alive
369
+              | "close"i %set_not_keep_alive
370
+              )
371
+            )
372
+          | ("Transfer-Encoding"i %use_chunked_encoding hsep "identity"i %use_identity_encoding) # any Transfer-Encoding header selects chunked; an "identity" value (now matched in any letter case) switches back
373
+          | (Field_Name hsep Field_Value)
374
+          ) :> CRLF;
375
+
376
+ Headers = (Header)* :> CRLF @headers_complete; # header lines, then the empty line that ends the header section
377
+
378
+ Request_Line = ( Method " " Request_URI ("#" Fragment)? " " HTTP_Version CRLF ) ;
379
+
380
+ StatusCode = (digit digit digit) $status_code; # exactly three digits, accumulated by the status_code action
381
+ ReasonPhrase = ascii* -- ("\r" | "\n");
382
+ StatusLine = HTTP_Version " " StatusCode (" " ReasonPhrase)? CRLF;
383
+
384
+ # chunked message
385
+ trailing_headers = header*;
386
+ #chunk_ext_val = token | quoted_string;
387
+ chunk_ext_val = token*;
388
+ chunk_ext_name = token*;
389
+ chunk_extension = ( ";" " "* chunk_ext_name ("=" chunk_ext_val)? )*;
390
+ last_chunk = "0"+ chunk_extension CRLF; # a size made only of zeros ends the chunk list
391
+ chunk_size = (xdigit* [1-9a-fA-F] xdigit*) $add_to_chunk_size; # hex size containing at least one non-zero digit
392
+ chunk_end = CRLF;
393
+ chunk_body = any >skip_chunk_data; # the entering action advances over the chunk data itself
394
+ chunk_begin = chunk_size chunk_extension CRLF;
395
+ chunk = chunk_begin chunk_body chunk_end;
396
+ ChunkedBody := chunk* last_chunk trailing_headers CRLF @end_chunked_body;
397
+
398
+ Request = (Request_Line Headers) >begin_message @body_logic; # begin_message on the first character, body_logic on the last
399
+ Response = (StatusLine Headers) >begin_message @body_logic;
400
+
401
+ Requests := Request*;
402
+ Responses := Response*;
403
+
404
+ main := any >{ /* start state: hold the first character and jump to the machine matching parser->type */
405
+ fhold;
406
+ if (parser->type == HTTP_REQUEST) {
407
+ fgoto Requests;
408
+ } else {
409
+ fgoto Responses;
410
+ }
411
+ };
412
+
413
+ }%%
414
+
415
+ %% write data;
416
+
417
+ void
418
+ http_parser_init (http_parser *parser, enum http_parser_type type)
419
+ { /* Prepare parser for use: store the generated machine's start state and the parser type, clear the error flag and every callback, and reset the per-message fields. */
420
+   int cs = 0;
421
+   %% write init;
422
+   parser->type = type;
423
+   parser->error = 0;
424
+   parser->cs = cs;
425
+
426
+   parser->on_message_complete = NULL;
427
+   parser->on_body = NULL;
428
+   parser->on_headers_complete = NULL;
429
+   parser->on_header_value = NULL;
430
+   parser->on_header_field = NULL;
431
+   parser->on_fragment = NULL;
432
+   parser->on_uri = NULL;
433
+   parser->on_query_string = NULL;
434
+   parser->on_path = NULL;
435
+   parser->on_message_begin = NULL;
436
+
437
+   RESET_PARSER(parser);
438
+ }
439
+
440
+ /* Feed len bytes starting at buffer to the parser. Returns the number of bytes consumed (p - buffer); returns 0 with parser->error set when a callback returns non-zero or a field grows past MAX_FIELD_SIZE. */
441
+ size_t
442
+ http_parser_execute (http_parser *parser, const char *buffer, size_t len)
443
+ {
444
+   size_t tmp; /* scratch byte count assigned inside SKIP_BODY (the original author flagged this as a hack to remove) */
445
+   int callback_return_value = 0;
446
+   const char *p, *pe;
447
+   int cs = parser->cs;
448
+
449
+   p = buffer;
450
+   pe = buffer+len;
451
+
452
+   if (0 < parser->chunk_size && parser->eating) {
453
+     /* an earlier call stopped inside a body: consume its continuation first */
454
+     SKIP_BODY(MIN(len, parser->chunk_size));
455
+     if (callback_return_value != 0) {
456
+       parser->error = TRUE;
457
+       return 0;
458
+     }
459
+   }
460
+   /* a field still open from the previous call continues at the start of this buffer */
461
+   if (parser->header_field_mark) parser->header_field_mark = buffer;
462
+   if (parser->header_value_mark) parser->header_value_mark = buffer;
463
+   if (parser->fragment_mark) parser->fragment_mark = buffer;
464
+   if (parser->query_string_mark) parser->query_string_mark = buffer;
465
+   if (parser->path_mark) parser->path_mark = buffer;
466
+   if (parser->uri_mark) parser->uri_mark = buffer;
467
+
468
+   %% write exec;
469
+
470
+   parser->cs = cs;
471
+   /* report the partial data of fields still open at the end of this buffer; a failing callback aborts here just as it does inside the actions */
472
+   CALLBACK(header_field); if (callback_return_value != 0) { parser->error = TRUE; return 0; }
473
+   CALLBACK(header_value); if (callback_return_value != 0) { parser->error = TRUE; return 0; }
474
+   CALLBACK(fragment); if (callback_return_value != 0) { parser->error = TRUE; return 0; }
475
+   CALLBACK(query_string); if (callback_return_value != 0) { parser->error = TRUE; return 0; }
476
+   CALLBACK(path); if (callback_return_value != 0) { parser->error = TRUE; return 0; }
477
+   CALLBACK(uri); if (callback_return_value != 0) { parser->error = TRUE; return 0; }
478
+
479
+   assert(p <= pe && "buffer overflow after parsing execute");
480
+   return(p - buffer);
481
+ }
482
+
483
+ int
484
+ http_parser_has_error (http_parser *parser)
485
+ { /* Returns 1 when the error flag is set or the state machine is in its error state, otherwise 0. */
486
+   int failed = parser->error || parser->cs == http_parser_error;
487
+   return failed;
488
+ }
489
+
490
+ int
491
+ http_parser_should_keep_alive (http_parser *parser)
492
+ { /* Returns the recorded keep_alive value when one was set; otherwise a default derived from the HTTP version. */
493
+   if (parser->keep_alive != -1) {
494
+     return parser->keep_alive; /* set by the set_keep_alive / set_not_keep_alive actions */
495
+   }
496
+   /* keep_alive is still -1: HTTP/0.x closes, HTTP/1.0 closes, HTTP/1.x (x != 0) and any other major version stay open */
497
+   switch (parser->version_major) {
498
+   case 0:  return FALSE;
499
+   case 1:  return (parser->version_minor != 0);
500
+   default: return TRUE;
501
+   }
502
+ }