http-parser-lite 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,318 @@
1
+ /* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ * of this software and associated documentation files (the "Software"), to
5
+ * deal in the Software without restriction, including without limitation the
6
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7
+ * sell copies of the Software, and to permit persons to whom the Software is
8
+ * furnished to do so, subject to the following conditions:
9
+ *
10
+ * The above copyright notice and this permission notice shall be included in
11
+ * all copies or substantial portions of the Software.
12
+ *
13
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19
+ * IN THE SOFTWARE.
20
+ */
21
+ #ifndef http_parser_h
22
+ #define http_parser_h
23
+ #ifdef __cplusplus
24
+ extern "C" {
25
+ #endif
26
+
27
/* http_parser version, as major.minor. */
#define HTTP_PARSER_VERSION_MAJOR 1
#define HTTP_PARSER_VERSION_MINOR 0
29
+
30
+ #include <sys/types.h>
31
+ #if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600)
32
+ typedef __int8 int8_t;
33
+ typedef unsigned __int8 uint8_t;
34
+ typedef __int16 int16_t;
35
+ typedef unsigned __int16 uint16_t;
36
+ typedef __int32 int32_t;
37
+ typedef unsigned __int32 uint32_t;
38
+ typedef __int64 int64_t;
39
+ typedef unsigned __int64 uint64_t;
40
+
41
+ typedef unsigned int size_t;
42
+ typedef int ssize_t;
43
+ #else
44
+ #include <stdint.h>
45
+ #endif
46
+
47
/* Strict checking is enabled unless overridden. Compile with
 * -DHTTP_PARSER_STRICT=0 to perform fewer checks but run faster.
 */
#ifndef HTTP_PARSER_STRICT
# define HTTP_PARSER_STRICT 1
#endif

/* Compile with -DHTTP_PARSER_DEBUG=1 to add extra debugging information to
 * the error reporting facility (disabled unless overridden).
 */
#ifndef HTTP_PARSER_DEBUG
# define HTTP_PARSER_DEBUG 0
#endif
60
+
61
+
62
/* Maximum header size allowed (80 * 1024). */
#define HTTP_MAX_HEADER_SIZE (80*1024)
64
+
65
+
66
typedef struct http_parser http_parser;
typedef struct http_parser_settings http_parser_settings;


/* Callbacks should return non-zero to indicate an error. The parser will
 * then halt execution.
 *
 * The one exception is on_headers_complete. In an HTTP_RESPONSE parser,
 * returning '1' from on_headers_complete tells the parser that it should
 * not expect a body. This is used when receiving a response to a HEAD
 * request, which may contain 'Content-Length' or 'Transfer-Encoding:
 * chunked' headers that indicate the presence of a body.
 *
 * http_data_cb does not deliver complete strings. It may be called
 * arbitrarily many times for each string; e.g. you might get 10 callbacks
 * for on_url, each providing just a few more characters of data.
 */
typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);
typedef int (*http_cb) (http_parser*);
85
+
86
+
87
/* Request Methods.
 *
 * X-macro table: XX(num, name, string) is expanded once per method, where
 * num is the numeric value, name is the identifier suffix and string is the
 * method's spelling (note M-SEARCH, which is not a valid identifier).
 */
#define HTTP_METHOD_MAP(XX)         \
  XX(0,  DELETE,      DELETE)       \
  XX(1,  GET,         GET)          \
  XX(2,  HEAD,        HEAD)         \
  XX(3,  POST,        POST)         \
  XX(4,  PUT,         PUT)          \
  /* pathological */                \
  XX(5,  CONNECT,     CONNECT)      \
  XX(6,  OPTIONS,     OPTIONS)      \
  XX(7,  TRACE,       TRACE)        \
  /* webdav */                      \
  XX(8,  COPY,        COPY)         \
  XX(9,  LOCK,        LOCK)         \
  XX(10, MKCOL,       MKCOL)        \
  XX(11, MOVE,        MOVE)         \
  XX(12, PROPFIND,    PROPFIND)     \
  XX(13, PROPPATCH,   PROPPATCH)    \
  XX(14, SEARCH,      SEARCH)       \
  XX(15, UNLOCK,      UNLOCK)       \
  /* subversion */                  \
  XX(16, REPORT,      REPORT)       \
  XX(17, MKACTIVITY,  MKACTIVITY)   \
  XX(18, CHECKOUT,    CHECKOUT)     \
  XX(19, MERGE,       MERGE)        \
  /* upnp */                        \
  XX(20, MSEARCH,     M-SEARCH)     \
  XX(21, NOTIFY,      NOTIFY)       \
  XX(22, SUBSCRIBE,   SUBSCRIBE)    \
  XX(23, UNSUBSCRIBE, UNSUBSCRIBE)  \
  /* RFC-5789 */                    \
  XX(24, PATCH,       PATCH)        \
  XX(25, PURGE,       PURGE)

/* HTTP_<name> constants generated from HTTP_METHOD_MAP. */
enum http_method
  {
#define XX(num, name, string) HTTP_##name = num,
  HTTP_METHOD_MAP(XX)
#undef XX
  };
127
+
128
+
129
/* Parser mode passed to http_parser_init() and stored in http_parser.type. */
enum http_parser_type
  { HTTP_REQUEST
  , HTTP_RESPONSE
  , HTTP_BOTH
  };
130
+
131
+
132
/* Flag values for the http_parser.flags field (one bit each; the field is
 * six bits wide, so all six flags fit).
 */
enum flags
  { F_CHUNKED               = 1 << 0
  , F_CONNECTION_KEEP_ALIVE = 1 << 1
  , F_CONNECTION_CLOSE      = 1 << 2
  , F_TRAILING              = 1 << 3
  , F_UPGRADE               = 1 << 4
  , F_SKIPBODY              = 1 << 5
  };
141
+
142
+
143
/* Map for errno-related constants
 *
 * The provided argument should be a macro that takes 2 arguments: the error
 * name (without the HPE_ prefix) and its human-readable description.
 */
#define HTTP_ERRNO_MAP(XX)                                           \
  /* No error */                                                     \
  XX(OK, "success")                                                  \
                                                                     \
  /* Callback-related errors */                                      \
  XX(CB_message_begin, "the on_message_begin callback failed")       \
  XX(CB_url, "the on_url callback failed")                           \
  XX(CB_header_field, "the on_header_field callback failed")         \
  XX(CB_header_value, "the on_header_value callback failed")         \
  XX(CB_headers_complete, "the on_headers_complete callback failed") \
  XX(CB_body, "the on_body callback failed")                         \
  XX(CB_message_complete, "the on_message_complete callback failed") \
                                                                     \
  /* Parsing-related errors */                                       \
  XX(INVALID_EOF_STATE, "stream ended at an unexpected time")        \
  XX(HEADER_OVERFLOW,                                                \
     "too many header bytes seen; overflow detected")                \
  XX(CLOSED_CONNECTION,                                              \
     "data received after completed connection: close message")      \
  XX(INVALID_VERSION, "invalid HTTP version")                        \
  XX(INVALID_STATUS, "invalid HTTP status code")                     \
  XX(INVALID_METHOD, "invalid HTTP method")                          \
  XX(INVALID_URL, "invalid URL")                                     \
  XX(INVALID_HOST, "invalid host")                                   \
  XX(INVALID_PORT, "invalid port")                                   \
  XX(INVALID_PATH, "invalid path")                                   \
  XX(INVALID_QUERY_STRING, "invalid query string")                   \
  XX(INVALID_FRAGMENT, "invalid fragment")                           \
  XX(LF_EXPECTED, "LF character expected")                           \
  XX(INVALID_HEADER_TOKEN, "invalid character in header")            \
  XX(INVALID_CONTENT_LENGTH,                                         \
     "invalid character in content-length header")                   \
  XX(INVALID_CHUNK_SIZE,                                             \
     "invalid character in chunk size header")                       \
  XX(INVALID_CONSTANT, "invalid constant string")                    \
  XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\
  XX(STRICT, "strict mode assertion failed")                         \
  XX(PAUSED, "parser is paused")                                     \
  XX(UNKNOWN, "an unknown error occurred")


/* Define HPE_* values for each errno value above, numbered from 0 in map
 * order (so HPE_OK == 0).
 */
#define HTTP_ERRNO_GEN(n, s) HPE_##n,
enum http_errno {
  HTTP_ERRNO_MAP(HTTP_ERRNO_GEN)
};
#undef HTTP_ERRNO_GEN
194
+
195
+
196
/* Get an http_errno value from an http_parser (p is a pointer to it). */
#define HTTP_PARSER_ERRNO(p)            ((enum http_errno) (p)->http_errno)

/* Get the line number that generated the current error. The error_lineno
 * field only exists when HTTP_PARSER_DEBUG is enabled; otherwise this
 * expands to 0 without evaluating p.
 */
#if HTTP_PARSER_DEBUG
#define HTTP_PARSER_ERRNO_LINE(p)       ((p)->error_lineno)
#else
#define HTTP_PARSER_ERRNO_LINE(p)       0
#endif
205
+
206
+
207
/* Parser state. Member order and widths are part of the layout shared with
 * http_parser.c and must not be changed.
 */
struct http_parser {
  /** PRIVATE **/
  unsigned char type : 2;     /* enum http_parser_type */
  unsigned char flags : 6;    /* F_* values from 'flags' enum; semi-public */
  unsigned char state;        /* enum state from http_parser.c */
  unsigned char header_state; /* enum header_state from http_parser.c */
  unsigned char index;        /* index into current matcher */

  uint32_t nread;          /* # bytes read in various scenarios */
  uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */

  /** READ-ONLY **/
  unsigned short http_major;
  unsigned short http_minor;
  unsigned short status_code; /* responses only */
  unsigned char method;       /* requests only */
  unsigned char http_errno : 7; /* enum http_errno; read via HTTP_PARSER_ERRNO() */

  /* 1 = Upgrade header was present and the parser has exited because of that.
   * 0 = No upgrade header present.
   * Should be checked when http_parser_execute() returns in addition to
   * error checking.
   */
  unsigned char upgrade : 1;

#if HTTP_PARSER_DEBUG
  uint32_t error_lineno; /* read via HTTP_PARSER_ERRNO_LINE() */
#endif

  /** PUBLIC **/
  void *data; /* A pointer to get hook to the "connection" or "socket" object */
};
239
+
240
+
241
+ struct http_parser_settings {
242
+ http_cb on_message_begin;
243
+ http_data_cb on_url;
244
+ http_data_cb on_header_field;
245
+ http_data_cb on_header_value;
246
+ http_cb on_headers_complete;
247
+ http_data_cb on_body;
248
+ http_cb on_message_complete;
249
+ };
250
+
251
+
252
/* Indices of the URL components reported by http_parser_parse_url().
 * UF_MAX is the number of components, not a component itself.
 */
enum http_parser_url_fields
  { UF_SCHEMA           = 0
  , UF_HOST             = 1
  , UF_PORT             = 2
  , UF_PATH             = 3
  , UF_QUERY            = 4
  , UF_FRAGMENT         = 5
  , UF_MAX              = 6
  };


/* Result structure for http_parser_parse_url().
 *
 * Callers should index into field_data[] with UF_* values iff field_set
 * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and
 * because we probably have padding left over), we convert any port to
 * a uint16_t.
 */
struct http_parser_url {
  uint16_t field_set;           /* Bitmask of (1 << UF_*) values */
  uint16_t port;                /* Converted UF_PORT string */

  struct {
    uint16_t off;               /* Offset into buffer in which field starts */
    uint16_t len;               /* Length of run in buffer */
  } field_data[UF_MAX];
};
279
+
280
+
281
+ void http_parser_init(http_parser *parser, enum http_parser_type type);
282
+
283
+
284
+ size_t http_parser_execute(http_parser *parser,
285
+ const http_parser_settings *settings,
286
+ const char *data,
287
+ size_t len);
288
+
289
+
290
+ /* If http_should_keep_alive() in the on_headers_complete or
291
+ * on_message_complete callback returns true, then this will be should be
292
+ * the last message on the connection.
293
+ * If you are the server, respond with the "Connection: close" header.
294
+ * If you are the client, close the connection.
295
+ */
296
+ int http_should_keep_alive(http_parser *parser);
297
+
298
+ /* Returns a string version of the HTTP method. */
299
+ const char *http_method_str(enum http_method m);
300
+
301
+ /* Return a string name of the given error */
302
+ const char *http_errno_name(enum http_errno err);
303
+
304
+ /* Return a string description of the given error */
305
+ const char *http_errno_description(enum http_errno err);
306
+
307
+ /* Parse a URL; return nonzero on failure */
308
+ int http_parser_parse_url(const char *buf, size_t buflen,
309
+ int is_connect,
310
+ struct http_parser_url *u);
311
+
312
+ /* Pause or un-pause the parser; a nonzero value pauses */
313
+ void http_parser_pause(http_parser *parser, int paused);
314
+
315
+ #ifdef __cplusplus
316
+ }
317
+ #endif
318
+ #endif
@@ -0,0 +1,131 @@
1
+ // vim:ts=4:sts=4:sw=4:expandtab
2
+ // (c) Bharanee Rathna 2012
3
+
4
+ #include <ruby/ruby.h>
5
+ #include "http_parser.h"
6
+
7
+ static VALUE mHTTP, cParser, eParserError;
8
+
9
+ static void rb_parser_free(http_parser *parser) {
10
+ if (parser)
11
+ free(parser);
12
+ }
13
+
14
+ VALUE rb_parser_allocate(VALUE klass) {
15
+ http_parser *parser = (http_parser *)malloc(sizeof(http_parser));
16
+ http_parser_init(parser, HTTP_BOTH);
17
+ return (VALUE)(parser->data = (void*)Data_Wrap_Struct(klass, 0, rb_parser_free, parser));
18
+ }
19
+
20
+ http_parser* rb_http_parser_handle(VALUE self) {
21
+ http_parser *parser = 0;
22
+ Data_Get_Struct(self, http_parser, parser);
23
+ if (!parser)
24
+ rb_raise(rb_eArgError, "Invalid HTTP::Parser instance");
25
+ return parser;
26
+ }
27
+
28
+ VALUE rb_parser_callback_for(VALUE self, VALUE name) {
29
+ return rb_hash_aref(rb_iv_get(self, "@callbacks"), name);
30
+ }
31
+
32
+ void rb_parser_callback_call(VALUE self, const char *name, char *data, size_t length) {
33
+ VALUE func = rb_parser_callback_for(self, ID2SYM(rb_intern(name)));
34
+ if (!NIL_P(func)) {
35
+ VALUE args = rb_ary_new();
36
+ if (data)
37
+ rb_ary_push(args, rb_str_new(data, length));
38
+ rb_proc_call(func, args);
39
+ }
40
+ }
41
+
42
+ int rb_parser_on_url(http_parser *parser, char *data, size_t length) {
43
+ VALUE self = (VALUE)parser->data;
44
+ rb_parser_callback_call(self, "on_url", data, length);
45
+ return 0;
46
+ }
47
+
48
+ int rb_parser_on_header_field(http_parser *parser, char *data, size_t length) {
49
+ VALUE self = (VALUE)parser->data;
50
+ rb_parser_callback_call(self, "on_header_field", data, length);
51
+ return 0;
52
+ }
53
+
54
+ int rb_parser_on_header_value(http_parser *parser, char *data, size_t length) {
55
+ VALUE self = (VALUE)parser->data;
56
+ rb_parser_callback_call(self, "on_header_value", data, length);
57
+ return 0;
58
+ }
59
+
60
+ int rb_parser_on_body(http_parser *parser, char *data, size_t length) {
61
+ VALUE self = (VALUE)parser->data;
62
+ rb_parser_callback_call(self, "on_body", data, length);
63
+ return 0;
64
+ }
65
+
66
+ int rb_parser_on_message_begin(http_parser *parser) {
67
+ VALUE self = (VALUE)parser->data;
68
+ rb_parser_callback_call(self, "on_message_begin", 0, 0);
69
+ return 0;
70
+ }
71
+
72
+ int rb_parser_on_message_complete(http_parser *parser) {
73
+ VALUE self = (VALUE)parser->data;
74
+ rb_parser_callback_call(self, "on_message_complete", 0, 0);
75
+ return 0;
76
+ }
77
+
78
+ VALUE rb_parser_parse(VALUE self, VALUE data) {
79
+ http_parser *parser = rb_http_parser_handle(self);
80
+ http_parser_settings settings = {
81
+ .on_url = (http_data_cb)rb_parser_on_url,
82
+ .on_header_field = (http_data_cb)rb_parser_on_header_field,
83
+ .on_header_value = (http_data_cb)rb_parser_on_header_value,
84
+ .on_body = (http_data_cb)rb_parser_on_body,
85
+ .on_message_begin = (http_cb)rb_parser_on_message_begin,
86
+ .on_message_complete = (http_cb)rb_parser_on_message_complete
87
+ };
88
+
89
+ size_t parsed = http_parser_execute(parser, &settings, RSTRING_PTR(data), RSTRING_LEN(data));
90
+ if (parsed != (size_t)RSTRING_LEN(data))
91
+ rb_raise(eParserError, "Error Parsing data: %s", http_errno_description(HTTP_PARSER_ERRNO(parser)));
92
+ return Qtrue;
93
+ }
94
+
95
+ VALUE rb_parser_reset(VALUE self) {
96
+ http_parser *parser = rb_http_parser_handle(self);
97
+ http_parser_init(parser, HTTP_BOTH);
98
+ return Qtrue;
99
+ }
100
+
101
+ VALUE rb_parser_http_method(VALUE self) {
102
+ http_parser *parser = rb_http_parser_handle(self);
103
+ return rb_str_new2(http_method_str(parser->method));
104
+ }
105
+
106
+ VALUE rb_parser_http_version(VALUE self) {
107
+ char version[16];
108
+ http_parser *parser = rb_http_parser_handle(self);
109
+ snprintf(version, 16, "%d.%d", parser->http_major, parser->http_minor);
110
+ return rb_str_new2(version);
111
+ }
112
+
113
+ VALUE rb_parser_http_status(VALUE self) {
114
+ http_parser *parser = rb_http_parser_handle(self);
115
+ return INT2NUM(parser->status_code);
116
+ }
117
+
118
+ Init_http_parser() {
119
+ mHTTP = rb_define_module("HTTP");
120
+ cParser = rb_define_class_under(mHTTP, "Parser", rb_cObject);
121
+ eParserError = rb_define_class_under(mHTTP, "ParserError", rb_eStandardError);
122
+
123
+ rb_define_alloc_func(cParser, rb_parser_allocate);
124
+
125
+ rb_define_method(cParser, "<<", rb_parser_parse, 1);
126
+ rb_define_method(cParser, "parse", rb_parser_parse, 1);
127
+ rb_define_method(cParser, "reset", rb_parser_reset, 0);
128
+ rb_define_method(cParser, "http_method", rb_parser_http_method, 0);
129
+ rb_define_method(cParser, "http_version", rb_parser_http_version, 0);
130
+ rb_define_method(cParser, "http_status", rb_parser_http_status, 0);
131
+ }
@@ -0,0 +1,18 @@
1
+ require 'http-parser/http_parser'
2
+
3
module HTTP
  class Parser
    # Events for which a handler block can be registered. Each name is also
    # the key (as a Symbol) under which the native extension looks the
    # handler up in @callbacks.
    CALLBACKS = %w(on_url on_header_field on_header_value on_body on_message_begin on_message_complete)

    # Starts with no handlers registered.
    def initialize
      @callbacks = {}
    end

    # Defines one registration method per event, e.g.
    #
    #   parser.on_url { |chunk| ... }
    #
    # Each stores the given block as the handler for that event and returns
    # it. Raises ArgumentError when called without a block.
    CALLBACKS.each do |event|
      define_method(event) do |&handler|
        raise ArgumentError, "block expected" unless handler
        @callbacks[event.to_sym] = handler
      end
    end
  end
end