http_parser.rb 0.6.0.beta.1 → 0.6.0.beta.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,4 +3,4 @@
3
3
  url = git://github.com/joyent/http-parser.git
4
4
  [submodule "http-parser-java"]
5
5
  path = ext/ruby_http_parser/vendor/http-parser-java
6
- url = git://github.com/http-parser/http-parser.java
6
+ url = git://github.com/tmm1/http-parser.java
@@ -1,17 +1,17 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- http_parser.rb (0.5.3)
4
+ http_parser.rb (0.6.0.beta.2)
5
5
 
6
6
  GEM
7
- remote: http://rubygems.org/
7
+ remote: https://rubygems.org/
8
8
  specs:
9
9
  benchmark_suite (0.8.0)
10
10
  diff-lcs (1.1.2)
11
11
  ffi (1.0.11)
12
12
  ffi (1.0.11-java)
13
- json (1.5.1)
14
- json (1.5.1-java)
13
+ json (1.8.0)
14
+ json (1.8.0-java)
15
15
  rake (0.9.2)
16
16
  rake-compiler (0.7.9)
17
17
  rake
@@ -1,32 +1,31 @@
1
1
  package org.ruby_http_parser;
2
2
 
3
+ import http_parser.HTTPException;
4
+ import http_parser.HTTPMethod;
5
+ import http_parser.HTTPParser;
6
+ import http_parser.lolevel.HTTPCallback;
7
+ import http_parser.lolevel.HTTPDataCallback;
8
+ import http_parser.lolevel.ParserSettings;
9
+
10
+ import java.nio.ByteBuffer;
11
+
12
+ import org.jcodings.Encoding;
13
+ import org.jcodings.specific.UTF8Encoding;
3
14
  import org.jruby.Ruby;
4
15
  import org.jruby.RubyArray;
5
16
  import org.jruby.RubyClass;
6
17
  import org.jruby.RubyHash;
7
- import org.jruby.RubyModule;
8
18
  import org.jruby.RubyNumeric;
9
19
  import org.jruby.RubyObject;
10
20
  import org.jruby.RubyString;
11
21
  import org.jruby.RubySymbol;
12
-
22
+ import org.jruby.anno.JRubyMethod;
23
+ import org.jruby.exceptions.RaiseException;
13
24
  import org.jruby.runtime.ObjectAllocator;
14
25
  import org.jruby.runtime.ThreadContext;
15
26
  import org.jruby.runtime.builtin.IRubyObject;
16
-
17
- import org.jruby.anno.JRubyMethod;
18
- import org.jruby.exceptions.RaiseException;
19
-
20
27
  import org.jruby.util.ByteList;
21
28
 
22
- import org.jcodings.specific.UTF8Encoding;
23
-
24
- import java.nio.ByteBuffer;
25
- import http_parser.*;
26
- import http_parser.lolevel.ParserSettings;
27
- import http_parser.lolevel.HTTPCallback;
28
- import http_parser.lolevel.HTTPDataCallback;
29
-
30
29
  public class RubyHttpParser extends RubyObject {
31
30
 
32
31
  @JRubyMethod(name = "strict?", module = true)
@@ -40,7 +39,7 @@ public class RubyHttpParser extends RubyObject {
40
39
  }
41
40
  };
42
41
 
43
- byte[] fetchBytes (ByteBuffer b, int pos, int len) {
42
+ byte[] fetchBytes(ByteBuffer b, int pos, int len) {
44
43
  byte[] by = new byte[len];
45
44
  int saved = b.position();
46
45
  b.position(pos);
@@ -80,11 +79,13 @@ public class RubyHttpParser extends RubyObject {
80
79
  private byte[] _current_header;
81
80
  private byte[] _last_header;
82
81
 
82
+ private static final Encoding UTF8 = UTF8Encoding.INSTANCE;
83
+
83
84
  public RubyHttpParser(final Ruby runtime, RubyClass clazz) {
84
- super(runtime,clazz);
85
+ super(runtime, clazz);
85
86
 
86
87
  this.runtime = runtime;
87
- this.eParserError = (RubyClass)runtime.getModule("HTTP").getClass("Parser").getConstant("Error");
88
+ this.eParserError = (RubyClass) runtime.getModule("HTTP").getClass("Parser").getConstant("Error");
88
89
 
89
90
  this.on_message_begin = null;
90
91
  this.on_headers_complete = null;
@@ -95,7 +96,8 @@ public class RubyHttpParser extends RubyObject {
95
96
 
96
97
  this.completed = false;
97
98
 
98
- this.header_value_type = runtime.getModule("HTTP").getClass("Parser").getInstanceVariable("@default_header_value_type");
99
+ this.header_value_type = runtime.getModule("HTTP").getClass("Parser")
100
+ .getInstanceVariable("@default_header_value_type");
99
101
 
100
102
  initSettings();
101
103
  init();
@@ -105,15 +107,19 @@ public class RubyHttpParser extends RubyObject {
105
107
  this.settings = new ParserSettings();
106
108
 
107
109
  this.settings.on_url = new HTTPDataCallback() {
108
- public int cb (http_parser.lolevel.HTTPParser p, ByteBuffer buf, int pos, int len) {
110
+ public int cb(http_parser.lolevel.HTTPParser p, ByteBuffer buf, int pos, int len) {
109
111
  byte[] data = fetchBytes(buf, pos, len);
110
- ((RubyString)requestUrl).cat(data);
112
+ if (runtime.is1_9() || runtime.is2_0()) {
113
+ ((RubyString) requestUrl).cat(data, 0, data.length, UTF8);
114
+ } else {
115
+ ((RubyString) requestUrl).cat(data);
116
+ }
111
117
  return 0;
112
118
  }
113
119
  };
114
120
 
115
121
  this.settings.on_header_field = new HTTPDataCallback() {
116
- public int cb (http_parser.lolevel.HTTPParser p, ByteBuffer buf, int pos, int len) {
122
+ public int cb(http_parser.lolevel.HTTPParser p, ByteBuffer buf, int pos, int len) {
117
123
  byte[] data = fetchBytes(buf, pos, len);
118
124
 
119
125
  if (_current_header == null)
@@ -123,7 +129,7 @@ public class RubyHttpParser extends RubyObject {
123
129
  System.arraycopy(_current_header, 0, tmp, 0, _current_header.length);
124
130
  System.arraycopy(data, 0, tmp, _current_header.length, data.length);
125
131
  _current_header = tmp;
126
- }
132
+ }
127
133
 
128
134
  return 0;
129
135
  }
@@ -133,7 +139,7 @@ public class RubyHttpParser extends RubyObject {
133
139
  final RubySymbol stopSym = runtime.newSymbol("stop");
134
140
  final RubySymbol resetSym = runtime.newSymbol("reset");
135
141
  this.settings.on_header_value = new HTTPDataCallback() {
136
- public int cb (http_parser.lolevel.HTTPParser p, ByteBuffer buf, int pos, int len) {
142
+ public int cb(http_parser.lolevel.HTTPParser p, ByteBuffer buf, int pos, int len) {
137
143
  byte[] data = fetchBytes(buf, pos, len);
138
144
  ThreadContext context = headers.getRuntime().getCurrentContext();
139
145
  IRubyObject key, val;
@@ -145,57 +151,74 @@ public class RubyHttpParser extends RubyObject {
145
151
  _current_header = null;
146
152
  }
147
153
 
148
- key = RubyString.newString(runtime, new ByteList(_last_header, UTF8Encoding.INSTANCE, false));
154
+ key = RubyString.newString(runtime, new ByteList(_last_header, UTF8, false));
149
155
  val = headers.op_aref(context, key);
150
156
 
151
157
  if (new_field == 1) {
152
158
  if (val.isNil()) {
153
159
  if (header_value_type == arraysSym) {
154
- headers.op_aset(context, key, RubyArray.newArrayLight(runtime, RubyString.newStringLight(runtime, 10)));
160
+ headers.op_aset(context, key,
161
+ RubyArray.newArrayLight(runtime, RubyString.newStringLight(runtime, 10, UTF8)));
155
162
  } else {
156
- headers.op_aset(context, key, RubyString.newStringLight(runtime, 10));
163
+ headers.op_aset(context, key, RubyString.newStringLight(runtime, 10, UTF8));
157
164
  }
158
165
  } else {
159
166
  if (header_value_type == mixedSym) {
160
167
  if (val instanceof RubyString) {
161
- headers.op_aset(context, key, RubyArray.newArrayLight(runtime, val, RubyString.newStringLight(runtime, 10)));
168
+ headers.op_aset(context, key,
169
+ RubyArray.newArrayLight(runtime, val, RubyString.newStringLight(runtime, 10, UTF8)));
162
170
  } else {
163
- ((RubyArray)val).add(RubyString.newStringLight(runtime, 10));
171
+ ((RubyArray) val).add(RubyString.newStringLight(runtime, 10, UTF8));
164
172
  }
165
173
  } else if (header_value_type == arraysSym) {
166
- ((RubyArray)val).add(RubyString.newStringLight(runtime, 10));
174
+ ((RubyArray) val).add(RubyString.newStringLight(runtime, 10, UTF8));
167
175
  } else {
168
- ((RubyString)val).cat(',').cat(' ');
176
+ if (runtime.is1_9() || runtime.is2_0()) {
177
+ ((RubyString) val).cat(',', UTF8).cat(' ', UTF8);
178
+ } else {
179
+ ((RubyString) val).cat(',').cat(' ');
180
+ }
169
181
  }
170
182
  }
171
183
  val = headers.op_aref(context, key);
172
184
  }
173
185
 
174
186
  if (val instanceof RubyArray) {
175
- val = ((RubyArray)val).entry(-1);
187
+ val = ((RubyArray) val).entry(-1);
176
188
  }
177
189
 
178
- ((RubyString)val).cat(data);
190
+ if (runtime.is1_9() || runtime.is2_0()) {
191
+ ((RubyString) val).cat(data, 0, data.length, UTF8);
192
+ } else {
193
+ ((RubyString) val).cat(data);
194
+ }
179
195
 
180
196
  return 0;
181
197
  }
182
198
  };
183
199
 
184
200
  this.settings.on_message_begin = new HTTPCallback() {
185
- public int cb (http_parser.lolevel.HTTPParser p) {
201
+ public int cb(http_parser.lolevel.HTTPParser p) {
186
202
  headers = new RubyHash(runtime);
187
203
 
188
- requestUrl = RubyString.newEmptyString(runtime);
189
- requestPath = RubyString.newEmptyString(runtime);
190
- queryString = RubyString.newEmptyString(runtime);
191
- fragment = RubyString.newEmptyString(runtime);
192
-
193
- upgradeData = RubyString.newEmptyString(runtime);
204
+ if (runtime.is1_9() || runtime.is2_0()) {
205
+ requestUrl = RubyString.newEmptyString(runtime, UTF8);
206
+ requestPath = RubyString.newEmptyString(runtime, UTF8);
207
+ queryString = RubyString.newEmptyString(runtime, UTF8);
208
+ fragment = RubyString.newEmptyString(runtime, UTF8);
209
+ upgradeData = RubyString.newEmptyString(runtime, UTF8);
210
+ } else {
211
+ requestUrl = RubyString.newEmptyString(runtime);
212
+ requestPath = RubyString.newEmptyString(runtime);
213
+ queryString = RubyString.newEmptyString(runtime);
214
+ fragment = RubyString.newEmptyString(runtime);
215
+ upgradeData = RubyString.newEmptyString(runtime);
216
+ }
194
217
 
195
218
  IRubyObject ret = runtime.getNil();
196
219
 
197
220
  if (callback_object != null) {
198
- if (((RubyObject)callback_object).respondsTo("on_message_begin")) {
221
+ if (((RubyObject) callback_object).respondsTo("on_message_begin")) {
199
222
  ThreadContext context = callback_object.getRuntime().getCurrentContext();
200
223
  ret = callback_object.callMethod(context, "on_message_begin");
201
224
  }
@@ -212,13 +235,13 @@ public class RubyHttpParser extends RubyObject {
212
235
  }
213
236
  };
214
237
  this.settings.on_message_complete = new HTTPCallback() {
215
- public int cb (http_parser.lolevel.HTTPParser p) {
238
+ public int cb(http_parser.lolevel.HTTPParser p) {
216
239
  IRubyObject ret = runtime.getNil();
217
240
 
218
241
  completed = true;
219
242
 
220
243
  if (callback_object != null) {
221
- if (((RubyObject)callback_object).respondsTo("on_message_complete")) {
244
+ if (((RubyObject) callback_object).respondsTo("on_message_complete")) {
222
245
  ThreadContext context = callback_object.getRuntime().getCurrentContext();
223
246
  ret = callback_object.callMethod(context, "on_message_complete");
224
247
  }
@@ -235,11 +258,11 @@ public class RubyHttpParser extends RubyObject {
235
258
  }
236
259
  };
237
260
  this.settings.on_headers_complete = new HTTPCallback() {
238
- public int cb (http_parser.lolevel.HTTPParser p) {
261
+ public int cb(http_parser.lolevel.HTTPParser p) {
239
262
  IRubyObject ret = runtime.getNil();
240
263
 
241
264
  if (callback_object != null) {
242
- if (((RubyObject)callback_object).respondsTo("on_headers_complete")) {
265
+ if (((RubyObject) callback_object).respondsTo("on_headers_complete")) {
243
266
  ThreadContext context = callback_object.getRuntime().getCurrentContext();
244
267
  ret = callback_object.callMethod(context, "on_headers_complete", headers);
245
268
  }
@@ -258,18 +281,19 @@ public class RubyHttpParser extends RubyObject {
258
281
  }
259
282
  };
260
283
  this.settings.on_body = new HTTPDataCallback() {
261
- public int cb (http_parser.lolevel.HTTPParser p, ByteBuffer buf, int pos, int len) {
284
+ public int cb(http_parser.lolevel.HTTPParser p, ByteBuffer buf, int pos, int len) {
262
285
  IRubyObject ret = runtime.getNil();
263
286
  byte[] data = fetchBytes(buf, pos, len);
264
287
 
265
288
  if (callback_object != null) {
266
- if (((RubyObject)callback_object).respondsTo("on_body")) {
289
+ if (((RubyObject) callback_object).respondsTo("on_body")) {
267
290
  ThreadContext context = callback_object.getRuntime().getCurrentContext();
268
- ret = callback_object.callMethod(context, "on_body", RubyString.newString(runtime, new ByteList(data, UTF8Encoding.INSTANCE, false)));
291
+ ret = callback_object.callMethod(context, "on_body",
292
+ RubyString.newString(runtime, new ByteList(data, UTF8, false)));
269
293
  }
270
294
  } else if (on_body != null) {
271
295
  ThreadContext context = on_body.getRuntime().getCurrentContext();
272
- ret = on_body.callMethod(context, "call", RubyString.newString(runtime, new ByteList(data, UTF8Encoding.INSTANCE, false)));
296
+ ret = on_body.callMethod(context, "call", RubyString.newString(runtime, new ByteList(data, UTF8, false)));
273
297
  }
274
298
 
275
299
  if (ret == stopSym) {
@@ -337,7 +361,7 @@ public class RubyHttpParser extends RubyObject {
337
361
 
338
362
  @JRubyMethod(name = "<<")
339
363
  public IRubyObject execute(IRubyObject data) {
340
- RubyString str = (RubyString)data;
364
+ RubyString str = (RubyString) data;
341
365
  ByteList byteList = str.getByteList();
342
366
  ByteBuffer buf = ByteBuffer.wrap(byteList.getUnsafeBytes(), byteList.getBegin(), byteList.getRealSize());
343
367
  boolean stopped = false;
@@ -352,8 +376,11 @@ public class RubyHttpParser extends RubyObject {
352
376
 
353
377
  if (parser.getUpgrade()) {
354
378
  byte[] upData = fetchBytes(buf, buf.position(), buf.limit() - buf.position());
355
- ((RubyString)upgradeData).cat(upData);
356
-
379
+ if (runtime.is1_9() || runtime.is2_0()) {
380
+ ((RubyString) upgradeData).cat(upData, 0, upData.length, UTF8);
381
+ } else {
382
+ ((RubyString) upgradeData).cat(upData);
383
+ }
357
384
  } else if (buf.hasRemaining() && !completed) {
358
385
  if (!stopped)
359
386
  throw new RaiseException(runtime, eParserError, "Could not parse data entirely", true);
@@ -37,19 +37,24 @@
37
37
  # define MIN(a,b) ((a) < (b) ? (a) : (b))
38
38
  #endif
39
39
 
40
+ #ifndef ARRAY_SIZE
41
+ # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
42
+ #endif
43
+
44
+ #ifndef BIT_AT
45
+ # define BIT_AT(a, i) \
46
+ (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
47
+ (1 << ((unsigned int) (i) & 7))))
48
+ #endif
49
+
50
+ #ifndef ELEM_AT
51
+ # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
52
+ #endif
40
53
 
41
- #if HTTP_PARSER_DEBUG
42
- #define SET_ERRNO(e) \
43
- do { \
44
- parser->http_errno = (e); \
45
- parser->error_lineno = __LINE__; \
46
- } while (0)
47
- #else
48
54
  #define SET_ERRNO(e) \
49
55
  do { \
50
56
  parser->http_errno = (e); \
51
57
  } while(0)
52
- #endif
53
58
 
54
59
 
55
60
  /* Run the notify callback FOR, returning ER if it fails */
@@ -94,7 +99,7 @@ do { \
94
99
  FOR##_mark = NULL; \
95
100
  } \
96
101
  } while (0)
97
-
102
+
98
103
  /* Run the data callback FOR and consume the current byte */
99
104
  #define CALLBACK_DATA(FOR) \
100
105
  CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
@@ -123,31 +128,10 @@ do { \
123
128
 
124
129
 
125
130
  static const char *method_strings[] =
126
- { "DELETE"
127
- , "GET"
128
- , "HEAD"
129
- , "POST"
130
- , "PUT"
131
- , "CONNECT"
132
- , "OPTIONS"
133
- , "TRACE"
134
- , "COPY"
135
- , "LOCK"
136
- , "MKCOL"
137
- , "MOVE"
138
- , "PROPFIND"
139
- , "PROPPATCH"
140
- , "UNLOCK"
141
- , "REPORT"
142
- , "MKACTIVITY"
143
- , "CHECKOUT"
144
- , "MERGE"
145
- , "M-SEARCH"
146
- , "NOTIFY"
147
- , "SUBSCRIBE"
148
- , "UNSUBSCRIBE"
149
- , "PATCH"
150
- , "PURGE"
131
+ {
132
+ #define XX(num, name, string) #string,
133
+ HTTP_METHOD_MAP(XX)
134
+ #undef XX
151
135
  };
152
136
 
153
137
 
@@ -205,40 +189,48 @@ static const int8_t unhex[256] =
205
189
  };
206
190
 
207
191
 
208
- static const uint8_t normal_url_char[256] = {
192
+ #if HTTP_PARSER_STRICT
193
+ # define T(v) 0
194
+ #else
195
+ # define T(v) v
196
+ #endif
197
+
198
+
199
+ static const uint8_t normal_url_char[32] = {
209
200
  /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
210
- 0, 0, 0, 0, 0, 0, 0, 0,
201
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
211
202
  /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
212
- 0, 0, 0, 0, 0, 0, 0, 0,
203
+ 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
213
204
  /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
214
- 0, 0, 0, 0, 0, 0, 0, 0,
205
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
215
206
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
216
- 0, 0, 0, 0, 0, 0, 0, 0,
207
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
217
208
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
218
- 0, 1, 1, 0, 1, 1, 1, 1,
209
+ 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
219
210
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
220
- 1, 1, 1, 1, 1, 1, 1, 1,
211
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
221
212
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
222
- 1, 1, 1, 1, 1, 1, 1, 1,
213
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
223
214
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
224
- 1, 1, 1, 1, 1, 1, 1, 0,
215
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
225
216
  /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
226
- 1, 1, 1, 1, 1, 1, 1, 1,
217
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
227
218
  /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
228
- 1, 1, 1, 1, 1, 1, 1, 1,
219
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
229
220
  /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
230
- 1, 1, 1, 1, 1, 1, 1, 1,
221
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
231
222
  /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
232
- 1, 1, 1, 1, 1, 1, 1, 1,
223
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
233
224
  /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
234
- 1, 1, 1, 1, 1, 1, 1, 1,
225
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
235
226
  /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
236
- 1, 1, 1, 1, 1, 1, 1, 1,
227
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
237
228
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
238
- 1, 1, 1, 1, 1, 1, 1, 1,
229
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
239
230
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
240
- 1, 1, 1, 1, 1, 1, 1, 0, };
231
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
241
232
 
233
+ #undef T
242
234
 
243
235
  enum state
244
236
  { s_dead = 1 /* important that this is > 0 */
@@ -266,13 +258,9 @@ enum state
266
258
  , s_req_schema
267
259
  , s_req_schema_slash
268
260
  , s_req_schema_slash_slash
269
- , s_req_host_start
270
- , s_req_host_v6_start
271
- , s_req_host_v6
272
- , s_req_host_v6_end
273
- , s_req_host
274
- , s_req_port_start
275
- , s_req_port
261
+ , s_req_server_start
262
+ , s_req_server
263
+ , s_req_server_with_at
276
264
  , s_req_path
277
265
  , s_req_query_string_start
278
266
  , s_req_query_string
@@ -350,6 +338,19 @@ enum header_states
350
338
  , h_connection_close
351
339
  };
352
340
 
341
+ enum http_host_state
342
+ {
343
+ s_http_host_dead = 1
344
+ , s_http_userinfo_start
345
+ , s_http_userinfo
346
+ , s_http_host_start
347
+ , s_http_host_v6_start
348
+ , s_http_host
349
+ , s_http_host_v6
350
+ , s_http_host_v6_end
351
+ , s_http_host_port_start
352
+ , s_http_host_port
353
+ };
353
354
 
354
355
  /* Macros for character classes; depends on strict-mode */
355
356
  #define CR '\r'
@@ -359,15 +360,21 @@ enum header_states
359
360
  #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
360
361
  #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
361
362
  #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
363
+ #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
364
+ (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
365
+ (c) == ')')
366
+ #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
367
+ (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
368
+ (c) == '$' || (c) == ',')
362
369
 
363
370
  #if HTTP_PARSER_STRICT
364
371
  #define TOKEN(c) (tokens[(unsigned char)c])
365
- #define IS_URL_CHAR(c) (normal_url_char[(unsigned char) (c)])
372
+ #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
366
373
  #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
367
374
  #else
368
375
  #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
369
376
  #define IS_URL_CHAR(c) \
370
- (normal_url_char[(unsigned char) (c)] || ((c) & 0x80))
377
+ (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
371
378
  #define IS_HOST_CHAR(c) \
372
379
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
373
380
  #endif
@@ -401,7 +408,7 @@ static struct {
401
408
  };
402
409
  #undef HTTP_STRERROR_GEN
403
410
 
404
- int http_message_needs_eof(http_parser *parser);
411
+ int http_message_needs_eof(const http_parser *parser);
405
412
 
406
413
  /* Our URL parser.
407
414
  *
@@ -417,7 +424,15 @@ int http_message_needs_eof(http_parser *parser);
417
424
  static enum state
418
425
  parse_url_char(enum state s, const char ch)
419
426
  {
420
- assert(!isspace(ch));
427
+ if (ch == ' ' || ch == '\r' || ch == '\n') {
428
+ return s_dead;
429
+ }
430
+
431
+ #if HTTP_PARSER_STRICT
432
+ if (ch == '\t' || ch == '\f') {
433
+ return s_dead;
434
+ }
435
+ #endif
421
436
 
422
437
  switch (s) {
423
438
  case s_req_spaces_before_url:
@@ -455,67 +470,33 @@ parse_url_char(enum state s, const char ch)
455
470
 
456
471
  case s_req_schema_slash_slash:
457
472
  if (ch == '/') {
458
- return s_req_host_start;
459
- }
460
-
461
- break;
462
-
463
- case s_req_host_start:
464
- if (ch == '[') {
465
- return s_req_host_v6_start;
466
- }
467
-
468
- if (IS_HOST_CHAR(ch)) {
469
- return s_req_host;
473
+ return s_req_server_start;
470
474
  }
471
475
 
472
476
  break;
473
477
 
474
- case s_req_host:
475
- if (IS_HOST_CHAR(ch)) {
476
- return s_req_host;
477
- }
478
-
479
- /* FALLTHROUGH */
480
- case s_req_host_v6_end:
481
- switch (ch) {
482
- case ':':
483
- return s_req_port_start;
484
-
485
- case '/':
486
- return s_req_path;
487
-
488
- case '?':
489
- return s_req_query_string_start;
478
+ case s_req_server_with_at:
479
+ if (ch == '@') {
480
+ return s_dead;
490
481
  }
491
482
 
492
- break;
493
-
494
- case s_req_host_v6:
495
- if (ch == ']') {
496
- return s_req_host_v6_end;
483
+ /* FALLTHROUGH */
484
+ case s_req_server_start:
485
+ case s_req_server:
486
+ if (ch == '/') {
487
+ return s_req_path;
497
488
  }
498
489
 
499
- /* FALLTHROUGH */
500
- case s_req_host_v6_start:
501
- if (IS_HEX(ch) || ch == ':') {
502
- return s_req_host_v6;
490
+ if (ch == '?') {
491
+ return s_req_query_string_start;
503
492
  }
504
- break;
505
493
 
506
- case s_req_port:
507
- switch (ch) {
508
- case '/':
509
- return s_req_path;
510
-
511
- case '?':
512
- return s_req_query_string_start;
494
+ if (ch == '@') {
495
+ return s_req_server_with_at;
513
496
  }
514
497
 
515
- /* FALLTHROUGH */
516
- case s_req_port_start:
517
- if (IS_NUM(ch)) {
518
- return s_req_port;
498
+ if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
499
+ return s_req_server;
519
500
  }
520
501
 
521
502
  break;
@@ -637,13 +618,9 @@ size_t http_parser_execute (http_parser *parser,
637
618
  case s_req_schema:
638
619
  case s_req_schema_slash:
639
620
  case s_req_schema_slash_slash:
640
- case s_req_host_start:
641
- case s_req_host_v6_start:
642
- case s_req_host_v6:
643
- case s_req_host_v6_end:
644
- case s_req_host:
645
- case s_req_port_start:
646
- case s_req_port:
621
+ case s_req_server_start:
622
+ case s_req_server:
623
+ case s_req_server_with_at:
647
624
  case s_req_query_string_start:
648
625
  case s_req_query_string:
649
626
  case s_req_fragment_start:
@@ -889,6 +866,7 @@ size_t http_parser_execute (http_parser *parser,
889
866
  case s_res_line_almost_done:
890
867
  STRICT_CHECK(ch != LF);
891
868
  parser->state = s_header_field_start;
869
+ CALLBACK_NOTIFY(status_complete);
892
870
  break;
893
871
 
894
872
  case s_start_req:
@@ -918,7 +896,7 @@ size_t http_parser_execute (http_parser *parser,
918
896
  /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
919
897
  break;
920
898
  case 'R': parser->method = HTTP_REPORT; break;
921
- case 'S': parser->method = HTTP_SUBSCRIBE; break;
899
+ case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
922
900
  case 'T': parser->method = HTTP_TRACE; break;
923
901
  case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
924
902
  default:
@@ -965,6 +943,12 @@ size_t http_parser_execute (http_parser *parser,
965
943
  } else {
966
944
  goto error;
967
945
  }
946
+ } else if (parser->method == HTTP_SUBSCRIBE) {
947
+ if (parser->index == 1 && ch == 'E') {
948
+ parser->method = HTTP_SEARCH;
949
+ } else {
950
+ goto error;
951
+ }
968
952
  } else if (parser->index == 1 && parser->method == HTTP_POST) {
969
953
  if (ch == 'R') {
970
954
  parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
@@ -998,7 +982,7 @@ size_t http_parser_execute (http_parser *parser,
998
982
 
999
983
  MARK(url);
1000
984
  if (parser->method == HTTP_CONNECT) {
1001
- parser->state = s_req_host_start;
985
+ parser->state = s_req_server_start;
1002
986
  }
1003
987
 
1004
988
  parser->state = parse_url_char((enum state)parser->state, ch);
@@ -1013,10 +997,7 @@ size_t http_parser_execute (http_parser *parser,
1013
997
  case s_req_schema:
1014
998
  case s_req_schema_slash:
1015
999
  case s_req_schema_slash_slash:
1016
- case s_req_host_start:
1017
- case s_req_host_v6_start:
1018
- case s_req_host_v6:
1019
- case s_req_port_start:
1000
+ case s_req_server_start:
1020
1001
  {
1021
1002
  switch (ch) {
1022
1003
  /* No whitespace allowed here */
@@ -1036,9 +1017,8 @@ size_t http_parser_execute (http_parser *parser,
1036
1017
  break;
1037
1018
  }
1038
1019
 
1039
- case s_req_host:
1040
- case s_req_host_v6_end:
1041
- case s_req_port:
1020
+ case s_req_server:
1021
+ case s_req_server_with_at:
1042
1022
  case s_req_path:
1043
1023
  case s_req_query_string_start:
1044
1024
  case s_req_query_string:
@@ -1867,7 +1847,7 @@ error:
1867
1847
 
1868
1848
  /* Does the parser need to see an EOF to find the end of the message? */
1869
1849
  int
1870
- http_message_needs_eof (http_parser *parser)
1850
+ http_message_needs_eof (const http_parser *parser)
1871
1851
  {
1872
1852
  if (parser->type == HTTP_REQUEST) {
1873
1853
  return 0;
@@ -1890,7 +1870,7 @@ http_message_needs_eof (http_parser *parser)
1890
1870
 
1891
1871
 
1892
1872
  int
1893
- http_should_keep_alive (http_parser *parser)
1873
+ http_should_keep_alive (const http_parser *parser)
1894
1874
  {
1895
1875
  if (parser->http_major > 0 && parser->http_minor > 0) {
1896
1876
  /* HTTP/1.1 */
@@ -1908,9 +1888,10 @@ http_should_keep_alive (http_parser *parser)
1908
1888
  }
1909
1889
 
1910
1890
 
1911
- const char * http_method_str (enum http_method m)
1891
+ const char *
1892
+ http_method_str (enum http_method m)
1912
1893
  {
1913
- return method_strings[m];
1894
+ return ELEM_AT(method_strings, m, "<unknown>");
1914
1895
  }
1915
1896
 
1916
1897
 
@@ -1937,6 +1918,144 @@ http_errno_description(enum http_errno err) {
1937
1918
  return http_strerror_tab[err].description;
1938
1919
  }
1939
1920
 
1921
+ static enum http_host_state
1922
+ http_parse_host_char(enum http_host_state s, const char ch) {
1923
+ switch(s) {
1924
+ case s_http_userinfo:
1925
+ case s_http_userinfo_start:
1926
+ if (ch == '@') {
1927
+ return s_http_host_start;
1928
+ }
1929
+
1930
+ if (IS_USERINFO_CHAR(ch)) {
1931
+ return s_http_userinfo;
1932
+ }
1933
+ break;
1934
+
1935
+ case s_http_host_start:
1936
+ if (ch == '[') {
1937
+ return s_http_host_v6_start;
1938
+ }
1939
+
1940
+ if (IS_HOST_CHAR(ch)) {
1941
+ return s_http_host;
1942
+ }
1943
+
1944
+ break;
1945
+
1946
+ case s_http_host:
1947
+ if (IS_HOST_CHAR(ch)) {
1948
+ return s_http_host;
1949
+ }
1950
+
1951
+ /* FALLTHROUGH */
1952
+ case s_http_host_v6_end:
1953
+ if (ch == ':') {
1954
+ return s_http_host_port_start;
1955
+ }
1956
+
1957
+ break;
1958
+
1959
+ case s_http_host_v6:
1960
+ if (ch == ']') {
1961
+ return s_http_host_v6_end;
1962
+ }
1963
+
1964
+ /* FALLTHROUGH */
1965
+ case s_http_host_v6_start:
1966
+ if (IS_HEX(ch) || ch == ':' || ch == '.') {
1967
+ return s_http_host_v6;
1968
+ }
1969
+
1970
+ break;
1971
+
1972
+ case s_http_host_port:
1973
+ case s_http_host_port_start:
1974
+ if (IS_NUM(ch)) {
1975
+ return s_http_host_port;
1976
+ }
1977
+
1978
+ break;
1979
+
1980
+ default:
1981
+ break;
1982
+ }
1983
+ return s_http_host_dead;
1984
+ }
1985
+
1986
+ static int
1987
+ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
1988
+ enum http_host_state s;
1989
+
1990
+ const char *p;
1991
+ size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
1992
+
1993
+ u->field_data[UF_HOST].len = 0;
1994
+
1995
+ s = found_at ? s_http_userinfo_start : s_http_host_start;
1996
+
1997
+ for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
1998
+ enum http_host_state new_s = http_parse_host_char(s, *p);
1999
+
2000
+ if (new_s == s_http_host_dead) {
2001
+ return 1;
2002
+ }
2003
+
2004
+ switch(new_s) {
2005
+ case s_http_host:
2006
+ if (s != s_http_host) {
2007
+ u->field_data[UF_HOST].off = p - buf;
2008
+ }
2009
+ u->field_data[UF_HOST].len++;
2010
+ break;
2011
+
2012
+ case s_http_host_v6:
2013
+ if (s != s_http_host_v6) {
2014
+ u->field_data[UF_HOST].off = p - buf;
2015
+ }
2016
+ u->field_data[UF_HOST].len++;
2017
+ break;
2018
+
2019
+ case s_http_host_port:
2020
+ if (s != s_http_host_port) {
2021
+ u->field_data[UF_PORT].off = p - buf;
2022
+ u->field_data[UF_PORT].len = 0;
2023
+ u->field_set |= (1 << UF_PORT);
2024
+ }
2025
+ u->field_data[UF_PORT].len++;
2026
+ break;
2027
+
2028
+ case s_http_userinfo:
2029
+ if (s != s_http_userinfo) {
2030
+ u->field_data[UF_USERINFO].off = p - buf ;
2031
+ u->field_data[UF_USERINFO].len = 0;
2032
+ u->field_set |= (1 << UF_USERINFO);
2033
+ }
2034
+ u->field_data[UF_USERINFO].len++;
2035
+ break;
2036
+
2037
+ default:
2038
+ break;
2039
+ }
2040
+ s = new_s;
2041
+ }
2042
+
2043
+ /* Make sure we don't end somewhere unexpected */
2044
+ switch (s) {
2045
+ case s_http_host_start:
2046
+ case s_http_host_v6_start:
2047
+ case s_http_host_v6:
2048
+ case s_http_host_port_start:
2049
+ case s_http_userinfo:
2050
+ case s_http_userinfo_start:
2051
+ return 1;
2052
+ default:
2053
+ break;
2054
+ }
2055
+
2056
+ return 0;
2057
+ }
2058
+
1940
2059
  int
1941
2060
  http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1942
2061
  struct http_parser_url *u)
@@ -1944,9 +2063,10 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1944
2063
  enum state s;
1945
2064
  const char *p;
1946
2065
  enum http_parser_url_fields uf, old_uf;
2066
+ int found_at = 0;
1947
2067
 
1948
2068
  u->port = u->field_set = 0;
1949
- s = is_connect ? s_req_host_start : s_req_spaces_before_url;
2069
+ s = is_connect ? s_req_server_start : s_req_spaces_before_url;
1950
2070
  uf = old_uf = UF_MAX;
1951
2071
 
1952
2072
  for (p = buf; p < buf + buflen; p++) {
@@ -1960,10 +2080,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1960
2080
  /* Skip delimeters */
1961
2081
  case s_req_schema_slash:
1962
2082
  case s_req_schema_slash_slash:
1963
- case s_req_host_start:
1964
- case s_req_host_v6_start:
1965
- case s_req_host_v6_end:
1966
- case s_req_port_start:
2083
+ case s_req_server_start:
1967
2084
  case s_req_query_string_start:
1968
2085
  case s_req_fragment_start:
1969
2086
  continue;
@@ -1972,13 +2089,12 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1972
2089
  uf = UF_SCHEMA;
1973
2090
  break;
1974
2091
 
1975
- case s_req_host:
1976
- case s_req_host_v6:
1977
- uf = UF_HOST;
1978
- break;
2092
+ case s_req_server_with_at:
2093
+ found_at = 1;
1979
2094
 
1980
- case s_req_port:
1981
- uf = UF_PORT;
2095
+ /* FALLTROUGH */
2096
+ case s_req_server:
2097
+ uf = UF_HOST;
1982
2098
  break;
1983
2099
 
1984
2100
  case s_req_path:
@@ -2011,21 +2127,17 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2011
2127
  old_uf = uf;
2012
2128
  }
2013
2129
 
2014
- /* CONNECT requests can only contain "hostname:port" */
2015
- if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2016
- return 1;
2130
+ /* host must be present if there is a schema */
2131
+ /* parsing http:///toto will fail */
2132
+ if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2133
+ if (http_parse_host(buf, u, found_at) != 0) {
2134
+ return 1;
2135
+ }
2017
2136
  }
2018
2137
 
2019
- /* Make sure we don't end somewhere unexpected */
2020
- switch (s) {
2021
- case s_req_host_v6_start:
2022
- case s_req_host_v6:
2023
- case s_req_host_v6_end:
2024
- case s_req_host:
2025
- case s_req_port_start:
2138
+ /* CONNECT requests can only contain "hostname:port" */
2139
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2026
2140
  return 1;
2027
- default:
2028
- break;
2029
2141
  }
2030
2142
 
2031
2143
  if (u->field_set & (1 << UF_PORT)) {
@@ -2056,3 +2168,8 @@ http_parser_pause(http_parser *parser, int paused) {
2056
2168
  assert(0 && "Attempting to pause parser in error state");
2057
2169
  }
2058
2170
  }
2171
+
2172
+ int
2173
+ http_body_is_final(const struct http_parser *parser) {
2174
+ return parser->state == s_message_done;
2175
+ }