http_parser.rb 0.5.3-java → 0.6.0.beta.2-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56)
  1. data/.gitmodules +3 -3
  2. data/Gemfile +1 -1
  3. data/Gemfile.lock +14 -5
  4. data/README.md +50 -45
  5. data/bench/standalone.rb +23 -0
  6. data/bench/thin.rb +1 -0
  7. data/ext/ruby_http_parser/org/ruby_http_parser/RubyHttpParser.java +120 -85
  8. data/ext/ruby_http_parser/ruby_http_parser.c +10 -41
  9. data/ext/ruby_http_parser/vendor/http-parser-java/AUTHORS +32 -0
  10. data/ext/ruby_http_parser/vendor/http-parser-java/LICENSE-MIT +5 -1
  11. data/ext/ruby_http_parser/vendor/http-parser-java/README.md +133 -1
  12. data/ext/ruby_http_parser/vendor/http-parser-java/TODO +6 -0
  13. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.c +1202 -671
  14. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.gyp +79 -0
  15. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.h +172 -51
  16. data/ext/ruby_http_parser/vendor/http-parser-java/src/Http-parser.java.iml +22 -0
  17. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/FieldData.java +41 -0
  18. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPMethod.java +8 -3
  19. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPParserUrl.java +76 -0
  20. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserSettings.java +35 -102
  21. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/Util.java +6 -6
  22. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPParser.java +775 -682
  23. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/ParserSettings.java +8 -4
  24. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Message.java +70 -20
  25. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/ParseUrl.java +51 -0
  26. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Requests.java +1 -1
  27. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Responses.java +1 -0
  28. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Test.java +2 -1
  29. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestHeaderOverflowError.java +1 -0
  30. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestLoaderNG.java +6 -17
  31. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestNoOverflowLongBody.java +1 -0
  32. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/UnitTest.java +1 -0
  33. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Upgrade.java +1 -0
  34. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Url.java +127 -0
  35. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Util.java +80 -9
  36. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/WrongContentLength.java +2 -1
  37. data/ext/ruby_http_parser/vendor/http-parser-java/test.c +1637 -280
  38. data/ext/ruby_http_parser/vendor/http-parser-java/tests.dumped +230 -71
  39. data/ext/ruby_http_parser/vendor/http-parser/AUTHORS +32 -0
  40. data/ext/ruby_http_parser/vendor/http-parser/LICENSE-MIT +5 -1
  41. data/ext/ruby_http_parser/vendor/http-parser/README.md +9 -2
  42. data/ext/ruby_http_parser/vendor/http-parser/http_parser.c +1029 -615
  43. data/ext/ruby_http_parser/vendor/http-parser/http_parser.gyp +79 -0
  44. data/ext/ruby_http_parser/vendor/http-parser/http_parser.h +145 -16
  45. data/ext/ruby_http_parser/vendor/http-parser/test.c +1065 -141
  46. data/http_parser.rb.gemspec +3 -1
  47. data/spec/parser_spec.rb +41 -17
  48. data/spec/support/requests.json +236 -24
  49. data/spec/support/responses.json +182 -36
  50. data/tasks/compile.rake +2 -2
  51. data/tasks/fixtures.rake +7 -1
  52. metadata +213 -162
  53. data/ext/ruby_http_parser/vendor/http-parser-java/compile +0 -1
  54. data/ext/ruby_http_parser/vendor/http-parser-java/test_permutations +0 -1
  55. data/ext/ruby_http_parser/vendor/http-parser-java/test_unit +0 -1
  56. data/ext/ruby_http_parser/vendor/http-parser-java/test_utf8 +0 -1
@@ -8,6 +8,7 @@ public enum HTTPMethod {
8
8
  , HTTP_HEAD("HEAD")
9
9
  , HTTP_POST("POST")
10
10
  , HTTP_PUT("PUT")
11
+ , HTTP_PATCH("PATCH")
11
12
  /* pathological */
12
13
  , HTTP_CONNECT("CONNECT")
13
14
  , HTTP_OPTIONS("OPTIONS")
@@ -28,7 +29,7 @@ public enum HTTPMethod {
28
29
  , HTTP_NOTIFY("NOTIFY")
29
30
  , HTTP_SUBSCRIBE("SUBSCRIBE")
30
31
  , HTTP_UNSUBSCRIBE("UNSUBSCRIBE")
31
-
32
+ , HTTP_PURGE("PURGE")
32
33
  ;
33
34
 
34
35
  private static Charset ASCII;
@@ -38,7 +39,7 @@ public enum HTTPMethod {
38
39
  public byte[] bytes;
39
40
 
40
41
  HTTPMethod(String name) {
41
- // good grief, Charlie Brown, the following is necessary because
42
+ // good grief, Charlie Brown, the following is necessary because
42
43
  // of a Java restriction:
43
44
  // illegal reference to static field from initializer
44
45
  // this.bytes = name.getBytes(ASCII);
@@ -57,6 +58,8 @@ public enum HTTPMethod {
57
58
  else if ("POST".equalsIgnoreCase(s)) {return HTTP_POST;}
58
59
  else if ("HTTP_PUT".equalsIgnoreCase(s)) {return HTTP_PUT;}
59
60
  else if ("PUT".equalsIgnoreCase(s)) {return HTTP_PUT;}
61
+ else if ("HTTP_PATCH".equalsIgnoreCase(s)) {return HTTP_PATCH;}
62
+ else if ("PATCH".equalsIgnoreCase(s)) {return HTTP_PATCH;}
60
63
  else if ("HTTP_CONNECT".equalsIgnoreCase(s)) {return HTTP_CONNECT;}
61
64
  else if ("CONNECT".equalsIgnoreCase(s)) {return HTTP_CONNECT;}
62
65
  else if ("HTTP_OPTIONS".equalsIgnoreCase(s)) {return HTTP_OPTIONS;}
@@ -93,8 +96,10 @@ public enum HTTPMethod {
93
96
  else if ("SUBSCRIBE".equalsIgnoreCase(s)) {return HTTP_SUBSCRIBE;}
94
97
  else if ("HTTP_UNSUBSCRIBE".equalsIgnoreCase(s)) {return HTTP_UNSUBSCRIBE;}
95
98
  else if ("UNSUBSCRIBE".equalsIgnoreCase(s)) {return HTTP_UNSUBSCRIBE;}
99
+ else if ("PATCH".equalsIgnoreCase(s)) {return HTTP_PATCH;}
100
+ else if ("PURGE".equalsIgnoreCase(s)) {return HTTP_PURGE;}
96
101
  else {return null;}
97
- }
102
+ }
98
103
  void init (String name) {
99
104
  ASCII = null == ASCII ? Charset.forName("US-ASCII") : ASCII;
100
105
  this.bytes = name.getBytes(ASCII);
@@ -0,0 +1,76 @@
1
+ package http_parser;
2
+
3
+ import http_parser.lolevel.*;
4
+ import http_parser.lolevel.HTTPParser;
5
+
6
+ import java.io.UnsupportedEncodingException;
7
+ import java.nio.ByteBuffer;
8
+ import java.util.Arrays;
9
+
10
+ /**
11
+ */
12
+ public class HTTPParserUrl {
13
+
14
+ public int field_set;
15
+ public int port;
16
+
17
+ public FieldData[] field_data = new FieldData[]{
18
+ new FieldData(0,0),
19
+ new FieldData(0,0),
20
+ new FieldData(0,0),
21
+ new FieldData(0,0),
22
+ new FieldData(0,0),
23
+ new FieldData(0,0)
24
+ }; //UF_MAX
25
+
26
+ public HTTPParserUrl(){}
27
+
28
+ public HTTPParserUrl(int field_set, int port, FieldData[] field_data){
29
+ this.field_set = field_set;
30
+ this.port = port;
31
+ this.field_data = field_data;
32
+ }
33
+
34
+ public String getFieldValue(HTTPParser.UrlFields field, ByteBuffer data) throws UnsupportedEncodingException {
35
+ FieldData fd = this.field_data[field.getIndex()];
36
+ if(fd.off == 0 & fd.len == 0) return "";
37
+ byte[] dst = new byte[fd.len];
38
+ int current_pos = data.position();
39
+ data.position(fd.off);
40
+ data.get(dst,0,fd.len);
41
+ data.position(current_pos);
42
+ String v = new String(dst, "UTF8");
43
+ return v;
44
+ }
45
+
46
+ @Override
47
+ public boolean equals(Object o) {
48
+ if (this == o) return true;
49
+ if (o == null || getClass() != o.getClass()) return false;
50
+
51
+ HTTPParserUrl that = (HTTPParserUrl) o;
52
+
53
+ if (field_set != that.field_set) return false;
54
+ if (port != that.port) return false;
55
+ if (!Arrays.equals(field_data, that.field_data)) return false;
56
+
57
+ return true;
58
+ }
59
+
60
+ @Override
61
+ public int hashCode() {
62
+ int result = field_set;
63
+ result = 31 * result + port;
64
+ result = 31 * result + Arrays.hashCode(field_data);
65
+ return result;
66
+ }
67
+
68
+ @Override
69
+ public String toString() {
70
+ return "HTTPParserUrl{" +
71
+ "field_set=" + field_set +
72
+ ", port=" + port +
73
+ ", field_data=" + (field_data == null ? null : Arrays.asList(field_data)) +
74
+ '}';
75
+ }
76
+ }
@@ -5,45 +5,47 @@ package http_parser;
5
5
  import primitive.collection.ByteList;
6
6
 
7
7
  public class ParserSettings extends http_parser.lolevel.ParserSettings {
8
-
8
+
9
9
  public HTTPCallback on_message_begin;
10
10
  public HTTPDataCallback on_path;
11
11
  public HTTPDataCallback on_query_string;
12
12
  public HTTPDataCallback on_url;
13
13
  public HTTPDataCallback on_fragment;
14
+ public HTTPCallback on_status_complete;
14
15
  public HTTPDataCallback on_header_field;
15
16
  public HTTPDataCallback on_header_value;
16
-
17
+
17
18
  public HTTPCallback on_headers_complete;
18
19
  public HTTPDataCallback on_body;
19
20
  public HTTPCallback on_message_complete;
20
-
21
+
21
22
  public HTTPErrorCallback on_error;
22
-
23
+
23
24
  private HTTPCallback _on_message_begin;
24
25
  private HTTPDataCallback _on_path;
25
26
  private HTTPDataCallback _on_query_string;
26
27
  private HTTPDataCallback _on_url;
27
28
  private HTTPDataCallback _on_fragment;
29
+ private HTTPCallback _on_status_complete;
28
30
  private HTTPDataCallback _on_header_field;
29
31
  private HTTPDataCallback _on_header_value;
30
32
  private HTTPCallback _on_headers_complete;
31
33
  private HTTPDataCallback _on_body;
32
34
  private HTTPCallback _on_message_complete;
33
35
  private HTTPErrorCallback _on_error;
34
-
36
+
35
37
  private http_parser.lolevel.ParserSettings settings;
36
-
38
+
37
39
  protected ByteList field = new ByteList();
38
40
  protected ByteList value = new ByteList();
39
41
  protected ByteList body = new ByteList();
40
-
42
+
41
43
  public ParserSettings() {
42
44
  this.settings = new http_parser.lolevel.ParserSettings();
43
45
  createMirrorCallbacks();
44
46
  attachCallbacks();
45
47
  }
46
-
48
+
47
49
  protected http_parser.lolevel.ParserSettings getLoLevelSettings() {
48
50
  return this.settings;
49
51
  }
@@ -93,7 +95,16 @@ public class ParserSettings extends http_parser.lolevel.ParserSettings {
93
95
  return 0;
94
96
  }
95
97
  };
96
- this._on_error = new HTTPErrorCallback() {
98
+ this._on_status_complete = new HTTPCallback() {
99
+ @Override
100
+ public int cb(HTTPParser p) {
101
+ if (null != ParserSettings.this.on_status_complete) {
102
+ return ParserSettings.this.on_status_complete.cb(p);
103
+ }
104
+ return 0;
105
+ }
106
+ };
107
+ this._on_error = new HTTPErrorCallback() {
97
108
  @Override
98
109
  public void cb(HTTPParser parser, String error) {
99
110
  if (null != ParserSettings.this.on_error) {
@@ -101,11 +112,11 @@ public class ParserSettings extends http_parser.lolevel.ParserSettings {
101
112
  } else {
102
113
  throw new HTTPException(error);
103
114
  }
104
-
115
+
105
116
  }
106
117
  };
107
-
108
-
118
+
119
+
109
120
 
110
121
  // (on_header_field and on_header_value shortened to on_h_*)
111
122
  // ------------------------ ------------ --------------------------------------------
@@ -142,19 +153,19 @@ public class ParserSettings extends http_parser.lolevel.ParserSettings {
142
153
  ParserSettings.this.value.clear();
143
154
  }
144
155
  }
145
-
156
+
146
157
  if (null == ParserSettings.this.on_header_field) {
147
158
  return 0;
148
159
  }
149
-
160
+
150
161
  ParserSettings.this.field.addAll(by);
151
162
  return 0;
152
163
  }
153
164
  };
154
- this._on_header_value = new HTTPDataCallback() {
165
+ this._on_header_value = new HTTPDataCallback() {
155
166
  @Override
156
167
  public int cb(HTTPParser p, byte[] by, int pos, int len) {
157
-
168
+
158
169
  // previous field complete, call on_field with full field value, reset field.
159
170
  if (0 != ParserSettings.this.field.size()) {
160
171
  // check we're even interested...
@@ -167,7 +178,7 @@ public class ParserSettings extends http_parser.lolevel.ParserSettings {
167
178
  ParserSettings.this.field.clear();
168
179
  }
169
180
  }
170
-
181
+
171
182
  if (null == ParserSettings.this.on_header_value) {
172
183
  return 0;
173
184
  }
@@ -195,9 +206,9 @@ public class ParserSettings extends http_parser.lolevel.ParserSettings {
195
206
  }
196
207
  return 0;
197
208
  }
198
-
209
+
199
210
  };
200
- this._on_body = new HTTPDataCallback() {
211
+ this._on_body = new HTTPDataCallback() {
201
212
  @Override
202
213
  public int cb(HTTPParser p, byte[] by, int pos, int len) {
203
214
  if (null != ParserSettings.this.on_body) {
@@ -206,8 +217,8 @@ public class ParserSettings extends http_parser.lolevel.ParserSettings {
206
217
  return 0;
207
218
  }
208
219
  };
209
-
210
- this._on_message_complete = new HTTPCallback() {
220
+
221
+ this._on_message_complete = new HTTPCallback() {
211
222
  @Override
212
223
  public int cb(HTTPParser parser) {
213
224
  if (null != ParserSettings.this.on_body) {
@@ -224,7 +235,7 @@ public class ParserSettings extends http_parser.lolevel.ParserSettings {
224
235
  return 0;
225
236
  }
226
237
  };
227
-
238
+
228
239
  }
229
240
 
230
241
  private void attachCallbacks() {
@@ -234,90 +245,12 @@ public class ParserSettings extends http_parser.lolevel.ParserSettings {
234
245
  this.settings.on_query_string = this._on_query_string;
235
246
  this.settings.on_url = this._on_url;
236
247
  this.settings.on_fragment = this._on_fragment;
248
+ this.settings.on_status_complete = this._on_status_complete;
237
249
  this.settings.on_header_field = this._on_header_field;
238
- this.settings.on_header_value = this._on_header_value;
250
+ this.settings.on_header_value = this._on_header_value;
239
251
  this.settings.on_headers_complete = this._on_headers_complete;
240
252
  this.settings.on_body = this._on_body;
241
253
  this.settings.on_message_complete = this._on_message_complete;
242
254
  this.settings.on_error = this._on_error;
243
255
  }
244
256
  }
245
- //import http_parser.HTTPException;
246
- //public class ParserSettings extends http_parser.lolevel.ParserSettings{
247
- //
248
- //
249
- //
250
- //
251
- // public HTTPCallback on_message_begin;
252
- // public HTTPDataCallback on_path;
253
- // public HTTPDataCallback on_query_string;
254
- // public HTTPDataCallback on_url;
255
- // public HTTPDataCallback on_fragment;
256
- // public HTTPDataCallback on_header_field;
257
- // public HTTPDataCallback on_header_value;
258
- // public HTTPCallback on_headers_complete;
259
- // public HTTPDataCallback on_body;
260
- // public HTTPCallback on_message_complete;
261
- // public HTTPErrorCallback on_error;
262
- //
263
- // void call_on_message_begin (HTTPParser p) {
264
- // call_on(on_message_begin, p);
265
- // }
266
- //
267
- // void call_on_message_complete (HTTPParser p) {
268
- // call_on(on_message_complete, p);
269
- // }
270
- //
271
- // // this one is a little bit different:
272
- // // the current `position` of the buffer is the location of the
273
- // // error, `ini_pos` indicates where the position of
274
- // // the buffer when it was passed to the `execute` method of the parser, i.e.
275
- // // using this information and `limit` we'll know all the valid data
276
- // // in the buffer around the error we can use to print pretty error
277
- // // messages.
278
- // void call_on_error (HTTPParser p, String mes, ByteBuffer buf, int ini_pos) {
279
- // if (null != on_error) {
280
- // on_error.cb(p, mes, buf, ini_pos);
281
- // }
282
- // // if on_error gets called it MUST throw an exception, else the parser
283
- // // will attempt to continue parsing, which it can't because it's
284
- // // in an invalid state.
285
- // throw new HTTPException(mes);
286
- // }
287
- //
288
- // void call_on_header_field (HTTPParser p, ByteBuffer buf, int pos, int len) {
289
- // call_on(on_header_field, p, buf, pos, len);
290
- // }
291
- // void call_on_query_string (HTTPParser p, ByteBuffer buf, int pos, int len) {
292
- // call_on(on_query_string, p, buf, pos, len);
293
- // }
294
- // void call_on_fragment (HTTPParser p, ByteBuffer buf, int pos, int len) {
295
- // call_on(on_fragment, p, buf, pos, len);
296
- // }
297
- // void call_on_path (HTTPParser p, ByteBuffer buf, int pos, int len) {
298
- // call_on(on_path, p, buf, pos, len);
299
- // }
300
- // void call_on_header_value (HTTPParser p, ByteBuffer buf, int pos, int len) {
301
- // call_on(on_header_value, p, buf, pos, len);
302
- // }
303
- // void call_on_url (HTTPParser p, ByteBuffer buf, int pos, int len) {
304
- // call_on(on_url, p, buf, pos, len);
305
- // }
306
- // void call_on_body(HTTPParser p, ByteBuffer buf, int pos, int len) {
307
- // call_on(on_body, p, buf, pos, len);
308
- // }
309
- // void call_on_headers_complete(HTTPParser p) {
310
- // call_on(on_headers_complete, p);
311
- // }
312
- // void call_on (HTTPCallback cb, HTTPParser p) {
313
- // // cf. CALLBACK2 macro
314
- // if (null != cb) {
315
- // cb.cb(p);
316
- // }
317
- // }
318
- // void call_on (HTTPDataCallback cb, HTTPParser p, ByteBuffer buf, int pos, int len) {
319
- // if (null != cb && -1 != pos) {
320
- // cb.cb(p,buf,pos,len);
321
- // }
322
- // }
323
- //}
@@ -30,7 +30,7 @@ public class Util {
30
30
  //
31
31
  // }
32
32
 
33
- public static String error (String mes, ByteBuffer b, int begining) {
33
+ public static String error (String mes, ByteBuffer b, int beginning) {
34
34
  // the error message should look like this:
35
35
  //
36
36
  // Bla expected something, but it's not there (mes)
@@ -50,7 +50,7 @@ public class Util {
50
50
  final int mes_width = 72;
51
51
  int p = b.position(); // error position
52
52
  int end = b.limit(); // this is the end
53
- int m = end - begining; // max mes length
53
+ int m = end - beginning; // max mes length
54
54
 
55
55
  StringBuilder builder = new StringBuilder();
56
56
  int p_adj = p;
@@ -58,9 +58,9 @@ public class Util {
58
58
  byte [] orig = new byte[0];
59
59
  if (m <= mes_width) {
60
60
  orig = new byte[m];
61
- b.position(begining);
61
+ b.position(beginning);
62
62
  b.get(orig, 0, m);
63
- p_adj = p-begining;
63
+ p_adj = p-beginning;
64
64
 
65
65
 
66
66
  } else {
@@ -73,7 +73,7 @@ public class Util {
73
73
  // CAN'T be not enough stuff around p in total, because
74
74
  // m>meswidth (see if to this else)
75
75
 
76
- int before = p-begining;
76
+ int before = p-beginning;
77
77
  int after = end - p;
78
78
  if ( (before > mes_width/2) && (after > mes_width/2)) {
79
79
  // plenty of stuff in front of and behind error
@@ -82,7 +82,7 @@ public class Util {
82
82
  b.get(orig, 0, mes_width);
83
83
  } else if (before <= mes_width/2) {
84
84
  // take all of the beginning.
85
- b.position(begining);
85
+ b.position(beginning);
86
86
  // and as much of the rest as possible
87
87
 
88
88
  b.get(orig, 0, mes_width);
@@ -3,6 +3,7 @@ package http_parser.lolevel;
3
3
  import java.nio.ByteBuffer;
4
4
  import http_parser.HTTPException;
5
5
  import http_parser.HTTPMethod;
6
+ import http_parser.HTTPParserUrl;
6
7
  import http_parser.ParserType;
7
8
  import static http_parser.lolevel.HTTPParser.C.*;
8
9
  import static http_parser.lolevel.HTTPParser.State.*;
@@ -16,14 +17,13 @@ public class HTTPParser {
16
17
  HState header_state;
17
18
  boolean strict;
18
19
 
19
- int index;
20
+ int index;
20
21
  int flags; // TODO
21
22
 
22
23
  int nread;
23
- int content_length;
24
+ long content_length;
24
25
 
25
- int start_position;
26
- ByteBuffer data;
26
+ int p_start; // updated each call to execute to indicate where the buffer was before we began calling it.
27
27
 
28
28
  /** READ-ONLY **/
29
29
  public int http_major;
@@ -36,27 +36,25 @@ public class HTTPParser {
36
36
  * Should be checked when http_parser_execute() returns in addition to
37
37
  * error checking.
38
38
  */
39
- public boolean upgrade;
39
+ public boolean upgrade;
40
40
 
41
41
  /** PUBLIC **/
42
42
  // TODO : this is used in c to maintain application state.
43
43
  // is this even necessary? we have state in java ?
44
- // consider
44
+ // consider
45
45
  // Object data; /* A pointer to get hook to the "connection" or "socket" object */
46
-
47
46
 
48
- /*
47
+
48
+ /*
49
49
  * technically we could combine all of these (except for url_mark) into one
50
50
  * variable, saving stack space, but it seems more clear to have them
51
- * separated.
51
+ * separated.
52
52
  */
53
53
  int header_field_mark = -1;
54
54
  int header_value_mark = -1;
55
- int fragment_mark = -1;
56
- int query_string_mark = -1;
57
- int path_mark = -1;
58
55
  int url_mark = -1;
59
-
56
+ int body_mark = -1;
57
+
60
58
  /**
61
59
  * Construct a Parser for ParserType.HTTP_BOTH, meaning it
62
60
  * determines whether it's parsing a request or a response.
@@ -64,9 +62,9 @@ public class HTTPParser {
64
62
  public HTTPParser() {
65
63
  this(ParserType.HTTP_BOTH);
66
64
  }
67
-
65
+
68
66
  /**
69
- * Construct a Parser and initialise it to parse either
67
+ * Construct a Parser and initialise it to parse either
70
68
  * requests or responses.
71
69
  */
72
70
  public HTTPParser(ParserType type) {
@@ -79,18 +77,184 @@ public class HTTPParser {
79
77
  this.state = State.start_res;
80
78
  break;
81
79
  case HTTP_BOTH:
82
- this.state = State.start_res_or_res;
80
+ this.state = State.start_req_or_res;
83
81
  break;
84
82
  default:
85
83
  throw new HTTPException("can't happen, invalid ParserType enum");
86
84
  }
87
85
  }
88
-
86
+
89
87
  /*
90
88
  * Utility to facilitate System.out.println style debugging (the way god intended)
91
89
  */
92
90
  static void p(Object o) {System.out.println(o);}
93
91
 
92
+ /** Comment from C version follows
93
+ *
94
+ * Our URL parser.
95
+ *
96
+ * This is designed to be shared by http_parser_execute() for URL validation,
97
+ * hence it has a state transition + byte-for-byte interface. In addition, it
98
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
99
+ * work of turning state transitions URL components for its API.
100
+ *
101
+ * This function should only be invoked with non-space characters. It is
102
+ * assumed that the caller cares about (and can detect) the transition between
103
+ * URL and non-URL states by looking for these.
104
+ */
105
+ public State parse_url_char(byte ch) {
106
+
107
+ int chi = ch & 0xff; // utility, ch without signedness for table lookups.
108
+
109
+ if(SPACE == ch){
110
+ throw new HTTPException("space as url char");
111
+ }
112
+
113
+ switch(state) {
114
+ case req_spaces_before_url:
115
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
116
+ * All methods except CONNECT are followed by '/' or '*'.
117
+ */
118
+ if(SLASH == ch || STAR == ch){
119
+ return req_path;
120
+ }
121
+ if(isAtoZ(ch)){
122
+ return req_schema;
123
+ }
124
+ break;
125
+ case req_schema:
126
+ if(isAtoZ(ch)){
127
+ return req_schema;
128
+ }
129
+ if(COLON == ch){
130
+ return req_schema_slash;
131
+ }
132
+ break;
133
+ case req_schema_slash:
134
+ if(SLASH == ch){
135
+ return req_schema_slash_slash;
136
+ }
137
+ break;
138
+ case req_schema_slash_slash:
139
+ if(SLASH == ch){
140
+ return req_host_start;
141
+ }
142
+ break;
143
+ case req_host_start:
144
+ if (ch == (byte)'[') {
145
+ return req_host_v6_start;
146
+ }
147
+ if (isHostChar(ch)) {
148
+ return req_host;
149
+ }
150
+ break;
151
+
152
+ case req_host:
153
+ if (isHostChar(ch)) {
154
+ return req_host;
155
+ }
156
+
157
+ /* FALLTHROUGH */
158
+ case req_host_v6_end:
159
+ switch (ch) {
160
+ case ':':
161
+ return req_port_start;
162
+ case '/':
163
+ return req_path;
164
+ case '?':
165
+ return req_query_string_start;
166
+ }
167
+ break;
168
+
169
+ case req_host_v6:
170
+ if (ch == ']') {
171
+ return req_host_v6_end;
172
+ }
173
+
174
+ /* FALLTHROUGH */
175
+ case req_host_v6_start:
176
+ if (isHex(ch) || ch == ':') {
177
+ return req_host_v6;
178
+ }
179
+ break;
180
+
181
+ case req_port:
182
+ switch (ch) {
183
+ case '/':
184
+ return req_path;
185
+ case '?':
186
+ return req_query_string_start;
187
+ }
188
+
189
+ /* FALLTHROUGH */
190
+ case req_port_start:
191
+ if (isDigit(ch)) {
192
+ return req_port;
193
+ }
194
+ break;
195
+
196
+ case req_path:
197
+ if (isNormalUrlChar(chi)) {
198
+ return req_path;
199
+ }
200
+ switch (ch) {
201
+ case '?':
202
+ return req_query_string_start;
203
+ case '#':
204
+ return req_fragment_start;
205
+ }
206
+
207
+ break;
208
+
209
+ case req_query_string_start:
210
+ case req_query_string:
211
+ if (isNormalUrlChar(chi)) {
212
+ return req_query_string;
213
+ }
214
+
215
+ switch (ch) {
216
+ case '?':
217
+ /* allow extra '?' in query string */
218
+ return req_query_string;
219
+
220
+ case '#':
221
+ return req_fragment_start;
222
+ }
223
+
224
+ break;
225
+
226
+ case req_fragment_start:
227
+ if (isNormalUrlChar(chi)) {
228
+ return req_fragment;
229
+ }
230
+ switch (ch) {
231
+ case '?':
232
+ return req_fragment;
233
+
234
+ case '#':
235
+ return req_fragment_start;
236
+ }
237
+ break;
238
+
239
+ case req_fragment:
240
+ if (isNormalUrlChar(ch)) {
241
+ return req_fragment;
242
+ }
243
+
244
+ switch (ch) {
245
+ case '?':
246
+ case '#':
247
+ return req_fragment;
248
+ }
249
+
250
+ break;
251
+ default:
252
+ break;
253
+ }
254
+
255
+ /* We should never fall out of the switch above unless there's an error */
256
+ return dead;
257
+ }
94
258
 
95
259
  /** Execute the parser with the currently available data contained in
96
260
  * the buffer. The buffers position() and limit() need to be set
@@ -100,30 +264,29 @@ public class HTTPParser {
100
264
  public int execute(ParserSettings settings, ByteBuffer data) {
101
265
 
102
266
  int p = data.position();
103
- int p_err = p; // this is used for pretty printing errors.
104
-
105
- this.start_position = p;
106
- this.data = data;
267
+ this.p_start = p; // this is used for pretty printing errors.
268
+ // and returning the amount of processed bytes.
269
+
107
270
 
108
271
  // In case the headers don't provide information about the content
109
272
  // length, `execute` needs to be called with an empty buffer to
110
273
  // indicate that all the data has been send be the client/server,
111
- // else there is no way of knowing the message is complete.
274
+ // else there is no way of knowing the message is complete.
112
275
  int len = (data.limit() - data.position());
113
276
  if (0 == len) {
114
- // if (State.body_identity_eof == state) {
115
- // settings.call_on_message_complete(this);
116
- // }
277
+ // if (State.body_identity_eof == state) {
278
+ // settings.call_on_message_complete(this);
279
+ // }
117
280
  switch (state) {
118
281
  case body_identity_eof:
119
282
  settings.call_on_message_complete(this);
120
- return data.position() - start_position;
283
+ return data.position() - this.p_start;
121
284
 
122
285
  case dead:
123
- case start_res_or_res:
286
+ case start_req_or_res:
124
287
  case start_res:
125
288
  case start_req:
126
- return data.position() - start_position;
289
+ return data.position() - this.p_start;
127
290
 
128
291
  default:
129
292
  // should we really consider this an error!?
@@ -131,7 +294,7 @@ public class HTTPParser {
131
294
  }
132
295
  }
133
296
 
134
-
297
+
135
298
  // in case the _previous_ call to the parser only has data to get to
136
299
  // the middle of certain fields, we need to update marks to point at
137
300
  // the beginning of the current buffer.
@@ -142,79 +305,85 @@ public class HTTPParser {
142
305
  case header_value:
143
306
  header_value_mark = p;
144
307
  break;
145
- case req_fragment:
146
- fragment_mark = p;
147
- url_mark = p;
148
- break;
149
- case req_query_string:
150
- query_string_mark = p;
151
- url_mark = p;
152
- break;
153
308
  case req_path:
154
- path_mark = p;
155
-
156
- case req_host:
157
309
  case req_schema:
158
310
  case req_schema_slash:
159
311
  case req_schema_slash_slash:
312
+ case req_host_start:
313
+ case req_host_v6_start:
314
+ case req_host_v6:
315
+ case req_host_v6_end:
316
+ case req_host:
317
+ case req_port_start:
160
318
  case req_port:
161
319
  case req_query_string_start:
320
+ case req_query_string:
162
321
  case req_fragment_start:
322
+ case req_fragment:
163
323
  url_mark = p;
164
324
  break;
165
325
  }
326
+ boolean reexecute = false;
327
+ int pe = 0;
328
+ byte ch = 0;
329
+ int chi = 0;
330
+ byte c = -1;
331
+ int to_read = 0;
166
332
 
167
333
  // this is where the work gets done, traverse the available data...
168
- while (data.position() != data.limit()) {
169
-
170
- p = data.position();
171
- int pe = data.limit();
172
-
173
- byte ch = data.get(); // the current character to process.
174
- int chi = ch & 0xff; // utility, ch without signedness for table lookups.
175
- byte c = -1; // utility variably used for up- and downcasing etc.
176
- int to_read = 0; // used to keep track of how much of body, etc. is left to read
177
-
178
- if (parsing_header(state)) {
179
- ++nread;
180
- if (nread > HTTP_MAX_HEADER_SIZE) {
181
- settings.call_on_error(this, "possible buffer overflow", data, p_err);
182
- return error();
334
+ while (data.position() != data.limit() || reexecute) {
335
+ // p(state + ": r: " + reexecute + " :: " +p );
336
+
337
+ if(!reexecute){
338
+ p = data.position();
339
+ pe = data.limit();
340
+ ch = data.get(); // the current character to process.
341
+ chi = ch & 0xff; // utility, ch without signedness for table lookups.
342
+ c = -1; // utility variably used for up- and downcasing etc.
343
+ to_read = 0; // used to keep track of how much of body, etc. is left to read
344
+
345
+ if (parsing_header(state)) {
346
+ ++nread;
347
+ if (nread > HTTP_MAX_HEADER_SIZE) {
348
+ return error(settings, "possible buffer overflow", data);
349
+ }
183
350
  }
184
351
  }
185
- //p(state + ":" + ch +":"+p);
352
+ reexecute = false;
353
+ // p(state + " ::: " + ch + " : " + (((CR == ch) || (LF == ch)) ? ch : ("'" + (char)ch + "'")) +": "+p );
354
+
186
355
  switch (state) {
187
356
  /*
188
357
  * this state is used after a 'Connection: close' message
189
358
  * the parser will error out if it reads another message
190
359
  */
191
360
  case dead:
192
- settings.call_on_error(this, "Connection already closed", data, p_err);
193
- return error();
361
+ if (CR == ch || LF == ch){
362
+ break;
363
+ }
364
+ return error(settings, "Connection already closed", data);
194
365
 
195
366
 
196
367
 
197
- case start_res_or_res:
368
+ case start_req_or_res:
198
369
  if (CR == ch || LF == ch){
199
370
  break;
200
371
  }
201
372
  flags = 0;
202
373
  content_length = -1;
203
374
 
204
- settings.call_on_message_begin(this);
205
-
206
- if (H == ch) {
375
+ if (H == ch) {
207
376
  state = State.res_or_resp_H;
208
377
  } else {
209
- type = ParserType.HTTP_REQUEST;
210
- method = start_req_method_assign(ch);
378
+ type = ParserType.HTTP_REQUEST;
379
+ method = start_req_method_assign(ch);
211
380
  if (null == method) {
212
- settings.call_on_error(this, "invalid method", data, p_err);
213
- return error();
381
+ return error(settings, "invalid method", data);
214
382
  }
215
383
  index = 1;
216
384
  state = State.req_method;
217
385
  }
386
+ settings.call_on_message_begin(this);
218
387
  break;
219
388
 
220
389
 
@@ -225,8 +394,7 @@ public class HTTPParser {
225
394
  state = State.res_HT;
226
395
  } else {
227
396
  if (E != ch) {
228
- settings.call_on_error(this, "not E", data, p_err);
229
- return error();
397
+ return error(settings, "not E", data);
230
398
  }
231
399
  type = ParserType.HTTP_REQUEST;
232
400
  method = HTTPMethod.HTTP_HEAD;
@@ -241,8 +409,6 @@ public class HTTPParser {
241
409
  flags = 0;
242
410
  content_length = -1;
243
411
 
244
- settings.call_on_message_begin(this);
245
-
246
412
  switch(ch) {
247
413
  case H:
248
414
  state = State.res_H;
@@ -251,38 +417,35 @@ public class HTTPParser {
251
417
  case LF:
252
418
  break;
253
419
  default:
254
- settings.call_on_error(this, "Not H or CR/LF", data, p_err);
255
- return error();
420
+ return error(settings, "Not H or CR/LF", data);
256
421
  }
422
+
423
+ settings.call_on_message_begin(this);
257
424
  break;
258
425
 
259
426
 
260
427
 
261
428
  case res_H:
262
429
  if (strict && T != ch) {
263
- settings.call_on_error(this, "Not T", data, p_err);
264
- return error();
430
+ return error(settings, "Not T", data);
265
431
  }
266
432
  state = State.res_HT;
267
433
  break;
268
434
  case res_HT:
269
435
  if (strict && T != ch) {
270
- settings.call_on_error(this, "Not T2", data, p_err);
271
- return error();
436
+ return error(settings, "Not T2", data);
272
437
  }
273
438
  state = State.res_HTT;
274
439
  break;
275
440
  case res_HTT:
276
441
  if (strict && P != ch) {
277
- settings.call_on_error(this, "Not P", data, p_err);
278
- return error();
442
+ return error(settings, "Not P", data);
279
443
  }
280
444
  state = State.res_HTTP;
281
445
  break;
282
446
  case res_HTTP:
283
447
  if (strict && SLASH != ch) {
284
- settings.call_on_error(this, "Not '/'", data, p_err);
285
- return error();
448
+ return error(settings, "Not '/'", data);
286
449
  }
287
450
  state = State.res_first_http_major;
288
451
  break;
@@ -291,8 +454,7 @@ public class HTTPParser {
291
454
 
292
455
  case res_first_http_major:
293
456
  if (!isDigit(ch)) {
294
- settings.call_on_error(this, "Not a digit", data, p_err);
295
- return error();
457
+ return error(settings, "Not a digit", data);
296
458
  }
297
459
  http_major = (int) ch - 0x30;
298
460
  state = State.res_http_major;
@@ -305,23 +467,20 @@ public class HTTPParser {
305
467
  break;
306
468
  }
307
469
  if (!isDigit(ch)) {
308
- settings.call_on_error(this, "Not a digit", data, p_err);
309
- return error();
470
+ return error(settings, "Not a digit", data);
310
471
  }
311
472
  http_major *= 10;
312
473
  http_major += (ch - 0x30);
313
474
 
314
475
  if (http_major > 999) {
315
- settings.call_on_error(this, "invalid http major version: "+http_major, data, p_err);
316
- return error();
476
+ return error(settings, "invalid http major version: ", data);
317
477
  }
318
478
  break;
319
-
479
+
320
480
  /* first digit of minor HTTP version */
321
481
  case res_first_http_minor:
322
482
  if (!isDigit(ch)) {
323
- settings.call_on_error(this, "Not a digit", data, p_err);
324
- return error();
483
+ return error(settings, "Not a digit", data);
325
484
  }
326
485
  http_minor = (int)ch - 0x30;
327
486
  state = State.res_http_minor;
@@ -334,14 +493,12 @@ public class HTTPParser {
334
493
  break;
335
494
  }
336
495
  if (!isDigit(ch)) {
337
- settings.call_on_error(this, "Not a digit", data, p_err);
338
- return error();
496
+ return error(settings, "Not a digit", data);
339
497
  }
340
498
  http_minor *= 10;
341
499
  http_minor += (ch - 0x30);
342
500
  if (http_minor > 999) {
343
- settings.call_on_error(this, "invalid http minor version: "+http_minor, data, p_err);
344
- return error();
501
+ return error(settings, "invalid http minor version: ", data);
345
502
  }
346
503
  break;
347
504
 
@@ -352,8 +509,7 @@ public class HTTPParser {
352
509
  if (SPACE == ch) {
353
510
  break;
354
511
  }
355
- settings.call_on_error(this, "Not a digit (status code)", data, p_err);
356
- return error();
512
+ return error(settings, "Not a digit (status code)", data);
357
513
  }
358
514
  status_code = (int)ch - 0x30;
359
515
  state = State.res_status_code;
@@ -372,29 +528,31 @@ public class HTTPParser {
372
528
  state = State.header_field_start;
373
529
  break;
374
530
  default:
375
- settings.call_on_error(this, "not a valid status code", data, p_err);
376
- return error();
531
+ return error(settings, "not a valid status code", data);
377
532
  }
378
533
  break;
379
534
  }
380
535
  status_code *= 10;
381
536
  status_code += (int)ch - 0x30;
382
537
  if (status_code > 999) {
383
- settings.call_on_error(this, "ridiculous status code:"+status_code, data, p_err);
384
- return error();
538
+ return error(settings, "ridiculous status code:", data);
539
+ }
540
+
541
+ if (status_code > 99) {
542
+ settings.call_on_status_complete(this);
385
543
  }
386
544
  break;
387
545
 
388
546
  case res_status:
389
547
  /* the human readable status. e.g. "NOT FOUND"
390
- * we are not humans so just ignore this
548
+ * we are not humans so just ignore this
391
549
  * we are not men, we are devo. */
392
550
 
393
551
  if (CR == ch) {
394
552
  state = State.res_line_almost_done;
395
553
  break;
396
554
  }
397
- if (LF == ch) {
555
+ if (LF == ch) {
398
556
  state = State.header_field_start;
399
557
  break;
400
558
  }
@@ -402,8 +560,7 @@ public class HTTPParser {
402
560
 
403
561
  case res_line_almost_done:
404
562
  if (strict && LF != ch) {
405
- settings.call_on_error(this, "not LF", data, p_err);
406
- return error();
563
+ return error(settings, "not LF", data);
407
564
  }
408
565
  state = State.header_field_start;
409
566
  break;
@@ -416,24 +573,28 @@ public class HTTPParser {
416
573
  }
417
574
  flags = 0;
418
575
  content_length = -1;
419
- settings.call_on_message_begin(this);
576
+
577
+ if(!isAtoZ(ch)){
578
+ return error(settings, "invalid method", data);
579
+ }
580
+
420
581
  method = start_req_method_assign(ch);
421
582
  if (null == method) {
422
- settings.call_on_error(this, "invalid method", data, p_err);
423
- return error();
583
+ return error(settings, "invalid method", data);
424
584
  }
425
585
  index = 1;
426
586
  state = State.req_method;
587
+
588
+ settings.call_on_message_begin(this);
427
589
  break;
428
-
590
+
429
591
 
430
592
 
431
593
  case req_method:
432
594
  if (0 == ch) {
433
- settings.call_on_error(this, "NULL in method", data, p_err);
434
- return error();
595
+ return error(settings, "NULL in method", data);
435
596
  }
436
-
597
+
437
598
  byte [] arr = method.bytes;
438
599
 
439
600
  if (SPACE == ch && index == arr.length) {
@@ -456,22 +617,33 @@ public class HTTPParser {
456
617
  } else if (2 == index && A == ch) {
457
618
  method = HTTPMethod.HTTP_MKACTIVITY;
458
619
  }
459
- } else if (1 == index && HTTPMethod.HTTP_POST == method && R == ch) {
460
- method = HTTPMethod.HTTP_PROPFIND;
461
- } else if (1 == index && HTTPMethod.HTTP_POST == method && U == ch) {
462
- method = HTTPMethod.HTTP_PUT;
463
- } else if (2 == index && HTTPMethod.HTTP_UNLOCK == method && S == ch) {
464
- method = HTTPMethod.HTTP_UNSUBSCRIBE;
465
- } else if (4 == index && HTTPMethod.HTTP_PROPFIND == method && P == ch) {
620
+ } else if (1 == index && HTTPMethod.HTTP_POST == method) {
621
+ if(R == ch) {
622
+ method = HTTPMethod.HTTP_PROPFIND; /* or HTTP_PROPPATCH */
623
+ }else if(U == ch){
624
+ method = HTTPMethod.HTTP_PUT; /* or HTTP_PURGE */
625
+ }else if(A == ch){
626
+ method = HTTPMethod.HTTP_PATCH;
627
+ }
628
+ } else if (2 == index) {
629
+ if(HTTPMethod.HTTP_PUT == method) {
630
+ if(R == ch){
631
+ method = HTTPMethod.HTTP_PURGE;
632
+ }
633
+ }else if(HTTPMethod.HTTP_UNLOCK == method){
634
+ if(S == ch){
635
+ method = HTTPMethod.HTTP_UNSUBSCRIBE;
636
+ }
637
+ }
638
+ }else if(4 == index && HTTPMethod.HTTP_PROPFIND == method && P == ch){
466
639
  method = HTTPMethod.HTTP_PROPPATCH;
467
640
  } else {
468
- settings.call_on_error(this, "Invalid HTTP method", data, p_err);
469
- return error();
641
+ return error(settings, "Invalid HTTP method", data);
470
642
  }
471
643
 
472
644
  ++index;
473
645
  break;
474
-
646
+
475
647
 
476
648
 
477
649
  /******************* URL *******************/
@@ -479,324 +651,68 @@ public class HTTPParser {
479
651
  if (SPACE == ch) {
480
652
  break;
481
653
  }
482
- if (SLASH == ch || STAR == ch) {
483
- url_mark = p;
484
- path_mark = p;
485
- state = State.req_path;
486
- break;
487
- }
488
- if (isAtoZ(ch)) {
489
- url_mark = p;
490
- state = State.req_schema;
491
- break;
492
- }
493
- settings.call_on_error(this, "Invalid something", data, p_err);
494
- return error();
495
-
496
- case req_schema:
497
- if (isAtoZ(ch)){
498
- break;
499
- }
500
- if (COLON == ch) {
501
- state = State.req_schema_slash;
502
- break;
503
- } else if (DOT == ch || isDigit(ch)) {
504
- state = State.req_host;
505
- break;
654
+ url_mark = p;
655
+ if(HTTPMethod.HTTP_CONNECT == method){
656
+ state = req_host_start;
506
657
  }
507
- settings.call_on_error(this, "invalid char in schema: "+ch, data, p_err);
508
- return error();
509
658
 
510
- case req_schema_slash:
511
- if (strict && SLASH != ch) {
512
- settings.call_on_error(this, "invalid char in schema, not /", data, p_err);
513
- return error();
659
+ state = parse_url_char(ch);
660
+ if(state == dead){
661
+ return error(settings, "Invalid something", data);
514
662
  }
515
- state = State.req_schema_slash_slash;
516
663
  break;
517
664
 
665
+
666
+ case req_schema:
667
+ case req_schema_slash:
518
668
  case req_schema_slash_slash:
519
- if (strict && SLASH != ch) {
520
- settings.call_on_error(this, "invalid char in schema, not /", data, p_err);
521
- return error();
522
- }
523
- state = State.req_host;
524
- break;
525
-
526
- case req_host:
527
- if (isAtoZ(ch)) {
528
- break;
529
- }
530
- if (isDigit(ch) || DOT == ch || DASH == ch) break;
669
+ case req_host_start:
670
+ case req_host_v6_start:
671
+ case req_host_v6:
672
+ case req_port_start:
531
673
  switch (ch) {
532
- case COLON:
533
- state = State.req_port;
534
- break;
535
- case SLASH:
536
- path_mark = p;
537
- break;
674
+ /* No whitespace allowed here */
538
675
  case SPACE:
539
- /* The request line looks like:
540
- * "GET http://foo.bar.com HTTP/1.1"
541
- * That is, there is no path.
542
- */
543
- settings.call_on_url(this, data, url_mark, p-url_mark);
544
- url_mark = -1;
545
- state = State.req_http_start;
546
- break;
547
- case QMARK:
548
- state = State.req_query_string_start;
549
- break;
676
+ case CR:
677
+ case LF:
678
+ return error(settings, "unexpected char in path", data);
550
679
  default:
551
- settings.call_on_error(this, "host error in method line", data, p_err);
552
- return error();
680
+ state = parse_url_char(ch);
681
+ if(dead == state){
682
+ return error(settings, "unexpected char in path", data);
683
+ }
553
684
  }
554
685
  break;
555
686
 
687
+ case req_host:
688
+ case req_host_v6_end:
556
689
  case req_port:
557
- if (isDigit(ch)) break;
558
- switch (ch) {
559
- case SLASH:
560
- path_mark = p;
561
- state = State.req_path;
562
- break;
563
- case SPACE:
564
- /* The request line looks like:
565
- * "GET http://foo.bar.com:1234 HTTP/1.1"
566
- * That is, there is no path.
567
- */
568
- settings.call_on_url(this,data,url_mark,p-url_mark);
569
- url_mark = -1;
570
- state = State.req_http_start;
571
- break;
572
- case QMARK:
573
- state = State.req_query_string_start;
574
- break;
575
- default:
576
- settings.call_on_error(this, "invalid port", data, p_err);
577
- return error();
578
- }
579
- break;
580
-
581
690
  case req_path:
582
- if (normal_url_char[chi]) break;
583
- switch (ch) {
584
- case SPACE:
585
- settings.call_on_url(this,data,url_mark, p-url_mark);
586
- url_mark = -1;
587
-
588
- settings.call_on_path(this,data,path_mark, p-path_mark);
589
- path_mark = -1;
590
-
591
- state = State.req_http_start;
592
- break;
593
-
594
- case CR:
595
- settings.call_on_url(this,data,url_mark, p-url_mark);
596
- url_mark = -1;
597
-
598
- settings.call_on_path(this,data,path_mark, p-path_mark);
599
- path_mark = -1;
600
-
601
- http_minor = 9;
602
- state = State.res_line_almost_done;
603
- break;
604
-
605
- case LF:
606
- settings.call_on_url(this,data,url_mark, p-url_mark);
607
- url_mark = -1;
608
-
609
- settings.call_on_path(this,data,path_mark, p-path_mark);
610
- path_mark = -1;
611
-
612
- http_minor = 9;
613
- state = State.header_field_start;
614
- break;
615
-
616
- case QMARK:
617
- settings.call_on_path(this,data,path_mark, p-path_mark);
618
- path_mark = -1;
619
-
620
- state = State.req_query_string_start;
621
- break;
622
-
623
- case HASH:
624
- settings.call_on_path(this,data,path_mark, p-path_mark);
625
- path_mark = -1;
626
-
627
- state = State.req_fragment_start;
628
- break;
629
-
630
- default:
631
- settings.call_on_error(this, "unexpected char in path", data, p_err);
632
- return error();
633
- }
634
- break;
635
-
636
691
  case req_query_string_start:
637
- if (normal_url_char[chi]) {
638
- query_string_mark = p;
639
- state = State.req_query_string;
640
- break;
641
- }
642
-
643
- switch (ch) {
644
- case QMARK: break;
645
- case SPACE:
646
- settings.call_on_url(this, data, url_mark, p-url_mark);
647
- url_mark = -1;
648
- state = State.req_http_start;
649
- break;
650
- case CR:
651
- settings.call_on_url(this,data,url_mark, p-url_mark);
652
- url_mark = -1;
653
- http_minor = 9;
654
- state = State.res_line_almost_done;
655
- break;
656
- case LF:
657
- settings.call_on_url(this,data,url_mark, p-url_mark);
658
- url_mark = -1;
659
- http_minor = 9;
660
- state = State.header_field_start;
661
- break;
662
- case HASH:
663
- state = State.req_fragment_start;
664
- break;
665
- default:
666
- settings.call_on_error(this, "unexpected char in path", data, p_err);
667
- return error();
668
- }
669
- break;
670
-
671
692
  case req_query_string:
672
- if (normal_url_char[chi]) {
673
- break;
674
- }
675
-
676
- switch (ch) {
677
- case QMARK: break; // allow extra '?' in query string
678
- case SPACE:
679
- settings.call_on_url(this, data, url_mark, p-url_mark);
680
- url_mark = -1;
681
-
682
- settings.call_on_query_string(this, data, query_string_mark, p-query_string_mark);
683
- query_string_mark = -1;
684
-
685
- state = State.req_http_start;
686
- break;
687
- case CR:
688
- settings.call_on_url(this,data,url_mark, p-url_mark);
689
- url_mark = -1;
690
-
691
- settings.call_on_query_string(this, data, query_string_mark, p-query_string_mark);
692
- query_string_mark = -1;
693
-
694
- http_minor = 9;
695
- state = State.res_line_almost_done;
696
- break;
697
- case LF:
698
- settings.call_on_url(this,data,url_mark, p-url_mark);
699
- url_mark = -1;
700
-
701
- settings.call_on_query_string(this, data, query_string_mark, p-query_string_mark);
702
- query_string_mark = -1;
703
- http_minor = 9;
704
-
705
- state = State.header_field_start;
706
- break;
707
- case HASH:
708
- settings.call_on_query_string(this, data, query_string_mark, p-query_string_mark);
709
- query_string_mark = -1;
710
-
711
- state = State.req_fragment_start;
712
- break;
713
- default:
714
- settings.call_on_error(this, "unexpected char in path", data, p_err);
715
- return error();
716
- }
717
- break;
718
-
719
693
  case req_fragment_start:
720
- if (normal_url_char[chi]) {
721
- fragment_mark = p;
722
- state = State.req_fragment;
723
- break;
724
- }
725
-
726
- switch (ch) {
727
- case SPACE:
728
- settings.call_on_url(this, data, url_mark, p-url_mark);
729
- url_mark = -1;
730
-
731
- state = State.req_http_start;
732
- break;
733
- case CR:
734
- settings.call_on_url(this,data,url_mark, p-url_mark);
735
- url_mark = -1;
736
-
737
- http_minor = 9;
738
- state = State.res_line_almost_done;
739
- break;
740
- case LF:
741
- settings.call_on_url(this,data,url_mark, p-url_mark);
742
- url_mark = -1;
743
-
744
- http_minor = 9;
745
- state = State.header_field_start;
746
- break;
747
- case QMARK:
748
- fragment_mark = p;
749
- state = State.req_fragment;
750
- break;
751
- case HASH:
752
- break;
753
- default:
754
- settings.call_on_error(this, "unexpected char in path", data, p_err);
755
- return error();
756
- }
757
- break;
758
-
759
694
  case req_fragment:
760
- if (normal_url_char[chi]) {
761
- break;
762
- }
763
-
764
695
  switch (ch) {
765
- case SPACE:
696
+ case SPACE:
766
697
  settings.call_on_url(this, data, url_mark, p-url_mark);
698
+ settings.call_on_path(this, data, url_mark, p - url_mark);
767
699
  url_mark = -1;
768
-
769
- settings.call_on_fragment(this, data, fragment_mark, p-fragment_mark);
770
- fragment_mark = -1;
771
-
772
700
  state = State.req_http_start;
773
701
  break;
774
702
  case CR:
775
- settings.call_on_url(this,data,url_mark, p-url_mark);
776
- url_mark = -1;
777
-
778
- settings.call_on_fragment(this, data, query_string_mark, p-query_string_mark);
779
- fragment_mark = -1;
780
-
781
- http_minor = 9;
782
- state = State.res_line_almost_done;
783
- break;
784
703
  case LF:
785
- settings.call_on_url(this,data,url_mark, p-url_mark);
786
- url_mark = -1;
787
-
788
- settings.call_on_fragment(this, data, query_string_mark, p-query_string_mark);
789
- fragment_mark = -1;
790
-
704
+ http_major = 0;
791
705
  http_minor = 9;
792
- state = State.header_field_start;
793
- break;
794
- case QMARK:
795
- case HASH:
706
+ state = (CR == ch) ? req_line_almost_done : header_field_start;
707
+ settings.call_on_url(this, data, url_mark, p-url_mark); //TODO check params!!!
708
+ settings.call_on_path(this, data, url_mark, p-url_mark);
709
+ url_mark = -1;
796
710
  break;
797
711
  default:
798
- settings.call_on_error(this, "unexpected char in path", data, p_err);
799
- return error();
712
+ state = parse_url_char(ch);
713
+ if(dead == state){
714
+ return error(settings, "unexpected char in path", data);
715
+ }
800
716
  }
801
717
  break;
802
718
  /******************* URL *******************/
@@ -812,39 +728,34 @@ public class HTTPParser {
812
728
  case SPACE:
813
729
  break;
814
730
  default:
815
- settings.call_on_error(this, "error in req_http_H", data, p_err);
816
- return error();
731
+ return error(settings, "error in req_http_H", data);
817
732
  }
818
733
  break;
819
734
 
820
735
  case req_http_H:
821
736
  if (strict && T != ch) {
822
- settings.call_on_error(this, "unexpected char", data, p_err);
823
- return error();
737
+ return error(settings, "unexpected char", data);
824
738
  }
825
739
  state = State.req_http_HT;
826
740
  break;
827
741
 
828
742
  case req_http_HT:
829
743
  if (strict && T != ch) {
830
- settings.call_on_error(this, "unexpected char", data, p_err);
831
- return error();
744
+ return error(settings, "unexpected char", data);
832
745
  }
833
746
  state = State.req_http_HTT;
834
747
  break;
835
748
 
836
749
  case req_http_HTT:
837
750
  if (strict && P != ch) {
838
- settings.call_on_error(this, "unexpected char", data, p_err);
839
- return error();
751
+ return error(settings, "unexpected char", data);
840
752
  }
841
753
  state = State.req_http_HTTP;
842
754
  break;
843
755
 
844
756
  case req_http_HTTP:
845
757
  if (strict && SLASH != ch) {
846
- settings.call_on_error(this, "unexpected char", data, p_err);
847
- return error();
758
+ return error(settings, "unexpected char", data);
848
759
  }
849
760
  state = req_first_http_major;
850
761
  break;
@@ -852,8 +763,7 @@ public class HTTPParser {
852
763
  /* first digit of major HTTP version */
853
764
  case req_first_http_major:
854
765
  if (!isDigit(ch)) {
855
- settings.call_on_error(this, "non digit in http major", data, p_err);
856
- return error();
766
+ return error(settings, "non digit in http major", data);
857
767
  }
858
768
  http_major = (int)ch - 0x30;
859
769
  state = State.req_http_major;
@@ -867,24 +777,21 @@ public class HTTPParser {
867
777
  }
868
778
 
869
779
  if (!isDigit(ch)) {
870
- settings.call_on_error(this, "non digit in http major", data, p_err);
871
- return error();
780
+ return error(settings, "non digit in http major", data);
872
781
  }
873
782
 
874
783
  http_major *= 10;
875
784
  http_major += (int)ch - 0x30;
876
785
 
877
786
  if (http_major > 999) {
878
- settings.call_on_error(this, "ridiculous http major", data, p_err);
879
- return error();
787
+ return error(settings, "ridiculous http major", data);
880
788
  };
881
789
  break;
882
-
790
+
883
791
  /* first digit of minor HTTP version */
884
792
  case req_first_http_minor:
885
793
  if (!isDigit(ch)) {
886
- settings.call_on_error(this, "non digit in http minor", data, p_err);
887
- return error();
794
+ return error(settings, "non digit in http minor", data);
888
795
  }
889
796
  http_minor = (int)ch - 0x30;
890
797
  state = State.req_http_minor;
@@ -904,29 +811,26 @@ public class HTTPParser {
904
811
  /* XXX allow spaces after digit? */
905
812
 
906
813
  if (!isDigit(ch)) {
907
- settings.call_on_error(this, "non digit in http minor", data, p_err);
908
- return error();
814
+ return error(settings, "non digit in http minor", data);
909
815
  }
910
816
 
911
817
  http_minor *= 10;
912
818
  http_minor += (int)ch - 0x30;
913
819
 
914
-
820
+
915
821
  if (http_minor > 999) {
916
- settings.call_on_error(this, "ridiculous http minor", data, p_err);
917
- return error();
822
+ return error(settings, "ridiculous http minor", data);
918
823
  };
919
-
824
+
920
825
  break;
921
826
 
922
827
  /* end of request line */
923
828
  case req_line_almost_done:
924
829
  {
925
830
  if (ch != LF) {
926
- settings.call_on_error(this, "missing LF after request line", data, p_err);
927
- return error();
831
+ return error(settings, "missing LF after request line", data);
928
832
  }
929
- state = State.header_field_start;
833
+ state = header_field_start;
930
834
  break;
931
835
  }
932
836
 
@@ -938,7 +842,7 @@ public class HTTPParser {
938
842
  case header_field_start:
939
843
  {
940
844
  if (ch == CR) {
941
- state = State.headers_almost_done;
845
+ state = headers_almost_done;
942
846
  break;
943
847
  }
944
848
 
@@ -946,22 +850,15 @@ public class HTTPParser {
946
850
  /* they might be just sending \n instead of \r\n so this would be
947
851
  * the second \n to denote the end of headers*/
948
852
  state = State.headers_almost_done;
949
- if (!headers_almost_done(ch, settings)) {
950
- settings.call_on_error(this, "header not properly completed", data, p_err);
951
- return error();
952
- }
953
- if (upgrade) {
954
- return data.position() - start_position;
955
- }
853
+ reexecute = true;
956
854
  break;
957
855
  }
958
856
 
959
857
  c = token(ch);
960
858
 
961
859
  if (0 == c) {
962
- settings.call_on_error(this, "invalid char in header:"+c, data, p_err);
963
- return error();
964
- };
860
+ return error(settings, "invalid char in header:", data);
861
+ }
965
862
 
966
863
  header_field_mark = p;
967
864
 
@@ -969,7 +866,7 @@ public class HTTPParser {
969
866
  state = State.header_field;
970
867
 
971
868
  switch (c) {
972
- case C:
869
+ case C:
973
870
  header_state = HState.C;
974
871
  break;
975
872
 
@@ -997,7 +894,7 @@ public class HTTPParser {
997
894
  case header_field:
998
895
  {
999
896
  c = token(ch);
1000
- if (0 != c) {
897
+ if (0 != c) {
1001
898
  switch (header_state) {
1002
899
  case general:
1003
900
  break;
@@ -1090,8 +987,7 @@ public class HTTPParser {
1090
987
  break;
1091
988
 
1092
989
  default:
1093
- settings.call_on_error(this, "Unknown Header State", data, p_err);
1094
- return error();
990
+ return error(settings, "Unknown Header State", data);
1095
991
  } // switch: header_state
1096
992
  break;
1097
993
  } // 0 != c
@@ -1107,7 +1003,7 @@ public class HTTPParser {
1107
1003
  if (CR == ch) {
1108
1004
  state = State.header_almost_done;
1109
1005
  settings.call_on_header_field(this, data, header_field_mark, p-header_field_mark);
1110
-
1006
+
1111
1007
  header_field_mark = -1;
1112
1008
  break;
1113
1009
  }
@@ -1115,20 +1011,19 @@ public class HTTPParser {
1115
1011
  if (ch == LF) {
1116
1012
  settings.call_on_header_field(this, data, header_field_mark, p-header_field_mark);
1117
1013
  header_field_mark = -1;
1118
-
1014
+
1119
1015
  state = State.header_field_start;
1120
1016
  break;
1121
1017
  }
1122
1018
 
1123
- settings.call_on_error(this, "invalid header field", data, p_err);
1124
- return error();
1019
+ return error(settings, "invalid header field", data);
1125
1020
  }
1126
1021
 
1127
1022
 
1128
1023
 
1129
1024
  case header_value_start:
1130
1025
  {
1131
- if (SPACE == ch) break;
1026
+ if ((SPACE == ch) || (TAB == ch)) break;
1132
1027
 
1133
1028
  header_value_mark = p;
1134
1029
 
@@ -1148,7 +1043,7 @@ public class HTTPParser {
1148
1043
  if (LF == ch) {
1149
1044
  settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1150
1045
  header_value_mark = -1;
1151
-
1046
+
1152
1047
  state = State.header_field_start;
1153
1048
  break;
1154
1049
  }
@@ -1173,9 +1068,8 @@ public class HTTPParser {
1173
1068
 
1174
1069
  case content_length:
1175
1070
  if (!isDigit(ch)) {
1176
- settings.call_on_error(this, "Content-Length not numeric", data, p_err);
1177
- return error();
1178
- }
1071
+ return error(settings, "Content-Length not numeric", data);
1072
+ }
1179
1073
  content_length = (int)ch - 0x30;
1180
1074
  break;
1181
1075
 
@@ -1214,11 +1108,8 @@ public class HTTPParser {
1214
1108
  if (LF == ch) {
1215
1109
  settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1216
1110
  header_value_mark = -1;
1217
-
1218
- if (!header_almost_done(ch)) {
1219
- settings.call_on_error(this,"incorrect header ending, expection LF", data, p_err);
1220
- return error();
1221
- }
1111
+ state = header_almost_done;
1112
+ reexecute = true;
1222
1113
  break;
1223
1114
  }
1224
1115
 
@@ -1229,20 +1120,26 @@ public class HTTPParser {
1229
1120
 
1230
1121
  case connection:
1231
1122
  case transfer_encoding:
1232
- settings.call_on_error(this, "Shouldn't be here", data, p_err);
1233
- return error();
1123
+ return error(settings, "Shouldn't be here", data);
1234
1124
 
1235
1125
  case content_length:
1236
1126
  if (SPACE == ch) {
1237
1127
  break;
1238
1128
  }
1239
1129
  if (!isDigit(ch)) {
1240
- settings.call_on_error(this, "Content-Length not numeric", data, p_err);
1241
- return error();
1242
- }
1130
+ return error(settings, "Content-Length not numeric", data);
1131
+ }
1132
+
1133
+ long t = content_length;
1134
+ t *= 10;
1135
+ t += (long)ch - 0x30;
1243
1136
 
1244
- content_length *= 10;
1245
- content_length += (int)ch - 0x30;
1137
+ /* Overflow? */
1138
+ // t will wrap and become negative ...
1139
+ if (t < content_length) {
1140
+ return error(settings, "Invalid content length", data);
1141
+ }
1142
+ content_length = t;
1246
1143
  break;
1247
1144
 
1248
1145
  /* Transfer-Encoding: chunked */
@@ -1293,21 +1190,121 @@ public class HTTPParser {
1293
1190
 
1294
1191
  case header_almost_done:
1295
1192
  if (!header_almost_done(ch)) {
1296
- settings.call_on_error(this,"incorrect header ending, expection LF", data, p_err);
1297
- return error();
1193
+ return error(settings, "incorrect header ending, expecting LF", data);
1194
+ }
1195
+ break;
1196
+
1197
+ case header_value_lws:
1198
+ if (SPACE == ch || TAB == ch ){
1199
+ state = header_value_start;
1200
+ } else {
1201
+ state = header_field_start;
1202
+ reexecute = true;
1298
1203
  }
1299
1204
  break;
1300
1205
 
1301
1206
  case headers_almost_done:
1302
- if (!headers_almost_done(ch, settings)) {
1303
- settings.call_on_error(this, "header not properly completed", data, p_err);
1304
- return error();
1207
+ if (LF != ch) {
1208
+ return error(settings, "header not properly completed", data);
1209
+ }
1210
+ if (0 != (flags & F_TRAILING)) {
1211
+ /* End of a chunked request */
1212
+ state = new_message();
1213
+ settings.call_on_headers_complete(this);
1214
+ settings.call_on_message_complete(this);
1215
+ break;
1305
1216
  }
1306
- if (upgrade) {
1307
- return data.position()-start_position ;
1217
+
1218
+ state = headers_done;
1219
+
1220
+ if (0 != (flags & F_UPGRADE) || HTTPMethod.HTTP_CONNECT == method) {
1221
+ upgrade = true;
1308
1222
  }
1223
+
1224
+ /* Here we call the headers_complete callback. This is somewhat
1225
+ * different than other callbacks because if the user returns 1, we
1226
+ * will interpret that as saying that this message has no body. This
1227
+ * is needed for the annoying case of recieving a response to a HEAD
1228
+ * request.
1229
+ */
1230
+
1231
+ /* (responses to HEAD request contain a CONTENT-LENGTH header
1232
+ * but no content)
1233
+ *
1234
+ * Consider what to do here: I don't like the idea of the callback
1235
+ * interface having a different contract in the case of HEAD
1236
+ * responses. The alternatives would be either to:
1237
+ *
1238
+ * a.) require the header_complete callback to implement a different
1239
+ * interface or
1240
+ *
1241
+ * b.) provide an overridden execute(bla, bla, boolean
1242
+ * parsingHeader) implementation ...
1243
+ */
1244
+
1245
+ /*TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO */
1246
+ if (null != settings.on_headers_complete) {
1247
+ settings.call_on_headers_complete(this);
1248
+ //return;
1249
+ }
1250
+
1251
+ // if (null != settings.on_headers_complete) {
1252
+ // switch (settings.on_headers_complete.cb(parser)) {
1253
+ // case 0:
1254
+ // break;
1255
+ //
1256
+ // case 1:
1257
+ // flags |= F_SKIPBODY;
1258
+ // break;
1259
+ //
1260
+ // default:
1261
+ // return p - data; /* Error */ // TODO // RuntimeException ?
1262
+ // }
1263
+ // }
1264
+ reexecute = true;
1309
1265
  break;
1310
1266
 
1267
+ case headers_done:
1268
+ if (strict && (LF != ch)) {
1269
+ return error(settings, "STRICT CHECK", data); //TODO correct error msg
1270
+ }
1271
+
1272
+ nread = 0;
1273
+
1274
+ // Exit, the rest of the connect is in a different protocol.
1275
+ if (upgrade) {
1276
+ state = new_message();
1277
+ settings.call_on_message_complete(this);
1278
+ return data.position()-this.p_start;
1279
+ }
1280
+
1281
+ if (0 != (flags & F_SKIPBODY)) {
1282
+ state = new_message();
1283
+ settings.call_on_message_complete(this);
1284
+ } else if (0 != (flags & F_CHUNKED)) {
1285
+ /* chunked encoding - ignore Content-Length header */
1286
+ state = State.chunk_size_start;
1287
+ } else {
1288
+ if (content_length == 0) {
1289
+ /* Content-Length header given but zero: Content-Length: 0\r\n */
1290
+ state = new_message();
1291
+ settings.call_on_message_complete(this);
1292
+ } else if (content_length != -1) {
1293
+ /* Content-Length header given and non-zero */
1294
+ state = State.body_identity;
1295
+ } else {
1296
+ if (type == ParserType.HTTP_REQUEST || !http_message_needs_eof()) {
1297
+ /* Assume content-length 0 - read the next */
1298
+ state = new_message();
1299
+ settings.call_on_message_complete(this);
1300
+ } else {
1301
+ /* Read body until EOF */
1302
+ state = State.body_identity_eof;
1303
+ }
1304
+ }
1305
+ }
1306
+
1307
+ break;
1311
1308
  /******************* Header *******************/
1312
1309
 
1313
1310
 
@@ -1315,15 +1312,17 @@ public class HTTPParser {
1315
1312
 
1316
1313
  /******************* Body *******************/
1317
1314
  case body_identity:
1318
- to_read = min(pe - p, content_length); //TODO change to use buffer?
1315
+ to_read = min(pe - p, content_length); //TODO change to use buffer?
1316
+ body_mark = p;
1319
1317
 
1320
1318
  if (to_read > 0) {
1321
- settings.call_on_body(this, data, p, to_read);
1319
+ settings.call_on_body(this, data, p, to_read);
1322
1320
  data.position(p+to_read);
1323
1321
  content_length -= to_read;
1322
+
1324
1323
  if (content_length == 0) {
1325
- settings.call_on_message_complete(this);
1326
- state = new_message();
1324
+ state = message_done;
1325
+ reexecute = true;
1327
1326
  }
1328
1327
  }
1329
1328
  break;
@@ -1333,10 +1332,15 @@ public class HTTPParser {
1333
1332
  case body_identity_eof:
1334
1333
  to_read = pe - p; // TODO change to use buffer ?
1335
1334
  if (to_read > 0) {
1336
- settings.call_on_body(this, data, p, to_read);
1335
+ settings.call_on_body(this, data, p, to_read);
1337
1336
  data.position(p+to_read);
1338
1337
  }
1339
1338
  break;
1339
+
1340
+ case message_done:
1341
+ state = new_message();
1342
+ settings.call_on_message_complete(this);
1343
+ break;
1340
1344
  /******************* Body *******************/
1341
1345
 
1342
1346
 
@@ -1344,19 +1348,16 @@ public class HTTPParser {
1344
1348
  /******************* Chunk *******************/
1345
1349
  case chunk_size_start:
1346
1350
  if (1 != this.nread) {
1347
- settings.call_on_error(this, "nread != 1 (chunking)", data, p_err);
1348
- return error();
1349
-
1351
+ return error(settings, "nread != 1 (chunking)", data);
1352
+
1350
1353
  }
1351
1354
  if (0 == (flags & F_CHUNKED)) {
1352
- settings.call_on_error(this, "not chunked", data, p_err);
1353
- return error();
1355
+ return error(settings, "not chunked", data);
1354
1356
  }
1355
1357
 
1356
1358
  c = UNHEX[chi];
1357
1359
  if (c == -1) {
1358
- settings.call_on_error(this, "invalid hex char in chunk content length", data, p_err);
1359
- return error();
1360
+ return error(settings, "invalid hex char in chunk content length", data);
1360
1361
  }
1361
1362
  content_length = c;
1362
1363
  state = State.chunk_size;
@@ -1366,8 +1367,7 @@ public class HTTPParser {
1366
1367
 
1367
1368
  case chunk_size:
1368
1369
  if (0 == (flags & F_CHUNKED)) {
1369
- settings.call_on_error(this, "not chunked", data, p_err);
1370
- return error();
1370
+ return error(settings, "not chunked", data);
1371
1371
  }
1372
1372
 
1373
1373
  if (CR == ch) {
@@ -1382,20 +1382,23 @@ public class HTTPParser {
1382
1382
  state = State.chunk_parameters;
1383
1383
  break;
1384
1384
  }
1385
- settings.call_on_error(this, "invalid hex char in chunk content length", data, p_err);
1386
- return error();
1385
+ return error(settings, "invalid hex char in chunk content length", data);
1387
1386
  }
1388
-
1389
- content_length *= 16;
1390
- content_length += c;
1387
+ long t = content_length;
1388
+
1389
+ t *= 16;
1390
+ t += c;
1391
+ if(t < content_length){
1392
+ return error(settings, "invalid content length", data);
1393
+ }
1394
+ content_length = t;
1391
1395
  break;
1392
1396
 
1393
1397
 
1394
1398
 
1395
1399
  case chunk_parameters:
1396
1400
  if (0 == (flags & F_CHUNKED)) {
1397
- settings.call_on_error(this, "not chunked", data, p_err);
1398
- return error();
1401
+ return error(settings, "not chunked", data);
1399
1402
  }
1400
1403
  /* just ignore this shit. TODO check for overflow */
1401
1404
  if (CR == ch) {
@@ -1403,17 +1406,15 @@ public class HTTPParser {
1403
1406
  break;
1404
1407
  }
1405
1408
  break;
1406
-
1409
+
1407
1410
 
1408
1411
 
1409
1412
  case chunk_size_almost_done:
1410
1413
  if (0 == (flags & F_CHUNKED)) {
1411
- settings.call_on_error(this, "not chunked", data, p_err);
1412
- return error();
1414
+ return error(settings, "not chunked", data);
1413
1415
  }
1414
1416
  if (strict && LF != ch) {
1415
- settings.call_on_error(this, "expected LF at end of chunk size", data, p_err);
1416
- return error();
1417
+ return error(settings, "expected LF at end of chunk size", data);
1417
1418
  }
1418
1419
 
1419
1420
  this.nread = 0;
@@ -1429,10 +1430,9 @@ public class HTTPParser {
1429
1430
 
1430
1431
 
1431
1432
  case chunk_data:
1432
- {
1433
+ //TODO Apply changes from C version for s_chunk_data
1433
1434
  if (0 == (flags & F_CHUNKED)) {
1434
- settings.call_on_error(this, "not chunked", data, p_err);
1435
- return error();
1435
+ return error(settings, "not chunked", data);
1436
1436
  }
1437
1437
 
1438
1438
  to_read = min(pe-p, content_length);
@@ -1447,43 +1447,39 @@ public class HTTPParser {
1447
1447
 
1448
1448
  content_length -= to_read;
1449
1449
  break;
1450
- }
1451
1450
 
1452
1451
 
1453
1452
 
1454
1453
  case chunk_data_almost_done:
1455
1454
  if (0 == (flags & F_CHUNKED)) {
1456
- settings.call_on_error(this, "not chunked", data, p_err);
1457
- return error();
1455
+ return error(settings, "not chunked", data);
1458
1456
  }
1459
1457
  if (strict && CR != ch) {
1460
- settings.call_on_error(this, "chunk data terminated incorrectly, expected CR", data, p_err);
1461
- return error();
1458
+ return error(settings, "chunk data terminated incorrectly, expected CR", data);
1462
1459
  }
1463
1460
  state = State.chunk_data_done;
1461
+ //TODO CALLBACK_DATA(body)
1462
+ // settings.call_on_body(this, data,p,?);
1464
1463
  break;
1465
1464
 
1466
1465
 
1467
1466
 
1468
1467
  case chunk_data_done:
1469
1468
  if (0 == (flags & F_CHUNKED)) {
1470
- settings.call_on_error(this, "not chunked", data, p_err);
1471
- return error();
1469
+ return error(settings, "not chunked", data);
1472
1470
  }
1473
1471
  if (strict && LF != ch) {
1474
- settings.call_on_error(this, "chunk data terminated incorrectly, expected LF", data, p_err);
1475
- return error();
1472
+ return error(settings, "chunk data terminated incorrectly, expected LF", data);
1476
1473
  }
1477
1474
  state = State.chunk_size_start;
1478
1475
  break;
1479
1476
  /******************* Chunk *******************/
1480
-
1481
-
1482
-
1477
+
1478
+
1479
+
1483
1480
  default:
1484
- settings.call_on_error(this, "unhandled state", data, p_err);
1485
- return error();
1486
-
1481
+ return error(settings, "unhandled state", data);
1482
+
1487
1483
  } // switch
1488
1484
  } // while
1489
1485
 
@@ -1492,20 +1488,37 @@ public class HTTPParser {
1492
1488
 
1493
1489
  /* Reaching this point assumes that we only received part of a
1494
1490
  * message, inform the callbacks about the progress made so far*/
1495
-
1491
+
1496
1492
  settings.call_on_header_field(this, data, header_field_mark, p-header_field_mark);
1497
1493
  settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1498
- settings.call_on_fragment (this, data, fragment_mark, p-fragment_mark);
1499
- settings.call_on_query_string(this, data, query_string_mark, p-query_string_mark);
1500
- settings.call_on_path (this, data, path_mark, p-path_mark);
1501
1494
  settings.call_on_url (this, data, url_mark, p-url_mark);
1495
+ settings.call_on_path (this, data, url_mark, p-url_mark);
1502
1496
 
1503
- return data.position()-start_position;
1497
+ return data.position()-this.p_start;
1504
1498
  } // execute
1505
1499
 
1506
- int error () {
1500
+ int error (ParserSettings settings, String mes, ByteBuffer data) {
1501
+ settings.call_on_error(this, mes, data, this.p_start);
1507
1502
  this.state = State.dead;
1508
- return this.data.position()-start_position;
1503
+ return data.position()-this.p_start;
1504
+ }
1505
+
1506
+ public boolean http_message_needs_eof() {
1507
+ if(type == ParserType.HTTP_REQUEST){
1508
+ return false;
1509
+ }
1510
+ /* See RFC 2616 section 4.4 */
1511
+ if ((status_code / 100 == 1) || /* 1xx e.g. Continue */
1512
+ (status_code == 204) || /* No Content */
1513
+ (status_code == 304) || /* Not Modified */
1514
+ (flags & F_SKIPBODY) != 0) { /* response to a HEAD request */
1515
+ return false;
1516
+ }
1517
+ if ((flags & F_CHUNKED) != 0 || content_length != -1) {
1518
+ return false;
1519
+ }
1520
+
1521
+ return true;
1509
1522
  }
1510
1523
 
1511
1524
  /* If http_should_keep_alive() in the on_headers_complete or
@@ -1519,19 +1532,139 @@ public class HTTPParser {
1519
1532
  /* HTTP/1.1 */
1520
1533
  if ( 0 != (flags & F_CONNECTION_CLOSE) ) {
1521
1534
  return false;
1522
- } else {
1523
- return true;
1524
1535
  }
1525
1536
  } else {
1526
1537
  /* HTTP/1.0 or earlier */
1527
- if ( 0 != (flags & F_CONNECTION_KEEP_ALIVE) ) {
1528
- return true;
1529
- } else {
1538
+ if ( 0 == (flags & F_CONNECTION_KEEP_ALIVE) ) {
1530
1539
  return false;
1531
1540
  }
1532
1541
  }
1542
+ return !http_message_needs_eof();
1533
1543
  }
1534
1544
 
1545
+ public int parse_url(ByteBuffer data, boolean is_connect, HTTPParserUrl u) {
1546
+
1547
+ UrlFields uf = UrlFields.UF_MAX;
1548
+ UrlFields old_uf = UrlFields.UF_MAX;
1549
+ u.port = 0;
1550
+ u.field_set = 0;
1551
+ state = (is_connect ? State.req_host_start : State.req_spaces_before_url);
1552
+ int p_init = data.position();
1553
+ int p = 0;
1554
+ byte ch = 0;
1555
+ while (data.position() != data.limit()) {
1556
+ p = data.position();
1557
+ ch = data.get();
1558
+ state = parse_url_char(ch);
1559
+ switch(state) {
1560
+ case dead:
1561
+ return 1;
1562
+
1563
+ /* Skip delimiters */
1564
+ case req_schema_slash:
1565
+ case req_schema_slash_slash:
1566
+ case req_host_start:
1567
+ case req_host_v6_start:
1568
+ case req_host_v6_end:
1569
+ case req_port_start:
1570
+ case req_query_string_start:
1571
+ case req_fragment_start:
1572
+ continue;
1573
+
1574
+ case req_schema:
1575
+ uf = UrlFields.UF_SCHEMA;
1576
+ break;
1577
+
1578
+ case req_host:
1579
+ case req_host_v6:
1580
+ uf = UrlFields.UF_HOST;
1581
+ break;
1582
+
1583
+ case req_port:
1584
+ uf = UrlFields.UF_PORT;
1585
+ break;
1586
+
1587
+ case req_path:
1588
+ uf = UrlFields.UF_PATH;
1589
+ break;
1590
+
1591
+ case req_query_string:
1592
+ uf = UrlFields.UF_QUERY;
1593
+ break;
1594
+
1595
+ case req_fragment:
1596
+ uf = UrlFields.UF_FRAGMENT;
1597
+ break;
1598
+
1599
+ default:
1600
+ return 1;
1601
+ }
1602
+ /* Nothing's changed; soldier on */
1603
+ if (uf == old_uf) {
1604
+ u.field_data[uf.getIndex()].len++;
1605
+ continue;
1606
+ }
1607
+
1608
+ u.field_data[uf.getIndex()].off = p - p_init;
1609
+ u.field_data[uf.getIndex()].len = 1;
1610
+
1611
+ u.field_set |= (1 << uf.getIndex());
1612
+ old_uf = uf;
1613
+
1614
+ }
1615
+
1616
+ /* CONNECT requests can only contain "hostname:port" */
1617
+ if (is_connect && u.field_set != ((1 << UrlFields.UF_HOST.getIndex())|(1 << UrlFields.UF_PORT.getIndex()))) {
1618
+ return 1;
1619
+ }
1620
+
1621
+ /* Make sure we don't end somewhere unexpected */
1622
+ switch (state) {
1623
+ case req_host_v6_start:
1624
+ case req_host_v6:
1625
+ case req_host_v6_end:
1626
+ case req_host:
1627
+ case req_port_start:
1628
+ return 1;
1629
+ default:
1630
+ break;
1631
+ }
1632
+
1633
+ if (0 != (u.field_set & (1 << UrlFields.UF_PORT.getIndex()))) {
1634
+ /* Don't bother with endp; we've already validated the string */
1635
+ int v = strtoi(data, p_init + u.field_data[UrlFields.UF_PORT.getIndex()].off);
1636
+
1637
+ /* Ports have a max value of 2^16 - 1 (0xffff) */
1638
+ if (v > 0xffff) {
1639
+ return 1;
1640
+ }
1641
+
1642
+ u.port = v;
1643
+ }
1644
+
1645
+ return 0;
1646
+ }
1647
+
1648
+ //hacky reimplementation of strtoul, tailored for our simple needs
1649
+ //we only need to parse port val, so no negative values etc
1650
+ int strtoi(ByteBuffer data, int start_pos) {
1651
+ data.position(start_pos);
1652
+ byte ch;
1653
+ String str = "";
1654
+ while(data.position() < data.limit()) {
1655
+ ch = data.get();
1656
+ if(Character.isWhitespace((char)ch)){
1657
+ continue;
1658
+ }
1659
+ if(isDigit(ch)){
1660
+ str = str + (char)ch; //TODO replace with something less hacky
1661
+ }else{
1662
+ break;
1663
+ }
1664
+ }
1665
+ return Integer.parseInt(str);
1666
+ }
1667
+
1535
1668
  boolean isDigit(byte b) {
1536
1669
  if (b >= 0x30 && b <=0x39) {
1537
1670
  return true;
@@ -1539,6 +1672,10 @@ public class HTTPParser {
1539
1672
  return false;
1540
1673
  }
1541
1674
 
1675
+ boolean isHex(byte b) {
1676
+ return isDigit(b) || (lower(b) >= 0x61 /*a*/ && lower(b) <= 0x66 /*f*/);
1677
+ }
1678
+
1542
1679
  boolean isAtoZ(byte b) {
1543
1680
  byte c = lower(b);
1544
1681
  return (c>= 0x61 /*a*/ && c <= 0x7a /*z*/);
@@ -1555,25 +1692,44 @@ public class HTTPParser {
1555
1692
  }
1556
1693
 
1557
1694
  byte token(byte b) {
1558
- return (byte)tokens[b];
1695
+ if(!strict){
1696
+ return (b == (byte)' ') ? (byte)' ' : (byte)tokens[b] ;
1697
+ }else{
1698
+ return (byte)tokens[b];
1699
+ }
1700
+ }
1701
+
1702
+ boolean isHostChar(byte ch){
1703
+ if(!strict){
1704
+ return (isAtoZ(ch)) || isDigit(ch) || DOT == ch || DASH == ch || UNDER == ch ;
1705
+ }else{
1706
+ return (isAtoZ(ch)) || isDigit(ch) || DOT == ch || DASH == ch;
1707
+ }
1708
+ }
1709
+
1710
+ boolean isNormalUrlChar(int chi) {
1711
+ if(!strict){
1712
+ return (chi > 0x80) || normal_url_char[chi];
1713
+ }else{
1714
+ return normal_url_char[chi];
1715
+ }
1559
1716
  }
1560
-
1561
1717
 
1562
1718
  HTTPMethod start_req_method_assign(byte c){
1563
1719
  switch (c) {
1564
1720
  case C: return HTTPMethod.HTTP_CONNECT; /* or COPY, CHECKOUT */
1565
- case D: return HTTPMethod.HTTP_DELETE;
1566
- case G: return HTTPMethod.HTTP_GET;
1567
- case H: return HTTPMethod.HTTP_HEAD;
1568
- case L: return HTTPMethod.HTTP_LOCK;
1721
+ case D: return HTTPMethod.HTTP_DELETE;
1722
+ case G: return HTTPMethod.HTTP_GET;
1723
+ case H: return HTTPMethod.HTTP_HEAD;
1724
+ case L: return HTTPMethod.HTTP_LOCK;
1569
1725
  case M: return HTTPMethod.HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */
1570
- case N: return HTTPMethod.HTTP_NOTIFY;
1571
- case O: return HTTPMethod.HTTP_OPTIONS;
1572
- case P: return HTTPMethod.HTTP_POST; /* or PROPFIND, PROPPATH, PUT */
1726
+ case N: return HTTPMethod.HTTP_NOTIFY;
1727
+ case O: return HTTPMethod.HTTP_OPTIONS;
1728
+ case P: return HTTPMethod.HTTP_POST; /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
1573
1729
  case R: return HTTPMethod.HTTP_REPORT;
1574
1730
  case S: return HTTPMethod.HTTP_SUBSCRIBE;
1575
- case T: return HTTPMethod.HTTP_TRACE;
1576
- case U: return HTTPMethod.HTTP_UNLOCK; /* or UNSUBSCRIBE */
1731
+ case T: return HTTPMethod.HTTP_TRACE;
1732
+ case U: return HTTPMethod.HTTP_UNLOCK; /* or UNSUBSCRIBE */
1577
1733
  }
1578
1734
  return null; // ugh.
1579
1735
  }
@@ -1583,7 +1739,7 @@ public class HTTPParser {
1583
1739
  return false;
1584
1740
  }
1585
1741
 
1586
- state = State.header_field_start;
1742
+ state = State.header_value_lws;
1587
1743
  // TODO java enums support some sort of bitflag mechanism !?
1588
1744
  switch (header_state) {
1589
1745
  case connection_keep_alive:
@@ -1601,111 +1757,18 @@ public class HTTPParser {
1601
1757
  return true;
1602
1758
  }
1603
1759
 
1604
- boolean headers_almost_done (byte ch, ParserSettings settings) {
1605
-
1606
- if (LF != ch) {
1607
- return false;
1608
- }
1609
- if (0 != (flags & F_TRAILING)) {
1610
- /* End of a chunked request */
1611
-
1612
- settings.call_on_headers_complete(this);
1613
- settings.call_on_message_complete(this);
1614
-
1615
- state = new_message();
1616
-
1617
- return true;
1618
- }
1619
-
1620
- nread = 0;
1621
-
1622
- if (0 != (flags & F_UPGRADE) || HTTPMethod.HTTP_CONNECT == method) {
1623
- upgrade = true;
1624
- }
1625
-
1626
-
1627
- /* Here we call the headers_complete callback. This is somewhat
1628
- * different than other callbacks because if the user returns 1, we
1629
- * will interpret that as saying that this message has no body. This
1630
- * is needed for the annoying case of recieving a response to a HEAD
1631
- * request.
1632
- */
1633
-
1634
- /* (responses to HEAD request contain a CONTENT-LENGTH header
1635
- * but no content)
1636
- *
1637
- * Consider what to do here: I don't like the idea of the callback
1638
- * interface having a different contract in the case of HEAD
1639
- * responses. The alternatives would be either to:
1640
- *
1641
- * a.) require the header_complete callback to implement a different
1642
- * interface or
1643
- *
1644
- * b.) provide an overridden execute(bla, bla, boolean
1645
- * parsingHeader) implementation ...
1646
- */
1647
-
1648
- /*TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO */
1649
- if (null != settings.on_headers_complete) {
1650
- settings.call_on_headers_complete(this);
1651
- //return;
1652
- }
1653
-
1654
- // if (null != settings.on_headers_complete) {
1655
- // switch (settings.on_headers_complete.cb(parser)) {
1656
- // case 0:
1657
- // break;
1658
- //
1659
- // case 1:
1660
- // flags |= F_SKIPBODY;
1661
- // break;
1662
- //
1663
- // default:
1664
- // return p - data; /* Error */ // TODO // RuntimeException ?
1665
- // }
1666
- // }
1667
-
1668
-
1669
- // Exit, the rest of the connect is in a different protocol.
1670
- if (upgrade) {
1671
- settings.call_on_message_complete(this);
1672
- state = State.body_identity_eof;
1673
- return true;
1674
- }
1675
-
1676
- if (0 != (flags & F_SKIPBODY)) {
1677
- settings.call_on_message_complete(this);
1678
- state = new_message();
1679
- } else if (0 != (flags & F_CHUNKED)) {
1680
- /* chunked encoding - ignore Content-Length header */
1681
- state = State.chunk_size_start;
1682
- } else {
1683
- if (content_length == 0) {
1684
- /* Content-Length header given but zero: Content-Length: 0\r\n */
1685
- settings.call_on_message_complete(this);
1686
- state = new_message();
1687
- } else if (content_length > 0) {
1688
- /* Content-Length header given and non-zero */
1689
- state = State.body_identity;
1690
- } else {
1691
- if (type == ParserType.HTTP_REQUEST || http_should_keep_alive()) {
1692
- /* Assume content-length 0 - read the next */
1693
- settings.call_on_message_complete(this);
1694
- state = new_message();
1695
- } else {
1696
- /* Read body until EOF */
1697
- state = State.body_identity_eof;
1698
- }
1699
- }
1700
- }
1701
- return true;
1702
- } // headers_almost_fone
1760
+ // boolean headers_almost_done (byte ch, ParserSettings settings) {
1761
+ // } // headers_almost_done
1703
1762
 
1704
1763
 
1705
1764
  final int min (int a, int b) {
1706
1765
  return a < b ? a : b;
1707
1766
  }
1708
1767
 
1768
+ final int min (int a, long b) {
1769
+ return a < b ? a : (int)b;
1770
+ }
1771
+
1709
1772
  /* probably not the best place to hide this ... */
1710
1773
  public boolean HTTP_PARSER_STRICT;
1711
1774
  State new_message() {
@@ -1716,7 +1779,7 @@ public class HTTPParser {
1716
1779
  }
1717
1780
 
1718
1781
  }
1719
-
1782
+
1720
1783
  State start_state() {
1721
1784
  return type == ParserType.HTTP_REQUEST ? State.start_req : State.start_res;
1722
1785
  }
@@ -1730,6 +1793,7 @@ public class HTTPParser {
1730
1793
  case chunk_data_done :
1731
1794
  case body_identity :
1732
1795
  case body_identity_eof :
1796
+ case message_done :
1733
1797
  return false;
1734
1798
 
1735
1799
  }
@@ -1766,28 +1830,28 @@ public class HTTPParser {
1766
1830
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1767
1831
  };
1768
1832
  static final byte [] CONNECTION = {
1769
- 0x43, 0x4f, 0x4e, 0x4e, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e,
1833
+ 0x43, 0x4f, 0x4e, 0x4e, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e,
1770
1834
  };
1771
1835
  static final byte [] PROXY_CONNECTION = {
1772
- 0x50, 0x52, 0x4f, 0x58, 0x59, 0x2d, 0x43, 0x4f, 0x4e, 0x4e, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e,
1836
+ 0x50, 0x52, 0x4f, 0x58, 0x59, 0x2d, 0x43, 0x4f, 0x4e, 0x4e, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e,
1773
1837
  };
1774
1838
  static final byte [] CONTENT_LENGTH = {
1775
- 0x43, 0x4f, 0x4e, 0x54, 0x45, 0x4e, 0x54, 0x2d, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48,
1839
+ 0x43, 0x4f, 0x4e, 0x54, 0x45, 0x4e, 0x54, 0x2d, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48,
1776
1840
  };
1777
1841
  static final byte [] TRANSFER_ENCODING = {
1778
- 0x54, 0x52, 0x41, 0x4e, 0x53, 0x46, 0x45, 0x52, 0x2d, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47,
1842
+ 0x54, 0x52, 0x41, 0x4e, 0x53, 0x46, 0x45, 0x52, 0x2d, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47,
1779
1843
  };
1780
1844
  static final byte [] UPGRADE = {
1781
- 0x55, 0x50, 0x47, 0x52, 0x41, 0x44, 0x45,
1845
+ 0x55, 0x50, 0x47, 0x52, 0x41, 0x44, 0x45,
1782
1846
  };
1783
1847
  static final byte [] CHUNKED = {
1784
- 0x43, 0x48, 0x55, 0x4e, 0x4b, 0x45, 0x44,
1848
+ 0x43, 0x48, 0x55, 0x4e, 0x4b, 0x45, 0x44,
1785
1849
  };
1786
1850
  static final byte [] KEEP_ALIVE = {
1787
- 0x4b, 0x45, 0x45, 0x50, 0x2d, 0x41, 0x4c, 0x49, 0x56, 0x45,
1851
+ 0x4b, 0x45, 0x45, 0x50, 0x2d, 0x41, 0x4c, 0x49, 0x56, 0x45,
1788
1852
  };
1789
1853
  static final byte [] CLOSE = {
1790
- 0x43, 0x4c, 0x4f, 0x53, 0x45,
1854
+ 0x43, 0x4c, 0x4f, 0x53, 0x45,
1791
1855
  };
1792
1856
 
1793
1857
  /* Tokens as defined by rfc 2616. Also lowercases them.
@@ -1808,9 +1872,9 @@ public class HTTPParser {
1808
1872
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
1809
1873
  0, 0, 0, 0, 0, 0, 0, 0,
1810
1874
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
1811
- ' ', '!', '"', '#', '$', '%', '&', '\'',
1875
+ 0, '!', 0, '#', '$', '%', '&', '\'',
1812
1876
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
1813
- 0, 0, '*', '+', 0, '-', '.', '/' ,
1877
+ 0, 0, '*', '+', 0, '-', '.', 0 ,
1814
1878
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
1815
1879
  '0', '1', '2', '3', '4', '5', '6', '7',
1816
1880
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
@@ -1830,7 +1894,7 @@ public class HTTPParser {
1830
1894
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
1831
1895
  'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
1832
1896
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
1833
- 'X', 'Y', 'Z', 0, '|', '}', 0, 0,
1897
+ 'X', 'Y', 'Z', 0, '|', 0, '~', 0,
1834
1898
  /* hi bit set, not ascii */
1835
1899
  0, 0, 0, 0, 0, 0, 0, 0,
1836
1900
  0, 0, 0, 0, 0, 0, 0, 0,
@@ -1907,23 +1971,23 @@ public class HTTPParser {
1907
1971
  * encoded paths. This is out of spec, but clients generate this and most other
1908
1972
  * HTTP servers support it. We should, too. */
1909
1973
 
1910
- true, true, true, true, true, true, true, true,
1911
- true, true, true, true, true, true, true, true,
1912
- true, true, true, true, true, true, true, true,
1913
- true, true, true, true, true, true, true, true,
1914
- true, true, true, true, true, true, true, true,
1915
- true, true, true, true, true, true, true, true,
1916
- true, true, true, true, true, true, true, true,
1917
- true, true, true, true, true, true, true, true,
1918
- true, true, true, true, true, true, true, true,
1919
- true, true, true, true, true, true, true, true,
1920
- true, true, true, true, true, true, true, true,
1921
- true, true, true, true, true, true, true, true,
1922
- true, true, true, true, true, true, true, true,
1923
- true, true, true, true, true, true, true, true,
1924
- true, true, true, true, true, true, true, true,
1925
- true, true, true, true, true, true, true, true,
1926
-
1974
+ true, true, true, true, true, true, true, true,
1975
+ true, true, true, true, true, true, true, true,
1976
+ true, true, true, true, true, true, true, true,
1977
+ true, true, true, true, true, true, true, true,
1978
+ true, true, true, true, true, true, true, true,
1979
+ true, true, true, true, true, true, true, true,
1980
+ true, true, true, true, true, true, true, true,
1981
+ true, true, true, true, true, true, true, true,
1982
+ true, true, true, true, true, true, true, true,
1983
+ true, true, true, true, true, true, true, true,
1984
+ true, true, true, true, true, true, true, true,
1985
+ true, true, true, true, true, true, true, true,
1986
+ true, true, true, true, true, true, true, true,
1987
+ true, true, true, true, true, true, true, true,
1988
+ true, true, true, true, true, true, true, true,
1989
+ true, true, true, true, true, true, true, true,
1990
+
1927
1991
  };
1928
1992
 
1929
1993
  public static final byte A = 0x41;
@@ -1952,10 +2016,12 @@ public class HTTPParser {
1952
2016
  public static final byte X = 0x58;
1953
2017
  public static final byte Y = 0x59;
1954
2018
  public static final byte Z = 0x5a;
2019
+ public static final byte UNDER = 0x5f;
1955
2020
  public static final byte CR = 0x0d;
1956
2021
  public static final byte LF = 0x0a;
1957
2022
  public static final byte DOT = 0x2e;
1958
2023
  public static final byte SPACE = 0x20;
2024
+ public static final byte TAB = 0x09;
1959
2025
  public static final byte SEMI = 0x3b;
1960
2026
  public static final byte COLON = 0x3a;
1961
2027
  public static final byte HASH = 0x23;
@@ -1968,9 +2034,9 @@ public class HTTPParser {
1968
2034
 
1969
2035
  enum State {
1970
2036
 
1971
- dead
2037
+ dead
1972
2038
 
1973
- , start_res_or_res
2039
+ , start_req_or_res
1974
2040
  , res_or_resp_H
1975
2041
  , start_res
1976
2042
  , res_H
@@ -1993,7 +2059,12 @@ public class HTTPParser {
1993
2059
  , req_schema
1994
2060
  , req_schema_slash
1995
2061
  , req_schema_slash_slash
2062
+ , req_host_start
2063
+ , req_host_v6_start
2064
+ , req_host_v6
2065
+ , req_host_v6_end
1996
2066
  , req_host
2067
+ , req_port_start
1997
2068
  , req_port
1998
2069
  , req_path
1999
2070
  , req_query_string_start
@@ -2015,6 +2086,7 @@ public class HTTPParser {
2015
2086
  , header_field
2016
2087
  , header_value_start
2017
2088
  , header_value
2089
+ , header_value_lws
2018
2090
 
2019
2091
  , header_almost_done
2020
2092
 
@@ -2024,10 +2096,11 @@ public class HTTPParser {
2024
2096
  , chunk_size_almost_done
2025
2097
 
2026
2098
  , headers_almost_done
2099
+ , headers_done
2027
2100
  // This space intentionally not left blank, comment from c, for orientation...
2028
2101
  // the c version uses <= s_header_almost_done in java, we list the states explicitly
2029
2102
  // in `parsing_header()`
2030
- /* Important: 's_headers_almost_done' must be the last 'header' state. All
2103
+ /* Important: 's_headers_done' must be the last 'header' state. All
2031
2104
  * states beyond this must be 'body' states. It is used for overflow
2032
2105
  * checking. See the PARSING_HEADER() macro.
2033
2106
  */
@@ -2036,8 +2109,8 @@ public class HTTPParser {
2036
2109
  , chunk_data_done
2037
2110
 
2038
2111
  , body_identity
2039
- , body_identity_eof;
2040
-
2112
+ , body_identity_eof
2113
+ , message_done
2041
2114
 
2042
2115
  }
2043
2116
  enum HState {
@@ -2065,4 +2138,24 @@ public class HTTPParser {
2065
2138
  , connection_keep_alive
2066
2139
  , connection_close
2067
2140
  }
2141
+ public enum UrlFields {
2142
+ UF_SCHEMA(0)
2143
+ , UF_HOST(1)
2144
+ , UF_PORT(2)
2145
+ , UF_PATH(3)
2146
+ , UF_QUERY(4)
2147
+ , UF_FRAGMENT(5)
2148
+ , UF_MAX(6);
2149
+
2150
+
2151
+ private final int index;
2152
+
2153
+ private UrlFields(int index) {
2154
+ this.index = index;
2155
+ }
2156
+ public int getIndex() {
2157
+ return index;
2158
+ }
2159
+
2160
+ }
2068
2161
  }