http_parser.rb 0.5.3 → 0.6.0.beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. data/.gitmodules +3 -3
  2. data/Gemfile +1 -1
  3. data/Gemfile.lock +9 -2
  4. data/README.md +50 -45
  5. data/bench/standalone.rb +23 -0
  6. data/bench/thin.rb +1 -0
  7. data/ext/ruby_http_parser/org/ruby_http_parser/RubyHttpParser.java +66 -58
  8. data/ext/ruby_http_parser/ruby_http_parser.c +10 -41
  9. data/ext/ruby_http_parser/vendor/http-parser-java/AUTHORS +32 -0
  10. data/ext/ruby_http_parser/vendor/http-parser-java/LICENSE-MIT +5 -1
  11. data/ext/ruby_http_parser/vendor/http-parser-java/README.md +133 -1
  12. data/ext/ruby_http_parser/vendor/http-parser-java/TODO +6 -0
  13. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.c +1029 -615
  14. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.gyp +79 -0
  15. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.h +177 -43
  16. data/ext/ruby_http_parser/vendor/http-parser-java/src/Http-parser.java.iml +22 -0
  17. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/FieldData.java +41 -0
  18. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPHeadersCompleteCallback.java +13 -0
  19. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPMethod.java +4 -1
  20. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPParserUrl.java +76 -0
  21. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserSettings.java +2 -2
  22. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/Util.java +6 -6
  23. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPHeadersCompleteCallback.java +12 -0
  24. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPParser.java +715 -637
  25. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/ParserSettings.java +1 -1
  26. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Message.java +71 -21
  27. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/ParseUrl.java +51 -0
  28. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Requests.java +1 -1
  29. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Responses.java +1 -0
  30. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Test.java +2 -1
  31. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestHeaderOverflowError.java +1 -0
  32. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestLoaderNG.java +6 -17
  33. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestNoOverflowLongBody.java +1 -0
  34. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/UnitTest.java +1 -0
  35. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Upgrade.java +1 -0
  36. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Url.java +127 -0
  37. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Util.java +80 -9
  38. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/WrongContentLength.java +2 -1
  39. data/ext/ruby_http_parser/vendor/http-parser-java/test.c +1141 -210
  40. data/ext/ruby_http_parser/vendor/http-parser-java/tests.dumped +230 -71
  41. data/ext/ruby_http_parser/vendor/http-parser/AUTHORS +32 -0
  42. data/ext/ruby_http_parser/vendor/http-parser/LICENSE-MIT +5 -1
  43. data/ext/ruby_http_parser/vendor/http-parser/README.md +9 -2
  44. data/ext/ruby_http_parser/vendor/http-parser/http_parser.c +1029 -615
  45. data/ext/ruby_http_parser/vendor/http-parser/http_parser.gyp +79 -0
  46. data/ext/ruby_http_parser/vendor/http-parser/http_parser.h +145 -16
  47. data/ext/ruby_http_parser/vendor/http-parser/test.c +1065 -141
  48. data/http_parser.rb.gemspec +3 -1
  49. data/spec/parser_spec.rb +41 -17
  50. data/spec/support/requests.json +236 -24
  51. data/spec/support/responses.json +182 -36
  52. data/tasks/compile.rake +2 -2
  53. data/tasks/fixtures.rake +7 -1
  54. metadata +57 -19
  55. data/ext/ruby_http_parser/vendor/http-parser-java/compile +0 -1
  56. data/ext/ruby_http_parser/vendor/http-parser-java/test_permutations +0 -1
  57. data/ext/ruby_http_parser/vendor/http-parser-java/test_unit +0 -1
  58. data/ext/ruby_http_parser/vendor/http-parser-java/test_utf8 +0 -1
@@ -0,0 +1,76 @@
1
+ package http_parser;
2
+
3
+ import http_parser.lolevel.*;
4
+ import http_parser.lolevel.HTTPParser;
5
+
6
+ import java.io.UnsupportedEncodingException;
7
+ import java.nio.ByteBuffer;
8
+ import java.util.Arrays;
9
+
10
+ /**
11
+ */
12
+ public class HTTPParserUrl {
13
+
14
+ public int field_set;
15
+ public int port;
16
+
17
+ public FieldData[] field_data = new FieldData[]{
18
+ new FieldData(0,0),
19
+ new FieldData(0,0),
20
+ new FieldData(0,0),
21
+ new FieldData(0,0),
22
+ new FieldData(0,0),
23
+ new FieldData(0,0)
24
+ }; //UF_MAX
25
+
26
+ public HTTPParserUrl(){}
27
+
28
+ public HTTPParserUrl(int field_set, int port, FieldData[] field_data){
29
+ this.field_set = field_set;
30
+ this.port = port;
31
+ this.field_data = field_data;
32
+ }
33
+
34
+ public String getFieldValue(HTTPParser.UrlFields field, ByteBuffer data) throws UnsupportedEncodingException {
35
+ FieldData fd = this.field_data[field.getIndex()];
36
+ if(fd.off == 0 & fd.len == 0) return "";
37
+ byte[] dst = new byte[fd.len];
38
+ int current_pos = data.position();
39
+ data.position(fd.off);
40
+ data.get(dst,0,fd.len);
41
+ data.position(current_pos);
42
+ String v = new String(dst, "UTF8");
43
+ return v;
44
+ }
45
+
46
+ @Override
47
+ public boolean equals(Object o) {
48
+ if (this == o) return true;
49
+ if (o == null || getClass() != o.getClass()) return false;
50
+
51
+ HTTPParserUrl that = (HTTPParserUrl) o;
52
+
53
+ if (field_set != that.field_set) return false;
54
+ if (port != that.port) return false;
55
+ if (!Arrays.equals(field_data, that.field_data)) return false;
56
+
57
+ return true;
58
+ }
59
+
60
+ @Override
61
+ public int hashCode() {
62
+ int result = field_set;
63
+ result = 31 * result + port;
64
+ result = 31 * result + Arrays.hashCode(field_data);
65
+ return result;
66
+ }
67
+
68
+ @Override
69
+ public String toString() {
70
+ return "HTTPParserUrl{" +
71
+ "field_set=" + field_set +
72
+ ", port=" + port +
73
+ ", field_data=" + (field_data == null ? null : Arrays.asList(field_data)) +
74
+ '}';
75
+ }
76
+ }
@@ -27,7 +27,7 @@ public class ParserSettings extends http_parser.lolevel.ParserSettings {
27
27
  private HTTPDataCallback _on_fragment;
28
28
  private HTTPDataCallback _on_header_field;
29
29
  private HTTPDataCallback _on_header_value;
30
- private HTTPCallback _on_headers_complete;
30
+ private HTTPHeadersCompleteCallback _on_headers_complete;
31
31
  private HTTPDataCallback _on_body;
32
32
  private HTTPCallback _on_message_complete;
33
33
  private HTTPErrorCallback _on_error;
@@ -175,7 +175,7 @@ public class ParserSettings extends http_parser.lolevel.ParserSettings {
175
175
  return 0;
176
176
  }
177
177
  };
178
- this._on_headers_complete = new HTTPCallback() {
178
+ this._on_headers_complete = new HTTPHeadersCompleteCallback() {
179
179
  @Override
180
180
  public int cb(HTTPParser parser) {
181
181
  // is there an uncompleted value ... ?
@@ -30,7 +30,7 @@ public class Util {
30
30
  //
31
31
  // }
32
32
 
33
- public static String error (String mes, ByteBuffer b, int begining) {
33
+ public static String error (String mes, ByteBuffer b, int beginning) {
34
34
  // the error message should look like this:
35
35
  //
36
36
  // Bla expected something, but it's not there (mes)
@@ -50,7 +50,7 @@ public class Util {
50
50
  final int mes_width = 72;
51
51
  int p = b.position(); // error position
52
52
  int end = b.limit(); // this is the end
53
- int m = end - begining; // max mes length
53
+ int m = end - beginning; // max mes length
54
54
 
55
55
  StringBuilder builder = new StringBuilder();
56
56
  int p_adj = p;
@@ -58,9 +58,9 @@ public class Util {
58
58
  byte [] orig = new byte[0];
59
59
  if (m <= mes_width) {
60
60
  orig = new byte[m];
61
- b.position(begining);
61
+ b.position(beginning);
62
62
  b.get(orig, 0, m);
63
- p_adj = p-begining;
63
+ p_adj = p-beginning;
64
64
 
65
65
 
66
66
  } else {
@@ -73,7 +73,7 @@ public class Util {
73
73
  // CAN'T be not enough stuff around p in total, because
74
74
  // m>meswidth (see if to this else)
75
75
 
76
- int before = p-begining;
76
+ int before = p-beginning;
77
77
  int after = end - p;
78
78
  if ( (before > mes_width/2) && (after > mes_width/2)) {
79
79
  // plenty of stuff in front of and behind error
@@ -82,7 +82,7 @@ public class Util {
82
82
  b.get(orig, 0, mes_width);
83
83
  } else if (before <= mes_width/2) {
84
84
  // take all of the beginning.
85
- b.position(begining);
85
+ b.position(beginning);
86
86
  // and as much of the rest as possible
87
87
 
88
88
  b.get(orig, 0, mes_width);
@@ -0,0 +1,12 @@
1
+ package http_parser.lolevel;
2
+
3
+ /**
4
+ * Special interface for headers_complete callback.
5
+ * This is somewhat different than other callbacks because if the user returns 1, we
6
+ * will interpret that as saying that this message has no body. This
7
+ * is needed for the annoying case of receiving a response to a HEAD
8
+ * request.
9
+ */
10
+ public interface HTTPHeadersCompleteCallback extends HTTPCallback{
11
+
12
+ }
@@ -1,8 +1,10 @@
1
1
  package http_parser.lolevel;
2
2
 
3
+ import java.io.UnsupportedEncodingException;
3
4
  import java.nio.ByteBuffer;
4
5
  import http_parser.HTTPException;
5
6
  import http_parser.HTTPMethod;
7
+ import http_parser.HTTPParserUrl;
6
8
  import http_parser.ParserType;
7
9
  import static http_parser.lolevel.HTTPParser.C.*;
8
10
  import static http_parser.lolevel.HTTPParser.State.*;
@@ -20,10 +22,9 @@ public class HTTPParser {
20
22
  int flags; // TODO
21
23
 
22
24
  int nread;
23
- int content_length;
25
+ long content_length;
24
26
 
25
- int start_position;
26
- ByteBuffer data;
27
+ int p_start; // updated each call to execute to indicate where the buffer was before we began calling it.
27
28
 
28
29
  /** READ-ONLY **/
29
30
  public int http_major;
@@ -52,10 +53,8 @@ public class HTTPParser {
52
53
  */
53
54
  int header_field_mark = -1;
54
55
  int header_value_mark = -1;
55
- int fragment_mark = -1;
56
- int query_string_mark = -1;
57
- int path_mark = -1;
58
56
  int url_mark = -1;
57
+ int body_mark = -1;
59
58
 
60
59
  /**
61
60
  * Construct a Parser for ParserType.HTTP_BOTH, meaning it
@@ -79,7 +78,7 @@ public class HTTPParser {
79
78
  this.state = State.start_res;
80
79
  break;
81
80
  case HTTP_BOTH:
82
- this.state = State.start_res_or_res;
81
+ this.state = State.start_req_or_res;
83
82
  break;
84
83
  default:
85
84
  throw new HTTPException("can't happen, invalid ParserType enum");
@@ -91,6 +90,172 @@ public class HTTPParser {
91
90
  */
92
91
  static void p(Object o) {System.out.println(o);}
93
92
 
93
+ /** Comment from C version follows
94
+ *
95
+ * Our URL parser.
96
+ *
97
+ * This is designed to be shared by http_parser_execute() for URL validation,
98
+ * hence it has a state transition + byte-for-byte interface. In addition, it
99
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
100
+ * work of turning state transitions URL components for its API.
101
+ *
102
+ * This function should only be invoked with non-space characters. It is
103
+ * assumed that the caller cares about (and can detect) the transition between
104
+ * URL and non-URL states by looking for these.
105
+ */
106
+ public State parse_url_char(byte ch) {
107
+
108
+ int chi = ch & 0xff; // utility, ch without signedness for table lookups.
109
+
110
+ if(SPACE == ch){
111
+ throw new HTTPException("space as url char");
112
+ }
113
+
114
+ switch(state) {
115
+ case req_spaces_before_url:
116
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
117
+ * All methods except CONNECT are followed by '/' or '*'.
118
+ */
119
+ if(SLASH == ch || STAR == ch){
120
+ return req_path;
121
+ }
122
+ if(isAtoZ(ch)){
123
+ return req_schema;
124
+ }
125
+ break;
126
+ case req_schema:
127
+ if(isAtoZ(ch)){
128
+ return req_schema;
129
+ }
130
+ if(COLON == ch){
131
+ return req_schema_slash;
132
+ }
133
+ break;
134
+ case req_schema_slash:
135
+ if(SLASH == ch){
136
+ return req_schema_slash_slash;
137
+ }
138
+ break;
139
+ case req_schema_slash_slash:
140
+ if(SLASH == ch){
141
+ return req_host_start;
142
+ }
143
+ break;
144
+ case req_host_start:
145
+ if (ch == (byte)'[') {
146
+ return req_host_v6_start;
147
+ }
148
+ if (isHostChar(ch)) {
149
+ return req_host;
150
+ }
151
+ break;
152
+
153
+ case req_host:
154
+ if (isHostChar(ch)) {
155
+ return req_host;
156
+ }
157
+
158
+ /* FALLTHROUGH */
159
+ case req_host_v6_end:
160
+ switch (ch) {
161
+ case ':':
162
+ return req_port_start;
163
+ case '/':
164
+ return req_path;
165
+ case '?':
166
+ return req_query_string_start;
167
+ }
168
+ break;
169
+
170
+ case req_host_v6:
171
+ if (ch == ']') {
172
+ return req_host_v6_end;
173
+ }
174
+
175
+ /* FALLTHROUGH */
176
+ case req_host_v6_start:
177
+ if (isHex(ch) || ch == ':') {
178
+ return req_host_v6;
179
+ }
180
+ break;
181
+
182
+ case req_port:
183
+ switch (ch) {
184
+ case '/':
185
+ return req_path;
186
+ case '?':
187
+ return req_query_string_start;
188
+ }
189
+
190
+ /* FALLTHROUGH */
191
+ case req_port_start:
192
+ if (isDigit(ch)) {
193
+ return req_port;
194
+ }
195
+ break;
196
+
197
+ case req_path:
198
+ if (isNormalUrlChar(chi)) {
199
+ return req_path;
200
+ }
201
+ switch (ch) {
202
+ case '?':
203
+ return req_query_string_start;
204
+ case '#':
205
+ return req_fragment_start;
206
+ }
207
+
208
+ break;
209
+
210
+ case req_query_string_start:
211
+ case req_query_string:
212
+ if (isNormalUrlChar(chi)) {
213
+ return req_query_string;
214
+ }
215
+
216
+ switch (ch) {
217
+ case '?':
218
+ /* allow extra '?' in query string */
219
+ return req_query_string;
220
+
221
+ case '#':
222
+ return req_fragment_start;
223
+ }
224
+
225
+ break;
226
+
227
+ case req_fragment_start:
228
+ if (isNormalUrlChar(chi)) {
229
+ return req_fragment;
230
+ }
231
+ switch (ch) {
232
+ case '?':
233
+ return req_fragment;
234
+
235
+ case '#':
236
+ return req_fragment_start;
237
+ }
238
+ break;
239
+
240
+ case req_fragment:
241
+ if (isNormalUrlChar(ch)) {
242
+ return req_fragment;
243
+ }
244
+
245
+ switch (ch) {
246
+ case '?':
247
+ case '#':
248
+ return req_fragment;
249
+ }
250
+
251
+ break;
252
+ default:
253
+ break;
254
+ }
255
+
256
+ /* We should never fall out of the switch above unless there's an error */
257
+ return dead;
258
+ }
94
259
 
95
260
  /** Execute the parser with the currently available data contained in
96
261
  * the buffer. The buffers position() and limit() need to be set
@@ -100,10 +265,9 @@ public class HTTPParser {
100
265
  public int execute(ParserSettings settings, ByteBuffer data) {
101
266
 
102
267
  int p = data.position();
103
- int p_err = p; // this is used for pretty printing errors.
268
+ this.p_start = p; // this is used for pretty printing errors.
269
+ // and returning the amount of processed bytes.
104
270
 
105
- this.start_position = p;
106
- this.data = data;
107
271
 
108
272
  // In case the headers don't provide information about the content
109
273
  // length, `execute` needs to be called with an empty buffer to
@@ -111,19 +275,19 @@ public class HTTPParser {
111
275
  // else there is no way of knowing the message is complete.
112
276
  int len = (data.limit() - data.position());
113
277
  if (0 == len) {
114
- // if (State.body_identity_eof == state) {
115
- // settings.call_on_message_complete(this);
116
- // }
278
+ // if (State.body_identity_eof == state) {
279
+ // settings.call_on_message_complete(this);
280
+ // }
117
281
  switch (state) {
118
282
  case body_identity_eof:
119
283
  settings.call_on_message_complete(this);
120
- return data.position() - start_position;
284
+ return data.position() - this.p_start;
121
285
 
122
286
  case dead:
123
- case start_res_or_res:
287
+ case start_req_or_res:
124
288
  case start_res:
125
289
  case start_req:
126
- return data.position() - start_position;
290
+ return data.position() - this.p_start;
127
291
 
128
292
  default:
129
293
  // should we really consider this an error!?
@@ -142,78 +306,81 @@ public class HTTPParser {
142
306
  case header_value:
143
307
  header_value_mark = p;
144
308
  break;
145
- case req_fragment:
146
- fragment_mark = p;
147
- url_mark = p;
148
- break;
149
- case req_query_string:
150
- query_string_mark = p;
151
- url_mark = p;
152
- break;
153
309
  case req_path:
154
- path_mark = p;
155
-
156
- case req_host:
157
310
  case req_schema:
158
311
  case req_schema_slash:
159
312
  case req_schema_slash_slash:
313
+ case req_host_start:
314
+ case req_host_v6_start:
315
+ case req_host_v6:
316
+ case req_host_v6_end:
317
+ case req_host:
318
+ case req_port_start:
160
319
  case req_port:
161
320
  case req_query_string_start:
321
+ case req_query_string:
162
322
  case req_fragment_start:
323
+ case req_fragment:
163
324
  url_mark = p;
164
325
  break;
165
326
  }
327
+ boolean reexecute = false;
328
+ int pe = 0;
329
+ byte ch = 0;
330
+ int chi = 0;
331
+ byte c = -1;
332
+ int to_read = 0;
166
333
 
167
334
  // this is where the work gets done, traverse the available data...
168
- while (data.position() != data.limit()) {
169
-
170
- p = data.position();
171
- int pe = data.limit();
172
-
173
- byte ch = data.get(); // the current character to process.
174
- int chi = ch & 0xff; // utility, ch without signedness for table lookups.
175
- byte c = -1; // utility variably used for up- and downcasing etc.
176
- int to_read = 0; // used to keep track of how much of body, etc. is left to read
177
-
178
- if (parsing_header(state)) {
179
- ++nread;
180
- if (nread > HTTP_MAX_HEADER_SIZE) {
181
- settings.call_on_error(this, "possible buffer overflow", data, p_err);
182
- return error();
335
+ while (data.position() != data.limit() || reexecute) {
336
+ // p(state + ": r: " + reexecute + " :: " +p );
337
+
338
+ if(!reexecute){
339
+ p = data.position();
340
+ pe = data.limit();
341
+ ch = data.get(); // the current character to process.
342
+ chi = ch & 0xff; // utility, ch without signedness for table lookups.
343
+ c = -1; // utility variably used for up- and downcasing etc.
344
+ to_read = 0; // used to keep track of how much of body, etc. is left to read
345
+
346
+ if (parsing_header(state)) {
347
+ ++nread;
348
+ if (nread > HTTP_MAX_HEADER_SIZE) {
349
+ return error(settings, "possible buffer overflow", data);
350
+ }
183
351
  }
184
352
  }
185
- //p(state + ":" + ch +":"+p);
353
+ reexecute = false;
354
+ // p(state + " ::: " + ch + " : " + (((CR == ch) || (LF == ch)) ? ch : ("'" + (char)ch + "'")) +": "+p );
355
+
186
356
  switch (state) {
187
357
  /*
188
358
  * this state is used after a 'Connection: close' message
189
359
  * the parser will error out if it reads another message
190
360
  */
191
361
  case dead:
192
- settings.call_on_error(this, "Connection already closed", data, p_err);
193
- return error();
362
+ if (CR == ch || LF == ch){
363
+ break;
364
+ }
365
+ return error(settings, "Connection already closed", data);
194
366
 
195
367
 
196
368
 
197
- case start_res_or_res:
369
+ case start_req_or_res:
198
370
  if (CR == ch || LF == ch){
199
371
  break;
200
372
  }
201
373
  flags = 0;
202
374
  content_length = -1;
203
375
 
204
- settings.call_on_message_begin(this);
205
-
206
- if (H == ch) {
376
+ if (H == ch) {
207
377
  state = State.res_or_resp_H;
378
+ settings.call_on_message_begin(this);
208
379
  } else {
209
- type = ParserType.HTTP_REQUEST;
210
- method = start_req_method_assign(ch);
211
- if (null == method) {
212
- settings.call_on_error(this, "invalid method", data, p_err);
213
- return error();
214
- }
215
- index = 1;
216
- state = State.req_method;
380
+ type = ParserType.HTTP_REQUEST;
381
+ state = State.start_req;
382
+ index = 1;
383
+ reexecute = true;
217
384
  }
218
385
  break;
219
386
 
@@ -225,8 +392,7 @@ public class HTTPParser {
225
392
  state = State.res_HT;
226
393
  } else {
227
394
  if (E != ch) {
228
- settings.call_on_error(this, "not E", data, p_err);
229
- return error();
395
+ return error(settings, "not E", data);
230
396
  }
231
397
  type = ParserType.HTTP_REQUEST;
232
398
  method = HTTPMethod.HTTP_HEAD;
@@ -241,8 +407,6 @@ public class HTTPParser {
241
407
  flags = 0;
242
408
  content_length = -1;
243
409
 
244
- settings.call_on_message_begin(this);
245
-
246
410
  switch(ch) {
247
411
  case H:
248
412
  state = State.res_H;
@@ -251,38 +415,35 @@ public class HTTPParser {
251
415
  case LF:
252
416
  break;
253
417
  default:
254
- settings.call_on_error(this, "Not H or CR/LF", data, p_err);
255
- return error();
418
+ return error(settings, "Not H or CR/LF", data);
256
419
  }
420
+
421
+ settings.call_on_message_begin(this);
257
422
  break;
258
423
 
259
424
 
260
425
 
261
426
  case res_H:
262
427
  if (strict && T != ch) {
263
- settings.call_on_error(this, "Not T", data, p_err);
264
- return error();
428
+ return error(settings, "Not T", data);
265
429
  }
266
430
  state = State.res_HT;
267
431
  break;
268
432
  case res_HT:
269
433
  if (strict && T != ch) {
270
- settings.call_on_error(this, "Not T2", data, p_err);
271
- return error();
434
+ return error(settings, "Not T2", data);
272
435
  }
273
436
  state = State.res_HTT;
274
437
  break;
275
438
  case res_HTT:
276
439
  if (strict && P != ch) {
277
- settings.call_on_error(this, "Not P", data, p_err);
278
- return error();
440
+ return error(settings, "Not P", data);
279
441
  }
280
442
  state = State.res_HTTP;
281
443
  break;
282
444
  case res_HTTP:
283
445
  if (strict && SLASH != ch) {
284
- settings.call_on_error(this, "Not '/'", data, p_err);
285
- return error();
446
+ return error(settings, "Not '/'", data);
286
447
  }
287
448
  state = State.res_first_http_major;
288
449
  break;
@@ -291,8 +452,7 @@ public class HTTPParser {
291
452
 
292
453
  case res_first_http_major:
293
454
  if (!isDigit(ch)) {
294
- settings.call_on_error(this, "Not a digit", data, p_err);
295
- return error();
455
+ return error(settings, "Not a digit", data);
296
456
  }
297
457
  http_major = (int) ch - 0x30;
298
458
  state = State.res_http_major;
@@ -305,23 +465,20 @@ public class HTTPParser {
305
465
  break;
306
466
  }
307
467
  if (!isDigit(ch)) {
308
- settings.call_on_error(this, "Not a digit", data, p_err);
309
- return error();
468
+ return error(settings, "Not a digit", data);
310
469
  }
311
470
  http_major *= 10;
312
471
  http_major += (ch - 0x30);
313
472
 
314
473
  if (http_major > 999) {
315
- settings.call_on_error(this, "invalid http major version: "+http_major, data, p_err);
316
- return error();
474
+ return error(settings, "invalid http major version: ", data);
317
475
  }
318
476
  break;
319
477
 
320
478
  /* first digit of minor HTTP version */
321
479
  case res_first_http_minor:
322
480
  if (!isDigit(ch)) {
323
- settings.call_on_error(this, "Not a digit", data, p_err);
324
- return error();
481
+ return error(settings, "Not a digit", data);
325
482
  }
326
483
  http_minor = (int)ch - 0x30;
327
484
  state = State.res_http_minor;
@@ -334,14 +491,12 @@ public class HTTPParser {
334
491
  break;
335
492
  }
336
493
  if (!isDigit(ch)) {
337
- settings.call_on_error(this, "Not a digit", data, p_err);
338
- return error();
494
+ return error(settings, "Not a digit", data);
339
495
  }
340
496
  http_minor *= 10;
341
497
  http_minor += (ch - 0x30);
342
498
  if (http_minor > 999) {
343
- settings.call_on_error(this, "invalid http minor version: "+http_minor, data, p_err);
344
- return error();
499
+ return error(settings, "invalid http minor version: ", data);
345
500
  }
346
501
  break;
347
502
 
@@ -352,8 +507,7 @@ public class HTTPParser {
352
507
  if (SPACE == ch) {
353
508
  break;
354
509
  }
355
- settings.call_on_error(this, "Not a digit (status code)", data, p_err);
356
- return error();
510
+ return error(settings, "Not a digit (status code)", data);
357
511
  }
358
512
  status_code = (int)ch - 0x30;
359
513
  state = State.res_status_code;
@@ -372,16 +526,14 @@ public class HTTPParser {
372
526
  state = State.header_field_start;
373
527
  break;
374
528
  default:
375
- settings.call_on_error(this, "not a valid status code", data, p_err);
376
- return error();
529
+ return error(settings, "not a valid status code", data);
377
530
  }
378
531
  break;
379
532
  }
380
533
  status_code *= 10;
381
534
  status_code += (int)ch - 0x30;
382
535
  if (status_code > 999) {
383
- settings.call_on_error(this, "ridiculous status code:"+status_code, data, p_err);
384
- return error();
536
+ return error(settings, "ridiculous status code:", data);
385
537
  }
386
538
  break;
387
539
 
@@ -402,8 +554,7 @@ public class HTTPParser {
402
554
 
403
555
  case res_line_almost_done:
404
556
  if (strict && LF != ch) {
405
- settings.call_on_error(this, "not LF", data, p_err);
406
- return error();
557
+ return error(settings, "not LF", data);
407
558
  }
408
559
  state = State.header_field_start;
409
560
  break;
@@ -416,22 +567,26 @@ public class HTTPParser {
416
567
  }
417
568
  flags = 0;
418
569
  content_length = -1;
419
- settings.call_on_message_begin(this);
570
+
571
+ if(!isAtoZ(ch)){
572
+ return error(settings, "invalid method", data);
573
+ }
574
+
420
575
  method = start_req_method_assign(ch);
421
576
  if (null == method) {
422
- settings.call_on_error(this, "invalid method", data, p_err);
423
- return error();
577
+ return error(settings, "invalid method", data);
424
578
  }
425
579
  index = 1;
426
580
  state = State.req_method;
581
+
582
+ settings.call_on_message_begin(this);
427
583
  break;
428
584
 
429
585
 
430
586
 
431
587
  case req_method:
432
588
  if (0 == ch) {
433
- settings.call_on_error(this, "NULL in method", data, p_err);
434
- return error();
589
+ return error(settings, "NULL in method", data);
435
590
  }
436
591
 
437
592
  byte [] arr = method.bytes;
@@ -456,17 +611,28 @@ public class HTTPParser {
456
611
  } else if (2 == index && A == ch) {
457
612
  method = HTTPMethod.HTTP_MKACTIVITY;
458
613
  }
459
- } else if (1 == index && HTTPMethod.HTTP_POST == method && R == ch) {
460
- method = HTTPMethod.HTTP_PROPFIND;
461
- } else if (1 == index && HTTPMethod.HTTP_POST == method && U == ch) {
462
- method = HTTPMethod.HTTP_PUT;
463
- } else if (2 == index && HTTPMethod.HTTP_UNLOCK == method && S == ch) {
464
- method = HTTPMethod.HTTP_UNSUBSCRIBE;
465
- } else if (4 == index && HTTPMethod.HTTP_PROPFIND == method && P == ch) {
614
+ } else if (1 == index && HTTPMethod.HTTP_POST == method) {
615
+ if(R == ch) {
616
+ method = HTTPMethod.HTTP_PROPFIND; /* or HTTP_PROPPATCH */
617
+ }else if(U == ch){
618
+ method = HTTPMethod.HTTP_PUT; /* or HTTP_PURGE */
619
+ }else if(A == ch){
620
+ method = HTTPMethod.HTTP_PATCH;
621
+ }
622
+ } else if (2 == index) {
623
+ if(HTTPMethod.HTTP_PUT == method) {
624
+ if(R == ch){
625
+ method = HTTPMethod.HTTP_PURGE;
626
+ }
627
+ }else if(HTTPMethod.HTTP_UNLOCK == method){
628
+ if(S == ch){
629
+ method = HTTPMethod.HTTP_UNSUBSCRIBE;
630
+ }
631
+ }
632
+ }else if(4 == index && HTTPMethod.HTTP_PROPFIND == method && P == ch){
466
633
  method = HTTPMethod.HTTP_PROPPATCH;
467
634
  } else {
468
- settings.call_on_error(this, "Invalid HTTP method", data, p_err);
469
- return error();
635
+ return error(settings, "Invalid HTTP method", data);
470
636
  }
471
637
 
472
638
  ++index;
@@ -479,324 +645,68 @@ public class HTTPParser {
479
645
  if (SPACE == ch) {
480
646
  break;
481
647
  }
482
- if (SLASH == ch || STAR == ch) {
483
- url_mark = p;
484
- path_mark = p;
485
- state = State.req_path;
486
- break;
487
- }
488
- if (isAtoZ(ch)) {
489
- url_mark = p;
490
- state = State.req_schema;
491
- break;
492
- }
493
- settings.call_on_error(this, "Invalid something", data, p_err);
494
- return error();
495
-
496
- case req_schema:
497
- if (isAtoZ(ch)){
498
- break;
499
- }
500
- if (COLON == ch) {
501
- state = State.req_schema_slash;
502
- break;
503
- } else if (DOT == ch || isDigit(ch)) {
504
- state = State.req_host;
505
- break;
648
+ url_mark = p;
649
+ if(HTTPMethod.HTTP_CONNECT == method){
650
+ state = req_host_start;
506
651
  }
507
- settings.call_on_error(this, "invalid char in schema: "+ch, data, p_err);
508
- return error();
509
652
 
510
- case req_schema_slash:
511
- if (strict && SLASH != ch) {
512
- settings.call_on_error(this, "invalid char in schema, not /", data, p_err);
513
- return error();
653
+ state = parse_url_char(ch);
654
+ if(state == dead){
655
+ return error(settings, "Invalid something", data);
514
656
  }
515
- state = State.req_schema_slash_slash;
516
657
  break;
517
658
 
659
+
660
+ case req_schema:
661
+ case req_schema_slash:
518
662
  case req_schema_slash_slash:
519
- if (strict && SLASH != ch) {
520
- settings.call_on_error(this, "invalid char in schema, not /", data, p_err);
521
- return error();
522
- }
523
- state = State.req_host;
524
- break;
525
-
526
- case req_host:
527
- if (isAtoZ(ch)) {
528
- break;
529
- }
530
- if (isDigit(ch) || DOT == ch || DASH == ch) break;
663
+ case req_host_start:
664
+ case req_host_v6_start:
665
+ case req_host_v6:
666
+ case req_port_start:
531
667
  switch (ch) {
532
- case COLON:
533
- state = State.req_port;
534
- break;
535
- case SLASH:
536
- path_mark = p;
537
- break;
668
+ /* No whitespace allowed here */
538
669
  case SPACE:
539
- /* The request line looks like:
540
- * "GET http://foo.bar.com HTTP/1.1"
541
- * That is, there is no path.
542
- */
543
- settings.call_on_url(this, data, url_mark, p-url_mark);
544
- url_mark = -1;
545
- state = State.req_http_start;
546
- break;
547
- case QMARK:
548
- state = State.req_query_string_start;
549
- break;
670
+ case CR:
671
+ case LF:
672
+ return error(settings, "unexpected char in path", data);
550
673
  default:
551
- settings.call_on_error(this, "host error in method line", data, p_err);
552
- return error();
674
+ state = parse_url_char(ch);
675
+ if(dead == state){
676
+ return error(settings, "unexpected char in path", data);
677
+ }
553
678
  }
554
679
  break;
555
680
 
681
+ case req_host:
682
+ case req_host_v6_end:
556
683
  case req_port:
557
- if (isDigit(ch)) break;
558
- switch (ch) {
559
- case SLASH:
560
- path_mark = p;
561
- state = State.req_path;
562
- break;
563
- case SPACE:
564
- /* The request line looks like:
565
- * "GET http://foo.bar.com:1234 HTTP/1.1"
566
- * That is, there is no path.
567
- */
568
- settings.call_on_url(this,data,url_mark,p-url_mark);
569
- url_mark = -1;
570
- state = State.req_http_start;
571
- break;
572
- case QMARK:
573
- state = State.req_query_string_start;
574
- break;
575
- default:
576
- settings.call_on_error(this, "invalid port", data, p_err);
577
- return error();
578
- }
579
- break;
580
-
581
684
  case req_path:
582
- if (normal_url_char[chi]) break;
583
- switch (ch) {
584
- case SPACE:
585
- settings.call_on_url(this,data,url_mark, p-url_mark);
586
- url_mark = -1;
587
-
588
- settings.call_on_path(this,data,path_mark, p-path_mark);
589
- path_mark = -1;
590
-
591
- state = State.req_http_start;
592
- break;
593
-
594
- case CR:
595
- settings.call_on_url(this,data,url_mark, p-url_mark);
596
- url_mark = -1;
597
-
598
- settings.call_on_path(this,data,path_mark, p-path_mark);
599
- path_mark = -1;
600
-
601
- http_minor = 9;
602
- state = State.res_line_almost_done;
603
- break;
604
-
605
- case LF:
606
- settings.call_on_url(this,data,url_mark, p-url_mark);
607
- url_mark = -1;
608
-
609
- settings.call_on_path(this,data,path_mark, p-path_mark);
610
- path_mark = -1;
611
-
612
- http_minor = 9;
613
- state = State.header_field_start;
614
- break;
615
-
616
- case QMARK:
617
- settings.call_on_path(this,data,path_mark, p-path_mark);
618
- path_mark = -1;
619
-
620
- state = State.req_query_string_start;
621
- break;
622
-
623
- case HASH:
624
- settings.call_on_path(this,data,path_mark, p-path_mark);
625
- path_mark = -1;
626
-
627
- state = State.req_fragment_start;
628
- break;
629
-
630
- default:
631
- settings.call_on_error(this, "unexpected char in path", data, p_err);
632
- return error();
633
- }
634
- break;
635
-
636
685
  case req_query_string_start:
637
- if (normal_url_char[chi]) {
638
- query_string_mark = p;
639
- state = State.req_query_string;
640
- break;
641
- }
642
-
643
- switch (ch) {
644
- case QMARK: break;
645
- case SPACE:
646
- settings.call_on_url(this, data, url_mark, p-url_mark);
647
- url_mark = -1;
648
- state = State.req_http_start;
649
- break;
650
- case CR:
651
- settings.call_on_url(this,data,url_mark, p-url_mark);
652
- url_mark = -1;
653
- http_minor = 9;
654
- state = State.res_line_almost_done;
655
- break;
656
- case LF:
657
- settings.call_on_url(this,data,url_mark, p-url_mark);
658
- url_mark = -1;
659
- http_minor = 9;
660
- state = State.header_field_start;
661
- break;
662
- case HASH:
663
- state = State.req_fragment_start;
664
- break;
665
- default:
666
- settings.call_on_error(this, "unexpected char in path", data, p_err);
667
- return error();
668
- }
669
- break;
670
-
671
686
  case req_query_string:
672
- if (normal_url_char[chi]) {
673
- break;
674
- }
675
-
676
- switch (ch) {
677
- case QMARK: break; // allow extra '?' in query string
678
- case SPACE:
679
- settings.call_on_url(this, data, url_mark, p-url_mark);
680
- url_mark = -1;
681
-
682
- settings.call_on_query_string(this, data, query_string_mark, p-query_string_mark);
683
- query_string_mark = -1;
684
-
685
- state = State.req_http_start;
686
- break;
687
- case CR:
688
- settings.call_on_url(this,data,url_mark, p-url_mark);
689
- url_mark = -1;
690
-
691
- settings.call_on_query_string(this, data, query_string_mark, p-query_string_mark);
692
- query_string_mark = -1;
693
-
694
- http_minor = 9;
695
- state = State.res_line_almost_done;
696
- break;
697
- case LF:
698
- settings.call_on_url(this,data,url_mark, p-url_mark);
699
- url_mark = -1;
700
-
701
- settings.call_on_query_string(this, data, query_string_mark, p-query_string_mark);
702
- query_string_mark = -1;
703
- http_minor = 9;
704
-
705
- state = State.header_field_start;
706
- break;
707
- case HASH:
708
- settings.call_on_query_string(this, data, query_string_mark, p-query_string_mark);
709
- query_string_mark = -1;
710
-
711
- state = State.req_fragment_start;
712
- break;
713
- default:
714
- settings.call_on_error(this, "unexpected char in path", data, p_err);
715
- return error();
716
- }
717
- break;
718
-
719
687
  case req_fragment_start:
720
- if (normal_url_char[chi]) {
721
- fragment_mark = p;
722
- state = State.req_fragment;
723
- break;
724
- }
725
-
726
- switch (ch) {
727
- case SPACE:
728
- settings.call_on_url(this, data, url_mark, p-url_mark);
729
- url_mark = -1;
730
-
731
- state = State.req_http_start;
732
- break;
733
- case CR:
734
- settings.call_on_url(this,data,url_mark, p-url_mark);
735
- url_mark = -1;
736
-
737
- http_minor = 9;
738
- state = State.res_line_almost_done;
739
- break;
740
- case LF:
741
- settings.call_on_url(this,data,url_mark, p-url_mark);
742
- url_mark = -1;
743
-
744
- http_minor = 9;
745
- state = State.header_field_start;
746
- break;
747
- case QMARK:
748
- fragment_mark = p;
749
- state = State.req_fragment;
750
- break;
751
- case HASH:
752
- break;
753
- default:
754
- settings.call_on_error(this, "unexpected char in path", data, p_err);
755
- return error();
756
- }
757
- break;
758
-
759
688
  case req_fragment:
760
- if (normal_url_char[chi]) {
761
- break;
762
- }
763
-
764
689
  switch (ch) {
765
690
  case SPACE:
766
691
  settings.call_on_url(this, data, url_mark, p-url_mark);
692
+ settings.call_on_path(this, data, url_mark, p - url_mark);
767
693
  url_mark = -1;
768
-
769
- settings.call_on_fragment(this, data, fragment_mark, p-fragment_mark);
770
- fragment_mark = -1;
771
-
772
694
  state = State.req_http_start;
773
695
  break;
774
696
  case CR:
775
- settings.call_on_url(this,data,url_mark, p-url_mark);
776
- url_mark = -1;
777
-
778
- settings.call_on_fragment(this, data, query_string_mark, p-query_string_mark);
779
- fragment_mark = -1;
780
-
781
- http_minor = 9;
782
- state = State.res_line_almost_done;
783
- break;
784
697
  case LF:
785
- settings.call_on_url(this,data,url_mark, p-url_mark);
786
- url_mark = -1;
787
-
788
- settings.call_on_fragment(this, data, query_string_mark, p-query_string_mark);
789
- fragment_mark = -1;
790
-
698
+ http_major = 0;
791
699
  http_minor = 9;
792
- state = State.header_field_start;
793
- break;
794
- case QMARK:
795
- case HASH:
700
+ state = (CR == ch) ? req_line_almost_done : header_field_start;
701
+ settings.call_on_url(this, data, url_mark, p-url_mark); //TODO check params!!!
702
+ settings.call_on_path(this, data, url_mark, p-url_mark);
703
+ url_mark = -1;
796
704
  break;
797
705
  default:
798
- settings.call_on_error(this, "unexpected char in path", data, p_err);
799
- return error();
706
+ state = parse_url_char(ch);
707
+ if(dead == state){
708
+ return error(settings, "unexpected char in path", data);
709
+ }
800
710
  }
801
711
  break;
802
712
  /******************* URL *******************/
@@ -812,39 +722,34 @@ public class HTTPParser {
812
722
  case SPACE:
813
723
  break;
814
724
  default:
815
- settings.call_on_error(this, "error in req_http_H", data, p_err);
816
- return error();
725
+ return error(settings, "error in req_http_H", data);
817
726
  }
818
727
  break;
819
728
 
820
729
  case req_http_H:
821
730
  if (strict && T != ch) {
822
- settings.call_on_error(this, "unexpected char", data, p_err);
823
- return error();
731
+ return error(settings, "unexpected char", data);
824
732
  }
825
733
  state = State.req_http_HT;
826
734
  break;
827
735
 
828
736
  case req_http_HT:
829
737
  if (strict && T != ch) {
830
- settings.call_on_error(this, "unexpected char", data, p_err);
831
- return error();
738
+ return error(settings, "unexpected char", data);
832
739
  }
833
740
  state = State.req_http_HTT;
834
741
  break;
835
742
 
836
743
  case req_http_HTT:
837
744
  if (strict && P != ch) {
838
- settings.call_on_error(this, "unexpected char", data, p_err);
839
- return error();
745
+ return error(settings, "unexpected char", data);
840
746
  }
841
747
  state = State.req_http_HTTP;
842
748
  break;
843
749
 
844
750
  case req_http_HTTP:
845
751
  if (strict && SLASH != ch) {
846
- settings.call_on_error(this, "unexpected char", data, p_err);
847
- return error();
752
+ return error(settings, "unexpected char", data);
848
753
  }
849
754
  state = req_first_http_major;
850
755
  break;
@@ -852,8 +757,7 @@ public class HTTPParser {
852
757
  /* first digit of major HTTP version */
853
758
  case req_first_http_major:
854
759
  if (!isDigit(ch)) {
855
- settings.call_on_error(this, "non digit in http major", data, p_err);
856
- return error();
760
+ return error(settings, "non digit in http major", data);
857
761
  }
858
762
  http_major = (int)ch - 0x30;
859
763
  state = State.req_http_major;
@@ -867,24 +771,21 @@ public class HTTPParser {
867
771
  }
868
772
 
869
773
  if (!isDigit(ch)) {
870
- settings.call_on_error(this, "non digit in http major", data, p_err);
871
- return error();
774
+ return error(settings, "non digit in http major", data);
872
775
  }
873
776
 
874
777
  http_major *= 10;
875
778
  http_major += (int)ch - 0x30;
876
779
 
877
780
  if (http_major > 999) {
878
- settings.call_on_error(this, "ridiculous http major", data, p_err);
879
- return error();
781
+ return error(settings, "ridiculous http major", data);
880
782
  };
881
783
  break;
882
784
 
883
785
  /* first digit of minor HTTP version */
884
786
  case req_first_http_minor:
885
787
  if (!isDigit(ch)) {
886
- settings.call_on_error(this, "non digit in http minor", data, p_err);
887
- return error();
788
+ return error(settings, "non digit in http minor", data);
888
789
  }
889
790
  http_minor = (int)ch - 0x30;
890
791
  state = State.req_http_minor;
@@ -904,8 +805,7 @@ public class HTTPParser {
904
805
  /* XXX allow spaces after digit? */
905
806
 
906
807
  if (!isDigit(ch)) {
907
- settings.call_on_error(this, "non digit in http minor", data, p_err);
908
- return error();
808
+ return error(settings, "non digit in http minor", data);
909
809
  }
910
810
 
911
811
  http_minor *= 10;
@@ -913,8 +813,7 @@ public class HTTPParser {
913
813
 
914
814
 
915
815
  if (http_minor > 999) {
916
- settings.call_on_error(this, "ridiculous http minor", data, p_err);
917
- return error();
816
+ return error(settings, "ridiculous http minor", data);
918
817
  };
919
818
 
920
819
  break;
@@ -923,10 +822,9 @@ public class HTTPParser {
923
822
  case req_line_almost_done:
924
823
  {
925
824
  if (ch != LF) {
926
- settings.call_on_error(this, "missing LF after request line", data, p_err);
927
- return error();
825
+ return error(settings, "missing LF after request line", data);
928
826
  }
929
- state = State.header_field_start;
827
+ state = header_field_start;
930
828
  break;
931
829
  }
932
830
 
@@ -938,7 +836,7 @@ public class HTTPParser {
938
836
  case header_field_start:
939
837
  {
940
838
  if (ch == CR) {
941
- state = State.headers_almost_done;
839
+ state = headers_almost_done;
942
840
  break;
943
841
  }
944
842
 
@@ -946,22 +844,15 @@ public class HTTPParser {
946
844
  /* they might be just sending \n instead of \r\n so this would be
947
845
  * the second \n to denote the end of headers*/
948
846
  state = State.headers_almost_done;
949
- if (!headers_almost_done(ch, settings)) {
950
- settings.call_on_error(this, "header not properly completed", data, p_err);
951
- return error();
952
- }
953
- if (upgrade) {
954
- return data.position() - start_position;
955
- }
847
+ reexecute = true;
956
848
  break;
957
849
  }
958
850
 
959
851
  c = token(ch);
960
852
 
961
853
  if (0 == c) {
962
- settings.call_on_error(this, "invalid char in header:"+c, data, p_err);
963
- return error();
964
- };
854
+ return error(settings, "invalid char in header:", data);
855
+ }
965
856
 
966
857
  header_field_mark = p;
967
858
 
@@ -1090,8 +981,7 @@ public class HTTPParser {
1090
981
  break;
1091
982
 
1092
983
  default:
1093
- settings.call_on_error(this, "Unknown Header State", data, p_err);
1094
- return error();
984
+ return error(settings, "Unknown Header State", data);
1095
985
  } // switch: header_state
1096
986
  break;
1097
987
  } // 0 != c
@@ -1120,15 +1010,14 @@ public class HTTPParser {
1120
1010
  break;
1121
1011
  }
1122
1012
 
1123
- settings.call_on_error(this, "invalid header field", data, p_err);
1124
- return error();
1013
+ return error(settings, "invalid header field", data);
1125
1014
  }
1126
1015
 
1127
1016
 
1128
1017
 
1129
1018
  case header_value_start:
1130
1019
  {
1131
- if (SPACE == ch) break;
1020
+ if ((SPACE == ch) || (TAB == ch)) break;
1132
1021
 
1133
1022
  header_value_mark = p;
1134
1023
 
@@ -1173,8 +1062,7 @@ public class HTTPParser {
1173
1062
 
1174
1063
  case content_length:
1175
1064
  if (!isDigit(ch)) {
1176
- settings.call_on_error(this, "Content-Length not numeric", data, p_err);
1177
- return error();
1065
+ return error(settings, "Content-Length not numeric", data);
1178
1066
  }
1179
1067
  content_length = (int)ch - 0x30;
1180
1068
  break;
@@ -1214,11 +1102,8 @@ public class HTTPParser {
1214
1102
  if (LF == ch) {
1215
1103
  settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1216
1104
  header_value_mark = -1;
1217
-
1218
- if (!header_almost_done(ch)) {
1219
- settings.call_on_error(this,"incorrect header ending, expection LF", data, p_err);
1220
- return error();
1221
- }
1105
+ state = header_almost_done;
1106
+ reexecute = true;
1222
1107
  break;
1223
1108
  }
1224
1109
 
@@ -1229,20 +1114,26 @@ public class HTTPParser {
1229
1114
 
1230
1115
  case connection:
1231
1116
  case transfer_encoding:
1232
- settings.call_on_error(this, "Shouldn't be here", data, p_err);
1233
- return error();
1117
+ return error(settings, "Shouldn't be here", data);
1234
1118
 
1235
1119
  case content_length:
1236
1120
  if (SPACE == ch) {
1237
1121
  break;
1238
1122
  }
1239
1123
  if (!isDigit(ch)) {
1240
- settings.call_on_error(this, "Content-Length not numeric", data, p_err);
1241
- return error();
1124
+ return error(settings, "Content-Length not numeric", data);
1242
1125
  }
1243
1126
 
1244
- content_length *= 10;
1245
- content_length += (int)ch - 0x30;
1127
+ long t = content_length;
1128
+ t *= 10;
1129
+ t += (long)ch - 0x30;
1130
+
1131
+ /* Overflow? */
1132
+ // t will wrap and become negative ...
1133
+ if (t < content_length) {
1134
+ return error(settings, "Invalid content length", data);
1135
+ }
1136
+ content_length = t;
1246
1137
  break;
1247
1138
 
1248
1139
  /* Transfer-Encoding: chunked */
@@ -1293,21 +1184,121 @@ public class HTTPParser {
1293
1184
 
1294
1185
  case header_almost_done:
1295
1186
  if (!header_almost_done(ch)) {
1296
- settings.call_on_error(this,"incorrect header ending, expection LF", data, p_err);
1297
- return error();
1187
+ return error(settings, "incorrect header ending, expecting LF", data);
1188
+ }
1189
+ break;
1190
+
1191
+ case header_value_lws:
1192
+ if (SPACE == ch || TAB == ch ){
1193
+ state = header_value_start;
1194
+ } else {
1195
+ state = header_field_start;
1196
+ reexecute = true;
1298
1197
  }
1299
1198
  break;
1300
1199
 
1301
1200
  case headers_almost_done:
1302
- if (!headers_almost_done(ch, settings)) {
1303
- settings.call_on_error(this, "header not properly completed", data, p_err);
1304
- return error();
1201
+ if (LF != ch) {
1202
+ return error(settings, "header not properly completed", data);
1203
+ }
1204
+ if (0 != (flags & F_TRAILING)) {
1205
+ /* End of a chunked request */
1206
+ state = new_message();
1207
+ settings.call_on_headers_complete(this);
1208
+ settings.call_on_message_complete(this);
1209
+ break;
1305
1210
  }
1211
+
1212
+ state = headers_done;
1213
+
1214
+ if (0 != (flags & F_UPGRADE) || HTTPMethod.HTTP_CONNECT == method) {
1215
+ upgrade = true;
1216
+ }
1217
+
1218
+ /* Here we call the headers_complete callback. This is somewhat
1219
+ * different than other callbacks because if the user returns 1, we
1220
+ * will interpret that as saying that this message has no body. This
1221
+ * is needed for the annoying case of receiving a response to a HEAD
1222
+ * request.
1223
+ */
1224
+
1225
+ /* (responses to HEAD request contain a CONTENT-LENGTH header
1226
+ * but no content)
1227
+ *
1228
+ * Consider what to do here: I don't like the idea of the callback
1229
+ * interface having a different contract in the case of HEAD
1230
+ * responses. The alternatives would be either to:
1231
+ *
1232
+ * a.) require the header_complete callback to implement a different
1233
+ * interface or
1234
+ *
1235
+ * b.) provide an overridden execute(bla, bla, boolean
1236
+ * parsingHeader) implementation ...
1237
+ */
1238
+
1239
+ // /*TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO */
1240
+ // if (null != settings.on_headers_complete) {
1241
+ // settings.call_on_headers_complete(this);
1242
+ // //return;
1243
+ // }
1244
+
1245
+ if (null != settings.on_headers_complete) {
1246
+ switch (settings.on_headers_complete.cb(this)) {
1247
+ case 0:
1248
+ break;
1249
+
1250
+ case 1:
1251
+ flags |= F_SKIPBODY;
1252
+ break;
1253
+
1254
+ default:
1255
+ return error(settings, "HPE_CB_headers_complete", data); /* Error */
1256
+ }
1257
+ }
1258
+ reexecute = true;
1259
+ break;
1260
+
1261
+ case headers_done:
1262
+ if (strict && (LF != ch)) {
1263
+ return error(settings, "STRICT CHECK", data); //TODO correct error msg
1264
+ }
1265
+
1266
+ nread = 0;
1267
+
1268
+ // Exit, the rest of the connect is in a different protocol.
1306
1269
  if (upgrade) {
1307
- return data.position()-start_position ;
1270
+ state = new_message();
1271
+ settings.call_on_message_complete(this);
1272
+ return data.position()-this.p_start;
1273
+ }
1274
+
1275
+ if (0 != (flags & F_SKIPBODY)) {
1276
+ state = new_message();
1277
+ settings.call_on_message_complete(this);
1278
+ } else if (0 != (flags & F_CHUNKED)) {
1279
+ /* chunked encoding - ignore Content-Length header */
1280
+ state = State.chunk_size_start;
1281
+ } else {
1282
+ if (content_length == 0) {
1283
+ /* Content-Length header given but zero: Content-Length: 0\r\n */
1284
+ state = new_message();
1285
+ settings.call_on_message_complete(this);
1286
+ } else if (content_length != -1) {
1287
+ /* Content-Length header given and non-zero */
1288
+ state = State.body_identity;
1289
+ } else {
1290
+ if (type == ParserType.HTTP_REQUEST || !http_message_needs_eof()) {
1291
+ /* Assume content-length 0 - read the next */
1292
+ state = new_message();
1293
+ settings.call_on_message_complete(this);
1294
+ } else {
1295
+ /* Read body until EOF */
1296
+ state = State.body_identity_eof;
1297
+ }
1298
+ }
1308
1299
  }
1309
- break;
1310
1300
 
1301
+ break;
1311
1302
  /******************* Header *******************/
1312
1303
 
1313
1304
 
@@ -1315,15 +1306,18 @@ public class HTTPParser {
1315
1306
 
1316
1307
  /******************* Body *******************/
1317
1308
  case body_identity:
1318
- to_read = min(pe - p, content_length); //TODO change to use buffer?
1309
+ to_read = min(pe - p, content_length); //TODO change to use buffer?
1310
+ body_mark = p;
1319
1311
 
1320
1312
  if (to_read > 0) {
1321
- settings.call_on_body(this, data, p, to_read);
1313
+ settings.call_on_body(this, data, p, to_read);
1322
1314
  data.position(p+to_read);
1323
1315
  content_length -= to_read;
1316
+
1324
1317
  if (content_length == 0) {
1325
- settings.call_on_message_complete(this);
1326
- state = new_message();
1318
+ state = message_done;
1319
+ p += to_read;
1320
+ reexecute = true;
1327
1321
  }
1328
1322
  }
1329
1323
  break;
@@ -1333,10 +1327,15 @@ public class HTTPParser {
1333
1327
  case body_identity_eof:
1334
1328
  to_read = pe - p; // TODO change to use buffer ?
1335
1329
  if (to_read > 0) {
1336
- settings.call_on_body(this, data, p, to_read);
1330
+ settings.call_on_body(this, data, p, to_read);
1337
1331
  data.position(p+to_read);
1338
1332
  }
1339
1333
  break;
1334
+
1335
+ case message_done:
1336
+ state = new_message();
1337
+ settings.call_on_message_complete(this);
1338
+ break;
1340
1339
  /******************* Body *******************/
1341
1340
 
1342
1341
 
@@ -1344,19 +1343,16 @@ public class HTTPParser {
1344
1343
  /******************* Chunk *******************/
1345
1344
  case chunk_size_start:
1346
1345
  if (1 != this.nread) {
1347
- settings.call_on_error(this, "nread != 1 (chunking)", data, p_err);
1348
- return error();
1346
+ return error(settings, "nread != 1 (chunking)", data);
1349
1347
 
1350
1348
  }
1351
1349
  if (0 == (flags & F_CHUNKED)) {
1352
- settings.call_on_error(this, "not chunked", data, p_err);
1353
- return error();
1350
+ return error(settings, "not chunked", data);
1354
1351
  }
1355
1352
 
1356
1353
  c = UNHEX[chi];
1357
1354
  if (c == -1) {
1358
- settings.call_on_error(this, "invalid hex char in chunk content length", data, p_err);
1359
- return error();
1355
+ return error(settings, "invalid hex char in chunk content length", data);
1360
1356
  }
1361
1357
  content_length = c;
1362
1358
  state = State.chunk_size;
@@ -1366,8 +1362,7 @@ public class HTTPParser {
1366
1362
 
1367
1363
  case chunk_size:
1368
1364
  if (0 == (flags & F_CHUNKED)) {
1369
- settings.call_on_error(this, "not chunked", data, p_err);
1370
- return error();
1365
+ return error(settings, "not chunked", data);
1371
1366
  }
1372
1367
 
1373
1368
  if (CR == ch) {
@@ -1382,20 +1377,23 @@ public class HTTPParser {
1382
1377
  state = State.chunk_parameters;
1383
1378
  break;
1384
1379
  }
1385
- settings.call_on_error(this, "invalid hex char in chunk content length", data, p_err);
1386
- return error();
1380
+ return error(settings, "invalid hex char in chunk content length", data);
1387
1381
  }
1388
-
1389
- content_length *= 16;
1390
- content_length += c;
1382
+ long t = content_length;
1383
+
1384
+ t *= 16;
1385
+ t += c;
1386
+ if(t < content_length){
1387
+ return error(settings, "invalid content length", data);
1388
+ }
1389
+ content_length = t;
1391
1390
  break;
1392
1391
 
1393
1392
 
1394
1393
 
1395
1394
  case chunk_parameters:
1396
1395
  if (0 == (flags & F_CHUNKED)) {
1397
- settings.call_on_error(this, "not chunked", data, p_err);
1398
- return error();
1396
+ return error(settings, "not chunked", data);
1399
1397
  }
1400
1398
  /* just ignore this shit. TODO check for overflow */
1401
1399
  if (CR == ch) {
@@ -1408,12 +1406,10 @@ public class HTTPParser {
1408
1406
 
1409
1407
  case chunk_size_almost_done:
1410
1408
  if (0 == (flags & F_CHUNKED)) {
1411
- settings.call_on_error(this, "not chunked", data, p_err);
1412
- return error();
1409
+ return error(settings, "not chunked", data);
1413
1410
  }
1414
1411
  if (strict && LF != ch) {
1415
- settings.call_on_error(this, "expected LF at end of chunk size", data, p_err);
1416
- return error();
1412
+ return error(settings, "expected LF at end of chunk size", data);
1417
1413
  }
1418
1414
 
1419
1415
  this.nread = 0;
@@ -1429,10 +1425,9 @@ public class HTTPParser {
1429
1425
 
1430
1426
 
1431
1427
  case chunk_data:
1432
- {
1428
+ //TODO Apply changes from C version for s_chunk_data
1433
1429
  if (0 == (flags & F_CHUNKED)) {
1434
- settings.call_on_error(this, "not chunked", data, p_err);
1435
- return error();
1430
+ return error(settings, "not chunked", data);
1436
1431
  }
1437
1432
 
1438
1433
  to_read = min(pe-p, content_length);
@@ -1447,32 +1442,29 @@ public class HTTPParser {
1447
1442
 
1448
1443
  content_length -= to_read;
1449
1444
  break;
1450
- }
1451
1445
 
1452
1446
 
1453
1447
 
1454
1448
  case chunk_data_almost_done:
1455
1449
  if (0 == (flags & F_CHUNKED)) {
1456
- settings.call_on_error(this, "not chunked", data, p_err);
1457
- return error();
1450
+ return error(settings, "not chunked", data);
1458
1451
  }
1459
1452
  if (strict && CR != ch) {
1460
- settings.call_on_error(this, "chunk data terminated incorrectly, expected CR", data, p_err);
1461
- return error();
1453
+ return error(settings, "chunk data terminated incorrectly, expected CR", data);
1462
1454
  }
1463
1455
  state = State.chunk_data_done;
1456
+ //TODO CALLBACK_DATA(body)
1457
+ // settings.call_on_body(this, data,p,?);
1464
1458
  break;
1465
1459
 
1466
1460
 
1467
1461
 
1468
1462
  case chunk_data_done:
1469
1463
  if (0 == (flags & F_CHUNKED)) {
1470
- settings.call_on_error(this, "not chunked", data, p_err);
1471
- return error();
1464
+ return error(settings, "not chunked", data);
1472
1465
  }
1473
1466
  if (strict && LF != ch) {
1474
- settings.call_on_error(this, "chunk data terminated incorrectly, expected LF", data, p_err);
1475
- return error();
1467
+ return error(settings, "chunk data terminated incorrectly, expected LF", data);
1476
1468
  }
1477
1469
  state = State.chunk_size_start;
1478
1470
  break;
@@ -1481,8 +1473,7 @@ public class HTTPParser {
1481
1473
 
1482
1474
 
1483
1475
  default:
1484
- settings.call_on_error(this, "unhandled state", data, p_err);
1485
- return error();
1476
+ return error(settings, "unhandled state", data);
1486
1477
 
1487
1478
  } // switch
1488
1479
  } // while
@@ -1495,17 +1486,34 @@ public class HTTPParser {
1495
1486
 
1496
1487
  settings.call_on_header_field(this, data, header_field_mark, p-header_field_mark);
1497
1488
  settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1498
- settings.call_on_fragment (this, data, fragment_mark, p-fragment_mark);
1499
- settings.call_on_query_string(this, data, query_string_mark, p-query_string_mark);
1500
- settings.call_on_path (this, data, path_mark, p-path_mark);
1501
1489
  settings.call_on_url (this, data, url_mark, p-url_mark);
1490
+ settings.call_on_path (this, data, url_mark, p-url_mark);
1502
1491
 
1503
- return data.position()-start_position;
1492
+ return data.position()-this.p_start;
1504
1493
  } // execute
1505
1494
 
1506
- int error () {
1495
+ int error (ParserSettings settings, String mes, ByteBuffer data) {
1496
+ settings.call_on_error(this, mes, data, this.p_start);
1507
1497
  this.state = State.dead;
1508
- return this.data.position()-start_position;
1498
+ return data.position()-this.p_start;
1499
+ }
1500
+
1501
+ public boolean http_message_needs_eof() {
1502
+ if(type == ParserType.HTTP_REQUEST){
1503
+ return false;
1504
+ }
1505
+ /* See RFC 2616 section 4.4 */
1506
+ if ((status_code / 100 == 1) || /* 1xx e.g. Continue */
1507
+ (status_code == 204) || /* No Content */
1508
+ (status_code == 304) || /* Not Modified */
1509
+ (flags & F_SKIPBODY) != 0) { /* response to a HEAD request */
1510
+ return false;
1511
+ }
1512
+ if ((flags & F_CHUNKED) != 0 || content_length != -1) {
1513
+ return false;
1514
+ }
1515
+
1516
+ return true;
1509
1517
  }
1510
1518
 
1511
1519
  /* If http_should_keep_alive() in the on_headers_complete or
@@ -1519,19 +1527,153 @@ public class HTTPParser {
1519
1527
  /* HTTP/1.1 */
1520
1528
  if ( 0 != (flags & F_CONNECTION_CLOSE) ) {
1521
1529
  return false;
1522
- } else {
1523
- return true;
1524
1530
  }
1525
1531
  } else {
1526
1532
  /* HTTP/1.0 or earlier */
1527
- if ( 0 != (flags & F_CONNECTION_KEEP_ALIVE) ) {
1528
- return true;
1529
- } else {
1533
+ if ( 0 == (flags & F_CONNECTION_KEEP_ALIVE) ) {
1530
1534
  return false;
1531
1535
  }
1532
1536
  }
1537
+ return !http_message_needs_eof();
1538
+ }
1539
+
1540
+ public int parse_url(ByteBuffer data, boolean is_connect, HTTPParserUrl u) {
1541
+
1542
+ UrlFields uf = UrlFields.UF_MAX;
1543
+ UrlFields old_uf = UrlFields.UF_MAX;
1544
+ u.port = 0;
1545
+ u.field_set = 0;
1546
+ state = (is_connect ? State.req_host_start : State.req_spaces_before_url);
1547
+ int p_init = data.position();
1548
+ int p = 0;
1549
+ byte ch = 0;
1550
+ while (data.position() != data.limit()) {
1551
+ p = data.position();
1552
+ ch = data.get();
1553
+ state = parse_url_char(ch);
1554
+ switch(state) {
1555
+ case dead:
1556
+ return 1;
1557
+
1558
+ /* Skip delimeters */
1559
+ case req_schema_slash:
1560
+ case req_schema_slash_slash:
1561
+ case req_host_start:
1562
+ case req_host_v6_start:
1563
+ case req_host_v6_end:
1564
+ case req_port_start:
1565
+ case req_query_string_start:
1566
+ case req_fragment_start:
1567
+ continue;
1568
+
1569
+ case req_schema:
1570
+ uf = UrlFields.UF_SCHEMA;
1571
+ break;
1572
+
1573
+ case req_host:
1574
+ case req_host_v6:
1575
+ uf = UrlFields.UF_HOST;
1576
+ break;
1577
+
1578
+ case req_port:
1579
+ uf = UrlFields.UF_PORT;
1580
+ break;
1581
+
1582
+ case req_path:
1583
+ uf = UrlFields.UF_PATH;
1584
+ break;
1585
+
1586
+ case req_query_string:
1587
+ uf = UrlFields.UF_QUERY;
1588
+ break;
1589
+
1590
+ case req_fragment:
1591
+ uf = UrlFields.UF_FRAGMENT;
1592
+ break;
1593
+
1594
+ default:
1595
+ return 1;
1596
+ }
1597
+ /* Nothing's changed; soldier on */
1598
+ if (uf == old_uf) {
1599
+ u.field_data[uf.getIndex()].len++;
1600
+ continue;
1601
+ }
1602
+
1603
+ u.field_data[uf.getIndex()].off = p - p_init;
1604
+ u.field_data[uf.getIndex()].len = 1;
1605
+
1606
+ u.field_set |= (1 << uf.getIndex());
1607
+ old_uf = uf;
1608
+
1609
+ }
1610
+
1611
+ /* CONNECT requests can only contain "hostname:port" */
1612
+ if (is_connect && u.field_set != ((1 << UrlFields.UF_HOST.getIndex())|(1 << UrlFields.UF_PORT.getIndex()))) {
1613
+ return 1;
1614
+ }
1615
+
1616
+ /* Make sure we don't end somewhere unexpected */
1617
+ switch (state) {
1618
+ case req_host_v6_start:
1619
+ case req_host_v6:
1620
+ case req_host_v6_end:
1621
+ case req_host:
1622
+ case req_port_start:
1623
+ return 1;
1624
+ default:
1625
+ break;
1626
+ }
1627
+
1628
+ if (0 != (u.field_set & (1 << UrlFields.UF_PORT.getIndex()))) {
1629
+ /* Don't bother with endp; we've already validated the string */
1630
+ int v = strtoi(data, p_init + u.field_data[UrlFields.UF_PORT.getIndex()].off);
1631
+
1632
+ /* Ports have a max value of 2^16 */
1633
+ if (v > 0xffff) {
1634
+ return 1;
1635
+ }
1636
+
1637
+ u.port = v;
1638
+ }
1639
+
1640
+ return 0;
1533
1641
  }
1534
1642
 
1643
+ //hacky reimplementation of srttoul, tailored for our simple needs
1644
+ //we only need to parse port val, so no negative values etc
1645
+ int strtoi(ByteBuffer data, int start_pos) {
1646
+ data.position(start_pos);
1647
+ byte ch;
1648
+ int start = data.position();
1649
+ int end = data.limit();
1650
+ while(data.position() < data.limit()) {
1651
+ ch = data.get();
1652
+ if(Character.isWhitespace((char)ch)){
1653
+ start++;
1654
+ continue;
1655
+ }
1656
+ if(isDigit(ch)){
1657
+ continue;
1658
+ }else{
1659
+ end = data.position() - 1;
1660
+ break;
1661
+ }
1662
+ }
1663
+ byte[] s = new byte[end - start];
1664
+ int cur_pos = data.position();
1665
+ data.position(start);
1666
+ data.get(s);
1667
+ int result = 0;
1668
+ try{
1669
+ result = Integer.parseInt(new String(s, "UTF8"));
1670
+ } catch (UnsupportedEncodingException e) {
1671
+ result = 0;
1672
+ }
1673
+ data.position(cur_pos);
1674
+ return result;
1675
+ }
1676
+
1535
1677
  boolean isDigit(byte b) {
1536
1678
  if (b >= 0x30 && b <=0x39) {
1537
1679
  return true;
@@ -1539,6 +1681,10 @@ public class HTTPParser {
1539
1681
  return false;
1540
1682
  }
1541
1683
 
1684
+ boolean isHex(byte b) {
1685
+ return isDigit(b) || (lower(b) >= 0x61 /*a*/ && lower(b) <= 0x66 /*f*/);
1686
+ }
1687
+
1542
1688
  boolean isAtoZ(byte b) {
1543
1689
  byte c = lower(b);
1544
1690
  return (c>= 0x61 /*a*/ && c <= 0x7a /*z*/);
@@ -1555,9 +1701,28 @@ public class HTTPParser {
1555
1701
  }
1556
1702
 
1557
1703
  byte token(byte b) {
1558
- return (byte)tokens[b];
1704
+ if(!strict){
1705
+ return (b == (byte)' ') ? (byte)' ' : (byte)tokens[b] ;
1706
+ }else{
1707
+ return (byte)tokens[b];
1708
+ }
1709
+ }
1710
+
1711
+ boolean isHostChar(byte ch){
1712
+ if(!strict){
1713
+ return (isAtoZ(ch)) || isDigit(ch) || DOT == ch || DASH == ch || UNDER == ch ;
1714
+ }else{
1715
+ return (isAtoZ(ch)) || isDigit(ch) || DOT == ch || DASH == ch;
1716
+ }
1717
+ }
1718
+
1719
+ boolean isNormalUrlChar(int chi) {
1720
+ if(!strict){
1721
+ return (chi > 0x80) || normal_url_char[chi];
1722
+ }else{
1723
+ return normal_url_char[chi];
1724
+ }
1559
1725
  }
1560
-
1561
1726
 
1562
1727
  HTTPMethod start_req_method_assign(byte c){
1563
1728
  switch (c) {
@@ -1569,7 +1734,7 @@ public class HTTPParser {
1569
1734
  case M: return HTTPMethod.HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */
1570
1735
  case N: return HTTPMethod.HTTP_NOTIFY;
1571
1736
  case O: return HTTPMethod.HTTP_OPTIONS;
1572
- case P: return HTTPMethod.HTTP_POST; /* or PROPFIND, PROPPATH, PUT */
1737
+ case P: return HTTPMethod.HTTP_POST; /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
1573
1738
  case R: return HTTPMethod.HTTP_REPORT;
1574
1739
  case S: return HTTPMethod.HTTP_SUBSCRIBE;
1575
1740
  case T: return HTTPMethod.HTTP_TRACE;
@@ -1583,7 +1748,7 @@ public class HTTPParser {
1583
1748
  return false;
1584
1749
  }
1585
1750
 
1586
- state = State.header_field_start;
1751
+ state = State.header_value_lws;
1587
1752
  // TODO java enums support some sort of bitflag mechanism !?
1588
1753
  switch (header_state) {
1589
1754
  case connection_keep_alive:
@@ -1601,111 +1766,17 @@ public class HTTPParser {
1601
1766
  return true;
1602
1767
  }
1603
1768
 
1604
- boolean headers_almost_done (byte ch, ParserSettings settings) {
1605
-
1606
- if (LF != ch) {
1607
- return false;
1608
- }
1609
- if (0 != (flags & F_TRAILING)) {
1610
- /* End of a chunked request */
1611
-
1612
- settings.call_on_headers_complete(this);
1613
- settings.call_on_message_complete(this);
1614
-
1615
- state = new_message();
1616
-
1617
- return true;
1618
- }
1619
-
1620
- nread = 0;
1621
-
1622
- if (0 != (flags & F_UPGRADE) || HTTPMethod.HTTP_CONNECT == method) {
1623
- upgrade = true;
1624
- }
1625
-
1626
-
1627
- /* Here we call the headers_complete callback. This is somewhat
1628
- * different than other callbacks because if the user returns 1, we
1629
- * will interpret that as saying that this message has no body. This
1630
- * is needed for the annoying case of recieving a response to a HEAD
1631
- * request.
1632
- */
1633
-
1634
- /* (responses to HEAD request contain a CONTENT-LENGTH header
1635
- * but no content)
1636
- *
1637
- * Consider what to do here: I don't like the idea of the callback
1638
- * interface having a different contract in the case of HEAD
1639
- * responses. The alternatives would be either to:
1640
- *
1641
- * a.) require the header_complete callback to implement a different
1642
- * interface or
1643
- *
1644
- * b.) provide an overridden execute(bla, bla, boolean
1645
- * parsingHeader) implementation ...
1646
- */
1647
-
1648
- /*TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO */
1649
- if (null != settings.on_headers_complete) {
1650
- settings.call_on_headers_complete(this);
1651
- //return;
1652
- }
1653
-
1654
- // if (null != settings.on_headers_complete) {
1655
- // switch (settings.on_headers_complete.cb(parser)) {
1656
- // case 0:
1657
- // break;
1658
- //
1659
- // case 1:
1660
- // flags |= F_SKIPBODY;
1661
- // break;
1662
- //
1663
- // default:
1664
- // return p - data; /* Error */ // TODO // RuntimeException ?
1665
- // }
1666
- // }
1667
-
1668
-
1669
- // Exit, the rest of the connect is in a different protocol.
1670
- if (upgrade) {
1671
- settings.call_on_message_complete(this);
1672
- state = State.body_identity_eof;
1673
- return true;
1674
- }
1675
-
1676
- if (0 != (flags & F_SKIPBODY)) {
1677
- settings.call_on_message_complete(this);
1678
- state = new_message();
1679
- } else if (0 != (flags & F_CHUNKED)) {
1680
- /* chunked encoding - ignore Content-Length header */
1681
- state = State.chunk_size_start;
1682
- } else {
1683
- if (content_length == 0) {
1684
- /* Content-Length header given but zero: Content-Length: 0\r\n */
1685
- settings.call_on_message_complete(this);
1686
- state = new_message();
1687
- } else if (content_length > 0) {
1688
- /* Content-Length header given and non-zero */
1689
- state = State.body_identity;
1690
- } else {
1691
- if (type == ParserType.HTTP_REQUEST || http_should_keep_alive()) {
1692
- /* Assume content-length 0 - read the next */
1693
- settings.call_on_message_complete(this);
1694
- state = new_message();
1695
- } else {
1696
- /* Read body until EOF */
1697
- state = State.body_identity_eof;
1698
- }
1699
- }
1700
- }
1701
- return true;
1702
- } // headers_almost_fone
1769
+ // boolean headers_almost_done (byte ch, ParserSettings settings) {
1770
+ // } // headers_almost_done
1703
1771
 
1704
1772
 
1705
1773
  final int min (int a, int b) {
1706
1774
  return a < b ? a : b;
1707
1775
  }
1708
1776
 
1777
+ final int min (int a, long b) {
1778
+ return a < b ? a : (int)b;
1779
+ }
1709
1780
  /* probably not the best place to hide this ... */
1710
1781
  public boolean HTTP_PARSER_STRICT;
1711
1782
  State new_message() {
@@ -1730,6 +1801,7 @@ public class HTTPParser {
1730
1801
  case chunk_data_done :
1731
1802
  case body_identity :
1732
1803
  case body_identity_eof :
1804
+ case message_done :
1733
1805
  return false;
1734
1806
 
1735
1807
  }
@@ -1808,9 +1880,9 @@ public class HTTPParser {
1808
1880
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
1809
1881
  0, 0, 0, 0, 0, 0, 0, 0,
1810
1882
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
1811
- ' ', '!', '"', '#', '$', '%', '&', '\'',
1883
+ 0, '!', 0, '#', '$', '%', '&', '\'',
1812
1884
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
1813
- 0, 0, '*', '+', 0, '-', '.', '/' ,
1885
+ 0, 0, '*', '+', 0, '-', '.', 0 ,
1814
1886
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
1815
1887
  '0', '1', '2', '3', '4', '5', '6', '7',
1816
1888
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
@@ -1830,7 +1902,7 @@ public class HTTPParser {
1830
1902
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
1831
1903
  'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
1832
1904
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
1833
- 'X', 'Y', 'Z', 0, '|', '}', 0, 0,
1905
+ 'X', 'Y', 'Z', 0, '|', 0, '~', 0,
1834
1906
  /* hi bit set, not ascii */
1835
1907
  0, 0, 0, 0, 0, 0, 0, 0,
1836
1908
  0, 0, 0, 0, 0, 0, 0, 0,
@@ -1901,29 +1973,6 @@ public class HTTPParser {
1901
1973
  true, true, true, true, true, true, true, true,
1902
1974
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
1903
1975
  true, true, true, true, true, true, true, false,
1904
-
1905
- /* hi bit set, not ascii */
1906
- /* Remainder of non-ASCII range are accepted as-is to support implicitly UTF-8
1907
- * encoded paths. This is out of spec, but clients generate this and most other
1908
- * HTTP servers support it. We should, too. */
1909
-
1910
- true, true, true, true, true, true, true, true,
1911
- true, true, true, true, true, true, true, true,
1912
- true, true, true, true, true, true, true, true,
1913
- true, true, true, true, true, true, true, true,
1914
- true, true, true, true, true, true, true, true,
1915
- true, true, true, true, true, true, true, true,
1916
- true, true, true, true, true, true, true, true,
1917
- true, true, true, true, true, true, true, true,
1918
- true, true, true, true, true, true, true, true,
1919
- true, true, true, true, true, true, true, true,
1920
- true, true, true, true, true, true, true, true,
1921
- true, true, true, true, true, true, true, true,
1922
- true, true, true, true, true, true, true, true,
1923
- true, true, true, true, true, true, true, true,
1924
- true, true, true, true, true, true, true, true,
1925
- true, true, true, true, true, true, true, true,
1926
-
1927
1976
  };
1928
1977
 
1929
1978
  public static final byte A = 0x41;
@@ -1952,10 +2001,12 @@ public class HTTPParser {
1952
2001
  public static final byte X = 0x58;
1953
2002
  public static final byte Y = 0x59;
1954
2003
  public static final byte Z = 0x5a;
2004
+ public static final byte UNDER = 0x5f;
1955
2005
  public static final byte CR = 0x0d;
1956
2006
  public static final byte LF = 0x0a;
1957
2007
  public static final byte DOT = 0x2e;
1958
2008
  public static final byte SPACE = 0x20;
2009
+ public static final byte TAB = 0x09;
1959
2010
  public static final byte SEMI = 0x3b;
1960
2011
  public static final byte COLON = 0x3a;
1961
2012
  public static final byte HASH = 0x23;
@@ -1970,7 +2021,7 @@ public class HTTPParser {
1970
2021
 
1971
2022
  dead
1972
2023
 
1973
- , start_res_or_res
2024
+ , start_req_or_res
1974
2025
  , res_or_resp_H
1975
2026
  , start_res
1976
2027
  , res_H
@@ -1993,7 +2044,12 @@ public class HTTPParser {
1993
2044
  , req_schema
1994
2045
  , req_schema_slash
1995
2046
  , req_schema_slash_slash
2047
+ , req_host_start
2048
+ , req_host_v6_start
2049
+ , req_host_v6
2050
+ , req_host_v6_end
1996
2051
  , req_host
2052
+ , req_port_start
1997
2053
  , req_port
1998
2054
  , req_path
1999
2055
  , req_query_string_start
@@ -2015,6 +2071,7 @@ public class HTTPParser {
2015
2071
  , header_field
2016
2072
  , header_value_start
2017
2073
  , header_value
2074
+ , header_value_lws
2018
2075
 
2019
2076
  , header_almost_done
2020
2077
 
@@ -2024,10 +2081,11 @@ public class HTTPParser {
2024
2081
  , chunk_size_almost_done
2025
2082
 
2026
2083
  , headers_almost_done
2084
+ , headers_done
2027
2085
  // This space intentionally not left blank, comment from c, for orientation...
2028
2086
  // the c version uses <= s_header_almost_done in java, we list the states explicitly
2029
2087
  // in `parsing_header()`
2030
- /* Important: 's_headers_almost_done' must be the last 'header' state. All
2088
+ /* Important: 's_headers_done' must be the last 'header' state. All
2031
2089
  * states beyond this must be 'body' states. It is used for overflow
2032
2090
  * checking. See the PARSING_HEADER() macro.
2033
2091
  */
@@ -2036,8 +2094,8 @@ public class HTTPParser {
2036
2094
  , chunk_data_done
2037
2095
 
2038
2096
  , body_identity
2039
- , body_identity_eof;
2040
-
2097
+ , body_identity_eof
2098
+ , message_done
2041
2099
 
2042
2100
  }
2043
2101
  enum HState {
@@ -2065,4 +2123,24 @@ public class HTTPParser {
2065
2123
  , connection_keep_alive
2066
2124
  , connection_close
2067
2125
  }
2126
/**
 * The components of a parsed URL. Each constant carries a fixed numeric index,
 * which the parser uses both as a bit position in a field-set mask and as a
 * slot number in the per-field data array.
 */
public enum UrlFields {
  UF_SCHEMA(0),
  UF_HOST(1),
  UF_PORT(2),
  UF_PATH(3),
  UF_QUERY(4),
  UF_FRAGMENT(5),
  /* Presumably a count sentinel rather than a real URL component. */
  UF_MAX(6);

  /** Numeric index assigned to this field. */
  private final int index;

  UrlFields(int position) {
    index = position;
  }

  /** Returns the numeric index assigned to this field. */
  public int getIndex() {
    return index;
  }
}
2068
2146
  }