http_parser.rb 0.5.3 → 0.6.0.beta.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. data/.gitmodules +3 -3
  2. data/Gemfile +1 -1
  3. data/Gemfile.lock +9 -2
  4. data/README.md +50 -45
  5. data/bench/standalone.rb +23 -0
  6. data/bench/thin.rb +1 -0
  7. data/ext/ruby_http_parser/org/ruby_http_parser/RubyHttpParser.java +66 -58
  8. data/ext/ruby_http_parser/ruby_http_parser.c +10 -41
  9. data/ext/ruby_http_parser/vendor/http-parser-java/AUTHORS +32 -0
  10. data/ext/ruby_http_parser/vendor/http-parser-java/LICENSE-MIT +5 -1
  11. data/ext/ruby_http_parser/vendor/http-parser-java/README.md +133 -1
  12. data/ext/ruby_http_parser/vendor/http-parser-java/TODO +6 -0
  13. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.c +1029 -615
  14. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.gyp +79 -0
  15. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.h +177 -43
  16. data/ext/ruby_http_parser/vendor/http-parser-java/src/Http-parser.java.iml +22 -0
  17. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/FieldData.java +41 -0
  18. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPHeadersCompleteCallback.java +13 -0
  19. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPMethod.java +4 -1
  20. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPParserUrl.java +76 -0
  21. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserSettings.java +2 -2
  22. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/Util.java +6 -6
  23. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPHeadersCompleteCallback.java +12 -0
  24. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPParser.java +715 -637
  25. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/ParserSettings.java +1 -1
  26. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Message.java +71 -21
  27. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/ParseUrl.java +51 -0
  28. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Requests.java +1 -1
  29. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Responses.java +1 -0
  30. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Test.java +2 -1
  31. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestHeaderOverflowError.java +1 -0
  32. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestLoaderNG.java +6 -17
  33. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestNoOverflowLongBody.java +1 -0
  34. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/UnitTest.java +1 -0
  35. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Upgrade.java +1 -0
  36. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Url.java +127 -0
  37. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Util.java +80 -9
  38. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/WrongContentLength.java +2 -1
  39. data/ext/ruby_http_parser/vendor/http-parser-java/test.c +1141 -210
  40. data/ext/ruby_http_parser/vendor/http-parser-java/tests.dumped +230 -71
  41. data/ext/ruby_http_parser/vendor/http-parser/AUTHORS +32 -0
  42. data/ext/ruby_http_parser/vendor/http-parser/LICENSE-MIT +5 -1
  43. data/ext/ruby_http_parser/vendor/http-parser/README.md +9 -2
  44. data/ext/ruby_http_parser/vendor/http-parser/http_parser.c +1029 -615
  45. data/ext/ruby_http_parser/vendor/http-parser/http_parser.gyp +79 -0
  46. data/ext/ruby_http_parser/vendor/http-parser/http_parser.h +145 -16
  47. data/ext/ruby_http_parser/vendor/http-parser/test.c +1065 -141
  48. data/http_parser.rb.gemspec +3 -1
  49. data/spec/parser_spec.rb +41 -17
  50. data/spec/support/requests.json +236 -24
  51. data/spec/support/responses.json +182 -36
  52. data/tasks/compile.rake +2 -2
  53. data/tasks/fixtures.rake +7 -1
  54. metadata +57 -19
  55. data/ext/ruby_http_parser/vendor/http-parser-java/compile +0 -1
  56. data/ext/ruby_http_parser/vendor/http-parser-java/test_permutations +0 -1
  57. data/ext/ruby_http_parser/vendor/http-parser-java/test_unit +0 -1
  58. data/ext/ruby_http_parser/vendor/http-parser-java/test_utf8 +0 -1
@@ -0,0 +1,76 @@
1
+ package http_parser;
2
+
3
+ import http_parser.lolevel.*;
4
+ import http_parser.lolevel.HTTPParser;
5
+
6
+ import java.io.UnsupportedEncodingException;
7
+ import java.nio.ByteBuffer;
8
+ import java.util.Arrays;
9
+
10
+ /**
11
+ */
12
+ public class HTTPParserUrl {
13
+
14
+ public int field_set;
15
+ public int port;
16
+
17
+ public FieldData[] field_data = new FieldData[]{
18
+ new FieldData(0,0),
19
+ new FieldData(0,0),
20
+ new FieldData(0,0),
21
+ new FieldData(0,0),
22
+ new FieldData(0,0),
23
+ new FieldData(0,0)
24
+ }; //UF_MAX
25
+
26
+ public HTTPParserUrl(){}
27
+
28
+ public HTTPParserUrl(int field_set, int port, FieldData[] field_data){
29
+ this.field_set = field_set;
30
+ this.port = port;
31
+ this.field_data = field_data;
32
+ }
33
+
34
+ public String getFieldValue(HTTPParser.UrlFields field, ByteBuffer data) throws UnsupportedEncodingException {
35
+ FieldData fd = this.field_data[field.getIndex()];
36
+ if(fd.off == 0 & fd.len == 0) return "";
37
+ byte[] dst = new byte[fd.len];
38
+ int current_pos = data.position();
39
+ data.position(fd.off);
40
+ data.get(dst,0,fd.len);
41
+ data.position(current_pos);
42
+ String v = new String(dst, "UTF8");
43
+ return v;
44
+ }
45
+
46
+ @Override
47
+ public boolean equals(Object o) {
48
+ if (this == o) return true;
49
+ if (o == null || getClass() != o.getClass()) return false;
50
+
51
+ HTTPParserUrl that = (HTTPParserUrl) o;
52
+
53
+ if (field_set != that.field_set) return false;
54
+ if (port != that.port) return false;
55
+ if (!Arrays.equals(field_data, that.field_data)) return false;
56
+
57
+ return true;
58
+ }
59
+
60
+ @Override
61
+ public int hashCode() {
62
+ int result = field_set;
63
+ result = 31 * result + port;
64
+ result = 31 * result + Arrays.hashCode(field_data);
65
+ return result;
66
+ }
67
+
68
+ @Override
69
+ public String toString() {
70
+ return "HTTPParserUrl{" +
71
+ "field_set=" + field_set +
72
+ ", port=" + port +
73
+ ", field_data=" + (field_data == null ? null : Arrays.asList(field_data)) +
74
+ '}';
75
+ }
76
+ }
@@ -27,7 +27,7 @@ public class ParserSettings extends http_parser.lolevel.ParserSettings {
27
27
  private HTTPDataCallback _on_fragment;
28
28
  private HTTPDataCallback _on_header_field;
29
29
  private HTTPDataCallback _on_header_value;
30
- private HTTPCallback _on_headers_complete;
30
+ private HTTPHeadersCompleteCallback _on_headers_complete;
31
31
  private HTTPDataCallback _on_body;
32
32
  private HTTPCallback _on_message_complete;
33
33
  private HTTPErrorCallback _on_error;
@@ -175,7 +175,7 @@ public class ParserSettings extends http_parser.lolevel.ParserSettings {
175
175
  return 0;
176
176
  }
177
177
  };
178
- this._on_headers_complete = new HTTPCallback() {
178
+ this._on_headers_complete = new HTTPHeadersCompleteCallback() {
179
179
  @Override
180
180
  public int cb(HTTPParser parser) {
181
181
  // is there an uncompleted value ... ?
@@ -30,7 +30,7 @@ public class Util {
30
30
  //
31
31
  // }
32
32
 
33
- public static String error (String mes, ByteBuffer b, int begining) {
33
+ public static String error (String mes, ByteBuffer b, int beginning) {
34
34
  // the error message should look like this:
35
35
  //
36
36
  // Bla expected something, but it's not there (mes)
@@ -50,7 +50,7 @@ public class Util {
50
50
  final int mes_width = 72;
51
51
  int p = b.position(); // error position
52
52
  int end = b.limit(); // this is the end
53
- int m = end - begining; // max mes length
53
+ int m = end - beginning; // max mes length
54
54
 
55
55
  StringBuilder builder = new StringBuilder();
56
56
  int p_adj = p;
@@ -58,9 +58,9 @@ public class Util {
58
58
  byte [] orig = new byte[0];
59
59
  if (m <= mes_width) {
60
60
  orig = new byte[m];
61
- b.position(begining);
61
+ b.position(beginning);
62
62
  b.get(orig, 0, m);
63
- p_adj = p-begining;
63
+ p_adj = p-beginning;
64
64
 
65
65
 
66
66
  } else {
@@ -73,7 +73,7 @@ public class Util {
73
73
  // CAN'T be not enough stuff aorund p in total, because
74
74
  // m>meswidth (see if to this else)
75
75
 
76
- int before = p-begining;
76
+ int before = p-beginning;
77
77
  int after = end - p;
78
78
  if ( (before > mes_width/2) && (after > mes_width/2)) {
79
79
  // plenty of stuff in front of and behind error
@@ -82,7 +82,7 @@ public class Util {
82
82
  b.get(orig, 0, mes_width);
83
83
  } else if (before <= mes_width/2) {
84
84
  // take all of the begining.
85
- b.position(begining);
85
+ b.position(beginning);
86
86
  // and as much of the rest as possible
87
87
 
88
88
  b.get(orig, 0, mes_width);
@@ -0,0 +1,12 @@
1
+ package http_parser.lolevel;
2
+
3
+ /**
4
+ * Special interface for headers_complete callback.
5
+ * This is somewhat different than other callbacks because if the user returns 1, we
6
+ * will interpret that as saying that this message has no body. This
7
+ * is needed for the annoying case of receiving a response to a HEAD
8
+ * request.
9
+ */
10
+ public interface HTTPHeadersCompleteCallback extends HTTPCallback{
11
+
12
+ }
@@ -1,8 +1,10 @@
1
1
  package http_parser.lolevel;
2
2
 
3
+ import java.io.UnsupportedEncodingException;
3
4
  import java.nio.ByteBuffer;
4
5
  import http_parser.HTTPException;
5
6
  import http_parser.HTTPMethod;
7
+ import http_parser.HTTPParserUrl;
6
8
  import http_parser.ParserType;
7
9
  import static http_parser.lolevel.HTTPParser.C.*;
8
10
  import static http_parser.lolevel.HTTPParser.State.*;
@@ -20,10 +22,9 @@ public class HTTPParser {
20
22
  int flags; // TODO
21
23
 
22
24
  int nread;
23
- int content_length;
25
+ long content_length;
24
26
 
25
- int start_position;
26
- ByteBuffer data;
27
+ int p_start; // updated each call to execute to indicate where the buffer was before we began calling it.
27
28
 
28
29
  /** READ-ONLY **/
29
30
  public int http_major;
@@ -52,10 +53,8 @@ public class HTTPParser {
52
53
  */
53
54
  int header_field_mark = -1;
54
55
  int header_value_mark = -1;
55
- int fragment_mark = -1;
56
- int query_string_mark = -1;
57
- int path_mark = -1;
58
56
  int url_mark = -1;
57
+ int body_mark = -1;
59
58
 
60
59
  /**
61
60
  * Construct a Parser for ParserType.HTTP_BOTH, meaning it
@@ -79,7 +78,7 @@ public class HTTPParser {
79
78
  this.state = State.start_res;
80
79
  break;
81
80
  case HTTP_BOTH:
82
- this.state = State.start_res_or_res;
81
+ this.state = State.start_req_or_res;
83
82
  break;
84
83
  default:
85
84
  throw new HTTPException("can't happen, invalid ParserType enum");
@@ -91,6 +90,172 @@ public class HTTPParser {
91
90
  */
92
91
  static void p(Object o) {System.out.println(o);}
93
92
 
93
+ /** Comment from C version follows
94
+ *
95
+ * Our URL parser.
96
+ *
97
+ * This is designed to be shared by http_parser_execute() for URL validation,
98
+ * hence it has a state transition + byte-for-byte interface. In addition, it
99
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
100
+ * work of turning state transitions URL components for its API.
101
+ *
102
+ * This function should only be invoked with non-space characters. It is
103
+ * assumed that the caller cares about (and can detect) the transition between
104
+ * URL and non-URL states by looking for these.
105
+ */
106
+ public State parse_url_char(byte ch) {
107
+
108
+ int chi = ch & 0xff; // utility, ch without signedness for table lookups.
109
+
110
+ if(SPACE == ch){
111
+ throw new HTTPException("space as url char");
112
+ }
113
+
114
+ switch(state) {
115
+ case req_spaces_before_url:
116
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
117
+ * All methods except CONNECT are followed by '/' or '*'.
118
+ */
119
+ if(SLASH == ch || STAR == ch){
120
+ return req_path;
121
+ }
122
+ if(isAtoZ(ch)){
123
+ return req_schema;
124
+ }
125
+ break;
126
+ case req_schema:
127
+ if(isAtoZ(ch)){
128
+ return req_schema;
129
+ }
130
+ if(COLON == ch){
131
+ return req_schema_slash;
132
+ }
133
+ break;
134
+ case req_schema_slash:
135
+ if(SLASH == ch){
136
+ return req_schema_slash_slash;
137
+ }
138
+ break;
139
+ case req_schema_slash_slash:
140
+ if(SLASH == ch){
141
+ return req_host_start;
142
+ }
143
+ break;
144
+ case req_host_start:
145
+ if (ch == (byte)'[') {
146
+ return req_host_v6_start;
147
+ }
148
+ if (isHostChar(ch)) {
149
+ return req_host;
150
+ }
151
+ break;
152
+
153
+ case req_host:
154
+ if (isHostChar(ch)) {
155
+ return req_host;
156
+ }
157
+
158
+ /* FALLTHROUGH */
159
+ case req_host_v6_end:
160
+ switch (ch) {
161
+ case ':':
162
+ return req_port_start;
163
+ case '/':
164
+ return req_path;
165
+ case '?':
166
+ return req_query_string_start;
167
+ }
168
+ break;
169
+
170
+ case req_host_v6:
171
+ if (ch == ']') {
172
+ return req_host_v6_end;
173
+ }
174
+
175
+ /* FALLTHROUGH */
176
+ case req_host_v6_start:
177
+ if (isHex(ch) || ch == ':') {
178
+ return req_host_v6;
179
+ }
180
+ break;
181
+
182
+ case req_port:
183
+ switch (ch) {
184
+ case '/':
185
+ return req_path;
186
+ case '?':
187
+ return req_query_string_start;
188
+ }
189
+
190
+ /* FALLTHROUGH */
191
+ case req_port_start:
192
+ if (isDigit(ch)) {
193
+ return req_port;
194
+ }
195
+ break;
196
+
197
+ case req_path:
198
+ if (isNormalUrlChar(chi)) {
199
+ return req_path;
200
+ }
201
+ switch (ch) {
202
+ case '?':
203
+ return req_query_string_start;
204
+ case '#':
205
+ return req_fragment_start;
206
+ }
207
+
208
+ break;
209
+
210
+ case req_query_string_start:
211
+ case req_query_string:
212
+ if (isNormalUrlChar(chi)) {
213
+ return req_query_string;
214
+ }
215
+
216
+ switch (ch) {
217
+ case '?':
218
+ /* allow extra '?' in query string */
219
+ return req_query_string;
220
+
221
+ case '#':
222
+ return req_fragment_start;
223
+ }
224
+
225
+ break;
226
+
227
+ case req_fragment_start:
228
+ if (isNormalUrlChar(chi)) {
229
+ return req_fragment;
230
+ }
231
+ switch (ch) {
232
+ case '?':
233
+ return req_fragment;
234
+
235
+ case '#':
236
+ return req_fragment_start;
237
+ }
238
+ break;
239
+
240
+ case req_fragment:
241
+ if (isNormalUrlChar(ch)) {
242
+ return req_fragment;
243
+ }
244
+
245
+ switch (ch) {
246
+ case '?':
247
+ case '#':
248
+ return req_fragment;
249
+ }
250
+
251
+ break;
252
+ default:
253
+ break;
254
+ }
255
+
256
+ /* We should never fall out of the switch above unless there's an error */
257
+ return dead;
258
+ }
94
259
 
95
260
  /** Execute the parser with the currently available data contained in
96
261
  * the buffer. The buffers position() and limit() need to be set
@@ -100,10 +265,9 @@ public class HTTPParser {
100
265
  public int execute(ParserSettings settings, ByteBuffer data) {
101
266
 
102
267
  int p = data.position();
103
- int p_err = p; // this is used for pretty printing errors.
268
+ this.p_start = p; // this is used for pretty printing errors.
269
+ // and returning the amount of processed bytes.
104
270
 
105
- this.start_position = p;
106
- this.data = data;
107
271
 
108
272
  // In case the headers don't provide information about the content
109
273
  // length, `execute` needs to be called with an empty buffer to
@@ -111,19 +275,19 @@ public class HTTPParser {
111
275
  // else there is no way of knowing the message is complete.
112
276
  int len = (data.limit() - data.position());
113
277
  if (0 == len) {
114
- // if (State.body_identity_eof == state) {
115
- // settings.call_on_message_complete(this);
116
- // }
278
+ // if (State.body_identity_eof == state) {
279
+ // settings.call_on_message_complete(this);
280
+ // }
117
281
  switch (state) {
118
282
  case body_identity_eof:
119
283
  settings.call_on_message_complete(this);
120
- return data.position() - start_position;
284
+ return data.position() - this.p_start;
121
285
 
122
286
  case dead:
123
- case start_res_or_res:
287
+ case start_req_or_res:
124
288
  case start_res:
125
289
  case start_req:
126
- return data.position() - start_position;
290
+ return data.position() - this.p_start;
127
291
 
128
292
  default:
129
293
  // should we really consider this an error!?
@@ -142,78 +306,81 @@ public class HTTPParser {
142
306
  case header_value:
143
307
  header_value_mark = p;
144
308
  break;
145
- case req_fragment:
146
- fragment_mark = p;
147
- url_mark = p;
148
- break;
149
- case req_query_string:
150
- query_string_mark = p;
151
- url_mark = p;
152
- break;
153
309
  case req_path:
154
- path_mark = p;
155
-
156
- case req_host:
157
310
  case req_schema:
158
311
  case req_schema_slash:
159
312
  case req_schema_slash_slash:
313
+ case req_host_start:
314
+ case req_host_v6_start:
315
+ case req_host_v6:
316
+ case req_host_v6_end:
317
+ case req_host:
318
+ case req_port_start:
160
319
  case req_port:
161
320
  case req_query_string_start:
321
+ case req_query_string:
162
322
  case req_fragment_start:
323
+ case req_fragment:
163
324
  url_mark = p;
164
325
  break;
165
326
  }
327
+ boolean reexecute = false;
328
+ int pe = 0;
329
+ byte ch = 0;
330
+ int chi = 0;
331
+ byte c = -1;
332
+ int to_read = 0;
166
333
 
167
334
  // this is where the work gets done, traverse the available data...
168
- while (data.position() != data.limit()) {
169
-
170
- p = data.position();
171
- int pe = data.limit();
172
-
173
- byte ch = data.get(); // the current character to process.
174
- int chi = ch & 0xff; // utility, ch without signedness for table lookups.
175
- byte c = -1; // utility variably used for up- and downcasing etc.
176
- int to_read = 0; // used to keep track of how much of body, etc. is left to read
177
-
178
- if (parsing_header(state)) {
179
- ++nread;
180
- if (nread > HTTP_MAX_HEADER_SIZE) {
181
- settings.call_on_error(this, "possible buffer overflow", data, p_err);
182
- return error();
335
+ while (data.position() != data.limit() || reexecute) {
336
+ // p(state + ": r: " + reexecute + " :: " +p );
337
+
338
+ if(!reexecute){
339
+ p = data.position();
340
+ pe = data.limit();
341
+ ch = data.get(); // the current character to process.
342
+ chi = ch & 0xff; // utility, ch without signedness for table lookups.
343
+ c = -1; // utility variably used for up- and downcasing etc.
344
+ to_read = 0; // used to keep track of how much of body, etc. is left to read
345
+
346
+ if (parsing_header(state)) {
347
+ ++nread;
348
+ if (nread > HTTP_MAX_HEADER_SIZE) {
349
+ return error(settings, "possible buffer overflow", data);
350
+ }
183
351
  }
184
352
  }
185
- //p(state + ":" + ch +":"+p);
353
+ reexecute = false;
354
+ // p(state + " ::: " + ch + " : " + (((CR == ch) || (LF == ch)) ? ch : ("'" + (char)ch + "'")) +": "+p );
355
+
186
356
  switch (state) {
187
357
  /*
188
358
  * this state is used after a 'Connection: close' message
189
359
  * the parser will error out if it reads another message
190
360
  */
191
361
  case dead:
192
- settings.call_on_error(this, "Connection already closed", data, p_err);
193
- return error();
362
+ if (CR == ch || LF == ch){
363
+ break;
364
+ }
365
+ return error(settings, "Connection already closed", data);
194
366
 
195
367
 
196
368
 
197
- case start_res_or_res:
369
+ case start_req_or_res:
198
370
  if (CR == ch || LF == ch){
199
371
  break;
200
372
  }
201
373
  flags = 0;
202
374
  content_length = -1;
203
375
 
204
- settings.call_on_message_begin(this);
205
-
206
- if (H == ch) {
376
+ if (H == ch) {
207
377
  state = State.res_or_resp_H;
378
+ settings.call_on_message_begin(this);
208
379
  } else {
209
- type = ParserType.HTTP_REQUEST;
210
- method = start_req_method_assign(ch);
211
- if (null == method) {
212
- settings.call_on_error(this, "invalid method", data, p_err);
213
- return error();
214
- }
215
- index = 1;
216
- state = State.req_method;
380
+ type = ParserType.HTTP_REQUEST;
381
+ state = State.start_req;
382
+ index = 1;
383
+ reexecute = true;
217
384
  }
218
385
  break;
219
386
 
@@ -225,8 +392,7 @@ public class HTTPParser {
225
392
  state = State.res_HT;
226
393
  } else {
227
394
  if (E != ch) {
228
- settings.call_on_error(this, "not E", data, p_err);
229
- return error();
395
+ return error(settings, "not E", data);
230
396
  }
231
397
  type = ParserType.HTTP_REQUEST;
232
398
  method = HTTPMethod.HTTP_HEAD;
@@ -241,8 +407,6 @@ public class HTTPParser {
241
407
  flags = 0;
242
408
  content_length = -1;
243
409
 
244
- settings.call_on_message_begin(this);
245
-
246
410
  switch(ch) {
247
411
  case H:
248
412
  state = State.res_H;
@@ -251,38 +415,35 @@ public class HTTPParser {
251
415
  case LF:
252
416
  break;
253
417
  default:
254
- settings.call_on_error(this, "Not H or CR/LF", data, p_err);
255
- return error();
418
+ return error(settings, "Not H or CR/LF", data);
256
419
  }
420
+
421
+ settings.call_on_message_begin(this);
257
422
  break;
258
423
 
259
424
 
260
425
 
261
426
  case res_H:
262
427
  if (strict && T != ch) {
263
- settings.call_on_error(this, "Not T", data, p_err);
264
- return error();
428
+ return error(settings, "Not T", data);
265
429
  }
266
430
  state = State.res_HT;
267
431
  break;
268
432
  case res_HT:
269
433
  if (strict && T != ch) {
270
- settings.call_on_error(this, "Not T2", data, p_err);
271
- return error();
434
+ return error(settings, "Not T2", data);
272
435
  }
273
436
  state = State.res_HTT;
274
437
  break;
275
438
  case res_HTT:
276
439
  if (strict && P != ch) {
277
- settings.call_on_error(this, "Not P", data, p_err);
278
- return error();
440
+ return error(settings, "Not P", data);
279
441
  }
280
442
  state = State.res_HTTP;
281
443
  break;
282
444
  case res_HTTP:
283
445
  if (strict && SLASH != ch) {
284
- settings.call_on_error(this, "Not '/'", data, p_err);
285
- return error();
446
+ return error(settings, "Not '/'", data);
286
447
  }
287
448
  state = State.res_first_http_major;
288
449
  break;
@@ -291,8 +452,7 @@ public class HTTPParser {
291
452
 
292
453
  case res_first_http_major:
293
454
  if (!isDigit(ch)) {
294
- settings.call_on_error(this, "Not a digit", data, p_err);
295
- return error();
455
+ return error(settings, "Not a digit", data);
296
456
  }
297
457
  http_major = (int) ch - 0x30;
298
458
  state = State.res_http_major;
@@ -305,23 +465,20 @@ public class HTTPParser {
305
465
  break;
306
466
  }
307
467
  if (!isDigit(ch)) {
308
- settings.call_on_error(this, "Not a digit", data, p_err);
309
- return error();
468
+ return error(settings, "Not a digit", data);
310
469
  }
311
470
  http_major *= 10;
312
471
  http_major += (ch - 0x30);
313
472
 
314
473
  if (http_major > 999) {
315
- settings.call_on_error(this, "invalid http major version: "+http_major, data, p_err);
316
- return error();
474
+ return error(settings, "invalid http major version: ", data);
317
475
  }
318
476
  break;
319
477
 
320
478
  /* first digit of minor HTTP version */
321
479
  case res_first_http_minor:
322
480
  if (!isDigit(ch)) {
323
- settings.call_on_error(this, "Not a digit", data, p_err);
324
- return error();
481
+ return error(settings, "Not a digit", data);
325
482
  }
326
483
  http_minor = (int)ch - 0x30;
327
484
  state = State.res_http_minor;
@@ -334,14 +491,12 @@ public class HTTPParser {
334
491
  break;
335
492
  }
336
493
  if (!isDigit(ch)) {
337
- settings.call_on_error(this, "Not a digit", data, p_err);
338
- return error();
494
+ return error(settings, "Not a digit", data);
339
495
  }
340
496
  http_minor *= 10;
341
497
  http_minor += (ch - 0x30);
342
498
  if (http_minor > 999) {
343
- settings.call_on_error(this, "invalid http minor version: "+http_minor, data, p_err);
344
- return error();
499
+ return error(settings, "invalid http minor version: ", data);
345
500
  }
346
501
  break;
347
502
 
@@ -352,8 +507,7 @@ public class HTTPParser {
352
507
  if (SPACE == ch) {
353
508
  break;
354
509
  }
355
- settings.call_on_error(this, "Not a digit (status code)", data, p_err);
356
- return error();
510
+ return error(settings, "Not a digit (status code)", data);
357
511
  }
358
512
  status_code = (int)ch - 0x30;
359
513
  state = State.res_status_code;
@@ -372,16 +526,14 @@ public class HTTPParser {
372
526
  state = State.header_field_start;
373
527
  break;
374
528
  default:
375
- settings.call_on_error(this, "not a valid status code", data, p_err);
376
- return error();
529
+ return error(settings, "not a valid status code", data);
377
530
  }
378
531
  break;
379
532
  }
380
533
  status_code *= 10;
381
534
  status_code += (int)ch - 0x30;
382
535
  if (status_code > 999) {
383
- settings.call_on_error(this, "ridiculous status code:"+status_code, data, p_err);
384
- return error();
536
+ return error(settings, "ridiculous status code:", data);
385
537
  }
386
538
  break;
387
539
 
@@ -402,8 +554,7 @@ public class HTTPParser {
402
554
 
403
555
  case res_line_almost_done:
404
556
  if (strict && LF != ch) {
405
- settings.call_on_error(this, "not LF", data, p_err);
406
- return error();
557
+ return error(settings, "not LF", data);
407
558
  }
408
559
  state = State.header_field_start;
409
560
  break;
@@ -416,22 +567,26 @@ public class HTTPParser {
416
567
  }
417
568
  flags = 0;
418
569
  content_length = -1;
419
- settings.call_on_message_begin(this);
570
+
571
+ if(!isAtoZ(ch)){
572
+ return error(settings, "invalid method", data);
573
+ }
574
+
420
575
  method = start_req_method_assign(ch);
421
576
  if (null == method) {
422
- settings.call_on_error(this, "invalid method", data, p_err);
423
- return error();
577
+ return error(settings, "invalid method", data);
424
578
  }
425
579
  index = 1;
426
580
  state = State.req_method;
581
+
582
+ settings.call_on_message_begin(this);
427
583
  break;
428
584
 
429
585
 
430
586
 
431
587
  case req_method:
432
588
  if (0 == ch) {
433
- settings.call_on_error(this, "NULL in method", data, p_err);
434
- return error();
589
+ return error(settings, "NULL in method", data);
435
590
  }
436
591
 
437
592
  byte [] arr = method.bytes;
@@ -456,17 +611,28 @@ public class HTTPParser {
456
611
  } else if (2 == index && A == ch) {
457
612
  method = HTTPMethod.HTTP_MKACTIVITY;
458
613
  }
459
- } else if (1 == index && HTTPMethod.HTTP_POST == method && R == ch) {
460
- method = HTTPMethod.HTTP_PROPFIND;
461
- } else if (1 == index && HTTPMethod.HTTP_POST == method && U == ch) {
462
- method = HTTPMethod.HTTP_PUT;
463
- } else if (2 == index && HTTPMethod.HTTP_UNLOCK == method && S == ch) {
464
- method = HTTPMethod.HTTP_UNSUBSCRIBE;
465
- } else if (4 == index && HTTPMethod.HTTP_PROPFIND == method && P == ch) {
614
+ } else if (1 == index && HTTPMethod.HTTP_POST == method) {
615
+ if(R == ch) {
616
+ method = HTTPMethod.HTTP_PROPFIND; /* or HTTP_PROPPATCH */
617
+ }else if(U == ch){
618
+ method = HTTPMethod.HTTP_PUT; /* or HTTP_PURGE */
619
+ }else if(A == ch){
620
+ method = HTTPMethod.HTTP_PATCH;
621
+ }
622
+ } else if (2 == index) {
623
+ if(HTTPMethod.HTTP_PUT == method) {
624
+ if(R == ch){
625
+ method = HTTPMethod.HTTP_PURGE;
626
+ }
627
+ }else if(HTTPMethod.HTTP_UNLOCK == method){
628
+ if(S == ch){
629
+ method = HTTPMethod.HTTP_UNSUBSCRIBE;
630
+ }
631
+ }
632
+ }else if(4 == index && HTTPMethod.HTTP_PROPFIND == method && P == ch){
466
633
  method = HTTPMethod.HTTP_PROPPATCH;
467
634
  } else {
468
- settings.call_on_error(this, "Invalid HTTP method", data, p_err);
469
- return error();
635
+ return error(settings, "Invalid HTTP method", data);
470
636
  }
471
637
 
472
638
  ++index;
@@ -479,324 +645,68 @@ public class HTTPParser {
479
645
  if (SPACE == ch) {
480
646
  break;
481
647
  }
482
- if (SLASH == ch || STAR == ch) {
483
- url_mark = p;
484
- path_mark = p;
485
- state = State.req_path;
486
- break;
487
- }
488
- if (isAtoZ(ch)) {
489
- url_mark = p;
490
- state = State.req_schema;
491
- break;
492
- }
493
- settings.call_on_error(this, "Invalid something", data, p_err);
494
- return error();
495
-
496
- case req_schema:
497
- if (isAtoZ(ch)){
498
- break;
499
- }
500
- if (COLON == ch) {
501
- state = State.req_schema_slash;
502
- break;
503
- } else if (DOT == ch || isDigit(ch)) {
504
- state = State.req_host;
505
- break;
648
+ url_mark = p;
649
+ if(HTTPMethod.HTTP_CONNECT == method){
650
+ state = req_host_start;
506
651
  }
507
- settings.call_on_error(this, "invalid char in schema: "+ch, data, p_err);
508
- return error();
509
652
 
510
- case req_schema_slash:
511
- if (strict && SLASH != ch) {
512
- settings.call_on_error(this, "invalid char in schema, not /", data, p_err);
513
- return error();
653
+ state = parse_url_char(ch);
654
+ if(state == dead){
655
+ return error(settings, "Invalid something", data);
514
656
  }
515
- state = State.req_schema_slash_slash;
516
657
  break;
517
658
 
659
+
660
+ case req_schema:
661
+ case req_schema_slash:
518
662
  case req_schema_slash_slash:
519
- if (strict && SLASH != ch) {
520
- settings.call_on_error(this, "invalid char in schema, not /", data, p_err);
521
- return error();
522
- }
523
- state = State.req_host;
524
- break;
525
-
526
- case req_host:
527
- if (isAtoZ(ch)) {
528
- break;
529
- }
530
- if (isDigit(ch) || DOT == ch || DASH == ch) break;
663
+ case req_host_start:
664
+ case req_host_v6_start:
665
+ case req_host_v6:
666
+ case req_port_start:
531
667
  switch (ch) {
532
- case COLON:
533
- state = State.req_port;
534
- break;
535
- case SLASH:
536
- path_mark = p;
537
- break;
668
+ /* No whitespace allowed here */
538
669
  case SPACE:
539
- /* The request line looks like:
540
- * "GET http://foo.bar.com HTTP/1.1"
541
- * That is, there is no path.
542
- */
543
- settings.call_on_url(this, data, url_mark, p-url_mark);
544
- url_mark = -1;
545
- state = State.req_http_start;
546
- break;
547
- case QMARK:
548
- state = State.req_query_string_start;
549
- break;
670
+ case CR:
671
+ case LF:
672
+ return error(settings, "unexpected char in path", data);
550
673
  default:
551
- settings.call_on_error(this, "host error in method line", data, p_err);
552
- return error();
674
+ state = parse_url_char(ch);
675
+ if(dead == state){
676
+ return error(settings, "unexpected char in path", data);
677
+ }
553
678
  }
554
679
  break;
555
680
 
681
+ case req_host:
682
+ case req_host_v6_end:
556
683
  case req_port:
557
- if (isDigit(ch)) break;
558
- switch (ch) {
559
- case SLASH:
560
- path_mark = p;
561
- state = State.req_path;
562
- break;
563
- case SPACE:
564
- /* The request line looks like:
565
- * "GET http://foo.bar.com:1234 HTTP/1.1"
566
- * That is, there is no path.
567
- */
568
- settings.call_on_url(this,data,url_mark,p-url_mark);
569
- url_mark = -1;
570
- state = State.req_http_start;
571
- break;
572
- case QMARK:
573
- state = State.req_query_string_start;
574
- break;
575
- default:
576
- settings.call_on_error(this, "invalid port", data, p_err);
577
- return error();
578
- }
579
- break;
580
-
581
684
  case req_path:
582
- if (normal_url_char[chi]) break;
583
- switch (ch) {
584
- case SPACE:
585
- settings.call_on_url(this,data,url_mark, p-url_mark);
586
- url_mark = -1;
587
-
588
- settings.call_on_path(this,data,path_mark, p-path_mark);
589
- path_mark = -1;
590
-
591
- state = State.req_http_start;
592
- break;
593
-
594
- case CR:
595
- settings.call_on_url(this,data,url_mark, p-url_mark);
596
- url_mark = -1;
597
-
598
- settings.call_on_path(this,data,path_mark, p-path_mark);
599
- path_mark = -1;
600
-
601
- http_minor = 9;
602
- state = State.res_line_almost_done;
603
- break;
604
-
605
- case LF:
606
- settings.call_on_url(this,data,url_mark, p-url_mark);
607
- url_mark = -1;
608
-
609
- settings.call_on_path(this,data,path_mark, p-path_mark);
610
- path_mark = -1;
611
-
612
- http_minor = 9;
613
- state = State.header_field_start;
614
- break;
615
-
616
- case QMARK:
617
- settings.call_on_path(this,data,path_mark, p-path_mark);
618
- path_mark = -1;
619
-
620
- state = State.req_query_string_start;
621
- break;
622
-
623
- case HASH:
624
- settings.call_on_path(this,data,path_mark, p-path_mark);
625
- path_mark = -1;
626
-
627
- state = State.req_fragment_start;
628
- break;
629
-
630
- default:
631
- settings.call_on_error(this, "unexpected char in path", data, p_err);
632
- return error();
633
- }
634
- break;
635
-
636
685
  case req_query_string_start:
637
- if (normal_url_char[chi]) {
638
- query_string_mark = p;
639
- state = State.req_query_string;
640
- break;
641
- }
642
-
643
- switch (ch) {
644
- case QMARK: break;
645
- case SPACE:
646
- settings.call_on_url(this, data, url_mark, p-url_mark);
647
- url_mark = -1;
648
- state = State.req_http_start;
649
- break;
650
- case CR:
651
- settings.call_on_url(this,data,url_mark, p-url_mark);
652
- url_mark = -1;
653
- http_minor = 9;
654
- state = State.res_line_almost_done;
655
- break;
656
- case LF:
657
- settings.call_on_url(this,data,url_mark, p-url_mark);
658
- url_mark = -1;
659
- http_minor = 9;
660
- state = State.header_field_start;
661
- break;
662
- case HASH:
663
- state = State.req_fragment_start;
664
- break;
665
- default:
666
- settings.call_on_error(this, "unexpected char in path", data, p_err);
667
- return error();
668
- }
669
- break;
670
-
671
686
  case req_query_string:
672
- if (normal_url_char[chi]) {
673
- break;
674
- }
675
-
676
- switch (ch) {
677
- case QMARK: break; // allow extra '?' in query string
678
- case SPACE:
679
- settings.call_on_url(this, data, url_mark, p-url_mark);
680
- url_mark = -1;
681
-
682
- settings.call_on_query_string(this, data, query_string_mark, p-query_string_mark);
683
- query_string_mark = -1;
684
-
685
- state = State.req_http_start;
686
- break;
687
- case CR:
688
- settings.call_on_url(this,data,url_mark, p-url_mark);
689
- url_mark = -1;
690
-
691
- settings.call_on_query_string(this, data, query_string_mark, p-query_string_mark);
692
- query_string_mark = -1;
693
-
694
- http_minor = 9;
695
- state = State.res_line_almost_done;
696
- break;
697
- case LF:
698
- settings.call_on_url(this,data,url_mark, p-url_mark);
699
- url_mark = -1;
700
-
701
- settings.call_on_query_string(this, data, query_string_mark, p-query_string_mark);
702
- query_string_mark = -1;
703
- http_minor = 9;
704
-
705
- state = State.header_field_start;
706
- break;
707
- case HASH:
708
- settings.call_on_query_string(this, data, query_string_mark, p-query_string_mark);
709
- query_string_mark = -1;
710
-
711
- state = State.req_fragment_start;
712
- break;
713
- default:
714
- settings.call_on_error(this, "unexpected char in path", data, p_err);
715
- return error();
716
- }
717
- break;
718
-
719
687
  case req_fragment_start:
720
- if (normal_url_char[chi]) {
721
- fragment_mark = p;
722
- state = State.req_fragment;
723
- break;
724
- }
725
-
726
- switch (ch) {
727
- case SPACE:
728
- settings.call_on_url(this, data, url_mark, p-url_mark);
729
- url_mark = -1;
730
-
731
- state = State.req_http_start;
732
- break;
733
- case CR:
734
- settings.call_on_url(this,data,url_mark, p-url_mark);
735
- url_mark = -1;
736
-
737
- http_minor = 9;
738
- state = State.res_line_almost_done;
739
- break;
740
- case LF:
741
- settings.call_on_url(this,data,url_mark, p-url_mark);
742
- url_mark = -1;
743
-
744
- http_minor = 9;
745
- state = State.header_field_start;
746
- break;
747
- case QMARK:
748
- fragment_mark = p;
749
- state = State.req_fragment;
750
- break;
751
- case HASH:
752
- break;
753
- default:
754
- settings.call_on_error(this, "unexpected char in path", data, p_err);
755
- return error();
756
- }
757
- break;
758
-
759
688
  case req_fragment:
760
- if (normal_url_char[chi]) {
761
- break;
762
- }
763
-
764
689
  switch (ch) {
765
690
  case SPACE:
766
691
  settings.call_on_url(this, data, url_mark, p-url_mark);
692
+ settings.call_on_path(this, data, url_mark, p - url_mark);
767
693
  url_mark = -1;
768
-
769
- settings.call_on_fragment(this, data, fragment_mark, p-fragment_mark);
770
- fragment_mark = -1;
771
-
772
694
  state = State.req_http_start;
773
695
  break;
774
696
  case CR:
775
- settings.call_on_url(this,data,url_mark, p-url_mark);
776
- url_mark = -1;
777
-
778
- settings.call_on_fragment(this, data, query_string_mark, p-query_string_mark);
779
- fragment_mark = -1;
780
-
781
- http_minor = 9;
782
- state = State.res_line_almost_done;
783
- break;
784
697
  case LF:
785
- settings.call_on_url(this,data,url_mark, p-url_mark);
786
- url_mark = -1;
787
-
788
- settings.call_on_fragment(this, data, query_string_mark, p-query_string_mark);
789
- fragment_mark = -1;
790
-
698
+ http_major = 0;
791
699
  http_minor = 9;
792
- state = State.header_field_start;
793
- break;
794
- case QMARK:
795
- case HASH:
700
+ state = (CR == ch) ? req_line_almost_done : header_field_start;
701
+ settings.call_on_url(this, data, url_mark, p-url_mark); //TODO check params!!!
702
+ settings.call_on_path(this, data, url_mark, p-url_mark);
703
+ url_mark = -1;
796
704
  break;
797
705
  default:
798
- settings.call_on_error(this, "unexpected char in path", data, p_err);
799
- return error();
706
+ state = parse_url_char(ch);
707
+ if(dead == state){
708
+ return error(settings, "unexpected char in path", data);
709
+ }
800
710
  }
801
711
  break;
802
712
  /******************* URL *******************/
@@ -812,39 +722,34 @@ public class HTTPParser {
812
722
  case SPACE:
813
723
  break;
814
724
  default:
815
- settings.call_on_error(this, "error in req_http_H", data, p_err);
816
- return error();
725
+ return error(settings, "error in req_http_H", data);
817
726
  }
818
727
  break;
819
728
 
820
729
  case req_http_H:
821
730
  if (strict && T != ch) {
822
- settings.call_on_error(this, "unexpected char", data, p_err);
823
- return error();
731
+ return error(settings, "unexpected char", data);
824
732
  }
825
733
  state = State.req_http_HT;
826
734
  break;
827
735
 
828
736
  case req_http_HT:
829
737
  if (strict && T != ch) {
830
- settings.call_on_error(this, "unexpected char", data, p_err);
831
- return error();
738
+ return error(settings, "unexpected char", data);
832
739
  }
833
740
  state = State.req_http_HTT;
834
741
  break;
835
742
 
836
743
  case req_http_HTT:
837
744
  if (strict && P != ch) {
838
- settings.call_on_error(this, "unexpected char", data, p_err);
839
- return error();
745
+ return error(settings, "unexpected char", data);
840
746
  }
841
747
  state = State.req_http_HTTP;
842
748
  break;
843
749
 
844
750
  case req_http_HTTP:
845
751
  if (strict && SLASH != ch) {
846
- settings.call_on_error(this, "unexpected char", data, p_err);
847
- return error();
752
+ return error(settings, "unexpected char", data);
848
753
  }
849
754
  state = req_first_http_major;
850
755
  break;
@@ -852,8 +757,7 @@ public class HTTPParser {
852
757
  /* first digit of major HTTP version */
853
758
  case req_first_http_major:
854
759
  if (!isDigit(ch)) {
855
- settings.call_on_error(this, "non digit in http major", data, p_err);
856
- return error();
760
+ return error(settings, "non digit in http major", data);
857
761
  }
858
762
  http_major = (int)ch - 0x30;
859
763
  state = State.req_http_major;
@@ -867,24 +771,21 @@ public class HTTPParser {
867
771
  }
868
772
 
869
773
  if (!isDigit(ch)) {
870
- settings.call_on_error(this, "non digit in http major", data, p_err);
871
- return error();
774
+ return error(settings, "non digit in http major", data);
872
775
  }
873
776
 
874
777
  http_major *= 10;
875
778
  http_major += (int)ch - 0x30;
876
779
 
877
780
  if (http_major > 999) {
878
- settings.call_on_error(this, "ridiculous http major", data, p_err);
879
- return error();
781
+ return error(settings, "ridiculous http major", data);
880
782
  };
881
783
  break;
882
784
 
883
785
  /* first digit of minor HTTP version */
884
786
  case req_first_http_minor:
885
787
  if (!isDigit(ch)) {
886
- settings.call_on_error(this, "non digit in http minor", data, p_err);
887
- return error();
788
+ return error(settings, "non digit in http minor", data);
888
789
  }
889
790
  http_minor = (int)ch - 0x30;
890
791
  state = State.req_http_minor;
@@ -904,8 +805,7 @@ public class HTTPParser {
904
805
  /* XXX allow spaces after digit? */
905
806
 
906
807
  if (!isDigit(ch)) {
907
- settings.call_on_error(this, "non digit in http minor", data, p_err);
908
- return error();
808
+ return error(settings, "non digit in http minor", data);
909
809
  }
910
810
 
911
811
  http_minor *= 10;
@@ -913,8 +813,7 @@ public class HTTPParser {
913
813
 
914
814
 
915
815
  if (http_minor > 999) {
916
- settings.call_on_error(this, "ridiculous http minor", data, p_err);
917
- return error();
816
+ return error(settings, "ridiculous http minor", data);
918
817
  };
919
818
 
920
819
  break;
@@ -923,10 +822,9 @@ public class HTTPParser {
923
822
  case req_line_almost_done:
924
823
  {
925
824
  if (ch != LF) {
926
- settings.call_on_error(this, "missing LF after request line", data, p_err);
927
- return error();
825
+ return error(settings, "missing LF after request line", data);
928
826
  }
929
- state = State.header_field_start;
827
+ state = header_field_start;
930
828
  break;
931
829
  }
932
830
 
@@ -938,7 +836,7 @@ public class HTTPParser {
938
836
  case header_field_start:
939
837
  {
940
838
  if (ch == CR) {
941
- state = State.headers_almost_done;
839
+ state = headers_almost_done;
942
840
  break;
943
841
  }
944
842
 
@@ -946,22 +844,15 @@ public class HTTPParser {
946
844
  /* they might be just sending \n instead of \r\n so this would be
947
845
  * the second \n to denote the end of headers*/
948
846
  state = State.headers_almost_done;
949
- if (!headers_almost_done(ch, settings)) {
950
- settings.call_on_error(this, "header not properly completed", data, p_err);
951
- return error();
952
- }
953
- if (upgrade) {
954
- return data.position() - start_position;
955
- }
847
+ reexecute = true;
956
848
  break;
957
849
  }
958
850
 
959
851
  c = token(ch);
960
852
 
961
853
  if (0 == c) {
962
- settings.call_on_error(this, "invalid char in header:"+c, data, p_err);
963
- return error();
964
- };
854
+ return error(settings, "invalid char in header:", data);
855
+ }
965
856
 
966
857
  header_field_mark = p;
967
858
 
@@ -1090,8 +981,7 @@ public class HTTPParser {
1090
981
  break;
1091
982
 
1092
983
  default:
1093
- settings.call_on_error(this, "Unknown Header State", data, p_err);
1094
- return error();
984
+ return error(settings, "Unknown Header State", data);
1095
985
  } // switch: header_state
1096
986
  break;
1097
987
  } // 0 != c
@@ -1120,15 +1010,14 @@ public class HTTPParser {
1120
1010
  break;
1121
1011
  }
1122
1012
 
1123
- settings.call_on_error(this, "invalid header field", data, p_err);
1124
- return error();
1013
+ return error(settings, "invalid header field", data);
1125
1014
  }
1126
1015
 
1127
1016
 
1128
1017
 
1129
1018
  case header_value_start:
1130
1019
  {
1131
- if (SPACE == ch) break;
1020
+ if ((SPACE == ch) || (TAB == ch)) break;
1132
1021
 
1133
1022
  header_value_mark = p;
1134
1023
 
@@ -1173,8 +1062,7 @@ public class HTTPParser {
1173
1062
 
1174
1063
  case content_length:
1175
1064
  if (!isDigit(ch)) {
1176
- settings.call_on_error(this, "Content-Length not numeric", data, p_err);
1177
- return error();
1065
+ return error(settings, "Content-Length not numeric", data);
1178
1066
  }
1179
1067
  content_length = (int)ch - 0x30;
1180
1068
  break;
@@ -1214,11 +1102,8 @@ public class HTTPParser {
1214
1102
  if (LF == ch) {
1215
1103
  settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1216
1104
  header_value_mark = -1;
1217
-
1218
- if (!header_almost_done(ch)) {
1219
- settings.call_on_error(this,"incorrect header ending, expection LF", data, p_err);
1220
- return error();
1221
- }
1105
+ state = header_almost_done;
1106
+ reexecute = true;
1222
1107
  break;
1223
1108
  }
1224
1109
 
@@ -1229,20 +1114,26 @@ public class HTTPParser {
1229
1114
 
1230
1115
  case connection:
1231
1116
  case transfer_encoding:
1232
- settings.call_on_error(this, "Shouldn't be here", data, p_err);
1233
- return error();
1117
+ return error(settings, "Shouldn't be here", data);
1234
1118
 
1235
1119
  case content_length:
1236
1120
  if (SPACE == ch) {
1237
1121
  break;
1238
1122
  }
1239
1123
  if (!isDigit(ch)) {
1240
- settings.call_on_error(this, "Content-Length not numeric", data, p_err);
1241
- return error();
1124
+ return error(settings, "Content-Length not numeric", data);
1242
1125
  }
1243
1126
 
1244
- content_length *= 10;
1245
- content_length += (int)ch - 0x30;
1127
+ long t = content_length;
1128
+ t *= 10;
1129
+ t += (long)ch - 0x30;
1130
+
1131
+ /* Overflow? */
1132
+ // t will wrap and become negative ...
1133
+ if (t < content_length) {
1134
+ return error(settings, "Invalid content length", data);
1135
+ }
1136
+ content_length = t;
1246
1137
  break;
1247
1138
 
1248
1139
  /* Transfer-Encoding: chunked */
@@ -1293,21 +1184,121 @@ public class HTTPParser {
1293
1184
 
1294
1185
  case header_almost_done:
1295
1186
  if (!header_almost_done(ch)) {
1296
- settings.call_on_error(this,"incorrect header ending, expection LF", data, p_err);
1297
- return error();
1187
+ return error(settings, "incorrect header ending, expecting LF", data);
1188
+ }
1189
+ break;
1190
+
1191
+ case header_value_lws:
1192
+ if (SPACE == ch || TAB == ch ){
1193
+ state = header_value_start;
1194
+ } else {
1195
+ state = header_field_start;
1196
+ reexecute = true;
1298
1197
  }
1299
1198
  break;
1300
1199
 
1301
1200
  case headers_almost_done:
1302
- if (!headers_almost_done(ch, settings)) {
1303
- settings.call_on_error(this, "header not properly completed", data, p_err);
1304
- return error();
1201
+ if (LF != ch) {
1202
+ return error(settings, "header not properly completed", data);
1203
+ }
1204
+ if (0 != (flags & F_TRAILING)) {
1205
+ /* End of a chunked request */
1206
+ state = new_message();
1207
+ settings.call_on_headers_complete(this);
1208
+ settings.call_on_message_complete(this);
1209
+ break;
1305
1210
  }
1211
+
1212
+ state = headers_done;
1213
+
1214
+ if (0 != (flags & F_UPGRADE) || HTTPMethod.HTTP_CONNECT == method) {
1215
+ upgrade = true;
1216
+ }
1217
+
1218
+ /* Here we call the headers_complete callback. This is somewhat
1219
+ * different than other callbacks because if the user returns 1, we
1220
+ * will interpret that as saying that this message has no body. This
1221
+ * is needed for the annoying case of receiving a response to a HEAD
1222
+ * request.
1223
+ */
1224
+
1225
+ /* (responses to HEAD request contain a CONTENT-LENGTH header
1226
+ * but no content)
1227
+ *
1228
+ * Consider what to do here: I don't like the idea of the callback
1229
+ * interface having a different contract in the case of HEAD
1230
+ * responses. The alternatives would be either to:
1231
+ *
1232
+ * a.) require the header_complete callback to implement a different
1233
+ * interface or
1234
+ *
1235
+ * b.) provide an overridden execute(bla, bla, boolean
1236
+ * parsingHeader) implementation ...
1237
+ */
1238
+
1239
+ // /*TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO */
1240
+ // if (null != settings.on_headers_complete) {
1241
+ // settings.call_on_headers_complete(this);
1242
+ // //return;
1243
+ // }
1244
+
1245
+ if (null != settings.on_headers_complete) {
1246
+ switch (settings.on_headers_complete.cb(this)) {
1247
+ case 0:
1248
+ break;
1249
+
1250
+ case 1:
1251
+ flags |= F_SKIPBODY;
1252
+ break;
1253
+
1254
+ default:
1255
+ return error(settings, "HPE_CB_headers_complete", data); /* Error */
1256
+ }
1257
+ }
1258
+ reexecute = true;
1259
+ break;
1260
+
1261
+ case headers_done:
1262
+ if (strict && (LF != ch)) {
1263
+ return error(settings, "STRICT CHECK", data); //TODO correct error msg
1264
+ }
1265
+
1266
+ nread = 0;
1267
+
1268
+ // Exit, the rest of the connect is in a different protocol.
1306
1269
  if (upgrade) {
1307
- return data.position()-start_position ;
1270
+ state = new_message();
1271
+ settings.call_on_message_complete(this);
1272
+ return data.position()-this.p_start;
1273
+ }
1274
+
1275
+ if (0 != (flags & F_SKIPBODY)) {
1276
+ state = new_message();
1277
+ settings.call_on_message_complete(this);
1278
+ } else if (0 != (flags & F_CHUNKED)) {
1279
+ /* chunked encoding - ignore Content-Length header */
1280
+ state = State.chunk_size_start;
1281
+ } else {
1282
+ if (content_length == 0) {
1283
+ /* Content-Length header given but zero: Content-Length: 0\r\n */
1284
+ state = new_message();
1285
+ settings.call_on_message_complete(this);
1286
+ } else if (content_length != -1) {
1287
+ /* Content-Length header given and non-zero */
1288
+ state = State.body_identity;
1289
+ } else {
1290
+ if (type == ParserType.HTTP_REQUEST || !http_message_needs_eof()) {
1291
+ /* Assume content-length 0 - read the next */
1292
+ state = new_message();
1293
+ settings.call_on_message_complete(this);
1294
+ } else {
1295
+ /* Read body until EOF */
1296
+ state = State.body_identity_eof;
1297
+ }
1298
+ }
1308
1299
  }
1309
- break;
1310
1300
 
1301
+ break;
1311
1302
  /******************* Header *******************/
1312
1303
 
1313
1304
 
@@ -1315,15 +1306,18 @@ public class HTTPParser {
1315
1306
 
1316
1307
  /******************* Body *******************/
1317
1308
  case body_identity:
1318
- to_read = min(pe - p, content_length); //TODO change to use buffer?
1309
+ to_read = min(pe - p, content_length); //TODO change to use buffer?
1310
+ body_mark = p;
1319
1311
 
1320
1312
  if (to_read > 0) {
1321
- settings.call_on_body(this, data, p, to_read);
1313
+ settings.call_on_body(this, data, p, to_read);
1322
1314
  data.position(p+to_read);
1323
1315
  content_length -= to_read;
1316
+
1324
1317
  if (content_length == 0) {
1325
- settings.call_on_message_complete(this);
1326
- state = new_message();
1318
+ state = message_done;
1319
+ p += to_read;
1320
+ reexecute = true;
1327
1321
  }
1328
1322
  }
1329
1323
  break;
@@ -1333,10 +1327,15 @@ public class HTTPParser {
1333
1327
  case body_identity_eof:
1334
1328
  to_read = pe - p; // TODO change to use buffer ?
1335
1329
  if (to_read > 0) {
1336
- settings.call_on_body(this, data, p, to_read);
1330
+ settings.call_on_body(this, data, p, to_read);
1337
1331
  data.position(p+to_read);
1338
1332
  }
1339
1333
  break;
1334
+
1335
+ case message_done:
1336
+ state = new_message();
1337
+ settings.call_on_message_complete(this);
1338
+ break;
1340
1339
  /******************* Body *******************/
1341
1340
 
1342
1341
 
@@ -1344,19 +1343,16 @@ public class HTTPParser {
1344
1343
  /******************* Chunk *******************/
1345
1344
  case chunk_size_start:
1346
1345
  if (1 != this.nread) {
1347
- settings.call_on_error(this, "nread != 1 (chunking)", data, p_err);
1348
- return error();
1346
+ return error(settings, "nread != 1 (chunking)", data);
1349
1347
 
1350
1348
  }
1351
1349
  if (0 == (flags & F_CHUNKED)) {
1352
- settings.call_on_error(this, "not chunked", data, p_err);
1353
- return error();
1350
+ return error(settings, "not chunked", data);
1354
1351
  }
1355
1352
 
1356
1353
  c = UNHEX[chi];
1357
1354
  if (c == -1) {
1358
- settings.call_on_error(this, "invalid hex char in chunk content length", data, p_err);
1359
- return error();
1355
+ return error(settings, "invalid hex char in chunk content length", data);
1360
1356
  }
1361
1357
  content_length = c;
1362
1358
  state = State.chunk_size;
@@ -1366,8 +1362,7 @@ public class HTTPParser {
1366
1362
 
1367
1363
  case chunk_size:
1368
1364
  if (0 == (flags & F_CHUNKED)) {
1369
- settings.call_on_error(this, "not chunked", data, p_err);
1370
- return error();
1365
+ return error(settings, "not chunked", data);
1371
1366
  }
1372
1367
 
1373
1368
  if (CR == ch) {
@@ -1382,20 +1377,23 @@ public class HTTPParser {
1382
1377
  state = State.chunk_parameters;
1383
1378
  break;
1384
1379
  }
1385
- settings.call_on_error(this, "invalid hex char in chunk content length", data, p_err);
1386
- return error();
1380
+ return error(settings, "invalid hex char in chunk content length", data);
1387
1381
  }
1388
-
1389
- content_length *= 16;
1390
- content_length += c;
1382
+ long t = content_length;
1383
+
1384
+ t *= 16;
1385
+ t += c;
1386
+ if(t < content_length){
1387
+ return error(settings, "invalid content length", data);
1388
+ }
1389
+ content_length = t;
1391
1390
  break;
1392
1391
 
1393
1392
 
1394
1393
 
1395
1394
  case chunk_parameters:
1396
1395
  if (0 == (flags & F_CHUNKED)) {
1397
- settings.call_on_error(this, "not chunked", data, p_err);
1398
- return error();
1396
+ return error(settings, "not chunked", data);
1399
1397
  }
1400
1398
  /* just ignore this shit. TODO check for overflow */
1401
1399
  if (CR == ch) {
@@ -1408,12 +1406,10 @@ public class HTTPParser {
1408
1406
 
1409
1407
  case chunk_size_almost_done:
1410
1408
  if (0 == (flags & F_CHUNKED)) {
1411
- settings.call_on_error(this, "not chunked", data, p_err);
1412
- return error();
1409
+ return error(settings, "not chunked", data);
1413
1410
  }
1414
1411
  if (strict && LF != ch) {
1415
- settings.call_on_error(this, "expected LF at end of chunk size", data, p_err);
1416
- return error();
1412
+ return error(settings, "expected LF at end of chunk size", data);
1417
1413
  }
1418
1414
 
1419
1415
  this.nread = 0;
@@ -1429,10 +1425,9 @@ public class HTTPParser {
1429
1425
 
1430
1426
 
1431
1427
  case chunk_data:
1432
- {
1428
+ //TODO Apply changes from C version for s_chunk_data
1433
1429
  if (0 == (flags & F_CHUNKED)) {
1434
- settings.call_on_error(this, "not chunked", data, p_err);
1435
- return error();
1430
+ return error(settings, "not chunked", data);
1436
1431
  }
1437
1432
 
1438
1433
  to_read = min(pe-p, content_length);
@@ -1447,32 +1442,29 @@ public class HTTPParser {
1447
1442
 
1448
1443
  content_length -= to_read;
1449
1444
  break;
1450
- }
1451
1445
 
1452
1446
 
1453
1447
 
1454
1448
  case chunk_data_almost_done:
1455
1449
  if (0 == (flags & F_CHUNKED)) {
1456
- settings.call_on_error(this, "not chunked", data, p_err);
1457
- return error();
1450
+ return error(settings, "not chunked", data);
1458
1451
  }
1459
1452
  if (strict && CR != ch) {
1460
- settings.call_on_error(this, "chunk data terminated incorrectly, expected CR", data, p_err);
1461
- return error();
1453
+ return error(settings, "chunk data terminated incorrectly, expected CR", data);
1462
1454
  }
1463
1455
  state = State.chunk_data_done;
1456
+ //TODO CALLBACK_DATA(body)
1457
+ // settings.call_on_body(this, data,p,?);
1464
1458
  break;
1465
1459
 
1466
1460
 
1467
1461
 
1468
1462
  case chunk_data_done:
1469
1463
  if (0 == (flags & F_CHUNKED)) {
1470
- settings.call_on_error(this, "not chunked", data, p_err);
1471
- return error();
1464
+ return error(settings, "not chunked", data);
1472
1465
  }
1473
1466
  if (strict && LF != ch) {
1474
- settings.call_on_error(this, "chunk data terminated incorrectly, expected LF", data, p_err);
1475
- return error();
1467
+ return error(settings, "chunk data terminated incorrectly, expected LF", data);
1476
1468
  }
1477
1469
  state = State.chunk_size_start;
1478
1470
  break;
@@ -1481,8 +1473,7 @@ public class HTTPParser {
1481
1473
 
1482
1474
 
1483
1475
  default:
1484
- settings.call_on_error(this, "unhandled state", data, p_err);
1485
- return error();
1476
+ return error(settings, "unhandled state", data);
1486
1477
 
1487
1478
  } // switch
1488
1479
  } // while
@@ -1495,17 +1486,34 @@ public class HTTPParser {
1495
1486
 
1496
1487
  settings.call_on_header_field(this, data, header_field_mark, p-header_field_mark);
1497
1488
  settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1498
- settings.call_on_fragment (this, data, fragment_mark, p-fragment_mark);
1499
- settings.call_on_query_string(this, data, query_string_mark, p-query_string_mark);
1500
- settings.call_on_path (this, data, path_mark, p-path_mark);
1501
1489
  settings.call_on_url (this, data, url_mark, p-url_mark);
1490
+ settings.call_on_path (this, data, url_mark, p-url_mark);
1502
1491
 
1503
- return data.position()-start_position;
1492
+ return data.position()-this.p_start;
1504
1493
  } // execute
1505
1494
 
1506
- int error () {
1495
+ int error (ParserSettings settings, String mes, ByteBuffer data) {
1496
+ settings.call_on_error(this, mes, data, this.p_start);
1507
1497
  this.state = State.dead;
1508
- return this.data.position()-start_position;
1498
+ return data.position()-this.p_start;
1499
+ }
1500
+
1501
+ public boolean http_message_needs_eof() {
1502
+ if(type == ParserType.HTTP_REQUEST){
1503
+ return false;
1504
+ }
1505
+ /* See RFC 2616 section 4.4 */
1506
+ if ((status_code / 100 == 1) || /* 1xx e.g. Continue */
1507
+ (status_code == 204) || /* No Content */
1508
+ (status_code == 304) || /* Not Modified */
1509
+ (flags & F_SKIPBODY) != 0) { /* response to a HEAD request */
1510
+ return false;
1511
+ }
1512
+ if ((flags & F_CHUNKED) != 0 || content_length != -1) {
1513
+ return false;
1514
+ }
1515
+
1516
+ return true;
1509
1517
  }
1510
1518
 
1511
1519
  /* If http_should_keep_alive() in the on_headers_complete or
@@ -1519,19 +1527,153 @@ public class HTTPParser {
1519
1527
  /* HTTP/1.1 */
1520
1528
  if ( 0 != (flags & F_CONNECTION_CLOSE) ) {
1521
1529
  return false;
1522
- } else {
1523
- return true;
1524
1530
  }
1525
1531
  } else {
1526
1532
  /* HTTP/1.0 or earlier */
1527
- if ( 0 != (flags & F_CONNECTION_KEEP_ALIVE) ) {
1528
- return true;
1529
- } else {
1533
+ if ( 0 == (flags & F_CONNECTION_KEEP_ALIVE) ) {
1530
1534
  return false;
1531
1535
  }
1532
1536
  }
1537
+ return !http_message_needs_eof();
1538
+ }
1539
+
1540
+ public int parse_url(ByteBuffer data, boolean is_connect, HTTPParserUrl u) {
1541
+
1542
+ UrlFields uf = UrlFields.UF_MAX;
1543
+ UrlFields old_uf = UrlFields.UF_MAX;
1544
+ u.port = 0;
1545
+ u.field_set = 0;
1546
+ state = (is_connect ? State.req_host_start : State.req_spaces_before_url);
1547
+ int p_init = data.position();
1548
+ int p = 0;
1549
+ byte ch = 0;
1550
+ while (data.position() != data.limit()) {
1551
+ p = data.position();
1552
+ ch = data.get();
1553
+ state = parse_url_char(ch);
1554
+ switch(state) {
1555
+ case dead:
1556
+ return 1;
1557
+
1558
+ /* Skip delimeters */
1559
+ case req_schema_slash:
1560
+ case req_schema_slash_slash:
1561
+ case req_host_start:
1562
+ case req_host_v6_start:
1563
+ case req_host_v6_end:
1564
+ case req_port_start:
1565
+ case req_query_string_start:
1566
+ case req_fragment_start:
1567
+ continue;
1568
+
1569
+ case req_schema:
1570
+ uf = UrlFields.UF_SCHEMA;
1571
+ break;
1572
+
1573
+ case req_host:
1574
+ case req_host_v6:
1575
+ uf = UrlFields.UF_HOST;
1576
+ break;
1577
+
1578
+ case req_port:
1579
+ uf = UrlFields.UF_PORT;
1580
+ break;
1581
+
1582
+ case req_path:
1583
+ uf = UrlFields.UF_PATH;
1584
+ break;
1585
+
1586
+ case req_query_string:
1587
+ uf = UrlFields.UF_QUERY;
1588
+ break;
1589
+
1590
+ case req_fragment:
1591
+ uf = UrlFields.UF_FRAGMENT;
1592
+ break;
1593
+
1594
+ default:
1595
+ return 1;
1596
+ }
1597
+ /* Nothing's changed; soldier on */
1598
+ if (uf == old_uf) {
1599
+ u.field_data[uf.getIndex()].len++;
1600
+ continue;
1601
+ }
1602
+
1603
+ u.field_data[uf.getIndex()].off = p - p_init;
1604
+ u.field_data[uf.getIndex()].len = 1;
1605
+
1606
+ u.field_set |= (1 << uf.getIndex());
1607
+ old_uf = uf;
1608
+
1609
+ }
1610
+
1611
+ /* CONNECT requests can only contain "hostname:port" */
1612
+ if (is_connect && u.field_set != ((1 << UrlFields.UF_HOST.getIndex())|(1 << UrlFields.UF_PORT.getIndex()))) {
1613
+ return 1;
1614
+ }
1615
+
1616
+ /* Make sure we don't end somewhere unexpected */
1617
+ switch (state) {
1618
+ case req_host_v6_start:
1619
+ case req_host_v6:
1620
+ case req_host_v6_end:
1621
+ case req_host:
1622
+ case req_port_start:
1623
+ return 1;
1624
+ default:
1625
+ break;
1626
+ }
1627
+
1628
+ if (0 != (u.field_set & (1 << UrlFields.UF_PORT.getIndex()))) {
1629
+ /* Don't bother with endp; we've already validated the string */
1630
+ int v = strtoi(data, p_init + u.field_data[UrlFields.UF_PORT.getIndex()].off);
1631
+
1632
+ /* Ports have a max value of 2^16 */
1633
+ if (v > 0xffff) {
1634
+ return 1;
1635
+ }
1636
+
1637
+ u.port = v;
1638
+ }
1639
+
1640
+ return 0;
1533
1641
  }
1534
1642
 
1643
+ //hacky reimplementation of srttoul, tailored for our simple needs
1644
+ //we only need to parse port val, so no negative values etc
1645
+ int strtoi(ByteBuffer data, int start_pos) {
1646
+ data.position(start_pos);
1647
+ byte ch;
1648
+ int start = data.position();
1649
+ int end = data.limit();
1650
+ while(data.position() < data.limit()) {
1651
+ ch = data.get();
1652
+ if(Character.isWhitespace((char)ch)){
1653
+ start++;
1654
+ continue;
1655
+ }
1656
+ if(isDigit(ch)){
1657
+ continue;
1658
+ }else{
1659
+ end = data.position() - 1;
1660
+ break;
1661
+ }
1662
+ }
1663
+ byte[] s = new byte[end - start];
1664
+ int cur_pos = data.position();
1665
+ data.position(start);
1666
+ data.get(s);
1667
+ int result = 0;
1668
+ try{
1669
+ result = Integer.parseInt(new String(s, "UTF8"));
1670
+ } catch (UnsupportedEncodingException e) {
1671
+ result = 0;
1672
+ }
1673
+ data.position(cur_pos);
1674
+ return result;
1675
+ }
1676
+
1535
1677
  boolean isDigit(byte b) {
1536
1678
  if (b >= 0x30 && b <=0x39) {
1537
1679
  return true;
@@ -1539,6 +1681,10 @@ public class HTTPParser {
1539
1681
  return false;
1540
1682
  }
1541
1683
 
1684
+ boolean isHex(byte b) {
1685
+ return isDigit(b) || (lower(b) >= 0x61 /*a*/ && lower(b) <= 0x66 /*f*/);
1686
+ }
1687
+
1542
1688
  boolean isAtoZ(byte b) {
1543
1689
  byte c = lower(b);
1544
1690
  return (c>= 0x61 /*a*/ && c <= 0x7a /*z*/);
@@ -1555,9 +1701,28 @@ public class HTTPParser {
1555
1701
  }
1556
1702
 
1557
1703
  byte token(byte b) {
1558
- return (byte)tokens[b];
1704
+ if(!strict){
1705
+ return (b == (byte)' ') ? (byte)' ' : (byte)tokens[b] ;
1706
+ }else{
1707
+ return (byte)tokens[b];
1708
+ }
1709
+ }
1710
+
1711
+ boolean isHostChar(byte ch){
1712
+ if(!strict){
1713
+ return (isAtoZ(ch)) || isDigit(ch) || DOT == ch || DASH == ch || UNDER == ch ;
1714
+ }else{
1715
+ return (isAtoZ(ch)) || isDigit(ch) || DOT == ch || DASH == ch;
1716
+ }
1717
+ }
1718
+
1719
+ boolean isNormalUrlChar(int chi) {
1720
+ if(!strict){
1721
+ return (chi > 0x80) || normal_url_char[chi];
1722
+ }else{
1723
+ return normal_url_char[chi];
1724
+ }
1559
1725
  }
1560
-
1561
1726
 
1562
1727
  HTTPMethod start_req_method_assign(byte c){
1563
1728
  switch (c) {
@@ -1569,7 +1734,7 @@ public class HTTPParser {
1569
1734
  case M: return HTTPMethod.HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */
1570
1735
  case N: return HTTPMethod.HTTP_NOTIFY;
1571
1736
  case O: return HTTPMethod.HTTP_OPTIONS;
1572
- case P: return HTTPMethod.HTTP_POST; /* or PROPFIND, PROPPATH, PUT */
1737
+ case P: return HTTPMethod.HTTP_POST; /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
1573
1738
  case R: return HTTPMethod.HTTP_REPORT;
1574
1739
  case S: return HTTPMethod.HTTP_SUBSCRIBE;
1575
1740
  case T: return HTTPMethod.HTTP_TRACE;
@@ -1583,7 +1748,7 @@ public class HTTPParser {
1583
1748
  return false;
1584
1749
  }
1585
1750
 
1586
- state = State.header_field_start;
1751
+ state = State.header_value_lws;
1587
1752
  // TODO java enums support some sort of bitflag mechanism !?
1588
1753
  switch (header_state) {
1589
1754
  case connection_keep_alive:
@@ -1601,111 +1766,17 @@ public class HTTPParser {
1601
1766
  return true;
1602
1767
  }
1603
1768
 
1604
- boolean headers_almost_done (byte ch, ParserSettings settings) {
1605
-
1606
- if (LF != ch) {
1607
- return false;
1608
- }
1609
- if (0 != (flags & F_TRAILING)) {
1610
- /* End of a chunked request */
1611
-
1612
- settings.call_on_headers_complete(this);
1613
- settings.call_on_message_complete(this);
1614
-
1615
- state = new_message();
1616
-
1617
- return true;
1618
- }
1619
-
1620
- nread = 0;
1621
-
1622
- if (0 != (flags & F_UPGRADE) || HTTPMethod.HTTP_CONNECT == method) {
1623
- upgrade = true;
1624
- }
1625
-
1626
-
1627
- /* Here we call the headers_complete callback. This is somewhat
1628
- * different than other callbacks because if the user returns 1, we
1629
- * will interpret that as saying that this message has no body. This
1630
- * is needed for the annoying case of recieving a response to a HEAD
1631
- * request.
1632
- */
1633
-
1634
- /* (responses to HEAD request contain a CONTENT-LENGTH header
1635
- * but no content)
1636
- *
1637
- * Consider what to do here: I don't like the idea of the callback
1638
- * interface having a different contract in the case of HEAD
1639
- * responses. The alternatives would be either to:
1640
- *
1641
- * a.) require the header_complete callback to implement a different
1642
- * interface or
1643
- *
1644
- * b.) provide an overridden execute(bla, bla, boolean
1645
- * parsingHeader) implementation ...
1646
- */
1647
-
1648
- /*TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO */
1649
- if (null != settings.on_headers_complete) {
1650
- settings.call_on_headers_complete(this);
1651
- //return;
1652
- }
1653
-
1654
- // if (null != settings.on_headers_complete) {
1655
- // switch (settings.on_headers_complete.cb(parser)) {
1656
- // case 0:
1657
- // break;
1658
- //
1659
- // case 1:
1660
- // flags |= F_SKIPBODY;
1661
- // break;
1662
- //
1663
- // default:
1664
- // return p - data; /* Error */ // TODO // RuntimeException ?
1665
- // }
1666
- // }
1667
-
1668
-
1669
- // Exit, the rest of the connect is in a different protocol.
1670
- if (upgrade) {
1671
- settings.call_on_message_complete(this);
1672
- state = State.body_identity_eof;
1673
- return true;
1674
- }
1675
-
1676
- if (0 != (flags & F_SKIPBODY)) {
1677
- settings.call_on_message_complete(this);
1678
- state = new_message();
1679
- } else if (0 != (flags & F_CHUNKED)) {
1680
- /* chunked encoding - ignore Content-Length header */
1681
- state = State.chunk_size_start;
1682
- } else {
1683
- if (content_length == 0) {
1684
- /* Content-Length header given but zero: Content-Length: 0\r\n */
1685
- settings.call_on_message_complete(this);
1686
- state = new_message();
1687
- } else if (content_length > 0) {
1688
- /* Content-Length header given and non-zero */
1689
- state = State.body_identity;
1690
- } else {
1691
- if (type == ParserType.HTTP_REQUEST || http_should_keep_alive()) {
1692
- /* Assume content-length 0 - read the next */
1693
- settings.call_on_message_complete(this);
1694
- state = new_message();
1695
- } else {
1696
- /* Read body until EOF */
1697
- state = State.body_identity_eof;
1698
- }
1699
- }
1700
- }
1701
- return true;
1702
- } // headers_almost_fone
1769
+ // boolean headers_almost_done (byte ch, ParserSettings settings) {
1770
+ // } // headers_almost_done
1703
1771
 
1704
1772
 
1705
1773
  final int min (int a, int b) {
1706
1774
  return a < b ? a : b;
1707
1775
  }
1708
1776
 
1777
+ final int min (int a, long b) {
1778
+ return a < b ? a : (int)b;
1779
+ }
1709
1780
  /* probably not the best place to hide this ... */
1710
1781
  public boolean HTTP_PARSER_STRICT;
1711
1782
  State new_message() {
@@ -1730,6 +1801,7 @@ public class HTTPParser {
1730
1801
  case chunk_data_done :
1731
1802
  case body_identity :
1732
1803
  case body_identity_eof :
1804
+ case message_done :
1733
1805
  return false;
1734
1806
 
1735
1807
  }
@@ -1808,9 +1880,9 @@ public class HTTPParser {
1808
1880
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
1809
1881
  0, 0, 0, 0, 0, 0, 0, 0,
1810
1882
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
1811
- ' ', '!', '"', '#', '$', '%', '&', '\'',
1883
+ 0, '!', 0, '#', '$', '%', '&', '\'',
1812
1884
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
1813
- 0, 0, '*', '+', 0, '-', '.', '/' ,
1885
+ 0, 0, '*', '+', 0, '-', '.', 0 ,
1814
1886
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
1815
1887
  '0', '1', '2', '3', '4', '5', '6', '7',
1816
1888
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
@@ -1830,7 +1902,7 @@ public class HTTPParser {
1830
1902
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
1831
1903
  'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
1832
1904
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
1833
- 'X', 'Y', 'Z', 0, '|', '}', 0, 0,
1905
+ 'X', 'Y', 'Z', 0, '|', 0, '~', 0,
1834
1906
  /* hi bit set, not ascii */
1835
1907
  0, 0, 0, 0, 0, 0, 0, 0,
1836
1908
  0, 0, 0, 0, 0, 0, 0, 0,
@@ -1901,29 +1973,6 @@ public class HTTPParser {
1901
1973
  true, true, true, true, true, true, true, true,
1902
1974
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
1903
1975
  true, true, true, true, true, true, true, false,
1904
-
1905
- /* hi bit set, not ascii */
1906
- /* Remainder of non-ASCII range are accepted as-is to support implicitly UTF-8
1907
- * encoded paths. This is out of spec, but clients generate this and most other
1908
- * HTTP servers support it. We should, too. */
1909
-
1910
- true, true, true, true, true, true, true, true,
1911
- true, true, true, true, true, true, true, true,
1912
- true, true, true, true, true, true, true, true,
1913
- true, true, true, true, true, true, true, true,
1914
- true, true, true, true, true, true, true, true,
1915
- true, true, true, true, true, true, true, true,
1916
- true, true, true, true, true, true, true, true,
1917
- true, true, true, true, true, true, true, true,
1918
- true, true, true, true, true, true, true, true,
1919
- true, true, true, true, true, true, true, true,
1920
- true, true, true, true, true, true, true, true,
1921
- true, true, true, true, true, true, true, true,
1922
- true, true, true, true, true, true, true, true,
1923
- true, true, true, true, true, true, true, true,
1924
- true, true, true, true, true, true, true, true,
1925
- true, true, true, true, true, true, true, true,
1926
-
1927
1976
  };
1928
1977
 
1929
1978
  public static final byte A = 0x41;
@@ -1952,10 +2001,12 @@ public class HTTPParser {
1952
2001
  public static final byte X = 0x58;
1953
2002
  public static final byte Y = 0x59;
1954
2003
  public static final byte Z = 0x5a;
2004
+ public static final byte UNDER = 0x5f;
1955
2005
  public static final byte CR = 0x0d;
1956
2006
  public static final byte LF = 0x0a;
1957
2007
  public static final byte DOT = 0x2e;
1958
2008
  public static final byte SPACE = 0x20;
2009
+ public static final byte TAB = 0x09;
1959
2010
  public static final byte SEMI = 0x3b;
1960
2011
  public static final byte COLON = 0x3a;
1961
2012
  public static final byte HASH = 0x23;
@@ -1970,7 +2021,7 @@ public class HTTPParser {
1970
2021
 
1971
2022
  dead
1972
2023
 
1973
- , start_res_or_res
2024
+ , start_req_or_res
1974
2025
  , res_or_resp_H
1975
2026
  , start_res
1976
2027
  , res_H
@@ -1993,7 +2044,12 @@ public class HTTPParser {
1993
2044
  , req_schema
1994
2045
  , req_schema_slash
1995
2046
  , req_schema_slash_slash
2047
+ , req_host_start
2048
+ , req_host_v6_start
2049
+ , req_host_v6
2050
+ , req_host_v6_end
1996
2051
  , req_host
2052
+ , req_port_start
1997
2053
  , req_port
1998
2054
  , req_path
1999
2055
  , req_query_string_start
@@ -2015,6 +2071,7 @@ public class HTTPParser {
2015
2071
  , header_field
2016
2072
  , header_value_start
2017
2073
  , header_value
2074
+ , header_value_lws
2018
2075
 
2019
2076
  , header_almost_done
2020
2077
 
@@ -2024,10 +2081,11 @@ public class HTTPParser {
2024
2081
  , chunk_size_almost_done
2025
2082
 
2026
2083
  , headers_almost_done
2084
+ , headers_done
2027
2085
  // This space intentionally not left blank, comment from c, for orientation...
2028
2086
  // the c version uses <= s_header_almost_done in java, we list the states explicitly
2029
2087
  // in `parsing_header()`
2030
- /* Important: 's_headers_almost_done' must be the last 'header' state. All
2088
+ /* Important: 's_headers_done' must be the last 'header' state. All
2031
2089
  * states beyond this must be 'body' states. It is used for overflow
2032
2090
  * checking. See the PARSING_HEADER() macro.
2033
2091
  */
@@ -2036,8 +2094,8 @@ public class HTTPParser {
2036
2094
  , chunk_data_done
2037
2095
 
2038
2096
  , body_identity
2039
- , body_identity_eof;
2040
-
2097
+ , body_identity_eof
2098
+ , message_done
2041
2099
 
2042
2100
  }
2043
2101
  enum HState {
@@ -2065,4 +2123,24 @@ public class HTTPParser {
2065
2123
  , connection_keep_alive
2066
2124
  , connection_close
2067
2125
  }
2126
/**
 * Components of a URL, each tagged with a fixed numeric index. The index
 * serves both as a bit position in a URL's field_set mask and as the slot
 * into its field_data array.
 */
public enum UrlFields {
  UF_SCHEMA(0),
  UF_HOST(1),
  UF_PORT(2),
  UF_PATH(3),
  UF_QUERY(4),
  UF_FRAGMENT(5),
  // NOTE(review): looks like a count sentinel rather than a real component — confirm
  UF_MAX(6);

  /** Numeric index of this field. */
  private final int index;

  // Enum constructors are implicitly private.
  UrlFields(int position) {
    this.index = position;
  }

  /** @return the numeric index assigned to this field */
  public int getIndex() {
    return this.index;
  }
}
2068
2146
  }