midori_http_parser 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +12 -0
  3. data/.gitmodules +6 -0
  4. data/.travis.yml +33 -0
  5. data/Gemfile +2 -0
  6. data/LICENSE-MIT +20 -0
  7. data/README.md +90 -0
  8. data/Rakefile +6 -0
  9. data/bench/standalone.rb +23 -0
  10. data/bench/thin.rb +58 -0
  11. data/ext/ruby_http_parser/.gitignore +1 -0
  12. data/ext/ruby_http_parser/RubyHttpParserService.java +18 -0
  13. data/ext/ruby_http_parser/ext_help.h +18 -0
  14. data/ext/ruby_http_parser/extconf.rb +24 -0
  15. data/ext/ruby_http_parser/org/ruby_http_parser/RubyHttpParser.java +495 -0
  16. data/ext/ruby_http_parser/ruby_http_parser.c +516 -0
  17. data/ext/ruby_http_parser/vendor/.gitkeep +0 -0
  18. data/ext/ruby_http_parser/vendor/http-parser-java/AUTHORS +32 -0
  19. data/ext/ruby_http_parser/vendor/http-parser-java/LICENSE-MIT +48 -0
  20. data/ext/ruby_http_parser/vendor/http-parser-java/README.md +183 -0
  21. data/ext/ruby_http_parser/vendor/http-parser-java/TODO +28 -0
  22. data/ext/ruby_http_parser/vendor/http-parser-java/build.xml +74 -0
  23. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.c +2175 -0
  24. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.gyp +79 -0
  25. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.h +304 -0
  26. data/ext/ruby_http_parser/vendor/http-parser-java/src/Http-parser.java.iml +22 -0
  27. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/FieldData.java +41 -0
  28. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPCallback.java +8 -0
  29. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPDataCallback.java +34 -0
  30. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPErrorCallback.java +12 -0
  31. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPException.java +9 -0
  32. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPMethod.java +113 -0
  33. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPParser.java +36 -0
  34. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPParserUrl.java +76 -0
  35. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserSettings.java +256 -0
  36. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserType.java +13 -0
  37. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/Util.java +111 -0
  38. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPCallback.java +5 -0
  39. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPDataCallback.java +25 -0
  40. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPErrorCallback.java +7 -0
  41. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPParser.java +2171 -0
  42. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/ParserSettings.java +83 -0
  43. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Message.java +374 -0
  44. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/ParseUrl.java +51 -0
  45. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Requests.java +69 -0
  46. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Responses.java +52 -0
  47. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Test.java +16 -0
  48. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestHeaderOverflowError.java +48 -0
  49. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestLoaderNG.java +212 -0
  50. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestNoOverflowLongBody.java +62 -0
  51. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/UnitTest.java +117 -0
  52. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Upgrade.java +27 -0
  53. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Url.java +127 -0
  54. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Util.java +236 -0
  55. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/WrongContentLength.java +59 -0
  56. data/ext/ruby_http_parser/vendor/http-parser-java/test.c +3425 -0
  57. data/ext/ruby_http_parser/vendor/http-parser-java/tests.dumped +845 -0
  58. data/ext/ruby_http_parser/vendor/http-parser-java/tests.utf8 +17 -0
  59. data/ext/ruby_http_parser/vendor/http-parser-java/tools/byte_constants.rb +6 -0
  60. data/ext/ruby_http_parser/vendor/http-parser-java/tools/const_char.rb +13 -0
  61. data/ext/ruby_http_parser/vendor/http-parser-java/tools/lowcase.rb +15 -0
  62. data/ext/ruby_http_parser/vendor/http-parser-java/tools/parse_tests.rb +33 -0
  63. data/ext/ruby_http_parser/vendor/http-parser/AUTHORS +68 -0
  64. data/ext/ruby_http_parser/vendor/http-parser/LICENSE-MIT +23 -0
  65. data/ext/ruby_http_parser/vendor/http-parser/README.md +246 -0
  66. data/ext/ruby_http_parser/vendor/http-parser/bench.c +111 -0
  67. data/ext/ruby_http_parser/vendor/http-parser/contrib/parsertrace.c +160 -0
  68. data/ext/ruby_http_parser/vendor/http-parser/contrib/url_parser.c +47 -0
  69. data/ext/ruby_http_parser/vendor/http-parser/http_parser.c +2470 -0
  70. data/ext/ruby_http_parser/vendor/http-parser/http_parser.gyp +111 -0
  71. data/ext/ruby_http_parser/vendor/http-parser/http_parser.h +432 -0
  72. data/ext/ruby_http_parser/vendor/http-parser/test.c +4226 -0
  73. data/ext/ruby_http_parser/vendor/http-parser/test_fast +0 -0
  74. data/ext/ruby_http_parser/vendor/http-parser/test_g +0 -0
  75. data/lib/http/parser.rb +1 -0
  76. data/lib/http_parser.rb +21 -0
  77. data/midori_http_parser.gemspec +24 -0
  78. data/spec/parser_spec.rb +376 -0
  79. data/spec/spec_helper.rb +1 -0
  80. data/spec/support/requests.json +631 -0
  81. data/spec/support/responses.json +375 -0
  82. data/tasks/compile.rake +42 -0
  83. data/tasks/fixtures.rake +71 -0
  84. data/tasks/spec.rake +5 -0
  85. data/tasks/submodules.rake +7 -0
  86. metadata +206 -0
@@ -0,0 +1,13 @@
1
+ package http_parser;
2
+
3
/**
 * The kinds of HTTP message a parser can be configured for: requests only,
 * responses only, or both.
 */
public enum ParserType {
  HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH;

  /**
   * Looks up a constant by its name, ignoring case.
   *
   * @param s the name to look up; may be {@code null}
   * @return the constant whose name matches {@code s}, or {@code null} when
   *         {@code s} is {@code null} or matches none of the constants
   */
  public static ParserType parse(String s) {
    for (ParserType candidate : values()) {
      if (candidate.name().equalsIgnoreCase(s)) {
        return candidate;
      }
    }
    return null;
  }
}
13
+
@@ -0,0 +1,111 @@
1
+ package http_parser;
2
+
3
+ import java.nio.ByteBuffer;
4
+
5
public class Util {
  // public static String toString(http_parser.lolevel.HTTPParser p) {
  //   StringBuilder builder = new StringBuilder();
  //
  //   // the stuff up to the break is ephemeral and only meaningful
  //   // while the parser is parsing. In general, this method is
  //   // probably only useful during debugging.
  //
  //   builder.append("state :"); builder.append(p.state); builder.append("\n");
  //   builder.append("header_state :"); builder.append(p.header_state); builder.append("\n");
  //   builder.append("strict :"); builder.append(p.strict); builder.append("\n");
  //   builder.append("index :"); builder.append(p.index); builder.append("\n");
  //   builder.append("flags :"); builder.append(p.flags); builder.append("\n");
  //   builder.append("nread :"); builder.append(p.nread); builder.append("\n");
  //   builder.append("content_length :"); builder.append(p.content_length); builder.append("\n");
  //
  //
  //   builder.append("type :"); builder.append(p.type); builder.append("\n");
  //   builder.append("http_major :"); builder.append(p.http_major); builder.append("\n");
  //   builder.append("http_minor :"); builder.append(p.http_minor); builder.append("\n");
  //   builder.append("status_code :"); builder.append(p.status_code); builder.append("\n");
  //   builder.append("method :"); builder.append(p.method); builder.append("\n");
  //   builder.append("upgrade :"); builder.append(p.upgrade); builder.append("\n");
  //
  //   return builder.toString();
  //
  // }

  /**
   * Renders the bytes surrounding the buffer's current position as a line of
   * text, followed by a second line of dots ending in a caret that points at
   * the current position:
   *
   * <pre>
   * GEt / HTTP 1_1
   * ............^
   * </pre>
   *
   * At most 72 bytes are shown. When more than 72 bytes lie between
   * {@code beginning} and the buffer's limit, a 72-byte window is chosen:
   * centred on the position when there is plenty of data on both sides,
   * otherwise anchored at {@code beginning} or at the limit.
   *
   * The buffer's position is restored before this method returns. Bytes are
   * decoded as ISO-8859-1 so that every byte yields exactly one character and
   * the caret stays aligned with the data.
   *
   * NOTE(review): {@code mes} is accepted but not included in the returned
   * text, although the original comment suggested it should be the first
   * line. Left unchanged to keep the output format stable; confirm intent.
   *
   * @param mes       the error message (currently unused, see note above)
   * @param b         the buffer being parsed; its position marks the error
   * @param beginning the index in {@code b} where the current parse started
   * @return the excerpt line, a newline, and the dots-and-caret marker line
   */
  public static String error (String mes, ByteBuffer b, int beginning) {
    // TODO (from the original author): this should move into the high level
    // interface, and the initial buffer position should be tracked in execute
    // so that a mark() set outside of our control is not disturbed.

    final int mes_width = 72;
    final int p   = b.position();    // error position
    final int end = b.limit();       // this is the end
    final int m   = end - beginning; // max mes length

    final int start;                 // index of the first byte shown
    final byte[] orig;
    if (m <= mes_width) {
      // everything fits, show it all
      start = beginning;
      orig = new byte[m];
    } else {
      // we need to trim a bit off the beginning and/or the end. Because
      // m > mes_width there is always enough data for a full window.
      final int half   = mes_width / 2;
      final int before = p - beginning;
      final int after  = end - p;
      if (before > half && after > half) {
        // plenty of stuff in front of and behind the error
        start = p - half;
      } else if (before <= half) {
        // take all of the beginning and as much of the rest as possible
        start = beginning;
      } else {
        // plenty of stuff before, little behind: show the last mes_width bytes
        start = end - mes_width;
      }
      orig = new byte[mes_width];
    }

    try {
      b.position(start);
      b.get(orig, 0, orig.length);
    } finally {
      b.position(p); // restore position, even if reading the excerpt failed
    }

    // The caret offset is always relative to the first byte shown. The
    // original left it as the absolute position in the "take all of the
    // beginning" case, misplacing the caret whenever beginning > 0.
    final int p_adj = p - start;

    StringBuilder builder = new StringBuilder(orig.length + p_adj + 2);
    builder.append(new String(orig, java.nio.charset.StandardCharsets.ISO_8859_1));
    builder.append("\n");
    // '<' rather than '!=' so a negative offset cannot loop until overflow
    for (int i = 0; i < p_adj; ++i) {
      builder.append(".");
    }
    builder.append("^");

    return builder.toString();
  }
}
@@ -0,0 +1,5 @@
1
+ package http_parser.lolevel;
2
+
3
+ public interface HTTPCallback {
4
+ public int cb (HTTPParser parser);
5
+ }
@@ -0,0 +1,25 @@
1
+ package http_parser.lolevel;
2
+
3
+ import java.nio.ByteBuffer;
4
+
5
+ public interface HTTPDataCallback {
6
+ /*
7
+ very raw and extremly foolhardy! DANGER!
8
+ The whole Buffer concept is difficult enough to grasp as it is,
9
+ we pass in a buffer with an arbitrary position.
10
+
11
+ The interesting data is located at position pos and is len
12
+ bytes long.
13
+
14
+ The contract of this callback is that the buffer is
15
+ returned in the state that it was passed in, so implementing
16
+ this require good citizenship, you'll need to remember the current
17
+ position, change the position to get at the data you're interested
18
+ in and then set the position back to how you found it...
19
+
20
+ //TODO: there should be an abstract implementation that implements
21
+ cb as described above, marks it final an provides a new callback
22
+ with signature cb(byte[], int, int)
23
+ */
24
+ public int cb(HTTPParser p, ByteBuffer buf, int pos, int len);
25
+ }
@@ -0,0 +1,7 @@
1
+ package http_parser.lolevel;
2
+
3
+ import java.nio.ByteBuffer;
4
+
5
+ public interface HTTPErrorCallback {
6
+ public void cb (HTTPParser parser, String mes, ByteBuffer buf, int initial_position);
7
+ }
@@ -0,0 +1,2171 @@
1
+ package http_parser.lolevel;
2
+
3
+ import java.nio.ByteBuffer;
4
+ import http_parser.HTTPException;
5
+ import http_parser.HTTPMethod;
6
+ import http_parser.HTTPParserUrl;
7
+ import http_parser.ParserType;
8
+ import static http_parser.lolevel.HTTPParser.C.*;
9
+ import static http_parser.lolevel.HTTPParser.State.*;
10
+
11
+ public class HTTPParser {
12
+ /* lots of unsigned chars here, not sure what
13
+ to do about them, `bytes` in java suck... */
14
+
15
+ ParserType type;
16
+ State state;
17
+ HState header_state;
18
+ boolean strict;
19
+
20
+ int index;
21
+ int flags; // TODO
22
+
23
+ int nread;
24
+ long content_length;
25
+
26
+ int p_start; // updated each call to execute to indicate where the buffer was before we began calling it.
27
+
28
+ /** READ-ONLY **/
29
+ public int http_major;
30
+ public int http_minor;
31
+ public int status_code; /* responses only */
32
+ public HTTPMethod method; /* requests only */
33
+
34
+ /* true = Upgrade header was present and the parser has exited because of that.
35
+ * false = No upgrade header present.
36
+ * Should be checked when http_parser_execute() returns in addition to
37
+ * error checking.
38
+ */
39
+ public boolean upgrade;
40
+
41
+ /** PUBLIC **/
42
+ // TODO : this is used in c to maintain application state.
43
+ // is this even necessary? we have state in java ?
44
+ // consider
45
+ // Object data; /* A pointer to get hook to the "connection" or "socket" object */
46
+
47
+
48
+ /*
49
+ * technically we could combine all of these (except for url_mark) into one
50
+ * variable, saving stack space, but it seems more clear to have them
51
+ * separated.
52
+ */
53
+ int header_field_mark = -1;
54
+ int header_value_mark = -1;
55
+ int url_mark = -1;
56
+ int body_mark = -1;
57
+
58
/**
 * Creates a parser of type {@link ParserType#HTTP_BOTH}, i.e. one that works
 * out for itself whether it is looking at a request or a response.
 */
public HTTPParser() {
  this(ParserType.HTTP_BOTH);
}
65
+
66
+ /**
67
+ * Construct a Parser and initialise it to parse either
68
+ * requests or responses.
69
+ */
70
+ public HTTPParser(ParserType type) {
71
+ this.type = type;
72
+ switch(type) {
73
+ case HTTP_REQUEST:
74
+ this.state = State.start_req;
75
+ break;
76
+ case HTTP_RESPONSE:
77
+ this.state = State.start_res;
78
+ break;
79
+ case HTTP_BOTH:
80
+ this.state = State.start_req_or_res;
81
+ break;
82
+ default:
83
+ throw new HTTPException("can't happen, invalid ParserType enum");
84
+ }
85
+ }
86
+
87
+ /*
88
+ * Utility to facilitate System.out.println style debugging (the way god intended)
89
+ */
90
+ static void p(Object o) {System.out.println(o);}
91
+
92
+ /** Comment from C version follows
93
+ *
94
+ * Our URL parser.
95
+ *
96
+ * This is designed to be shared by http_parser_execute() for URL validation,
97
+ * hence it has a state transition + byte-for-byte interface. In addition, it
98
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
99
+ * work of turning state transitions URL components for its API.
100
+ *
101
+ * This function should only be invoked with non-space characters. It is
102
+ * assumed that the caller cares about (and can detect) the transition between
103
+ * URL and non-URL states by looking for these.
104
+ */
105
+ public State parse_url_char(byte ch) {
106
+
107
+ int chi = ch & 0xff; // utility, ch without signedness for table lookups.
108
+
109
+ if(SPACE == ch){
110
+ throw new HTTPException("space as url char");
111
+ }
112
+
113
+ switch(state) {
114
+ case req_spaces_before_url:
115
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
116
+ * All methods except CONNECT are followed by '/' or '*'.
117
+ */
118
+ if(SLASH == ch || STAR == ch){
119
+ return req_path;
120
+ }
121
+ if(isAtoZ(ch)){
122
+ return req_schema;
123
+ }
124
+ break;
125
+ case req_schema:
126
+ if(isAtoZ(ch)){
127
+ return req_schema;
128
+ }
129
+ if(COLON == ch){
130
+ return req_schema_slash;
131
+ }
132
+ break;
133
+ case req_schema_slash:
134
+ if(SLASH == ch){
135
+ return req_schema_slash_slash;
136
+ }
137
+ break;
138
+ case req_schema_slash_slash:
139
+ if(SLASH == ch){
140
+ return req_host_start;
141
+ }
142
+ break;
143
+ case req_host_start:
144
+ if (ch == (byte)'[') {
145
+ return req_host_v6_start;
146
+ }
147
+ if (isHostChar(ch)) {
148
+ return req_host;
149
+ }
150
+ break;
151
+
152
+ case req_host:
153
+ if (isHostChar(ch)) {
154
+ return req_host;
155
+ }
156
+
157
+ /* FALLTHROUGH */
158
+ case req_host_v6_end:
159
+ switch (ch) {
160
+ case ':':
161
+ return req_port_start;
162
+ case '/':
163
+ return req_path;
164
+ case '?':
165
+ return req_query_string_start;
166
+ }
167
+ break;
168
+
169
+ case req_host_v6:
170
+ if (ch == ']') {
171
+ return req_host_v6_end;
172
+ }
173
+
174
+ /* FALLTHROUGH */
175
+ case req_host_v6_start:
176
+ if (isHex(ch) || ch == ':') {
177
+ return req_host_v6;
178
+ }
179
+ break;
180
+
181
+ case req_port:
182
+ switch (ch) {
183
+ case '/':
184
+ return req_path;
185
+ case '?':
186
+ return req_query_string_start;
187
+ }
188
+
189
+ /* FALLTHROUGH */
190
+ case req_port_start:
191
+ if (isDigit(ch)) {
192
+ return req_port;
193
+ }
194
+ break;
195
+
196
+ case req_path:
197
+ if (isNormalUrlChar(chi)) {
198
+ return req_path;
199
+ }
200
+ switch (ch) {
201
+ case '?':
202
+ return req_query_string_start;
203
+ case '#':
204
+ return req_fragment_start;
205
+ }
206
+
207
+ break;
208
+
209
+ case req_query_string_start:
210
+ case req_query_string:
211
+ if (isNormalUrlChar(chi)) {
212
+ return req_query_string;
213
+ }
214
+
215
+ switch (ch) {
216
+ case '?':
217
+ /* allow extra '?' in query string */
218
+ return req_query_string;
219
+
220
+ case '#':
221
+ return req_fragment_start;
222
+ }
223
+
224
+ break;
225
+
226
+ case req_fragment_start:
227
+ if (isNormalUrlChar(chi)) {
228
+ return req_fragment;
229
+ }
230
+ switch (ch) {
231
+ case '?':
232
+ return req_fragment;
233
+
234
+ case '#':
235
+ return req_fragment_start;
236
+ }
237
+ break;
238
+
239
+ case req_fragment:
240
+ if (isNormalUrlChar(ch)) {
241
+ return req_fragment;
242
+ }
243
+
244
+ switch (ch) {
245
+ case '?':
246
+ case '#':
247
+ return req_fragment;
248
+ }
249
+
250
+ break;
251
+ default:
252
+ break;
253
+ }
254
+
255
+ /* We should never fall out of the switch above unless there's an error */
256
+ return dead;
257
+ }
258
+
259
+ /** Execute the parser with the currently available data contained in
260
+ * the buffer. The buffers position() and limit() need to be set
261
+ * correctly (obviously) and will be updated appropriately when the
262
+ * method returns to reflect the consumed data.
263
+ */
264
+ public int execute(ParserSettings settings, ByteBuffer data) {
265
+
266
+ int p = data.position();
267
+ this.p_start = p; // this is used for pretty printing errors.
268
+ // and returning the amount of processed bytes.
269
+
270
+
271
+ // In case the headers don't provide information about the content
272
+ // length, `execute` needs to be called with an empty buffer to
273
+ // indicate that all the data has been send be the client/server,
274
+ // else there is no way of knowing the message is complete.
275
+ int len = (data.limit() - data.position());
276
+ if (0 == len) {
277
+ // if (State.body_identity_eof == state) {
278
+ // settings.call_on_message_complete(this);
279
+ // }
280
+ switch (state) {
281
+ case body_identity_eof:
282
+ settings.call_on_message_complete(this);
283
+ return data.position() - this.p_start;
284
+
285
+ case dead:
286
+ case start_req_or_res:
287
+ case start_res:
288
+ case start_req:
289
+ return data.position() - this.p_start;
290
+
291
+ default:
292
+ // should we really consider this an error!?
293
+ throw new HTTPException("empty bytes! "+state); // error
294
+ }
295
+ }
296
+
297
+
298
+ // in case the _previous_ call to the parser only has data to get to
299
+ // the middle of certain fields, we need to update marks to point at
300
+ // the beginning of the current buffer.
301
+ switch (state) {
302
+ case header_field:
303
+ header_field_mark = p;
304
+ break;
305
+ case header_value:
306
+ header_value_mark = p;
307
+ break;
308
+ case req_path:
309
+ case req_schema:
310
+ case req_schema_slash:
311
+ case req_schema_slash_slash:
312
+ case req_host_start:
313
+ case req_host_v6_start:
314
+ case req_host_v6:
315
+ case req_host_v6_end:
316
+ case req_host:
317
+ case req_port_start:
318
+ case req_port:
319
+ case req_query_string_start:
320
+ case req_query_string:
321
+ case req_fragment_start:
322
+ case req_fragment:
323
+ url_mark = p;
324
+ break;
325
+ }
326
+ boolean reexecute = false;
327
+ int pe = 0;
328
+ byte ch = 0;
329
+ int chi = 0;
330
+ byte c = -1;
331
+ int to_read = 0;
332
+
333
+ // this is where the work gets done, traverse the available data...
334
+ while (data.position() != data.limit() || reexecute) {
335
+ // p(state + ": r: " + reexecute + " :: " +p );
336
+
337
+ if(!reexecute){
338
+ p = data.position();
339
+ pe = data.limit();
340
+ ch = data.get(); // the current character to process.
341
+ chi = ch & 0xff; // utility, ch without signedness for table lookups.
342
+ c = -1; // utility variably used for up- and downcasing etc.
343
+ to_read = 0; // used to keep track of how much of body, etc. is left to read
344
+
345
+ if (parsing_header(state)) {
346
+ ++nread;
347
+ if (nread > HTTP_MAX_HEADER_SIZE) {
348
+ return error(settings, "possible buffer overflow", data);
349
+ }
350
+ }
351
+ }
352
+ reexecute = false;
353
+ // p(state + " ::: " + ch + " : " + (((CR == ch) || (LF == ch)) ? ch : ("'" + (char)ch + "'")) +": "+p );
354
+
355
+ switch (state) {
356
+ /*
357
+ * this state is used after a 'Connection: close' message
358
+ * the parser will error out if it reads another message
359
+ */
360
+ case dead:
361
+ if (CR == ch || LF == ch){
362
+ break;
363
+ }
364
+ return error(settings, "Connection already closed", data);
365
+
366
+
367
+
368
+ case start_req_or_res:
369
+ if (CR == ch || LF == ch){
370
+ break;
371
+ }
372
+ flags = 0;
373
+ content_length = -1;
374
+
375
+ if (H == ch) {
376
+ state = State.res_or_resp_H;
377
+ } else {
378
+ type = ParserType.HTTP_REQUEST;
379
+ method = start_req_method_assign(ch);
380
+ if (null == method) {
381
+ return error(settings, "invalid method", data);
382
+ }
383
+ index = 1;
384
+ state = State.req_method;
385
+ }
386
+ settings.call_on_message_begin(this);
387
+ break;
388
+
389
+
390
+
391
+ case res_or_resp_H:
392
+ if (T == ch) {
393
+ type = ParserType.HTTP_RESPONSE;
394
+ state = State.res_HT;
395
+ } else {
396
+ if (E != ch) {
397
+ return error(settings, "not E", data);
398
+ }
399
+ type = ParserType.HTTP_REQUEST;
400
+ method = HTTPMethod.HTTP_HEAD;
401
+ index = 2;
402
+ state = State.req_method;
403
+ }
404
+ break;
405
+
406
+
407
+
408
+ case start_res:
409
+ flags = 0;
410
+ content_length = -1;
411
+
412
+ switch(ch) {
413
+ case H:
414
+ state = State.res_H;
415
+ break;
416
+ case CR:
417
+ case LF:
418
+ break;
419
+ default:
420
+ return error(settings, "Not H or CR/LF", data);
421
+ }
422
+
423
+ settings.call_on_message_begin(this);
424
+ break;
425
+
426
+
427
+
428
+ case res_H:
429
+ if (strict && T != ch) {
430
+ return error(settings, "Not T", data);
431
+ }
432
+ state = State.res_HT;
433
+ break;
434
+ case res_HT:
435
+ if (strict && T != ch) {
436
+ return error(settings, "Not T2", data);
437
+ }
438
+ state = State.res_HTT;
439
+ break;
440
+ case res_HTT:
441
+ if (strict && P != ch) {
442
+ return error(settings, "Not P", data);
443
+ }
444
+ state = State.res_HTTP;
445
+ break;
446
+ case res_HTTP:
447
+ if (strict && SLASH != ch) {
448
+ return error(settings, "Not '/'", data);
449
+ }
450
+ state = State.res_first_http_major;
451
+ break;
452
+
453
+
454
+
455
+ case res_first_http_major:
456
+ if (!isDigit(ch)) {
457
+ return error(settings, "Not a digit", data);
458
+ }
459
+ http_major = (int) ch - 0x30;
460
+ state = State.res_http_major;
461
+ break;
462
+
463
+ /* major HTTP version or dot */
464
+ case res_http_major:
465
+ if (DOT == ch) {
466
+ state = State.res_first_http_minor;
467
+ break;
468
+ }
469
+ if (!isDigit(ch)) {
470
+ return error(settings, "Not a digit", data);
471
+ }
472
+ http_major *= 10;
473
+ http_major += (ch - 0x30);
474
+
475
+ if (http_major > 999) {
476
+ return error(settings, "invalid http major version: ", data);
477
+ }
478
+ break;
479
+
480
+ /* first digit of minor HTTP version */
481
+ case res_first_http_minor:
482
+ if (!isDigit(ch)) {
483
+ return error(settings, "Not a digit", data);
484
+ }
485
+ http_minor = (int)ch - 0x30;
486
+ state = State.res_http_minor;
487
+ break;
488
+
489
+ /* minor HTTP version or end of request line */
490
+ case res_http_minor:
491
+ if (SPACE == ch) {
492
+ state = State.res_first_status_code;
493
+ break;
494
+ }
495
+ if (!isDigit(ch)) {
496
+ return error(settings, "Not a digit", data);
497
+ }
498
+ http_minor *= 10;
499
+ http_minor += (ch - 0x30);
500
+ if (http_minor > 999) {
501
+ return error(settings, "invalid http minor version: ", data);
502
+ }
503
+ break;
504
+
505
+
506
+
507
+ case res_first_status_code:
508
+ if (!isDigit(ch)) {
509
+ if (SPACE == ch) {
510
+ break;
511
+ }
512
+ return error(settings, "Not a digit (status code)", data);
513
+ }
514
+ status_code = (int)ch - 0x30;
515
+ state = State.res_status_code;
516
+ break;
517
+
518
+ case res_status_code:
519
+ if (!isDigit(ch)) {
520
+ switch(ch) {
521
+ case SPACE:
522
+ state = State.res_status;
523
+ break;
524
+ case CR:
525
+ state = State.res_line_almost_done;
526
+ break;
527
+ case LF:
528
+ state = State.header_field_start;
529
+ break;
530
+ default:
531
+ return error(settings, "not a valid status code", data);
532
+ }
533
+ break;
534
+ }
535
+ status_code *= 10;
536
+ status_code += (int)ch - 0x30;
537
+ if (status_code > 999) {
538
+ return error(settings, "ridiculous status code:", data);
539
+ }
540
+
541
+ if (status_code > 99) {
542
+ settings.call_on_status_complete(this);
543
+ }
544
+ break;
545
+
546
+ case res_status:
547
+ /* the human readable status. e.g. "NOT FOUND"
548
+ * we are not humans so just ignore this
549
+ * we are not men, we are devo. */
550
+
551
+ if (CR == ch) {
552
+ state = State.res_line_almost_done;
553
+ break;
554
+ }
555
+ if (LF == ch) {
556
+ state = State.header_field_start;
557
+ break;
558
+ }
559
+ break;
560
+
561
+ case res_line_almost_done:
562
+ if (strict && LF != ch) {
563
+ return error(settings, "not LF", data);
564
+ }
565
+ state = State.header_field_start;
566
+ break;
567
+
568
+
569
+
570
+ case start_req:
571
+ if (CR==ch || LF == ch) {
572
+ break;
573
+ }
574
+ flags = 0;
575
+ content_length = -1;
576
+
577
+ if(!isAtoZ(ch)){
578
+ return error(settings, "invalid method", data);
579
+ }
580
+
581
+ method = start_req_method_assign(ch);
582
+ if (null == method) {
583
+ return error(settings, "invalid method", data);
584
+ }
585
+ index = 1;
586
+ state = State.req_method;
587
+
588
+ settings.call_on_message_begin(this);
589
+ break;
590
+
591
+
592
+
593
+ case req_method:
594
+ if (0 == ch) {
595
+ return error(settings, "NULL in method", data);
596
+ }
597
+
598
+ byte [] arr = method.bytes;
599
+
600
+ if (SPACE == ch && index == arr.length) {
601
+ state = State.req_spaces_before_url;
602
+ } else if (arr[index] == ch) {
603
+ // wuhu!
604
+ } else if (HTTPMethod.HTTP_CONNECT == method) {
605
+ if (1 == index && H == ch) {
606
+ method = HTTPMethod.HTTP_CHECKOUT;
607
+ } else if (2 == index && P == ch) {
608
+ method = HTTPMethod.HTTP_COPY;
609
+ }
610
+ } else if (HTTPMethod.HTTP_MKCOL == method) {
611
+ if (1 == index && O == ch) {
612
+ method = HTTPMethod.HTTP_MOVE;
613
+ } else if (1 == index && E == ch) {
614
+ method = HTTPMethod.HTTP_MERGE;
615
+ } else if (1 == index && DASH == ch) { /* M-SEARCH */
616
+ method = HTTPMethod.HTTP_MSEARCH;
617
+ } else if (2 == index && A == ch) {
618
+ method = HTTPMethod.HTTP_MKACTIVITY;
619
+ }
620
+ } else if (1 == index && HTTPMethod.HTTP_POST == method) {
621
+ if(R == ch) {
622
+ method = HTTPMethod.HTTP_PROPFIND; /* or HTTP_PROPPATCH */
623
+ }else if(U == ch){
624
+ method = HTTPMethod.HTTP_PUT; /* or HTTP_PURGE */
625
+ }else if(A == ch){
626
+ method = HTTPMethod.HTTP_PATCH;
627
+ }
628
+ } else if (1 == index) {
629
+ if (HTTPMethod.HTTP_LINK == method) {
630
+ if (O == ch) {
631
+ method = HTTPMethod.HTTP_LOCK;
632
+ }
633
+ }
634
+ } else if (2 == index) {
635
+ if (HTTPMethod.HTTP_PUT == method) {
636
+ if (R == ch) {
637
+ method = HTTPMethod.HTTP_PURGE;
638
+ }
639
+ }
640
+ } else if (3 == index) {
641
+ if (HTTPMethod.HTTP_UNLINK == method) {
642
+ if (U == ch) {
643
+ method = HTTPMethod.HTTP_UNSUBSCRIBE;
644
+ } else if (O == ch) {
645
+ method = HTTPMethod.HTTP_UNLOCK;
646
+ }
647
+ }
648
+ } else if(4 == index && HTTPMethod.HTTP_PROPFIND == method && P == ch){
649
+ method = HTTPMethod.HTTP_PROPPATCH;
650
+ } else {
651
+ return error(settings, "Invalid HTTP method", data);
652
+ }
653
+
654
+ ++index;
655
+ break;
656
+
657
+
658
+
659
+ /******************* URL *******************/
660
+ case req_spaces_before_url:
661
+ if (SPACE == ch) {
662
+ break;
663
+ }
664
+ url_mark = p;
665
+ if(HTTPMethod.HTTP_CONNECT == method){
666
+ state = req_host_start;
667
+ }
668
+
669
+ state = parse_url_char(ch);
670
+ if(state == dead){
671
+ return error(settings, "Invalid something", data);
672
+ }
673
+ break;
674
+
675
+
676
+ case req_schema:
677
+ case req_schema_slash:
678
+ case req_schema_slash_slash:
679
+ case req_host_start:
680
+ case req_host_v6_start:
681
+ case req_host_v6:
682
+ case req_port_start:
683
+ switch (ch) {
684
+ /* No whitespace allowed here */
685
+ case SPACE:
686
+ case CR:
687
+ case LF:
688
+ return error(settings, "unexpected char in path", data);
689
+ default:
690
+ state = parse_url_char(ch);
691
+ if(dead == state){
692
+ return error(settings, "unexpected char in path", data);
693
+ }
694
+ }
695
+ break;
696
+
697
+ case req_host:
698
+ case req_host_v6_end:
699
+ case req_port:
700
+ case req_path:
701
+ case req_query_string_start:
702
+ case req_query_string:
703
+ case req_fragment_start:
704
+ case req_fragment:
705
+ switch (ch) {
706
+ case SPACE:
707
+ settings.call_on_url(this, data, url_mark, p-url_mark);
708
+ settings.call_on_path(this, data, url_mark, p - url_mark);
709
+ url_mark = -1;
710
+ state = State.req_http_start;
711
+ break;
712
+ case CR:
713
+ case LF:
714
+ http_major = 0;
715
+ http_minor = 9;
716
+ state = (CR == ch) ? req_line_almost_done : header_field_start;
717
+ settings.call_on_url(this, data, url_mark, p-url_mark); //TODO check params!!!
718
+ settings.call_on_path(this, data, url_mark, p-url_mark);
719
+ url_mark = -1;
720
+ break;
721
+ default:
722
+ state = parse_url_char(ch);
723
+ if(dead == state){
724
+ return error(settings, "unexpected char in path", data);
725
+ }
726
+ }
727
+ break;
728
+ /******************* URL *******************/
729
+
730
+
731
+
732
+ /******************* HTTP 1.1 *******************/
733
+ case req_http_start:
734
+ switch (ch) {
735
+ case H:
736
+ state = State.req_http_H;
737
+ break;
738
+ case SPACE:
739
+ break;
740
+ default:
741
+ return error(settings, "error in req_http_H", data);
742
+ }
743
+ break;
744
+
745
+ case req_http_H:
746
+ if (strict && T != ch) {
747
+ return error(settings, "unexpected char", data);
748
+ }
749
+ state = State.req_http_HT;
750
+ break;
751
+
752
+ case req_http_HT:
753
+ if (strict && T != ch) {
754
+ return error(settings, "unexpected char", data);
755
+ }
756
+ state = State.req_http_HTT;
757
+ break;
758
+
759
+ case req_http_HTT:
760
+ if (strict && P != ch) {
761
+ return error(settings, "unexpected char", data);
762
+ }
763
+ state = State.req_http_HTTP;
764
+ break;
765
+
766
+ case req_http_HTTP:
767
+ if (strict && SLASH != ch) {
768
+ return error(settings, "unexpected char", data);
769
+ }
770
+ state = req_first_http_major;
771
+ break;
772
+
773
+ /* first digit of major HTTP version */
774
+ case req_first_http_major:
775
+ if (!isDigit(ch)) {
776
+ return error(settings, "non digit in http major", data);
777
+ }
778
+ http_major = (int)ch - 0x30;
779
+ state = State.req_http_major;
780
+ break;
781
+
782
+ /* major HTTP version or dot */
783
+ case req_http_major:
784
+ if (DOT == ch) {
785
+ state = State.req_first_http_minor;
786
+ break;
787
+ }
788
+
789
+ if (!isDigit(ch)) {
790
+ return error(settings, "non digit in http major", data);
791
+ }
792
+
793
+ http_major *= 10;
794
+ http_major += (int)ch - 0x30;
795
+
796
+ if (http_major > 999) {
797
+ return error(settings, "ridiculous http major", data);
798
+ };
799
+ break;
800
+
801
+ /* first digit of minor HTTP version */
802
+ case req_first_http_minor:
803
+ if (!isDigit(ch)) {
804
+ return error(settings, "non digit in http minor", data);
805
+ }
806
+ http_minor = (int)ch - 0x30;
807
+ state = State.req_http_minor;
808
+ break;
809
+
810
+ case req_http_minor:
811
+ if (ch == CR) {
812
+ state = State.req_line_almost_done;
813
+ break;
814
+ }
815
+
816
+ if (ch == LF) {
817
+ state = State.header_field_start;
818
+ break;
819
+ }
820
+
821
+ /* XXX allow spaces after digit? */
822
+
823
+ if (!isDigit(ch)) {
824
+ return error(settings, "non digit in http minor", data);
825
+ }
826
+
827
+ http_minor *= 10;
828
+ http_minor += (int)ch - 0x30;
829
+
830
+
831
+ if (http_minor > 999) {
832
+ return error(settings, "ridiculous http minor", data);
833
+ };
834
+
835
+ break;
836
+
837
+ /* end of request line */
838
+ case req_line_almost_done:
839
+ {
840
+ if (ch != LF) {
841
+ return error(settings, "missing LF after request line", data);
842
+ }
843
+ state = header_field_start;
844
+ break;
845
+ }
846
+
847
+ /******************* HTTP 1.1 *******************/
848
+
849
+
850
+
851
+ /******************* Header *******************/
852
+ case header_field_start:
853
+ {
854
+ if (ch == CR) {
855
+ state = headers_almost_done;
856
+ break;
857
+ }
858
+
859
+ if (ch == LF) {
860
+ /* they might be just sending \n instead of \r\n so this would be
861
+ * the second \n to denote the end of headers*/
862
+ state = State.headers_almost_done;
863
+ reexecute = true;
864
+ break;
865
+ }
866
+
867
+ c = token(ch);
868
+
869
+ if (0 == c) {
870
+ return error(settings, "invalid char in header:", data);
871
+ }
872
+
873
+ header_field_mark = p;
874
+
875
+ index = 0;
876
+ state = State.header_field;
877
+
878
+ switch (c) {
879
+ case C:
880
+ header_state = HState.C;
881
+ break;
882
+
883
+ case P:
884
+ header_state = HState.matching_proxy_connection;
885
+ break;
886
+
887
+ case T:
888
+ header_state = HState.matching_transfer_encoding;
889
+ break;
890
+
891
+ case U:
892
+ header_state = HState.matching_upgrade;
893
+ break;
894
+
895
+ default:
896
+ header_state = HState.general;
897
+ break;
898
+ }
899
+ break;
900
+ }
901
+
902
+
903
+
904
+ case header_field:
905
+ {
906
+ c = token(ch);
907
+ if (0 != c) {
908
+ switch (header_state) {
909
+ case general:
910
+ break;
911
+
912
+ case C:
913
+ index++;
914
+ header_state = (O == c ? HState.CO : HState.general);
915
+ break;
916
+
917
+ case CO:
918
+ index++;
919
+ header_state = (N == c ? HState.CON : HState.general);
920
+ break;
921
+
922
+ case CON:
923
+ index++;
924
+ switch (c) {
925
+ case N:
926
+ header_state = HState.matching_connection;
927
+ break;
928
+ case T:
929
+ header_state = HState.matching_content_length;
930
+ break;
931
+ default:
932
+ header_state = HState.general;
933
+ break;
934
+ }
935
+ break;
936
+
937
+ /* connection */
938
+
939
+ case matching_connection:
940
+ index++;
941
+ if (index > CONNECTION.length || c != CONNECTION[index]) {
942
+ header_state = HState.general;
943
+ } else if (index == CONNECTION.length-1) {
944
+ header_state = HState.connection;
945
+ }
946
+ break;
947
+
948
+ /* proxy-connection */
949
+
950
+ case matching_proxy_connection:
951
+ index++;
952
+ if (index > PROXY_CONNECTION.length || c != PROXY_CONNECTION[index]) {
953
+ header_state = HState.general;
954
+ } else if (index == PROXY_CONNECTION.length-1) {
955
+ header_state = HState.connection;
956
+ }
957
+ break;
958
+
959
+ /* content-length */
960
+
961
+ case matching_content_length:
962
+ index++;
963
+ if (index > CONTENT_LENGTH.length || c != CONTENT_LENGTH[index]) {
964
+ header_state = HState.general;
965
+ } else if (index == CONTENT_LENGTH.length-1) {
966
+ header_state = HState.content_length;
967
+ }
968
+ break;
969
+
970
+ /* transfer-encoding */
971
+
972
+ case matching_transfer_encoding:
973
+ index++;
974
+ if (index > TRANSFER_ENCODING.length || c != TRANSFER_ENCODING[index]) {
975
+ header_state = HState.general;
976
+ } else if (index == TRANSFER_ENCODING.length-1) {
977
+ header_state = HState.transfer_encoding;
978
+ }
979
+ break;
980
+
981
+ /* upgrade */
982
+
983
+ case matching_upgrade:
984
+ index++;
985
+ if (index > UPGRADE.length || c != UPGRADE[index]) {
986
+ header_state = HState.general;
987
+ } else if (index == UPGRADE.length-1) {
988
+ header_state = HState.upgrade;
989
+ }
990
+ break;
991
+
992
+ case connection:
993
+ case content_length:
994
+ case transfer_encoding:
995
+ case upgrade:
996
+ if (SPACE != ch) header_state = HState.general;
997
+ break;
998
+
999
+ default:
1000
+ return error(settings, "Unknown Header State", data);
1001
+ } // switch: header_state
1002
+ break;
1003
+ } // 0 != c
1004
+
1005
+ if (COLON == ch) {
1006
+ settings.call_on_header_field(this, data, header_field_mark, p-header_field_mark);
1007
+ header_field_mark = -1;
1008
+
1009
+ state = State.header_value_start;
1010
+ break;
1011
+ }
1012
+
1013
+ if (CR == ch) {
1014
+ state = State.header_almost_done;
1015
+ settings.call_on_header_field(this, data, header_field_mark, p-header_field_mark);
1016
+
1017
+ header_field_mark = -1;
1018
+ break;
1019
+ }
1020
+
1021
+ if (ch == LF) {
1022
+ settings.call_on_header_field(this, data, header_field_mark, p-header_field_mark);
1023
+ header_field_mark = -1;
1024
+
1025
+ state = State.header_field_start;
1026
+ break;
1027
+ }
1028
+
1029
+ return error(settings, "invalid header field", data);
1030
+ }
1031
+
1032
+
1033
+
1034
+ case header_value_start:
1035
+ {
1036
+ if ((SPACE == ch) || (TAB == ch)) break;
1037
+
1038
+ header_value_mark = p;
1039
+
1040
+ state = State.header_value;
1041
+ index = 0;
1042
+
1043
+
1044
+ if (CR == ch) {
1045
+ settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1046
+ header_value_mark = -1;
1047
+
1048
+ header_state = HState.general;
1049
+ state = State.header_almost_done;
1050
+ break;
1051
+ }
1052
+
1053
+ if (LF == ch) {
1054
+ settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1055
+ header_value_mark = -1;
1056
+
1057
+ state = State.header_field_start;
1058
+ break;
1059
+ }
1060
+
1061
+
1062
+ c = upper(ch);
1063
+
1064
+ switch (header_state) {
1065
+ case upgrade:
1066
+ flags |= F_UPGRADE;
1067
+ header_state = HState.general;
1068
+ break;
1069
+
1070
+ case transfer_encoding:
1071
+ /* looking for 'Transfer-Encoding: chunked' */
1072
+ if (C == c) {
1073
+ header_state = HState.matching_transfer_encoding_chunked;
1074
+ } else {
1075
+ header_state = HState.general;
1076
+ }
1077
+ break;
1078
+
1079
+ case content_length:
1080
+ if (!isDigit(ch)) {
1081
+ return error(settings, "Content-Length not numeric", data);
1082
+ }
1083
+ content_length = (int)ch - 0x30;
1084
+ break;
1085
+
1086
+ case connection:
1087
+ /* looking for 'Connection: keep-alive' */
1088
+ if (K == c) {
1089
+ header_state = HState.matching_connection_keep_alive;
1090
+ /* looking for 'Connection: close' */
1091
+ } else if (C == c) {
1092
+ header_state = HState.matching_connection_close;
1093
+ } else {
1094
+ header_state = HState.general;
1095
+ }
1096
+ break;
1097
+
1098
+ default:
1099
+ header_state = HState.general;
1100
+ break;
1101
+ }
1102
+ break;
1103
+ } // header value start
1104
+
1105
+
1106
+
1107
+ case header_value:
1108
+ {
1109
+
1110
+ if (CR == ch) {
1111
+ settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1112
+ header_value_mark = -1;
1113
+
1114
+ state = State.header_almost_done;
1115
+ break;
1116
+ }
1117
+
1118
+ if (LF == ch) {
1119
+ settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1120
+ header_value_mark = -1;
1121
+ state = header_almost_done;
1122
+ reexecute = true;
1123
+ break;
1124
+ }
1125
+
1126
+ c = upper(ch);
1127
+ switch (header_state) {
1128
+ case general:
1129
+ break;
1130
+
1131
+ case connection:
1132
+ case transfer_encoding:
1133
+ return error(settings, "Shouldn't be here", data);
1134
+
1135
+ case content_length:
1136
+ if (SPACE == ch) {
1137
+ break;
1138
+ }
1139
+ if (!isDigit(ch)) {
1140
+ return error(settings, "Content-Length not numeric", data);
1141
+ }
1142
+
1143
+ long t = content_length;
1144
+ t *= 10;
1145
+ t += (long)ch - 0x30;
1146
+
1147
+ /* Overflow? */
1148
+ // t will wrap and become negative ...
1149
+ if (t < content_length) {
1150
+ return error(settings, "Invalid content length", data);
1151
+ }
1152
+ content_length = t;
1153
+ break;
1154
+
1155
+ /* Transfer-Encoding: chunked */
1156
+ case matching_transfer_encoding_chunked:
1157
+ index++;
1158
+ if (index > CHUNKED.length || c != CHUNKED[index]) {
1159
+ header_state = HState.general;
1160
+ } else if (index == CHUNKED.length-1) {
1161
+ header_state = HState.transfer_encoding_chunked;
1162
+ }
1163
+ break;
1164
+
1165
+ /* looking for 'Connection: keep-alive' */
1166
+ case matching_connection_keep_alive:
1167
+ index++;
1168
+ if (index > KEEP_ALIVE.length || c != KEEP_ALIVE[index]) {
1169
+ header_state = HState.general;
1170
+ } else if (index == KEEP_ALIVE.length-1) {
1171
+ header_state = HState.connection_keep_alive;
1172
+ }
1173
+ break;
1174
+
1175
+ /* looking for 'Connection: close' */
1176
+ case matching_connection_close:
1177
+ index++;
1178
+ if (index > CLOSE.length || c != CLOSE[index]) {
1179
+ header_state = HState.general;
1180
+ } else if (index == CLOSE.length-1) {
1181
+ header_state = HState.connection_close;
1182
+ }
1183
+ break;
1184
+
1185
+ case transfer_encoding_chunked:
1186
+ case connection_keep_alive:
1187
+ case connection_close:
1188
+ if (SPACE != ch) header_state = HState.general;
1189
+ break;
1190
+
1191
+ default:
1192
+ state = State.header_value;
1193
+ header_state = HState.general;
1194
+ break;
1195
+ }
1196
+ break;
1197
+ } // header_value
1198
+
1199
+
1200
+
1201
+ case header_almost_done:
1202
+ if (!header_almost_done(ch)) {
1203
+ return error(settings, "incorrect header ending, expecting LF", data);
1204
+ }
1205
+ break;
1206
+
1207
+ case header_value_lws:
1208
+ if (SPACE == ch || TAB == ch ){
1209
+ state = header_value_start;
1210
+ } else {
1211
+ state = header_field_start;
1212
+ reexecute = true;
1213
+ }
1214
+ break;
1215
+
1216
+ case headers_almost_done:
1217
+ if (LF != ch) {
1218
+ return error(settings, "header not properly completed", data);
1219
+ }
1220
+ if (0 != (flags & F_TRAILING)) {
1221
+ /* End of a chunked request */
1222
+ state = new_message();
1223
+ settings.call_on_headers_complete(this);
1224
+ settings.call_on_message_complete(this);
1225
+ break;
1226
+ }
1227
+
1228
+ state = headers_done;
1229
+
1230
+ if (0 != (flags & F_UPGRADE) || HTTPMethod.HTTP_CONNECT == method) {
1231
+ upgrade = true;
1232
+ }
1233
+
1234
+ /* Here we call the headers_complete callback. This is somewhat
1235
+ * different than other callbacks because if the user returns 1, we
1236
+ * will interpret that as saying that this message has no body. This
1237
+ * is needed for the annoying case of recieving a response to a HEAD
1238
+ * request.
1239
+ */
1240
+
1241
+ /* (responses to HEAD request contain a CONTENT-LENGTH header
1242
+ * but no content)
1243
+ *
1244
+ * Consider what to do here: I don't like the idea of the callback
1245
+ * interface having a different contract in the case of HEAD
1246
+ * responses. The alternatives would be either to:
1247
+ *
1248
+ * a.) require the header_complete callback to implement a different
1249
+ * interface or
1250
+ *
1251
+ * b.) provide an overridden execute(bla, bla, boolean
1252
+ * parsingHeader) implementation ...
1253
+ */
1254
+
1255
+ /*TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO */
1256
+ if (null != settings.on_headers_complete) {
1257
+ settings.call_on_headers_complete(this);
1258
+ //return;
1259
+ }
1260
+
1261
+ // if (null != settings.on_headers_complete) {
1262
+ // switch (settings.on_headers_complete.cb(parser)) {
1263
+ // case 0:
1264
+ // break;
1265
+ //
1266
+ // case 1:
1267
+ // flags |= F_SKIPBODY;
1268
+ // break;
1269
+ //
1270
+ // default:
1271
+ // return p - data; /* Error */ // TODO // RuntimeException ?
1272
+ // }
1273
+ // }
1274
+ reexecute = true;
1275
+ break;
1276
+
1277
+ case headers_done:
1278
+ if (strict && (LF != ch)) {
1279
+ return error(settings, "STRICT CHECK", data); //TODO correct error msg
1280
+ }
1281
+
1282
+ nread = 0;
1283
+
1284
+ // Exit, the rest of the connect is in a different protocol.
1285
+ if (upgrade) {
1286
+ state = new_message();
1287
+ settings.call_on_message_complete(this);
1288
+ return data.position()-this.p_start;
1289
+ }
1290
+
1291
+ if (0 != (flags & F_SKIPBODY)) {
1292
+ state = new_message();
1293
+ settings.call_on_message_complete(this);
1294
+ } else if (0 != (flags & F_CHUNKED)) {
1295
+ /* chunked encoding - ignore Content-Length header */
1296
+ state = State.chunk_size_start;
1297
+ } else {
1298
+ if (content_length == 0) {
1299
+ /* Content-Length header given but zero: Content-Length: 0\r\n */
1300
+ state = new_message();
1301
+ settings.call_on_message_complete(this);
1302
+ } else if (content_length != -1) {
1303
+ /* Content-Length header given and non-zero */
1304
+ state = State.body_identity;
1305
+ } else {
1306
+ if (type == ParserType.HTTP_REQUEST || !http_message_needs_eof()) {
1307
+ /* Assume content-length 0 - read the next */
1308
+ state = new_message();
1309
+ settings.call_on_message_complete(this);
1310
+ } else {
1311
+ /* Read body until EOF */
1312
+ state = State.body_identity_eof;
1313
+ }
1314
+ }
1315
+ }
1316
+
1317
+ break;
1318
+ /******************* Header *******************/
1319
+
1320
+
1321
+
1322
+
1323
+ /******************* Body *******************/
1324
+ case body_identity:
1325
+ to_read = min(pe - p, content_length); //TODO change to use buffer?
1326
+ body_mark = p;
1327
+
1328
+ if (to_read > 0) {
1329
+ settings.call_on_body(this, data, p, to_read);
1330
+ data.position(p+to_read);
1331
+ content_length -= to_read;
1332
+
1333
+ if (content_length == 0) {
1334
+ state = message_done;
1335
+ reexecute = true;
1336
+ }
1337
+ }
1338
+ break;
1339
+
1340
+
1341
+
1342
+ case body_identity_eof:
1343
+ to_read = pe - p; // TODO change to use buffer ?
1344
+ if (to_read > 0) {
1345
+ settings.call_on_body(this, data, p, to_read);
1346
+ data.position(p+to_read);
1347
+ }
1348
+ break;
1349
+
1350
+ case message_done:
1351
+ state = new_message();
1352
+ settings.call_on_message_complete(this);
1353
+ break;
1354
+ /******************* Body *******************/
1355
+
1356
+
1357
+
1358
+ /******************* Chunk *******************/
1359
+ case chunk_size_start:
1360
+ if (1 != this.nread) {
1361
+ return error(settings, "nread != 1 (chunking)", data);
1362
+
1363
+ }
1364
+ if (0 == (flags & F_CHUNKED)) {
1365
+ return error(settings, "not chunked", data);
1366
+ }
1367
+
1368
+ c = UNHEX[chi];
1369
+ if (c == -1) {
1370
+ return error(settings, "invalid hex char in chunk content length", data);
1371
+ }
1372
+ content_length = c;
1373
+ state = State.chunk_size;
1374
+ break;
1375
+
1376
+
1377
+
1378
+ case chunk_size:
1379
+ if (0 == (flags & F_CHUNKED)) {
1380
+ return error(settings, "not chunked", data);
1381
+ }
1382
+
1383
+ if (CR == ch) {
1384
+ state = State.chunk_size_almost_done;
1385
+ break;
1386
+ }
1387
+
1388
+ c = UNHEX[chi];
1389
+
1390
+ if (c == -1) {
1391
+ if (SEMI == ch || SPACE == ch) {
1392
+ state = State.chunk_parameters;
1393
+ break;
1394
+ }
1395
+ return error(settings, "invalid hex char in chunk content length", data);
1396
+ }
1397
+ long t = content_length;
1398
+
1399
+ t *= 16;
1400
+ t += c;
1401
+ if(t < content_length){
1402
+ return error(settings, "invalid content length", data);
1403
+ }
1404
+ content_length = t;
1405
+ break;
1406
+
1407
+
1408
+
1409
+ case chunk_parameters:
1410
+ if (0 == (flags & F_CHUNKED)) {
1411
+ return error(settings, "not chunked", data);
1412
+ }
1413
+ /* just ignore this shit. TODO check for overflow */
1414
+ if (CR == ch) {
1415
+ state = State.chunk_size_almost_done;
1416
+ break;
1417
+ }
1418
+ break;
1419
+
1420
+
1421
+
1422
+ case chunk_size_almost_done:
1423
+ if (0 == (flags & F_CHUNKED)) {
1424
+ return error(settings, "not chunked", data);
1425
+ }
1426
+ if (strict && LF != ch) {
1427
+ return error(settings, "expected LF at end of chunk size", data);
1428
+ }
1429
+
1430
+ this.nread = 0;
1431
+
1432
+ if (0 == content_length) {
1433
+ flags |= F_TRAILING;
1434
+ state = State.header_field_start;
1435
+ } else {
1436
+ state = State.chunk_data;
1437
+ }
1438
+ break;
1439
+
1440
+
1441
+
1442
+ case chunk_data:
1443
+ //TODO Apply changes from C version for s_chunk_data
1444
+ if (0 == (flags & F_CHUNKED)) {
1445
+ return error(settings, "not chunked", data);
1446
+ }
1447
+
1448
+ to_read = min(pe-p, content_length);
1449
+ if (to_read > 0) {
1450
+ settings.call_on_body(this, data, p, to_read);
1451
+ data.position(p+to_read);
1452
+ }
1453
+
1454
+ if (to_read == content_length) {
1455
+ state = State.chunk_data_almost_done;
1456
+ }
1457
+
1458
+ content_length -= to_read;
1459
+ break;
1460
+
1461
+
1462
+
1463
+ case chunk_data_almost_done:
1464
+ if (0 == (flags & F_CHUNKED)) {
1465
+ return error(settings, "not chunked", data);
1466
+ }
1467
+ if (strict && CR != ch) {
1468
+ return error(settings, "chunk data terminated incorrectly, expected CR", data);
1469
+ }
1470
+ state = State.chunk_data_done;
1471
+ //TODO CALLBACK_DATA(body)
1472
+ // settings.call_on_body(this, data,p,?);
1473
+ break;
1474
+
1475
+
1476
+
1477
+ case chunk_data_done:
1478
+ if (0 == (flags & F_CHUNKED)) {
1479
+ return error(settings, "not chunked", data);
1480
+ }
1481
+ if (strict && LF != ch) {
1482
+ return error(settings, "chunk data terminated incorrectly, expected LF", data);
1483
+ }
1484
+ state = State.chunk_size_start;
1485
+ break;
1486
+ /******************* Chunk *******************/
1487
+
1488
+
1489
+
1490
+ default:
1491
+ return error(settings, "unhandled state", data);
1492
+
1493
+ } // switch
1494
+ } // while
1495
+
1496
+ p = data.position();
1497
+
1498
+
1499
+ /* Reaching this point assumes that we only received part of a
1500
+ * message, inform the callbacks about the progress made so far*/
1501
+
1502
+ settings.call_on_header_field(this, data, header_field_mark, p-header_field_mark);
1503
+ settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1504
+ settings.call_on_url (this, data, url_mark, p-url_mark);
1505
+ settings.call_on_path (this, data, url_mark, p-url_mark);
1506
+
1507
+ return data.position()-this.p_start;
1508
+ } // execute
1509
+
1510
+ int error (ParserSettings settings, String mes, ByteBuffer data) {
1511
+ settings.call_on_error(this, mes, data, this.p_start);
1512
+ this.state = State.dead;
1513
+ return data.position()-this.p_start;
1514
+ }
1515
+
1516
+ public boolean http_message_needs_eof() {
1517
+ if(type == ParserType.HTTP_REQUEST){
1518
+ return false;
1519
+ }
1520
+ /* See RFC 2616 section 4.4 */
1521
+ if ((status_code / 100 == 1) || /* 1xx e.g. Continue */
1522
+ (status_code == 204) || /* No Content */
1523
+ (status_code == 304) || /* Not Modified */
1524
+ (flags & F_SKIPBODY) != 0) { /* response to a HEAD request */
1525
+ return false;
1526
+ }
1527
+ if ((flags & F_CHUNKED) != 0 || content_length != -1) {
1528
+ return false;
1529
+ }
1530
+
1531
+ return true;
1532
+ }
1533
+
1534
+ /* If http_should_keep_alive() in the on_headers_complete or
1535
+ * on_message_complete callback returns true, then this will be should be
1536
+ * the last message on the connection.
1537
+ * If you are the server, respond with the "Connection: close" header.
1538
+ * If you are the client, close the connection.
1539
+ */
1540
+ public boolean http_should_keep_alive() {
1541
+ if (http_major > 0 && http_minor > 0) {
1542
+ /* HTTP/1.1 */
1543
+ if ( 0 != (flags & F_CONNECTION_CLOSE) ) {
1544
+ return false;
1545
+ }
1546
+ } else {
1547
+ /* HTTP/1.0 or earlier */
1548
+ if ( 0 == (flags & F_CONNECTION_KEEP_ALIVE) ) {
1549
+ return false;
1550
+ }
1551
+ }
1552
+ return !http_message_needs_eof();
1553
+ }
1554
+
1555
+ public int parse_url(ByteBuffer data, boolean is_connect, HTTPParserUrl u) {
1556
+
1557
+ UrlFields uf = UrlFields.UF_MAX;
1558
+ UrlFields old_uf = UrlFields.UF_MAX;
1559
+ u.port = 0;
1560
+ u.field_set = 0;
1561
+ state = (is_connect ? State.req_host_start : State.req_spaces_before_url);
1562
+ int p_init = data.position();
1563
+ int p = 0;
1564
+ byte ch = 0;
1565
+ while (data.position() != data.limit()) {
1566
+ p = data.position();
1567
+ ch = data.get();
1568
+ state = parse_url_char(ch);
1569
+ switch(state) {
1570
+ case dead:
1571
+ return 1;
1572
+
1573
+ /* Skip delimeters */
1574
+ case req_schema_slash:
1575
+ case req_schema_slash_slash:
1576
+ case req_host_start:
1577
+ case req_host_v6_start:
1578
+ case req_host_v6_end:
1579
+ case req_port_start:
1580
+ case req_query_string_start:
1581
+ case req_fragment_start:
1582
+ continue;
1583
+
1584
+ case req_schema:
1585
+ uf = UrlFields.UF_SCHEMA;
1586
+ break;
1587
+
1588
+ case req_host:
1589
+ case req_host_v6:
1590
+ uf = UrlFields.UF_HOST;
1591
+ break;
1592
+
1593
+ case req_port:
1594
+ uf = UrlFields.UF_PORT;
1595
+ break;
1596
+
1597
+ case req_path:
1598
+ uf = UrlFields.UF_PATH;
1599
+ break;
1600
+
1601
+ case req_query_string:
1602
+ uf = UrlFields.UF_QUERY;
1603
+ break;
1604
+
1605
+ case req_fragment:
1606
+ uf = UrlFields.UF_FRAGMENT;
1607
+ break;
1608
+
1609
+ default:
1610
+ return 1;
1611
+ }
1612
+ /* Nothing's changed; soldier on */
1613
+ if (uf == old_uf) {
1614
+ u.field_data[uf.getIndex()].len++;
1615
+ continue;
1616
+ }
1617
+
1618
+ u.field_data[uf.getIndex()].off = p - p_init;
1619
+ u.field_data[uf.getIndex()].len = 1;
1620
+
1621
+ u.field_set |= (1 << uf.getIndex());
1622
+ old_uf = uf;
1623
+
1624
+ }
1625
+
1626
+ /* CONNECT requests can only contain "hostname:port" */
1627
+ if (is_connect && u.field_set != ((1 << UrlFields.UF_HOST.getIndex())|(1 << UrlFields.UF_PORT.getIndex()))) {
1628
+ return 1;
1629
+ }
1630
+
1631
+ /* Make sure we don't end somewhere unexpected */
1632
+ switch (state) {
1633
+ case req_host_v6_start:
1634
+ case req_host_v6:
1635
+ case req_host_v6_end:
1636
+ case req_host:
1637
+ case req_port_start:
1638
+ return 1;
1639
+ default:
1640
+ break;
1641
+ }
1642
+
1643
+ if (0 != (u.field_set & (1 << UrlFields.UF_PORT.getIndex()))) {
1644
+ /* Don't bother with endp; we've already validated the string */
1645
+ int v = strtoi(data, p_init + u.field_data[UrlFields.UF_PORT.getIndex()].off);
1646
+
1647
+ /* Ports have a max value of 2^16 */
1648
+ if (v > 0xffff) {
1649
+ return 1;
1650
+ }
1651
+
1652
+ u.port = v;
1653
+ }
1654
+
1655
+ return 0;
1656
+ }
1657
+
1658
+ //hacky reimplementation of srttoul, tailored for our simple needs
1659
+ //we only need to parse port val, so no negative values etc
1660
+ int strtoi(ByteBuffer data, int start_pos) {
1661
+ data.position(start_pos);
1662
+ byte ch;
1663
+ String str = "";
1664
+ while(data.position() < data.limit()) {
1665
+ ch = data.get();
1666
+ if(Character.isWhitespace((char)ch)){
1667
+ continue;
1668
+ }
1669
+ if(isDigit(ch)){
1670
+ str = str + (char)ch; //TODO replace with something less hacky
1671
+ }else{
1672
+ break;
1673
+ }
1674
+ }
1675
+ return Integer.parseInt(str);
1676
+ }
1677
+
1678
+ boolean isDigit(byte b) {
1679
+ if (b >= 0x30 && b <=0x39) {
1680
+ return true;
1681
+ }
1682
+ return false;
1683
+ }
1684
+
1685
+ boolean isHex(byte b) {
1686
+ return isDigit(b) || (lower(b) >= 0x61 /*a*/ && lower(b) <= 0x66 /*f*/);
1687
+ }
1688
+
1689
+ boolean isAtoZ(byte b) {
1690
+ byte c = lower(b);
1691
+ return (c>= 0x61 /*a*/ && c <= 0x7a /*z*/);
1692
+ }
1693
+
1694
+
1695
+ byte lower (byte b) {
1696
+ return (byte)(b|0x20);
1697
+ }
1698
+
1699
+ byte upper(byte b) {
1700
+ char c = (char)(b);
1701
+ return (byte)Character.toUpperCase(c);
1702
+ }
1703
+
1704
+ byte token(byte b) {
1705
+ if(!strict){
1706
+ return (b == (byte)' ') ? (byte)' ' : (byte)tokens[b] ;
1707
+ }else{
1708
+ return (byte)tokens[b];
1709
+ }
1710
+ }
1711
+
1712
+ boolean isHostChar(byte ch){
1713
+ if(!strict){
1714
+ return (isAtoZ(ch)) || isDigit(ch) || DOT == ch || DASH == ch || UNDER == ch ;
1715
+ }else{
1716
+ return (isAtoZ(ch)) || isDigit(ch) || DOT == ch || DASH == ch;
1717
+ }
1718
+ }
1719
+
1720
+ boolean isNormalUrlChar(int chi) {
1721
+ if(!strict){
1722
+ return (chi > 0x80) || normal_url_char[chi];
1723
+ }else{
1724
+ return normal_url_char[chi];
1725
+ }
1726
+ }
1727
+
1728
+ HTTPMethod start_req_method_assign(byte c){
1729
+ switch (c) {
1730
+ case C: return HTTPMethod.HTTP_CONNECT; /* or COPY, CHECKOUT */
1731
+ case D: return HTTPMethod.HTTP_DELETE;
1732
+ case G: return HTTPMethod.HTTP_GET;
1733
+ case H: return HTTPMethod.HTTP_HEAD;
1734
+ case L: return HTTPMethod.HTTP_LINK; /* or LOCK */
1735
+ case M: return HTTPMethod.HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */
1736
+ case N: return HTTPMethod.HTTP_NOTIFY;
1737
+ case O: return HTTPMethod.HTTP_OPTIONS;
1738
+ case P: return HTTPMethod.HTTP_POST; /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
1739
+ case R: return HTTPMethod.HTTP_REPORT;
1740
+ case S: return HTTPMethod.HTTP_SUBSCRIBE;
1741
+ case T: return HTTPMethod.HTTP_TRACE;
1742
+ case U: return HTTPMethod.HTTP_UNLINK; /* or UNSUBSCRIBE, UNLOCK */
1743
+ }
1744
+ return null; // ugh.
1745
+ }
1746
+
1747
+ boolean header_almost_done(byte ch) {
1748
+ if (strict && LF != ch) {
1749
+ return false;
1750
+ }
1751
+
1752
+ state = State.header_value_lws;
1753
+ // TODO java enums support some sort of bitflag mechanism !?
1754
+ switch (header_state) {
1755
+ case connection_keep_alive:
1756
+ flags |= F_CONNECTION_KEEP_ALIVE;
1757
+ break;
1758
+ case connection_close:
1759
+ flags |= F_CONNECTION_CLOSE;
1760
+ break;
1761
+ case transfer_encoding_chunked:
1762
+ flags |= F_CHUNKED;
1763
+ break;
1764
+ default:
1765
+ break;
1766
+ }
1767
+ return true;
1768
+ }
1769
+
1770
+ // boolean headers_almost_done (byte ch, ParserSettings settings) {
1771
+ // } // headers_almost_done
1772
+
1773
+
1774
+ final int min (int a, int b) {
1775
+ return a < b ? a : b;
1776
+ }
1777
+
1778
+ final int min (int a, long b) {
1779
+ return a < b ? a : (int)b;
1780
+ }
1781
+
1782
+ /* probably not the best place to hide this ... */
1783
+ public boolean HTTP_PARSER_STRICT;
1784
+ State new_message() {
1785
+ if (HTTP_PARSER_STRICT){
1786
+ return http_should_keep_alive() ? start_state() : State.dead;
1787
+ } else {
1788
+ return start_state();
1789
+ }
1790
+
1791
+ }
1792
+
1793
+ State start_state() {
1794
+ return type == ParserType.HTTP_REQUEST ? State.start_req : State.start_res;
1795
+ }
1796
+
1797
+
1798
+ boolean parsing_header(State state) {
1799
+
1800
+ switch (state) {
1801
+ case chunk_data :
1802
+ case chunk_data_almost_done :
1803
+ case chunk_data_done :
1804
+ case body_identity :
1805
+ case body_identity_eof :
1806
+ case message_done :
1807
+ return false;
1808
+
1809
+ }
1810
+ return true;
1811
+ }
1812
+
1813
+ /* "Dial C for Constants" */
1814
+ static class C {
1815
+ static final int HTTP_MAX_HEADER_SIZE = 80 * 1024;
1816
+
1817
+ static final int F_CHUNKED = 1 << 0;
1818
+ static final int F_CONNECTION_KEEP_ALIVE = 1 << 1;
1819
+ static final int F_CONNECTION_CLOSE = 1 << 2;
1820
+ static final int F_TRAILING = 1 << 3;
1821
+ static final int F_UPGRADE = 1 << 4;
1822
+ static final int F_SKIPBODY = 1 << 5;
1823
+
1824
+ static final byte [] UPCASE = {
1825
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1826
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1827
+ 0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
1828
+ 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 0x38,0x39,0x00,0x00,0x00,0x00,0x00,0x00,
1829
+ 0x00,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
1830
+ 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 0x58,0x59,0x5a,0x00,0x00,0x00,0x00,0x5f,
1831
+ 0x00,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
1832
+ 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 0x58,0x59,0x5a,0x00,0x00,0x00,0x00,0x00,
1833
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1834
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1835
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1836
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1837
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1838
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1839
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1840
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1841
+ };
1842
+ static final byte [] CONNECTION = {
1843
+ 0x43, 0x4f, 0x4e, 0x4e, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e,
1844
+ };
1845
+ static final byte [] PROXY_CONNECTION = {
1846
+ 0x50, 0x52, 0x4f, 0x58, 0x59, 0x2d, 0x43, 0x4f, 0x4e, 0x4e, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e,
1847
+ };
1848
+ static final byte [] CONTENT_LENGTH = {
1849
+ 0x43, 0x4f, 0x4e, 0x54, 0x45, 0x4e, 0x54, 0x2d, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48,
1850
+ };
1851
+ static final byte [] TRANSFER_ENCODING = {
1852
+ 0x54, 0x52, 0x41, 0x4e, 0x53, 0x46, 0x45, 0x52, 0x2d, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47,
1853
+ };
1854
+ static final byte [] UPGRADE = {
1855
+ 0x55, 0x50, 0x47, 0x52, 0x41, 0x44, 0x45,
1856
+ };
1857
+ static final byte [] CHUNKED = {
1858
+ 0x43, 0x48, 0x55, 0x4e, 0x4b, 0x45, 0x44,
1859
+ };
1860
+ static final byte [] KEEP_ALIVE = {
1861
+ 0x4b, 0x45, 0x45, 0x50, 0x2d, 0x41, 0x4c, 0x49, 0x56, 0x45,
1862
+ };
1863
+ static final byte [] CLOSE = {
1864
+ 0x43, 0x4c, 0x4f, 0x53, 0x45,
1865
+ };
1866
+
1867
+ /* Tokens as defined by rfc 2616. Also lowercases them.
1868
+ * token = 1*<any CHAR except CTLs or separators>
1869
+ * separators = "(" | ")" | "<" | ">" | "@"
1870
+ * | "," | ";" | ":" | "\" | <">
1871
+ * | "/" | "[" | "]" | "?" | "="
1872
+ * | "{" | "}" | SP | HT
1873
+ */
1874
+
1875
+ static final char [] tokens = {
1876
+ /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
1877
+ 0, 0, 0, 0, 0, 0, 0, 0,
1878
+ /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
1879
+ 0, 0, 0, 0, 0, 0, 0, 0,
1880
+ /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
1881
+ 0, 0, 0, 0, 0, 0, 0, 0,
1882
+ /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
1883
+ 0, 0, 0, 0, 0, 0, 0, 0,
1884
+ /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
1885
+ 0, '!', 0, '#', '$', '%', '&', '\'',
1886
+ /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
1887
+ 0, 0, '*', '+', 0, '-', '.', 0 ,
1888
+ /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
1889
+ '0', '1', '2', '3', '4', '5', '6', '7',
1890
+ /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
1891
+ '8', '9', 0, 0, 0, 0, 0, 0,
1892
+ /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
1893
+ 0, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
1894
+ /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
1895
+ 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
1896
+ /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
1897
+ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
1898
+ /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
1899
+ 'X', 'Y', 'Z', 0, 0, 0, 0, '_',
1900
+ /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
1901
+ 0, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
1902
+ /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
1903
+ 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
1904
+ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
1905
+ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
1906
+ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
1907
+ 'X', 'Y', 'Z', 0, '|', 0, '~', 0,
1908
+ /* hi bit set, not ascii */
1909
+ 0, 0, 0, 0, 0, 0, 0, 0,
1910
+ 0, 0, 0, 0, 0, 0, 0, 0,
1911
+ 0, 0, 0, 0, 0, 0, 0, 0,
1912
+ 0, 0, 0, 0, 0, 0, 0, 0,
1913
+ 0, 0, 0, 0, 0, 0, 0, 0,
1914
+ 0, 0, 0, 0, 0, 0, 0, 0,
1915
+ 0, 0, 0, 0, 0, 0, 0, 0,
1916
+ 0, 0, 0, 0, 0, 0, 0, 0,
1917
+ 0, 0, 0, 0, 0, 0, 0, 0,
1918
+ 0, 0, 0, 0, 0, 0, 0, 0,
1919
+ 0, 0, 0, 0, 0, 0, 0, 0,
1920
+ 0, 0, 0, 0, 0, 0, 0, 0,
1921
+ 0, 0, 0, 0, 0, 0, 0, 0,
1922
+ 0, 0, 0, 0, 0, 0, 0, 0,
1923
+ 0, 0, 0, 0, 0, 0, 0, 0,
1924
+ 0, 0, 0, 0, 0, 0, 0, 0, };
1925
+
1926
+ static final byte [] UNHEX =
1927
+ { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1928
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1929
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1930
+ , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
1931
+ ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
1932
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1933
+ ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
1934
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1935
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1936
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1937
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1938
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1939
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1940
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1941
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1942
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1943
+ };
1944
+
1945
+ static final boolean [] normal_url_char = {
1946
+ /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
1947
+ false, false, false, false, false, false, false, false,
1948
+ /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
1949
+ false, false, false, false, false, false, false, false,
1950
+ /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
1951
+ false, false, false, false, false, false, false, false,
1952
+ /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
1953
+ false, false, false, false, false, false, false, false,
1954
+ /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
1955
+ false, true, true, false, true, true, true, true,
1956
+ /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
1957
+ true, true, true, true, true, true, true, true,
1958
+ /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
1959
+ true, true, true, true, true, true, true, true,
1960
+ /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
1961
+ true, true, true, true, true, true, true, false,
1962
+ /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
1963
+ true, true, true, true, true, true, true, true,
1964
+ /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
1965
+ true, true, true, true, true, true, true, true,
1966
+ /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
1967
+ true, true, true, true, true, true, true, true,
1968
+ /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
1969
+ true, true, true, true, true, true, true, true,
1970
+ /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
1971
+ true, true, true, true, true, true, true, true,
1972
+ /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
1973
+ true, true, true, true, true, true, true, true,
1974
+ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
1975
+ true, true, true, true, true, true, true, true,
1976
+ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
1977
+ true, true, true, true, true, true, true, false,
1978
+
1979
+ /* hi bit set, not ascii */
1980
+ /* Remainder of non-ASCII range are accepted as-is to support implicitly UTF-8
1981
+ * encoded paths. This is out of spec, but clients generate this and most other
1982
+ * HTTP servers support it. We should, too. */
1983
+
1984
+ true, true, true, true, true, true, true, true,
1985
+ true, true, true, true, true, true, true, true,
1986
+ true, true, true, true, true, true, true, true,
1987
+ true, true, true, true, true, true, true, true,
1988
+ true, true, true, true, true, true, true, true,
1989
+ true, true, true, true, true, true, true, true,
1990
+ true, true, true, true, true, true, true, true,
1991
+ true, true, true, true, true, true, true, true,
1992
+ true, true, true, true, true, true, true, true,
1993
+ true, true, true, true, true, true, true, true,
1994
+ true, true, true, true, true, true, true, true,
1995
+ true, true, true, true, true, true, true, true,
1996
+ true, true, true, true, true, true, true, true,
1997
+ true, true, true, true, true, true, true, true,
1998
+ true, true, true, true, true, true, true, true,
1999
+ true, true, true, true, true, true, true, true,
2000
+
2001
+ };
2002
+
2003
+ public static final byte A = 0x41;
2004
+ public static final byte B = 0x42;
2005
+ public static final byte C = 0x43;
2006
+ public static final byte D = 0x44;
2007
+ public static final byte E = 0x45;
2008
+ public static final byte F = 0x46;
2009
+ public static final byte G = 0x47;
2010
+ public static final byte H = 0x48;
2011
+ public static final byte I = 0x49;
2012
+ public static final byte J = 0x4a;
2013
+ public static final byte K = 0x4b;
2014
+ public static final byte L = 0x4c;
2015
+ public static final byte M = 0x4d;
2016
+ public static final byte N = 0x4e;
2017
+ public static final byte O = 0x4f;
2018
+ public static final byte P = 0x50;
2019
+ public static final byte Q = 0x51;
2020
+ public static final byte R = 0x52;
2021
+ public static final byte S = 0x53;
2022
+ public static final byte T = 0x54;
2023
+ public static final byte U = 0x55;
2024
+ public static final byte V = 0x56;
2025
+ public static final byte W = 0x57;
2026
+ public static final byte X = 0x58;
2027
+ public static final byte Y = 0x59;
2028
+ public static final byte Z = 0x5a;
2029
+ public static final byte UNDER = 0x5f;
2030
+ public static final byte CR = 0x0d;
2031
+ public static final byte LF = 0x0a;
2032
+ public static final byte DOT = 0x2e;
2033
+ public static final byte SPACE = 0x20;
2034
+ public static final byte TAB = 0x09;
2035
+ public static final byte SEMI = 0x3b;
2036
+ public static final byte COLON = 0x3a;
2037
+ public static final byte HASH = 0x23;
2038
+ public static final byte QMARK = 0x3f;
2039
+ public static final byte SLASH = 0x2f;
2040
+ public static final byte DASH = 0x2d;
2041
+ public static final byte STAR = 0x2a;
2042
+ public static final byte NULL = 0x00;
2043
+ }
2044
+
2045
/**
 * Parser states. The declaration order is significant: the upstream C
 * implementation relies on the position of its header states (see the note
 * before {@code chunk_data}), so constants must not be reordered.
 */
enum State {

    dead,

    /* Start line when it is not yet known whether a request or a response follows. */
    start_req_or_res,
    res_or_resp_H,

    /* Response status line. */
    start_res,
    res_H,
    res_HT,
    res_HTT,
    res_HTTP,
    res_first_http_major,
    res_http_major,
    res_first_http_minor,
    res_http_minor,
    res_first_status_code,
    res_status_code,
    res_status,
    res_line_almost_done,

    /* Request line. */
    start_req,

    req_method,
    req_spaces_before_url,
    req_schema,
    req_schema_slash,
    req_schema_slash_slash,
    req_host_start,
    req_host_v6_start,
    req_host_v6,
    req_host_v6_end,
    req_host,
    req_port_start,
    req_port,
    req_path,
    req_query_string_start,
    req_query_string,
    req_fragment_start,
    req_fragment,
    req_http_start,
    req_http_H,
    req_http_HT,
    req_http_HTT,
    req_http_HTTP,
    req_first_http_major,
    req_http_major,
    req_first_http_minor,
    req_http_minor,
    req_line_almost_done,

    /* Header fields and values. */
    header_field_start,
    header_field,
    header_value_start,
    header_value,
    header_value_lws,

    header_almost_done,

    /* Chunk size line. */
    chunk_size_start,
    chunk_size,
    chunk_parameters,
    chunk_size_almost_done,

    headers_almost_done,
    headers_done,

    /*
     * Orientation note carried over from the C source:
     *
     *   "Important: 's_headers_done' must be the last 'header' state. All
     *    states beyond this must be 'body' states. It is used for overflow
     *    checking. See the PARSING_HEADER() macro."
     *
     * The C version tests this with an ordered comparison on the state
     * value. According to the original note here, the Java port instead
     * lists the header states explicitly in `parsing_header()`.
     */
    chunk_data,
    chunk_data_almost_done,
    chunk_data_done,

    body_identity,
    body_identity_eof,
    message_done

}
2126
/**
 * Header sub-states. Judging by the constant names, they track recognition of
 * the Connection, Proxy-Connection, Content-Length, Transfer-Encoding and
 * Upgrade headers and of the values "chunked", "keep-alive" and "close".
 * NOTE(review): inferred from names only -- confirm against the parser code.
 */
enum HState {
    general,
    C,
    CO,
    CON,

    /* Header name partially matched. */
    matching_connection,
    matching_proxy_connection,
    matching_content_length,
    matching_transfer_encoding,
    matching_upgrade,

    /* Header name fully matched. */
    connection,
    content_length,
    transfer_encoding,
    upgrade,

    /* Header value partially matched. */
    matching_transfer_encoding_chunked,
    matching_connection_keep_alive,
    matching_connection_close,

    /* Header value fully matched. */
    transfer_encoding_chunked,
    connection_keep_alive,
    connection_close
}
2151
/**
 * Components of a URL, each paired with a fixed integer index.
 * {@code UF_MAX} carries the index one past the last real component.
 */
public enum UrlFields {
    UF_SCHEMA(0),
    UF_HOST(1),
    UF_PORT(2),
    UF_PATH(3),
    UF_QUERY(4),
    UF_FRAGMENT(5),
    UF_MAX(6);

    /** Integer index assigned to this component at declaration. */
    private final int index;

    UrlFields(int idx) {
        index = idx;
    }

    /**
     * @return the integer index assigned to this URL component
     */
    public int getIndex() {
        return this.index;
    }
}
2171
+ }