midori_http_parser 0.6.1.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +12 -0
  3. data/.gitmodules +6 -0
  4. data/.travis.yml +33 -0
  5. data/Gemfile +2 -0
  6. data/LICENSE-MIT +20 -0
  7. data/README.md +90 -0
  8. data/Rakefile +6 -0
  9. data/bench/standalone.rb +23 -0
  10. data/bench/thin.rb +58 -0
  11. data/ext/ruby_http_parser/.gitignore +1 -0
  12. data/ext/ruby_http_parser/RubyHttpParserService.java +18 -0
  13. data/ext/ruby_http_parser/ext_help.h +18 -0
  14. data/ext/ruby_http_parser/extconf.rb +24 -0
  15. data/ext/ruby_http_parser/org/ruby_http_parser/RubyHttpParser.java +495 -0
  16. data/ext/ruby_http_parser/ruby_http_parser.c +516 -0
  17. data/ext/ruby_http_parser/vendor/.gitkeep +0 -0
  18. data/ext/ruby_http_parser/vendor/http-parser-java/AUTHORS +32 -0
  19. data/ext/ruby_http_parser/vendor/http-parser-java/LICENSE-MIT +48 -0
  20. data/ext/ruby_http_parser/vendor/http-parser-java/README.md +183 -0
  21. data/ext/ruby_http_parser/vendor/http-parser-java/TODO +28 -0
  22. data/ext/ruby_http_parser/vendor/http-parser-java/build.xml +74 -0
  23. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.c +2175 -0
  24. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.gyp +79 -0
  25. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.h +304 -0
  26. data/ext/ruby_http_parser/vendor/http-parser-java/src/Http-parser.java.iml +22 -0
  27. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/FieldData.java +41 -0
  28. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPCallback.java +8 -0
  29. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPDataCallback.java +34 -0
  30. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPErrorCallback.java +12 -0
  31. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPException.java +9 -0
  32. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPMethod.java +113 -0
  33. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPParser.java +36 -0
  34. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPParserUrl.java +76 -0
  35. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserSettings.java +256 -0
  36. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserType.java +13 -0
  37. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/Util.java +111 -0
  38. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPCallback.java +5 -0
  39. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPDataCallback.java +25 -0
  40. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPErrorCallback.java +7 -0
  41. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPParser.java +2171 -0
  42. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/ParserSettings.java +83 -0
  43. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Message.java +374 -0
  44. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/ParseUrl.java +51 -0
  45. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Requests.java +69 -0
  46. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Responses.java +52 -0
  47. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Test.java +16 -0
  48. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestHeaderOverflowError.java +48 -0
  49. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestLoaderNG.java +212 -0
  50. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestNoOverflowLongBody.java +62 -0
  51. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/UnitTest.java +117 -0
  52. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Upgrade.java +27 -0
  53. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Url.java +127 -0
  54. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Util.java +236 -0
  55. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/WrongContentLength.java +59 -0
  56. data/ext/ruby_http_parser/vendor/http-parser-java/test.c +3425 -0
  57. data/ext/ruby_http_parser/vendor/http-parser-java/tests.dumped +845 -0
  58. data/ext/ruby_http_parser/vendor/http-parser-java/tests.utf8 +17 -0
  59. data/ext/ruby_http_parser/vendor/http-parser-java/tools/byte_constants.rb +6 -0
  60. data/ext/ruby_http_parser/vendor/http-parser-java/tools/const_char.rb +13 -0
  61. data/ext/ruby_http_parser/vendor/http-parser-java/tools/lowcase.rb +15 -0
  62. data/ext/ruby_http_parser/vendor/http-parser-java/tools/parse_tests.rb +33 -0
  63. data/ext/ruby_http_parser/vendor/http-parser/AUTHORS +68 -0
  64. data/ext/ruby_http_parser/vendor/http-parser/LICENSE-MIT +23 -0
  65. data/ext/ruby_http_parser/vendor/http-parser/README.md +246 -0
  66. data/ext/ruby_http_parser/vendor/http-parser/bench.c +111 -0
  67. data/ext/ruby_http_parser/vendor/http-parser/contrib/parsertrace.c +160 -0
  68. data/ext/ruby_http_parser/vendor/http-parser/contrib/url_parser.c +47 -0
  69. data/ext/ruby_http_parser/vendor/http-parser/http_parser.c +2470 -0
  70. data/ext/ruby_http_parser/vendor/http-parser/http_parser.gyp +111 -0
  71. data/ext/ruby_http_parser/vendor/http-parser/http_parser.h +432 -0
  72. data/ext/ruby_http_parser/vendor/http-parser/test.c +4226 -0
  73. data/ext/ruby_http_parser/vendor/http-parser/test_fast +0 -0
  74. data/ext/ruby_http_parser/vendor/http-parser/test_g +0 -0
  75. data/lib/http/parser.rb +1 -0
  76. data/lib/http_parser.rb +21 -0
  77. data/midori_http_parser.gemspec +24 -0
  78. data/spec/parser_spec.rb +376 -0
  79. data/spec/spec_helper.rb +1 -0
  80. data/spec/support/requests.json +631 -0
  81. data/spec/support/responses.json +375 -0
  82. data/tasks/compile.rake +42 -0
  83. data/tasks/fixtures.rake +71 -0
  84. data/tasks/spec.rake +5 -0
  85. data/tasks/submodules.rake +7 -0
  86. metadata +206 -0
@@ -0,0 +1,13 @@
1
+ package http_parser;
2
+
3
/**
 * The kind of HTTP message a parser is set up to handle: requests only,
 * responses only, or either.
 */
public enum ParserType {
  HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH;

  /**
   * Looks up a parser type by the name of its constant, ignoring case.
   *
   * @param s the name to look up; may be {@code null}
   * @return the constant whose name equals {@code s} ignoring case, or
   *         {@code null} when {@code s} is {@code null} or matches no constant
   */
  public static ParserType parse(String s) {
    for (ParserType candidate : values()) {
      if (candidate.name().equalsIgnoreCase(s)) {
        return candidate;
      }
    }
    return null;
  }
}
13
+
@@ -0,0 +1,111 @@
1
+ package http_parser;
2
+
3
+ import java.nio.ByteBuffer;
4
+
5
/**
 * Helpers for rendering parser diagnostics.
 */
public class Util {

  /** Maximum number of buffer bytes reproduced in an error excerpt. */
  private static final int MES_WIDTH = 72;

//  public static String toString(http_parser.lolevel.HTTPParser p) {
//    StringBuilder builder = new StringBuilder();
//
//    // the stuff up to the break is ephemeral and only meaningful
//    // while the parser is parsing. In general, this method is
//    // probably only useful during debugging.
//
//    builder.append("state :"); builder.append(p.state); builder.append("\n");
//    builder.append("header_state :"); builder.append(p.header_state); builder.append("\n");
//    builder.append("strict :"); builder.append(p.strict); builder.append("\n");
//    builder.append("index :"); builder.append(p.index); builder.append("\n");
//    builder.append("flags :"); builder.append(p.flags); builder.append("\n");
//    builder.append("nread :"); builder.append(p.nread); builder.append("\n");
//    builder.append("content_length :"); builder.append(p.content_length); builder.append("\n");
//
//
//    builder.append("type :"); builder.append(p.type); builder.append("\n");
//    builder.append("http_major :"); builder.append(p.http_major); builder.append("\n");
//    builder.append("http_minor :"); builder.append(p.http_minor); builder.append("\n");
//    builder.append("status_code :"); builder.append(p.status_code); builder.append("\n");
//    builder.append("method :"); builder.append(p.method); builder.append("\n");
//    builder.append("upgrade :"); builder.append(p.upgrade); builder.append("\n");
//
//    return builder.toString();
//
//  }

  /**
   * Renders an excerpt of the buffer with a caret line pointing at the
   * buffer's current position, e.g.
   *
   * <pre>
   * GEt / HTTP 1_1
   * ............^
   * </pre>
   *
   * At most 72 bytes are shown. When more than 72 bytes lie between
   * {@code beginning} and the buffer's limit, the excerpt is a 72 byte
   * window: centred on the position when there are more than 36 bytes on both
   * sides, otherwise anchored at {@code beginning} or at the limit.
   *
   * The buffer's position, limit and mark are left untouched: bytes are read
   * with absolute gets only.
   *
   * @param mes       the error message; accepted for interface compatibility
   *                  but not included in the returned text
   * @param b         the buffer being parsed; its position marks the error
   * @param beginning index in {@code b} where the parsed data started; values
   *                  outside {@code [0, b.position()]} are clamped into that
   *                  range so that a bad argument cannot break error reporting
   * @return the excerpt, a newline, one '.' per byte between the start of the
   *         excerpt and the position, and a closing '^'
   */
  public static String error (String mes, ByteBuffer b, int beginning) {
    final int p = b.position();   // error position
    final int end = b.limit();    // one past the last readable byte

    // A start after the error position would yield a negative caret offset.
    final int first = Math.min(Math.max(beginning, 0), p);
    final int available = end - first;

    final int windowStart;
    final int windowLen;
    if (available <= MES_WIDTH) {
      // everything fits
      windowStart = first;
      windowLen = available;
    } else {
      // trim at the beginning and/or the end
      final int half = MES_WIDTH / 2;
      final int before = p - first;
      final int after = end - p;
      windowLen = MES_WIDTH;
      if (before > half && after > half) {
        // plenty of data on both sides of the error
        windowStart = p - half;
      } else if (before <= half) {
        // take all of the beginning and as much of the rest as fits
        windowStart = first;
      } else {
        // plenty of data before the error, little after: anchor at the end
        windowStart = end - MES_WIDTH;
      }
    }

    // The caret offset is always relative to the start of the excerpt.
    final int caret = p - windowStart;

    StringBuilder builder = new StringBuilder(windowLen + caret + 2);
    for (int i = 0; i < windowLen; ++i) {
      // One char per byte (ISO-8859-1 style) keeps the caret aligned
      // regardless of the platform default charset.
      builder.append((char) (b.get(windowStart + i) & 0xff));
    }
    builder.append('\n');
    for (int i = 0; i < caret; ++i) {
      builder.append('.');
    }
    builder.append('^');
    return builder.toString();
  }
}
@@ -0,0 +1,5 @@
1
+ package http_parser.lolevel;
2
+
3
+ public interface HTTPCallback {
4
+ public int cb (HTTPParser parser);
5
+ }
@@ -0,0 +1,25 @@
1
+ package http_parser.lolevel;
2
+
3
+ import java.nio.ByteBuffer;
4
+
5
+ public interface HTTPDataCallback {
6
+ /*
7
+ very raw and extremly foolhardy! DANGER!
8
+ The whole Buffer concept is difficult enough to grasp as it is,
9
+ we pass in a buffer with an arbitrary position.
10
+
11
+ The interesting data is located at position pos and is len
12
+ bytes long.
13
+
14
+ The contract of this callback is that the buffer is
15
+ returned in the state that it was passed in, so implementing
16
+ this require good citizenship, you'll need to remember the current
17
+ position, change the position to get at the data you're interested
18
+ in and then set the position back to how you found it...
19
+
20
+ //TODO: there should be an abstract implementation that implements
21
+ cb as described above, marks it final an provides a new callback
22
+ with signature cb(byte[], int, int)
23
+ */
24
+ public int cb(HTTPParser p, ByteBuffer buf, int pos, int len);
25
+ }
@@ -0,0 +1,7 @@
1
+ package http_parser.lolevel;
2
+
3
+ import java.nio.ByteBuffer;
4
+
5
+ public interface HTTPErrorCallback {
6
+ public void cb (HTTPParser parser, String mes, ByteBuffer buf, int initial_position);
7
+ }
@@ -0,0 +1,2171 @@
1
+ package http_parser.lolevel;
2
+
3
+ import java.nio.ByteBuffer;
4
+ import http_parser.HTTPException;
5
+ import http_parser.HTTPMethod;
6
+ import http_parser.HTTPParserUrl;
7
+ import http_parser.ParserType;
8
+ import static http_parser.lolevel.HTTPParser.C.*;
9
+ import static http_parser.lolevel.HTTPParser.State.*;
10
+
11
+ public class HTTPParser {
12
+ /* lots of unsigned chars here, not sure what
13
+ to do about them, `bytes` in java suck... */
14
+
15
+ ParserType type;
16
+ State state;
17
+ HState header_state;
18
+ boolean strict;
19
+
20
+ int index;
21
+ int flags; // TODO
22
+
23
+ int nread;
24
+ long content_length;
25
+
26
+ int p_start; // updated each call to execute to indicate where the buffer was before we began calling it.
27
+
28
+ /** READ-ONLY **/
29
+ public int http_major;
30
+ public int http_minor;
31
+ public int status_code; /* responses only */
32
+ public HTTPMethod method; /* requests only */
33
+
34
+ /* true = Upgrade header was present and the parser has exited because of that.
35
+ * false = No upgrade header present.
36
+ * Should be checked when http_parser_execute() returns in addition to
37
+ * error checking.
38
+ */
39
+ public boolean upgrade;
40
+
41
+ /** PUBLIC **/
42
+ // TODO : this is used in c to maintain application state.
43
+ // is this even necessary? we have state in java ?
44
+ // consider
45
+ // Object data; /* A pointer to get hook to the "connection" or "socket" object */
46
+
47
+
48
+ /*
49
+ * technically we could combine all of these (except for url_mark) into one
50
+ * variable, saving stack space, but it seems more clear to have them
51
+ * separated.
52
+ */
53
+ int header_field_mark = -1;
54
+ int header_value_mark = -1;
55
+ int url_mark = -1;
56
+ int body_mark = -1;
57
+
58
+ /**
59
+ * Construct a Parser for ParserType.HTTP_BOTH, meaning it
60
+ * determines whether it's parsing a request or a response.
61
+ */
62
+ public HTTPParser() {
63
+ this(ParserType.HTTP_BOTH);
64
+ }
65
+
66
+ /**
67
+ * Construct a Parser and initialise it to parse either
68
+ * requests or responses.
69
+ */
70
+ public HTTPParser(ParserType type) {
71
+ this.type = type;
72
+ switch(type) {
73
+ case HTTP_REQUEST:
74
+ this.state = State.start_req;
75
+ break;
76
+ case HTTP_RESPONSE:
77
+ this.state = State.start_res;
78
+ break;
79
+ case HTTP_BOTH:
80
+ this.state = State.start_req_or_res;
81
+ break;
82
+ default:
83
+ throw new HTTPException("can't happen, invalid ParserType enum");
84
+ }
85
+ }
86
+
87
+ /*
88
+ * Utility to facilitate System.out.println style debugging (the way god intended)
89
+ */
90
+ static void p(Object o) {System.out.println(o);}
91
+
92
+ /** Comment from C version follows
93
+ *
94
+ * Our URL parser.
95
+ *
96
+ * This is designed to be shared by http_parser_execute() for URL validation,
97
+ * hence it has a state transition + byte-for-byte interface. In addition, it
98
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
99
+ * work of turning state transitions URL components for its API.
100
+ *
101
+ * This function should only be invoked with non-space characters. It is
102
+ * assumed that the caller cares about (and can detect) the transition between
103
+ * URL and non-URL states by looking for these.
104
+ */
105
+ public State parse_url_char(byte ch) {
106
+
107
+ int chi = ch & 0xff; // utility, ch without signedness for table lookups.
108
+
109
+ if(SPACE == ch){
110
+ throw new HTTPException("space as url char");
111
+ }
112
+
113
+ switch(state) {
114
+ case req_spaces_before_url:
115
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
116
+ * All methods except CONNECT are followed by '/' or '*'.
117
+ */
118
+ if(SLASH == ch || STAR == ch){
119
+ return req_path;
120
+ }
121
+ if(isAtoZ(ch)){
122
+ return req_schema;
123
+ }
124
+ break;
125
+ case req_schema:
126
+ if(isAtoZ(ch)){
127
+ return req_schema;
128
+ }
129
+ if(COLON == ch){
130
+ return req_schema_slash;
131
+ }
132
+ break;
133
+ case req_schema_slash:
134
+ if(SLASH == ch){
135
+ return req_schema_slash_slash;
136
+ }
137
+ break;
138
+ case req_schema_slash_slash:
139
+ if(SLASH == ch){
140
+ return req_host_start;
141
+ }
142
+ break;
143
+ case req_host_start:
144
+ if (ch == (byte)'[') {
145
+ return req_host_v6_start;
146
+ }
147
+ if (isHostChar(ch)) {
148
+ return req_host;
149
+ }
150
+ break;
151
+
152
+ case req_host:
153
+ if (isHostChar(ch)) {
154
+ return req_host;
155
+ }
156
+
157
+ /* FALLTHROUGH */
158
+ case req_host_v6_end:
159
+ switch (ch) {
160
+ case ':':
161
+ return req_port_start;
162
+ case '/':
163
+ return req_path;
164
+ case '?':
165
+ return req_query_string_start;
166
+ }
167
+ break;
168
+
169
+ case req_host_v6:
170
+ if (ch == ']') {
171
+ return req_host_v6_end;
172
+ }
173
+
174
+ /* FALLTHROUGH */
175
+ case req_host_v6_start:
176
+ if (isHex(ch) || ch == ':') {
177
+ return req_host_v6;
178
+ }
179
+ break;
180
+
181
+ case req_port:
182
+ switch (ch) {
183
+ case '/':
184
+ return req_path;
185
+ case '?':
186
+ return req_query_string_start;
187
+ }
188
+
189
+ /* FALLTHROUGH */
190
+ case req_port_start:
191
+ if (isDigit(ch)) {
192
+ return req_port;
193
+ }
194
+ break;
195
+
196
+ case req_path:
197
+ if (isNormalUrlChar(chi)) {
198
+ return req_path;
199
+ }
200
+ switch (ch) {
201
+ case '?':
202
+ return req_query_string_start;
203
+ case '#':
204
+ return req_fragment_start;
205
+ }
206
+
207
+ break;
208
+
209
+ case req_query_string_start:
210
+ case req_query_string:
211
+ if (isNormalUrlChar(chi)) {
212
+ return req_query_string;
213
+ }
214
+
215
+ switch (ch) {
216
+ case '?':
217
+ /* allow extra '?' in query string */
218
+ return req_query_string;
219
+
220
+ case '#':
221
+ return req_fragment_start;
222
+ }
223
+
224
+ break;
225
+
226
+ case req_fragment_start:
227
+ if (isNormalUrlChar(chi)) {
228
+ return req_fragment;
229
+ }
230
+ switch (ch) {
231
+ case '?':
232
+ return req_fragment;
233
+
234
+ case '#':
235
+ return req_fragment_start;
236
+ }
237
+ break;
238
+
239
+ case req_fragment:
240
+ if (isNormalUrlChar(ch)) {
241
+ return req_fragment;
242
+ }
243
+
244
+ switch (ch) {
245
+ case '?':
246
+ case '#':
247
+ return req_fragment;
248
+ }
249
+
250
+ break;
251
+ default:
252
+ break;
253
+ }
254
+
255
+ /* We should never fall out of the switch above unless there's an error */
256
+ return dead;
257
+ }
258
+
259
+ /** Execute the parser with the currently available data contained in
260
+ * the buffer. The buffer's position() and limit() need to be set
261
+ * correctly (obviously), and the position will be updated appropriately
262
+ * when the method returns to reflect the consumed data.
263
+ */
264
+ public int execute(ParserSettings settings, ByteBuffer data) {
265
+
266
+ int p = data.position();
267
+ this.p_start = p; // this is used for pretty printing errors.
268
+ // and returning the amount of processed bytes.
269
+
270
+
271
+ // In case the headers don't provide information about the content
272
+ // length, `execute` needs to be called with an empty buffer to
273
+ // indicate that all the data has been send be the client/server,
274
+ // else there is no way of knowing the message is complete.
275
+ int len = (data.limit() - data.position());
276
+ if (0 == len) {
277
+ // if (State.body_identity_eof == state) {
278
+ // settings.call_on_message_complete(this);
279
+ // }
280
+ switch (state) {
281
+ case body_identity_eof:
282
+ settings.call_on_message_complete(this);
283
+ return data.position() - this.p_start;
284
+
285
+ case dead:
286
+ case start_req_or_res:
287
+ case start_res:
288
+ case start_req:
289
+ return data.position() - this.p_start;
290
+
291
+ default:
292
+ // should we really consider this an error!?
293
+ throw new HTTPException("empty bytes! "+state); // error
294
+ }
295
+ }
296
+
297
+
298
+ // in case the _previous_ call to the parser only has data to get to
299
+ // the middle of certain fields, we need to update marks to point at
300
+ // the beginning of the current buffer.
301
+ switch (state) {
302
+ case header_field:
303
+ header_field_mark = p;
304
+ break;
305
+ case header_value:
306
+ header_value_mark = p;
307
+ break;
308
+ case req_path:
309
+ case req_schema:
310
+ case req_schema_slash:
311
+ case req_schema_slash_slash:
312
+ case req_host_start:
313
+ case req_host_v6_start:
314
+ case req_host_v6:
315
+ case req_host_v6_end:
316
+ case req_host:
317
+ case req_port_start:
318
+ case req_port:
319
+ case req_query_string_start:
320
+ case req_query_string:
321
+ case req_fragment_start:
322
+ case req_fragment:
323
+ url_mark = p;
324
+ break;
325
+ }
326
+ boolean reexecute = false;
327
+ int pe = 0;
328
+ byte ch = 0;
329
+ int chi = 0;
330
+ byte c = -1;
331
+ int to_read = 0;
332
+
333
+ // this is where the work gets done, traverse the available data...
334
+ while (data.position() != data.limit() || reexecute) {
335
+ // p(state + ": r: " + reexecute + " :: " +p );
336
+
337
+ if(!reexecute){
338
+ p = data.position();
339
+ pe = data.limit();
340
+ ch = data.get(); // the current character to process.
341
+ chi = ch & 0xff; // utility, ch without signedness for table lookups.
342
+ c = -1; // utility variably used for up- and downcasing etc.
343
+ to_read = 0; // used to keep track of how much of body, etc. is left to read
344
+
345
+ if (parsing_header(state)) {
346
+ ++nread;
347
+ if (nread > HTTP_MAX_HEADER_SIZE) {
348
+ return error(settings, "possible buffer overflow", data);
349
+ }
350
+ }
351
+ }
352
+ reexecute = false;
353
+ // p(state + " ::: " + ch + " : " + (((CR == ch) || (LF == ch)) ? ch : ("'" + (char)ch + "'")) +": "+p );
354
+
355
+ switch (state) {
356
+ /*
357
+ * this state is used after a 'Connection: close' message
358
+ * the parser will error out if it reads another message
359
+ */
360
+ case dead:
361
+ if (CR == ch || LF == ch){
362
+ break;
363
+ }
364
+ return error(settings, "Connection already closed", data);
365
+
366
+
367
+
368
+ case start_req_or_res:
369
+ if (CR == ch || LF == ch){
370
+ break;
371
+ }
372
+ flags = 0;
373
+ content_length = -1;
374
+
375
+ if (H == ch) {
376
+ state = State.res_or_resp_H;
377
+ } else {
378
+ type = ParserType.HTTP_REQUEST;
379
+ method = start_req_method_assign(ch);
380
+ if (null == method) {
381
+ return error(settings, "invalid method", data);
382
+ }
383
+ index = 1;
384
+ state = State.req_method;
385
+ }
386
+ settings.call_on_message_begin(this);
387
+ break;
388
+
389
+
390
+
391
+ case res_or_resp_H:
392
+ if (T == ch) {
393
+ type = ParserType.HTTP_RESPONSE;
394
+ state = State.res_HT;
395
+ } else {
396
+ if (E != ch) {
397
+ return error(settings, "not E", data);
398
+ }
399
+ type = ParserType.HTTP_REQUEST;
400
+ method = HTTPMethod.HTTP_HEAD;
401
+ index = 2;
402
+ state = State.req_method;
403
+ }
404
+ break;
405
+
406
+
407
+
408
+ case start_res:
409
+ flags = 0;
410
+ content_length = -1;
411
+
412
+ switch(ch) {
413
+ case H:
414
+ state = State.res_H;
415
+ break;
416
+ case CR:
417
+ case LF:
418
+ break;
419
+ default:
420
+ return error(settings, "Not H or CR/LF", data);
421
+ }
422
+
423
+ settings.call_on_message_begin(this);
424
+ break;
425
+
426
+
427
+
428
+ case res_H:
429
+ if (strict && T != ch) {
430
+ return error(settings, "Not T", data);
431
+ }
432
+ state = State.res_HT;
433
+ break;
434
+ case res_HT:
435
+ if (strict && T != ch) {
436
+ return error(settings, "Not T2", data);
437
+ }
438
+ state = State.res_HTT;
439
+ break;
440
+ case res_HTT:
441
+ if (strict && P != ch) {
442
+ return error(settings, "Not P", data);
443
+ }
444
+ state = State.res_HTTP;
445
+ break;
446
+ case res_HTTP:
447
+ if (strict && SLASH != ch) {
448
+ return error(settings, "Not '/'", data);
449
+ }
450
+ state = State.res_first_http_major;
451
+ break;
452
+
453
+
454
+
455
+ case res_first_http_major:
456
+ if (!isDigit(ch)) {
457
+ return error(settings, "Not a digit", data);
458
+ }
459
+ http_major = (int) ch - 0x30;
460
+ state = State.res_http_major;
461
+ break;
462
+
463
+ /* major HTTP version or dot */
464
+ case res_http_major:
465
+ if (DOT == ch) {
466
+ state = State.res_first_http_minor;
467
+ break;
468
+ }
469
+ if (!isDigit(ch)) {
470
+ return error(settings, "Not a digit", data);
471
+ }
472
+ http_major *= 10;
473
+ http_major += (ch - 0x30);
474
+
475
+ if (http_major > 999) {
476
+ return error(settings, "invalid http major version: ", data);
477
+ }
478
+ break;
479
+
480
+ /* first digit of minor HTTP version */
481
+ case res_first_http_minor:
482
+ if (!isDigit(ch)) {
483
+ return error(settings, "Not a digit", data);
484
+ }
485
+ http_minor = (int)ch - 0x30;
486
+ state = State.res_http_minor;
487
+ break;
488
+
489
+ /* minor HTTP version or end of request line */
490
+ case res_http_minor:
491
+ if (SPACE == ch) {
492
+ state = State.res_first_status_code;
493
+ break;
494
+ }
495
+ if (!isDigit(ch)) {
496
+ return error(settings, "Not a digit", data);
497
+ }
498
+ http_minor *= 10;
499
+ http_minor += (ch - 0x30);
500
+ if (http_minor > 999) {
501
+ return error(settings, "invalid http minor version: ", data);
502
+ }
503
+ break;
504
+
505
+
506
+
507
+ case res_first_status_code:
508
+ if (!isDigit(ch)) {
509
+ if (SPACE == ch) {
510
+ break;
511
+ }
512
+ return error(settings, "Not a digit (status code)", data);
513
+ }
514
+ status_code = (int)ch - 0x30;
515
+ state = State.res_status_code;
516
+ break;
517
+
518
+ case res_status_code:
519
+ if (!isDigit(ch)) {
520
+ switch(ch) {
521
+ case SPACE:
522
+ state = State.res_status;
523
+ break;
524
+ case CR:
525
+ state = State.res_line_almost_done;
526
+ break;
527
+ case LF:
528
+ state = State.header_field_start;
529
+ break;
530
+ default:
531
+ return error(settings, "not a valid status code", data);
532
+ }
533
+ break;
534
+ }
535
+ status_code *= 10;
536
+ status_code += (int)ch - 0x30;
537
+ if (status_code > 999) {
538
+ return error(settings, "ridiculous status code:", data);
539
+ }
540
+
541
+ if (status_code > 99) {
542
+ settings.call_on_status_complete(this);
543
+ }
544
+ break;
545
+
546
+ case res_status:
547
+ /* the human readable status. e.g. "NOT FOUND"
548
+ * we are not humans so just ignore this
549
+ * we are not men, we are devo. */
550
+
551
+ if (CR == ch) {
552
+ state = State.res_line_almost_done;
553
+ break;
554
+ }
555
+ if (LF == ch) {
556
+ state = State.header_field_start;
557
+ break;
558
+ }
559
+ break;
560
+
561
+ case res_line_almost_done:
562
+ if (strict && LF != ch) {
563
+ return error(settings, "not LF", data);
564
+ }
565
+ state = State.header_field_start;
566
+ break;
567
+
568
+
569
+
570
+ case start_req:
571
+ if (CR==ch || LF == ch) {
572
+ break;
573
+ }
574
+ flags = 0;
575
+ content_length = -1;
576
+
577
+ if(!isAtoZ(ch)){
578
+ return error(settings, "invalid method", data);
579
+ }
580
+
581
+ method = start_req_method_assign(ch);
582
+ if (null == method) {
583
+ return error(settings, "invalid method", data);
584
+ }
585
+ index = 1;
586
+ state = State.req_method;
587
+
588
+ settings.call_on_message_begin(this);
589
+ break;
590
+
591
+
592
+
593
+ case req_method:
594
+ if (0 == ch) {
595
+ return error(settings, "NULL in method", data);
596
+ }
597
+
598
+ byte [] arr = method.bytes;
599
+
600
+ if (SPACE == ch && index == arr.length) {
601
+ state = State.req_spaces_before_url;
602
+ } else if (arr[index] == ch) {
603
+ // wuhu!
604
+ } else if (HTTPMethod.HTTP_CONNECT == method) {
605
+ if (1 == index && H == ch) {
606
+ method = HTTPMethod.HTTP_CHECKOUT;
607
+ } else if (2 == index && P == ch) {
608
+ method = HTTPMethod.HTTP_COPY;
609
+ }
610
+ } else if (HTTPMethod.HTTP_MKCOL == method) {
611
+ if (1 == index && O == ch) {
612
+ method = HTTPMethod.HTTP_MOVE;
613
+ } else if (1 == index && E == ch) {
614
+ method = HTTPMethod.HTTP_MERGE;
615
+ } else if (1 == index && DASH == ch) { /* M-SEARCH */
616
+ method = HTTPMethod.HTTP_MSEARCH;
617
+ } else if (2 == index && A == ch) {
618
+ method = HTTPMethod.HTTP_MKACTIVITY;
619
+ }
620
+ } else if (1 == index && HTTPMethod.HTTP_POST == method) {
621
+ if(R == ch) {
622
+ method = HTTPMethod.HTTP_PROPFIND; /* or HTTP_PROPPATCH */
623
+ }else if(U == ch){
624
+ method = HTTPMethod.HTTP_PUT; /* or HTTP_PURGE */
625
+ }else if(A == ch){
626
+ method = HTTPMethod.HTTP_PATCH;
627
+ }
628
+ } else if (1 == index) {
629
+ if (HTTPMethod.HTTP_LINK == method) {
630
+ if (O == ch) {
631
+ method = HTTPMethod.HTTP_LOCK;
632
+ }
633
+ }
634
+ } else if (2 == index) {
635
+ if (HTTPMethod.HTTP_PUT == method) {
636
+ if (R == ch) {
637
+ method = HTTPMethod.HTTP_PURGE;
638
+ }
639
+ }
640
+ } else if (3 == index) {
641
+ if (HTTPMethod.HTTP_UNLINK == method) {
642
+ if (U == ch) {
643
+ method = HTTPMethod.HTTP_UNSUBSCRIBE;
644
+ } else if (O == ch) {
645
+ method = HTTPMethod.HTTP_UNLOCK;
646
+ }
647
+ }
648
+ } else if(4 == index && HTTPMethod.HTTP_PROPFIND == method && P == ch){
649
+ method = HTTPMethod.HTTP_PROPPATCH;
650
+ } else {
651
+ return error(settings, "Invalid HTTP method", data);
652
+ }
653
+
654
+ ++index;
655
+ break;
656
+
657
+
658
+
659
+ /******************* URL *******************/
660
+ case req_spaces_before_url:
661
+ if (SPACE == ch) {
662
+ break;
663
+ }
664
+ url_mark = p;
665
+ if(HTTPMethod.HTTP_CONNECT == method){
666
+ state = req_host_start;
667
+ }
668
+
669
+ state = parse_url_char(ch);
670
+ if(state == dead){
671
+ return error(settings, "Invalid something", data);
672
+ }
673
+ break;
674
+
675
+
676
+ case req_schema:
677
+ case req_schema_slash:
678
+ case req_schema_slash_slash:
679
+ case req_host_start:
680
+ case req_host_v6_start:
681
+ case req_host_v6:
682
+ case req_port_start:
683
+ switch (ch) {
684
+ /* No whitespace allowed here */
685
+ case SPACE:
686
+ case CR:
687
+ case LF:
688
+ return error(settings, "unexpected char in path", data);
689
+ default:
690
+ state = parse_url_char(ch);
691
+ if(dead == state){
692
+ return error(settings, "unexpected char in path", data);
693
+ }
694
+ }
695
+ break;
696
+
697
+ case req_host:
698
+ case req_host_v6_end:
699
+ case req_port:
700
+ case req_path:
701
+ case req_query_string_start:
702
+ case req_query_string:
703
+ case req_fragment_start:
704
+ case req_fragment:
705
+ switch (ch) {
706
+ case SPACE:
707
+ settings.call_on_url(this, data, url_mark, p-url_mark);
708
+ settings.call_on_path(this, data, url_mark, p - url_mark);
709
+ url_mark = -1;
710
+ state = State.req_http_start;
711
+ break;
712
+ case CR:
713
+ case LF:
714
+ http_major = 0;
715
+ http_minor = 9;
716
+ state = (CR == ch) ? req_line_almost_done : header_field_start;
717
+ settings.call_on_url(this, data, url_mark, p-url_mark); //TODO check params!!!
718
+ settings.call_on_path(this, data, url_mark, p-url_mark);
719
+ url_mark = -1;
720
+ break;
721
+ default:
722
+ state = parse_url_char(ch);
723
+ if(dead == state){
724
+ return error(settings, "unexpected char in path", data);
725
+ }
726
+ }
727
+ break;
728
+ /******************* URL *******************/
729
+
730
+
731
+
732
+ /******************* HTTP 1.1 *******************/
733
+ case req_http_start:
734
+ switch (ch) {
735
+ case H:
736
+ state = State.req_http_H;
737
+ break;
738
+ case SPACE:
739
+ break;
740
+ default:
741
+ return error(settings, "error in req_http_H", data);
742
+ }
743
+ break;
744
+
745
+ case req_http_H:
746
+ if (strict && T != ch) {
747
+ return error(settings, "unexpected char", data);
748
+ }
749
+ state = State.req_http_HT;
750
+ break;
751
+
752
+ case req_http_HT:
753
+ if (strict && T != ch) {
754
+ return error(settings, "unexpected char", data);
755
+ }
756
+ state = State.req_http_HTT;
757
+ break;
758
+
759
+ case req_http_HTT:
760
+ if (strict && P != ch) {
761
+ return error(settings, "unexpected char", data);
762
+ }
763
+ state = State.req_http_HTTP;
764
+ break;
765
+
766
+ case req_http_HTTP:
767
+ if (strict && SLASH != ch) {
768
+ return error(settings, "unexpected char", data);
769
+ }
770
+ state = req_first_http_major;
771
+ break;
772
+
773
+ /* first digit of major HTTP version */
774
+ case req_first_http_major:
775
+ if (!isDigit(ch)) {
776
+ return error(settings, "non digit in http major", data);
777
+ }
778
+ http_major = (int)ch - 0x30;
779
+ state = State.req_http_major;
780
+ break;
781
+
782
+ /* major HTTP version or dot */
783
+ case req_http_major:
784
+ if (DOT == ch) {
785
+ state = State.req_first_http_minor;
786
+ break;
787
+ }
788
+
789
+ if (!isDigit(ch)) {
790
+ return error(settings, "non digit in http major", data);
791
+ }
792
+
793
+ http_major *= 10;
794
+ http_major += (int)ch - 0x30;
795
+
796
+ if (http_major > 999) {
797
+ return error(settings, "ridiculous http major", data);
798
+ };
799
+ break;
800
+
801
+ /* first digit of minor HTTP version */
802
+ case req_first_http_minor:
803
+ if (!isDigit(ch)) {
804
+ return error(settings, "non digit in http minor", data);
805
+ }
806
+ http_minor = (int)ch - 0x30;
807
+ state = State.req_http_minor;
808
+ break;
809
+
810
+ case req_http_minor:
811
+ if (ch == CR) {
812
+ state = State.req_line_almost_done;
813
+ break;
814
+ }
815
+
816
+ if (ch == LF) {
817
+ state = State.header_field_start;
818
+ break;
819
+ }
820
+
821
+ /* XXX allow spaces after digit? */
822
+
823
+ if (!isDigit(ch)) {
824
+ return error(settings, "non digit in http minor", data);
825
+ }
826
+
827
+ http_minor *= 10;
828
+ http_minor += (int)ch - 0x30;
829
+
830
+
831
+ if (http_minor > 999) {
832
+ return error(settings, "ridiculous http minor", data);
833
+ };
834
+
835
+ break;
836
+
837
+ /* end of request line */
838
+ case req_line_almost_done:
839
+ {
840
+ if (ch != LF) {
841
+ return error(settings, "missing LF after request line", data);
842
+ }
843
+ state = header_field_start;
844
+ break;
845
+ }
846
+
847
+ /******************* HTTP 1.1 *******************/
848
+
849
+
850
+
851
+ /******************* Header *******************/
852
+ case header_field_start:
853
+ {
854
+ if (ch == CR) {
855
+ state = headers_almost_done;
856
+ break;
857
+ }
858
+
859
+ if (ch == LF) {
860
+ /* they might be just sending \n instead of \r\n so this would be
861
+ * the second \n to denote the end of headers*/
862
+ state = State.headers_almost_done;
863
+ reexecute = true;
864
+ break;
865
+ }
866
+
867
+ c = token(ch);
868
+
869
+ if (0 == c) {
870
+ return error(settings, "invalid char in header:", data);
871
+ }
872
+
873
+ header_field_mark = p;
874
+
875
+ index = 0;
876
+ state = State.header_field;
877
+
878
+ switch (c) {
879
+ case C:
880
+ header_state = HState.C;
881
+ break;
882
+
883
+ case P:
884
+ header_state = HState.matching_proxy_connection;
885
+ break;
886
+
887
+ case T:
888
+ header_state = HState.matching_transfer_encoding;
889
+ break;
890
+
891
+ case U:
892
+ header_state = HState.matching_upgrade;
893
+ break;
894
+
895
+ default:
896
+ header_state = HState.general;
897
+ break;
898
+ }
899
+ break;
900
+ }
901
+
902
+
903
+
904
+ case header_field:
905
+ {
906
+ c = token(ch);
907
+ if (0 != c) {
908
+ switch (header_state) {
909
+ case general:
910
+ break;
911
+
912
+ case C:
913
+ index++;
914
+ header_state = (O == c ? HState.CO : HState.general);
915
+ break;
916
+
917
+ case CO:
918
+ index++;
919
+ header_state = (N == c ? HState.CON : HState.general);
920
+ break;
921
+
922
+ case CON:
923
+ index++;
924
+ switch (c) {
925
+ case N:
926
+ header_state = HState.matching_connection;
927
+ break;
928
+ case T:
929
+ header_state = HState.matching_content_length;
930
+ break;
931
+ default:
932
+ header_state = HState.general;
933
+ break;
934
+ }
935
+ break;
936
+
937
+ /* connection */
938
+
939
+ case matching_connection:
940
+ index++;
941
+ if (index > CONNECTION.length || c != CONNECTION[index]) {
942
+ header_state = HState.general;
943
+ } else if (index == CONNECTION.length-1) {
944
+ header_state = HState.connection;
945
+ }
946
+ break;
947
+
948
+ /* proxy-connection */
949
+
950
+ case matching_proxy_connection:
951
+ index++;
952
+ if (index > PROXY_CONNECTION.length || c != PROXY_CONNECTION[index]) {
953
+ header_state = HState.general;
954
+ } else if (index == PROXY_CONNECTION.length-1) {
955
+ header_state = HState.connection;
956
+ }
957
+ break;
958
+
959
+ /* content-length */
960
+
961
+ case matching_content_length:
962
+ index++;
963
+ if (index > CONTENT_LENGTH.length || c != CONTENT_LENGTH[index]) {
964
+ header_state = HState.general;
965
+ } else if (index == CONTENT_LENGTH.length-1) {
966
+ header_state = HState.content_length;
967
+ }
968
+ break;
969
+
970
+ /* transfer-encoding */
971
+
972
+ case matching_transfer_encoding:
973
+ index++;
974
+ if (index > TRANSFER_ENCODING.length || c != TRANSFER_ENCODING[index]) {
975
+ header_state = HState.general;
976
+ } else if (index == TRANSFER_ENCODING.length-1) {
977
+ header_state = HState.transfer_encoding;
978
+ }
979
+ break;
980
+
981
+ /* upgrade */
982
+
983
+ case matching_upgrade:
984
+ index++;
985
+ if (index > UPGRADE.length || c != UPGRADE[index]) {
986
+ header_state = HState.general;
987
+ } else if (index == UPGRADE.length-1) {
988
+ header_state = HState.upgrade;
989
+ }
990
+ break;
991
+
992
+ case connection:
993
+ case content_length:
994
+ case transfer_encoding:
995
+ case upgrade:
996
+ if (SPACE != ch) header_state = HState.general;
997
+ break;
998
+
999
+ default:
1000
+ return error(settings, "Unknown Header State", data);
1001
+ } // switch: header_state
1002
+ break;
1003
+ } // 0 != c
1004
+
1005
+ if (COLON == ch) {
1006
+ settings.call_on_header_field(this, data, header_field_mark, p-header_field_mark);
1007
+ header_field_mark = -1;
1008
+
1009
+ state = State.header_value_start;
1010
+ break;
1011
+ }
1012
+
1013
+ if (CR == ch) {
1014
+ state = State.header_almost_done;
1015
+ settings.call_on_header_field(this, data, header_field_mark, p-header_field_mark);
1016
+
1017
+ header_field_mark = -1;
1018
+ break;
1019
+ }
1020
+
1021
+ if (ch == LF) {
1022
+ settings.call_on_header_field(this, data, header_field_mark, p-header_field_mark);
1023
+ header_field_mark = -1;
1024
+
1025
+ state = State.header_field_start;
1026
+ break;
1027
+ }
1028
+
1029
+ return error(settings, "invalid header field", data);
1030
+ }
1031
+
1032
+
1033
+
1034
+ case header_value_start:
1035
+ {
1036
+ if ((SPACE == ch) || (TAB == ch)) break;
1037
+
1038
+ header_value_mark = p;
1039
+
1040
+ state = State.header_value;
1041
+ index = 0;
1042
+
1043
+
1044
+ if (CR == ch) {
1045
+ settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1046
+ header_value_mark = -1;
1047
+
1048
+ header_state = HState.general;
1049
+ state = State.header_almost_done;
1050
+ break;
1051
+ }
1052
+
1053
+ if (LF == ch) {
1054
+ settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1055
+ header_value_mark = -1;
1056
+
1057
+ state = State.header_field_start;
1058
+ break;
1059
+ }
1060
+
1061
+
1062
+ c = upper(ch);
1063
+
1064
+ switch (header_state) {
1065
+ case upgrade:
1066
+ flags |= F_UPGRADE;
1067
+ header_state = HState.general;
1068
+ break;
1069
+
1070
+ case transfer_encoding:
1071
+ /* looking for 'Transfer-Encoding: chunked' */
1072
+ if (C == c) {
1073
+ header_state = HState.matching_transfer_encoding_chunked;
1074
+ } else {
1075
+ header_state = HState.general;
1076
+ }
1077
+ break;
1078
+
1079
+ case content_length:
1080
+ if (!isDigit(ch)) {
1081
+ return error(settings, "Content-Length not numeric", data);
1082
+ }
1083
+ content_length = (int)ch - 0x30;
1084
+ break;
1085
+
1086
+ case connection:
1087
+ /* looking for 'Connection: keep-alive' */
1088
+ if (K == c) {
1089
+ header_state = HState.matching_connection_keep_alive;
1090
+ /* looking for 'Connection: close' */
1091
+ } else if (C == c) {
1092
+ header_state = HState.matching_connection_close;
1093
+ } else {
1094
+ header_state = HState.general;
1095
+ }
1096
+ break;
1097
+
1098
+ default:
1099
+ header_state = HState.general;
1100
+ break;
1101
+ }
1102
+ break;
1103
+ } // header value start
1104
+
1105
+
1106
+
1107
+ case header_value:
1108
+ {
1109
+
1110
+ if (CR == ch) {
1111
+ settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1112
+ header_value_mark = -1;
1113
+
1114
+ state = State.header_almost_done;
1115
+ break;
1116
+ }
1117
+
1118
+ if (LF == ch) {
1119
+ settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1120
+ header_value_mark = -1;
1121
+ state = header_almost_done;
1122
+ reexecute = true;
1123
+ break;
1124
+ }
1125
+
1126
+ c = upper(ch);
1127
+ switch (header_state) {
1128
+ case general:
1129
+ break;
1130
+
1131
+ case connection:
1132
+ case transfer_encoding:
1133
+ return error(settings, "Shouldn't be here", data);
1134
+
1135
+ case content_length:
1136
+ if (SPACE == ch) {
1137
+ break;
1138
+ }
1139
+ if (!isDigit(ch)) {
1140
+ return error(settings, "Content-Length not numeric", data);
1141
+ }
1142
+
1143
+ long t = content_length;
1144
+ t *= 10;
1145
+ t += (long)ch - 0x30;
1146
+
1147
+ /* Overflow? */
1148
+ // t will wrap and become negative ...
1149
+ if (t < content_length) {
1150
+ return error(settings, "Invalid content length", data);
1151
+ }
1152
+ content_length = t;
1153
+ break;
1154
+
1155
+ /* Transfer-Encoding: chunked */
1156
+ case matching_transfer_encoding_chunked:
1157
+ index++;
1158
+ if (index > CHUNKED.length || c != CHUNKED[index]) {
1159
+ header_state = HState.general;
1160
+ } else if (index == CHUNKED.length-1) {
1161
+ header_state = HState.transfer_encoding_chunked;
1162
+ }
1163
+ break;
1164
+
1165
+ /* looking for 'Connection: keep-alive' */
1166
+ case matching_connection_keep_alive:
1167
+ index++;
1168
+ if (index > KEEP_ALIVE.length || c != KEEP_ALIVE[index]) {
1169
+ header_state = HState.general;
1170
+ } else if (index == KEEP_ALIVE.length-1) {
1171
+ header_state = HState.connection_keep_alive;
1172
+ }
1173
+ break;
1174
+
1175
+ /* looking for 'Connection: close' */
1176
+ case matching_connection_close:
1177
+ index++;
1178
+ if (index > CLOSE.length || c != CLOSE[index]) {
1179
+ header_state = HState.general;
1180
+ } else if (index == CLOSE.length-1) {
1181
+ header_state = HState.connection_close;
1182
+ }
1183
+ break;
1184
+
1185
+ case transfer_encoding_chunked:
1186
+ case connection_keep_alive:
1187
+ case connection_close:
1188
+ if (SPACE != ch) header_state = HState.general;
1189
+ break;
1190
+
1191
+ default:
1192
+ state = State.header_value;
1193
+ header_state = HState.general;
1194
+ break;
1195
+ }
1196
+ break;
1197
+ } // header_value
1198
+
1199
+
1200
+
1201
+ case header_almost_done:
1202
+ if (!header_almost_done(ch)) {
1203
+ return error(settings, "incorrect header ending, expecting LF", data);
1204
+ }
1205
+ break;
1206
+
1207
+ case header_value_lws:
1208
+ if (SPACE == ch || TAB == ch ){
1209
+ state = header_value_start;
1210
+ } else {
1211
+ state = header_field_start;
1212
+ reexecute = true;
1213
+ }
1214
+ break;
1215
+
1216
+ case headers_almost_done:
1217
+ if (LF != ch) {
1218
+ return error(settings, "header not properly completed", data);
1219
+ }
1220
+ if (0 != (flags & F_TRAILING)) {
1221
+ /* End of a chunked request */
1222
+ state = new_message();
1223
+ settings.call_on_headers_complete(this);
1224
+ settings.call_on_message_complete(this);
1225
+ break;
1226
+ }
1227
+
1228
+ state = headers_done;
1229
+
1230
+ if (0 != (flags & F_UPGRADE) || HTTPMethod.HTTP_CONNECT == method) {
1231
+ upgrade = true;
1232
+ }
1233
+
1234
+ /* Here we call the headers_complete callback. This is somewhat
1235
+ * different than other callbacks because if the user returns 1, we
1236
+ * will interpret that as saying that this message has no body. This
1237
+ * is needed for the annoying case of receiving a response to a HEAD
1238
+ * request.
1239
+ */
1240
+
1241
+ /* (responses to HEAD request contain a CONTENT-LENGTH header
1242
+ * but no content)
1243
+ *
1244
+ * Consider what to do here: I don't like the idea of the callback
1245
+ * interface having a different contract in the case of HEAD
1246
+ * responses. The alternatives would be either to:
1247
+ *
1248
+ * a.) require the header_complete callback to implement a different
1249
+ * interface or
1250
+ *
1251
+ * b.) provide an overridden execute(bla, bla, boolean
1252
+ * parsingHeader) implementation ...
1253
+ */
1254
+
1255
+ /*TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO */
1256
+ if (null != settings.on_headers_complete) {
1257
+ settings.call_on_headers_complete(this);
1258
+ //return;
1259
+ }
1260
+
1261
+ // if (null != settings.on_headers_complete) {
1262
+ // switch (settings.on_headers_complete.cb(parser)) {
1263
+ // case 0:
1264
+ // break;
1265
+ //
1266
+ // case 1:
1267
+ // flags |= F_SKIPBODY;
1268
+ // break;
1269
+ //
1270
+ // default:
1271
+ // return p - data; /* Error */ // TODO // RuntimeException ?
1272
+ // }
1273
+ // }
1274
+ reexecute = true;
1275
+ break;
1276
+
1277
+ case headers_done:
1278
+ if (strict && (LF != ch)) {
1279
+ return error(settings, "STRICT CHECK", data); //TODO correct error msg
1280
+ }
1281
+
1282
+ nread = 0;
1283
+
1284
+ // Exit, the rest of the connect is in a different protocol.
1285
+ if (upgrade) {
1286
+ state = new_message();
1287
+ settings.call_on_message_complete(this);
1288
+ return data.position()-this.p_start;
1289
+ }
1290
+
1291
+ if (0 != (flags & F_SKIPBODY)) {
1292
+ state = new_message();
1293
+ settings.call_on_message_complete(this);
1294
+ } else if (0 != (flags & F_CHUNKED)) {
1295
+ /* chunked encoding - ignore Content-Length header */
1296
+ state = State.chunk_size_start;
1297
+ } else {
1298
+ if (content_length == 0) {
1299
+ /* Content-Length header given but zero: Content-Length: 0\r\n */
1300
+ state = new_message();
1301
+ settings.call_on_message_complete(this);
1302
+ } else if (content_length != -1) {
1303
+ /* Content-Length header given and non-zero */
1304
+ state = State.body_identity;
1305
+ } else {
1306
+ if (type == ParserType.HTTP_REQUEST || !http_message_needs_eof()) {
1307
+ /* Assume content-length 0 - read the next */
1308
+ state = new_message();
1309
+ settings.call_on_message_complete(this);
1310
+ } else {
1311
+ /* Read body until EOF */
1312
+ state = State.body_identity_eof;
1313
+ }
1314
+ }
1315
+ }
1316
+
1317
+ break;
1318
+ /******************* Header *******************/
1319
+
1320
+
1321
+
1322
+
1323
+ /******************* Body *******************/
1324
+ case body_identity:
1325
+ to_read = min(pe - p, content_length); //TODO change to use buffer?
1326
+ body_mark = p;
1327
+
1328
+ if (to_read > 0) {
1329
+ settings.call_on_body(this, data, p, to_read);
1330
+ data.position(p+to_read);
1331
+ content_length -= to_read;
1332
+
1333
+ if (content_length == 0) {
1334
+ state = message_done;
1335
+ reexecute = true;
1336
+ }
1337
+ }
1338
+ break;
1339
+
1340
+
1341
+
1342
+ case body_identity_eof:
1343
+ to_read = pe - p; // TODO change to use buffer ?
1344
+ if (to_read > 0) {
1345
+ settings.call_on_body(this, data, p, to_read);
1346
+ data.position(p+to_read);
1347
+ }
1348
+ break;
1349
+
1350
+ case message_done:
1351
+ state = new_message();
1352
+ settings.call_on_message_complete(this);
1353
+ break;
1354
+ /******************* Body *******************/
1355
+
1356
+
1357
+
1358
+ /******************* Chunk *******************/
1359
+ case chunk_size_start:
1360
+ if (1 != this.nread) {
1361
+ return error(settings, "nread != 1 (chunking)", data);
1362
+
1363
+ }
1364
+ if (0 == (flags & F_CHUNKED)) {
1365
+ return error(settings, "not chunked", data);
1366
+ }
1367
+
1368
+ c = UNHEX[chi];
1369
+ if (c == -1) {
1370
+ return error(settings, "invalid hex char in chunk content length", data);
1371
+ }
1372
+ content_length = c;
1373
+ state = State.chunk_size;
1374
+ break;
1375
+
1376
+
1377
+
1378
+ case chunk_size:
1379
+ if (0 == (flags & F_CHUNKED)) {
1380
+ return error(settings, "not chunked", data);
1381
+ }
1382
+
1383
+ if (CR == ch) {
1384
+ state = State.chunk_size_almost_done;
1385
+ break;
1386
+ }
1387
+
1388
+ c = UNHEX[chi];
1389
+
1390
+ if (c == -1) {
1391
+ if (SEMI == ch || SPACE == ch) {
1392
+ state = State.chunk_parameters;
1393
+ break;
1394
+ }
1395
+ return error(settings, "invalid hex char in chunk content length", data);
1396
+ }
1397
+ long t = content_length;
1398
+
1399
+ t *= 16;
1400
+ t += c;
1401
+ if(t < content_length){
1402
+ return error(settings, "invalid content length", data);
1403
+ }
1404
+ content_length = t;
1405
+ break;
1406
+
1407
+
1408
+
1409
+ case chunk_parameters:
1410
+ if (0 == (flags & F_CHUNKED)) {
1411
+ return error(settings, "not chunked", data);
1412
+ }
1413
+ /* just ignore this shit. TODO check for overflow */
1414
+ if (CR == ch) {
1415
+ state = State.chunk_size_almost_done;
1416
+ break;
1417
+ }
1418
+ break;
1419
+
1420
+
1421
+
1422
+ case chunk_size_almost_done:
1423
+ if (0 == (flags & F_CHUNKED)) {
1424
+ return error(settings, "not chunked", data);
1425
+ }
1426
+ if (strict && LF != ch) {
1427
+ return error(settings, "expected LF at end of chunk size", data);
1428
+ }
1429
+
1430
+ this.nread = 0;
1431
+
1432
+ if (0 == content_length) {
1433
+ flags |= F_TRAILING;
1434
+ state = State.header_field_start;
1435
+ } else {
1436
+ state = State.chunk_data;
1437
+ }
1438
+ break;
1439
+
1440
+
1441
+
1442
+ case chunk_data:
1443
+ //TODO Apply changes from C version for s_chunk_data
1444
+ if (0 == (flags & F_CHUNKED)) {
1445
+ return error(settings, "not chunked", data);
1446
+ }
1447
+
1448
+ to_read = min(pe-p, content_length);
1449
+ if (to_read > 0) {
1450
+ settings.call_on_body(this, data, p, to_read);
1451
+ data.position(p+to_read);
1452
+ }
1453
+
1454
+ if (to_read == content_length) {
1455
+ state = State.chunk_data_almost_done;
1456
+ }
1457
+
1458
+ content_length -= to_read;
1459
+ break;
1460
+
1461
+
1462
+
1463
+ case chunk_data_almost_done:
1464
+ if (0 == (flags & F_CHUNKED)) {
1465
+ return error(settings, "not chunked", data);
1466
+ }
1467
+ if (strict && CR != ch) {
1468
+ return error(settings, "chunk data terminated incorrectly, expected CR", data);
1469
+ }
1470
+ state = State.chunk_data_done;
1471
+ //TODO CALLBACK_DATA(body)
1472
+ // settings.call_on_body(this, data,p,?);
1473
+ break;
1474
+
1475
+
1476
+
1477
+ case chunk_data_done:
1478
+ if (0 == (flags & F_CHUNKED)) {
1479
+ return error(settings, "not chunked", data);
1480
+ }
1481
+ if (strict && LF != ch) {
1482
+ return error(settings, "chunk data terminated incorrectly, expected LF", data);
1483
+ }
1484
+ state = State.chunk_size_start;
1485
+ break;
1486
+ /******************* Chunk *******************/
1487
+
1488
+
1489
+
1490
+ default:
1491
+ return error(settings, "unhandled state", data);
1492
+
1493
+ } // switch
1494
+ } // while
1495
+
1496
+ p = data.position();
1497
+
1498
+
1499
+ /* Reaching this point assumes that we only received part of a
1500
+ * message, inform the callbacks about the progress made so far*/
1501
+
1502
+ settings.call_on_header_field(this, data, header_field_mark, p-header_field_mark);
1503
+ settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1504
+ settings.call_on_url (this, data, url_mark, p-url_mark);
1505
+ settings.call_on_path (this, data, url_mark, p-url_mark);
1506
+
1507
+ return data.position()-this.p_start;
1508
+ } // execute
1509
+
1510
+ int error (ParserSettings settings, String mes, ByteBuffer data) {
1511
+ settings.call_on_error(this, mes, data, this.p_start);
1512
+ this.state = State.dead;
1513
+ return data.position()-this.p_start;
1514
+ }
1515
+
1516
+ public boolean http_message_needs_eof() {
1517
+ if(type == ParserType.HTTP_REQUEST){
1518
+ return false;
1519
+ }
1520
+ /* See RFC 2616 section 4.4 */
1521
+ if ((status_code / 100 == 1) || /* 1xx e.g. Continue */
1522
+ (status_code == 204) || /* No Content */
1523
+ (status_code == 304) || /* Not Modified */
1524
+ (flags & F_SKIPBODY) != 0) { /* response to a HEAD request */
1525
+ return false;
1526
+ }
1527
+ if ((flags & F_CHUNKED) != 0 || content_length != -1) {
1528
+ return false;
1529
+ }
1530
+
1531
+ return true;
1532
+ }
1533
+
1534
+ /* If http_should_keep_alive() in the on_headers_complete or
1535
+ * on_message_complete callback returns false, then this should be
1536
+ * the last message on the connection.
1537
+ * If you are the server, respond with the "Connection: close" header.
1538
+ * If you are the client, close the connection.
1539
+ */
1540
+ public boolean http_should_keep_alive() {
1541
+ if (http_major > 0 && http_minor > 0) {
1542
+ /* HTTP/1.1 */
1543
+ if ( 0 != (flags & F_CONNECTION_CLOSE) ) {
1544
+ return false;
1545
+ }
1546
+ } else {
1547
+ /* HTTP/1.0 or earlier */
1548
+ if ( 0 == (flags & F_CONNECTION_KEEP_ALIVE) ) {
1549
+ return false;
1550
+ }
1551
+ }
1552
+ return !http_message_needs_eof();
1553
+ }
1554
+
1555
+ public int parse_url(ByteBuffer data, boolean is_connect, HTTPParserUrl u) {
1556
+
1557
+ UrlFields uf = UrlFields.UF_MAX;
1558
+ UrlFields old_uf = UrlFields.UF_MAX;
1559
+ u.port = 0;
1560
+ u.field_set = 0;
1561
+ state = (is_connect ? State.req_host_start : State.req_spaces_before_url);
1562
+ int p_init = data.position();
1563
+ int p = 0;
1564
+ byte ch = 0;
1565
+ while (data.position() != data.limit()) {
1566
+ p = data.position();
1567
+ ch = data.get();
1568
+ state = parse_url_char(ch);
1569
+ switch(state) {
1570
+ case dead:
1571
+ return 1;
1572
+
1573
+ /* Skip delimeters */
1574
+ case req_schema_slash:
1575
+ case req_schema_slash_slash:
1576
+ case req_host_start:
1577
+ case req_host_v6_start:
1578
+ case req_host_v6_end:
1579
+ case req_port_start:
1580
+ case req_query_string_start:
1581
+ case req_fragment_start:
1582
+ continue;
1583
+
1584
+ case req_schema:
1585
+ uf = UrlFields.UF_SCHEMA;
1586
+ break;
1587
+
1588
+ case req_host:
1589
+ case req_host_v6:
1590
+ uf = UrlFields.UF_HOST;
1591
+ break;
1592
+
1593
+ case req_port:
1594
+ uf = UrlFields.UF_PORT;
1595
+ break;
1596
+
1597
+ case req_path:
1598
+ uf = UrlFields.UF_PATH;
1599
+ break;
1600
+
1601
+ case req_query_string:
1602
+ uf = UrlFields.UF_QUERY;
1603
+ break;
1604
+
1605
+ case req_fragment:
1606
+ uf = UrlFields.UF_FRAGMENT;
1607
+ break;
1608
+
1609
+ default:
1610
+ return 1;
1611
+ }
1612
+ /* Nothing's changed; soldier on */
1613
+ if (uf == old_uf) {
1614
+ u.field_data[uf.getIndex()].len++;
1615
+ continue;
1616
+ }
1617
+
1618
+ u.field_data[uf.getIndex()].off = p - p_init;
1619
+ u.field_data[uf.getIndex()].len = 1;
1620
+
1621
+ u.field_set |= (1 << uf.getIndex());
1622
+ old_uf = uf;
1623
+
1624
+ }
1625
+
1626
+ /* CONNECT requests can only contain "hostname:port" */
1627
+ if (is_connect && u.field_set != ((1 << UrlFields.UF_HOST.getIndex())|(1 << UrlFields.UF_PORT.getIndex()))) {
1628
+ return 1;
1629
+ }
1630
+
1631
+ /* Make sure we don't end somewhere unexpected */
1632
+ switch (state) {
1633
+ case req_host_v6_start:
1634
+ case req_host_v6:
1635
+ case req_host_v6_end:
1636
+ case req_host:
1637
+ case req_port_start:
1638
+ return 1;
1639
+ default:
1640
+ break;
1641
+ }
1642
+
1643
+ if (0 != (u.field_set & (1 << UrlFields.UF_PORT.getIndex()))) {
1644
+ /* Don't bother with endp; we've already validated the string */
1645
+ int v = strtoi(data, p_init + u.field_data[UrlFields.UF_PORT.getIndex()].off);
1646
+
1647
+ /* Ports have a max value of 2^16 */
1648
+ if (v > 0xffff) {
1649
+ return 1;
1650
+ }
1651
+
1652
+ u.port = v;
1653
+ }
1654
+
1655
+ return 0;
1656
+ }
1657
+
1658
+ //hacky reimplementation of srttoul, tailored for our simple needs
1659
+ //we only need to parse port val, so no negative values etc
1660
+ int strtoi(ByteBuffer data, int start_pos) {
1661
+ data.position(start_pos);
1662
+ byte ch;
1663
+ String str = "";
1664
+ while(data.position() < data.limit()) {
1665
+ ch = data.get();
1666
+ if(Character.isWhitespace((char)ch)){
1667
+ continue;
1668
+ }
1669
+ if(isDigit(ch)){
1670
+ str = str + (char)ch; //TODO replace with something less hacky
1671
+ }else{
1672
+ break;
1673
+ }
1674
+ }
1675
+ return Integer.parseInt(str);
1676
+ }
1677
+
1678
+ boolean isDigit(byte b) {
1679
+ if (b >= 0x30 && b <=0x39) {
1680
+ return true;
1681
+ }
1682
+ return false;
1683
+ }
1684
+
1685
+ boolean isHex(byte b) {
1686
+ return isDigit(b) || (lower(b) >= 0x61 /*a*/ && lower(b) <= 0x66 /*f*/);
1687
+ }
1688
+
1689
+ boolean isAtoZ(byte b) {
1690
+ byte c = lower(b);
1691
+ return (c>= 0x61 /*a*/ && c <= 0x7a /*z*/);
1692
+ }
1693
+
1694
+
1695
+ byte lower (byte b) {
1696
+ return (byte)(b|0x20);
1697
+ }
1698
+
1699
+ byte upper(byte b) {
1700
+ char c = (char)(b);
1701
+ return (byte)Character.toUpperCase(c);
1702
+ }
1703
+
1704
+ byte token(byte b) {
1705
+ if(!strict){
1706
+ return (b == (byte)' ') ? (byte)' ' : (byte)tokens[b] ;
1707
+ }else{
1708
+ return (byte)tokens[b];
1709
+ }
1710
+ }
1711
+
1712
+ boolean isHostChar(byte ch){
1713
+ if(!strict){
1714
+ return (isAtoZ(ch)) || isDigit(ch) || DOT == ch || DASH == ch || UNDER == ch ;
1715
+ }else{
1716
+ return (isAtoZ(ch)) || isDigit(ch) || DOT == ch || DASH == ch;
1717
+ }
1718
+ }
1719
+
1720
+ boolean isNormalUrlChar(int chi) {
1721
+ if(!strict){
1722
+ return (chi > 0x80) || normal_url_char[chi];
1723
+ }else{
1724
+ return normal_url_char[chi];
1725
+ }
1726
+ }
1727
+
1728
+ HTTPMethod start_req_method_assign(byte c){
1729
+ switch (c) {
1730
+ case C: return HTTPMethod.HTTP_CONNECT; /* or COPY, CHECKOUT */
1731
+ case D: return HTTPMethod.HTTP_DELETE;
1732
+ case G: return HTTPMethod.HTTP_GET;
1733
+ case H: return HTTPMethod.HTTP_HEAD;
1734
+ case L: return HTTPMethod.HTTP_LINK; /* or LOCK */
1735
+ case M: return HTTPMethod.HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */
1736
+ case N: return HTTPMethod.HTTP_NOTIFY;
1737
+ case O: return HTTPMethod.HTTP_OPTIONS;
1738
+ case P: return HTTPMethod.HTTP_POST; /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
1739
+ case R: return HTTPMethod.HTTP_REPORT;
1740
+ case S: return HTTPMethod.HTTP_SUBSCRIBE;
1741
+ case T: return HTTPMethod.HTTP_TRACE;
1742
+ case U: return HTTPMethod.HTTP_UNLINK; /* or UNSUBSCRIBE, UNLOCK */
1743
+ }
1744
+ return null; // ugh.
1745
+ }
1746
+
1747
+ boolean header_almost_done(byte ch) {
1748
+ if (strict && LF != ch) {
1749
+ return false;
1750
+ }
1751
+
1752
+ state = State.header_value_lws;
1753
+ // TODO java enums support some sort of bitflag mechanism !?
1754
+ switch (header_state) {
1755
+ case connection_keep_alive:
1756
+ flags |= F_CONNECTION_KEEP_ALIVE;
1757
+ break;
1758
+ case connection_close:
1759
+ flags |= F_CONNECTION_CLOSE;
1760
+ break;
1761
+ case transfer_encoding_chunked:
1762
+ flags |= F_CHUNKED;
1763
+ break;
1764
+ default:
1765
+ break;
1766
+ }
1767
+ return true;
1768
+ }
1769
+
1770
+ // boolean headers_almost_done (byte ch, ParserSettings settings) {
1771
+ // } // headers_almost_done
1772
+
1773
+
1774
+ final int min (int a, int b) {
1775
+ return a < b ? a : b;
1776
+ }
1777
+
1778
+ final int min (int a, long b) {
1779
+ return a < b ? a : (int)b;
1780
+ }
1781
+
1782
+ /* probably not the best place to hide this ... */
1783
+ public boolean HTTP_PARSER_STRICT;
1784
+ State new_message() {
1785
+ if (HTTP_PARSER_STRICT){
1786
+ return http_should_keep_alive() ? start_state() : State.dead;
1787
+ } else {
1788
+ return start_state();
1789
+ }
1790
+
1791
+ }
1792
+
1793
+ State start_state() {
1794
+ return type == ParserType.HTTP_REQUEST ? State.start_req : State.start_res;
1795
+ }
1796
+
1797
+
1798
+ boolean parsing_header(State state) {
1799
+
1800
+ switch (state) {
1801
+ case chunk_data :
1802
+ case chunk_data_almost_done :
1803
+ case chunk_data_done :
1804
+ case body_identity :
1805
+ case body_identity_eof :
1806
+ case message_done :
1807
+ return false;
1808
+
1809
+ }
1810
+ return true;
1811
+ }
1812
+
1813
+ /* "Dial C for Constants" */
1814
+ static class C {
1815
+ static final int HTTP_MAX_HEADER_SIZE = 80 * 1024;
1816
+
1817
+ static final int F_CHUNKED = 1 << 0;
1818
+ static final int F_CONNECTION_KEEP_ALIVE = 1 << 1;
1819
+ static final int F_CONNECTION_CLOSE = 1 << 2;
1820
+ static final int F_TRAILING = 1 << 3;
1821
+ static final int F_UPGRADE = 1 << 4;
1822
+ static final int F_SKIPBODY = 1 << 5;
1823
+
1824
+ static final byte [] UPCASE = {
1825
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1826
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1827
+ 0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
1828
+ 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, 0x38,0x39,0x00,0x00,0x00,0x00,0x00,0x00,
1829
+ 0x00,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
1830
+ 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 0x58,0x59,0x5a,0x00,0x00,0x00,0x00,0x5f,
1831
+ 0x00,0x41,0x42,0x43,0x44,0x45,0x46,0x47, 0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
1832
+ 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, 0x58,0x59,0x5a,0x00,0x00,0x00,0x00,0x00,
1833
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1834
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1835
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1836
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1837
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1838
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1839
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1840
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1841
+ };
1842
+ static final byte [] CONNECTION = {
1843
+ 0x43, 0x4f, 0x4e, 0x4e, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e,
1844
+ };
1845
+ static final byte [] PROXY_CONNECTION = {
1846
+ 0x50, 0x52, 0x4f, 0x58, 0x59, 0x2d, 0x43, 0x4f, 0x4e, 0x4e, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e,
1847
+ };
1848
+ static final byte [] CONTENT_LENGTH = {
1849
+ 0x43, 0x4f, 0x4e, 0x54, 0x45, 0x4e, 0x54, 0x2d, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48,
1850
+ };
1851
+ static final byte [] TRANSFER_ENCODING = {
1852
+ 0x54, 0x52, 0x41, 0x4e, 0x53, 0x46, 0x45, 0x52, 0x2d, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47,
1853
+ };
1854
+ static final byte [] UPGRADE = {
1855
+ 0x55, 0x50, 0x47, 0x52, 0x41, 0x44, 0x45,
1856
+ };
1857
+ static final byte [] CHUNKED = {
1858
+ 0x43, 0x48, 0x55, 0x4e, 0x4b, 0x45, 0x44,
1859
+ };
1860
+ static final byte [] KEEP_ALIVE = {
1861
+ 0x4b, 0x45, 0x45, 0x50, 0x2d, 0x41, 0x4c, 0x49, 0x56, 0x45,
1862
+ };
1863
+ static final byte [] CLOSE = {
1864
+ 0x43, 0x4c, 0x4f, 0x53, 0x45,
1865
+ };
1866
+
1867
+ /* Tokens as defined by rfc 2616. Also uppercases letters.
1868
+ * token = 1*<any CHAR except CTLs or separators>
1869
+ * separators = "(" | ")" | "<" | ">" | "@"
1870
+ * | "," | ";" | ":" | "\" | <">
1871
+ * | "/" | "[" | "]" | "?" | "="
1872
+ * | "{" | "}" | SP | HT
1873
+ */
1874
+
1875
+ static final char [] tokens = { // 256 entries indexed by character code; 0 = not a token char, otherwise the token char with letters upper-cased
1876
+ /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
1877
+ 0, 0, 0, 0, 0, 0, 0, 0,
1878
+ /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
1879
+ 0, 0, 0, 0, 0, 0, 0, 0,
1880
+ /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
1881
+ 0, 0, 0, 0, 0, 0, 0, 0,
1882
+ /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
1883
+ 0, 0, 0, 0, 0, 0, 0, 0,
1884
+ /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
1885
+ 0, '!', 0, '#', '$', '%', '&', '\'',
1886
+ /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
1887
+ 0, 0, '*', '+', 0, '-', '.', 0 ,
1888
+ /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
1889
+ '0', '1', '2', '3', '4', '5', '6', '7',
1890
+ /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
1891
+ '8', '9', 0, 0, 0, 0, 0, 0,
1892
+ /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
1893
+ 0, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
1894
+ /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
1895
+ 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
1896
+ /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
1897
+ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
1898
+ /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
1899
+ 'X', 'Y', 'Z', 0, 0, 0, 0, '_', // NOTE(review): '^' (94) is not in the separator list in the comment above this table, yet maps to 0 - confirm intended
1900
+ /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
1901
+ 0, 'A', 'B', 'C', 'D', 'E', 'F', 'G', // lower-case letters map to upper case; NOTE(review): '`' (96) is not a listed separator either, yet maps to 0 - confirm intended
1902
+ /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
1903
+ 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
1904
+ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
1905
+ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
1906
+ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
1907
+ 'X', 'Y', 'Z', 0, '|', 0, '~', 0,
1908
+ /* hi bit set, not ascii */
1909
+ 0, 0, 0, 0, 0, 0, 0, 0,
1910
+ 0, 0, 0, 0, 0, 0, 0, 0,
1911
+ 0, 0, 0, 0, 0, 0, 0, 0,
1912
+ 0, 0, 0, 0, 0, 0, 0, 0,
1913
+ 0, 0, 0, 0, 0, 0, 0, 0,
1914
+ 0, 0, 0, 0, 0, 0, 0, 0,
1915
+ 0, 0, 0, 0, 0, 0, 0, 0,
1916
+ 0, 0, 0, 0, 0, 0, 0, 0,
1917
+ 0, 0, 0, 0, 0, 0, 0, 0,
1918
+ 0, 0, 0, 0, 0, 0, 0, 0,
1919
+ 0, 0, 0, 0, 0, 0, 0, 0,
1920
+ 0, 0, 0, 0, 0, 0, 0, 0,
1921
+ 0, 0, 0, 0, 0, 0, 0, 0,
1922
+ 0, 0, 0, 0, 0, 0, 0, 0,
1923
+ 0, 0, 0, 0, 0, 0, 0, 0,
1924
+ 0, 0, 0, 0, 0, 0, 0, 0, };
1925
+
1926
+ static final byte [] UNHEX = // 256 entries (16 rows of 16) indexed by character code: hex digit value 0-15, or -1 when the character is not a hex digit
1927
+ { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1928
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1929
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1930
+ , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1 // codes 48-63: '0'-'9' map to 0-9
1931
+ ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 // codes 64-79: 'A'-'F' map to 10-15
1932
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1933
+ ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 // codes 96-111: 'a'-'f' map to 10-15
1934
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1935
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1936
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1937
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1938
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1939
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1940
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1941
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1942
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1943
+ };
1944
+
1945
+ static final boolean [] normal_url_char = { // 256 entries indexed by character code; false for codes 0-31, space, '#', '?' and DEL, true for every other code
1946
+ /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
1947
+ false, false, false, false, false, false, false, false,
1948
+ /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
1949
+ false, false, false, false, false, false, false, false,
1950
+ /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
1951
+ false, false, false, false, false, false, false, false,
1952
+ /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
1953
+ false, false, false, false, false, false, false, false,
1954
+ /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
1955
+ false, true, true, false, true, true, true, true, // space and '#' are the two false entries in this row
1956
+ /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
1957
+ true, true, true, true, true, true, true, true,
1958
+ /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
1959
+ true, true, true, true, true, true, true, true,
1960
+ /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
1961
+ true, true, true, true, true, true, true, false, // '?' is false
1962
+ /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
1963
+ true, true, true, true, true, true, true, true,
1964
+ /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
1965
+ true, true, true, true, true, true, true, true,
1966
+ /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
1967
+ true, true, true, true, true, true, true, true,
1968
+ /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
1969
+ true, true, true, true, true, true, true, true,
1970
+ /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
1971
+ true, true, true, true, true, true, true, true,
1972
+ /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
1973
+ true, true, true, true, true, true, true, true,
1974
+ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
1975
+ true, true, true, true, true, true, true, true,
1976
+ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
1977
+ true, true, true, true, true, true, true, false, // DEL (127) is false
1978
+
1979
+ /* hi bit set, not ascii */
1980
+ /* Remainder of non-ASCII range are accepted as-is to support implicitly UTF-8
1981
+ * encoded paths. This is out of spec, but clients generate this and most other
1982
+ * HTTP servers support it. We should, too. */
1983
+
1984
+ true, true, true, true, true, true, true, true,
1985
+ true, true, true, true, true, true, true, true,
1986
+ true, true, true, true, true, true, true, true,
1987
+ true, true, true, true, true, true, true, true,
1988
+ true, true, true, true, true, true, true, true,
1989
+ true, true, true, true, true, true, true, true,
1990
+ true, true, true, true, true, true, true, true,
1991
+ true, true, true, true, true, true, true, true,
1992
+ true, true, true, true, true, true, true, true,
1993
+ true, true, true, true, true, true, true, true,
1994
+ true, true, true, true, true, true, true, true,
1995
+ true, true, true, true, true, true, true, true,
1996
+ true, true, true, true, true, true, true, true,
1997
+ true, true, true, true, true, true, true, true,
1998
+ true, true, true, true, true, true, true, true,
1999
+ true, true, true, true, true, true, true, true,
2000
+
2001
+ };
2002
+
2003
+ public static final byte A = 0x41; // A..Z below hold the ASCII codes of the upper-case letters they are named after
2004
+ public static final byte B = 0x42;
2005
+ public static final byte C = 0x43;
2006
+ public static final byte D = 0x44;
2007
+ public static final byte E = 0x45;
2008
+ public static final byte F = 0x46;
2009
+ public static final byte G = 0x47;
2010
+ public static final byte H = 0x48;
2011
+ public static final byte I = 0x49;
2012
+ public static final byte J = 0x4a;
2013
+ public static final byte K = 0x4b;
2014
+ public static final byte L = 0x4c;
2015
+ public static final byte M = 0x4d;
2016
+ public static final byte N = 0x4e;
2017
+ public static final byte O = 0x4f;
2018
+ public static final byte P = 0x50;
2019
+ public static final byte Q = 0x51;
2020
+ public static final byte R = 0x52;
2021
+ public static final byte S = 0x53;
2022
+ public static final byte T = 0x54;
2023
+ public static final byte U = 0x55;
2024
+ public static final byte V = 0x56;
2025
+ public static final byte W = 0x57;
2026
+ public static final byte X = 0x58;
2027
+ public static final byte Y = 0x59;
2028
+ public static final byte Z = 0x5a;
2029
+ public static final byte UNDER = 0x5f; // '_'
2030
+ public static final byte CR = 0x0d; // carriage return
2031
+ public static final byte LF = 0x0a; // line feed
2032
+ public static final byte DOT = 0x2e; // '.'
2033
+ public static final byte SPACE = 0x20; // ' '
2034
+ public static final byte TAB = 0x09; // horizontal tab
2035
+ public static final byte SEMI = 0x3b; // ';'
2036
+ public static final byte COLON = 0x3a; // ':'
2037
+ public static final byte HASH = 0x23; // '#'
2038
+ public static final byte QMARK = 0x3f; // '?'
2039
+ public static final byte SLASH = 0x2f; // '/'
2040
+ public static final byte DASH = 0x2d; // '-'
2041
+ public static final byte STAR = 0x2a; // '*'
2042
+ public static final byte NULL = 0x00; // NUL byte
2043
+ }
2044
+
2045
+ enum State { // parser states: response-line (res_*), request-line (req_*), header, chunk and body states
2046
+
2047
+ dead
2048
+
2049
+ , start_req_or_res
2050
+ , res_or_resp_H
2051
+ , start_res
2052
+ , res_H
2053
+ , res_HT
2054
+ , res_HTT
2055
+ , res_HTTP
2056
+ , res_first_http_major
2057
+ , res_http_major
2058
+ , res_first_http_minor
2059
+ , res_http_minor
2060
+ , res_first_status_code
2061
+ , res_status_code
2062
+ , res_status
2063
+ , res_line_almost_done
2064
+
2065
+ , start_req
2066
+
2067
+ , req_method
2068
+ , req_spaces_before_url
2069
+ , req_schema
2070
+ , req_schema_slash
2071
+ , req_schema_slash_slash
2072
+ , req_host_start
2073
+ , req_host_v6_start
2074
+ , req_host_v6
2075
+ , req_host_v6_end
2076
+ , req_host
2077
+ , req_port_start
2078
+ , req_port
2079
+ , req_path
2080
+ , req_query_string_start
2081
+ , req_query_string
2082
+ , req_fragment_start
2083
+ , req_fragment
2084
+ , req_http_start
2085
+ , req_http_H
2086
+ , req_http_HT
2087
+ , req_http_HTT
2088
+ , req_http_HTTP
2089
+ , req_first_http_major
2090
+ , req_http_major
2091
+ , req_first_http_minor
2092
+ , req_http_minor
2093
+ , req_line_almost_done
2094
+
2095
+ , header_field_start
2096
+ , header_field
2097
+ , header_value_start
2098
+ , header_value
2099
+ , header_value_lws
2100
+
2101
+ , header_almost_done
2102
+
2103
+ , chunk_size_start
2104
+ , chunk_size
2105
+ , chunk_parameters
2106
+ , chunk_size_almost_done
2107
+
2108
+ , headers_almost_done
2109
+ , headers_done
2110
+ // This space is intentionally not left blank: the block comment below is carried over from the C version, for orientation.
2111
+ // The C version uses `<= s_header_almost_done`; in Java, we list the states explicitly
2112
+ // in `parsing_header()`.
2113
+ /* Important: 's_headers_done' must be the last 'header' state. All
2114
+ * states beyond this must be 'body' states. It is used for overflow
2115
+ * checking. See the PARSING_HEADER() macro.
2116
+ */
2117
+ , chunk_data
2118
+ , chunk_data_almost_done
2119
+ , chunk_data_done
2120
+
2121
+ , body_identity
2122
+ , body_identity_eof
2123
+ , message_done
2124
+
2125
+ }
2126
+ enum HState { // NOTE(review): presumably header-name/value matching states; names parallel the CONNECTION, CONTENT_LENGTH, TRANSFER_ENCODING, UPGRADE, CHUNKED, KEEP_ALIVE and CLOSE byte arrays - confirm against the parser code
2127
+ general
2128
+ , C
2129
+ , CO
2130
+ , CON
2131
+
2132
+ , matching_connection
2133
+ , matching_proxy_connection
2134
+ , matching_content_length
2135
+ , matching_transfer_encoding
2136
+ , matching_upgrade
2137
+
2138
+ , connection
2139
+ , content_length
2140
+ , transfer_encoding
2141
+ , upgrade
2142
+
2143
+ , matching_transfer_encoding_chunked
2144
+ , matching_connection_keep_alive
2145
+ , matching_connection_close
2146
+
2147
+ , transfer_encoding_chunked
2148
+ , connection_keep_alive
2149
+ , connection_close
2150
+ }
2151
+ public enum UrlFields { // URL component identifiers, each carrying an explicit integer index (0-6) exposed via getIndex()
2152
+ UF_SCHEMA(0)
2153
+ , UF_HOST(1)
2154
+ , UF_PORT(2)
2155
+ , UF_PATH(3)
2156
+ , UF_QUERY(4)
2157
+ , UF_FRAGMENT(5)
2158
+ , UF_MAX(6); // NOTE(review): presumably a count sentinel rather than a real URL field - confirm
2159
+
2160
+
2161
+ private final int index; // the integer passed in the constant's declaration above
2162
+
2163
+ private UrlFields(int index) {
2164
+ this.index = index;
2165
+ }
2166
+ public int getIndex() { // returns the index assigned in the constant's declaration
2167
+ return index;
2168
+ }
2169
+
2170
+ }
2171
+ }