http_parser.rb 0.6.0.beta.1 → 0.6.0.beta.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,5 @@
1
1
  package http_parser.lolevel;
2
2
 
3
- import java.io.UnsupportedEncodingException;
4
3
  import java.nio.ByteBuffer;
5
4
  import http_parser.HTTPException;
6
5
  import http_parser.HTTPMethod;
@@ -18,7 +17,7 @@ public class HTTPParser {
18
17
  HState header_state;
19
18
  boolean strict;
20
19
 
21
- int index;
20
+ int index;
22
21
  int flags; // TODO
23
22
 
24
23
  int nread;
@@ -37,25 +36,25 @@ public class HTTPParser {
37
36
  * Should be checked when http_parser_execute() returns in addition to
38
37
  * error checking.
39
38
  */
40
- public boolean upgrade;
39
+ public boolean upgrade;
41
40
 
42
41
  /** PUBLIC **/
43
42
  // TODO : this is used in c to maintain application state.
44
43
  // is this even necessary? we have state in java ?
45
- // consider
44
+ // consider
46
45
  // Object data; /* A pointer to get hook to the "connection" or "socket" object */
47
-
48
46
 
49
- /*
47
+
48
+ /*
50
49
  * technically we could combine all of these (except for url_mark) into one
51
50
  * variable, saving stack space, but it seems more clear to have them
52
- * separated.
51
+ * separated.
53
52
  */
54
53
  int header_field_mark = -1;
55
54
  int header_value_mark = -1;
56
55
  int url_mark = -1;
57
56
  int body_mark = -1;
58
-
57
+
59
58
  /**
60
59
  * Construct a Parser for ParserType.HTTP_BOTH, meaning it
61
60
  * determines whether it's parsing a request or a response.
@@ -63,9 +62,9 @@ public class HTTPParser {
63
62
  public HTTPParser() {
64
63
  this(ParserType.HTTP_BOTH);
65
64
  }
66
-
65
+
67
66
  /**
68
- * Construct a Parser and initialise it to parse either
67
+ * Construct a Parser and initialise it to parse either
69
68
  * requests or responses.
70
69
  */
71
70
  public HTTPParser(ParserType type) {
@@ -84,7 +83,7 @@ public class HTTPParser {
84
83
  throw new HTTPException("can't happen, invalid ParserType enum");
85
84
  }
86
85
  }
87
-
86
+
88
87
  /*
89
88
  * Utility to facilitate System.out.println style debugging (the way god intended)
90
89
  */
@@ -267,12 +266,12 @@ public class HTTPParser {
267
266
  int p = data.position();
268
267
  this.p_start = p; // this is used for pretty printing errors.
269
268
  // and returning the amount of processed bytes.
270
-
269
+
271
270
 
272
271
  // In case the headers don't provide information about the content
273
272
  // length, `execute` needs to be called with an empty buffer to
274
273
  // indicate that all the data has been sent by the client/server,
275
- // else there is no way of knowing the message is complete.
274
+ // else there is no way of knowing the message is complete.
276
275
  int len = (data.limit() - data.position());
277
276
  if (0 == len) {
278
277
  // if (State.body_identity_eof == state) {
@@ -295,7 +294,7 @@ public class HTTPParser {
295
294
  }
296
295
  }
297
296
 
298
-
297
+
299
298
  // in case the _previous_ call to the parser only has data to get to
300
299
  // the middle of certain fields, we need to update marks to point at
301
300
  // the beginning of the current buffer.
@@ -375,13 +374,16 @@ public class HTTPParser {
375
374
 
376
375
  if (H == ch) {
377
376
  state = State.res_or_resp_H;
378
- settings.call_on_message_begin(this);
379
377
  } else {
380
378
  type = ParserType.HTTP_REQUEST;
381
- state = State.start_req;
382
- index = 1;
383
- reexecute = true;
379
+ method = start_req_method_assign(ch);
380
+ if (null == method) {
381
+ return error(settings, "invalid method", data);
382
+ }
383
+ index = 1;
384
+ state = State.req_method;
384
385
  }
386
+ settings.call_on_message_begin(this);
385
387
  break;
386
388
 
387
389
 
@@ -474,7 +476,7 @@ return error(settings, "Not a digit", data);
474
476
  return error(settings, "invalid http major version: ", data);
475
477
  }
476
478
  break;
477
-
479
+
478
480
  /* first digit of minor HTTP version */
479
481
  case res_first_http_minor:
480
482
  if (!isDigit(ch)) {
@@ -535,18 +537,22 @@ return error(settings, "not a valid status code", data);
535
537
  if (status_code > 999) {
536
538
  return error(settings, "ridiculous status code:", data);
537
539
  }
540
+
541
+ if (status_code > 99) {
542
+ settings.call_on_status_complete(this);
543
+ }
538
544
  break;
539
545
 
540
546
  case res_status:
541
547
  /* the human readable status. e.g. "NOT FOUND"
542
- * we are not humans so just ignore this
548
+ * we are not humans so just ignore this
543
549
  * we are not men, we are devo. */
544
550
 
545
551
  if (CR == ch) {
546
552
  state = State.res_line_almost_done;
547
553
  break;
548
554
  }
549
- if (LF == ch) {
555
+ if (LF == ch) {
550
556
  state = State.header_field_start;
551
557
  break;
552
558
  }
@@ -581,14 +587,14 @@ return error(settings, "not LF", data);
581
587
 
582
588
  settings.call_on_message_begin(this);
583
589
  break;
584
-
590
+
585
591
 
586
592
 
587
593
  case req_method:
588
594
  if (0 == ch) {
589
595
  return error(settings, "NULL in method", data);
590
596
  }
591
-
597
+
592
598
  byte [] arr = method.bytes;
593
599
 
594
600
  if (SPACE == ch && index == arr.length) {
@@ -637,7 +643,7 @@ return error(settings, "not LF", data);
637
643
 
638
644
  ++index;
639
645
  break;
640
-
646
+
641
647
 
642
648
 
643
649
  /******************* URL *******************/
@@ -687,7 +693,7 @@ return error(settings, "not LF", data);
687
693
  case req_fragment_start:
688
694
  case req_fragment:
689
695
  switch (ch) {
690
- case SPACE:
696
+ case SPACE:
691
697
  settings.call_on_url(this, data, url_mark, p-url_mark);
692
698
  settings.call_on_path(this, data, url_mark, p - url_mark);
693
699
  url_mark = -1;
@@ -781,7 +787,7 @@ return error(settings, "non digit in http major", data);
781
787
  return error(settings, "ridiculous http major", data);
782
788
  };
783
789
  break;
784
-
790
+
785
791
  /* first digit of minor HTTP version */
786
792
  case req_first_http_minor:
787
793
  if (!isDigit(ch)) {
@@ -811,11 +817,11 @@ return error(settings, "non digit in http minor", data);
811
817
  http_minor *= 10;
812
818
  http_minor += (int)ch - 0x30;
813
819
 
814
-
820
+
815
821
  if (http_minor > 999) {
816
822
  return error(settings, "ridiculous http minor", data);
817
823
  };
818
-
824
+
819
825
  break;
820
826
 
821
827
  /* end of request line */
@@ -860,7 +866,7 @@ return error(settings, "missing LF after request line", data);
860
866
  state = State.header_field;
861
867
 
862
868
  switch (c) {
863
- case C:
869
+ case C:
864
870
  header_state = HState.C;
865
871
  break;
866
872
 
@@ -888,7 +894,7 @@ return error(settings, "missing LF after request line", data);
888
894
  case header_field:
889
895
  {
890
896
  c = token(ch);
891
- if (0 != c) {
897
+ if (0 != c) {
892
898
  switch (header_state) {
893
899
  case general:
894
900
  break;
@@ -997,7 +1003,7 @@ return error(settings, "Unknown Header State", data);
997
1003
  if (CR == ch) {
998
1004
  state = State.header_almost_done;
999
1005
  settings.call_on_header_field(this, data, header_field_mark, p-header_field_mark);
1000
-
1006
+
1001
1007
  header_field_mark = -1;
1002
1008
  break;
1003
1009
  }
@@ -1005,7 +1011,7 @@ return error(settings, "Unknown Header State", data);
1005
1011
  if (ch == LF) {
1006
1012
  settings.call_on_header_field(this, data, header_field_mark, p-header_field_mark);
1007
1013
  header_field_mark = -1;
1008
-
1014
+
1009
1015
  state = State.header_field_start;
1010
1016
  break;
1011
1017
  }
@@ -1037,7 +1043,7 @@ return error(settings, "invalid header field", data);
1037
1043
  if (LF == ch) {
1038
1044
  settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1039
1045
  header_value_mark = -1;
1040
-
1046
+
1041
1047
  state = State.header_field_start;
1042
1048
  break;
1043
1049
  }
@@ -1063,7 +1069,7 @@ return error(settings, "invalid header field", data);
1063
1069
  case content_length:
1064
1070
  if (!isDigit(ch)) {
1065
1071
  return error(settings, "Content-Length not numeric", data);
1066
- }
1072
+ }
1067
1073
  content_length = (int)ch - 0x30;
1068
1074
  break;
1069
1075
 
@@ -1122,7 +1128,7 @@ return error(settings, "Shouldn't be here", data);
1122
1128
  }
1123
1129
  if (!isDigit(ch)) {
1124
1130
  return error(settings, "Content-Length not numeric", data);
1125
- }
1131
+ }
1126
1132
 
1127
1133
  long t = content_length;
1128
1134
  t *= 10;
@@ -1218,7 +1224,7 @@ return error(settings, "Content-Length not numeric", data);
1218
1224
  /* Here we call the headers_complete callback. This is somewhat
1219
1225
  * different than other callbacks because if the user returns 1, we
1220
1226
  * will interpret that as saying that this message has no body. This
1221
- * is needed for the annoying case of receiving a response to a HEAD
1227
+ * is needed for the annoying case of recieving a response to a HEAD
1222
1228
  * request.
1223
1229
  */
1224
1230
 
@@ -1236,27 +1242,27 @@ return error(settings, "Content-Length not numeric", data);
1236
1242
  * parsingHeader) implementation ...
1237
1243
  */
1238
1244
 
1239
- // /*TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO */
1240
- // if (null != settings.on_headers_complete) {
1241
- // settings.call_on_headers_complete(this);
1242
- // //return;
1243
- // }
1244
-
1245
- if (null != settings.on_headers_complete) {
1246
- switch (settings.on_headers_complete.cb(this)) {
1247
- case 0:
1248
- break;
1249
-
1250
- case 1:
1251
- flags |= F_SKIPBODY;
1252
- break;
1253
-
1254
- default:
1255
- return error(settings, "HPE_CB_headers_complete", data); /* Error */
1256
- }
1257
- }
1258
- reexecute = true;
1259
- break;
1245
+ /*TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO */
1246
+ if (null != settings.on_headers_complete) {
1247
+ settings.call_on_headers_complete(this);
1248
+ //return;
1249
+ }
1250
+
1251
+ // if (null != settings.on_headers_complete) {
1252
+ // switch (settings.on_headers_complete.cb(parser)) {
1253
+ // case 0:
1254
+ // break;
1255
+ //
1256
+ // case 1:
1257
+ // flags |= F_SKIPBODY;
1258
+ // break;
1259
+ //
1260
+ // default:
1261
+ // return p - data; /* Error */ // TODO // RuntimeException ?
1262
+ // }
1263
+ // }
1264
+ reexecute = true;
1265
+ break;
1260
1266
 
1261
1267
  case headers_done:
1262
1268
  if (strict && (LF != ch)) {
@@ -1316,7 +1322,6 @@ return error(settings, "Content-Length not numeric", data);
1316
1322
 
1317
1323
  if (content_length == 0) {
1318
1324
  state = message_done;
1319
- p += to_read;
1320
1325
  reexecute = true;
1321
1326
  }
1322
1327
  }
@@ -1344,7 +1349,7 @@ return error(settings, "Content-Length not numeric", data);
1344
1349
  case chunk_size_start:
1345
1350
  if (1 != this.nread) {
1346
1351
  return error(settings, "nread != 1 (chunking)", data);
1347
-
1352
+
1348
1353
  }
1349
1354
  if (0 == (flags & F_CHUNKED)) {
1350
1355
  return error(settings, "not chunked", data);
@@ -1401,7 +1406,7 @@ return error(settings, "not chunked", data);
1401
1406
  break;
1402
1407
  }
1403
1408
  break;
1404
-
1409
+
1405
1410
 
1406
1411
 
1407
1412
  case chunk_size_almost_done:
@@ -1469,12 +1474,12 @@ return error(settings, "chunk data terminated incorrectly, expected LF", data);
1469
1474
  state = State.chunk_size_start;
1470
1475
  break;
1471
1476
  /******************* Chunk *******************/
1472
-
1473
-
1474
-
1477
+
1478
+
1479
+
1475
1480
  default:
1476
1481
  return error(settings, "unhandled state", data);
1477
-
1482
+
1478
1483
  } // switch
1479
1484
  } // while
1480
1485
 
@@ -1483,13 +1488,13 @@ return error(settings, "unhandled state", data);
1483
1488
 
1484
1489
  /* Reaching this point assumes that we only received part of a
1485
1490
  * message, inform the callbacks about the progress made so far*/
1486
-
1491
+
1487
1492
  settings.call_on_header_field(this, data, header_field_mark, p-header_field_mark);
1488
1493
  settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1489
1494
  settings.call_on_url (this, data, url_mark, p-url_mark);
1490
1495
  settings.call_on_path (this, data, url_mark, p-url_mark);
1491
1496
 
1492
- return data.position()-this.p_start;
1497
+ return data.position()-this.p_start;
1493
1498
  } // execute
1494
1499
 
1495
1500
  int error (ParserSettings settings, String mes, ByteBuffer data) {
@@ -1645,33 +1650,19 @@ return error(settings, "unhandled state", data);
1645
1650
  int strtoi(ByteBuffer data, int start_pos) {
1646
1651
  data.position(start_pos);
1647
1652
  byte ch;
1648
- int start = data.position();
1649
- int end = data.limit();
1653
+ String str = "";
1650
1654
  while(data.position() < data.limit()) {
1651
1655
  ch = data.get();
1652
1656
  if(Character.isWhitespace((char)ch)){
1653
- start++;
1654
1657
  continue;
1655
1658
  }
1656
1659
  if(isDigit(ch)){
1657
- continue;
1660
+ str = str + (char)ch; //TODO replace with something less hacky
1658
1661
  }else{
1659
- end = data.position() - 1;
1660
1662
  break;
1661
1663
  }
1662
1664
  }
1663
- byte[] s = new byte[end - start];
1664
- int cur_pos = data.position();
1665
- data.position(start);
1666
- data.get(s);
1667
- int result = 0;
1668
- try{
1669
- result = Integer.parseInt(new String(s, "UTF8"));
1670
- } catch (UnsupportedEncodingException e) {
1671
- result = 0;
1672
- }
1673
- data.position(cur_pos);
1674
- return result;
1665
+ return Integer.parseInt(str);
1675
1666
  }
1676
1667
 
1677
1668
  boolean isDigit(byte b) {
@@ -1727,18 +1718,18 @@ return error(settings, "unhandled state", data);
1727
1718
  HTTPMethod start_req_method_assign(byte c){
1728
1719
  switch (c) {
1729
1720
  case C: return HTTPMethod.HTTP_CONNECT; /* or COPY, CHECKOUT */
1730
- case D: return HTTPMethod.HTTP_DELETE;
1731
- case G: return HTTPMethod.HTTP_GET;
1732
- case H: return HTTPMethod.HTTP_HEAD;
1733
- case L: return HTTPMethod.HTTP_LOCK;
1721
+ case D: return HTTPMethod.HTTP_DELETE;
1722
+ case G: return HTTPMethod.HTTP_GET;
1723
+ case H: return HTTPMethod.HTTP_HEAD;
1724
+ case L: return HTTPMethod.HTTP_LOCK;
1734
1725
  case M: return HTTPMethod.HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */
1735
- case N: return HTTPMethod.HTTP_NOTIFY;
1736
- case O: return HTTPMethod.HTTP_OPTIONS;
1726
+ case N: return HTTPMethod.HTTP_NOTIFY;
1727
+ case O: return HTTPMethod.HTTP_OPTIONS;
1737
1728
  case P: return HTTPMethod.HTTP_POST; /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
1738
1729
  case R: return HTTPMethod.HTTP_REPORT;
1739
1730
  case S: return HTTPMethod.HTTP_SUBSCRIBE;
1740
- case T: return HTTPMethod.HTTP_TRACE;
1741
- case U: return HTTPMethod.HTTP_UNLOCK; /* or UNSUBSCRIBE */
1731
+ case T: return HTTPMethod.HTTP_TRACE;
1732
+ case U: return HTTPMethod.HTTP_UNLOCK; /* or UNSUBSCRIBE */
1742
1733
  }
1743
1734
  return null; // ugh.
1744
1735
  }
@@ -1777,6 +1768,7 @@ return error(settings, "unhandled state", data);
1777
1768
  final int min (int a, long b) {
1778
1769
  return a < b ? a : (int)b;
1779
1770
  }
1771
+
1780
1772
  /* probably not the best place to hide this ... */
1781
1773
  public boolean HTTP_PARSER_STRICT;
1782
1774
  State new_message() {
@@ -1787,7 +1779,7 @@ return error(settings, "unhandled state", data);
1787
1779
  }
1788
1780
 
1789
1781
  }
1790
-
1782
+
1791
1783
  State start_state() {
1792
1784
  return type == ParserType.HTTP_REQUEST ? State.start_req : State.start_res;
1793
1785
  }
@@ -1838,28 +1830,28 @@ return error(settings, "unhandled state", data);
1838
1830
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1839
1831
  };
1840
1832
  static final byte [] CONNECTION = {
1841
- 0x43, 0x4f, 0x4e, 0x4e, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e,
1833
+ 0x43, 0x4f, 0x4e, 0x4e, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e,
1842
1834
  };
1843
1835
  static final byte [] PROXY_CONNECTION = {
1844
- 0x50, 0x52, 0x4f, 0x58, 0x59, 0x2d, 0x43, 0x4f, 0x4e, 0x4e, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e,
1836
+ 0x50, 0x52, 0x4f, 0x58, 0x59, 0x2d, 0x43, 0x4f, 0x4e, 0x4e, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e,
1845
1837
  };
1846
1838
  static final byte [] CONTENT_LENGTH = {
1847
- 0x43, 0x4f, 0x4e, 0x54, 0x45, 0x4e, 0x54, 0x2d, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48,
1839
+ 0x43, 0x4f, 0x4e, 0x54, 0x45, 0x4e, 0x54, 0x2d, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48,
1848
1840
  };
1849
1841
  static final byte [] TRANSFER_ENCODING = {
1850
- 0x54, 0x52, 0x41, 0x4e, 0x53, 0x46, 0x45, 0x52, 0x2d, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47,
1842
+ 0x54, 0x52, 0x41, 0x4e, 0x53, 0x46, 0x45, 0x52, 0x2d, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47,
1851
1843
  };
1852
1844
  static final byte [] UPGRADE = {
1853
- 0x55, 0x50, 0x47, 0x52, 0x41, 0x44, 0x45,
1845
+ 0x55, 0x50, 0x47, 0x52, 0x41, 0x44, 0x45,
1854
1846
  };
1855
1847
  static final byte [] CHUNKED = {
1856
- 0x43, 0x48, 0x55, 0x4e, 0x4b, 0x45, 0x44,
1848
+ 0x43, 0x48, 0x55, 0x4e, 0x4b, 0x45, 0x44,
1857
1849
  };
1858
1850
  static final byte [] KEEP_ALIVE = {
1859
- 0x4b, 0x45, 0x45, 0x50, 0x2d, 0x41, 0x4c, 0x49, 0x56, 0x45,
1851
+ 0x4b, 0x45, 0x45, 0x50, 0x2d, 0x41, 0x4c, 0x49, 0x56, 0x45,
1860
1852
  };
1861
1853
  static final byte [] CLOSE = {
1862
- 0x43, 0x4c, 0x4f, 0x53, 0x45,
1854
+ 0x43, 0x4c, 0x4f, 0x53, 0x45,
1863
1855
  };
1864
1856
 
1865
1857
  /* Tokens as defined by rfc 2616. Also lowercases them.
@@ -1973,6 +1965,29 @@ return error(settings, "unhandled state", data);
1973
1965
  true, true, true, true, true, true, true, true,
1974
1966
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
1975
1967
  true, true, true, true, true, true, true, false,
1968
+
1969
+ /* hi bit set, not ascii */
1970
+ /* Remainder of non-ASCII range are accepted as-is to support implicitly UTF-8
1971
+ * encoded paths. This is out of spec, but clients generate this and most other
1972
+ * HTTP servers support it. We should, too. */
1973
+
1974
+ true, true, true, true, true, true, true, true,
1975
+ true, true, true, true, true, true, true, true,
1976
+ true, true, true, true, true, true, true, true,
1977
+ true, true, true, true, true, true, true, true,
1978
+ true, true, true, true, true, true, true, true,
1979
+ true, true, true, true, true, true, true, true,
1980
+ true, true, true, true, true, true, true, true,
1981
+ true, true, true, true, true, true, true, true,
1982
+ true, true, true, true, true, true, true, true,
1983
+ true, true, true, true, true, true, true, true,
1984
+ true, true, true, true, true, true, true, true,
1985
+ true, true, true, true, true, true, true, true,
1986
+ true, true, true, true, true, true, true, true,
1987
+ true, true, true, true, true, true, true, true,
1988
+ true, true, true, true, true, true, true, true,
1989
+ true, true, true, true, true, true, true, true,
1990
+
1976
1991
  };
1977
1992
 
1978
1993
  public static final byte A = 0x41;
@@ -2019,7 +2034,7 @@ return error(settings, "unhandled state", data);
2019
2034
 
2020
2035
  enum State {
2021
2036
 
2022
- dead
2037
+ dead
2023
2038
 
2024
2039
  , start_req_or_res
2025
2040
  , res_or_resp_H