http_parser.rb 0.6.0.beta.1 → 0.6.0.beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,5 @@
1
1
  package http_parser.lolevel;
2
2
 
3
- import java.io.UnsupportedEncodingException;
4
3
  import java.nio.ByteBuffer;
5
4
  import http_parser.HTTPException;
6
5
  import http_parser.HTTPMethod;
@@ -18,7 +17,7 @@ public class HTTPParser {
18
17
  HState header_state;
19
18
  boolean strict;
20
19
 
21
- int index;
20
+ int index;
22
21
  int flags; // TODO
23
22
 
24
23
  int nread;
@@ -37,25 +36,25 @@ public class HTTPParser {
37
36
  * Should be checked when http_parser_execute() returns in addition to
38
37
  * error checking.
39
38
  */
40
- public boolean upgrade;
39
+ public boolean upgrade;
41
40
 
42
41
  /** PUBLIC **/
43
42
  // TODO : this is used in c to maintain application state.
44
43
  // is this even necessary? we have state in java ?
45
- // consider
44
+ // consider
46
45
  // Object data; /* A pointer to get hook to the "connection" or "socket" object */
47
-
48
46
 
49
- /*
47
+
48
+ /*
50
49
  * technically we could combine all of these (except for url_mark) into one
51
50
  * variable, saving stack space, but it seems more clear to have them
52
- * separated.
51
+ * separated.
53
52
  */
54
53
  int header_field_mark = -1;
55
54
  int header_value_mark = -1;
56
55
  int url_mark = -1;
57
56
  int body_mark = -1;
58
-
57
+
59
58
  /**
60
59
  * Construct a Parser for ParserType.HTTP_BOTH, meaning it
61
60
  * determines whether it's parsing a request or a response.
@@ -63,9 +62,9 @@ public class HTTPParser {
63
62
  public HTTPParser() {
64
63
  this(ParserType.HTTP_BOTH);
65
64
  }
66
-
65
+
67
66
  /**
68
- * Construct a Parser and initialise it to parse either
67
+ * Construct a Parser and initialise it to parse either
69
68
  * requests or responses.
70
69
  */
71
70
  public HTTPParser(ParserType type) {
@@ -84,7 +83,7 @@ public class HTTPParser {
84
83
  throw new HTTPException("can't happen, invalid ParserType enum");
85
84
  }
86
85
  }
87
-
86
+
88
87
  /*
89
88
  * Utility to facilitate System.out.println style debugging (the way god intended)
90
89
  */
@@ -267,12 +266,12 @@ public class HTTPParser {
267
266
  int p = data.position();
268
267
  this.p_start = p; // this is used for pretty printing errors.
269
268
  // and returning the amount of processed bytes.
270
-
269
+
271
270
 
272
271
  // In case the headers don't provide information about the content
273
272
  // length, `execute` needs to be called with an empty buffer to
274
273
  // indicate that all the data has been sent by the client/server,
275
- // else there is no way of knowing the message is complete.
274
+ // else there is no way of knowing the message is complete.
276
275
  int len = (data.limit() - data.position());
277
276
  if (0 == len) {
278
277
  // if (State.body_identity_eof == state) {
@@ -295,7 +294,7 @@ public class HTTPParser {
295
294
  }
296
295
  }
297
296
 
298
-
297
+
299
298
  // in case the _previous_ call to the parser only has data to get to
300
299
  // the middle of certain fields, we need to update marks to point at
301
300
  // the beginning of the current buffer.
@@ -375,13 +374,16 @@ public class HTTPParser {
375
374
 
376
375
  if (H == ch) {
377
376
  state = State.res_or_resp_H;
378
- settings.call_on_message_begin(this);
379
377
  } else {
380
378
  type = ParserType.HTTP_REQUEST;
381
- state = State.start_req;
382
- index = 1;
383
- reexecute = true;
379
+ method = start_req_method_assign(ch);
380
+ if (null == method) {
381
+ return error(settings, "invalid method", data);
382
+ }
383
+ index = 1;
384
+ state = State.req_method;
384
385
  }
386
+ settings.call_on_message_begin(this);
385
387
  break;
386
388
 
387
389
 
@@ -474,7 +476,7 @@ return error(settings, "Not a digit", data);
474
476
  return error(settings, "invalid http major version: ", data);
475
477
  }
476
478
  break;
477
-
479
+
478
480
  /* first digit of minor HTTP version */
479
481
  case res_first_http_minor:
480
482
  if (!isDigit(ch)) {
@@ -535,18 +537,22 @@ return error(settings, "not a valid status code", data);
535
537
  if (status_code > 999) {
536
538
  return error(settings, "ridiculous status code:", data);
537
539
  }
540
+
541
+ if (status_code > 99) {
542
+ settings.call_on_status_complete(this);
543
+ }
538
544
  break;
539
545
 
540
546
  case res_status:
541
547
  /* the human readable status. e.g. "NOT FOUND"
542
- * we are not humans so just ignore this
548
+ * we are not humans so just ignore this
543
549
  * we are not men, we are devo. */
544
550
 
545
551
  if (CR == ch) {
546
552
  state = State.res_line_almost_done;
547
553
  break;
548
554
  }
549
- if (LF == ch) {
555
+ if (LF == ch) {
550
556
  state = State.header_field_start;
551
557
  break;
552
558
  }
@@ -581,14 +587,14 @@ return error(settings, "not LF", data);
581
587
 
582
588
  settings.call_on_message_begin(this);
583
589
  break;
584
-
590
+
585
591
 
586
592
 
587
593
  case req_method:
588
594
  if (0 == ch) {
589
595
  return error(settings, "NULL in method", data);
590
596
  }
591
-
597
+
592
598
  byte [] arr = method.bytes;
593
599
 
594
600
  if (SPACE == ch && index == arr.length) {
@@ -637,7 +643,7 @@ return error(settings, "not LF", data);
637
643
 
638
644
  ++index;
639
645
  break;
640
-
646
+
641
647
 
642
648
 
643
649
  /******************* URL *******************/
@@ -687,7 +693,7 @@ return error(settings, "not LF", data);
687
693
  case req_fragment_start:
688
694
  case req_fragment:
689
695
  switch (ch) {
690
- case SPACE:
696
+ case SPACE:
691
697
  settings.call_on_url(this, data, url_mark, p-url_mark);
692
698
  settings.call_on_path(this, data, url_mark, p - url_mark);
693
699
  url_mark = -1;
@@ -781,7 +787,7 @@ return error(settings, "non digit in http major", data);
781
787
  return error(settings, "ridiculous http major", data);
782
788
  };
783
789
  break;
784
-
790
+
785
791
  /* first digit of minor HTTP version */
786
792
  case req_first_http_minor:
787
793
  if (!isDigit(ch)) {
@@ -811,11 +817,11 @@ return error(settings, "non digit in http minor", data);
811
817
  http_minor *= 10;
812
818
  http_minor += (int)ch - 0x30;
813
819
 
814
-
820
+
815
821
  if (http_minor > 999) {
816
822
  return error(settings, "ridiculous http minor", data);
817
823
  };
818
-
824
+
819
825
  break;
820
826
 
821
827
  /* end of request line */
@@ -860,7 +866,7 @@ return error(settings, "missing LF after request line", data);
860
866
  state = State.header_field;
861
867
 
862
868
  switch (c) {
863
- case C:
869
+ case C:
864
870
  header_state = HState.C;
865
871
  break;
866
872
 
@@ -888,7 +894,7 @@ return error(settings, "missing LF after request line", data);
888
894
  case header_field:
889
895
  {
890
896
  c = token(ch);
891
- if (0 != c) {
897
+ if (0 != c) {
892
898
  switch (header_state) {
893
899
  case general:
894
900
  break;
@@ -997,7 +1003,7 @@ return error(settings, "Unknown Header State", data);
997
1003
  if (CR == ch) {
998
1004
  state = State.header_almost_done;
999
1005
  settings.call_on_header_field(this, data, header_field_mark, p-header_field_mark);
1000
-
1006
+
1001
1007
  header_field_mark = -1;
1002
1008
  break;
1003
1009
  }
@@ -1005,7 +1011,7 @@ return error(settings, "Unknown Header State", data);
1005
1011
  if (ch == LF) {
1006
1012
  settings.call_on_header_field(this, data, header_field_mark, p-header_field_mark);
1007
1013
  header_field_mark = -1;
1008
-
1014
+
1009
1015
  state = State.header_field_start;
1010
1016
  break;
1011
1017
  }
@@ -1037,7 +1043,7 @@ return error(settings, "invalid header field", data);
1037
1043
  if (LF == ch) {
1038
1044
  settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1039
1045
  header_value_mark = -1;
1040
-
1046
+
1041
1047
  state = State.header_field_start;
1042
1048
  break;
1043
1049
  }
@@ -1063,7 +1069,7 @@ return error(settings, "invalid header field", data);
1063
1069
  case content_length:
1064
1070
  if (!isDigit(ch)) {
1065
1071
  return error(settings, "Content-Length not numeric", data);
1066
- }
1072
+ }
1067
1073
  content_length = (int)ch - 0x30;
1068
1074
  break;
1069
1075
 
@@ -1122,7 +1128,7 @@ return error(settings, "Shouldn't be here", data);
1122
1128
  }
1123
1129
  if (!isDigit(ch)) {
1124
1130
  return error(settings, "Content-Length not numeric", data);
1125
- }
1131
+ }
1126
1132
 
1127
1133
  long t = content_length;
1128
1134
  t *= 10;
@@ -1218,7 +1224,7 @@ return error(settings, "Content-Length not numeric", data);
1218
1224
  /* Here we call the headers_complete callback. This is somewhat
1219
1225
  * different than other callbacks because if the user returns 1, we
1220
1226
  * will interpret that as saying that this message has no body. This
1221
- * is needed for the annoying case of receiving a response to a HEAD
1227
+ * is needed for the annoying case of recieving a response to a HEAD
1222
1228
  * request.
1223
1229
  */
1224
1230
 
@@ -1236,27 +1242,27 @@ return error(settings, "Content-Length not numeric", data);
1236
1242
  * parsingHeader) implementation ...
1237
1243
  */
1238
1244
 
1239
- // /*TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO */
1240
- // if (null != settings.on_headers_complete) {
1241
- // settings.call_on_headers_complete(this);
1242
- // //return;
1243
- // }
1244
-
1245
- if (null != settings.on_headers_complete) {
1246
- switch (settings.on_headers_complete.cb(this)) {
1247
- case 0:
1248
- break;
1249
-
1250
- case 1:
1251
- flags |= F_SKIPBODY;
1252
- break;
1253
-
1254
- default:
1255
- return error(settings, "HPE_CB_headers_complete", data); /* Error */
1256
- }
1257
- }
1258
- reexecute = true;
1259
- break;
1245
+ /*TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO */
1246
+ if (null != settings.on_headers_complete) {
1247
+ settings.call_on_headers_complete(this);
1248
+ //return;
1249
+ }
1250
+
1251
+ // if (null != settings.on_headers_complete) {
1252
+ // switch (settings.on_headers_complete.cb(parser)) {
1253
+ // case 0:
1254
+ // break;
1255
+ //
1256
+ // case 1:
1257
+ // flags |= F_SKIPBODY;
1258
+ // break;
1259
+ //
1260
+ // default:
1261
+ // return p - data; /* Error */ // TODO // RuntimeException ?
1262
+ // }
1263
+ // }
1264
+ reexecute = true;
1265
+ break;
1260
1266
 
1261
1267
  case headers_done:
1262
1268
  if (strict && (LF != ch)) {
@@ -1316,7 +1322,6 @@ return error(settings, "Content-Length not numeric", data);
1316
1322
 
1317
1323
  if (content_length == 0) {
1318
1324
  state = message_done;
1319
- p += to_read;
1320
1325
  reexecute = true;
1321
1326
  }
1322
1327
  }
@@ -1344,7 +1349,7 @@ return error(settings, "Content-Length not numeric", data);
1344
1349
  case chunk_size_start:
1345
1350
  if (1 != this.nread) {
1346
1351
  return error(settings, "nread != 1 (chunking)", data);
1347
-
1352
+
1348
1353
  }
1349
1354
  if (0 == (flags & F_CHUNKED)) {
1350
1355
  return error(settings, "not chunked", data);
@@ -1401,7 +1406,7 @@ return error(settings, "not chunked", data);
1401
1406
  break;
1402
1407
  }
1403
1408
  break;
1404
-
1409
+
1405
1410
 
1406
1411
 
1407
1412
  case chunk_size_almost_done:
@@ -1469,12 +1474,12 @@ return error(settings, "chunk data terminated incorrectly, expected LF", data);
1469
1474
  state = State.chunk_size_start;
1470
1475
  break;
1471
1476
  /******************* Chunk *******************/
1472
-
1473
-
1474
-
1477
+
1478
+
1479
+
1475
1480
  default:
1476
1481
  return error(settings, "unhandled state", data);
1477
-
1482
+
1478
1483
  } // switch
1479
1484
  } // while
1480
1485
 
@@ -1483,13 +1488,13 @@ return error(settings, "unhandled state", data);
1483
1488
 
1484
1489
  /* Reaching this point assumes that we only received part of a
1485
1490
  * message, inform the callbacks about the progress made so far*/
1486
-
1491
+
1487
1492
  settings.call_on_header_field(this, data, header_field_mark, p-header_field_mark);
1488
1493
  settings.call_on_header_value(this, data, header_value_mark, p-header_value_mark);
1489
1494
  settings.call_on_url (this, data, url_mark, p-url_mark);
1490
1495
  settings.call_on_path (this, data, url_mark, p-url_mark);
1491
1496
 
1492
- return data.position()-this.p_start;
1497
+ return data.position()-this.p_start;
1493
1498
  } // execute
1494
1499
 
1495
1500
  int error (ParserSettings settings, String mes, ByteBuffer data) {
@@ -1645,33 +1650,19 @@ return error(settings, "unhandled state", data);
1645
1650
  int strtoi(ByteBuffer data, int start_pos) {
1646
1651
  data.position(start_pos);
1647
1652
  byte ch;
1648
- int start = data.position();
1649
- int end = data.limit();
1653
+ String str = "";
1650
1654
  while(data.position() < data.limit()) {
1651
1655
  ch = data.get();
1652
1656
  if(Character.isWhitespace((char)ch)){
1653
- start++;
1654
1657
  continue;
1655
1658
  }
1656
1659
  if(isDigit(ch)){
1657
- continue;
1660
+ str = str + (char)ch; //TODO replace with something less hacky
1658
1661
  }else{
1659
- end = data.position() - 1;
1660
1662
  break;
1661
1663
  }
1662
1664
  }
1663
- byte[] s = new byte[end - start];
1664
- int cur_pos = data.position();
1665
- data.position(start);
1666
- data.get(s);
1667
- int result = 0;
1668
- try{
1669
- result = Integer.parseInt(new String(s, "UTF8"));
1670
- } catch (UnsupportedEncodingException e) {
1671
- result = 0;
1672
- }
1673
- data.position(cur_pos);
1674
- return result;
1665
+ return Integer.parseInt(str);
1675
1666
  }
1676
1667
 
1677
1668
  boolean isDigit(byte b) {
@@ -1727,18 +1718,18 @@ return error(settings, "unhandled state", data);
1727
1718
  HTTPMethod start_req_method_assign(byte c){
1728
1719
  switch (c) {
1729
1720
  case C: return HTTPMethod.HTTP_CONNECT; /* or COPY, CHECKOUT */
1730
- case D: return HTTPMethod.HTTP_DELETE;
1731
- case G: return HTTPMethod.HTTP_GET;
1732
- case H: return HTTPMethod.HTTP_HEAD;
1733
- case L: return HTTPMethod.HTTP_LOCK;
1721
+ case D: return HTTPMethod.HTTP_DELETE;
1722
+ case G: return HTTPMethod.HTTP_GET;
1723
+ case H: return HTTPMethod.HTTP_HEAD;
1724
+ case L: return HTTPMethod.HTTP_LOCK;
1734
1725
  case M: return HTTPMethod.HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */
1735
- case N: return HTTPMethod.HTTP_NOTIFY;
1736
- case O: return HTTPMethod.HTTP_OPTIONS;
1726
+ case N: return HTTPMethod.HTTP_NOTIFY;
1727
+ case O: return HTTPMethod.HTTP_OPTIONS;
1737
1728
  case P: return HTTPMethod.HTTP_POST; /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
1738
1729
  case R: return HTTPMethod.HTTP_REPORT;
1739
1730
  case S: return HTTPMethod.HTTP_SUBSCRIBE;
1740
- case T: return HTTPMethod.HTTP_TRACE;
1741
- case U: return HTTPMethod.HTTP_UNLOCK; /* or UNSUBSCRIBE */
1731
+ case T: return HTTPMethod.HTTP_TRACE;
1732
+ case U: return HTTPMethod.HTTP_UNLOCK; /* or UNSUBSCRIBE */
1742
1733
  }
1743
1734
  return null; // ugh.
1744
1735
  }
@@ -1777,6 +1768,7 @@ return error(settings, "unhandled state", data);
1777
1768
  final int min (int a, long b) {
1778
1769
  return a < b ? a : (int)b;
1779
1770
  }
1771
+
1780
1772
  /* probably not the best place to hide this ... */
1781
1773
  public boolean HTTP_PARSER_STRICT;
1782
1774
  State new_message() {
@@ -1787,7 +1779,7 @@ return error(settings, "unhandled state", data);
1787
1779
  }
1788
1780
 
1789
1781
  }
1790
-
1782
+
1791
1783
  State start_state() {
1792
1784
  return type == ParserType.HTTP_REQUEST ? State.start_req : State.start_res;
1793
1785
  }
@@ -1838,28 +1830,28 @@ return error(settings, "unhandled state", data);
1838
1830
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1839
1831
  };
1840
1832
  static final byte [] CONNECTION = {
1841
- 0x43, 0x4f, 0x4e, 0x4e, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e,
1833
+ 0x43, 0x4f, 0x4e, 0x4e, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e,
1842
1834
  };
1843
1835
  static final byte [] PROXY_CONNECTION = {
1844
- 0x50, 0x52, 0x4f, 0x58, 0x59, 0x2d, 0x43, 0x4f, 0x4e, 0x4e, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e,
1836
+ 0x50, 0x52, 0x4f, 0x58, 0x59, 0x2d, 0x43, 0x4f, 0x4e, 0x4e, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e,
1845
1837
  };
1846
1838
  static final byte [] CONTENT_LENGTH = {
1847
- 0x43, 0x4f, 0x4e, 0x54, 0x45, 0x4e, 0x54, 0x2d, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48,
1839
+ 0x43, 0x4f, 0x4e, 0x54, 0x45, 0x4e, 0x54, 0x2d, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48,
1848
1840
  };
1849
1841
  static final byte [] TRANSFER_ENCODING = {
1850
- 0x54, 0x52, 0x41, 0x4e, 0x53, 0x46, 0x45, 0x52, 0x2d, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47,
1842
+ 0x54, 0x52, 0x41, 0x4e, 0x53, 0x46, 0x45, 0x52, 0x2d, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47,
1851
1843
  };
1852
1844
  static final byte [] UPGRADE = {
1853
- 0x55, 0x50, 0x47, 0x52, 0x41, 0x44, 0x45,
1845
+ 0x55, 0x50, 0x47, 0x52, 0x41, 0x44, 0x45,
1854
1846
  };
1855
1847
  static final byte [] CHUNKED = {
1856
- 0x43, 0x48, 0x55, 0x4e, 0x4b, 0x45, 0x44,
1848
+ 0x43, 0x48, 0x55, 0x4e, 0x4b, 0x45, 0x44,
1857
1849
  };
1858
1850
  static final byte [] KEEP_ALIVE = {
1859
- 0x4b, 0x45, 0x45, 0x50, 0x2d, 0x41, 0x4c, 0x49, 0x56, 0x45,
1851
+ 0x4b, 0x45, 0x45, 0x50, 0x2d, 0x41, 0x4c, 0x49, 0x56, 0x45,
1860
1852
  };
1861
1853
  static final byte [] CLOSE = {
1862
- 0x43, 0x4c, 0x4f, 0x53, 0x45,
1854
+ 0x43, 0x4c, 0x4f, 0x53, 0x45,
1863
1855
  };
1864
1856
 
1865
1857
  /* Tokens as defined by rfc 2616. Also lowercases them.
@@ -1973,6 +1965,29 @@ return error(settings, "unhandled state", data);
1973
1965
  true, true, true, true, true, true, true, true,
1974
1966
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
1975
1967
  true, true, true, true, true, true, true, false,
1968
+
1969
+ /* hi bit set, not ascii */
1970
+ /* The remainder of the non-ASCII range is accepted as-is to support implicitly UTF-8
1971
+ * encoded paths. This is out of spec, but clients generate this and most other
1972
+ * HTTP servers support it. We should, too. */
1973
+
1974
+ true, true, true, true, true, true, true, true,
1975
+ true, true, true, true, true, true, true, true,
1976
+ true, true, true, true, true, true, true, true,
1977
+ true, true, true, true, true, true, true, true,
1978
+ true, true, true, true, true, true, true, true,
1979
+ true, true, true, true, true, true, true, true,
1980
+ true, true, true, true, true, true, true, true,
1981
+ true, true, true, true, true, true, true, true,
1982
+ true, true, true, true, true, true, true, true,
1983
+ true, true, true, true, true, true, true, true,
1984
+ true, true, true, true, true, true, true, true,
1985
+ true, true, true, true, true, true, true, true,
1986
+ true, true, true, true, true, true, true, true,
1987
+ true, true, true, true, true, true, true, true,
1988
+ true, true, true, true, true, true, true, true,
1989
+ true, true, true, true, true, true, true, true,
1990
+
1976
1991
  };
1977
1992
 
1978
1993
  public static final byte A = 0x41;
@@ -2019,7 +2034,7 @@ return error(settings, "unhandled state", data);
2019
2034
 
2020
2035
  enum State {
2021
2036
 
2022
- dead
2037
+ dead
2023
2038
 
2024
2039
  , start_req_or_res
2025
2040
  , res_or_resp_H