http-parser-lite 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,7 @@
1
+ == 0.5.0 (2012-07-21)
2
+
3
+ * merged upstream pull/118 to support parsing URLs that contain a username and password.
4
+
1
5
  == 0.4.0 (2012-06-30)
2
6
 
3
7
  * added missing on_headers_complete callback.
data/README.md CHANGED
@@ -22,7 +22,7 @@ parser.on_message_complete do
22
22
  end
23
23
 
24
24
  parser.on_headers_complete do
25
- puts "value: #{value}"
25
+ puts "headers complete"
26
26
  end
27
27
 
28
28
  parser.on_url do |url|
@@ -254,12 +254,8 @@ enum state
254
254
  , s_req_schema_slash
255
255
  , s_req_schema_slash_slash
256
256
  , s_req_host_start
257
- , s_req_host_v6_start
258
- , s_req_host_v6
259
- , s_req_host_v6_end
260
257
  , s_req_host
261
- , s_req_port_start
262
- , s_req_port
258
+ , s_req_host_with_at
263
259
  , s_req_path
264
260
  , s_req_query_string_start
265
261
  , s_req_query_string
@@ -337,6 +333,19 @@ enum header_states
337
333
  , h_connection_close
338
334
  };
339
335
 
336
+ enum http_host_state
337
+ {
338
+ s_http_host_dead = 1
339
+ , s_http_userinfo_start
340
+ , s_http_userinfo
341
+ , s_http_host_start
342
+ , s_http_host_v6_start
343
+ , s_http_host
344
+ , s_http_host_v6
345
+ , s_http_host_v6_end
346
+ , s_http_host_port_start
347
+ , s_http_host_port
348
+ };
340
349
 
341
350
  /* Macros for character classes; depends on strict-mode */
342
351
  #define CR '\r'
@@ -346,6 +355,12 @@ enum header_states
346
355
  #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
347
356
  #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
348
357
  #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
358
+ #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
359
+ (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
360
+ (c) == ')')
361
+ #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
362
+ (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
363
+ (c) == '$' || (c) == ',')
349
364
 
350
365
  #if HTTP_PARSER_STRICT
351
366
  #define TOKEN(c) (tokens[(unsigned char)c])
@@ -455,62 +470,28 @@ parse_url_char(enum state s, const char ch)
455
470
 
456
471
  break;
457
472
 
458
- case s_req_host_start:
459
- if (ch == '[') {
460
- return s_req_host_v6_start;
461
- }
462
-
463
- if (IS_HOST_CHAR(ch)) {
464
- return s_req_host;
473
+ case s_req_host_with_at:
474
+ if (ch == '@') {
475
+ return s_dead;
465
476
  }
466
477
 
467
- break;
468
-
478
+ /* FALLTHROUGH */
479
+ case s_req_host_start:
469
480
  case s_req_host:
470
- if (IS_HOST_CHAR(ch)) {
471
- return s_req_host;
472
- }
473
-
474
- /* FALLTHROUGH */
475
- case s_req_host_v6_end:
476
- switch (ch) {
477
- case ':':
478
- return s_req_port_start;
479
-
480
- case '/':
481
- return s_req_path;
482
-
483
- case '?':
484
- return s_req_query_string_start;
485
- }
486
-
487
- break;
488
-
489
- case s_req_host_v6:
490
- if (ch == ']') {
491
- return s_req_host_v6_end;
481
+ if (ch == '/') {
482
+ return s_req_path;
492
483
  }
493
484
 
494
- /* FALLTHROUGH */
495
- case s_req_host_v6_start:
496
- if (IS_HEX(ch) || ch == ':') {
497
- return s_req_host_v6;
485
+ if (ch == '?') {
486
+ return s_req_query_string_start;
498
487
  }
499
- break;
500
-
501
- case s_req_port:
502
- switch (ch) {
503
- case '/':
504
- return s_req_path;
505
488
 
506
- case '?':
507
- return s_req_query_string_start;
489
+ if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
490
+ return s_req_host;
508
491
  }
509
492
 
510
- /* FALLTHROUGH */
511
- case s_req_port_start:
512
- if (IS_NUM(ch)) {
513
- return s_req_port;
493
+ if (ch == '@') {
494
+ return s_req_host_with_at;
514
495
  }
515
496
 
516
497
  break;
@@ -633,12 +614,8 @@ size_t http_parser_execute (http_parser *parser,
633
614
  case s_req_schema_slash:
634
615
  case s_req_schema_slash_slash:
635
616
  case s_req_host_start:
636
- case s_req_host_v6_start:
637
- case s_req_host_v6:
638
- case s_req_host_v6_end:
639
617
  case s_req_host:
640
- case s_req_port_start:
641
- case s_req_port:
618
+ case s_req_host_with_at:
642
619
  case s_req_query_string_start:
643
620
  case s_req_query_string:
644
621
  case s_req_fragment_start:
@@ -1015,9 +992,6 @@ size_t http_parser_execute (http_parser *parser,
1015
992
  case s_req_schema_slash:
1016
993
  case s_req_schema_slash_slash:
1017
994
  case s_req_host_start:
1018
- case s_req_host_v6_start:
1019
- case s_req_host_v6:
1020
- case s_req_port_start:
1021
995
  {
1022
996
  switch (ch) {
1023
997
  /* No whitespace allowed here */
@@ -1038,8 +1012,7 @@ size_t http_parser_execute (http_parser *parser,
1038
1012
  }
1039
1013
 
1040
1014
  case s_req_host:
1041
- case s_req_host_v6_end:
1042
- case s_req_port:
1015
+ case s_req_host_with_at:
1043
1016
  case s_req_path:
1044
1017
  case s_req_query_string_start:
1045
1018
  case s_req_query_string:
@@ -1938,6 +1911,144 @@ http_errno_description(enum http_errno err) {
1938
1911
  return http_strerror_tab[err].description;
1939
1912
  }
1940
1913
 
1914
+ static enum http_host_state
1915
+ http_parse_host_char(enum http_host_state s, const char ch) {
1916
+ switch(s) {
1917
+ case s_http_userinfo:
1918
+ case s_http_userinfo_start:
1919
+ if (ch == '@') {
1920
+ return s_http_host_start;
1921
+ }
1922
+
1923
+ if (IS_USERINFO_CHAR(ch)) {
1924
+ return s_http_userinfo;
1925
+ }
1926
+ break;
1927
+
1928
+ case s_http_host_start:
1929
+ if (ch == '[') {
1930
+ return s_http_host_v6_start;
1931
+ }
1932
+
1933
+ if (IS_HOST_CHAR(ch)) {
1934
+ return s_http_host;
1935
+ }
1936
+
1937
+ break;
1938
+
1939
+ case s_http_host:
1940
+ if (IS_HOST_CHAR(ch)) {
1941
+ return s_http_host;
1942
+ }
1943
+
1944
+ /* FALLTHROUGH */
1945
+ case s_http_host_v6_end:
1946
+ if (ch == ':') {
1947
+ return s_http_host_port_start;
1948
+ }
1949
+
1950
+ break;
1951
+
1952
+ case s_http_host_v6:
1953
+ if (ch == ']') {
1954
+ return s_http_host_v6_end;
1955
+ }
1956
+
1957
+ /* FALLTHROUGH */
1958
+ case s_http_host_v6_start:
1959
+ if (IS_HEX(ch) || ch == ':') {
1960
+ return s_http_host_v6;
1961
+ }
1962
+
1963
+ break;
1964
+
1965
+ case s_http_host_port:
1966
+ case s_http_host_port_start:
1967
+ if (IS_NUM(ch)) {
1968
+ return s_http_host_port;
1969
+ }
1970
+
1971
+ break;
1972
+
1973
+ default:
1974
+ break;
1975
+ }
1976
+ return s_http_host_dead;
1977
+ }
1978
+
1979
+ static int
1980
+ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
1981
+ enum http_host_state s;
1982
+
1983
+ const char *p;
1984
+ size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
1985
+
1986
+ u->field_data[UF_HOST].len = 0;
1987
+
1988
+ s = found_at ? s_http_userinfo_start : s_http_host_start;
1989
+
1990
+ for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
1991
+ enum http_host_state new_s = http_parse_host_char(s, *p);
1992
+
1993
+ if (new_s == s_http_host_dead) {
1994
+ return 1;
1995
+ }
1996
+
1997
+ switch(new_s) {
1998
+ case s_http_host:
1999
+ if (s != s_http_host) {
2000
+ u->field_data[UF_HOST].off = p - buf;
2001
+ }
2002
+ u->field_data[UF_HOST].len ++;
2003
+ break;
2004
+
2005
+ case s_http_host_v6:
2006
+ if (s != s_http_host_v6) {
2007
+ u->field_data[UF_HOST].off = p - buf;
2008
+ }
2009
+ u->field_data[UF_HOST].len ++;
2010
+ break;
2011
+
2012
+ case s_http_host_port:
2013
+ if (s != s_http_host_port) {
2014
+ u->field_data[UF_PORT].off = p - buf;
2015
+ u->field_data[UF_PORT].len = 0;
2016
+ u->field_set |= (1 << UF_PORT);
2017
+ }
2018
+ u->field_data[UF_PORT].len ++;
2019
+ break;
2020
+
2021
+ case s_http_userinfo:
2022
+ if (s != s_http_userinfo) {
2023
+ u->field_data[UF_USERINFO].off = p - buf ;
2024
+ u->field_data[UF_USERINFO].len = 0;
2025
+ u->field_set |= (1 << UF_USERINFO);
2026
+ }
2027
+ u->field_data[UF_USERINFO].len ++;
2028
+ break;
2029
+
2030
+ default:
2031
+ break;
2032
+ }
2033
+ s = new_s;
2034
+ }
2035
+
2036
+ /* Make sure we don't end somewhere unexpected */
2037
+ switch (s) {
2038
+ case s_http_host_start:
2039
+ case s_http_host_v6_start:
2040
+ case s_http_host_v6:
2041
+ case s_http_host_port_start:
2042
+ case s_http_userinfo:
2043
+ case s_http_userinfo_start:
2044
+ return 1;
2045
+ default:
2046
+ break;
2047
+ }
2048
+
2049
+ return 0;
2050
+ }
2051
+
1941
2052
  int
1942
2053
  http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1943
2054
  struct http_parser_url *u)
@@ -1945,6 +2056,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1945
2056
  enum state s;
1946
2057
  const char *p;
1947
2058
  enum http_parser_url_fields uf, old_uf;
2059
+ int found_at = 0;
1948
2060
 
1949
2061
  u->port = u->field_set = 0;
1950
2062
  s = is_connect ? s_req_host_start : s_req_spaces_before_url;
@@ -1962,9 +2074,6 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1962
2074
  case s_req_schema_slash:
1963
2075
  case s_req_schema_slash_slash:
1964
2076
  case s_req_host_start:
1965
- case s_req_host_v6_start:
1966
- case s_req_host_v6_end:
1967
- case s_req_port_start:
1968
2077
  case s_req_query_string_start:
1969
2078
  case s_req_fragment_start:
1970
2079
  continue;
@@ -1973,15 +2082,14 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1973
2082
  uf = UF_SCHEMA;
1974
2083
  break;
1975
2084
 
2085
+ case s_req_host_with_at:
2086
+ found_at = 1;
2087
+
2088
+ /* FALLTHROUGH */
1976
2089
  case s_req_host:
1977
- case s_req_host_v6:
1978
2090
  uf = UF_HOST;
1979
2091
  break;
1980
2092
 
1981
- case s_req_port:
1982
- uf = UF_PORT;
1983
- break;
1984
-
1985
2093
  case s_req_path:
1986
2094
  uf = UF_PATH;
1987
2095
  break;
@@ -2012,21 +2120,16 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2012
2120
  old_uf = uf;
2013
2121
  }
2014
2122
 
2015
- /* CONNECT requests can only contain "hostname:port" */
2016
- if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2017
- return 1;
2123
+ /* host must be present if there is a schema */
2124
+ if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2125
+ if (http_parse_host(buf, u, found_at) != 0) {
2126
+ return 1;
2127
+ }
2018
2128
  }
2019
2129
 
2020
- /* Make sure we don't end somewhere unexpected */
2021
- switch (s) {
2022
- case s_req_host_v6_start:
2023
- case s_req_host_v6:
2024
- case s_req_host_v6_end:
2025
- case s_req_host:
2026
- case s_req_port_start:
2130
+ /* CONNECT requests can only contain "hostname:port" */
2131
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2027
2132
  return 1;
2028
- default:
2029
- break;
2030
2133
  }
2031
2134
 
2032
2135
  if (u->field_set & (1 << UF_PORT)) {
@@ -141,7 +141,7 @@ enum flags
141
141
 
142
142
 
143
143
  /* Map for errno-related constants
144
- *
144
+ *
145
145
  * The provided argument should be a macro that takes 2 arguments.
146
146
  */
147
147
  #define HTTP_ERRNO_MAP(XX) \
@@ -256,7 +256,8 @@ enum http_parser_url_fields
256
256
  , UF_PATH = 3
257
257
  , UF_QUERY = 4
258
258
  , UF_FRAGMENT = 5
259
- , UF_MAX = 6
259
+ , UF_USERINFO = 6
260
+ , UF_MAX = 7
260
261
  };
261
262
 
262
263
 
@@ -0,0 +1 @@
1
+ require 'http-parser'
@@ -154,4 +154,17 @@ describe 'http-parser' do
154
154
  assert !parser.error?
155
155
  assert !parser.error
156
156
  end
157
+
158
+ it 'should parser urls with user:pass' do
159
+ parser.reset(HTTP::Parser::TYPE_REQUEST)
160
+
161
+ url = 'http://foo:bar@example.org/test.cgi?param1=1'
162
+ data = []
163
+ parser.on_url {|url| data << url}
164
+
165
+ parser << "GET #{url} HTTP/1.0\r\n\r\n"
166
+
167
+ assert !parser.error?
168
+ assert_equal url, data.first
169
+ end
157
170
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: http-parser-lite
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-30 00:00:00.000000000 Z
12
+ date: 2012-07-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -35,13 +35,14 @@ extensions:
35
35
  - ext/http-parser/extconf.rb
36
36
  extra_rdoc_files: []
37
37
  files:
38
- - ext/http-parser/http_parser.c
39
38
  - ext/http-parser/ruby_http_parser.c
39
+ - ext/http-parser/http_parser.c
40
40
  - ext/http-parser/http_parser.h
41
41
  - ext/http-parser/extconf.rb
42
42
  - test/helper.rb
43
43
  - test/test_http_parser.rb
44
44
  - lib/http-parser.rb
45
+ - lib/http-parser-lite.rb
45
46
  - README.md
46
47
  - CHANGELOG
47
48
  homepage: http://github.com/deepfryed/http-parser-lite
@@ -70,3 +71,4 @@ signing_key:
70
71
  specification_version: 3
71
72
  summary: Simple wrapper around Joyent http-parser
72
73
  test_files: []
74
+ has_rdoc: