http-parser-lite 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,7 @@
1
+ == 0.5.0 (2012-07-21)
2
+
3
+ * merged in pull/118 upstream to support url parsing with username and password.
4
+
1
5
  == 0.4.0 (2012-06-30)
2
6
 
3
7
  * added missing on_headers_complete callback.
data/README.md CHANGED
@@ -22,7 +22,7 @@ parser.on_message_complete do
22
22
  end
23
23
 
24
24
  parser.on_headers_complete do
25
- puts "value: #{value}"
25
+ puts "headers complete"
26
26
  end
27
27
 
28
28
  parser.on_url do |url|
@@ -254,12 +254,8 @@ enum state
254
254
  , s_req_schema_slash
255
255
  , s_req_schema_slash_slash
256
256
  , s_req_host_start
257
- , s_req_host_v6_start
258
- , s_req_host_v6
259
- , s_req_host_v6_end
260
257
  , s_req_host
261
- , s_req_port_start
262
- , s_req_port
258
+ , s_req_host_with_at
263
259
  , s_req_path
264
260
  , s_req_query_string_start
265
261
  , s_req_query_string
@@ -337,6 +333,19 @@ enum header_states
337
333
  , h_connection_close
338
334
  };
339
335
 
336
+ enum http_host_state
337
+ {
338
+ s_http_host_dead = 1
339
+ , s_http_userinfo_start
340
+ , s_http_userinfo
341
+ , s_http_host_start
342
+ , s_http_host_v6_start
343
+ , s_http_host
344
+ , s_http_host_v6
345
+ , s_http_host_v6_end
346
+ , s_http_host_port_start
347
+ , s_http_host_port
348
+ };
340
349
 
341
350
  /* Macros for character classes; depends on strict-mode */
342
351
  #define CR '\r'
@@ -346,6 +355,12 @@ enum header_states
346
355
  #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
347
356
  #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
348
357
  #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
358
+ #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
359
+ (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
360
+ (c) == ')')
361
+ #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
362
+ (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
363
+ (c) == '$' || (c) == ',')
349
364
 
350
365
  #if HTTP_PARSER_STRICT
351
366
  #define TOKEN(c) (tokens[(unsigned char)c])
@@ -455,62 +470,28 @@ parse_url_char(enum state s, const char ch)
455
470
 
456
471
  break;
457
472
 
458
- case s_req_host_start:
459
- if (ch == '[') {
460
- return s_req_host_v6_start;
461
- }
462
-
463
- if (IS_HOST_CHAR(ch)) {
464
- return s_req_host;
473
+ case s_req_host_with_at:
474
+ if (ch == '@') {
475
+ return s_dead;
465
476
  }
466
477
 
467
- break;
468
-
478
+ /* FALLTHROUGH */
479
+ case s_req_host_start:
469
480
  case s_req_host:
470
- if (IS_HOST_CHAR(ch)) {
471
- return s_req_host;
472
- }
473
-
474
- /* FALLTHROUGH */
475
- case s_req_host_v6_end:
476
- switch (ch) {
477
- case ':':
478
- return s_req_port_start;
479
-
480
- case '/':
481
- return s_req_path;
482
-
483
- case '?':
484
- return s_req_query_string_start;
485
- }
486
-
487
- break;
488
-
489
- case s_req_host_v6:
490
- if (ch == ']') {
491
- return s_req_host_v6_end;
481
+ if (ch == '/') {
482
+ return s_req_path;
492
483
  }
493
484
 
494
- /* FALLTHROUGH */
495
- case s_req_host_v6_start:
496
- if (IS_HEX(ch) || ch == ':') {
497
- return s_req_host_v6;
485
+ if (ch == '?') {
486
+ return s_req_query_string_start;
498
487
  }
499
- break;
500
-
501
- case s_req_port:
502
- switch (ch) {
503
- case '/':
504
- return s_req_path;
505
488
 
506
- case '?':
507
- return s_req_query_string_start;
489
+ if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
490
+ return s_req_host;
508
491
  }
509
492
 
510
- /* FALLTHROUGH */
511
- case s_req_port_start:
512
- if (IS_NUM(ch)) {
513
- return s_req_port;
493
+ if (ch == '@') {
494
+ return s_req_host_with_at;
514
495
  }
515
496
 
516
497
  break;
@@ -633,12 +614,8 @@ size_t http_parser_execute (http_parser *parser,
633
614
  case s_req_schema_slash:
634
615
  case s_req_schema_slash_slash:
635
616
  case s_req_host_start:
636
- case s_req_host_v6_start:
637
- case s_req_host_v6:
638
- case s_req_host_v6_end:
639
617
  case s_req_host:
640
- case s_req_port_start:
641
- case s_req_port:
618
+ case s_req_host_with_at:
642
619
  case s_req_query_string_start:
643
620
  case s_req_query_string:
644
621
  case s_req_fragment_start:
@@ -1015,9 +992,6 @@ size_t http_parser_execute (http_parser *parser,
1015
992
  case s_req_schema_slash:
1016
993
  case s_req_schema_slash_slash:
1017
994
  case s_req_host_start:
1018
- case s_req_host_v6_start:
1019
- case s_req_host_v6:
1020
- case s_req_port_start:
1021
995
  {
1022
996
  switch (ch) {
1023
997
  /* No whitespace allowed here */
@@ -1038,8 +1012,7 @@ size_t http_parser_execute (http_parser *parser,
1038
1012
  }
1039
1013
 
1040
1014
  case s_req_host:
1041
- case s_req_host_v6_end:
1042
- case s_req_port:
1015
+ case s_req_host_with_at:
1043
1016
  case s_req_path:
1044
1017
  case s_req_query_string_start:
1045
1018
  case s_req_query_string:
@@ -1938,6 +1911,144 @@ http_errno_description(enum http_errno err) {
1938
1911
  return http_strerror_tab[err].description;
1939
1912
  }
1940
1913
 
1914
+ static enum http_host_state
1915
+ http_parse_host_char(enum http_host_state s, const char ch) {
1916
+ switch(s) {
1917
+ case s_http_userinfo:
1918
+ case s_http_userinfo_start:
1919
+ if (ch == '@') {
1920
+ return s_http_host_start;
1921
+ }
1922
+
1923
+ if (IS_USERINFO_CHAR(ch)) {
1924
+ return s_http_userinfo;
1925
+ }
1926
+ break;
1927
+
1928
+ case s_http_host_start:
1929
+ if (ch == '[') {
1930
+ return s_http_host_v6_start;
1931
+ }
1932
+
1933
+ if (IS_HOST_CHAR(ch)) {
1934
+ return s_http_host;
1935
+ }
1936
+
1937
+ break;
1938
+
1939
+ case s_http_host:
1940
+ if (IS_HOST_CHAR(ch)) {
1941
+ return s_http_host;
1942
+ }
1943
+
1944
+ /* FALLTHROUGH */
1945
+ case s_http_host_v6_end:
1946
+ if (ch == ':') {
1947
+ return s_http_host_port_start;
1948
+ }
1949
+
1950
+ break;
1951
+
1952
+ case s_http_host_v6:
1953
+ if (ch == ']') {
1954
+ return s_http_host_v6_end;
1955
+ }
1956
+
1957
+ /* FALLTHROUGH */
1958
+ case s_http_host_v6_start:
1959
+ if (IS_HEX(ch) || ch == ':') {
1960
+ return s_http_host_v6;
1961
+ }
1962
+
1963
+ break;
1964
+
1965
+ case s_http_host_port:
1966
+ case s_http_host_port_start:
1967
+ if (IS_NUM(ch)) {
1968
+ return s_http_host_port;
1969
+ }
1970
+
1971
+ break;
1972
+
1973
+ default:
1974
+ break;
1975
+ }
1976
+ return s_http_host_dead;
1977
+ }
1978
+
1979
+ static int
1980
+ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
1981
+ enum http_host_state s;
1982
+
1983
+ const char *p;
1984
+ size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
1985
+
1986
+ u->field_data[UF_HOST].len = 0;
1987
+
1988
+ s = found_at ? s_http_userinfo_start : s_http_host_start;
1989
+
1990
+ for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
1991
+ enum http_host_state new_s = http_parse_host_char(s, *p);
1992
+
1993
+ if (new_s == s_http_host_dead) {
1994
+ return 1;
1995
+ }
1996
+
1997
+ switch(new_s) {
1998
+ case s_http_host:
1999
+ if (s != s_http_host) {
2000
+ u->field_data[UF_HOST].off = p - buf;
2001
+ }
2002
+ u->field_data[UF_HOST].len ++;
2003
+ break;
2004
+
2005
+ case s_http_host_v6:
2006
+ if (s != s_http_host_v6) {
2007
+ u->field_data[UF_HOST].off = p - buf;
2008
+ }
2009
+ u->field_data[UF_HOST].len ++;
2010
+ break;
2011
+
2012
+ case s_http_host_port:
2013
+ if (s != s_http_host_port) {
2014
+ u->field_data[UF_PORT].off = p - buf;
2015
+ u->field_data[UF_PORT].len = 0;
2016
+ u->field_set |= (1 << UF_PORT);
2017
+ }
2018
+ u->field_data[UF_PORT].len ++;
2019
+ break;
2020
+
2021
+ case s_http_userinfo:
2022
+ if (s != s_http_userinfo) {
2023
+ u->field_data[UF_USERINFO].off = p - buf ;
2024
+ u->field_data[UF_USERINFO].len = 0;
2025
+ u->field_set |= (1 << UF_USERINFO);
2026
+ }
2027
+ u->field_data[UF_USERINFO].len ++;
2028
+ break;
2029
+
2030
+ default:
2031
+ break;
2032
+ }
2033
+ s = new_s;
2034
+ }
2035
+
2036
+ /* Make sure we don't end somewhere unexpected */
2037
+ switch (s) {
2038
+ case s_http_host_start:
2039
+ case s_http_host_v6_start:
2040
+ case s_http_host_v6:
2041
+ case s_http_host_port_start:
2042
+ case s_http_userinfo:
2043
+ case s_http_userinfo_start:
2044
+ return 1;
2045
+ default:
2046
+ break;
2047
+ }
2048
+
2049
+ return 0;
2050
+ }
2051
+
1941
2052
  int
1942
2053
  http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1943
2054
  struct http_parser_url *u)
@@ -1945,6 +2056,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1945
2056
  enum state s;
1946
2057
  const char *p;
1947
2058
  enum http_parser_url_fields uf, old_uf;
2059
+ int found_at = 0;
1948
2060
 
1949
2061
  u->port = u->field_set = 0;
1950
2062
  s = is_connect ? s_req_host_start : s_req_spaces_before_url;
@@ -1962,9 +2074,6 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1962
2074
  case s_req_schema_slash:
1963
2075
  case s_req_schema_slash_slash:
1964
2076
  case s_req_host_start:
1965
- case s_req_host_v6_start:
1966
- case s_req_host_v6_end:
1967
- case s_req_port_start:
1968
2077
  case s_req_query_string_start:
1969
2078
  case s_req_fragment_start:
1970
2079
  continue;
@@ -1973,15 +2082,14 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1973
2082
  uf = UF_SCHEMA;
1974
2083
  break;
1975
2084
 
2085
+ case s_req_host_with_at:
2086
+ found_at = 1;
2087
+
2088
+ /* FALLTHROUGH */
1976
2089
  case s_req_host:
1977
- case s_req_host_v6:
1978
2090
  uf = UF_HOST;
1979
2091
  break;
1980
2092
 
1981
- case s_req_port:
1982
- uf = UF_PORT;
1983
- break;
1984
-
1985
2093
  case s_req_path:
1986
2094
  uf = UF_PATH;
1987
2095
  break;
@@ -2012,21 +2120,16 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2012
2120
  old_uf = uf;
2013
2121
  }
2014
2122
 
2015
- /* CONNECT requests can only contain "hostname:port" */
2016
- if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2017
- return 1;
2123
+ /* host must be present if there is a schema */
2124
+ if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2125
+ if (http_parse_host(buf, u, found_at) != 0) {
2126
+ return 1;
2127
+ }
2018
2128
  }
2019
2129
 
2020
- /* Make sure we don't end somewhere unexpected */
2021
- switch (s) {
2022
- case s_req_host_v6_start:
2023
- case s_req_host_v6:
2024
- case s_req_host_v6_end:
2025
- case s_req_host:
2026
- case s_req_port_start:
2130
+ /* CONNECT requests can only contain "hostname:port" */
2131
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2027
2132
  return 1;
2028
- default:
2029
- break;
2030
2133
  }
2031
2134
 
2032
2135
  if (u->field_set & (1 << UF_PORT)) {
@@ -141,7 +141,7 @@ enum flags
141
141
 
142
142
 
143
143
  /* Map for errno-related constants
144
- *
144
+ *
145
145
  * The provided argument should be a macro that takes 2 arguments.
146
146
  */
147
147
  #define HTTP_ERRNO_MAP(XX) \
@@ -256,7 +256,8 @@ enum http_parser_url_fields
256
256
  , UF_PATH = 3
257
257
  , UF_QUERY = 4
258
258
  , UF_FRAGMENT = 5
259
- , UF_MAX = 6
259
+ , UF_USERINFO = 6
260
+ , UF_MAX = 7
260
261
  };
261
262
 
262
263
 
@@ -0,0 +1 @@
1
+ require 'http-parser'
@@ -154,4 +154,17 @@ describe 'http-parser' do
154
154
  assert !parser.error?
155
155
  assert !parser.error
156
156
  end
157
+
158
+ it 'should parser urls with user:pass' do
159
+ parser.reset(HTTP::Parser::TYPE_REQUEST)
160
+
161
+ url = 'http://foo:bar@example.org/test.cgi?param1=1'
162
+ data = []
163
+ parser.on_url {|url| data << url}
164
+
165
+ parser << "GET #{url} HTTP/1.0\r\n\r\n"
166
+
167
+ assert !parser.error?
168
+ assert_equal url, data.first
169
+ end
157
170
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: http-parser-lite
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-30 00:00:00.000000000 Z
12
+ date: 2012-07-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -35,13 +35,14 @@ extensions:
35
35
  - ext/http-parser/extconf.rb
36
36
  extra_rdoc_files: []
37
37
  files:
38
- - ext/http-parser/http_parser.c
39
38
  - ext/http-parser/ruby_http_parser.c
39
+ - ext/http-parser/http_parser.c
40
40
  - ext/http-parser/http_parser.h
41
41
  - ext/http-parser/extconf.rb
42
42
  - test/helper.rb
43
43
  - test/test_http_parser.rb
44
44
  - lib/http-parser.rb
45
+ - lib/http-parser-lite.rb
45
46
  - README.md
46
47
  - CHANGELOG
47
48
  homepage: http://github.com/deepfryed/http-parser-lite
@@ -70,3 +71,4 @@ signing_key:
70
71
  specification_version: 3
71
72
  summary: Simple wrapper around Joyent http-parser
72
73
  test_files: []
74
+ has_rdoc: