http-parser-lite 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +4 -0
- data/README.md +1 -1
- data/ext/http-parser/http_parser.c +185 -82
- data/ext/http-parser/http_parser.h +3 -2
- data/lib/http-parser-lite.rb +1 -0
- data/test/test_http_parser.rb +13 -0
- metadata +5 -3
data/CHANGELOG
CHANGED
data/README.md
CHANGED
@@ -254,12 +254,8 @@ enum state
|
|
254
254
|
, s_req_schema_slash
|
255
255
|
, s_req_schema_slash_slash
|
256
256
|
, s_req_host_start
|
257
|
-
, s_req_host_v6_start
|
258
|
-
, s_req_host_v6
|
259
|
-
, s_req_host_v6_end
|
260
257
|
, s_req_host
|
261
|
-
, s_req_port_start
|
262
|
-
, s_req_port
|
258
|
+
, s_req_host_with_at
|
263
259
|
, s_req_path
|
264
260
|
, s_req_query_string_start
|
265
261
|
, s_req_query_string
|
@@ -337,6 +333,19 @@ enum header_states
|
|
337
333
|
, h_connection_close
|
338
334
|
};
|
339
335
|
|
336
|
+
enum http_host_state
|
337
|
+
{
|
338
|
+
s_http_host_dead = 1
|
339
|
+
, s_http_userinfo_start
|
340
|
+
, s_http_userinfo
|
341
|
+
, s_http_host_start
|
342
|
+
, s_http_host_v6_start
|
343
|
+
, s_http_host
|
344
|
+
, s_http_host_v6
|
345
|
+
, s_http_host_v6_end
|
346
|
+
, s_http_host_port_start
|
347
|
+
, s_http_host_port
|
348
|
+
};
|
340
349
|
|
341
350
|
/* Macros for character classes; depends on strict-mode */
|
342
351
|
#define CR '\r'
|
@@ -346,6 +355,12 @@ enum header_states
|
|
346
355
|
#define IS_NUM(c) ((c) >= '0' && (c) <= '9')
|
347
356
|
#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
|
348
357
|
#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
|
358
|
+
#define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
|
359
|
+
(c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
|
360
|
+
(c) == ')')
|
361
|
+
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
|
362
|
+
(c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
|
363
|
+
(c) == '$' || (c) == ',')
|
349
364
|
|
350
365
|
#if HTTP_PARSER_STRICT
|
351
366
|
#define TOKEN(c) (tokens[(unsigned char)c])
|
@@ -455,62 +470,28 @@ parse_url_char(enum state s, const char ch)
|
|
455
470
|
|
456
471
|
break;
|
457
472
|
|
458
|
-
case
|
459
|
-
if (ch == '
|
460
|
-
return
|
461
|
-
}
|
462
|
-
|
463
|
-
if (IS_HOST_CHAR(ch)) {
|
464
|
-
return s_req_host;
|
473
|
+
case s_req_host_with_at:
|
474
|
+
if (ch == '@') {
|
475
|
+
return s_dead;
|
465
476
|
}
|
466
477
|
|
467
|
-
|
468
|
-
|
478
|
+
/* FALLTHROUGH */
|
479
|
+
case s_req_host_start:
|
469
480
|
case s_req_host:
|
470
|
-
if (
|
471
|
-
return
|
472
|
-
}
|
473
|
-
|
474
|
-
/* FALLTHROUGH */
|
475
|
-
case s_req_host_v6_end:
|
476
|
-
switch (ch) {
|
477
|
-
case ':':
|
478
|
-
return s_req_port_start;
|
479
|
-
|
480
|
-
case '/':
|
481
|
-
return s_req_path;
|
482
|
-
|
483
|
-
case '?':
|
484
|
-
return s_req_query_string_start;
|
485
|
-
}
|
486
|
-
|
487
|
-
break;
|
488
|
-
|
489
|
-
case s_req_host_v6:
|
490
|
-
if (ch == ']') {
|
491
|
-
return s_req_host_v6_end;
|
481
|
+
if (ch == '/') {
|
482
|
+
return s_req_path;
|
492
483
|
}
|
493
484
|
|
494
|
-
|
495
|
-
|
496
|
-
if (IS_HEX(ch) || ch == ':') {
|
497
|
-
return s_req_host_v6;
|
485
|
+
if (ch == '?') {
|
486
|
+
return s_req_query_string_start;
|
498
487
|
}
|
499
|
-
break;
|
500
|
-
|
501
|
-
case s_req_port:
|
502
|
-
switch (ch) {
|
503
|
-
case '/':
|
504
|
-
return s_req_path;
|
505
488
|
|
506
|
-
|
507
|
-
|
489
|
+
if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
|
490
|
+
return s_req_host;
|
508
491
|
}
|
509
492
|
|
510
|
-
|
511
|
-
|
512
|
-
if (IS_NUM(ch)) {
|
513
|
-
return s_req_port;
|
493
|
+
if (ch == '@') {
|
494
|
+
return s_req_host_with_at;
|
514
495
|
}
|
515
496
|
|
516
497
|
break;
|
@@ -633,12 +614,8 @@ size_t http_parser_execute (http_parser *parser,
|
|
633
614
|
case s_req_schema_slash:
|
634
615
|
case s_req_schema_slash_slash:
|
635
616
|
case s_req_host_start:
|
636
|
-
case s_req_host_v6_start:
|
637
|
-
case s_req_host_v6:
|
638
|
-
case s_req_host_v6_end:
|
639
617
|
case s_req_host:
|
640
|
-
case s_req_port_start:
|
641
|
-
case s_req_port:
|
618
|
+
case s_req_host_with_at:
|
642
619
|
case s_req_query_string_start:
|
643
620
|
case s_req_query_string:
|
644
621
|
case s_req_fragment_start:
|
@@ -1015,9 +992,6 @@ size_t http_parser_execute (http_parser *parser,
|
|
1015
992
|
case s_req_schema_slash:
|
1016
993
|
case s_req_schema_slash_slash:
|
1017
994
|
case s_req_host_start:
|
1018
|
-
case s_req_host_v6_start:
|
1019
|
-
case s_req_host_v6:
|
1020
|
-
case s_req_port_start:
|
1021
995
|
{
|
1022
996
|
switch (ch) {
|
1023
997
|
/* No whitespace allowed here */
|
@@ -1038,8 +1012,7 @@ size_t http_parser_execute (http_parser *parser,
|
|
1038
1012
|
}
|
1039
1013
|
|
1040
1014
|
case s_req_host:
|
1041
|
-
case s_req_host_v6_end:
|
1042
|
-
case s_req_port:
|
1015
|
+
case s_req_host_with_at:
|
1043
1016
|
case s_req_path:
|
1044
1017
|
case s_req_query_string_start:
|
1045
1018
|
case s_req_query_string:
|
@@ -1938,6 +1911,144 @@ http_errno_description(enum http_errno err) {
|
|
1938
1911
|
return http_strerror_tab[err].description;
|
1939
1912
|
}
|
1940
1913
|
|
1914
|
+
static enum http_host_state
|
1915
|
+
http_parse_host_char(enum http_host_state s, const char ch) {
|
1916
|
+
switch(s) {
|
1917
|
+
case s_http_userinfo:
|
1918
|
+
case s_http_userinfo_start:
|
1919
|
+
if (ch == '@') {
|
1920
|
+
return s_http_host_start;
|
1921
|
+
}
|
1922
|
+
|
1923
|
+
if (IS_USERINFO_CHAR(ch)) {
|
1924
|
+
return s_http_userinfo;
|
1925
|
+
}
|
1926
|
+
break;
|
1927
|
+
|
1928
|
+
case s_http_host_start:
|
1929
|
+
if (ch == '[') {
|
1930
|
+
return s_http_host_v6_start;
|
1931
|
+
}
|
1932
|
+
|
1933
|
+
if (IS_HOST_CHAR(ch)) {
|
1934
|
+
return s_http_host;
|
1935
|
+
}
|
1936
|
+
|
1937
|
+
break;
|
1938
|
+
|
1939
|
+
case s_http_host:
|
1940
|
+
if (IS_HOST_CHAR(ch)) {
|
1941
|
+
return s_http_host;
|
1942
|
+
}
|
1943
|
+
|
1944
|
+
/* FALLTHROUGH */
|
1945
|
+
case s_http_host_v6_end:
|
1946
|
+
if (ch == ':') {
|
1947
|
+
return s_http_host_port_start;
|
1948
|
+
}
|
1949
|
+
|
1950
|
+
break;
|
1951
|
+
|
1952
|
+
case s_http_host_v6:
|
1953
|
+
if (ch == ']') {
|
1954
|
+
return s_http_host_v6_end;
|
1955
|
+
}
|
1956
|
+
|
1957
|
+
/* FALLTHROUGH */
|
1958
|
+
case s_http_host_v6_start:
|
1959
|
+
if (IS_HEX(ch) || ch == ':') {
|
1960
|
+
return s_http_host_v6;
|
1961
|
+
}
|
1962
|
+
|
1963
|
+
break;
|
1964
|
+
|
1965
|
+
case s_http_host_port:
|
1966
|
+
case s_http_host_port_start:
|
1967
|
+
if (IS_NUM(ch)) {
|
1968
|
+
return s_http_host_port;
|
1969
|
+
}
|
1970
|
+
|
1971
|
+
break;
|
1972
|
+
|
1973
|
+
default:
|
1974
|
+
break;
|
1975
|
+
}
|
1976
|
+
return s_http_host_dead;
|
1977
|
+
}
|
1978
|
+
|
1979
|
+
static int
|
1980
|
+
http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
|
1981
|
+
enum http_host_state s;
|
1982
|
+
|
1983
|
+
const char *p;
|
1984
|
+
size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
|
1985
|
+
|
1986
|
+
u->field_data[UF_HOST].len = 0;
|
1987
|
+
|
1988
|
+
s = found_at ? s_http_userinfo_start : s_http_host_start;
|
1989
|
+
|
1990
|
+
for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
|
1991
|
+
enum http_host_state new_s = http_parse_host_char(s, *p);
|
1992
|
+
|
1993
|
+
if (new_s == s_http_host_dead) {
|
1994
|
+
return 1;
|
1995
|
+
}
|
1996
|
+
|
1997
|
+
switch(new_s) {
|
1998
|
+
case s_http_host:
|
1999
|
+
if (s != s_http_host) {
|
2000
|
+
u->field_data[UF_HOST].off = p - buf;
|
2001
|
+
}
|
2002
|
+
u->field_data[UF_HOST].len ++;
|
2003
|
+
break;
|
2004
|
+
|
2005
|
+
case s_http_host_v6:
|
2006
|
+
if (s != s_http_host_v6) {
|
2007
|
+
u->field_data[UF_HOST].off = p - buf;
|
2008
|
+
}
|
2009
|
+
u->field_data[UF_HOST].len ++;
|
2010
|
+
break;
|
2011
|
+
|
2012
|
+
case s_http_host_port:
|
2013
|
+
if (s != s_http_host_port) {
|
2014
|
+
u->field_data[UF_PORT].off = p - buf;
|
2015
|
+
u->field_data[UF_PORT].len = 0;
|
2016
|
+
u->field_set |= (1 << UF_PORT);
|
2017
|
+
}
|
2018
|
+
u->field_data[UF_PORT].len ++;
|
2019
|
+
break;
|
2020
|
+
|
2021
|
+
case s_http_userinfo:
|
2022
|
+
if (s != s_http_userinfo) {
|
2023
|
+
u->field_data[UF_USERINFO].off = p - buf ;
|
2024
|
+
u->field_data[UF_USERINFO].len = 0;
|
2025
|
+
u->field_set |= (1 << UF_USERINFO);
|
2026
|
+
}
|
2027
|
+
u->field_data[UF_USERINFO].len ++;
|
2028
|
+
break;
|
2029
|
+
|
2030
|
+
default:
|
2031
|
+
break;
|
2032
|
+
}
|
2033
|
+
s = new_s;
|
2034
|
+
}
|
2035
|
+
|
2036
|
+
/* Make sure we don't end somewhere unexpected */
|
2037
|
+
switch (s) {
|
2038
|
+
case s_http_host_start:
|
2039
|
+
case s_http_host_v6_start:
|
2040
|
+
case s_http_host_v6:
|
2041
|
+
case s_http_host_port_start:
|
2042
|
+
case s_http_userinfo:
|
2043
|
+
case s_http_userinfo_start:
|
2044
|
+
return 1;
|
2045
|
+
default:
|
2046
|
+
break;
|
2047
|
+
}
|
2048
|
+
|
2049
|
+
return 0;
|
2050
|
+
}
|
2051
|
+
|
1941
2052
|
int
|
1942
2053
|
http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
|
1943
2054
|
struct http_parser_url *u)
|
@@ -1945,6 +2056,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
|
|
1945
2056
|
enum state s;
|
1946
2057
|
const char *p;
|
1947
2058
|
enum http_parser_url_fields uf, old_uf;
|
2059
|
+
int found_at = 0;
|
1948
2060
|
|
1949
2061
|
u->port = u->field_set = 0;
|
1950
2062
|
s = is_connect ? s_req_host_start : s_req_spaces_before_url;
|
@@ -1962,9 +2074,6 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
|
|
1962
2074
|
case s_req_schema_slash:
|
1963
2075
|
case s_req_schema_slash_slash:
|
1964
2076
|
case s_req_host_start:
|
1965
|
-
case s_req_host_v6_start:
|
1966
|
-
case s_req_host_v6_end:
|
1967
|
-
case s_req_port_start:
|
1968
2077
|
case s_req_query_string_start:
|
1969
2078
|
case s_req_fragment_start:
|
1970
2079
|
continue;
|
@@ -1973,15 +2082,14 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
|
|
1973
2082
|
uf = UF_SCHEMA;
|
1974
2083
|
break;
|
1975
2084
|
|
2085
|
+
case s_req_host_with_at:
|
2086
|
+
found_at = 1;
|
2087
|
+
|
2088
|
+
/* FALLTHROUGH */
|
1976
2089
|
case s_req_host:
|
1977
|
-
case s_req_host_v6:
|
1978
2090
|
uf = UF_HOST;
|
1979
2091
|
break;
|
1980
2092
|
|
1981
|
-
case s_req_port:
|
1982
|
-
uf = UF_PORT;
|
1983
|
-
break;
|
1984
|
-
|
1985
2093
|
case s_req_path:
|
1986
2094
|
uf = UF_PATH;
|
1987
2095
|
break;
|
@@ -2012,21 +2120,16 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
|
|
2012
2120
|
old_uf = uf;
|
2013
2121
|
}
|
2014
2122
|
|
2015
|
-
/*
|
2016
|
-
if (
|
2017
|
-
|
2123
|
+
/* host must be present if there is a schema */
|
2124
|
+
if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
|
2125
|
+
if (http_parse_host(buf, u, found_at) != 0) {
|
2126
|
+
return 1;
|
2127
|
+
}
|
2018
2128
|
}
|
2019
2129
|
|
2020
|
-
/*
|
2021
|
-
|
2022
|
-
case s_req_host_v6_start:
|
2023
|
-
case s_req_host_v6:
|
2024
|
-
case s_req_host_v6_end:
|
2025
|
-
case s_req_host:
|
2026
|
-
case s_req_port_start:
|
2130
|
+
/* CONNECT requests can only contain "hostname:port" */
|
2131
|
+
if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
|
2027
2132
|
return 1;
|
2028
|
-
default:
|
2029
|
-
break;
|
2030
2133
|
}
|
2031
2134
|
|
2032
2135
|
if (u->field_set & (1 << UF_PORT)) {
|
@@ -141,7 +141,7 @@ enum flags
|
|
141
141
|
|
142
142
|
|
143
143
|
/* Map for errno-related constants
|
144
|
-
*
|
144
|
+
*
|
145
145
|
* The provided argument should be a macro that takes 2 arguments.
|
146
146
|
*/
|
147
147
|
#define HTTP_ERRNO_MAP(XX) \
|
@@ -256,7 +256,8 @@ enum http_parser_url_fields
|
|
256
256
|
, UF_PATH = 3
|
257
257
|
, UF_QUERY = 4
|
258
258
|
, UF_FRAGMENT = 5
|
259
|
-
, UF_MAX = 6
|
259
|
+
, UF_USERINFO = 6
|
260
|
+
, UF_MAX = 7
|
260
261
|
};
|
261
262
|
|
262
263
|
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'http-parser'
|
data/test/test_http_parser.rb
CHANGED
@@ -154,4 +154,17 @@ describe 'http-parser' do
|
|
154
154
|
assert !parser.error?
|
155
155
|
assert !parser.error
|
156
156
|
end
|
157
|
+
|
158
|
+
it 'should parser urls with user:pass' do
|
159
|
+
parser.reset(HTTP::Parser::TYPE_REQUEST)
|
160
|
+
|
161
|
+
url = 'http://foo:bar@example.org/test.cgi?param1=1'
|
162
|
+
data = []
|
163
|
+
parser.on_url {|url| data << url}
|
164
|
+
|
165
|
+
parser << "GET #{url} HTTP/1.0\r\n\r\n"
|
166
|
+
|
167
|
+
assert !parser.error?
|
168
|
+
assert_equal url, data.first
|
169
|
+
end
|
157
170
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: http-parser-lite
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-07-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -35,13 +35,14 @@ extensions:
|
|
35
35
|
- ext/http-parser/extconf.rb
|
36
36
|
extra_rdoc_files: []
|
37
37
|
files:
|
38
|
-
- ext/http-parser/http_parser.c
|
39
38
|
- ext/http-parser/ruby_http_parser.c
|
39
|
+
- ext/http-parser/http_parser.c
|
40
40
|
- ext/http-parser/http_parser.h
|
41
41
|
- ext/http-parser/extconf.rb
|
42
42
|
- test/helper.rb
|
43
43
|
- test/test_http_parser.rb
|
44
44
|
- lib/http-parser.rb
|
45
|
+
- lib/http-parser-lite.rb
|
45
46
|
- README.md
|
46
47
|
- CHANGELOG
|
47
48
|
homepage: http://github.com/deepfryed/http-parser-lite
|
@@ -70,3 +71,4 @@ signing_key:
|
|
70
71
|
specification_version: 3
|
71
72
|
summary: Simple wrapper around Joyent http-parser
|
72
73
|
test_files: []
|
74
|
+
has_rdoc:
|