http-parser-lite 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +4 -0
- data/README.md +1 -1
- data/ext/http-parser/http_parser.c +185 -82
- data/ext/http-parser/http_parser.h +3 -2
- data/lib/http-parser-lite.rb +1 -0
- data/test/test_http_parser.rb +13 -0
- metadata +5 -3
data/CHANGELOG
CHANGED
data/README.md
CHANGED
@@ -254,12 +254,8 @@ enum state
|
|
254
254
|
, s_req_schema_slash
|
255
255
|
, s_req_schema_slash_slash
|
256
256
|
, s_req_host_start
|
257
|
-
, s_req_host_v6_start
|
258
|
-
, s_req_host_v6
|
259
|
-
, s_req_host_v6_end
|
260
257
|
, s_req_host
|
261
|
-
, s_req_port_start
|
262
|
-
, s_req_port
|
258
|
+
, s_req_host_with_at
|
263
259
|
, s_req_path
|
264
260
|
, s_req_query_string_start
|
265
261
|
, s_req_query_string
|
@@ -337,6 +333,19 @@ enum header_states
|
|
337
333
|
, h_connection_close
|
338
334
|
};
|
339
335
|
|
336
|
+
enum http_host_state
|
337
|
+
{
|
338
|
+
s_http_host_dead = 1
|
339
|
+
, s_http_userinfo_start
|
340
|
+
, s_http_userinfo
|
341
|
+
, s_http_host_start
|
342
|
+
, s_http_host_v6_start
|
343
|
+
, s_http_host
|
344
|
+
, s_http_host_v6
|
345
|
+
, s_http_host_v6_end
|
346
|
+
, s_http_host_port_start
|
347
|
+
, s_http_host_port
|
348
|
+
};
|
340
349
|
|
341
350
|
/* Macros for character classes; depends on strict-mode */
|
342
351
|
#define CR '\r'
|
@@ -346,6 +355,12 @@ enum header_states
|
|
346
355
|
#define IS_NUM(c) ((c) >= '0' && (c) <= '9')
|
347
356
|
#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
|
348
357
|
#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
|
358
|
+
#define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
|
359
|
+
(c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
|
360
|
+
(c) == ')')
|
361
|
+
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
|
362
|
+
(c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
|
363
|
+
(c) == '$' || (c) == ',')
|
349
364
|
|
350
365
|
#if HTTP_PARSER_STRICT
|
351
366
|
#define TOKEN(c) (tokens[(unsigned char)c])
|
@@ -455,62 +470,28 @@ parse_url_char(enum state s, const char ch)
|
|
455
470
|
|
456
471
|
break;
|
457
472
|
|
458
|
-
case s_req_host_start:
|
459
|
-
if (ch == '[') {
|
460
|
-
return s_req_host_v6_start;
|
461
|
-
}
|
462
|
-
|
463
|
-
if (IS_HOST_CHAR(ch)) {
|
464
|
-
return s_req_host;
|
473
|
+
case s_req_host_with_at:
|
474
|
+
if (ch == '@') {
|
475
|
+
return s_dead;
|
465
476
|
}
|
466
477
|
|
467
|
-
|
468
|
-
|
478
|
+
/* FALLTHROUGH */
|
479
|
+
case s_req_host_start:
|
469
480
|
case s_req_host:
|
470
|
-
if (
|
471
|
-
return
|
472
|
-
}
|
473
|
-
|
474
|
-
/* FALLTHROUGH */
|
475
|
-
case s_req_host_v6_end:
|
476
|
-
switch (ch) {
|
477
|
-
case ':':
|
478
|
-
return s_req_port_start;
|
479
|
-
|
480
|
-
case '/':
|
481
|
-
return s_req_path;
|
482
|
-
|
483
|
-
case '?':
|
484
|
-
return s_req_query_string_start;
|
485
|
-
}
|
486
|
-
|
487
|
-
break;
|
488
|
-
|
489
|
-
case s_req_host_v6:
|
490
|
-
if (ch == ']') {
|
491
|
-
return s_req_host_v6_end;
|
481
|
+
if (ch == '/') {
|
482
|
+
return s_req_path;
|
492
483
|
}
|
493
484
|
|
494
|
-
|
495
|
-
|
496
|
-
if (IS_HEX(ch) || ch == ':') {
|
497
|
-
return s_req_host_v6;
|
485
|
+
if (ch == '?') {
|
486
|
+
return s_req_query_string_start;
|
498
487
|
}
|
499
|
-
break;
|
500
|
-
|
501
|
-
case s_req_port:
|
502
|
-
switch (ch) {
|
503
|
-
case '/':
|
504
|
-
return s_req_path;
|
505
488
|
|
506
|
-
|
507
|
-
|
489
|
+
if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
|
490
|
+
return s_req_host;
|
508
491
|
}
|
509
492
|
|
510
|
-
|
511
|
-
|
512
|
-
if (IS_NUM(ch)) {
|
513
|
-
return s_req_port;
|
493
|
+
if (ch == '@') {
|
494
|
+
return s_req_host_with_at;
|
514
495
|
}
|
515
496
|
|
516
497
|
break;
|
@@ -633,12 +614,8 @@ size_t http_parser_execute (http_parser *parser,
|
|
633
614
|
case s_req_schema_slash:
|
634
615
|
case s_req_schema_slash_slash:
|
635
616
|
case s_req_host_start:
|
636
|
-
case s_req_host_v6_start:
|
637
|
-
case s_req_host_v6:
|
638
|
-
case s_req_host_v6_end:
|
639
617
|
case s_req_host:
|
640
|
-
case s_req_port_start:
|
641
|
-
case s_req_port:
|
618
|
+
case s_req_host_with_at:
|
642
619
|
case s_req_query_string_start:
|
643
620
|
case s_req_query_string:
|
644
621
|
case s_req_fragment_start:
|
@@ -1015,9 +992,6 @@ size_t http_parser_execute (http_parser *parser,
|
|
1015
992
|
case s_req_schema_slash:
|
1016
993
|
case s_req_schema_slash_slash:
|
1017
994
|
case s_req_host_start:
|
1018
|
-
case s_req_host_v6_start:
|
1019
|
-
case s_req_host_v6:
|
1020
|
-
case s_req_port_start:
|
1021
995
|
{
|
1022
996
|
switch (ch) {
|
1023
997
|
/* No whitespace allowed here */
|
@@ -1038,8 +1012,7 @@ size_t http_parser_execute (http_parser *parser,
|
|
1038
1012
|
}
|
1039
1013
|
|
1040
1014
|
case s_req_host:
|
1041
|
-
case s_req_host_v6_end:
|
1042
|
-
case s_req_port:
|
1015
|
+
case s_req_host_with_at:
|
1043
1016
|
case s_req_path:
|
1044
1017
|
case s_req_query_string_start:
|
1045
1018
|
case s_req_query_string:
|
@@ -1938,6 +1911,144 @@ http_errno_description(enum http_errno err) {
|
|
1938
1911
|
return http_strerror_tab[err].description;
|
1939
1912
|
}
|
1940
1913
|
|
1914
|
+
static enum http_host_state
|
1915
|
+
http_parse_host_char(enum http_host_state s, const char ch) {
|
1916
|
+
switch(s) {
|
1917
|
+
case s_http_userinfo:
|
1918
|
+
case s_http_userinfo_start:
|
1919
|
+
if (ch == '@') {
|
1920
|
+
return s_http_host_start;
|
1921
|
+
}
|
1922
|
+
|
1923
|
+
if (IS_USERINFO_CHAR(ch)) {
|
1924
|
+
return s_http_userinfo;
|
1925
|
+
}
|
1926
|
+
break;
|
1927
|
+
|
1928
|
+
case s_http_host_start:
|
1929
|
+
if (ch == '[') {
|
1930
|
+
return s_http_host_v6_start;
|
1931
|
+
}
|
1932
|
+
|
1933
|
+
if (IS_HOST_CHAR(ch)) {
|
1934
|
+
return s_http_host;
|
1935
|
+
}
|
1936
|
+
|
1937
|
+
break;
|
1938
|
+
|
1939
|
+
case s_http_host:
|
1940
|
+
if (IS_HOST_CHAR(ch)) {
|
1941
|
+
return s_http_host;
|
1942
|
+
}
|
1943
|
+
|
1944
|
+
/* FALLTHROUGH */
|
1945
|
+
case s_http_host_v6_end:
|
1946
|
+
if (ch == ':') {
|
1947
|
+
return s_http_host_port_start;
|
1948
|
+
}
|
1949
|
+
|
1950
|
+
break;
|
1951
|
+
|
1952
|
+
case s_http_host_v6:
|
1953
|
+
if (ch == ']') {
|
1954
|
+
return s_http_host_v6_end;
|
1955
|
+
}
|
1956
|
+
|
1957
|
+
/* FALLTHROUGH */
|
1958
|
+
case s_http_host_v6_start:
|
1959
|
+
if (IS_HEX(ch) || ch == ':') {
|
1960
|
+
return s_http_host_v6;
|
1961
|
+
}
|
1962
|
+
|
1963
|
+
break;
|
1964
|
+
|
1965
|
+
case s_http_host_port:
|
1966
|
+
case s_http_host_port_start:
|
1967
|
+
if (IS_NUM(ch)) {
|
1968
|
+
return s_http_host_port;
|
1969
|
+
}
|
1970
|
+
|
1971
|
+
break;
|
1972
|
+
|
1973
|
+
default:
|
1974
|
+
break;
|
1975
|
+
}
|
1976
|
+
return s_http_host_dead;
|
1977
|
+
}
|
1978
|
+
|
1979
|
+
static int
|
1980
|
+
http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
|
1981
|
+
enum http_host_state s;
|
1982
|
+
|
1983
|
+
const char *p;
|
1984
|
+
size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
|
1985
|
+
|
1986
|
+
u->field_data[UF_HOST].len = 0;
|
1987
|
+
|
1988
|
+
s = found_at ? s_http_userinfo_start : s_http_host_start;
|
1989
|
+
|
1990
|
+
for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
|
1991
|
+
enum http_host_state new_s = http_parse_host_char(s, *p);
|
1992
|
+
|
1993
|
+
if (new_s == s_http_host_dead) {
|
1994
|
+
return 1;
|
1995
|
+
}
|
1996
|
+
|
1997
|
+
switch(new_s) {
|
1998
|
+
case s_http_host:
|
1999
|
+
if (s != s_http_host) {
|
2000
|
+
u->field_data[UF_HOST].off = p - buf;
|
2001
|
+
}
|
2002
|
+
u->field_data[UF_HOST].len ++;
|
2003
|
+
break;
|
2004
|
+
|
2005
|
+
case s_http_host_v6:
|
2006
|
+
if (s != s_http_host_v6) {
|
2007
|
+
u->field_data[UF_HOST].off = p - buf;
|
2008
|
+
}
|
2009
|
+
u->field_data[UF_HOST].len ++;
|
2010
|
+
break;
|
2011
|
+
|
2012
|
+
case s_http_host_port:
|
2013
|
+
if (s != s_http_host_port) {
|
2014
|
+
u->field_data[UF_PORT].off = p - buf;
|
2015
|
+
u->field_data[UF_PORT].len = 0;
|
2016
|
+
u->field_set |= (1 << UF_PORT);
|
2017
|
+
}
|
2018
|
+
u->field_data[UF_PORT].len ++;
|
2019
|
+
break;
|
2020
|
+
|
2021
|
+
case s_http_userinfo:
|
2022
|
+
if (s != s_http_userinfo) {
|
2023
|
+
u->field_data[UF_USERINFO].off = p - buf ;
|
2024
|
+
u->field_data[UF_USERINFO].len = 0;
|
2025
|
+
u->field_set |= (1 << UF_USERINFO);
|
2026
|
+
}
|
2027
|
+
u->field_data[UF_USERINFO].len ++;
|
2028
|
+
break;
|
2029
|
+
|
2030
|
+
default:
|
2031
|
+
break;
|
2032
|
+
}
|
2033
|
+
s = new_s;
|
2034
|
+
}
|
2035
|
+
|
2036
|
+
/* Make sure we don't end somewhere unexpected */
|
2037
|
+
switch (s) {
|
2038
|
+
case s_http_host_start:
|
2039
|
+
case s_http_host_v6_start:
|
2040
|
+
case s_http_host_v6:
|
2041
|
+
case s_http_host_port_start:
|
2042
|
+
case s_http_userinfo:
|
2043
|
+
case s_http_userinfo_start:
|
2044
|
+
return 1;
|
2045
|
+
default:
|
2046
|
+
break;
|
2047
|
+
}
|
2048
|
+
|
2049
|
+
return 0;
|
2050
|
+
}
|
2051
|
+
|
1941
2052
|
int
|
1942
2053
|
http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
|
1943
2054
|
struct http_parser_url *u)
|
@@ -1945,6 +2056,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
|
|
1945
2056
|
enum state s;
|
1946
2057
|
const char *p;
|
1947
2058
|
enum http_parser_url_fields uf, old_uf;
|
2059
|
+
int found_at = 0;
|
1948
2060
|
|
1949
2061
|
u->port = u->field_set = 0;
|
1950
2062
|
s = is_connect ? s_req_host_start : s_req_spaces_before_url;
|
@@ -1962,9 +2074,6 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
|
|
1962
2074
|
case s_req_schema_slash:
|
1963
2075
|
case s_req_schema_slash_slash:
|
1964
2076
|
case s_req_host_start:
|
1965
|
-
case s_req_host_v6_start:
|
1966
|
-
case s_req_host_v6_end:
|
1967
|
-
case s_req_port_start:
|
1968
2077
|
case s_req_query_string_start:
|
1969
2078
|
case s_req_fragment_start:
|
1970
2079
|
continue;
|
@@ -1973,15 +2082,14 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
|
|
1973
2082
|
uf = UF_SCHEMA;
|
1974
2083
|
break;
|
1975
2084
|
|
2085
|
+
case s_req_host_with_at:
|
2086
|
+
found_at = 1;
|
2087
|
+
|
2088
|
+
/* FALLTHROUGH */
|
1976
2089
|
case s_req_host:
|
1977
|
-
case s_req_host_v6:
|
1978
2090
|
uf = UF_HOST;
|
1979
2091
|
break;
|
1980
2092
|
|
1981
|
-
case s_req_port:
|
1982
|
-
uf = UF_PORT;
|
1983
|
-
break;
|
1984
|
-
|
1985
2093
|
case s_req_path:
|
1986
2094
|
uf = UF_PATH;
|
1987
2095
|
break;
|
@@ -2012,21 +2120,16 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
|
|
2012
2120
|
old_uf = uf;
|
2013
2121
|
}
|
2014
2122
|
|
2015
|
-
/*
|
2016
|
-
if (
|
2017
|
-
|
2123
|
+
/* host must be present if there is a schema */
|
2124
|
+
if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
|
2125
|
+
if (http_parse_host(buf, u, found_at) != 0) {
|
2126
|
+
return 1;
|
2127
|
+
}
|
2018
2128
|
}
|
2019
2129
|
|
2020
|
-
/*
|
2021
|
-
|
2022
|
-
case s_req_host_v6_start:
|
2023
|
-
case s_req_host_v6:
|
2024
|
-
case s_req_host_v6_end:
|
2025
|
-
case s_req_host:
|
2026
|
-
case s_req_port_start:
|
2130
|
+
/* CONNECT requests can only contain "hostname:port" */
|
2131
|
+
if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
|
2027
2132
|
return 1;
|
2028
|
-
default:
|
2029
|
-
break;
|
2030
2133
|
}
|
2031
2134
|
|
2032
2135
|
if (u->field_set & (1 << UF_PORT)) {
|
@@ -141,7 +141,7 @@ enum flags
|
|
141
141
|
|
142
142
|
|
143
143
|
/* Map for errno-related constants
|
144
|
-
*
|
144
|
+
*
|
145
145
|
* The provided argument should be a macro that takes 2 arguments.
|
146
146
|
*/
|
147
147
|
#define HTTP_ERRNO_MAP(XX) \
|
@@ -256,7 +256,8 @@ enum http_parser_url_fields
|
|
256
256
|
, UF_PATH = 3
|
257
257
|
, UF_QUERY = 4
|
258
258
|
, UF_FRAGMENT = 5
|
259
|
-
, UF_MAX = 6
|
259
|
+
, UF_USERINFO = 6
|
260
|
+
, UF_MAX = 7
|
260
261
|
};
|
261
262
|
|
262
263
|
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'http-parser'
|
data/test/test_http_parser.rb
CHANGED
@@ -154,4 +154,17 @@ describe 'http-parser' do
|
|
154
154
|
assert !parser.error?
|
155
155
|
assert !parser.error
|
156
156
|
end
|
157
|
+
|
158
|
+
it 'should parser urls with user:pass' do
|
159
|
+
parser.reset(HTTP::Parser::TYPE_REQUEST)
|
160
|
+
|
161
|
+
url = 'http://foo:bar@example.org/test.cgi?param1=1'
|
162
|
+
data = []
|
163
|
+
parser.on_url {|url| data << url}
|
164
|
+
|
165
|
+
parser << "GET #{url} HTTP/1.0\r\n\r\n"
|
166
|
+
|
167
|
+
assert !parser.error?
|
168
|
+
assert_equal url, data.first
|
169
|
+
end
|
157
170
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: http-parser-lite
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-07-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -35,13 +35,14 @@ extensions:
|
|
35
35
|
- ext/http-parser/extconf.rb
|
36
36
|
extra_rdoc_files: []
|
37
37
|
files:
|
38
|
-
- ext/http-parser/http_parser.c
|
39
38
|
- ext/http-parser/ruby_http_parser.c
|
39
|
+
- ext/http-parser/http_parser.c
|
40
40
|
- ext/http-parser/http_parser.h
|
41
41
|
- ext/http-parser/extconf.rb
|
42
42
|
- test/helper.rb
|
43
43
|
- test/test_http_parser.rb
|
44
44
|
- lib/http-parser.rb
|
45
|
+
- lib/http-parser-lite.rb
|
45
46
|
- README.md
|
46
47
|
- CHANGELOG
|
47
48
|
homepage: http://github.com/deepfryed/http-parser-lite
|
@@ -70,3 +71,4 @@ signing_key:
|
|
70
71
|
specification_version: 3
|
71
72
|
summary: Simple wrapper around Joyent http-parser
|
72
73
|
test_files: []
|
74
|
+
has_rdoc:
|