uri_scanner 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +2 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +22 -0
- data/README.md +73 -0
- data/Rakefile +35 -0
- data/example/parse.rb +32 -0
- data/example/scanner.rb +14 -0
- data/lib/machines/ip_addr.rl +60 -0
- data/lib/machines/ruby_actions.rl +49 -0
- data/lib/machines/sip_uri.rl +52 -0
- data/lib/machines/uri.rl +92 -0
- data/lib/uri_scanner.rb +24 -0
- data/lib/uri_scanner/ip_address.rb +532 -0
- data/lib/uri_scanner/ip_address.rl +27 -0
- data/lib/uri_scanner/uri_parser.rb +10539 -0
- data/lib/uri_scanner/uri_parser.rl +44 -0
- data/lib/uri_scanner/uri_scanner.rb +1007 -0
- data/lib/uri_scanner/uri_scanner.rl +45 -0
- data/lib/uri_scanner/version.rb +3 -0
- data/spec/ip_addr_spec.rb +64 -0
- data/spec/scanner_spec.rb +40 -0
- data/spec/spec_helper.rb +96 -0
- data/spec/uri_scanner_spec.rb +43 -0
- data/spec/uri_spec.rb +185 -0
- data/spec/url.txt +156 -0
- data/uri_scanner.gemspec +23 -0
- metadata +106 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 0c1cf86b6e9135a1fb326524b5069413b1f3bff9
|
4
|
+
data.tar.gz: 687da77c9318bbdb712e1bac6d1f164518526c47
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 33be598298215e59cb044256b6ccd25a3d40394a5bf0a23c9ea5d173938a095f6fbd8ae7bdf9bfb63fe3ef34202cfda1ed0b692187ec31ab6ad523ce0d25c547
|
7
|
+
data.tar.gz: 344bed1a5df1cc61cace654f3d3d34a875383654f0af5076eaf866db00920cbe4f592b3c286bd71087b1a936350762c1f8d4c9fa62b205ce1631771e58914852
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2016 Stas Kobzar
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
# UriScanner
|
2
|
+
|
3
|
+
Simple library that parses URI or scans input text for URIs.
|
4
|
+
RFC3986 compliant. SIP URIs parsing implemented following RFC3261.
|
5
|
+
|
6
|
+
This library is based on [Ragel State Machine Compiler](http://www.colm.net/open-source/ragel/).
|
7
|
+
Ragel is great software created by Dr. Adrian D. Thurston.
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Add this line to your application's Gemfile:
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
gem 'uri_scanner'
|
15
|
+
```
|
16
|
+
|
17
|
+
And then execute:
|
18
|
+
|
19
|
+
$ bundle
|
20
|
+
|
21
|
+
Or install it yourself as:
|
22
|
+
|
23
|
+
$ gem install uri_scanner
|
24
|
+
|
25
|
+
## Usage
|
26
|
+
|
27
|
+
Start with:
|
28
|
+
```ruby
|
29
|
+
require 'uri_scanner'
|
30
|
+
```
|
31
|
+
There are only four core methods:
|
32
|
+
|
33
|
+
```scan```: Scans text and returns an array of the URIs found.
|
34
|
+
```ruby
|
35
|
+
URIScanner.scan(text)
|
36
|
+
```
|
37
|
+
|
38
|
+
```parse_uri```: Parses a URI and returns an object that allows access to the URI segments.
|
39
|
+
Raises ```URIParserError```
|
40
|
+
```ruby
|
41
|
+
uri = URIScanner.parse_uri(uri_string)
|
42
|
+
uri.scheme
|
43
|
+
uri.host
|
44
|
+
uri.port
|
45
|
+
uri.userinfo
|
46
|
+
uri.username
|
47
|
+
uri.password
|
48
|
+
uri.path
|
49
|
+
uri.query
|
50
|
+
uri.fragment
|
51
|
+
uri.param
|
52
|
+
uri.header
|
53
|
+
```
|
54
|
+
|
55
|
+
```scan_and_parse```: Same as ```scan```, but returns an array of parsed URI objects (see parse_uri)
|
56
|
+
```ruby
|
57
|
+
URIScanner.scan_and_parse(text)
|
58
|
+
```
|
59
|
+
|
60
|
+
```is_ip_valid?```: Additional method that validates IPv4/IPv6 addresses (RFC3986 ABNF)
|
61
|
+
```ruby
|
62
|
+
URIScanner.is_ip_valid?(ip_string)
|
63
|
+
```
|
64
|
+
|
65
|
+
Check folder "example".
|
66
|
+
|
67
|
+
## Contributing
|
68
|
+
|
69
|
+
1. Fork it ( https://github.com/[my-github-username]/uri_scanner/fork )
|
70
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
71
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
72
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
73
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'mkmf'
|
3
|
+
require 'rspec/core/rake_task'
|
4
|
+
require "bundler/gem_tasks"
|
5
|
+
|
6
|
+
RSpec::Core::RakeTask.new(:spec)
|
7
|
+
|
8
|
+
# Default task: rebuild the Ragel-generated parsers, then run the specs.
# The two steps are declared as prerequisites (rather than invoked by hand in
# the task body) so that Rake itself knows about the dependency, e.g. for
# `rake --prereqs` and `rake --dry-run`.
task default: %i[ragel spec]
|
12
|
+
|
13
|
+
desc "Build ragel machines into ruby classes."
# Compiles every Ragel source under lib/uri_scanner into Ruby (`ragel -R`).
# Raises ArgumentError when no `ragel` executable can be found.
task :ragel do
  ragel = find_executable('ragel')
  raise ArgumentError, "Ragel executable not found" unless ragel

  Dir["lib/uri_scanner/*.rl"].each do |file|
    # Argument-list form: nothing is re-parsed by a shell, so an executable or
    # source path containing spaces or shell metacharacters still works.
    sh ragel, "-R", file
  end
end
|
21
|
+
|
22
|
+
desc "Create transition graphs."
# For every Ragel source under lib/uri_scanner: compile it to Ruby, dump the
# state machine as Graphviz input, render that to an image next to the source
# and delete the intermediate .dot file.
# Raises ArgumentError when `ragel` or `dot` cannot be found.
task :graph do
  format = "svg"
  ragel = find_executable('ragel')
  raise ArgumentError, "Ragel executable not found" unless ragel
  dot = find_executable('dot')
  raise ArgumentError, "Graphviz executable not found" unless dot

  Dir["lib/uri_scanner/*.rl"].each do |file|
    dot_file = "#{file}.dot"
    # Argument-list form of `sh`: paths with spaces or shell metacharacters
    # are passed through untouched instead of being re-parsed by a shell.
    sh ragel, "-R", file
    sh ragel, "-Vp", file, "-o", dot_file
    sh dot, dot_file, "-T#{format}", "-o", "#{file}.#{format}"
    # Rake's own `rm` instead of shelling out, so this also works where no
    # `rm` binary exists.
    rm dot_file
  end
end
|
data/example/parse.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Parse a URI and print its segments
|
4
|
+
# How to use
|
5
|
+
# >$ gem install uri_scanner
|
6
|
+
# >$ ruby example/parse.rb URL
|
7
|
+
#
|
8
|
+
# Try:
|
9
|
+
# ruby example/parse.rb "foo://user:pass@example.com:8042/over/there?name=ferret#nose"
|
10
|
+
# ruby example/parse.rb "sips:alice:secretW0rd@gateway.com:5061;transport=udp;user=phone;method=REGISTER?subject=sales%20meeting&priority=urgent&to=sales%40city.com"
|
11
|
+
#
|
12
|
+
|
13
|
+
require 'uri_scanner'
|
14
|
+
|
15
|
+
# Parse the URI given as the first command-line argument and print each of
# its segments as "name: value". Without an argument, or when parsing fails,
# only the error message is printed.
begin
  raise "Usage: #{__FILE__} URI" unless ARGV.first
  uri = URIScanner.parse_uri ARGV.first
  puts "URI #{ARGV.first} segments:"
  %i[scheme host port userinfo username password path query fragment param header].each do |segment|
    puts "#{segment}: #{uri.public_send(segment)}"
  end
# StandardError rather than Exception, so Ctrl-C (Interrupt), `exit`
# (SystemExit) and fatal VM errors are not swallowed and reported as a
# parse failure.
# NOTE(review): assumes URIParserError descends from StandardError - confirm.
rescue StandardError => e
  puts e.message
end
|
data/example/scanner.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Scan text and get all uris
|
4
|
+
# How to use
|
5
|
+
# >$ gem install uri_scanner
|
6
|
+
# >$ curl https://rubygems.org/ | ruby example/scanner.rb
|
7
|
+
|
8
|
+
require 'uri_scanner'
|
9
|
+
|
10
|
+
# Read all of standard input, then print every URI found in it, one per line.
input = $stdin.read
URIScanner.scan(input).each { |found| puts found }
|
13
|
+
# To parse to uri objects:
|
14
|
+
# URIScanner.scan_and_parse( $stdin.read ).each ...
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# Ragel machine : IP address
|
2
|
+
# IPv4 and IPv6 addresses
|
3
|
+
%%{
  machine ip_addr;

  # Ragel machine: IPv4 addresses
  # ===
  # Implements RFC 3986 [Section 3.2.2]
  #   IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
  #
  #   dec-octet   = DIGIT                 ; 0-9
  #               / %x31-39 DIGIT         ; 10-99
  #               / "1" 2DIGIT            ; 100-199
  #               / "2" %x30-34 DIGIT     ; 200-249
  #               / "25" %x30-35          ; 250-255
  #
  # In addition to the RFC grammar, leading zeros are accepted.
  # For example 001.010.100.1 == 1.10.100.1
  OCTET4 = ("0" | "00")? digit |
           "0"? [1-9] digit |
           "1" digit{2} |
           "2" [0-4] digit |
           "25" [0-5] ;

  IPv4_ADDR = (OCTET4 "."){3} OCTET4;

  # IPv6 addresses
  # Implements RFC 3986 [Section 3.2.2]
  #   IPv6address =                            6( h16 ":" ) ls32
  #               /                       "::" 5( h16 ":" ) ls32
  #               / [               h16 ] "::" 4( h16 ":" ) ls32
  #               / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
  #               / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
  #               / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
  #               / [ *4( h16 ":" ) h16 ] "::"              ls32
  #               / [ *5( h16 ":" ) h16 ] "::"              h16
  #               / [ *6( h16 ":" ) h16 ] "::"
  #
  #   ls32        = ( h16 ":" h16 ) / IPv4address
  #               ; least-significant 32 bits of address
  #
  #   h16         = 1*4HEXDIG
  #               ; 16 bits of address represented in hexadecimal
  #
  # The square brackets in the ABNF mark the part before "::" as optional, so
  # every such prefix below is wrapped in ( ... )?. Without that, addresses
  # that start with "::" (for example "::", "::1:2" or "::ffff:192.0.2.1")
  # would be rejected.
  H16 = xdigit{1,4} ;
  LS32 = (H16 ":" H16) | IPv4_ADDR ;
  IPv6_ADDR = (H16 ":"){6} LS32 |
              "::" (H16 ":"){5} LS32 |
              (H16)? "::" (H16 ":"){4} LS32 |
              ((H16 ":"){,1} H16)? "::" (H16 ":"){3} LS32 |
              ((H16 ":"){,2} H16)? "::" (H16 ":"){2} LS32 |
              ((H16 ":"){,3} H16)? "::" H16 ":" LS32 |
              ((H16 ":"){,4} H16)? "::" LS32 |
              ((H16 ":"){,5} H16)? "::" H16 |
              ((H16 ":"){,6} H16)? "::" ;

  # IP address
  IP_ADDR = IPv4_ADDR | IPv6_ADDR;

}%%
|
@@ -0,0 +1,49 @@
|
|
1
|
+
%%{
  machine actions;

  # Ruby actions shared by the URI and SIP URI machines. Each fetch_* action
  # copies the text between `mark` and the current position `p` (exclusive)
  # out of `data` into the matching instance variable.
  #
  # NOTE(review): all segments share the single `mark` variable; if two
  # sub-machines that both embed mark_start can be active at the same time,
  # the later one overwrites it - confirm against the grammars.

  # Remember the current position as the start of the segment being read.
  action mark_start{
    mark = p
  }

  # The scheme is sliced from offset 0 of the data, not from `mark`.
  action fetch_scheme{
    @scheme = data[0..p-1]
  }

  action fetch_host{
    @host = data[mark..p-1]
  }

  # Stores the raw userinfo and splits it into username and password.
  # The split is limited to the first ":" because the userinfo grammar allows
  # further ":" characters, which belong to the password. An empty password
  # ("user:") is reported as nil, as it was before the limit was added.
  action fetch_userinfo{
    @userinfo = data[mark..p-1]
    @username, @password = @userinfo.split(":", 2)
    @password = nil if @password && @password.empty?
  }

  # The port is stored as an Integer; an empty port string becomes 0.
  action fetch_port{
    @port = data[mark..p-1].to_i
  }

  action fetch_path{
    @path = data[mark..p-1]
  }

  action fetch_query{
    @query = data[mark..p-1]
  }

  action fetch_fragment{
    @fragment = data[mark..p-1]
  }

  # Actions for SIP URI

  # Splits the ";name=value" list into @param, keyed by symbol. A parameter
  # without "=" (for example "lr") is stored with a nil value.
  action fetch_uri_params{
    data[mark..p-1].split(";").each do |param|
      next if param.empty?
      k,v = param.split("=")
      @param[k.to_sym] = v
    end
  }

  # Stores one "name=value" header in @header with the value percent-decoded.
  # URI.unescape no longer exists in Ruby 3.0 and later, so the parser object
  # is used instead; "+" is left untouched. A header with an empty value
  # ("subject=") yields an empty string instead of failing on nil.
  action fetch_sipuri_header{
    k,v = data[mark..p-1].split("=")
    @header[k.to_sym] = URI::DEFAULT_PARSER.unescape(v.to_s)
  }
}%%
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# Ragel machine : SIP URI
|
2
|
+
# RFC 3261
|
3
|
+
# The SIP URI scheme is not fully compatible with RFC 3986.
|
4
|
+
# http://www.ietf.org/mail-archive/web/sip/current/msg26338.html
|
5
|
+
# http://www.ietf.org/mail-archive/web/sip/current/msg26385.html
|
6
|
+
# What follows is the RFC 3261 ABNF.
|
7
|
+
|
8
|
+
%%{
  machine sip_uri;

  # Character classes and tokens.
  ESCAPED = PCT_ENC;
  SIP_UNRESERVED = alnum | [\-_\.!~\*'()];
  USER_UNRESERVED = [&=+$,;?/];
  PARAM_UNRESERVED= [[\]/:&+$];
  TOKEN = (alnum | [\-\.!%*_+`'~]){1,};
  PARAMCHAR = PARAM_UNRESERVED | SIP_UNRESERVED | ESCAPED;
  PNAME = PARAMCHAR{1,};
  PVALUE = PARAMCHAR{1,};
  HNV_UNRESERVED = [[\]/?:+$];
  HNAME = (HNV_UNRESERVED | SIP_UNRESERVED | ESCAPED){1,};
  HVALUE = (HNV_UNRESERVED | SIP_UNRESERVED | ESCAPED)*;
  HEADER = HNAME >mark_start "=" HVALUE %fetch_sipuri_header;

  TEL_SUBSCRIBER = zlen; # will implement in future

  # User info: "user[:password]@".
  USER = ( SIP_UNRESERVED | ESCAPED | USER_UNRESERVED ){1,};
  PASSWORD = ( SIP_UNRESERVED | ESCAPED | [&=+$,] )*;
  SIP_UINFO = ( USER | TEL_SUBSCRIBER ) >mark_start ( ":" PASSWORD )? %fetch_userinfo "@";

  # Host and port.
  DOMAINLABEL = alnum | (alnum (alnum | "-")* alnum);
  TOPLABEL = alpha | (alpha (alnum | "-")* alnum);
  HOSTNAME = (DOMAINLABEL ".")* TOPLABEL "."?;
  # Host grammar without embedded actions, so it can be reused inside a URI
  # parameter without moving `mark` or overwriting @host.
  # NOTE(review): RFC 3261 writes IPv6 hosts as "[" IPv6address "]"; this
  # machine takes the address without brackets - confirm this is intended.
  SIPHOST_PLAIN = HOSTNAME | IPv4_ADDR | IPv6_ADDR;
  SIPHOST = SIPHOST_PLAIN >mark_start %fetch_host;

  HOSTPORT = SIPHOST (":" digit{1,} >mark_start %fetch_port)? ;

  # URI parameters.
  OTHER_PARAM = PNAME ("=" PVALUE)?;
  LR_PARAM = "lr";
  # Uses the action-free host: SIPHOST would run mark_start/fetch_host in the
  # middle of URI_PARAMS, replacing the real host with the maddr value and
  # shifting the start of the text that fetch_uri_params slices.
  MADDR_PARAM = "maddr=" SIPHOST_PLAIN;
  TTL_PARAM = "ttl=" digit{1,3};
  METHOD_PARAM = "method=" TOKEN{1,};
  USER_PARAM = "user=" ( "phone" | "ip" | TOKEN);
  TRANSPORT_PARAM = "transport=" ( "udp" | "tcp" | "sctp" | "tls" | TOKEN );
  URI_PARAM = TRANSPORT_PARAM | USER_PARAM | METHOD_PARAM |
              TTL_PARAM | MADDR_PARAM | LR_PARAM | OTHER_PARAM;

  URI_PARAMS = ( ";" URI_PARAM )* >mark_start %fetch_uri_params;

  HEADERS = "?" HEADER ("&" HEADER)*;

  # "sip:" or "sips:" (case-insensitive), optional user info, host[:port],
  # then any number of ";param" items and optional "?header" items.
  SIP_URI = ("sip"i [sS]?) %fetch_scheme ":"
            SIP_UINFO? HOSTPORT URI_PARAMS HEADERS?;
}%%
|
data/lib/machines/uri.rl
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
# Ragel machine : URI
|
2
|
+
# RFC 3986
|
3
|
+
# foo://example.com:8042/over/there?name=ferret#nose
|
4
|
+
# \_/ \______________/\_________/ \_________/ \__/
|
5
|
+
# | | | | |
|
6
|
+
# scheme authority path query fragment
|
7
|
+
# | _____________________|__
|
8
|
+
# / \ / \
|
9
|
+
# urn:example:animal:ferret:nose
|
10
|
+
|
11
|
+
%%{
|
12
|
+
machine uri;
|
13
|
+
|
14
|
+
# Percent-Encoding. Example: "%20" (space)
|
15
|
+
PCT_ENC = "%" xdigit xdigit;
|
16
|
+
|
17
|
+
# These characters are called "reserved" because
|
18
|
+
# they may (or may not) be defined as delimiters by
|
19
|
+
# the generic syntax.
|
20
|
+
GEN_DELIMS = [:/?#[\]@];
|
21
|
+
SUB_DELIMS = [!$&'()*+,;=];
|
22
|
+
RESERVED = GEN_DELIMS | SUB_DELIMS;
|
23
|
+
|
24
|
+
# Characters that are allowed in a URI but do not
|
25
|
+
# have a reserved purpose.
|
26
|
+
UNRESERVED = alnum | [\-\._~];
|
27
|
+
|
28
|
+
# URI Scheme
|
29
|
+
# scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
|
30
|
+
SCHEME = alpha (alnum | [+\-\.])* %fetch_scheme;
|
31
|
+
|
32
|
+
# Hierarchical element for a naming authority
|
33
|
+
# authority = [ userinfo "@" ] host [ ":" port ]
|
34
|
+
USERINFO = (UNRESERVED | PCT_ENC | SUB_DELIMS | ":")* ;
|
35
|
+
IPv_FUTURE = "v" xdigit{1,} "." (UNRESERVED | SUB_DELIMS | ":"){1,};
|
36
|
+
IP_LITERAL = "[" (IPv6_ADDR | IPv_FUTURE) "]";
|
37
|
+
REG_NAME = (UNRESERVED | PCT_ENC | SUB_DELIMS)*;
|
38
|
+
HOST = (IP_LITERAL | IPv4_ADDR | REG_NAME) >mark_start %fetch_host;
|
39
|
+
PORT = digit*;
|
40
|
+
AUTHORITY = (USERINFO >mark_start %fetch_userinfo "@")? HOST (":" PORT >mark_start %fetch_port)?;
|
41
|
+
|
42
|
+
# Path RFC 3986 Section 3.3
|
43
|
+
PCHAR = UNRESERVED | PCT_ENC | SUB_DELIMS | ":" | "@";
|
44
|
+
SEG_NZ_NC = (UNRESERVED | PCT_ENC | SUB_DELIMS | "@"){1,};
|
45
|
+
SEG_NZ = PCHAR{1,};
|
46
|
+
SEGMENT = PCHAR*;
|
47
|
+
PATH_EMPTY = '\0';
|
48
|
+
PATH_ROOTLESS = (SEG_NZ ("/" SEGMENT)*) >mark_start %fetch_path;
|
49
|
+
PATH_NOSCHEME = (SEG_NZ_NC ("/" SEGMENT)*) >mark_start %fetch_path;
|
50
|
+
PATH_ABSOLUTE = ("/" (SEG_NZ ("/" SEGMENT)*)?) >mark_start %fetch_path;
|
51
|
+
PATH_ABEMPTY = ("/" SEGMENT)* >mark_start %fetch_path;
|
52
|
+
|
53
|
+
PATH = PATH_ABEMPTY | # begins with "/" or is empty
|
54
|
+
PATH_ABSOLUTE | # begins with "/" but not "//"
|
55
|
+
PATH_NOSCHEME | # begins with a non-colon segment
|
56
|
+
PATH_ROOTLESS | # begins with a segment
|
57
|
+
PATH_EMPTY ; # zero characters
|
58
|
+
|
59
|
+
# The query component contains non-hierarchical data
|
60
|
+
# Section 3.4
|
61
|
+
QUERY = (PCHAR | "/" | "?")* >mark_start %fetch_query;
|
62
|
+
|
63
|
+
# Fragment
|
64
|
+
# Section 3.5
|
65
|
+
FRAGMENT = (PCHAR | "/" | "?")* >mark_start %fetch_fragment;
|
66
|
+
|
67
|
+
# Relative Reference
|
68
|
+
# Section 4.2
|
69
|
+
REL_PART = ("//" AUTHORITY PATH_ABEMPTY) |
|
70
|
+
PATH_ABSOLUTE |
|
71
|
+
PATH_NOSCHEME |
|
72
|
+
PATH_EMPTY ;
|
73
|
+
|
74
|
+
REL_REF = REL_PART ("?" QUERY)? ("#" FRAGMENT)?;
|
75
|
+
|
76
|
+
# Absolute URI
|
77
|
+
# Section 4.3
|
78
|
+
HIER_PART = ("//" AUTHORITY PATH_ABEMPTY) |
|
79
|
+
PATH_ABSOLUTE |
|
80
|
+
PATH_ROOTLESS |
|
81
|
+
PATH_EMPTY;
|
82
|
+
|
83
|
+
URI_ABS = SCHEME ":" HIER_PART ("?" QUERY)?;
|
84
|
+
|
85
|
+
# URI Reference
|
86
|
+
# Section 4.1; Section 3
|
87
|
+
URI = SCHEME ":" HIER_PART ("?" QUERY)? ("#" FRAGMENT)?;
|
88
|
+
|
89
|
+
# generic
|
90
|
+
URI_REF = URI | REL_REF;
|
91
|
+
|
92
|
+
}%%
|