uri_scanner 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,45 @@
1
+ #
2
+ # @LANG: ruby
3
+ # Scans text and extracts URI(s)
4
+ #
5
+
6
+ =begin
7
+ %%{
8
+ machine uri_scanner;
9
+ include actions "../machines/ruby_actions.rl";
10
+ include ip_addr "../machines/ip_addr.rl";
11
+ include uri "../machines/uri.rl";
12
+ include sip_uri "../machines/sip_uri.rl";
13
+
14
+ main := |*
15
+ URI => {@collection << data[ts..te-1]};
16
+ any;
17
+ *|;
18
+ }%%
19
+ =end
20
+
21
+ class MachineURIScanner
22
+ attr_accessor :collection
23
+
24
+ def initialize(data)
25
+ @collection = []
26
+ eof = data.length
27
+ %% write data;
28
+ %% write init;
29
+ %% write exec;
30
+ #%
31
+ end
32
+
33
+ class << self
34
+ def scan(data, to_objects=false)
35
+ if to_objects
36
+ self.new(data).collection.map do |url|
37
+ MachineURI.new(url)
38
+ end
39
+ else
40
+ self.new(data).collection
41
+ end
42
+ end
43
+ end
44
+ end
45
+
@@ -0,0 +1,3 @@
1
+ module URIScanner
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,64 @@
1
+ require 'uri_scanner/ip_address'
2
+
3
+ RSpec.describe MachineIpAddr do
4
+ let(:ip){ MachineIpAddr.new }
5
+ context "parsing IPv4" do
6
+ specify { expect(ip.is_valid? "8.8.8.8").to be true }
7
+ specify { expect(ip.is_valid? "0.0.0.0").to be true }
8
+ specify { expect(ip.is_valid? "0.0.0.1").to be true }
9
+ specify { expect(ip.is_valid? "10.0.0.1").to be true }
10
+ specify { expect(ip.is_valid? "192.168.100.10").to be true }
11
+ specify { expect(ip.is_valid? "172.31.1.34").to be true }
12
+ specify { expect(ip.is_valid? "10.00.00.01").to be true }
13
+ specify { expect(ip.is_valid? "010.000.000.001").to be true }
14
+
15
+ specify { expect(ip.is_valid? "").to be false }
16
+ specify { expect(ip.is_valid? "12.12.a.b").to be false }
17
+ specify { expect(ip.is_valid? "1.2.3.4.5").to be false }
18
+ specify { expect(ip.is_valid? "1.2.3.4.").to be false }
19
+ specify { expect(ip.is_valid? "111.222.333.444").to be false }
20
+ specify { expect(ip.is_valid? "256.222.33.44").to be false }
21
+ specify { expect(ip.is_valid? "25.299.33.44").to be false }
22
+ specify { expect(ip.is_valid? "25.99.333.44").to be false }
23
+ specify { expect(ip.is_valid? "25.99.3.440").to be false }
24
+ end
25
+
26
+ # Examples taken from RFC 5952 (recommended IPv6 address text representation)
27
+ context "parsing IPv6" do
28
+ specify { expect(ip.is_valid? "2001:db8:0:0:1:0:0:1").to be true }
29
+ specify { expect(ip.is_valid? "2001:0db8:0:0:1:0:0:1").to be true }
30
+ specify { expect(ip.is_valid? "2001:db8::1:0:0:1").to be true }
31
+ specify { expect(ip.is_valid? "2001:db8::0:1:0:0:1").to be true }
32
+ specify { expect(ip.is_valid? "2001:0db8::1:0:0:1").to be true }
33
+ specify { expect(ip.is_valid? "2001:db8:0:0:1::1").to be true }
34
+ specify { expect(ip.is_valid? "2001:db8:0000:0:1::1").to be true }
35
+ specify { expect(ip.is_valid? "2001:DB8:0:0:1::1").to be true }
36
+ specify { expect(ip.is_valid? "2001:db8:aaaa:bbbb:cccc:dddd:eeee:0001").to be true }
37
+ specify { expect(ip.is_valid? "2001:db8:aaaa:bbbb:cccc:dddd:eeee:001").to be true }
38
+ specify { expect(ip.is_valid? "2001:db8:aaaa:bbbb:cccc:dddd:eeee:01").to be true }
39
+ specify { expect(ip.is_valid? "2001:db8:aaaa:bbbb:cccc:dddd:eeee:1").to be true }
40
+ specify { expect(ip.is_valid? "2001:db8:aaaa:bbbb:cccc:dddd::1").to be true }
41
+ specify { expect(ip.is_valid? "2001:db8:0:0:0::1").to be true }
42
+ specify { expect(ip.is_valid? "2001:db8:0:0::1").to be true }
43
+ specify { expect(ip.is_valid? "2001:db8:0::1").to be true }
44
+ specify { expect(ip.is_valid? "2001:db8::1").to be true }
45
+ specify { expect(ip.is_valid? "2001:db8::aaaa:0:0:1").to be true }
46
+ specify { expect(ip.is_valid? "2001:db8:0:0:aaaa::1").to be true }
47
+ specify { expect(ip.is_valid? "2001:db8:aaaa:bbbb:cccc:dddd:eeee:AAAA").to be true }
48
+ specify { expect(ip.is_valid? "2001:db8:aaaa:bbbb:cccc:dddd:eeee:AaAa").to be true }
49
+ specify { expect(ip.is_valid? "2001:db8:0:0:0:0:2:1").to be true }
50
+ specify { expect(ip.is_valid? "2001:db8::2:1").to be true }
51
+ specify { expect(ip.is_valid? "2001:db8:0:1:1:1:1:1").to be true }
52
+ specify { expect(ip.is_valid? "::1").to be true }
53
+ specify { expect(ip.is_valid? "0000:0000:0000:0000:0000:0000:0000:0001").to be true }
54
+
55
+ specify { expect(ip.is_valid? "").to be false }
56
+ specify { expect(ip.is_valid? "2001:db8::1:1:1:::1").to be false }
57
+ specify { expect(ip.is_valid? "2016:::1:1").to be false }
58
+ specify { expect(ip.is_valid? "ab::1:2::a").to be false }
59
+ specify { expect(ip.is_valid? "qwe:1:2:3:4:5:6:7").to be false }
60
+ specify { expect(ip.is_valid? "aa:bb:cc:dd:ff:11:22:33:44").to be false }
61
+ specify { expect(ip.is_valid? "aa:bb:11:").to be false }
62
+ specify { expect(ip.is_valid? ":1234:aaaa:22:bb:ff").to be false }
63
+ end
64
+ end
@@ -0,0 +1,40 @@
1
+ require 'uri_scanner/uri_parser'
2
+ require 'uri_scanner/uri_scanner'
3
+
4
+ RSpec.describe MachineURIScanner do
5
+ describe "#scan" do
6
+ it "returns single address" do
7
+ data = "text before http://example.com:8042/index.php?name=ferret#nose text after"
8
+ expect(MachineURIScanner.scan(data)).to eql(%w{http://example.com:8042/index.php?name=ferret#nose})
9
+ end
10
+ it "returns three URIs" do
11
+ data = "Lorem ipsum dolor sit amet, consectetur "
12
+ data += "adipiscing elit, \"ftp://ftp.is.co.za/rfc/rfc1808.txt\" sed "
13
+ data += "do eiusmod tempor incididunt <http://www.ietf.org/rfc/rfc2396.txt> "
14
+ data += "ut labore et ldap://[2001:db8::7]/c=GB?objectClass?one dolore magna aliqua."
15
+ expect(MachineURIScanner.scan(data).length).to be 3
16
+ expect(MachineURIScanner.scan(data)).to eql(%w{
17
+ ftp://ftp.is.co.za/rfc/rfc1808.txt
18
+ http://www.ietf.org/rfc/rfc2396.txt
19
+ ldap://[2001:db8::7]/c=GB?objectClass?one
20
+ })
21
+ end
22
+ it "scans text from file" do
23
+ f = File.open("spec/url.txt")
24
+ expect(MachineURIScanner.scan(f.read).length).to be 15
25
+ end
26
+
27
+ context "to array of MachineURI instances" do
28
+ let(:data) {%Q{text before http://example.com:8042/index.php?name=ferret#nose text after
29
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Etiam
30
+ nec erat "foo://info.example.com?fred" Proin sollicitudin <sip:12345@sip-provider.info:5060> }}
31
+ let(:scanner) { MachineURIScanner.scan(data, true) }
32
+ specify{expect(scanner.first.scheme).to eq("http")}
33
+ specify{expect(scanner.first.port).to eq(8042)}
34
+ specify{expect(scanner.last.host).to eq("sip-provider.info")}
35
+ specify{expect(scanner[1].query).to eq("fred")}
36
+ end
37
+
38
+ end
39
+ end
40
+
@@ -0,0 +1,96 @@
1
+ # This file was generated by the `rspec --init` command. Conventionally, all
2
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
+ # The generated `.rspec` file contains `--require spec_helper` which will cause
4
+ # this file to always be loaded, without a need to explicitly require it in any
5
+ # files.
6
+ #
7
+ # Given that it is always loaded, you are encouraged to keep this file as
8
+ # light-weight as possible. Requiring heavyweight dependencies from this file
9
+ # will add to the boot time of your test suite on EVERY test run, even for an
10
+ # individual file that may not need all of that loaded. Instead, consider making
11
+ # a separate helper file that requires the additional dependencies and performs
12
+ # the additional setup, and require it from the spec files that actually need
13
+ # it.
14
+ #
15
+ # The `.rspec` file also contains a few flags that are not defaults but that
16
+ # users commonly want.
17
+ #
18
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
19
+ RSpec.configure do |config|
20
+ # rspec-expectations config goes here. You can use an alternate
21
+ # assertion/expectation library such as wrong or the stdlib/minitest
22
+ # assertions if you prefer.
23
+ config.expect_with :rspec do |expectations|
24
+ # This option will default to `true` in RSpec 4. It makes the `description`
25
+ # and `failure_message` of custom matchers include text for helper methods
26
+ # defined using `chain`, e.g.:
27
+ # be_bigger_than(2).and_smaller_than(4).description
28
+ # # => "be bigger than 2 and smaller than 4"
29
+ # ...rather than:
30
+ # # => "be bigger than 2"
31
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
32
+ end
33
+
34
+ # rspec-mocks config goes here. You can use an alternate test double
35
+ # library (such as bogus or mocha) by changing the `mock_with` option here.
36
+ config.mock_with :rspec do |mocks|
37
+ # Prevents you from mocking or stubbing a method that does not exist on
38
+ # a real object. This is generally recommended, and will default to
39
+ # `true` in RSpec 4.
40
+ mocks.verify_partial_doubles = true
41
+ end
42
+
43
+ # The settings below are suggested to provide a good initial experience
44
+ # with RSpec, but feel free to customize to your heart's content.
45
+ =begin
46
+ # These two settings work together to allow you to limit a spec run
47
+ # to individual examples or groups you care about by tagging them with
48
+ # `:focus` metadata. When nothing is tagged with `:focus`, all examples
49
+ # get run.
50
+ config.filter_run :focus
51
+ config.run_all_when_everything_filtered = true
52
+
53
+ # Allows RSpec to persist some state between runs in order to support
54
+ # the `--only-failures` and `--next-failure` CLI options. We recommend
55
+ # you configure your source control system to ignore this file.
56
+ config.example_status_persistence_file_path = "spec/examples.txt"
57
+
58
+ # Limits the available syntax to the non-monkey patched syntax that is
59
+ # recommended. For more details, see:
60
+ # - http://rspec.info/blog/2012/06/rspecs-new-expectation-syntax/
61
+ # - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
62
+ # - http://rspec.info/blog/2014/05/notable-changes-in-rspec-3/#zero-monkey-patching-mode
63
+ config.disable_monkey_patching!
64
+
65
+ # This setting enables warnings. It's recommended, but in some cases may
66
+ # be too noisy due to issues in dependencies.
67
+ config.warnings = true
68
+
69
+ # Many RSpec users commonly either run the entire suite or an individual
70
+ # file, and it's useful to allow more verbose output when running an
71
+ # individual spec file.
72
+ if config.files_to_run.one?
73
+ # Use the documentation formatter for detailed output,
74
+ # unless a formatter has already been configured
75
+ # (e.g. via a command-line flag).
76
+ config.default_formatter = 'doc'
77
+ end
78
+
79
+ # Print the 10 slowest examples and example groups at the
80
+ # end of the spec run, to help surface which specs are running
81
+ # particularly slow.
82
+ config.profile_examples = 10
83
+
84
+ # Run specs in random order to surface order dependencies. If you find an
85
+ # order dependency and want to debug it, you can fix the order by providing
86
+ # the seed, which is printed after each run.
87
+ # --seed 1234
88
+ config.order = :random
89
+
90
+ # Seed global randomization in this process using the `--seed` CLI option.
91
+ # Setting this allows you to use `--seed` to deterministically reproduce
92
+ # test failures related to randomization by passing the same `--seed` value
93
+ # as the one that triggered the failure.
94
+ Kernel.srand config.seed
95
+ =end
96
+ end
@@ -0,0 +1,43 @@
1
+ require 'uri_scanner'
2
+
3
+ RSpec.describe URIScanner do
4
+ describe "#is_ip_valid?" do
5
+ it{expect(URIScanner.is_ip_valid?("172.31.1.222")).to be true}
6
+ it{expect(URIScanner.is_ip_valid?("172.311.1.222")).to be false}
7
+ it{expect(URIScanner.is_ip_valid?("::1")).to be true}
8
+ it{expect(URIScanner.is_ip_valid?("2016:::1:1")).to be false}
9
+ it{expect(URIScanner.is_ip_valid?("")).to be false}
10
+ end
11
+
12
+ describe "#parse_uri" do
13
+ context "parsing 'ldap://[2001:db8::7]/c=GB?objectClass?one'" do
14
+ let(:uri){ URIScanner.parse_uri "ldap://[2001:db8::7]/c=GB?objectClass?one"}
15
+ it {expect(uri.scheme).to eq("ldap")}
16
+ it {expect(uri.host).to eq("[2001:db8::7]")}
17
+ it {expect(uri.port).to be_nil}
18
+ it {expect(uri.path).to eq("/c=GB")}
19
+ it {expect(uri.query).to eq("objectClass?one")}
20
+ end
21
+ end
22
+
23
+ describe "#scan" do
24
+ context "text and returns array of strings" do
25
+ let(:data) {File.read "spec/url.txt" }
26
+ let(:uris) {URIScanner.scan data}
27
+ it{expect(uris.length).to be 15}
28
+ it{uris.each{|u| expect(u).to be_a String}}
29
+ end
30
+
31
+ context "text and returns array of uri objects" do
32
+ let(:data) {%Q{text before http://example.com:8042/index.php?name=ferret#nose text after
33
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Etiam
34
+ nec erat "foo://info.example.com?fred=homme#world" Proin sollicitudin <sip:12345@sip-provider.info:5060> }}
35
+ let(:uri) {URIScanner.scan_and_parse(data)[1]}
36
+ it{expect(uri.scheme).to eq("foo")}
37
+ it{expect(uri.host).to eq("info.example.com")}
38
+ it{expect(uri.port).to be_nil}
39
+ it{expect(uri.query).to eq("fred=homme")}
40
+ it{expect(uri.fragment).to eq("world")}
41
+ end
42
+ end
43
+ end
data/spec/uri_spec.rb ADDED
@@ -0,0 +1,185 @@
1
+ require 'uri_scanner/uri_parser'
2
+
3
+ RSpec.describe MachineURI do
4
+ context "validate URI" do
5
+ specify {
6
+ expect(MachineURI.new("http://www.ietf.org/rfc/rfc2396.txt")
7
+ .is_valid?).to be true}
8
+
9
+ specify {
10
+ expect{ MachineURI.new("1http://www.ietf.org/rfc/rfc2396.txt") }
11
+ .to raise_error(URIParserError)}
12
+
13
+ it "testing multiple samples with authority part" do
14
+ %W{
15
+ http://localhost/
16
+ ftp://ftp.is.co.za/rfc/rfc1808.txt
17
+ http://www.ietf.org/rfc/rfc2396.txt
18
+ ldap://[2001:db8::7]/c=GB?objectClass?one
19
+ telnet://192.0.2.16:80/
20
+ example://a/b/c/%7Bfoo%7D
21
+ eXAMPLE://a/./b/../b/%63/%7bfoo%7d
22
+ foo://example.com:8042/over/there?name=ferret#nose
23
+ foo://info.example.com?fred
24
+ redis://host:6503/dbindex/keyname
25
+ }.each do |uri|
26
+ expect( MachineURI.new(uri).is_valid? ).to be true
27
+ end
28
+ end
29
+ it "testing multiple samples without authority part" do
30
+ %W{
31
+ urn:oasis:names:specification:docbook:dtd:xml:4.1.2
32
+ urn:example:animal:ferret:nose
33
+ mailto:John.Doe@example.com
34
+ news:comp.infosystems.www.servers.unix
35
+ sip:12345@sip-provider.info:5060
36
+ sips:johndoe@sip.secure.com
37
+ tel:+1-816-555-1212
38
+ }.each do |uri|
39
+ expect( MachineURI.new(uri).is_valid? ).to be true
40
+ end
41
+ end
42
+ end
43
+
44
+ context "component parts parsing" do
45
+ describe "#scheme" do
46
+ it "should be 'foo'" do
47
+ uri = MachineURI.new "foo://example.com:8042/over/there?name=ferret#nose"
48
+ expect(uri.scheme).to eq("foo")
49
+ end
50
+ it "should be 'sip'" do
51
+ uri = MachineURI.new "sip:username@example.com"
52
+ expect(uri.scheme).to eq("sip")
53
+ end
54
+ it "raises error on invalid scheme" do
55
+ expect {
56
+ MachineURI.new "f~oo://google.com"
57
+ }.to raise_error(URIParserError)
58
+ expect {
59
+ MachineURI.new "1http://google.com"
60
+ }.to raise_error(URIParserError)
61
+ end
62
+ end
63
+
64
+ describe "#host" do
65
+ it "should be 'example.com'" do
66
+ uri = MachineURI.new "foo://example.com:8042/over/there?name=ferret#nose"
67
+ expect(uri.host).to eq("example.com")
68
+ end
69
+ it "should be 'sip-provider.info'" do
70
+ uri = MachineURI.new "sip:12345@sip-provider.info:5060"
71
+ expect(uri.host).to eq("sip-provider.info")
72
+ end
73
+ it "should be '[2001:db8::7]'" do
74
+ uri = MachineURI.new "ldap://[2001:db8::7]/c=GB?objectClass?one"
75
+ expect(uri.host).to eq("[2001:db8::7]")
76
+ end
77
+ it "should be '192.0.2.16'" do
78
+ uri = MachineURI.new "telnet://192.0.2.16:80/"
79
+ expect(uri.host).to eq("192.0.2.16")
80
+ end
81
+ it "should be nil" do
82
+ uri = MachineURI.new "urn:example:animal:ferret:nose"
83
+ expect(uri.host).to be_nil
84
+ end
85
+ end
86
+
87
+ describe "#userinfo" do
88
+ it "should be 'john'" do
89
+ uri = MachineURI.new "foo://john@example.com:8042/over/there?name=ferret#nose"
90
+ expect(uri.userinfo).to eq("john")
91
+ end
92
+ it "should be nil" do
93
+ uri = MachineURI.new "ldap://[2001:db8::7]/c=GB?objectClass?one"
94
+ expect(uri.userinfo).to be_nil
95
+ end
96
+ it "should be '12345'" do
97
+ uri = MachineURI.new "sip:12345@sip-provider.info:5060"
98
+ expect(uri.userinfo).to eq("12345")
99
+ end
100
+ end
101
+
102
+ describe "#port" do
103
+ it "should be 8042" do
104
+ uri = MachineURI.new "foo://john@example.com:8042/over/there?name=ferret#nose"
105
+ expect(uri.port).to eq(8042)
106
+ end
107
+ it "should be nil" do
108
+ uri = MachineURI.new "http://www.ietf.org/rfc/rfc2396.txt"
109
+ expect(uri.port).to be_nil
110
+ end
111
+ it "should be '5060'" do
112
+ uri = MachineURI.new "sip:12345@sip-provider.info:5060"
113
+ expect(uri.port).to eq(5060)
114
+ end
115
+ end
116
+
117
+ describe "#path" do
118
+ it "should be '/rfc/rfc2396.txt'" do
119
+ uri = MachineURI.new "http://www.ietf.org/rfc/rfc2396.txt#section%205.2"
120
+ expect(uri.path).to eq("/rfc/rfc2396.txt")
121
+ end
122
+ it "should be nil'" do
123
+ uri = MachineURI.new "telnet://192.0.2.16:80"
124
+ expect(uri.path).to be_empty
125
+ end
126
+ it "should be 'example:animal:ferret:nose'" do
127
+ uri = MachineURI.new "urn:example:animal:ferret:nose"
128
+ expect(uri.path).to eq("example:animal:ferret:nose")
129
+ end
130
+ end
131
+
132
+ describe "#query" do
133
+ it "should be 'name=ferret'" do
134
+ uri = MachineURI.new "foo://john@example.com:8042/over/there?name=ferret#nose"
135
+ expect(uri.query).to eq("name=ferret")
136
+ end
137
+ it "should be nil" do
138
+ uri = MachineURI.new "http://www.ietf.org/rfc/rfc2396.txt"
139
+ expect(uri.query).to be_nil
140
+ end
141
+ end
142
+
143
+ describe "#fragment" do
144
+ it "should be 'nose'" do
145
+ uri = MachineURI.new "foo://john@example.com:8042/over/there?name=ferret#nose"
146
+ expect(uri.fragment).to eq("nose")
147
+ end
148
+ it "should be nil" do
149
+ uri = MachineURI.new "http://www.ietf.org/rfc/rfc2396.txt?user=john"
150
+ expect(uri.fragment).to be_nil
151
+ end
152
+ it "should be 'section%205.2'" do
153
+ uri = MachineURI.new "http://www.ietf.org/rfc/rfc2396.txt#section%205.2"
154
+ expect(uri.fragment).to eq("section%205.2")
155
+ end
156
+ end
157
+
158
+ context "should be parsed to fields" do
159
+ let(:uri) {MachineURI.new "foo://john@example.com:8042/over/there?name=ferret#nose"}
160
+ it{expect(uri.scheme).to eq("foo")}
161
+ it{expect(uri.userinfo).to eq("john")}
162
+ it{expect(uri.host).to eq("example.com")}
163
+ it{expect(uri.port).to eq(8042)}
164
+ it{expect(uri.path).to eq("/over/there")}
165
+ it{expect(uri.query).to eq("name=ferret")}
166
+ it{expect(uri.fragment).to eq("nose")}
167
+ end
168
+
169
+ context "should parse SIP to fields" do
170
+ let(:uri){MachineURI.new "sips:alice:secretW0rd@gateway.com:5061;transport=udp;user=phone;method=REGISTER?subject=sales%20meeting&priority=urgent&to=sales%40city.com"}
171
+ it{expect(uri.scheme).to eq("sips")}
172
+ it{expect(uri.username).to eq("alice")}
173
+ it{expect(uri.password).to eq("secretW0rd")}
174
+ it{expect(uri.host).to eq("gateway.com")}
175
+ it{expect(uri.port).to eq(5061)}
176
+ it{expect(uri.param[:transport]).to eq("udp")}
177
+ it{expect(uri.param[:user]).to eq("phone")}
178
+ it{expect(uri.param[:method]).to eq("REGISTER")}
179
+ it{expect(uri.header[:subject]).to eq("sales meeting")}
180
+ it{expect(uri.header[:priority]).to eq("urgent")}
181
+ it{expect(uri.header[:to]).to eq("sales@city.com")}
182
+ end
183
+ end
184
+ end
185
+