uri_scanner 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,45 @@
1
+ #
2
+ # @LANG: ruby
3
+ # Scans text and extracts URI(s)
4
+ #
5
+
6
+ =begin
7
+ %%{
8
+ machine uri_scanner;
9
+ include actions "../machines/ruby_actions.rl";
10
+ include ip_addr "../machines/ip_addr.rl";
11
+ include uri "../machines/uri.rl"; # presumably where the URI pattern used below is defined - TODO confirm
12
+ include sip_uri "../machines/sip_uri.rl";
13
+
14
+ main := |* # scanner section: the patterns below are tried repeatedly over the input
15
+ URI => {@collection << data[ts..te-1]}; # on a URI match, store the matched text; te points one past the token, hence te-1
16
+ any; # no action attached: any other single character is consumed and dropped
17
+ *|;
18
+ }%%
19
+ =end
20
+
21
+ class MachineURIScanner # Collects the URIs found in a text; the scanning code comes from the Ragel `write` directives below
22
+ attr_accessor :collection # Array of matched URI strings, appended to by the scanner's URI action
23
+
24
+ def initialize(data) # data: the text to scan; scanning happens here, during construction
25
+ @collection = []
26
+ eof = data.length # not read by any visible Ruby code; presumably consumed by the Ragel-generated exec code - TODO confirm
27
+ %% write data;
28
+ %% write init;
29
+ %% write exec;
30
+ #%
31
+ end
32
+
33
+ class << self
34
+ def scan(data, to_objects=false) # Returns the URIs found in data: an Array of Strings, or of MachineURI objects when to_objects is truthy
35
+ if to_objects
36
+ self.new(data).collection.map do |url|
37
+ MachineURI.new(url) # MachineURI is defined outside this file
38
+ end
39
+ else
40
+ self.new(data).collection
41
+ end
42
+ end
43
+ end
44
+ end
45
+
@@ -0,0 +1,3 @@
1
+ module URIScanner # Top-level namespace of the gem; this file only defines its version
2
+ VERSION = "0.0.1" # release version string of the uri_scanner gem
3
+ end
@@ -0,0 +1,64 @@
1
+ require 'uri_scanner/ip_address'
2
+
3
+ RSpec.describe MachineIpAddr do
4
+ let(:ip){ MachineIpAddr.new }
5
+ context "parsing IPv4" do
6
+ specify { expect(ip.is_valid? "8.8.8.8").to be true }
7
+ specify { expect(ip.is_valid? "0.0.0.0").to be true }
8
+ specify { expect(ip.is_valid? "0.0.0.1").to be true }
9
+ specify { expect(ip.is_valid? "10.0.0.1").to be true }
10
+ specify { expect(ip.is_valid? "192.168.100.10").to be true }
11
+ specify { expect(ip.is_valid? "172.31.1.34").to be true }
12
+ specify { expect(ip.is_valid? "10.00.00.01").to be true }
13
+ specify { expect(ip.is_valid? "010.000.000.001").to be true }
14
+
15
+ specify { expect(ip.is_valid? "").to be false }
16
+ specify { expect(ip.is_valid? "12.12.a.b").to be false }
17
+ specify { expect(ip.is_valid? "1.2.3.4.5").to be false }
18
+ specify { expect(ip.is_valid? "1.2.3.4.").to be false }
19
+ specify { expect(ip.is_valid? "111.222.333.444").to be false }
20
+ specify { expect(ip.is_valid? "256.222.33.44").to be false }
21
+ specify { expect(ip.is_valid? "25.299.33.44").to be false }
22
+ specify { expect(ip.is_valid? "25.99.333.44").to be false }
23
+ specify { expect(ip.is_valid? "25.99.3.440").to be false }
24
+ end
25
+
26
+ # Examples from RFC 5952 (IPv6 address text representation)
27
+ context "parsing IPv6" do
28
+ specify { expect(ip.is_valid? "2001:db8:0:0:1:0:0:1").to be true }
29
+ specify { expect(ip.is_valid? "2001:0db8:0:0:1:0:0:1").to be true }
30
+ specify { expect(ip.is_valid? "2001:db8::1:0:0:1").to be true }
31
+ specify { expect(ip.is_valid? "2001:db8::0:1:0:0:1").to be true }
32
+ specify { expect(ip.is_valid? "2001:0db8::1:0:0:1").to be true }
33
+ specify { expect(ip.is_valid? "2001:db8:0:0:1::1").to be true }
34
+ specify { expect(ip.is_valid? "2001:db8:0000:0:1::1").to be true }
35
+ specify { expect(ip.is_valid? "2001:DB8:0:0:1::1").to be true }
36
+ specify { expect(ip.is_valid? "2001:db8:aaaa:bbbb:cccc:dddd:eeee:0001").to be true }
37
+ specify { expect(ip.is_valid? "2001:db8:aaaa:bbbb:cccc:dddd:eeee:001").to be true }
38
+ specify { expect(ip.is_valid? "2001:db8:aaaa:bbbb:cccc:dddd:eeee:01").to be true }
39
+ specify { expect(ip.is_valid? "2001:db8:aaaa:bbbb:cccc:dddd:eeee:1").to be true }
40
+ specify { expect(ip.is_valid? "2001:db8:aaaa:bbbb:cccc:dddd::1").to be true }
41
+ specify { expect(ip.is_valid? "2001:db8:0:0:0::1").to be true }
42
+ specify { expect(ip.is_valid? "2001:db8:0:0::1").to be true }
43
+ specify { expect(ip.is_valid? "2001:db8:0::1").to be true }
44
+ specify { expect(ip.is_valid? "2001:db8::1").to be true }
45
+ specify { expect(ip.is_valid? "2001:db8::aaaa:0:0:1").to be true }
46
+ specify { expect(ip.is_valid? "2001:db8:0:0:aaaa::1").to be true }
47
+ specify { expect(ip.is_valid? "2001:db8:aaaa:bbbb:cccc:dddd:eeee:AAAA").to be true }
48
+ specify { expect(ip.is_valid? "2001:db8:aaaa:bbbb:cccc:dddd:eeee:AaAa").to be true }
49
+ specify { expect(ip.is_valid? "2001:db8:0:0:0:0:2:1").to be true }
50
+ specify { expect(ip.is_valid? "2001:db8::2:1").to be true }
51
+ specify { expect(ip.is_valid? "2001:db8:0:1:1:1:1:1").to be true }
52
+ specify { expect(ip.is_valid? "::1").to be true }
53
+ specify { expect(ip.is_valid? "0000:0000:0000:0000:0000:0000:0000:0001").to be true }
54
+
55
+ specify { expect(ip.is_valid? "").to be false }
56
+ specify { expect(ip.is_valid? "2001:db8::1:1:1:::1").to be false }
57
+ specify { expect(ip.is_valid? "2016:::1:1").to be false }
58
+ specify { expect(ip.is_valid? "ab::1:2::a").to be false }
59
+ specify { expect(ip.is_valid? "qwe:1:2:3:4:5:6:7").to be false }
60
+ specify { expect(ip.is_valid? "aa:bb:cc:dd:ff:11:22:33:44").to be false }
61
+ specify { expect(ip.is_valid? "aa:bb:11:").to be false }
62
+ specify { expect(ip.is_valid? ":1234:aaaa:22:bb:ff").to be false }
63
+ end
64
+ end
@@ -0,0 +1,40 @@
1
+ require 'uri_scanner/uri_parser'
2
+ require 'uri_scanner/uri_scanner'
3
+
4
+ RSpec.describe MachineURIScanner do
5
+ describe "#scan" do
6
+ it "returns single address" do
7
+ data = "text before http://example.com:8042/index.php?name=ferret#nose text after"
8
+ expect(MachineURIScanner.scan(data)).to eql(%w{http://example.com:8042/index.php?name=ferret#nose})
9
+ end
10
+ it "returns three URIs" do
11
+ data = "Lorem ipsum dolor sit amet, consectetur "
12
+ data += "adipiscing elit, \"ftp://ftp.is.co.za/rfc/rfc1808.txt\" sed "
13
+ data += "do eiusmod tempor incididunt <http://www.ietf.org/rfc/rfc2396.txt> "
14
+ data += "ut labore et ldap://[2001:db8::7]/c=GB?objectClass?one dolore magna aliqua."
15
+ expect(MachineURIScanner.scan(data).length).to be 3
16
+ expect(MachineURIScanner.scan(data)).to eql(%w{
17
+ ftp://ftp.is.co.za/rfc/rfc1808.txt
18
+ http://www.ietf.org/rfc/rfc2396.txt
19
+ ldap://[2001:db8::7]/c=GB?objectClass?one
20
+ })
21
+ end
22
+ it "scans text from file" do
23
+ f = File.open("spec/url.txt")
24
+ expect(MachineURIScanner.scan(f.read).length).to be 15
25
+ end
26
+
27
+ context "to array of MachineURI instances" do
28
+ let(:data) {%Q{text before http://example.com:8042/index.php?name=ferret#nose text after
29
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Etiam
30
+ nec erat "foo://info.example.com?fred" Proin sollicitudin <sip:12345@sip-provider.info:5060> }}
31
+ let(:scanner) { MachineURIScanner.scan(data, true) }
32
+ specify{expect(scanner.first.scheme).to eq("http")}
33
+ specify{expect(scanner.first.port).to eq(8042)}
34
+ specify{expect(scanner.last.host).to eq("sip-provider.info")}
35
+ specify{expect(scanner[1].query).to eq("fred")}
36
+ end
37
+
38
+ end
39
+ end
40
+
@@ -0,0 +1,96 @@
1
+ # This file was generated by the `rspec --init` command. Conventionally, all
2
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
+ # The generated `.rspec` file contains `--require spec_helper` which will cause
4
+ # this file to always be loaded, without a need to explicitly require it in any
5
+ # files.
6
+ #
7
+ # Given that it is always loaded, you are encouraged to keep this file as
8
+ # light-weight as possible. Requiring heavyweight dependencies from this file
9
+ # will add to the boot time of your test suite on EVERY test run, even for an
10
+ # individual file that may not need all of that loaded. Instead, consider making
11
+ # a separate helper file that requires the additional dependencies and performs
12
+ # the additional setup, and require it from the spec files that actually need
13
+ # it.
14
+ #
15
+ # The `.rspec` file also contains a few flags that are not defaults but that
16
+ # users commonly want.
17
+ #
18
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
19
+ RSpec.configure do |config|
20
+ # rspec-expectations config goes here. You can use an alternate
21
+ # assertion/expectation library such as wrong or the stdlib/minitest
22
+ # assertions if you prefer.
23
+ config.expect_with :rspec do |expectations|
24
+ # This option will default to `true` in RSpec 4. It makes the `description`
25
+ # and `failure_message` of custom matchers include text for helper methods
26
+ # defined using `chain`, e.g.:
27
+ # be_bigger_than(2).and_smaller_than(4).description
28
+ # # => "be bigger than 2 and smaller than 4"
29
+ # ...rather than:
30
+ # # => "be bigger than 2"
31
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
32
+ end
33
+
34
+ # rspec-mocks config goes here. You can use an alternate test double
35
+ # library (such as bogus or mocha) by changing the `mock_with` option here.
36
+ config.mock_with :rspec do |mocks|
37
+ # Prevents you from mocking or stubbing a method that does not exist on
38
+ # a real object. This is generally recommended, and will default to
39
+ # `true` in RSpec 4.
40
+ mocks.verify_partial_doubles = true
41
+ end
42
+
43
+ # The settings below are suggested to provide a good initial experience
44
+ # with RSpec, but feel free to customize to your heart's content.
45
+ =begin
46
+ # These two settings work together to allow you to limit a spec run
47
+ # to individual examples or groups you care about by tagging them with
48
+ # `:focus` metadata. When nothing is tagged with `:focus`, all examples
49
+ # get run.
50
+ config.filter_run :focus
51
+ config.run_all_when_everything_filtered = true
52
+
53
+ # Allows RSpec to persist some state between runs in order to support
54
+ # the `--only-failures` and `--next-failure` CLI options. We recommend
55
+ # you configure your source control system to ignore this file.
56
+ config.example_status_persistence_file_path = "spec/examples.txt"
57
+
58
+ # Limits the available syntax to the non-monkey patched syntax that is
59
+ # recommended. For more details, see:
60
+ # - http://rspec.info/blog/2012/06/rspecs-new-expectation-syntax/
61
+ # - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
62
+ # - http://rspec.info/blog/2014/05/notable-changes-in-rspec-3/#zero-monkey-patching-mode
63
+ config.disable_monkey_patching!
64
+
65
+ # This setting enables warnings. It's recommended, but in some cases may
66
+ # be too noisy due to issues in dependencies.
67
+ config.warnings = true
68
+
69
+ # Many RSpec users commonly either run the entire suite or an individual
70
+ # file, and it's useful to allow more verbose output when running an
71
+ # individual spec file.
72
+ if config.files_to_run.one?
73
+ # Use the documentation formatter for detailed output,
74
+ # unless a formatter has already been configured
75
+ # (e.g. via a command-line flag).
76
+ config.default_formatter = 'doc'
77
+ end
78
+
79
+ # Print the 10 slowest examples and example groups at the
80
+ # end of the spec run, to help surface which specs are running
81
+ # particularly slow.
82
+ config.profile_examples = 10
83
+
84
+ # Run specs in random order to surface order dependencies. If you find an
85
+ # order dependency and want to debug it, you can fix the order by providing
86
+ # the seed, which is printed after each run.
87
+ # --seed 1234
88
+ config.order = :random
89
+
90
+ # Seed global randomization in this process using the `--seed` CLI option.
91
+ # Setting this allows you to use `--seed` to deterministically reproduce
92
+ # test failures related to randomization by passing the same `--seed` value
93
+ # as the one that triggered the failure.
94
+ Kernel.srand config.seed
95
+ =end
96
+ end
@@ -0,0 +1,43 @@
1
+ require 'uri_scanner'
2
+
3
+ RSpec.describe URIScanner do
4
+ describe "#is_ip_valid?" do
5
+ it{expect(URIScanner.is_ip_valid?("172.31.1.222")).to be true}
6
+ it{expect(URIScanner.is_ip_valid?("172.311.1.222")).to be false}
7
+ it{expect(URIScanner.is_ip_valid?("::1")).to be true}
8
+ it{expect(URIScanner.is_ip_valid?("2016:::1:1")).to be false}
9
+ it{expect(URIScanner.is_ip_valid?("")).to be false}
10
+ end
11
+
12
+ describe "#parse_uri" do
13
+ context "parsing 'ldap://[2001:db8::7]/c=GB?objectClass?one'" do
14
+ let(:uri){ URIScanner.parse_uri "ldap://[2001:db8::7]/c=GB?objectClass?one"}
15
+ it {expect(uri.scheme).to eq("ldap")}
16
+ it {expect(uri.host).to eq("[2001:db8::7]")}
17
+ it {expect(uri.port).to be_nil}
18
+ it {expect(uri.path).to eq("/c=GB")}
19
+ it {expect(uri.query).to eq("objectClass?one")}
20
+ end
21
+ end
22
+
23
+ describe "#scan" do
24
+ context "text and returns array of strings" do
25
+ let(:data) {File.read "spec/url.txt" }
26
+ let(:uris) {URIScanner.scan data}
27
+ it{expect(uris.length).to be 15}
28
+ it{uris.each{|u| expect(u).to be_a String}}
29
+ end
30
+
31
+ context "text and returns array of uri objects" do
32
+ let(:data) {%Q{text before http://example.com:8042/index.php?name=ferret#nose text after
33
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Etiam
34
+ nec erat "foo://info.example.com?fred=homme#world" Proin sollicitudin <sip:12345@sip-provider.info:5060> }}
35
+ let(:uri) {URIScanner.scan_and_parse(data)[1]}
36
+ it{expect(uri.scheme).to eq("foo")}
37
+ it{expect(uri.host).to eq("info.example.com")}
38
+ it{expect(uri.port).to be_nil}
39
+ it{expect(uri.query).to eq("fred=homme")}
40
+ it{expect(uri.fragment).to eq("world")}
41
+ end
42
+ end
43
+ end
data/spec/uri_spec.rb ADDED
@@ -0,0 +1,185 @@
1
+ require 'uri_scanner/uri_parser'
2
+
3
+ RSpec.describe MachineURI do
4
+ context "validate URI" do
5
+ specify {
6
+ expect(MachineURI.new("http://www.ietf.org/rfc/rfc2396.txt")
7
+ .is_valid?).to be true}
8
+
9
+ specify {
10
+ expect{ MachineURI.new("1http://www.ietf.org/rfc/rfc2396.txt") }
11
+ .to raise_error(URIParserError)}
12
+
13
+ it "testing multiple samples with authority part" do
14
+ %W{
15
+ http://localhost/
16
+ ftp://ftp.is.co.za/rfc/rfc1808.txt
17
+ http://www.ietf.org/rfc/rfc2396.txt
18
+ ldap://[2001:db8::7]/c=GB?objectClass?one
19
+ telnet://192.0.2.16:80/
20
+ example://a/b/c/%7Bfoo%7D
21
+ eXAMPLE://a/./b/../b/%63/%7bfoo%7d
22
+ foo://example.com:8042/over/there?name=ferret#nose
23
+ foo://info.example.com?fred
24
+ redis://host:6503/dbindex/keyname
25
+ }.each do |uri|
26
+ expect( MachineURI.new(uri).is_valid? ).to be true
27
+ end
28
+ end
29
+ it "testing multiple samples without authority part" do
30
+ %W{
31
+ urn:oasis:names:specification:docbook:dtd:xml:4.1.2
32
+ urn:example:animal:ferret:nose
33
+ mailto:John.Doe@example.com
34
+ news:comp.infosystems.www.servers.unix
35
+ sip:12345@sip-provider.info:5060
36
+ sips:johndoe@sip.secure.com
37
+ tel:+1-816-555-1212
38
+ }.each do |uri|
39
+ expect( MachineURI.new(uri).is_valid? ).to be true
40
+ end
41
+ end
42
+ end
43
+
44
+ context "component parts parsing" do
45
+ describe "#scheme" do
46
+ it "should be 'foo'" do
47
+ uri = MachineURI.new "foo://example.com:8042/over/there?name=ferret#nose"
48
+ expect(uri.scheme).to eq("foo")
49
+ end
50
+ it "should be 'sip'" do
51
+ uri = MachineURI.new "sip:username@example.com"
52
+ expect(uri.scheme).to eq("sip")
53
+ end
54
+ it "raises error on invalid scheme" do
55
+ expect {
56
+ MachineURI.new "f~oo://google.com"
57
+ }.to raise_error(URIParserError)
58
+ expect {
59
+ MachineURI.new "1http://google.com"
60
+ }.to raise_error(URIParserError)
61
+ end
62
+ end
63
+
64
+ describe "#host" do
65
+ it "should be 'example.com'" do
66
+ uri = MachineURI.new "foo://example.com:8042/over/there?name=ferret#nose"
67
+ expect(uri.host).to eq("example.com")
68
+ end
69
+ it "should be 'sip-provider.info'" do
70
+ uri = MachineURI.new "sip:12345@sip-provider.info:5060"
71
+ expect(uri.host).to eq("sip-provider.info")
72
+ end
73
+ it "should be '[2001:db8::7]'" do
74
+ uri = MachineURI.new "ldap://[2001:db8::7]/c=GB?objectClass?one"
75
+ expect(uri.host).to eq("[2001:db8::7]")
76
+ end
77
+ it "should be '192.0.2.16'" do
78
+ uri = MachineURI.new "telnet://192.0.2.16:80/"
79
+ expect(uri.host).to eq("192.0.2.16")
80
+ end
81
+ it "should be nil" do
82
+ uri = MachineURI.new "urn:example:animal:ferret:nose"
83
+ expect(uri.host).to be_nil
84
+ end
85
+ end
86
+
87
+ describe "#userinfo" do
88
+ it "should be 'john'" do
89
+ uri = MachineURI.new "foo://john@example.com:8042/over/there?name=ferret#nose"
90
+ expect(uri.userinfo).to eq("john")
91
+ end
92
+ it "should be nil" do
93
+ uri = MachineURI.new "ldap://[2001:db8::7]/c=GB?objectClass?one"
94
+ expect(uri.userinfo).to be_nil
95
+ end
96
+ it "should be '12345'" do
97
+ uri = MachineURI.new "sip:12345@sip-provider.info:5060"
98
+ expect(uri.userinfo).to eq("12345")
99
+ end
100
+ end
101
+
102
+ describe "#port" do
103
+ it "should be 8042" do
104
+ uri = MachineURI.new "foo://john@example.com:8042/over/there?name=ferret#nose"
105
+ expect(uri.port).to eq(8042)
106
+ end
107
+ it "should be nil" do
108
+ uri = MachineURI.new "http://www.ietf.org/rfc/rfc2396.txt"
109
+ expect(uri.port).to be_nil
110
+ end
111
+ it "should be '5060'" do
112
+ uri = MachineURI.new "sip:12345@sip-provider.info:5060"
113
+ expect(uri.port).to eq(5060)
114
+ end
115
+ end
116
+
117
+ describe "#path" do
118
+ it "should be '/rfc/rfc2396.txt'" do
119
+ uri = MachineURI.new "http://www.ietf.org/rfc/rfc2396.txt#section%205.2"
120
+ expect(uri.path).to eq("/rfc/rfc2396.txt")
121
+ end
122
+ it "should be nil'" do
123
+ uri = MachineURI.new "telnet://192.0.2.16:80"
124
+ expect(uri.path).to be_empty
125
+ end
126
+ it "should be 'example:animal:ferret:nose'" do
127
+ uri = MachineURI.new "urn:example:animal:ferret:nose"
128
+ expect(uri.path).to eq("example:animal:ferret:nose")
129
+ end
130
+ end
131
+
132
+ describe "#query" do
133
+ it "should be 'name=ferret'" do
134
+ uri = MachineURI.new "foo://john@example.com:8042/over/there?name=ferret#nose"
135
+ expect(uri.query).to eq("name=ferret")
136
+ end
137
+ it "should be nil" do
138
+ uri = MachineURI.new "http://www.ietf.org/rfc/rfc2396.txt"
139
+ expect(uri.query).to be_nil
140
+ end
141
+ end
142
+
143
+ describe "#fragment" do
144
+ it "should be 'nose'" do
145
+ uri = MachineURI.new "foo://john@example.com:8042/over/there?name=ferret#nose"
146
+ expect(uri.fragment).to eq("nose")
147
+ end
148
+ it "should be nil" do
149
+ uri = MachineURI.new "http://www.ietf.org/rfc/rfc2396.txt?user=john"
150
+ expect(uri.fragment).to be_nil
151
+ end
152
+ it "should be 'section%205.2'" do
153
+ uri = MachineURI.new "http://www.ietf.org/rfc/rfc2396.txt#section%205.2"
154
+ expect(uri.fragment).to eq("section%205.2")
155
+ end
156
+ end
157
+
158
+ context "should be parsed to fields" do
159
+ let(:uri) {MachineURI.new "foo://john@example.com:8042/over/there?name=ferret#nose"}
160
+ it{expect(uri.scheme).to eq("foo")}
161
+ it{expect(uri.userinfo).to eq("john")}
162
+ it{expect(uri.host).to eq("example.com")}
163
+ it{expect(uri.port).to eq(8042)}
164
+ it{expect(uri.path).to eq("/over/there")}
165
+ it{expect(uri.query).to eq("name=ferret")}
166
+ it{expect(uri.fragment).to eq("nose")}
167
+ end
168
+
169
+ context "should parse SIP to fields" do
170
+ let(:uri){MachineURI.new "sips:alice:secretW0rd@gateway.com:5061;transport=udp;user=phone;method=REGISTER?subject=sales%20meeting&priority=urgent&to=sales%40city.com"}
171
+ it{expect(uri.scheme).to eq("sips")}
172
+ it{expect(uri.username).to eq("alice")}
173
+ it{expect(uri.password).to eq("secretW0rd")}
174
+ it{expect(uri.host).to eq("gateway.com")}
175
+ it{expect(uri.port).to eq(5061)}
176
+ it{expect(uri.param[:transport]).to eq("udp")}
177
+ it{expect(uri.param[:user]).to eq("phone")}
178
+ it{expect(uri.param[:method]).to eq("REGISTER")}
179
+ it{expect(uri.header[:subject]).to eq("sales meeting")}
180
+ it{expect(uri.header[:priority]).to eq("urgent")}
181
+ it{expect(uri.header[:to]).to eq("sales@city.com")}
182
+ end
183
+ end
184
+ end
185
+