fuzzyurl 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c776f197c4a67210a4f28d6aae36964f4df56104
4
+ data.tar.gz: 818e98e321eb54afe95b643e368472ff7ab719ad
5
+ SHA512:
6
+ metadata.gz: a3c90e49a699fcea3f7e9b117a52b36d5728b29179a03d6a337038101b8a2c97c6dee5d2601589c879519827df931e77fbb4aecf77bfd616e7dd08f7dcd2a50b
7
+ data.tar.gz: d1e34f6f08c77d3a22fc2dfbf1a3c07ab2807f98f9fcf5fbfff84a0c90fbaf1875a1e76718617b68dbce7a57e7dfc015216f1cf7a477ba529daf7e4df8fb1092
@@ -0,0 +1,210 @@
1
+ require 'fuzzy_url/version'
2
+ require 'fuzzy_url/matching'
3
+ require 'fuzzy_url/url_components'
4
+ require 'pp'
5
+
6
+ ## FuzzyURL is a class to represent URLs and URL-like things. FuzzyURL aids
7
+ ## in the manipulation and matching of URLs by providing non-strict parsing,
8
+ ## wildcard matching, ranked matching, `#to_s`, and more.
9
+ ##
10
+ ## Example usage:
11
+ ##
12
+ ## ```
13
+ ## require 'fuzzyurl'
14
+ ## fuzzy_url = FuzzyURL.new('http://example.com/*')
15
+ ## fuzzy_url.matches?('http://example.com') # => true
16
+ ## fuzzy_url.matches?('http://example.com/a/b/c') # => true
17
+ ## fuzzy_url.matches?('https://example.com') # => false
18
+ ## fuzzy_url.matches?('http://foobar.com') # => false
19
+ ## ```
20
+ ##
21
+ ## It is important to note that FuzzyURL is not a URL validator! It performs
22
+ ## lenient matching of URLs and URL-like things that look like the following:
23
+ ##
24
+ ## ```
25
+ ## [protocol ://] [username [: password] @] [hostname] [: port] [/ path] [? query] [# fragment]
26
+ ## ```
27
+ ##
28
+ ## In a FuzzyURL, any part of the above may be replaced with a `*` character
29
+ ## to match anything.
30
+ ##
31
+ ## In a hostname, the leftmost label of the host (e.g., the `xyz`
32
+ ## in `xyz.us.example.com`) may be replaced with a `*` character
33
+ ## (e.g., `*.us.example.com`) in order to match domains like
34
+ ## `xxx.us.example.com` and `yyy.zzz.us.example.com`, but not `us.example.com`.
35
+ ##
36
+ ## In a path, a `*` character may be placed after the last `/` path separator
37
+ ## (e.g., `/a/b/*`) in order to match paths like `/a/b` and `/a/b/c/d`,
38
+ ## but not `/a/bcde`.
39
+
40
+ class FuzzyURL
41
+ include FuzzyURL::Matching
42
+ include FuzzyURL::URLComponents
43
+
44
+
45
## Creates a new FuzzyURL with the given URL or URL-like object of type
## String, Hash, or FuzzyURL.
##
## Acceptable hash keys are :protocol, :username, :password, :hostname,
## :port, :path, :query, and :fragment.  Hash keys other than these are
## ignored, and any component that is not supplied is stored as nil.
##
## Raises ArgumentError if `url` is a String that cannot be parsed, or if
## it is not a String, Hash, or FuzzyURL.
def initialize(url='')
  case url
  when String
    given = self.class.url_to_hash(url)
    raise ArgumentError, "Badly formed URL: #{url.inspect}" unless given
  when Hash, FuzzyURL
    given = url.to_hash
  else
    raise ArgumentError, "url must be a String, Hash, or FuzzyURL; got #{url.inspect}"
  end

  ## Copy only the known components, so that stray keys in a caller's hash
  ## are dropped (as documented) instead of leaking into #to_hash.
  @components = {}
  [:protocol, :username, :password, :hostname,
   :port, :path, :query, :fragment].each do |name|
    @components[name] = given.fetch(name, nil)
  end
end
66
+
67
## Matches the given URL string, hash, or FuzzyURL against this FuzzyURL.
## Returns nil on negative match, and an integer match score otherwise.
## This match score is higher for more specific matches.
##
## Raises ArgumentError if `url` is a String that cannot be parsed, or if
## it is not a String, Hash, or FuzzyURL.
def match(url)
  case url
  when String
    url_hash = self.class.url_to_hash(url)
    ## url_to_hash returns nil for unparseable input; report that here
    ## rather than letting match_hash fail on a nil hash.
    raise ArgumentError, "Badly formed URL: #{url.inspect}" unless url_hash
  when Hash, FuzzyURL
    url_hash = url.to_hash
  else
    raise ArgumentError, "url must be a String, Hash, or FuzzyURL; got #{url.inspect}"
  end
  self.class.match_hash(to_hash, url_hash)
end
80
+
81
## Boolean form of #match: matches the given URL string, hash, or FuzzyURL
## against this FuzzyURL.
## Returns true when #match yields a score, false when it yields nil.
def matches?(url)
  !!match(url)
end
86
+
87
## Returns this FuzzyURL's hash form: a new Hash holding the same
## component keys and values, so changes to the returned hash do not
## affect this object.
def to_hash
  {}.merge(@components)
end
91
+
92
## Returns this FuzzyURL's string form, built from its components by the
## class-level hash_to_url.
def to_s
  builder = self.class
  builder.hash_to_url(@components)
end
96
+
97
+
98
+ class << self
99
+
100
## Given a URL, returns a hash containing :protocol, :username, :password,
## :hostname, :port, :path, :query, and :fragment fields.  Each field is
## a String or nil, except :port, which is an Integer, the String `*`,
## or nil.
## Accepts `*` in place of any of the above fields, or as part of hostname
## or path.  Protocol and hostname are downcased.
## Returns nil if given a malformed URL.  The whole string must be a
## single URL; input containing a newline is treated as malformed.
##
## Example:
##
## ```
## FuzzyURL.url_to_hash('http://user:pass@example.com:8080/some/path/?foo=bar&baz=1#url-fragment')
## # => {:protocol=>"http", :username=>"user", :password=>"pass", :hostname=>"example.com", :port=>8080, :path=>"/some/path/", :query=>"foo=bar&baz=1", :fragment=>"url-fragment"}
## ```
def url_to_hash(url)
  ## \A and \z anchor the whole string; ^ and $ would anchor per line and
  ## let one line of a multi-line string parse as if it were the input.
  m = url.match(%r{
    \A

    (?: (\* | [a-zA-Z]+) ://)?             ## m[1] is protocol

    (?: (\* | [a-zA-Z0-9_]+)               ## m[2] is username
        (?: : (\* | [a-zA-Z0-9_]*))?       ## m[3] is password
        @
    )?

    ([a-zA-Z0-9\.\*\-]+?)?                 ## m[4] is hostname

    (?: : (\* | \d+))?                     ## m[5] is port

    (/ [^\?\#]*)?                          ## m[6] is path
                                           ## captures leading /

    (?: \? ([^\#]*) )?                     ## m[7] is query

    (?: \# (.*) )?                         ## m[8] is fragment

    \z
  }x)
  return nil unless m

  ## A wildcard port must stay `*`; calling to_i on it would yield 0.
  port = case m[5]
         when nil then nil
         when '*' then '*'
         else m[5].to_i
         end

  { :protocol => m[1] && m[1].downcase,
    :username => m[2],
    :password => m[3],
    :hostname => m[4] && m[4].downcase,
    :port     => port,
    :path     => m[6],
    :query    => m[7],
    :fragment => m[8] }
end
161
+
162
## Given a hash containing :protocol, :username, :password,
## :hostname, :port, :path, :query, and :fragment fields (all String
## or nil), return a URL string containing these elements.
## Fields that are nil are left out.  A password is only emitted together
## with a username, and a path lacking a leading `/` is given one.
def hash_to_url(hash)
  scheme = hash[:protocol] ? "#{ hash[:protocol] }://" : ''

  credentials = ''
  if hash[:username]
    secret = hash[:password] ? ":#{hash[:password]}" : ''
    credentials = "#{hash[:username]}#{secret}@"
  end

  host = hash[:hostname] ? "#{hash[:hostname]}" : ''
  port = hash[:port] ? ":#{hash[:port]}" : ''

  ## make sure path starts with a / if it's defined
  path = hash[:path]
  path = "/#{path}" unless !path || path.index('/') == 0

  query    = hash[:query] ? "?#{hash[:query]}" : ''
  fragment = hash[:fragment] ? "##{hash[:fragment]}" : ''

  "#{scheme}#{credentials}#{host}#{port}#{path}#{query}#{fragment}"
end
185
+
186
## Boolean form of the class-level match: matches a URL mask string with
## a URL string.
## Raises ArgumentError when given malformed URLs.
## Returns true when match yields a score, false when it yields nil.
def matches?(mask, url)
  !!match(mask, url)
end
192
+
193
## Matches a URL mask string with a URL string.
## Both strings are parsed with url_to_hash and compared with match_hash.
## Raises ArgumentError when given malformed URLs (the mask is checked
## first).
## Returns nil on negative match, and an integer match score otherwise.
## This match score is higher for more specific matches.
def match(mask, url)
  mask_hash = url_to_hash(mask)
  raise ArgumentError, "Badly formed URL mask: #{mask.inspect}" unless mask_hash

  url_hash = url_to_hash(url)
  raise ArgumentError, "Badly formed URL: #{url.inspect}" unless url_hash

  match_hash(mask_hash, url_hash)
end
206
+
207
+ end # class << self
208
+
209
+ end
210
+
@@ -0,0 +1,118 @@
1
class FuzzyURL

  ## FuzzyURL::Matching provides the logic for matching a URL mask against
  ## a URL, both given in hash form (keys :protocol, :username, :password,
  ## :hostname, :port, :path, :query, and :fragment).  Including this
  ## module extends the including class with the methods in ClassMethods.
  module Matching

    ## Hook run when this module is included; adds ClassMethods to `klass`
    ## as class-level methods.
    def self.included(klass)
      klass.extend(ClassMethods)
    end

    module ClassMethods

      ## Default port for each known protocol, used when a URL does not
      ## state its port explicitly.  nil means the protocol has no port.
      PORT_BY_PROTOCOL = {
        'http'  => 80,
        'https' => 443,
        'file'  => nil,
      }.freeze

      ## Compares a URL mask hash with a URL hash.
      ## Returns nil on negative match, and an integer match score otherwise.
      ## This match score is higher for more specific matches.
      def match_hash(mask, url)
        ## Ports are scored only by match_protocols_and_ports, which knows
        ## about default ports; a second, strict port comparison here
        ## would reject e.g. mask port 80 against a port-less http URL.
        scores = [
          match_hostnames(mask[:hostname], url[:hostname]),
          match_protocols_and_ports(mask, url),
          match_paths(mask[:path], url[:path]),
          fuzzy_match(mask[:query], url[:query]),
          fuzzy_match(mask[:username], url[:username]),
          fuzzy_match(mask[:password], url[:password]),
          fuzzy_match(mask[:fragment], url[:fragment]),
        ]
        return nil if scores.include?(nil)
        scores.inject(0) { |total, score| total + score }
      end

      private

      ## Matches a URL mask hash against a URL hash.
      ## Returns true on positive match, false otherwise.
      def matches_hash?(mask, url)
        match_hash(mask, url) ? true : false
      end

      ## Matches protocol and port information.
      ## A URL without a protocol is treated as http.  A URL without a port
      ## matches a mask port only if that port is the default port of the
      ## URL's protocol; such an inferred match counts as a wildcard match.
      ## Returns nil for no match, 0 if two wildcard matches were made, 1 if
      ## one wildcard match was made, and 2 for an exact match.
      def match_protocols_and_ports(mask_hash, url_hash)
        wildcard_matches = 0

        mask_protocol = mask_hash[:protocol]
        url_protocol  = url_hash[:protocol] || 'http'
        if mask_protocol && mask_protocol != '*'
          return nil if mask_protocol != url_protocol
        else
          wildcard_matches += 1
        end

        mask_port = mask_hash[:port]
        url_port  = url_hash[:port]
        if mask_port && mask_port != '*'
          if url_port.nil?
            ## No explicit port on the URL: fall back to the protocol's
            ## default port.
            return nil if mask_port != PORT_BY_PROTOCOL[url_protocol]
            wildcard_matches += 1
          elsif mask_port != url_port
            ## An explicit URL port must equal the mask port, even when
            ## the mask port happens to be the protocol's default.
            return nil
          end
        else
          wildcard_matches += 1
        end

        2 - wildcard_matches
      end

      ## Matches a piece of a mask against a piece of a URL.  Handles
      ## wildcards: a nil or `*` mask matches anything.
      ## Returns nil for no match, 0 for a wildcard match, or 1 for an
      ## exact match.
      def fuzzy_match(mask, piece)
        return 0 if !mask || mask == '*'
        return 1 if mask == piece
        nil
      end

      ## Matches a hostname mask against a hostname.  Labels are compared
      ## from right to left, so that a leading `*` label in the mask can
      ## stand for one or more leftmost labels of the hostname.
      ## Returns nil for no match, 0 for a wildcard match, or 1 for an
      ## exact match.
      def match_hostnames(mask, host)
        mask_pieces = (mask || '').split('.').reverse
        host_pieces = (host || '').split('.').reverse
        return 1 if mask && host && mask_pieces == host_pieces
        return 0 if match_pieces(mask_pieces, host_pieces, :ignore_depth => false)
        nil
      end

      ## Matches a path mask against a path.  A missing mask matches any
      ## path; a missing path is treated as `/`.
      ## Returns nil for no match, 0 for a wildcard match, or 1 for an
      ## exact match.
      def match_paths(mask, path)
        mask_pieces = (mask || '*').split(%r{/})
        path_pieces = (path || '/').split(%r{/})
        return 1 if mask && path && mask_pieces == path_pieces
        return 0 if match_pieces(mask_pieces, path_pieces, :ignore_depth => true)
        nil
      end

      ## Matches arrays of URL or hostname pieces.
      ## `args[:ignore_depth]` allows the mask to be one trailing `*` piece
      ## longer than `pieces` (so that `/a/b/*` matches `/a/b`); without it,
      ## a mask longer than `pieces` never matches.
      ## Returns nil for no match, 0 for a wildcard match, or 1 for an
      ## exact match.
      def match_pieces(mask, pieces, args)
        ignore_depth = args[:ignore_depth]
        return nil if !ignore_depth && mask.count > pieces.count

        pieces.each_with_index do |piece, i|
          return 0 if piece && mask[i] == '*'
          return nil if mask[i] != piece
        end

        ## Every given piece matched.  Any mask pieces left over must be
        ## nothing, or a lone `*`; otherwise the mask demands more than
        ## was given.  Empty pieces are skipped because splitting `/`
        ## yields no pieces at all, while `/*` yields ['', '*'].
        leftover = (mask[pieces.count..-1] || []).reject { |piece| piece == '' }
        return 1 if leftover.empty?
        return 0 if leftover == ['*']
        nil
      end

    end

  end
end
@@ -0,0 +1,91 @@
1
class FuzzyURL

  ## FuzzyURL::URLComponents provides getting/setting of URL components
  ## on FuzzyURL objects in hash style (e.g. `foo[:hostname]`) and
  ## method style (e.g. `foo.hostname`).  Acceptable URL components are
  ## :protocol, :username, :password, :hostname, :port, :path, :query,
  ## and :fragment.
  ##
  ## The including object is expected to keep its components in a hash
  ## stored in the `@components` instance variable.
  module URLComponents

    COMPONENTS = [:protocol, :username, :password, :hostname,
                  :port, :path, :query, :fragment].freeze

    ## Gets a URL component.  `component` may be a Symbol or a String.
    ## Raises ArgumentError if it does not name a URL component.
    def [](component)
      @components[component_key(component)]
    end

    ## Sets a URL component.  `component` may be a Symbol or a String.
    ## Raises ArgumentError if it does not name a URL component.
    def []=(component, value)
      @components[component_key(component)] = value
    end

    ## Defines a getter and a setter for every URL component:
    ## #protocol, #protocol=, #username, #username=, #password, #password=,
    ## #hostname, #hostname=, #port, #port=, #path, #path=, #query, #query=,
    ## #fragment, and #fragment=.  Each one delegates to #[] or #[]=.
    COMPONENTS.each do |name|
      define_method(name) { self[name] }
      define_method("#{name}=") { |value| self[name] = value }
    end

    private

    ## Returns the Symbol key for `component`.
    ## Raises ArgumentError if `component` does not name a URL component,
    ## including when it cannot be converted to a Symbol at all (e.g. nil).
    def component_key(component)
      key = component.respond_to?(:to_sym) ? component.to_sym : nil
      unless COMPONENTS.include?(key)
        raise ArgumentError, "#{component.inspect} is not a URL component. " +
                             COMPONENTS.inspect
      end
      key
    end

  end
end
91
+
@@ -0,0 +1,6 @@
1
# @private
class FuzzyURL
  ## Version number of this release of the fuzzyurl gem.
  VERSION = '0.2.0'.freeze

  ## Date of this release, in YYYY-MM-DD form.
  VERSION_DATE = '2014-04-03'.freeze
end
6
+
@@ -0,0 +1,2 @@
1
+ require 'fuzzy_url'
2
+
metadata ADDED
@@ -0,0 +1,94 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fuzzyurl
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Pete Gamache
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-04-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 10.0.4
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 10.0.4
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 4.7.0
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 4.7.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: mocha
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 0.13.3
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 0.13.3
55
+ description: |2
56
+ FuzzyURL provides two related functions: fuzzy matching of a URL to a URL
57
+ mask that can contain wildcards, and non-strict parsing of URLs into their
58
+ component pieces: protocol, username, password, hostname, port, path,
59
+ query, and fragment.
60
+ email: pete@gamache.org
61
+ executables: []
62
+ extensions: []
63
+ extra_rdoc_files: []
64
+ files:
65
+ - lib/fuzzy_url.rb
66
+ - lib/fuzzy_url/matching.rb
67
+ - lib/fuzzy_url/url_components.rb
68
+ - lib/fuzzy_url/version.rb
69
+ - lib/fuzzyurl.rb
70
+ homepage: https://github.com/gamache/fuzzyurl
71
+ licenses:
72
+ - MIT
73
+ metadata: {}
74
+ post_install_message:
75
+ rdoc_options: []
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: 1.8.7
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubyforge_project:
90
+ rubygems_version: 2.2.0
91
+ signing_key:
92
+ specification_version: 4
93
+ summary: Non-strict URL parsing and URL fuzzy matching.
94
+ test_files: []