fuzzyurl 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c776f197c4a67210a4f28d6aae36964f4df56104
4
+ data.tar.gz: 818e98e321eb54afe95b643e368472ff7ab719ad
5
+ SHA512:
6
+ metadata.gz: a3c90e49a699fcea3f7e9b117a52b36d5728b29179a03d6a337038101b8a2c97c6dee5d2601589c879519827df931e77fbb4aecf77bfd616e7dd08f7dcd2a50b
7
+ data.tar.gz: d1e34f6f08c77d3a22fc2dfbf1a3c07ab2807f98f9fcf5fbfff84a0c90fbaf1875a1e76718617b68dbce7a57e7dfc015216f1cf7a477ba529daf7e4df8fb1092
@@ -0,0 +1,210 @@
1
+ require 'fuzzy_url/version'
2
+ require 'fuzzy_url/matching'
3
+ require 'fuzzy_url/url_components'
4
+ require 'pp'
5
+
6
+ ## FuzzyURL is a class to represent URLs and URL-like things. FuzzyURL aids
7
+ ## in the manipulation and matching of URLs by providing non-strict parsing,
8
+ ## wildcard matching, ranked matching, `#to_s`, and more.
9
+ ##
10
+ ## Example usage:
11
+ ##
12
+ ## ```
13
+ ## require 'fuzzyurl'
14
+ ## fuzzy_url = FuzzyURL.new('http://example.com/*')
15
+ ## fuzzy_url.matches?('http://example.com') # => true
16
+ ## fuzzy_url.matches?('http://example.com/a/b/c') # => true
17
+ ## fuzzy_url.matches?('https://example.com') # => false
18
+ ## fuzzy_url.matches?('http://foobar.com') # => false
19
+ ## ```
20
+ ##
21
+ ## It is important to note that FuzzyURL is not a URL validator! It performs
22
+ ## lenient matching of URLs and URL-like things that look like the following:
23
+ ##
24
+ ## ```
25
+ ## [protocol ://] [username [: password] @] [hostname] [: port] [/ path] [? query] [# fragment]
26
+ ## ```
27
+ ##
28
+ ## In a FuzzyURL, any part of the above may be replaced with a `*` character
29
+ ## to match anything.
30
+ ##
31
+ ## In a hostname, the leftmost label of the host (e.g., the `xyz`
32
+ ## in `xyz.us.example.com`) may be replaced with a `*` character
33
+ ## (e.g., `*.us.example.com`) in order to match domains like
34
+ ## `xxx.us.example.com` and `yyy.zzz.us.example.com`, but not `us.example.com`.
35
+ ##
36
+ ## In a path, a `*` character may be placed after the last `/` path separator
37
+ ## (e.g., `/a/b/*`) in order to match paths like `/a/b` and `/a/b/c/d`,
38
+ ## but not `/a/bcde`.
39
+
40
+ class FuzzyURL
41
+ include FuzzyURL::Matching
42
+ include FuzzyURL::URLComponents
43
+
44
+
45
## Builds a FuzzyURL from a URL or URL-like object: a String, a Hash, or
## another FuzzyURL.
##
## A String is parsed with FuzzyURL.url_to_hash; ArgumentError is raised when
## that parse yields nothing.  A Hash or FuzzyURL contributes the pairs of
## its `to_hash`.  In both cases the pairs are merged over a hash that holds
## nil for :protocol, :username, :password, :hostname, :port, :path, :query,
## and :fragment, so each of those keys is always present afterwards.
## Any other kind of argument raises ArgumentError.
def initialize(url='')
  supplied =
    case url
    when String
      self.class.url_to_hash(url) ||
        raise(ArgumentError, "Bad url URL: #{url.inspect}")
    when Hash, FuzzyURL
      url.to_hash
    else
      raise ArgumentError, "url must be a String, Hash, or FuzzyURL; got #{url.inspect}"
    end

  blank = {
    :protocol => nil, :username => nil, :password => nil,
    :hostname => nil, :port     => nil, :path     => nil,
    :query    => nil, :fragment => nil
  }
  @components = blank.merge(supplied)
end
66
+
67
## Matches the given URL string, hash, or FuzzyURL against this FuzzyURL,
## which plays the role of the mask.
## Returns nil on negative match, and an integer match score otherwise.
## This match score is higher for more specific matches.
## Raises ArgumentError when given a String that FuzzyURL.url_to_hash cannot
## parse, or an argument that is not a String, Hash, or FuzzyURL.
def match(url)
  case url
  when String
    ## url_to_hash returns nil for a malformed URL.  Handing that nil to
    ## match_hash would blow up with NoMethodError, so reject it here with
    ## the same error type the class-level FuzzyURL.match raises.
    url_hash = self.class.url_to_hash(url)
    unless url_hash
      raise ArgumentError, "Badly formed URL: #{url.inspect}"
    end
    self.class.match_hash(self.to_hash, url_hash)
  when Hash, FuzzyURL
    self.class.match_hash(self.to_hash, url.to_hash)
  else
    raise ArgumentError, "url must be a String, Hash, or FuzzyURL; got #{url.inspect}"
  end
end
80
+
81
## Reports whether the given URL string, hash, or FuzzyURL matches this
## FuzzyURL.  Returns true when #match produces any non-nil, non-false
## value (a score of 0 counts as a match), and false otherwise.
def matches?(url)
  !!match(url)
end
86
+
87
## Returns this FuzzyURL's components as a new Hash.  The result is a
## shallow copy, so adding or replacing keys on it does not touch the
## FuzzyURL's own component store.
def to_hash
  {}.merge(@components)
end
91
+
92
## Returns this FuzzyURL's string form, produced by handing the stored
## components to the class-level hash_to_url.
def to_s
  serializer = self.class
  serializer.hash_to_url(@components)
end
96
+
97
+
98
+ class << self
99
+
100
## Given a URL, returns a hash containing :protocol, :username, :password,
## :hostname, :port, :path, :query, and :fragment fields.  Each field is a
## String or nil, except :port, which is an Integer for a numeric port,
## the String "*" for a wildcard port, or nil when absent.
## Protocol and hostname are downcased.
## Accepts `*` in place of any of the above fields, or as part of hostname
## or path.
## Returns nil if given a malformed URL.  The whole string must be a single
## URL: input containing extra lines, or a trailing newline, is rejected.
##
## Example:
##
## ```
## FuzzyURL.url_to_hash('http://user:pass@example.com:8080/some/path/?foo=bar&baz=1#url-fragment')
## # => {:protocol=>"http", :username=>"user", :password=>"pass", :hostname=>"example.com", :port=>8080, :path=>"/some/path/", :query=>"foo=bar&baz=1", :fragment=>"url-fragment"}
##
## FuzzyURL.url_to_hash('http://example.com:*')[:port]
## # => "*"
## ```

def url_to_hash(url)
  ## \A and \z anchor the pattern to the entire string.  (^ and $ would
  ## anchor to any single line, letting one well-formed line inside
  ## otherwise malformed multi-line input be parsed as the URL.)
  m = url.match(%r{
        \A

        (?: (\* | [a-zA-Z]+) ://)?             ## m[1] is protocol

        (?: (\* | [a-zA-Z0-9_]+)               ## m[2] is username
            (?: : (\* | [a-zA-Z0-9_]*))?       ## m[3] is password
            @
        )?

        ([a-zA-Z0-9\.\*\-]+?)?                 ## m[4] is hostname

        (?: : (\* | \d+))?                     ## m[5] is port

        (/ [^\?\#]*)?                          ## m[6] is path
                                               ## captures leading /

        (?: \? ([^\#]*) )?                     ## m[7] is query

        (?: \# (.*) )?                         ## m[8] is fragment

        \z
      }x)
  return nil unless m

  ## Keep a wildcard port as "*".  Calling to_i on it would yield 0, which
  ## the matching code (which tests for '*') would treat as a literal port.
  port = case m[5]
         when nil then nil
         when '*' then '*'
         else m[5].to_i
         end

  { :protocol => m[1] && m[1].downcase,
    :username => m[2],
    :password => m[3],
    :hostname => m[4] && m[4].downcase,
    :port     => port,
    :path     => m[6],
    :query    => m[7],
    :fragment => m[8] }
end
161
+
162
## Assembles a URL string from a hash with :protocol, :username, :password,
## :hostname, :port, :path, :query, and :fragment entries.  Entries that are
## nil (or missing) are left out of the result.  The password is emitted
## only when a username is present, and a path lacking a leading "/" gets
## one added.
def hash_to_url(hash)
  segments = []

  segments << "#{ hash[:protocol] }://" if hash[:protocol]

  if hash[:username]
    credentials = "#{hash[:username]}"
    credentials = "#{credentials}:#{hash[:password]}" if hash[:password]
    segments << credentials << '@'
  end

  segments << "#{hash[:hostname]}" if hash[:hostname]
  segments << ":#{hash[:port]}" if hash[:port]

  ## A defined path always ends up starting with "/".
  path = hash[:path]
  path = "/#{path}" unless !path || path.index('/') == 0
  segments << "#{path}"

  segments << "?#{hash[:query]}" if hash[:query]
  segments << "##{hash[:fragment]}" if hash[:fragment]

  segments.join
end
185
+
186
## Tests a URL string against a URL mask string.
## Returns true when the class-level `match` produces a score (0 included)
## and false when it produces nil.  Errors raised by `match` propagate.
def matches?(mask, url)
  !!match(mask, url)
end
192
+
193
## Scores a URL string against a URL mask string.
## Both arguments are parsed with url_to_hash, mask first; ArgumentError is
## raised for whichever one fails to parse.
## Returns the result of match_hash on the two parsed hashes: nil on
## negative match, and an integer match score otherwise, higher for more
## specific matches.
def match(mask, url)
  mask_hash = url_to_hash(mask)
  raise ArgumentError, "Badly formed URL mask: #{mask.inspect}" unless mask_hash

  url_hash = url_to_hash(url)
  raise ArgumentError, "Badly formed URL: #{url.inspect}" unless url_hash

  match_hash(mask_hash, url_hash)
end
206
+
207
+ end # class << self
208
+
209
+ end
210
+
@@ -0,0 +1,118 @@
1
class FuzzyURL

  ## FuzzyURL::Matching provides the logic for matching URL mask hashes
  ## against URL hashes.  Including this module in a class extends that
  ## class with the methods of ClassMethods.
  module Matching

    ## Hook run when the module is included: adds ClassMethods to the
    ## including class as class-level methods.
    def self.included(klass)
      klass.extend(ClassMethods)
    end

    module ClassMethods

      ## Compares a URL mask hash with a URL hash.
      ## Returns nil on negative match, and an integer match score otherwise.
      ## This match score is higher for more specific matches.
      ##
      ## NOTE(review): the port is checked twice, once inside
      ## match_protocols_and_ports and once by fuzzy_match below.  Because
      ## of the second check, a numeric mask port only matches a URL that
      ## carries that same port explicitly, even when it is the protocol's
      ## default port.  Confirm whether that is intended.
      def match_hash(mask, url)
        score = 0
        ## Proc.new rather than a lambda, on purpose: `return` inside the
        ## proc returns from match_hash itself, so the first failed
        ## component check ends the whole match with nil.
        tally = Proc.new {|x| return nil unless x; score += x}

        tally.call match_hostnames(mask[:hostname], url[:hostname])
        tally.call match_protocols_and_ports(mask, url)
        tally.call match_paths(mask[:path], url[:path])
        tally.call fuzzy_match(mask[:port], url[:port])
        tally.call fuzzy_match(mask[:query], url[:query])
        tally.call fuzzy_match(mask[:username], url[:username])
        tally.call fuzzy_match(mask[:password], url[:password])
        tally.call fuzzy_match(mask[:fragment], url[:fragment])
      end

      private

      ## Matches a URL mask hash against a URL hash.
      ## Returns true on positive match, false otherwise.
      ## NOTE(review): this sits below `private`, so it is not callable
      ## from outside the extended class.
      def matches_hash?(mask, url)
        match_hash(mask, url) ? true : false
      end

      ## Matches protocol and port information.
      ## Returns nil for no match, 0 if two wildcard matches were made, 1 if
      ## one wildcard match was made, and 2 for an exact match.
      ## A missing or `*` mask protocol/port counts as a wildcard match; a
      ## missing URL protocol is treated as 'http'.
      def match_protocols_and_ports(mask_hash, url_hash)
        wildcard_matches = 0
        mask_protocol = mask_hash[:protocol] || 'http'
        url_protocol = url_hash[:protocol] || 'http'
        if mask_hash[:protocol] && mask_protocol != '*'
          return nil if mask_protocol != url_protocol
        else
          wildcard_matches += 1
        end

        mask_port = mask_hash[:port]
        url_port = url_hash[:port]
        if mask_hash[:port] && mask_port != '*'
          ## A mask port equal to the URL protocol's default port is
          ## scored as a wildcard match rather than an exact one.
          if mask_port == PORT_BY_PROTOCOL[url_protocol]
            wildcard_matches += 1
          else
            return nil if mask_port != url_port
          end
        else
          wildcard_matches += 1
        end

        (2 - wildcard_matches)
      end

      ## Default port for each known protocol; nil means no default.
      PORT_BY_PROTOCOL = {
        'http' => 80,
        'https' => 443,
        'file' => nil,
      }

      ## Matches a piece of a mask against a piece of a URL.  Handles
      ## wildcards: a nil mask or a `*` mask matches anything.
      ## Returns nil for no match, 0 for a wildcard match, or 1 for an
      ## exact match.
      def fuzzy_match(mask, piece)
        return 0 if !mask || mask == '*' # || !piece
        return 1 if mask == piece
        nil
      end

      ## Matches a hostname mask against a hostname.
      ## Labels are compared right to left, so a `*` as the leftmost label
      ## of the mask absorbs one or more leading labels of the host.
      ## Returns nil for no match, 0 for a wildcard match, or 1 for an
      ## exact match.
      def match_hostnames(mask, host)
        mask_pieces = (mask || '').split('.').reverse
        host_pieces = (host || '').split('.').reverse
        return 1 if mask && host && mask_pieces==host_pieces
        return 0 if match_pieces(mask_pieces, host_pieces, :ignore_depth => false)
        nil
      end

      ## Matches a path mask against a path.
      ## A missing mask is treated as `*`; a missing path as `/`.
      ## Returns nil for no match, 0 for a wildcard match, or 1 for an
      ## exact match.
      def match_paths(mask, path)
        mask_pieces = (mask || '*').split(%r{/})
        path_pieces = (path || '/').split(%r{/})
        return 1 if mask && path && mask_pieces==path_pieces
        return 0 if match_pieces(mask_pieces, path_pieces, :ignore_depth => true)
        nil
      end

      ## Matches arrays of URL or hostname pieces.
      ## `args[:ignore_depth]` says whether the mask may have more pieces
      ## than `pieces`.  When it may, the extra mask pieces must begin with
      ## a `*` (empty pieces, such as the one a leading "/" produces, are
      ## skipped); this is what lets `/a/b/*` match `/a/b` while `/a/b/c`
      ## does not.
      ## Returns nil for no match, 0 for a wildcard match, or 1 for an
      ## exact match.
      def match_pieces(mask, pieces, args)
        ignore_depth = args[:ignore_depth]
        return nil if !ignore_depth && mask.count > pieces.count
        pieces.each_with_index do |piece, i|
          return 0 if piece && mask[i] == '*'
          return nil if mask[i] != piece
        end

        ## Every piece matched its mask piece.  If the mask goes deeper
        ## than the pieces do, only a wildcard may come next; a literal
        ## mask piece with nothing to match against is a mismatch.
        if mask.count > pieces.count
          next_mask_piece = mask.drop(pieces.count).find {|m| m != ''}
          return next_mask_piece == '*' ? 0 : nil
        end

        1
      end

    end

  end
end
@@ -0,0 +1,91 @@
1
class FuzzyURL

  ## FuzzyURL::URLComponents gives FuzzyURL objects hash-style access
  ## (e.g. `foo[:hostname]`) and method-style access (e.g. `foo.hostname`)
  ## to their URL components, for both reading and writing.  The components
  ## are :protocol, :username, :password, :hostname, :port, :path, :query,
  ## and :fragment.  Values are kept in the including object's @components
  ## hash.
  module URLComponents

    COMPONENTS = [:protocol, :username, :password, :hostname,
                  :port, :path, :query, :fragment]

    ## Reads a URL component.  `component` is converted with `to_sym`;
    ## ArgumentError is raised unless the result is listed in COMPONENTS.
    def [](component)
      key = component.to_sym
      unless COMPONENTS.include?(key)
        raise ArgumentError,
              "#{component.inspect} is not a URL component. #{COMPONENTS.inspect}"
      end
      @components[key]
    end

    ## Writes a URL component.  `component` is converted with `to_sym`;
    ## ArgumentError is raised unless the result is listed in COMPONENTS.
    def []=(component, value)
      key = component.to_sym
      unless COMPONENTS.include?(key)
        raise ArgumentError,
              "#{component.inspect} is not a URL component. #{COMPONENTS.inspect}"
      end
      @components[key] = value
    end


    ## Returns this FuzzyURL's protocol.
    def protocol
      self[:protocol]
    end

    ## Assigns this FuzzyURL's protocol.
    def protocol=(v)
      self[:protocol] = v
    end


    ## Returns this FuzzyURL's username.
    def username
      self[:username]
    end

    ## Assigns this FuzzyURL's username.
    def username=(v)
      self[:username] = v
    end


    ## Returns this FuzzyURL's password.
    def password
      self[:password]
    end

    ## Assigns this FuzzyURL's password.
    def password=(v)
      self[:password] = v
    end


    ## Returns this FuzzyURL's hostname.
    def hostname
      self[:hostname]
    end

    ## Assigns this FuzzyURL's hostname.
    def hostname=(v)
      self[:hostname] = v
    end


    ## Returns this FuzzyURL's port.
    def port
      self[:port]
    end

    ## Assigns this FuzzyURL's port.
    def port=(v)
      self[:port] = v
    end


    ## Returns this FuzzyURL's path.
    def path
      self[:path]
    end

    ## Assigns this FuzzyURL's path.
    def path=(v)
      self[:path] = v
    end


    ## Returns this FuzzyURL's query.
    def query
      self[:query]
    end

    ## Assigns this FuzzyURL's query.
    def query=(v)
      self[:query] = v
    end


    ## Returns this FuzzyURL's fragment.
    def fragment
      self[:fragment]
    end

    ## Assigns this FuzzyURL's fragment.
    def fragment=(v)
      self[:fragment] = v
    end

  end
end
91
+
@@ -0,0 +1,6 @@
1
# @private
class FuzzyURL
  ## Version string of this gem release.  Frozen so the shared constant
  ## cannot be modified in place.
  VERSION = '0.2.0'.freeze

  ## Date associated with this release, as a YYYY-MM-DD string.  Frozen for
  ## the same reason.
  VERSION_DATE = '2014-04-03'.freeze
end
6
+
@@ -0,0 +1,2 @@
1
+ require 'fuzzy_url'
2
+
metadata ADDED
@@ -0,0 +1,94 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fuzzyurl
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Pete Gamache
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-04-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 10.0.4
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 10.0.4
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 4.7.0
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 4.7.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: mocha
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 0.13.3
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 0.13.3
55
+ description: |2
56
+ FuzzyURL provides two related functions: fuzzy matching of a URL to a URL
57
+ mask that can contain wildcards, and non-strict parsing of URLs into their
58
+ component pieces: protocol, username, password, hostname, port, path,
59
+ query, and fragment.
60
+ email: pete@gamache.org
61
+ executables: []
62
+ extensions: []
63
+ extra_rdoc_files: []
64
+ files:
65
+ - lib/fuzzy_url.rb
66
+ - lib/fuzzy_url/matching.rb
67
+ - lib/fuzzy_url/url_components.rb
68
+ - lib/fuzzy_url/version.rb
69
+ - lib/fuzzyurl.rb
70
+ homepage: https://github.com/gamache/fuzzyurl
71
+ licenses:
72
+ - MIT
73
+ metadata: {}
74
+ post_install_message:
75
+ rdoc_options: []
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: 1.8.7
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubyforge_project:
90
+ rubygems_version: 2.2.0
91
+ signing_key:
92
+ specification_version: 4
93
+ summary: Non-strict URL parsing and URL fuzzy matching.
94
+ test_files: []