fuzzyurl 0.2.3 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/fuzzyurl/fields.rb +13 -0
- data/lib/fuzzyurl/match.rb +127 -0
- data/lib/fuzzyurl/protocols.rb +26 -0
- data/lib/fuzzyurl/strings.rb +56 -0
- data/lib/fuzzyurl/version.rb +5 -0
- data/lib/fuzzyurl.rb +193 -1
- metadata +27 -15
- data/lib/fuzzy_url/matching.rb +0 -120
- data/lib/fuzzy_url/url_components.rb +0 -91
- data/lib/fuzzy_url/version.rb +0 -6
- data/lib/fuzzy_url.rb +0 -210
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b8bc03d75fbe841afed852424c2123761aeff21b
|
4
|
+
data.tar.gz: 0124a1e435856981f3088a25c9ca7348b172d4b8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3acd8d87e7df1e4ae87c91e9edde769b84c1b1bd993d7f8275a19d06f2a611384c5af8ca8e7eba1542c06404aedd7161d313b6577ae5c16e84507ce39b41ca9e
|
7
|
+
data.tar.gz: 18eef24b09a6f54690e6ebe53db791a07188050e8fae0fb652db938f076403941c4c2557b1eaf6f9f1eacee4d788fe6d7ee9333eef6ffa215ea2333cd2faaaf6
|
@@ -0,0 +1,127 @@
|
|
1
|
+
require 'fuzzyurl/protocols'
|
2
|
+
|
3
|
+
class Fuzzyurl::Match
|
4
|
+
class << self
|
5
|
+
|
6
|
+
# Scores how closely `mask` matches `url` by adding up the per-field scores
# produced by `match_scores`. A higher total means a closer match.
#
# @param mask [Fuzzyurl] Fuzzyurl mask to match with (may contain * wildcards)
# @param url [Fuzzyurl] Fuzzyurl URL to match (should not contain wildcards)
# @return [Integer, nil] sum of the field scores, or nil if any field fails
#   to match
def match(mask, url)
  field_scores = match_scores(mask, url).values
  # A single non-matching field means the whole mask does not match.
  field_scores.include?(nil) ? nil : field_scores.reduce(:+)
end
|
18
|
+
|
19
|
+
|
20
|
+
# Tells whether `mask` (which may contain * wildcards) matches `url`
# (which may not).
#
# @param mask [Fuzzyurl] Fuzzyurl mask to match with
# @param url [Fuzzyurl] Fuzzyurl URL to match
# @return [Boolean] true when `match` yields a score, false when it yields nil
def matches?(mask, url)
  !match(mask, url).nil?
end
|
29
|
+
|
30
|
+
|
31
|
+
# Compares `mask` and `url` field by field using `fuzzy_match`.
#
# When `url` lacks a protocol or a port, the missing one is looked up from
# the other via Fuzzyurl::Protocols before comparison.
#
# @param mask [Fuzzyurl] Fuzzyurl mask to match with
# @param url [Fuzzyurl] Fuzzyurl URL to match
# @return [Hash] one entry per field (:protocol, :username, :password,
#   :hostname, :port, :path, :query, :fragment); each value is the
#   `fuzzy_match` result for that field (an Integer, or nil for no match)
def match_scores(mask, url)
  url_values = {
    protocol: url.protocol || Fuzzyurl::Protocols.get_protocol(url.port),
    username: url.username,
    password: url.password,
    hostname: url.hostname,
    port: url.port || Fuzzyurl::Protocols.get_port(url.protocol),
    path: url.path,
    query: url.query,
    fragment: url.fragment
  }

  url_values.each_with_object({}) do |(field, url_value), scores|
    scores[field] = fuzzy_match(mask.public_send(field), url_value)
  end
end
|
52
|
+
|
53
|
+
|
54
|
+
# Finds the position of the mask in `masks` that scores highest against
# `url`. On a tie the earliest mask wins, because a later mask must score
# strictly higher to replace it.
#
# @param masks [Array] Array of Fuzzyurl URL mask objects to match with
# @param url [Fuzzyurl] Fuzzyurl URL to match
# @return [Integer, nil] index of the best-matching mask, or nil if none match
def best_match_index(masks, url)
  winner = nil
  top_score = -1
  masks.each_with_index do |candidate, position|
    current = match(candidate, url)
    next unless current && current > top_score

    top_score = current
    winner = position
  end
  winner
end
|
72
|
+
|
73
|
+
|
74
|
+
# Matches a single string `mask` (which may contain * wildcards) against a
# single string `value` (which may not).
#
# Wildcard language:
#
#     *              matches anything
#     foo/*          matches "foo/" and "foo/bar/baz" but not "foo"
#     foo/**         matches "foo/" and "foo/bar/baz" and "foo"
#     *.example.com  matches "api.v1.example.com" but not "example.com"
#     **.example.com matches "api.v1.example.com" and "example.com"
#
# More generally, a leading * matches any value that ends with the rest of
# the mask, and a trailing * matches any value that starts with the rest of
# the mask. Leading wildcards are checked before trailing ones. Any other
# form is treated as a literal match.
#
# @param mask [String, nil] String mask to match with (may contain wildcards)
# @param value [String, nil] String value to match
# @return [Integer, nil] 0 for a wildcard match, 1 for an exact match
#   (including when both arguments are nil), nil for no match
def fuzzy_match(mask, value)
  return 0 if mask == "*"
  return 1 if mask == value
  return nil if !mask || !value

  # Leading wildcards: "**.rest" and "*rest".
  if mask.start_with?("**.")
    suffix = mask[3..-1]
    return 0 if value == suffix || value.end_with?(".#{suffix}")
    return nil
  end
  if mask.start_with?("*")
    return value.end_with?(mask[1..-1]) ? 0 : nil
  end

  # Trailing wildcards: "rest/**" and "rest*".
  if mask.end_with?("/**")
    prefix = mask[0...-3]
    return 0 if value == prefix || value.start_with?("#{prefix}/")
    return nil
  end
  if mask.end_with?("*")
    return value.start_with?(mask[0...-1]) ? 0 : nil
  end

  nil
end
|
124
|
+
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
class Fuzzyurl::Protocols
|
2
|
+
# Default port, as a String, for each protocol this library knows about.
PORTS_BY_PROTOCOL = {
  'ssh'   => '22',
  'http'  => '80',
  'https' => '443'
}

# Reverse lookup from port String to protocol name. Derived from
# PORTS_BY_PROTOCOL so the two tables cannot drift apart.
PROTOCOLS_BY_PORT = PORTS_BY_PROTOCOL.invert
|
13
|
+
|
14
|
+
class << self
|
15
|
+
# Looks up the default port for `protocol` in PORTS_BY_PROTOCOL.
#
# Only the part after the last "+" is used for the lookup, so a compound
# scheme such as "git+ssh" is looked up as "ssh".
#
# @param protocol [String, nil] protocol name
# @return [String, nil] port for the protocol, or nil if `protocol` is nil
#   or has no entry in the table
def get_port(protocol)
  return nil unless protocol

  # `last` is nil when the split yields no pieces; to_s makes that a
  # harmless lookup miss.
  PORTS_BY_PROTOCOL[protocol.split('+').last.to_s]
end
|
20
|
+
|
21
|
+
# Looks up the protocol that conventionally uses `port` in PROTOCOLS_BY_PORT.
#
# @param port [#to_s] port; converted with `to_s` before the lookup
# @return [String, nil] protocol name, or nil if the port has no entry
def get_protocol(port)
  port_key = port.to_s
  PROTOCOLS_BY_PORT[port_key]
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'fuzzyurl/fields'
|
2
|
+
|
3
|
+
class Fuzzyurl::Strings
|
4
|
+
# Pattern for URLs and URL masks of the form
#
#   [protocol://][username[:password]@][hostname][:port][/path][?query][#fragment]
#
# Every component is optional and is exposed as a named capture.
#
# The pattern is anchored with \A and \Z rather than ^ and $. In Ruby, ^ and
# $ match at every line boundary, so a multi-line string used to parse from
# whichever single line happened to match, silently dropping the rest. With
# \A and \Z the whole string must parse; a single trailing newline is still
# tolerated.
REGEX = %r{
  \A
  (?: (?<protocol> \* | [a-zA-Z][A-Za-z+.-]+) ://)?
  (?: (?<username> \* | [a-zA-Z0-9%_.!~*'();&=+$,-]+)
      (?: : (?<password> \* | [a-zA-Z0-9%_.!~*'();&=+$,-]*))?
      @
  )?
  (?<hostname> [a-zA-Z0-9\.\*\-]+?)?
  (?: : (?<port> \* | \d+))?
  (?<path> / [^\?\#]*)?   ## captures leading /
  (?: \? (?<query> [^\#]*) )?
  (?: \# (?<fragment> .*) )?
  \Z
}x
|
18
|
+
|
19
|
+
class << self
|
20
|
+
|
21
|
+
# Parses `str` with REGEX and builds a Fuzzyurl from the named captures.
# Fields that are absent from `str` receive `opts[:default]`.
#
# @param str [String] URL or URL mask string to parse
# @param opts [Hash] options; `:default` is the value given to missing fields
# @return [Fuzzyurl, nil] parsed Fuzzyurl, or nil when `str` is not a String
# @raise [ArgumentError] if `str` does not match REGEX
def from_string(str, opts={})
  return nil unless str.kind_of?(String)

  fallback = opts[:default]
  parsed = REGEX.match(str)
  raise ArgumentError, "Couldn't parse url string: #{str}" unless parsed

  Fuzzyurl::FIELDS.each_with_object(Fuzzyurl.new) do |field, fu|
    fu.send("#{field}=", parsed[field] || fallback)
  end
end
|
35
|
+
|
36
|
+
# Serializes `fuzzyurl` into a URL string of the form
#
#   [protocol://][username[:password]@][hostname][:port][path][?query][#fragment]
#
# Fields that are nil (or false) are omitted. The password is only written
# when a username is present: ":password" without a username and the "@"
# separator would run straight into the hostname and produce a string that
# no longer describes the same URL.
#
# @param fuzzyurl [Fuzzyurl] Fuzzyurl to convert to a string
# @return [String] string representation of `fuzzyurl`
# @raise [ArgumentError] if `fuzzyurl` is not a Fuzzyurl
def to_string(fuzzyurl)
  unless fuzzyurl.kind_of?(Fuzzyurl)
    raise ArgumentError, "`fuzzyurl` must be a Fuzzyurl"
  end

  fu = fuzzyurl
  str = ""
  str << "#{fu.protocol}://" if fu.protocol
  if fu.username
    str << "#{fu.username}"
    str << ":#{fu.password}" if fu.password
    str << "@"
  end
  str << "#{fu.hostname}" if fu.hostname
  str << ":#{fu.port}" if fu.port
  str << "#{fu.path}" if fu.path
  str << "?#{fu.query}" if fu.query
  str << "##{fu.fragment}" if fu.fragment
  str
end
|
54
|
+
|
55
|
+
end
|
56
|
+
end
|
data/lib/fuzzyurl.rb
CHANGED
@@ -1,2 +1,194 @@
|
|
1
|
-
require '
|
1
|
+
require 'fuzzyurl/version'
|
2
|
+
require 'fuzzyurl/fields'
|
3
|
+
require 'fuzzyurl/protocols'
|
4
|
+
require 'fuzzyurl/match'
|
5
|
+
require 'fuzzyurl/strings'
|
6
|
+
|
7
|
+
class Fuzzyurl
|
8
|
+
# Define a reader and a writer for every URL field listed in FIELDS.
attr_accessor(*FIELDS)
|
9
|
+
|
10
|
+
# Creates a new Fuzzyurl object from the given params or URL string.
#
# A String is parsed with `Fuzzyurl.from_string`. A Hash is read directly;
# its keys should match the entries of `FIELDS`, and any other keys are
# ignored. nil is treated as an empty Hash, as the documented signature
# promises.
#
# @param params [Hash, String, nil] URL string or parameter hash
# @return [Fuzzyurl] new Fuzzyurl object
def initialize(params={})
  attrs =
    if params.kind_of?(String)
      Fuzzyurl.from_string(params).to_hash
    else
      params || {}
    end

  (FIELDS & attrs.keys).each do |field|
    send("#{field}=", attrs[field])
  end
end
|
21
|
+
|
22
|
+
# Builds a Hash with one entry per member of `Fuzzyurl::FIELDS`, keyed by
# field. Truthy values are converted with `to_s`; nil (or false) values are
# stored unchanged.
#
# @return [Hash] Hash representation of this Fuzzyurl
def to_hash
  FIELDS.each_with_object({}) do |field, result|
    value = send(field)
    result[field] = value ? value.to_s : value
  end
end
|
34
|
+
|
35
|
+
# Returns a new copy of this Fuzzyurl with the given params changed.
# `self` is not modified.
#
# Only keys that appear in `FIELDS` are applied, and each new value is
# converted with `to_s`. nil is treated as "no changes", as the documented
# signature promises.
#
# @param params [Hash, nil] new field values
# @return [Fuzzyurl] copy of `self` with the given fields changed
def with(params={})
  params ||= {}
  copy = Fuzzyurl.new(to_hash)
  (FIELDS & params.keys).each do |field|
    copy.send("#{field}=", params[field].to_s)
  end
  copy
end
|
46
|
+
|
47
|
+
# Serializes this Fuzzyurl via `Fuzzyurl::Strings.to_string`.
#
# @return [String] string representation of this Fuzzyurl
def to_s
  Fuzzyurl::Strings.to_string(self)
end
|
53
|
+
|
54
|
+
# @private
# Two Fuzzyurls are equal when their `to_hash` representations are equal.
# Objects that cannot be converted with `to_hash` (nil, Strings, ...) are
# never equal; comparing against them returns false instead of raising
# NoMethodError.
def ==(other)
  other.respond_to?(:to_hash) && to_hash == other.to_hash
end
|
58
|
+
|
59
|
+
|
60
|
+
class << self
|
61
|
+
|
62
|
+
# Builds a Fuzzyurl for use as a URL mask. Every field not supplied by
# `params` is set to the wildcard "*".
#
# A String is parsed with `from_string` using "*" as the default for missing
# fields. A Hash supplies values per field, each converted with `to_s`.
# nil is treated as an empty Hash.
#
# @param params [Hash, String, nil] parameters to set
# @return [Fuzzyurl] Fuzzyurl mask object
def mask(params={})
  params ||= {}
  return from_string(params, default: "*") if params.kind_of?(String)

  FIELDS.each_with_object(Fuzzyurl.new) do |field, wildcard_url|
    field_value = params.has_key?(field) ? params[field].to_s : "*"
    wildcard_url.send("#{field}=", field_value)
  end
end
|
77
|
+
|
78
|
+
# Serializes `fuzzyurl` by delegating to `Fuzzyurl::Strings.to_string`.
#
# @param fuzzyurl [Fuzzyurl] Fuzzyurl to convert to a string
# @return [String] string representation of `fuzzyurl`
def to_string(fuzzyurl)
  Fuzzyurl::Strings.to_string(fuzzyurl)
end
|
85
|
+
|
86
|
+
# Parses the URL string `str` into a Fuzzyurl by delegating to
# `Fuzzyurl::Strings.from_string`. Fields not present in `str` are assigned
# `opts[:default]`, which is nil unless given.
#
# @param str [String] string URL to convert to a Fuzzyurl
# @param opts [Hash] options, passed through unchanged
# @return [Fuzzyurl] Fuzzyurl representation of `str`
def from_string(str, opts={})
  Fuzzyurl::Strings.from_string(str, opts)
end
|
96
|
+
|
97
|
+
# Scores how closely `mask` matches `url`, using `Fuzzyurl::Match.match`.
# Higher scores mean a closer match.
#
# `mask` and `url` may each be a Fuzzyurl or a String. Strings are converted
# with `Fuzzyurl.mask` and `Fuzzyurl.from_string` respectively.
#
# @param mask [Fuzzyurl, String] URL mask
# @param url [Fuzzyurl, String] URL
# @return [Integer, nil] match score, or nil for no match
def match(mask, url)
  mask_fu = mask.kind_of?(Fuzzyurl) ? mask : Fuzzyurl.mask(mask)
  url_fu = url.kind_of?(Fuzzyurl) ? url : Fuzzyurl.from_string(url)
  Fuzzyurl::Match.match(mask_fu, url_fu)
end
|
110
|
+
|
111
|
+
# Returns true if `mask` matches `url`, false otherwise.
#
# `mask` and `url` may each be a Fuzzyurl or a String. Strings are converted
# with `Fuzzyurl.mask` and `Fuzzyurl.from_string` respectively, exactly once
# each.
#
# @param mask [Fuzzyurl, String] URL mask
# @param url [Fuzzyurl, String] URL
# @return [Boolean] whether `mask` matches `url`
def matches?(mask, url)
  m = mask.kind_of?(Fuzzyurl) ? mask : Fuzzyurl.mask(mask)
  u = url.kind_of?(Fuzzyurl) ? url : Fuzzyurl.from_string(url)
  Fuzzyurl::Match.matches?(m, u)
end
|
125
|
+
|
126
|
+
# Returns a Hash of match scores for each field of `mask` and `url`,
# indicating the closeness of the match. Values come from `fuzzy_match`:
# 0 indicates a wildcard match, 1 indicates an exact match, and nil
# indicates no match.
#
# `mask` and `url` may each be a Fuzzyurl or a String. Strings are converted
# with `Fuzzyurl.mask` and `Fuzzyurl.from_string` respectively, exactly once
# each.
#
# @param mask [Fuzzyurl, String] URL mask
# @param url [Fuzzyurl, String] URL
# @return [Hash] per-field match scores
def match_scores(mask, url)
  m = mask.kind_of?(Fuzzyurl) ? mask : Fuzzyurl.mask(mask)
  u = url.kind_of?(Fuzzyurl) ? url : Fuzzyurl.from_string(url)
  Fuzzyurl::Match.match_scores(m, u)
end
|
142
|
+
|
143
|
+
# Finds which of `masks` most closely matches `url` and returns its array
# index, using `Fuzzyurl::Match.best_match_index`.
#
# `url` and each element of `masks` may be a Fuzzyurl or a String. Strings
# are converted before matching.
#
# @param masks [Array] Array of URL masks
# @param url [Fuzzyurl, String] URL
# @return [Integer, nil] array index of the best-matching mask, or nil if
#   none match
def best_match_index(masks, url)
  candidates = masks.map do |entry|
    entry.kind_of?(Fuzzyurl) ? entry : Fuzzyurl.mask(entry)
  end
  target = url.kind_of?(Fuzzyurl) ? url : Fuzzyurl.from_string(url)
  Fuzzyurl::Match.best_match_index(candidates, target)
end
|
156
|
+
|
157
|
+
# Finds which of `masks` most closely matches `url` and returns that element
# of `masks` exactly as the caller supplied it.
#
# `url` and each element of `masks` may be a Fuzzyurl or a String.
#
# @param masks [Array] Array of URL masks
# @param url [Fuzzyurl, String] URL
# @return [Fuzzyurl, String, nil] best-matching given mask, or nil if none
#   match
def best_match(masks, url)
  position = best_match_index(masks, url)
  return position unless position

  masks[position]
end
|
169
|
+
|
170
|
+
# Matches a single string `mask` (which may contain * wildcards) against a
# single string `value` (which may not), by delegating to
# `Fuzzyurl::Match.fuzzy_match`.
#
# Wildcard language:
#
#     *              matches anything
#     foo/*          matches "foo/" and "foo/bar/baz" but not "foo"
#     foo/**         matches "foo/" and "foo/bar/baz" and "foo"
#     *.example.com  matches "api.v1.example.com" but not "example.com"
#     **.example.com matches "api.v1.example.com" and "example.com"
#
# Any other form is treated as a literal match.
#
# @param mask [String] String mask to match with (may contain wildcards)
# @param value [String] String value to match
# @return [Integer, nil] 0 for a wildcard match, 1 for an exact match,
#   else nil
def fuzzy_match(mask, value)
  Fuzzyurl::Match.fuzzy_match(mask, value)
end
|
190
|
+
|
191
|
+
end # class << self
|
192
|
+
|
193
|
+
end
|
2
194
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fuzzyurl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Pete Gamache
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -52,22 +52,33 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: 0.13.3
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: pry
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description:
|
60
70
|
email: pete@gamache.org
|
61
71
|
executables: []
|
62
72
|
extensions: []
|
63
73
|
extra_rdoc_files: []
|
64
74
|
files:
|
65
|
-
- lib/fuzzy_url.rb
|
66
|
-
- lib/fuzzy_url/matching.rb
|
67
|
-
- lib/fuzzy_url/url_components.rb
|
68
|
-
- lib/fuzzy_url/version.rb
|
69
75
|
- lib/fuzzyurl.rb
|
70
|
-
|
76
|
+
- lib/fuzzyurl/fields.rb
|
77
|
+
- lib/fuzzyurl/match.rb
|
78
|
+
- lib/fuzzyurl/protocols.rb
|
79
|
+
- lib/fuzzyurl/strings.rb
|
80
|
+
- lib/fuzzyurl/version.rb
|
81
|
+
homepage: https://github.com/gamache/fuzzyurl.rb
|
71
82
|
licenses:
|
72
83
|
- MIT
|
73
84
|
metadata: {}
|
@@ -79,7 +90,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
79
90
|
requirements:
|
80
91
|
- - ">="
|
81
92
|
- !ruby/object:Gem::Version
|
82
|
-
version: 1.
|
93
|
+
version: 1.9.3
|
83
94
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
95
|
requirements:
|
85
96
|
- - ">="
|
@@ -87,8 +98,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
87
98
|
version: '0'
|
88
99
|
requirements: []
|
89
100
|
rubyforge_project:
|
90
|
-
rubygems_version: 2.4.
|
101
|
+
rubygems_version: 2.4.8
|
91
102
|
signing_key:
|
92
103
|
specification_version: 4
|
93
|
-
summary:
|
104
|
+
summary: A library for non-strict parsing, construction, and wildcard-matching of
|
105
|
+
URLs.
|
94
106
|
test_files: []
|
data/lib/fuzzy_url/matching.rb
DELETED
@@ -1,120 +0,0 @@
|
|
1
|
-
class FuzzyURL
|
2
|
-
|
3
|
-
## FuzzyURL::Matching provides the logic for
|
4
|
-
module Matching
|
5
|
-
|
6
|
-
def self.included(klass)
|
7
|
-
klass.extend(ClassMethods)
|
8
|
-
end
|
9
|
-
|
10
|
-
module ClassMethods
|
11
|
-
|
12
|
-
## Compares a URL mask hash with a URL hash.
|
13
|
-
## Returns nil on negative match, and an integer match score otherwise.
|
14
|
-
## This match score is higher for more specific matches.
|
15
|
-
def match_hash(mask, url)
|
16
|
-
score = 0
|
17
|
-
tally = Proc.new {|x| return nil unless x; score += x}
|
18
|
-
|
19
|
-
tally.call match_hostnames(mask[:hostname], url[:hostname])
|
20
|
-
tally.call match_protocols_and_ports(mask, url)
|
21
|
-
tally.call match_paths(mask[:path], url[:path])
|
22
|
-
tally.call fuzzy_match(mask[:query], url[:query])
|
23
|
-
tally.call fuzzy_match(mask[:username], url[:username])
|
24
|
-
tally.call fuzzy_match(mask[:password], url[:password])
|
25
|
-
tally.call fuzzy_match(mask[:fragment], url[:fragment])
|
26
|
-
end
|
27
|
-
|
28
|
-
private
|
29
|
-
|
30
|
-
## Matches a URL mask hash against a URL hash.
|
31
|
-
## Returns true on positive match, false otherwise.
|
32
|
-
def matches_hash?(mask, url)
|
33
|
-
match_hash(mask, url) ? true : false
|
34
|
-
end
|
35
|
-
|
36
|
-
## Matches protocol and port information.
|
37
|
-
## Returns nil for no match, 0 if two wildcard matches were made, 1 if
|
38
|
-
## one wildcard match was made, and 2 for an exact match.
|
39
|
-
def match_protocols_and_ports(mask_hash, url_hash)
|
40
|
-
wildcard_matches = 0
|
41
|
-
mask_protocol = mask_hash[:protocol] || 'http'
|
42
|
-
url_protocol = url_hash[:protocol] || 'http'
|
43
|
-
if mask_hash[:protocol] && mask_protocol != '*'
|
44
|
-
return nil if mask_protocol != url_protocol
|
45
|
-
else
|
46
|
-
wildcard_matches += 1
|
47
|
-
end
|
48
|
-
|
49
|
-
mask_port = mask_hash[:port]
|
50
|
-
url_port = url_hash[:port] # || PORT_BY_PROTOCOL[url_protocol]
|
51
|
-
|
52
|
-
if !mask_port || mask_port == '*'
|
53
|
-
wildcard_matches += 1
|
54
|
-
elsif !url_port && PORT_BY_PROTOCOL[url_protocol] == mask_port.to_i
|
55
|
-
wildcard_matches += 1
|
56
|
-
elsif mask_port == url_port
|
57
|
-
## cool
|
58
|
-
else
|
59
|
-
## not cool
|
60
|
-
return nil
|
61
|
-
end
|
62
|
-
|
63
|
-
(2 - wildcard_matches)
|
64
|
-
end
|
65
|
-
|
66
|
-
PORT_BY_PROTOCOL = {
|
67
|
-
'http' => 80,
|
68
|
-
'https' => 443,
|
69
|
-
}
|
70
|
-
|
71
|
-
## Matches a picee of a mask against a piece of a URL. Handles wildcards.
|
72
|
-
## Returns nil for no match, 0 for a wildcard match, or 1 for an
|
73
|
-
## exact match.
|
74
|
-
def fuzzy_match(mask, piece)
|
75
|
-
return 0 if !mask || mask == '*' # || !piece
|
76
|
-
return 1 if mask == piece
|
77
|
-
nil
|
78
|
-
end
|
79
|
-
|
80
|
-
## Matches a hostname mask against a hostname.
|
81
|
-
## Returns nil for no match, 0 for a wildcard match, or 1 for an
|
82
|
-
## exact match.
|
83
|
-
def match_hostnames(mask, host)
|
84
|
-
mask_pieces = (mask || '').split('.').reverse
|
85
|
-
host_pieces = (host || '').split('.').reverse
|
86
|
-
return 1 if mask && host && mask_pieces==host_pieces
|
87
|
-
return 0 if match_pieces(mask_pieces, host_pieces, :ignore_depth => false)
|
88
|
-
nil
|
89
|
-
end
|
90
|
-
|
91
|
-
## Matches a path mask against a path.
|
92
|
-
## Returns nil for no match, 0 for a wildcard match, or 1 for an
|
93
|
-
## exact match.
|
94
|
-
def match_paths(mask, path)
|
95
|
-
mask = '/'+mask if mask && mask.index('/') != 0
|
96
|
-
path = '/'+path if path && path.index('/') != 0
|
97
|
-
mask_pieces = (mask || '*').split(%r{/})
|
98
|
-
path_pieces = (path || '/').split(%r{/})
|
99
|
-
return 1 if mask && path && mask_pieces==path_pieces
|
100
|
-
return 0 if match_pieces(mask_pieces, path_pieces, :ignore_depth => true)
|
101
|
-
nil
|
102
|
-
end
|
103
|
-
|
104
|
-
## Matches arrays of URL or hostname pieces.
|
105
|
-
## Returns nil for no match, 0 for a wildcard match, or 1 for an
|
106
|
-
## exact match.
|
107
|
-
def match_pieces(mask, pieces, args)
|
108
|
-
ignore_depth = args[:ignore_depth]
|
109
|
-
return nil if !ignore_depth && mask.count > pieces.count
|
110
|
-
pieces.each_with_index do |piece, i|
|
111
|
-
return 0 if piece && mask[i] == '*'
|
112
|
-
return nil if mask[i] != piece
|
113
|
-
end
|
114
|
-
1
|
115
|
-
end
|
116
|
-
|
117
|
-
end
|
118
|
-
|
119
|
-
end
|
120
|
-
end
|
@@ -1,91 +0,0 @@
|
|
1
|
-
class FuzzyURL
|
2
|
-
|
3
|
-
## FuzzyURL::URLComponents provides getting/setting of URL components
|
4
|
-
## on FuzzyURL objects in hash style (e.g. `foo[:hostname]`) and
|
5
|
-
## method style (e.g. `foo.hostname`). Acceptable URL components are
|
6
|
-
## :protocol, :username, :password, :hostname, :port, :path, :query,
|
7
|
-
## and :fragment.
|
8
|
-
module URLComponents
|
9
|
-
|
10
|
-
COMPONENTS = [:protocol, :username, :password, :hostname,
|
11
|
-
:port, :path, :query, :fragment]
|
12
|
-
|
13
|
-
## Gets a URL component.
|
14
|
-
def [](component)
|
15
|
-
component_sym = component.to_sym
|
16
|
-
if !COMPONENTS.include?(component_sym)
|
17
|
-
raise ArgumentError, "#{component.inspect} is not a URL component. "+
|
18
|
-
COMPONENTS.inspect
|
19
|
-
end
|
20
|
-
@components[component_sym]
|
21
|
-
end
|
22
|
-
|
23
|
-
## Sets a URL component.
|
24
|
-
def []=(component, value)
|
25
|
-
component_sym = component.to_sym
|
26
|
-
if !COMPONENTS.include?(component_sym)
|
27
|
-
raise ArgumentError, "#{component.inspect} is not a URL component. "+
|
28
|
-
COMPONENTS.inspect
|
29
|
-
end
|
30
|
-
@components[component_sym] = value
|
31
|
-
end
|
32
|
-
|
33
|
-
|
34
|
-
## Get the protocol for this FuzzyURL.
|
35
|
-
def protocol; self[:protocol] end
|
36
|
-
|
37
|
-
## Set the protocol for this FuzzyURL.
|
38
|
-
def protocol=(v); self[:protocol]=v end
|
39
|
-
|
40
|
-
|
41
|
-
## Get the username for this FuzzyURL.
|
42
|
-
def username; self[:username] end
|
43
|
-
|
44
|
-
## Set the username for this FuzzyURL.
|
45
|
-
def username=(v); self[:username]=v end
|
46
|
-
|
47
|
-
|
48
|
-
## Get the password for this FuzzyURL.
|
49
|
-
def password; self[:password] end
|
50
|
-
|
51
|
-
## Set the password for this FuzzyURL.
|
52
|
-
def password=(v); self[:password]=v end
|
53
|
-
|
54
|
-
|
55
|
-
## Get the hostname for this FuzzyURL.
|
56
|
-
def hostname; self[:hostname] end
|
57
|
-
|
58
|
-
## Set the hostname for this FuzzyURL.
|
59
|
-
def hostname=(v); self[:hostname]=v end
|
60
|
-
|
61
|
-
|
62
|
-
## Get the port for this FuzzyURL.
|
63
|
-
def port; self[:port] end
|
64
|
-
|
65
|
-
## Set the port for this FuzzyURL.
|
66
|
-
def port=(v); self[:port]=v end
|
67
|
-
|
68
|
-
|
69
|
-
## Get the path for this FuzzyURL.
|
70
|
-
def path; self[:path] end
|
71
|
-
|
72
|
-
## Set the path for this FuzzyURL.
|
73
|
-
def path=(v); self[:path]=v end
|
74
|
-
|
75
|
-
|
76
|
-
## Get the query for this FuzzyURL.
|
77
|
-
def query; self[:query] end
|
78
|
-
|
79
|
-
## Set the query for this FuzzyURL.
|
80
|
-
def query=(v); self[:query]=v end
|
81
|
-
|
82
|
-
|
83
|
-
## Get the fragment for this FuzzyURL.
|
84
|
-
def fragment; self[:fragment] end
|
85
|
-
|
86
|
-
## Set the fragment for this FuzzyURL.
|
87
|
-
def fragment=(v); self[:fragment]=v end
|
88
|
-
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
data/lib/fuzzy_url/version.rb
DELETED
data/lib/fuzzy_url.rb
DELETED
@@ -1,210 +0,0 @@
|
|
1
|
-
require 'fuzzy_url/version'
|
2
|
-
require 'fuzzy_url/matching'
|
3
|
-
require 'fuzzy_url/url_components'
|
4
|
-
require 'pp'
|
5
|
-
|
6
|
-
## FuzzyURL is a class to represent URLs and URL-like things. FuzzyURL aids
|
7
|
-
## in the manipulation and matching of URLs by providing non-strict parsing,
|
8
|
-
## wildcard matching, ranked matching, `#to_s`, and more.
|
9
|
-
##
|
10
|
-
## Example usage:
|
11
|
-
##
|
12
|
-
## ```
|
13
|
-
## require 'fuzzyurl'
|
14
|
-
## fuzzy_url = FuzzyURL.new('http://example.com/*')
|
15
|
-
## fuzzy_url.matches?('http://example.com') # => true
|
16
|
-
## fuzzy_url.matches?('http://example.com/a/b/c') # => true
|
17
|
-
## fuzzy_url.matches?('https://example.com') # => false
|
18
|
-
## fuzzy_url.matches?('http://foobar.com') # => false
|
19
|
-
## ```
|
20
|
-
##
|
21
|
-
## It is important to note that FuzzyURL is not a URL validator! It performs
|
22
|
-
## lenient matching of URLs and URL-like things that look like the following:
|
23
|
-
##
|
24
|
-
## ```
|
25
|
-
## [protocol ://] [username [: password] @] [hostname] [: port] [/ path] [? query] [# fragment]
|
26
|
-
## ```
|
27
|
-
##
|
28
|
-
## In a FuzzyURL, any part of the above may be replaced with a `*` character
|
29
|
-
## to match anything.
|
30
|
-
##
|
31
|
-
## In a hostname, the leftmost label of the host (e.g., the `xyz`
|
32
|
-
## in `xyz.us.example.com`) may be replaced with a `*` character
|
33
|
-
## (e.g., `*.us.example.com`) in order to match domains like
|
34
|
-
## `xxx.us.example.com` and `yyy.zzz.us.example.com`, but not `us.example.com`.
|
35
|
-
##
|
36
|
-
## In a path, a `*` character may be placed after the last `/` path separator
|
37
|
-
## (e.g., `/a/b/*`) in order to match paths like `/a/b` and `/a/b/c/d`,
|
38
|
-
## but not `/a/bcde`.
|
39
|
-
|
40
|
-
class FuzzyURL
|
41
|
-
include FuzzyURL::Matching
|
42
|
-
include FuzzyURL::URLComponents
|
43
|
-
|
44
|
-
|
45
|
-
## Creates a new FuzzyURL with the given URL or URL-like object of type
|
46
|
-
## String, Hash, or FuzzyURL.
|
47
|
-
## Acceptable hash keys are :protocol, :username, :password, :hostname,
|
48
|
-
## :port, :path, :query, and :fragment. Hash keys other than these are
|
49
|
-
## ignored.
|
50
|
-
def initialize(url='')
|
51
|
-
default_components = {:protocol=>nil, :username=>nil, :password=>nil,
|
52
|
-
:hostname=>nil, :port=>nil, :path=>nil,
|
53
|
-
:query=>nil, :fragment=>nil}
|
54
|
-
case url
|
55
|
-
when String
|
56
|
-
unless hash = self.class.url_to_hash(url)
|
57
|
-
raise ArgumentError, "Bad url URL: #{url.inspect}"
|
58
|
-
end
|
59
|
-
@components = default_components.merge(hash)
|
60
|
-
when Hash, FuzzyURL
|
61
|
-
@components = default_components.merge(url.to_hash)
|
62
|
-
else
|
63
|
-
raise ArgumentError, "url must be a String, Hash, or FuzzyURL; got #{url.inspect}"
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
## Matches the given URL string, hash, or FuzzyURL against this FuzzyURL.
|
68
|
-
## Returns nil on negative match, and an integer match score otherwise.
|
69
|
-
## This match score is higher for more specific matches.
|
70
|
-
def match(url)
|
71
|
-
case url
|
72
|
-
when String
|
73
|
-
self.class.match_hash(self.to_hash, self.class.url_to_hash(url))
|
74
|
-
when Hash, FuzzyURL
|
75
|
-
self.class.match_hash(self.to_hash, url.to_hash)
|
76
|
-
else
|
77
|
-
raise ArgumentError, "url must be a String, Hash, or FuzzyURL; got #{url.inspect}"
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
## Matches the given URL string, hash, or FuzzyURL against this FuzzyURL.
|
82
|
-
## Returns true on positive match, false otherwise.
|
83
|
-
def matches?(url)
|
84
|
-
match(url) ? true : false
|
85
|
-
end
|
86
|
-
|
87
|
-
## Returns this FuzzyURL's hash form.
|
88
|
-
def to_hash
|
89
|
-
Hash[@components]
|
90
|
-
end
|
91
|
-
|
92
|
-
## Returns this FuzzyURL's string form.
|
93
|
-
def to_s
|
94
|
-
self.class.hash_to_url(@components)
|
95
|
-
end
|
96
|
-
|
97
|
-
|
98
|
-
class << self
|
99
|
-
|
100
|
-
## Given a URL, returns a hash containing :protocol, :username, :password,
|
101
|
-
## :hostname, :port, :path, :query, and :fragment fields (all String
|
102
|
-
## or nil).
|
103
|
-
## Accepts `*` in place of any of the above fields, or as part of hostname
|
104
|
-
## or path.
|
105
|
-
## Returns nil if given a malformed URL.
|
106
|
-
##
|
107
|
-
## Example:
|
108
|
-
##
|
109
|
-
## ```
|
110
|
-
## FuzzyURL.url_to_hash('http://user:pass@example.com:8080/some/path/?foo=bar&baz=1#url-fragment')
|
111
|
-
## # => {:protocol=>"http", :username=>"user", :password=>"pass", :hostname=>"example.com", :port=>8080, :path=>"/some/path/", :query=>"foo=bar&baz=1", :fragment=>"url-fragment"}
|
112
|
-
## ```
|
113
|
-
|
114
|
-
def url_to_hash(url)
|
115
|
-
if m = url.match(%r{
|
116
|
-
^
|
117
|
-
|
118
|
-
(?: (\* | [a-zA-Z][A-Za-z+.-]+) ://)? ## m[1] is protocol
|
119
|
-
|
120
|
-
(?: (\* | [a-zA-Z0-9%_.!~*'();&=+$,-]+) ## m[2] is username
|
121
|
-
(?: : (\* | [a-zA-Z0-9%_.!~*'();&=+$,-]*))? ## m[3] is password
|
122
|
-
@
|
123
|
-
)?
|
124
|
-
|
125
|
-
([a-zA-Z0-9\.\*\-]+?)? ## m[4] is hostname
|
126
|
-
|
127
|
-
(?: : (\* | \d+))? ## m[5] is port
|
128
|
-
|
129
|
-
(/ [^\?\#]*)? ## m[6] is path
|
130
|
-
## captures leading /
|
131
|
-
|
132
|
-
(?: \? ([^\#]*) )? ## m[7] is query
|
133
|
-
|
134
|
-
(?: \# (.*) )? ## m[8] is fragment
|
135
|
-
|
136
|
-
$
|
137
|
-
}x)
|
138
|
-
|
139
|
-
protocol = m[1] ? m[1].downcase : nil
|
140
|
-
username = m[2]
|
141
|
-
password = m[3]
|
142
|
-
hostname = m[4] ? m[4].downcase : nil
|
143
|
-
port = m[5] ? (m[5] == '*' ? '*' : m[5].to_i) : nil
|
144
|
-
path = m[6]
|
145
|
-
query = m[7]
|
146
|
-
fragment = m[8]
|
147
|
-
|
148
|
-
{ :protocol => protocol,
|
149
|
-
:username => username,
|
150
|
-
:password => password,
|
151
|
-
:hostname => hostname,
|
152
|
-
:port => port,
|
153
|
-
:path => path,
|
154
|
-
:query => query,
|
155
|
-
:fragment => fragment }
|
156
|
-
|
157
|
-
else ## no match
|
158
|
-
nil
|
159
|
-
end
|
160
|
-
end
|
161
|
-
|
162
|
-
## Given a hash containing :protocol, :username, :password,
|
163
|
-
## :hostname, :port, :path, :query, and :fragment fields (all String
|
164
|
-
## or nil), return a URL string containing these elements.
|
165
|
-
def hash_to_url(hash)
|
166
|
-
url = ''
|
167
|
-
url << "#{ hash[:protocol] }://" if hash[:protocol]
|
168
|
-
if hash[:username]
|
169
|
-
url << "#{hash[:username]}"
|
170
|
-
url << ":#{hash[:password]}" if hash[:password]
|
171
|
-
url << '@'
|
172
|
-
end
|
173
|
-
url << "#{hash[:hostname]}" if hash[:hostname]
|
174
|
-
url << ":#{hash[:port]}" if hash[:port]
|
175
|
-
|
176
|
-
## make sure path starts with a / if it's defined
|
177
|
-
path = hash[:path]
|
178
|
-
path = "/#{path}" if path && path.index('/') != 0
|
179
|
-
url << "#{path}"
|
180
|
-
|
181
|
-
url << "?#{hash[:query]}" if hash[:query]
|
182
|
-
url << "##{hash[:fragment]}" if hash[:fragment]
|
183
|
-
url
|
184
|
-
end
|
185
|
-
|
186
|
-
## Matches a URL mask string with a URL string.
|
187
|
-
## Raises ArgumentError when given malformed URLs.
|
188
|
-
## Returns true on positive match, false otherwise.
|
189
|
-
def matches?(mask, url)
|
190
|
-
match(mask, url) ? true : false
|
191
|
-
end
|
192
|
-
|
193
|
-
## Matches a URL mask string with a URL string.
|
194
|
-
## Raises ArgumentError when given malformed URLs.
|
195
|
-
## Returns nil on negative match, and an integer match score otherwise.
|
196
|
-
## This match score is higher for more specific matches.
|
197
|
-
def match(mask, url)
|
198
|
-
unless mask_hash = url_to_hash(mask)
|
199
|
-
raise ArgumentError, "Badly formed URL mask: #{mask.inspect}"
|
200
|
-
end
|
201
|
-
unless url_hash = url_to_hash(url)
|
202
|
-
raise ArgumentError, "Badly formed URL: #{url.inspect}"
|
203
|
-
end
|
204
|
-
match_hash(mask_hash, url_hash)
|
205
|
-
end
|
206
|
-
|
207
|
-
end # class << self
|
208
|
-
|
209
|
-
end
|
210
|
-
|