rfeedparser 0.9.87 → 0.9.91
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rfeedparser.rb +37 -3191
- data/lib/rfeedparser/aliases.rb +432 -0
- data/lib/rfeedparser/better_attributelist.rb +41 -0
- data/lib/rfeedparser/better_sgmlparser.rb +264 -0
- data/lib/rfeedparser/encoding_helpers.rb +257 -0
- data/lib/rfeedparser/feedparserdict.rb +93 -0
- data/lib/rfeedparser/forgiving_uri.rb +947 -0
- data/lib/rfeedparser/markup_helpers.rb +74 -0
- data/lib/rfeedparser/parser_mixin.rb +1235 -0
- data/lib/rfeedparser/parsers.rb +177 -0
- data/lib/rfeedparser/scrub.rb +207 -0
- data/lib/rfeedparser/time_helpers.rb +408 -0
- data/tests/rfeedparsertest.rb +3 -1
- metadata +3270 -3249
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
module FeedParserUtilities
  # A Hash whose keys can be addressed by several historical aliases.
  #
  # The same piece of feed information (for example "when was this feed last
  # updated?") goes by different names depending on the feed format. This
  # class lets a developer who knows the feed type use the name they expect,
  # while a developer who wants one consistent interface can use the common
  # name.
  #
  # @@keymap maps an alias (its keys) to the name, or list of candidate
  # names, that the value is really stored under (its values). #[] and #[]=
  # consult @@keymap whenever they are given a key that appears in it.
  class FeedParserDict < Hash
    @@keymap = {'channel' => 'feed',
                'items' => 'entries',
                'guid' => 'id',
                'date' => 'updated',
                'date_parsed' => 'updated_parsed',
                'description' => ['subtitle', 'summary'],
                'url' => ['href'],
                'modified' => 'updated',
                'modified_parsed' => 'updated_parsed',
                'issued' => 'published',
                'issued_parsed' => 'published_parsed',
                'copyright' => 'rights',
                'copyright_detail' => 'rights_detail',
                'tagline' => 'subtitle',
                'tagline_detail' => 'subtitle_detail'}

    # Hash already defines #entries (via Enumerable), so the attribute-style
    # access provided by method_missing never fires for it; expose the stored
    # 'entries' value explicitly instead.
    def entries
      self['entries']
    end

    # Builds a dictionary, optionally pre-populated.
    #
    # pairs may be nil, an Array of [key, value] pairs (each stored through
    # #[]=, so aliases are translated), or a Hash (merged as-is, without
    # alias translation).
    def initialize(pairs=nil)
      super()
      if pairs.class == Array and pairs[0].class == Array and pairs[0].length == 2
        pairs.each { |k, v| self[k] = v }
      elsif pairs.class == Hash
        self.merge!(pairs)
      end
    end

    # Looks a value up by key or alias.
    #
    # 'category' returns the term of the first tag and 'categories' returns
    # [scheme, term] pairs for every tag; both return nil when no tags are
    # stored instead of raising NoMethodError on nil.
    def [](key)
      if key == 'category'
        tags = self['tags']
        first_tag = tags && tags[0]
        return first_tag && first_tag['term']
      end
      if key == 'categories'
        tags = self['tags']
        return tags && tags.collect { |tag| [tag['scheme'], tag['term']] }
      end
      realkey = @@keymap[key] || key
      if realkey.class == Array
        # Several possible real names: the first one actually present wins.
        # The block parameter is deliberately not called `key`, so the
        # caller's key can never be clobbered or shadowed.
        realkey.each { |candidate| return self[candidate] if has_key?(candidate) }
      end
      # Note that the original key is preferred over the realkey we (might
      # have) found in @@keymap
      return super(key) if has_key?(key)
      super(realkey)
    end

    # Stores a value, translating an alias to its real name first. When an
    # alias has several candidate names the first one is used.
    def []=(key,value)
      if @@keymap.key?(key)
        key = @@keymap[key]
        key = key[0] if key.class == Array
      end
      super(key,value)
    end

    # Attribute-style access: dict.title reads self['title'] and
    # dict.title = x writes it. Names ending in '!' or '?' or starting with
    # '_' are rejected with NoMethodError.
    def method_missing(msym, *args)
      methodname = msym.to_s
      # str[-1, 1] and str[0, 1] yield one-character Strings on every Ruby
      # version, whereas str[-1] is an Integer on Ruby 1.8.
      last_char = methodname[-1, 1]
      if last_char == '='
        return self[methodname[0..-2]] = args[0]
      elsif last_char != '!' and last_char != '?' and methodname[0, 1] != "_" # FIXME implement with private?
        return self[methodname]
      else
        raise NoMethodError, "whoops, we don't know about the attribute or method called `#{methodname}' for #{self}:#{self.class}"
      end
    end
  end
end
|
|
@@ -0,0 +1,947 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
# From Robert Aman's GentleCMS URI.
|
|
3
|
+
# GentleCMS, Copyright (c) 2006 Robert Aman
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
6
|
+
# a copy of this software and associated documentation files (the
|
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
11
|
+
# the following conditions:
|
|
12
|
+
#
|
|
13
|
+
# The above copyright notice and this permission notice shall be
|
|
14
|
+
# included in all copies or substantial portions of the Software.
|
|
15
|
+
#
|
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
23
|
+
|
|
24
|
+
# This is an implementation of a URI parser based on RFC 3986.
|
|
25
|
+
class ForgivingURI
|
|
26
|
+
# Raised if something other than a uri is supplied.
|
|
27
|
+
class InvalidURIError < StandardError; end # no extra behaviour; exists only as a distinct error type
|
|
29
|
+
# Raised if an invalid method option is supplied.
|
|
30
|
+
class InvalidOptionError < StandardError; end # no extra behaviour; exists only as a distinct error type
|
|
32
|
+
|
|
33
|
+
# Returns a URI object based on the parsed string.
|
|
34
|
+
def self.parse(uri_string)
  # nil in, nil out; an object that is already one of ours is returned as-is.
  return nil if uri_string.nil?
  return uri_string if uri_string.kind_of?(self)

  # Standard-library URI objects are parsed from their string form.
  uri_string = uri_string.to_s if uri_string.class.name =~ /^URI::/

  # Split into scheme / authority / path / query / fragment.
  components = uri_string.scan(
    /^(([^:\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/)[0]
  return nil if components.nil?
  scheme, authority, path, query, fragment = components.values_at(1, 3, 4, 6, 8)

  # Break the authority down into userinfo, host and port.
  userinfo = host = port = nil
  unless authority.nil?
    userinfo_pattern = /^([^\[\]]*)@/
    port_pattern = /:([^:@\[\]]*?)$/
    userinfo = authority.scan(userinfo_pattern).flatten[0]
    host = authority.gsub(userinfo_pattern, "").gsub(port_pattern, "")
    port = authority.scan(port_pattern).flatten[0]
  end
  port = nil if port == ""

  # WARNING: Not standards-compliant, but follows the theme of Postel's law.
  # "feed://http://..." would otherwise be read as host "http" with a blank
  # port. Treat the embedded URI as an opaque path instead.
  if scheme == "feed" && host == "http"
    userinfo = host = port = nil
    path = authority + path
  end

  ForgivingURI.new(scheme, userinfo, host, port, path, query, fragment)
end
|
|
85
|
+
|
|
86
|
+
# Converts a path to a file protocol URI. If the path supplied is
|
|
87
|
+
# relative, it will be returned as a relative URI. If the path supplied
|
|
88
|
+
# is actually a URI, it will return the parsed URI.
|
|
89
|
+
def self.convert_path(path)
  # Converts a filesystem path into a file: URI object.
  #
  # Returns nil for nil. Absolute POSIX paths and Windows drive paths gain a
  # file: prefix; anything else is handed to parse unchanged. When the
  # resulting file: path names an existing directory, exactly one trailing
  # slash is ensured.
  return nil if path.nil?

  converted_uri = path.strip
  # Absolute POSIX path.
  converted_uri = "file://" + converted_uri if converted_uri[0..0] == "/"
  # Windows drive path such as c:\dir or c:/dir.
  converted_uri = "file:///" + converted_uri if converted_uri =~ /^[a-zA-Z]:[\\\/]/
  # Normalise however many slashes follow "file:" to exactly three.
  converted_uri = converted_uri.gsub(/^file:\/*/i, "file:///")

  if converted_uri =~ /^file:/i
    # Adjust windows-style uris ("c|" drive notation, backslashes).
    converted_uri = converted_uri.gsub(/^file:\/\/\/([a-zA-Z])\|/i, 'file:///\1:')
    converted_uri = converted_uri.gsub(/\\/, '/')
    converted_uri = self.parse(converted_uri).normalize
    # File.directory? is false for missing paths, so it replaces the removed
    # File.exists? plus File.stat(...).directory? pair in one call.
    if File.directory?(converted_uri.path)
      converted_uri.path = converted_uri.path.gsub(/\/$/, "") + "/"
    end
  else
    converted_uri = self.parse(converted_uri)
  end

  converted_uri
end
|
|
117
|
+
|
|
118
|
+
# Joins several uris together.
|
|
119
|
+
def self.join(*uris)
  # Parse anything that is not already one of our URI objects, then fold the
  # remaining URIs into a copy of the first one, left to right.
  parsed = uris.collect do |candidate|
    candidate.kind_of?(self) ? candidate : self.parse(candidate.to_s)
  end
  combined = parsed.shift.dup
  parsed.each { |following| combined.merge!(following) }
  combined
end
|
|
129
|
+
|
|
130
|
+
# Correctly escapes a uri.
|
|
131
|
+
def self.escape(uri)
  # Accepts a URI object or anything with to_s. Path, query and fragment go
  # through normalize_escaping; the other components are copied verbatim.
  # Returns the reassembled URI as a String.
  source = uri.kind_of?(self) ? uri : self.parse(uri.to_s)
  untouched = [source.scheme, source.userinfo, source.host, source.specified_port]
  escaped = [source.path, source.query, source.fragment].collect do |component|
    self.normalize_escaping(component)
  end
  ForgivingURI.new(*(untouched + escaped)).to_s
end
|
|
143
|
+
|
|
144
|
+
# Extracts uris from an arbitrary body of text.
|
|
145
|
+
def self.extract(text, options={})
  # Pulls URIs out of free text.
  #
  # Options:
  #   :base  - URI (object or string) that every extracted URI is joined onto
  #   :parse - when true return URI objects, otherwise Strings
  # Raises InvalidOptionError for any other option key.
  defaults = {:base => nil, :parse => false}
  options = defaults.merge(options)
  unknown_options = options.keys - defaults.keys
  unless unknown_options.empty?
    raise InvalidOptionError, "unknown option(s): #{unknown_options.inspect}"
  end

  # This regular expression needs to be less forgiving or else it would
  # match virtually all text. Which isn't exactly what we're going for.
  extract_regex = /((([a-z\+]+):)[^ \n\<\>\"\\]+[\w\/])/
  sgml_extract_regex = /<[^>]+href=\"([^\"]+?)\"[^>]*>/
  textile_extract_regex = /\".+?\":([^ ]+\/[^ ]+)[ \,\.\;\:\?\!\<\>\"]/i

  # Each pass only contributes candidates that earlier passes did not find.
  extracted_uris = []
  [extract_regex, sgml_extract_regex, textile_extract_regex].each do |regex|
    found = text.scan(regex).collect { |match| match[0] }
    extracted_uris.concat(found - extracted_uris)
  end

  base_uri = nil
  unless options[:base].nil?
    base_uri =
      options[:base].kind_of?(self) ? options[:base] : self.parse(options[:base].to_s)
  end

  parsed_uris = []
  extracted_uris.each do |uri_string|
    begin
      parsed = self.parse(uri_string)
      parsed_uris << (base_uri.nil? ? parsed : (base_uri + parsed))
    rescue StandardError
      # Best effort: a candidate that cannot be parsed or joined is skipped.
      # Only StandardError is rescued so Interrupt and SystemExit still
      # reach the caller.
      next
    end
  end

  # Drop things that merely look like URIs (timestamps such as "T12:30",
  # XML namespace prefixes and similar "word:" constructs).
  ignored_schemes = ["xmlns", "xml", "thr", "this", "float", "user", "username", "out"]
  parsed_uris.reject! do |uri|
    uri.scheme =~ /T\d+/ || ignored_schemes.include?(uri.scheme)
  end

  if options[:parse]
    parsed_uris
  else
    parsed_uris.collect { |uri| uri.to_s }
  end
end
|
|
196
|
+
|
|
197
|
+
# Creates a new uri object from component parts. Passing nil for
|
|
198
|
+
# any of these parameters is acceptable.
|
|
199
|
+
def initialize(scheme, userinfo, host, port, path, query, fragment)
  # All seven components are handed, in order, to assign_components.
  assign_components(
    scheme, userinfo, host, port,
    path, query, fragment
  )
end
|
|
202
|
+
|
|
203
|
+
# Returns the scheme (protocol) for this URI.
|
|
204
|
+
def scheme
  # A missing or whitespace-only scheme is reported as nil.
  blank = @scheme.nil? || @scheme.strip == ""
  blank ? nil : @scheme
end
|
|
208
|
+
|
|
209
|
+
# Sets the scheme (protocol) for this URI.
|
|
210
|
+
def scheme=(new_scheme)
  # Stored exactly as given, with no trimming or case folding.
  @scheme = new_scheme
end
|
|
213
|
+
|
|
214
|
+
# Returns the username and password segment of this URI.
|
|
215
|
+
def userinfo
  # Raw "user:password" segment, or nil when none is set.
  @userinfo
end
|
|
218
|
+
|
|
219
|
+
# Sets the username and password segment of this URI.
|
|
220
|
+
def userinfo=(new_userinfo)
  @userinfo = new_userinfo
  # The cached authority string embeds the userinfo, so discard it.
  @authority = nil
end
|
|
224
|
+
|
|
225
|
+
# Returns the host for this URI.
|
|
226
|
+
def host
  # Host component as stored, or nil when none is set.
  @host
end
|
|
229
|
+
|
|
230
|
+
# Sets the host for this URI.
|
|
231
|
+
def host=(new_host)
  @host = new_host
  # The cached authority string embeds the host, so discard it.
  @authority = nil
end
|
|
235
|
+
|
|
236
|
+
# Returns the authority segment of this URI.
|
|
237
|
+
def authority
  # Returns "userinfo@host:port", built lazily and cached in @authority.
  # Without a host there is no authority at all (nil).
  return @authority if defined?(@authority) && !@authority.nil?
  return nil if self.host.nil?

  assembled = ""
  assembled << "#{self.userinfo}@" unless self.userinfo.nil?
  assembled << self.host
  # Only an explicitly specified port appears, never a scheme default.
  assembled << ":#{self.specified_port}" unless self.specified_port.nil?
  @authority = assembled
end
|
|
251
|
+
|
|
252
|
+
# Sets the authority segment of this URI.
|
|
253
|
+
def authority=(new_authority)
  # Stores the raw authority and re-derives userinfo, host and port from it.
  @authority = new_authority
  parsed_userinfo = parsed_host = parsed_port = nil
  unless new_authority.nil?
    userinfo_pattern = /^([^\[\]]*)@/
    port_pattern = /:([^:@\[\]]*?)$/
    parsed_userinfo = new_authority.scan(userinfo_pattern).flatten[0]
    parsed_host = new_authority.gsub(userinfo_pattern, "").gsub(port_pattern, "")
    parsed_port = new_authority.scan(port_pattern).flatten[0]
  end
  # A trailing ":" with nothing after it means "no port".
  parsed_port = nil if parsed_port == ""
  @userinfo = parsed_userinfo
  @host = parsed_host
  # Reset the effective port; only the specified port is taken from the string.
  @port = nil
  @specified_port = parsed_port
end
|
|
272
|
+
|
|
273
|
+
# Returns the user for this URI.
|
|
274
|
+
def user
  # Returns the user part of the userinfo, or nil when there is no userinfo.
  # The result is cached in @user.
  if !defined?(@user) || @user.nil?
    @user = nil
    return @user if @userinfo.nil?
    if @userinfo =~ /:/
      # Split at the FIRST colon only, matching how the password reader
      # takes everything after the first colon. A greedy match up to the
      # last colon would report "user:pa" for "user:pa:ss".
      @user = @userinfo.strip.split(":", 2)[0].strip
    else
      @user = @userinfo.dup
    end
  end
  @user
end
|
|
286
|
+
|
|
287
|
+
# Sets the user for this URI.
|
|
288
|
+
def user=(new_user)
  # Rebuilds @userinfo from the new user and the current password, then
  # clears the cached user, password and authority values.
  current_password = self.password
  @userinfo =
    if new_user == nil
      current_password == nil ? nil : ":#{current_password}"
    elsif current_password == nil
      "#{new_user}"
    else
      "#{new_user}:#{current_password}"
    end
  @user = nil
  @password = nil
  @authority = nil
end
|
|
304
|
+
|
|
305
|
+
# Returns the password for this URI.
|
|
306
|
+
def password
  # Returns everything after the first colon of the userinfo, trimmed, or
  # nil when there is no userinfo or no colon. Cached in @password.
  return @password if defined?(@password) && !@password.nil?
  @password = nil
  return nil if @userinfo.nil?
  return nil unless @userinfo =~ /:/
  @password = @userinfo.strip.scan(/:(.*)$/).flatten[0].strip
end
|
|
318
|
+
|
|
319
|
+
# Sets the password for this URI.
|
|
320
|
+
def password=(new_password)
  # Rebuilds @userinfo from the current user and the new password, then
  # clears the cached user and authority values.
  current_user = self.user
  @password = new_password
  @userinfo =
    if current_user == nil
      new_password == nil ? nil : ":#{new_password}"
    elsif new_password == nil
      "#{current_user}"
    else
      "#{current_user}:#{new_password}"
    end
  @user = nil
  @authority = nil
end
|
|
335
|
+
|
|
336
|
+
# Returns an array of known ip-based schemes. These schemes typically
|
|
337
|
+
# use a similar URI form:
|
|
338
|
+
# //<user>:<password>@<host>:<port>/<url-path>
|
|
339
|
+
def self.ip_based_schemes
  # The known schemes are exactly the keys of the scheme-to-port table.
  known = self.scheme_mapping
  known.keys
end
|
|
342
|
+
|
|
343
|
+
# Returns a hash of common IP-based schemes and their default port
|
|
344
|
+
# numbers. Adding new schemes to this hash, as necessary, will allow
|
|
345
|
+
# for better URI normalization.
|
|
346
|
+
def self.scheme_mapping
  # Built on first use and then reused. Callers receive the same mutable
  # Hash each time, so schemes added to it persist.
  @protocol_mapping ||= {
    "http" => 80,
    "https" => 443,
    "ftp" => 21,
    "tftp" => 69,
    "ssh" => 22,
    "svn+ssh" => 22,
    "telnet" => 23,
    "nntp" => 119,
    "gopher" => 70,
    "wais" => 210,
    "prospero" => 1525
  }
end
|
|
364
|
+
|
|
365
|
+
# Returns the port number for this URI. This method will normalize to the
|
|
366
|
+
# default port for the URI's scheme if the port isn't explicitly specified
|
|
367
|
+
# in the URI.
|
|
368
|
+
def port
  # An unset, zero or non-numeric port falls back to the default port for
  # the scheme (nil when the scheme is missing or unknown). Otherwise the
  # stored port is coerced to an Integer. The result is written to @port.
  if @port.to_i == 0
    @port =
      if self.scheme.nil?
        nil
      else
        self.class.scheme_mapping[self.scheme.strip.downcase]
      end
  else
    @port = @port.to_i
  end
  @port
end
|
|
381
|
+
|
|
382
|
+
# Sets the port for this URI.
|
|
383
|
+
def port=(new_port)
  # The value is coerced through to_s.to_i, so nil and non-numeric input
  # become 0. Both the effective and the specified port are updated.
  numeric_port = new_port.to_s.to_i
  @port = numeric_port
  @specified_port = numeric_port
  # The cached authority string embeds the port, so discard it.
  @authority = nil
end
|
|
388
|
+
|
|
389
|
+
# Returns the port number that was actually specified in the URI string.
|
|
390
|
+
def specified_port
  # Integer port that was explicitly given, or nil when none was given or
  # the stored value does not convert to a non-zero number.
  @specified_port = nil unless defined?(@specified_port)
  return nil if @specified_port.nil?
  numeric = @specified_port.to_s.to_i
  numeric == 0 ? nil : numeric
end
|
|
400
|
+
|
|
401
|
+
# Returns the path for this URI.
|
|
402
|
+
def path
  # Path component as stored, or nil when none is set.
  @path
end
|
|
405
|
+
|
|
406
|
+
# Sets the path for this URI.
|
|
407
|
+
def path=(new_path)
  # Stored exactly as given.
  @path = new_path
end
|
|
410
|
+
|
|
411
|
+
# Returns the basename, if any, of the file at the path being referenced.
|
|
412
|
+
# Returns nil if there is no path component.
|
|
413
|
+
def basename
  # Last path segment with any trailing ";parameters" removed; nil when
  # there is no path.
  location = self.path
  return nil if location == nil
  File.basename(location).gsub(/;[^\/]*$/, "")
end
|
|
417
|
+
|
|
418
|
+
# Returns the extension, if any, of the file at the path being referenced.
|
|
419
|
+
# Returns "" if there is no extension or nil if there is no path
|
|
420
|
+
# component.
|
|
421
|
+
def extname
  # Extension (including the dot) of the basename, "" when there is none,
  # nil when there is no path.
  return nil if self.path == nil
  file_name = self.basename.gsub(/;[^\/]*$/, "")
  File.extname(file_name)
end
|
|
425
|
+
|
|
426
|
+
# Returns the query string for this URI.
|
|
427
|
+
def query
  # Query string as stored (without the leading "?"), or nil when none is set.
  @query
end
|
|
430
|
+
|
|
431
|
+
# Sets the query string for this URI.
|
|
432
|
+
def query=(new_query)
  # Stored exactly as given.
  @query = new_query
end
|
|
435
|
+
|
|
436
|
+
# Returns the fragment for this URI.
|
|
437
|
+
def fragment
  # Fragment as stored (without the leading "#"), or nil when none is set.
  @fragment
end
|
|
440
|
+
|
|
441
|
+
# Sets the fragment for this URI.
|
|
442
|
+
def fragment=(new_fragment)
  # Stored exactly as given.
  @fragment = new_fragment
end
|
|
445
|
+
|
|
446
|
+
# Returns true if the URI uses an IP-based protocol.
|
|
447
|
+
def ip_based?
  # True when the trimmed, lower-cased scheme is one of the class's known
  # IP-based schemes; a URI without a scheme is never IP-based.
  current_scheme = self.scheme
  return false if current_scheme.nil?
  self.class.ip_based_schemes.include?(current_scheme.strip.downcase)
end
|
|
451
|
+
|
|
452
|
+
# Returns true if this URI is known to be relative.
|
|
453
|
+
def relative?
  # A URI is treated as relative exactly when it has no scheme.
  self.scheme.nil?
end
|
|
456
|
+
|
|
457
|
+
# Returns true if this URI is known to be absolute.
|
|
458
|
+
def absolute?
  # Defined purely as the negation of relative?.
  relative? ? false : true
end
|
|
461
|
+
|
|
462
|
+
# Joins two URIs together.
|
|
463
|
+
def +(uri)
  # Resolves +uri+ as a reference against this URI, used as the base, and
  # returns a new URI object. Anything that is not already a URI object is
  # parsed from its to_s form. An empty reference yields a copy of the base.
  uri = ForgivingURI.parse(uri.to_s) unless uri.kind_of?(self.class)
  return self.dup if uri.to_s == ""

  joined_scheme = nil
  joined_userinfo = nil
  joined_host = nil
  joined_port = nil
  joined_path = nil
  joined_query = nil
  joined_fragment = nil

  # Section 5.2.2 of RFC 3986
  if uri.scheme != nil
    # The reference is absolute: it replaces everything.
    joined_scheme = uri.scheme
    joined_userinfo = uri.userinfo
    joined_host = uri.host
    joined_port = uri.specified_port
    joined_path = self.class.normalize_path(uri.path)
    joined_query = uri.query
  else
    if uri.authority != nil
      # Network-path reference: only the scheme comes from the base.
      joined_userinfo = uri.userinfo
      joined_host = uri.host
      joined_port = uri.specified_port
      joined_path = self.class.normalize_path(uri.path)
      joined_query = uri.query
    else
      if uri.path == nil || uri.path == ""
        # Same-document style reference: keep the base path, and the base
        # query too unless the reference supplies its own.
        joined_path = self.path
        joined_query = uri.query != nil ? uri.query : self.query
      else
        if uri.path[0..0] == "/"
          joined_path = self.class.normalize_path(uri.path)
        else
          base_path = self.path.nil? ? "" : self.path.dup
          base_path = self.class.normalize_path(base_path)
          # Section 5.2.3 of RFC 3986: keep the base path up to and
          # including its last "/". A base path with no "/" contributes
          # nothing at all.
          if base_path.include?("/")
            base_path = base_path.gsub(/\/[^\/]+$/, "/")
          else
            base_path = ""
          end
          # A base with an authority but an empty path merges as "/".
          # Without this, "http://example.com" + "foo" would serialise as
          # "http://example.comfoo".
          base_path = "/" if base_path == "" && self.authority != nil
          joined_path = self.class.normalize_path(base_path + uri.path)
        end
        joined_query = uri.query
      end
      joined_userinfo = self.userinfo
      joined_host = self.host
      joined_port = self.specified_port
    end
    joined_scheme = self.scheme
  end
  # The fragment always comes from the reference.
  joined_fragment = uri.fragment

  ForgivingURI.new(
    joined_scheme,
    joined_userinfo,
    joined_host,
    joined_port,
    joined_path,
    joined_query,
    joined_fragment
  )
end
|
|
531
|
+
|
|
532
|
+
# Merges two URIs together.
|
|
533
|
+
def merge(uri)
  # Plain alias for the + operator; returns a new object.
  self + uri
end
|
|
536
|
+
|
|
537
|
+
# Destructive form of merge.
|
|
538
|
+
def merge!(uri)
  # Computes the merged URI, then hands it to replace_self.
  merged = self.merge(uri)
  replace_self(merged)
end
|
|
541
|
+
|
|
542
|
+
# Returns a normalized URI object.
|
|
543
|
+
#
|
|
544
|
+
# NOTE: This method does not attempt to conform to specifications. It
|
|
545
|
+
# exists largely to correct other people's failures to read the
|
|
546
|
+
# specifications, and also to deal with caching issues since several
|
|
547
|
+
# different URIs may represent the same resource and should not be
|
|
548
|
+
# cached multiple times.
|
|
549
|
+
def normalize
  # Builds and returns a new, normalized URI object; the receiver is not
  # modified. Normalization is deliberately forgiving rather than strictly
  # standards-conformant.
  normalized_scheme = nil
  normalized_scheme = self.scheme.strip.downcase if self.scheme != nil
  normalized_scheme = "svn+ssh" if normalized_scheme == "ssh+svn"
  if normalized_scheme == "feed" && self.to_s =~ /^feed:\/*http:\/*/
    # "feed:http://..." wrappers are unwrapped and the inner URI normalized.
    return self.class.parse(
      self.to_s.scan(/^feed:\/*(http:\/*.*)/).flatten[0]).normalize
  end

  normalized_userinfo = nil
  normalized_userinfo = self.userinfo.strip if self.userinfo != nil

  normalized_host = nil
  normalized_host = self.host.strip.downcase if self.host != nil
  if normalized_host != nil
    begin
      normalized_host = ForgivingURI::IDNA.to_ascii(normalized_host)
    rescue Exception
      # Best effort: keep the host unchanged when IDNA conversion fails.
      # NOTE(review): the IDNA helper is not visible here and may raise
      # errors outside StandardError, so the broad rescue is kept. Confirm
      # before narrowing it.
      nil
    end
  end

  # Normalize IPv4 addresses written in the alternative notations that
  # inet_addr() accepts.
  if normalized_host != nil
    stripped_host = normalized_host.strip
    if stripped_host =~ /^\d+$/
      # Decimal IPv4 address.
      decimal = normalized_host.to_i
      if decimal < (256 ** 4)
        normalized_host =
          [24, 16, 8, 0].collect { |shift| (decimal >> shift) & 255 }.join(".")
      end
    elsif stripped_host =~ /^0+[0-7]{3}\.0+[0-7]{3}\.0+[0-7]{3}\.0+[0-7]{3}$/
      # Octal IPv4 address. The dots are escaped so that only a literal "."
      # separates the octets.
      normalized_host =
        normalized_host.split('.').collect { |octet| octet.to_i(8) }.join(".")
    elsif stripped_host =~ /^0x[0-9a-f]{2}\.0x[0-9a-f]{2}\.0x[0-9a-f]{2}\.0x[0-9a-f]{2}$/i
      # Hexadecimal IPv4 address (dots escaped, as above).
      normalized_host =
        normalized_host.split('.').collect { |octet| octet[2...4].to_i(16) }.join(".")
    end
  end

  # A port equal to the scheme's default is dropped.
  normalized_port = self.port
  if self.class.scheme_mapping[normalized_scheme] == normalized_port
    normalized_port = nil
  end

  normalized_path = nil
  normalized_path = self.path.strip if self.path != nil
  if normalized_scheme != nil && normalized_host == nil
    if self.class.ip_based_schemes.include?(normalized_scheme) &&
        normalized_path =~ /[\w\.]+/
      # Forgiving mode: "http:example" is read as a host, and a host
      # without any dot gets ".com" appended.
      normalized_host = normalized_path
      normalized_path = nil
      unless normalized_host =~ /\./
        normalized_host = normalized_host + ".com"
      end
    end
  end
  if normalized_path == nil &&
      normalized_scheme != nil &&
      normalized_host != nil
    normalized_path = "/"
  end
  if normalized_path != nil
    normalized_path = self.class.normalize_path(normalized_path)
    normalized_path = self.class.normalize_escaping(normalized_path)
  end
  if normalized_path == ""
    if ["http", "https", "ftp", "tftp"].include?(normalized_scheme)
      normalized_path = "/"
    end
  end
  if normalized_path != nil
    # These characters are left unescaped in paths.
    [["%3B", ";"], ["%3A", ":"], ["%40", "@"], ["%2C", ","], ["%3D", "="]].each do |escaped, literal|
      normalized_path.gsub!(escaped, literal)
    end
  end

  normalized_query = nil
  normalized_query = self.query.strip if self.query != nil
  normalized_query = self.class.normalize_escaping(normalized_query)
  if normalized_query != nil
    # These characters are left unescaped in queries.
    normalized_query.gsub!(/%3D/, "=")
    normalized_query.gsub!(/%26/, "&")
  end

  normalized_fragment = nil
  normalized_fragment = self.fragment.strip if self.fragment != nil
  normalized_fragment = self.class.normalize_escaping(normalized_fragment)

  ForgivingURI.new(
    normalized_scheme,
    normalized_userinfo,
    normalized_host,
    normalized_port,
    normalized_path,
    normalized_query,
    normalized_fragment
  )
end
|
|
662
|
+
|
|
663
|
+
# Destructively normalizes this URI object.
|
|
664
|
+
def normalize!
  # Computes the normalized URI, then hands it to replace_self.
  normalized = self.normalize
  replace_self(normalized)
end
|
|
667
|
+
|
|
668
|
+
# Creates a URI suitable for display to users. If semantic attacks are
|
|
669
|
+
# likely, the application should try to detect these and warn the user.
|
|
670
|
+
# See RFC 3986 section 7.6 for more information.
|
|
671
|
+
def display_uri
  # Starts from the normalized URI and tries to swap its host for the
  # Unicode form. If that conversion fails for any reason, the normalized
  # URI is returned with its host untouched.
  presentable = self.normalize
  begin
    presentable.instance_variable_set("@host",
      ForgivingURI::IDNA.to_unicode(presentable.host))
  rescue Exception
    nil
  end
  presentable
end
|
|
681
|
+
|
|
682
|
+
# Returns true if the URI objects are equal. This method normalizes
|
|
683
|
+
# both URIs before doing the comparison, and allows comparison against
|
|
684
|
+
# strings.
|
|
685
|
+
def ===(uri)
  # Compares normalized string forms. Objects that respond to normalize are
  # normalized directly; anything else is parsed from its to_s first, and a
  # failure while doing so simply means "not equal".
  other_string =
    if uri.respond_to?(:normalize)
      uri.normalize.to_s
    else
      begin
        ForgivingURI.parse(uri.to_s).normalize.to_s
      rescue Exception
        return false
      end
    end
  self.normalize.to_s == other_string
end
|
|
698
|
+
|
|
699
|
+
# Returns true if the URI objects are equal. This method normalizes
|
|
700
|
+
# both URIs before doing the comparison.
|
|
701
|
+
def ==(uri)
  # Only objects of this class (or a subclass) can be equal; both sides are
  # compared by their normalized string forms.
  uri.kind_of?(self.class) && self.normalize.to_s == uri.normalize.to_s
end
|
|
705
|
+
|
|
706
|
+
# Returns true if the URI objects are equal. Unlike #==, this method does
# NOT normalize either URI: the raw string forms must match exactly, and
# the argument must be an instance of the receiver's class (or a subclass).
def eql?(uri)
  uri.kind_of?(self.class) && self.to_s == uri.to_s
end
|
|
712
|
+
|
|
713
|
+
# Clones the URI object.
#
# Every non-nil component except the port is itself duplicated before
# being passed to ForgivingURI.new; the port is passed through as-is.
# The receiver's @specified_port is copied onto the clone afterwards.
# As a side effect, @specified_port is initialised to nil on the receiver
# if it had never been set.
def dup
  copy_of = lambda { |component| component.nil? ? nil : component.dup }
  duplicated_uri = ForgivingURI.new(
    copy_of.call(self.scheme),
    copy_of.call(self.userinfo),
    copy_of.call(self.host),
    self.port,
    copy_of.call(self.path),
    copy_of.call(self.query),
    copy_of.call(self.fragment)
  )
  @specified_port = nil if !defined?(@specified_port)
  duplicated_uri.instance_variable_set("@specified_port", @specified_port)
  duplicated_uri
end
|
|
741
|
+
|
|
742
|
+
# Returns the assembled URI as a string.
#
# Components are appended in order (scheme, authority, path, query,
# fragment) and each one is skipped when it is nil:
#   scheme    -> "scheme:"
#   authority -> "//authority"
#   path      -> appended verbatim
#   query     -> "?query"
#   fragment  -> "#fragment"
def to_s
  assembled = ""
  assembled << "#{self.scheme}:" unless self.scheme.nil?
  assembled << "//#{self.authority}" unless self.authority.nil?
  assembled << self.path unless self.path.nil?
  assembled << "?#{self.query}" unless self.query.nil?
  assembled << "##{self.fragment}" unless self.fragment.nil?
  assembled
end
|
|
762
|
+
|
|
763
|
+
# Returns a string representation of the URI object's state, in the form
# "#<ClassName:0x<object_id in hex> URI:<assembled uri>>".
def inspect
  "#<%s:%#0x URI:%s>" % [self.class.to_s, self.object_id, self.to_s]
end
|
|
767
|
+
|
|
768
|
+
# This module handles internationalized domain names. It delegates to the
# libidn bindings (IDN::Idna) when they can be loaded. When Ruby has an
# implementation of nameprep, stringprep, punycode, etc, this module
# should contain an actual implementation of IDNA; until then both
# conversions raise NotImplementedError if libidn can't be used.
module IDNA
  # Returns the ascii representation of the label.
  # A nil label yields nil. Raises NotImplementedError when the libidn
  # bindings are unavailable.
  def self.to_ascii(label)
    return nil if label.nil?
    unless self.use_libidn?
      raise NotImplementedError,
        "There is no available pure-ruby implementation. " +
        "Install libidn bindings."
    end
    IDN::Idna.toASCII(label)
  end

  # Returns the unicode representation of the label.
  # A nil label yields nil. Raises NotImplementedError when the libidn
  # bindings are unavailable.
  def self.to_unicode(label)
    return nil if label.nil?
    unless self.use_libidn?
      raise NotImplementedError,
        "There is no available pure-ruby implementation. " +
        "Install libidn bindings."
    end
    IDN::Idna.toUnicode(label)
  end

  # NOTE: a bare `private` does not change the visibility of methods
  # defined with `def self.`, so use_libidn? below is still publicly
  # callable.
  private
  # Determines if the libidn bindings are available and able to be used.
  # The answer is computed on first use (trying to require 'rubygems' and
  # then 'idn', ignoring LoadError) and cached in @use_libidn.
  def self.use_libidn?
    unless defined?(@use_libidn) && !@use_libidn.nil?
      ['rubygems', 'idn'].each do |library|
        begin
          require library
        rescue LoadError
          nil
        end
      end
      @use_libidn = defined?(IDN::Idna) ? true : false
    end
    @use_libidn
  end
end
|
|
816
|
+
|
|
817
|
+
private
|
|
818
|
+
# Resolves paths to their simplest form by repeatedly removing "." and
# ".." segments until the path stops changing.
#
# path - the path String to simplify, or nil.
#
# Returns a new String (the argument is not modified), or nil for nil.
#
# Each pass:
#   * collapses "/./" and a trailing "/." to "/";
#   * removes one "<segment>/.." pair (both mid-path and at the end), where
#     the segment is matched literally -- it is escaped before being
#     interpolated into the pattern, so names such as "a(b" or "a+b" work;
#   * drops a leading "." or ".." segment, and a leading "/." or "/.."
#     segment. Only whole segments are dropped, so a name that merely
#     starts with a dot (".hidden") is preserved.
def self.normalize_path(path)
  return nil if path.nil?
  normalized_path = path.dup
  loop do
    previous_state = normalized_path.dup
    normalized_path.gsub!(/\/\.\//, "/")
    normalized_path.gsub!(/\/\.$/, "/")

    # First "<segment>/../" occurrence; nil when there is none. A nil
    # parent is interpolated as "" (as before), which collapses "//../".
    parent = normalized_path[/\/([^\/]+)\/\.\.\//, 1]
    if parent != "." && parent != ".."
      normalized_path.gsub!(/\/#{Regexp.escape(parent.to_s)}\/\.\.\//, "/")
    end

    # Same, for a "<segment>/.." pair that ends the path.
    parent = normalized_path[/\/([^\/]+)\/\.\.$/, 1]
    if parent != "." && parent != ".."
      normalized_path.gsub!(/\/#{Regexp.escape(parent.to_s)}\/\.\.$/, "/")
    end

    normalized_path.gsub!(/^\.\.?(?:\/|$)/, "")
    normalized_path.gsub!(/^\/\.\.?\//, "/")
    break if previous_state == normalized_path
  end
  normalized_path
end
|
|
840
|
+
|
|
841
|
+
# Normalizes percent escaping of characters.
#
# escaped_section - a URI component String that may contain %XX escapes,
#                   or nil.
#
# Every %XX escape is first decoded. If the libidn bindings are usable the
# decoded text is NFKC-normalized. The result is then re-encoded byte by
# byte: unreserved characters and "/" are kept literally, every other byte
# is written as an upper-case, two-digit %XX escape.
#
# The work is done on bytes rather than by indexing the string, because
# String#[] with an index returns a one-character String on Ruby 1.9+
# (so calling to_s(16) on it fails), and escapes are always zero-padded
# (byte 0x0A becomes "%0A").
#
# Returns a new String, or nil for nil.
def self.normalize_escaping(escaped_section)
  return nil if escaped_section.nil?
  normalized_section = escaped_section.dup
  # Where strings carry an encoding, decode on raw bytes so that decoded
  # high bytes can sit next to existing non-ASCII text without an
  # encoding-compatibility error.
  if normalized_section.respond_to?(:force_encoding)
    normalized_section.force_encoding("BINARY")
  end
  normalized_section.gsub!(/%[0-9a-f]{2}/i) do |sequence|
    sequence[1..2].to_i(16).chr
  end
  if ForgivingURI::IDNA.send(:use_libidn?)
    # NOTE(review): assumes the decoded bytes are UTF-8 text, which is
    # what NFKC normalization presumably expects -- confirm.
    if normalized_section.respond_to?(:force_encoding)
      normalized_section.force_encoding("UTF-8")
    end
    normalized_section = IDN::Stringprep.nfkc_normalize(normalized_section)
  end
  new_section = ""
  normalized_section.each_byte do |byte|
    if self.unreserved?(byte) || byte == 0x2F
      new_section << byte.chr
    else
      new_section << sprintf("%%%02X", byte)
    end
  end
  new_section
end
|
|
864
|
+
|
|
865
|
+
# Returns true if the specified character is unreserved, i.e. is a member
# of the list returned by self.unreserved.
#
# character - a String (only its first character is examined) or any
#             object responding to #chr, such as an Integer character
#             code. Anything else is treated as not unreserved.
def self.unreserved?(character)
  candidate = character.respond_to?(:chr) ? character.chr : nil
  candidate = character[0..0] if character.kind_of?(String)
  self.unreserved.include?(candidate)
end
|
|
872
|
+
|
|
873
|
+
# Returns a list of unreserved characters: "-", ".", "_", "~", the ASCII
# letters in both cases and the digits, as a sorted Array of
# one-character Strings. The list is built once and cached in
# @unreserved.
def self.unreserved
  return @unreserved if defined?(@unreserved) && !@unreserved.nil?
  letters = ("a".."z").to_a
  digits = ("0".."9").to_a
  @unreserved = ["-", ".", "_", "~"]
  @unreserved.concat(letters)
  @unreserved.concat(letters.map { |letter| letter.upcase })
  @unreserved.concat(digits)
  @unreserved.sort!
  @unreserved
end
|
|
888
|
+
|
|
889
|
+
# Assigns the specified components to the appropriate instance variables.
# Used in destructive operations to avoid code repetition.
#
# port may be nil, an Integer or a String of digits. It is remembered
# verbatim (as a String) in @specified_port, while @port holds it as an
# Integer, or nil when it is absent or zero.
#
# Raises InvalidURIError when every component is nil, when the port is not
# made of digits, or when a scheme is given together with an empty host
# and an empty path. Components assigned before the failing check remain
# set.
def assign_components(scheme, userinfo, host, port, path, query, fragment)
  components = [scheme, userinfo, host, port, path, query, fragment]
  if components.all? { |component| component.nil? }
    raise InvalidURIError, "All parameters were nil."
  end
  @scheme = scheme
  @userinfo = userinfo
  @host = host
  @specified_port = port.to_s
  # Integer rather than Fixnum: the Fixnum constant no longer exists on
  # current Ruby versions, and Integer covers it on older ones.
  @port = port.kind_of?(Integer) ? port.to_s : port
  if @port != nil && !(@port =~ /^\d+$/)
    raise InvalidURIError,
      "Invalid port number: #{@port.inspect}"
  end
  # nil.to_i is 0, so a missing port and an explicit 0 both end up as nil.
  @port = @port.to_i
  @port = nil if @port == 0
  @path = path
  @query = query
  @fragment = fragment
  if @scheme != nil && @host == "" && @path == ""
    raise InvalidURIError,
      "Absolute URI missing hierarchical segment."
  end
end
|
|
916
|
+
|
|
917
|
+
# Replaces the internal state of self with the specified URI's state.
# Used in destructive operations to avoid massive code repetition.
#
# uri - the URI object to copy from. It must respond to scheme, userinfo,
#       host, path, query and fragment; its @specified_port instance
#       variable is read directly.
#
# @authority, @user and @password are reset to nil (presumably cached
# values derived from the components -- confirm against their readers).
# @port is rebuilt from @specified_port and, consistently with
# assign_components, is nil rather than 0 when no port was specified.
#
# Returns self.
def replace_self(uri)
  @authority = nil
  @user = nil
  @password = nil

  @scheme = uri.scheme
  @userinfo = uri.userinfo
  @host = uri.host
  @specified_port = uri.instance_variable_get("@specified_port")
  @port = @specified_port.to_s.to_i
  @port = nil if @port == 0
  @path = uri.path
  @query = uri.query
  @fragment = uri.fragment
  self
end
|
|
934
|
+
end
|
|
935
|
+
|
|
936
|
+
# Joins uri onto base and returns the result as a String.
#
# Before joining, any extra slashes directly after a leading "scheme://"
# are removed from uri ("http:////host" becomes "http://host"). The scheme
# pattern accepts letters, digits, "+", "." and "-"; the "-" is placed
# last in the character class so it is a literal rather than forming a
# range that would also admit ",".
#
# If ForgivingURI.join raises URI::BadURIError and base is itself a
# relative URI, uri is returned on its own (parsed and re-serialized).
# If base is not relative in that situation the method returns nil.
def urljoin(base, uri)
  urifixer = /^([A-Za-z][A-Za-z0-9+.-]*:\/\/)(\/*)(.*?)/u
  uri = uri.sub(urifixer, '\1\3')
  begin
    return ForgivingURI.join(base, uri).to_s
  rescue URI::BadURIError
    return ForgivingURI.parse(uri).to_s if ForgivingURI.parse(base).relative?
  end
  nil
end
|
|
947
|
+
|