rfeedparser 0.9.87 → 0.9.91

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,93 @@
1
+ #!/usr/bin/ruby
2
module FeedParserUtilities
  # A Hash that understands the many names different feed formats use for
  # the same attribute (for example "When was this feed last updated?").
  #
  # It serves both the developer who knows the feed type and uses its native
  # attribute names, and the developer who wants one consistent interface.
  #
  # @@keymap maps a common name (the key) to the attribute name(s) it
  # "really is" (the value). #[] and #[]= consult @@keymap to decide which
  # attribute the caller "really means" when the requested name is one of
  # @@keymap's keys.
  class FeedParserDict < Hash
    @@keymap = {'channel' => 'feed',
                'items' => 'entries',
                'guid' => 'id',
                'date' => 'updated',
                'date_parsed' => 'updated_parsed',
                'description' => ['subtitle', 'summary'],
                'url' => ['href'],
                'modified' => 'updated',
                'modified_parsed' => 'updated_parsed',
                'issued' => 'published',
                'issued_parsed' => 'published_parsed',
                'copyright' => 'rights',
                'copyright_detail' => 'rights_detail',
                'tagline' => 'subtitle',
                'tagline_detail' => 'subtitle_detail'}

    # Hash (via Enumerable) already has an #entries method, so it must be
    # overridden explicitly; method_missing would never be reached for it.
    def entries
      self['entries']
    end

    # Builds a dict from either a list of [key, value] pairs (stored through
    # #[]= so aliases are translated) or from a Hash (merged as-is).
    # Any other argument, including nil, yields an empty dict.
    def initialize(pairs=nil)
      if pairs.is_a?(Array) && pairs[0].is_a?(Array) && pairs[0].length == 2
        pairs.each do |pair|
          k, v = pair
          self[k] = v
        end
      elsif pairs.is_a?(Hash)
        # is_a? (rather than an exact class check) so that another
        # FeedParserDict can be used as the source as well.
        merge!(pairs)
      end
    end

    # Looks up +key+, translating aliases through @@keymap.
    #
    # 'category' returns the term of the first tag and 'categories' returns
    # [scheme, term] pairs for all tags. When no tags are stored these
    # return nil and [] respectively instead of raising NoMethodError.
    def [](key)
      if key == 'category'
        tags = self['tags']
        return nil if tags.nil? || tags.empty?
        return tags[0]['term']
      end
      if key == 'categories'
        return (self['tags'] || []).collect { |tag| [tag['scheme'], tag['term']] }
      end
      realkey = @@keymap[key] || key
      if realkey.is_a?(Array)
        # The block parameter deliberately has its own name: on Ruby 1.8 a
        # block parameter called +key+ would overwrite the outer variable.
        realkey.each { |candidate| return self[candidate] if has_key?(candidate) }
      end
      # Note that the original key is preferred over the realkey we (might
      # have) found in @@keymap.
      return super(key) if has_key?(key)
      super(realkey)
    end

    # Stores +value+ under the canonical name for +key+. When an alias maps
    # to several candidates, the first one is used.
    def []=(key, value)
      if @@keymap.key?(key)
        key = @@keymap[key]
        key = key[0] if key.is_a?(Array)
      end
      super(key, value)
    end

    # Attribute-style access: +dict.title+ reads self['title'] and
    # +dict.title = x+ writes it. Names ending in '!' or '?' or starting
    # with '_' are rejected with NoMethodError.
    def method_missing(msym, *args)
      methodname = msym.to_s
      if methodname[-1, 1] == '='
        return self[methodname[0..-2]] = args[0]
      elsif methodname[-1, 1] != '!' && methodname[-1, 1] != '?' && methodname[0, 1] != "_" # FIXME implement with private?
        return self[methodname]
      else
        raise NoMethodError, "whoops, we don't know about the attribute or method called `#{methodname}' for #{self}:#{self.class}"
      end
    end
  end
end
@@ -0,0 +1,947 @@
1
+ #!/usr/bin/ruby
2
+ # From Robert Aman's GentleCMS URI.
3
+ # GentleCMS, Copyright (c) 2006 Robert Aman
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining
6
+ # a copy of this software and associated documentation files (the
7
+ # "Software"), to deal in the Software without restriction, including
8
+ # without limitation the rights to use, copy, modify, merge, publish,
9
+ # distribute, sublicense, and/or sell copies of the Software, and to
10
+ # permit persons to whom the Software is furnished to do so, subject to
11
+ # the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be
14
+ # included in all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
+
24
+ # This is an implementation of a URI parser based on RFC 3986.
25
+ class ForgivingURI
26
+ # Raised if something other than a uri is supplied.
27
+ class InvalidURIError < StandardError
28
+ end
29
+ # Raised if an invalid method option is supplied.
30
+ class InvalidOptionError < StandardError
31
+ end
32
+
33
+ # Returns a URI object based on the parsed string.
34
+ def self.parse(uri_string)
35
+ return nil if uri_string.nil?
36
+
37
+ # If a URI object is passed, just return itself.
38
+ return uri_string if uri_string.kind_of?(self)
39
+
40
+ # If a URI object of the Ruby standard library variety is passed,
41
+ # convert it to a string, then parse the string.
42
+ if uri_string.class.name =~ /^URI::/
43
+ uri_string = uri_string.to_s
44
+ end
45
+
46
+ uri_regex =
47
+ /^(([^:\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/
48
+ scan = uri_string.scan(uri_regex)
49
+ fragments = scan[0]
50
+ return nil if fragments.nil?
51
+ scheme = fragments[1]
52
+ authority = fragments[3]
53
+ path = fragments[4]
54
+ query = fragments[6]
55
+ fragment = fragments[8]
56
+ userinfo = nil
57
+ host = nil
58
+ port = nil
59
+ if authority != nil
60
+ userinfo = authority.scan(/^([^\[\]]*)@/).flatten[0]
61
+ host = authority.gsub(/^([^\[\]]*)@/, "").gsub(/:([^:@\[\]]*?)$/, "")
62
+ port = authority.scan(/:([^:@\[\]]*?)$/).flatten[0]
63
+ end
64
+ if port.nil? || port == ""
65
+ port = nil
66
+ end
67
+
68
+ # WARNING: Not standards-compliant, but follows the theme
69
+ # of Postel's law:
70
+ #
71
+ # Special exception for dealing with the retarded idea of the
72
+ # feed pseudo-protocol. Without this exception, the parser will read
73
+ # the URI as having a blank port number, instead of as having a second
74
+ # URI embedded within. This exception translates these broken URIs
75
+ # and instead treats the inner URI as opaque.
76
+ if scheme == "feed" && host == "http"
77
+ userinfo = nil
78
+ host = nil
79
+ port = nil
80
+ path = authority + path
81
+ end
82
+
83
+ return ForgivingURI.new(scheme, userinfo, host, port, path, query, fragment)
84
+ end
85
+
86
+ # Converts a path to a file protocol URI. If the path supplied is
87
+ # relative, it will be returned as a relative URI. If the path supplied
88
+ # is actually a URI, it will return the parsed URI.
89
+ def self.convert_path(path)
90
+ return nil if path.nil?
91
+
92
+ converted_uri = path.strip
93
+ if converted_uri.length > 0 && converted_uri[0..0] == "/"
94
+ converted_uri = "file://" + converted_uri
95
+ end
96
+ if converted_uri.length > 0 &&
97
+ converted_uri.scan(/^[a-zA-Z]:[\\\/]/).size > 0
98
+ converted_uri = "file:///" + converted_uri
99
+ end
100
+ converted_uri.gsub!(/^file:\/*/i, "file:///")
101
+ if converted_uri =~ /^file:/i
102
+ # Adjust windows-style uris
103
+ converted_uri.gsub!(/^file:\/\/\/([a-zA-Z])\|/i, 'file:///\1:')
104
+ converted_uri.gsub!(/\\/, '/')
105
+ converted_uri = self.parse(converted_uri).normalize
106
+ if File.exists?(converted_uri.path) &&
107
+ File.stat(converted_uri.path).directory?
108
+ converted_uri.path.gsub!(/\/$/, "")
109
+ converted_uri.path = converted_uri.path + '/'
110
+ end
111
+ else
112
+ converted_uri = self.parse(converted_uri)
113
+ end
114
+
115
+ return converted_uri
116
+ end
117
+
118
+ # Joins several uris together.
119
+ def self.join(*uris)
120
+ uri_objects = uris.collect do |uri|
121
+ uri.kind_of?(self) ? uri : self.parse(uri.to_s)
122
+ end
123
+ result = uri_objects.shift.dup
124
+ for uri in uri_objects
125
+ result.merge!(uri)
126
+ end
127
+ return result
128
+ end
129
+
130
+ # Correctly escapes a uri.
131
+ def self.escape(uri)
132
+ uri_object = uri.kind_of?(self) ? uri : self.parse(uri.to_s)
133
+ return ForgivingURI.new(
134
+ uri_object.scheme,
135
+ uri_object.userinfo,
136
+ uri_object.host,
137
+ uri_object.specified_port,
138
+ self.normalize_escaping(uri_object.path),
139
+ self.normalize_escaping(uri_object.query),
140
+ self.normalize_escaping(uri_object.fragment)
141
+ ).to_s
142
+ end
143
+
144
+ # Extracts uris from an arbitrary body of text.
145
+ def self.extract(text, options={})
146
+ defaults = {:base => nil, :parse => false}
147
+ options = defaults.merge(options)
148
+ raise InvalidOptionError unless (options.keys - defaults.keys).empty?
149
+ # This regular expression needs to be less forgiving or else it would
150
+ # match virtually all text. Which isn't exactly what we're going for.
151
+ extract_regex = /((([a-z\+]+):)[^ \n\<\>\"\\]+[\w\/])/
152
+ extracted_uris =
153
+ text.scan(extract_regex).collect { |match| match[0] }
154
+ sgml_extract_regex = /<[^>]+href=\"([^\"]+?)\"[^>]*>/
155
+ sgml_extracted_uris =
156
+ text.scan(sgml_extract_regex).collect { |match| match[0] }
157
+ extracted_uris.concat(sgml_extracted_uris - extracted_uris)
158
+ textile_extract_regex = /\".+?\":([^ ]+\/[^ ]+)[ \,\.\;\:\?\!\<\>\"]/i
159
+ textile_extracted_uris =
160
+ text.scan(textile_extract_regex).collect { |match| match[0] }
161
+ extracted_uris.concat(textile_extracted_uris - extracted_uris)
162
+ parsed_uris = []
163
+ base_uri = nil
164
+ if options[:base] != nil
165
+ base_uri = options[:base] if options[:base].kind_of?(self)
166
+ base_uri = self.parse(options[:base].to_s) if base_uri == nil
167
+ end
168
+ for uri_string in extracted_uris
169
+ begin
170
+ if base_uri == nil
171
+ parsed_uris << self.parse(uri_string)
172
+ else
173
+ parsed_uris << (base_uri + self.parse(uri_string))
174
+ end
175
+ rescue Exception
176
+ nil
177
+ end
178
+ end
179
+ parsed_uris.reject! do |uri|
180
+ (uri.scheme =~ /T\d+/ ||
181
+ uri.scheme == "xmlns" ||
182
+ uri.scheme == "xml" ||
183
+ uri.scheme == "thr" ||
184
+ uri.scheme == "this" ||
185
+ uri.scheme == "float" ||
186
+ uri.scheme == "user" ||
187
+ uri.scheme == "username" ||
188
+ uri.scheme == "out")
189
+ end
190
+ if options[:parse]
191
+ return parsed_uris
192
+ else
193
+ return parsed_uris.collect { |uri| uri.to_s }
194
+ end
195
+ end
196
+
197
+ # Creates a new uri object from component parts. Passing nil for
198
+ # any of these parameters is acceptable.
199
+ def initialize(scheme, userinfo, host, port, path, query, fragment)
200
+ assign_components(scheme, userinfo, host, port, path, query, fragment)
201
+ end
202
+
203
+ # Returns the scheme (protocol) for this URI.
204
+ def scheme
205
+ return nil if @scheme.nil? || @scheme.strip == ""
206
+ return @scheme
207
+ end
208
+
209
+ # Sets the scheme (protocol for this URI.)
210
+ def scheme=(new_scheme)
211
+ @scheme = new_scheme
212
+ end
213
+
214
+ # Returns the username and password segment of this URI.
215
+ def userinfo
216
+ return @userinfo
217
+ end
218
+
219
+ # Sets the username and password segment of this URI.
220
+ def userinfo=(new_userinfo)
221
+ @userinfo = new_userinfo
222
+ @authority = nil
223
+ end
224
+
225
+ # Returns the host for this URI.
226
+ def host
227
+ return @host
228
+ end
229
+
230
+ # Sets the host for this URI.
231
+ def host=(new_host)
232
+ @host = new_host
233
+ @authority = nil
234
+ end
235
+
236
+ # Returns the authority segment of this URI.
237
+ def authority
238
+ if !defined?(@authority) || @authority.nil?
239
+ return nil if self.host.nil?
240
+ @authority = ""
241
+ if self.userinfo != nil
242
+ @authority << "#{self.userinfo}@"
243
+ end
244
+ @authority << self.host
245
+ if self.specified_port != nil
246
+ @authority << ":#{self.specified_port}"
247
+ end
248
+ end
249
+ return @authority
250
+ end
251
+
252
+ # Sets the authority segment of this URI.
253
+ def authority=(new_authority)
254
+ @authority = new_authority
255
+ new_userinfo = nil
256
+ new_host = nil
257
+ new_port = nil
258
+ if new_authority != nil
259
+ new_userinfo = new_authority.scan(/^([^\[\]]*)@/).flatten[0]
260
+ new_host =
261
+ new_authority.gsub(/^([^\[\]]*)@/, "").gsub(/:([^:@\[\]]*?)$/, "")
262
+ new_port = new_authority.scan(/:([^:@\[\]]*?)$/).flatten[0]
263
+ end
264
+ if new_port.nil? || new_port == ""
265
+ new_port = nil
266
+ end
267
+ @userinfo = new_userinfo
268
+ @host = new_host
269
+ @port = nil
270
+ @specified_port = new_port
271
+ end
272
+
273
+ # Returns the user for this URI.
274
+ def user
275
+ if !defined?(@user) || @user.nil?
276
+ @user = nil
277
+ return @user if @userinfo.nil?
278
+ if @userinfo =~ /:/
279
+ @user = @userinfo.strip.scan(/^(.*):/).flatten[0].strip
280
+ else
281
+ @user = @userinfo.dup
282
+ end
283
+ end
284
+ return @user
285
+ end
286
+
287
+ # Sets the user for this URI.
288
+ def user=(new_user)
289
+ current_password = self.password
290
+ @user = new_user
291
+ if new_user == nil && current_password == nil
292
+ @userinfo = nil
293
+ elsif new_user == nil && current_password != nil
294
+ @userinfo = ":#{current_password}"
295
+ elsif new_user != nil && current_password == nil
296
+ @userinfo = "#{new_user}"
297
+ elsif new_user != nil && current_password != nil
298
+ @userinfo = "#{new_user}:#{current_password}"
299
+ end
300
+ @user = nil
301
+ @password = nil
302
+ @authority = nil
303
+ end
304
+
305
+ # Returns the password for this URI.
306
+ def password
307
+ if !defined?(@password) || @password.nil?
308
+ @password = nil
309
+ return @password if @userinfo.nil?
310
+ if @userinfo =~ /:/
311
+ @password = @userinfo.strip.scan(/:(.*)$/).flatten[0].strip
312
+ else
313
+ return nil
314
+ end
315
+ end
316
+ return @password
317
+ end
318
+
319
+ # Sets the password for this URI.
320
+ def password=(new_password)
321
+ current_user = self.user
322
+ @password = new_password
323
+ if current_user == nil && new_password == nil
324
+ @userinfo = nil
325
+ elsif current_user == nil && new_password != nil
326
+ @userinfo = ":#{new_password}"
327
+ elsif current_user != nil && new_password == nil
328
+ @userinfo = "#{current_user}"
329
+ elsif current_user != nil && new_password != nil
330
+ @userinfo = "#{current_user}:#{new_password}"
331
+ end
332
+ @user = nil
333
+ @authority = nil
334
+ end
335
+
336
+ # Returns an array of known ip-based schemes. These schemes typically
337
+ # use a similar URI form:
338
+ # //<user>:<password>@<host>:<port>/<url-path>
339
+ def self.ip_based_schemes
340
+ return self.scheme_mapping.keys
341
+ end
342
+
343
+ # Returns a hash of common IP-based schemes and their default port
344
+ # numbers. Adding new schemes to this hash, as necessary, will allow
345
+ # for better URI normalization.
346
+ def self.scheme_mapping
347
+ if !defined?(@protocol_mapping) || @protocol_mapping.nil?
348
+ @protocol_mapping = {
349
+ "http" => 80,
350
+ "https" => 443,
351
+ "ftp" => 21,
352
+ "tftp" => 69,
353
+ "ssh" => 22,
354
+ "svn+ssh" => 22,
355
+ "telnet" => 23,
356
+ "nntp" => 119,
357
+ "gopher" => 70,
358
+ "wais" => 210,
359
+ "prospero" => 1525
360
+ }
361
+ end
362
+ return @protocol_mapping
363
+ end
364
+
365
+ # Returns the port number for this URI. This method will normalize to the
366
+ # default port for the URI's scheme if the port isn't explicitly specified
367
+ # in the URI.
368
+ def port
369
+ if @port.to_i == 0
370
+ if self.scheme.nil?
371
+ @port = nil
372
+ else
373
+ @port = self.class.scheme_mapping[self.scheme.strip.downcase]
374
+ end
375
+ return @port
376
+ else
377
+ @port = @port.to_i
378
+ return @port
379
+ end
380
+ end
381
+
382
+ # Sets the port for this URI.
383
+ def port=(new_port)
384
+ @port = new_port.to_s.to_i
385
+ @specified_port = @port
386
+ @authority = nil
387
+ end
388
+
389
+ # Returns the port number that was actually specified in the URI string.
390
+ def specified_port
391
+ @specified_port = nil if !defined?(@specified_port)
392
+ return nil if @specified_port.nil?
393
+ port = @specified_port.to_s.to_i
394
+ if port == 0
395
+ return nil
396
+ else
397
+ return port
398
+ end
399
+ end
400
+
401
+ # Returns the path for this URI.
402
+ def path
403
+ return @path
404
+ end
405
+
406
+ # Sets the path for this URI.
407
+ def path=(new_path)
408
+ @path = new_path
409
+ end
410
+
411
+ # Returns the basename, if any, of the file at the path being referenced.
412
+ # Returns nil if there is no path component.
413
+ def basename
414
+ return nil if self.path == nil
415
+ return File.basename(self.path).gsub(/;[^\/]*$/, "")
416
+ end
417
+
418
+ # Returns the extension, if any, of the file at the path being referenced.
419
+ # Returns "" if there is no extension or nil if there is no path
420
+ # component.
421
+ def extname
422
+ return nil if self.path == nil
423
+ return File.extname(self.basename.gsub(/;[^\/]*$/, ""))
424
+ end
425
+
426
+ # Returns the query string for this URI.
427
+ def query
428
+ return @query
429
+ end
430
+
431
+ # Sets the query string for this URI.
432
+ def query=(new_query)
433
+ @query = new_query
434
+ end
435
+
436
+ # Returns the fragment for this URI.
437
+ def fragment
438
+ return @fragment
439
+ end
440
+
441
+ # Sets the fragment for this URI.
442
+ def fragment=(new_fragment)
443
+ @fragment = new_fragment
444
+ end
445
+
446
+ # Returns true if the URI uses an IP-based protocol.
447
+ def ip_based?
448
+ return false if self.scheme.nil?
449
+ return self.class.ip_based_schemes.include?(self.scheme.strip.downcase)
450
+ end
451
+
452
+ # Returns true if this URI is known to be relative.
453
+ def relative?
454
+ return self.scheme.nil?
455
+ end
456
+
457
+ # Returns true if this URI is known to be absolute.
458
+ def absolute?
459
+ return !relative?
460
+ end
461
+
462
+ # Joins two URIs together.
463
+ def +(uri)
464
+ if !uri.kind_of?(self.class)
465
+ uri = ForgivingURI.parse(uri.to_s)
466
+ end
467
+ if uri.to_s == ""
468
+ return self.dup
469
+ end
470
+
471
+ joined_scheme = nil
472
+ joined_userinfo = nil
473
+ joined_host = nil
474
+ joined_port = nil
475
+ joined_path = nil
476
+ joined_query = nil
477
+ joined_fragment = nil
478
+
479
+ # Section 5.2.2 of RFC 3986
480
+ if uri.scheme != nil
481
+ joined_scheme = uri.scheme
482
+ joined_userinfo = uri.userinfo
483
+ joined_host = uri.host
484
+ joined_port = uri.specified_port
485
+ joined_path = self.class.normalize_path(uri.path)
486
+ joined_query = uri.query
487
+ else
488
+ if uri.authority != nil
489
+ joined_userinfo = uri.userinfo
490
+ joined_host = uri.host
491
+ joined_port = uri.specified_port
492
+ joined_path = self.class.normalize_path(uri.path)
493
+ joined_query = uri.query
494
+ else
495
+ if uri.path == nil || uri.path == ""
496
+ joined_path = self.path
497
+ if uri.query != nil
498
+ joined_query = uri.query
499
+ else
500
+ joined_query = self.query
501
+ end
502
+ else
503
+ if uri.path[0..0] == "/"
504
+ joined_path = self.class.normalize_path(uri.path)
505
+ else
506
+ base_path = self.path.nil? ? "" : self.path.dup
507
+ base_path = self.class.normalize_path(base_path)
508
+ base_path.gsub!(/\/[^\/]+$/, "/")
509
+ joined_path = self.class.normalize_path(base_path + uri.path)
510
+ end
511
+ joined_query = uri.query
512
+ end
513
+ joined_userinfo = self.userinfo
514
+ joined_host = self.host
515
+ joined_port = self.specified_port
516
+ end
517
+ joined_scheme = self.scheme
518
+ end
519
+ joined_fragment = uri.fragment
520
+
521
+ return ForgivingURI.new(
522
+ joined_scheme,
523
+ joined_userinfo,
524
+ joined_host,
525
+ joined_port,
526
+ joined_path,
527
+ joined_query,
528
+ joined_fragment
529
+ )
530
+ end
531
+
532
+ # Merges two URIs together.
533
+ def merge(uri)
534
+ return self + uri
535
+ end
536
+
537
+ # Destructive form of merge.
538
+ def merge!(uri)
539
+ replace_self(self.merge(uri))
540
+ end
541
+
542
+ # Returns a normalized URI object.
543
+ #
544
+ # NOTE: This method does not attempt to conform to specifications. It
545
+ # exists largely to correct other people's failures to read the
546
+ # specifications, and also to deal with caching issues since several
547
+ # different URIs may represent the same resource and should not be
548
+ # cached multiple times.
549
+ def normalize
550
+ normalized_scheme = nil
551
+ normalized_scheme = self.scheme.strip.downcase if self.scheme != nil
552
+ normalized_scheme = "svn+ssh" if normalized_scheme == "ssh+svn"
553
+ if normalized_scheme == "feed"
554
+ if self.to_s =~ /^feed:\/*http:\/*/
555
+ return self.class.parse(
556
+ self.to_s.scan(/^feed:\/*(http:\/*.*)/).flatten[0]).normalize
557
+ end
558
+ end
559
+ normalized_userinfo = nil
560
+ normalized_userinfo = self.userinfo.strip if self.userinfo != nil
561
+ normalized_host = nil
562
+ normalized_host = self.host.strip.downcase if self.host != nil
563
+ if normalized_host != nil
564
+ begin
565
+ normalized_host = ForgivingURI::IDNA.to_ascii(normalized_host)
566
+ rescue Exception
567
+ nil
568
+ end
569
+ end
570
+
571
+ # Normalize IPv4 addresses that were generated with the stupid
572
+ # assumption that inet_addr() would be used to parse the IP address.
573
+ if normalized_host != nil && normalized_host.strip =~ /^\d+$/
574
+ # Decimal IPv4 address.
575
+ decimal = normalized_host.to_i
576
+ if decimal < (256 ** 4)
577
+ octets = [0,0,0,0]
578
+ octets[0] = decimal >> 24
579
+ decimal -= (octets[0] * (256 ** 3))
580
+ octets[1] = decimal >> 16
581
+ decimal -= (octets[1] * (256 ** 2))
582
+ octets[2] = decimal >> 8
583
+ decimal -= (octets[2] * (256 ** 1))
584
+ octets[3] = decimal
585
+ normalized_host = octets.join(".")
586
+ end
587
+ elsif (normalized_host != nil && normalized_host.strip =~
588
+ /^0+[0-7]{3}.0+[0-7]{3}.0+[0-7]{3}.0+[0-7]{3}$/)
589
+ # Octal IPv4 address.
590
+ octet_strings = normalized_host.split('.')
591
+ octets = []
592
+ octet_strings.each do |octet_string|
593
+ decimal = octet_string.to_i(8)
594
+ octets << decimal
595
+ end
596
+ normalized_host = octets.join(".")
597
+ elsif (normalized_host != nil && normalized_host.strip =~
598
+ /^0x[0-9a-f]{2}.0x[0-9a-f]{2}.0x[0-9a-f]{2}.0x[0-9a-f]{2}$/i)
599
+ # Hexidecimal IPv4 address.
600
+ octet_strings = normalized_host.split('.')
601
+ octets = []
602
+ octet_strings.each do |octet_string|
603
+ decimal = octet_string[2...4].to_i(16)
604
+ octets << decimal
605
+ end
606
+ normalized_host = octets.join(".")
607
+ end
608
+ normalized_port = self.port
609
+ if self.class.scheme_mapping[normalized_scheme] == normalized_port
610
+ normalized_port = nil
611
+ end
612
+ normalized_path = nil
613
+ normalized_path = self.path.strip if self.path != nil
614
+ if normalized_scheme != nil && normalized_host == nil
615
+ if self.class.ip_based_schemes.include?(normalized_scheme) &&
616
+ normalized_path =~ /[\w\.]+/
617
+ normalized_host = normalized_path
618
+ normalized_path = nil
619
+ unless normalized_host =~ /\./
620
+ normalized_host = normalized_host + ".com"
621
+ end
622
+ end
623
+ end
624
+ if normalized_path == nil &&
625
+ normalized_scheme != nil &&
626
+ normalized_host != nil
627
+ normalized_path = "/"
628
+ end
629
+ if normalized_path != nil
630
+ normalized_path = self.class.normalize_path(normalized_path)
631
+ normalized_path = self.class.normalize_escaping(normalized_path)
632
+ end
633
+ if normalized_path == ""
634
+ if ["http", "https", "ftp", "tftp"].include?(normalized_scheme)
635
+ normalized_path = "/"
636
+ end
637
+ end
638
+ normalized_path.gsub!(/%3B/, ";") if normalized_path != nil
639
+ normalized_path.gsub!(/%3A/, ":") if normalized_path != nil
640
+ normalized_path.gsub!(/%40/, "@") if normalized_path != nil
641
+ normalized_path.gsub!(/%2C/, ",") if normalized_path != nil
642
+ normalized_path.gsub!(/%3D/, "=") if normalized_path != nil
643
+
644
+ normalized_query = nil
645
+ normalized_query = self.query.strip if self.query != nil
646
+ normalized_query = self.class.normalize_escaping(normalized_query)
647
+ normalized_query.gsub!(/%3D/, "=") if normalized_query != nil
648
+ normalized_query.gsub!(/%26/, "&") if normalized_query != nil
649
+ normalized_fragment = nil
650
+ normalized_fragment = self.fragment.strip if self.fragment != nil
651
+ normalized_fragment = self.class.normalize_escaping(normalized_fragment)
652
+ return ForgivingURI.new(
653
+ normalized_scheme,
654
+ normalized_userinfo,
655
+ normalized_host,
656
+ normalized_port,
657
+ normalized_path,
658
+ normalized_query,
659
+ normalized_fragment
660
+ )
661
+ end
662
+
663
+ # Destructively normalizes this URI object.
664
+ def normalize!
665
+ replace_self(self.normalize)
666
+ end
667
+
668
+ # Creates a URI suitable for display to users. If semantic attacks are
669
+ # likely, the application should try to detect these and warn the user.
670
+ # See RFC 3986 section 7.6 for more information.
671
+ def display_uri
672
+ display_uri = self.normalize
673
+ begin
674
+ display_uri.instance_variable_set("@host",
675
+ ForgivingURI::IDNA.to_unicode(display_uri.host))
676
+ rescue Exception
677
+ nil
678
+ end
679
+ return display_uri
680
+ end
681
+
682
+ # Returns true if the URI objects are equal. This method normalizes
683
+ # both URIs before doing the comparison, and allows comparison against
684
+ # strings.
685
+ def ===(uri)
686
+ uri_string = nil
687
+ if uri.respond_to?(:normalize)
688
+ uri_string = uri.normalize.to_s
689
+ else
690
+ begin
691
+ uri_string = ForgivingURI.parse(uri.to_s).normalize.to_s
692
+ rescue Exception
693
+ return false
694
+ end
695
+ end
696
+ return self.normalize.to_s == uri_string
697
+ end
698
+
699
+ # Returns true if the URI objects are equal. This method normalizes
700
+ # both URIs before doing the comparison.
701
+ def ==(uri)
702
+ return false unless uri.kind_of?(self.class)
703
+ return self.normalize.to_s == uri.normalize.to_s
704
+ end
705
+
706
+ # Returns true if the URI objects are equal. This method does NOT
707
+ # normalize either URI before doing the comparison.
708
+ def eql?(uri)
709
+ return false unless uri.kind_of?(self.class)
710
+ return self.to_s == uri.to_s
711
+ end
712
+
713
+ # Clones the URI object.
714
+ def dup
715
+ duplicated_scheme = nil
716
+ duplicated_scheme = self.scheme.dup if self.scheme != nil
717
+ duplicated_userinfo = nil
718
+ duplicated_userinfo = self.userinfo.dup if self.userinfo != nil
719
+ duplicated_host = nil
720
+ duplicated_host = self.host.dup if self.host != nil
721
+ duplicated_port = self.port
722
+ duplicated_path = nil
723
+ duplicated_path = self.path.dup if self.path != nil
724
+ duplicated_query = nil
725
+ duplicated_query = self.query.dup if self.query != nil
726
+ duplicated_fragment = nil
727
+ duplicated_fragment = self.fragment.dup if self.fragment != nil
728
+ duplicated_uri = ForgivingURI.new(
729
+ duplicated_scheme,
730
+ duplicated_userinfo,
731
+ duplicated_host,
732
+ duplicated_port,
733
+ duplicated_path,
734
+ duplicated_query,
735
+ duplicated_fragment
736
+ )
737
+ @specified_port = nil if !defined?(@specified_port)
738
+ duplicated_uri.instance_variable_set("@specified_port", @specified_port)
739
+ return duplicated_uri
740
+ end
741
+
742
+ # Returns the assembled URI as a string.
743
+ def to_s
744
+ uri_string = ""
745
+ if self.scheme != nil
746
+ uri_string << "#{self.scheme}:"
747
+ end
748
+ if self.authority != nil
749
+ uri_string << "//#{self.authority}"
750
+ end
751
+ if self.path != nil
752
+ uri_string << self.path
753
+ end
754
+ if self.query != nil
755
+ uri_string << "?#{self.query}"
756
+ end
757
+ if self.fragment != nil
758
+ uri_string << "##{self.fragment}"
759
+ end
760
+ return uri_string
761
+ end
762
+
763
+ # Returns a string representation of the URI object's state.
764
+ def inspect
765
+ sprintf("#<%s:%#0x URI:%s>", self.class.to_s, self.object_id, self.to_s)
766
+ end
767
+
768
+ # This module handles internationalized domain names. When Ruby has an
769
+ # implementation of nameprep, stringprep, punycode, etc, this
770
+ # module should contain an actual implementation of IDNA instead of
771
+ # returning nil if libidn can't be used.
772
+ module IDNA
773
+ # Returns the ascii representation of the label.
774
+ def self.to_ascii(label)
775
+ return nil if label.nil?
776
+ if self.use_libidn?
777
+ return IDN::Idna.toASCII(label)
778
+ else
779
+ raise NotImplementedError,
780
+ "There is no available pure-ruby implementation. " +
781
+ "Install libidn bindings."
782
+ end
783
+ end
784
+
785
+ # Returns the unicode representation of the label.
786
+ def self.to_unicode(label)
787
+ return nil if label.nil?
788
+ if self.use_libidn?
789
+ return IDN::Idna.toUnicode(label)
790
+ else
791
+ raise NotImplementedError,
792
+ "There is no available pure-ruby implementation. " +
793
+ "Install libidn bindings."
794
+ end
795
+ end
796
+
797
+ private
798
+ # Determines if the libidn bindings are available and able to be used.
799
+ def self.use_libidn?
800
+ if !defined?(@use_libidn) || @use_libidn.nil?
801
+ begin
802
+ require 'rubygems'
803
+ rescue LoadError
804
+ nil
805
+ end
806
+ begin
807
+ require 'idn'
808
+ rescue LoadError
809
+ nil
810
+ end
811
+ @use_libidn = !!(defined?(IDN::Idna))
812
+ end
813
+ return @use_libidn
814
+ end
815
+ end
816
+
817
+ private
818
+ # Resolves paths to their simplest form.
819
+ def self.normalize_path(path)
820
+ return nil if path.nil?
821
+ normalized_path = path.dup
822
+ previous_state = normalized_path.dup
823
+ begin
824
+ previous_state = normalized_path.dup
825
+ normalized_path.gsub!(/\/\.\//, "/")
826
+ normalized_path.gsub!(/\/\.$/, "/")
827
+ parent = normalized_path.scan(/\/([^\/]+)\/\.\.\//).flatten[0]
828
+ if parent != "." && parent != ".."
829
+ normalized_path.gsub!(/\/#{parent}\/\.\.\//, "/")
830
+ end
831
+ parent = normalized_path.scan(/\/([^\/]+)\/\.\.$/).flatten[0]
832
+ if parent != "." && parent != ".."
833
+ normalized_path.gsub!(/\/#{parent}\/\.\.$/, "/")
834
+ end
835
+ normalized_path.gsub!(/^\.\.?\/?/, "")
836
+ normalized_path.gsub!(/^\/\.\.?\//, "/")
837
+ end until previous_state == normalized_path
838
+ return normalized_path
839
+ end
840
+
841
+ # Normalizes percent escaping of characters
842
+ def self.normalize_escaping(escaped_section)
843
+ return nil if escaped_section.nil?
844
+ normalized_section = escaped_section.dup
845
+ normalized_section.gsub!(/%[0-9a-f]{2}/i) do |sequence|
846
+ sequence[1..3].to_i(16).chr
847
+ end
848
+ if ForgivingURI::IDNA.send(:use_libidn?)
849
+ normalized_section =
850
+ IDN::Stringprep.nfkc_normalize(normalized_section)
851
+ end
852
+ new_section = ""
853
+ for index in 0...normalized_section.size
854
+ if self.unreserved?(normalized_section[index]) ||
855
+ normalized_section[index] == '/'[0]
856
+ new_section << normalized_section[index..index]
857
+ else
858
+ new_section << ("%" + normalized_section[index].to_s(16).upcase)
859
+ end
860
+ end
861
+ normalized_section = new_section
862
+ return normalized_section
863
+ end
864
+
865
+ # Returns true if the specified character is unreserved.
866
+ def self.unreserved?(character)
867
+ character_string = nil
868
+ character_string = character.chr if character.respond_to?(:chr)
869
+ character_string = character[0..0] if character.kind_of?(String)
870
+ return self.unreserved.include?(character_string)
871
+ end
872
+
873
+ # Returns a list of unreserved characters.
874
+ def self.unreserved
875
+ if !defined?(@unreserved) || @unreserved.nil?
876
+ @unreserved = ["-", ".", "_", "~"]
877
+ for c in "a".."z"
878
+ @unreserved << c
879
+ @unreserved << c.upcase
880
+ end
881
+ for c in "0".."9"
882
+ @unreserved << c
883
+ end
884
+ @unreserved.sort!
885
+ end
886
+ return @unreserved
887
+ end
888
+
889
+ # Assigns the specified components to the appropriate instance variables.
890
+ # Used in destructive operations to avoid code repetition.
891
+ def assign_components(scheme, userinfo, host, port, path, query, fragment)
892
+ if scheme == nil && userinfo == nil && host == nil && port == nil &&
893
+ path == nil && query == nil && fragment == nil
894
+ raise InvalidURIError, "All parameters were nil."
895
+ end
896
+ @scheme = scheme
897
+ @userinfo = userinfo
898
+ @host = host
899
+ @specified_port = port.to_s
900
+ @port = port
901
+ @port = @port.to_s if @port.kind_of?(Fixnum)
902
+ if @port != nil && !(@port =~ /^\d+$/)
903
+ raise InvalidURIError,
904
+ "Invalid port number: #{@port.inspect}"
905
+ end
906
+ @port = @port.to_i
907
+ @port = nil if @port == 0
908
+ @path = path
909
+ @query = query
910
+ @fragment = fragment
911
+ if @scheme != nil && @host == "" && @path == ""
912
+ raise InvalidURIError,
913
+ "Absolute URI missing hierarchical segment."
914
+ end
915
+ end
916
+
917
+ # Replaces the internal state of self with the specified URI's state.
918
+ # Used in destructive operations to avoid massive code repetition.
919
+ def replace_self(uri)
920
+ @authority = nil
921
+ @user = nil
922
+ @password = nil
923
+
924
+ @scheme = uri.scheme
925
+ @userinfo = uri.userinfo
926
+ @host = uri.host
927
+ @specified_port = uri.instance_variable_get("@specified_port")
928
+ @port = @specified_port.to_s.to_i
929
+ @path = uri.path
930
+ @query = uri.query
931
+ @fragment = uri.fragment
932
+ return self
933
+ end
934
+ end
935
+
936
# Joins +uri+ onto +base+ and returns the result as a string.
#
# Surplus slashes directly after "scheme://" in +uri+ are removed first
# (e.g. "http:////example.com" becomes "http://example.com").
#
# If joining raises URI::BadURIError and +base+ is relative, +uri+ is
# returned on its own; if +base+ is not relative, nil is returned.
def urljoin(base, uri)
  # "-" is placed last in the character class so it is a literal hyphen.
  # Written as "+-." it formed a range that also admitted ",".
  urifixer = /^([A-Za-z][A-Za-z0-9+.-]*:\/\/)(\/*)(.*?)/u
  uri = uri.sub(urifixer, '\1\3')
  begin
    return ForgivingURI.join(base, uri).to_s
  rescue URI::BadURIError
    if ForgivingURI.parse(base).relative?
      return ForgivingURI.parse(uri).to_s
    end
  end
  nil
end
947
+