rfeedparser 0.9.87 → 0.9.91

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,93 @@
1
+ #!/usr/bin/ruby
2
module FeedParserUtilities
  # A Hash whose keys can be addressed by several feed-format-specific
  # aliases.
  #
  # The same attribute (for example "when was this feed last updated?") has
  # different names depending on the kind of feed being handled. This class
  # satisfies both the developer who knows the feed type and uses its native
  # names, and the developer who wants one consistent interface.
  #
  # @@keymap maps an alias (the key) to the canonical attribute name, or to
  # a list of candidate canonical names (the value). #[] and #[]= consult
  # @@keymap to work out which attribute the caller "really means" whenever
  # the requested key is one of its aliases.
  #
  # Attributes can also be read and written as methods (dict.title,
  # dict.title = "x") through #method_missing.
  class FeedParserDict < Hash
    @@keymap = {'channel' => 'feed',
                'items' => 'entries',
                'guid' => 'id',
                'date' => 'updated',
                'date_parsed' => 'updated_parsed',
                'description' => ['subtitle', 'summary'],
                'url' => ['href'],
                'modified' => 'updated',
                'modified_parsed' => 'updated_parsed',
                'issued' => 'published',
                'issued_parsed' => 'published_parsed',
                'copyright' => 'rights',
                'copyright_detail' => 'rights_detail',
                'tagline' => 'subtitle',
                'tagline_detail' => 'subtitle_detail'}

    # Hash already has an #entries method (from Enumerable), which would
    # otherwise hide the 'entries' attribute from #method_missing.
    def entries
      self['entries']
    end

    # Builds a dictionary, optionally pre-populated.
    #
    # pairs - nil, an Array of [key, value] pairs (stored through #[]=, so
    #         aliases are mapped to canonical names), or a Hash (merged
    #         as-is, keys are NOT remapped).
    def initialize(pairs = nil)
      super()
      if pairs.is_a?(Array) && pairs[0].is_a?(Array) && pairs[0].length == 2
        pairs.each { |k, v| self[k] = v }
      elsif pairs.is_a?(Hash)
        # is_a? rather than an exact class check so that another
        # FeedParserDict can be used as the source.
        merge!(pairs)
      end
    end

    # Looks up key, resolving aliases through @@keymap.
    #
    # 'category' and 'categories' are synthesized from the 'tags' entry and
    # are nil when there are no tags. Returns nil (or the Hash default) when
    # nothing is stored under the key or any of its canonical names.
    def [](key)
      if key == 'category'
        first_tag = (self['tags'] || [])[0]
        return first_tag && first_tag['term']
      end
      if key == 'categories'
        tags = self['tags']
        return tags && tags.map { |tag| [tag['scheme'], tag['term']] }
      end
      realkey = @@keymap[key] || key
      if realkey.is_a?(Array)
        realkey.each do |candidate|
          return self[candidate] if has_key?(candidate)
        end
        # None of the candidates is stored; fall back to the literal key.
        return super(key)
      end
      # Note that the original key is preferred over the realkey we (might
      # have) found in @@keymap.
      return super(key) if has_key?(key)
      super(realkey)
    end

    # Stores value under the canonical name for key. When an alias maps to
    # several candidates the first one is used.
    def []=(key, value)
      if @@keymap.key?(key)
        key = @@keymap[key]
        key = key[0] if key.is_a?(Array)
      end
      super(key, value)
    end

    # Attribute-style access: dict.foo reads self['foo'] and dict.foo = v
    # writes it. Names ending in '!' or '?' or starting with '_' are never
    # treated as attributes and raise NoMethodError.
    def method_missing(msym, *args)
      methodname = msym.to_s
      # end_with?/start_with? behave the same on every Ruby version, unlike
      # String#[] with an Integer index, which returned a character code on
      # Ruby 1.8.
      if methodname.end_with?('=')
        return self[methodname[0..-2]] = args[0]
      elsif !methodname.end_with?('!', '?') && !methodname.start_with?('_')
        return self[methodname]
      else
        raise NoMethodError, "whoops, we don't know about the attribute or method called `#{methodname}' for #{self}:#{self.class}"
      end
    end

    # Keeps respond_to? roughly in step with #method_missing. Deliberately
    # conservative for readers: only names that resolve to a stored key are
    # reported, so implicit-conversion probes such as to_str or to_ary are
    # not answered by accident.
    def respond_to_missing?(msym, include_private = false)
      methodname = msym.to_s
      return true if methodname.end_with?('=')
      return super if methodname.end_with?('!', '?') || methodname.start_with?('_')
      has_key?(methodname) ||
        Array(@@keymap[methodname]).any? { |candidate| has_key?(candidate) } ||
        super
    end
  end
end
@@ -0,0 +1,947 @@
1
+ #!/usr/bin/ruby
2
+ # From Robert Aman's GentleCMS URI.
3
+ # GentleCMS, Copyright (c) 2006 Robert Aman
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining
6
+ # a copy of this software and associated documentation files (the
7
+ # "Software"), to deal in the Software without restriction, including
8
+ # without limitation the rights to use, copy, modify, merge, publish,
9
+ # distribute, sublicense, and/or sell copies of the Software, and to
10
+ # permit persons to whom the Software is furnished to do so, subject to
11
+ # the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be
14
+ # included in all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
+
24
# A forgiving URI parser and manipulator based on RFC 3986.
#
# "Forgiving" means that the class accepts, and where possible repairs,
# many malformed URIs found in the wild (for example the feed:
# pseudo-protocol, or IPv4 hosts written in decimal, octal or hex).
class ForgivingURI
  # Raised if something other than a uri is supplied.
  class InvalidURIError < StandardError
  end

  # Raised if an invalid method option is supplied.
  class InvalidOptionError < StandardError
  end

  # Generic URI splitter, as given in RFC 3986 appendix B.
  URI_REGEX =
    /^(([^:\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/
  # Matches the "userinfo@" prefix of an authority.
  USERINFO_REGEX = /^([^\[\]]*)@/
  # Matches the ":port" suffix of an authority.
  PORT_REGEX = /:([^:@\[\]]*?)$/
  # Schemes that #extract discards because they are almost always false
  # positives (XML namespace prefixes, timestamps, and so on).
  IGNORED_EXTRACT_SCHEMES =
    %w[xmlns xml thr this float user username out].freeze

  attr_reader :userinfo, :host
  attr_writer :scheme
  # Path, query string and fragment are stored and returned verbatim.
  attr_accessor :path, :query, :fragment

  # Returns a URI object based on the parsed string.
  #
  # Returns nil for nil input, and returns the argument itself when it is
  # already a ForgivingURI. Objects from Ruby's standard URI library are
  # converted to a string first. May raise InvalidURIError (see
  # #assign_components).
  def self.parse(uri_string)
    return nil if uri_string.nil?
    return uri_string if uri_string.kind_of?(self)
    uri_string = uri_string.to_s if uri_string.class.name =~ /^URI::/

    fragments = uri_string.scan(URI_REGEX)[0]
    return nil if fragments.nil?
    scheme = fragments[1]
    authority = fragments[3]
    path = fragments[4]
    query = fragments[6]
    fragment = fragments[8]
    userinfo, host, port = split_authority(authority)

    # WARNING: Not standards-compliant, but follows the theme of Postel's
    # law: special exception for the feed pseudo-protocol. Without it,
    # "feed://http://example.com/" would be read as host "http" with a
    # blank port instead of as a URI with a second URI embedded within.
    # The inner URI is treated as opaque and kept in the path.
    if scheme == "feed" && host == "http"
      userinfo = nil
      host = nil
      port = nil
      path = authority + path
    end

    ForgivingURI.new(scheme, userinfo, host, port, path, query, fragment)
  end

  # Splits an authority string into [userinfo, host, port]. The port is
  # returned as a String, or nil when it is absent or blank. A nil
  # authority yields [nil, nil, nil]. Shared by .parse and #authority=.
  def self.split_authority(authority)
    return [nil, nil, nil] if authority.nil?
    userinfo = authority.scan(USERINFO_REGEX).flatten[0]
    host = authority.gsub(USERINFO_REGEX, "").gsub(PORT_REGEX, "")
    port = authority.scan(PORT_REGEX).flatten[0]
    port = nil if port == ""
    [userinfo, host, port]
  end

  # Converts a path to a file protocol URI. If the path supplied is
  # relative, it will be returned as a relative URI. If the path supplied
  # is actually a URI, it will return the parsed URI. Returns nil for nil.
  #
  # When the path names an existing directory on the local file system, the
  # resulting URI path is given exactly one trailing slash.
  def self.convert_path(path)
    return nil if path.nil?

    converted = path.strip
    converted = "file://" + converted if converted[0..0] == "/"
    # Windows drive letter, e.g. "c:\dir" or "c:/dir".
    converted = "file:///" + converted if converted =~ /^[a-zA-Z]:[\\\/]/
    converted = converted.gsub(/^file:\/*/i, "file:///")
    return parse(converted) unless converted =~ /^file:/i

    # Adjust windows-style uris ("c|" drive notation and backslashes).
    converted = converted.gsub(/^file:\/\/\/([a-zA-Z])\|/i, 'file:///\1:')
    converted = converted.gsub(/\\/, '/')
    converted_uri = parse(converted).normalize
    # File.directory? is false for paths that do not exist.
    if File.directory?(converted_uri.path)
      converted_uri.path = converted_uri.path.gsub(/\/$/, "") + '/'
    end
    converted_uri
  end

  # Joins several uris together, resolving each one against the result of
  # the previous ones. Arguments may be ForgivingURI objects or anything
  # whose to_s is a URI.
  def self.join(*uris)
    uri_objects = uris.map do |uri|
      uri.kind_of?(self) ? uri : parse(uri.to_s)
    end
    result = uri_objects.shift.dup
    uri_objects.each { |uri| result.merge!(uri) }
    result
  end

  # Returns the uri as a String with the path, query and fragment
  # percent-escaped by .normalize_escaping. Note that this also escapes
  # "=" and "&" in the query.
  def self.escape(uri)
    uri_object = uri.kind_of?(self) ? uri : parse(uri.to_s)
    ForgivingURI.new(
      uri_object.scheme,
      uri_object.userinfo,
      uri_object.host,
      uri_object.specified_port,
      normalize_escaping(uri_object.path),
      normalize_escaping(uri_object.query),
      normalize_escaping(uri_object.fragment)
    ).to_s
  end

  # Extracts uris from an arbitrary body of text.
  #
  # options - :base  => URI (object or string) that every extracted URI is
  #                     resolved against (default nil).
  #           :parse => when true, ForgivingURI objects are returned
  #                     instead of Strings (default false).
  #
  # Raises InvalidOptionError for any other option key. Candidates that
  # cannot be parsed are silently skipped.
  def self.extract(text, options={})
    defaults = {:base => nil, :parse => false}
    options = defaults.merge(options)
    raise InvalidOptionError unless (options.keys - defaults.keys).empty?
    # This regular expression needs to be less forgiving or else it would
    # match virtually all text. Which isn't exactly what we're going for.
    extract_regex = /((([a-z\+]+):)[^ \n\<\>\"\\]+[\w\/])/
    extracted_uris = text.scan(extract_regex).map { |match| match[0] }
    sgml_extract_regex = /<[^>]+href=\"([^\"]+?)\"[^>]*>/
    sgml_extracted_uris =
      text.scan(sgml_extract_regex).map { |match| match[0] }
    extracted_uris.concat(sgml_extracted_uris - extracted_uris)
    textile_extract_regex = /\".+?\":([^ ]+\/[^ ]+)[ \,\.\;\:\?\!\<\>\"]/i
    textile_extracted_uris =
      text.scan(textile_extract_regex).map { |match| match[0] }
    extracted_uris.concat(textile_extracted_uris - extracted_uris)

    base_uri = nil
    unless options[:base].nil?
      base_uri = options[:base] if options[:base].kind_of?(self)
      base_uri = parse(options[:base].to_s) if base_uri.nil?
    end

    parsed_uris = []
    extracted_uris.each do |uri_string|
      begin
        parsed = parse(uri_string)
        parsed_uris << (base_uri.nil? ? parsed : base_uri + parsed)
      rescue StandardError
        # Best effort: a candidate that cannot be parsed is not a URI.
        nil
      end
    end
    parsed_uris = parsed_uris.reject do |uri|
      uri.scheme =~ /T\d+/ || IGNORED_EXTRACT_SCHEMES.include?(uri.scheme)
    end
    options[:parse] ? parsed_uris : parsed_uris.map { |uri| uri.to_s }
  end

  # Returns an array of known ip-based schemes. These schemes typically
  # use a similar URI form:
  # //<user>:<password>@<host>:<port>/<url-path>
  def self.ip_based_schemes
    scheme_mapping.keys
  end

  # Returns a hash of common IP-based schemes and their default port
  # numbers. The same mutable Hash is returned on every call; adding new
  # schemes to it, as necessary, will allow for better URI normalization.
  def self.scheme_mapping
    @protocol_mapping ||= {
      "http" => 80,
      "https" => 443,
      "ftp" => 21,
      "tftp" => 69,
      "ssh" => 22,
      "svn+ssh" => 22,
      "telnet" => 23,
      "nntp" => 119,
      "gopher" => 70,
      "wais" => 210,
      "prospero" => 1525
    }
  end

  # Creates a new uri object from component parts. Passing nil for
  # any of these parameters is acceptable, but not for all of them at
  # once (InvalidURIError).
  def initialize(scheme, userinfo, host, port, path, query, fragment)
    assign_components(scheme, userinfo, host, port, path, query, fragment)
  end

  # Returns the scheme (protocol) for this URI, or nil when it is blank.
  def scheme
    return nil if @scheme.nil? || @scheme.strip == ""
    @scheme
  end

  # Sets the username and password segment of this URI.
  def userinfo=(new_userinfo)
    @userinfo = new_userinfo
    # The values memoized from the old userinfo are no longer valid.
    @user = nil
    @password = nil
    @authority = nil
  end

  # Sets the host for this URI.
  def host=(new_host)
    @host = new_host
    @authority = nil
  end

  # Returns the authority segment of this URI
  # ("userinfo@host:port"), or nil when there is no host. The port is
  # only included when it was explicitly specified.
  def authority
    if @authority.nil?
      return nil if host.nil?
      prefix = userinfo.nil? ? "" : "#{userinfo}@"
      suffix = specified_port.nil? ? "" : ":#{specified_port}"
      @authority = "#{prefix}#{host}#{suffix}"
    end
    @authority
  end

  # Sets the authority segment of this URI, replacing the userinfo, host
  # and port with the ones found in new_authority.
  def authority=(new_authority)
    @authority = new_authority
    @userinfo, @host, @specified_port =
      self.class.split_authority(new_authority)
    @port = nil
    @user = nil
    @password = nil
  end

  # Returns the user for this URI: the part of the userinfo before the
  # first colon, or the whole userinfo when it has no colon.
  def user
    if @user.nil?
      return nil if @userinfo.nil?
      if @userinfo =~ /:/
        # Non-greedy, so that a password containing ":" stays in the
        # password (matching what #password returns).
        @user = @userinfo.strip.scan(/^(.*?):/).flatten[0].strip
      else
        @user = @userinfo.dup
      end
    end
    @user
  end

  # Sets the user for this URI, keeping the current password.
  def user=(new_user)
    @userinfo = compose_userinfo(new_user, password)
    @user = nil
    @password = nil
    @authority = nil
  end

  # Returns the password for this URI: the part of the userinfo after the
  # first colon, or nil when the userinfo has no colon.
  def password
    if @password.nil?
      return nil if @userinfo.nil?
      return nil unless @userinfo =~ /:/
      @password = @userinfo.strip.scan(/:(.*)$/).flatten[0].strip
    end
    @password
  end

  # Sets the password for this URI, keeping the current user.
  def password=(new_password)
    @userinfo = compose_userinfo(user, new_password)
    @password = new_password
    @user = nil
    @authority = nil
  end

  # Returns the port number for this URI. This method will normalize to the
  # default port for the URI's scheme if the port isn't explicitly specified
  # in the URI. Returns nil when neither is known.
  def port
    if @port.to_i == 0
      @port = scheme.nil? ? nil : self.class.scheme_mapping[scheme.strip.downcase]
    else
      @port = @port.to_i
    end
    @port
  end

  # Sets the port for this URI.
  def port=(new_port)
    @port = new_port.to_s.to_i
    @specified_port = @port
    @authority = nil
  end

  # Returns the port number that was actually specified in the URI string,
  # or nil when there was none (or it was 0).
  def specified_port
    return nil if @specified_port.nil?
    port = @specified_port.to_s.to_i
    port == 0 ? nil : port
  end

  # Returns the basename, if any, of the file at the path being referenced,
  # with any ";parameters" suffix removed.
  # Returns nil if there is no path component.
  def basename
    return nil if path.nil?
    File.basename(path).gsub(/;[^\/]*$/, "")
  end

  # Returns the extension, if any, of the file at the path being referenced.
  # Returns "" if there is no extension or nil if there is no path
  # component.
  def extname
    return nil if path.nil?
    File.extname(basename.gsub(/;[^\/]*$/, ""))
  end

  # Returns true if the URI uses an IP-based protocol.
  def ip_based?
    return false if scheme.nil?
    self.class.ip_based_schemes.include?(scheme.strip.downcase)
  end

  # Returns true if this URI is known to be relative (it has no scheme).
  def relative?
    scheme.nil?
  end

  # Returns true if this URI is known to be absolute.
  def absolute?
    !relative?
  end

  # Joins two URIs together: resolves uri (a ForgivingURI or anything
  # whose to_s is a URI) against self, following RFC 3986 section 5.2.2,
  # and returns the result as a new object. An empty reference returns a
  # copy of self.
  def +(uri)
    uri = ForgivingURI.parse(uri.to_s) unless uri.kind_of?(self.class)
    return dup if uri.to_s == ""

    joined_userinfo = uri.userinfo
    joined_host = uri.host
    joined_port = uri.specified_port
    joined_query = uri.query

    if !uri.scheme.nil?
      joined_scheme = uri.scheme
      joined_path = self.class.normalize_path(uri.path)
    else
      joined_scheme = scheme
      if !uri.authority.nil?
        joined_path = self.class.normalize_path(uri.path)
      else
        if uri.path.nil? || uri.path == ""
          joined_path = path
          joined_query = query if uri.query.nil?
        elsif uri.path[0..0] == "/"
          joined_path = self.class.normalize_path(uri.path)
        else
          joined_path = self.class.normalize_path(merge_paths(uri.path))
        end
        joined_userinfo = userinfo
        joined_host = host
        joined_port = specified_port
      end
    end

    ForgivingURI.new(
      joined_scheme,
      joined_userinfo,
      joined_host,
      joined_port,
      joined_path,
      joined_query,
      uri.fragment
    )
  end

  # Merges two URIs together. Same as #+.
  def merge(uri)
    self + uri
  end

  # Destructive form of merge. Returns self.
  def merge!(uri)
    replace_self(merge(uri))
  end

  # Returns a normalized URI object.
  #
  # NOTE: This method does not attempt to conform to specifications. It
  # exists largely to correct other people's failures to read the
  # specifications, and also to deal with caching issues since several
  # different URIs may represent the same resource and should not be
  # cached multiple times.
  def normalize
    normalized_scheme = scheme.nil? ? nil : scheme.strip.downcase
    normalized_scheme = "svn+ssh" if normalized_scheme == "ssh+svn"
    if normalized_scheme == "feed" && to_s =~ /^feed:\/*http:\/*/
      # Unwrap "feed:http://..." and normalize the embedded URI instead.
      return self.class.parse(
        to_s.scan(/^feed:\/*(http:\/*.*)/).flatten[0]).normalize
    end
    normalized_userinfo = userinfo.nil? ? nil : userinfo.strip
    normalized_host = normalized_host_value

    # A port equal to the scheme's default carries no information.
    normalized_port = port
    if self.class.scheme_mapping[normalized_scheme] == normalized_port
      normalized_port = nil
    end

    normalized_path = path.nil? ? nil : path.strip
    if !normalized_scheme.nil? && normalized_host.nil? &&
        self.class.ip_based_schemes.include?(normalized_scheme) &&
        normalized_path =~ /[\w\.]+/
      # Something like "http:example": treat the path as the host, and
      # guess ".com" when it has no dot at all.
      normalized_host = normalized_path
      normalized_path = nil
      normalized_host = normalized_host + ".com" unless normalized_host =~ /\./
    end
    if normalized_path.nil? && !normalized_scheme.nil? && !normalized_host.nil?
      normalized_path = "/"
    end
    unless normalized_path.nil?
      normalized_path = self.class.normalize_path(normalized_path)
      normalized_path = self.class.normalize_escaping(normalized_path)
    end
    if normalized_path == "" &&
        ["http", "https", "ftp", "tftp"].include?(normalized_scheme)
      normalized_path = "/"
    end
    # normalize_escaping escapes everything that is not unreserved; put
    # back the delimiters that are conventionally left literal.
    unless normalized_path.nil?
      normalized_path = normalized_path.gsub(/%3B/, ";").gsub(/%3A/, ":").
        gsub(/%40/, "@").gsub(/%2C/, ",").gsub(/%3D/, "=")
    end

    normalized_query = query.nil? ? nil : query.strip
    normalized_query = self.class.normalize_escaping(normalized_query)
    unless normalized_query.nil?
      normalized_query = normalized_query.gsub(/%3D/, "=").gsub(/%26/, "&")
    end
    normalized_fragment = fragment.nil? ? nil : fragment.strip
    normalized_fragment = self.class.normalize_escaping(normalized_fragment)

    ForgivingURI.new(
      normalized_scheme,
      normalized_userinfo,
      normalized_host,
      normalized_port,
      normalized_path,
      normalized_query,
      normalized_fragment
    )
  end

  # Destructively normalizes this URI object. Returns self.
  def normalize!
    replace_self(normalize)
  end

  # Creates a URI suitable for display to users. If semantic attacks are
  # likely, the application should try to detect these and warn the user.
  # See RFC 3986 section 7.6 for more information.
  def display_uri
    display_uri = normalize
    begin
      display_uri.host = ForgivingURI::IDNA.to_unicode(display_uri.host)
    rescue StandardError, NotImplementedError
      # Best effort: without usable IDNA support the ASCII host is shown.
      # NotImplementedError is a ScriptError, hence the explicit mention.
      nil
    end
    display_uri
  end

  # Returns true if the URI objects are equal. This method normalizes
  # both URIs before doing the comparison, and allows comparison against
  # strings.
  def ===(uri)
    uri_string = nil
    if uri.respond_to?(:normalize)
      uri_string = uri.normalize.to_s
    else
      begin
        uri_string = ForgivingURI.parse(uri.to_s).normalize.to_s
      rescue StandardError
        return false
      end
    end
    normalize.to_s == uri_string
  end

  # Returns true if the URI objects are equal. This method normalizes
  # both URIs before doing the comparison.
  def ==(uri)
    return false unless uri.kind_of?(self.class)
    normalize.to_s == uri.normalize.to_s
  end

  # Returns true if the URI objects are equal. This method does NOT
  # normalize either URI before doing the comparison.
  def eql?(uri)
    return false unless uri.kind_of?(self.class)
    to_s == uri.to_s
  end

  # Hash code consistent with #eql?, so that URIs work as Hash keys.
  def hash
    to_s.hash
  end

  # Clones the URI object. Every String component is duplicated too.
  def dup
    copy = lambda { |value| value.nil? ? nil : value.dup }
    duplicated_uri = ForgivingURI.new(
      copy.call(scheme),
      copy.call(userinfo),
      copy.call(host),
      port,
      copy.call(path),
      copy.call(query),
      copy.call(fragment)
    )
    # #port may have returned the scheme default; restore what was really
    # specified so that the copy serializes exactly like the original.
    duplicated_uri.instance_variable_set("@specified_port", @specified_port)
    duplicated_uri
  end

  # Returns the assembled URI as a string.
  def to_s
    uri_string = ""
    uri_string += "#{scheme}:" unless scheme.nil?
    uri_string += "//#{authority}" unless authority.nil?
    uri_string += path unless path.nil?
    uri_string += "?#{query}" unless query.nil?
    uri_string += "##{fragment}" unless fragment.nil?
    uri_string
  end

  # Returns a string representation of the URI object's state.
  def inspect
    sprintf("#<%s:%#0x URI:%s>", self.class.to_s, self.object_id, self.to_s)
  end

  # This module handles internationalized domain names. When Ruby has an
  # implementation of nameprep, stringprep, punycode, etc, this
  # module should contain an actual implementation of IDNA instead of
  # raising NotImplementedError if libidn can't be used.
  module IDNA
    # Returns the ascii representation of the label. Returns nil for nil;
    # raises NotImplementedError when the libidn bindings are unavailable.
    def self.to_ascii(label)
      return nil if label.nil?
      if self.use_libidn?
        return IDN::Idna.toASCII(label)
      else
        raise NotImplementedError,
          "There is no available pure-ruby implementation. " +
          "Install libidn bindings."
      end
    end

    # Returns the unicode representation of the label. Returns nil for nil;
    # raises NotImplementedError when the libidn bindings are unavailable.
    def self.to_unicode(label)
      return nil if label.nil?
      if self.use_libidn?
        return IDN::Idna.toUnicode(label)
      else
        raise NotImplementedError,
          "There is no available pure-ruby implementation. " +
          "Install libidn bindings."
      end
    end

    # Determines if the libidn bindings are available and able to be used.
    # The answer is computed once, by trying to load the 'idn' library.
    def self.use_libidn?
      if !defined?(@use_libidn) || @use_libidn.nil?
        begin
          require 'rubygems'
        rescue LoadError
          nil
        end
        begin
          require 'idn'
        rescue LoadError
          nil
        end
        @use_libidn = !!(defined?(IDN::Idna))
      end
      return @use_libidn
    end
  end

  # Resolves paths to their simplest form by removing "." and ".."
  # segments. Returns a new String, or nil for nil.
  def self.normalize_path(path)
    return nil if path.nil?
    current = path.dup
    loop do
      previous = current
      current = current.gsub(/\/\.\//, "/").gsub(/\/\.$/, "/")
      # Collapse "/segment/../". The segment is literal text, so it has to
      # be escaped before being interpolated into a pattern.
      parent = current.scan(/\/([^\/]+)\/\.\.\//).flatten[0]
      if parent != "." && parent != ".."
        current =
          current.gsub(/\/#{Regexp.escape(parent.to_s)}\/\.\.\//, "/")
      end
      parent = current.scan(/\/([^\/]+)\/\.\.$/).flatten[0]
      if parent != "." && parent != ".."
        current =
          current.gsub(/\/#{Regexp.escape(parent.to_s)}\/\.\.$/, "/")
      end
      # Drop leading dot segments, but only whole segments: ".hidden" and
      # "..foo" are ordinary names and must survive.
      current = current.gsub(/^\.\.?(?:\/|$)/, "")
      current = current.gsub(/^\/\.\.?\//, "/")
      break if previous == current
    end
    current
  end

  # Normalizes percent escaping of characters: every escape sequence is
  # decoded, and then every byte that is neither unreserved nor "/" is
  # re-encoded as an upper-case, two-digit %XX sequence. Returns nil for
  # nil. When the libidn bindings are available, the decoded text is
  # NFKC-normalized before being re-encoded.
  def self.normalize_escaping(escaped_section)
    return nil if escaped_section.nil?
    # Work on raw bytes so that multi-byte characters are escaped byte by
    # byte, whatever the encoding of the input.
    decoded = escaped_section.b.gsub(/%[0-9a-f]{2}/i) do |sequence|
      sequence[1..2].hex.chr
    end
    if ForgivingURI::IDNA.send(:use_libidn?)
      decoded = IDN::Stringprep.nfkc_normalize(
        decoded.force_encoding(Encoding::UTF_8)).b
    end
    encoded = decoded.each_byte.map do |byte|
      if unreserved?(byte) || byte == 0x2F
        byte.chr
      else
        format("%%%02X", byte)
      end
    end
    encoded.join.encode(Encoding::UTF_8)
  end

  # Returns true if the specified character is unreserved. The character
  # may be given as a byte value or as a String (whose first character is
  # used).
  def self.unreserved?(character)
    character_string = nil
    character_string = character.chr if character.respond_to?(:chr)
    character_string = character[0..0] if character.kind_of?(String)
    unreserved.include?(character_string)
  end

  # Returns a sorted list of unreserved characters (RFC 3986 section 2.3).
  def self.unreserved
    @unreserved ||=
      (["-", ".", "_", "~"] + ("a".."z").to_a + ("A".."Z").to_a +
       ("0".."9").to_a).sort
  end

  private

  # Assigns the specified components to the appropriate instance variables.
  # Used in destructive operations to avoid code repetition.
  #
  # Raises InvalidURIError when every component is nil, when the port is
  # not a non-negative whole number, or when an absolute URI has both an
  # empty host and an empty path.
  def assign_components(scheme, userinfo, host, port, path, query, fragment)
    if [scheme, userinfo, host, port, path, query, fragment].all? { |c| c.nil? }
      raise InvalidURIError, "All parameters were nil."
    end
    if !port.nil? && port.to_s !~ /\A\d+\z/
      raise InvalidURIError,
        "Invalid port number: #{port.inspect}"
    end
    @scheme = scheme
    @userinfo = userinfo
    @host = host
    @specified_port = port.to_s
    @port = port.to_s.to_i
    @port = nil if @port == 0
    @path = path
    @query = query
    @fragment = fragment
    # Lazily computed caches; see #authority, #user and #password.
    @authority = nil
    @user = nil
    @password = nil
    if @scheme != nil && @host == "" && @path == ""
      raise InvalidURIError,
        "Absolute URI missing hierarchical segment."
    end
  end

  # Replaces the internal state of self with the specified URI's state.
  # Used in destructive operations to avoid massive code repetition.
  def replace_self(uri)
    @authority = nil
    @user = nil
    @password = nil

    @scheme = uri.scheme
    @userinfo = uri.userinfo
    @host = uri.host
    @specified_port = uri.instance_variable_get("@specified_port")
    @port = @specified_port.to_s.to_i
    @path = uri.path
    @query = uri.query
    @fragment = uri.fragment
    return self
  end

  # Builds a userinfo string from a user and a password, either of which
  # may be nil. Returns nil when both are nil.
  def compose_userinfo(user_part, password_part)
    return nil if user_part.nil? && password_part.nil?
    return "#{user_part}" if password_part.nil?
    "#{user_part}:#{password_part}"
  end

  # Merges a relative reference path with this URI's path, as described in
  # RFC 3986 section 5.2.3.
  def merge_paths(reference_path)
    base_path = path.nil? ? "" : path
    # A base with an authority and no path resolves against the root.
    return "/" + reference_path if !host.nil? && base_path == ""
    # Otherwise everything after the last "/" of the base is replaced.
    self.class.normalize_path(base_path).sub(/[^\/]*\z/, "") + reference_path
  end

  # Returns the host lower-cased, converted to ASCII when IDNA support is
  # available, and with numeric IPv4 notations rewritten as a dotted quad.
  def normalized_host_value
    return nil if host.nil?
    candidate = host.strip.downcase
    begin
      candidate = ForgivingURI::IDNA.to_ascii(candidate)
    rescue StandardError, NotImplementedError
      # Best effort: keep the host as it is when IDNA is unavailable or
      # rejects the label. NotImplementedError is a ScriptError, so it has
      # to be named explicitly.
      nil
    end
    canonical_ipv4(candidate)
  end

  # Normalize IPv4 addresses that were generated with the assumption that
  # inet_addr() would be used to parse the IP address: a single decimal
  # number, four octal octets or four hexadecimal octets. Anything else is
  # returned unchanged.
  def canonical_ipv4(candidate)
    case candidate
    when /^\d+$/
      # Decimal IPv4 address.
      decimal = candidate.to_i
      return candidate unless decimal < (256 ** 4)
      [24, 16, 8, 0].map { |shift| (decimal >> shift) & 0xFF }.join(".")
    when /^0+[0-7]{3}\.0+[0-7]{3}\.0+[0-7]{3}\.0+[0-7]{3}$/
      # Octal IPv4 address.
      octets = candidate.split('.').map { |octet| octet.to_i(8) }
      octets.all? { |octet| octet <= 255 } ? octets.join(".") : candidate
    when /^0x[0-9a-f]{2}\.0x[0-9a-f]{2}\.0x[0-9a-f]{2}\.0x[0-9a-f]{2}$/i
      # Hexadecimal IPv4 address.
      candidate.split('.').map { |octet| octet[2...4].to_i(16) }.join(".")
    else
      candidate
    end
  end
end
935
+
936
# Resolves uri against base and returns the result as a String.
#
# Surplus slashes directly after the "scheme://" prefix of uri are removed
# first ("http:////example.com" becomes "http://example.com").
#
# This is best effort: when the two cannot be joined because one of them
# is not an acceptable URI, the cleaned-up uri is returned on its own
# instead of raising.
def urljoin(base, uri)
  # RFC 3986 scheme characters; the "-" is escaped so that "+-." is not
  # read as a character range.
  urifixer = /^([A-Za-z][A-Za-z0-9+\-.]*:\/\/)(\/*)(.*?)/u
  uri = uri.to_s.sub(urifixer, '\1\3')
  begin
    return ForgivingURI.join(base, uri).to_s
  rescue ForgivingURI::InvalidURIError
    # ForgivingURI signals bad input with its own error class (it never
    # raises URI::BadURIError). Fall back to the reference by itself.
    begin
      return ForgivingURI.parse(uri).to_s
    rescue ForgivingURI::InvalidURIError
      return uri
    end
  end
end
947
+