feedtools 0.2.26 → 0.2.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. data/CHANGELOG +232 -216
  2. data/db/migration.rb +2 -0
  3. data/db/schema.mysql.sql +2 -0
  4. data/db/schema.postgresql.sql +3 -1
  5. data/db/schema.sqlite.sql +3 -1
  6. data/lib/feed_tools.rb +37 -14
  7. data/lib/feed_tools/database_feed_cache.rb +13 -2
  8. data/lib/feed_tools/feed.rb +430 -104
  9. data/lib/feed_tools/feed_item.rb +533 -268
  10. data/lib/feed_tools/helpers/generic_helper.rb +1 -1
  11. data/lib/feed_tools/helpers/html_helper.rb +78 -116
  12. data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
  13. data/lib/feed_tools/helpers/uri_helper.rb +46 -54
  14. data/lib/feed_tools/monkey_patch.rb +27 -1
  15. data/lib/feed_tools/vendor/html5/History.txt +10 -0
  16. data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
  17. data/lib/feed_tools/vendor/html5/README +45 -0
  18. data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
  19. data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
  20. data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
  21. data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
  22. data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
  23. data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
  24. data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
  25. data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
  26. data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
  27. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
  28. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
  29. data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
  30. data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
  31. data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
  32. data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
  33. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
  34. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  35. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
  36. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
  37. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
  38. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
  39. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
  40. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  41. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  42. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
  43. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
  44. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
  45. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
  46. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
  47. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
  48. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
  49. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
  50. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  51. data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
  52. data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
  53. data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
  54. data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
  55. data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
  56. data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
  57. data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
  58. data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
  59. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
  60. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
  61. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
  62. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
  63. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
  64. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
  65. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
  66. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
  67. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
  68. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
  69. data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
  70. data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
  71. data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
  72. data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
  73. data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
  74. data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
  75. data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
  76. data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
  77. data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
  78. data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
  79. data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
  80. data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
  81. data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
  82. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
  83. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
  84. data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
  85. data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
  86. data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
  87. data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
  88. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
  89. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
  90. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
  91. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
  92. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
  93. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
  94. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
  95. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
  96. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
  97. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
  98. data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
  99. data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
  100. data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
  101. data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
  102. data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
  103. data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
  104. data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
  105. data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
  106. data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
  107. data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
  108. data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
  109. data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
  110. data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
  111. data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
  112. data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
  113. data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
  114. data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
  115. data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
  116. data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
  117. data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
  118. data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
  119. data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
  120. data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
  121. data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
  122. data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
  123. data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
  124. data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
  125. data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
  126. data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
  127. data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
  128. data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
  129. data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
  130. data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
  131. data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
  132. data/lib/feed_tools/vendor/uri.rb +781 -0
  133. data/lib/feed_tools/version.rb +1 -1
  134. data/rakefile +27 -6
  135. data/test/unit/atom_test.rb +298 -210
  136. data/test/unit/helper_test.rb +7 -12
  137. data/test/unit/rdf_test.rb +51 -1
  138. data/test/unit/rss_test.rb +13 -3
  139. metadata +239 -116
  140. data/lib/feed_tools/vendor/htree.rb +0 -97
  141. data/lib/feed_tools/vendor/htree/container.rb +0 -10
  142. data/lib/feed_tools/vendor/htree/context.rb +0 -67
  143. data/lib/feed_tools/vendor/htree/display.rb +0 -27
  144. data/lib/feed_tools/vendor/htree/doc.rb +0 -149
  145. data/lib/feed_tools/vendor/htree/elem.rb +0 -262
  146. data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
  147. data/lib/feed_tools/vendor/htree/equality.rb +0 -218
  148. data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
  149. data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
  150. data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
  151. data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
  152. data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
  153. data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
  154. data/lib/feed_tools/vendor/htree/loc.rb +0 -367
  155. data/lib/feed_tools/vendor/htree/modules.rb +0 -48
  156. data/lib/feed_tools/vendor/htree/name.rb +0 -124
  157. data/lib/feed_tools/vendor/htree/output.rb +0 -207
  158. data/lib/feed_tools/vendor/htree/parse.rb +0 -409
  159. data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
  160. data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
  161. data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
  162. data/lib/feed_tools/vendor/htree/scan.rb +0 -166
  163. data/lib/feed_tools/vendor/htree/tag.rb +0 -111
  164. data/lib/feed_tools/vendor/htree/template.rb +0 -909
  165. data/lib/feed_tools/vendor/htree/text.rb +0 -115
  166. data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -0,0 +1,135 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+
3
+ require 'html5/html5parser'
4
+ require 'html5/treewalkers'
5
+ require 'html5/treebuilders'
6
+
7
# Registry of the tree implementations under test: each name maps to the
# builder class used to parse and the walker class used to traverse.
$tree_types_to_test = {}
%w(simpletree rexml hpricot).each do |tree_name|
  $tree_types_to_test[tree_name] = {
    :builder => HTML5::TreeBuilders[tree_name],
    :walker => HTML5::TreeWalkers[tree_name]
  }
end

# Announce which implementations the run covers.
puts 'Testing tree walkers: ' + $tree_types_to_test.keys.join(', ')
20
+
21
# Runs every tree walker listed in $tree_types_to_test over the
# 'tree-construction' fixture files, comparing a textual dump of the walker's
# token stream against the expected tree stored in each fixture.
class TestTreeWalkers < Test::Unit::TestCase
  include HTML5::TestSupport

  # Yields every token from +tokens+, except that each run of adjacent
  # :Characters / :SpaceCharacters tokens is merged into a single
  # :Characters token whose :data is the concatenation of the run.
  def concatenateCharacterTokens(tokens)
    pending = nil
    tokens.each do |token|
      if [:Characters, :SpaceCharacters].include?(token[:type])
        if pending.nil?
          pending = {:type => :Characters, :data => token[:data]}
        else
          # += builds a new string, so the source token's data is untouched.
          pending[:data] += token[:data]
        end
      else
        unless pending.nil?
          yield pending
          pending = nil
        end
        yield token
      end
    end
    yield pending unless pending.nil?
  end

  # Renders a walker's token stream as an indented, newline-joined tree
  # dump. Attributes are listed in sorted order below their element and
  # 'xmlns' attributes are omitted.
  def convertTokens(tokens)
    output = []
    indent = 0
    concatenateCharacterTokens(tokens) do |token|
      case token[:type]
      when :StartTag, :EmptyTag
        output << "#{' '*indent}<#{token[:name]}>"
        indent += 2
        token[:data].to_a.sort.each do |name, value|
          next if name=='xmlns'
          output << "#{' '*indent}#{name}=\"#{value}\""
        end
        # An empty tag has no matching :EndTag to undo the indent.
        indent -= 2 if token[:type] == :EmptyTag
      when :EndTag
        indent -= 2
      when :Comment
        output << "#{' '*indent}<!-- #{token[:data]} -->"
      when :Doctype
        # String#any? only exists on Ruby 1.8 (String was Enumerable there);
        # an explicit emptiness check gives the same answer on every version.
        if token[:name].to_s.empty?
          output << "#{' '*indent}<!DOCTYPE >"
        else
          output << "#{' '*indent}<!DOCTYPE #{token[:name]}>"
        end
      when :Characters, :SpaceCharacters
        output << "#{' '*indent}\"#{token[:data]}\""
      end
    end
    output.join("\n")
  end

  # Define one test method per (fixture file, test index, tree type).
  html5_test_files('tree-construction').each do |test_file|

    test_name = File.basename(test_file).sub('.dat', '')
    next if test_name == 'tests5' # TODO

    TestData.new(test_file, %w(data errors document-fragment document)).
      each_with_index do |(input, errors, inner_html, expected), index|

      # Strip the "| " line prefixes of the fixture's tree dump.
      expected = expected.gsub("\n| ","\n")[2..-1]

      $tree_types_to_test.each do |tree_name, tree_class|

        define_method "test_#{test_name}_#{index}_#{tree_name}" do

          parser = HTML5::HTMLParser.new(:tree => tree_class[:builder])

          if inner_html
            parser.parse_fragment(input, inner_html)
          else
            parser.parse(input)
          end

          document = parser.tree.get_document

          begin
            output = sortattrs(convertTokens(tree_class[:walker].new(document)))
            # Keep the sorted form in a local: +expected+ is captured by the
            # generated tests of every tree type and must not be rewritten
            # by whichever of them happens to run first.
            sorted_expected = sortattrs(expected)
            assert_equal sorted_expected, output, [
              '', 'Input:', input,
              '', 'Expected:', sorted_expected,
              '', 'Received:', output
            ].join("\n")
          rescue NotImplementedError
            # Amnesty for those that confess...
          end
        end
      end
    end
  end

  # Checks the exact token hashes each walker emits for a minimal document.
  def test_all_tokens
    expected = [
      {:data => [], :type => :StartTag, :name => 'html'},
      {:data => [], :type => :StartTag, :name => 'head'},
      {:data => [], :type => :EndTag, :name => 'head'},
      {:data => [], :type => :StartTag, :name => 'body'},
      {:data => [], :type => :EndTag, :name => 'body'},
      {:data => [], :type => :EndTag, :name => 'html'}]
    $tree_types_to_test.each do |_tree_name, tree_class|
      # Named +parser+ rather than +p+ so Kernel#p is not shadowed.
      parser = HTML5::HTMLParser.new(:tree => tree_class[:builder])
      document = parser.parse("<html></html>")
      output = tree_class[:walker].new(document)
      expected.zip(output) do |expected_token, output_token|
        assert_equal(expected_token, output_token)
      end
    end
  end

end
@@ -0,0 +1,31 @@
1
+ #!/usr/bin/env ruby -wKU
2
+
3
+ require File.join(File.dirname(__FILE__), 'preamble')
4
+
5
+ require 'html5'
6
+ require 'html5/filters/validator'
7
+
8
# Generates one test method per entry of every 'validator' fixture file and
# checks the error codes reported while parsing the fixture's input.
class TestValidator < Test::Unit::TestCase
  # Parses test['input'] with HTMLConformanceChecker as the tokenizer, then
  # asserts on the collected error codes: the code under 'fail-if' must be
  # absent and the code under 'fail-unless' must be present (each check only
  # runs when the fixture supplies that key).
  def run_validator_test(test)
    parser = HTML5::HTMLParser.new(:tokenizer => HTMLConformanceChecker)
    parser.parse(test['input'])
    # Each error entry carries its code at index 1.
    error_codes = parser.errors.collect { |error| error[1] }
    if test.has_key?('fail-if')
      assert !error_codes.include?(test['fail-if'])
    end
    if test.has_key?('fail-unless')
      assert error_codes.include?(test['fail-unless'])
    end
  end

  html5_test_files('validator').each do |filename|
    # File.read opens and closes the file itself; the previous
    # JSON.load(open(filename)) left the handle open until garbage collection.
    tests = JSON.load(File.read(filename))
    test_name = File.basename(filename).sub(".test", "")
    tests['tests'].each_with_index do |test, index|
      define_method "test_#{test_name}_#{index}" do
        run_validator_test(test)
      end
    end
  end
end
31
+
@@ -0,0 +1,63 @@
1
+ require 'html5/constants'
2
+
3
# Flattens a tokenizer's token stream into the array-based form that the
# tokenizer fixtures are compared against.
class TokenizerTestParser
  # +tokenizer+ is any object whose #each yields token hashes with a :type key.
  def initialize(tokenizer)
    @tokenizer = tokenizer
  end

  # Consumes the whole token stream and returns the collected output.
  # Every token is dispatched to the method named "process<type>".
  def parse
    @outputTokens = []

    @tokenizer.each do |token|
      send('process' + token[:type].to_s, token)
    end

    @outputTokens
  end

  def processDoctype(token)
    @outputTokens.push(["DOCTYPE", token[:name], token[:publicId],
      token[:systemId], token[:correct]])
  end

  def processStartTag(token)
    @outputTokens.push(["StartTag", token[:name], token[:data]])
  end

  # An empty (self-closing) tag is recorded as a start tag, preceded by a
  # "ParseError" marker when the element is not a void element.
  def processEmptyTag(token)
    unless HTML5::VOID_ELEMENTS.include?(token[:name])
      @outputTokens.push("ParseError")
    end
    @outputTokens.push(["StartTag", token[:name], token[:data]])
  end

  # An end tag carrying data is recorded as a parse error followed by the tag.
  def processEndTag(token)
    processParseError(token) if token[:data].length > 0
    @outputTokens.push(["EndTag", token[:name]])
  end

  def processComment(token)
    @outputTokens.push(["Comment", token[:data]])
  end

  # Defined once; the earlier duplicate definition with the same body only
  # produced a "method redefined" warning under -w.
  def processCharacters(token)
    @outputTokens.push(["Character", token[:data]])
  end

  # Whitespace-only runs are reported exactly like other character data.
  alias processSpaceCharacters processCharacters

  def process_eof(token)
  end

  # Parse errors are recorded as a bare marker string; the details are dropped.
  def processParseError(token)
    @outputTokens.push("ParseError")
  end
end
@@ -0,0 +1,781 @@
1
+ module FeedTools
2
+ # This is an implementation of a URI parser based on RFC 3986.
3
+ class URI
4
+ # Raised if something other than a uri is supplied.
5
class InvalidURIError < StandardError; end
7
+ # Raised if an invalid method option is supplied.
8
class InvalidOptionError < StandardError; end
10
+
11
+ # Returns a URI object based on the parsed string.
12
# Splits +uri_string+ into scheme, authority, path, query and fragment with
# the generic-syntax pattern from RFC 3986 appendix B, breaks the authority
# into userinfo, host and port, and builds a URI from the pieces.
# Returns nil for nil input; an argument that is already a URI is handed
# back unchanged.
def self.parse(uri_string)
  return nil if uri_string.nil?
  return uri_string if uri_string.kind_of?(self)

  generic_syntax =
    /^(([^:\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/
  captures = uri_string.scan(generic_syntax).first
  return nil if captures.nil?
  # Groups 1, 3, 4, 6 and 8 hold the components without their delimiters.
  scheme, authority, path, query, fragment =
    captures.values_at(1, 3, 4, 6, 8)

  userinfo = host = port = nil
  unless authority.nil?
    userinfo_pattern = /^([^\[\]]*)@/
    port_pattern = /:([^:@\[\]]*?)$/
    userinfo = authority.scan(userinfo_pattern).flatten[0]
    host = authority.gsub(userinfo_pattern, "").gsub(port_pattern, "")
    port = authority.scan(port_pattern).flatten[0]
  end
  # A trailing ":" with nothing after it means no port was given.
  port = nil if port == ""

  # WARNING: Not standards-compliant, but follows the theme of Postel's law.
  #
  # "feed:http://..." would otherwise be read as host "http" with a blank
  # port. Treat the embedded URI as an opaque path instead.
  if scheme == "feed" && host == "http"
    userinfo = host = port = nil
    path = authority + path
  end

  URI.new(scheme, userinfo, host, port, path, query, fragment)
end
57
+
58
+ # Converts a path to a file protocol URI. If the path supplied is
59
+ # relative, it will be returned as a relative URI. If the path supplied
60
+ # is actually a URI, it will return the parsed URI.
61
# Turns a filesystem path into a file: URI. Absolute Unix paths and
# drive-letter paths gain a "file://" prefix; anything that does not end up
# with a file: scheme is simply parsed as given. file: results are
# normalized before being returned. Returns nil for nil input.
def self.convert_path(path)
  return nil if path.nil?

  candidate = path.strip
  unless candidate.empty?
    candidate = "file://" + candidate if candidate[0..0] == "/"
    # Drive-letter paths such as "c:\dir" or "c:/dir".
    candidate = "file:///" + candidate if candidate =~ /^[a-zA-Z]:[\\\/]/
  end
  # Collapse however many slashes follow "file:" into exactly three.
  candidate = candidate.gsub(/^file:\/*/i, "file:///")

  return self.parse(candidate) unless candidate =~ /^file:/i

  # Adjust windows-style uris: "c|" becomes "c:" and backslashes become
  # forward slashes.
  candidate = candidate.gsub(/^file:\/\/\/([a-zA-Z])\|/i, 'file:///\1:')
  candidate = candidate.gsub(/\\/, '/')
  self.parse(candidate).normalize
end
84
+
85
+ # Joins several uris together.
86
# Resolves the given uris left to right: a copy of the first one is taken
# as the base and each later one is merged into it in turn. Arguments that
# are not already URI objects are parsed from their string form.
def self.join(*uris)
  base, *remaining = uris.collect do |uri|
    uri.kind_of?(self) ? uri : self.parse(uri.to_s)
  end
  joined = base.dup
  remaining.each { |uri| joined.merge!(uri) }
  joined
end
96
+
97
+ # Correctly escapes a uri.
98
# Returns the string form of +uri+ after passing its path, query and
# fragment through normalize_escaping. Scheme, userinfo, host and the
# explicitly specified port are carried over untouched.
def self.escape(uri)
  source = uri.kind_of?(self) ? uri : self.parse(uri.to_s)
  escaped_path = self.normalize_escaping(source.path)
  escaped_query = self.normalize_escaping(source.query)
  escaped_fragment = self.normalize_escaping(source.fragment)
  escaped = URI.new(
    source.scheme, source.userinfo, source.host, source.specified_port,
    escaped_path, escaped_query, escaped_fragment
  )
  escaped.to_s
end
110
+
111
+ # Extracts uris from an arbitrary body of text.
112
# Scans +text+ for things that look like uris: bare "scheme:..." tokens,
# href="..." attribute values and Textile-style "label":target links.
#
# Options:
#   :base  - uri (object or string) each extracted uri is joined onto.
#   :parse - when true, URI objects are returned instead of strings.
#
# Raises InvalidOptionError when an unknown option key is supplied.
# Candidates that fail to parse or join are skipped.
def self.extract(text, options={})
  defaults = {:base => nil, :parse => false}
  options = defaults.merge(options)
  raise InvalidOptionError unless (options.keys - defaults.keys).empty?

  # This regular expression needs to be less forgiving or else it would
  # match virtually all text. Which isn't exactly what we're going for.
  extract_regex = /((([a-z\+]+):)[^ \n\<\>\"\\]+[\w\/])/
  sgml_extract_regex = /<[^>]+href=\"([^\"]+?)\"[^>]*>/
  textile_extract_regex = /\".+?\":([^ ]+\/[^ ]+)[ \,\.\;\:\?\!\<\>\"]/i

  extracted_uris = text.scan(extract_regex).collect { |match| match[0] }
  # Append the matches of the other two patterns, skipping anything an
  # earlier pattern already produced.
  [sgml_extract_regex, textile_extract_regex].each do |pattern|
    found = text.scan(pattern).collect { |match| match[0] }
    extracted_uris.concat(found - extracted_uris)
  end

  base_uri = nil
  if options[:base] != nil
    base_uri = options[:base] if options[:base].kind_of?(self)
    base_uri = self.parse(options[:base].to_s) if base_uri == nil
  end

  parsed_uris = []
  extracted_uris.each do |uri_string|
    begin
      if base_uri == nil
        parsed_uris << self.parse(uri_string)
      else
        parsed_uris << (base_uri + self.parse(uri_string))
      end
    rescue StandardError
      # Best effort: skip candidates that cannot be parsed. Only
      # StandardError is rescued so Interrupt, SystemExit and other
      # non-standard exceptions still propagate.
      nil
    end
  end

  # Drop matches whose "scheme" is really a timestamp fragment, an XML
  # prefix or another common false positive.
  ignored_schemes =
    ["xmlns", "xml", "thr", "this", "float", "user", "username", "out"]
  parsed_uris.reject! do |uri|
    uri.scheme =~ /T\d+/ || ignored_schemes.include?(uri.scheme)
  end

  if options[:parse]
    return parsed_uris
  else
    return parsed_uris.collect { |uri| uri.to_s }
  end
end
163
+
164
+ # Creates a new uri object from component parts. Passing nil for
165
+ # any of these parameters is acceptable.
166
def initialize(scheme, userinfo, host, port, path, query, fragment)
  # All component handling is delegated to assign_components.
  assign_components(
    scheme, userinfo, host, port,
    path, query, fragment
  )
end
169
+
170
+ # Returns the scheme (protocol) for this URI.
171
def scheme
  # A missing or whitespace-only scheme is reported as nil.
  blank = @scheme.nil? || @scheme.strip == ""
  blank ? nil : @scheme
end
175
+
176
+ # Returns the username and password segment of this URI.
177
def userinfo
  @userinfo
end
180
+
181
+ # Returns the host for this URI.
182
def host
  @host
end
185
+
186
+ # Returns the authority segment of this URI.
187
# Builds "userinfo@host:port" from the individual components on first use
# and caches the result. The userinfo and port parts are only included
# when present; a URI without a host has no authority (nil, not cached).
def authority
  unless defined?(@authority) && !@authority.nil?
    return nil if self.host.nil?
    @authority = ""
    @authority << "#{self.userinfo}@" unless self.userinfo.nil?
    @authority << self.host
    # Only a port that was explicitly specified is shown.
    @authority << ":#{self.specified_port}" unless self.specified_port.nil?
  end
  @authority
end
201
+
202
+ # Returns the user for this URI.
203
# Extracts the user name from the userinfo segment: everything before the
# first ":", or the whole segment when it contains no ":". The result is
# whitespace-stripped and cached. Returns nil when there is no userinfo.
#
# The previous pattern, /^(.*):/, returned no match for a password-less
# userinfo such as "bob", so the chained .strip raised NoMethodError on nil.
# Being greedy, it also swallowed part of the password when the userinfo
# contained more than one ":".
def user
  if !defined?(@user) || @user.nil?
    @user = nil
    return @user if @userinfo.nil?
    # The optional ":" lets this pattern match every string, so the
    # capture group is never nil.
    @user = @userinfo.strip[/^([^:]*):?/, 1].strip
  end
  return @user
end
211
+
212
+ # Returns the password for this URI.
213
# Extracts the password from the userinfo segment: everything after the
# first ":", whitespace-stripped and cached. Returns nil when there is no
# userinfo or when the userinfo contains no ":".
#
# Previously a userinfo without ":" (e.g. "bob") produced no match and the
# chained .strip raised NoMethodError on nil.
def password
  if !defined?(@password) || @password.nil?
    @password = nil
    return @password if @userinfo.nil?
    secret = @userinfo.strip[/:(.*)$/, 1]
    @password = secret.strip unless secret.nil?
  end
  return @password
end
221
+
222
+ # Returns an array of known ip-based schemes. These schemes typically
223
+ # use a similar URI form:
224
+ # //<user>:<password>@<host>:<port>/<url-path>
225
def self.ip_based_schemes
  # The known schemes are exactly the keys of the default-port table.
  scheme_mapping.keys
end
228
+
229
+ # Returns a hash of common IP-based schemes and their default port
230
+ # numbers. Adding new schemes to this hash, as necessary, will allow
231
+ # for better URI normalization.
232
def self.scheme_mapping
  # Built on first use and then reused; callers receive the same Hash
  # object every time, so entries they add are kept.
  unless defined?(@protocol_mapping) && !@protocol_mapping.nil?
    @protocol_mapping = {
      "http"     => 80,
      "https"    => 443,
      "ftp"      => 21,
      "tftp"     => 69,
      "ssh"      => 22,
      "svn+ssh"  => 22,
      "telnet"   => 23,
      "nntp"     => 119,
      "gopher"   => 70,
      "wais"     => 210,
      "prospero" => 1525
    }
  end
  @protocol_mapping
end
250
+
251
+ # Returns the port number for this URI. This method will normalize to the
252
+ # default port for the URI's scheme if the port isn't explicitly specified
253
+ # in the URI.
254
def port
  # A usable (non-zero) stored port wins; it is coerced to an Integer.
  unless @port.to_i == 0
    @port = @port.to_i
    return @port
  end

  # Otherwise fall back to the scheme's default port, which is nil when
  # there is no scheme or the scheme is not in the mapping.
  @port =
    if self.scheme.nil?
      nil
    else
      self.class.scheme_mapping[self.scheme.strip.downcase]
    end
  @port
end
267
+
268
+ # Returns the port number that was actually specified in the URI string.
269
def specified_port
  # Make sure the variable exists before it is read.
  @specified_port = nil unless defined?(@specified_port)
  return nil if @specified_port.nil?

  # Non-numeric or zero values count as "no port specified".
  number = @specified_port.to_s.to_i
  number == 0 ? nil : number
end
279
+
280
+ # Returns the path for this URI.
281
def path
  @path
end
284
+
285
+ # Returns the query string for this URI.
286
def query
  @query
end
289
+
290
+ # Returns the fragment for this URI.
291
def fragment
  @fragment
end
294
+
295
+ # Returns true if the URI uses an IP-based protocol.
296
def ip_based?
  current_scheme = self.scheme
  return false if current_scheme.nil?
  # Compared case-insensitively and ignoring surrounding whitespace.
  self.class.ip_based_schemes.include?(current_scheme.strip.downcase)
end
300
+
301
+ # Returns true if this URI is known to be relative.
302
def relative?
  # A URI without a scheme is treated as relative.
  self.scheme.nil?
end
305
+
306
+ # Returns true if this URI is known to be absolute.
307
def absolute?
  !relative?
end
310
+
311
+ # Joins two URIs together.
312
# Resolves +uri+ against this URI, following the reference-resolution
# branches of RFC 3986 section 5.2.2, and returns the result as a new URI.
# Anything that is not already a URI is parsed from its string form; an
# empty reference yields a copy of the receiver.
def +(uri)
  uri = URI.parse(uri.to_s) unless uri.kind_of?(self.class)
  return self.dup if uri.to_s == ""

  # A reference with its own scheme keeps it; otherwise the base's is used.
  joined_scheme = uri.scheme.nil? ? self.scheme : uri.scheme

  if !uri.scheme.nil? || !uri.authority.nil?
    # The reference names its own authority: take authority, path and
    # query from it.
    authority_source = uri
    joined_path = self.class.normalize_path(uri.path)
    joined_query = uri.query
  elsif uri.path.nil? || uri.path == ""
    # No path in the reference: keep the base path, and the base query
    # too unless the reference brings one.
    authority_source = self
    joined_path = self.path
    joined_query = uri.query.nil? ? self.query : uri.query
  else
    authority_source = self
    if uri.path[0..0] == "/"
      # Absolute path reference.
      joined_path = self.class.normalize_path(uri.path)
    else
      # Relative path reference: replace the last segment of the base path.
      base_path = self.path.nil? ? "" : self.path.dup
      base_path = self.class.normalize_path(base_path)
      base_path.gsub!(/\/[^\/]+$/, "/")
      joined_path = self.class.normalize_path(base_path + uri.path)
    end
    joined_query = uri.query
  end

  # The fragment always comes from the reference.
  URI.new(
    joined_scheme,
    authority_source.userinfo,
    authority_source.host,
    authority_source.specified_port,
    joined_path,
    joined_query,
    uri.fragment
  )
end
380
+
381
+ # Merges two URIs together.
382
def merge(uri)
  # Non-destructive: simply delegates to the + operator.
  self + uri
end
385
+
386
+ # Destructive form of merge.
387
def merge!(uri)
  # Compute the merged URI first, then overwrite this object with it.
  merged = self.merge(uri)
  replace_self(merged)
end
390
+
391
# Returns a normalized URI object.
#
# NOTE: This method does not attempt to conform to specifications. It
# exists largely to correct other people's failures to read the
# specifications, and also to deal with caching issues since several
# different URIs may represent the same resource and should not be
# cached multiple times.
#
# The receiver is not modified; a new URI is built from the normalized
# scheme, userinfo, host, port, path, query and fragment. A "feed:http:"
# URI is unwrapped and the embedded http URI is normalized instead.
def normalize
  normalized_scheme = nil
  normalized_scheme = self.scheme.strip.downcase if self.scheme != nil
  normalized_scheme = "svn+ssh" if normalized_scheme == "ssh+svn"
  if normalized_scheme == "feed"
    # "feed:http://..." wraps an ordinary http URI; hand the embedded URI
    # back to the parser and normalize that instead.
    if self.to_s =~ /^feed:\/*http:\/*/
      return self.class.parse(
        self.to_s.scan(/^feed:\/*(http:\/*.*)/).flatten[0]).normalize
    end
  end

  normalized_userinfo = nil
  normalized_userinfo = self.userinfo.strip if self.userinfo != nil

  normalized_host = nil
  normalized_host = self.host.strip.downcase if self.host != nil
  if normalized_host != nil
    begin
      normalized_host = URI::IDNA.to_ascii(normalized_host)
    rescue StandardError, ScriptError
      # Best effort: keep the host as it is when the IDNA conversion is
      # unavailable or fails. ScriptError is listed because
      # NotImplementedError is not a StandardError. Interrupt and
      # SystemExit are deliberately left alone.
    end
  end

  # Normalize IPv4 addresses that were generated with the stupid
  # assumption that inet_addr() would be used to parse the IP address.
  # The patterns are anchored to the whole host and the dots are escaped,
  # so only genuine dotted forms are rewritten.
  unless normalized_host.nil?
    stripped_host = normalized_host.strip
    if stripped_host =~ /\A\d+\z/
      # Decimal IPv4 address.
      decimal = stripped_host.to_i
      if decimal < (256 ** 4)
        octets = [24, 16, 8, 0].map { |shift| (decimal >> shift) & 0xFF }
        normalized_host = octets.join(".")
      end
    elsif stripped_host =~
        /\A0+[0-7]{3}\.0+[0-7]{3}\.0+[0-7]{3}\.0+[0-7]{3}\z/
      # Octal IPv4 address.
      octets = stripped_host.split(".").map { |octet| octet.to_i(8) }
      normalized_host = octets.join(".")
    elsif stripped_host =~
        /\A0x[0-9a-f]{2}\.0x[0-9a-f]{2}\.0x[0-9a-f]{2}\.0x[0-9a-f]{2}\z/i
      # Hexadecimal IPv4 address; skip the "0x" prefix of every octet.
      octets = stripped_host.split(".").map { |octet| octet[2...4].to_i(16) }
      normalized_host = octets.join(".")
    end
  end

  # Drop the port when it is the default one registered for the scheme.
  normalized_port = self.port
  if self.class.scheme_mapping[normalized_scheme] == normalized_port
    normalized_port = nil
  end

  normalized_path = nil
  normalized_path = self.path.strip if self.path != nil
  if normalized_scheme != nil && normalized_host == nil
    # Something like "http:example" ends up with the host in the path;
    # move it over and guess a ".com" suffix when no dot is present.
    if self.class.ip_based_schemes.include?(normalized_scheme) &&
        normalized_path =~ /[\w\.]+/
      normalized_host = normalized_path
      normalized_path = nil
      unless normalized_host =~ /\./
        normalized_host = normalized_host + ".com"
      end
    end
  end
  if normalized_path == nil &&
      normalized_scheme != nil &&
      normalized_host != nil
    normalized_path = "/"
  end
  if normalized_path != nil
    normalized_path = self.class.normalize_path(normalized_path)
    normalized_path = self.class.normalize_escaping(normalized_path)
  end
  if normalized_path == ""
    if ["http", "https", "ftp", "tftp"].include?(normalized_scheme)
      normalized_path = "/"
    end
  end
  if normalized_path != nil
    # Restore characters that are left unescaped in a path. Non-mutating
    # gsub is used because the path may be a string literal here.
    normalized_path = normalized_path.gsub(/%3B/, ";")
    normalized_path = normalized_path.gsub(/%3A/, ":")
    normalized_path = normalized_path.gsub(/%40/, "@")
    normalized_path = normalized_path.gsub(/%2B/, "+")
  end

  normalized_query = nil
  normalized_query = self.query.strip if self.query != nil
  normalized_query = self.class.normalize_escaping(normalized_query)
  if normalized_query != nil
    # Restore the separators that are left unescaped in a query.
    normalized_query = normalized_query.gsub(/%3D/, "=")
    normalized_query = normalized_query.gsub(/%26/, "&")
    normalized_query = normalized_query.gsub(/%2B/, "+")
  end

  normalized_fragment = nil
  normalized_fragment = self.fragment.strip if self.fragment != nil
  normalized_fragment = self.class.normalize_escaping(normalized_fragment)

  return URI.new(
    normalized_scheme,
    normalized_userinfo,
    normalized_host,
    normalized_port,
    normalized_path,
    normalized_query,
    normalized_fragment
  )
end
511
+
512
# Destructively normalizes this URI object.
#
# Computes normalize and passes the result to replace_self; the value
# returned is whatever replace_self returns.
def normalize!
  normalized = self.normalize
  replace_self(normalized)
end
516
+
517
# Creates a URI suitable for display to users. If semantic attacks are
# likely, the application should try to detect these and warn the user.
# See RFC 3986 section 7.6 for more information.
#
# Returns the normalized form of this URI with its host replaced by the
# result of URI::IDNA.to_unicode. When that conversion fails or is not
# available, the normalized URI is returned with its host unchanged.
def display_uri
  displayed = self.normalize
  begin
    displayed.instance_variable_set(
      "@host", URI::IDNA.to_unicode(displayed.host))
  rescue StandardError, ScriptError
    # Best effort only. ScriptError is listed because NotImplementedError
    # is not a StandardError. Interrupt, SystemExit and similar are no
    # longer swallowed here.
  end
  return displayed
end
529
+
530
# Returns true if the URI objects are equal. This method normalizes
# both URIs before doing the comparison, and allows comparison against
# strings.
#
# Anything that responds to normalize is normalized directly; any other
# object is converted with to_s and parsed first. If parsing or
# normalizing that string raises a StandardError the result is false.
def ===(uri)
  uri_string =
    if uri.respond_to?(:normalize)
      uri.normalize.to_s
    else
      begin
        URI.parse(uri.to_s).normalize.to_s
      rescue StandardError
        # An unparsable operand is simply "not equal". Non-standard
        # exceptions (Interrupt, SystemExit, ...) are not swallowed.
        return false
      end
    end
  return self.normalize.to_s == uri_string
end
546
+
547
# Returns true if the URI objects are equal. This method normalizes
# both URIs before doing the comparison.
#
# Objects that are not instances of the receiver's class (or of a
# subclass) are never equal.
def ==(uri)
  uri.kind_of?(self.class) && self.normalize.to_s == uri.normalize.to_s
end
553
+
554
# Returns true if the URI objects are equal. This method does NOT
# normalize either URI before doing the comparison.
#
# Objects that are not instances of the receiver's class (or of a
# subclass) are never equal; otherwise the to_s forms are compared.
def eql?(uri)
  uri.kind_of?(self.class) && self.to_s == uri.to_s
end
560
+
561
# Clones the URI object.
#
# Every string component is duplicated, so the copy shares no string with
# the receiver, and the port is passed through as is. The receiver's
# @specified_port is carried over to the copy. The receiver itself is
# not touched, so duplicating a frozen URI works.
def dup
  copy_of = lambda { |component| component.nil? ? nil : component.dup }
  duplicated_uri = URI.new(
    copy_of.call(self.scheme),
    copy_of.call(self.userinfo),
    copy_of.call(self.host),
    self.port,
    copy_of.call(self.path),
    copy_of.call(self.query),
    copy_of.call(self.fragment)
  )
  # Read the variable without assigning it on self when it is missing.
  specified_port = defined?(@specified_port) ? @specified_port : nil
  duplicated_uri.instance_variable_set("@specified_port", specified_port)
  return duplicated_uri
end
589
+
590
# Returns the assembled URI as a string.
#
# Components that are nil are left out. The scheme is followed by ":",
# the authority is prefixed with "//", the query with "?" and the
# fragment with "#"; the path is appended as is.
def to_s
  uri_string = ""
  uri_string << "#{self.scheme}:" unless self.scheme.nil?
  uri_string << "//#{self.authority}" unless self.authority.nil?
  uri_string << self.path unless self.path.nil?
  uri_string << "?#{self.query}" unless self.query.nil?
  uri_string << "##{self.fragment}" unless self.fragment.nil?
  uri_string
end
610
+
611
# Returns a string representation of the URI object's state.
#
# The string contains the class name, the object id in hexadecimal and
# the assembled URI.
def inspect
  "#<%s:%#0x URL:%s>" % [self.class.to_s, self.object_id, self.to_s]
end
615
+
616
# This module handles internationalized domain names. When Ruby has an
# implementation of nameprep, stringprep, punycode, etc, this
# module should contain an actual implementation of IDNA instead of
# raising NotImplementedError if libidn can't be used.
module IDNA
  # Returns the ascii representation of the label, or nil for a nil label.
  # Raises NotImplementedError when the libidn bindings are unavailable.
  def self.to_ascii(label)
    return nil if label.nil?
    unless self.use_libidn?
      raise NotImplementedError,
        "There is no available pure-ruby implementation. " +
        "Install libidn bindings."
    end
    IDN::Idna.toASCII(label)
  end

  # Returns the unicode representation of the label, or nil for a nil
  # label. Raises NotImplementedError when the libidn bindings are
  # unavailable.
  def self.to_unicode(label)
    return nil if label.nil?
    unless self.use_libidn?
      raise NotImplementedError,
        "There is no available pure-ruby implementation. " +
        "Install libidn bindings."
    end
    IDN::Idna.toUnicode(label)
  end

  # NOTE: a bare "private" does not apply to methods defined with
  # "def self.", so use_libidn? stays publicly callable.
  private

  # Determines if the libidn bindings are available and able to be used.
  # The answer is computed on the first call and remembered afterwards.
  def self.use_libidn?
    return @use_libidn if defined?(@use_libidn) && !@use_libidn.nil?
    # Both libraries are optional, so a failed require is ignored.
    begin
      require 'rubygems'
    rescue LoadError
    end
    begin
      require 'idn'
    rescue LoadError
    end
    @use_libidn = !!(defined?(IDN::Idna))
  end
end
662
+
663
+ private
664
# Resolves paths to their simplest form.
#
# Repeatedly removes "." segments, collapses "segment/.." pairs and
# strips leading "." / ".." segments until the path stops changing.
# Returns nil for a nil path; the argument itself is never modified.
def self.normalize_path(path)
  return nil if path.nil?
  normalized_path = path.dup
  loop do
    previous_state = normalized_path.dup
    normalized_path.gsub!(/\/\.\//, "/")
    normalized_path.gsub!(/\/\.$/, "/")

    # Collapse "/parent/../". The segment is escaped before it is put
    # into the pattern so that characters such as "+" or "(" are matched
    # literally. When nothing matched, parent is nil and the pattern
    # degenerates to "//../", which is collapsed as well.
    parent = normalized_path.scan(/\/([^\/]+)\/\.\.\//).flatten[0]
    if parent != "." && parent != ".."
      normalized_path.gsub!(
        /\/#{Regexp.escape(parent.to_s)}\/\.\.\//, "/")
    end

    # Same for a trailing "/parent/..".
    parent = normalized_path.scan(/\/([^\/]+)\/\.\.$/).flatten[0]
    if parent != "." && parent != ".."
      normalized_path.gsub!(
        /\/#{Regexp.escape(parent.to_s)}\/\.\.$/, "/")
    end

    normalized_path.gsub!(/^\.\.?\/?/, "")
    normalized_path.gsub!(/^\/\.\.?\//, "/")
    break if previous_state == normalized_path
  end
  return normalized_path
end
686
+
687
# Normalizes percent escaping of characters.
#
# Every percent escape is decoded first. Then every byte that is neither
# unreserved nor "/" is written back as an upper-case, two-digit percent
# escape. Returns nil for a nil section; the argument is not modified.
#
# The work is done byte by byte so that it behaves the same whether
# string indexing yields byte values or characters, and so that
# multi-byte characters are escaped one byte at a time.
def self.normalize_escaping(escaped_section)
  return nil if escaped_section.nil?
  decoded = escaped_section.dup
  # Treat the section as raw bytes where strings carry an encoding, so
  # that decoded high bytes can be spliced in without encoding errors.
  decoded.force_encoding("BINARY") if decoded.respond_to?(:force_encoding)
  decoded.gsub!(/%[0-9a-f]{2}/i) do |sequence|
    sequence[1..2].to_i(16).chr
  end
  if URI::IDNA.send(:use_libidn?)
    # NOTE(review): the section is handed over as raw bytes; presumably
    # the binding reads them as UTF-8 - confirm against the idn bindings.
    decoded = IDN::Stringprep.nfkc_normalize(decoded)
  end
  pieces = []
  decoded.each_byte do |byte|
    character = byte.chr
    if self.unreserved?(character) || character == "/"
      pieces << character
    else
      # Always two hex digits: byte 0x0A must become "%0A", not "%A".
      pieces << format("%%%02X", byte)
    end
  end
  return pieces.join
end
710
+
711
# Returns true if the specified character is unreserved.
#
# A String is judged by its first character; any other object that
# responds to chr is converted with chr first. Anything else is never
# unreserved.
def self.unreserved?(character)
  candidate =
    if character.kind_of?(String)
      character[0..0]
    elsif character.respond_to?(:chr)
      character.chr
    end
  self.unreserved.include?(candidate)
end
718
+
719
# Returns a list of unreserved characters.
#
# The list holds "-", ".", "_", "~", the ASCII letters in both cases and
# the digits, sorted. It is built on the first call and the same array is
# returned afterwards.
def self.unreserved
  unless defined?(@unreserved) && !@unreserved.nil?
    lower_case = ("a".."z").to_a
    upper_case = lower_case.map { |letter| letter.upcase }
    digits = ("0".."9").to_a
    @unreserved = (["-", ".", "_", "~"] + lower_case + upper_case + digits).sort
  end
  @unreserved
end
734
+
735
# Assigns the specified components to the appropriate instance variables.
# Used in destructive operations to avoid code repetition.
#
# The port may be nil, an Integer or a String of digits. @specified_port
# keeps its string form; @port becomes the Integer value, or nil when the
# port is missing or zero.
#
# Raises InvalidURIError when every component is nil, when the port is
# not made up of digits only, or when a scheme is given together with an
# empty host and an empty path.
def assign_components(scheme, userinfo, host, port, path, query, fragment)
  components = [scheme, userinfo, host, port, path, query, fragment]
  if components.all? { |component| component.nil? }
    raise InvalidURIError, "All parameters were nil."
  end
  @scheme = scheme
  @userinfo = userinfo
  @host = host
  @specified_port = port.to_s
  # Validate the textual form. Integer covers every integral port value.
  @port = port
  @port = @port.to_s if @port.kind_of?(Integer)
  # Anchored to the whole string so "80\nevil" is rejected.
  if @port != nil && !(@port.to_s =~ /\A\d+\z/)
    raise InvalidURIError,
      "Invalid port number: #{@port.inspect}"
  end
  @port = @port.to_i
  @port = nil if @port == 0
  @path = path
  @query = query
  @fragment = fragment
  if @scheme != nil && @host == "" && @path == ""
    raise InvalidURIError,
      "Absolute URI missing hierarchical segment."
  end
end
762
+
763
# Replaces the internal state of self with the specified URI's state.
# Used in destructive operations to avoid code repetition.
#
# @authority, @user and @password are reset to nil first. The port is
# taken from the other URI's @specified_port and converted to an Integer
# (0 when that value is nil or empty). Returns self.
def replace_self(uri)
  @authority = @user = @password = nil

  @scheme = uri.scheme
  @userinfo = uri.userinfo
  @host = uri.host
  specified_port = uri.instance_variable_get("@specified_port")
  @specified_port = specified_port
  @port = specified_port.to_s.to_i
  @path = uri.path
  @query = uri.query
  @fragment = uri.fragment
  self
end
780
+ end
781
+ end