feedtools 0.2.26 → 0.2.27

Sign up to get free protection for your applications and to get access to all the features.
Files changed (166) hide show
  1. data/CHANGELOG +232 -216
  2. data/db/migration.rb +2 -0
  3. data/db/schema.mysql.sql +2 -0
  4. data/db/schema.postgresql.sql +3 -1
  5. data/db/schema.sqlite.sql +3 -1
  6. data/lib/feed_tools.rb +37 -14
  7. data/lib/feed_tools/database_feed_cache.rb +13 -2
  8. data/lib/feed_tools/feed.rb +430 -104
  9. data/lib/feed_tools/feed_item.rb +533 -268
  10. data/lib/feed_tools/helpers/generic_helper.rb +1 -1
  11. data/lib/feed_tools/helpers/html_helper.rb +78 -116
  12. data/lib/feed_tools/helpers/retrieval_helper.rb +33 -3
  13. data/lib/feed_tools/helpers/uri_helper.rb +46 -54
  14. data/lib/feed_tools/monkey_patch.rb +27 -1
  15. data/lib/feed_tools/vendor/html5/History.txt +10 -0
  16. data/lib/feed_tools/vendor/html5/Manifest.txt +117 -0
  17. data/lib/feed_tools/vendor/html5/README +45 -0
  18. data/lib/feed_tools/vendor/html5/Rakefile.rb +33 -0
  19. data/lib/feed_tools/vendor/html5/bin/html5 +217 -0
  20. data/lib/feed_tools/vendor/html5/lib/core_ext/string.rb +17 -0
  21. data/lib/feed_tools/vendor/html5/lib/html5.rb +13 -0
  22. data/lib/feed_tools/vendor/html5/lib/html5/constants.rb +1046 -0
  23. data/lib/feed_tools/vendor/html5/lib/html5/filters/base.rb +10 -0
  24. data/lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb +82 -0
  25. data/lib/feed_tools/vendor/html5/lib/html5/filters/iso639codes.rb +752 -0
  26. data/lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb +198 -0
  27. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc2046.rb +30 -0
  28. data/lib/feed_tools/vendor/html5/lib/html5/filters/rfc3987.rb +89 -0
  29. data/lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb +15 -0
  30. data/lib/feed_tools/vendor/html5/lib/html5/filters/validator.rb +830 -0
  31. data/lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb +36 -0
  32. data/lib/feed_tools/vendor/html5/lib/html5/html5parser.rb +248 -0
  33. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb +46 -0
  34. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb +33 -0
  35. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb +50 -0
  36. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb +41 -0
  37. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb +613 -0
  38. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb +69 -0
  39. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb +78 -0
  40. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  41. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  42. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb +138 -0
  43. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb +89 -0
  44. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb +85 -0
  45. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb +86 -0
  46. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb +115 -0
  47. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb +133 -0
  48. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb +154 -0
  49. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb +41 -0
  50. data/lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  51. data/lib/feed_tools/vendor/html5/lib/html5/inputstream.rb +648 -0
  52. data/lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb +158 -0
  53. data/lib/feed_tools/vendor/html5/lib/html5/sanitizer.rb +188 -0
  54. data/lib/feed_tools/vendor/html5/lib/html5/serializer.rb +2 -0
  55. data/lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb +179 -0
  56. data/lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb +20 -0
  57. data/lib/feed_tools/vendor/html5/lib/html5/sniffer.rb +45 -0
  58. data/lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb +966 -0
  59. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb +24 -0
  60. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb +334 -0
  61. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb +231 -0
  62. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb +209 -0
  63. data/lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb +185 -0
  64. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb +26 -0
  65. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb +162 -0
  66. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb +48 -0
  67. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb +48 -0
  68. data/lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb +48 -0
  69. data/lib/feed_tools/vendor/html5/lib/html5/version.rb +3 -0
  70. data/lib/feed_tools/vendor/html5/testdata/encoding/chardet/test_big5.txt +51 -0
  71. data/lib/feed_tools/vendor/html5/testdata/encoding/test-yahoo-jp.dat +10 -0
  72. data/lib/feed_tools/vendor/html5/testdata/encoding/tests1.dat +394 -0
  73. data/lib/feed_tools/vendor/html5/testdata/encoding/tests2.dat +81 -0
  74. data/lib/feed_tools/vendor/html5/testdata/sanitizer/tests1.dat +416 -0
  75. data/lib/feed_tools/vendor/html5/testdata/serializer/core.test +104 -0
  76. data/lib/feed_tools/vendor/html5/testdata/serializer/injectmeta.test +65 -0
  77. data/lib/feed_tools/vendor/html5/testdata/serializer/optionaltags.test +900 -0
  78. data/lib/feed_tools/vendor/html5/testdata/serializer/options.test +60 -0
  79. data/lib/feed_tools/vendor/html5/testdata/serializer/whitespace.test +51 -0
  80. data/lib/feed_tools/vendor/html5/testdata/sites/google-results.htm +1 -0
  81. data/lib/feed_tools/vendor/html5/testdata/sites/python-ref-import.htm +1 -0
  82. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps-old.htm +1 -0
  83. data/lib/feed_tools/vendor/html5/testdata/sites/web-apps.htm +34275 -0
  84. data/lib/feed_tools/vendor/html5/testdata/sniffer/htmlOrFeed.json +43 -0
  85. data/lib/feed_tools/vendor/html5/testdata/tokenizer/contentModelFlags.test +48 -0
  86. data/lib/feed_tools/vendor/html5/testdata/tokenizer/entities.test +2339 -0
  87. data/lib/feed_tools/vendor/html5/testdata/tokenizer/escapeFlag.test +21 -0
  88. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test1.test +172 -0
  89. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test2.test +129 -0
  90. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test3.test +367 -0
  91. data/lib/feed_tools/vendor/html5/testdata/tokenizer/test4.test +198 -0
  92. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests1.dat +1950 -0
  93. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests2.dat +773 -0
  94. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests3.dat +270 -0
  95. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests4.dat +60 -0
  96. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests5.dat +175 -0
  97. data/lib/feed_tools/vendor/html5/testdata/tree-construction/tests6.dat +196 -0
  98. data/lib/feed_tools/vendor/html5/testdata/validator/attributes.test +1035 -0
  99. data/lib/feed_tools/vendor/html5/testdata/validator/base-href-attribute.test +787 -0
  100. data/lib/feed_tools/vendor/html5/testdata/validator/base-target-attribute.test +35 -0
  101. data/lib/feed_tools/vendor/html5/testdata/validator/blockquote-cite-attribute.test +7 -0
  102. data/lib/feed_tools/vendor/html5/testdata/validator/classattribute.test +152 -0
  103. data/lib/feed_tools/vendor/html5/testdata/validator/contenteditableattribute.test +59 -0
  104. data/lib/feed_tools/vendor/html5/testdata/validator/contextmenuattribute.test +115 -0
  105. data/lib/feed_tools/vendor/html5/testdata/validator/dirattribute.test +59 -0
  106. data/lib/feed_tools/vendor/html5/testdata/validator/draggableattribute.test +63 -0
  107. data/lib/feed_tools/vendor/html5/testdata/validator/html-xmlns-attribute.test +23 -0
  108. data/lib/feed_tools/vendor/html5/testdata/validator/idattribute.test +115 -0
  109. data/lib/feed_tools/vendor/html5/testdata/validator/inputattributes.test +2795 -0
  110. data/lib/feed_tools/vendor/html5/testdata/validator/irrelevantattribute.test +63 -0
  111. data/lib/feed_tools/vendor/html5/testdata/validator/langattribute.test +5579 -0
  112. data/lib/feed_tools/vendor/html5/testdata/validator/li-value-attribute.test +7 -0
  113. data/lib/feed_tools/vendor/html5/testdata/validator/link-href-attribute.test +7 -0
  114. data/lib/feed_tools/vendor/html5/testdata/validator/link-hreflang-attribute.test +7 -0
  115. data/lib/feed_tools/vendor/html5/testdata/validator/link-rel-attribute.test +271 -0
  116. data/lib/feed_tools/vendor/html5/testdata/validator/ol-start-attribute.test +7 -0
  117. data/lib/feed_tools/vendor/html5/testdata/validator/starttags.test +375 -0
  118. data/lib/feed_tools/vendor/html5/testdata/validator/style-scoped-attribute.test +7 -0
  119. data/lib/feed_tools/vendor/html5/testdata/validator/tabindexattribute.test +79 -0
  120. data/lib/feed_tools/vendor/html5/tests/preamble.rb +72 -0
  121. data/lib/feed_tools/vendor/html5/tests/test_encoding.rb +35 -0
  122. data/lib/feed_tools/vendor/html5/tests/test_lxp.rb +279 -0
  123. data/lib/feed_tools/vendor/html5/tests/test_parser.rb +68 -0
  124. data/lib/feed_tools/vendor/html5/tests/test_sanitizer.rb +142 -0
  125. data/lib/feed_tools/vendor/html5/tests/test_serializer.rb +68 -0
  126. data/lib/feed_tools/vendor/html5/tests/test_sniffer.rb +27 -0
  127. data/lib/feed_tools/vendor/html5/tests/test_stream.rb +62 -0
  128. data/lib/feed_tools/vendor/html5/tests/test_tokenizer.rb +94 -0
  129. data/lib/feed_tools/vendor/html5/tests/test_treewalkers.rb +135 -0
  130. data/lib/feed_tools/vendor/html5/tests/test_validator.rb +31 -0
  131. data/lib/feed_tools/vendor/html5/tests/tokenizer_test_parser.rb +63 -0
  132. data/lib/feed_tools/vendor/uri.rb +781 -0
  133. data/lib/feed_tools/version.rb +1 -1
  134. data/rakefile +27 -6
  135. data/test/unit/atom_test.rb +298 -210
  136. data/test/unit/helper_test.rb +7 -12
  137. data/test/unit/rdf_test.rb +51 -1
  138. data/test/unit/rss_test.rb +13 -3
  139. metadata +239 -116
  140. data/lib/feed_tools/vendor/htree.rb +0 -97
  141. data/lib/feed_tools/vendor/htree/container.rb +0 -10
  142. data/lib/feed_tools/vendor/htree/context.rb +0 -67
  143. data/lib/feed_tools/vendor/htree/display.rb +0 -27
  144. data/lib/feed_tools/vendor/htree/doc.rb +0 -149
  145. data/lib/feed_tools/vendor/htree/elem.rb +0 -262
  146. data/lib/feed_tools/vendor/htree/encoder.rb +0 -163
  147. data/lib/feed_tools/vendor/htree/equality.rb +0 -218
  148. data/lib/feed_tools/vendor/htree/extract_text.rb +0 -37
  149. data/lib/feed_tools/vendor/htree/fstr.rb +0 -33
  150. data/lib/feed_tools/vendor/htree/gencode.rb +0 -97
  151. data/lib/feed_tools/vendor/htree/htmlinfo.rb +0 -672
  152. data/lib/feed_tools/vendor/htree/inspect.rb +0 -108
  153. data/lib/feed_tools/vendor/htree/leaf.rb +0 -94
  154. data/lib/feed_tools/vendor/htree/loc.rb +0 -367
  155. data/lib/feed_tools/vendor/htree/modules.rb +0 -48
  156. data/lib/feed_tools/vendor/htree/name.rb +0 -124
  157. data/lib/feed_tools/vendor/htree/output.rb +0 -207
  158. data/lib/feed_tools/vendor/htree/parse.rb +0 -409
  159. data/lib/feed_tools/vendor/htree/raw_string.rb +0 -124
  160. data/lib/feed_tools/vendor/htree/regexp-util.rb +0 -15
  161. data/lib/feed_tools/vendor/htree/rexml.rb +0 -130
  162. data/lib/feed_tools/vendor/htree/scan.rb +0 -166
  163. data/lib/feed_tools/vendor/htree/tag.rb +0 -111
  164. data/lib/feed_tools/vendor/htree/template.rb +0 -909
  165. data/lib/feed_tools/vendor/htree/text.rb +0 -115
  166. data/lib/feed_tools/vendor/htree/traverse.rb +0 -465
@@ -0,0 +1,135 @@
1
+ require File.join(File.dirname(__FILE__), 'preamble')
2
+
3
+ require 'html5/html5parser'
4
+ require 'html5/treewalkers'
5
+ require 'html5/treebuilders'
6
+
7
+ $tree_types_to_test = {
8
+ 'simpletree' =>
9
+ {:builder => HTML5::TreeBuilders['simpletree'],
10
+ :walker => HTML5::TreeWalkers['simpletree']},
11
+ 'rexml' =>
12
+ {:builder => HTML5::TreeBuilders['rexml'],
13
+ :walker => HTML5::TreeWalkers['rexml']},
14
+ 'hpricot' =>
15
+ {:builder => HTML5::TreeBuilders['hpricot'],
16
+ :walker => HTML5::TreeWalkers['hpricot']},
17
+ }
18
+
19
+ puts 'Testing tree walkers: ' + $tree_types_to_test.keys * ', '
20
+
21
+ class TestTreeWalkers < Test::Unit::TestCase
22
+ include HTML5::TestSupport
23
+
24
+ def concatenateCharacterTokens(tokens)
25
+ charactersToken = nil
26
+ for token in tokens
27
+ type = token[:type]
28
+ if [:Characters, :SpaceCharacters].include?(type)
29
+ if charactersToken == nil
30
+ charactersToken = {:type => :Characters, :data => token[:data]}
31
+ else
32
+ charactersToken[:data] += token[:data]
33
+ end
34
+ else
35
+ if charactersToken != nil
36
+ yield charactersToken
37
+ charactersToken = nil
38
+ end
39
+ yield token
40
+ end
41
+ end
42
+ yield charactersToken if charactersToken != nil
43
+ end
44
+
45
+ def convertTokens(tokens)
46
+ output = []
47
+ indent = 0
48
+ concatenateCharacterTokens(tokens) do |token|
49
+ case token[:type]
50
+ when :StartTag, :EmptyTag
51
+ output << "#{' '*indent}<#{token[:name]}>"
52
+ indent += 2
53
+ for name, value in token[:data].to_a.sort
54
+ next if name=='xmlns'
55
+ output << "#{' '*indent}#{name}=\"#{value}\""
56
+ end
57
+ indent -= 2 if token[:type] == :EmptyTag
58
+ when :EndTag
59
+ indent -= 2
60
+ when :Comment
61
+ output << "#{' '*indent}<!-- #{token[:data]} -->"
62
+ when :Doctype
63
+ if token[:name] and token[:name].any?
64
+ output << "#{' '*indent}<!DOCTYPE #{token[:name]}>"
65
+ else
66
+ output << "#{' '*indent}<!DOCTYPE >"
67
+ end
68
+ when :Characters, :SpaceCharacters
69
+ output << "#{' '*indent}\"#{token[:data]}\""
70
+ end
71
+ end
72
+ output.join("\n")
73
+ end
74
+
75
+ html5_test_files('tree-construction').each do |test_file|
76
+
77
+ test_name = File.basename(test_file).sub('.dat', '')
78
+ next if test_name == 'tests5' # TODO
79
+
80
+ TestData.new(test_file, %w(data errors document-fragment document)).
81
+ each_with_index do |(input, errors, inner_html, expected), index|
82
+
83
+ expected = expected.gsub("\n| ","\n")[2..-1]
84
+
85
+ $tree_types_to_test.each do |tree_name, tree_class|
86
+
87
+ define_method "test_#{test_name}_#{index}_#{tree_name}" do
88
+
89
+ parser = HTML5::HTMLParser.new(:tree => tree_class[:builder])
90
+
91
+ if inner_html
92
+ parser.parse_fragment(input, inner_html)
93
+ else
94
+ parser.parse(input)
95
+ end
96
+
97
+ document = parser.tree.get_document
98
+
99
+ begin
100
+ output = sortattrs(convertTokens(tree_class[:walker].new(document)))
101
+ expected = sortattrs(expected)
102
+ assert_equal expected, output, [
103
+ '', 'Input:', input,
104
+ '', 'Expected:', expected,
105
+ '', 'Recieved:', output
106
+ ].join("\n")
107
+ rescue NotImplementedError
108
+ # Amnesty for those that confess...
109
+ end
110
+ end
111
+ end
112
+ end
113
+ end
114
+
115
+ def test_all_tokens
116
+ expected = [
117
+ {:data => [], :type => :StartTag, :name => 'html'},
118
+ {:data => [], :type => :StartTag, :name => 'head'},
119
+ {:data => [], :type => :EndTag, :name => 'head'},
120
+ {:data => [], :type => :StartTag, :name => 'body'},
121
+ {:data => [], :type => :EndTag, :name => 'body'},
122
+ {:data => [], :type => :EndTag, :name => 'html'}]
123
+ for treeName, tree_class in $tree_types_to_test
124
+ p = HTML5::HTMLParser.new(:tree => tree_class[:builder])
125
+ document = p.parse("<html></html>")
126
+ # document = tree_class.get(:adapter)(document)
127
+ output = tree_class[:walker].new(document)
128
+ expected.zip(output) do |expected_token, output_token|
129
+ assert_equal(expected_token, output_token)
130
+ end
131
+ end
132
+ end
133
+
134
+
135
+ end
@@ -0,0 +1,31 @@
1
+ #!/usr/bin/env ruby -wKU
2
+
3
+ require File.join(File.dirname(__FILE__), 'preamble')
4
+
5
+ require 'html5'
6
+ require 'html5/filters/validator'
7
+
8
+ class TestValidator < Test::Unit::TestCase
9
+ def run_validator_test(test)
10
+ p = HTML5::HTMLParser.new(:tokenizer => HTMLConformanceChecker)
11
+ p.parse(test['input'])
12
+ errorCodes = p.errors.collect{|e| e[1]}
13
+ if test.has_key?('fail-if')
14
+ assert !errorCodes.include?(test['fail-if'])
15
+ end
16
+ if test.has_key?('fail-unless')
17
+ assert errorCodes.include?(test['fail-unless'])
18
+ end
19
+ end
20
+
21
+ for filename in html5_test_files('validator')
22
+ tests = JSON.load(open(filename))
23
+ testName = File.basename(filename).sub(".test", "")
24
+ tests['tests'].each_with_index do |test, index|
25
+ define_method "test_#{testName}_#{index}" do
26
+ run_validator_test(test)
27
+ end
28
+ end
29
+ end
30
+ end
31
+
@@ -0,0 +1,63 @@
1
+ require 'html5/constants'
2
+
3
+ class TokenizerTestParser
4
+ def initialize(tokenizer)
5
+ @tokenizer = tokenizer
6
+ end
7
+
8
+ def parse
9
+ @outputTokens = []
10
+
11
+ debug = nil
12
+ for token in @tokenizer
13
+ debug = token.inspect if token[:type] == :ParseError
14
+ send(('process' + token[:type].to_s), token)
15
+ end
16
+
17
+ return @outputTokens
18
+ end
19
+
20
+ def processDoctype(token)
21
+ @outputTokens.push(["DOCTYPE", token[:name], token[:publicId],
22
+ token[:systemId], token[:correct]])
23
+ end
24
+
25
+ def processStartTag(token)
26
+ @outputTokens.push(["StartTag", token[:name], token[:data]])
27
+ end
28
+
29
+ def processEmptyTag(token)
30
+ if not HTML5::VOID_ELEMENTS.include? token[:name]
31
+ @outputTokens.push("ParseError")
32
+ end
33
+ @outputTokens.push(["StartTag", token[:name], token[:data]])
34
+ end
35
+
36
+ def processEndTag(token)
37
+ if token[:data].length > 0
38
+ self.processParseError(token)
39
+ end
40
+ @outputTokens.push(["EndTag", token[:name]])
41
+ end
42
+
43
+ def processComment(token)
44
+ @outputTokens.push(["Comment", token[:data]])
45
+ end
46
+
47
+ def processCharacters(token)
48
+ @outputTokens.push(["Character", token[:data]])
49
+ end
50
+
51
+ alias processSpaceCharacters processCharacters
52
+
53
+ def processCharacters(token)
54
+ @outputTokens.push(["Character", token[:data]])
55
+ end
56
+
57
+ def process_eof(token)
58
+ end
59
+
60
+ def processParseError(token)
61
+ @outputTokens.push("ParseError")
62
+ end
63
+ end
@@ -0,0 +1,781 @@
1
+ module FeedTools
2
+ # This is an implementation of a URI parser based on RFC 3986.
3
+ class URI
4
+ # Raised if something other than a uri is supplied.
5
+ class InvalidURIError < StandardError
6
+ end
7
+ # Raised if an invalid method option is supplied.
8
+ class InvalidOptionError < StandardError
9
+ end
10
+
11
+ # Returns a URI object based on the parsed string.
12
+ def self.parse(uri_string)
13
+ return nil if uri_string.nil?
14
+
15
+ # If a URI object is passed, just return itself.
16
+ return uri_string if uri_string.kind_of?(self)
17
+
18
+ uri_regex =
19
+ /^(([^:\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/
20
+ scan = uri_string.scan(uri_regex)
21
+ fragments = scan[0]
22
+ return nil if fragments.nil?
23
+ scheme = fragments[1]
24
+ authority = fragments[3]
25
+ path = fragments[4]
26
+ query = fragments[6]
27
+ fragment = fragments[8]
28
+ userinfo = nil
29
+ host = nil
30
+ port = nil
31
+ if authority != nil
32
+ userinfo = authority.scan(/^([^\[\]]*)@/).flatten[0]
33
+ host = authority.gsub(/^([^\[\]]*)@/, "").gsub(/:([^:@\[\]]*?)$/, "")
34
+ port = authority.scan(/:([^:@\[\]]*?)$/).flatten[0]
35
+ end
36
+ if port.nil? || port == ""
37
+ port = nil
38
+ end
39
+
40
+ # WARNING: Not standards-compliant, but follows the theme
41
+ # of Postel's law:
42
+ #
43
+ # Special exception for dealing with the ill-conceived idea of the
44
+ # feed pseudo-protocol. Without this exception, the parser will read
45
+ # the URI as having a blank port number, instead of as having a second
46
+ # URI embedded within. This exception translates these broken URIs
47
+ # and instead treats the inner URI as opaque.
48
+ if scheme == "feed" && host == "http"
49
+ userinfo = nil
50
+ host = nil
51
+ port = nil
52
+ path = authority + path
53
+ end
54
+
55
+ return URI.new(scheme, userinfo, host, port, path, query, fragment)
56
+ end
57
+
58
+ # Converts a path to a file protocol URI. If the path supplied is
59
+ # relative, it will be returned as a relative URI. If the path supplied
60
+ # is actually a URI, it will return the parsed URI.
61
+ def self.convert_path(path)
62
+ return nil if path.nil?
63
+
64
+ converted_uri = path.strip
65
+ if converted_uri.length > 0 && converted_uri[0..0] == "/"
66
+ converted_uri = "file://" + converted_uri
67
+ end
68
+ if converted_uri.length > 0 &&
69
+ converted_uri.scan(/^[a-zA-Z]:[\\\/]/).size > 0
70
+ converted_uri = "file:///" + converted_uri
71
+ end
72
+ converted_uri.gsub!(/^file:\/*/i, "file:///")
73
+ if converted_uri =~ /^file:/i
74
+ # Adjust windows-style uris
75
+ converted_uri.gsub!(/^file:\/\/\/([a-zA-Z])\|/i, 'file:///\1:')
76
+ converted_uri.gsub!(/\\/, '/')
77
+ converted_uri = self.parse(converted_uri).normalize
78
+ else
79
+ converted_uri = self.parse(converted_uri)
80
+ end
81
+
82
+ return converted_uri
83
+ end
84
+
85
+ # Joins several uris together.
86
+ def self.join(*uris)
87
+ uri_objects = uris.collect do |uri|
88
+ uri.kind_of?(self) ? uri : self.parse(uri.to_s)
89
+ end
90
+ result = uri_objects.shift.dup
91
+ for uri in uri_objects
92
+ result.merge!(uri)
93
+ end
94
+ return result
95
+ end
96
+
97
+ # Correctly escapes a uri.
98
+ def self.escape(uri)
99
+ uri_object = uri.kind_of?(self) ? uri : self.parse(uri.to_s)
100
+ return URI.new(
101
+ uri_object.scheme,
102
+ uri_object.userinfo,
103
+ uri_object.host,
104
+ uri_object.specified_port,
105
+ self.normalize_escaping(uri_object.path),
106
+ self.normalize_escaping(uri_object.query),
107
+ self.normalize_escaping(uri_object.fragment)
108
+ ).to_s
109
+ end
110
+
111
+ # Extracts uris from an arbitrary body of text.
112
+ def self.extract(text, options={})
113
+ defaults = {:base => nil, :parse => false}
114
+ options = defaults.merge(options)
115
+ raise InvalidOptionError unless (options.keys - defaults.keys).empty?
116
+ # This regular expression needs to be less forgiving or else it would
117
+ # match virtually all text, which isn't exactly what we're going for.
118
+ extract_regex = /((([a-z\+]+):)[^ \n\<\>\"\\]+[\w\/])/
119
+ extracted_uris =
120
+ text.scan(extract_regex).collect { |match| match[0] }
121
+ sgml_extract_regex = /<[^>]+href=\"([^\"]+?)\"[^>]*>/
122
+ sgml_extracted_uris =
123
+ text.scan(sgml_extract_regex).collect { |match| match[0] }
124
+ extracted_uris.concat(sgml_extracted_uris - extracted_uris)
125
+ textile_extract_regex = /\".+?\":([^ ]+\/[^ ]+)[ \,\.\;\:\?\!\<\>\"]/i
126
+ textile_extracted_uris =
127
+ text.scan(textile_extract_regex).collect { |match| match[0] }
128
+ extracted_uris.concat(textile_extracted_uris - extracted_uris)
129
+ parsed_uris = []
130
+ base_uri = nil
131
+ if options[:base] != nil
132
+ base_uri = options[:base] if options[:base].kind_of?(self)
133
+ base_uri = self.parse(options[:base].to_s) if base_uri == nil
134
+ end
135
+ for uri_string in extracted_uris
136
+ begin
137
+ if base_uri == nil
138
+ parsed_uris << self.parse(uri_string)
139
+ else
140
+ parsed_uris << (base_uri + self.parse(uri_string))
141
+ end
142
+ rescue Exception
143
+ nil
144
+ end
145
+ end
146
+ parsed_uris.reject! do |uri|
147
+ (uri.scheme =~ /T\d+/ ||
148
+ uri.scheme == "xmlns" ||
149
+ uri.scheme == "xml" ||
150
+ uri.scheme == "thr" ||
151
+ uri.scheme == "this" ||
152
+ uri.scheme == "float" ||
153
+ uri.scheme == "user" ||
154
+ uri.scheme == "username" ||
155
+ uri.scheme == "out")
156
+ end
157
+ if options[:parse]
158
+ return parsed_uris
159
+ else
160
+ return parsed_uris.collect { |uri| uri.to_s }
161
+ end
162
+ end
163
+
164
+ # Creates a new uri object from component parts. Passing nil for
165
+ # any of these parameters is acceptable.
166
+ def initialize(scheme, userinfo, host, port, path, query, fragment)
167
+ assign_components(scheme, userinfo, host, port, path, query, fragment)
168
+ end
169
+
170
+ # Returns the scheme (protocol) for this URI.
171
+ def scheme
172
+ return nil if @scheme.nil? || @scheme.strip == ""
173
+ return @scheme
174
+ end
175
+
176
+ # Returns the username and password segment of this URI.
177
+ def userinfo
178
+ return @userinfo
179
+ end
180
+
181
+ # Returns the host for this URI.
182
+ def host
183
+ return @host
184
+ end
185
+
186
+ # Returns the authority segment of this URI.
187
+ def authority
188
+ if !defined?(@authority) || @authority.nil?
189
+ return nil if self.host.nil?
190
+ @authority = ""
191
+ if self.userinfo != nil
192
+ @authority << "#{self.userinfo}@"
193
+ end
194
+ @authority << self.host
195
+ if self.specified_port != nil
196
+ @authority << ":#{self.specified_port}"
197
+ end
198
+ end
199
+ return @authority
200
+ end
201
+
202
+ # Returns the user for this URI.
203
+ def user
204
+ if !defined?(@user) || @user.nil?
205
+ @user = nil
206
+ return @user if @userinfo.nil?
207
+ @user = @userinfo.strip.scan(/^(.*):/).flatten[0].strip
208
+ end
209
+ return @user
210
+ end
211
+
212
+ # Returns the password for this URI.
213
+ def password
214
+ if !defined?(@password) || @password.nil?
215
+ @password = nil
216
+ return @password if @userinfo.nil?
217
+ @password = @userinfo.strip.scan(/:(.*)$/).flatten[0].strip
218
+ end
219
+ return @password
220
+ end
221
+
222
+ # Returns an array of known ip-based schemes. These schemes typically
223
+ # use a similar URI form:
224
+ # //<user>:<password>@<host>:<port>/<url-path>
225
+ def self.ip_based_schemes
226
+ return self.scheme_mapping.keys
227
+ end
228
+
229
+ # Returns a hash of common IP-based schemes and their default port
230
+ # numbers. Adding new schemes to this hash, as necessary, will allow
231
+ # for better URI normalization.
232
+ def self.scheme_mapping
233
+ if !defined?(@protocol_mapping) || @protocol_mapping.nil?
234
+ @protocol_mapping = {
235
+ "http" => 80,
236
+ "https" => 443,
237
+ "ftp" => 21,
238
+ "tftp" => 69,
239
+ "ssh" => 22,
240
+ "svn+ssh" => 22,
241
+ "telnet" => 23,
242
+ "nntp" => 119,
243
+ "gopher" => 70,
244
+ "wais" => 210,
245
+ "prospero" => 1525
246
+ }
247
+ end
248
+ return @protocol_mapping
249
+ end
250
+
251
+ # Returns the port number for this URI. This method will normalize to the
252
+ # default port for the URI's scheme if the port isn't explicitly specified
253
+ # in the URI.
254
+ def port
255
+ if @port.to_i == 0
256
+ if self.scheme.nil?
257
+ @port = nil
258
+ else
259
+ @port = self.class.scheme_mapping[self.scheme.strip.downcase]
260
+ end
261
+ return @port
262
+ else
263
+ @port = @port.to_i
264
+ return @port
265
+ end
266
+ end
267
+
268
+ # Returns the port number that was actually specified in the URI string.
269
+ def specified_port
270
+ @specified_port = nil if !defined?(@specified_port)
271
+ return nil if @specified_port.nil?
272
+ port = @specified_port.to_s.to_i
273
+ if port == 0
274
+ return nil
275
+ else
276
+ return port
277
+ end
278
+ end
279
+
280
+ # Returns the path for this URI.
281
+ def path
282
+ return @path
283
+ end
284
+
285
+ # Returns the query string for this URI.
286
+ def query
287
+ return @query
288
+ end
289
+
290
+ # Returns the fragment for this URI.
291
+ def fragment
292
+ return @fragment
293
+ end
294
+
295
+ # Returns true if the URI uses an IP-based protocol.
296
+ def ip_based?
297
+ return false if self.scheme.nil?
298
+ return self.class.ip_based_schemes.include?(self.scheme.strip.downcase)
299
+ end
300
+
301
+ # Returns true if this URI is known to be relative.
302
+ def relative?
303
+ return self.scheme.nil?
304
+ end
305
+
306
+ # Returns true if this URI is known to be absolute.
307
+ def absolute?
308
+ return !relative?
309
+ end
310
+
311
+ # Joins two URIs together.
312
+ def +(uri)
313
+ if !uri.kind_of?(self.class)
314
+ uri = URI.parse(uri.to_s)
315
+ end
316
+ if uri.to_s == ""
317
+ return self.dup
318
+ end
319
+
320
+ joined_scheme = nil
321
+ joined_userinfo = nil
322
+ joined_host = nil
323
+ joined_port = nil
324
+ joined_path = nil
325
+ joined_query = nil
326
+ joined_fragment = nil
327
+
328
+ # Section 5.2.2 of RFC 3986
329
+ if uri.scheme != nil
330
+ joined_scheme = uri.scheme
331
+ joined_userinfo = uri.userinfo
332
+ joined_host = uri.host
333
+ joined_port = uri.specified_port
334
+ joined_path = self.class.normalize_path(uri.path)
335
+ joined_query = uri.query
336
+ else
337
+ if uri.authority != nil
338
+ joined_userinfo = uri.userinfo
339
+ joined_host = uri.host
340
+ joined_port = uri.specified_port
341
+ joined_path = self.class.normalize_path(uri.path)
342
+ joined_query = uri.query
343
+ else
344
+ if uri.path == nil || uri.path == ""
345
+ joined_path = self.path
346
+ if uri.query != nil
347
+ joined_query = uri.query
348
+ else
349
+ joined_query = self.query
350
+ end
351
+ else
352
+ if uri.path[0..0] == "/"
353
+ joined_path = self.class.normalize_path(uri.path)
354
+ else
355
+ base_path = self.path.nil? ? "" : self.path.dup
356
+ base_path = self.class.normalize_path(base_path)
357
+ base_path.gsub!(/\/[^\/]+$/, "/")
358
+ joined_path = self.class.normalize_path(base_path + uri.path)
359
+ end
360
+ joined_query = uri.query
361
+ end
362
+ joined_userinfo = self.userinfo
363
+ joined_host = self.host
364
+ joined_port = self.specified_port
365
+ end
366
+ joined_scheme = self.scheme
367
+ end
368
+ joined_fragment = uri.fragment
369
+
370
+ return URI.new(
371
+ joined_scheme,
372
+ joined_userinfo,
373
+ joined_host,
374
+ joined_port,
375
+ joined_path,
376
+ joined_query,
377
+ joined_fragment
378
+ )
379
+ end
380
+
381
+ # Merges two URIs together.
382
+ def merge(uri)
383
+ return self + uri
384
+ end
385
+
386
+ # Destructive form of merge.
387
+ def merge!(uri)
388
+ replace_self(self.merge(uri))
389
+ end
390
+
391
+ # Returns a normalized URI object.
392
+ #
393
+ # NOTE: This method does not attempt to conform to specifications. It
394
+ # exists largely to correct other people's failures to read the
395
+ # specifications, and also to deal with caching issues since several
396
+ # different URIs may represent the same resource and should not be
397
+ # cached multiple times.
398
+ def normalize
399
+ normalized_scheme = nil
400
+ normalized_scheme = self.scheme.strip.downcase if self.scheme != nil
401
+ normalized_scheme = "svn+ssh" if normalized_scheme == "ssh+svn"
402
+ if normalized_scheme == "feed"
403
+ if self.to_s =~ /^feed:\/*http:\/*/
404
+ return self.class.parse(
405
+ self.to_s.scan(/^feed:\/*(http:\/*.*)/).flatten[0]).normalize
406
+ end
407
+ end
408
+ normalized_userinfo = nil
409
+ normalized_userinfo = self.userinfo.strip if self.userinfo != nil
410
+ normalized_host = nil
411
+ normalized_host = self.host.strip.downcase if self.host != nil
412
+ if normalized_host != nil
413
+ begin
414
+ normalized_host = URI::IDNA.to_ascii(normalized_host)
415
+ rescue Exception
416
+ end
417
+ end
418
+
419
+ # Normalize IPv4 addresses that were generated with the mistaken
420
+ # assumption that inet_addr() would be used to parse the IP address.
421
+ if normalized_host != nil && normalized_host.strip =~ /^\d+$/
422
+ # Decimal IPv4 address.
423
+ decimal = normalized_host.to_i
424
+ if decimal < (256 ** 4)
425
+ octets = [0,0,0,0]
426
+ octets[0] = decimal >> 24
427
+ decimal -= (octets[0] * (256 ** 3))
428
+ octets[1] = decimal >> 16
429
+ decimal -= (octets[1] * (256 ** 2))
430
+ octets[2] = decimal >> 8
431
+ decimal -= (octets[2] * (256 ** 1))
432
+ octets[3] = decimal
433
+ normalized_host = octets.join(".")
434
+ end
435
+ elsif (normalized_host != nil && normalized_host.strip =~
436
+ /^0+[0-7]{3}.0+[0-7]{3}.0+[0-7]{3}.0+[0-7]{3}$/)
437
+ # Octal IPv4 address.
438
+ octet_strings = normalized_host.split('.')
439
+ octets = []
440
+ octet_strings.each do |octet_string|
441
+ decimal = octet_string.to_i(8)
442
+ octets << decimal
443
+ end
444
+ normalized_host = octets.join(".")
445
+ elsif (normalized_host != nil && normalized_host.strip =~
446
+ /^0x[0-9a-f]{2}.0x[0-9a-f]{2}.0x[0-9a-f]{2}.0x[0-9a-f]{2}$/i)
447
+ # Hexidecimal IPv4 address.
448
+ octet_strings = normalized_host.split('.')
449
+ octets = []
450
+ octet_strings.each do |octet_string|
451
+ decimal = octet_string[2...4].to_i(16)
452
+ octets << decimal
453
+ end
454
+ normalized_host = octets.join(".")
455
+ end
456
+ normalized_port = self.port
457
+ if self.class.scheme_mapping[normalized_scheme] == normalized_port
458
+ normalized_port = nil
459
+ end
460
+ normalized_path = nil
461
+ normalized_path = self.path.strip if self.path != nil
462
+ if normalized_scheme != nil && normalized_host == nil
463
+ if self.class.ip_based_schemes.include?(normalized_scheme) &&
464
+ normalized_path =~ /[\w\.]+/
465
+ normalized_host = normalized_path
466
+ normalized_path = nil
467
+ unless normalized_host =~ /\./
468
+ normalized_host = normalized_host + ".com"
469
+ end
470
+ end
471
+ end
472
+ if normalized_path == nil &&
473
+ normalized_scheme != nil &&
474
+ normalized_host != nil
475
+ normalized_path = "/"
476
+ end
477
+ if normalized_path != nil
478
+ normalized_path = self.class.normalize_path(normalized_path)
479
+ normalized_path = self.class.normalize_escaping(normalized_path)
480
+ end
481
+ if normalized_path == ""
482
+ if ["http", "https", "ftp", "tftp"].include?(normalized_scheme)
483
+ normalized_path = "/"
484
+ end
485
+ end
486
+ normalized_path.gsub!(/%3B/, ";") if normalized_path != nil
487
+ normalized_path.gsub!(/%3A/, ":") if normalized_path != nil
488
+ normalized_path.gsub!(/%40/, "@") if normalized_path != nil
489
+ normalized_path.gsub!(/%2B/, "+") if normalized_path != nil
490
+
491
+ normalized_query = nil
492
+ normalized_query = self.query.strip if self.query != nil
493
+ normalized_query = self.class.normalize_escaping(normalized_query)
494
+ normalized_query.gsub!(/%3D/, "=") if normalized_query != nil
495
+ normalized_query.gsub!(/%26/, "&") if normalized_query != nil
496
+ normalized_query.gsub!(/%2B/, "+") if normalized_query != nil
497
+
498
+ normalized_fragment = nil
499
+ normalized_fragment = self.fragment.strip if self.fragment != nil
500
+ normalized_fragment = self.class.normalize_escaping(normalized_fragment)
501
+ return URI.new(
502
+ normalized_scheme,
503
+ normalized_userinfo,
504
+ normalized_host,
505
+ normalized_port,
506
+ normalized_path,
507
+ normalized_query,
508
+ normalized_fragment
509
+ )
510
+ end
511
+
512
# In-place variant of normalize: computes the normalized URI and then
# overwrites this object's own state with it.
# Returns whatever replace_self returns.
def normalize!
  normalized = self.normalize
  replace_self(normalized)
end
516
+
517
# Creates a URI suitable for display to users. If semantic attacks are
# likely, the application should try to detect these and warn the user.
# See RFC 3986 section 7.6 for more information.
#
# The result is the normalized URI with its host converted to unicode via
# URI::IDNA.to_unicode. When that conversion fails or is unavailable, the
# normalized (ASCII) host is kept.
def display_uri
  displayable = self.normalize
  begin
    displayable.instance_variable_set(
      "@host", URI::IDNA.to_unicode(displayable.host))
  rescue StandardError, NotImplementedError
    # Best effort. NotImplementedError (raised when the libidn bindings are
    # missing) is a ScriptError, so it has to be listed explicitly.
    # Interrupt and SystemExit are deliberately not swallowed.
  end
  return displayable
end
529
+
530
# Case-equality: true when both sides normalize to the same string.
# The argument may be anything responding to normalize, or any other
# object whose to_s can be parsed as a URI. Anything that fails to parse or
# normalize compares as false.
def ===(uri)
  other_string =
    if uri.respond_to?(:normalize)
      uri.normalize.to_s
    else
      begin
        URI.parse(uri.to_s).normalize.to_s
      rescue Exception
        return false
      end
    end
  return self.normalize.to_s == other_string
end
546
+
547
# Equality after normalization: true only when the argument is an instance
# of this class (or a subclass) and both URIs normalize to the same string.
def ==(uri)
  if uri.kind_of?(self.class)
    self.normalize.to_s == uri.normalize.to_s
  else
    false
  end
end
553
+
554
# Strict equality: true only when the argument is an instance of this class
# (or a subclass) and both string forms are identical. Neither URI is
# normalized before the comparison.
def eql?(uri)
  uri.kind_of?(self.class) && self.to_s == uri.to_s
end
560
+
561
# Builds an independent copy of this URI. Every non-nil string component
# is itself duplicated; the port is passed through unchanged. The copy also
# receives this object's @specified_port value.
def dup
  copy_of = lambda { |component| component.nil? ? nil : component.dup }
  duplicate = URI.new(
    copy_of.call(self.scheme),
    copy_of.call(self.userinfo),
    copy_of.call(self.host),
    self.port,
    copy_of.call(self.path),
    copy_of.call(self.query),
    copy_of.call(self.fragment)
  )
  # Make sure the ivar exists on self before reading it.
  @specified_port = nil if !defined?(@specified_port)
  duplicate.instance_variable_set("@specified_port", @specified_port)
  return duplicate
end
589
+
590
# Assembles the string form of the URI from its components. Each component
# that is nil is skipped, together with its delimiter:
# "scheme:" "//authority" path "?query" "#fragment".
def to_s
  assembled = ""
  assembled << "#{self.scheme}:" unless self.scheme.nil?
  assembled << "//#{self.authority}" unless self.authority.nil?
  assembled << self.path unless self.path.nil?
  assembled << "?#{self.query}" unless self.query.nil?
  assembled << "##{self.fragment}" unless self.fragment.nil?
  return assembled
end
610
+
611
# Debug representation: the class name, the object id in hex and the
# string form of the URI.
def inspect
  "#<%s:%#0x URL:%s>" % [self.class.to_s, self.object_id, self.to_s]
end
615
+
616
# This module handles internationalized domain names. When Ruby has an
# implementation of nameprep, stringprep, punycode, etc, this
# module should contain an actual implementation of IDNA instead of
# raising NotImplementedError if libidn can't be used.
module IDNA
  # Converts a label to its ASCII form using the libidn bindings.
  # Returns nil for a nil label; raises NotImplementedError when the
  # bindings are not available.
  def self.to_ascii(label)
    return nil if label.nil?
    libidn_required!
    IDN::Idna.toASCII(label)
  end

  # Converts a label to its unicode form using the libidn bindings.
  # Returns nil for a nil label; raises NotImplementedError when the
  # bindings are not available.
  def self.to_unicode(label)
    return nil if label.nil?
    libidn_required!
    IDN::Idna.toUnicode(label)
  end

  # Reports whether the libidn bindings can be used. The answer is computed
  # on the first call (by trying to require rubygems and idn) and cached in
  # @use_libidn afterwards.
  #
  # NOTE: a bare `private` does not apply to `def self.` methods, so this
  # method has always been publicly callable and remains so.
  def self.use_libidn?
    if !defined?(@use_libidn) || @use_libidn.nil?
      %w[rubygems idn].each do |library|
        begin
          require library
        rescue LoadError
          # Missing library: fall through to the defined? check below.
        end
      end
      @use_libidn = !!(defined?(IDN::Idna))
    end
    return @use_libidn
  end

  # Raises NotImplementedError unless the libidn bindings are usable.
  def self.libidn_required!
    return if self.use_libidn?
    raise NotImplementedError,
      "There is no available pure-ruby implementation.  " +
      "Install libidn bindings."
  end
  private_class_method :libidn_required!
end
662
+
663
+ private
664
# Resolves paths to their simplest form by repeatedly removing "." and
# ".." segments until the path stops changing.
#
# path:: the path string, or nil.
#
# Returns a new string (the argument is not modified), or nil when the
# argument is nil.
def self.normalize_path(path)
  return nil if path.nil?
  normalized_path = path.dup
  loop do
    previous_state = normalized_path.dup
    # "/./" and a trailing "/." refer to the current segment.
    normalized_path.gsub!(/\/\.\//, "/")
    normalized_path.gsub!(/\/\.$/, "/")
    # Collapse "/parent/../". The parent segment is arbitrary path text, so
    # it must be escaped before being embedded in a pattern; otherwise
    # characters such as "(" or "+" raise or silently fail to match.
    # When nothing matched, parent is nil and the pattern degenerates to
    # "//../", which drops an empty segment followed by "..".
    parent = normalized_path[/\/([^\/]+)\/\.\.\//, 1]
    if parent != "." && parent != ".."
      normalized_path.gsub!(
        /\/#{Regexp.escape(parent.to_s)}\/\.\.\//, "/")
    end
    # Same for a trailing "/parent/..".
    parent = normalized_path[/\/([^\/]+)\/\.\.$/, 1]
    if parent != "." && parent != ".."
      normalized_path.gsub!(
        /\/#{Regexp.escape(parent.to_s)}\/\.\.$/, "/")
    end
    # Drop a leading "./" or "../" (or a path that is only "." or "..").
    # The dot segment must be followed by "/" or the end of the line so that
    # names which merely start with a dot (".hidden") are left alone.
    normalized_path.gsub!(/^\.\.?(?:\/|$)/, "")
    normalized_path.gsub!(/^\/\.\.?\//, "/")
    break if previous_state == normalized_path
  end
  return normalized_path
end
686
+
687
# Normalizes percent escaping of characters.
#
# Every "%XX" sequence is first decoded. When the libidn bindings are
# usable the decoded text is NFKC-normalized. The result is then re-encoded
# byte by byte: unreserved characters and "/" are kept literally, every
# other byte becomes an upper-case, two-digit "%XX" escape.
#
# escaped_section:: the URI component to normalize, or nil.
#
# Returns a new string, or nil when the argument is nil.
def self.normalize_escaping(escaped_section)
  return nil if escaped_section.nil?
  normalized_section = escaped_section.dup
  normalized_section.gsub!(/%[0-9a-f]{2}/i) do |sequence|
    sequence[1..3].to_i(16).chr
  end
  if URI::IDNA.send(:use_libidn?)
    normalized_section =
      IDN::Stringprep.nfkc_normalize(normalized_section)
  end
  new_section = ""
  # Iterate over bytes rather than indexing the string: String#[] returns
  # an integer only on Ruby 1.8, while each_byte yields integers everywhere.
  normalized_section.each_byte do |byte|
    if self.unreserved?(byte) || byte == 0x2F # 0x2F is "/"
      new_section << byte.chr
    else
      # Always emit two hex digits so that bytes below 0x10 round-trip
      # ("%0A", not "%A").
      new_section << sprintf("%%%02X", byte)
    end
  end
  return new_section
end
710
+
711
# Tells whether a character is in the unreserved set.
# The argument may be a String (only its first character is examined) or
# anything responding to chr, such as an integer character code. Any other
# argument is never unreserved.
def self.unreserved?(character)
  candidate =
    if character.kind_of?(String)
      character[0..0]
    elsif character.respond_to?(:chr)
      character.chr
    end
  return self.unreserved.include?(candidate)
end
718
+
719
# Returns the sorted list of unreserved characters: "-", ".", "_", "~",
# the ASCII letters in both cases and the digits. The list is built once
# and cached in @unreserved; later calls return the same array object.
def self.unreserved
  return @unreserved if defined?(@unreserved) && !@unreserved.nil?
  letters = ("a".."z").to_a
  digits = ("0".."9").to_a
  uppercase = letters.map { |letter| letter.upcase }
  @unreserved = (["-", ".", "_", "~"] + letters + uppercase + digits).sort
  return @unreserved
end
734
+
735
# Assigns the specified components to the appropriate instance variables.
# Used in destructive operations to avoid code repetition.
#
# port may be nil, an Integer, or a String of decimal digits. It is stored
# twice: @specified_port keeps port.to_s, @port keeps the integer value
# (nil when the port is absent or zero).
#
# Raises InvalidURIError when every component is nil, when the port is not
# a decimal number, or when a scheme is given together with an empty host
# and an empty path.
def assign_components(scheme, userinfo, host, port, path, query, fragment)
  components = [scheme, userinfo, host, port, path, query, fragment]
  if components.all? { |component| component.nil? }
    raise InvalidURIError, "All parameters were nil."
  end
  @scheme = scheme
  @userinfo = userinfo
  @host = host
  @specified_port = port.to_s
  @port = port
  # Integer rather than Fixnum: Fixnum no longer exists on current Rubies.
  @port = @port.to_s if @port.kind_of?(Integer)
  # \A and \z anchor the whole string; ^ and $ would accept "80\nevil".
  if @port != nil && @port.to_s !~ /\A\d+\z/
    raise InvalidURIError,
      "Invalid port number: #{@port.inspect}"
  end
  @port = @port.to_i
  @port = nil if @port == 0
  @path = path
  @query = query
  @fragment = fragment
  if @scheme != nil && @host == "" && @path == ""
    raise InvalidURIError,
      "Absolute URI missing hierarchical segment."
  end
end
762
+
763
# Overwrites this object's components with those of the given URI and
# returns self. Used by the destructive operations to avoid repetition.
#
# @authority, @user and @password are reset to nil (presumably values
# derived from the components that must be recomputed -- confirm against
# their accessors). @port is recomputed from the copied @specified_port.
def replace_self(uri)
  @authority = @user = @password = nil

  @scheme, @userinfo, @host = uri.scheme, uri.userinfo, uri.host
  @specified_port = uri.instance_variable_get("@specified_port")
  @port = @specified_port.to_s.to_i
  @path, @query, @fragment = uri.path, uri.query, uri.fragment
  self
end
780
+ end
781
+ end