nokogiri 1.8.5 → 1.9.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (147)
  1. checksums.yaml +4 -4
  2. data/README.md +0 -1
  3. data/ext/nokogiri/extconf.rb +10 -6
  4. data/ext/nokogiri/xml_attr.c +26 -21
  5. data/ext/nokogiri/xml_document.c +4 -1
  6. data/ext/nokogiri/xml_namespace.c +3 -3
  7. data/ext/nokogiri/xml_namespace.h +1 -2
  8. data/ext/nokogiri/xml_node.c +55 -15
  9. data/lib/nokogiri/css/parser.rb +61 -61
  10. data/lib/nokogiri/version.rb +1 -1
  11. data/lib/nokogiri/xml/document_fragment.rb +11 -0
  12. data/lib/nokogiri/xml/node.rb +12 -0
  13. data/lib/nokogiri/xml/node_set.rb +4 -8
  14. metadata +17 -153
  15. data/.autotest +0 -22
  16. data/.cross_rubies +0 -8
  17. data/.editorconfig +0 -17
  18. data/.gemtest +0 -0
  19. data/.travis.yml +0 -63
  20. data/CHANGELOG.md +0 -1368
  21. data/CONTRIBUTING.md +0 -42
  22. data/C_CODING_STYLE.rdoc +0 -33
  23. data/Gemfile +0 -23
  24. data/Gemfile-libxml-ruby +0 -3
  25. data/Manifest.txt +0 -370
  26. data/ROADMAP.md +0 -111
  27. data/Rakefile +0 -348
  28. data/SECURITY.md +0 -19
  29. data/STANDARD_RESPONSES.md +0 -47
  30. data/Y_U_NO_GEMSPEC.md +0 -155
  31. data/appveyor.yml +0 -29
  32. data/build_all +0 -44
  33. data/patches/sort-patches-by-date +0 -25
  34. data/suppressions/README.txt +0 -1
  35. data/suppressions/nokogiri_ruby-2.supp +0 -10
  36. data/tasks/test.rb +0 -100
  37. data/test/css/test_nthiness.rb +0 -226
  38. data/test/css/test_parser.rb +0 -386
  39. data/test/css/test_tokenizer.rb +0 -215
  40. data/test/css/test_xpath_visitor.rb +0 -96
  41. data/test/decorators/test_slop.rb +0 -23
  42. data/test/files/2ch.html +0 -108
  43. data/test/files/GH_1042.html +0 -18
  44. data/test/files/address_book.rlx +0 -12
  45. data/test/files/address_book.xml +0 -10
  46. data/test/files/atom.xml +0 -344
  47. data/test/files/bar/bar.xsd +0 -4
  48. data/test/files/bogus.xml +0 -0
  49. data/test/files/dont_hurt_em_why.xml +0 -422
  50. data/test/files/encoding.html +0 -82
  51. data/test/files/encoding.xhtml +0 -84
  52. data/test/files/exslt.xml +0 -8
  53. data/test/files/exslt.xslt +0 -35
  54. data/test/files/foo/foo.xsd +0 -4
  55. data/test/files/metacharset.html +0 -10
  56. data/test/files/namespace_pressure_test.xml +0 -1684
  57. data/test/files/noencoding.html +0 -47
  58. data/test/files/po.xml +0 -32
  59. data/test/files/po.xsd +0 -66
  60. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  61. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  62. data/test/files/saml/xenc_schema.xsd +0 -146
  63. data/test/files/saml/xmldsig_schema.xsd +0 -318
  64. data/test/files/shift_jis.html +0 -10
  65. data/test/files/shift_jis.xml +0 -5
  66. data/test/files/shift_jis_no_charset.html +0 -9
  67. data/test/files/slow-xpath.xml +0 -25509
  68. data/test/files/snuggles.xml +0 -3
  69. data/test/files/staff.dtd +0 -10
  70. data/test/files/staff.xml +0 -59
  71. data/test/files/staff.xslt +0 -32
  72. data/test/files/test_document_url/bar.xml +0 -2
  73. data/test/files/test_document_url/document.dtd +0 -4
  74. data/test/files/test_document_url/document.xml +0 -6
  75. data/test/files/tlm.html +0 -851
  76. data/test/files/to_be_xincluded.xml +0 -2
  77. data/test/files/valid_bar.xml +0 -2
  78. data/test/files/xinclude.xml +0 -4
  79. data/test/helper.rb +0 -271
  80. data/test/html/sax/test_parser.rb +0 -168
  81. data/test/html/sax/test_parser_context.rb +0 -46
  82. data/test/html/sax/test_parser_text.rb +0 -163
  83. data/test/html/sax/test_push_parser.rb +0 -87
  84. data/test/html/test_attributes.rb +0 -85
  85. data/test/html/test_builder.rb +0 -164
  86. data/test/html/test_document.rb +0 -712
  87. data/test/html/test_document_encoding.rb +0 -143
  88. data/test/html/test_document_fragment.rb +0 -310
  89. data/test/html/test_element_description.rb +0 -105
  90. data/test/html/test_named_characters.rb +0 -14
  91. data/test/html/test_node.rb +0 -212
  92. data/test/html/test_node_encoding.rb +0 -91
  93. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  94. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  95. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  96. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  97. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  98. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  99. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  100. data/test/test_convert_xpath.rb +0 -135
  101. data/test/test_css_cache.rb +0 -47
  102. data/test/test_encoding_handler.rb +0 -48
  103. data/test/test_memory_leak.rb +0 -156
  104. data/test/test_nokogiri.rb +0 -138
  105. data/test/test_soap4r_sax.rb +0 -52
  106. data/test/test_xslt_transforms.rb +0 -314
  107. data/test/xml/node/test_save_options.rb +0 -28
  108. data/test/xml/node/test_subclass.rb +0 -44
  109. data/test/xml/sax/test_parser.rb +0 -402
  110. data/test/xml/sax/test_parser_context.rb +0 -115
  111. data/test/xml/sax/test_parser_text.rb +0 -202
  112. data/test/xml/sax/test_push_parser.rb +0 -265
  113. data/test/xml/test_attr.rb +0 -74
  114. data/test/xml/test_attribute_decl.rb +0 -86
  115. data/test/xml/test_builder.rb +0 -341
  116. data/test/xml/test_c14n.rb +0 -180
  117. data/test/xml/test_cdata.rb +0 -54
  118. data/test/xml/test_comment.rb +0 -40
  119. data/test/xml/test_document.rb +0 -982
  120. data/test/xml/test_document_encoding.rb +0 -31
  121. data/test/xml/test_document_fragment.rb +0 -298
  122. data/test/xml/test_dtd.rb +0 -187
  123. data/test/xml/test_dtd_encoding.rb +0 -31
  124. data/test/xml/test_element_content.rb +0 -56
  125. data/test/xml/test_element_decl.rb +0 -73
  126. data/test/xml/test_entity_decl.rb +0 -122
  127. data/test/xml/test_entity_reference.rb +0 -262
  128. data/test/xml/test_namespace.rb +0 -96
  129. data/test/xml/test_node.rb +0 -1325
  130. data/test/xml/test_node_attributes.rb +0 -115
  131. data/test/xml/test_node_encoding.rb +0 -75
  132. data/test/xml/test_node_inheritance.rb +0 -32
  133. data/test/xml/test_node_reparenting.rb +0 -592
  134. data/test/xml/test_node_set.rb +0 -809
  135. data/test/xml/test_parse_options.rb +0 -64
  136. data/test/xml/test_processing_instruction.rb +0 -30
  137. data/test/xml/test_reader.rb +0 -620
  138. data/test/xml/test_reader_encoding.rb +0 -134
  139. data/test/xml/test_relax_ng.rb +0 -60
  140. data/test/xml/test_schema.rb +0 -142
  141. data/test/xml/test_syntax_error.rb +0 -36
  142. data/test/xml/test_text.rb +0 -60
  143. data/test/xml/test_unparented_node.rb +0 -483
  144. data/test/xml/test_xinclude.rb +0 -83
  145. data/test/xml/test_xpath.rb +0 -470
  146. data/test/xslt/test_custom_functions.rb +0 -133
  147. data/test/xslt/test_exception_handling.rb +0 -37
data/test/files/to_be_xincluded.xml DELETED
@@ -1,2 +0,0 @@
1
- <?xml version="1.0" encoding="utf-8"?>
2
- <included>this snippet is to be included from xinclude.xml</included>
data/test/files/valid_bar.xml DELETED
@@ -1,2 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <bar />
data/test/files/xinclude.xml DELETED
@@ -1,4 +0,0 @@
1
- <?xml version="1.0" encoding="utf-8"?>
2
- <test xmlns:xi="http://www.w3.org/2001/XInclude">
3
- <xi:include href="to_be_xincluded.xml"/>
4
- </test>
data/test/helper.rb DELETED
@@ -1,271 +0,0 @@
1
- #Process.setrlimit(Process::RLIMIT_CORE, Process::RLIM_INFINITY) unless RUBY_PLATFORM =~ /(java|mswin|mingw)/i
2
- $VERBOSE = true
3
- require 'minitest/autorun'
4
- require 'minitest/pride'
5
- require 'fileutils'
6
- require 'tempfile'
7
- require 'pp'
8
-
9
- require 'nokogiri'
10
-
11
- if ENV['TEST_NOKOGIRI_WITH_LIBXML_RUBY']
12
- #
13
- # if you'd like to test with the libxml-ruby gem loaded, it's
14
- # recommended that you set
15
- #
16
- # BUNDLE_GEMFILE=Gemfile-libxml-ruby
17
- #
18
- # which will a) bundle that gem, and b) set the appropriate env var to
19
- # trigger this block
20
- #
21
- require 'libxml'
22
- warn "#{__FILE__}:#{__LINE__}: loaded libxml-ruby '#{LibXML::XML::VERSION}'"
23
- end
24
-
25
- warn "#{__FILE__}:#{__LINE__}: version info: #{Nokogiri::VERSION_INFO.inspect}"
26
-
27
- module Nokogiri
28
- class TestCase < MiniTest::Spec
29
- ASSETS_DIR = File.expand_path File.join(File.dirname(__FILE__), 'files')
30
- ADDRESS_SCHEMA_FILE = File.join(ASSETS_DIR, 'address_book.rlx')
31
- ADDRESS_XML_FILE = File.join(ASSETS_DIR, 'address_book.xml')
32
- ENCODING_HTML_FILE = File.join(ASSETS_DIR, 'encoding.html')
33
- ENCODING_XHTML_FILE = File.join(ASSETS_DIR, 'encoding.xhtml')
34
- EXML_FILE = File.join(ASSETS_DIR, 'exslt.xml')
35
- EXSLT_FILE = File.join(ASSETS_DIR, 'exslt.xslt')
36
- HTML_FILE = File.join(ASSETS_DIR, 'tlm.html')
37
- METACHARSET_FILE = File.join(ASSETS_DIR, 'metacharset.html')
38
- NICH_FILE = File.join(ASSETS_DIR, '2ch.html')
39
- NOENCODING_FILE = File.join(ASSETS_DIR, 'noencoding.html')
40
- PO_SCHEMA_FILE = File.join(ASSETS_DIR, 'po.xsd')
41
- PO_XML_FILE = File.join(ASSETS_DIR, 'po.xml')
42
- SHIFT_JIS_HTML = File.join(ASSETS_DIR, 'shift_jis.html')
43
- SHIFT_JIS_NO_CHARSET= File.join(ASSETS_DIR, 'shift_jis_no_charset.html')
44
- SHIFT_JIS_XML = File.join(ASSETS_DIR, 'shift_jis.xml')
45
- SNUGGLES_FILE = File.join(ASSETS_DIR, 'snuggles.xml')
46
- XML_FILE = File.join(ASSETS_DIR, 'staff.xml')
47
- XML_XINCLUDE_FILE = File.join(ASSETS_DIR, 'xinclude.xml')
48
- XML_ATOM_FILE = File.join(ASSETS_DIR, 'atom.xml')
49
- XSLT_FILE = File.join(ASSETS_DIR, 'staff.xslt')
50
- XPATH_FILE = File.join(ASSETS_DIR, 'slow-xpath.xml')
51
-
52
- def teardown
53
- if ENV['NOKOGIRI_GC']
54
- STDOUT.putc '!'
55
- if RUBY_PLATFORM =~ /java/
56
- require 'java'
57
- java.lang.System.gc
58
- else
59
- GC.start
60
- end
61
- end
62
- end
63
-
64
- def stress_memory_while &block
65
- # force the test to explicitly declare a skip
66
- raise "JRuby doesn't do GC" if Nokogiri.jruby?
67
-
68
- old_stress = GC.stress
69
- begin
70
- GC.stress = true
71
- yield
72
- ensure
73
- GC.stress = old_stress
74
- end
75
- end
76
-
77
- def assert_indent amount, doc, message = nil
78
- nodes = []
79
- doc.traverse do |node|
80
- nodes << node if node.text? && node.blank?
81
- end
82
- assert nodes.length > 0
83
- nodes.each do |node|
84
- len = node.content.gsub(/[\r\n]/, '').length
85
- assert_equal(0, len % amount, message)
86
- end
87
- end
88
-
89
- def util_decorate(document, decorator_module)
90
- document.decorators(XML::Node) << decorator_module
91
- document.decorators(XML::NodeSet) << decorator_module
92
- document.decorate!
93
- end
94
-
95
- #
96
- # Test::Unit backwards compatibility section
97
- #
98
- alias :assert_no_match :refute_match
99
- alias :assert_not_nil :refute_nil
100
- alias :assert_raise :assert_raises
101
- alias :assert_not_equal :refute_equal
102
-
103
- def assert_not_send send_ary, m = nil
104
- recv, msg, *args = send_ary
105
- m = message(m) {
106
- "Expected #{mu_pp(recv)}.#{msg}(*#{mu_pp(args)}) to return false" }
107
- assert !recv.__send__(msg, *args), m
108
- end unless method_defined?(:assert_not_send)
109
- end
110
-
111
- module SAX
112
- class TestCase < Nokogiri::TestCase
113
- class Doc < XML::SAX::Document
114
- attr_reader :start_elements, :start_document_called
115
- attr_reader :end_elements, :end_document_called
116
- attr_reader :data, :comments, :cdata_blocks, :start_elements_namespace
117
- attr_reader :errors, :warnings, :end_elements_namespace
118
- attr_reader :xmldecls
119
- attr_reader :processing_instructions
120
-
121
- def xmldecl version, encoding, standalone
122
- @xmldecls = [version, encoding, standalone].compact
123
- super
124
- end
125
-
126
- def start_document
127
- @start_document_called = true
128
- super
129
- end
130
-
131
- def end_document
132
- @end_document_called = true
133
- super
134
- end
135
-
136
- def error error
137
- (@errors ||= []) << error
138
- super
139
- end
140
-
141
- def warning warning
142
- (@warning ||= []) << warning
143
- super
144
- end
145
-
146
- def start_element *args
147
- (@start_elements ||= []) << args
148
- super
149
- end
150
-
151
- def start_element_namespace *args
152
- (@start_elements_namespace ||= []) << args
153
- super
154
- end
155
-
156
- def end_element *args
157
- (@end_elements ||= []) << args
158
- super
159
- end
160
-
161
- def end_element_namespace *args
162
- (@end_elements_namespace ||= []) << args
163
- super
164
- end
165
-
166
- def characters string
167
- @data ||= []
168
- @data += [string]
169
- super
170
- end
171
-
172
- def comment string
173
- @comments ||= []
174
- @comments += [string]
175
- super
176
- end
177
-
178
- def cdata_block string
179
- @cdata_blocks ||= []
180
- @cdata_blocks += [string]
181
- super
182
- end
183
-
184
- def processing_instruction name, content
185
- @processing_instructions ||= []
186
- @processing_instructions << [name, content]
187
- end
188
- end
189
-
190
- # This document will help us to test the strict order of items.
191
-
192
- class DocWithOrderedItems < XML::SAX::Document
193
- attr_reader :items
194
-
195
- def initialize
196
- # [
197
- # [ :method_1, argument_1, ... ],
198
- # [ :method_2, argument_2, ... ],
199
- # ...
200
- # ]
201
- @items = Items.new
202
- end
203
-
204
- [
205
- :xmldecl,
206
- :start_document, :end_document,
207
- :start_element, :end_element,
208
- :start_element_namespace, :end_element_namespace,
209
- :characters, :comment, :cdata_block,
210
- :processing_instruction,
211
- :error, :warning
212
- ]
213
- .each do |name|
214
- define_method name do |*arguments|
215
- @items << [name, *arguments]
216
- super *arguments
217
- end
218
- end
219
-
220
- class Items < Array
221
- def get_root_content root_name
222
- items = clone
223
- is_inside_root = false
224
-
225
- items.select! do |item|
226
- method_name = item[0]
227
- element_name = item[1]
228
-
229
- case method_name
230
- when :start_element, :start_element_namespace
231
- if element_name == root_name
232
- is_inside_root = true
233
- next false
234
- end
235
-
236
- when :end_element, :end_element_namespace
237
- is_inside_root = false if element_name == root_name and is_inside_root
238
- end
239
-
240
- is_inside_root
241
- end
242
-
243
- items
244
- end
245
-
246
- def select_methods(names)
247
- items = clone
248
-
249
- items.select! do |item|
250
- name = item[0]
251
- names.include? name
252
- end
253
-
254
- items
255
- end
256
-
257
- def strip_text! method_names
258
- each do |item|
259
- method_name = item[0]
260
- text = item[1]
261
-
262
- text.strip! if method_names.include? method_name
263
- end
264
-
265
- nil
266
- end
267
- end
268
- end
269
- end
270
- end
271
- end
data/test/html/sax/test_parser.rb DELETED
@@ -1,168 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- require "helper"
3
-
4
- module Nokogiri
5
- module HTML
6
- module SAX
7
- class TestParser < Nokogiri::SAX::TestCase
8
- def setup
9
- super
10
- @parser = HTML::SAX::Parser.new(Doc.new)
11
- end
12
-
13
- def test_parse_empty_document
14
- # This caused a segfault in libxml 2.6.x
15
- assert_nil @parser.parse ''
16
- end
17
-
18
- def test_parse_empty_file
19
- # Make sure empty files don't break stuff
20
- empty_file_name = File.join(ASSETS_DIR, 'bogus.xml')
21
- # assert_nothing_raised do
22
- @parser.parse_file empty_file_name
23
- # end
24
- end
25
-
26
- def test_parse_file
27
- @parser.parse_file(HTML_FILE)
28
-
29
- # Take a look at the comment in test_parse_document to know
30
- # a possible reason to this difference.
31
- if Nokogiri.uses_libxml?
32
- assert_equal 1111, @parser.document.end_elements.length
33
- else
34
- assert_equal 1120, @parser.document.end_elements.length
35
- end
36
- end
37
-
38
- def test_parse_file_nil_argument
39
- assert_raises(ArgumentError) {
40
- @parser.parse_file(nil)
41
- }
42
- end
43
-
44
- def test_parse_file_non_existant
45
- assert_raise Errno::ENOENT do
46
- @parser.parse_file('there_is_no_reasonable_way_this_file_exists')
47
- end
48
- end
49
-
50
- def test_parse_file_with_dir
51
- assert_raise Errno::EISDIR do
52
- @parser.parse_file(File.dirname(__FILE__))
53
- end
54
- end
55
-
56
- def test_parse_memory_nil
57
- assert_raise ArgumentError do
58
- @parser.parse_memory(nil)
59
- end
60
- end
61
-
62
- def test_parse_force_encoding
63
- @parser.parse_memory(<<-HTML, 'UTF-8')
64
- <meta http-equiv="Content-Type" content="text/html; charset=windows-1251">
65
- Информация
66
- HTML
67
- assert_equal("Информация",
68
- @parser.document.data.join.strip)
69
- end
70
-
71
- def test_parse_document
72
- @parser.parse_memory(<<-eoxml)
73
- <p>Paragraph 1</p>
74
- <p>Paragraph 2</p>
75
- eoxml
76
-
77
- # JRuby version is different because of the internal implementation
78
- # JRuby version uses NekoHTML which inserts empty "head" elements.
79
- #
80
- # Currently following features are set:
81
- # "http://cyberneko.org/html/properties/names/elems" => "lower"
82
- # "http://cyberneko.org/html/properties/names/attrs" => "lower"
83
- if Nokogiri.uses_libxml?
84
- assert_equal([["html", []], ["body", []], ["p", []], ["p", []]],
85
- @parser.document.start_elements)
86
- else
87
- assert_equal([["html", []], ["head", []], ["body", []], ["p", []], ["p", []]],
88
- @parser.document.start_elements)
89
- end
90
- end
91
-
92
- def test_parser_attributes
93
- html = <<-eohtml
94
- <html>
95
- <head>
96
- <title>hello</title>
97
- </head>
98
- <body>
99
- <img src="face.jpg" title="daddy &amp; me">
100
- <hr noshade size="2">
101
- </body>
102
- </html>
103
- eohtml
104
-
105
- block_called = false
106
- @parser.parse(html) { |ctx|
107
- block_called = true
108
- ctx.replace_entities = true
109
- }
110
-
111
- assert block_called
112
-
113
- noshade_value = if Nokogiri.uses_libxml? && Nokogiri::VERSION_INFO['libxml']['loaded'] < '2.7.7'
114
- ['noshade', 'noshade']
115
- else
116
- ['noshade', nil]
117
- end
118
-
119
- assert_equal [
120
- ['html', []],
121
- ['head', []],
122
- ['title', []],
123
- ['body', []],
124
- ['img', [
125
- ['src', 'face.jpg'],
126
- ['title', 'daddy & me']
127
- ]],
128
- ['hr', [
129
- noshade_value,
130
- ['size', '2']
131
- ]]
132
- ], @parser.document.start_elements
133
- end
134
-
135
- HTML_WITH_BR_TAG = <<-EOF
136
- <html>
137
- <head></head>
138
- <body>
139
- <div>
140
- hello
141
- <br>
142
- </div>
143
-
144
- <div>
145
- hello again
146
- </div>
147
- </body>
148
- </html>
149
- EOF
150
-
151
- def test_parsing_dom_error_from_string
152
- @parser.parse(HTML_WITH_BR_TAG)
153
- assert_equal 6, @parser.document.start_elements.length
154
- end
155
-
156
- def test_parsing_dom_error_from_io
157
- @parser.parse(StringIO.new(HTML_WITH_BR_TAG))
158
- assert_equal 6, @parser.document.start_elements.length
159
- end
160
-
161
-
162
- def test_empty_processing_instruction
163
- @parser.parse_memory("<strong>this will segfault<?strong>")
164
- end
165
- end
166
- end
167
- end
168
- end
data/test/html/sax/test_parser_context.rb DELETED
@@ -1,46 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- require "helper"
4
-
5
- module Nokogiri
6
- module HTML
7
- module SAX
8
- class TestParserContext < Nokogiri::SAX::TestCase
9
- def test_from_io
10
- ctx = ParserContext.new StringIO.new('fo'), 'UTF-8'
11
- assert ctx
12
- end
13
-
14
- def test_from_string
15
- ctx = ParserContext.new 'blah blah'
16
- assert ctx
17
- end
18
-
19
- def test_parse_with
20
- ctx = ParserContext.new 'blah'
21
- assert_raises ArgumentError do
22
- ctx.parse_with nil
23
- end
24
- end
25
-
26
- def test_parse_with_sax_parser
27
- # assert_nothing_raised do
28
- xml = "<root />"
29
- ctx = ParserContext.new xml
30
- parser = Parser.new Doc.new
31
- ctx.parse_with parser
32
- # end
33
- end
34
-
35
- def test_from_file
36
- # assert_nothing_raised do
37
- ctx = ParserContext.file HTML_FILE, 'UTF-8'
38
- parser = Parser.new Doc.new
39
- ctx.parse_with parser
40
- # end
41
- end
42
- end
43
- end
44
- end
45
- end
46
-
data/test/html/sax/test_parser_text.rb DELETED
@@ -1,163 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- require "helper"
3
-
4
- module Nokogiri
5
- module HTML
6
- module SAX
7
- class TestParserText < Nokogiri::SAX::TestCase
8
- def setup
9
- super
10
- @doc = DocWithOrderedItems.new
11
- @parser = HTML::SAX::Parser.new @doc
12
- end
13
-
14
- def test_texts_order
15
- html = <<-eohtml
16
- <!DOCTYPE html>
17
- <html>
18
- <head></head>
19
- <body>
20
- text 0
21
- <p>
22
- text 1
23
- <span>text 2</span>
24
- text 3
25
- </p>
26
-
27
- text 4
28
- <!--
29
- text 5
30
- -->
31
-
32
- <p>
33
- <!-- text 6 -->
34
- <span><!-- text 7 --></span>
35
- <!-- text 8 -->
36
- </p>
37
-
38
- <!-- text 9 -->
39
- </body>
40
- </html>
41
- eohtml
42
-
43
- @parser.parse html
44
- items = @doc.items.get_root_content "body"
45
- items = items.select_methods [
46
- :start_element, :end_element,
47
- :characters, :comment
48
- ]
49
- items.strip_text! [:characters, :comment]
50
-
51
- assert_equal [
52
- [:characters, 'text 0'],
53
-
54
- [:start_element, 'p', []],
55
- [:characters, 'text 1'],
56
-
57
- [:start_element, 'span', []],
58
- [:characters, 'text 2'],
59
- [:end_element, 'span'],
60
-
61
- [:characters, 'text 3'],
62
- [:end_element, 'p'],
63
-
64
- [:characters, 'text 4'],
65
- [:comment, 'text 5'],
66
- [:characters, ''],
67
-
68
- [:start_element, 'p', []],
69
- [:characters, ''],
70
- [:comment, 'text 6'],
71
- [:characters, ''],
72
-
73
- [:start_element, 'span', []],
74
- [:comment, 'text 7'],
75
- [:end_element, 'span'],
76
- [:characters, ''],
77
-
78
- [:comment, 'text 8'],
79
- [:characters, ''],
80
- [:end_element, 'p'],
81
- [:characters, ''],
82
-
83
- [:comment, 'text 9'],
84
- [:characters, '']
85
- ], items
86
-
87
- nil
88
- end
89
-
90
- def text_whitespace
91
- html = <<-eohtml
92
- <!DOCTYPE html>
93
- <html>
94
- <head></head>
95
- <body>
96
- <p>
97
- <span></span>
98
- <span> </span>
99
- <span>
100
-
101
- </span>
102
- </p>
103
- <p>
104
- <!---->
105
- <!-- -->
106
- <!--
107
-
108
- -->
109
- </p>
110
- </body>
111
- </html>
112
- eohtml
113
-
114
- @parser.parse html
115
- items = @doc.items.get_root_content "body"
116
- items = items.select_methods [
117
- :start_element, :end_element,
118
- :characters, :comment
119
- ]
120
- items.strip_text! [:characters, :comment]
121
-
122
- assert_equal [
123
- [:characters, ''],
124
- [:start_element, 'p', []],
125
-
126
- [:characters, ''],
127
- [:start_element, 'span', []],
128
- [:end_element, 'span'],
129
- [:characters, ''],
130
-
131
- [:start_element, 'span', []],
132
- [:characters, ''],
133
- [:end_element, 'span'],
134
- [:characters, ''],
135
-
136
- [:start_element, 'span', []],
137
- [:characters, ''],
138
- [:end_element, 'span'],
139
- [:characters, ''],
140
-
141
- [:end_element, 'p'],
142
- [:characters, ''],
143
-
144
- [:start_element, 'p', []],
145
- [:characters, ''],
146
-
147
- [:comment, ''],
148
- [:characters, ''],
149
- [:comment, ''],
150
- [:characters, ''],
151
- [:comment, ''],
152
- [:characters, ''],
153
-
154
- [:end_element, 'p'],
155
- [:characters, '']
156
- ], items
157
-
158
- nil
159
- end
160
- end
161
- end
162
- end
163
- end