nokogiri-maglev- 1.5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +26 -0
- data/.gemtest +0 -0
- data/CHANGELOG.ja.rdoc +544 -0
- data/CHANGELOG.rdoc +532 -0
- data/Manifest.txt +283 -0
- data/README.ja.rdoc +106 -0
- data/README.rdoc +174 -0
- data/Rakefile +171 -0
- data/bin/nokogiri +53 -0
- data/ext/nokogiri/depend +358 -0
- data/ext/nokogiri/extconf.rb +124 -0
- data/ext/nokogiri/html_document.c +154 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +276 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +94 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/nokogiri.c +115 -0
- data/ext/nokogiri/nokogiri.h +160 -0
- data/ext/nokogiri/st.c +576 -0
- data/ext/nokogiri/xml_attr.c +94 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +56 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +54 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +478 -0
- data/ext/nokogiri/xml_document.h +23 -0
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +202 -0
- data/ext/nokogiri/xml_dtd.h +10 -0
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_encoding_handler.c +79 -0
- data/ext/nokogiri/xml_encoding_handler.h +8 -0
- data/ext/nokogiri/xml_entity_decl.c +110 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +52 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +56 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
- data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
- data/ext/nokogiri/xml_namespace.c +84 -0
- data/ext/nokogiri/xml_namespace.h +13 -0
- data/ext/nokogiri/xml_node.c +1397 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +418 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +56 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +684 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +162 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +293 -0
- data/ext/nokogiri/xml_sax_parser.h +39 -0
- data/ext/nokogiri/xml_sax_parser_context.c +199 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +115 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +205 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +58 -0
- data/ext/nokogiri/xml_syntax_error.h +13 -0
- data/ext/nokogiri/xml_text.c +50 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath_context.c +315 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +265 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/nokogiri.rb +127 -0
- data/lib/nokogiri/css.rb +27 -0
- data/lib/nokogiri/css/node.rb +99 -0
- data/lib/nokogiri/css/parser.rb +677 -0
- data/lib/nokogiri/css/parser.y +237 -0
- data/lib/nokogiri/css/parser_extras.rb +91 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +152 -0
- data/lib/nokogiri/css/tokenizer.rex +55 -0
- data/lib/nokogiri/css/xpath_visitor.rb +171 -0
- data/lib/nokogiri/decorators/slop.rb +35 -0
- data/lib/nokogiri/html.rb +36 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +213 -0
- data/lib/nokogiri/html/document_fragment.rb +41 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/element_description_defaults.rb +671 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +52 -0
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +88 -0
- data/lib/nokogiri/xml.rb +67 -0
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +426 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +234 -0
- data/lib/nokogiri/xml/document_fragment.rb +98 -0
- data/lib/nokogiri/xml/dtd.rb +22 -0
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +19 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node.rb +915 -0
- data/lib/nokogiri/xml/node/save_options.rb +61 -0
- data/lib/nokogiri/xml/node_set.rb +357 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +93 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +112 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +4 -0
- data/lib/nokogiri/xml/sax/document.rb +164 -0
- data/lib/nokogiri/xml/sax/parser.rb +115 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +63 -0
- data/lib/nokogiri/xml/syntax_error.rb +47 -0
- data/lib/nokogiri/xml/text.rb +9 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +52 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +90 -0
- data/nokogiri_help_responses.md +40 -0
- data/tasks/cross_compile.rb +152 -0
- data/tasks/nokogiri.org.rb +18 -0
- data/tasks/test.rb +94 -0
- data/test/css/test_nthiness.rb +159 -0
- data/test/css/test_parser.rb +303 -0
- data/test/css/test_tokenizer.rb +198 -0
- data/test/css/test_xpath_visitor.rb +85 -0
- data/test/decorators/test_slop.rb +16 -0
- data/test/files/2ch.html +108 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/encoding.html +82 -0
- data/test/files/encoding.xhtml +84 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/metacharset.html +10 -0
- data/test/files/noencoding.html +47 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/helper.rb +173 -0
- data/test/html/sax/test_parser.rb +139 -0
- data/test/html/sax/test_parser_context.rb +48 -0
- data/test/html/test_builder.rb +165 -0
- data/test/html/test_document.rb +472 -0
- data/test/html/test_document_encoding.rb +138 -0
- data/test/html/test_document_fragment.rb +255 -0
- data/test/html/test_element_description.rb +101 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +193 -0
- data/test/html/test_node_encoding.rb +27 -0
- data/test/test_convert_xpath.rb +135 -0
- data/test/test_css_cache.rb +45 -0
- data/test/test_encoding_handler.rb +46 -0
- data/test/test_memory_leak.rb +72 -0
- data/test/test_nokogiri.rb +133 -0
- data/test/test_reader.rb +425 -0
- data/test/test_soap4r_sax.rb +52 -0
- data/test/test_xslt_transforms.rb +193 -0
- data/test/xml/node/test_save_options.rb +28 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +338 -0
- data/test/xml/sax/test_parser_context.rb +113 -0
- data/test/xml/sax/test_push_parser.rb +156 -0
- data/test/xml/test_attr.rb +65 -0
- data/test/xml/test_attribute_decl.rb +86 -0
- data/test/xml/test_builder.rb +227 -0
- data/test/xml/test_cdata.rb +50 -0
- data/test/xml/test_comment.rb +29 -0
- data/test/xml/test_document.rb +697 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +192 -0
- data/test/xml/test_dtd.rb +107 -0
- data/test/xml/test_dtd_encoding.rb +33 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +122 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +70 -0
- data/test/xml/test_node.rb +917 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +107 -0
- data/test/xml/test_node_reparenting.rb +334 -0
- data/test/xml/test_node_set.rb +742 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +94 -0
- data/test/xml/test_syntax_error.rb +12 -0
- data/test/xml/test_text.rb +47 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +237 -0
- data/test/xslt/test_custom_functions.rb +94 -0
- data/test/xslt/test_exception_handling.rb +37 -0
- metadata +548 -0
@@ -0,0 +1,14 @@
|
|
1
|
+
require "helper"
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module HTML
|
5
|
+
class TestNamedCharacters < Nokogiri::TestCase
|
6
|
+
def test_named_character
|
7
|
+
copy = NamedCharacters.get('copy')
|
8
|
+
assert_equal 169, NamedCharacters['copy']
|
9
|
+
assert_equal copy.value, NamedCharacters['copy']
|
10
|
+
assert copy.description
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,193 @@
|
|
1
|
+
require "helper"
|
2
|
+
|
3
|
+
# require 'nkf' # skip Network Kanji Filter for now
|
4
|
+
|
5
|
+
module Nokogiri
|
6
|
+
module HTML
|
7
|
+
class TestNode < Nokogiri::TestCase
|
8
|
+
def setup
|
9
|
+
super
|
10
|
+
@html = Nokogiri::HTML(<<-eohtml)
|
11
|
+
<html>
|
12
|
+
<head></head>
|
13
|
+
<body>
|
14
|
+
<div class='baz'><a href="foo" class="bar">first</a></div>
|
15
|
+
</body>
|
16
|
+
</html>
|
17
|
+
eohtml
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_to_a
|
21
|
+
assert_equal [['class', 'bar'], ['href', 'foo']],@html.at('a').to_a.sort
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_attr
|
25
|
+
node = @html.at('div.baz')
|
26
|
+
assert_equal node['class'], node.attr('class')
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_get_attribute
|
30
|
+
element = @html.at('div')
|
31
|
+
assert_equal 'baz', element.get_attribute('class')
|
32
|
+
assert_equal 'baz', element['class']
|
33
|
+
element['href'] = "javascript:alert(\"AGGA-KA-BOO!\")"
|
34
|
+
assert_match(/%22AGGA-KA-BOO!%22/, element.to_html)
|
35
|
+
end
|
36
|
+
|
37
|
+
def test_css_path_round_trip
|
38
|
+
doc = Nokogiri::HTML(File.read(HTML_FILE))
|
39
|
+
%w{ #header small div[2] div.post body }.each do |css_sel|
|
40
|
+
ele = doc.at css_sel
|
41
|
+
assert_equal ele, doc.at(ele.css_path), ele.css_path
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def test_path_round_trip
|
46
|
+
doc = Nokogiri::HTML(File.read(HTML_FILE))
|
47
|
+
%w{ #header small div[2] div.post body }.each do |css_sel|
|
48
|
+
ele = doc.at css_sel
|
49
|
+
assert_equal ele, doc.at(ele.path), ele.path
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def test_append_with_document
|
54
|
+
assert_raises(ArgumentError) do
|
55
|
+
@html.root << Nokogiri::HTML::Document.new
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
###
|
60
|
+
# Make sure a document that doesn't declare a meta encoding returns
|
61
|
+
# nil.
|
62
|
+
def test_meta_encoding
|
63
|
+
assert_nil @html.meta_encoding
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_description
|
67
|
+
assert desc = @html.at('a.bar').description
|
68
|
+
assert_equal 'a', desc.name
|
69
|
+
end
|
70
|
+
|
71
|
+
def test_ancestors_with_selector
|
72
|
+
assert node = @html.at('a.bar').child
|
73
|
+
assert list = node.ancestors('.baz')
|
74
|
+
assert_equal 1, list.length
|
75
|
+
assert_equal 'div', list.first.name
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_matches_inside_fragment
|
79
|
+
fragment = DocumentFragment.new @html
|
80
|
+
fragment << XML::Node.new('a', @html)
|
81
|
+
|
82
|
+
a = fragment.children.last
|
83
|
+
assert a.matches?('a'), 'a should match'
|
84
|
+
end
|
85
|
+
|
86
|
+
def test_css_matches?
|
87
|
+
assert node = @html.at('a.bar')
|
88
|
+
assert node.matches?('a.bar')
|
89
|
+
end
|
90
|
+
|
91
|
+
def test_xpath_matches?
|
92
|
+
assert node = @html.at('//a')
|
93
|
+
assert node.matches?('//a')
|
94
|
+
end
|
95
|
+
|
96
|
+
def test_unlink_then_swap
|
97
|
+
node = @html.at('a')
|
98
|
+
node.unlink
|
99
|
+
|
100
|
+
another_node = @html.at('div')
|
101
|
+
assert another_node, 'should have a node'
|
102
|
+
|
103
|
+
# This used to segv
|
104
|
+
assert_nothing_raised do
|
105
|
+
node.add_previous_sibling another_node
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
def test_z_swap
|
110
|
+
# SEGV in spacePop from xmlParseElement
|
111
|
+
@html.at('div').swap('<a href="foo">bar</a>')
|
112
|
+
a_tag = @html.css('a').first
|
113
|
+
assert_equal 'body', a_tag.parent.name
|
114
|
+
assert_equal 0, @html.css('div').length
|
115
|
+
end
|
116
|
+
|
117
|
+
def test_z_swap_with_regex_characters
|
118
|
+
# SEGV in spacePop from xmlParseElement
|
119
|
+
@html.at('div').swap('<a href="foo">ba)r</a>')
|
120
|
+
a_tag = @html.css('a').first
|
121
|
+
assert_equal 'ba)r', a_tag.text
|
122
|
+
end
|
123
|
+
|
124
|
+
def test_attribute_decodes_entities
|
125
|
+
node = @html.at('div')
|
126
|
+
node['href'] = 'foo&bar'
|
127
|
+
assert_equal 'foo&bar', node['href']
|
128
|
+
node['href'] += '&baz'
|
129
|
+
assert_equal 'foo&bar&baz', node['href']
|
130
|
+
end
|
131
|
+
|
132
|
+
def test_parse_config_option
|
133
|
+
node = @html.at('div')
|
134
|
+
options = nil
|
135
|
+
node.parse("<div></div>") do |config|
|
136
|
+
options = config
|
137
|
+
end
|
138
|
+
assert_equal Nokogiri::XML::ParseOptions::DEFAULT_HTML, options.to_i
|
139
|
+
end
|
140
|
+
|
141
|
+
def test_z_fragment_handler_does_not_regurge_on_invalid_attributes
|
142
|
+
# SEGV in spacePop from xmlParseElement
|
143
|
+
iframe = %Q{<iframe style="width: 0%; height: 0px" src="http://someurl" allowtransparency></iframe>}
|
144
|
+
assert_nothing_raised { @html.at('div').fragment(iframe) }
|
145
|
+
end
|
146
|
+
|
147
|
+
def test_z_fragment # SEGV in spacePop from xmlParseElement
|
148
|
+
fragment = @html.fragment(<<-eohtml)
|
149
|
+
hello
|
150
|
+
<div class="foo">
|
151
|
+
<p>bar</p>
|
152
|
+
</div>
|
153
|
+
world
|
154
|
+
eohtml
|
155
|
+
assert_match(/^hello/, fragment.inner_html.strip)
|
156
|
+
assert_equal 3, fragment.children.length
|
157
|
+
assert p_tag = fragment.css('p').first
|
158
|
+
assert_equal 'div', p_tag.parent.name
|
159
|
+
assert_equal 'foo', p_tag.parent['class']
|
160
|
+
end
|
161
|
+
|
162
|
+
def test_fragment_serialization
|
163
|
+
fragment = Nokogiri::HTML.fragment("<div>foo</div>")
|
164
|
+
assert_equal "<div>foo</div>", fragment.serialize.chomp
|
165
|
+
assert_equal "<div>foo</div>", fragment.to_xml.chomp
|
166
|
+
assert_equal "<div>foo</div>", fragment.inner_html
|
167
|
+
assert_equal "<div>foo</div>", fragment.to_html
|
168
|
+
assert_equal "<div>foo</div>", fragment.to_s
|
169
|
+
end
|
170
|
+
|
171
|
+
def test_to_html_does_not_contain_entities
|
172
|
+
return unless defined?(NKF) # NKF is not implemented on Rubinius as of 2009-11-23
|
173
|
+
html = NKF.nkf("-e --msdos", <<-EOH)
|
174
|
+
<html><body>
|
175
|
+
<p> test paragraph
|
176
|
+
foo bar </p>
|
177
|
+
</body></html>
|
178
|
+
EOH
|
179
|
+
nokogiri = Nokogiri::HTML.parse(html)
|
180
|
+
|
181
|
+
if RUBY_PLATFORM =~ /java/
|
182
|
+
# NKF linebreak modes are not supported as of jruby 1.2
|
183
|
+
# see http://jira.codehaus.org/browse/JRUBY-3602 for status
|
184
|
+
assert_equal "<p>testparagraph\nfoobar</p>",
|
185
|
+
nokogiri.at("p").to_html.gsub(/ /, '')
|
186
|
+
else
|
187
|
+
assert_equal "<p>testparagraph\r\nfoobar</p>",
|
188
|
+
nokogiri.at("p").to_html.gsub(/ /, '')
|
189
|
+
end
|
190
|
+
end
|
191
|
+
end
|
192
|
+
end
|
193
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require "helper"
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
module HTML
|
6
|
+
if RUBY_VERSION =~ /^1\.9/
|
7
|
+
class TestNodeEncoding < Nokogiri::TestCase
|
8
|
+
def test_inner_html
|
9
|
+
doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
|
10
|
+
|
11
|
+
hello = "こんにちは"
|
12
|
+
|
13
|
+
contents = doc.at('h2').inner_html
|
14
|
+
assert_equal doc.encoding, contents.encoding.name
|
15
|
+
assert_match hello.encode('Shift_JIS'), contents
|
16
|
+
|
17
|
+
contents = doc.at('h2').inner_html(:encoding => 'UTF-8')
|
18
|
+
assert_match hello, contents
|
19
|
+
|
20
|
+
doc.encoding = 'UTF-8'
|
21
|
+
contents = doc.at('h2').inner_html
|
22
|
+
assert_match hello, contents
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
require "helper"
|
2
|
+
|
3
|
+
class TestConvertXPath < Nokogiri::TestCase
|
4
|
+
|
5
|
+
def setup
|
6
|
+
super
|
7
|
+
@N = Nokogiri(File.read(HTML_FILE))
|
8
|
+
end
|
9
|
+
|
10
|
+
def assert_syntactical_equivalence(hpath, xpath, match, &blk)
|
11
|
+
blk ||= lambda {|j| j.first}
|
12
|
+
assert_equal match, blk.call(@N.search(xpath)), "xpath result did not match"
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_child_tag
|
16
|
+
assert_syntactical_equivalence("h1[a]", ".//h1[child::a]", "Tender Lovemaking") do |j|
|
17
|
+
j.inner_text
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_child_tag_equals
|
22
|
+
assert_syntactical_equivalence("h1[a='Tender Lovemaking']", ".//h1[child::a = 'Tender Lovemaking']", "Tender Lovemaking") do |j|
|
23
|
+
j.inner_text
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_filter_contains
|
28
|
+
assert_syntactical_equivalence("title:contains('Tender')", ".//title[contains(., 'Tender')]",
|
29
|
+
"Tender Lovemaking ") do |j|
|
30
|
+
j.inner_text
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_filter_comment
|
35
|
+
assert_syntactical_equivalence("div comment()[2]", ".//div//comment()[position() = 2]", "<!-- end of header -->") do |j|
|
36
|
+
j.first.to_s
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_filter_text
|
41
|
+
assert_syntactical_equivalence("a[text()]", ".//a[normalize-space(child::text())]", "<a href=\"http://tenderlovemaking.com\">Tender Lovemaking</a>") do |j|
|
42
|
+
j.first.to_s
|
43
|
+
end
|
44
|
+
assert_syntactical_equivalence("a[text()='Tender Lovemaking']", ".//a[normalize-space(child::text()) = 'Tender Lovemaking']", "<a href=\"http://tenderlovemaking.com\">Tender Lovemaking</a>") do |j|
|
45
|
+
j.first.to_s
|
46
|
+
end
|
47
|
+
assert_syntactical_equivalence("a/text()", ".//a/child::text()", "Tender Lovemaking") do |j|
|
48
|
+
j.first.to_s
|
49
|
+
end
|
50
|
+
assert_syntactical_equivalence("h2//a[text()!='Back Home!']", ".//h2//a[normalize-space(child::text()) != 'Back Home!']", "Meow meow meow meow meow") do |j|
|
51
|
+
j.first.inner_text
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
def test_filter_by_attr
|
56
|
+
assert_syntactical_equivalence("a[@href='http://blog.geminigeek.com/wordpress-theme']",
|
57
|
+
".//a[@href = 'http://blog.geminigeek.com/wordpress-theme']",
|
58
|
+
"http://blog.geminigeek.com/wordpress-theme") do |j|
|
59
|
+
j.first["href"]
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def test_css_id
|
64
|
+
assert_syntactical_equivalence("#linkcat-7", ".//*[@id = 'linkcat-7']", "linkcat-7") do |j|
|
65
|
+
j.first["id"]
|
66
|
+
end
|
67
|
+
assert_syntactical_equivalence("li#linkcat-7", ".//li[@id = 'linkcat-7']", "linkcat-7") do |j|
|
68
|
+
j.first["id"]
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
def test_css_class
|
73
|
+
assert_syntactical_equivalence(".cat-item-15", ".//*[contains(concat(' ', @class, ' '), ' cat-item-15 ')]",
|
74
|
+
"cat-item cat-item-15") do |j|
|
75
|
+
j.first["class"]
|
76
|
+
end
|
77
|
+
assert_syntactical_equivalence("li.cat-item-15", ".//li[contains(concat(' ', @class, ' '), ' cat-item-15 ')]",
|
78
|
+
"cat-item cat-item-15") do |j|
|
79
|
+
j.first["class"]
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
def test_css_tags
|
84
|
+
assert_syntactical_equivalence("div li a", ".//div//li//a", "http://brobinius.org/") do |j|
|
85
|
+
j.first.inner_text
|
86
|
+
end
|
87
|
+
assert_syntactical_equivalence("div li > a", ".//div//li/a", "http://brobinius.org/") do |j|
|
88
|
+
j.first.inner_text
|
89
|
+
end
|
90
|
+
assert_syntactical_equivalence("h1 ~ small", ".//small[preceding-sibling::h1]", "The act of making love, tenderly.") do |j|
|
91
|
+
j.first.inner_text
|
92
|
+
end
|
93
|
+
assert_syntactical_equivalence("h1 ~ small", ".//small[preceding-sibling::h1]", "The act of making love, tenderly.") do |j|
|
94
|
+
j.first.inner_text
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
def test_positional
|
99
|
+
assert_syntactical_equivalence("div/div:first()", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n".gsub(/[\r\n]/, '')) do |j|
|
100
|
+
j.first.inner_text.gsub(/[\r\n]/, '')
|
101
|
+
end
|
102
|
+
assert_syntactical_equivalence("div/div:first", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n".gsub(/[\r\n]/, '')) do |j|
|
103
|
+
j.first.inner_text.gsub(/[\r\n]/, '')
|
104
|
+
end
|
105
|
+
assert_syntactical_equivalence("div//a:last()", ".//div//a[position() = last()]", "Wordpress") do |j|
|
106
|
+
j.last.inner_text
|
107
|
+
end
|
108
|
+
assert_syntactical_equivalence("div//a:last", ".//div//a[position() = last()]", "Wordpress") do |j|
|
109
|
+
j.last.inner_text
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
def test_multiple_filters
|
114
|
+
assert_syntactical_equivalence("a[@rel='bookmark'][1]", ".//a[@rel = 'bookmark' and position() = 1]", "Back Home!") do |j|
|
115
|
+
j.first.inner_text
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
# TODO:
|
120
|
+
# doc/'title ~ link' -> links that are siblings of title
|
121
|
+
# doc/'p[@class~="final"]' -> class includes string (whitespacy)
|
122
|
+
# doc/'p[text()*="final"]' -> text includes string (index) (broken: always returns true?)
|
123
|
+
# doc/'p[text()$="final"]' -> /final$/
|
124
|
+
# doc/'p[text()|="final"]' -> /^final$/
|
125
|
+
# doc/'p[text()^="final"]' -> string starts with 'final'
|
126
|
+
# nth_first
|
127
|
+
# nth_last
|
128
|
+
# even
|
129
|
+
# odd
|
130
|
+
# first-child, nth-child, last-child, nth-last-child, nth-last-of-type
|
131
|
+
# only-of-type, only-child
|
132
|
+
# parent
|
133
|
+
# empty
|
134
|
+
# root
|
135
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require "helper"
|
2
|
+
|
3
|
+
class TestCssCache < Nokogiri::TestCase
|
4
|
+
|
5
|
+
def setup
|
6
|
+
super
|
7
|
+
@css = "a1 > b2 > c3"
|
8
|
+
@parse_result = Nokogiri::CSS.parse(@css)
|
9
|
+
@to_xpath_result = @parse_result.map {|ast| ast.to_xpath}
|
10
|
+
Nokogiri::CSS::Parser.class_eval do
|
11
|
+
class << @cache
|
12
|
+
alias :old_bracket :[]
|
13
|
+
attr_reader :count
|
14
|
+
def [](key)
|
15
|
+
@count ||= 0
|
16
|
+
@count += 1
|
17
|
+
old_bracket(key)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
assert Nokogiri::CSS::Parser.cache_on?
|
22
|
+
end
|
23
|
+
|
24
|
+
def teardown
|
25
|
+
Nokogiri::CSS::Parser.clear_cache
|
26
|
+
Nokogiri::CSS::Parser.set_cache true
|
27
|
+
end
|
28
|
+
|
29
|
+
[ false, true ].each do |cache_setting|
|
30
|
+
define_method "test_css_cache_#{cache_setting ? "true" : "false"}" do
|
31
|
+
times = cache_setting ? 4 : nil
|
32
|
+
|
33
|
+
Nokogiri::CSS::Parser.set_cache cache_setting
|
34
|
+
|
35
|
+
Nokogiri::CSS.xpath_for(@css)
|
36
|
+
Nokogiri::CSS.xpath_for(@css)
|
37
|
+
Nokogiri::CSS::Parser.new.xpath_for(@css)
|
38
|
+
Nokogiri::CSS::Parser.new.xpath_for(@css)
|
39
|
+
|
40
|
+
assert_equal(times, Nokogiri::CSS::Parser.class_eval { @cache.count })
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
require "helper"
|
4
|
+
|
5
|
+
class TestEncodingHandler < Nokogiri::TestCase
|
6
|
+
def teardown
|
7
|
+
Nokogiri::EncodingHandler.clear_aliases!
|
8
|
+
end
|
9
|
+
|
10
|
+
def test_get
|
11
|
+
assert_not_nil Nokogiri::EncodingHandler['UTF-8']
|
12
|
+
assert_nil Nokogiri::EncodingHandler['alsdkjfhaldskjfh']
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_name
|
16
|
+
eh = Nokogiri::EncodingHandler['UTF-8']
|
17
|
+
assert_equal "UTF-8", eh.name
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_alias
|
21
|
+
Nokogiri::EncodingHandler.alias('UTF-8', 'UTF-18')
|
22
|
+
assert_equal 'UTF-8', Nokogiri::EncodingHandler['UTF-18'].name
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_cleanup_aliases
|
26
|
+
assert_nil Nokogiri::EncodingHandler['UTF-9']
|
27
|
+
Nokogiri::EncodingHandler.alias('UTF-8', 'UTF-9')
|
28
|
+
assert_not_nil Nokogiri::EncodingHandler['UTF-9']
|
29
|
+
|
30
|
+
Nokogiri::EncodingHandler.clear_aliases!
|
31
|
+
assert_nil Nokogiri::EncodingHandler['UTF-9']
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_delete
|
35
|
+
assert_nil Nokogiri::EncodingHandler['UTF-9']
|
36
|
+
Nokogiri::EncodingHandler.alias('UTF-8', 'UTF-9')
|
37
|
+
assert_not_nil Nokogiri::EncodingHandler['UTF-9']
|
38
|
+
|
39
|
+
Nokogiri::EncodingHandler.delete 'UTF-9'
|
40
|
+
assert_nil Nokogiri::EncodingHandler['UTF-9']
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_delete_non_existent
|
44
|
+
assert_nil Nokogiri::EncodingHandler.delete('UTF-9')
|
45
|
+
end
|
46
|
+
end
|