nokogiri-maglev- 1.5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +26 -0
- data/.gemtest +0 -0
- data/CHANGELOG.ja.rdoc +544 -0
- data/CHANGELOG.rdoc +532 -0
- data/Manifest.txt +283 -0
- data/README.ja.rdoc +106 -0
- data/README.rdoc +174 -0
- data/Rakefile +171 -0
- data/bin/nokogiri +53 -0
- data/ext/nokogiri/depend +358 -0
- data/ext/nokogiri/extconf.rb +124 -0
- data/ext/nokogiri/html_document.c +154 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +276 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +94 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/nokogiri.c +115 -0
- data/ext/nokogiri/nokogiri.h +160 -0
- data/ext/nokogiri/st.c +576 -0
- data/ext/nokogiri/xml_attr.c +94 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +56 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +54 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +478 -0
- data/ext/nokogiri/xml_document.h +23 -0
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +202 -0
- data/ext/nokogiri/xml_dtd.h +10 -0
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_encoding_handler.c +79 -0
- data/ext/nokogiri/xml_encoding_handler.h +8 -0
- data/ext/nokogiri/xml_entity_decl.c +110 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +52 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +56 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
- data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
- data/ext/nokogiri/xml_namespace.c +84 -0
- data/ext/nokogiri/xml_namespace.h +13 -0
- data/ext/nokogiri/xml_node.c +1397 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +418 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +56 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +684 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +162 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +293 -0
- data/ext/nokogiri/xml_sax_parser.h +39 -0
- data/ext/nokogiri/xml_sax_parser_context.c +199 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +115 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +205 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +58 -0
- data/ext/nokogiri/xml_syntax_error.h +13 -0
- data/ext/nokogiri/xml_text.c +50 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath_context.c +315 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +265 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/nokogiri.rb +127 -0
- data/lib/nokogiri/css.rb +27 -0
- data/lib/nokogiri/css/node.rb +99 -0
- data/lib/nokogiri/css/parser.rb +677 -0
- data/lib/nokogiri/css/parser.y +237 -0
- data/lib/nokogiri/css/parser_extras.rb +91 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +152 -0
- data/lib/nokogiri/css/tokenizer.rex +55 -0
- data/lib/nokogiri/css/xpath_visitor.rb +171 -0
- data/lib/nokogiri/decorators/slop.rb +35 -0
- data/lib/nokogiri/html.rb +36 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +213 -0
- data/lib/nokogiri/html/document_fragment.rb +41 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/element_description_defaults.rb +671 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +52 -0
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +88 -0
- data/lib/nokogiri/xml.rb +67 -0
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +426 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +234 -0
- data/lib/nokogiri/xml/document_fragment.rb +98 -0
- data/lib/nokogiri/xml/dtd.rb +22 -0
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +19 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node.rb +915 -0
- data/lib/nokogiri/xml/node/save_options.rb +61 -0
- data/lib/nokogiri/xml/node_set.rb +357 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +93 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +112 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +4 -0
- data/lib/nokogiri/xml/sax/document.rb +164 -0
- data/lib/nokogiri/xml/sax/parser.rb +115 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +63 -0
- data/lib/nokogiri/xml/syntax_error.rb +47 -0
- data/lib/nokogiri/xml/text.rb +9 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +52 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +90 -0
- data/nokogiri_help_responses.md +40 -0
- data/tasks/cross_compile.rb +152 -0
- data/tasks/nokogiri.org.rb +18 -0
- data/tasks/test.rb +94 -0
- data/test/css/test_nthiness.rb +159 -0
- data/test/css/test_parser.rb +303 -0
- data/test/css/test_tokenizer.rb +198 -0
- data/test/css/test_xpath_visitor.rb +85 -0
- data/test/decorators/test_slop.rb +16 -0
- data/test/files/2ch.html +108 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/encoding.html +82 -0
- data/test/files/encoding.xhtml +84 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/metacharset.html +10 -0
- data/test/files/noencoding.html +47 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/helper.rb +173 -0
- data/test/html/sax/test_parser.rb +139 -0
- data/test/html/sax/test_parser_context.rb +48 -0
- data/test/html/test_builder.rb +165 -0
- data/test/html/test_document.rb +472 -0
- data/test/html/test_document_encoding.rb +138 -0
- data/test/html/test_document_fragment.rb +255 -0
- data/test/html/test_element_description.rb +101 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +193 -0
- data/test/html/test_node_encoding.rb +27 -0
- data/test/test_convert_xpath.rb +135 -0
- data/test/test_css_cache.rb +45 -0
- data/test/test_encoding_handler.rb +46 -0
- data/test/test_memory_leak.rb +72 -0
- data/test/test_nokogiri.rb +133 -0
- data/test/test_reader.rb +425 -0
- data/test/test_soap4r_sax.rb +52 -0
- data/test/test_xslt_transforms.rb +193 -0
- data/test/xml/node/test_save_options.rb +28 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +338 -0
- data/test/xml/sax/test_parser_context.rb +113 -0
- data/test/xml/sax/test_push_parser.rb +156 -0
- data/test/xml/test_attr.rb +65 -0
- data/test/xml/test_attribute_decl.rb +86 -0
- data/test/xml/test_builder.rb +227 -0
- data/test/xml/test_cdata.rb +50 -0
- data/test/xml/test_comment.rb +29 -0
- data/test/xml/test_document.rb +697 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +192 -0
- data/test/xml/test_dtd.rb +107 -0
- data/test/xml/test_dtd_encoding.rb +33 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +122 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +70 -0
- data/test/xml/test_node.rb +917 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +107 -0
- data/test/xml/test_node_reparenting.rb +334 -0
- data/test/xml/test_node_set.rb +742 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +94 -0
- data/test/xml/test_syntax_error.rb +12 -0
- data/test/xml/test_text.rb +47 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +237 -0
- data/test/xslt/test_custom_functions.rb +94 -0
- data/test/xslt/test_exception_handling.rb +37 -0
- metadata +548 -0
@@ -0,0 +1,55 @@
|
|
1
|
+
module Nokogiri
module CSS
class Tokenizer

macro
nl \n|\r\n|\r|\f
w [\s]*
nonascii [^\0-\177]
num -?([0-9]+|[0-9]*\.[0-9]+)
unicode \\[0-9A-Fa-f]{1,6}(\r\n|[\s])?

escape {unicode}|\\[^\n\r\f0-9A-Fa-f]
nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
nmstart [_A-Za-z]|{nonascii}|{escape}
ident [-@]?({nmstart})({nmchar})*
name ({nmchar})+
string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*"
string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*'
string {string1}|{string2}

rule

# Lexer rules for Nokogiri::CSS::Tokenizer.
#
# The macro section above names reusable regular-expression fragments; a rule
# (or another macro) pulls one in by writing {name}.  Each rule below pairs a
# pattern with an action block that returns a two-element array of
# [token type, text].
# NOTE(review): `text` is presumably the substring matched by the pattern, and
# rules are presumably tried in the order listed -- confirm against the lexer
# generator's documentation before reordering anything.

# [:state] pattern [actions]

# Function-like tokens and identifiers.  `has(` is listed ahead of the generic
# `{ident}(` rule, which would otherwise also match it.
has\({w} { [:HAS, text] }
{ident}\({w} { [:FUNCTION, text] }
{ident} { [:IDENT, text] }
\#{name} { [:HASH, text] }
# Attribute comparison operators; {w} allows whitespace on either side.
{w}~={w} { [:INCLUDES, text] }
{w}\|={w} { [:DASHMATCH, text] }
{w}\^={w} { [:PREFIXMATCH, text] }
{w}\$={w} { [:SUFFIXMATCH, text] }
{w}\*={w} { [:SUBSTRINGMATCH, text] }
{w}!={w} { [:NOT_EQUAL, text] }
{w}={w} { [:EQUAL, text] }
# Brackets, parentheses and separator characters, again with optional
# surrounding whitespace folded into the token text.
{w}\) { [:RPAREN, text] }
{w}\[{w} { [:LSQUARE, text] }
{w}\] { [:RSQUARE, text] }
{w}\+{w} { [:PLUS, text] }
{w}>{w} { [:GREATER, text] }
{w},{w} { [:COMMA, text] }
{w}~{w} { [:TILDE, text] }
\:not\({w} { [:NOT, text] }
{num} { [:NUMBER, text] }
# `//` is listed before `/` so that a double slash is not split in two.
{w}\/\/{w} { [:DOUBLESLASH, text] }
{w}\/{w} { [:SLASH, text] }

U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? {[:UNICODE_RANGE, text] }

# Whitespace runs, quoted strings, and finally a catch-all that emits any
# other single character with the character itself as the token type.
[\s]+ { [:S, text] }
{string} { [:STRING, text] }
. { [text, text] }
end
end
end
|
@@ -0,0 +1,171 @@
|
|
1
|
+
module Nokogiri
  module CSS
    ###
    # Visitor that converts a parsed CSS selector tree into XPath expression
    # text.  Every +visit_*+ method receives one node of the tree, reads its
    # +value+ array (and, where needed, its +type+) and returns a String.
    # Child nodes are rendered by calling +accept+ on them with this visitor.
    class XPathVisitor # :nodoc:
      ###
      # Render a function node.  +node.value.first+ holds the function name
      # including its opening parenthesis (e.g. "eq("); the remaining entries
      # are the arguments.
      #
      # If this object responds to +visit_function_<name>+ that method is used
      # instead of the built-in table, which lets callers plug in their own
      # functions.
      def visit_function node
        # note that nth-child and nth-last-child are preprocessed in css/node.rb.
        msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
        return self.send(msg, node) if self.respond_to?(msg)

        case node.value.first
        when /^text\(/
          'child::text()'
        when /^self\(/
          "self::#{node.value[1]}"
        when /^eq\(/
          "position() = #{node.value[1]}"
        when /^(nth|nth-of-type|nth-child)\(/
          if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :AN_PLUS_B
            an_plus_b(node.value[1])
          else
            "position() = #{node.value[1]}"
          end
        when /^(nth-last-child|nth-last-of-type)\(/
          if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :AN_PLUS_B
            an_plus_b(node.value[1], :last => true)
          else
            # Counting from the end: 1 means last(), 2 means last() - 1, ...
            index = node.value[1].to_i - 1
            index == 0 ? "position() = last()" : "position() = last() - #{index}"
          end
        when /^(first|first-of-type)\(/
          "position() = 1"
        when /^(last|last-of-type)\(/
          "position() = last()"
        when /^contains\(/
          "contains(., #{node.value[1]})"
        when /^gt\(/
          "position() > #{node.value[1]}"
        when /^only-child\(/
          "last() = 1"
        when /^comment\(/
          "comment()"
        when /^has\(/
          node.value[1].accept(self)
        else
          # Unknown function: pass it through to XPath with the context node
          # (".") prepended to the argument list.  The function name already
          # ends with "(", so only the closing parenthesis is appended.
          args = ['.'] + node.value[1..-1]
          "#{node.value.first}#{args.join(', ')})"
        end
      end

      ###
      # Render a :not(...) node.  A bare element name has to be tested with
      # the self:: axis; any other child is negated as-is.
      def visit_not node
        child = node.value.first
        if :ELEMENT_NAME == child.type
          "not(self::#{child.accept(self)})"
        else
          "not(#{child.accept(self)})"
        end
      end

      ###
      # Render an id selector ("#foo") as a test on the id attribute.
      def visit_id node
        node.value.first =~ /^#(.*)$/
        "@id = '#{$1}'"
      end

      ###
      # Render an attribute condition.  +node.value+ is either [attribute]
      # (existence test) or [attribute, operator, value].
      def visit_attribute_condition node
        # Functions and names that already carry an axis ("::") must not be
        # prefixed with "@".
        attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
                      ''
                    else
                      '@'
                    end
        attribute += node.value.first.accept(self)

        # Support non-standard css
        attribute.gsub!(/^@@/, '@')

        return attribute unless node.value.length == 3

        value = node.value.last
        # Quote the value unless it already starts with a quote character.
        value = "'#{value}'" if value !~ /^['"]/

        case node.value[1]
        when :equal
          attribute + " = " + "#{value}"
        when :not_equal
          attribute + " != " + "#{value}"
        when :substring_match
          "contains(#{attribute}, #{value})"
        when :prefix_match
          "starts-with(#{attribute}, #{value})"
        when :dash_match
          "#{attribute} = #{value} or starts-with(#{attribute}, concat(#{value}, '-'))"
        when :includes
          "contains(concat(\" \", #{attribute}, \" \"),concat(\" \", #{value}, \" \"))"
        when :suffix_match
          "substring(#{attribute}, string-length(#{attribute}) - " +
            "string-length(#{value}) + 1, string-length(#{value})) = #{value}"
        else
          attribute + " #{node.value[1]} " + "#{value}"
        end
      end

      ###
      # Render a pseudo class.  A nested function node is delegated to
      # +visit_function+; otherwise a +visit_pseudo_class_<name>+ method on
      # this object takes precedence over the built-in table, and unknown
      # names are emitted as an XPath function call on the context node.
      def visit_pseudo_class node
        if node.value.first.is_a?(Nokogiri::CSS::Node) and node.value.first.type == :FUNCTION
          node.value.first.accept(self)
        else
          msg = :"visit_pseudo_class_#{node.value.first.gsub(/[(]/, '')}"
          return self.send(msg, node) if self.respond_to?(msg)

          case node.value.first
          when "first", "first-child" then "position() = 1"
          when "last", "last-child" then "position() = last()"
          when "first-of-type" then "position() = 1"
          when "last-of-type" then "position() = last()"
          when "only-of-type" then "last() = 1"
          when "empty" then "not(node())"
          when "parent" then "node()"
          when "root" then "not(parent::*)"
          else
            node.value.first + "(.)"
          end
        end
      end

      ###
      # Render a class selector (".foo") as a whitespace-delimited match on
      # the class attribute.
      def visit_class_condition node
        "contains(concat(' ', @class, ' '), ' #{node.value.first} ')"
      end

      # Generate one visit_<kind> method per binary node kind.  Each renders
      # its first and last child and joins them with the given XPath text.
      {
        'combinator' => ' and ',
        'direct_adjacent_selector' => "/following-sibling::*[1]/self::",
        'preceding_selector' => "/following-sibling::",
        'descendant_selector' => '//',
        'child_selector' => '/',
      }.each do |k,v|
        class_eval %{
          def visit_#{k} node
            "\#{node.value.first.accept(self)}#{v}\#{node.value.last.accept(self)}"
          end
        }
      end

      ###
      # Render "selector[condition]".
      def visit_conditional_selector node
        node.value.first.accept(self) + '[' +
          node.value.last.accept(self) + ']'
      end

      ###
      # An element name is emitted unchanged.
      def visit_element_name node
        node.value.first
      end

      ###
      # Entry point: render +node+ by letting it dispatch back to this visitor.
      def accept node
        node.accept(self)
      end

      private

      ###
      # Render an "an+b" expression.  +node.value+ must hold exactly four
      # tokens; the first is read as +a+ and the fourth as +b+.  With
      # <tt>:last => true</tt> positions are counted from the end.
      #
      # Raises ArgumentError when the node does not hold four tokens.
      def an_plus_b node, options={}
        raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4

        a = node.value[0].to_i
        b = node.value[3].to_i
        position = options[:last] ? "(last()-position()+1)" : "position()"

        if b == 0
          "(#{position} mod #{a}) = 0"
        elsif a == 0
          # "0n+b" selects exactly the b-th element.  The general formula
          # below would emit "mod 0", which XPath evaluates to NaN and so
          # would never match anything.
          "#{position} = #{b}"
        else
          compare = (a < 0) ? "<=" : ">="
          "(#{position} #{compare} #{b}) and (((#{position}-#{b}) mod #{a.abs}) = 0)"
        end
      end

    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Nokogiri
  module Decorators
    ###
    # The Slop decorator implements method missing such that methods may be
    # used instead of XPath or CSS.  See Nokogiri.Slop
    module Slop
      ###
      # Look for nodes named +name+.  See Nokogiri.Slop
      #
      # * With no arguments the name is searched via XPath below
      #   +implied_xpath_context+; one leading underscore is stripped from
      #   the name first.
      # * With a Hash argument, <tt>:css</tt> appends a CSS suffix to the name
      #   and <tt>:xpath</tt> adds one or more XPath conditions (joined with
      #   "and").
      # * Any other argument is treated as a CSS suffix and converted to XPath
      #   with the parser cache turned off.
      #
      # Returns the single match when exactly one node is found, otherwise the
      # whole result list.  When nothing is found -- or the Hash carries
      # neither <tt>:css</tt> nor <tt>:xpath</tt> -- the call is handed to
      # +super+, which reports the missing method under its real name.
      def method_missing name, *args, &block
        prefix = implied_xpath_context
        list = nil

        if args.empty?
          list = xpath("#{prefix}#{name.to_s.sub(/^_/, '')}")
        elsif args.first.is_a? Hash
          hash = args.first
          if hash[:css]
            list = css("#{name}#{hash[:css]}")
          elsif hash[:xpath]
            conds = Array(hash[:xpath]).join(' and ')
            list = xpath("#{prefix}#{name}[#{conds}]")
          end
        else
          CSS::Parser.without_cache do
            list = xpath(
              *CSS.xpath_for("#{name}#{args.first}", :prefix => prefix)
            )
          end
        end

        # list stays nil when the Hash had no usable key; without the nil
        # check that case failed with "undefined method `empty?' for nil".
        super if list.nil? || list.empty?
        list.length == 1 ? list.first : list
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'nokogiri/html/entity_lookup'
|
2
|
+
require 'nokogiri/html/document'
|
3
|
+
require 'nokogiri/html/document_fragment'
|
4
|
+
require 'nokogiri/html/sax/parser_context'
|
5
|
+
require 'nokogiri/html/sax/parser'
|
6
|
+
require 'nokogiri/html/element_description'
|
7
|
+
require 'nokogiri/html/element_description_defaults'
|
8
|
+
|
9
|
+
module Nokogiri
  ###
  # Shortcut for Nokogiri::HTML::Document.parse.  All arguments and the
  # optional block are handed over unchanged.
  def self.HTML(thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
    Nokogiri::HTML::Document.parse(thing, url, encoding, options, &block)
  end

  module HTML
    ###
    # Shortcut for Nokogiri::HTML::Document.parse.  All arguments and the
    # optional block are handed over unchanged.
    def self.parse(thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
      Document.parse(thing, url, encoding, options, &block)
    end

    ####
    # Parse the markup in +string+ as a fragment by delegating to
    # HTML::DocumentFragment.parse, passing +encoding+ along.
    def self.fragment(string, encoding = nil)
      HTML::DocumentFragment.parse(string, encoding)
    end

    # Instance of Nokogiri::HTML::EntityLookup
    NamedCharacters = EntityLookup.new
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Nokogiri
  module HTML
    ###
    # Builder for HTML documents.  It is a subclass of
    # Nokogiri::XML::Builder and adds only #to_html, so the XML builder
    # documentation is the place to learn how building works.
    #
    # == Synopsis:
    #
    # Build an HTML document whose body has an onload attribute and contains
    # a span with the class "bold" and the text "Hello world":
    #
    #   builder = Nokogiri::HTML::Builder.new do |doc|
    #     doc.html {
    #       doc.body(:onload => 'some_func();') {
    #         doc.span.bold {
    #           doc.text "Hello world"
    #         }
    #       }
    #     }
    #   end
    #   puts builder.to_html
    class Builder < Nokogiri::XML::Builder
      ###
      # Serialize the document under construction by calling +to_html+ on it.
      def to_html
        document = @doc
        document.to_html
      end
    end
  end
end
|
@@ -0,0 +1,213 @@
|
|
1
|
+
module Nokogiri
  module HTML
    class Document < Nokogiri::XML::Document
      ###
      # Get the meta tag encoding for this document.  Returns nil if there is
      # no Content-Type meta tag, or if that tag's content attribute carries
      # no charset.
      def meta_encoding
        meta = meta_content_type or return nil
        # match is nil when the content attribute is absent or has no charset
        match = /charset\s*=\s*([\w-]+)/i.match(meta['content'])
        match && match[1]
      end

      ###
      # Set the meta tag encoding for this document.  If there is no meta
      # content tag, the encoding is not set.
      def meta_encoding= encoding
        meta = meta_content_type and
          meta['content'] = "text/html; charset=%s" % encoding
      end

      # Find the first meta element whose http-equiv attribute is
      # "Content-Type" (compared case-insensitively); nil when there is none.
      def meta_content_type
        css('meta[@http-equiv]').find { |node|
          node['http-equiv'] =~ /\AContent-Type\z/i
        }
      end
      private :meta_content_type

      ###
      # Get the title string of this document.  Return nil if there is
      # no title tag.
      def title
        title = at('title') and title.inner_text
      end

      ###
      # Set the title string of this document.  If there is no head
      # element, the title is not set.
      def title=(text)
        unless title = at('title')
          head = at('head') or return nil
          title = Nokogiri::XML::Node.new('title', self)
          head << title
        end
        title.children = XML::Text.new(text, self)
      end

      ####
      # Serialize Node using +options+.  Save options can also be set using a
      # block. See SaveOptions.
      #
      # These two statements are equivalent:
      #
      #  node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
      #
      # or
      #
      #   node.serialize(:encoding => 'UTF-8') do |config|
      #     config.format.as_xml
      #   end
      #
      # When no :save_with option is given, SaveOptions::DEFAULT_HTML is used.
      def serialize options = {}
        options[:save_with] ||= XML::Node::SaveOptions::DEFAULT_HTML
        super
      end

      ####
      # Create a Nokogiri::XML::DocumentFragment from +tags+
      def fragment tags = nil
        DocumentFragment.new(self, tags, self.root)
      end

      class << self
        ###
        # Parse HTML.  +string_or_io+ may be a String, or any object that
        # responds to _read_ and _close_ such as an IO, or StringIO.
        # +url+ is resource where this document is located.  +encoding+ is the
        # encoding that should be used when processing the document. +options+
        # is a number that sets options in the parser, such as
        # Nokogiri::XML::ParseOptions::RECOVER.  See the constants in
        # Nokogiri::XML::ParseOptions.
        #
        # When a block is given it receives the ParseOptions object before
        # parsing starts.  When no encoding is supplied, EncodingReader is
        # used to look for one in the document itself.
        def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML

          # Integer rather than Fixnum: the Fixnum constant no longer exists
          # in current Ruby, while Integer matches on every version.
          options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
          # Give the options to the user
          yield options if block_given?

          if string_or_io.respond_to?(:encoding)
            unless string_or_io.encoding.name == "ASCII-8BIT"
              encoding ||= string_or_io.encoding.name
            end
          end

          if string_or_io.respond_to?(:read)
            url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
            if !encoding
              # Perform advanced encoding detection that libxml2 does
              # not do.
              string_or_io = EncodingReader.new(string_or_io)
              begin
                return read_io(string_or_io, url, encoding, options.to_i)
              rescue EncodingFoundException => e
                # A retry is required because libxml2 has a problem in
                # that it cannot switch encoding well in the middle of
                # parsing, especially if it has already seen a
                # non-ASCII character when it finds an encoding hint.
                encoding = e.encoding
              end
            end
            return read_io(string_or_io, url, encoding, options.to_i)
          end

          # read_memory pukes on empty docs
          return new if string_or_io.nil? or string_or_io.empty?

          if !encoding
            encoding = EncodingReader.detect_encoding(string_or_io)
          end

          read_memory(string_or_io, url, encoding, options.to_i)
        end
      end

      # Raised by EncodingReader#read to hand a detected encoding back to
      # Document.parse, which rescues it and re-reads with that encoding.
      # NOTE(review): this inherits from Exception, so a bare +rescue+ does
      # not catch it -- confirm that is intentional before changing it.
      class EncodingFoundException < Exception # :nodoc:
        attr_reader :encoding

        def initialize(encoding)
          @encoding = encoding
          super("encoding found: %s" % encoding)
        end
      end

      # IO wrapper that inspects the first chunk read from the underlying IO
      # for an encoding declaration before passing data on.
      class EncodingReader # :nodoc:
        # SAX handler that stops parsing (via throw :found) as soon as it has
        # either found a charset in a meta element or given up.
        class SAXHandler < Nokogiri::XML::SAX::Document # :nodoc:
          attr_reader :encoding

          # Record +encoding+ and abort the parse.
          def found(encoding)
            @encoding = encoding
            throw :found
          end

          # Abort the parse without an encoding.  The argument is ignored; it
          # is optional because start_element calls this without one, which
          # previously raised ArgumentError.
          def not_found(encoding = nil)
            found nil
          end

          def start_element(name, attrs = [])
            case name
            when /\A(?:div|h1|img|p|br)\z/
              # One of these elements ends the search without a result.
              not_found
            when 'meta'
              attr = Hash[attrs]
              charset = attr['charset'] and
                found charset
              http_equiv = attr['http-equiv'] and
                http_equiv.match(/\AContent-Type\z/i) and
                content = attr['content'] and
                m = content.match(/;\s*charset\s*=\s*([\w-]+)/) and
                found m[1]
            end
          end
        end

        # Return the encoding named in +chunk+, or nil.  An XML declaration at
        # the very start is checked first; after that the chunk is scanned for
        # a meta charset (by regular expression on JRuby, by SAX parse
        # otherwise).  Any StandardError raised along the way yields nil.
        def self.detect_encoding(chunk)
          m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
            return Nokogiri.XML(m[1]).encoding

          if Nokogiri.jruby?
            m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i) and
              return m[4]
          end

          handler = SAXHandler.new
          parser = Nokogiri::HTML::SAX::Parser.new(handler)
          catch(:found) {
            parser.parse(chunk)
          }
          handler.encoding
        rescue
          nil
        end

        def initialize(io)
          @io = io
          @firstchunk = nil
        end

        # Read up to +len+ bytes.  The first call runs encoding detection on
        # the chunk it reads and raises EncodingFoundException when an
        # encoding is found; the chunk is kept so that the retry can serve it
        # again.  Returns nil at end of input.
        def read(len)
          # no support for a call without len

          if !@firstchunk
            @firstchunk = @io.read(len) or return nil

            # This implementation expects that the first call from
            # htmlReadIO() is made with a length long enough (~1KB) to
            # achieve advanced encoding detection.
            if encoding = EncodingReader.detect_encoding(@firstchunk)
              # The first chunk is stored for the next read in retry.
              raise EncodingFoundException, encoding
            end
          end

          # Serve buffered data first, then top up from the underlying IO.
          ret = @firstchunk.slice!(0, len)
          if (len -= ret.length) > 0
            rest = @io.read(len) and ret << rest
          end
          if ret.empty?
            nil
          else
            ret
          end
        end
      end
    end
  end
end
|