nokogiri 1.4.7-java → 1.5.0.beta.1-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data.tar.gz.sig +0 -0
- data/CHANGELOG.ja.rdoc +8 -83
- data/CHANGELOG.rdoc +6 -80
- data/Manifest.txt +4 -74
- data/README.ja.rdoc +5 -1
- data/README.rdoc +8 -22
- data/Rakefile +79 -60
- data/bin/nokogiri +1 -6
- data/deps.rip +5 -0
- data/ext/nokogiri/extconf.rb +32 -53
- data/ext/nokogiri/nokogiri.c +0 -2
- data/ext/nokogiri/nokogiri.h +0 -9
- data/ext/nokogiri/xml_document.c +0 -14
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_io.c +7 -32
- data/ext/nokogiri/xml_node.c +31 -103
- data/ext/nokogiri/xml_node_set.c +8 -8
- data/ext/nokogiri/xml_reader.c +1 -20
- data/ext/nokogiri/xml_sax_parser.c +3 -5
- data/ext/nokogiri/xml_sax_parser_context.c +0 -40
- data/ext/nokogiri/xml_xpath_context.c +2 -35
- data/ext/nokogiri/xslt_stylesheet.c +6 -124
- data/lib/isorelax.jar +0 -0
- data/lib/jing.jar +0 -0
- data/lib/nekodtd.jar +0 -0
- data/lib/nekohtml.jar +0 -0
- data/lib/nokogiri.rb +7 -3
- data/lib/nokogiri/css.rb +3 -6
- data/lib/nokogiri/css/generated_parser.rb +669 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +145 -0
- data/lib/nokogiri/css/parser.rb +70 -665
- data/lib/nokogiri/css/parser.y +1 -6
- data/lib/nokogiri/css/tokenizer.rb +3 -148
- data/lib/nokogiri/css/tokenizer.rex +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +14 -16
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/html.rb +3 -2
- data/lib/nokogiri/html/document.rb +18 -134
- data/lib/nokogiri/html/document_fragment.rb +21 -26
- data/lib/nokogiri/html/element_description_defaults.rb +671 -0
- data/lib/nokogiri/html/sax/parser.rb +2 -6
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +4 -9
- data/lib/nokogiri/xml/attribute_decl.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/document.rb +3 -27
- data/lib/nokogiri/xml/document_fragment.rb +2 -9
- data/lib/nokogiri/xml/dtd.rb +1 -12
- data/lib/nokogiri/xml/element_decl.rb +1 -1
- data/lib/nokogiri/xml/entity_decl.rb +1 -1
- data/lib/nokogiri/xml/node.rb +75 -172
- data/lib/nokogiri/xml/node/save_options.rb +0 -10
- data/lib/nokogiri/xml/node_set.rb +3 -28
- data/lib/nokogiri/xml/parse_options.rb +0 -8
- data/lib/nokogiri/xml/reader.rb +6 -44
- data/lib/nokogiri/xml/sax/document.rb +5 -9
- data/lib/nokogiri/xml/schema.rb +1 -7
- data/lib/nokogiri/xslt.rb +5 -9
- data/lib/xercesImpl.jar +0 -0
- data/tasks/cross_compile.rb +12 -27
- data/tasks/test.rb +0 -0
- data/test/css/test_parser.rb +19 -40
- data/test/css/test_tokenizer.rb +0 -8
- data/test/helper.rb +1 -4
- data/test/html/sax/test_parser.rb +21 -47
- data/test/html/sax/test_parser_context.rb +2 -2
- data/test/html/test_document.rb +3 -58
- data/test/html/test_document_encoding.rb +0 -53
- data/test/html/test_document_fragment.rb +13 -82
- data/test/html/test_element_description.rb +4 -2
- data/test/html/test_node.rb +0 -9
- data/test/test_memory_leak.rb +2 -57
- data/test/test_nokogiri.rb +14 -20
- data/test/test_reader.rb +7 -47
- data/test/test_xslt_transforms.rb +5 -8
- data/test/xml/sax/test_parser.rb +17 -34
- data/test/xml/sax/test_parser_context.rb +0 -50
- data/test/xml/sax/test_push_parser.rb +1 -18
- data/test/xml/test_attr.rb +4 -31
- data/test/xml/test_attribute_decl.rb +7 -3
- data/test/xml/test_builder.rb +5 -5
- data/test/xml/test_cdata.rb +3 -3
- data/test/xml/test_document.rb +18 -15
- data/test/xml/test_document_fragment.rb +20 -19
- data/test/xml/test_dtd.rb +13 -18
- data/test/xml/test_element_content.rb +1 -1
- data/test/xml/test_element_decl.rb +1 -1
- data/test/xml/test_entity_decl.rb +12 -10
- data/test/xml/test_namespace.rb +7 -5
- data/test/xml/test_node.rb +15 -54
- data/test/xml/test_node_reparenting.rb +42 -85
- data/test/xml/test_node_set.rb +2 -61
- data/test/xml/test_schema.rb +0 -5
- data/test/xml/test_text.rb +2 -11
- data/test/xml/test_unparented_node.rb +1 -1
- data/test/xml/test_xpath.rb +7 -43
- metadata +442 -473
- metadata.gz.sig +0 -0
- data/.gemtest +0 -0
- data/ext/nokogiri/depend +0 -358
- data/ext/nokogiri/libcharset-1.dll +0 -0
- data/ext/nokogiri/libexslt.dll +0 -0
- data/ext/nokogiri/libiconv-2.dll +0 -0
- data/ext/nokogiri/libxml2.dll +0 -0
- data/ext/nokogiri/libxslt.dll +0 -0
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/zlib1.dll +0 -0
- data/lib/nokogiri/css/parser_extras.rb +0 -91
- data/lib/nokogiri/ffi/encoding_handler.rb +0 -42
- data/lib/nokogiri/ffi/html/document.rb +0 -28
- data/lib/nokogiri/ffi/html/element_description.rb +0 -81
- data/lib/nokogiri/ffi/html/entity_lookup.rb +0 -16
- data/lib/nokogiri/ffi/html/sax/parser_context.rb +0 -38
- data/lib/nokogiri/ffi/io_callbacks.rb +0 -42
- data/lib/nokogiri/ffi/libxml.rb +0 -420
- data/lib/nokogiri/ffi/structs/common_node.rb +0 -38
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +0 -24
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +0 -13
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +0 -16
- data/lib/nokogiri/ffi/structs/xml_attr.rb +0 -20
- data/lib/nokogiri/ffi/structs/xml_attribute.rb +0 -27
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +0 -16
- data/lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb +0 -11
- data/lib/nokogiri/ffi/structs/xml_document.rb +0 -117
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +0 -28
- data/lib/nokogiri/ffi/structs/xml_element.rb +0 -26
- data/lib/nokogiri/ffi/structs/xml_element_content.rb +0 -17
- data/lib/nokogiri/ffi/structs/xml_entity.rb +0 -32
- data/lib/nokogiri/ffi/structs/xml_enumeration.rb +0 -12
- data/lib/nokogiri/ffi/structs/xml_node.rb +0 -28
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +0 -53
- data/lib/nokogiri/ffi/structs/xml_notation.rb +0 -11
- data/lib/nokogiri/ffi/structs/xml_ns.rb +0 -15
- data/lib/nokogiri/ffi/structs/xml_parser_context.rb +0 -20
- data/lib/nokogiri/ffi/structs/xml_parser_input.rb +0 -19
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +0 -14
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +0 -51
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +0 -124
- data/lib/nokogiri/ffi/structs/xml_schema.rb +0 -13
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +0 -31
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +0 -12
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +0 -38
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +0 -35
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +0 -20
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +0 -13
- data/lib/nokogiri/ffi/weak_bucket.rb +0 -40
- data/lib/nokogiri/ffi/xml/attr.rb +0 -41
- data/lib/nokogiri/ffi/xml/attribute_decl.rb +0 -27
- data/lib/nokogiri/ffi/xml/cdata.rb +0 -19
- data/lib/nokogiri/ffi/xml/comment.rb +0 -18
- data/lib/nokogiri/ffi/xml/document.rb +0 -174
- data/lib/nokogiri/ffi/xml/document_fragment.rb +0 -21
- data/lib/nokogiri/ffi/xml/dtd.rb +0 -67
- data/lib/nokogiri/ffi/xml/element_content.rb +0 -43
- data/lib/nokogiri/ffi/xml/element_decl.rb +0 -19
- data/lib/nokogiri/ffi/xml/entity_decl.rb +0 -36
- data/lib/nokogiri/ffi/xml/entity_reference.rb +0 -19
- data/lib/nokogiri/ffi/xml/namespace.rb +0 -44
- data/lib/nokogiri/ffi/xml/node.rb +0 -559
- data/lib/nokogiri/ffi/xml/node_set.rb +0 -150
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +0 -20
- data/lib/nokogiri/ffi/xml/reader.rb +0 -236
- data/lib/nokogiri/ffi/xml/relax_ng.rb +0 -85
- data/lib/nokogiri/ffi/xml/sax/parser.rb +0 -143
- data/lib/nokogiri/ffi/xml/sax/parser_context.rb +0 -79
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +0 -51
- data/lib/nokogiri/ffi/xml/schema.rb +0 -109
- data/lib/nokogiri/ffi/xml/syntax_error.rb +0 -98
- data/lib/nokogiri/ffi/xml/text.rb +0 -18
- data/lib/nokogiri/ffi/xml/xpath.rb +0 -9
- data/lib/nokogiri/ffi/xml/xpath_context.rb +0 -153
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +0 -77
- data/test/decorators/test_slop.rb +0 -16
- data/test/ffi/test_document.rb +0 -35
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/xslt/test_custom_functions.rb +0 -94
data/lib/nokogiri/css/parser.y
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
class Nokogiri::CSS::
|
1
|
+
class Nokogiri::CSS::GeneratedParser
|
2
2
|
|
3
3
|
token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT
|
4
4
|
token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL
|
@@ -39,9 +39,6 @@ rule
|
|
39
39
|
result = Node.new(:CONDITIONAL_SELECTOR, val)
|
40
40
|
}
|
41
41
|
| function
|
42
|
-
| function pseudo {
|
43
|
-
result = Node.new(:CONDITIONAL_SELECTOR, val)
|
44
|
-
}
|
45
42
|
| function attrib {
|
46
43
|
result = Node.new(:CONDITIONAL_SELECTOR, val)
|
47
44
|
}
|
@@ -233,5 +230,3 @@ end
|
|
233
230
|
|
234
231
|
---- header
|
235
232
|
|
236
|
-
require 'nokogiri/css/parser_extras'
|
237
|
-
|
@@ -1,152 +1,7 @@
|
|
1
|
-
#--
|
2
|
-
# DO NOT MODIFY!!!!
|
3
|
-
# This file is automatically generated by rex 1.0.5
|
4
|
-
# from lexical definition file "lib/nokogiri/css/tokenizer.rex".
|
5
|
-
#++
|
6
|
-
|
7
1
|
module Nokogiri
|
8
|
-
module CSS
|
9
|
-
class Tokenizer
|
10
|
-
|
11
|
-
|
12
|
-
class ScanError < StandardError ; end
|
13
|
-
|
14
|
-
attr_reader :lineno
|
15
|
-
attr_reader :filename
|
16
|
-
attr_accessor :state
|
17
|
-
|
18
|
-
def scan_setup(str)
|
19
|
-
@ss = StringScanner.new(str)
|
20
|
-
@lineno = 1
|
21
|
-
@state = nil
|
22
|
-
end
|
23
|
-
|
24
|
-
def action
|
25
|
-
yield
|
26
|
-
end
|
27
|
-
|
28
|
-
def scan_str(str)
|
29
|
-
scan_setup(str)
|
30
|
-
do_parse
|
31
|
-
end
|
32
|
-
alias :scan :scan_str
|
33
|
-
|
34
|
-
def load_file( filename )
|
35
|
-
@filename = filename
|
36
|
-
open(filename, "r") do |f|
|
37
|
-
scan_setup(f.read)
|
2
|
+
module CSS
|
3
|
+
class Tokenizer < GeneratedTokenizer
|
4
|
+
alias :scan :scan_setup
|
38
5
|
end
|
39
6
|
end
|
40
|
-
|
41
|
-
def scan_file( filename )
|
42
|
-
load_file(filename)
|
43
|
-
do_parse
|
44
|
-
end
|
45
|
-
|
46
|
-
|
47
|
-
def next_token
|
48
|
-
return if @ss.eos?
|
49
|
-
|
50
|
-
# skips empty actions
|
51
|
-
until token = _next_token or @ss.eos?; end
|
52
|
-
token
|
53
|
-
end
|
54
|
-
|
55
|
-
def _next_token
|
56
|
-
text = @ss.peek(1)
|
57
|
-
@lineno += 1 if text == "\n"
|
58
|
-
token = case @state
|
59
|
-
when nil
|
60
|
-
case
|
61
|
-
when (text = @ss.scan(/has\([\s]*/))
|
62
|
-
action { [:HAS, text] }
|
63
|
-
|
64
|
-
when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
|
65
|
-
action { [:FUNCTION, text] }
|
66
|
-
|
67
|
-
when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
|
68
|
-
action { [:IDENT, text] }
|
69
|
-
|
70
|
-
when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
|
71
|
-
action { [:HASH, text] }
|
72
|
-
|
73
|
-
when (text = @ss.scan(/[\s]*~=[\s]*/))
|
74
|
-
action { [:INCLUDES, text] }
|
75
|
-
|
76
|
-
when (text = @ss.scan(/[\s]*\|=[\s]*/))
|
77
|
-
action { [:DASHMATCH, text] }
|
78
|
-
|
79
|
-
when (text = @ss.scan(/[\s]*\^=[\s]*/))
|
80
|
-
action { [:PREFIXMATCH, text] }
|
81
|
-
|
82
|
-
when (text = @ss.scan(/[\s]*\$=[\s]*/))
|
83
|
-
action { [:SUFFIXMATCH, text] }
|
84
|
-
|
85
|
-
when (text = @ss.scan(/[\s]*\*=[\s]*/))
|
86
|
-
action { [:SUBSTRINGMATCH, text] }
|
87
|
-
|
88
|
-
when (text = @ss.scan(/[\s]*!=[\s]*/))
|
89
|
-
action { [:NOT_EQUAL, text] }
|
90
|
-
|
91
|
-
when (text = @ss.scan(/[\s]*=[\s]*/))
|
92
|
-
action { [:EQUAL, text] }
|
93
|
-
|
94
|
-
when (text = @ss.scan(/[\s]*\)/))
|
95
|
-
action { [:RPAREN, text] }
|
96
|
-
|
97
|
-
when (text = @ss.scan(/[\s]*\[[\s]*/))
|
98
|
-
action { [:LSQUARE, text] }
|
99
|
-
|
100
|
-
when (text = @ss.scan(/[\s]*\]/))
|
101
|
-
action { [:RSQUARE, text] }
|
102
|
-
|
103
|
-
when (text = @ss.scan(/[\s]*\+[\s]*/))
|
104
|
-
action { [:PLUS, text] }
|
105
|
-
|
106
|
-
when (text = @ss.scan(/[\s]*>[\s]*/))
|
107
|
-
action { [:GREATER, text] }
|
108
|
-
|
109
|
-
when (text = @ss.scan(/[\s]*,[\s]*/))
|
110
|
-
action { [:COMMA, text] }
|
111
|
-
|
112
|
-
when (text = @ss.scan(/[\s]*~[\s]*/))
|
113
|
-
action { [:TILDE, text] }
|
114
|
-
|
115
|
-
when (text = @ss.scan(/\:not\([\s]*/))
|
116
|
-
action { [:NOT, text] }
|
117
|
-
|
118
|
-
when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
|
119
|
-
action { [:NUMBER, text] }
|
120
|
-
|
121
|
-
when (text = @ss.scan(/[\s]*\/\/[\s]*/))
|
122
|
-
action { [:DOUBLESLASH, text] }
|
123
|
-
|
124
|
-
when (text = @ss.scan(/[\s]*\/[\s]*/))
|
125
|
-
action { [:SLASH, text] }
|
126
|
-
|
127
|
-
when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
|
128
|
-
action {[:UNICODE_RANGE, text] }
|
129
|
-
|
130
|
-
when (text = @ss.scan(/[\s]+/))
|
131
|
-
action { [:S, text] }
|
132
|
-
|
133
|
-
when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*'/))
|
134
|
-
action { [:STRING, text] }
|
135
|
-
|
136
|
-
when (text = @ss.scan(/./))
|
137
|
-
action { [text, text] }
|
138
|
-
|
139
|
-
else
|
140
|
-
text = @ss.string[@ss.pos .. -1]
|
141
|
-
raise ScanError, "can not match: '" + text + "'"
|
142
|
-
end # if
|
143
|
-
|
144
|
-
else
|
145
|
-
raise ScanError, "undefined state: '" + state.to_s + "'"
|
146
|
-
end # case state
|
147
|
-
token
|
148
|
-
end # def _next_token
|
149
|
-
|
150
|
-
end # class
|
151
|
-
end
|
152
7
|
end
|
@@ -11,25 +11,18 @@ module Nokogiri
|
|
11
11
|
'child::text()'
|
12
12
|
when /^self\(/
|
13
13
|
"self::#{node.value[1]}"
|
14
|
-
when /^eq\(/
|
15
|
-
"position() = #{node.value[1]}"
|
16
|
-
when /^(nth|nth-of-type|nth-child)\(/
|
14
|
+
when /^(eq|nth|nth-of-type|nth-child)\(/
|
17
15
|
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :AN_PLUS_B
|
18
16
|
an_plus_b(node.value[1])
|
19
17
|
else
|
20
|
-
"position() =
|
21
|
-
end
|
22
|
-
when /^(nth-last-child|nth-last-of-type)\(/
|
23
|
-
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :AN_PLUS_B
|
24
|
-
an_plus_b(node.value[1], :last => true)
|
25
|
-
else
|
26
|
-
index = node.value[1].to_i - 1
|
27
|
-
index == 0 ? "position() = last()" : "position() = last() - #{index}"
|
18
|
+
"position() = " + node.value[1]
|
28
19
|
end
|
29
20
|
when /^(first|first-of-type)\(/
|
30
21
|
"position() = 1"
|
31
22
|
when /^(last|last-of-type)\(/
|
32
23
|
"position() = last()"
|
24
|
+
when /^(nth-last-child|nth-last-of-type)\(/
|
25
|
+
"position() = last() - #{node.value[1]}"
|
33
26
|
when /^contains\(/
|
34
27
|
"contains(., #{node.value[1]})"
|
35
28
|
when /^gt\(/
|
@@ -55,6 +48,13 @@ module Nokogiri
|
|
55
48
|
end
|
56
49
|
end
|
57
50
|
|
51
|
+
def visit_preceding_selector node
|
52
|
+
node.value.last.accept(self) +
|
53
|
+
'[preceding-sibling::' +
|
54
|
+
node.value.first.accept(self) +
|
55
|
+
']'
|
56
|
+
end
|
57
|
+
|
58
58
|
def visit_id node
|
59
59
|
node.value.first =~ /^#(.*)$/
|
60
60
|
"@id = '#{$1}'"
|
@@ -126,7 +126,6 @@ module Nokogiri
|
|
126
126
|
{
|
127
127
|
'combinator' => ' and ',
|
128
128
|
'direct_adjacent_selector' => "/following-sibling::*[1]/self::",
|
129
|
-
'preceding_selector' => "/following-sibling::",
|
130
129
|
'descendant_selector' => '//',
|
131
130
|
'child_selector' => '/',
|
132
131
|
}.each do |k,v|
|
@@ -151,18 +150,17 @@ module Nokogiri
|
|
151
150
|
end
|
152
151
|
|
153
152
|
private
|
154
|
-
def an_plus_b node
|
153
|
+
def an_plus_b node
|
155
154
|
raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
|
156
155
|
|
157
156
|
a = node.value[0].to_i
|
158
157
|
b = node.value[3].to_i
|
159
|
-
position = options[:last] ? "(last()-position()+1)" : "position()"
|
160
158
|
|
161
159
|
if (b == 0)
|
162
|
-
return "(
|
160
|
+
return "(position() mod #{a}) = 0"
|
163
161
|
else
|
164
162
|
compare = (a < 0) ? "<=" : ">="
|
165
|
-
return "(
|
163
|
+
return "(position() #{compare} #{b}) and (((position()-#{b}) mod #{a.abs}) = 0)"
|
166
164
|
end
|
167
165
|
end
|
168
166
|
|
@@ -7,22 +7,20 @@ module Nokogiri
|
|
7
7
|
###
|
8
8
|
# look for node with +name+. See Nokogiri.Slop
|
9
9
|
def method_missing name, *args, &block
|
10
|
-
prefix = implied_xpath_context
|
11
|
-
|
12
10
|
if args.empty?
|
13
|
-
list = xpath("
|
11
|
+
list = xpath("./#{name}")
|
14
12
|
elsif args.first.is_a? Hash
|
15
13
|
hash = args.first
|
16
14
|
if hash[:css]
|
17
15
|
list = css("#{name}#{hash[:css]}")
|
18
16
|
elsif hash[:xpath]
|
19
17
|
conds = Array(hash[:xpath]).join(' and ')
|
20
|
-
list = xpath("
|
18
|
+
list = xpath("./#{name}[#{conds}]")
|
21
19
|
end
|
22
20
|
else
|
23
21
|
CSS::Parser.without_cache do
|
24
22
|
list = xpath(
|
25
|
-
*CSS.xpath_for("#{name}#{args.first}", :prefix =>
|
23
|
+
*CSS.xpath_for("#{name}#{args.first}", :prefix => "./")
|
26
24
|
)
|
27
25
|
end
|
28
26
|
end
|
data/lib/nokogiri/html.rb
CHANGED
@@ -4,6 +4,7 @@ require 'nokogiri/html/document_fragment'
|
|
4
4
|
require 'nokogiri/html/sax/parser_context'
|
5
5
|
require 'nokogiri/html/sax/parser'
|
6
6
|
require 'nokogiri/html/element_description'
|
7
|
+
require 'nokogiri/html/element_description_defaults'
|
7
8
|
|
8
9
|
module Nokogiri
|
9
10
|
class << self
|
@@ -24,8 +25,8 @@ module Nokogiri
|
|
24
25
|
|
25
26
|
####
|
26
27
|
# Parse a fragment from +string+ in to a NodeSet.
|
27
|
-
def fragment string
|
28
|
-
HTML::DocumentFragment.parse
|
28
|
+
def fragment string
|
29
|
+
HTML::DocumentFragment.parse(string)
|
29
30
|
end
|
30
31
|
end
|
31
32
|
|
@@ -3,44 +3,25 @@ module Nokogiri
|
|
3
3
|
class Document < Nokogiri::XML::Document
|
4
4
|
###
|
5
5
|
# Get the meta tag encoding for this document. If there is no meta tag,
|
6
|
-
# then nil is returned
|
6
|
+
# then nil is returned
|
7
7
|
def meta_encoding
|
8
|
-
meta =
|
9
|
-
|
8
|
+
return nil unless meta = css('meta').find { |node|
|
9
|
+
node['http-equiv'] =~ /Content-Type/i
|
10
|
+
}
|
11
|
+
|
12
|
+
/charset\s*=\s*([\w-]+)/i.match(meta['content'])[1]
|
10
13
|
end
|
11
14
|
|
12
15
|
###
|
13
|
-
# Set the meta tag encoding for this document. If there is no meta
|
14
|
-
# content tag, the encoding is not set.
|
16
|
+
# Set the meta tag encoding for this document. If there is no meta
|
17
|
+
# content tag, nil is returned and the encoding is not set.
|
15
18
|
def meta_encoding= encoding
|
16
|
-
meta =
|
17
|
-
|
18
|
-
end
|
19
|
-
|
20
|
-
def meta_content_type
|
21
|
-
css('meta[@http-equiv]').find { |node|
|
22
|
-
node['http-equiv'] =~ /\AContent-Type\z/i
|
19
|
+
return nil unless meta = css('meta').find { |node|
|
20
|
+
node['http-equiv'] =~ /Content-Type/i
|
23
21
|
}
|
24
|
-
end
|
25
|
-
private :meta_content_type
|
26
22
|
|
27
|
-
|
28
|
-
|
29
|
-
# no title tag.
|
30
|
-
def title
|
31
|
-
title = at('title') and title.inner_text
|
32
|
-
end
|
33
|
-
|
34
|
-
###
|
35
|
-
# Set the title string of this document. If there is no head
|
36
|
-
# element, the title is not set.
|
37
|
-
def title=(text)
|
38
|
-
unless title = at('title')
|
39
|
-
head = at('head') or return nil
|
40
|
-
title = Nokogiri::XML::Node.new('title', self)
|
41
|
-
head << title
|
42
|
-
end
|
43
|
-
title.children = XML::Text.new(text, self)
|
23
|
+
meta['content'] = "text/html; charset=%s" % encoding
|
24
|
+
encoding
|
44
25
|
end
|
45
26
|
|
46
27
|
####
|
@@ -57,8 +38,11 @@ module Nokogiri
|
|
57
38
|
# config.format.as_xml
|
58
39
|
# end
|
59
40
|
#
|
60
|
-
def serialize options = {}
|
61
|
-
options[:save_with] ||= XML::Node::SaveOptions::
|
41
|
+
def serialize options = {}, &block
|
42
|
+
options[:save_with] ||= XML::Node::SaveOptions::FORMAT |
|
43
|
+
XML::Node::SaveOptions::AS_HTML |
|
44
|
+
XML::Node::SaveOptions::NO_DECLARATION |
|
45
|
+
XML::Node::SaveOptions::NO_EMPTY_TAGS
|
62
46
|
super
|
63
47
|
end
|
64
48
|
|
@@ -77,7 +61,7 @@ module Nokogiri
|
|
77
61
|
# is a number that sets options in the parser, such as
|
78
62
|
# Nokogiri::XML::ParseOptions::RECOVER. See the constants in
|
79
63
|
# Nokogiri::XML::ParseOptions.
|
80
|
-
def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
|
64
|
+
def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
|
81
65
|
|
82
66
|
options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
|
83
67
|
# Give the options to the user
|
@@ -91,116 +75,16 @@ module Nokogiri
|
|
91
75
|
|
92
76
|
if string_or_io.respond_to?(:read)
|
93
77
|
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
94
|
-
if !encoding
|
95
|
-
# Perform advanced encoding detection that libxml2 does
|
96
|
-
# not do.
|
97
|
-
string_or_io = EncodingReader.new(string_or_io)
|
98
|
-
begin
|
99
|
-
return read_io(string_or_io, url, encoding, options.to_i)
|
100
|
-
rescue EncodingFoundException => e
|
101
|
-
# A retry is required because libxml2 has a problem in
|
102
|
-
# that it cannot switch encoding well in the middle of
|
103
|
-
# parsing, especially if it has already seen a
|
104
|
-
# non-ASCII character when it finds an encoding hint.
|
105
|
-
encoding = e.encoding
|
106
|
-
end
|
107
|
-
end
|
108
78
|
return read_io(string_or_io, url, encoding, options.to_i)
|
109
79
|
end
|
110
80
|
|
111
81
|
# read_memory pukes on empty docs
|
112
82
|
return new if string_or_io.nil? or string_or_io.empty?
|
113
83
|
|
114
|
-
if !encoding
|
115
|
-
encoding = EncodingReader.detect_encoding(string_or_io)
|
116
|
-
end
|
117
|
-
|
118
84
|
read_memory(string_or_io, url, encoding, options.to_i)
|
119
85
|
end
|
120
86
|
end
|
121
87
|
|
122
|
-
class EncodingFoundException < Exception # :nodoc:
|
123
|
-
attr_reader :encoding
|
124
|
-
|
125
|
-
def initialize(encoding)
|
126
|
-
@encoding = encoding
|
127
|
-
super("encoding found: %s" % encoding)
|
128
|
-
end
|
129
|
-
end
|
130
|
-
|
131
|
-
class EncodingReader # :nodoc:
|
132
|
-
class SAXHandler < Nokogiri::XML::SAX::Document # :nodoc:
|
133
|
-
attr_reader :encoding
|
134
|
-
|
135
|
-
def found(encoding)
|
136
|
-
@encoding = encoding
|
137
|
-
throw :found
|
138
|
-
end
|
139
|
-
|
140
|
-
def not_found(encoding)
|
141
|
-
found nil
|
142
|
-
end
|
143
|
-
|
144
|
-
def start_element(name, attrs = [])
|
145
|
-
case name
|
146
|
-
when /\A(?:div|h1|img|p|br)\z/
|
147
|
-
not_found
|
148
|
-
when 'meta'
|
149
|
-
attr = Hash[attrs]
|
150
|
-
http_equiv = attr['http-equiv'] and
|
151
|
-
http_equiv.match(/\AContent-Type\z/i) and
|
152
|
-
content = attr['content'] and
|
153
|
-
m = content.match(/;\s*charset\s*=\s*([\w-]+)/) and
|
154
|
-
found m[1]
|
155
|
-
end
|
156
|
-
end
|
157
|
-
end
|
158
|
-
|
159
|
-
def self.detect_encoding(chunk)
|
160
|
-
m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
|
161
|
-
return Nokogiri.XML(m[1]).encoding
|
162
|
-
|
163
|
-
handler = SAXHandler.new
|
164
|
-
parser = Nokogiri::HTML::SAX::Parser.new(handler)
|
165
|
-
catch(:found) {
|
166
|
-
parser.parse(chunk)
|
167
|
-
}
|
168
|
-
handler.encoding
|
169
|
-
rescue => e
|
170
|
-
nil
|
171
|
-
end
|
172
|
-
|
173
|
-
def initialize(io)
|
174
|
-
@io = io
|
175
|
-
@firstchunk = nil
|
176
|
-
end
|
177
|
-
|
178
|
-
def read(len)
|
179
|
-
# no support for a call without len
|
180
|
-
|
181
|
-
if !@firstchunk
|
182
|
-
@firstchunk = @io.read(len) or return nil
|
183
|
-
|
184
|
-
# This implementation expects that the first call from
|
185
|
-
# htmlReadIO() is made with a length long enough (~1KB) to
|
186
|
-
# achieve advanced encoding detection.
|
187
|
-
if encoding = EncodingReader.detect_encoding(@firstchunk)
|
188
|
-
# The first chunk is stored for the next read in retry.
|
189
|
-
raise EncodingFoundException, encoding
|
190
|
-
end
|
191
|
-
end
|
192
|
-
|
193
|
-
ret = @firstchunk.slice!(0, len)
|
194
|
-
if (len -= ret.length) > 0
|
195
|
-
rest = @io.read(len) and ret << rest
|
196
|
-
end
|
197
|
-
if ret.empty?
|
198
|
-
nil
|
199
|
-
else
|
200
|
-
ret
|
201
|
-
end
|
202
|
-
end
|
203
|
-
end
|
204
88
|
end
|
205
89
|
end
|
206
90
|
end
|