nokogiri 1.1.1 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/History.ja.txt +34 -0
- data/History.txt +36 -0
- data/Manifest.txt +21 -0
- data/README.ja.txt +1 -1
- data/README.txt +1 -1
- data/Rakefile +27 -89
- data/ext/nokogiri/extconf.rb +48 -63
- data/ext/nokogiri/html_document.c +90 -29
- data/ext/nokogiri/html_sax_parser.c +23 -2
- data/ext/nokogiri/native.c +18 -8
- data/ext/nokogiri/native.h +22 -0
- data/ext/nokogiri/xml_attr.c +83 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_cdata.c +1 -1
- data/ext/nokogiri/xml_document.c +84 -18
- data/ext/nokogiri/xml_document_fragment.c +38 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +2 -22
- data/ext/nokogiri/xml_entity_reference.c +41 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +10 -3
- data/ext/nokogiri/xml_io.h +1 -0
- data/ext/nokogiri/xml_node.c +116 -66
- data/ext/nokogiri/xml_node_set.c +5 -1
- data/ext/nokogiri/xml_processing_instruction.c +44 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +20 -4
- data/ext/nokogiri/xml_sax_parser.c +51 -15
- data/ext/nokogiri/xml_sax_push_parser.c +85 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +12 -8
- data/ext/nokogiri/xml_syntax_error.h +2 -1
- data/ext/nokogiri/xml_xpath_context.c +11 -2
- data/ext/nokogiri/xslt_stylesheet.c +1 -6
- data/lib/nokogiri.rb +10 -13
- data/lib/nokogiri/css.rb +1 -1
- data/lib/nokogiri/css/generated_parser.rb +287 -295
- data/lib/nokogiri/css/generated_tokenizer.rb +36 -51
- data/lib/nokogiri/css/node.rb +1 -3
- data/lib/nokogiri/css/parser.rb +21 -12
- data/lib/nokogiri/css/parser.y +55 -44
- data/lib/nokogiri/css/syntax_error.rb +2 -1
- data/lib/nokogiri/css/tokenizer.rex +23 -32
- data/lib/nokogiri/decorators/hpricot/node_set.rb +1 -1
- data/lib/nokogiri/html.rb +10 -4
- data/lib/nokogiri/html/document.rb +6 -2
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +2 -1
- data/lib/nokogiri/xml.rb +3 -1
- data/lib/nokogiri/xml/attr.rb +3 -4
- data/lib/nokogiri/xml/cdata.rb +1 -1
- data/lib/nokogiri/xml/document.rb +4 -7
- data/lib/nokogiri/xml/document_fragment.rb +9 -0
- data/lib/nokogiri/xml/dtd.rb +3 -0
- data/lib/nokogiri/xml/node.rb +144 -40
- data/lib/nokogiri/xml/node/save_options.rb +32 -0
- data/lib/nokogiri/xml/node_set.rb +11 -20
- data/lib/nokogiri/xml/processing_instruction.rb +6 -0
- data/lib/nokogiri/xml/reader.rb +5 -0
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +47 -0
- data/lib/nokogiri/xml/syntax_error.rb +3 -1
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
- data/tasks/test.rb +136 -0
- data/test/css/test_parser.rb +4 -0
- data/test/css/test_tokenizer.rb +30 -17
- data/test/css/test_xpath_visitor.rb +11 -0
- data/test/helper.rb +11 -0
- data/test/hpricot/test_builder.rb +2 -9
- data/test/hpricot/test_parser.rb +4 -4
- data/test/html/test_builder.rb +7 -7
- data/test/html/test_document.rb +90 -4
- data/test/html/test_node.rb +1 -0
- data/test/test_css_cache.rb +1 -3
- data/test/test_reader.rb +19 -1
- data/test/test_xslt_transforms.rb +1 -1
- data/test/xml/node/test_save_options.rb +20 -0
- data/test/xml/sax/test_parser.rb +17 -0
- data/test/xml/sax/test_push_parser.rb +67 -0
- data/test/xml/test_attr.rb +16 -0
- data/test/xml/test_cdata.rb +1 -1
- data/test/xml/test_document.rb +45 -0
- data/test/xml/test_document_fragment.rb +18 -0
- data/test/xml/test_dtd.rb +2 -4
- data/test/xml/test_entity_reference.rb +16 -0
- data/test/xml/test_node.rb +149 -80
- data/test/xml/test_processing_instruction.rb +24 -0
- metadata +28 -2
data/lib/nokogiri/html.rb
CHANGED
@@ -30,7 +30,7 @@ module Nokogiri
|
|
30
30
|
def parse string_or_io, url = nil, encoding = nil, options = 2145
|
31
31
|
if string_or_io.respond_to?(:read)
|
32
32
|
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
33
|
-
string_or_io
|
33
|
+
return Document.read_io(string_or_io, url, encoding, options)
|
34
34
|
end
|
35
35
|
|
36
36
|
Document.read_memory(string_or_io, url, encoding, options)
|
@@ -40,13 +40,19 @@ module Nokogiri
|
|
40
40
|
# Parse a fragment from +string+ into a NodeSet.
|
41
41
|
def fragment string
|
42
42
|
doc = parse(string)
|
43
|
-
|
44
|
-
|
45
|
-
|
43
|
+
fragment = XML::DocumentFragment.new(doc)
|
44
|
+
finder = lambda { |c, f|
|
45
|
+
c.each do |child|
|
46
|
+
fragment.add_child(child) if string =~ /<#{child.name}/
|
47
|
+
end
|
48
|
+
return fragment if fragment.children.length > 0
|
49
|
+
|
50
|
+
c.each do |child|
|
46
51
|
finder.call(child.children, f)
|
47
52
|
end
|
48
53
|
}
|
49
54
|
finder.call(doc.children, finder)
|
55
|
+
fragment
|
50
56
|
end
|
51
57
|
end
|
52
58
|
|
@@ -1,8 +1,12 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
module HTML
|
3
3
|
class Document < XML::Document
|
4
|
-
def
|
5
|
-
|
4
|
+
def serialize encoding = nil, options = XML::Node::SaveOptions::FORMAT |
|
5
|
+
XML::Node::SaveOptions::AS_HTML |
|
6
|
+
XML::Node::SaveOptions::NO_DECLARATION |
|
7
|
+
XML::Node::SaveOptions::NO_EMPTY_TAGS
|
8
|
+
|
9
|
+
super(encoding, options)
|
6
10
|
end
|
7
11
|
end
|
8
12
|
end
|
data/lib/nokogiri/version.rb
CHANGED
data/lib/nokogiri/xml.rb
CHANGED
@@ -6,14 +6,16 @@ require 'nokogiri/xml/attr'
|
|
6
6
|
require 'nokogiri/xml/dtd'
|
7
7
|
require 'nokogiri/xml/text'
|
8
8
|
require 'nokogiri/xml/cdata'
|
9
|
+
require 'nokogiri/xml/processing_instruction'
|
9
10
|
require 'nokogiri/xml/comment'
|
10
11
|
require 'nokogiri/xml/document'
|
12
|
+
require 'nokogiri/xml/document_fragment'
|
11
13
|
require 'nokogiri/xml/node_set'
|
14
|
+
require 'nokogiri/xml/syntax_error'
|
12
15
|
require 'nokogiri/xml/xpath'
|
13
16
|
require 'nokogiri/xml/xpath_context'
|
14
17
|
require 'nokogiri/xml/builder'
|
15
18
|
require 'nokogiri/xml/reader'
|
16
|
-
require 'nokogiri/xml/syntax_error'
|
17
19
|
require 'nokogiri/xml/notation'
|
18
20
|
require 'nokogiri/xml/element'
|
19
21
|
require 'nokogiri/xml/entity_declaration'
|
data/lib/nokogiri/xml/attr.rb
CHANGED
data/lib/nokogiri/xml/cdata.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
module XML
|
3
3
|
class Document < Node
|
4
|
+
attr_accessor :errors
|
5
|
+
|
4
6
|
def name
|
5
7
|
'document'
|
6
8
|
end
|
@@ -39,13 +41,8 @@ module Nokogiri
|
|
39
41
|
@node_cache ||= {}
|
40
42
|
end
|
41
43
|
|
42
|
-
|
43
|
-
|
44
|
-
end
|
45
|
-
|
46
|
-
def inner_html
|
47
|
-
serialize
|
48
|
-
end
|
44
|
+
alias :to_xml :serialize
|
45
|
+
alias :inner_html :serialize
|
49
46
|
|
50
47
|
def namespaces
|
51
48
|
root ? root.collect_namespaces : {}
|
data/lib/nokogiri/xml/dtd.rb
CHANGED
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require 'nokogiri/xml/node/save_options'
|
3
|
+
|
1
4
|
module Nokogiri
|
2
5
|
module XML
|
3
6
|
class Node
|
@@ -23,6 +26,7 @@ module Nokogiri
|
|
23
26
|
XINCLUDE_END = 20
|
24
27
|
DOCB_DOCUMENT_NODE = 21
|
25
28
|
|
29
|
+
# The Document associated with this Node.
|
26
30
|
attr_accessor :document
|
27
31
|
|
28
32
|
###
|
@@ -76,7 +80,7 @@ module Nokogiri
|
|
76
80
|
# def regex node_set, regex
|
77
81
|
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
78
82
|
# end
|
79
|
-
# })
|
83
|
+
# }.new)
|
80
84
|
#
|
81
85
|
def xpath *paths
|
82
86
|
# Pop off our custom function handler if it exists
|
@@ -84,7 +88,7 @@ module Nokogiri
|
|
84
88
|
Hash, String, Symbol
|
85
89
|
].include?(paths.last.class) ? paths.pop : nil
|
86
90
|
|
87
|
-
ns = paths.last.is_a?(Hash) ? paths.pop :
|
91
|
+
ns = paths.last.is_a?(Hash) ? paths.pop : document.root.namespaces
|
88
92
|
|
89
93
|
return NodeSet.new(document) unless document.root
|
90
94
|
|
@@ -134,45 +138,72 @@ module Nokogiri
|
|
134
138
|
Hash, String, Symbol
|
135
139
|
].include?(rules.last.class) ? rules.pop : nil
|
136
140
|
|
137
|
-
ns = rules.last.is_a?(Hash) ? rules.pop :
|
141
|
+
ns = rules.last.is_a?(Hash) ? rules.pop : document.root.namespaces
|
138
142
|
|
139
143
|
rules = rules.map { |rule|
|
140
|
-
CSS.xpath_for(rule, :prefix => ".//")
|
144
|
+
CSS.xpath_for(rule, :prefix => ".//", :ns => ns)
|
141
145
|
}.flatten.uniq + [ns, handler].compact
|
142
146
|
|
143
147
|
xpath(*rules)
|
144
148
|
end
|
145
149
|
|
146
|
-
|
150
|
+
###
|
151
|
+
# Search for the first occurrence of +path+.
|
152
|
+
# Returns nil if nothing is found, otherwise a Node.
|
153
|
+
def at path, ns = document.root.namespaces
|
147
154
|
search(path, ns).first
|
148
155
|
end
|
149
156
|
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
157
|
+
###
|
158
|
+
# Get the attribute value for the attribute +name+
|
159
|
+
def [](name)
|
160
|
+
return nil unless key?(name)
|
161
|
+
get(name)
|
162
|
+
end
|
163
|
+
|
164
|
+
alias :next :next_sibling
|
165
|
+
alias :previous :previous_sibling
|
166
|
+
alias :remove :unlink
|
167
|
+
alias :get_attribute :[]
|
168
|
+
alias :set_attribute :[]=
|
169
|
+
alias :text :content
|
170
|
+
alias :inner_text :content
|
171
|
+
alias :has_attribute? :key?
|
172
|
+
alias :<< :add_child
|
173
|
+
alias :name :node_name
|
174
|
+
alias :name= :node_name=
|
175
|
+
alias :type :node_type
|
176
|
+
alias :to_str :text
|
166
177
|
|
167
178
|
####
|
168
179
|
# Returns a hash containing the node's attributes. The key is the
|
169
180
|
# attribute name, the value is the string value of the attribute.
|
170
181
|
def attributes
|
171
182
|
Hash[*(attribute_nodes.map { |node|
|
172
|
-
[node.
|
183
|
+
[node.node_name, node]
|
173
184
|
}.flatten)]
|
174
185
|
end
|
175
186
|
|
187
|
+
###
|
188
|
+
# Get the attribute values for this Node.
|
189
|
+
def values
|
190
|
+
attribute_nodes.map { |node| node.value }
|
191
|
+
end
|
192
|
+
|
193
|
+
###
|
194
|
+
# Get the attribute names for this Node.
|
195
|
+
def keys
|
196
|
+
attribute_nodes.map { |node| node.node_name }
|
197
|
+
end
|
198
|
+
|
199
|
+
###
|
200
|
+
# Iterate over each attribute name and value pair for this Node.
|
201
|
+
def each &block
|
202
|
+
attribute_nodes.each { |node|
|
203
|
+
block.call(node.node_name, node.value)
|
204
|
+
}
|
205
|
+
end
|
206
|
+
|
176
207
|
###
|
177
208
|
# Remove the attribute named +name+
|
178
209
|
def remove_attribute name
|
@@ -206,20 +237,6 @@ module Nokogiri
|
|
206
237
|
end
|
207
238
|
end
|
208
239
|
|
209
|
-
def has_attribute?(property)
|
210
|
-
key? property
|
211
|
-
end
|
212
|
-
|
213
|
-
alias :get_attribute :[]
|
214
|
-
def set_attribute(name, value)
|
215
|
-
self[name] = value
|
216
|
-
end
|
217
|
-
|
218
|
-
def text
|
219
|
-
content
|
220
|
-
end
|
221
|
-
alias :inner_text :text
|
222
|
-
|
223
240
|
####
|
224
241
|
# Set the content to +string+.
|
225
242
|
# If +encode+, encode any special characters first.
|
@@ -234,10 +251,6 @@ module Nokogiri
|
|
234
251
|
parent_node
|
235
252
|
end
|
236
253
|
|
237
|
-
def << child
|
238
|
-
add_child child
|
239
|
-
end
|
240
|
-
|
241
254
|
def comment?
|
242
255
|
type == COMMENT_NODE
|
243
256
|
end
|
@@ -323,11 +336,102 @@ Node.replace requires a Node argument, and cannot accept a Document.
|
|
323
336
|
replace_with_node new_node
|
324
337
|
end
|
325
338
|
|
339
|
+
###
|
340
|
+
# Test to see if this Node is equal to +other+
|
326
341
|
def == other
|
327
342
|
return false unless other
|
328
343
|
return false unless other.respond_to?(:pointer_id)
|
329
344
|
pointer_id == other.pointer_id
|
330
345
|
end
|
346
|
+
|
347
|
+
###
|
348
|
+
# Serialize Node using +encoding+ and +save_options+. Save options
|
349
|
+
# can also be set using a block. See SaveOptions.
|
350
|
+
#
|
351
|
+
# These two statements are equivalent:
|
352
|
+
#
|
353
|
+
# node.serialize('UTF-8', FORMAT | AS_XML)
|
354
|
+
#
|
355
|
+
# or
|
356
|
+
#
|
357
|
+
# node.serialize('UTF-8') do |config|
|
358
|
+
# config.format.as_xml
|
359
|
+
# end
|
360
|
+
#
|
361
|
+
def serialize encoding = nil, save_options = SaveOptions::FORMAT, &block
|
362
|
+
io = StringIO.new
|
363
|
+
write_to io, encoding, save_options, &block
|
364
|
+
io.rewind
|
365
|
+
io.read
|
366
|
+
end
|
367
|
+
|
368
|
+
###
|
369
|
+
# Serialize this Node to HTML using +encoding+
|
370
|
+
def to_html encoding = nil
|
371
|
+
# FIXME: this is a hack around broken libxml versions
|
372
|
+
return dump_html if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
373
|
+
|
374
|
+
serialize(encoding, SaveOptions::FORMAT |
|
375
|
+
SaveOptions::NO_DECLARATION |
|
376
|
+
SaveOptions::NO_EMPTY_TAGS |
|
377
|
+
SaveOptions::AS_HTML)
|
378
|
+
end
|
379
|
+
|
380
|
+
###
|
381
|
+
# Serialize this Node to XML using +encoding+
|
382
|
+
def to_xml encoding = nil
|
383
|
+
serialize(encoding, SaveOptions::FORMAT | SaveOptions::AS_XML)
|
384
|
+
end
|
385
|
+
|
386
|
+
###
|
387
|
+
# Serialize this Node to XHTML using +encoding+
|
388
|
+
def to_xhtml encoding = nil
|
389
|
+
# FIXME: this is a hack around broken libxml versions
|
390
|
+
return dump_html if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
391
|
+
|
392
|
+
serialize(encoding, SaveOptions::FORMAT |
|
393
|
+
SaveOptions::NO_DECLARATION |
|
394
|
+
SaveOptions::NO_EMPTY_TAGS |
|
395
|
+
SaveOptions::AS_XHTML)
|
396
|
+
end
|
397
|
+
|
398
|
+
###
|
399
|
+
# Write Node to +io+ with +encoding+ and +save_options+
|
400
|
+
def write_to io, encoding = nil, save_options = SaveOptions::FORMAT
|
401
|
+
config = SaveOptions.new(save_options)
|
402
|
+
yield config if block_given?
|
403
|
+
|
404
|
+
native_write_to(io, encoding, config.options)
|
405
|
+
end
|
406
|
+
|
407
|
+
###
|
408
|
+
# Write Node as HTML to +io+ with +encoding+
|
409
|
+
def write_html_to io, encoding = nil
|
410
|
+
write_to io, encoding, SaveOptions::FORMAT |
|
411
|
+
SaveOptions::NO_DECLARATION |
|
412
|
+
SaveOptions::NO_EMPTY_TAGS |
|
413
|
+
SaveOptions::AS_HTML
|
414
|
+
end
|
415
|
+
|
416
|
+
###
|
417
|
+
# Write Node as XHTML to +io+ with +encoding+
|
418
|
+
def write_xhtml_to io, encoding = nil
|
419
|
+
write_to io, encoding, SaveOptions::FORMAT |
|
420
|
+
SaveOptions::NO_DECLARATION |
|
421
|
+
SaveOptions::NO_EMPTY_TAGS |
|
422
|
+
SaveOptions::AS_XHTML
|
423
|
+
end
|
424
|
+
|
425
|
+
###
|
426
|
+
# Write Node as XML to +io+ with +encoding+
|
427
|
+
def write_xml_to io, encoding = nil
|
428
|
+
write_to io, encoding, SaveOptions::FORMAT | SaveOptions::AS_XML
|
429
|
+
end
|
430
|
+
|
431
|
+
def self.new_from_str string
|
432
|
+
$stderr.puts("This method is deprecated and will be removed in 1.2.0 or by March 1, 2009. Instead, use Nokogiri::HTML.fragment()")
|
433
|
+
Nokogiri::HTML.fragment(string).first
|
434
|
+
end
|
331
435
|
end
|
332
436
|
end
|
333
437
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
class Node
|
4
|
+
###
|
5
|
+
# Save options for serializing nodes
|
6
|
+
class SaveOptions
|
7
|
+
FORMAT = 1 # Format serialized xml
|
8
|
+
NO_DECLARATION = 2 # Do not include declarations
|
9
|
+
NO_EMPTY_TAGS = 4
|
10
|
+
NO_XHTML = 8
|
11
|
+
AS_XHTML = 16
|
12
|
+
AS_XML = 32
|
13
|
+
AS_HTML = 64
|
14
|
+
|
15
|
+
attr_reader :options
|
16
|
+
def initialize options = 0; @options = options; end
|
17
|
+
constants.each do |constant|
|
18
|
+
class_eval %{
|
19
|
+
def #{constant.downcase}
|
20
|
+
@options |= #{constant}
|
21
|
+
self
|
22
|
+
end
|
23
|
+
|
24
|
+
def #{constant.downcase}?
|
25
|
+
#{constant} & @options == #{constant}
|
26
|
+
end
|
27
|
+
}
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|