nokogiri 1.2.3 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/.autotest +14 -2
- data/CHANGELOG.ja.rdoc +38 -0
- data/CHANGELOG.rdoc +43 -0
- data/Manifest.txt +80 -5
- data/README.ja.rdoc +12 -11
- data/README.rdoc +4 -2
- data/Rakefile +103 -173
- data/bin/nokogiri +47 -0
- data/ext/nokogiri/extconf.rb +19 -13
- data/ext/nokogiri/html_document.c +39 -3
- data/ext/nokogiri/html_document.h +1 -1
- data/ext/nokogiri/html_element_description.c +272 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.h +1 -1
- data/ext/nokogiri/html_sax_parser.h +1 -1
- data/ext/nokogiri/{native.c → nokogiri.c} +11 -3
- data/ext/nokogiri/{native.h → nokogiri.h} +18 -4
- data/ext/nokogiri/xml_attr.c +14 -5
- data/ext/nokogiri/xml_attr.h +1 -1
- data/ext/nokogiri/xml_cdata.c +15 -6
- data/ext/nokogiri/xml_cdata.h +1 -1
- data/ext/nokogiri/xml_comment.c +13 -4
- data/ext/nokogiri/xml_comment.h +1 -1
- data/ext/nokogiri/xml_document.c +50 -41
- data/ext/nokogiri/xml_document.h +1 -1
- data/ext/nokogiri/xml_document_fragment.c +12 -4
- data/ext/nokogiri/xml_document_fragment.h +1 -1
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_dtd.h +1 -1
- data/ext/nokogiri/xml_entity_reference.c +13 -4
- data/ext/nokogiri/xml_entity_reference.h +1 -1
- data/ext/nokogiri/xml_io.h +1 -1
- data/ext/nokogiri/xml_namespace.c +69 -0
- data/ext/nokogiri/xml_namespace.h +12 -0
- data/ext/nokogiri/xml_node.c +232 -124
- data/ext/nokogiri/xml_node.h +3 -4
- data/ext/nokogiri/xml_node_set.c +206 -19
- data/ext/nokogiri/xml_node_set.h +1 -1
- data/ext/nokogiri/xml_processing_instruction.c +14 -4
- data/ext/nokogiri/xml_processing_instruction.h +1 -1
- data/ext/nokogiri/xml_reader.c +87 -7
- data/ext/nokogiri/xml_reader.h +1 -1
- data/ext/nokogiri/xml_relax_ng.c +106 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +122 -2
- data/ext/nokogiri/xml_sax_parser.h +1 -1
- data/ext/nokogiri/xml_sax_push_parser.c +1 -0
- data/ext/nokogiri/xml_sax_push_parser.h +1 -1
- data/ext/nokogiri/xml_schema.c +107 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.h +1 -1
- data/ext/nokogiri/xml_text.c +10 -3
- data/ext/nokogiri/xml_text.h +1 -1
- data/ext/nokogiri/xml_xpath.h +1 -1
- data/ext/nokogiri/xml_xpath_context.h +1 -1
- data/ext/nokogiri/xslt_stylesheet.c +29 -16
- data/ext/nokogiri/xslt_stylesheet.h +1 -1
- data/lib/action-nokogiri.rb +7 -1
- data/lib/nokogiri.rb +21 -5
- data/lib/nokogiri/css/generated_parser.rb +49 -14
- data/lib/nokogiri/css/generated_tokenizer.rb +2 -2
- data/lib/nokogiri/css/node.rb +13 -3
- data/lib/nokogiri/css/parser.rb +8 -0
- data/lib/nokogiri/css/parser.y +7 -7
- data/lib/nokogiri/css/tokenizer.rb +2 -0
- data/lib/nokogiri/css/xpath_visitor.rb +10 -6
- data/lib/nokogiri/decorators/hpricot/node.rb +1 -1
- data/lib/nokogiri/decorators/hpricot/node_set.rb +2 -2
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +2 -0
- data/lib/nokogiri/decorators/slop.rb +3 -1
- data/lib/nokogiri/ffi/html/document.rb +37 -0
- data/lib/nokogiri/ffi/html/element_description.rb +85 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser.rb +21 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +32 -0
- data/lib/nokogiri/ffi/libxml.rb +314 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +107 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +26 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +42 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +38 -0
- data/lib/nokogiri/ffi/xml/node.rb +380 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +130 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +217 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +51 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +148 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +38 -0
- data/lib/nokogiri/ffi/xml/schema.rb +55 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +76 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
- data/lib/nokogiri/hpricot.rb +14 -3
- data/lib/nokogiri/html.rb +11 -46
- data/lib/nokogiri/html/builder.rb +27 -1
- data/lib/nokogiri/html/document.rb +62 -6
- data/lib/nokogiri/html/document_fragment.rb +15 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +2 -0
- data/lib/nokogiri/html/sax/parser.rb +27 -1
- data/lib/nokogiri/version.rb +26 -1
- data/lib/nokogiri/version_warning.rb +11 -0
- data/lib/nokogiri/xml.rb +25 -51
- data/lib/nokogiri/xml/builder.rb +166 -10
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/document.rb +39 -6
- data/lib/nokogiri/xml/document_fragment.rb +41 -1
- data/lib/nokogiri/xml/dtd.rb +3 -1
- data/lib/nokogiri/xml/entity_declaration.rb +3 -1
- data/lib/nokogiri/xml/fragment_handler.rb +24 -3
- data/lib/nokogiri/xml/namespace.rb +7 -0
- data/lib/nokogiri/xml/node.rb +314 -65
- data/lib/nokogiri/xml/node/save_options.rb +12 -2
- data/lib/nokogiri/xml/node_set.rb +58 -8
- data/lib/nokogiri/xml/parse_options.rb +80 -0
- data/lib/nokogiri/xml/processing_instruction.rb +2 -0
- data/lib/nokogiri/xml/reader.rb +42 -3
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +0 -7
- data/lib/nokogiri/xml/sax/document.rb +84 -0
- data/lib/nokogiri/xml/sax/parser.rb +38 -2
- data/lib/nokogiri/xml/sax/push_parser.rb +12 -0
- data/lib/nokogiri/xml/schema.rb +65 -0
- data/lib/nokogiri/xml/syntax_error.rb +11 -0
- data/lib/nokogiri/xml/xpath.rb +1 -1
- data/lib/nokogiri/xml/xpath_context.rb +2 -0
- data/lib/nokogiri/xslt.rb +21 -1
- data/lib/nokogiri/xslt/stylesheet.rb +19 -0
- data/lib/xsd/xmlparser/nokogiri.rb +12 -2
- data/tasks/test.rb +42 -19
- data/test/css/test_parser.rb +29 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/helper.rb +38 -8
- data/test/html/sax/test_parser.rb +12 -0
- data/test/html/test_builder.rb +25 -2
- data/test/html/test_document.rb +91 -20
- data/test/html/test_document_fragment.rb +97 -0
- data/test/html/test_element_description.rb +95 -0
- data/test/html/test_node.rb +66 -3
- data/test/test_convert_xpath.rb +1 -1
- data/test/test_memory_leak.rb +57 -18
- data/test/test_nokogiri.rb +24 -2
- data/test/test_reader.rb +77 -0
- data/test/test_xslt_transforms.rb +120 -82
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +9 -0
- data/test/xml/sax/test_push_parser.rb +24 -0
- data/test/xml/test_attr.rb +7 -0
- data/test/xml/test_builder.rb +48 -0
- data/test/xml/test_cdata.rb +19 -0
- data/test/xml/test_comment.rb +6 -0
- data/test/xml/test_document.rb +101 -2
- data/test/xml/test_document_fragment.rb +55 -3
- data/test/xml/test_entity_reference.rb +4 -0
- data/test/xml/test_namespace.rb +43 -0
- data/test/xml/test_node.rb +255 -8
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +9 -2
- data/test/xml/test_node_set.rb +197 -1
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +5 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +65 -0
- data/test/xml/test_text.rb +5 -0
- data/test/xml/test_unparented_node.rb +3 -3
- metadata +128 -12
- data/lib/nokogiri/xml/comment.rb +0 -6
- data/lib/nokogiri/xml/element.rb +0 -6
- data/lib/nokogiri/xml/text.rb +0 -6
data/lib/nokogiri/xml/cdata.rb
CHANGED
@@ -11,6 +11,9 @@ module Nokogiri
|
|
11
11
|
# A list of Nokogiri::XML::SyntaxError found when parsing a document
|
12
12
|
attr_accessor :errors
|
13
13
|
|
14
|
+
def initialize *args
|
15
|
+
end
|
16
|
+
|
14
17
|
# The name of this document. Always returns "document"
|
15
18
|
def name
|
16
19
|
'document'
|
@@ -22,7 +25,7 @@ module Nokogiri
|
|
22
25
|
end
|
23
26
|
|
24
27
|
# Get the list of decorators given +key+
|
25
|
-
def decorators
|
28
|
+
def decorators key
|
26
29
|
@decorators ||= Hash.new
|
27
30
|
@decorators[key] ||= []
|
28
31
|
end
|
@@ -40,7 +43,7 @@ module Nokogiri
|
|
40
43
|
|
41
44
|
###
|
42
45
|
# Apply any decorators to +node+
|
43
|
-
def decorate
|
46
|
+
def decorate node
|
44
47
|
return unless @decorators
|
45
48
|
@decorators.each { |klass,list|
|
46
49
|
next unless node.is_a?(klass)
|
@@ -48,10 +51,6 @@ module Nokogiri
|
|
48
51
|
}
|
49
52
|
end
|
50
53
|
|
51
|
-
def node_cache # :nodoc:
|
52
|
-
@node_cache ||= {}
|
53
|
-
end
|
54
|
-
|
55
54
|
alias :to_xml :serialize
|
56
55
|
alias :inner_html :serialize
|
57
56
|
|
@@ -60,7 +59,41 @@ module Nokogiri
|
|
60
59
|
root ? root.collect_namespaces : {}
|
61
60
|
end
|
62
61
|
|
62
|
+
####
|
63
|
+
# Create a Nokogiri::XML::DocumentFragment from +tags+
|
64
|
+
def fragment tags
|
65
|
+
DocumentFragment.new(self, tags)
|
66
|
+
end
|
67
|
+
|
63
68
|
undef_method :swap, :parent, :namespace
|
69
|
+
|
70
|
+
class << self
|
71
|
+
###
|
72
|
+
# Parse an XML file. +string_or_io+ may be a String, or any object that
|
73
|
+
# responds to _read_ and _close_ such as an IO, or StringIO.
|
74
|
+
# +url+ is the resource where this document is located. +encoding+ is the
|
75
|
+
# encoding that should be used when processing the document. +options+
|
76
|
+
# is a number that sets options in the parser, such as
|
77
|
+
# Nokogiri::XML::PARSE_RECOVER. See the constants in
|
78
|
+
# Nokogiri::XML.
|
79
|
+
def parse string_or_io, url = nil, encoding = nil, options = 2145, &block
|
80
|
+
|
81
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
|
82
|
+
# Give the options to the user
|
83
|
+
yield options if block_given?
|
84
|
+
|
85
|
+
if string_or_io.respond_to?(:read)
|
86
|
+
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
87
|
+
return self.read_io(string_or_io, url, encoding, options.to_i)
|
88
|
+
end
|
89
|
+
|
90
|
+
# read_memory pukes on empty docs
|
91
|
+
return self.new if string_or_io.nil? or string_or_io.empty?
|
92
|
+
|
93
|
+
self.read_memory(string_or_io, url, encoding, options.to_i)
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
64
97
|
end
|
65
98
|
end
|
66
99
|
end
|
@@ -1,9 +1,49 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
module XML
|
3
|
-
class DocumentFragment < Node
|
3
|
+
class DocumentFragment < Nokogiri::XML::Node
|
4
|
+
def initialize document, tags=nil
|
5
|
+
if tags
|
6
|
+
parser = if self.kind_of?(Nokogiri::HTML::DocumentFragment)
|
7
|
+
HTML::SAX::Parser.new(FragmentHandler.new(self, tags))
|
8
|
+
else
|
9
|
+
XML::SAX::Parser.new(FragmentHandler.new(self, tags))
|
10
|
+
end
|
11
|
+
parser.parse(tags)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
###
|
16
|
+
# return the name for DocumentFragment
|
4
17
|
def name
|
5
18
|
'#document-fragment'
|
6
19
|
end
|
20
|
+
|
21
|
+
def to_s
|
22
|
+
children.to_s
|
23
|
+
end
|
24
|
+
|
25
|
+
def to_html *args
|
26
|
+
children.to_html(*args)
|
27
|
+
end
|
28
|
+
|
29
|
+
def to_xhtml *args
|
30
|
+
children.to_xhtml(*args)
|
31
|
+
end
|
32
|
+
|
33
|
+
def to_xml *args
|
34
|
+
children.to_xml(*args)
|
35
|
+
end
|
36
|
+
|
37
|
+
alias :serialize :to_s
|
38
|
+
|
39
|
+
class << self
|
40
|
+
####
|
41
|
+
# Create a Nokogiri::XML::DocumentFragment from +tags+
|
42
|
+
def parse tags
|
43
|
+
XML::DocumentFragment.new(XML::Document.new, tags)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
7
47
|
end
|
8
48
|
end
|
9
49
|
end
|
data/lib/nokogiri/xml/dtd.rb
CHANGED
@@ -3,16 +3,37 @@ module Nokogiri
|
|
3
3
|
class FragmentHandler < Nokogiri::XML::SAX::Document # :nodoc:
|
4
4
|
def initialize node, original_html
|
5
5
|
@doc_started = false
|
6
|
-
@original_html = original_html
|
7
6
|
@document = node.document
|
8
7
|
@stack = [node]
|
8
|
+
@klass = if node.kind_of?(Nokogiri::HTML::DocumentFragment)
|
9
|
+
Nokogiri::HTML::DocumentFragment
|
10
|
+
else
|
11
|
+
Nokogiri::XML::DocumentFragment
|
12
|
+
end
|
13
|
+
#
|
14
|
+
# the regexes used in start_element() and characters() anchor at
|
15
|
+
# start-of-line, but we really only want them to anchor at
|
16
|
+
# start-of-doc. so let's only save up to the first newline.
|
17
|
+
#
|
18
|
+
# this implementation choice was the result of some benchmarks, if
|
19
|
+
# you're curious: http://gist.github.com/115936
|
20
|
+
#
|
21
|
+
newline_index = original_html.index("\n")
|
22
|
+
@original_html = if newline_index
|
23
|
+
original_html[0,newline_index]
|
24
|
+
else
|
25
|
+
original_html
|
26
|
+
end
|
9
27
|
end
|
10
28
|
|
11
29
|
def start_element name, attrs = []
|
12
|
-
|
30
|
+
regex = (@klass == Nokogiri::HTML::DocumentFragment) ? %r{^\s*<#{Regexp.escape(name)}}i \
|
31
|
+
: %r{^\s*<#{Regexp.escape(name)}}
|
32
|
+
@doc_started = true if @original_html =~ regex
|
13
33
|
return unless @doc_started
|
14
34
|
|
15
35
|
node = Node.new(name, @document)
|
36
|
+
attrs << "" unless (attrs.length % 2) == 0
|
16
37
|
Hash[*attrs].each do |k,v|
|
17
38
|
node[k] = v
|
18
39
|
end
|
@@ -21,7 +42,7 @@ module Nokogiri
|
|
21
42
|
end
|
22
43
|
|
23
44
|
def characters string
|
24
|
-
@doc_started = true if @original_html =~
|
45
|
+
@doc_started = true if @original_html.strip =~ %r{^\s*#{Regexp.escape(string.strip)}}
|
25
46
|
@stack.last << Nokogiri::XML::Text.new(string, @document)
|
26
47
|
end
|
27
48
|
|
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -34,35 +34,57 @@ module Nokogiri
|
|
34
34
|
#
|
35
35
|
# You may search this node's subtree using Node#xpath and Node#css
|
36
36
|
class Node
|
37
|
+
# Element node type, see Nokogiri::XML::Node#element?
|
37
38
|
ELEMENT_NODE = 1
|
39
|
+
# Attribute node type
|
38
40
|
ATTRIBUTE_NODE = 2
|
41
|
+
# Text node type, see Nokogiri::XML::Node#text?
|
39
42
|
TEXT_NODE = 3
|
43
|
+
# CDATA node type, see Nokogiri::XML::Node#cdata?
|
40
44
|
CDATA_SECTION_NODE = 4
|
45
|
+
# Entity reference node type
|
41
46
|
ENTITY_REF_NODE = 5
|
47
|
+
# Entity node type
|
42
48
|
ENTITY_NODE = 6
|
49
|
+
# PI node type
|
43
50
|
PI_NODE = 7
|
51
|
+
# Comment node type, see Nokogiri::XML::Node#comment?
|
44
52
|
COMMENT_NODE = 8
|
53
|
+
# Document node type, see Nokogiri::XML::Node#xml?
|
45
54
|
DOCUMENT_NODE = 9
|
55
|
+
# Document type node type
|
46
56
|
DOCUMENT_TYPE_NODE = 10
|
57
|
+
# Document fragment node type
|
47
58
|
DOCUMENT_FRAG_NODE = 11
|
59
|
+
# Notation node type
|
48
60
|
NOTATION_NODE = 12
|
61
|
+
# HTML document node type, see Nokogiri::XML::Node#html?
|
49
62
|
HTML_DOCUMENT_NODE = 13
|
63
|
+
# DTD node type
|
50
64
|
DTD_NODE = 14
|
65
|
+
# Element declaration type
|
51
66
|
ELEMENT_DECL = 15
|
67
|
+
# Attribute declaration type
|
52
68
|
ATTRIBUTE_DECL = 16
|
69
|
+
# Entity declaration type
|
53
70
|
ENTITY_DECL = 17
|
71
|
+
# Namespace declaration type
|
54
72
|
NAMESPACE_DECL = 18
|
73
|
+
# XInclude start type
|
55
74
|
XINCLUDE_START = 19
|
75
|
+
# XInclude end type
|
56
76
|
XINCLUDE_END = 20
|
77
|
+
# DOCB document node type
|
57
78
|
DOCB_DOCUMENT_NODE = 21
|
58
79
|
|
59
|
-
|
60
|
-
|
80
|
+
def initialize name, document
|
81
|
+
# ... Ya. This is empty on purpose.
|
82
|
+
end
|
61
83
|
|
62
84
|
###
|
63
85
|
# Decorate this node with the decorators set up in this node's Document
|
64
86
|
def decorate!
|
65
|
-
document.decorate(self)
|
87
|
+
document.decorate(self)
|
66
88
|
end
|
67
89
|
|
68
90
|
###
|
@@ -150,7 +172,7 @@ module Nokogiri
|
|
150
172
|
# def regex node_set, regex
|
151
173
|
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
152
174
|
# end
|
153
|
-
# })
|
175
|
+
# }.new)
|
154
176
|
#
|
155
177
|
def css *rules
|
156
178
|
# Pop off our custom function handler if it exists
|
@@ -174,6 +196,7 @@ module Nokogiri
|
|
174
196
|
def at path, ns = document.root ? document.root.namespaces : {}
|
175
197
|
search(path, ns).first
|
176
198
|
end
|
199
|
+
alias :% :at
|
177
200
|
|
178
201
|
###
|
179
202
|
# Get the attribute value for the attribute +name+
|
@@ -195,6 +218,7 @@ module Nokogiri
|
|
195
218
|
alias :name= :node_name=
|
196
219
|
alias :type :node_type
|
197
220
|
alias :to_str :text
|
221
|
+
alias :clone :dup
|
198
222
|
|
199
223
|
####
|
200
224
|
# Returns a hash containing the node's attributes. The key is the
|
@@ -232,6 +256,12 @@ module Nokogiri
|
|
232
256
|
end
|
233
257
|
alias :delete :remove_attribute
|
234
258
|
|
259
|
+
###
|
260
|
+
# Returns true if this Node matches +selector+
|
261
|
+
def matches? selector
|
262
|
+
document.search(selector).include?(self)
|
263
|
+
end
|
264
|
+
|
235
265
|
####
|
236
266
|
# Create nodes from +data+ and insert them before this node
|
237
267
|
# (as a sibling).
|
@@ -271,23 +301,13 @@ module Nokogiri
|
|
271
301
|
self
|
272
302
|
end
|
273
303
|
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
classes = document.class.name.split('::')
|
278
|
-
classes[-1] = 'SAX::Parser'
|
279
|
-
|
280
|
-
|
281
|
-
fragment = DocumentFragment.new(self.document)
|
282
|
-
parser = eval(classes.join('::')).new(
|
283
|
-
FragmentHandler.new(fragment, tags)
|
284
|
-
)
|
285
|
-
parser.parse(tags)
|
286
|
-
fragment
|
304
|
+
def fragment tags # :nodoc:
|
305
|
+
# TODO: deprecate?
|
306
|
+
document.fragment(tags)
|
287
307
|
end
|
288
308
|
|
289
309
|
####
|
290
|
-
# Set the content to +string+.
|
310
|
+
# Set the Node content to +string+. The content gets XML escaped.
|
291
311
|
def content= string
|
292
312
|
self.native_content = encode_special_chars(string.to_s)
|
293
313
|
end
|
@@ -299,6 +319,21 @@ module Nokogiri
|
|
299
319
|
parent_node
|
300
320
|
end
|
301
321
|
|
322
|
+
###
|
323
|
+
# Get a hash containing the Namespace definitions for this Node
|
324
|
+
def namespaces
|
325
|
+
Hash[*namespace_definitions.map { |nd|
|
326
|
+
key = ['xmlns', nd.prefix].compact.join(':')
|
327
|
+
if defined?(Encoding) && document.encoding
|
328
|
+
begin
|
329
|
+
key.force_encoding document.encoding
|
330
|
+
rescue ArgumentError
|
331
|
+
end
|
332
|
+
end
|
333
|
+
[key, nd.href]
|
334
|
+
}.flatten]
|
335
|
+
end
|
336
|
+
|
302
337
|
# Returns true if this is a Comment
|
303
338
|
def comment?
|
304
339
|
type == COMMENT_NODE
|
@@ -324,6 +359,16 @@ module Nokogiri
|
|
324
359
|
type == TEXT_NODE
|
325
360
|
end
|
326
361
|
|
362
|
+
###
|
363
|
+
# Fetch the Nokogiri::HTML::ElementDescription for this node. Returns
|
364
|
+
# nil on XML documents and on unknown tags.
|
365
|
+
def description
|
366
|
+
return nil if document.xml?
|
367
|
+
Nokogiri::HTML::ElementDescription[name]
|
368
|
+
end
|
369
|
+
|
370
|
+
###
|
371
|
+
# Is this a read only node?
|
327
372
|
def read_only?
|
328
373
|
# According to gdome2, these are read-only node types
|
329
374
|
[NOTATION_NODE, ENTITY_NODE, ENTITY_DECL].include?(type)
|
@@ -335,10 +380,14 @@ module Nokogiri
|
|
335
380
|
end
|
336
381
|
alias :elem? :element?
|
337
382
|
|
383
|
+
###
|
384
|
+
# Turn this node into a string. If the document is HTML, this method
|
385
|
+
# returns html. If the document is XML, this method returns XML.
|
338
386
|
def to_s
|
339
387
|
document.xml? ? to_xml : to_html
|
340
388
|
end
|
341
389
|
|
390
|
+
# Get the inner_html for this node's Node#children
|
342
391
|
def inner_html
|
343
392
|
children.map { |x| x.to_html }.join
|
344
393
|
end
|
@@ -359,27 +408,60 @@ module Nokogiri
|
|
359
408
|
end
|
360
409
|
|
361
410
|
###
|
362
|
-
# Get a list of ancestor Node for this Node
|
363
|
-
|
364
|
-
|
411
|
+
# Get a list of ancestor Node for this Node. If +selector+ is given,
|
412
|
+
# the ancestors must match +selector+
|
413
|
+
def ancestors selector = nil
|
414
|
+
return NodeSet.new(document) unless respond_to?(:parent)
|
415
|
+
return NodeSet.new(document) unless parent
|
365
416
|
|
366
417
|
parents = [parent]
|
367
418
|
|
368
419
|
while parents.last.respond_to?(:parent)
|
369
|
-
|
420
|
+
break unless ctx_parent = parents.last.parent
|
421
|
+
parents << ctx_parent
|
370
422
|
end
|
371
|
-
|
423
|
+
|
424
|
+
return NodeSet.new(document, parents) unless selector
|
425
|
+
|
426
|
+
NodeSet.new(document, parents.find_all { |parent|
|
427
|
+
parent.matches?(selector)
|
428
|
+
})
|
429
|
+
end
|
430
|
+
|
431
|
+
###
|
432
|
+
# Set the default namespace for this node to +url+
|
433
|
+
def default_namespace= url
|
434
|
+
add_namespace_definition(nil, url)
|
435
|
+
end
|
436
|
+
alias :add_namespace :add_namespace_definition
|
437
|
+
|
438
|
+
###
|
439
|
+
# Set the namespace for this node to +ns+
|
440
|
+
def namespace= ns
|
441
|
+
if ns.document != document
|
442
|
+
raise ArgumentError, 'namespace must be declared on the same document'
|
443
|
+
end
|
444
|
+
unless ns.is_a? Nokogiri::XML::Namespace
|
445
|
+
raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
|
446
|
+
end
|
447
|
+
set_namespace ns
|
372
448
|
end
|
373
449
|
|
374
450
|
####
|
375
451
|
# Yields self and all children to +block+ recursively.
|
376
|
-
def traverse
|
452
|
+
def traverse &block
|
377
453
|
children.each{|j| j.traverse(&block) }
|
378
454
|
block.call(self)
|
379
455
|
end
|
380
456
|
|
457
|
+
###
|
458
|
+
# Accept a visitor. This method calls "visit" on +visitor+ with self.
|
459
|
+
def accept visitor
|
460
|
+
visitor.visit(self)
|
461
|
+
end
|
462
|
+
|
381
463
|
####
|
382
|
-
# replace
|
464
|
+
# replace this Node with the +new_node+ in the Document.
|
383
465
|
def replace(new_node)
|
384
466
|
if new_node.is_a?(Document) || !new_node.is_a?(XML::Node)
|
385
467
|
raise ArgumentError, <<-EOERR
|
@@ -399,97 +481,264 @@ Node.replace requires a Node argument, and cannot accept a Document.
|
|
399
481
|
end
|
400
482
|
|
401
483
|
###
|
402
|
-
# Serialize Node using +
|
484
|
+
# Serialize Node using +options+. Save options
|
403
485
|
# can also be set using a block. See SaveOptions.
|
404
486
|
#
|
405
487
|
# These two statements are equivalent:
|
406
488
|
#
|
407
|
-
# node.serialize('UTF-8', FORMAT | AS_XML)
|
489
|
+
# node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
|
408
490
|
#
|
409
491
|
# or
|
410
492
|
#
|
411
|
-
# node.serialize('UTF-8') do |config|
|
493
|
+
# node.serialize(:encoding => 'UTF-8') do |config|
|
412
494
|
# config.format.as_xml
|
413
495
|
# end
|
414
496
|
#
|
415
|
-
def serialize
|
497
|
+
def serialize *args, &block
|
498
|
+
if args.first && !args.first.is_a?(Hash)
|
499
|
+
$stderr.puts(<<-eowarn)
|
500
|
+
#{self.class}#serialize(encoding, save_opts) is deprecated and will be removed in
|
501
|
+
Nokogiri version 1.4.0 *or* after June 1 2009.
|
502
|
+
You called serialize from here:
|
503
|
+
|
504
|
+
#{caller.first}
|
505
|
+
|
506
|
+
Please change to #{self.class}#serialize(:encoding => enc, :save_with => opts)
|
507
|
+
eowarn
|
508
|
+
end
|
509
|
+
|
510
|
+
options = args.first.is_a?(Hash) ? args.shift : {
|
511
|
+
:encoding => args[0],
|
512
|
+
:save_with => args[1] || SaveOptions::FORMAT
|
513
|
+
}
|
514
|
+
|
416
515
|
io = StringIO.new
|
417
|
-
write_to io,
|
516
|
+
write_to io, options, &block
|
418
517
|
io.rewind
|
419
518
|
io.read
|
420
519
|
end
|
421
520
|
|
422
521
|
###
|
423
|
-
# Serialize this Node to HTML
|
424
|
-
|
522
|
+
# Serialize this Node to HTML
|
523
|
+
#
|
524
|
+
# doc.to_html
|
525
|
+
#
|
526
|
+
# See Node#write_to for a list of +options+. For formatted output,
|
527
|
+
# use Node#to_xhtml instead.
|
528
|
+
def to_html options = {}
|
529
|
+
if options.is_a?(String)
|
530
|
+
$stderr.puts(<<-eowarn)
|
531
|
+
Node#to_html(encoding) is deprecated and will be removed in
|
532
|
+
Nokogiri version 1.4.0 *or* after June 1 2009.
|
533
|
+
You called to_html from here:
|
534
|
+
|
535
|
+
#{caller.first}
|
536
|
+
|
537
|
+
Please change to Node#to_html(:encoding => #{options})
|
538
|
+
eowarn
|
539
|
+
options = { :encoding => options }
|
540
|
+
end
|
541
|
+
|
425
542
|
# FIXME: this is a hack around broken libxml versions
|
426
543
|
return dump_html if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
427
544
|
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
545
|
+
options[:save_with] ||= SaveOptions::FORMAT |
|
546
|
+
SaveOptions::NO_DECLARATION |
|
547
|
+
SaveOptions::NO_EMPTY_TAGS |
|
548
|
+
SaveOptions::AS_HTML
|
549
|
+
|
550
|
+
serialize(options)
|
432
551
|
end
|
433
552
|
|
434
553
|
###
|
435
|
-
# Serialize this Node to XML using +
|
436
|
-
|
437
|
-
|
554
|
+
# Serialize this Node to XML using +options+
|
555
|
+
#
|
556
|
+
# doc.to_xml(:indent => 5, :encoding => 'UTF-8')
|
557
|
+
#
|
558
|
+
# See Node#write_to for a list of +options+
|
559
|
+
def to_xml options = {}
|
560
|
+
encoding = nil
|
561
|
+
|
562
|
+
# FIXME add a deprecation warning
|
563
|
+
if options.is_a? String
|
564
|
+
$stderr.puts(<<-eowarn)
|
565
|
+
Node#to_xml(encoding) is deprecated and will be removed in
|
566
|
+
Nokogiri version 1.4.0 *or* after June 1 2009.
|
567
|
+
You called to_xml from here:
|
568
|
+
|
569
|
+
#{caller.first}
|
570
|
+
|
571
|
+
Please change to Node#to_xml(:encoding => #{options})
|
572
|
+
eowarn
|
573
|
+
options = {
|
574
|
+
:encoding => options
|
575
|
+
}
|
576
|
+
end
|
577
|
+
options[:save_with] ||= SaveOptions::FORMAT | SaveOptions::AS_XML
|
578
|
+
|
579
|
+
serialize(options)
|
438
580
|
end
|
439
581
|
|
440
582
|
###
|
441
|
-
# Serialize this Node to
|
442
|
-
|
583
|
+
# Serialize this Node to XHTML using +options+
|
584
|
+
#
|
585
|
+
# doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
|
586
|
+
#
|
587
|
+
# See Node#write_to for a list of +options+
|
588
|
+
def to_xhtml options = {}
|
589
|
+
if options.is_a?(String)
|
590
|
+
options = { :encoding => options }
|
591
|
+
$stderr.puts(<<-eowarn)
|
592
|
+
Node#to_xml(encoding) is deprecated and will be removed in
|
593
|
+
Nokogiri version 1.4.0 *or* after June 1 2009.
|
594
|
+
You called to_xhtml from here:
|
595
|
+
|
596
|
+
#{caller.first}
|
597
|
+
|
598
|
+
Please change to Node#to_xhtml(:encoding => #{options})
|
599
|
+
eowarn
|
600
|
+
end
|
601
|
+
|
443
602
|
# FIXME: this is a hack around broken libxml versions
|
444
603
|
return dump_html if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
445
604
|
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
605
|
+
options[:save_with] ||= SaveOptions::FORMAT |
|
606
|
+
SaveOptions::NO_DECLARATION |
|
607
|
+
SaveOptions::NO_EMPTY_TAGS |
|
608
|
+
SaveOptions::AS_XHTML
|
609
|
+
|
610
|
+
serialize(options)
|
450
611
|
end
|
451
612
|
|
452
613
|
###
|
453
|
-
# Write Node to +io+ with +
|
454
|
-
|
614
|
+
# Write Node to +io+ with +options+. +options+ modify the output of
|
615
|
+
# this method. Valid options are:
|
616
|
+
#
|
617
|
+
# * +:encoding+ for changing the encoding
|
618
|
+
# * +:indent_text+ the indentation text, defaults to one space
|
619
|
+
# * +:indent+ the number of +:indent_text+ to use, defaults to 2
|
620
|
+
# * +:save_with+ a combination of SaveOptions constants.
|
621
|
+
#
|
622
|
+
# To save with UTF-8 indented twice:
|
623
|
+
#
|
624
|
+
# node.write_to(io, :encoding => 'UTF-8', :indent => 2)
|
625
|
+
#
|
626
|
+
# To save indented with two dashes:
|
627
|
+
#
|
628
|
+
# node.write_to(io, :indent_text => '-', :indent => 2)
|
629
|
+
#
|
630
|
+
def write_to io, *options
|
631
|
+
if options.length > 0 && !options.first.is_a?(Hash)
|
632
|
+
$stderr.puts(<<-eowarn)
|
633
|
+
Node#write_to(io, encoding, save_options) is deprecated and will be removed in
|
634
|
+
Nokogiri version 1.4.0 *or* after June 1 2009.
|
635
|
+
You called write_to from here:
|
636
|
+
|
637
|
+
#{caller.first}
|
638
|
+
|
639
|
+
Please change to: Node#write_to(io, :encoding => e, :save_options => opts)
|
640
|
+
eowarn
|
641
|
+
end
|
642
|
+
|
643
|
+
options = options.first.is_a?(Hash) ? options.shift : {}
|
644
|
+
encoding = options[:encoding] || options[0]
|
645
|
+
save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
|
646
|
+
indent_text = options[:indent_text] || ' '
|
647
|
+
indent_times = options[:indent] || 2
|
648
|
+
|
649
|
+
|
455
650
|
config = SaveOptions.new(save_options)
|
456
651
|
yield config if block_given?
|
457
652
|
|
458
|
-
native_write_to(io, encoding, config.options)
|
653
|
+
native_write_to(io, encoding, indent_text * indent_times, config.options)
|
459
654
|
end
|
460
655
|
|
461
656
|
###
|
462
|
-
# Write Node as HTML to +io+ with +
|
463
|
-
|
464
|
-
|
657
|
+
# Write Node as HTML to +io+ with +options+
|
658
|
+
#
|
659
|
+
# See Node#write_to for a list of +options+
|
660
|
+
def write_html_to io, options = {}
|
661
|
+
if options.is_a?(String)
|
662
|
+
$stderr.puts(<<-eowarn)
|
663
|
+
Node#write_html_to(io, encoding) is deprecated and will be removed in
|
664
|
+
Nokogiri version 1.4.0 *or* after June 1 2009.
|
665
|
+
You called write_html_to from here:
|
666
|
+
|
667
|
+
#{caller.first}
|
668
|
+
|
669
|
+
Please change to Node#write_html_to(io, :encoding => #{options})
|
670
|
+
eowarn
|
671
|
+
options = { :encoding => options }
|
672
|
+
end
|
673
|
+
|
674
|
+
# FIXME: this is a hack around broken libxml versions
|
675
|
+
return (io << dump_html) if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
676
|
+
|
677
|
+
options[:save_with] ||= SaveOptions::FORMAT |
|
465
678
|
SaveOptions::NO_DECLARATION |
|
466
679
|
SaveOptions::NO_EMPTY_TAGS |
|
467
680
|
SaveOptions::AS_HTML
|
681
|
+
write_to io, options
|
468
682
|
end
|
469
683
|
|
470
684
|
###
|
471
|
-
# Write Node as XHTML to +io+ with +
|
472
|
-
|
473
|
-
|
685
|
+
# Write Node as XHTML to +io+ with +options+
|
686
|
+
#
|
687
|
+
# See Node#write_to for a list of +options+
|
688
|
+
def write_xhtml_to io, options = {}
|
689
|
+
if options.is_a?(String)
|
690
|
+
$stderr.puts(<<-eowarn)
|
691
|
+
Node#write_xhtml_to(io, encoding) is deprecated and will be removed in
|
692
|
+
Nokogiri version 1.4.0 *or* after June 1 2009.
|
693
|
+
You called write_xhtml_to from here:
|
694
|
+
|
695
|
+
#{caller.first}
|
696
|
+
|
697
|
+
Please change to Node#write_xhtml_to(io, :encoding => #{options})
|
698
|
+
eowarn
|
699
|
+
options = { :encoding => options }
|
700
|
+
end
|
701
|
+
|
702
|
+
# FIXME: this is a hack around broken libxml versions
|
703
|
+
return (io << dump_html) if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
704
|
+
|
705
|
+
options[:save_with] ||= SaveOptions::FORMAT |
|
474
706
|
SaveOptions::NO_DECLARATION |
|
475
707
|
SaveOptions::NO_EMPTY_TAGS |
|
476
708
|
SaveOptions::AS_XHTML
|
709
|
+
write_to io, options
|
477
710
|
end
|
478
711
|
|
479
712
|
###
|
480
|
-
# Write Node as XML to +io+ with +
|
481
|
-
|
482
|
-
|
713
|
+
# Write Node as XML to +io+ with +options+
|
714
|
+
#
|
715
|
+
# doc.write_xml_to io, :encoding => 'UTF-8'
|
716
|
+
#
|
717
|
+
# See Node#write_to for a list of options
|
718
|
+
def write_xml_to io, options = {}
|
719
|
+
if options.is_a?(String)
|
720
|
+
$stderr.puts(<<-eowarn)
|
721
|
+
Node#write_xml_to(io, encoding) is deprecated and will be removed in
|
722
|
+
Nokogiri version 1.4.0 *or* after June 1 2009.
|
723
|
+
You called write_xml_to from here:
|
724
|
+
|
725
|
+
#{caller.first}
|
726
|
+
|
727
|
+
Please change to Node#write_xml_to(io, :encoding => #{options})
|
728
|
+
eowarn
|
729
|
+
options = { :encoding => options }
|
730
|
+
end
|
731
|
+
options[:save_with] ||= SaveOptions::FORMAT | SaveOptions::AS_XML
|
732
|
+
write_to io, options
|
483
733
|
end
|
484
734
|
|
485
|
-
|
486
|
-
#
|
487
|
-
#
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
Nokogiri::HTML.fragment(string).first
|
735
|
+
###
|
736
|
+
# Compare two Node objects with respect to their Document. Nodes from
|
737
|
+
# different documents cannot be compared.
|
738
|
+
def <=> other
|
739
|
+
return nil unless other.is_a?(Nokogiri::XML::Node)
|
740
|
+
return nil unless document == other.document
|
741
|
+
compare other
|
493
742
|
end
|
494
743
|
end
|
495
744
|
end
|