glebm-nokogiri 1.4.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +26 -0
- data/CHANGELOG.ja.rdoc +411 -0
- data/CHANGELOG.rdoc +397 -0
- data/Manifest.txt +276 -0
- data/README.ja.rdoc +106 -0
- data/README.rdoc +132 -0
- data/Rakefile +183 -0
- data/bin/nokogiri +49 -0
- data/deps.rip +5 -0
- data/ext/nokogiri/extconf.rb +97 -0
- data/ext/nokogiri/html_document.c +154 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +276 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +94 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/nokogiri.c +95 -0
- data/ext/nokogiri/nokogiri.h +153 -0
- data/ext/nokogiri/xml_attr.c +94 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +56 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +54 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +464 -0
- data/ext/nokogiri/xml_document.h +23 -0
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +202 -0
- data/ext/nokogiri/xml_dtd.h +10 -0
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_encoding_handler.c +79 -0
- data/ext/nokogiri/xml_encoding_handler.h +8 -0
- data/ext/nokogiri/xml_entity_decl.c +110 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +52 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +31 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_namespace.c +84 -0
- data/ext/nokogiri/xml_namespace.h +13 -0
- data/ext/nokogiri/xml_node.c +1347 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +418 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +56 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +665 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +168 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +286 -0
- data/ext/nokogiri/xml_sax_parser.h +39 -0
- data/ext/nokogiri/xml_sax_parser_context.c +159 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +115 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +205 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +58 -0
- data/ext/nokogiri/xml_syntax_error.h +13 -0
- data/ext/nokogiri/xml_text.c +50 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath_context.c +276 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +142 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/nokogiri.rb +133 -0
- data/lib/nokogiri/css.rb +25 -0
- data/lib/nokogiri/css/generated_parser.rb +669 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +145 -0
- data/lib/nokogiri/css/node.rb +99 -0
- data/lib/nokogiri/css/parser.rb +82 -0
- data/lib/nokogiri/css/parser.y +232 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rex +55 -0
- data/lib/nokogiri/css/xpath_visitor.rb +169 -0
- data/lib/nokogiri/decorators/slop.rb +33 -0
- data/lib/nokogiri/ffi/encoding_handler.rb +42 -0
- data/lib/nokogiri/ffi/html/document.rb +28 -0
- data/lib/nokogiri/ffi/html/element_description.rb +81 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
- data/lib/nokogiri/ffi/libxml.rb +386 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +38 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +117 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
- data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
- data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +124 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +38 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/weak_bucket.rb +40 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +162 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +67 -0
- data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
- data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
- data/lib/nokogiri/ffi/xml/entity_decl.rb +36 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
- data/lib/nokogiri/ffi/xml/node.rb +556 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +149 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +232 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +135 -0
- data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +51 -0
- data/lib/nokogiri/ffi/xml/schema.rb +109 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +98 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +9 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +148 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +53 -0
- data/lib/nokogiri/html.rb +35 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +90 -0
- data/lib/nokogiri/html/document_fragment.rb +36 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +48 -0
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +37 -0
- data/lib/nokogiri/version_warning.rb +14 -0
- data/lib/nokogiri/xml.rb +67 -0
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +418 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +194 -0
- data/lib/nokogiri/xml/document_fragment.rb +77 -0
- data/lib/nokogiri/xml/dtd.rb +11 -0
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +19 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node.rb +793 -0
- data/lib/nokogiri/xml/node/save_options.rb +42 -0
- data/lib/nokogiri/xml/node_set.rb +325 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +85 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +74 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +4 -0
- data/lib/nokogiri/xml/sax/document.rb +160 -0
- data/lib/nokogiri/xml/sax/parser.rb +115 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +57 -0
- data/lib/nokogiri/xml/syntax_error.rb +47 -0
- data/lib/nokogiri/xml/text.rb +9 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +48 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +90 -0
- data/tasks/cross_compile.rb +158 -0
- data/tasks/test.rb +94 -0
- data/test/css/test_nthiness.rb +159 -0
- data/test/css/test_parser.rb +282 -0
- data/test/css/test_tokenizer.rb +190 -0
- data/test/css/test_xpath_visitor.rb +85 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/2ch.html +108 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/helper.rb +169 -0
- data/test/html/sax/test_parser.rb +74 -0
- data/test/html/sax/test_parser_context.rb +48 -0
- data/test/html/test_builder.rb +164 -0
- data/test/html/test_document.rb +398 -0
- data/test/html/test_document_encoding.rb +77 -0
- data/test/html/test_document_fragment.rb +182 -0
- data/test/html/test_element_description.rb +98 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +181 -0
- data/test/html/test_node_encoding.rb +27 -0
- data/test/test_convert_xpath.rb +135 -0
- data/test/test_css_cache.rb +45 -0
- data/test/test_encoding_handler.rb +46 -0
- data/test/test_memory_leak.rb +87 -0
- data/test/test_nokogiri.rb +138 -0
- data/test/test_reader.rb +386 -0
- data/test/test_soap4r_sax.rb +52 -0
- data/test/test_xslt_transforms.rb +188 -0
- data/test/xml/node/test_save_options.rb +20 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +307 -0
- data/test/xml/sax/test_parser_context.rb +63 -0
- data/test/xml/sax/test_push_parser.rb +139 -0
- data/test/xml/test_attr.rb +38 -0
- data/test/xml/test_attribute_decl.rb +82 -0
- data/test/xml/test_builder.rb +210 -0
- data/test/xml/test_cdata.rb +50 -0
- data/test/xml/test_comment.rb +29 -0
- data/test/xml/test_document.rb +668 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +180 -0
- data/test/xml/test_dtd.rb +82 -0
- data/test/xml/test_dtd_encoding.rb +33 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +120 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +68 -0
- data/test/xml/test_node.rb +865 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +107 -0
- data/test/xml/test_node_reparenting.rb +293 -0
- data/test/xml/test_node_set.rb +649 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +89 -0
- data/test/xml/test_syntax_error.rb +12 -0
- data/test/xml/test_text.rb +38 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +138 -0
- metadata +533 -0
@@ -0,0 +1,194 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
##
|
4
|
+
# Nokogiri::XML::Document is the main entry point for dealing with
|
5
|
+
# XML documents. The Document is created by parsing an XML document.
|
6
|
+
# See Nokogiri.XML()
|
7
|
+
#
|
8
|
+
# For searching a Document, see Nokogiri::XML::Node#css and
|
9
|
+
# Nokogiri::XML::Node#xpath
|
10
|
+
class Document < Node
|
11
|
+
##
|
12
|
+
# Parse an XML file. +string_or_io+ may be a String, or any object that
|
13
|
+
# responds to _read_ and _close_ such as an IO, or StringIO.
|
14
|
+
# +url+ is the resource where this document is located. +encoding+ is the
|
15
|
+
# encoding that should be used when processing the document. +options+
|
16
|
+
# is a number that sets options in the parser, such as
|
17
|
+
# Nokogiri::XML::ParseOptions::RECOVER. See the constants in
|
18
|
+
# Nokogiri::XML::ParseOptions.
|
19
|
+
# Entry point for building a document from a String or a readable object.
# Objects responding to +read+ go to +read_io+ (using their +path+ as the
# URL when none was given); everything else goes to +read_memory+, except
# nil/empty input which yields a blank document from +new+.
def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block

  # Wrap a raw bit mask so the block below can manipulate it. Integer is
  # used instead of Fixnum: Fixnum was removed in Ruby 3.2 and never
  # matched large (Bignum) masks.
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
  # Give the options to the user
  yield options if block_given?

  if string_or_io.respond_to?(:read)
    url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
    return read_io(string_or_io, url, encoding, options.to_i)
  end

  # read_memory pukes on empty docs
  return new if string_or_io.nil? || string_or_io.empty?

  read_memory(string_or_io, url, encoding, options.to_i)
end
|
35
|
+
|
36
|
+
# A list of Nokogiri::XML::SyntaxError found when parsing a document
|
37
|
+
attr_accessor :errors
|
38
|
+
|
39
|
+
def initialize *args # :nodoc:
|
40
|
+
@errors = []
|
41
|
+
@decorators = nil
|
42
|
+
end
|
43
|
+
|
44
|
+
##
|
45
|
+
# Create an element with +name+, and optionally setting the content and attributes.
|
46
|
+
#
|
47
|
+
# doc.create_element "div" # <div></div>
|
48
|
+
# doc.create_element "div", :class => "container" # <div class='container'></div>
|
49
|
+
# doc.create_element "div", "contents" # <div>contents</div>
|
50
|
+
# doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
|
51
|
+
# doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
|
52
|
+
#
|
53
|
+
def create_element name, *args, &block
|
54
|
+
elm = Nokogiri::XML::Element.new(name, self, &block)
|
55
|
+
args.each do |arg|
|
56
|
+
case arg
|
57
|
+
when Hash
|
58
|
+
arg.each { |k,v|
|
59
|
+
key = k.to_s
|
60
|
+
if key =~ /^xmlns(:\w+)?$/
|
61
|
+
ns_name = key.split(":", 2)[1]
|
62
|
+
elm.add_namespace_definition ns_name, v
|
63
|
+
next
|
64
|
+
end
|
65
|
+
elm[k.to_s] = v.to_s
|
66
|
+
}
|
67
|
+
else
|
68
|
+
elm.content = arg
|
69
|
+
end
|
70
|
+
end
|
71
|
+
elm
|
72
|
+
end
|
73
|
+
|
74
|
+
# Create a text node with +text+
|
75
|
+
def create_text_node text, &block
|
76
|
+
Nokogiri::XML::Text.new(text.to_s, self, &block)
|
77
|
+
end
|
78
|
+
|
79
|
+
# Create a CDATA element containing +text+
|
80
|
+
def create_cdata text
|
81
|
+
Nokogiri::XML::CDATA.new(self, text.to_s)
|
82
|
+
end
|
83
|
+
|
84
|
+
# The name of this document. Always returns "document"
|
85
|
+
def name
|
86
|
+
'document'
|
87
|
+
end
|
88
|
+
|
89
|
+
# A reference to +self+
|
90
|
+
def document
|
91
|
+
self
|
92
|
+
end
|
93
|
+
|
94
|
+
##
|
95
|
+
# Recursively get all namespaces from this node and its subtree and
|
96
|
+
# return them as a hash.
|
97
|
+
#
|
98
|
+
# For example, given this document:
|
99
|
+
#
|
100
|
+
# <root xmlns:foo="bar">
|
101
|
+
# <bar xmlns:hello="world" />
|
102
|
+
# </root>
|
103
|
+
#
|
104
|
+
# This method will return:
|
105
|
+
#
|
106
|
+
# { 'xmlns:foo' => 'bar', 'xmlns:hello' => 'world' }
|
107
|
+
#
|
108
|
+
# WARNING: this method will clobber duplicate names in the keys.
|
109
|
+
# For example, given this document:
|
110
|
+
#
|
111
|
+
# <root xmlns:foo="bar">
|
112
|
+
# <bar xmlns:foo="baz" />
|
113
|
+
# </root>
|
114
|
+
#
|
115
|
+
# The hash returned will look like this: { 'xmlns:foo' => 'bar' }
|
116
|
+
def collect_namespaces
|
117
|
+
ns = {}
|
118
|
+
traverse { |j| ns.merge!(j.namespaces) }
|
119
|
+
ns
|
120
|
+
end
|
121
|
+
|
122
|
+
# Get the list of decorators given +key+
|
123
|
+
def decorators key
|
124
|
+
@decorators ||= Hash.new
|
125
|
+
@decorators[key] ||= []
|
126
|
+
end
|
127
|
+
|
128
|
+
##
|
129
|
+
# Validate this Document against its DTD. Returns a list of errors on
|
130
|
+
# the document or +nil+ when there is no DTD.
|
131
|
+
def validate
|
132
|
+
return nil unless internal_subset
|
133
|
+
internal_subset.validate self
|
134
|
+
end
|
135
|
+
|
136
|
+
##
|
137
|
+
# Explore a document with shortcut methods.
|
138
|
+
def slop!
|
139
|
+
unless decorators(XML::Node).include? Nokogiri::Decorators::Slop
|
140
|
+
decorators(XML::Node) << Nokogiri::Decorators::Slop
|
141
|
+
decorate!
|
142
|
+
end
|
143
|
+
|
144
|
+
self
|
145
|
+
end
|
146
|
+
|
147
|
+
##
|
148
|
+
# Apply any decorators to +node+
|
149
|
+
def decorate node
|
150
|
+
return unless @decorators
|
151
|
+
@decorators.each { |klass,list|
|
152
|
+
next unless node.is_a?(klass)
|
153
|
+
list.each { |moodule| node.extend(moodule) }
|
154
|
+
}
|
155
|
+
end
|
156
|
+
|
157
|
+
alias :to_xml :serialize
|
158
|
+
alias :clone :dup
|
159
|
+
|
160
|
+
# Get the hash of namespaces on the root Nokogiri::XML::Node
|
161
|
+
def namespaces
|
162
|
+
root ? root.namespaces : {}
|
163
|
+
end
|
164
|
+
|
165
|
+
##
|
166
|
+
# Create a Nokogiri::XML::DocumentFragment from +tags+
|
167
|
+
# Returns an empty fragment if +tags+ is nil.
|
168
|
+
def fragment tags = nil
|
169
|
+
DocumentFragment.new(self, tags, self.root)
|
170
|
+
end
|
171
|
+
|
172
|
+
undef_method :swap, :parent, :namespace, :default_namespace=
|
173
|
+
undef_method :add_namespace_definition, :attributes
|
174
|
+
undef_method :namespace_definitions, :line, :add_namespace
|
175
|
+
undef_method :parse, :in_context
|
176
|
+
|
177
|
+
def add_child child
|
178
|
+
raise "Document already has a root node" if root
|
179
|
+
if child.type == Node::DOCUMENT_FRAG_NODE
|
180
|
+
raise "Document cannot have multiple root nodes" if child.children.size > 1
|
181
|
+
super(child.children.first)
|
182
|
+
else
|
183
|
+
super
|
184
|
+
end
|
185
|
+
end
|
186
|
+
alias :<< :add_child
|
187
|
+
|
188
|
+
private
|
189
|
+
def inspect_attributes
|
190
|
+
[:name, :children]
|
191
|
+
end
|
192
|
+
end
|
193
|
+
end
|
194
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
class DocumentFragment < Nokogiri::XML::Node
|
4
|
+
##
|
5
|
+
# Create a new DocumentFragment from +tags+.
|
6
|
+
#
|
7
|
+
# If +ctx+ is present, it is used as a context node for the
|
8
|
+
# subtree created, e.g., namespaces will be resolved relative
|
9
|
+
# to +ctx+.
|
10
|
+
def initialize document, tags = nil, ctx = nil
|
11
|
+
return self unless tags
|
12
|
+
|
13
|
+
children = if ctx
|
14
|
+
ctx.parse(tags.strip)
|
15
|
+
else
|
16
|
+
XML::Document.parse("<root>#{tags.strip}</root>") \
|
17
|
+
.xpath("/root/node()")
|
18
|
+
end
|
19
|
+
children.each { |child| child.parent = self }
|
20
|
+
end
|
21
|
+
|
22
|
+
###
|
23
|
+
# return the name for DocumentFragment
|
24
|
+
def name
|
25
|
+
'#document-fragment'
|
26
|
+
end
|
27
|
+
|
28
|
+
###
|
29
|
+
# Convert this DocumentFragment to a string
|
30
|
+
def to_s
|
31
|
+
children.to_s
|
32
|
+
end
|
33
|
+
|
34
|
+
###
|
35
|
+
# Convert this DocumentFragment to html
|
36
|
+
# See Nokogiri::XML::NodeSet#to_html
|
37
|
+
def to_html *args
|
38
|
+
children.to_html(*args)
|
39
|
+
end
|
40
|
+
|
41
|
+
###
|
42
|
+
# Convert this DocumentFragment to xhtml
|
43
|
+
# See Nokogiri::XML::NodeSet#to_xhtml
|
44
|
+
def to_xhtml *args
|
45
|
+
children.to_xhtml(*args)
|
46
|
+
end
|
47
|
+
|
48
|
+
###
|
49
|
+
# Convert this DocumentFragment to xml
|
50
|
+
# See Nokogiri::XML::NodeSet#to_xml
|
51
|
+
def to_xml *args
|
52
|
+
children.to_xml(*args)
|
53
|
+
end
|
54
|
+
|
55
|
+
###
|
56
|
+
# Search this fragment. See Nokogiri::XML::Node#css
|
57
|
+
def css *args
|
58
|
+
if children.any?
|
59
|
+
children.css(*args)
|
60
|
+
else
|
61
|
+
NodeSet.new(document)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
alias :serialize :to_s
|
66
|
+
|
67
|
+
class << self
|
68
|
+
####
|
69
|
+
# Create a Nokogiri::XML::DocumentFragment from +tags+
|
70
|
+
def parse tags
|
71
|
+
self.new(XML::Document.new, tags)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
###
|
4
|
+
# Represents the allowed content in an Element Declaration inside a DTD:
|
5
|
+
#
|
6
|
+
# <?xml version="1.0"?><?TEST-STYLE PIDATA?>
|
7
|
+
# <!DOCTYPE staff SYSTEM "staff.dtd" [
|
8
|
+
# <!ELEMENT div1 (head, (p | list | note)*, div2*)>
|
9
|
+
# ]>
|
10
|
+
# </root>
|
11
|
+
#
|
12
|
+
# ElementContent represents the tree inside the <!ELEMENT> tag shown above
|
13
|
+
# that lists the possible content for the div1 tag.
|
14
|
+
class ElementContent
|
15
|
+
# Possible definitions of type
|
16
|
+
PCDATA = 1
|
17
|
+
ELEMENT = 2
|
18
|
+
SEQ = 3
|
19
|
+
OR = 4
|
20
|
+
|
21
|
+
# Possible content occurrences
|
22
|
+
ONCE = 1
|
23
|
+
OPT = 2
|
24
|
+
MULT = 3
|
25
|
+
PLUS = 4
|
26
|
+
|
27
|
+
attr_reader :document
|
28
|
+
|
29
|
+
###
|
30
|
+
# Get the children of this ElementContent node
|
31
|
+
def children
|
32
|
+
[c1, c2].compact
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
class ElementDecl < Nokogiri::XML::Node
|
4
|
+
undef_method :namespace
|
5
|
+
undef_method :namespace_definitions
|
6
|
+
undef_method :line
|
7
|
+
|
8
|
+
def inspect
|
9
|
+
"#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{to_s.inspect}>"
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
class EntityDecl < Nokogiri::XML::Node
|
4
|
+
undef_method :attribute_nodes
|
5
|
+
undef_method :attributes
|
6
|
+
undef_method :namespace
|
7
|
+
undef_method :namespace_definitions
|
8
|
+
undef_method :line
|
9
|
+
|
10
|
+
def self.new name, doc, *args
|
11
|
+
doc.create_entity(name, *args)
|
12
|
+
end
|
13
|
+
|
14
|
+
def inspect
|
15
|
+
"#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{to_s.inspect}>"
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,793 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require 'nokogiri/xml/node/save_options'
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
module XML
|
6
|
+
####
|
7
|
+
# Nokogiri::XML::Node is your window to the fun filled world of dealing
|
8
|
+
# with XML and HTML tags. A Nokogiri::XML::Node may be treated similarly
|
9
|
+
# to a hash with regard to attributes. For example (from irb):
|
10
|
+
#
|
11
|
+
# irb(main):004:0> node
|
12
|
+
# => <a href="#foo" id="link">link</a>
|
13
|
+
# irb(main):005:0> node['href']
|
14
|
+
# => "#foo"
|
15
|
+
# irb(main):006:0> node.keys
|
16
|
+
# => ["href", "id"]
|
17
|
+
# irb(main):007:0> node.values
|
18
|
+
# => ["#foo", "link"]
|
19
|
+
# irb(main):008:0> node['class'] = 'green'
|
20
|
+
# => "green"
|
21
|
+
# irb(main):009:0> node
|
22
|
+
# => <a href="#foo" id="link" class="green">link</a>
|
23
|
+
# irb(main):010:0>
|
24
|
+
#
|
25
|
+
# See Nokogiri::XML::Node#[] and Nokogiri::XML::Node#[]= for more information.
|
26
|
+
#
|
27
|
+
# Nokogiri::XML::Node also has methods that let you move around your
|
28
|
+
# tree. For navigating your tree, see:
|
29
|
+
#
|
30
|
+
# * Nokogiri::XML::Node#parent
|
31
|
+
# * Nokogiri::XML::Node#children
|
32
|
+
# * Nokogiri::XML::Node#next
|
33
|
+
# * Nokogiri::XML::Node#previous
|
34
|
+
#
|
35
|
+
# You may search this node's subtree using Node#xpath and Node#css
|
36
|
+
class Node
|
37
|
+
include Nokogiri::XML::PP::Node
|
38
|
+
include Enumerable
|
39
|
+
|
40
|
+
# Element node type, see Nokogiri::XML::Node#element?
|
41
|
+
ELEMENT_NODE = 1
|
42
|
+
# Attribute node type
|
43
|
+
ATTRIBUTE_NODE = 2
|
44
|
+
# Text node type, see Nokogiri::XML::Node#text?
|
45
|
+
TEXT_NODE = 3
|
46
|
+
# CDATA node type, see Nokogiri::XML::Node#cdata?
|
47
|
+
CDATA_SECTION_NODE = 4
|
48
|
+
# Entity reference node type
|
49
|
+
ENTITY_REF_NODE = 5
|
50
|
+
# Entity node type
|
51
|
+
ENTITY_NODE = 6
|
52
|
+
# PI node type
|
53
|
+
PI_NODE = 7
|
54
|
+
# Comment node type, see Nokogiri::XML::Node#comment?
|
55
|
+
COMMENT_NODE = 8
|
56
|
+
# Document node type, see Nokogiri::XML::Node#xml?
|
57
|
+
DOCUMENT_NODE = 9
|
58
|
+
# Document type node type
|
59
|
+
DOCUMENT_TYPE_NODE = 10
|
60
|
+
# Document fragment node type
|
61
|
+
DOCUMENT_FRAG_NODE = 11
|
62
|
+
# Notation node type
|
63
|
+
NOTATION_NODE = 12
|
64
|
+
# HTML document node type, see Nokogiri::XML::Node#html?
|
65
|
+
HTML_DOCUMENT_NODE = 13
|
66
|
+
# DTD node type
|
67
|
+
DTD_NODE = 14
|
68
|
+
# Element declaration type
|
69
|
+
ELEMENT_DECL = 15
|
70
|
+
# Attribute declaration type
|
71
|
+
ATTRIBUTE_DECL = 16
|
72
|
+
# Entity declaration type
|
73
|
+
ENTITY_DECL = 17
|
74
|
+
# Namespace declaration type
|
75
|
+
NAMESPACE_DECL = 18
|
76
|
+
# XInclude start type
|
77
|
+
XINCLUDE_START = 19
|
78
|
+
# XInclude end type
|
79
|
+
XINCLUDE_END = 20
|
80
|
+
# DOCB document node type
|
81
|
+
DOCB_DOCUMENT_NODE = 21
|
82
|
+
|
83
|
+
def initialize name, document # :nodoc:
|
84
|
+
# ... Ya. This is empty on purpose.
|
85
|
+
end
|
86
|
+
|
87
|
+
###
|
88
|
+
# Decorate this node with the decorators set up in this node's Document
|
89
|
+
def decorate!
|
90
|
+
document.decorate(self)
|
91
|
+
end
|
92
|
+
|
93
|
+
###
|
94
|
+
# Search this node for +paths+. +paths+ can be XPath or CSS, and an
|
95
|
+
# optional hash of namespaces may be appended.
|
96
|
+
# See Node#xpath and Node#css.
|
97
|
+
def search *paths
|
98
|
+
ns = paths.last.is_a?(Hash) ? paths.pop :
|
99
|
+
(document.root ? document.root.namespaces : {})
|
100
|
+
xpath(*(paths.map { |path|
|
101
|
+
path = path.to_s
|
102
|
+
path =~ /^(\.\/|\/)/ ? path : CSS.xpath_for(
|
103
|
+
path,
|
104
|
+
:prefix => ".//",
|
105
|
+
:ns => ns
|
106
|
+
)
|
107
|
+
}.flatten.uniq) + [ns])
|
108
|
+
end
|
109
|
+
alias :/ :search
|
110
|
+
|
111
|
+
###
|
112
|
+
# Search this node for XPath +paths+. +paths+ must be one or more XPath
|
113
|
+
# queries. A hash of namespaces may be appended. For example:
|
114
|
+
#
|
115
|
+
# node.xpath('.//title')
|
116
|
+
# node.xpath('.//foo:name', { 'foo' => 'http://example.org/' })
|
117
|
+
# node.xpath('.//xmlns:name', node.root.namespaces)
|
118
|
+
#
|
119
|
+
# Custom XPath functions may also be defined. To define custom functions
|
120
|
+
# create a class and implement the function you want to define.
|
121
|
+
# For example:
|
122
|
+
#
|
123
|
+
# node.xpath('.//title[regex(., "\w+")]', Class.new {
|
124
|
+
# def regex node_set, regex
|
125
|
+
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
126
|
+
# end
|
127
|
+
# }.new)
|
128
|
+
#
|
129
|
+
def xpath *paths
  # Bail out before anything dereferences the document: the default
  # namespace lookup below calls document.root and would raise on a nil
  # document, which made this guard unreachable in that case.
  return NodeSet.new(document) unless document

  # Pop off our custom function handler if it exists (any trailing
  # argument that is not a Hash, String or Symbol)
  handler = ![
    Hash, String, Symbol
  ].include?(paths.last.class) ? paths.pop : nil

  # A trailing Hash is the namespace map; otherwise use the root's namespaces
  ns = paths.last.is_a?(Hash) ? paths.pop :
    (document.root ? document.root.namespaces : {})

  # Evaluate every path in its own context
  sets = paths.map { |path|
    ctx = XPathContext.new(self)
    ctx.register_namespaces(ns)
    ctx.evaluate(path, handler)
  }
  return sets.first if sets.length == 1

  # Several paths: merge all results into one NodeSet
  NodeSet.new(document) do |combined|
    sets.each do |set|
      set.each do |node|
        combined << node
      end
    end
  end
end
|
155
|
+
|
156
|
+
###
|
157
|
+
# Search this node for CSS +rules+. +rules+ must be one or more CSS
|
158
|
+
# selectors. For example:
|
159
|
+
#
|
160
|
+
# node.css('title')
|
161
|
+
# node.css('body h1.bold')
|
162
|
+
# node.css('div + p.green', 'div#one')
|
163
|
+
#
|
164
|
+
# Custom CSS pseudo classes may also be defined. To define custom pseudo
|
165
|
+
# classes, create a class and implement the custom pseudo class you
|
166
|
+
# want defined. The first argument to the method will be the current
|
167
|
+
# matching NodeSet. Any other arguments are ones that you pass in.
|
168
|
+
# For example:
|
169
|
+
#
|
170
|
+
# node.css('title:regex("\w+")', Class.new {
|
171
|
+
# def regex node_set, regex
|
172
|
+
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
173
|
+
# end
|
174
|
+
# }.new)
|
175
|
+
#
|
176
|
+
def css *rules
|
177
|
+
# Pop off our custom function handler if it exists
|
178
|
+
handler = ![
|
179
|
+
Hash, String, Symbol
|
180
|
+
].include?(rules.last.class) ? rules.pop : nil
|
181
|
+
|
182
|
+
ns = rules.last.is_a?(Hash) ? rules.pop :
|
183
|
+
(document.root ? document.root.namespaces : {})
|
184
|
+
|
185
|
+
rules = rules.map { |rule|
|
186
|
+
CSS.xpath_for(rule, :prefix => ".//", :ns => ns)
|
187
|
+
}.flatten.uniq + [ns, handler].compact
|
188
|
+
|
189
|
+
xpath(*rules)
|
190
|
+
end
|
191
|
+
|
192
|
+
###
|
193
|
+
# Search this node's immediate children using CSS selector +selector+
|
194
|
+
# An empty namespace hash is used when the document has no root node, the
# same fallback #search, #css and #at apply, instead of raising on nil.
def > selector
  root = document.root
  ns = root ? root.namespaces : {}
  xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
end
|
198
|
+
|
199
|
+
###
|
200
|
+
# Search for the first occurrence of +path+.
|
201
|
+
#
|
202
|
+
# Returns nil if nothing is found, otherwise a Node.
|
203
|
+
def at path, ns = document.root ? document.root.namespaces : {}
|
204
|
+
search(path, ns).first
|
205
|
+
end
|
206
|
+
alias :% :at
|
207
|
+
|
208
|
+
##
|
209
|
+
# Search this node for the first occurrence of XPath +paths+.
|
210
|
+
# Equivalent to <tt>xpath(paths).first</tt>
|
211
|
+
# See Node#xpath for more information.
|
212
|
+
#
|
213
|
+
def at_xpath *paths
|
214
|
+
xpath(*paths).first
|
215
|
+
end
|
216
|
+
|
217
|
+
##
|
218
|
+
# Search this node for the first occurrence of CSS +rules+.
|
219
|
+
# Equivalent to <tt>css(rules).first</tt>
|
220
|
+
# See Node#css for more information.
|
221
|
+
#
|
222
|
+
def at_css *rules
|
223
|
+
css(*rules).first
|
224
|
+
end
|
225
|
+
|
226
|
+
###
|
227
|
+
# Get the attribute value for the attribute +name+
|
228
|
+
def [] name
|
229
|
+
return nil unless key?(name.to_s)
|
230
|
+
get(name.to_s)
|
231
|
+
end
|
232
|
+
|
233
|
+
###
|
234
|
+
# Add +node_or_tags+ as a child of this Node.
|
235
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
|
236
|
+
#
|
237
|
+
# Returns the new child node.
|
238
|
+
def add_child node_or_tags
|
239
|
+
node_or_tags = coerce(node_or_tags)
|
240
|
+
if node_or_tags.is_a?(XML::NodeSet)
|
241
|
+
node_or_tags.each { |n| add_child_node n }
|
242
|
+
else
|
243
|
+
add_child_node node_or_tags
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
###
# Place +node_or_tags+ immediately before this Node, as a sibling.
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
#
# A single node is inserted via add_previous_sibling_node and that call's
# result is returned; for a NodeSet every member is inserted in order.
#
# Also see related method +before+.
def add_previous_sibling(node_or_tags)
  coerced = coerce(node_or_tags)
  return add_previous_sibling_node(coerced) unless coerced.is_a?(XML::NodeSet)

  coerced.each { |sibling| add_previous_sibling_node sibling }
end
|
262
|
+
|
263
|
+
###
# Place +node_or_tags+ immediately after this Node, as a sibling.
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
#
# A single node is inserted via add_next_sibling_node and that call's
# result is returned. A NodeSet is walked back to front, each member being
# inserted right after this node (presumably so the set keeps its original
# order in the tree).
#
# Also see related method +after+.
def add_next_sibling(node_or_tags)
  coerced = coerce(node_or_tags)
  return add_next_sibling_node(coerced) unless coerced.is_a?(XML::NodeSet)

  if RUBY_VERSION == '1.8.6'
    # Ruby 1.8.6 gets an explicit reverse followed by each.
    coerced.reverse.each { |sibling| add_next_sibling_node sibling }
  else
    coerced.reverse_each { |sibling| add_next_sibling_node sibling }
  end
end
|
282
|
+
|
283
|
+
####
# Chainable form of +add_previous_sibling+: inserts +node_or_tags+ before
# this node (as a sibling) and hands back the receiver.
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
#
# Returns self, to support chaining of calls.
def before(node_or_tags)
  add_previous_sibling(node_or_tags)
  self
end
|
294
|
+
|
295
|
+
####
# Chainable form of +add_next_sibling+: inserts +node_or_tags+ after this
# node (as a sibling) and hands back the receiver.
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
#
# Returns self, to support chaining of calls.
def after(node_or_tags)
  add_next_sibling(node_or_tags)
  self
end
|
306
|
+
|
307
|
+
####
# Replace this Node's children with +node_or_tags+.
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
#
# The argument is coerced first, then the current children are unlinked,
# then the new content is added.
#
# Returns self.
def inner_html=(node_or_tags)
  replacement = coerce(node_or_tags)
  children.unlink

  if replacement.is_a?(XML::NodeSet)
    replacement.each { |child| add_child_node child }
  else
    add_child replacement
  end

  self
end
|
322
|
+
|
323
|
+
####
# Put +node_or_tags+ in this Node's place.
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
#
# A single node is swapped in via replace_node and that call's result is
# returned. For a NodeSet, every member is inserted before this node and
# this node is then unlinked; the result of unlink is returned.
#
# Also see related method +swap+.
def replace(node_or_tags)
  replacement = coerce(node_or_tags)
  return replace_node(replacement) unless replacement.is_a?(XML::NodeSet)

  replacement.each { |node| add_previous_sibling node }
  unlink
end
|
339
|
+
|
340
|
+
####
# Chainable form of +replace+: substitutes +node_or_tags+ for this Node and
# hands back the receiver.
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
#
# Returns self, to support chaining of calls.
def swap(node_or_tags)
  replace(node_or_tags)
  self
end
|
351
|
+
|
352
|
+
# Sibling navigation shorthands.
alias :next :next_sibling
alias :previous :previous_sibling

# :stopdoc:
# HACK: This is to work around an RDoc bug
alias :next= :add_next_sibling
# :startdoc:

alias :previous= :add_previous_sibling

# Detaching a node.
alias :remove :unlink

# Attribute access.
alias :get_attribute :[]
alias :attr :[]
alias :set_attribute :[]=

# Text content. (+to_str+ below is defined in terms of +text+, so +text+
# must be aliased first.)
alias :text :content
alias :inner_text :content

alias :has_attribute? :key?
alias :<< :add_child

# Short names for node_name / node_type.
alias :name :node_name
alias :name= :node_name=
alias :type :node_type

alias :to_str :text
alias :clone :dup
alias :elements :element_children
|
375
|
+
|
376
|
+
####
# Build a hash of this node's attributes, keyed by attribute name. Each
# value is the corresponding entry of attribute_nodes (a
# Nokogiri::XML::Attr).
def attributes
  attribute_nodes.inject({}) do |by_name, attr|
    by_name[attr.node_name] = attr
    by_name
  end
end
|
385
|
+
|
386
|
+
###
# List the value of every attribute on this Node, in attribute_nodes order.
def values
  attribute_nodes.collect do |attr|
    attr.value
  end
end
|
391
|
+
|
392
|
+
###
# List the name of every attribute on this Node, in attribute_nodes order.
def keys
  attribute_nodes.collect do |attr|
    attr.node_name
  end
end
|
397
|
+
|
398
|
+
###
# Call +block+ once per attribute of this Node, passing a two-element
# array of <tt>[name, value]</tt>.
def each(&block)
  attribute_nodes.each do |attr|
    pair = [attr.node_name, attr.value]
    block.call(pair)
  end
end
|
405
|
+
|
406
|
+
###
# Remove the attribute named +name+, if key? says this Node has one.
# Returns nil when there is nothing to remove.
def remove_attribute(name)
  return unless key?(name)

  attributes[name].remove
end
alias :delete :remove_attribute
|
412
|
+
|
413
|
+
###
# Returns true if this Node matches +selector+.
#
# The selector is run from the last entry of #ancestors and the result set
# is checked for this node.
def matches?(selector)
  topmost = ancestors.last
  topmost.search(selector).include?(self)
end
|
418
|
+
|
419
|
+
###
# Create a DocumentFragment containing +tags+ that is relative to _this_
# context node. An HTML::DocumentFragment is built when the owning document
# answers true to html?, an XML::DocumentFragment otherwise.
def fragment(tags)
  flavor = if document.html?
    Nokogiri::HTML
  else
    Nokogiri::XML
  end

  flavor::DocumentFragment.new(document, tags, self)
end
|
426
|
+
|
427
|
+
###
# Parse +string_or_io+ as a document fragment within the context of
# *this* node. Returns a XML::NodeSet containing the nodes parsed from
# +string_or_io+.
#
# +string_or_io+ may be a String or anything responding to +read+.
# +options+ may be an Integer bit mask or a ParseOptions object; an Integer
# is wrapped in a ParseOptions before being yielded to the optional block,
# which may adjust it. Empty input yields an empty NodeSet without calling
# the parser.
def parse(string_or_io, options = ParseOptions::DEFAULT_XML)
  # Integer, not Fixnum: Fixnum was merged into Integer and the constant no
  # longer exists on current Rubies. Integer also matches on older Rubies.
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options

  # Give the options to the user
  yield options if block_given?

  contents = string_or_io.respond_to?(:read) ? string_or_io.read : string_or_io

  return Nokogiri::XML::NodeSet.new(document) if contents.empty?

  in_context(contents, options.to_i)
end
|
445
|
+
|
446
|
+
####
# Set the Node's content to a Text node containing +string+. The string
# (converted with to_s) is passed through encode_special_chars first, so it
# gets XML escaped, not interpreted as markup.
def content=(string)
  escaped = encode_special_chars(string.to_s)
  self.native_content = escaped
end
|
451
|
+
|
452
|
+
###
# Make +parent_node+ the parent of this Node by adding this Node to it with
# add_child. The method body evaluates to +parent_node+.
def parent=(parent_node)
  parent_node.add_child(self)
  parent_node
end
|
458
|
+
|
459
|
+
###
# Build a hash of the Namespace definitions in scope for this Node.
#
# Keys are "xmlns" for a namespace without prefix and "xmlns:prefix"
# otherwise; values are the namespace hrefs. On Ruby 1.9+ each key is
# tagged with the document's encoding when the document has one.
def namespaces
  namespace_scopes.inject({}) do |declared, nd|
    key = ['xmlns', nd.prefix].compact.join(':')

    if RUBY_VERSION >= '1.9' && document.encoding
      begin
        key.force_encoding document.encoding
      rescue ArgumentError
        # Ruby does not recognise the encoding name; leave the key as built.
      end
    end

    declared[key] = nd.href
    declared
  end
end
|
473
|
+
|
474
|
+
# True when this node's type is COMMENT_NODE, i.e. it is a Comment.
def comment?
  COMMENT_NODE == type
end
|
478
|
+
|
479
|
+
# True when this node's type is CDATA_SECTION_NODE, i.e. it is a CDATA.
def cdata?
  CDATA_SECTION_NODE == type
end
|
483
|
+
|
484
|
+
# True when this node's type is DOCUMENT_NODE, i.e. it is an
# XML::Document node.
def xml?
  DOCUMENT_NODE == type
end
|
488
|
+
|
489
|
+
# True when this node's type is HTML_DOCUMENT_NODE, i.e. it is an
# HTML::Document node.
def html?
  HTML_DOCUMENT_NODE == type
end
|
493
|
+
|
494
|
+
# True when this node's type is TEXT_NODE, i.e. it is a Text node.
def text?
  TEXT_NODE == type
end
|
498
|
+
|
499
|
+
# True when this node's type is DOCUMENT_FRAG_NODE, i.e. it is a
# DocumentFragment.
def fragment?
  DOCUMENT_FRAG_NODE == type
end
|
503
|
+
|
504
|
+
###
# Look up the Nokogiri::HTML::ElementDescription for this node's name.
# Returns nil when the owning document is an XML document; for other
# documents the result is whatever ElementDescription[] returns for the
# name (nil on unknown tags).
def description
  document.xml? ? nil : Nokogiri::HTML::ElementDescription[name]
end
|
511
|
+
|
512
|
+
###
# Is this a read only node? True for notation, entity and entity
# declaration nodes.
def read_only?
  # According to gdome2, these are read-only node types
  case type
  when NOTATION_NODE, ENTITY_NODE, ENTITY_DECL then true
  else false
  end
end
|
518
|
+
|
519
|
+
# True when this node's type is ELEMENT_NODE, i.e. it is an Element node.
def element?
  ELEMENT_NODE == type
end
alias :elem? :element?
|
524
|
+
|
525
|
+
###
# Render this node as a string: to_xml when the owning document is an XML
# document, to_html otherwise.
def to_s
  return to_xml if document.xml?

  to_html
end
|
531
|
+
|
532
|
+
# Concatenate the to_html output of each of this node's Node#children.
# Any +args+ are forwarded to every child's to_html call.
def inner_html(*args)
  rendered = children.map { |child| child.to_html(*args) }
  rendered.join
end
|
536
|
+
|
537
|
+
# Get the path to this node as a CSS expression.
#
# The node's +path+ is split on "/", empty segments are dropped, every
# positional predicate "[n]" becomes ":nth-of-type(n)", and the segments
# are joined with " > ".
def css_path
  steps = path.split(/\//).reject { |part| part.length == 0 }
  selectors = steps.map { |part| part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)') }
  selectors.join(' > ')
end
|
543
|
+
|
544
|
+
###
# Get a list of ancestor Node for this Node, nearest parent first. If
# +selector+ is given, only ancestors matching +selector+ are returned.
#
# Returns an empty NodeSet when this node has no parent (or does not
# respond to +parent+ at all).
def ancestors(selector = nil)
  return NodeSet.new(document) unless respond_to?(:parent)
  return NodeSet.new(document) unless parent

  # Walk upwards until a node has no parent or cannot be asked for one.
  parents = [parent]
  while parents.last.respond_to?(:parent)
    break unless ctx_parent = parents.last.parent
    parents << ctx_parent
  end

  return NodeSet.new(document, parents) unless selector

  # The selector is evaluated from the topmost ancestor. Its result does not
  # depend on which ancestor is being tested, so run the search once rather
  # than once per ancestor.
  matching = parents.last.search(selector)

  NodeSet.new(document, parents.find_all { |ancestor| matching.include?(ancestor) })
end
|
566
|
+
|
567
|
+
###
# Set the default namespace for this node to +url+ by adding a namespace
# definition with a nil prefix.
def default_namespace=(url)
  add_namespace_definition(nil, url)
end
alias :add_namespace :add_namespace_definition
|
573
|
+
|
574
|
+
###
# Set the namespace for this node to +ns+.
#
# A nil (or false) +ns+ is passed straight to set_namespace. Anything else
# must be a Nokogiri::XML::Namespace (TypeError otherwise) belonging to the
# same document as this node (ArgumentError otherwise).
def namespace=(ns)
  if ns
    unless Nokogiri::XML::Namespace === ns
      raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
    end

    unless ns.document == document
      raise ArgumentError, 'namespace must be declared on the same document'
    end
  end

  set_namespace(ns)
end
|
588
|
+
|
589
|
+
####
# Yields self and all children to +block+ recursively. Each child subtree
# is traversed before the node itself is passed to +block+.
def traverse(&block)
  children.each do |child|
    child.traverse(&block)
  end
  block.call(self)
end
|
595
|
+
|
596
|
+
###
# Accept a visitor: calls "visit" on +visitor+ with self and returns
# whatever that call returns.
def accept(visitor)
  outcome = visitor.visit(self)
  outcome
end
|
601
|
+
|
602
|
+
###
# Test to see if this Node is equal to +other+. Two nodes are equal when
# their pointer_id values are equal; nil, false and objects without a
# pointer_id are never equal to a Node.
def ==(other)
  return false unless other && other.respond_to?(:pointer_id)

  pointer_id == other.pointer_id
end
|
609
|
+
|
610
|
+
###
# Serialize Node using +options+ and return the result as a String. Save
# options can also be set using a block. See SaveOptions.
#
# Arguments are either a single options Hash, or positional
# <tt>(encoding, save_with)</tt>. The encoding falls back to the
# document's encoding.
#
# These two statements are equivalent:
#
#  node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
#
# or
#
#   node.serialize(:encoding => 'UTF-8') do |config|
#     config.format.as_xml
#   end
#
def serialize(*args, &block)
  options = if args.first.is_a?(Hash)
    args.shift
  else
    { :encoding => args[0], :save_with => args[1] || SaveOptions::FORMAT }
  end

  encoding = options[:encoding] || document.encoding

  # Tag the output buffer with the target encoding where Ruby supports it.
  outstring = ""
  can_tag = encoding && outstring.respond_to?(:force_encoding)
  outstring.force_encoding(Encoding.find(encoding)) if can_tag

  io = StringIO.new(outstring)
  write_to io, options, &block
  io.string
end
|
640
|
+
|
641
|
+
###
# Serialize this Node to HTML
#
#   doc.to_html
#
# See Node#write_to for a list of +options+.  For formatted output,
# use Node#to_xhtml instead.
#
# Unless +options+ carries its own +:save_with+, the node is saved with
# FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML. The caller's hash is
# not modified.
def to_html(options = {})
  # FIXME: this is a hack around broken libxml versions
  return dump_html if %w[2 6] === LIBXML_VERSION.split('.')[0..1]

  # Work on a copy: writing the default into the caller's hash would leak
  # the HTML flags into later calls that reuse it, and fails on frozen hashes.
  options = options.dup
  options[:save_with] ||= SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_HTML

  serialize(options)
end
|
659
|
+
|
660
|
+
###
# Serialize this Node to XML using +options+
#
#   doc.to_xml(:indent => 5, :encoding => 'UTF-8')
#
# See Node#write_to for a list of +options+
#
# Unless +options+ carries its own +:save_with+, the node is saved with
# FORMAT | AS_XML. The caller's hash is not modified.
def to_xml(options = {})
  # Work on a copy: writing the default into the caller's hash would leak
  # the XML flags into later calls that reuse it, and fails on frozen hashes.
  options = options.dup
  options[:save_with] ||= SaveOptions::FORMAT | SaveOptions::AS_XML

  serialize(options)
end
|
671
|
+
|
672
|
+
###
# Serialize this Node to XHTML using +options+
#
#   doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
#
# See Node#write_to for a list of +options+
#
# Unless +options+ carries its own +:save_with+, the node is saved with
# FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_XHTML. The caller's hash is
# not modified.
def to_xhtml(options = {})
  # FIXME: this is a hack around broken libxml versions
  return dump_html if %w[2 6] === LIBXML_VERSION.split('.')[0..1]

  # Work on a copy: writing the default into the caller's hash would leak
  # the XHTML flags into later calls that reuse it, and fails on frozen hashes.
  options = options.dup
  options[:save_with] ||= SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_XHTML

  serialize(options)
end
|
689
|
+
|
690
|
+
###
# Write Node to +io+ with +options+. +options+ modify the output of
# this method.  Valid options are:
#
# * +:encoding+ for changing the encoding
# * +:indent_text+ the indentation text, defaults to one space
# * +:indent+ the number of +:indent_text+ to use, defaults to 2
# * +:save_with+ a combination of SaveOptions constants.
#
# Instead of a Hash, the encoding and the save options may be given
# positionally: <tt>write_to(io, encoding, save_options)</tt>.
#
# If a block is given it receives the SaveOptions object and may adjust it
# before the node is written.
#
# To save with UTF-8 indented twice:
#
#   node.write_to(io, :encoding => 'UTF-8', :indent => 2)
#
# To save indented with two dashes:
#
#   node.write_to(io, :indent_text => '-', :indent => 2)
#
def write_to(io, *options)
  # Keep the positional arguments apart from the options Hash. Indexing the
  # Hash with 0 and 1 would look up hash keys, not positions, and silently
  # drop a positionally supplied encoding and save options.
  if options.first.is_a?(Hash)
    positional = []
    options = options.first
  else
    positional = options
    options = {}
  end

  encoding = options[:encoding] || positional[0]
  save_options = options[:save_with] || positional[1] || SaveOptions::FORMAT
  indent_text = options[:indent_text] || ' '
  indent_times = options[:indent] || 2

  config = SaveOptions.new(save_options)
  yield config if block_given?

  native_write_to(io, encoding, indent_text * indent_times, config.options)
end
|
720
|
+
|
721
|
+
###
# Write Node as HTML to +io+ with +options+
#
# See Node#write_to for a list of +options+
#
# Unless +options+ carries its own +:save_with+, the node is saved with
# FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML. The caller's hash is
# not modified.
def write_html_to(io, options = {})
  # FIXME: this is a hack around broken libxml versions
  return (io << dump_html) if %w[2 6] === LIBXML_VERSION.split('.')[0..1]

  # Work on a copy: writing the default into the caller's hash would leak
  # the HTML flags into later calls that reuse it, and fails on frozen hashes.
  options = options.dup
  options[:save_with] ||= SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_HTML
  write_to io, options
end
|
735
|
+
|
736
|
+
###
# Write Node as XHTML to +io+ with +options+
#
# See Node#write_to for a list of +options+
#
# Unless +options+ carries its own +:save_with+, the node is saved with
# FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_XHTML. The caller's hash is
# not modified.
def write_xhtml_to(io, options = {})
  # FIXME: this is a hack around broken libxml versions
  return (io << dump_html) if %w[2 6] === LIBXML_VERSION.split('.')[0..1]

  # Work on a copy: writing the default into the caller's hash would leak
  # the XHTML flags into later calls that reuse it, and fails on frozen hashes.
  options = options.dup
  options[:save_with] ||= SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_XHTML
  write_to io, options
end
|
750
|
+
|
751
|
+
###
# Write Node as XML to +io+ with +options+
#
#   doc.write_xml_to io, :encoding => 'UTF-8'
#
# See Node#write_to for a list of options
#
# Unless +options+ carries its own +:save_with+, the node is saved with
# FORMAT | AS_XML. The caller's hash is not modified.
def write_xml_to(io, options = {})
  # Work on a copy: writing the default into the caller's hash would leak
  # the XML flags into later calls that reuse it, and fails on frozen hashes.
  options = options.dup
  options[:save_with] ||= SaveOptions::FORMAT | SaveOptions::AS_XML
  write_to io, options
end
|
761
|
+
|
762
|
+
###
# Compare two Node objects with respect to their Document.  Nodes from
# different documents cannot be compared: nil is returned for them, and for
# any +other+ that is not a Nokogiri::XML::Node.
def <=>(other)
  comparable = other.is_a?(Nokogiri::XML::Node) && document == other.document
  comparable ? compare(other) : nil
end
|
770
|
+
|
771
|
+
private
|
772
|
+
|
773
|
+
# Normalise the argument of the tree-editing methods.
#
# * a NodeSet is returned as is
# * a DocumentFragment is replaced by its children
# * a String is parsed with #fragment and replaced by the resulting children
# * any other Node except a Document is returned as is
#
# Everything else raises ArgumentError.
def coerce(data) # :nodoc:
  case data
  when XML::NodeSet          then return data
  when XML::DocumentFragment then return data.children
  when String                then return fragment(data).children
  end

  return data if data.is_a?(XML::Node) && !data.is_a?(Document)

  raise ArgumentError, <<-EOERR
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
  EOERR
end
|
787
|
+
|
788
|
+
# Names of the attributes listed for this node, as symbols, in display
# order (presumably consumed by the inspect machinery defined elsewhere).
def inspect_attributes
  %w[name namespace attribute_nodes children].map { |attr| attr.to_sym }
end
|
791
|
+
end
|
792
|
+
end
|
793
|
+
end
|