rubyjedi-nokogiri_java 1.4.0.20100513161003-java
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +26 -0
- data/CHANGELOG.ja.rdoc +330 -0
- data/CHANGELOG.rdoc +341 -0
- data/Manifest.txt +277 -0
- data/README.ja.rdoc +105 -0
- data/README.rdoc +125 -0
- data/Rakefile +307 -0
- data/bin/nokogiri +49 -0
- data/deps.rip +5 -0
- data/ext/nokogiri/extconf.rb +149 -0
- data/ext/nokogiri/html_document.c +145 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +272 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +92 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/nokogiri.c +96 -0
- data/ext/nokogiri/nokogiri.h +148 -0
- data/ext/nokogiri/xml_attr.c +92 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +67 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +54 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +52 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +386 -0
- data/ext/nokogiri/xml_document.h +24 -0
- data/ext/nokogiri/xml_document_fragment.c +46 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +192 -0
- data/ext/nokogiri/xml_dtd.h +10 -0
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_encoding_handler.c +79 -0
- data/ext/nokogiri/xml_encoding_handler.h +8 -0
- data/ext/nokogiri/xml_entity_decl.c +97 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +31 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_namespace.c +82 -0
- data/ext/nokogiri/xml_namespace.h +13 -0
- data/ext/nokogiri/xml_node.c +1080 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +405 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +593 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +159 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +283 -0
- data/ext/nokogiri/xml_sax_parser.h +43 -0
- data/ext/nokogiri/xml_sax_parser_context.c +157 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +114 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +156 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +52 -0
- data/ext/nokogiri/xml_syntax_error.h +13 -0
- data/ext/nokogiri/xml_text.c +48 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +53 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +239 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +131 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/isorelax.jar +0 -0
- data/lib/jing.jar +0 -0
- data/lib/nekodtd.jar +0 -0
- data/lib/nekohtml.jar +0 -0
- data/lib/nokogiri.rb +123 -0
- data/lib/nokogiri/css.rb +25 -0
- data/lib/nokogiri/css/generated_parser.rb +659 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +145 -0
- data/lib/nokogiri/css/node.rb +99 -0
- data/lib/nokogiri/css/parser.rb +82 -0
- data/lib/nokogiri/css/parser.y +230 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rex +55 -0
- data/lib/nokogiri/css/xpath_visitor.rb +164 -0
- data/lib/nokogiri/decorators/slop.rb +33 -0
- data/lib/nokogiri/ffi/encoding_handler.rb +42 -0
- data/lib/nokogiri/ffi/html/document.rb +28 -0
- data/lib/nokogiri/ffi/html/element_description.rb +81 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
- data/lib/nokogiri/ffi/libxml.rb +372 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
- data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
- data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +124 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +135 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +67 -0
- data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
- data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
- data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
- data/lib/nokogiri/ffi/xml/node.rb +465 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +146 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +227 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +135 -0
- data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +55 -0
- data/lib/nokogiri/ffi/xml/schema.rb +92 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +98 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +50 -0
- data/lib/nokogiri/html.rb +36 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +88 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/element_description_defaults.rb +671 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +48 -0
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +33 -0
- data/lib/nokogiri/version_warning.rb +11 -0
- data/lib/nokogiri/xml.rb +67 -0
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +405 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +163 -0
- data/lib/nokogiri/xml/document_fragment.rb +73 -0
- data/lib/nokogiri/xml/dtd.rb +11 -0
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +15 -0
- data/lib/nokogiri/xml/fragment_handler.rb +73 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node.rb +730 -0
- data/lib/nokogiri/xml/node/save_options.rb +42 -0
- data/lib/nokogiri/xml/node_set.rb +318 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +85 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +74 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +4 -0
- data/lib/nokogiri/xml/sax/document.rb +160 -0
- data/lib/nokogiri/xml/sax/parser.rb +115 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +61 -0
- data/lib/nokogiri/xml/syntax_error.rb +43 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +48 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xercesImpl.jar +0 -0
- data/lib/xsd/xmlparser/nokogiri.rb +90 -0
- data/tasks/test.rb +100 -0
- data/test/css/test_nthiness.rb +159 -0
- data/test/css/test_parser.rb +282 -0
- data/test/css/test_tokenizer.rb +190 -0
- data/test/css/test_xpath_visitor.rb +76 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/2ch.html +108 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/helper.rb +137 -0
- data/test/html/sax/test_parser.rb +83 -0
- data/test/html/sax/test_parser_context.rb +48 -0
- data/test/html/test_builder.rb +164 -0
- data/test/html/test_document.rb +385 -0
- data/test/html/test_document_encoding.rb +77 -0
- data/test/html/test_document_fragment.rb +157 -0
- data/test/html/test_element_description.rb +98 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +242 -0
- data/test/html/test_node_encoding.rb +27 -0
- data/test/test_convert_xpath.rb +135 -0
- data/test/test_css_cache.rb +45 -0
- data/test/test_encoding_handler.rb +46 -0
- data/test/test_jruby.rb +40 -0
- data/test/test_memory_leak.rb +87 -0
- data/test/test_nokogiri.rb +140 -0
- data/test/test_reader.rb +358 -0
- data/test/test_soap4r_sax.rb +52 -0
- data/test/test_xslt_transforms.rb +150 -0
- data/test/xml/node/test_save_options.rb +20 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +314 -0
- data/test/xml/sax/test_parser_context.rb +63 -0
- data/test/xml/sax/test_push_parser.rb +135 -0
- data/test/xml/test_attr.rb +38 -0
- data/test/xml/test_attribute_decl.rb +90 -0
- data/test/xml/test_builder.rb +167 -0
- data/test/xml/test_cdata.rb +38 -0
- data/test/xml/test_comment.rb +29 -0
- data/test/xml/test_document.rb +638 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +149 -0
- data/test/xml/test_dtd.rb +92 -0
- data/test/xml/test_dtd_encoding.rb +33 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +83 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +70 -0
- data/test/xml/test_node.rb +740 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +107 -0
- data/test/xml/test_node_reparenting.rb +279 -0
- data/test/xml/test_node_set.rb +577 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +89 -0
- data/test/xml/test_syntax_error.rb +12 -0
- data/test/xml/test_text.rb +30 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +169 -0
- metadata +477 -0
@@ -0,0 +1,163 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
####
|
4
|
+
# Nokogiri::XML::Document is the main entry point for dealing with
|
5
|
+
# XML documents. The Document is created by parsing an XML document.
|
6
|
+
# See Nokogiri.XML()
|
7
|
+
#
|
8
|
+
# For searching a Document, see Nokogiri::XML::Node#css and
|
9
|
+
# Nokogiri::XML::Node#xpath
|
10
|
+
class Document < Node
  ###
  # Parse an XML document.  +string_or_io+ may be a String, or any object
  # that responds to _read_ such as an IO or StringIO.  +url+ is the
  # resource where this document is located; for readable input with no
  # +url+ given, the input's _path_ is used when it responds to it.
  # +encoding+ is the encoding that should be used when processing the
  # document.  +options+ is a number that sets options in the parser, such
  # as Nokogiri::XML::ParseOptions::RECOVER.  See the constants in
  # Nokogiri::XML::ParseOptions.
  #
  # When a block is given, the options are yielded to it before parsing.
  # A nil or empty +string_or_io+ returns a new, empty Document.
  def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block

    # Test against Integer rather than Fixnum: Fixnum was unified into
    # Integer and the constant no longer exists on current Rubies.
    options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
    # Give the options to the user
    yield options if block_given?

    if string_or_io.respond_to?(:read)
      url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
      return read_io(string_or_io, url, encoding, options.to_i)
    end

    # read_memory pukes on empty docs
    return new if string_or_io.nil? || string_or_io.empty?

    read_memory(string_or_io, url, encoding, options.to_i)
  end

  # A list of Nokogiri::XML::SyntaxError found when parsing a document
  attr_accessor :errors

  # Arguments are accepted but ignored here; only the decorator registry
  # is reset.
  def initialize *args
    @decorators = nil
  end

  # Create an element with +name+
  def create_element name, &block
    Nokogiri::XML::Element.new(name, self, &block)
  end

  # Create a text node with +text+ (converted with +to_s+)
  def create_text_node text, &block
    Nokogiri::XML::Text.new(text.to_s, self, &block)
  end

  # The name of this document.  Always returns "document"
  def name
    'document'
  end

  # A reference to +self+
  def document
    self
  end

  ###
  # Recursively get all namespaces from this node and its subtree and
  # return them as a hash.
  #
  # For example, given this document:
  #
  #   <root xmlns:foo="bar">
  #     <bar xmlns:hello="world" />
  #   </root>
  #
  # This method will return:
  #
  #   { 'xmlns:foo' => 'bar', 'xmlns:hello' => 'world' }
  #
  # WARNING: this method will clobber duplicate names in the keys.
  # For example, given this document:
  #
  #   <root xmlns:foo="bar">
  #     <bar xmlns:foo="baz" />
  #   </root>
  #
  # The hash returned will look like this: { 'xmlns:foo' => 'bar' }
  def collect_namespaces
    ns = {}
    traverse { |j| ns.merge!(j.namespaces) }
    ns
  end

  # Get the list of decorators given +key+.  The list is created (empty)
  # the first time a key is asked for.
  def decorators key
    @decorators ||= {}
    @decorators[key] ||= []
  end

  ###
  # Validate this Document against its DTD.  Returns a list of errors on
  # the document or +nil+ when there is no DTD.
  def validate
    return nil unless internal_subset
    internal_subset.validate self
  end

  ###
  # Explore a document with shortcut methods.  Registers the Slop
  # decorator for XML::Node once, then returns +self+.
  def slop!
    unless decorators(XML::Node).include? Nokogiri::Decorators::Slop
      decorators(XML::Node) << Nokogiri::Decorators::Slop
      decorate!
    end

    self
  end

  ###
  # Apply any decorators to +node+: every module registered under a class
  # that +node+ is an instance of is mixed into +node+.
  def decorate node
    return unless @decorators
    @decorators.each { |klass,list|
      next unless node.is_a?(klass)
      list.each { |moodule| node.extend(moodule) }
    }
  end

  alias :to_xml :serialize
  alias :clone :dup

  # Get the hash of namespaces on the root Nokogiri::XML::Node
  # (an empty hash when the document has no root)
  def namespaces
    root ? root.namespaces : {}
  end

  ####
  # Create a Nokogiri::XML::DocumentFragment from +tags+
  # Returns an empty fragment if +tags+ is nil.
  def fragment tags = nil
    DocumentFragment.new(self, tags)
  end

  undef_method :swap, :parent, :namespace, :default_namespace=
  undef_method :add_namespace_definition, :attributes
  undef_method :namespace_definitions, :add_namespace
  undef_method :line if method_defined?(:line)

  ###
  # Add +child+ as the root of this Document.  Raises if a root already
  # exists, or if +child+ is a fragment with more than one child; a
  # fragment with a single child contributes that child.
  def add_child child
    raise "Document already has a root node" if root
    if child.type == Node::DOCUMENT_FRAG_NODE
      raise "Document cannot have multiple root nodes" if child.children.size > 1
      super(child.children.first)
    else
      super
    end
  end
  alias :<< :add_child

  private
  # Attributes listed when this document is inspected
  def inspect_attributes
    [:name, :children]
  end
end
|
162
|
+
end
|
163
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
class DocumentFragment < Nokogiri::XML::Node
  ###
  # Build a fragment, optionally filling it by parsing +tags+.
  #
  # HTML fragments feed +tags+ straight to the HTML SAX parser.  Other
  # fragments parse +tags+ wrapped in a temporary <div>, then move the
  # wrapper's children onto the fragment and unlink the wrapper.
  def initialize(document, tags = nil)
    return unless tags

    if kind_of?(Nokogiri::HTML::DocumentFragment)
      HTML::SAX::Parser.new(FragmentHandler.new(self, tags)).parse(tags)
      return
    end

    markup = "<div>#{tags.strip}</div>"
    XML::SAX::Parser.new(FragmentHandler.new(self, markup)).parse(markup)
    wrapper = self.child
    wrapper.children.each { |kid| kid.parent = self }
    wrapper.unlink
  end

  ###
  # The node name of every DocumentFragment
  def name
    '#document-fragment'
  end

  ###
  # String form of the fragment: its children rendered with +to_s+
  def to_s
    children.to_s
  end

  ###
  # Render the fragment's children as html
  # See Nokogiri::XML::NodeSet#to_html
  def to_html(*args)
    children.to_html(*args)
  end

  ###
  # Render the fragment's children as xhtml
  # See Nokogiri::XML::NodeSet#to_xhtml
  def to_xhtml(*args)
    children.to_xhtml(*args)
  end

  ###
  # Render the fragment's children as xml
  # See Nokogiri::XML::NodeSet#to_xml
  def to_xml(*args)
    children.to_xml(*args)
  end

  ###
  # Search the fragment's children with CSS.  A fragment without children
  # yields a new NodeSet for its document.  See Nokogiri::XML::Node#css
  def css(*args)
    children.any? ? children.css(*args) : NodeSet.new(document)
  end

  alias_method :serialize, :to_s

  ####
  # Create a Nokogiri::XML::DocumentFragment from +tags+, owned by a
  # freshly created XML::Document
  def self.parse(tags)
    new(XML::Document.new, tags)
  end

end
|
72
|
+
end
|
73
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
###
|
4
|
+
# Represents the allowed content in an Element Declaration inside a DTD:
|
5
|
+
#
|
6
|
+
# <?xml version="1.0"?><?TEST-STYLE PIDATA?>
|
7
|
+
# <!DOCTYPE staff SYSTEM "staff.dtd" [
|
8
|
+
# <!ELEMENT div1 (head, (p | list | note)*, div2*)>
|
9
|
+
# ]>
|
10
|
+
# </root>
|
11
|
+
#
|
12
|
+
# ElementContent represents the tree inside the <!ELEMENT> tag shown above
|
13
|
+
# that lists the possible content for the div1 tag.
|
14
|
+
class ElementContent
  # Values that describe the kind of content node
  PCDATA  = 1
  ELEMENT = 2
  SEQ     = 3
  OR      = 4

  # Values that describe how often the content may occur
  ONCE = 1
  OPT  = 2
  MULT = 3
  PLUS = 4

  attr_reader :document

  ###
  # The child content nodes: +c1+ then +c2+, leaving out whichever is nil
  def children
    [c1, c2].reject { |branch| branch.nil? }
  end
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
class ElementDecl < Nokogiri::XML::Node
  # Node methods that are removed from element declarations
  undef_method :namespace, :namespace_definitions
  undef_method :line if method_defined?(:line)

  ###
  # Debug form: class name, object id in hex, and the inspected string
  # form of this declaration
  def inspect
    hex_id = sprintf("0x%x", object_id)
    "#<#{self.class.name}:#{hex_id} #{to_s.inspect}>"
  end
end
|
12
|
+
end
|
13
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
class EntityDecl < Nokogiri::XML::Node
  # Node methods that are removed from entity declarations
  undef_method :attribute_nodes, :attributes
  undef_method :namespace, :namespace_definitions
  undef_method :line if method_defined?(:line)

  ###
  # Debug form: class name, object id in hex, and the inspected string
  # form of this declaration
  def inspect
    hex_id = sprintf("0x%x", object_id)
    "#<#{self.class.name}:#{hex_id} #{to_s.inspect}>"
  end
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
class FragmentHandler < Nokogiri::XML::SAX::Document # :nodoc:
  QNAME_REGEX = /(.*):(.*)/

  # +node+ is the fragment being filled; +original_html+ is the markup
  # that is about to be parsed into it.
  def initialize node, original_html
    @doc_started    = false
    @document       = node.document
    @stack          = [node]
    # Element names exactly as the parser reported them, kept parallel to
    # @stack (nil stands for the fragment itself).  end_element matches
    # against these, because start_element may strip a namespace prefix
    # before it creates the node.
    @open_names     = [nil]
    @html_eh        = node.kind_of? HTML::DocumentFragment

    # the regexes used in start_element() and characters() anchor at
    # start-of-line, but we really only want them to anchor at
    # start-of-doc. so let's only save up to the first newline.
    #
    # this implementation choice was the result of some benchmarks, if
    # you're curious: http://gist.github.com/115936
    #
    @original_html = original_html.lstrip
    newline_index = @original_html.index("\n")
    @original_html = @original_html[0,newline_index] if newline_index
  end

  # Create an Element for +name+ under the innermost open node.  Nothing
  # is created until an element (or text, see #characters) matching the
  # start of the original markup has been seen.  +attrs+ is a flat list
  # of name, value, name, value, ...
  def start_element name, attrs = []
    regex = @html_eh ? %r{^\s*<#{Regexp.escape(name)}}i :
                       %r{^\s*<#{Regexp.escape(name)}}

    @doc_started = true if @original_html =~ regex
    return unless @doc_started

    reported_name = name
    ns = nil
    if @document.root
      match = name.match(QNAME_REGEX)
      if match
        prefix, name = match[1], match[2]
        ns = @document.root.namespace_definitions.detect { |x|
          x.prefix == prefix
        }
      end
    end

    node = Element.new(name, @document)
    # Pad a dangling attribute name with an empty value on a copy, so the
    # array owned by the caller is left untouched.
    pairs = (attrs.length % 2) == 0 ? attrs : attrs + [""]
    Hash[*pairs].each do |k,v|
      node[k] = v
    end

    node.namespace = ns if ns

    @stack.last << node
    @open_names << reported_name
    @stack << node
  end

  # Append a Text node for +string+ to the innermost open node
  def characters string
    @doc_started = true if @original_html.strip =~ %r{^\s*#{Regexp.escape(string.strip)}}
    @stack.last << Text.new(string, @document)
  end

  # Append a Comment node for +string+ to the innermost open node
  def comment string
    @stack.last << Comment.new(@document, string)
  end

  # Append a CDATA node for +string+ to the innermost open node
  def cdata_block string
    @stack.last << CDATA.new(@document, string)
  end

  # Close the innermost open element when +name+ is the name it was
  # opened with; end events for elements that were never pushed are
  # ignored.
  def end_element name
    return unless @open_names.last == name
    @open_names.pop
    @stack.pop
  end
end
|
72
|
+
end
|
73
|
+
end
|
@@ -0,0 +1,730 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require 'nokogiri/xml/node/save_options'
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
module XML
|
6
|
+
####
|
7
|
+
# Nokogiri::XML::Node is your window to the fun filled world of dealing
|
8
|
+
# with XML and HTML tags. A Nokogiri::XML::Node may be treated similarly
|
9
|
+
# to a hash with regard to attributes. For example (from irb):
|
10
|
+
#
|
11
|
+
# irb(main):004:0> node
|
12
|
+
# => <a href="#foo" id="link">link</a>
|
13
|
+
# irb(main):005:0> node['href']
|
14
|
+
# => "#foo"
|
15
|
+
# irb(main):006:0> node.keys
|
16
|
+
# => ["href", "id"]
|
17
|
+
# irb(main):007:0> node.values
|
18
|
+
# => ["#foo", "link"]
|
19
|
+
# irb(main):008:0> node['class'] = 'green'
|
20
|
+
# => "green"
|
21
|
+
# irb(main):009:0> node
|
22
|
+
# => <a href="#foo" id="link" class="green">link</a>
|
23
|
+
# irb(main):010:0>
|
24
|
+
#
|
25
|
+
# See Nokogiri::XML::Node#[] and Nokogiri::XML::Node#[]= for more information.
|
26
|
+
#
|
27
|
+
# Nokogiri::XML::Node also has methods that let you move around your
|
28
|
+
# tree. For navigating your tree, see:
|
29
|
+
#
|
30
|
+
# * Nokogiri::XML::Node#parent
|
31
|
+
# * Nokogiri::XML::Node#children
|
32
|
+
# * Nokogiri::XML::Node#next
|
33
|
+
# * Nokogiri::XML::Node#previous
|
34
|
+
#
|
35
|
+
# You may search this node's subtree using Node#xpath and Node#css
|
36
|
+
class Node
|
37
|
+
include Nokogiri::XML::PP::Node
|
38
|
+
|
39
|
+
# Element node type, see Nokogiri::XML::Node#element?
|
40
|
+
ELEMENT_NODE = 1
|
41
|
+
# Attribute node type
|
42
|
+
ATTRIBUTE_NODE = 2
|
43
|
+
# Text node type, see Nokogiri::XML::Node#text?
|
44
|
+
TEXT_NODE = 3
|
45
|
+
# CDATA node type, see Nokogiri::XML::Node#cdata?
|
46
|
+
CDATA_SECTION_NODE = 4
|
47
|
+
# Entity reference node type
|
48
|
+
ENTITY_REF_NODE = 5
|
49
|
+
# Entity node type
|
50
|
+
ENTITY_NODE = 6
|
51
|
+
# PI node type
|
52
|
+
PI_NODE = 7
|
53
|
+
# Comment node type, see Nokogiri::XML::Node#comment?
|
54
|
+
COMMENT_NODE = 8
|
55
|
+
# Document node type, see Nokogiri::XML::Node#xml?
|
56
|
+
DOCUMENT_NODE = 9
|
57
|
+
# Document type node type
|
58
|
+
DOCUMENT_TYPE_NODE = 10
|
59
|
+
# Document fragment node type
|
60
|
+
DOCUMENT_FRAG_NODE = 11
|
61
|
+
# Notation node type
|
62
|
+
NOTATION_NODE = 12
|
63
|
+
# HTML document node type, see Nokogiri::XML::Node#html?
|
64
|
+
HTML_DOCUMENT_NODE = 13
|
65
|
+
# DTD node type
|
66
|
+
DTD_NODE = 14
|
67
|
+
# Element declaration type
|
68
|
+
ELEMENT_DECL = 15
|
69
|
+
# Attribute declaration type
|
70
|
+
ATTRIBUTE_DECL = 16
|
71
|
+
# Entity declaration type
|
72
|
+
ENTITY_DECL = 17
|
73
|
+
# Namespace declaration type
|
74
|
+
NAMESPACE_DECL = 18
|
75
|
+
# XInclude start type
|
76
|
+
XINCLUDE_START = 19
|
77
|
+
# XInclude end type
|
78
|
+
XINCLUDE_END = 20
|
79
|
+
# DOCB document node type
|
80
|
+
DOCB_DOCUMENT_NODE = 21
|
81
|
+
|
82
|
+
def initialize(name, document)
  # Deliberately left blank: nothing is done with +name+ or +document+ here.
end
|
85
|
+
|
86
|
+
###
|
87
|
+
# Decorate this node with the decorators set up in this node's Document
|
88
|
+
def decorate!
  # Hand this node to its owning document, which applies the decorators.
  owner = document
  owner.decorate(self)
end
|
91
|
+
|
92
|
+
###
|
93
|
+
# Search this node for +paths+. +paths+ can be XPath or CSS, and an
|
94
|
+
# optional hash of namespaces may be appended.
|
95
|
+
# See Node#xpath and Node#css.
|
96
|
+
def search(*paths)
  # A trailing Hash is the namespace map; otherwise use the namespaces of
  # the document root (or none when there is no root).
  ns = if paths.last.is_a?(Hash)
         paths.pop
       else
         document.root ? document.root.namespaces : {}
       end

  # Paths starting with "/" or "./" are taken as XPath; everything else is
  # translated from CSS.
  queries = paths.map do |path|
    text = path.to_s
    if text =~ /^(\.\/|\/)/
      text
    else
      CSS.xpath_for(text, :prefix => ".//", :ns => ns)
    end
  end

  xpath(*(queries.flatten.uniq + [ns]))
end
|
108
|
+
alias :/ :search
|
109
|
+
|
110
|
+
###
|
111
|
+
# Search this node for XPath +paths+. +paths+ must be one or more XPath
|
112
|
+
# queries. A hash of namespaces may be appended. For example:
|
113
|
+
#
|
114
|
+
# node.xpath('.//title')
|
115
|
+
# node.xpath('.//foo:name', { 'foo' => 'http://example.org/' })
|
116
|
+
# node.xpath('.//xmlns:name', node.root.namespaces)
|
117
|
+
#
|
118
|
+
# Custom XPath functions may also be defined. To define custom functions
|
119
|
+
# create a class and implement the function you want to define.
|
120
|
+
# For example:
|
121
|
+
#
|
122
|
+
# node.xpath('.//title[regex(., "\w+")]', Class.new {
|
123
|
+
# def regex node_set, regex
|
124
|
+
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
125
|
+
# end
|
126
|
+
# }.new)
|
127
|
+
#
|
128
|
+
def xpath *paths
  # Bail out before anything dereferences the document: the namespace
  # lookup below calls document.root, so the guard has to come first.
  return NodeSet.new(document) unless document

  # Pop off our custom function handler if it exists
  handler = ![
    Hash, String, Symbol
  ].include?(paths.last.class) ? paths.pop : nil

  # A trailing Hash is the namespace map; otherwise fall back to the
  # namespaces of the document root (or none when there is no root).
  ns = paths.last.is_a?(Hash) ? paths.pop :
    (document.root ? document.root.namespaces : {})

  # Evaluate every path in its own context; each result set is tied to
  # the document and decorated.
  sets = paths.map { |path|
    ctx = XPathContext.new(self)
    ctx.register_namespaces(ns)
    path = path.gsub(/\/xmlns:/,'/:') unless Nokogiri.uses_libxml?
    set = ctx.evaluate(path, handler).node_set
    set.document = document
    document.decorate(set)
    set
  }
  return sets.first if sets.length == 1

  # Several paths: append the nodes of every set, in order, into one set.
  NodeSet.new(document) do |combined|
    document.decorate(combined)
    sets.each do |set|
      set.each do |node|
        combined << node
      end
    end
  end
end
|
159
|
+
|
160
|
+
###
|
161
|
+
# Search this node for CSS +rules+. +rules+ must be one or more CSS
|
162
|
+
# selectors. For example:
|
163
|
+
#
|
164
|
+
# node.css('title')
|
165
|
+
# node.css('body h1.bold')
|
166
|
+
# node.css('div + p.green', 'div#one')
|
167
|
+
#
|
168
|
+
# Custom CSS pseudo classes may also be defined. To define custom pseudo
|
169
|
+
# classes, create a class and implement the custom pseudo class you
|
170
|
+
# want defined. The first argument to the method will be the current
|
171
|
+
# matching NodeSet. Any other arguments are ones that you pass in.
|
172
|
+
# For example:
|
173
|
+
#
|
174
|
+
# node.css('title:regex("\w+")', Class.new {
|
175
|
+
# def regex node_set, regex
|
176
|
+
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
177
|
+
# end
|
178
|
+
# }.new)
|
179
|
+
#
|
180
|
+
def css(*rules)
  # A trailing argument that is not a Hash, String or Symbol is the custom
  # pseudo-class handler.
  plain_types = [Hash, String, Symbol]
  handler = plain_types.include?(rules.last.class) ? nil : rules.pop

  # A trailing Hash is the namespace map; otherwise use the namespaces of
  # the document root (or none when there is no root).
  ns = if rules.last.is_a?(Hash)
         rules.pop
       else
         document.root ? document.root.namespaces : {}
       end

  queries = rules.map { |rule|
    CSS.xpath_for(rule, :prefix => ".//", :ns => ns)
  }.flatten.uniq
  trailing = [ns, handler].compact

  xpath(*(queries + trailing))
end
|
195
|
+
|
196
|
+
###
|
197
|
+
# Search this node's immediate children using CSS selector +selector+
|
198
|
+
def > selector
  # Tolerate a document without a root, the same way the other search
  # methods do, instead of calling +namespaces+ on nil.
  root = document.root
  ns = root ? root.namespaces : {}
  # Only the first XPath produced for the selector is evaluated.
  xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
end
|
202
|
+
|
203
|
+
###
|
204
|
+
# Search for the first occurrence of +path+.
|
205
|
+
# Returns nil if nothing is found, otherwise a Node.
|
206
|
+
def at(path, ns = (document.root ? document.root.namespaces : {}))
  # Delegates to #search and keeps only the first result (nil when empty).
  search(path, ns).first
end
alias :% :at
|
210
|
+
|
211
|
+
##
|
212
|
+
# Search this node for the first occurrence of XPath +paths+.
|
213
|
+
# Equivalent to <tt>xpath(paths).first</tt>
|
214
|
+
# See Node#xpath for more information.
|
215
|
+
#
|
216
|
+
def at_xpath(*paths)
  # All arguments are forwarded unchanged to #xpath.
  xpath(*paths).first
end
|
219
|
+
|
220
|
+
##
|
221
|
+
# Search this node for the first occurrence of CSS +rules+.
|
222
|
+
# Equivalent to <tt>css(rules).first</tt>
|
223
|
+
# See Node#css for more information.
|
224
|
+
#
|
225
|
+
def at_css(*rules)
  # All arguments are forwarded unchanged to #css.
  css(*rules).first
end
|
228
|
+
|
229
|
+
###
|
230
|
+
# Get the attribute value for the attribute +name+
|
231
|
+
def [](name)
  # Symbols (or anything else) are looked up by their string form.
  attr_name = name.to_s
  key?(attr_name) ? get(attr_name) : nil
end
|
235
|
+
|
236
|
+
###
|
237
|
+
# Add +node+ as a child of this Node.
|
238
|
+
# The new node must be a Nokogiri::XML::Node or a non-empty String.
|
239
|
+
# Returns the new child node.
|
240
|
+
def add_child(node)
  Node.verify_nodeishness(node)
  return add_child_node(node) unless node.type == DOCUMENT_FRAG_NODE

  # A document fragment is not attached itself: each of its children is
  # added in order instead.
  node.children.each do |child|
    add_child_node child
  end
end
|
250
|
+
|
251
|
+
###
|
252
|
+
# Insert +node+ before this Node (as a sibling).
|
253
|
+
def add_previous_sibling(node)
  Node.verify_nodeishness(node)
  return add_previous_sibling_node(node) unless node.type == DOCUMENT_FRAG_NODE

  # For a document fragment, insert each of its children in order.
  node.children.each do |child|
    add_previous_sibling_node child
  end
end
|
263
|
+
|
264
|
+
###
|
265
|
+
# Insert +node+ after this Node (as a sibling).
|
266
|
+
def add_next_sibling(node)
  Node.verify_nodeishness(node)
  return add_next_sibling_node(node) unless node.type == DOCUMENT_FRAG_NODE

  # Each child is inserted directly after this node, so the fragment's
  # children are walked in reverse to end up in their original order.
  node.children.reverse.each do |child|
    add_next_sibling_node child
  end
end
|
276
|
+
|
277
|
+
# Convenience names for methods defined elsewhere on this class.
alias :next           :next_sibling
alias :previous       :previous_sibling
alias :remove         :unlink
alias :get_attribute  :[]
alias :attr           :[]
alias :set_attribute  :[]=
alias :text           :content
alias :inner_text     :content
alias :has_attribute? :key?
alias :<<             :add_child
alias :name           :node_name
alias :name=          :node_name=
alias :type           :node_type
alias :to_str         :text
alias :clone          :dup
|
292
|
+
|
293
|
+
####
|
294
|
+
# Returns a hash containing the node's attributes. The key is
|
295
|
+
# the attribute name, the value is a Nokogiri::XML::Attr
|
296
|
+
# representing the attribute.
|
297
|
+
def attributes
  # Build the Hash directly; when two attribute nodes share a node_name the
  # later one wins.
  attribute_nodes.inject({}) do |by_name, attr|
    by_name[attr.node_name] = attr
    by_name
  end
end
|
302
|
+
|
303
|
+
###
|
304
|
+
# Get the attribute values for this Node.
|
305
|
+
def values
  # One entry per attribute node, in attribute_nodes order.
  attribute_nodes.map { |attr| attr.value }
end
|
308
|
+
|
309
|
+
###
|
310
|
+
# Get the attribute names for this Node.
|
311
|
+
def keys
  # One entry per attribute node, in attribute_nodes order.
  attribute_nodes.map { |attr| attr.node_name }
end
|
314
|
+
|
315
|
+
###
|
316
|
+
# Iterate over each attribute name and value pair for this Node.
|
317
|
+
def each(&block)
  # The block receives (name, value) for every attribute node.
  attribute_nodes.each do |attr|
    block.call(attr.node_name, attr.value)
  end
end
|
322
|
+
|
323
|
+
###
|
324
|
+
# Remove the attribute named +name+
|
325
|
+
def remove_attribute(name)
  # Normalise the name the same way #[] does so Symbols work too.
  attr_name = name.to_s
  return nil unless key?(attr_name)

  # attributes is keyed by node_name; skip quietly if no entry is found.
  attr = attributes[attr_name]
  attr.remove if attr
end
alias :delete :remove_attribute
|
329
|
+
|
330
|
+
###
|
331
|
+
# Returns true if this Node matches +selector+
|
332
|
+
def matches?(selector)
  # The search runs from the last entry of #ancestors. A node with no
  # ancestors has nothing to search from, so it is reported as not matching
  # instead of raising on nil.
  top = ancestors.last
  return false unless top

  top.search(selector).include?(self)
end
|
335
|
+
|
336
|
+
####
|
337
|
+
# Create nodes from +data+ and insert them before this node
|
338
|
+
# (as a sibling).
|
339
|
+
def before(data)
  # Parse +data+ into a fragment and insert its children, in order, before
  # this node.
  fragment(data).children.each { |node| add_previous_sibling node }
  self
end
|
345
|
+
|
346
|
+
####
|
347
|
+
# Create nodes from +data+ and insert them after this node
|
348
|
+
# (as a sibling).
|
349
|
+
def after(data)
  # Children are inserted one by one directly after this node, so they are
  # walked in reverse to keep their original order in the document.
  fragment(data).children.to_a.reverse.each { |node| add_next_sibling node }
  self
end
|
355
|
+
|
356
|
+
####
|
357
|
+
# Swap this Node for new nodes made from +data+
|
358
|
+
def swap(data)
  # Insert the new nodes first, then detach this node; self is returned.
  before(data)
  remove
  self
end
|
363
|
+
|
364
|
+
####
|
365
|
+
# Set the inner_html for this Node to +tags+
|
366
|
+
def inner_html=(tags)
  # Drop the current children, then append the nodes parsed from +tags+.
  children.each { |child| child.remove }

  fragment(tags).children.to_a.each { |node| add_child node }
  self
end
|
374
|
+
|
375
|
+
def fragment(tags) # :nodoc:
  # TODO: deprecate?
  # Thin wrapper: the owning document builds the fragment.
  document.fragment(tags)
end
|
379
|
+
|
380
|
+
####
|
381
|
+
# Set the Node content to +string+. The content gets XML escaped.
|
382
|
+
def content=(string)
  # The value is stringified and passed through encode_special_chars before
  # being stored as the native content.
  escaped = encode_special_chars(string.to_s)
  self.native_content = escaped
end
|
385
|
+
|
386
|
+
###
|
387
|
+
# Set the parent Node for this Node
|
388
|
+
def parent=(parent_node)
  # Re-parenting is done by asking the new parent to adopt this node.
  parent_node.add_child(self)
  parent_node
end
|
392
|
+
|
393
|
+
###
|
394
|
+
# Get a hash containing the Namespace definitions for this Node
|
395
|
+
def namespaces
  namespace_definitions.inject({}) do |map, nd|
    # "xmlns" for a definition without a prefix, "xmlns:prefix" otherwise.
    key = ['xmlns', nd.prefix].compact.join(':')

    # Feature-detect encoding support instead of comparing RUBY_VERSION
    # strings, which order lexically rather than numerically.
    if key.respond_to?(:force_encoding) && document.encoding
      begin
        key.force_encoding document.encoding
      rescue ArgumentError
        # Encoding name not accepted: leave the key's encoding unchanged.
      end
    end

    map[key] = nd.href
    map
  end
end
|
407
|
+
|
408
|
+
# Returns true if this is a Comment
|
409
|
+
def comment?
  # True when the node type is COMMENT_NODE.
  type == COMMENT_NODE
end
|
412
|
+
|
413
|
+
# Returns true if this is a CDATA
|
414
|
+
def cdata?
  # True when the node type is CDATA_SECTION_NODE.
  type == CDATA_SECTION_NODE
end
|
417
|
+
|
418
|
+
# Returns true if this is an XML::Document node
|
419
|
+
def xml?
  # True when the node type is DOCUMENT_NODE.
  type == DOCUMENT_NODE
end
|
422
|
+
|
423
|
+
# Returns true if this is an HTML::Document node
|
424
|
+
def html?
  # True when the node type is HTML_DOCUMENT_NODE.
  type == HTML_DOCUMENT_NODE
end
|
427
|
+
|
428
|
+
# Returns true if this is a Text node
|
429
|
+
def text?
  # True when the node type is TEXT_NODE.
  type == TEXT_NODE
end
|
432
|
+
|
433
|
+
###
|
434
|
+
# Fetch the Nokogiri::HTML::ElementDescription for this node. Returns
|
435
|
+
# nil on XML documents and on unknown tags.
|
436
|
+
def description
  # XML documents have no HTML element descriptions.
  document.xml? ? nil : Nokogiri::HTML::ElementDescription[name]
end
|
440
|
+
|
441
|
+
###
|
442
|
+
# Is this a read only node?
|
443
|
+
def read_only?
  # According to gdome2, these are read-only node types
  read_only_types = [NOTATION_NODE, ENTITY_NODE, ENTITY_DECL]
  read_only_types.include?(type)
end
|
447
|
+
|
448
|
+
# Returns true if this is an Element node
|
449
|
+
def element?
  # True when the node type is ELEMENT_NODE.
  type == ELEMENT_NODE
end
alias :elem? :element?
|
453
|
+
|
454
|
+
###
|
455
|
+
# Turn this node into a string. If the document is HTML, this method
|
456
|
+
# returns html. If the document is XML, this method returns XML.
|
457
|
+
def to_s
  # Pick the serializer that matches the owning document's kind.
  if document.xml?
    to_xml
  else
    to_html
  end
end
|
460
|
+
|
461
|
+
# Get the inner_html for this node's Node#children
|
462
|
+
def inner_html(*args)
  # Every child is serialized with the same arguments and the pieces are
  # concatenated without a separator.
  children.map { |child| child.to_html(*args) }.join
end
|
465
|
+
|
466
|
+
# Get the path to this node as a CSS expression
|
467
|
+
def css_path
  # Split the node path on "/", discard empty segments (such as the one
  # before a leading slash) and turn positional predicates like "[2]" into
  # ":nth-of-type(2)".
  steps = path.split(/\//).reject { |step| step.length == 0 }
  steps.map { |step|
    step.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
  }.join(' > ')
end
|
472
|
+
|
473
|
+
###
|
474
|
+
# Get a list of ancestor Node for this Node. If +selector+ is given,
|
475
|
+
# the ancestors must match +selector+
|
476
|
+
def ancestors selector = nil
|
477
|
+
return NodeSet.new(document) unless respond_to?(:parent)
|
478
|
+
return NodeSet.new(document) unless parent
|
479
|
+
|
480
|
+
parents = [parent]
|
481
|
+
|
482
|
+
while parents.last.respond_to?(:parent)
|
483
|
+
break unless ctx_parent = parents.last.parent
|
484
|
+
parents << ctx_parent
|
485
|
+
end
|
486
|
+
|
487
|
+
return NodeSet.new(document, parents) unless selector
|
488
|
+
|
489
|
+
root = parents.last
|
490
|
+
|
491
|
+
NodeSet.new(document, parents.find_all { |parent|
|
492
|
+
root.search(selector).include?(parent)
|
493
|
+
})
|
494
|
+
end
|
495
|
+
|
496
|
+
###
|
497
|
+
# Set the default namespace for this node to +url+
|
498
|
+
def default_namespace=(url)
  # A nil prefix registers +url+ as the default (unprefixed) namespace.
  add_namespace_definition(nil, url)
end
|
501
|
+
alias :add_namespace :add_namespace_definition
|
502
|
+
|
503
|
+
###
|
504
|
+
# Set the namespace for this node to +ns+
|
505
|
+
def namespace=(ns)
  # Check the type first: asking an arbitrary object for #document would
  # raise NoMethodError before the TypeError below could be reported.
  unless ns.is_a? Nokogiri::XML::Namespace
    raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
  end
  if ns.document != document
    raise ArgumentError, 'namespace must be declared on the same document'
  end
  set_namespace ns
end
|
514
|
+
|
515
|
+
####
|
516
|
+
# Yields self and all children to +block+ recursively.
|
517
|
+
def traverse(&block)
  # Children are visited first; this node is passed to the block last.
  children.each { |child| child.traverse(&block) }
  block.call(self)
end
|
521
|
+
|
522
|
+
###
|
523
|
+
# Accept a visitor. This method calls "visit" on +visitor+ with self.
|
524
|
+
def accept(visitor)
  # The visitor's return value is passed back to the caller.
  visitor.visit(self)
end
|
527
|
+
|
528
|
+
####
|
529
|
+
# +replace+ this Node with the +node+ in the Document.
|
530
|
+
# The new node must be a Nokogiri::XML::Node or a non-empty String.
|
531
|
+
# Returns the new child node.
|
532
|
+
def replace(node)
  Node.verify_nodeishness(node)
  return replace_node(node) unless node.type == DOCUMENT_FRAG_NODE

  # A fragment cannot take this node's place directly: insert each of its
  # children before this node, then detach this node.
  node.children.each do |child|
    add_previous_sibling child
  end
  unlink
end
|
543
|
+
|
544
|
+
###
|
545
|
+
# Test to see if this Node is equal to +other+
|
546
|
+
def ==(other)
  # Only objects that expose a pointer_id can be equal to a node; equality
  # is then decided by comparing those ids.
  return false unless other && other.respond_to?(:pointer_id)
  pointer_id == other.pointer_id
end
|
551
|
+
|
552
|
+
###
|
553
|
+
# Serialize Node using +options+. Save options can also be set using a
|
554
|
+
# block. See SaveOptions.
|
555
|
+
#
|
556
|
+
# These two statements are equivalent:
|
557
|
+
#
|
558
|
+
# node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
|
559
|
+
#
|
560
|
+
# or
|
561
|
+
#
|
562
|
+
# node.serialize(:encoding => 'UTF-8') do |config|
|
563
|
+
# config.format.as_xml
|
564
|
+
# end
|
565
|
+
#
|
566
|
+
def serialize(*args, &block)
  # Accept either an options Hash or the positional form
  # (encoding, save_options).
  options = if args.first.is_a?(Hash)
              args.shift
            else
              {
                :encoding  => args[0],
                :save_with => args[1] || SaveOptions::FORMAT
              }
            end

  encoding = options[:encoding] || document.encoding

  # Allocate the buffer explicitly so it is always a fresh, mutable String.
  buffer = String.new("")
  if encoding && buffer.respond_to?(:force_encoding)
    buffer.force_encoding(Encoding.find(encoding))
  end

  io = StringIO.new(buffer)
  write_to io, options, &block
  io.string
end
|
582
|
+
|
583
|
+
###
|
584
|
+
# Serialize this Node to HTML
|
585
|
+
#
|
586
|
+
# doc.to_html
|
587
|
+
#
|
588
|
+
# See Node#write_to for a list of +options+. For formatted output,
|
589
|
+
# use Node#to_xhtml instead.
|
590
|
+
def to_html(options = {})
  # FIXME: this is a hack around broken libxml versions
  return dump_html if Nokogiri.uses_libxml? && %w[2 6] == LIBXML_VERSION.split('.')[0..1]

  # Work on a copy so the caller's Hash is left untouched.
  options = options.dup
  options[:save_with] ||= SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_HTML

  serialize(options)
end
|
601
|
+
|
602
|
+
###
|
603
|
+
# Serialize this Node to XML using +options+
|
604
|
+
#
|
605
|
+
# doc.to_xml(:indent => 5, :encoding => 'UTF-8')
|
606
|
+
#
|
607
|
+
# See Node#write_to for a list of +options+
|
608
|
+
def to_xml(options = {})
  # Work on a copy so the caller's Hash is left untouched.
  options = options.dup
  options[:save_with] ||= SaveOptions::FORMAT | SaveOptions::AS_XML

  serialize(options)
end
|
615
|
+
|
616
|
+
###
|
617
|
+
# Serialize this Node to XHTML using +options+
|
618
|
+
#
|
619
|
+
# doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
|
620
|
+
#
|
621
|
+
# See Node#write_to for a list of +options+
|
622
|
+
def to_xhtml(options = {})
  # FIXME: this is a hack around broken libxml versions
  return dump_html if Nokogiri.uses_libxml? && %w[2 6] == LIBXML_VERSION.split('.')[0..1]

  # Work on a copy so the caller's Hash is left untouched.
  options = options.dup
  options[:save_with] ||= SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_XHTML

  serialize(options)
end
|
633
|
+
|
634
|
+
###
|
635
|
+
# Write Node to +io+ with +options+. +options+ modify the output of
|
636
|
+
# this method. Valid options are:
|
637
|
+
#
|
638
|
+
# * +:encoding+ for changing the encoding
|
639
|
+
# * +:indent_text+ the indentation text, defaults to one space
|
640
|
+
# * +:indent+ the number of +:indent_text+ to use, defaults to 2
|
641
|
+
# * +:save_with+ a combination of SaveOptions constants.
|
642
|
+
#
|
643
|
+
# To save with UTF-8 indented twice:
|
644
|
+
#
|
645
|
+
# node.write_to(io, :encoding => 'UTF-8', :indent => 2)
|
646
|
+
#
|
647
|
+
# To save indented with two dashes:
|
648
|
+
#
|
649
|
+
# node.write_to(io, :indent_text => '-', :indent => 2)
|
650
|
+
#
|
651
|
+
def write_to(io, *options)
  # Keep the options Hash separate from the remaining positional arguments.
  # Rebinding +options+ to the Hash made options[0] / options[1] Hash
  # lookups that always returned nil, so the positional
  # (encoding, save_options) form was silently ignored.
  named = options.first.is_a?(Hash) ? options.shift : {}

  encoding     = named[:encoding]    || options[0]
  save_options = named[:save_with]   || options[1] || SaveOptions::FORMAT
  indent_text  = named[:indent_text] || ' '
  indent_times = named[:indent]      || 2

  # The caller's block, if any, may adjust the save options before writing.
  config = SaveOptions.new(save_options)
  yield config if block_given?

  native_write_to(io, encoding, indent_text * indent_times, config.options)
end
|
664
|
+
|
665
|
+
###
|
666
|
+
# Write Node as HTML to +io+ with +options+
|
667
|
+
#
|
668
|
+
# See Node#write_to for a list of +options+
|
669
|
+
def write_html_to(io, options = {})
  # FIXME: this is a hack around broken libxml versions
  return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] == LIBXML_VERSION.split('.')[0..1]

  # Work on a copy so the caller's Hash is left untouched.
  options = options.dup
  options[:save_with] ||= SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_HTML
  write_to io, options
end
|
679
|
+
|
680
|
+
###
|
681
|
+
# Write Node as XHTML to +io+ with +options+
|
682
|
+
#
|
683
|
+
# See Node#write_to for a list of +options+
|
684
|
+
def write_xhtml_to(io, options = {})
  # FIXME: this is a hack around broken libxml versions
  return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] == LIBXML_VERSION.split('.')[0..1]

  # Work on a copy so the caller's Hash is left untouched.
  options = options.dup
  options[:save_with] ||= SaveOptions::FORMAT |
                          SaveOptions::NO_DECLARATION |
                          SaveOptions::NO_EMPTY_TAGS |
                          SaveOptions::AS_XHTML
  write_to io, options
end
|
694
|
+
|
695
|
+
###
|
696
|
+
# Write Node as XML to +io+ with +options+
|
697
|
+
#
|
698
|
+
# doc.write_xml_to io, :encoding => 'UTF-8'
|
699
|
+
#
|
700
|
+
# See Node#write_to for a list of options
|
701
|
+
def write_xml_to(io, options = {})
  # Work on a copy so the caller's Hash is left untouched.
  options = options.dup
  options[:save_with] ||= SaveOptions::FORMAT | SaveOptions::AS_XML
  write_to io, options
end
|
705
|
+
|
706
|
+
###
|
707
|
+
# Compare two Node objects with respect to their Document. Nodes from
|
708
|
+
# different documents cannot be compared.
|
709
|
+
def <=>(other)
  # nil signals "not comparable": +other+ is not a Node, or it belongs to a
  # different document.
  return nil unless other.is_a?(Nokogiri::XML::Node)
  return nil unless document == other.document

  compare other
end
|
714
|
+
|
715
|
+
private
|
716
|
+
# Raises ArgumentError unless +node+ is an XML::Node that is not a Document.
# Invoked as Node.verify_nodeishness(node) by the methods that insert or
# replace nodes, so the message does not name any single caller.
def self.verify_nodeishness(node)
  if node.is_a?(Document) || !node.is_a?(XML::Node)
    raise ArgumentError, <<-EOERR
This method requires a Node argument, and cannot accept a Document.
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
    EOERR
  end
end
|
724
|
+
|
725
|
+
def inspect_attributes
  # Names of the readers listed for inspection, in this order.
  [:name, :namespace, :attribute_nodes, :children]
end
|
728
|
+
end
|
729
|
+
end
|
730
|
+
end
|