nokogiri 1.11.0.rc1-x86-linux

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (145) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE-DEPENDENCIES.md +1614 -0
  3. data/LICENSE.md +9 -0
  4. data/README.md +200 -0
  5. data/bin/nokogiri +118 -0
  6. data/dependencies.yml +74 -0
  7. data/ext/nokogiri/depend +358 -0
  8. data/ext/nokogiri/extconf.rb +695 -0
  9. data/ext/nokogiri/html_document.c +170 -0
  10. data/ext/nokogiri/html_document.h +10 -0
  11. data/ext/nokogiri/html_element_description.c +279 -0
  12. data/ext/nokogiri/html_element_description.h +10 -0
  13. data/ext/nokogiri/html_entity_lookup.c +32 -0
  14. data/ext/nokogiri/html_entity_lookup.h +8 -0
  15. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  16. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  17. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  18. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  19. data/ext/nokogiri/nokogiri.c +147 -0
  20. data/ext/nokogiri/nokogiri.h +122 -0
  21. data/ext/nokogiri/xml_attr.c +103 -0
  22. data/ext/nokogiri/xml_attr.h +9 -0
  23. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  24. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  25. data/ext/nokogiri/xml_cdata.c +62 -0
  26. data/ext/nokogiri/xml_cdata.h +9 -0
  27. data/ext/nokogiri/xml_comment.c +69 -0
  28. data/ext/nokogiri/xml_comment.h +9 -0
  29. data/ext/nokogiri/xml_document.c +617 -0
  30. data/ext/nokogiri/xml_document.h +23 -0
  31. data/ext/nokogiri/xml_document_fragment.c +48 -0
  32. data/ext/nokogiri/xml_document_fragment.h +10 -0
  33. data/ext/nokogiri/xml_dtd.c +202 -0
  34. data/ext/nokogiri/xml_dtd.h +10 -0
  35. data/ext/nokogiri/xml_element_content.c +123 -0
  36. data/ext/nokogiri/xml_element_content.h +10 -0
  37. data/ext/nokogiri/xml_element_decl.c +69 -0
  38. data/ext/nokogiri/xml_element_decl.h +9 -0
  39. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  40. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  41. data/ext/nokogiri/xml_entity_decl.c +110 -0
  42. data/ext/nokogiri/xml_entity_decl.h +10 -0
  43. data/ext/nokogiri/xml_entity_reference.c +52 -0
  44. data/ext/nokogiri/xml_entity_reference.h +9 -0
  45. data/ext/nokogiri/xml_io.c +61 -0
  46. data/ext/nokogiri/xml_io.h +11 -0
  47. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  48. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  49. data/ext/nokogiri/xml_namespace.c +111 -0
  50. data/ext/nokogiri/xml_namespace.h +14 -0
  51. data/ext/nokogiri/xml_node.c +1773 -0
  52. data/ext/nokogiri/xml_node.h +13 -0
  53. data/ext/nokogiri/xml_node_set.c +486 -0
  54. data/ext/nokogiri/xml_node_set.h +12 -0
  55. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  56. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  57. data/ext/nokogiri/xml_reader.c +668 -0
  58. data/ext/nokogiri/xml_reader.h +10 -0
  59. data/ext/nokogiri/xml_relax_ng.c +161 -0
  60. data/ext/nokogiri/xml_relax_ng.h +9 -0
  61. data/ext/nokogiri/xml_sax_parser.c +310 -0
  62. data/ext/nokogiri/xml_sax_parser.h +39 -0
  63. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  64. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  65. data/ext/nokogiri/xml_sax_push_parser.c +159 -0
  66. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  67. data/ext/nokogiri/xml_schema.c +205 -0
  68. data/ext/nokogiri/xml_schema.h +9 -0
  69. data/ext/nokogiri/xml_syntax_error.c +64 -0
  70. data/ext/nokogiri/xml_syntax_error.h +13 -0
  71. data/ext/nokogiri/xml_text.c +52 -0
  72. data/ext/nokogiri/xml_text.h +9 -0
  73. data/ext/nokogiri/xml_xpath_context.c +298 -0
  74. data/ext/nokogiri/xml_xpath_context.h +10 -0
  75. data/ext/nokogiri/xslt_stylesheet.c +266 -0
  76. data/ext/nokogiri/xslt_stylesheet.h +14 -0
  77. data/lib/nokogiri.rb +127 -0
  78. data/lib/nokogiri/2.4/nokogiri.so +0 -0
  79. data/lib/nokogiri/2.5/nokogiri.so +0 -0
  80. data/lib/nokogiri/2.6/nokogiri.so +0 -0
  81. data/lib/nokogiri/2.7/nokogiri.so +0 -0
  82. data/lib/nokogiri/css.rb +28 -0
  83. data/lib/nokogiri/css/node.rb +53 -0
  84. data/lib/nokogiri/css/parser.rb +751 -0
  85. data/lib/nokogiri/css/parser.y +272 -0
  86. data/lib/nokogiri/css/parser_extras.rb +92 -0
  87. data/lib/nokogiri/css/syntax_error.rb +8 -0
  88. data/lib/nokogiri/css/tokenizer.rb +154 -0
  89. data/lib/nokogiri/css/tokenizer.rex +55 -0
  90. data/lib/nokogiri/css/xpath_visitor.rb +232 -0
  91. data/lib/nokogiri/decorators/slop.rb +43 -0
  92. data/lib/nokogiri/html.rb +38 -0
  93. data/lib/nokogiri/html/builder.rb +36 -0
  94. data/lib/nokogiri/html/document.rb +336 -0
  95. data/lib/nokogiri/html/document_fragment.rb +50 -0
  96. data/lib/nokogiri/html/element_description.rb +24 -0
  97. data/lib/nokogiri/html/element_description_defaults.rb +672 -0
  98. data/lib/nokogiri/html/entity_lookup.rb +14 -0
  99. data/lib/nokogiri/html/sax/parser.rb +63 -0
  100. data/lib/nokogiri/html/sax/parser_context.rb +17 -0
  101. data/lib/nokogiri/html/sax/push_parser.rb +37 -0
  102. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  103. data/lib/nokogiri/syntax_error.rb +5 -0
  104. data/lib/nokogiri/version.rb +149 -0
  105. data/lib/nokogiri/xml.rb +76 -0
  106. data/lib/nokogiri/xml/attr.rb +15 -0
  107. data/lib/nokogiri/xml/attribute_decl.rb +19 -0
  108. data/lib/nokogiri/xml/builder.rb +447 -0
  109. data/lib/nokogiri/xml/cdata.rb +12 -0
  110. data/lib/nokogiri/xml/character_data.rb +8 -0
  111. data/lib/nokogiri/xml/document.rb +280 -0
  112. data/lib/nokogiri/xml/document_fragment.rb +161 -0
  113. data/lib/nokogiri/xml/dtd.rb +33 -0
  114. data/lib/nokogiri/xml/element_content.rb +37 -0
  115. data/lib/nokogiri/xml/element_decl.rb +14 -0
  116. data/lib/nokogiri/xml/entity_decl.rb +20 -0
  117. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  118. data/lib/nokogiri/xml/namespace.rb +14 -0
  119. data/lib/nokogiri/xml/node.rb +916 -0
  120. data/lib/nokogiri/xml/node/save_options.rb +62 -0
  121. data/lib/nokogiri/xml/node_set.rb +372 -0
  122. data/lib/nokogiri/xml/notation.rb +7 -0
  123. data/lib/nokogiri/xml/parse_options.rb +121 -0
  124. data/lib/nokogiri/xml/pp.rb +3 -0
  125. data/lib/nokogiri/xml/pp/character_data.rb +19 -0
  126. data/lib/nokogiri/xml/pp/node.rb +57 -0
  127. data/lib/nokogiri/xml/processing_instruction.rb +9 -0
  128. data/lib/nokogiri/xml/reader.rb +116 -0
  129. data/lib/nokogiri/xml/relax_ng.rb +33 -0
  130. data/lib/nokogiri/xml/sax.rb +5 -0
  131. data/lib/nokogiri/xml/sax/document.rb +172 -0
  132. data/lib/nokogiri/xml/sax/parser.rb +123 -0
  133. data/lib/nokogiri/xml/sax/parser_context.rb +17 -0
  134. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  135. data/lib/nokogiri/xml/schema.rb +64 -0
  136. data/lib/nokogiri/xml/searchable.rb +231 -0
  137. data/lib/nokogiri/xml/syntax_error.rb +71 -0
  138. data/lib/nokogiri/xml/text.rb +10 -0
  139. data/lib/nokogiri/xml/xpath.rb +11 -0
  140. data/lib/nokogiri/xml/xpath/syntax_error.rb +12 -0
  141. data/lib/nokogiri/xml/xpath_context.rb +17 -0
  142. data/lib/nokogiri/xslt.rb +57 -0
  143. data/lib/nokogiri/xslt/stylesheet.rb +26 -0
  144. data/lib/xsd/xmlparser/nokogiri.rb +103 -0
  145. metadata +482 -0
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
+ class DTD < Nokogiri::XML::Node
5
+ undef_method :attribute_nodes
6
+ undef_method :values
7
+ undef_method :content
8
+ undef_method :namespace
9
+ undef_method :namespace_definitions
10
+ undef_method :line if method_defined?(:line)
11
+
12
+ def keys
13
+ attributes.keys
14
+ end
15
+
16
+ def each
17
+ attributes.each do |key, value|
18
+ yield([key, value])
19
+ end
20
+ end
21
+
22
+ def html_dtd?
23
+ name.casecmp('html').zero?
24
+ end
25
+
26
+ def html5_dtd?
27
+ html_dtd? &&
28
+ external_id.nil? &&
29
+ (system_id.nil? || system_id == 'about:legacy-compat')
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
+ ###
5
+ # Represents the allowed content in an Element Declaration inside a DTD:
6
+ #
7
+ # <?xml version="1.0"?><?TEST-STYLE PIDATA?>
8
+ # <!DOCTYPE staff SYSTEM "staff.dtd" [
9
+ # <!ELEMENT div1 (head, (p | list | note)*, div2*)>
10
+ # ]>
11
+ # </root>
12
+ #
13
+ # ElementContent represents the tree inside the <!ELEMENT> tag shown above
14
+ # that lists the possible content for the div1 tag.
15
+ class ElementContent
16
+ # Possible definitions of type
17
+ PCDATA = 1
18
+ ELEMENT = 2
19
+ SEQ = 3
20
+ OR = 4
21
+
22
+ # Possible content occurrences
23
+ ONCE = 1
24
+ OPT = 2
25
+ MULT = 3
26
+ PLUS = 4
27
+
28
+ attr_reader :document
29
+
30
+ ###
31
+ # Get the children of this ElementContent node
32
+ def children
33
+ [c1, c2].compact
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
+ class ElementDecl < Nokogiri::XML::Node
5
+ undef_method :namespace
6
+ undef_method :namespace_definitions
7
+ undef_method :line if method_defined?(:line)
8
+
9
+ def inspect
10
+ "#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{to_s.inspect}>"
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
+ class EntityDecl < Nokogiri::XML::Node
5
+ undef_method :attribute_nodes
6
+ undef_method :attributes
7
+ undef_method :namespace
8
+ undef_method :namespace_definitions
9
+ undef_method :line if method_defined?(:line)
10
+
11
+ def self.new name, doc, *args
12
+ doc.create_entity(name, *args)
13
+ end
14
+
15
+ def inspect
16
+ "#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{to_s.inspect}>"
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
+ class EntityReference < Nokogiri::XML::Node
5
+ def children
6
+ # libxml2 will create a malformed child node for predefined
7
+ # entities. because any use of that child is likely to cause a
8
+ # segfault, we shall pretend that it doesn't exist.
9
+ #
10
+ # see https://github.com/sparklemotion/nokogiri/issues/1238 for details
11
+ NodeSet.new(document)
12
+ end
13
+
14
+ def inspect_attributes
15
+ [:name]
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
+ class Namespace
5
+ include Nokogiri::XML::PP::Node
6
+ attr_reader :document
7
+
8
+ private
9
+ def inspect_attributes
10
+ [:prefix, :href]
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,916 @@
1
+ # encoding: UTF-8
2
+ # frozen_string_literal: true
3
+ require 'stringio'
4
+ require 'nokogiri/xml/node/save_options'
5
+
6
+ module Nokogiri
7
+ module XML
8
+ ####
9
+ # Nokogiri::XML::Node is your window to the fun filled world of dealing
10
+ # with XML and HTML tags. A Nokogiri::XML::Node may be treated similarly
11
+ # to a hash with regard to attributes. For example (from irb):
12
+ #
13
+ # irb(main):004:0> node
14
+ # => <a href="#foo" id="link">link</a>
15
+ # irb(main):005:0> node['href']
16
+ # => "#foo"
17
+ # irb(main):006:0> node.keys
18
+ # => ["href", "id"]
19
+ # irb(main):007:0> node.values
20
+ # => ["#foo", "link"]
21
+ # irb(main):008:0> node['class'] = 'green'
22
+ # => "green"
23
+ # irb(main):009:0> node
24
+ # => <a href="#foo" id="link" class="green">link</a>
25
+ # irb(main):010:0>
26
+ #
27
+ # See Nokogiri::XML::Node#[] and Nokogiri::XML::Node#[]= for more information.
28
+ #
29
+ # Nokogiri::XML::Node also has methods that let you move around your
30
+ # tree. For navigating your tree, see:
31
+ #
32
+ # * Nokogiri::XML::Node#parent
33
+ # * Nokogiri::XML::Node#children
34
+ # * Nokogiri::XML::Node#next
35
+ # * Nokogiri::XML::Node#previous
36
+ #
37
+ #
38
+ # When printing or otherwise emitting a document or a node (and
39
+ # its subtree), there are a few methods you might want to use:
40
+ #
41
+ # * content, text, inner_text, to_str: emit plaintext
42
+ #
43
+ # These methods will all emit the plaintext version of your
44
+ # document, meaning that entities will be replaced (e.g., "&lt;"
45
+ # will be replaced with "<"), meaning that any sanitizing will
46
+ # likely be un-done in the output.
47
+ #
48
+ # * to_s, to_xml, to_html, inner_html: emit well-formed markup
49
+ #
50
+ # These methods will all emit properly-escaped markup, meaning
51
+ # that it's suitable for consumption by browsers, parsers, etc.
52
+ #
53
+ # You may search this node's subtree using Searchable#xpath and Searchable#css
54
+ class Node
55
+ include Nokogiri::XML::PP::Node
56
+ include Nokogiri::XML::Searchable
57
+ include Enumerable
58
+
59
+ # Element node type, see Nokogiri::XML::Node#element?
60
+ ELEMENT_NODE = 1
61
+ # Attribute node type
62
+ ATTRIBUTE_NODE = 2
63
+ # Text node type, see Nokogiri::XML::Node#text?
64
+ TEXT_NODE = 3
65
+ # CDATA node type, see Nokogiri::XML::Node#cdata?
66
+ CDATA_SECTION_NODE = 4
67
+ # Entity reference node type
68
+ ENTITY_REF_NODE = 5
69
+ # Entity node type
70
+ ENTITY_NODE = 6
71
+ # PI node type
72
+ PI_NODE = 7
73
+ # Comment node type, see Nokogiri::XML::Node#comment?
74
+ COMMENT_NODE = 8
75
+ # Document node type, see Nokogiri::XML::Node#xml?
76
+ DOCUMENT_NODE = 9
77
+ # Document type node type
78
+ DOCUMENT_TYPE_NODE = 10
79
+ # Document fragment node type
80
+ DOCUMENT_FRAG_NODE = 11
81
+ # Notation node type
82
+ NOTATION_NODE = 12
83
+ # HTML document node type, see Nokogiri::XML::Node#html?
84
+ HTML_DOCUMENT_NODE = 13
85
+ # DTD node type
86
+ DTD_NODE = 14
87
+ # Element declaration type
88
+ ELEMENT_DECL = 15
89
+ # Attribute declaration type
90
+ ATTRIBUTE_DECL = 16
91
+ # Entity declaration type
92
+ ENTITY_DECL = 17
93
+ # Namespace declaration type
94
+ NAMESPACE_DECL = 18
95
+ # XInclude start type
96
+ XINCLUDE_START = 19
97
+ # XInclude end type
98
+ XINCLUDE_END = 20
99
+ # DOCB document node type
100
+ DOCB_DOCUMENT_NODE = 21
101
+
102
+ def initialize name, document # :nodoc:
103
+ # ... Ya. This is empty on purpose.
104
+ end
105
+
106
+ ###
107
+ # Decorate this node with the decorators set up in this node's Document
108
+ def decorate!
109
+ document.decorate(self)
110
+ end
111
+
112
+ ###
113
+ # Search this node's immediate children using CSS selector +selector+
114
+ def > selector
115
+ ns = document.root.namespaces
116
+ xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
117
+ end
118
+
119
+ ###
120
+ # Get the attribute value for the attribute +name+
121
+ def [] name
122
+ get(name.to_s)
123
+ end
124
+
125
+ ###
126
+ # Set the attribute value for the attribute +name+ to +value+
127
+ def []= name, value
128
+ set name.to_s, value.to_s
129
+ end
130
+
131
+ ###
132
+ # Add +node_or_tags+ as a child of this Node.
133
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
134
+ #
135
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
136
+ #
137
+ # Also see related method +<<+.
138
+ def add_child node_or_tags
139
+ node_or_tags = coerce(node_or_tags)
140
+ if node_or_tags.is_a?(XML::NodeSet)
141
+ node_or_tags.each { |n| add_child_node_and_reparent_attrs n }
142
+ else
143
+ add_child_node_and_reparent_attrs node_or_tags
144
+ end
145
+ node_or_tags
146
+ end
147
+
148
+ ###
149
+ # Add +node_or_tags+ as the first child of this Node.
150
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
151
+ #
152
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
153
+ #
154
+ # Also see related method +add_child+.
155
+ def prepend_child node_or_tags
156
+ if first = children.first
157
+ # Mimic the error add_child would raise.
158
+ raise RuntimeError, "Document already has a root node" if document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
159
+ first.__send__(:add_sibling, :previous, node_or_tags)
160
+ else
161
+ add_child(node_or_tags)
162
+ end
163
+ end
164
+
165
+
166
+ ###
167
+ # Add html around this node
168
+ #
169
+ # Returns self
170
+ def wrap(html)
171
+ new_parent = document.parse(html).first
172
+ add_next_sibling(new_parent)
173
+ new_parent.add_child(self)
174
+ self
175
+ end
176
+
177
+ ###
178
+ # Add +node_or_tags+ as a child of this Node.
179
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
180
+ #
181
+ # Returns self, to support chaining of calls (e.g., root << child1 << child2)
182
+ #
183
+ # Also see related method +add_child+.
184
+ def << node_or_tags
185
+ add_child node_or_tags
186
+ self
187
+ end
188
+
189
+ ###
190
+ # Insert +node_or_tags+ before this Node (as a sibling).
191
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
192
+ #
193
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
194
+ #
195
+ # Also see related method +before+.
196
+ def add_previous_sibling node_or_tags
197
+ raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
198
+
199
+ add_sibling :previous, node_or_tags
200
+ end
201
+
202
+ ###
203
+ # Insert +node_or_tags+ after this Node (as a sibling).
204
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
205
+ #
206
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
207
+ #
208
+ # Also see related method +after+.
209
+ def add_next_sibling node_or_tags
210
+ raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
211
+
212
+ add_sibling :next, node_or_tags
213
+ end
214
+
215
+ ####
216
+ # Insert +node_or_tags+ before this node (as a sibling).
217
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
218
+ #
219
+ # Returns self, to support chaining of calls.
220
+ #
221
+ # Also see related method +add_previous_sibling+.
222
+ def before node_or_tags
223
+ add_previous_sibling node_or_tags
224
+ self
225
+ end
226
+
227
+ ####
228
+ # Insert +node_or_tags+ after this node (as a sibling).
229
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
230
+ #
231
+ # Returns self, to support chaining of calls.
232
+ #
233
+ # Also see related method +add_next_sibling+.
234
+ def after node_or_tags
235
+ add_next_sibling node_or_tags
236
+ self
237
+ end
238
+
239
+ ####
240
+ # Set the inner html for this Node to +node_or_tags+
241
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
242
+ #
243
+ # Returns self.
244
+ #
245
+ # Also see related method +children=+
246
+ def inner_html= node_or_tags
247
+ self.children = node_or_tags
248
+ self
249
+ end
250
+
251
+ ####
252
+ # Set the inner html for this Node to +node_or_tags+
253
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
254
+ #
255
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
256
+ #
257
+ # Also see related method +inner_html=+
258
+ def children= node_or_tags
259
+ node_or_tags = coerce(node_or_tags)
260
+ children.unlink
261
+ if node_or_tags.is_a?(XML::NodeSet)
262
+ node_or_tags.each { |n| add_child_node_and_reparent_attrs n }
263
+ else
264
+ add_child_node_and_reparent_attrs node_or_tags
265
+ end
266
+ node_or_tags
267
+ end
268
+
269
+ ####
270
+ # Replace this Node with +node_or_tags+.
271
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
272
+ #
273
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
274
+ #
275
+ # Also see related method +swap+.
276
+ def replace node_or_tags
277
+ # We cannot replace a text node directly, otherwise libxml will return
278
+ # an internal error at parser.c:13031, I don't know exactly why
279
+ # libxml is trying to find a parent node that is an element or document
280
+ # so I can't tell if this is bug in libxml or not. issue #775.
281
+ if text?
282
+ replacee = Nokogiri::XML::Node.new 'dummy', document
283
+ add_previous_sibling_node replacee
284
+ unlink
285
+ return replacee.replace node_or_tags
286
+ end
287
+
288
+ node_or_tags = coerce(node_or_tags)
289
+
290
+ if node_or_tags.is_a?(XML::NodeSet)
291
+ node_or_tags.each { |n| add_previous_sibling n }
292
+ unlink
293
+ else
294
+ replace_node node_or_tags
295
+ end
296
+ node_or_tags
297
+ end
298
+
299
+ ####
300
+ # Swap this Node for +node_or_tags+
301
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
302
+ #
303
+ # Returns self, to support chaining of calls.
304
+ #
305
+ # Also see related method +replace+.
306
+ def swap node_or_tags
307
+ replace node_or_tags
308
+ self
309
+ end
310
+
311
+ alias :next :next_sibling
312
+ alias :previous :previous_sibling
313
+
314
+ # :stopdoc:
315
+ # HACK: This is to work around an RDoc bug
316
+ alias :next= :add_next_sibling
317
+ # :startdoc:
318
+
319
+ alias :previous= :add_previous_sibling
320
+ alias :remove :unlink
321
+ alias :get_attribute :[]
322
+ alias :attr :[]
323
+ alias :set_attribute :[]=
324
+ alias :text :content
325
+ alias :inner_text :content
326
+ alias :has_attribute? :key?
327
+ alias :name :node_name
328
+ alias :name= :node_name=
329
+ alias :type :node_type
330
+ alias :to_str :text
331
+ alias :clone :dup
332
+ alias :elements :element_children
333
+
334
+ ####
335
+ # Returns a hash containing the node's attributes. The key is
336
+ # the attribute name without any namespace, the value is a Nokogiri::XML::Attr
337
+ # representing the attribute.
338
+ # If you need to distinguish attributes with the same name, with different namespaces
339
+ # use #attribute_nodes instead.
340
+ def attributes
341
+ attribute_nodes.each_with_object({}) do |node, hash|
342
+ hash[node.node_name] = node
343
+ end
344
+ end
345
+
346
+ ###
347
+ # Get the attribute values for this Node.
348
+ def values
349
+ attribute_nodes.map(&:value)
350
+ end
351
+
352
+ ###
353
+ # Do this Node's attribute values include +value+?
354
+ def value?(value)
355
+ values.include? value
356
+ end
357
+
358
+ ###
359
+ # Get the attribute names for this Node.
360
+ def keys
361
+ attribute_nodes.map(&:node_name)
362
+ end
363
+
364
+ ###
365
+ # Iterate over each attribute name and value pair for this Node.
366
+ def each
367
+ attribute_nodes.each { |node|
368
+ yield [node.node_name, node.value]
369
+ }
370
+ end
371
+
372
+ ###
373
+ # Get the list of class names of this Node, without
374
+ # deduplication or sorting.
375
+ def classes
376
+ self['class'].to_s.scan(/\S+/)
377
+ end
378
+
379
+ ###
380
+ # Add +name+ to the "class" attribute value of this Node and
381
+ # return self. If the value is already in the current value, it
382
+ # is not added. If no "class" attribute exists yet, one is
383
+ # created with the given value.
384
+ #
385
+ # More than one class may be added at a time, separated by a
386
+ # space.
387
+ def add_class name
388
+ names = classes
389
+ self['class'] = (names + (name.scan(/\S+/) - names)).join(' ')
390
+ self
391
+ end
392
+
393
+ ###
394
+ # Append +name+ to the "class" attribute value of this Node and
395
+ # return self. The value is simply appended without checking if
396
+ # it is already in the current value. If no "class" attribute
397
+ # exists yet, one is created with the given value.
398
+ #
399
+ # More than one class may be appended at a time, separated by a
400
+ # space.
401
+ def append_class name
402
+ self['class'] = (classes + name.scan(/\S+/)).join(' ')
403
+ self
404
+ end
405
+
406
+ ###
407
+ # Remove +name+ from the "class" attribute value of this Node
408
+ # and return self. If there are many occurrences of the name,
409
+ # they are all removed.
410
+ #
411
+ # More than one class may be removed at a time, separated by a
412
+ # space.
413
+ #
414
+ # If no class name is left after removal, or when +name+ is nil,
415
+ # the "class" attribute is removed from this Node.
416
+ def remove_class name = nil
417
+ if name
418
+ names = classes - name.scan(/\S+/)
419
+ if names.empty?
420
+ delete 'class'
421
+ else
422
+ self['class'] = names.join(' ')
423
+ end
424
+ else
425
+ delete "class"
426
+ end
427
+ self
428
+ end
429
+
430
+ ###
431
+ # Remove the attribute named +name+
432
+ def remove_attribute name
433
+ attr = attributes[name].remove if key? name
434
+ clear_xpath_context if Nokogiri.jruby?
435
+ attr
436
+ end
437
+ alias :delete :remove_attribute
438
+
439
+ ###
440
+ # Returns true if this Node matches +selector+
441
+ def matches? selector
442
+ ancestors.last.search(selector).include?(self)
443
+ end
444
+
445
+ ###
446
+ # Create a DocumentFragment containing +tags+ that is relative to _this_
447
+ # context node.
448
+ def fragment tags
449
+ type = document.html? ? Nokogiri::HTML : Nokogiri::XML
450
+ type::DocumentFragment.new(document, tags, self)
451
+ end
452
+
453
+ ###
454
+ # Parse +string_or_io+ as a document fragment within the context of
455
+ # *this* node. Returns a XML::NodeSet containing the nodes parsed from
456
+ # +string_or_io+.
457
+ def parse string_or_io, options = nil
458
+ ##
459
+ # When the current node is unparented and not an element node, use the
460
+ # document as the parsing context instead. Otherwise, the in-context
461
+ # parser cannot find an element or a document node.
462
+ # Document Fragments are also not usable by the in-context parser.
463
+ if !element? && !document? && (!parent || parent.fragment?)
464
+ return document.parse(string_or_io, options)
465
+ end
466
+
467
+ options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
468
+ if Integer === options
469
+ options = Nokogiri::XML::ParseOptions.new(options)
470
+ end
471
+ # Give the options to the user
472
+ yield options if block_given?
473
+
474
+ contents = string_or_io.respond_to?(:read) ?
475
+ string_or_io.read :
476
+ string_or_io
477
+
478
+ return Nokogiri::XML::NodeSet.new(document) if contents.empty?
479
+
480
+ ##
481
+ # This is a horrible hack, but I don't care. See #313 for background.
482
+ error_count = document.errors.length
483
+ node_set = in_context(contents, options.to_i)
484
+ if node_set.empty? and document.errors.length > error_count and options.recover?
485
+ fragment = Nokogiri::HTML::DocumentFragment.parse contents
486
+ node_set = fragment.children
487
+ end
488
+ node_set
489
+ end
490
+
491
+ ####
492
+ # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
493
+ def content= string
494
+ self.native_content = encode_special_chars(string.to_s)
495
+ end
496
+
497
+ ###
498
+ # Set the parent Node for this Node
499
+ def parent= parent_node
500
+ parent_node.add_child(self)
501
+ parent_node
502
+ end
503
+
504
+ ###
505
+ # Returns a Hash of +{prefix => value}+ for all namespaces on this
506
+ # node and its ancestors.
507
+ #
508
+ # This method returns the same namespaces as #namespace_scopes.
509
+ #
510
+ # Returns namespaces in scope for self -- those defined on self
511
+ # element directly or any ancestor node -- as a Hash of
512
+ # attribute-name/value pairs. Note that the keys in this hash
513
+ # are the XML attributes that would be used to define this namespace,
514
+ # such as "xmlns:prefix", not just the prefix. Default namespace
515
+ # set on self will be included with key "xmlns". However,
516
+ # default namespaces set on ancestor will NOT be, even if self
517
+ # has no explicit default namespace.
518
+ def namespaces
519
+ namespace_scopes.each_with_object({}) do |ns, hash|
520
+ prefix = ns.prefix
521
+ key = prefix ? "xmlns:#{prefix}" : "xmlns"
522
+ hash[key] = ns.href
523
+ end
524
+ end
525
+
526
+ # Returns true if this is a Comment
527
+ def comment?
528
+ type == COMMENT_NODE
529
+ end
530
+
531
+ # Returns true if this is a CDATA
532
+ def cdata?
533
+ type == CDATA_SECTION_NODE
534
+ end
535
+
536
+ # Returns true if this is an XML::Document node
537
+ def xml?
538
+ type == DOCUMENT_NODE
539
+ end
540
+
541
+ # Returns true if this is an HTML::Document node
542
+ def html?
543
+ type == HTML_DOCUMENT_NODE
544
+ end
545
+
546
+ # Returns true if this is a Document
547
+ def document?
548
+ is_a? XML::Document
549
+ end
550
+
551
+ # Returns true if this is a ProcessingInstruction node
552
+ def processing_instruction?
553
+ type == PI_NODE
554
+ end
555
+
556
+ # Returns true if this is a Text node
557
+ def text?
558
+ type == TEXT_NODE
559
+ end
560
+
561
+ # Returns true if this is a DocumentFragment
562
+ def fragment?
563
+ type == DOCUMENT_FRAG_NODE
564
+ end
565
+
566
+ ###
567
+ # Fetch the Nokogiri::HTML::ElementDescription for this node. Returns
568
+ # nil on XML documents and on unknown tags.
569
+ def description
570
+ return nil if document.xml?
571
+ Nokogiri::HTML::ElementDescription[name]
572
+ end
573
+
574
+ ###
575
+ # Is this a read only node?
576
+ def read_only?
577
+ # According to gdome2, these are read-only node types
578
+ [NOTATION_NODE, ENTITY_NODE, ENTITY_DECL].include?(type)
579
+ end
580
+
581
+ # Returns true if this is an Element node
582
+ def element?
583
+ type == ELEMENT_NODE
584
+ end
585
+ alias :elem? :element?
586
+
587
+ ###
588
+ # Turn this node into a string. If the document is HTML, this method
589
+ # returns html. If the document is XML, this method returns XML.
590
+ def to_s
591
+ document.xml? ? to_xml : to_html
592
+ end
593
+
594
+ # Get the inner_html for this node's Node#children
595
+ def inner_html *args
596
+ children.map { |x| x.to_html(*args) }.join
597
+ end
598
+
599
+ # Get the path to this node as a CSS expression
600
+ def css_path
601
+ path.split(/\//).map { |part|
602
+ part.length == 0 ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
603
+ }.compact.join(' > ')
604
+ end
605
+
606
+ ###
607
+ # Get a list of ancestor Node for this Node. If +selector+ is given,
608
+ # the ancestors must match +selector+
609
+ def ancestors selector = nil
610
+ return NodeSet.new(document) unless respond_to?(:parent)
611
+ return NodeSet.new(document) unless parent
612
+
613
+ parents = [parent]
614
+
615
+ while parents.last.respond_to?(:parent)
616
+ break unless ctx_parent = parents.last.parent
617
+ parents << ctx_parent
618
+ end
619
+
620
+ return NodeSet.new(document, parents) unless selector
621
+
622
+ root = parents.last
623
+ search_results = root.search(selector)
624
+
625
+ NodeSet.new(document, parents.find_all { |parent|
626
+ search_results.include?(parent)
627
+ })
628
+ end
629
+
630
+ ###
631
+ # Adds a default namespace supplied as a string +url+ href, to self.
632
+ # The consequence is as if an xmlns attribute with the supplied argument were
633
+ # present in parsed XML. A default namespace set with this method will
634
+ # not show up in #attributes, but when this node is serialized to XML an
635
+ # "xmlns" attribute will appear. See also #namespace and #namespace=
636
+ def default_namespace= url
637
+ add_namespace_definition(nil, url)
638
+ end
639
+ alias :add_namespace :add_namespace_definition
640
+
641
###
# Set the default namespace on this node (as would be defined with an
# "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
# a Namespace added this way will NOT be serialized as an xmlns attribute
# for this node. You probably want #default_namespace= instead, or perhaps
# #add_namespace_definition with a nil prefix argument.
#
# A nil/false +ns+ is handed straight to set_namespace. Otherwise a
# TypeError is raised unless +ns+ is a Nokogiri::XML::Namespace, and an
# ArgumentError is raised when +ns+ belongs to a different document.
def namespace=(ns)
  if ns
    raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace" unless Nokogiri::XML::Namespace === ns
    raise ArgumentError, 'namespace must be declared on the same document' if ns.document != document
  end

  set_namespace(ns)
end
659
+
660
####
# Walk this node's subtree, handing every node to +block+. Each child is
# traversed (recursively) first; self is passed to +block+ last.
def traverse(&block)
  children.each do |child|
    child.traverse(&block)
  end
  block.call(self)
end
666
+
667
###
# Visitor hook: passes self to +visitor+'s "visit" method and returns
# whatever that call returns.
def accept(visitor)
  visitor.visit self
end
672
+
673
###
# Equality test against +other+. Returns false when +other+ is nil/false or
# does not respond to pointer_id; otherwise the two pointer_id values are
# compared.
def ==(other)
  return false unless other && other.respond_to?(:pointer_id)

  pointer_id == other.pointer_id
end
680
+
681
###
# Serialize Node using +options+. Save options can also be set using a
# block. See SaveOptions.
#
# These two statements are equivalent:
#
#  node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
#
# or
#
#   node.serialize(:encoding => 'UTF-8') do |config|
#     config.format.as_xml
#   end
#
# Instead of a Hash, the encoding and the save options may be given as the
# first and second positional arguments. When no encoding is supplied the
# document's encoding is used; the returned String is tagged with that
# encoding (UTF-8 when neither is set). A Hash passed by the caller is
# copied, never modified.
def serialize(*args, &block)
  options =
    if args.first.is_a?(Hash)
      # Work on a copy: the encoding is written into the options below and
      # the caller's Hash (possibly frozen or reused) must stay untouched.
      args.shift.dup
    else
      { :encoding => args[0], :save_with => args[1] }
    end

  encoding = options[:encoding] || document.encoding
  options[:encoding] = encoding

  outstring = String.new
  outstring.force_encoding(Encoding.find(encoding || 'utf-8'))
  io = StringIO.new(outstring)
  write_to io, options, &block
  io.string
end
710
+
711
###
# Serialize this Node to HTML
#
#   doc.to_html
#
# Delegates to to_format with SaveOptions::DEFAULT_HTML. See Node#write_to
# for a list of +options+. For formatted output, use Node#to_xhtml instead.
def to_html(options = {})
  to_format(SaveOptions::DEFAULT_HTML, options)
end
721
+
722
###
# Serialize this Node to XML using +options+
#
#   doc.to_xml(:indent => 5, :encoding => 'UTF-8')
#
# When +options+ carries no truthy :save_with entry,
# SaveOptions::DEFAULT_XML is used. The Hash passed in is copied, never
# modified. See Node#write_to for a list of +options+
def to_xml(options = {})
  # Copy first so the default is not written into the caller's Hash.
  options = options.dup
  options[:save_with] ||= SaveOptions::DEFAULT_XML
  serialize(options)
end
732
+
733
###
# Serialize this Node to XHTML using +options+
#
#   doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
#
# Delegates to to_format with SaveOptions::DEFAULT_XHTML. See Node#write_to
# for a list of +options+
def to_xhtml(options = {})
  to_format(SaveOptions::DEFAULT_XHTML, options)
end
742
+
743
###
# Write Node to +io+ with +options+. +options+ modify the output of
# this method.  Valid options are:
#
# * +:encoding+ for changing the encoding
# * +:indent_text+ the indentation text, defaults to one space
# * +:indent+ the number of +:indent_text+ to use, defaults to 2
#   (0 when Nokogiri.jruby? is true)
# * +:save_with+ a combination of SaveOptions constants.
#
# Instead of a Hash, the encoding and the save options may be passed as the
# first and second positional arguments after +io+. If a block is given it
# receives the SaveOptions object before the node is written.
#
# To save with UTF-8 indented twice:
#
#   node.write_to(io, :encoding => 'UTF-8', :indent => 2)
#
# To save indented with two dashes:
#
#   node.write_to(io, :indent_text => '-', :indent => 2)
#
def write_to(io, *options)
  # Keep the positional arguments reachable. Once +options+ is rebound to a
  # Hash, indexing it with 0 or 1 is a key lookup, which silently discarded
  # a positionally supplied encoding / save option.
  positional = options
  options = positional.first.is_a?(Hash) ? positional.shift : {}
  encoding = options[:encoding] || positional[0]
  if Nokogiri.jruby?
    save_options = options[:save_with] || positional[1]
    indent_times = options[:indent] || 0
  else
    save_options = options[:save_with] || positional[1] || SaveOptions::FORMAT
    indent_times = options[:indent] || 2
  end
  indent_text = options[:indent_text] || ' '

  # Any string times 0 returns an empty string. Therefore, use the same
  # string instead of generating a new empty string for every node with
  # zero indentation.
  indentation = indent_times.zero? ? '' : (indent_text * indent_times)

  config = SaveOptions.new(save_options.to_i)
  yield config if block_given?

  native_write_to(io, encoding, indentation, config.options)
end
782
+
783
###
# Write Node as HTML to +io+ with +options+, by delegating to
# write_format_to with SaveOptions::DEFAULT_HTML.
#
# See Node#write_to for a list of +options+
def write_html_to(io, options = {})
  write_format_to(SaveOptions::DEFAULT_HTML, io, options)
end
790
+
791
###
# Write Node as XHTML to +io+ with +options+, by delegating to
# write_format_to with SaveOptions::DEFAULT_XHTML.
#
# See Node#write_to for a list of +options+
def write_xhtml_to(io, options = {})
  write_format_to(SaveOptions::DEFAULT_XHTML, io, options)
end
798
+
799
###
# Write Node as XML to +io+ with +options+
#
#   doc.write_xml_to io, :encoding => 'UTF-8'
#
# When +options+ carries no truthy :save_with entry,
# SaveOptions::DEFAULT_XML is used. The Hash passed in is copied, never
# modified. See Node#write_to for a list of options
def write_xml_to(io, options = {})
  # Copy first so the default is not written into the caller's Hash.
  options = options.dup
  options[:save_with] ||= SaveOptions::DEFAULT_XML
  write_to io, options
end
809
+
810
###
# Compare two Node objects with respect to their Document. Returns nil when
# +other+ is not a Nokogiri::XML::Node or belongs to a different document
# (such nodes cannot be compared); otherwise returns the result of #compare.
def <=>(other)
  comparable = other.is_a?(Nokogiri::XML::Node) && document == other.document
  return nil unless comparable

  compare(other)
end
818
+
819
###
# Do xinclude substitution on the subtree below node. An Integer +options+
# is first wrapped in a Nokogiri::XML::ParseOptions object. If given a
# block, the options object is passed to it, allowing more convenient
# modification of the parser options, before process_xincludes is called
# with the options' integer value.
def do_xinclude(options = XML::ParseOptions::DEFAULT_XML)
  if Integer === options
    options = Nokogiri::XML::ParseOptions.new(options)
  end

  # let the caller adjust the options
  yield(options) if block_given?

  # hand over to the native implementation
  process_xincludes(options.to_i)
end
832
+
833
# Canonicalize the subtree rooted at this node by calling the document's
# canonicalize with a filter block. The block is called with a node and its
# parent; the parent is examined instead when the first argument is not an
# XML::Node. The block answers true when the examined node is this node or
# has this node among its ancestors.
def canonicalize(mode=XML::XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
  subtree_root = self
  document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
    candidate =
      if node.is_a?(XML::Node)
        node
      else
        parent
      end
    candidate == subtree_root || candidate.ancestors.include?(subtree_root)
  end
end
840
+
841
+ private
842
+
843
# Insert +node_or_tags+ (after coercion) next to this node and return the
# coerced value. +next_or_previous+ of :next inserts after self, anything
# else inserts before. A NodeSet is inserted one member at a time, walked in
# reverse for :next. When self is a text node, a temporary 'dummy' node is
# inserted first and used as the pivot for the insertions, then unlinked.
def add_sibling(next_or_previous, node_or_tags)
  if next_or_previous == :next
    impl = :add_next_sibling_node
    iter = :reverse_each
  else
    impl = :add_previous_sibling_node
    iter = :each
  end

  node_or_tags = coerce node_or_tags
  unless node_or_tags.is_a?(XML::NodeSet)
    send impl, node_or_tags
    return node_or_tags
  end

  if text?
    pivot = Nokogiri::XML::Node.new 'dummy', document
    send impl, pivot
  else
    pivot = self
  end
  node_or_tags.send(iter) { |member| pivot.send impl, member }
  pivot.unlink if text?
  node_or_tags
end
862
+
863
# Result of asking Nokogiri whether the libxml in use satisfies "~> 2.6.0".
# When truthy, to_format and write_format_to fall back to dump_html.
USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?('~> 2.6.0').freeze
private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
865
+
866
# Serialize this node with +save_option+ as the :save_with default.
# Returns dump_html instead when USING_LIBXML_WITH_BROKEN_SERIALIZATION is
# set. A truthy :save_with already present in +options+ wins over
# +save_option+. The +options+ Hash is copied, never modified.
def to_format(save_option, options)
  return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION

  # Copy first so the default is not written into the caller's Hash.
  options = options.dup
  options[:save_with] = save_option unless options[:save_with]
  serialize(options)
end
872
+
873
# Write this node to +io+ with +save_option+ as the :save_with default.
# Appends dump_html to +io+ instead when
# USING_LIBXML_WITH_BROKEN_SERIALIZATION is set. A truthy :save_with already
# present in +options+ wins over +save_option+. The +options+ Hash is
# copied, never modified.
def write_format_to(save_option, io, options)
  return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION

  # Copy first so the default is not written into the caller's Hash.
  options = options.dup
  options[:save_with] ||= save_option
  write_to io, options
end
879
+
880
# Names of the attributes listed for this node, in order: name, namespace,
# attribute_nodes, children.
def inspect_attributes
  %i[name namespace attribute_nodes children]
end
883
+
884
# Normalize +data+ into something that can be inserted into the tree:
# a NodeSet is returned as is, a DocumentFragment yields its children, a
# String is parsed with #fragment and yields that fragment's children, and
# any other Node is returned as is. A Document, an Attr, or any other
# object raises ArgumentError.
def coerce(data) # :nodoc:
  return data if XML::NodeSet === data
  return data.children if XML::DocumentFragment === data
  return fragment(data).children if String === data

  # Document and Attr are rejected even though they may also be Nodes, so
  # they must be ruled out before the generic Node check.
  unacceptable = Document === data || XML::Attr === data
  return data if !unacceptable && XML::Node === data

  raise ArgumentError, <<-EOERR
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
  EOERR
end
903
+
904
# @private
# Frozen one-element list holding the frozen './/' XPath prefix.
IMPLIED_XPATH_CONTEXTS = ['.//'.freeze].freeze # :nodoc:
906
+
907
# Add +node+ as a child via add_child_node, then re-apply every attribute
# of +node+ whose name contains a colon: the attribute node is removed and
# its value is assigned again on +node+ under the same name.
def add_child_node_and_reparent_attrs(node) # :nodoc:
  add_child_node node
  colon_named = node.attribute_nodes.find_all { |candidate| candidate.name =~ /:/ }
  colon_named.each do |attr_node|
    attr_node.remove
    node[attr_node.name] = attr_node.value
  end
end
914
+ end
915
+ end
916
+ end