nokogiri 1.10.9 → 1.12.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (216)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1173 -884
  4. data/LICENSE.md +1 -1
  5. data/README.md +176 -96
  6. data/dependencies.yml +12 -12
  7. data/ext/nokogiri/depend +38 -358
  8. data/ext/nokogiri/extconf.rb +716 -414
  9. data/ext/nokogiri/gumbo.c +584 -0
  10. data/ext/nokogiri/html4_document.c +166 -0
  11. data/ext/nokogiri/html4_element_description.c +294 -0
  12. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  13. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  14. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  15. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  16. data/ext/nokogiri/nokogiri.c +228 -91
  17. data/ext/nokogiri/nokogiri.h +191 -89
  18. data/ext/nokogiri/test_global_handlers.c +40 -0
  19. data/ext/nokogiri/xml_attr.c +15 -15
  20. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  21. data/ext/nokogiri/xml_cdata.c +13 -18
  22. data/ext/nokogiri/xml_comment.c +19 -26
  23. data/ext/nokogiri/xml_document.c +267 -195
  24. data/ext/nokogiri/xml_document_fragment.c +13 -15
  25. data/ext/nokogiri/xml_dtd.c +54 -48
  26. data/ext/nokogiri/xml_element_content.c +31 -26
  27. data/ext/nokogiri/xml_element_decl.c +22 -22
  28. data/ext/nokogiri/xml_encoding_handler.c +28 -17
  29. data/ext/nokogiri/xml_entity_decl.c +32 -30
  30. data/ext/nokogiri/xml_entity_reference.c +16 -18
  31. data/ext/nokogiri/xml_namespace.c +60 -51
  32. data/ext/nokogiri/xml_node.c +493 -407
  33. data/ext/nokogiri/xml_node_set.c +174 -162
  34. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  35. data/ext/nokogiri/xml_reader.c +197 -172
  36. data/ext/nokogiri/xml_relax_ng.c +52 -28
  37. data/ext/nokogiri/xml_sax_parser.c +112 -112
  38. data/ext/nokogiri/xml_sax_parser_context.c +105 -86
  39. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  40. data/ext/nokogiri/xml_schema.c +96 -46
  41. data/ext/nokogiri/xml_syntax_error.c +42 -21
  42. data/ext/nokogiri/xml_text.c +13 -17
  43. data/ext/nokogiri/xml_xpath_context.c +158 -73
  44. data/ext/nokogiri/xslt_stylesheet.c +158 -164
  45. data/gumbo-parser/CHANGES.md +63 -0
  46. data/gumbo-parser/Makefile +101 -0
  47. data/gumbo-parser/THANKS +27 -0
  48. data/gumbo-parser/src/Makefile +34 -0
  49. data/gumbo-parser/src/README.md +41 -0
  50. data/gumbo-parser/src/ascii.c +75 -0
  51. data/gumbo-parser/src/ascii.h +115 -0
  52. data/gumbo-parser/src/attribute.c +42 -0
  53. data/gumbo-parser/src/attribute.h +17 -0
  54. data/gumbo-parser/src/char_ref.c +22225 -0
  55. data/gumbo-parser/src/char_ref.h +29 -0
  56. data/gumbo-parser/src/char_ref.rl +2154 -0
  57. data/gumbo-parser/src/error.c +626 -0
  58. data/gumbo-parser/src/error.h +148 -0
  59. data/gumbo-parser/src/foreign_attrs.c +104 -0
  60. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  61. data/gumbo-parser/src/gumbo.h +943 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/parser.c +4886 -0
  65. data/gumbo-parser/src/parser.h +41 -0
  66. data/gumbo-parser/src/replacement.h +33 -0
  67. data/gumbo-parser/src/string_buffer.c +103 -0
  68. data/gumbo-parser/src/string_buffer.h +68 -0
  69. data/gumbo-parser/src/string_piece.c +48 -0
  70. data/gumbo-parser/src/svg_attrs.c +174 -0
  71. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  72. data/gumbo-parser/src/svg_tags.c +137 -0
  73. data/gumbo-parser/src/svg_tags.gperf +55 -0
  74. data/gumbo-parser/src/tag.c +222 -0
  75. data/gumbo-parser/src/tag_lookup.c +382 -0
  76. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  77. data/gumbo-parser/src/tag_lookup.h +13 -0
  78. data/gumbo-parser/src/token_buffer.c +79 -0
  79. data/gumbo-parser/src/token_buffer.h +71 -0
  80. data/gumbo-parser/src/token_type.h +17 -0
  81. data/gumbo-parser/src/tokenizer.c +3463 -0
  82. data/gumbo-parser/src/tokenizer.h +112 -0
  83. data/gumbo-parser/src/tokenizer_states.h +339 -0
  84. data/gumbo-parser/src/utf8.c +245 -0
  85. data/gumbo-parser/src/utf8.h +164 -0
  86. data/gumbo-parser/src/util.c +68 -0
  87. data/gumbo-parser/src/util.h +30 -0
  88. data/gumbo-parser/src/vector.c +111 -0
  89. data/gumbo-parser/src/vector.h +45 -0
  90. data/lib/nokogiri/css/node.rb +1 -0
  91. data/lib/nokogiri/css/parser.rb +64 -63
  92. data/lib/nokogiri/css/parser.y +3 -3
  93. data/lib/nokogiri/css/parser_extras.rb +39 -36
  94. data/lib/nokogiri/css/syntax_error.rb +2 -1
  95. data/lib/nokogiri/css/tokenizer.rb +1 -0
  96. data/lib/nokogiri/css/xpath_visitor.rb +73 -43
  97. data/lib/nokogiri/css.rb +15 -14
  98. data/lib/nokogiri/decorators/slop.rb +1 -0
  99. data/lib/nokogiri/extension.rb +31 -0
  100. data/lib/nokogiri/gumbo.rb +14 -0
  101. data/lib/nokogiri/html.rb +32 -27
  102. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  103. data/lib/nokogiri/{html → html4}/document.rb +17 -30
  104. data/lib/nokogiri/{html → html4}/document_fragment.rb +18 -17
  105. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  106. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
  107. data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
  108. data/lib/nokogiri/{html → html4}/sax/parser.rb +12 -14
  109. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  110. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +6 -5
  111. data/lib/nokogiri/html4.rb +40 -0
  112. data/lib/nokogiri/html5/document.rb +74 -0
  113. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  114. data/lib/nokogiri/html5/node.rb +93 -0
  115. data/lib/nokogiri/html5.rb +473 -0
  116. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  117. data/lib/nokogiri/syntax_error.rb +1 -0
  118. data/lib/nokogiri/version/constant.rb +5 -0
  119. data/lib/nokogiri/version/info.rb +215 -0
  120. data/lib/nokogiri/version.rb +3 -109
  121. data/lib/nokogiri/xml/attr.rb +1 -0
  122. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  123. data/lib/nokogiri/xml/builder.rb +41 -2
  124. data/lib/nokogiri/xml/cdata.rb +1 -0
  125. data/lib/nokogiri/xml/character_data.rb +1 -0
  126. data/lib/nokogiri/xml/document.rb +138 -41
  127. data/lib/nokogiri/xml/document_fragment.rb +5 -6
  128. data/lib/nokogiri/xml/dtd.rb +1 -0
  129. data/lib/nokogiri/xml/element_content.rb +1 -0
  130. data/lib/nokogiri/xml/element_decl.rb +1 -0
  131. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  132. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  133. data/lib/nokogiri/xml/namespace.rb +1 -0
  134. data/lib/nokogiri/xml/node/save_options.rb +2 -1
  135. data/lib/nokogiri/xml/node.rb +629 -293
  136. data/lib/nokogiri/xml/node_set.rb +1 -0
  137. data/lib/nokogiri/xml/notation.rb +1 -0
  138. data/lib/nokogiri/xml/parse_options.rb +12 -3
  139. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  140. data/lib/nokogiri/xml/pp/node.rb +1 -0
  141. data/lib/nokogiri/xml/pp.rb +3 -2
  142. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  143. data/lib/nokogiri/xml/reader.rb +9 -12
  144. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  145. data/lib/nokogiri/xml/sax/document.rb +25 -30
  146. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  147. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  148. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  149. data/lib/nokogiri/xml/sax.rb +5 -4
  150. data/lib/nokogiri/xml/schema.rb +13 -4
  151. data/lib/nokogiri/xml/searchable.rb +25 -16
  152. data/lib/nokogiri/xml/syntax_error.rb +1 -0
  153. data/lib/nokogiri/xml/text.rb +1 -0
  154. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  155. data/lib/nokogiri/xml/xpath.rb +4 -5
  156. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  157. data/lib/nokogiri/xml.rb +36 -36
  158. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  159. data/lib/nokogiri/xslt.rb +17 -16
  160. data/lib/nokogiri.rb +32 -51
  161. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  162. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  163. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  164. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  165. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  166. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  167. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  168. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  169. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  170. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  171. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  172. metadata +139 -161
  173. data/ext/nokogiri/html_document.c +0 -170
  174. data/ext/nokogiri/html_document.h +0 -10
  175. data/ext/nokogiri/html_element_description.c +0 -279
  176. data/ext/nokogiri/html_element_description.h +0 -10
  177. data/ext/nokogiri/html_entity_lookup.c +0 -32
  178. data/ext/nokogiri/html_entity_lookup.h +0 -8
  179. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  180. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  181. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  182. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  183. data/ext/nokogiri/xml_attr.h +0 -9
  184. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  185. data/ext/nokogiri/xml_cdata.h +0 -9
  186. data/ext/nokogiri/xml_comment.h +0 -9
  187. data/ext/nokogiri/xml_document.h +0 -23
  188. data/ext/nokogiri/xml_document_fragment.h +0 -10
  189. data/ext/nokogiri/xml_dtd.h +0 -10
  190. data/ext/nokogiri/xml_element_content.h +0 -10
  191. data/ext/nokogiri/xml_element_decl.h +0 -9
  192. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  193. data/ext/nokogiri/xml_entity_decl.h +0 -10
  194. data/ext/nokogiri/xml_entity_reference.h +0 -9
  195. data/ext/nokogiri/xml_io.c +0 -61
  196. data/ext/nokogiri/xml_io.h +0 -11
  197. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  198. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  199. data/ext/nokogiri/xml_namespace.h +0 -14
  200. data/ext/nokogiri/xml_node.h +0 -13
  201. data/ext/nokogiri/xml_node_set.h +0 -12
  202. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  203. data/ext/nokogiri/xml_reader.h +0 -10
  204. data/ext/nokogiri/xml_relax_ng.h +0 -9
  205. data/ext/nokogiri/xml_sax_parser.h +0 -39
  206. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  207. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  208. data/ext/nokogiri/xml_schema.h +0 -9
  209. data/ext/nokogiri/xml_syntax_error.h +0 -13
  210. data/ext/nokogiri/xml_text.h +0 -9
  211. data/ext/nokogiri/xml_xpath_context.h +0 -10
  212. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  213. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  214. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  215. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  216. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
@@ -0,0 +1,215 @@
1
+ # frozen_string_literal: true
2
+ require "singleton"
3
+ require "shellwords"
4
+
5
+ module Nokogiri
6
+ class VersionInfo # :nodoc:
7
+ include Singleton
8
+
9
+ def jruby?
10
+ ::JRUBY_VERSION if ::RUBY_PLATFORM == "java"
11
+ end
12
+
13
+ def windows?
14
+ ::RUBY_PLATFORM =~ /mingw|mswin/
15
+ end
16
+
17
+ def ruby_minor
18
+ Gem::Version.new(::RUBY_VERSION).segments[0..1].join(".")
19
+ end
20
+
21
+ def engine
22
+ defined?(::RUBY_ENGINE) ? ::RUBY_ENGINE : "mri"
23
+ end
24
+
25
+ def loaded_libxml_version
26
+ Gem::Version.new(Nokogiri::LIBXML_LOADED_VERSION
27
+ .scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first
28
+ .collect(&:to_i)
29
+ .join("."))
30
+ end
31
+
32
+ def compiled_libxml_version
33
+ Gem::Version.new(Nokogiri::LIBXML_COMPILED_VERSION)
34
+ end
35
+
36
+ def loaded_libxslt_version
37
+ Gem::Version.new(Nokogiri::LIBXSLT_LOADED_VERSION
38
+ .scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first
39
+ .collect(&:to_i)
40
+ .join("."))
41
+ end
42
+
43
+ def compiled_libxslt_version
44
+ Gem::Version.new(Nokogiri::LIBXSLT_COMPILED_VERSION)
45
+ end
46
+
47
+ def libxml2?
48
+ defined?(Nokogiri::LIBXML_COMPILED_VERSION)
49
+ end
50
+
51
+ def libxml2_has_iconv?
52
+ defined?(Nokogiri::LIBXML_ICONV_ENABLED) && Nokogiri::LIBXML_ICONV_ENABLED
53
+ end
54
+
55
+ def libxslt_has_datetime?
56
+ defined?(Nokogiri::LIBXSLT_DATETIME_ENABLED) && Nokogiri::LIBXSLT_DATETIME_ENABLED
57
+ end
58
+
59
+ def libxml2_using_packaged?
60
+ libxml2? && Nokogiri::PACKAGED_LIBRARIES
61
+ end
62
+
63
+ def libxml2_using_system?
64
+ libxml2? && !libxml2_using_packaged?
65
+ end
66
+
67
+ def libxml2_precompiled?
68
+ libxml2_using_packaged? && Nokogiri::PRECOMPILED_LIBRARIES
69
+ end
70
+
71
+ def warnings
72
+ warnings = []
73
+
74
+ if libxml2?
75
+ if compiled_libxml_version != loaded_libxml_version
76
+ warnings << "Nokogiri was built against libxml version #{compiled_libxml_version}, but has dynamically loaded #{loaded_libxml_version}"
77
+ end
78
+
79
+ if compiled_libxslt_version != loaded_libxslt_version
80
+ warnings << "Nokogiri was built against libxslt version #{compiled_libxslt_version}, but has dynamically loaded #{loaded_libxslt_version}"
81
+ end
82
+ end
83
+
84
+ warnings
85
+ end
86
+
87
+ def to_hash
88
+ header_directory = File.expand_path(File.join(File.dirname(__FILE__), "../../../ext/nokogiri"))
89
+
90
+ {}.tap do |vi|
91
+ vi["warnings"] = []
92
+ vi["nokogiri"] = {}.tap do |nokogiri|
93
+ nokogiri["version"] = Nokogiri::VERSION
94
+
95
+ unless jruby?
96
+ # enable gems like nokogumbo to build with the following in their extconf.rb:
97
+ #
98
+ # append_cflags(Nokogiri::VERSION_INFO["nokogiri"]["cppflags"])
99
+ # append_ldflags(Nokogiri::VERSION_INFO["nokogiri"]["ldflags"])
100
+ #
101
+ cppflags = ["-I#{header_directory.shellescape}"]
102
+ ldflags = []
103
+
104
+ if libxml2_using_packaged?
105
+ cppflags << "-I#{File.join(header_directory, 'include').shellescape}"
106
+ cppflags << "-I#{File.join(header_directory, 'include/libxml2').shellescape}"
107
+
108
+ if windows?
109
+ # on windows, nokogumbo needs to link against nokogiri.so to resolve symbols. see #2167
110
+ lib_directory = File.expand_path(File.join(File.dirname(__FILE__), "../#{ruby_minor}"))
111
+ unless File.exist?(lib_directory)
112
+ lib_directory = File.expand_path(File.join(File.dirname(__FILE__), ".."))
113
+ end
114
+ ldflags << "-L#{lib_directory.shellescape}"
115
+ ldflags << "-l:nokogiri.so"
116
+ end
117
+ end
118
+
119
+ nokogiri["cppflags"] = cppflags
120
+ nokogiri["ldflags"] = ldflags
121
+ end
122
+ end
123
+ vi["ruby"] = {}.tap do |ruby|
124
+ ruby["version"] = ::RUBY_VERSION
125
+ ruby["platform"] = ::RUBY_PLATFORM
126
+ ruby["gem_platform"] = ::Gem::Platform.local.to_s
127
+ ruby["description"] = ::RUBY_DESCRIPTION
128
+ ruby["engine"] = engine
129
+ ruby["jruby"] = jruby? if jruby?
130
+ end
131
+
132
+ if libxml2?
133
+ vi["libxml"] = {}.tap do |libxml|
134
+ if libxml2_using_packaged?
135
+ libxml["source"] = "packaged"
136
+ libxml["precompiled"] = libxml2_precompiled?
137
+ libxml["patches"] = Nokogiri::LIBXML2_PATCHES
138
+
139
+ # this is for nokogumbo and shouldn't be forever
140
+ libxml["libxml2_path"] = header_directory
141
+ else
142
+ libxml["source"] = "system"
143
+ end
144
+ libxml["memory_management"] = Nokogiri::LIBXML_MEMORY_MANAGEMENT
145
+ libxml["iconv_enabled"] = libxml2_has_iconv?
146
+ libxml["compiled"] = compiled_libxml_version.to_s
147
+ libxml["loaded"] = loaded_libxml_version.to_s
148
+ end
149
+
150
+ vi["libxslt"] = {}.tap do |libxslt|
151
+ if libxml2_using_packaged?
152
+ libxslt["source"] = "packaged"
153
+ libxslt["precompiled"] = libxml2_precompiled?
154
+ libxslt["patches"] = Nokogiri::LIBXSLT_PATCHES
155
+ else
156
+ libxslt["source"] = "system"
157
+ end
158
+ libxslt["datetime_enabled"] = libxslt_has_datetime?
159
+ libxslt["compiled"] = compiled_libxslt_version.to_s
160
+ libxslt["loaded"] = loaded_libxslt_version.to_s
161
+ end
162
+
163
+ vi["warnings"] = warnings
164
+ end
165
+
166
+ if defined?(Nokogiri::OTHER_LIBRARY_VERSIONS)
167
+ # see extconf for how this string is assembled: "lib1name:lib1version,lib2name:lib2version"
168
+ vi["other_libraries"] = Hash[*Nokogiri::OTHER_LIBRARY_VERSIONS.split(/[,:]/)]
169
+ elsif jruby?
170
+ vi["other_libraries"] = {}.tap do |ol|
171
+ ol["xerces"] = Nokogiri::XERCES_VERSION
172
+ ol["nekohtml"] = Nokogiri::NEKO_VERSION
173
+ end
174
+ end
175
+ end
176
+ end
177
+
178
+ def to_markdown
179
+ begin
180
+ require "psych"
181
+ rescue LoadError
182
+ end
183
+ require "yaml"
184
+ "# Nokogiri (#{Nokogiri::VERSION})\n" +
185
+ YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
186
+ end
187
+
188
+ instance.warnings.each do |warning|
189
+ warn "WARNING: #{warning}"
190
+ end
191
+ end
192
+
193
+ def self.uses_libxml?(requirement = nil) # :nodoc:
194
+ return false unless VersionInfo.instance.libxml2?
195
+ return true unless requirement
196
+ Gem::Requirement.new(requirement).satisfied_by?(VersionInfo.instance.loaded_libxml_version)
197
+ end
198
+
199
+ def self.uses_gumbo?
200
+ uses_libxml? # TODO: replace with Gumbo functionality
201
+ end
202
+
203
+ def self.jruby? # :nodoc:
204
+ VersionInfo.instance.jruby?
205
+ end
206
+
207
+ # Ensure constants used in this file are loaded - see #1896
208
+ if Nokogiri.jruby?
209
+ require_relative "../jruby/dependencies"
210
+ end
211
+ require_relative "../extension"
212
+
213
+ # More complete version information about libxml
214
+ VERSION_INFO = VersionInfo.instance.to_hash
215
+ end
@@ -1,109 +1,3 @@
1
- module Nokogiri
2
- # The version of Nokogiri you are using
3
- VERSION = "1.10.9"
4
-
5
- class VersionInfo # :nodoc:
6
- def jruby?
7
- ::JRUBY_VERSION if RUBY_PLATFORM == "java"
8
- end
9
-
10
- def engine
11
- defined?(RUBY_ENGINE) ? RUBY_ENGINE : "mri"
12
- end
13
-
14
- def loaded_parser_version
15
- LIBXML_PARSER_VERSION.
16
- scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first.
17
- collect(&:to_i).
18
- join(".")
19
- end
20
-
21
- def compiled_parser_version
22
- LIBXML_VERSION
23
- end
24
-
25
- def libxml2?
26
- defined?(LIBXML_VERSION)
27
- end
28
-
29
- def libxml2_using_system?
30
- !libxml2_using_packaged?
31
- end
32
-
33
- def libxml2_using_packaged?
34
- NOKOGIRI_USE_PACKAGED_LIBRARIES
35
- end
36
-
37
- def warnings
38
- return [] unless libxml2?
39
-
40
- if compiled_parser_version != loaded_parser_version
41
- ["Nokogiri was built against LibXML version #{compiled_parser_version}, but has dynamically loaded #{loaded_parser_version}"]
42
- else
43
- []
44
- end
45
- end
46
-
47
- def to_hash
48
- hash_info = {}
49
- hash_info["warnings"] = []
50
- hash_info["nokogiri"] = Nokogiri::VERSION
51
- hash_info["ruby"] = {}
52
- hash_info["ruby"]["version"] = ::RUBY_VERSION
53
- hash_info["ruby"]["platform"] = ::RUBY_PLATFORM
54
- hash_info["ruby"]["description"] = ::RUBY_DESCRIPTION
55
- hash_info["ruby"]["engine"] = engine
56
- hash_info["ruby"]["jruby"] = jruby? if jruby?
57
-
58
- if libxml2?
59
- hash_info["libxml"] = {}
60
- hash_info["libxml"]["binding"] = "extension"
61
- if libxml2_using_packaged?
62
- hash_info["libxml"]["source"] = "packaged"
63
- hash_info["libxml"]["libxml2_path"] = NOKOGIRI_LIBXML2_PATH
64
- hash_info["libxml"]["libxslt_path"] = NOKOGIRI_LIBXSLT_PATH
65
- hash_info["libxml"]["libxml2_patches"] = NOKOGIRI_LIBXML2_PATCHES
66
- hash_info["libxml"]["libxslt_patches"] = NOKOGIRI_LIBXSLT_PATCHES
67
- else
68
- hash_info["libxml"]["source"] = "system"
69
- end
70
- hash_info["libxml"]["compiled"] = compiled_parser_version
71
- hash_info["libxml"]["loaded"] = loaded_parser_version
72
- hash_info["warnings"] = warnings
73
- elsif jruby?
74
- hash_info["xerces"] = Nokogiri::XERCES_VERSION
75
- hash_info["nekohtml"] = Nokogiri::NEKO_VERSION
76
- end
77
-
78
- hash_info
79
- end
80
-
81
- def to_markdown
82
- begin
83
- require "psych"
84
- rescue LoadError
85
- end
86
- require "yaml"
87
- "# Nokogiri (#{Nokogiri::VERSION})\n" +
88
- YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
89
- end
90
-
91
- # FIXME: maybe switch to singleton?
92
- @@instance = new
93
- @@instance.warnings.each do |warning|
94
- warn "WARNING: #{warning}"
95
- end
96
- def self.instance; @@instance; end
97
- end
98
-
99
- # More complete version information about libxml
100
- VERSION_INFO = VersionInfo.instance.to_hash
101
-
102
- def self.uses_libxml? # :nodoc:
103
- VersionInfo.instance.libxml2?
104
- end
105
-
106
- def self.jruby? # :nodoc:
107
- VersionInfo.instance.jruby?
108
- end
109
- end
1
+ # frozen_string_literal: true
2
+ require_relative "version/constant"
3
+ require_relative "version/info"
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class Attr < Node
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ###
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ###
@@ -195,6 +196,41 @@ module Nokogiri
195
196
  #
196
197
  # Note the "foo:object" tag.
197
198
  #
199
+ # === Namespace inheritance
200
+ #
201
+ # In the Builder context, children will inherit their parent's namespace. This is the same
202
+ # behavior as if the underlying {XML::Document} set +namespace_inheritance+ to +true+:
203
+ #
204
+ # result = Nokogiri::XML::Builder.new do |xml|
205
+ # xml["soapenv"].Envelope("xmlns:soapenv" => "http://schemas.xmlsoap.org/soap/envelope/") do
206
+ # xml.Header
207
+ # end
208
+ # end
209
+ # result.doc.to_xml
210
+ # # => <?xml version="1.0" encoding="utf-8"?>
211
+ # # <soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/">
212
+ # # <soapenv:Header/>
213
+ # # </soapenv:Envelope>
214
+ #
215
+ # Users may turn this behavior off by passing a keyword argument +namespace_inheritance:false+
216
+ # to the initializer:
217
+ #
218
+ # result = Nokogiri::XML::Builder.new(namespace_inheritance: false) do |xml|
219
+ # xml["soapenv"].Envelope("xmlns:soapenv" => "http://schemas.xmlsoap.org/soap/envelope/") do
220
+ # xml.Header
221
+ # xml["soapenv"].Body # users may explicitly opt into the namespace
222
+ # end
223
+ # end
224
+ # result.doc.to_xml
225
+ # # => <?xml version="1.0" encoding="utf-8"?>
226
+ # # <soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/">
227
+ # # <Header/>
228
+ # # <soapenv:Body/>
229
+ # # </soapenv:Envelope>
230
+ #
231
+ # For more information on namespace inheritance, please see {XML::Document#namespace_inheritance}
232
+ #
233
+ #
198
234
  # == Document Types
199
235
  #
200
236
  # To create a document type (DTD), use the Builder#doc method to get
@@ -225,6 +261,8 @@ module Nokogiri
225
261
  # </root>
226
262
  #
227
263
  class Builder
264
+ DEFAULT_DOCUMENT_OPTIONS = {namespace_inheritance: true}
265
+
228
266
  # The current Document object being built
229
267
  attr_accessor :doc
230
268
 
@@ -244,8 +282,8 @@ module Nokogiri
244
282
  #
245
283
  # For example:
246
284
  #
247
- # doc = Nokogiri::XML(open('somedoc.xml'))
248
- # Nokogiri::XML::Builder.with(doc.at('some_tag')) do |xml|
285
+ # doc = Nokogiri::XML(File.read('somedoc.xml'))
286
+ # Nokogiri::XML::Builder.with(doc.at_css('some_tag')) do |xml|
249
287
  # # ... Use normal builder methods here ...
250
288
  # xml.awesome # add the "awesome" tag below "some_tag"
251
289
  # end
@@ -281,6 +319,7 @@ module Nokogiri
281
319
  @arity = nil
282
320
  @ns = nil
283
321
 
322
+ options = DEFAULT_DOCUMENT_OPTIONS.merge(options)
284
323
  options.each do |k, v|
285
324
  @doc.send(:"#{k}=", v)
286
325
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class CDATA < Nokogiri::XML::Text
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class CharacterData < Nokogiri::XML::Node
@@ -1,3 +1,8 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require 'pathname'
5
+
1
6
  module Nokogiri
2
7
  module XML
3
8
  ##
@@ -9,11 +14,12 @@ module Nokogiri
9
14
  # Nokogiri::XML::Searchable#xpath
10
15
  #
11
16
  class Document < Nokogiri::XML::Node
12
- # I'm ignoring unicode characters here.
13
- # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details.
17
+ # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
18
+ # attempting to handle unicode characters partly because libxml2 doesn't handle unicode
19
+ # characters in NCNAMEs.
14
20
  NCNAME_START_CHAR = "A-Za-z_"
15
- NCNAME_CHAR = NCNAME_START_CHAR + "\\-.0-9"
16
- NCNAME_RE = /^xmlns(:[#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*)?$/
21
+ NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
22
+ NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
17
23
 
18
24
  ##
19
25
  # Parse an XML file.
@@ -43,9 +49,11 @@ module Nokogiri
43
49
  #
44
50
  def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML
45
51
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
46
- # Give the options to the user
52
+
47
53
  yield options if block_given?
48
54
 
55
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
56
+
49
57
  if empty_doc?(string_or_io)
50
58
  if options.strict?
51
59
  raise Nokogiri::XML::SyntaxError.new("Empty document")
@@ -55,12 +63,17 @@ module Nokogiri
55
63
  end
56
64
 
57
65
  doc = if string_or_io.respond_to?(:read)
58
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
59
- read_io(string_or_io, url, encoding, options.to_i)
60
- else
61
- # read_memory pukes on empty docs
62
- read_memory(string_or_io, url, encoding, options.to_i)
63
- end
66
+ if string_or_io.is_a?(Pathname)
67
+ # resolve the Pathname to the file and open it as an IO object, see #2110
68
+ string_or_io = string_or_io.expand_path.open
69
+ url ||= string_or_io.path
70
+ end
71
+
72
+ read_io(string_or_io, url, encoding, options.to_i)
73
+ else
74
+ # read_memory pukes on empty docs
75
+ read_memory(string_or_io, url, encoding, options.to_i)
76
+ end
64
77
 
65
78
  # do xinclude processing
66
79
  doc.do_xinclude(options) if options.xinclude?
@@ -68,42 +81,142 @@ module Nokogiri
68
81
  return doc
69
82
  end
70
83
 
84
+ ##
85
+ # @!method wrap(java_document)
86
+ # @!scope class
87
+ #
88
+ # Create a {Document} using an existing Java DOM document object.
89
+ #
90
+ # The returned {Document} shares the same underlying data structure as the Java object, so
91
+ # changes in one are reflected in the other.
92
+ #
93
+ # @param java_document [Java::OrgW3cDom::Document]
94
+ # @return [Nokogiri::XML::Document]
95
+ # @note This method is only available when running JRuby.
96
+ # @note The class +Java::OrgW3cDom::Document+ is also accessible as +org.w3c.dom.Document+.
97
+ # @see #to_java
98
+
99
+ ##
100
+ # @!method to_java()
101
+ #
102
+ # Returns the underlying Java DOM document object for the {Document}.
103
+ #
104
+ # The returned Java object shares the same underlying data structure as the {Document}, so
105
+ # changes in one are reflected in the other.
106
+ #
107
+ # @return [Java::OrgW3cDom::Document]
108
+ # @note This method is only available when running JRuby.
109
+ # @note The class +Java::OrgW3cDom::Document+ is also accessible as +org.w3c.dom.Document+.
110
+ # @see .wrap
111
+
112
+
71
113
  # A list of Nokogiri::XML::SyntaxError found when parsing a document
72
114
  attr_accessor :errors
73
115
 
116
+ # When true, reparented elements without a namespace will inherit their new parent's
117
+ # namespace (if one exists). Defaults to +false+.
118
+ #
119
+ # @example Default behavior of namespace inheritance
120
+ # xml = <<~EOF
121
+ # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
122
+ # <foo:parent>
123
+ # </foo:parent>
124
+ # </root>
125
+ # EOF
126
+ # doc = Nokogiri::XML(xml)
127
+ # parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
128
+ # parent.add_child("<child></child>")
129
+ # doc.to_xml
130
+ # # => <?xml version="1.0"?>
131
+ # # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
132
+ # # <foo:parent>
133
+ # # <child/>
134
+ # # </foo:parent>
135
+ # # </root>
136
+ #
137
+ # @example Setting namespace inheritance to +true+
138
+ # xml = <<~EOF
139
+ # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
140
+ # <foo:parent>
141
+ # </foo:parent>
142
+ # </root>
143
+ # EOF
144
+ # doc = Nokogiri::XML(xml)
145
+ # doc.namespace_inheritance = true
146
+ # parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
147
+ # parent.add_child("<child></child>")
148
+ # doc.to_xml
149
+ # # => <?xml version="1.0"?>
150
+ # # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
151
+ # # <foo:parent>
152
+ # # <foo:child/>
153
+ # # </foo:parent>
154
+ # # </root>
155
+ #
156
+ # @return [Boolean]
157
+ #
158
+ # @since v1.12.4
159
+ attr_accessor :namespace_inheritance
160
+
74
161
  def initialize *args # :nodoc:
75
162
  @errors = []
76
163
  @decorators = nil
164
+ @namespace_inheritance = false
77
165
  end
78
166
 
79
167
  ##
80
- # Create an element with +name+, and optionally setting the content and attributes.
168
+ # Create a new +Element+ with +name+ sharing GC lifecycle with the document, optionally
169
+ # setting contents or attributes.
170
+ #
171
+ # Arguments may be passed to initialize the element:
172
+ # - a +Hash+ argument will be used to set attributes
173
+ # - a non-Hash object that responds to +#to_s+ will be used to set the new node's contents
174
+ #
175
+ # A block may be passed to mutate the node.
176
+ #
177
+ # @param name [String]
178
+ # @param contents_or_attrs [#to_s,Hash]
179
+ # @yieldparam node [Nokogiri::XML::Element]
180
+ # @return [Nokogiri::XML::Element]
181
+ #
182
+ # @example An empty element without attributes
183
+ # doc.create_element("div")
184
+ # # => <div></div>
185
+ #
186
+ # @example An element with contents
187
+ # doc.create_element("div", "contents")
188
+ # # => <div>contents</div>
81
189
  #
82
- # doc.create_element "div" # <div></div>
83
- # doc.create_element "div", :class => "container" # <div class='container'></div>
84
- # doc.create_element "div", "contents" # <div>contents</div>
85
- # doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
86
- # doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
190
+ # @example An element with attributes
191
+ # doc.create_element("div", {"class" => "container"})
192
+ # # => <div class='container'></div>
87
193
  #
88
- def create_element name, *args, &block
194
+ # @example An element with contents and attributes
195
+ # doc.create_element("div", "contents", {"class" => "container"})
196
+ # # => <div class='container'>contents</div>
197
+ #
198
+ # @example Passing a block to mutate the element
199
+ # doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
200
+ #
201
+ def create_element(name, *contents_or_attrs, &block)
89
202
  elm = Nokogiri::XML::Element.new(name, self, &block)
90
- args.each do |arg|
203
+ contents_or_attrs.each do |arg|
91
204
  case arg
92
205
  when Hash
93
- arg.each { |k,v|
206
+ arg.each do |k, v|
94
207
  key = k.to_s
95
208
  if key =~ NCNAME_RE
96
- ns_name = key.split(":", 2)[1]
97
- elm.add_namespace_definition ns_name, v
209
+ ns_name = Regexp.last_match(1)
210
+ elm.add_namespace_definition(ns_name, v)
98
211
  else
99
212
  elm[k.to_s] = v.to_s
100
213
  end
101
- }
214
+ end
102
215
  else
103
216
  elm.content = arg
104
217
  end
105
218
  end
106
- if ns = elm.namespace_definitions.find { |n| n.prefix.nil? or n.prefix == '' }
219
+ if ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == '') }
107
220
  elm.namespace = ns
108
221
  end
109
222
  elm
@@ -251,30 +364,14 @@ module Nokogiri
251
364
  end
252
365
  alias :<< :add_child
253
366
 
254
- ##
255
- # +JRuby+
256
- # Wraps Java's org.w3c.dom.document and returns Nokogiri::XML::Document
257
- def self.wrap document
258
- raise "JRuby only method" unless Nokogiri.jruby?
259
- return wrapJavaDocument(document)
260
- end
261
-
262
- ##
263
- # +JRuby+
264
- # Returns Java's org.w3c.dom.document of this Document.
265
- def to_java
266
- raise "JRuby only method" unless Nokogiri.jruby?
267
- return toJavaDocument()
268
- end
269
-
270
367
  private
368
+
271
369
  def self.empty_doc? string_or_io
272
370
  string_or_io.nil? ||
273
371
  (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
274
372
  (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
275
373
  end
276
374
 
277
- # @private
278
375
  IMPLIED_XPATH_CONTEXTS = [ '//'.freeze ].freeze # :nodoc:
279
376
 
280
377
  def inspect_attributes