nokogiri 1.0.0 → 1.6.8.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (309) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +26 -0
  3. data/.cross_rubies +9 -0
  4. data/.editorconfig +17 -0
  5. data/.gemtest +0 -0
  6. data/.travis.yml +51 -0
  7. data/CHANGELOG.rdoc +1160 -0
  8. data/CONTRIBUTING.md +42 -0
  9. data/C_CODING_STYLE.rdoc +33 -0
  10. data/Gemfile +22 -0
  11. data/LICENSE.txt +31 -0
  12. data/Manifest.txt +284 -40
  13. data/README.md +166 -0
  14. data/ROADMAP.md +111 -0
  15. data/Rakefile +310 -199
  16. data/STANDARD_RESPONSES.md +47 -0
  17. data/Y_U_NO_GEMSPEC.md +155 -0
  18. data/appveyor.yml +22 -0
  19. data/bin/nokogiri +118 -0
  20. data/build_all +45 -0
  21. data/dependencies.yml +29 -0
  22. data/ext/nokogiri/depend +358 -0
  23. data/ext/nokogiri/extconf.rb +664 -34
  24. data/ext/nokogiri/html_document.c +120 -33
  25. data/ext/nokogiri/html_document.h +1 -1
  26. data/ext/nokogiri/html_element_description.c +279 -0
  27. data/ext/nokogiri/html_element_description.h +10 -0
  28. data/ext/nokogiri/html_entity_lookup.c +32 -0
  29. data/ext/nokogiri/html_entity_lookup.h +8 -0
  30. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  31. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  32. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  33. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  34. data/ext/nokogiri/nokogiri.c +145 -0
  35. data/ext/nokogiri/nokogiri.h +131 -0
  36. data/ext/nokogiri/xml_attr.c +94 -0
  37. data/ext/nokogiri/xml_attr.h +9 -0
  38. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  39. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  40. data/ext/nokogiri/xml_cdata.c +23 -19
  41. data/ext/nokogiri/xml_cdata.h +1 -1
  42. data/ext/nokogiri/xml_comment.c +69 -0
  43. data/ext/nokogiri/xml_comment.h +9 -0
  44. data/ext/nokogiri/xml_document.c +501 -54
  45. data/ext/nokogiri/xml_document.h +14 -1
  46. data/ext/nokogiri/xml_document_fragment.c +48 -0
  47. data/ext/nokogiri/xml_document_fragment.h +10 -0
  48. data/ext/nokogiri/xml_dtd.c +109 -24
  49. data/ext/nokogiri/xml_dtd.h +3 -1
  50. data/ext/nokogiri/xml_element_content.c +123 -0
  51. data/ext/nokogiri/xml_element_content.h +10 -0
  52. data/ext/nokogiri/xml_element_decl.c +69 -0
  53. data/ext/nokogiri/xml_element_decl.h +9 -0
  54. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  55. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  56. data/ext/nokogiri/xml_entity_decl.c +110 -0
  57. data/ext/nokogiri/xml_entity_decl.h +10 -0
  58. data/ext/nokogiri/xml_entity_reference.c +52 -0
  59. data/ext/nokogiri/xml_entity_reference.h +9 -0
  60. data/ext/nokogiri/xml_io.c +60 -0
  61. data/ext/nokogiri/xml_io.h +11 -0
  62. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  63. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  64. data/ext/nokogiri/xml_namespace.c +117 -0
  65. data/ext/nokogiri/xml_namespace.h +13 -0
  66. data/ext/nokogiri/xml_node.c +1285 -315
  67. data/ext/nokogiri/xml_node.h +4 -6
  68. data/ext/nokogiri/xml_node_set.c +415 -54
  69. data/ext/nokogiri/xml_node_set.h +6 -2
  70. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  71. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  72. data/ext/nokogiri/xml_reader.c +316 -77
  73. data/ext/nokogiri/xml_reader.h +1 -1
  74. data/ext/nokogiri/xml_relax_ng.c +161 -0
  75. data/ext/nokogiri/xml_relax_ng.h +9 -0
  76. data/ext/nokogiri/xml_sax_parser.c +215 -80
  77. data/ext/nokogiri/xml_sax_parser.h +30 -1
  78. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  79. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  80. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  81. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  82. data/ext/nokogiri/xml_schema.c +205 -0
  83. data/ext/nokogiri/xml_schema.h +9 -0
  84. data/ext/nokogiri/xml_syntax_error.c +45 -175
  85. data/ext/nokogiri/xml_syntax_error.h +4 -2
  86. data/ext/nokogiri/xml_text.c +37 -14
  87. data/ext/nokogiri/xml_text.h +1 -1
  88. data/ext/nokogiri/xml_xpath_context.c +230 -13
  89. data/ext/nokogiri/xml_xpath_context.h +2 -1
  90. data/ext/nokogiri/xslt_stylesheet.c +196 -34
  91. data/ext/nokogiri/xslt_stylesheet.h +6 -1
  92. data/lib/nokogiri/css/node.rb +18 -61
  93. data/lib/nokogiri/css/parser.rb +725 -17
  94. data/lib/nokogiri/css/parser.y +126 -63
  95. data/lib/nokogiri/css/parser_extras.rb +91 -0
  96. data/lib/nokogiri/css/syntax_error.rb +7 -0
  97. data/lib/nokogiri/css/tokenizer.rb +148 -5
  98. data/lib/nokogiri/css/tokenizer.rex +31 -39
  99. data/lib/nokogiri/css/xpath_visitor.rb +109 -51
  100. data/lib/nokogiri/css.rb +24 -3
  101. data/lib/nokogiri/decorators/slop.rb +42 -0
  102. data/lib/nokogiri/html/builder.rb +27 -1
  103. data/lib/nokogiri/html/document.rb +329 -3
  104. data/lib/nokogiri/html/document_fragment.rb +39 -0
  105. data/lib/nokogiri/html/element_description.rb +23 -0
  106. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  107. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  108. data/lib/nokogiri/html/sax/parser.rb +35 -4
  109. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  110. data/lib/nokogiri/html/sax/push_parser.rb +36 -0
  111. data/lib/nokogiri/html.rb +18 -76
  112. data/lib/nokogiri/syntax_error.rb +4 -0
  113. data/lib/nokogiri/version.rb +106 -1
  114. data/lib/nokogiri/xml/attr.rb +14 -0
  115. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  116. data/lib/nokogiri/xml/builder.rb +395 -31
  117. data/lib/nokogiri/xml/cdata.rb +4 -2
  118. data/lib/nokogiri/xml/character_data.rb +7 -0
  119. data/lib/nokogiri/xml/document.rb +267 -12
  120. data/lib/nokogiri/xml/document_fragment.rb +149 -0
  121. data/lib/nokogiri/xml/dtd.rb +27 -1
  122. data/lib/nokogiri/xml/element_content.rb +36 -0
  123. data/lib/nokogiri/xml/element_decl.rb +13 -0
  124. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  125. data/lib/nokogiri/xml/namespace.rb +13 -0
  126. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  127. data/lib/nokogiri/xml/node.rb +748 -109
  128. data/lib/nokogiri/xml/node_set.rb +200 -72
  129. data/lib/nokogiri/xml/parse_options.rb +120 -0
  130. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  131. data/lib/nokogiri/xml/pp/node.rb +56 -0
  132. data/lib/nokogiri/xml/pp.rb +2 -0
  133. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  134. data/lib/nokogiri/xml/reader.rb +102 -4
  135. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  136. data/lib/nokogiri/xml/sax/document.rb +114 -2
  137. data/lib/nokogiri/xml/sax/parser.rb +97 -7
  138. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  139. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  140. data/lib/nokogiri/xml/sax.rb +2 -7
  141. data/lib/nokogiri/xml/schema.rb +63 -0
  142. data/lib/nokogiri/xml/searchable.rb +221 -0
  143. data/lib/nokogiri/xml/syntax_error.rb +27 -1
  144. data/lib/nokogiri/xml/text.rb +4 -1
  145. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  146. data/lib/nokogiri/xml/xpath.rb +4 -0
  147. data/lib/nokogiri/xml/xpath_context.rb +3 -1
  148. data/lib/nokogiri/xml.rb +45 -38
  149. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  150. data/lib/nokogiri/xslt.rb +47 -2
  151. data/lib/nokogiri.rb +117 -24
  152. data/lib/xsd/xmlparser/nokogiri.rb +102 -0
  153. data/patches/sort-patches-by-date +25 -0
  154. data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
  155. data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
  156. data/suppressions/README.txt +1 -0
  157. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  158. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  159. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  160. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  161. data/tasks/test.rb +100 -0
  162. data/test/css/test_nthiness.rb +73 -6
  163. data/test/css/test_parser.rb +184 -39
  164. data/test/css/test_tokenizer.rb +72 -19
  165. data/test/css/test_xpath_visitor.rb +44 -2
  166. data/test/decorators/test_slop.rb +20 -0
  167. data/test/files/2ch.html +108 -0
  168. data/test/files/GH_1042.html +18 -0
  169. data/test/files/address_book.rlx +12 -0
  170. data/test/files/address_book.xml +10 -0
  171. data/test/files/atom.xml +344 -0
  172. data/test/files/bar/bar.xsd +4 -0
  173. data/test/files/bogus.xml +0 -0
  174. data/test/files/dont_hurt_em_why.xml +422 -0
  175. data/test/files/encoding.html +82 -0
  176. data/test/files/encoding.xhtml +84 -0
  177. data/test/files/exslt.xml +8 -0
  178. data/test/files/exslt.xslt +35 -0
  179. data/test/files/foo/foo.xsd +4 -0
  180. data/test/files/metacharset.html +10 -0
  181. data/test/files/namespace_pressure_test.xml +1684 -0
  182. data/test/files/noencoding.html +47 -0
  183. data/test/files/po.xml +32 -0
  184. data/test/files/po.xsd +66 -0
  185. data/test/files/saml/saml20assertion_schema.xsd +283 -0
  186. data/test/files/saml/saml20protocol_schema.xsd +302 -0
  187. data/test/files/saml/xenc_schema.xsd +146 -0
  188. data/test/files/saml/xmldsig_schema.xsd +318 -0
  189. data/test/files/shift_jis.html +10 -0
  190. data/test/files/shift_jis.xml +5 -0
  191. data/test/files/shift_jis_no_charset.html +9 -0
  192. data/test/files/slow-xpath.xml +25509 -0
  193. data/test/files/snuggles.xml +3 -0
  194. data/test/files/staff.dtd +10 -0
  195. data/test/files/test_document_url/bar.xml +2 -0
  196. data/test/files/test_document_url/document.dtd +4 -0
  197. data/test/files/test_document_url/document.xml +6 -0
  198. data/test/files/tlm.html +2 -1
  199. data/test/files/to_be_xincluded.xml +2 -0
  200. data/test/files/valid_bar.xml +2 -0
  201. data/test/files/xinclude.xml +4 -0
  202. data/test/helper.rb +124 -13
  203. data/test/html/sax/test_parser.rb +118 -4
  204. data/test/html/sax/test_parser_context.rb +46 -0
  205. data/test/html/sax/test_push_parser.rb +87 -0
  206. data/test/html/test_builder.rb +94 -8
  207. data/test/html/test_document.rb +626 -11
  208. data/test/html/test_document_encoding.rb +145 -0
  209. data/test/html/test_document_fragment.rb +301 -0
  210. data/test/html/test_element_description.rb +105 -0
  211. data/test/html/test_named_characters.rb +14 -0
  212. data/test/html/test_node.rb +212 -0
  213. data/test/html/test_node_encoding.rb +85 -0
  214. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
  215. data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
  216. data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
  217. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  218. data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
  219. data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
  220. data/test/namespaces/test_namespaces_preservation.rb +31 -0
  221. data/test/test_convert_xpath.rb +2 -47
  222. data/test/test_css_cache.rb +45 -0
  223. data/test/test_encoding_handler.rb +48 -0
  224. data/test/test_memory_leak.rb +156 -0
  225. data/test/test_nokogiri.rb +103 -1
  226. data/test/test_soap4r_sax.rb +52 -0
  227. data/test/test_xslt_transforms.rb +293 -8
  228. data/test/xml/node/test_save_options.rb +28 -0
  229. data/test/xml/node/test_subclass.rb +44 -0
  230. data/test/xml/sax/test_parser.rb +309 -8
  231. data/test/xml/sax/test_parser_context.rb +115 -0
  232. data/test/xml/sax/test_push_parser.rb +157 -0
  233. data/test/xml/test_attr.rb +67 -0
  234. data/test/xml/test_attribute_decl.rb +86 -0
  235. data/test/xml/test_builder.rb +327 -2
  236. data/test/xml/test_c14n.rb +180 -0
  237. data/test/xml/test_cdata.rb +32 -2
  238. data/test/xml/test_comment.rb +40 -0
  239. data/test/xml/test_document.rb +846 -35
  240. data/test/xml/test_document_encoding.rb +31 -0
  241. data/test/xml/test_document_fragment.rb +271 -0
  242. data/test/xml/test_dtd.rb +153 -9
  243. data/test/xml/test_dtd_encoding.rb +31 -0
  244. data/test/xml/test_element_content.rb +56 -0
  245. data/test/xml/test_element_decl.rb +73 -0
  246. data/test/xml/test_entity_decl.rb +122 -0
  247. data/test/xml/test_entity_reference.rb +251 -0
  248. data/test/xml/test_namespace.rb +96 -0
  249. data/test/xml/test_node.rb +1126 -105
  250. data/test/xml/test_node_attributes.rb +115 -0
  251. data/test/xml/test_node_encoding.rb +69 -0
  252. data/test/xml/test_node_inheritance.rb +32 -0
  253. data/test/xml/test_node_reparenting.rb +549 -0
  254. data/test/xml/test_node_set.rb +668 -9
  255. data/test/xml/test_parse_options.rb +64 -0
  256. data/test/xml/test_processing_instruction.rb +30 -0
  257. data/test/xml/test_reader.rb +589 -0
  258. data/test/xml/test_reader_encoding.rb +134 -0
  259. data/test/xml/test_relax_ng.rb +60 -0
  260. data/test/xml/test_schema.rb +142 -0
  261. data/test/xml/test_syntax_error.rb +30 -0
  262. data/test/xml/test_text.rb +49 -2
  263. data/test/xml/test_unparented_node.rb +440 -0
  264. data/test/xml/test_xinclude.rb +83 -0
  265. data/test/xml/test_xpath.rb +445 -0
  266. data/test/xslt/test_custom_functions.rb +133 -0
  267. data/test/xslt/test_exception_handling.rb +37 -0
  268. data/test_all +107 -0
  269. metadata +459 -115
  270. data/History.txt +0 -6
  271. data/README.ja.txt +0 -86
  272. data/README.txt +0 -87
  273. data/ext/nokogiri/html_sax_parser.c +0 -32
  274. data/ext/nokogiri/html_sax_parser.h +0 -11
  275. data/ext/nokogiri/native.c +0 -40
  276. data/ext/nokogiri/native.h +0 -51
  277. data/ext/nokogiri/xml_xpath.c +0 -46
  278. data/ext/nokogiri/xml_xpath.h +0 -11
  279. data/lib/nokogiri/css/generated_parser.rb +0 -653
  280. data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
  281. data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
  282. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
  283. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
  284. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  285. data/lib/nokogiri/decorators.rb +0 -1
  286. data/lib/nokogiri/hpricot.rb +0 -47
  287. data/lib/nokogiri/xml/after_handler.rb +0 -18
  288. data/lib/nokogiri/xml/before_handler.rb +0 -32
  289. data/lib/nokogiri/xml/element.rb +0 -6
  290. data/lib/nokogiri/xml/entity_declaration.rb +0 -9
  291. data/nokogiri.gemspec +0 -34
  292. data/test/hpricot/files/basic.xhtml +0 -17
  293. data/test/hpricot/files/boingboing.html +0 -2266
  294. data/test/hpricot/files/cy0.html +0 -3653
  295. data/test/hpricot/files/immob.html +0 -400
  296. data/test/hpricot/files/pace_application.html +0 -1320
  297. data/test/hpricot/files/tenderlove.html +0 -16
  298. data/test/hpricot/files/uswebgen.html +0 -220
  299. data/test/hpricot/files/utf8.html +0 -1054
  300. data/test/hpricot/files/week9.html +0 -1723
  301. data/test/hpricot/files/why.xml +0 -19
  302. data/test/hpricot/load_files.rb +0 -7
  303. data/test/hpricot/test_alter.rb +0 -67
  304. data/test/hpricot/test_builder.rb +0 -27
  305. data/test/hpricot/test_parser.rb +0 -423
  306. data/test/hpricot/test_paths.rb +0 -15
  307. data/test/hpricot/test_preserved.rb +0 -78
  308. data/test/hpricot/test_xml.rb +0 -30
  309. data/test/test_reader.rb +0 -222
@@ -1,29 +1,284 @@
1
1
  module Nokogiri
2
2
  module XML
3
- class Document < Node
4
- def decorators
5
- @decorators ||= Hash.new { |h,k| h[k] = [] }
3
+ ##
4
+ # Nokogiri::XML::Document is the main entry point for dealing with
5
+ # XML documents. The Document is created by parsing an XML document.
6
+ # See Nokogiri::XML::Document.parse() for more information on parsing.
7
+ #
8
+ # For searching a Document, see Nokogiri::XML::Searchable#css and
9
+ # Nokogiri::XML::Searchable#xpath
10
+ #
11
+ class Document < Nokogiri::XML::Node
12
+ # I'm ignoring unicode characters here.
13
+ # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details.
14
+ NCNAME_START_CHAR = "A-Za-z_"
15
+ NCNAME_CHAR = NCNAME_START_CHAR + "\\-.0-9"
16
+ NCNAME_RE = /^xmlns(:[#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*)?$/
17
+
18
+ ##
19
+ # Parse an XML file.
20
+ #
21
+ # +string_or_io+ may be a String, or any object that responds to
22
+ # _read_ and _close_ such as an IO, or StringIO.
23
+ #
24
+ # +url+ (optional) is the URI where this document is located.
25
+ #
26
+ # +encoding+ (optional) is the encoding that should be used when processing
27
+ # the document.
28
+ #
29
+ # +options+ (optional) is a configuration object that sets options during
30
+ # parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
31
+ # Nokogiri::XML::ParseOptions for more information.
32
+ #
33
+ # +block+ (optional) is passed a configuration object on which
34
+ # parse options may be set.
35
+ #
36
+ # When parsing untrusted documents, it's recommended that the
37
+ # +nonet+ option be used, as shown in this example code:
38
+ #
39
+ # Nokogiri::XML::Document.parse(xml_string) { |config| config.nonet }
40
+ #
41
+ # Nokogiri.XML() is a convenience method which will call this method.
42
+ #
43
+ def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block
44
+ options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
45
+ # Give the options to the user
46
+ yield options if block_given?
47
+
48
+ if empty_doc?(string_or_io)
49
+ if options.strict?
50
+ raise Nokogiri::XML::SyntaxError.new("Empty document")
51
+ else
52
+ return encoding ? new.tap { |i| i.encoding = encoding } : new
53
+ end
54
+ end
55
+
56
+ doc = if string_or_io.respond_to?(:read)
57
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
58
+ read_io(string_or_io, url, encoding, options.to_i)
59
+ else
60
+ # read_memory pukes on empty docs
61
+ read_memory(string_or_io, url, encoding, options.to_i)
62
+ end
63
+
64
+ # do xinclude processing
65
+ doc.do_xinclude(options) if options.xinclude?
66
+
67
+ return doc
6
68
  end
7
69
 
70
+ # A list of Nokogiri::XML::SyntaxError found when parsing a document
71
+ attr_accessor :errors
72
+
73
+ def initialize *args # :nodoc:
74
+ @errors = []
75
+ @decorators = nil
76
+ end
77
+
78
+ ##
79
+ # Create an element with +name+, and optionally setting the content and attributes.
80
+ #
81
+ # doc.create_element "div" # <div></div>
82
+ # doc.create_element "div", :class => "container" # <div class='container'></div>
83
+ # doc.create_element "div", "contents" # <div>contents</div>
84
+ # doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
85
+ # doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
86
+ #
87
+ def create_element name, *args, &block
88
+ elm = Nokogiri::XML::Element.new(name, self, &block)
89
+ args.each do |arg|
90
+ case arg
91
+ when Hash
92
+ arg.each { |k,v|
93
+ key = k.to_s
94
+ if key =~ NCNAME_RE
95
+ ns_name = key.split(":", 2)[1]
96
+ elm.add_namespace_definition ns_name, v
97
+ else
98
+ elm[k.to_s] = v.to_s
99
+ end
100
+ }
101
+ else
102
+ elm.content = arg
103
+ end
104
+ end
105
+ if ns = elm.namespace_definitions.find { |n| n.prefix.nil? or n.prefix == '' }
106
+ elm.namespace = ns
107
+ end
108
+ elm
109
+ end
110
+
111
+ # Create a Text Node with +string+
112
+ def create_text_node string, &block
113
+ Nokogiri::XML::Text.new string.to_s, self, &block
114
+ end
115
+
116
+ # Create a CDATA Node containing +string+
117
+ def create_cdata string, &block
118
+ Nokogiri::XML::CDATA.new self, string.to_s, &block
119
+ end
120
+
121
+ # Create a Comment Node containing +string+
122
+ def create_comment string, &block
123
+ Nokogiri::XML::Comment.new self, string.to_s, &block
124
+ end
125
+
126
+ # The name of this document. Always returns "document"
8
127
  def name
9
128
  'document'
10
129
  end
11
130
 
12
- ###
13
- # Apply any decorators to +node+
14
- def decorate(node)
15
- key = node.class.name.split('::').last.downcase
16
- decorators[key].each do |klass|
17
- node.extend(klass)
131
+ # A reference to +self+
132
+ def document
133
+ self
134
+ end
135
+
136
+ ##
137
+ # Recursively get all namespaces from this node and its subtree and
138
+ # return them as a hash.
139
+ #
140
+ # For example, given this document:
141
+ #
142
+ # <root xmlns:foo="bar">
143
+ # <bar xmlns:hello="world" />
144
+ # </root>
145
+ #
146
+ # This method will return:
147
+ #
148
+ # { 'xmlns:foo' => 'bar', 'xmlns:hello' => 'world' }
149
+ #
150
+ # WARNING: this method will clobber duplicate names in the keys.
151
+ # For example, given this document:
152
+ #
153
+ # <root xmlns:foo="bar">
154
+ # <bar xmlns:foo="baz" />
155
+ # </root>
156
+ #
157
+ # The hash returned will look like this: { 'xmlns:foo' => 'bar' }
158
+ #
159
+ # Non-prefixed default namespaces (as in "xmlns=") are included in
160
+ # the hash under the key 'xmlns'.
161
+ #
162
+ # Note that this method does an xpath lookup for nodes with
163
+ # namespaces, and as a result the order may be dependent on the
164
+ # implementation of the underlying XML library.
165
+ #
166
+ def collect_namespaces
167
+ xpath("//namespace::*").inject({}) do |hash, ns|
168
+ hash[["xmlns",ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
169
+ hash
18
170
  end
19
171
  end
20
172
 
21
- def to_xml
22
- serialize
173
+ # Get the list of decorators given +key+
174
+ def decorators key
175
+ @decorators ||= Hash.new
176
+ @decorators[key] ||= []
177
+ end
178
+
179
+ ##
180
+ # Validate this Document against its DTD. Returns a list of errors on
181
+ # the document or +nil+ when there is no DTD.
182
+ def validate
183
+ return nil unless internal_subset
184
+ internal_subset.validate self
23
185
  end
24
186
 
187
+ ##
188
+ # Explore a document with shortcut methods. See Nokogiri::Slop for details.
189
+ #
190
+ # Note that any nodes that have been instantiated before #slop!
191
+ # is called will not be decorated with sloppy behavior. So, if you're in
192
+ # irb, the preferred idiom is:
193
+ #
194
+ # irb> doc = Nokogiri::Slop my_markup
195
+ #
196
+ # and not
197
+ #
198
+ # irb> doc = Nokogiri::HTML my_markup
199
+ # ... followed by irb's implicit inspect (and therefore instantiation of every node) ...
200
+ # irb> doc.slop!
201
+ # ... which does absolutely nothing.
202
+ #
203
+ def slop!
204
+ unless decorators(XML::Node).include? Nokogiri::Decorators::Slop
205
+ decorators(XML::Node) << Nokogiri::Decorators::Slop
206
+ decorate!
207
+ end
208
+
209
+ self
210
+ end
211
+
212
+ ##
213
+ # Apply any decorators to +node+
214
+ def decorate node
215
+ return unless @decorators
216
+ @decorators.each { |klass,list|
217
+ next unless node.is_a?(klass)
218
+ list.each { |moodule| node.extend(moodule) }
219
+ }
220
+ end
221
+
222
+ alias :to_xml :serialize
223
+ alias :clone :dup
224
+
225
+ # Get the hash of namespaces on the root Nokogiri::XML::Node
25
226
  def namespaces
26
- root ? root.collect_namespaces : {}
227
+ root ? root.namespaces : {}
228
+ end
229
+
230
+ ##
231
+ # Create a Nokogiri::XML::DocumentFragment from +tags+
232
+ # Returns an empty fragment if +tags+ is nil.
233
+ def fragment tags = nil
234
+ DocumentFragment.new(self, tags, self.root)
235
+ end
236
+
237
+ undef_method :swap, :parent, :namespace, :default_namespace=
238
+ undef_method :add_namespace_definition, :attributes
239
+ undef_method :namespace_definitions, :line, :add_namespace
240
+
241
+ def add_child node_or_tags
242
+ raise "Document already has a root node" if root && root.name != 'nokogiri_text_wrapper'
243
+ node_or_tags = coerce(node_or_tags)
244
+ if node_or_tags.is_a?(XML::NodeSet)
245
+ raise "Document cannot have multiple root nodes" if node_or_tags.size > 1
246
+ super(node_or_tags.first)
247
+ else
248
+ super
249
+ end
250
+ end
251
+ alias :<< :add_child
252
+
253
+ ##
254
+ # +JRuby+
255
+ # Wraps Java's org.w3c.dom.Document and returns Nokogiri::XML::Document
256
+ def self.wrap document
257
+ raise "JRuby only method" unless Nokogiri.jruby?
258
+ return wrapJavaDocument(document)
259
+ end
260
+
261
+ ##
262
+ # +JRuby+
263
+ # Returns Java's org.w3c.dom.Document of this Document.
264
+ def to_java
265
+ raise "JRuby only method" unless Nokogiri.jruby?
266
+ return toJavaDocument()
267
+ end
268
+
269
+ private
270
+ def self.empty_doc? string_or_io
271
+ string_or_io.nil? ||
272
+ (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
273
+ (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
274
+ end
275
+
276
+ def implied_xpath_contexts # :nodoc:
277
+ ["//"]
278
+ end
279
+
280
+ def inspect_attributes
281
+ [:name, :children]
27
282
  end
28
283
  end
29
284
  end
@@ -0,0 +1,149 @@
1
+ module Nokogiri
2
+ module XML
3
+ class DocumentFragment < Nokogiri::XML::Node
4
+ ##
5
+ # Create a new DocumentFragment from +tags+.
6
+ #
7
+ # If +ctx+ is present, it is used as a context node for the
8
+ # subtree created, e.g., namespaces will be resolved relative
9
+ # to +ctx+.
10
+ def initialize document, tags = nil, ctx = nil
11
+ return self unless tags
12
+
13
+ children = if ctx
14
+ # Fix for issue#490
15
+ if Nokogiri.jruby?
16
+ # fix for issue #770
17
+ ctx.parse("<root #{namespace_declarations(ctx)}>#{tags}</root>").children
18
+ else
19
+ ctx.parse(tags)
20
+ end
21
+ else
22
+ XML::Document.parse("<root>#{tags}</root>") \
23
+ .xpath("/root/node()")
24
+ end
25
+ children.each { |child| child.parent = self }
26
+ end
27
+
28
+ ###
29
+ # return the name for DocumentFragment
30
+ def name
31
+ '#document-fragment'
32
+ end
33
+
34
+ ###
35
+ # Convert this DocumentFragment to a string
36
+ def to_s
37
+ children.to_s
38
+ end
39
+
40
+ ###
41
+ # Convert this DocumentFragment to html
42
+ # See Nokogiri::XML::NodeSet#to_html
43
+ def to_html *args
44
+ if Nokogiri.jruby?
45
+ options = args.first.is_a?(Hash) ? args.shift : {}
46
+ if !options[:save_with]
47
+ options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_HTML
48
+ end
49
+ args.insert(0, options)
50
+ end
51
+ children.to_html(*args)
52
+ end
53
+
54
+ ###
55
+ # Convert this DocumentFragment to xhtml
56
+ # See Nokogiri::XML::NodeSet#to_xhtml
57
+ def to_xhtml *args
58
+ if Nokogiri.jruby?
59
+ options = args.first.is_a?(Hash) ? args.shift : {}
60
+ if !options[:save_with]
61
+ options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_XHTML
62
+ end
63
+ args.insert(0, options)
64
+ end
65
+ children.to_xhtml(*args)
66
+ end
67
+
68
+ ###
69
+ # Convert this DocumentFragment to xml
70
+ # See Nokogiri::XML::NodeSet#to_xml
71
+ def to_xml *args
72
+ children.to_xml(*args)
73
+ end
74
+
75
+ ###
76
+ # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
77
+ #
78
+ # Search this fragment for CSS +rules+. +rules+ must be one or more CSS
79
+ # selectors.
80
+ #
81
+ # For more information see Nokogiri::XML::Searchable#css
82
+ def css *args
83
+ if children.any?
84
+ children.css(*args) # 'children' is a smell here
85
+ else
86
+ NodeSet.new(document)
87
+ end
88
+ end
89
+
90
+ #
91
+ # NOTE that we don't delegate #xpath to children ... another smell.
92
+ # def xpath ; end
93
+ #
94
+
95
+ ###
96
+ # call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
97
+ #
98
+ # Search this fragment for +paths+. +paths+ must be one or more XPath or CSS queries.
99
+ #
100
+ # For more information see Nokogiri::XML::Searchable#search
101
+ def search *rules
102
+ rules, handler, ns, binds = extract_params(rules)
103
+
104
+ rules.inject(NodeSet.new(document)) do |set, rule|
105
+ set += if rule =~ Searchable::LOOKS_LIKE_XPATH
106
+ xpath(*([rule, ns, handler, binds].compact))
107
+ else
108
+ children.css(*([rule, ns, handler].compact)) # 'children' is a smell here
109
+ end
110
+ end
111
+ end
112
+
113
+ alias :serialize :to_s
114
+
115
+ class << self
116
+ ####
117
+ # Create a Nokogiri::XML::DocumentFragment from +tags+
118
+ def parse tags
119
+ self.new(XML::Document.new, tags)
120
+ end
121
+ end
122
+
123
+ # A list of Nokogiri::XML::SyntaxError found when parsing a document
124
+ def errors
125
+ document.errors
126
+ end
127
+
128
+ def errors= things # :nodoc:
129
+ document.errors = things
130
+ end
131
+
132
+ private
133
+
134
+ # fix for issue 770
135
+ def namespace_declarations ctx
136
+ ctx.namespace_scopes.map do |namespace|
137
+ prefix = namespace.prefix.nil? ? "" : ":#{namespace.prefix}"
138
+ %Q{xmlns#{prefix}="#{namespace.href}"}
139
+ end.join ' '
140
+ end
141
+
142
+ def coerce data
143
+ return super unless String === data
144
+
145
+ document.fragment(data).children
146
+ end
147
+ end
148
+ end
149
+ end
@@ -1,6 +1,32 @@
1
1
  module Nokogiri
2
2
  module XML
3
- class DTD < Node
3
+ class DTD < Nokogiri::XML::Node
4
+ undef_method :attribute_nodes
5
+ undef_method :values
6
+ undef_method :content
7
+ undef_method :namespace
8
+ undef_method :namespace_definitions
9
+ undef_method :line if method_defined?(:line)
10
+
11
+ def keys
12
+ attributes.keys
13
+ end
14
+
15
+ def each
16
+ attributes.each do |key, value|
17
+ yield([key, value])
18
+ end
19
+ end
20
+
21
+ def html_dtd?
22
+ name.casecmp('html').zero?
23
+ end
24
+
25
+ def html5_dtd?
26
+ html_dtd? &&
27
+ external_id.nil? &&
28
+ (system_id.nil? || system_id == 'about:legacy-compat')
29
+ end
4
30
  end
5
31
  end
6
32
  end
@@ -0,0 +1,36 @@
1
+ module Nokogiri
2
+ module XML
3
+ ###
4
+ # Represents the allowed content in an Element Declaration inside a DTD:
5
+ #
6
+ # <?xml version="1.0"?><?TEST-STYLE PIDATA?>
7
+ # <!DOCTYPE staff SYSTEM "staff.dtd" [
8
+ # <!ELEMENT div1 (head, (p | list | note)*, div2*)>
9
+ # ]>
10
+ # </root>
11
+ #
12
+ # ElementContent represents the tree inside the <!ELEMENT> tag shown above
13
+ # that lists the possible content for the div1 tag.
14
+ class ElementContent
15
+ # Possible definitions of type
16
+ PCDATA = 1
17
+ ELEMENT = 2
18
+ SEQ = 3
19
+ OR = 4
20
+
21
+ # Possible content occurrences
22
+ ONCE = 1
23
+ OPT = 2
24
+ MULT = 3
25
+ PLUS = 4
26
+
27
+ attr_reader :document
28
+
29
+ ###
30
+ # Get the children of this ElementContent node
31
+ def children
32
+ [c1, c2].compact
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,13 @@
1
+ module Nokogiri
2
+ module XML
3
+ class ElementDecl < Nokogiri::XML::Node
4
+ undef_method :namespace
5
+ undef_method :namespace_definitions
6
+ undef_method :line if method_defined?(:line)
7
+
8
+ def inspect
9
+ "#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{to_s.inspect}>"
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,19 @@
1
+ module Nokogiri
2
+ module XML
3
+ class EntityDecl < Nokogiri::XML::Node
4
+ undef_method :attribute_nodes
5
+ undef_method :attributes
6
+ undef_method :namespace
7
+ undef_method :namespace_definitions
8
+ undef_method :line if method_defined?(:line)
9
+
10
+ def self.new name, doc, *args
11
+ doc.create_entity(name, *args)
12
+ end
13
+
14
+ def inspect
15
+ "#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{to_s.inspect}>"
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,13 @@
1
+ module Nokogiri
2
+ module XML
3
+ class Namespace
4
+ include Nokogiri::XML::PP::Node
5
+ attr_reader :document
6
+
7
+ private
8
+ def inspect_attributes
9
+ [:prefix, :href]
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,61 @@
1
+ module Nokogiri
2
+ module XML
3
+ class Node
4
+ ###
5
+ # Save options for serializing nodes
6
+ class SaveOptions
7
+ # Format serialized xml
8
+ FORMAT = 1
9
+ # Do not include declarations
10
+ NO_DECLARATION = 2
11
+ # Do not include empty tags
12
+ NO_EMPTY_TAGS = 4
13
+ # Do not save XHTML
14
+ NO_XHTML = 8
15
+ # Save as XHTML
16
+ AS_XHTML = 16
17
+ # Save as XML
18
+ AS_XML = 32
19
+ # Save as HTML
20
+ AS_HTML = 64
21
+
22
+ if Nokogiri.jruby?
23
+ # Save builder created document
24
+ AS_BUILDER = 128
25
+ # the default for XML documents
26
+ DEFAULT_XML = AS_XML # https://github.com/sparklemotion/nokogiri/issues/#issue/415
27
+ # the default for HTML documents
28
+ DEFAULT_HTML = NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML
29
+ else
30
+ # the default for XML documents
31
+ DEFAULT_XML = FORMAT | AS_XML
32
+ # the default for HTML document
33
+ DEFAULT_HTML = FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML
34
+ end
35
+ # the default for XHTML documents
36
+ DEFAULT_XHTML = FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_XHTML
37
+
38
+ # Integer representation of the SaveOptions
39
+ attr_reader :options
40
+
41
+ # Create a new SaveOptions object with +options+
42
+ def initialize options = 0; @options = options; end
43
+
44
+ constants.each do |constant|
45
+ class_eval %{
46
+ def #{constant.downcase}
47
+ @options |= #{constant}
48
+ self
49
+ end
50
+
51
+ def #{constant.downcase}?
52
+ #{constant} & @options == #{constant}
53
+ end
54
+ }
55
+ end
56
+
57
+ alias :to_i :options
58
+ end
59
+ end
60
+ end
61
+ end