nokogiri 1.0.0 → 1.6.8.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (309)
  1. checksums.yaml +7 -0
  2. data/.autotest +26 -0
  3. data/.cross_rubies +9 -0
  4. data/.editorconfig +17 -0
  5. data/.gemtest +0 -0
  6. data/.travis.yml +51 -0
  7. data/CHANGELOG.rdoc +1160 -0
  8. data/CONTRIBUTING.md +42 -0
  9. data/C_CODING_STYLE.rdoc +33 -0
  10. data/Gemfile +22 -0
  11. data/LICENSE.txt +31 -0
  12. data/Manifest.txt +284 -40
  13. data/README.md +166 -0
  14. data/ROADMAP.md +111 -0
  15. data/Rakefile +310 -199
  16. data/STANDARD_RESPONSES.md +47 -0
  17. data/Y_U_NO_GEMSPEC.md +155 -0
  18. data/appveyor.yml +22 -0
  19. data/bin/nokogiri +118 -0
  20. data/build_all +45 -0
  21. data/dependencies.yml +29 -0
  22. data/ext/nokogiri/depend +358 -0
  23. data/ext/nokogiri/extconf.rb +664 -34
  24. data/ext/nokogiri/html_document.c +120 -33
  25. data/ext/nokogiri/html_document.h +1 -1
  26. data/ext/nokogiri/html_element_description.c +279 -0
  27. data/ext/nokogiri/html_element_description.h +10 -0
  28. data/ext/nokogiri/html_entity_lookup.c +32 -0
  29. data/ext/nokogiri/html_entity_lookup.h +8 -0
  30. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  31. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  32. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  33. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  34. data/ext/nokogiri/nokogiri.c +145 -0
  35. data/ext/nokogiri/nokogiri.h +131 -0
  36. data/ext/nokogiri/xml_attr.c +94 -0
  37. data/ext/nokogiri/xml_attr.h +9 -0
  38. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  39. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  40. data/ext/nokogiri/xml_cdata.c +23 -19
  41. data/ext/nokogiri/xml_cdata.h +1 -1
  42. data/ext/nokogiri/xml_comment.c +69 -0
  43. data/ext/nokogiri/xml_comment.h +9 -0
  44. data/ext/nokogiri/xml_document.c +501 -54
  45. data/ext/nokogiri/xml_document.h +14 -1
  46. data/ext/nokogiri/xml_document_fragment.c +48 -0
  47. data/ext/nokogiri/xml_document_fragment.h +10 -0
  48. data/ext/nokogiri/xml_dtd.c +109 -24
  49. data/ext/nokogiri/xml_dtd.h +3 -1
  50. data/ext/nokogiri/xml_element_content.c +123 -0
  51. data/ext/nokogiri/xml_element_content.h +10 -0
  52. data/ext/nokogiri/xml_element_decl.c +69 -0
  53. data/ext/nokogiri/xml_element_decl.h +9 -0
  54. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  55. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  56. data/ext/nokogiri/xml_entity_decl.c +110 -0
  57. data/ext/nokogiri/xml_entity_decl.h +10 -0
  58. data/ext/nokogiri/xml_entity_reference.c +52 -0
  59. data/ext/nokogiri/xml_entity_reference.h +9 -0
  60. data/ext/nokogiri/xml_io.c +60 -0
  61. data/ext/nokogiri/xml_io.h +11 -0
  62. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  63. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  64. data/ext/nokogiri/xml_namespace.c +117 -0
  65. data/ext/nokogiri/xml_namespace.h +13 -0
  66. data/ext/nokogiri/xml_node.c +1285 -315
  67. data/ext/nokogiri/xml_node.h +4 -6
  68. data/ext/nokogiri/xml_node_set.c +415 -54
  69. data/ext/nokogiri/xml_node_set.h +6 -2
  70. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  71. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  72. data/ext/nokogiri/xml_reader.c +316 -77
  73. data/ext/nokogiri/xml_reader.h +1 -1
  74. data/ext/nokogiri/xml_relax_ng.c +161 -0
  75. data/ext/nokogiri/xml_relax_ng.h +9 -0
  76. data/ext/nokogiri/xml_sax_parser.c +215 -80
  77. data/ext/nokogiri/xml_sax_parser.h +30 -1
  78. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  79. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  80. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  81. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  82. data/ext/nokogiri/xml_schema.c +205 -0
  83. data/ext/nokogiri/xml_schema.h +9 -0
  84. data/ext/nokogiri/xml_syntax_error.c +45 -175
  85. data/ext/nokogiri/xml_syntax_error.h +4 -2
  86. data/ext/nokogiri/xml_text.c +37 -14
  87. data/ext/nokogiri/xml_text.h +1 -1
  88. data/ext/nokogiri/xml_xpath_context.c +230 -13
  89. data/ext/nokogiri/xml_xpath_context.h +2 -1
  90. data/ext/nokogiri/xslt_stylesheet.c +196 -34
  91. data/ext/nokogiri/xslt_stylesheet.h +6 -1
  92. data/lib/nokogiri/css/node.rb +18 -61
  93. data/lib/nokogiri/css/parser.rb +725 -17
  94. data/lib/nokogiri/css/parser.y +126 -63
  95. data/lib/nokogiri/css/parser_extras.rb +91 -0
  96. data/lib/nokogiri/css/syntax_error.rb +7 -0
  97. data/lib/nokogiri/css/tokenizer.rb +148 -5
  98. data/lib/nokogiri/css/tokenizer.rex +31 -39
  99. data/lib/nokogiri/css/xpath_visitor.rb +109 -51
  100. data/lib/nokogiri/css.rb +24 -3
  101. data/lib/nokogiri/decorators/slop.rb +42 -0
  102. data/lib/nokogiri/html/builder.rb +27 -1
  103. data/lib/nokogiri/html/document.rb +329 -3
  104. data/lib/nokogiri/html/document_fragment.rb +39 -0
  105. data/lib/nokogiri/html/element_description.rb +23 -0
  106. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  107. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  108. data/lib/nokogiri/html/sax/parser.rb +35 -4
  109. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  110. data/lib/nokogiri/html/sax/push_parser.rb +36 -0
  111. data/lib/nokogiri/html.rb +18 -76
  112. data/lib/nokogiri/syntax_error.rb +4 -0
  113. data/lib/nokogiri/version.rb +106 -1
  114. data/lib/nokogiri/xml/attr.rb +14 -0
  115. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  116. data/lib/nokogiri/xml/builder.rb +395 -31
  117. data/lib/nokogiri/xml/cdata.rb +4 -2
  118. data/lib/nokogiri/xml/character_data.rb +7 -0
  119. data/lib/nokogiri/xml/document.rb +267 -12
  120. data/lib/nokogiri/xml/document_fragment.rb +149 -0
  121. data/lib/nokogiri/xml/dtd.rb +27 -1
  122. data/lib/nokogiri/xml/element_content.rb +36 -0
  123. data/lib/nokogiri/xml/element_decl.rb +13 -0
  124. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  125. data/lib/nokogiri/xml/namespace.rb +13 -0
  126. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  127. data/lib/nokogiri/xml/node.rb +748 -109
  128. data/lib/nokogiri/xml/node_set.rb +200 -72
  129. data/lib/nokogiri/xml/parse_options.rb +120 -0
  130. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  131. data/lib/nokogiri/xml/pp/node.rb +56 -0
  132. data/lib/nokogiri/xml/pp.rb +2 -0
  133. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  134. data/lib/nokogiri/xml/reader.rb +102 -4
  135. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  136. data/lib/nokogiri/xml/sax/document.rb +114 -2
  137. data/lib/nokogiri/xml/sax/parser.rb +97 -7
  138. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  139. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  140. data/lib/nokogiri/xml/sax.rb +2 -7
  141. data/lib/nokogiri/xml/schema.rb +63 -0
  142. data/lib/nokogiri/xml/searchable.rb +221 -0
  143. data/lib/nokogiri/xml/syntax_error.rb +27 -1
  144. data/lib/nokogiri/xml/text.rb +4 -1
  145. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  146. data/lib/nokogiri/xml/xpath.rb +4 -0
  147. data/lib/nokogiri/xml/xpath_context.rb +3 -1
  148. data/lib/nokogiri/xml.rb +45 -38
  149. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  150. data/lib/nokogiri/xslt.rb +47 -2
  151. data/lib/nokogiri.rb +117 -24
  152. data/lib/xsd/xmlparser/nokogiri.rb +102 -0
  153. data/patches/sort-patches-by-date +25 -0
  154. data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
  155. data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
  156. data/suppressions/README.txt +1 -0
  157. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  158. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  159. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  160. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  161. data/tasks/test.rb +100 -0
  162. data/test/css/test_nthiness.rb +73 -6
  163. data/test/css/test_parser.rb +184 -39
  164. data/test/css/test_tokenizer.rb +72 -19
  165. data/test/css/test_xpath_visitor.rb +44 -2
  166. data/test/decorators/test_slop.rb +20 -0
  167. data/test/files/2ch.html +108 -0
  168. data/test/files/GH_1042.html +18 -0
  169. data/test/files/address_book.rlx +12 -0
  170. data/test/files/address_book.xml +10 -0
  171. data/test/files/atom.xml +344 -0
  172. data/test/files/bar/bar.xsd +4 -0
  173. data/test/files/bogus.xml +0 -0
  174. data/test/files/dont_hurt_em_why.xml +422 -0
  175. data/test/files/encoding.html +82 -0
  176. data/test/files/encoding.xhtml +84 -0
  177. data/test/files/exslt.xml +8 -0
  178. data/test/files/exslt.xslt +35 -0
  179. data/test/files/foo/foo.xsd +4 -0
  180. data/test/files/metacharset.html +10 -0
  181. data/test/files/namespace_pressure_test.xml +1684 -0
  182. data/test/files/noencoding.html +47 -0
  183. data/test/files/po.xml +32 -0
  184. data/test/files/po.xsd +66 -0
  185. data/test/files/saml/saml20assertion_schema.xsd +283 -0
  186. data/test/files/saml/saml20protocol_schema.xsd +302 -0
  187. data/test/files/saml/xenc_schema.xsd +146 -0
  188. data/test/files/saml/xmldsig_schema.xsd +318 -0
  189. data/test/files/shift_jis.html +10 -0
  190. data/test/files/shift_jis.xml +5 -0
  191. data/test/files/shift_jis_no_charset.html +9 -0
  192. data/test/files/slow-xpath.xml +25509 -0
  193. data/test/files/snuggles.xml +3 -0
  194. data/test/files/staff.dtd +10 -0
  195. data/test/files/test_document_url/bar.xml +2 -0
  196. data/test/files/test_document_url/document.dtd +4 -0
  197. data/test/files/test_document_url/document.xml +6 -0
  198. data/test/files/tlm.html +2 -1
  199. data/test/files/to_be_xincluded.xml +2 -0
  200. data/test/files/valid_bar.xml +2 -0
  201. data/test/files/xinclude.xml +4 -0
  202. data/test/helper.rb +124 -13
  203. data/test/html/sax/test_parser.rb +118 -4
  204. data/test/html/sax/test_parser_context.rb +46 -0
  205. data/test/html/sax/test_push_parser.rb +87 -0
  206. data/test/html/test_builder.rb +94 -8
  207. data/test/html/test_document.rb +626 -11
  208. data/test/html/test_document_encoding.rb +145 -0
  209. data/test/html/test_document_fragment.rb +301 -0
  210. data/test/html/test_element_description.rb +105 -0
  211. data/test/html/test_named_characters.rb +14 -0
  212. data/test/html/test_node.rb +212 -0
  213. data/test/html/test_node_encoding.rb +85 -0
  214. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
  215. data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
  216. data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
  217. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  218. data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
  219. data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
  220. data/test/namespaces/test_namespaces_preservation.rb +31 -0
  221. data/test/test_convert_xpath.rb +2 -47
  222. data/test/test_css_cache.rb +45 -0
  223. data/test/test_encoding_handler.rb +48 -0
  224. data/test/test_memory_leak.rb +156 -0
  225. data/test/test_nokogiri.rb +103 -1
  226. data/test/test_soap4r_sax.rb +52 -0
  227. data/test/test_xslt_transforms.rb +293 -8
  228. data/test/xml/node/test_save_options.rb +28 -0
  229. data/test/xml/node/test_subclass.rb +44 -0
  230. data/test/xml/sax/test_parser.rb +309 -8
  231. data/test/xml/sax/test_parser_context.rb +115 -0
  232. data/test/xml/sax/test_push_parser.rb +157 -0
  233. data/test/xml/test_attr.rb +67 -0
  234. data/test/xml/test_attribute_decl.rb +86 -0
  235. data/test/xml/test_builder.rb +327 -2
  236. data/test/xml/test_c14n.rb +180 -0
  237. data/test/xml/test_cdata.rb +32 -2
  238. data/test/xml/test_comment.rb +40 -0
  239. data/test/xml/test_document.rb +846 -35
  240. data/test/xml/test_document_encoding.rb +31 -0
  241. data/test/xml/test_document_fragment.rb +271 -0
  242. data/test/xml/test_dtd.rb +153 -9
  243. data/test/xml/test_dtd_encoding.rb +31 -0
  244. data/test/xml/test_element_content.rb +56 -0
  245. data/test/xml/test_element_decl.rb +73 -0
  246. data/test/xml/test_entity_decl.rb +122 -0
  247. data/test/xml/test_entity_reference.rb +251 -0
  248. data/test/xml/test_namespace.rb +96 -0
  249. data/test/xml/test_node.rb +1126 -105
  250. data/test/xml/test_node_attributes.rb +115 -0
  251. data/test/xml/test_node_encoding.rb +69 -0
  252. data/test/xml/test_node_inheritance.rb +32 -0
  253. data/test/xml/test_node_reparenting.rb +549 -0
  254. data/test/xml/test_node_set.rb +668 -9
  255. data/test/xml/test_parse_options.rb +64 -0
  256. data/test/xml/test_processing_instruction.rb +30 -0
  257. data/test/xml/test_reader.rb +589 -0
  258. data/test/xml/test_reader_encoding.rb +134 -0
  259. data/test/xml/test_relax_ng.rb +60 -0
  260. data/test/xml/test_schema.rb +142 -0
  261. data/test/xml/test_syntax_error.rb +30 -0
  262. data/test/xml/test_text.rb +49 -2
  263. data/test/xml/test_unparented_node.rb +440 -0
  264. data/test/xml/test_xinclude.rb +83 -0
  265. data/test/xml/test_xpath.rb +445 -0
  266. data/test/xslt/test_custom_functions.rb +133 -0
  267. data/test/xslt/test_exception_handling.rb +37 -0
  268. data/test_all +107 -0
  269. metadata +459 -115
  270. data/History.txt +0 -6
  271. data/README.ja.txt +0 -86
  272. data/README.txt +0 -87
  273. data/ext/nokogiri/html_sax_parser.c +0 -32
  274. data/ext/nokogiri/html_sax_parser.h +0 -11
  275. data/ext/nokogiri/native.c +0 -40
  276. data/ext/nokogiri/native.h +0 -51
  277. data/ext/nokogiri/xml_xpath.c +0 -46
  278. data/ext/nokogiri/xml_xpath.h +0 -11
  279. data/lib/nokogiri/css/generated_parser.rb +0 -653
  280. data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
  281. data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
  282. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
  283. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
  284. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  285. data/lib/nokogiri/decorators.rb +0 -1
  286. data/lib/nokogiri/hpricot.rb +0 -47
  287. data/lib/nokogiri/xml/after_handler.rb +0 -18
  288. data/lib/nokogiri/xml/before_handler.rb +0 -32
  289. data/lib/nokogiri/xml/element.rb +0 -6
  290. data/lib/nokogiri/xml/entity_declaration.rb +0 -9
  291. data/nokogiri.gemspec +0 -34
  292. data/test/hpricot/files/basic.xhtml +0 -17
  293. data/test/hpricot/files/boingboing.html +0 -2266
  294. data/test/hpricot/files/cy0.html +0 -3653
  295. data/test/hpricot/files/immob.html +0 -400
  296. data/test/hpricot/files/pace_application.html +0 -1320
  297. data/test/hpricot/files/tenderlove.html +0 -16
  298. data/test/hpricot/files/uswebgen.html +0 -220
  299. data/test/hpricot/files/utf8.html +0 -1054
  300. data/test/hpricot/files/week9.html +0 -1723
  301. data/test/hpricot/files/why.xml +0 -19
  302. data/test/hpricot/load_files.rb +0 -7
  303. data/test/hpricot/test_alter.rb +0 -67
  304. data/test/hpricot/test_builder.rb +0 -27
  305. data/test/hpricot/test_parser.rb +0 -423
  306. data/test/hpricot/test_paths.rb +0 -15
  307. data/test/hpricot/test_preserved.rb +0 -78
  308. data/test/hpricot/test_xml.rb +0 -30
  309. data/test/test_reader.rb +0 -222
@@ -1,19 +1,50 @@
1
1
  module Nokogiri
2
2
  module HTML
3
+ ###
4
+ # Nokogiri lets you write a SAX parser to process HTML but get HTML
5
+ # correction features.
6
+ #
7
+ # See Nokogiri::HTML::SAX::Parser for a basic example of using a
8
+ # SAX parser with HTML.
9
+ #
10
+ # For more information on SAX parsers, see Nokogiri::XML::SAX
3
11
  module SAX
4
- class Parser < XML::SAX::Parser
12
+ ###
13
+ # This class lets you perform SAX style parsing on HTML with HTML
14
+ # error correction.
15
+ #
16
+ # Here is a basic usage example:
17
+ #
18
+ # class MyDoc < Nokogiri::XML::SAX::Document
19
+ # def start_element name, attributes = []
20
+ # puts "found a #{name}"
21
+ # end
22
+ # end
23
+ #
24
+ # parser = Nokogiri::HTML::SAX::Parser.new(MyDoc.new)
25
+ # parser.parse(File.read(ARGV[0], mode: 'rb'))
26
+ #
27
+ # For more information on SAX parsers, see Nokogiri::XML::SAX
28
+ class Parser < Nokogiri::XML::SAX::Parser
5
29
  ###
6
30
  # Parse html stored in +data+ using +encoding+
7
31
  def parse_memory data, encoding = 'UTF-8'
8
- native_parse_memory(data, encoding)
32
+ raise ArgumentError unless data
33
+ return unless data.length > 0
34
+ ctx = ParserContext.memory(data, encoding)
35
+ yield ctx if block_given?
36
+ ctx.parse_with self
9
37
  end
10
38
 
11
39
  ###
12
40
  # Parse a file with +filename+
13
41
  def parse_file filename, encoding = 'UTF-8'
14
- raise Errno::ENOENT unless File.exists?(filename)
42
+ raise ArgumentError unless filename
43
+ raise Errno::ENOENT unless File.exist?(filename)
15
44
  raise Errno::EISDIR if File.directory?(filename)
16
- native_parse_file filename, encoding
45
+ ctx = ParserContext.file(filename, encoding)
46
+ yield ctx if block_given?
47
+ ctx.parse_with self
17
48
  end
18
49
  end
19
50
  end
@@ -0,0 +1,16 @@
1
+ module Nokogiri
2
+ module HTML
3
+ module SAX
4
+ ###
5
+ # Context for HTML SAX parsers. This class is usually not instantiated
6
+ # by the user. Instead, you should be looking at
7
+ # Nokogiri::HTML::SAX::Parser
8
+ class ParserContext < Nokogiri::XML::SAX::ParserContext
9
+ def self.new thing, encoding = 'UTF-8'
10
+ [:read, :close].all? { |x| thing.respond_to?(x) } ? super :
11
+ memory(thing, encoding)
12
+ end
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,36 @@
1
+ module Nokogiri
2
+ module HTML
3
+ module SAX
4
+ class PushParser
5
+
6
+ # The Nokogiri::HTML::SAX::Document on which the PushParser will be
7
+ # operating
8
+ attr_accessor :document
9
+
10
+ def initialize(doc = HTML::SAX::Document.new, file_name = nil, encoding = 'UTF-8')
11
+ @document = doc
12
+ @encoding = encoding
13
+ @sax_parser = HTML::SAX::Parser.new(doc, @encoding)
14
+
15
+ ## Create our push parser context
16
+ initialize_native(@sax_parser, file_name, encoding)
17
+ end
18
+
19
+ ###
20
+ # Write a +chunk+ of HTML to the PushParser. Any callback methods
21
+ # that can be called will be called immediately.
22
+ def write chunk, last_chunk = false
23
+ native_write(chunk, last_chunk)
24
+ end
25
+ alias :<< :write
26
+
27
+ ###
28
+ # Finish the parsing. This method is only necessary for
29
+ # Nokogiri::HTML::SAX::Document#end_document to be called.
30
+ def finish
31
+ write '', true
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
data/lib/nokogiri/html.rb CHANGED
@@ -1,95 +1,37 @@
1
+ require 'nokogiri/html/entity_lookup'
1
2
  require 'nokogiri/html/document'
3
+ require 'nokogiri/html/document_fragment'
4
+ require 'nokogiri/html/sax/parser_context'
2
5
  require 'nokogiri/html/sax/parser'
6
+ require 'nokogiri/html/sax/push_parser'
7
+ require 'nokogiri/html/element_description'
8
+ require 'nokogiri/html/element_description_defaults'
3
9
 
4
10
  module Nokogiri
5
11
  class << self
6
- def HTML thing, url = nil, encoding = nil, options = 2145
7
- Nokogiri::HTML.parse(thing, url, encoding, options)
12
+ ###
13
+ # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
14
+ def HTML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
15
+ Nokogiri::HTML::Document.parse(thing, url, encoding, options, &block)
8
16
  end
9
17
  end
10
18
 
11
19
  module HTML
12
- # Parser options
13
- PARSE_NOERROR = 1 << 5 # No error reports
14
- PARSE_NOWARNING = 1 << 6 # No warnings
15
- PARSE_PEDANTIC = 1 << 7 # Pedantic errors
16
- PARSE_NOBLANKS = 1 << 8 # Remove blanks nodes
17
- PARSE_NONET = 1 << 11 # No network access
18
-
19
20
  class << self
20
- def parse string_or_io, url = nil, encoding = nil, options = 2145
21
- if string_or_io.respond_to?(:read)
22
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
23
- string_or_io = string_or_io.read
24
- end
25
-
26
- Document.read_memory(string_or_io, url, encoding, options)
21
+ ###
22
+ # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
23
+ def parse thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
24
+ Document.parse(thing, url, encoding, options, &block)
27
25
  end
28
26
 
29
27
  ####
30
28
  # Parse a fragment from +string+ in to a NodeSet.
31
- def fragment string
32
- doc = parse(string)
33
- finder = lambda { |children, f|
34
- children.each do |child|
35
- return children if string =~ /<#{child.name}/
36
- finder.call(child.children, f)
37
- end
38
- }
39
- finder.call(doc.children, finder)
29
+ def fragment string, encoding = nil
30
+ HTML::DocumentFragment.parse string, encoding
40
31
  end
41
32
  end
42
33
 
43
- NamedCharacters =
44
- {"AElig"=>198, "Aacute"=>193, "Acirc"=>194, "Agrave"=>192, "Alpha"=>913,
45
- "Aring"=>197, "Atilde"=>195, "Auml"=>196, "Beta"=>914, "Ccedil"=>199,
46
- "Chi"=>935, "Dagger"=>8225, "Delta"=>916, "ETH"=>208, "Eacute"=>201,
47
- "Ecirc"=>202, "Egrave"=>200, "Epsilon"=>917, "Eta"=>919, "Euml"=>203,
48
- "Gamma"=>915, "Iacute"=>205, "Icirc"=>206, "Igrave"=>204, "Iota"=>921,
49
- "Iuml"=>207, "Kappa"=>922, "Lambda"=>923, "Mu"=>924, "Ntilde"=>209, "Nu"=>925,
50
- "OElig"=>338, "Oacute"=>211, "Ocirc"=>212, "Ograve"=>210, "Omega"=>937,
51
- "Omicron"=>927, "Oslash"=>216, "Otilde"=>213, "Ouml"=>214, "Phi"=>934,
52
- "Pi"=>928, "Prime"=>8243, "Psi"=>936, "Rho"=>929, "Scaron"=>352, "Sigma"=>931,
53
- "THORN"=>222, "Tau"=>932, "Theta"=>920, "Uacute"=>218, "Ucirc"=>219,
54
- "Ugrave"=>217, "Upsilon"=>933, "Uuml"=>220, "Xi"=>926, "Yacute"=>221,
55
- "Yuml"=>376, "Zeta"=>918, "aacute"=>225, "acirc"=>226, "acute"=>180,
56
- "aelig"=>230, "agrave"=>224, "alefsym"=>8501, "alpha"=>945, "amp"=>38,
57
- "and"=>8743, "ang"=>8736, "apos"=>39, "aring"=>229, "asymp"=>8776,
58
- "atilde"=>227, "auml"=>228, "bdquo"=>8222, "beta"=>946, "brvbar"=>166,
59
- "bull"=>8226, "cap"=>8745, "ccedil"=>231, "cedil"=>184, "cent"=>162,
60
- "chi"=>967, "circ"=>710, "clubs"=>9827, "cong"=>8773, "copy"=>169,
61
- "crarr"=>8629, "cup"=>8746, "curren"=>164, "dArr"=>8659, "dagger"=>8224,
62
- "darr"=>8595, "deg"=>176, "delta"=>948, "diams"=>9830, "divide"=>247,
63
- "eacute"=>233, "ecirc"=>234, "egrave"=>232, "empty"=>8709, "emsp"=>8195,
64
- "ensp"=>8194, "epsilon"=>949, "equiv"=>8801, "eta"=>951, "eth"=>240,
65
- "euml"=>235, "euro"=>8364, "exist"=>8707, "fnof"=>402, "forall"=>8704,
66
- "frac12"=>189, "frac14"=>188, "frac34"=>190, "frasl"=>8260, "gamma"=>947,
67
- "ge"=>8805, "gt"=>62, "hArr"=>8660, "harr"=>8596, "hearts"=>9829,
68
- "hellip"=>8230, "iacute"=>237, "icirc"=>238, "iexcl"=>161, "igrave"=>236,
69
- "image"=>8465, "infin"=>8734, "int"=>8747, "iota"=>953, "iquest"=>191,
70
- "isin"=>8712, "iuml"=>239, "kappa"=>954, "lArr"=>8656, "lambda"=>955,
71
- "lang"=>9001, "laquo"=>171, "larr"=>8592, "lceil"=>8968, "ldquo"=>8220,
72
- "le"=>8804, "lfloor"=>8970, "lowast"=>8727, "loz"=>9674, "lrm"=>8206,
73
- "lsaquo"=>8249, "lsquo"=>8216, "lt"=>60, "macr"=>175, "mdash"=>8212,
74
- "micro"=>181, "middot"=>183, "minus"=>8722, "mu"=>956, "nabla"=>8711,
75
- "nbsp"=>160, "ndash"=>8211, "ne"=>8800, "ni"=>8715, "not"=>172, "notin"=>8713,
76
- "nsub"=>8836, "ntilde"=>241, "nu"=>957, "oacute"=>243, "ocirc"=>244,
77
- "oelig"=>339, "ograve"=>242, "oline"=>8254, "omega"=>969, "omicron"=>959,
78
- "oplus"=>8853, "or"=>8744, "ordf"=>170, "ordm"=>186, "oslash"=>248,
79
- "otilde"=>245, "otimes"=>8855, "ouml"=>246, "para"=>182, "part"=>8706,
80
- "permil"=>8240, "perp"=>8869, "phi"=>966, "pi"=>960, "piv"=>982,
81
- "plusmn"=>177, "pound"=>163, "prime"=>8242, "prod"=>8719, "prop"=>8733,
82
- "psi"=>968, "quot"=>34, "rArr"=>8658, "radic"=>8730, "rang"=>9002,
83
- "raquo"=>187, "rarr"=>8594, "rceil"=>8969, "rdquo"=>8221, "real"=>8476,
84
- "reg"=>174, "rfloor"=>8971, "rho"=>961, "rlm"=>8207, "rsaquo"=>8250,
85
- "rsquo"=>8217, "sbquo"=>8218, "scaron"=>353, "sdot"=>8901, "sect"=>167,
86
- "shy"=>173, "sigma"=>963, "sigmaf"=>962, "sim"=>8764, "spades"=>9824,
87
- "sub"=>8834, "sube"=>8838, "sum"=>8721, "sup"=>8835, "sup1"=>185, "sup2"=>178,
88
- "sup3"=>179, "supe"=>8839, "szlig"=>223, "tau"=>964, "there4"=>8756,
89
- "theta"=>952, "thetasym"=>977, "thinsp"=>8201, "thorn"=>254, "tilde"=>732,
90
- "times"=>215, "trade"=>8482, "uArr"=>8657, "uacute"=>250, "uarr"=>8593,
91
- "ucirc"=>251, "ugrave"=>249, "uml"=>168, "upsih"=>978, "upsilon"=>965,
92
- "uuml"=>252, "weierp"=>8472, "xi"=>958, "yacute"=>253, "yen"=>165,
93
- "yuml"=>255, "zeta"=>950, "zwj"=>8205, "zwnj"=>8204}
34
+ # Instance of Nokogiri::HTML::EntityLookup
35
+ NamedCharacters = EntityLookup.new
94
36
  end
95
37
  end
@@ -0,0 +1,4 @@
1
+ module Nokogiri
2
+ class SyntaxError < ::StandardError
3
+ end
4
+ end
@@ -1,3 +1,108 @@
1
1
  module Nokogiri
2
- VERSION = '1.0.0'
2
+ # The version of Nokogiri you are using
3
+ VERSION = '1.6.8.1'
4
+
5
+ class VersionInfo # :nodoc:
6
+ def jruby?
7
+ ::JRUBY_VERSION if RUBY_PLATFORM == "java"
8
+ end
9
+
10
+ def engine
11
+ defined?(RUBY_ENGINE) ? RUBY_ENGINE : 'mri'
12
+ end
13
+
14
+ def loaded_parser_version
15
+ LIBXML_PARSER_VERSION.scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first.collect{ |j|
16
+ j.to_i
17
+ }.join(".")
18
+ end
19
+
20
+ def compiled_parser_version
21
+ LIBXML_VERSION
22
+ end
23
+
24
+ def libxml2?
25
+ defined?(LIBXML_VERSION)
26
+ end
27
+
28
+ def libxml2_using_system?
29
+ ! libxml2_using_packaged?
30
+ end
31
+
32
+ def libxml2_using_packaged?
33
+ NOKOGIRI_USE_PACKAGED_LIBRARIES
34
+ end
35
+
36
+ def warnings
37
+ return [] unless libxml2?
38
+
39
+ if compiled_parser_version != loaded_parser_version
40
+ ["Nokogiri was built against LibXML version #{compiled_parser_version}, but has dynamically loaded #{loaded_parser_version}"]
41
+ else
42
+ []
43
+ end
44
+ end
45
+
46
+ def to_hash
47
+ hash_info = {}
48
+ hash_info['warnings'] = []
49
+ hash_info['nokogiri'] = Nokogiri::VERSION
50
+ hash_info['ruby'] = {}
51
+ hash_info['ruby']['version'] = ::RUBY_VERSION
52
+ hash_info['ruby']['platform'] = ::RUBY_PLATFORM
53
+ hash_info['ruby']['description'] = ::RUBY_DESCRIPTION
54
+ hash_info['ruby']['engine'] = engine
55
+ hash_info['ruby']['jruby'] = jruby? if jruby?
56
+
57
+ if libxml2?
58
+ hash_info['libxml'] = {}
59
+ hash_info['libxml']['binding'] = 'extension'
60
+ if libxml2_using_packaged?
61
+ hash_info['libxml']['source'] = "packaged"
62
+ hash_info['libxml']['libxml2_path'] = NOKOGIRI_LIBXML2_PATH
63
+ hash_info['libxml']['libxslt_path'] = NOKOGIRI_LIBXSLT_PATH
64
+ hash_info['libxml']['libxml2_patches'] = NOKOGIRI_LIBXML2_PATCHES
65
+ hash_info['libxml']['libxslt_patches'] = NOKOGIRI_LIBXSLT_PATCHES
66
+ else
67
+ hash_info['libxml']['source'] = "system"
68
+ end
69
+ hash_info['libxml']['compiled'] = compiled_parser_version
70
+ hash_info['libxml']['loaded'] = loaded_parser_version
71
+ hash_info['warnings'] = warnings
72
+ elsif jruby?
73
+ hash_info['xerces'] = Nokogiri::XERCES_VERSION
74
+ hash_info['nekohtml'] = Nokogiri::NEKO_VERSION
75
+ end
76
+
77
+ hash_info
78
+ end
79
+
80
+ def to_markdown
81
+ begin
82
+ require 'psych'
83
+ rescue LoadError
84
+ end
85
+ require 'yaml'
86
+ "# Nokogiri (#{Nokogiri::VERSION})\n" +
87
+ YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
88
+ end
89
+
90
+ # FIXME: maybe switch to singleton?
91
+ @@instance = new
92
+ @@instance.warnings.each do |warning|
93
+ warn "WARNING: #{warning}"
94
+ end
95
+ def self.instance; @@instance; end
96
+ end
97
+
98
+ # More complete version information about libxml
99
+ VERSION_INFO = VersionInfo.instance.to_hash
100
+
101
+ def self.uses_libxml? # :nodoc:
102
+ VersionInfo.instance.libxml2?
103
+ end
104
+
105
+ def self.jruby? # :nodoc:
106
+ VersionInfo.instance.jruby?
107
+ end
3
108
  end
@@ -0,0 +1,14 @@
1
+ module Nokogiri
2
+ module XML
3
+ class Attr < Node
4
+ alias :value :content
5
+ alias :to_s :content
6
+ alias :content= :value=
7
+
8
+ private
9
+ def inspect_attributes
10
+ [:name, :namespace, :value]
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,18 @@
1
+ module Nokogiri
2
+ module XML
3
+ ###
4
+ # Represents an attribute declaration in a DTD
5
+ class AttributeDecl < Nokogiri::XML::Node
6
+ undef_method :attribute_nodes
7
+ undef_method :attributes
8
+ undef_method :content
9
+ undef_method :namespace
10
+ undef_method :namespace_definitions
11
+ undef_method :line if method_defined?(:line)
12
+
13
+ def inspect
14
+ "#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{to_s.inspect}>"
15
+ end
16
+ end
17
+ end
18
+ end