nokogiri 1.0.0 → 1.6.8.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (309) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +26 -0
  3. data/.cross_rubies +9 -0
  4. data/.editorconfig +17 -0
  5. data/.gemtest +0 -0
  6. data/.travis.yml +51 -0
  7. data/CHANGELOG.rdoc +1160 -0
  8. data/CONTRIBUTING.md +42 -0
  9. data/C_CODING_STYLE.rdoc +33 -0
  10. data/Gemfile +22 -0
  11. data/LICENSE.txt +31 -0
  12. data/Manifest.txt +284 -40
  13. data/README.md +166 -0
  14. data/ROADMAP.md +111 -0
  15. data/Rakefile +310 -199
  16. data/STANDARD_RESPONSES.md +47 -0
  17. data/Y_U_NO_GEMSPEC.md +155 -0
  18. data/appveyor.yml +22 -0
  19. data/bin/nokogiri +118 -0
  20. data/build_all +45 -0
  21. data/dependencies.yml +29 -0
  22. data/ext/nokogiri/depend +358 -0
  23. data/ext/nokogiri/extconf.rb +664 -34
  24. data/ext/nokogiri/html_document.c +120 -33
  25. data/ext/nokogiri/html_document.h +1 -1
  26. data/ext/nokogiri/html_element_description.c +279 -0
  27. data/ext/nokogiri/html_element_description.h +10 -0
  28. data/ext/nokogiri/html_entity_lookup.c +32 -0
  29. data/ext/nokogiri/html_entity_lookup.h +8 -0
  30. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  31. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  32. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  33. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  34. data/ext/nokogiri/nokogiri.c +145 -0
  35. data/ext/nokogiri/nokogiri.h +131 -0
  36. data/ext/nokogiri/xml_attr.c +94 -0
  37. data/ext/nokogiri/xml_attr.h +9 -0
  38. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  39. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  40. data/ext/nokogiri/xml_cdata.c +23 -19
  41. data/ext/nokogiri/xml_cdata.h +1 -1
  42. data/ext/nokogiri/xml_comment.c +69 -0
  43. data/ext/nokogiri/xml_comment.h +9 -0
  44. data/ext/nokogiri/xml_document.c +501 -54
  45. data/ext/nokogiri/xml_document.h +14 -1
  46. data/ext/nokogiri/xml_document_fragment.c +48 -0
  47. data/ext/nokogiri/xml_document_fragment.h +10 -0
  48. data/ext/nokogiri/xml_dtd.c +109 -24
  49. data/ext/nokogiri/xml_dtd.h +3 -1
  50. data/ext/nokogiri/xml_element_content.c +123 -0
  51. data/ext/nokogiri/xml_element_content.h +10 -0
  52. data/ext/nokogiri/xml_element_decl.c +69 -0
  53. data/ext/nokogiri/xml_element_decl.h +9 -0
  54. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  55. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  56. data/ext/nokogiri/xml_entity_decl.c +110 -0
  57. data/ext/nokogiri/xml_entity_decl.h +10 -0
  58. data/ext/nokogiri/xml_entity_reference.c +52 -0
  59. data/ext/nokogiri/xml_entity_reference.h +9 -0
  60. data/ext/nokogiri/xml_io.c +60 -0
  61. data/ext/nokogiri/xml_io.h +11 -0
  62. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  63. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  64. data/ext/nokogiri/xml_namespace.c +117 -0
  65. data/ext/nokogiri/xml_namespace.h +13 -0
  66. data/ext/nokogiri/xml_node.c +1285 -315
  67. data/ext/nokogiri/xml_node.h +4 -6
  68. data/ext/nokogiri/xml_node_set.c +415 -54
  69. data/ext/nokogiri/xml_node_set.h +6 -2
  70. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  71. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  72. data/ext/nokogiri/xml_reader.c +316 -77
  73. data/ext/nokogiri/xml_reader.h +1 -1
  74. data/ext/nokogiri/xml_relax_ng.c +161 -0
  75. data/ext/nokogiri/xml_relax_ng.h +9 -0
  76. data/ext/nokogiri/xml_sax_parser.c +215 -80
  77. data/ext/nokogiri/xml_sax_parser.h +30 -1
  78. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  79. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  80. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  81. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  82. data/ext/nokogiri/xml_schema.c +205 -0
  83. data/ext/nokogiri/xml_schema.h +9 -0
  84. data/ext/nokogiri/xml_syntax_error.c +45 -175
  85. data/ext/nokogiri/xml_syntax_error.h +4 -2
  86. data/ext/nokogiri/xml_text.c +37 -14
  87. data/ext/nokogiri/xml_text.h +1 -1
  88. data/ext/nokogiri/xml_xpath_context.c +230 -13
  89. data/ext/nokogiri/xml_xpath_context.h +2 -1
  90. data/ext/nokogiri/xslt_stylesheet.c +196 -34
  91. data/ext/nokogiri/xslt_stylesheet.h +6 -1
  92. data/lib/nokogiri/css/node.rb +18 -61
  93. data/lib/nokogiri/css/parser.rb +725 -17
  94. data/lib/nokogiri/css/parser.y +126 -63
  95. data/lib/nokogiri/css/parser_extras.rb +91 -0
  96. data/lib/nokogiri/css/syntax_error.rb +7 -0
  97. data/lib/nokogiri/css/tokenizer.rb +148 -5
  98. data/lib/nokogiri/css/tokenizer.rex +31 -39
  99. data/lib/nokogiri/css/xpath_visitor.rb +109 -51
  100. data/lib/nokogiri/css.rb +24 -3
  101. data/lib/nokogiri/decorators/slop.rb +42 -0
  102. data/lib/nokogiri/html/builder.rb +27 -1
  103. data/lib/nokogiri/html/document.rb +329 -3
  104. data/lib/nokogiri/html/document_fragment.rb +39 -0
  105. data/lib/nokogiri/html/element_description.rb +23 -0
  106. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  107. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  108. data/lib/nokogiri/html/sax/parser.rb +35 -4
  109. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  110. data/lib/nokogiri/html/sax/push_parser.rb +36 -0
  111. data/lib/nokogiri/html.rb +18 -76
  112. data/lib/nokogiri/syntax_error.rb +4 -0
  113. data/lib/nokogiri/version.rb +106 -1
  114. data/lib/nokogiri/xml/attr.rb +14 -0
  115. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  116. data/lib/nokogiri/xml/builder.rb +395 -31
  117. data/lib/nokogiri/xml/cdata.rb +4 -2
  118. data/lib/nokogiri/xml/character_data.rb +7 -0
  119. data/lib/nokogiri/xml/document.rb +267 -12
  120. data/lib/nokogiri/xml/document_fragment.rb +149 -0
  121. data/lib/nokogiri/xml/dtd.rb +27 -1
  122. data/lib/nokogiri/xml/element_content.rb +36 -0
  123. data/lib/nokogiri/xml/element_decl.rb +13 -0
  124. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  125. data/lib/nokogiri/xml/namespace.rb +13 -0
  126. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  127. data/lib/nokogiri/xml/node.rb +748 -109
  128. data/lib/nokogiri/xml/node_set.rb +200 -72
  129. data/lib/nokogiri/xml/parse_options.rb +120 -0
  130. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  131. data/lib/nokogiri/xml/pp/node.rb +56 -0
  132. data/lib/nokogiri/xml/pp.rb +2 -0
  133. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  134. data/lib/nokogiri/xml/reader.rb +102 -4
  135. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  136. data/lib/nokogiri/xml/sax/document.rb +114 -2
  137. data/lib/nokogiri/xml/sax/parser.rb +97 -7
  138. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  139. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  140. data/lib/nokogiri/xml/sax.rb +2 -7
  141. data/lib/nokogiri/xml/schema.rb +63 -0
  142. data/lib/nokogiri/xml/searchable.rb +221 -0
  143. data/lib/nokogiri/xml/syntax_error.rb +27 -1
  144. data/lib/nokogiri/xml/text.rb +4 -1
  145. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  146. data/lib/nokogiri/xml/xpath.rb +4 -0
  147. data/lib/nokogiri/xml/xpath_context.rb +3 -1
  148. data/lib/nokogiri/xml.rb +45 -38
  149. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  150. data/lib/nokogiri/xslt.rb +47 -2
  151. data/lib/nokogiri.rb +117 -24
  152. data/lib/xsd/xmlparser/nokogiri.rb +102 -0
  153. data/patches/sort-patches-by-date +25 -0
  154. data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
  155. data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
  156. data/suppressions/README.txt +1 -0
  157. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  158. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  159. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  160. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  161. data/tasks/test.rb +100 -0
  162. data/test/css/test_nthiness.rb +73 -6
  163. data/test/css/test_parser.rb +184 -39
  164. data/test/css/test_tokenizer.rb +72 -19
  165. data/test/css/test_xpath_visitor.rb +44 -2
  166. data/test/decorators/test_slop.rb +20 -0
  167. data/test/files/2ch.html +108 -0
  168. data/test/files/GH_1042.html +18 -0
  169. data/test/files/address_book.rlx +12 -0
  170. data/test/files/address_book.xml +10 -0
  171. data/test/files/atom.xml +344 -0
  172. data/test/files/bar/bar.xsd +4 -0
  173. data/test/files/bogus.xml +0 -0
  174. data/test/files/dont_hurt_em_why.xml +422 -0
  175. data/test/files/encoding.html +82 -0
  176. data/test/files/encoding.xhtml +84 -0
  177. data/test/files/exslt.xml +8 -0
  178. data/test/files/exslt.xslt +35 -0
  179. data/test/files/foo/foo.xsd +4 -0
  180. data/test/files/metacharset.html +10 -0
  181. data/test/files/namespace_pressure_test.xml +1684 -0
  182. data/test/files/noencoding.html +47 -0
  183. data/test/files/po.xml +32 -0
  184. data/test/files/po.xsd +66 -0
  185. data/test/files/saml/saml20assertion_schema.xsd +283 -0
  186. data/test/files/saml/saml20protocol_schema.xsd +302 -0
  187. data/test/files/saml/xenc_schema.xsd +146 -0
  188. data/test/files/saml/xmldsig_schema.xsd +318 -0
  189. data/test/files/shift_jis.html +10 -0
  190. data/test/files/shift_jis.xml +5 -0
  191. data/test/files/shift_jis_no_charset.html +9 -0
  192. data/test/files/slow-xpath.xml +25509 -0
  193. data/test/files/snuggles.xml +3 -0
  194. data/test/files/staff.dtd +10 -0
  195. data/test/files/test_document_url/bar.xml +2 -0
  196. data/test/files/test_document_url/document.dtd +4 -0
  197. data/test/files/test_document_url/document.xml +6 -0
  198. data/test/files/tlm.html +2 -1
  199. data/test/files/to_be_xincluded.xml +2 -0
  200. data/test/files/valid_bar.xml +2 -0
  201. data/test/files/xinclude.xml +4 -0
  202. data/test/helper.rb +124 -13
  203. data/test/html/sax/test_parser.rb +118 -4
  204. data/test/html/sax/test_parser_context.rb +46 -0
  205. data/test/html/sax/test_push_parser.rb +87 -0
  206. data/test/html/test_builder.rb +94 -8
  207. data/test/html/test_document.rb +626 -11
  208. data/test/html/test_document_encoding.rb +145 -0
  209. data/test/html/test_document_fragment.rb +301 -0
  210. data/test/html/test_element_description.rb +105 -0
  211. data/test/html/test_named_characters.rb +14 -0
  212. data/test/html/test_node.rb +212 -0
  213. data/test/html/test_node_encoding.rb +85 -0
  214. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
  215. data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
  216. data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
  217. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  218. data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
  219. data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
  220. data/test/namespaces/test_namespaces_preservation.rb +31 -0
  221. data/test/test_convert_xpath.rb +2 -47
  222. data/test/test_css_cache.rb +45 -0
  223. data/test/test_encoding_handler.rb +48 -0
  224. data/test/test_memory_leak.rb +156 -0
  225. data/test/test_nokogiri.rb +103 -1
  226. data/test/test_soap4r_sax.rb +52 -0
  227. data/test/test_xslt_transforms.rb +293 -8
  228. data/test/xml/node/test_save_options.rb +28 -0
  229. data/test/xml/node/test_subclass.rb +44 -0
  230. data/test/xml/sax/test_parser.rb +309 -8
  231. data/test/xml/sax/test_parser_context.rb +115 -0
  232. data/test/xml/sax/test_push_parser.rb +157 -0
  233. data/test/xml/test_attr.rb +67 -0
  234. data/test/xml/test_attribute_decl.rb +86 -0
  235. data/test/xml/test_builder.rb +327 -2
  236. data/test/xml/test_c14n.rb +180 -0
  237. data/test/xml/test_cdata.rb +32 -2
  238. data/test/xml/test_comment.rb +40 -0
  239. data/test/xml/test_document.rb +846 -35
  240. data/test/xml/test_document_encoding.rb +31 -0
  241. data/test/xml/test_document_fragment.rb +271 -0
  242. data/test/xml/test_dtd.rb +153 -9
  243. data/test/xml/test_dtd_encoding.rb +31 -0
  244. data/test/xml/test_element_content.rb +56 -0
  245. data/test/xml/test_element_decl.rb +73 -0
  246. data/test/xml/test_entity_decl.rb +122 -0
  247. data/test/xml/test_entity_reference.rb +251 -0
  248. data/test/xml/test_namespace.rb +96 -0
  249. data/test/xml/test_node.rb +1126 -105
  250. data/test/xml/test_node_attributes.rb +115 -0
  251. data/test/xml/test_node_encoding.rb +69 -0
  252. data/test/xml/test_node_inheritance.rb +32 -0
  253. data/test/xml/test_node_reparenting.rb +549 -0
  254. data/test/xml/test_node_set.rb +668 -9
  255. data/test/xml/test_parse_options.rb +64 -0
  256. data/test/xml/test_processing_instruction.rb +30 -0
  257. data/test/xml/test_reader.rb +589 -0
  258. data/test/xml/test_reader_encoding.rb +134 -0
  259. data/test/xml/test_relax_ng.rb +60 -0
  260. data/test/xml/test_schema.rb +142 -0
  261. data/test/xml/test_syntax_error.rb +30 -0
  262. data/test/xml/test_text.rb +49 -2
  263. data/test/xml/test_unparented_node.rb +440 -0
  264. data/test/xml/test_xinclude.rb +83 -0
  265. data/test/xml/test_xpath.rb +445 -0
  266. data/test/xslt/test_custom_functions.rb +133 -0
  267. data/test/xslt/test_exception_handling.rb +37 -0
  268. data/test_all +107 -0
  269. metadata +459 -115
  270. data/History.txt +0 -6
  271. data/README.ja.txt +0 -86
  272. data/README.txt +0 -87
  273. data/ext/nokogiri/html_sax_parser.c +0 -32
  274. data/ext/nokogiri/html_sax_parser.h +0 -11
  275. data/ext/nokogiri/native.c +0 -40
  276. data/ext/nokogiri/native.h +0 -51
  277. data/ext/nokogiri/xml_xpath.c +0 -46
  278. data/ext/nokogiri/xml_xpath.h +0 -11
  279. data/lib/nokogiri/css/generated_parser.rb +0 -653
  280. data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
  281. data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
  282. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
  283. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
  284. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  285. data/lib/nokogiri/decorators.rb +0 -1
  286. data/lib/nokogiri/hpricot.rb +0 -47
  287. data/lib/nokogiri/xml/after_handler.rb +0 -18
  288. data/lib/nokogiri/xml/before_handler.rb +0 -32
  289. data/lib/nokogiri/xml/element.rb +0 -6
  290. data/lib/nokogiri/xml/entity_declaration.rb +0 -9
  291. data/nokogiri.gemspec +0 -34
  292. data/test/hpricot/files/basic.xhtml +0 -17
  293. data/test/hpricot/files/boingboing.html +0 -2266
  294. data/test/hpricot/files/cy0.html +0 -3653
  295. data/test/hpricot/files/immob.html +0 -400
  296. data/test/hpricot/files/pace_application.html +0 -1320
  297. data/test/hpricot/files/tenderlove.html +0 -16
  298. data/test/hpricot/files/uswebgen.html +0 -220
  299. data/test/hpricot/files/utf8.html +0 -1054
  300. data/test/hpricot/files/week9.html +0 -1723
  301. data/test/hpricot/files/why.xml +0 -19
  302. data/test/hpricot/load_files.rb +0 -7
  303. data/test/hpricot/test_alter.rb +0 -67
  304. data/test/hpricot/test_builder.rb +0 -27
  305. data/test/hpricot/test_parser.rb +0 -423
  306. data/test/hpricot/test_paths.rb +0 -15
  307. data/test/hpricot/test_preserved.rb +0 -78
  308. data/test/hpricot/test_xml.rb +0 -30
  309. data/test/test_reader.rb +0 -222
data/README.ja.txt DELETED
@@ -1,86 +0,0 @@
1
- = Nokogiri (鋸)
2
-
3
- * http://nokogiri.rubyforge.org/
4
- * http://github.com/tenderlove/nokogiri/wikis
5
- * http://github.com/tenderlove/nokogiri/tree/master
6
-
7
- == DESCRIPTION:
8
-
9
- Nokogiri はHTMLやXMLやSAXやXSLTやReaderのパーサーです。
10
-
11
- == FEATURES:
12
-
13
- * XPath で探せる
14
- * CSS3 のセレクターで探せる
15
- * XML/HTMLのビルダーはある
16
-
17
- NokogiriはHpricotより早くパーサーし、検索出来たり、
18
- 正確にCSS3とXPathをサポート出来たりする。
19
-
20
- * http://gist.github.com/18533
21
-
22
- NokogiriはHpricotの代わりに使用出来る。
23
- その互換性は簡単に正しいCSSとXPathを使用する事が出来る。
24
-
25
- == SYNOPSIS:
26
-
27
- require 'nokogiri'
28
- require 'open-uri'
29
-
30
- doc = Nokogiri::HTML(open('http://www.google.com/search?q=tenderlove'))
31
-
32
- ####
33
- # Search for nodes by css
34
- doc.css('h3.r a.l').each do |link|
35
- puts link.content
36
- end
37
-
38
- ####
39
- # Search for nodes by xpath
40
- doc.xpath('//h3/a[@class="l"]').each do |link|
41
- puts link.content
42
- end
43
-
44
- ####
45
- # Or mix and match.
46
- doc.search('h3.r a.l', '//h3/a[@class="l"]').each do |link|
47
- puts link.content
48
- end
49
-
50
-
51
- == REQUIREMENTS:
52
-
53
- * ruby 1.8 or 1.9
54
- * libxml
55
-
56
- == INSTALL:
57
-
58
- * sudo gem install nokogiri
59
-
60
- == LICENSE:
61
-
62
- (The MIT License)
63
-
64
- Copyright (c) 2008:
65
-
66
- * {Aaron Patterson}[http://tenderlovemaking.com]
67
- * {Mike Dalessio}[http://mike.daless.io]
68
-
69
- Permission is hereby granted, free of charge, to any person obtaining
70
- a copy of this software and associated documentation files (the
71
- 'Software'), to deal in the Software without restriction, including
72
- without limitation the rights to use, copy, modify, merge, publish,
73
- distribute, sublicense, and/or sell copies of the Software, and to
74
- permit persons to whom the Software is furnished to do so, subject to
75
- the following conditions:
76
-
77
- The above copyright notice and this permission notice shall be
78
- included in all copies or substantial portions of the Software.
79
-
80
- THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
81
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
82
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
83
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
84
- CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
85
- TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
86
- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.txt DELETED
@@ -1,87 +0,0 @@
1
- = Nokogiri
2
-
3
- * http://nokogiri.rubyforge.org/
4
- * http://github.com/tenderlove/nokogiri/wikis
5
- * http://github.com/tenderlove/nokogiri/tree/master
6
-
7
- == DESCRIPTION:
8
-
9
- Nokogiri (鋸) is an HTML, XML, SAX, and Reader parser.
10
-
11
- == FEATURES:
12
-
13
- * XPath support for document searching
14
- * CSS3 selector support for document searching
15
- * XML/HTML builder
16
- * Drop in replacement for Hpricot
17
-
18
- Nokogiri parses and searches XML/HTML faster than Hpricot, and also has
19
- correctly implemented CSS3 selector support as well as XPath support.
20
-
21
- * http://gist.github.com/18533
22
-
23
- Nokogiri also features an Hpricot compatibility layer to help ease the change
24
- to using correct CSS and XPath.
25
-
26
- == SYNOPSIS:
27
-
28
- require 'nokogiri'
29
- require 'open-uri'
30
-
31
- doc = Nokogiri::HTML(open('http://www.google.com/search?q=tenderlove'))
32
-
33
- ####
34
- # Search for nodes by css
35
- doc.css('h3.r a.l').each do |link|
36
- puts link.content
37
- end
38
-
39
- ####
40
- # Search for nodes by xpath
41
- doc.xpath('//h3/a[@class="l"]').each do |link|
42
- puts link.content
43
- end
44
-
45
- ####
46
- # Or mix and match.
47
- doc.search('h3.r a.l', '//h3/a[@class="l"]').each do |link|
48
- puts link.content
49
- end
50
-
51
-
52
- == REQUIREMENTS:
53
-
54
- * ruby 1.8 or 1.9
55
- * libxml
56
-
57
- == INSTALL:
58
-
59
- * sudo gem install nokogiri
60
-
61
- == LICENSE:
62
-
63
- (The MIT License)
64
-
65
- Copyright (c) 2008:
66
-
67
- * {Aaron Patterson}[http://tenderlovemaking.com]
68
- * {Mike Dalessio}[http://mike.daless.io]
69
-
70
- Permission is hereby granted, free of charge, to any person obtaining
71
- a copy of this software and associated documentation files (the
72
- 'Software'), to deal in the Software without restriction, including
73
- without limitation the rights to use, copy, modify, merge, publish,
74
- distribute, sublicense, and/or sell copies of the Software, and to
75
- permit persons to whom the Software is furnished to do so, subject to
76
- the following conditions:
77
-
78
- The above copyright notice and this permission notice shall be
79
- included in all copies or substantial portions of the Software.
80
-
81
- THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
82
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
83
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
84
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
85
- CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
86
- TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
87
- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/ext/nokogiri/html_sax_parser.c DELETED
@@ -1,32 +0,0 @@
1
- #include <html_sax_parser.h>
2
-
3
- static VALUE native_parse_file(VALUE self, VALUE data, VALUE encoding)
4
- {
5
- xmlSAXHandlerPtr handler;
6
- Data_Get_Struct(self, xmlSAXHandler, handler);
7
- htmlSAXParseFile( StringValuePtr(data),
8
- (const char *)StringValuePtr(encoding),
9
- (htmlSAXHandlerPtr)handler,
10
- (void *)self );
11
- return data;
12
- }
13
-
14
- static VALUE native_parse_memory(VALUE self, VALUE data, VALUE encoding)
15
- {
16
- xmlSAXHandlerPtr handler;
17
- Data_Get_Struct(self, xmlSAXHandler, handler);
18
- htmlSAXParseDoc( (xmlChar *)StringValuePtr(data),
19
- (const char *)StringValuePtr(encoding),
20
- (htmlSAXHandlerPtr)handler,
21
- (void *)self );
22
- return data;
23
- }
24
-
25
- VALUE cNokogiriHtmlSaxParser ;
26
- void init_html_sax_parser()
27
- {
28
- VALUE klass = cNokogiriHtmlSaxParser =
29
- rb_const_get(mNokogiriHtmlSax, rb_intern("Parser"));
30
- rb_define_private_method(klass, "native_parse_memory", native_parse_memory, 2);
31
- rb_define_private_method(klass, "native_parse_file", native_parse_file, 2);
32
- }
data/ext/nokogiri/html_sax_parser.h DELETED
@@ -1,11 +0,0 @@
1
- #ifndef NOKOGIRI_HTML_SAX_PARSER
2
- #define NOKOGIRI_HTML_SAX_PARSER
3
-
4
- #include <native.h>
5
-
6
- void init_html_sax_parser();
7
-
8
- extern VALUE cNokogiriHtmlSaxParser ;
9
- #endif
10
-
11
-
data/ext/nokogiri/native.c DELETED
@@ -1,40 +0,0 @@
1
- #include <native.h>
2
-
3
- VALUE mNokogiri ;
4
- VALUE mNokogiriXml ;
5
- VALUE mNokogiriHtml ;
6
- VALUE mNokogiriXslt ;
7
- VALUE mNokogiriXmlSax ;
8
- VALUE mNokogiriHtmlSax ;
9
-
10
- void Init_native()
11
- {
12
- mNokogiri = rb_const_get(rb_cObject, rb_intern("Nokogiri"));
13
- mNokogiriXml = rb_const_get(mNokogiri, rb_intern("XML"));
14
- mNokogiriHtml = rb_const_get(mNokogiri, rb_intern("HTML"));
15
- mNokogiriXslt = rb_const_get(mNokogiri, rb_intern("XSLT"));
16
- mNokogiriXmlSax = rb_const_get(mNokogiriXml, rb_intern("SAX"));
17
- mNokogiriHtmlSax = rb_const_get(mNokogiriHtml, rb_intern("SAX"));
18
-
19
- rb_const_set( mNokogiri,
20
- rb_intern("LIBXML_VERSION"),
21
- rb_str_new2(LIBXML_DOTTED_VERSION)
22
- );
23
-
24
- xmlSetStructuredErrorFunc(NULL, Nokogiri_error_handler);
25
-
26
- init_xml_document();
27
- init_html_document();
28
- init_xml_node();
29
- init_xml_text();
30
- init_xml_cdata();
31
- init_xml_node_set();
32
- init_xml_xpath_context();
33
- init_xml_xpath();
34
- init_xml_sax_parser();
35
- init_xml_reader();
36
- init_xml_dtd();
37
- init_html_sax_parser();
38
- init_xslt_stylesheet();
39
- init_xml_syntax_error();
40
- }
data/ext/nokogiri/native.h DELETED
@@ -1,51 +0,0 @@
1
- #ifndef NOKOGIRI_NATIVE
2
- #define NOKOGIRI_NATIVE
3
-
4
- #include <stdlib.h>
5
- #include <ruby.h>
6
- #include <libxml/parser.h>
7
- #include <libxml/xpath.h>
8
- #include <libxml/xpathInternals.h>
9
- #include <libxml/xmlreader.h>
10
- #include <libxml/HTMLparser.h>
11
- #include <libxml/HTMLtree.h>
12
-
13
- #include <xml_document.h>
14
- #include <html_document.h>
15
- #include <xml_node.h>
16
- #include <xml_text.h>
17
- #include <xml_cdata.h>
18
- #include <xml_node_set.h>
19
- #include <xml_xpath.h>
20
- #include <xml_dtd.h>
21
- #include <xml_xpath_context.h>
22
- #include <xml_sax_parser.h>
23
- #include <xml_reader.h>
24
- #include <html_sax_parser.h>
25
- #include <xslt_stylesheet.h>
26
- #include <xml_syntax_error.h>
27
-
28
- extern VALUE mNokogiri ;
29
- extern VALUE mNokogiriXml ;
30
- extern VALUE mNokogiriXmlSax ;
31
- extern VALUE mNokogiriHtml ;
32
- extern VALUE mNokogiriHtmlSax ;
33
- extern VALUE mNokogiriXslt ;
34
-
35
- #ifdef DEBUG
36
-
37
- #define NOKOGIRI_DEBUG_START_NODE(p) if (getenv("NOKOGIRI_DEBUG")) fprintf(stderr,"\nnokogiri: %s:%d %p start node (%p %x %p/%s)\n", __FILE__, __LINE__, p, p->_private, p->type, p->name, p->name);
38
- #define NOKOGIRI_DEBUG_START_TEXT(p) if (getenv("NOKOGIRI_DEBUG")) fprintf(stderr,"\nnokogiri: %s:%d %p start node (%p %x %p/%s) '%s'\n", __FILE__, __LINE__, p, p->_private, p->type, p->name, p->name, p->content);
39
- #define NOKOGIRI_DEBUG_START(p) if (getenv("NOKOGIRI_DEBUG")) fprintf(stderr,"nokogiri: %s:%d %p start\n", __FILE__, __LINE__, p);
40
- #define NOKOGIRI_DEBUG_END(p) if (getenv("NOKOGIRI_DEBUG")) fprintf(stderr,"nokogiri: %s:%d %p end\n", __FILE__, __LINE__, p);
41
-
42
- #else
43
-
44
- #define NOKOGIRI_DEBUG_START_NODE(p)
45
- #define NOKOGIRI_DEBUG_START_TEXT(p)
46
- #define NOKOGIRI_DEBUG_START(p)
47
- #define NOKOGIRI_DEBUG_END(p)
48
-
49
- #endif
50
-
51
- #endif
data/ext/nokogiri/xml_xpath.c DELETED
@@ -1,46 +0,0 @@
1
- #include <xml_xpath.h>
2
-
3
- static void deallocate(xmlXPathObjectPtr xpath)
4
- {
5
- NOKOGIRI_DEBUG_START(xpath);
6
- xmlXPathFreeNodeSetList(xpath); // despite the name, this frees the xpath but not the contained node set
7
- NOKOGIRI_DEBUG_END(xpath);
8
- }
9
-
10
- VALUE Nokogiri_wrap_xml_xpath(xmlXPathObjectPtr xpath)
11
- {
12
- return Data_Wrap_Struct(cNokogiriXmlXpath, 0, deallocate, xpath);
13
- }
14
-
15
- /*
16
- * call-seq:
17
- * node_set
18
- *
19
- * Fetch the node set associated with this xpath context.
20
- */
21
- static VALUE node_set(VALUE self)
22
- {
23
- xmlXPathObjectPtr xpath;
24
- Data_Get_Struct(self, xmlXPathObject, xpath);
25
-
26
- if (xpath->nodesetval)
27
- return Nokogiri_wrap_xml_node_set(xpath->nodesetval);
28
-
29
- return Nokogiri_wrap_xml_node_set(xmlXPathNodeSetCreate(NULL));
30
- }
31
-
32
- VALUE cNokogiriXmlXpath;
33
- void init_xml_xpath(void)
34
- {
35
- VALUE module = rb_define_module("Nokogiri");
36
- VALUE xml = rb_define_module_under(module, "XML");
37
-
38
- /*
39
- * This class wraps an XPath object and should only be instantiated from
40
- * XPathContext.
41
- */
42
- VALUE klass = rb_define_class_under(xml, "XPath", rb_cObject);
43
-
44
- cNokogiriXmlXpath = klass;
45
- rb_define_method(klass, "node_set", node_set, 0);
46
- }
data/ext/nokogiri/xml_xpath.h DELETED
@@ -1,11 +0,0 @@
1
- #ifndef NOKOGIRI_XML_XPATH
2
- #define NOKOGIRI_XML_XPATH
3
-
4
- #include <native.h>
5
-
6
- void init_xml_xpath();
7
- VALUE Nokogiri_wrap_xml_xpath(xmlXPathObjectPtr xpath);
8
-
9
- extern VALUE cNokogiriXmlXpath;
10
- #endif
11
-