nokogiri 1.0.0 → 1.6.8.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (309) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +26 -0
  3. data/.cross_rubies +9 -0
  4. data/.editorconfig +17 -0
  5. data/.gemtest +0 -0
  6. data/.travis.yml +51 -0
  7. data/CHANGELOG.rdoc +1160 -0
  8. data/CONTRIBUTING.md +42 -0
  9. data/C_CODING_STYLE.rdoc +33 -0
  10. data/Gemfile +22 -0
  11. data/LICENSE.txt +31 -0
  12. data/Manifest.txt +284 -40
  13. data/README.md +166 -0
  14. data/ROADMAP.md +111 -0
  15. data/Rakefile +310 -199
  16. data/STANDARD_RESPONSES.md +47 -0
  17. data/Y_U_NO_GEMSPEC.md +155 -0
  18. data/appveyor.yml +22 -0
  19. data/bin/nokogiri +118 -0
  20. data/build_all +45 -0
  21. data/dependencies.yml +29 -0
  22. data/ext/nokogiri/depend +358 -0
  23. data/ext/nokogiri/extconf.rb +664 -34
  24. data/ext/nokogiri/html_document.c +120 -33
  25. data/ext/nokogiri/html_document.h +1 -1
  26. data/ext/nokogiri/html_element_description.c +279 -0
  27. data/ext/nokogiri/html_element_description.h +10 -0
  28. data/ext/nokogiri/html_entity_lookup.c +32 -0
  29. data/ext/nokogiri/html_entity_lookup.h +8 -0
  30. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  31. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  32. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  33. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  34. data/ext/nokogiri/nokogiri.c +145 -0
  35. data/ext/nokogiri/nokogiri.h +131 -0
  36. data/ext/nokogiri/xml_attr.c +94 -0
  37. data/ext/nokogiri/xml_attr.h +9 -0
  38. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  39. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  40. data/ext/nokogiri/xml_cdata.c +23 -19
  41. data/ext/nokogiri/xml_cdata.h +1 -1
  42. data/ext/nokogiri/xml_comment.c +69 -0
  43. data/ext/nokogiri/xml_comment.h +9 -0
  44. data/ext/nokogiri/xml_document.c +501 -54
  45. data/ext/nokogiri/xml_document.h +14 -1
  46. data/ext/nokogiri/xml_document_fragment.c +48 -0
  47. data/ext/nokogiri/xml_document_fragment.h +10 -0
  48. data/ext/nokogiri/xml_dtd.c +109 -24
  49. data/ext/nokogiri/xml_dtd.h +3 -1
  50. data/ext/nokogiri/xml_element_content.c +123 -0
  51. data/ext/nokogiri/xml_element_content.h +10 -0
  52. data/ext/nokogiri/xml_element_decl.c +69 -0
  53. data/ext/nokogiri/xml_element_decl.h +9 -0
  54. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  55. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  56. data/ext/nokogiri/xml_entity_decl.c +110 -0
  57. data/ext/nokogiri/xml_entity_decl.h +10 -0
  58. data/ext/nokogiri/xml_entity_reference.c +52 -0
  59. data/ext/nokogiri/xml_entity_reference.h +9 -0
  60. data/ext/nokogiri/xml_io.c +60 -0
  61. data/ext/nokogiri/xml_io.h +11 -0
  62. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  63. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  64. data/ext/nokogiri/xml_namespace.c +117 -0
  65. data/ext/nokogiri/xml_namespace.h +13 -0
  66. data/ext/nokogiri/xml_node.c +1285 -315
  67. data/ext/nokogiri/xml_node.h +4 -6
  68. data/ext/nokogiri/xml_node_set.c +415 -54
  69. data/ext/nokogiri/xml_node_set.h +6 -2
  70. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  71. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  72. data/ext/nokogiri/xml_reader.c +316 -77
  73. data/ext/nokogiri/xml_reader.h +1 -1
  74. data/ext/nokogiri/xml_relax_ng.c +161 -0
  75. data/ext/nokogiri/xml_relax_ng.h +9 -0
  76. data/ext/nokogiri/xml_sax_parser.c +215 -80
  77. data/ext/nokogiri/xml_sax_parser.h +30 -1
  78. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  79. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  80. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  81. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  82. data/ext/nokogiri/xml_schema.c +205 -0
  83. data/ext/nokogiri/xml_schema.h +9 -0
  84. data/ext/nokogiri/xml_syntax_error.c +45 -175
  85. data/ext/nokogiri/xml_syntax_error.h +4 -2
  86. data/ext/nokogiri/xml_text.c +37 -14
  87. data/ext/nokogiri/xml_text.h +1 -1
  88. data/ext/nokogiri/xml_xpath_context.c +230 -13
  89. data/ext/nokogiri/xml_xpath_context.h +2 -1
  90. data/ext/nokogiri/xslt_stylesheet.c +196 -34
  91. data/ext/nokogiri/xslt_stylesheet.h +6 -1
  92. data/lib/nokogiri/css/node.rb +18 -61
  93. data/lib/nokogiri/css/parser.rb +725 -17
  94. data/lib/nokogiri/css/parser.y +126 -63
  95. data/lib/nokogiri/css/parser_extras.rb +91 -0
  96. data/lib/nokogiri/css/syntax_error.rb +7 -0
  97. data/lib/nokogiri/css/tokenizer.rb +148 -5
  98. data/lib/nokogiri/css/tokenizer.rex +31 -39
  99. data/lib/nokogiri/css/xpath_visitor.rb +109 -51
  100. data/lib/nokogiri/css.rb +24 -3
  101. data/lib/nokogiri/decorators/slop.rb +42 -0
  102. data/lib/nokogiri/html/builder.rb +27 -1
  103. data/lib/nokogiri/html/document.rb +329 -3
  104. data/lib/nokogiri/html/document_fragment.rb +39 -0
  105. data/lib/nokogiri/html/element_description.rb +23 -0
  106. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  107. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  108. data/lib/nokogiri/html/sax/parser.rb +35 -4
  109. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  110. data/lib/nokogiri/html/sax/push_parser.rb +36 -0
  111. data/lib/nokogiri/html.rb +18 -76
  112. data/lib/nokogiri/syntax_error.rb +4 -0
  113. data/lib/nokogiri/version.rb +106 -1
  114. data/lib/nokogiri/xml/attr.rb +14 -0
  115. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  116. data/lib/nokogiri/xml/builder.rb +395 -31
  117. data/lib/nokogiri/xml/cdata.rb +4 -2
  118. data/lib/nokogiri/xml/character_data.rb +7 -0
  119. data/lib/nokogiri/xml/document.rb +267 -12
  120. data/lib/nokogiri/xml/document_fragment.rb +149 -0
  121. data/lib/nokogiri/xml/dtd.rb +27 -1
  122. data/lib/nokogiri/xml/element_content.rb +36 -0
  123. data/lib/nokogiri/xml/element_decl.rb +13 -0
  124. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  125. data/lib/nokogiri/xml/namespace.rb +13 -0
  126. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  127. data/lib/nokogiri/xml/node.rb +748 -109
  128. data/lib/nokogiri/xml/node_set.rb +200 -72
  129. data/lib/nokogiri/xml/parse_options.rb +120 -0
  130. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  131. data/lib/nokogiri/xml/pp/node.rb +56 -0
  132. data/lib/nokogiri/xml/pp.rb +2 -0
  133. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  134. data/lib/nokogiri/xml/reader.rb +102 -4
  135. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  136. data/lib/nokogiri/xml/sax/document.rb +114 -2
  137. data/lib/nokogiri/xml/sax/parser.rb +97 -7
  138. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  139. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  140. data/lib/nokogiri/xml/sax.rb +2 -7
  141. data/lib/nokogiri/xml/schema.rb +63 -0
  142. data/lib/nokogiri/xml/searchable.rb +221 -0
  143. data/lib/nokogiri/xml/syntax_error.rb +27 -1
  144. data/lib/nokogiri/xml/text.rb +4 -1
  145. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  146. data/lib/nokogiri/xml/xpath.rb +4 -0
  147. data/lib/nokogiri/xml/xpath_context.rb +3 -1
  148. data/lib/nokogiri/xml.rb +45 -38
  149. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  150. data/lib/nokogiri/xslt.rb +47 -2
  151. data/lib/nokogiri.rb +117 -24
  152. data/lib/xsd/xmlparser/nokogiri.rb +102 -0
  153. data/patches/sort-patches-by-date +25 -0
  154. data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
  155. data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
  156. data/suppressions/README.txt +1 -0
  157. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  158. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  159. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  160. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  161. data/tasks/test.rb +100 -0
  162. data/test/css/test_nthiness.rb +73 -6
  163. data/test/css/test_parser.rb +184 -39
  164. data/test/css/test_tokenizer.rb +72 -19
  165. data/test/css/test_xpath_visitor.rb +44 -2
  166. data/test/decorators/test_slop.rb +20 -0
  167. data/test/files/2ch.html +108 -0
  168. data/test/files/GH_1042.html +18 -0
  169. data/test/files/address_book.rlx +12 -0
  170. data/test/files/address_book.xml +10 -0
  171. data/test/files/atom.xml +344 -0
  172. data/test/files/bar/bar.xsd +4 -0
  173. data/test/files/bogus.xml +0 -0
  174. data/test/files/dont_hurt_em_why.xml +422 -0
  175. data/test/files/encoding.html +82 -0
  176. data/test/files/encoding.xhtml +84 -0
  177. data/test/files/exslt.xml +8 -0
  178. data/test/files/exslt.xslt +35 -0
  179. data/test/files/foo/foo.xsd +4 -0
  180. data/test/files/metacharset.html +10 -0
  181. data/test/files/namespace_pressure_test.xml +1684 -0
  182. data/test/files/noencoding.html +47 -0
  183. data/test/files/po.xml +32 -0
  184. data/test/files/po.xsd +66 -0
  185. data/test/files/saml/saml20assertion_schema.xsd +283 -0
  186. data/test/files/saml/saml20protocol_schema.xsd +302 -0
  187. data/test/files/saml/xenc_schema.xsd +146 -0
  188. data/test/files/saml/xmldsig_schema.xsd +318 -0
  189. data/test/files/shift_jis.html +10 -0
  190. data/test/files/shift_jis.xml +5 -0
  191. data/test/files/shift_jis_no_charset.html +9 -0
  192. data/test/files/slow-xpath.xml +25509 -0
  193. data/test/files/snuggles.xml +3 -0
  194. data/test/files/staff.dtd +10 -0
  195. data/test/files/test_document_url/bar.xml +2 -0
  196. data/test/files/test_document_url/document.dtd +4 -0
  197. data/test/files/test_document_url/document.xml +6 -0
  198. data/test/files/tlm.html +2 -1
  199. data/test/files/to_be_xincluded.xml +2 -0
  200. data/test/files/valid_bar.xml +2 -0
  201. data/test/files/xinclude.xml +4 -0
  202. data/test/helper.rb +124 -13
  203. data/test/html/sax/test_parser.rb +118 -4
  204. data/test/html/sax/test_parser_context.rb +46 -0
  205. data/test/html/sax/test_push_parser.rb +87 -0
  206. data/test/html/test_builder.rb +94 -8
  207. data/test/html/test_document.rb +626 -11
  208. data/test/html/test_document_encoding.rb +145 -0
  209. data/test/html/test_document_fragment.rb +301 -0
  210. data/test/html/test_element_description.rb +105 -0
  211. data/test/html/test_named_characters.rb +14 -0
  212. data/test/html/test_node.rb +212 -0
  213. data/test/html/test_node_encoding.rb +85 -0
  214. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
  215. data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
  216. data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
  217. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  218. data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
  219. data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
  220. data/test/namespaces/test_namespaces_preservation.rb +31 -0
  221. data/test/test_convert_xpath.rb +2 -47
  222. data/test/test_css_cache.rb +45 -0
  223. data/test/test_encoding_handler.rb +48 -0
  224. data/test/test_memory_leak.rb +156 -0
  225. data/test/test_nokogiri.rb +103 -1
  226. data/test/test_soap4r_sax.rb +52 -0
  227. data/test/test_xslt_transforms.rb +293 -8
  228. data/test/xml/node/test_save_options.rb +28 -0
  229. data/test/xml/node/test_subclass.rb +44 -0
  230. data/test/xml/sax/test_parser.rb +309 -8
  231. data/test/xml/sax/test_parser_context.rb +115 -0
  232. data/test/xml/sax/test_push_parser.rb +157 -0
  233. data/test/xml/test_attr.rb +67 -0
  234. data/test/xml/test_attribute_decl.rb +86 -0
  235. data/test/xml/test_builder.rb +327 -2
  236. data/test/xml/test_c14n.rb +180 -0
  237. data/test/xml/test_cdata.rb +32 -2
  238. data/test/xml/test_comment.rb +40 -0
  239. data/test/xml/test_document.rb +846 -35
  240. data/test/xml/test_document_encoding.rb +31 -0
  241. data/test/xml/test_document_fragment.rb +271 -0
  242. data/test/xml/test_dtd.rb +153 -9
  243. data/test/xml/test_dtd_encoding.rb +31 -0
  244. data/test/xml/test_element_content.rb +56 -0
  245. data/test/xml/test_element_decl.rb +73 -0
  246. data/test/xml/test_entity_decl.rb +122 -0
  247. data/test/xml/test_entity_reference.rb +251 -0
  248. data/test/xml/test_namespace.rb +96 -0
  249. data/test/xml/test_node.rb +1126 -105
  250. data/test/xml/test_node_attributes.rb +115 -0
  251. data/test/xml/test_node_encoding.rb +69 -0
  252. data/test/xml/test_node_inheritance.rb +32 -0
  253. data/test/xml/test_node_reparenting.rb +549 -0
  254. data/test/xml/test_node_set.rb +668 -9
  255. data/test/xml/test_parse_options.rb +64 -0
  256. data/test/xml/test_processing_instruction.rb +30 -0
  257. data/test/xml/test_reader.rb +589 -0
  258. data/test/xml/test_reader_encoding.rb +134 -0
  259. data/test/xml/test_relax_ng.rb +60 -0
  260. data/test/xml/test_schema.rb +142 -0
  261. data/test/xml/test_syntax_error.rb +30 -0
  262. data/test/xml/test_text.rb +49 -2
  263. data/test/xml/test_unparented_node.rb +440 -0
  264. data/test/xml/test_xinclude.rb +83 -0
  265. data/test/xml/test_xpath.rb +445 -0
  266. data/test/xslt/test_custom_functions.rb +133 -0
  267. data/test/xslt/test_exception_handling.rb +37 -0
  268. data/test_all +107 -0
  269. metadata +459 -115
  270. data/History.txt +0 -6
  271. data/README.ja.txt +0 -86
  272. data/README.txt +0 -87
  273. data/ext/nokogiri/html_sax_parser.c +0 -32
  274. data/ext/nokogiri/html_sax_parser.h +0 -11
  275. data/ext/nokogiri/native.c +0 -40
  276. data/ext/nokogiri/native.h +0 -51
  277. data/ext/nokogiri/xml_xpath.c +0 -46
  278. data/ext/nokogiri/xml_xpath.h +0 -11
  279. data/lib/nokogiri/css/generated_parser.rb +0 -653
  280. data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
  281. data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
  282. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
  283. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
  284. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  285. data/lib/nokogiri/decorators.rb +0 -1
  286. data/lib/nokogiri/hpricot.rb +0 -47
  287. data/lib/nokogiri/xml/after_handler.rb +0 -18
  288. data/lib/nokogiri/xml/before_handler.rb +0 -32
  289. data/lib/nokogiri/xml/element.rb +0 -6
  290. data/lib/nokogiri/xml/entity_declaration.rb +0 -9
  291. data/nokogiri.gemspec +0 -34
  292. data/test/hpricot/files/basic.xhtml +0 -17
  293. data/test/hpricot/files/boingboing.html +0 -2266
  294. data/test/hpricot/files/cy0.html +0 -3653
  295. data/test/hpricot/files/immob.html +0 -400
  296. data/test/hpricot/files/pace_application.html +0 -1320
  297. data/test/hpricot/files/tenderlove.html +0 -16
  298. data/test/hpricot/files/uswebgen.html +0 -220
  299. data/test/hpricot/files/utf8.html +0 -1054
  300. data/test/hpricot/files/week9.html +0 -1723
  301. data/test/hpricot/files/why.xml +0 -19
  302. data/test/hpricot/load_files.rb +0 -7
  303. data/test/hpricot/test_alter.rb +0 -67
  304. data/test/hpricot/test_builder.rb +0 -27
  305. data/test/hpricot/test_parser.rb +0 -423
  306. data/test/hpricot/test_paths.rb +0 -15
  307. data/test/hpricot/test_preserved.rb +0 -78
  308. data/test/hpricot/test_xml.rb +0 -30
  309. data/test/test_reader.rb +0 -222
@@ -0,0 +1,87 @@
1
+ #include <html_sax_push_parser.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * native_write(chunk, last_chunk)
6
+ *
7
+ * Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle
8
+ */
9
+ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
10
+ {
11
+ xmlParserCtxtPtr ctx;
12
+ const char * chunk = NULL;
13
+ int size = 0;
14
+
15
+
16
+ Data_Get_Struct(self, xmlParserCtxt, ctx);
17
+
18
+ if(Qnil != _chunk) {
19
+ chunk = StringValuePtr(_chunk);
20
+ size = (int)RSTRING_LEN(_chunk);
21
+ }
22
+
23
+ if(htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
24
+ if (!(ctx->options & XML_PARSE_RECOVER)) {
25
+ xmlErrorPtr e = xmlCtxtGetLastError(ctx);
26
+ Nokogiri_error_raise(NULL, e);
27
+ }
28
+ }
29
+
30
+ return self;
31
+ }
32
+
33
+ /*
34
+ * call-seq:
35
+ * initialize_native(xml_sax, filename)
36
+ *
37
+ * Initialize the push parser with +xml_sax+ using +filename+
38
+ */
39
+ static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
40
+ VALUE encoding)
41
+ {
42
+ htmlSAXHandlerPtr sax;
43
+ const char * filename = NULL;
44
+ htmlParserCtxtPtr ctx;
45
+ xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
46
+
47
+ Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
48
+
49
+ if(_filename != Qnil) filename = StringValueCStr(_filename);
50
+
51
+ if (!NIL_P(encoding)) {
52
+ enc = xmlParseCharEncoding(StringValueCStr(encoding));
53
+ if (enc == XML_CHAR_ENCODING_ERROR)
54
+ rb_raise(rb_eArgError, "Unsupported Encoding");
55
+ }
56
+
57
+ ctx = htmlCreatePushParserCtxt(
58
+ sax,
59
+ NULL,
60
+ NULL,
61
+ 0,
62
+ filename,
63
+ enc
64
+ );
65
+ if(ctx == NULL)
66
+ rb_raise(rb_eRuntimeError, "Could not create a parser context");
67
+
68
+ ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
69
+
70
+ ctx->sax2 = 1;
71
+ DATA_PTR(self) = ctx;
72
+ return self;
73
+ }
74
+
75
+ VALUE cNokogiriHtmlSaxPushParser;
76
+ void init_html_sax_push_parser()
77
+ {
78
+ VALUE nokogiri = rb_define_module("Nokogiri");
79
+ VALUE html = rb_define_module_under(nokogiri, "HTML");
80
+ VALUE sax = rb_define_module_under(html, "SAX");
81
+ VALUE klass = rb_define_class_under(sax, "PushParser", cNokogiriXmlSaxPushParser);
82
+
83
+ cNokogiriHtmlSaxPushParser = klass;
84
+
85
+ rb_define_private_method(klass, "initialize_native", initialize_native, 3);
86
+ rb_define_private_method(klass, "native_write", native_write, 2);
87
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_HTML_SAX_PUSH_PARSER
2
+ #define NOKOGIRI_HTML_SAX_PUSH_PARSER
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_html_sax_push_parser();
7
+
8
+ extern VALUE cNokogiriHtmlSaxPushParser ;
9
+ #endif
@@ -0,0 +1,145 @@
1
+ #include <nokogiri.h>
2
+
3
+ VALUE mNokogiri ;
4
+ VALUE mNokogiriXml ;
5
+ VALUE mNokogiriHtml ;
6
+ VALUE mNokogiriXslt ;
7
+ VALUE mNokogiriXmlSax ;
8
+ VALUE mNokogiriHtmlSax ;
9
+
10
+ #ifdef USE_INCLUDED_VASPRINTF
/*
 * I srsly hate windows. it doesn't have vasprintf.
 * Thank you Geoffroy Couprie for this implementation of vasprintf!
 *
 * Format +fmt+ with the arguments in +ap+ into a freshly malloc'ed buffer
 * and store that buffer in *strp.
 *
 * Returns the number of characters written (not counting the terminating
 * NUL), or -1 when the required length cannot be determined or the buffer
 * cannot be allocated. On failure *strp is left untouched.
 */
int vasprintf (char **strp, const char *fmt, va_list ap)
{
  /* Mingw32/64 have a broken vsnprintf implementation that fails when
   * using a zero-byte limit in order to retrieve the required size for malloc.
   * So we use a one byte buffer instead.
   */
  char tmp[1];
  va_list ap_measure;
  int len;
  char *res;

  /* vsnprintf leaves its va_list indeterminate, so the measuring pass must
   * run on a copy; the original list is kept for the real formatting pass. */
  va_copy(ap_measure, ap);
  len = vsnprintf(tmp, 1, fmt, ap_measure);
  va_end(ap_measure);

  if (len < 0)
    return -1;

  res = (char *)malloc((size_t)len + 1);
  if (res == NULL)
    return -1;

  *strp = res;
  return vsnprintf(res, (size_t)len + 1, fmt, ap);
}
29
+ #endif
30
+
/*
 * Release a buffer with free(). Passing NULL is harmless.
 */
void vasprintf_free (void *p)
{
  free(p);
}
35
+
36
+ #ifdef HAVE_RUBY_UTIL_H
37
+ #include "ruby/util.h"
38
+ #else
39
+ #ifndef __MACRUBY__
40
+ #include "util.h"
41
+ #endif
42
+ #endif
43
+
44
+ void nokogiri_root_node(xmlNodePtr node)
45
+ {
46
+ xmlDocPtr doc;
47
+ nokogiriTuplePtr tuple;
48
+
49
+ doc = node->doc;
50
+ if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc;
51
+ tuple = (nokogiriTuplePtr)doc->_private;
52
+ st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node);
53
+ }
54
+
55
+ void nokogiri_root_nsdef(xmlNsPtr ns, xmlDocPtr doc)
56
+ {
57
+ nokogiriTuplePtr tuple;
58
+
59
+ if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc;
60
+ tuple = (nokogiriTuplePtr)doc->_private;
61
+ st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
62
+ }
63
+
64
+ void Init_nokogiri()
65
+ {
66
+ #ifndef __MACRUBY__
67
+ xmlMemSetup(
68
+ (xmlFreeFunc)ruby_xfree,
69
+ (xmlMallocFunc)ruby_xmalloc,
70
+ (xmlReallocFunc)ruby_xrealloc,
71
+ ruby_strdup
72
+ );
73
+ #endif
74
+
75
+ mNokogiri = rb_define_module("Nokogiri");
76
+ mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
77
+ mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML");
78
+ mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
79
+ mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
80
+ mNokogiriHtmlSax = rb_define_module_under(mNokogiriHtml, "SAX");
81
+
82
+ rb_const_set( mNokogiri,
83
+ rb_intern("LIBXML_VERSION"),
84
+ NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION)
85
+ );
86
+ rb_const_set( mNokogiri,
87
+ rb_intern("LIBXML_PARSER_VERSION"),
88
+ NOKOGIRI_STR_NEW2(xmlParserVersion)
89
+ );
90
+
91
+ #ifdef NOKOGIRI_USE_PACKAGED_LIBRARIES
92
+ rb_const_set(mNokogiri, rb_intern("NOKOGIRI_USE_PACKAGED_LIBRARIES"), Qtrue);
93
+ rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATH"), NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATH));
94
+ rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATH"), NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATH));
95
+ rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATCHES), " "));
96
+ rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATCHES), " "));
97
+ #else
98
+ rb_const_set(mNokogiri, rb_intern("NOKOGIRI_USE_PACKAGED_LIBRARIES"), Qfalse);
99
+ rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATH"), Qnil);
100
+ rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATH"), Qnil);
101
+ rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATCHES"), Qnil);
102
+ rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATCHES"), Qnil);
103
+ #endif
104
+
105
+ #ifdef LIBXML_ICONV_ENABLED
106
+ rb_const_set(mNokogiri, rb_intern("LIBXML_ICONV_ENABLED"), Qtrue);
107
+ #else
108
+ rb_const_set(mNokogiri, rb_intern("LIBXML_ICONV_ENABLED"), Qfalse);
109
+ #endif
110
+
111
+ xmlInitParser();
112
+
113
+ init_xml_document();
114
+ init_html_document();
115
+ init_xml_node();
116
+ init_xml_document_fragment();
117
+ init_xml_text();
118
+ init_xml_cdata();
119
+ init_xml_processing_instruction();
120
+ init_xml_attr();
121
+ init_xml_entity_reference();
122
+ init_xml_comment();
123
+ init_xml_node_set();
124
+ init_xml_xpath_context();
125
+ init_xml_sax_parser_context();
126
+ init_xml_sax_parser();
127
+ init_xml_sax_push_parser();
128
+ init_xml_reader();
129
+ init_xml_dtd();
130
+ init_xml_element_content();
131
+ init_xml_attribute_decl();
132
+ init_xml_element_decl();
133
+ init_xml_entity_decl();
134
+ init_xml_namespace();
135
+ init_html_sax_parser_context();
136
+ init_html_sax_push_parser();
137
+ init_xslt_stylesheet();
138
+ init_xml_syntax_error();
139
+ init_html_entity_lookup();
140
+ init_html_element_description();
141
+ init_xml_schema();
142
+ init_xml_relax_ng();
143
+ init_nokogiri_io();
144
+ init_xml_encoding_handler();
145
+ }
@@ -0,0 +1,131 @@
1
+ #ifndef NOKOGIRI_NATIVE
2
+ #define NOKOGIRI_NATIVE
3
+
4
+ #include <stdlib.h>
5
+ #include <string.h>
6
+ #include <assert.h>
7
+ #include <stdarg.h>
8
+
9
+ #ifdef USE_INCLUDED_VASPRINTF
10
+ int vasprintf (char **strp, const char *fmt, va_list ap);
11
+ #else
12
+
13
+ #define _GNU_SOURCE
14
+ # include <stdio.h>
15
+ #undef _GNU_SOURCE
16
+
17
+ #endif
18
+
19
+ #include <libxml/parser.h>
20
+ #include <libxml/entities.h>
21
+ #include <libxml/parserInternals.h>
22
+ #include <libxml/xpath.h>
23
+ #include <libxml/xpathInternals.h>
24
+ #include <libxml/xmlreader.h>
25
+ #include <libxml/xmlsave.h>
26
+ #include <libxml/xmlschemas.h>
27
+ #include <libxml/HTMLparser.h>
28
+ #include <libxml/HTMLtree.h>
29
+ #include <libxml/relaxng.h>
30
+ #include <libxml/xinclude.h>
31
+ #include <libxslt/extensions.h>
32
+ #include <libxml/c14n.h>
33
+ #include <ruby.h>
34
+ #include <ruby/st.h>
35
+ #include <ruby/encoding.h>
36
+
37
+ #ifndef UNUSED
38
+ # if defined(__GNUC__)
39
+ # define MAYBE_UNUSED(name) name __attribute__((unused))
40
+ # define UNUSED(name) MAYBE_UNUSED(UNUSED_ ## name)
41
+ # else
42
+ # define MAYBE_UNUSED(name) name
43
+ # define UNUSED(name) name
44
+ # endif
45
+ #endif
46
+
47
+ #ifndef NORETURN
48
+ # if defined(__GNUC__)
49
+ # define NORETURN(name) __attribute__((noreturn)) name
50
+ # else
51
+ # define NORETURN(name) name
52
+ # endif
53
+ #endif
54
+
55
+ #define NOKOGIRI_STR_NEW2(str) \
56
+ NOKOGIRI_STR_NEW(str, strlen((const char *)(str)))
57
+
58
+ #define NOKOGIRI_STR_NEW(str, len) \
59
+ rb_external_str_new_with_enc((const char *)(str), (long)(len), rb_utf8_encoding())
60
+
/* Build a Ruby String from the C string _str via NOKOGIRI_STR_NEW2, or
 * yield Qnil when _str is NULL. The argument is parenthesised so that
 * compound expressions are tested as a whole; note that it is still
 * evaluated more than once. */
#define RBSTR_OR_QNIL(_str) \
  ((_str) ? NOKOGIRI_STR_NEW2(_str) : Qnil)
63
+
64
+ #include <xml_libxml2_hacks.h>
65
+
66
+ #include <xml_io.h>
67
+ #include <xml_document.h>
68
+ #include <html_entity_lookup.h>
69
+ #include <html_document.h>
70
+ #include <xml_node.h>
71
+ #include <xml_text.h>
72
+ #include <xml_cdata.h>
73
+ #include <xml_attr.h>
74
+ #include <xml_processing_instruction.h>
75
+ #include <xml_entity_reference.h>
76
+ #include <xml_document_fragment.h>
77
+ #include <xml_comment.h>
78
+ #include <xml_node_set.h>
79
+ #include <xml_dtd.h>
80
+ #include <xml_attribute_decl.h>
81
+ #include <xml_element_decl.h>
82
+ #include <xml_entity_decl.h>
83
+ #include <xml_xpath_context.h>
84
+ #include <xml_element_content.h>
85
+ #include <xml_sax_parser_context.h>
86
+ #include <xml_sax_parser.h>
87
+ #include <xml_sax_push_parser.h>
88
+ #include <xml_reader.h>
89
+ #include <html_sax_parser_context.h>
90
+ #include <html_sax_push_parser.h>
91
+ #include <xslt_stylesheet.h>
92
+ #include <xml_syntax_error.h>
93
+ #include <xml_schema.h>
94
+ #include <xml_relax_ng.h>
95
+ #include <html_element_description.h>
96
+ #include <xml_namespace.h>
97
+ #include <xml_encoding_handler.h>
98
+
99
+ extern VALUE mNokogiri ;
100
+ extern VALUE mNokogiriXml ;
101
+ extern VALUE mNokogiriXmlSax ;
102
+ extern VALUE mNokogiriHtml ;
103
+ extern VALUE mNokogiriHtmlSax ;
104
+ extern VALUE mNokogiriXslt ;
105
+
106
+ void nokogiri_root_node(xmlNodePtr);
107
+ void nokogiri_root_nsdef(xmlNsPtr, xmlDocPtr);
108
+
109
+ #ifdef DEBUG
110
+
111
+ #define NOKOGIRI_DEBUG_START(p) if (getenv("NOKOGIRI_NO_FREE")) return ; if (getenv("NOKOGIRI_DEBUG")) fprintf(stderr,"nokogiri: %s:%d %p start\n", __FILE__, __LINE__, p);
112
+ #define NOKOGIRI_DEBUG_END(p) if (getenv("NOKOGIRI_DEBUG")) fprintf(stderr,"nokogiri: %s:%d %p end\n", __FILE__, __LINE__, p);
113
+
114
+ #else
115
+
116
+ #define NOKOGIRI_DEBUG_START(p)
117
+ #define NOKOGIRI_DEBUG_END(p)
118
+
119
+ #endif
120
+
121
+ #ifndef __builtin_expect
122
+ # if defined(__GNUC__)
123
+ # define __builtin_expect(expr, c) __builtin_expect((long)(expr), (long)(c))
124
+ # endif
125
+ #endif
126
+
127
+ #define XMLNS_PREFIX "xmlns"
128
+ #define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */
129
+ #define XMLNS_BUFFER_LEN 128
130
+
131
+ #endif
@@ -0,0 +1,94 @@
1
+ #include <xml_attr.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * value=(content)
6
+ *
7
+ * Set the value for this Attr to +content+
8
+ */
9
+ static VALUE set_value(VALUE self, VALUE content)
10
+ {
11
+ xmlAttrPtr attr;
12
+ Data_Get_Struct(self, xmlAttr, attr);
13
+
14
+ if(attr->children) xmlFreeNodeList(attr->children);
15
+
16
+ attr->children = attr->last = NULL;
17
+
18
+ if(content) {
19
+ xmlChar *buffer;
20
+ xmlNode *tmp;
21
+
22
+ /* Encode our content */
23
+ buffer = xmlEncodeEntitiesReentrant(attr->doc, (unsigned char *)StringValueCStr(content));
24
+
25
+ attr->children = xmlStringGetNodeList(attr->doc, buffer);
26
+ attr->last = NULL;
27
+ tmp = attr->children;
28
+
29
+ /* Loop through the children */
30
+ for(tmp = attr->children; tmp; tmp = tmp->next) {
31
+ tmp->parent = (xmlNode *)attr;
32
+ tmp->doc = attr->doc;
33
+ if(tmp->next == NULL) attr->last = tmp;
34
+ }
35
+
36
+ /* Free up memory */
37
+ xmlFree(buffer);
38
+ }
39
+
40
+ return content;
41
+ }
42
+
43
+ /*
44
+ * call-seq:
45
+ * new(document, name)
46
+ *
47
+ * Create a new Attr element on the +document+ with +name+
48
+ */
49
+ static VALUE new(int argc, VALUE *argv, VALUE klass)
50
+ {
51
+ xmlDocPtr xml_doc;
52
+ VALUE document;
53
+ VALUE name;
54
+ VALUE rest;
55
+ xmlAttrPtr node;
56
+ VALUE rb_node;
57
+
58
+ rb_scan_args(argc, argv, "2*", &document, &name, &rest);
59
+
60
+ Data_Get_Struct(document, xmlDoc, xml_doc);
61
+
62
+ node = xmlNewDocProp(
63
+ xml_doc,
64
+ (const xmlChar *)StringValueCStr(name),
65
+ NULL
66
+ );
67
+
68
+ nokogiri_root_node((xmlNodePtr)node);
69
+
70
+ rb_node = Nokogiri_wrap_xml_node(klass, (xmlNodePtr)node);
71
+ rb_obj_call_init(rb_node, argc, argv);
72
+
73
+ if(rb_block_given_p()) rb_yield(rb_node);
74
+
75
+ return rb_node;
76
+ }
77
+
78
+ VALUE cNokogiriXmlAttr;
79
+ void init_xml_attr()
80
+ {
81
+ VALUE nokogiri = rb_define_module("Nokogiri");
82
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
83
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
84
+
85
+ /*
86
+ * Attr represents a Attr node in an xml document.
87
+ */
88
+ VALUE klass = rb_define_class_under(xml, "Attr", node);
89
+
90
+ cNokogiriXmlAttr = klass;
91
+
92
+ rb_define_singleton_method(klass, "new", new, -1);
93
+ rb_define_method(klass, "value=", set_value, 1);
94
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_ATTR
2
+ #define NOKOGIRI_XML_ATTR
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_attr();
7
+
8
+ extern VALUE cNokogiriXmlAttr;
9
+ #endif
@@ -0,0 +1,70 @@
1
+ #include <xml_attribute_decl.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * attribute_type
6
+ *
7
+ * The attribute_type for this AttributeDecl
8
+ */
9
+ static VALUE attribute_type(VALUE self)
10
+ {
11
+ xmlAttributePtr node;
12
+ Data_Get_Struct(self, xmlAttribute, node);
13
+ return INT2NUM((long)node->atype);
14
+ }
15
+
16
+ /*
17
+ * call-seq:
18
+ * default
19
+ *
20
+ * The default value
21
+ */
22
+ static VALUE default_value(VALUE self)
23
+ {
24
+ xmlAttributePtr node;
25
+ Data_Get_Struct(self, xmlAttribute, node);
26
+
27
+ if(node->defaultValue) return NOKOGIRI_STR_NEW2(node->defaultValue);
28
+ return Qnil;
29
+ }
30
+
31
+ /*
32
+ * call-seq:
33
+ * enumeration
34
+ *
35
+ * An enumeration of possible values
36
+ */
37
+ static VALUE enumeration(VALUE self)
38
+ {
39
+ xmlAttributePtr node;
40
+ xmlEnumerationPtr enm;
41
+ VALUE list;
42
+
43
+ Data_Get_Struct(self, xmlAttribute, node);
44
+
45
+ list = rb_ary_new();
46
+ enm = node->tree;
47
+
48
+ while(enm) {
49
+ rb_ary_push(list, NOKOGIRI_STR_NEW2(enm->name));
50
+ enm = enm->next;
51
+ }
52
+
53
+ return list;
54
+ }
55
+
56
+ VALUE cNokogiriXmlAttributeDecl;
57
+
58
+ void init_xml_attribute_decl()
59
+ {
60
+ VALUE nokogiri = rb_define_module("Nokogiri");
61
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
62
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
63
+ VALUE klass = rb_define_class_under(xml, "AttributeDecl", node);
64
+
65
+ cNokogiriXmlAttributeDecl = klass;
66
+
67
+ rb_define_method(klass, "attribute_type", attribute_type, 0);
68
+ rb_define_method(klass, "default", default_value, 0);
69
+ rb_define_method(klass, "enumeration", enumeration, 0);
70
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_ATTRIBUTE_DECL
2
+ #define NOKOGIRI_XML_ATTRIBUTE_DECL
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_attribute_decl();
7
+
8
+ extern VALUE cNokogiriXmlAttributeDecl;
9
+ #endif
@@ -1,33 +1,34 @@
1
1
  #include <xml_cdata.h>
2
2
 
3
- static void dealloc(xmlNodePtr node)
4
- {
5
- if (node->doc == NULL) {
6
- NOKOGIRI_DEBUG_START_NODE(node);
7
- xmlFreeNode(node);
8
- NOKOGIRI_DEBUG_END(node);
9
- }
10
- }
11
-
12
3
  /*
13
4
  * call-seq:
14
5
  * new(document, content)
15
6
  *
16
- * Create a new CData element on the +document+ with +content+
7
+ * Create a new CDATA element on the +document+ with +content+
17
8
  */
18
- static VALUE new(VALUE klass, VALUE doc, VALUE content)
9
+ static VALUE new(int argc, VALUE *argv, VALUE klass)
19
10
  {
20
11
  xmlDocPtr xml_doc;
12
+ xmlNodePtr node;
13
+ VALUE doc;
14
+ VALUE content;
15
+ VALUE rest;
16
+ VALUE rb_node;
17
+
18
+ rb_scan_args(argc, argv, "2*", &doc, &content, &rest);
19
+
21
20
  Data_Get_Struct(doc, xmlDoc, xml_doc);
22
21
 
23
- xmlNodePtr node = xmlNewCDataBlock(
24
- xml_doc,
25
- (const xmlChar *)StringValuePtr(content),
26
- NUM2INT(rb_funcall(content, rb_intern("length"), 0))
22
+ node = xmlNewCDataBlock(
23
+ xml_doc->doc,
24
+ NIL_P(content) ? NULL : (const xmlChar *)StringValuePtr(content),
25
+ NIL_P(content) ? 0 : (int)RSTRING_LEN(content)
27
26
  );
28
27
 
29
- VALUE rb_node = Data_Wrap_Struct(klass, NULL, dealloc, node);
30
- node->_private = (void *)rb_node;
28
+ nokogiri_root_node(node);
29
+
30
+ rb_node = Nokogiri_wrap_xml_node(klass, node);
31
+ rb_obj_call_init(rb_node, argc, argv);
31
32
 
32
33
  if(rb_block_given_p()) rb_yield(rb_node);
33
34
 
@@ -39,14 +40,17 @@ void init_xml_cdata()
39
40
  {
40
41
  VALUE nokogiri = rb_define_module("Nokogiri");
41
42
  VALUE xml = rb_define_module_under(nokogiri, "XML");
43
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
44
+ VALUE char_data = rb_define_class_under(xml, "CharacterData", node);
45
+ VALUE text = rb_define_class_under(xml, "Text", char_data);
42
46
 
43
47
  /*
44
48
  * CData represents a CData node in an xml document.
45
49
  */
46
- VALUE klass = rb_define_class_under(xml, "CData", cNokogiriXmlNode);
50
+ VALUE klass = rb_define_class_under(xml, "CDATA", text);
47
51
 
48
52
 
49
53
  cNokogiriXmlCData = klass;
50
54
 
51
- rb_define_singleton_method(klass, "new", new, 2);
55
+ rb_define_singleton_method(klass, "new", new, -1);
52
56
  }
@@ -1,7 +1,7 @@
1
1
  #ifndef NOKOGIRI_XML_CDATA
2
2
  #define NOKOGIRI_XML_CDATA
3
3
 
4
- #include <native.h>
4
+ #include <nokogiri.h>
5
5
 
6
6
  void init_xml_cdata();
7
7
 
@@ -0,0 +1,69 @@
1
+ #include <xml_comment.h>
2
+
3
+ static ID document_id ;
4
+
5
+ /*
6
+ * call-seq:
7
+ * new(document_or_node, content)
8
+ *
9
+ * Create a new Comment element on the +document+ with +content+.
10
+ * Alternatively, if a +node+ is passed, the +node+'s document is used.
11
+ */
12
+ static VALUE new(int argc, VALUE *argv, VALUE klass)
13
+ {
14
+ xmlDocPtr xml_doc;
15
+ xmlNodePtr node;
16
+ VALUE document;
17
+ VALUE content;
18
+ VALUE rest;
19
+ VALUE rb_node;
20
+
21
+ rb_scan_args(argc, argv, "2*", &document, &content, &rest);
22
+
23
+ if (rb_obj_is_kind_of(document, cNokogiriXmlNode))
24
+ {
25
+ document = rb_funcall(document, document_id, 0);
26
+ }
27
+ else if ( !rb_obj_is_kind_of(document, cNokogiriXmlDocument)
28
+ && !rb_obj_is_kind_of(document, cNokogiriXmlDocumentFragment))
29
+ {
30
+ rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
31
+ }
32
+
33
+ Data_Get_Struct(document, xmlDoc, xml_doc);
34
+
35
+ node = xmlNewDocComment(
36
+ xml_doc,
37
+ (const xmlChar *)StringValueCStr(content)
38
+ );
39
+
40
+ rb_node = Nokogiri_wrap_xml_node(klass, node);
41
+ rb_obj_call_init(rb_node, argc, argv);
42
+
43
+ nokogiri_root_node(node);
44
+
45
+ if(rb_block_given_p()) rb_yield(rb_node);
46
+
47
+ return rb_node;
48
+ }
49
+
50
+ VALUE cNokogiriXmlComment;
51
+ void init_xml_comment()
52
+ {
53
+ VALUE nokogiri = rb_define_module("Nokogiri");
54
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
55
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
56
+ VALUE char_data = rb_define_class_under(xml, "CharacterData", node);
57
+
58
+ /*
59
+ * Comment represents a comment node in an xml document.
60
+ */
61
+ VALUE klass = rb_define_class_under(xml, "Comment", char_data);
62
+
63
+
64
+ cNokogiriXmlComment = klass;
65
+
66
+ rb_define_singleton_method(klass, "new", new, -1);
67
+
68
+ document_id = rb_intern("document");
69
+ }