nokogiri 1.0.0 → 1.6.8.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (309) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +26 -0
  3. data/.cross_rubies +9 -0
  4. data/.editorconfig +17 -0
  5. data/.gemtest +0 -0
  6. data/.travis.yml +51 -0
  7. data/CHANGELOG.rdoc +1160 -0
  8. data/CONTRIBUTING.md +42 -0
  9. data/C_CODING_STYLE.rdoc +33 -0
  10. data/Gemfile +22 -0
  11. data/LICENSE.txt +31 -0
  12. data/Manifest.txt +284 -40
  13. data/README.md +166 -0
  14. data/ROADMAP.md +111 -0
  15. data/Rakefile +310 -199
  16. data/STANDARD_RESPONSES.md +47 -0
  17. data/Y_U_NO_GEMSPEC.md +155 -0
  18. data/appveyor.yml +22 -0
  19. data/bin/nokogiri +118 -0
  20. data/build_all +45 -0
  21. data/dependencies.yml +29 -0
  22. data/ext/nokogiri/depend +358 -0
  23. data/ext/nokogiri/extconf.rb +664 -34
  24. data/ext/nokogiri/html_document.c +120 -33
  25. data/ext/nokogiri/html_document.h +1 -1
  26. data/ext/nokogiri/html_element_description.c +279 -0
  27. data/ext/nokogiri/html_element_description.h +10 -0
  28. data/ext/nokogiri/html_entity_lookup.c +32 -0
  29. data/ext/nokogiri/html_entity_lookup.h +8 -0
  30. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  31. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  32. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  33. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  34. data/ext/nokogiri/nokogiri.c +145 -0
  35. data/ext/nokogiri/nokogiri.h +131 -0
  36. data/ext/nokogiri/xml_attr.c +94 -0
  37. data/ext/nokogiri/xml_attr.h +9 -0
  38. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  39. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  40. data/ext/nokogiri/xml_cdata.c +23 -19
  41. data/ext/nokogiri/xml_cdata.h +1 -1
  42. data/ext/nokogiri/xml_comment.c +69 -0
  43. data/ext/nokogiri/xml_comment.h +9 -0
  44. data/ext/nokogiri/xml_document.c +501 -54
  45. data/ext/nokogiri/xml_document.h +14 -1
  46. data/ext/nokogiri/xml_document_fragment.c +48 -0
  47. data/ext/nokogiri/xml_document_fragment.h +10 -0
  48. data/ext/nokogiri/xml_dtd.c +109 -24
  49. data/ext/nokogiri/xml_dtd.h +3 -1
  50. data/ext/nokogiri/xml_element_content.c +123 -0
  51. data/ext/nokogiri/xml_element_content.h +10 -0
  52. data/ext/nokogiri/xml_element_decl.c +69 -0
  53. data/ext/nokogiri/xml_element_decl.h +9 -0
  54. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  55. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  56. data/ext/nokogiri/xml_entity_decl.c +110 -0
  57. data/ext/nokogiri/xml_entity_decl.h +10 -0
  58. data/ext/nokogiri/xml_entity_reference.c +52 -0
  59. data/ext/nokogiri/xml_entity_reference.h +9 -0
  60. data/ext/nokogiri/xml_io.c +60 -0
  61. data/ext/nokogiri/xml_io.h +11 -0
  62. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  63. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  64. data/ext/nokogiri/xml_namespace.c +117 -0
  65. data/ext/nokogiri/xml_namespace.h +13 -0
  66. data/ext/nokogiri/xml_node.c +1285 -315
  67. data/ext/nokogiri/xml_node.h +4 -6
  68. data/ext/nokogiri/xml_node_set.c +415 -54
  69. data/ext/nokogiri/xml_node_set.h +6 -2
  70. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  71. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  72. data/ext/nokogiri/xml_reader.c +316 -77
  73. data/ext/nokogiri/xml_reader.h +1 -1
  74. data/ext/nokogiri/xml_relax_ng.c +161 -0
  75. data/ext/nokogiri/xml_relax_ng.h +9 -0
  76. data/ext/nokogiri/xml_sax_parser.c +215 -80
  77. data/ext/nokogiri/xml_sax_parser.h +30 -1
  78. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  79. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  80. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  81. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  82. data/ext/nokogiri/xml_schema.c +205 -0
  83. data/ext/nokogiri/xml_schema.h +9 -0
  84. data/ext/nokogiri/xml_syntax_error.c +45 -175
  85. data/ext/nokogiri/xml_syntax_error.h +4 -2
  86. data/ext/nokogiri/xml_text.c +37 -14
  87. data/ext/nokogiri/xml_text.h +1 -1
  88. data/ext/nokogiri/xml_xpath_context.c +230 -13
  89. data/ext/nokogiri/xml_xpath_context.h +2 -1
  90. data/ext/nokogiri/xslt_stylesheet.c +196 -34
  91. data/ext/nokogiri/xslt_stylesheet.h +6 -1
  92. data/lib/nokogiri/css/node.rb +18 -61
  93. data/lib/nokogiri/css/parser.rb +725 -17
  94. data/lib/nokogiri/css/parser.y +126 -63
  95. data/lib/nokogiri/css/parser_extras.rb +91 -0
  96. data/lib/nokogiri/css/syntax_error.rb +7 -0
  97. data/lib/nokogiri/css/tokenizer.rb +148 -5
  98. data/lib/nokogiri/css/tokenizer.rex +31 -39
  99. data/lib/nokogiri/css/xpath_visitor.rb +109 -51
  100. data/lib/nokogiri/css.rb +24 -3
  101. data/lib/nokogiri/decorators/slop.rb +42 -0
  102. data/lib/nokogiri/html/builder.rb +27 -1
  103. data/lib/nokogiri/html/document.rb +329 -3
  104. data/lib/nokogiri/html/document_fragment.rb +39 -0
  105. data/lib/nokogiri/html/element_description.rb +23 -0
  106. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  107. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  108. data/lib/nokogiri/html/sax/parser.rb +35 -4
  109. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  110. data/lib/nokogiri/html/sax/push_parser.rb +36 -0
  111. data/lib/nokogiri/html.rb +18 -76
  112. data/lib/nokogiri/syntax_error.rb +4 -0
  113. data/lib/nokogiri/version.rb +106 -1
  114. data/lib/nokogiri/xml/attr.rb +14 -0
  115. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  116. data/lib/nokogiri/xml/builder.rb +395 -31
  117. data/lib/nokogiri/xml/cdata.rb +4 -2
  118. data/lib/nokogiri/xml/character_data.rb +7 -0
  119. data/lib/nokogiri/xml/document.rb +267 -12
  120. data/lib/nokogiri/xml/document_fragment.rb +149 -0
  121. data/lib/nokogiri/xml/dtd.rb +27 -1
  122. data/lib/nokogiri/xml/element_content.rb +36 -0
  123. data/lib/nokogiri/xml/element_decl.rb +13 -0
  124. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  125. data/lib/nokogiri/xml/namespace.rb +13 -0
  126. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  127. data/lib/nokogiri/xml/node.rb +748 -109
  128. data/lib/nokogiri/xml/node_set.rb +200 -72
  129. data/lib/nokogiri/xml/parse_options.rb +120 -0
  130. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  131. data/lib/nokogiri/xml/pp/node.rb +56 -0
  132. data/lib/nokogiri/xml/pp.rb +2 -0
  133. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  134. data/lib/nokogiri/xml/reader.rb +102 -4
  135. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  136. data/lib/nokogiri/xml/sax/document.rb +114 -2
  137. data/lib/nokogiri/xml/sax/parser.rb +97 -7
  138. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  139. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  140. data/lib/nokogiri/xml/sax.rb +2 -7
  141. data/lib/nokogiri/xml/schema.rb +63 -0
  142. data/lib/nokogiri/xml/searchable.rb +221 -0
  143. data/lib/nokogiri/xml/syntax_error.rb +27 -1
  144. data/lib/nokogiri/xml/text.rb +4 -1
  145. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  146. data/lib/nokogiri/xml/xpath.rb +4 -0
  147. data/lib/nokogiri/xml/xpath_context.rb +3 -1
  148. data/lib/nokogiri/xml.rb +45 -38
  149. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  150. data/lib/nokogiri/xslt.rb +47 -2
  151. data/lib/nokogiri.rb +117 -24
  152. data/lib/xsd/xmlparser/nokogiri.rb +102 -0
  153. data/patches/sort-patches-by-date +25 -0
  154. data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
  155. data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
  156. data/suppressions/README.txt +1 -0
  157. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  158. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  159. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  160. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  161. data/tasks/test.rb +100 -0
  162. data/test/css/test_nthiness.rb +73 -6
  163. data/test/css/test_parser.rb +184 -39
  164. data/test/css/test_tokenizer.rb +72 -19
  165. data/test/css/test_xpath_visitor.rb +44 -2
  166. data/test/decorators/test_slop.rb +20 -0
  167. data/test/files/2ch.html +108 -0
  168. data/test/files/GH_1042.html +18 -0
  169. data/test/files/address_book.rlx +12 -0
  170. data/test/files/address_book.xml +10 -0
  171. data/test/files/atom.xml +344 -0
  172. data/test/files/bar/bar.xsd +4 -0
  173. data/test/files/bogus.xml +0 -0
  174. data/test/files/dont_hurt_em_why.xml +422 -0
  175. data/test/files/encoding.html +82 -0
  176. data/test/files/encoding.xhtml +84 -0
  177. data/test/files/exslt.xml +8 -0
  178. data/test/files/exslt.xslt +35 -0
  179. data/test/files/foo/foo.xsd +4 -0
  180. data/test/files/metacharset.html +10 -0
  181. data/test/files/namespace_pressure_test.xml +1684 -0
  182. data/test/files/noencoding.html +47 -0
  183. data/test/files/po.xml +32 -0
  184. data/test/files/po.xsd +66 -0
  185. data/test/files/saml/saml20assertion_schema.xsd +283 -0
  186. data/test/files/saml/saml20protocol_schema.xsd +302 -0
  187. data/test/files/saml/xenc_schema.xsd +146 -0
  188. data/test/files/saml/xmldsig_schema.xsd +318 -0
  189. data/test/files/shift_jis.html +10 -0
  190. data/test/files/shift_jis.xml +5 -0
  191. data/test/files/shift_jis_no_charset.html +9 -0
  192. data/test/files/slow-xpath.xml +25509 -0
  193. data/test/files/snuggles.xml +3 -0
  194. data/test/files/staff.dtd +10 -0
  195. data/test/files/test_document_url/bar.xml +2 -0
  196. data/test/files/test_document_url/document.dtd +4 -0
  197. data/test/files/test_document_url/document.xml +6 -0
  198. data/test/files/tlm.html +2 -1
  199. data/test/files/to_be_xincluded.xml +2 -0
  200. data/test/files/valid_bar.xml +2 -0
  201. data/test/files/xinclude.xml +4 -0
  202. data/test/helper.rb +124 -13
  203. data/test/html/sax/test_parser.rb +118 -4
  204. data/test/html/sax/test_parser_context.rb +46 -0
  205. data/test/html/sax/test_push_parser.rb +87 -0
  206. data/test/html/test_builder.rb +94 -8
  207. data/test/html/test_document.rb +626 -11
  208. data/test/html/test_document_encoding.rb +145 -0
  209. data/test/html/test_document_fragment.rb +301 -0
  210. data/test/html/test_element_description.rb +105 -0
  211. data/test/html/test_named_characters.rb +14 -0
  212. data/test/html/test_node.rb +212 -0
  213. data/test/html/test_node_encoding.rb +85 -0
  214. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
  215. data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
  216. data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
  217. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  218. data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
  219. data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
  220. data/test/namespaces/test_namespaces_preservation.rb +31 -0
  221. data/test/test_convert_xpath.rb +2 -47
  222. data/test/test_css_cache.rb +45 -0
  223. data/test/test_encoding_handler.rb +48 -0
  224. data/test/test_memory_leak.rb +156 -0
  225. data/test/test_nokogiri.rb +103 -1
  226. data/test/test_soap4r_sax.rb +52 -0
  227. data/test/test_xslt_transforms.rb +293 -8
  228. data/test/xml/node/test_save_options.rb +28 -0
  229. data/test/xml/node/test_subclass.rb +44 -0
  230. data/test/xml/sax/test_parser.rb +309 -8
  231. data/test/xml/sax/test_parser_context.rb +115 -0
  232. data/test/xml/sax/test_push_parser.rb +157 -0
  233. data/test/xml/test_attr.rb +67 -0
  234. data/test/xml/test_attribute_decl.rb +86 -0
  235. data/test/xml/test_builder.rb +327 -2
  236. data/test/xml/test_c14n.rb +180 -0
  237. data/test/xml/test_cdata.rb +32 -2
  238. data/test/xml/test_comment.rb +40 -0
  239. data/test/xml/test_document.rb +846 -35
  240. data/test/xml/test_document_encoding.rb +31 -0
  241. data/test/xml/test_document_fragment.rb +271 -0
  242. data/test/xml/test_dtd.rb +153 -9
  243. data/test/xml/test_dtd_encoding.rb +31 -0
  244. data/test/xml/test_element_content.rb +56 -0
  245. data/test/xml/test_element_decl.rb +73 -0
  246. data/test/xml/test_entity_decl.rb +122 -0
  247. data/test/xml/test_entity_reference.rb +251 -0
  248. data/test/xml/test_namespace.rb +96 -0
  249. data/test/xml/test_node.rb +1126 -105
  250. data/test/xml/test_node_attributes.rb +115 -0
  251. data/test/xml/test_node_encoding.rb +69 -0
  252. data/test/xml/test_node_inheritance.rb +32 -0
  253. data/test/xml/test_node_reparenting.rb +549 -0
  254. data/test/xml/test_node_set.rb +668 -9
  255. data/test/xml/test_parse_options.rb +64 -0
  256. data/test/xml/test_processing_instruction.rb +30 -0
  257. data/test/xml/test_reader.rb +589 -0
  258. data/test/xml/test_reader_encoding.rb +134 -0
  259. data/test/xml/test_relax_ng.rb +60 -0
  260. data/test/xml/test_schema.rb +142 -0
  261. data/test/xml/test_syntax_error.rb +30 -0
  262. data/test/xml/test_text.rb +49 -2
  263. data/test/xml/test_unparented_node.rb +440 -0
  264. data/test/xml/test_xinclude.rb +83 -0
  265. data/test/xml/test_xpath.rb +445 -0
  266. data/test/xslt/test_custom_functions.rb +133 -0
  267. data/test/xslt/test_exception_handling.rb +37 -0
  268. data/test_all +107 -0
  269. metadata +459 -115
  270. data/History.txt +0 -6
  271. data/README.ja.txt +0 -86
  272. data/README.txt +0 -87
  273. data/ext/nokogiri/html_sax_parser.c +0 -32
  274. data/ext/nokogiri/html_sax_parser.h +0 -11
  275. data/ext/nokogiri/native.c +0 -40
  276. data/ext/nokogiri/native.h +0 -51
  277. data/ext/nokogiri/xml_xpath.c +0 -46
  278. data/ext/nokogiri/xml_xpath.h +0 -11
  279. data/lib/nokogiri/css/generated_parser.rb +0 -653
  280. data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
  281. data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
  282. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
  283. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
  284. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  285. data/lib/nokogiri/decorators.rb +0 -1
  286. data/lib/nokogiri/hpricot.rb +0 -47
  287. data/lib/nokogiri/xml/after_handler.rb +0 -18
  288. data/lib/nokogiri/xml/before_handler.rb +0 -32
  289. data/lib/nokogiri/xml/element.rb +0 -6
  290. data/lib/nokogiri/xml/entity_declaration.rb +0 -9
  291. data/nokogiri.gemspec +0 -34
  292. data/test/hpricot/files/basic.xhtml +0 -17
  293. data/test/hpricot/files/boingboing.html +0 -2266
  294. data/test/hpricot/files/cy0.html +0 -3653
  295. data/test/hpricot/files/immob.html +0 -400
  296. data/test/hpricot/files/pace_application.html +0 -1320
  297. data/test/hpricot/files/tenderlove.html +0 -16
  298. data/test/hpricot/files/uswebgen.html +0 -220
  299. data/test/hpricot/files/utf8.html +0 -1054
  300. data/test/hpricot/files/week9.html +0 -1723
  301. data/test/hpricot/files/why.xml +0 -19
  302. data/test/hpricot/load_files.rb +0 -7
  303. data/test/hpricot/test_alter.rb +0 -67
  304. data/test/hpricot/test_builder.rb +0 -27
  305. data/test/hpricot/test_parser.rb +0 -423
  306. data/test/hpricot/test_paths.rb +0 -15
  307. data/test/hpricot/test_preserved.rb +0 -78
  308. data/test/hpricot/test_xml.rb +0 -30
  309. data/test/test_reader.rb +0 -222
@@ -0,0 +1,161 @@
1
+ #include <xml_relax_ng.h>
2
+
3
+ static void dealloc(xmlRelaxNGPtr schema)
4
+ {
5
+ NOKOGIRI_DEBUG_START(schema);
6
+ xmlRelaxNGFree(schema);
7
+ NOKOGIRI_DEBUG_END(schema);
8
+ }
9
+
10
+ /*
11
+ * call-seq:
12
+ * validate_document(document)
13
+ *
14
+ * Validate a Nokogiri::XML::Document against this RelaxNG schema.
15
+ */
16
+ static VALUE validate_document(VALUE self, VALUE document)
17
+ {
18
+ xmlDocPtr doc;
19
+ xmlRelaxNGPtr schema;
20
+ VALUE errors;
21
+ xmlRelaxNGValidCtxtPtr valid_ctxt;
22
+
23
+ Data_Get_Struct(self, xmlRelaxNG, schema);
24
+ Data_Get_Struct(document, xmlDoc, doc);
25
+
26
+ errors = rb_ary_new();
27
+
28
+ valid_ctxt = xmlRelaxNGNewValidCtxt(schema);
29
+
30
+ if(NULL == valid_ctxt) {
31
+ /* we have a problem */
32
+ rb_raise(rb_eRuntimeError, "Could not create a validation context");
33
+ }
34
+
35
+ #ifdef HAVE_XMLRELAXNGSETVALIDSTRUCTUREDERRORS
36
+ xmlRelaxNGSetValidStructuredErrors(
37
+ valid_ctxt,
38
+ Nokogiri_error_array_pusher,
39
+ (void *)errors
40
+ );
41
+ #endif
42
+
43
+ xmlRelaxNGValidateDoc(valid_ctxt, doc);
44
+
45
+ xmlRelaxNGFreeValidCtxt(valid_ctxt);
46
+
47
+ return errors;
48
+ }
49
+
50
+ /*
51
+ * call-seq:
52
+ * read_memory(string)
53
+ *
54
+ * Create a new RelaxNG from the contents of +string+
55
+ */
56
+ static VALUE read_memory(VALUE klass, VALUE content)
57
+ {
58
+ xmlRelaxNGParserCtxtPtr ctx = xmlRelaxNGNewMemParserCtxt(
59
+ (const char *)StringValuePtr(content),
60
+ (int)RSTRING_LEN(content)
61
+ );
62
+ xmlRelaxNGPtr schema;
63
+ VALUE errors = rb_ary_new();
64
+ VALUE rb_schema;
65
+
66
+ xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
67
+
68
+ #ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
69
+ xmlRelaxNGSetParserStructuredErrors(
70
+ ctx,
71
+ Nokogiri_error_array_pusher,
72
+ (void *)errors
73
+ );
74
+ #endif
75
+
76
+ schema = xmlRelaxNGParse(ctx);
77
+
78
+ xmlSetStructuredErrorFunc(NULL, NULL);
79
+ xmlRelaxNGFreeParserCtxt(ctx);
80
+
81
+ if(NULL == schema) {
82
+ xmlErrorPtr error = xmlGetLastError();
83
+ if(error)
84
+ Nokogiri_error_raise(NULL, error);
85
+ else
86
+ rb_raise(rb_eRuntimeError, "Could not parse document");
87
+
88
+ return Qnil;
89
+ }
90
+
91
+ rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
92
+ rb_iv_set(rb_schema, "@errors", errors);
93
+
94
+ return rb_schema;
95
+ }
96
+
97
+ /*
98
+ * call-seq:
99
+ * from_document(doc)
100
+ *
101
+ * Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
102
+ */
103
+ static VALUE from_document(VALUE klass, VALUE document)
104
+ {
105
+ xmlDocPtr doc;
106
+ xmlRelaxNGParserCtxtPtr ctx;
107
+ xmlRelaxNGPtr schema;
108
+ VALUE errors;
109
+ VALUE rb_schema;
110
+
111
+ Data_Get_Struct(document, xmlDoc, doc);
112
+
113
+ /* In case someone passes us a node. ugh. */
114
+ doc = doc->doc;
115
+
116
+ ctx = xmlRelaxNGNewDocParserCtxt(doc);
117
+
118
+ errors = rb_ary_new();
119
+ xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
120
+
121
+ #ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
122
+ xmlRelaxNGSetParserStructuredErrors(
123
+ ctx,
124
+ Nokogiri_error_array_pusher,
125
+ (void *)errors
126
+ );
127
+ #endif
128
+
129
+ schema = xmlRelaxNGParse(ctx);
130
+
131
+ xmlSetStructuredErrorFunc(NULL, NULL);
132
+
133
+ if(NULL == schema) {
134
+ xmlErrorPtr error = xmlGetLastError();
135
+ if(error)
136
+ Nokogiri_error_raise(NULL, error);
137
+ else
138
+ rb_raise(rb_eRuntimeError, "Could not parse document");
139
+
140
+ return Qnil;
141
+ }
142
+
143
+ rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
144
+ rb_iv_set(rb_schema, "@errors", errors);
145
+
146
+ return rb_schema;
147
+ }
148
+
149
+ VALUE cNokogiriXmlRelaxNG;
150
+ void init_xml_relax_ng()
151
+ {
152
+ VALUE nokogiri = rb_define_module("Nokogiri");
153
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
154
+ VALUE klass = rb_define_class_under(xml, "RelaxNG", cNokogiriXmlSchema);
155
+
156
+ cNokogiriXmlRelaxNG = klass;
157
+
158
+ rb_define_singleton_method(klass, "read_memory", read_memory, 1);
159
+ rb_define_singleton_method(klass, "from_document", from_document, 1);
160
+ rb_define_private_method(klass, "validate_document", validate_document, 1);
161
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_RELAX_NG
2
+ #define NOKOGIRI_XML_RELAX_NG
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_relax_ng();
7
+
8
+ extern VALUE cNokogiriXmlRelaxNG;
9
+ #endif
@@ -1,135 +1,253 @@
1
- #define _GNU_SOURCE
2
- #include <stdio.h>
3
1
  #include <xml_sax_parser.h>
4
2
 
5
- /*
6
- * call-seq:
7
- * parse_memory(data)
8
- *
9
- * Parse the document stored in +data+
10
- */
11
- static VALUE parse_memory(VALUE self, VALUE data)
12
- {
13
- xmlSAXHandlerPtr handler;
14
- Data_Get_Struct(self, xmlSAXHandler, handler);
15
- xmlSAXUserParseMemory( handler,
16
- (void *)self,
17
- StringValuePtr(data),
18
- NUM2INT(rb_funcall(data, rb_intern("length"), 0))
19
- );
20
- return data;
21
- }
3
+ int vasprintf (char **strp, const char *fmt, va_list ap);
4
+ void vasprintf_free (void *p);
22
5
 
23
- static VALUE native_parse_file(VALUE self, VALUE data)
24
- {
25
- xmlSAXHandlerPtr handler;
26
- Data_Get_Struct(self, xmlSAXHandler, handler);
27
- xmlSAXUserParseFile( handler,
28
- (void *)self,
29
- StringValuePtr(data)
30
- );
31
- return data;
32
- }
6
+ static ID id_start_document, id_end_document, id_start_element, id_end_element;
7
+ static ID id_start_element_namespace, id_end_element_namespace;
8
+ static ID id_comment, id_characters, id_xmldecl, id_error, id_warning;
9
+ static ID id_cdata_block, id_cAttribute;
10
+ static ID id_processing_instruction;
33
11
 
34
12
  static void start_document(void * ctx)
35
13
  {
36
- VALUE self = (VALUE)ctx;
37
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
38
- rb_funcall(doc, rb_intern("start_document"), 0);
14
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
15
+ VALUE doc = rb_iv_get(self, "@document");
16
+
17
+ xmlParserCtxtPtr ctxt = NOKOGIRI_SAX_CTXT(ctx);
18
+
19
+ if(NULL != ctxt && ctxt->html != 1) {
20
+ if(ctxt->standalone != -1) { /* -1 means there was no declaration */
21
+ VALUE encoding = Qnil ;
22
+ if (ctxt->encoding) {
23
+ encoding = NOKOGIRI_STR_NEW2(ctxt->encoding) ;
24
+ } else if (ctxt->input && ctxt->input->encoding) {
25
+ encoding = NOKOGIRI_STR_NEW2(ctxt->input->encoding) ;
26
+ }
27
+
28
+ VALUE version = ctxt->version ? NOKOGIRI_STR_NEW2(ctxt->version) : Qnil;
29
+
30
+ VALUE standalone = Qnil;
31
+ switch(ctxt->standalone)
32
+ {
33
+ case 0:
34
+ standalone = NOKOGIRI_STR_NEW2("no");
35
+ break;
36
+ case 1:
37
+ standalone = NOKOGIRI_STR_NEW2("yes");
38
+ break;
39
+ }
40
+
41
+ rb_funcall(doc, id_xmldecl, 3, version, encoding, standalone);
42
+ }
43
+ }
44
+
45
+ rb_funcall(doc, id_start_document, 0);
39
46
  }
40
47
 
41
48
  static void end_document(void * ctx)
42
49
  {
43
- VALUE self = (VALUE)ctx;
44
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
45
- rb_funcall(doc, rb_intern("end_document"), 0);
50
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
51
+ VALUE doc = rb_iv_get(self, "@document");
52
+ rb_funcall(doc, id_end_document, 0);
46
53
  }
47
54
 
48
55
  static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts)
49
56
  {
50
- VALUE self = (VALUE)ctx;
51
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
57
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
58
+ VALUE doc = rb_iv_get(self, "@document");
52
59
  VALUE attributes = rb_ary_new();
53
60
  const xmlChar * attr;
54
61
  int i = 0;
55
62
  if(atts) {
56
63
  while((attr = atts[i]) != NULL) {
57
- rb_funcall(attributes, rb_intern("<<"), 1, rb_str_new2((const char *)attr));
58
- i++;
64
+ const xmlChar * val = atts[i+1];
65
+ VALUE value = val != NULL ? NOKOGIRI_STR_NEW2(val) : Qnil;
66
+ rb_ary_push(attributes, rb_ary_new3(2, NOKOGIRI_STR_NEW2(attr), value));
67
+ i+=2;
59
68
  }
60
69
  }
61
70
 
62
71
  rb_funcall( doc,
63
- rb_intern("start_element"),
72
+ id_start_element,
64
73
  2,
65
- rb_str_new2((const char *)name),
74
+ NOKOGIRI_STR_NEW2(name),
66
75
  attributes
67
76
  );
68
77
  }
69
78
 
70
79
  static void end_element(void * ctx, const xmlChar *name)
71
80
  {
72
- VALUE self = (VALUE)ctx;
73
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
74
- rb_funcall(doc, rb_intern("end_element"), 1, rb_str_new2((const char *)name));
81
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
82
+ VALUE doc = rb_iv_get(self, "@document");
83
+ rb_funcall(doc, id_end_element, 1, NOKOGIRI_STR_NEW2(name));
84
+ }
85
+
86
+ static VALUE attributes_as_list(
87
+ VALUE self,
88
+ int nb_attributes,
89
+ const xmlChar ** attributes)
90
+ {
91
+ VALUE list = rb_ary_new2((long)nb_attributes);
92
+
93
+ VALUE attr_klass = rb_const_get(cNokogiriXmlSaxParser, id_cAttribute);
94
+ if (attributes) {
95
+ /* Each attribute is an array of [localname, prefix, URI, value, end] */
96
+ int i;
97
+ for (i = 0; i < nb_attributes * 5; i += 5) {
98
+ VALUE argv[4], attribute;
99
+
100
+ argv[0] = RBSTR_OR_QNIL(attributes[i + 0]); /* localname */
101
+ argv[1] = RBSTR_OR_QNIL(attributes[i + 1]); /* prefix */
102
+ argv[2] = RBSTR_OR_QNIL(attributes[i + 2]); /* URI */
103
+
104
+ /* value */
105
+ argv[3] = NOKOGIRI_STR_NEW((const char*)attributes[i+3],
106
+ (attributes[i+4] - attributes[i+3]));
107
+
108
+ attribute = rb_class_new_instance(4, argv, attr_klass);
109
+ rb_ary_push(list, attribute);
110
+ }
111
+ }
112
+
113
+ return list;
114
+ }
115
+
116
+ static void
117
+ start_element_ns (
118
+ void * ctx,
119
+ const xmlChar * localname,
120
+ const xmlChar * prefix,
121
+ const xmlChar * uri,
122
+ int nb_namespaces,
123
+ const xmlChar ** namespaces,
124
+ int nb_attributes,
125
+ int nb_defaulted,
126
+ const xmlChar ** attributes)
127
+ {
128
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
129
+ VALUE doc = rb_iv_get(self, "@document");
130
+
131
+ VALUE attribute_list = attributes_as_list(self, nb_attributes, attributes);
132
+
133
+ VALUE ns_list = rb_ary_new2((long)nb_namespaces);
134
+
135
+ if (namespaces) {
136
+ int i;
137
+ for (i = 0; i < nb_namespaces * 2; i += 2)
138
+ {
139
+ rb_ary_push(ns_list,
140
+ rb_ary_new3((long)2,
141
+ RBSTR_OR_QNIL(namespaces[i + 0]),
142
+ RBSTR_OR_QNIL(namespaces[i + 1])
143
+ )
144
+ );
145
+ }
146
+ }
147
+
148
+ rb_funcall( doc,
149
+ id_start_element_namespace,
150
+ 5,
151
+ NOKOGIRI_STR_NEW2(localname),
152
+ attribute_list,
153
+ RBSTR_OR_QNIL(prefix),
154
+ RBSTR_OR_QNIL(uri),
155
+ ns_list
156
+ );
157
+ }
158
+
159
+ /**
160
+ * end_element_ns was borrowed heavily from libxml-ruby.
161
+ */
162
+ static void
163
+ end_element_ns (
164
+ void * ctx,
165
+ const xmlChar * localname,
166
+ const xmlChar * prefix,
167
+ const xmlChar * uri)
168
+ {
169
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
170
+ VALUE doc = rb_iv_get(self, "@document");
171
+
172
+ rb_funcall(doc, id_end_element_namespace, 3,
173
+ NOKOGIRI_STR_NEW2(localname),
174
+ RBSTR_OR_QNIL(prefix),
175
+ RBSTR_OR_QNIL(uri)
176
+ );
75
177
  }
76
178
 
77
179
  static void characters_func(void * ctx, const xmlChar * ch, int len)
78
180
  {
79
- VALUE self = (VALUE)ctx;
80
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
81
- VALUE str = rb_str_new((const char *)ch, (long)len);
82
- rb_funcall(doc, rb_intern("characters"), 1, str);
181
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
182
+ VALUE doc = rb_iv_get(self, "@document");
183
+ VALUE str = NOKOGIRI_STR_NEW(ch, len);
184
+ rb_funcall(doc, id_characters, 1, str);
83
185
  }
84
186
 
85
187
  static void comment_func(void * ctx, const xmlChar * value)
86
188
  {
87
- VALUE self = (VALUE)ctx;
88
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
89
- VALUE str = rb_str_new2((const char *)value);
90
- rb_funcall(doc, rb_intern("comment"), 1, str);
189
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
190
+ VALUE doc = rb_iv_get(self, "@document");
191
+ VALUE str = NOKOGIRI_STR_NEW2(value);
192
+ rb_funcall(doc, id_comment, 1, str);
91
193
  }
92
194
 
93
- #ifndef XP_WIN
94
195
  static void warning_func(void * ctx, const char *msg, ...)
95
196
  {
96
- VALUE self = (VALUE)ctx;
97
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
197
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
198
+ VALUE doc = rb_iv_get(self, "@document");
98
199
  char * message;
200
+ VALUE ruby_message;
99
201
 
100
202
  va_list args;
101
203
  va_start(args, msg);
102
204
  vasprintf(&message, msg, args);
103
205
  va_end(args);
104
206
 
105
- rb_funcall(doc, rb_intern("warning"), 1, rb_str_new2(message));
106
- free(message);
207
+ ruby_message = NOKOGIRI_STR_NEW2(message);
208
+ vasprintf_free(message);
209
+ rb_funcall(doc, id_warning, 1, ruby_message);
107
210
  }
108
- #endif
109
211
 
110
- #ifndef XP_WIN
111
212
  static void error_func(void * ctx, const char *msg, ...)
112
213
  {
113
- VALUE self = (VALUE)ctx;
114
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
214
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
215
+ VALUE doc = rb_iv_get(self, "@document");
115
216
  char * message;
217
+ VALUE ruby_message;
116
218
 
117
219
  va_list args;
118
220
  va_start(args, msg);
119
221
  vasprintf(&message, msg, args);
120
222
  va_end(args);
121
223
 
122
- rb_funcall(doc, rb_intern("error"), 1, rb_str_new2(message));
123
- free(message);
224
+ ruby_message = NOKOGIRI_STR_NEW2(message);
225
+ vasprintf_free(message);
226
+ rb_funcall(doc, id_error, 1, ruby_message);
124
227
  }
125
- #endif
126
228
 
127
229
  static void cdata_block(void * ctx, const xmlChar * value, int len)
128
230
  {
129
- VALUE self = (VALUE)ctx;
130
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
131
- VALUE string = rb_str_new((const char *)value, (long)len);
132
- rb_funcall(doc, rb_intern("cdata_block"), 1, string);
231
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
232
+ VALUE doc = rb_iv_get(self, "@document");
233
+ VALUE string = NOKOGIRI_STR_NEW(value, len);
234
+ rb_funcall(doc, id_cdata_block, 1, string);
235
+ }
236
+
237
+ static void processing_instruction(void * ctx, const xmlChar * name, const xmlChar * content)
238
+ {
239
+ VALUE rb_content;
240
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
241
+ VALUE doc = rb_iv_get(self, "@document");
242
+
243
+ rb_content = content ? NOKOGIRI_STR_NEW2(content) : Qnil;
244
+
245
+ rb_funcall( doc,
246
+ id_processing_instruction,
247
+ 2,
248
+ NOKOGIRI_STR_NEW2(name),
249
+ rb_content
250
+ );
133
251
  }
134
252
 
135
253
  static void deallocate(xmlSAXHandlerPtr handler)
@@ -141,24 +259,23 @@ static void deallocate(xmlSAXHandlerPtr handler)
141
259
 
142
260
  static VALUE allocate(VALUE klass)
143
261
  {
144
- xmlSAXHandlerPtr handler = calloc(1, sizeof(xmlSAXHandler));
262
+ xmlSAXHandlerPtr handler = calloc((size_t)1, sizeof(xmlSAXHandler));
263
+
264
+ xmlSetStructuredErrorFunc(NULL, NULL);
145
265
 
146
266
  handler->startDocument = start_document;
147
267
  handler->endDocument = end_document;
148
268
  handler->startElement = start_element;
149
269
  handler->endElement = end_element;
270
+ handler->startElementNs = start_element_ns;
271
+ handler->endElementNs = end_element_ns;
150
272
  handler->characters = characters_func;
151
273
  handler->comment = comment_func;
152
- #ifndef XP_WIN
153
- /*
154
- * The va*functions aren't in ming, and I don't want to deal with
155
- * it right now.....
156
- *
157
- */
158
274
  handler->warning = warning_func;
159
275
  handler->error = error_func;
160
- #endif
161
276
  handler->cdataBlock = cdata_block;
277
+ handler->processingInstruction = processing_instruction;
278
+ handler->initialized = XML_SAX2_MAGIC;
162
279
 
163
280
  return Data_Wrap_Struct(klass, NULL, deallocate, handler);
164
281
  }
@@ -166,9 +283,27 @@ static VALUE allocate(VALUE klass)
166
283
  VALUE cNokogiriXmlSaxParser ;
167
284
  void init_xml_sax_parser()
168
285
  {
169
- VALUE klass = cNokogiriXmlSaxParser =
170
- rb_const_get(mNokogiriXmlSax, rb_intern("Parser"));
286
+ VALUE nokogiri = rb_define_module("Nokogiri");
287
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
288
+ VALUE sax = rb_define_module_under(xml, "SAX");
289
+ VALUE klass = rb_define_class_under(sax, "Parser", rb_cObject);
290
+
291
+ cNokogiriXmlSaxParser = klass;
292
+
171
293
  rb_define_alloc_func(klass, allocate);
172
- rb_define_method(klass, "parse_memory", parse_memory, 1);
173
- rb_define_private_method(klass, "native_parse_file", native_parse_file, 1);
294
+
295
+ id_start_document = rb_intern("start_document");
296
+ id_end_document = rb_intern("end_document");
297
+ id_start_element = rb_intern("start_element");
298
+ id_end_element = rb_intern("end_element");
299
+ id_comment = rb_intern("comment");
300
+ id_characters = rb_intern("characters");
301
+ id_xmldecl = rb_intern("xmldecl");
302
+ id_error = rb_intern("error");
303
+ id_warning = rb_intern("warning");
304
+ id_cdata_block = rb_intern("cdata_block");
305
+ id_cAttribute = rb_intern("Attribute");
306
+ id_start_element_namespace = rb_intern("start_element_namespace");
307
+ id_end_element_namespace = rb_intern("end_element_namespace");
308
+ id_processing_instruction = rb_intern("processing_instruction");
174
309
  }
@@ -1,10 +1,39 @@
1
1
  #ifndef NOKOGIRI_XML_SAX_PARSER
2
2
  #define NOKOGIRI_XML_SAX_PARSER
3
3
 
4
- #include <native.h>
4
+ #include <nokogiri.h>
5
5
 
6
6
  void init_xml_sax_parser();
7
7
 
8
8
  extern VALUE cNokogiriXmlSaxParser ;
9
+
10
+ typedef struct _nokogiriSAXTuple {
11
+ xmlParserCtxtPtr ctxt;
12
+ VALUE self;
13
+ } nokogiriSAXTuple;
14
+
15
+ typedef nokogiriSAXTuple * nokogiriSAXTuplePtr;
16
+
17
+ #define NOKOGIRI_SAX_SELF(_ctxt) \
18
+ ((nokogiriSAXTuplePtr)(_ctxt))->self
19
+
20
+ #define NOKOGIRI_SAX_CTXT(_ctxt) \
21
+ ((nokogiriSAXTuplePtr)(_ctxt))->ctxt
22
+
23
+ #define NOKOGIRI_SAX_TUPLE_NEW(_ctxt, _self) \
24
+ nokogiri_sax_tuple_new(_ctxt, _self)
25
+
26
+ static inline nokogiriSAXTuplePtr
27
+ nokogiri_sax_tuple_new(xmlParserCtxtPtr ctxt, VALUE self)
28
+ {
29
+ nokogiriSAXTuplePtr tuple = malloc(sizeof(nokogiriSAXTuple));
30
+ tuple->self = self;
31
+ tuple->ctxt = ctxt;
32
+ return tuple;
33
+ }
34
+
35
/*
 * Release a tuple allocated by nokogiri_sax_tuple_new.
 * The last line carries no line continuation, so text placed after the macro
 * can never be spliced into its expansion.
 */
#define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) \
  free(_tuple)
37
+
9
38
  #endif
10
39