nokogiri 1.3.0-x86-mswin32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (256) hide show
  1. data/.autotest +27 -0
  2. data/CHANGELOG.ja.rdoc +233 -0
  3. data/CHANGELOG.rdoc +222 -0
  4. data/Manifest.txt +247 -0
  5. data/README.ja.rdoc +103 -0
  6. data/README.rdoc +117 -0
  7. data/Rakefile +205 -0
  8. data/bin/nokogiri +47 -0
  9. data/ext/nokogiri/extconf.rb +89 -0
  10. data/ext/nokogiri/html_document.c +183 -0
  11. data/ext/nokogiri/html_document.h +10 -0
  12. data/ext/nokogiri/html_element_description.c +272 -0
  13. data/ext/nokogiri/html_element_description.h +10 -0
  14. data/ext/nokogiri/html_entity_lookup.c +30 -0
  15. data/ext/nokogiri/html_entity_lookup.h +8 -0
  16. data/ext/nokogiri/html_sax_parser.c +57 -0
  17. data/ext/nokogiri/html_sax_parser.h +11 -0
  18. data/ext/nokogiri/iconv.dll +0 -0
  19. data/ext/nokogiri/libexslt.dll +0 -0
  20. data/ext/nokogiri/libxml2.dll +0 -0
  21. data/ext/nokogiri/libxslt.dll +0 -0
  22. data/ext/nokogiri/nokogiri.c +81 -0
  23. data/ext/nokogiri/nokogiri.h +149 -0
  24. data/ext/nokogiri/xml_attr.c +92 -0
  25. data/ext/nokogiri/xml_attr.h +9 -0
  26. data/ext/nokogiri/xml_cdata.c +53 -0
  27. data/ext/nokogiri/xml_cdata.h +9 -0
  28. data/ext/nokogiri/xml_comment.c +51 -0
  29. data/ext/nokogiri/xml_comment.h +9 -0
  30. data/ext/nokogiri/xml_document.c +308 -0
  31. data/ext/nokogiri/xml_document.h +21 -0
  32. data/ext/nokogiri/xml_document_fragment.c +48 -0
  33. data/ext/nokogiri/xml_document_fragment.h +10 -0
  34. data/ext/nokogiri/xml_dtd.c +102 -0
  35. data/ext/nokogiri/xml_dtd.h +8 -0
  36. data/ext/nokogiri/xml_entity_reference.c +50 -0
  37. data/ext/nokogiri/xml_entity_reference.h +9 -0
  38. data/ext/nokogiri/xml_io.c +24 -0
  39. data/ext/nokogiri/xml_io.h +10 -0
  40. data/ext/nokogiri/xml_namespace.c +69 -0
  41. data/ext/nokogiri/xml_namespace.h +12 -0
  42. data/ext/nokogiri/xml_node.c +928 -0
  43. data/ext/nokogiri/xml_node.h +14 -0
  44. data/ext/nokogiri/xml_node_set.c +386 -0
  45. data/ext/nokogiri/xml_node_set.h +9 -0
  46. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  47. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  48. data/ext/nokogiri/xml_reader.c +572 -0
  49. data/ext/nokogiri/xml_reader.h +10 -0
  50. data/ext/nokogiri/xml_relax_ng.c +106 -0
  51. data/ext/nokogiri/xml_relax_ng.h +9 -0
  52. data/ext/nokogiri/xml_sax_parser.c +336 -0
  53. data/ext/nokogiri/xml_sax_parser.h +10 -0
  54. data/ext/nokogiri/xml_sax_push_parser.c +86 -0
  55. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  56. data/ext/nokogiri/xml_schema.c +107 -0
  57. data/ext/nokogiri/xml_schema.h +9 -0
  58. data/ext/nokogiri/xml_syntax_error.c +203 -0
  59. data/ext/nokogiri/xml_syntax_error.h +12 -0
  60. data/ext/nokogiri/xml_text.c +47 -0
  61. data/ext/nokogiri/xml_text.h +9 -0
  62. data/ext/nokogiri/xml_xpath.c +53 -0
  63. data/ext/nokogiri/xml_xpath.h +11 -0
  64. data/ext/nokogiri/xml_xpath_context.c +252 -0
  65. data/ext/nokogiri/xml_xpath_context.h +9 -0
  66. data/ext/nokogiri/xslt_stylesheet.c +131 -0
  67. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  68. data/ext/nokogiri/zlib1.dll +0 -0
  69. data/lib/action-nokogiri.rb +36 -0
  70. data/lib/nokogiri.rb +110 -0
  71. data/lib/nokogiri/1.8/nokogiri.so +0 -0
  72. data/lib/nokogiri/1.9/nokogiri.so +0 -0
  73. data/lib/nokogiri/css.rb +25 -0
  74. data/lib/nokogiri/css/generated_parser.rb +748 -0
  75. data/lib/nokogiri/css/generated_tokenizer.rb +144 -0
  76. data/lib/nokogiri/css/node.rb +107 -0
  77. data/lib/nokogiri/css/parser.rb +82 -0
  78. data/lib/nokogiri/css/parser.y +227 -0
  79. data/lib/nokogiri/css/syntax_error.rb +7 -0
  80. data/lib/nokogiri/css/tokenizer.rb +11 -0
  81. data/lib/nokogiri/css/tokenizer.rex +54 -0
  82. data/lib/nokogiri/css/xpath_visitor.rb +172 -0
  83. data/lib/nokogiri/decorators.rb +2 -0
  84. data/lib/nokogiri/decorators/hpricot.rb +3 -0
  85. data/lib/nokogiri/decorators/hpricot/node.rb +56 -0
  86. data/lib/nokogiri/decorators/hpricot/node_set.rb +54 -0
  87. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +30 -0
  88. data/lib/nokogiri/decorators/slop.rb +33 -0
  89. data/lib/nokogiri/ffi/html/document.rb +37 -0
  90. data/lib/nokogiri/ffi/html/element_description.rb +85 -0
  91. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  92. data/lib/nokogiri/ffi/html/sax/parser.rb +21 -0
  93. data/lib/nokogiri/ffi/io_callbacks.rb +32 -0
  94. data/lib/nokogiri/ffi/libxml.rb +314 -0
  95. data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
  96. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  97. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  98. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  99. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  100. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  101. data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
  102. data/lib/nokogiri/ffi/structs/xml_dtd.rb +26 -0
  103. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  104. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  105. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  106. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  107. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  108. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  109. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +14 -0
  110. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  111. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  112. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  113. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
  114. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  115. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  116. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  117. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  118. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  119. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  120. data/lib/nokogiri/ffi/xml/document.rb +107 -0
  121. data/lib/nokogiri/ffi/xml/document_fragment.rb +26 -0
  122. data/lib/nokogiri/ffi/xml/dtd.rb +42 -0
  123. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  124. data/lib/nokogiri/ffi/xml/namespace.rb +38 -0
  125. data/lib/nokogiri/ffi/xml/node.rb +380 -0
  126. data/lib/nokogiri/ffi/xml/node_set.rb +130 -0
  127. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  128. data/lib/nokogiri/ffi/xml/reader.rb +217 -0
  129. data/lib/nokogiri/ffi/xml/relax_ng.rb +51 -0
  130. data/lib/nokogiri/ffi/xml/sax/parser.rb +148 -0
  131. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +38 -0
  132. data/lib/nokogiri/ffi/xml/schema.rb +55 -0
  133. data/lib/nokogiri/ffi/xml/syntax_error.rb +76 -0
  134. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  135. data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
  136. data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
  137. data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
  138. data/lib/nokogiri/hpricot.rb +62 -0
  139. data/lib/nokogiri/html.rb +34 -0
  140. data/lib/nokogiri/html/builder.rb +35 -0
  141. data/lib/nokogiri/html/document.rb +71 -0
  142. data/lib/nokogiri/html/document_fragment.rb +15 -0
  143. data/lib/nokogiri/html/element_description.rb +23 -0
  144. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  145. data/lib/nokogiri/html/sax/parser.rb +47 -0
  146. data/lib/nokogiri/nokogiri.rb +1 -0
  147. data/lib/nokogiri/syntax_error.rb +4 -0
  148. data/lib/nokogiri/version.rb +29 -0
  149. data/lib/nokogiri/version_warning.rb +11 -0
  150. data/lib/nokogiri/xml.rb +62 -0
  151. data/lib/nokogiri/xml/attr.rb +9 -0
  152. data/lib/nokogiri/xml/builder.rb +254 -0
  153. data/lib/nokogiri/xml/cdata.rb +11 -0
  154. data/lib/nokogiri/xml/document.rb +100 -0
  155. data/lib/nokogiri/xml/document_fragment.rb +49 -0
  156. data/lib/nokogiri/xml/dtd.rb +11 -0
  157. data/lib/nokogiri/xml/entity_declaration.rb +11 -0
  158. data/lib/nokogiri/xml/fragment_handler.rb +55 -0
  159. data/lib/nokogiri/xml/namespace.rb +7 -0
  160. data/lib/nokogiri/xml/node.rb +745 -0
  161. data/lib/nokogiri/xml/node/save_options.rb +42 -0
  162. data/lib/nokogiri/xml/node_set.rb +238 -0
  163. data/lib/nokogiri/xml/notation.rb +6 -0
  164. data/lib/nokogiri/xml/parse_options.rb +80 -0
  165. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  166. data/lib/nokogiri/xml/reader.rb +66 -0
  167. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  168. data/lib/nokogiri/xml/sax.rb +3 -0
  169. data/lib/nokogiri/xml/sax/document.rb +143 -0
  170. data/lib/nokogiri/xml/sax/parser.rb +101 -0
  171. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  172. data/lib/nokogiri/xml/schema.rb +65 -0
  173. data/lib/nokogiri/xml/syntax_error.rb +34 -0
  174. data/lib/nokogiri/xml/xpath.rb +10 -0
  175. data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
  176. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  177. data/lib/nokogiri/xslt.rb +48 -0
  178. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  179. data/lib/xsd/xmlparser/nokogiri.rb +64 -0
  180. data/tasks/test.rb +161 -0
  181. data/test/css/test_nthiness.rb +160 -0
  182. data/test/css/test_parser.rb +277 -0
  183. data/test/css/test_tokenizer.rb +176 -0
  184. data/test/css/test_xpath_visitor.rb +76 -0
  185. data/test/ffi/test_document.rb +35 -0
  186. data/test/files/address_book.rlx +12 -0
  187. data/test/files/address_book.xml +10 -0
  188. data/test/files/dont_hurt_em_why.xml +422 -0
  189. data/test/files/exslt.xml +8 -0
  190. data/test/files/exslt.xslt +35 -0
  191. data/test/files/po.xml +32 -0
  192. data/test/files/po.xsd +66 -0
  193. data/test/files/staff.xml +59 -0
  194. data/test/files/staff.xslt +32 -0
  195. data/test/files/tlm.html +850 -0
  196. data/test/helper.rb +123 -0
  197. data/test/hpricot/files/basic.xhtml +17 -0
  198. data/test/hpricot/files/boingboing.html +2266 -0
  199. data/test/hpricot/files/cy0.html +3653 -0
  200. data/test/hpricot/files/immob.html +400 -0
  201. data/test/hpricot/files/pace_application.html +1320 -0
  202. data/test/hpricot/files/tenderlove.html +16 -0
  203. data/test/hpricot/files/uswebgen.html +220 -0
  204. data/test/hpricot/files/utf8.html +1054 -0
  205. data/test/hpricot/files/week9.html +1723 -0
  206. data/test/hpricot/files/why.xml +19 -0
  207. data/test/hpricot/load_files.rb +11 -0
  208. data/test/hpricot/test_alter.rb +68 -0
  209. data/test/hpricot/test_builder.rb +20 -0
  210. data/test/hpricot/test_parser.rb +426 -0
  211. data/test/hpricot/test_paths.rb +15 -0
  212. data/test/hpricot/test_preserved.rb +77 -0
  213. data/test/hpricot/test_xml.rb +30 -0
  214. data/test/html/sax/test_parser.rb +52 -0
  215. data/test/html/test_builder.rb +156 -0
  216. data/test/html/test_document.rb +361 -0
  217. data/test/html/test_document_encoding.rb +46 -0
  218. data/test/html/test_document_fragment.rb +97 -0
  219. data/test/html/test_element_description.rb +95 -0
  220. data/test/html/test_named_characters.rb +14 -0
  221. data/test/html/test_node.rb +165 -0
  222. data/test/test_convert_xpath.rb +186 -0
  223. data/test/test_css_cache.rb +56 -0
  224. data/test/test_gc.rb +15 -0
  225. data/test/test_memory_leak.rb +77 -0
  226. data/test/test_nokogiri.rb +127 -0
  227. data/test/test_reader.rb +316 -0
  228. data/test/test_xslt_transforms.rb +131 -0
  229. data/test/xml/node/test_save_options.rb +20 -0
  230. data/test/xml/node/test_subclass.rb +44 -0
  231. data/test/xml/sax/test_parser.rb +169 -0
  232. data/test/xml/sax/test_push_parser.rb +92 -0
  233. data/test/xml/test_attr.rb +38 -0
  234. data/test/xml/test_builder.rb +73 -0
  235. data/test/xml/test_cdata.rb +38 -0
  236. data/test/xml/test_comment.rb +23 -0
  237. data/test/xml/test_document.rb +397 -0
  238. data/test/xml/test_document_encoding.rb +26 -0
  239. data/test/xml/test_document_fragment.rb +76 -0
  240. data/test/xml/test_dtd.rb +42 -0
  241. data/test/xml/test_dtd_encoding.rb +31 -0
  242. data/test/xml/test_entity_reference.rb +21 -0
  243. data/test/xml/test_namespace.rb +43 -0
  244. data/test/xml/test_node.rb +808 -0
  245. data/test/xml/test_node_attributes.rb +34 -0
  246. data/test/xml/test_node_encoding.rb +84 -0
  247. data/test/xml/test_node_set.rb +368 -0
  248. data/test/xml/test_parse_options.rb +52 -0
  249. data/test/xml/test_processing_instruction.rb +30 -0
  250. data/test/xml/test_reader_encoding.rb +126 -0
  251. data/test/xml/test_relax_ng.rb +60 -0
  252. data/test/xml/test_schema.rb +65 -0
  253. data/test/xml/test_text.rb +18 -0
  254. data/test/xml/test_unparented_node.rb +381 -0
  255. data/test/xml/test_xpath.rb +106 -0
  256. metadata +409 -0
@@ -0,0 +1,10 @@
1
+ #ifndef NOKOGIRI_XML_READER
2
+ #define NOKOGIRI_XML_READER
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_reader();
7
+
8
+ extern VALUE cNokogiriXmlReader;
9
+
10
+ #endif
@@ -0,0 +1,106 @@
1
+ #include <xml_relax_ng.h>
2
+
3
+ static void dealloc(xmlRelaxNGPtr schema)
4
+ {
5
+ NOKOGIRI_DEBUG_START(schema);
6
+ xmlRelaxNGFree(schema);
7
+ NOKOGIRI_DEBUG_END(schema);
8
+ }
9
+
10
+ /*
11
+ * call-seq:
12
+ * validate_document(document)
13
+ *
14
+ * Validate a Nokogiri::XML::Document against this RelaxNG schema.
15
+ */
16
+ static VALUE validate_document(VALUE self, VALUE document)
17
+ {
18
+ xmlDocPtr doc;
19
+ xmlRelaxNGPtr schema;
20
+
21
+ Data_Get_Struct(self, xmlRelaxNG, schema);
22
+ Data_Get_Struct(document, xmlDoc, doc);
23
+
24
+ VALUE errors = rb_ary_new();
25
+
26
+ xmlRelaxNGValidCtxtPtr valid_ctxt = xmlRelaxNGNewValidCtxt(schema);
27
+
28
+ if(NULL == valid_ctxt) {
29
+ // we have a problem
30
+ rb_raise(rb_eRuntimeError, "Could not create a validation context");
31
+ }
32
+
33
+ if (! is_2_6_16()) {
34
+ xmlRelaxNGSetValidStructuredErrors(
35
+ valid_ctxt,
36
+ Nokogiri_error_array_pusher,
37
+ (void *)errors
38
+ );
39
+ }
40
+
41
+ xmlRelaxNGValidateDoc(valid_ctxt, doc);
42
+
43
+ xmlRelaxNGFreeValidCtxt(valid_ctxt);
44
+
45
+ return errors;
46
+ }
47
+
48
+ /*
49
+ * call-seq:
50
+ * read_memory(string)
51
+ *
52
+ * Create a new RelaxNG from the contents of +string+
53
+ */
54
+ static VALUE read_memory(VALUE klass, VALUE content)
55
+ {
56
+ xmlRelaxNGParserCtxtPtr ctx = xmlRelaxNGNewMemParserCtxt(
57
+ (const char *)StringValuePtr(content),
58
+ RSTRING_LEN(content)
59
+ );
60
+
61
+ VALUE errors = rb_ary_new();
62
+ xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
63
+
64
+ if (! is_2_6_16()) {
65
+ xmlRelaxNGSetParserStructuredErrors(
66
+ ctx,
67
+ Nokogiri_error_array_pusher,
68
+ (void *)errors
69
+ );
70
+ }
71
+
72
+ xmlRelaxNGPtr schema = xmlRelaxNGParse(ctx);
73
+
74
+ xmlSetStructuredErrorFunc(NULL, NULL);
75
+ xmlRelaxNGFreeParserCtxt(ctx);
76
+
77
+ if(NULL == schema) {
78
+ xmlErrorPtr error = xmlGetLastError();
79
+ if(error)
80
+ rb_funcall(rb_mKernel, rb_intern("raise"), 1,
81
+ Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
82
+ );
83
+ else
84
+ rb_raise(rb_eRuntimeError, "Could not parse document");
85
+
86
+ return Qnil;
87
+ }
88
+
89
+ VALUE rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
90
+ rb_iv_set(rb_schema, "@errors", errors);
91
+
92
+ return rb_schema;
93
+ }
94
+
95
+ VALUE cNokogiriXmlRelaxNG;
96
+ void init_xml_relax_ng()
97
+ {
98
+ VALUE nokogiri = rb_define_module("Nokogiri");
99
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
100
+ VALUE klass = rb_define_class_under(xml, "RelaxNG", cNokogiriXmlSchema);
101
+
102
+ cNokogiriXmlRelaxNG = klass;
103
+
104
+ rb_define_singleton_method(klass, "read_memory", read_memory, 1);
105
+ rb_define_private_method(klass, "validate_document", validate_document, 1);
106
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_RELAX_NG
2
+ #define NOKOGIRI_XML_RELAX_NG
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_relax_ng();
7
+
8
+ extern VALUE cNokogiriXmlRelaxNG;
9
+ #endif
@@ -0,0 +1,336 @@
1
+ #include <nokogiri.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * parse_memory(data)
6
+ *
7
+ * Parse the document stored in +data+
8
+ */
9
+ static VALUE parse_memory(VALUE self, VALUE data)
10
+ {
11
+ xmlSAXHandlerPtr handler;
12
+ Data_Get_Struct(self, xmlSAXHandler, handler);
13
+
14
+ if(Qnil == data) rb_raise(rb_eArgError, "data cannot be nil");
15
+
16
+ xmlSAXUserParseMemory( handler,
17
+ (void *)self,
18
+ StringValuePtr(data),
19
+ RSTRING_LEN(data)
20
+ );
21
+ return data;
22
+ }
23
+
24
+ /*
25
+ * call-seq:
26
+ * native_parse_io(data, encoding)
27
+ *
28
+ * Parse the document accessable via +io+
29
+ */
30
+ static VALUE native_parse_io(VALUE self, VALUE io, VALUE encoding)
31
+ {
32
+ xmlSAXHandlerPtr handler;
33
+ Data_Get_Struct(self, xmlSAXHandler, handler);
34
+
35
+ xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
36
+
37
+ xmlParserCtxtPtr sax_ctx = xmlCreateIOParserCtxt(
38
+ handler,
39
+ (void *)self,
40
+ (xmlInputReadCallback)io_read_callback,
41
+ (xmlInputCloseCallback)io_close_callback,
42
+ (void *)io,
43
+ enc
44
+ );
45
+ xmlParseDocument(sax_ctx);
46
+ xmlFreeParserCtxt(sax_ctx);
47
+ return io;
48
+ }
49
+
50
+ /*
51
+ * call-seq:
52
+ * native_parse_file(data)
53
+ *
54
+ * Parse the document stored in +data+
55
+ */
56
+ static VALUE native_parse_file(VALUE self, VALUE data)
57
+ {
58
+ xmlSAXHandlerPtr handler;
59
+ Data_Get_Struct(self, xmlSAXHandler, handler);
60
+ xmlSAXUserParseFile( handler,
61
+ (void *)self,
62
+ StringValuePtr(data)
63
+ );
64
+ return data;
65
+ }
66
+
67
+ static void start_document(void * ctx)
68
+ {
69
+ VALUE self = (VALUE)ctx;
70
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
71
+ rb_funcall(doc, rb_intern("start_document"), 0);
72
+ }
73
+
74
+ static void end_document(void * ctx)
75
+ {
76
+ VALUE self = (VALUE)ctx;
77
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
78
+ rb_funcall(doc, rb_intern("end_document"), 0);
79
+ }
80
+
81
+ static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts)
82
+ {
83
+ VALUE self = (VALUE)ctx;
84
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
85
+ VALUE attributes = rb_ary_new();
86
+ VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
87
+ const xmlChar * attr;
88
+ int i = 0;
89
+ if(atts) {
90
+ while((attr = atts[i]) != NULL) {
91
+ rb_funcall(attributes, rb_intern("<<"), 1,
92
+ NOKOGIRI_STR_NEW2(attr, RTEST(enc) ? StringValuePtr(enc) : NULL)
93
+ );
94
+ i++;
95
+ }
96
+ }
97
+
98
+ rb_funcall( doc,
99
+ rb_intern("start_element"),
100
+ 2,
101
+ NOKOGIRI_STR_NEW2(name, RTEST(enc) ? StringValuePtr(enc) : NULL),
102
+ attributes
103
+ );
104
+ }
105
+
106
+ static void end_element(void * ctx, const xmlChar *name)
107
+ {
108
+ VALUE self = (VALUE)ctx;
109
+ VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
110
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
111
+ rb_funcall(doc, rb_intern("end_element"), 1,
112
+ NOKOGIRI_STR_NEW2(name, RTEST(enc) ? StringValuePtr(enc) : NULL)
113
+ );
114
+ }
115
+
116
+ /**
117
+ * start_element_ns was borrowed heavily from libxml-ruby.
118
+ */
119
+ static void
120
+ start_element_ns (
121
+ void * ctx,
122
+ const xmlChar * localname,
123
+ const xmlChar * prefix,
124
+ const xmlChar * URI,
125
+ int nb_namespaces,
126
+ const xmlChar ** namespaces,
127
+ int nb_attributes,
128
+ int nb_defaulted,
129
+ const xmlChar ** attributes)
130
+ {
131
+ VALUE self = (VALUE)ctx;
132
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
133
+ VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
134
+
135
+ VALUE attrHash = rb_hash_new();
136
+ VALUE nsHash = rb_hash_new();
137
+
138
+ if (attributes)
139
+ {
140
+ /* Each attribute is an array of [localname, prefix, URI, value, end] */
141
+ int i;
142
+ for (i = 0; i < nb_attributes * 5; i += 5)
143
+ {
144
+ rb_hash_aset( attrHash,
145
+ NOKOGIRI_STR_NEW2((const char*)attributes[i+0], RTEST(enc) ? StringValuePtr(enc) : NULL),
146
+ NOKOGIRI_STR_NEW((const char*)attributes[i+3], (attributes[i+4] - attributes[i+3]), RTEST(enc) ? StringValuePtr(enc) : NULL));
147
+ }
148
+ }
149
+
150
+ if (namespaces)
151
+ {
152
+ int i;
153
+ for (i = 0; i < nb_namespaces * 2; i += 2)
154
+ {
155
+ rb_hash_aset( nsHash,
156
+ namespaces[i+0] ? NOKOGIRI_STR_NEW2((const char*)namespaces[i+0], RTEST(enc) ? StringValuePtr(enc) : NULL) : Qnil,
157
+ namespaces[i+1] ? NOKOGIRI_STR_NEW2((const char*)namespaces[i+1], RTEST(enc) ? StringValuePtr(enc) : NULL) : Qnil);
158
+ }
159
+ }
160
+
161
+ rb_funcall( doc,
162
+ rb_intern("start_element_ns"),
163
+ 5,
164
+ NOKOGIRI_STR_NEW2(localname, RTEST(enc) ? StringValuePtr(enc) : NULL),
165
+ attrHash,
166
+ prefix ? NOKOGIRI_STR_NEW2(prefix, RTEST(enc) ? StringValuePtr(enc) : NULL) : Qnil,
167
+ URI ? NOKOGIRI_STR_NEW2(URI, RTEST(enc) ? StringValuePtr(enc) : NULL) : Qnil,
168
+ nsHash
169
+ );
170
+
171
+ /* Call start element if it's there' */
172
+ if (rb_respond_to(doc, rb_intern("start_element")))
173
+ {
174
+ VALUE name;
175
+ if (prefix)
176
+ {
177
+ name = NOKOGIRI_STR_NEW2(prefix, RTEST(enc) ? StringValuePtr(enc) : NULL);
178
+ rb_funcall(name, rb_intern("<<"), 1, NOKOGIRI_STR_NEW2(":", RTEST(enc) ? StringValuePtr(enc) : NULL));
179
+ rb_funcall(name, rb_intern("<<"), 1, NOKOGIRI_STR_NEW2(localname, RTEST(enc) ? StringValuePtr(enc) : NULL));
180
+ }
181
+ else
182
+ {
183
+ name = NOKOGIRI_STR_NEW2(localname, RTEST(enc) ? StringValuePtr(enc) : NULL);
184
+ }
185
+ VALUE attrArray = rb_funcall(attrHash, rb_intern("to_a"), 0);
186
+ attrArray = rb_funcall(attrArray, rb_intern("flatten"), 0);
187
+ rb_funcall(doc, rb_intern("start_element"), 2, name, attrArray);
188
+ }
189
+
190
+ }
191
+
192
+ /**
193
+ * end_element_ns was borrowed heavily from libxml-ruby.
194
+ */
195
+ static void
196
+ end_element_ns (
197
+ void * ctx,
198
+ const xmlChar * localname,
199
+ const xmlChar * prefix,
200
+ const xmlChar * URI)
201
+ {
202
+ VALUE self = (VALUE)ctx;
203
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
204
+ VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
205
+
206
+ rb_funcall(doc, rb_intern("end_element_ns"), 3,
207
+ NOKOGIRI_STR_NEW2(localname, RTEST(enc) ? StringValuePtr(enc) : NULL),
208
+ prefix ? NOKOGIRI_STR_NEW2(prefix, RTEST(enc) ? StringValuePtr(enc) : NULL) : Qnil,
209
+ URI ? NOKOGIRI_STR_NEW2(URI, RTEST(enc) ? StringValuePtr(enc) : NULL) : Qnil
210
+ );
211
+
212
+ /* Call end element for old-times sake */
213
+ if (rb_respond_to(doc, rb_intern("end_element")))
214
+ {
215
+ VALUE name;
216
+ if (prefix)
217
+ {
218
+ name = NOKOGIRI_STR_NEW2(prefix, RTEST(enc) ? StringValuePtr(enc) : NULL);
219
+ rb_funcall(name, rb_intern("<<"), 1, NOKOGIRI_STR_NEW2(":", RTEST(enc) ? StringValuePtr(enc) : NULL));
220
+ rb_funcall(name, rb_intern("<<"), 1, NOKOGIRI_STR_NEW2(localname, RTEST(enc) ? StringValuePtr(enc) : NULL));
221
+ }
222
+ else
223
+ {
224
+ name = NOKOGIRI_STR_NEW2(localname, RTEST(enc) ? StringValuePtr(enc) : NULL);
225
+ }
226
+ rb_funcall(doc, rb_intern("end_element"), 1, name);
227
+ }
228
+
229
+ }
230
+
231
+ static void characters_func(void * ctx, const xmlChar * ch, int len)
232
+ {
233
+ VALUE self = (VALUE)ctx;
234
+ VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
235
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
236
+ VALUE str = NOKOGIRI_STR_NEW(ch, len, RTEST(enc) ? StringValuePtr(enc):NULL);
237
+ rb_funcall(doc, rb_intern("characters"), 1, str);
238
+ }
239
+
240
+ static void comment_func(void * ctx, const xmlChar * value)
241
+ {
242
+ VALUE self = (VALUE)ctx;
243
+ VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
244
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
245
+ VALUE str = NOKOGIRI_STR_NEW2(value, RTEST(enc) ? StringValuePtr(enc):NULL);
246
+ rb_funcall(doc, rb_intern("comment"), 1, str);
247
+ }
248
+
249
+ static void warning_func(void * ctx, const char *msg, ...)
250
+ {
251
+ VALUE self = (VALUE)ctx;
252
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
253
+ VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
254
+ char * message;
255
+
256
+ va_list args;
257
+ va_start(args, msg);
258
+ vasprintf(&message, msg, args);
259
+ va_end(args);
260
+
261
+ rb_funcall(doc, rb_intern("warning"), 1,
262
+ NOKOGIRI_STR_NEW2(message, RTEST(enc) ? StringValuePtr(enc) : NULL)
263
+ );
264
+ free(message);
265
+ }
266
+
267
+ static void error_func(void * ctx, const char *msg, ...)
268
+ {
269
+ VALUE self = (VALUE)ctx;
270
+ VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
271
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
272
+ char * message;
273
+
274
+ va_list args;
275
+ va_start(args, msg);
276
+ vasprintf(&message, msg, args);
277
+ va_end(args);
278
+
279
+ rb_funcall(doc, rb_intern("error"), 1,
280
+ NOKOGIRI_STR_NEW2(message, RTEST(enc) ? StringValuePtr(enc) : NULL)
281
+ );
282
+ free(message);
283
+ }
284
+
285
+ static void cdata_block(void * ctx, const xmlChar * value, int len)
286
+ {
287
+ VALUE self = (VALUE)ctx;
288
+ VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
289
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
290
+ VALUE string =
291
+ NOKOGIRI_STR_NEW(value, len, RTEST(enc) ? StringValuePtr(enc) : NULL);
292
+ rb_funcall(doc, rb_intern("cdata_block"), 1, string);
293
+ }
294
+
295
+ static void deallocate(xmlSAXHandlerPtr handler)
296
+ {
297
+ NOKOGIRI_DEBUG_START(handler);
298
+ free(handler);
299
+ NOKOGIRI_DEBUG_END(handler);
300
+ }
301
+
302
+ static VALUE allocate(VALUE klass)
303
+ {
304
+ xmlSAXHandlerPtr handler = calloc(1, sizeof(xmlSAXHandler));
305
+
306
+ handler->startDocument = start_document;
307
+ handler->endDocument = end_document;
308
+ handler->startElement = start_element;
309
+ handler->endElement = end_element;
310
+ handler->startElementNs = start_element_ns;
311
+ handler->endElementNs = end_element_ns;
312
+ handler->characters = characters_func;
313
+ handler->comment = comment_func;
314
+ handler->warning = warning_func;
315
+ handler->error = error_func;
316
+ handler->cdataBlock = cdata_block;
317
+ handler->initialized = XML_SAX2_MAGIC;
318
+
319
+ return Data_Wrap_Struct(klass, NULL, deallocate, handler);
320
+ }
321
+
322
+ VALUE cNokogiriXmlSaxParser ;
323
+ void init_xml_sax_parser()
324
+ {
325
+ VALUE nokogiri = rb_define_module("Nokogiri");
326
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
327
+ VALUE sax = rb_define_module_under(xml, "SAX");
328
+ VALUE klass = rb_define_class_under(sax, "Parser", rb_cObject);
329
+
330
+ cNokogiriXmlSaxParser = klass;
331
+
332
+ rb_define_alloc_func(klass, allocate);
333
+ rb_define_method(klass, "parse_memory", parse_memory, 1);
334
+ rb_define_private_method(klass, "native_parse_file", native_parse_file, 1);
335
+ rb_define_private_method(klass, "native_parse_io", native_parse_io, 2);
336
+ }