nokogiri 1.6.2.rc1-x64-mingw32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (263) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +26 -0
  3. data/.editorconfig +17 -0
  4. data/.gemtest +0 -0
  5. data/.travis.yml +25 -0
  6. data/CHANGELOG.ja.rdoc +857 -0
  7. data/CHANGELOG.rdoc +880 -0
  8. data/C_CODING_STYLE.rdoc +33 -0
  9. data/Gemfile +21 -0
  10. data/Manifest.txt +371 -0
  11. data/README.ja.rdoc +112 -0
  12. data/README.rdoc +180 -0
  13. data/ROADMAP.md +89 -0
  14. data/Rakefile +351 -0
  15. data/STANDARD_RESPONSES.md +47 -0
  16. data/Y_U_NO_GEMSPEC.md +155 -0
  17. data/bin/nokogiri +78 -0
  18. data/build_all +130 -0
  19. data/dependencies.yml +4 -0
  20. data/ext/nokogiri/depend +358 -0
  21. data/ext/nokogiri/extconf.rb +453 -0
  22. data/ext/nokogiri/html_document.c +170 -0
  23. data/ext/nokogiri/html_document.h +10 -0
  24. data/ext/nokogiri/html_element_description.c +279 -0
  25. data/ext/nokogiri/html_element_description.h +10 -0
  26. data/ext/nokogiri/html_entity_lookup.c +32 -0
  27. data/ext/nokogiri/html_entity_lookup.h +8 -0
  28. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  29. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  30. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  31. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  32. data/ext/nokogiri/nokogiri.c +148 -0
  33. data/ext/nokogiri/nokogiri.h +164 -0
  34. data/ext/nokogiri/xml_attr.c +94 -0
  35. data/ext/nokogiri/xml_attr.h +9 -0
  36. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  37. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  38. data/ext/nokogiri/xml_cdata.c +56 -0
  39. data/ext/nokogiri/xml_cdata.h +9 -0
  40. data/ext/nokogiri/xml_comment.c +54 -0
  41. data/ext/nokogiri/xml_comment.h +9 -0
  42. data/ext/nokogiri/xml_document.c +577 -0
  43. data/ext/nokogiri/xml_document.h +23 -0
  44. data/ext/nokogiri/xml_document_fragment.c +48 -0
  45. data/ext/nokogiri/xml_document_fragment.h +10 -0
  46. data/ext/nokogiri/xml_dtd.c +202 -0
  47. data/ext/nokogiri/xml_dtd.h +10 -0
  48. data/ext/nokogiri/xml_element_content.c +123 -0
  49. data/ext/nokogiri/xml_element_content.h +10 -0
  50. data/ext/nokogiri/xml_element_decl.c +69 -0
  51. data/ext/nokogiri/xml_element_decl.h +9 -0
  52. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  53. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  54. data/ext/nokogiri/xml_entity_decl.c +110 -0
  55. data/ext/nokogiri/xml_entity_decl.h +10 -0
  56. data/ext/nokogiri/xml_entity_reference.c +52 -0
  57. data/ext/nokogiri/xml_entity_reference.h +9 -0
  58. data/ext/nokogiri/xml_io.c +56 -0
  59. data/ext/nokogiri/xml_io.h +11 -0
  60. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  61. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  62. data/ext/nokogiri/xml_namespace.c +78 -0
  63. data/ext/nokogiri/xml_namespace.h +13 -0
  64. data/ext/nokogiri/xml_node.c +1541 -0
  65. data/ext/nokogiri/xml_node.h +13 -0
  66. data/ext/nokogiri/xml_node_set.c +467 -0
  67. data/ext/nokogiri/xml_node_set.h +14 -0
  68. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  69. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  70. data/ext/nokogiri/xml_reader.c +681 -0
  71. data/ext/nokogiri/xml_reader.h +10 -0
  72. data/ext/nokogiri/xml_relax_ng.c +161 -0
  73. data/ext/nokogiri/xml_relax_ng.h +9 -0
  74. data/ext/nokogiri/xml_sax_parser.c +312 -0
  75. data/ext/nokogiri/xml_sax_parser.h +39 -0
  76. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  77. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  78. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  79. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  80. data/ext/nokogiri/xml_schema.c +205 -0
  81. data/ext/nokogiri/xml_schema.h +9 -0
  82. data/ext/nokogiri/xml_syntax_error.c +63 -0
  83. data/ext/nokogiri/xml_syntax_error.h +13 -0
  84. data/ext/nokogiri/xml_text.c +52 -0
  85. data/ext/nokogiri/xml_text.h +9 -0
  86. data/ext/nokogiri/xml_xpath_context.c +307 -0
  87. data/ext/nokogiri/xml_xpath_context.h +10 -0
  88. data/ext/nokogiri/xslt_stylesheet.c +270 -0
  89. data/ext/nokogiri/xslt_stylesheet.h +14 -0
  90. data/lib/nokogiri.rb +137 -0
  91. data/lib/nokogiri/2.0/nokogiri.so +0 -0
  92. data/lib/nokogiri/2.1/nokogiri.so +0 -0
  93. data/lib/nokogiri/css.rb +27 -0
  94. data/lib/nokogiri/css/node.rb +52 -0
  95. data/lib/nokogiri/css/parser.rb +715 -0
  96. data/lib/nokogiri/css/parser.y +249 -0
  97. data/lib/nokogiri/css/parser_extras.rb +91 -0
  98. data/lib/nokogiri/css/syntax_error.rb +7 -0
  99. data/lib/nokogiri/css/tokenizer.rb +152 -0
  100. data/lib/nokogiri/css/tokenizer.rex +55 -0
  101. data/lib/nokogiri/css/xpath_visitor.rb +219 -0
  102. data/lib/nokogiri/decorators/slop.rb +35 -0
  103. data/lib/nokogiri/html.rb +37 -0
  104. data/lib/nokogiri/html/builder.rb +35 -0
  105. data/lib/nokogiri/html/document.rb +333 -0
  106. data/lib/nokogiri/html/document_fragment.rb +41 -0
  107. data/lib/nokogiri/html/element_description.rb +23 -0
  108. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  109. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  110. data/lib/nokogiri/html/sax/parser.rb +52 -0
  111. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  112. data/lib/nokogiri/html/sax/push_parser.rb +16 -0
  113. data/lib/nokogiri/syntax_error.rb +4 -0
  114. data/lib/nokogiri/version.rb +106 -0
  115. data/lib/nokogiri/xml.rb +73 -0
  116. data/lib/nokogiri/xml/attr.rb +14 -0
  117. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  118. data/lib/nokogiri/xml/builder.rb +443 -0
  119. data/lib/nokogiri/xml/cdata.rb +11 -0
  120. data/lib/nokogiri/xml/character_data.rb +7 -0
  121. data/lib/nokogiri/xml/document.rb +279 -0
  122. data/lib/nokogiri/xml/document_fragment.rb +112 -0
  123. data/lib/nokogiri/xml/dtd.rb +32 -0
  124. data/lib/nokogiri/xml/element_content.rb +36 -0
  125. data/lib/nokogiri/xml/element_decl.rb +13 -0
  126. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  127. data/lib/nokogiri/xml/namespace.rb +13 -0
  128. data/lib/nokogiri/xml/node.rb +982 -0
  129. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  130. data/lib/nokogiri/xml/node_set.rb +355 -0
  131. data/lib/nokogiri/xml/notation.rb +6 -0
  132. data/lib/nokogiri/xml/parse_options.rb +98 -0
  133. data/lib/nokogiri/xml/pp.rb +2 -0
  134. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  135. data/lib/nokogiri/xml/pp/node.rb +56 -0
  136. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  137. data/lib/nokogiri/xml/reader.rb +112 -0
  138. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  139. data/lib/nokogiri/xml/sax.rb +4 -0
  140. data/lib/nokogiri/xml/sax/document.rb +171 -0
  141. data/lib/nokogiri/xml/sax/parser.rb +123 -0
  142. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  143. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  144. data/lib/nokogiri/xml/schema.rb +63 -0
  145. data/lib/nokogiri/xml/syntax_error.rb +47 -0
  146. data/lib/nokogiri/xml/text.rb +9 -0
  147. data/lib/nokogiri/xml/xpath.rb +10 -0
  148. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  149. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  150. data/lib/nokogiri/xslt.rb +56 -0
  151. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  152. data/lib/xsd/xmlparser/nokogiri.rb +102 -0
  153. data/suppressions/README.txt +1 -0
  154. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  155. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  156. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  157. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  158. data/tasks/nokogiri.org.rb +24 -0
  159. data/tasks/test.rb +95 -0
  160. data/test/css/test_nthiness.rb +222 -0
  161. data/test/css/test_parser.rb +358 -0
  162. data/test/css/test_tokenizer.rb +198 -0
  163. data/test/css/test_xpath_visitor.rb +96 -0
  164. data/test/decorators/test_slop.rb +16 -0
  165. data/test/files/2ch.html +108 -0
  166. data/test/files/address_book.rlx +12 -0
  167. data/test/files/address_book.xml +10 -0
  168. data/test/files/atom.xml +344 -0
  169. data/test/files/bar/bar.xsd +4 -0
  170. data/test/files/bogus.xml +0 -0
  171. data/test/files/dont_hurt_em_why.xml +422 -0
  172. data/test/files/encoding.html +82 -0
  173. data/test/files/encoding.xhtml +84 -0
  174. data/test/files/exslt.xml +8 -0
  175. data/test/files/exslt.xslt +35 -0
  176. data/test/files/foo/foo.xsd +4 -0
  177. data/test/files/metacharset.html +10 -0
  178. data/test/files/noencoding.html +47 -0
  179. data/test/files/po.xml +32 -0
  180. data/test/files/po.xsd +66 -0
  181. data/test/files/saml/saml20assertion_schema.xsd +283 -0
  182. data/test/files/saml/saml20protocol_schema.xsd +302 -0
  183. data/test/files/saml/xenc_schema.xsd +146 -0
  184. data/test/files/saml/xmldsig_schema.xsd +318 -0
  185. data/test/files/shift_jis.html +10 -0
  186. data/test/files/shift_jis.xml +5 -0
  187. data/test/files/shift_jis_no_charset.html +9 -0
  188. data/test/files/snuggles.xml +3 -0
  189. data/test/files/staff.dtd +10 -0
  190. data/test/files/staff.xml +59 -0
  191. data/test/files/staff.xslt +32 -0
  192. data/test/files/test_document_url/bar.xml +2 -0
  193. data/test/files/test_document_url/document.dtd +4 -0
  194. data/test/files/test_document_url/document.xml +6 -0
  195. data/test/files/tlm.html +850 -0
  196. data/test/files/to_be_xincluded.xml +2 -0
  197. data/test/files/valid_bar.xml +2 -0
  198. data/test/files/xinclude.xml +4 -0
  199. data/test/helper.rb +164 -0
  200. data/test/html/sax/test_parser.rb +141 -0
  201. data/test/html/sax/test_parser_context.rb +46 -0
  202. data/test/html/test_builder.rb +164 -0
  203. data/test/html/test_document.rb +619 -0
  204. data/test/html/test_document_encoding.rb +148 -0
  205. data/test/html/test_document_fragment.rb +261 -0
  206. data/test/html/test_element_description.rb +105 -0
  207. data/test/html/test_named_characters.rb +14 -0
  208. data/test/html/test_node.rb +196 -0
  209. data/test/html/test_node_encoding.rb +27 -0
  210. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
  211. data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
  212. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  213. data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
  214. data/test/namespaces/test_namespaces_in_parsed_doc.rb +66 -0
  215. data/test/test_convert_xpath.rb +135 -0
  216. data/test/test_css_cache.rb +45 -0
  217. data/test/test_encoding_handler.rb +46 -0
  218. data/test/test_memory_leak.rb +156 -0
  219. data/test/test_nokogiri.rb +138 -0
  220. data/test/test_reader.rb +558 -0
  221. data/test/test_soap4r_sax.rb +52 -0
  222. data/test/test_xslt_transforms.rb +279 -0
  223. data/test/xml/node/test_save_options.rb +28 -0
  224. data/test/xml/node/test_subclass.rb +44 -0
  225. data/test/xml/sax/test_parser.rb +382 -0
  226. data/test/xml/sax/test_parser_context.rb +115 -0
  227. data/test/xml/sax/test_push_parser.rb +157 -0
  228. data/test/xml/test_attr.rb +64 -0
  229. data/test/xml/test_attribute_decl.rb +86 -0
  230. data/test/xml/test_builder.rb +315 -0
  231. data/test/xml/test_c14n.rb +161 -0
  232. data/test/xml/test_cdata.rb +48 -0
  233. data/test/xml/test_comment.rb +29 -0
  234. data/test/xml/test_document.rb +934 -0
  235. data/test/xml/test_document_encoding.rb +28 -0
  236. data/test/xml/test_document_fragment.rb +228 -0
  237. data/test/xml/test_dtd.rb +187 -0
  238. data/test/xml/test_dtd_encoding.rb +33 -0
  239. data/test/xml/test_element_content.rb +56 -0
  240. data/test/xml/test_element_decl.rb +73 -0
  241. data/test/xml/test_entity_decl.rb +122 -0
  242. data/test/xml/test_entity_reference.rb +245 -0
  243. data/test/xml/test_namespace.rb +95 -0
  244. data/test/xml/test_node.rb +1155 -0
  245. data/test/xml/test_node_attributes.rb +113 -0
  246. data/test/xml/test_node_encoding.rb +107 -0
  247. data/test/xml/test_node_inheritance.rb +32 -0
  248. data/test/xml/test_node_reparenting.rb +374 -0
  249. data/test/xml/test_node_set.rb +755 -0
  250. data/test/xml/test_parse_options.rb +64 -0
  251. data/test/xml/test_processing_instruction.rb +30 -0
  252. data/test/xml/test_reader_encoding.rb +142 -0
  253. data/test/xml/test_relax_ng.rb +60 -0
  254. data/test/xml/test_schema.rb +129 -0
  255. data/test/xml/test_syntax_error.rb +12 -0
  256. data/test/xml/test_text.rb +45 -0
  257. data/test/xml/test_unparented_node.rb +422 -0
  258. data/test/xml/test_xinclude.rb +83 -0
  259. data/test/xml/test_xpath.rb +376 -0
  260. data/test/xslt/test_custom_functions.rb +133 -0
  261. data/test/xslt/test_exception_handling.rb +37 -0
  262. data/test_all +81 -0
  263. metadata +601 -0
@@ -0,0 +1,10 @@
1
+ #ifndef NOKOGIRI_HTML_ELEMENT_DESCRIPTION
2
+ #define NOKOGIRI_HTML_ELEMENT_DESCRIPTION
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_html_element_description();
7
+
8
+ extern VALUE cNokogiriHtmlElementDescription ;
9
+
10
+ #endif
@@ -0,0 +1,32 @@
1
+ #include <html_entity_lookup.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * get(key)
6
+ *
7
+ * Get the HTML::EntityDescription for +key+
8
+ */
9
+ static VALUE get(VALUE self, VALUE key)
10
+ {
11
+ const htmlEntityDesc * desc =
12
+ htmlEntityLookup((const xmlChar *)StringValuePtr(key));
13
+ VALUE klass, args[3];
14
+
15
+ if(NULL == desc) return Qnil;
16
+ klass = rb_const_get(mNokogiriHtml, rb_intern("EntityDescription"));
17
+
18
+ args[0] = INT2NUM((long)desc->value);
19
+ args[1] = NOKOGIRI_STR_NEW2(desc->name);
20
+ args[2] = NOKOGIRI_STR_NEW2(desc->desc);
21
+
22
+ return rb_class_new_instance(3, args, klass);
23
+ }
24
+
25
+ void init_html_entity_lookup()
26
+ {
27
+ VALUE nokogiri = rb_define_module("Nokogiri");
28
+ VALUE html = rb_define_module_under(nokogiri, "HTML");
29
+ VALUE klass = rb_define_class_under(html, "EntityLookup", rb_cObject);
30
+
31
+ rb_define_method(klass, "get", get, 1);
32
+ }
@@ -0,0 +1,8 @@
1
+ #ifndef NOKOGIRI_HTML_ENTITY_LOOKUP
2
+ #define NOKOGIRI_HTML_ENTITY_LOOKUP
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_html_entity_lookup();
7
+
8
+ #endif
@@ -0,0 +1,116 @@
1
+ #include <html_sax_parser_context.h>
2
+
3
+ VALUE cNokogiriHtmlSaxParserContext ;
4
+
5
+ static void deallocate(xmlParserCtxtPtr ctxt)
6
+ {
7
+ NOKOGIRI_DEBUG_START(handler);
8
+
9
+ ctxt->sax = NULL;
10
+
11
+ htmlFreeParserCtxt(ctxt);
12
+
13
+ NOKOGIRI_DEBUG_END(handler);
14
+ }
15
+
16
+ static VALUE
17
+ parse_memory(VALUE klass, VALUE data, VALUE encoding)
18
+ {
19
+ htmlParserCtxtPtr ctxt;
20
+
21
+ if (NIL_P(data))
22
+ rb_raise(rb_eArgError, "data cannot be nil");
23
+ if (!(int)RSTRING_LEN(data))
24
+ rb_raise(rb_eRuntimeError, "data cannot be empty");
25
+
26
+ ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
27
+ (int)RSTRING_LEN(data));
28
+ if (ctxt->sax) {
29
+ xmlFree(ctxt->sax);
30
+ ctxt->sax = NULL;
31
+ }
32
+
33
+ if (RTEST(encoding)) {
34
+ xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValuePtr(encoding));
35
+ if (enc != NULL) {
36
+ xmlSwitchToEncoding(ctxt, enc);
37
+ if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
38
+ rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
39
+ StringValuePtr(encoding));
40
+ }
41
+ }
42
+ }
43
+
44
+ return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
45
+ }
46
+
47
+ static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
48
+ {
49
+ htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(
50
+ StringValuePtr(filename),
51
+ StringValuePtr(encoding)
52
+ );
53
+ return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
54
+ }
55
+
56
+ static VALUE
57
+ parse_doc(VALUE ctxt_val)
58
+ {
59
+ htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
60
+ htmlParseDocument(ctxt);
61
+ return Qnil;
62
+ }
63
+
64
+ static VALUE
65
+ parse_doc_finalize(VALUE ctxt_val)
66
+ {
67
+ htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
68
+
69
+ if (ctxt->myDoc)
70
+ xmlFreeDoc(ctxt->myDoc);
71
+
72
+ NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
73
+ return Qnil;
74
+ }
75
+
76
+ static VALUE
77
+ parse_with(VALUE self, VALUE sax_handler)
78
+ {
79
+ htmlParserCtxtPtr ctxt;
80
+ htmlSAXHandlerPtr sax;
81
+
82
+ if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser))
83
+ rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
84
+
85
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
86
+ Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
87
+
88
+ /* Free the sax handler since we'll assign our own */
89
+ if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler)
90
+ xmlFree(ctxt->sax);
91
+
92
+ ctxt->sax = sax;
93
+ ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
94
+
95
+ rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
96
+
97
+ return self;
98
+ }
99
+
100
+ void init_html_sax_parser_context()
101
+ {
102
+ VALUE nokogiri = rb_define_module("Nokogiri");
103
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
104
+ VALUE html = rb_define_module_under(nokogiri, "HTML");
105
+ VALUE sax = rb_define_module_under(xml, "SAX");
106
+ VALUE hsax = rb_define_module_under(html, "SAX");
107
+ VALUE pc = rb_define_class_under(sax, "ParserContext", rb_cObject);
108
+ VALUE klass = rb_define_class_under(hsax, "ParserContext", pc);
109
+
110
+ cNokogiriHtmlSaxParserContext = klass;
111
+
112
+ rb_define_singleton_method(klass, "memory", parse_memory, 2);
113
+ rb_define_singleton_method(klass, "file", parse_file, 2);
114
+
115
+ rb_define_method(klass, "parse_with", parse_with, 1);
116
+ }
@@ -0,0 +1,11 @@
1
+ #ifndef NOKOGIRI_HTML_SAX_PARSER_CONTEXT
2
+ #define NOKOGIRI_HTML_SAX_PARSER_CONTEXT
3
+
4
+ #include <nokogiri.h>
5
+
6
+ extern VALUE cNokogiriHtmlSaxParserContext;
7
+
8
+ void init_html_sax_parser_context();
9
+
10
+ #endif
11
+
@@ -0,0 +1,87 @@
1
+ #include <html_sax_push_parser.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * native_write(chunk, last_chunk)
6
+ *
7
+ * Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle
8
+ */
9
+ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
10
+ {
11
+ xmlParserCtxtPtr ctx;
12
+ const char * chunk = NULL;
13
+ int size = 0;
14
+
15
+
16
+ Data_Get_Struct(self, xmlParserCtxt, ctx);
17
+
18
+ if(Qnil != _chunk) {
19
+ chunk = StringValuePtr(_chunk);
20
+ size = (int)RSTRING_LEN(_chunk);
21
+ }
22
+
23
+ if(htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
24
+ if (!(ctx->options & XML_PARSE_RECOVER)) {
25
+ xmlErrorPtr e = xmlCtxtGetLastError(ctx);
26
+ Nokogiri_error_raise(NULL, e);
27
+ }
28
+ }
29
+
30
+ return self;
31
+ }
32
+
33
+ /*
34
+ * call-seq:
35
+ * initialize_native(xml_sax, filename)
36
+ *
37
+ * Initialize the push parser with +xml_sax+ using +filename+
38
+ */
39
+ static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
40
+ VALUE encoding)
41
+ {
42
+ htmlSAXHandlerPtr sax;
43
+ const char * filename = NULL;
44
+ htmlParserCtxtPtr ctx;
45
+ xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
46
+
47
+ Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
48
+
49
+ if(_filename != Qnil) filename = StringValuePtr(_filename);
50
+
51
+ if (!NIL_P(encoding)) {
52
+ enc = xmlParseCharEncoding(StringValuePtr(encoding));
53
+ if (enc == XML_CHAR_ENCODING_ERROR)
54
+ rb_raise(rb_eArgError, "Unsupported Encoding");
55
+ }
56
+
57
+ ctx = htmlCreatePushParserCtxt(
58
+ sax,
59
+ NULL,
60
+ NULL,
61
+ 0,
62
+ filename,
63
+ enc
64
+ );
65
+ if(ctx == NULL)
66
+ rb_raise(rb_eRuntimeError, "Could not create a parser context");
67
+
68
+ ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
69
+
70
+ ctx->sax2 = 1;
71
+ DATA_PTR(self) = ctx;
72
+ return self;
73
+ }
74
+
75
+ VALUE cNokogiriHtmlSaxPushParser;
76
+ void init_html_sax_push_parser()
77
+ {
78
+ VALUE nokogiri = rb_define_module("Nokogiri");
79
+ VALUE html = rb_define_module_under(nokogiri, "HTML");
80
+ VALUE sax = rb_define_module_under(html, "SAX");
81
+ VALUE klass = rb_define_class_under(sax, "PushParser", cNokogiriXmlSaxPushParser);
82
+
83
+ cNokogiriHtmlSaxPushParser = klass;
84
+
85
+ rb_define_private_method(klass, "initialize_native", initialize_native, 3);
86
+ rb_define_private_method(klass, "native_write", native_write, 2);
87
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_HTML_SAX_PUSH_PARSER
2
+ #define NOKOGIRI_HTML_SAX_PUSH_PARSER
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_html_sax_push_parser();
7
+
8
+ extern VALUE cNokogiriHtmlSaxPushParser ;
9
+ #endif
@@ -0,0 +1,148 @@
1
+ #include <nokogiri.h>
2
+
3
+ VALUE mNokogiri ;
4
+ VALUE mNokogiriXml ;
5
+ VALUE mNokogiriHtml ;
6
+ VALUE mNokogiriXslt ;
7
+ VALUE mNokogiriXmlSax ;
8
+ VALUE mNokogiriHtmlSax ;
9
+
10
+ #ifdef USE_INCLUDED_VASPRINTF
11
+ /*
12
+ * I srsly hate windows. it doesn't have vasprintf.
13
+ * Thank you Geoffroy Couprie for this implementation of vasprintf!
14
+ */
15
/*
 * Fallback vasprintf: format +fmt+ with +ap+ into a freshly malloc'ed,
 * NUL-terminated buffer stored in *strp.
 *
 * Returns the number of characters written (excluding the terminator), or -1
 * if the required size cannot be determined or the allocation fails; *strp
 * is left untouched in both failure cases.
 */
int vasprintf (char **strp, const char *fmt, va_list ap)
{
  /* Mingw32/64 have a broken vsnprintf implementation that fails when
   * using a zero-byte limit in order to retrieve the required size for malloc.
   * So we use a one byte buffer instead.
   */
  char tmp[1];
  va_list measure_ap;
  int needed;
  size_t capacity;
  char *res;

  /* A va_list may only be traversed once; measure with a copy so the
   * caller's list is still intact for the real formatting pass. */
  va_copy(measure_ap, ap);
  needed = vsnprintf(tmp, 1, fmt, measure_ap);
  va_end(measure_ap);

  if (needed < 0)
    return -1;

  capacity = (size_t)needed + 1;
  res = (char *)malloc(capacity);
  if (res == NULL)
    return -1;

  *strp = res;
  return vsnprintf(res, capacity, fmt, ap);
}
29
+ #endif
30
+
31
/*
 * Release +p+ with the deallocator that matches the build: system_free()
 * when USING_SYSTEM_ALLOCATOR_LIBRARY is defined, plain free() otherwise.
 * Presumably intended for buffers produced by vasprintf() - confirm against
 * callers.
 */
void vasprintf_free (void *p)
{
#ifdef USING_SYSTEM_ALLOCATOR_LIBRARY /* Ruby Enterprise Edition with tcmalloc */
  system_free(p);
#else
  free(p);
#endif
}
42
+
43
+ #ifdef HAVE_RUBY_UTIL_H
44
+ #include "ruby/util.h"
45
+ #else
46
+ #ifndef __MACRUBY__
47
+ #include "util.h"
48
+ #endif
49
+ #endif
50
+
51
+ void nokogiri_root_node(xmlNodePtr node)
52
+ {
53
+ xmlDocPtr doc;
54
+ nokogiriTuplePtr tuple;
55
+
56
+ doc = node->doc;
57
+ if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc;
58
+ tuple = (nokogiriTuplePtr)doc->_private;
59
+ st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node);
60
+ }
61
+
62
+ void nokogiri_root_nsdef(xmlNsPtr ns, xmlDocPtr doc)
63
+ {
64
+ nokogiriTuplePtr tuple;
65
+
66
+ if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc;
67
+ tuple = (nokogiriTuplePtr)doc->_private;
68
+ st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
69
+ }
70
+
71
+ void Init_nokogiri()
72
+ {
73
+ #ifndef __MACRUBY__
74
+ xmlMemSetup(
75
+ (xmlFreeFunc)ruby_xfree,
76
+ (xmlMallocFunc)ruby_xmalloc,
77
+ (xmlReallocFunc)ruby_xrealloc,
78
+ ruby_strdup
79
+ );
80
+ #endif
81
+
82
+ mNokogiri = rb_define_module("Nokogiri");
83
+ mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
84
+ mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML");
85
+ mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
86
+ mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
87
+ mNokogiriHtmlSax = rb_define_module_under(mNokogiriHtml, "SAX");
88
+
89
+ rb_const_set( mNokogiri,
90
+ rb_intern("LIBXML_VERSION"),
91
+ NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION)
92
+ );
93
+ rb_const_set( mNokogiri,
94
+ rb_intern("LIBXML_PARSER_VERSION"),
95
+ NOKOGIRI_STR_NEW2(xmlParserVersion)
96
+ );
97
+
98
+ #ifdef NOKOGIRI_USE_PACKAGED_LIBRARIES
99
+ rb_const_set(mNokogiri, rb_intern("NOKOGIRI_USE_PACKAGED_LIBRARIES"), Qtrue);
100
+ rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATH"), NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATH));
101
+ rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATH"), NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATH));
102
+ #else
103
+ rb_const_set(mNokogiri, rb_intern("NOKOGIRI_USE_PACKAGED_LIBRARIES"), Qfalse);
104
+ rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATH"), Qnil);
105
+ rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATH"), Qnil);
106
+ #endif
107
+
108
+ #ifdef LIBXML_ICONV_ENABLED
109
+ rb_const_set(mNokogiri, rb_intern("LIBXML_ICONV_ENABLED"), Qtrue);
110
+ #else
111
+ rb_const_set(mNokogiri, rb_intern("LIBXML_ICONV_ENABLED"), Qfalse);
112
+ #endif
113
+
114
+ xmlInitParser();
115
+
116
+ init_xml_document();
117
+ init_html_document();
118
+ init_xml_node();
119
+ init_xml_document_fragment();
120
+ init_xml_text();
121
+ init_xml_cdata();
122
+ init_xml_processing_instruction();
123
+ init_xml_attr();
124
+ init_xml_entity_reference();
125
+ init_xml_comment();
126
+ init_xml_node_set();
127
+ init_xml_xpath_context();
128
+ init_xml_sax_parser_context();
129
+ init_xml_sax_parser();
130
+ init_xml_sax_push_parser();
131
+ init_xml_reader();
132
+ init_xml_dtd();
133
+ init_xml_element_content();
134
+ init_xml_attribute_decl();
135
+ init_xml_element_decl();
136
+ init_xml_entity_decl();
137
+ init_xml_namespace();
138
+ init_html_sax_parser_context();
139
+ init_html_sax_push_parser();
140
+ init_xslt_stylesheet();
141
+ init_xml_syntax_error();
142
+ init_html_entity_lookup();
143
+ init_html_element_description();
144
+ init_xml_schema();
145
+ init_xml_relax_ng();
146
+ init_nokogiri_io();
147
+ init_xml_encoding_handler();
148
+ }
@@ -0,0 +1,164 @@
1
+ #ifndef NOKOGIRI_NATIVE
2
+ #define NOKOGIRI_NATIVE
3
+
4
+ #include <stdlib.h>
5
+ #include <string.h>
6
+ #include <assert.h>
7
+ #include <stdarg.h>
8
+
9
+ #ifdef USE_INCLUDED_VASPRINTF
10
+ int vasprintf (char **strp, const char *fmt, va_list ap);
11
+ #else
12
+
13
+ #define _GNU_SOURCE
14
+ # include <stdio.h>
15
+ #undef _GNU_SOURCE
16
+
17
+ #endif
18
+
19
+ #include <libxml/parser.h>
20
+ #include <libxml/entities.h>
21
+ #include <libxml/parserInternals.h>
22
+ #include <libxml/xpath.h>
23
+ #include <libxml/xpathInternals.h>
24
+ #include <libxml/xmlreader.h>
25
+ #include <libxml/xmlsave.h>
26
+ #include <libxml/xmlschemas.h>
27
+ #include <libxml/HTMLparser.h>
28
+ #include <libxml/HTMLtree.h>
29
+ #include <libxml/relaxng.h>
30
+ #include <libxml/xinclude.h>
31
+ #include <libxslt/extensions.h>
32
+ #include <libxml/c14n.h>
33
+ #include <ruby.h>
34
+
35
+ #ifdef HAVE_RUBY_ENCODING_H
36
+ #include <ruby/st.h>
37
+ #else
38
+ #include <st.h>
39
+ #endif
40
+
41
+ #ifndef UNUSED
42
+ # if defined(__GNUC__)
43
+ # define MAYBE_UNUSED(name) name __attribute__((unused))
44
+ # define UNUSED(name) MAYBE_UNUSED(UNUSED_ ## name)
45
+ # else
46
+ # define MAYBE_UNUSED(name) name
47
+ # define UNUSED(name) name
48
+ # endif
49
+ #endif
50
+
51
+ #ifndef NORETURN
52
+ # if defined(__GNUC__)
53
+ # define NORETURN(name) __attribute__((noreturn)) name
54
+ # else
55
+ # define NORETURN(name) name
56
+ # endif
57
+ #endif
58
+
59
+ #ifdef HAVE_RUBY_ENCODING_H
60
+
61
+ #include <ruby/encoding.h>
62
+
63
+ #define NOKOGIRI_STR_NEW2(str) \
64
+ NOKOGIRI_STR_NEW(str, strlen((const char *)(str)))
65
+
66
+ #define NOKOGIRI_STR_NEW(str, len) \
67
+ rb_external_str_new_with_enc((const char *)(str), (long)(len), rb_utf8_encoding())
68
+
69
+ #else
70
+
71
+ #define NOKOGIRI_STR_NEW2(str) \
72
+ rb_str_new2((const char *)(str))
73
+
74
+ #define NOKOGIRI_STR_NEW(str, len) \
75
+ rb_str_new((const char *)(str), (long)(len))
76
+ #endif
77
+
78
/*
 * Convert a C string to a Ruby string, or yield Qnil for a NULL pointer.
 * The argument is parenthesized in the condition so that low-precedence
 * expressions (assignment, etc.) are tested as a whole. Note that _str is
 * still evaluated twice when non-NULL; avoid arguments with side effects.
 */
#define RBSTR_OR_QNIL(_str) \
  ((_str) ? NOKOGIRI_STR_NEW2(_str) : Qnil)
80
+
81
+ #include <xml_libxml2_hacks.h>
82
+
83
+ #include <xml_io.h>
84
+ #include <xml_document.h>
85
+ #include <html_entity_lookup.h>
86
+ #include <html_document.h>
87
+ #include <xml_node.h>
88
+ #include <xml_text.h>
89
+ #include <xml_cdata.h>
90
+ #include <xml_attr.h>
91
+ #include <xml_processing_instruction.h>
92
+ #include <xml_entity_reference.h>
93
+ #include <xml_document_fragment.h>
94
+ #include <xml_comment.h>
95
+ #include <xml_node_set.h>
96
+ #include <xml_dtd.h>
97
+ #include <xml_attribute_decl.h>
98
+ #include <xml_element_decl.h>
99
+ #include <xml_entity_decl.h>
100
+ #include <xml_xpath_context.h>
101
+ #include <xml_element_content.h>
102
+ #include <xml_sax_parser_context.h>
103
+ #include <xml_sax_parser.h>
104
+ #include <xml_sax_push_parser.h>
105
+ #include <xml_reader.h>
106
+ #include <html_sax_parser_context.h>
107
+ #include <html_sax_push_parser.h>
108
+ #include <xslt_stylesheet.h>
109
+ #include <xml_syntax_error.h>
110
+ #include <xml_schema.h>
111
+ #include <xml_relax_ng.h>
112
+ #include <html_element_description.h>
113
+ #include <xml_namespace.h>
114
+ #include <xml_encoding_handler.h>
115
+
116
+ extern VALUE mNokogiri ;
117
+ extern VALUE mNokogiriXml ;
118
+ extern VALUE mNokogiriXmlSax ;
119
+ extern VALUE mNokogiriHtml ;
120
+ extern VALUE mNokogiriHtmlSax ;
121
+ extern VALUE mNokogiriXslt ;
122
+
123
+ void nokogiri_root_node(xmlNodePtr);
124
+ void nokogiri_root_nsdef(xmlNsPtr, xmlDocPtr);
125
+
126
+ #ifdef DEBUG
127
+
128
+ #define NOKOGIRI_DEBUG_START(p) if (getenv("NOKOGIRI_NO_FREE")) return ; if (getenv("NOKOGIRI_DEBUG")) fprintf(stderr,"nokogiri: %s:%d %p start\n", __FILE__, __LINE__, p);
129
+ #define NOKOGIRI_DEBUG_END(p) if (getenv("NOKOGIRI_DEBUG")) fprintf(stderr,"nokogiri: %s:%d %p end\n", __FILE__, __LINE__, p);
130
+
131
+ #else
132
+
133
+ #define NOKOGIRI_DEBUG_START(p)
134
+ #define NOKOGIRI_DEBUG_END(p)
135
+
136
+ #endif
137
+
138
+ #ifndef RSTRING_PTR
139
+ #define RSTRING_PTR(s) (RSTRING(s)->ptr)
140
+ #endif
141
+
142
+ #ifndef RSTRING_LEN
143
+ #define RSTRING_LEN(s) (RSTRING(s)->len)
144
+ #endif
145
+
146
+ #ifndef RARRAY_PTR
147
+ #define RARRAY_PTR(a) RARRAY(a)->ptr
148
+ #endif
149
+
150
+ #ifndef RARRAY_LEN
151
+ #define RARRAY_LEN(a) RARRAY(a)->len
152
+ #endif
153
+
154
+ #ifndef __builtin_expect
155
+ # if defined(__GNUC__)
156
+ # define __builtin_expect(expr, c) __builtin_expect((long)(expr), (long)(c))
157
+ # endif
158
+ #endif
159
+
160
+ #define XMLNS_PREFIX "xmlns"
161
+ #define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */
162
+ #define XMLNS_BUFFER_LEN 128
163
+
164
+ #endif