nokogiri 1.10.9 → 1.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (230) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +190 -95
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +909 -422
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  18. data/ext/nokogiri/nokogiri.c +258 -105
  19. data/ext/nokogiri/nokogiri.h +207 -90
  20. data/ext/nokogiri/test_global_handlers.c +40 -0
  21. data/ext/nokogiri/xml_attr.c +18 -18
  22. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  23. data/ext/nokogiri/xml_cdata.c +33 -33
  24. data/ext/nokogiri/xml_comment.c +19 -31
  25. data/ext/nokogiri/xml_document.c +499 -323
  26. data/ext/nokogiri/xml_document_fragment.c +17 -36
  27. data/ext/nokogiri/xml_dtd.c +65 -59
  28. data/ext/nokogiri/xml_element_content.c +63 -55
  29. data/ext/nokogiri/xml_element_decl.c +31 -31
  30. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  31. data/ext/nokogiri/xml_entity_decl.c +37 -35
  32. data/ext/nokogiri/xml_entity_reference.c +17 -19
  33. data/ext/nokogiri/xml_namespace.c +131 -61
  34. data/ext/nokogiri/xml_node.c +1429 -723
  35. data/ext/nokogiri/xml_node_set.c +257 -225
  36. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  37. data/ext/nokogiri/xml_reader.c +340 -231
  38. data/ext/nokogiri/xml_relax_ng.c +87 -99
  39. data/ext/nokogiri/xml_sax_parser.c +269 -176
  40. data/ext/nokogiri/xml_sax_parser_context.c +286 -152
  41. data/ext/nokogiri/xml_sax_push_parser.c +111 -64
  42. data/ext/nokogiri/xml_schema.c +132 -140
  43. data/ext/nokogiri/xml_syntax_error.c +52 -23
  44. data/ext/nokogiri/xml_text.c +37 -30
  45. data/ext/nokogiri/xml_xpath_context.c +373 -185
  46. data/ext/nokogiri/xslt_stylesheet.c +342 -191
  47. data/gumbo-parser/CHANGES.md +63 -0
  48. data/gumbo-parser/Makefile +129 -0
  49. data/gumbo-parser/THANKS +27 -0
  50. data/gumbo-parser/src/Makefile +34 -0
  51. data/gumbo-parser/src/README.md +41 -0
  52. data/gumbo-parser/src/ascii.c +75 -0
  53. data/gumbo-parser/src/ascii.h +115 -0
  54. data/gumbo-parser/src/attribute.c +42 -0
  55. data/gumbo-parser/src/attribute.h +17 -0
  56. data/gumbo-parser/src/char_ref.c +22225 -0
  57. data/gumbo-parser/src/char_ref.h +29 -0
  58. data/gumbo-parser/src/char_ref.rl +2154 -0
  59. data/gumbo-parser/src/error.c +658 -0
  60. data/gumbo-parser/src/error.h +152 -0
  61. data/gumbo-parser/src/foreign_attrs.c +103 -0
  62. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
  66. data/gumbo-parser/src/parser.c +4932 -0
  67. data/gumbo-parser/src/parser.h +41 -0
  68. data/gumbo-parser/src/replacement.h +33 -0
  69. data/gumbo-parser/src/string_buffer.c +103 -0
  70. data/gumbo-parser/src/string_buffer.h +68 -0
  71. data/gumbo-parser/src/string_piece.c +48 -0
  72. data/gumbo-parser/src/svg_attrs.c +174 -0
  73. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  74. data/gumbo-parser/src/svg_tags.c +137 -0
  75. data/gumbo-parser/src/svg_tags.gperf +55 -0
  76. data/gumbo-parser/src/tag.c +223 -0
  77. data/gumbo-parser/src/tag_lookup.c +382 -0
  78. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  79. data/gumbo-parser/src/tag_lookup.h +13 -0
  80. data/gumbo-parser/src/token_buffer.c +79 -0
  81. data/gumbo-parser/src/token_buffer.h +71 -0
  82. data/gumbo-parser/src/token_type.h +17 -0
  83. data/gumbo-parser/src/tokenizer.c +3464 -0
  84. data/gumbo-parser/src/tokenizer.h +112 -0
  85. data/gumbo-parser/src/tokenizer_states.h +339 -0
  86. data/gumbo-parser/src/utf8.c +245 -0
  87. data/gumbo-parser/src/utf8.h +164 -0
  88. data/gumbo-parser/src/util.c +66 -0
  89. data/gumbo-parser/src/util.h +34 -0
  90. data/gumbo-parser/src/vector.c +111 -0
  91. data/gumbo-parser/src/vector.h +45 -0
  92. data/lib/nokogiri/class_resolver.rb +67 -0
  93. data/lib/nokogiri/css/node.rb +14 -8
  94. data/lib/nokogiri/css/parser.rb +399 -377
  95. data/lib/nokogiri/css/parser.y +250 -245
  96. data/lib/nokogiri/css/parser_extras.rb +16 -71
  97. data/lib/nokogiri/css/selector_cache.rb +38 -0
  98. data/lib/nokogiri/css/syntax_error.rb +3 -1
  99. data/lib/nokogiri/css/tokenizer.rb +7 -5
  100. data/lib/nokogiri/css/tokenizer.rex +11 -9
  101. data/lib/nokogiri/css/xpath_visitor.rb +242 -96
  102. data/lib/nokogiri/css.rb +122 -17
  103. data/lib/nokogiri/decorators/slop.rb +11 -11
  104. data/lib/nokogiri/encoding_handler.rb +57 -0
  105. data/lib/nokogiri/extension.rb +32 -0
  106. data/lib/nokogiri/gumbo.rb +15 -0
  107. data/lib/nokogiri/html.rb +38 -27
  108. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  109. data/lib/nokogiri/html4/document.rb +235 -0
  110. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  111. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  112. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  113. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  114. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  115. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  116. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  117. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  118. data/lib/nokogiri/html4.rb +42 -0
  119. data/lib/nokogiri/html5/builder.rb +40 -0
  120. data/lib/nokogiri/html5/document.rb +199 -0
  121. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  122. data/lib/nokogiri/html5/node.rb +103 -0
  123. data/lib/nokogiri/html5.rb +368 -0
  124. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  125. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  126. data/lib/nokogiri/syntax_error.rb +2 -0
  127. data/lib/nokogiri/version/constant.rb +6 -0
  128. data/lib/nokogiri/version/info.rb +224 -0
  129. data/lib/nokogiri/version.rb +3 -108
  130. data/lib/nokogiri/xml/attr.rb +55 -3
  131. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  132. data/lib/nokogiri/xml/builder.rb +83 -35
  133. data/lib/nokogiri/xml/cdata.rb +3 -1
  134. data/lib/nokogiri/xml/character_data.rb +2 -0
  135. data/lib/nokogiri/xml/document.rb +359 -130
  136. data/lib/nokogiri/xml/document_fragment.rb +170 -54
  137. data/lib/nokogiri/xml/dtd.rb +4 -2
  138. data/lib/nokogiri/xml/element_content.rb +12 -2
  139. data/lib/nokogiri/xml/element_decl.rb +6 -2
  140. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  141. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  142. data/lib/nokogiri/xml/namespace.rb +44 -0
  143. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  144. data/lib/nokogiri/xml/node.rb +1168 -420
  145. data/lib/nokogiri/xml/node_set.rb +145 -67
  146. data/lib/nokogiri/xml/notation.rb +13 -0
  147. data/lib/nokogiri/xml/parse_options.rb +145 -52
  148. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  149. data/lib/nokogiri/xml/pp/node.rb +47 -30
  150. data/lib/nokogiri/xml/pp.rb +4 -2
  151. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  152. data/lib/nokogiri/xml/reader.rb +68 -41
  153. data/lib/nokogiri/xml/relax_ng.rb +60 -17
  154. data/lib/nokogiri/xml/sax/document.rb +198 -111
  155. data/lib/nokogiri/xml/sax/parser.rb +144 -67
  156. data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
  157. data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
  158. data/lib/nokogiri/xml/sax.rb +54 -4
  159. data/lib/nokogiri/xml/schema.rb +116 -39
  160. data/lib/nokogiri/xml/searchable.rb +139 -95
  161. data/lib/nokogiri/xml/syntax_error.rb +29 -5
  162. data/lib/nokogiri/xml/text.rb +2 -0
  163. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  164. data/lib/nokogiri/xml/xpath.rb +15 -4
  165. data/lib/nokogiri/xml/xpath_context.rb +15 -4
  166. data/lib/nokogiri/xml.rb +45 -55
  167. data/lib/nokogiri/xslt/stylesheet.rb +32 -8
  168. data/lib/nokogiri/xslt.rb +103 -30
  169. data/lib/nokogiri.rb +59 -75
  170. data/lib/xsd/xmlparser/nokogiri.rb +32 -29
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  175. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  176. data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
  177. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  178. metadata +123 -295
  179. data/ext/nokogiri/html_document.c +0 -170
  180. data/ext/nokogiri/html_document.h +0 -10
  181. data/ext/nokogiri/html_element_description.c +0 -279
  182. data/ext/nokogiri/html_element_description.h +0 -10
  183. data/ext/nokogiri/html_entity_lookup.c +0 -32
  184. data/ext/nokogiri/html_entity_lookup.h +0 -8
  185. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  186. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  187. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  188. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  189. data/ext/nokogiri/xml_attr.h +0 -9
  190. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  191. data/ext/nokogiri/xml_cdata.h +0 -9
  192. data/ext/nokogiri/xml_comment.h +0 -9
  193. data/ext/nokogiri/xml_document.h +0 -23
  194. data/ext/nokogiri/xml_document_fragment.h +0 -10
  195. data/ext/nokogiri/xml_dtd.h +0 -10
  196. data/ext/nokogiri/xml_element_content.h +0 -10
  197. data/ext/nokogiri/xml_element_decl.h +0 -9
  198. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  199. data/ext/nokogiri/xml_entity_decl.h +0 -10
  200. data/ext/nokogiri/xml_entity_reference.h +0 -9
  201. data/ext/nokogiri/xml_io.c +0 -61
  202. data/ext/nokogiri/xml_io.h +0 -11
  203. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  204. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  205. data/ext/nokogiri/xml_namespace.h +0 -14
  206. data/ext/nokogiri/xml_node.h +0 -13
  207. data/ext/nokogiri/xml_node_set.h +0 -12
  208. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  209. data/ext/nokogiri/xml_reader.h +0 -10
  210. data/ext/nokogiri/xml_relax_ng.h +0 -9
  211. data/ext/nokogiri/xml_sax_parser.h +0 -39
  212. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  213. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  214. data/ext/nokogiri/xml_schema.h +0 -9
  215. data/ext/nokogiri/xml_syntax_error.h +0 -13
  216. data/ext/nokogiri/xml_text.h +0 -9
  217. data/ext/nokogiri/xml_xpath_context.h +0 -10
  218. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  219. data/lib/nokogiri/html/document.rb +0 -335
  220. data/lib/nokogiri/html/document_fragment.rb +0 -49
  221. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  222. data/lib/nokogiri/html/sax/parser.rb +0 -62
  223. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  224. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  225. data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
  226. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  227. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  228. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  229. /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  230. /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,310 +1,403 @@
1
- #include <xml_sax_parser.h>
1
+ #include <nokogiri.h>
2
2
 
3
- int vasprintf (char **strp, const char *fmt, va_list ap);
4
- void vasprintf_free (void *p);
3
+ VALUE cNokogiriXmlSaxParser ;
5
4
 
6
- static ID id_start_document, id_end_document, id_start_element, id_end_element;
7
- static ID id_start_element_namespace, id_end_element_namespace;
8
- static ID id_comment, id_characters, id_xmldecl, id_error, id_warning;
9
- static ID id_cdata_block, id_cAttribute;
5
+ static ID id_start_document;
6
+ static ID id_end_document;
7
+ static ID id_start_element;
8
+ static ID id_end_element;
9
+ static ID id_start_element_namespace;
10
+ static ID id_end_element_namespace;
11
+ static ID id_comment;
12
+ static ID id_characters;
13
+ static ID id_xmldecl;
14
+ static ID id_error;
15
+ static ID id_warning;
16
+ static ID id_cdata_block;
10
17
  static ID id_processing_instruction;
18
+ static ID id_reference;
19
+
20
+ static size_t
21
+ xml_sax_parser_memsize(const void *data)
22
+ {
23
+ return sizeof(xmlSAXHandler);
24
+ }
25
+
26
+ /* Used by Nokogiri::XML::SAX::Parser and Nokogiri::HTML::SAX::Parser */
27
+ static const rb_data_type_t xml_sax_parser_type = {
28
+ .wrap_struct_name = "xmlSAXHandler",
29
+ .function = {
30
+ .dfree = RUBY_TYPED_DEFAULT_FREE,
31
+ .dsize = xml_sax_parser_memsize
32
+ },
33
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
34
+ };
11
35
 
12
- static void start_document(void * ctx)
36
+ static void
37
+ noko_xml_sax_parser_start_document_callback(void *ctx)
13
38
  {
14
- VALUE self = NOKOGIRI_SAX_SELF(ctx);
39
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
40
+ VALUE self = (VALUE)ctxt->_private;
15
41
  VALUE doc = rb_iv_get(self, "@document");
16
42
 
17
- xmlParserCtxtPtr ctxt = NOKOGIRI_SAX_CTXT(ctx);
18
-
19
- if(NULL != ctxt && ctxt->html != 1) {
20
- if(ctxt->standalone != -1) { /* -1 means there was no declaration */
21
- VALUE encoding = Qnil ;
22
- VALUE standalone = Qnil;
23
- VALUE version;
24
- if (ctxt->encoding) {
25
- encoding = NOKOGIRI_STR_NEW2(ctxt->encoding) ;
26
- } else if (ctxt->input && ctxt->input->encoding) {
27
- encoding = NOKOGIRI_STR_NEW2(ctxt->input->encoding) ;
28
- }
29
-
30
- version = ctxt->version ? NOKOGIRI_STR_NEW2(ctxt->version) : Qnil;
31
-
32
- switch(ctxt->standalone)
33
- {
34
- case 0:
35
- standalone = NOKOGIRI_STR_NEW2("no");
36
- break;
37
- case 1:
38
- standalone = NOKOGIRI_STR_NEW2("yes");
39
- break;
40
- }
41
-
42
- rb_funcall(doc, id_xmldecl, 3, version, encoding, standalone);
43
+ xmlSAX2StartDocument(ctx);
44
+
45
+ if (ctxt->standalone != -1) { /* -1 means there was no declaration */
46
+ VALUE encoding = Qnil ;
47
+ VALUE standalone = Qnil;
48
+ VALUE version;
49
+
50
+ if (ctxt->encoding) {
51
+ encoding = NOKOGIRI_STR_NEW2(ctxt->encoding) ;
52
+ } else if (ctxt->input && ctxt->input->encoding) { // unnecessary after v2.12.0 / gnome/libxml2@ec7be506
53
+ encoding = NOKOGIRI_STR_NEW2(ctxt->input->encoding) ;
54
+ }
55
+
56
+ version = ctxt->version ? NOKOGIRI_STR_NEW2(ctxt->version) : Qnil;
57
+
58
+ /* TODO try using xmlSAX2IsStandalone */
59
+ switch (ctxt->standalone) {
60
+ case 0:
61
+ standalone = NOKOGIRI_STR_NEW2("no");
62
+ break;
63
+ case 1:
64
+ standalone = NOKOGIRI_STR_NEW2("yes");
65
+ break;
43
66
  }
67
+
68
+ rb_funcall(doc, id_xmldecl, 3, version, encoding, standalone);
44
69
  }
45
70
 
46
71
  rb_funcall(doc, id_start_document, 0);
47
72
  }
48
73
 
49
- static void end_document(void * ctx)
74
+ static void
75
+ noko_xml_sax_parser_end_document_callback(void *ctx)
50
76
  {
51
- VALUE self = NOKOGIRI_SAX_SELF(ctx);
77
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
78
+ VALUE self = (VALUE)ctxt->_private;
52
79
  VALUE doc = rb_iv_get(self, "@document");
80
+
53
81
  rb_funcall(doc, id_end_document, 0);
54
82
  }
55
83
 
56
- static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts)
84
+ static void
85
+ noko_xml_sax_parser_start_element_callback(void *ctx, const xmlChar *name, const xmlChar **atts)
57
86
  {
58
- VALUE self = NOKOGIRI_SAX_SELF(ctx);
87
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
88
+ VALUE self = (VALUE)ctxt->_private;
59
89
  VALUE doc = rb_iv_get(self, "@document");
90
+
60
91
  VALUE attributes = rb_ary_new();
61
- const xmlChar * attr;
92
+ const xmlChar *attr;
62
93
  int i = 0;
63
- if(atts) {
64
- while((attr = atts[i]) != NULL) {
65
- const xmlChar * val = atts[i+1];
94
+ if (atts) {
95
+ while ((attr = atts[i]) != NULL) {
96
+ const xmlChar *val = atts[i + 1];
66
97
  VALUE value = val != NULL ? NOKOGIRI_STR_NEW2(val) : Qnil;
67
98
  rb_ary_push(attributes, rb_ary_new3(2, NOKOGIRI_STR_NEW2(attr), value));
68
- i+=2;
99
+ i += 2;
69
100
  }
70
101
  }
71
102
 
72
- rb_funcall( doc,
73
- id_start_element,
74
- 2,
75
- NOKOGIRI_STR_NEW2(name),
76
- attributes
77
- );
103
+ rb_funcall(doc,
104
+ id_start_element,
105
+ 2,
106
+ NOKOGIRI_STR_NEW2(name),
107
+ attributes
108
+ );
78
109
  }
79
110
 
80
- static void end_element(void * ctx, const xmlChar *name)
111
+ static void
112
+ noko_xml_sax_parser_end_element_callback(void *ctx, const xmlChar *name)
81
113
  {
82
- VALUE self = NOKOGIRI_SAX_SELF(ctx);
114
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
115
+ VALUE self = (VALUE)ctxt->_private;
83
116
  VALUE doc = rb_iv_get(self, "@document");
117
+
84
118
  rb_funcall(doc, id_end_element, 1, NOKOGIRI_STR_NEW2(name));
85
119
  }
86
120
 
87
- static VALUE attributes_as_list(
88
- VALUE self,
89
- int nb_attributes,
90
- const xmlChar ** attributes)
121
+ static VALUE
122
+ xml_sax_parser_marshal_attributes(int attributes_len, const xmlChar **c_attributes)
91
123
  {
92
- VALUE list = rb_ary_new2((long)nb_attributes);
124
+ VALUE rb_array = rb_ary_new2((long)attributes_len);
125
+ VALUE cNokogiriXmlSaxParserAttribute;
93
126
 
94
- VALUE attr_klass = rb_const_get(cNokogiriXmlSaxParser, id_cAttribute);
95
- if (attributes) {
127
+ cNokogiriXmlSaxParserAttribute = rb_const_get_at(cNokogiriXmlSaxParser, rb_intern("Attribute"));
128
+ if (c_attributes) {
96
129
  /* Each attribute is an array of [localname, prefix, URI, value, end] */
97
130
  int i;
98
- for (i = 0; i < nb_attributes * 5; i += 5) {
99
- VALUE argv[4], attribute;
131
+ for (i = 0; i < attributes_len * 5; i += 5) {
132
+ VALUE rb_constructor_args[4], rb_attribute;
100
133
 
101
- argv[0] = RBSTR_OR_QNIL(attributes[i + 0]); /* localname */
102
- argv[1] = RBSTR_OR_QNIL(attributes[i + 1]); /* prefix */
103
- argv[2] = RBSTR_OR_QNIL(attributes[i + 2]); /* URI */
134
+ rb_constructor_args[0] = RBSTR_OR_QNIL(c_attributes[i + 0]); /* localname */
135
+ rb_constructor_args[1] = RBSTR_OR_QNIL(c_attributes[i + 1]); /* prefix */
136
+ rb_constructor_args[2] = RBSTR_OR_QNIL(c_attributes[i + 2]); /* URI */
104
137
 
105
138
  /* value */
106
- argv[3] = NOKOGIRI_STR_NEW((const char*)attributes[i+3],
107
- (attributes[i+4] - attributes[i+3]));
139
+ rb_constructor_args[3] = NOKOGIRI_STR_NEW((const char *)c_attributes[i + 3],
140
+ (c_attributes[i + 4] - c_attributes[i + 3]));
108
141
 
109
- attribute = rb_class_new_instance(4, argv, attr_klass);
110
- rb_ary_push(list, attribute);
142
+ rb_attribute = rb_class_new_instance(4, rb_constructor_args, cNokogiriXmlSaxParserAttribute);
143
+ rb_ary_push(rb_array, rb_attribute);
111
144
  }
112
145
  }
113
146
 
114
- return list;
147
+ return rb_array;
115
148
  }
116
149
 
117
150
  static void
118
- start_element_ns (
119
- void * ctx,
120
- const xmlChar * localname,
121
- const xmlChar * prefix,
122
- const xmlChar * uri,
151
+ noko_xml_sax_parser_start_element_ns_callback(
152
+ void *ctx,
153
+ const xmlChar *localname,
154
+ const xmlChar *prefix,
155
+ const xmlChar *uri,
123
156
  int nb_namespaces,
124
- const xmlChar ** namespaces,
157
+ const xmlChar **namespaces,
125
158
  int nb_attributes,
126
159
  int nb_defaulted,
127
- const xmlChar ** attributes)
160
+ const xmlChar **attributes)
128
161
  {
129
- VALUE self = NOKOGIRI_SAX_SELF(ctx);
162
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
163
+ VALUE self = (VALUE)ctxt->_private;
130
164
  VALUE doc = rb_iv_get(self, "@document");
131
165
 
132
- VALUE attribute_list = attributes_as_list(self, nb_attributes, attributes);
166
+ VALUE attribute_ary = xml_sax_parser_marshal_attributes(nb_attributes, attributes);
133
167
 
134
168
  VALUE ns_list = rb_ary_new2((long)nb_namespaces);
135
169
 
136
170
  if (namespaces) {
137
171
  int i;
138
- for (i = 0; i < nb_namespaces * 2; i += 2)
139
- {
172
+ for (i = 0; i < nb_namespaces * 2; i += 2) {
140
173
  rb_ary_push(ns_list,
141
- rb_ary_new3((long)2,
142
- RBSTR_OR_QNIL(namespaces[i + 0]),
143
- RBSTR_OR_QNIL(namespaces[i + 1])
144
- )
145
- );
174
+ rb_ary_new3((long)2,
175
+ RBSTR_OR_QNIL(namespaces[i + 0]),
176
+ RBSTR_OR_QNIL(namespaces[i + 1])
177
+ )
178
+ );
146
179
  }
147
180
  }
148
181
 
149
- rb_funcall( doc,
150
- id_start_element_namespace,
151
- 5,
152
- NOKOGIRI_STR_NEW2(localname),
153
- attribute_list,
154
- RBSTR_OR_QNIL(prefix),
155
- RBSTR_OR_QNIL(uri),
156
- ns_list
157
- );
182
+ rb_funcall(doc,
183
+ id_start_element_namespace,
184
+ 5,
185
+ NOKOGIRI_STR_NEW2(localname),
186
+ attribute_ary,
187
+ RBSTR_OR_QNIL(prefix),
188
+ RBSTR_OR_QNIL(uri),
189
+ ns_list
190
+ );
158
191
  }
159
192
 
160
193
  /**
161
194
  * end_element_ns was borrowed heavily from libxml-ruby.
162
195
  */
163
196
  static void
164
- end_element_ns (
165
- void * ctx,
166
- const xmlChar * localname,
167
- const xmlChar * prefix,
168
- const xmlChar * uri)
197
+ noko_xml_sax_parser_end_element_ns_callback(
198
+ void *ctx,
199
+ const xmlChar *localname,
200
+ const xmlChar *prefix,
201
+ const xmlChar *uri)
169
202
  {
170
- VALUE self = NOKOGIRI_SAX_SELF(ctx);
203
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
204
+ VALUE self = (VALUE)ctxt->_private;
171
205
  VALUE doc = rb_iv_get(self, "@document");
172
206
 
173
207
  rb_funcall(doc, id_end_element_namespace, 3,
174
- NOKOGIRI_STR_NEW2(localname),
175
- RBSTR_OR_QNIL(prefix),
176
- RBSTR_OR_QNIL(uri)
177
- );
208
+ NOKOGIRI_STR_NEW2(localname),
209
+ RBSTR_OR_QNIL(prefix),
210
+ RBSTR_OR_QNIL(uri)
211
+ );
178
212
  }
179
213
 
180
- static void characters_func(void * ctx, const xmlChar * ch, int len)
214
+ static void
215
+ noko_xml_sax_parser_characters_callback(void *ctx, const xmlChar *ch, int len)
181
216
  {
182
- VALUE self = NOKOGIRI_SAX_SELF(ctx);
217
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
218
+ VALUE self = (VALUE)ctxt->_private;
183
219
  VALUE doc = rb_iv_get(self, "@document");
220
+
184
221
  VALUE str = NOKOGIRI_STR_NEW(ch, len);
185
222
  rb_funcall(doc, id_characters, 1, str);
186
223
  }
187
224
 
188
- static void comment_func(void * ctx, const xmlChar * value)
225
+ static void
226
+ noko_xml_sax_parser_comment_callback(void *ctx, const xmlChar *value)
189
227
  {
190
- VALUE self = NOKOGIRI_SAX_SELF(ctx);
228
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
229
+ VALUE self = (VALUE)ctxt->_private;
191
230
  VALUE doc = rb_iv_get(self, "@document");
231
+
192
232
  VALUE str = NOKOGIRI_STR_NEW2(value);
193
233
  rb_funcall(doc, id_comment, 1, str);
194
234
  }
195
235
 
196
- static void warning_func(void * ctx, const char *msg, ...)
236
+ PRINTFLIKE_DECL(2, 3)
237
+ static void
238
+ noko_xml_sax_parser_warning_callback(void *ctx, const char *msg, ...)
197
239
  {
198
- VALUE self = NOKOGIRI_SAX_SELF(ctx);
240
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
241
+ VALUE self = (VALUE)ctxt->_private;
199
242
  VALUE doc = rb_iv_get(self, "@document");
200
- char * message;
201
- VALUE ruby_message;
202
243
 
244
+ VALUE rb_message;
245
+
246
+ #ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
247
+ /* It is not currently possible to pass var args from native
248
+ functions to sulong, so we work around the issue here. */
249
+ rb_message = rb_sprintf("warning_func: %s", msg);
250
+ #else
203
251
  va_list args;
204
252
  va_start(args, msg);
205
- vasprintf(&message, msg, args);
253
+ rb_message = rb_vsprintf(msg, args);
206
254
  va_end(args);
255
+ #endif
207
256
 
208
- ruby_message = NOKOGIRI_STR_NEW2(message);
209
- vasprintf_free(message);
210
- rb_funcall(doc, id_warning, 1, ruby_message);
257
+ rb_funcall(doc, id_warning, 1, rb_message);
211
258
  }
212
259
 
213
- static void error_func(void * ctx, const char *msg, ...)
260
+ PRINTFLIKE_DECL(2, 3)
261
+ static void
262
+ noko_xml_sax_parser_error_callback(void *ctx, const char *msg, ...)
214
263
  {
215
- VALUE self = NOKOGIRI_SAX_SELF(ctx);
264
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
265
+ VALUE self = (VALUE)ctxt->_private;
216
266
  VALUE doc = rb_iv_get(self, "@document");
217
- char * message;
218
- VALUE ruby_message;
219
267
 
268
+ VALUE rb_message;
269
+
270
+ #ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
271
+ /* It is not currently possible to pass var args from native
272
+ functions to sulong, so we work around the issue here. */
273
+ rb_message = rb_sprintf("error_func: %s", msg);
274
+ #else
220
275
  va_list args;
221
276
  va_start(args, msg);
222
- vasprintf(&message, msg, args);
277
+ rb_message = rb_vsprintf(msg, args);
223
278
  va_end(args);
279
+ #endif
224
280
 
225
- ruby_message = NOKOGIRI_STR_NEW2(message);
226
- vasprintf_free(message);
227
- rb_funcall(doc, id_error, 1, ruby_message);
281
+ rb_funcall(doc, id_error, 1, rb_message);
228
282
  }
229
283
 
230
- static void cdata_block(void * ctx, const xmlChar * value, int len)
284
+ static void
285
+ noko_xml_sax_parser_cdata_block_callback(void *ctx, const xmlChar *value, int len)
231
286
  {
232
- VALUE self = NOKOGIRI_SAX_SELF(ctx);
287
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
288
+ VALUE self = (VALUE)ctxt->_private;
233
289
  VALUE doc = rb_iv_get(self, "@document");
290
+
234
291
  VALUE string = NOKOGIRI_STR_NEW(value, len);
235
292
  rb_funcall(doc, id_cdata_block, 1, string);
236
293
  }
237
294
 
238
- static void processing_instruction(void * ctx, const xmlChar * name, const xmlChar * content)
295
+ static void
296
+ noko_xml_sax_parser_processing_instruction_callback(void *ctx, const xmlChar *name, const xmlChar *content)
239
297
  {
240
- VALUE rb_content;
241
- VALUE self = NOKOGIRI_SAX_SELF(ctx);
298
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
299
+ VALUE self = (VALUE)ctxt->_private;
242
300
  VALUE doc = rb_iv_get(self, "@document");
243
301
 
244
- rb_content = content ? NOKOGIRI_STR_NEW2(content) : Qnil;
302
+ VALUE rb_content = content ? NOKOGIRI_STR_NEW2(content) : Qnil;
245
303
 
246
- rb_funcall( doc,
247
- id_processing_instruction,
248
- 2,
249
- NOKOGIRI_STR_NEW2(name),
250
- rb_content
251
- );
304
+ rb_funcall(doc,
305
+ id_processing_instruction,
306
+ 2,
307
+ NOKOGIRI_STR_NEW2(name),
308
+ rb_content
309
+ );
252
310
  }
253
311
 
254
- static void deallocate(xmlSAXHandlerPtr handler)
312
+ static void
313
+ noko_xml_sax_parser_reference_callback(void *ctx, const xmlChar *name)
255
314
  {
256
- NOKOGIRI_DEBUG_START(handler);
257
- free(handler);
258
- NOKOGIRI_DEBUG_END(handler);
315
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
316
+ xmlEntityPtr entity = xmlSAX2GetEntity(ctxt, name);
317
+
318
+ VALUE self = (VALUE)ctxt->_private;
319
+ VALUE doc = rb_iv_get(self, "@document");
320
+
321
+ if (entity && entity->content) {
322
+ rb_funcall(doc, id_reference, 2, NOKOGIRI_STR_NEW2(entity->name), NOKOGIRI_STR_NEW2(entity->content));
323
+ } else {
324
+ rb_funcall(doc, id_reference, 2, NOKOGIRI_STR_NEW2(name), Qnil);
325
+ }
259
326
  }
260
327
 
261
- static VALUE allocate(VALUE klass)
328
+ static VALUE
329
+ noko_xml_sax_parser__initialize_native(VALUE self)
262
330
  {
263
- xmlSAXHandlerPtr handler = calloc((size_t)1, sizeof(xmlSAXHandler));
264
-
265
- xmlSetStructuredErrorFunc(NULL, NULL);
266
-
267
- handler->startDocument = start_document;
268
- handler->endDocument = end_document;
269
- handler->startElement = start_element;
270
- handler->endElement = end_element;
271
- handler->startElementNs = start_element_ns;
272
- handler->endElementNs = end_element_ns;
273
- handler->characters = characters_func;
274
- handler->comment = comment_func;
275
- handler->warning = warning_func;
276
- handler->error = error_func;
277
- handler->cdataBlock = cdata_block;
278
- handler->processingInstruction = processing_instruction;
331
+ xmlSAXHandlerPtr handler = noko_xml_sax_parser_unwrap(self);
332
+
333
+ handler->startDocument = noko_xml_sax_parser_start_document_callback;
334
+ handler->endDocument = noko_xml_sax_parser_end_document_callback;
335
+ handler->startElement = noko_xml_sax_parser_start_element_callback;
336
+ handler->endElement = noko_xml_sax_parser_end_element_callback;
337
+ handler->startElementNs = noko_xml_sax_parser_start_element_ns_callback;
338
+ handler->endElementNs = noko_xml_sax_parser_end_element_ns_callback;
339
+ handler->characters = noko_xml_sax_parser_characters_callback;
340
+ handler->comment = noko_xml_sax_parser_comment_callback;
341
+ handler->warning = noko_xml_sax_parser_warning_callback;
342
+ handler->error = noko_xml_sax_parser_error_callback;
343
+ handler->cdataBlock = noko_xml_sax_parser_cdata_block_callback;
344
+ handler->processingInstruction = noko_xml_sax_parser_processing_instruction_callback;
345
+ handler->reference = noko_xml_sax_parser_reference_callback;
346
+
347
+ /* use some of libxml2's default callbacks to manage DTDs and entities */
348
+ handler->getEntity = xmlSAX2GetEntity;
349
+ handler->internalSubset = xmlSAX2InternalSubset;
350
+ handler->externalSubset = xmlSAX2ExternalSubset;
351
+ handler->isStandalone = xmlSAX2IsStandalone;
352
+ handler->hasInternalSubset = xmlSAX2HasInternalSubset;
353
+ handler->hasExternalSubset = xmlSAX2HasExternalSubset;
354
+ handler->resolveEntity = xmlSAX2ResolveEntity;
355
+ handler->getParameterEntity = xmlSAX2GetParameterEntity;
356
+ handler->entityDecl = xmlSAX2EntityDecl;
357
+ handler->unparsedEntityDecl = xmlSAX2UnparsedEntityDecl;
358
+
279
359
  handler->initialized = XML_SAX2_MAGIC;
280
360
 
281
- return Data_Wrap_Struct(klass, NULL, deallocate, handler);
361
+ return self;
282
362
  }
283
363
 
284
- VALUE cNokogiriXmlSaxParser ;
285
- void init_xml_sax_parser()
364
+ static VALUE
365
+ noko_xml_sax_parser_allocate(VALUE klass)
366
+ {
367
+ xmlSAXHandlerPtr handler;
368
+ return TypedData_Make_Struct(klass, xmlSAXHandler, &xml_sax_parser_type, handler);
369
+ }
370
+
371
+ xmlSAXHandlerPtr
372
+ noko_xml_sax_parser_unwrap(VALUE rb_sax_handler)
373
+ {
374
+ xmlSAXHandlerPtr c_sax_handler;
375
+ TypedData_Get_Struct(rb_sax_handler, xmlSAXHandler, &xml_sax_parser_type, c_sax_handler);
376
+ return c_sax_handler;
377
+ }
378
+
379
+ void
380
+ noko_init_xml_sax_parser(void)
286
381
  {
287
- VALUE nokogiri = rb_define_module("Nokogiri");
288
- VALUE xml = rb_define_module_under(nokogiri, "XML");
289
- VALUE sax = rb_define_module_under(xml, "SAX");
290
- VALUE klass = rb_define_class_under(sax, "Parser", rb_cObject);
382
+ cNokogiriXmlSaxParser = rb_define_class_under(mNokogiriXmlSax, "Parser", rb_cObject);
291
383
 
292
- cNokogiriXmlSaxParser = klass;
384
+ rb_define_alloc_func(cNokogiriXmlSaxParser, noko_xml_sax_parser_allocate);
293
385
 
294
- rb_define_alloc_func(klass, allocate);
386
+ rb_define_private_method(cNokogiriXmlSaxParser, "initialize_native",
387
+ noko_xml_sax_parser__initialize_native, 0);
295
388
 
296
389
  id_start_document = rb_intern("start_document");
297
- id_end_document = rb_intern("end_document");
298
- id_start_element = rb_intern("start_element");
299
- id_end_element = rb_intern("end_element");
300
- id_comment = rb_intern("comment");
301
- id_characters = rb_intern("characters");
302
- id_xmldecl = rb_intern("xmldecl");
303
- id_error = rb_intern("error");
304
- id_warning = rb_intern("warning");
305
- id_cdata_block = rb_intern("cdata_block");
306
- id_cAttribute = rb_intern("Attribute");
390
+ id_end_document = rb_intern("end_document");
391
+ id_start_element = rb_intern("start_element");
392
+ id_end_element = rb_intern("end_element");
393
+ id_comment = rb_intern("comment");
394
+ id_characters = rb_intern("characters");
395
+ id_xmldecl = rb_intern("xmldecl");
396
+ id_error = rb_intern("error");
397
+ id_warning = rb_intern("warning");
398
+ id_cdata_block = rb_intern("cdata_block");
307
399
  id_start_element_namespace = rb_intern("start_element_namespace");
308
400
  id_end_element_namespace = rb_intern("end_element_namespace");
309
401
  id_processing_instruction = rb_intern("processing_instruction");
402
+ id_reference = rb_intern("reference");
310
403
  }