nokogiri 1.10.7 → 1.16.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (224)
  1. checksums.yaml +4 -4
  2. data/Gemfile +42 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +188 -96
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +862 -421
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +222 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +17 -17
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +39 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +408 -243
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +131 -61
  33. data/ext/nokogiri/xml_node.c +1343 -674
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +305 -213
  37. data/ext/nokogiri/xml_relax_ng.c +87 -78
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +149 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +65 -37
  41. data/ext/nokogiri/xml_schema.c +138 -82
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +35 -26
  44. data/ext/nokogiri/xml_xpath_context.c +363 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +126 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3464 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +5 -3
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +205 -96
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +326 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +224 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +75 -34
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -127
  133. data/lib/nokogiri/xml/document_fragment.rb +93 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +44 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1096 -419
  142. data/lib/nokogiri/xml/node_set.rb +137 -61
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +7 -5
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +39 -38
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  169. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  170. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  171. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  172. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  173. data/ports/archives/libxml2-2.12.3.tar.xz +0 -0
  174. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  175. metadata +121 -291
  176. data/ext/nokogiri/html_document.c +0 -170
  177. data/ext/nokogiri/html_document.h +0 -10
  178. data/ext/nokogiri/html_element_description.c +0 -279
  179. data/ext/nokogiri/html_element_description.h +0 -10
  180. data/ext/nokogiri/html_entity_lookup.c +0 -32
  181. data/ext/nokogiri/html_entity_lookup.h +0 -8
  182. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  183. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  184. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  185. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  186. data/ext/nokogiri/xml_attr.h +0 -9
  187. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  188. data/ext/nokogiri/xml_cdata.h +0 -9
  189. data/ext/nokogiri/xml_comment.h +0 -9
  190. data/ext/nokogiri/xml_document.h +0 -23
  191. data/ext/nokogiri/xml_document_fragment.h +0 -10
  192. data/ext/nokogiri/xml_dtd.h +0 -10
  193. data/ext/nokogiri/xml_element_content.h +0 -10
  194. data/ext/nokogiri/xml_element_decl.h +0 -9
  195. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  196. data/ext/nokogiri/xml_entity_decl.h +0 -10
  197. data/ext/nokogiri/xml_entity_reference.h +0 -9
  198. data/ext/nokogiri/xml_io.c +0 -61
  199. data/ext/nokogiri/xml_io.h +0 -11
  200. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  201. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  202. data/ext/nokogiri/xml_namespace.h +0 -14
  203. data/ext/nokogiri/xml_node.h +0 -13
  204. data/ext/nokogiri/xml_node_set.h +0 -12
  205. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  206. data/ext/nokogiri/xml_reader.h +0 -10
  207. data/ext/nokogiri/xml_relax_ng.h +0 -9
  208. data/ext/nokogiri/xml_sax_parser.h +0 -39
  209. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  210. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  211. data/ext/nokogiri/xml_schema.h +0 -9
  212. data/ext/nokogiri/xml_syntax_error.h +0 -13
  213. data/ext/nokogiri/xml_text.h +0 -9
  214. data/ext/nokogiri/xml_xpath_context.h +0 -10
  215. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  216. data/lib/nokogiri/html/document.rb +0 -335
  217. data/lib/nokogiri/html/document_fragment.rb +0 -49
  218. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  219. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  220. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  221. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  222. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  223. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  224. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,18 +1,44 @@
1
- #include <xml_sax_parser_context.h>
1
+ #include <nokogiri.h>
2
2
 
3
3
  VALUE cNokogiriXmlSaxParserContext ;
4
4
 
5
- static void deallocate(xmlParserCtxtPtr ctxt)
6
- {
7
- NOKOGIRI_DEBUG_START(handler);
5
+ static ID id_read;
8
6
 
7
+ static void
8
+ xml_sax_parser_context_free(void *data)
9
+ {
10
+ xmlParserCtxtPtr ctxt = data;
9
11
  ctxt->sax = NULL;
10
-
11
12
  xmlFreeParserCtxt(ctxt);
13
+ }
14
+
15
+ /*
16
+ * note that htmlParserCtxtPtr == xmlParserCtxtPtr and xmlFreeParserCtxt() == htmlFreeParserCtxt()
17
+ * so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
18
+ */
19
+ static const rb_data_type_t xml_sax_parser_context_type = {
20
+ .wrap_struct_name = "Nokogiri::XML::SAX::ParserContext",
21
+ .function = {
22
+ .dfree = xml_sax_parser_context_free,
23
+ },
24
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
25
+ };
26
+
27
+ xmlParserCtxtPtr
28
+ noko_xml_sax_parser_context_unwrap(VALUE rb_context)
29
+ {
30
+ xmlParserCtxtPtr c_context;
31
+ TypedData_Get_Struct(rb_context, xmlParserCtxt, &xml_sax_parser_context_type, c_context);
32
+ return c_context;
33
+ }
12
34
 
13
- NOKOGIRI_DEBUG_END(handler);
35
+ VALUE
36
+ noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
37
+ {
38
+ return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
14
39
  }
15
40
 
41
+
16
42
  /*
17
43
  * call-seq:
18
44
  * parse_io(io, encoding)
@@ -22,19 +48,27 @@ static void deallocate(xmlParserCtxtPtr ctxt)
22
48
  static VALUE
23
49
  parse_io(VALUE klass, VALUE io, VALUE encoding)
24
50
  {
25
- xmlParserCtxtPtr ctxt;
26
- xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
27
-
28
- ctxt = xmlCreateIOParserCtxt(NULL, NULL,
29
- (xmlInputReadCallback)io_read_callback,
30
- (xmlInputCloseCallback)io_close_callback,
31
- (void *)io, enc);
32
- if (ctxt->sax) {
33
- xmlFree(ctxt->sax);
34
- ctxt->sax = NULL;
35
- }
36
-
37
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
51
+ xmlParserCtxtPtr ctxt;
52
+ xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
53
+
54
+ if (!rb_respond_to(io, id_read)) {
55
+ rb_raise(rb_eTypeError, "argument expected to respond to :read");
56
+ }
57
+
58
+ ctxt = xmlCreateIOParserCtxt(NULL, NULL,
59
+ (xmlInputReadCallback)noko_io_read,
60
+ (xmlInputCloseCallback)noko_io_close,
61
+ (void *)io, enc);
62
+ if (!ctxt) {
63
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
64
+ }
65
+
66
+ if (ctxt->sax) {
67
+ xmlFree(ctxt->sax);
68
+ ctxt->sax = NULL;
69
+ }
70
+
71
+ return noko_xml_sax_parser_context_wrap(klass, ctxt);
38
72
  }
39
73
 
40
74
  /*
@@ -43,10 +77,17 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
43
77
  *
44
78
  * Parse file given +filename+
45
79
  */
46
- static VALUE parse_file(VALUE klass, VALUE filename)
80
+ static VALUE
81
+ parse_file(VALUE klass, VALUE filename)
47
82
  {
48
83
  xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
49
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
84
+
85
+ if (ctxt->sax) {
86
+ xmlFree(ctxt->sax);
87
+ ctxt->sax = NULL;
88
+ }
89
+
90
+ return noko_xml_sax_parser_context_wrap(klass, ctxt);
50
91
  }
51
92
 
52
93
  /*
@@ -58,41 +99,43 @@ static VALUE parse_file(VALUE klass, VALUE filename)
58
99
  static VALUE
59
100
  parse_memory(VALUE klass, VALUE data)
60
101
  {
61
- xmlParserCtxtPtr ctxt;
102
+ xmlParserCtxtPtr ctxt;
103
+
104
+ Check_Type(data, T_STRING);
62
105
 
63
- if (NIL_P(data))
64
- rb_raise(rb_eArgError, "data cannot be nil");
65
- if (!(int)RSTRING_LEN(data))
66
- rb_raise(rb_eRuntimeError, "data cannot be empty");
106
+ if (!(int)RSTRING_LEN(data)) {
107
+ rb_raise(rb_eRuntimeError, "data cannot be empty");
108
+ }
67
109
 
68
- ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(data),
69
- (int)RSTRING_LEN(data));
70
- if (ctxt->sax) {
71
- xmlFree(ctxt->sax);
72
- ctxt->sax = NULL;
73
- }
110
+ ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(data),
111
+ (int)RSTRING_LEN(data));
112
+ if (ctxt->sax) {
113
+ xmlFree(ctxt->sax);
114
+ ctxt->sax = NULL;
115
+ }
74
116
 
75
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
117
+ return noko_xml_sax_parser_context_wrap(klass, ctxt);
76
118
  }
77
119
 
78
120
  static VALUE
79
121
  parse_doc(VALUE ctxt_val)
80
122
  {
81
- xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
82
- xmlParseDocument(ctxt);
83
- return Qnil;
123
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
124
+ xmlParseDocument(ctxt);
125
+ return Qnil;
84
126
  }
85
127
 
86
128
  static VALUE
87
129
  parse_doc_finalize(VALUE ctxt_val)
88
130
  {
89
- xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
131
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
90
132
 
91
- if (NULL != ctxt->myDoc)
92
- xmlFreeDoc(ctxt->myDoc);
133
+ if (NULL != ctxt->myDoc) {
134
+ xmlFreeDoc(ctxt->myDoc);
135
+ }
93
136
 
94
- NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
95
- return Qnil;
137
+ NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
138
+ return Qnil;
96
139
  }
97
140
 
98
141
  /*
@@ -104,25 +147,24 @@ parse_doc_finalize(VALUE ctxt_val)
104
147
  static VALUE
105
148
  parse_with(VALUE self, VALUE sax_handler)
106
149
  {
107
- xmlParserCtxtPtr ctxt;
108
- xmlSAXHandlerPtr sax;
150
+ xmlParserCtxtPtr ctxt;
151
+ xmlSAXHandlerPtr sax;
109
152
 
110
- if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser))
111
- rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
153
+ if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
154
+ rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
155
+ }
112
156
 
113
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
114
- Data_Get_Struct(sax_handler, xmlSAXHandler, sax);
157
+ ctxt = noko_xml_sax_parser_context_unwrap(self);
158
+ sax = noko_sax_handler_unwrap(sax_handler);
115
159
 
116
- /* Free the sax handler since we'll assign our own */
117
- if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler)
118
- xmlFree(ctxt->sax);
160
+ ctxt->sax = sax;
161
+ ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
119
162
 
120
- ctxt->sax = sax;
121
- ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
163
+ xmlSetStructuredErrorFunc(NULL, NULL);
122
164
 
123
- rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
165
+ rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
124
166
 
125
- return Qnil;
167
+ return Qnil;
126
168
  }
127
169
 
128
170
  /*
@@ -132,15 +174,16 @@ parse_with(VALUE self, VALUE sax_handler)
132
174
  * Should this parser replace entities? &amp; will get converted to '&' if
133
175
  * set to true
134
176
  */
135
- static VALUE set_replace_entities(VALUE self, VALUE value)
177
+ static VALUE
178
+ set_replace_entities(VALUE self, VALUE value)
136
179
  {
137
- xmlParserCtxtPtr ctxt;
138
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
180
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
139
181
 
140
- if(Qfalse == value)
182
+ if (Qfalse == value) {
141
183
  ctxt->replaceEntities = 0;
142
- else
184
+ } else {
143
185
  ctxt->replaceEntities = 1;
186
+ }
144
187
 
145
188
  return value;
146
189
  }
@@ -152,15 +195,16 @@ static VALUE set_replace_entities(VALUE self, VALUE value)
152
195
  * Should this parser replace entities? &amp; will get converted to '&' if
153
196
  * set to true
154
197
  */
155
- static VALUE get_replace_entities(VALUE self)
198
+ static VALUE
199
+ get_replace_entities(VALUE self)
156
200
  {
157
- xmlParserCtxtPtr ctxt;
158
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
201
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
159
202
 
160
- if(0 == ctxt->replaceEntities)
203
+ if (0 == ctxt->replaceEntities) {
161
204
  return Qfalse;
162
- else
205
+ } else {
163
206
  return Qtrue;
207
+ }
164
208
  }
165
209
 
166
210
  /*
@@ -168,16 +212,16 @@ static VALUE get_replace_entities(VALUE self)
168
212
  *
169
213
  * Get the current line the parser context is processing.
170
214
  */
171
- static VALUE line(VALUE self)
215
+ static VALUE
216
+ line(VALUE self)
172
217
  {
173
- xmlParserCtxtPtr ctxt;
174
218
  xmlParserInputPtr io;
175
-
176
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
219
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
177
220
 
178
221
  io = ctxt->input;
179
- if(io)
222
+ if (io) {
180
223
  return INT2NUM(io->line);
224
+ }
181
225
 
182
226
  return Qnil;
183
227
  }
@@ -187,16 +231,16 @@ static VALUE line(VALUE self)
187
231
  *
188
232
  * Get the current column the parser context is processing.
189
233
  */
190
- static VALUE column(VALUE self)
234
+ static VALUE
235
+ column(VALUE self)
191
236
  {
192
- xmlParserCtxtPtr ctxt;
237
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
193
238
  xmlParserInputPtr io;
194
239
 
195
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
196
-
197
240
  io = ctxt->input;
198
- if(io)
241
+ if (io) {
199
242
  return INT2NUM(io->col);
243
+ }
200
244
 
201
245
  return Qnil;
202
246
  }
@@ -208,15 +252,16 @@ static VALUE column(VALUE self)
208
252
  * Should this parser recover from structural errors? It will not stop processing
209
253
  * file on structural errors if set to true
210
254
  */
211
- static VALUE set_recovery(VALUE self, VALUE value)
255
+ static VALUE
256
+ set_recovery(VALUE self, VALUE value)
212
257
  {
213
- xmlParserCtxtPtr ctxt;
214
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
258
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
215
259
 
216
- if(value == Qfalse)
260
+ if (value == Qfalse) {
217
261
  ctxt->recovery = 0;
218
- else
262
+ } else {
219
263
  ctxt->recovery = 1;
264
+ }
220
265
 
221
266
  return value;
222
267
  }
@@ -228,35 +273,36 @@ static VALUE set_recovery(VALUE self, VALUE value)
228
273
  * Should this parser recover from structural errors? It will not stop processing
229
274
  * file on structural errors if set to true
230
275
  */
231
- static VALUE get_recovery(VALUE self)
276
+ static VALUE
277
+ get_recovery(VALUE self)
232
278
  {
233
- xmlParserCtxtPtr ctxt;
234
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
279
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
235
280
 
236
- if(ctxt->recovery == 0)
281
+ if (ctxt->recovery == 0) {
237
282
  return Qfalse;
238
- else
283
+ } else {
239
284
  return Qtrue;
285
+ }
240
286
  }
241
287
 
242
- void init_xml_sax_parser_context()
288
+ void
289
+ noko_init_xml_sax_parser_context(void)
243
290
  {
244
- VALUE nokogiri = rb_define_module("Nokogiri");
245
- VALUE xml = rb_define_module_under(nokogiri, "XML");
246
- VALUE sax = rb_define_module_under(xml, "SAX");
247
- VALUE klass = rb_define_class_under(sax, "ParserContext", rb_cObject);
248
-
249
- cNokogiriXmlSaxParserContext = klass;
250
-
251
- rb_define_singleton_method(klass, "io", parse_io, 2);
252
- rb_define_singleton_method(klass, "memory", parse_memory, 1);
253
- rb_define_singleton_method(klass, "file", parse_file, 1);
254
-
255
- rb_define_method(klass, "parse_with", parse_with, 1);
256
- rb_define_method(klass, "replace_entities=", set_replace_entities, 1);
257
- rb_define_method(klass, "replace_entities", get_replace_entities, 0);
258
- rb_define_method(klass, "recovery=", set_recovery, 1);
259
- rb_define_method(klass, "recovery", get_recovery, 0);
260
- rb_define_method(klass, "line", line, 0);
261
- rb_define_method(klass, "column", column, 0);
291
+ cNokogiriXmlSaxParserContext = rb_define_class_under(mNokogiriXmlSax, "ParserContext", rb_cObject);
292
+
293
+ rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
294
+
295
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2);
296
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1);
297
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1);
298
+
299
+ rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", parse_with, 1);
300
+ rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=", set_replace_entities, 1);
301
+ rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities", get_replace_entities, 0);
302
+ rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", set_recovery, 1);
303
+ rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
304
+ rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
305
+ rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
306
+
307
+ id_read = rb_intern("read");
262
308
  }
@@ -1,18 +1,37 @@
1
- #include <xml_sax_push_parser.h>
1
+ #include <nokogiri.h>
2
2
 
3
- static void deallocate(xmlParserCtxtPtr ctx)
3
+ VALUE cNokogiriXmlSaxPushParser ;
4
+
5
+ static void
6
+ xml_sax_push_parser_free(void *data)
4
7
  {
5
- NOKOGIRI_DEBUG_START(ctx);
8
+ xmlParserCtxtPtr ctx = data;
6
9
  if (ctx != NULL) {
7
10
  NOKOGIRI_SAX_TUPLE_DESTROY(ctx->userData);
8
11
  xmlFreeParserCtxt(ctx);
9
12
  }
10
- NOKOGIRI_DEBUG_END(ctx);
11
13
  }
12
14
 
13
- static VALUE allocate(VALUE klass)
15
+ static const rb_data_type_t xml_sax_push_parser_type = {
16
+ .wrap_struct_name = "Nokogiri::XML::SAX::PushParser",
17
+ .function = {
18
+ .dfree = xml_sax_push_parser_free,
19
+ },
20
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
21
+ };
22
+
23
+ static VALUE
24
+ allocate(VALUE klass)
25
+ {
26
+ return TypedData_Wrap_Struct(klass, &xml_sax_push_parser_type, NULL);
27
+ }
28
+
29
+ xmlParserCtxtPtr
30
+ noko_xml_sax_push_parser_unwrap(VALUE rb_parser)
14
31
  {
15
- return Data_Wrap_Struct(klass, NULL, deallocate, NULL);
32
+ xmlParserCtxtPtr c_parser;
33
+ TypedData_Get_Struct(rb_parser, xmlParserCtxt, &xml_sax_push_parser_type, c_parser);
34
+ return c_parser;
16
35
  }
17
36
 
18
37
  /*
@@ -21,23 +40,26 @@ static VALUE allocate(VALUE klass)
21
40
  *
22
41
  * Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle
23
42
  */
24
- static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
43
+ static VALUE
44
+ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
25
45
  {
26
46
  xmlParserCtxtPtr ctx;
27
- const char * chunk = NULL;
47
+ const char *chunk = NULL;
28
48
  int size = 0;
29
49
 
30
50
 
31
- Data_Get_Struct(self, xmlParserCtxt, ctx);
51
+ ctx = noko_xml_sax_push_parser_unwrap(self);
32
52
 
33
53
  if (Qnil != _chunk) {
34
54
  chunk = StringValuePtr(_chunk);
35
55
  size = (int)RSTRING_LEN(_chunk);
36
56
  }
37
57
 
58
+ xmlSetStructuredErrorFunc(NULL, NULL);
59
+
38
60
  if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
39
61
  if (!(ctx->options & XML_PARSE_RECOVER)) {
40
- xmlErrorPtr e = xmlCtxtGetLastError(ctx);
62
+ xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
41
63
  Nokogiri_error_raise(NULL, e);
42
64
  }
43
65
  }
@@ -51,13 +73,14 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
51
73
  *
52
74
  * Initialize the push parser with +xml_sax+ using +filename+
53
75
  */
54
- static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
76
+ static VALUE
77
+ initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
55
78
  {
56
79
  xmlSAXHandlerPtr sax;
57
- const char * filename = NULL;
80
+ const char *filename = NULL;
58
81
  xmlParserCtxtPtr ctx;
59
82
 
60
- Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
83
+ sax = noko_sax_handler_unwrap(_xml_sax);
61
84
 
62
85
  if (_filename != Qnil) { filename = StringValueCStr(_filename); }
63
86
 
@@ -79,18 +102,22 @@ static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
79
102
  return self;
80
103
  }
81
104
 
82
- static VALUE get_options(VALUE self)
105
+ static VALUE
106
+ get_options(VALUE self)
83
107
  {
84
108
  xmlParserCtxtPtr ctx;
85
- Data_Get_Struct(self, xmlParserCtxt, ctx);
109
+
110
+ ctx = noko_xml_sax_push_parser_unwrap(self);
86
111
 
87
112
  return INT2NUM(ctx->options);
88
113
  }
89
114
 
90
- static VALUE set_options(VALUE self, VALUE options)
115
+ static VALUE
116
+ set_options(VALUE self, VALUE options)
91
117
  {
92
118
  xmlParserCtxtPtr ctx;
93
- Data_Get_Struct(self, xmlParserCtxt, ctx);
119
+
120
+ ctx = noko_xml_sax_push_parser_unwrap(self);
94
121
 
95
122
  if (xmlCtxtUseOptions(ctx, (int)NUM2INT(options)) != 0) {
96
123
  rb_raise(rb_eRuntimeError, "Cannot set XML parser context options");
@@ -106,10 +133,12 @@ static VALUE set_options(VALUE self, VALUE options)
106
133
  * Should this parser replace entities? &amp; will get converted to '&' if
107
134
  * set to true
108
135
  */
109
- static VALUE get_replace_entities(VALUE self)
136
+ static VALUE
137
+ get_replace_entities(VALUE self)
110
138
  {
111
139
  xmlParserCtxtPtr ctx;
112
- Data_Get_Struct(self, xmlParserCtxt, ctx);
140
+
141
+ ctx = noko_xml_sax_push_parser_unwrap(self);
113
142
 
114
143
  if (0 == ctx->replaceEntities) {
115
144
  return Qfalse;
@@ -125,10 +154,12 @@ static VALUE get_replace_entities(VALUE self)
125
154
  * Should this parser replace entities? &amp; will get converted to '&' if
126
155
  * set to true
127
156
  */
128
- static VALUE set_replace_entities(VALUE self, VALUE value)
157
+ static VALUE
158
+ set_replace_entities(VALUE self, VALUE value)
129
159
  {
130
160
  xmlParserCtxtPtr ctx;
131
- Data_Get_Struct(self, xmlParserCtxt, ctx);
161
+
162
+ ctx = noko_xml_sax_push_parser_unwrap(self);
132
163
 
133
164
  if (Qfalse == value) {
134
165
  ctx->replaceEntities = 0;
@@ -139,21 +170,18 @@ static VALUE set_replace_entities(VALUE self, VALUE value)
139
170
  return value;
140
171
  }
141
172
 
142
- VALUE cNokogiriXmlSaxPushParser ;
143
- void init_xml_sax_push_parser()
173
+ void
174
+ noko_init_xml_sax_push_parser(void)
144
175
  {
145
- VALUE nokogiri = rb_define_module("Nokogiri");
146
- VALUE xml = rb_define_module_under(nokogiri, "XML");
147
- VALUE sax = rb_define_module_under(xml, "SAX");
148
- VALUE klass = rb_define_class_under(sax, "PushParser", rb_cObject);
149
-
150
- cNokogiriXmlSaxPushParser = klass;
151
-
152
- rb_define_alloc_func(klass, allocate);
153
- rb_define_private_method(klass, "initialize_native", initialize_native, 2);
154
- rb_define_private_method(klass, "native_write", native_write, 2);
155
- rb_define_method(klass, "options", get_options, 0);
156
- rb_define_method(klass, "options=", set_options, 1);
157
- rb_define_method(klass, "replace_entities", get_replace_entities, 0);
158
- rb_define_method(klass, "replace_entities=", set_replace_entities, 1);
176
+ cNokogiriXmlSaxPushParser = rb_define_class_under(mNokogiriXmlSax, "PushParser", rb_cObject);
177
+
178
+ rb_define_alloc_func(cNokogiriXmlSaxPushParser, allocate);
179
+
180
+ rb_define_method(cNokogiriXmlSaxPushParser, "options", get_options, 0);
181
+ rb_define_method(cNokogiriXmlSaxPushParser, "options=", set_options, 1);
182
+ rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities", get_replace_entities, 0);
183
+ rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities=", set_replace_entities, 1);
184
+
185
+ rb_define_private_method(cNokogiriXmlSaxPushParser, "initialize_native", initialize_native, 2);
186
+ rb_define_private_method(cNokogiriXmlSaxPushParser, "native_write", native_write, 2);
159
187
  }