nokogiri 1.9.1 → 1.15.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (226)
  1. checksums.yaml +4 -4
  2. data/Gemfile +45 -0
  3. data/LICENSE-DEPENDENCIES.md +1636 -1024
  4. data/LICENSE.md +5 -28
  5. data/README.md +203 -89
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -61
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +864 -418
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +215 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +17 -17
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +40 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +401 -240
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +135 -61
  33. data/ext/nokogiri/xml_node.c +1346 -677
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +347 -212
  37. data/ext/nokogiri/xml_relax_ng.c +86 -77
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +145 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +64 -36
  41. data/ext/nokogiri/xml_schema.c +138 -81
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +36 -26
  44. data/ext/nokogiri/xml_xpath_context.c +366 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +111 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +224 -95
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +392 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +223 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +98 -54
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -126
  133. data/lib/nokogiri/xml/document_fragment.rb +93 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +45 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1088 -418
  142. data/lib/nokogiri/xml/node_set.rb +173 -63
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +38 -37
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  169. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  170. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  175. data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
  176. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
  177. metadata +128 -265
  178. data/ext/nokogiri/html_document.c +0 -170
  179. data/ext/nokogiri/html_document.h +0 -10
  180. data/ext/nokogiri/html_element_description.c +0 -279
  181. data/ext/nokogiri/html_element_description.h +0 -10
  182. data/ext/nokogiri/html_entity_lookup.c +0 -32
  183. data/ext/nokogiri/html_entity_lookup.h +0 -8
  184. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  185. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  186. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  187. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  188. data/ext/nokogiri/xml_attr.h +0 -9
  189. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  190. data/ext/nokogiri/xml_cdata.h +0 -9
  191. data/ext/nokogiri/xml_comment.h +0 -9
  192. data/ext/nokogiri/xml_document.h +0 -23
  193. data/ext/nokogiri/xml_document_fragment.h +0 -10
  194. data/ext/nokogiri/xml_dtd.h +0 -10
  195. data/ext/nokogiri/xml_element_content.h +0 -10
  196. data/ext/nokogiri/xml_element_decl.h +0 -9
  197. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  198. data/ext/nokogiri/xml_entity_decl.h +0 -10
  199. data/ext/nokogiri/xml_entity_reference.h +0 -9
  200. data/ext/nokogiri/xml_io.c +0 -61
  201. data/ext/nokogiri/xml_io.h +0 -11
  202. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  203. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  204. data/ext/nokogiri/xml_namespace.h +0 -14
  205. data/ext/nokogiri/xml_node.h +0 -13
  206. data/ext/nokogiri/xml_node_set.h +0 -12
  207. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  208. data/ext/nokogiri/xml_reader.h +0 -10
  209. data/ext/nokogiri/xml_relax_ng.h +0 -9
  210. data/ext/nokogiri/xml_sax_parser.h +0 -39
  211. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  212. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  213. data/ext/nokogiri/xml_schema.h +0 -9
  214. data/ext/nokogiri/xml_syntax_error.h +0 -13
  215. data/ext/nokogiri/xml_text.h +0 -9
  216. data/ext/nokogiri/xml_xpath_context.h +0 -10
  217. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  218. data/lib/nokogiri/html/document.rb +0 -335
  219. data/lib/nokogiri/html/document_fragment.rb +0 -49
  220. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  221. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  222. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  223. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  224. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  225. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  226. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
@@ -1,18 +1,44 @@
1
- #include <xml_sax_parser_context.h>
1
+ #include <nokogiri.h>
2
2
 
3
3
  VALUE cNokogiriXmlSaxParserContext ;
4
4
 
5
- static void deallocate(xmlParserCtxtPtr ctxt)
6
- {
7
- NOKOGIRI_DEBUG_START(handler);
5
+ static ID id_read;
8
6
 
7
+ static void
8
+ xml_sax_parser_context_free(void *data)
9
+ {
10
+ xmlParserCtxtPtr ctxt = data;
9
11
  ctxt->sax = NULL;
10
-
11
12
  xmlFreeParserCtxt(ctxt);
13
+ }
14
+
15
+ /*
16
+ * note that htmlParserCtxtPtr == xmlParserCtxtPtr and xmlFreeParserCtxt() == htmlFreeParserCtxt()
17
+ * so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
18
+ */
19
+ static const rb_data_type_t xml_sax_parser_context_type = {
20
+ .wrap_struct_name = "Nokogiri::XML::SAX::ParserContext",
21
+ .function = {
22
+ .dfree = xml_sax_parser_context_free,
23
+ },
24
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
25
+ };
26
+
27
+ xmlParserCtxtPtr
28
+ noko_xml_sax_parser_context_unwrap(VALUE rb_context)
29
+ {
30
+ xmlParserCtxtPtr c_context;
31
+ TypedData_Get_Struct(rb_context, xmlParserCtxt, &xml_sax_parser_context_type, c_context);
32
+ return c_context;
33
+ }
12
34
 
13
- NOKOGIRI_DEBUG_END(handler);
35
+ VALUE
36
+ noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
37
+ {
38
+ return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
14
39
  }
15
40
 
41
+
16
42
  /*
17
43
  * call-seq:
18
44
  * parse_io(io, encoding)
@@ -22,19 +48,23 @@ static void deallocate(xmlParserCtxtPtr ctxt)
22
48
  static VALUE
23
49
  parse_io(VALUE klass, VALUE io, VALUE encoding)
24
50
  {
25
- xmlParserCtxtPtr ctxt;
26
- xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
27
-
28
- ctxt = xmlCreateIOParserCtxt(NULL, NULL,
29
- (xmlInputReadCallback)io_read_callback,
30
- (xmlInputCloseCallback)io_close_callback,
31
- (void *)io, enc);
32
- if (ctxt->sax) {
33
- xmlFree(ctxt->sax);
34
- ctxt->sax = NULL;
35
- }
36
-
37
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
51
+ xmlParserCtxtPtr ctxt;
52
+ xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
53
+
54
+ if (!rb_respond_to(io, id_read)) {
55
+ rb_raise(rb_eTypeError, "argument expected to respond to :read");
56
+ }
57
+
58
+ ctxt = xmlCreateIOParserCtxt(NULL, NULL,
59
+ (xmlInputReadCallback)noko_io_read,
60
+ (xmlInputCloseCallback)noko_io_close,
61
+ (void *)io, enc);
62
+ if (ctxt->sax) {
63
+ xmlFree(ctxt->sax);
64
+ ctxt->sax = NULL;
65
+ }
66
+
67
+ return noko_xml_sax_parser_context_wrap(klass, ctxt);
38
68
  }
39
69
 
40
70
  /*
@@ -43,10 +73,17 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
43
73
  *
44
74
  * Parse file given +filename+
45
75
  */
46
- static VALUE parse_file(VALUE klass, VALUE filename)
76
+ static VALUE
77
+ parse_file(VALUE klass, VALUE filename)
47
78
  {
48
79
  xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
49
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
80
+
81
+ if (ctxt->sax) {
82
+ xmlFree(ctxt->sax);
83
+ ctxt->sax = NULL;
84
+ }
85
+
86
+ return noko_xml_sax_parser_context_wrap(klass, ctxt);
50
87
  }
51
88
 
52
89
  /*
@@ -58,41 +95,43 @@ static VALUE parse_file(VALUE klass, VALUE filename)
58
95
  static VALUE
59
96
  parse_memory(VALUE klass, VALUE data)
60
97
  {
61
- xmlParserCtxtPtr ctxt;
98
+ xmlParserCtxtPtr ctxt;
99
+
100
+ Check_Type(data, T_STRING);
62
101
 
63
- if (NIL_P(data))
64
- rb_raise(rb_eArgError, "data cannot be nil");
65
- if (!(int)RSTRING_LEN(data))
66
- rb_raise(rb_eRuntimeError, "data cannot be empty");
102
+ if (!(int)RSTRING_LEN(data)) {
103
+ rb_raise(rb_eRuntimeError, "data cannot be empty");
104
+ }
67
105
 
68
- ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(data),
69
- (int)RSTRING_LEN(data));
70
- if (ctxt->sax) {
71
- xmlFree(ctxt->sax);
72
- ctxt->sax = NULL;
73
- }
106
+ ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(data),
107
+ (int)RSTRING_LEN(data));
108
+ if (ctxt->sax) {
109
+ xmlFree(ctxt->sax);
110
+ ctxt->sax = NULL;
111
+ }
74
112
 
75
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
113
+ return noko_xml_sax_parser_context_wrap(klass, ctxt);
76
114
  }
77
115
 
78
116
  static VALUE
79
117
  parse_doc(VALUE ctxt_val)
80
118
  {
81
- xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
82
- xmlParseDocument(ctxt);
83
- return Qnil;
119
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
120
+ xmlParseDocument(ctxt);
121
+ return Qnil;
84
122
  }
85
123
 
86
124
  static VALUE
87
125
  parse_doc_finalize(VALUE ctxt_val)
88
126
  {
89
- xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
127
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
90
128
 
91
- if (NULL != ctxt->myDoc)
92
- xmlFreeDoc(ctxt->myDoc);
129
+ if (NULL != ctxt->myDoc) {
130
+ xmlFreeDoc(ctxt->myDoc);
131
+ }
93
132
 
94
- NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
95
- return Qnil;
133
+ NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
134
+ return Qnil;
96
135
  }
97
136
 
98
137
  /*
@@ -104,25 +143,24 @@ parse_doc_finalize(VALUE ctxt_val)
104
143
  static VALUE
105
144
  parse_with(VALUE self, VALUE sax_handler)
106
145
  {
107
- xmlParserCtxtPtr ctxt;
108
- xmlSAXHandlerPtr sax;
146
+ xmlParserCtxtPtr ctxt;
147
+ xmlSAXHandlerPtr sax;
109
148
 
110
- if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser))
111
- rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
149
+ if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
150
+ rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
151
+ }
112
152
 
113
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
114
- Data_Get_Struct(sax_handler, xmlSAXHandler, sax);
153
+ ctxt = noko_xml_sax_parser_context_unwrap(self);
154
+ sax = noko_sax_handler_unwrap(sax_handler);
115
155
 
116
- /* Free the sax handler since we'll assign our own */
117
- if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler)
118
- xmlFree(ctxt->sax);
156
+ ctxt->sax = sax;
157
+ ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
119
158
 
120
- ctxt->sax = sax;
121
- ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
159
+ xmlSetStructuredErrorFunc(NULL, NULL);
122
160
 
123
- rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
161
+ rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
124
162
 
125
- return Qnil;
163
+ return Qnil;
126
164
  }
127
165
 
128
166
  /*
@@ -132,15 +170,16 @@ parse_with(VALUE self, VALUE sax_handler)
132
170
  * Should this parser replace entities? &amp; will get converted to '&' if
133
171
  * set to true
134
172
  */
135
- static VALUE set_replace_entities(VALUE self, VALUE value)
173
+ static VALUE
174
+ set_replace_entities(VALUE self, VALUE value)
136
175
  {
137
- xmlParserCtxtPtr ctxt;
138
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
176
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
139
177
 
140
- if(Qfalse == value)
178
+ if (Qfalse == value) {
141
179
  ctxt->replaceEntities = 0;
142
- else
180
+ } else {
143
181
  ctxt->replaceEntities = 1;
182
+ }
144
183
 
145
184
  return value;
146
185
  }
@@ -152,15 +191,16 @@ static VALUE set_replace_entities(VALUE self, VALUE value)
152
191
  * Should this parser replace entities? &amp; will get converted to '&' if
153
192
  * set to true
154
193
  */
155
- static VALUE get_replace_entities(VALUE self)
194
+ static VALUE
195
+ get_replace_entities(VALUE self)
156
196
  {
157
- xmlParserCtxtPtr ctxt;
158
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
197
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
159
198
 
160
- if(0 == ctxt->replaceEntities)
199
+ if (0 == ctxt->replaceEntities) {
161
200
  return Qfalse;
162
- else
201
+ } else {
163
202
  return Qtrue;
203
+ }
164
204
  }
165
205
 
166
206
  /*
@@ -168,16 +208,16 @@ static VALUE get_replace_entities(VALUE self)
168
208
  *
169
209
  * Get the current line the parser context is processing.
170
210
  */
171
- static VALUE line(VALUE self)
211
+ static VALUE
212
+ line(VALUE self)
172
213
  {
173
- xmlParserCtxtPtr ctxt;
174
214
  xmlParserInputPtr io;
175
-
176
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
215
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
177
216
 
178
217
  io = ctxt->input;
179
- if(io)
218
+ if (io) {
180
219
  return INT2NUM(io->line);
220
+ }
181
221
 
182
222
  return Qnil;
183
223
  }
@@ -187,16 +227,16 @@ static VALUE line(VALUE self)
187
227
  *
188
228
  * Get the current column the parser context is processing.
189
229
  */
190
- static VALUE column(VALUE self)
230
+ static VALUE
231
+ column(VALUE self)
191
232
  {
192
- xmlParserCtxtPtr ctxt;
233
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
193
234
  xmlParserInputPtr io;
194
235
 
195
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
196
-
197
236
  io = ctxt->input;
198
- if(io)
237
+ if (io) {
199
238
  return INT2NUM(io->col);
239
+ }
200
240
 
201
241
  return Qnil;
202
242
  }
@@ -208,15 +248,16 @@ static VALUE column(VALUE self)
208
248
  * Should this parser recover from structural errors? It will not stop processing
209
249
  * the file on structural errors if set to true
210
250
  */
211
- static VALUE set_recovery(VALUE self, VALUE value)
251
+ static VALUE
252
+ set_recovery(VALUE self, VALUE value)
212
253
  {
213
- xmlParserCtxtPtr ctxt;
214
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
254
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
215
255
 
216
- if(value == Qfalse)
256
+ if (value == Qfalse) {
217
257
  ctxt->recovery = 0;
218
- else
258
+ } else {
219
259
  ctxt->recovery = 1;
260
+ }
220
261
 
221
262
  return value;
222
263
  }
@@ -228,35 +269,36 @@ static VALUE set_recovery(VALUE self, VALUE value)
228
269
  * Should this parser recover from structural errors? It will not stop processing
229
270
  * file on structural errors if set to true
230
271
  */
231
- static VALUE get_recovery(VALUE self)
272
+ static VALUE
273
+ get_recovery(VALUE self)
232
274
  {
233
- xmlParserCtxtPtr ctxt;
234
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
275
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
235
276
 
236
- if(ctxt->recovery == 0)
277
+ if (ctxt->recovery == 0) {
237
278
  return Qfalse;
238
- else
279
+ } else {
239
280
  return Qtrue;
281
+ }
240
282
  }
241
283
 
242
- void init_xml_sax_parser_context()
284
+ void
285
+ noko_init_xml_sax_parser_context(void)
243
286
  {
244
- VALUE nokogiri = rb_define_module("Nokogiri");
245
- VALUE xml = rb_define_module_under(nokogiri, "XML");
246
- VALUE sax = rb_define_module_under(xml, "SAX");
247
- VALUE klass = rb_define_class_under(sax, "ParserContext", rb_cObject);
248
-
249
- cNokogiriXmlSaxParserContext = klass;
250
-
251
- rb_define_singleton_method(klass, "io", parse_io, 2);
252
- rb_define_singleton_method(klass, "memory", parse_memory, 1);
253
- rb_define_singleton_method(klass, "file", parse_file, 1);
254
-
255
- rb_define_method(klass, "parse_with", parse_with, 1);
256
- rb_define_method(klass, "replace_entities=", set_replace_entities, 1);
257
- rb_define_method(klass, "replace_entities", get_replace_entities, 0);
258
- rb_define_method(klass, "recovery=", set_recovery, 1);
259
- rb_define_method(klass, "recovery", get_recovery, 0);
260
- rb_define_method(klass, "line", line, 0);
261
- rb_define_method(klass, "column", column, 0);
287
+ cNokogiriXmlSaxParserContext = rb_define_class_under(mNokogiriXmlSax, "ParserContext", rb_cObject);
288
+
289
+ rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
290
+
291
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2);
292
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1);
293
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1);
294
+
295
+ rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", parse_with, 1);
296
+ rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=", set_replace_entities, 1);
297
+ rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities", get_replace_entities, 0);
298
+ rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", set_recovery, 1);
299
+ rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
300
+ rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
301
+ rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
302
+
303
+ id_read = rb_intern("read");
262
304
  }
@@ -1,18 +1,37 @@
1
- #include <xml_sax_push_parser.h>
1
+ #include <nokogiri.h>
2
2
 
3
- static void deallocate(xmlParserCtxtPtr ctx)
3
+ VALUE cNokogiriXmlSaxPushParser ;
4
+
5
+ static void
6
+ xml_sax_push_parser_free(void *data)
4
7
  {
5
- NOKOGIRI_DEBUG_START(ctx);
8
+ xmlParserCtxtPtr ctx = data;
6
9
  if (ctx != NULL) {
7
10
  NOKOGIRI_SAX_TUPLE_DESTROY(ctx->userData);
8
11
  xmlFreeParserCtxt(ctx);
9
12
  }
10
- NOKOGIRI_DEBUG_END(ctx);
11
13
  }
12
14
 
13
- static VALUE allocate(VALUE klass)
15
+ static const rb_data_type_t xml_sax_push_parser_type = {
16
+ .wrap_struct_name = "Nokogiri::XML::SAX::PushParser",
17
+ .function = {
18
+ .dfree = xml_sax_push_parser_free,
19
+ },
20
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
21
+ };
22
+
23
+ static VALUE
24
+ allocate(VALUE klass)
25
+ {
26
+ return TypedData_Wrap_Struct(klass, &xml_sax_push_parser_type, NULL);
27
+ }
28
+
29
+ xmlParserCtxtPtr
30
+ noko_xml_sax_push_parser_unwrap(VALUE rb_parser)
14
31
  {
15
- return Data_Wrap_Struct(klass, NULL, deallocate, NULL);
32
+ xmlParserCtxtPtr c_parser;
33
+ TypedData_Get_Struct(rb_parser, xmlParserCtxt, &xml_sax_push_parser_type, c_parser);
34
+ return c_parser;
16
35
  }
17
36
 
18
37
  /*
@@ -21,20 +40,23 @@ static VALUE allocate(VALUE klass)
21
40
  *
22
41
  * Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handler
23
42
  */
24
- static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
43
+ static VALUE
44
+ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
25
45
  {
26
46
  xmlParserCtxtPtr ctx;
27
- const char * chunk = NULL;
47
+ const char *chunk = NULL;
28
48
  int size = 0;
29
49
 
30
50
 
31
- Data_Get_Struct(self, xmlParserCtxt, ctx);
51
+ ctx = noko_xml_sax_push_parser_unwrap(self);
32
52
 
33
53
  if (Qnil != _chunk) {
34
54
  chunk = StringValuePtr(_chunk);
35
55
  size = (int)RSTRING_LEN(_chunk);
36
56
  }
37
57
 
58
+ xmlSetStructuredErrorFunc(NULL, NULL);
59
+
38
60
  if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
39
61
  if (!(ctx->options & XML_PARSE_RECOVER)) {
40
62
  xmlErrorPtr e = xmlCtxtGetLastError(ctx);
@@ -51,13 +73,14 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
51
73
  *
52
74
  * Initialize the push parser with +xml_sax+ using +filename+
53
75
  */
54
- static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
76
+ static VALUE
77
+ initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
55
78
  {
56
79
  xmlSAXHandlerPtr sax;
57
- const char * filename = NULL;
80
+ const char *filename = NULL;
58
81
  xmlParserCtxtPtr ctx;
59
82
 
60
- Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
83
+ sax = noko_sax_handler_unwrap(_xml_sax);
61
84
 
62
85
  if (_filename != Qnil) { filename = StringValueCStr(_filename); }
63
86
 
@@ -79,18 +102,22 @@ static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
79
102
  return self;
80
103
  }
81
104
 
82
- static VALUE get_options(VALUE self)
105
+ static VALUE
106
+ get_options(VALUE self)
83
107
  {
84
108
  xmlParserCtxtPtr ctx;
85
- Data_Get_Struct(self, xmlParserCtxt, ctx);
109
+
110
+ ctx = noko_xml_sax_push_parser_unwrap(self);
86
111
 
87
112
  return INT2NUM(ctx->options);
88
113
  }
89
114
 
90
- static VALUE set_options(VALUE self, VALUE options)
115
+ static VALUE
116
+ set_options(VALUE self, VALUE options)
91
117
  {
92
118
  xmlParserCtxtPtr ctx;
93
- Data_Get_Struct(self, xmlParserCtxt, ctx);
119
+
120
+ ctx = noko_xml_sax_push_parser_unwrap(self);
94
121
 
95
122
  if (xmlCtxtUseOptions(ctx, (int)NUM2INT(options)) != 0) {
96
123
  rb_raise(rb_eRuntimeError, "Cannot set XML parser context options");
@@ -106,10 +133,12 @@ static VALUE set_options(VALUE self, VALUE options)
106
133
  * Should this parser replace entities? &amp; will get converted to '&' if
107
134
  * set to true
108
135
  */
109
- static VALUE get_replace_entities(VALUE self)
136
+ static VALUE
137
+ get_replace_entities(VALUE self)
110
138
  {
111
139
  xmlParserCtxtPtr ctx;
112
- Data_Get_Struct(self, xmlParserCtxt, ctx);
140
+
141
+ ctx = noko_xml_sax_push_parser_unwrap(self);
113
142
 
114
143
  if (0 == ctx->replaceEntities) {
115
144
  return Qfalse;
@@ -125,10 +154,12 @@ static VALUE get_replace_entities(VALUE self)
125
154
  * Should this parser replace entities? &amp; will get converted to '&' if
126
155
  * set to true
127
156
  */
128
- static VALUE set_replace_entities(VALUE self, VALUE value)
157
+ static VALUE
158
+ set_replace_entities(VALUE self, VALUE value)
129
159
  {
130
160
  xmlParserCtxtPtr ctx;
131
- Data_Get_Struct(self, xmlParserCtxt, ctx);
161
+
162
+ ctx = noko_xml_sax_push_parser_unwrap(self);
132
163
 
133
164
  if (Qfalse == value) {
134
165
  ctx->replaceEntities = 0;
@@ -139,21 +170,18 @@ static VALUE set_replace_entities(VALUE self, VALUE value)
139
170
  return value;
140
171
  }
141
172
 
142
- VALUE cNokogiriXmlSaxPushParser ;
143
- void init_xml_sax_push_parser()
173
+ void
174
+ noko_init_xml_sax_push_parser(void)
144
175
  {
145
- VALUE nokogiri = rb_define_module("Nokogiri");
146
- VALUE xml = rb_define_module_under(nokogiri, "XML");
147
- VALUE sax = rb_define_module_under(xml, "SAX");
148
- VALUE klass = rb_define_class_under(sax, "PushParser", rb_cObject);
149
-
150
- cNokogiriXmlSaxPushParser = klass;
151
-
152
- rb_define_alloc_func(klass, allocate);
153
- rb_define_private_method(klass, "initialize_native", initialize_native, 2);
154
- rb_define_private_method(klass, "native_write", native_write, 2);
155
- rb_define_method(klass, "options", get_options, 0);
156
- rb_define_method(klass, "options=", set_options, 1);
157
- rb_define_method(klass, "replace_entities", get_replace_entities, 0);
158
- rb_define_method(klass, "replace_entities=", set_replace_entities, 1);
176
+ cNokogiriXmlSaxPushParser = rb_define_class_under(mNokogiriXmlSax, "PushParser", rb_cObject);
177
+
178
+ rb_define_alloc_func(cNokogiriXmlSaxPushParser, allocate);
179
+
180
+ rb_define_method(cNokogiriXmlSaxPushParser, "options", get_options, 0);
181
+ rb_define_method(cNokogiriXmlSaxPushParser, "options=", set_options, 1);
182
+ rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities", get_replace_entities, 0);
183
+ rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities=", set_replace_entities, 1);
184
+
185
+ rb_define_private_method(cNokogiriXmlSaxPushParser, "initialize_native", initialize_native, 2);
186
+ rb_define_private_method(cNokogiriXmlSaxPushParser, "native_write", native_write, 2);
159
187
  }