nokogiri 1.18.0-arm-linux-musl

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (203) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +39 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +293 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +42 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1173 -0
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  18. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  19. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
  21. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
  23. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
  24. data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
  25. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  26. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  27. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  28. data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
  29. data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
  30. data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
  31. data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
  32. data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
  33. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  35. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
  37. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
  38. data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
  39. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
  41. data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
  42. data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
  43. data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
  44. data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
  45. data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
  65. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
  66. data/ext/nokogiri/include/libxslt/attributes.h +39 -0
  67. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  68. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  69. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  70. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  71. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  72. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  73. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  74. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  75. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  76. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  77. data/ext/nokogiri/include/libxslt/security.h +104 -0
  78. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  79. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  80. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  81. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  82. data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
  83. data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
  84. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  85. data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
  86. data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
  87. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  88. data/ext/nokogiri/nokogiri.c +294 -0
  89. data/ext/nokogiri/nokogiri.h +238 -0
  90. data/ext/nokogiri/test_global_handlers.c +40 -0
  91. data/ext/nokogiri/xml_attr.c +103 -0
  92. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  93. data/ext/nokogiri/xml_cdata.c +62 -0
  94. data/ext/nokogiri/xml_comment.c +57 -0
  95. data/ext/nokogiri/xml_document.c +784 -0
  96. data/ext/nokogiri/xml_document_fragment.c +29 -0
  97. data/ext/nokogiri/xml_dtd.c +208 -0
  98. data/ext/nokogiri/xml_element_content.c +131 -0
  99. data/ext/nokogiri/xml_element_decl.c +69 -0
  100. data/ext/nokogiri/xml_encoding_handler.c +112 -0
  101. data/ext/nokogiri/xml_entity_decl.c +112 -0
  102. data/ext/nokogiri/xml_entity_reference.c +50 -0
  103. data/ext/nokogiri/xml_namespace.c +181 -0
  104. data/ext/nokogiri/xml_node.c +2459 -0
  105. data/ext/nokogiri/xml_node_set.c +518 -0
  106. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  107. data/ext/nokogiri/xml_reader.c +777 -0
  108. data/ext/nokogiri/xml_relax_ng.c +149 -0
  109. data/ext/nokogiri/xml_sax_parser.c +403 -0
  110. data/ext/nokogiri/xml_sax_parser_context.c +390 -0
  111. data/ext/nokogiri/xml_sax_push_parser.c +206 -0
  112. data/ext/nokogiri/xml_schema.c +226 -0
  113. data/ext/nokogiri/xml_syntax_error.c +93 -0
  114. data/ext/nokogiri/xml_text.c +59 -0
  115. data/ext/nokogiri/xml_xpath_context.c +486 -0
  116. data/ext/nokogiri/xslt_stylesheet.c +421 -0
  117. data/gumbo-parser/CHANGES.md +63 -0
  118. data/gumbo-parser/Makefile +129 -0
  119. data/gumbo-parser/THANKS +27 -0
  120. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.3/nokogiri.so +0 -0
  123. data/lib/nokogiri/3.4/nokogiri.so +0 -0
  124. data/lib/nokogiri/class_resolver.rb +67 -0
  125. data/lib/nokogiri/css/node.rb +58 -0
  126. data/lib/nokogiri/css/parser.rb +772 -0
  127. data/lib/nokogiri/css/parser.y +277 -0
  128. data/lib/nokogiri/css/parser_extras.rb +36 -0
  129. data/lib/nokogiri/css/selector_cache.rb +38 -0
  130. data/lib/nokogiri/css/syntax_error.rb +9 -0
  131. data/lib/nokogiri/css/tokenizer.rb +155 -0
  132. data/lib/nokogiri/css/tokenizer.rex +57 -0
  133. data/lib/nokogiri/css/xpath_visitor.rb +375 -0
  134. data/lib/nokogiri/css.rb +132 -0
  135. data/lib/nokogiri/decorators/slop.rb +42 -0
  136. data/lib/nokogiri/encoding_handler.rb +57 -0
  137. data/lib/nokogiri/extension.rb +32 -0
  138. data/lib/nokogiri/gumbo.rb +15 -0
  139. data/lib/nokogiri/html.rb +48 -0
  140. data/lib/nokogiri/html4/builder.rb +37 -0
  141. data/lib/nokogiri/html4/document.rb +235 -0
  142. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  143. data/lib/nokogiri/html4/element_description.rb +25 -0
  144. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  145. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  146. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  147. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  148. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  149. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  150. data/lib/nokogiri/html4.rb +42 -0
  151. data/lib/nokogiri/html5/builder.rb +40 -0
  152. data/lib/nokogiri/html5/document.rb +199 -0
  153. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  154. data/lib/nokogiri/html5/node.rb +103 -0
  155. data/lib/nokogiri/html5.rb +368 -0
  156. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  157. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  158. data/lib/nokogiri/syntax_error.rb +6 -0
  159. data/lib/nokogiri/version/constant.rb +6 -0
  160. data/lib/nokogiri/version/info.rb +224 -0
  161. data/lib/nokogiri/version.rb +4 -0
  162. data/lib/nokogiri/xml/attr.rb +66 -0
  163. data/lib/nokogiri/xml/attribute_decl.rb +22 -0
  164. data/lib/nokogiri/xml/builder.rb +494 -0
  165. data/lib/nokogiri/xml/cdata.rb +13 -0
  166. data/lib/nokogiri/xml/character_data.rb +9 -0
  167. data/lib/nokogiri/xml/document.rb +514 -0
  168. data/lib/nokogiri/xml/document_fragment.rb +276 -0
  169. data/lib/nokogiri/xml/dtd.rb +34 -0
  170. data/lib/nokogiri/xml/element_content.rb +46 -0
  171. data/lib/nokogiri/xml/element_decl.rb +17 -0
  172. data/lib/nokogiri/xml/entity_decl.rb +23 -0
  173. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  174. data/lib/nokogiri/xml/namespace.rb +57 -0
  175. data/lib/nokogiri/xml/node/save_options.rb +76 -0
  176. data/lib/nokogiri/xml/node.rb +1650 -0
  177. data/lib/nokogiri/xml/node_set.rb +449 -0
  178. data/lib/nokogiri/xml/notation.rb +19 -0
  179. data/lib/nokogiri/xml/parse_options.rb +213 -0
  180. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  181. data/lib/nokogiri/xml/pp/node.rb +73 -0
  182. data/lib/nokogiri/xml/pp.rb +4 -0
  183. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  184. data/lib/nokogiri/xml/reader.rb +139 -0
  185. data/lib/nokogiri/xml/relax_ng.rb +75 -0
  186. data/lib/nokogiri/xml/sax/document.rb +258 -0
  187. data/lib/nokogiri/xml/sax/parser.rb +199 -0
  188. data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
  189. data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
  190. data/lib/nokogiri/xml/sax.rb +54 -0
  191. data/lib/nokogiri/xml/schema.rb +140 -0
  192. data/lib/nokogiri/xml/searchable.rb +274 -0
  193. data/lib/nokogiri/xml/syntax_error.rb +94 -0
  194. data/lib/nokogiri/xml/text.rb +11 -0
  195. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  196. data/lib/nokogiri/xml/xpath.rb +21 -0
  197. data/lib/nokogiri/xml/xpath_context.rb +27 -0
  198. data/lib/nokogiri/xml.rb +65 -0
  199. data/lib/nokogiri/xslt/stylesheet.rb +49 -0
  200. data/lib/nokogiri/xslt.rb +129 -0
  201. data/lib/nokogiri.rb +128 -0
  202. data/lib/xsd/xmlparser/nokogiri.rb +105 -0
  203. metadata +321 -0
@@ -0,0 +1,390 @@
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriXmlSaxParserContext ;
4
+
5
+ static ID id_read;
6
+
7
+ static void
8
+ xml_sax_parser_context_type_free(void *data)
9
+ {
10
+ xmlParserCtxtPtr ctxt = data;
11
+ ctxt->sax = NULL;
12
+ if (ctxt->myDoc) {
13
+ xmlFreeDoc(ctxt->myDoc);
14
+ }
15
+ if (ctxt) {
16
+ xmlFreeParserCtxt(ctxt);
17
+ }
18
+ }
19
+
20
+ /*
21
+ * note that htmlParserCtxtPtr == xmlParserCtxtPtr and xmlFreeParserCtxt() == htmlFreeParserCtxt()
22
+ * so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
23
+ */
24
+ static const rb_data_type_t xml_sax_parser_context_type = {
25
+ .wrap_struct_name = "xmlParserCtxt",
26
+ .function = {
27
+ .dfree = xml_sax_parser_context_type_free,
28
+ },
29
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
30
+ };
31
+
32
+ xmlParserCtxtPtr
33
+ noko_xml_sax_parser_context_unwrap(VALUE rb_context)
34
+ {
35
+ xmlParserCtxtPtr c_context;
36
+ TypedData_Get_Struct(rb_context, xmlParserCtxt, &xml_sax_parser_context_type, c_context);
37
+ return c_context;
38
+ }
39
+
40
+ VALUE
41
+ noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
42
+ {
43
+ return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
44
+ }
45
+
46
+ void
47
+ noko_xml_sax_parser_context_set_encoding(xmlParserCtxtPtr c_context, VALUE rb_encoding)
48
+ {
49
+ if (!NIL_P(rb_encoding)) {
50
+ VALUE rb_encoding_name = rb_funcall(rb_encoding, rb_intern("name"), 0);
51
+
52
+ char *encoding_name = StringValueCStr(rb_encoding_name);
53
+ if (encoding_name) {
54
+ libxmlStructuredErrorHandlerState handler_state;
55
+ VALUE rb_errors = rb_ary_new();
56
+
57
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
58
+
59
+ int result = xmlSwitchEncodingName(c_context, encoding_name);
60
+
61
+ noko__structured_error_func_restore(&handler_state);
62
+
63
+ if (result != 0) {
64
+ xmlFreeParserCtxt(c_context);
65
+
66
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
67
+ if (!NIL_P(exception)) {
68
+ rb_exc_raise(exception);
69
+ } else {
70
+ rb_raise(rb_eRuntimeError, "could not set encoding");
71
+ }
72
+ }
73
+ }
74
+ }
75
+ }
76
+
77
+ /* :nodoc: */
78
+ static VALUE
79
+ noko_xml_sax_parser_context_s_native_io(VALUE rb_class, VALUE rb_io, VALUE rb_encoding)
80
+ {
81
+ if (!rb_respond_to(rb_io, id_read)) {
82
+ rb_raise(rb_eTypeError, "argument expected to respond to :read");
83
+ }
84
+
85
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
86
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
87
+ }
88
+
89
+ xmlParserCtxtPtr c_context =
90
+ xmlCreateIOParserCtxt(NULL, NULL,
91
+ (xmlInputReadCallback)noko_io_read,
92
+ (xmlInputCloseCallback)noko_io_close,
93
+ (void *)rb_io, XML_CHAR_ENCODING_NONE);
94
+ if (!c_context) {
95
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
96
+ }
97
+
98
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
99
+
100
+ if (c_context->sax) {
101
+ xmlFree(c_context->sax);
102
+ c_context->sax = NULL;
103
+ }
104
+
105
+ return noko_xml_sax_parser_context_wrap(rb_class, c_context);
106
+ }
107
+
108
+ /* :nodoc: */
109
+ static VALUE
110
+ noko_xml_sax_parser_context_s_native_file(VALUE rb_class, VALUE rb_path, VALUE rb_encoding)
111
+ {
112
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
113
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
114
+ }
115
+
116
+ xmlParserCtxtPtr c_context = xmlCreateFileParserCtxt(StringValueCStr(rb_path));
117
+ if (!c_context) {
118
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
119
+ }
120
+
121
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
122
+
123
+ if (c_context->sax) {
124
+ xmlFree(c_context->sax);
125
+ c_context->sax = NULL;
126
+ }
127
+
128
+ return noko_xml_sax_parser_context_wrap(rb_class, c_context);
129
+ }
130
+
131
+ /* :nodoc: */
132
+ static VALUE
133
+ noko_xml_sax_parser_context_s_native_memory(VALUE rb_class, VALUE rb_input, VALUE rb_encoding)
134
+ {
135
+ Check_Type(rb_input, T_STRING);
136
+ if (!(int)RSTRING_LEN(rb_input)) {
137
+ rb_raise(rb_eRuntimeError, "input string cannot be empty");
138
+ }
139
+
140
+ if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
141
+ rb_raise(rb_eTypeError, "argument must be an Encoding object");
142
+ }
143
+
144
+ xmlParserCtxtPtr c_context =
145
+ xmlCreateMemoryParserCtxt(StringValuePtr(rb_input), (int)RSTRING_LEN(rb_input));
146
+ if (!c_context) {
147
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
148
+ }
149
+
150
+ noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
151
+
152
+ if (c_context->sax) {
153
+ xmlFree(c_context->sax);
154
+ c_context->sax = NULL;
155
+ }
156
+
157
+ return noko_xml_sax_parser_context_wrap(rb_class, c_context);
158
+ }
159
+
160
+ /*
161
+ * call-seq:
162
+ * parse_with(sax_handler)
163
+ *
164
+ * Use +sax_handler+ and parse the current document
165
+ *
166
+ * 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser methods which are
167
+ * more convenient for most use cases.
168
+ */
169
+ static VALUE
170
+ noko_xml_sax_parser_context__parse_with(VALUE rb_context, VALUE rb_sax_parser)
171
+ {
172
+ xmlParserCtxtPtr c_context;
173
+ xmlSAXHandlerPtr sax;
174
+
175
+ if (!rb_obj_is_kind_of(rb_sax_parser, cNokogiriXmlSaxParser)) {
176
+ rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
177
+ }
178
+
179
+ c_context = noko_xml_sax_parser_context_unwrap(rb_context);
180
+ sax = noko_xml_sax_parser_unwrap(rb_sax_parser);
181
+
182
+ c_context->sax = sax;
183
+ c_context->userData = c_context; /* so we can use libxml2/SAX2.c handlers if we want to */
184
+ c_context->_private = (void *)rb_sax_parser;
185
+
186
+ xmlSetStructuredErrorFunc(NULL, NULL);
187
+
188
+ /* although we're calling back into Ruby here, we don't need to worry about exceptions, because we
189
+ * don't have any cleanup to do. The only memory we need to free is handled by
190
+ * xml_sax_parser_context_type_free */
191
+ xmlParseDocument(c_context);
192
+
193
+ return Qnil;
194
+ }
195
+
196
+ /*
197
+ * call-seq:
198
+ * replace_entities=(value)
199
+ *
200
+ * See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
201
+ *
202
+ * [Parameters]
203
+ * - +value+ (Boolean) Whether external parsed entities will be resolved.
204
+ *
205
+ * ⚠ <b>It is UNSAFE to set this option to +true+</b> when parsing untrusted documents. The option
206
+ * defaults to +false+ for this reason.
207
+ *
208
+ * This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
209
+ * replacement.
210
+ *
211
+ * [Example]
212
+ * Because this class is generally not instantiated directly, you would typically set this option
213
+ * via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
214
+ *
215
+ * parser = Nokogiri::XML::SAX::Parser.new(document_handler)
216
+ * parser.parse(xml) do |ctx|
217
+ * ctx.replace_entities = true # this is UNSAFE for untrusted documents!
218
+ * end
219
+ */
220
+ static VALUE
221
+ noko_xml_sax_parser_context__replace_entities_set(VALUE rb_context, VALUE rb_value)
222
+ {
223
+ int error;
224
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
225
+
226
+ if (RB_TEST(rb_value)) {
227
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_NOENT);
228
+ } else {
229
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_NOENT);
230
+ }
231
+
232
+ if (error) {
233
+ rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
234
+ }
235
+
236
+ return rb_value;
237
+ }
238
+
239
+ /*
240
+ * call-seq:
241
+ * replace_entities
242
+ *
243
+ * See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
244
+ *
245
+ * [Returns] (Boolean) Value of the parse option. (Default +false+)
246
+ *
247
+ * This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
248
+ * replacement.
249
+ */
250
+ static VALUE
251
+ noko_xml_sax_parser_context__replace_entities_get(VALUE rb_context)
252
+ {
253
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
254
+
255
+ if (xmlCtxtGetOptions(ctxt) & XML_PARSE_NOENT) {
256
+ return Qtrue;
257
+ } else {
258
+ return Qfalse;
259
+ }
260
+ }
261
+
262
+ /*
263
+ * call-seq: line
264
+ *
265
+ * [Returns] (Integer) the line number of the line being currently parsed.
266
+ */
267
+ static VALUE
268
+ noko_xml_sax_parser_context__line(VALUE rb_context)
269
+ {
270
+ xmlParserInputPtr io;
271
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
272
+
273
+ io = ctxt->input;
274
+ if (io) {
275
+ return INT2NUM(io->line);
276
+ }
277
+
278
+ return Qnil;
279
+ }
280
+
281
+ /*
282
+ * call-seq: column
283
+ *
284
+ * [Returns] (Integer) the column number of the column being currently parsed.
285
+ */
286
+ static VALUE
287
+ noko_xml_sax_parser_context__column(VALUE rb_context)
288
+ {
289
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
290
+ xmlParserInputPtr io;
291
+
292
+ io = ctxt->input;
293
+ if (io) {
294
+ return INT2NUM(io->col);
295
+ }
296
+
297
+ return Qnil;
298
+ }
299
+
300
+ /*
301
+ * call-seq:
302
+ * recovery=(value)
303
+ *
304
+ * Controls whether this parser will recover from parsing errors. If set to +true+, the parser will
305
+ * invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
306
+ * parser will stop processing the file on the first parsing error.
307
+ *
308
+ * [Parameters]
309
+ * - +value+ (Boolean) Recover from parsing errors. (Default is +false+ for XML and +true+ for HTML.)
310
+ *
311
+ * [Returns] (Boolean) The passed +value+.
312
+ *
313
+ * [Example]
314
+ * Because this class is generally not instantiated directly, you would typically set this option
315
+ * via the block argument to Nokogiri::XML::SAX::Parser.parse et al:
316
+ *
317
+ * parser = Nokogiri::XML::SAX::Parser.new(document_handler)
318
+ * parser.parse(xml) do |ctx|
319
+ * ctx.recovery = true
320
+ * end
321
+ */
322
+ static VALUE
323
+ noko_xml_sax_parser_context__recovery_set(VALUE rb_context, VALUE rb_value)
324
+ {
325
+ int error;
326
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
327
+
328
+ if (RB_TEST(rb_value)) {
329
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_RECOVER);
330
+ } else {
331
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_RECOVER);
332
+ }
333
+
334
+ if (error) {
335
+ rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
336
+ }
337
+
338
+ return rb_value;
339
+ }
340
+
341
+ /*
342
+ * call-seq:
343
+ * recovery
344
+ *
345
+ * Inspect whether this parser will recover from parsing errors. If set to +true+, the parser will
346
+ * invoke the SAX::Document#error callback and continue processing the file. If set to +false+, the
347
+ * parser will stop processing the file on the first parsing error.
348
+ *
349
+ * [Returns] (Boolean) Whether this parser will recover from parsing errors.
350
+ *
351
+ * Default is +false+ for XML and +true+ for HTML.
352
+ */
353
+ static VALUE
354
+ noko_xml_sax_parser_context__recovery_get(VALUE rb_context)
355
+ {
356
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
357
+
358
+ if (xmlCtxtGetOptions(ctxt) & XML_PARSE_RECOVER) {
359
+ return Qtrue;
360
+ } else {
361
+ return Qfalse;
362
+ }
363
+ }
364
+
365
+ void
366
+ noko_init_xml_sax_parser_context(void)
367
+ {
368
+ cNokogiriXmlSaxParserContext = rb_define_class_under(mNokogiriXmlSax, "ParserContext", rb_cObject);
369
+
370
+ rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
371
+
372
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_io",
373
+ noko_xml_sax_parser_context_s_native_io, 2);
374
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_memory",
375
+ noko_xml_sax_parser_context_s_native_memory, 2);
376
+ rb_define_singleton_method(cNokogiriXmlSaxParserContext, "native_file",
377
+ noko_xml_sax_parser_context_s_native_file, 2);
378
+
379
+ rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", noko_xml_sax_parser_context__parse_with, 1);
380
+ rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=",
381
+ noko_xml_sax_parser_context__replace_entities_set, 1);
382
+ rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities",
383
+ noko_xml_sax_parser_context__replace_entities_get, 0);
384
+ rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", noko_xml_sax_parser_context__recovery_set, 1);
385
+ rb_define_method(cNokogiriXmlSaxParserContext, "recovery", noko_xml_sax_parser_context__recovery_get, 0);
386
+ rb_define_method(cNokogiriXmlSaxParserContext, "line", noko_xml_sax_parser_context__line, 0);
387
+ rb_define_method(cNokogiriXmlSaxParserContext, "column", noko_xml_sax_parser_context__column, 0);
388
+
389
+ id_read = rb_intern("read");
390
+ }
@@ -0,0 +1,206 @@
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriXmlSaxPushParser ;
4
+
5
+ static void
6
+ xml_sax_push_parser_free(void *data)
7
+ {
8
+ xmlParserCtxtPtr ctx = data;
9
+ if (ctx->myDoc) {
10
+ xmlFreeDoc(ctx->myDoc);
11
+ }
12
+ if (ctx) {
13
+ xmlFreeParserCtxt(ctx);
14
+ }
15
+ }
16
+
17
+ static const rb_data_type_t xml_sax_push_parser_type = {
18
+ .wrap_struct_name = "xmlParserCtxt",
19
+ .function = {
20
+ .dfree = xml_sax_push_parser_free,
21
+ },
22
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
23
+ };
24
+
25
+ static VALUE
26
+ xml_sax_push_parser_allocate(VALUE klass)
27
+ {
28
+ return TypedData_Wrap_Struct(klass, &xml_sax_push_parser_type, NULL);
29
+ }
30
+
31
+ xmlParserCtxtPtr
32
+ noko_xml_sax_push_parser_unwrap(VALUE rb_parser)
33
+ {
34
+ xmlParserCtxtPtr c_parser;
35
+ TypedData_Get_Struct(rb_parser, xmlParserCtxt, &xml_sax_push_parser_type, c_parser);
36
+ return c_parser;
37
+ }
38
+
39
+ /*
40
+ * Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle
41
+ */
42
+ static VALUE
43
+ noko_xml_sax_push_parser__native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
44
+ {
45
+ xmlParserCtxtPtr ctx;
46
+ const char *chunk = NULL;
47
+ int size = 0;
48
+
49
+ ctx = noko_xml_sax_push_parser_unwrap(self);
50
+
51
+ if (Qnil != _chunk) {
52
+ chunk = StringValuePtr(_chunk);
53
+ size = (int)RSTRING_LEN(_chunk);
54
+ }
55
+
56
+ xmlSetStructuredErrorFunc(NULL, NULL);
57
+
58
+ if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
59
+ if (!(xmlCtxtGetOptions(ctx) & XML_PARSE_RECOVER)) {
60
+ xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
61
+ noko__error_raise(NULL, e);
62
+ }
63
+ }
64
+
65
+ return self;
66
+ }
67
+
68
+ /*
69
+ * call-seq:
70
+ * initialize_native(xml_sax, filename)
71
+ *
72
+ * Initialize the push parser with +xml_sax+ using +filename+
73
+ */
74
+ static VALUE
75
+ noko_xml_sax_push_parser__initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
76
+ {
77
+ xmlSAXHandlerPtr sax;
78
+ const char *filename = NULL;
79
+ xmlParserCtxtPtr ctx;
80
+
81
+ sax = noko_xml_sax_parser_unwrap(_xml_sax);
82
+
83
+ if (_filename != Qnil) { filename = StringValueCStr(_filename); }
84
+
85
+ ctx = xmlCreatePushParserCtxt(
86
+ sax,
87
+ NULL,
88
+ NULL,
89
+ 0,
90
+ filename
91
+ );
92
+ if (ctx == NULL) {
93
+ rb_raise(rb_eRuntimeError, "Could not create a parser context");
94
+ }
95
+
96
+ ctx->userData = ctx;
97
+ ctx->_private = (void *)_xml_sax;
98
+
99
+ DATA_PTR(self) = ctx;
100
+ return self;
101
+ }
102
+
103
+ static VALUE
104
+ noko_xml_sax_push_parser__options_get(VALUE self)
105
+ {
106
+ xmlParserCtxtPtr ctx;
107
+
108
+ ctx = noko_xml_sax_push_parser_unwrap(self);
109
+
110
+ return INT2NUM(xmlCtxtGetOptions(ctx));
111
+ }
112
+
113
+ static VALUE
114
+ noko_xml_sax_push_parser__options_set(VALUE self, VALUE options)
115
+ {
116
+ int error;
117
+ xmlParserCtxtPtr ctx;
118
+
119
+ ctx = noko_xml_sax_push_parser_unwrap(self);
120
+
121
+ error = xmlCtxtSetOptions(ctx, (int)NUM2INT(options));
122
+ if (error) {
123
+ rb_raise(rb_eRuntimeError, "Cannot set XML parser context options (%x)", error);
124
+ }
125
+
126
+ return Qnil;
127
+ }
128
+
129
+ /*
130
+ * call-seq:
131
+ * replace_entities
132
+ *
133
+ * See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
134
+ *
135
+ * [Returns] (Boolean) Value of the parse option. (Default +false+)
136
+ *
137
+ * This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
138
+ * replacement.
139
+ */
140
+ static VALUE
141
+ noko_xml_sax_push_parser__replace_entities_get(VALUE self)
142
+ {
143
+ xmlParserCtxtPtr ctxt = noko_xml_sax_push_parser_unwrap(self);
144
+
145
+ if (xmlCtxtGetOptions(ctxt) & XML_PARSE_NOENT) {
146
+ return Qtrue;
147
+ } else {
148
+ return Qfalse;
149
+ }
150
+ }
151
+
152
+ /*
153
+ * call-seq:
154
+ * replace_entities=(value)
155
+ *
156
+ * See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
157
+ *
158
+ * [Parameters]
159
+ * - +value+ (Boolean) Whether external parsed entities will be resolved.
160
+ *
161
+ * ⚠ <b>It is UNSAFE to set this option to +true+</b> when parsing untrusted documents. The option
162
+ * defaults to +false+ for this reason.
163
+ *
164
+ * This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
165
+ * replacement.
166
+ */
167
+ static VALUE
168
+ noko_xml_sax_push_parser__replace_entities_set(VALUE self, VALUE value)
169
+ {
170
+ int error;
171
+ xmlParserCtxtPtr ctxt = noko_xml_sax_push_parser_unwrap(self);
172
+
173
+ if (RB_TEST(value)) {
174
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_NOENT);
175
+ } else {
176
+ error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_NOENT);
177
+ }
178
+
179
+ if (error) {
180
+ rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
181
+ }
182
+
183
+ return value;
184
+ }
185
+
186
+ void
187
+ noko_init_xml_sax_push_parser(void)
188
+ {
189
+ cNokogiriXmlSaxPushParser = rb_define_class_under(mNokogiriXmlSax, "PushParser", rb_cObject);
190
+
191
+ rb_define_alloc_func(cNokogiriXmlSaxPushParser, xml_sax_push_parser_allocate);
192
+
193
+ rb_define_method(cNokogiriXmlSaxPushParser, "options",
194
+ noko_xml_sax_push_parser__options_get, 0);
195
+ rb_define_method(cNokogiriXmlSaxPushParser, "options=",
196
+ noko_xml_sax_push_parser__options_set, 1);
197
+ rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities",
198
+ noko_xml_sax_push_parser__replace_entities_get, 0);
199
+ rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities=",
200
+ noko_xml_sax_push_parser__replace_entities_set, 1);
201
+
202
+ rb_define_private_method(cNokogiriXmlSaxPushParser, "initialize_native",
203
+ noko_xml_sax_push_parser__initialize_native, 2);
204
+ rb_define_private_method(cNokogiriXmlSaxPushParser, "native_write",
205
+ noko_xml_sax_push_parser__native_write, 2);
206
+ }