nokogiri 1.10.7 → 1.16.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (224) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +42 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +188 -96
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +862 -421
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +222 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +17 -17
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +39 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +408 -243
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +131 -61
  33. data/ext/nokogiri/xml_node.c +1343 -674
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +305 -213
  37. data/ext/nokogiri/xml_relax_ng.c +87 -78
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +149 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +65 -37
  41. data/ext/nokogiri/xml_schema.c +138 -82
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +35 -26
  44. data/ext/nokogiri/xml_xpath_context.c +363 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +126 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3464 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +5 -3
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +205 -96
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +326 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +224 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +75 -34
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -127
  133. data/lib/nokogiri/xml/document_fragment.rb +93 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +44 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1096 -419
  142. data/lib/nokogiri/xml/node_set.rb +137 -61
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +7 -5
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +39 -38
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  169. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  170. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  171. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  172. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  173. data/ports/archives/libxml2-2.12.3.tar.xz +0 -0
  174. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  175. metadata +121 -291
  176. data/ext/nokogiri/html_document.c +0 -170
  177. data/ext/nokogiri/html_document.h +0 -10
  178. data/ext/nokogiri/html_element_description.c +0 -279
  179. data/ext/nokogiri/html_element_description.h +0 -10
  180. data/ext/nokogiri/html_entity_lookup.c +0 -32
  181. data/ext/nokogiri/html_entity_lookup.h +0 -8
  182. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  183. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  184. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  185. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  186. data/ext/nokogiri/xml_attr.h +0 -9
  187. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  188. data/ext/nokogiri/xml_cdata.h +0 -9
  189. data/ext/nokogiri/xml_comment.h +0 -9
  190. data/ext/nokogiri/xml_document.h +0 -23
  191. data/ext/nokogiri/xml_document_fragment.h +0 -10
  192. data/ext/nokogiri/xml_dtd.h +0 -10
  193. data/ext/nokogiri/xml_element_content.h +0 -10
  194. data/ext/nokogiri/xml_element_decl.h +0 -9
  195. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  196. data/ext/nokogiri/xml_entity_decl.h +0 -10
  197. data/ext/nokogiri/xml_entity_reference.h +0 -9
  198. data/ext/nokogiri/xml_io.c +0 -61
  199. data/ext/nokogiri/xml_io.h +0 -11
  200. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  201. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  202. data/ext/nokogiri/xml_namespace.h +0 -14
  203. data/ext/nokogiri/xml_node.h +0 -13
  204. data/ext/nokogiri/xml_node_set.h +0 -12
  205. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  206. data/ext/nokogiri/xml_reader.h +0 -10
  207. data/ext/nokogiri/xml_relax_ng.h +0 -9
  208. data/ext/nokogiri/xml_sax_parser.h +0 -39
  209. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  210. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  211. data/ext/nokogiri/xml_schema.h +0 -9
  212. data/ext/nokogiri/xml_syntax_error.h +0 -13
  213. data/ext/nokogiri/xml_text.h +0 -9
  214. data/ext/nokogiri/xml_xpath_context.h +0 -10
  215. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  216. data/lib/nokogiri/html/document.rb +0 -335
  217. data/lib/nokogiri/html/document_fragment.rb +0 -49
  218. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  219. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  220. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  221. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  222. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  223. /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  224. /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,33 +1,44 @@
1
- #include <xml_relax_ng.h>
1
+ #include <nokogiri.h>
2
2
 
3
- static void dealloc(xmlRelaxNGPtr schema)
3
+ VALUE cNokogiriXmlRelaxNG;
4
+
5
+ static void
6
+ xml_relax_ng_deallocate(void *data)
4
7
  {
5
- NOKOGIRI_DEBUG_START(schema);
8
+ xmlRelaxNGPtr schema = data;
6
9
  xmlRelaxNGFree(schema);
7
- NOKOGIRI_DEBUG_END(schema);
8
10
  }
9
11
 
12
+ static const rb_data_type_t xml_relax_ng_type = {
13
+ .wrap_struct_name = "Nokogiri::XML::RelaxNG",
14
+ .function = {
15
+ .dfree = xml_relax_ng_deallocate,
16
+ },
17
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
18
+ };
19
+
10
20
  /*
11
21
  * call-seq:
12
22
  * validate_document(document)
13
23
  *
14
24
  * Validate a Nokogiri::XML::Document against this RelaxNG schema.
15
25
  */
16
- static VALUE validate_document(VALUE self, VALUE document)
26
+ static VALUE
27
+ validate_document(VALUE self, VALUE document)
17
28
  {
18
29
  xmlDocPtr doc;
19
30
  xmlRelaxNGPtr schema;
20
31
  VALUE errors;
21
32
  xmlRelaxNGValidCtxtPtr valid_ctxt;
22
33
 
23
- Data_Get_Struct(self, xmlRelaxNG, schema);
24
- Data_Get_Struct(document, xmlDoc, doc);
34
+ TypedData_Get_Struct(self, xmlRelaxNG, &xml_relax_ng_type, schema);
35
+ doc = noko_xml_document_unwrap(document);
25
36
 
26
37
  errors = rb_ary_new();
27
38
 
28
39
  valid_ctxt = xmlRelaxNGNewValidCtxt(schema);
29
40
 
30
- if(NULL == valid_ctxt) {
41
+ if (NULL == valid_ctxt) {
31
42
  /* we have a problem */
32
43
  rb_raise(rb_eRuntimeError, "Could not create a validation context");
33
44
  }
@@ -47,115 +58,113 @@ static VALUE validate_document(VALUE self, VALUE document)
47
58
  return errors;
48
59
  }
49
60
 
50
- /*
51
- * call-seq:
52
- * read_memory(string)
53
- *
54
- * Create a new RelaxNG from the contents of +string+
55
- */
56
- static VALUE read_memory(VALUE klass, VALUE content)
61
+ static VALUE
62
+ xml_relax_ng_parse_schema(
63
+ VALUE klass,
64
+ xmlRelaxNGParserCtxtPtr c_parser_context,
65
+ VALUE rb_parse_options
66
+ )
57
67
  {
58
- xmlRelaxNGParserCtxtPtr ctx = xmlRelaxNGNewMemParserCtxt(
59
- (const char *)StringValuePtr(content),
60
- (int)RSTRING_LEN(content)
61
- );
62
- xmlRelaxNGPtr schema;
63
- VALUE errors = rb_ary_new();
68
+ VALUE rb_errors;
64
69
  VALUE rb_schema;
70
+ xmlRelaxNGPtr c_schema;
71
+
72
+ if (NIL_P(rb_parse_options)) {
73
+ rb_parse_options = rb_const_get_at(
74
+ rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")),
75
+ rb_intern("DEFAULT_SCHEMA")
76
+ );
77
+ }
65
78
 
66
- xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
79
+ rb_errors = rb_ary_new();
80
+ xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
67
81
 
68
82
  #ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
69
83
  xmlRelaxNGSetParserStructuredErrors(
70
- ctx,
84
+ c_parser_context,
71
85
  Nokogiri_error_array_pusher,
72
- (void *)errors
86
+ (void *)rb_errors
73
87
  );
74
88
  #endif
75
89
 
76
- schema = xmlRelaxNGParse(ctx);
90
+ c_schema = xmlRelaxNGParse(c_parser_context);
77
91
 
78
92
  xmlSetStructuredErrorFunc(NULL, NULL);
79
- xmlRelaxNGFreeParserCtxt(ctx);
93
+ xmlRelaxNGFreeParserCtxt(c_parser_context);
80
94
 
81
- if(NULL == schema) {
82
- xmlErrorPtr error = xmlGetLastError();
83
- if(error)
95
+ if (NULL == c_schema) {
96
+ xmlErrorConstPtr error = xmlGetLastError();
97
+ if (error) {
84
98
  Nokogiri_error_raise(NULL, error);
85
- else
99
+ } else {
86
100
  rb_raise(rb_eRuntimeError, "Could not parse document");
101
+ }
87
102
 
88
103
  return Qnil;
89
104
  }
90
105
 
91
- rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
92
- rb_iv_set(rb_schema, "@errors", errors);
106
+ rb_schema = TypedData_Wrap_Struct(klass, &xml_relax_ng_type, c_schema);
107
+ rb_iv_set(rb_schema, "@errors", rb_errors);
108
+ rb_iv_set(rb_schema, "@parse_options", rb_parse_options);
93
109
 
94
110
  return rb_schema;
95
111
  }
96
112
 
97
113
  /*
98
114
  * call-seq:
99
- * from_document(doc)
115
+ * read_memory(string)
100
116
  *
101
- * Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
117
+ * Create a new RelaxNG from the contents of +string+
102
118
  */
103
- static VALUE from_document(VALUE klass, VALUE document)
119
+ static VALUE
120
+ read_memory(int argc, VALUE *argv, VALUE klass)
104
121
  {
105
- xmlDocPtr doc;
106
- xmlRelaxNGParserCtxtPtr ctx;
107
- xmlRelaxNGPtr schema;
108
- VALUE errors;
109
- VALUE rb_schema;
110
-
111
- Data_Get_Struct(document, xmlDoc, doc);
122
+ VALUE rb_content;
123
+ VALUE rb_parse_options;
124
+ xmlRelaxNGParserCtxtPtr c_parser_context;
112
125
 
113
- /* In case someone passes us a node. ugh. */
114
- doc = doc->doc;
126
+ rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options);
115
127
 
116
- ctx = xmlRelaxNGNewDocParserCtxt(doc);
128
+ c_parser_context = xmlRelaxNGNewMemParserCtxt(
129
+ (const char *)StringValuePtr(rb_content),
130
+ (int)RSTRING_LEN(rb_content)
131
+ );
117
132
 
118
- errors = rb_ary_new();
119
- xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
120
-
121
- #ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
122
- xmlRelaxNGSetParserStructuredErrors(
123
- ctx,
124
- Nokogiri_error_array_pusher,
125
- (void *)errors
126
- );
127
- #endif
128
-
129
- schema = xmlRelaxNGParse(ctx);
133
+ return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
134
+ }
130
135
 
131
- xmlSetStructuredErrorFunc(NULL, NULL);
136
+ /*
137
+ * call-seq:
138
+ * from_document(doc)
139
+ *
140
+ * Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
141
+ */
142
+ static VALUE
143
+ from_document(int argc, VALUE *argv, VALUE klass)
144
+ {
145
+ VALUE rb_document;
146
+ VALUE rb_parse_options;
147
+ xmlDocPtr c_document;
148
+ xmlRelaxNGParserCtxtPtr c_parser_context;
132
149
 
133
- if(NULL == schema) {
134
- xmlErrorPtr error = xmlGetLastError();
135
- if(error)
136
- Nokogiri_error_raise(NULL, error);
137
- else
138
- rb_raise(rb_eRuntimeError, "Could not parse document");
150
+ rb_scan_args(argc, argv, "11", &rb_document, &rb_parse_options);
139
151
 
140
- return Qnil;
141
- }
152
+ c_document = noko_xml_document_unwrap(rb_document);
153
+ c_document = c_document->doc; /* In case someone passes us a node. ugh. */
142
154
 
143
- rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
144
- rb_iv_set(rb_schema, "@errors", errors);
155
+ c_parser_context = xmlRelaxNGNewDocParserCtxt(c_document);
145
156
 
146
- return rb_schema;
157
+ return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
147
158
  }
148
159
 
149
- VALUE cNokogiriXmlRelaxNG;
150
- void init_xml_relax_ng()
160
+ void
161
+ noko_init_xml_relax_ng(void)
151
162
  {
152
- VALUE nokogiri = rb_define_module("Nokogiri");
153
- VALUE xml = rb_define_module_under(nokogiri, "XML");
154
- VALUE klass = rb_define_class_under(xml, "RelaxNG", cNokogiriXmlSchema);
163
+ assert(cNokogiriXmlSchema);
164
+ cNokogiriXmlRelaxNG = rb_define_class_under(mNokogiriXml, "RelaxNG", cNokogiriXmlSchema);
155
165
 
156
- cNokogiriXmlRelaxNG = klass;
166
+ rb_define_singleton_method(cNokogiriXmlRelaxNG, "read_memory", read_memory, -1);
167
+ rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", from_document, -1);
157
168
 
158
- rb_define_singleton_method(klass, "read_memory", read_memory, 1);
159
- rb_define_singleton_method(klass, "from_document", from_document, 1);
160
- rb_define_private_method(klass, "validate_document", validate_document, 1);
169
+ rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", validate_document, 1);
161
170
  }
@@ -1,23 +1,23 @@
1
- #include <xml_sax_parser.h>
1
+ #include <nokogiri.h>
2
2
 
3
- int vasprintf (char **strp, const char *fmt, va_list ap);
4
- void vasprintf_free (void *p);
3
+ VALUE cNokogiriXmlSaxParser ;
5
4
 
6
5
  static ID id_start_document, id_end_document, id_start_element, id_end_element;
7
6
  static ID id_start_element_namespace, id_end_element_namespace;
8
7
  static ID id_comment, id_characters, id_xmldecl, id_error, id_warning;
9
- static ID id_cdata_block, id_cAttribute;
8
+ static ID id_cdata_block;
10
9
  static ID id_processing_instruction;
11
10
 
12
- static void start_document(void * ctx)
11
+ static void
12
+ start_document(void *ctx)
13
13
  {
14
14
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
15
15
  VALUE doc = rb_iv_get(self, "@document");
16
16
 
17
17
  xmlParserCtxtPtr ctxt = NOKOGIRI_SAX_CTXT(ctx);
18
18
 
19
- if(NULL != ctxt && ctxt->html != 1) {
20
- if(ctxt->standalone != -1) { /* -1 means there was no declaration */
19
+ if (NULL != ctxt && ctxt->html != 1) {
20
+ if (ctxt->standalone != -1) { /* -1 means there was no declaration */
21
21
  VALUE encoding = Qnil ;
22
22
  VALUE standalone = Qnil;
23
23
  VALUE version;
@@ -29,8 +29,7 @@ static void start_document(void * ctx)
29
29
 
30
30
  version = ctxt->version ? NOKOGIRI_STR_NEW2(ctxt->version) : Qnil;
31
31
 
32
- switch(ctxt->standalone)
33
- {
32
+ switch (ctxt->standalone) {
34
33
  case 0:
35
34
  standalone = NOKOGIRI_STR_NEW2("no");
36
35
  break;
@@ -46,138 +45,140 @@ static void start_document(void * ctx)
46
45
  rb_funcall(doc, id_start_document, 0);
47
46
  }
48
47
 
49
- static void end_document(void * ctx)
48
+ static void
49
+ end_document(void *ctx)
50
50
  {
51
51
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
52
52
  VALUE doc = rb_iv_get(self, "@document");
53
53
  rb_funcall(doc, id_end_document, 0);
54
54
  }
55
55
 
56
- static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts)
56
+ static void
57
+ start_element(void *ctx, const xmlChar *name, const xmlChar **atts)
57
58
  {
58
59
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
59
60
  VALUE doc = rb_iv_get(self, "@document");
60
61
  VALUE attributes = rb_ary_new();
61
- const xmlChar * attr;
62
+ const xmlChar *attr;
62
63
  int i = 0;
63
- if(atts) {
64
- while((attr = atts[i]) != NULL) {
65
- const xmlChar * val = atts[i+1];
64
+ if (atts) {
65
+ while ((attr = atts[i]) != NULL) {
66
+ const xmlChar *val = atts[i + 1];
66
67
  VALUE value = val != NULL ? NOKOGIRI_STR_NEW2(val) : Qnil;
67
68
  rb_ary_push(attributes, rb_ary_new3(2, NOKOGIRI_STR_NEW2(attr), value));
68
- i+=2;
69
+ i += 2;
69
70
  }
70
71
  }
71
72
 
72
- rb_funcall( doc,
73
- id_start_element,
74
- 2,
75
- NOKOGIRI_STR_NEW2(name),
76
- attributes
77
- );
73
+ rb_funcall(doc,
74
+ id_start_element,
75
+ 2,
76
+ NOKOGIRI_STR_NEW2(name),
77
+ attributes
78
+ );
78
79
  }
79
80
 
80
- static void end_element(void * ctx, const xmlChar *name)
81
+ static void
82
+ end_element(void *ctx, const xmlChar *name)
81
83
  {
82
84
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
83
85
  VALUE doc = rb_iv_get(self, "@document");
84
86
  rb_funcall(doc, id_end_element, 1, NOKOGIRI_STR_NEW2(name));
85
87
  }
86
88
 
87
- static VALUE attributes_as_list(
88
- VALUE self,
89
- int nb_attributes,
90
- const xmlChar ** attributes)
89
+ static VALUE
90
+ attributes_as_array(int attributes_len, const xmlChar **c_attributes)
91
91
  {
92
- VALUE list = rb_ary_new2((long)nb_attributes);
92
+ VALUE rb_array = rb_ary_new2((long)attributes_len);
93
+ VALUE cNokogiriXmlSaxParserAttribute;
93
94
 
94
- VALUE attr_klass = rb_const_get(cNokogiriXmlSaxParser, id_cAttribute);
95
- if (attributes) {
95
+ cNokogiriXmlSaxParserAttribute = rb_const_get_at(cNokogiriXmlSaxParser, rb_intern("Attribute"));
96
+ if (c_attributes) {
96
97
  /* Each attribute is an array of [localname, prefix, URI, value, end] */
97
98
  int i;
98
- for (i = 0; i < nb_attributes * 5; i += 5) {
99
- VALUE argv[4], attribute;
99
+ for (i = 0; i < attributes_len * 5; i += 5) {
100
+ VALUE rb_constructor_args[4], rb_attribute;
100
101
 
101
- argv[0] = RBSTR_OR_QNIL(attributes[i + 0]); /* localname */
102
- argv[1] = RBSTR_OR_QNIL(attributes[i + 1]); /* prefix */
103
- argv[2] = RBSTR_OR_QNIL(attributes[i + 2]); /* URI */
102
+ rb_constructor_args[0] = RBSTR_OR_QNIL(c_attributes[i + 0]); /* localname */
103
+ rb_constructor_args[1] = RBSTR_OR_QNIL(c_attributes[i + 1]); /* prefix */
104
+ rb_constructor_args[2] = RBSTR_OR_QNIL(c_attributes[i + 2]); /* URI */
104
105
 
105
106
  /* value */
106
- argv[3] = NOKOGIRI_STR_NEW((const char*)attributes[i+3],
107
- (attributes[i+4] - attributes[i+3]));
107
+ rb_constructor_args[3] = NOKOGIRI_STR_NEW((const char *)c_attributes[i + 3],
108
+ (c_attributes[i + 4] - c_attributes[i + 3]));
108
109
 
109
- attribute = rb_class_new_instance(4, argv, attr_klass);
110
- rb_ary_push(list, attribute);
110
+ rb_attribute = rb_class_new_instance(4, rb_constructor_args, cNokogiriXmlSaxParserAttribute);
111
+ rb_ary_push(rb_array, rb_attribute);
111
112
  }
112
113
  }
113
114
 
114
- return list;
115
+ return rb_array;
115
116
  }
116
117
 
117
118
  static void
118
- start_element_ns (
119
- void * ctx,
120
- const xmlChar * localname,
121
- const xmlChar * prefix,
122
- const xmlChar * uri,
119
+ start_element_ns(
120
+ void *ctx,
121
+ const xmlChar *localname,
122
+ const xmlChar *prefix,
123
+ const xmlChar *uri,
123
124
  int nb_namespaces,
124
- const xmlChar ** namespaces,
125
+ const xmlChar **namespaces,
125
126
  int nb_attributes,
126
127
  int nb_defaulted,
127
- const xmlChar ** attributes)
128
+ const xmlChar **attributes)
128
129
  {
129
130
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
130
131
  VALUE doc = rb_iv_get(self, "@document");
131
132
 
132
- VALUE attribute_list = attributes_as_list(self, nb_attributes, attributes);
133
+ VALUE attribute_ary = attributes_as_array(nb_attributes, attributes);
133
134
 
134
135
  VALUE ns_list = rb_ary_new2((long)nb_namespaces);
135
136
 
136
137
  if (namespaces) {
137
138
  int i;
138
- for (i = 0; i < nb_namespaces * 2; i += 2)
139
- {
139
+ for (i = 0; i < nb_namespaces * 2; i += 2) {
140
140
  rb_ary_push(ns_list,
141
- rb_ary_new3((long)2,
142
- RBSTR_OR_QNIL(namespaces[i + 0]),
143
- RBSTR_OR_QNIL(namespaces[i + 1])
144
- )
145
- );
141
+ rb_ary_new3((long)2,
142
+ RBSTR_OR_QNIL(namespaces[i + 0]),
143
+ RBSTR_OR_QNIL(namespaces[i + 1])
144
+ )
145
+ );
146
146
  }
147
147
  }
148
148
 
149
- rb_funcall( doc,
150
- id_start_element_namespace,
151
- 5,
152
- NOKOGIRI_STR_NEW2(localname),
153
- attribute_list,
154
- RBSTR_OR_QNIL(prefix),
155
- RBSTR_OR_QNIL(uri),
156
- ns_list
157
- );
149
+ rb_funcall(doc,
150
+ id_start_element_namespace,
151
+ 5,
152
+ NOKOGIRI_STR_NEW2(localname),
153
+ attribute_ary,
154
+ RBSTR_OR_QNIL(prefix),
155
+ RBSTR_OR_QNIL(uri),
156
+ ns_list
157
+ );
158
158
  }
159
159
 
160
160
  /**
161
161
  * end_element_ns was borrowed heavily from libxml-ruby.
162
162
  */
163
163
  static void
164
- end_element_ns (
165
- void * ctx,
166
- const xmlChar * localname,
167
- const xmlChar * prefix,
168
- const xmlChar * uri)
164
+ end_element_ns(
165
+ void *ctx,
166
+ const xmlChar *localname,
167
+ const xmlChar *prefix,
168
+ const xmlChar *uri)
169
169
  {
170
170
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
171
171
  VALUE doc = rb_iv_get(self, "@document");
172
172
 
173
173
  rb_funcall(doc, id_end_element_namespace, 3,
174
- NOKOGIRI_STR_NEW2(localname),
175
- RBSTR_OR_QNIL(prefix),
176
- RBSTR_OR_QNIL(uri)
177
- );
174
+ NOKOGIRI_STR_NEW2(localname),
175
+ RBSTR_OR_QNIL(prefix),
176
+ RBSTR_OR_QNIL(uri)
177
+ );
178
178
  }
179
179
 
180
- static void characters_func(void * ctx, const xmlChar * ch, int len)
180
+ static void
181
+ characters_func(void *ctx, const xmlChar *ch, int len)
181
182
  {
182
183
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
183
184
  VALUE doc = rb_iv_get(self, "@document");
@@ -185,7 +186,8 @@ static void characters_func(void * ctx, const xmlChar * ch, int len)
185
186
  rb_funcall(doc, id_characters, 1, str);
186
187
  }
187
188
 
188
- static void comment_func(void * ctx, const xmlChar * value)
189
+ static void
190
+ comment_func(void *ctx, const xmlChar *value)
189
191
  {
190
192
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
191
193
  VALUE doc = rb_iv_get(self, "@document");
@@ -193,41 +195,52 @@ static void comment_func(void * ctx, const xmlChar * value)
193
195
  rb_funcall(doc, id_comment, 1, str);
194
196
  }
195
197
 
196
- static void warning_func(void * ctx, const char *msg, ...)
198
+ PRINTFLIKE_DECL(2, 3)
199
+ static void
200
+ warning_func(void *ctx, const char *msg, ...)
197
201
  {
198
202
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
199
203
  VALUE doc = rb_iv_get(self, "@document");
200
- char * message;
201
- VALUE ruby_message;
204
+ VALUE rb_message;
202
205
 
206
+ #ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
207
+ /* It is not currently possible to pass var args from native
208
+ functions to sulong, so we work around the issue here. */
209
+ rb_message = rb_sprintf("warning_func: %s", msg);
210
+ #else
203
211
  va_list args;
204
212
  va_start(args, msg);
205
- vasprintf(&message, msg, args);
213
+ rb_message = rb_vsprintf(msg, args);
206
214
  va_end(args);
215
+ #endif
207
216
 
208
- ruby_message = NOKOGIRI_STR_NEW2(message);
209
- vasprintf_free(message);
210
- rb_funcall(doc, id_warning, 1, ruby_message);
217
+ rb_funcall(doc, id_warning, 1, rb_message);
211
218
  }
212
219
 
213
- static void error_func(void * ctx, const char *msg, ...)
220
+ PRINTFLIKE_DECL(2, 3)
221
+ static void
222
+ error_func(void *ctx, const char *msg, ...)
214
223
  {
215
224
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
216
225
  VALUE doc = rb_iv_get(self, "@document");
217
- char * message;
218
- VALUE ruby_message;
226
+ VALUE rb_message;
219
227
 
228
+ #ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
229
+ /* It is not currently possible to pass var args from native
230
+ functions to sulong, so we work around the issue here. */
231
+ rb_message = rb_sprintf("error_func: %s", msg);
232
+ #else
220
233
  va_list args;
221
234
  va_start(args, msg);
222
- vasprintf(&message, msg, args);
235
+ rb_message = rb_vsprintf(msg, args);
223
236
  va_end(args);
237
+ #endif
224
238
 
225
- ruby_message = NOKOGIRI_STR_NEW2(message);
226
- vasprintf_free(message);
227
- rb_funcall(doc, id_error, 1, ruby_message);
239
+ rb_funcall(doc, id_error, 1, rb_message);
228
240
  }
229
241
 
230
- static void cdata_block(void * ctx, const xmlChar * value, int len)
242
+ static void
243
+ cdata_block(void *ctx, const xmlChar *value, int len)
231
244
  {
232
245
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
233
246
  VALUE doc = rb_iv_get(self, "@document");
@@ -235,7 +248,8 @@ static void cdata_block(void * ctx, const xmlChar * value, int len)
235
248
  rb_funcall(doc, id_cdata_block, 1, string);
236
249
  }
237
250
 
238
- static void processing_instruction(void * ctx, const xmlChar * name, const xmlChar * content)
251
+ static void
252
+ processing_instruction(void *ctx, const xmlChar *name, const xmlChar *content)
239
253
  {
240
254
  VALUE rb_content;
241
255
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
@@ -243,26 +257,35 @@ static void processing_instruction(void * ctx, const xmlChar * name, const xmlCh
243
257
 
244
258
  rb_content = content ? NOKOGIRI_STR_NEW2(content) : Qnil;
245
259
 
246
- rb_funcall( doc,
247
- id_processing_instruction,
248
- 2,
249
- NOKOGIRI_STR_NEW2(name),
250
- rb_content
251
- );
260
+ rb_funcall(doc,
261
+ id_processing_instruction,
262
+ 2,
263
+ NOKOGIRI_STR_NEW2(name),
264
+ rb_content
265
+ );
252
266
  }
253
267
 
254
- static void deallocate(xmlSAXHandlerPtr handler)
268
+ static size_t
269
+ memsize(const void *data)
255
270
  {
256
- NOKOGIRI_DEBUG_START(handler);
257
- free(handler);
258
- NOKOGIRI_DEBUG_END(handler);
271
+ return sizeof(xmlSAXHandler);
259
272
  }
260
273
 
261
- static VALUE allocate(VALUE klass)
274
+ /* Used by Nokogiri::XML::SAX::Parser and Nokogiri::HTML::SAX::Parser */
275
+ static const rb_data_type_t noko_sax_handler_type = {
276
+ .wrap_struct_name = "Nokogiri::SAXHandler",
277
+ .function = {
278
+ .dfree = RUBY_TYPED_DEFAULT_FREE,
279
+ .dsize = memsize
280
+ },
281
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
282
+ };
283
+
284
+ static VALUE
285
+ allocate(VALUE klass)
262
286
  {
263
- xmlSAXHandlerPtr handler = calloc((size_t)1, sizeof(xmlSAXHandler));
264
-
265
- xmlSetStructuredErrorFunc(NULL, NULL);
287
+ xmlSAXHandlerPtr handler;
288
+ VALUE self = TypedData_Make_Struct(klass, xmlSAXHandler, &noko_sax_handler_type, handler);
266
289
 
267
290
  handler->startDocument = start_document;
268
291
  handler->endDocument = end_document;
@@ -278,32 +301,34 @@ static VALUE allocate(VALUE klass)
278
301
  handler->processingInstruction = processing_instruction;
279
302
  handler->initialized = XML_SAX2_MAGIC;
280
303
 
281
- return Data_Wrap_Struct(klass, NULL, deallocate, handler);
304
+ return self;
282
305
  }
283
306
 
284
- VALUE cNokogiriXmlSaxParser ;
285
- void init_xml_sax_parser()
307
+ xmlSAXHandlerPtr
308
+ noko_sax_handler_unwrap(VALUE rb_sax_handler)
286
309
  {
287
- VALUE nokogiri = rb_define_module("Nokogiri");
288
- VALUE xml = rb_define_module_under(nokogiri, "XML");
289
- VALUE sax = rb_define_module_under(xml, "SAX");
290
- VALUE klass = rb_define_class_under(sax, "Parser", rb_cObject);
310
+ xmlSAXHandlerPtr c_sax_handler;
311
+ TypedData_Get_Struct(rb_sax_handler, xmlSAXHandler, &noko_sax_handler_type, c_sax_handler);
312
+ return c_sax_handler;
313
+ }
291
314
 
292
- cNokogiriXmlSaxParser = klass;
315
+ void
316
+ noko_init_xml_sax_parser(void)
317
+ {
318
+ cNokogiriXmlSaxParser = rb_define_class_under(mNokogiriXmlSax, "Parser", rb_cObject);
293
319
 
294
- rb_define_alloc_func(klass, allocate);
320
+ rb_define_alloc_func(cNokogiriXmlSaxParser, allocate);
295
321
 
296
322
  id_start_document = rb_intern("start_document");
297
- id_end_document = rb_intern("end_document");
298
- id_start_element = rb_intern("start_element");
299
- id_end_element = rb_intern("end_element");
300
- id_comment = rb_intern("comment");
301
- id_characters = rb_intern("characters");
302
- id_xmldecl = rb_intern("xmldecl");
303
- id_error = rb_intern("error");
304
- id_warning = rb_intern("warning");
305
- id_cdata_block = rb_intern("cdata_block");
306
- id_cAttribute = rb_intern("Attribute");
323
+ id_end_document = rb_intern("end_document");
324
+ id_start_element = rb_intern("start_element");
325
+ id_end_element = rb_intern("end_element");
326
+ id_comment = rb_intern("comment");
327
+ id_characters = rb_intern("characters");
328
+ id_xmldecl = rb_intern("xmldecl");
329
+ id_error = rb_intern("error");
330
+ id_warning = rb_intern("warning");
331
+ id_cdata_block = rb_intern("cdata_block");
307
332
  id_start_element_namespace = rb_intern("start_element_namespace");
308
333
  id_end_element_namespace = rb_intern("end_element_namespace");
309
334
  id_processing_instruction = rb_intern("processing_instruction");