nokogiri 1.10.9 → 1.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (230)
  1. checksums.yaml +4 -4
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +190 -95
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +909 -422
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  18. data/ext/nokogiri/nokogiri.c +258 -105
  19. data/ext/nokogiri/nokogiri.h +207 -90
  20. data/ext/nokogiri/test_global_handlers.c +40 -0
  21. data/ext/nokogiri/xml_attr.c +18 -18
  22. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  23. data/ext/nokogiri/xml_cdata.c +33 -33
  24. data/ext/nokogiri/xml_comment.c +19 -31
  25. data/ext/nokogiri/xml_document.c +499 -323
  26. data/ext/nokogiri/xml_document_fragment.c +17 -36
  27. data/ext/nokogiri/xml_dtd.c +65 -59
  28. data/ext/nokogiri/xml_element_content.c +63 -55
  29. data/ext/nokogiri/xml_element_decl.c +31 -31
  30. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  31. data/ext/nokogiri/xml_entity_decl.c +37 -35
  32. data/ext/nokogiri/xml_entity_reference.c +17 -19
  33. data/ext/nokogiri/xml_namespace.c +131 -61
  34. data/ext/nokogiri/xml_node.c +1429 -723
  35. data/ext/nokogiri/xml_node_set.c +257 -225
  36. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  37. data/ext/nokogiri/xml_reader.c +340 -231
  38. data/ext/nokogiri/xml_relax_ng.c +87 -99
  39. data/ext/nokogiri/xml_sax_parser.c +269 -176
  40. data/ext/nokogiri/xml_sax_parser_context.c +286 -152
  41. data/ext/nokogiri/xml_sax_push_parser.c +111 -64
  42. data/ext/nokogiri/xml_schema.c +132 -140
  43. data/ext/nokogiri/xml_syntax_error.c +52 -23
  44. data/ext/nokogiri/xml_text.c +37 -30
  45. data/ext/nokogiri/xml_xpath_context.c +373 -185
  46. data/ext/nokogiri/xslt_stylesheet.c +342 -191
  47. data/gumbo-parser/CHANGES.md +63 -0
  48. data/gumbo-parser/Makefile +129 -0
  49. data/gumbo-parser/THANKS +27 -0
  50. data/gumbo-parser/src/Makefile +34 -0
  51. data/gumbo-parser/src/README.md +41 -0
  52. data/gumbo-parser/src/ascii.c +75 -0
  53. data/gumbo-parser/src/ascii.h +115 -0
  54. data/gumbo-parser/src/attribute.c +42 -0
  55. data/gumbo-parser/src/attribute.h +17 -0
  56. data/gumbo-parser/src/char_ref.c +22225 -0
  57. data/gumbo-parser/src/char_ref.h +29 -0
  58. data/gumbo-parser/src/char_ref.rl +2154 -0
  59. data/gumbo-parser/src/error.c +658 -0
  60. data/gumbo-parser/src/error.h +152 -0
  61. data/gumbo-parser/src/foreign_attrs.c +103 -0
  62. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
  66. data/gumbo-parser/src/parser.c +4932 -0
  67. data/gumbo-parser/src/parser.h +41 -0
  68. data/gumbo-parser/src/replacement.h +33 -0
  69. data/gumbo-parser/src/string_buffer.c +103 -0
  70. data/gumbo-parser/src/string_buffer.h +68 -0
  71. data/gumbo-parser/src/string_piece.c +48 -0
  72. data/gumbo-parser/src/svg_attrs.c +174 -0
  73. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  74. data/gumbo-parser/src/svg_tags.c +137 -0
  75. data/gumbo-parser/src/svg_tags.gperf +55 -0
  76. data/gumbo-parser/src/tag.c +223 -0
  77. data/gumbo-parser/src/tag_lookup.c +382 -0
  78. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  79. data/gumbo-parser/src/tag_lookup.h +13 -0
  80. data/gumbo-parser/src/token_buffer.c +79 -0
  81. data/gumbo-parser/src/token_buffer.h +71 -0
  82. data/gumbo-parser/src/token_type.h +17 -0
  83. data/gumbo-parser/src/tokenizer.c +3464 -0
  84. data/gumbo-parser/src/tokenizer.h +112 -0
  85. data/gumbo-parser/src/tokenizer_states.h +339 -0
  86. data/gumbo-parser/src/utf8.c +245 -0
  87. data/gumbo-parser/src/utf8.h +164 -0
  88. data/gumbo-parser/src/util.c +66 -0
  89. data/gumbo-parser/src/util.h +34 -0
  90. data/gumbo-parser/src/vector.c +111 -0
  91. data/gumbo-parser/src/vector.h +45 -0
  92. data/lib/nokogiri/class_resolver.rb +67 -0
  93. data/lib/nokogiri/css/node.rb +14 -8
  94. data/lib/nokogiri/css/parser.rb +399 -377
  95. data/lib/nokogiri/css/parser.y +250 -245
  96. data/lib/nokogiri/css/parser_extras.rb +16 -71
  97. data/lib/nokogiri/css/selector_cache.rb +38 -0
  98. data/lib/nokogiri/css/syntax_error.rb +3 -1
  99. data/lib/nokogiri/css/tokenizer.rb +7 -5
  100. data/lib/nokogiri/css/tokenizer.rex +11 -9
  101. data/lib/nokogiri/css/xpath_visitor.rb +242 -96
  102. data/lib/nokogiri/css.rb +122 -17
  103. data/lib/nokogiri/decorators/slop.rb +11 -11
  104. data/lib/nokogiri/encoding_handler.rb +57 -0
  105. data/lib/nokogiri/extension.rb +32 -0
  106. data/lib/nokogiri/gumbo.rb +15 -0
  107. data/lib/nokogiri/html.rb +38 -27
  108. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  109. data/lib/nokogiri/html4/document.rb +235 -0
  110. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  111. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  112. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  113. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  114. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  115. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  116. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  117. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  118. data/lib/nokogiri/html4.rb +42 -0
  119. data/lib/nokogiri/html5/builder.rb +40 -0
  120. data/lib/nokogiri/html5/document.rb +199 -0
  121. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  122. data/lib/nokogiri/html5/node.rb +103 -0
  123. data/lib/nokogiri/html5.rb +368 -0
  124. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  125. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  126. data/lib/nokogiri/syntax_error.rb +2 -0
  127. data/lib/nokogiri/version/constant.rb +6 -0
  128. data/lib/nokogiri/version/info.rb +224 -0
  129. data/lib/nokogiri/version.rb +3 -108
  130. data/lib/nokogiri/xml/attr.rb +55 -3
  131. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  132. data/lib/nokogiri/xml/builder.rb +83 -35
  133. data/lib/nokogiri/xml/cdata.rb +3 -1
  134. data/lib/nokogiri/xml/character_data.rb +2 -0
  135. data/lib/nokogiri/xml/document.rb +359 -130
  136. data/lib/nokogiri/xml/document_fragment.rb +170 -54
  137. data/lib/nokogiri/xml/dtd.rb +4 -2
  138. data/lib/nokogiri/xml/element_content.rb +12 -2
  139. data/lib/nokogiri/xml/element_decl.rb +6 -2
  140. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  141. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  142. data/lib/nokogiri/xml/namespace.rb +44 -0
  143. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  144. data/lib/nokogiri/xml/node.rb +1168 -420
  145. data/lib/nokogiri/xml/node_set.rb +145 -67
  146. data/lib/nokogiri/xml/notation.rb +13 -0
  147. data/lib/nokogiri/xml/parse_options.rb +145 -52
  148. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  149. data/lib/nokogiri/xml/pp/node.rb +47 -30
  150. data/lib/nokogiri/xml/pp.rb +4 -2
  151. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  152. data/lib/nokogiri/xml/reader.rb +68 -41
  153. data/lib/nokogiri/xml/relax_ng.rb +60 -17
  154. data/lib/nokogiri/xml/sax/document.rb +198 -111
  155. data/lib/nokogiri/xml/sax/parser.rb +144 -67
  156. data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
  157. data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
  158. data/lib/nokogiri/xml/sax.rb +54 -4
  159. data/lib/nokogiri/xml/schema.rb +116 -39
  160. data/lib/nokogiri/xml/searchable.rb +139 -95
  161. data/lib/nokogiri/xml/syntax_error.rb +29 -5
  162. data/lib/nokogiri/xml/text.rb +2 -0
  163. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  164. data/lib/nokogiri/xml/xpath.rb +15 -4
  165. data/lib/nokogiri/xml/xpath_context.rb +15 -4
  166. data/lib/nokogiri/xml.rb +45 -55
  167. data/lib/nokogiri/xslt/stylesheet.rb +32 -8
  168. data/lib/nokogiri/xslt.rb +103 -30
  169. data/lib/nokogiri.rb +59 -75
  170. data/lib/xsd/xmlparser/nokogiri.rb +32 -29
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  175. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  176. data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
  177. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  178. metadata +123 -295
  179. data/ext/nokogiri/html_document.c +0 -170
  180. data/ext/nokogiri/html_document.h +0 -10
  181. data/ext/nokogiri/html_element_description.c +0 -279
  182. data/ext/nokogiri/html_element_description.h +0 -10
  183. data/ext/nokogiri/html_entity_lookup.c +0 -32
  184. data/ext/nokogiri/html_entity_lookup.h +0 -8
  185. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  186. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  187. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  188. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  189. data/ext/nokogiri/xml_attr.h +0 -9
  190. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  191. data/ext/nokogiri/xml_cdata.h +0 -9
  192. data/ext/nokogiri/xml_comment.h +0 -9
  193. data/ext/nokogiri/xml_document.h +0 -23
  194. data/ext/nokogiri/xml_document_fragment.h +0 -10
  195. data/ext/nokogiri/xml_dtd.h +0 -10
  196. data/ext/nokogiri/xml_element_content.h +0 -10
  197. data/ext/nokogiri/xml_element_decl.h +0 -9
  198. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  199. data/ext/nokogiri/xml_entity_decl.h +0 -10
  200. data/ext/nokogiri/xml_entity_reference.h +0 -9
  201. data/ext/nokogiri/xml_io.c +0 -61
  202. data/ext/nokogiri/xml_io.h +0 -11
  203. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  204. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  205. data/ext/nokogiri/xml_namespace.h +0 -14
  206. data/ext/nokogiri/xml_node.h +0 -13
  207. data/ext/nokogiri/xml_node_set.h +0 -12
  208. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  209. data/ext/nokogiri/xml_reader.h +0 -10
  210. data/ext/nokogiri/xml_relax_ng.h +0 -9
  211. data/ext/nokogiri/xml_sax_parser.h +0 -39
  212. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  213. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  214. data/ext/nokogiri/xml_schema.h +0 -9
  215. data/ext/nokogiri/xml_syntax_error.h +0 -13
  216. data/ext/nokogiri/xml_text.h +0 -9
  217. data/ext/nokogiri/xml_xpath_context.h +0 -10
  218. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  219. data/lib/nokogiri/html/document.rb +0 -335
  220. data/lib/nokogiri/html/document_fragment.rb +0 -49
  221. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  222. data/lib/nokogiri/html/sax/parser.rb +0 -62
  223. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  224. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  225. data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
  226. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  227. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  228. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  229. /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  230. /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,52 +1,64 @@
1
- #include <xslt_stylesheet.h>
1
+ #include <nokogiri.h>
2
2
 
3
- #include <libxslt/xsltInternals.h>
4
- #include <libxslt/xsltutils.h>
5
- #include <libxslt/transform.h>
6
- #include <libexslt/exslt.h>
3
+ VALUE cNokogiriXsltStylesheet;
7
4
 
8
- VALUE xslt;
9
-
10
- int vasprintf (char **strp, const char *fmt, va_list ap);
11
- void vasprintf_free (void *p);
12
-
13
- static void mark(nokogiriXsltStylesheetTuple *wrapper)
5
+ static void
6
+ mark(void *data)
14
7
  {
8
+ nokogiriXsltStylesheetTuple *wrapper = (nokogiriXsltStylesheetTuple *)data;
15
9
  rb_gc_mark(wrapper->func_instances);
16
10
  }
17
11
 
18
- static void dealloc(nokogiriXsltStylesheetTuple *wrapper)
12
+ static void
13
+ dealloc(void *data)
19
14
  {
20
- xsltStylesheetPtr doc = wrapper->ss;
21
-
22
- NOKOGIRI_DEBUG_START(doc);
23
- xsltFreeStylesheet(doc); /* commented out for now. */
24
- NOKOGIRI_DEBUG_END(doc);
25
-
26
- free(wrapper);
15
+ nokogiriXsltStylesheetTuple *wrapper = (nokogiriXsltStylesheetTuple *)data;
16
+ xsltStylesheetPtr doc = wrapper->ss;
17
+ xsltFreeStylesheet(doc);
18
+ ruby_xfree(wrapper);
27
19
  }
28
20
 
29
- static void xslt_generic_error_handler(void * ctx, const char *msg, ...)
21
+ static const rb_data_type_t nokogiri_xslt_stylesheet_tuple_type = {
22
+ .wrap_struct_name = "nokogiriXsltStylesheetTuple",
23
+ .function = {
24
+ .dmark = mark,
25
+ .dfree = dealloc,
26
+ },
27
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
28
+ };
29
+
30
+ PRINTFLIKE_DECL(2, 3)
31
+ static void
32
+ xslt_generic_error_handler(void *ctx, const char *msg, ...)
30
33
  {
31
- char * message;
34
+ VALUE message;
32
35
 
36
+ #ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
37
+ /* It is not currently possible to pass var args from native
38
+ functions to sulong, so we work around the issue here. */
39
+ message = rb_sprintf("xslt_generic_error_handler: %s", msg);
40
+ #else
33
41
  va_list args;
34
42
  va_start(args, msg);
35
- vasprintf(&message, msg, args);
43
+ message = rb_vsprintf(msg, args);
36
44
  va_end(args);
45
+ #endif
37
46
 
38
- rb_str_cat2((VALUE)ctx, message);
39
-
40
- vasprintf_free(message);
47
+ rb_str_concat((VALUE)ctx, message);
41
48
  }
42
49
 
43
- VALUE Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
50
+ VALUE
51
+ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
44
52
  {
45
53
  VALUE self;
46
54
  nokogiriXsltStylesheetTuple *wrapper;
47
55
 
48
- self = Data_Make_Struct(cNokogiriXsltStylesheet, nokogiriXsltStylesheetTuple,
49
- mark, dealloc, wrapper);
56
+ self = TypedData_Make_Struct(
57
+ cNokogiriXsltStylesheet,
58
+ nokogiriXsltStylesheetTuple,
59
+ &nokogiri_xslt_stylesheet_tuple_type,
60
+ wrapper
61
+ );
50
62
 
51
63
  ss->_private = (void *)self;
52
64
  wrapper->ss = ss;
@@ -59,31 +71,37 @@ VALUE Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
59
71
  * call-seq:
60
72
  * parse_stylesheet_doc(document)
61
73
  *
62
- * Parse a stylesheet from +document+.
74
+ * Parse an XSLT::Stylesheet from +document+.
75
+ *
76
+ * [Parameters]
77
+ * - +document+ (Nokogiri::XML::Document) the document to be parsed.
78
+ *
79
+ * [Returns] Nokogiri::XSLT::Stylesheet
63
80
  */
64
- static VALUE parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
81
+ static VALUE
82
+ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
65
83
  {
66
- xmlDocPtr xml, xml_cpy;
67
- VALUE errstr, exception;
68
- xsltStylesheetPtr ss ;
69
- Data_Get_Struct(xmldocobj, xmlDoc, xml);
70
- exsltRegisterAll();
84
+ xmlDocPtr xml, xml_cpy;
85
+ VALUE errstr, exception;
86
+ xsltStylesheetPtr ss ;
87
+
88
+ xml = noko_xml_document_unwrap(xmldocobj);
71
89
 
72
- errstr = rb_str_new(0, 0);
73
- xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
90
+ errstr = rb_str_new(0, 0);
91
+ xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
74
92
 
75
- xml_cpy = xmlCopyDoc(xml, 1); /* 1 => recursive */
76
- ss = xsltParseStylesheetDoc(xml_cpy);
93
+ xml_cpy = xmlCopyDoc(xml, 1); /* 1 => recursive */
94
+ ss = xsltParseStylesheetDoc(xml_cpy);
77
95
 
78
- xsltSetGenericErrorFunc(NULL, NULL);
96
+ xsltSetGenericErrorFunc(NULL, NULL);
79
97
 
80
- if (!ss) {
81
- xmlFreeDoc(xml_cpy);
82
- exception = rb_exc_new3(rb_eRuntimeError, errstr);
83
- rb_exc_raise(exception);
84
- }
98
+ if (!ss) {
99
+ xmlFreeDoc(xml_cpy);
100
+ exception = rb_exc_new3(rb_eRuntimeError, errstr);
101
+ rb_exc_raise(exception);
102
+ }
85
103
 
86
- return Nokogiri_wrap_xslt_stylesheet(ss);
104
+ return Nokogiri_wrap_xslt_stylesheet(ss);
87
105
  }
88
106
 
89
107
 
@@ -91,180 +109,313 @@ static VALUE parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
91
109
  * call-seq:
92
110
  * serialize(document)
93
111
  *
94
- * Serialize +document+ to an xml string.
112
+ * Serialize +document+ to an xml string, as specified by the +method+ parameter in the Stylesheet.
95
113
  */
96
- static VALUE serialize(VALUE self, VALUE xmlobj)
97
- {
98
- xmlDocPtr xml ;
99
- nokogiriXsltStylesheetTuple *wrapper;
100
- xmlChar* doc_ptr ;
101
- int doc_len ;
102
- VALUE rval ;
103
-
104
- Data_Get_Struct(xmlobj, xmlDoc, xml);
105
- Data_Get_Struct(self, nokogiriXsltStylesheetTuple, wrapper);
106
- xsltSaveResultToString(&doc_ptr, &doc_len, xml, wrapper->ss);
107
- rval = NOKOGIRI_STR_NEW(doc_ptr, doc_len);
108
- xmlFree(doc_ptr);
109
- return rval ;
110
- }
111
-
112
- static void swallow_superfluous_xml_errors(void * userdata, xmlErrorPtr error, ...)
114
+ static VALUE
115
+ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
113
116
  {
117
+ xmlDocPtr xml ;
118
+ nokogiriXsltStylesheetTuple *wrapper;
119
+ xmlChar *doc_ptr ;
120
+ int doc_len ;
121
+ VALUE rval ;
122
+
123
+ xml = noko_xml_document_unwrap(xmlobj);
124
+ TypedData_Get_Struct(
125
+ self,
126
+ nokogiriXsltStylesheetTuple,
127
+ &nokogiri_xslt_stylesheet_tuple_type,
128
+ wrapper
129
+ );
130
+ xsltSaveResultToString(&doc_ptr, &doc_len, xml, wrapper->ss);
131
+ rval = NOKOGIRI_STR_NEW(doc_ptr, doc_len);
132
+ xmlFree(doc_ptr);
133
+ return rval ;
114
134
  }
115
135
 
116
136
  /*
117
- * call-seq:
118
- * transform(document, params = [])
137
+ * call-seq:
138
+ * transform(document)
139
+ * transform(document, params = {})
140
+ *
141
+ * Transform an XML::Document as defined by an XSLT::Stylesheet.
142
+ *
143
+ * [Parameters]
144
+ * - +document+ (Nokogiri::XML::Document) the document to be transformed.
145
+ * - +params+ (Hash, Array) strings used as XSLT parameters.
146
+ *
147
+ * [Returns] Nokogiri::XML::Document
148
+ *
149
+ * *Example* of basic transformation:
150
+ *
151
+ * xslt = <<~XSLT
152
+ * <xsl:stylesheet version="1.0"
153
+ * xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
154
+ *
155
+ * <xsl:param name="title"/>
156
+ *
157
+ * <xsl:template match="/">
158
+ * <html>
159
+ * <body>
160
+ * <h1><xsl:value-of select="$title"/></h1>
161
+ * <ol>
162
+ * <xsl:for-each select="staff/employee">
163
+ * <li><xsl:value-of select="employeeId"/></li>
164
+ * </xsl:for-each>
165
+ * </ol>
166
+ * </body>
167
+ * </html>
168
+ * </xsl:stylesheet>
169
+ * XSLT
170
+ *
171
+ * xml = <<~XML
172
+ * <?xml version="1.0"?>
173
+ * <staff>
174
+ * <employee>
175
+ * <employeeId>EMP0001</employeeId>
176
+ * <position>Accountant</position>
177
+ * </employee>
178
+ * <employee>
179
+ * <employeeId>EMP0002</employeeId>
180
+ * <position>Developer</position>
181
+ * </employee>
182
+ * </staff>
183
+ * XML
119
184
  *
120
- * Apply an XSLT stylesheet to an XML::Document.
121
- * +params+ is an array of strings used as XSLT parameters.
122
- * returns Nokogiri::XML::Document
185
+ * doc = Nokogiri::XML::Document.parse(xml)
186
+ * stylesheet = Nokogiri::XSLT.parse(xslt)
123
187
  *
124
- * Example:
188
+ * ⚠ Note that the +h1+ element is empty because no param has been provided!
125
189
  *
126
- * doc = Nokogiri::XML(File.read(ARGV[0]))
127
- * xslt = Nokogiri::XSLT(File.read(ARGV[1]))
128
- * puts xslt.transform(doc, ['key', 'value'])
190
+ * stylesheet.transform(doc).to_xml
191
+ * # => "<html><body>\n" +
192
+ * # "<h1></h1>\n" +
193
+ * # "<ol>\n" +
194
+ * # "<li>EMP0001</li>\n" +
195
+ * # "<li>EMP0002</li>\n" +
196
+ * # "</ol>\n" +
197
+ * # "</body></html>\n"
129
198
  *
199
+ * *Example* of using an input parameter hash:
200
+ *
201
+ * ⚠ The title is populated, but note how we need to quote-escape the value.
202
+ *
203
+ * stylesheet.transform(doc, { "title" => "'Employee List'" }).to_xml
204
+ * # => "<html><body>\n" +
205
+ * # "<h1>Employee List</h1>\n" +
206
+ * # "<ol>\n" +
207
+ * # "<li>EMP0001</li>\n" +
208
+ * # "<li>EMP0002</li>\n" +
209
+ * # "</ol>\n" +
210
+ * # "</body></html>\n"
211
+ *
212
+ * *Example* using the XSLT.quote_params helper method to safely quote-escape strings:
213
+ *
214
+ * stylesheet.transform(doc, Nokogiri::XSLT.quote_params({ "title" => "Aaron's List" })).to_xml
215
+ * # => "<html><body>\n" +
216
+ * # "<h1>Aaron's List</h1>\n" +
217
+ * # "<ol>\n" +
218
+ * # "<li>EMP0001</li>\n" +
219
+ * # "<li>EMP0002</li>\n" +
220
+ * # "</ol>\n" +
221
+ * # "</body></html>\n"
222
+ *
223
+ * *Example* using an array of XSLT parameters
224
+ *
225
+ * You can also use an array if you want to.
226
+ *
227
+ * stylesheet.transform(doc, ["title", "'Employee List'"]).to_xml
228
+ * # => "<html><body>\n" +
229
+ * # "<h1>Employee List</h1>\n" +
230
+ * # "<ol>\n" +
231
+ * # "<li>EMP0001</li>\n" +
232
+ * # "<li>EMP0002</li>\n" +
233
+ * # "</ol>\n" +
234
+ * # "</body></html>\n"
235
+ *
236
+ * Or pass an array to XSLT.quote_params:
237
+ *
238
+ * stylesheet.transform(doc, Nokogiri::XSLT.quote_params(["title", "Aaron's List"])).to_xml
239
+ * # => "<html><body>\n" +
240
+ * # "<h1>Aaron's List</h1>\n" +
241
+ * # "<ol>\n" +
242
+ * # "<li>EMP0001</li>\n" +
243
+ * # "<li>EMP0002</li>\n" +
244
+ * # "</ol>\n" +
245
+ * # "</body></html>\n"
246
+ *
247
+ * See: Nokogiri::XSLT.quote_params
130
248
  */
131
- static VALUE transform(int argc, VALUE* argv, VALUE self)
249
+ static VALUE
250
+ rb_xslt_stylesheet_transform(int argc, VALUE *argv, VALUE self)
132
251
  {
133
- VALUE xmldoc, paramobj, errstr, exception ;
134
- xmlDocPtr xml ;
135
- xmlDocPtr result ;
136
- nokogiriXsltStylesheetTuple *wrapper;
137
- const char** params ;
138
- long param_len, j ;
139
- int parse_error_occurred ;
140
-
141
- rb_scan_args(argc, argv, "11", &xmldoc, &paramobj);
142
- if (NIL_P(paramobj)) { paramobj = rb_ary_new2(0L) ; }
143
- if (!rb_obj_is_kind_of(xmldoc, cNokogiriXmlDocument))
144
- rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::Document");
145
-
146
- /* handle hashes as arguments. */
147
- if(T_HASH == TYPE(paramobj)) {
148
- paramobj = rb_funcall(paramobj, rb_intern("to_a"), 0);
149
- paramobj = rb_funcall(paramobj, rb_intern("flatten"), 0);
150
- }
151
-
152
- Check_Type(paramobj, T_ARRAY);
153
-
154
- Data_Get_Struct(xmldoc, xmlDoc, xml);
155
- Data_Get_Struct(self, nokogiriXsltStylesheetTuple, wrapper);
156
-
157
- param_len = RARRAY_LEN(paramobj);
158
- params = calloc((size_t)param_len+1, sizeof(char*));
159
- for (j = 0 ; j < param_len ; j++) {
160
- VALUE entry = rb_ary_entry(paramobj, j);
161
- const char * ptr = StringValueCStr(entry);
162
- params[j] = ptr;
163
- }
164
- params[param_len] = 0 ;
165
-
166
- errstr = rb_str_new(0, 0);
167
- xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
168
- xmlSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
169
-
170
- result = xsltApplyStylesheet(wrapper->ss, xml, params);
171
- free(params);
172
-
173
- xsltSetGenericErrorFunc(NULL, NULL);
174
- xmlSetGenericErrorFunc(NULL, NULL);
175
-
176
- parse_error_occurred = (Qfalse == rb_funcall(errstr, rb_intern("empty?"), 0));
177
-
178
- if (parse_error_occurred) {
179
- exception = rb_exc_new3(rb_eRuntimeError, errstr);
180
- rb_exc_raise(exception);
181
- }
182
-
183
- return Nokogiri_wrap_xml_document((VALUE)0, result) ;
252
+ VALUE rb_document, rb_param, rb_error_str;
253
+ xmlDocPtr c_document ;
254
+ xmlDocPtr c_result_document ;
255
+ nokogiriXsltStylesheetTuple *wrapper;
256
+ const char **params ;
257
+ long param_len, j ;
258
+ int parse_error_occurred ;
259
+ int defensive_copy_p = 0;
260
+
261
+ rb_scan_args(argc, argv, "11", &rb_document, &rb_param);
262
+ if (NIL_P(rb_param)) { rb_param = rb_ary_new2(0L) ; }
263
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
264
+ rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::Document");
265
+ }
266
+
267
+ /* handle hashes as arguments. */
268
+ if (T_HASH == TYPE(rb_param)) {
269
+ rb_param = rb_funcall(rb_param, rb_intern("to_a"), 0);
270
+ rb_param = rb_funcall(rb_param, rb_intern("flatten"), 0);
271
+ }
272
+
273
+ Check_Type(rb_param, T_ARRAY);
274
+
275
+ c_document = noko_xml_document_unwrap(rb_document);
276
+ TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &nokogiri_xslt_stylesheet_tuple_type, wrapper);
277
+
278
+ param_len = RARRAY_LEN(rb_param);
279
+ params = ruby_xcalloc((size_t)param_len + 1, sizeof(char *));
280
+ for (j = 0 ; j < param_len ; j++) {
281
+ VALUE entry = rb_ary_entry(rb_param, j);
282
+ const char *ptr = StringValueCStr(entry);
283
+ params[j] = ptr;
284
+ }
285
+ params[param_len] = 0 ;
286
+
287
+ xsltTransformContextPtr c_transform_context = xsltNewTransformContext(wrapper->ss, c_document);
288
+ if (xsltNeedElemSpaceHandling(c_transform_context) &&
289
+ noko_xml_document_has_wrapped_blank_nodes_p(c_document)) {
290
+ // see https://github.com/sparklemotion/nokogiri/issues/2800
291
+ c_document = xmlCopyDoc(c_document, 1);
292
+ defensive_copy_p = 1;
293
+ }
294
+ xsltFreeTransformContext(c_transform_context);
295
+
296
+ rb_error_str = rb_str_new(0, 0);
297
+ xsltSetGenericErrorFunc((void *)rb_error_str, xslt_generic_error_handler);
298
+ xmlSetGenericErrorFunc((void *)rb_error_str, xslt_generic_error_handler);
299
+
300
+ c_result_document = xsltApplyStylesheet(wrapper->ss, c_document, params);
301
+
302
+ ruby_xfree(params);
303
+ if (defensive_copy_p) {
304
+ xmlFreeDoc(c_document);
305
+ c_document = NULL;
306
+ }
307
+
308
+ xsltSetGenericErrorFunc(NULL, NULL);
309
+ xmlSetGenericErrorFunc(NULL, NULL);
310
+
311
+ parse_error_occurred = (Qfalse == rb_funcall(rb_error_str, rb_intern("empty?"), 0));
312
+
313
+ if (parse_error_occurred) {
314
+ rb_exc_raise(rb_exc_new3(rb_eRuntimeError, rb_error_str));
315
+ }
316
+
317
+ return noko_xml_document_wrap((VALUE)0, c_result_document) ;
184
318
  }
185
319
 
186
- static void method_caller(xmlXPathParserContextPtr ctxt, int nargs)
320
+ static void
321
+ method_caller(xmlXPathParserContextPtr ctxt, int nargs)
187
322
  {
188
- VALUE handler;
189
- const char *function_name;
190
- xsltTransformContextPtr transform;
191
- const xmlChar *functionURI;
192
-
193
- transform = xsltXPathGetTransformContext(ctxt);
194
- functionURI = ctxt->context->functionURI;
195
- handler = (VALUE)xsltGetExtData(transform, functionURI);
196
- function_name = (const char*)(ctxt->context->function);
197
-
198
- Nokogiri_marshal_xpath_funcall_and_return_values(ctxt, nargs, handler, (const char*)function_name);
323
+ VALUE handler;
324
+ const char *function_name;
325
+ xsltTransformContextPtr transform;
326
+ const xmlChar *functionURI;
327
+
328
+ transform = xsltXPathGetTransformContext(ctxt);
329
+ functionURI = ctxt->context->functionURI;
330
+ handler = (VALUE)xsltGetExtData(transform, functionURI);
331
+ function_name = (const char *)(ctxt->context->function);
332
+
333
+ Nokogiri_marshal_xpath_funcall_and_return_values(
334
+ ctxt,
335
+ nargs,
336
+ handler,
337
+ (const char *)function_name
338
+ );
199
339
  }
200
340
 
201
- static void * initFunc(xsltTransformContextPtr ctxt, const xmlChar *uri)
341
+ static void *
342
+ initFunc(xsltTransformContextPtr ctxt, const xmlChar *uri)
202
343
  {
203
- VALUE modules = rb_iv_get(xslt, "@modules");
204
- VALUE obj = rb_hash_aref(modules, rb_str_new2((const char *)uri));
205
- VALUE args = { Qfalse };
206
- VALUE methods = rb_funcall(obj, rb_intern("instance_methods"), 1, args);
207
- VALUE inst;
208
- nokogiriXsltStylesheetTuple *wrapper;
209
- int i;
210
-
211
- for(i = 0; i < RARRAY_LEN(methods); i++) {
212
- VALUE method_name = rb_obj_as_string(rb_ary_entry(methods, i));
213
- xsltRegisterExtFunction(ctxt,
214
- (unsigned char *)StringValueCStr(method_name), uri, method_caller);
215
- }
216
-
217
- Data_Get_Struct((VALUE)ctxt->style->_private, nokogiriXsltStylesheetTuple,
218
- wrapper);
219
- inst = rb_class_new_instance(0, NULL, obj);
220
- rb_ary_push(wrapper->func_instances, inst);
221
-
222
- return (void *)inst;
344
+ VALUE modules = rb_iv_get(mNokogiriXslt, "@modules");
345
+ VALUE obj = rb_hash_aref(modules, rb_str_new2((const char *)uri));
346
+ VALUE args = { Qfalse };
347
+ VALUE methods = rb_funcall(obj, rb_intern("instance_methods"), 1, args);
348
+ VALUE inst;
349
+ nokogiriXsltStylesheetTuple *wrapper;
350
+ int i;
351
+
352
+ for (i = 0; i < RARRAY_LEN(methods); i++) {
353
+ VALUE method_name = rb_obj_as_string(rb_ary_entry(methods, i));
354
+ xsltRegisterExtFunction(
355
+ ctxt,
356
+ (unsigned char *)StringValueCStr(method_name),
357
+ uri,
358
+ method_caller
359
+ );
360
+ }
361
+
362
+ TypedData_Get_Struct(
363
+ (VALUE)ctxt->style->_private,
364
+ nokogiriXsltStylesheetTuple,
365
+ &nokogiri_xslt_stylesheet_tuple_type,
366
+ wrapper
367
+ );
368
+ inst = rb_class_new_instance(0, NULL, obj);
369
+ rb_ary_push(wrapper->func_instances, inst);
370
+
371
+ return (void *)inst;
223
372
  }
224
373
 
225
- static void shutdownFunc(xsltTransformContextPtr ctxt,
226
- const xmlChar *uri, void *data)
374
+ static void
375
+ shutdownFunc(xsltTransformContextPtr ctxt,
376
+ const xmlChar *uri, void *data)
227
377
  {
228
- nokogiriXsltStylesheetTuple *wrapper;
378
+ nokogiriXsltStylesheetTuple *wrapper;
229
379
 
230
- Data_Get_Struct((VALUE)ctxt->style->_private, nokogiriXsltStylesheetTuple,
231
- wrapper);
380
+ TypedData_Get_Struct(
381
+ (VALUE)ctxt->style->_private,
382
+ nokogiriXsltStylesheetTuple,
383
+ &nokogiri_xslt_stylesheet_tuple_type,
384
+ wrapper
385
+ );
232
386
 
233
- rb_ary_clear(wrapper->func_instances);
387
+ rb_ary_clear(wrapper->func_instances);
234
388
  }
235
389
 
236
- /*
237
- * call-seq:
238
- * register(uri, custom_handler_class)
239
- *
240
- * Register a class that implements custom XSLT transformation functions.
241
- */
242
- static VALUE registr(VALUE self, VALUE uri, VALUE obj)
390
+ /* docstring is in lib/nokogiri/xslt.rb */
391
+ static VALUE
392
+ rb_xslt_s_register(VALUE self, VALUE uri, VALUE obj)
243
393
  {
244
- VALUE modules = rb_iv_get(self, "@modules");
245
- if(NIL_P(modules)) rb_raise(rb_eRuntimeError, "wtf! @modules isn't set");
246
-
247
- rb_hash_aset(modules, uri, obj);
248
- xsltRegisterExtModule((unsigned char *)StringValueCStr(uri), initFunc, shutdownFunc);
249
- return self;
394
+ VALUE modules = rb_iv_get(self, "@modules");
395
+ if (NIL_P(modules)) {
396
+ rb_raise(rb_eRuntimeError, "internal error: @modules not set");
397
+ }
398
+
399
+ rb_hash_aset(modules, uri, obj);
400
+ xsltRegisterExtModule(
401
+ (unsigned char *)StringValueCStr(uri),
402
+ initFunc,
403
+ shutdownFunc
404
+ );
405
+ return self;
250
406
  }
251
407
 
252
- VALUE cNokogiriXsltStylesheet ;
253
- void init_xslt_stylesheet()
408
+ void
409
+ noko_init_xslt_stylesheet(void)
254
410
  {
255
- VALUE nokogiri;
256
- VALUE klass;
257
-
258
- nokogiri = rb_define_module("Nokogiri");
259
- xslt = rb_define_module_under(nokogiri, "XSLT");
260
- klass = rb_define_class_under(xslt, "Stylesheet", rb_cObject);
411
+ rb_define_singleton_method(mNokogiriXslt, "register", rb_xslt_s_register, 2);
412
+ rb_iv_set(mNokogiriXslt, "@modules", rb_hash_new());
261
413
 
262
- rb_iv_set(xslt, "@modules", rb_hash_new());
414
+ cNokogiriXsltStylesheet = rb_define_class_under(mNokogiriXslt, "Stylesheet", rb_cObject);
263
415
 
264
- cNokogiriXsltStylesheet = klass;
416
+ rb_undef_alloc_func(cNokogiriXsltStylesheet);
265
417
 
266
- rb_define_singleton_method(klass, "parse_stylesheet_doc", parse_stylesheet_doc, 1);
267
- rb_define_singleton_method(xslt, "register", registr, 2);
268
- rb_define_method(klass, "serialize", serialize, 1);
269
- rb_define_method(klass, "transform", transform, -1);
418
+ rb_define_singleton_method(cNokogiriXsltStylesheet, "parse_stylesheet_doc", parse_stylesheet_doc, 1);
419
+ rb_define_method(cNokogiriXsltStylesheet, "serialize", rb_xslt_stylesheet_serialize, 1);
420
+ rb_define_method(cNokogiriXsltStylesheet, "transform", rb_xslt_stylesheet_transform, -1);
270
421
  }
@@ -0,0 +1,63 @@
1
+ ## Gumbo 0.10.1 (2015-04-30)
2
+
3
+ Same as 0.10.0, but with the version number bumped because the last version-number commit to v0.9.4 makes GitHub think that v0.9.4 is the latest version and so it's not highlighted on the webpage.
4
+
5
+ ## Gumbo 0.10.0 (2015-04-30)
6
+
7
+ * Full support for `<template>` tag (kevinhendricks, nostrademons).
8
+ * Some fixes for `<rtc>`/`<rt>` handling (kevinhendricks, vmg).
9
+ * All html5lib-trunk tests pass now! (kevinhendricks, vmg, nostrademons)
10
+ * Support for fragment parsing (vmg)
11
+ * A couple additional example programs (kevinhendricks)
12
+ * Performance improvements totaling an estimated 30-40% total improvement (vmg, nostrademons).
13
+
14
+ ## Gumbo 0.9.4 (2015-04-30)
15
+
16
+ * Additional Visual Studio fixes (lowjoel, nostrademons)
17
+ * Fixed some unused variable warnings.
18
+ * Fix for glibtoolize vs. libtoolize build errors on Mac.
19
+ * Fixed `CDATA` end tag handling.
20
+
21
+ ## Gumbo 0.9.3 (2015-02-17)
22
+
23
+ * Bugfix for `&AElig;` entities (rgrove)
24
+ * Fix `CDATA` handling; `CDATA` sections now generate a `GUMBO_NODE_CDATA` node rather
25
+ than plain text.
26
+ * Fix the `get_title` example to handle whitespace nodes (gsnedders)
27
+ * Visual Studio compilation fixes (fishioon)
28
+ * Take the namespace into account when determining whether a node matches a
29
+ certain tag (aroben)
30
+ * Replace the varargs tag functions with a tagset bytevector, for a 20-30%
31
+ speedup in overall parse time (kevinhendricks, vmg)
32
+ * Add MacOS X support to Travis CI, and fix the deployment/DLL issues this
33
+ uncovered (nostrademons, kevinhendricks, vmg)
34
+
35
+ ## Gumbo 0.9.2 (2014-09-21)
36
+
37
+ * Performance improvements: Ragel-based char ref decoder and DFA-based UTF8
38
+ decoder, totaling speedups of up to 300%.
39
+ * Added benchmarking program and some sample data.
40
+ * Fixed a compiler error under Visual Studio.
41
+ * Fix an error in the ctypes bindings that could lead to memory corruption in
42
+ the Python bindings.
43
+ * Fix duplicate attributes when parsing `<isindex>` tags.
44
+ * Don't leave semicolons behind when consuming entity references (rgrove)
45
+ * Internally rename some functions in preparation for an amalgamation file
46
+ (jdeng)
47
+ * Add proper cflags for gyp builds (skabbes)
48
+
49
+ ## Gumbo 0.9.1 (2014-08-07)
50
+
51
+ * First version listed on PyPi.
52
+ * Autotools files excluded from GitHub and generated via autogen.sh. (endgame)
53
+ * Numerous compiler warnings fixed. (bnoordhuis, craigbarnes)
54
+ * Google security audit passed.
55
+ * Gyp support (tfarina)
56
+ * Naming convention for structs changed to avoid C reserved words.
57
+ * Fix several integer and buffer overflows (Maxime2)
58
+ * Some Visual Studio compiler support (bugparty)
59
+ * Python3 compatibility for the ctypes bindings.
60
+
61
+ ## Gumbo 0.9.0 (2013-08-13)
62
+
63
+ * Initial release open-sourced by Google.