nokogiri 1.12.5 → 1.14.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +41 -0
- data/LICENSE-DEPENDENCIES.md +830 -509
- data/LICENSE.md +1 -1
- data/README.md +23 -14
- data/bin/nokogiri +63 -50
- data/dependencies.yml +33 -66
- data/ext/nokogiri/extconf.rb +159 -63
- data/ext/nokogiri/gumbo.c +21 -11
- data/ext/nokogiri/html4_document.c +2 -2
- data/ext/nokogiri/html4_element_description.c +1 -1
- data/ext/nokogiri/html4_entity_lookup.c +2 -2
- data/ext/nokogiri/html4_sax_parser_context.c +3 -9
- data/ext/nokogiri/html4_sax_push_parser.c +1 -1
- data/ext/nokogiri/nokogiri.c +38 -51
- data/ext/nokogiri/nokogiri.h +26 -14
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +3 -3
- data/ext/nokogiri/xml_attribute_decl.c +5 -5
- data/ext/nokogiri/xml_cdata.c +3 -3
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +53 -44
- data/ext/nokogiri/xml_document_fragment.c +1 -3
- data/ext/nokogiri/xml_dtd.c +11 -11
- data/ext/nokogiri/xml_element_content.c +3 -3
- data/ext/nokogiri/xml_element_decl.c +5 -5
- data/ext/nokogiri/xml_encoding_handler.c +28 -14
- data/ext/nokogiri/xml_entity_decl.c +6 -6
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +80 -14
- data/ext/nokogiri/xml_node.c +982 -396
- data/ext/nokogiri/xml_node_set.c +4 -6
- data/ext/nokogiri/xml_processing_instruction.c +1 -1
- data/ext/nokogiri/xml_reader.c +133 -32
- data/ext/nokogiri/xml_relax_ng.c +1 -3
- data/ext/nokogiri/xml_sax_parser.c +23 -17
- data/ext/nokogiri/xml_sax_parser_context.c +11 -9
- data/ext/nokogiri/xml_sax_push_parser.c +1 -3
- data/ext/nokogiri/xml_schema.c +4 -6
- data/ext/nokogiri/xml_syntax_error.c +1 -1
- data/ext/nokogiri/xml_text.c +2 -2
- data/ext/nokogiri/xml_xpath_context.c +144 -114
- data/ext/nokogiri/xslt_stylesheet.c +122 -23
- data/gumbo-parser/Makefile +10 -0
- data/gumbo-parser/src/attribute.h +1 -1
- data/gumbo-parser/src/error.c +2 -2
- data/gumbo-parser/src/error.h +1 -1
- data/gumbo-parser/src/foreign_attrs.c +2 -2
- data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
- data/gumbo-parser/src/parser.c +8 -16
- data/gumbo-parser/src/replacement.h +1 -1
- data/gumbo-parser/src/string_buffer.h +1 -1
- data/gumbo-parser/src/string_piece.c +1 -1
- data/gumbo-parser/src/svg_attrs.c +2 -2
- data/gumbo-parser/src/svg_tags.c +2 -2
- data/gumbo-parser/src/tag.c +2 -1
- data/gumbo-parser/src/tag_lookup.c +7 -7
- data/gumbo-parser/src/tag_lookup.gperf +1 -0
- data/gumbo-parser/src/tag_lookup.h +1 -1
- data/gumbo-parser/src/token_buffer.h +1 -1
- data/gumbo-parser/src/tokenizer.c +1 -1
- data/gumbo-parser/src/tokenizer.h +1 -1
- data/gumbo-parser/src/utf8.c +1 -1
- data/gumbo-parser/src/utf8.h +1 -1
- data/gumbo-parser/src/util.c +1 -3
- data/gumbo-parser/src/util.h +4 -0
- data/gumbo-parser/src/vector.h +1 -1
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +9 -8
- data/lib/nokogiri/css/parser.rb +360 -341
- data/lib/nokogiri/css/parser.y +249 -244
- data/lib/nokogiri/css/parser_extras.rb +22 -20
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +184 -85
- data/lib/nokogiri/css.rb +44 -6
- data/lib/nokogiri/decorators/slop.rb +8 -7
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +4 -3
- data/lib/nokogiri/gumbo.rb +1 -0
- data/lib/nokogiri/html.rb +16 -10
- data/lib/nokogiri/html4/builder.rb +1 -0
- data/lib/nokogiri/html4/document.rb +56 -164
- data/lib/nokogiri/html4/document_fragment.rb +11 -7
- data/lib/nokogiri/html4/element_description.rb +1 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +432 -532
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +2 -1
- data/lib/nokogiri/html4/sax/parser.rb +5 -2
- data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
- data/lib/nokogiri/html4.rb +12 -5
- data/lib/nokogiri/html5/document.rb +126 -32
- data/lib/nokogiri/html5/document_fragment.rb +14 -4
- data/lib/nokogiri/html5/node.rb +12 -7
- data/lib/nokogiri/html5.rb +138 -222
- data/lib/nokogiri/jruby/dependencies.rb +2 -19
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +2 -1
- data/lib/nokogiri/version/info.rb +32 -24
- data/lib/nokogiri/version.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +54 -3
- data/lib/nokogiri/xml/attribute_decl.rb +2 -1
- data/lib/nokogiri/xml/builder.rb +35 -33
- data/lib/nokogiri/xml/cdata.rb +2 -1
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +232 -143
- data/lib/nokogiri/xml/document_fragment.rb +88 -42
- data/lib/nokogiri/xml/dtd.rb +3 -2
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +2 -1
- data/lib/nokogiri/xml/entity_decl.rb +3 -2
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +14 -8
- data/lib/nokogiri/xml/node.rb +708 -383
- data/lib/nokogiri/xml/node_set.rb +134 -59
- data/lib/nokogiri/xml/notation.rb +12 -0
- data/lib/nokogiri/xml/parse_options.rb +140 -56
- data/lib/nokogiri/xml/pp/character_data.rb +8 -6
- data/lib/nokogiri/xml/pp/node.rb +26 -26
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +3 -1
- data/lib/nokogiri/xml/reader.rb +20 -24
- data/lib/nokogiri/xml/relax_ng.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +20 -19
- data/lib/nokogiri/xml/sax/parser.rb +38 -36
- data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +7 -6
- data/lib/nokogiri/xml/searchable.rb +93 -62
- data/lib/nokogiri/xml/syntax_error.rb +5 -4
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +12 -0
- data/lib/nokogiri/xml/xpath_context.rb +2 -3
- data/lib/nokogiri/xml.rb +4 -3
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/nokogiri/xslt.rb +21 -13
- data/lib/nokogiri.rb +22 -27
- data/lib/xsd/xmlparser/nokogiri.rb +28 -25
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
- data/ports/archives/libxml2-2.10.4.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
- metadata +20 -171
- data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -2511
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +0 -31
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -6,15 +6,15 @@ VALUE cNokogiriXmlXpathContext;
|
|
6
6
|
* these constants have matching declarations in
|
7
7
|
* ext/java/nokogiri/internals/NokogiriNamespaceContext.java
|
8
8
|
*/
|
9
|
+
static const xmlChar *NOKOGIRI_PREFIX = (const xmlChar *)"nokogiri";
|
10
|
+
static const xmlChar *NOKOGIRI_URI = (const xmlChar *)"http://www.nokogiri.org/default_ns/ruby/extensions_functions";
|
9
11
|
static const xmlChar *NOKOGIRI_BUILTIN_PREFIX = (const xmlChar *)"nokogiri-builtin";
|
10
12
|
static const xmlChar *NOKOGIRI_BUILTIN_URI = (const xmlChar *)"https://www.nokogiri.org/default_ns/ruby/builtins";
|
11
13
|
|
12
14
|
static void
|
13
|
-
|
15
|
+
xml_xpath_context_deallocate(xmlXPathContextPtr ctx)
|
14
16
|
{
|
15
|
-
NOKOGIRI_DEBUG_START(ctx);
|
16
17
|
xmlXPathFreeContext(ctx);
|
17
|
-
NOKOGIRI_DEBUG_END(ctx);
|
18
18
|
}
|
19
19
|
|
20
20
|
/* find a CSS class in an HTML element's `class` attribute */
|
@@ -86,6 +86,26 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
|
|
86
86
|
xmlXPathFreeObject(needle);
|
87
87
|
}
|
88
88
|
|
89
|
+
|
90
|
+
/* xmlXPathFunction to select nodes whose local name matches, for HTML5 CSS queries that should ignore namespaces */
|
91
|
+
static void
|
92
|
+
xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
|
93
|
+
{
|
94
|
+
xmlXPathObjectPtr element_name;
|
95
|
+
|
96
|
+
assert(ctxt->context->node);
|
97
|
+
|
98
|
+
CHECK_ARITY(1);
|
99
|
+
CAST_TO_STRING;
|
100
|
+
CHECK_TYPE(XPATH_STRING);
|
101
|
+
element_name = valuePop(ctxt);
|
102
|
+
|
103
|
+
valuePush(ctxt, xmlXPathNewBoolean(xmlStrEqual(ctxt->context->node->name, element_name->stringval)));
|
104
|
+
|
105
|
+
xmlXPathFreeObject(element_name);
|
106
|
+
}
|
107
|
+
|
108
|
+
|
89
109
|
/*
|
90
110
|
* call-seq:
|
91
111
|
* register_ns(prefix, uri)
|
@@ -93,7 +113,7 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
|
|
93
113
|
* Register the namespace with +prefix+ and +uri+.
|
94
114
|
*/
|
95
115
|
static VALUE
|
96
|
-
|
116
|
+
rb_xml_xpath_context_register_ns(VALUE self, VALUE prefix, VALUE uri)
|
97
117
|
{
|
98
118
|
xmlXPathContextPtr ctx;
|
99
119
|
Data_Get_Struct(self, xmlXPathContext, ctx);
|
@@ -112,7 +132,7 @@ register_ns(VALUE self, VALUE prefix, VALUE uri)
|
|
112
132
|
* Register the variable +name+ with +value+.
|
113
133
|
*/
|
114
134
|
static VALUE
|
115
|
-
|
135
|
+
rb_xml_xpath_context_register_variable(VALUE self, VALUE name, VALUE value)
|
116
136
|
{
|
117
137
|
xmlXPathContextPtr ctx;
|
118
138
|
xmlXPathObjectPtr xmlValue;
|
@@ -128,69 +148,86 @@ register_variable(VALUE self, VALUE name, VALUE value)
|
|
128
148
|
return self;
|
129
149
|
}
|
130
150
|
|
151
|
+
|
152
|
+
/*
|
153
|
+
* convert an XPath object into a Ruby object of the appropriate type.
|
154
|
+
* returns Qundef if no conversion was possible.
|
155
|
+
*/
|
156
|
+
static VALUE
|
157
|
+
xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr ctx)
|
158
|
+
{
|
159
|
+
VALUE rb_retval;
|
160
|
+
|
161
|
+
assert(ctx->doc);
|
162
|
+
assert(DOC_RUBY_OBJECT_TEST(ctx->doc));
|
163
|
+
|
164
|
+
switch (c_xpath_object->type) {
|
165
|
+
case XPATH_STRING:
|
166
|
+
rb_retval = NOKOGIRI_STR_NEW2(c_xpath_object->stringval);
|
167
|
+
xmlFree(c_xpath_object->stringval);
|
168
|
+
return rb_retval;
|
169
|
+
|
170
|
+
case XPATH_NODESET:
|
171
|
+
return noko_xml_node_set_wrap(c_xpath_object->nodesetval,
|
172
|
+
DOC_RUBY_OBJECT(ctx->doc));
|
173
|
+
|
174
|
+
case XPATH_NUMBER:
|
175
|
+
return rb_float_new(c_xpath_object->floatval);
|
176
|
+
|
177
|
+
case XPATH_BOOLEAN:
|
178
|
+
return (c_xpath_object->boolval == 1) ? Qtrue : Qfalse;
|
179
|
+
|
180
|
+
default:
|
181
|
+
return Qundef;
|
182
|
+
}
|
183
|
+
}
|
184
|
+
|
131
185
|
void
|
132
|
-
Nokogiri_marshal_xpath_funcall_and_return_values(
|
133
|
-
|
186
|
+
Nokogiri_marshal_xpath_funcall_and_return_values(
|
187
|
+
xmlXPathParserContextPtr ctx,
|
188
|
+
int argc,
|
189
|
+
VALUE rb_xpath_handler,
|
190
|
+
const char *method_name
|
191
|
+
)
|
134
192
|
{
|
135
|
-
|
136
|
-
VALUE result, doc;
|
193
|
+
VALUE rb_retval;
|
137
194
|
VALUE *argv;
|
138
|
-
VALUE
|
139
|
-
xmlNodeSetPtr
|
140
|
-
xmlXPathObjectPtr
|
195
|
+
VALUE rb_node_set = Qnil;
|
196
|
+
xmlNodeSetPtr c_node_set = NULL;
|
197
|
+
xmlXPathObjectPtr c_xpath_object;
|
141
198
|
|
142
199
|
assert(ctx->context->doc);
|
143
200
|
assert(DOC_RUBY_OBJECT_TEST(ctx->context->doc));
|
144
201
|
|
145
|
-
argv = (VALUE *)
|
146
|
-
for (
|
147
|
-
rb_gc_register_address(&argv[
|
202
|
+
argv = (VALUE *)ruby_xcalloc((size_t)argc, sizeof(VALUE));
|
203
|
+
for (int j = 0 ; j < argc ; ++j) {
|
204
|
+
rb_gc_register_address(&argv[j]);
|
148
205
|
}
|
149
206
|
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
case XPATH_STRING:
|
158
|
-
argv[i] = NOKOGIRI_STR_NEW2(obj->stringval);
|
159
|
-
break;
|
160
|
-
case XPATH_BOOLEAN:
|
161
|
-
argv[i] = obj->boolval == 1 ? Qtrue : Qfalse;
|
162
|
-
break;
|
163
|
-
case XPATH_NUMBER:
|
164
|
-
argv[i] = rb_float_new(obj->floatval);
|
165
|
-
break;
|
166
|
-
case XPATH_NODESET:
|
167
|
-
argv[i] = noko_xml_node_set_wrap(obj->nodesetval, doc);
|
168
|
-
break;
|
169
|
-
default:
|
170
|
-
argv[i] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj));
|
171
|
-
}
|
172
|
-
xmlXPathFreeNodeSetList(obj);
|
173
|
-
} while (i-- > 0);
|
207
|
+
for (int j = argc - 1 ; j >= 0 ; --j) {
|
208
|
+
c_xpath_object = valuePop(ctx);
|
209
|
+
argv[j] = xpath2ruby(c_xpath_object, ctx->context);
|
210
|
+
if (argv[j] == Qundef) {
|
211
|
+
argv[j] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(c_xpath_object));
|
212
|
+
}
|
213
|
+
xmlXPathFreeNodeSetList(c_xpath_object);
|
174
214
|
}
|
175
215
|
|
176
|
-
|
216
|
+
rb_retval = rb_funcall2(rb_xpath_handler, rb_intern((const char *)method_name), argc, argv);
|
177
217
|
|
178
|
-
for (
|
179
|
-
rb_gc_unregister_address(&argv[
|
218
|
+
for (int j = 0 ; j < argc ; ++j) {
|
219
|
+
rb_gc_unregister_address(&argv[j]);
|
180
220
|
}
|
181
|
-
|
221
|
+
ruby_xfree(argv);
|
182
222
|
|
183
|
-
switch (TYPE(
|
223
|
+
switch (TYPE(rb_retval)) {
|
184
224
|
case T_FLOAT:
|
185
225
|
case T_BIGNUM:
|
186
226
|
case T_FIXNUM:
|
187
|
-
xmlXPathReturnNumber(ctx, NUM2DBL(
|
227
|
+
xmlXPathReturnNumber(ctx, NUM2DBL(rb_retval));
|
188
228
|
break;
|
189
229
|
case T_STRING:
|
190
|
-
xmlXPathReturnString(
|
191
|
-
ctx,
|
192
|
-
xmlCharStrdup(StringValueCStr(result))
|
193
|
-
);
|
230
|
+
xmlXPathReturnString(ctx, xmlCharStrdup(StringValueCStr(rb_retval)));
|
194
231
|
break;
|
195
232
|
case T_TRUE:
|
196
233
|
xmlXPathReturnTrue(ctx);
|
@@ -201,19 +238,17 @@ Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, i
|
|
201
238
|
case T_NIL:
|
202
239
|
break;
|
203
240
|
case T_ARRAY: {
|
204
|
-
VALUE
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
Data_Get_Struct(node_set, xmlNodeSet, xml_node_set);
|
209
|
-
xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set));
|
241
|
+
VALUE construct_args[2] = { DOC_RUBY_OBJECT(ctx->context->doc), rb_retval };
|
242
|
+
rb_node_set = rb_class_new_instance(2, construct_args, cNokogiriXmlNodeSet);
|
243
|
+
Data_Get_Struct(rb_node_set, xmlNodeSet, c_node_set);
|
244
|
+
xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, c_node_set));
|
210
245
|
}
|
211
246
|
break;
|
212
247
|
case T_DATA:
|
213
|
-
if (rb_obj_is_kind_of(
|
214
|
-
Data_Get_Struct(
|
248
|
+
if (rb_obj_is_kind_of(rb_retval, cNokogiriXmlNodeSet)) {
|
249
|
+
Data_Get_Struct(rb_retval, xmlNodeSet, c_node_set);
|
215
250
|
/* Copy the node set, otherwise it will get GC'd. */
|
216
|
-
xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL,
|
251
|
+
xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, c_node_set));
|
217
252
|
break;
|
218
253
|
}
|
219
254
|
default:
|
@@ -222,47 +257,56 @@ Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, i
|
|
222
257
|
}
|
223
258
|
|
224
259
|
static void
|
225
|
-
|
260
|
+
method_caller(xmlXPathParserContextPtr ctx, int argc)
|
226
261
|
{
|
227
|
-
VALUE
|
228
|
-
const char *
|
262
|
+
VALUE rb_xpath_handler = Qnil;
|
263
|
+
const char *method_name = NULL ;
|
229
264
|
|
230
265
|
assert(ctx);
|
231
266
|
assert(ctx->context);
|
232
267
|
assert(ctx->context->userData);
|
233
268
|
assert(ctx->context->function);
|
234
269
|
|
235
|
-
|
236
|
-
|
270
|
+
rb_xpath_handler = (VALUE)(ctx->context->userData);
|
271
|
+
method_name = (const char *)(ctx->context->function);
|
237
272
|
|
238
|
-
Nokogiri_marshal_xpath_funcall_and_return_values(ctx,
|
273
|
+
Nokogiri_marshal_xpath_funcall_and_return_values(ctx, argc, rb_xpath_handler, method_name);
|
239
274
|
}
|
240
275
|
|
241
276
|
static xmlXPathFunction
|
242
|
-
|
243
|
-
const xmlChar *name,
|
244
|
-
const xmlChar *ns_uri)
|
277
|
+
handler_lookup(void *ctx, const xmlChar *c_name, const xmlChar *c_ns_uri)
|
245
278
|
{
|
246
|
-
VALUE
|
247
|
-
if (rb_respond_to(
|
248
|
-
return
|
279
|
+
VALUE rb_xpath_handler = (VALUE)ctx;
|
280
|
+
if (rb_respond_to(rb_xpath_handler, rb_intern((const char *)c_name))) {
|
281
|
+
return method_caller;
|
249
282
|
}
|
250
283
|
|
251
284
|
return NULL;
|
252
285
|
}
|
253
286
|
|
254
|
-
|
287
|
+
PRINTFLIKE_DECL(2, 3)
|
255
288
|
static void
|
256
|
-
|
289
|
+
generic_exception_pusher(void *ctx, const char *msg, ...)
|
257
290
|
{
|
258
|
-
|
291
|
+
VALUE rb_errors = (VALUE)ctx;
|
292
|
+
VALUE rb_message;
|
293
|
+
VALUE rb_exception;
|
259
294
|
|
295
|
+
Check_Type(rb_errors, T_ARRAY);
|
296
|
+
|
297
|
+
#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
|
298
|
+
/* It is not currently possible to pass var args from native
|
299
|
+
functions to sulong, so we work around the issue here. */
|
300
|
+
rb_message = rb_sprintf("generic_exception_pusher: %s", msg);
|
301
|
+
#else
|
260
302
|
va_list args;
|
261
303
|
va_start(args, msg);
|
262
|
-
|
304
|
+
rb_message = rb_vsprintf(msg, args);
|
263
305
|
va_end(args);
|
306
|
+
#endif
|
264
307
|
|
265
|
-
|
308
|
+
rb_exception = rb_exc_new_str(cNokogiriXmlXpathSyntaxError, rb_message);
|
309
|
+
rb_ary_push(rb_errors, rb_exception);
|
266
310
|
}
|
267
311
|
|
268
312
|
/*
|
@@ -272,13 +316,14 @@ xpath_generic_exception_handler(void *ctx, const char *msg, ...)
|
|
272
316
|
* Evaluate the +search_path+ returning an XML::XPath object.
|
273
317
|
*/
|
274
318
|
static VALUE
|
275
|
-
|
319
|
+
rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
276
320
|
{
|
277
321
|
VALUE search_path, xpath_handler;
|
278
|
-
VALUE
|
322
|
+
VALUE retval = Qnil;
|
279
323
|
xmlXPathContextPtr ctx;
|
280
324
|
xmlXPathObjectPtr xpath;
|
281
325
|
xmlChar *query;
|
326
|
+
VALUE errors = rb_ary_new();
|
282
327
|
|
283
328
|
Data_Get_Struct(self, xmlXPathContext, ctx);
|
284
329
|
|
@@ -291,50 +336,29 @@ evaluate(int argc, VALUE *argv, VALUE self)
|
|
291
336
|
if (Qnil != xpath_handler) {
|
292
337
|
/* FIXME: not sure if this is the correct place to shove private data. */
|
293
338
|
ctx->userData = (void *)xpath_handler;
|
294
|
-
xmlXPathRegisterFuncLookup(ctx,
|
339
|
+
xmlXPathRegisterFuncLookup(ctx, handler_lookup, (void *)xpath_handler);
|
295
340
|
}
|
296
341
|
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
/* For some reason, xmlXPathEvalExpression will blow up with a generic error */
|
301
|
-
/* when there is a non existent function. */
|
302
|
-
xmlSetGenericErrorFunc(NULL, xpath_generic_exception_handler);
|
342
|
+
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
343
|
+
xmlSetGenericErrorFunc((void *)errors, generic_exception_pusher);
|
303
344
|
|
304
345
|
xpath = xmlXPathEvalExpression(query, ctx);
|
346
|
+
|
305
347
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
306
348
|
xmlSetGenericErrorFunc(NULL, NULL);
|
307
349
|
|
308
350
|
if (xpath == NULL) {
|
309
|
-
|
310
|
-
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
351
|
+
rb_exc_raise(rb_ary_entry(errors, 0));
|
311
352
|
}
|
312
353
|
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
switch (xpath->type) {
|
317
|
-
case XPATH_STRING:
|
318
|
-
thing = NOKOGIRI_STR_NEW2(xpath->stringval);
|
319
|
-
xmlFree(xpath->stringval);
|
320
|
-
break;
|
321
|
-
case XPATH_NODESET:
|
322
|
-
thing = noko_xml_node_set_wrap(xpath->nodesetval,
|
323
|
-
DOC_RUBY_OBJECT(ctx->doc));
|
324
|
-
break;
|
325
|
-
case XPATH_NUMBER:
|
326
|
-
thing = rb_float_new(xpath->floatval);
|
327
|
-
break;
|
328
|
-
case XPATH_BOOLEAN:
|
329
|
-
thing = xpath->boolval == 1 ? Qtrue : Qfalse;
|
330
|
-
break;
|
331
|
-
default:
|
332
|
-
thing = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc));
|
354
|
+
retval = xpath2ruby(xpath, ctx);
|
355
|
+
if (retval == Qundef) {
|
356
|
+
retval = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc));
|
333
357
|
}
|
334
358
|
|
335
359
|
xmlXPathFreeNodeSetList(xpath);
|
336
360
|
|
337
|
-
return
|
361
|
+
return retval;
|
338
362
|
}
|
339
363
|
|
340
364
|
/*
|
@@ -344,24 +368,30 @@ evaluate(int argc, VALUE *argv, VALUE self)
|
|
344
368
|
* Create a new XPathContext with +node+ as the reference point.
|
345
369
|
*/
|
346
370
|
static VALUE
|
347
|
-
|
371
|
+
rb_xml_xpath_context_new(VALUE klass, VALUE nodeobj)
|
348
372
|
{
|
349
373
|
xmlNodePtr node;
|
350
374
|
xmlXPathContextPtr ctx;
|
351
375
|
VALUE self;
|
352
376
|
|
353
|
-
|
377
|
+
Noko_Node_Get_Struct(nodeobj, xmlNode, node);
|
354
378
|
|
379
|
+
#if LIBXML_VERSION < 21000
|
380
|
+
/* deprecated in 40483d0 */
|
355
381
|
xmlXPathInit();
|
382
|
+
#endif
|
356
383
|
|
357
384
|
ctx = xmlXPathNewContext(node->doc);
|
358
385
|
ctx->node = node;
|
359
386
|
|
387
|
+
xmlXPathRegisterNs(ctx, NOKOGIRI_PREFIX, NOKOGIRI_URI);
|
360
388
|
xmlXPathRegisterNs(ctx, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
|
361
389
|
xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
|
362
390
|
xpath_builtin_css_class);
|
391
|
+
xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI,
|
392
|
+
xpath_builtin_local_name_is);
|
363
393
|
|
364
|
-
self = Data_Wrap_Struct(klass, 0,
|
394
|
+
self = Data_Wrap_Struct(klass, 0, xml_xpath_context_deallocate, ctx);
|
365
395
|
return self;
|
366
396
|
}
|
367
397
|
|
@@ -375,9 +405,9 @@ noko_init_xml_xpath_context(void)
|
|
375
405
|
|
376
406
|
rb_undef_alloc_func(cNokogiriXmlXpathContext);
|
377
407
|
|
378
|
-
rb_define_singleton_method(cNokogiriXmlXpathContext, "new",
|
408
|
+
rb_define_singleton_method(cNokogiriXmlXpathContext, "new", rb_xml_xpath_context_new, 1);
|
379
409
|
|
380
|
-
rb_define_method(cNokogiriXmlXpathContext, "evaluate",
|
381
|
-
rb_define_method(cNokogiriXmlXpathContext, "register_variable",
|
382
|
-
rb_define_method(cNokogiriXmlXpathContext, "register_ns",
|
410
|
+
rb_define_method(cNokogiriXmlXpathContext, "evaluate", rb_xml_xpath_context_evaluate, -1);
|
411
|
+
rb_define_method(cNokogiriXmlXpathContext, "register_variable", rb_xml_xpath_context_register_variable, 2);
|
412
|
+
rb_define_method(cNokogiriXmlXpathContext, "register_ns", rb_xml_xpath_context_register_ns, 2);
|
383
413
|
}
|
@@ -12,27 +12,28 @@ static void
|
|
12
12
|
dealloc(nokogiriXsltStylesheetTuple *wrapper)
|
13
13
|
{
|
14
14
|
xsltStylesheetPtr doc = wrapper->ss;
|
15
|
-
|
16
|
-
|
17
|
-
xsltFreeStylesheet(doc); /* commented out for now. */
|
18
|
-
NOKOGIRI_DEBUG_END(doc);
|
19
|
-
|
20
|
-
free(wrapper);
|
15
|
+
xsltFreeStylesheet(doc);
|
16
|
+
ruby_xfree(wrapper);
|
21
17
|
}
|
22
18
|
|
19
|
+
PRINTFLIKE_DECL(2, 3)
|
23
20
|
static void
|
24
21
|
xslt_generic_error_handler(void *ctx, const char *msg, ...)
|
25
22
|
{
|
26
|
-
|
23
|
+
VALUE message;
|
27
24
|
|
25
|
+
#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
|
26
|
+
/* It is not currently possible to pass var args from native
|
27
|
+
functions to sulong, so we work around the issue here. */
|
28
|
+
message = rb_sprintf("xslt_generic_error_handler: %s", msg);
|
29
|
+
#else
|
28
30
|
va_list args;
|
29
31
|
va_start(args, msg);
|
30
|
-
|
32
|
+
message = rb_vsprintf(msg, args);
|
31
33
|
va_end(args);
|
34
|
+
#endif
|
32
35
|
|
33
|
-
|
34
|
-
|
35
|
-
free(message);
|
36
|
+
rb_str_concat((VALUE)ctx, message);
|
36
37
|
}
|
37
38
|
|
38
39
|
VALUE
|
@@ -107,19 +108,117 @@ serialize(VALUE self, VALUE xmlobj)
|
|
107
108
|
}
|
108
109
|
|
109
110
|
/*
|
110
|
-
*
|
111
|
-
*
|
111
|
+
* call-seq:
|
112
|
+
* transform(document)
|
113
|
+
* transform(document, params = {})
|
114
|
+
*
|
115
|
+
* Apply an XSLT stylesheet to an XML::Document.
|
116
|
+
*
|
117
|
+
* [Parameters]
|
118
|
+
* - +document+ (Nokogiri::XML::Document) the document to be transformed.
|
119
|
+
* - +params+ (Hash, Array) strings used as XSLT parameters.
|
120
|
+
*
|
121
|
+
* [Returns] Nokogiri::XML::Document
|
122
|
+
*
|
123
|
+
* *Example* of basic transformation:
|
124
|
+
*
|
125
|
+
* xslt = <<~XSLT
|
126
|
+
* <xsl:stylesheet version="1.0"
|
127
|
+
* xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
128
|
+
*
|
129
|
+
* <xsl:param name="title"/>
|
130
|
+
*
|
131
|
+
* <xsl:template match="/">
|
132
|
+
* <html>
|
133
|
+
* <body>
|
134
|
+
* <h1><xsl:value-of select="$title"/></h1>
|
135
|
+
* <ol>
|
136
|
+
* <xsl:for-each select="staff/employee">
|
137
|
+
* <li><xsl:value-of select="employeeId"></li>
|
138
|
+
* </xsl:for-each>
|
139
|
+
* </ol>
|
140
|
+
* </body>
|
141
|
+
* </html>
|
142
|
+
* </xsl:stylesheet>
|
143
|
+
* XSLT
|
144
|
+
*
|
145
|
+
* xml = <<~XML
|
146
|
+
* <?xml version="1.0"?>
|
147
|
+
* <staff>
|
148
|
+
* <employee>
|
149
|
+
* <employeeId>EMP0001</employeeId>
|
150
|
+
* <position>Accountant</position>
|
151
|
+
* </employee>
|
152
|
+
* <employee>
|
153
|
+
* <employeeId>EMP0002</employeeId>
|
154
|
+
* <position>Developer</position>
|
155
|
+
* </employee>
|
156
|
+
* </staff>
|
157
|
+
* XML
|
158
|
+
*
|
159
|
+
* doc = Nokogiri::XML::Document.parse(xml)
|
160
|
+
* stylesheet = Nokogiri::XSLT.parse(xslt)
|
161
|
+
*
|
162
|
+
* ⚠ Note that the +h1+ element is empty because no param has been provided!
|
163
|
+
*
|
164
|
+
* stylesheet.transform(doc).to_xml
|
165
|
+
* # => "<html><body>\n" +
|
166
|
+
* # "<h1></h1>\n" +
|
167
|
+
* # "<ol>\n" +
|
168
|
+
* # "<li>EMP0001</li>\n" +
|
169
|
+
* # "<li>EMP0002</li>\n" +
|
170
|
+
* # "</ol>\n" +
|
171
|
+
* # "</body></html>\n"
|
172
|
+
*
|
173
|
+
* *Example* of using an input parameter hash:
|
174
|
+
*
|
175
|
+
* ⚠ The title is populated, but note how we need to quote-escape the value.
|
176
|
+
*
|
177
|
+
* stylesheet.transform(doc, { "title" => "'Employee List'" }).to_xml
|
178
|
+
* # => "<html><body>\n" +
|
179
|
+
* # "<h1>Employee List</h1>\n" +
|
180
|
+
* # "<ol>\n" +
|
181
|
+
* # "<li>EMP0001</li>\n" +
|
182
|
+
* # "<li>EMP0002</li>\n" +
|
183
|
+
* # "</ol>\n" +
|
184
|
+
* # "</body></html>\n"
|
185
|
+
*
|
186
|
+
* *Example* using the XSLT.quote_params helper method to safely quote-escape strings:
|
187
|
+
*
|
188
|
+
* stylesheet.transform(doc, Nokogiri::XSLT.quote_params({ "title" => "Aaron's List" })).to_xml
|
189
|
+
* # => "<html><body>\n" +
|
190
|
+
* # "<h1>Aaron's List</h1>\n" +
|
191
|
+
* # "<ol>\n" +
|
192
|
+
* # "<li>EMP0001</li>\n" +
|
193
|
+
* # "<li>EMP0002</li>\n" +
|
194
|
+
* # "</ol>\n" +
|
195
|
+
* # "</body></html>\n"
|
196
|
+
*
|
197
|
+
* *Example* using an array of XSLT parameters
|
198
|
+
*
|
199
|
+
* You can also use an array if you want to.
|
112
200
|
*
|
113
|
-
*
|
114
|
-
*
|
115
|
-
*
|
201
|
+
* stylesheet.transform(doc, ["title", "'Employee List'"]).to_xml
|
202
|
+
* # => "<html><body>\n" +
|
203
|
+
* # "<h1>Employee List</h1>\n" +
|
204
|
+
* # "<ol>\n" +
|
205
|
+
* # "<li>EMP0001</li>\n" +
|
206
|
+
* # "<li>EMP0002</li>\n" +
|
207
|
+
* # "</ol>\n" +
|
208
|
+
* # "</body></html>\n"
|
116
209
|
*
|
117
|
-
*
|
210
|
+
* Or pass an array to XSLT.quote_params:
|
118
211
|
*
|
119
|
-
*
|
120
|
-
*
|
121
|
-
*
|
212
|
+
* stylesheet.transform(doc, Nokogiri::XSLT.quote_params(["title", "Aaron's List"])).to_xml
|
213
|
+
* # => "<html><body>\n" +
|
214
|
+
* # "<h1>Aaron's List</h1>\n" +
|
215
|
+
* # "<ol>\n" +
|
216
|
+
* # "<li>EMP0001</li>\n" +
|
217
|
+
* # "<li>EMP0002</li>\n" +
|
218
|
+
* # "</ol>\n" +
|
219
|
+
* # "</body></html>\n"
|
122
220
|
*
|
221
|
+
* See: Nokogiri::XSLT.quote_params
|
123
222
|
*/
|
124
223
|
static VALUE
|
125
224
|
transform(int argc, VALUE *argv, VALUE self)
|
@@ -150,7 +249,7 @@ transform(int argc, VALUE *argv, VALUE self)
|
|
150
249
|
Data_Get_Struct(self, nokogiriXsltStylesheetTuple, wrapper);
|
151
250
|
|
152
251
|
param_len = RARRAY_LEN(paramobj);
|
153
|
-
params =
|
252
|
+
params = ruby_xcalloc((size_t)param_len + 1, sizeof(char *));
|
154
253
|
for (j = 0 ; j < param_len ; j++) {
|
155
254
|
VALUE entry = rb_ary_entry(paramobj, j);
|
156
255
|
const char *ptr = StringValueCStr(entry);
|
@@ -163,7 +262,7 @@ transform(int argc, VALUE *argv, VALUE self)
|
|
163
262
|
xmlSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
|
164
263
|
|
165
264
|
result = xsltApplyStylesheet(wrapper->ss, xml, params);
|
166
|
-
|
265
|
+
ruby_xfree(params);
|
167
266
|
|
168
267
|
xsltSetGenericErrorFunc(NULL, NULL);
|
169
268
|
xmlSetGenericErrorFunc(NULL, NULL);
|
@@ -249,7 +348,7 @@ registr(VALUE self, VALUE uri, VALUE obj)
|
|
249
348
|
}
|
250
349
|
|
251
350
|
void
|
252
|
-
noko_init_xslt_stylesheet()
|
351
|
+
noko_init_xslt_stylesheet(void)
|
253
352
|
{
|
254
353
|
rb_define_singleton_method(mNokogiriXslt, "register", registr, 2);
|
255
354
|
rb_iv_set(mNokogiriXslt, "@modules", rb_hash_new());
|
data/gumbo-parser/Makefile
CHANGED
@@ -13,8 +13,18 @@ LDFLAGS := -pthread
|
|
13
13
|
|
14
14
|
all: check
|
15
15
|
|
16
|
+
# don't try to regenerate ragel or gperf files in CI, that should be a development-only action and
|
17
|
+
# the generated files should be committed to SCM
|
18
|
+
ifneq ($(CI),true)
|
19
|
+
src/foreign_attrs.c: src/foreign_attrs.gperf
|
20
|
+
gperf -m100 -n $< | ./gperf-filter.sed > $@
|
21
|
+
|
22
|
+
src/%.c: src/%.gperf
|
23
|
+
gperf -m100 $< | ./gperf-filter.sed > $@
|
24
|
+
|
16
25
|
src/%.c: src/%.rl
|
17
26
|
ragel -F1 -o $@ $<
|
27
|
+
endif
|
18
28
|
|
19
29
|
build/src:
|
20
30
|
mkdir -p $@
|
data/gumbo-parser/src/error.c
CHANGED
@@ -21,7 +21,7 @@
|
|
21
21
|
#include <string.h>
|
22
22
|
#include "ascii.h"
|
23
23
|
#include "error.h"
|
24
|
-
#include "
|
24
|
+
#include "nokogiri_gumbo.h"
|
25
25
|
#include "macros.h"
|
26
26
|
#include "parser.h"
|
27
27
|
#include "string_buffer.h"
|
@@ -96,7 +96,7 @@ static void print_tag_stack (
|
|
96
96
|
if (i) {
|
97
97
|
print_message(output, ", ");
|
98
98
|
}
|
99
|
-
GumboTag tag = (GumboTag) error->tag_stack.data[i];
|
99
|
+
GumboTag tag = (GumboTag)(intptr_t) error->tag_stack.data[i];
|
100
100
|
print_message(output, "%s", gumbo_normalized_tagname(tag));
|
101
101
|
}
|
102
102
|
gumbo_string_buffer_append_codepoint('.', output);
|
data/gumbo-parser/src/error.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/* ANSI-C code produced by gperf version 3.1 */
|
2
|
-
/* Command-line: gperf -m100 -n
|
2
|
+
/* Command-line: gperf -m100 -n src/foreign_attrs.gperf */
|
3
3
|
/* Computed positions: -k'2,8' */
|
4
|
-
/* Filtered by:
|
4
|
+
/* Filtered by: gperf-filter.sed */
|
5
5
|
|
6
6
|
#include "replacement.h"
|
7
7
|
#include "macros.h"
|
@@ -292,6 +292,7 @@ typedef enum {
|
|
292
292
|
GUMBO_TAG_TT,
|
293
293
|
GUMBO_TAG_RTC,
|
294
294
|
GUMBO_TAG_DIALOG,
|
295
|
+
GUMBO_TAG_SEARCH,
|
295
296
|
// Used for all tags that don't have special handling in HTML.
|
296
297
|
GUMBO_TAG_UNKNOWN,
|
297
298
|
// A marker value to indicate the end of the enum, for iterating over it.
|