nokogiri 1.18.0.rc1-x86_64-linux-gnu
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +293 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +42 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1173 -0
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
- data/ext/nokogiri/include/libxslt/attributes.h +39 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +294 -0
- data/ext/nokogiri/nokogiri.h +238 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +62 -0
- data/ext/nokogiri/xml_comment.c +57 -0
- data/ext/nokogiri/xml_document.c +784 -0
- data/ext/nokogiri/xml_document_fragment.c +29 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +131 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +112 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +181 -0
- data/ext/nokogiri/xml_node.c +2459 -0
- data/ext/nokogiri/xml_node_set.c +518 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +777 -0
- data/ext/nokogiri/xml_relax_ng.c +149 -0
- data/ext/nokogiri/xml_sax_parser.c +403 -0
- data/ext/nokogiri/xml_sax_parser_context.c +390 -0
- data/ext/nokogiri/xml_sax_push_parser.c +206 -0
- data/ext/nokogiri/xml_schema.c +226 -0
- data/ext/nokogiri/xml_syntax_error.c +93 -0
- data/ext/nokogiri/xml_text.c +59 -0
- data/ext/nokogiri/xml_xpath_context.c +502 -0
- data/ext/nokogiri/xslt_stylesheet.c +421 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/3.3/nokogiri.so +0 -0
- data/lib/nokogiri/3.4/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +58 -0
- data/lib/nokogiri/css/parser.rb +772 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +36 -0
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +57 -0
- data/lib/nokogiri/css/xpath_visitor.rb +375 -0
- data/lib/nokogiri/css.rb +132 -0
- data/lib/nokogiri/decorators/slop.rb +42 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +22 -0
- data/lib/nokogiri/xml/builder.rb +494 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +514 -0
- data/lib/nokogiri/xml/document_fragment.rb +276 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +46 -0
- data/lib/nokogiri/xml/element_decl.rb +17 -0
- data/lib/nokogiri/xml/entity_decl.rb +23 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +57 -0
- data/lib/nokogiri/xml/node/save_options.rb +76 -0
- data/lib/nokogiri/xml/node.rb +1650 -0
- data/lib/nokogiri/xml/node_set.rb +449 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +73 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +139 -0
- data/lib/nokogiri/xml/relax_ng.rb +75 -0
- data/lib/nokogiri/xml/sax/document.rb +258 -0
- data/lib/nokogiri/xml/sax/parser.rb +199 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
- data/lib/nokogiri/xml/sax.rb +54 -0
- data/lib/nokogiri/xml/schema.rb +140 -0
- data/lib/nokogiri/xml/searchable.rb +297 -0
- data/lib/nokogiri/xml/syntax_error.rb +94 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +49 -0
- data/lib/nokogiri/xml.rb +65 -0
- data/lib/nokogiri/xslt/stylesheet.rb +49 -0
- data/lib/nokogiri/xslt.rb +129 -0
- data/lib/nokogiri.rb +128 -0
- data/lib/xsd/xmlparser/nokogiri.rb +105 -0
- metadata +324 -0
@@ -0,0 +1,421 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriXsltStylesheet;
|
4
|
+
|
5
|
+
static void
|
6
|
+
mark(void *data)
|
7
|
+
{
|
8
|
+
nokogiriXsltStylesheetTuple *wrapper = (nokogiriXsltStylesheetTuple *)data;
|
9
|
+
rb_gc_mark(wrapper->func_instances);
|
10
|
+
}
|
11
|
+
|
12
|
+
static void
|
13
|
+
dealloc(void *data)
|
14
|
+
{
|
15
|
+
nokogiriXsltStylesheetTuple *wrapper = (nokogiriXsltStylesheetTuple *)data;
|
16
|
+
xsltStylesheetPtr doc = wrapper->ss;
|
17
|
+
xsltFreeStylesheet(doc);
|
18
|
+
ruby_xfree(wrapper);
|
19
|
+
}
|
20
|
+
|
21
|
+
static const rb_data_type_t nokogiri_xslt_stylesheet_tuple_type = {
|
22
|
+
.wrap_struct_name = "nokogiriXsltStylesheetTuple",
|
23
|
+
.function = {
|
24
|
+
.dmark = mark,
|
25
|
+
.dfree = dealloc,
|
26
|
+
},
|
27
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY
|
28
|
+
};
|
29
|
+
|
30
|
+
/*
 * printf-style error callback installed via xsltSetGenericErrorFunc() and
 * xmlSetGenericErrorFunc().
 *
 * +ctx+ is the Ruby String that was registered together with the handler;
 * each formatted message is appended to it so the caller can inspect (and
 * raise with) the accumulated text afterwards.
 */
PRINTFLIKE_DECL(2, 3)
static void
xslt_generic_error_handler(void *ctx, const char *msg, ...)
{
  VALUE rb_errors = (VALUE)ctx;
  VALUE rb_formatted;

#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
  /* It is not currently possible to pass var args from native
     functions to sulong, so we work around the issue here. */
  rb_formatted = rb_sprintf("xslt_generic_error_handler: %s", msg);
#else
  va_list varargs;

  va_start(varargs, msg);
  rb_formatted = rb_vsprintf(msg, varargs);
  va_end(varargs);
#endif

  rb_str_concat(rb_errors, rb_formatted);
}
|
49
|
+
|
50
|
+
VALUE
|
51
|
+
Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
|
52
|
+
{
|
53
|
+
VALUE self;
|
54
|
+
nokogiriXsltStylesheetTuple *wrapper;
|
55
|
+
|
56
|
+
self = TypedData_Make_Struct(
|
57
|
+
cNokogiriXsltStylesheet,
|
58
|
+
nokogiriXsltStylesheetTuple,
|
59
|
+
&nokogiri_xslt_stylesheet_tuple_type,
|
60
|
+
wrapper
|
61
|
+
);
|
62
|
+
|
63
|
+
ss->_private = (void *)self;
|
64
|
+
wrapper->ss = ss;
|
65
|
+
wrapper->func_instances = rb_ary_new();
|
66
|
+
|
67
|
+
return self;
|
68
|
+
}
|
69
|
+
|
70
|
+
/*
|
71
|
+
* call-seq:
|
72
|
+
* parse_stylesheet_doc(document)
|
73
|
+
*
|
74
|
+
* Parse an XSLT::Stylesheet from +document+.
|
75
|
+
*
|
76
|
+
* [Parameters]
|
77
|
+
* - +document+ (Nokogiri::XML::Document) the document to be parsed.
|
78
|
+
*
|
79
|
+
* [Returns] Nokogiri::XSLT::Stylesheet
|
80
|
+
*/
|
81
|
+
static VALUE
|
82
|
+
parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
83
|
+
{
|
84
|
+
xmlDocPtr xml, xml_cpy;
|
85
|
+
VALUE errstr, exception;
|
86
|
+
xsltStylesheetPtr ss ;
|
87
|
+
|
88
|
+
xml = noko_xml_document_unwrap(xmldocobj);
|
89
|
+
|
90
|
+
errstr = rb_str_new(0, 0);
|
91
|
+
xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
|
92
|
+
|
93
|
+
xml_cpy = xmlCopyDoc(xml, 1); /* 1 => recursive */
|
94
|
+
ss = xsltParseStylesheetDoc(xml_cpy);
|
95
|
+
|
96
|
+
xsltSetGenericErrorFunc(NULL, NULL);
|
97
|
+
|
98
|
+
if (!ss) {
|
99
|
+
xmlFreeDoc(xml_cpy);
|
100
|
+
exception = rb_exc_new3(rb_eRuntimeError, errstr);
|
101
|
+
rb_exc_raise(exception);
|
102
|
+
}
|
103
|
+
|
104
|
+
return Nokogiri_wrap_xslt_stylesheet(ss);
|
105
|
+
}
|
106
|
+
|
107
|
+
|
108
|
+
/*
|
109
|
+
* call-seq:
|
110
|
+
* serialize(document)
|
111
|
+
*
|
112
|
+
* Serialize +document+ to an xml string, as specified by the +method+ parameter in the Stylesheet.
|
113
|
+
*/
|
114
|
+
static VALUE
|
115
|
+
rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
116
|
+
{
|
117
|
+
xmlDocPtr xml ;
|
118
|
+
nokogiriXsltStylesheetTuple *wrapper;
|
119
|
+
xmlChar *doc_ptr ;
|
120
|
+
int doc_len ;
|
121
|
+
VALUE rval ;
|
122
|
+
|
123
|
+
xml = noko_xml_document_unwrap(xmlobj);
|
124
|
+
TypedData_Get_Struct(
|
125
|
+
self,
|
126
|
+
nokogiriXsltStylesheetTuple,
|
127
|
+
&nokogiri_xslt_stylesheet_tuple_type,
|
128
|
+
wrapper
|
129
|
+
);
|
130
|
+
xsltSaveResultToString(&doc_ptr, &doc_len, xml, wrapper->ss);
|
131
|
+
rval = NOKOGIRI_STR_NEW(doc_ptr, doc_len);
|
132
|
+
xmlFree(doc_ptr);
|
133
|
+
return rval ;
|
134
|
+
}
|
135
|
+
|
136
|
+
/*
|
137
|
+
* call-seq:
|
138
|
+
* transform(document)
|
139
|
+
* transform(document, params = {})
|
140
|
+
*
|
141
|
+
* Transform an XML::Document as defined by an XSLT::Stylesheet.
|
142
|
+
*
|
143
|
+
* [Parameters]
|
144
|
+
* - +document+ (Nokogiri::XML::Document) the document to be transformed.
|
145
|
+
* - +params+ (Hash, Array) strings used as XSLT parameters.
|
146
|
+
*
|
147
|
+
* [Returns] Nokogiri::XML::Document
|
148
|
+
*
|
149
|
+
* *Example* of basic transformation:
|
150
|
+
*
|
151
|
+
* xslt = <<~XSLT
|
152
|
+
* <xsl:stylesheet version="1.0"
|
153
|
+
* xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
154
|
+
*
|
155
|
+
* <xsl:param name="title"/>
|
156
|
+
*
|
157
|
+
* <xsl:template match="/">
|
158
|
+
* <html>
|
159
|
+
* <body>
|
160
|
+
* <h1><xsl:value-of select="$title"/></h1>
|
161
|
+
* <ol>
|
162
|
+
* <xsl:for-each select="staff/employee">
|
163
|
+
*       <li><xsl:value-of select="employeeId"/></li>
|
164
|
+
* </xsl:for-each>
|
165
|
+
* </ol>
|
166
|
+
* </body>
|
167
|
+
* </html>
|
168
|
+
* </xsl:stylesheet>
|
169
|
+
* XSLT
|
170
|
+
*
|
171
|
+
* xml = <<~XML
|
172
|
+
* <?xml version="1.0"?>
|
173
|
+
* <staff>
|
174
|
+
* <employee>
|
175
|
+
* <employeeId>EMP0001</employeeId>
|
176
|
+
* <position>Accountant</position>
|
177
|
+
* </employee>
|
178
|
+
* <employee>
|
179
|
+
* <employeeId>EMP0002</employeeId>
|
180
|
+
* <position>Developer</position>
|
181
|
+
* </employee>
|
182
|
+
* </staff>
|
183
|
+
* XML
|
184
|
+
*
|
185
|
+
* doc = Nokogiri::XML::Document.parse(xml)
|
186
|
+
* stylesheet = Nokogiri::XSLT.parse(xslt)
|
187
|
+
*
|
188
|
+
* ⚠ Note that the +h1+ element is empty because no param has been provided!
|
189
|
+
*
|
190
|
+
* stylesheet.transform(doc).to_xml
|
191
|
+
* # => "<html><body>\n" +
|
192
|
+
* # "<h1></h1>\n" +
|
193
|
+
* # "<ol>\n" +
|
194
|
+
* # "<li>EMP0001</li>\n" +
|
195
|
+
* # "<li>EMP0002</li>\n" +
|
196
|
+
* # "</ol>\n" +
|
197
|
+
* # "</body></html>\n"
|
198
|
+
*
|
199
|
+
* *Example* of using an input parameter hash:
|
200
|
+
*
|
201
|
+
* ⚠ The title is populated, but note how we need to quote-escape the value.
|
202
|
+
*
|
203
|
+
* stylesheet.transform(doc, { "title" => "'Employee List'" }).to_xml
|
204
|
+
* # => "<html><body>\n" +
|
205
|
+
* # "<h1>Employee List</h1>\n" +
|
206
|
+
* # "<ol>\n" +
|
207
|
+
* # "<li>EMP0001</li>\n" +
|
208
|
+
* # "<li>EMP0002</li>\n" +
|
209
|
+
* # "</ol>\n" +
|
210
|
+
* # "</body></html>\n"
|
211
|
+
*
|
212
|
+
* *Example* using the XSLT.quote_params helper method to safely quote-escape strings:
|
213
|
+
*
|
214
|
+
* stylesheet.transform(doc, Nokogiri::XSLT.quote_params({ "title" => "Aaron's List" })).to_xml
|
215
|
+
* # => "<html><body>\n" +
|
216
|
+
* # "<h1>Aaron's List</h1>\n" +
|
217
|
+
* # "<ol>\n" +
|
218
|
+
* # "<li>EMP0001</li>\n" +
|
219
|
+
* # "<li>EMP0002</li>\n" +
|
220
|
+
* # "</ol>\n" +
|
221
|
+
* # "</body></html>\n"
|
222
|
+
*
|
223
|
+
* *Example* using an array of XSLT parameters
|
224
|
+
*
|
225
|
+
* You can also use an array if you want to.
|
226
|
+
*
|
227
|
+
* stylesheet.transform(doc, ["title", "'Employee List'"]).to_xml
|
228
|
+
* # => "<html><body>\n" +
|
229
|
+
* # "<h1>Employee List</h1>\n" +
|
230
|
+
* # "<ol>\n" +
|
231
|
+
* # "<li>EMP0001</li>\n" +
|
232
|
+
* # "<li>EMP0002</li>\n" +
|
233
|
+
* # "</ol>\n" +
|
234
|
+
* # "</body></html>\n"
|
235
|
+
*
|
236
|
+
* Or pass an array to XSLT.quote_params:
|
237
|
+
*
|
238
|
+
* stylesheet.transform(doc, Nokogiri::XSLT.quote_params(["title", "Aaron's List"])).to_xml
|
239
|
+
* # => "<html><body>\n" +
|
240
|
+
* # "<h1>Aaron's List</h1>\n" +
|
241
|
+
* # "<ol>\n" +
|
242
|
+
* # "<li>EMP0001</li>\n" +
|
243
|
+
* # "<li>EMP0002</li>\n" +
|
244
|
+
* # "</ol>\n" +
|
245
|
+
* # "</body></html>\n"
|
246
|
+
*
|
247
|
+
* See: Nokogiri::XSLT.quote_params
|
248
|
+
*/
|
249
|
+
static VALUE
|
250
|
+
rb_xslt_stylesheet_transform(int argc, VALUE *argv, VALUE self)
|
251
|
+
{
|
252
|
+
VALUE rb_document, rb_param, rb_error_str;
|
253
|
+
xmlDocPtr c_document ;
|
254
|
+
xmlDocPtr c_result_document ;
|
255
|
+
nokogiriXsltStylesheetTuple *wrapper;
|
256
|
+
const char **params ;
|
257
|
+
long param_len, j ;
|
258
|
+
int parse_error_occurred ;
|
259
|
+
int defensive_copy_p = 0;
|
260
|
+
|
261
|
+
rb_scan_args(argc, argv, "11", &rb_document, &rb_param);
|
262
|
+
if (NIL_P(rb_param)) { rb_param = rb_ary_new2(0L) ; }
|
263
|
+
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
|
264
|
+
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::Document");
|
265
|
+
}
|
266
|
+
|
267
|
+
/* handle hashes as arguments. */
|
268
|
+
if (T_HASH == TYPE(rb_param)) {
|
269
|
+
rb_param = rb_funcall(rb_param, rb_intern("to_a"), 0);
|
270
|
+
rb_param = rb_funcall(rb_param, rb_intern("flatten"), 0);
|
271
|
+
}
|
272
|
+
|
273
|
+
Check_Type(rb_param, T_ARRAY);
|
274
|
+
|
275
|
+
c_document = noko_xml_document_unwrap(rb_document);
|
276
|
+
TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &nokogiri_xslt_stylesheet_tuple_type, wrapper);
|
277
|
+
|
278
|
+
param_len = RARRAY_LEN(rb_param);
|
279
|
+
params = ruby_xcalloc((size_t)param_len + 1, sizeof(char *));
|
280
|
+
for (j = 0 ; j < param_len ; j++) {
|
281
|
+
VALUE entry = rb_ary_entry(rb_param, j);
|
282
|
+
const char *ptr = StringValueCStr(entry);
|
283
|
+
params[j] = ptr;
|
284
|
+
}
|
285
|
+
params[param_len] = 0 ;
|
286
|
+
|
287
|
+
xsltTransformContextPtr c_transform_context = xsltNewTransformContext(wrapper->ss, c_document);
|
288
|
+
if (xsltNeedElemSpaceHandling(c_transform_context) &&
|
289
|
+
noko_xml_document_has_wrapped_blank_nodes_p(c_document)) {
|
290
|
+
// see https://github.com/sparklemotion/nokogiri/issues/2800
|
291
|
+
c_document = xmlCopyDoc(c_document, 1);
|
292
|
+
defensive_copy_p = 1;
|
293
|
+
}
|
294
|
+
xsltFreeTransformContext(c_transform_context);
|
295
|
+
|
296
|
+
rb_error_str = rb_str_new(0, 0);
|
297
|
+
xsltSetGenericErrorFunc((void *)rb_error_str, xslt_generic_error_handler);
|
298
|
+
xmlSetGenericErrorFunc((void *)rb_error_str, xslt_generic_error_handler);
|
299
|
+
|
300
|
+
c_result_document = xsltApplyStylesheet(wrapper->ss, c_document, params);
|
301
|
+
|
302
|
+
ruby_xfree(params);
|
303
|
+
if (defensive_copy_p) {
|
304
|
+
xmlFreeDoc(c_document);
|
305
|
+
c_document = NULL;
|
306
|
+
}
|
307
|
+
|
308
|
+
xsltSetGenericErrorFunc(NULL, NULL);
|
309
|
+
xmlSetGenericErrorFunc(NULL, NULL);
|
310
|
+
|
311
|
+
parse_error_occurred = (Qfalse == rb_funcall(rb_error_str, rb_intern("empty?"), 0));
|
312
|
+
|
313
|
+
if (parse_error_occurred) {
|
314
|
+
rb_exc_raise(rb_exc_new3(rb_eRuntimeError, rb_error_str));
|
315
|
+
}
|
316
|
+
|
317
|
+
return noko_xml_document_wrap((VALUE)0, c_result_document) ;
|
318
|
+
}
|
319
|
+
|
320
|
+
static void
|
321
|
+
method_caller(xmlXPathParserContextPtr ctxt, int nargs)
|
322
|
+
{
|
323
|
+
VALUE handler;
|
324
|
+
const char *function_name;
|
325
|
+
xsltTransformContextPtr transform;
|
326
|
+
const xmlChar *functionURI;
|
327
|
+
|
328
|
+
transform = xsltXPathGetTransformContext(ctxt);
|
329
|
+
functionURI = ctxt->context->functionURI;
|
330
|
+
handler = (VALUE)xsltGetExtData(transform, functionURI);
|
331
|
+
function_name = (const char *)(ctxt->context->function);
|
332
|
+
|
333
|
+
Nokogiri_marshal_xpath_funcall_and_return_values(
|
334
|
+
ctxt,
|
335
|
+
nargs,
|
336
|
+
handler,
|
337
|
+
(const char *)function_name
|
338
|
+
);
|
339
|
+
}
|
340
|
+
|
341
|
+
static void *
|
342
|
+
initFunc(xsltTransformContextPtr ctxt, const xmlChar *uri)
|
343
|
+
{
|
344
|
+
VALUE modules = rb_iv_get(mNokogiriXslt, "@modules");
|
345
|
+
VALUE obj = rb_hash_aref(modules, rb_str_new2((const char *)uri));
|
346
|
+
VALUE args = { Qfalse };
|
347
|
+
VALUE methods = rb_funcall(obj, rb_intern("instance_methods"), 1, args);
|
348
|
+
VALUE inst;
|
349
|
+
nokogiriXsltStylesheetTuple *wrapper;
|
350
|
+
int i;
|
351
|
+
|
352
|
+
for (i = 0; i < RARRAY_LEN(methods); i++) {
|
353
|
+
VALUE method_name = rb_obj_as_string(rb_ary_entry(methods, i));
|
354
|
+
xsltRegisterExtFunction(
|
355
|
+
ctxt,
|
356
|
+
(unsigned char *)StringValueCStr(method_name),
|
357
|
+
uri,
|
358
|
+
method_caller
|
359
|
+
);
|
360
|
+
}
|
361
|
+
|
362
|
+
TypedData_Get_Struct(
|
363
|
+
(VALUE)ctxt->style->_private,
|
364
|
+
nokogiriXsltStylesheetTuple,
|
365
|
+
&nokogiri_xslt_stylesheet_tuple_type,
|
366
|
+
wrapper
|
367
|
+
);
|
368
|
+
inst = rb_class_new_instance(0, NULL, obj);
|
369
|
+
rb_ary_push(wrapper->func_instances, inst);
|
370
|
+
|
371
|
+
return (void *)inst;
|
372
|
+
}
|
373
|
+
|
374
|
+
static void
|
375
|
+
shutdownFunc(xsltTransformContextPtr ctxt,
|
376
|
+
const xmlChar *uri, void *data)
|
377
|
+
{
|
378
|
+
nokogiriXsltStylesheetTuple *wrapper;
|
379
|
+
|
380
|
+
TypedData_Get_Struct(
|
381
|
+
(VALUE)ctxt->style->_private,
|
382
|
+
nokogiriXsltStylesheetTuple,
|
383
|
+
&nokogiri_xslt_stylesheet_tuple_type,
|
384
|
+
wrapper
|
385
|
+
);
|
386
|
+
|
387
|
+
rb_ary_clear(wrapper->func_instances);
|
388
|
+
}
|
389
|
+
|
390
|
+
/* docstring is in lib/nokogiri/xslt.rb */
|
391
|
+
static VALUE
|
392
|
+
rb_xslt_s_register(VALUE self, VALUE uri, VALUE obj)
|
393
|
+
{
|
394
|
+
VALUE modules = rb_iv_get(self, "@modules");
|
395
|
+
if (NIL_P(modules)) {
|
396
|
+
rb_raise(rb_eRuntimeError, "internal error: @modules not set");
|
397
|
+
}
|
398
|
+
|
399
|
+
rb_hash_aset(modules, uri, obj);
|
400
|
+
xsltRegisterExtModule(
|
401
|
+
(unsigned char *)StringValueCStr(uri),
|
402
|
+
initFunc,
|
403
|
+
shutdownFunc
|
404
|
+
);
|
405
|
+
return self;
|
406
|
+
}
|
407
|
+
|
408
|
+
void
|
409
|
+
noko_init_xslt_stylesheet(void)
|
410
|
+
{
|
411
|
+
rb_define_singleton_method(mNokogiriXslt, "register", rb_xslt_s_register, 2);
|
412
|
+
rb_iv_set(mNokogiriXslt, "@modules", rb_hash_new());
|
413
|
+
|
414
|
+
cNokogiriXsltStylesheet = rb_define_class_under(mNokogiriXslt, "Stylesheet", rb_cObject);
|
415
|
+
|
416
|
+
rb_undef_alloc_func(cNokogiriXsltStylesheet);
|
417
|
+
|
418
|
+
rb_define_singleton_method(cNokogiriXsltStylesheet, "parse_stylesheet_doc", parse_stylesheet_doc, 1);
|
419
|
+
rb_define_method(cNokogiriXsltStylesheet, "serialize", rb_xslt_stylesheet_serialize, 1);
|
420
|
+
rb_define_method(cNokogiriXsltStylesheet, "transform", rb_xslt_stylesheet_transform, -1);
|
421
|
+
}
|
@@ -0,0 +1,63 @@
|
|
1
|
+
## Gumbo 0.10.1 (2015-04-30)
|
2
|
+
|
3
|
+
Same as 0.10.0, but with the version number bumped because the last version-number commit to v0.9.4 makes GitHub think that v0.9.4 is the latest version and so it's not highlighted on the webpage.
|
4
|
+
|
5
|
+
## Gumbo 0.10.0 (2015-04-30)
|
6
|
+
|
7
|
+
* Full support for `<template>` tag (kevinhendricks, nostrademons).
|
8
|
+
* Some fixes for `<rtc>`/`<rt>` handling (kevinhendricks, vmg).
|
9
|
+
* All html5lib-trunk tests pass now! (kevinhendricks, vmg, nostrademons)
|
10
|
+
* Support for fragment parsing (vmg)
|
11
|
+
* A couple additional example programs (kevinhendricks)
|
12
|
+
* Performance improvements totaling an estimated 30-40% total improvement (vmg, nostrademons).
|
13
|
+
|
14
|
+
## Gumbo 0.9.4 (2015-04-30)
|
15
|
+
|
16
|
+
* Additional Visual Studio fixes (lowjoel, nostrademons)
|
17
|
+
* Fixed some unused variable warnings.
|
18
|
+
* Fix for glibtoolize vs. libtoolize build errors on Mac.
|
19
|
+
* Fixed `CDATA` end tag handling.
|
20
|
+
|
21
|
+
## Gumbo 0.9.3 (2015-02-17)
|
22
|
+
|
23
|
+
* Bugfix for `&AElig;` entities (rgrove)
|
24
|
+
* Fix `CDATA` handling; `CDATA` sections now generate a `GUMBO_NODE_CDATA` node rather
|
25
|
+
than plain text.
|
26
|
+
* Fix `get_title` example to handle whitespace nodes (gsnedders)
|
27
|
+
* Visual Studio compilation fixes (fishioon)
|
28
|
+
* Take the namespace into account when determining whether a node matches a
|
29
|
+
certain tag (aroben)
|
30
|
+
* Replace the varargs tag functions with a tagset bytevector, for a 20-30%
|
31
|
+
speedup in overall parse time (kevinhendricks, vmg)
|
32
|
+
* Add MacOS X support to Travis CI, and fix the deployment/DLL issues this
|
33
|
+
uncovered (nostrademons, kevinhendricks, vmg)
|
34
|
+
|
35
|
+
## Gumbo 0.9.2 (2014-09-21)
|
36
|
+
|
37
|
+
* Performance improvements: Ragel-based char ref decoder and DFA-based UTF8
|
38
|
+
decoder, totaling speedups of up to 300%.
|
39
|
+
* Added benchmarking program and some sample data.
|
40
|
+
* Fixed a compiler error under Visual Studio.
|
41
|
+
* Fix an error in the ctypes bindings that could lead to memory corruption in
|
42
|
+
the Python bindings.
|
43
|
+
* Fix duplicate attributes when parsing `<isindex>` tags.
|
44
|
+
* Don't leave semicolons behind when consuming entity references (rgrove)
|
45
|
+
* Internally rename some functions in preparation for an amalgamation file
|
46
|
+
(jdeng)
|
47
|
+
* Add proper cflags for gyp builds (skabbes)
|
48
|
+
|
49
|
+
## Gumbo 0.9.1 (2014-08-07)
|
50
|
+
|
51
|
+
* First version listed on PyPi.
|
52
|
+
* Autotools files excluded from GitHub and generated via autogen.sh. (endgame)
|
53
|
+
* Numerous compiler warnings fixed. (bnoordhuis, craigbarnes)
|
54
|
+
* Google security audit passed.
|
55
|
+
* Gyp support (tfarina)
|
56
|
+
* Naming convention for structs changed to avoid C reserved words.
|
57
|
+
* Fix several integer and buffer overflows (Maxime2)
|
58
|
+
* Some Visual Studio compiler support (bugparty)
|
59
|
+
* Python3 compatibility for the ctypes bindings.
|
60
|
+
|
61
|
+
## Gumbo 0.9.0 (2013-08-13)
|
62
|
+
|
63
|
+
* Initial release open-sourced by Google.
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# Command-style targets: none of these names a file the build produces, so
# declare them all phony (a stray file called e.g. "fuzzers" must not mask them).
.PHONY: all clean check coverage oss-fuzz fuzzers fuzzer-normal fuzzer-asan fuzzer-ubsan fuzzer-msan
|
2
|
+
|
3
|
+
# Object files mirror the source tree under build/ (src/x.c -> build/src/x.o,
# test/y.cc -> build/test/y.o).
gumbo_objs := $(addprefix build/,$(patsubst %.c,%.o,$(wildcard src/*.c)))
test_objs := $(addprefix build/,$(patsubst %.cc,%.o,$(wildcard test/*.cc)))
gtest_lib := googletest/make/gtest_main.a

# Extra flags added to every compile and link command; empty by default, e.g.
# make SANITIZEFLAGS='-fsanitize=undefined -fsanitize=address'
SANITIZEFLAGS :=
CPPFLAGS := -Isrc
CFLAGS := -std=c99 -Os -Wall
CXXFLAGS := -isystem googletest/include -std=c++11 -Os -Wall
LDFLAGS := -pthread
|
13
|
+
|
14
|
+
# Default goal: build and run the test suite.
all: check

# Build via the OSS-Fuzz helper script.
oss-fuzz:
	./fuzzer/build-ossfuzz.sh

# Build every fuzzer variant.
fuzzers: fuzzer-normal fuzzer-asan fuzzer-ubsan fuzzer-msan

# Each variant runs the same build script; the sanitizer variants pass their
# choice through the SANITIZER environment variable.
fuzzer-normal:
	./fuzzer/build.sh

fuzzer-asan:
	SANITIZER=asan ./fuzzer/build.sh

fuzzer-ubsan:
	SANITIZER=ubsan ./fuzzer/build.sh

fuzzer-msan:
	SANITIZER=msan ./fuzzer/build.sh
|
32
|
+
|
33
|
+
# don't try to regenerate ragel or gperf files in CI, that should be a development-only action and
# the generated files should be committed to SCM
ifneq ($(CI),true)

# foreign_attrs gets its own rule because it needs gperf's extra -n flag.
src/foreign_attrs.c: src/foreign_attrs.gperf
	gperf -m100 -n $< | ./gperf-filter.sed > $@

# Every other gperf table.
src/%.c: src/%.gperf
	gperf -m100 $< | ./gperf-filter.sed > $@

# Ragel sources.
src/%.c: src/%.rl
	ragel -F1 -o $@ $<

endif
|
45
|
+
|
46
|
+
# Output directories (used below as order-only prerequisites).
build/src build/test:
	mkdir -p $@

# Objects also depend on their flags file, which is forced out of date (see
# the .PHONY lines near the end of this file) whenever the flags change.
# -MMD writes a .d dependency file next to each object.
build/src/%.o: src/%.c build/src/flags | build/src
	$(CC) -MMD $(CPPFLAGS) $(CFLAGS) $(SANITIZEFLAGS) -c -o $@ $<

build/test/%.o: test/%.cc build/test/flags | build/test
	$(CXX) -MMD $(CPPFLAGS) $(CXXFLAGS) $(SANITIZEFLAGS) -c -o $@ $<

# Link the test runner from the library objects, test objects and gtest.
build/run_tests: $(gumbo_objs) $(test_objs) $(gtest_lib)
	$(CXX) -o $@ $+ $(LDFLAGS) $(SANITIZEFLAGS)

# Build the test runner and execute it.
check: build/run_tests
	./$<
|
63
|
+
|
64
|
+
# Build an instrumented test binary, run it, and write an lcov/genhtml report
# to build/coverage.
coverage:
	@# Remove stale counters. The two directories are listed explicitly
	@# because brace expansion (build/{src,test}) is a bash feature; a POSIX
	@# /bin/sh passes the braces through literally and `rm -f` then silently
	@# removes nothing.
	$(RM) build/src/*.gcda build/test/*.gcda
	$(RM) build/*.info
	$(MAKE) CPPFLAGS='-Isrc -DNDEBUG=1' \
	  CFLAGS='-std=c99 --coverage -g -O0' \
	  CXXFLAGS='-isystem googletest/include -std=c++11 --coverage -g -O0' \
	  LDFLAGS='--coverage' \
	  build/run_tests
	lcov --no-external \
	  --initial \
	  --capture \
	  --base-directory . \
	  --directory build \
	  --output-file build/coverage-pre.info
	@# Filter the baseline: drop zero-count DA records whose line number was
	@# recorded by an FN record in the same source file section.
	awk -F '[:,]' \
	  '/^SF:/ { delete defs } /^FN:/ { defs[$$2]=1 } /^DA:/ { if ($$3 == 0 && $$2 in defs) next } { print }' \
	  build/coverage-pre.info > build/coverage-initial.info
	./build/run_tests
	lcov --no-external \
	  --capture \
	  --base-directory . \
	  --directory build \
	  --rc lcov_branch_coverage=1 \
	  --output-file build/coverage-test.info
	lcov --add-tracefile build/coverage-initial.info \
	  --add-tracefile build/coverage-test.info \
	  --rc lcov_branch_coverage=1 \
	  --output-file build/coverage.info
	lcov --remove build/coverage.info '$(CURDIR)/googletest/*' \
	  --rc lcov_branch_coverage=1 \
	  --output-file build/coverage.info
	genhtml --branch-coverage \
	  --output-directory build/coverage \
	  build/coverage.info
|
98
|
+
|
99
|
+
# Remove everything the build and the fuzzer scripts generate.
clean:
	$(RM) -r build fuzzer/build fuzzer/src-* fuzzer/gumbo_corpus
|
102
|
+
|
103
|
+
# Record the compiler and flags used for this build as old_* variable
# assignments, so a later run can include these files and notice a change.
build/src/flags: | build/src
	@{ echo 'old_CC := $(CC)'; \
	   echo 'old_CPPFLAGS := $(CPPFLAGS)'; \
	   echo 'old_CFLAGS := $(CFLAGS)'; \
	   echo 'old_SANITIZEFLAGS := $(SANITIZEFLAGS)'; \
	   echo 'old_LDFLAGS := $(LDFLAGS)'; } > $@

build/test/flags: | build/test
	@{ echo 'old_CXX := $(CXX)'; \
	   echo 'old_CPPFLAGS := $(CPPFLAGS)'; \
	   echo 'old_CXXFLAGS := $(CXXFLAGS)'; \
	   echo 'old_SANITIZEFLAGS := $(SANITIZEFLAGS)'; \
	   echo 'old_LDFLAGS := $(LDFLAGS)'; } > $@
|
116
|
+
|
117
|
+
# Skipped when clean or coverage is among the goals named on the command line.
ifeq (,$(filter clean coverage,$(MAKECMDGOALS)))

# Ensure that the flags are up to date: load the old_* values recorded by the
# previous build and, if they differ from the current ones, mark the matching
# flags file phony so it is rewritten and everything depending on it rebuilds.
-include build/src/flags build/test/flags

ifneq ($(old_CC) | $(old_CPPFLAGS) | $(old_CFLAGS) | $(old_SANITIZEFLAGS) | $(old_LDFLAGS),$(CC) | $(CPPFLAGS) | $(CFLAGS) | $(SANITIZEFLAGS) | $(LDFLAGS))
  .PHONY: build/src/flags
endif

ifneq ($(old_CXX) | $(old_CPPFLAGS) | $(old_CXXFLAGS) | $(old_SANITIZEFLAGS) | $(old_LDFLAGS),$(CXX) | $(CPPFLAGS) | $(CXXFLAGS) | $(SANITIZEFLAGS) | $(LDFLAGS))
  .PHONY: build/test/flags
endif

# Include dependencies (the .d files written by -MMD; absent on a first build).
-include $(test_objs:.o=.d) $(gumbo_objs:.o=.d)

endif
|
data/gumbo-parser/THANKS
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Gumbo HTML parser THANKS file
|
2
|
+
|
3
|
+
Gumbo was originally written by Jonathan Tang, but many people helped out through suggestions, question-answering, code reviews, bugfixes, and organizational support. Here is a list of these people. Help me keep it complete and free of errors.
|
4
|
+
|
5
|
+
Adam Barth
|
6
|
+
Adam Roben
|
7
|
+
Ben Noordhuis
|
8
|
+
Bowen Han
|
9
|
+
Constantinos Michael
|
10
|
+
Craig Barnes
|
11
|
+
Geoffrey Sneddon
|
12
|
+
Ian Hickson
|
13
|
+
Jack Deng
|
14
|
+
Joel Low
|
15
|
+
Jonathan Shneier
|
16
|
+
Kevin Hendricks
|
17
|
+
Mason Tang
|
18
|
+
Maxim Zakharov
|
19
|
+
Michal Zalewski
|
20
|
+
Neal Norwitz
|
21
|
+
Othar Hansson
|
22
|
+
Ryan Grove
|
23
|
+
Stefan Haustein
|
24
|
+
Steffen Meschkat
|
25
|
+
Steven Kabbes
|
26
|
+
Thiago Farina
|
27
|
+
Vicent Marti
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|