nokogiri 1.18.0.rc1-x86_64-linux-musl
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +293 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +42 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1173 -0
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
- data/ext/nokogiri/include/libxslt/attributes.h +39 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +294 -0
- data/ext/nokogiri/nokogiri.h +238 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +62 -0
- data/ext/nokogiri/xml_comment.c +57 -0
- data/ext/nokogiri/xml_document.c +784 -0
- data/ext/nokogiri/xml_document_fragment.c +29 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +131 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +112 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +181 -0
- data/ext/nokogiri/xml_node.c +2459 -0
- data/ext/nokogiri/xml_node_set.c +518 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +777 -0
- data/ext/nokogiri/xml_relax_ng.c +149 -0
- data/ext/nokogiri/xml_sax_parser.c +403 -0
- data/ext/nokogiri/xml_sax_parser_context.c +390 -0
- data/ext/nokogiri/xml_sax_push_parser.c +206 -0
- data/ext/nokogiri/xml_schema.c +226 -0
- data/ext/nokogiri/xml_syntax_error.c +93 -0
- data/ext/nokogiri/xml_text.c +59 -0
- data/ext/nokogiri/xml_xpath_context.c +502 -0
- data/ext/nokogiri/xslt_stylesheet.c +421 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/3.3/nokogiri.so +0 -0
- data/lib/nokogiri/3.4/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +58 -0
- data/lib/nokogiri/css/parser.rb +772 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +36 -0
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +57 -0
- data/lib/nokogiri/css/xpath_visitor.rb +375 -0
- data/lib/nokogiri/css.rb +132 -0
- data/lib/nokogiri/decorators/slop.rb +42 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +22 -0
- data/lib/nokogiri/xml/builder.rb +494 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +514 -0
- data/lib/nokogiri/xml/document_fragment.rb +276 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +46 -0
- data/lib/nokogiri/xml/element_decl.rb +17 -0
- data/lib/nokogiri/xml/entity_decl.rb +23 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +57 -0
- data/lib/nokogiri/xml/node/save_options.rb +76 -0
- data/lib/nokogiri/xml/node.rb +1650 -0
- data/lib/nokogiri/xml/node_set.rb +449 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +73 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +139 -0
- data/lib/nokogiri/xml/relax_ng.rb +75 -0
- data/lib/nokogiri/xml/sax/document.rb +258 -0
- data/lib/nokogiri/xml/sax/parser.rb +199 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
- data/lib/nokogiri/xml/sax.rb +54 -0
- data/lib/nokogiri/xml/schema.rb +140 -0
- data/lib/nokogiri/xml/searchable.rb +297 -0
- data/lib/nokogiri/xml/syntax_error.rb +94 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +49 -0
- data/lib/nokogiri/xml.rb +65 -0
- data/lib/nokogiri/xslt/stylesheet.rb +49 -0
- data/lib/nokogiri/xslt.rb +129 -0
- data/lib/nokogiri.rb +128 -0
- data/lib/xsd/xmlparser/nokogiri.rb +105 -0
- metadata +324 -0
@@ -0,0 +1,149 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriXmlRelaxNG;
|
4
|
+
|
5
|
+
static void
|
6
|
+
_noko_xml_relax_ng_deallocate(void *data)
|
7
|
+
{
|
8
|
+
xmlRelaxNGPtr schema = data;
|
9
|
+
xmlRelaxNGFree(schema);
|
10
|
+
}
|
11
|
+
|
12
|
+
static const rb_data_type_t xml_relax_ng_type = {
|
13
|
+
.wrap_struct_name = "xmlRelaxNG",
|
14
|
+
.function = {
|
15
|
+
.dfree = _noko_xml_relax_ng_deallocate,
|
16
|
+
},
|
17
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
18
|
+
};
|
19
|
+
|
20
|
+
static VALUE
|
21
|
+
noko_xml_relax_ng__validate_document(VALUE self, VALUE document)
|
22
|
+
{
|
23
|
+
xmlDocPtr doc;
|
24
|
+
xmlRelaxNGPtr schema;
|
25
|
+
VALUE errors;
|
26
|
+
xmlRelaxNGValidCtxtPtr valid_ctxt;
|
27
|
+
|
28
|
+
TypedData_Get_Struct(self, xmlRelaxNG, &xml_relax_ng_type, schema);
|
29
|
+
doc = noko_xml_document_unwrap(document);
|
30
|
+
|
31
|
+
errors = rb_ary_new();
|
32
|
+
|
33
|
+
valid_ctxt = xmlRelaxNGNewValidCtxt(schema);
|
34
|
+
|
35
|
+
if (NULL == valid_ctxt) {
|
36
|
+
/* we have a problem */
|
37
|
+
rb_raise(rb_eRuntimeError, "Could not create a validation context");
|
38
|
+
}
|
39
|
+
|
40
|
+
xmlRelaxNGSetValidStructuredErrors(
|
41
|
+
valid_ctxt,
|
42
|
+
noko__error_array_pusher,
|
43
|
+
(void *)errors
|
44
|
+
);
|
45
|
+
|
46
|
+
xmlRelaxNGValidateDoc(valid_ctxt, doc);
|
47
|
+
|
48
|
+
xmlRelaxNGFreeValidCtxt(valid_ctxt);
|
49
|
+
|
50
|
+
return errors;
|
51
|
+
}
|
52
|
+
|
53
|
+
static VALUE
|
54
|
+
_noko_xml_relax_ng_parse_schema(
|
55
|
+
VALUE rb_class,
|
56
|
+
xmlRelaxNGParserCtxtPtr c_parser_context,
|
57
|
+
VALUE rb_parse_options
|
58
|
+
)
|
59
|
+
{
|
60
|
+
VALUE rb_errors;
|
61
|
+
VALUE rb_schema;
|
62
|
+
xmlRelaxNGPtr c_schema;
|
63
|
+
libxmlStructuredErrorHandlerState handler_state;
|
64
|
+
|
65
|
+
if (NIL_P(rb_parse_options)) {
|
66
|
+
rb_parse_options = rb_const_get_at(
|
67
|
+
rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")),
|
68
|
+
rb_intern("DEFAULT_SCHEMA")
|
69
|
+
);
|
70
|
+
}
|
71
|
+
|
72
|
+
rb_errors = rb_ary_new();
|
73
|
+
|
74
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
75
|
+
xmlRelaxNGSetParserStructuredErrors(
|
76
|
+
c_parser_context,
|
77
|
+
noko__error_array_pusher,
|
78
|
+
(void *)rb_errors
|
79
|
+
);
|
80
|
+
|
81
|
+
c_schema = xmlRelaxNGParse(c_parser_context);
|
82
|
+
|
83
|
+
xmlRelaxNGFreeParserCtxt(c_parser_context);
|
84
|
+
noko__structured_error_func_restore(&handler_state);
|
85
|
+
|
86
|
+
if (NULL == c_schema) {
|
87
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
88
|
+
|
89
|
+
if (RB_TEST(exception)) {
|
90
|
+
rb_exc_raise(exception);
|
91
|
+
} else {
|
92
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
93
|
+
}
|
94
|
+
}
|
95
|
+
|
96
|
+
rb_schema = TypedData_Wrap_Struct(rb_class, &xml_relax_ng_type, c_schema);
|
97
|
+
rb_iv_set(rb_schema, "@errors", rb_errors);
|
98
|
+
rb_iv_set(rb_schema, "@parse_options", rb_parse_options);
|
99
|
+
|
100
|
+
return rb_schema;
|
101
|
+
}
|
102
|
+
|
103
|
+
/*
|
104
|
+
* :call-seq:
|
105
|
+
* from_document(document) → Nokogiri::XML::RelaxNG
|
106
|
+
* from_document(document, parse_options) → Nokogiri::XML::RelaxNG
|
107
|
+
*
|
108
|
+
* Parse a RELAX NG schema definition from a Document to create a new Nokogiri::XML::RelaxNG.
|
109
|
+
*
|
110
|
+
* [Parameters]
|
111
|
+
* - +document+ (XML::Document) A document containing the RELAX NG schema definition
|
112
|
+
* - +parse_options+ (Nokogiri::XML::ParseOptions)
|
113
|
+
* Defaults to ParseOptions::DEFAULT_SCHEMA ⚠ Unused
|
114
|
+
*
|
115
|
+
* [Returns] Nokogiri::XML::RelaxNG
|
116
|
+
*
|
117
|
+
* ⚠ +parse_options+ is currently unused by this method and is present only as a placeholder for
|
118
|
+
* future functionality.
|
119
|
+
*/
|
120
|
+
static VALUE
|
121
|
+
noko_xml_relax_ng_s_from_document(int argc, VALUE *argv, VALUE rb_class)
|
122
|
+
{
|
123
|
+
/* TODO: deprecate this method and put file-or-string logic into .new so that becomes the
|
124
|
+
* preferred entry point, and this can become a private method */
|
125
|
+
VALUE rb_document;
|
126
|
+
VALUE rb_parse_options;
|
127
|
+
xmlDocPtr c_document;
|
128
|
+
xmlRelaxNGParserCtxtPtr c_parser_context;
|
129
|
+
|
130
|
+
rb_scan_args(argc, argv, "11", &rb_document, &rb_parse_options);
|
131
|
+
|
132
|
+
c_document = noko_xml_document_unwrap(rb_document);
|
133
|
+
c_document = c_document->doc; /* In case someone passes us a node. ugh. */
|
134
|
+
|
135
|
+
c_parser_context = xmlRelaxNGNewDocParserCtxt(c_document);
|
136
|
+
|
137
|
+
return _noko_xml_relax_ng_parse_schema(rb_class, c_parser_context, rb_parse_options);
|
138
|
+
}
|
139
|
+
|
140
|
+
void
|
141
|
+
noko_init_xml_relax_ng(void)
|
142
|
+
{
|
143
|
+
assert(cNokogiriXmlSchema);
|
144
|
+
cNokogiriXmlRelaxNG = rb_define_class_under(mNokogiriXml, "RelaxNG", cNokogiriXmlSchema);
|
145
|
+
|
146
|
+
rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", noko_xml_relax_ng_s_from_document, -1);
|
147
|
+
|
148
|
+
rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", noko_xml_relax_ng__validate_document, 1);
|
149
|
+
}
|
@@ -0,0 +1,403 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriXmlSaxParser ;
|
4
|
+
|
5
|
+
static ID id_start_document;
|
6
|
+
static ID id_end_document;
|
7
|
+
static ID id_start_element;
|
8
|
+
static ID id_end_element;
|
9
|
+
static ID id_start_element_namespace;
|
10
|
+
static ID id_end_element_namespace;
|
11
|
+
static ID id_comment;
|
12
|
+
static ID id_characters;
|
13
|
+
static ID id_xmldecl;
|
14
|
+
static ID id_error;
|
15
|
+
static ID id_warning;
|
16
|
+
static ID id_cdata_block;
|
17
|
+
static ID id_processing_instruction;
|
18
|
+
static ID id_reference;
|
19
|
+
|
20
|
+
static size_t
|
21
|
+
xml_sax_parser_memsize(const void *data)
|
22
|
+
{
|
23
|
+
return sizeof(xmlSAXHandler);
|
24
|
+
}
|
25
|
+
|
26
|
+
/* Used by Nokogiri::XML::SAX::Parser and Nokogiri::HTML::SAX::Parser */
|
27
|
+
static const rb_data_type_t xml_sax_parser_type = {
|
28
|
+
.wrap_struct_name = "xmlSAXHandler",
|
29
|
+
.function = {
|
30
|
+
.dfree = RUBY_TYPED_DEFAULT_FREE,
|
31
|
+
.dsize = xml_sax_parser_memsize
|
32
|
+
},
|
33
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
34
|
+
};
|
35
|
+
|
36
|
+
static void
|
37
|
+
noko_xml_sax_parser_start_document_callback(void *ctx)
|
38
|
+
{
|
39
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
40
|
+
VALUE self = (VALUE)ctxt->_private;
|
41
|
+
VALUE doc = rb_iv_get(self, "@document");
|
42
|
+
|
43
|
+
xmlSAX2StartDocument(ctx);
|
44
|
+
|
45
|
+
if (ctxt->standalone != -1) { /* -1 means there was no declaration */
|
46
|
+
VALUE encoding = Qnil ;
|
47
|
+
VALUE standalone = Qnil;
|
48
|
+
VALUE version;
|
49
|
+
|
50
|
+
if (ctxt->encoding) {
|
51
|
+
encoding = NOKOGIRI_STR_NEW2(ctxt->encoding) ;
|
52
|
+
} else if (ctxt->input && ctxt->input->encoding) { // unnecessary after v2.12.0 / gnome/libxml2@ec7be506
|
53
|
+
encoding = NOKOGIRI_STR_NEW2(ctxt->input->encoding) ;
|
54
|
+
}
|
55
|
+
|
56
|
+
version = ctxt->version ? NOKOGIRI_STR_NEW2(ctxt->version) : Qnil;
|
57
|
+
|
58
|
+
/* TODO try using xmlSAX2IsStandalone */
|
59
|
+
switch (ctxt->standalone) {
|
60
|
+
case 0:
|
61
|
+
standalone = NOKOGIRI_STR_NEW2("no");
|
62
|
+
break;
|
63
|
+
case 1:
|
64
|
+
standalone = NOKOGIRI_STR_NEW2("yes");
|
65
|
+
break;
|
66
|
+
}
|
67
|
+
|
68
|
+
rb_funcall(doc, id_xmldecl, 3, version, encoding, standalone);
|
69
|
+
}
|
70
|
+
|
71
|
+
rb_funcall(doc, id_start_document, 0);
|
72
|
+
}
|
73
|
+
|
74
|
+
static void
|
75
|
+
noko_xml_sax_parser_end_document_callback(void *ctx)
|
76
|
+
{
|
77
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
78
|
+
VALUE self = (VALUE)ctxt->_private;
|
79
|
+
VALUE doc = rb_iv_get(self, "@document");
|
80
|
+
|
81
|
+
rb_funcall(doc, id_end_document, 0);
|
82
|
+
}
|
83
|
+
|
84
|
+
static void
|
85
|
+
noko_xml_sax_parser_start_element_callback(void *ctx, const xmlChar *name, const xmlChar **atts)
|
86
|
+
{
|
87
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
88
|
+
VALUE self = (VALUE)ctxt->_private;
|
89
|
+
VALUE doc = rb_iv_get(self, "@document");
|
90
|
+
|
91
|
+
VALUE attributes = rb_ary_new();
|
92
|
+
const xmlChar *attr;
|
93
|
+
int i = 0;
|
94
|
+
if (atts) {
|
95
|
+
while ((attr = atts[i]) != NULL) {
|
96
|
+
const xmlChar *val = atts[i + 1];
|
97
|
+
VALUE value = val != NULL ? NOKOGIRI_STR_NEW2(val) : Qnil;
|
98
|
+
rb_ary_push(attributes, rb_ary_new3(2, NOKOGIRI_STR_NEW2(attr), value));
|
99
|
+
i += 2;
|
100
|
+
}
|
101
|
+
}
|
102
|
+
|
103
|
+
rb_funcall(doc,
|
104
|
+
id_start_element,
|
105
|
+
2,
|
106
|
+
NOKOGIRI_STR_NEW2(name),
|
107
|
+
attributes
|
108
|
+
);
|
109
|
+
}
|
110
|
+
|
111
|
+
static void
|
112
|
+
noko_xml_sax_parser_end_element_callback(void *ctx, const xmlChar *name)
|
113
|
+
{
|
114
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
115
|
+
VALUE self = (VALUE)ctxt->_private;
|
116
|
+
VALUE doc = rb_iv_get(self, "@document");
|
117
|
+
|
118
|
+
rb_funcall(doc, id_end_element, 1, NOKOGIRI_STR_NEW2(name));
|
119
|
+
}
|
120
|
+
|
121
|
+
static VALUE
|
122
|
+
xml_sax_parser_marshal_attributes(int attributes_len, const xmlChar **c_attributes)
|
123
|
+
{
|
124
|
+
VALUE rb_array = rb_ary_new2((long)attributes_len);
|
125
|
+
VALUE cNokogiriXmlSaxParserAttribute;
|
126
|
+
|
127
|
+
cNokogiriXmlSaxParserAttribute = rb_const_get_at(cNokogiriXmlSaxParser, rb_intern("Attribute"));
|
128
|
+
if (c_attributes) {
|
129
|
+
/* Each attribute is an array of [localname, prefix, URI, value, end] */
|
130
|
+
int i;
|
131
|
+
for (i = 0; i < attributes_len * 5; i += 5) {
|
132
|
+
VALUE rb_constructor_args[4], rb_attribute;
|
133
|
+
|
134
|
+
rb_constructor_args[0] = RBSTR_OR_QNIL(c_attributes[i + 0]); /* localname */
|
135
|
+
rb_constructor_args[1] = RBSTR_OR_QNIL(c_attributes[i + 1]); /* prefix */
|
136
|
+
rb_constructor_args[2] = RBSTR_OR_QNIL(c_attributes[i + 2]); /* URI */
|
137
|
+
|
138
|
+
/* value */
|
139
|
+
rb_constructor_args[3] = NOKOGIRI_STR_NEW((const char *)c_attributes[i + 3],
|
140
|
+
(c_attributes[i + 4] - c_attributes[i + 3]));
|
141
|
+
|
142
|
+
rb_attribute = rb_class_new_instance(4, rb_constructor_args, cNokogiriXmlSaxParserAttribute);
|
143
|
+
rb_ary_push(rb_array, rb_attribute);
|
144
|
+
}
|
145
|
+
}
|
146
|
+
|
147
|
+
return rb_array;
|
148
|
+
}
|
149
|
+
|
150
|
+
static void
|
151
|
+
noko_xml_sax_parser_start_element_ns_callback(
|
152
|
+
void *ctx,
|
153
|
+
const xmlChar *localname,
|
154
|
+
const xmlChar *prefix,
|
155
|
+
const xmlChar *uri,
|
156
|
+
int nb_namespaces,
|
157
|
+
const xmlChar **namespaces,
|
158
|
+
int nb_attributes,
|
159
|
+
int nb_defaulted,
|
160
|
+
const xmlChar **attributes)
|
161
|
+
{
|
162
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
163
|
+
VALUE self = (VALUE)ctxt->_private;
|
164
|
+
VALUE doc = rb_iv_get(self, "@document");
|
165
|
+
|
166
|
+
VALUE attribute_ary = xml_sax_parser_marshal_attributes(nb_attributes, attributes);
|
167
|
+
|
168
|
+
VALUE ns_list = rb_ary_new2((long)nb_namespaces);
|
169
|
+
|
170
|
+
if (namespaces) {
|
171
|
+
int i;
|
172
|
+
for (i = 0; i < nb_namespaces * 2; i += 2) {
|
173
|
+
rb_ary_push(ns_list,
|
174
|
+
rb_ary_new3((long)2,
|
175
|
+
RBSTR_OR_QNIL(namespaces[i + 0]),
|
176
|
+
RBSTR_OR_QNIL(namespaces[i + 1])
|
177
|
+
)
|
178
|
+
);
|
179
|
+
}
|
180
|
+
}
|
181
|
+
|
182
|
+
rb_funcall(doc,
|
183
|
+
id_start_element_namespace,
|
184
|
+
5,
|
185
|
+
NOKOGIRI_STR_NEW2(localname),
|
186
|
+
attribute_ary,
|
187
|
+
RBSTR_OR_QNIL(prefix),
|
188
|
+
RBSTR_OR_QNIL(uri),
|
189
|
+
ns_list
|
190
|
+
);
|
191
|
+
}
|
192
|
+
|
193
|
+
/**
|
194
|
+
* end_element_ns was borrowed heavily from libxml-ruby.
|
195
|
+
*/
|
196
|
+
static void
|
197
|
+
noko_xml_sax_parser_end_element_ns_callback(
|
198
|
+
void *ctx,
|
199
|
+
const xmlChar *localname,
|
200
|
+
const xmlChar *prefix,
|
201
|
+
const xmlChar *uri)
|
202
|
+
{
|
203
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
204
|
+
VALUE self = (VALUE)ctxt->_private;
|
205
|
+
VALUE doc = rb_iv_get(self, "@document");
|
206
|
+
|
207
|
+
rb_funcall(doc, id_end_element_namespace, 3,
|
208
|
+
NOKOGIRI_STR_NEW2(localname),
|
209
|
+
RBSTR_OR_QNIL(prefix),
|
210
|
+
RBSTR_OR_QNIL(uri)
|
211
|
+
);
|
212
|
+
}
|
213
|
+
|
214
|
+
static void
|
215
|
+
noko_xml_sax_parser_characters_callback(void *ctx, const xmlChar *ch, int len)
|
216
|
+
{
|
217
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
218
|
+
VALUE self = (VALUE)ctxt->_private;
|
219
|
+
VALUE doc = rb_iv_get(self, "@document");
|
220
|
+
|
221
|
+
VALUE str = NOKOGIRI_STR_NEW(ch, len);
|
222
|
+
rb_funcall(doc, id_characters, 1, str);
|
223
|
+
}
|
224
|
+
|
225
|
+
static void
|
226
|
+
noko_xml_sax_parser_comment_callback(void *ctx, const xmlChar *value)
|
227
|
+
{
|
228
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
229
|
+
VALUE self = (VALUE)ctxt->_private;
|
230
|
+
VALUE doc = rb_iv_get(self, "@document");
|
231
|
+
|
232
|
+
VALUE str = NOKOGIRI_STR_NEW2(value);
|
233
|
+
rb_funcall(doc, id_comment, 1, str);
|
234
|
+
}
|
235
|
+
|
236
|
+
/*
 * warning SAX callback (printf-style): formats the message and calls
 * warning(message) on the @document handler.
 */
PRINTFLIKE_DECL(2, 3)
static void
noko_xml_sax_parser_warning_callback(void *ctx, const char *msg, ...)
{
  xmlParserCtxtPtr c_context = (xmlParserCtxtPtr)ctx;
  VALUE rb_parser = (VALUE)c_context->_private;
  VALUE rb_handler = rb_iv_get(rb_parser, "@document");
  VALUE rb_message;

#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
  /* It is not currently possible to pass var args from native
     functions to sulong, so we work around the issue here by
     reporting the raw format string. */
  rb_message = rb_sprintf("warning_func: %s", msg);
#else
  va_list c_args;

  va_start(c_args, msg);
  rb_message = rb_vsprintf(msg, c_args);
  va_end(c_args);
#endif

  rb_funcall(rb_handler, id_warning, 1, rb_message);
}
|
259
|
+
|
260
|
+
/*
 * error SAX callback (printf-style): formats the message and calls
 * error(message) on the @document handler.
 */
PRINTFLIKE_DECL(2, 3)
static void
noko_xml_sax_parser_error_callback(void *ctx, const char *msg, ...)
{
  xmlParserCtxtPtr c_context = (xmlParserCtxtPtr)ctx;
  VALUE rb_parser = (VALUE)c_context->_private;
  VALUE rb_handler = rb_iv_get(rb_parser, "@document");
  VALUE rb_message;

#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
  /* It is not currently possible to pass var args from native
     functions to sulong, so we work around the issue here by
     reporting the raw format string. */
  rb_message = rb_sprintf("error_func: %s", msg);
#else
  va_list c_args;

  va_start(c_args, msg);
  rb_message = rb_vsprintf(msg, c_args);
  va_end(c_args);
#endif

  rb_funcall(rb_handler, id_error, 1, rb_message);
}
|
283
|
+
|
284
|
+
static void
|
285
|
+
noko_xml_sax_parser_cdata_block_callback(void *ctx, const xmlChar *value, int len)
|
286
|
+
{
|
287
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
288
|
+
VALUE self = (VALUE)ctxt->_private;
|
289
|
+
VALUE doc = rb_iv_get(self, "@document");
|
290
|
+
|
291
|
+
VALUE string = NOKOGIRI_STR_NEW(value, len);
|
292
|
+
rb_funcall(doc, id_cdata_block, 1, string);
|
293
|
+
}
|
294
|
+
|
295
|
+
static void
|
296
|
+
noko_xml_sax_parser_processing_instruction_callback(void *ctx, const xmlChar *name, const xmlChar *content)
|
297
|
+
{
|
298
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
299
|
+
VALUE self = (VALUE)ctxt->_private;
|
300
|
+
VALUE doc = rb_iv_get(self, "@document");
|
301
|
+
|
302
|
+
VALUE rb_content = content ? NOKOGIRI_STR_NEW2(content) : Qnil;
|
303
|
+
|
304
|
+
rb_funcall(doc,
|
305
|
+
id_processing_instruction,
|
306
|
+
2,
|
307
|
+
NOKOGIRI_STR_NEW2(name),
|
308
|
+
rb_content
|
309
|
+
);
|
310
|
+
}
|
311
|
+
|
312
|
+
static void
|
313
|
+
noko_xml_sax_parser_reference_callback(void *ctx, const xmlChar *name)
|
314
|
+
{
|
315
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
316
|
+
xmlEntityPtr entity = xmlSAX2GetEntity(ctxt, name);
|
317
|
+
|
318
|
+
VALUE self = (VALUE)ctxt->_private;
|
319
|
+
VALUE doc = rb_iv_get(self, "@document");
|
320
|
+
|
321
|
+
if (entity && entity->content) {
|
322
|
+
rb_funcall(doc, id_reference, 2, NOKOGIRI_STR_NEW2(entity->name), NOKOGIRI_STR_NEW2(entity->content));
|
323
|
+
} else {
|
324
|
+
rb_funcall(doc, id_reference, 2, NOKOGIRI_STR_NEW2(name), Qnil);
|
325
|
+
}
|
326
|
+
}
|
327
|
+
|
328
|
+
static VALUE
|
329
|
+
noko_xml_sax_parser__initialize_native(VALUE self)
|
330
|
+
{
|
331
|
+
xmlSAXHandlerPtr handler = noko_xml_sax_parser_unwrap(self);
|
332
|
+
|
333
|
+
handler->startDocument = noko_xml_sax_parser_start_document_callback;
|
334
|
+
handler->endDocument = noko_xml_sax_parser_end_document_callback;
|
335
|
+
handler->startElement = noko_xml_sax_parser_start_element_callback;
|
336
|
+
handler->endElement = noko_xml_sax_parser_end_element_callback;
|
337
|
+
handler->startElementNs = noko_xml_sax_parser_start_element_ns_callback;
|
338
|
+
handler->endElementNs = noko_xml_sax_parser_end_element_ns_callback;
|
339
|
+
handler->characters = noko_xml_sax_parser_characters_callback;
|
340
|
+
handler->comment = noko_xml_sax_parser_comment_callback;
|
341
|
+
handler->warning = noko_xml_sax_parser_warning_callback;
|
342
|
+
handler->error = noko_xml_sax_parser_error_callback;
|
343
|
+
handler->cdataBlock = noko_xml_sax_parser_cdata_block_callback;
|
344
|
+
handler->processingInstruction = noko_xml_sax_parser_processing_instruction_callback;
|
345
|
+
handler->reference = noko_xml_sax_parser_reference_callback;
|
346
|
+
|
347
|
+
/* use some of libxml2's default callbacks to managed DTDs and entities */
|
348
|
+
handler->getEntity = xmlSAX2GetEntity;
|
349
|
+
handler->internalSubset = xmlSAX2InternalSubset;
|
350
|
+
handler->externalSubset = xmlSAX2ExternalSubset;
|
351
|
+
handler->isStandalone = xmlSAX2IsStandalone;
|
352
|
+
handler->hasInternalSubset = xmlSAX2HasInternalSubset;
|
353
|
+
handler->hasExternalSubset = xmlSAX2HasExternalSubset;
|
354
|
+
handler->resolveEntity = xmlSAX2ResolveEntity;
|
355
|
+
handler->getParameterEntity = xmlSAX2GetParameterEntity;
|
356
|
+
handler->entityDecl = xmlSAX2EntityDecl;
|
357
|
+
handler->unparsedEntityDecl = xmlSAX2UnparsedEntityDecl;
|
358
|
+
|
359
|
+
handler->initialized = XML_SAX2_MAGIC;
|
360
|
+
|
361
|
+
return self;
|
362
|
+
}
|
363
|
+
|
364
|
+
static VALUE
|
365
|
+
noko_xml_sax_parser_allocate(VALUE klass)
|
366
|
+
{
|
367
|
+
xmlSAXHandlerPtr handler;
|
368
|
+
return TypedData_Make_Struct(klass, xmlSAXHandler, &xml_sax_parser_type, handler);
|
369
|
+
}
|
370
|
+
|
371
|
+
xmlSAXHandlerPtr
|
372
|
+
noko_xml_sax_parser_unwrap(VALUE rb_sax_handler)
|
373
|
+
{
|
374
|
+
xmlSAXHandlerPtr c_sax_handler;
|
375
|
+
TypedData_Get_Struct(rb_sax_handler, xmlSAXHandler, &xml_sax_parser_type, c_sax_handler);
|
376
|
+
return c_sax_handler;
|
377
|
+
}
|
378
|
+
|
379
|
+
void
|
380
|
+
noko_init_xml_sax_parser(void)
|
381
|
+
{
|
382
|
+
cNokogiriXmlSaxParser = rb_define_class_under(mNokogiriXmlSax, "Parser", rb_cObject);
|
383
|
+
|
384
|
+
rb_define_alloc_func(cNokogiriXmlSaxParser, noko_xml_sax_parser_allocate);
|
385
|
+
|
386
|
+
rb_define_private_method(cNokogiriXmlSaxParser, "initialize_native",
|
387
|
+
noko_xml_sax_parser__initialize_native, 0);
|
388
|
+
|
389
|
+
id_start_document = rb_intern("start_document");
|
390
|
+
id_end_document = rb_intern("end_document");
|
391
|
+
id_start_element = rb_intern("start_element");
|
392
|
+
id_end_element = rb_intern("end_element");
|
393
|
+
id_comment = rb_intern("comment");
|
394
|
+
id_characters = rb_intern("characters");
|
395
|
+
id_xmldecl = rb_intern("xmldecl");
|
396
|
+
id_error = rb_intern("error");
|
397
|
+
id_warning = rb_intern("warning");
|
398
|
+
id_cdata_block = rb_intern("cdata_block");
|
399
|
+
id_start_element_namespace = rb_intern("start_element_namespace");
|
400
|
+
id_end_element_namespace = rb_intern("end_element_namespace");
|
401
|
+
id_processing_instruction = rb_intern("processing_instruction");
|
402
|
+
id_reference = rb_intern("reference");
|
403
|
+
}
|