nokogiri 1.18.0.rc1-x86_64-linux-musl
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +293 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +42 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1173 -0
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
- data/ext/nokogiri/include/libxslt/attributes.h +39 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +294 -0
- data/ext/nokogiri/nokogiri.h +238 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +62 -0
- data/ext/nokogiri/xml_comment.c +57 -0
- data/ext/nokogiri/xml_document.c +784 -0
- data/ext/nokogiri/xml_document_fragment.c +29 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +131 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +112 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +181 -0
- data/ext/nokogiri/xml_node.c +2459 -0
- data/ext/nokogiri/xml_node_set.c +518 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +777 -0
- data/ext/nokogiri/xml_relax_ng.c +149 -0
- data/ext/nokogiri/xml_sax_parser.c +403 -0
- data/ext/nokogiri/xml_sax_parser_context.c +390 -0
- data/ext/nokogiri/xml_sax_push_parser.c +206 -0
- data/ext/nokogiri/xml_schema.c +226 -0
- data/ext/nokogiri/xml_syntax_error.c +93 -0
- data/ext/nokogiri/xml_text.c +59 -0
- data/ext/nokogiri/xml_xpath_context.c +502 -0
- data/ext/nokogiri/xslt_stylesheet.c +421 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/3.3/nokogiri.so +0 -0
- data/lib/nokogiri/3.4/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +58 -0
- data/lib/nokogiri/css/parser.rb +772 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +36 -0
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +57 -0
- data/lib/nokogiri/css/xpath_visitor.rb +375 -0
- data/lib/nokogiri/css.rb +132 -0
- data/lib/nokogiri/decorators/slop.rb +42 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +22 -0
- data/lib/nokogiri/xml/builder.rb +494 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +514 -0
- data/lib/nokogiri/xml/document_fragment.rb +276 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +46 -0
- data/lib/nokogiri/xml/element_decl.rb +17 -0
- data/lib/nokogiri/xml/entity_decl.rb +23 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +57 -0
- data/lib/nokogiri/xml/node/save_options.rb +76 -0
- data/lib/nokogiri/xml/node.rb +1650 -0
- data/lib/nokogiri/xml/node_set.rb +449 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +73 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +139 -0
- data/lib/nokogiri/xml/relax_ng.rb +75 -0
- data/lib/nokogiri/xml/sax/document.rb +258 -0
- data/lib/nokogiri/xml/sax/parser.rb +199 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
- data/lib/nokogiri/xml/sax.rb +54 -0
- data/lib/nokogiri/xml/schema.rb +140 -0
- data/lib/nokogiri/xml/searchable.rb +297 -0
- data/lib/nokogiri/xml/syntax_error.rb +94 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +49 -0
- data/lib/nokogiri/xml.rb +65 -0
- data/lib/nokogiri/xslt/stylesheet.rb +49 -0
- data/lib/nokogiri/xslt.rb +129 -0
- data/lib/nokogiri.rb +128 -0
- data/lib/xsd/xmlparser/nokogiri.rb +105 -0
- metadata +324 -0
@@ -0,0 +1,171 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriHtml4Document ;
|
4
|
+
|
5
|
+
static ID id_encoding_found;
|
6
|
+
static ID id_to_s;
|
7
|
+
|
8
|
+
/*
|
9
|
+
* call-seq:
|
10
|
+
* new(uri=nil, external_id=nil) → HTML4::Document
|
11
|
+
*
|
12
|
+
* Create a new empty document with base URI +uri+ and external ID +external_id+.
|
13
|
+
*/
|
14
|
+
static VALUE
|
15
|
+
rb_html_document_s_new(int argc, VALUE *argv, VALUE klass)
|
16
|
+
{
|
17
|
+
VALUE uri, external_id, rest, rb_doc;
|
18
|
+
htmlDocPtr doc;
|
19
|
+
|
20
|
+
rb_scan_args(argc, argv, "0*", &rest);
|
21
|
+
uri = rb_ary_entry(rest, (long)0);
|
22
|
+
external_id = rb_ary_entry(rest, (long)1);
|
23
|
+
|
24
|
+
doc = htmlNewDoc(
|
25
|
+
RTEST(uri) ? (const xmlChar *)StringValueCStr(uri) : NULL,
|
26
|
+
RTEST(external_id) ? (const xmlChar *)StringValueCStr(external_id) : NULL
|
27
|
+
);
|
28
|
+
rb_doc = noko_xml_document_wrap_with_init_args(klass, doc, argc, argv);
|
29
|
+
return rb_doc ;
|
30
|
+
}
|
31
|
+
|
32
|
+
/*
|
33
|
+
* call-seq:
|
34
|
+
* read_io(io, url, encoding, options)
|
35
|
+
*
|
36
|
+
* Read the HTML document from +io+ with given +url+, +encoding+,
|
37
|
+
* and +options+. See Nokogiri::HTML4.parse
|
38
|
+
*/
|
39
|
+
static VALUE
|
40
|
+
rb_html_document_s_read_io(VALUE klass, VALUE rb_io, VALUE rb_url, VALUE rb_encoding, VALUE rb_options)
|
41
|
+
{
|
42
|
+
VALUE rb_doc;
|
43
|
+
VALUE rb_error_list = rb_ary_new();
|
44
|
+
htmlDocPtr c_doc;
|
45
|
+
const char *c_url = NIL_P(rb_url) ? NULL : StringValueCStr(rb_url);
|
46
|
+
const char *c_encoding = NIL_P(rb_encoding) ? NULL : StringValueCStr(rb_encoding);
|
47
|
+
int options = NUM2INT(rb_options);
|
48
|
+
|
49
|
+
xmlSetStructuredErrorFunc((void *)rb_error_list, noko__error_array_pusher);
|
50
|
+
|
51
|
+
c_doc = htmlReadIO(noko_io_read, noko_io_close, (void *)rb_io, c_url, c_encoding, options);
|
52
|
+
|
53
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
54
|
+
|
55
|
+
/*
|
56
|
+
* If EncodingFound has occurred in EncodingReader, make sure to do
|
57
|
+
* a cleanup and propagate the error.
|
58
|
+
*/
|
59
|
+
if (rb_respond_to(rb_io, id_encoding_found)) {
|
60
|
+
VALUE encoding_found = rb_funcall(rb_io, id_encoding_found, 0);
|
61
|
+
if (!NIL_P(encoding_found)) {
|
62
|
+
xmlFreeDoc(c_doc);
|
63
|
+
rb_exc_raise(encoding_found);
|
64
|
+
}
|
65
|
+
}
|
66
|
+
|
67
|
+
if ((c_doc == NULL) || (!(options & XML_PARSE_RECOVER) && (RARRAY_LEN(rb_error_list) > 0))) {
|
68
|
+
VALUE rb_error ;
|
69
|
+
|
70
|
+
xmlFreeDoc(c_doc);
|
71
|
+
|
72
|
+
rb_error = rb_ary_entry(rb_error_list, 0);
|
73
|
+
if (rb_error == Qnil) {
|
74
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
75
|
+
} else {
|
76
|
+
VALUE exception_message = rb_funcall(rb_error, id_to_s, 0);
|
77
|
+
exception_message = rb_str_concat(rb_str_new2("Parser without recover option encountered error or warning: "),
|
78
|
+
exception_message);
|
79
|
+
rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
|
80
|
+
}
|
81
|
+
|
82
|
+
return Qnil;
|
83
|
+
}
|
84
|
+
|
85
|
+
rb_doc = noko_xml_document_wrap(klass, c_doc);
|
86
|
+
rb_iv_set(rb_doc, "@errors", rb_error_list);
|
87
|
+
return rb_doc;
|
88
|
+
}
|
89
|
+
|
90
|
+
/*
|
91
|
+
* call-seq:
|
92
|
+
* read_memory(string, url, encoding, options)
|
93
|
+
*
|
94
|
+
* Read the HTML document contained in +string+ with given +url+, +encoding+,
|
95
|
+
* and +options+. See Nokogiri::HTML4.parse
|
96
|
+
*/
|
97
|
+
static VALUE
|
98
|
+
rb_html_document_s_read_memory(VALUE klass, VALUE rb_html, VALUE rb_url, VALUE rb_encoding, VALUE rb_options)
|
99
|
+
{
|
100
|
+
VALUE rb_doc;
|
101
|
+
VALUE rb_error_list = rb_ary_new();
|
102
|
+
htmlDocPtr c_doc;
|
103
|
+
const char *c_buffer = StringValuePtr(rb_html);
|
104
|
+
const char *c_url = NIL_P(rb_url) ? NULL : StringValueCStr(rb_url);
|
105
|
+
const char *c_encoding = NIL_P(rb_encoding) ? NULL : StringValueCStr(rb_encoding);
|
106
|
+
int html_len = (int)RSTRING_LEN(rb_html);
|
107
|
+
int options = NUM2INT(rb_options);
|
108
|
+
|
109
|
+
xmlSetStructuredErrorFunc((void *)rb_error_list, noko__error_array_pusher);
|
110
|
+
|
111
|
+
c_doc = htmlReadMemory(c_buffer, html_len, c_url, c_encoding, options);
|
112
|
+
|
113
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
114
|
+
|
115
|
+
if ((c_doc == NULL) || (!(options & XML_PARSE_RECOVER) && (RARRAY_LEN(rb_error_list) > 0))) {
|
116
|
+
VALUE rb_error ;
|
117
|
+
|
118
|
+
xmlFreeDoc(c_doc);
|
119
|
+
|
120
|
+
rb_error = rb_ary_entry(rb_error_list, 0);
|
121
|
+
if (rb_error == Qnil) {
|
122
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
123
|
+
} else {
|
124
|
+
VALUE exception_message = rb_funcall(rb_error, id_to_s, 0);
|
125
|
+
exception_message = rb_str_concat(rb_str_new2("Parser without recover option encountered error or warning: "),
|
126
|
+
exception_message);
|
127
|
+
rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
|
128
|
+
}
|
129
|
+
|
130
|
+
return Qnil;
|
131
|
+
}
|
132
|
+
|
133
|
+
rb_doc = noko_xml_document_wrap(klass, c_doc);
|
134
|
+
rb_iv_set(rb_doc, "@errors", rb_error_list);
|
135
|
+
return rb_doc;
|
136
|
+
}
|
137
|
+
|
138
|
+
/*
|
139
|
+
* call-seq:
|
140
|
+
* type
|
141
|
+
*
|
142
|
+
* The type for this document
|
143
|
+
*/
|
144
|
+
static VALUE
|
145
|
+
rb_html_document_type(VALUE self)
|
146
|
+
{
|
147
|
+
htmlDocPtr doc = noko_xml_document_unwrap(self);
|
148
|
+
return INT2NUM(doc->type);
|
149
|
+
}
|
150
|
+
|
151
|
+
void
|
152
|
+
noko_init_html_document(void)
|
153
|
+
{
|
154
|
+
/* this is here so that rdoc doesn't ignore this file. */
|
155
|
+
/*
|
156
|
+
mNokogiri = rb_define_module("Nokogiri");
|
157
|
+
mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4");
|
158
|
+
*/
|
159
|
+
|
160
|
+
assert(cNokogiriXmlDocument);
|
161
|
+
cNokogiriHtml4Document = rb_define_class_under(mNokogiriHtml4, "Document", cNokogiriXmlDocument);
|
162
|
+
|
163
|
+
rb_define_singleton_method(cNokogiriHtml4Document, "read_memory", rb_html_document_s_read_memory, 4);
|
164
|
+
rb_define_singleton_method(cNokogiriHtml4Document, "read_io", rb_html_document_s_read_io, 4);
|
165
|
+
rb_define_singleton_method(cNokogiriHtml4Document, "new", rb_html_document_s_new, -1);
|
166
|
+
|
167
|
+
rb_define_method(cNokogiriHtml4Document, "type", rb_html_document_type, 0);
|
168
|
+
|
169
|
+
id_encoding_found = rb_intern("encoding_found");
|
170
|
+
id_to_s = rb_intern("to_s");
|
171
|
+
}
|
@@ -0,0 +1,299 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
static const rb_data_type_t html_elem_desc_type = {
|
4
|
+
.wrap_struct_name = "htmlElemDesc",
|
5
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
6
|
+
};
|
7
|
+
|
8
|
+
VALUE cNokogiriHtml4ElementDescription ;
|
9
|
+
|
10
|
+
/*
|
11
|
+
* call-seq:
|
12
|
+
* required_attributes
|
13
|
+
*
|
14
|
+
* A list of required attributes for this element
|
15
|
+
*/
|
16
|
+
static VALUE
|
17
|
+
required_attributes(VALUE self)
|
18
|
+
{
|
19
|
+
const htmlElemDesc *description;
|
20
|
+
VALUE list;
|
21
|
+
int i;
|
22
|
+
|
23
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html_elem_desc_type, description);
|
24
|
+
|
25
|
+
list = rb_ary_new();
|
26
|
+
|
27
|
+
if (NULL == description->attrs_req) { return list; }
|
28
|
+
|
29
|
+
for (i = 0; description->attrs_depr[i]; i++) {
|
30
|
+
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_req[i]));
|
31
|
+
}
|
32
|
+
|
33
|
+
return list;
|
34
|
+
}
|
35
|
+
|
36
|
+
/*
|
37
|
+
* call-seq:
|
38
|
+
* deprecated_attributes
|
39
|
+
*
|
40
|
+
* A list of deprecated attributes for this element
|
41
|
+
*/
|
42
|
+
static VALUE
|
43
|
+
deprecated_attributes(VALUE self)
|
44
|
+
{
|
45
|
+
const htmlElemDesc *description;
|
46
|
+
VALUE list;
|
47
|
+
int i;
|
48
|
+
|
49
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html_elem_desc_type, description);
|
50
|
+
|
51
|
+
list = rb_ary_new();
|
52
|
+
|
53
|
+
if (NULL == description->attrs_depr) { return list; }
|
54
|
+
|
55
|
+
for (i = 0; description->attrs_depr[i]; i++) {
|
56
|
+
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_depr[i]));
|
57
|
+
}
|
58
|
+
|
59
|
+
return list;
|
60
|
+
}
|
61
|
+
|
62
|
+
/*
|
63
|
+
* call-seq:
|
64
|
+
* optional_attributes
|
65
|
+
*
|
66
|
+
* A list of optional attributes for this element
|
67
|
+
*/
|
68
|
+
static VALUE
|
69
|
+
optional_attributes(VALUE self)
|
70
|
+
{
|
71
|
+
const htmlElemDesc *description;
|
72
|
+
VALUE list;
|
73
|
+
int i;
|
74
|
+
|
75
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html_elem_desc_type, description);
|
76
|
+
|
77
|
+
list = rb_ary_new();
|
78
|
+
|
79
|
+
if (NULL == description->attrs_opt) { return list; }
|
80
|
+
|
81
|
+
for (i = 0; description->attrs_opt[i]; i++) {
|
82
|
+
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_opt[i]));
|
83
|
+
}
|
84
|
+
|
85
|
+
return list;
|
86
|
+
}
|
87
|
+
|
88
|
+
/*
|
89
|
+
* call-seq:
|
90
|
+
* default_sub_element
|
91
|
+
*
|
92
|
+
* The default sub element for this element
|
93
|
+
*/
|
94
|
+
static VALUE
|
95
|
+
default_sub_element(VALUE self)
|
96
|
+
{
|
97
|
+
const htmlElemDesc *description;
|
98
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html_elem_desc_type, description);
|
99
|
+
|
100
|
+
if (description->defaultsubelt) {
|
101
|
+
return NOKOGIRI_STR_NEW2(description->defaultsubelt);
|
102
|
+
}
|
103
|
+
|
104
|
+
return Qnil;
|
105
|
+
}
|
106
|
+
|
107
|
+
/*
|
108
|
+
* call-seq:
|
109
|
+
* sub_elements
|
110
|
+
*
|
111
|
+
* A list of allowed sub elements for this element.
|
112
|
+
*/
|
113
|
+
static VALUE
|
114
|
+
sub_elements(VALUE self)
|
115
|
+
{
|
116
|
+
const htmlElemDesc *description;
|
117
|
+
VALUE list;
|
118
|
+
int i;
|
119
|
+
|
120
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html_elem_desc_type, description);
|
121
|
+
|
122
|
+
list = rb_ary_new();
|
123
|
+
|
124
|
+
if (NULL == description->subelts) { return list; }
|
125
|
+
|
126
|
+
for (i = 0; description->subelts[i]; i++) {
|
127
|
+
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->subelts[i]));
|
128
|
+
}
|
129
|
+
|
130
|
+
return list;
|
131
|
+
}
|
132
|
+
|
133
|
+
/*
|
134
|
+
* call-seq:
|
135
|
+
* description
|
136
|
+
*
|
137
|
+
* The description for this element
|
138
|
+
*/
|
139
|
+
static VALUE
|
140
|
+
description(VALUE self)
|
141
|
+
{
|
142
|
+
const htmlElemDesc *description;
|
143
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html_elem_desc_type, description);
|
144
|
+
|
145
|
+
return NOKOGIRI_STR_NEW2(description->desc);
|
146
|
+
}
|
147
|
+
|
148
|
+
/*
|
149
|
+
* call-seq:
|
150
|
+
* inline?
|
151
|
+
*
|
152
|
+
* Is this element an inline element?
|
153
|
+
*/
|
154
|
+
static VALUE
|
155
|
+
inline_eh(VALUE self)
|
156
|
+
{
|
157
|
+
const htmlElemDesc *description;
|
158
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html_elem_desc_type, description);
|
159
|
+
|
160
|
+
if (description->isinline) { return Qtrue; }
|
161
|
+
return Qfalse;
|
162
|
+
}
|
163
|
+
|
164
|
+
/*
|
165
|
+
* call-seq:
|
166
|
+
* deprecated?
|
167
|
+
*
|
168
|
+
* Is this element deprecated?
|
169
|
+
*/
|
170
|
+
static VALUE
|
171
|
+
deprecated_eh(VALUE self)
|
172
|
+
{
|
173
|
+
const htmlElemDesc *description;
|
174
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html_elem_desc_type, description);
|
175
|
+
|
176
|
+
if (description->depr) { return Qtrue; }
|
177
|
+
return Qfalse;
|
178
|
+
}
|
179
|
+
|
180
|
+
/*
|
181
|
+
* call-seq:
|
182
|
+
* empty?
|
183
|
+
*
|
184
|
+
* Is this an empty element?
|
185
|
+
*/
|
186
|
+
static VALUE
|
187
|
+
empty_eh(VALUE self)
|
188
|
+
{
|
189
|
+
const htmlElemDesc *description;
|
190
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html_elem_desc_type, description);
|
191
|
+
|
192
|
+
if (description->empty) { return Qtrue; }
|
193
|
+
return Qfalse;
|
194
|
+
}
|
195
|
+
|
196
|
+
/*
|
197
|
+
* call-seq:
|
198
|
+
* save_end_tag?
|
199
|
+
*
|
200
|
+
* Should the end tag be saved?
|
201
|
+
*/
|
202
|
+
static VALUE
|
203
|
+
save_end_tag_eh(VALUE self)
|
204
|
+
{
|
205
|
+
const htmlElemDesc *description;
|
206
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html_elem_desc_type, description);
|
207
|
+
|
208
|
+
if (description->saveEndTag) { return Qtrue; }
|
209
|
+
return Qfalse;
|
210
|
+
}
|
211
|
+
|
212
|
+
/*
|
213
|
+
* call-seq:
|
214
|
+
* implied_end_tag?
|
215
|
+
*
|
216
|
+
* Can the end tag be implied for this tag?
|
217
|
+
*/
|
218
|
+
static VALUE
|
219
|
+
implied_end_tag_eh(VALUE self)
|
220
|
+
{
|
221
|
+
const htmlElemDesc *description;
|
222
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html_elem_desc_type, description);
|
223
|
+
|
224
|
+
if (description->endTag) { return Qtrue; }
|
225
|
+
return Qfalse;
|
226
|
+
}
|
227
|
+
|
228
|
+
/*
|
229
|
+
* call-seq:
|
230
|
+
* implied_start_tag?
|
231
|
+
*
|
232
|
+
* Can the start tag be implied for this tag?
|
233
|
+
*/
|
234
|
+
static VALUE
|
235
|
+
implied_start_tag_eh(VALUE self)
|
236
|
+
{
|
237
|
+
const htmlElemDesc *description;
|
238
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html_elem_desc_type, description);
|
239
|
+
|
240
|
+
if (description->startTag) { return Qtrue; }
|
241
|
+
return Qfalse;
|
242
|
+
}
|
243
|
+
|
244
|
+
/*
|
245
|
+
* call-seq:
|
246
|
+
* name
|
247
|
+
*
|
248
|
+
* Get the tag name for this ElementDescription
|
249
|
+
*/
|
250
|
+
static VALUE
|
251
|
+
name(VALUE self)
|
252
|
+
{
|
253
|
+
const htmlElemDesc *description;
|
254
|
+
TypedData_Get_Struct(self, htmlElemDesc, &html_elem_desc_type, description);
|
255
|
+
|
256
|
+
if (NULL == description->name) { return Qnil; }
|
257
|
+
return NOKOGIRI_STR_NEW2(description->name);
|
258
|
+
}
|
259
|
+
|
260
|
+
/*
|
261
|
+
* call-seq:
|
262
|
+
* [](tag_name)
|
263
|
+
*
|
264
|
+
* Get ElementDescription for +tag_name+
|
265
|
+
*/
|
266
|
+
static VALUE
|
267
|
+
get_description(VALUE klass, VALUE tag_name)
|
268
|
+
{
|
269
|
+
const htmlElemDesc *description = htmlTagLookup(
|
270
|
+
(const xmlChar *)StringValueCStr(tag_name)
|
271
|
+
);
|
272
|
+
|
273
|
+
if (NULL == description) { return Qnil; }
|
274
|
+
return TypedData_Wrap_Struct(klass, &html_elem_desc_type, DISCARD_CONST_QUAL(void *, description));
|
275
|
+
}
|
276
|
+
|
277
|
+
void
|
278
|
+
noko_init_html_element_description(void)
|
279
|
+
{
|
280
|
+
cNokogiriHtml4ElementDescription = rb_define_class_under(mNokogiriHtml4, "ElementDescription", rb_cObject);
|
281
|
+
|
282
|
+
rb_undef_alloc_func(cNokogiriHtml4ElementDescription);
|
283
|
+
|
284
|
+
rb_define_singleton_method(cNokogiriHtml4ElementDescription, "[]", get_description, 1);
|
285
|
+
|
286
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "name", name, 0);
|
287
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "implied_start_tag?", implied_start_tag_eh, 0);
|
288
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "implied_end_tag?", implied_end_tag_eh, 0);
|
289
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "save_end_tag?", save_end_tag_eh, 0);
|
290
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "empty?", empty_eh, 0);
|
291
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "deprecated?", deprecated_eh, 0);
|
292
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "inline?", inline_eh, 0);
|
293
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "description", description, 0);
|
294
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "sub_elements", sub_elements, 0);
|
295
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "default_sub_element", default_sub_element, 0);
|
296
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "optional_attributes", optional_attributes, 0);
|
297
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "deprecated_attributes", deprecated_attributes, 0);
|
298
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "required_attributes", required_attributes, 0);
|
299
|
+
}
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
static VALUE cNokogiriHtml4EntityLookup;
|
4
|
+
|
5
|
+
/*
|
6
|
+
* call-seq:
|
7
|
+
* get(key)
|
8
|
+
*
|
9
|
+
* Get the HTML4::EntityDescription for +key+
|
10
|
+
*/
|
11
|
+
static VALUE
|
12
|
+
get(VALUE _, VALUE rb_entity_name)
|
13
|
+
{
|
14
|
+
VALUE cNokogiriHtml4EntityDescription;
|
15
|
+
const htmlEntityDesc *c_entity_desc;
|
16
|
+
VALUE rb_constructor_args[3];
|
17
|
+
|
18
|
+
c_entity_desc = htmlEntityLookup((const xmlChar *)StringValueCStr(rb_entity_name));
|
19
|
+
if (NULL == c_entity_desc) {
|
20
|
+
return Qnil;
|
21
|
+
}
|
22
|
+
|
23
|
+
rb_constructor_args[0] = UINT2NUM(c_entity_desc->value);
|
24
|
+
rb_constructor_args[1] = NOKOGIRI_STR_NEW2(c_entity_desc->name);
|
25
|
+
rb_constructor_args[2] = NOKOGIRI_STR_NEW2(c_entity_desc->desc);
|
26
|
+
|
27
|
+
cNokogiriHtml4EntityDescription = rb_const_get_at(mNokogiriHtml4, rb_intern("EntityDescription"));
|
28
|
+
return rb_class_new_instance(3, rb_constructor_args, cNokogiriHtml4EntityDescription);
|
29
|
+
}
|
30
|
+
|
31
|
+
void
|
32
|
+
noko_init_html_entity_lookup(void)
|
33
|
+
{
|
34
|
+
cNokogiriHtml4EntityLookup = rb_define_class_under(mNokogiriHtml4, "EntityLookup", rb_cObject);
|
35
|
+
|
36
|
+
rb_define_method(cNokogiriHtml4EntityLookup, "get", get, 1);
|
37
|
+
}
|
@@ -0,0 +1,40 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriHtml4SaxParser;
|
4
|
+
|
5
|
+
static ID id_start_document;
|
6
|
+
|
7
|
+
static void
|
8
|
+
noko_html4_sax_parser_start_document(void *ctx)
|
9
|
+
{
|
10
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
11
|
+
VALUE self = (VALUE)ctxt->_private;
|
12
|
+
VALUE doc = rb_iv_get(self, "@document");
|
13
|
+
|
14
|
+
xmlSAX2StartDocument(ctx);
|
15
|
+
|
16
|
+
rb_funcall(doc, id_start_document, 0);
|
17
|
+
}
|
18
|
+
|
19
|
+
static VALUE
|
20
|
+
noko_html4_sax_parser_initialize(VALUE self)
|
21
|
+
{
|
22
|
+
xmlSAXHandlerPtr handler = noko_xml_sax_parser_unwrap(self);
|
23
|
+
|
24
|
+
rb_call_super(0, NULL);
|
25
|
+
|
26
|
+
handler->startDocument = noko_html4_sax_parser_start_document;
|
27
|
+
|
28
|
+
return self;
|
29
|
+
}
|
30
|
+
|
31
|
+
void
|
32
|
+
noko_init_html4_sax_parser(void)
|
33
|
+
{
|
34
|
+
cNokogiriHtml4SaxParser = rb_define_class_under(mNokogiriHtml4Sax, "Parser", cNokogiriXmlSaxParser);
|
35
|
+
|
36
|
+
rb_define_private_method(cNokogiriHtml4SaxParser, "initialize_native",
|
37
|
+
noko_html4_sax_parser_initialize, 0);
|
38
|
+
|
39
|
+
id_start_document = rb_intern("start_document");
|
40
|
+
}
|
@@ -0,0 +1,98 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriHtml4SaxParserContext ;
|
4
|
+
|
5
|
+
/* :nodoc: */
|
6
|
+
static VALUE
|
7
|
+
noko_html4_sax_parser_context_s_native_memory(VALUE rb_class, VALUE rb_input, VALUE rb_encoding)
|
8
|
+
{
|
9
|
+
Check_Type(rb_input, T_STRING);
|
10
|
+
if (!(int)RSTRING_LEN(rb_input)) {
|
11
|
+
rb_raise(rb_eRuntimeError, "input string cannot be empty");
|
12
|
+
}
|
13
|
+
|
14
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
15
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
16
|
+
}
|
17
|
+
|
18
|
+
htmlParserCtxtPtr c_context =
|
19
|
+
htmlCreateMemoryParserCtxt(StringValuePtr(rb_input), (int)RSTRING_LEN(rb_input));
|
20
|
+
if (!c_context) {
|
21
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
22
|
+
}
|
23
|
+
|
24
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
25
|
+
|
26
|
+
if (c_context->sax) {
|
27
|
+
xmlFree(c_context->sax);
|
28
|
+
c_context->sax = NULL;
|
29
|
+
}
|
30
|
+
|
31
|
+
return noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
32
|
+
}
|
33
|
+
|
34
|
+
/* :nodoc: */
|
35
|
+
static VALUE
|
36
|
+
noko_html4_sax_parser_context_s_native_file(VALUE rb_class, VALUE rb_filename, VALUE rb_encoding)
|
37
|
+
{
|
38
|
+
if (!NIL_P(rb_encoding) && !rb_obj_is_kind_of(rb_encoding, rb_cEncoding)) {
|
39
|
+
rb_raise(rb_eTypeError, "argument must be an Encoding object");
|
40
|
+
}
|
41
|
+
|
42
|
+
htmlParserCtxtPtr c_context = htmlCreateFileParserCtxt(StringValueCStr(rb_filename), NULL);
|
43
|
+
if (!c_context) {
|
44
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
45
|
+
}
|
46
|
+
|
47
|
+
noko_xml_sax_parser_context_set_encoding(c_context, rb_encoding);
|
48
|
+
|
49
|
+
if (c_context->sax) {
|
50
|
+
xmlFree(c_context->sax);
|
51
|
+
c_context->sax = NULL;
|
52
|
+
}
|
53
|
+
|
54
|
+
return noko_xml_sax_parser_context_wrap(rb_class, c_context);
|
55
|
+
}
|
56
|
+
|
57
|
+
static VALUE
|
58
|
+
noko_html4_sax_parser_context__parse_with(VALUE rb_context, VALUE rb_sax_parser)
|
59
|
+
{
|
60
|
+
htmlParserCtxtPtr ctxt;
|
61
|
+
htmlSAXHandlerPtr sax;
|
62
|
+
|
63
|
+
if (!rb_obj_is_kind_of(rb_sax_parser, cNokogiriXmlSaxParser)) {
|
64
|
+
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
65
|
+
}
|
66
|
+
|
67
|
+
ctxt = noko_xml_sax_parser_context_unwrap(rb_context);
|
68
|
+
sax = noko_xml_sax_parser_unwrap(rb_sax_parser);
|
69
|
+
|
70
|
+
ctxt->sax = sax;
|
71
|
+
ctxt->userData = ctxt; /* so we can use libxml2/SAX2.c handlers if we want to */
|
72
|
+
ctxt->_private = (void *)rb_sax_parser;
|
73
|
+
|
74
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
75
|
+
|
76
|
+
/* although we're calling back into Ruby here, we don't need to worry about exceptions, because we
|
77
|
+
* don't have any cleanup to do. The only memory we need to free is handled by
|
78
|
+
* xml_sax_parser_context_type_free */
|
79
|
+
htmlParseDocument(ctxt);
|
80
|
+
|
81
|
+
return Qnil;
|
82
|
+
}
|
83
|
+
|
84
|
+
void
|
85
|
+
noko_init_html_sax_parser_context(void)
|
86
|
+
{
|
87
|
+
assert(cNokogiriXmlSaxParserContext);
|
88
|
+
cNokogiriHtml4SaxParserContext = rb_define_class_under(mNokogiriHtml4Sax, "ParserContext",
|
89
|
+
cNokogiriXmlSaxParserContext);
|
90
|
+
|
91
|
+
rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "native_memory",
|
92
|
+
noko_html4_sax_parser_context_s_native_memory, 2);
|
93
|
+
rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "native_file",
|
94
|
+
noko_html4_sax_parser_context_s_native_file, 2);
|
95
|
+
|
96
|
+
rb_define_method(cNokogiriHtml4SaxParserContext, "parse_with",
|
97
|
+
noko_html4_sax_parser_context__parse_with, 1);
|
98
|
+
}
|