nokogiri 1.11.7 → 1.12.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +243 -22
- data/LICENSE.md +1 -1
- data/README.md +6 -5
- data/ext/nokogiri/depend +35 -34
- data/ext/nokogiri/extconf.rb +181 -103
- data/ext/nokogiri/gumbo.c +611 -0
- data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
- data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +20 -18
- data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
- data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +5 -5
- data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
- data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
- data/ext/nokogiri/nokogiri.c +51 -38
- data/ext/nokogiri/nokogiri.h +16 -9
- data/ext/nokogiri/xml_document.c +13 -13
- data/ext/nokogiri/xml_element_content.c +2 -0
- data/ext/nokogiri/xml_encoding_handler.c +11 -6
- data/ext/nokogiri/xml_namespace.c +2 -0
- data/ext/nokogiri/xml_node.c +102 -102
- data/ext/nokogiri/xml_node_set.c +20 -20
- data/ext/nokogiri/xml_reader.c +2 -0
- data/ext/nokogiri/xml_sax_parser.c +6 -6
- data/ext/nokogiri/xml_sax_parser_context.c +2 -0
- data/ext/nokogiri/xml_schema.c +2 -0
- data/ext/nokogiri/xml_xpath_context.c +67 -65
- data/ext/nokogiri/xslt_stylesheet.c +2 -1
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +17 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4886 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri.rb +31 -29
- data/lib/nokogiri/css.rb +14 -14
- data/lib/nokogiri/css/parser.rb +1 -1
- data/lib/nokogiri/css/parser.y +1 -1
- data/lib/nokogiri/css/syntax_error.rb +1 -1
- data/lib/nokogiri/extension.rb +2 -2
- data/lib/nokogiri/gumbo.rb +14 -0
- data/lib/nokogiri/html.rb +31 -27
- data/lib/nokogiri/html4.rb +40 -0
- data/lib/nokogiri/{html → html4}/builder.rb +2 -2
- data/lib/nokogiri/{html → html4}/document.rb +4 -4
- data/lib/nokogiri/{html → html4}/document_fragment.rb +3 -3
- data/lib/nokogiri/{html → html4}/element_description.rb +1 -1
- data/lib/nokogiri/{html → html4}/element_description_defaults.rb +1 -1
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +1 -1
- data/lib/nokogiri/{html → html4}/sax/parser.rb +11 -14
- data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +5 -5
- data/lib/nokogiri/html5.rb +473 -0
- data/lib/nokogiri/html5/document.rb +74 -0
- data/lib/nokogiri/html5/document_fragment.rb +80 -0
- data/lib/nokogiri/html5/node.rb +93 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +11 -2
- data/lib/nokogiri/xml.rb +35 -36
- data/lib/nokogiri/xml/node.rb +6 -5
- data/lib/nokogiri/xml/parse_options.rb +2 -0
- data/lib/nokogiri/xml/pp.rb +2 -2
- data/lib/nokogiri/xml/sax.rb +4 -4
- data/lib/nokogiri/xml/sax/document.rb +24 -30
- data/lib/nokogiri/xml/xpath.rb +2 -2
- data/lib/nokogiri/xslt.rb +16 -16
- data/lib/nokogiri/xslt/stylesheet.rb +1 -1
- metadata +102 -60
- data/lib/nokogiri/html/sax/parser_context.rb +0 -17
data/ext/nokogiri/xml_schema.c
CHANGED
@@ -274,6 +274,8 @@ noko_init_xml_schema()
|
|
274
274
|
{
|
275
275
|
cNokogiriXmlSchema = rb_define_class_under(mNokogiriXml, "Schema", rb_cObject);
|
276
276
|
|
277
|
+
rb_undef_alloc_func(cNokogiriXmlSchema);
|
278
|
+
|
277
279
|
rb_define_singleton_method(cNokogiriXmlSchema, "read_memory", read_memory, -1);
|
278
280
|
rb_define_singleton_method(cNokogiriXmlSchema, "from_document", from_document, -1);
|
279
281
|
|
@@ -154,20 +154,20 @@ Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, i
|
|
154
154
|
do {
|
155
155
|
obj = valuePop(ctx);
|
156
156
|
switch (obj->type) {
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
157
|
+
case XPATH_STRING:
|
158
|
+
argv[i] = NOKOGIRI_STR_NEW2(obj->stringval);
|
159
|
+
break;
|
160
|
+
case XPATH_BOOLEAN:
|
161
|
+
argv[i] = obj->boolval == 1 ? Qtrue : Qfalse;
|
162
|
+
break;
|
163
|
+
case XPATH_NUMBER:
|
164
|
+
argv[i] = rb_float_new(obj->floatval);
|
165
|
+
break;
|
166
|
+
case XPATH_NODESET:
|
167
|
+
argv[i] = noko_xml_node_set_wrap(obj->nodesetval, doc);
|
168
|
+
break;
|
169
|
+
default:
|
170
|
+
argv[i] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj));
|
171
171
|
}
|
172
172
|
xmlXPathFreeNodeSetList(obj);
|
173
173
|
} while (i-- > 0);
|
@@ -181,43 +181,43 @@ Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, i
|
|
181
181
|
free(argv);
|
182
182
|
|
183
183
|
switch (TYPE(result)) {
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
break;
|
201
|
-
case T_NIL:
|
202
|
-
break;
|
203
|
-
case T_ARRAY: {
|
204
|
-
VALUE args[2];
|
205
|
-
args[0] = doc;
|
206
|
-
args[1] = result;
|
207
|
-
node_set = rb_class_new_instance(2, args, cNokogiriXmlNodeSet);
|
208
|
-
Data_Get_Struct(node_set, xmlNodeSet, xml_node_set);
|
209
|
-
xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set));
|
210
|
-
}
|
211
|
-
break;
|
212
|
-
case T_DATA:
|
213
|
-
if (rb_obj_is_kind_of(result, cNokogiriXmlNodeSet)) {
|
214
|
-
Data_Get_Struct(result, xmlNodeSet, xml_node_set);
|
215
|
-
/* Copy the node set, otherwise it will get GC'd. */
|
216
|
-
xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set));
|
184
|
+
case T_FLOAT:
|
185
|
+
case T_BIGNUM:
|
186
|
+
case T_FIXNUM:
|
187
|
+
xmlXPathReturnNumber(ctx, NUM2DBL(result));
|
188
|
+
break;
|
189
|
+
case T_STRING:
|
190
|
+
xmlXPathReturnString(
|
191
|
+
ctx,
|
192
|
+
xmlCharStrdup(StringValueCStr(result))
|
193
|
+
);
|
194
|
+
break;
|
195
|
+
case T_TRUE:
|
196
|
+
xmlXPathReturnTrue(ctx);
|
197
|
+
break;
|
198
|
+
case T_FALSE:
|
199
|
+
xmlXPathReturnFalse(ctx);
|
217
200
|
break;
|
201
|
+
case T_NIL:
|
202
|
+
break;
|
203
|
+
case T_ARRAY: {
|
204
|
+
VALUE args[2];
|
205
|
+
args[0] = doc;
|
206
|
+
args[1] = result;
|
207
|
+
node_set = rb_class_new_instance(2, args, cNokogiriXmlNodeSet);
|
208
|
+
Data_Get_Struct(node_set, xmlNodeSet, xml_node_set);
|
209
|
+
xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set));
|
218
210
|
}
|
219
|
-
|
220
|
-
|
211
|
+
break;
|
212
|
+
case T_DATA:
|
213
|
+
if (rb_obj_is_kind_of(result, cNokogiriXmlNodeSet)) {
|
214
|
+
Data_Get_Struct(result, xmlNodeSet, xml_node_set);
|
215
|
+
/* Copy the node set, otherwise it will get GC'd. */
|
216
|
+
xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set));
|
217
|
+
break;
|
218
|
+
}
|
219
|
+
default:
|
220
|
+
rb_raise(rb_eRuntimeError, "Invalid return type");
|
221
221
|
}
|
222
222
|
}
|
223
223
|
|
@@ -314,22 +314,22 @@ evaluate(int argc, VALUE *argv, VALUE self)
|
|
314
314
|
assert(DOC_RUBY_OBJECT_TEST(ctx->doc));
|
315
315
|
|
316
316
|
switch (xpath->type) {
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
317
|
+
case XPATH_STRING:
|
318
|
+
thing = NOKOGIRI_STR_NEW2(xpath->stringval);
|
319
|
+
xmlFree(xpath->stringval);
|
320
|
+
break;
|
321
|
+
case XPATH_NODESET:
|
322
|
+
thing = noko_xml_node_set_wrap(xpath->nodesetval,
|
323
|
+
DOC_RUBY_OBJECT(ctx->doc));
|
324
|
+
break;
|
325
|
+
case XPATH_NUMBER:
|
326
|
+
thing = rb_float_new(xpath->floatval);
|
327
|
+
break;
|
328
|
+
case XPATH_BOOLEAN:
|
329
|
+
thing = xpath->boolval == 1 ? Qtrue : Qfalse;
|
330
|
+
break;
|
331
|
+
default:
|
332
|
+
thing = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc));
|
333
333
|
}
|
334
334
|
|
335
335
|
xmlXPathFreeNodeSetList(xpath);
|
@@ -373,6 +373,8 @@ noko_init_xml_xpath_context(void)
|
|
373
373
|
*/
|
374
374
|
cNokogiriXmlXpathContext = rb_define_class_under(mNokogiriXml, "XPathContext", rb_cObject);
|
375
375
|
|
376
|
+
rb_undef_alloc_func(cNokogiriXmlXpathContext);
|
377
|
+
|
376
378
|
rb_define_singleton_method(cNokogiriXmlXpathContext, "new", new, 1);
|
377
379
|
|
378
380
|
rb_define_method(cNokogiriXmlXpathContext, "evaluate", evaluate, -1);
|
@@ -64,7 +64,6 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
|
64
64
|
VALUE errstr, exception;
|
65
65
|
xsltStylesheetPtr ss ;
|
66
66
|
Data_Get_Struct(xmldocobj, xmlDoc, xml);
|
67
|
-
exsltRegisterAll();
|
68
67
|
|
69
68
|
errstr = rb_str_new(0, 0);
|
70
69
|
xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
|
@@ -257,6 +256,8 @@ noko_init_xslt_stylesheet()
|
|
257
256
|
|
258
257
|
cNokogiriXsltStylesheet = rb_define_class_under(mNokogiriXslt, "Stylesheet", rb_cObject);
|
259
258
|
|
259
|
+
rb_undef_alloc_func(cNokogiriXsltStylesheet);
|
260
|
+
|
260
261
|
rb_define_singleton_method(cNokogiriXsltStylesheet, "parse_stylesheet_doc", parse_stylesheet_doc, 1);
|
261
262
|
rb_define_method(cNokogiriXsltStylesheet, "serialize", serialize, 1);
|
262
263
|
rb_define_method(cNokogiriXsltStylesheet, "transform", transform, -1);
|
@@ -0,0 +1,63 @@
|
|
1
|
+
## Gumbo 0.10.1 (2015-04-30)
|
2
|
+
|
3
|
+
Same as 0.10.0, but with the version number bumped: the last version-number commit to v0.9.4 made GitHub treat v0.9.4 as the latest version, so v0.10.0 was not highlighted on the webpage.
|
4
|
+
|
5
|
+
## Gumbo 0.10.0 (2015-04-30)
|
6
|
+
|
7
|
+
* Full support for `<template>` tag (kevinhendricks, nostrademons).
|
8
|
+
* Some fixes for `<rtc>`/`<rt>` handling (kevinhendricks, vmg).
|
9
|
+
* All html5lib-trunk tests pass now! (kevinhendricks, vmg, nostrademons)
|
10
|
+
* Support for fragment parsing (vmg)
|
11
|
+
* A couple additional example programs (kevinhendricks)
|
12
|
+
* Performance improvements totaling an estimated 30-40% (vmg, nostrademons).
|
13
|
+
|
14
|
+
## Gumbo 0.9.4 (2015-04-30)
|
15
|
+
|
16
|
+
* Additional Visual Studio fixes (lowjoel, nostrademons)
|
17
|
+
* Fixed some unused variable warnings.
|
18
|
+
* Fix for glibtoolize vs. libtoolize build errors on Mac.
|
19
|
+
* Fixed `CDATA` end tag handling.
|
20
|
+
|
21
|
+
## Gumbo 0.9.3 (2015-02-17)
|
22
|
+
|
23
|
+
* Bugfix for `&AElig;` entities (rgrove)
|
24
|
+
* Fix `CDATA` handling; `CDATA` sections now generate a `GUMBO_NODE_CDATA` node rather
|
25
|
+
than plain text.
|
26
|
+
* Fix `get_title` example to handle whitespace nodes (gsnedders)
|
27
|
+
* Visual Studio compilation fixes (fishioon)
|
28
|
+
* Take the namespace into account when determining whether a node matches a
|
29
|
+
certain tag (aroben)
|
30
|
+
* Replace the varargs tag functions with a tagset bytevector, for a 20-30%
|
31
|
+
speedup in overall parse time (kevinhendricks, vmg)
|
32
|
+
* Add MacOS X support to Travis CI, and fix the deployment/DLL issues this
|
33
|
+
uncovered (nostrademons, kevinhendricks, vmg)
|
34
|
+
|
35
|
+
## Gumbo 0.9.2 (2014-09-21)
|
36
|
+
|
37
|
+
* Performance improvements: Ragel-based char ref decoder and DFA-based UTF8
|
38
|
+
decoder, totaling speedups of up to 300%.
|
39
|
+
* Added benchmarking program and some sample data.
|
40
|
+
* Fixed a compiler error under Visual Studio.
|
41
|
+
* Fix an error in the ctypes bindings that could lead to memory corruption in
|
42
|
+
the Python bindings.
|
43
|
+
* Fix duplicate attributes when parsing `<isindex>` tags.
|
44
|
+
* Don't leave semicolons behind when consuming entity references (rgrove)
|
45
|
+
* Internally rename some functions in preparation for an amalgamation file
|
46
|
+
(jdeng)
|
47
|
+
* Add proper cflags for gyp builds (skabbes)
|
48
|
+
|
49
|
+
## Gumbo 0.9.1 (2014-08-07)
|
50
|
+
|
51
|
+
* First version listed on PyPi.
|
52
|
+
* Autotools files excluded from GitHub and generated via autogen.sh. (endgame)
|
53
|
+
* Numerous compiler warnings fixed. (bnoordhuis, craigbarnes)
|
54
|
+
* Google security audit passed.
|
55
|
+
* Gyp support (tfarina)
|
56
|
+
* Naming convention for structs changed to avoid C reserved words.
|
57
|
+
* Fix several integer and buffer overflows (Maxime2)
|
58
|
+
* Some Visual Studio compiler support (bugparty)
|
59
|
+
* Python3 compatibility for the ctypes bindings.
|
60
|
+
|
61
|
+
## Gumbo 0.9.0 (2013-08-13)
|
62
|
+
|
63
|
+
* Initial release open-sourced by Google.
|
@@ -0,0 +1,101 @@
|
|
1
|
+
# Development Makefile: compiles the gumbo sources (src/*.c) and the
# googletest-based tests (test/*.cc) into build/, links build/run_tests,
# runs it, and can produce an lcov/genhtml coverage report.
.PHONY: all clean check coverage

gumbo_objs := $(patsubst %.c,build/%.o,$(wildcard src/*.c))
test_objs := $(patsubst %.cc,build/%.o,$(wildcard test/*.cc))
gtest_lib := googletest/make/gtest_main.a

# make SANITIZEFLAGS='-fsanitize=undefined -fsanitize=address'
SANITIZEFLAGS :=
CPPFLAGS := -Isrc
CFLAGS := -std=c99 -Os -Wall
CXXFLAGS := -isystem googletest/include -std=c++11 -Os -Wall
LDFLAGS := -pthread

all: check

# Regenerate a C source from its Ragel grammar.
src/%.c: src/%.rl
	ragel -F1 -o $@ $<

build/src:
	mkdir -p $@

build/test:
	mkdir -p $@

# Objects depend on the matching flags file so that a change of compiler or
# flags forces a rebuild; the output directories are order-only prerequisites.
build/src/%.o: src/%.c build/src/flags | build/src
	$(CC) -MMD $(CPPFLAGS) $(CFLAGS) $(SANITIZEFLAGS) -c -o $@ $<

build/test/%.o: test/%.cc build/test/flags | build/test
	$(CXX) -MMD $(CPPFLAGS) $(CXXFLAGS) $(SANITIZEFLAGS) -c -o $@ $<

build/run_tests: $(gumbo_objs) $(test_objs) $(gtest_lib)
	$(CXX) -o $@ $+ $(LDFLAGS) $(SANITIZEFLAGS)

check: build/run_tests
	./build/run_tests

# Coverage report: rebuild with --coverage, capture a baseline, run the tests,
# capture again, merge both tracefiles, drop googletest, and render HTML.
# The stale .gcda globs are spelled out one directory at a time because
# recipes run under /bin/sh by default and brace expansion
# (build/{src,test}/...) is not available in a POSIX shell such as dash; with
# rm -f the unexpanded pattern would silently delete nothing.
coverage:
	$(RM) build/src/*.gcda build/test/*.gcda
	$(RM) build/*.info
	$(MAKE) CPPFLAGS='-Isrc -DNDEBUG=1' \
	  CFLAGS='-std=c99 --coverage -g -O0' \
	  CXXFLAGS='-isystem googletest/include -std=c++11 --coverage -g -O0' \
	  LDFLAGS='--coverage' \
	  build/run_tests
	lcov --no-external \
	  --initial \
	  --capture \
	  --base-directory . \
	  --directory build \
	  --output-file build/coverage-pre.info
	awk -F '[:,]' \
	  '/^SF:/ { delete defs } /^FN:/ { defs[$$2]=1 } /^DA:/ { if ($$3 == 0 && $$2 in defs) next } { print }' \
	  build/coverage-pre.info > build/coverage-initial.info
	./build/run_tests
	lcov --no-external \
	  --capture \
	  --base-directory . \
	  --directory build \
	  --rc lcov_branch_coverage=1 \
	  --output-file build/coverage-test.info
	lcov --add-tracefile build/coverage-initial.info \
	  --add-tracefile build/coverage-test.info \
	  --rc lcov_branch_coverage=1 \
	  --output-file build/coverage.info
	lcov --remove build/coverage.info '$(CURDIR)/googletest/*' \
	  --rc lcov_branch_coverage=1 \
	  --output-file build/coverage.info
	genhtml --branch-coverage \
	  --output-directory build/coverage \
	  build/coverage.info

clean:
	$(RM) -r build

# The flags files record the compiler and flags of the last build as old_*
# make variables so they can be compared with the current values below.
build/src/flags: | build/src
	@echo 'old_CC := $(CC)' > $@
	@echo 'old_CPPFLAGS := $(CPPFLAGS)' >> $@
	@echo 'old_CFLAGS := $(CFLAGS)' >>$@
	@echo 'old_SANITIZEFLAGS := $(SANITIZEFLAGS)' >> $@
	@echo 'old_LDFLAGS := $(LDFLAGS)' >> $@

build/test/flags: | build/test
	@echo 'old_CXX := $(CXX)' > $@
	@echo 'old_CPPFLAGS := $(CPPFLAGS)' >> $@
	@echo 'old_CXXFLAGS := $(CXXFLAGS)' >> $@
	@echo 'old_SANITIZEFLAGS := $(SANITIZEFLAGS)' >> $@
	@echo 'old_LDFLAGS := $(LDFLAGS)' >> $@

ifeq (,$(filter clean coverage,$(MAKECMDGOALS)))
# Ensure that the flags are up to date.
# When the recorded values differ from the current ones, the flags file is
# declared phony so its recipe runs again and dependent objects are rebuilt.
-include build/src/flags build/test/flags
ifneq ($(old_CC) | $(old_CPPFLAGS) | $(old_CFLAGS) | $(old_SANITIZEFLAGS) | $(old_LDFLAGS),$(CC) | $(CPPFLAGS) | $(CFLAGS) | $(SANITIZEFLAGS) | $(LDFLAGS))
.PHONY: build/src/flags
endif
ifneq ($(old_CXX) | $(old_CPPFLAGS) | $(old_CXXFLAGS) | $(old_SANITIZEFLAGS) | $(old_LDFLAGS),$(CXX) | $(CPPFLAGS) | $(CXXFLAGS) | $(SANITIZEFLAGS) | $(LDFLAGS))
.PHONY: build/test/flags
endif

# Include dependencies.
-include $(test_objs:.o=.d) $(gumbo_objs:.o=.d)
endif
|
data/gumbo-parser/THANKS
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Gumbo HTML parser THANKS file
|
2
|
+
|
3
|
+
Gumbo was originally written by Jonathan Tang, but many people helped out through suggestions, question-answering, code reviews, bugfixes, and organizational support. Here is a list of these people. Help me keep it complete and free of errors.
|
4
|
+
|
5
|
+
Adam Barth
|
6
|
+
Adam Roben
|
7
|
+
Ben Noordhuis
|
8
|
+
Bowen Han
|
9
|
+
Constantinos Michael
|
10
|
+
Craig Barnes
|
11
|
+
Geoffrey Sneddon
|
12
|
+
Ian Hickson
|
13
|
+
Jack Deng
|
14
|
+
Joel Low
|
15
|
+
Jonathan Shneier
|
16
|
+
Kevin Hendricks
|
17
|
+
Mason Tang
|
18
|
+
Maxim Zakharov
|
19
|
+
Michal Zalewski
|
20
|
+
Neal Norwitz
|
21
|
+
Othar Hansson
|
22
|
+
Ryan Grove
|
23
|
+
Stefan Haustein
|
24
|
+
Steffen Meschkat
|
25
|
+
Steven Kabbes
|
26
|
+
Thiago Farina
|
27
|
+
Vicent Marti
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Invoked from ext/nokogiri/extconf.rb so that a mini_portile2 recipe can
# build the gumbo parser as a static library (libgumbo.a).

# Appended even when CFLAGS is supplied on the make command line.
override CFLAGS += -std=c99 -Wall

# An environment-provided RANLIB takes precedence over this default.
RANLIB ?= ranlib

# One object per C source file found in this directory.
gumbo_srcs := $(wildcard *.c)
gumbo_objs := $(gumbo_srcs:.c=.o)

# Default goal: archive the objects, then index the archive. A missing or
# failing ranlib is tolerated and its output is discarded.
libgumbo.a: $(gumbo_objs)
	$(AR) $(ARFLAGS) $@ $^
	- ($(RANLIB) $@ || true) >/dev/null 2>&1

.PHONY: clean
clean:
	rm -f $(gumbo_objs) libgumbo.a
|
@@ -0,0 +1,41 @@
|
|
1
|
+
libgumbo
|
2
|
+
========
|
3
|
+
|
4
|
+
This is an internal fork of the [libgumbo] library, which was copied and
|
5
|
+
later modified under the terms of the Apache 2.0 [license]. See `lua-gumbo`
|
6
|
+
commit [`0a04728`] for details of the original import.
|
7
|
+
|
8
|
+
Since importing the code, the following notable fixes and improvements
|
9
|
+
have been made:
|
10
|
+
|
11
|
+
* `91cef89`: Re-implement `adjust_foreign_attributes()` with a gperf hash
|
12
|
+
* `b11abe7`: Pass `TagSet` arrays into functions by reference instead of value
|
13
|
+
* `b73dc03`: Simplify `maybe_replace_codepoint()` function
|
14
|
+
* `d5d0bb3`: Remove special handling of `<menuitem>` tag
|
15
|
+
* `7bd5162`: Remove special handling of `<isindex>` tag
|
16
|
+
* `a5c1b0e`: Use `realloc(3)` instead of `malloc(3)` in `enlarge_vector_if_full()`
|
17
|
+
* `dcbebd7`: Use `realloc(3)` instead of `malloc(3)` in `maybe_resize_string_buffer()`
|
18
|
+
* `df15262`: Make `destroy_node()` function non-recursive
|
19
|
+
* `2df37f5`: Fix signedness of some format specifiers
|
20
|
+
* `176553e`: Add maximum element nesting limit
|
21
|
+
* `bed0f4a`: Annotate `gumbo_debug()` with `PRINTF` macro and fix warnings
|
22
|
+
* `7ffc218`: Annotate `print_message()` with `PRINTF` macro and fix warnings
|
23
|
+
* `1bd8ab5`, `9136507`, `53a1f9a`: Deduplicate some identical `TagSet` arrays
|
24
|
+
* `a7a9065`: Add some GCC/Clang function attributes
|
25
|
+
* `8d3d4e4`: Remove custom allocator support
|
26
|
+
* `8d3b006`: Fix recording of source positions for `</form>` end tags
|
27
|
+
* `1a8d763`: Replace linear search in `maybe_replace_codepoint()` with a lookup table
|
28
|
+
* `6dca79e`: Replace `strcasecmp()` and `strncasecmp()` with ascii-only equivalents
|
29
|
+
* `17ab1d2`: Fix `TAGSET_INCLUDES` macro to work properly with multiple bit flags
|
30
|
+
* `7e56d45`: Re-implement `gumbo_normalize_svg_tagname()` with a gperf hash
|
31
|
+
* `a518d35`: Replace linear array search in `adjust_svg_attributes()` with a gperf hash
|
32
|
+
* `a4a7433`: Fix duplicate `TagSet` initializer being ignored in `is_special_node()`
|
33
|
+
* `8137fcd`: Add support for `<dialog>` tag
|
34
|
+
* `4b35471`: Add missing `static` qualifiers to hide symbols that shouldn't be extern
|
35
|
+
* `df57c59`, `03101f3`, `ea62330`: Replace use of locale-dependent `ctype.h` functions
|
36
|
+
with custom, ASCII-only equivalents
|
37
|
+
|
38
|
+
|
39
|
+
[libgumbo]: https://github.com/google/gumbo-parser/tree/aa91b27b02c0c80c482e24348a457ed7c3c088e0/src
|
40
|
+
[license]: https://github.com/google/gumbo-parser/blob/aa91b27b02c0c80c482e24348a457ed7c3c088e0/COPYING
|
41
|
+
[`0a04728`]: https://gitlab.com/craigbarnes/lua-gumbo/commit/0a047282815af86f3367a7d95fefcfe5723ece48
|
@@ -0,0 +1,75 @@
|
|
1
|
+
#include "ascii.h"
|
2
|
+
|
3
|
+
// Compares two NUL-terminated strings, folding each byte through
// gumbo_ascii_tolower() before comparing.
//
// Returns 0 when the strings match. Otherwise returns the difference between
// the first pair of bytes that differ, taken as unsigned char values: the
// folded bytes while both strings still have characters, or the raw bytes at
// the point where at least one string has ended.
int gumbo_ascii_strcasecmp(const char *s1, const char *s2) {
  for (; *s1 != '\0' && *s2 != '\0'; s1++, s2++) {
    const int lhs = (int)(unsigned char) gumbo_ascii_tolower(*s1);
    const int rhs = (int)(unsigned char) gumbo_ascii_tolower(*s2);
    if (lhs != rhs) {
      return lhs - rhs;
    }
  }
  // One (or both) of the strings is exhausted; the terminator decides.
  return (int)(unsigned char) *s1 - (int)(unsigned char) *s2;
}
|
16
|
+
|
17
|
+
// Like gumbo_ascii_strcasecmp(), but examines at most n bytes of each string.
//
// Returns 0 when the first n bytes match after folding through
// gumbo_ascii_tolower() (or when n is 0). Otherwise returns the difference
// between the first differing pair of bytes as unsigned char values: folded
// while both strings still have characters, raw once either has ended.
int gumbo_ascii_strncasecmp(const char *s1, const char *s2, size_t n) {
  size_t remaining = n;
  for (; remaining != 0 && *s1 != '\0' && *s2 != '\0'; remaining--, s1++, s2++) {
    const int lhs = (int)(unsigned char) gumbo_ascii_tolower(*s1);
    const int rhs = (int)(unsigned char) gumbo_ascii_tolower(*s2);
    if (lhs != rhs) {
      return lhs - rhs;
    }
  }
  if (remaining == 0) {
    // The whole n-byte window compared equal.
    return 0;
  }
  // A string ended inside the window; the terminator decides the result.
  return (int)(unsigned char) *s1 - (int)(unsigned char) *s2;
}
|
34
|
+
|
35
|
+
// Classification flags for each ASCII code point (index = code point,
// 0x00-0x7f). Each entry is the OR of the GUMBO_ASCII_* bits assigned by the
// generator guarded by GUMBO_GEN_TABLE in this file: CNTRL for 0x00-0x1f,
// SPACE for tab/LF/FF/CR/space, DIGIT for 0-9, UPPER_XDIGIT for 0-9 and A-F,
// LOWER_XDIGIT for 0-9 and a-f, UPPER_ALPHA for A-Z, LOWER_ALPHA for a-z.
// NOTE(review): the entries imply CNTRL=0x01, SPACE=0x02, DIGIT=0x04,
// UPPER_XDIGIT=0x08, LOWER_XDIGIT=0x10, UPPER_ALPHA=0x20, LOWER_ALPHA=0x40;
// confirm against ascii.h. Regenerate the table rather than editing rows by
// hand.
const unsigned char _gumbo_ascii_table[0x80] = {
|
36
|
+
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x03,0x03,0x01,0x03,0x03,0x01,0x01,
|
37
|
+
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
|
38
|
+
0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
39
|
+
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00,
|
40
|
+
0x00,0x28,0x28,0x28,0x28,0x28,0x28,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
|
41
|
+
0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x00,0x00,0x00,0x00,0x00,
|
42
|
+
0x00,0x50,0x50,0x50,0x50,0x50,0x50,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
|
43
|
+
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x00,0x00,0x00,0x00,0x00,
|
44
|
+
};
|
45
|
+
|
46
|
+
// Table generation code.
|
47
|
+
// clang -DGUMBO_GEN_TABLE=1 ascii.c && ./a.out && rm a.out
|
48
|
+
#if GUMBO_GEN_TABLE
|
49
|
+
#include <stdio.h>
|
50
|
+
|
51
|
+
int main() {
|
52
|
+
printf("const unsigned char _gumbo_ascii_table[0x80] = {");
|
53
|
+
for (int c = 0; c < 0x80; ++c) {
|
54
|
+
unsigned int x = 0;
|
55
|
+
// https://infra.spec.whatwg.org/#ascii-code-point
|
56
|
+
if (c <= 0x1f)
|
57
|
+
x |= GUMBO_ASCII_CNTRL;
|
58
|
+
if (c == 0x09 || c == 0x0a || c == 0x0c || c == 0x0d || c == 0x20)
|
59
|
+
x |= GUMBO_ASCII_SPACE;
|
60
|
+
if (c >= 0x30 && c <= 0x39)
|
61
|
+
x |= GUMBO_ASCII_DIGIT;
|
62
|
+
if ((c >= 0x30 && c <= 0x39) || (c >= 0x41 && c <= 0x46))
|
63
|
+
x |= GUMBO_ASCII_UPPER_XDIGIT;
|
64
|
+
if ((c >= 0x30 && c <= 0x39) || (c >= 0x61 && c <= 0x66))
|
65
|
+
x |= GUMBO_ASCII_LOWER_XDIGIT;
|
66
|
+
if (c >= 0x41 && c <= 0x5a)
|
67
|
+
x |= GUMBO_ASCII_UPPER_ALPHA;
|
68
|
+
if (c >= 0x61 && c <= 0x7a)
|
69
|
+
x |= GUMBO_ASCII_LOWER_ALPHA;
|
70
|
+
printf("%s0x%02x,", (c % 16 == 0? "\n " : ""), x);
|
71
|
+
}
|
72
|
+
printf("\n};\n");
|
73
|
+
return 0;
|
74
|
+
}
|
75
|
+
#endif
|