nokogiri 1.11.3 → 1.13.8

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (179) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -0
  3. data/LICENSE-DEPENDENCIES.md +243 -22
  4. data/LICENSE.md +1 -1
  5. data/README.md +14 -11
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +13 -64
  8. data/ext/nokogiri/depend +35 -34
  9. data/ext/nokogiri/extconf.rb +237 -133
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
  12. data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +21 -19
  13. data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
  14. data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +8 -8
  15. data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
  16. data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
  17. data/ext/nokogiri/nokogiri.c +70 -38
  18. data/ext/nokogiri/nokogiri.h +27 -9
  19. data/ext/nokogiri/xml_attr.c +2 -2
  20. data/ext/nokogiri/xml_attribute_decl.c +3 -3
  21. data/ext/nokogiri/xml_cdata.c +1 -1
  22. data/ext/nokogiri/xml_document.c +50 -50
  23. data/ext/nokogiri/xml_document_fragment.c +0 -2
  24. data/ext/nokogiri/xml_dtd.c +10 -10
  25. data/ext/nokogiri/xml_element_content.c +2 -0
  26. data/ext/nokogiri/xml_element_decl.c +3 -3
  27. data/ext/nokogiri/xml_encoding_handler.c +31 -12
  28. data/ext/nokogiri/xml_entity_decl.c +5 -5
  29. data/ext/nokogiri/xml_namespace.c +4 -2
  30. data/ext/nokogiri/xml_node.c +833 -492
  31. data/ext/nokogiri/xml_node_set.c +24 -24
  32. data/ext/nokogiri/xml_reader.c +90 -11
  33. data/ext/nokogiri/xml_sax_parser.c +6 -6
  34. data/ext/nokogiri/xml_sax_parser_context.c +12 -3
  35. data/ext/nokogiri/xml_schema.c +5 -3
  36. data/ext/nokogiri/xml_text.c +1 -1
  37. data/ext/nokogiri/xml_xpath_context.c +110 -85
  38. data/ext/nokogiri/xslt_stylesheet.c +109 -10
  39. data/gumbo-parser/CHANGES.md +63 -0
  40. data/gumbo-parser/Makefile +101 -0
  41. data/gumbo-parser/THANKS +27 -0
  42. data/gumbo-parser/src/Makefile +34 -0
  43. data/gumbo-parser/src/README.md +41 -0
  44. data/gumbo-parser/src/ascii.c +75 -0
  45. data/gumbo-parser/src/ascii.h +115 -0
  46. data/gumbo-parser/src/attribute.c +42 -0
  47. data/gumbo-parser/src/attribute.h +17 -0
  48. data/gumbo-parser/src/char_ref.c +22225 -0
  49. data/gumbo-parser/src/char_ref.h +29 -0
  50. data/gumbo-parser/src/char_ref.rl +2154 -0
  51. data/gumbo-parser/src/error.c +626 -0
  52. data/gumbo-parser/src/error.h +148 -0
  53. data/gumbo-parser/src/foreign_attrs.c +104 -0
  54. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  55. data/gumbo-parser/src/gumbo.h +943 -0
  56. data/gumbo-parser/src/insertion_mode.h +33 -0
  57. data/gumbo-parser/src/macros.h +91 -0
  58. data/gumbo-parser/src/parser.c +4875 -0
  59. data/gumbo-parser/src/parser.h +41 -0
  60. data/gumbo-parser/src/replacement.h +33 -0
  61. data/gumbo-parser/src/string_buffer.c +103 -0
  62. data/gumbo-parser/src/string_buffer.h +68 -0
  63. data/gumbo-parser/src/string_piece.c +48 -0
  64. data/gumbo-parser/src/svg_attrs.c +174 -0
  65. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  66. data/gumbo-parser/src/svg_tags.c +137 -0
  67. data/gumbo-parser/src/svg_tags.gperf +55 -0
  68. data/gumbo-parser/src/tag.c +222 -0
  69. data/gumbo-parser/src/tag_lookup.c +382 -0
  70. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  71. data/gumbo-parser/src/tag_lookup.h +13 -0
  72. data/gumbo-parser/src/token_buffer.c +79 -0
  73. data/gumbo-parser/src/token_buffer.h +71 -0
  74. data/gumbo-parser/src/token_type.h +17 -0
  75. data/gumbo-parser/src/tokenizer.c +3463 -0
  76. data/gumbo-parser/src/tokenizer.h +112 -0
  77. data/gumbo-parser/src/tokenizer_states.h +339 -0
  78. data/gumbo-parser/src/utf8.c +245 -0
  79. data/gumbo-parser/src/utf8.h +164 -0
  80. data/gumbo-parser/src/util.c +68 -0
  81. data/gumbo-parser/src/util.h +30 -0
  82. data/gumbo-parser/src/vector.c +111 -0
  83. data/gumbo-parser/src/vector.h +45 -0
  84. data/lib/nokogiri/class_resolver.rb +67 -0
  85. data/lib/nokogiri/css/node.rb +9 -8
  86. data/lib/nokogiri/css/parser.rb +361 -342
  87. data/lib/nokogiri/css/parser.y +250 -245
  88. data/lib/nokogiri/css/parser_extras.rb +22 -20
  89. data/lib/nokogiri/css/syntax_error.rb +2 -1
  90. data/lib/nokogiri/css/tokenizer.rb +4 -3
  91. data/lib/nokogiri/css/tokenizer.rex +3 -2
  92. data/lib/nokogiri/css/xpath_visitor.rb +179 -82
  93. data/lib/nokogiri/css.rb +49 -17
  94. data/lib/nokogiri/decorators/slop.rb +8 -7
  95. data/lib/nokogiri/extension.rb +8 -3
  96. data/lib/nokogiri/gumbo.rb +15 -0
  97. data/lib/nokogiri/html.rb +37 -27
  98. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  99. data/lib/nokogiri/{html → html4}/document.rb +92 -81
  100. data/lib/nokogiri/{html → html4}/document_fragment.rb +13 -9
  101. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  102. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  103. data/lib/nokogiri/{html → html4}/entity_lookup.rb +3 -2
  104. data/lib/nokogiri/{html → html4}/sax/parser.rb +16 -16
  105. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  106. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +11 -11
  107. data/lib/nokogiri/html4.rb +46 -0
  108. data/lib/nokogiri/html5/document.rb +91 -0
  109. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  110. data/lib/nokogiri/html5/node.rb +100 -0
  111. data/lib/nokogiri/html5.rb +478 -0
  112. data/lib/nokogiri/jruby/dependencies.rb +10 -9
  113. data/lib/nokogiri/syntax_error.rb +1 -0
  114. data/lib/nokogiri/version/constant.rb +2 -1
  115. data/lib/nokogiri/version/info.rb +31 -14
  116. data/lib/nokogiri/version.rb +1 -0
  117. data/lib/nokogiri/xml/attr.rb +5 -3
  118. data/lib/nokogiri/xml/attribute_decl.rb +2 -1
  119. data/lib/nokogiri/xml/builder.rb +71 -31
  120. data/lib/nokogiri/xml/cdata.rb +2 -1
  121. data/lib/nokogiri/xml/character_data.rb +1 -0
  122. data/lib/nokogiri/xml/document.rb +183 -96
  123. data/lib/nokogiri/xml/document_fragment.rb +41 -38
  124. data/lib/nokogiri/xml/dtd.rb +3 -2
  125. data/lib/nokogiri/xml/element_content.rb +1 -0
  126. data/lib/nokogiri/xml/element_decl.rb +2 -1
  127. data/lib/nokogiri/xml/entity_decl.rb +3 -2
  128. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  129. data/lib/nokogiri/xml/namespace.rb +2 -0
  130. data/lib/nokogiri/xml/node/save_options.rb +9 -5
  131. data/lib/nokogiri/xml/node.rb +525 -354
  132. data/lib/nokogiri/xml/node_set.rb +50 -54
  133. data/lib/nokogiri/xml/notation.rb +12 -0
  134. data/lib/nokogiri/xml/parse_options.rb +13 -6
  135. data/lib/nokogiri/xml/pp/character_data.rb +8 -6
  136. data/lib/nokogiri/xml/pp/node.rb +24 -26
  137. data/lib/nokogiri/xml/pp.rb +3 -2
  138. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  139. data/lib/nokogiri/xml/reader.rb +20 -24
  140. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  141. data/lib/nokogiri/xml/sax/document.rb +44 -49
  142. data/lib/nokogiri/xml/sax/parser.rb +37 -34
  143. data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
  144. data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
  145. data/lib/nokogiri/xml/sax.rb +5 -4
  146. data/lib/nokogiri/xml/schema.rb +7 -6
  147. data/lib/nokogiri/xml/searchable.rb +93 -62
  148. data/lib/nokogiri/xml/syntax_error.rb +5 -4
  149. data/lib/nokogiri/xml/text.rb +1 -0
  150. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  151. data/lib/nokogiri/xml/xpath.rb +13 -1
  152. data/lib/nokogiri/xml/xpath_context.rb +2 -3
  153. data/lib/nokogiri/xml.rb +37 -37
  154. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  155. data/lib/nokogiri/xslt.rb +28 -20
  156. data/lib/nokogiri.rb +48 -43
  157. data/lib/xsd/xmlparser/nokogiri.rb +25 -24
  158. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  159. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  160. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  161. data/patches/libxml2/{0008-use-glibc-strlen.patch → 0004-use-glibc-strlen.patch} +3 -3
  162. data/patches/libxml2/{0009-avoid-isnan-isinf.patch → 0005-avoid-isnan-isinf.patch} +4 -4
  163. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
  164. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  165. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  166. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
  167. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  168. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  169. metadata +204 -93
  170. data/lib/nokogiri/html/element_description_defaults.rb +0 -672
  171. data/lib/nokogiri/html/sax/parser_context.rb +0 -17
  172. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  173. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  174. data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +0 -73
  175. data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +0 -103
  176. data/patches/libxml2/0010-parser.c-shrink-the-input-buffer-when-appropriate.patch +0 -70
  177. data/patches/libxml2/0011-update-automake-files-for-arm64.patch +0 -2511
  178. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  179. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -64,7 +64,6 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
64
64
  VALUE errstr, exception;
65
65
  xsltStylesheetPtr ss ;
66
66
  Data_Get_Struct(xmldocobj, xmlDoc, xml);
67
- exsltRegisterAll();
68
67
 
69
68
  errstr = rb_str_new(0, 0);
70
69
  xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
@@ -108,19 +107,117 @@ serialize(VALUE self, VALUE xmlobj)
108
107
  }
109
108
 
110
109
  /*
111
- * call-seq:
112
- * transform(document, params = [])
110
+ * call-seq:
111
+ * transform(document)
112
+ * transform(document, params = {})
113
+ *
114
+ * Apply an XSLT stylesheet to an XML::Document.
115
+ *
116
+ * [Parameters]
117
+ * - +document+ (Nokogiri::XML::Document) the document to be transformed.
118
+ * - +params+ (Hash, Array) strings used as XSLT parameters.
119
+ *
120
+ * [Returns] Nokogiri::XML::Document
121
+ *
122
+ * *Example* of basic transformation:
123
+ *
124
+ * xslt = <<~XSLT
125
+ * <xsl:stylesheet version="1.0"
126
+ * xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
127
+ *
128
+ * <xsl:param name="title"/>
129
+ *
130
+ * <xsl:template match="/">
131
+ * <html>
132
+ * <body>
133
+ * <h1><xsl:value-of select="$title"/></h1>
134
+ * <ol>
135
+ * <xsl:for-each select="staff/employee">
136
+ * <li><xsl:value-of select="employeeId"></li>
137
+ * </xsl:for-each>
138
+ * </ol>
139
+ * </body>
140
+ * </html>
141
+ * </xsl:stylesheet>
142
+ * XSLT
143
+ *
144
+ * xml = <<~XML
145
+ * <?xml version="1.0"?>
146
+ * <staff>
147
+ * <employee>
148
+ * <employeeId>EMP0001</employeeId>
149
+ * <position>Accountant</position>
150
+ * </employee>
151
+ * <employee>
152
+ * <employeeId>EMP0002</employeeId>
153
+ * <position>Developer</position>
154
+ * </employee>
155
+ * </staff>
156
+ * XML
157
+ *
158
+ * doc = Nokogiri::XML::Document.parse(xml)
159
+ * stylesheet = Nokogiri::XSLT.parse(xslt)
113
160
  *
114
- * Apply an XSLT stylesheet to an XML::Document.
115
- * +params+ is an array of strings used as XSLT parameters.
116
- * returns Nokogiri::XML::Document
161
+ * Note that the +h1+ element is empty because no param has been provided!
117
162
  *
118
- * Example:
163
+ * stylesheet.transform(doc).to_xml
164
+ * # => "<html><body>\n" +
165
+ * # "<h1></h1>\n" +
166
+ * # "<ol>\n" +
167
+ * # "<li>EMP0001</li>\n" +
168
+ * # "<li>EMP0002</li>\n" +
169
+ * # "</ol>\n" +
170
+ * # "</body></html>\n"
119
171
  *
120
- * doc = Nokogiri::XML(File.read(ARGV[0]))
121
- * xslt = Nokogiri::XSLT(File.read(ARGV[1]))
122
- * puts xslt.transform(doc, ['key', 'value'])
172
+ * *Example* of using an input parameter hash:
123
173
  *
174
+ * ⚠ The title is populated, but note how we need to quote-escape the value.
175
+ *
176
+ * stylesheet.transform(doc, { "title" => "'Employee List'" }).to_xml
177
+ * # => "<html><body>\n" +
178
+ * # "<h1>Employee List</h1>\n" +
179
+ * # "<ol>\n" +
180
+ * # "<li>EMP0001</li>\n" +
181
+ * # "<li>EMP0002</li>\n" +
182
+ * # "</ol>\n" +
183
+ * # "</body></html>\n"
184
+ *
185
+ * *Example* using the XSLT.quote_params helper method to safely quote-escape strings:
186
+ *
187
+ * stylesheet.transform(doc, Nokogiri::XSLT.quote_params({ "title" => "Aaron's List" })).to_xml
188
+ * # => "<html><body>\n" +
189
+ * # "<h1>Aaron's List</h1>\n" +
190
+ * # "<ol>\n" +
191
+ * # "<li>EMP0001</li>\n" +
192
+ * # "<li>EMP0002</li>\n" +
193
+ * # "</ol>\n" +
194
+ * # "</body></html>\n"
195
+ *
196
+ * *Example* using an array of XSLT parameters
197
+ *
198
+ * You can also use an array if you want to.
199
+ *
200
+ * stylesheet.transform(doc, ["title", "'Employee List'"]).to_xml
201
+ * # => "<html><body>\n" +
202
+ * # "<h1>Employee List</h1>\n" +
203
+ * # "<ol>\n" +
204
+ * # "<li>EMP0001</li>\n" +
205
+ * # "<li>EMP0002</li>\n" +
206
+ * # "</ol>\n" +
207
+ * # "</body></html>\n"
208
+ *
209
+ * Or pass an array to XSLT.quote_params:
210
+ *
211
+ * stylesheet.transform(doc, Nokogiri::XSLT.quote_params(["title", "Aaron's List"])).to_xml
212
+ * # => "<html><body>\n" +
213
+ * # "<h1>Aaron's List</h1>\n" +
214
+ * # "<ol>\n" +
215
+ * # "<li>EMP0001</li>\n" +
216
+ * # "<li>EMP0002</li>\n" +
217
+ * # "</ol>\n" +
218
+ * # "</body></html>\n"
219
+ *
220
+ * See: Nokogiri::XSLT.quote_params
124
221
  */
125
222
  static VALUE
126
223
  transform(int argc, VALUE *argv, VALUE self)
@@ -257,6 +354,8 @@ noko_init_xslt_stylesheet()
257
354
 
258
355
  cNokogiriXsltStylesheet = rb_define_class_under(mNokogiriXslt, "Stylesheet", rb_cObject);
259
356
 
357
+ rb_undef_alloc_func(cNokogiriXsltStylesheet);
358
+
260
359
  rb_define_singleton_method(cNokogiriXsltStylesheet, "parse_stylesheet_doc", parse_stylesheet_doc, 1);
261
360
  rb_define_method(cNokogiriXsltStylesheet, "serialize", serialize, 1);
262
361
  rb_define_method(cNokogiriXsltStylesheet, "transform", transform, -1);
@@ -0,0 +1,63 @@
1
+ ## Gumbo 0.10.1 (2015-04-30)
2
+
3
+ Same as 0.10.0, but with the version number bumped because the last version-number commit to v0.9.4 makes GitHub think that v0.9.4 is the latest version and so it's not highlighted on the webpage.
4
+
5
+ ## Gumbo 0.10.0 (2015-04-30)
6
+
7
+ * Full support for `<template>` tag (kevinhendricks, nostrademons).
8
+ * Some fixes for `<rtc>`/`<rt>` handling (kevinhendricks, vmg).
9
+ * All html5lib-trunk tests pass now! (kevinhendricks, vmg, nostrademons)
10
+ * Support for fragment parsing (vmg)
11
+ * A couple additional example programs (kevinhendricks)
12
+ * Performance improvements totaling an estimated 30-40% speedup (vmg, nostrademons).
13
+
14
+ ## Gumbo 0.9.4 (2015-04-30)
15
+
16
+ * Additional Visual Studio fixes (lowjoel, nostrademons)
17
+ * Fixed some unused variable warnings.
18
+ * Fix for glibtoolize vs. libtoolize build errors on Mac.
19
+ * Fixed `CDATA` end tag handling.
20
+
21
+ ## Gumbo 0.9.3 (2015-02-17)
22
+
23
+ * Bugfix for `&AElig;` entities (rgrove)
24
+ * Fix `CDATA` handling; `CDATA` sections now generate a `GUMBO_NODE_CDATA` node rather
25
+ than plain text.
26
+ * Fix `get_title` example to handle whitespace nodes (gsnedders)
27
+ * Visual Studio compilation fixes (fishioon)
28
+ * Take the namespace into account when determining whether a node matches a
29
+ certain tag (aroben)
30
+ * Replace the varargs tag functions with a tagset bytevector, for a 20-30%
31
+ speedup in overall parse time (kevinhendricks, vmg)
32
+ * Add MacOS X support to Travis CI, and fix the deployment/DLL issues this
33
+ uncovered (nostrademons, kevinhendricks, vmg)
34
+
35
+ ## Gumbo 0.9.2 (2014-09-21)
36
+
37
+ * Performance improvements: Ragel-based char ref decoder and DFA-based UTF8
38
+ decoder, totaling speedups of up to 300%.
39
+ * Added benchmarking program and some sample data.
40
+ * Fixed a compiler error under Visual Studio.
41
+ * Fix an error in the ctypes bindings that could lead to memory corruption in
42
+ the Python bindings.
43
+ * Fix duplicate attributes when parsing `<isindex>` tags.
44
+ * Don't leave semicolons behind when consuming entity references (rgrove)
45
+ * Internally rename some functions in preparation for an amalgamation file
46
+ (jdeng)
47
+ * Add proper cflags for gyp builds (skabbes)
48
+
49
+ ## Gumbo 0.9.1 (2014-08-07)
50
+
51
+ * First version listed on PyPi.
52
+ * Autotools files excluded from GitHub and generated via autogen.sh. (endgame)
53
+ * Numerous compiler warnings fixed. (bnoordhuis, craigbarnes)
54
+ * Google security audit passed.
55
+ * Gyp support (tfarina)
56
+ * Naming convention for structs changed to avoid C reserved words.
57
+ * Fix several integer and buffer overflows (Maxime2)
58
+ * Some Visual Studio compiler support (bugparty)
59
+ * Python3 compatibility for the ctypes bindings.
60
+
61
+ ## Gumbo 0.9.0 (2013-08-13)
62
+
63
+ * Initial release open-sourced by Google.
@@ -0,0 +1,101 @@
1
+ .PHONY: all clean check coverage
2
+
3
+ gumbo_objs := $(patsubst %.c,build/%.o,$(wildcard src/*.c))
4
+ test_objs := $(patsubst %.cc,build/%.o,$(wildcard test/*.cc))
5
+ gtest_lib := googletest/make/gtest_main.a
6
+
7
+ # make SANITIZEFLAGS='-fsanitize=undefined -fsanitize=address'
8
+ SANITIZEFLAGS :=
9
+ CPPFLAGS := -Isrc
10
+ CFLAGS := -std=c99 -Os -Wall
11
+ CXXFLAGS := -isystem googletest/include -std=c++11 -Os -Wall
12
+ LDFLAGS := -pthread
13
+
14
+ all: check
15
+
16
+ src/%.c: src/%.rl
17
+ ragel -F1 -o $@ $<
18
+
19
+ build/src:
20
+ mkdir -p $@
21
+
22
+ build/test:
23
+ mkdir -p $@
24
+
25
+ build/src/%.o: src/%.c build/src/flags | build/src
26
+ $(CC) -MMD $(CPPFLAGS) $(CFLAGS) $(SANITIZEFLAGS) -c -o $@ $<
27
+
28
+ build/test/%.o: test/%.cc build/test/flags | build/test
29
+ $(CXX) -MMD $(CPPFLAGS) $(CXXFLAGS) $(SANITIZEFLAGS) -c -o $@ $<
30
+
31
+ build/run_tests: $(gumbo_objs) $(test_objs) $(gtest_lib)
32
+ $(CXX) -o $@ $+ $(LDFLAGS) $(SANITIZEFLAGS)
33
+
34
+ check: build/run_tests
35
+ ./build/run_tests
36
+
37
+ coverage:
38
+ $(RM) build/{src,test}/*.gcda
39
+ $(RM) build/*.info
40
+ $(MAKE) CPPFLAGS='-Isrc -DNDEBUG=1' \
41
+ CFLAGS='-std=c99 --coverage -g -O0' \
42
+ CXXFLAGS='-isystem googletest/include -std=c++11 --coverage -g -O0' \
43
+ LDFLAGS='--coverage' \
44
+ build/run_tests
45
+ lcov --no-external \
46
+ --initial \
47
+ --capture \
48
+ --base-directory . \
49
+ --directory build \
50
+ --output-file build/coverage-pre.info
51
+ awk -F '[:,]' \
52
+ '/^SF:/ { delete defs } /^FN:/ { defs[$$2]=1 } /^DA:/ { if ($$3 == 0 && $$2 in defs) next } { print }' \
53
+ build/coverage-pre.info > build/coverage-initial.info
54
+ ./build/run_tests
55
+ lcov --no-external \
56
+ --capture \
57
+ --base-directory . \
58
+ --directory build \
59
+ --rc lcov_branch_coverage=1 \
60
+ --output-file build/coverage-test.info
61
+ lcov --add-tracefile build/coverage-initial.info \
62
+ --add-tracefile build/coverage-test.info \
63
+ --rc lcov_branch_coverage=1 \
64
+ --output-file build/coverage.info
65
+ lcov --remove build/coverage.info '$(CURDIR)/googletest/*' \
66
+ --rc lcov_branch_coverage=1 \
67
+ --output-file build/coverage.info
68
+ genhtml --branch-coverage \
69
+ --output-directory build/coverage \
70
+ build/coverage.info
71
+
72
+ clean:
73
+ $(RM) -r build
74
+
75
+ build/src/flags: | build/src
76
+ @echo 'old_CC := $(CC)' > $@
77
+ @echo 'old_CPPFLAGS := $(CPPFLAGS)' >> $@
78
+ @echo 'old_CFLAGS := $(CFLAGS)' >>$@
79
+ @echo 'old_SANITIZEFLAGS := $(SANITIZEFLAGS)' >> $@
80
+ @echo 'old_LDFLAGS := $(LDFLAGS)' >> $@
81
+
82
+ build/test/flags: | build/test
83
+ @echo 'old_CXX := $(CXX)' > $@
84
+ @echo 'old_CPPFLAGS := $(CPPFLAGS)' >> $@
85
+ @echo 'old_CXXFLAGS := $(CXXFLAGS)' >> $@
86
+ @echo 'old_SANITIZEFLAGS := $(SANITIZEFLAGS)' >> $@
87
+ @echo 'old_LDFLAGS := $(LDFLAGS)' >> $@
88
+
89
+ ifeq (,$(filter clean coverage,$(MAKECMDGOALS)))
90
+ # Ensure that the flags are up to date.
91
+ -include build/src/flags build/test/flags
92
+ ifneq ($(old_CC) | $(old_CPPFLAGS) | $(old_CFLAGS) | $(old_SANITIZEFLAGS) | $(old_LDFLAGS),$(CC) | $(CPPFLAGS) | $(CFLAGS) | $(SANITIZEFLAGS) | $(LDFLAGS))
93
+ .PHONY: build/src/flags
94
+ endif
95
+ ifneq ($(old_CXX) | $(old_CPPFLAGS) | $(old_CXXFLAGS) | $(old_SANITIZEFLAGS) | $(old_LDFLAGS),$(CXX) | $(CPPFLAGS) | $(CXXFLAGS) | $(SANITIZEFLAGS) | $(LDFLAGS))
96
+ .PHONY: build/test/flags
97
+ endif
98
+
99
+ # Include dependencies.
100
+ -include $(test_objs:.o=.d) $(gumbo_objs:.o=.d)
101
+ endif
@@ -0,0 +1,27 @@
1
+ Gumbo HTML parser THANKS file
2
+
3
+ Gumbo was originally written by Jonathan Tang, but many people helped out through suggestions, question-answering, code reviews, bugfixes, and organizational support. Here is a list of these people. Help me keep it complete and free of errors.
4
+
5
+ Adam Barth
6
+ Adam Roben
7
+ Ben Noordhuis
8
+ Bowen Han
9
+ Constantinos Michael
10
+ Craig Barnes
11
+ Geoffrey Sneddon
12
+ Ian Hickson
13
+ Jack Deng
14
+ Joel Low
15
+ Jonathan Shneier
16
+ Kevin Hendricks
17
+ Mason Tang
18
+ Maxim Zakharov
19
+ Michal Zalewski
20
+ Neal Norwitz
21
+ Othar Hansson
22
+ Ryan Grove
23
+ Stefan Haustein
24
+ Steffen Meschkat
25
+ Steven Kabbes
26
+ Thiago Farina
27
+ Vicent Marti
@@ -0,0 +1,34 @@
1
+ # this Makefile is used by ext/nokogiri/extconf.rb
2
+ # to enable a mini_portile2 recipe to build the gumbo parser
3
+ .PHONY: clean
4
+
5
+ CFLAGS += -std=c99 -Wall
6
+
7
+ # allow the ENV var to override this
8
+ RANLIB ?= ranlib
9
+
10
+ gumbo_objs := \
11
+ ascii.o \
12
+ attribute.o \
13
+ char_ref.o \
14
+ error.o \
15
+ foreign_attrs.o \
16
+ parser.o \
17
+ string_buffer.o \
18
+ string_piece.o \
19
+ svg_attrs.o \
20
+ svg_tags.o \
21
+ tag.o \
22
+ tag_lookup.o \
23
+ token_buffer.o \
24
+ tokenizer.o \
25
+ utf8.o \
26
+ util.o \
27
+ vector.o
28
+
29
+ libgumbo.a: $(gumbo_objs)
30
+ $(AR) $(ARFLAGS) $@ $(gumbo_objs)
31
+ - ($(RANLIB) $@ || true) >/dev/null 2>&1
32
+
33
+ clean:
34
+ rm -f $(gumbo_objs) libgumbo.a
@@ -0,0 +1,41 @@
1
+ libgumbo
2
+ ========
3
+
4
+ This is an internal fork of the [libgumbo] library, which was copied and
5
+ later modified under the terms of the Apache 2.0 [license]. See `lua-gumbo`
6
+ commit [`0a04728`] for details of the original import.
7
+
8
+ Since importing the code, the following notable fixes and improvements
9
+ have been made:
10
+
11
+ * `91cef89`: Re-implement `adjust_foreign_attributes()` with a gperf hash
12
+ * `b11abe7`: Pass `TagSet` arrays into functions by reference instead of value
13
+ * `b73dc03`: Simplify `maybe_replace_codepoint()` function
14
+ * `d5d0bb3`: Remove special handling of `<menuitem>` tag
15
+ * `7bd5162`: Remove special handling of `<isindex>` tag
16
+ * `a5c1b0e`: Use `realloc(3)` instead of `malloc(3)` in `enlarge_vector_if_full()`
17
+ * `dcbebd7`: Use `realloc(3)` instead of `malloc(3)` in `maybe_resize_string_buffer()`
18
+ * `df15262`: Make `destroy_node()` function non-recursive
19
+ * `2df37f5`: Fix signedness of some format specifiers
20
+ * `176553e`: Add maximum element nesting limit
21
+ * `bed0f4a`: Annotate `gumbo_debug()` with `PRINTF` macro and fix warnings
22
+ * `7ffc218`: Annotate `print_message()` with `PRINTF` macro and fix warnings
23
+ * `1bd8ab5`, `9136507`, `53a1f9a`: Deduplicate some identical `TagSet` arrays
24
+ * `a7a9065`: Add some GCC/Clang function attributes
25
+ * `8d3d4e4`: Remove custom allocator support
26
+ * `8d3b006`: Fix recording of source positions for `</form>` end tags
27
+ * `1a8d763`: Replace linear search in `maybe_replace_codepoint()` with a lookup table
28
+ * `6dca79e`: Replace `strcasecmp()` and `strncasecmp()` with ascii-only equivalents
29
+ * `17ab1d2`: Fix `TAGSET_INCLUDES` macro to work properly with multiple bit flags
30
+ * `7e56d45`: Re-implement `gumbo_normalize_svg_tagname()` with a gperf hash
31
+ * `a518d35`: Replace linear array search in `adjust_svg_attributes()` with a gperf hash
32
+ * `a4a7433`: Fix duplicate `TagSet` initializer being ignored in `is_special_node()`
33
+ * `8137fcd`: Add support for `<dialog>` tag
34
+ * `4b35471`: Add missing `static` qualifiers to hide symbols that shouldn't be extern
35
+ * `df57c59`, `03101f3`, `ea62330`: Replace use of locale-dependent `ctype.h` functions
36
+ with custom, ASCII-only equivalents
37
+
38
+
39
+ [libgumbo]: https://github.com/google/gumbo-parser/tree/aa91b27b02c0c80c482e24348a457ed7c3c088e0/src
40
+ [license]: https://github.com/google/gumbo-parser/blob/aa91b27b02c0c80c482e24348a457ed7c3c088e0/COPYING
41
+ [`0a04728`]: https://gitlab.com/craigbarnes/lua-gumbo/commit/0a047282815af86f3367a7d95fefcfe5723ece48
@@ -0,0 +1,75 @@
1
+ #include "ascii.h"
2
+
3
+ int gumbo_ascii_strcasecmp(const char *s1, const char *s2) {
4
+ int c1, c2;
5
+ while (*s1 && *s2) {
6
+ c1 = (int)(unsigned char) gumbo_ascii_tolower(*s1);
7
+ c2 = (int)(unsigned char) gumbo_ascii_tolower(*s2);
8
+ if (c1 != c2) {
9
+ return (c1 - c2);
10
+ }
11
+ s1++;
12
+ s2++;
13
+ }
14
+ return (((int)(unsigned char) *s1) - ((int)(unsigned char) *s2));
15
+ }
16
+
17
+ int gumbo_ascii_strncasecmp(const char *s1, const char *s2, size_t n) {
18
+ int c1, c2;
19
+ while (n && *s1 && *s2) {
20
+ n -= 1;
21
+ c1 = (int)(unsigned char) gumbo_ascii_tolower(*s1);
22
+ c2 = (int)(unsigned char) gumbo_ascii_tolower(*s2);
23
+ if (c1 != c2) {
24
+ return (c1 - c2);
25
+ }
26
+ s1++;
27
+ s2++;
28
+ }
29
+ if (n) {
30
+ return (((int)(unsigned char) *s1) - ((int)(unsigned char) *s2));
31
+ }
32
+ return 0;
33
+ }
34
+
35
+ const unsigned char _gumbo_ascii_table[0x80] = {
36
+ 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x03,0x03,0x01,0x03,0x03,0x01,0x01,
37
+ 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
38
+ 0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
39
+ 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00,
40
+ 0x00,0x28,0x28,0x28,0x28,0x28,0x28,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
41
+ 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x00,0x00,0x00,0x00,0x00,
42
+ 0x00,0x50,0x50,0x50,0x50,0x50,0x50,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
43
+ 0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x00,0x00,0x00,0x00,0x00,
44
+ };
45
+
46
+ // Table generation code.
47
+ // clang -DGUMBO_GEN_TABLE=1 ascii.c && ./a.out && rm a.out
48
+ #if GUMBO_GEN_TABLE
49
+ #include <stdio.h>
50
+
51
+ int main() {
52
+ printf("const unsigned char _gumbo_ascii_table[0x80] = {");
53
+ for (int c = 0; c < 0x80; ++c) {
54
+ unsigned int x = 0;
55
+ // https://infra.spec.whatwg.org/#ascii-code-point
56
+ if (c <= 0x1f)
57
+ x |= GUMBO_ASCII_CNTRL;
58
+ if (c == 0x09 || c == 0x0a || c == 0x0c || c == 0x0d || c == 0x20)
59
+ x |= GUMBO_ASCII_SPACE;
60
+ if (c >= 0x30 && c <= 0x39)
61
+ x |= GUMBO_ASCII_DIGIT;
62
+ if ((c >= 0x30 && c <= 0x39) || (c >= 0x41 && c <= 0x46))
63
+ x |= GUMBO_ASCII_UPPER_XDIGIT;
64
+ if ((c >= 0x30 && c <= 0x39) || (c >= 0x61 && c <= 0x66))
65
+ x |= GUMBO_ASCII_LOWER_XDIGIT;
66
+ if (c >= 0x41 && c <= 0x5a)
67
+ x |= GUMBO_ASCII_UPPER_ALPHA;
68
+ if (c >= 0x61 && c <= 0x7a)
69
+ x |= GUMBO_ASCII_LOWER_ALPHA;
70
+ printf("%s0x%02x,", (c % 16 == 0? "\n " : ""), x);
71
+ }
72
+ printf("\n};\n");
73
+ return 0;
74
+ }
75
+ #endif
@@ -0,0 +1,115 @@
1
+ #ifndef GUMBO_ASCII_H_
2
+ #define GUMBO_ASCII_H_
3
+
4
+ #include <stddef.h>
5
+ #include "macros.h"
6
+
7
+ #ifdef __cplusplus
8
+ extern "C" {
9
+ #endif
10
+
11
+ PURE NONNULL_ARGS
12
+ int gumbo_ascii_strcasecmp(const char *s1, const char *s2);
13
+
14
+ PURE NONNULL_ARGS
15
+ int gumbo_ascii_strncasecmp(const char *s1, const char *s2, size_t n);
16
+
17
+ // If these values change, then _gumbo_ascii_table needs to be regenerated.
18
+ #define GUMBO_ASCII_CNTRL 1
19
+ #define GUMBO_ASCII_SPACE 2
20
+ #define GUMBO_ASCII_DIGIT 4
21
+ #define GUMBO_ASCII_UPPER_XDIGIT 8
22
+ #define GUMBO_ASCII_LOWER_XDIGIT 16
23
+ #define GUMBO_ASCII_UPPER_ALPHA 32
24
+ #define GUMBO_ASCII_LOWER_ALPHA 64
25
+ #define GUMBO_ASCII_XDIGIT (GUMBO_ASCII_LOWER_XDIGIT | GUMBO_ASCII_UPPER_XDIGIT)
26
+ #define GUMBO_ASCII_ALPHA (GUMBO_ASCII_UPPER_ALPHA | GUMBO_ASCII_LOWER_ALPHA)
27
+ #define GUMBO_ASCII_ALNUM (GUMBO_ASCII_DIGIT | GUMBO_ASCII_ALPHA)
28
+
29
+ extern const unsigned char _gumbo_ascii_table[0x80];
30
+
31
+ CONST_FN
32
+ static inline int gumbo_ascii_isascii(int c) {
33
+ return ((unsigned int)c & ~0x7fu) == 0;
34
+ }
35
+
36
+ // 0x00 -- 0x1F (A C0 control)
37
+ CONST_FN
38
+ static inline int gumbo_ascii_iscntrl(int c) {
39
+ return gumbo_ascii_isascii(c)
40
+ && (_gumbo_ascii_table[c] & GUMBO_ASCII_CNTRL);
41
+ }
42
+
43
+ // 0x09, 0x0a, 0x0c, 0x0d, 0x20
44
+ CONST_FN
45
+ static inline int gumbo_ascii_isspace(int c) {
46
+ return gumbo_ascii_isascii(c)
47
+ && (_gumbo_ascii_table[c] & GUMBO_ASCII_SPACE);
48
+ }
49
+
50
+ CONST_FN
51
+ static inline int gumbo_ascii_istab_or_newline(int c) {
52
+ return c == 0x09 || c == 0x0a || c == 0x0d;
53
+ }
54
+
55
+
56
+ CONST_FN
57
+ static inline int gumbo_ascii_isdigit(int c) {
58
+ return c >= 0x30 && c <= 0x39;
59
+ }
60
+
61
+ CONST_FN
62
+ static inline int gumbo_ascii_isalpha(int c) {
63
+ return gumbo_ascii_isascii(c)
64
+ && (_gumbo_ascii_table[c] & GUMBO_ASCII_ALPHA);
65
+ }
66
+
67
+ CONST_FN
68
+ static inline int gumbo_ascii_isxdigit(int c) {
69
+ return gumbo_ascii_isascii(c)
70
+ && (_gumbo_ascii_table[c] & GUMBO_ASCII_XDIGIT);
71
+ }
72
+
73
+ CONST_FN
74
+ static inline int gumbo_ascii_isupper_xdigit(int c) {
75
+ return gumbo_ascii_isascii(c)
76
+ && (_gumbo_ascii_table[c] & GUMBO_ASCII_UPPER_XDIGIT);
77
+ }
78
+
79
+ CONST_FN
80
+ static inline int gumbo_ascii_islower_xdigit(int c) {
81
+ return gumbo_ascii_isascii(c)
82
+ && (_gumbo_ascii_table[c] & GUMBO_ASCII_LOWER_XDIGIT);
83
+ }
84
+
85
+ CONST_FN
86
+ static inline int gumbo_ascii_isupper(int c) {
87
+ return ((unsigned)(c) - 'A') < 26;
88
+ }
89
+
90
+ CONST_FN
91
+ static inline int gumbo_ascii_islower(int c) {
92
+ return gumbo_ascii_isascii(c)
93
+ && (_gumbo_ascii_table[c] & GUMBO_ASCII_LOWER_ALPHA);
94
+ }
95
+
96
+ CONST_FN
97
+ static inline int gumbo_ascii_isalnum(int c) {
98
+ return gumbo_ascii_isascii(c)
99
+ && (_gumbo_ascii_table[c] & GUMBO_ASCII_ALNUM);
100
+ }
101
+
102
+
103
+ CONST_FN
104
+ static inline int gumbo_ascii_tolower(int c) {
105
+ if (gumbo_ascii_isupper(c)) {
106
+ return c | 32;
107
+ }
108
+ return c;
109
+ }
110
+
111
+ #ifdef __cplusplus
112
+ }
113
+ #endif
114
+
115
+ #endif // GUMBO_ASCII_H_
@@ -0,0 +1,42 @@
1
+ /*
2
+ Copyright 2018 Craig Barnes.
3
+ Copyright 2010 Google Inc.
4
+
5
+ Licensed under the Apache License, Version 2.0 (the "License");
6
+ you may not use this file except in compliance with the License.
7
+ You may obtain a copy of the License at
8
+
9
+ https://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+ */
17
+
18
+ #include <assert.h>
19
+ #include <stdlib.h>
20
+ #include <string.h>
21
+ #include "attribute.h"
22
+ #include "ascii.h"
23
+ #include "util.h"
24
+
25
+ GumboAttribute* gumbo_get_attribute (
26
+ const GumboVector* attributes,
27
+ const char* name
28
+ ) {
29
+ for (unsigned int i = 0; i < attributes->length; ++i) {
30
+ GumboAttribute* attr = attributes->data[i];
31
+ if (!gumbo_ascii_strcasecmp(attr->name, name)) {
32
+ return attr;
33
+ }
34
+ }
35
+ return NULL;
36
+ }
37
+
38
+ void gumbo_destroy_attribute(GumboAttribute* attribute) {
39
+ gumbo_free((void*) attribute->name);
40
+ gumbo_free((void*) attribute->value);
41
+ gumbo_free((void*) attribute);
42
+ }
@@ -0,0 +1,17 @@
1
+ #ifndef GUMBO_ATTRIBUTE_H_
2
+ #define GUMBO_ATTRIBUTE_H_
3
+
4
+ #include "gumbo.h"
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ // Release the memory used for a GumboAttribute, including the attribute itself
11
+ void gumbo_destroy_attribute(GumboAttribute* attribute);
12
+
13
+ #ifdef __cplusplus
14
+ }
15
+ #endif
16
+
17
+ #endif // GUMBO_ATTRIBUTE_H_