Nokogiri_precompiled_aarch64_dedshit 1.14.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (263) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +44 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +287 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +41 -0
  8. data/ext/java/nokogiri/Html4Document.java +157 -0
  9. data/ext/java/nokogiri/Html4ElementDescription.java +133 -0
  10. data/ext/java/nokogiri/Html4EntityLookup.java +63 -0
  11. data/ext/java/nokogiri/Html4SaxParserContext.java +289 -0
  12. data/ext/java/nokogiri/Html4SaxPushParser.java +213 -0
  13. data/ext/java/nokogiri/NokogiriService.java +613 -0
  14. data/ext/java/nokogiri/XmlAttr.java +154 -0
  15. data/ext/java/nokogiri/XmlAttributeDecl.java +119 -0
  16. data/ext/java/nokogiri/XmlCdata.java +60 -0
  17. data/ext/java/nokogiri/XmlComment.java +77 -0
  18. data/ext/java/nokogiri/XmlDocument.java +705 -0
  19. data/ext/java/nokogiri/XmlDocumentFragment.java +163 -0
  20. data/ext/java/nokogiri/XmlDtd.java +516 -0
  21. data/ext/java/nokogiri/XmlElement.java +44 -0
  22. data/ext/java/nokogiri/XmlElementContent.java +412 -0
  23. data/ext/java/nokogiri/XmlElementDecl.java +148 -0
  24. data/ext/java/nokogiri/XmlEntityDecl.java +151 -0
  25. data/ext/java/nokogiri/XmlEntityReference.java +79 -0
  26. data/ext/java/nokogiri/XmlNamespace.java +193 -0
  27. data/ext/java/nokogiri/XmlNode.java +1938 -0
  28. data/ext/java/nokogiri/XmlNodeSet.java +463 -0
  29. data/ext/java/nokogiri/XmlProcessingInstruction.java +79 -0
  30. data/ext/java/nokogiri/XmlReader.java +615 -0
  31. data/ext/java/nokogiri/XmlRelaxng.java +133 -0
  32. data/ext/java/nokogiri/XmlSaxParserContext.java +329 -0
  33. data/ext/java/nokogiri/XmlSaxPushParser.java +288 -0
  34. data/ext/java/nokogiri/XmlSchema.java +423 -0
  35. data/ext/java/nokogiri/XmlSyntaxError.java +137 -0
  36. data/ext/java/nokogiri/XmlText.java +90 -0
  37. data/ext/java/nokogiri/XmlXpathContext.java +305 -0
  38. data/ext/java/nokogiri/XsltStylesheet.java +368 -0
  39. data/ext/java/nokogiri/internals/ClosedStreamException.java +13 -0
  40. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +252 -0
  41. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +27 -0
  42. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +178 -0
  43. data/ext/java/nokogiri/internals/NokogiriDomParser.java +99 -0
  44. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +140 -0
  45. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +65 -0
  46. data/ext/java/nokogiri/internals/NokogiriHandler.java +339 -0
  47. data/ext/java/nokogiri/internals/NokogiriHelpers.java +817 -0
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +228 -0
  49. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +110 -0
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +86 -0
  51. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +107 -0
  52. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +62 -0
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +165 -0
  54. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +50 -0
  55. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +37 -0
  56. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +70 -0
  57. data/ext/java/nokogiri/internals/ParserContext.java +262 -0
  58. data/ext/java/nokogiri/internals/ReaderNode.java +564 -0
  59. data/ext/java/nokogiri/internals/SaveContextVisitor.java +865 -0
  60. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +50 -0
  61. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +174 -0
  62. data/ext/java/nokogiri/internals/XmlDeclHandler.java +11 -0
  63. data/ext/java/nokogiri/internals/XmlDomParserContext.java +265 -0
  64. data/ext/java/nokogiri/internals/XmlSaxParser.java +40 -0
  65. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +122 -0
  66. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +178 -0
  67. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +43 -0
  68. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +106 -0
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +278 -0
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +664 -0
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +45 -0
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +45 -0
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +388 -0
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +308 -0
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +47 -0
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +51 -0
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +51 -0
  78. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +50 -0
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +660 -0
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +194 -0
  81. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +77 -0
  82. data/ext/java/nokogiri/internals/c14n/Constants.java +45 -0
  83. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +325 -0
  84. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +106 -0
  85. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +86 -0
  86. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +181 -0
  87. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +87 -0
  88. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +452 -0
  89. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +52 -0
  90. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +190 -0
  91. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +540 -0
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1712 -0
  93. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +737 -0
  94. data/ext/nokogiri/depend +38 -0
  95. data/ext/nokogiri/extconf.rb +1086 -0
  96. data/ext/nokogiri/gumbo.c +594 -0
  97. data/ext/nokogiri/html4_document.c +167 -0
  98. data/ext/nokogiri/html4_element_description.c +294 -0
  99. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  100. data/ext/nokogiri/html4_sax_parser_context.c +116 -0
  101. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  102. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  103. data/ext/nokogiri/nokogiri.c +265 -0
  104. data/ext/nokogiri/nokogiri.h +235 -0
  105. data/ext/nokogiri/test_global_handlers.c +42 -0
  106. data/ext/nokogiri/xml_attr.c +103 -0
  107. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  108. data/ext/nokogiri/xml_cdata.c +57 -0
  109. data/ext/nokogiri/xml_comment.c +62 -0
  110. data/ext/nokogiri/xml_document.c +689 -0
  111. data/ext/nokogiri/xml_document_fragment.c +44 -0
  112. data/ext/nokogiri/xml_dtd.c +210 -0
  113. data/ext/nokogiri/xml_element_content.c +128 -0
  114. data/ext/nokogiri/xml_element_decl.c +69 -0
  115. data/ext/nokogiri/xml_encoding_handler.c +104 -0
  116. data/ext/nokogiri/xml_entity_decl.c +112 -0
  117. data/ext/nokogiri/xml_entity_reference.c +50 -0
  118. data/ext/nokogiri/xml_namespace.c +186 -0
  119. data/ext/nokogiri/xml_node.c +2426 -0
  120. data/ext/nokogiri/xml_node_set.c +496 -0
  121. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  122. data/ext/nokogiri/xml_reader.c +794 -0
  123. data/ext/nokogiri/xml_relax_ng.c +164 -0
  124. data/ext/nokogiri/xml_sax_parser.c +316 -0
  125. data/ext/nokogiri/xml_sax_parser_context.c +283 -0
  126. data/ext/nokogiri/xml_sax_push_parser.c +166 -0
  127. data/ext/nokogiri/xml_schema.c +260 -0
  128. data/ext/nokogiri/xml_syntax_error.c +85 -0
  129. data/ext/nokogiri/xml_text.c +48 -0
  130. data/ext/nokogiri/xml_xpath_context.c +415 -0
  131. data/ext/nokogiri/xslt_stylesheet.c +363 -0
  132. data/gumbo-parser/CHANGES.md +63 -0
  133. data/gumbo-parser/Makefile +111 -0
  134. data/gumbo-parser/THANKS +27 -0
  135. data/gumbo-parser/src/Makefile +34 -0
  136. data/gumbo-parser/src/README.md +41 -0
  137. data/gumbo-parser/src/ascii.c +75 -0
  138. data/gumbo-parser/src/ascii.h +115 -0
  139. data/gumbo-parser/src/attribute.c +42 -0
  140. data/gumbo-parser/src/attribute.h +17 -0
  141. data/gumbo-parser/src/char_ref.c +22225 -0
  142. data/gumbo-parser/src/char_ref.h +29 -0
  143. data/gumbo-parser/src/char_ref.rl +2154 -0
  144. data/gumbo-parser/src/error.c +626 -0
  145. data/gumbo-parser/src/error.h +148 -0
  146. data/gumbo-parser/src/foreign_attrs.c +104 -0
  147. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  148. data/gumbo-parser/src/insertion_mode.h +33 -0
  149. data/gumbo-parser/src/macros.h +91 -0
  150. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  151. data/gumbo-parser/src/parser.c +4878 -0
  152. data/gumbo-parser/src/parser.h +41 -0
  153. data/gumbo-parser/src/replacement.h +33 -0
  154. data/gumbo-parser/src/string_buffer.c +103 -0
  155. data/gumbo-parser/src/string_buffer.h +68 -0
  156. data/gumbo-parser/src/string_piece.c +48 -0
  157. data/gumbo-parser/src/svg_attrs.c +174 -0
  158. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  159. data/gumbo-parser/src/svg_tags.c +137 -0
  160. data/gumbo-parser/src/svg_tags.gperf +55 -0
  161. data/gumbo-parser/src/tag.c +223 -0
  162. data/gumbo-parser/src/tag_lookup.c +382 -0
  163. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  164. data/gumbo-parser/src/tag_lookup.h +13 -0
  165. data/gumbo-parser/src/token_buffer.c +79 -0
  166. data/gumbo-parser/src/token_buffer.h +71 -0
  167. data/gumbo-parser/src/token_type.h +17 -0
  168. data/gumbo-parser/src/tokenizer.c +3463 -0
  169. data/gumbo-parser/src/tokenizer.h +112 -0
  170. data/gumbo-parser/src/tokenizer_states.h +339 -0
  171. data/gumbo-parser/src/utf8.c +245 -0
  172. data/gumbo-parser/src/utf8.h +164 -0
  173. data/gumbo-parser/src/util.c +66 -0
  174. data/gumbo-parser/src/util.h +34 -0
  175. data/gumbo-parser/src/vector.c +111 -0
  176. data/gumbo-parser/src/vector.h +45 -0
  177. data/lib/nokogiri/class_resolver.rb +67 -0
  178. data/lib/nokogiri/css/node.rb +54 -0
  179. data/lib/nokogiri/css/parser.rb +770 -0
  180. data/lib/nokogiri/css/parser.y +277 -0
  181. data/lib/nokogiri/css/parser_extras.rb +96 -0
  182. data/lib/nokogiri/css/syntax_error.rb +9 -0
  183. data/lib/nokogiri/css/tokenizer.rb +155 -0
  184. data/lib/nokogiri/css/tokenizer.rex +56 -0
  185. data/lib/nokogiri/css/xpath_visitor.rb +359 -0
  186. data/lib/nokogiri/css.rb +66 -0
  187. data/lib/nokogiri/decorators/slop.rb +44 -0
  188. data/lib/nokogiri/encoding_handler.rb +57 -0
  189. data/lib/nokogiri/extension.rb +32 -0
  190. data/lib/nokogiri/gumbo.rb +15 -0
  191. data/lib/nokogiri/html.rb +48 -0
  192. data/lib/nokogiri/html4/builder.rb +37 -0
  193. data/lib/nokogiri/html4/document.rb +214 -0
  194. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  195. data/lib/nokogiri/html4/element_description.rb +25 -0
  196. data/lib/nokogiri/html4/element_description_defaults.rb +572 -0
  197. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  198. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  199. data/lib/nokogiri/html4/sax/parser.rb +63 -0
  200. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  201. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  202. data/lib/nokogiri/html4.rb +47 -0
  203. data/lib/nokogiri/html5/document.rb +168 -0
  204. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  205. data/lib/nokogiri/html5/node.rb +98 -0
  206. data/lib/nokogiri/html5.rb +389 -0
  207. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  208. data/lib/nokogiri/jruby/isorelax/isorelax/20030108/isorelax-20030108.jar +0 -0
  209. data/lib/nokogiri/jruby/net/sf/saxon/Saxon-HE/9.6.0-4/Saxon-HE-9.6.0-4.jar +0 -0
  210. data/lib/nokogiri/jruby/net/sourceforge/htmlunit/neko-htmlunit/2.63.0/neko-htmlunit-2.63.0.jar +0 -0
  211. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  212. data/lib/nokogiri/jruby/nu/validator/jing/20200702VNU/jing-20200702VNU.jar +0 -0
  213. data/lib/nokogiri/jruby/org/nokogiri/nekodtd/0.1.11.noko2/nekodtd-0.1.11.noko2.jar +0 -0
  214. data/lib/nokogiri/jruby/xalan/serializer/2.7.3/serializer-2.7.3.jar +0 -0
  215. data/lib/nokogiri/jruby/xalan/xalan/2.7.3/xalan-2.7.3.jar +0 -0
  216. data/lib/nokogiri/jruby/xerces/xercesImpl/2.12.2/xercesImpl-2.12.2.jar +0 -0
  217. data/lib/nokogiri/jruby/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar +0 -0
  218. data/lib/nokogiri/syntax_error.rb +6 -0
  219. data/lib/nokogiri/version/constant.rb +6 -0
  220. data/lib/nokogiri/version/info.rb +223 -0
  221. data/lib/nokogiri/version.rb +4 -0
  222. data/lib/nokogiri/xml/attr.rb +66 -0
  223. data/lib/nokogiri/xml/attribute_decl.rb +20 -0
  224. data/lib/nokogiri/xml/builder.rb +487 -0
  225. data/lib/nokogiri/xml/cdata.rb +13 -0
  226. data/lib/nokogiri/xml/character_data.rb +9 -0
  227. data/lib/nokogiri/xml/document.rb +471 -0
  228. data/lib/nokogiri/xml/document_fragment.rb +205 -0
  229. data/lib/nokogiri/xml/dtd.rb +34 -0
  230. data/lib/nokogiri/xml/element_content.rb +38 -0
  231. data/lib/nokogiri/xml/element_decl.rb +15 -0
  232. data/lib/nokogiri/xml/entity_decl.rb +21 -0
  233. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  234. data/lib/nokogiri/xml/namespace.rb +58 -0
  235. data/lib/nokogiri/xml/node/save_options.rb +68 -0
  236. data/lib/nokogiri/xml/node.rb +1563 -0
  237. data/lib/nokogiri/xml/node_set.rb +447 -0
  238. data/lib/nokogiri/xml/notation.rb +19 -0
  239. data/lib/nokogiri/xml/parse_options.rb +213 -0
  240. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  241. data/lib/nokogiri/xml/pp/node.rb +57 -0
  242. data/lib/nokogiri/xml/pp.rb +4 -0
  243. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  244. data/lib/nokogiri/xml/reader.rb +105 -0
  245. data/lib/nokogiri/xml/relax_ng.rb +38 -0
  246. data/lib/nokogiri/xml/sax/document.rb +167 -0
  247. data/lib/nokogiri/xml/sax/parser.rb +125 -0
  248. data/lib/nokogiri/xml/sax/parser_context.rb +21 -0
  249. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  250. data/lib/nokogiri/xml/sax.rb +6 -0
  251. data/lib/nokogiri/xml/schema.rb +73 -0
  252. data/lib/nokogiri/xml/searchable.rb +270 -0
  253. data/lib/nokogiri/xml/syntax_error.rb +72 -0
  254. data/lib/nokogiri/xml/text.rb +11 -0
  255. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  256. data/lib/nokogiri/xml/xpath.rb +21 -0
  257. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  258. data/lib/nokogiri/xml.rb +76 -0
  259. data/lib/nokogiri/xslt/stylesheet.rb +27 -0
  260. data/lib/nokogiri/xslt.rb +65 -0
  261. data/lib/nokogiri.rb +120 -0
  262. data/lib/xsd/xmlparser/nokogiri.rb +106 -0
  263. metadata +391 -0
@@ -0,0 +1,626 @@
1
+ /*
2
+ Copyright 2010 Google Inc.
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ */
16
+
17
+ #include <assert.h>
18
+ #include <inttypes.h>
19
+ #include <stdarg.h>
20
+ #include <stdio.h>
21
+ #include <string.h>
22
+ #include "ascii.h"
23
+ #include "error.h"
24
+ #include "nokogiri_gumbo.h"
25
+ #include "macros.h"
26
+ #include "parser.h"
27
+ #include "string_buffer.h"
28
+ #include "util.h"
29
+ #include "vector.h"
30
+
31
+ // Prints a formatted message to a StringBuffer. This automatically resizes the
32
+ // StringBuffer as necessary to fit the message. Returns the number of bytes
33
+ // written.
34
+ static int PRINTF(2) print_message (
35
+ GumboStringBuffer* output,
36
+ const char* format,
37
+ ...
38
+ ) {
39
+ va_list args;
40
+ int remaining_capacity = output->capacity - output->length;
41
+ va_start(args, format);
42
+ int bytes_written = vsnprintf (
43
+ output->data + output->length,
44
+ remaining_capacity,
45
+ format,
46
+ args
47
+ );
48
+ va_end(args);
49
+ #if _MSC_VER && _MSC_VER < 1900
50
+ if (bytes_written == -1) {
51
+ // vsnprintf returns -1 on older MSVC++ if there's not enough capacity,
52
+ // instead of returning the number of bytes that would've been written had
53
+ // there been enough. In this case, we'll double the buffer size and hope
54
+ // it fits when we retry (letting it fail and returning 0 if it doesn't),
55
+ // since there's no way to smartly resize the buffer.
56
+ gumbo_string_buffer_reserve(output->capacity * 2, output);
57
+ va_start(args, format);
58
+ int result = vsnprintf (
59
+ output->data + output->length,
60
+ remaining_capacity,
61
+ format,
62
+ args
63
+ );
64
+ va_end(args);
65
+ return result == -1 ? 0 : result;
66
+ }
67
+ #else
68
+ // -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
69
+ if (bytes_written == -1) {
70
+ return 0;
71
+ }
72
+ #endif
73
+
74
+ if (bytes_written >= remaining_capacity) {
75
+ gumbo_string_buffer_reserve(output->capacity + bytes_written, output);
76
+ remaining_capacity = output->capacity - output->length;
77
+ va_start(args, format);
78
+ bytes_written = vsnprintf (
79
+ output->data + output->length,
80
+ remaining_capacity,
81
+ format,
82
+ args
83
+ );
84
+ va_end(args);
85
+ }
86
+ output->length += bytes_written;
87
+ return bytes_written;
88
+ }
89
+
90
+ static void print_tag_stack (
91
+ const GumboParserError* error,
92
+ GumboStringBuffer* output
93
+ ) {
94
+ print_message(output, " Currently open tags: ");
95
+ for (unsigned int i = 0; i < error->tag_stack.length; ++i) {
96
+ if (i) {
97
+ print_message(output, ", ");
98
+ }
99
+ GumboTag tag = (GumboTag)(intptr_t) error->tag_stack.data[i];
100
+ print_message(output, "%s", gumbo_normalized_tagname(tag));
101
+ }
102
+ gumbo_string_buffer_append_codepoint('.', output);
103
+ }
104
+
105
+ static void handle_tokenizer_error (
106
+ const GumboError* error,
107
+ GumboStringBuffer* output
108
+ ) {
109
+ switch (error->type) {
110
+ case GUMBO_ERR_ABRUPT_CLOSING_OF_EMPTY_COMMENT:
111
+ print_message(output, "Empty comment abruptly closed by '%s', use '-->'.",
112
+ error->v.tokenizer.state == GUMBO_LEX_COMMENT_START? ">" : "->");
113
+ break;
114
+ case GUMBO_ERR_ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER:
115
+ print_message (
116
+ output,
117
+ "DOCTYPE public identifier missing closing %s.",
118
+ error->v.tokenizer.state == GUMBO_LEX_DOCTYPE_PUBLIC_ID_DOUBLE_QUOTED?
119
+ "quotation mark (\")" : "apostrophe (')"
120
+ );
121
+ break;
122
+ case GUMBO_ERR_ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER:
123
+ print_message (
124
+ output,
125
+ "DOCTYPE system identifier missing closing %s.",
126
+ error->v.tokenizer.state == GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED?
127
+ "quotation mark (\")" : "apostrophe (')"
128
+ );
129
+ break;
130
+ case GUMBO_ERR_ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE:
131
+ print_message (
132
+ output,
133
+ "Numeric character reference '%.*s' does not contain any %sdigits.",
134
+ (int)error->original_text.length, error->original_text.data,
135
+ error->v.tokenizer.state == GUMBO_LEX_HEXADECIMAL_CHARACTER_REFERENCE_START? "hexadecimal " : ""
136
+ );
137
+ break;
138
+ case GUMBO_ERR_CDATA_IN_HTML_CONTENT:
139
+ print_message(output, "CDATA section outside foreign (SVG or MathML) content.");
140
+ break;
141
+ case GUMBO_ERR_CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE:
142
+ print_message (
143
+ output,
144
+ "Numeric character reference '%.*s' references a code point that is outside the valid Unicode range.",
145
+ (int)error->original_text.length, error->original_text.data
146
+ );
147
+ break;
148
+ case GUMBO_ERR_CONTROL_CHARACTER_IN_INPUT_STREAM:
149
+ print_message (
150
+ output,
151
+ "Input contains prohibited control code point U+%04X.",
152
+ error->v.tokenizer.codepoint
153
+ );
154
+ break;
155
+ case GUMBO_ERR_CONTROL_CHARACTER_REFERENCE:
156
+ print_message (
157
+ output,
158
+ "Numeric character reference '%.*s' references prohibited control code point U+%04X.",
159
+ (int)error->original_text.length, error->original_text.data,
160
+ error->v.tokenizer.codepoint
161
+ );
162
+ break;
163
+ case GUMBO_ERR_END_TAG_WITH_ATTRIBUTES:
164
+ print_message(output, "End tag contains attributes.");
165
+ break;
166
+ case GUMBO_ERR_DUPLICATE_ATTRIBUTE:
167
+ print_message(output, "Tag contains multiple attributes with the same name.");
168
+ break;
169
+ case GUMBO_ERR_END_TAG_WITH_TRAILING_SOLIDUS:
170
+ print_message(output, "End tag ends with '/>', use '>'.");
171
+ break;
172
+ case GUMBO_ERR_EOF_BEFORE_TAG_NAME:
173
+ print_message(output, "End of input where a tag name is expected.");
174
+ break;
175
+ case GUMBO_ERR_EOF_IN_CDATA:
176
+ print_message(output, "End of input in CDATA section.");
177
+ break;
178
+ case GUMBO_ERR_EOF_IN_COMMENT:
179
+ print_message(output, "End of input in comment.");
180
+ break;
181
+ case GUMBO_ERR_EOF_IN_DOCTYPE:
182
+ print_message(output, "End of input in DOCTYPE.");
183
+ break;
184
+ case GUMBO_ERR_EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT:
185
+ print_message(output, "End of input in text that resembles an HTML comment inside script element content.");
186
+ break;
187
+ case GUMBO_ERR_EOF_IN_TAG:
188
+ print_message(output, "End of input in tag.");
189
+ break;
190
+ case GUMBO_ERR_INCORRECTLY_CLOSED_COMMENT:
191
+ print_message(output, "Comment closed incorrectly by '--!>', use '-->'.");
192
+ break;
193
+ case GUMBO_ERR_INCORRECTLY_OPENED_COMMENT:
194
+ print_message(output, "Comment, DOCTYPE, or CDATA opened incorrectly, use '<!--', '<!DOCTYPE', or '<![CDATA['.");
195
+ break;
196
+ case GUMBO_ERR_INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME:
197
+ print_message(output, "Invalid character sequence after DOCTYPE name, expected 'PUBLIC', 'SYSTEM', or '>'.");
198
+ break;
199
+ case GUMBO_ERR_INVALID_FIRST_CHARACTER_OF_TAG_NAME:
200
+ if (gumbo_ascii_isascii(error->v.tokenizer.codepoint)
201
+ && !gumbo_ascii_iscntrl(error->v.tokenizer.codepoint))
202
+ print_message(output, "Invalid first character of tag name '%c'.", error->v.tokenizer.codepoint);
203
+ else
204
+ print_message(output, "Invalid first code point of tag name U+%04X.", error->v.tokenizer.codepoint);
205
+ break;
206
+ case GUMBO_ERR_MISSING_ATTRIBUTE_VALUE:
207
+ print_message(output, "Missing attribute value.");
208
+ break;
209
+ case GUMBO_ERR_MISSING_DOCTYPE_NAME:
210
+ print_message(output, "Missing DOCTYPE name.");
211
+ break;
212
+ case GUMBO_ERR_MISSING_DOCTYPE_PUBLIC_IDENTIFIER:
213
+ print_message(output, "Missing DOCTYPE public identifier.");
214
+ break;
215
+ case GUMBO_ERR_MISSING_DOCTYPE_SYSTEM_IDENTIFIER:
216
+ print_message(output, "Missing DOCTYPE system identifier.");
217
+ break;
218
+ case GUMBO_ERR_MISSING_END_TAG_NAME:
219
+ print_message(output, "Missing end tag name.");
220
+ break;
221
+ case GUMBO_ERR_MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
222
+ print_message(output, "Missing quote before DOCTYPE public identifier.");
223
+ break;
224
+ case GUMBO_ERR_MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
225
+ print_message(output, "Missing quote before DOCTYPE system identifier.");
226
+ break;
227
+ case GUMBO_ERR_MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE:
228
+ print_message(output, "Missing semicolon after character reference '%.*s'.",
229
+ (int)error->original_text.length, error->original_text.data);
230
+ break;
231
+ case GUMBO_ERR_MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD:
232
+ print_message(output, "Missing whitespace after 'PUBLIC' keyword.");
233
+ break;
234
+ case GUMBO_ERR_MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD:
235
+ print_message(output, "Missing whitespace after 'SYSTEM' keyword.");
236
+ break;
237
+ case GUMBO_ERR_MISSING_WHITESPACE_BEFORE_DOCTYPE_NAME:
238
+ print_message(output, "Missing whitespace between 'DOCTYPE' keyword and DOCTYPE name.");
239
+ break;
240
+ case GUMBO_ERR_MISSING_WHITESPACE_BETWEEN_ATTRIBUTES:
241
+ print_message(output, "Missing whitespace between attributes.");
242
+ break;
243
+ case GUMBO_ERR_MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
244
+ print_message(output, "Missing whitespace between DOCTYPE public and system identifiers.");
245
+ break;
246
+ case GUMBO_ERR_NESTED_COMMENT:
247
+ print_message(output, "Nested comment.");
248
+ break;
249
+ case GUMBO_ERR_NONCHARACTER_CHARACTER_REFERENCE:
250
+ print_message (
251
+ output,
252
+ "Numeric character reference '%.*s' references noncharacter U+%04X.",
253
+ (int)error->original_text.length, error->original_text.data,
254
+ error->v.tokenizer.codepoint
255
+ );
256
+ break;
257
+ case GUMBO_ERR_NONCHARACTER_IN_INPUT_STREAM:
258
+ print_message(output, "Input contains noncharacter U+%04X.", error->v.tokenizer.codepoint);
259
+ break;
260
+ case GUMBO_ERR_NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS:
261
+ print_message(output, "Start tag of nonvoid HTML element ends with '/>', use '>'.");
262
+ break;
263
+ case GUMBO_ERR_NULL_CHARACTER_REFERENCE:
264
+ print_message(output, "Numeric character reference '%.*s' references U+0000.",
265
+ (int)error->original_text.length, error->original_text.data);
266
+ break;
267
+ case GUMBO_ERR_SURROGATE_CHARACTER_REFERENCE:
268
+ print_message (
269
+ output,
270
+ "Numeric character reference '%.*s' references surrogate U+%4X.",
271
+ (int)error->original_text.length, error->original_text.data,
272
+ error->v.tokenizer.codepoint
273
+ );
274
+ break;
275
+ case GUMBO_ERR_SURROGATE_IN_INPUT_STREAM:
276
+ print_message(output, "Input contains surrogate U+%04X.", error->v.tokenizer.codepoint);
277
+ break;
278
+ case GUMBO_ERR_UNEXPECTED_CHARACTER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
279
+ print_message(output, "Unexpected character after DOCTYPE system identifier.");
280
+ break;
281
+ case GUMBO_ERR_UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME:
282
+ print_message(output, "Unexpected character (%c) in attribute name.", error->v.tokenizer.codepoint);
283
+ break;
284
+ case GUMBO_ERR_UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE:
285
+ print_message(output, "Unexpected character (%c) in unquoted attribute value.", error->v.tokenizer.codepoint);
286
+ break;
287
+ case GUMBO_ERR_UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME:
288
+ print_message(output, "Unexpected '=' before an attribute name.");
289
+ break;
290
+ case GUMBO_ERR_UNEXPECTED_NULL_CHARACTER:
291
+ print_message(output, "Input contains unexpected U+0000.");
292
+ break;
293
+ case GUMBO_ERR_UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME:
294
+ print_message(output, "Unexpected '?' where start tag name is expected.");
295
+ break;
296
+ case GUMBO_ERR_UNEXPECTED_SOLIDUS_IN_TAG:
297
+ print_message(output, "Unexpected '/' in tag.");
298
+ break;
299
+ case GUMBO_ERR_UNKNOWN_NAMED_CHARACTER_REFERENCE:
300
+ print_message(output, "Unknown named character reference '%.*s'.",
301
+ (int)error->original_text.length, error->original_text.data);
302
+ break;
303
+ case GUMBO_ERR_UTF8_INVALID:
304
+ print_message(output, "Invalid UTF8 encoding.");
305
+ break;
306
+ case GUMBO_ERR_UTF8_TRUNCATED:
307
+ print_message(output, "UTF8 character truncated.");
308
+ break;
309
+ case GUMBO_ERR_PARSER:
310
+ assert(0 && "Unreachable.");
311
+ }
312
+ }
313
+ static void handle_parser_error (
314
+ const GumboParserError* error,
315
+ GumboStringBuffer* output
316
+ ) {
317
+ if (
318
+ error->parser_state == GUMBO_INSERTION_MODE_INITIAL
319
+ && error->input_type != GUMBO_TOKEN_DOCTYPE
320
+ ) {
321
+ print_message (
322
+ output,
323
+ "Expected a doctype token"
324
+ );
325
+ return;
326
+ }
327
+
328
+ switch (error->input_type) {
329
+ case GUMBO_TOKEN_DOCTYPE:
330
+ print_message(output, "This is not a legal doctype");
331
+ return;
332
+ case GUMBO_TOKEN_COMMENT:
333
+ // Should never happen; comments are always legal.
334
+ assert(0);
335
+ // But just in case...
336
+ print_message(output, "Comments aren't legal here");
337
+ return;
338
+ case GUMBO_TOKEN_CDATA:
339
+ case GUMBO_TOKEN_WHITESPACE:
340
+ case GUMBO_TOKEN_CHARACTER:
341
+ print_message(output, "Character tokens aren't legal here");
342
+ return;
343
+ case GUMBO_TOKEN_NULL:
344
+ print_message(output, "Null bytes are not allowed in HTML5");
345
+ return;
346
+ case GUMBO_TOKEN_EOF:
347
+ if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) {
348
+ print_message(output, "You must provide a doctype");
349
+ } else {
350
+ print_message(output, "Premature end of file");
351
+ print_tag_stack(error, output);
352
+ }
353
+ return;
354
+ case GUMBO_TOKEN_START_TAG:
355
+ case GUMBO_TOKEN_END_TAG:
356
+ print_message(output, "That tag isn't allowed here");
357
+ print_tag_stack(error, output);
358
+ // TODO(jdtang): Give more specific messaging.
359
+ return;
360
+ }
361
+ }
362
+
363
// Finds the preceding newline in an original source buffer from a given byte
// location. Returns a character pointer to the character after that, or a
// pointer to the beginning of the string if this is the first line.
//
// A newline located exactly at error_location is treated as the terminator of
// the current line, so the search only considers bytes strictly before
// error_location. error_location must lie within
// [source_text, source_text + source_length].
static const char* find_prev_newline (
  const char* source_text,
  size_t source_length,
  const char* error_location
) {
  assert(error_location >= source_text);
  assert(error_location <= source_text + source_length);
  (void) source_length;  // Only needed by the assertion above.

  // Walk backwards while the byte just before the cursor is not a newline.
  // Inspecting c[-1] rather than *c never reads at or past the end of the
  // buffer, and it correctly handles a newline stored in the very first byte:
  // the line then starts at source_text + 1, not at source_text.
  const char* c = error_location;
  while (c != source_text && c[-1] != '\n') {
    --c;
  }
  return c;
}
381
+
382
// Scans forward from error_location for the first '\n' in the original source
// buffer. Returns a pointer to that newline, or to the end of the buffer
// (source_text + source_length) when the rest of the text contains none,
// i.e. when error_location is on the last line.
static const char* find_next_newline(
  const char* source_text,
  size_t source_length,
  const char* error_location
) {
  const char* const buffer_end = source_text + source_length;
  assert(error_location >= source_text);
  assert(error_location <= buffer_end);

  const char* cursor;
  for (cursor = error_location; cursor != buffer_end; ++cursor) {
    if (*cursor == '\n') {
      break;
    }
  }
  return cursor;
}
398
+
399
+ GumboError* gumbo_add_error(GumboParser* parser) {
400
+ parser->_output->document_error = true;
401
+
402
+ int max_errors = parser->_options->max_errors;
403
+ if (max_errors >= 0 && parser->_output->errors.length >= (unsigned int) max_errors) {
404
+ return NULL;
405
+ }
406
+ GumboError* error = gumbo_alloc(sizeof(GumboError));
407
+ gumbo_vector_add(error, &parser->_output->errors);
408
+ return error;
409
+ }
410
+
411
+ GumboSourcePosition gumbo_error_position(const GumboError* error) {
412
+ return error->position;
413
+ }
414
+
415
+ const char* gumbo_error_code(const GumboError* error) {
416
+ switch (error->type) {
417
+ // Defined tokenizer errors.
418
+ case GUMBO_ERR_ABRUPT_CLOSING_OF_EMPTY_COMMENT:
419
+ return "abrupt-closing-of-empty-comment";
420
+ case GUMBO_ERR_ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER:
421
+ return "abrupt-doctype-public-identifier";
422
+ case GUMBO_ERR_ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER:
423
+ return "abrupt-doctype-system-identifier";
424
+ case GUMBO_ERR_ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE:
425
+ return "absence-of-digits-in-numeric-character-reference";
426
+ case GUMBO_ERR_CDATA_IN_HTML_CONTENT:
427
+ return "cdata-in-html-content";
428
+ case GUMBO_ERR_CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE:
429
+ return "character-reference-outside-unicode-range";
430
+ case GUMBO_ERR_CONTROL_CHARACTER_IN_INPUT_STREAM:
431
+ return "control-character-in-input-stream";
432
+ case GUMBO_ERR_CONTROL_CHARACTER_REFERENCE:
433
+ return "control-character-reference";
434
+ case GUMBO_ERR_END_TAG_WITH_ATTRIBUTES:
435
+ return "end-tag-with-attributes";
436
+ case GUMBO_ERR_DUPLICATE_ATTRIBUTE:
437
+ return "duplicate-attribute";
438
+ case GUMBO_ERR_END_TAG_WITH_TRAILING_SOLIDUS:
439
+ return "end-tag-with-trailing-solidus";
440
+ case GUMBO_ERR_EOF_BEFORE_TAG_NAME:
441
+ return "eof-before-tag-name";
442
+ case GUMBO_ERR_EOF_IN_CDATA:
443
+ return "eof-in-cdata";
444
+ case GUMBO_ERR_EOF_IN_COMMENT:
445
+ return "eof-in-comment";
446
+ case GUMBO_ERR_EOF_IN_DOCTYPE:
447
+ return "eof-in-doctype";
448
+ case GUMBO_ERR_EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT:
449
+ return "eof-in-script-html-comment-like-text";
450
+ case GUMBO_ERR_EOF_IN_TAG:
451
+ return "eof-in-tag";
452
+ case GUMBO_ERR_INCORRECTLY_CLOSED_COMMENT:
453
+ return "incorrectly-closed-comment";
454
+ case GUMBO_ERR_INCORRECTLY_OPENED_COMMENT:
455
+ return "incorrectly-opened-comment";
456
+ case GUMBO_ERR_INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME:
457
+ return "invalid-character-sequence-after-doctype-name";
458
+ case GUMBO_ERR_INVALID_FIRST_CHARACTER_OF_TAG_NAME:
459
+ return "invalid-first-character-of-tag-name";
460
+ case GUMBO_ERR_MISSING_ATTRIBUTE_VALUE:
461
+ return "missing-attribute-value";
462
+ case GUMBO_ERR_MISSING_DOCTYPE_NAME:
463
+ return "missing-doctype-name";
464
+ case GUMBO_ERR_MISSING_DOCTYPE_PUBLIC_IDENTIFIER:
465
+ return "missing-doctype-public-identifier";
466
+ case GUMBO_ERR_MISSING_DOCTYPE_SYSTEM_IDENTIFIER:
467
+ return "missing-doctype-system-identifier";
468
+ case GUMBO_ERR_MISSING_END_TAG_NAME:
469
+ return "missing-end-tag-name";
470
+ case GUMBO_ERR_MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
471
+ return "missing-quote-before-doctype-public-identifier";
472
+ case GUMBO_ERR_MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
473
+ return "missing-quote-before-doctype-system-identifier";
474
+ case GUMBO_ERR_MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE:
475
+ return "missing-semicolon-after-character-reference";
476
+ case GUMBO_ERR_MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD:
477
+ return "missing-whitespace-after-doctype-public-keyword";
478
+ case GUMBO_ERR_MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD:
479
+ return "missing-whitespace-after-doctype-system-keyword";
480
+ case GUMBO_ERR_MISSING_WHITESPACE_BEFORE_DOCTYPE_NAME:
481
+ return "missing-whitespace-before-doctype-name";
482
+ case GUMBO_ERR_MISSING_WHITESPACE_BETWEEN_ATTRIBUTES:
483
+ return "missing-whitespace-between-attributes";
484
+ case GUMBO_ERR_MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
485
+ return "missing-whitespace-between-doctype-public-and-system-identifiers";
486
+ case GUMBO_ERR_NESTED_COMMENT:
487
+ return "nested-comment";
488
+ case GUMBO_ERR_NONCHARACTER_CHARACTER_REFERENCE:
489
+ return "noncharacter-character-reference";
490
+ case GUMBO_ERR_NONCHARACTER_IN_INPUT_STREAM:
491
+ return "noncharacter-in-input-stream";
492
+ case GUMBO_ERR_NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS:
493
+ return "non-void-html-element-start-tag-with-trailing-solidus";
494
+ case GUMBO_ERR_NULL_CHARACTER_REFERENCE:
495
+ return "null-character-reference";
496
+ case GUMBO_ERR_SURROGATE_CHARACTER_REFERENCE:
497
+ return "surrogate-character-reference";
498
+ case GUMBO_ERR_SURROGATE_IN_INPUT_STREAM:
499
+ return "surrogate-in-input-stream";
500
+ case GUMBO_ERR_UNEXPECTED_CHARACTER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
501
+ return "unexpected-character-after-doctype-system-identifier";
502
+ case GUMBO_ERR_UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME:
503
+ return "unexpected-character-in-attribute-name";
504
+ case GUMBO_ERR_UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE:
505
+ return "unexpected-character-in-unquoted-attribute-value";
506
+ case GUMBO_ERR_UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME:
507
+ return "unexpected-equals-sign-before-attribute-name";
508
+ case GUMBO_ERR_UNEXPECTED_NULL_CHARACTER:
509
+ return "unexpected-null-character";
510
+ case GUMBO_ERR_UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME:
511
+ return "unexpected-question-mark-instead-of-tag-name";
512
+ case GUMBO_ERR_UNEXPECTED_SOLIDUS_IN_TAG:
513
+ return "unexpected-solidus-in-tag";
514
+ case GUMBO_ERR_UNKNOWN_NAMED_CHARACTER_REFERENCE:
515
+ return "unknown-named-character-reference";
516
+
517
+ // Encoding errors.
518
+ case GUMBO_ERR_UTF8_INVALID:
519
+ return "utf8-invalid";
520
+ case GUMBO_ERR_UTF8_TRUNCATED:
521
+ return "utf8-truncated";
522
+
523
+ // Generic parser error.
524
+ case GUMBO_ERR_PARSER:
525
+ return "generic-parser";
526
+ }
527
+ // Silence warning about control reaching end of non-void function.
528
+ // All errors _should_ be handled in the switch statement.
529
+ return "generic-parser";
530
+ }
531
+
532
+ static void error_to_string (
533
+ const GumboError* error,
534
+ GumboStringBuffer* output
535
+ ) {
536
+ if (error->type < GUMBO_ERR_PARSER)
537
+ handle_tokenizer_error(error, output);
538
+ else
539
+ handle_parser_error(&error->v.parser, output);
540
+ }
541
+
542
+ size_t gumbo_error_to_string(const GumboError* error, char** output) {
543
+ GumboStringBuffer sb;
544
+ gumbo_string_buffer_init(&sb);
545
+ error_to_string(error, &sb);
546
+ *output = sb.data;
547
+ return sb.length;
548
+ }
549
+
550
+ void caret_diagnostic_to_string (
551
+ const GumboError* error,
552
+ const char* source_text,
553
+ size_t source_length,
554
+ GumboStringBuffer* output
555
+ ) {
556
+ error_to_string(error, output);
557
+
558
+ const char* error_text = error->original_text.data;
559
+ const char* line_start = find_prev_newline(source_text, source_length, error_text);
560
+ const char* line_end = find_next_newline(source_text, source_length, error_text);
561
+ GumboStringPiece original_line;
562
+ original_line.data = line_start;
563
+ original_line.length = line_end - line_start;
564
+
565
+ gumbo_string_buffer_append_codepoint('\n', output);
566
+ gumbo_string_buffer_append_string(&original_line, output);
567
+ gumbo_string_buffer_append_codepoint('\n', output);
568
+ gumbo_string_buffer_reserve(output->length + error->position.column, output);
569
+ if (error->position.column >= 2) {
570
+ size_t num_spaces = error->position.column - 1;
571
+ memset(output->data + output->length, ' ', num_spaces);
572
+ output->length += num_spaces;
573
+ }
574
+ gumbo_string_buffer_append_codepoint('^', output);
575
+ gumbo_string_buffer_append_codepoint('\n', output);
576
+ }
577
+
578
+ size_t gumbo_caret_diagnostic_to_string (
579
+ const GumboError* error,
580
+ const char* source_text,
581
+ size_t source_length,
582
+ char **output
583
+ ) {
584
+ GumboStringBuffer sb;
585
+ gumbo_string_buffer_init(&sb);
586
+ caret_diagnostic_to_string(error, source_text, source_length, &sb);
587
+ *output = sb.data;
588
+ return sb.length;
589
+ }
590
+
591
+ void gumbo_print_caret_diagnostic (
592
+ const GumboError* error,
593
+ const char* source_text,
594
+ size_t source_length
595
+ ) {
596
+ GumboStringBuffer text;
597
+ gumbo_string_buffer_init(&text);
598
+ print_message (
599
+ &text,
600
+ "%lu:%lu: ",
601
+ (unsigned long)error->position.line,
602
+ (unsigned long)error->position.column
603
+ );
604
+
605
+ caret_diagnostic_to_string(error, source_text, source_length, &text);
606
+ printf("%.*s", (int) text.length, text.data);
607
+ gumbo_string_buffer_destroy(&text);
608
+ }
609
+
610
+ void gumbo_error_destroy(GumboError* error) {
611
+ if (error->type == GUMBO_ERR_PARSER) {
612
+ gumbo_vector_destroy(&error->v.parser.tag_stack);
613
+ }
614
+ gumbo_free(error);
615
+ }
616
+
617
+ void gumbo_init_errors(GumboParser* parser) {
618
+ gumbo_vector_init(5, &parser->_output->errors);
619
+ }
620
+
621
+ void gumbo_destroy_errors(GumboParser* parser) {
622
+ for (unsigned int i = 0; i < parser->_output->errors.length; ++i) {
623
+ gumbo_error_destroy(parser->_output->errors.data[i]);
624
+ }
625
+ gumbo_vector_destroy(&parser->_output->errors);
626
+ }