nokogiri 1.5.10 → 1.13.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (334) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +5 -0
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +280 -0
  6. data/bin/nokogiri +84 -31
  7. data/dependencies.yml +73 -0
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +956 -100
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +232 -87
  18. data/ext/nokogiri/nokogiri.h +188 -129
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +29 -21
  24. data/ext/nokogiri/xml_document.c +327 -223
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +56 -50
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +45 -20
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +74 -32
  33. data/ext/nokogiri/xml_node.c +1290 -680
  34. data/ext/nokogiri/xml_node_set.c +239 -208
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +227 -189
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +123 -125
  39. data/ext/nokogiri/xml_sax_parser_context.c +138 -79
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +50 -23
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +227 -140
  45. data/ext/nokogiri/xslt_stylesheet.c +162 -168
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -58
  93. data/lib/nokogiri/css/parser.rb +327 -288
  94. data/lib/nokogiri/css/parser.y +67 -45
  95. data/lib/nokogiri/css/parser_extras.rb +52 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +7 -6
  99. data/lib/nokogiri/css/xpath_visitor.rb +263 -75
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +17 -8
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/html4/document.rb +331 -0
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +24 -15
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +88 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +96 -0
  118. data/lib/nokogiri/html5.rb +477 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +221 -0
  123. data/lib/nokogiri/version.rb +3 -90
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +96 -54
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +234 -95
  130. data/lib/nokogiri/xml/document_fragment.rb +86 -36
  131. data/lib/nokogiri/xml/dtd.rb +16 -4
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +8 -4
  138. data/lib/nokogiri/xml/node.rb +947 -502
  139. data/lib/nokogiri/xml/node_set.rb +168 -159
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +40 -5
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +23 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +43 -41
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +259 -0
  155. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -36
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +18 -16
  163. data/lib/nokogiri.rb +69 -69
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  169. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  170. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  171. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  172. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  173. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  174. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  175. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  176. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  177. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  178. metadata +382 -460
  179. data/.autotest +0 -26
  180. data/.gemtest +0 -0
  181. data/CHANGELOG.ja.rdoc +0 -785
  182. data/CHANGELOG.rdoc +0 -783
  183. data/C_CODING_STYLE.rdoc +0 -33
  184. data/Manifest.txt +0 -303
  185. data/README.ja.rdoc +0 -106
  186. data/README.rdoc +0 -175
  187. data/ROADMAP.md +0 -90
  188. data/Rakefile +0 -228
  189. data/STANDARD_RESPONSES.md +0 -47
  190. data/Y_U_NO_GEMSPEC.md +0 -155
  191. data/build_all +0 -105
  192. data/ext/nokogiri/html_document.c +0 -170
  193. data/ext/nokogiri/html_document.h +0 -10
  194. data/ext/nokogiri/html_element_description.c +0 -279
  195. data/ext/nokogiri/html_element_description.h +0 -10
  196. data/ext/nokogiri/html_entity_lookup.c +0 -32
  197. data/ext/nokogiri/html_entity_lookup.h +0 -8
  198. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  199. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  200. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  201. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  202. data/ext/nokogiri/xml_attr.h +0 -9
  203. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  204. data/ext/nokogiri/xml_cdata.h +0 -9
  205. data/ext/nokogiri/xml_comment.h +0 -9
  206. data/ext/nokogiri/xml_document.h +0 -23
  207. data/ext/nokogiri/xml_document_fragment.h +0 -10
  208. data/ext/nokogiri/xml_dtd.h +0 -10
  209. data/ext/nokogiri/xml_element_content.h +0 -10
  210. data/ext/nokogiri/xml_element_decl.h +0 -9
  211. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  212. data/ext/nokogiri/xml_entity_decl.h +0 -10
  213. data/ext/nokogiri/xml_entity_reference.h +0 -9
  214. data/ext/nokogiri/xml_io.c +0 -56
  215. data/ext/nokogiri/xml_io.h +0 -11
  216. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  217. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  218. data/ext/nokogiri/xml_namespace.h +0 -13
  219. data/ext/nokogiri/xml_node.h +0 -13
  220. data/ext/nokogiri/xml_node_set.h +0 -14
  221. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  222. data/ext/nokogiri/xml_reader.h +0 -10
  223. data/ext/nokogiri/xml_relax_ng.h +0 -9
  224. data/ext/nokogiri/xml_sax_parser.h +0 -39
  225. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  226. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  227. data/ext/nokogiri/xml_schema.h +0 -9
  228. data/ext/nokogiri/xml_syntax_error.h +0 -13
  229. data/ext/nokogiri/xml_text.h +0 -9
  230. data/ext/nokogiri/xml_xpath_context.h +0 -10
  231. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  232. data/lib/nokogiri/html/document.rb +0 -254
  233. data/lib/nokogiri/html/document_fragment.rb +0 -41
  234. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  235. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  236. data/lib/nokogiri/html/sax/push_parser.rb +0 -16
  237. data/tasks/cross_compile.rb +0 -150
  238. data/tasks/nokogiri.org.rb +0 -24
  239. data/tasks/test.rb +0 -95
  240. data/test/css/test_nthiness.rb +0 -159
  241. data/test/css/test_parser.rb +0 -341
  242. data/test/css/test_tokenizer.rb +0 -198
  243. data/test/css/test_xpath_visitor.rb +0 -91
  244. data/test/decorators/test_slop.rb +0 -16
  245. data/test/files/2ch.html +0 -108
  246. data/test/files/address_book.rlx +0 -12
  247. data/test/files/address_book.xml +0 -10
  248. data/test/files/bar/bar.xsd +0 -4
  249. data/test/files/dont_hurt_em_why.xml +0 -422
  250. data/test/files/encoding.html +0 -82
  251. data/test/files/encoding.xhtml +0 -84
  252. data/test/files/exslt.xml +0 -8
  253. data/test/files/exslt.xslt +0 -35
  254. data/test/files/foo/foo.xsd +0 -4
  255. data/test/files/metacharset.html +0 -10
  256. data/test/files/noencoding.html +0 -47
  257. data/test/files/po.xml +0 -32
  258. data/test/files/po.xsd +0 -66
  259. data/test/files/shift_jis.html +0 -10
  260. data/test/files/shift_jis.xml +0 -5
  261. data/test/files/snuggles.xml +0 -3
  262. data/test/files/staff.dtd +0 -10
  263. data/test/files/staff.xml +0 -59
  264. data/test/files/staff.xslt +0 -32
  265. data/test/files/test_document_url/bar.xml +0 -2
  266. data/test/files/test_document_url/document.dtd +0 -4
  267. data/test/files/test_document_url/document.xml +0 -6
  268. data/test/files/tlm.html +0 -850
  269. data/test/files/to_be_xincluded.xml +0 -2
  270. data/test/files/valid_bar.xml +0 -2
  271. data/test/files/xinclude.xml +0 -4
  272. data/test/helper.rb +0 -154
  273. data/test/html/sax/test_parser.rb +0 -141
  274. data/test/html/sax/test_parser_context.rb +0 -46
  275. data/test/html/test_builder.rb +0 -164
  276. data/test/html/test_document.rb +0 -552
  277. data/test/html/test_document_encoding.rb +0 -138
  278. data/test/html/test_document_fragment.rb +0 -261
  279. data/test/html/test_element_description.rb +0 -105
  280. data/test/html/test_named_characters.rb +0 -14
  281. data/test/html/test_node.rb +0 -196
  282. data/test/html/test_node_encoding.rb +0 -27
  283. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  284. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  285. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  286. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  287. data/test/test_convert_xpath.rb +0 -135
  288. data/test/test_css_cache.rb +0 -45
  289. data/test/test_encoding_handler.rb +0 -46
  290. data/test/test_memory_leak.rb +0 -156
  291. data/test/test_nokogiri.rb +0 -132
  292. data/test/test_reader.rb +0 -555
  293. data/test/test_soap4r_sax.rb +0 -52
  294. data/test/test_xslt_transforms.rb +0 -254
  295. data/test/xml/node/test_save_options.rb +0 -28
  296. data/test/xml/node/test_subclass.rb +0 -44
  297. data/test/xml/sax/test_parser.rb +0 -366
  298. data/test/xml/sax/test_parser_context.rb +0 -106
  299. data/test/xml/sax/test_push_parser.rb +0 -157
  300. data/test/xml/test_attr.rb +0 -64
  301. data/test/xml/test_attribute_decl.rb +0 -86
  302. data/test/xml/test_builder.rb +0 -306
  303. data/test/xml/test_c14n.rb +0 -151
  304. data/test/xml/test_cdata.rb +0 -48
  305. data/test/xml/test_comment.rb +0 -29
  306. data/test/xml/test_document.rb +0 -828
  307. data/test/xml/test_document_encoding.rb +0 -28
  308. data/test/xml/test_document_fragment.rb +0 -223
  309. data/test/xml/test_dtd.rb +0 -103
  310. data/test/xml/test_dtd_encoding.rb +0 -33
  311. data/test/xml/test_element_content.rb +0 -56
  312. data/test/xml/test_element_decl.rb +0 -73
  313. data/test/xml/test_entity_decl.rb +0 -122
  314. data/test/xml/test_entity_reference.rb +0 -245
  315. data/test/xml/test_namespace.rb +0 -95
  316. data/test/xml/test_node.rb +0 -1137
  317. data/test/xml/test_node_attributes.rb +0 -96
  318. data/test/xml/test_node_encoding.rb +0 -107
  319. data/test/xml/test_node_inheritance.rb +0 -32
  320. data/test/xml/test_node_reparenting.rb +0 -374
  321. data/test/xml/test_node_set.rb +0 -755
  322. data/test/xml/test_parse_options.rb +0 -64
  323. data/test/xml/test_processing_instruction.rb +0 -30
  324. data/test/xml/test_reader_encoding.rb +0 -142
  325. data/test/xml/test_relax_ng.rb +0 -60
  326. data/test/xml/test_schema.rb +0 -103
  327. data/test/xml/test_syntax_error.rb +0 -12
  328. data/test/xml/test_text.rb +0 -45
  329. data/test/xml/test_unparented_node.rb +0 -422
  330. data/test/xml/test_xinclude.rb +0 -83
  331. data/test/xml/test_xpath.rb +0 -295
  332. data/test/xslt/test_custom_functions.rb +0 -133
  333. data/test/xslt/test_exception_handling.rb +0 -37
  334. data/test_all +0 -81
@@ -0,0 +1,41 @@
1
+ libgumbo
2
+ ========
3
+
4
+ This is an internal fork of the [libgumbo] library, which was copied and
5
+ later modified under the terms of the Apache 2.0 [license]. See `lua-gumbo`
6
+ commit [`0a04728`] for details of the original import.
7
+
8
+ Since importing the code, the following notable fixes and improvements
9
+ have been made:
10
+
11
+ * `91cef89`: Re-implement `adjust_foreign_attributes()` with a gperf hash
12
+ * `b11abe7`: Pass `TagSet` arrays into functions by reference instead of value
13
+ * `b73dc03`: Simplify `maybe_replace_codepoint()` function
14
+ * `d5d0bb3`: Remove special handling of `<menuitem>` tag
15
+ * `7bd5162`: Remove special handling of `<isindex>` tag
16
+ * `a5c1b0e`: Use `realloc(3)` instead of `malloc(3)` in `enlarge_vector_if_full()`
17
+ * `dcbebd7`: Use `realloc(3)` instead of `malloc(3)` in `maybe_resize_string_buffer()`
18
+ * `df15262`: Make `destroy_node()` function non-recursive
19
+ * `2df37f5`: Fix signedness of some format specifiers
20
+ * `176553e`: Add maximum element nesting limit
21
+ * `bed0f4a`: Annotate `gumbo_debug()` with `PRINTF` macro and fix warnings
22
+ * `7ffc218`: Annotate `print_message()` with `PRINTF` macro and fix warnings
23
+ * `1bd8ab5`, `9136507`, `53a1f9a`: Deduplicate some identical `TagSet` arrays
24
+ * `a7a9065`: Add some GCC/Clang function attributes
25
+ * `8d3d4e4`: Remove custom allocator support
26
+ * `8d3b006`: Fix recording of source positions for `</form>` end tags
27
+ * `1a8d763`: Replace linear search in `maybe_replace_codepoint()` with a lookup table
28
+ * `6dca79e`: Replace `strcasecmp()` and `strncasecmp()` with ASCII-only equivalents
29
+ * `17ab1d2`: Fix `TAGSET_INCLUDES` macro to work properly with multiple bit flags
30
+ * `7e56d45`: Re-implement `gumbo_normalize_svg_tagname()` with a gperf hash
31
+ * `a518d35`: Replace linear array search in `adjust_svg_attributes()` with a gperf hash
32
+ * `a4a7433`: Fix duplicate `TagSet` initializer being ignored in `is_special_node()`
33
+ * `8137fcd`: Add support for `<dialog>` tag
34
+ * `4b35471`: Add missing `static` qualifiers to hide symbols that shouldn't be extern
35
+ * `df57c59`, `03101f3`, `ea62330`: Replace use of locale-dependent `ctype.h` functions
36
+ with custom, ASCII-only equivalents
37
+
38
+
39
+ [libgumbo]: https://github.com/google/gumbo-parser/tree/aa91b27b02c0c80c482e24348a457ed7c3c088e0/src
40
+ [license]: https://github.com/google/gumbo-parser/blob/aa91b27b02c0c80c482e24348a457ed7c3c088e0/COPYING
41
+ [`0a04728`]: https://gitlab.com/craigbarnes/lua-gumbo/commit/0a047282815af86f3367a7d95fefcfe5723ece48
@@ -0,0 +1,75 @@
1
+ #include "ascii.h"
2
+
3
+ int gumbo_ascii_strcasecmp(const char *s1, const char *s2) { // Compares two NUL-terminated strings, ignoring the case of ASCII letters only; returns negative, zero or positive.
4
+ int c1, c2;
5
+ while (*s1 && *s2) { // walk both strings until either one ends
6
+ c1 = (int)(unsigned char) gumbo_ascii_tolower(*s1); // fold A-Z to a-z, then treat the result as an unsigned byte value
7
+ c2 = (int)(unsigned char) gumbo_ascii_tolower(*s2);
8
+ if (c1 != c2) {
9
+ return (c1 - c2); // the first mismatching (case-folded) byte decides the result
10
+ }
11
+ s1++;
12
+ s2++;
13
+ }
14
+ return (((int)(unsigned char) *s1) - ((int)(unsigned char) *s2)); // at least one string ended here: 0 if both did, otherwise the longer string compares greater
15
+ }
16
+
17
+ int gumbo_ascii_strncasecmp(const char *s1, const char *s2, size_t n) { // Like gumbo_ascii_strcasecmp, but examines at most n bytes of each string.
18
+ int c1, c2;
19
+ while (n && *s1 && *s2) { // stop when the budget is spent or either string ends
20
+ n -= 1; // one byte of the budget is consumed per iteration
21
+ c1 = (int)(unsigned char) gumbo_ascii_tolower(*s1); // fold A-Z to a-z, then treat the result as an unsigned byte value
22
+ c2 = (int)(unsigned char) gumbo_ascii_tolower(*s2);
23
+ if (c1 != c2) {
24
+ return (c1 - c2); // the first mismatching (case-folded) byte decides the result
25
+ }
26
+ s1++;
27
+ s2++;
28
+ }
29
+ if (n) { // budget left over, so the loop stopped because a string ended
30
+ return (((int)(unsigned char) *s1) - ((int)(unsigned char) *s2)); // 0 if both ended, otherwise the longer string compares greater
31
+ }
32
+ return 0; // all n bytes matched (this is also the result when n == 0)
33
+ }
34
+
35
+ const unsigned char _gumbo_ascii_table[0x80] = { // Per-code-point classification bitmask (GUMBO_ASCII_* flags), indexed by ASCII value 0x00-0x7f; each row below covers 16 code points.
36
+ 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x03,0x03,0x01,0x03,0x03,0x01,0x01, // 0x00-0x0f: controls; 0x09, 0x0a, 0x0c, 0x0d are also whitespace
37
+ 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01, // 0x10-0x1f: controls
38
+ 0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, // 0x20-0x2f: space (0x20) is whitespace, the rest carry no flags
39
+ 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00, // 0x30-0x3f: '0'-'9' are digit + upper-xdigit + lower-xdigit
40
+ 0x00,0x28,0x28,0x28,0x28,0x28,0x28,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, // 0x40-0x4f: 'A'-'F' are upper-alpha + upper-xdigit, 'G'-'O' upper-alpha
41
+ 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x00,0x00,0x00,0x00,0x00, // 0x50-0x5f: 'P'-'Z' are upper-alpha
42
+ 0x00,0x50,0x50,0x50,0x50,0x50,0x50,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, // 0x60-0x6f: 'a'-'f' are lower-alpha + lower-xdigit, 'g'-'o' lower-alpha
43
+ 0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x00,0x00,0x00,0x00,0x00, // 0x70-0x7f: 'p'-'z' are lower-alpha; 0x7f carries no flags
44
+ };
45
+
46
+ // Table generation code.
47
+ // clang -DGUMBO_GEN_TABLE=1 ascii.c && ./a.out && rm a.out
48
+ #if GUMBO_GEN_TABLE
49
+ #include <stdio.h>
50
+
51
+ int main() { // Generator entry point (built only when GUMBO_GEN_TABLE is set): prints the C source of _gumbo_ascii_table to stdout.
52
+ printf("const unsigned char _gumbo_ascii_table[0x80] = {");
53
+ for (int c = 0; c < 0x80; ++c) { // one table entry per ASCII code point
54
+ unsigned int x = 0; // accumulated GUMBO_ASCII_* flags for code point c
55
+ // https://infra.spec.whatwg.org/#ascii-code-point
56
+ if (c <= 0x1f) // 0x00-0x1f
57
+ x |= GUMBO_ASCII_CNTRL;
58
+ if (c == 0x09 || c == 0x0a || c == 0x0c || c == 0x0d || c == 0x20) // tab, LF, FF, CR, space
59
+ x |= GUMBO_ASCII_SPACE;
60
+ if (c >= 0x30 && c <= 0x39) // '0'-'9'
61
+ x |= GUMBO_ASCII_DIGIT;
62
+ if ((c >= 0x30 && c <= 0x39) || (c >= 0x41 && c <= 0x46)) // '0'-'9' or 'A'-'F'
63
+ x |= GUMBO_ASCII_UPPER_XDIGIT;
64
+ if ((c >= 0x30 && c <= 0x39) || (c >= 0x61 && c <= 0x66)) // '0'-'9' or 'a'-'f'
65
+ x |= GUMBO_ASCII_LOWER_XDIGIT;
66
+ if (c >= 0x41 && c <= 0x5a) // 'A'-'Z'
67
+ x |= GUMBO_ASCII_UPPER_ALPHA;
68
+ if (c >= 0x61 && c <= 0x7a) // 'a'-'z'
69
+ x |= GUMBO_ASCII_LOWER_ALPHA;
70
+ printf("%s0x%02x,", (c % 16 == 0? "\n " : ""), x); // start a new output row every 16 entries
71
+ }
72
+ printf("\n};\n");
73
+ return 0;
74
+ }
75
+ #endif
@@ -0,0 +1,115 @@
1
+ #ifndef GUMBO_ASCII_H_
2
+ #define GUMBO_ASCII_H_
3
+
4
+ #include <stddef.h>
5
+ #include "macros.h"
6
+
7
+ #ifdef __cplusplus
8
+ extern "C" {
9
+ #endif
10
+
11
+ PURE NONNULL_ARGS // NOTE(review): PURE / NONNULL_ARGS presumably expand to compiler function attributes from macros.h — confirm there
12
+ int gumbo_ascii_strcasecmp(const char *s1, const char *s2); // Case-insensitive (ASCII letters only) comparison of two NUL-terminated strings; negative, zero or positive result.
13
+
14
+ PURE NONNULL_ARGS
15
+ int gumbo_ascii_strncasecmp(const char *s1, const char *s2, size_t n); // Same comparison limited to the first n bytes; returns 0 when n is 0.
16
+
17
+ // If these values change, then _gumbo_ascii_table needs to be regenerated.
18
+ #define GUMBO_ASCII_CNTRL 1 // 0x00-0x1f
19
+ #define GUMBO_ASCII_SPACE 2 // 0x09, 0x0a, 0x0c, 0x0d, 0x20
20
+ #define GUMBO_ASCII_DIGIT 4 // '0'-'9'
21
+ #define GUMBO_ASCII_UPPER_XDIGIT 8 // '0'-'9' and 'A'-'F'
22
+ #define GUMBO_ASCII_LOWER_XDIGIT 16 // '0'-'9' and 'a'-'f'
23
+ #define GUMBO_ASCII_UPPER_ALPHA 32 // 'A'-'Z'
24
+ #define GUMBO_ASCII_LOWER_ALPHA 64 // 'a'-'z'
25
+ #define GUMBO_ASCII_XDIGIT (GUMBO_ASCII_LOWER_XDIGIT | GUMBO_ASCII_UPPER_XDIGIT) // any hexadecimal digit, either case
26
+ #define GUMBO_ASCII_ALPHA (GUMBO_ASCII_UPPER_ALPHA | GUMBO_ASCII_LOWER_ALPHA) // any ASCII letter
27
+ #define GUMBO_ASCII_ALNUM (GUMBO_ASCII_DIGIT | GUMBO_ASCII_ALPHA) // any ASCII letter or decimal digit
28
+
29
+ extern const unsigned char _gumbo_ascii_table[0x80]; // flag bitmask per ASCII code point; valid indices are 0x00-0x7f only
30
+
31
+ CONST_FN
32
+ static inline int gumbo_ascii_isascii(int c) { // Returns 1 if c is in 0x00-0x7f, otherwise 0.
33
+ return ((unsigned int)c & ~0x7fu) == 0; // any bit above the low 7 (including those set by a negative c) disqualifies the value
34
+ }
35
+
36
+ // 0x00 -- 0x1F (A C0 control)
37
+ CONST_FN
38
+ static inline int gumbo_ascii_iscntrl(int c) { // Returns 1 if c is ASCII and flagged GUMBO_ASCII_CNTRL in the table, otherwise 0.
39
+ return gumbo_ascii_isascii(c) // range check first so the table index below is always within bounds
40
+ && (_gumbo_ascii_table[c] & GUMBO_ASCII_CNTRL);
41
+ }
42
+
43
+ // 0x09, 0x0a, 0x0c, 0x0d, 0x20
44
+ CONST_FN
45
+ static inline int gumbo_ascii_isspace(int c) { // Returns 1 if c is ASCII and flagged GUMBO_ASCII_SPACE in the table, otherwise 0.
46
+ return gumbo_ascii_isascii(c) // range check first so the table index below is always within bounds
47
+ && (_gumbo_ascii_table[c] & GUMBO_ASCII_SPACE);
48
+ }
49
+
50
+ CONST_FN
51
+ static inline int gumbo_ascii_istab_or_newline(int c) { // Returns 1 for tab (0x09), LF (0x0a) or CR (0x0d), otherwise 0.
52
+ return c == 0x09 || c == 0x0a || c == 0x0d; // narrower than gumbo_ascii_isspace: form feed (0x0c) and space (0x20) are not matched
53
+ }
54
+
55
+
56
+ CONST_FN
57
+ static inline int gumbo_ascii_isdigit(int c) { // Returns 1 if c is '0'-'9', otherwise 0.
58
+ return c >= 0x30 && c <= 0x39; // direct range test; no table lookup is needed
59
+ }
60
+
61
+ CONST_FN
62
+ static inline int gumbo_ascii_isalpha(int c) { // Returns 1 if c is an ASCII letter of either case, otherwise 0.
63
+ return gumbo_ascii_isascii(c) // range check first so the table index below is always within bounds
64
+ && (_gumbo_ascii_table[c] & GUMBO_ASCII_ALPHA);
65
+ }
66
+
67
+ CONST_FN
68
+ static inline int gumbo_ascii_isxdigit(int c) { // Returns 1 if c is a hexadecimal digit of either case, otherwise 0.
69
+ return gumbo_ascii_isascii(c) // range check first so the table index below is always within bounds
70
+ && (_gumbo_ascii_table[c] & GUMBO_ASCII_XDIGIT);
71
+ }
72
+
73
+ CONST_FN
74
+ static inline int gumbo_ascii_isupper_xdigit(int c) { // Returns 1 if c is flagged GUMBO_ASCII_UPPER_XDIGIT ('0'-'9' or 'A'-'F'), otherwise 0.
75
+ return gumbo_ascii_isascii(c) // range check first so the table index below is always within bounds
76
+ && (_gumbo_ascii_table[c] & GUMBO_ASCII_UPPER_XDIGIT);
77
+ }
78
+
79
+ CONST_FN
80
+ static inline int gumbo_ascii_islower_xdigit(int c) { // Returns 1 if c is flagged GUMBO_ASCII_LOWER_XDIGIT ('0'-'9' or 'a'-'f'), otherwise 0.
81
+ return gumbo_ascii_isascii(c) // range check first so the table index below is always within bounds
82
+ && (_gumbo_ascii_table[c] & GUMBO_ASCII_LOWER_XDIGIT);
83
+ }
84
+
85
+ CONST_FN
86
+ static inline int gumbo_ascii_isupper(int c) { // Returns 1 if c is 'A'-'Z', otherwise 0.
87
+ return ((unsigned)(c) - 'A') < 26; // unsigned wrap-around turns anything below 'A' into a huge value, so one comparison checks both bounds
88
+ }
89
+
90
+ CONST_FN
91
+ static inline int gumbo_ascii_islower(int c) { // Returns 1 if c is 'a'-'z', otherwise 0.
92
+ return gumbo_ascii_isascii(c) // range check first so the table index below is always within bounds
93
+ && (_gumbo_ascii_table[c] & GUMBO_ASCII_LOWER_ALPHA);
94
+ }
95
+
96
+ CONST_FN
97
+ static inline int gumbo_ascii_isalnum(int c) { // Returns 1 if c is an ASCII letter or decimal digit, otherwise 0.
98
+ return gumbo_ascii_isascii(c) // range check first so the table index below is always within bounds
99
+ && (_gumbo_ascii_table[c] & GUMBO_ASCII_ALNUM);
100
+ }
101
+
102
+
103
+ CONST_FN
104
+ static inline int gumbo_ascii_tolower(int c) { // Maps 'A'-'Z' to 'a'-'z'; every other value is returned unchanged.
105
+ if (gumbo_ascii_isupper(c)) {
106
+ return c | 32; // setting bit 5 (0x20) turns an uppercase ASCII letter into its lowercase counterpart
107
+ }
108
+ return c;
109
+ }
110
+
111
+ #ifdef __cplusplus
112
+ }
113
+ #endif
114
+
115
+ #endif // GUMBO_ASCII_H_
@@ -0,0 +1,42 @@
1
+ /*
2
+ Copyright 2018 Craig Barnes.
3
+ Copyright 2010 Google Inc.
4
+
5
+ Licensed under the Apache License, Version 2.0 (the "License");
6
+ you may not use this file except in compliance with the License.
7
+ You may obtain a copy of the License at
8
+
9
+ https://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+ */
17
+
18
+ #include <assert.h>
19
+ #include <stdlib.h>
20
+ #include <string.h>
21
+ #include "attribute.h"
22
+ #include "ascii.h"
23
+ #include "util.h"
24
+
25
+ GumboAttribute* gumbo_get_attribute ( // Finds an attribute by name; returns the first match, or NULL if none matches.
26
+ const GumboVector* attributes, // vector whose data[] entries are used as GumboAttribute pointers
27
+ const char* name // name to look for; compared without regard to ASCII case
28
+ ) {
29
+ for (unsigned int i = 0; i < attributes->length; ++i) { // linear scan in vector order
30
+ GumboAttribute* attr = attributes->data[i];
31
+ if (!gumbo_ascii_strcasecmp(attr->name, name)) { // a result of 0 means the names are equal ignoring ASCII case
32
+ return attr;
33
+ }
34
+ }
35
+ return NULL; // no attribute with that name (also the result for an empty vector)
36
+ }
37
+
38
+ void gumbo_destroy_attribute(GumboAttribute* attribute) { // Releases an attribute's name, its value, and then the attribute struct itself.
39
+ gumbo_free((void*) attribute->name); // NOTE(review): gumbo_free is declared outside this view (presumably util.h) — confirm it tolerates NULL
40
+ gumbo_free((void*) attribute->value);
41
+ gumbo_free((void*) attribute); // freed last: the two calls above still read its fields
42
+ }
@@ -0,0 +1,17 @@
1
+ #ifndef GUMBO_ATTRIBUTE_H_
2
+ #define GUMBO_ATTRIBUTE_H_
3
+
4
+ #include "gumbo.h"
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ // Release the memory used for a GumboAttribute, including the attribute itself
11
+ void gumbo_destroy_attribute(GumboAttribute* attribute); // frees attribute->name, attribute->value and then the struct; the pointer must not be used afterwards
12
+
13
+ #ifdef __cplusplus
14
+ }
15
+ #endif
16
+
17
+ #endif // GUMBO_ATTRIBUTE_H_