makiri 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (641) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/release.yml +18 -7
  3. data/CHANGELOG.md +148 -5
  4. data/README.md +173 -7
  5. data/Rakefile +103 -7
  6. data/ext/makiri/bridge/bridge.h +28 -0
  7. data/ext/makiri/bridge/ruby_string.c +217 -0
  8. data/ext/makiri/core/mkr_alloc.h +1 -1
  9. data/ext/makiri/core/mkr_buf.c +35 -1
  10. data/ext/makiri/core/mkr_buf.h +37 -3
  11. data/ext/makiri/core/mkr_core.h +1 -1
  12. data/ext/makiri/core/mkr_hash.h +1 -1
  13. data/ext/makiri/core/mkr_text.h +8 -8
  14. data/ext/makiri/extconf.rb +20 -2
  15. data/ext/makiri/glue/glue.h +53 -11
  16. data/ext/makiri/glue/ruby_doc.c +165 -35
  17. data/ext/makiri/glue/ruby_html_css.c +246 -0
  18. data/ext/makiri/glue/{ruby_mutate.c → ruby_html_mutate.c} +271 -43
  19. data/ext/makiri/glue/ruby_html_node.c +888 -0
  20. data/ext/makiri/glue/ruby_html_serialize.c +154 -0
  21. data/ext/makiri/glue/ruby_node.c +54 -555
  22. data/ext/makiri/glue/ruby_node_set.c +167 -32
  23. data/ext/makiri/glue/ruby_xml.c +420 -0
  24. data/ext/makiri/glue/ruby_xml_node.c +1386 -0
  25. data/ext/makiri/glue/ruby_xpath.c +60 -27
  26. data/ext/makiri/glue/ruby_xpath.h +19 -0
  27. data/ext/makiri/lexbor_compat/compat.h +42 -9
  28. data/ext/makiri/lexbor_compat/compat_internal.h +1 -1
  29. data/ext/makiri/lexbor_compat/dom_index.c +2 -2
  30. data/ext/makiri/lexbor_compat/post_parse.c +100 -10
  31. data/ext/makiri/lexbor_compat/source_loc.c +13 -9
  32. data/ext/makiri/lexbor_compat/text_index.c +14 -8
  33. data/ext/makiri/lexbor_compat/utf8_input.c +85 -26
  34. data/ext/makiri/makiri.c +139 -6
  35. data/ext/makiri/makiri.h +43 -2
  36. data/ext/makiri/xml/mkr_xml.h +126 -0
  37. data/ext/makiri/xml/mkr_xml_chars.c +225 -0
  38. data/ext/makiri/xml/mkr_xml_mutate.c +875 -0
  39. data/ext/makiri/xml/mkr_xml_mutate.h +139 -0
  40. data/ext/makiri/xml/mkr_xml_node.c +267 -0
  41. data/ext/makiri/xml/mkr_xml_node.h +119 -0
  42. data/ext/makiri/xml/mkr_xml_tree.c +1479 -0
  43. data/ext/makiri/xpath/mkr_xpath.c +59 -32
  44. data/ext/makiri/xpath/mkr_xpath.h +96 -4
  45. data/ext/makiri/xpath/mkr_xpath_engine_html.c +17 -0
  46. data/ext/makiri/xpath/mkr_xpath_engine_xml.c +12 -0
  47. data/ext/makiri/xpath/{mkr_xpath_eval.c → mkr_xpath_eval_body.h} +202 -175
  48. data/ext/makiri/xpath/{mkr_xpath_funcs.c → mkr_xpath_funcs_body.h} +110 -86
  49. data/ext/makiri/xpath/mkr_xpath_internal.h +91 -200
  50. data/ext/makiri/xpath/mkr_xpath_lex.c +2 -2
  51. data/ext/makiri/xpath/mkr_xpath_node_access_html.h +138 -0
  52. data/ext/makiri/xpath/mkr_xpath_node_access_xml.h +142 -0
  53. data/ext/makiri/xpath/mkr_xpath_parse.c +5 -5
  54. data/ext/makiri/xpath/mkr_xpath_prelude_html.h +30 -0
  55. data/ext/makiri/xpath/mkr_xpath_prelude_xml.h +28 -0
  56. data/ext/makiri/xpath/mkr_xpath_shared.c +593 -0
  57. data/ext/makiri/xpath/{mkr_xpath_value.c → mkr_xpath_value_body.h} +145 -656
  58. data/ext/makiri/xpath/mkr_xpath_xml_selftest.c +76 -0
  59. data/lib/makiri/{attribute.rb → attr.rb} +7 -3
  60. data/lib/makiri/cdata_section.rb +21 -0
  61. data/lib/makiri/comment.rb +12 -0
  62. data/lib/makiri/compat_aliases.rb +30 -0
  63. data/lib/makiri/document.rb +4 -76
  64. data/lib/makiri/document_fragment.rb +14 -9
  65. data/lib/makiri/element.rb +5 -3
  66. data/lib/makiri/html/document.rb +106 -0
  67. data/lib/makiri/html/node_methods.rb +19 -0
  68. data/lib/makiri/html.rb +12 -0
  69. data/lib/makiri/node.rb +58 -15
  70. data/lib/makiri/node_set.rb +8 -0
  71. data/lib/makiri/processing_instruction.rb +12 -0
  72. data/lib/makiri/text.rb +2 -0
  73. data/lib/makiri/version.rb +1 -1
  74. data/lib/makiri/xml/document.rb +24 -0
  75. data/lib/makiri/xml/node_methods.rb +37 -0
  76. data/lib/makiri/xml.rb +10 -0
  77. data/lib/makiri/xpath_context.rb +1 -1
  78. data/lib/makiri.rb +23 -5
  79. data/script/build_native_gem.rb +2 -2
  80. data/script/check_c_safety.rb +32 -0
  81. data/script/check_c_safety_allowlist.yml +83 -0
  82. metadata +35 -565
  83. data/ext/makiri/glue/ruby_css.c +0 -185
  84. data/ext/makiri/glue/ruby_serialize.c +0 -92
  85. data/lib/makiri/cdata.rb +0 -6
  86. data/vendor/lexbor/.github/FUNDING.yml +0 -12
  87. data/vendor/lexbor/.github/workflows/cmake.yml +0 -37
  88. data/vendor/lexbor/benchmarks/CMakeLists.txt +0 -22
  89. data/vendor/lexbor/benchmarks/benchmark.h +0 -101
  90. data/vendor/lexbor/benchmarks/lexbor/html/CMakeLists.txt +0 -16
  91. data/vendor/lexbor/benchmarks/lexbor/html/tokenizer/input_validation.c +0 -100
  92. data/vendor/lexbor/benchmarks/lexbor/html/tokenizer/parse.c +0 -95
  93. data/vendor/lexbor/benchmarks/lexbor/selectors/CMakeLists.txt +0 -16
  94. data/vendor/lexbor/benchmarks/lexbor/selectors/files/average.html +0 -41
  95. data/vendor/lexbor/benchmarks/lexbor/selectors/selectors.c +0 -144
  96. data/vendor/lexbor/examples/CMakeLists.txt +0 -17
  97. data/vendor/lexbor/examples/lexbor/css/CMakeLists.txt +0 -25
  98. data/vendor/lexbor/examples/lexbor/css/StyleSheet.c +0 -70
  99. data/vendor/lexbor/examples/lexbor/css/base.h +0 -34
  100. data/vendor/lexbor/examples/lexbor/css/selectors/list_easy_way.c +0 -74
  101. data/vendor/lexbor/examples/lexbor/css/selectors/list_fast_way.c +0 -149
  102. data/vendor/lexbor/examples/lexbor/css/syntax/structure_parse_file.c +0 -467
  103. data/vendor/lexbor/examples/lexbor/css/syntax/tokenizer/from_file.c +0 -87
  104. data/vendor/lexbor/examples/lexbor/css/syntax/tokenizer/print_raw.c +0 -100
  105. data/vendor/lexbor/examples/lexbor/encoding/CMakeLists.txt +0 -11
  106. data/vendor/lexbor/examples/lexbor/encoding/buffer/decode/decode.c +0 -58
  107. data/vendor/lexbor/examples/lexbor/encoding/buffer/decode/decoder.c +0 -140
  108. data/vendor/lexbor/examples/lexbor/encoding/buffer/decode/validate.c +0 -65
  109. data/vendor/lexbor/examples/lexbor/encoding/buffer/encode/encode.c +0 -67
  110. data/vendor/lexbor/examples/lexbor/encoding/buffer/encode/encoder.c +0 -262
  111. data/vendor/lexbor/examples/lexbor/encoding/buffer/encode/validate.c +0 -77
  112. data/vendor/lexbor/examples/lexbor/encoding/buffer/from_to.c +0 -193
  113. data/vendor/lexbor/examples/lexbor/encoding/data_by_name.c +0 -23
  114. data/vendor/lexbor/examples/lexbor/encoding/single/decode/decode.c +0 -55
  115. data/vendor/lexbor/examples/lexbor/encoding/single/decode/decoder.c +0 -115
  116. data/vendor/lexbor/examples/lexbor/encoding/single/decode/validate.c +0 -59
  117. data/vendor/lexbor/examples/lexbor/encoding/single/encode/encode.c +0 -65
  118. data/vendor/lexbor/examples/lexbor/encoding/single/encode/encoder.c +0 -241
  119. data/vendor/lexbor/examples/lexbor/encoding/single/encode/validate.c +0 -85
  120. data/vendor/lexbor/examples/lexbor/encoding/single/from_to.c +0 -156
  121. data/vendor/lexbor/examples/lexbor/html/CMakeLists.txt +0 -21
  122. data/vendor/lexbor/examples/lexbor/html/base.h +0 -98
  123. data/vendor/lexbor/examples/lexbor/html/document_parse.c +0 -43
  124. data/vendor/lexbor/examples/lexbor/html/document_parse_chunk.c +0 -72
  125. data/vendor/lexbor/examples/lexbor/html/document_title.c +0 -84
  126. data/vendor/lexbor/examples/lexbor/html/element_attributes.c +0 -134
  127. data/vendor/lexbor/examples/lexbor/html/element_create.c +0 -84
  128. data/vendor/lexbor/examples/lexbor/html/element_innerHTML.c +0 -52
  129. data/vendor/lexbor/examples/lexbor/html/elements_by_attr.c +0 -106
  130. data/vendor/lexbor/examples/lexbor/html/elements_by_class_name.c +0 -55
  131. data/vendor/lexbor/examples/lexbor/html/elements_by_tag_name.c +0 -51
  132. data/vendor/lexbor/examples/lexbor/html/encoding.c +0 -95
  133. data/vendor/lexbor/examples/lexbor/html/html2sexpr.c +0 -231
  134. data/vendor/lexbor/examples/lexbor/html/parse.c +0 -69
  135. data/vendor/lexbor/examples/lexbor/html/parse_chunk.c +0 -77
  136. data/vendor/lexbor/examples/lexbor/html/tokenizer/callback.c +0 -78
  137. data/vendor/lexbor/examples/lexbor/html/tokenizer/simple.c +0 -118
  138. data/vendor/lexbor/examples/lexbor/html/tokenizer/tag_attributes.c +0 -106
  139. data/vendor/lexbor/examples/lexbor/html/tokenizer/text.c +0 -75
  140. data/vendor/lexbor/examples/lexbor/punycode/CMakeLists.txt +0 -11
  141. data/vendor/lexbor/examples/lexbor/punycode/decode.c +0 -102
  142. data/vendor/lexbor/examples/lexbor/punycode/encode.c +0 -102
  143. data/vendor/lexbor/examples/lexbor/selectors/CMakeLists.txt +0 -15
  144. data/vendor/lexbor/examples/lexbor/selectors/easy_way.c +0 -120
  145. data/vendor/lexbor/examples/lexbor/selectors/normal_way.c +0 -172
  146. data/vendor/lexbor/examples/lexbor/selectors/unique_nodes.c +0 -142
  147. data/vendor/lexbor/examples/lexbor/styles/CMakeLists.txt +0 -15
  148. data/vendor/lexbor/examples/lexbor/styles/attribute_style.c +0 -110
  149. data/vendor/lexbor/examples/lexbor/styles/base.h +0 -34
  150. data/vendor/lexbor/examples/lexbor/styles/events_insert.c +0 -199
  151. data/vendor/lexbor/examples/lexbor/styles/stylesheet.c +0 -141
  152. data/vendor/lexbor/examples/lexbor/styles/walk.c +0 -170
  153. data/vendor/lexbor/examples/lexbor/unicode/CMakeLists.txt +0 -17
  154. data/vendor/lexbor/examples/lexbor/unicode/idna_to_ascii.c +0 -115
  155. data/vendor/lexbor/examples/lexbor/unicode/normalization_form.c +0 -99
  156. data/vendor/lexbor/examples/lexbor/unicode/normalization_form_stdin.c +0 -99
  157. data/vendor/lexbor/examples/lexbor/url/CMakeLists.txt +0 -15
  158. data/vendor/lexbor/examples/lexbor/url/parse.c +0 -101
  159. data/vendor/lexbor/examples/lexbor/url/relative.c +0 -112
  160. data/vendor/lexbor/images/SerpApi-logo.png +0 -0
  161. data/vendor/lexbor/images/neural-logo.png +0 -0
  162. data/vendor/lexbor/packaging/Makefile +0 -26
  163. data/vendor/lexbor/packaging/README.md +0 -17
  164. data/vendor/lexbor/packaging/deb/Makefile.in +0 -40
  165. data/vendor/lexbor/packaging/deb/Makefile.module.in +0 -15
  166. data/vendor/lexbor/packaging/deb/debian_in/changelog +0 -6
  167. data/vendor/lexbor/packaging/deb/debian_in/control +0 -25
  168. data/vendor/lexbor/packaging/deb/debian_in/copyright +0 -29
  169. data/vendor/lexbor/packaging/deb/debian_in/dev.dirs +0 -2
  170. data/vendor/lexbor/packaging/deb/debian_in/dev.install +0 -3
  171. data/vendor/lexbor/packaging/deb/debian_in/dirs +0 -1
  172. data/vendor/lexbor/packaging/deb/debian_in/docs +0 -2
  173. data/vendor/lexbor/packaging/deb/debian_in/install +0 -1
  174. data/vendor/lexbor/packaging/deb/debian_in/not-installed +0 -4
  175. data/vendor/lexbor/packaging/deb/debian_in/rules +0 -15
  176. data/vendor/lexbor/packaging/deb/debian_in/source/format +0 -1
  177. data/vendor/lexbor/packaging/deb/debian_main_in/changelog +0 -6
  178. data/vendor/lexbor/packaging/deb/debian_main_in/control +0 -33
  179. data/vendor/lexbor/packaging/deb/debian_main_in/copyright +0 -29
  180. data/vendor/lexbor/packaging/deb/debian_main_in/dev.dirs +0 -3
  181. data/vendor/lexbor/packaging/deb/debian_main_in/dev.install +0 -5
  182. data/vendor/lexbor/packaging/deb/debian_main_in/dirs +0 -1
  183. data/vendor/lexbor/packaging/deb/debian_main_in/docs +0 -2
  184. data/vendor/lexbor/packaging/deb/debian_main_in/install +0 -1
  185. data/vendor/lexbor/packaging/deb/debian_main_in/rules +0 -15
  186. data/vendor/lexbor/packaging/deb/debian_main_in/source/format +0 -1
  187. data/vendor/lexbor/packaging/rpm/Makefile +0 -14
  188. data/vendor/lexbor/packaging/rpm/build.sh +0 -105
  189. data/vendor/lexbor/packaging/rpm/liblexbor-module.spec.in +0 -31
  190. data/vendor/lexbor/packaging/rpm/liblexbor.spec.in +0 -62
  191. data/vendor/lexbor/test/CMakeLists.txt +0 -44
  192. data/vendor/lexbor/test/amalgamation/code/_base.h +0 -33
  193. data/vendor/lexbor/test/amalgamation/code/html.c +0 -35
  194. data/vendor/lexbor/test/amalgamation/generate_and_compile.sh +0 -130
  195. data/vendor/lexbor/test/external/commoncrawl.py +0 -110
  196. data/vendor/lexbor/test/files/lexbor/css/declarations/display.ton +0 -801
  197. data/vendor/lexbor/test/files/lexbor/css/declarations/height.ton +0 -367
  198. data/vendor/lexbor/test/files/lexbor/css/declarations/syntax.ton +0 -189
  199. data/vendor/lexbor/test/files/lexbor/css/declarations/width.ton +0 -367
  200. data/vendor/lexbor/test/files/lexbor/css/lexbor.css +0 -205
  201. data/vendor/lexbor/test/files/lexbor/css/syntax/parser/at.ton +0 -518
  202. data/vendor/lexbor/test/files/lexbor/css/syntax/parser/other.ton +0 -80
  203. data/vendor/lexbor/test/files/lexbor/css/syntax/parser/qualified.ton +0 -799
  204. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/CDO-CDC.ton +0 -226
  205. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/at.ton +0 -170
  206. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/broken-utf-8.ton +0 -101
  207. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/comment.ton +0 -95
  208. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/hash.ton +0 -181
  209. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/ident.ton +0 -245
  210. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/number.ton +0 -694
  211. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/other.ton +0 -16
  212. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/reverse-solidus.ton +0 -111
  213. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/single-tokens.ton +0 -66
  214. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/string.ton +0 -303
  215. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/unicode_range.ton +0 -139
  216. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/url-function.ton +0 -229
  217. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/whitespace.ton +0 -45
  218. data/vendor/lexbor/test/files/lexbor/encoding/big5_map_decode.txt +0 -14699
  219. data/vendor/lexbor/test/files/lexbor/encoding/euc_jp_map_decode.txt +0 -7737
  220. data/vendor/lexbor/test/files/lexbor/encoding/euc_kr_map_decode.txt +0 -17189
  221. data/vendor/lexbor/test/files/lexbor/encoding/gb18030_map_decode.txt +0 -27672
  222. data/vendor/lexbor/test/files/lexbor/encoding/iso_2022_jp_map_decode.txt +0 -7928
  223. data/vendor/lexbor/test/files/lexbor/encoding/shift_jis_map_decode.txt +0 -5138
  224. data/vendor/lexbor/test/files/lexbor/html/html5_test/README.md +0 -12
  225. data/vendor/lexbor/test/files/lexbor/html/html5_test/adoption01.ton +0 -442
  226. data/vendor/lexbor/test/files/lexbor/html/html5_test/adoption02.ton +0 -53
  227. data/vendor/lexbor/test/files/lexbor/html/html5_test/attributes.ton +0 -29
  228. data/vendor/lexbor/test/files/lexbor/html/html5_test/blocks.ton +0 -891
  229. data/vendor/lexbor/test/files/lexbor/html/html5_test/char_ref.ton +0 -51
  230. data/vendor/lexbor/test/files/lexbor/html/html5_test/comments01.ton +0 -290
  231. data/vendor/lexbor/test/files/lexbor/html/html5_test/doctype01.ton +0 -637
  232. data/vendor/lexbor/test/files/lexbor/html/html5_test/domjs-unsafe.ton +0 -822
  233. data/vendor/lexbor/test/files/lexbor/html/html5_test/entities01.ton +0 -1262
  234. data/vendor/lexbor/test/files/lexbor/html/html5_test/entities02.ton +0 -416
  235. data/vendor/lexbor/test/files/lexbor/html/html5_test/foreign-fragment.ton +0 -859
  236. data/vendor/lexbor/test/files/lexbor/html/html5_test/html5test-com.ton +0 -414
  237. data/vendor/lexbor/test/files/lexbor/html/html5_test/inbody01.ton +0 -78
  238. data/vendor/lexbor/test/files/lexbor/html/html5_test/isindex.ton +0 -67
  239. data/vendor/lexbor/test/files/lexbor/html/html5_test/main-element.ton +0 -63
  240. data/vendor/lexbor/test/files/lexbor/html/html5_test/math.ton +0 -140
  241. data/vendor/lexbor/test/files/lexbor/html/html5_test/menuitem-element.ton +0 -345
  242. data/vendor/lexbor/test/files/lexbor/html/html5_test/namespace-sensitivity.ton +0 -31
  243. data/vendor/lexbor/test/files/lexbor/html/html5_test/noscript01.ton +0 -344
  244. data/vendor/lexbor/test/files/lexbor/html/html5_test/pending-spec-changes-plain-text-unsafe.ton +0 -39
  245. data/vendor/lexbor/test/files/lexbor/html/html5_test/pending-spec-changes.ton +0 -65
  246. data/vendor/lexbor/test/files/lexbor/html/html5_test/plain-text-unsafe.ton +0 -657
  247. data/vendor/lexbor/test/files/lexbor/html/html5_test/quirks01.ton +0 -77
  248. data/vendor/lexbor/test/files/lexbor/html/html5_test/ruby.ton +0 -411
  249. data/vendor/lexbor/test/files/lexbor/html/html5_test/scriptdata01.ton +0 -499
  250. data/vendor/lexbor/test/files/lexbor/html/html5_test/search-element.ton +0 -63
  251. data/vendor/lexbor/test/files/lexbor/html/html5_test/svg.ton +0 -140
  252. data/vendor/lexbor/test/files/lexbor/html/html5_test/tables01.ton +0 -421
  253. data/vendor/lexbor/test/files/lexbor/html/html5_test/template.ton +0 -2199
  254. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests1.ton +0 -2486
  255. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests10.ton +0 -1090
  256. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests11.ton +0 -317
  257. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests12.ton +0 -72
  258. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests14.ton +0 -100
  259. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests15.ton +0 -290
  260. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests16.ton +0 -3471
  261. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests17.ton +0 -244
  262. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests18.ton +0 -752
  263. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests19.ton +0 -1889
  264. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests2.ton +0 -1093
  265. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests20.ton +0 -1158
  266. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests21.ton +0 -416
  267. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests22.ton +0 -192
  268. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests23.ton +0 -148
  269. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests24.ton +0 -107
  270. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests25.ton +0 -390
  271. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests26.ton +0 -546
  272. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests3.ton +0 -407
  273. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests4.ton +0 -96
  274. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests5.ton +0 -299
  275. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests6.ton +0 -908
  276. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests7.ton +0 -597
  277. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests8.ton +0 -219
  278. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests9.ton +0 -585
  279. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests_innerHTML_1.ton +0 -1164
  280. data/vendor/lexbor/test/files/lexbor/html/html5_test/tricky01.ton +0 -378
  281. data/vendor/lexbor/test/files/lexbor/html/html5_test/webkit01.ton +0 -1022
  282. data/vendor/lexbor/test/files/lexbor/html/html5_test/webkit02.ton +0 -996
  283. data/vendor/lexbor/test/files/lexbor/html/html5lib_encoding/README.md +0 -12
  284. data/vendor/lexbor/test/files/lexbor/html/html5lib_encoding/test-yahoo-jp.dat +0 -10
  285. data/vendor/lexbor/test/files/lexbor/html/html5lib_encoding/tests1.dat +0 -388
  286. data/vendor/lexbor/test/files/lexbor/html/html5lib_encoding/tests2.dat +0 -115
  287. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/README.md +0 -12
  288. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/contentModelFlags.test +0 -93
  289. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/domjs.test +0 -335
  290. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/entities.test +0 -542
  291. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/escapeFlag.test +0 -36
  292. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/namedEntities.test +0 -42422
  293. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/numericEntities.test +0 -1677
  294. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/pendingSpecChanges.test +0 -9
  295. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/test1.test +0 -353
  296. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/test2.test +0 -275
  297. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/test3.test +0 -11233
  298. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/test4.test +0 -532
  299. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/unicodeChars.test +0 -1577
  300. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/unicodeCharsProblematic.test +0 -41
  301. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/xmlViolation.test +0 -20
  302. data/vendor/lexbor/test/files/lexbor/html/lexbor.html +0 -150
  303. data/vendor/lexbor/test/files/lexbor/html/serialize_ext/attributes.ton +0 -167
  304. data/vendor/lexbor/test/files/lexbor/html/serialize_ext/comment.ton +0 -218
  305. data/vendor/lexbor/test/files/lexbor/html/serialize_ext/document_type.ton +0 -180
  306. data/vendor/lexbor/test/files/lexbor/html/serialize_ext/element.ton +0 -392
  307. data/vendor/lexbor/test/files/lexbor/html/serialize_ext/processing_instruction.ton +0 -45
  308. data/vendor/lexbor/test/files/lexbor/html/serialize_ext/serialize_ext.ton +0 -277
  309. data/vendor/lexbor/test/files/lexbor/html/serialize_ext/text.ton +0 -308
  310. data/vendor/lexbor/test/files/lexbor/html/tokenizer/char_ref.ton +0 -563
  311. data/vendor/lexbor/test/files/lexbor/html/tokenizer/comment.ton +0 -28
  312. data/vendor/lexbor/test/files/lexbor/html/tokenizer/doctype.ton +0 -257
  313. data/vendor/lexbor/test/files/lexbor/html/tokenizer/tag_attr.ton +0 -107
  314. data/vendor/lexbor/test/files/lexbor/html/tokenizer/tag_name.ton +0 -51
  315. data/vendor/lexbor/test/files/lexbor/url/changes.ton +0 -1005
  316. data/vendor/lexbor/test/files/lexbor/url/domain.ton +0 -93
  317. data/vendor/lexbor/test/files/lexbor/url/file.ton +0 -29
  318. data/vendor/lexbor/test/files/lexbor/url/fragment.ton +0 -47
  319. data/vendor/lexbor/test/files/lexbor/url/ipv4.ton +0 -221
  320. data/vendor/lexbor/test/files/lexbor/url/ipv6.ton +0 -197
  321. data/vendor/lexbor/test/files/lexbor/url/path.ton +0 -510
  322. data/vendor/lexbor/test/files/lexbor/url/query.ton +0 -135
  323. data/vendor/lexbor/test/files/lexbor/url/scheme.ton +0 -139
  324. data/vendor/lexbor/test/files/lexbor/url/slow_path.ton +0 -460
  325. data/vendor/lexbor/test/files/lexbor/url/url.ton +0 -78
  326. data/vendor/lexbor/test/files/lexbor/url/username_password.ton +0 -127
  327. data/vendor/lexbor/test/fuzzers/lexbor/css/CMakeLists.txt +0 -16
  328. data/vendor/lexbor/test/fuzzers/lexbor/css/css.dict +0 -307
  329. data/vendor/lexbor/test/fuzzers/lexbor/css/stylesheet.c +0 -55
  330. data/vendor/lexbor/test/fuzzers/lexbor/css/syntax/syntax.dict +0 -41
  331. data/vendor/lexbor/test/fuzzers/lexbor/css/syntax/tokenizer.c +0 -99
  332. data/vendor/lexbor/test/fuzzers/lexbor/encoding/CMakeLists.txt +0 -16
  333. data/vendor/lexbor/test/fuzzers/lexbor/encoding/decode.c +0 -29
  334. data/vendor/lexbor/test/fuzzers/lexbor/html/CMakeLists.txt +0 -16
  335. data/vendor/lexbor/test/fuzzers/lexbor/html/document_parse.c +0 -23
  336. data/vendor/lexbor/test/fuzzers/lexbor/punycode/CMakeLists.txt +0 -16
  337. data/vendor/lexbor/test/fuzzers/lexbor/punycode/base.c +0 -89
  338. data/vendor/lexbor/test/fuzzers/lexbor/selectors/CMakeLists.txt +0 -16
  339. data/vendor/lexbor/test/fuzzers/lexbor/selectors/find.c +0 -146
  340. data/vendor/lexbor/test/fuzzers/lexbor/selectors/selectors.dict +0 -71
  341. data/vendor/lexbor/test/fuzzers/lexbor/unicode/CMakeLists.txt +0 -16
  342. data/vendor/lexbor/test/fuzzers/lexbor/unicode/idna_to_ascii.c +0 -40
  343. data/vendor/lexbor/test/fuzzers/lexbor/unicode/normalization_forms.c +0 -41
  344. data/vendor/lexbor/test/fuzzers/lexbor/url/CMakeLists.txt +0 -16
  345. data/vendor/lexbor/test/fuzzers/lexbor/url/modify.c +0 -117
  346. data/vendor/lexbor/test/fuzzers/lexbor/url/parser.c +0 -132
  347. data/vendor/lexbor/test/fuzzers/lexbor/url/url.dict +0 -13
  348. data/vendor/lexbor/test/lexbor/core/CMakeLists.txt +0 -17
  349. data/vendor/lexbor/test/lexbor/core/array.c +0 -443
  350. data/vendor/lexbor/test/lexbor/core/array_obj.c +0 -306
  351. data/vendor/lexbor/test/lexbor/core/avl.c +0 -1588
  352. data/vendor/lexbor/test/lexbor/core/bst.c +0 -388
  353. data/vendor/lexbor/test/lexbor/core/bst_map.c +0 -209
  354. data/vendor/lexbor/test/lexbor/core/dobject.c +0 -322
  355. data/vendor/lexbor/test/lexbor/core/hash.c +0 -171
  356. data/vendor/lexbor/test/lexbor/core/in.c +0 -356
  357. data/vendor/lexbor/test/lexbor/core/mem.c +0 -332
  358. data/vendor/lexbor/test/lexbor/core/mraw.c +0 -612
  359. data/vendor/lexbor/test/lexbor/core/str.c +0 -433
  360. data/vendor/lexbor/test/lexbor/css/CMakeLists.txt +0 -25
  361. data/vendor/lexbor/test/lexbor/css/declarations.c +0 -571
  362. data/vendor/lexbor/test/lexbor/css/selectors/selectors.c +0 -894
  363. data/vendor/lexbor/test/lexbor/css/selectors/specificity.c +0 -177
  364. data/vendor/lexbor/test/lexbor/css/stylesheet.c +0 -196
  365. data/vendor/lexbor/test/lexbor/css/syntax/an_plus_b.c +0 -233
  366. data/vendor/lexbor/test/lexbor/css/syntax/parser.c +0 -1134
  367. data/vendor/lexbor/test/lexbor/css/syntax/style.c +0 -67
  368. data/vendor/lexbor/test/lexbor/css/syntax/tokenizer.c +0 -485
  369. data/vendor/lexbor/test/lexbor/css/syntax/tokenizer_queue.c +0 -92
  370. data/vendor/lexbor/test/lexbor/dom/CMakeLists.txt +0 -17
  371. data/vendor/lexbor/test/lexbor/dom/exception.c +0 -210
  372. data/vendor/lexbor/test/lexbor/dom/node.c +0 -441
  373. data/vendor/lexbor/test/lexbor/encoding/CMakeLists.txt +0 -42
  374. data/vendor/lexbor/test/lexbor/encoding/buffer/big5.c +0 -210
  375. data/vendor/lexbor/test/lexbor/encoding/buffer/encoding.h +0 -243
  376. data/vendor/lexbor/test/lexbor/encoding/buffer/euc_jp.c +0 -228
  377. data/vendor/lexbor/test/lexbor/encoding/buffer/euc_kr.c +0 -172
  378. data/vendor/lexbor/test/lexbor/encoding/buffer/gb18030.c +0 -297
  379. data/vendor/lexbor/test/lexbor/encoding/buffer/ibm866.c +0 -123
  380. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_2022_jp.c +0 -403
  381. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_10.c +0 -123
  382. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_13.c +0 -123
  383. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_14.c +0 -123
  384. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_15.c +0 -123
  385. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_16.c +0 -123
  386. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_2.c +0 -123
  387. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_3.c +0 -123
  388. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_4.c +0 -123
  389. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_5.c +0 -123
  390. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_6.c +0 -123
  391. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_7.c +0 -123
  392. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_8.c +0 -123
  393. data/vendor/lexbor/test/lexbor/encoding/buffer/koi8_r.c +0 -123
  394. data/vendor/lexbor/test/lexbor/encoding/buffer/koi8_u.c +0 -123
  395. data/vendor/lexbor/test/lexbor/encoding/buffer/macintosh.c +0 -123
  396. data/vendor/lexbor/test/lexbor/encoding/buffer/shift_jis.c +0 -230
  397. data/vendor/lexbor/test/lexbor/encoding/buffer/utf-16.c +0 -230
  398. data/vendor/lexbor/test/lexbor/encoding/buffer/utf-8.c +0 -282
  399. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1250.c +0 -123
  400. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1251.c +0 -123
  401. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1252.c +0 -123
  402. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1253.c +0 -123
  403. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1254.c +0 -123
  404. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1255.c +0 -123
  405. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1256.c +0 -123
  406. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1257.c +0 -123
  407. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1258.c +0 -123
  408. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_874.c +0 -123
  409. data/vendor/lexbor/test/lexbor/encoding/buffer/x_mac_cyrillic.c +0 -123
  410. data/vendor/lexbor/test/lexbor/encoding/encoding.c +0 -97
  411. data/vendor/lexbor/test/lexbor/encoding/parser.h +0 -225
  412. data/vendor/lexbor/test/lexbor/encoding/single/big5.c +0 -203
  413. data/vendor/lexbor/test/lexbor/encoding/single/encoding.h +0 -227
  414. data/vendor/lexbor/test/lexbor/encoding/single/euc_jp.c +0 -220
  415. data/vendor/lexbor/test/lexbor/encoding/single/euc_kr.c +0 -162
  416. data/vendor/lexbor/test/lexbor/encoding/single/gb18030.c +0 -277
  417. data/vendor/lexbor/test/lexbor/encoding/single/ibm866.c +0 -114
  418. data/vendor/lexbor/test/lexbor/encoding/single/iso_2022_jp.c +0 -342
  419. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_10.c +0 -114
  420. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_13.c +0 -114
  421. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_14.c +0 -114
  422. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_15.c +0 -114
  423. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_16.c +0 -114
  424. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_2.c +0 -114
  425. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_3.c +0 -114
  426. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_4.c +0 -114
  427. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_5.c +0 -114
  428. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_6.c +0 -114
  429. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_7.c +0 -114
  430. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_8.c +0 -114
  431. data/vendor/lexbor/test/lexbor/encoding/single/koi8_r.c +0 -114
  432. data/vendor/lexbor/test/lexbor/encoding/single/koi8_u.c +0 -114
  433. data/vendor/lexbor/test/lexbor/encoding/single/macintosh.c +0 -114
  434. data/vendor/lexbor/test/lexbor/encoding/single/shift_jis.c +0 -203
  435. data/vendor/lexbor/test/lexbor/encoding/single/utf-16.c +0 -216
  436. data/vendor/lexbor/test/lexbor/encoding/single/utf-8.c +0 -227
  437. data/vendor/lexbor/test/lexbor/encoding/single/windows_1250.c +0 -114
  438. data/vendor/lexbor/test/lexbor/encoding/single/windows_1251.c +0 -114
  439. data/vendor/lexbor/test/lexbor/encoding/single/windows_1252.c +0 -114
  440. data/vendor/lexbor/test/lexbor/encoding/single/windows_1253.c +0 -114
  441. data/vendor/lexbor/test/lexbor/encoding/single/windows_1254.c +0 -114
  442. data/vendor/lexbor/test/lexbor/encoding/single/windows_1255.c +0 -114
  443. data/vendor/lexbor/test/lexbor/encoding/single/windows_1256.c +0 -114
  444. data/vendor/lexbor/test/lexbor/encoding/single/windows_1257.c +0 -114
  445. data/vendor/lexbor/test/lexbor/encoding/single/windows_1258.c +0 -114
  446. data/vendor/lexbor/test/lexbor/encoding/single/windows_874.c +0 -114
  447. data/vendor/lexbor/test/lexbor/encoding/single/x_mac_cyrillic.c +0 -114
  448. data/vendor/lexbor/test/lexbor/html/CMakeLists.txt +0 -35
  449. data/vendor/lexbor/test/lexbor/html/attributes.c +0 -105
  450. data/vendor/lexbor/test/lexbor/html/build-cpp.cpp +0 -68
  451. data/vendor/lexbor/test/lexbor/html/clone.c +0 -356
  452. data/vendor/lexbor/test/lexbor/html/dom/document_type.c +0 -125
  453. data/vendor/lexbor/test/lexbor/html/element_by.c +0 -147
  454. data/vendor/lexbor/test/lexbor/html/encoding.c +0 -228
  455. data/vendor/lexbor/test/lexbor/html/encoding_html5lib_tests.c +0 -308
  456. data/vendor/lexbor/test/lexbor/html/encoding_prescan.c +0 -1686
  457. data/vendor/lexbor/test/lexbor/html/inner.c +0 -103
  458. data/vendor/lexbor/test/lexbor/html/other.c +0 -139
  459. data/vendor/lexbor/test/lexbor/html/parse.c +0 -380
  460. data/vendor/lexbor/test/lexbor/html/perf.c +0 -161
  461. data/vendor/lexbor/test/lexbor/html/serialize.c +0 -56
  462. data/vendor/lexbor/test/lexbor/html/serialize_ext.c +0 -461
  463. data/vendor/lexbor/test/lexbor/html/tags.c +0 -140
  464. data/vendor/lexbor/test/lexbor/html/tokenizer/errors.c +0 -34
  465. data/vendor/lexbor/test/lexbor/html/tokenizer/html5lib_tests.c +0 -1168
  466. data/vendor/lexbor/test/lexbor/html/tokenizer_helper.h +0 -403
  467. data/vendor/lexbor/test/lexbor/html/tokenizer_tokens.c +0 -754
  468. data/vendor/lexbor/test/lexbor/html/tree/errors.c +0 -34
  469. data/vendor/lexbor/test/lexbor/html/tree/open_elements.c +0 -99
  470. data/vendor/lexbor/test/lexbor/html/tree_builder.c +0 -536
  471. data/vendor/lexbor/test/lexbor/ns/CMakeLists.txt +0 -17
  472. data/vendor/lexbor/test/lexbor/ns/res.c +0 -55
  473. data/vendor/lexbor/test/lexbor/punycode/CMakeLists.txt +0 -17
  474. data/vendor/lexbor/test/lexbor/punycode/base.c +0 -240
  475. data/vendor/lexbor/test/lexbor/selectors/CMakeLists.txt +0 -17
  476. data/vendor/lexbor/test/lexbor/selectors/selectors.c +0 -911
  477. data/vendor/lexbor/test/lexbor/style/CMakeLists.txt +0 -17
  478. data/vendor/lexbor/test/lexbor/style/element_events.c +0 -291
  479. data/vendor/lexbor/test/lexbor/style/element_style_steps.c +0 -5035
  480. data/vendor/lexbor/test/lexbor/style/not_html_namespace.c +0 -87
  481. data/vendor/lexbor/test/lexbor/style/style_tag.c +0 -184
  482. data/vendor/lexbor/test/lexbor/style/stylesheet.c +0 -51
  483. data/vendor/lexbor/test/lexbor/style/wo_events.c +0 -351
  484. data/vendor/lexbor/test/lexbor/tag/CMakeLists.txt +0 -17
  485. data/vendor/lexbor/test/lexbor/tag/res.c +0 -440
  486. data/vendor/lexbor/test/lexbor/unicode/CMakeLists.txt +0 -17
  487. data/vendor/lexbor/test/lexbor/unicode/composition_test.c +0 -1095
  488. data/vendor/lexbor/test/lexbor/unicode/edges_normalization_forms.c +0 -220
  489. data/vendor/lexbor/test/lexbor/unicode/idna.c +0 -98
  490. data/vendor/lexbor/test/lexbor/unicode/idna_codepoints.c +0 -110
  491. data/vendor/lexbor/test/lexbor/unicode/idna_type.c +0 -31
  492. data/vendor/lexbor/test/lexbor/unicode/normalization_forms.c +0 -205
  493. data/vendor/lexbor/test/lexbor/unicode/normalization_forms_code_points.c +0 -214
  494. data/vendor/lexbor/test/lexbor/unicode/unicode_idna_test_res.h +0 -6423
  495. data/vendor/lexbor/test/lexbor/unicode/unicode_normalization_test_res.h +0 -120229
  496. data/vendor/lexbor/test/lexbor/url/CMakeLists.txt +0 -22
  497. data/vendor/lexbor/test/lexbor/url/errors.c +0 -41
  498. data/vendor/lexbor/test/lexbor/url/other.c +0 -134
  499. data/vendor/lexbor/test/lexbor/url/parser.c +0 -872
  500. data/vendor/lexbor/test/lexbor/url/search_params.c +0 -616
  501. data/vendor/lexbor/test/lexbor/url/validation.c +0 -185
  502. data/vendor/lexbor/test/unit/CMakeLists.txt +0 -49
  503. data/vendor/lexbor/test/unit/kv.c +0 -538
  504. data/vendor/lexbor/test/unit/kv.h +0 -301
  505. data/vendor/lexbor/test/unit/kv_rules.c +0 -609
  506. data/vendor/lexbor/test/unit/kv_state.c +0 -1470
  507. data/vendor/lexbor/test/unit/test.c +0 -131
  508. data/vendor/lexbor/test/unit/test.h +0 -410
  509. data/vendor/lexbor/utils/CMakeLists.txt +0 -11
  510. data/vendor/lexbor/utils/lexbor/css/grammar.txt +0 -263
  511. data/vendor/lexbor/utils/lexbor/css/names.py +0 -768
  512. data/vendor/lexbor/utils/lexbor/css/selectors/pseudo.py +0 -234
  513. data/vendor/lexbor/utils/lexbor/css/selectors/tmp/const.h +0 -21
  514. data/vendor/lexbor/utils/lexbor/css/selectors/tmp/res.h +0 -26
  515. data/vendor/lexbor/utils/lexbor/css/syntax/definitions.py +0 -62
  516. data/vendor/lexbor/utils/lexbor/css/syntax/non_ascii.pl +0 -77
  517. data/vendor/lexbor/utils/lexbor/css/syntax/token_res.py +0 -55
  518. data/vendor/lexbor/utils/lexbor/css/syntax/tokenizer_code_map.py +0 -36
  519. data/vendor/lexbor/utils/lexbor/css/tmp/const.h +0 -24
  520. data/vendor/lexbor/utils/lexbor/css/tmp/res.h +0 -26
  521. data/vendor/lexbor/utils/lexbor/css/tmp/types.h +0 -21
  522. data/vendor/lexbor/utils/lexbor/css/tmp/value_const.h +0 -21
  523. data/vendor/lexbor/utils/lexbor/css/tmp/value_res.h +0 -25
  524. data/vendor/lexbor/utils/lexbor/dom/attr.py +0 -129
  525. data/vendor/lexbor/utils/lexbor/dom/tmp/const.h +0 -23
  526. data/vendor/lexbor/utils/lexbor/dom/tmp/res.h +0 -27
  527. data/vendor/lexbor/utils/lexbor/encoding/CMakeLists.txt +0 -32
  528. data/vendor/lexbor/utils/lexbor/encoding/big5_map_decode.c +0 -93
  529. data/vendor/lexbor/utils/lexbor/encoding/buffer-single-byte.py +0 -95
  530. data/vendor/lexbor/utils/lexbor/encoding/encodings.json +0 -456
  531. data/vendor/lexbor/utils/lexbor/encoding/euc_jp_map_decode.c +0 -83
  532. data/vendor/lexbor/utils/lexbor/encoding/euc_kr_map_decode.c +0 -89
  533. data/vendor/lexbor/utils/lexbor/encoding/gb18030_map_decode.c +0 -170
  534. data/vendor/lexbor/utils/lexbor/encoding/iso_2022_jp_map_decode.c +0 -120
  535. data/vendor/lexbor/utils/lexbor/encoding/multi-byte/index-big5.txt +0 -18596
  536. data/vendor/lexbor/utils/lexbor/encoding/multi-byte/index-euc-kr.txt +0 -17054
  537. data/vendor/lexbor/utils/lexbor/encoding/multi-byte/index-gb18030.txt +0 -23946
  538. data/vendor/lexbor/utils/lexbor/encoding/multi-byte/index-iso-2022-jp-katakana.txt +0 -69
  539. data/vendor/lexbor/utils/lexbor/encoding/multi-byte/index-jis0208.txt +0 -7730
  540. data/vendor/lexbor/utils/lexbor/encoding/multi-byte/index-jis0212.txt +0 -6073
  541. data/vendor/lexbor/utils/lexbor/encoding/multi-byte.pl +0 -424
  542. data/vendor/lexbor/utils/lexbor/encoding/range-byte.py +0 -118
  543. data/vendor/lexbor/utils/lexbor/encoding/ranges/index-gb18030-ranges.txt +0 -213
  544. data/vendor/lexbor/utils/lexbor/encoding/res.py +0 -231
  545. data/vendor/lexbor/utils/lexbor/encoding/shift_jis_map_decode.c +0 -102
  546. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-ibm866.txt +0 -134
  547. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-10.txt +0 -134
  548. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-13.txt +0 -134
  549. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-14.txt +0 -134
  550. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-15.txt +0 -134
  551. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-16.txt +0 -134
  552. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-2.txt +0 -134
  553. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-3.txt +0 -127
  554. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-4.txt +0 -134
  555. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-5.txt +0 -134
  556. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-6.txt +0 -89
  557. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-7.txt +0 -131
  558. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-8.txt +0 -98
  559. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-koi8-r.txt +0 -134
  560. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-koi8-u.txt +0 -134
  561. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-macintosh.txt +0 -134
  562. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1250.txt +0 -134
  563. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1251.txt +0 -134
  564. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1252.txt +0 -134
  565. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1253.txt +0 -131
  566. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1254.txt +0 -134
  567. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1255.txt +0 -124
  568. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1256.txt +0 -134
  569. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1257.txt +0 -132
  570. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1258.txt +0 -134
  571. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-874.txt +0 -126
  572. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-x-mac-cyrillic.txt +0 -134
  573. data/vendor/lexbor/utils/lexbor/encoding/single-byte.py +0 -179
  574. data/vendor/lexbor/utils/lexbor/encoding/tmp/buffer_single_byte_test.c +0 -123
  575. data/vendor/lexbor/utils/lexbor/encoding/tmp/const.h +0 -19
  576. data/vendor/lexbor/utils/lexbor/encoding/tmp/multi.c +0 -20
  577. data/vendor/lexbor/utils/lexbor/encoding/tmp/multi.h +0 -37
  578. data/vendor/lexbor/utils/lexbor/encoding/tmp/range.c +0 -17
  579. data/vendor/lexbor/utils/lexbor/encoding/tmp/range.h +0 -35
  580. data/vendor/lexbor/utils/lexbor/encoding/tmp/res.c +0 -22
  581. data/vendor/lexbor/utils/lexbor/encoding/tmp/res.h +0 -34
  582. data/vendor/lexbor/utils/lexbor/encoding/tmp/single.c +0 -20
  583. data/vendor/lexbor/utils/lexbor/encoding/tmp/single.h +0 -37
  584. data/vendor/lexbor/utils/lexbor/encoding/tmp/single_byte_test.c +0 -114
  585. data/vendor/lexbor/utils/lexbor/grammar/CMakeLists.txt +0 -63
  586. data/vendor/lexbor/utils/lexbor/grammar/base.h +0 -89
  587. data/vendor/lexbor/utils/lexbor/grammar/document.h +0 -34
  588. data/vendor/lexbor/utils/lexbor/grammar/grammar.c +0 -243
  589. data/vendor/lexbor/utils/lexbor/grammar/json.c +0 -368
  590. data/vendor/lexbor/utils/lexbor/grammar/json.h +0 -48
  591. data/vendor/lexbor/utils/lexbor/grammar/node.c +0 -653
  592. data/vendor/lexbor/utils/lexbor/grammar/node.h +0 -120
  593. data/vendor/lexbor/utils/lexbor/grammar/parser.c +0 -724
  594. data/vendor/lexbor/utils/lexbor/grammar/parser.h +0 -75
  595. data/vendor/lexbor/utils/lexbor/grammar/test.c +0 -1762
  596. data/vendor/lexbor/utils/lexbor/grammar/test.h +0 -35
  597. data/vendor/lexbor/utils/lexbor/grammar/token.c +0 -258
  598. data/vendor/lexbor/utils/lexbor/grammar/token.h +0 -91
  599. data/vendor/lexbor/utils/lexbor/grammar/tokenizer.c +0 -706
  600. data/vendor/lexbor/utils/lexbor/grammar/tokenizer.h +0 -73
  601. data/vendor/lexbor/utils/lexbor/html/convert_html5_tests.py +0 -162
  602. data/vendor/lexbor/utils/lexbor/html/data/entities.json +0 -2233
  603. data/vendor/lexbor/utils/lexbor/html/insertion_mode.py +0 -61
  604. data/vendor/lexbor/utils/lexbor/html/reorder_html5_tests_tokenizer_errors.py +0 -137
  605. data/vendor/lexbor/utils/lexbor/html/tmp/insertion_mode.c +0 -53
  606. data/vendor/lexbor/utils/lexbor/html/tmp/insertion_mode.h +0 -18
  607. data/vendor/lexbor/utils/lexbor/html/tmp/tokenizer_res.h +0 -20
  608. data/vendor/lexbor/utils/lexbor/html/tokenizer_entities_bst.py +0 -209
  609. data/vendor/lexbor/utils/lexbor/html/tokenizer_entities_switch.py +0 -162
  610. data/vendor/lexbor/utils/lexbor/html/tokenizer_parse_error.pl +0 -97
  611. data/vendor/lexbor/utils/lexbor/lexbor/LXB.py +0 -498
  612. data/vendor/lexbor/utils/lexbor/lexbor/res.py +0 -130
  613. data/vendor/lexbor/utils/lexbor/tag_ns/data/interfaces.json +0 -98
  614. data/vendor/lexbor/utils/lexbor/tag_ns/data/tags.json +0 -371
  615. data/vendor/lexbor/utils/lexbor/tag_ns/interfaces.py +0 -175
  616. data/vendor/lexbor/utils/lexbor/tag_ns/tags.py +0 -808
  617. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/html_attribute_steps_res.h +0 -21
  618. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/html_element_steps_res.h +0 -21
  619. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/html_interface_res.h +0 -29
  620. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/html_open_elements_res.h +0 -21
  621. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/html_tag_res.h +0 -25
  622. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/interface.c +0 -36
  623. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/interface.h +0 -33
  624. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/ns_const.h +0 -26
  625. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/ns_res.h +0 -29
  626. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/open_elements_res.h +0 -21
  627. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/steps_res.h +0 -23
  628. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/tag_const.h +0 -26
  629. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/tag_res.h +0 -26
  630. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/test/ns_res.c +0 -44
  631. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/test/tag_res.c +0 -45
  632. data/vendor/lexbor/utils/lexbor/unicode/build.pl +0 -1323
  633. data/vendor/lexbor/utils/lexbor/unicode/idna_test.pl +0 -398
  634. data/vendor/lexbor/utils/lexbor/unicode/normalization_test.pl +0 -157
  635. data/vendor/lexbor/utils/wasm/gen_constants.py +0 -186
  636. data/vendor/lexbor/wasm/CMakeLists.txt +0 -18
  637. data/vendor/lexbor/wasm/lexbor/engine/CMakeLists.txt +0 -21
  638. data/vendor/lexbor/wasm/lexbor/engine/index.html +0 -406
  639. data/vendor/lexbor/wasm/lexbor/engine/lexbor.c +0 -1340
  640. data/vendor/lexbor/wasm/lexbor/html/CMakeLists.txt +0 -11
  641. data/vendor/lexbor/wasm/lexbor/html/parse.c +0 -58
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7d7f8de6bbef8fc2fbb17df06bbc9e0fcea04d04ddab17cfa25cb723b5af4c17
4
- data.tar.gz: e3de39e5c405d058807c2a9d4fd00a9c7a3d7b876037b0e152c4d80d162f59ec
3
+ metadata.gz: b0cf63c9d861e721a52064dccc929db0a8f823d485f69854f07d90b805913db0
4
+ data.tar.gz: 989e0d0b1430b202147cd4f0fec411d0377114f34ae380217b683b6b63d031e6
5
5
  SHA512:
6
- metadata.gz: 309d9b79ac58fdd1bd68ccd1f817efe8ce5de3bc4d8e21555babca1490a088b0694e43f4fd36f524b5591c5a2b9c10ff18e4c8a7457483f091d31b73a4e34e73
7
- data.tar.gz: cf184ca85e1c985b8dbbabdbd3684ee5b0d9cbffb874761189621f08141f3196c352cc1e36882b4860225cbefb471d2fc13ad1886de7a1fabffda3e0602952c6
6
+ metadata.gz: 13598e1f45341c8fed3924da8bbe913cf55ef5c1b9256193db18ae3cf2bb9ae0f4816370d8ca569139b33e7194aced65656c1314989b882ea612a44e3750e84b
7
+ data.tar.gz: 71c9da99e6f26fb8a034efba1d0642b37cdcd3ddf212c6f977ad3c868b104162ca86f0c721606024286a06c858326508e41c8624687c03fa3d7a205461926faf
@@ -18,7 +18,7 @@ on:
18
18
  workflow_dispatch:
19
19
  inputs:
20
20
  publish_to_rubygems:
21
- description: "Push the built gems to RubyGems (requires the RUBYGEMS_API_KEY secret)"
21
+ description: "Push the built gems to RubyGems (via Trusted Publishing / OIDC)"
22
22
  type: boolean
23
23
  default: false
24
24
 
@@ -196,13 +196,25 @@ jobs:
196
196
  $pre --verify-tag || \
197
197
  gh release upload "${GITHUB_REF_NAME}" dist/*.gem --repo "${GITHUB_REPOSITORY}" --clobber
198
198
 
199
- # --- optional: publish to RubyGems (manual, opt-in, never on a tag push) ----
199
+ # --- publish to RubyGems, behind the `rubygems` environment approval gate ---
200
+ # Held until the `rubygems` environment's Required-reviewers rule is approved,
201
+ # so a tag push releases on GitHub immediately but the RubyGems push waits.
202
+ #
203
+ # Auth is RubyGems Trusted Publishing (OIDC): no stored API key. Configure a
204
+ # matching Trusted Publisher on RubyGems.org (owner=takahashim, repo=makiri,
205
+ # workflow=release.yml, Environment=rubygems) so the token is only accepted
206
+ # through this gated environment.
200
207
  publish:
201
208
  name: Publish to RubyGems
202
209
  needs: [source-gem, native-gem]
203
- if: github.event_name == 'workflow_dispatch' && inputs.publish_to_rubygems
210
+ if: >-
211
+ startsWith(github.ref, 'refs/tags/') ||
212
+ (github.event_name == 'workflow_dispatch' && inputs.publish_to_rubygems)
204
213
  runs-on: ubuntu-latest
205
- environment: rubygems # add a protection rule here for a manual approval gate
214
+ environment: rubygems
215
+ permissions:
216
+ contents: read
217
+ id-token: write # OIDC identity token for Trusted Publishing
206
218
  steps:
207
219
  - uses: ruby/setup-ruby@v1
208
220
  with:
@@ -212,11 +224,10 @@ jobs:
212
224
  pattern: gem-*
213
225
  merge-multiple: true
214
226
  path: dist
227
+ - name: Configure RubyGems credentials (OIDC trusted publishing)
228
+ uses: rubygems/configure-rubygems-credentials@762a4b77c3300434bb57c7ce80b20e36231927aa # v2.0.0
215
229
  - name: gem push
216
- env:
217
- GEM_HOST_API_KEY: ${{ secrets.RUBYGEMS_API_KEY }}
218
230
  run: |
219
- test -n "$GEM_HOST_API_KEY" || { echo "RUBYGEMS_API_KEY secret is not set"; exit 1; }
220
231
  for g in dist/*.gem; do
221
232
  echo "Pushing $g"
222
233
  gem push "$g"
data/CHANGELOG.md CHANGED
@@ -7,22 +7,163 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.3.0] - 2026-06-06
11
+
12
+ ### Added
13
+
14
+ * **Native XML 1.0 reader + in-place editor** - `Makiri::XML::Document.parse(source)`
15
+ / `Makiri::XML(source)`. No libxml2: a strict, fail-closed parser builds its own
16
+ node arena (case- and namespace-preserving), queried by the native XPath engine.
17
+ * Strict & secure: fail-closed decode (bad UTF-8 / NUL -> `XML::SyntaxError`),
18
+ duplicate attributes rejected, XML 1.0 only; verified against the W3C XML
19
+ Conformance Test Suite.
20
+ * Encoding autodetected (BOM / `<?xml encoding?>`); a contradicting String
21
+ encoding is a fatal error, not a silent mis-decode.
22
+ * DoS-bounded by a single arena byte ceiling (default 256 MiB; raise per parse
23
+ with `max_bytes:`).
24
+ * `<!DOCTYPE>` recognized but **not processed** (`#internal_subset` ->
25
+ `XML::DocumentType`); zero entity/DTD I/O, so **XXE and billion-laughs are
26
+ structurally impossible**. Kept off the tree, as in libxml2.
27
+ * Read API mirrors Nokogiri: `#xpath` / `#at_xpath` (`{prefix => uri}`),
28
+ name/namespace readers, `#text`, `#[]`, traversal, and namespace introspection
29
+ (`Makiri::XML::Namespace`); `XPathContext` works over XML nodes too.
30
+ * Prolog/epilog comments & PIs kept on the document node; adjacent same-type
31
+ character data coalesced - byte-identical to Nokogiri (property-based diff).
32
+ * `#to_xml` / `#to_s` (`pretty:` / `indent:` / `encoding:`) and `#canonicalize`
33
+ (Inclusive C14N 1.0, byte-identical to libxml2); buffers fail closed.
34
+ * Unsupported surface raises `NotImplementedError`: `#css` / `#at_css` and HTML
35
+ serialization.
36
+ * Tree mutation - fully fail-closed, detach-never-destroy:
37
+ * in-place: `#[]=` / `#delete`, `#content=`, `#name=`, `#remove` / `#unlink`;
38
+ * factories: `Document#create_{element,text_node,comment,cdata,processing_instruction}`
39
+ (+ Nokogiri-style `.new` constructors);
40
+ * insertion: `#add_child` / `<<`, `#before` / `#after`, `#replace` - namespaces
41
+ resolved at the insertion point; a cross-document insert deep-copies;
42
+ * fragments: `XML::DocumentFragment.parse` / `XML::Document#fragment`;
43
+ * from scratch: `XML::Document.new` + `#root=`.
44
+ * `XML::Element#element_children` and `Node#clone_node` for XML nodes (also enabling
45
+ `Node#dup` / `#clone`); a clone keeps name case, namespace and the CDATA type.
46
+ * `Node` includes `Enumerable` over its child nodes (`each` / `map` / `select` / ...).
47
+ * `Node#<=>` + `Comparable` - sort by document position (`nil` across documents or
48
+ for attributes).
49
+ * `NodeSet.new(document_or_node, list = [])` - foreign / cross-representation nodes
50
+ are rejected.
51
+ * `NodeSet#[]` accepts a `Range` or `start, length` (like `Array#[]`).
52
+ * `Node` / `NodeSet` / `Document` `#dup` / `#clone` now return real independent
53
+ copies (`#dup(0)` shallow; `#clone(freeze:)` honoured).
54
+ * A **frozen node is genuinely immutable** - every mutator raises `FrozenError`.
55
+
56
+ ### Changed
57
+
58
+ * CSS queries reuse one shared Lexbor engine (GVL-safe) and `at_css` wraps the match
59
+ directly: `at_css('#id')` ~5x faster than nokolexbor (was ~1.16x slower).
60
+ * HTML serialization pre-reserves its buffer - `to_html` now at parity with nokolexbor.
61
+ * Node-class names are the WHATWG DOM interface names (`CDATASection`, `Attr`,
62
+ `DocumentType`, ...), with the Nokogiri spellings (`CDATA`, `DTD`) kept as aliases;
63
+ added `Node#cdata?`.
64
+ * Text-index range table uses `uint32` bounds (24 -> 16 B/entry; ~27% less retained
65
+ index, byte-identical text).
66
+ * Parsing **honours the input String's encoding** - Shift_JIS / EUC-JP / ... are now
67
+ transcoded to UTF-8 instead of mangled.
68
+ * Parsing skips its UTF-8 validation scan when the String's coderange already proves
69
+ it valid.
70
+ * Faster HTML parse/serialize: `memchr` line table + validate-only UTF-8 scan (~7%),
71
+ and a single-copy serializer buffer (~1.2-1.3x).
72
+
73
+ ### Fixed
74
+
75
+ * **Hardened the HTML/XML representation boundary.** HTML (Lexbor) and XML (arena)
76
+ nodes are now distinct TypedData types, so the wrong representation raises
77
+ `TypeError` instead of corrupting memory:
78
+ * `Node#==` / `XPathContext#node=` with an XML `Document` no longer aborts the
79
+ process;
80
+ * `NodeSet#|` / `+` / `&` / `-` across different documents raise `Makiri::Error`
81
+ (was a silent mis-wrap);
82
+ * HTML-only APIs (`import_node`, `add_child` / `before` / `after` / `replace`,
83
+ `fragment(context:)`) reject an XML node argument (was a segfault).
84
+ * The bundle exported the entire vendored Lexbor symbol table (~1700 `lxb_*`); now
85
+ only `Init_makiri` is exported, so loading alongside another Lexbor gem (e.g.
86
+ nokolexbor) no longer segfaults. (Precompiled gems: rebuild required.)
87
+
88
+ ## [0.2.0] - 2026-06-04
89
+
90
+ ### Added
91
+
92
+ * `Element#tag_name` (DOM `tagName`) - the qualified name uppercased for an
93
+ HTML element in an HTML document (`"DIV"`), keeping the original case for
94
+ SVG/MathML; `nil` for non-elements. Complements `#name`, which stays the
95
+ lowercase qualified name.
96
+ * `ProcessingInstruction#target` (DOM `target`) - a PI's target name; `nil` for
97
+ other node kinds. Its data is read via `#content`/`#text`.
98
+ * `Document#create_processing_instruction(target, data)` (DOM
99
+ `createProcessingInstruction`) and `Document#create_document_fragment` (DOM
100
+ `createDocumentFragment`, an empty fragment to build up programmatically -
101
+ unlike `#fragment` / `DocumentFragment.parse`, which parse HTML). Both produce
102
+ a detached node owned by the document; PI creation fails closed when the data
103
+ contains the `?>` terminator (matching the DOM constraint). (DOM
104
+ `createCDATASection` is intentionally not provided: per WHATWG DOM it throws on
105
+ an HTML document, which is the only kind Makiri produces.)
106
+ * `Node#{namespace_uri, prefix, local_name}` - the WHATWG DOM per-node
107
+ namespace accessors on `Element` and `Attribute` (`nil` on other node kinds).
108
+ `namespace_uri` resolves an element's namespace from its node (so an HTML
109
+ element is the XHTML namespace `http://www.w3.org/1999/xhtml`, not `nil` - the
110
+ DOM-faithful value browsers and `namespace-uri()` return; SVG/MathML get their
111
+ own URI), and agrees byte-for-byte with the `namespace-uri()` XPath function.
112
+ For attributes it is `nil` unless prefixed, in which case it returns the parser-assigned
113
+ foreign-content namespace (`xlink`/`xml`/`xmlns`). `prefix` is the prefix
114
+ segment of the qualified name (`nil` for the usual unprefixed HTML5 case), and
115
+ `local_name` is the name without that prefix. Previously a node's namespace was
116
+ reachable only through XPath (`namespace-uri()`/`local-name()`).
117
+ * `Node#clone_node(deep = false)` - a copy of the node, owned by the same
118
+ document and detached from any parent (the DOM `cloneNode`, whose `deep`
119
+ defaults to `false` - a missing/`nil`/`false` argument is a shallow clone; a
120
+ truthy one copies the subtree). Built on the same `import_node` +
121
+ `<template>`-content fixup the fragment parser uses, so a deep-cloned
122
+ `<template>` keeps its contents. Fails closed: a failed import raises rather
123
+ than returning a partial node.
124
+ * `Document#import_node(node, deep = false)` - a copy of `node` owned by the
125
+ receiver document (the DOM `importNode`, whose `deep` likewise defaults to
126
+ `false`). Unlike `Node#clone_node`, the copy is owned by the target rather
127
+ than the node's own document, so it is the way to bring a node across
128
+ documents (Makiri never moves a node between arenas); the source is left
129
+ untouched. Same import + `<template>`-content fixup as `clone_node`, and fails
130
+ closed on a failed import.
131
+ * `Node#pointer_id` - the underlying `lxb_dom_node_t` pointer as an Integer,
132
+ matching `Nokogiri::XML::Node#pointer_id`. Shares the value `#hash`/`#eql?`
133
+ are built on, so it is a stable, Nokogiri-compatible identity key for
134
+ consumers (e.g. wrapper caches) that key nodes by pointer. Stable for a
135
+ node's lifetime; an address may be reused after a node is freed (same caveat
136
+ as Nokogiri).
137
+
138
+ ### Changed
139
+
140
+ * Source gem: drop the Lexbor trees the build never compiles
141
+ (`test`/`utils`/`examples`/`benchmarks`/`wasm`/`packaging`; each is behind an
142
+ `IF(LEXBOR_BUILD_*)` guard and we build with them OFF), roughly halving the
143
+ packaged file count (~1115 → ~566). Precompiled gems are unaffected.
144
+
145
+ ### Internal
146
+
147
+ * XPath: build the per-context compiled-AST cache key with `mkr_strndup`
148
+ (the expression is a `verified_text`, so its length is known) instead of
149
+ `mkr_strdup`, avoiding a `strlen` over already-length-bounded bytes.
150
+
10
151
  ## [0.1.0] - 2026-06-02
11
152
 
12
153
  First public release. An HTML5 parser, a native XPath 1.0 query engine, and CSS
13
- selectors for Ruby — built on vendored [Lexbor](https://lexbor.com/) with **no
154
+ selectors for Ruby - built on vendored [Lexbor](https://lexbor.com/) with **no
14
155
  libxml2 / libxslt dependency at any layer**.
15
156
 
16
157
  ### Added
17
158
 
18
159
  **Parsing & DOM**
19
160
 
20
- * `Makiri::HTML` / `Makiri.parse` — HTML5 parsing via vendored, unpatched Lexbor,
161
+ * `Makiri::HTML` / `Makiri.parse` - HTML5 parsing via vendored, unpatched Lexbor,
21
162
  with browser-compatible UTF-8 decoding (invalid bytes → U+FFFD; parsing never
22
163
  fails on bad bytes). Read-only navigation and attribute/text readers across
23
164
  `Document`, `Element`, `Attribute`, `Text`, `CData`, `Comment`,
24
165
  `ProcessingInstruction`, `DocumentType`, and `DocumentFragment`.
25
- * `Node#line` — 1-based source line of an element, reconstructed from the
166
+ * `Node#line` - 1-based source line of an element, reconstructed from the
26
167
  tokenizer without patching Lexbor (nil when the location is unknown).
27
168
  * `Element#attribute_nodes` and `Attribute#{name,value,parent,element}`, backed
28
169
  by a lazily-built attribute→owner index in the Lexbor compat layer.
@@ -75,7 +216,7 @@ libxml2 / libxslt dependency at any layer**.
75
216
  * UTF-8 text-input contract: HTML and fragment parsing are lenient (invalid
76
217
  bytes → U+FFFD, never reject), while strings passed to the XPath / CSS /
77
218
  DOM-mutation APIs must be valid UTF-8 with no NUL byte, otherwise they raise
78
- `Makiri::Error` — never silently truncated, repaired, or reinterpreted.
219
+ `Makiri::Error` - never silently truncated, repaired, or reinterpreted.
79
220
  * Thread-safe by construction: parsing releases the GVL (concurrent parse scales
80
221
  ~2× on 8 cores), while XPath evaluation holds the GVL so sharing a document or
81
222
  context across threads cannot corrupt memory. Fail-closed string caps and
@@ -98,5 +239,7 @@ libxml2 / libxslt dependency at any layer**.
98
239
  domxpath, CSS differential vs `Nokogiri::HTML5`). GitHub Actions CI across
99
240
  Ruby 3.2–4.0 × Ubuntu/macOS plus a sanitizer job.
100
241
 
101
- [Unreleased]: https://github.com/takahashim/makiri/compare/v0.1.0...HEAD
242
+ [Unreleased]: https://github.com/takahashim/makiri/compare/v0.3.0...HEAD
243
+ [0.3.0]: https://github.com/takahashim/makiri/compare/v0.2.0...v0.3.0
244
+ [0.2.0]: https://github.com/takahashim/makiri/compare/v0.1.0...v0.2.0
102
245
  [0.1.0]: https://github.com/takahashim/makiri/releases/tag/v0.1.0
data/README.md CHANGED
@@ -1,7 +1,8 @@
1
1
  # Makiri
2
2
 
3
- Standards-oriented HTML5 parsing, CSS selector querying, and XPath 1.0
4
- querying for Ruby, powered by Lexbor and a native XPath engine.
3
+ Standards-oriented HTML5/XML parsing, CSS selector querying, XPath 1.0 querying,
4
+ and a native XML 1.0 reader/editor for Ruby, powered by Lexbor and a native XPath
5
+ engine - with no libxml2 dependency.
5
6
 
6
7
  > [!WARNING]
7
8
  > Status: early release. APIs and behavior may change before v1.0.
@@ -20,6 +21,12 @@ XPath 1.0 evaluation in its own native engine, with no libxml2 dependency.
20
21
  * Native XPath 1.0 engine
21
22
  * XPath is parsed and evaluated by Makiri's own engine, written from scratch.
22
23
  * Makiri does not depend on libxml2 for parsing, DOM representation, or XPath evaluation.
24
+ * Native XML 1.0 reader + in-place editor (`Makiri::XML`)
25
+ * A strict, non-validating, fail-closed parser with its own node arena (not
26
+ Lexbor's HTML DOM), queried through the same native XPath engine, with
27
+ in-place tree edits (attributes, content, rename, remove).
28
+ * Conformance is held by the W3C XML Conformance Test Suite, an XPath
29
+ differential, and property-based testing vs Nokogiri (see below).
23
30
  * Bounded, fail-closed execution
24
31
  * XPath evaluation is bounded by per-evaluation limits on work, memory, and recursion.
25
32
  * Ownership and borrowing are kept explicit across layers, with owned/borrowed
@@ -46,7 +53,7 @@ HTML
46
53
  doc.css("a").map { |a| a["href"] } # => ["/a", "/b"]
47
54
  doc.at_css("p.lead").text # => "Hello"
48
55
 
49
- # XPath 1.0 (native engine — no libxml2)
56
+ # XPath 1.0 (native engine - no libxml2)
50
57
  doc.xpath("//a").length # => 2
51
58
  doc.xpath("count(//a)") # => 2.0
52
59
  doc.at_xpath('//*[@id="main"]/p').text # => "Hello"
@@ -72,16 +79,158 @@ ctx.register_variable("cls", "lead")
72
79
  ctx.evaluate('//p[@class=$cls]').first.text # => "Hello"
73
80
  ```
74
81
 
82
+ ### XML (with in-place editing)
83
+
84
+ `Makiri::XML(source)` parses **XML 1.0** with a native, strict,
85
+ well-formedness-checking parser (no libxml2) and queries it through the same
86
+ native XPath 1.0 engine. `source` is a String or any object responding to
87
+ `#read` (an `IO` / `File` / `StringIO`); read a non-UTF-8 file in binary mode
88
+ (`File.binread`) so its encoding is autodetected. Element-name case and namespaces are preserved. It is
89
+ **fail-closed**: malformed input, a duplicate attribute, or a
90
+ non-`1.0` version declaration raises `Makiri::XML::SyntaxError`, and operations
91
+ XML does not support raise `NotImplementedError` rather than returning a wrong
92
+ result. The tree supports in-place edits and building new subtrees (see below).
93
+ A `<!DOCTYPE ...>` is recognized but its **DTD is not processed** (no
94
+ entity/element declarations are loaded, no external subset is fetched) - so a
95
+ DTD-defined entity reference stays an undefined-entity error and **XXE /
96
+ billion-laughs are structurally impossible**. The doctype's name and identifiers
97
+ are still readable:
98
+
99
+ ```ruby
100
+ doc = Makiri::XML(<<~XML)
101
+ <feed xmlns="http://www.w3.org/2005/Atom">
102
+ <entry><title>Hello</title></entry>
103
+ <entry><title>World</title></entry>
104
+ </feed>
105
+ XML
106
+
107
+ # Namespace matching is strict, so a default namespace needs a registered prefix.
108
+ ns = { "a" => "http://www.w3.org/2005/Atom" }
109
+ doc.xpath("//entry").length # => 0 (default namespace)
110
+ doc.xpath("//a:entry", ns).length # => 2
111
+ doc.at_xpath("//a:entry/a:title", ns).text # => "Hello"
112
+
113
+ # Or reuse a context (caches registrations + compiled expressions):
114
+ ctx = Makiri::XPathContext.new(doc.root)
115
+ ctx.register_namespace("a", "http://www.w3.org/2005/Atom")
116
+ ctx.evaluate("//a:entry").length # => 2
117
+
118
+ el = doc.at_xpath("//a:entry", ns)
119
+ el.local_name # => "entry"
120
+ el.namespace_uri # => "http://www.w3.org/2005/Atom"
121
+
122
+ doc.css("entry") # raises NotImplementedError (use #xpath)
123
+
124
+ # Serialize back to XML
125
+ doc.to_xml # => "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<feed ...>...</feed>\n"
126
+ doc.at_xpath("//a:entry", ns).to_xml # => "<entry><title>Hello</title></entry>" (no declaration)
127
+ doc.to_xml(pretty: true) # indented, element-only content
128
+
129
+ # DOCTYPE is recognized but the DTD is not processed (no entities, no I/O):
130
+ dtd = Makiri::XML(%(<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0//EN" "x.dtd"><html/>))
131
+ .internal_subset
132
+ dtd.name # => "html"
133
+ dtd.external_id # => "-//W3C//DTD XHTML 1.0//EN" (alias: #public_id)
134
+ dtd.system_id # => "x.dtd"
135
+ ```
136
+
137
+ Comments and processing instructions in the prolog/epilog are document-node
138
+ children (reachable via `//comment()` / `//processing-instruction()` and
139
+ `#children`), and adjacent CDATA is coalesced - matching libxml2 and the XPath
140
+ data model. `#to_xml` / `#to_s` serialize the tree back to XML (`pretty: true`,
141
+ or `indent: n`, for indented element-only content; `encoding: "Shift_JIS"` to
142
+ transcode, with a hex character reference for anything the encoding can't hold);
143
+ a `Document#to_xml` adds the declaration and the DOCTYPE. `#canonicalize` emits
144
+ Inclusive Canonical XML 1.0 (for XML signatures; `comments: true` to keep
145
+ comments), byte-identical to libxml2. CSS is intentionally unavailable for XML
146
+ (Lexbor's selector engine lower-cases names, which breaks XML case/namespace
147
+ matching) - use XPath.
148
+
149
+ The tree supports in-place mutation - every edit validates its input (names as
150
+ XML 1.0 QNames, values as XML Char) so the tree stays serializable to
151
+ well-formed XML, and a removed node is detached, never freed, so a live wrapper
152
+ that aliases it stays usable:
153
+
154
+ ```ruby
155
+ doc = Makiri::XML(%(<feed xmlns:dc="urn:dc"><entry id="1">Hi</entry><draft/></feed>))
156
+ e = doc.at_xpath("//entry")
157
+
158
+ e["id"] = "9" # add or replace an attribute (value escaped on output)
159
+ e["dc:k"] = "v" # a prefixed name resolves against the in-scope xmlns
160
+ e.content = "Bye" # replace an element's children with text
161
+ e.name = "post" # rename in place (identity + namespace re-resolved)
162
+ e.delete("id") # remove an attribute
163
+ doc.at_xpath("//draft").remove
164
+
165
+ doc.root.to_xml # => "<feed xmlns:dc=\"urn:dc\"><post dc:k=\"v\">Bye</post></feed>"
166
+ ```
167
+
168
+ New subtrees can be built too - `Document#create_element` (and
169
+ `#create_text_node` / `#create_comment` / `#create_cdata` /
170
+ `#create_processing_instruction`) make detached nodes, and `#add_child` / `<<`,
171
+ `#add_previous_sibling` / `#before`, `#add_next_sibling` / `#after`, `#replace`
172
+ link them. A node's namespace is resolved against its position **at insertion**
173
+ (a prefixed name binds to the in-scope `xmlns`, an unprefixed element to the
174
+ default namespace), so the same tree results whether you set names before or
175
+ after attaching; an unbound prefix in the live tree fails closed. A node from
176
+ another document is **deep-copied** into the target (the source is untouched):
177
+
178
+ ```ruby
179
+ doc = Makiri::XML(%(<feed xmlns="urn:a" xmlns:dc="urn:dc"/>))
180
+ entry = doc.create_element("entry")
181
+ entry["dc:id"] = "42" # prefixed attr resolves on insertion
182
+ entry.add_child(doc.create_element("title", "Hello"))
183
+ doc.root.add_child(entry)
184
+
185
+ doc.to_xml # => "...<entry dc:id=\"42\"><title>Hello</title></entry>..."
186
+ ```
187
+
188
+ Supported edits: `#[]=`, `#delete` / `#remove_attribute`, `#content=`, `#name=`,
189
+ `#remove` / `#unlink`, the factories above, and `#add_child` / `<<` /
190
+ `#before` / `#after` / `#replace`. Insertion takes a `Makiri::XML` node or a
191
+ `DocumentFragment` (its children are spliced in); a fragment is parsed by
192
+ `Document#fragment(str)` (bound to the document) or `DocumentFragment.parse(str)`
193
+ (standalone). A raw string handed straight to `#add_child` is **not** accepted -
194
+ parse it into a fragment first. A whole document can also be built from scratch
195
+ with `XML::Document.new` + `#root=` and the factories.
196
+
197
+ The character encoding is autodetected (XML 1.0 Appendix F): a byte-order mark or
198
+ the `<?xml encoding="..."?>` declaration selects it, so raw bytes (`File.binread`)
199
+ in UTF-16, Shift_JIS, etc. parse correctly and a leading BOM is stripped. A
200
+ concrete String encoding stays authoritative - a BOM or declaration that
201
+ contradicts it is a fatal error, not a silent mis-decode.
202
+
203
+ Parsing is DoS-bounded by a single arena memory ceiling (default 256 MiB,
204
+ counting node structs and text), which fits every standard document. Raise it
205
+ per parse for an unusually large one:
206
+
207
+ ```ruby
208
+ Makiri::XML(huge_xml, max_bytes: 512 * 1024 * 1024) # also Makiri::XML::Document.parse(..., max_bytes:)
209
+ ```
210
+
211
+ Conformance is held by a regression net: the **W3C XML Conformance Test Suite**
212
+ (`rake conformance:xmlconf`, 100% of the in-scope non-validating XML-1.0 tests),
213
+ an XPath 1.0 differential vs Nokogiri/libxml2 (`rake conformance:xpath_xml`), and
214
+ property-based testing that requires Makiri's tree to be byte-identical to
215
+ Nokogiri's over generated documents (`rake conformance:xml_pbt`).
216
+
75
217
  ## Non-goals (v1.0)
76
218
 
77
- * XML parsing (HTML only).
219
+ * Passing a raw markup string straight to an insertion method
220
+ (`node.add_child("<x/>")`); parse it into a fragment first
221
+ (`Document#fragment` / `DocumentFragment.parse`). (Building XML from scratch
222
+ (`XML::Document.new` + `#root=`), the node factories - `Document#create_element`
223
+ etc. - fragments, node insertion (`#add_child` / `#before` / `#after` /
224
+ `#replace`), and `#to_xml` serialization ARE supported.)
78
225
  * XSLT, DTD / Schema / RelaxNG validation, XPointer, XInclude.
79
226
  * Streaming / SAX parsing.
80
227
  * Drop-in replacement for every Nokogiri method. Makiri covers the common
81
228
  HTML-scraping and manipulation surface. Deliberately not provided:
82
- - XML/XHTML serialization variants (`to_xml`, `to_xhtml`, `write_xml_to`)
229
+ - XHTML serialization variants (`to_xhtml`, `write_xml_to`); `#to_xml` is supported
83
230
  - XML/DTD construction (`create_internal_subset`, `external_subset`)
84
- - namespace introspection beyond `namespace-uri()` (`namespace_definitions`, `add_namespace`, `collect_namespaces`)
231
+ - namespace *mutation* (`add_namespace_definition`); read introspection
232
+ (`#namespace`, `#namespace_definitions`, `#namespaces`, `#collect_namespaces`)
233
+ is supported on `Makiri::XML` nodes
85
234
  - Nokogiri internals (`decorate`, `slop!`, `validate`).
86
235
 
87
236
  ## Differences from Nokogiri
@@ -103,9 +252,26 @@ Detailed, test-backed notes live in `spec/conformance/README.md`.
103
252
  * `namespace-uri()` of an HTML element returns the XHTML URI (DOM-correct, as browsers report)
104
253
  * `Nokogiri::HTML5` returns `""`.
105
254
 
255
+ ### XML
256
+
257
+ * `Makiri::XML` is **XML 1.0 only and non-validating**.
258
+ * A `version="1.1"` declaration is rejected; Nokogiri parses XML 1.1.
259
+ * The DTD is recognized but not processed: DTD-defined entities are not
260
+ expanded and DTD default attributes are not applied (Nokogiri/libxml2 can do
261
+ both). External entities/subsets are never fetched (no I/O).
262
+ * Mutation supports in-place edits, the node factories, fragments
263
+ (`Document#fragment` / `DocumentFragment.parse`), node insertion, and building
264
+ a document from scratch (`XML::Document.new` + `#root=`); only handing a raw
265
+ markup string straight to `#add_child` is unsupported (parse it into a fragment
266
+ first). (`#to_xml` serialization is supported; HTML serialization - `to_html`
267
+ / `inner_html` / `outer_html` - is not.)
268
+ * Otherwise the parsed tree is byte-identical to `Nokogiri::XML`'s (verified by
269
+ the property-based differential), including namespaces, prolog/epilog comments
270
+ and PIs, and adjacent-CDATA coalescing.
271
+
106
272
  ### CSS
107
273
 
108
- * jQuery/Nokogiri CSS extensions are not supported (`:contains`, `:gt`, `:lt`, `:eq`, `:first`, …)
274
+ * jQuery/Nokogiri CSS extensions are not supported (`:contains`, `:gt`, `:lt`, `:eq`, `:first`, ...)
109
275
  * Makiri uses Lexbor's standards-only selector engine.
110
276
  Use XPath (`xpath("//p[contains(., 'x')]")`) or Enumerable (`css('li')[1]`).
111
277
  Standard Level-4 selectors (`:is` / `:where` / `:has`) are supported, some of which Nokogiri rejects.
data/Rakefile CHANGED
@@ -7,6 +7,24 @@ require "shellwords"
7
7
 
8
8
  GEMSPEC = Gem::Specification.load("makiri.gemspec")
9
9
 
10
+ # Replace bundler/gem_tasks' `release` (which builds a source-only gem and
11
+ # `gem push`es it from the dev machine) with a tag push: it hands the build,
12
+ # GitHub Release, and the approval-gated RubyGems publish off to CI
13
+ # (.github/workflows/release.yml). Nothing is pushed to RubyGems locally.
14
+ Rake::Task["release"].clear
15
+ desc "Tag v#{GEMSPEC.version} and push it; CI builds, releases, and publishes"
16
+ task release: %w[release:guard_clean release:source_control_push] do
17
+ puts <<~MSG
18
+
19
+ Pushed tag v#{GEMSPEC.version}. GitHub Actions (release.yml) will now:
20
+ 1. build the source gem + precompiled native gems,
21
+ 2. create the GitHub Release and attach them, then
22
+ 3. publish to RubyGems via OIDC - after the `rubygems` environment approval.
23
+ Approve the pending deployment in the Actions run to publish; nothing is
24
+ pushed to RubyGems from this machine.
25
+ MSG
26
+ end
27
+
10
28
  Rake::ExtensionTask.new("makiri", GEMSPEC) do |ext|
11
29
  ext.lib_dir = "lib/makiri"
12
30
  ext.ext_dir = "ext/makiri"
@@ -26,7 +44,7 @@ end
26
44
 
27
45
  # `rake clean` (from rake-compiler) removes the ext build dir under tmp/,
28
46
  # including the generated Makefile. The next `rake compile` re-runs extconf,
29
- # so newly-added .c files are picked up — without this, a stale Makefile omits
47
+ # so newly-added .c files are picked up - without this, a stale Makefile omits
30
48
  # new sources and macOS's -undefined dynamic_lookup turns the missing symbols
31
49
  # into runtime NULL calls. The vendored Lexbor build is deliberately NOT wiped
32
50
  # here (it is slow to rebuild and rarely changes); use `rake clean:lexbor` for
@@ -63,6 +81,17 @@ def asan_runtime_path
63
81
  nil
64
82
  end
65
83
 
84
+ # The compiled extension, and whether it carries sanitizer instrumentation, so
85
+ # `fuzz:sanitize SKIP_BUILD=1` can refuse to run a plain (non-ASan) build.
86
+ def ext_bundle_path
87
+ Dir["lib/makiri/makiri.{bundle,so}"].first
88
+ end
89
+
90
+ def ext_sanitized?
91
+ bundle = ext_bundle_path or return false
92
+ !(`nm "#{bundle}" 2>/dev/null` =~ /asan|ubsan/i).nil?
93
+ end
94
+
66
95
  desc "Build the extension with sanitizers (MAKIRI_SANITIZE, default " \
67
96
  "address,undefined) and run the spec suite under them"
68
97
  task :sanitize do
@@ -92,6 +121,16 @@ task fuzz: :compile do
92
121
  sh "#{FileUtils::RUBY} -Ilib spec/fuzz/run.rb #{ENV['FUZZ_ARGS']}"
93
122
  end
94
123
 
124
+ desc "Fuzz the XML parser (hostile/mutated documents; override via FUZZ_ARGS)"
125
+ task "fuzz:xml": :compile do
126
+ sh "#{FileUtils::RUBY} -Ilib spec/fuzz/run.rb --target xml #{ENV['FUZZ_ARGS']}"
127
+ end
128
+
129
+ desc "Fuzz the XML mutation surface (random edit sequences + invariants; override via FUZZ_ARGS)"
130
+ task "fuzz:mutate": :compile do
131
+ sh "#{FileUtils::RUBY} -Ilib spec/fuzz/run.rb --target mutate #{ENV['FUZZ_ARGS']}"
132
+ end
133
+
95
134
  desc "Run the performance benchmark (Makiri vs Nokogiri reference)"
96
135
  task bench: :compile do
97
136
  # Run outside the bundle so the bench-only gems (nokogiri, benchmark-ips)
@@ -101,6 +140,13 @@ task bench: :compile do
101
140
  end
102
141
  end
103
142
 
143
+ desc "Run the XML reader benchmark (Makiri::XML vs Nokogiri::XML reference)"
144
+ task "bench:xml" => :compile do
145
+ Bundler.with_unbundled_env do
146
+ sh "#{FileUtils::RUBY} -Ilib bench/bench_xml.rb"
147
+ end
148
+ end
149
+
104
150
  namespace :conformance do
105
151
  desc "WHATWG HTML5 parsing conformance: run html5lib-tests through Makiri"
106
152
  task html5: :compile do
@@ -116,6 +162,28 @@ namespace :conformance do
116
162
  end
117
163
  end
118
164
 
165
+ desc "XML XPath 1.0 differential conformance: Makiri::XML vs Nokogiri::XML"
166
+ task xpath_xml: :compile do
167
+ Bundler.with_unbundled_env do
168
+ sh "#{FileUtils::RUBY} -Ilib spec/conformance/xml_xpath_diff.rb #{ENV['XPATH_ARGS']}"
169
+ end
170
+ end
171
+
172
+ desc "W3C XML Conformance Test Suite: well-formedness through Makiri::XML"
173
+ task xmlconf: :compile do
174
+ # Nokogiri (bench-only) parses the manifests, so run outside the bundle.
175
+ Bundler.with_unbundled_env do
176
+ sh "#{FileUtils::RUBY} -Ilib spec/conformance/xmlconf_runner.rb #{ENV['XMLCONF_ARGS']}"
177
+ end
178
+ end
179
+
180
+ desc "Property-based XML differential: generated documents, Makiri vs Nokogiri tree"
181
+ task xml_pbt: :compile do
182
+ Bundler.with_unbundled_env do
183
+ sh "#{FileUtils::RUBY} -Ilib spec/conformance/xml_pbt_diff.rb #{ENV['PBT_ARGS']}"
184
+ end
185
+ end
186
+
119
187
  desc "CSS Selectors differential conformance vs Nokogiri::HTML5"
120
188
  task css: :compile do
121
189
  Bundler.with_unbundled_env do
@@ -124,14 +192,31 @@ namespace :conformance do
124
192
  end
125
193
  end
126
194
 
127
- desc "Run all conformance suites (html5lib-tests + XPath & CSS differentials)"
128
- task conformance: %w[conformance:html5 conformance:xpath conformance:css]
195
+ desc "Run all conformance suites"
196
+ task conformance: %w[conformance:html5 conformance:xpath conformance:css conformance:xmlconf conformance:xpath_xml]
129
197
 
130
198
  namespace :fuzz do
131
- desc "Run the fuzzer under AddressSanitizer (rebuilds the ext; --isolated)"
199
+ # Run the fuzzer under the sanitizer. Toggles (all via env):
200
+ # FAST=1 run the surfaces NON-isolated (one process, no fork-per-query).
201
+ # Far higher throughput; ASan still aborts on a memory error
202
+ # (halt_on_error). The default (isolated) is the complete net:
203
+ # it also survives + attributes a genuine segfault and catches a
204
+ # hang via the per-query timeout, at much lower throughput.
205
+ # SKIP_BUILD=1 reuse the current build instead of rebuilding (refuses to run
206
+ # if it is not a sanitizer build, so you never fuzz a plain ext).
207
+ # FUZZ_TIME=N seconds per surface (default 90).
208
+ # FUZZ_ARGS=... run a single custom invocation instead of the three surfaces.
209
+ desc "Run the fuzzer under AddressSanitizer (FAST=1 non-isolated, SKIP_BUILD=1 reuse build)"
132
210
  task :sanitize do
133
211
  sanitize = ENV["MAKIRI_SANITIZE"] || "address,undefined"
134
- sh({ "MAKIRI_SANITIZE" => sanitize }, "#{FileUtils::RUBY} -S rake clean compile")
212
+ if %w[1 true yes].include?(ENV["SKIP_BUILD"].to_s.downcase)
213
+ ext_sanitized? or
214
+ abort "fuzz:sanitize: SKIP_BUILD set but lib/makiri is not a sanitizer build; " \
215
+ "drop SKIP_BUILD to rebuild with MAKIRI_SANITIZE"
216
+ puts "fuzz:sanitize: reusing the existing sanitizer build (SKIP_BUILD)"
217
+ else
218
+ sh({ "MAKIRI_SANITIZE" => sanitize }, "#{FileUtils::RUBY} -S rake clean compile")
219
+ end
135
220
 
136
221
  env = {
137
222
  "ASAN_OPTIONS" => "detect_leaks=0:detect_container_overflow=0:" \
@@ -144,7 +229,18 @@ namespace :fuzz do
144
229
  preload = RbConfig::CONFIG["target_os"] =~ /darwin/ ? "DYLD_INSERT_LIBRARIES" : "LD_PRELOAD"
145
230
  env[preload] = runtime
146
231
  end
147
- args = ENV["FUZZ_ARGS"] || "--isolated --time 120"
148
- sh(env, "#{FileUtils::RUBY} -Ilib spec/fuzz/run.rb #{args}")
232
+
233
+ if ENV["FUZZ_ARGS"]
234
+ sh(env, "#{FileUtils::RUBY} -Ilib spec/fuzz/run.rb #{ENV['FUZZ_ARGS']}")
235
+ else
236
+ iso = %w[1 true yes].include?(ENV["FAST"].to_s.downcase) ? "" : "--isolated"
237
+ secs = ENV["FUZZ_TIME"] || "90"
238
+ # Cover every surface under the sanitizer: the query engine (XPath/CSS over
239
+ # parsed fixtures), the XML parser (hostile documents), and the XML mutation
240
+ # surface (random edit sequences + invariants).
241
+ ["", "--target xml", "--target mutate"].each do |surface|
242
+ sh(env, "#{FileUtils::RUBY} -Ilib spec/fuzz/run.rb #{surface} #{iso} --time #{secs}".squeeze(" ").strip)
243
+ end
244
+ end
149
245
  end
150
246
  end