makiri 0.1.0 → 0.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (641)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/release.yml +18 -7
  3. data/CHANGELOG.md +148 -5
  4. data/README.md +173 -7
  5. data/Rakefile +103 -7
  6. data/ext/makiri/bridge/bridge.h +28 -0
  7. data/ext/makiri/bridge/ruby_string.c +217 -0
  8. data/ext/makiri/core/mkr_alloc.h +1 -1
  9. data/ext/makiri/core/mkr_buf.c +35 -1
  10. data/ext/makiri/core/mkr_buf.h +37 -3
  11. data/ext/makiri/core/mkr_core.h +1 -1
  12. data/ext/makiri/core/mkr_hash.h +1 -1
  13. data/ext/makiri/core/mkr_text.h +8 -8
  14. data/ext/makiri/extconf.rb +20 -2
  15. data/ext/makiri/glue/glue.h +53 -11
  16. data/ext/makiri/glue/ruby_doc.c +165 -35
  17. data/ext/makiri/glue/ruby_html_css.c +246 -0
  18. data/ext/makiri/glue/{ruby_mutate.c → ruby_html_mutate.c} +271 -43
  19. data/ext/makiri/glue/ruby_html_node.c +888 -0
  20. data/ext/makiri/glue/ruby_html_serialize.c +154 -0
  21. data/ext/makiri/glue/ruby_node.c +54 -555
  22. data/ext/makiri/glue/ruby_node_set.c +167 -32
  23. data/ext/makiri/glue/ruby_xml.c +420 -0
  24. data/ext/makiri/glue/ruby_xml_node.c +1386 -0
  25. data/ext/makiri/glue/ruby_xpath.c +60 -27
  26. data/ext/makiri/glue/ruby_xpath.h +19 -0
  27. data/ext/makiri/lexbor_compat/compat.h +42 -9
  28. data/ext/makiri/lexbor_compat/compat_internal.h +1 -1
  29. data/ext/makiri/lexbor_compat/dom_index.c +2 -2
  30. data/ext/makiri/lexbor_compat/post_parse.c +100 -10
  31. data/ext/makiri/lexbor_compat/source_loc.c +13 -9
  32. data/ext/makiri/lexbor_compat/text_index.c +14 -8
  33. data/ext/makiri/lexbor_compat/utf8_input.c +85 -26
  34. data/ext/makiri/makiri.c +139 -6
  35. data/ext/makiri/makiri.h +43 -2
  36. data/ext/makiri/xml/mkr_xml.h +126 -0
  37. data/ext/makiri/xml/mkr_xml_chars.c +225 -0
  38. data/ext/makiri/xml/mkr_xml_mutate.c +875 -0
  39. data/ext/makiri/xml/mkr_xml_mutate.h +139 -0
  40. data/ext/makiri/xml/mkr_xml_node.c +267 -0
  41. data/ext/makiri/xml/mkr_xml_node.h +119 -0
  42. data/ext/makiri/xml/mkr_xml_tree.c +1479 -0
  43. data/ext/makiri/xpath/mkr_xpath.c +59 -32
  44. data/ext/makiri/xpath/mkr_xpath.h +96 -4
  45. data/ext/makiri/xpath/mkr_xpath_engine_html.c +17 -0
  46. data/ext/makiri/xpath/mkr_xpath_engine_xml.c +12 -0
  47. data/ext/makiri/xpath/{mkr_xpath_eval.c → mkr_xpath_eval_body.h} +202 -175
  48. data/ext/makiri/xpath/{mkr_xpath_funcs.c → mkr_xpath_funcs_body.h} +110 -86
  49. data/ext/makiri/xpath/mkr_xpath_internal.h +91 -200
  50. data/ext/makiri/xpath/mkr_xpath_lex.c +2 -2
  51. data/ext/makiri/xpath/mkr_xpath_node_access_html.h +138 -0
  52. data/ext/makiri/xpath/mkr_xpath_node_access_xml.h +142 -0
  53. data/ext/makiri/xpath/mkr_xpath_parse.c +5 -5
  54. data/ext/makiri/xpath/mkr_xpath_prelude_html.h +30 -0
  55. data/ext/makiri/xpath/mkr_xpath_prelude_xml.h +28 -0
  56. data/ext/makiri/xpath/mkr_xpath_shared.c +593 -0
  57. data/ext/makiri/xpath/{mkr_xpath_value.c → mkr_xpath_value_body.h} +145 -656
  58. data/ext/makiri/xpath/mkr_xpath_xml_selftest.c +76 -0
  59. data/lib/makiri/{attribute.rb → attr.rb} +7 -3
  60. data/lib/makiri/cdata_section.rb +21 -0
  61. data/lib/makiri/comment.rb +12 -0
  62. data/lib/makiri/compat_aliases.rb +30 -0
  63. data/lib/makiri/document.rb +4 -76
  64. data/lib/makiri/document_fragment.rb +14 -9
  65. data/lib/makiri/element.rb +5 -3
  66. data/lib/makiri/html/document.rb +106 -0
  67. data/lib/makiri/html/node_methods.rb +19 -0
  68. data/lib/makiri/html.rb +12 -0
  69. data/lib/makiri/node.rb +58 -15
  70. data/lib/makiri/node_set.rb +8 -0
  71. data/lib/makiri/processing_instruction.rb +12 -0
  72. data/lib/makiri/text.rb +2 -0
  73. data/lib/makiri/version.rb +1 -1
  74. data/lib/makiri/xml/document.rb +24 -0
  75. data/lib/makiri/xml/node_methods.rb +37 -0
  76. data/lib/makiri/xml.rb +10 -0
  77. data/lib/makiri/xpath_context.rb +1 -1
  78. data/lib/makiri.rb +23 -5
  79. data/script/build_native_gem.rb +2 -2
  80. data/script/check_c_safety.rb +32 -0
  81. data/script/check_c_safety_allowlist.yml +83 -0
  82. metadata +35 -565
  83. data/ext/makiri/glue/ruby_css.c +0 -185
  84. data/ext/makiri/glue/ruby_serialize.c +0 -92
  85. data/lib/makiri/cdata.rb +0 -6
  86. data/vendor/lexbor/.github/FUNDING.yml +0 -12
  87. data/vendor/lexbor/.github/workflows/cmake.yml +0 -37
  88. data/vendor/lexbor/benchmarks/CMakeLists.txt +0 -22
  89. data/vendor/lexbor/benchmarks/benchmark.h +0 -101
  90. data/vendor/lexbor/benchmarks/lexbor/html/CMakeLists.txt +0 -16
  91. data/vendor/lexbor/benchmarks/lexbor/html/tokenizer/input_validation.c +0 -100
  92. data/vendor/lexbor/benchmarks/lexbor/html/tokenizer/parse.c +0 -95
  93. data/vendor/lexbor/benchmarks/lexbor/selectors/CMakeLists.txt +0 -16
  94. data/vendor/lexbor/benchmarks/lexbor/selectors/files/average.html +0 -41
  95. data/vendor/lexbor/benchmarks/lexbor/selectors/selectors.c +0 -144
  96. data/vendor/lexbor/examples/CMakeLists.txt +0 -17
  97. data/vendor/lexbor/examples/lexbor/css/CMakeLists.txt +0 -25
  98. data/vendor/lexbor/examples/lexbor/css/StyleSheet.c +0 -70
  99. data/vendor/lexbor/examples/lexbor/css/base.h +0 -34
  100. data/vendor/lexbor/examples/lexbor/css/selectors/list_easy_way.c +0 -74
  101. data/vendor/lexbor/examples/lexbor/css/selectors/list_fast_way.c +0 -149
  102. data/vendor/lexbor/examples/lexbor/css/syntax/structure_parse_file.c +0 -467
  103. data/vendor/lexbor/examples/lexbor/css/syntax/tokenizer/from_file.c +0 -87
  104. data/vendor/lexbor/examples/lexbor/css/syntax/tokenizer/print_raw.c +0 -100
  105. data/vendor/lexbor/examples/lexbor/encoding/CMakeLists.txt +0 -11
  106. data/vendor/lexbor/examples/lexbor/encoding/buffer/decode/decode.c +0 -58
  107. data/vendor/lexbor/examples/lexbor/encoding/buffer/decode/decoder.c +0 -140
  108. data/vendor/lexbor/examples/lexbor/encoding/buffer/decode/validate.c +0 -65
  109. data/vendor/lexbor/examples/lexbor/encoding/buffer/encode/encode.c +0 -67
  110. data/vendor/lexbor/examples/lexbor/encoding/buffer/encode/encoder.c +0 -262
  111. data/vendor/lexbor/examples/lexbor/encoding/buffer/encode/validate.c +0 -77
  112. data/vendor/lexbor/examples/lexbor/encoding/buffer/from_to.c +0 -193
  113. data/vendor/lexbor/examples/lexbor/encoding/data_by_name.c +0 -23
  114. data/vendor/lexbor/examples/lexbor/encoding/single/decode/decode.c +0 -55
  115. data/vendor/lexbor/examples/lexbor/encoding/single/decode/decoder.c +0 -115
  116. data/vendor/lexbor/examples/lexbor/encoding/single/decode/validate.c +0 -59
  117. data/vendor/lexbor/examples/lexbor/encoding/single/encode/encode.c +0 -65
  118. data/vendor/lexbor/examples/lexbor/encoding/single/encode/encoder.c +0 -241
  119. data/vendor/lexbor/examples/lexbor/encoding/single/encode/validate.c +0 -85
  120. data/vendor/lexbor/examples/lexbor/encoding/single/from_to.c +0 -156
  121. data/vendor/lexbor/examples/lexbor/html/CMakeLists.txt +0 -21
  122. data/vendor/lexbor/examples/lexbor/html/base.h +0 -98
  123. data/vendor/lexbor/examples/lexbor/html/document_parse.c +0 -43
  124. data/vendor/lexbor/examples/lexbor/html/document_parse_chunk.c +0 -72
  125. data/vendor/lexbor/examples/lexbor/html/document_title.c +0 -84
  126. data/vendor/lexbor/examples/lexbor/html/element_attributes.c +0 -134
  127. data/vendor/lexbor/examples/lexbor/html/element_create.c +0 -84
  128. data/vendor/lexbor/examples/lexbor/html/element_innerHTML.c +0 -52
  129. data/vendor/lexbor/examples/lexbor/html/elements_by_attr.c +0 -106
  130. data/vendor/lexbor/examples/lexbor/html/elements_by_class_name.c +0 -55
  131. data/vendor/lexbor/examples/lexbor/html/elements_by_tag_name.c +0 -51
  132. data/vendor/lexbor/examples/lexbor/html/encoding.c +0 -95
  133. data/vendor/lexbor/examples/lexbor/html/html2sexpr.c +0 -231
  134. data/vendor/lexbor/examples/lexbor/html/parse.c +0 -69
  135. data/vendor/lexbor/examples/lexbor/html/parse_chunk.c +0 -77
  136. data/vendor/lexbor/examples/lexbor/html/tokenizer/callback.c +0 -78
  137. data/vendor/lexbor/examples/lexbor/html/tokenizer/simple.c +0 -118
  138. data/vendor/lexbor/examples/lexbor/html/tokenizer/tag_attributes.c +0 -106
  139. data/vendor/lexbor/examples/lexbor/html/tokenizer/text.c +0 -75
  140. data/vendor/lexbor/examples/lexbor/punycode/CMakeLists.txt +0 -11
  141. data/vendor/lexbor/examples/lexbor/punycode/decode.c +0 -102
  142. data/vendor/lexbor/examples/lexbor/punycode/encode.c +0 -102
  143. data/vendor/lexbor/examples/lexbor/selectors/CMakeLists.txt +0 -15
  144. data/vendor/lexbor/examples/lexbor/selectors/easy_way.c +0 -120
  145. data/vendor/lexbor/examples/lexbor/selectors/normal_way.c +0 -172
  146. data/vendor/lexbor/examples/lexbor/selectors/unique_nodes.c +0 -142
  147. data/vendor/lexbor/examples/lexbor/styles/CMakeLists.txt +0 -15
  148. data/vendor/lexbor/examples/lexbor/styles/attribute_style.c +0 -110
  149. data/vendor/lexbor/examples/lexbor/styles/base.h +0 -34
  150. data/vendor/lexbor/examples/lexbor/styles/events_insert.c +0 -199
  151. data/vendor/lexbor/examples/lexbor/styles/stylesheet.c +0 -141
  152. data/vendor/lexbor/examples/lexbor/styles/walk.c +0 -170
  153. data/vendor/lexbor/examples/lexbor/unicode/CMakeLists.txt +0 -17
  154. data/vendor/lexbor/examples/lexbor/unicode/idna_to_ascii.c +0 -115
  155. data/vendor/lexbor/examples/lexbor/unicode/normalization_form.c +0 -99
  156. data/vendor/lexbor/examples/lexbor/unicode/normalization_form_stdin.c +0 -99
  157. data/vendor/lexbor/examples/lexbor/url/CMakeLists.txt +0 -15
  158. data/vendor/lexbor/examples/lexbor/url/parse.c +0 -101
  159. data/vendor/lexbor/examples/lexbor/url/relative.c +0 -112
  160. data/vendor/lexbor/images/SerpApi-logo.png +0 -0
  161. data/vendor/lexbor/images/neural-logo.png +0 -0
  162. data/vendor/lexbor/packaging/Makefile +0 -26
  163. data/vendor/lexbor/packaging/README.md +0 -17
  164. data/vendor/lexbor/packaging/deb/Makefile.in +0 -40
  165. data/vendor/lexbor/packaging/deb/Makefile.module.in +0 -15
  166. data/vendor/lexbor/packaging/deb/debian_in/changelog +0 -6
  167. data/vendor/lexbor/packaging/deb/debian_in/control +0 -25
  168. data/vendor/lexbor/packaging/deb/debian_in/copyright +0 -29
  169. data/vendor/lexbor/packaging/deb/debian_in/dev.dirs +0 -2
  170. data/vendor/lexbor/packaging/deb/debian_in/dev.install +0 -3
  171. data/vendor/lexbor/packaging/deb/debian_in/dirs +0 -1
  172. data/vendor/lexbor/packaging/deb/debian_in/docs +0 -2
  173. data/vendor/lexbor/packaging/deb/debian_in/install +0 -1
  174. data/vendor/lexbor/packaging/deb/debian_in/not-installed +0 -4
  175. data/vendor/lexbor/packaging/deb/debian_in/rules +0 -15
  176. data/vendor/lexbor/packaging/deb/debian_in/source/format +0 -1
  177. data/vendor/lexbor/packaging/deb/debian_main_in/changelog +0 -6
  178. data/vendor/lexbor/packaging/deb/debian_main_in/control +0 -33
  179. data/vendor/lexbor/packaging/deb/debian_main_in/copyright +0 -29
  180. data/vendor/lexbor/packaging/deb/debian_main_in/dev.dirs +0 -3
  181. data/vendor/lexbor/packaging/deb/debian_main_in/dev.install +0 -5
  182. data/vendor/lexbor/packaging/deb/debian_main_in/dirs +0 -1
  183. data/vendor/lexbor/packaging/deb/debian_main_in/docs +0 -2
  184. data/vendor/lexbor/packaging/deb/debian_main_in/install +0 -1
  185. data/vendor/lexbor/packaging/deb/debian_main_in/rules +0 -15
  186. data/vendor/lexbor/packaging/deb/debian_main_in/source/format +0 -1
  187. data/vendor/lexbor/packaging/rpm/Makefile +0 -14
  188. data/vendor/lexbor/packaging/rpm/build.sh +0 -105
  189. data/vendor/lexbor/packaging/rpm/liblexbor-module.spec.in +0 -31
  190. data/vendor/lexbor/packaging/rpm/liblexbor.spec.in +0 -62
  191. data/vendor/lexbor/test/CMakeLists.txt +0 -44
  192. data/vendor/lexbor/test/amalgamation/code/_base.h +0 -33
  193. data/vendor/lexbor/test/amalgamation/code/html.c +0 -35
  194. data/vendor/lexbor/test/amalgamation/generate_and_compile.sh +0 -130
  195. data/vendor/lexbor/test/external/commoncrawl.py +0 -110
  196. data/vendor/lexbor/test/files/lexbor/css/declarations/display.ton +0 -801
  197. data/vendor/lexbor/test/files/lexbor/css/declarations/height.ton +0 -367
  198. data/vendor/lexbor/test/files/lexbor/css/declarations/syntax.ton +0 -189
  199. data/vendor/lexbor/test/files/lexbor/css/declarations/width.ton +0 -367
  200. data/vendor/lexbor/test/files/lexbor/css/lexbor.css +0 -205
  201. data/vendor/lexbor/test/files/lexbor/css/syntax/parser/at.ton +0 -518
  202. data/vendor/lexbor/test/files/lexbor/css/syntax/parser/other.ton +0 -80
  203. data/vendor/lexbor/test/files/lexbor/css/syntax/parser/qualified.ton +0 -799
  204. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/CDO-CDC.ton +0 -226
  205. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/at.ton +0 -170
  206. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/broken-utf-8.ton +0 -101
  207. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/comment.ton +0 -95
  208. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/hash.ton +0 -181
  209. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/ident.ton +0 -245
  210. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/number.ton +0 -694
  211. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/other.ton +0 -16
  212. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/reverse-solidus.ton +0 -111
  213. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/single-tokens.ton +0 -66
  214. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/string.ton +0 -303
  215. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/unicode_range.ton +0 -139
  216. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/url-function.ton +0 -229
  217. data/vendor/lexbor/test/files/lexbor/css/syntax/tokenizer/whitespace.ton +0 -45
  218. data/vendor/lexbor/test/files/lexbor/encoding/big5_map_decode.txt +0 -14699
  219. data/vendor/lexbor/test/files/lexbor/encoding/euc_jp_map_decode.txt +0 -7737
  220. data/vendor/lexbor/test/files/lexbor/encoding/euc_kr_map_decode.txt +0 -17189
  221. data/vendor/lexbor/test/files/lexbor/encoding/gb18030_map_decode.txt +0 -27672
  222. data/vendor/lexbor/test/files/lexbor/encoding/iso_2022_jp_map_decode.txt +0 -7928
  223. data/vendor/lexbor/test/files/lexbor/encoding/shift_jis_map_decode.txt +0 -5138
  224. data/vendor/lexbor/test/files/lexbor/html/html5_test/README.md +0 -12
  225. data/vendor/lexbor/test/files/lexbor/html/html5_test/adoption01.ton +0 -442
  226. data/vendor/lexbor/test/files/lexbor/html/html5_test/adoption02.ton +0 -53
  227. data/vendor/lexbor/test/files/lexbor/html/html5_test/attributes.ton +0 -29
  228. data/vendor/lexbor/test/files/lexbor/html/html5_test/blocks.ton +0 -891
  229. data/vendor/lexbor/test/files/lexbor/html/html5_test/char_ref.ton +0 -51
  230. data/vendor/lexbor/test/files/lexbor/html/html5_test/comments01.ton +0 -290
  231. data/vendor/lexbor/test/files/lexbor/html/html5_test/doctype01.ton +0 -637
  232. data/vendor/lexbor/test/files/lexbor/html/html5_test/domjs-unsafe.ton +0 -822
  233. data/vendor/lexbor/test/files/lexbor/html/html5_test/entities01.ton +0 -1262
  234. data/vendor/lexbor/test/files/lexbor/html/html5_test/entities02.ton +0 -416
  235. data/vendor/lexbor/test/files/lexbor/html/html5_test/foreign-fragment.ton +0 -859
  236. data/vendor/lexbor/test/files/lexbor/html/html5_test/html5test-com.ton +0 -414
  237. data/vendor/lexbor/test/files/lexbor/html/html5_test/inbody01.ton +0 -78
  238. data/vendor/lexbor/test/files/lexbor/html/html5_test/isindex.ton +0 -67
  239. data/vendor/lexbor/test/files/lexbor/html/html5_test/main-element.ton +0 -63
  240. data/vendor/lexbor/test/files/lexbor/html/html5_test/math.ton +0 -140
  241. data/vendor/lexbor/test/files/lexbor/html/html5_test/menuitem-element.ton +0 -345
  242. data/vendor/lexbor/test/files/lexbor/html/html5_test/namespace-sensitivity.ton +0 -31
  243. data/vendor/lexbor/test/files/lexbor/html/html5_test/noscript01.ton +0 -344
  244. data/vendor/lexbor/test/files/lexbor/html/html5_test/pending-spec-changes-plain-text-unsafe.ton +0 -39
  245. data/vendor/lexbor/test/files/lexbor/html/html5_test/pending-spec-changes.ton +0 -65
  246. data/vendor/lexbor/test/files/lexbor/html/html5_test/plain-text-unsafe.ton +0 -657
  247. data/vendor/lexbor/test/files/lexbor/html/html5_test/quirks01.ton +0 -77
  248. data/vendor/lexbor/test/files/lexbor/html/html5_test/ruby.ton +0 -411
  249. data/vendor/lexbor/test/files/lexbor/html/html5_test/scriptdata01.ton +0 -499
  250. data/vendor/lexbor/test/files/lexbor/html/html5_test/search-element.ton +0 -63
  251. data/vendor/lexbor/test/files/lexbor/html/html5_test/svg.ton +0 -140
  252. data/vendor/lexbor/test/files/lexbor/html/html5_test/tables01.ton +0 -421
  253. data/vendor/lexbor/test/files/lexbor/html/html5_test/template.ton +0 -2199
  254. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests1.ton +0 -2486
  255. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests10.ton +0 -1090
  256. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests11.ton +0 -317
  257. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests12.ton +0 -72
  258. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests14.ton +0 -100
  259. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests15.ton +0 -290
  260. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests16.ton +0 -3471
  261. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests17.ton +0 -244
  262. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests18.ton +0 -752
  263. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests19.ton +0 -1889
  264. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests2.ton +0 -1093
  265. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests20.ton +0 -1158
  266. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests21.ton +0 -416
  267. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests22.ton +0 -192
  268. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests23.ton +0 -148
  269. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests24.ton +0 -107
  270. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests25.ton +0 -390
  271. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests26.ton +0 -546
  272. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests3.ton +0 -407
  273. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests4.ton +0 -96
  274. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests5.ton +0 -299
  275. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests6.ton +0 -908
  276. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests7.ton +0 -597
  277. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests8.ton +0 -219
  278. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests9.ton +0 -585
  279. data/vendor/lexbor/test/files/lexbor/html/html5_test/tests_innerHTML_1.ton +0 -1164
  280. data/vendor/lexbor/test/files/lexbor/html/html5_test/tricky01.ton +0 -378
  281. data/vendor/lexbor/test/files/lexbor/html/html5_test/webkit01.ton +0 -1022
  282. data/vendor/lexbor/test/files/lexbor/html/html5_test/webkit02.ton +0 -996
  283. data/vendor/lexbor/test/files/lexbor/html/html5lib_encoding/README.md +0 -12
  284. data/vendor/lexbor/test/files/lexbor/html/html5lib_encoding/test-yahoo-jp.dat +0 -10
  285. data/vendor/lexbor/test/files/lexbor/html/html5lib_encoding/tests1.dat +0 -388
  286. data/vendor/lexbor/test/files/lexbor/html/html5lib_encoding/tests2.dat +0 -115
  287. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/README.md +0 -12
  288. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/contentModelFlags.test +0 -93
  289. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/domjs.test +0 -335
  290. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/entities.test +0 -542
  291. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/escapeFlag.test +0 -36
  292. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/namedEntities.test +0 -42422
  293. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/numericEntities.test +0 -1677
  294. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/pendingSpecChanges.test +0 -9
  295. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/test1.test +0 -353
  296. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/test2.test +0 -275
  297. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/test3.test +0 -11233
  298. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/test4.test +0 -532
  299. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/unicodeChars.test +0 -1577
  300. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/unicodeCharsProblematic.test +0 -41
  301. data/vendor/lexbor/test/files/lexbor/html/html5lib_tokenizer/xmlViolation.test +0 -20
  302. data/vendor/lexbor/test/files/lexbor/html/lexbor.html +0 -150
  303. data/vendor/lexbor/test/files/lexbor/html/serialize_ext/attributes.ton +0 -167
  304. data/vendor/lexbor/test/files/lexbor/html/serialize_ext/comment.ton +0 -218
  305. data/vendor/lexbor/test/files/lexbor/html/serialize_ext/document_type.ton +0 -180
  306. data/vendor/lexbor/test/files/lexbor/html/serialize_ext/element.ton +0 -392
  307. data/vendor/lexbor/test/files/lexbor/html/serialize_ext/processing_instruction.ton +0 -45
  308. data/vendor/lexbor/test/files/lexbor/html/serialize_ext/serialize_ext.ton +0 -277
  309. data/vendor/lexbor/test/files/lexbor/html/serialize_ext/text.ton +0 -308
  310. data/vendor/lexbor/test/files/lexbor/html/tokenizer/char_ref.ton +0 -563
  311. data/vendor/lexbor/test/files/lexbor/html/tokenizer/comment.ton +0 -28
  312. data/vendor/lexbor/test/files/lexbor/html/tokenizer/doctype.ton +0 -257
  313. data/vendor/lexbor/test/files/lexbor/html/tokenizer/tag_attr.ton +0 -107
  314. data/vendor/lexbor/test/files/lexbor/html/tokenizer/tag_name.ton +0 -51
  315. data/vendor/lexbor/test/files/lexbor/url/changes.ton +0 -1005
  316. data/vendor/lexbor/test/files/lexbor/url/domain.ton +0 -93
  317. data/vendor/lexbor/test/files/lexbor/url/file.ton +0 -29
  318. data/vendor/lexbor/test/files/lexbor/url/fragment.ton +0 -47
  319. data/vendor/lexbor/test/files/lexbor/url/ipv4.ton +0 -221
  320. data/vendor/lexbor/test/files/lexbor/url/ipv6.ton +0 -197
  321. data/vendor/lexbor/test/files/lexbor/url/path.ton +0 -510
  322. data/vendor/lexbor/test/files/lexbor/url/query.ton +0 -135
  323. data/vendor/lexbor/test/files/lexbor/url/scheme.ton +0 -139
  324. data/vendor/lexbor/test/files/lexbor/url/slow_path.ton +0 -460
  325. data/vendor/lexbor/test/files/lexbor/url/url.ton +0 -78
  326. data/vendor/lexbor/test/files/lexbor/url/username_password.ton +0 -127
  327. data/vendor/lexbor/test/fuzzers/lexbor/css/CMakeLists.txt +0 -16
  328. data/vendor/lexbor/test/fuzzers/lexbor/css/css.dict +0 -307
  329. data/vendor/lexbor/test/fuzzers/lexbor/css/stylesheet.c +0 -55
  330. data/vendor/lexbor/test/fuzzers/lexbor/css/syntax/syntax.dict +0 -41
  331. data/vendor/lexbor/test/fuzzers/lexbor/css/syntax/tokenizer.c +0 -99
  332. data/vendor/lexbor/test/fuzzers/lexbor/encoding/CMakeLists.txt +0 -16
  333. data/vendor/lexbor/test/fuzzers/lexbor/encoding/decode.c +0 -29
  334. data/vendor/lexbor/test/fuzzers/lexbor/html/CMakeLists.txt +0 -16
  335. data/vendor/lexbor/test/fuzzers/lexbor/html/document_parse.c +0 -23
  336. data/vendor/lexbor/test/fuzzers/lexbor/punycode/CMakeLists.txt +0 -16
  337. data/vendor/lexbor/test/fuzzers/lexbor/punycode/base.c +0 -89
  338. data/vendor/lexbor/test/fuzzers/lexbor/selectors/CMakeLists.txt +0 -16
  339. data/vendor/lexbor/test/fuzzers/lexbor/selectors/find.c +0 -146
  340. data/vendor/lexbor/test/fuzzers/lexbor/selectors/selectors.dict +0 -71
  341. data/vendor/lexbor/test/fuzzers/lexbor/unicode/CMakeLists.txt +0 -16
  342. data/vendor/lexbor/test/fuzzers/lexbor/unicode/idna_to_ascii.c +0 -40
  343. data/vendor/lexbor/test/fuzzers/lexbor/unicode/normalization_forms.c +0 -41
  344. data/vendor/lexbor/test/fuzzers/lexbor/url/CMakeLists.txt +0 -16
  345. data/vendor/lexbor/test/fuzzers/lexbor/url/modify.c +0 -117
  346. data/vendor/lexbor/test/fuzzers/lexbor/url/parser.c +0 -132
  347. data/vendor/lexbor/test/fuzzers/lexbor/url/url.dict +0 -13
  348. data/vendor/lexbor/test/lexbor/core/CMakeLists.txt +0 -17
  349. data/vendor/lexbor/test/lexbor/core/array.c +0 -443
  350. data/vendor/lexbor/test/lexbor/core/array_obj.c +0 -306
  351. data/vendor/lexbor/test/lexbor/core/avl.c +0 -1588
  352. data/vendor/lexbor/test/lexbor/core/bst.c +0 -388
  353. data/vendor/lexbor/test/lexbor/core/bst_map.c +0 -209
  354. data/vendor/lexbor/test/lexbor/core/dobject.c +0 -322
  355. data/vendor/lexbor/test/lexbor/core/hash.c +0 -171
  356. data/vendor/lexbor/test/lexbor/core/in.c +0 -356
  357. data/vendor/lexbor/test/lexbor/core/mem.c +0 -332
  358. data/vendor/lexbor/test/lexbor/core/mraw.c +0 -612
  359. data/vendor/lexbor/test/lexbor/core/str.c +0 -433
  360. data/vendor/lexbor/test/lexbor/css/CMakeLists.txt +0 -25
  361. data/vendor/lexbor/test/lexbor/css/declarations.c +0 -571
  362. data/vendor/lexbor/test/lexbor/css/selectors/selectors.c +0 -894
  363. data/vendor/lexbor/test/lexbor/css/selectors/specificity.c +0 -177
  364. data/vendor/lexbor/test/lexbor/css/stylesheet.c +0 -196
  365. data/vendor/lexbor/test/lexbor/css/syntax/an_plus_b.c +0 -233
  366. data/vendor/lexbor/test/lexbor/css/syntax/parser.c +0 -1134
  367. data/vendor/lexbor/test/lexbor/css/syntax/style.c +0 -67
  368. data/vendor/lexbor/test/lexbor/css/syntax/tokenizer.c +0 -485
  369. data/vendor/lexbor/test/lexbor/css/syntax/tokenizer_queue.c +0 -92
  370. data/vendor/lexbor/test/lexbor/dom/CMakeLists.txt +0 -17
  371. data/vendor/lexbor/test/lexbor/dom/exception.c +0 -210
  372. data/vendor/lexbor/test/lexbor/dom/node.c +0 -441
  373. data/vendor/lexbor/test/lexbor/encoding/CMakeLists.txt +0 -42
  374. data/vendor/lexbor/test/lexbor/encoding/buffer/big5.c +0 -210
  375. data/vendor/lexbor/test/lexbor/encoding/buffer/encoding.h +0 -243
  376. data/vendor/lexbor/test/lexbor/encoding/buffer/euc_jp.c +0 -228
  377. data/vendor/lexbor/test/lexbor/encoding/buffer/euc_kr.c +0 -172
  378. data/vendor/lexbor/test/lexbor/encoding/buffer/gb18030.c +0 -297
  379. data/vendor/lexbor/test/lexbor/encoding/buffer/ibm866.c +0 -123
  380. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_2022_jp.c +0 -403
  381. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_10.c +0 -123
  382. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_13.c +0 -123
  383. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_14.c +0 -123
  384. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_15.c +0 -123
  385. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_16.c +0 -123
  386. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_2.c +0 -123
  387. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_3.c +0 -123
  388. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_4.c +0 -123
  389. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_5.c +0 -123
  390. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_6.c +0 -123
  391. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_7.c +0 -123
  392. data/vendor/lexbor/test/lexbor/encoding/buffer/iso_8859_8.c +0 -123
  393. data/vendor/lexbor/test/lexbor/encoding/buffer/koi8_r.c +0 -123
  394. data/vendor/lexbor/test/lexbor/encoding/buffer/koi8_u.c +0 -123
  395. data/vendor/lexbor/test/lexbor/encoding/buffer/macintosh.c +0 -123
  396. data/vendor/lexbor/test/lexbor/encoding/buffer/shift_jis.c +0 -230
  397. data/vendor/lexbor/test/lexbor/encoding/buffer/utf-16.c +0 -230
  398. data/vendor/lexbor/test/lexbor/encoding/buffer/utf-8.c +0 -282
  399. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1250.c +0 -123
  400. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1251.c +0 -123
  401. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1252.c +0 -123
  402. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1253.c +0 -123
  403. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1254.c +0 -123
  404. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1255.c +0 -123
  405. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1256.c +0 -123
  406. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1257.c +0 -123
  407. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_1258.c +0 -123
  408. data/vendor/lexbor/test/lexbor/encoding/buffer/windows_874.c +0 -123
  409. data/vendor/lexbor/test/lexbor/encoding/buffer/x_mac_cyrillic.c +0 -123
  410. data/vendor/lexbor/test/lexbor/encoding/encoding.c +0 -97
  411. data/vendor/lexbor/test/lexbor/encoding/parser.h +0 -225
  412. data/vendor/lexbor/test/lexbor/encoding/single/big5.c +0 -203
  413. data/vendor/lexbor/test/lexbor/encoding/single/encoding.h +0 -227
  414. data/vendor/lexbor/test/lexbor/encoding/single/euc_jp.c +0 -220
  415. data/vendor/lexbor/test/lexbor/encoding/single/euc_kr.c +0 -162
  416. data/vendor/lexbor/test/lexbor/encoding/single/gb18030.c +0 -277
  417. data/vendor/lexbor/test/lexbor/encoding/single/ibm866.c +0 -114
  418. data/vendor/lexbor/test/lexbor/encoding/single/iso_2022_jp.c +0 -342
  419. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_10.c +0 -114
  420. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_13.c +0 -114
  421. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_14.c +0 -114
  422. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_15.c +0 -114
  423. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_16.c +0 -114
  424. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_2.c +0 -114
  425. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_3.c +0 -114
  426. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_4.c +0 -114
  427. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_5.c +0 -114
  428. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_6.c +0 -114
  429. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_7.c +0 -114
  430. data/vendor/lexbor/test/lexbor/encoding/single/iso_8859_8.c +0 -114
  431. data/vendor/lexbor/test/lexbor/encoding/single/koi8_r.c +0 -114
  432. data/vendor/lexbor/test/lexbor/encoding/single/koi8_u.c +0 -114
  433. data/vendor/lexbor/test/lexbor/encoding/single/macintosh.c +0 -114
  434. data/vendor/lexbor/test/lexbor/encoding/single/shift_jis.c +0 -203
  435. data/vendor/lexbor/test/lexbor/encoding/single/utf-16.c +0 -216
  436. data/vendor/lexbor/test/lexbor/encoding/single/utf-8.c +0 -227
  437. data/vendor/lexbor/test/lexbor/encoding/single/windows_1250.c +0 -114
  438. data/vendor/lexbor/test/lexbor/encoding/single/windows_1251.c +0 -114
  439. data/vendor/lexbor/test/lexbor/encoding/single/windows_1252.c +0 -114
  440. data/vendor/lexbor/test/lexbor/encoding/single/windows_1253.c +0 -114
  441. data/vendor/lexbor/test/lexbor/encoding/single/windows_1254.c +0 -114
  442. data/vendor/lexbor/test/lexbor/encoding/single/windows_1255.c +0 -114
  443. data/vendor/lexbor/test/lexbor/encoding/single/windows_1256.c +0 -114
  444. data/vendor/lexbor/test/lexbor/encoding/single/windows_1257.c +0 -114
  445. data/vendor/lexbor/test/lexbor/encoding/single/windows_1258.c +0 -114
  446. data/vendor/lexbor/test/lexbor/encoding/single/windows_874.c +0 -114
  447. data/vendor/lexbor/test/lexbor/encoding/single/x_mac_cyrillic.c +0 -114
  448. data/vendor/lexbor/test/lexbor/html/CMakeLists.txt +0 -35
  449. data/vendor/lexbor/test/lexbor/html/attributes.c +0 -105
  450. data/vendor/lexbor/test/lexbor/html/build-cpp.cpp +0 -68
  451. data/vendor/lexbor/test/lexbor/html/clone.c +0 -356
  452. data/vendor/lexbor/test/lexbor/html/dom/document_type.c +0 -125
  453. data/vendor/lexbor/test/lexbor/html/element_by.c +0 -147
  454. data/vendor/lexbor/test/lexbor/html/encoding.c +0 -228
  455. data/vendor/lexbor/test/lexbor/html/encoding_html5lib_tests.c +0 -308
  456. data/vendor/lexbor/test/lexbor/html/encoding_prescan.c +0 -1686
  457. data/vendor/lexbor/test/lexbor/html/inner.c +0 -103
  458. data/vendor/lexbor/test/lexbor/html/other.c +0 -139
  459. data/vendor/lexbor/test/lexbor/html/parse.c +0 -380
  460. data/vendor/lexbor/test/lexbor/html/perf.c +0 -161
  461. data/vendor/lexbor/test/lexbor/html/serialize.c +0 -56
  462. data/vendor/lexbor/test/lexbor/html/serialize_ext.c +0 -461
  463. data/vendor/lexbor/test/lexbor/html/tags.c +0 -140
  464. data/vendor/lexbor/test/lexbor/html/tokenizer/errors.c +0 -34
  465. data/vendor/lexbor/test/lexbor/html/tokenizer/html5lib_tests.c +0 -1168
  466. data/vendor/lexbor/test/lexbor/html/tokenizer_helper.h +0 -403
  467. data/vendor/lexbor/test/lexbor/html/tokenizer_tokens.c +0 -754
  468. data/vendor/lexbor/test/lexbor/html/tree/errors.c +0 -34
  469. data/vendor/lexbor/test/lexbor/html/tree/open_elements.c +0 -99
  470. data/vendor/lexbor/test/lexbor/html/tree_builder.c +0 -536
  471. data/vendor/lexbor/test/lexbor/ns/CMakeLists.txt +0 -17
  472. data/vendor/lexbor/test/lexbor/ns/res.c +0 -55
  473. data/vendor/lexbor/test/lexbor/punycode/CMakeLists.txt +0 -17
  474. data/vendor/lexbor/test/lexbor/punycode/base.c +0 -240
  475. data/vendor/lexbor/test/lexbor/selectors/CMakeLists.txt +0 -17
  476. data/vendor/lexbor/test/lexbor/selectors/selectors.c +0 -911
  477. data/vendor/lexbor/test/lexbor/style/CMakeLists.txt +0 -17
  478. data/vendor/lexbor/test/lexbor/style/element_events.c +0 -291
  479. data/vendor/lexbor/test/lexbor/style/element_style_steps.c +0 -5035
  480. data/vendor/lexbor/test/lexbor/style/not_html_namespace.c +0 -87
  481. data/vendor/lexbor/test/lexbor/style/style_tag.c +0 -184
  482. data/vendor/lexbor/test/lexbor/style/stylesheet.c +0 -51
  483. data/vendor/lexbor/test/lexbor/style/wo_events.c +0 -351
  484. data/vendor/lexbor/test/lexbor/tag/CMakeLists.txt +0 -17
  485. data/vendor/lexbor/test/lexbor/tag/res.c +0 -440
  486. data/vendor/lexbor/test/lexbor/unicode/CMakeLists.txt +0 -17
  487. data/vendor/lexbor/test/lexbor/unicode/composition_test.c +0 -1095
  488. data/vendor/lexbor/test/lexbor/unicode/edges_normalization_forms.c +0 -220
  489. data/vendor/lexbor/test/lexbor/unicode/idna.c +0 -98
  490. data/vendor/lexbor/test/lexbor/unicode/idna_codepoints.c +0 -110
  491. data/vendor/lexbor/test/lexbor/unicode/idna_type.c +0 -31
  492. data/vendor/lexbor/test/lexbor/unicode/normalization_forms.c +0 -205
  493. data/vendor/lexbor/test/lexbor/unicode/normalization_forms_code_points.c +0 -214
  494. data/vendor/lexbor/test/lexbor/unicode/unicode_idna_test_res.h +0 -6423
  495. data/vendor/lexbor/test/lexbor/unicode/unicode_normalization_test_res.h +0 -120229
  496. data/vendor/lexbor/test/lexbor/url/CMakeLists.txt +0 -22
  497. data/vendor/lexbor/test/lexbor/url/errors.c +0 -41
  498. data/vendor/lexbor/test/lexbor/url/other.c +0 -134
  499. data/vendor/lexbor/test/lexbor/url/parser.c +0 -872
  500. data/vendor/lexbor/test/lexbor/url/search_params.c +0 -616
  501. data/vendor/lexbor/test/lexbor/url/validation.c +0 -185
  502. data/vendor/lexbor/test/unit/CMakeLists.txt +0 -49
  503. data/vendor/lexbor/test/unit/kv.c +0 -538
  504. data/vendor/lexbor/test/unit/kv.h +0 -301
  505. data/vendor/lexbor/test/unit/kv_rules.c +0 -609
  506. data/vendor/lexbor/test/unit/kv_state.c +0 -1470
  507. data/vendor/lexbor/test/unit/test.c +0 -131
  508. data/vendor/lexbor/test/unit/test.h +0 -410
  509. data/vendor/lexbor/utils/CMakeLists.txt +0 -11
  510. data/vendor/lexbor/utils/lexbor/css/grammar.txt +0 -263
  511. data/vendor/lexbor/utils/lexbor/css/names.py +0 -768
  512. data/vendor/lexbor/utils/lexbor/css/selectors/pseudo.py +0 -234
  513. data/vendor/lexbor/utils/lexbor/css/selectors/tmp/const.h +0 -21
  514. data/vendor/lexbor/utils/lexbor/css/selectors/tmp/res.h +0 -26
  515. data/vendor/lexbor/utils/lexbor/css/syntax/definitions.py +0 -62
  516. data/vendor/lexbor/utils/lexbor/css/syntax/non_ascii.pl +0 -77
  517. data/vendor/lexbor/utils/lexbor/css/syntax/token_res.py +0 -55
  518. data/vendor/lexbor/utils/lexbor/css/syntax/tokenizer_code_map.py +0 -36
  519. data/vendor/lexbor/utils/lexbor/css/tmp/const.h +0 -24
  520. data/vendor/lexbor/utils/lexbor/css/tmp/res.h +0 -26
  521. data/vendor/lexbor/utils/lexbor/css/tmp/types.h +0 -21
  522. data/vendor/lexbor/utils/lexbor/css/tmp/value_const.h +0 -21
  523. data/vendor/lexbor/utils/lexbor/css/tmp/value_res.h +0 -25
  524. data/vendor/lexbor/utils/lexbor/dom/attr.py +0 -129
  525. data/vendor/lexbor/utils/lexbor/dom/tmp/const.h +0 -23
  526. data/vendor/lexbor/utils/lexbor/dom/tmp/res.h +0 -27
  527. data/vendor/lexbor/utils/lexbor/encoding/CMakeLists.txt +0 -32
  528. data/vendor/lexbor/utils/lexbor/encoding/big5_map_decode.c +0 -93
  529. data/vendor/lexbor/utils/lexbor/encoding/buffer-single-byte.py +0 -95
  530. data/vendor/lexbor/utils/lexbor/encoding/encodings.json +0 -456
  531. data/vendor/lexbor/utils/lexbor/encoding/euc_jp_map_decode.c +0 -83
  532. data/vendor/lexbor/utils/lexbor/encoding/euc_kr_map_decode.c +0 -89
  533. data/vendor/lexbor/utils/lexbor/encoding/gb18030_map_decode.c +0 -170
  534. data/vendor/lexbor/utils/lexbor/encoding/iso_2022_jp_map_decode.c +0 -120
  535. data/vendor/lexbor/utils/lexbor/encoding/multi-byte/index-big5.txt +0 -18596
  536. data/vendor/lexbor/utils/lexbor/encoding/multi-byte/index-euc-kr.txt +0 -17054
  537. data/vendor/lexbor/utils/lexbor/encoding/multi-byte/index-gb18030.txt +0 -23946
  538. data/vendor/lexbor/utils/lexbor/encoding/multi-byte/index-iso-2022-jp-katakana.txt +0 -69
  539. data/vendor/lexbor/utils/lexbor/encoding/multi-byte/index-jis0208.txt +0 -7730
  540. data/vendor/lexbor/utils/lexbor/encoding/multi-byte/index-jis0212.txt +0 -6073
  541. data/vendor/lexbor/utils/lexbor/encoding/multi-byte.pl +0 -424
  542. data/vendor/lexbor/utils/lexbor/encoding/range-byte.py +0 -118
  543. data/vendor/lexbor/utils/lexbor/encoding/ranges/index-gb18030-ranges.txt +0 -213
  544. data/vendor/lexbor/utils/lexbor/encoding/res.py +0 -231
  545. data/vendor/lexbor/utils/lexbor/encoding/shift_jis_map_decode.c +0 -102
  546. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-ibm866.txt +0 -134
  547. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-10.txt +0 -134
  548. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-13.txt +0 -134
  549. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-14.txt +0 -134
  550. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-15.txt +0 -134
  551. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-16.txt +0 -134
  552. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-2.txt +0 -134
  553. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-3.txt +0 -127
  554. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-4.txt +0 -134
  555. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-5.txt +0 -134
  556. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-6.txt +0 -89
  557. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-7.txt +0 -131
  558. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-iso-8859-8.txt +0 -98
  559. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-koi8-r.txt +0 -134
  560. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-koi8-u.txt +0 -134
  561. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-macintosh.txt +0 -134
  562. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1250.txt +0 -134
  563. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1251.txt +0 -134
  564. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1252.txt +0 -134
  565. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1253.txt +0 -131
  566. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1254.txt +0 -134
  567. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1255.txt +0 -124
  568. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1256.txt +0 -134
  569. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1257.txt +0 -132
  570. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-1258.txt +0 -134
  571. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-windows-874.txt +0 -126
  572. data/vendor/lexbor/utils/lexbor/encoding/single-byte/index-x-mac-cyrillic.txt +0 -134
  573. data/vendor/lexbor/utils/lexbor/encoding/single-byte.py +0 -179
  574. data/vendor/lexbor/utils/lexbor/encoding/tmp/buffer_single_byte_test.c +0 -123
  575. data/vendor/lexbor/utils/lexbor/encoding/tmp/const.h +0 -19
  576. data/vendor/lexbor/utils/lexbor/encoding/tmp/multi.c +0 -20
  577. data/vendor/lexbor/utils/lexbor/encoding/tmp/multi.h +0 -37
  578. data/vendor/lexbor/utils/lexbor/encoding/tmp/range.c +0 -17
  579. data/vendor/lexbor/utils/lexbor/encoding/tmp/range.h +0 -35
  580. data/vendor/lexbor/utils/lexbor/encoding/tmp/res.c +0 -22
  581. data/vendor/lexbor/utils/lexbor/encoding/tmp/res.h +0 -34
  582. data/vendor/lexbor/utils/lexbor/encoding/tmp/single.c +0 -20
  583. data/vendor/lexbor/utils/lexbor/encoding/tmp/single.h +0 -37
  584. data/vendor/lexbor/utils/lexbor/encoding/tmp/single_byte_test.c +0 -114
  585. data/vendor/lexbor/utils/lexbor/grammar/CMakeLists.txt +0 -63
  586. data/vendor/lexbor/utils/lexbor/grammar/base.h +0 -89
  587. data/vendor/lexbor/utils/lexbor/grammar/document.h +0 -34
  588. data/vendor/lexbor/utils/lexbor/grammar/grammar.c +0 -243
  589. data/vendor/lexbor/utils/lexbor/grammar/json.c +0 -368
  590. data/vendor/lexbor/utils/lexbor/grammar/json.h +0 -48
  591. data/vendor/lexbor/utils/lexbor/grammar/node.c +0 -653
  592. data/vendor/lexbor/utils/lexbor/grammar/node.h +0 -120
  593. data/vendor/lexbor/utils/lexbor/grammar/parser.c +0 -724
  594. data/vendor/lexbor/utils/lexbor/grammar/parser.h +0 -75
  595. data/vendor/lexbor/utils/lexbor/grammar/test.c +0 -1762
  596. data/vendor/lexbor/utils/lexbor/grammar/test.h +0 -35
  597. data/vendor/lexbor/utils/lexbor/grammar/token.c +0 -258
  598. data/vendor/lexbor/utils/lexbor/grammar/token.h +0 -91
  599. data/vendor/lexbor/utils/lexbor/grammar/tokenizer.c +0 -706
  600. data/vendor/lexbor/utils/lexbor/grammar/tokenizer.h +0 -73
  601. data/vendor/lexbor/utils/lexbor/html/convert_html5_tests.py +0 -162
  602. data/vendor/lexbor/utils/lexbor/html/data/entities.json +0 -2233
  603. data/vendor/lexbor/utils/lexbor/html/insertion_mode.py +0 -61
  604. data/vendor/lexbor/utils/lexbor/html/reorder_html5_tests_tokenizer_errors.py +0 -137
  605. data/vendor/lexbor/utils/lexbor/html/tmp/insertion_mode.c +0 -53
  606. data/vendor/lexbor/utils/lexbor/html/tmp/insertion_mode.h +0 -18
  607. data/vendor/lexbor/utils/lexbor/html/tmp/tokenizer_res.h +0 -20
  608. data/vendor/lexbor/utils/lexbor/html/tokenizer_entities_bst.py +0 -209
  609. data/vendor/lexbor/utils/lexbor/html/tokenizer_entities_switch.py +0 -162
  610. data/vendor/lexbor/utils/lexbor/html/tokenizer_parse_error.pl +0 -97
  611. data/vendor/lexbor/utils/lexbor/lexbor/LXB.py +0 -498
  612. data/vendor/lexbor/utils/lexbor/lexbor/res.py +0 -130
  613. data/vendor/lexbor/utils/lexbor/tag_ns/data/interfaces.json +0 -98
  614. data/vendor/lexbor/utils/lexbor/tag_ns/data/tags.json +0 -371
  615. data/vendor/lexbor/utils/lexbor/tag_ns/interfaces.py +0 -175
  616. data/vendor/lexbor/utils/lexbor/tag_ns/tags.py +0 -808
  617. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/html_attribute_steps_res.h +0 -21
  618. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/html_element_steps_res.h +0 -21
  619. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/html_interface_res.h +0 -29
  620. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/html_open_elements_res.h +0 -21
  621. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/html_tag_res.h +0 -25
  622. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/interface.c +0 -36
  623. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/interface.h +0 -33
  624. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/ns_const.h +0 -26
  625. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/ns_res.h +0 -29
  626. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/open_elements_res.h +0 -21
  627. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/steps_res.h +0 -23
  628. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/tag_const.h +0 -26
  629. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/tag_res.h +0 -26
  630. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/test/ns_res.c +0 -44
  631. data/vendor/lexbor/utils/lexbor/tag_ns/tmp/test/tag_res.c +0 -45
  632. data/vendor/lexbor/utils/lexbor/unicode/build.pl +0 -1323
  633. data/vendor/lexbor/utils/lexbor/unicode/idna_test.pl +0 -398
  634. data/vendor/lexbor/utils/lexbor/unicode/normalization_test.pl +0 -157
  635. data/vendor/lexbor/utils/wasm/gen_constants.py +0 -186
  636. data/vendor/lexbor/wasm/CMakeLists.txt +0 -18
  637. data/vendor/lexbor/wasm/lexbor/engine/CMakeLists.txt +0 -21
  638. data/vendor/lexbor/wasm/lexbor/engine/index.html +0 -406
  639. data/vendor/lexbor/wasm/lexbor/engine/lexbor.c +0 -1340
  640. data/vendor/lexbor/wasm/lexbor/html/CMakeLists.txt +0 -11
  641. data/vendor/lexbor/wasm/lexbor/html/parse.c +0 -58
@@ -46,6 +46,34 @@ mkr_ruby_borrowed_bytes_t mkr_ruby_bytes_view(VALUE in);
46
46
  * for an empty input), suitable for use while the GVL is released. */
47
47
  int mkr_ruby_copy_bytes(VALUE in, mkr_owned_bytes_t *out);
48
48
 
49
+ /* Return a UTF-8 Ruby String for `str`, honouring its declared encoding: UTF-8 /
50
+ * US-ASCII / ASCII-8BIT are returned unchanged (the parser handles their bytes
51
+ * directly); any other encoding is transcoded to UTF-8 (invalid/undef -> U+FFFD)
52
+ * so its content is preserved rather than read as raw UTF-8. The UTF-8 common
53
+ * case is a single encoding comparison. */
54
+ VALUE mkr_ruby_to_utf8(VALUE str);
55
+
56
+ /* STRICT decode for XML (§2.1): like mkr_ruby_to_utf8 it honours the String's
57
+ * declared encoding (UTF-8 / US-ASCII / ASCII-8BIT pass through; any other
58
+ * encoding is transcoded to UTF-8) - but FAIL-CLOSED, never lenient: a non-UTF-8
59
+ * byte that can't be converted, invalid UTF-8, or an embedded NUL all raise
60
+ * Makiri::XML::SyntaxError (no U+FFFD replacement). Returns a validated,
61
+ * UTF-8-tagged Ruby String. (The HTML replace path mkr_ruby_to_utf8 itself is
62
+ * NOT reused for the conversion - only its encoding-judgment rule is shared.)
63
+ *
64
+ * +max_bytes+ bounds the decoded UTF-8 length: an input that already exceeds the
65
+ * parser's arena byte budget is rejected here with Makiri::XML::LimitExceeded,
66
+ * before the validation copy and the caller's GVL-release copy (so a hostile
67
+ * oversized document is not copied twice for a doomed parse). 0 disables the
68
+ * check (decode-only callers that build no arena). */
69
+ VALUE mkr_xml_decode_input(VALUE str, size_t max_bytes);
70
+
71
+ /* True if `str` is *already known* to be valid UTF-8 - pure ASCII, or valid in
72
+ * the UTF-8 encoding - from its cached coderange, WITHOUT forcing a scan. Lets
73
+ * the parse skip mkr_utf8_sanitize's validation pass for input Ruby has already
74
+ * classified (an unknown/broken coderange returns false: sanitize handles it). */
75
+ bool mkr_ruby_str_known_valid_utf8(VALUE str);
76
+
49
77
  /* Validate a Ruby String for use as an XPath engine string: valid UTF-8,
50
78
  * no interior NUL, and at most +max_bytes+. Returns NULL on success and fills
51
79
  * +out+; otherwise returns a static reason string. +sv+ must be a String. */
@@ -108,6 +108,223 @@ mkr_ruby_copy_bytes(VALUE in, mkr_owned_bytes_t *out)
108
108
  return 0;
109
109
  }
110
110
 
111
+ VALUE
112
+ mkr_ruby_to_utf8(VALUE str)
113
+ {
114
+ /* Honour the Ruby String's declared encoding so its content survives:
115
+ *
116
+ * - UTF-8 / US-ASCII / ASCII-8BIT (binary): returned unchanged. These are
117
+ * already UTF-8 bytes (or deliberately raw bytes), and the native parser
118
+ * does the WHATWG invalid-byte replacement for them. The UTF-8 common
119
+ * case costs only this encoding comparison - no transcode, no copy.
120
+ *
121
+ * - any other encoding (Shift_JIS, EUC-JP, ISO-8859-1, Windows-1252, ...):
122
+ * transcoded to UTF-8 with invalid/undef -> U+FFFD, so e.g. Shift_JIS
123
+ * text becomes the right UTF-8 characters instead of being read as raw
124
+ * UTF-8 bytes and mangled. Only non-UTF-8 input pays this. */
125
+ rb_encoding *enc = rb_enc_get(str);
126
+ if (enc == rb_utf8_encoding()
127
+ || enc == rb_usascii_encoding()
128
+ || enc == rb_ascii8bit_encoding()) {
129
+ return str;
130
+ }
131
+ return rb_str_encode(str, rb_enc_from_encoding(rb_utf8_encoding()),
132
+ ECONV_INVALID_REPLACE | ECONV_UNDEF_REPLACE, Qnil);
133
+ }
134
+
135
+ /* rb_str_encode with no replacement flags: an undefined conversion or invalid
136
+ * byte sequence RAISES (Encoding::UndefinedConversionError /
137
+ * Encoding::InvalidByteSequenceError) instead of substituting U+FFFD. Run under
138
+ * rb_protect so we can remap the Ruby Encoding error to Makiri::XML::SyntaxError. */
139
+ static VALUE
140
+ mkr_xml_strict_transcode_thunk(VALUE str)
141
+ {
142
+ return rb_str_encode(str, rb_enc_from_encoding(rb_utf8_encoding()), 0, Qnil);
143
+ }
144
+
145
+ /* --- XML 1.0 Appendix F: byte-encoding autodetection (BOM, then declaration) ---
146
+ *
147
+ * The leading byte-order mark, or NULL; *bom_len gets its length. UTF-32 BOMs are
148
+ * checked before the UTF-16 LE BOM they share a prefix with. */
149
+ static rb_encoding *
150
+ mkr_xml_bom_encoding(const unsigned char *p, long len, long *bom_len)
151
+ {
152
+ *bom_len = 0;
153
+ if (len >= 4 && p[0] == 0x00 && p[1] == 0x00 && p[2] == 0xFE && p[3] == 0xFF) {
154
+ *bom_len = 4; return rb_enc_find("UTF-32BE");
155
+ }
156
+ if (len >= 4 && p[0] == 0xFF && p[1] == 0xFE && p[2] == 0x00 && p[3] == 0x00) {
157
+ *bom_len = 4; return rb_enc_find("UTF-32LE");
158
+ }
159
+ if (len >= 2 && p[0] == 0xFE && p[1] == 0xFF) { *bom_len = 2; return rb_enc_find("UTF-16BE"); }
160
+ if (len >= 2 && p[0] == 0xFF && p[1] == 0xFE) { *bom_len = 2; return rb_enc_find("UTF-16LE"); }
161
+ if (len >= 3 && p[0] == 0xEF && p[1] == 0xBB && p[2] == 0xBF) { *bom_len = 3; return rb_utf8_encoding(); }
162
+ return NULL;
163
+ }
164
+
165
+ /* The encoding named in the '<?xml ... encoding="NAME" ?>' declaration, or NULL.
166
+ * The declaration is ASCII; for a UTF-16/32-detected document its bytes are
167
+ * stride-interleaved, so the ASCII column is extracted (per the BOM) before the
168
+ * scan, letting a BOM-vs-declaration conflict be caught even in UTF-16. */
169
+ static rb_encoding *
170
+ mkr_xml_decl_encoding(const unsigned char *p, long len, rb_encoding *bom)
171
+ {
172
+ long stride = 1, off = 0;
173
+ if (bom == rb_enc_find("UTF-16LE")) { stride = 2; off = 0; }
174
+ else if (bom == rb_enc_find("UTF-16BE")) { stride = 2; off = 1; }
175
+ else if (bom == rb_enc_find("UTF-32LE")) { stride = 4; off = 0; }
176
+ else if (bom == rb_enc_find("UTF-32BE")) { stride = 4; off = 3; }
177
+
178
+ char head[256];
179
+ long hn = 0;
180
+ for (long i = off; i < len && hn < (long)sizeof(head); i += stride) head[hn++] = (char)p[i];
181
+
182
+ long i = 0;
183
+ while (i < hn && (head[i] == ' ' || head[i] == '\t' || head[i] == '\r' || head[i] == '\n')) i++;
184
+ if (i + 5 > hn || memcmp(head + i, "<?xml", 5) != 0) return NULL;
185
+ i += 5;
186
+ /* find a whitespace-introduced "encoding" before the '?>' */
187
+ for (; i + 8 <= hn; i++) {
188
+ if (head[i] == '?' && i + 1 < hn && head[i + 1] == '>') return NULL; /* end of decl */
189
+ int ws_before = (head[i - 1] == ' ' || head[i - 1] == '\t' || head[i - 1] == '\r' || head[i - 1] == '\n');
190
+ if (!ws_before || memcmp(head + i, "encoding", 8) != 0) continue;
191
+ long j = i + 8;
192
+ while (j < hn && (head[j] == ' ' || head[j] == '\t' || head[j] == '\r' || head[j] == '\n')) j++;
193
+ if (j >= hn || head[j] != '=') return NULL;
194
+ j++;
195
+ while (j < hn && (head[j] == ' ' || head[j] == '\t' || head[j] == '\r' || head[j] == '\n')) j++;
196
+ if (j >= hn || (head[j] != '"' && head[j] != '\'')) return NULL;
197
+ char q = head[j++];
198
+ long ns = j;
199
+ while (j < hn && head[j] != q) j++;
200
+ if (j >= hn) return NULL;
201
+ char name[64];
202
+ long nl = j - ns;
203
+ if (nl <= 0 || nl >= (long)sizeof(name)) return NULL;
204
+ memcpy(name, head + ns, (size_t)nl);
205
+ name[nl] = '\0';
206
+ return rb_enc_find(name); /* NULL for an unknown encoding name */
207
+ }
208
+ return NULL;
209
+ }
210
+
211
+ /* Two encodings agree for conflict purposes when identical, or when either is
212
+ * US-ASCII (a subset of UTF-8 and the single-byte encodings). */
213
+ static int
214
+ mkr_xml_enc_compatible(rb_encoding *a, rb_encoding *b)
215
+ {
216
+ return a == b || a == rb_usascii_encoding() || b == rb_usascii_encoding();
217
+ }
218
+
219
+ VALUE
220
+ mkr_xml_decode_input(VALUE str, size_t max_bytes)
221
+ {
222
+ rb_encoding *tag = rb_enc_get(str);
223
+ const unsigned char *raw = (const unsigned char *)RSTRING_PTR(str);
224
+ long rawlen = RSTRING_LEN(str);
225
+
226
+ /* Detect the byte encoding (XML 1.0 Appendix F): a BOM wins, else the
227
+ * declaration. The Ruby String's encoding is authoritative when it is a
228
+ * concrete text encoding; a BOM/declaration that disagrees is a fatal
229
+ * conflict. ASCII-8BIT means "raw bytes, no claimed encoding", so there the
230
+ * detected encoding decodes the input (a UTF-16/Shift_JIS/BOM'd file read
231
+ * with File.binread now parses). */
232
+ long bom_len = 0;
233
+ rb_encoding *bom = mkr_xml_bom_encoding(raw, rawlen, &bom_len);
234
+ rb_encoding *decl = mkr_xml_decl_encoding(raw + bom_len, rawlen - bom_len, bom);
235
+ int is_binary = (tag == rb_ascii8bit_encoding());
236
+
237
+ if (bom && decl && !mkr_xml_enc_compatible(bom, decl)) {
238
+ rb_raise(mkr_eXmlSyntaxError,
239
+ "XML encoding conflict: the byte-order mark and the encoding declaration disagree");
240
+ }
241
+ if (!is_binary && bom && !mkr_xml_enc_compatible(bom, tag)) {
242
+ rb_raise(mkr_eXmlSyntaxError,
243
+ "XML encoding conflict: the byte-order mark disagrees with the string's encoding");
244
+ }
245
+ if (!is_binary && decl && !mkr_xml_enc_compatible(decl, tag)) {
246
+ /* A concrete String encoding is authoritative for decoding, so the
247
+ * declaration is not used to transcode - but a declaration that names a
248
+ * different encoding than the String is tagged with (e.g. a Shift_JIS
249
+ * String declaring encoding="UTF-8") is a self-inconsistent document and
250
+ * a fatal error, not a silently-ignored mismatch. */
251
+ rb_raise(mkr_eXmlSyntaxError,
252
+ "XML encoding conflict: the encoding declaration disagrees with the string's encoding");
253
+ }
254
+
255
+ rb_encoding *eff = is_binary ? (bom ? bom : (decl ? decl : rb_utf8_encoding())) : tag;
256
+
257
+ /* Decode to UTF-8 (strict). UTF-8 / US-ASCII / ASCII-8BIT are already UTF-8
258
+ * bytes (validated below); anything else is strict-transcoded, raising rather
259
+ * than substituting U+FFFD. */
260
+ VALUE s;
261
+ if (eff == rb_utf8_encoding() || eff == rb_usascii_encoding() || eff == rb_ascii8bit_encoding()) {
262
+ s = str;
263
+ } else {
264
+ VALUE in = str;
265
+ if (rb_enc_get(str) != eff) { in = rb_str_dup(str); rb_enc_associate(in, eff); }
266
+ int state = 0;
267
+ s = rb_protect(mkr_xml_strict_transcode_thunk, in, &state);
268
+ if (state != 0) {
269
+ VALUE exc = rb_errinfo();
270
+ rb_set_errinfo(Qnil);
271
+ char msg[256];
272
+ mkr_ruby_exception_message(exc, msg, sizeof msg);
273
+ rb_raise(mkr_eXmlSyntaxError,
274
+ "XML input could not be decoded to UTF-8: %s", msg);
275
+ }
276
+ RB_GC_GUARD(in);
277
+ }
278
+
279
+ const char *ptr = RSTRING_PTR(s);
280
+ long len = RSTRING_LEN(s);
281
+ /* §4.3.3: a leading BOM is the encoding signature, not document content -
282
+ * strip a U+FEFF (the transcode above turns any UTF-16/32 BOM into one). */
283
+ if (len >= 3 && (unsigned char)ptr[0] == 0xEF && (unsigned char)ptr[1] == 0xBB
284
+ && (unsigned char)ptr[2] == 0xBF) {
285
+ ptr += 3; len -= 3;
286
+ }
287
+
288
+ /* Fail closed on an over-budget input BEFORE the validation copy and the
289
+ * caller's GVL-release copy (an input whose UTF-8 length exceeds the arena
290
+ * budget can never parse). max_bytes == 0 disables the check (__decode). */
291
+ if (max_bytes != 0 && (size_t)len > max_bytes) {
292
+ RB_GC_GUARD(s);
293
+ rb_raise(mkr_eXmlLimitExceeded, "XML input exceeds the byte budget");
294
+ }
295
+
296
+ /* Strict UTF-8 validation: an embedded NUL or any invalid UTF-8 is fatal
297
+ * (no U+FFFD repair - unlike the HTML mkr_utf8_sanitize path). */
298
+ if (len > 0 && memchr(ptr, '\0', (size_t)len) != NULL) {
299
+ rb_raise(mkr_eXmlSyntaxError, "XML input must not contain a NUL byte");
300
+ }
301
+ VALUE u = rb_enc_str_new(ptr, len, rb_utf8_encoding());
302
+ if (rb_enc_str_coderange(u) == ENC_CODERANGE_BROKEN) {
303
+ rb_raise(mkr_eXmlSyntaxError, "XML input must be valid UTF-8");
304
+ }
305
+ RB_GC_GUARD(s);
306
+ return u; /* validated, UTF-8-tagged, BOM-stripped */
307
+ }
308
+
309
+ bool
310
+ mkr_ruby_str_known_valid_utf8(VALUE str)
311
+ {
312
+ if (!RB_TYPE_P(str, T_STRING)) {
313
+ return false;
314
+ }
315
+ /* ENC_CODERANGE reads the *cached* classification from the object's flags;
316
+ * it does NOT scan (rb_enc_str_coderange would, costing as much as our own
317
+ * validator). So this only wins when Ruby already knows the answer. */
318
+ int cr = ENC_CODERANGE(str);
319
+ if (cr == ENC_CODERANGE_7BIT) {
320
+ return true; /* all bytes < 0x80 in an ASCII-compatible encoding */
321
+ }
322
+ if (cr == ENC_CODERANGE_VALID) {
323
+ return rb_enc_get(str) == rb_utf8_encoding(); /* valid AND UTF-8 */
324
+ }
325
+ return false; /* UNKNOWN or BROKEN: let mkr_utf8_sanitize handle it */
326
+ }
327
+
111
328
  const char *
112
329
  mkr_ruby_try_verified_text(VALUE sv, size_t max_bytes, mkr_ruby_borrowed_text_t *out)
113
330
  {
@@ -6,7 +6,7 @@
6
6
  * allocators, the foundation every other C layer (glue, xpath engine,
7
7
  * lexbor_compat) builds on, so the ad-hoc `cap *= 2` / `n + 1` /
8
8
  * `malloc(n * sizeof(T))` patterns are written once, here, and fail closed.
9
- * NOTHING in this header touches Ruby exception mapping happens at the glue
9
+ * NOTHING in this header touches Ruby - exception mapping happens at the glue
10
10
  * boundary. (mkr_core.h is a thin umbrella over this + the other core headers.)
11
11
  */
12
12
 
@@ -13,7 +13,12 @@ mkr_buf_append(mkr_buf_t *b, const void *bytes, size_t n)
13
13
  if (!mkr_size_add(b->len, n, &need)) {
14
14
  return MKR_ERR_OOM;
15
15
  }
16
- if (b->max != 0 && need > b->max) {
16
+ /* max == 0 is NOT unbounded: it falls back to the conservative default
17
+ * ceiling, so a caller that never set a cap still fails closed. Either way the
18
+ * absolute hard ceiling clamps it, so no buffer can exhaust memory. */
19
+ size_t soft = (b->max != 0) ? b->max : MKR_BUF_DEFAULT_LIMIT;
20
+ size_t limit = (soft < MKR_BUF_HARD_MAX) ? soft : MKR_BUF_HARD_MAX;
21
+ if (need > limit) {
17
22
  return MKR_ERR_LIMIT;
18
23
  }
19
24
  size_t need_term; /* room for the NUL terminator too */
@@ -38,6 +43,35 @@ mkr_buf_append(mkr_buf_t *b, const void *bytes, size_t n)
38
43
  return MKR_OK;
39
44
  }
40
45
 
46
+ mkr_status_t
47
+ mkr_buf_reserve(mkr_buf_t *b, size_t n)
48
+ {
49
+ /* Pre-allocate capacity for n bytes so a known-size fill does not realloc on
50
+ * every geometric step (the serializer reserves ~the output size up front).
51
+ * Best-effort: never grow past the buffer's own cap, and a later append still
52
+ * fails closed if the real output exceeds it. */
53
+ size_t soft = (b->max != 0) ? b->max : MKR_BUF_DEFAULT_LIMIT;
54
+ size_t limit = (soft < MKR_BUF_HARD_MAX) ? soft : MKR_BUF_HARD_MAX;
55
+ if (n > limit) {
56
+ n = limit;
57
+ }
58
+ size_t need_term; /* room for the NUL terminator too */
59
+ if (!mkr_size_add(n, 1, &need_term)) {
60
+ return MKR_ERR_OOM;
61
+ }
62
+ if (need_term <= b->cap) {
63
+ return MKR_OK; /* already have room */
64
+ }
65
+ char *p = realloc(b->data, need_term);
66
+ if (p == NULL) {
67
+ return MKR_ERR_OOM;
68
+ }
69
+ b->data = p;
70
+ b->cap = need_term;
71
+ b->data[b->len] = '\0'; /* keep NUL-terminated */
72
+ return MKR_OK;
73
+ }
74
+
41
75
  char *
42
76
  mkr_buf_steal(mkr_buf_t *b, size_t *out_len)
43
77
  {
@@ -2,7 +2,7 @@
2
2
  #define MAKIRI_CORE_MKR_BUF_H
3
3
 
4
4
  /*
5
- * mkr_buf_t an owned, growable, optionally capped byte buffer, kept
5
+ * mkr_buf_t - an owned, growable, optionally capped byte buffer, kept
6
6
  * NUL-terminated. Built on the fail-closed allocators in mkr_alloc.h.
7
7
  * (mkr_core.h is a thin umbrella over mkr_alloc.h + mkr_text.h + this.)
8
8
  */
@@ -13,14 +13,42 @@
13
13
  extern "C" {
14
14
  #endif
15
15
 
16
+ /* Memory safety for buffers lives HERE, at the one buffer primitive, not at each
17
+ * call site: "max == 0" can no longer mean "unbounded". Two ceilings bound every
18
+ * mkr_buf so a runaway - a cycle, an unbounded loop, or a caller that forgot to
19
+ * pass a cap - fails closed with MKR_ERR_LIMIT instead of exhausting memory and
20
+ * freezing the machine:
21
+ *
22
+ * MKR_BUF_DEFAULT_LIMIT the cap applied when the caller passes max == 0. A
23
+ * conservative default (100 MiB): code that did not
24
+ * think about a bound gets a tight one for free, and a
25
+ * buffer that genuinely needs to be large must opt in
26
+ * EXPLICITLY by passing a larger max.
27
+ * MKR_BUF_HARD_MAX an absolute ceiling no buffer may exceed, even one
28
+ * with an explicit max - the last-resort backstop.
29
+ * Tight, content-scaled bounds still belong to the
30
+ * caller (e.g. the XML serializer caps itself at a
31
+ * multiple of arena_bytes); this stops total runaway.
32
+ *
33
+ * Override either at build time: -DMKR_BUF_DEFAULT_LIMIT=<bytes> / -DMKR_BUF_HARD_MAX=<bytes>. */
34
/* Default cap applied when a buffer is initialised with max == 0. */
#ifndef MKR_BUF_DEFAULT_LIMIT
#define MKR_BUF_DEFAULT_LIMIT ((size_t)100 << 20) /* 100 MiB */
#endif

/* Absolute ceiling for any buffer: 4 GiB where size_t can represent it.
 * Where size_t is 32 bits wide, (size_t)4 << 30 wraps to 0, which would turn
 * the ceiling into "nothing may ever be appended"; there the largest size_t
 * value is used instead. The shift in the unselected arm is unsigned
 * arithmetic, so it is well defined even when it wraps. */
#ifndef MKR_BUF_HARD_MAX
#define MKR_BUF_HARD_MAX \
    ((sizeof(size_t) > 4) ? ((size_t)4 << 30) : (size_t)-1)
#endif
40
+
16
41
  typedef struct {
17
42
  char *data; /* owned; kept NUL-terminated after any append */
18
43
  size_t len; /* bytes used (excluding the terminator) */
19
44
  size_t cap; /* bytes allocated */
20
- size_t max; /* 0 = unbounded; else append past max returns MKR_ERR_LIMIT */
45
+ size_t max; /* 0 = the conservative MKR_BUF_DEFAULT_LIMIT; else this value -
46
+ * either way clamped by MKR_BUF_HARD_MAX (past it -> ERR_LIMIT) */
21
47
  } mkr_buf_t;
22
48
 
23
- /* Initialise an empty buffer. max == 0 means unbounded. */
49
+ /* Initialise an empty buffer. max == 0 applies the conservative default ceiling
50
+ * (MKR_BUF_DEFAULT_LIMIT) - it is NOT unbounded; pass an explicit (larger or
51
+ * smaller) value to opt into a different bound, always under MKR_BUF_HARD_MAX. */
24
52
  static inline void
25
53
  mkr_buf_init(mkr_buf_t *b, size_t max)
26
54
  {
@@ -35,6 +63,12 @@ mkr_buf_init(mkr_buf_t *b, size_t max)
35
63
  * failure (the buffer is left intact in every failure case). n == 0 is a no-op. */
36
64
  mkr_status_t mkr_buf_append(mkr_buf_t *b, const void *bytes, size_t n);
37
65
 
66
+ /* Pre-allocate capacity for at least n bytes (best-effort, clamped to the
67
+ * buffer's cap), so a fill of known approximate size avoids per-append reallocs.
68
+ * A no-op if the buffer already has room. Returns MKR_ERR_OOM on overflow /
69
+ * allocation failure (the buffer is left intact). */
70
+ mkr_status_t mkr_buf_reserve(mkr_buf_t *b, size_t n);
71
+
38
72
  /* Take ownership of the (NUL-terminated) bytes; the buffer is reset to empty.
39
73
  * Returns a freshly owned "" for an empty buffer, or NULL on OOM. */
40
74
  char *mkr_buf_steal(mkr_buf_t *b, size_t *out_len);
@@ -10,7 +10,7 @@
10
10
  * mkr_text.h string-type lattice (owned/borrowed/verified text + bytes)
11
11
  * mkr_buf.h mkr_buf_t (growable, capped byte buffer)
12
12
  *
13
- * NOTHING here touches Ruby exception mapping happens at the glue boundary.
13
+ * NOTHING here touches Ruby - exception mapping happens at the glue boundary.
14
14
  */
15
15
 
16
16
  #include "mkr_alloc.h"
@@ -35,7 +35,7 @@ mkr_ptr_hash(const void *p)
35
35
 
36
36
  /* Smallest power of two >= n, into *out. Returns false on overflow (no power of
37
37
  * two >= n fits in size_t) so the caller fails closed rather than sizing a
38
- * power-of-two hash table below the element count it must hold which would
38
+ * power-of-two hash table below the element count it must hold - which would
39
39
  * never find a free slot under linear probing. Shared by the pointer-keyed
40
40
  * indexes (attr->owner, text-index). */
41
41
  static inline bool
@@ -16,7 +16,7 @@ extern "C" {
16
16
  #endif
17
17
 
18
18
  /* ---------------------------------------------------------------- */
19
- /* mkr_verified_text_t a string proven to meet the engine text contract */
19
+ /* mkr_verified_text_t - a string proven to meet the engine text contract */
20
20
  /* ---------------------------------------------------------------- */
21
21
 
22
22
  /* A borrowed byte slice whose contents are guaranteed to satisfy Makiri's
@@ -39,7 +39,7 @@ typedef struct {
39
39
  /*
40
40
  * Makiri's string types form a small lattice over two axes plus a shape marker.
41
41
  * They look alike ({ptr,len}) but C has no subtyping, so each contract is its
42
- * own type that distinctness IS the guarantee, and is why there is no single
42
+ * own type - that distinctness IS the guarantee, and is why there is no single
43
43
  * "string" type.
44
44
  *
45
45
  * axis 1 ownership : borrowed (we never free) | owned (free via *_clear)
@@ -51,7 +51,7 @@ typedef struct {
51
51
  * shape \ contract raw (bytes) valid (text)
52
52
  * ---------------------- ------------------------ -------------------------
53
53
  * ruby-anchored borrowed mkr_ruby_borrowed_bytes_t mkr_ruby_borrowed_text_t (bridge.h)
54
- * borrowed slice (none yet would be mkr_borrowed_text_t /
54
+ * borrowed slice (none yet - would be mkr_borrowed_text_t /
55
55
  * mkr_borrowed_bytes_t) mkr_verified_text_t (*)
56
56
  * owned mkr_owned_bytes_t mkr_owned_text_t
57
57
  *
@@ -65,22 +65,22 @@ typedef struct {
65
65
  * cannot reach the engine's public API. Internally the engine carries the
66
66
  * freely-constructible mkr_borrowed_text_t instead.
67
67
  *
68
- * Conversions the only sanctioned edges. The points that actually VALIDATE
68
+ * Conversions - the only sanctioned edges. The points that actually VALIDATE
69
69
  * raw bytes are the bridge's checked entry points; everything else only moves
70
70
  * already-valid text between shapes (no edge re-validates, and none turns raw
71
71
  * bytes into text without one of those checks):
72
- * validate raw -> valid : the bridge's checked entry points only
72
+ * validate raw -> valid : the bridge's checked entry points only -
73
73
  * mkr_ruby_verified_text / mkr_ruby_try_verified_text
74
74
  * (both validate UTF-8 + no NUL); never a cast.
75
75
  * drop the GC anchor : mkr_verified_text_from_view (ruby_borrowed_text -> verified_text)
76
76
  * assert valid (no copy) : mkr_borrowed_text (const char*,len -> borrowed_text)
77
- * caller asserts the bytes already meet the contract
77
+ * - caller asserts the bytes already meet the contract
78
78
  * downgrade to borrow : mkr_borrowed_text_from_owned (owned_text -> borrowed_text)
79
79
  * mkr_borrowed_text_from_verified (verified_text -> borrowed_text)
80
80
  * copy into owned : mkr_owned_text_from_borrowed_copy /
81
- * mkr_owned_text_from_buf_steal accept only
81
+ * mkr_owned_text_from_buf_steal - accept only
82
82
  * already-asserted-valid text; they copy, not validate.
83
- * take ownership : mkr_owned_text (char*,len -> owned_text) caller
83
+ * take ownership : mkr_owned_text (char*,len -> owned_text) - caller
84
84
  * transfers an already-valid heap buffer it produced
85
85
  * (substring/concat/format output); asserts validity.
86
86
  */
@@ -12,7 +12,7 @@ require "etc"
12
12
  # 1. Build vendored Lexbor (unpatched) via cmake into vendor/lexbor/build,
13
13
  # install headers + a static archive into vendor/lexbor/dist.
14
14
  # 2. Compile ext/makiri/**/*.c with rake-compiler, linking against the
15
- # static Lexbor archive only no system libxml2/libxslt.
15
+ # static Lexbor archive only - no system libxml2/libxslt.
16
16
  #
17
17
  # Security note: the C extension is built with -D_FORTIFY_SOURCE=2,
18
18
  # -fstack-protector-strong, and -Wformat -Wformat-security. -O2 is kept
@@ -60,7 +60,7 @@ $LDFLAGS << " #{lexbor_archive.shellescape}"
60
60
  # Sanitizer build (opt-in): MAKIRI_SANITIZE=address,undefined rake clean compile
61
61
  # Then run the suite under the runtime via `rake sanitize` (which preloads the
62
62
  # ASan runtime). Sanitizers replace the heap allocator, so even the vendored
63
- # (uninstrumented) Lexbor's allocations get red-zoned heap overflows on
63
+ # (uninstrumented) Lexbor's allocations get red-zoned - heap overflows on
64
64
  # Lexbor-owned buffers are still caught. _FORTIFY_SOURCE is dropped here because
65
65
  # it conflicts with the sanitizer interceptors.
66
66
  sanitize = ENV["MAKIRI_SANITIZE"].to_s.strip
@@ -115,6 +115,24 @@ elsif RbConfig::CONFIG["target_os"] =~ /linux/
115
115
  $LIBRUBYARG_STATIC = ""
116
116
  end
117
117
 
118
+ # Export ONLY Init_makiri from the compiled extension. `-fvisibility=hidden`
119
+ # above hides our own sources' symbols, but the vendored Lexbor static library
120
+ # is built (by Lexbor's own CMake) with default visibility, so without this the
121
+ # linker re-exports ~1700 `lxb_*` / `lexbor_*` symbols into the bundle's dynamic
122
+ # table. Another Lexbor-based extension loaded in the same process (e.g.
123
+ # nokolexbor) would then resolve its own `lxb_*` calls to OUR copy - a different
124
+ # Lexbor version with an incompatible ABI - and segfault. Restricting the export
125
+ # list to Init_makiri keeps Makiri's Lexbor entirely private (Ruby only needs
126
+ # Init_makiri, found via dlsym at require time).
127
+ if RbConfig::CONFIG["target_os"] =~ /darwin/
128
+ $DLDFLAGS << " -Wl,-exported_symbol,_Init_makiri"
129
+ elsif RbConfig::CONFIG["target_os"] =~ /linux/
130
+ # Hide every symbol pulled in from static archives (the Lexbor .a); our own
131
+ # are already hidden by -fvisibility=hidden, leaving just RUBY_FUNC_EXPORTED
132
+ # Init_makiri in the dynamic symbol table.
133
+ $DLDFLAGS << " -Wl,--exclude-libs,ALL"
134
+ end
135
+
118
136
  # Recursively pick up C sources under ext/makiri/.
119
137
  $srcs = Dir.glob(File.join(EXT_DIR, "**", "*.c")).map { |f| f.sub("#{EXT_DIR}/", "") }
120
138
  $VPATH ||= []
@@ -8,11 +8,24 @@
8
8
  extern "C" {
9
9
  #endif
10
10
 
11
+ /* A DOM node pointer of UNKNOWN representation - an HTML lxb_dom_node_t or an XML
12
+ * mkr_xml_node_t - as stored in a node wrapper or a NodeSet. It is an INCOMPLETE
13
+ * type on purpose: it cannot be dereferenced, and (unlike void*) it does not
14
+ * implicitly convert to a typed pointer, so reading a stored node AS a specific
15
+ * representation requires an explicit cast that the kind-checked accessors
16
+ * (mkr_html_node_unwrap / mkr_xml_node_unwrap) justify by the wrapper's TypedData type
17
+ * (or, for a NodeSet, by doc_is_xml). The stored pointer is only ever
18
+ * pointer-compared or cast through one of those accessors. */
19
+ typedef struct mkr_raw_node mkr_raw_node_t;
20
+
11
21
  /* Wrapper for any DOM node except Document. The node memory is owned by the
12
- * document's Lexbor arena; we keep only the pointer plus a keepalive VALUE
13
- * reference to the Ruby Document so the arena outlives the wrapper. */
22
+ * document's arena (an HTML Lexbor arena or the XML node arena); we keep only the
23
+ * pointer plus a keepalive VALUE reference to the Ruby Document so the arena
24
+ * outlives the wrapper. The pointer is representation-opaque (mkr_raw_node_t):
25
+ * read it only through mkr_html_node_unwrap / mkr_xml_node_unwrap, which check the
26
+ * wrapper's representation (distinct TypedData types) before casting. */
14
27
  typedef struct {
15
- lxb_dom_node_t *node;
28
+ mkr_raw_node_t *node;
16
29
  VALUE document;
17
30
  } mkr_node_data_t;
18
31
 
@@ -31,14 +44,35 @@ extern const rb_data_type_t mkr_node_type;
31
44
  extern const rb_data_type_t mkr_doc_type;
32
45
  extern const rb_data_type_t mkr_node_set_type;
33
46
 
34
- /* Node bridge (glue/ruby_node.c). mkr_wrap_node returns the Document VALUE
47
+ /* Node bridge (glue/ruby_node.c). mkr_wrap_html_node returns the Document VALUE
35
48
  * for the document node, Qnil for NULL, otherwise a freshly-wrapped Node. */
36
- VALUE mkr_wrap_node(lxb_dom_node_t *node, VALUE document);
37
- lxb_dom_node_t *mkr_node_unwrap(VALUE rb_node);
49
+ VALUE mkr_wrap_html_node(lxb_dom_node_t *node, VALUE document);
38
50
  VALUE mkr_node_document(VALUE rb_node);
39
51
 
52
+ /* HTML and XML nodes are wrapped under DISTINCT TypedData types (both deriving
53
+ * from the shared base mkr_node_type), so a representation-specific accessor
54
+ * rejects the wrong kind via Ruby's type machinery. See ruby_node.c.
55
+ * mkr_html_node_unwrap -> lxb_dom_node_t* ; raises on an XML node/Document.
56
+ * mkr_xml_node_unwrap -> mkr_xml_node_t* ; raises on an HTML node/Document (ruby_xml_node.c).
57
+ * mkr_node_raw -> void* ; kind-agnostic raw pointer for identity, or for a
58
+ * site where the kind is already guaranteed. Deref needs an
59
+ * explicit cast - never treat it as a typed pointer blindly.
60
+ * mkr_node_id -> uintptr_t ; node identity for ==/eql?/hash/pointer_id. */
61
+ extern const rb_data_type_t mkr_html_node_type;
62
+ extern const rb_data_type_t mkr_xml_node_type;
63
+ lxb_dom_node_t *mkr_html_node_unwrap(VALUE rb_node);
64
+ void *mkr_node_raw(VALUE rb_node);
65
+ uintptr_t mkr_node_id(VALUE rb_node);
66
+
67
+ /* XML node bridge (glue/ruby_xml_node.c): wrap a custom XML node into the right
68
+ * Makiri::XML::* leaf (Qnil for NULL, the Document VALUE for the document node). */
69
+ struct mkr_xml_node;
70
+ VALUE mkr_wrap_xml_node(struct mkr_xml_node *node, VALUE document);
71
+ /* XML node-pointer accessor; raises TypeError on an HTML node/Document. */
72
+ struct mkr_xml_node *mkr_xml_node_unwrap(VALUE rb_node);
73
+
40
74
  /* Document bridge (glue/ruby_doc.c). */
41
- lxb_dom_document_t *mkr_doc_unwrap(VALUE rb_doc);
75
+ lxb_dom_document_t *mkr_html_doc_unwrap(VALUE rb_doc);
42
76
  mkr_parsed_t *mkr_doc_parsed(VALUE rb_doc);
43
77
  VALUE mkr_wrap_document(mkr_parsed_t *parsed); /* GC takes ownership */
44
78
 
@@ -46,7 +80,7 @@ VALUE mkr_wrap_document(mkr_parsed_t *parsed); /* GC takes ownersh
46
80
  * inner_html=/outer_html= so the UTF-8 sanitisation and import+template-fixup
47
81
  * are not duplicated.
48
82
  *
49
- * mkr_sanitize_html_input: decode rb_html for the fragment parser *out / *out_len
83
+ * mkr_sanitize_html_input: decode rb_html for the fragment parser - *out / *out_len
50
84
  * are the bytes to parse, *owned a malloc'd buffer to free afterwards (NULL when
51
85
  * the input is used in place). Returns 0, or -1 on OOM (nothing allocated), so
52
86
  * the caller can release its parser before raising. See mkr_utf8_sanitize.
@@ -54,7 +88,7 @@ VALUE mkr_wrap_document(mkr_parsed_t *parsed); /* GC takes ownersh
54
88
  * mkr_import_fragment_children: deep-import each child of `root` into `doc`, hand
55
89
  * it to `emit`, and fix up any <template> contents (which import_node omits).
56
90
  *
57
- * mkr_emit_append / mkr_emit_before: emit callbacks append as last child of
91
+ * mkr_emit_append / mkr_emit_before: emit callbacks - append as last child of
58
92
  * `u`, or insert before the reference node `u`. */
59
93
  int mkr_sanitize_html_input(VALUE html, const lxb_char_t **out, size_t *out_len,
60
94
  lxb_char_t **owned);
@@ -63,9 +97,17 @@ void mkr_import_fragment_children(lxb_dom_document_t *doc, lxb_dom_node_t *root,
63
97
  void mkr_emit_append(lxb_dom_node_t *imported, void *u);
64
98
  void mkr_emit_before(lxb_dom_node_t *imported, void *u);
65
99
 
66
- /* NodeSet bridge (glue/ruby_node_set.c). */
100
+ /* Node#clone_node(deep=false): shallow/deep DOM clone owned by this node's
101
+ * document (import_node + <template>-content fixup), detached from any parent.
102
+ * Implemented in ruby_doc.c (next to the import machinery), bound in
103
+ * mkr_init_node. */
104
+ VALUE mkr_node_clone_node(int argc, VALUE *argv, VALUE self);
105
+
106
+ /* NodeSet bridge (glue/ruby_node_set.c). mkr_raw_node_t (above): callers cast
107
+ * their typed node to it when pushing (erasing the type on store is the safe
108
+ * direction); the single typed read-back lives in mkr_node_set_wrap. */
67
109
  VALUE mkr_node_set_new(VALUE document);
68
- void mkr_node_set_push(VALUE rb_set, lxb_dom_node_t *node);
110
+ void mkr_node_set_push(VALUE rb_set, mkr_raw_node_t *node);
69
111
 
70
112
  #ifdef __cplusplus
71
113
  }