makiri 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/conformance.yml +22 -0
  3. data/.github/workflows/libfuzzer.yml +83 -0
  4. data/.github/workflows/security.yml +88 -3
  5. data/.github/workflows/valgrind.yml +135 -0
  6. data/CHANGELOG.md +60 -2
  7. data/README.md +81 -77
  8. data/Rakefile +194 -3
  9. data/ext/makiri/bridge/ruby_string.c +119 -66
  10. data/ext/makiri/core/mkr_alloc.c +40 -3
  11. data/ext/makiri/core/mkr_alloc.h +27 -4
  12. data/ext/makiri/core/mkr_buf.c +13 -3
  13. data/ext/makiri/core/mkr_buf.h +80 -5
  14. data/ext/makiri/core/mkr_core.c +143 -0
  15. data/ext/makiri/core/mkr_core.h +10 -1
  16. data/ext/makiri/core/mkr_span.h +186 -0
  17. data/ext/makiri/core/mkr_utf8.c +101 -0
  18. data/ext/makiri/core/mkr_utf8.h +88 -0
  19. data/ext/makiri/extconf.rb +104 -9
  20. data/ext/makiri/fuzz/Makefile +95 -0
  21. data/ext/makiri/fuzz/check_fuzzer.cc +4 -0
  22. data/ext/makiri/fuzz/xml_fuzz.c +24 -0
  23. data/ext/makiri/fuzz/xpath_fuzz.c +109 -0
  24. data/ext/makiri/glue/glue.h +8 -0
  25. data/ext/makiri/glue/ruby_doc.c +20 -24
  26. data/ext/makiri/glue/ruby_html_css.c +58 -12
  27. data/ext/makiri/glue/ruby_html_mutate.c +11 -6
  28. data/ext/makiri/glue/ruby_html_node.c +3 -32
  29. data/ext/makiri/glue/ruby_node.c +39 -0
  30. data/ext/makiri/glue/ruby_xml.c +198 -16
  31. data/ext/makiri/glue/ruby_xml_node.c +46 -59
  32. data/ext/makiri/glue/ruby_xpath.c +4 -4
  33. data/ext/makiri/lexbor_compat/source_loc.c +14 -16
  34. data/ext/makiri/lexbor_compat/utf8_input.c +5 -78
  35. data/ext/makiri/makiri.c +45 -0
  36. data/ext/makiri/xml/mkr_xml.h +2 -3
  37. data/ext/makiri/xml/mkr_xml_chars.c +67 -97
  38. data/ext/makiri/xml/mkr_xml_index.c +169 -0
  39. data/ext/makiri/xml/mkr_xml_index.h +48 -0
  40. data/ext/makiri/xml/mkr_xml_mutate.c +63 -121
  41. data/ext/makiri/xml/mkr_xml_node.c +147 -15
  42. data/ext/makiri/xml/mkr_xml_node.h +71 -6
  43. data/ext/makiri/xml/mkr_xml_tree.c +185 -149
  44. data/ext/makiri/xpath/mkr_css.c +1023 -0
  45. data/ext/makiri/xpath/mkr_css.h +65 -0
  46. data/ext/makiri/xpath/mkr_xpath.c +37 -0
  47. data/ext/makiri/xpath/mkr_xpath.h +13 -0
  48. data/ext/makiri/xpath/mkr_xpath_eval_body.h +373 -90
  49. data/ext/makiri/xpath/mkr_xpath_funcs_body.h +249 -231
  50. data/ext/makiri/xpath/mkr_xpath_internal.h +89 -9
  51. data/ext/makiri/xpath/mkr_xpath_lex.c +94 -124
  52. data/ext/makiri/xpath/mkr_xpath_node_access_xml.h +6 -3
  53. data/ext/makiri/xpath/mkr_xpath_number.c +109 -0
  54. data/ext/makiri/xpath/mkr_xpath_parse.c +79 -90
  55. data/ext/makiri/xpath/mkr_xpath_shared.c +40 -24
  56. data/ext/makiri/xpath/mkr_xpath_value_body.h +50 -24
  57. data/lib/makiri/cdata_section.rb +1 -3
  58. data/lib/makiri/comment.rb +1 -3
  59. data/lib/makiri/document.rb +8 -0
  60. data/lib/makiri/element.rb +1 -3
  61. data/lib/makiri/processing_instruction.rb +1 -3
  62. data/lib/makiri/text.rb +1 -3
  63. data/lib/makiri/version.rb +1 -1
  64. data/lib/makiri/xml/builder.rb +263 -0
  65. data/lib/makiri/xml/node_methods.rb +47 -0
  66. data/lib/makiri.rb +1 -0
  67. data/script/check_alloc_failures.rb +266 -0
  68. data/script/check_c_safety.rb +45 -2
  69. data/script/check_c_safety_allowlist.yml +19 -0
  70. data/script/check_leaks.rb +64 -0
  71. data/script/leaks_harness.rb +64 -0
  72. data/vendor/lexbor/CMakeLists.txt +6 -0
  73. data/vendor/lexbor/README.md +12 -0
  74. data/vendor/lexbor/config.cmake +1 -1
  75. data/vendor/lexbor/source/lexbor/core/base.h +1 -1
  76. data/vendor/lexbor/source/lexbor/core/config.cmake +9 -1
  77. data/vendor/lexbor/source/lexbor/css/selectors/pseudo_state.c +2 -3
  78. data/vendor/lexbor/source/lexbor/css/selectors/state.c +3 -0
  79. data/vendor/lexbor/source/lexbor/dom/interfaces/element.c +21 -0
  80. data/vendor/lexbor/source/lexbor/dom/interfaces/element.h +5 -0
  81. data/vendor/lexbor/source/lexbor/encoding/decode.c +33 -4
  82. data/vendor/lexbor/source/lexbor/html/base.h +1 -1
  83. data/vendor/lexbor/source/lexbor/html/interfaces/select_element.c +4 -0
  84. data/vendor/lexbor/source/lexbor/html/serialize.c +545 -41
  85. data/vendor/lexbor/source/lexbor/html/serialize.h +2 -1
  86. data/vendor/lexbor/source/lexbor/html/tokenizer.h +2 -2
  87. data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_body.c +1 -1
  88. data/vendor/lexbor/source/lexbor/html/tree.c +6 -6
  89. data/vendor/lexbor/source/lexbor/selectors/selectors.c +12 -3
  90. data/vendor/lexbor/source/lexbor/url/base.h +1 -1
  91. data/vendor/lexbor/source/lexbor/url/url.c +5 -2
  92. data/vendor/lexbor/source/lexbor/url/url.h +9 -0
  93. data/vendor/lexbor/version +1 -1
  94. metadata +19 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b0cf63c9d861e721a52064dccc929db0a8f823d485f69854f07d90b805913db0
4
- data.tar.gz: 989e0d0b1430b202147cd4f0fec411d0377114f34ae380217b683b6b63d031e6
3
+ metadata.gz: 30e3037756fec29474a8fb0c62e38d06a3337bba9c3ad844e6bdcfc02cff5026
4
+ data.tar.gz: 5b2f2a2887019261a359a64c35bddd36eb076a8c6a4f145c1a2ec1d84f679be6
5
5
  SHA512:
6
- metadata.gz: 13598e1f45341c8fed3924da8bbe913cf55ef5c1b9256193db18ae3cf2bb9ae0f4816370d8ca569139b33e7194aced65656c1314989b882ea612a44e3750e84b
7
- data.tar.gz: 71c9da99e6f26fb8a034efba1d0642b37cdcd3ddf212c6f977ad3c868b104162ca86f0c721606024286a06c858326508e41c8624687c03fa3d7a205461926faf
6
+ metadata.gz: 2b6bf4ed94ae428e23bcb3af8ec71febda16c83c5187ffb8cadd3698327681d951ead4b68d7df78328e8eeeec82a09de310d862cd110323c2874ac1fb0adf62c
7
+ data.tar.gz: 9ebd0a7562d7ff5541ead1e61f4dae1719257b29ff8cf4414e5ca2c23e2c708849c360b26471fc132bebd0ddd9cd5866ec11ab44aec4ac4044cdba10958a1038
@@ -57,6 +57,15 @@ jobs:
57
57
  - name: CSS Selectors differential vs Nokogiri::HTML5
58
58
  run: bundle exec rake conformance:css
59
59
 
60
+ - name: XML XPath 1.0 differential vs Nokogiri::XML
61
+ run: bundle exec rake conformance:xpath_xml
62
+
63
+ - name: XML CSS-selector differential vs Nokogiri::XML
64
+ run: bundle exec rake conformance:css_xml
65
+
66
+ - name: XML Builder differential vs Nokogiri::XML::Builder
67
+ run: bundle exec rake conformance:builder
68
+
60
69
  # Nightly: a wide XPath differential sweep across many seeds and a high
61
70
  # generated volume, to surface divergences the curated corpus and a single
62
71
  # seed miss. Fails fast (bash -e) on the first real divergence.
@@ -92,3 +101,16 @@ jobs:
92
101
  XPATH_ARGS="--generate 20000 --seed ${seed}" bundle exec rake conformance:xpath
93
102
  echo "::endgroup::"
94
103
  done
104
+
105
+ # The W3C XML conformance suite fetches its (pinned) test corpus on first
106
+ # run, so it lives in the nightly rather than on every PR.
107
+ - name: XML 1.0 well-formedness vs the W3C XML Conformance Test Suite
108
+ run: bundle exec rake conformance:xmlconf
109
+
110
+ # Property-based tree differential: generated documents, Makiri's parsed
111
+ # tree + canonical output vs Nokogiri::XML. Scalable volume - nightly gets
112
+ # a much larger batch than the resident in-suite PBT specs.
113
+ - name: XML property-based tree differential vs Nokogiri::XML
114
+ run: bundle exec rake conformance:xml_pbt
115
+ env:
116
+ PBT_ARGS: "--count 20000"
@@ -0,0 +1,83 @@
1
+ name: libFuzzer
2
+
3
+ on:
4
+ # Nightly: libFuzzer runs are coverage-guided and long-running; they complement
5
+ # the PR-level short fuzz (30s) and the nightly sanitizer fuzz (300s per target).
6
+ # We run them on a schedule because the coverage signal needs sustained CPU
7
+ # time to reach deep branches (e.g. DOCTYPE quote/bracket state machine,
8
+ # reference expansion boundaries).
9
+ schedule:
10
+ - cron: "0 3 * * *"
11
+ workflow_dispatch:
12
+
13
+ jobs:
14
+ # Build the libFuzzer harnesses under clang with -fsanitize=fuzzer,address
15
+ # and run each target for 300s. The corpus is stored as a build artifact so
16
+ # it can be seeded in subsequent runs (regression asset per the roadmap).
17
+ libfuzzer-nightly:
18
+ name: libFuzzer ${{ matrix.target }} (clang)
19
+ runs-on: ubuntu-latest
20
+ timeout-minutes: 360
21
+ strategy:
22
+ fail-fast: false
23
+ matrix:
24
+ target: [xml, xpath]
25
+
26
+ steps:
27
+ - name: Checkout (with vendored Lexbor submodule)
28
+ uses: actions/checkout@v6
29
+ with:
30
+ submodules: recursive
31
+
32
+ - name: Ensure cmake is available
33
+ uses: lukka/get-cmake@latest
34
+
35
+ - name: Set up Ruby
36
+ uses: ruby/setup-ruby@v1
37
+ with:
38
+ ruby-version: "3.4"
39
+ bundler-cache: true
40
+
41
+ # Build the vendored Lexbor first (plain mode is fine; the harness links
42
+ # the static archive). The fuzzer binary itself is built with ASan.
43
+ - name: Compile the extension (builds Lexbor)
44
+ run: bundle exec rake compile
45
+
46
+ - name: Install clang
47
+ run: |
48
+ sudo apt-get update
49
+ sudo apt-get install -y clang
50
+
51
+ - name: Build libFuzzer harnesses
52
+ run: |
53
+ cd ext/makiri/fuzz
54
+ make clean
55
+ make all
56
+
57
+ # Restore the previous corpus so the fuzzer starts from the accumulated
58
+ # regression seeds rather than from scratch.
59
+ - name: Restore corpus cache
60
+ uses: actions/cache@v4
61
+ with:
62
+ path: ext/makiri/fuzz/corpus/${{ matrix.target }}
63
+ key: libfuzzer-corpus-${{ matrix.target }}-${{ github.run_id }}
64
+ restore-keys: |
65
+ libfuzzer-corpus-${{ matrix.target }}-
66
+
67
+ - name: Run libFuzzer ${{ matrix.target }} (300s)
68
+ run: |
69
+ mkdir -p ext/makiri/fuzz/corpus/${{ matrix.target }}
70
+ cd ext/makiri/fuzz
71
+ ./${{ matrix.target }}_fuzz \
72
+ -max_total_time=300 \
73
+ -max_len=4096 \
74
+ -print_final_stats=1 \
75
+ corpus/${{ matrix.target }}
76
+
77
+ # Save the mutated corpus as an artifact for download / seeding.
78
+ - name: Upload corpus artifact
79
+ uses: actions/upload-artifact@v4
80
+ with:
81
+ name: libfuzzer-corpus-${{ matrix.target }}
82
+ path: ext/makiri/fuzz/corpus/${{ matrix.target }}
83
+ retention-days: 7
@@ -70,12 +70,71 @@ jobs:
70
70
  bundler-cache: true
71
71
 
72
72
  - name: Run short fuzz under sanitizers
73
- run: bundle exec rake fuzz:sanitize FUZZ_ARGS="--isolated --time 30"
73
+ run: bundle exec rake fuzz:sanitize FUZZ_ARGS="--time 30"
74
+
75
+ # macOS-only malloc-leak gate: ASan everywhere runs with detect_leaks=0 (Ruby
76
+ # and Lexbor are uninstrumented), so this is the ONLY automated leak check. It
77
+ # flags per-call leak stacks through the extension, including on rescued
78
+ # failure paths (see script/check_leaks.rb).
79
+ security-leaks:
80
+ name: Malloc-leak gate (macOS leaks)
81
+ runs-on: macos-latest
82
+ if: github.event_name != 'schedule'
83
+ steps:
84
+ - name: Checkout (with vendored Lexbor submodule)
85
+ uses: actions/checkout@v6
86
+ with:
87
+ submodules: recursive
88
+
89
+ - name: Ensure cmake is available
90
+ uses: lukka/get-cmake@latest
91
+
92
+ - name: Set up Ruby
93
+ uses: ruby/setup-ruby@v1
94
+ with:
95
+ ruby-version: "3.4"
96
+ bundler-cache: true
74
97
 
98
+ - name: Run the leak gate
99
+ run: bundle exec rake leaks
100
+
101
+ # OOM-injection sweep: rebuilds with MAKIRI_ALLOC_INJECT=1 and fails each core
102
+ # C allocation site in turn, gating that every OOM branch fails closed - a
103
+ # clean exception or a baseline-identical result, never truncated output
104
+ # (see script/check_alloc_failures.rb).
105
+ security-alloc-inject:
106
+ name: OOM-injection sweep
107
+ runs-on: ubuntu-latest
108
+ if: github.event_name != 'schedule'
109
+ steps:
110
+ - name: Checkout (with vendored Lexbor submodule)
111
+ uses: actions/checkout@v6
112
+ with:
113
+ submodules: recursive
114
+
115
+ - name: Ensure cmake is available
116
+ uses: lukka/get-cmake@latest
117
+
118
+ - name: Set up Ruby
119
+ uses: ruby/setup-ruby@v1
120
+ with:
121
+ ruby-version: "3.4"
122
+ bundler-cache: true
123
+
124
+ - name: Run the OOM-injection sweep
125
+ run: bundle exec rake oom
126
+
127
+ # Nightly: fuzz EVERY target (the 30s PR fuzz covers only the default xpath
128
+ # target; the CSS engine reuse and the XML parser/mutator are each their own
129
+ # documented memory-safety risk, so each gets a full 300s run).
75
130
  security-fuzz-nightly:
76
- name: Nightly sanitized fuzz
131
+ name: Nightly sanitized fuzz (${{ matrix.target }})
77
132
  runs-on: ubuntu-latest
78
133
  if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
134
+ strategy:
135
+ fail-fast: false
136
+ matrix:
137
+ target: [xpath, css, xml, mutate, xmlcss]
79
138
  steps:
80
139
  - name: Checkout (with vendored Lexbor submodule)
81
140
  uses: actions/checkout@v6
@@ -92,4 +151,30 @@ jobs:
92
151
  bundler-cache: true
93
152
 
94
153
  - name: Run nightly fuzz under sanitizers
95
- run: bundle exec rake fuzz:sanitize FUZZ_ARGS="--isolated --time 300"
154
+ run: bundle exec rake fuzz:sanitize FUZZ_ARGS="--target ${{ matrix.target }} --time 300"
155
+
156
+ # Nightly: the whole spec suite with Lexbor ITSELF built under ASan (mraw
157
+ # poisoning on), catching intra-arena overflows that a plain ASan build cannot
158
+ # see - the class the v3.0.0 :lexbor-contains overflow belonged to. Heavy
159
+ # (full instrumented Lexbor rebuild), so nightly only.
160
+ security-sanitize-lexbor:
161
+ name: Nightly instrumented-Lexbor ASan suite
162
+ runs-on: ubuntu-latest
163
+ if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
164
+ steps:
165
+ - name: Checkout (with vendored Lexbor submodule)
166
+ uses: actions/checkout@v6
167
+ with:
168
+ submodules: recursive
169
+
170
+ - name: Ensure cmake is available
171
+ uses: lukka/get-cmake@latest
172
+
173
+ - name: Set up Ruby
174
+ uses: ruby/setup-ruby@v1
175
+ with:
176
+ ruby-version: "3.4"
177
+ bundler-cache: true
178
+
179
+ - name: Build Lexbor under ASan and run the suite
180
+ run: bundle exec rake "sanitize:lexbor"
@@ -0,0 +1,135 @@
1
+ name: Valgrind + GC.compact
2
+
3
+ on:
4
+ # Nightly: these jobs are heavy (Valgrind is ~10-50x slower, GC.stress is ~10x
5
+ # slower) and check structural properties that do not vary by day-to-day code
6
+ # churn, so run them on a schedule rather than on every push/PR.
7
+ schedule:
8
+ - cron: "0 2 * * *"
9
+ workflow_dispatch:
10
+
11
+ jobs:
12
+ # Valgrind memcheck on the full spec suite. Complements ASan by catching
13
+ # layout-independent errors ASan misses: use of uninitialised values, and
14
+ # invalid reads/writes that happen to land inside valid malloc regions
15
+ # (intra-arena overflows). Runs on Linux because that is the platform where
16
+ # Valgrind is best supported.
17
+ valgrind-memcheck:
18
+ name: Valgrind memcheck (Ruby ${{ matrix.ruby }})
19
+ runs-on: ubuntu-latest
20
+ timeout-minutes: 360
21
+ env:
22
+ BUNDLE_WITH: valgrind
23
+ # These heavy jobs verify memory discipline (uninit values, intra-arena
24
+ # overflows, use-after-move), not the property space - so the full
25
+ # 300-iteration PBT sweep (already run by the normal CI matrix) is
26
+ # overkill here and, multiplied by Valgrind's 10-50x slowdown, never
27
+ # finishes. A handful of iterations exercises every C memory path while
28
+ # keeping the run tractable.
29
+ PBT_COUNT: "15"
30
+ CSS_PBT_COUNT: "15"
31
+ strategy:
32
+ fail-fast: false
33
+ matrix:
34
+ ruby: ["3.4"]
35
+
36
+ steps:
37
+ - name: Checkout (with vendored Lexbor submodule)
38
+ uses: actions/checkout@v6
39
+ with:
40
+ submodules: recursive
41
+
42
+ - name: Ensure cmake is available
43
+ uses: lukka/get-cmake@latest
44
+
45
+ - name: Set up Ruby
46
+ uses: ruby/setup-ruby@v1
47
+ with:
48
+ ruby-version: ${{ matrix.ruby }}
49
+ bundler-cache: true
50
+
51
+ - name: Compile the extension
52
+ run: bundle exec rake compile
53
+
54
+ - name: Install Valgrind
55
+ run: sudo apt-get update && sudo apt-get install -y valgrind
56
+
57
+ # ruby_memcheck (the `spec:valgrind` rake task) runs the suite under
58
+ # memcheck. It ships Ruby's Valgrind suppression files itself (matched to
59
+ # the running Ruby), so there is no longer a ruby.supp to fetch from
60
+ # ruby/ruby - that path was removed upstream and the fetch step 404'd.
61
+ - name: Run spec suite under Valgrind (ruby_memcheck)
62
+ run: bundle exec rake spec:valgrind
63
+
64
+ # GC.auto_compact + GC.stress run of the full spec suite. This structurally
65
+ # tests the borrowed-pointer discipline under the condition that Ruby Strings
66
+ # actually move (compaction) and that every allocation triggers a full GC
67
+ # cycle (stress). Failures here are typically use-after-move or stale
68
+ # pointer bugs in the C extension or bridge layer.
69
+ #
70
+ # THREADING is deliberately OFF here. The :threading suite (spec/threading_spec.rb)
71
+ # is 8 threads x tens of iterations, and forcing the job-level GC.stress onto it
72
+ # means a full GC per allocation across every thread - which made this job run
73
+ # for 30+ minutes without finishing. It also adds little: that suite already
74
+ # runs in ci.yml (ubuntu/3.4), and its GC-sensitive examples opt into GC.stress
75
+ # themselves via their own `around` hook, so cross-thread interactions are
76
+ # covered there. This job's unique value is the *single-threaded* full suite
77
+ # under stress+compaction, which catches use-after-move across every code path.
78
+ gc-compact-stress:
79
+ # Temporarily disabled: the job takes too long to run.
80
+ if: false
81
+ name: GC.auto_compact + GC.stress (Ruby ${{ matrix.ruby }})
82
+ runs-on: ubuntu-latest
83
+ timeout-minutes: 360
84
+ env:
85
+ # As in the Valgrind job: GC.stress (a full GC per allocation) makes the
86
+ # 300-iteration PBT sweep run for hours, and these jobs check memory
87
+ # discipline rather than the property space, so trim the iteration count.
88
+ PBT_COUNT: "15"
89
+ CSS_PBT_COUNT: "15"
90
+ strategy:
91
+ fail-fast: false
92
+ matrix:
93
+ ruby: ["3.4"]
94
+
95
+ steps:
96
+ - name: Checkout (with vendored Lexbor submodule)
97
+ uses: actions/checkout@v6
98
+ with:
99
+ submodules: recursive
100
+
101
+ - name: Ensure cmake is available
102
+ uses: lukka/get-cmake@latest
103
+
104
+ - name: Set up Ruby
105
+ uses: ruby/setup-ruby@v1
106
+ with:
107
+ ruby-version: ${{ matrix.ruby }}
108
+ bundler-cache: true
109
+
110
+ - name: Compile the extension
111
+ run: bundle exec rake compile
112
+
113
+ # GC.stress is scoped to each example via an around hook rather than set
114
+ # process-wide: under a global GC.stress, even requiring the 88 spec files
115
+ # runs a full GC per allocation, so loading alone took tens of minutes and
116
+ # the job never reached the first example. auto_compact stays global so
117
+ # objects actually move during those stressed examples (the point of the
118
+ # job), while loading/collection runs at normal speed.
119
+ - name: Run spec suite under GC.auto_compact + GC.stress
120
+ run: |
121
+ bundle exec ruby -Ilib -e '
122
+ GC.auto_compact = true
123
+ require "rspec/core"
124
+ RSpec.configure do |c|
125
+ c.around(:each) do |example|
126
+ GC.stress = true
127
+ begin
128
+ example.run
129
+ ensure
130
+ GC.stress = false
131
+ end
132
+ end
133
+ end
134
+ exit RSpec::Core::Runner.run(ARGV)
135
+ ' spec
data/CHANGELOG.md CHANGED
@@ -5,7 +5,64 @@ All notable changes to this project will be documented in this file.
5
5
  The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
- ## [Unreleased]
8
+ ## [0.4.0] - 2026-06-12
9
+
10
+ ### Added
11
+
12
+ * CSS selectors on `Makiri::XML`. `#css` / `#at_css` / `#matches?`, lowered
13
+ to the native XPath engine (case-sensitive, namespace-aware). Covers the
14
+ standard selector set including combinator arguments to `:is`/`:where`/`:not`/
15
+ `:has`, untyped `:*-of-type`, and `:lexbor-contains`. Verified by a differential
16
+ against `Nokogiri::XML` plus property-based tests.
17
+
18
+ * `Makiri::XML::Builder`, a Nokogiri-compatible DSL for building an XML
19
+ document or subtree from scratch (block / `instance_eval` forms, namespaced
20
+ elements via `xml["prefix"]`, the `tag.class.id!` attribute short-cuts, raw-XML
21
+ `<<`, and `.with`). Verified by a differential against `Nokogiri::XML::Builder`.
22
+
23
+ ### Changed
24
+
25
+ * The XML declaration emits `encoding="UTF-8"` only when the source declared
26
+ one (or `#to_xml(encoding:)` is passed); built or declaration-less documents
27
+ now serialize to a bare `<?xml version="1.0"?>`, like Nokogiri (the output is
28
+ UTF-8 either way).
29
+
30
+ * Faster XML queries. A document-rooted `//name` / `css("name")` is served
31
+ from a lazily-built element-name index instead of a full-tree walk (~11x
32
+ Nokogiri on the benchmark feed); name tests resolve their prefix once per step,
33
+ and `at_css` / `at_xpath` short-circuit on prefixed name tests.
34
+
35
+ * CSS class/ID selectors now match case-sensitively in no-quirks documents
36
+ (case-insensitively only in quirks mode), like browsers and `Nokogiri::HTML5` -
37
+ via an upstreamed Lexbor fix (see below).
38
+
39
+ * XPath number parsing now follows the XPath 1.0 `Number` grammar exactly and
40
+ is locale-independent, matching libxml2/Nokogiri and browsers. C `strtod`'s
41
+ superset forms are no longer accepted: `1e3` / `0x1A` lex as a Number followed
42
+ by a name (a syntax error as a full expression, where they previously parsed
43
+ as 1000 / 26), `number()` returns NaN for exponent/hex/`+`-signed strings, and
44
+ only XPath whitespace (space/tab/CR/LF, not `\v`/`\f`) is trimmed around the
45
+ coerced value. Valid literals (`5.`, `.5`, `1.5`) are unchanged.
46
+
47
+ ### Security
48
+
49
+ * Updated the vendored Lexbor (v3.0.0 -> `3a2d595`), which includes two
50
+ CSS-selector fixes we upstreamed - class/ID case-sensitivity follows quirks
51
+ mode, and a prefix-less type selector no longer defaults to the universal
52
+ namespace - plus a heap-overflow fix in its `:lexbor-contains()` parser
53
+ (reached from `Node#css`) and other post-v3.0.0 bugfixes. (An untagged master
54
+ commit, taken deliberately; see CLAUDE.md.)
55
+
56
+ * Hardened native memory safety. The XML arena is ASan-red-zoned to catch
57
+ intra-arena overflows, the engines are fuzzed under ASan/UBSan, and buffer
58
+ growth is bounded by a hard ceiling.
59
+
60
+ * Extended the lint-enforced bounded-reader (`mkr_span`) discipline to the
61
+ remaining byte-scanning code: the source-location line table, the XPath
62
+ string-function scanners (now explicitly length-bounded instead of relying on
63
+ the NUL contract), and the number parse above. Fixed a borrowed-RSTRING
64
+ pointer held across a potential GC point in the XML encoding sniffer, and a
65
+ missing NUL-termination guarantee in the libFuzzer XPath harness.
9
66
 
10
67
  ## [0.3.0] - 2026-06-06
11
68
 
@@ -239,7 +296,8 @@ libxml2 / libxslt dependency at any layer**.
239
296
  domxpath, CSS differential vs `Nokogiri::HTML5`). GitHub Actions CI across
240
297
  Ruby 3.2–4.0 × Ubuntu/macOS plus a sanitizer job.
241
298
 
242
- [Unreleased]: https://github.com/takahashim/makiri/compare/v0.3.0...HEAD
299
+ [Unreleased]: https://github.com/takahashim/makiri/compare/v0.4.0...HEAD
300
+ [0.4.0]: https://github.com/takahashim/makiri/compare/v0.3.0...v0.4.0
243
301
  [0.3.0]: https://github.com/takahashim/makiri/compare/v0.2.0...v0.3.0
244
302
  [0.2.0]: https://github.com/takahashim/makiri/compare/v0.1.0...v0.2.0
245
303
  [0.1.0]: https://github.com/takahashim/makiri/releases/tag/v0.1.0