makiri 0.2.0-aarch64-linux → 0.4.0-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/conformance.yml +22 -0
  3. data/.github/workflows/libfuzzer.yml +83 -0
  4. data/.github/workflows/release.yml +12 -7
  5. data/.github/workflows/security.yml +88 -3
  6. data/.github/workflows/valgrind.yml +135 -0
  7. data/CHANGELOG.md +152 -15
  8. data/README.md +183 -13
  9. data/Rakefile +294 -7
  10. data/lib/makiri/3.2/makiri.so +0 -0
  11. data/lib/makiri/3.3/makiri.so +0 -0
  12. data/lib/makiri/3.4/makiri.so +0 -0
  13. data/lib/makiri/4.0/makiri.so +0 -0
  14. data/lib/makiri/{attribute.rb → attr.rb} +7 -3
  15. data/lib/makiri/cdata_section.rb +19 -0
  16. data/lib/makiri/comment.rb +10 -0
  17. data/lib/makiri/compat_aliases.rb +30 -0
  18. data/lib/makiri/document.rb +9 -73
  19. data/lib/makiri/document_fragment.rb +14 -9
  20. data/lib/makiri/element.rb +4 -4
  21. data/lib/makiri/html/document.rb +106 -0
  22. data/lib/makiri/html/node_methods.rb +19 -0
  23. data/lib/makiri/html.rb +12 -0
  24. data/lib/makiri/node.rb +58 -15
  25. data/lib/makiri/node_set.rb +8 -0
  26. data/lib/makiri/processing_instruction.rb +10 -0
  27. data/lib/makiri/text.rb +1 -1
  28. data/lib/makiri/version.rb +1 -1
  29. data/lib/makiri/xml/builder.rb +263 -0
  30. data/lib/makiri/xml/document.rb +24 -0
  31. data/lib/makiri/xml/node_methods.rb +84 -0
  32. data/lib/makiri/xml.rb +10 -0
  33. data/lib/makiri/xpath_context.rb +1 -1
  34. data/lib/makiri.rb +24 -5
  35. data/script/build_native_gem.rb +2 -2
  36. data/script/check_alloc_failures.rb +266 -0
  37. data/script/check_c_safety.rb +77 -2
  38. data/script/check_c_safety_allowlist.yml +102 -0
  39. data/script/check_leaks.rb +64 -0
  40. data/script/leaks_harness.rb +64 -0
  41. metadata +16 -3
  42. data/lib/makiri/cdata.rb +0 -6
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 98834843b01646be14385e918a5bee9f7c7d86caa4807142b4f4b341bdb9ce1f
4
- data.tar.gz: a249f288b3de1c130ed255ae0c4449267489f3eae7d08a74eff92c0d75c9116b
3
+ metadata.gz: 933c989d6690a09618da157124a5cd6070be49695a5bea898417cb79d7b03c56
4
+ data.tar.gz: 1e2c9fc7a8bbd8618ce5a2e2de0b5c07d6bdfd4d884bb43ed4512c7e35135694
5
5
  SHA512:
6
- metadata.gz: a87555ee4721bf1d09cf29598961d7098b82147d1ac1cf9a22db13f34de8ddc7646c233a1ace508aad7840dab289b46ffb60379aed21ef953a15e89e06db15cf
7
- data.tar.gz: a331f795706d65d12ceb08c3bb9d121c60b3e737ee91f6f7a14789dfa3c364409f85d0ef31a0d60a47f7e1686b4107baebefd15328509677d08605b102b0f0bb
6
+ metadata.gz: 5c0c93ca0b1637e658e9dfab904f6529b8513eb725e457b4cc4ce6158dc2266e9fbc6195811d00ec6ace4027603a79316b0d00bc934137a58e80648b8fcdd40b
7
+ data.tar.gz: 287464ae8c18a7ccf72ed1b2c52968fbb531fa83ce5d12b9500eb787cd560938377e19eb172ca98900247da55efc9665c86dd72bb8812b7f9539ecb6a8b3bdbf
@@ -57,6 +57,15 @@ jobs:
57
57
  - name: CSS Selectors differential vs Nokogiri::HTML5
58
58
  run: bundle exec rake conformance:css
59
59
 
60
+ - name: XML XPath 1.0 differential vs Nokogiri::XML
61
+ run: bundle exec rake conformance:xpath_xml
62
+
63
+ - name: XML CSS-selector differential vs Nokogiri::XML
64
+ run: bundle exec rake conformance:css_xml
65
+
66
+ - name: XML Builder differential vs Nokogiri::XML::Builder
67
+ run: bundle exec rake conformance:builder
68
+
60
69
  # Nightly: a wide XPath differential sweep across many seeds and a high
61
70
  # generated volume, to surface divergences the curated corpus and a single
62
71
  # seed miss. Fails fast (bash -e) on the first real divergence.
@@ -92,3 +101,16 @@ jobs:
92
101
  XPATH_ARGS="--generate 20000 --seed ${seed}" bundle exec rake conformance:xpath
93
102
  echo "::endgroup::"
94
103
  done
104
+
105
+ # The W3C XML conformance suite fetches its (pinned) test corpus on first
106
+ # run, so it lives in the nightly rather than on every PR.
107
+ - name: XML 1.0 well-formedness vs the W3C XML Conformance Test Suite
108
+ run: bundle exec rake conformance:xmlconf
109
+
110
+ # Property-based tree differential: generated documents, Makiri's parsed
111
+ # tree + canonical output vs Nokogiri::XML. Scalable volume - nightly gets
112
+ # a much larger batch than the resident in-suite PBT specs.
113
+ - name: XML property-based tree differential vs Nokogiri::XML
114
+ run: bundle exec rake conformance:xml_pbt
115
+ env:
116
+ PBT_ARGS: "--count 20000"
@@ -0,0 +1,83 @@
1
+ name: libFuzzer
2
+
3
+ on:
4
+ # Nightly: libFuzzer runs are coverage-guided and long-running; they complement
5
+ # the PR-level short fuzz (30s) and the nightly sanitizer fuzz (300s per target).
6
+ # We run them on a schedule because the coverage signal needs sustained CPU
7
+ # time to reach deep branches (e.g. DOCTYPE quote/bracket state machine,
8
+ # reference expansion boundaries).
9
+ schedule:
10
+ - cron: "0 3 * * *"
11
+ workflow_dispatch:
12
+
13
+ jobs:
14
+ # Build the libFuzzer harnesses under clang with -fsanitize=fuzzer,address
15
+ # and run each target for 300s. The corpus is stored as a build artifact so
16
+ # it can be seeded in subsequent runs (regression asset per the roadmap).
17
+ libfuzzer-nightly:
18
+ name: libFuzzer ${{ matrix.target }} (clang)
19
+ runs-on: ubuntu-latest
20
+ timeout-minutes: 360
21
+ strategy:
22
+ fail-fast: false
23
+ matrix:
24
+ target: [xml, xpath]
25
+
26
+ steps:
27
+ - name: Checkout (with vendored Lexbor submodule)
28
+ uses: actions/checkout@v6
29
+ with:
30
+ submodules: recursive
31
+
32
+ - name: Ensure cmake is available
33
+ uses: lukka/get-cmake@latest
34
+
35
+ - name: Set up Ruby
36
+ uses: ruby/setup-ruby@v1
37
+ with:
38
+ ruby-version: "3.4"
39
+ bundler-cache: true
40
+
41
+ # Build the vendored Lexbor first (plain mode is fine; the harness links
42
+ # the static archive). The fuzzer binary itself is built with ASan.
43
+ - name: Compile the extension (builds Lexbor)
44
+ run: bundle exec rake compile
45
+
46
+ - name: Install clang
47
+ run: |
48
+ sudo apt-get update
49
+ sudo apt-get install -y clang
50
+
51
+ - name: Build libFuzzer harnesses
52
+ run: |
53
+ cd ext/makiri/fuzz
54
+ make clean
55
+ make all
56
+
57
+ # Restore the previous corpus so the fuzzer starts from the accumulated
58
+ # regression seeds rather than from scratch.
59
+ - name: Restore corpus cache
60
+ uses: actions/cache@v4
61
+ with:
62
+ path: ext/makiri/fuzz/corpus/${{ matrix.target }}
63
+ key: libfuzzer-corpus-${{ matrix.target }}-${{ github.run_id }}
64
+ restore-keys: |
65
+ libfuzzer-corpus-${{ matrix.target }}-
66
+
67
+ - name: Run libFuzzer ${{ matrix.target }} (300s)
68
+ run: |
69
+ mkdir -p ext/makiri/fuzz/corpus/${{ matrix.target }}
70
+ cd ext/makiri/fuzz
71
+ ./${{ matrix.target }}_fuzz \
72
+ -max_total_time=300 \
73
+ -max_len=4096 \
74
+ -print_final_stats=1 \
75
+ corpus/${{ matrix.target }}
76
+
77
+ # Save the mutated corpus as an artifact for download / seeding.
78
+ - name: Upload corpus artifact
79
+ uses: actions/upload-artifact@v4
80
+ with:
81
+ name: libfuzzer-corpus-${{ matrix.target }}
82
+ path: ext/makiri/fuzz/corpus/${{ matrix.target }}
83
+ retention-days: 7
@@ -196,17 +196,22 @@ jobs:
196
196
  $pre --verify-tag || \
197
197
  gh release upload "${GITHUB_REF_NAME}" dist/*.gem --repo "${GITHUB_REPOSITORY}" --clobber
198
198
 
199
- # --- optional: publish to RubyGems (manual, opt-in, never on a tag push) ----
200
- # Auth is RubyGems Trusted Publishing (OIDC): no stored API key, short-lived
201
- # token, MFA-compatible. Configure a matching Trusted Publisher on RubyGems.org
202
- # for this gem: owner=takahashim, repo=makiri, workflow=release.yml, and set its
203
- # Environment to "rubygems" (matching `environment:` below).
199
+ # --- publish to RubyGems, behind the `rubygems` environment approval gate ---
200
+ # Held until the `rubygems` environment's Required-reviewers rule is approved,
201
+ # so a tag push releases on GitHub immediately but the RubyGems push waits.
202
+ #
203
+ # Auth is RubyGems Trusted Publishing (OIDC): no stored API key. Configure a
204
+ # matching Trusted Publisher on RubyGems.org (owner=takahashim, repo=makiri,
205
+ # workflow=release.yml, Environment=rubygems) so the token is only accepted
206
+ # through this gated environment.
204
207
  publish:
205
208
  name: Publish to RubyGems
206
209
  needs: [source-gem, native-gem]
207
- if: github.event_name == 'workflow_dispatch' && inputs.publish_to_rubygems
210
+ if: >-
211
+ startsWith(github.ref, 'refs/tags/') ||
212
+ (github.event_name == 'workflow_dispatch' && inputs.publish_to_rubygems)
208
213
  runs-on: ubuntu-latest
209
- environment: rubygems # add a Required-reviewers protection rule for an approval gate
214
+ environment: rubygems
210
215
  permissions:
211
216
  contents: read
212
217
  id-token: write # OIDC identity token for Trusted Publishing
@@ -70,12 +70,71 @@ jobs:
70
70
  bundler-cache: true
71
71
 
72
72
  - name: Run short fuzz under sanitizers
73
- run: bundle exec rake fuzz:sanitize FUZZ_ARGS="--isolated --time 30"
73
+ run: bundle exec rake fuzz:sanitize FUZZ_ARGS="--time 30"
74
+
75
+ # macOS-only malloc-leak gate: ASan everywhere runs with detect_leaks=0 (Ruby
76
+ # and Lexbor are uninstrumented), so this is the ONLY automated leak check. It
77
+ # flags per-call leak stacks through the extension, including on rescued
78
+ # failure paths (see script/check_leaks.rb).
79
+ security-leaks:
80
+ name: Malloc-leak gate (macOS leaks)
81
+ runs-on: macos-latest
82
+ if: github.event_name != 'schedule'
83
+ steps:
84
+ - name: Checkout (with vendored Lexbor submodule)
85
+ uses: actions/checkout@v6
86
+ with:
87
+ submodules: recursive
88
+
89
+ - name: Ensure cmake is available
90
+ uses: lukka/get-cmake@latest
91
+
92
+ - name: Set up Ruby
93
+ uses: ruby/setup-ruby@v1
94
+ with:
95
+ ruby-version: "3.4"
96
+ bundler-cache: true
74
97
 
98
+ - name: Run the leak gate
99
+ run: bundle exec rake leaks
100
+
101
+ # OOM-injection sweep: rebuilds with MAKIRI_ALLOC_INJECT=1 and fails each core
102
+ # C allocation site in turn, gating that every OOM branch fails closed - a
103
+ # clean exception or a baseline-identical result, never truncated output
104
+ # (see script/check_alloc_failures.rb).
105
+ security-alloc-inject:
106
+ name: OOM-injection sweep
107
+ runs-on: ubuntu-latest
108
+ if: github.event_name != 'schedule'
109
+ steps:
110
+ - name: Checkout (with vendored Lexbor submodule)
111
+ uses: actions/checkout@v6
112
+ with:
113
+ submodules: recursive
114
+
115
+ - name: Ensure cmake is available
116
+ uses: lukka/get-cmake@latest
117
+
118
+ - name: Set up Ruby
119
+ uses: ruby/setup-ruby@v1
120
+ with:
121
+ ruby-version: "3.4"
122
+ bundler-cache: true
123
+
124
+ - name: Run the OOM-injection sweep
125
+ run: bundle exec rake oom
126
+
127
+ # Nightly: fuzz EVERY target (the 30s PR fuzz covers only the default xpath
128
+ # target; the CSS engine reuse and the XML parser/mutator are each their own
129
+ # documented memory-safety risk, so each gets a full 300s run).
75
130
  security-fuzz-nightly:
76
- name: Nightly sanitized fuzz
131
+ name: Nightly sanitized fuzz (${{ matrix.target }})
77
132
  runs-on: ubuntu-latest
78
133
  if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
134
+ strategy:
135
+ fail-fast: false
136
+ matrix:
137
+ target: [xpath, css, xml, mutate, xmlcss]
79
138
  steps:
80
139
  - name: Checkout (with vendored Lexbor submodule)
81
140
  uses: actions/checkout@v6
@@ -92,4 +151,30 @@ jobs:
92
151
  bundler-cache: true
93
152
 
94
153
  - name: Run nightly fuzz under sanitizers
95
- run: bundle exec rake fuzz:sanitize FUZZ_ARGS="--isolated --time 300"
154
+ run: bundle exec rake fuzz:sanitize FUZZ_ARGS="--target ${{ matrix.target }} --time 300"
155
+
156
+ # Nightly: the whole spec suite with Lexbor ITSELF built under ASan (mraw
157
+ # poisoning on), catching intra-arena overflows that a plain ASan build cannot
158
+ # see - the class the v3.0.0 :lexbor-contains overflow belonged to. Heavy
159
+ # (full instrumented Lexbor rebuild), so nightly only.
160
+ security-sanitize-lexbor:
161
+ name: Nightly instrumented-Lexbor ASan suite
162
+ runs-on: ubuntu-latest
163
+ if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
164
+ steps:
165
+ - name: Checkout (with vendored Lexbor submodule)
166
+ uses: actions/checkout@v6
167
+ with:
168
+ submodules: recursive
169
+
170
+ - name: Ensure cmake is available
171
+ uses: lukka/get-cmake@latest
172
+
173
+ - name: Set up Ruby
174
+ uses: ruby/setup-ruby@v1
175
+ with:
176
+ ruby-version: "3.4"
177
+ bundler-cache: true
178
+
179
+ - name: Build Lexbor under ASan and run the suite
180
+ run: bundle exec rake "sanitize:lexbor"
@@ -0,0 +1,135 @@
1
+ name: Valgrind + GC.compact
2
+
3
+ on:
4
+ # Nightly: these jobs are heavy (Valgrind is ~10-50x slower, GC.stress is ~10x
5
+ # slower) and check structural properties that do not vary by day-to-day code
6
+ # churn, so run them on a schedule rather than on every push/PR.
7
+ schedule:
8
+ - cron: "0 2 * * *"
9
+ workflow_dispatch:
10
+
11
+ jobs:
12
+ # Valgrind memcheck on the full spec suite. Complements ASan by catching
13
+ # layout-independent errors ASan misses: use of uninitialised values, and
14
+ # invalid reads/writes that happen to land inside valid malloc regions
15
+ # (intra-arena overflows). Runs on Linux because Valgrind is x86_64/amd64
16
+ # Linux only.
17
+ valgrind-memcheck:
18
+ name: Valgrind memcheck (Ruby ${{ matrix.ruby }})
19
+ runs-on: ubuntu-latest
20
+ timeout-minutes: 360
21
+ env:
22
+ BUNDLE_WITH: valgrind
23
+ # These heavy jobs verify memory discipline (uninit values, intra-arena
24
+ # overflows, use-after-move), not the property space - so the full
25
+ # 300-iteration PBT sweep (already run by the normal CI matrix) is
26
+ # overkill here and, multiplied by Valgrind's 10-50x slowdown, never
27
+ # finishes. A handful of iterations exercises every C memory path while
28
+ # keeping the run tractable.
29
+ PBT_COUNT: "15"
30
+ CSS_PBT_COUNT: "15"
31
+ strategy:
32
+ fail-fast: false
33
+ matrix:
34
+ ruby: ["3.4"]
35
+
36
+ steps:
37
+ - name: Checkout (with vendored Lexbor submodule)
38
+ uses: actions/checkout@v6
39
+ with:
40
+ submodules: recursive
41
+
42
+ - name: Ensure cmake is available
43
+ uses: lukka/get-cmake@latest
44
+
45
+ - name: Set up Ruby
46
+ uses: ruby/setup-ruby@v1
47
+ with:
48
+ ruby-version: ${{ matrix.ruby }}
49
+ bundler-cache: true
50
+
51
+ - name: Compile the extension
52
+ run: bundle exec rake compile
53
+
54
+ - name: Install Valgrind
55
+ run: sudo apt-get update && sudo apt-get install -y valgrind
56
+
57
+ # ruby_memcheck (the `spec:valgrind` rake task) runs the suite under
58
+ # memcheck. It ships Ruby's Valgrind suppression files itself (matched to
59
+ # the running Ruby), so there is no longer a ruby.supp to fetch from
60
+ # ruby/ruby - that path was removed upstream and the fetch step 404'd.
61
+ - name: Run spec suite under Valgrind (ruby_memcheck)
62
+ run: bundle exec rake spec:valgrind
63
+
64
+ # GC.auto_compact + GC.stress run of the full spec suite. This structurally
65
+ # tests the borrowed-pointer discipline under the condition that Ruby Strings
66
+ # actually move (compaction) and that every allocation triggers a full GC
67
+ # cycle (stress). Failures here are typically use-after-move or stale
68
+ # pointer bugs in the C extension or bridge layer.
69
+ #
70
+ # THREADING is deliberately OFF here. The :threading suite (spec/threading_spec.rb)
71
+ # is 8 threads x tens of iterations, and forcing the job-level GC.stress onto it
72
+ # means a full GC per allocation across every thread - which made this job run
73
+ # for 30+ minutes without finishing. It also adds little: that suite already
74
+ # runs in ci.yml (ubuntu/3.4), and its GC-sensitive examples opt into GC.stress
75
+ # themselves via their own `around` hook, so cross-thread interactions are
76
+ # covered there. This job's unique value is the *single-threaded* full suite
77
+ # under stress+compaction, which catches use-after-move across every code path.
78
+ gc-compact-stress:
79
+ # Temporarily disabled: the job takes too long to run.
80
+ if: false
81
+ name: GC.auto_compact + GC.stress (Ruby ${{ matrix.ruby }})
82
+ runs-on: ubuntu-latest
83
+ timeout-minutes: 360
84
+ env:
85
+ # As in the Valgrind job: GC.stress (a full GC per allocation) makes the
86
+ # 300-iteration PBT sweep run for hours, and these jobs check memory
87
+ # discipline rather than the property space, so trim the iteration count.
88
+ PBT_COUNT: "15"
89
+ CSS_PBT_COUNT: "15"
90
+ strategy:
91
+ fail-fast: false
92
+ matrix:
93
+ ruby: ["3.4"]
94
+
95
+ steps:
96
+ - name: Checkout (with vendored Lexbor submodule)
97
+ uses: actions/checkout@v6
98
+ with:
99
+ submodules: recursive
100
+
101
+ - name: Ensure cmake is available
102
+ uses: lukka/get-cmake@latest
103
+
104
+ - name: Set up Ruby
105
+ uses: ruby/setup-ruby@v1
106
+ with:
107
+ ruby-version: ${{ matrix.ruby }}
108
+ bundler-cache: true
109
+
110
+ - name: Compile the extension
111
+ run: bundle exec rake compile
112
+
113
+ # GC.stress is scoped to each example via an around hook rather than set
114
+ # process-wide: under a global GC.stress, even requiring the 88 spec files
115
+ # runs a full GC per allocation, so loading alone took tens of minutes and
116
+ # the job never reached the first example. auto_compact stays global so
117
+ # objects actually move during those stressed examples (the point of the
118
+ # job), while loading/collection runs at normal speed.
119
+ - name: Run spec suite under GC.auto_compact + GC.stress
120
+ run: |
121
+ bundle exec ruby -Ilib -e '
122
+ GC.auto_compact = true
123
+ require "rspec/core"
124
+ RSpec.configure do |c|
125
+ c.around(:each) do |example|
126
+ GC.stress = true
127
+ begin
128
+ example.run
129
+ ensure
130
+ GC.stress = false
131
+ end
132
+ end
133
+ end
134
+ exit RSpec::Core::Runner.run(ARGV)
135
+ ' spec
data/CHANGELOG.md CHANGED
@@ -5,30 +5,165 @@ All notable changes to this project will be documented in this file.
5
5
  The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
- ## [Unreleased]
8
+ ## [0.4.0] - 2026-06-12
9
+
10
+ ### Added
11
+
12
+ * CSS selectors on `Makiri::XML`. `#css` / `#at_css` / `#matches?`, lowered
13
+ to the native XPath engine (case-sensitive, namespace-aware). Covers the
14
+ standard selector set including combinator arguments to `:is`/`:where`/`:not`/
15
+ `:has`, untyped `:*-of-type`, and `:lexbor-contains`. Verified by a differential
16
+ against `Nokogiri::XML` plus property-based tests.
17
+
18
+ * `Makiri::XML::Builder`, a Nokogiri-compatible DSL for building an XML
19
+ document or subtree from scratch (block / `instance_eval` forms, namespaced
20
+ elements via `xml["prefix"]`, the `tag.class.id!` attribute short-cuts, raw-XML
21
+ `<<`, and `.with`). Verified by a differential against `Nokogiri::XML::Builder`.
22
+
23
+ ### Changed
24
+
25
+ * The XML declaration emits `encoding="UTF-8"` only when the source declared
26
+ one (or `#to_xml(encoding:)` is passed); built or declaration-less documents
27
+ now serialize to a bare `<?xml version="1.0"?>`, like Nokogiri (the output is
28
+ UTF-8 either way).
29
+
30
+ * Faster XML queries. A document-rooted `//name` / `css("name")` is served
31
+ from a lazily-built element-name index instead of a full-tree walk (~11x
32
+ Nokogiri on the benchmark feed); name tests resolve their prefix once per step,
33
+ and `at_css` / `at_xpath` short-circuit on prefixed name tests.
34
+
35
+ * CSS class/ID selectors now match case-sensitively in no-quirks documents
36
+ (case-insensitively only in quirks mode), like browsers and `Nokogiri::HTML5` -
37
+ via an upstreamed Lexbor fix (see below).
38
+
39
+ * XPath number parsing now follows the XPath 1.0 `Number` grammar exactly and
40
+ is locale-independent, matching libxml2/Nokogiri and browsers. C `strtod`'s
41
+ superset forms are no longer accepted: `1e3` / `0x1A` lex as a Number followed
42
+ by a name (a syntax error as a full expression, where they previously parsed
43
+ as 1000 / 26), `number()` returns NaN for exponent/hex/`+`-signed strings, and
44
+ only XPath whitespace (space/tab/CR/LF, not `\v`/`\f`) is trimmed around the
45
+ coerced value. Valid literals (`5.`, `.5`, `1.5`) are unchanged.
46
+
47
+ ### Security
48
+
49
+ * Updated the vendored Lexbor (v3.0.0 -> `3a2d595`), which includes two
50
+ CSS-selector fixes we upstreamed - class/ID case-sensitivity follows quirks
51
+ mode, and a prefix-less type selector no longer defaults to the universal
52
+ namespace - plus a heap-overflow fix in its `:lexbor-contains()` parser
53
+ (reached from `Node#css`) and other post-v3.0.0 bugfixes. (An untagged master
54
+ commit, taken deliberately; see CLAUDE.md.)
55
+
56
+ * Hardened native memory safety. The XML arena is ASan-red-zoned to catch
57
+ intra-arena overflows, the engines are fuzzed under ASan/UBSan, and buffer
58
+ growth is bounded by a hard ceiling.
59
+
60
+ * Extended the lint-enforced bounded-reader (`mkr_span`) discipline to the
61
+ remaining byte-scanning code: the source-location line table, the XPath
62
+ string-function scanners (now explicitly length-bounded instead of relying on
63
+ the NUL contract), and the number parse above. Fixed a borrowed-RSTRING
64
+ pointer held across a potential GC point in the XML encoding sniffer, and a
65
+ missing NUL-termination guarantee in the libFuzzer XPath harness.
66
+
67
+ ## [0.3.0] - 2026-06-06
68
+
69
+ ### Added
70
+
71
+ * **Native XML 1.0 reader + in-place editor** - `Makiri::XML::Document.parse(source)`
72
+ / `Makiri::XML(source)`. No libxml2: a strict, fail-closed parser builds its own
73
+ node arena (case- and namespace-preserving), queried by the native XPath engine.
74
+ * Strict & secure: fail-closed decode (bad UTF-8 / NUL -> `XML::SyntaxError`),
75
+ duplicate attributes rejected, XML 1.0 only; verified against the W3C XML
76
+ Conformance Test Suite.
77
+ * Encoding autodetected (BOM / `<?xml encoding?>`); a contradicting String
78
+ encoding is a fatal error, not a silent mis-decode.
79
+ * DoS-bounded by a single arena byte ceiling (default 256 MiB; raise per parse
80
+ with `max_bytes:`).
81
+ * `<!DOCTYPE>` recognized but **not processed** (`#internal_subset` ->
82
+ `XML::DocumentType`); zero entity/DTD I/O, so **XXE and billion-laughs are
83
+ structurally impossible**. Kept off the tree, as in libxml2.
84
+ * Read API mirrors Nokogiri: `#xpath` / `#at_xpath` (`{prefix => uri}`),
85
+ name/namespace readers, `#text`, `#[]`, traversal, and namespace introspection
86
+ (`Makiri::XML::Namespace`); `XPathContext` works over XML nodes too.
87
+ * Prolog/epilog comments & PIs kept on the document node; adjacent same-type
88
+ character data coalesced - byte-identical to Nokogiri (property-based diff).
89
+ * `#to_xml` / `#to_s` (`pretty:` / `indent:` / `encoding:`) and `#canonicalize`
90
+ (Inclusive C14N 1.0, byte-identical to libxml2); buffers fail closed.
91
+ * Unsupported surface raises `NotImplementedError`: `#css` / `#at_css` and HTML
92
+ serialization.
93
+ * Tree mutation - fully fail-closed, detach-never-destroy:
94
+ * in-place: `#[]=` / `#delete`, `#content=`, `#name=`, `#remove` / `#unlink`;
95
+ * factories: `Document#create_{element,text_node,comment,cdata,processing_instruction}`
96
+ (+ Nokogiri-style `.new` constructors);
97
+ * insertion: `#add_child` / `<<`, `#before` / `#after`, `#replace` - namespaces
98
+ resolved at the insertion point; a cross-document insert deep-copies;
99
+ * fragments: `XML::DocumentFragment.parse` / `XML::Document#fragment`;
100
+ * from scratch: `XML::Document.new` + `#root=`.
101
+ * `XML::Element#element_children` and `Node#clone_node` for XML nodes (also enabling
102
+ `Node#dup` / `#clone`); a clone keeps name case, namespace and the CDATA type.
103
+ * `Node` includes `Enumerable` over its child nodes (`each` / `map` / `select` / ...).
104
+ * `Node#<=>` + `Comparable` - sort by document position (`nil` across documents or
105
+ for attributes).
106
+ * `NodeSet.new(document_or_node, list = [])` - foreign / cross-representation nodes
107
+ are rejected.
108
+ * `NodeSet#[]` accepts a `Range` or `start, length` (like `Array#[]`).
109
+ * `Node` / `NodeSet` / `Document` `#dup` / `#clone` now return real independent
110
+ copies (`#dup(0)` shallow; `#clone(freeze:)` honoured).
111
+ * A **frozen node is genuinely immutable** - every mutator raises `FrozenError`.
112
+
113
+ ### Changed
114
+
115
+ * CSS queries reuse one shared Lexbor engine (GVL-safe) and `at_css` wraps the match
116
+ directly: `at_css('#id')` ~5x faster than nokolexbor (was ~1.16x slower).
117
+ * HTML serialization pre-reserves its buffer - `to_html` now at parity with nokolexbor.
118
+ * Node-class names are the WHATWG DOM interface names (`CDATASection`, `Attr`,
119
+ `DocumentType`, ...), with the Nokogiri spellings (`CDATA`, `DTD`) kept as aliases;
120
+ added `Node#cdata?`.
121
+ * Text-index range table uses `uint32` bounds (24 -> 16 B/entry; ~27% less retained
122
+ index, byte-identical text).
123
+ * Parsing **honours the input String's encoding** - Shift_JIS / EUC-JP / ... are now
124
+ transcoded to UTF-8 instead of mangled.
125
+ * Parsing skips its UTF-8 validation scan when the String's coderange already proves
126
+ it valid.
127
+ * Faster HTML parse/serialize: `memchr` line table + validate-only UTF-8 scan (~7%),
128
+ and a single-copy serializer buffer (~1.2-1.3x).
129
+
130
+ ### Fixed
131
+
132
+ * **Hardened the HTML/XML representation boundary.** HTML (Lexbor) and XML (arena)
133
+ nodes are now distinct TypedData types, so the wrong representation raises
134
+ `TypeError` instead of corrupting memory:
135
+ * `Node#==` / `XPathContext#node=` with an XML `Document` no longer aborts the
136
+ process;
137
+ * `NodeSet#|` / `+` / `&` / `-` across different documents raise `Makiri::Error`
138
+ (was a silent mis-wrap);
139
+ * HTML-only APIs (`import_node`, `add_child` / `before` / `after` / `replace`,
140
+ `fragment(context:)`) reject an XML node argument (was a segfault).
141
+ * The bundle exported the entire vendored Lexbor symbol table (~1700 `lxb_*`); now
142
+ only `Init_makiri` is exported, so loading alongside another Lexbor gem (e.g.
143
+ nokolexbor) no longer segfaults. (Precompiled gems: rebuild required.)
9
144
 
10
145
  ## [0.2.0] - 2026-06-04
11
146
 
12
147
  ### Added
13
148
 
14
- * `Element#tag_name` (DOM `tagName`) the qualified name uppercased for an
149
+ * `Element#tag_name` (DOM `tagName`) - the qualified name uppercased for an
15
150
  HTML element in an HTML document (`"DIV"`), keeping the original case for
16
151
  SVG/MathML; `nil` for non-elements. Complements `#name`, which stays the
17
152
  lowercase qualified name.
18
- * `ProcessingInstruction#target` (DOM `target`) a PI's target name; `nil` for
153
+ * `ProcessingInstruction#target` (DOM `target`) - a PI's target name; `nil` for
19
154
  other node kinds. Its data is read via `#content`/`#text`.
20
155
  * `Document#create_processing_instruction(target, data)` (DOM
21
156
  `createProcessingInstruction`) and `Document#create_document_fragment` (DOM
22
- `createDocumentFragment`, an empty fragment to build up programmatically
157
+ `createDocumentFragment`, an empty fragment to build up programmatically -
23
158
  unlike `#fragment` / `DocumentFragment.parse`, which parse HTML). Both produce
24
159
  a detached node owned by the document; PI creation fails closed when the data
25
160
  contains the `?>` terminator (matching the DOM constraint). (DOM
26
161
  `createCDATASection` is intentionally not provided: per WHATWG DOM it throws on
27
162
  an HTML document, which is the only kind Makiri produces.)
28
- * `Node#{namespace_uri, prefix, local_name}` the WHATWG DOM per-node
163
+ * `Node#{namespace_uri, prefix, local_name}` - the WHATWG DOM per-node
29
164
  namespace accessors on `Element` and `Attribute` (`nil` on other node kinds).
30
165
  `namespace_uri` resolves an element's namespace from its node (so an HTML
31
- element is the XHTML namespace `http://www.w3.org/1999/xhtml`, not `nil` the
166
+ element is the XHTML namespace `http://www.w3.org/1999/xhtml`, not `nil` - the
32
167
  DOM-faithful value browsers and `namespace-uri()` return; SVG/MathML get their
33
168
  own URI), and agrees byte-for-byte with the `namespace-uri()` XPath function.
34
169
  For attributes it is `nil` unless prefixed, where it returns the parser-assigned
@@ -36,21 +171,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
36
171
  segment of the qualified name (`nil` for the usual unprefixed HTML5 case), and
37
172
  `local_name` is the name without that prefix. Previously a node's namespace was
38
173
  reachable only through XPath (`namespace-uri()`/`local-name()`).
39
- * `Node#clone_node(deep = false)` a copy of the node, owned by the same
174
+ * `Node#clone_node(deep = false)` - a copy of the node, owned by the same
40
175
  document and detached from any parent (the DOM `cloneNode`, whose `deep`
41
- defaults to `false` a missing/`nil`/`false` argument is a shallow clone; a
176
+ defaults to `false` - a missing/`nil`/`false` argument is a shallow clone; a
42
177
  truthy one copies the subtree). Built on the same `import_node` +
43
178
  `<template>`-content fixup the fragment parser uses, so a deep-cloned
44
179
  `<template>` keeps its contents. Fails closed: a failed import raises rather
45
180
  than returning a partial node.
46
- * `Document#import_node(node, deep = false)` a copy of `node` owned by the
181
+ * `Document#import_node(node, deep = false)` - a copy of `node` owned by the
47
182
  receiver document (the DOM `importNode`, whose `deep` likewise defaults to
48
183
  `false`). Unlike `Node#clone_node`, the copy is owned by the target rather
49
184
  than the node's own document, so it is the way to bring a node across
50
185
  documents (Makiri never moves a node between arenas); the source is left
51
186
  untouched. Same import + `<template>`-content fixup as `clone_node`, and fails
52
187
  closed on a failed import.
53
- * `Node#pointer_id` the underlying `lxb_dom_node_t` pointer as an Integer,
188
+ * `Node#pointer_id` - the underlying `lxb_dom_node_t` pointer as an Integer,
54
189
  matching `Nokogiri::XML::Node#pointer_id`. Shares the value `#hash`/`#eql?`
55
190
  are built on, so it is a stable, Nokogiri-compatible identity key for
56
191
  consumers (e.g. wrapper caches) that key nodes by pointer. Stable for a
@@ -73,19 +208,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
73
208
  ## [0.1.0] - 2026-06-02
74
209
 
75
210
  First public release. An HTML5 parser, a native XPath 1.0 query engine, and CSS
76
- selectors for Ruby built on vendored [Lexbor](https://lexbor.com/) with **no
211
+ selectors for Ruby - built on vendored [Lexbor](https://lexbor.com/) with **no
77
212
  libxml2 / libxslt dependency at any layer**.
78
213
 
79
214
  ### Added
80
215
 
81
216
  **Parsing & DOM**
82
217
 
83
- * `Makiri::HTML` / `Makiri.parse` HTML5 parsing via vendored, unpatched Lexbor,
218
+ * `Makiri::HTML` / `Makiri.parse` - HTML5 parsing via vendored, unpatched Lexbor,
84
219
  with browser-compatible UTF-8 decoding (invalid bytes → U+FFFD; parsing never
85
220
  fails on bad bytes). Read-only navigation and attribute/text readers across
86
221
  `Document`, `Element`, `Attribute`, `Text`, `CData`, `Comment`,
87
222
  `ProcessingInstruction`, `DocumentType`, and `DocumentFragment`.
88
- * `Node#line` 1-based source line of an element, reconstructed from the
223
+ * `Node#line` - 1-based source line of an element, reconstructed from the
89
224
  tokenizer without patching Lexbor (nil when the location is unknown).
90
225
  * `Element#attribute_nodes` and `Attribute#{name,value,parent,element}`, backed
91
226
  by a lazily-built attribute→owner index in the Lexbor compat layer.
@@ -138,7 +273,7 @@ libxml2 / libxslt dependency at any layer**.
138
273
  * UTF-8 text-input contract: HTML and fragment parsing are lenient (invalid
139
274
  bytes → U+FFFD, never reject), while strings passed to the XPath / CSS /
140
275
  DOM-mutation APIs must be valid UTF-8 with no NUL byte, otherwise they raise
141
- `Makiri::Error` never silently truncated, repaired, or reinterpreted.
276
+ `Makiri::Error` - never silently truncated, repaired, or reinterpreted.
142
277
  * Thread-safe by construction: parsing releases the GVL (concurrent parse scales
143
278
  ~2× on 8 cores), while XPath evaluation holds the GVL so sharing a document or
144
279
  context across threads cannot corrupt memory. Fail-closed string caps and
@@ -161,6 +296,8 @@ libxml2 / libxslt dependency at any layer**.
161
296
  domxpath, CSS differential vs `Nokogiri::HTML5`). GitHub Actions CI across
162
297
  Ruby 3.2–4.0 × Ubuntu/macOS plus a sanitizer job.
163
298
 
164
- [Unreleased]: https://github.com/takahashim/makiri/compare/v0.2.0...HEAD
299
+ [Unreleased]: https://github.com/takahashim/makiri/compare/v0.4.0...HEAD
300
+ [0.4.0]: https://github.com/takahashim/makiri/compare/v0.3.0...v0.4.0
301
+ [0.3.0]: https://github.com/takahashim/makiri/compare/v0.2.0...v0.3.0
165
302
  [0.2.0]: https://github.com/takahashim/makiri/compare/v0.1.0...v0.2.0
166
303
  [0.1.0]: https://github.com/takahashim/makiri/releases/tag/v0.1.0