makiri 0.1.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/ci.yml +73 -0
- data/.github/workflows/conformance.yml +94 -0
- data/.github/workflows/release.yml +223 -0
- data/.github/workflows/security.yml +95 -0
- data/.gitmodules +3 -0
- data/CHANGELOG.md +102 -0
- data/LICENSE +176 -0
- data/NOTICE +12 -0
- data/README.md +134 -0
- data/Rakefile +150 -0
- data/lib/makiri/3.2/makiri.so +0 -0
- data/lib/makiri/3.3/makiri.so +0 -0
- data/lib/makiri/3.4/makiri.so +0 -0
- data/lib/makiri/4.0/makiri.so +0 -0
- data/lib/makiri/attribute.rb +13 -0
- data/lib/makiri/cdata.rb +6 -0
- data/lib/makiri/comment.rb +6 -0
- data/lib/makiri/css.rb +11 -0
- data/lib/makiri/document.rb +82 -0
- data/lib/makiri/document_fragment.rb +21 -0
- data/lib/makiri/document_type.rb +14 -0
- data/lib/makiri/element.rb +17 -0
- data/lib/makiri/node.rb +221 -0
- data/lib/makiri/node_set.rb +105 -0
- data/lib/makiri/processing_instruction.rb +8 -0
- data/lib/makiri/text.rb +16 -0
- data/lib/makiri/version.rb +5 -0
- data/lib/makiri/xpath.rb +14 -0
- data/lib/makiri/xpath_context.rb +41 -0
- data/lib/makiri.rb +47 -0
- data/script/build_native_gem.rb +50 -0
- data/script/check_c_safety.rb +238 -0
- data/script/check_c_safety_allowlist.yml +12 -0
- data/sig/makiri.rbs +4 -0
- metadata +127 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 6ff9eaf947a703e2f69e2093bb2d4ce0501f91b765c12a69a93a4da6f041554f
|
|
4
|
+
data.tar.gz: c16b65672e4a401fa3aaae86fcaeccc50c6a9ed2a9083962ea0ccbbaa5e3c768
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 5bca639024347dae58bb753143ef5e43c95881b05bb6f284dfb873851ae4a546a1c2488ea1142f272a1f3aac6dba65bb44c6afddd3c97f7cbbd4e75fef8a20be
|
|
7
|
+
data.tar.gz: 2e629dd79bd3c133017767de949899b0e153151028cf653379d23a0d5336d6499507c32bef4f48681e5330077972fdd8d30a4c7d417aa3eaa6a8f9c7a6f20be5
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main, master]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test:
|
|
10
|
+
name: Ruby ${{ matrix.ruby }} on ${{ matrix.os }}
|
|
11
|
+
runs-on: ${{ matrix.os }}
|
|
12
|
+
strategy:
|
|
13
|
+
fail-fast: false
|
|
14
|
+
matrix:
|
|
15
|
+
os: [ubuntu-latest, macos-latest]
|
|
16
|
+
ruby: ["3.2", "3.3", "3.4", "4.0"]
|
|
17
|
+
|
|
18
|
+
steps:
|
|
19
|
+
- name: Checkout (with vendored Lexbor submodule)
|
|
20
|
+
uses: actions/checkout@v6
|
|
21
|
+
with:
|
|
22
|
+
submodules: recursive
|
|
23
|
+
|
|
24
|
+
- name: Ensure cmake is available
|
|
25
|
+
uses: lukka/get-cmake@latest
|
|
26
|
+
|
|
27
|
+
- name: Set up Ruby
|
|
28
|
+
uses: ruby/setup-ruby@v1
|
|
29
|
+
with:
|
|
30
|
+
ruby-version: ${{ matrix.ruby }}
|
|
31
|
+
bundler-cache: true
|
|
32
|
+
|
|
33
|
+
- name: Compile the extension (builds vendored Lexbor, then the ext)
|
|
34
|
+
run: bundle exec rake compile
|
|
35
|
+
|
|
36
|
+
# The :threading suite is heavy (GC.stress across threads) and checks a
|
|
37
|
+
# structural property that does not vary by OS/Ruby, so run it on a single
|
|
38
|
+
# representative job; the rest of the matrix skips it for a fast run.
|
|
39
|
+
- name: Run the test suite
|
|
40
|
+
run: bundle exec rake spec
|
|
41
|
+
env:
|
|
42
|
+
THREADING: ${{ (matrix.os == 'ubuntu-latest' && matrix.ruby == '3.4') && '1' || '' }}
|
|
43
|
+
|
|
44
|
+
- name: Smoke-load the gem
|
|
45
|
+
run: bundle exec ruby -Ilib -r makiri -e 'p Makiri::VERSION'
|
|
46
|
+
|
|
47
|
+
sanitize:
|
|
48
|
+
name: AddressSanitizer + UBSan (Ruby ${{ matrix.ruby }})
|
|
49
|
+
runs-on: ubuntu-latest
|
|
50
|
+
strategy:
|
|
51
|
+
fail-fast: false
|
|
52
|
+
matrix:
|
|
53
|
+
ruby: ["3.4", "4.0"]
|
|
54
|
+
steps:
|
|
55
|
+
- name: Checkout (with vendored Lexbor submodule)
|
|
56
|
+
uses: actions/checkout@v6
|
|
57
|
+
with:
|
|
58
|
+
submodules: recursive
|
|
59
|
+
|
|
60
|
+
- name: Ensure cmake is available
|
|
61
|
+
uses: lukka/get-cmake@latest
|
|
62
|
+
|
|
63
|
+
- name: Set up Ruby
|
|
64
|
+
uses: ruby/setup-ruby@v1
|
|
65
|
+
with:
|
|
66
|
+
ruby-version: ${{ matrix.ruby }}
|
|
67
|
+
bundler-cache: true
|
|
68
|
+
|
|
69
|
+
# Builds the ext with -fsanitize=address,undefined and runs the whole
|
|
70
|
+
# spec suite with the ASan runtime preloaded. Any heap/UB error aborts
|
|
71
|
+
# the run with a non-zero exit, failing the job.
|
|
72
|
+
- name: Build + test under sanitizers
|
|
73
|
+
run: bundle exec rake sanitize
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
name: Conformance
|
|
2
|
+
|
|
3
|
+
# Differential / spec-conformance suites. These compare Makiri against external
|
|
4
|
+
# references (Nokogiri's libxml2/Gumbo XPath+CSS, the html5lib-tests corpus) and
|
|
5
|
+
# catch semantic divergences that the unit specs cannot. The XPath and CSS
|
|
6
|
+
# differentials run OUTSIDE the bundle (the rake tasks use
|
|
7
|
+
# Bundler.with_unbundled_env), so Nokogiri — a bench-only dependency — must be
|
|
8
|
+
# present as a system gem; each job installs it explicitly.
|
|
9
|
+
|
|
10
|
+
on:
|
|
11
|
+
push:
|
|
12
|
+
branches: [main, master]
|
|
13
|
+
pull_request:
|
|
14
|
+
schedule:
|
|
15
|
+
- cron: "37 5 * * *" # nightly, offset from the other workflows
|
|
16
|
+
workflow_dispatch:
|
|
17
|
+
|
|
18
|
+
jobs:
|
|
19
|
+
# PR + push: HTML5 parsing conformance, plus the XPath and CSS differentials
|
|
20
|
+
# over the curated corpus and a moderate batch of generated XPath expressions.
|
|
21
|
+
conformance:
|
|
22
|
+
name: HTML5 + XPath/CSS differential vs Nokogiri
|
|
23
|
+
runs-on: ubuntu-latest
|
|
24
|
+
if: github.event_name != 'schedule'
|
|
25
|
+
steps:
|
|
26
|
+
- name: Checkout (with vendored Lexbor submodule)
|
|
27
|
+
uses: actions/checkout@v6
|
|
28
|
+
with:
|
|
29
|
+
submodules: recursive
|
|
30
|
+
|
|
31
|
+
- name: Ensure cmake is available
|
|
32
|
+
uses: lukka/get-cmake@latest
|
|
33
|
+
|
|
34
|
+
- name: Set up Ruby
|
|
35
|
+
uses: ruby/setup-ruby@v1
|
|
36
|
+
with:
|
|
37
|
+
ruby-version: "3.4"
|
|
38
|
+
bundler-cache: true
|
|
39
|
+
|
|
40
|
+
- name: Compile the extension (builds vendored Lexbor, then the ext)
|
|
41
|
+
run: bundle exec rake compile
|
|
42
|
+
|
|
43
|
+
- name: Install Nokogiri (system gem; the differentials run outside the bundle)
|
|
44
|
+
run: gem install --no-document nokogiri
|
|
45
|
+
|
|
46
|
+
- name: HTML5 parsing conformance (html5lib-tests)
|
|
47
|
+
run: bundle exec rake conformance:html5
|
|
48
|
+
|
|
49
|
+
# XPATH_ARGS must be a real env var, not a rake CLI arg: the task reads
|
|
50
|
+
# ENV['XPATH_ARGS'] inside Bundler.with_unbundled_env, which reverts to the
|
|
51
|
+
# pre-rake environment, so a rake-set var would be dropped.
|
|
52
|
+
- name: XPath 1.0 differential vs Nokogiri (corpus + generated)
|
|
53
|
+
run: bundle exec rake conformance:xpath
|
|
54
|
+
env:
|
|
55
|
+
XPATH_ARGS: "--generate 5000 --seed 1"
|
|
56
|
+
|
|
57
|
+
- name: CSS Selectors differential vs Nokogiri::HTML5
|
|
58
|
+
run: bundle exec rake conformance:css
|
|
59
|
+
|
|
60
|
+
# Nightly: a wide XPath differential sweep across many seeds and a high
|
|
61
|
+
# generated volume, to surface divergences the curated corpus and a single
|
|
62
|
+
# seed miss. Fails fast (bash -e) on the first real divergence.
|
|
63
|
+
conformance-nightly:
|
|
64
|
+
name: Nightly XPath differential sweep
|
|
65
|
+
runs-on: ubuntu-latest
|
|
66
|
+
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
|
67
|
+
steps:
|
|
68
|
+
- name: Checkout (with vendored Lexbor submodule)
|
|
69
|
+
uses: actions/checkout@v6
|
|
70
|
+
with:
|
|
71
|
+
submodules: recursive
|
|
72
|
+
|
|
73
|
+
- name: Ensure cmake is available
|
|
74
|
+
uses: lukka/get-cmake@latest
|
|
75
|
+
|
|
76
|
+
- name: Set up Ruby
|
|
77
|
+
uses: ruby/setup-ruby@v1
|
|
78
|
+
with:
|
|
79
|
+
ruby-version: "3.4"
|
|
80
|
+
bundler-cache: true
|
|
81
|
+
|
|
82
|
+
- name: Compile the extension
|
|
83
|
+
run: bundle exec rake compile
|
|
84
|
+
|
|
85
|
+
- name: Install Nokogiri (system gem)
|
|
86
|
+
run: gem install --no-document nokogiri
|
|
87
|
+
|
|
88
|
+
- name: XPath differential sweep (many seeds, high volume)
|
|
89
|
+
run: |
|
|
90
|
+
for seed in 1 2 3 4 5 6 7 8; do
|
|
91
|
+
echo "::group::seed ${seed}"
|
|
92
|
+
XPATH_ARGS="--generate 20000 --seed ${seed}" bundle exec rake conformance:xpath
|
|
93
|
+
echo "::endgroup::"
|
|
94
|
+
done
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
# Build the source gem and precompiled ("fat") native gems for the major
|
|
4
|
+
# platforms, attach them to a GitHub Release, and (optionally, on manual
|
|
5
|
+
# dispatch) publish to RubyGems.
|
|
6
|
+
#
|
|
7
|
+
# Native gems are built on each target platform's own runner — NOT
|
|
8
|
+
# cross-compiled — because the extension builds vendored Lexbor with CMake using
|
|
9
|
+
# the host toolchain (see ext/makiri/extconf.rb). Each runner compiles for
|
|
10
|
+
# Ruby 3.2/3.3/3.4/4.0; script/build_native_gem.rb then assembles one gem per
|
|
11
|
+
# platform containing all four ABIs (lib/makiri.rb selects the right one at
|
|
12
|
+
# require time). `gem install` of a native gem does not recompile or need
|
|
13
|
+
# cmake / the Lexbor submodule.
|
|
14
|
+
|
|
15
|
+
on:
|
|
16
|
+
push:
|
|
17
|
+
tags: ["v*"]
|
|
18
|
+
workflow_dispatch:
|
|
19
|
+
inputs:
|
|
20
|
+
publish_to_rubygems:
|
|
21
|
+
description: "Push the built gems to RubyGems (requires the RUBYGEMS_API_KEY secret)"
|
|
22
|
+
type: boolean
|
|
23
|
+
default: false
|
|
24
|
+
|
|
25
|
+
permissions:
|
|
26
|
+
contents: write # create the GitHub Release and upload assets
|
|
27
|
+
|
|
28
|
+
env:
|
|
29
|
+
RUBY_ABIS: "3.2 3.3 3.4 4.0" # Ruby minor versions a native gem serves
|
|
30
|
+
|
|
31
|
+
jobs:
|
|
32
|
+
# --- source gem (always builds; compiles from source at install time) -------
|
|
33
|
+
source-gem:
|
|
34
|
+
name: Source gem
|
|
35
|
+
runs-on: ubuntu-latest
|
|
36
|
+
steps:
|
|
37
|
+
- uses: actions/checkout@v6
|
|
38
|
+
with:
|
|
39
|
+
submodules: recursive # vendored Lexbor must be in the gem's file list
|
|
40
|
+
- uses: ruby/setup-ruby@v1
|
|
41
|
+
with:
|
|
42
|
+
ruby-version: "3.4"
|
|
43
|
+
- name: Build source gem
|
|
44
|
+
run: |
|
|
45
|
+
mkdir -p pkg
|
|
46
|
+
gem build makiri.gemspec -o pkg/makiri-source.gem
|
|
47
|
+
- uses: actions/upload-artifact@v7
|
|
48
|
+
with:
|
|
49
|
+
name: gem-source
|
|
50
|
+
path: pkg/*.gem
|
|
51
|
+
if-no-files-found: error
|
|
52
|
+
|
|
53
|
+
# --- compile the extension natively, per platform x Ruby --------------------
|
|
54
|
+
compile:
|
|
55
|
+
name: Compile ${{ matrix.platform }} (Ruby ${{ matrix.ruby }})
|
|
56
|
+
runs-on: ${{ matrix.os }}
|
|
57
|
+
strategy:
|
|
58
|
+
fail-fast: false
|
|
59
|
+
matrix:
|
|
60
|
+
ruby: ["3.2", "3.3", "3.4", "4.0"]
|
|
61
|
+
platform:
|
|
62
|
+
- x86_64-linux
|
|
63
|
+
- aarch64-linux
|
|
64
|
+
- arm64-darwin
|
|
65
|
+
include:
|
|
66
|
+
- { platform: x86_64-linux, os: ubuntu-latest }
|
|
67
|
+
- { platform: aarch64-linux, os: ubuntu-24.04-arm }
|
|
68
|
+
- { platform: arm64-darwin, os: macos-14 }
|
|
69
|
+
steps:
|
|
70
|
+
- uses: actions/checkout@v6
|
|
71
|
+
with:
|
|
72
|
+
submodules: recursive
|
|
73
|
+
- name: Ensure cmake is available
|
|
74
|
+
uses: lukka/get-cmake@latest
|
|
75
|
+
- uses: ruby/setup-ruby@v1
|
|
76
|
+
with:
|
|
77
|
+
ruby-version: ${{ matrix.ruby }}
|
|
78
|
+
bundler-cache: true
|
|
79
|
+
- name: Compile (builds vendored Lexbor, then the ext)
|
|
80
|
+
run: bundle exec rake compile
|
|
81
|
+
- name: Smoke-load
|
|
82
|
+
run: bundle exec ruby -Ilib -r makiri -e 'p Makiri::VERSION'
|
|
83
|
+
- name: Stage the compiled binary under its ABI subdir
|
|
84
|
+
shell: bash
|
|
85
|
+
run: |
|
|
86
|
+
minor="$(ruby -e 'print RUBY_VERSION[/\d+\.\d+/]')"
|
|
87
|
+
dlext="$(ruby -e 'print RbConfig::CONFIG["DLEXT"]')" # so | bundle
|
|
88
|
+
mkdir -p "artifact/${minor}"
|
|
89
|
+
cp "lib/makiri/makiri.${dlext}" "artifact/${minor}/makiri.${dlext}"
|
|
90
|
+
ls -l "artifact/${minor}"
|
|
91
|
+
- name: Assert the binary is Ruby-portable (no libruby hard-link)
|
|
92
|
+
shell: bash
|
|
93
|
+
# extconf.rb builds the extension with dynamic_lookup / no libruby link so
|
|
94
|
+
# one precompiled binary loads on any compatible Ruby of that ABI. If a
|
|
95
|
+
# change reintroduces a libruby dependency, the gem would only load on the
|
|
96
|
+
# exact build Ruby — fail loudly here instead of shipping a broken gem.
|
|
97
|
+
run: |
|
|
98
|
+
dlext="$(ruby -e 'print RbConfig::CONFIG["DLEXT"]')"
|
|
99
|
+
bin="lib/makiri/makiri.${dlext}"
|
|
100
|
+
if [ "$(uname)" = "Darwin" ]; then
|
|
101
|
+
if otool -L "$bin" | grep -i 'libruby'; then
|
|
102
|
+
echo "::error::$bin hard-links libruby; precompiled gem would not load on other Ruby installs"; exit 1
|
|
103
|
+
fi
|
|
104
|
+
else
|
|
105
|
+
if readelf -d "$bin" 2>/dev/null | grep -iE 'NEEDED.*libruby'; then
|
|
106
|
+
echo "::error::$bin has a libruby NEEDED entry; precompiled gem may not load on a static Ruby"; exit 1
|
|
107
|
+
fi
|
|
108
|
+
fi
|
|
109
|
+
echo "OK: $bin has no libruby dependency"
|
|
110
|
+
- uses: actions/upload-artifact@v7
|
|
111
|
+
with:
|
|
112
|
+
name: bin-${{ matrix.platform }}-${{ matrix.ruby }}
|
|
113
|
+
path: artifact/
|
|
114
|
+
if-no-files-found: error
|
|
115
|
+
|
|
116
|
+
# --- assemble one fat gem per platform from its per-ABI binaries ------------
|
|
117
|
+
native-gem:
|
|
118
|
+
name: Native gem ${{ matrix.platform }}
|
|
119
|
+
needs: compile
|
|
120
|
+
runs-on: ${{ matrix.os }}
|
|
121
|
+
strategy:
|
|
122
|
+
fail-fast: false
|
|
123
|
+
matrix:
|
|
124
|
+
include:
|
|
125
|
+
- platform: x86_64-linux
|
|
126
|
+
os: ubuntu-latest
|
|
127
|
+
- platform: aarch64-linux
|
|
128
|
+
os: ubuntu-24.04-arm
|
|
129
|
+
- platform: arm64-darwin
|
|
130
|
+
os: macos-14
|
|
131
|
+
steps:
|
|
132
|
+
- uses: actions/checkout@v6
|
|
133
|
+
with:
|
|
134
|
+
submodules: recursive # for the gemspec's file scan (ext/vendor are then dropped)
|
|
135
|
+
- uses: ruby/setup-ruby@v1
|
|
136
|
+
with:
|
|
137
|
+
ruby-version: "3.4"
|
|
138
|
+
- name: Download this platform's per-ABI binaries
|
|
139
|
+
uses: actions/download-artifact@v8
|
|
140
|
+
with:
|
|
141
|
+
pattern: bin-${{ matrix.platform }}-*
|
|
142
|
+
merge-multiple: true
|
|
143
|
+
path: staged
|
|
144
|
+
- name: Place binaries under lib/makiri/<abi>/
|
|
145
|
+
shell: bash
|
|
146
|
+
run: |
|
|
147
|
+
mkdir -p lib/makiri
|
|
148
|
+
echo "Downloaded artifacts:"
|
|
149
|
+
find staged -maxdepth 3 -type f -print || true
|
|
150
|
+
if ! find staged -mindepth 2 -maxdepth 2 -name 'makiri.*' | grep -q .; then
|
|
151
|
+
echo "::error::No makiri binaries were downloaded for ${{ matrix.platform }}"
|
|
152
|
+
exit 1
|
|
153
|
+
fi
|
|
154
|
+
cp -r staged/* lib/makiri/
|
|
155
|
+
echo "Staged binaries:"
|
|
156
|
+
find lib/makiri -maxdepth 2 -name 'makiri.*' -print
|
|
157
|
+
- name: Build the native gem
|
|
158
|
+
run: ruby script/build_native_gem.rb "${{ matrix.platform }}"
|
|
159
|
+
- name: Verify it installs without recompiling
|
|
160
|
+
run: |
|
|
161
|
+
gem install "makiri-"*"-${{ matrix.platform }}.gem" --local
|
|
162
|
+
ruby -e 'require "makiri"; puts Makiri::VERSION'
|
|
163
|
+
- uses: actions/upload-artifact@v7
|
|
164
|
+
with:
|
|
165
|
+
name: gem-${{ matrix.platform }}
|
|
166
|
+
path: "*.gem"
|
|
167
|
+
if-no-files-found: error
|
|
168
|
+
|
|
169
|
+
# --- attach every gem to a GitHub Release (on a version tag) ----------------
|
|
170
|
+
release:
|
|
171
|
+
name: GitHub Release
|
|
172
|
+
needs: [source-gem, native-gem]
|
|
173
|
+
if: startsWith(github.ref, 'refs/tags/')
|
|
174
|
+
runs-on: ubuntu-latest
|
|
175
|
+
steps:
|
|
176
|
+
- uses: actions/download-artifact@v8
|
|
177
|
+
with:
|
|
178
|
+
pattern: gem-*
|
|
179
|
+
merge-multiple: true
|
|
180
|
+
path: dist
|
|
181
|
+
- name: List assets
|
|
182
|
+
run: ls -l dist
|
|
183
|
+
- name: Create / update the release
|
|
184
|
+
env:
|
|
185
|
+
GH_TOKEN: ${{ github.token }}
|
|
186
|
+
run: |
|
|
187
|
+
# Mark pre-release tags (v1.2.3.rc1 / -beta / -alpha / -pre) as such.
|
|
188
|
+
pre=""
|
|
189
|
+
case "$GITHUB_REF_NAME" in
|
|
190
|
+
*rc*|*beta*|*alpha*|*pre*) pre="--prerelease" ;;
|
|
191
|
+
esac
|
|
192
|
+
gh release create "${GITHUB_REF_NAME}" dist/*.gem \
|
|
193
|
+
--repo "${GITHUB_REPOSITORY}" \
|
|
194
|
+
--title "${GITHUB_REF_NAME}" \
|
|
195
|
+
--notes "Automated release for ${GITHUB_REF_NAME}. See CHANGELOG.md." \
|
|
196
|
+
$pre --verify-tag || \
|
|
197
|
+
gh release upload "${GITHUB_REF_NAME}" dist/*.gem --repo "${GITHUB_REPOSITORY}" --clobber
|
|
198
|
+
|
|
199
|
+
# --- optional: publish to RubyGems (manual, opt-in, never on a tag push) ----
|
|
200
|
+
publish:
|
|
201
|
+
name: Publish to RubyGems
|
|
202
|
+
needs: [source-gem, native-gem]
|
|
203
|
+
if: github.event_name == 'workflow_dispatch' && inputs.publish_to_rubygems
|
|
204
|
+
runs-on: ubuntu-latest
|
|
205
|
+
environment: rubygems # add a protection rule here for a manual approval gate
|
|
206
|
+
steps:
|
|
207
|
+
- uses: ruby/setup-ruby@v1
|
|
208
|
+
with:
|
|
209
|
+
ruby-version: "3.4"
|
|
210
|
+
- uses: actions/download-artifact@v8
|
|
211
|
+
with:
|
|
212
|
+
pattern: gem-*
|
|
213
|
+
merge-multiple: true
|
|
214
|
+
path: dist
|
|
215
|
+
- name: gem push
|
|
216
|
+
env:
|
|
217
|
+
GEM_HOST_API_KEY: ${{ secrets.RUBYGEMS_API_KEY }}
|
|
218
|
+
run: |
|
|
219
|
+
test -n "$GEM_HOST_API_KEY" || { echo "RUBYGEMS_API_KEY secret is not set"; exit 1; }
|
|
220
|
+
for g in dist/*.gem; do
|
|
221
|
+
echo "Pushing $g"
|
|
222
|
+
gem push "$g"
|
|
223
|
+
done
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
name: Security
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main, master]
|
|
6
|
+
pull_request:
|
|
7
|
+
schedule:
|
|
8
|
+
- cron: "17 19 * * *"
|
|
9
|
+
workflow_dispatch:
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
security-lint:
|
|
13
|
+
name: C safety lint
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
if: github.event_name != 'schedule'
|
|
16
|
+
steps:
|
|
17
|
+
- name: Checkout (with vendored Lexbor submodule)
|
|
18
|
+
uses: actions/checkout@v6
|
|
19
|
+
with:
|
|
20
|
+
submodules: recursive
|
|
21
|
+
|
|
22
|
+
- name: Set up Ruby
|
|
23
|
+
uses: ruby/setup-ruby@v1
|
|
24
|
+
with:
|
|
25
|
+
ruby-version: "3.4"
|
|
26
|
+
bundler-cache: true
|
|
27
|
+
|
|
28
|
+
- name: Run C safety lint
|
|
29
|
+
run: bundle exec rake security:clint
|
|
30
|
+
|
|
31
|
+
security-sanitize:
|
|
32
|
+
name: ASan + UBSan security suite
|
|
33
|
+
runs-on: ubuntu-latest
|
|
34
|
+
if: github.event_name != 'schedule'
|
|
35
|
+
steps:
|
|
36
|
+
- name: Checkout (with vendored Lexbor submodule)
|
|
37
|
+
uses: actions/checkout@v6
|
|
38
|
+
with:
|
|
39
|
+
submodules: recursive
|
|
40
|
+
|
|
41
|
+
- name: Ensure cmake is available
|
|
42
|
+
uses: lukka/get-cmake@latest
|
|
43
|
+
|
|
44
|
+
- name: Set up Ruby
|
|
45
|
+
uses: ruby/setup-ruby@v1
|
|
46
|
+
with:
|
|
47
|
+
ruby-version: "3.4"
|
|
48
|
+
bundler-cache: true
|
|
49
|
+
|
|
50
|
+
- name: Build + test under sanitizers
|
|
51
|
+
run: bundle exec rake sanitize
|
|
52
|
+
|
|
53
|
+
security-fuzz-short:
|
|
54
|
+
name: Short sanitized fuzz
|
|
55
|
+
runs-on: ubuntu-latest
|
|
56
|
+
if: github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch'
|
|
57
|
+
steps:
|
|
58
|
+
- name: Checkout (with vendored Lexbor submodule)
|
|
59
|
+
uses: actions/checkout@v6
|
|
60
|
+
with:
|
|
61
|
+
submodules: recursive
|
|
62
|
+
|
|
63
|
+
- name: Ensure cmake is available
|
|
64
|
+
uses: lukka/get-cmake@latest
|
|
65
|
+
|
|
66
|
+
- name: Set up Ruby
|
|
67
|
+
uses: ruby/setup-ruby@v1
|
|
68
|
+
with:
|
|
69
|
+
ruby-version: "3.4"
|
|
70
|
+
bundler-cache: true
|
|
71
|
+
|
|
72
|
+
- name: Run short fuzz under sanitizers
|
|
73
|
+
run: bundle exec rake fuzz:sanitize FUZZ_ARGS="--isolated --time 30"
|
|
74
|
+
|
|
75
|
+
security-fuzz-nightly:
|
|
76
|
+
name: Nightly sanitized fuzz
|
|
77
|
+
runs-on: ubuntu-latest
|
|
78
|
+
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
|
79
|
+
steps:
|
|
80
|
+
- name: Checkout (with vendored Lexbor submodule)
|
|
81
|
+
uses: actions/checkout@v6
|
|
82
|
+
with:
|
|
83
|
+
submodules: recursive
|
|
84
|
+
|
|
85
|
+
- name: Ensure cmake is available
|
|
86
|
+
uses: lukka/get-cmake@latest
|
|
87
|
+
|
|
88
|
+
- name: Set up Ruby
|
|
89
|
+
uses: ruby/setup-ruby@v1
|
|
90
|
+
with:
|
|
91
|
+
ruby-version: "3.4"
|
|
92
|
+
bundler-cache: true
|
|
93
|
+
|
|
94
|
+
- name: Run nightly fuzz under sanitizers
|
|
95
|
+
run: bundle exec rake fuzz:sanitize FUZZ_ARGS="--isolated --time 300"
|
data/.gitmodules
ADDED
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.1.0] - 2026-06-02
|
|
11
|
+
|
|
12
|
+
First public release. An HTML5 parser, a native XPath 1.0 query engine, and CSS
|
|
13
|
+
selectors for Ruby — built on vendored [Lexbor](https://lexbor.com/) with **no
|
|
14
|
+
libxml2 / libxslt dependency at any layer**.
|
|
15
|
+
|
|
16
|
+
### Added
|
|
17
|
+
|
|
18
|
+
**Parsing & DOM**
|
|
19
|
+
|
|
20
|
+
* `Makiri::HTML` / `Makiri.parse` — HTML5 parsing via vendored, unpatched Lexbor,
|
|
21
|
+
with browser-compatible UTF-8 decoding (invalid bytes → U+FFFD; parsing never
|
|
22
|
+
fails on bad bytes). Read-only navigation and attribute/text readers across
|
|
23
|
+
`Document`, `Element`, `Attribute`, `Text`, `CData`, `Comment`,
|
|
24
|
+
`ProcessingInstruction`, `DocumentType`, and `DocumentFragment`.
|
|
25
|
+
* `Node#line` — 1-based source line of an element, reconstructed from the
|
|
26
|
+
tokenizer without patching Lexbor (nil when the location is unknown).
|
|
27
|
+
* `Element#attribute_nodes` and `Attribute#{name,value,parent,element}`, backed
|
|
28
|
+
by a lazily-built attribute→owner index in the Lexbor compat layer.
|
|
29
|
+
* `Document#{root,title,body,head,encoding,meta_encoding,meta_encoding=,
|
|
30
|
+
quirks_mode,internal_subset,errors}` and `Makiri::DocumentType#{public_id,
|
|
31
|
+
system_id,external_id}`.
|
|
32
|
+
|
|
33
|
+
**XPath**
|
|
34
|
+
|
|
35
|
+
* Native XPath 1.0 query engine (no libxml2/libxslt): `Node#{xpath,at_xpath}`
|
|
36
|
+
and `Makiri::XPathContext` (`evaluate`, namespace/variable binding, custom
|
|
37
|
+
function handlers that dispatch unknown functions to a Ruby object). 26
|
|
38
|
+
built-in functions with spec-faithful semantics (XML NCNames including
|
|
39
|
+
non-ASCII, node-set vs node-set comparisons per §3.4, document order per §5.1,
|
|
40
|
+
Unicode-aware `translate`/`substring`).
|
|
41
|
+
* Namespace matching is **strict by default** (HTML5/WHATWG-faithful, like
|
|
42
|
+
browsers' `document.evaluate` and `Nokogiri::HTML5`); pass
|
|
43
|
+
`namespace_matching: :lax` for the namespace-agnostic, `Nokogiri::HTML`-style
|
|
44
|
+
match.
|
|
45
|
+
* Per-context compiled-AST cache, and fail-closed per-evaluate budgets
|
|
46
|
+
(operation / recursion-depth / node-set / string-byte caps) that raise
|
|
47
|
+
`Makiri::XPath::LimitExceeded` on overrun.
|
|
48
|
+
|
|
49
|
+
**CSS**
|
|
50
|
+
|
|
51
|
+
* `Node#{css,at_css,matches?}` via Lexbor's selector engine (descendant-only,
|
|
52
|
+
document order). Malformed selectors raise `Makiri::CSS::SyntaxError`.
|
|
53
|
+
|
|
54
|
+
**Mutation & serialization**
|
|
55
|
+
|
|
56
|
+
* DOM mutation: `add_child`/`<<`, `add_previous_sibling`/`before`,
|
|
57
|
+
`add_next_sibling`/`after`, `remove`/`unlink`, `replace`; attribute `[]=` and
|
|
58
|
+
`delete`; `content=`, `name=` (in-place rename); `Document#create_element` /
|
|
59
|
+
`create_text_node` / `create_comment`; `inner_html=` / `outer_html=`. Inserts
|
|
60
|
+
validate same-document, reject cycles, and use move semantics.
|
|
61
|
+
* Context-sensitive fragment parsing: `DocumentFragment.parse` /
|
|
62
|
+
`Document#fragment` / `Node#parse` with a `context:` element, and a
|
|
63
|
+
`<template>`'s contents via `Element#content_fragment` (preserved through
|
|
64
|
+
import). Passes the html5lib-tests fragment suite.
|
|
65
|
+
* Serialization: `Node#{to_html,to_s,outer_html,inner_html}` (with `pretty:`)
|
|
66
|
+
and `NodeSet#{to_html,text}`.
|
|
67
|
+
* Nokogiri-compatible conveniences: `Node#{attr,get_attribute,set_attribute,
|
|
68
|
+
attribute,has_attribute?,node_name,type,classes,add_class,append_class,
|
|
69
|
+
remove_class,traverse,root,ancestors,path,search,at,to_h}`,
|
|
70
|
+
`NodeSet#{|,+,&,-,css,xpath,search,at,last,remove}`, and `Element.new` /
|
|
71
|
+
`Text.new`.
|
|
72
|
+
|
|
73
|
+
**Safety & concurrency**
|
|
74
|
+
|
|
75
|
+
* UTF-8 text-input contract: HTML and fragment parsing are lenient (invalid
|
|
76
|
+
bytes → U+FFFD, never reject), while strings passed to the XPath / CSS /
|
|
77
|
+
DOM-mutation APIs must be valid UTF-8 with no NUL byte, otherwise they raise
|
|
78
|
+
`Makiri::Error` — never silently truncated, repaired, or reinterpreted.
|
|
79
|
+
* Thread-safe by construction: parsing releases the GVL (concurrent parse scales
|
|
80
|
+
~2× on 8 cores), while XPath evaluation holds the GVL so sharing a document or
|
|
81
|
+
context across threads cannot corrupt memory. Fail-closed string caps and
|
|
82
|
+
iterative (non-recursive) tree walks resist stack-exhaustion DoS.
|
|
83
|
+
|
|
84
|
+
**Performance** (`rake bench`, vs Nokogiri/libxml2)
|
|
85
|
+
|
|
86
|
+
* Meets or beats Nokogiri on every benchmarked operation: parse ~3×, css ~12×,
|
|
87
|
+
at_css ~1000×, serialize ~4×, `//tag` ~3.4×, `[@attr='v']` predicate ~1.5×,
|
|
88
|
+
attribute axis ~1.3×, traverse ~1.2×, full-text extraction ~parity. Backed by
|
|
89
|
+
a document element index (for `//tag`), a direct-attribute predicate fast
|
|
90
|
+
path, and a hashed per-evaluate string-value cache.
|
|
91
|
+
|
|
92
|
+
**Tooling**
|
|
93
|
+
|
|
94
|
+
* Vendored Lexbor as a git submodule (pinned v3.0.0, applied without patches).
|
|
95
|
+
Build hardening flags; AddressSanitizer+UBSan build (`rake sanitize`);
|
|
96
|
+
grammar-aware robustness fuzzer (`rake fuzz` / `rake fuzz:sanitize`);
|
|
97
|
+
benchmark harness (`rake bench`); conformance harnesses (html5lib-tests, WPT
|
|
98
|
+
domxpath, CSS differential vs `Nokogiri::HTML5`). GitHub Actions CI across
|
|
99
|
+
Ruby 3.2–4.0 × Ubuntu/macOS plus a sanitizer job.
|
|
100
|
+
|
|
101
|
+
[Unreleased]: https://github.com/takahashim/makiri/compare/v0.1.0...HEAD
|
|
102
|
+
[0.1.0]: https://github.com/takahashim/makiri/releases/tag/v0.1.0
|