makiri 0.4.0-x86_64-linux → 0.5.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/valgrind.yml +49 -46
- data/CHANGELOG.md +68 -1
- data/README.md +14 -0
- data/Rakefile +13 -0
- data/lib/makiri/3.2/makiri.so +0 -0
- data/lib/makiri/3.3/makiri.so +0 -0
- data/lib/makiri/3.4/makiri.so +0 -0
- data/lib/makiri/4.0/makiri.so +0 -0
- data/lib/makiri/html/document.rb +11 -12
- data/lib/makiri/html/node_methods.rb +0 -1
- data/lib/makiri/node_set.rb +14 -9
- data/lib/makiri/processing_instruction.rb +8 -0
- data/lib/makiri/version.rb +1 -1
- data/lib/makiri/xml/builder.rb +29 -21
- data/lib/makiri/xpath_context.rb +12 -4
- data/script/check_c_safety.rb +1 -1
- data/script/check_c_safety_allowlist.yml +8 -5
- data/script/leaks_harness.rb +7 -0
- data/suppressions/ruby.supp +140 -0
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 64d3fbdbe21dbbfddf413832c97e9f5f32dc020ff0c3f01efb75232fca32085c
|
|
4
|
+
data.tar.gz: 37990c3d0d3032aa088240d76edfcfeb4a4f598d87e770aef7a21a19a7de5446
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 933fcc0d10a4923c0c1f1541845a6a46b60dc8091cbe54fcd8edd85e7d17321decff9d080f294997a2c21608ae948557e94cc971514c37f6c72de751fa9d4b23
|
|
7
|
+
data.tar.gz: b3159878da760a16387247bd0e32a6c15e84d85dac066bb334397b27e9ba98429c73144c8ae0a35b513420826aebbc24b5a74b19988857274dfa72afea69b714
|
|
@@ -1,9 +1,18 @@
|
|
|
1
1
|
name: Valgrind + GC.compact
|
|
2
2
|
|
|
3
3
|
on:
|
|
4
|
-
#
|
|
5
|
-
#
|
|
6
|
-
#
|
|
4
|
+
# Valgrind memcheck ALSO runs on push to main: it is the only check without a
|
|
5
|
+
# frequency threshold (any "definitely lost" / uninitialised-value use fails,
|
|
6
|
+
# unlike the PR-level macOS leak gate, which only flags stacks repeated >=30x),
|
|
7
|
+
# so a leak on a rarely-hit error path slips past the PR gates and would
|
|
8
|
+
# otherwise surface only on the next nightly. Running it post-merge catches such
|
|
9
|
+
# regressions within ~30 min without adding ~20 min to every PR. (It is gated to
|
|
10
|
+
# main only, not pull_request, to keep PR latency low.)
|
|
11
|
+
#
|
|
12
|
+
# The GC.stress job stays nightly-only (see its `if:` below): it is heavy and
|
|
13
|
+
# checks structural properties that do not vary by day-to-day churn.
|
|
14
|
+
push:
|
|
15
|
+
branches: [main, master]
|
|
7
16
|
schedule:
|
|
8
17
|
- cron: "0 2 * * *"
|
|
9
18
|
workflow_dispatch:
|
|
@@ -61,32 +70,44 @@ jobs:
|
|
|
61
70
|
- name: Run spec suite under Valgrind (ruby_memcheck)
|
|
62
71
|
run: bundle exec rake spec:valgrind
|
|
63
72
|
|
|
64
|
-
# GC.auto_compact + GC.stress
|
|
65
|
-
# tests the borrowed-pointer discipline under the condition that
|
|
66
|
-
# actually move (compaction) and that every allocation triggers a
|
|
67
|
-
# cycle (stress). Failures here are typically use-after-move or stale
|
|
73
|
+
# GC.auto_compact + GC.stress over the GC-sensitive examples. This
|
|
74
|
+
# structurally tests the borrowed-pointer discipline under the condition that
|
|
75
|
+
# Ruby Strings actually move (compaction) and that every allocation triggers a
|
|
76
|
+
# full GC cycle (stress). Failures here are typically use-after-move or stale
|
|
68
77
|
# pointer bugs in the C extension or bridge layer.
|
|
69
78
|
#
|
|
70
|
-
#
|
|
71
|
-
#
|
|
72
|
-
#
|
|
73
|
-
#
|
|
74
|
-
#
|
|
75
|
-
#
|
|
76
|
-
#
|
|
77
|
-
#
|
|
79
|
+
# Scope: only the examples tagged `:gc_compact` (the `memory safety` blocks in
|
|
80
|
+
# css/xpath/serialize/mutation/source_location/xpath_handler/api_compat2 +
|
|
81
|
+
# attribute's lazy-index example). Those are the examples written to exercise
|
|
82
|
+
# the borrowed-pointer paths. `GC_COMPACT_STRESS=1` makes spec_helper set
|
|
83
|
+
# `GC.auto_compact = true` process-wide and wrap every example in `GC.stress`,
|
|
84
|
+
# so each allocation inside a tagged example triggers a *compacting* GC - the
|
|
85
|
+
# strongest form of the use-after-move test. The high-volume churn loops
|
|
86
|
+
# (parse/drop cycles) scale their iteration count down under stress
|
|
87
|
+
# (`gc_churn_iters` / `GC_COMPACT_ITERS`) because each stressed iteration is
|
|
88
|
+
# orders of magnitude heavier; `GC_COMPACT_ITERS` below tunes the total runtime
|
|
89
|
+
# (~6-9 min on CI at 200). An earlier version forced GC.stress onto the
|
|
90
|
+
# *entire* suite (~800 examples): it ran 1h40m+ and never finished, while
|
|
91
|
+
# testing borrowed-pointer discipline on hundreds of examples that have none.
|
|
92
|
+
# The rest of the suite still runs in ci.yml.
|
|
93
|
+
#
|
|
94
|
+
# THREADING is deliberately OFF here. The :threading suite is 8 threads x tens
|
|
95
|
+
# of iterations; it runs in ci.yml and its GC-sensitive examples opt into
|
|
96
|
+
# GC.stress themselves, so cross-thread interactions are covered there.
|
|
78
97
|
gc-compact-stress:
|
|
79
|
-
#
|
|
80
|
-
|
|
98
|
+
# Nightly / on-demand only - not on push (the valgrind job is the post-merge
|
|
99
|
+
# gate; GC.stress is heavy and structural, so it does not need per-push runs).
|
|
100
|
+
if: github.event_name != 'push'
|
|
81
101
|
name: GC.auto_compact + GC.stress (Ruby ${{ matrix.ruby }})
|
|
82
102
|
runs-on: ubuntu-latest
|
|
83
|
-
timeout-minutes:
|
|
103
|
+
timeout-minutes: 30
|
|
84
104
|
env:
|
|
85
|
-
|
|
86
|
-
#
|
|
87
|
-
#
|
|
88
|
-
|
|
89
|
-
|
|
105
|
+
GC_COMPACT_STRESS: "1"
|
|
106
|
+
# Per-iteration cost under per-allocation compacting GC is ~1000x normal, so
|
|
107
|
+
# the churn loops run this many iterations (vs their normal 200-1000). Tunes
|
|
108
|
+
# the job's runtime; raise for more coverage, lower if it approaches the
|
|
109
|
+
# timeout.
|
|
110
|
+
GC_COMPACT_ITERS: "200"
|
|
90
111
|
strategy:
|
|
91
112
|
fail-fast: false
|
|
92
113
|
matrix:
|
|
@@ -110,26 +131,8 @@ jobs:
|
|
|
110
131
|
- name: Compile the extension
|
|
111
132
|
run: bundle exec rake compile
|
|
112
133
|
|
|
113
|
-
#
|
|
114
|
-
#
|
|
115
|
-
#
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
# job), while loading/collection runs at normal speed.
|
|
119
|
-
- name: Run spec suite under GC.auto_compact + GC.stress
|
|
120
|
-
run: |
|
|
121
|
-
bundle exec ruby -Ilib -e '
|
|
122
|
-
GC.auto_compact = true
|
|
123
|
-
require "rspec/core"
|
|
124
|
-
RSpec.configure do |c|
|
|
125
|
-
c.around(:each) do |example|
|
|
126
|
-
GC.stress = true
|
|
127
|
-
begin
|
|
128
|
-
example.run
|
|
129
|
-
ensure
|
|
130
|
-
GC.stress = false
|
|
131
|
-
end
|
|
132
|
-
end
|
|
133
|
-
end
|
|
134
|
-
exit RSpec::Core::Runner.run(ARGV)
|
|
135
|
-
' spec
|
|
134
|
+
# GC_COMPACT_STRESS=1 (set in env above) makes spec_helper enable
|
|
135
|
+
# auto_compact globally and wrap each example in GC.stress; --tag gc_compact
|
|
136
|
+
# limits the run to the borrowed-pointer examples.
|
|
137
|
+
- name: Run GC-sensitive examples under GC.auto_compact + GC.stress
|
|
138
|
+
run: bundle exec rspec --tag gc_compact spec
|
data/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,72 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.5.0] - 2026-06-14
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
|
|
12
|
+
* Use-after-free when an XPath custom-function handler mutated the same
|
|
13
|
+
`XPathContext` (`register_*` / `node=`) mid-`evaluate`: such re-entrant context
|
|
14
|
+
mutation is now refused instead of invalidating the running evaluation's state.
|
|
15
|
+
|
|
16
|
+
* `Node#name=` now invalidates the element-name index, so a later `//tag` query
|
|
17
|
+
reflects the rename instead of seeing a stale bucket.
|
|
18
|
+
|
|
19
|
+
* XML processing-instruction targets now follow XML 1.0 §2.6: a PITarget is a
|
|
20
|
+
`Name`, not an NCName, so a colon is permitted (`<?a:b ...?>` parses, and
|
|
21
|
+
`create_processing_instruction("a:b", ...)` succeeds). Only the reserved `xml`
|
|
22
|
+
(any case) is still rejected. Previously a colon in a PI target was rejected as
|
|
23
|
+
not-well-formed, which was stricter than the spec (a PI target is not subject to
|
|
24
|
+
namespace processing).
|
|
25
|
+
|
|
26
|
+
* Memory leaks of the internal XPath evaluation context on error / edge paths: a
|
|
27
|
+
`Makiri::XML` `#css` / `#xpath` / `#at_xpath` whose selector or expression failed
|
|
28
|
+
the text-input contract leaked the context (it is now verified BEFORE the context
|
|
29
|
+
is allocated), and a context could leak if building the Ruby result raised (it is
|
|
30
|
+
now freed before conversion).
|
|
31
|
+
|
|
32
|
+
### Added
|
|
33
|
+
|
|
34
|
+
* `ProcessingInstruction#target` on the XML node (the PI's target name).
|
|
35
|
+
|
|
36
|
+
* Cross-kind `Document#import_node(node, deep = false)`. `import_node` now
|
|
37
|
+
translates a subtree across representations: `Makiri::XML::Document#import_node`
|
|
38
|
+
(newly added) imports an HTML (Lexbor) node by translating it to the XML node
|
|
39
|
+
representation, and `Makiri::HTML::Document#import_node` likewise translates an
|
|
40
|
+
XML node to HTML. Same-representation imports keep working (HTML to HTML via
|
|
41
|
+
Lexbor, XML to XML via the arena deep/shallow copy). The result is a detached
|
|
42
|
+
copy owned by the target document; the source is untouched. Elements (with
|
|
43
|
+
attributes), text, comment, and processing-instruction nodes translate both
|
|
44
|
+
ways, and an HTML `<template>`'s contents (which HTML keeps in a separate
|
|
45
|
+
fragment) are carried across rather than silently dropped; an XML CDATA section
|
|
46
|
+
has no HTML counterpart, so translating one into an HTML document fails closed
|
|
47
|
+
(`Makiri::Error`). Namespaces are preserved across the translation: HTML->XML
|
|
48
|
+
synthesizes the xmlns declarations needed to reproduce each node's namespace
|
|
49
|
+
(so e.g. an inline `<svg>` stays in the SVG namespace and HTML elements in the
|
|
50
|
+
XHTML namespace), and XML->HTML maps the namespace URI back to a Lexbor
|
|
51
|
+
namespace id, interning any URI (not only the ones Lexbor knows by default) so
|
|
52
|
+
custom namespaces survive too. An HTML-namespaced `<template>`'s content is
|
|
53
|
+
placed in its content fragment (HTMLTemplateElement.content), like a parsed
|
|
54
|
+
template. The other node-argument mutators
|
|
55
|
+
(`add_child`/`before`/`after`/`replace`/`fragment`) still reject a foreign-kind
|
|
56
|
+
node; `import_node` is the one sanctioned crossing point.
|
|
57
|
+
|
|
58
|
+
* `set_attribute_ns(namespace, qualified_name, value)` and
|
|
59
|
+
`remove_attribute_ns(namespace, local_name)` on `Makiri::XML` elements - the DOM
|
|
60
|
+
setAttributeNS / removeAttributeNS, keyed on the (explicit namespace, local name)
|
|
61
|
+
pair so two attributes with the same qualified name in different namespaces
|
|
62
|
+
coexist (a null/"" namespace is the null namespace).
|
|
63
|
+
|
|
64
|
+
* `Makiri::Lexbor::CSS.parse_stylesheet(text)`, a thin binding over Lexbor's
|
|
65
|
+
CSS stylesheet parser that returns the parsed rules as plain Ruby primitives
|
|
66
|
+
(`{type: :style, selectors: [{text:, specificity: [a,b,c]}, ...],
|
|
67
|
+
declarations: [{name:, value:, important:}, ...]}` and nested
|
|
68
|
+
`{type: :media, condition:, rules: [...]}`, in source order). Selector
|
|
69
|
+
specificity and value normalization come from Lexbor; `css-syntax-3` error
|
|
70
|
+
recovery means a broken stylesheet yields its valid rules instead of raising.
|
|
71
|
+
Hosts the new `Makiri::Lexbor::*` namespace (the unabstracted lexbor-native
|
|
72
|
+
surface, distinct from the Nokogiri-compatible `Makiri::*`).
|
|
73
|
+
|
|
8
74
|
## [0.4.0] - 2026-06-12
|
|
9
75
|
|
|
10
76
|
### Added
|
|
@@ -296,7 +362,8 @@ libxml2 / libxslt dependency at any layer**.
|
|
|
296
362
|
domxpath, CSS differential vs `Nokogiri::HTML5`). GitHub Actions CI across
|
|
297
363
|
Ruby 3.2–4.0 × Ubuntu/macOS plus a sanitizer job.
|
|
298
364
|
|
|
299
|
-
[Unreleased]: https://github.com/takahashim/makiri/compare/v0.
|
|
365
|
+
[Unreleased]: https://github.com/takahashim/makiri/compare/v0.5.0...HEAD
|
|
366
|
+
[0.5.0]: https://github.com/takahashim/makiri/compare/v0.4.0...v0.5.0
|
|
300
367
|
[0.4.0]: https://github.com/takahashim/makiri/compare/v0.3.0...v0.4.0
|
|
301
368
|
[0.3.0]: https://github.com/takahashim/makiri/compare/v0.2.0...v0.3.0
|
|
302
369
|
[0.2.0]: https://github.com/takahashim/makiri/compare/v0.1.0...v0.2.0
|
data/README.md
CHANGED
|
@@ -141,6 +141,14 @@ XML subtrees can be built with `Document#create_element` and related node factor
|
|
|
141
141
|
then inserted with `#add_child`, `#before`, `#after`, or `#replace`;
|
|
142
142
|
namespaces are resolved at insertion time, and cross-document nodes are deep-copied.
|
|
143
143
|
|
|
144
|
+
`Document#import_node(node, deep = false)` brings a node into a document as a
|
|
145
|
+
detached copy, and works **across representations**: importing a `Makiri::HTML`
|
|
146
|
+
node into a `Makiri::XML::Document` (or vice versa) translates the subtree between
|
|
147
|
+
the two node representations, preserving namespaces (e.g. an inline `<svg>` keeps
|
|
148
|
+
the SVG namespace, HTML elements the XHTML namespace; custom namespaces are
|
|
149
|
+
preserved across both directions). An XML CDATA section has no HTML counterpart,
|
|
150
|
+
so importing one into an HTML document raises.
|
|
151
|
+
|
|
144
152
|
```ruby
|
|
145
153
|
doc = Makiri::XML(%(<feed xmlns="urn:a" xmlns:dc="urn:dc"/>))
|
|
146
154
|
entry = doc.create_element("entry")
|
|
@@ -226,6 +234,12 @@ Detailed, test-backed notes live in `spec/conformance/README.md`.
|
|
|
226
234
|
markup string straight to `#add_child` is unsupported (parse it into a fragment
|
|
227
235
|
first). (`#to_xml` serialization is supported; HTML serialization - `to_html`
|
|
228
236
|
/ `inner_html` / `outer_html` - is not.)
|
|
237
|
+
* A colon in a processing-instruction target is well-formed (`<?a:b ...?>` parses).
|
|
238
|
+
* XML 1.0 §2.6: a `PITarget` is a `Name`, not an NCName, and Namespaces in XML
|
|
239
|
+
1.0's normative conformance section constrains only element/attribute names
|
|
240
|
+
(QNames), never PI targets. Nokogiri/libxml2 rejects it (`colons are forbidden
|
|
241
|
+
from PI names`); Makiri follows the normative text. Only the reserved `xml`
|
|
242
|
+
(any case) target is rejected.
|
|
229
243
|
* Otherwise the parsed tree is byte-identical to `Nokogiri::XML`'s (verified by
|
|
230
244
|
the property-based differential), including namespaces, prolog/epilog comments
|
|
231
245
|
and PIs, and adjacent-CDATA coalescing.
|
data/Rakefile
CHANGED
|
@@ -59,6 +59,19 @@ task default: %i[compile spec]
|
|
|
59
59
|
# *our* code: a real uninit/invalid access in mkr_*/Lexbor still has a makiri frame
|
|
60
60
|
# and is still reported.
|
|
61
61
|
#
|
|
62
|
+
# BUT the binary-touch filter is too coarse for one residual class: when a GC
|
|
63
|
+
# cycle fires *inside* one of our allocations (or marks through our mark
|
|
64
|
+
# callback), CRuby's conservative collector legitimately reads uninitialised
|
|
65
|
+
# words (machine-stack scan reading stale frames, incremental mark/sweep reading
|
|
66
|
+
# not-yet-written RVALUE flags) while a makiri frame sits on the stack - so ~190
|
|
67
|
+
# of these pure-Ruby-GC false positives pass the filter. The gem's bundled
|
|
68
|
+
# ruby.supp only covers `each_location*` under Addr8, not the Cond/Value8 reads
|
|
69
|
+
# we hit. `suppressions/ruby.supp` (auto-loaded by ruby_memcheck: it globs
|
|
70
|
+
# `<dir>/<ruby-version>.supp`, and the bare `ruby.supp` matches every version)
|
|
71
|
+
# suppresses exactly those GC-driver-anchored uninit reads, plus the VM
|
|
72
|
+
# method-cache id_table the interpreter never frees before exit. A real uninit
|
|
73
|
+
# read in our code does not descend from a GC driver, so it still fails.
|
|
74
|
+
#
|
|
62
75
|
# Guarded: ruby_memcheck lives in the optional :valgrind bundler group, so a
|
|
63
76
|
# normal `bundle exec rake` (without that group) must not fail to load.
|
|
64
77
|
begin
|
data/lib/makiri/3.2/makiri.so
CHANGED
|
Binary file
|
data/lib/makiri/3.3/makiri.so
CHANGED
|
Binary file
|
data/lib/makiri/3.4/makiri.so
CHANGED
|
Binary file
|
data/lib/makiri/4.0/makiri.so
CHANGED
|
Binary file
|
data/lib/makiri/html/document.rb
CHANGED
|
@@ -55,12 +55,7 @@ module Makiri
|
|
|
55
55
|
# @param text [String]
|
|
56
56
|
# @return [String]
|
|
57
57
|
def title=(text)
|
|
58
|
-
|
|
59
|
-
unless t
|
|
60
|
-
t = Element.new("title", self)
|
|
61
|
-
(head || root).add_child(t)
|
|
62
|
-
end
|
|
63
|
-
t.content = text
|
|
58
|
+
ensure_in_head("title", "title").content = text
|
|
64
59
|
text
|
|
65
60
|
end
|
|
66
61
|
|
|
@@ -93,14 +88,18 @@ module Makiri
|
|
|
93
88
|
# @param value [String]
|
|
94
89
|
# @return [String]
|
|
95
90
|
def meta_encoding=(value)
|
|
96
|
-
|
|
97
|
-
unless meta
|
|
98
|
-
meta = Element.new("meta", self)
|
|
99
|
-
(head || root).add_child(meta)
|
|
100
|
-
end
|
|
101
|
-
meta["charset"] = value
|
|
91
|
+
ensure_in_head("meta[charset]", "meta")["charset"] = value
|
|
102
92
|
value
|
|
103
93
|
end
|
|
94
|
+
|
|
95
|
+
private
|
|
96
|
+
|
|
97
|
+
# The first node matching +css_query+, or a freshly created <+tag+>
|
|
98
|
+
# appended to <head> (or the root when the document has no head). Shared by
|
|
99
|
+
# #title= and #meta_encoding=, which then set content / attributes on it.
|
|
100
|
+
def ensure_in_head(css_query, tag)
|
|
101
|
+
at_css(css_query) || Element.new(tag, self).tap { |el| (head || root).add_child(el) }
|
|
102
|
+
end
|
|
104
103
|
end
|
|
105
104
|
end
|
|
106
105
|
end
|
|
@@ -10,7 +10,6 @@ module Makiri
|
|
|
10
10
|
alias_method :attr, :[]
|
|
11
11
|
alias_method :get_attribute, :[]
|
|
12
12
|
alias_method :has_attribute?, :key?
|
|
13
|
-
alias_method :remove_attribute, :delete
|
|
14
13
|
alias_method :node_name, :name
|
|
15
14
|
alias_method :node_name=, :name=
|
|
16
15
|
alias_method :type, :node_type
|
data/lib/makiri/node_set.rb
CHANGED
|
@@ -48,17 +48,13 @@ module Makiri
|
|
|
48
48
|
# Run a CSS selector against every node and return the unioned matches.
|
|
49
49
|
# @return [Makiri::NodeSet]
|
|
50
50
|
def css(selector)
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
map { |node| node.css(selector) }.reduce(:|)
|
|
51
|
+
union_query(:css, selector)
|
|
54
52
|
end
|
|
55
53
|
|
|
56
54
|
# Run an XPath expression against every node and union the node-set results.
|
|
57
55
|
# @return [Makiri::NodeSet]
|
|
58
56
|
def xpath(expr)
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
map { |node| node.xpath(expr) }.reduce(:|)
|
|
57
|
+
union_query(:xpath, expr)
|
|
62
58
|
end
|
|
63
59
|
|
|
64
60
|
# First node matching the CSS selector across the set, or nil.
|
|
@@ -77,9 +73,7 @@ module Makiri
|
|
|
77
73
|
# CSS- or XPath-detecting query against every node (see {Node#search}).
|
|
78
74
|
# @return [Makiri::NodeSet]
|
|
79
75
|
def search(path)
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
map { |node| node.search(path) }.reduce(:|)
|
|
76
|
+
union_query(:search, path)
|
|
83
77
|
end
|
|
84
78
|
|
|
85
79
|
# Remove the named attribute from every node in the set.
|
|
@@ -109,5 +103,16 @@ module Makiri
|
|
|
109
103
|
def inspect
|
|
110
104
|
"#<#{self.class.name} length=#{length}>"
|
|
111
105
|
end
|
|
106
|
+
|
|
107
|
+
private
|
|
108
|
+
|
|
109
|
+
# Run +method+(+arg+) on every node in the set and union the per-node
|
|
110
|
+
# results. An empty set returns self unchanged, so it stays a NodeSet (the
|
|
111
|
+
# shared shape behind #css / #xpath / #search).
|
|
112
|
+
def union_query(method, arg)
|
|
113
|
+
return self if empty?
|
|
114
|
+
|
|
115
|
+
map { |node| node.public_send(method, arg) }.reduce(:|)
|
|
116
|
+
end
|
|
112
117
|
end
|
|
113
118
|
end
|
|
@@ -14,5 +14,13 @@ module Makiri
|
|
|
14
14
|
def self.new(document, target, content)
|
|
15
15
|
Makiri::Document.coerce!(document).create_processing_instruction(target, content)
|
|
16
16
|
end
|
|
17
|
+
|
|
18
|
+
# DOM `ProcessingInstruction#target` — the PI's target name. Defined once on
|
|
19
|
+
# the shared base so both backends expose it: the XML node reaches it here
|
|
20
|
+
# (its target IS its node name), while the HTML node overrides it with a
|
|
21
|
+
# native implementation earlier in the ancestor chain.
|
|
22
|
+
def target
|
|
23
|
+
name
|
|
24
|
+
end
|
|
17
25
|
end
|
|
18
26
|
end
|
data/lib/makiri/version.rb
CHANGED
data/lib/makiri/xml/builder.rb
CHANGED
|
@@ -30,7 +30,7 @@ module Makiri
|
|
|
30
30
|
#
|
|
31
31
|
# Tag names that collide with a Ruby/Kernel method (or with one of this
|
|
32
32
|
# builder's own helpers below - +text+, +cdata+, +comment+, +doc+, +parent+,
|
|
33
|
-
# +to_xml+, +to_s+) must be written with a trailing underscore, which is
|
|
33
|
+
# +to_xml+, +to_s+, +descend+) must be written with a trailing underscore, which is
|
|
34
34
|
# stripped: +xml.id_("9")+ produces +<id>9</id>+. This matches Nokogiri.
|
|
35
35
|
#
|
|
36
36
|
# A namespace prefix is selected for the next element with +[]+:
|
|
@@ -137,6 +137,22 @@ module Makiri
|
|
|
137
137
|
true
|
|
138
138
|
end
|
|
139
139
|
|
|
140
|
+
# Run +block+ with +node+ as the current parent, restoring the previous
|
|
141
|
+
# parent afterward (even if the block raises) and returning the block's
|
|
142
|
+
# value. The single place +@parent+ is pushed/popped - shared by #insert and
|
|
143
|
+
# by {NodeBuilder}'s nested-block chain, so neither manipulates the parent
|
|
144
|
+
# state directly. Public so NodeBuilder (a separate class) can reuse it
|
|
145
|
+
# without reaching into a private method.
|
|
146
|
+
def descend(node, &block)
|
|
147
|
+
previous = @parent
|
|
148
|
+
@parent = node
|
|
149
|
+
begin
|
|
150
|
+
run(&block)
|
|
151
|
+
ensure
|
|
152
|
+
@parent = previous
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
|
|
140
156
|
private
|
|
141
157
|
|
|
142
158
|
# Translate the Nokogiri-style trailing arguments (a Hash is attributes,
|
|
@@ -177,21 +193,6 @@ module Makiri
|
|
|
177
193
|
NodeBuilder.new(node, self)
|
|
178
194
|
end
|
|
179
195
|
|
|
180
|
-
# Run +block+ with +node+ as the current parent, restoring the previous
|
|
181
|
-
# parent afterward (even if the block raises) and returning the block's
|
|
182
|
-
# value. The single place the parent is pushed/popped - shared by #insert and
|
|
183
|
-
# NodeBuilder's nested-block chain, so neither manipulates the parent state
|
|
184
|
-
# directly.
|
|
185
|
-
def descend(node, &block)
|
|
186
|
-
previous = @parent
|
|
187
|
-
@parent = node
|
|
188
|
-
begin
|
|
189
|
-
run(&block)
|
|
190
|
-
ensure
|
|
191
|
-
@parent = previous
|
|
192
|
-
end
|
|
193
|
-
end
|
|
194
|
-
|
|
195
196
|
# Run a DSL block, choosing instance_eval vs yield once (from the first
|
|
196
197
|
# block seen), the same way Nokogiri does, so the form is consistent
|
|
197
198
|
# throughout a build.
|
|
@@ -239,17 +240,15 @@ module Makiri
|
|
|
239
240
|
when /\A(.*)=\z/
|
|
240
241
|
@node[Regexp.last_match(1)] = args.first
|
|
241
242
|
else
|
|
242
|
-
|
|
243
|
+
append_attr("class", method.to_s)
|
|
243
244
|
@node.content = args.first if args.first
|
|
244
245
|
end
|
|
245
246
|
|
|
246
|
-
opts.each
|
|
247
|
-
@node[key.to_s] = ((@node[key.to_s] || "").split(/\s/) + [value]).join(" ")
|
|
248
|
-
end
|
|
247
|
+
opts.each { |key, value| append_attr(key.to_s, value) }
|
|
249
248
|
|
|
250
249
|
# Descend into this node for a nested block via the builder's own parent
|
|
251
250
|
# stack (with its ensure-based restore), rather than re-rooting it by hand.
|
|
252
|
-
return @doc_builder.
|
|
251
|
+
return @doc_builder.descend(@node, &block) if block
|
|
253
252
|
|
|
254
253
|
self
|
|
255
254
|
end
|
|
@@ -257,6 +256,15 @@ module Makiri
|
|
|
257
256
|
def respond_to_missing?(_name, _include_private = false)
|
|
258
257
|
true
|
|
259
258
|
end
|
|
259
|
+
|
|
260
|
+
private
|
|
261
|
+
|
|
262
|
+
# Append +value+ as a space-separated token to the +key+ attribute,
|
|
263
|
+
# preserving any existing tokens. The shared idiom behind the terse
|
|
264
|
+
# class-append and the trailing-Hash attribute shortcut.
|
|
265
|
+
def append_attr(key, value)
|
|
266
|
+
@node[key] = ((@node[key] || "").split(/\s/) + [value]).join(" ")
|
|
267
|
+
end
|
|
260
268
|
end
|
|
261
269
|
end
|
|
262
270
|
end
|
data/lib/makiri/xpath_context.rb
CHANGED
|
@@ -14,10 +14,18 @@ module Makiri
|
|
|
14
14
|
# The bulk of the implementation lives in C (see
|
|
15
15
|
# ext/makiri/glue/ruby_xpath.c and ext/makiri/xpath/).
|
|
16
16
|
class XPathContext
|
|
17
|
-
# +#evaluate+ is defined in C
|
|
18
|
-
#
|
|
19
|
-
#
|
|
20
|
-
#
|
|
17
|
+
# +#evaluate+ is defined in C and runs under the GVL (XPath never releases
|
|
18
|
+
# it), so it cannot corrupt memory under concurrency. Two distinct hazards,
|
|
19
|
+
# handled differently:
|
|
20
|
+
#
|
|
21
|
+
# * Cross-thread: the GVL serialises all of +evaluate+ / +register_*+ /
|
|
22
|
+
# +node=+ and any tree mutation of the queried document, so threads can
|
|
23
|
+
# never run two of them at once.
|
|
24
|
+
# * Same-thread re-entrancy: a custom function handler runs mid-evaluate and
|
|
25
|
+
# could call back into this same context. A re-entrant +register_namespace+
|
|
26
|
+
# / +register_variable+ / +node=+ is refused (raises) while an evaluate is
|
|
27
|
+
# in progress, since it would free/swap state the suspended evaluator still
|
|
28
|
+
# borrows; a nested +evaluate+ is allowed.
|
|
21
29
|
|
|
22
30
|
# Nokogiri-compatible name for {#register_namespace}.
|
|
23
31
|
alias register_ns register_namespace
|
data/script/check_c_safety.rb
CHANGED
|
@@ -33,20 +33,20 @@ ignore_paths:
|
|
|
33
33
|
rule: html_doc_unwrap_boundary
|
|
34
34
|
reason: the HTML branch of mkr_xpath_context_for, entered only after the XML kind returns early.
|
|
35
35
|
# mkr_parsed_html_doc (asserts kind == HTML)
|
|
36
|
-
- path: ext/makiri/
|
|
36
|
+
- path: ext/makiri/dom_adapter/compat.h
|
|
37
37
|
rule: parsed_html_doc_boundary
|
|
38
38
|
reason: declaration of the HTML parsed-document accessor.
|
|
39
|
-
- path: ext/makiri/
|
|
39
|
+
- path: ext/makiri/dom_adapter/post_parse.c
|
|
40
40
|
rule: parsed_html_doc_boundary
|
|
41
41
|
reason: definition (the kind assert lives here) + HTML post-parse pipeline.
|
|
42
42
|
- path: ext/makiri/glue/ruby_doc.c
|
|
43
43
|
rule: parsed_html_doc_boundary
|
|
44
44
|
reason: mkr_html_doc_unwrap is defined here over the HTML parsed document.
|
|
45
45
|
# mkr_parsed_xml_doc (XML arena accessor) — kept out of the pure-HTML glue files
|
|
46
|
-
- path: ext/makiri/
|
|
46
|
+
- path: ext/makiri/dom_adapter/compat.h
|
|
47
47
|
rule: parsed_xml_doc_boundary
|
|
48
48
|
reason: declaration of the XML parsed-document accessor.
|
|
49
|
-
- path: ext/makiri/
|
|
49
|
+
- path: ext/makiri/dom_adapter/post_parse.c
|
|
50
50
|
rule: parsed_xml_doc_boundary
|
|
51
51
|
reason: definition + XML document wrapping/teardown.
|
|
52
52
|
- path: ext/makiri/glue/ruby_xml.c
|
|
@@ -65,7 +65,7 @@ ignore_paths:
|
|
|
65
65
|
rule: parsed_xml_doc_boundary
|
|
66
66
|
reason: the kind-aware mkr_node_raw resolves an XML Document only after a MKR_DOC_XML check.
|
|
67
67
|
# owner_document (HTML-only lxb_dom_node_t field)
|
|
68
|
-
- path: ext/makiri/
|
|
68
|
+
- path: ext/makiri/dom_adapter/post_parse.c
|
|
69
69
|
rule: owner_document_boundary
|
|
70
70
|
reason: mkr_lxb_document_bytes resolves a Lexbor node's owner document to size its mraw pools (HTML-only; the XML serializer uses arena_bytes instead).
|
|
71
71
|
- path: ext/makiri/glue/ruby_html_node.c
|
|
@@ -80,6 +80,9 @@ ignore_paths:
|
|
|
80
80
|
- path: ext/makiri/xpath/mkr_xpath_node_access_html.h
|
|
81
81
|
rule: owner_document_boundary
|
|
82
82
|
reason: the HTML monomorphization of the engine's node-access layer.
|
|
83
|
+
- path: ext/makiri/dom_adapter/cross_import.c
|
|
84
|
+
rule: owner_document_boundary
|
|
85
|
+
reason: mkr_html_ns_uri reads the SOURCE HTML node's owner document ns table to resolve its namespace id to a URI; reached only on the HTML (lxb) side of HTML->XML translation, never on an XML node (no cross-kind document comparison).
|
|
83
86
|
# mkr_node_raw (kind-agnostic void* raw pointer; never dereferenced as a typed node)
|
|
84
87
|
- path: ext/makiri/glue/glue.h
|
|
85
88
|
rule: node_raw_boundary
|
data/script/leaks_harness.rb
CHANGED
|
@@ -15,6 +15,8 @@ ITERATIONS = Integer(ENV.fetch("LEAKS_ITERATIONS", "120"))
|
|
|
15
15
|
|
|
16
16
|
HTML = "<div id=m class='a b'><ul><li class=item>x</li><li>y<svg><path/></svg></li></ul><p>t&</p></div>"
|
|
17
17
|
XML = %(<r xmlns:p="urn:p" xmlns="urn:d"><a id="1">t</a><p:b/><!--c--><![CDATA[z]]></r>)
|
|
18
|
+
CSS = "div.a, #b > span { color: red !important; --v: 1px }\n" \
|
|
19
|
+
"@media (min-width: 600px) { .x { opacity: 0 } }\n@font-face { font-family: y }"
|
|
18
20
|
|
|
19
21
|
handler = Class.new { def my_fn(nodes) = nodes.length.to_s }.new
|
|
20
22
|
|
|
@@ -57,6 +59,11 @@ ITERATIONS.times do |i|
|
|
|
57
59
|
ctx.register_namespace("d", "urn:d"); ctx.register_variable("v", "1")
|
|
58
60
|
ctx.evaluate("//d:a[@id=$v]"); ctx.evaluate("//d:a[@id=$v]")
|
|
59
61
|
begin ctx.evaluate("//(") rescue Makiri::XPath::SyntaxError; end
|
|
62
|
+
|
|
63
|
+
# --- Lexbor CSS stylesheet parser (per-call parser+stylesheet lifetime,
|
|
64
|
+
# freed under rb_ensure) including the NUL-reject raise path ---
|
|
65
|
+
Makiri::Lexbor::CSS.parse_stylesheet(CSS)
|
|
66
|
+
begin Makiri::Lexbor::CSS.parse_stylesheet("a{}\0x") rescue Makiri::Error; end
|
|
60
67
|
end
|
|
61
68
|
|
|
62
69
|
GC.start
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# Valgrind suppressions for `rake spec:valgrind` (ruby_memcheck).
|
|
2
|
+
#
|
|
3
|
+
# ruby_memcheck auto-loads every `<suppressions>/<ruby-version>.supp` it finds,
|
|
4
|
+
# and `ruby.supp` (the bare RUBY_ENGINE name) matches every Ruby version. These
|
|
5
|
+
# entries are ADDED to the ones the gem already bundles.
|
|
6
|
+
#
|
|
7
|
+
# Why this file exists: our job runs with `--undef-value-errors=yes` and
|
|
8
|
+
# `filter_all_errors: true`, which surfaces *every* uninitialised-value report
|
|
9
|
+
# whose stack touches the makiri binary. CRuby's garbage collector legitimately
|
|
10
|
+
# reads uninitialised memory - the conservative machine-stack scan reads stale
|
|
11
|
+
# stack words, and incremental mark/sweep reads not-yet-written RVALUE flags -
|
|
12
|
+
# and when a GC cycle fires inside one of our allocations (or marks through our
|
|
13
|
+
# mark callback) the makiri frame is on the stack, so the binary-touch filter
|
|
14
|
+
# keeps the report. The gem's bundled ruby.supp only covers `each_location*`
|
|
15
|
+
# under `Memcheck:Addr8`, not the `Cond`/`Value8` reads we see, so ~190 of these
|
|
16
|
+
# Ruby-GC false positives slip through and fail the run.
|
|
17
|
+
#
|
|
18
|
+
# Each suppression below is anchored on a CRuby GC driver function (gc marks /
|
|
19
|
+
# sweep / conservative root scan), with leading `...` absorbing the GC-internal
|
|
20
|
+
# error-origin frames above it. A real uninitialised read in our own code does
|
|
21
|
+
# NOT descend from these GC drivers, so it still fails the gate. We do NOT
|
|
22
|
+
# suppress Addr/Overlap/Param/etc. - only the GC uninit reads and a couple of
|
|
23
|
+
# Ruby-internal leaks the VM never frees before exit.
|
|
24
|
+
|
|
25
|
+
# ---- conservative GC: uninitialised-value reads (Memcheck:Cond) ----
|
|
26
|
+
{
|
|
27
|
+
ruby-gc-cond-garbage_collect
|
|
28
|
+
Memcheck:Cond
|
|
29
|
+
...
|
|
30
|
+
fun:garbage_collect
|
|
31
|
+
}
|
|
32
|
+
{
|
|
33
|
+
ruby-gc-cond-gc_start
|
|
34
|
+
Memcheck:Cond
|
|
35
|
+
...
|
|
36
|
+
fun:gc_start
|
|
37
|
+
}
|
|
38
|
+
{
|
|
39
|
+
ruby-gc-cond-gc_marks
|
|
40
|
+
Memcheck:Cond
|
|
41
|
+
...
|
|
42
|
+
fun:gc_marks*
|
|
43
|
+
}
|
|
44
|
+
{
|
|
45
|
+
ruby-gc-cond-gc_sweep
|
|
46
|
+
Memcheck:Cond
|
|
47
|
+
...
|
|
48
|
+
fun:gc_sweep*
|
|
49
|
+
}
|
|
50
|
+
{
|
|
51
|
+
ruby-gc-cond-rb_gc_impl_mark
|
|
52
|
+
Memcheck:Cond
|
|
53
|
+
...
|
|
54
|
+
fun:rb_gc_impl_mark*
|
|
55
|
+
}
|
|
56
|
+
{
|
|
57
|
+
ruby-gc-cond-machine_context
|
|
58
|
+
Memcheck:Cond
|
|
59
|
+
...
|
|
60
|
+
fun:rb_gc_mark_machine_context
|
|
61
|
+
}
|
|
62
|
+
{
|
|
63
|
+
ruby-gc-cond-mark_roots
|
|
64
|
+
Memcheck:Cond
|
|
65
|
+
...
|
|
66
|
+
fun:mark_roots
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
# ---- conservative GC: uninitialised-value reads (Memcheck:Value8) ----
|
|
70
|
+
# `gc_aging` (Ruby's default.c) ages objects during incremental marking and
|
|
71
|
+
# reads the not-yet-written RVALUE flag word - the same false positive as the
|
|
72
|
+
# drivers below, but here it is the ONLY resolved frame (every caller is
|
|
73
|
+
# `<unknown stack frame>`), so the `...`-then-driver anchors miss it. Anchor on
|
|
74
|
+
# the top frame itself (the `.part.0` GCC partial-inline clone is matched by `*`).
|
|
75
|
+
{
|
|
76
|
+
ruby-gc-cond-gc_aging
|
|
77
|
+
Memcheck:Cond
|
|
78
|
+
fun:gc_aging*
|
|
79
|
+
}
|
|
80
|
+
{
|
|
81
|
+
ruby-gc-value8-gc_aging
|
|
82
|
+
Memcheck:Value8
|
|
83
|
+
fun:gc_aging*
|
|
84
|
+
}
|
|
85
|
+
{
|
|
86
|
+
ruby-gc-value8-garbage_collect
|
|
87
|
+
Memcheck:Value8
|
|
88
|
+
...
|
|
89
|
+
fun:garbage_collect
|
|
90
|
+
}
|
|
91
|
+
{
|
|
92
|
+
ruby-gc-value8-gc_start
|
|
93
|
+
Memcheck:Value8
|
|
94
|
+
...
|
|
95
|
+
fun:gc_start
|
|
96
|
+
}
|
|
97
|
+
{
|
|
98
|
+
ruby-gc-value8-gc_marks
|
|
99
|
+
Memcheck:Value8
|
|
100
|
+
...
|
|
101
|
+
fun:gc_marks*
|
|
102
|
+
}
|
|
103
|
+
{
|
|
104
|
+
ruby-gc-value8-gc_sweep
|
|
105
|
+
Memcheck:Value8
|
|
106
|
+
...
|
|
107
|
+
fun:gc_sweep*
|
|
108
|
+
}
|
|
109
|
+
{
|
|
110
|
+
ruby-gc-value8-rb_gc_impl_mark
|
|
111
|
+
Memcheck:Value8
|
|
112
|
+
...
|
|
113
|
+
fun:rb_gc_impl_mark*
|
|
114
|
+
}
|
|
115
|
+
{
|
|
116
|
+
ruby-gc-value8-machine_context
|
|
117
|
+
Memcheck:Value8
|
|
118
|
+
...
|
|
119
|
+
fun:rb_gc_mark_machine_context
|
|
120
|
+
}
|
|
121
|
+
{
|
|
122
|
+
ruby-gc-value8-mark_roots
|
|
123
|
+
Memcheck:Value8
|
|
124
|
+
...
|
|
125
|
+
fun:mark_roots
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
# ---- Ruby VM internal caches that are never freed before exit ----
|
|
129
|
+
# The global constant-cache / inline method-cache id_table is allocated lazily
|
|
130
|
+
# on method lookup and lives for the life of the VM, so RUBY_FREE_AT_EXIT still
|
|
131
|
+
# reports it as "definitely lost" whenever the first lookup happened under one
|
|
132
|
+
# of our cfuncs. Not ours to free.
|
|
133
|
+
{
|
|
134
|
+
ruby-vm-method-cache-id_table
|
|
135
|
+
Memcheck:Leak
|
|
136
|
+
...
|
|
137
|
+
fun:rb_id_table_create
|
|
138
|
+
fun:vm_search_cc
|
|
139
|
+
...
|
|
140
|
+
}
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: makiri
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: x86_64-linux
|
|
6
6
|
authors:
|
|
7
7
|
- takahashim
|
|
@@ -109,6 +109,7 @@ files:
|
|
|
109
109
|
- script/check_leaks.rb
|
|
110
110
|
- script/leaks_harness.rb
|
|
111
111
|
- sig/makiri.rbs
|
|
112
|
+
- suppressions/ruby.supp
|
|
112
113
|
homepage: https://github.com/takahashim/makiri
|
|
113
114
|
licenses:
|
|
114
115
|
- Apache-2.0
|