makiri 0.4.0-x86_64-linux → 0.5.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 80f4b2b7459af8f421bdd5ffa526c70aaca8fcd5bdbfca64bd01f240487b2347
4
- data.tar.gz: 28eeacf29507338f372622e73d8921ef99fd390975f9fba666608e9087eab467
3
+ metadata.gz: 64d3fbdbe21dbbfddf413832c97e9f5f32dc020ff0c3f01efb75232fca32085c
4
+ data.tar.gz: 37990c3d0d3032aa088240d76edfcfeb4a4f598d87e770aef7a21a19a7de5446
5
5
  SHA512:
6
- metadata.gz: d8d70b1e3e4e4f94f0d4f3fd34e28677d2b87908a0c7e9b7ff34cef14d5a5dd318d8de473afbc5a10cc3f54f0f30945778f256fc3a303cffc734e02c7b9c6509
7
- data.tar.gz: 2c97c8f1d5deb67e9a37ffd87dbca033c3788507890d6f8be14facf205aa584448cc4c9b12f0ce81989b6dffbdea82631cbf9b7b9de513d05fc5f2e0f8bf566d
6
+ metadata.gz: 933fcc0d10a4923c0c1f1541845a6a46b60dc8091cbe54fcd8edd85e7d17321decff9d080f294997a2c21608ae948557e94cc971514c37f6c72de751fa9d4b23
7
+ data.tar.gz: b3159878da760a16387247bd0e32a6c15e84d85dac066bb334397b27e9ba98429c73144c8ae0a35b513420826aebbc24b5a74b19988857274dfa72afea69b714
@@ -1,9 +1,18 @@
1
1
  name: Valgrind + GC.compact
2
2
 
3
3
  on:
4
- # Nightly: these jobs are heavy (Valgrind is ~10-50x slower, GC.stress is ~10x
5
- # slower) and check structural properties that do not vary by day-to-day code
6
- # churn, so run them on a schedule rather than on every push/PR.
4
+ # Valgrind memcheck ALSO runs on push to main: it is the only check without a
5
+ # frequency threshold (any "definitely lost" / uninitialised-value use fails,
6
+ # unlike the PR-level macOS leak gate, which only flags stacks repeated >=30x),
7
+ # so a leak on a rarely-hit error path slips past the PR gates and would
8
+ # otherwise surface only on the next nightly. Running it post-merge catches such
9
+ # regressions within ~30 min without adding ~20 min to every PR. (It is gated to
10
+ # main/master only (see `branches:` below), not pull_request, to keep PR latency low.)
11
+ #
12
+ # The GC.stress job stays nightly-only (see its `if:` below): it is heavy and
13
+ # checks structural properties that do not vary by day-to-day churn.
14
+ push:
15
+ branches: [main, master]
7
16
  schedule:
8
17
  - cron: "0 2 * * *"
9
18
  workflow_dispatch:
@@ -61,32 +70,44 @@ jobs:
61
70
  - name: Run spec suite under Valgrind (ruby_memcheck)
62
71
  run: bundle exec rake spec:valgrind
63
72
 
64
- # GC.auto_compact + GC.stress run of the full spec suite. This structurally
65
- # tests the borrowed-pointer discipline under the condition that Ruby Strings
66
- # actually move (compaction) and that every allocation triggers a full GC
67
- # cycle (stress). Failures here are typically use-after-move or stale
73
+ # GC.auto_compact + GC.stress over the GC-sensitive examples. This
74
+ # structurally tests the borrowed-pointer discipline under the condition that
75
+ # Ruby Strings actually move (compaction) and that every allocation triggers a
76
+ # full GC cycle (stress). Failures here are typically use-after-move or stale
68
77
  # pointer bugs in the C extension or bridge layer.
69
78
  #
70
- # THREADING is deliberately OFF here. The :threading suite (spec/threading_spec.rb)
71
- # is 8 threads x tens of iterations, and forcing the job-level GC.stress onto it
72
- # means a full GC per allocation across every thread - which made this job run
73
- # for 30+ minutes without finishing. It also adds little: that suite already
74
- # runs in ci.yml (ubuntu/3.4), and its GC-sensitive examples opt into GC.stress
75
- # themselves via their own `around` hook, so cross-thread interactions are
76
- # covered there. This job's unique value is the *single-threaded* full suite
77
- # under stress+compaction, which catches use-after-move across every code path.
79
+ # Scope: only the examples tagged `:gc_compact` (the `memory safety` blocks in
80
+ # css/xpath/serialize/mutation/source_location/xpath_handler/api_compat2 +
81
+ # attribute's lazy-index example). Those are the examples written to exercise
82
+ # the borrowed-pointer paths. `GC_COMPACT_STRESS=1` makes spec_helper set
83
+ # `GC.auto_compact = true` process-wide and wrap every example in `GC.stress`,
84
+ # so each allocation inside a tagged example triggers a *compacting* GC - the
85
+ # strongest form of the use-after-move test. The high-volume churn loops
86
+ # (parse/drop cycles) scale their iteration count down under stress
87
+ # (`gc_churn_iters` / `GC_COMPACT_ITERS`) because each stressed iteration is
88
+ # orders of magnitude heavier; `GC_COMPACT_ITERS` below tunes the total runtime
89
+ # (~6-9 min on CI at 200). An earlier version forced GC.stress onto the
90
+ # *entire* suite (~800 examples): it ran 1h40m+ and never finished, while
91
+ # testing borrowed-pointer discipline on hundreds of examples that have none.
92
+ # The rest of the suite still runs in ci.yml.
93
+ #
94
+ # THREADING is deliberately OFF here. The :threading suite is 8 threads x tens
95
+ # of iterations; it runs in ci.yml and its GC-sensitive examples opt into
96
+ # GC.stress themselves, so cross-thread interactions are covered there.
78
97
  gc-compact-stress:
79
- # Temporarily disabled, too long
80
- if: false
98
+ # Nightly / on-demand only - not on push (the valgrind job is the post-merge
99
+ # gate; GC.stress is heavy and structural, so it does not need per-push runs).
100
+ if: github.event_name != 'push'
81
101
  name: GC.auto_compact + GC.stress (Ruby ${{ matrix.ruby }})
82
102
  runs-on: ubuntu-latest
83
- timeout-minutes: 360
103
+ timeout-minutes: 30
84
104
  env:
85
- # As in the Valgrind job: GC.stress (a full GC per allocation) makes the
86
- # 300-iteration PBT sweep run for hours, and these jobs check memory
87
- # discipline rather than the property space, so trim the iteration count.
88
- PBT_COUNT: "15"
89
- CSS_PBT_COUNT: "15"
105
+ GC_COMPACT_STRESS: "1"
106
+ # Per-iteration cost under per-allocation compacting GC is ~1000x normal, so
107
+ # the churn loops run this many iterations (vs their normal 200-1000). Tunes
108
+ # the job's runtime; raise for more coverage, lower if it approaches the
109
+ # timeout.
110
+ GC_COMPACT_ITERS: "200"
90
111
  strategy:
91
112
  fail-fast: false
92
113
  matrix:
@@ -110,26 +131,8 @@ jobs:
110
131
  - name: Compile the extension
111
132
  run: bundle exec rake compile
112
133
 
113
- # GC.stress is scoped to each example via an around hook rather than set
114
- # process-wide: under a global GC.stress, even requiring the 88 spec files
115
- # runs a full GC per allocation, so loading alone took tens of minutes and
116
- # the job never reached the first example. auto_compact stays global so
117
- # objects actually move during those stressed examples (the point of the
118
- # job), while loading/collection runs at normal speed.
119
- - name: Run spec suite under GC.auto_compact + GC.stress
120
- run: |
121
- bundle exec ruby -Ilib -e '
122
- GC.auto_compact = true
123
- require "rspec/core"
124
- RSpec.configure do |c|
125
- c.around(:each) do |example|
126
- GC.stress = true
127
- begin
128
- example.run
129
- ensure
130
- GC.stress = false
131
- end
132
- end
133
- end
134
- exit RSpec::Core::Runner.run(ARGV)
135
- ' spec
134
+ # GC_COMPACT_STRESS=1 (set in env above) makes spec_helper enable
135
+ # auto_compact globally and wrap each example in GC.stress; --tag gc_compact
136
+ # limits the run to the borrowed-pointer examples.
137
+ - name: Run GC-sensitive examples under GC.auto_compact + GC.stress
138
+ run: bundle exec rspec --tag gc_compact spec
data/CHANGELOG.md CHANGED
@@ -5,6 +5,72 @@ All notable changes to this project will be documented in this file.
5
5
  The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.5.0] - 2026-06-14
9
+
10
+ ### Fixed
11
+
12
+ * Use-after-free when an XPath custom-function handler mutated the same
13
+ `XPathContext` (`register_*` / `node=`) mid-`evaluate`: such re-entrant context
14
+ mutation is now refused instead of invalidating the running evaluation's state.
15
+
16
+ * `Node#name=` now invalidates the element-name index, so a later `//tag` query
17
+ reflects the rename instead of seeing a stale bucket.
18
+
19
+ * XML processing-instruction targets now follow XML 1.0 §2.6: a PITarget is a
20
+ `Name`, not an NCName, so a colon is permitted (`<?a:b ...?>` parses, and
21
+ `create_processing_instruction("a:b", ...)` succeeds). Only the reserved `xml`
22
+ (any case) is still rejected. Previously a colon in a PI target was rejected as
23
+ not-well-formed, which was stricter than the spec (a PI target is not subject to
24
+ namespace processing).
25
+
26
+ * Memory leaks of the internal XPath evaluation context on error / edge paths: a
27
+ `Makiri::XML` `#css` / `#xpath` / `#at_xpath` whose selector or expression failed
28
+ the text-input contract leaked the context (it is now verified BEFORE the context
29
+ is allocated), and a context could leak if building the Ruby result raised (it is
30
+ now freed before conversion).
31
+
32
+ ### Added
33
+
34
+ * `ProcessingInstruction#target` on the XML node (the PI's target name).
35
+
36
+ * Cross-kind `Document#import_node(node, deep = false)`. `import_node` now
37
+ translates a subtree across representations: `Makiri::XML::Document#import_node`
38
+ (newly added) imports an HTML (Lexbor) node by translating it to the XML node
39
+ representation, and `Makiri::HTML::Document#import_node` likewise translates an
40
+ XML node to HTML. Same-representation imports keep working (HTML to HTML via
41
+ Lexbor, XML to XML via the arena deep/shallow copy). The result is a detached
42
+ copy owned by the target document; the source is untouched. Elements (with
43
+ attributes), text, comment, and processing-instruction nodes translate both
44
+ ways, and an HTML `<template>`'s contents (which HTML keeps in a separate
45
+ fragment) are carried across rather than silently dropped; an XML CDATA section
46
+ has no HTML counterpart, so translating one into an HTML document fails closed
47
+ (`Makiri::Error`). Namespaces are preserved across the translation: HTML->XML
48
+ synthesizes the xmlns declarations needed to reproduce each node's namespace
49
+ (so e.g. an inline `<svg>` stays in the SVG namespace and HTML elements in the
50
+ XHTML namespace), and XML->HTML maps the namespace URI back to a Lexbor
51
+ namespace id, interning any URI (not only the ones Lexbor knows by default) so
52
+ custom namespaces survive too. An HTML-namespaced `<template>`'s content is
53
+ placed in its content fragment (HTMLTemplateElement.content), like a parsed
54
+ template. The other node-argument mutators
55
+ (`add_child`/`before`/`after`/`replace`/`fragment`) still reject a foreign-kind
56
+ node; `import_node` is the one sanctioned crossing point.
57
+
58
+ * `set_attribute_ns(namespace, qualified_name, value)` and
59
+ `remove_attribute_ns(namespace, local_name)` on `Makiri::XML` elements - the DOM
60
+ setAttributeNS / removeAttributeNS, keyed on the (explicit namespace, local name)
61
+ pair so two attributes with the same qualified name in different namespaces
62
+ coexist (a null/"" namespace is the null namespace).
63
+
64
+ * `Makiri::Lexbor::CSS.parse_stylesheet(text)`, a thin binding over Lexbor's
65
+ CSS stylesheet parser that returns the parsed rules as plain Ruby primitives
66
+ (`{type: :style, selectors: [{text:, specificity: [a,b,c]}, ...],
67
+ declarations: [{name:, value:, important:}, ...]}` and nested
68
+ `{type: :media, condition:, rules: [...]}`, in source order). Selector
69
+ specificity and value normalization come from Lexbor; `css-syntax-3` error
70
+ recovery means a broken stylesheet yields its valid rules instead of raising.
71
+ Hosts the new `Makiri::Lexbor::*` namespace (the unabstracted lexbor-native
72
+ surface, distinct from the Nokogiri-compatible `Makiri::*`).
73
+
8
74
  ## [0.4.0] - 2026-06-12
9
75
 
10
76
  ### Added
@@ -296,7 +362,8 @@ libxml2 / libxslt dependency at any layer**.
296
362
  domxpath, CSS differential vs `Nokogiri::HTML5`). GitHub Actions CI across
297
363
  Ruby 3.2–4.0 × Ubuntu/macOS plus a sanitizer job.
298
364
 
299
- [Unreleased]: https://github.com/takahashim/makiri/compare/v0.4.0...HEAD
365
+ [Unreleased]: https://github.com/takahashim/makiri/compare/v0.5.0...HEAD
366
+ [0.5.0]: https://github.com/takahashim/makiri/compare/v0.4.0...v0.5.0
300
367
  [0.4.0]: https://github.com/takahashim/makiri/compare/v0.3.0...v0.4.0
301
368
  [0.3.0]: https://github.com/takahashim/makiri/compare/v0.2.0...v0.3.0
302
369
  [0.2.0]: https://github.com/takahashim/makiri/compare/v0.1.0...v0.2.0
data/README.md CHANGED
@@ -141,6 +141,14 @@ XML subtrees can be built with `Document#create_element` and related node factor
141
141
  then inserted with `#add_child`, `#before`, `#after`, or `#replace`;
142
142
  namespaces are resolved at insertion time, and cross-document nodes are deep-copied.
143
143
 
144
+ `Document#import_node(node, deep = false)` brings a node into a document as a
145
+ detached copy, and works **across representations**: importing a `Makiri::HTML`
146
+ node into a `Makiri::XML::Document` (or vice versa) translates the subtree between
147
+ the two node representations, preserving namespaces (e.g. an inline `<svg>` keeps
148
+ the SVG namespace, HTML elements the XHTML namespace; custom namespaces are
149
+ preserved across both directions). An XML CDATA section has no HTML counterpart,
150
+ so importing one into an HTML document raises.
151
+
144
152
  ```ruby
145
153
  doc = Makiri::XML(%(<feed xmlns="urn:a" xmlns:dc="urn:dc"/>))
146
154
  entry = doc.create_element("entry")
@@ -226,6 +234,12 @@ Detailed, test-backed notes live in `spec/conformance/README.md`.
226
234
  markup string straight to `#add_child` is unsupported (parse it into a fragment
227
235
  first). (`#to_xml` serialization is supported; HTML serialization - `to_html`
228
236
  / `inner_html` / `outer_html` - is not.)
237
+ * A colon in a processing-instruction target is well-formed (`<?a:b ...?>` parses).
238
+ * XML 1.0 §2.6: a `PITarget` is a `Name`, not an NCName, and Namespaces in XML
239
+ 1.0's normative conformance section constrains only element/attribute names
240
+ (QNames), never PI targets. Nokogiri/libxml2 rejects it (`colons are forbidden
241
+ from PI names`); Makiri follows the normative text. Only the reserved `xml`
242
+ (any case) target is rejected.
229
243
  * Otherwise the parsed tree is byte-identical to `Nokogiri::XML`'s (verified by
230
244
  the property-based differential), including namespaces, prolog/epilog comments
231
245
  and PIs, and adjacent-CDATA coalescing.
data/Rakefile CHANGED
@@ -59,6 +59,19 @@ task default: %i[compile spec]
59
59
  # *our* code: a real uninit/invalid access in mkr_*/Lexbor still has a makiri frame
60
60
  # and is still reported.
61
61
  #
62
+ # BUT the binary-touch filter is too coarse for one residual class: when a GC
63
+ # cycle fires *inside* one of our allocations (or marks through our mark
64
+ # callback), CRuby's conservative collector legitimately reads uninitialised
65
+ # words (machine-stack scan reading stale frames, incremental mark/sweep reading
66
+ # not-yet-written RVALUE flags) while a makiri frame sits on the stack - so ~190
67
+ # of these pure-Ruby-GC false positives pass the filter. The gem's bundled
68
+ # ruby.supp only covers `each_location*` under Addr8, not the Cond/Value8 reads
69
+ # we hit. `suppressions/ruby.supp` (auto-loaded by ruby_memcheck: it globs
70
+ # `<dir>/<ruby-version>.supp`, and the bare `ruby.supp` matches every version)
71
+ # suppresses exactly those GC-driver-anchored uninit reads, plus the VM
72
+ # method-cache id_table the interpreter never frees before exit. A real uninit
73
+ # read in our code does not descend from a GC driver, so it still fails.
74
+ #
62
75
  # Guarded: ruby_memcheck lives in the optional :valgrind bundler group, so a
63
76
  # normal `bundle exec rake` (without that group) must not fail to load.
64
77
  begin
Binary file
Binary file
Binary file
Binary file
@@ -55,12 +55,7 @@ module Makiri
55
55
  # @param text [String]
56
56
  # @return [String]
57
57
  def title=(text)
58
- t = at_css("title")
59
- unless t
60
- t = Element.new("title", self)
61
- (head || root).add_child(t)
62
- end
63
- t.content = text
58
+ ensure_in_head("title", "title").content = text
64
59
  text
65
60
  end
66
61
 
@@ -93,14 +88,18 @@ module Makiri
93
88
  # @param value [String]
94
89
  # @return [String]
95
90
  def meta_encoding=(value)
96
- meta = at_css("meta[charset]")
97
- unless meta
98
- meta = Element.new("meta", self)
99
- (head || root).add_child(meta)
100
- end
101
- meta["charset"] = value
91
+ ensure_in_head("meta[charset]", "meta")["charset"] = value
102
92
  value
103
93
  end
94
+
95
+ private
96
+
97
+ # The first node matching +css_query+, or a freshly created <+tag+>
98
+ # appended to <head> (or the root when the document has no head). Shared by
99
+ # #title= and #meta_encoding=, which then set content / attributes on it.
100
+ def ensure_in_head(css_query, tag)
101
+ at_css(css_query) || Element.new(tag, self).tap { |el| (head || root).add_child(el) }
102
+ end
104
103
  end
105
104
  end
106
105
  end
@@ -10,7 +10,6 @@ module Makiri
10
10
  alias_method :attr, :[]
11
11
  alias_method :get_attribute, :[]
12
12
  alias_method :has_attribute?, :key?
13
- alias_method :remove_attribute, :delete
14
13
  alias_method :node_name, :name
15
14
  alias_method :node_name=, :name=
16
15
  alias_method :type, :node_type
@@ -48,17 +48,13 @@ module Makiri
48
48
  # Run a CSS selector against every node and return the unioned matches.
49
49
  # @return [Makiri::NodeSet]
50
50
  def css(selector)
51
- return self if empty?
52
-
53
- map { |node| node.css(selector) }.reduce(:|)
51
+ union_query(:css, selector)
54
52
  end
55
53
 
56
54
  # Run an XPath expression against every node and union the node-set results.
57
55
  # @return [Makiri::NodeSet]
58
56
  def xpath(expr)
59
- return self if empty?
60
-
61
- map { |node| node.xpath(expr) }.reduce(:|)
57
+ union_query(:xpath, expr)
62
58
  end
63
59
 
64
60
  # First node matching the CSS selector across the set, or nil.
@@ -77,9 +73,7 @@ module Makiri
77
73
  # CSS- or XPath-detecting query against every node (see {Node#search}).
78
74
  # @return [Makiri::NodeSet]
79
75
  def search(path)
80
- return self if empty?
81
-
82
- map { |node| node.search(path) }.reduce(:|)
76
+ union_query(:search, path)
83
77
  end
84
78
 
85
79
  # Remove the named attribute from every node in the set.
@@ -109,5 +103,16 @@ module Makiri
109
103
  def inspect
110
104
  "#<#{self.class.name} length=#{length}>"
111
105
  end
106
+
107
+ private
108
+
109
+ # Run +method+(+arg+) on every node in the set and union the per-node
110
+ # results. An empty set returns self unchanged, so it stays a NodeSet (the
111
+ # shared shape behind #css / #xpath / #search).
112
+ def union_query(method, arg)
113
+ return self if empty?
114
+
115
+ map { |node| node.public_send(method, arg) }.reduce(:|)
116
+ end
112
117
  end
113
118
  end
@@ -14,5 +14,13 @@ module Makiri
14
14
  def self.new(document, target, content)
15
15
  Makiri::Document.coerce!(document).create_processing_instruction(target, content)
16
16
  end
17
+
18
+ # DOM `ProcessingInstruction#target` — the PI's target name. Defined once on
19
+ # the shared base so both backends expose it: the XML node reaches it here
20
+ # (its target IS its node name), while the HTML node overrides it with a
21
+ # native implementation earlier in the ancestor chain.
22
+ def target
23
+ name
24
+ end
17
25
  end
18
26
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Makiri
4
- VERSION = "0.4.0"
4
+ VERSION = "0.5.0"
5
5
  end
@@ -30,7 +30,7 @@ module Makiri
30
30
  #
31
31
  # Tag names that collide with a Ruby/Kernel method (or with one of this
32
32
  # builder's own helpers below - +text+, +cdata+, +comment+, +doc+, +parent+,
33
- # +to_xml+, +to_s+) must be written with a trailing underscore, which is
33
+ # +to_xml+, +to_s+, +descend+) must be written with a trailing underscore, which is
34
34
  # stripped: +xml.id_("9")+ produces +<id>9</id>+. This matches Nokogiri.
35
35
  #
36
36
  # A namespace prefix is selected for the next element with +[]+:
@@ -137,6 +137,22 @@ module Makiri
137
137
  true
138
138
  end
139
139
 
140
+ # Run +block+ with +node+ as the current parent, restoring the previous
141
+ # parent afterward (even if the block raises) and returning the block's
142
+ # value. The single place +@parent+ is pushed/popped - shared by #insert and
143
+ # by {NodeBuilder}'s nested-block chain, so neither manipulates the parent
144
+ # state directly. Public so NodeBuilder (a separate class) can reuse it
145
+ # without reaching into a private method.
146
+ def descend(node, &block)
147
+ previous = @parent
148
+ @parent = node
149
+ begin
150
+ run(&block)
151
+ ensure
152
+ @parent = previous
153
+ end
154
+ end
155
+
140
156
  private
141
157
 
142
158
  # Translate the Nokogiri-style trailing arguments (a Hash is attributes,
@@ -177,21 +193,6 @@ module Makiri
177
193
  NodeBuilder.new(node, self)
178
194
  end
179
195
 
180
- # Run +block+ with +node+ as the current parent, restoring the previous
181
- # parent afterward (even if the block raises) and returning the block's
182
- # value. The single place the parent is pushed/popped - shared by #insert and
183
- # NodeBuilder's nested-block chain, so neither manipulates the parent state
184
- # directly.
185
- def descend(node, &block)
186
- previous = @parent
187
- @parent = node
188
- begin
189
- run(&block)
190
- ensure
191
- @parent = previous
192
- end
193
- end
194
-
195
196
  # Run a DSL block, choosing instance_eval vs yield once (from the first
196
197
  # block seen), the same way Nokogiri does, so the form is consistent
197
198
  # throughout a build.
@@ -239,17 +240,15 @@ module Makiri
239
240
  when /\A(.*)=\z/
240
241
  @node[Regexp.last_match(1)] = args.first
241
242
  else
242
- @node["class"] = ((@node["class"] || "").split(/\s/) + [method.to_s]).join(" ")
243
+ append_attr("class", method.to_s)
243
244
  @node.content = args.first if args.first
244
245
  end
245
246
 
246
- opts.each do |key, value|
247
- @node[key.to_s] = ((@node[key.to_s] || "").split(/\s/) + [value]).join(" ")
248
- end
247
+ opts.each { |key, value| append_attr(key.to_s, value) }
249
248
 
250
249
  # Descend into this node for a nested block via the builder's own parent
251
250
  # stack (with its ensure-based restore), rather than re-rooting it by hand.
252
- return @doc_builder.send(:descend, @node, &block) if block
251
+ return @doc_builder.descend(@node, &block) if block
253
252
 
254
253
  self
255
254
  end
@@ -257,6 +256,15 @@ module Makiri
257
256
  def respond_to_missing?(_name, _include_private = false)
258
257
  true
259
258
  end
259
+
260
+ private
261
+
262
+ # Append +value+ as a space-separated token to the +key+ attribute,
263
+ # preserving any existing tokens. The shared idiom behind the terse
264
+ # class-append and the trailing-Hash attribute shortcut.
265
+ def append_attr(key, value)
266
+ @node[key] = ((@node[key] || "").split(/\s/) + [value]).join(" ")
267
+ end
260
268
  end
261
269
  end
262
270
  end
@@ -14,10 +14,18 @@ module Makiri
14
14
  # The bulk of the implementation lives in C (see
15
15
  # ext/makiri/glue/ruby_xpath.c and ext/makiri/xpath/).
16
16
  class XPathContext
17
- # +#evaluate+ is defined in C. It evaluates under the GVL (XPath never
18
- # releases it), so concurrent +evaluate+ / +register_*+ / +node=+ on the
19
- # same context - and any tree mutation of the document being queried - are
20
- # serialised by the GVL and cannot corrupt memory.
17
+ # +#evaluate+ is defined in C and runs under the GVL (XPath never releases
18
+ # it), so it cannot corrupt memory under concurrency. Two distinct hazards,
19
+ # handled differently:
20
+ #
21
+ # * Cross-thread: the GVL serialises all of +evaluate+ / +register_*+ /
22
+ # +node=+ and any tree mutation of the queried document, so threads can
23
+ # never run two of them at once.
24
+ # * Same-thread re-entrancy: a custom function handler runs mid-evaluate and
25
+ # could call back into this same context. A re-entrant +register_namespace+
26
+ # / +register_variable+ / +node=+ is refused (raises) while an evaluate is
27
+ # in progress, since it would free/swap state the suspended evaluator still
28
+ # borrows; a nested +evaluate+ is allowed.
21
29
 
22
30
  # Nokogiri-compatible name for {#register_namespace}.
23
31
  alias register_ns register_namespace
@@ -25,7 +25,7 @@ PARSER_TUS = %w[
25
25
  ext/makiri/xpath/mkr_xpath_funcs_body.h
26
26
  ext/makiri/xpath/mkr_xpath_value_body.h
27
27
  ext/makiri/bridge/ruby_string.c
28
- ext/makiri/lexbor_compat/source_loc.c
28
+ ext/makiri/dom_adapter/source_loc.c
29
29
  ].freeze
30
30
 
31
31
  RULES = [
@@ -33,20 +33,20 @@ ignore_paths:
33
33
  rule: html_doc_unwrap_boundary
34
34
  reason: the HTML branch of mkr_xpath_context_for, entered only after the XML kind returns early.
35
35
  # mkr_parsed_html_doc (asserts kind == HTML)
36
- - path: ext/makiri/lexbor_compat/compat.h
36
+ - path: ext/makiri/dom_adapter/compat.h
37
37
  rule: parsed_html_doc_boundary
38
38
  reason: declaration of the HTML parsed-document accessor.
39
- - path: ext/makiri/lexbor_compat/post_parse.c
39
+ - path: ext/makiri/dom_adapter/post_parse.c
40
40
  rule: parsed_html_doc_boundary
41
41
  reason: definition (the kind assert lives here) + HTML post-parse pipeline.
42
42
  - path: ext/makiri/glue/ruby_doc.c
43
43
  rule: parsed_html_doc_boundary
44
44
  reason: mkr_html_doc_unwrap is defined here over the HTML parsed document.
45
45
  # mkr_parsed_xml_doc (XML arena accessor) — kept out of the pure-HTML glue files
46
- - path: ext/makiri/lexbor_compat/compat.h
46
+ - path: ext/makiri/dom_adapter/compat.h
47
47
  rule: parsed_xml_doc_boundary
48
48
  reason: declaration of the XML parsed-document accessor.
49
- - path: ext/makiri/lexbor_compat/post_parse.c
49
+ - path: ext/makiri/dom_adapter/post_parse.c
50
50
  rule: parsed_xml_doc_boundary
51
51
  reason: definition + XML document wrapping/teardown.
52
52
  - path: ext/makiri/glue/ruby_xml.c
@@ -65,7 +65,7 @@ ignore_paths:
65
65
  rule: parsed_xml_doc_boundary
66
66
  reason: the kind-aware mkr_node_raw resolves an XML Document only after a MKR_DOC_XML check.
67
67
  # owner_document (HTML-only lxb_dom_node_t field)
68
- - path: ext/makiri/lexbor_compat/post_parse.c
68
+ - path: ext/makiri/dom_adapter/post_parse.c
69
69
  rule: owner_document_boundary
70
70
  reason: mkr_lxb_document_bytes resolves a Lexbor node's owner document to size its mraw pools (HTML-only; the XML serializer uses arena_bytes instead).
71
71
  - path: ext/makiri/glue/ruby_html_node.c
@@ -80,6 +80,9 @@ ignore_paths:
80
80
  - path: ext/makiri/xpath/mkr_xpath_node_access_html.h
81
81
  rule: owner_document_boundary
82
82
  reason: the HTML monomorphization of the engine's node-access layer.
83
+ - path: ext/makiri/dom_adapter/cross_import.c
84
+ rule: owner_document_boundary
85
+ reason: mkr_html_ns_uri reads the SOURCE HTML node's owner document ns table to resolve its namespace id to a URI; reached only on the HTML (lxb) side of HTML->XML translation, never on an XML node (no cross-kind document comparison).
83
86
  # mkr_node_raw (kind-agnostic void* raw pointer; never dereferenced as a typed node)
84
87
  - path: ext/makiri/glue/glue.h
85
88
  rule: node_raw_boundary
@@ -15,6 +15,8 @@ ITERATIONS = Integer(ENV.fetch("LEAKS_ITERATIONS", "120"))
15
15
 
16
16
  HTML = "<div id=m class='a b'><ul><li class=item>x</li><li>y<svg><path/></svg></li></ul><p>t&amp;</p></div>"
17
17
  XML = %(<r xmlns:p="urn:p" xmlns="urn:d"><a id="1">t</a><p:b/><!--c--><![CDATA[z]]></r>)
18
+ CSS = "div.a, #b > span { color: red !important; --v: 1px }\n" \
19
+ "@media (min-width: 600px) { .x { opacity: 0 } }\n@font-face { font-family: y }"
18
20
 
19
21
  handler = Class.new { def my_fn(nodes) = nodes.length.to_s }.new
20
22
 
@@ -57,6 +59,11 @@ ITERATIONS.times do |i|
57
59
  ctx.register_namespace("d", "urn:d"); ctx.register_variable("v", "1")
58
60
  ctx.evaluate("//d:a[@id=$v]"); ctx.evaluate("//d:a[@id=$v]")
59
61
  begin ctx.evaluate("//(") rescue Makiri::XPath::SyntaxError; end
62
+
63
+ # --- Lexbor CSS stylesheet parser (per-call parser+stylesheet lifetime,
64
+ # freed under rb_ensure) including the NUL-reject raise path ---
65
+ Makiri::Lexbor::CSS.parse_stylesheet(CSS)
66
+ begin Makiri::Lexbor::CSS.parse_stylesheet("a{}\0x") rescue Makiri::Error; end
60
67
  end
61
68
 
62
69
  GC.start
@@ -0,0 +1,140 @@
1
+ # Valgrind suppressions for `rake spec:valgrind` (ruby_memcheck).
2
+ #
3
+ # ruby_memcheck auto-loads every `<suppressions>/<ruby-version>.supp` it finds,
4
+ # and `ruby.supp` (the bare RUBY_ENGINE name) matches every Ruby version. These
5
+ # entries are ADDED to the ones the gem already bundles.
6
+ #
7
+ # Why this file exists: our job runs with `--undef-value-errors=yes` and
8
+ # `filter_all_errors: true`, which surfaces *every* uninitialised-value report
9
+ # whose stack touches the makiri binary. CRuby's garbage collector legitimately
10
+ # reads uninitialised memory - the conservative machine-stack scan reads stale
11
+ # stack words, and incremental mark/sweep reads not-yet-written RVALUE flags -
12
+ # and when a GC cycle fires inside one of our allocations (or marks through our
13
+ # mark callback) the makiri frame is on the stack, so the binary-touch filter
14
+ # keeps the report. The gem's bundled ruby.supp only covers `each_location*`
15
+ # under `Memcheck:Addr8`, not the `Cond`/`Value8` reads we see, so ~190 of these
16
+ # Ruby-GC false positives slip through and fail the run.
17
+ #
18
+ # Each suppression below is anchored on a CRuby GC driver function (gc marks /
19
+ # sweep / conservative root scan), with leading `...` absorbing the GC-internal
20
+ # error-origin frames above it. A real uninitialised read in our own code does
21
+ # NOT descend from these GC drivers, so it still fails the gate. We do NOT
22
+ # suppress Addr/Overlap/Param/etc. - only the GC uninit reads and the one
23
+ # Ruby-internal leak (the VM method-cache id_table) never freed before exit.
24
+
25
+ # ---- conservative GC: uninitialised-value reads (Memcheck:Cond) ----
26
+ {
27
+ ruby-gc-cond-garbage_collect
28
+ Memcheck:Cond
29
+ ...
30
+ fun:garbage_collect
31
+ }
32
+ {
33
+ ruby-gc-cond-gc_start
34
+ Memcheck:Cond
35
+ ...
36
+ fun:gc_start
37
+ }
38
+ {
39
+ ruby-gc-cond-gc_marks
40
+ Memcheck:Cond
41
+ ...
42
+ fun:gc_marks*
43
+ }
44
+ {
45
+ ruby-gc-cond-gc_sweep
46
+ Memcheck:Cond
47
+ ...
48
+ fun:gc_sweep*
49
+ }
50
+ {
51
+ ruby-gc-cond-rb_gc_impl_mark
52
+ Memcheck:Cond
53
+ ...
54
+ fun:rb_gc_impl_mark*
55
+ }
56
+ {
57
+ ruby-gc-cond-machine_context
58
+ Memcheck:Cond
59
+ ...
60
+ fun:rb_gc_mark_machine_context
61
+ }
62
+ {
63
+ ruby-gc-cond-mark_roots
64
+ Memcheck:Cond
65
+ ...
66
+ fun:mark_roots
67
+ }
68
+
69
+ # ---- conservative GC: uninitialised-value reads (Memcheck:Value8, plus gc_aging Cond) ----
70
+ # `gc_aging` (Ruby's default.c) ages objects during incremental marking and
71
+ # reads the not-yet-written RVALUE flag word - the same false positive as the
72
+ # drivers below, but here it is the ONLY resolved frame (every caller is
73
+ # `<unknown stack frame>`), so the `...`-then-driver anchors miss it. Anchor on
74
+ # the top frame itself (the `.part.0` GCC partial-inline clone is matched by `*`).
75
+ {
76
+ ruby-gc-cond-gc_aging
77
+ Memcheck:Cond
78
+ fun:gc_aging*
79
+ }
80
+ {
81
+ ruby-gc-value8-gc_aging
82
+ Memcheck:Value8
83
+ fun:gc_aging*
84
+ }
85
+ {
86
+ ruby-gc-value8-garbage_collect
87
+ Memcheck:Value8
88
+ ...
89
+ fun:garbage_collect
90
+ }
91
+ {
92
+ ruby-gc-value8-gc_start
93
+ Memcheck:Value8
94
+ ...
95
+ fun:gc_start
96
+ }
97
+ {
98
+ ruby-gc-value8-gc_marks
99
+ Memcheck:Value8
100
+ ...
101
+ fun:gc_marks*
102
+ }
103
+ {
104
+ ruby-gc-value8-gc_sweep
105
+ Memcheck:Value8
106
+ ...
107
+ fun:gc_sweep*
108
+ }
109
+ {
110
+ ruby-gc-value8-rb_gc_impl_mark
111
+ Memcheck:Value8
112
+ ...
113
+ fun:rb_gc_impl_mark*
114
+ }
115
+ {
116
+ ruby-gc-value8-machine_context
117
+ Memcheck:Value8
118
+ ...
119
+ fun:rb_gc_mark_machine_context
120
+ }
121
+ {
122
+ ruby-gc-value8-mark_roots
123
+ Memcheck:Value8
124
+ ...
125
+ fun:mark_roots
126
+ }
127
+
128
+ # ---- Ruby VM internal caches that are never freed before exit ----
129
+ # The global constant-cache / inline method-cache id_table is allocated lazily
130
+ # on method lookup and lives for the life of the VM, so RUBY_FREE_AT_EXIT still
131
+ # reports it as "definitely lost" whenever the first lookup happened under one
132
+ # of our cfuncs. Not ours to free.
133
+ {
134
+ ruby-vm-method-cache-id_table
135
+ Memcheck:Leak
136
+ ...
137
+ fun:rb_id_table_create
138
+ fun:vm_search_cc
139
+ ...
140
+ }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: makiri
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - takahashim
@@ -109,6 +109,7 @@ files:
109
109
  - script/check_leaks.rb
110
110
  - script/leaks_harness.rb
111
111
  - sig/makiri.rbs
112
+ - suppressions/ruby.supp
112
113
  homepage: https://github.com/takahashim/makiri
113
114
  licenses:
114
115
  - Apache-2.0