makiri 0.1.0-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,221 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Makiri
4
+ # Base class for every DOM node (element, attribute, text, comment, ...).
5
+ # The bulk of the API lives in the C extension; this file defines the
6
+ # Ruby-only conveniences.
7
+ class Node
8
+ # Identity is by wrapped node pointer; defined in C.
9
+
10
+ # @return [Boolean]
11
def element?
  # Plain class-membership check against Element.
  kind_of?(Element)
end
14
+
15
+ # @return [Boolean]
16
def text?
  # Plain class-membership check against Text.
  kind_of?(Text)
end
19
+
20
+ # @return [Boolean]
21
def comment?
  # Plain class-membership check against Comment.
  kind_of?(Comment)
end
24
+
25
+ # @return [Boolean]
26
def attribute?
  # Plain class-membership check against Attribute.
  kind_of?(Attribute)
end
29
+
30
+ # @return [Boolean]
31
def document?
  # Plain class-membership check against Document.
  kind_of?(Document)
end
34
+
35
+ # @return [Boolean]
36
def processing_instruction?
  # Plain class-membership check against ProcessingInstruction.
  kind_of?(ProcessingInstruction)
end
39
+
40
+ # @return [Boolean]
41
def document_fragment?
  # Plain class-membership check against DocumentFragment.
  kind_of?(DocumentFragment)
end
44
+
45
+ # @return [Boolean] true for a blank/whitespace-only text or CDATA node.
46
def blank?
  # Only text and CDATA nodes can be blank; every other node answers false
  # without its content being read.
  return false unless text? || is_a?(CData)

  content.strip.empty?
end
49
+
50
+ # --- Nokogiri-compatible aliases over the core API ---
51
+
52
+ # Attribute value by name (alias for {#[]}). Use {#attribute} for the node.
53
# Attribute access under Nokogiri's names.
alias attr []
alias get_attribute []
alias has_attribute? key?
alias remove_attribute delete
# Node name and type under Nokogiri's names.
alias node_name name
alias node_name= name=
alias type node_type
# Shorthand predicates.
alias elem? element?
alias fragment? document_fragment?
62
+
63
+ # Set an attribute (alias for {#[]=}). @return [String]
64
def set_attribute(name, value)
  # Store through []= and return the value that was passed in, whatever the
  # underlying []= itself returns.
  self[name] = value
  value
end
67
+
68
+ # The Attribute node named +name+, or nil (cf. {#[]}, which returns the value).
69
+ # @return [Makiri::Attribute, nil]
70
def attribute(name)
  # Look the node up by its string name in the name => Attribute map.
  by_name = attributes
  by_name[name.to_s]
end
73
+
74
+ # --- CSS class helpers (operate on the `class` attribute) ---
75
+
76
+ # @return [Array<String>] the element's class names.
77
def classes
  # A missing attribute (nil) becomes "" and therefore an empty list.
  tokens = self["class"].to_s.split(/\s+/)
  # Leading whitespace leaves one empty first token; drop it.
  tokens.reject { |token| token.empty? }
end
80
+
81
+ # Add each class in +names+ (space-separated) that is not already present.
82
+ # @return [self]
83
def add_class(names)
  current = classes
  # Candidates are checked against the classes present before any are added.
  fresh = names.to_s.split(/\s+/).reject { |token| token.empty? || current.include?(token) }
  self["class"] = (current + fresh).join(" ")
  self
end
89
+
90
+ # Append each class in +names+ unconditionally (duplicates allowed).
91
+ # @return [self]
92
def append_class(names)
  combined = classes
  # Every non-empty token is appended, even if it is already present.
  names.to_s.split(/\s+/).each { |token| combined << token unless token.empty? }
  self["class"] = combined.join(" ")
  self
end
96
+
97
+ # Remove each class in +names+ (or every class when +names+ is nil); drops
98
+ # the `class` attribute entirely when none remain.
99
+ # @return [self]
100
def remove_class(names = nil)
  # With no argument nothing is kept, and the current classes are not read.
  kept = names.nil? ? [] : classes - names.to_s.split(/\s+/)
  if kept.empty?
    delete("class")
  else
    self["class"] = kept.join(" ")
  end
  self
end
109
+
110
+ # Yield this node and every descendant, depth-first, children before self
111
+ # (post-order, matching Nokogiri).
112
+ # @return [self]
113
def traverse(&block)
  # Without a block, return an Enumerator over the same post-order walk
  # rather than failing with NoMethodError on a nil block.
  return enum_for(:traverse) unless block

  # Children first, then this node (post-order).
  children.each { |child| child.traverse(&block) }
  block.call(self)
  self
end
118
+
119
+ # The root element of the owning document (e.g. <html>).
120
+ # @return [Makiri::Element, nil]
121
def root
  # Delegate to the owning document.
  owner = document
  owner.root
end
124
+
125
+ # Attributes as a name => Attribute Hash (empty for non-elements).
126
+ # @return [Hash{String => Makiri::Attribute}]
127
def attributes
  by_name = {}
  # A later attribute with the same name replaces an earlier one.
  attribute_nodes.each { |node| by_name[node.name] = node }
  by_name
end
130
+
131
+ # Attributes as a plain name => value Hash (empty for non-elements).
132
+ # @return [Hash{String => String}]
133
def to_h
  values = {}
  # Same walk as #attributes, but storing each attribute's value.
  attribute_nodes.each { |node| values[node.name] = node.value }
  values
end
136
+
137
+ # Query with CSS or XPath, auto-detecting which from the string shape.
138
+ # Strings that look like a location path (start with "/", "./", "..", ".//",
139
+ # "(", "@" or contain "::") are treated as XPath; everything else as CSS.
140
+ # @return [Makiri::NodeSet, String, Float, Boolean]
141
def search(path)
  # XPath-looking strings go to #xpath; everything else is treated as CSS.
  return xpath(path) if xpath?(path)

  css(path)
end
144
+
145
+ # First result of {#search}: the first node for a node-set, else the value.
146
def at(path)
  found = search(path)
  # Scalar results (String, Float, Boolean) are returned unchanged.
  return found unless found.is_a?(NodeSet)

  found.first
end
150
+
151
+ # An absolute XPath that locates this node, e.g. "/html/body/p[2]".
152
+ # Element/text/comment steps carry a 1-based position among same-kind
153
+ # siblings (omitted when unique); attributes use "@name". Round-trips
154
+ # through {#at_xpath}.
155
+ # @return [String]
156
def path
  return "/" if document?

  # Collect steps from this node upward, stopping at the document (or at a
  # nil parent for a detached subtree), then flip into root-first order.
  steps = []
  current = self
  while !current.nil? && !current.document?
    steps << current.send(:path_segment)
    current = current.parent
  end
  steps.reverse!
  "/#{steps.join("/")}"
end
167
+
168
+ # Inspect representation. Avoids dumping the whole subtree.
169
def inspect
  # A bare `name` call raises NameError (not NoMethodError) when the method
  # is absent, so the rescue below cannot cover that case; test for the
  # method explicitly instead.
  return "#<#{self.class.name}>" unless respond_to?(:name)

  "#<#{self.class.name} name=#{name.inspect}>"
rescue NoMethodError
  # Kept from the original: a NoMethodError raised while computing the name
  # also falls back to the short form.
  "#<#{self.class.name}>"
end
174
+
175
+ private
176
+
177
+ # Heuristic used by {#search}: does +path+ look like an XPath location path
178
+ # rather than a CSS selector?
179
def xpath?(path)
  s = path.to_s.strip
  # ".." (not just "../") is matched so that a bare parent step is routed to
  # XPath, as the documentation on #search describes; a CSS selector cannot
  # begin with two dots. ".//" needs no entry of its own: "./" covers it.
  s.start_with?("/", "./", "..", "(", "@") || s.include?("::")
end
183
+
184
+ # One "/"-separated step of {#path} for this node.
185
def path_segment
  return "@#{name}" if attribute?

  container = parent
  return step_name unless container

  # Position is counted only among siblings of the same step kind, and is
  # omitted when this node is the only one.
  peers = container.children.select { |candidate| same_step_kind?(candidate) }
  if peers.length > 1
    "#{step_name}[#{peers.index(self) + 1}]"
  else
    step_name
  end
end
196
+
197
+ # The node-test portion of a path step (without any position predicate).
198
def step_name
  return "text()" if text?
  return "comment()" if comment?

  # NOTE(review): every other node kind, not only elements, falls through to
  # its name (e.g. a processing instruction) — confirm that yields a valid step.
  name
end
207
+
208
+ # Whether +other+ shares this node's path-step kind (for position counting).
209
def same_step_kind?(other)
  # Elements match on tag name; text and comment nodes match on kind alone.
  return other.element? && other.name == name if element?
  return other.text? if text?
  return other.comment? if comment?

  # Any other node kind never shares a step kind with a sibling.
  false
end
220
+ end
221
+ end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
module Makiri
  # An ordered collection of nodes, returned by xpath/css queries.
  #
  # This file only adds Ruby-level conveniences. +#length+, +#[]+, +#each+ and
  # +#|+ are used below but not defined here (presumably supplied by the C
  # extension — confirm).
  class NodeSet
    include Enumerable

    # @return [Integer] number of nodes (same as +#length+).
    def size
      length
    end

    # @return [Boolean] true when the set holds no nodes.
    def empty?
      length.zero?
    end

    # The first node, or the first +n+ nodes when a count is given.
    #
    # Defining +first+ here shadows Enumerable#first, so the counted form is
    # forwarded to it explicitly; otherwise +set.first(2)+ would raise
    # ArgumentError.
    #
    # @param n [Integer, nil]
    # @return [Makiri::Node, nil, Array<Makiri::Node>]
    def first(n = nil)
      return self[0] if n.nil?

      super(n)
    end

    # The last node, or the last +n+ nodes when a count is given.
    #
    # @param n [Integer, nil]
    # @return [Makiri::Node, nil, Array<Makiri::Node>]
    def last(n = nil)
      return to_a.last(n) unless n.nil?
      # Answer nil for an empty set directly rather than asking #[] for
      # index -1, whose handling is not visible from this file.
      return nil if empty?

      self[length - 1]
    end

    # Index access; alias for #[].
    # @return [Makiri::Node, nil]
    def at(index)
      self[index]
    end

    # Concatenated outer HTML of every node in the set.
    # @return [String]
    def to_html
      map(&:to_html).join
    end
    alias to_s to_html

    # Concatenated text content of every node in the set.
    # @return [String]
    def text
      map(&:text).join
    end
    alias inner_text text

    # Run a CSS selector against every node and return the unioned matches.
    # An empty set returns itself.
    # @return [Makiri::NodeSet]
    def css(selector)
      return self if empty?

      map { |node| node.css(selector) }.reduce(:|)
    end

    # Run an XPath expression against every node and union the node-set results.
    # An empty set returns itself.
    #
    # NOTE(review): per-node results are combined with +|+. If +expr+ yields a
    # scalar (String/Float) and the set has more than one node, that reduce
    # looks like it would fail — confirm the intended behaviour.
    # @return [Makiri::NodeSet]
    def xpath(expr)
      return self if empty?

      map { |node| node.xpath(expr) }.reduce(:|)
    end

    # First node matching the CSS selector across the set, or nil.
    # @return [Makiri::Node, nil]
    def at_css(selector)
      css(selector).first
    end

    # First node matching the XPath expression across the set (or the scalar
    # value for a non-node-set result).
    def at_xpath(expr)
      result = xpath(expr)
      result.is_a?(NodeSet) ? result.first : result
    end

    # CSS- or XPath-detecting query against every node (see {Node#search}).
    # An empty set returns itself.
    # @return [Makiri::NodeSet]
    def search(path)
      return self if empty?

      map { |node| node.search(path) }.reduce(:|)
    end

    # Remove the named attribute from every node in the set.
    # @return [self]
    def remove_attr(name)
      each { |node| node.delete(name) }
      self
    end
    alias remove_attribute remove_attr

    # Detach every node in the set from its tree. Iterates over a snapshot
    # Array (+to_a+) rather than the set itself.
    # @return [self]
    def remove
      to_a.each(&:remove)
      self
    end
    alias unlink remove

    # Short representation: class name and node count only.
    def inspect
      "#<#{self.class.name} length=#{length}>"
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module Makiri
  # A processing-instruction node (XML/HTML). Seldom seen in HTML5, where the
  # parser usually turns "<?...>" into a bogus comment; kept for completeness.
  # Adds nothing to Node at the Ruby level.
  class ProcessingInstruction < Node; end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module Makiri
  # A text node.
  class Text < Node
    class << self
      # Nokogiri-style constructor: builds a detached text node holding
      # +content+ by delegating to {Document#create_text_node} on +document+.
      #
      # @param content [String]
      # @param document [Makiri::Document]
      # @return [Makiri::Text]
      def new(content, document)
        document.create_text_node(content)
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Makiri
4
+ VERSION = "0.1.0"
5
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
module Makiri
  module XPath
    # Raised when an XPath expression cannot be parsed. Also the parent of
    # {LimitExceeded}, so rescuing it covers exceeded evaluation limits
    # (operation count, recursion depth, node-set cap) as well.
    class SyntaxError < ::Makiri::Error
    end

    # Raised when an evaluation budget is exhausted. Inherits from
    # SyntaxError for Nokogiri-shaped error compatibility.
    class LimitExceeded < SyntaxError
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Makiri
4
+ # Per-query XPath evaluation context. Holds the context node, namespace
5
+ # bindings, and variable bindings, and evaluates expressions against them.
6
+ #
7
+ # ctx = Makiri::XPathContext.new(doc)
8
+ # ctx.register_namespace("svg", "http://www.w3.org/2000/svg")
9
+ # ctx.evaluate("//svg:circle") # => Makiri::NodeSet
10
+ #
11
+ # +evaluate+ returns a NodeSet for node-set expressions, and a String,
12
+ # Float, or boolean for the corresponding scalar XPath types.
13
+ #
14
+ # The bulk of the implementation lives in C (see
15
+ # ext/makiri/glue/ruby_xpath.c and ext/makiri/xpath/).
16
+ class XPathContext
17
+ # +#evaluate+ is defined in C. It evaluates under the GVL (XPath never
18
+ # releases it), so concurrent +evaluate+ / +register_*+ / +node=+ on the
19
+ # same context — and any tree mutation of the document being queried — are
20
+ # serialised by the GVL and cannot corrupt memory.
21
+
22
+ # Nokogiri-compatible name for {#register_namespace}.
23
+ alias register_ns register_namespace
24
+
25
+ # Register several prefix => URI namespace bindings at once.
26
+ # @param bindings [Hash{String => String}]
27
+ # @return [self]
28
def register_namespaces(bindings)
  bindings.each do |prefix, uri|
    # Both sides are stringified, so symbol keys and values are accepted.
    ns_prefix = prefix.to_s
    ns_uri = uri.to_s
    register_namespace(ns_prefix, ns_uri)
  end
  self
end
32
+
33
+ # Register several name => value variable bindings at once.
34
+ # @param bindings [Hash{String => Object}]
35
+ # @return [self]
36
def register_variables(bindings)
  bindings.each do |name, value|
    # Only the name is stringified; the value is passed through untouched.
    variable_name = name.to_s
    register_variable(variable_name, value)
  end
  self
end
40
+ end
41
+ end
data/lib/makiri.rb ADDED
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "makiri/version"
4
+
5
+ # Native C extension. Located at lib/makiri/<ruby_abi>/makiri.{so,bundle}
6
+ # (created by rake-compiler). Loading is gated so the gem can be required
7
+ # in environments where the binary is not yet built (the require error
8
+ # is then surfaced clearly).
9
begin
  # Prefer the build staged for this Ruby's major.minor ABI...
  ruby_minor = RUBY_VERSION[/\d+\.\d+/]
  require_relative "makiri/#{ruby_minor}/makiri"
rescue LoadError
  # ...and fall back to the unversioned location otherwise.
  require_relative "makiri/makiri"
end

module Makiri
  # Base exception class for Makiri-specific errors.
  #
  # Defined before the requires below because makiri/xpath subclasses
  # ::Makiri::Error while it is being loaded; defining it afterwards would
  # only work if something earlier had already created the constant.
  class Error < StandardError; end
end

require_relative "makiri/node"
require_relative "makiri/document"
require_relative "makiri/element"
require_relative "makiri/attribute"
require_relative "makiri/text"
require_relative "makiri/comment"
require_relative "makiri/cdata"
require_relative "makiri/processing_instruction"
require_relative "makiri/document_type"
require_relative "makiri/document_fragment"
require_relative "makiri/node_set"
require_relative "makiri/xpath_context"
require_relative "makiri/xpath"
require_relative "makiri/css"

module Makiri
  # Convenience constructor mirroring Nokogiri.
  #
  # @param source [String] HTML source (UTF-8).
  # @return [Makiri::Document]
  def self.HTML(source) # rubocop:disable Naming/MethodName
    Document.parse(source)
  end

  # Alias for {.HTML}.
  #
  # @param source [String] HTML source (UTF-8).
  # @return [Makiri::Document]
  def self.parse(source)
    Document.parse(source)
  end
end
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Assemble a precompiled ("native" / fat) gem for one platform from the
5
+ # per-Ruby-ABI extension binaries already staged under
6
+ # lib/makiri/<ruby_minor>/makiri.{so,bundle}
7
+ #
8
+ # Usage: ruby script/build_native_gem.rb <gem-platform>
9
+ # e.g. ruby script/build_native_gem.rb arm64-darwin
10
+ #
11
+ # Run by .github/workflows/release.yml after downloading the compile artifacts.
12
+ # The resulting gem ships the compiled binaries (one per Ruby minor version) and
13
+ # declares no C extension, so `gem install` does NOT recompile or need cmake /
14
+ # the Lexbor submodule. lib/makiri.rb already selects lib/makiri/<minor>/makiri
15
+ # at require time.
16
+
17
+ require "rubygems"
18
+ require "rubygems/package"
19
+
20
platform = ARGV[0]
abort "usage: build_native_gem.rb <gem-platform>" if platform.nil? || platform.empty?

root = File.expand_path("..", __dir__)
gemspec_path = File.join(root, "makiri.gemspec")

# Gem::Specification.load returns nil (it does not raise) when the gemspec is
# missing or fails to evaluate; stop with a clear message instead of hitting a
# NoMethodError on nil further down.
spec = Gem::Specification.load(gemspec_path)
abort "could not load gemspec at #{gemspec_path}" if spec.nil?

libs = Dir[File.join(root, "lib", "makiri", "*", "makiri.{so,bundle}")].sort
abort "no precompiled libraries found under lib/makiri/*/ — stage them first" if libs.empty?

# Staged binaries as paths relative to the gem root, the form spec.files uses.
relative_libs = libs.map { |path| path.delete_prefix("#{root}/") }

# Native gem: ship binaries, not the C sources or the vendored Lexbor tree, and
# declare no extension so install never tries to compile.
spec.platform = platform
spec.extensions = []
spec.files = spec.files.reject { |f| f.start_with?("ext/", "vendor/") }
spec.files += relative_libs
spec.files.uniq!

# Bound the Ruby versions this binary gem serves — one subdir per ABI minor.
# Sorted numerically so that e.g. "3.10" orders after "3.9"; uniq covers an ABI
# directory that holds both a .so and a .bundle.
abis = libs.map { |path| File.basename(File.dirname(path)) }
           .uniq
           .sort_by { |v| v.split(".").map(&:to_i) }
lo_major, lo_minor = abis.first.split(".").map(&:to_i)
hi_major, hi_minor = abis.last.split(".").map(&:to_i)
# The upper bound is exclusive: everything below the next minor's ".dev".
spec.required_ruby_version = [">= #{lo_major}.#{lo_minor}.0", "< #{hi_major}.#{hi_minor + 1}.dev"]

puts "Building native gem for #{platform}"
puts "  ABIs: #{abis.join(', ')}"
puts "  ruby: #{spec.required_ruby_version}"
puts "  binaries:"
relative_libs.each { |path| puts "    #{path}" }

gem = Gem::Package.build(spec)
puts "Built #{gem}"