makiri 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/conformance.yml +22 -0
- data/.github/workflows/libfuzzer.yml +83 -0
- data/.github/workflows/release.yml +12 -7
- data/.github/workflows/security.yml +88 -3
- data/.github/workflows/valgrind.yml +135 -0
- data/CHANGELOG.md +152 -15
- data/README.md +183 -13
- data/Rakefile +294 -7
- data/ext/makiri/bridge/bridge.h +28 -0
- data/ext/makiri/bridge/ruby_string.c +282 -12
- data/ext/makiri/core/mkr_alloc.c +40 -3
- data/ext/makiri/core/mkr_alloc.h +28 -5
- data/ext/makiri/core/mkr_buf.c +47 -3
- data/ext/makiri/core/mkr_buf.h +112 -3
- data/ext/makiri/core/mkr_core.c +143 -0
- data/ext/makiri/core/mkr_core.h +11 -2
- data/ext/makiri/core/mkr_hash.h +1 -1
- data/ext/makiri/core/mkr_span.h +186 -0
- data/ext/makiri/core/mkr_text.h +8 -8
- data/ext/makiri/core/mkr_utf8.c +101 -0
- data/ext/makiri/core/mkr_utf8.h +88 -0
- data/ext/makiri/extconf.rb +123 -10
- data/ext/makiri/fuzz/Makefile +95 -0
- data/ext/makiri/fuzz/check_fuzzer.cc +4 -0
- data/ext/makiri/fuzz/xml_fuzz.c +24 -0
- data/ext/makiri/fuzz/xpath_fuzz.c +109 -0
- data/ext/makiri/glue/glue.h +55 -11
- data/ext/makiri/glue/ruby_doc.c +129 -59
- data/ext/makiri/glue/ruby_html_css.c +292 -0
- data/ext/makiri/glue/{ruby_mutate.c → ruby_html_mutate.c} +248 -52
- data/ext/makiri/glue/ruby_html_node.c +859 -0
- data/ext/makiri/glue/ruby_html_serialize.c +154 -0
- data/ext/makiri/glue/ruby_node.c +74 -729
- data/ext/makiri/glue/ruby_node_set.c +167 -32
- data/ext/makiri/glue/ruby_xml.c +602 -0
- data/ext/makiri/glue/ruby_xml_node.c +1373 -0
- data/ext/makiri/glue/ruby_xpath.c +63 -30
- data/ext/makiri/glue/ruby_xpath.h +19 -0
- data/ext/makiri/lexbor_compat/compat.h +42 -9
- data/ext/makiri/lexbor_compat/compat_internal.h +1 -1
- data/ext/makiri/lexbor_compat/dom_index.c +2 -2
- data/ext/makiri/lexbor_compat/post_parse.c +100 -10
- data/ext/makiri/lexbor_compat/source_loc.c +15 -13
- data/ext/makiri/lexbor_compat/text_index.c +14 -8
- data/ext/makiri/lexbor_compat/utf8_input.c +19 -33
- data/ext/makiri/makiri.c +184 -6
- data/ext/makiri/makiri.h +43 -2
- data/ext/makiri/xml/mkr_xml.h +125 -0
- data/ext/makiri/xml/mkr_xml_chars.c +195 -0
- data/ext/makiri/xml/mkr_xml_index.c +169 -0
- data/ext/makiri/xml/mkr_xml_index.h +48 -0
- data/ext/makiri/xml/mkr_xml_mutate.c +817 -0
- data/ext/makiri/xml/mkr_xml_mutate.h +139 -0
- data/ext/makiri/xml/mkr_xml_node.c +399 -0
- data/ext/makiri/xml/mkr_xml_node.h +184 -0
- data/ext/makiri/xml/mkr_xml_tree.c +1515 -0
- data/ext/makiri/xpath/mkr_css.c +1023 -0
- data/ext/makiri/xpath/mkr_css.h +65 -0
- data/ext/makiri/xpath/mkr_xpath.c +96 -32
- data/ext/makiri/xpath/mkr_xpath.h +109 -4
- data/ext/makiri/xpath/mkr_xpath_engine_html.c +17 -0
- data/ext/makiri/xpath/mkr_xpath_engine_xml.c +12 -0
- data/ext/makiri/xpath/{mkr_xpath_eval.c → mkr_xpath_eval_body.h} +551 -241
- data/ext/makiri/xpath/{mkr_xpath_funcs.c → mkr_xpath_funcs_body.h} +318 -276
- data/ext/makiri/xpath/mkr_xpath_internal.h +177 -206
- data/ext/makiri/xpath/mkr_xpath_lex.c +95 -125
- data/ext/makiri/xpath/mkr_xpath_node_access_html.h +138 -0
- data/ext/makiri/xpath/mkr_xpath_node_access_xml.h +145 -0
- data/ext/makiri/xpath/mkr_xpath_number.c +109 -0
- data/ext/makiri/xpath/mkr_xpath_parse.c +83 -94
- data/ext/makiri/xpath/mkr_xpath_prelude_html.h +30 -0
- data/ext/makiri/xpath/mkr_xpath_prelude_xml.h +28 -0
- data/ext/makiri/xpath/mkr_xpath_shared.c +609 -0
- data/ext/makiri/xpath/mkr_xpath_value_body.h +801 -0
- data/ext/makiri/xpath/mkr_xpath_xml_selftest.c +76 -0
- data/lib/makiri/{attribute.rb → attr.rb} +7 -3
- data/lib/makiri/cdata_section.rb +19 -0
- data/lib/makiri/comment.rb +10 -0
- data/lib/makiri/compat_aliases.rb +30 -0
- data/lib/makiri/document.rb +9 -73
- data/lib/makiri/document_fragment.rb +14 -9
- data/lib/makiri/element.rb +4 -4
- data/lib/makiri/html/document.rb +106 -0
- data/lib/makiri/html/node_methods.rb +19 -0
- data/lib/makiri/html.rb +12 -0
- data/lib/makiri/node.rb +58 -15
- data/lib/makiri/node_set.rb +8 -0
- data/lib/makiri/processing_instruction.rb +10 -0
- data/lib/makiri/text.rb +1 -1
- data/lib/makiri/version.rb +1 -1
- data/lib/makiri/xml/builder.rb +263 -0
- data/lib/makiri/xml/document.rb +24 -0
- data/lib/makiri/xml/node_methods.rb +84 -0
- data/lib/makiri/xml.rb +10 -0
- data/lib/makiri/xpath_context.rb +1 -1
- data/lib/makiri.rb +24 -5
- data/script/build_native_gem.rb +2 -2
- data/script/check_alloc_failures.rb +266 -0
- data/script/check_c_safety.rb +77 -2
- data/script/check_c_safety_allowlist.yml +102 -0
- data/script/check_leaks.rb +64 -0
- data/script/leaks_harness.rb +64 -0
- data/vendor/lexbor/CMakeLists.txt +6 -0
- data/vendor/lexbor/README.md +12 -0
- data/vendor/lexbor/config.cmake +1 -1
- data/vendor/lexbor/source/lexbor/core/base.h +1 -1
- data/vendor/lexbor/source/lexbor/core/config.cmake +9 -1
- data/vendor/lexbor/source/lexbor/css/selectors/pseudo_state.c +2 -3
- data/vendor/lexbor/source/lexbor/css/selectors/state.c +3 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/element.c +21 -0
- data/vendor/lexbor/source/lexbor/dom/interfaces/element.h +5 -0
- data/vendor/lexbor/source/lexbor/encoding/decode.c +33 -4
- data/vendor/lexbor/source/lexbor/html/base.h +1 -1
- data/vendor/lexbor/source/lexbor/html/interfaces/select_element.c +4 -0
- data/vendor/lexbor/source/lexbor/html/serialize.c +545 -41
- data/vendor/lexbor/source/lexbor/html/serialize.h +2 -1
- data/vendor/lexbor/source/lexbor/html/tokenizer.h +2 -2
- data/vendor/lexbor/source/lexbor/html/tree/insertion_mode/in_body.c +1 -1
- data/vendor/lexbor/source/lexbor/html/tree.c +6 -6
- data/vendor/lexbor/source/lexbor/selectors/selectors.c +12 -3
- data/vendor/lexbor/source/lexbor/url/base.h +1 -1
- data/vendor/lexbor/source/lexbor/url/url.c +5 -2
- data/vendor/lexbor/source/lexbor/url/url.h +9 -0
- data/vendor/lexbor/version +1 -1
- metadata +53 -9
- data/ext/makiri/glue/ruby_css.c +0 -185
- data/ext/makiri/glue/ruby_serialize.c +0 -92
- data/ext/makiri/xpath/mkr_xpath_value.c +0 -1286
- data/lib/makiri/cdata.rb +0 -6
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Makiri
  module XML
    # A Nokogiri-compatible DSL for building an XML document (or subtree) from
    # scratch. It is a thin, pure-Ruby layer over the public construction surface
    # (+XML::Document.new+, +Document#create_element+ / +#create_text_node+ /
    # +#create_cdata+ / +#create_comment+, and +Node#add_child+); no C code is
    # involved.
    #
    # @example Block-with-argument form (recommended)
    #   builder = Makiri::XML::Builder.new do |xml|
    #     xml.feed("xmlns" => "urn:a") do
    #       xml.entry do
    #         xml.title("Hello")
    #       end
    #     end
    #   end
    #   builder.to_xml
    #
    # @example instance_eval form (no block argument)
    #   builder = Makiri::XML::Builder.new do
    #     root { child("text") }
    #   end
    #
    # An element is created by calling a method named after the tag. Trailing
    # arguments follow the Nokogiri convention: a Hash sets attributes (including
    # +xmlns+ / +xmlns:prefix+ namespace declarations), any other argument becomes
    # the element's text content, and a block builds nested children.
    #
    # Tag names that collide with a Ruby/Kernel method (or with one of this
    # builder's own helpers below - +text+, +cdata+, +comment+, +doc+, +parent+,
    # +to_xml+, +to_s+) must be written with a trailing underscore, which is
    # stripped: +xml.id_("9")+ produces +<id>9</id>+. This matches Nokogiri.
    #
    # The same trailing-underscore form is required for the implicit-conversion
    # names listed in IMPLICIT_CONVERSIONS (+to_ary+, +to_str+, ...). Ruby core
    # sends those speculatively (e.g. +Array#flatten+, +Kernel#puts+, multiple
    # assignment), so they are deliberately never treated as tags.
    #
    # A namespace prefix is selected for the next element with +[]+:
    # +xml["dc"].title+ produces +<dc:title>+ (the prefix must be in scope, i.e.
    # declared via an +"xmlns:dc"+ attribute on an ancestor or on the element
    # itself, exactly as Makiri resolves prefixes at insertion time).
    class Builder
      # Method names Ruby uses to probe an object for implicit conversion. They
      # are excluded from the open-ended tag / attribute-shortcut dispatch in
      # both Builder and NodeBuilder: answering them through +method_missing+
      # would silently mutate the document being built and then fail the
      # conversion with a TypeError.
      IMPLICIT_CONVERSIONS = %i[
        to_ary to_str to_hash to_proc to_io to_path to_int to_regexp
      ].freeze
      private_constant :IMPLICIT_CONVERSIONS

      # The document being built (a {Makiri::XML::Document}).
      attr_reader :doc

      # The node new children are currently appended to. While a nested block is
      # running this is that block's element; otherwise it is {#doc}.
      attr_reader :parent

      # @param options [Hash] accepted for Nokogiri compatibility and ignored - a
      #   Makiri document has no configurable options (it is always UTF-8).
      # @param root [Makiri::XML::Node, nil] when given, build into this node:
      #   top-level calls append to it and its document is used. (This is what
      #   {.with} passes; mirrors +Nokogiri::XML::Builder.new(options, root)+.)
      # @yield [self] when the block takes an argument; otherwise the block is
      #   +instance_eval+'d against the builder.
      def initialize(options = {}, root = nil, &block)
        if root
          @doc = root.document
          @parent = root
        else
          @parent = @doc = Makiri::XML::Document.new
        end
        @ns_prefix = nil
        @arity = nil
        return unless block

        run(&block)
        @parent = @doc # like Nokogiri: after a build block, settle back at the document
      end

      # Build into an existing node: top-level calls append to +node+, using
      # +node+'s document. Mirrors +Nokogiri::XML::Builder.with+.
      #
      # @param node [Makiri::XML::Node]
      # @return [Builder]
      def self.with(node, &block)
        new({}, node, &block)
      end

      # Append a text node to the current parent.
      # @param string [#to_s] the text content
      # @return [NodeBuilder]
      def text(string)
        insert(@doc.create_text_node(string.to_s))
      end

      # Append a CDATA section to the current parent.
      # @param string [#to_s] the section content
      # @return [NodeBuilder]
      def cdata(string)
        insert(@doc.create_cdata(string.to_s))
      end

      # Append a comment node to the current parent.
      # @param string [#to_s] the comment content
      # @return [NodeBuilder]
      def comment(string)
        insert(@doc.create_comment(string.to_s))
      end

      # Parse +string+ as an XML fragment (against the document's in-scope
      # namespaces) and append its children to the current parent. The Builder
      # analogue of +Nokogiri::XML::Builder#<<+.
      # @return [self]
      def <<(string)
        # Snapshot the children with to_a before inserting them one by one.
        @doc.fragment(string).children.to_a.each { |child| insert(child) }
        self
      end

      # Select the namespace prefix for the next element (consumed by the next
      # tag method). Returns self so it reads as +xml["dc"].title+.
      # @return [self]
      def [](ns_prefix)
        @ns_prefix = ns_prefix.to_s
        self
      end

      # Serialize the built document. Forwards to {Document#to_xml} (so +pretty:+
      # works).
      def to_xml(...)
        @doc.to_xml(...)
      end
      alias_method :to_s, :to_xml

      # Any other method name is a tag: create the element and insert it.
      #
      # One trailing +_+ or +!+ is stripped from the name, and a prefix selected
      # with {#[]} is consumed and prepended as +prefix:tag+. Implicit-conversion
      # probes (IMPLICIT_CONVERSIONS) are passed to +super+ instead, so they
      # raise NoMethodError rather than inserting an element.
      #
      # @return [NodeBuilder] wrapper over the inserted element
      # @raise [ArgumentError] when a selected prefix is not declared in scope
      def method_missing(name, *args, &block)
        return super if IMPLICIT_CONVERSIONS.include?(name)

        tag = name.to_s.sub(/[_!]\z/, "")
        prefix = @ns_prefix
        if prefix
          tag = "#{prefix}:#{tag}"
          @ns_prefix = nil
        end
        node = create_element(tag, args)
        check_prefix_defined!(node, prefix) if prefix
        insert(node, &block)
      end

      # Tag methods are open-ended, so report respond_to? truthfully for them
      # (anything that is not already a real method is a candidate tag) - except
      # for the implicit-conversion probes, which must be declined so that Ruby
      # core never dispatches them into {#method_missing}.
      def respond_to_missing?(name, _include_private = false)
        !IMPLICIT_CONVERSIONS.include?(name)
      end

      private

      # Translate the Nokogiri-style trailing arguments (a Hash is attributes,
      # anything else is text content) into a {Document#create_element} call.
      # Several Hashes are merged left to right; the last non-Hash argument wins
      # as the text content.
      def create_element(tag, args)
        text = nil
        attributes = nil
        args.each do |arg|
          if arg.is_a?(Hash)
            attributes = attributes ? attributes.merge(arg) : arg
          else
            text = arg
          end
        end
        cargs = []
        cargs << text.to_s unless text.nil?
        cargs << attributes unless attributes.nil?
        @doc.create_element(tag, *cargs)
      end

      # Raise like Nokogiri when a prefix selected via +[]+ resolves nowhere: not
      # in scope at the insertion point (+@parent+ and its ancestors) and not
      # self-declared on +node+ itself (e.g. +xml["foo"].root("xmlns:foo" => ...)+).
      def check_prefix_defined!(node, prefix)
        return if @parent.respond_to?(:namespaces) && @parent.namespaces.key?("xmlns:#{prefix}")
        return if node.namespace_definitions.any? { |ns| ns.prefix == prefix }

        raise ArgumentError, "Namespace #{prefix} has not been defined"
      end

      # Append +node+ to the current parent, then, if a block is given, descend
      # into the inserted node for the duration of that block. Returns a
      # {NodeBuilder} over the inserted node (which may be an imported copy, e.g.
      # from a fragment), so the Nokogiri attribute-shortcut chain works.
      def insert(node, &block)
        node = @parent.add_child(node)
        descend(node, &block) if block
        NodeBuilder.new(node, self)
      end

      # Run +block+ with +node+ as the current parent, restoring the previous
      # parent afterward (even if the block raises) and returning the block's
      # value. The single place the parent is pushed/popped - shared by #insert and
      # NodeBuilder's nested-block chain, so neither manipulates the parent state
      # directly.
      def descend(node, &block)
        previous = @parent
        @parent = node
        begin
          run(&block)
        ensure
          @parent = previous
        end
      end

      # Run a DSL block, choosing instance_eval vs yield once (from the first
      # block seen), the same way Nokogiri does, so the form is consistent
      # throughout a build.
      def run(&block)
        @arity = block.arity if @arity.nil?
        if @arity <= 0
          instance_eval(&block)
        else
          block.call(self)
        end
      end

      # The value returned by each element call, wrapping the just-inserted node
      # so attributes can be added with the Nokogiri terse-chain syntax:
      #
      #   xml.object.classy.thing!   # => <object class="classy" id="thing"/>
      #
      # A plain method name appends to the +class+ attribute, +name!+ sets +id+
      # (and content if given), +name=+ sets that attribute, a trailing Hash adds
      # each key as an attribute, and a block descends into the node. +[]+ / +[]=+
      # read and write attributes directly. Semantics mirror
      # +Nokogiri::XML::Builder::NodeBuilder+, except that implicit-conversion
      # probes (+to_ary+, +to_str+, ...) are never treated as class names.
      class NodeBuilder
        # @param node the inserted node being decorated
        # @param doc_builder [Builder] the builder that owns the parent stack
        def initialize(node, doc_builder)
          @node = node
          @doc_builder = doc_builder
        end

        # Read an attribute of the wrapped node.
        def [](key)
          @node[key]
        end

        # Write an attribute of the wrapped node.
        def []=(key, value)
          @node[key] = value
        end

        # Attribute-shortcut dispatch; see the class comment for the forms.
        #
        # @return [NodeBuilder, Object] self, or the block's value when a block
        #   is given
        def method_missing(method, *args, &block)
          # Decline conversion probes so that e.g. +a, b = xml.tag+ does not
          # append "to_ary" to the class attribute.
          return super if IMPLICIT_CONVERSIONS.include?(method)

          opts = args.last.is_a?(Hash) ? args.pop : {}
          case method.to_s
          when /\A(.*)!\z/
            @node["id"] = Regexp.last_match(1)
            @node.content = args.first if args.first
          when /\A(.*)=\z/
            @node[Regexp.last_match(1)] = args.first
          else
            @node["class"] = ((@node["class"] || "").split(/\s/) + [method.to_s]).join(" ")
            @node.content = args.first if args.first
          end

          # Each trailing-Hash entry is appended to the attribute's existing
          # space-separated value rather than overwriting it.
          opts.each do |key, value|
            @node[key.to_s] = ((@node[key.to_s] || "").split(/\s/) + [value]).join(" ")
          end

          # Descend into this node for a nested block via the builder's own parent
          # stack (with its ensure-based restore), rather than re-rooting it by hand.
          return @doc_builder.send(:descend, @node, &block) if block

          self
        end

        # Every name is a candidate shortcut, except the implicit-conversion
        # probes that {#method_missing} declines.
        def respond_to_missing?(name, _include_private = false)
          !IMPLICIT_CONVERSIONS.include?(name)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Makiri
  module XML
    # Pure-Ruby conveniences for the XML document. The document class and the
    # XML node leaves come from the C extension (ext/makiri/glue/ruby_xml*.c);
    # helpers that merely compose the public surface are kept here instead of
    # on the abstract Makiri::Document, which offers no construction API.
    class Document
      # Install +node+ as the document's root element. If the document already
      # has a root, that root is replaced via {Node#replace}; if not, +node+ is
      # appended via {Node#add_child} (which enforces the single-root rule).
      # Nokogiri-compatible. XML only - an HTML5 document always has the fixed
      # html/head/body skeleton, so an arbitrary root makes no sense there.
      #
      # @param node [Makiri::XML::Element]
      # @return [Makiri::XML::Element] the node
      def root=(node)
        current_root = root
        return current_root.replace(node) if current_root

        add_child(node)
      end
    end
  end
end
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Makiri
  module XML
    # Ruby-level additions on top of the XML node readers implemented in C.
    # They mirror Makiri::HTML::NodeMethods so that the XML node surface lines
    # up with the HTML one for the methods consumers (e.g. Dommy) depend on.
    # The +ancestors+, +key?+ and +has_attribute?+ definitions are wrapped in
    # `method_defined?` guards, so a native implementation added to this module
    # later wins instead of being shadowed.
    module NodeMethods
      # Ancestor elements ordered from nearest to farthest. Self is excluded,
      # and only nodes whose +node_type+ is 1 are kept - the same contract as
      # Makiri::HTML's #ancestors.
      unless method_defined?(:ancestors)
        def ancestors
          chain = []
          cursor = parent
          while cursor
            chain.push(cursor) if cursor.node_type == 1
            # Stop climbing at the first object that exposes no #parent.
            cursor = (cursor.parent if cursor.respond_to?(:parent))
          end
          chain
        end
      end

      # True when the node carries an attribute whose qualified name equals
      # +name+ (compared case-sensitively, per XML).
      unless method_defined?(:key?)
        def key?(name)
          target = name.to_s
          attribute_nodes.any? { |candidate| candidate.name == target }
        end
      end

      alias_method :has_attribute?, :key? unless method_defined?(:has_attribute?)

      # CSS selector queries over XML, lowered to the native XPath engine (so
      # matching is case-sensitive and namespace-aware, unlike a Lexbor HTML
      # matcher). Namespace handling: when no +ns+ is given (or it is empty),
      # the namespace declarations found on the document's root element are
      # registered automatically, with the default namespace bound so that a
      # bare type selector matches it. When a non-empty +ns+ hash of
      # {prefix => uri} is given, it is used on its own - it is not merged
      # with the document's declarations.
      #
      #   doc.css("entry")                 # default-namespace bound (Atom/RSS just work)
      #   doc.css("a|entry", "a" => uri)   # explicit prefix
      #
      # @return [Makiri::NodeSet]
      def css(selector, ns = nil)
        query = selector.to_s
        registry = _css_namespaces(ns)
        _css(query, registry)
      end

      # First descendant matching +selector+, or nil. @return [Makiri::Node, nil]
      def at_css(selector, ns = nil)
        query = selector.to_s
        registry = _css_namespaces(ns)
        _at_css(query, registry)
      end

      # Whether this node matches +selector+ (full selector, combinators included).
      # @return [Boolean]
      def matches?(selector, ns = nil)
        query = selector.to_s
        registry = _css_namespaces(ns)
        _css_matches(query, registry)
      end

      private

      # Produce the {prefix => uri} hash handed to the C primitives.
      #
      # A non-empty +user+ hash is returned with its keys and values
      # stringified and nothing else added, so a bare selector then resolves
      # to no namespace (Nokogiri likewise drops the default binding once an
      # explicit map is supplied). Otherwise the namespace definitions of the
      # document's root element are collected, the default namespace being
      # stored under the synthetic prefix "xmlns" so that a bare type selector
      # binds to it (the RSS/Atom common case). Only the root's declarations
      # are read, which costs O(root attributes) rather than a whole-document
      # walk.
      def _css_namespaces(user)
        explicit = user && !user.empty?
        return user.transform_keys(&:to_s).transform_values(&:to_s) if explicit

        registry = {}
        definitions = document&.root&.namespace_definitions
        definitions&.each do |ns|
          registry[ns.prefix.nil? ? "xmlns" : ns.prefix] = ns.href
        end
        registry
      end
    end
  end
end
|
data/lib/makiri/xml.rb
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Makiri
|
|
4
|
+
# XML-specific node leaves and document conveniences (§12), mirroring
|
|
5
|
+
# Makiri::HTML. The XML nodes and the document are defined in C
|
|
6
|
+
# (ext/makiri/glue/ruby_xml*.c); the per-class Ruby additions live in this
|
|
7
|
+
# namespace's files (xml/document.rb, xml/node_methods.rb, xml/builder.rb).
|
|
8
|
+
module XML
|
|
9
|
+
end
|
|
10
|
+
end
|
data/lib/makiri/xpath_context.rb
CHANGED
|
@@ -16,7 +16,7 @@ module Makiri
|
|
|
16
16
|
class XPathContext
|
|
17
17
|
# +#evaluate+ is defined in C. It evaluates under the GVL (XPath never
|
|
18
18
|
# releases it), so concurrent +evaluate+ / +register_*+ / +node=+ on the
|
|
19
|
-
# same context
|
|
19
|
+
# same context - and any tree mutation of the document being queried - are
|
|
20
20
|
# serialised by the GVL and cannot corrupt memory.
|
|
21
21
|
|
|
22
22
|
# Nokogiri-compatible name for {#register_namespace}.
|
data/lib/makiri.rb
CHANGED
|
@@ -15,11 +15,18 @@ end
|
|
|
15
15
|
|
|
16
16
|
require_relative "makiri/node"
|
|
17
17
|
require_relative "makiri/document"
|
|
18
|
+
require_relative "makiri/html"
|
|
19
|
+
require_relative "makiri/html/node_methods"
|
|
20
|
+
require_relative "makiri/html/document"
|
|
21
|
+
require_relative "makiri/xml"
|
|
22
|
+
require_relative "makiri/xml/node_methods"
|
|
23
|
+
require_relative "makiri/xml/document"
|
|
24
|
+
require_relative "makiri/xml/builder"
|
|
18
25
|
require_relative "makiri/element"
|
|
19
|
-
require_relative "makiri/
|
|
26
|
+
require_relative "makiri/attr"
|
|
20
27
|
require_relative "makiri/text"
|
|
21
28
|
require_relative "makiri/comment"
|
|
22
|
-
require_relative "makiri/
|
|
29
|
+
require_relative "makiri/cdata_section"
|
|
23
30
|
require_relative "makiri/processing_instruction"
|
|
24
31
|
require_relative "makiri/document_type"
|
|
25
32
|
require_relative "makiri/document_fragment"
|
|
@@ -27,6 +34,7 @@ require_relative "makiri/node_set"
|
|
|
27
34
|
require_relative "makiri/xpath_context"
|
|
28
35
|
require_relative "makiri/xpath"
|
|
29
36
|
require_relative "makiri/css"
|
|
37
|
+
require_relative "makiri/compat_aliases"
|
|
30
38
|
|
|
31
39
|
module Makiri
|
|
32
40
|
# Base exception class for Makiri-specific errors.
|
|
@@ -35,13 +43,24 @@ module Makiri
|
|
|
35
43
|
# Convenience constructor mirroring Nokogiri.
|
|
36
44
|
#
|
|
37
45
|
# @param source [String] HTML source (UTF-8).
|
|
38
|
-
# @return [Makiri::Document]
|
|
46
|
+
# @return [Makiri::HTML::Document]
|
|
39
47
|
def self.HTML(source) # rubocop:disable Naming/MethodName
|
|
40
|
-
Document.parse(source)
|
|
48
|
+
HTML::Document.parse(source)
|
|
41
49
|
end
|
|
42
50
|
|
|
43
51
|
# Alias for {.HTML}.
|
|
44
52
|
def self.parse(source)
|
|
45
|
-
Document.parse(source)
|
|
53
|
+
HTML::Document.parse(source)
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Convenience XML constructor mirroring Nokogiri::XML(source). A method named
|
|
57
|
+
# XML on the Makiri module, coexisting with the Makiri::XML constant (the
|
|
58
|
+
# module), as Nokogiri::XML does. Delegates to {Makiri::XML::Document.parse},
|
|
59
|
+
# exactly as {.HTML} delegates to {Makiri::HTML::Document.parse}.
|
|
60
|
+
#
|
|
61
|
+
# @param source [String, #read] XML source (its String encoding is honoured).
|
|
62
|
+
# @return [Makiri::XML::Document]
|
|
63
|
+
def self.XML(source, **opts) # rubocop:disable Naming/MethodName
|
|
64
|
+
XML::Document.parse(source, **opts)
|
|
46
65
|
end
|
|
47
66
|
end
|
data/script/build_native_gem.rb
CHANGED
|
@@ -24,7 +24,7 @@ root = File.expand_path("..", __dir__)
|
|
|
24
24
|
spec = Gem::Specification.load(File.join(root, "makiri.gemspec"))
|
|
25
25
|
|
|
26
26
|
libs = Dir[File.join(root, "lib", "makiri", "*", "makiri.{so,bundle}")].sort
|
|
27
|
-
abort "no precompiled libraries found under lib/makiri/*/
|
|
27
|
+
abort "no precompiled libraries found under lib/makiri/*/ - stage them first" if libs.empty?
|
|
28
28
|
|
|
29
29
|
# Native gem: ship binaries, not the C sources or the vendored Lexbor tree, and
|
|
30
30
|
# declare no extension so install never tries to compile.
|
|
@@ -34,7 +34,7 @@ spec.files = spec.files.reject { |f| f.start_with?("ext/", "vendor/") }
|
|
|
34
34
|
spec.files += libs.map { |p| p.sub("#{root}/", "") }
|
|
35
35
|
spec.files.uniq!
|
|
36
36
|
|
|
37
|
-
# Bound the Ruby versions this binary gem serves
|
|
37
|
+
# Bound the Ruby versions this binary gem serves - one subdir per ABI minor.
|
|
38
38
|
abis = libs.map { |p| File.basename(File.dirname(p)) }.sort_by { |v| v.split(".").map(&:to_i) }
|
|
39
39
|
lo_major, lo_minor = abis.first.split(".").map(&:to_i)
|
|
40
40
|
hi_major, hi_minor = abis.last.split(".").map(&:to_i)
|