makiri 0.2.0-aarch64-linux → 0.3.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/release.yml +12 -7
- data/CHANGELOG.md +93 -14
- data/README.md +173 -7
- data/Rakefile +103 -7
- data/lib/makiri/3.2/makiri.so +0 -0
- data/lib/makiri/3.3/makiri.so +0 -0
- data/lib/makiri/3.4/makiri.so +0 -0
- data/lib/makiri/4.0/makiri.so +0 -0
- data/lib/makiri/{attribute.rb → attr.rb} +7 -3
- data/lib/makiri/cdata_section.rb +21 -0
- data/lib/makiri/comment.rb +12 -0
- data/lib/makiri/compat_aliases.rb +30 -0
- data/lib/makiri/document.rb +4 -76
- data/lib/makiri/document_fragment.rb +14 -9
- data/lib/makiri/element.rb +5 -3
- data/lib/makiri/html/document.rb +106 -0
- data/lib/makiri/html/node_methods.rb +19 -0
- data/lib/makiri/html.rb +12 -0
- data/lib/makiri/node.rb +58 -15
- data/lib/makiri/node_set.rb +8 -0
- data/lib/makiri/processing_instruction.rb +12 -0
- data/lib/makiri/text.rb +2 -0
- data/lib/makiri/version.rb +1 -1
- data/lib/makiri/xml/document.rb +24 -0
- data/lib/makiri/xml/node_methods.rb +37 -0
- data/lib/makiri/xml.rb +10 -0
- data/lib/makiri/xpath_context.rb +1 -1
- data/lib/makiri.rb +23 -5
- data/script/build_native_gem.rb +2 -2
- data/script/check_c_safety.rb +32 -0
- data/script/check_c_safety_allowlist.yml +83 -0
- metadata +10 -3
- data/lib/makiri/cdata.rb +0 -6
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Makiri
|
|
4
|
+
# A CDATA section. The canonical name is the WHATWG DOM interface name
|
|
5
|
+
# CDATASection; Makiri::CDATA is a Nokogiri-compatible alias (see
|
|
6
|
+
# compat_aliases.rb).
|
|
7
|
+
class CDATASection < Node
|
|
8
|
+
# Create a detached CDATA section owned by +document+ (Nokogiri-style,
|
|
9
|
+
# document first). Delegates to {Document#create_cdata} - so XML only; HTML
|
|
10
|
+
# has no CDATA construction.
|
|
11
|
+
#
|
|
12
|
+
# @param document [Makiri::Document]
|
|
13
|
+
# @param content [String]
|
|
14
|
+
# @return [Makiri::CDATASection]
|
|
15
|
+
def self.new(document, content)
|
|
16
|
+
raise TypeError, "expected a Makiri::Document" unless document.is_a?(Makiri::Document)
|
|
17
|
+
|
|
18
|
+
document.create_cdata(content)
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
data/lib/makiri/comment.rb
CHANGED
|
@@ -2,5 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
module Makiri
|
|
4
4
|
class Comment < Node
|
|
5
|
+
# Create a detached comment owned by +document+ (Nokogiri-style constructor;
|
|
6
|
+
# the document comes FIRST for Comment / CDATASection / ProcessingInstruction, unlike
|
|
7
|
+
# Element / Text). Delegates to {Document#create_comment}.
|
|
8
|
+
#
|
|
9
|
+
# @param document [Makiri::Document]
|
|
10
|
+
# @param content [String]
|
|
11
|
+
# @return [Makiri::Comment]
|
|
12
|
+
def self.new(document, content)
|
|
13
|
+
raise TypeError, "expected a Makiri::Document" unless document.is_a?(Makiri::Document)
|
|
14
|
+
|
|
15
|
+
document.create_comment(content)
|
|
16
|
+
end
|
|
5
17
|
end
|
|
6
18
|
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Makiri
|
|
4
|
+
# Nokogiri-compatible class-name aliases.
|
|
5
|
+
#
|
|
6
|
+
# Makiri's canonical node-class names are the WHATWG DOM interface names
|
|
7
|
+
# (Element, Attr, Text, Comment, CDATASection, ProcessingInstruction,
|
|
8
|
+
# DocumentType, Document, DocumentFragment). Two of those differ from the
|
|
9
|
+
# libxml2/Nokogiri spelling; we expose the Nokogiri names as aliases so a
|
|
10
|
+
# snippet ported from Nokogiri (or an is_a?/case check) resolves unchanged:
|
|
11
|
+
#
|
|
12
|
+
# CDATASection <- CDATA (Nokogiri::XML::CDATA)
|
|
13
|
+
# DocumentType <- DTD (Nokogiri::XML::DTD)
|
|
14
|
+
#
|
|
15
|
+
# An alias is the same class object, so #is_a? works under either name; only
|
|
16
|
+
# #class.name (and inspect) report the canonical DOM name. Defined at all three
|
|
17
|
+
# levels (the abstract base and the HTML::/XML:: leaves).
|
|
18
|
+
CDATA = CDATASection
|
|
19
|
+
DTD = DocumentType
|
|
20
|
+
|
|
21
|
+
module HTML
|
|
22
|
+
CDATA = CDATASection
|
|
23
|
+
DTD = DocumentType
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
module XML
|
|
27
|
+
CDATA = CDATASection
|
|
28
|
+
DTD = DocumentType
|
|
29
|
+
end
|
|
30
|
+
end
|
data/lib/makiri/document.rb
CHANGED
|
@@ -1,82 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Makiri
|
|
4
|
-
#
|
|
4
|
+
# Abstract base for a parsed document (§12). Concrete documents are the
|
|
5
|
+
# per-kind leaves: {Makiri::HTML::Document} (HTML5) and
|
|
6
|
+
# {Makiri::XML::Document} (XML). `is_a?(Makiri::Document)` is true for both.
|
|
7
|
+
# Construction and the HTML-only conveniences live on the leaves, not here.
|
|
5
8
|
class Document < Node
|
|
6
|
-
# Parse +source+ as HTML5 and return a Document.
|
|
7
|
-
#
|
|
8
|
-
# +source+ may be a String or any object responding to +#read+ (e.g. an
|
|
9
|
-
# IO). The native parser (#_parse) expects UTF-8 bytes. Source locations
|
|
10
|
-
# for {Node#line} are always tracked (the cost is negligible).
|
|
11
|
-
#
|
|
12
|
-
# @param source [String, #read]
|
|
13
|
-
# @return [Makiri::Document]
|
|
14
|
-
def self.parse(source)
|
|
15
|
-
source = source.read if source.respond_to?(:read)
|
|
16
|
-
_parse(String(source))
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
# The document's <body> element, or nil.
|
|
20
|
-
# @return [Makiri::Element, nil]
|
|
21
|
-
def body
|
|
22
|
-
at_css("body")
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
# The document's <head> element, or nil.
|
|
26
|
-
# @return [Makiri::Element, nil]
|
|
27
|
-
def head
|
|
28
|
-
at_css("head")
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
# Set the document title, creating <title> (in <head>) if absent.
|
|
32
|
-
# @param text [String]
|
|
33
|
-
# @return [String]
|
|
34
|
-
def title=(text)
|
|
35
|
-
t = at_css("title")
|
|
36
|
-
unless t
|
|
37
|
-
t = Element.new("title", self)
|
|
38
|
-
(head || root).add_child(t)
|
|
39
|
-
end
|
|
40
|
-
t.content = text
|
|
41
|
-
text
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
# Makiri parses and stores everything as UTF-8 (callers decode bytes before
|
|
45
|
-
# parsing), so the in-memory encoding is always UTF-8.
|
|
46
|
-
# @return [String]
|
|
47
|
-
def encoding
|
|
48
|
-
"UTF-8"
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
# The charset declared in the document's markup, or nil. Reads
|
|
52
|
-
# <meta charset> first, then <meta http-equiv="Content-Type">.
|
|
53
|
-
# @return [String, nil]
|
|
54
|
-
def meta_encoding
|
|
55
|
-
if (m = at_css("meta[charset]"))
|
|
56
|
-
return m["charset"]
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
css("meta").each do |meta|
|
|
60
|
-
http_equiv = meta["http-equiv"]
|
|
61
|
-
next unless http_equiv&.downcase == "content-type"
|
|
62
|
-
|
|
63
|
-
content = meta["content"].to_s
|
|
64
|
-
return Regexp.last_match(1) if content =~ /charset\s*=\s*"?([^\s;"]+)/i
|
|
65
|
-
end
|
|
66
|
-
nil
|
|
67
|
-
end
|
|
68
|
-
|
|
69
|
-
# Set (or insert) a <meta charset> declaration.
|
|
70
|
-
# @param value [String]
|
|
71
|
-
# @return [String]
|
|
72
|
-
def meta_encoding=(value)
|
|
73
|
-
meta = at_css("meta[charset]")
|
|
74
|
-
unless meta
|
|
75
|
-
meta = Element.new("meta", self)
|
|
76
|
-
(head || root).add_child(meta)
|
|
77
|
-
end
|
|
78
|
-
meta["charset"] = value
|
|
79
|
-
value
|
|
80
|
-
end
|
|
81
9
|
end
|
|
82
10
|
end
|
|
@@ -6,16 +6,21 @@ module Makiri
|
|
|
6
6
|
# its nodes can be spliced in with {Node#add_child} and friends). Inserting a
|
|
7
7
|
# fragment contributes its children, not the fragment node itself.
|
|
8
8
|
#
|
|
9
|
-
#
|
|
10
|
-
#
|
|
11
|
-
# (the fragment-parsing algorithm is context-sensitive). It may be:
|
|
12
|
-
# * a tag-name String (HTML namespace), e.g. <tt>context: "tr"</tt>; the
|
|
13
|
-
# bare strings <tt>"svg"</tt> / <tt>"math"</tt> name the foreign roots;
|
|
14
|
-
# * a {Makiri::Node} element — its tag and namespace are used (the way to
|
|
15
|
-
# reach a foreign non-root context such as an SVG <desc>).
|
|
16
|
-
# The default context is <tt><body></tt>. See also {Makiri::Node#parse}.
|
|
9
|
+
# The concrete classes are {Makiri::HTML::DocumentFragment} and
|
|
10
|
+
# {Makiri::XML::DocumentFragment}; their fragment-parsing context differs:
|
|
17
11
|
#
|
|
18
|
-
#
|
|
12
|
+
# * HTML is context-sensitive: <tt>.parse</tt> / <tt>Document#fragment</tt>
|
|
13
|
+
# accept a Nokogiri-compatible <tt>context:</tt> keyword naming the element
|
|
14
|
+
# the HTML is parsed inside of - a tag-name String (HTML namespace, e.g.
|
|
15
|
+
# <tt>context: "tr"</tt>; the bare strings <tt>"svg"</tt> / <tt>"math"</tt>
|
|
16
|
+
# name the foreign roots), or a {Makiri::Node} element whose tag and namespace
|
|
17
|
+
# are used. The default context is <tt><body></tt>. (Defined in C, ruby_html_*.c.)
|
|
18
|
+
# * XML is namespace-context-based (no <tt>context:</tt> keyword):
|
|
19
|
+
# {Makiri::XML::DocumentFragment.parse} is self-contained (a prefix must be
|
|
20
|
+
# declared within the fragment itself), while {Makiri::XML::Document#fragment}
|
|
21
|
+
# resolves names against the document's in-scope namespaces. (C: ruby_xml.c.)
|
|
22
|
+
#
|
|
23
|
+
# See also {Makiri::Node#parse}.
|
|
19
24
|
class DocumentFragment < Node
|
|
20
25
|
end
|
|
21
26
|
end
|
data/lib/makiri/element.rb
CHANGED
|
@@ -1,16 +1,18 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Makiri
|
|
4
|
-
# An HTML
|
|
4
|
+
# An element node (HTML or XML). Attribute access lives in C.
|
|
5
5
|
class Element < Node
|
|
6
6
|
# Create a detached element named +name+ owned by +document+ (Nokogiri-style
|
|
7
|
-
# constructor; delegates to {Document#create_element}
|
|
8
|
-
# {Node#add_child} and friends.
|
|
7
|
+
# constructor; delegates to {Document#create_element}, so its representation
|
|
8
|
+
# follows the document). Attach it with {Node#add_child} and friends.
|
|
9
9
|
#
|
|
10
10
|
# @param name [String]
|
|
11
11
|
# @param document [Makiri::Document]
|
|
12
12
|
# @return [Makiri::Element]
|
|
13
13
|
def self.new(name, document)
|
|
14
|
+
raise TypeError, "expected a Makiri::Document" unless document.is_a?(Makiri::Document)
|
|
15
|
+
|
|
14
16
|
document.create_element(name)
|
|
15
17
|
end
|
|
16
18
|
end
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Makiri
|
|
4
|
+
module HTML
|
|
5
|
+
# Root container for a parsed HTML document. Construction, serialization and
|
|
6
|
+
# the HTML-only conveniences (body/head/title/encoding) live here, not on the
|
|
7
|
+
# abstract Makiri::Document.
|
|
8
|
+
class Document
|
|
9
|
+
# Parse +source+ as HTML5 and return a Makiri::HTML::Document.
|
|
10
|
+
#
|
|
11
|
+
# +source+ may be a String or any object responding to +#read+ (e.g. an
|
|
12
|
+
# IO). The native parser (#_parse) expects UTF-8 bytes. Source locations
|
|
13
|
+
# for {Node#line} are always tracked (the cost is negligible).
|
|
14
|
+
#
|
|
15
|
+
# @param source [String, #read]
|
|
16
|
+
# @return [Makiri::HTML::Document]
|
|
17
|
+
def self.parse(source)
|
|
18
|
+
source = source.read if source.respond_to?(:read)
|
|
19
|
+
_parse(String(source))
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
# An independent copy of the whole document (like Nokogiri's Document#dup).
|
|
23
|
+
# Built by serialising and re-parsing, so the copy shares no nodes with the
|
|
24
|
+
# original - Node#dup's clone_node delegation is wrong for a document node,
|
|
25
|
+
# hence this override. (A DOM mutated into a shape the HTML parser would not
|
|
26
|
+
# itself produce, e.g. a foster-parented table cell, may be re-normalised on
|
|
27
|
+
# re-parse; a freshly parsed document round-trips unchanged.) Any level /
|
|
28
|
+
# freeze argument is ignored.
|
|
29
|
+
def dup(*)
|
|
30
|
+
Makiri.parse(to_html)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Like {#dup}: an independent copy of the document, honouring Ruby's
|
|
34
|
+
# +freeze:+ keyword (a frozen document's nodes raise +FrozenError+ on
|
|
35
|
+
# mutation).
|
|
36
|
+
def clone(freeze: nil)
|
|
37
|
+
copy = Makiri.parse(to_html)
|
|
38
|
+
copy.freeze if freeze || (freeze.nil? && frozen?)
|
|
39
|
+
copy
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# The document's <body> element, or nil.
|
|
43
|
+
# @return [Makiri::Element, nil]
|
|
44
|
+
def body
|
|
45
|
+
at_css("body")
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
# The document's <head> element, or nil.
|
|
49
|
+
# @return [Makiri::Element, nil]
|
|
50
|
+
def head
|
|
51
|
+
at_css("head")
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Set the document title, creating <title> (in <head>) if absent.
|
|
55
|
+
# @param text [String]
|
|
56
|
+
# @return [String]
|
|
57
|
+
def title=(text)
|
|
58
|
+
t = at_css("title")
|
|
59
|
+
unless t
|
|
60
|
+
t = Element.new("title", self)
|
|
61
|
+
(head || root).add_child(t)
|
|
62
|
+
end
|
|
63
|
+
t.content = text
|
|
64
|
+
text
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
# Makiri parses and stores everything as UTF-8 (callers decode bytes before
|
|
68
|
+
# parsing), so the in-memory encoding is always UTF-8.
|
|
69
|
+
# @return [String]
|
|
70
|
+
def encoding
|
|
71
|
+
"UTF-8"
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
# The charset declared in the document's markup, or nil. Reads
|
|
75
|
+
# <meta charset> first, then <meta http-equiv="Content-Type">.
|
|
76
|
+
# @return [String, nil]
|
|
77
|
+
def meta_encoding
|
|
78
|
+
if (m = at_css("meta[charset]"))
|
|
79
|
+
return m["charset"]
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
css("meta").each do |meta|
|
|
83
|
+
http_equiv = meta["http-equiv"]
|
|
84
|
+
next unless http_equiv&.downcase == "content-type"
|
|
85
|
+
|
|
86
|
+
content = meta["content"].to_s
|
|
87
|
+
return Regexp.last_match(1) if content =~ /charset\s*=\s*"?([^\s;"]+)/i
|
|
88
|
+
end
|
|
89
|
+
nil
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
# Set (or insert) a <meta charset> declaration.
|
|
93
|
+
# @param value [String]
|
|
94
|
+
# @return [String]
|
|
95
|
+
def meta_encoding=(value)
|
|
96
|
+
meta = at_css("meta[charset]")
|
|
97
|
+
unless meta
|
|
98
|
+
meta = Element.new("meta", self)
|
|
99
|
+
(head || root).add_child(meta)
|
|
100
|
+
end
|
|
101
|
+
meta["charset"] = value
|
|
102
|
+
value
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Makiri
|
|
4
|
+
module HTML
|
|
5
|
+
# The lxb_dom reader/query methods are defined in C on this module and
|
|
6
|
+
# included into every HTML leaf (including the generic Makiri::HTML::Node).
|
|
7
|
+
# The Nokogiri-compatible aliases over those readers live here (not on
|
|
8
|
+
# Makiri::Node) so they resolve against the HTML readers at definition time.
|
|
9
|
+
module NodeMethods
|
|
10
|
+
alias_method :attr, :[]
|
|
11
|
+
alias_method :get_attribute, :[]
|
|
12
|
+
alias_method :has_attribute?, :key?
|
|
13
|
+
alias_method :remove_attribute, :delete
|
|
14
|
+
alias_method :node_name, :name
|
|
15
|
+
alias_method :node_name=, :name=
|
|
16
|
+
alias_method :type, :node_type
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
data/lib/makiri/html.rb
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Makiri
|
|
4
|
+
# HTML-specific node leaves (§12). Every concrete HTML node is a Makiri::HTML::*
|
|
5
|
+
# class under the matching abstract base (so is_a?(Makiri::Element) etc. holds),
|
|
6
|
+
# carrying the lxb_dom-backed reader/query methods via the included
|
|
7
|
+
# Makiri::HTML::NodeMethods module. XML nodes never inherit these. The classes
|
|
8
|
+
# themselves are defined in C (ext/makiri/glue/ruby_html*.c); the per-class Ruby
|
|
9
|
+
# additions live in this namespace's files (html/node_methods.rb, html/document.rb).
|
|
10
|
+
module HTML
|
|
11
|
+
end
|
|
12
|
+
end
|
data/lib/makiri/node.rb
CHANGED
|
@@ -5,8 +5,29 @@ module Makiri
|
|
|
5
5
|
# The bulk of the API lives in the C extension; this file defines the
|
|
6
6
|
# Ruby-only conveniences.
|
|
7
7
|
class Node
|
|
8
|
+
# Order by document (pre-order) position via the native #<=>, so nodes can
|
|
9
|
+
# be sorted; #<=> is nil (incomparable) across documents or for attributes.
|
|
10
|
+
include Comparable
|
|
11
|
+
|
|
12
|
+
# A Node is Enumerable over its child nodes (#each yields each child), so
|
|
13
|
+
# node.map / node.select / node.find / node.to_a, etc. work - like Nokogiri.
|
|
14
|
+
# (#to_h is unaffected: Node defines its own, returning the attribute hash.)
|
|
15
|
+
include Enumerable
|
|
16
|
+
|
|
8
17
|
# Identity is by wrapped node pointer; defined in C.
|
|
9
18
|
|
|
19
|
+
# Yield each child node in document order. Iterates a snapshot of the
|
|
20
|
+
# children (taken when called), so the block may safely move or remove the
|
|
21
|
+
# current node. Returns an Enumerator when no block is given.
|
|
22
|
+
# @yieldparam child [Makiri::Node]
|
|
23
|
+
# @return [self, Enumerator]
|
|
24
|
+
def each(&block)
|
|
25
|
+
return enum_for(:each) { children.length } unless block_given?
|
|
26
|
+
|
|
27
|
+
children.each(&block)
|
|
28
|
+
self
|
|
29
|
+
end
|
|
30
|
+
|
|
10
31
|
# @return [Boolean]
|
|
11
32
|
def element?
|
|
12
33
|
is_a?(Element)
|
|
@@ -22,9 +43,14 @@ module Makiri
|
|
|
22
43
|
is_a?(Comment)
|
|
23
44
|
end
|
|
24
45
|
|
|
46
|
+
# @return [Boolean]
|
|
47
|
+
def cdata?
|
|
48
|
+
is_a?(CDATASection)
|
|
49
|
+
end
|
|
50
|
+
|
|
25
51
|
# @return [Boolean]
|
|
26
52
|
def attribute?
|
|
27
|
-
is_a?(
|
|
53
|
+
is_a?(Attr)
|
|
28
54
|
end
|
|
29
55
|
|
|
30
56
|
# @return [Boolean]
|
|
@@ -44,19 +70,16 @@ module Makiri
|
|
|
44
70
|
|
|
45
71
|
# @return [Boolean] true for a blank/whitespace-only text or CDATA node.
|
|
46
72
|
def blank?
|
|
47
|
-
(text? ||
|
|
73
|
+
(text? || cdata?) && content.strip.empty?
|
|
48
74
|
end
|
|
49
75
|
|
|
50
76
|
# --- Nokogiri-compatible aliases over the core API ---
|
|
51
|
-
|
|
52
|
-
#
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
alias_method :node_name, :name
|
|
58
|
-
alias_method :node_name=, :name=
|
|
59
|
-
alias_method :type, :node_type
|
|
77
|
+
#
|
|
78
|
+
# Aliases of the representation-specific reader methods (#[], #name, ...) live
|
|
79
|
+
# with those methods on the per-kind node behaviour (Makiri::HTML::NodeMethods), not
|
|
80
|
+
# here, since alias_method resolves its target at definition time and the
|
|
81
|
+
# readers are defined on the leaves' included module. These two alias
|
|
82
|
+
# representation-independent predicates defined just above, so they stay.
|
|
60
83
|
alias_method :elem?, :element?
|
|
61
84
|
alias_method :fragment?, :document_fragment?
|
|
62
85
|
|
|
@@ -65,8 +88,8 @@ module Makiri
|
|
|
65
88
|
self[name] = value
|
|
66
89
|
end
|
|
67
90
|
|
|
68
|
-
# The
|
|
69
|
-
# @return [Makiri::
|
|
91
|
+
# The Attr node named +name+, or nil (cf. {#[]}, which returns the value).
|
|
92
|
+
# @return [Makiri::Attr, nil]
|
|
70
93
|
def attribute(name)
|
|
71
94
|
attributes[name.to_s]
|
|
72
95
|
end
|
|
@@ -122,8 +145,8 @@ module Makiri
|
|
|
122
145
|
document.root
|
|
123
146
|
end
|
|
124
147
|
|
|
125
|
-
# Attributes as a name =>
|
|
126
|
-
# @return [Hash{String => Makiri::
|
|
148
|
+
# Attributes as a name => Attr Hash (empty for non-elements).
|
|
149
|
+
# @return [Hash{String => Makiri::Attr}]
|
|
127
150
|
def attributes
|
|
128
151
|
attribute_nodes.each_with_object({}) { |attr, h| h[attr.name] = attr }
|
|
129
152
|
end
|
|
@@ -172,6 +195,26 @@ module Makiri
|
|
|
172
195
|
"#<#{self.class.name}>"
|
|
173
196
|
end
|
|
174
197
|
|
|
198
|
+
# An independent copy of this node, detached from any parent and owned by the
|
|
199
|
+
# same document (like Nokogiri's Node#dup). Deep by default; +level+ 0 makes
|
|
200
|
+
# a shallow copy (matching Nokogiri's level argument). The native allocator
|
|
201
|
+
# is undef'd to keep wrappers memory-safe, so #dup/#clone delegate to
|
|
202
|
+
# {#clone_node} rather than Ruby's default allocate-and-copy (which would
|
|
203
|
+
# otherwise raise "allocator undefined").
|
|
204
|
+
def dup(level = 1)
|
|
205
|
+
clone_node(level != 0)
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
# Like {#dup}, always a deep copy, and honouring Ruby's +freeze:+ keyword:
|
|
209
|
+
# +true+ returns a frozen copy, +false+ an unfrozen one, the default (+nil+)
|
|
210
|
+
# copies the receiver's frozen state. A frozen node is genuinely immutable -
|
|
211
|
+
# its mutators raise +FrozenError+ (see Makiri's mutation methods).
|
|
212
|
+
def clone(freeze: nil)
|
|
213
|
+
copy = clone_node(true)
|
|
214
|
+
copy.freeze if freeze || (freeze.nil? && frozen?)
|
|
215
|
+
copy
|
|
216
|
+
end
|
|
217
|
+
|
|
175
218
|
private
|
|
176
219
|
|
|
177
220
|
# Heuristic used by {#search}: does +path+ look like an XPath location path
|
data/lib/makiri/node_set.rb
CHANGED
|
@@ -98,6 +98,14 @@ module Makiri
|
|
|
98
98
|
end
|
|
99
99
|
alias unlink remove
|
|
100
100
|
|
|
101
|
+
# Like {#dup} (a new set over the same nodes), honouring Ruby's +freeze:+
|
|
102
|
+
# keyword. (#dup is the native copy.)
|
|
103
|
+
def clone(freeze: nil)
|
|
104
|
+
copy = dup
|
|
105
|
+
copy.freeze if freeze || (freeze.nil? && frozen?)
|
|
106
|
+
copy
|
|
107
|
+
end
|
|
108
|
+
|
|
101
109
|
def inspect
|
|
102
110
|
"#<#{self.class.name} length=#{length}>"
|
|
103
111
|
end
|
|
@@ -4,5 +4,17 @@ module Makiri
|
|
|
4
4
|
# An XML/HTML processing-instruction node. Rare in HTML5 (the parser usually
|
|
5
5
|
# treats "<?...>" as a bogus comment), present mainly for completeness.
|
|
6
6
|
class ProcessingInstruction < Node
|
|
7
|
+
# Create a detached processing instruction owned by +document+ (Nokogiri-style,
|
|
8
|
+
# document first). Delegates to {Document#create_processing_instruction}.
|
|
9
|
+
#
|
|
10
|
+
# @param document [Makiri::Document]
|
|
11
|
+
# @param target [String]
|
|
12
|
+
# @param content [String]
|
|
13
|
+
# @return [Makiri::ProcessingInstruction]
|
|
14
|
+
def self.new(document, target, content)
|
|
15
|
+
raise TypeError, "expected a Makiri::Document" unless document.is_a?(Makiri::Document)
|
|
16
|
+
|
|
17
|
+
document.create_processing_instruction(target, content)
|
|
18
|
+
end
|
|
7
19
|
end
|
|
8
20
|
end
|
data/lib/makiri/text.rb
CHANGED
|
@@ -10,6 +10,8 @@ module Makiri
|
|
|
10
10
|
# @param document [Makiri::Document]
|
|
11
11
|
# @return [Makiri::Text]
|
|
12
12
|
def self.new(content, document)
|
|
13
|
+
raise TypeError, "expected a Makiri::Document" unless document.is_a?(Makiri::Document)
|
|
14
|
+
|
|
13
15
|
document.create_text_node(content)
|
|
14
16
|
end
|
|
15
17
|
end
|
data/lib/makiri/version.rb
CHANGED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Makiri
|
|
4
|
+
module XML
|
|
5
|
+
# XML-specific document conveniences. The XML node leaves and the document
|
|
6
|
+
# itself are defined in C (ext/makiri/glue/ruby_xml*.c); construction sugar
|
|
7
|
+
# that is pure composition over the public surface lives here, not on the
|
|
8
|
+
# abstract Makiri::Document (which carries no construction).
|
|
9
|
+
class Document
|
|
10
|
+
# Set (or replace) the document's root element: with an existing root it
|
|
11
|
+
# replaces that root, otherwise it appends one (subject to the single-root
|
|
12
|
+
# rule). Pure composition over {Node#replace} / {Node#add_child};
|
|
13
|
+
# Nokogiri-compatible. XML only - an HTML5 document has a fixed
|
|
14
|
+
# html/head/body structure, so a free-form root is not meaningful there.
|
|
15
|
+
#
|
|
16
|
+
# @param node [Makiri::XML::Element]
|
|
17
|
+
# @return [Makiri::XML::Element] the node
|
|
18
|
+
def root=(node)
|
|
19
|
+
r = root
|
|
20
|
+
r ? r.replace(node) : add_child(node)
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Makiri
|
|
4
|
+
module XML
|
|
5
|
+
# Ruby additions over the C-defined XML node readers, mirroring
|
|
6
|
+
# Makiri::HTML::NodeMethods so the XML node surface matches the HTML one for
|
|
7
|
+
# the methods consumers (e.g. Dommy) rely on. Each is guarded with
|
|
8
|
+
# `method_defined?` so a future native implementation on this module takes
|
|
9
|
+
# precedence rather than being shadowed.
|
|
10
|
+
module NodeMethods
|
|
11
|
+
# Element ancestors, nearest first, excluding self (element nodes only) —
|
|
12
|
+
# matching Makiri::HTML's #ancestors.
|
|
13
|
+
unless method_defined?(:ancestors)
|
|
14
|
+
def ancestors
|
|
15
|
+
out = []
|
|
16
|
+
node = parent
|
|
17
|
+
while node
|
|
18
|
+
out << node if node.node_type == 1
|
|
19
|
+
node = node.respond_to?(:parent) ? node.parent : nil
|
|
20
|
+
end
|
|
21
|
+
out
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Whether an attribute with the given qualified name is present
|
|
26
|
+
# (case-sensitive, per XML).
|
|
27
|
+
unless method_defined?(:key?)
|
|
28
|
+
def key?(name)
|
|
29
|
+
wanted = name.to_s
|
|
30
|
+
attribute_nodes.any? { |attr| attr.name == wanted }
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
alias_method :has_attribute?, :key? unless method_defined?(:has_attribute?)
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
data/lib/makiri/xml.rb
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Makiri
|
|
4
|
+
# XML-specific node leaves and document conveniences (§12), mirroring
|
|
5
|
+
# Makiri::HTML. The XML nodes and the document are defined in C
|
|
6
|
+
# (ext/makiri/glue/ruby_xml*.c); the per-class Ruby additions live in this
|
|
7
|
+
# namespace's files (xml/node_methods.rb, xml/document.rb).
|
|
8
|
+
module XML
|
|
9
|
+
end
|
|
10
|
+
end
|
data/lib/makiri/xpath_context.rb
CHANGED
|
@@ -16,7 +16,7 @@ module Makiri
|
|
|
16
16
|
class XPathContext
|
|
17
17
|
# +#evaluate+ is defined in C. It evaluates under the GVL (XPath never
|
|
18
18
|
# releases it), so concurrent +evaluate+ / +register_*+ / +node=+ on the
|
|
19
|
-
# same context
|
|
19
|
+
# same context - and any tree mutation of the document being queried - are
|
|
20
20
|
# serialised by the GVL and cannot corrupt memory.
|
|
21
21
|
|
|
22
22
|
# Nokogiri-compatible name for {#register_namespace}.
|
data/lib/makiri.rb
CHANGED
|
@@ -15,11 +15,17 @@ end
|
|
|
15
15
|
|
|
16
16
|
require_relative "makiri/node"
|
|
17
17
|
require_relative "makiri/document"
|
|
18
|
+
require_relative "makiri/html"
|
|
19
|
+
require_relative "makiri/html/node_methods"
|
|
20
|
+
require_relative "makiri/html/document"
|
|
21
|
+
require_relative "makiri/xml"
|
|
22
|
+
require_relative "makiri/xml/node_methods"
|
|
23
|
+
require_relative "makiri/xml/document"
|
|
18
24
|
require_relative "makiri/element"
|
|
19
|
-
require_relative "makiri/attribute"
|
|
25
|
+
require_relative "makiri/attr"
|
|
20
26
|
require_relative "makiri/text"
|
|
21
27
|
require_relative "makiri/comment"
|
|
22
|
-
require_relative "makiri/cdata"
|
|
28
|
+
require_relative "makiri/cdata_section"
|
|
23
29
|
require_relative "makiri/processing_instruction"
|
|
24
30
|
require_relative "makiri/document_type"
|
|
25
31
|
require_relative "makiri/document_fragment"
|
|
@@ -27,6 +33,7 @@ require_relative "makiri/node_set"
|
|
|
27
33
|
require_relative "makiri/xpath_context"
|
|
28
34
|
require_relative "makiri/xpath"
|
|
29
35
|
require_relative "makiri/css"
|
|
36
|
+
require_relative "makiri/compat_aliases"
|
|
30
37
|
|
|
31
38
|
module Makiri
|
|
32
39
|
# Base exception class for Makiri-specific errors.
|
|
@@ -35,13 +42,24 @@ module Makiri
|
|
|
35
42
|
# Convenience constructor mirroring Nokogiri.
|
|
36
43
|
#
|
|
37
44
|
# @param source [String, #read] HTML source (UTF-8).
|
|
38
|
-
# @return [Makiri::Document]
|
|
45
|
+
# @return [Makiri::HTML::Document]
|
|
39
46
|
def self.HTML(source) # rubocop:disable Naming/MethodName
|
|
40
|
-
Document.parse(source)
|
|
47
|
+
HTML::Document.parse(source)
|
|
41
48
|
end
|
|
42
49
|
|
|
43
50
|
# Alias for {.HTML}.
|
|
44
51
|
def self.parse(source)
|
|
45
|
-
Document.parse(source)
|
|
52
|
+
HTML::Document.parse(source)
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
# Convenience XML constructor mirroring Nokogiri::XML(source). A method named
|
|
56
|
+
# XML on the Makiri module, coexisting with the Makiri::XML constant (the
|
|
57
|
+
# module), as Nokogiri::XML does. Delegates to {Makiri::XML::Document.parse},
|
|
58
|
+
# exactly as {.HTML} delegates to {Makiri::HTML::Document.parse}.
|
|
59
|
+
#
|
|
60
|
+
# @param source [String, #read] XML source (its String encoding is honoured).
|
|
61
|
+
# @return [Makiri::XML::Document]
|
|
62
|
+
def self.XML(source, **opts) # rubocop:disable Naming/MethodName
|
|
63
|
+
XML::Document.parse(source, **opts)
|
|
46
64
|
end
|
|
47
65
|
end
|