nokolexbor 0.2.6 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nokolexbor/CMakeLists.txt +7 -4
- data/ext/nokolexbor/config.h.cmake.in +2 -0
- data/ext/nokolexbor/extconf.rb +47 -25
- data/ext/nokolexbor/libxml/xpathInternals.h +4 -5
- data/ext/nokolexbor/nl_cdata.c +44 -0
- data/ext/nokolexbor/nl_comment.c +44 -0
- data/ext/nokolexbor/nl_document.c +23 -9
- data/ext/nokolexbor/nl_node.c +186 -173
- data/ext/nokolexbor/nl_node_set.c +35 -70
- data/ext/nokolexbor/nl_text.c +44 -0
- data/ext/nokolexbor/nl_xpath_context.c +122 -26
- data/ext/nokolexbor/nokolexbor.c +10 -3
- data/ext/nokolexbor/nokolexbor.h +9 -7
- data/ext/nokolexbor/xml_xpath.c +7 -0
- data/lib/nokolexbor/document.rb +96 -1
- data/lib/nokolexbor/node.rb +109 -1
- data/lib/nokolexbor/node_set.rb +23 -5
- data/lib/nokolexbor/version.rb +1 -1
- data/lib/nokolexbor.rb +21 -1
- data/patches/0001-lexbor-support-text-pseudo-element.patch +5 -14
- data/patches/0002-lexbor-match-id-class-case-sensitive.patch +2 -2
- data/vendor/lexbor/source/lexbor/core/lexbor.h +8 -0
- data/vendor/lexbor/source/lexbor/ports/posix/lexbor/core/memory.c +24 -4
- data/vendor/lexbor/source/lexbor/ports/windows_nt/lexbor/core/memory.c +24 -4
- data/vendor/lexbor/source/lexbor/selectors/selectors.c +1 -2
- metadata +7 -5
- data/ext/nokolexbor/memory.c +0 -46
data/lib/nokolexbor/document.rb
CHANGED
@@ -1,6 +1,101 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Nokolexbor
|
4
|
-
class Document < Node
|
4
|
+
class Document < Nokolexbor::Node
|
5
|
+
def create_element(name, *contents_or_attrs, &block)
|
6
|
+
elm = Nokolexbor::Element.new(name, self, &block)
|
7
|
+
contents_or_attrs.each do |arg|
|
8
|
+
case arg
|
9
|
+
when Hash
|
10
|
+
arg.each do |k, v|
|
11
|
+
elm[k.to_s] = v.to_s
|
12
|
+
end
|
13
|
+
else
|
14
|
+
elm.content = arg
|
15
|
+
end
|
16
|
+
end
|
17
|
+
elm
|
18
|
+
end
|
19
|
+
|
20
|
+
# Create a Text Node with +string+
|
21
|
+
def create_text_node(string, &block)
|
22
|
+
Nokolexbor::Text.new(string.to_s, self, &block)
|
23
|
+
end
|
24
|
+
|
25
|
+
# Create a CDATA Node containing +string+
|
26
|
+
def create_cdata(string, &block)
|
27
|
+
Nokolexbor::CDATA.new(string.to_s, self, &block)
|
28
|
+
end
|
29
|
+
|
30
|
+
# Create a Comment Node containing +string+
|
31
|
+
def create_comment(string, &block)
|
32
|
+
Nokolexbor::Comment.new(string.to_s, self, &block)
|
33
|
+
end
|
34
|
+
|
35
|
+
# A reference to +self+
|
36
|
+
def document
|
37
|
+
self
|
38
|
+
end
|
39
|
+
|
40
|
+
def meta_encoding
|
41
|
+
if (meta = at_css("meta[charset]"))
|
42
|
+
meta[:charset]
|
43
|
+
elsif (meta = meta_content_type)
|
44
|
+
meta["content"][/charset\s*=\s*([\w-]+)/i, 1]
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def meta_encoding=(encoding)
|
49
|
+
if (meta = meta_content_type)
|
50
|
+
meta["content"] = format("text/html; charset=%s", encoding)
|
51
|
+
encoding
|
52
|
+
elsif (meta = at_css("meta[charset]"))
|
53
|
+
meta["charset"] = encoding
|
54
|
+
else
|
55
|
+
meta = Nokolexbor::Node.new("meta", self)
|
56
|
+
meta["charset"] = encoding
|
57
|
+
|
58
|
+
if (head = at_css("head"))
|
59
|
+
head.prepend_child(meta)
|
60
|
+
else
|
61
|
+
set_metadata_element(meta)
|
62
|
+
end
|
63
|
+
encoding
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def meta_content_type
|
68
|
+
xpath("//meta[@http-equiv and boolean(@content)]").find do |node|
|
69
|
+
node["http-equiv"] =~ /\AContent-Type\z/i
|
70
|
+
end
|
71
|
+
end
|
72
|
+
private :meta_content_type
|
73
|
+
|
74
|
+
def set_metadata_element(element)
|
75
|
+
if (head = at_css("head"))
|
76
|
+
head << element
|
77
|
+
elsif (html = at_css("html"))
|
78
|
+
head = html.prepend_child(Nokolexbor::Node.new("head", self))
|
79
|
+
head.prepend_child(element)
|
80
|
+
elsif (first = children.find do |node|
|
81
|
+
case node
|
82
|
+
when Nokolexbor::Node
|
83
|
+
true
|
84
|
+
end
|
85
|
+
end)
|
86
|
+
# We reach here only if the underlying document model
|
87
|
+
# allows <html>/<head> elements to be omitted and does not
|
88
|
+
# automatically supply them.
|
89
|
+
first.add_previous_sibling(element)
|
90
|
+
else
|
91
|
+
html = add_child(Nokolexbor::Node.new("html", self))
|
92
|
+
head = html.add_child(Nokolexbor::Node.new("head", self))
|
93
|
+
head.prepend_child(element)
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
private
|
98
|
+
|
99
|
+
IMPLIED_XPATH_CONTEXTS = ["//"].freeze # :nodoc:
|
5
100
|
end
|
6
101
|
end
|
data/lib/nokolexbor/node.rb
CHANGED
@@ -45,6 +45,10 @@ module Nokolexbor
|
|
45
45
|
type == ELEMENT_NODE
|
46
46
|
end
|
47
47
|
|
48
|
+
def document?
|
49
|
+
is_a?(Nokolexbor::Document)
|
50
|
+
end
|
51
|
+
|
48
52
|
def ancestors(selector = nil)
|
49
53
|
return NodeSet.new(@document) unless respond_to?(:parent)
|
50
54
|
return NodeSet.new(@document) unless parent
|
@@ -87,6 +91,56 @@ module Nokolexbor
|
|
87
91
|
self
|
88
92
|
end
|
89
93
|
|
94
|
+
def add_previous_sibling(node_or_tags)
|
95
|
+
raise ArgumentError,
|
96
|
+
"A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
97
|
+
|
98
|
+
add_sibling(:previous, node_or_tags)
|
99
|
+
end
|
100
|
+
|
101
|
+
def add_next_sibling(node_or_tags)
|
102
|
+
raise ArgumentError,
|
103
|
+
"A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
104
|
+
|
105
|
+
add_sibling(:next, node_or_tags)
|
106
|
+
end
|
107
|
+
|
108
|
+
def before(node_or_tags)
|
109
|
+
add_previous_sibling(node_or_tags)
|
110
|
+
self
|
111
|
+
end
|
112
|
+
|
113
|
+
def after(node_or_tags)
|
114
|
+
add_next_sibling(node_or_tags)
|
115
|
+
self
|
116
|
+
end
|
117
|
+
|
118
|
+
alias_method :next_sibling, :next
|
119
|
+
alias_method :previous_sibling, :previous
|
120
|
+
alias_method :next=, :add_next_sibling
|
121
|
+
alias_method :previous=, :add_previous_sibling
|
122
|
+
|
123
|
+
def <<(node_or_tags)
|
124
|
+
add_child(node_or_tags)
|
125
|
+
self
|
126
|
+
end
|
127
|
+
|
128
|
+
def prepend_child(node)
|
129
|
+
if (first = children.first)
|
130
|
+
# Mimic the error add_child would raise.
|
131
|
+
raise "Document already has a root node" if document? && !(node.comment? || node.processing_instruction?)
|
132
|
+
|
133
|
+
first.add_sibling(:previous, node)
|
134
|
+
else
|
135
|
+
add_child(node)
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
def traverse(&block)
|
140
|
+
children.each { |j| j.traverse(&block) }
|
141
|
+
yield(self)
|
142
|
+
end
|
143
|
+
|
90
144
|
def matches?(selector)
|
91
145
|
ancestors.last.css(selector).any? { |node| node == self }
|
92
146
|
end
|
@@ -118,6 +172,10 @@ module Nokolexbor
|
|
118
172
|
end
|
119
173
|
end
|
120
174
|
|
175
|
+
def parent=(parent_node)
|
176
|
+
parent_node.add_child(self)
|
177
|
+
end
|
178
|
+
|
121
179
|
def each
|
122
180
|
attributes.each do |name, node|
|
123
181
|
yield [name, node.value]
|
@@ -134,6 +192,16 @@ module Nokolexbor
|
|
134
192
|
at_css_impl(args.join(', '))
|
135
193
|
end
|
136
194
|
|
195
|
+
def nokogiri_css(*args)
|
196
|
+
rules, handler, ns, _ = extract_params(args)
|
197
|
+
|
198
|
+
nokogiri_css_internal(self, rules, handler, ns)
|
199
|
+
end
|
200
|
+
|
201
|
+
def nokogiri_at_css(*args)
|
202
|
+
nokogiri_css(*args).first
|
203
|
+
end
|
204
|
+
|
137
205
|
def xpath(*args)
|
138
206
|
paths, handler, ns, binds = extract_params(args)
|
139
207
|
|
@@ -233,8 +301,18 @@ module Nokolexbor
|
|
233
301
|
end
|
234
302
|
end
|
235
303
|
|
304
|
+
def write_to(io, *options)
|
305
|
+
io.write(to_html(*options))
|
306
|
+
end
|
307
|
+
|
308
|
+
alias_method :write_html_to, :write_to
|
309
|
+
|
236
310
|
private
|
237
311
|
|
312
|
+
def nokogiri_css_internal(node, rules, handler, ns)
|
313
|
+
xpath_internal(node, css_rules_to_xpath(rules, ns), handler, ns, nil)
|
314
|
+
end
|
315
|
+
|
238
316
|
def xpath_internal(node, paths, handler, ns, binds)
|
239
317
|
# document = node.document
|
240
318
|
# return NodeSet.new(document) unless document
|
@@ -262,6 +340,34 @@ module Nokolexbor
|
|
262
340
|
ctx.evaluate(path, handler)
|
263
341
|
end
|
264
342
|
|
343
|
+
def css_rules_to_xpath(rules, ns)
|
344
|
+
rules.map { |rule| xpath_query_from_css_rule(rule, ns) }
|
345
|
+
end
|
346
|
+
|
347
|
+
def ensure_nokogiri
|
348
|
+
unless defined?(Nokogiri) && defined?(Nokogiri::CSS)
|
349
|
+
require 'nokogiri'
|
350
|
+
end
|
351
|
+
rescue LoadError
|
352
|
+
fail('nokogiri_css and nokogiri_at_css require Nokogiri to be installed')
|
353
|
+
end
|
354
|
+
|
355
|
+
def xpath_query_from_css_rule(rule, ns)
|
356
|
+
ensure_nokogiri
|
357
|
+
if defined? Nokogiri::CSS::XPathVisitor::BuiltinsConfig
|
358
|
+
visitor = Nokogiri::CSS::XPathVisitor.new(
|
359
|
+
builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::OPTIMAL,
|
360
|
+
doctype: :html4,
|
361
|
+
)
|
362
|
+
else
|
363
|
+
visitor = Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins.new
|
364
|
+
end
|
365
|
+
self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
|
366
|
+
Nokogiri::CSS.xpath_for(rule.to_s, { prefix: implied_xpath_context, ns: ns,
|
367
|
+
visitor: visitor, })
|
368
|
+
end.join(" | ")
|
369
|
+
end
|
370
|
+
|
265
371
|
def extract_params(params)
|
266
372
|
handler = params.find do |param|
|
267
373
|
![Hash, String, Symbol].include?(param.class)
|
@@ -280,5 +386,7 @@ module Nokolexbor
|
|
280
386
|
|
281
387
|
[params, handler, ns, binds]
|
282
388
|
end
|
389
|
+
|
390
|
+
IMPLIED_XPATH_CONTEXTS = [".//"].freeze
|
283
391
|
end
|
284
|
-
end
|
392
|
+
end
|
data/lib/nokolexbor/node_set.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Nokolexbor
|
4
|
-
class NodeSet < Node
|
4
|
+
class NodeSet < Nokolexbor::Node
|
5
5
|
include Enumerable
|
6
6
|
|
7
7
|
def self.new(document, list = [])
|
@@ -54,16 +54,17 @@ module Nokolexbor
|
|
54
54
|
alias_method :inner_text, :content
|
55
55
|
alias_method :to_str, :content
|
56
56
|
|
57
|
-
def inner_html
|
58
|
-
self.map(
|
57
|
+
def inner_html(*args)
|
58
|
+
self.map { |n| n.inner_html(*args) }.join
|
59
59
|
end
|
60
60
|
|
61
|
-
def outer_html
|
62
|
-
self.map(
|
61
|
+
def outer_html(*args)
|
62
|
+
self.map { |n| n.outer_html(*args) }.join
|
63
63
|
end
|
64
64
|
|
65
65
|
alias_method :to_s, :outer_html
|
66
66
|
alias_method :to_html, :outer_html
|
67
|
+
alias_method :serialize, :outer_html
|
67
68
|
|
68
69
|
def remove
|
69
70
|
self.each(&:remove)
|
@@ -125,6 +126,23 @@ module Nokolexbor
|
|
125
126
|
end
|
126
127
|
end
|
127
128
|
end
|
129
|
+
|
130
|
+
def nokogiri_css(*args)
|
131
|
+
rules, handler, ns, _ = extract_params(args)
|
132
|
+
paths = css_rules_to_xpath(rules, ns)
|
133
|
+
|
134
|
+
NodeSet.new(@document) do |set|
|
135
|
+
each do |node|
|
136
|
+
node.send(:xpath_internal, node, paths, handler, ns, nil).each do |inner_node|
|
137
|
+
set << inner_node
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
private
|
144
|
+
|
145
|
+
IMPLIED_XPATH_CONTEXTS = [".//", "self::"].freeze # :nodoc:
|
128
146
|
|
129
147
|
end
|
130
148
|
end
|
data/lib/nokolexbor/version.rb
CHANGED
data/lib/nokolexbor.rb
CHANGED
@@ -1,6 +1,26 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
3
|
+
begin
|
4
|
+
# pre-compiled extension by rake-compiler is located inside lib/nokolexbor/<ruby_version>/
|
5
|
+
RUBY_VERSION =~ /(\d+\.\d+)/
|
6
|
+
require "nokolexbor/#{Regexp.last_match(1)}/nokolexbor"
|
7
|
+
rescue LoadError => e
|
8
|
+
if /GLIBC/.match?(e.message)
|
9
|
+
warn(<<~EOM)
|
10
|
+
ERROR: It looks like you're trying to use Nokolexbor as a precompiled native gem on a system
|
11
|
+
with an unsupported version of glibc.
|
12
|
+
#{e.message}
|
13
|
+
If that's the case, then please install Nokolexbor via the `ruby` platform gem:
|
14
|
+
gem install nokolexbor --platform=ruby
|
15
|
+
or:
|
16
|
+
bundle config set force_ruby_platform true
|
17
|
+
EOM
|
18
|
+
raise e
|
19
|
+
end
|
20
|
+
|
21
|
+
require 'nokolexbor/nokolexbor'
|
22
|
+
end
|
23
|
+
|
4
24
|
require 'nokolexbor/version'
|
5
25
|
require 'nokolexbor/node'
|
6
26
|
require 'nokolexbor/document'
|
@@ -13,7 +13,7 @@ index 2b19f18..5e56181 100644
|
|
13
13
|
lxb_css_selector_pseudo_element_id_t;
|
14
14
|
|
15
15
|
diff --git i/source/lexbor/css/selectors/pseudo_res.h w/source/lexbor/css/selectors/pseudo_res.h
|
16
|
-
index 398d1bb..
|
16
|
+
index 398d1bb..ddd5f3e 100644
|
17
17
|
--- i/source/lexbor/css/selectors/pseudo_res.h
|
18
18
|
+++ w/source/lexbor/css/selectors/pseudo_res.h
|
19
19
|
@@ -124,7 +124,8 @@ static const lxb_css_selectors_pseudo_data_t lxb_css_selectors_pseudo_data_pseud
|
@@ -31,12 +31,12 @@ index 398d1bb..b35bfab 100644
|
|
31
31
|
{"after", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_AFTER], 5, 0},
|
32
32
|
{"first-letter", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_FIRST_LETTER], 12, 0},
|
33
33
|
- {NULL, NULL, 0, 0},
|
34
|
-
+ {"text", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_TEXT], 4, 0},
|
34
|
+
+ {"text", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_TEXT], 4, 0},
|
35
35
|
{NULL, NULL, 0, 0},
|
36
36
|
{"grammar-error", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_GRAMMAR_ERROR], 13, 0},
|
37
37
|
{"before", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_BEFORE], 6, 0},
|
38
38
|
diff --git i/source/lexbor/selectors/selectors.c w/source/lexbor/selectors/selectors.c
|
39
|
-
index
|
39
|
+
index fef05d3..1ceaa19 100644
|
40
40
|
--- i/source/lexbor/selectors/selectors.c
|
41
41
|
+++ w/source/lexbor/selectors/selectors.c
|
42
42
|
@@ -156,6 +156,12 @@ lxb_selectors_descendant(lxb_selectors_t *selectors, lxb_selectors_entry_t *entr
|
@@ -91,16 +91,7 @@ index c2d18ea..84b5b14 100644
|
|
91
91
|
|
92
92
|
node = node->next;
|
93
93
|
}
|
94
|
-
@@ -
|
95
|
-
{
|
96
|
-
node = node->next;
|
97
|
-
|
98
|
-
+ if (node == NULL) return NULL;
|
99
|
-
+
|
100
|
-
do {
|
101
|
-
if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
102
|
-
if (lxb_selectors_match(selectors, entry, selector, node)) {
|
103
|
-
@@ -290,6 +316,12 @@ lxb_selectors_following(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry
|
94
|
+
@@ -289,6 +313,12 @@ lxb_selectors_following(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry
|
104
95
|
{
|
105
96
|
return node;
|
106
97
|
}
|
@@ -113,7 +104,7 @@ index c2d18ea..84b5b14 100644
|
|
113
104
|
|
114
105
|
node = node->next;
|
115
106
|
}
|
116
|
-
@@ -
|
107
|
+
@@ -1428,6 +1458,8 @@ lxb_selectors_pseudo_element(lxb_selectors_t *selectors,
|
117
108
|
lxb_css_selector_pseudo_t *pseudo = &selector->u.pseudo;
|
118
109
|
|
119
110
|
switch (pseudo->type) {
|
@@ -2,7 +2,7 @@ diff --git i/source/lexbor/selectors/selectors.c w/source/lexbor/selectors/selec
|
|
2
2
|
index c2d18ea..84b5b14 100644
|
3
3
|
--- i/source/lexbor/selectors/selectors.c
|
4
4
|
+++ w/source/lexbor/selectors/selectors.c
|
5
|
-
@@ -
|
5
|
+
@@ -750,7 +780,7 @@ lxb_selectors_match(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry,
|
6
6
|
src = &selector->name;
|
7
7
|
|
8
8
|
if (trg->length == src->length
|
@@ -11,7 +11,7 @@ index c2d18ea..84b5b14 100644
|
|
11
11
|
{
|
12
12
|
return true;
|
13
13
|
}
|
14
|
-
@@ -
|
14
|
+
@@ -765,7 +795,7 @@ lxb_selectors_match(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry,
|
15
15
|
}
|
16
16
|
|
17
17
|
return lxb_selectors_match_class(element->attr_class->value,
|
@@ -13,6 +13,10 @@ extern "C" {
|
|
13
13
|
|
14
14
|
#include "lexbor/core/def.h"
|
15
15
|
|
16
|
+
typedef void *(*lexbor_memory_malloc_f)(size_t size);
|
17
|
+
typedef void *(*lexbor_memory_realloc_f)(void *dst, size_t size);
|
18
|
+
typedef void *(*lexbor_memory_calloc_f)(size_t num, size_t size);
|
19
|
+
typedef void (*lexbor_memory_free_f)(void *dst);
|
16
20
|
|
17
21
|
LXB_API void *
|
18
22
|
lexbor_malloc(size_t size);
|
@@ -26,6 +30,10 @@ lexbor_calloc(size_t num, size_t size);
|
|
26
30
|
LXB_API void *
|
27
31
|
lexbor_free(void *dst);
|
28
32
|
|
33
|
+
LXB_API lxb_status_t
|
34
|
+
lexbor_memory_setup(lexbor_memory_malloc_f new_malloc, lexbor_memory_realloc_f new_realloc,
|
35
|
+
lexbor_memory_calloc_f new_calloc, lexbor_memory_free_f new_free);
|
36
|
+
|
29
37
|
|
30
38
|
#ifdef __cplusplus
|
31
39
|
} /* extern "C" */
|
@@ -6,28 +6,48 @@
|
|
6
6
|
|
7
7
|
#include "lexbor/core/base.h"
|
8
8
|
|
9
|
+
static lexbor_memory_malloc_f lexbor_memory_malloc = malloc;
|
10
|
+
static lexbor_memory_realloc_f lexbor_memory_realloc = realloc;
|
11
|
+
static lexbor_memory_calloc_f lexbor_memory_calloc = calloc;
|
12
|
+
static lexbor_memory_free_f lexbor_memory_free = free;
|
9
13
|
|
10
14
|
void *
|
11
15
|
lexbor_malloc(size_t size)
|
12
16
|
{
|
13
|
-
return
|
17
|
+
return lexbor_memory_malloc(size);
|
14
18
|
}
|
15
19
|
|
16
20
|
void *
|
17
21
|
lexbor_realloc(void *dst, size_t size)
|
18
22
|
{
|
19
|
-
return
|
23
|
+
return lexbor_memory_realloc(dst, size);
|
20
24
|
}
|
21
25
|
|
22
26
|
void *
|
23
27
|
lexbor_calloc(size_t num, size_t size)
|
24
28
|
{
|
25
|
-
return
|
29
|
+
return lexbor_memory_calloc(num, size);
|
26
30
|
}
|
27
31
|
|
28
32
|
void *
|
29
33
|
lexbor_free(void *dst)
|
30
34
|
{
|
31
|
-
|
35
|
+
lexbor_memory_free(dst);
|
32
36
|
return NULL;
|
33
37
|
}
|
38
|
+
|
39
|
+
lxb_status_t
|
40
|
+
lexbor_memory_setup(lexbor_memory_malloc_f new_malloc, lexbor_memory_realloc_f new_realloc,
|
41
|
+
lexbor_memory_calloc_f new_calloc, lexbor_memory_free_f new_free)
|
42
|
+
{
|
43
|
+
if (new_malloc == NULL || new_realloc == NULL || new_calloc == NULL || new_free == NULL) {
|
44
|
+
return LXB_STATUS_ERROR_OBJECT_IS_NULL;
|
45
|
+
}
|
46
|
+
|
47
|
+
lexbor_memory_malloc = new_malloc;
|
48
|
+
lexbor_memory_realloc = new_realloc;
|
49
|
+
lexbor_memory_calloc = new_calloc;
|
50
|
+
lexbor_memory_free = new_free;
|
51
|
+
|
52
|
+
return LXB_STATUS_OK;
|
53
|
+
}
|
@@ -6,28 +6,48 @@
|
|
6
6
|
|
7
7
|
#include "lexbor/core/base.h"
|
8
8
|
|
9
|
+
static lexbor_memory_malloc_f lexbor_memory_malloc = malloc;
|
10
|
+
static lexbor_memory_realloc_f lexbor_memory_realloc = realloc;
|
11
|
+
static lexbor_memory_calloc_f lexbor_memory_calloc = calloc;
|
12
|
+
static lexbor_memory_free_f lexbor_memory_free = free;
|
9
13
|
|
10
14
|
void *
|
11
15
|
lexbor_malloc(size_t size)
|
12
16
|
{
|
13
|
-
return
|
17
|
+
return lexbor_memory_malloc(size);
|
14
18
|
}
|
15
19
|
|
16
20
|
void *
|
17
21
|
lexbor_realloc(void *dst, size_t size)
|
18
22
|
{
|
19
|
-
return
|
23
|
+
return lexbor_memory_realloc(dst, size);
|
20
24
|
}
|
21
25
|
|
22
26
|
void *
|
23
27
|
lexbor_calloc(size_t num, size_t size)
|
24
28
|
{
|
25
|
-
return
|
29
|
+
return lexbor_memory_calloc(num, size);
|
26
30
|
}
|
27
31
|
|
28
32
|
void *
|
29
33
|
lexbor_free(void *dst)
|
30
34
|
{
|
31
|
-
|
35
|
+
lexbor_memory_free(dst);
|
32
36
|
return NULL;
|
33
37
|
}
|
38
|
+
|
39
|
+
lxb_status_t
|
40
|
+
lexbor_memory_setup(lexbor_memory_malloc_f new_malloc, lexbor_memory_realloc_f new_realloc,
|
41
|
+
lexbor_memory_calloc_f new_calloc, lexbor_memory_free_f new_free)
|
42
|
+
{
|
43
|
+
if (new_malloc == NULL || new_realloc == NULL || new_calloc == NULL || new_free == NULL) {
|
44
|
+
return LXB_STATUS_ERROR_OBJECT_IS_NULL;
|
45
|
+
}
|
46
|
+
|
47
|
+
lexbor_memory_malloc = new_malloc;
|
48
|
+
lexbor_memory_realloc = new_realloc;
|
49
|
+
lexbor_memory_calloc = new_calloc;
|
50
|
+
lexbor_memory_free = new_free;
|
51
|
+
|
52
|
+
return LXB_STATUS_OK;
|
53
|
+
}
|
@@ -262,7 +262,7 @@ lxb_selectors_sibling(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry,
|
|
262
262
|
{
|
263
263
|
node = node->next;
|
264
264
|
|
265
|
-
|
265
|
+
while (node != NULL) {
|
266
266
|
if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
|
267
267
|
if (lxb_selectors_match(selectors, entry, selector, node)) {
|
268
268
|
return node;
|
@@ -273,7 +273,6 @@ lxb_selectors_sibling(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry,
|
|
273
273
|
|
274
274
|
node = node->next;
|
275
275
|
}
|
276
|
-
while (node != NULL);
|
277
276
|
|
278
277
|
return NULL;
|
279
278
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokolexbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yicheng Zhou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-01-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -38,7 +38,7 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '5.0'
|
41
|
-
description: Nokolexbor is a high
|
41
|
+
description: Nokolexbor is a high-performance HTML5 parser, with support for both
|
42
42
|
CSS selectors and XPath. It's API is designed to be compatible with Nokogiri.
|
43
43
|
email: zyc9012@gmail.com
|
44
44
|
executables: []
|
@@ -78,10 +78,12 @@ files:
|
|
78
78
|
- ext/nokolexbor/libxml/xpath.h
|
79
79
|
- ext/nokolexbor/libxml/xpathInternals.h
|
80
80
|
- ext/nokolexbor/libxml/xpointer.h
|
81
|
-
- ext/nokolexbor/
|
81
|
+
- ext/nokolexbor/nl_cdata.c
|
82
|
+
- ext/nokolexbor/nl_comment.c
|
82
83
|
- ext/nokolexbor/nl_document.c
|
83
84
|
- ext/nokolexbor/nl_node.c
|
84
85
|
- ext/nokolexbor/nl_node_set.c
|
86
|
+
- ext/nokolexbor/nl_text.c
|
85
87
|
- ext/nokolexbor/nl_xpath_context.c
|
86
88
|
- ext/nokolexbor/nokolexbor.c
|
87
89
|
- ext/nokolexbor/nokolexbor.h
|
@@ -555,5 +557,5 @@ requirements: []
|
|
555
557
|
rubygems_version: 3.0.3.1
|
556
558
|
signing_key:
|
557
559
|
specification_version: 4
|
558
|
-
summary: High
|
560
|
+
summary: High-performance HTML5 parser, with support for both CSS selectors and XPath.
|
559
561
|
test_files: []
|
data/ext/nokolexbor/memory.c
DELETED
@@ -1,46 +0,0 @@
|
|
1
|
-
/*
|
2
|
-
* This file overrides the default `lexbor_malloc` and other
|
3
|
-
* related functions to reduce memory bloat and allow Ruby to
|
4
|
-
* GC more often.
|
5
|
-
* By calling `ruby_xmalloc` instead of `malloc`, Ruby can
|
6
|
-
* count the memory usage of the C extension and run GC
|
7
|
-
* whenever `malloc_increase_byte` exceeds the limit. Similar
|
8
|
-
* to Nokogiri's solution by calling `xmlMemSetup`.
|
9
|
-
* The downside will be the downgrade of performance because
|
10
|
-
* of more frequent GC.
|
11
|
-
*/
|
12
|
-
|
13
|
-
#include <ruby.h>
|
14
|
-
#include "lexbor/core/base.h"
|
15
|
-
|
16
|
-
// Disable using ruby memory functions when ASAN is enabled,
|
17
|
-
// otherwise memory leak info will be all about ruby which
|
18
|
-
// is useless.
|
19
|
-
#ifndef NOKOLEXBOR_ASAN
|
20
|
-
|
21
|
-
void *
|
22
|
-
lexbor_malloc(size_t size)
|
23
|
-
{
|
24
|
-
return ruby_xmalloc(size);
|
25
|
-
}
|
26
|
-
|
27
|
-
void *
|
28
|
-
lexbor_realloc(void *dst, size_t size)
|
29
|
-
{
|
30
|
-
return ruby_xrealloc(dst, size);
|
31
|
-
}
|
32
|
-
|
33
|
-
void *
|
34
|
-
lexbor_calloc(size_t num, size_t size)
|
35
|
-
{
|
36
|
-
return ruby_xcalloc(num, size);
|
37
|
-
}
|
38
|
-
|
39
|
-
void *
|
40
|
-
lexbor_free(void *dst)
|
41
|
-
{
|
42
|
-
ruby_xfree(dst);
|
43
|
-
return NULL;
|
44
|
-
}
|
45
|
-
|
46
|
-
#endif
|