nokolexbor 0.2.5 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nokolexbor/CMakeLists.txt +7 -4
- data/ext/nokolexbor/config.h.cmake.in +2 -0
- data/ext/nokolexbor/extconf.rb +47 -25
- data/ext/nokolexbor/libxml/SAX2.h +4 -4
- data/ext/nokolexbor/libxml/chvalid.h +21 -21
- data/ext/nokolexbor/libxml/dict.h +13 -13
- data/ext/nokolexbor/libxml/globals.h +202 -202
- data/ext/nokolexbor/libxml/hash.h +25 -25
- data/ext/nokolexbor/libxml/parser.h +5 -5
- data/ext/nokolexbor/libxml/parserInternals.h +4 -4
- data/ext/nokolexbor/libxml/pattern.h +14 -14
- data/ext/nokolexbor/libxml/threads.h +15 -15
- data/ext/nokolexbor/libxml/tree.h +5 -5
- data/ext/nokolexbor/libxml/xmlerror.h +5 -5
- data/ext/nokolexbor/libxml/xmlmemory.h +16 -16
- data/ext/nokolexbor/libxml/xmlstring.h +30 -30
- data/ext/nokolexbor/libxml/xpath.h +43 -43
- data/ext/nokolexbor/libxml/xpathInternals.h +128 -128
- data/ext/nokolexbor/memory.c +6 -6
- data/ext/nokolexbor/nl_cdata.c +44 -0
- data/ext/nokolexbor/nl_comment.c +44 -0
- data/ext/nokolexbor/nl_document.c +23 -9
- data/ext/nokolexbor/nl_node.c +191 -178
- data/ext/nokolexbor/nl_node_set.c +38 -73
- data/ext/nokolexbor/nl_text.c +44 -0
- data/ext/nokolexbor/nl_xpath_context.c +33 -42
- data/ext/nokolexbor/nokolexbor.c +7 -3
- data/ext/nokolexbor/nokolexbor.h +9 -7
- data/ext/nokolexbor/private/buf.h +1 -1
- data/ext/nokolexbor/private/error.h +3 -3
- data/ext/nokolexbor/xml_SAX2.c +8 -8
- data/ext/nokolexbor/xml_buf.c +19 -19
- data/ext/nokolexbor/xml_chvalid.c +25 -25
- data/ext/nokolexbor/xml_dict.c +69 -69
- data/ext/nokolexbor/xml_encoding.c +2 -2
- data/ext/nokolexbor/xml_error.c +51 -51
- data/ext/nokolexbor/xml_globals.c +329 -329
- data/ext/nokolexbor/xml_hash.c +131 -131
- data/ext/nokolexbor/xml_memory.c +25 -25
- data/ext/nokolexbor/xml_parser.c +3 -3
- data/ext/nokolexbor/xml_parserInternals.c +15 -15
- data/ext/nokolexbor/xml_pattern.c +103 -103
- data/ext/nokolexbor/xml_string.c +93 -93
- data/ext/nokolexbor/xml_threads.c +61 -61
- data/ext/nokolexbor/xml_tree.c +12 -12
- data/ext/nokolexbor/xml_xpath.c +1194 -1203
- data/lib/nokolexbor/document.rb +92 -1
- data/lib/nokolexbor/node.rb +64 -0
- data/lib/nokolexbor/node_set.rb +6 -5
- data/lib/nokolexbor/version.rb +1 -1
- data/lib/nokolexbor.rb +21 -1
- data/patches/0001-lexbor-support-text-pseudo-element.patch +1 -1
- metadata +7 -4
data/lib/nokolexbor/document.rb
CHANGED
@@ -1,6 +1,97 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Nokolexbor
|
4
|
-
class Document < Node
|
4
|
+
class Document < Nokolexbor::Node
|
5
|
+
def create_element(name, *contents_or_attrs, &block)
|
6
|
+
elm = Nokolexbor::Element.new(name, self, &block)
|
7
|
+
contents_or_attrs.each do |arg|
|
8
|
+
case arg
|
9
|
+
when Hash
|
10
|
+
arg.each do |k, v|
|
11
|
+
elm[k.to_s] = v.to_s
|
12
|
+
end
|
13
|
+
else
|
14
|
+
elm.content = arg
|
15
|
+
end
|
16
|
+
end
|
17
|
+
elm
|
18
|
+
end
|
19
|
+
|
20
|
+
# Create a Text Node with +string+
|
21
|
+
def create_text_node(string, &block)
|
22
|
+
Nokolexbor::Text.new(string.to_s, self, &block)
|
23
|
+
end
|
24
|
+
|
25
|
+
# Create a CDATA Node containing +string+
|
26
|
+
def create_cdata(string, &block)
|
27
|
+
Nokolexbor::CDATA.new(string.to_s, self, &block)
|
28
|
+
end
|
29
|
+
|
30
|
+
# Create a Comment Node containing +string+
|
31
|
+
def create_comment(string, &block)
|
32
|
+
Nokolexbor::Comment.new(string.to_s, self, &block)
|
33
|
+
end
|
34
|
+
|
35
|
+
# A reference to +self+
|
36
|
+
def document
|
37
|
+
self
|
38
|
+
end
|
39
|
+
|
40
|
+
def meta_encoding
|
41
|
+
if (meta = at_css("meta[charset]"))
|
42
|
+
meta[:charset]
|
43
|
+
elsif (meta = meta_content_type)
|
44
|
+
meta["content"][/charset\s*=\s*([\w-]+)/i, 1]
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def meta_encoding=(encoding)
|
49
|
+
if (meta = meta_content_type)
|
50
|
+
meta["content"] = format("text/html; charset=%s", encoding)
|
51
|
+
encoding
|
52
|
+
elsif (meta = at_css("meta[charset]"))
|
53
|
+
meta["charset"] = encoding
|
54
|
+
else
|
55
|
+
meta = Nokolexbor::Node.new("meta", self)
|
56
|
+
meta["charset"] = encoding
|
57
|
+
|
58
|
+
if (head = at_css("head"))
|
59
|
+
head.prepend_child(meta)
|
60
|
+
else
|
61
|
+
set_metadata_element(meta)
|
62
|
+
end
|
63
|
+
encoding
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def meta_content_type
|
68
|
+
xpath("//meta[@http-equiv and boolean(@content)]").find do |node|
|
69
|
+
node["http-equiv"] =~ /\AContent-Type\z/i
|
70
|
+
end
|
71
|
+
end
|
72
|
+
private :meta_content_type
|
73
|
+
|
74
|
+
def set_metadata_element(element)
|
75
|
+
if (head = at_css("head"))
|
76
|
+
head << element
|
77
|
+
elsif (html = at_css("html"))
|
78
|
+
head = html.prepend_child(Nokolexbor::Node.new("head", self))
|
79
|
+
head.prepend_child(element)
|
80
|
+
elsif (first = children.find do |node|
|
81
|
+
case node
|
82
|
+
when Nokolexbor::Node
|
83
|
+
true
|
84
|
+
end
|
85
|
+
end)
|
86
|
+
# We reach here only if the underlying document model
|
87
|
+
# allows <html>/<head> elements to be omitted and does not
|
88
|
+
# automatically supply them.
|
89
|
+
first.add_previous_sibling(element)
|
90
|
+
else
|
91
|
+
html = add_child(Nokolexbor::Node.new("html", self))
|
92
|
+
head = html.add_child(Nokolexbor::Node.new("head", self))
|
93
|
+
head.prepend_child(element)
|
94
|
+
end
|
95
|
+
end
|
5
96
|
end
|
6
97
|
end
|
data/lib/nokolexbor/node.rb
CHANGED
@@ -45,6 +45,10 @@ module Nokolexbor
|
|
45
45
|
type == ELEMENT_NODE
|
46
46
|
end
|
47
47
|
|
48
|
+
def document?
|
49
|
+
is_a?(Nokolexbor::Document)
|
50
|
+
end
|
51
|
+
|
48
52
|
def ancestors(selector = nil)
|
49
53
|
return NodeSet.new(@document) unless respond_to?(:parent)
|
50
54
|
return NodeSet.new(@document) unless parent
|
@@ -87,6 +91,56 @@ module Nokolexbor
|
|
87
91
|
self
|
88
92
|
end
|
89
93
|
|
94
|
+
def add_previous_sibling(node_or_tags)
|
95
|
+
raise ArgumentError,
|
96
|
+
"A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
97
|
+
|
98
|
+
add_sibling(:previous, node_or_tags)
|
99
|
+
end
|
100
|
+
|
101
|
+
def add_next_sibling(node_or_tags)
|
102
|
+
raise ArgumentError,
|
103
|
+
"A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
104
|
+
|
105
|
+
add_sibling(:next, node_or_tags)
|
106
|
+
end
|
107
|
+
|
108
|
+
def before(node_or_tags)
|
109
|
+
add_previous_sibling(node_or_tags)
|
110
|
+
self
|
111
|
+
end
|
112
|
+
|
113
|
+
def after(node_or_tags)
|
114
|
+
add_next_sibling(node_or_tags)
|
115
|
+
self
|
116
|
+
end
|
117
|
+
|
118
|
+
alias_method :next_sibling, :next
|
119
|
+
alias_method :previous_sibling, :previous
|
120
|
+
alias_method :next=, :add_next_sibling
|
121
|
+
alias_method :previous=, :add_previous_sibling
|
122
|
+
|
123
|
+
def <<(node_or_tags)
|
124
|
+
add_child(node_or_tags)
|
125
|
+
self
|
126
|
+
end
|
127
|
+
|
128
|
+
def prepend_child(node)
|
129
|
+
if (first = children.first)
|
130
|
+
# Mimic the error add_child would raise.
|
131
|
+
raise "Document already has a root node" if document? && !(node.comment? || node.processing_instruction?)
|
132
|
+
|
133
|
+
first.add_sibling(:previous, node)
|
134
|
+
else
|
135
|
+
add_child(node)
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
def traverse(&block)
|
140
|
+
children.each { |j| j.traverse(&block) }
|
141
|
+
yield(self)
|
142
|
+
end
|
143
|
+
|
90
144
|
def matches?(selector)
|
91
145
|
ancestors.last.css(selector).any? { |node| node == self }
|
92
146
|
end
|
@@ -118,6 +172,10 @@ module Nokolexbor
|
|
118
172
|
end
|
119
173
|
end
|
120
174
|
|
175
|
+
def parent=(parent_node)
|
176
|
+
parent_node.add_child(self)
|
177
|
+
end
|
178
|
+
|
121
179
|
def each
|
122
180
|
attributes.each do |name, node|
|
123
181
|
yield [name, node.value]
|
@@ -233,6 +291,12 @@ module Nokolexbor
|
|
233
291
|
end
|
234
292
|
end
|
235
293
|
|
294
|
+
def write_to(io, *options)
|
295
|
+
io.write(to_html(*options))
|
296
|
+
end
|
297
|
+
|
298
|
+
alias_method :write_html_to, :write_to
|
299
|
+
|
236
300
|
private
|
237
301
|
|
238
302
|
def xpath_internal(node, paths, handler, ns, binds)
|
data/lib/nokolexbor/node_set.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Nokolexbor
|
4
|
-
class NodeSet < Node
|
4
|
+
class NodeSet < Nokolexbor::Node
|
5
5
|
include Enumerable
|
6
6
|
|
7
7
|
def self.new(document, list = [])
|
@@ -54,16 +54,17 @@ module Nokolexbor
|
|
54
54
|
alias_method :inner_text, :content
|
55
55
|
alias_method :to_str, :content
|
56
56
|
|
57
|
-
def inner_html
|
58
|
-
self.map(&:inner_html).join
|
57
|
+
def inner_html(*args)
|
58
|
+
self.map { |n| n.inner_html(*args) }.join
|
59
59
|
end
|
60
60
|
|
61
|
-
def outer_html
|
62
|
-
self.map(&:outer_html).join
|
61
|
+
def outer_html(*args)
|
62
|
+
self.map { |n| n.outer_html(*args) }.join
|
63
63
|
end
|
64
64
|
|
65
65
|
alias_method :to_s, :outer_html
|
66
66
|
alias_method :to_html, :outer_html
|
67
|
+
alias_method :serialize, :outer_html
|
67
68
|
|
68
69
|
def remove
|
69
70
|
self.each(&:remove)
|
data/lib/nokolexbor/version.rb
CHANGED
data/lib/nokolexbor.rb
CHANGED
@@ -1,6 +1,26 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
3
|
+
begin
|
4
|
+
# pre-compiled extension by rake-compiler is located inside lib/nokolexbor/<ruby_version>/
|
5
|
+
RUBY_VERSION =~ /(\d+\.\d+)/
|
6
|
+
require "nokolexbor/#{Regexp.last_match(1)}/nokolexbor"
|
7
|
+
rescue LoadError => e
|
8
|
+
if /GLIBC/.match?(e.message)
|
9
|
+
warn(<<~EOM)
|
10
|
+
ERROR: It looks like you're trying to use Nokolexbor as a precompiled native gem on a system
|
11
|
+
with an unsupported version of glibc.
|
12
|
+
#{e.message}
|
13
|
+
If that's the case, then please install Nokolexbor via the `ruby` platform gem:
|
14
|
+
gem install nokolexbor --platform=ruby
|
15
|
+
or:
|
16
|
+
bundle config set force_ruby_platform true
|
17
|
+
EOM
|
18
|
+
raise e
|
19
|
+
end
|
20
|
+
|
21
|
+
require 'nokolexbor/nokolexbor'
|
22
|
+
end
|
23
|
+
|
4
24
|
require 'nokolexbor/version'
|
5
25
|
require 'nokolexbor/node'
|
6
26
|
require 'nokolexbor/document'
|
@@ -31,7 +31,7 @@ index 398d1bb..b35bfab 100644
|
|
31
31
|
{"after", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_AFTER], 5, 0},
|
32
32
|
{"first-letter", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_FIRST_LETTER], 12, 0},
|
33
33
|
- {NULL, NULL, 0, 0},
|
34
|
-
+ {"text", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_TEXT], 4, 0},
|
34
|
+
+ {"text", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_TEXT], 4, 0},
|
35
35
|
{NULL, NULL, 0, 0},
|
36
36
|
{"grammar-error", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_GRAMMAR_ERROR], 13, 0},
|
37
37
|
{"before", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_BEFORE], 6, 0},
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokolexbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.5
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yicheng Zhou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-12-
|
11
|
+
date: 2022-12-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -38,7 +38,7 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '5.0'
|
41
|
-
description: Nokolexbor is a high
|
41
|
+
description: Nokolexbor is a high-performance HTML5 parser, with support for both
|
42
42
|
CSS selectors and XPath. It's API is designed to be compatible with Nokogiri.
|
43
43
|
email: zyc9012@gmail.com
|
44
44
|
executables: []
|
@@ -79,9 +79,12 @@ files:
|
|
79
79
|
- ext/nokolexbor/libxml/xpathInternals.h
|
80
80
|
- ext/nokolexbor/libxml/xpointer.h
|
81
81
|
- ext/nokolexbor/memory.c
|
82
|
+
- ext/nokolexbor/nl_cdata.c
|
83
|
+
- ext/nokolexbor/nl_comment.c
|
82
84
|
- ext/nokolexbor/nl_document.c
|
83
85
|
- ext/nokolexbor/nl_node.c
|
84
86
|
- ext/nokolexbor/nl_node_set.c
|
87
|
+
- ext/nokolexbor/nl_text.c
|
85
88
|
- ext/nokolexbor/nl_xpath_context.c
|
86
89
|
- ext/nokolexbor/nokolexbor.c
|
87
90
|
- ext/nokolexbor/nokolexbor.h
|
@@ -555,5 +558,5 @@ requirements: []
|
|
555
558
|
rubygems_version: 3.0.3.1
|
556
559
|
signing_key:
|
557
560
|
specification_version: 4
|
558
|
-
summary: High
|
561
|
+
summary: High-performance HTML5 parser, with support for both CSS selectors and XPath.
|
559
562
|
test_files: []
|