nokolexbor 0.3.4 → 0.3.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/nokolexbor/extconf.rb +9 -5
- data/ext/nokolexbor/nl_attribute.c +46 -0
- data/ext/nokolexbor/nl_cdata.c +8 -0
- data/ext/nokolexbor/nl_comment.c +6 -0
- data/ext/nokolexbor/nl_document.c +53 -7
- data/ext/nokolexbor/nl_document_fragment.c +9 -0
- data/ext/nokolexbor/nl_error.c +21 -19
- data/ext/nokolexbor/nl_node.c +255 -50
- data/ext/nokolexbor/nl_node_set.c +56 -1
- data/ext/nokolexbor/nl_processing_instruction.c +6 -0
- data/ext/nokolexbor/nl_text.c +6 -0
- data/ext/nokolexbor/nokolexbor.h +1 -0
- data/lib/nokolexbor/document.rb +52 -5
- data/lib/nokolexbor/document_fragment.rb +11 -0
- data/lib/nokolexbor/node.rb +367 -18
- data/lib/nokolexbor/node_set.rb +56 -0
- data/lib/nokolexbor/version.rb +1 -1
- metadata +2 -24
- data/vendor/lexbor/source/lexbor/encoding/base.h +0 -218
- data/vendor/lexbor/source/lexbor/encoding/big5.c +0 -42839
- data/vendor/lexbor/source/lexbor/encoding/config.cmake +0 -12
- data/vendor/lexbor/source/lexbor/encoding/const.h +0 -65
- data/vendor/lexbor/source/lexbor/encoding/decode.c +0 -3193
- data/vendor/lexbor/source/lexbor/encoding/decode.h +0 -370
- data/vendor/lexbor/source/lexbor/encoding/encode.c +0 -1931
- data/vendor/lexbor/source/lexbor/encoding/encode.h +0 -377
- data/vendor/lexbor/source/lexbor/encoding/encoding.c +0 -252
- data/vendor/lexbor/source/lexbor/encoding/encoding.h +0 -475
- data/vendor/lexbor/source/lexbor/encoding/euc_kr.c +0 -53883
- data/vendor/lexbor/source/lexbor/encoding/gb18030.c +0 -47905
- data/vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c +0 -159
- data/vendor/lexbor/source/lexbor/encoding/jis0208.c +0 -22477
- data/vendor/lexbor/source/lexbor/encoding/jis0212.c +0 -15787
- data/vendor/lexbor/source/lexbor/encoding/multi.h +0 -53
- data/vendor/lexbor/source/lexbor/encoding/range.c +0 -71
- data/vendor/lexbor/source/lexbor/encoding/range.h +0 -34
- data/vendor/lexbor/source/lexbor/encoding/res.c +0 -222
- data/vendor/lexbor/source/lexbor/encoding/res.h +0 -34
- data/vendor/lexbor/source/lexbor/encoding/single.c +0 -13748
- data/vendor/lexbor/source/lexbor/encoding/single.h +0 -116
data/lib/nokolexbor/node_set.rb
CHANGED
@@ -4,6 +4,11 @@ module Nokolexbor
|
|
4
4
|
class NodeSet < Nokolexbor::Node
|
5
5
|
include Enumerable
|
6
6
|
|
7
|
+
# Create a NodeSet belonging to +document+, with +list+ defaulting to an empty Array.
|
8
|
+
#
|
9
|
+
# @yield [Document]
|
10
|
+
#
|
11
|
+
# @return [NodeSet]
|
7
12
|
def self.new(document, list = [])
|
8
13
|
obj = allocate
|
9
14
|
obj.instance_variable_set(:@document, document)
|
@@ -12,6 +17,9 @@ module Nokolexbor
|
|
12
17
|
obj
|
13
18
|
end
|
14
19
|
|
20
|
+
# Iterate over each node.
|
21
|
+
#
|
22
|
+
# @yield [Node]
|
15
23
|
def each
|
16
24
|
return to_enum unless block_given?
|
17
25
|
|
@@ -21,6 +29,11 @@ module Nokolexbor
|
|
21
29
|
self
|
22
30
|
end
|
23
31
|
|
32
|
+
# Get the first +n+ elements of the NodeSet.
|
33
|
+
#
|
34
|
+
# @param n [Numeric,nil]
|
35
|
+
#
|
36
|
+
# @return [Node,Array<Node>] {Node} if +n+ is nil, otherwise {Array<Node>}
|
24
37
|
def first(n = nil)
|
25
38
|
return self[0] unless n
|
26
39
|
|
@@ -29,14 +42,19 @@ module Nokolexbor
|
|
29
42
|
list
|
30
43
|
end
|
31
44
|
|
45
|
+
# Get the last element of the NodeSet.
|
46
|
+
#
|
47
|
+
# @return [Node,nil]
|
32
48
|
def last
|
33
49
|
self[-1]
|
34
50
|
end
|
35
51
|
|
52
|
+
# @return [Boolean] true if this NodeSet is empty.
|
36
53
|
def empty?
|
37
54
|
length == 0
|
38
55
|
end
|
39
56
|
|
57
|
+
# @return [Integer] The index of the first node in this NodeSet that is equal to +node+ or meets the given block. Returns nil if no match is found.
|
40
58
|
def index(node = nil)
|
41
59
|
if node
|
42
60
|
each_with_index { |member, j| return j if member == node }
|
@@ -46,6 +64,9 @@ module Nokolexbor
|
|
46
64
|
nil
|
47
65
|
end
|
48
66
|
|
67
|
+
# Get the content of all contained Nodes.
|
68
|
+
#
|
69
|
+
# @return [String]
|
49
70
|
def content
|
50
71
|
self.map(&:content).join
|
51
72
|
end
|
@@ -54,10 +75,16 @@ module Nokolexbor
|
|
54
75
|
alias_method :inner_text, :content
|
55
76
|
alias_method :to_str, :content
|
56
77
|
|
78
|
+
# Get the inner html of all contained Nodes.
|
79
|
+
#
|
80
|
+
# @return [String]
|
57
81
|
def inner_html(*args)
|
58
82
|
self.map { |n| n.inner_html(*args) }.join
|
59
83
|
end
|
60
84
|
|
85
|
+
# Convert this NodeSet to HTML.
|
86
|
+
#
|
87
|
+
# @return [String]
|
61
88
|
def outer_html(*args)
|
62
89
|
self.map { |n| n.outer_html(*args) }.join
|
63
90
|
end
|
@@ -66,6 +93,9 @@ module Nokolexbor
|
|
66
93
|
alias_method :to_html, :outer_html
|
67
94
|
alias_method :serialize, :outer_html
|
68
95
|
|
96
|
+
# Remove all nodes in this NodeSet.
|
97
|
+
#
|
98
|
+
# @see Node#remove
|
69
99
|
def remove
|
70
100
|
self.each(&:remove)
|
71
101
|
end
|
@@ -73,22 +103,32 @@ module Nokolexbor
|
|
73
103
|
alias_method :unlink, :remove
|
74
104
|
alias_method :to_ary, :to_a
|
75
105
|
|
106
|
+
# Destroy all nodes in the NodeSet.
|
107
|
+
#
|
108
|
+
# @see Node#destroy
|
76
109
|
def destroy
|
77
110
|
self.each(&:destroy)
|
78
111
|
end
|
79
112
|
|
113
|
+
# @return [Node,nil] The last element of this NodeSet and removes it. Returns
|
114
|
+
# +nil+ if the set is empty.
|
80
115
|
def pop
|
81
116
|
return nil if length == 0
|
82
117
|
|
83
118
|
delete(last)
|
84
119
|
end
|
85
120
|
|
121
|
+
# @return [Node,nil] The first element of this NodeSet and removes it. Returns
|
122
|
+
# +nil+ if the set is empty.
|
86
123
|
def shift
|
87
124
|
return nil if length == 0
|
88
125
|
|
89
126
|
delete(first)
|
90
127
|
end
|
91
128
|
|
129
|
+
# @return [Boolean] true if two NodeSets contain the same number
|
130
|
+
# of elements and each element is equal to the corresponding
|
131
|
+
# element in the other NodeSet.
|
92
132
|
def ==(other)
|
93
133
|
return false unless other.is_a?(NodeSet)
|
94
134
|
return false unless length == other.length
|
@@ -99,6 +139,8 @@ module Nokolexbor
|
|
99
139
|
true
|
100
140
|
end
|
101
141
|
|
142
|
+
# @return [NodeSet] A new NodeSet containing all the children of all the nodes in
|
143
|
+
# the NodeSet.
|
102
144
|
def children
|
103
145
|
node_set = NodeSet.new(@document)
|
104
146
|
each do |node|
|
@@ -107,6 +149,8 @@ module Nokolexbor
|
|
107
149
|
node_set
|
108
150
|
end
|
109
151
|
|
152
|
+
# @return [NodeSet] A new NodeSet containing all the nodes in the NodeSet
|
153
|
+
# in reverse order.
|
110
154
|
def reverse
|
111
155
|
node_set = NodeSet.new(@document)
|
112
156
|
(length - 1).downto(0) do |x|
|
@@ -115,6 +159,17 @@ module Nokolexbor
|
|
115
159
|
node_set
|
116
160
|
end
|
117
161
|
|
162
|
+
# Wrap all nodes of this NodeSet with +node_or_tags+.
|
163
|
+
#
|
164
|
+
# @see Node#wrap
|
165
|
+
#
|
166
|
+
# @return [NodeSet] +self+, to support chaining.
|
167
|
+
def wrap(node_or_tags)
|
168
|
+
map { |node| node.wrap(node_or_tags) }
|
169
|
+
self
|
170
|
+
end
|
171
|
+
|
172
|
+
# (see Node#xpath)
|
118
173
|
def xpath(*args)
|
119
174
|
paths, handler, ns, binds = extract_params(args)
|
120
175
|
|
@@ -127,6 +182,7 @@ module Nokolexbor
|
|
127
182
|
end
|
128
183
|
end
|
129
184
|
|
185
|
+
# (see Node#nokogiri_css)
|
130
186
|
def nokogiri_css(*args)
|
131
187
|
rules, handler, ns, _ = extract_params(args)
|
132
188
|
paths = css_rules_to_xpath(rules, ns)
|
data/lib/nokolexbor/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokolexbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.4
|
4
|
+
version: 0.3.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yicheng Zhou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-02-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -259,28 +259,6 @@ files:
|
|
259
259
|
- vendor/lexbor/source/lexbor/dom/interfaces/shadow_root.h
|
260
260
|
- vendor/lexbor/source/lexbor/dom/interfaces/text.c
|
261
261
|
- vendor/lexbor/source/lexbor/dom/interfaces/text.h
|
262
|
-
- vendor/lexbor/source/lexbor/encoding/base.h
|
263
|
-
- vendor/lexbor/source/lexbor/encoding/big5.c
|
264
|
-
- vendor/lexbor/source/lexbor/encoding/config.cmake
|
265
|
-
- vendor/lexbor/source/lexbor/encoding/const.h
|
266
|
-
- vendor/lexbor/source/lexbor/encoding/decode.c
|
267
|
-
- vendor/lexbor/source/lexbor/encoding/decode.h
|
268
|
-
- vendor/lexbor/source/lexbor/encoding/encode.c
|
269
|
-
- vendor/lexbor/source/lexbor/encoding/encode.h
|
270
|
-
- vendor/lexbor/source/lexbor/encoding/encoding.c
|
271
|
-
- vendor/lexbor/source/lexbor/encoding/encoding.h
|
272
|
-
- vendor/lexbor/source/lexbor/encoding/euc_kr.c
|
273
|
-
- vendor/lexbor/source/lexbor/encoding/gb18030.c
|
274
|
-
- vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c
|
275
|
-
- vendor/lexbor/source/lexbor/encoding/jis0208.c
|
276
|
-
- vendor/lexbor/source/lexbor/encoding/jis0212.c
|
277
|
-
- vendor/lexbor/source/lexbor/encoding/multi.h
|
278
|
-
- vendor/lexbor/source/lexbor/encoding/range.c
|
279
|
-
- vendor/lexbor/source/lexbor/encoding/range.h
|
280
|
-
- vendor/lexbor/source/lexbor/encoding/res.c
|
281
|
-
- vendor/lexbor/source/lexbor/encoding/res.h
|
282
|
-
- vendor/lexbor/source/lexbor/encoding/single.c
|
283
|
-
- vendor/lexbor/source/lexbor/encoding/single.h
|
284
262
|
- vendor/lexbor/source/lexbor/html/base.h
|
285
263
|
- vendor/lexbor/source/lexbor/html/config.cmake
|
286
264
|
- vendor/lexbor/source/lexbor/html/encoding.c
|
@@ -1,218 +0,0 @@
|
|
1
|
-
/*
|
2
|
-
* Copyright (C) 2019 Alexander Borisov
|
3
|
-
*
|
4
|
-
* Author: Alexander Borisov <borisov@lexbor.com>
|
5
|
-
*/
|
6
|
-
|
7
|
-
#ifndef LEXBOR_ENCODING_BASE_H
|
8
|
-
#define LEXBOR_ENCODING_BASE_H
|
9
|
-
|
10
|
-
#ifdef __cplusplus
|
11
|
-
extern "C" {
|
12
|
-
#endif
|
13
|
-
|
14
|
-
#include "lexbor/core/base.h"
|
15
|
-
#include "lexbor/encoding/const.h"
|
16
|
-
|
17
|
-
|
18
|
-
#define LXB_ENCODING_VERSION_MAJOR 2
|
19
|
-
#define LXB_ENCODING_VERSION_MINOR 0
|
20
|
-
#define LXB_ENCODING_VERSION_PATCH 1
|
21
|
-
|
22
|
-
#define LXB_ENCODING_VERSION_STRING \
|
23
|
-
LEXBOR_STRINGIZE(LXB_ENCODING_VERSION_MAJOR) "." \
|
24
|
-
LEXBOR_STRINGIZE(LXB_ENCODING_VERSION_MINOR) "." \
|
25
|
-
LEXBOR_STRINGIZE(LXB_ENCODING_VERSION_PATCH)
|
26
|
-
|
27
|
-
|
28
|
-
#define LXB_ENCODING_REPLACEMENT_BYTES ((lxb_char_t *) "\xEF\xBF\xBD")
|
29
|
-
|
30
|
-
#define LXB_ENCODING_REPLACEMENT_BUFFER_LEN 1
|
31
|
-
#define LXB_ENCODING_REPLACEMENT_BUFFER \
|
32
|
-
(&(const lxb_codepoint_t) {LXB_ENCODING_REPLACEMENT_CODEPOINT})
|
33
|
-
|
34
|
-
|
35
|
-
/*
|
36
|
-
* In UTF-8 0x10FFFF value is maximum (inclusive)
|
37
|
-
*/
|
38
|
-
enum {
|
39
|
-
LXB_ENCODING_REPLACEMENT_SIZE = 0x03,
|
40
|
-
LXB_ENCODING_REPLACEMENT_CODEPOINT = 0xFFFD,
|
41
|
-
LXB_ENCODING_MAX_CODEPOINT = 0x10FFFF,
|
42
|
-
LXB_ENCODING_ERROR_CODEPOINT = 0x1FFFFF
|
43
|
-
};
|
44
|
-
|
45
|
-
enum {
|
46
|
-
LXB_ENCODING_ENCODE_OK = 0x00,
|
47
|
-
LXB_ENCODING_ENCODE_ERROR = -0x01,
|
48
|
-
LXB_ENCODING_ENCODE_SMALL_BUFFER = -0x02
|
49
|
-
};
|
50
|
-
|
51
|
-
enum {
|
52
|
-
LXB_ENCODING_DECODE_MAX_CODEPOINT = LXB_ENCODING_MAX_CODEPOINT,
|
53
|
-
LXB_ENCODING_DECODE_ERROR = LXB_ENCODING_ERROR_CODEPOINT,
|
54
|
-
LXB_ENCODING_DECODE_CONTINUE = 0x2FFFFF
|
55
|
-
};
|
56
|
-
|
57
|
-
enum {
|
58
|
-
LXB_ENCODING_DECODE_2022_JP_ASCII = 0x00,
|
59
|
-
LXB_ENCODING_DECODE_2022_JP_ROMAN,
|
60
|
-
LXB_ENCODING_DECODE_2022_JP_KATAKANA,
|
61
|
-
LXB_ENCODING_DECODE_2022_JP_LEAD,
|
62
|
-
LXB_ENCODING_DECODE_2022_JP_TRAIL,
|
63
|
-
LXB_ENCODING_DECODE_2022_JP_ESCAPE_START,
|
64
|
-
LXB_ENCODING_DECODE_2022_JP_ESCAPE,
|
65
|
-
LXB_ENCODING_DECODE_2022_JP_UNSET
|
66
|
-
};
|
67
|
-
|
68
|
-
enum {
|
69
|
-
LXB_ENCODING_ENCODE_2022_JP_ASCII = 0x00,
|
70
|
-
LXB_ENCODING_ENCODE_2022_JP_ROMAN,
|
71
|
-
LXB_ENCODING_ENCODE_2022_JP_JIS0208
|
72
|
-
};
|
73
|
-
|
74
|
-
typedef struct {
|
75
|
-
unsigned need;
|
76
|
-
lxb_char_t lower;
|
77
|
-
lxb_char_t upper;
|
78
|
-
}
|
79
|
-
lxb_encoding_ctx_utf_8_t;
|
80
|
-
|
81
|
-
typedef struct {
|
82
|
-
lxb_char_t first;
|
83
|
-
lxb_char_t second;
|
84
|
-
lxb_char_t third;
|
85
|
-
}
|
86
|
-
lxb_encoding_ctx_gb18030_t;
|
87
|
-
|
88
|
-
typedef struct {
|
89
|
-
lxb_char_t lead;
|
90
|
-
bool is_jis0212;
|
91
|
-
}
|
92
|
-
lxb_encoding_ctx_euc_jp_t;
|
93
|
-
|
94
|
-
typedef struct {
|
95
|
-
lxb_char_t lead;
|
96
|
-
lxb_char_t prepand;
|
97
|
-
unsigned state;
|
98
|
-
unsigned out_state;
|
99
|
-
bool out_flag;
|
100
|
-
}
|
101
|
-
lxb_encoding_ctx_2022_jp_t;
|
102
|
-
|
103
|
-
typedef struct lxb_encoding_data lxb_encoding_data_t;
|
104
|
-
|
105
|
-
typedef struct {
|
106
|
-
const lxb_encoding_data_t *encoding_data;
|
107
|
-
|
108
|
-
/* Out buffer */
|
109
|
-
lxb_codepoint_t *buffer_out;
|
110
|
-
size_t buffer_length;
|
111
|
-
size_t buffer_used;
|
112
|
-
|
113
|
-
/*
|
114
|
-
* Bad code points will be replaced to user code point.
|
115
|
-
* If replace_to == 0 stop parsing and return error to the user.
|
116
|
-
*/
|
117
|
-
const lxb_codepoint_t *replace_to;
|
118
|
-
size_t replace_len;
|
119
|
-
|
120
|
-
/* Not for users */
|
121
|
-
lxb_codepoint_t codepoint;
|
122
|
-
lxb_codepoint_t second_codepoint;
|
123
|
-
bool prepend;
|
124
|
-
bool have_error;
|
125
|
-
|
126
|
-
lxb_status_t status;
|
127
|
-
|
128
|
-
union {
|
129
|
-
lxb_encoding_ctx_utf_8_t utf_8;
|
130
|
-
lxb_encoding_ctx_gb18030_t gb18030;
|
131
|
-
unsigned lead;
|
132
|
-
lxb_encoding_ctx_euc_jp_t euc_jp;
|
133
|
-
lxb_encoding_ctx_2022_jp_t iso_2022_jp;
|
134
|
-
} u;
|
135
|
-
}
|
136
|
-
lxb_encoding_decode_t;
|
137
|
-
|
138
|
-
typedef struct {
|
139
|
-
const lxb_encoding_data_t *encoding_data;
|
140
|
-
|
141
|
-
/* Out buffer */
|
142
|
-
lxb_char_t *buffer_out;
|
143
|
-
size_t buffer_length;
|
144
|
-
size_t buffer_used;
|
145
|
-
|
146
|
-
/*
|
147
|
-
* Bad code points will be replaced to user bytes.
|
148
|
-
* If replace_to == NULL stop parsing and return error to the user.
|
149
|
-
*/
|
150
|
-
const lxb_char_t *replace_to;
|
151
|
-
size_t replace_len;
|
152
|
-
|
153
|
-
unsigned state;
|
154
|
-
}
|
155
|
-
lxb_encoding_encode_t;
|
156
|
-
|
157
|
-
/*
|
158
|
-
* Why can't I pass a char ** to a function which expects a const char **?
|
159
|
-
* http://c-faq.com/ansi/constmismatch.html
|
160
|
-
*
|
161
|
-
* Short answer: use cast (const char **).
|
162
|
-
*
|
163
|
-
* For example:
|
164
|
-
* lxb_encoding_ctx_t ctx = {0};
|
165
|
-
* const lxb_encoding_data_t *enc;
|
166
|
-
*
|
167
|
-
* lxb_char_t *data = (lxb_char_t *) "\x81\x30\x84\x36";
|
168
|
-
*
|
169
|
-
* enc = lxb_encoding_data(LXB_ENCODING_GB18030);
|
170
|
-
*
|
171
|
-
* enc->decode(&ctx, (const lxb_char_t **) &data, data + 4);
|
172
|
-
*/
|
173
|
-
typedef lxb_status_t
|
174
|
-
(*lxb_encoding_encode_f)(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cp,
|
175
|
-
const lxb_codepoint_t *end);
|
176
|
-
|
177
|
-
typedef lxb_status_t
|
178
|
-
(*lxb_encoding_decode_f)(lxb_encoding_decode_t *ctx,
|
179
|
-
const lxb_char_t **data, const lxb_char_t *end);
|
180
|
-
|
181
|
-
typedef int8_t
|
182
|
-
(*lxb_encoding_encode_single_f)(lxb_encoding_encode_t *ctx, lxb_char_t **data,
|
183
|
-
const lxb_char_t *end, lxb_codepoint_t cp);
|
184
|
-
|
185
|
-
typedef lxb_codepoint_t
|
186
|
-
(*lxb_encoding_decode_single_f)(lxb_encoding_decode_t *ctx,
|
187
|
-
const lxb_char_t **data, const lxb_char_t *end);
|
188
|
-
|
189
|
-
struct lxb_encoding_data {
|
190
|
-
lxb_encoding_t encoding;
|
191
|
-
lxb_encoding_encode_f encode;
|
192
|
-
lxb_encoding_decode_f decode;
|
193
|
-
lxb_encoding_encode_single_f encode_single;
|
194
|
-
lxb_encoding_decode_single_f decode_single;
|
195
|
-
lxb_char_t *name;
|
196
|
-
};
|
197
|
-
|
198
|
-
typedef struct {
|
199
|
-
lxb_char_t *name;
|
200
|
-
unsigned size;
|
201
|
-
lxb_codepoint_t codepoint;
|
202
|
-
}
|
203
|
-
lxb_encoding_single_index_t;
|
204
|
-
|
205
|
-
typedef lxb_encoding_single_index_t lxb_encoding_multi_index_t;
|
206
|
-
|
207
|
-
typedef struct {
|
208
|
-
unsigned index;
|
209
|
-
lxb_codepoint_t codepoint;
|
210
|
-
}
|
211
|
-
lxb_encoding_range_index_t;
|
212
|
-
|
213
|
-
|
214
|
-
#ifdef __cplusplus
|
215
|
-
} /* extern "C" */
|
216
|
-
#endif
|
217
|
-
|
218
|
-
#endif /* LEXBOR_ENCODING_BASE_H */
|