nokolexbor 0.3.4 → 0.3.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/ext/nokolexbor/nl_attribute.c +46 -0
  3. data/ext/nokolexbor/nl_cdata.c +8 -0
  4. data/ext/nokolexbor/nl_comment.c +6 -0
  5. data/ext/nokolexbor/nl_document.c +53 -7
  6. data/ext/nokolexbor/nl_document_fragment.c +9 -0
  7. data/ext/nokolexbor/nl_error.c +21 -19
  8. data/ext/nokolexbor/nl_node.c +255 -49
  9. data/ext/nokolexbor/nl_node_set.c +56 -1
  10. data/ext/nokolexbor/nl_processing_instruction.c +6 -0
  11. data/ext/nokolexbor/nl_text.c +6 -0
  12. data/ext/nokolexbor/nokolexbor.h +1 -0
  13. data/lib/nokolexbor/document.rb +52 -5
  14. data/lib/nokolexbor/document_fragment.rb +11 -0
  15. data/lib/nokolexbor/node.rb +367 -18
  16. data/lib/nokolexbor/node_set.rb +56 -0
  17. data/lib/nokolexbor/version.rb +1 -1
  18. metadata +2 -24
  19. data/vendor/lexbor/source/lexbor/encoding/base.h +0 -218
  20. data/vendor/lexbor/source/lexbor/encoding/big5.c +0 -42839
  21. data/vendor/lexbor/source/lexbor/encoding/config.cmake +0 -12
  22. data/vendor/lexbor/source/lexbor/encoding/const.h +0 -65
  23. data/vendor/lexbor/source/lexbor/encoding/decode.c +0 -3193
  24. data/vendor/lexbor/source/lexbor/encoding/decode.h +0 -370
  25. data/vendor/lexbor/source/lexbor/encoding/encode.c +0 -1931
  26. data/vendor/lexbor/source/lexbor/encoding/encode.h +0 -377
  27. data/vendor/lexbor/source/lexbor/encoding/encoding.c +0 -252
  28. data/vendor/lexbor/source/lexbor/encoding/encoding.h +0 -475
  29. data/vendor/lexbor/source/lexbor/encoding/euc_kr.c +0 -53883
  30. data/vendor/lexbor/source/lexbor/encoding/gb18030.c +0 -47905
  31. data/vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c +0 -159
  32. data/vendor/lexbor/source/lexbor/encoding/jis0208.c +0 -22477
  33. data/vendor/lexbor/source/lexbor/encoding/jis0212.c +0 -15787
  34. data/vendor/lexbor/source/lexbor/encoding/multi.h +0 -53
  35. data/vendor/lexbor/source/lexbor/encoding/range.c +0 -71
  36. data/vendor/lexbor/source/lexbor/encoding/range.h +0 -34
  37. data/vendor/lexbor/source/lexbor/encoding/res.c +0 -222
  38. data/vendor/lexbor/source/lexbor/encoding/res.h +0 -34
  39. data/vendor/lexbor/source/lexbor/encoding/single.c +0 -13748
  40. data/vendor/lexbor/source/lexbor/encoding/single.h +0 -116
@@ -4,6 +4,11 @@ module Nokolexbor
4
4
  class NodeSet < Nokolexbor::Node
5
5
  include Enumerable
6
6
 
7
+ # Create a NodeSet belonging to +document+, initially containing the nodes in +list+ (empty by default).
8
+ #
9
+ # @yield [Document]
10
+ #
11
+ # @return [NodeSet]
7
12
  def self.new(document, list = [])
8
13
  obj = allocate
9
14
  obj.instance_variable_set(:@document, document)
@@ -12,6 +17,9 @@ module Nokolexbor
12
17
  obj
13
18
  end
14
19
 
20
+ # Iterate over each node.
21
+ #
22
+ # @yield [Node]
15
23
  def each
16
24
  return to_enum unless block_given?
17
25
 
@@ -21,6 +29,11 @@ module Nokolexbor
21
29
  self
22
30
  end
23
31
 
32
+ # Get the first +n+ elements of the NodeSet.
33
+ #
34
+ # @param n [Numeric,nil]
35
+ #
36
+ # @return [Node,Array<Node>] {Node} if +n+ is nil, otherwise {Array<Node>}
24
37
  def first(n = nil)
25
38
  return self[0] unless n
26
39
 
@@ -29,14 +42,19 @@ module Nokolexbor
29
42
  list
30
43
  end
31
44
 
45
+ # Get the last element of the NodeSet.
46
+ #
47
+ # @return [Node,nil]
32
48
  def last
33
49
  self[-1]
34
50
  end
35
51
 
52
+ # @return [Boolean] true if this NodeSet is empty.
36
53
  def empty?
37
54
  length == 0
38
55
  end
39
56
 
57
+ # @return [Integer,nil] The index of the first node in this NodeSet that is equal to +node+ or meets the given block. Returns nil if no match is found.
40
58
  def index(node = nil)
41
59
  if node
42
60
  each_with_index { |member, j| return j if member == node }
@@ -46,6 +64,9 @@ module Nokolexbor
46
64
  nil
47
65
  end
48
66
 
67
+ # Get the content of all contained Nodes.
68
+ #
69
+ # @return [String]
49
70
  def content
50
71
  self.map(&:content).join
51
72
  end
@@ -54,10 +75,16 @@ module Nokolexbor
54
75
  alias_method :inner_text, :content
55
76
  alias_method :to_str, :content
56
77
 
78
+ # Get the inner html of all contained Nodes.
79
+ #
80
+ # @return [String]
57
81
  def inner_html(*args)
58
82
  self.map { |n| n.inner_html(*args) }.join
59
83
  end
60
84
 
85
+ # Convert this NodeSet to HTML.
86
+ #
87
+ # @return [String]
61
88
  def outer_html(*args)
62
89
  self.map { |n| n.outer_html(*args) }.join
63
90
  end
@@ -66,6 +93,9 @@ module Nokolexbor
66
93
  alias_method :to_html, :outer_html
67
94
  alias_method :serialize, :outer_html
68
95
 
96
+ # Remove all nodes in this NodeSet.
97
+ #
98
+ # @see Node#remove
69
99
  def remove
70
100
  self.each(&:remove)
71
101
  end
@@ -73,22 +103,32 @@ module Nokolexbor
73
103
  alias_method :unlink, :remove
74
104
  alias_method :to_ary, :to_a
75
105
 
106
+ # Destroy all nodes in the NodeSet.
107
+ #
108
+ # @see Node#destroy
76
109
  def destroy
77
110
  self.each(&:destroy)
78
111
  end
79
112
 
113
+ # @return [Node,nil] The last element of this NodeSet, which is also removed from the set. Returns
114
+ # +nil+ if the set is empty.
80
115
  def pop
81
116
  return nil if length == 0
82
117
 
83
118
  delete(last)
84
119
  end
85
120
 
121
+ # @return [Node,nil] The first element of this NodeSet, which is also removed from the set. Returns
122
+ # +nil+ if the set is empty.
86
123
  def shift
87
124
  return nil if length == 0
88
125
 
89
126
  delete(first)
90
127
  end
91
128
 
129
+ # @return [Boolean] true if two NodeSets contain the same number
130
+ # of elements and each element is equal to the corresponding
131
+ # element in the other NodeSet.
92
132
  def ==(other)
93
133
  return false unless other.is_a?(NodeSet)
94
134
  return false unless length == other.length
@@ -99,6 +139,8 @@ module Nokolexbor
99
139
  true
100
140
  end
101
141
 
142
+ # @return [NodeSet] A new NodeSet containing all the children of all the nodes in
143
+ # the NodeSet.
102
144
  def children
103
145
  node_set = NodeSet.new(@document)
104
146
  each do |node|
@@ -107,6 +149,8 @@ module Nokolexbor
107
149
  node_set
108
150
  end
109
151
 
152
+ # @return [NodeSet] A new NodeSet containing all the nodes in the NodeSet
153
+ # in reverse order.
110
154
  def reverse
111
155
  node_set = NodeSet.new(@document)
112
156
  (length - 1).downto(0) do |x|
@@ -115,6 +159,17 @@ module Nokolexbor
115
159
  node_set
116
160
  end
117
161
 
162
+ # Wrap all nodes of this NodeSet with +node_or_tags+.
163
+ #
164
+ # @see Node#wrap
165
+ #
166
+ # @return [NodeSet] +self+, to support chaining.
167
+ def wrap(node_or_tags)
168
+ map { |node| node.wrap(node_or_tags) }
169
+ self
170
+ end
171
+
172
+ # (see Node#xpath)
118
173
  def xpath(*args)
119
174
  paths, handler, ns, binds = extract_params(args)
120
175
 
@@ -127,6 +182,7 @@ module Nokolexbor
127
182
  end
128
183
  end
129
184
 
185
+ # (see Node#nokogiri_css)
130
186
  def nokogiri_css(*args)
131
187
  rules, handler, ns, _ = extract_params(args)
132
188
  paths = css_rules_to_xpath(rules, ns)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Nokolexbor
4
- VERSION = '0.3.4'
4
+ VERSION = '0.3.5'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokolexbor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yicheng Zhou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-10 00:00:00.000000000 Z
11
+ date: 2023-01-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
@@ -259,28 +259,6 @@ files:
259
259
  - vendor/lexbor/source/lexbor/dom/interfaces/shadow_root.h
260
260
  - vendor/lexbor/source/lexbor/dom/interfaces/text.c
261
261
  - vendor/lexbor/source/lexbor/dom/interfaces/text.h
262
- - vendor/lexbor/source/lexbor/encoding/base.h
263
- - vendor/lexbor/source/lexbor/encoding/big5.c
264
- - vendor/lexbor/source/lexbor/encoding/config.cmake
265
- - vendor/lexbor/source/lexbor/encoding/const.h
266
- - vendor/lexbor/source/lexbor/encoding/decode.c
267
- - vendor/lexbor/source/lexbor/encoding/decode.h
268
- - vendor/lexbor/source/lexbor/encoding/encode.c
269
- - vendor/lexbor/source/lexbor/encoding/encode.h
270
- - vendor/lexbor/source/lexbor/encoding/encoding.c
271
- - vendor/lexbor/source/lexbor/encoding/encoding.h
272
- - vendor/lexbor/source/lexbor/encoding/euc_kr.c
273
- - vendor/lexbor/source/lexbor/encoding/gb18030.c
274
- - vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c
275
- - vendor/lexbor/source/lexbor/encoding/jis0208.c
276
- - vendor/lexbor/source/lexbor/encoding/jis0212.c
277
- - vendor/lexbor/source/lexbor/encoding/multi.h
278
- - vendor/lexbor/source/lexbor/encoding/range.c
279
- - vendor/lexbor/source/lexbor/encoding/range.h
280
- - vendor/lexbor/source/lexbor/encoding/res.c
281
- - vendor/lexbor/source/lexbor/encoding/res.h
282
- - vendor/lexbor/source/lexbor/encoding/single.c
283
- - vendor/lexbor/source/lexbor/encoding/single.h
284
262
  - vendor/lexbor/source/lexbor/html/base.h
285
263
  - vendor/lexbor/source/lexbor/html/config.cmake
286
264
  - vendor/lexbor/source/lexbor/html/encoding.c
@@ -1,218 +0,0 @@
1
- /*
2
- * Copyright (C) 2019 Alexander Borisov
3
- *
4
- * Author: Alexander Borisov <borisov@lexbor.com>
5
- */
6
-
7
- #ifndef LEXBOR_ENCODING_BASE_H
8
- #define LEXBOR_ENCODING_BASE_H
9
-
10
- #ifdef __cplusplus
11
- extern "C" {
12
- #endif
13
-
14
- #include "lexbor/core/base.h"
15
- #include "lexbor/encoding/const.h"
16
-
17
-
18
- #define LXB_ENCODING_VERSION_MAJOR 2
19
- #define LXB_ENCODING_VERSION_MINOR 0
20
- #define LXB_ENCODING_VERSION_PATCH 1
21
-
22
- #define LXB_ENCODING_VERSION_STRING \
23
- LEXBOR_STRINGIZE(LXB_ENCODING_VERSION_MAJOR) "." \
24
- LEXBOR_STRINGIZE(LXB_ENCODING_VERSION_MINOR) "." \
25
- LEXBOR_STRINGIZE(LXB_ENCODING_VERSION_PATCH)
26
-
27
-
28
- #define LXB_ENCODING_REPLACEMENT_BYTES ((lxb_char_t *) "\xEF\xBF\xBD")
29
-
30
- #define LXB_ENCODING_REPLACEMENT_BUFFER_LEN 1
31
- #define LXB_ENCODING_REPLACEMENT_BUFFER \
32
- (&(const lxb_codepoint_t) {LXB_ENCODING_REPLACEMENT_CODEPOINT})
33
-
34
-
35
- /*
36
- * In UTF-8 0x10FFFF value is maximum (inclusive)
37
- */
38
- enum {
39
- LXB_ENCODING_REPLACEMENT_SIZE = 0x03,
40
- LXB_ENCODING_REPLACEMENT_CODEPOINT = 0xFFFD,
41
- LXB_ENCODING_MAX_CODEPOINT = 0x10FFFF,
42
- LXB_ENCODING_ERROR_CODEPOINT = 0x1FFFFF
43
- };
44
-
45
- enum {
46
- LXB_ENCODING_ENCODE_OK = 0x00,
47
- LXB_ENCODING_ENCODE_ERROR = -0x01,
48
- LXB_ENCODING_ENCODE_SMALL_BUFFER = -0x02
49
- };
50
-
51
- enum {
52
- LXB_ENCODING_DECODE_MAX_CODEPOINT = LXB_ENCODING_MAX_CODEPOINT,
53
- LXB_ENCODING_DECODE_ERROR = LXB_ENCODING_ERROR_CODEPOINT,
54
- LXB_ENCODING_DECODE_CONTINUE = 0x2FFFFF
55
- };
56
-
57
- enum {
58
- LXB_ENCODING_DECODE_2022_JP_ASCII = 0x00,
59
- LXB_ENCODING_DECODE_2022_JP_ROMAN,
60
- LXB_ENCODING_DECODE_2022_JP_KATAKANA,
61
- LXB_ENCODING_DECODE_2022_JP_LEAD,
62
- LXB_ENCODING_DECODE_2022_JP_TRAIL,
63
- LXB_ENCODING_DECODE_2022_JP_ESCAPE_START,
64
- LXB_ENCODING_DECODE_2022_JP_ESCAPE,
65
- LXB_ENCODING_DECODE_2022_JP_UNSET
66
- };
67
-
68
- enum {
69
- LXB_ENCODING_ENCODE_2022_JP_ASCII = 0x00,
70
- LXB_ENCODING_ENCODE_2022_JP_ROMAN,
71
- LXB_ENCODING_ENCODE_2022_JP_JIS0208
72
- };
73
-
74
- typedef struct {
75
- unsigned need;
76
- lxb_char_t lower;
77
- lxb_char_t upper;
78
- }
79
- lxb_encoding_ctx_utf_8_t;
80
-
81
- typedef struct {
82
- lxb_char_t first;
83
- lxb_char_t second;
84
- lxb_char_t third;
85
- }
86
- lxb_encoding_ctx_gb18030_t;
87
-
88
- typedef struct {
89
- lxb_char_t lead;
90
- bool is_jis0212;
91
- }
92
- lxb_encoding_ctx_euc_jp_t;
93
-
94
- typedef struct {
95
- lxb_char_t lead;
96
- lxb_char_t prepand;
97
- unsigned state;
98
- unsigned out_state;
99
- bool out_flag;
100
- }
101
- lxb_encoding_ctx_2022_jp_t;
102
-
103
- typedef struct lxb_encoding_data lxb_encoding_data_t;
104
-
105
- typedef struct {
106
- const lxb_encoding_data_t *encoding_data;
107
-
108
- /* Out buffer */
109
- lxb_codepoint_t *buffer_out;
110
- size_t buffer_length;
111
- size_t buffer_used;
112
-
113
- /*
114
- * Bad code points will be replaced with the user-supplied code point(s).
115
- * If replace_to == 0, stop parsing and return an error to the user.
116
- */
117
- const lxb_codepoint_t *replace_to;
118
- size_t replace_len;
119
-
120
- /* Not for users */
121
- lxb_codepoint_t codepoint;
122
- lxb_codepoint_t second_codepoint;
123
- bool prepend;
124
- bool have_error;
125
-
126
- lxb_status_t status;
127
-
128
- union {
129
- lxb_encoding_ctx_utf_8_t utf_8;
130
- lxb_encoding_ctx_gb18030_t gb18030;
131
- unsigned lead;
132
- lxb_encoding_ctx_euc_jp_t euc_jp;
133
- lxb_encoding_ctx_2022_jp_t iso_2022_jp;
134
- } u;
135
- }
136
- lxb_encoding_decode_t;
137
-
138
- typedef struct {
139
- const lxb_encoding_data_t *encoding_data;
140
-
141
- /* Out buffer */
142
- lxb_char_t *buffer_out;
143
- size_t buffer_length;
144
- size_t buffer_used;
145
-
146
- /*
147
- * Bad code points will be replaced with the user-supplied bytes.
148
- * If replace_to == NULL, stop parsing and return an error to the user.
149
- */
150
- const lxb_char_t *replace_to;
151
- size_t replace_len;
152
-
153
- unsigned state;
154
- }
155
- lxb_encoding_encode_t;
156
-
157
- /*
158
- * Why can't I pass a char ** to a function which expects a const char **?
159
- * http://c-faq.com/ansi/constmismatch.html
160
- *
161
- * Short answer: use cast (const char **).
162
- *
163
- * For example:
164
- * lxb_encoding_ctx_t ctx = {0};
165
- * const lxb_encoding_data_t *enc;
166
- *
167
- * lxb_char_t *data = (lxb_char_t *) "\x81\x30\x84\x36";
168
- *
169
- * enc = lxb_encoding_data(LXB_ENCODING_GB18030);
170
- *
171
- * enc->decode(&ctx, (const lxb_char_t **) &data, data + 4);
172
- */
173
- typedef lxb_status_t
174
- (*lxb_encoding_encode_f)(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cp,
175
- const lxb_codepoint_t *end);
176
-
177
- typedef lxb_status_t
178
- (*lxb_encoding_decode_f)(lxb_encoding_decode_t *ctx,
179
- const lxb_char_t **data, const lxb_char_t *end);
180
-
181
- typedef int8_t
182
- (*lxb_encoding_encode_single_f)(lxb_encoding_encode_t *ctx, lxb_char_t **data,
183
- const lxb_char_t *end, lxb_codepoint_t cp);
184
-
185
- typedef lxb_codepoint_t
186
- (*lxb_encoding_decode_single_f)(lxb_encoding_decode_t *ctx,
187
- const lxb_char_t **data, const lxb_char_t *end);
188
-
189
- struct lxb_encoding_data {
190
- lxb_encoding_t encoding;
191
- lxb_encoding_encode_f encode;
192
- lxb_encoding_decode_f decode;
193
- lxb_encoding_encode_single_f encode_single;
194
- lxb_encoding_decode_single_f decode_single;
195
- lxb_char_t *name;
196
- };
197
-
198
- typedef struct {
199
- lxb_char_t *name;
200
- unsigned size;
201
- lxb_codepoint_t codepoint;
202
- }
203
- lxb_encoding_single_index_t;
204
-
205
- typedef lxb_encoding_single_index_t lxb_encoding_multi_index_t;
206
-
207
- typedef struct {
208
- unsigned index;
209
- lxb_codepoint_t codepoint;
210
- }
211
- lxb_encoding_range_index_t;
212
-
213
-
214
- #ifdef __cplusplus
215
- } /* extern "C" */
216
- #endif
217
-
218
- #endif /* LEXBOR_ENCODING_BASE_H */