nokolexbor 0.3.4 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/ext/nokolexbor/nl_attribute.c +46 -0
  3. data/ext/nokolexbor/nl_cdata.c +8 -0
  4. data/ext/nokolexbor/nl_comment.c +6 -0
  5. data/ext/nokolexbor/nl_document.c +53 -7
  6. data/ext/nokolexbor/nl_document_fragment.c +9 -0
  7. data/ext/nokolexbor/nl_error.c +21 -19
  8. data/ext/nokolexbor/nl_node.c +255 -49
  9. data/ext/nokolexbor/nl_node_set.c +56 -1
  10. data/ext/nokolexbor/nl_processing_instruction.c +6 -0
  11. data/ext/nokolexbor/nl_text.c +6 -0
  12. data/ext/nokolexbor/nokolexbor.h +1 -0
  13. data/lib/nokolexbor/document.rb +52 -5
  14. data/lib/nokolexbor/document_fragment.rb +11 -0
  15. data/lib/nokolexbor/node.rb +367 -18
  16. data/lib/nokolexbor/node_set.rb +56 -0
  17. data/lib/nokolexbor/version.rb +1 -1
  18. metadata +2 -24
  19. data/vendor/lexbor/source/lexbor/encoding/base.h +0 -218
  20. data/vendor/lexbor/source/lexbor/encoding/big5.c +0 -42839
  21. data/vendor/lexbor/source/lexbor/encoding/config.cmake +0 -12
  22. data/vendor/lexbor/source/lexbor/encoding/const.h +0 -65
  23. data/vendor/lexbor/source/lexbor/encoding/decode.c +0 -3193
  24. data/vendor/lexbor/source/lexbor/encoding/decode.h +0 -370
  25. data/vendor/lexbor/source/lexbor/encoding/encode.c +0 -1931
  26. data/vendor/lexbor/source/lexbor/encoding/encode.h +0 -377
  27. data/vendor/lexbor/source/lexbor/encoding/encoding.c +0 -252
  28. data/vendor/lexbor/source/lexbor/encoding/encoding.h +0 -475
  29. data/vendor/lexbor/source/lexbor/encoding/euc_kr.c +0 -53883
  30. data/vendor/lexbor/source/lexbor/encoding/gb18030.c +0 -47905
  31. data/vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c +0 -159
  32. data/vendor/lexbor/source/lexbor/encoding/jis0208.c +0 -22477
  33. data/vendor/lexbor/source/lexbor/encoding/jis0212.c +0 -15787
  34. data/vendor/lexbor/source/lexbor/encoding/multi.h +0 -53
  35. data/vendor/lexbor/source/lexbor/encoding/range.c +0 -71
  36. data/vendor/lexbor/source/lexbor/encoding/range.h +0 -34
  37. data/vendor/lexbor/source/lexbor/encoding/res.c +0 -222
  38. data/vendor/lexbor/source/lexbor/encoding/res.h +0 -34
  39. data/vendor/lexbor/source/lexbor/encoding/single.c +0 -13748
  40. data/vendor/lexbor/source/lexbor/encoding/single.h +0 -116
@@ -4,6 +4,11 @@ module Nokolexbor
4
4
  class NodeSet < Nokolexbor::Node
5
5
  include Enumerable
6
6
 
7
+ # Create a NodeSet belonging to +document+ from +list+ (empty by default).
8
+ #
9
+ # @yield [Document]
10
+ #
11
+ # @return [NodeSet]
7
12
  def self.new(document, list = [])
8
13
  obj = allocate
9
14
  obj.instance_variable_set(:@document, document)
@@ -12,6 +17,9 @@ module Nokolexbor
12
17
  obj
13
18
  end
14
19
 
20
+ # Iterate over each node.
21
+ #
22
+ # @yield [Node]
15
23
  def each
16
24
  return to_enum unless block_given?
17
25
 
@@ -21,6 +29,11 @@ module Nokolexbor
21
29
  self
22
30
  end
23
31
 
32
+ # Get the first +n+ elements of the NodeSet.
33
+ #
34
+ # @param n [Numeric,nil]
35
+ #
36
+ # @return [Node,Array<Node>] {Node} if +n+ is nil, otherwise {Array<Node>}
24
37
  def first(n = nil)
25
38
  return self[0] unless n
26
39
 
@@ -29,14 +42,19 @@ module Nokolexbor
29
42
  list
30
43
  end
31
44
 
45
+ # Get the last element of the NodeSet.
46
+ #
47
+ # @return [Node,nil]
32
48
  def last
33
49
  self[-1]
34
50
  end
35
51
 
52
+ # @return [Boolean] true if this NodeSet is empty.
36
53
  def empty?
37
54
  length == 0
38
55
  end
39
56
 
57
+ # @return [Integer,nil] The index of the first node in this NodeSet that is equal to +node+ or meets the given block. Returns nil if no match is found.
40
58
  def index(node = nil)
41
59
  if node
42
60
  each_with_index { |member, j| return j if member == node }
@@ -46,6 +64,9 @@ module Nokolexbor
46
64
  nil
47
65
  end
48
66
 
67
+ # Get the content of all contained Nodes.
68
+ #
69
+ # @return [String]
49
70
  def content
50
71
  self.map(&:content).join
51
72
  end
@@ -54,10 +75,16 @@ module Nokolexbor
54
75
  alias_method :inner_text, :content
55
76
  alias_method :to_str, :content
56
77
 
78
+ # Get the inner html of all contained Nodes.
79
+ #
80
+ # @return [String]
57
81
  def inner_html(*args)
58
82
  self.map { |n| n.inner_html(*args) }.join
59
83
  end
60
84
 
85
+ # Convert this NodeSet to HTML.
86
+ #
87
+ # @return [String]
61
88
  def outer_html(*args)
62
89
  self.map { |n| n.outer_html(*args) }.join
63
90
  end
@@ -66,6 +93,9 @@ module Nokolexbor
66
93
  alias_method :to_html, :outer_html
67
94
  alias_method :serialize, :outer_html
68
95
 
96
+ # Remove all nodes in this NodeSet.
97
+ #
98
+ # @see Node#remove
69
99
  def remove
70
100
  self.each(&:remove)
71
101
  end
@@ -73,22 +103,32 @@ module Nokolexbor
73
103
  alias_method :unlink, :remove
74
104
  alias_method :to_ary, :to_a
75
105
 
106
+ # Destroy all nodes in the NodeSet.
107
+ #
108
+ # @see Node#destroy
76
109
  def destroy
77
110
  self.each(&:destroy)
78
111
  end
79
112
 
113
+ # @return [Node,nil] The last element of this NodeSet and removes it. Returns
114
+ # +nil+ if the set is empty.
80
115
  def pop
81
116
  return nil if length == 0
82
117
 
83
118
  delete(last)
84
119
  end
85
120
 
121
+ # @return [Node,nil] The first element of this NodeSet and removes it. Returns
122
+ # +nil+ if the set is empty.
86
123
  def shift
87
124
  return nil if length == 0
88
125
 
89
126
  delete(first)
90
127
  end
91
128
 
129
+ # @return [Boolean] true if two NodeSets contain the same number
130
+ # of elements and each element is equal to the corresponding
131
+ # element in the other NodeSet.
92
132
  def ==(other)
93
133
  return false unless other.is_a?(NodeSet)
94
134
  return false unless length == other.length
@@ -99,6 +139,8 @@ module Nokolexbor
99
139
  true
100
140
  end
101
141
 
142
+ # @return [NodeSet] A new NodeSet containing all the children of all the nodes in
143
+ # the NodeSet.
102
144
  def children
103
145
  node_set = NodeSet.new(@document)
104
146
  each do |node|
@@ -107,6 +149,8 @@ module Nokolexbor
107
149
  node_set
108
150
  end
109
151
 
152
+ # @return [NodeSet] A new NodeSet containing all the nodes in the NodeSet
153
+ # in reverse order.
110
154
  def reverse
111
155
  node_set = NodeSet.new(@document)
112
156
  (length - 1).downto(0) do |x|
@@ -115,6 +159,17 @@ module Nokolexbor
115
159
  node_set
116
160
  end
117
161
 
162
+ # Wrap all nodes of this NodeSet with +node_or_tags+.
163
+ #
164
+ # @see Node#wrap
165
+ #
166
+ # @return [NodeSet] +self+, to support chaining.
167
+ def wrap(node_or_tags)
168
+ map { |node| node.wrap(node_or_tags) }
169
+ self
170
+ end
171
+
172
+ # (see Node#xpath)
118
173
  def xpath(*args)
119
174
  paths, handler, ns, binds = extract_params(args)
120
175
 
@@ -127,6 +182,7 @@ module Nokolexbor
127
182
  end
128
183
  end
129
184
 
185
+ # (see Node#nokogiri_css)
130
186
  def nokogiri_css(*args)
131
187
  rules, handler, ns, _ = extract_params(args)
132
188
  paths = css_rules_to_xpath(rules, ns)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Nokolexbor
4
- VERSION = '0.3.4'
4
+ VERSION = '0.3.5'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokolexbor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yicheng Zhou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-10 00:00:00.000000000 Z
11
+ date: 2023-01-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
@@ -259,28 +259,6 @@ files:
259
259
  - vendor/lexbor/source/lexbor/dom/interfaces/shadow_root.h
260
260
  - vendor/lexbor/source/lexbor/dom/interfaces/text.c
261
261
  - vendor/lexbor/source/lexbor/dom/interfaces/text.h
262
- - vendor/lexbor/source/lexbor/encoding/base.h
263
- - vendor/lexbor/source/lexbor/encoding/big5.c
264
- - vendor/lexbor/source/lexbor/encoding/config.cmake
265
- - vendor/lexbor/source/lexbor/encoding/const.h
266
- - vendor/lexbor/source/lexbor/encoding/decode.c
267
- - vendor/lexbor/source/lexbor/encoding/decode.h
268
- - vendor/lexbor/source/lexbor/encoding/encode.c
269
- - vendor/lexbor/source/lexbor/encoding/encode.h
270
- - vendor/lexbor/source/lexbor/encoding/encoding.c
271
- - vendor/lexbor/source/lexbor/encoding/encoding.h
272
- - vendor/lexbor/source/lexbor/encoding/euc_kr.c
273
- - vendor/lexbor/source/lexbor/encoding/gb18030.c
274
- - vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c
275
- - vendor/lexbor/source/lexbor/encoding/jis0208.c
276
- - vendor/lexbor/source/lexbor/encoding/jis0212.c
277
- - vendor/lexbor/source/lexbor/encoding/multi.h
278
- - vendor/lexbor/source/lexbor/encoding/range.c
279
- - vendor/lexbor/source/lexbor/encoding/range.h
280
- - vendor/lexbor/source/lexbor/encoding/res.c
281
- - vendor/lexbor/source/lexbor/encoding/res.h
282
- - vendor/lexbor/source/lexbor/encoding/single.c
283
- - vendor/lexbor/source/lexbor/encoding/single.h
284
262
  - vendor/lexbor/source/lexbor/html/base.h
285
263
  - vendor/lexbor/source/lexbor/html/config.cmake
286
264
  - vendor/lexbor/source/lexbor/html/encoding.c
@@ -1,218 +0,0 @@
1
- /*
2
- * Copyright (C) 2019 Alexander Borisov
3
- *
4
- * Author: Alexander Borisov <borisov@lexbor.com>
5
- */
6
-
7
- #ifndef LEXBOR_ENCODING_BASE_H
8
- #define LEXBOR_ENCODING_BASE_H
9
-
10
- #ifdef __cplusplus
11
- extern "C" {
12
- #endif
13
-
14
- #include "lexbor/core/base.h"
15
- #include "lexbor/encoding/const.h"
16
-
17
-
18
- #define LXB_ENCODING_VERSION_MAJOR 2
19
- #define LXB_ENCODING_VERSION_MINOR 0
20
- #define LXB_ENCODING_VERSION_PATCH 1
21
-
22
- #define LXB_ENCODING_VERSION_STRING \
23
- LEXBOR_STRINGIZE(LXB_ENCODING_VERSION_MAJOR) "." \
24
- LEXBOR_STRINGIZE(LXB_ENCODING_VERSION_MINOR) "." \
25
- LEXBOR_STRINGIZE(LXB_ENCODING_VERSION_PATCH)
26
-
27
-
28
- #define LXB_ENCODING_REPLACEMENT_BYTES ((lxb_char_t *) "\xEF\xBF\xBD")
29
-
30
- #define LXB_ENCODING_REPLACEMENT_BUFFER_LEN 1
31
- #define LXB_ENCODING_REPLACEMENT_BUFFER \
32
- (&(const lxb_codepoint_t) {LXB_ENCODING_REPLACEMENT_CODEPOINT})
33
-
34
-
35
- /*
36
- * In UTF-8 0x10FFFF value is maximum (inclusive)
37
- */
38
- enum {
39
- LXB_ENCODING_REPLACEMENT_SIZE = 0x03,
40
- LXB_ENCODING_REPLACEMENT_CODEPOINT = 0xFFFD,
41
- LXB_ENCODING_MAX_CODEPOINT = 0x10FFFF,
42
- LXB_ENCODING_ERROR_CODEPOINT = 0x1FFFFF
43
- };
44
-
45
- enum {
46
- LXB_ENCODING_ENCODE_OK = 0x00,
47
- LXB_ENCODING_ENCODE_ERROR = -0x01,
48
- LXB_ENCODING_ENCODE_SMALL_BUFFER = -0x02
49
- };
50
-
51
- enum {
52
- LXB_ENCODING_DECODE_MAX_CODEPOINT = LXB_ENCODING_MAX_CODEPOINT,
53
- LXB_ENCODING_DECODE_ERROR = LXB_ENCODING_ERROR_CODEPOINT,
54
- LXB_ENCODING_DECODE_CONTINUE = 0x2FFFFF
55
- };
56
-
57
- enum {
58
- LXB_ENCODING_DECODE_2022_JP_ASCII = 0x00,
59
- LXB_ENCODING_DECODE_2022_JP_ROMAN,
60
- LXB_ENCODING_DECODE_2022_JP_KATAKANA,
61
- LXB_ENCODING_DECODE_2022_JP_LEAD,
62
- LXB_ENCODING_DECODE_2022_JP_TRAIL,
63
- LXB_ENCODING_DECODE_2022_JP_ESCAPE_START,
64
- LXB_ENCODING_DECODE_2022_JP_ESCAPE,
65
- LXB_ENCODING_DECODE_2022_JP_UNSET
66
- };
67
-
68
- enum {
69
- LXB_ENCODING_ENCODE_2022_JP_ASCII = 0x00,
70
- LXB_ENCODING_ENCODE_2022_JP_ROMAN,
71
- LXB_ENCODING_ENCODE_2022_JP_JIS0208
72
- };
73
-
74
- typedef struct {
75
- unsigned need;
76
- lxb_char_t lower;
77
- lxb_char_t upper;
78
- }
79
- lxb_encoding_ctx_utf_8_t;
80
-
81
- typedef struct {
82
- lxb_char_t first;
83
- lxb_char_t second;
84
- lxb_char_t third;
85
- }
86
- lxb_encoding_ctx_gb18030_t;
87
-
88
- typedef struct {
89
- lxb_char_t lead;
90
- bool is_jis0212;
91
- }
92
- lxb_encoding_ctx_euc_jp_t;
93
-
94
- typedef struct {
95
- lxb_char_t lead;
96
- lxb_char_t prepand;
97
- unsigned state;
98
- unsigned out_state;
99
- bool out_flag;
100
- }
101
- lxb_encoding_ctx_2022_jp_t;
102
-
103
- typedef struct lxb_encoding_data lxb_encoding_data_t;
104
-
105
- typedef struct {
106
- const lxb_encoding_data_t *encoding_data;
107
-
108
- /* Out buffer */
109
- lxb_codepoint_t *buffer_out;
110
- size_t buffer_length;
111
- size_t buffer_used;
112
-
113
- /*
114
- * Bad code points will be replaced to user code point.
115
- * If replace_to == 0, stop parsing and return an error to the user.
116
- */
117
- const lxb_codepoint_t *replace_to;
118
- size_t replace_len;
119
-
120
- /* Not for users */
121
- lxb_codepoint_t codepoint;
122
- lxb_codepoint_t second_codepoint;
123
- bool prepend;
124
- bool have_error;
125
-
126
- lxb_status_t status;
127
-
128
- union {
129
- lxb_encoding_ctx_utf_8_t utf_8;
130
- lxb_encoding_ctx_gb18030_t gb18030;
131
- unsigned lead;
132
- lxb_encoding_ctx_euc_jp_t euc_jp;
133
- lxb_encoding_ctx_2022_jp_t iso_2022_jp;
134
- } u;
135
- }
136
- lxb_encoding_decode_t;
137
-
138
- typedef struct {
139
- const lxb_encoding_data_t *encoding_data;
140
-
141
- /* Out buffer */
142
- lxb_char_t *buffer_out;
143
- size_t buffer_length;
144
- size_t buffer_used;
145
-
146
- /*
147
- * Bad code points will be replaced to user bytes.
148
- * If replace_to == NULL, stop parsing and return an error to the user.
149
- */
150
- const lxb_char_t *replace_to;
151
- size_t replace_len;
152
-
153
- unsigned state;
154
- }
155
- lxb_encoding_encode_t;
156
-
157
- /*
158
- * Why can't I pass a char ** to a function which expects a const char **?
159
- * http://c-faq.com/ansi/constmismatch.html
160
- *
161
- * Short answer: use cast (const char **).
162
- *
163
- * For example:
164
- * lxb_encoding_ctx_t ctx = {0};
165
- * const lxb_encoding_data_t *enc;
166
- *
167
- * lxb_char_t *data = (lxb_char_t *) "\x81\x30\x84\x36";
168
- *
169
- * enc = lxb_encoding_data(LXB_ENCODING_GB18030);
170
- *
171
- * enc->decode(&ctx, (const lxb_char_t **) &data, data + 4);
172
- */
173
- typedef lxb_status_t
174
- (*lxb_encoding_encode_f)(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cp,
175
- const lxb_codepoint_t *end);
176
-
177
- typedef lxb_status_t
178
- (*lxb_encoding_decode_f)(lxb_encoding_decode_t *ctx,
179
- const lxb_char_t **data, const lxb_char_t *end);
180
-
181
- typedef int8_t
182
- (*lxb_encoding_encode_single_f)(lxb_encoding_encode_t *ctx, lxb_char_t **data,
183
- const lxb_char_t *end, lxb_codepoint_t cp);
184
-
185
- typedef lxb_codepoint_t
186
- (*lxb_encoding_decode_single_f)(lxb_encoding_decode_t *ctx,
187
- const lxb_char_t **data, const lxb_char_t *end);
188
-
189
- struct lxb_encoding_data {
190
- lxb_encoding_t encoding;
191
- lxb_encoding_encode_f encode;
192
- lxb_encoding_decode_f decode;
193
- lxb_encoding_encode_single_f encode_single;
194
- lxb_encoding_decode_single_f decode_single;
195
- lxb_char_t *name;
196
- };
197
-
198
- typedef struct {
199
- lxb_char_t *name;
200
- unsigned size;
201
- lxb_codepoint_t codepoint;
202
- }
203
- lxb_encoding_single_index_t;
204
-
205
- typedef lxb_encoding_single_index_t lxb_encoding_multi_index_t;
206
-
207
- typedef struct {
208
- unsigned index;
209
- lxb_codepoint_t codepoint;
210
- }
211
- lxb_encoding_range_index_t;
212
-
213
-
214
- #ifdef __cplusplus
215
- } /* extern "C" */
216
- #endif
217
-
218
- #endif /* LEXBOR_ENCODING_BASE_H */