nokolexbor 0.3.3 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nokolexbor/nl_attribute.c +201 -0
- data/ext/nokolexbor/nl_cdata.c +8 -0
- data/ext/nokolexbor/nl_comment.c +6 -0
- data/ext/nokolexbor/nl_document.c +53 -7
- data/ext/nokolexbor/nl_document_fragment.c +9 -0
- data/ext/nokolexbor/nl_error.c +21 -19
- data/ext/nokolexbor/nl_node.c +317 -48
- data/ext/nokolexbor/nl_node_set.c +56 -1
- data/ext/nokolexbor/nl_processing_instruction.c +6 -0
- data/ext/nokolexbor/nl_text.c +6 -0
- data/ext/nokolexbor/nokolexbor.c +1 -0
- data/ext/nokolexbor/nokolexbor.h +2 -0
- data/lib/nokolexbor/document.rb +52 -5
- data/lib/nokolexbor/document_fragment.rb +11 -0
- data/lib/nokolexbor/node.rb +370 -24
- data/lib/nokolexbor/node_set.rb +56 -0
- data/lib/nokolexbor/version.rb +1 -1
- data/lib/nokolexbor.rb +0 -1
- metadata +3 -25
- data/lib/nokolexbor/attribute.rb +0 -18
- data/vendor/lexbor/source/lexbor/encoding/base.h +0 -218
- data/vendor/lexbor/source/lexbor/encoding/big5.c +0 -42839
- data/vendor/lexbor/source/lexbor/encoding/config.cmake +0 -12
- data/vendor/lexbor/source/lexbor/encoding/const.h +0 -65
- data/vendor/lexbor/source/lexbor/encoding/decode.c +0 -3193
- data/vendor/lexbor/source/lexbor/encoding/decode.h +0 -370
- data/vendor/lexbor/source/lexbor/encoding/encode.c +0 -1931
- data/vendor/lexbor/source/lexbor/encoding/encode.h +0 -377
- data/vendor/lexbor/source/lexbor/encoding/encoding.c +0 -252
- data/vendor/lexbor/source/lexbor/encoding/encoding.h +0 -475
- data/vendor/lexbor/source/lexbor/encoding/euc_kr.c +0 -53883
- data/vendor/lexbor/source/lexbor/encoding/gb18030.c +0 -47905
- data/vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c +0 -159
- data/vendor/lexbor/source/lexbor/encoding/jis0208.c +0 -22477
- data/vendor/lexbor/source/lexbor/encoding/jis0212.c +0 -15787
- data/vendor/lexbor/source/lexbor/encoding/multi.h +0 -53
- data/vendor/lexbor/source/lexbor/encoding/range.c +0 -71
- data/vendor/lexbor/source/lexbor/encoding/range.h +0 -34
- data/vendor/lexbor/source/lexbor/encoding/res.c +0 -222
- data/vendor/lexbor/source/lexbor/encoding/res.h +0 -34
- data/vendor/lexbor/source/lexbor/encoding/single.c +0 -13748
- data/vendor/lexbor/source/lexbor/encoding/single.h +0 -116
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 23409daec8c9cbe801afef6c39e6507fc938eb35cad2458ef80fc99e6bf5bece
|
4
|
+
data.tar.gz: 06e83318711ac6e6c2582d78fcd87f5a0efeaceff7421a1880cd8e698637d803
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d77bb45c508edba84043156149ff1dfce0249b7f4d52d871c0552b4f590c01e0879c77b0bf039a46ee019be4e445b072a658fbb9fc3872c8b1d6f48e4801b0dd
|
7
|
+
data.tar.gz: 8ec494323f66188fbd39455c05a63e6be0fa5f8979fcb43310dc80a429a3d77355b8875988fa800041a04f1746254b0d88d9522780c085469efe18308a3d4576
|
data/ext/nokolexbor/nl_attribute.c
ADDED
@@ -0,0 +1,201 @@
|
|
1
|
+
#include "nokolexbor.h"
|
2
|
+
|
3
|
+
VALUE cNokolexborAttribute;
|
4
|
+
extern VALUE mNokolexbor;
|
5
|
+
extern VALUE cNokolexborNode;
|
6
|
+
|
7
|
+
/**
|
8
|
+
* call-seq:
|
9
|
+
* new(document, name) -> Attribute
|
10
|
+
*
|
11
|
+
* Create a new Attribute on the +document+ with +name+.
|
12
|
+
*
|
13
|
+
* @param document [Document]
|
14
|
+
* @param name [String]
|
15
|
+
*/
|
16
|
+
static VALUE
|
17
|
+
nl_attribute_new(int argc, VALUE *argv, VALUE klass)
|
18
|
+
{
|
19
|
+
lxb_dom_document_t *document;
|
20
|
+
VALUE rb_document;
|
21
|
+
VALUE rb_name;
|
22
|
+
VALUE rest;
|
23
|
+
|
24
|
+
rb_scan_args(argc, argv, "2*", &rb_document, &rb_name, &rest);
|
25
|
+
|
26
|
+
if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
|
27
|
+
rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
|
28
|
+
}
|
29
|
+
|
30
|
+
document = nl_rb_document_unwrap(rb_document);
|
31
|
+
|
32
|
+
const char *c_name = StringValuePtr(rb_name);
|
33
|
+
size_t name_len = RSTRING_LEN(rb_name);
|
34
|
+
lxb_dom_attr_t *attr = lxb_dom_attr_interface_create(document);
|
35
|
+
if (attr == NULL) {
|
36
|
+
rb_raise(rb_eRuntimeError, "Error creating attribute");
|
37
|
+
}
|
38
|
+
|
39
|
+
lxb_dom_attr_set_name(attr, (const lxb_char_t *)c_name, name_len, false);
|
40
|
+
|
41
|
+
VALUE rb_node = nl_rb_node_create(&attr->node, rb_document);
|
42
|
+
|
43
|
+
if (rb_block_given_p()) {
|
44
|
+
rb_yield(rb_node);
|
45
|
+
}
|
46
|
+
|
47
|
+
return rb_node;
|
48
|
+
}
|
49
|
+
|
50
|
+
/**
|
51
|
+
* Get the name of the Attribute.
|
52
|
+
*
|
53
|
+
* @return [String]
|
54
|
+
*/
|
55
|
+
static VALUE
|
56
|
+
nl_attribute_name(VALUE self)
|
57
|
+
{
|
58
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
59
|
+
lxb_dom_attr_t *attr = lxb_dom_interface_attr(node);
|
60
|
+
|
61
|
+
size_t len;
|
62
|
+
lxb_char_t *name = lxb_dom_attr_qualified_name(attr, &len);
|
63
|
+
|
64
|
+
return rb_utf8_str_new(name, len);
|
65
|
+
}
|
66
|
+
|
67
|
+
/**
|
68
|
+
* call-seq:
|
69
|
+
* name=(name) -> String
|
70
|
+
*
|
71
|
+
* Set the name of the Attribute.
|
72
|
+
*/
|
73
|
+
static VALUE
|
74
|
+
nl_attribute_set_name(VALUE self, VALUE rb_name)
|
75
|
+
{
|
76
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
77
|
+
lxb_dom_attr_t *attr = lxb_dom_interface_attr(node);
|
78
|
+
|
79
|
+
const char *c_name = StringValuePtr(rb_name);
|
80
|
+
size_t name_len = RSTRING_LEN(rb_name);
|
81
|
+
|
82
|
+
lxb_status_t status = lxb_dom_attr_set_name(attr, (const lxb_char_t *)c_name, name_len, false);
|
83
|
+
if (status != LXB_STATUS_OK) {
|
84
|
+
nl_raise_lexbor_error(status);
|
85
|
+
}
|
86
|
+
|
87
|
+
return rb_name;
|
88
|
+
}
|
89
|
+
|
90
|
+
/**
|
91
|
+
* Get the value of the Attribute.
|
92
|
+
*
|
93
|
+
* @return [String]
|
94
|
+
*/
|
95
|
+
static VALUE
|
96
|
+
nl_attribute_value(VALUE self)
|
97
|
+
{
|
98
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
99
|
+
lxb_dom_attr_t *attr = lxb_dom_interface_attr(node);
|
100
|
+
|
101
|
+
size_t len;
|
102
|
+
lxb_char_t *value = lxb_dom_attr_value(attr, &len);
|
103
|
+
|
104
|
+
return rb_utf8_str_new(value, len);
|
105
|
+
}
|
106
|
+
|
107
|
+
/**
|
108
|
+
* call-seq:
|
109
|
+
* value=(value) -> String
|
110
|
+
*
|
111
|
+
* Set the value of the Attribute.
|
112
|
+
*/
|
113
|
+
static VALUE
|
114
|
+
nl_attribute_set_value(VALUE self, VALUE rb_content)
|
115
|
+
{
|
116
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
117
|
+
lxb_dom_attr_t *attr = lxb_dom_interface_attr(node);
|
118
|
+
|
119
|
+
const char *c_content = StringValuePtr(rb_content);
|
120
|
+
size_t content_len = RSTRING_LEN(rb_content);
|
121
|
+
|
122
|
+
lxb_status_t status = lxb_dom_attr_set_value(attr, (const lxb_char_t *)c_content, content_len);
|
123
|
+
if (status != LXB_STATUS_OK) {
|
124
|
+
nl_raise_lexbor_error(status);
|
125
|
+
}
|
126
|
+
|
127
|
+
return rb_content;
|
128
|
+
}
|
129
|
+
|
130
|
+
/**
|
131
|
+
* Get the owner Node of the Attribute.
|
132
|
+
*
|
133
|
+
* @return [Node]
|
134
|
+
*/
|
135
|
+
static VALUE
|
136
|
+
nl_attribute_parent(VALUE self)
|
137
|
+
{
|
138
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
139
|
+
lxb_dom_attr_t *attr = lxb_dom_interface_attr(node);
|
140
|
+
|
141
|
+
if (attr->owner == NULL) {
|
142
|
+
return Qnil;
|
143
|
+
}
|
144
|
+
return nl_rb_node_create(attr->owner, nl_rb_document_get(self));
|
145
|
+
}
|
146
|
+
|
147
|
+
/**
|
148
|
+
* Get the previous Attribute.
|
149
|
+
*
|
150
|
+
* @return [Attribute]
|
151
|
+
*/
|
152
|
+
static VALUE
|
153
|
+
nl_attribute_previous(VALUE self)
|
154
|
+
{
|
155
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
156
|
+
lxb_dom_attr_t *attr = lxb_dom_interface_attr(node);
|
157
|
+
|
158
|
+
if (attr->prev == NULL) {
|
159
|
+
return Qnil;
|
160
|
+
}
|
161
|
+
return nl_rb_node_create(attr->prev, nl_rb_document_get(self));
|
162
|
+
}
|
163
|
+
|
164
|
+
/**
|
165
|
+
* Get the next Attribute.
|
166
|
+
*
|
167
|
+
* @return [Attribute]
|
168
|
+
*/
|
169
|
+
static VALUE
|
170
|
+
nl_attribute_next(VALUE self)
|
171
|
+
{
|
172
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
173
|
+
lxb_dom_attr_t *attr = lxb_dom_interface_attr(node);
|
174
|
+
|
175
|
+
if (attr->next == NULL) {
|
176
|
+
return Qnil;
|
177
|
+
}
|
178
|
+
return nl_rb_node_create(attr->next, nl_rb_document_get(self));
|
179
|
+
}
|
180
|
+
|
181
|
+
void Init_nl_attribute(void)
|
182
|
+
{
|
183
|
+
cNokolexborAttribute = rb_define_class_under(mNokolexbor, "Attribute", cNokolexborNode);
|
184
|
+
|
185
|
+
rb_define_singleton_method(cNokolexborAttribute, "new", nl_attribute_new, -1);
|
186
|
+
|
187
|
+
rb_define_method(cNokolexborAttribute, "name", nl_attribute_name, 0);
|
188
|
+
rb_define_method(cNokolexborAttribute, "name=", nl_attribute_set_name, 1);
|
189
|
+
rb_define_method(cNokolexborAttribute, "value", nl_attribute_value, 0);
|
190
|
+
rb_define_method(cNokolexborAttribute, "value=", nl_attribute_set_value, 1);
|
191
|
+
rb_define_method(cNokolexborAttribute, "parent", nl_attribute_parent, 0);
|
192
|
+
rb_define_method(cNokolexborAttribute, "previous", nl_attribute_previous, 0);
|
193
|
+
rb_define_method(cNokolexborAttribute, "next", nl_attribute_next, 0);
|
194
|
+
|
195
|
+
rb_define_alias(cNokolexborAttribute, "node_name", "name");
|
196
|
+
rb_define_alias(cNokolexborAttribute, "node_name=", "name=");
|
197
|
+
rb_define_alias(cNokolexborAttribute, "text", "value");
|
198
|
+
rb_define_alias(cNokolexborAttribute, "content", "value");
|
199
|
+
rb_define_alias(cNokolexborAttribute, "to_s", "value");
|
200
|
+
rb_define_alias(cNokolexborAttribute, "to_str", "value");
|
201
|
+
}
|
data/ext/nokolexbor/nl_cdata.c
CHANGED
@@ -4,6 +4,14 @@ VALUE cNokolexborCData;
|
|
4
4
|
extern VALUE cNokolexborText;
|
5
5
|
extern VALUE mNokolexbor;
|
6
6
|
|
7
|
+
/**
|
8
|
+
* call-seq:
|
9
|
+
* new(content, document) { |CDATA| ... } -> CDATA
|
10
|
+
*
|
11
|
+
* Create a new CDATA from +content+.
|
12
|
+
*
|
13
|
+
* @return [CDATA]
|
14
|
+
*/
|
7
15
|
static VALUE
|
8
16
|
nl_cdata_new(int argc, VALUE *argv, VALUE klass)
|
9
17
|
{
|
data/ext/nokolexbor/nl_comment.c
CHANGED
@@ -4,6 +4,12 @@ VALUE cNokolexborComment;
|
|
4
4
|
extern VALUE cNokolexborCharacterData;
|
5
5
|
extern VALUE mNokolexbor;
|
6
6
|
|
7
|
+
/**
|
8
|
+
* call-seq:
|
9
|
+
* new(content, document) { |Comment| ... } -> Comment
|
10
|
+
*
|
11
|
+
* Create a new Comment from +content+.
|
12
|
+
*/
|
7
13
|
static VALUE
|
8
14
|
nl_comment_new(int argc, VALUE *argv, VALUE klass)
|
9
15
|
{
|
data/ext/nokolexbor/nl_document.c
CHANGED
@@ -21,6 +21,16 @@ const rb_data_type_t nl_document_type = {
|
|
21
21
|
RUBY_TYPED_FREE_IMMEDIATELY,
|
22
22
|
};
|
23
23
|
|
24
|
+
/**
|
25
|
+
* call-seq:
|
26
|
+
* parse(string_or_io) -> Document
|
27
|
+
*
|
28
|
+
* Parse HTML into a {Document}.
|
29
|
+
*
|
30
|
+
* @param string_or_io [String, #read]
|
31
|
+
* The HTML to be parsed. It may be a String, or any object that
|
32
|
+
* responds to #read such as an IO, or StringIO.
|
33
|
+
*/
|
24
34
|
static VALUE
|
25
35
|
nl_document_parse(VALUE self, VALUE rb_string_or_io)
|
26
36
|
{
|
@@ -51,6 +61,11 @@ nl_document_parse(VALUE self, VALUE rb_string_or_io)
|
|
51
61
|
return TypedData_Wrap_Struct(cNokolexborDocument, &nl_document_type, document);
|
52
62
|
}
|
53
63
|
|
64
|
+
/**
|
65
|
+
* Create a new document.
|
66
|
+
*
|
67
|
+
* @return [Document]
|
68
|
+
*/
|
54
69
|
static VALUE
|
55
70
|
nl_document_new(VALUE self)
|
56
71
|
{
|
@@ -65,21 +80,51 @@ nl_rb_document_unwrap(VALUE rb_doc)
|
|
65
80
|
return doc;
|
66
81
|
}
|
67
82
|
|
68
|
-
|
69
|
-
|
83
|
+
/**
|
84
|
+
* Get the title of this document.
|
85
|
+
*
|
86
|
+
* @return [String]
|
87
|
+
*/
|
88
|
+
static VALUE
|
89
|
+
nl_document_get_title(VALUE self)
|
70
90
|
{
|
71
91
|
size_t len;
|
72
|
-
lxb_char_t *str = lxb_html_document_title(nl_rb_document_unwrap(
|
92
|
+
lxb_char_t *str = lxb_html_document_title(nl_rb_document_unwrap(self), &len);
|
73
93
|
return str == NULL ? rb_str_new("", 0) : rb_utf8_str_new(str, len);
|
74
94
|
}
|
75
95
|
|
76
|
-
|
77
|
-
|
96
|
+
/**
|
97
|
+
* call-seq:
|
98
|
+
* title=(text) -> String
|
99
|
+
*
|
100
|
+
* Set the title of this document.
|
101
|
+
*
|
102
|
+
* If a title element is already present, its content is replaced
|
103
|
+
* with the given text.
|
104
|
+
*
|
105
|
+
* Otherwise, this method tries to create one inside <head>.
|
106
|
+
*
|
107
|
+
* @return [String]
|
108
|
+
*/
|
109
|
+
static VALUE
|
110
|
+
nl_document_set_title(VALUE self, VALUE rb_title)
|
78
111
|
{
|
79
112
|
const char *c_title = StringValuePtr(rb_title);
|
80
113
|
size_t len = RSTRING_LEN(rb_title);
|
81
|
-
lxb_char_t *str = lxb_html_document_title_set(nl_rb_document_unwrap(
|
82
|
-
return
|
114
|
+
lxb_char_t *str = lxb_html_document_title_set(nl_rb_document_unwrap(self), (const lxb_char_t *)c_title, len);
|
115
|
+
return rb_title;
|
116
|
+
}
|
117
|
+
|
118
|
+
/**
|
119
|
+
* Get the root node for this document.
|
120
|
+
*
|
121
|
+
* @return [Node]
|
122
|
+
*/
|
123
|
+
static VALUE
|
124
|
+
nl_document_root(VALUE self)
|
125
|
+
{
|
126
|
+
lxb_dom_document_t *doc = nl_rb_document_unwrap(self);
|
127
|
+
return nl_rb_node_create(lxb_dom_document_root(doc), self);
|
83
128
|
}
|
84
129
|
|
85
130
|
void Init_nl_document(void)
|
@@ -89,4 +134,5 @@ void Init_nl_document(void)
|
|
89
134
|
rb_define_singleton_method(cNokolexborDocument, "parse", nl_document_parse, 1);
|
90
135
|
rb_define_method(cNokolexborDocument, "title", nl_document_get_title, 0);
|
91
136
|
rb_define_method(cNokolexborDocument, "title=", nl_document_set_title, 1);
|
137
|
+
rb_define_method(cNokolexborDocument, "root", nl_document_root, 0);
|
92
138
|
}
|
data/ext/nokolexbor/nl_document_fragment.c
CHANGED
@@ -4,6 +4,15 @@ VALUE cNokolexborDocumentFragment;
|
|
4
4
|
extern VALUE cNokolexborNode;
|
5
5
|
extern VALUE mNokolexbor;
|
6
6
|
|
7
|
+
/**
|
8
|
+
* call-seq:
|
9
|
+
* new(document, tags = nil, ctx = nil) -> DocumentFragment
|
10
|
+
*
|
11
|
+
* Create a {DocumentFragment} from +tags+.
|
12
|
+
*
|
13
|
+
* If +ctx+ is present, it is used as a context node for the
|
14
|
+
* subtree created.
|
15
|
+
*/
|
7
16
|
static VALUE
|
8
17
|
nl_document_fragment_new(int argc, VALUE *argv, VALUE klass)
|
9
18
|
{
|
data/ext/nokolexbor/nl_error.c
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
#include "nokolexbor.h"
|
2
2
|
|
3
|
+
VALUE mLexbor;
|
3
4
|
VALUE eLexborError;
|
4
5
|
VALUE eLexborMemoryAllocationError;
|
5
6
|
VALUE eLexborSmallBufferError;
|
@@ -71,23 +72,24 @@ void nl_raise_lexbor_error(lxb_status_t error)
|
|
71
72
|
|
72
73
|
void Init_nl_error(void)
|
73
74
|
{
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
75
|
+
mLexbor = rb_define_module_under(mNokolexbor, "Lexbor");
|
76
|
+
eLexborError = rb_define_class_under(mLexbor, "Error", rb_eStandardError);
|
77
|
+
eLexborMemoryAllocationError = rb_define_class_under(mLexbor, "MemoryAllocationError", eLexborError);
|
78
|
+
eLexborSmallBufferError = rb_define_class_under(mLexbor, "SmallBufferError", eLexborError);
|
79
|
+
eLexborObjectIsNullError = rb_define_class_under(mLexbor, "ObjectIsNullError", eLexborError);
|
80
|
+
eLexborIncompleteObjectError = rb_define_class_under(mLexbor, "IncompleteObjectError", eLexborError);
|
81
|
+
eLexborNoFreeSlotError = rb_define_class_under(mLexbor, "NoFreeSlotError", eLexborError);
|
82
|
+
eLexborTooSmallSizeError = rb_define_class_under(mLexbor, "TooSmallSizeError", eLexborError);
|
83
|
+
eLexborNotExistsError = rb_define_class_under(mLexbor, "NotExistsError", eLexborError);
|
84
|
+
eLexborWrongArgsError = rb_define_class_under(mLexbor, "WrongArgsError", eLexborError);
|
85
|
+
eLexborWrongStageError = rb_define_class_under(mLexbor, "WrongStageError", eLexborError);
|
86
|
+
eLexborUnexpectedResultError = rb_define_class_under(mLexbor, "UnexpectedResultError", eLexborError);
|
87
|
+
eLexborUnexpectedDataError = rb_define_class_under(mLexbor, "UnexpectedDataError", eLexborError);
|
88
|
+
eLexborOverflowError = rb_define_class_under(mLexbor, "OverflowError", eLexborError);
|
89
|
+
eLexborContinueStatus = rb_define_class_under(mLexbor, "ContinueStatus", eLexborError);
|
90
|
+
eLexborSmallBufferStatus = rb_define_class_under(mLexbor, "SmallBufferStatus", eLexborError);
|
91
|
+
eLexborAbortedStatus = rb_define_class_under(mLexbor, "AbortedStatus", eLexborError);
|
92
|
+
eLexborStoppedStatus = rb_define_class_under(mLexbor, "StoppedStatus", eLexborError);
|
93
|
+
eLexborNextStatus = rb_define_class_under(mLexbor, "NextStatus", eLexborError);
|
94
|
+
eLexborStopStatus = rb_define_class_under(mLexbor, "StopStatus", eLexborError);
|
93
95
|
}
|