nokolexbor 0.3.3 → 0.3.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/nokolexbor/nl_attribute.c +201 -0
- data/ext/nokolexbor/nl_cdata.c +8 -0
- data/ext/nokolexbor/nl_comment.c +6 -0
- data/ext/nokolexbor/nl_document.c +53 -7
- data/ext/nokolexbor/nl_document_fragment.c +9 -0
- data/ext/nokolexbor/nl_error.c +21 -19
- data/ext/nokolexbor/nl_node.c +317 -48
- data/ext/nokolexbor/nl_node_set.c +56 -1
- data/ext/nokolexbor/nl_processing_instruction.c +6 -0
- data/ext/nokolexbor/nl_text.c +6 -0
- data/ext/nokolexbor/nokolexbor.c +1 -0
- data/ext/nokolexbor/nokolexbor.h +2 -0
- data/lib/nokolexbor/document.rb +52 -5
- data/lib/nokolexbor/document_fragment.rb +11 -0
- data/lib/nokolexbor/node.rb +370 -24
- data/lib/nokolexbor/node_set.rb +56 -0
- data/lib/nokolexbor/version.rb +1 -1
- data/lib/nokolexbor.rb +0 -1
- metadata +3 -25
- data/lib/nokolexbor/attribute.rb +0 -18
- data/vendor/lexbor/source/lexbor/encoding/base.h +0 -218
- data/vendor/lexbor/source/lexbor/encoding/big5.c +0 -42839
- data/vendor/lexbor/source/lexbor/encoding/config.cmake +0 -12
- data/vendor/lexbor/source/lexbor/encoding/const.h +0 -65
- data/vendor/lexbor/source/lexbor/encoding/decode.c +0 -3193
- data/vendor/lexbor/source/lexbor/encoding/decode.h +0 -370
- data/vendor/lexbor/source/lexbor/encoding/encode.c +0 -1931
- data/vendor/lexbor/source/lexbor/encoding/encode.h +0 -377
- data/vendor/lexbor/source/lexbor/encoding/encoding.c +0 -252
- data/vendor/lexbor/source/lexbor/encoding/encoding.h +0 -475
- data/vendor/lexbor/source/lexbor/encoding/euc_kr.c +0 -53883
- data/vendor/lexbor/source/lexbor/encoding/gb18030.c +0 -47905
- data/vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c +0 -159
- data/vendor/lexbor/source/lexbor/encoding/jis0208.c +0 -22477
- data/vendor/lexbor/source/lexbor/encoding/jis0212.c +0 -15787
- data/vendor/lexbor/source/lexbor/encoding/multi.h +0 -53
- data/vendor/lexbor/source/lexbor/encoding/range.c +0 -71
- data/vendor/lexbor/source/lexbor/encoding/range.h +0 -34
- data/vendor/lexbor/source/lexbor/encoding/res.c +0 -222
- data/vendor/lexbor/source/lexbor/encoding/res.h +0 -34
- data/vendor/lexbor/source/lexbor/encoding/single.c +0 -13748
- data/vendor/lexbor/source/lexbor/encoding/single.h +0 -116
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 23409daec8c9cbe801afef6c39e6507fc938eb35cad2458ef80fc99e6bf5bece
|
4
|
+
data.tar.gz: 06e83318711ac6e6c2582d78fcd87f5a0efeaceff7421a1880cd8e698637d803
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d77bb45c508edba84043156149ff1dfce0249b7f4d52d871c0552b4f590c01e0879c77b0bf039a46ee019be4e445b072a658fbb9fc3872c8b1d6f48e4801b0dd
|
7
|
+
data.tar.gz: 8ec494323f66188fbd39455c05a63e6be0fa5f8979fcb43310dc80a429a3d77355b8875988fa800041a04f1746254b0d88d9522780c085469efe18308a3d4576
|
@@ -0,0 +1,201 @@
|
|
1
|
+
#include "nokolexbor.h"
|
2
|
+
|
3
|
+
VALUE cNokolexborAttribute;
|
4
|
+
extern VALUE mNokolexbor;
|
5
|
+
extern VALUE cNokolexborNode;
|
6
|
+
|
7
|
+
/**
|
8
|
+
* call-seq:
|
9
|
+
* new(document, name) -> Attribute
|
10
|
+
*
|
11
|
+
* Create a new Attribute on the +document+ with +name+.
|
12
|
+
*
|
13
|
+
* @param document [Document]
|
14
|
+
* @param name [String]
|
15
|
+
*/
|
16
|
+
static VALUE
|
17
|
+
nl_attribute_new(int argc, VALUE *argv, VALUE klass)
|
18
|
+
{
|
19
|
+
lxb_dom_document_t *document;
|
20
|
+
VALUE rb_document;
|
21
|
+
VALUE rb_name;
|
22
|
+
VALUE rest;
|
23
|
+
|
24
|
+
rb_scan_args(argc, argv, "2*", &rb_document, &rb_name, &rest);
|
25
|
+
|
26
|
+
if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
|
27
|
+
rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
|
28
|
+
}
|
29
|
+
|
30
|
+
document = nl_rb_document_unwrap(rb_document);
|
31
|
+
|
32
|
+
const char *c_name = StringValuePtr(rb_name);
|
33
|
+
size_t name_len = RSTRING_LEN(rb_name);
|
34
|
+
lxb_dom_attr_t *attr = lxb_dom_attr_interface_create(document);
|
35
|
+
if (attr == NULL) {
|
36
|
+
rb_raise(rb_eRuntimeError, "Error creating attribute");
|
37
|
+
}
|
38
|
+
|
39
|
+
lxb_dom_attr_set_name(attr, (const lxb_char_t *)c_name, name_len, false);
|
40
|
+
|
41
|
+
VALUE rb_node = nl_rb_node_create(&attr->node, rb_document);
|
42
|
+
|
43
|
+
if (rb_block_given_p()) {
|
44
|
+
rb_yield(rb_node);
|
45
|
+
}
|
46
|
+
|
47
|
+
return rb_node;
|
48
|
+
}
|
49
|
+
|
50
|
+
/**
|
51
|
+
* Get the name of the Attribute.
|
52
|
+
*
|
53
|
+
* @return [String]
|
54
|
+
*/
|
55
|
+
static VALUE
|
56
|
+
nl_attribute_name(VALUE self)
|
57
|
+
{
|
58
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
59
|
+
lxb_dom_attr_t *attr = lxb_dom_interface_attr(node);
|
60
|
+
|
61
|
+
size_t len;
|
62
|
+
lxb_char_t *name = lxb_dom_attr_qualified_name(attr, &len);
|
63
|
+
|
64
|
+
return rb_utf8_str_new(name, len);
|
65
|
+
}
|
66
|
+
|
67
|
+
/**
|
68
|
+
* call-seq:
|
69
|
+
* name=(name) -> String
|
70
|
+
*
|
71
|
+
* Set the name of the Attribute.
|
72
|
+
*/
|
73
|
+
static VALUE
|
74
|
+
nl_attribute_set_name(VALUE self, VALUE rb_name)
|
75
|
+
{
|
76
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
77
|
+
lxb_dom_attr_t *attr = lxb_dom_interface_attr(node);
|
78
|
+
|
79
|
+
const char *c_name = StringValuePtr(rb_name);
|
80
|
+
size_t name_len = RSTRING_LEN(rb_name);
|
81
|
+
|
82
|
+
lxb_status_t status = lxb_dom_attr_set_name(attr, (const lxb_char_t *)c_name, name_len, false);
|
83
|
+
if (status != LXB_STATUS_OK) {
|
84
|
+
nl_raise_lexbor_error(status);
|
85
|
+
}
|
86
|
+
|
87
|
+
return rb_name;
|
88
|
+
}
|
89
|
+
|
90
|
+
/**
|
91
|
+
* Get the value of the Attribute.
|
92
|
+
*
|
93
|
+
* @return [String]
|
94
|
+
*/
|
95
|
+
static VALUE
|
96
|
+
nl_attribute_value(VALUE self)
|
97
|
+
{
|
98
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
99
|
+
lxb_dom_attr_t *attr = lxb_dom_interface_attr(node);
|
100
|
+
|
101
|
+
size_t len;
|
102
|
+
lxb_char_t *value = lxb_dom_attr_value(attr, &len);
|
103
|
+
|
104
|
+
return rb_utf8_str_new(value, len);
|
105
|
+
}
|
106
|
+
|
107
|
+
/**
|
108
|
+
* call-seq:
|
109
|
+
* value=(value) -> String
|
110
|
+
*
|
111
|
+
* Set the value of the Attribute.
|
112
|
+
*/
|
113
|
+
static VALUE
|
114
|
+
nl_attribute_set_value(VALUE self, VALUE rb_content)
|
115
|
+
{
|
116
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
117
|
+
lxb_dom_attr_t *attr = lxb_dom_interface_attr(node);
|
118
|
+
|
119
|
+
const char *c_content = StringValuePtr(rb_content);
|
120
|
+
size_t content_len = RSTRING_LEN(rb_content);
|
121
|
+
|
122
|
+
lxb_status_t status = lxb_dom_attr_set_value(attr, (const lxb_char_t *)c_content, content_len);
|
123
|
+
if (status != LXB_STATUS_OK) {
|
124
|
+
nl_raise_lexbor_error(status);
|
125
|
+
}
|
126
|
+
|
127
|
+
return rb_content;
|
128
|
+
}
|
129
|
+
|
130
|
+
/**
|
131
|
+
* Get the owner Node of the Attribute.
|
132
|
+
*
|
133
|
+
* @return [Node]
|
134
|
+
*/
|
135
|
+
static VALUE
|
136
|
+
nl_attribute_parent(VALUE self)
|
137
|
+
{
|
138
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
139
|
+
lxb_dom_attr_t *attr = lxb_dom_interface_attr(node);
|
140
|
+
|
141
|
+
if (attr->owner == NULL) {
|
142
|
+
return Qnil;
|
143
|
+
}
|
144
|
+
return nl_rb_node_create(attr->owner, nl_rb_document_get(self));
|
145
|
+
}
|
146
|
+
|
147
|
+
/**
|
148
|
+
* Get the previous Attribute.
|
149
|
+
*
|
150
|
+
* @return [Attribute]
|
151
|
+
*/
|
152
|
+
static VALUE
|
153
|
+
nl_attribute_previous(VALUE self)
|
154
|
+
{
|
155
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
156
|
+
lxb_dom_attr_t *attr = lxb_dom_interface_attr(node);
|
157
|
+
|
158
|
+
if (attr->prev == NULL) {
|
159
|
+
return Qnil;
|
160
|
+
}
|
161
|
+
return nl_rb_node_create(attr->prev, nl_rb_document_get(self));
|
162
|
+
}
|
163
|
+
|
164
|
+
/**
|
165
|
+
* Get the next Attribute.
|
166
|
+
*
|
167
|
+
* @return [Attribute]
|
168
|
+
*/
|
169
|
+
static VALUE
|
170
|
+
nl_attribute_next(VALUE self)
|
171
|
+
{
|
172
|
+
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
173
|
+
lxb_dom_attr_t *attr = lxb_dom_interface_attr(node);
|
174
|
+
|
175
|
+
if (attr->next == NULL) {
|
176
|
+
return Qnil;
|
177
|
+
}
|
178
|
+
return nl_rb_node_create(attr->next, nl_rb_document_get(self));
|
179
|
+
}
|
180
|
+
|
181
|
+
void Init_nl_attribute(void)
|
182
|
+
{
|
183
|
+
cNokolexborAttribute = rb_define_class_under(mNokolexbor, "Attribute", cNokolexborNode);
|
184
|
+
|
185
|
+
rb_define_singleton_method(cNokolexborAttribute, "new", nl_attribute_new, -1);
|
186
|
+
|
187
|
+
rb_define_method(cNokolexborAttribute, "name", nl_attribute_name, 0);
|
188
|
+
rb_define_method(cNokolexborAttribute, "name=", nl_attribute_set_name, 1);
|
189
|
+
rb_define_method(cNokolexborAttribute, "value", nl_attribute_value, 0);
|
190
|
+
rb_define_method(cNokolexborAttribute, "value=", nl_attribute_set_value, 1);
|
191
|
+
rb_define_method(cNokolexborAttribute, "parent", nl_attribute_parent, 0);
|
192
|
+
rb_define_method(cNokolexborAttribute, "previous", nl_attribute_previous, 0);
|
193
|
+
rb_define_method(cNokolexborAttribute, "next", nl_attribute_next, 0);
|
194
|
+
|
195
|
+
rb_define_alias(cNokolexborAttribute, "node_name", "name");
|
196
|
+
rb_define_alias(cNokolexborAttribute, "node_name=", "name=");
|
197
|
+
rb_define_alias(cNokolexborAttribute, "text", "value");
|
198
|
+
rb_define_alias(cNokolexborAttribute, "content", "value");
|
199
|
+
rb_define_alias(cNokolexborAttribute, "to_s", "value");
|
200
|
+
rb_define_alias(cNokolexborAttribute, "to_str", "value");
|
201
|
+
}
|
data/ext/nokolexbor/nl_cdata.c
CHANGED
@@ -4,6 +4,14 @@ VALUE cNokolexborCData;
|
|
4
4
|
extern VALUE cNokolexborText;
|
5
5
|
extern VALUE mNokolexbor;
|
6
6
|
|
7
|
+
/**
|
8
|
+
* call-seq:
|
9
|
+
* new(content, document) { |CDATA| ... } -> CDATA
|
10
|
+
*
|
11
|
+
* Create a new CDATA from +content+.
|
12
|
+
*
|
13
|
+
* @return [CDATA]
|
14
|
+
*/
|
7
15
|
static VALUE
|
8
16
|
nl_cdata_new(int argc, VALUE *argv, VALUE klass)
|
9
17
|
{
|
data/ext/nokolexbor/nl_comment.c
CHANGED
@@ -4,6 +4,12 @@ VALUE cNokolexborComment;
|
|
4
4
|
extern VALUE cNokolexborCharacterData;
|
5
5
|
extern VALUE mNokolexbor;
|
6
6
|
|
7
|
+
/**
|
8
|
+
* call-seq:
|
9
|
+
* new(content, document) { |Comment| ... } -> Comment
|
10
|
+
*
|
11
|
+
* Create a new Comment from +content+.
|
12
|
+
*/
|
7
13
|
static VALUE
|
8
14
|
nl_comment_new(int argc, VALUE *argv, VALUE klass)
|
9
15
|
{
|
@@ -21,6 +21,16 @@ const rb_data_type_t nl_document_type = {
|
|
21
21
|
RUBY_TYPED_FREE_IMMEDIATELY,
|
22
22
|
};
|
23
23
|
|
24
|
+
/**
|
25
|
+
* call-seq:
|
26
|
+
* parse(string_or_io) -> Document
|
27
|
+
*
|
28
|
+
* Parse HTML into a {Document}.
|
29
|
+
*
|
30
|
+
* @param string_or_io [String, #read]
|
31
|
+
* The HTML to be parsed. It may be a String, or any object that
|
32
|
+
* responds to #read such as an IO, or StringIO.
|
33
|
+
*/
|
24
34
|
static VALUE
|
25
35
|
nl_document_parse(VALUE self, VALUE rb_string_or_io)
|
26
36
|
{
|
@@ -51,6 +61,11 @@ nl_document_parse(VALUE self, VALUE rb_string_or_io)
|
|
51
61
|
return TypedData_Wrap_Struct(cNokolexborDocument, &nl_document_type, document);
|
52
62
|
}
|
53
63
|
|
64
|
+
/**
|
65
|
+
* Create a new document.
|
66
|
+
*
|
67
|
+
* @return [Document]
|
68
|
+
*/
|
54
69
|
static VALUE
|
55
70
|
nl_document_new(VALUE self)
|
56
71
|
{
|
@@ -65,21 +80,51 @@ nl_rb_document_unwrap(VALUE rb_doc)
|
|
65
80
|
return doc;
|
66
81
|
}
|
67
82
|
|
68
|
-
|
69
|
-
|
83
|
+
/**
|
84
|
+
* Get the title of this document.
|
85
|
+
*
|
86
|
+
* @return [String]
|
87
|
+
*/
|
88
|
+
static VALUE
|
89
|
+
nl_document_get_title(VALUE self)
|
70
90
|
{
|
71
91
|
size_t len;
|
72
|
-
lxb_char_t *str = lxb_html_document_title(nl_rb_document_unwrap(
|
92
|
+
lxb_char_t *str = lxb_html_document_title(nl_rb_document_unwrap(self), &len);
|
73
93
|
return str == NULL ? rb_str_new("", 0) : rb_utf8_str_new(str, len);
|
74
94
|
}
|
75
95
|
|
76
|
-
|
77
|
-
|
96
|
+
/**
|
97
|
+
* call-seq:
|
98
|
+
* title=(text) -> String
|
99
|
+
*
|
100
|
+
* Set the title of this document.
|
101
|
+
*
|
102
|
+
* If a title element is already present, its content is replaced
|
103
|
+
* with the given text.
|
104
|
+
*
|
105
|
+
* Otherwise, this method tries to create one inside <head>.
|
106
|
+
*
|
107
|
+
* @return [String]
|
108
|
+
*/
|
109
|
+
static VALUE
|
110
|
+
nl_document_set_title(VALUE self, VALUE rb_title)
|
78
111
|
{
|
79
112
|
const char *c_title = StringValuePtr(rb_title);
|
80
113
|
size_t len = RSTRING_LEN(rb_title);
|
81
|
-
lxb_char_t *str = lxb_html_document_title_set(nl_rb_document_unwrap(
|
82
|
-
return
|
114
|
+
lxb_char_t *str = lxb_html_document_title_set(nl_rb_document_unwrap(self), (const lxb_char_t *)c_title, len);
|
115
|
+
return rb_title;
|
116
|
+
}
|
117
|
+
|
118
|
+
/**
|
119
|
+
* Get the root node for this document.
|
120
|
+
*
|
121
|
+
* @return [Node]
|
122
|
+
*/
|
123
|
+
static VALUE
|
124
|
+
nl_document_root(VALUE self)
|
125
|
+
{
|
126
|
+
lxb_dom_document_t *doc = nl_rb_document_unwrap(self);
|
127
|
+
return nl_rb_node_create(lxb_dom_document_root(doc), self);
|
83
128
|
}
|
84
129
|
|
85
130
|
void Init_nl_document(void)
|
@@ -89,4 +134,5 @@ void Init_nl_document(void)
|
|
89
134
|
rb_define_singleton_method(cNokolexborDocument, "parse", nl_document_parse, 1);
|
90
135
|
rb_define_method(cNokolexborDocument, "title", nl_document_get_title, 0);
|
91
136
|
rb_define_method(cNokolexborDocument, "title=", nl_document_set_title, 1);
|
137
|
+
rb_define_method(cNokolexborDocument, "root", nl_document_root, 0);
|
92
138
|
}
|
@@ -4,6 +4,15 @@ VALUE cNokolexborDocumentFragment;
|
|
4
4
|
extern VALUE cNokolexborNode;
|
5
5
|
extern VALUE mNokolexbor;
|
6
6
|
|
7
|
+
/**
|
8
|
+
* call-seq:
|
9
|
+
* new(document, tags = nil, ctx = nil) -> DocumentFragment
|
10
|
+
*
|
11
|
+
* Create a {DocumentFragment} from +tags+.
|
12
|
+
*
|
13
|
+
* If +ctx+ is present, it is used as a context node for the
|
14
|
+
* subtree created.
|
15
|
+
*/
|
7
16
|
static VALUE
|
8
17
|
nl_document_fragment_new(int argc, VALUE *argv, VALUE klass)
|
9
18
|
{
|
data/ext/nokolexbor/nl_error.c
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
#include "nokolexbor.h"
|
2
2
|
|
3
|
+
VALUE mLexbor;
|
3
4
|
VALUE eLexborError;
|
4
5
|
VALUE eLexborMemoryAllocationError;
|
5
6
|
VALUE eLexborSmallBufferError;
|
@@ -71,23 +72,24 @@ void nl_raise_lexbor_error(lxb_status_t error)
|
|
71
72
|
|
72
73
|
void Init_nl_error(void)
|
73
74
|
{
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
75
|
+
mLexbor = rb_define_module_under(mNokolexbor, "Lexbor");
|
76
|
+
eLexborError = rb_define_class_under(mLexbor, "Error", rb_eStandardError);
|
77
|
+
eLexborMemoryAllocationError = rb_define_class_under(mLexbor, "MemoryAllocationError", eLexborError);
|
78
|
+
eLexborSmallBufferError = rb_define_class_under(mLexbor, "SmallBufferError", eLexborError);
|
79
|
+
eLexborObjectIsNullError = rb_define_class_under(mLexbor, "ObjectIsNullError", eLexborError);
|
80
|
+
eLexborIncompleteObjectError = rb_define_class_under(mLexbor, "IncompleteObjectError", eLexborError);
|
81
|
+
eLexborNoFreeSlotError = rb_define_class_under(mLexbor, "NoFreeSlotError", eLexborError);
|
82
|
+
eLexborTooSmallSizeError = rb_define_class_under(mLexbor, "TooSmallSizeError", eLexborError);
|
83
|
+
eLexborNotExistsError = rb_define_class_under(mLexbor, "NotExistsError", eLexborError);
|
84
|
+
eLexborWrongArgsError = rb_define_class_under(mLexbor, "WrongArgsError", eLexborError);
|
85
|
+
eLexborWrongStageError = rb_define_class_under(mLexbor, "WrongStageError", eLexborError);
|
86
|
+
eLexborUnexpectedResultError = rb_define_class_under(mLexbor, "UnexpectedResultError", eLexborError);
|
87
|
+
eLexborUnexpectedDataError = rb_define_class_under(mLexbor, "UnexpectedDataError", eLexborError);
|
88
|
+
eLexborOverflowError = rb_define_class_under(mLexbor, "OverflowError", eLexborError);
|
89
|
+
eLexborContinueStatus = rb_define_class_under(mLexbor, "ContinueStatus", eLexborError);
|
90
|
+
eLexborSmallBufferStatus = rb_define_class_under(mLexbor, "SmallBufferStatus", eLexborError);
|
91
|
+
eLexborAbortedStatus = rb_define_class_under(mLexbor, "AbortedStatus", eLexborError);
|
92
|
+
eLexborStoppedStatus = rb_define_class_under(mLexbor, "StoppedStatus", eLexborError);
|
93
|
+
eLexborNextStatus = rb_define_class_under(mLexbor, "NextStatus", eLexborError);
|
94
|
+
eLexborStopStatus = rb_define_class_under(mLexbor, "StopStatus", eLexborError);
|
93
95
|
}
|