nokolexbor 0.3.4 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nokolexbor/extconf.rb +9 -5
- data/ext/nokolexbor/nl_attribute.c +46 -0
- data/ext/nokolexbor/nl_cdata.c +8 -0
- data/ext/nokolexbor/nl_comment.c +6 -0
- data/ext/nokolexbor/nl_document.c +53 -7
- data/ext/nokolexbor/nl_document_fragment.c +9 -0
- data/ext/nokolexbor/nl_error.c +21 -19
- data/ext/nokolexbor/nl_node.c +255 -50
- data/ext/nokolexbor/nl_node_set.c +56 -1
- data/ext/nokolexbor/nl_processing_instruction.c +6 -0
- data/ext/nokolexbor/nl_text.c +6 -0
- data/ext/nokolexbor/nokolexbor.h +1 -0
- data/lib/nokolexbor/document.rb +52 -5
- data/lib/nokolexbor/document_fragment.rb +11 -0
- data/lib/nokolexbor/node.rb +367 -18
- data/lib/nokolexbor/node_set.rb +56 -0
- data/lib/nokolexbor/version.rb +1 -1
- metadata +2 -24
- data/vendor/lexbor/source/lexbor/encoding/base.h +0 -218
- data/vendor/lexbor/source/lexbor/encoding/big5.c +0 -42839
- data/vendor/lexbor/source/lexbor/encoding/config.cmake +0 -12
- data/vendor/lexbor/source/lexbor/encoding/const.h +0 -65
- data/vendor/lexbor/source/lexbor/encoding/decode.c +0 -3193
- data/vendor/lexbor/source/lexbor/encoding/decode.h +0 -370
- data/vendor/lexbor/source/lexbor/encoding/encode.c +0 -1931
- data/vendor/lexbor/source/lexbor/encoding/encode.h +0 -377
- data/vendor/lexbor/source/lexbor/encoding/encoding.c +0 -252
- data/vendor/lexbor/source/lexbor/encoding/encoding.h +0 -475
- data/vendor/lexbor/source/lexbor/encoding/euc_kr.c +0 -53883
- data/vendor/lexbor/source/lexbor/encoding/gb18030.c +0 -47905
- data/vendor/lexbor/source/lexbor/encoding/iso_2022_jp_katakana.c +0 -159
- data/vendor/lexbor/source/lexbor/encoding/jis0208.c +0 -22477
- data/vendor/lexbor/source/lexbor/encoding/jis0212.c +0 -15787
- data/vendor/lexbor/source/lexbor/encoding/multi.h +0 -53
- data/vendor/lexbor/source/lexbor/encoding/range.c +0 -71
- data/vendor/lexbor/source/lexbor/encoding/range.h +0 -34
- data/vendor/lexbor/source/lexbor/encoding/res.c +0 -222
- data/vendor/lexbor/source/lexbor/encoding/res.h +0 -34
- data/vendor/lexbor/source/lexbor/encoding/single.c +0 -13748
- data/vendor/lexbor/source/lexbor/encoding/single.h +0 -116
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 29411076a95eae20060101aa8a270bfe53a48eda3b6e58cfad6ebe22b184049c
|
4
|
+
data.tar.gz: 920536ad4f69a635cfec9a6dea4d8bbe746c7c97d74c1b1daa6b22c41856d201
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d6ca49aa873ee254caf034600fbbb9384a2a4efaa86422e992a7fbb7970107af20541c4309e582c6f8863353209d36cd6e5d56fdb19b52fd54a233a4419202ea
|
7
|
+
data.tar.gz: f4c9413c2cd8683e6cf111d1c966a8511d4d605ec30008f595d2c05b402b028dc8f889c9480d3ff43b0f380e84861b2384c915f9f9cca68b6ad8f510a58a0b0a
|
data/ext/nokolexbor/extconf.rb
CHANGED
@@ -5,6 +5,10 @@ if ENV["CC"]
|
|
5
5
|
RbConfig::CONFIG["CC"] = RbConfig::MAKEFILE_CONFIG["CC"] = ENV["CC"]
|
6
6
|
end
|
7
7
|
|
8
|
+
def windows?
|
9
|
+
RbConfig::CONFIG["target_os"].match?(/mingw|mswin/)
|
10
|
+
end
|
11
|
+
|
8
12
|
# From: https://stackoverflow.com/questions/2108727
|
9
13
|
# Cross-platform way of finding an executable in the $PATH.
|
10
14
|
#
|
@@ -23,9 +27,9 @@ end
|
|
23
27
|
cmake_flags = [ ENV["CMAKE_FLAGS"] ]
|
24
28
|
cmake_flags << "-DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY"
|
25
29
|
# Set system name explicitly when cross-compiling
|
26
|
-
cmake_flags << "-DCMAKE_SYSTEM_NAME=Windows -DWIN32=1" if
|
30
|
+
cmake_flags << "-DCMAKE_SYSTEM_NAME=Windows -DWIN32=1" if windows?
|
27
31
|
# On Windows, Ruby-DevKit is MSYS-based, so ensure to use MSYS Makefiles.
|
28
|
-
cmake_flags << "-G \"MSYS Makefiles\"" if
|
32
|
+
cmake_flags << "-G \"MSYS Makefiles\"" if windows? && !ENV['NOKOLEXBOR_CROSS_COMPILE']
|
29
33
|
|
30
34
|
if ENV['NOKOLEXBOR_CROSS_COMPILE']
|
31
35
|
# use the same toolchain for cross-compiling lexbor
|
@@ -74,7 +78,7 @@ end
|
|
74
78
|
|
75
79
|
def self.run_cmake(timeout, args)
|
76
80
|
# Set to process group so we can kill it and its children
|
77
|
-
pgroup = (
|
81
|
+
pgroup = (windows? && !ENV['NOKOLEXBOR_CROSS_COMPILE']) ? :new_pgroup : :pgroup
|
78
82
|
pid = Process.spawn("cmake #{args}", pgroup => true)
|
79
83
|
|
80
84
|
Timeout.timeout(timeout) do
|
@@ -104,7 +108,7 @@ def apply_patch(patch_file, chdir)
|
|
104
108
|
end
|
105
109
|
|
106
110
|
|
107
|
-
MAKE = if
|
111
|
+
MAKE = if windows?
|
108
112
|
# On Windows, Ruby-DevKit only has 'make'.
|
109
113
|
find_executable('make')
|
110
114
|
else
|
@@ -143,7 +147,7 @@ Dir.chdir(EXT_DIR) do
|
|
143
147
|
Dir.mkdir("build") if !Dir.exist?("build")
|
144
148
|
|
145
149
|
Dir.chdir("build") do
|
146
|
-
run_cmake(10 * 60, ".. #{cmake_flags.join(' ')} #{
|
150
|
+
run_cmake(10 * 60, ".. #{cmake_flags.join(' ')} #{windows? ? "-DLIBXML2_WITH_THREADS=OFF" : ""}")
|
147
151
|
end
|
148
152
|
end
|
149
153
|
|
data/ext/nokolexbor/nl_attribute.c
CHANGED
@@ -4,6 +4,15 @@ VALUE cNokolexborAttribute;
|
|
4
4
|
extern VALUE mNokolexbor;
|
5
5
|
extern VALUE cNokolexborNode;
|
6
6
|
|
7
|
+
/**
|
8
|
+
* call-seq:
|
9
|
+
* new(document, name) -> Attribute
|
10
|
+
*
|
11
|
+
* Create a new Attribute on the +document+ with +name+.
|
12
|
+
*
|
13
|
+
* @param document [Document]
|
14
|
+
* @param name [String]
|
15
|
+
*/
|
7
16
|
static VALUE
|
8
17
|
nl_attribute_new(int argc, VALUE *argv, VALUE klass)
|
9
18
|
{
|
@@ -38,6 +47,11 @@ nl_attribute_new(int argc, VALUE *argv, VALUE klass)
|
|
38
47
|
return rb_node;
|
39
48
|
}
|
40
49
|
|
50
|
+
/**
|
51
|
+
* Get the name of the Attribute.
|
52
|
+
*
|
53
|
+
* @return [String]
|
54
|
+
*/
|
41
55
|
static VALUE
|
42
56
|
nl_attribute_name(VALUE self)
|
43
57
|
{
|
@@ -50,6 +64,12 @@ nl_attribute_name(VALUE self)
|
|
50
64
|
return rb_utf8_str_new(name, len);
|
51
65
|
}
|
52
66
|
|
67
|
+
/**
|
68
|
+
* call-seq:
|
69
|
+
* name=(name) -> String
|
70
|
+
*
|
71
|
+
* Set the name of the Attribute.
|
72
|
+
*/
|
53
73
|
static VALUE
|
54
74
|
nl_attribute_set_name(VALUE self, VALUE rb_name)
|
55
75
|
{
|
@@ -67,6 +87,11 @@ nl_attribute_set_name(VALUE self, VALUE rb_name)
|
|
67
87
|
return rb_name;
|
68
88
|
}
|
69
89
|
|
90
|
+
/**
|
91
|
+
* Get the value of the Attribute.
|
92
|
+
*
|
93
|
+
* @return [String]
|
94
|
+
*/
|
70
95
|
static VALUE
|
71
96
|
nl_attribute_value(VALUE self)
|
72
97
|
{
|
@@ -79,6 +104,12 @@ nl_attribute_value(VALUE self)
|
|
79
104
|
return rb_utf8_str_new(value, len);
|
80
105
|
}
|
81
106
|
|
107
|
+
/**
|
108
|
+
* call-seq:
|
109
|
+
* value=(value) -> String
|
110
|
+
*
|
111
|
+
* Set the value of the Attribute.
|
112
|
+
*/
|
82
113
|
static VALUE
|
83
114
|
nl_attribute_set_value(VALUE self, VALUE rb_content)
|
84
115
|
{
|
@@ -96,6 +127,11 @@ nl_attribute_set_value(VALUE self, VALUE rb_content)
|
|
96
127
|
return rb_content;
|
97
128
|
}
|
98
129
|
|
130
|
+
/**
|
131
|
+
* Get the owner Node of the Attribute.
|
132
|
+
*
|
133
|
+
* @return [Node]
|
134
|
+
*/
|
99
135
|
static VALUE
|
100
136
|
nl_attribute_parent(VALUE self)
|
101
137
|
{
|
@@ -108,6 +144,11 @@ nl_attribute_parent(VALUE self)
|
|
108
144
|
return nl_rb_node_create(attr->owner, nl_rb_document_get(self));
|
109
145
|
}
|
110
146
|
|
147
|
+
/**
|
148
|
+
* Get the previous Attribute.
|
149
|
+
*
|
150
|
+
* @return [Attribute]
|
151
|
+
*/
|
111
152
|
static VALUE
|
112
153
|
nl_attribute_previous(VALUE self)
|
113
154
|
{
|
@@ -120,6 +161,11 @@ nl_attribute_previous(VALUE self)
|
|
120
161
|
return nl_rb_node_create(attr->prev, nl_rb_document_get(self));
|
121
162
|
}
|
122
163
|
|
164
|
+
/**
|
165
|
+
* Get the next Attribute.
|
166
|
+
*
|
167
|
+
* @return [Attribute]
|
168
|
+
*/
|
123
169
|
static VALUE
|
124
170
|
nl_attribute_next(VALUE self)
|
125
171
|
{
|
data/ext/nokolexbor/nl_cdata.c
CHANGED
@@ -4,6 +4,14 @@ VALUE cNokolexborCData;
|
|
4
4
|
extern VALUE cNokolexborText;
|
5
5
|
extern VALUE mNokolexbor;
|
6
6
|
|
7
|
+
/**
|
8
|
+
* call-seq:
|
9
|
+
* new(content, document) { |CDATA| ... } -> CDATA
|
10
|
+
*
|
11
|
+
* Create a new CDATA from +content+.
|
12
|
+
*
|
13
|
+
* @return [CDATA]
|
14
|
+
*/
|
7
15
|
static VALUE
|
8
16
|
nl_cdata_new(int argc, VALUE *argv, VALUE klass)
|
9
17
|
{
|
data/ext/nokolexbor/nl_comment.c
CHANGED
@@ -4,6 +4,12 @@ VALUE cNokolexborComment;
|
|
4
4
|
extern VALUE cNokolexborCharacterData;
|
5
5
|
extern VALUE mNokolexbor;
|
6
6
|
|
7
|
+
/**
|
8
|
+
* call-seq:
|
9
|
+
* new(content, document) { |Comment| ... } -> Comment
|
10
|
+
*
|
11
|
+
* Create a new Comment from +content+.
|
12
|
+
*/
|
7
13
|
static VALUE
|
8
14
|
nl_comment_new(int argc, VALUE *argv, VALUE klass)
|
9
15
|
{
|
data/ext/nokolexbor/nl_document.c
CHANGED
@@ -21,6 +21,16 @@ const rb_data_type_t nl_document_type = {
|
|
21
21
|
RUBY_TYPED_FREE_IMMEDIATELY,
|
22
22
|
};
|
23
23
|
|
24
|
+
/**
|
25
|
+
* call-seq:
|
26
|
+
* parse(string_or_io) -> Document
|
27
|
+
*
|
28
|
+
* Parse HTML into a {Document}.
|
29
|
+
*
|
30
|
+
* @param string_or_io [String, #read]
|
31
|
+
* The HTML to be parsed. It may be a String, or any object that
|
32
|
+
* responds to #read such as an IO, or StringIO.
|
33
|
+
*/
|
24
34
|
static VALUE
|
25
35
|
nl_document_parse(VALUE self, VALUE rb_string_or_io)
|
26
36
|
{
|
@@ -51,6 +61,11 @@ nl_document_parse(VALUE self, VALUE rb_string_or_io)
|
|
51
61
|
return TypedData_Wrap_Struct(cNokolexborDocument, &nl_document_type, document);
|
52
62
|
}
|
53
63
|
|
64
|
+
/**
|
65
|
+
* Create a new document.
|
66
|
+
*
|
67
|
+
* @return [Document]
|
68
|
+
*/
|
54
69
|
static VALUE
|
55
70
|
nl_document_new(VALUE self)
|
56
71
|
{
|
@@ -65,21 +80,51 @@ nl_rb_document_unwrap(VALUE rb_doc)
|
|
65
80
|
return doc;
|
66
81
|
}
|
67
82
|
|
68
|
-
|
69
|
-
|
83
|
+
/**
|
84
|
+
* Get the title of this document.
|
85
|
+
*
|
86
|
+
* @return [String]
|
87
|
+
*/
|
88
|
+
static VALUE
|
89
|
+
nl_document_get_title(VALUE self)
|
70
90
|
{
|
71
91
|
size_t len;
|
72
|
-
lxb_char_t *str = lxb_html_document_title(nl_rb_document_unwrap(
|
92
|
+
lxb_char_t *str = lxb_html_document_title(nl_rb_document_unwrap(self), &len);
|
73
93
|
return str == NULL ? rb_str_new("", 0) : rb_utf8_str_new(str, len);
|
74
94
|
}
|
75
95
|
|
76
|
-
|
77
|
-
|
96
|
+
/**
|
97
|
+
* call-seq:
|
98
|
+
* title=(text) -> String
|
99
|
+
*
|
100
|
+
* Set the title of this document.
|
101
|
+
*
|
102
|
+
* If a title element is already present, its content is replaced
|
103
|
+
* with the given text.
|
104
|
+
*
|
105
|
+
* Otherwise, this method tries to create one inside <head>.
|
106
|
+
*
|
107
|
+
* @return [String]
|
108
|
+
*/
|
109
|
+
static VALUE
|
110
|
+
nl_document_set_title(VALUE self, VALUE rb_title)
|
78
111
|
{
|
79
112
|
const char *c_title = StringValuePtr(rb_title);
|
80
113
|
size_t len = RSTRING_LEN(rb_title);
|
81
|
-
lxb_char_t *str = lxb_html_document_title_set(nl_rb_document_unwrap(
|
82
|
-
return
|
114
|
+
lxb_char_t *str = lxb_html_document_title_set(nl_rb_document_unwrap(self), (const lxb_char_t *)c_title, len);
|
115
|
+
return rb_title;
|
116
|
+
}
|
117
|
+
|
118
|
+
/**
|
119
|
+
* Get the root node for this document.
|
120
|
+
*
|
121
|
+
* @return [Node]
|
122
|
+
*/
|
123
|
+
static VALUE
|
124
|
+
nl_document_root(VALUE self)
|
125
|
+
{
|
126
|
+
lxb_dom_document_t *doc = nl_rb_document_unwrap(self);
|
127
|
+
return nl_rb_node_create(lxb_dom_document_root(doc), self);
|
83
128
|
}
|
84
129
|
|
85
130
|
void Init_nl_document(void)
|
@@ -89,4 +134,5 @@ void Init_nl_document(void)
|
|
89
134
|
rb_define_singleton_method(cNokolexborDocument, "parse", nl_document_parse, 1);
|
90
135
|
rb_define_method(cNokolexborDocument, "title", nl_document_get_title, 0);
|
91
136
|
rb_define_method(cNokolexborDocument, "title=", nl_document_set_title, 1);
|
137
|
+
rb_define_method(cNokolexborDocument, "root", nl_document_root, 0);
|
92
138
|
}
|
data/ext/nokolexbor/nl_document_fragment.c
CHANGED
@@ -4,6 +4,15 @@ VALUE cNokolexborDocumentFragment;
|
|
4
4
|
extern VALUE cNokolexborNode;
|
5
5
|
extern VALUE mNokolexbor;
|
6
6
|
|
7
|
+
/**
|
8
|
+
* call-seq:
|
9
|
+
* new(document, tags = nil, ctx = nil) -> DocumentFragment
|
10
|
+
*
|
11
|
+
* Create a {DocumentFragment} from +tags+.
|
12
|
+
*
|
13
|
+
* If +ctx+ is present, it is used as a context node for the
|
14
|
+
* subtree created.
|
15
|
+
*/
|
7
16
|
static VALUE
|
8
17
|
nl_document_fragment_new(int argc, VALUE *argv, VALUE klass)
|
9
18
|
{
|
data/ext/nokolexbor/nl_error.c
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
#include "nokolexbor.h"
|
2
2
|
|
3
|
+
VALUE mLexbor;
|
3
4
|
VALUE eLexborError;
|
4
5
|
VALUE eLexborMemoryAllocationError;
|
5
6
|
VALUE eLexborSmallBufferError;
|
@@ -71,23 +72,24 @@ void nl_raise_lexbor_error(lxb_status_t error)
|
|
71
72
|
|
72
73
|
void Init_nl_error(void)
|
73
74
|
{
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
75
|
+
mLexbor = rb_define_module_under(mNokolexbor, "Lexbor");
|
76
|
+
eLexborError = rb_define_class_under(mLexbor, "Error", rb_eStandardError);
|
77
|
+
eLexborMemoryAllocationError = rb_define_class_under(mLexbor, "MemoryAllocationError", eLexborError);
|
78
|
+
eLexborSmallBufferError = rb_define_class_under(mLexbor, "SmallBufferError", eLexborError);
|
79
|
+
eLexborObjectIsNullError = rb_define_class_under(mLexbor, "ObjectIsNullError", eLexborError);
|
80
|
+
eLexborIncompleteObjectError = rb_define_class_under(mLexbor, "IncompleteObjectError", eLexborError);
|
81
|
+
eLexborNoFreeSlotError = rb_define_class_under(mLexbor, "NoFreeSlotError", eLexborError);
|
82
|
+
eLexborTooSmallSizeError = rb_define_class_under(mLexbor, "TooSmallSizeError", eLexborError);
|
83
|
+
eLexborNotExistsError = rb_define_class_under(mLexbor, "NotExistsError", eLexborError);
|
84
|
+
eLexborWrongArgsError = rb_define_class_under(mLexbor, "WrongArgsError", eLexborError);
|
85
|
+
eLexborWrongStageError = rb_define_class_under(mLexbor, "WrongStageError", eLexborError);
|
86
|
+
eLexborUnexpectedResultError = rb_define_class_under(mLexbor, "UnexpectedResultError", eLexborError);
|
87
|
+
eLexborUnexpectedDataError = rb_define_class_under(mLexbor, "UnexpectedDataError", eLexborError);
|
88
|
+
eLexborOverflowError = rb_define_class_under(mLexbor, "OverflowError", eLexborError);
|
89
|
+
eLexborContinueStatus = rb_define_class_under(mLexbor, "ContinueStatus", eLexborError);
|
90
|
+
eLexborSmallBufferStatus = rb_define_class_under(mLexbor, "SmallBufferStatus", eLexborError);
|
91
|
+
eLexborAbortedStatus = rb_define_class_under(mLexbor, "AbortedStatus", eLexborError);
|
92
|
+
eLexborStoppedStatus = rb_define_class_under(mLexbor, "StoppedStatus", eLexborError);
|
93
|
+
eLexborNextStatus = rb_define_class_under(mLexbor, "NextStatus", eLexborError);
|
94
|
+
eLexborStopStatus = rb_define_class_under(mLexbor, "StopStatus", eLexborError);
|
93
95
|
}
|