nokolexbor 0.2.6 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/nokolexbor/CMakeLists.txt +7 -4
- data/ext/nokolexbor/config.h.cmake.in +2 -0
- data/ext/nokolexbor/extconf.rb +47 -25
- data/ext/nokolexbor/memory.c +6 -6
- data/ext/nokolexbor/nl_cdata.c +44 -0
- data/ext/nokolexbor/nl_comment.c +44 -0
- data/ext/nokolexbor/nl_document.c +23 -9
- data/ext/nokolexbor/nl_node.c +186 -173
- data/ext/nokolexbor/nl_node_set.c +35 -70
- data/ext/nokolexbor/nl_text.c +44 -0
- data/ext/nokolexbor/nl_xpath_context.c +17 -26
- data/ext/nokolexbor/nokolexbor.c +7 -3
- data/ext/nokolexbor/nokolexbor.h +9 -7
- data/lib/nokolexbor/document.rb +92 -1
- data/lib/nokolexbor/node.rb +64 -0
- data/lib/nokolexbor/node_set.rb +6 -5
- data/lib/nokolexbor/version.rb +1 -1
- data/lib/nokolexbor.rb +21 -1
- data/patches/0001-lexbor-support-text-pseudo-element.patch +1 -1
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6bfa3062e8718581cd8aabef75f0084a6626bd90301c9db4957ee975215edec1
|
4
|
+
data.tar.gz: 243f5c217c85750f63bfc0dcb8af575c019d3f2fd920f41769c0affc8a219802
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e66b390ef696ed5cafa480065e8ac8fc66ce98d86a7cb15f2f59d8acb7df21381942e5fe6f0244e3b9c7bf54787131c5241a49faeaba328eb49e65af0ac3216c
|
7
|
+
data.tar.gz: d266e2a7c49a202aec4112667807158b54878cc014b42fc32492fc0977c06db76b1a5ded827d9810f2e7966ba3e5068fe54ab89915a4c56e499181ddc190b6d7
|
@@ -1,8 +1,11 @@
|
|
1
|
-
cmake_minimum_required(VERSION
|
1
|
+
cmake_minimum_required(VERSION 2.8.12)
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
IF(CMAKE_VERSION VERSION_LESS "3.0")
|
4
|
+
project(libxml2)
|
5
|
+
ELSE()
|
6
|
+
cmake_policy(SET CMP0048 NEW)
|
7
|
+
project(libxml2 VERSION "2.11.0")
|
8
|
+
ENDIF()
|
6
9
|
|
7
10
|
include(CheckFunctionExists)
|
8
11
|
include(CheckIncludeFiles)
|
data/ext/nokolexbor/extconf.rb
CHANGED
@@ -1,21 +1,60 @@
|
|
1
1
|
require 'mkmf'
|
2
2
|
require 'timeout'
|
3
3
|
|
4
|
+
if ENV["CC"]
|
5
|
+
RbConfig::CONFIG["CC"] = RbConfig::MAKEFILE_CONFIG["CC"] = ENV["CC"]
|
6
|
+
end
|
7
|
+
|
8
|
+
# From: https://stackoverflow.com/questions/2108727
|
9
|
+
# Cross-platform way of finding an executable in the $PATH.
|
10
|
+
#
|
11
|
+
# which('ruby') #=> /usr/bin/ruby
|
12
|
+
# Locate +cmd+ in the directories listed in ENV['PATH'].
#
# Each extension from ENV['PATHEXT'] (split on ';') is appended in turn;
# when PATHEXT is unset the bare command name is used.
#
# @param cmd [String] command name to search for
# @return [String, nil] path of the first executable regular match, or nil
def which(cmd)
  exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : ['']
  # An unset PATH is treated as an empty search list instead of raising on nil.
  ENV.fetch('PATH', '').split(File::PATH_SEPARATOR).each do |dir|
    exts.each do |ext|
      exe = File.join(dir, "#{cmd}#{ext}")
      # File.executable? is also true for searchable directories; skip those
      # so a directory that happens to share the tool's name is never returned.
      return exe if File.executable?(exe) && !File.directory?(exe)
    end
  end
  nil
end
|
22
|
+
|
4
23
|
cmake_flags = [ ENV["CMAKE_FLAGS"] ]
|
5
|
-
cmake_flags << "-
|
6
|
-
|
7
|
-
cmake_flags << "-
|
24
|
+
cmake_flags << "-DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY"
|
25
|
+
# Set system name explicitly when cross-compiling
|
26
|
+
cmake_flags << "-DCMAKE_SYSTEM_NAME=Windows -DWIN32=1" if Gem.win_platform?
|
27
|
+
# On Windows, Ruby-DevKit is MSYS-based, so ensure to use MSYS Makefiles.
|
28
|
+
cmake_flags << "-G \"MSYS Makefiles\"" if Gem.win_platform? && !ENV['NOKOLEXBOR_CROSS_COMPILE']
|
29
|
+
|
30
|
+
if ENV['NOKOLEXBOR_CROSS_COMPILE']
|
31
|
+
# use the same toolchain for cross-compiling lexbor
|
32
|
+
['CC', 'CXX'].each do |env|
|
33
|
+
if RbConfig::CONFIG[env]
|
34
|
+
ENV[env] = RbConfig::CONFIG[env]
|
35
|
+
end
|
36
|
+
end
|
37
|
+
{'RANLIB' => 'RANLIB', 'AR' => 'AR', 'LD' => 'LINKER'}.each do |env, cmake_opt|
|
38
|
+
if RbConfig::CONFIG[env]
|
39
|
+
cmake_flags << "-DCMAKE_#{cmake_opt}=#{which(RbConfig::CONFIG[env])}"
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
lexbor_cmake_flags = cmake_flags + ["-DLEXBOR_BUILD_TESTS_CPP=OFF"]
|
45
|
+
lexbor_cmake_flags << "-DLEXBOR_BUILD_SHARED=OFF"
|
46
|
+
lexbor_cmake_flags << "-DLEXBOR_BUILD_STATIC=ON"
|
8
47
|
|
9
48
|
if ENV['NOKOLEXBOR_DEBUG'] || ENV['NOKOLEXBOR_ASAN']
|
10
49
|
CONFIG["optflags"] = "-O0"
|
11
50
|
CONFIG["debugflags"] = "-ggdb3"
|
12
|
-
|
51
|
+
lexbor_cmake_flags << "-DLEXBOR_OPTIMIZATION_LEVEL='-O0 -g'"
|
13
52
|
end
|
14
53
|
|
15
54
|
if ENV['NOKOLEXBOR_ASAN']
|
16
55
|
$LDFLAGS << " -fsanitize=address"
|
17
56
|
$CFLAGS << " -fsanitize=address -DNOKOLEXBOR_ASAN"
|
18
|
-
|
57
|
+
lexbor_cmake_flags << "-DLEXBOR_BUILD_WITH_ASAN=ON"
|
19
58
|
end
|
20
59
|
|
21
60
|
append_cflags("-DLEXBOR_STATIC")
|
@@ -35,7 +74,7 @@ end
|
|
35
74
|
|
36
75
|
def self.run_cmake(timeout, args)
|
37
76
|
# Set to process group so we can kill it and its children
|
38
|
-
pgroup = Gem.win_platform? ? :new_pgroup : :pgroup
|
77
|
+
pgroup = (Gem.win_platform? && !ENV['NOKOLEXBOR_CROSS_COMPILE']) ? :new_pgroup : :pgroup
|
39
78
|
pid = Process.spawn("cmake #{args}", pgroup => true)
|
40
79
|
|
41
80
|
Timeout.timeout(timeout) do
|
@@ -50,21 +89,6 @@ rescue Timeout::Error
|
|
50
89
|
raise CMakeTimeout.new("cmake has exceeded its timeout of #{timeout}s")
|
51
90
|
end
|
52
91
|
|
53
|
-
# From: https://stackoverflow.com/questions/2108727
|
54
|
-
# Cross-platform way of finding an executable in the $PATH.
|
55
|
-
#
|
56
|
-
# which('ruby') #=> /usr/bin/ruby
|
57
|
-
def which(cmd)
|
58
|
-
exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : ['']
|
59
|
-
ENV['PATH'].split(File::PATH_SEPARATOR).each do |path|
|
60
|
-
exts.each { |ext|
|
61
|
-
exe = File.join(path, "#{cmd}#{ext}")
|
62
|
-
return exe if File.executable? exe
|
63
|
-
}
|
64
|
-
end
|
65
|
-
return nil
|
66
|
-
end
|
67
|
-
|
68
92
|
# From: https://github.com/flavorjones/mini_portile/blob/main/lib/mini_portile2/mini_portile.rb#L94
|
69
93
|
def apply_patch(patch_file, chdir)
|
70
94
|
case
|
@@ -109,9 +133,7 @@ Dir.chdir(LEXBOR_DIR) do
|
|
109
133
|
Dir.mkdir("build") if !Dir.exist?("build")
|
110
134
|
|
111
135
|
Dir.chdir("build") do
|
112
|
-
|
113
|
-
generator = "-G \"MSYS Makefiles\"" if Gem.win_platform?
|
114
|
-
run_cmake(10 * 60, ".. -DCMAKE_INSTALL_PREFIX:PATH=#{INSTALL_DIR} #{cmake_flags.join(' ')} #{generator}")
|
136
|
+
run_cmake(10 * 60, ".. -DCMAKE_INSTALL_PREFIX:PATH=#{INSTALL_DIR} #{lexbor_cmake_flags.join(' ')}")
|
115
137
|
sys("#{MAKE} install")
|
116
138
|
end
|
117
139
|
end
|
@@ -121,7 +143,7 @@ Dir.chdir(EXT_DIR) do
|
|
121
143
|
Dir.mkdir("build") if !Dir.exist?("build")
|
122
144
|
|
123
145
|
Dir.chdir("build") do
|
124
|
-
run_cmake(10 * 60, "
|
146
|
+
run_cmake(10 * 60, ".. #{cmake_flags.join(' ')} #{Gem.win_platform? ? "-DLIBXML2_WITH_THREADS=OFF" : ""}")
|
125
147
|
end
|
126
148
|
end
|
127
149
|
|
data/ext/nokolexbor/memory.c
CHANGED
@@ -10,8 +10,8 @@
|
|
10
10
|
* of more frequent GC.
|
11
11
|
*/
|
12
12
|
|
13
|
-
#include <ruby.h>
|
14
13
|
#include "lexbor/core/base.h"
|
14
|
+
#include <ruby.h>
|
15
15
|
|
16
16
|
// Disable using ruby memory functions when ASAN is enabled,
|
17
17
|
// otherwise memory leak info will be all about ruby which
|
@@ -21,26 +21,26 @@
|
|
21
21
|
/* Allocation hook for lexbor: forwards the request to ruby_xmalloc(). */
void *
lexbor_malloc(size_t size)
{
  void *mem = ruby_xmalloc(size);
  return mem;
}
|
26
26
|
|
27
27
|
/* Reallocation hook for lexbor: resizes `dst` to `size` bytes via ruby_xrealloc(). */
void *
lexbor_realloc(void *dst, size_t size)
{
  void *resized = ruby_xrealloc(dst, size);
  return resized;
}
|
32
32
|
|
33
33
|
/* Array-allocation hook for lexbor: forwards (num, size) to ruby_xcalloc(). */
void *
lexbor_calloc(size_t num, size_t size)
{
  void *mem = ruby_xcalloc(num, size);
  return mem;
}
|
38
38
|
|
39
39
|
/*
 * Release hook for lexbor: hands `dst` to ruby_xfree().
 * Always returns NULL, so a caller can write `ptr = lexbor_free(ptr);`.
 */
void *
lexbor_free(void *dst)
{
  ruby_xfree(dst);

  return NULL;
}
|
45
45
|
|
46
46
|
#endif
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#include "nokolexbor.h"
|
2
|
+
|
3
|
+
VALUE cNokolexborCData;
|
4
|
+
extern VALUE cNokolexborText;
|
5
|
+
extern VALUE mNokolexbor;
|
6
|
+
|
7
|
+
static VALUE
|
8
|
+
nl_cdata_new(int argc, VALUE *argv, VALUE klass)
|
9
|
+
{
|
10
|
+
lxb_dom_document_t *document;
|
11
|
+
VALUE rb_content;
|
12
|
+
VALUE rb_document;
|
13
|
+
VALUE rest;
|
14
|
+
|
15
|
+
rb_scan_args(argc, argv, "2*", &rb_content, &rb_document, &rest);
|
16
|
+
|
17
|
+
if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
|
18
|
+
rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
|
19
|
+
}
|
20
|
+
|
21
|
+
document = nl_rb_document_unwrap(rb_document);
|
22
|
+
|
23
|
+
const char* c_content = StringValuePtr(rb_content);
|
24
|
+
size_t content_len = RSTRING_LEN(rb_content);
|
25
|
+
lxb_dom_cdata_section_t *element = lxb_dom_document_create_cdata_section(document, (const lxb_char_t *)c_content, content_len);
|
26
|
+
if (element == NULL) {
|
27
|
+
rb_raise(rb_eRuntimeError, "Error creating text node");
|
28
|
+
}
|
29
|
+
|
30
|
+
VALUE rb_node = nl_rb_node_create(&element->text.char_data.node, rb_document);
|
31
|
+
|
32
|
+
if (rb_block_given_p()) {
|
33
|
+
rb_yield(rb_node);
|
34
|
+
}
|
35
|
+
|
36
|
+
return rb_node;
|
37
|
+
}
|
38
|
+
|
39
|
+
void Init_nl_cdata(void)
|
40
|
+
{
|
41
|
+
cNokolexborCData = rb_define_class_under(mNokolexbor, "CDATA", cNokolexborText);
|
42
|
+
|
43
|
+
rb_define_singleton_method(cNokolexborCData, "new", nl_cdata_new, -1);
|
44
|
+
}
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#include "nokolexbor.h"
|
2
|
+
|
3
|
+
VALUE cNokolexborComment;
|
4
|
+
extern VALUE cNokolexborCharacterData;
|
5
|
+
extern VALUE mNokolexbor;
|
6
|
+
|
7
|
+
static VALUE
|
8
|
+
nl_comment_new(int argc, VALUE *argv, VALUE klass)
|
9
|
+
{
|
10
|
+
lxb_dom_document_t *document;
|
11
|
+
VALUE rb_content;
|
12
|
+
VALUE rb_document;
|
13
|
+
VALUE rest;
|
14
|
+
|
15
|
+
rb_scan_args(argc, argv, "2*", &rb_content, &rb_document, &rest);
|
16
|
+
|
17
|
+
if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
|
18
|
+
rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
|
19
|
+
}
|
20
|
+
|
21
|
+
document = nl_rb_document_unwrap(rb_document);
|
22
|
+
|
23
|
+
const char* c_content = StringValuePtr(rb_content);
|
24
|
+
size_t content_len = RSTRING_LEN(rb_content);
|
25
|
+
lxb_dom_comment_t *element = lxb_dom_document_create_comment(document, (const lxb_char_t *)c_content, content_len);
|
26
|
+
if (element == NULL) {
|
27
|
+
rb_raise(rb_eRuntimeError, "Error creating comment");
|
28
|
+
}
|
29
|
+
|
30
|
+
VALUE rb_node = nl_rb_node_create(&element->char_data.node, rb_document);
|
31
|
+
|
32
|
+
if (rb_block_given_p()) {
|
33
|
+
rb_yield(rb_node);
|
34
|
+
}
|
35
|
+
|
36
|
+
return rb_node;
|
37
|
+
}
|
38
|
+
|
39
|
+
void Init_nl_comment(void)
|
40
|
+
{
|
41
|
+
cNokolexborComment = rb_define_class_under(mNokolexbor, "Comment", cNokolexborCharacterData);
|
42
|
+
|
43
|
+
rb_define_singleton_method(cNokolexborComment, "new", nl_comment_new, -1);
|
44
|
+
}
|
@@ -26,12 +26,9 @@ nl_document_parse(VALUE self, VALUE rb_string_or_io)
|
|
26
26
|
{
|
27
27
|
VALUE id_read = rb_intern("read");
|
28
28
|
VALUE rb_html;
|
29
|
-
if (rb_respond_to(rb_string_or_io, id_read))
|
30
|
-
{
|
29
|
+
if (rb_respond_to(rb_string_or_io, id_read)) {
|
31
30
|
rb_html = rb_funcall(rb_string_or_io, id_read, 0);
|
32
|
-
}
|
33
|
-
else
|
34
|
-
{
|
31
|
+
} else {
|
35
32
|
rb_html = rb_string_or_io;
|
36
33
|
}
|
37
34
|
const char *html_c = StringValuePtr(rb_html);
|
@@ -40,14 +37,12 @@ nl_document_parse(VALUE self, VALUE rb_string_or_io)
|
|
40
37
|
lxb_html_document_t *document;
|
41
38
|
|
42
39
|
document = lxb_html_document_create();
|
43
|
-
if (document == NULL)
|
44
|
-
{
|
40
|
+
if (document == NULL) {
|
45
41
|
rb_raise(rb_eRuntimeError, "Error creating document");
|
46
42
|
}
|
47
43
|
|
48
44
|
lxb_status_t status = lxb_html_document_parse(document, (const lxb_char_t *)html_c, html_len);
|
49
|
-
if (status != LXB_STATUS_OK)
|
50
|
-
{
|
45
|
+
if (status != LXB_STATUS_OK) {
|
51
46
|
nl_raise_lexbor_error(status);
|
52
47
|
}
|
53
48
|
|
@@ -68,9 +63,28 @@ nl_rb_document_unwrap(VALUE rb_doc)
|
|
68
63
|
return doc;
|
69
64
|
}
|
70
65
|
|
66
|
+
VALUE
|
67
|
+
nl_document_get_title(VALUE rb_doc)
|
68
|
+
{
|
69
|
+
size_t len;
|
70
|
+
lxb_char_t *str = lxb_html_document_title(nl_rb_document_unwrap(rb_doc), &len);
|
71
|
+
return str == NULL ? rb_str_new("", 0) : rb_utf8_str_new(str, len);
|
72
|
+
}
|
73
|
+
|
74
|
+
VALUE
|
75
|
+
nl_document_set_title(VALUE rb_doc, VALUE rb_title)
|
76
|
+
{
|
77
|
+
const char *c_title = StringValuePtr(rb_title);
|
78
|
+
size_t len = RSTRING_LEN(rb_title);
|
79
|
+
lxb_char_t *str = lxb_html_document_title_set(nl_rb_document_unwrap(rb_doc), (const lxb_char_t *)c_title, len);
|
80
|
+
return Qnil;
|
81
|
+
}
|
82
|
+
|
71
83
|
void Init_nl_document(void)
|
72
84
|
{
|
73
85
|
cNokolexborDocument = rb_define_class_under(mNokolexbor, "Document", cNokolexborNode);
|
74
86
|
rb_define_singleton_method(cNokolexborDocument, "new", nl_document_new, 0);
|
75
87
|
rb_define_singleton_method(cNokolexborDocument, "parse", nl_document_parse, 1);
|
88
|
+
rb_define_method(cNokolexborDocument, "title", nl_document_get_title, 0);
|
89
|
+
rb_define_method(cNokolexborDocument, "title=", nl_document_set_title, 1);
|
76
90
|
}
|