nokolexbor 0.2.6 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nokolexbor/CMakeLists.txt +7 -4
- data/ext/nokolexbor/config.h.cmake.in +2 -0
- data/ext/nokolexbor/extconf.rb +47 -25
- data/ext/nokolexbor/memory.c +6 -6
- data/ext/nokolexbor/nl_cdata.c +44 -0
- data/ext/nokolexbor/nl_comment.c +44 -0
- data/ext/nokolexbor/nl_document.c +23 -9
- data/ext/nokolexbor/nl_node.c +186 -173
- data/ext/nokolexbor/nl_node_set.c +35 -70
- data/ext/nokolexbor/nl_text.c +44 -0
- data/ext/nokolexbor/nl_xpath_context.c +17 -26
- data/ext/nokolexbor/nokolexbor.c +7 -3
- data/ext/nokolexbor/nokolexbor.h +9 -7
- data/lib/nokolexbor/document.rb +92 -1
- data/lib/nokolexbor/node.rb +64 -0
- data/lib/nokolexbor/node_set.rb +6 -5
- data/lib/nokolexbor/version.rb +1 -1
- data/lib/nokolexbor.rb +21 -1
- data/patches/0001-lexbor-support-text-pseudo-element.patch +1 -1
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6bfa3062e8718581cd8aabef75f0084a6626bd90301c9db4957ee975215edec1
|
4
|
+
data.tar.gz: 243f5c217c85750f63bfc0dcb8af575c019d3f2fd920f41769c0affc8a219802
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e66b390ef696ed5cafa480065e8ac8fc66ce98d86a7cb15f2f59d8acb7df21381942e5fe6f0244e3b9c7bf54787131c5241a49faeaba328eb49e65af0ac3216c
|
7
|
+
data.tar.gz: d266e2a7c49a202aec4112667807158b54878cc014b42fc32492fc0977c06db76b1a5ded827d9810f2e7966ba3e5068fe54ab89915a4c56e499181ddc190b6d7
|
@@ -1,8 +1,11 @@
|
|
1
|
-
cmake_minimum_required(VERSION
|
1
|
+
cmake_minimum_required(VERSION 2.8.12)
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
IF(CMAKE_VERSION VERSION_LESS "3.0")
|
4
|
+
project(libxml2)
|
5
|
+
ELSE()
|
6
|
+
cmake_policy(SET CMP0048 NEW)
|
7
|
+
project(libxml2 VERSION "2.11.0")
|
8
|
+
ENDIF()
|
6
9
|
|
7
10
|
include(CheckFunctionExists)
|
8
11
|
include(CheckIncludeFiles)
|
data/ext/nokolexbor/extconf.rb
CHANGED
@@ -1,21 +1,60 @@
|
|
1
1
|
require 'mkmf'
|
2
2
|
require 'timeout'
|
3
3
|
|
4
|
+
if ENV["CC"]
|
5
|
+
RbConfig::CONFIG["CC"] = RbConfig::MAKEFILE_CONFIG["CC"] = ENV["CC"]
|
6
|
+
end
|
7
|
+
|
8
|
+
# From: https://stackoverflow.com/questions/2108727
|
9
|
+
# Cross-platform way of finding an executable in the $PATH.
|
10
|
+
#
|
11
|
+
# which('ruby') #=> /usr/bin/ruby
|
12
|
+
def which(cmd)
|
13
|
+
exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : ['']
|
14
|
+
ENV['PATH'].split(File::PATH_SEPARATOR).each do |path|
|
15
|
+
exts.each { |ext|
|
16
|
+
exe = File.join(path, "#{cmd}#{ext}")
|
17
|
+
return exe if File.executable? exe
|
18
|
+
}
|
19
|
+
end
|
20
|
+
return nil
|
21
|
+
end
|
22
|
+
|
4
23
|
cmake_flags = [ ENV["CMAKE_FLAGS"] ]
|
5
|
-
cmake_flags << "-
|
6
|
-
|
7
|
-
cmake_flags << "-
|
24
|
+
cmake_flags << "-DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY"
|
25
|
+
# Set system name explicitly when cross-compiling
|
26
|
+
cmake_flags << "-DCMAKE_SYSTEM_NAME=Windows -DWIN32=1" if Gem.win_platform?
|
27
|
+
# On Windows, Ruby-DevKit is MSYS-based, so ensure to use MSYS Makefiles.
|
28
|
+
cmake_flags << "-G \"MSYS Makefiles\"" if Gem.win_platform? && !ENV['NOKOLEXBOR_CROSS_COMPILE']
|
29
|
+
|
30
|
+
if ENV['NOKOLEXBOR_CROSS_COMPILE']
|
31
|
+
# use the same toolchain for cross-compiling lexbor
|
32
|
+
['CC', 'CXX'].each do |env|
|
33
|
+
if RbConfig::CONFIG[env]
|
34
|
+
ENV[env] = RbConfig::CONFIG[env]
|
35
|
+
end
|
36
|
+
end
|
37
|
+
{'RANLIB' => 'RANLIB', 'AR' => 'AR', 'LD' => 'LINKER'}.each do |env, cmake_opt|
|
38
|
+
if RbConfig::CONFIG[env]
|
39
|
+
cmake_flags << "-DCMAKE_#{cmake_opt}=#{which(RbConfig::CONFIG[env])}"
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
lexbor_cmake_flags = cmake_flags + ["-DLEXBOR_BUILD_TESTS_CPP=OFF"]
|
45
|
+
lexbor_cmake_flags << "-DLEXBOR_BUILD_SHARED=OFF"
|
46
|
+
lexbor_cmake_flags << "-DLEXBOR_BUILD_STATIC=ON"
|
8
47
|
|
9
48
|
if ENV['NOKOLEXBOR_DEBUG'] || ENV['NOKOLEXBOR_ASAN']
|
10
49
|
CONFIG["optflags"] = "-O0"
|
11
50
|
CONFIG["debugflags"] = "-ggdb3"
|
12
|
-
|
51
|
+
lexbor_cmake_flags << "-DLEXBOR_OPTIMIZATION_LEVEL='-O0 -g'"
|
13
52
|
end
|
14
53
|
|
15
54
|
if ENV['NOKOLEXBOR_ASAN']
|
16
55
|
$LDFLAGS << " -fsanitize=address"
|
17
56
|
$CFLAGS << " -fsanitize=address -DNOKOLEXBOR_ASAN"
|
18
|
-
|
57
|
+
lexbor_cmake_flags << "-DLEXBOR_BUILD_WITH_ASAN=ON"
|
19
58
|
end
|
20
59
|
|
21
60
|
append_cflags("-DLEXBOR_STATIC")
|
@@ -35,7 +74,7 @@ end
|
|
35
74
|
|
36
75
|
def self.run_cmake(timeout, args)
|
37
76
|
# Set to process group so we can kill it and its children
|
38
|
-
pgroup = Gem.win_platform? ? :new_pgroup : :pgroup
|
77
|
+
pgroup = (Gem.win_platform? && !ENV['NOKOLEXBOR_CROSS_COMPILE']) ? :new_pgroup : :pgroup
|
39
78
|
pid = Process.spawn("cmake #{args}", pgroup => true)
|
40
79
|
|
41
80
|
Timeout.timeout(timeout) do
|
@@ -50,21 +89,6 @@ rescue Timeout::Error
|
|
50
89
|
raise CMakeTimeout.new("cmake has exceeded its timeout of #{timeout}s")
|
51
90
|
end
|
52
91
|
|
53
|
-
# From: https://stackoverflow.com/questions/2108727
|
54
|
-
# Cross-platform way of finding an executable in the $PATH.
|
55
|
-
#
|
56
|
-
# which('ruby') #=> /usr/bin/ruby
|
57
|
-
def which(cmd)
|
58
|
-
exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : ['']
|
59
|
-
ENV['PATH'].split(File::PATH_SEPARATOR).each do |path|
|
60
|
-
exts.each { |ext|
|
61
|
-
exe = File.join(path, "#{cmd}#{ext}")
|
62
|
-
return exe if File.executable? exe
|
63
|
-
}
|
64
|
-
end
|
65
|
-
return nil
|
66
|
-
end
|
67
|
-
|
68
92
|
# From: https://github.com/flavorjones/mini_portile/blob/main/lib/mini_portile2/mini_portile.rb#L94
|
69
93
|
def apply_patch(patch_file, chdir)
|
70
94
|
case
|
@@ -109,9 +133,7 @@ Dir.chdir(LEXBOR_DIR) do
|
|
109
133
|
Dir.mkdir("build") if !Dir.exist?("build")
|
110
134
|
|
111
135
|
Dir.chdir("build") do
|
112
|
-
|
113
|
-
generator = "-G \"MSYS Makefiles\"" if Gem.win_platform?
|
114
|
-
run_cmake(10 * 60, ".. -DCMAKE_INSTALL_PREFIX:PATH=#{INSTALL_DIR} #{cmake_flags.join(' ')} #{generator}")
|
136
|
+
run_cmake(10 * 60, ".. -DCMAKE_INSTALL_PREFIX:PATH=#{INSTALL_DIR} #{lexbor_cmake_flags.join(' ')}")
|
115
137
|
sys("#{MAKE} install")
|
116
138
|
end
|
117
139
|
end
|
@@ -121,7 +143,7 @@ Dir.chdir(EXT_DIR) do
|
|
121
143
|
Dir.mkdir("build") if !Dir.exist?("build")
|
122
144
|
|
123
145
|
Dir.chdir("build") do
|
124
|
-
run_cmake(10 * 60, "
|
146
|
+
run_cmake(10 * 60, ".. #{cmake_flags.join(' ')} #{Gem.win_platform? ? "-DLIBXML2_WITH_THREADS=OFF" : ""}")
|
125
147
|
end
|
126
148
|
end
|
127
149
|
|
data/ext/nokolexbor/memory.c
CHANGED
@@ -10,8 +10,8 @@
|
|
10
10
|
* of more frequent GC.
|
11
11
|
*/
|
12
12
|
|
13
|
-
#include <ruby.h>
|
14
13
|
#include "lexbor/core/base.h"
|
14
|
+
#include <ruby.h>
|
15
15
|
|
16
16
|
// Disable using ruby memory functions when ASAN is enabled,
|
17
17
|
// otherwise memory leak info will be all about ruby which
|
@@ -21,26 +21,26 @@
|
|
21
21
|
void *
|
22
22
|
lexbor_malloc(size_t size)
|
23
23
|
{
|
24
|
-
|
24
|
+
return ruby_xmalloc(size);
|
25
25
|
}
|
26
26
|
|
27
27
|
void *
|
28
28
|
lexbor_realloc(void *dst, size_t size)
|
29
29
|
{
|
30
|
-
|
30
|
+
return ruby_xrealloc(dst, size);
|
31
31
|
}
|
32
32
|
|
33
33
|
void *
|
34
34
|
lexbor_calloc(size_t num, size_t size)
|
35
35
|
{
|
36
|
-
|
36
|
+
return ruby_xcalloc(num, size);
|
37
37
|
}
|
38
38
|
|
39
39
|
void *
|
40
40
|
lexbor_free(void *dst)
|
41
41
|
{
|
42
|
-
|
43
|
-
|
42
|
+
ruby_xfree(dst);
|
43
|
+
return NULL;
|
44
44
|
}
|
45
45
|
|
46
46
|
#endif
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#include "nokolexbor.h"
|
2
|
+
|
3
|
+
VALUE cNokolexborCData;
|
4
|
+
extern VALUE cNokolexborText;
|
5
|
+
extern VALUE mNokolexbor;
|
6
|
+
|
7
|
+
static VALUE
|
8
|
+
nl_cdata_new(int argc, VALUE *argv, VALUE klass)
|
9
|
+
{
|
10
|
+
lxb_dom_document_t *document;
|
11
|
+
VALUE rb_content;
|
12
|
+
VALUE rb_document;
|
13
|
+
VALUE rest;
|
14
|
+
|
15
|
+
rb_scan_args(argc, argv, "2*", &rb_content, &rb_document, &rest);
|
16
|
+
|
17
|
+
if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
|
18
|
+
rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
|
19
|
+
}
|
20
|
+
|
21
|
+
document = nl_rb_document_unwrap(rb_document);
|
22
|
+
|
23
|
+
const char* c_content = StringValuePtr(rb_content);
|
24
|
+
size_t content_len = RSTRING_LEN(rb_content);
|
25
|
+
lxb_dom_cdata_section_t *element = lxb_dom_document_create_cdata_section(document, (const lxb_char_t *)c_content, content_len);
|
26
|
+
if (element == NULL) {
|
27
|
+
rb_raise(rb_eRuntimeError, "Error creating text node");
|
28
|
+
}
|
29
|
+
|
30
|
+
VALUE rb_node = nl_rb_node_create(&element->text.char_data.node, rb_document);
|
31
|
+
|
32
|
+
if (rb_block_given_p()) {
|
33
|
+
rb_yield(rb_node);
|
34
|
+
}
|
35
|
+
|
36
|
+
return rb_node;
|
37
|
+
}
|
38
|
+
|
39
|
+
void Init_nl_cdata(void)
|
40
|
+
{
|
41
|
+
cNokolexborCData = rb_define_class_under(mNokolexbor, "CDATA", cNokolexborText);
|
42
|
+
|
43
|
+
rb_define_singleton_method(cNokolexborCData, "new", nl_cdata_new, -1);
|
44
|
+
}
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#include "nokolexbor.h"
|
2
|
+
|
3
|
+
VALUE cNokolexborComment;
|
4
|
+
extern VALUE cNokolexborCharacterData;
|
5
|
+
extern VALUE mNokolexbor;
|
6
|
+
|
7
|
+
static VALUE
|
8
|
+
nl_comment_new(int argc, VALUE *argv, VALUE klass)
|
9
|
+
{
|
10
|
+
lxb_dom_document_t *document;
|
11
|
+
VALUE rb_content;
|
12
|
+
VALUE rb_document;
|
13
|
+
VALUE rest;
|
14
|
+
|
15
|
+
rb_scan_args(argc, argv, "2*", &rb_content, &rb_document, &rest);
|
16
|
+
|
17
|
+
if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
|
18
|
+
rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
|
19
|
+
}
|
20
|
+
|
21
|
+
document = nl_rb_document_unwrap(rb_document);
|
22
|
+
|
23
|
+
const char* c_content = StringValuePtr(rb_content);
|
24
|
+
size_t content_len = RSTRING_LEN(rb_content);
|
25
|
+
lxb_dom_comment_t *element = lxb_dom_document_create_comment(document, (const lxb_char_t *)c_content, content_len);
|
26
|
+
if (element == NULL) {
|
27
|
+
rb_raise(rb_eRuntimeError, "Error creating comment");
|
28
|
+
}
|
29
|
+
|
30
|
+
VALUE rb_node = nl_rb_node_create(&element->char_data.node, rb_document);
|
31
|
+
|
32
|
+
if (rb_block_given_p()) {
|
33
|
+
rb_yield(rb_node);
|
34
|
+
}
|
35
|
+
|
36
|
+
return rb_node;
|
37
|
+
}
|
38
|
+
|
39
|
+
void Init_nl_comment(void)
|
40
|
+
{
|
41
|
+
cNokolexborComment = rb_define_class_under(mNokolexbor, "Comment", cNokolexborCharacterData);
|
42
|
+
|
43
|
+
rb_define_singleton_method(cNokolexborComment, "new", nl_comment_new, -1);
|
44
|
+
}
|
@@ -26,12 +26,9 @@ nl_document_parse(VALUE self, VALUE rb_string_or_io)
|
|
26
26
|
{
|
27
27
|
VALUE id_read = rb_intern("read");
|
28
28
|
VALUE rb_html;
|
29
|
-
if (rb_respond_to(rb_string_or_io, id_read))
|
30
|
-
{
|
29
|
+
if (rb_respond_to(rb_string_or_io, id_read)) {
|
31
30
|
rb_html = rb_funcall(rb_string_or_io, id_read, 0);
|
32
|
-
}
|
33
|
-
else
|
34
|
-
{
|
31
|
+
} else {
|
35
32
|
rb_html = rb_string_or_io;
|
36
33
|
}
|
37
34
|
const char *html_c = StringValuePtr(rb_html);
|
@@ -40,14 +37,12 @@ nl_document_parse(VALUE self, VALUE rb_string_or_io)
|
|
40
37
|
lxb_html_document_t *document;
|
41
38
|
|
42
39
|
document = lxb_html_document_create();
|
43
|
-
if (document == NULL)
|
44
|
-
{
|
40
|
+
if (document == NULL) {
|
45
41
|
rb_raise(rb_eRuntimeError, "Error creating document");
|
46
42
|
}
|
47
43
|
|
48
44
|
lxb_status_t status = lxb_html_document_parse(document, (const lxb_char_t *)html_c, html_len);
|
49
|
-
if (status != LXB_STATUS_OK)
|
50
|
-
{
|
45
|
+
if (status != LXB_STATUS_OK) {
|
51
46
|
nl_raise_lexbor_error(status);
|
52
47
|
}
|
53
48
|
|
@@ -68,9 +63,28 @@ nl_rb_document_unwrap(VALUE rb_doc)
|
|
68
63
|
return doc;
|
69
64
|
}
|
70
65
|
|
66
|
+
VALUE
|
67
|
+
nl_document_get_title(VALUE rb_doc)
|
68
|
+
{
|
69
|
+
size_t len;
|
70
|
+
lxb_char_t *str = lxb_html_document_title(nl_rb_document_unwrap(rb_doc), &len);
|
71
|
+
return str == NULL ? rb_str_new("", 0) : rb_utf8_str_new(str, len);
|
72
|
+
}
|
73
|
+
|
74
|
+
VALUE
|
75
|
+
nl_document_set_title(VALUE rb_doc, VALUE rb_title)
|
76
|
+
{
|
77
|
+
const char *c_title = StringValuePtr(rb_title);
|
78
|
+
size_t len = RSTRING_LEN(rb_title);
|
79
|
+
lxb_char_t *str = lxb_html_document_title_set(nl_rb_document_unwrap(rb_doc), (const lxb_char_t *)c_title, len);
|
80
|
+
return Qnil;
|
81
|
+
}
|
82
|
+
|
71
83
|
void Init_nl_document(void)
|
72
84
|
{
|
73
85
|
cNokolexborDocument = rb_define_class_under(mNokolexbor, "Document", cNokolexborNode);
|
74
86
|
rb_define_singleton_method(cNokolexborDocument, "new", nl_document_new, 0);
|
75
87
|
rb_define_singleton_method(cNokolexborDocument, "parse", nl_document_parse, 1);
|
88
|
+
rb_define_method(cNokolexborDocument, "title", nl_document_get_title, 0);
|
89
|
+
rb_define_method(cNokolexborDocument, "title=", nl_document_set_title, 1);
|
76
90
|
}
|