nokolexbor 0.5.3 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nokolexbor/extconf.rb +1 -9
- data/ext/nokolexbor/libxml/tree.h +1 -1
- data/ext/nokolexbor/nl_attribute.c +4 -4
- data/ext/nokolexbor/nl_document.c +11 -34
- data/ext/nokolexbor/nl_node.c +32 -20
- data/ext/nokolexbor/nl_node_set.c +1 -1
- data/ext/nokolexbor/xml_tree.c +2 -2
- data/ext/nokolexbor/xml_xpath.c +3 -3
- data/lib/nokolexbor/node.rb +30 -10
- data/lib/nokolexbor/version.rb +1 -1
- data/patches/0004-lexbor-fix-template-clone.patch +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eddfccec75e5089344c81814159f243d5c46e1d747d7cb2351a8efcdc554e46e
|
4
|
+
data.tar.gz: 1112c8ee0d902e2ef382f582a39e306a5a908ca03bb5b5bc6837e361663552fe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66ad9e8663b0f072a308b01e98314622fc2b920d0c3523c8fb8faa81555bf86cb45bd2c7dc230aeaf6333c6964c59d508eb97162ab88db4130eab07412297db4
|
7
|
+
data.tar.gz: 60255a5d4f8beefa0b51e37480138f6c52fa0fa4f414585409f3a71cfb00ee489a734ed93a6d34689c9601d70b1101af2ab831e4c6c207e069a441c66491d668
|
data/ext/nokolexbor/extconf.rb
CHANGED
@@ -64,14 +64,6 @@ end
|
|
64
64
|
append_cflags("-DLEXBOR_STATIC")
|
65
65
|
append_cflags("-DLIBXML_STATIC")
|
66
66
|
|
67
|
-
def sys(cmd)
|
68
|
-
puts "-- #{cmd}"
|
69
|
-
unless ret = xsystem(cmd)
|
70
|
-
raise "ERROR: '#{cmd}' failed"
|
71
|
-
end
|
72
|
-
ret
|
73
|
-
end
|
74
|
-
|
75
67
|
# Thrown when we detect CMake is taking too long and we killed it
|
76
68
|
class CMakeTimeout < StandardError
|
77
69
|
end
|
@@ -138,7 +130,7 @@ Dir.chdir(LEXBOR_DIR) do
|
|
138
130
|
|
139
131
|
Dir.chdir("build") do
|
140
132
|
run_cmake(10 * 60, ".. -DCMAKE_INSTALL_PREFIX:PATH=#{INSTALL_DIR} #{lexbor_cmake_flags.join(' ')}")
|
141
|
-
|
133
|
+
system("#{MAKE}", "install")
|
142
134
|
end
|
143
135
|
end
|
144
136
|
|
data/ext/nokolexbor/libxml/tree.h
CHANGED
@@ -23,7 +23,7 @@ extern "C" {
|
|
23
23
|
#endif
|
24
24
|
|
25
25
|
static size_t tmp_len;
|
26
|
-
#define NODE_NAME(node) lxb_dom_node_name_qualified((node), &tmp_len)
|
26
|
+
#define NODE_NAME(node) lxb_dom_node_name_qualified((lxb_dom_node_t *)(node), &tmp_len)
|
27
27
|
#define NODE_NS_HREF(node) ((node)->prefix ? lxb_ns_by_id((node)->owner_document->ns, (node)->ns, &tmp_len) : NULL)
|
28
28
|
#define NODE_NS_PREFIX(node) lxb_ns_by_id((node)->owner_document->prefix, (node)->prefix, &tmp_len)
|
29
29
|
|
data/ext/nokolexbor/nl_attribute.c
CHANGED
@@ -141,7 +141,7 @@ nl_attribute_parent(VALUE self)
|
|
141
141
|
if (attr->owner == NULL) {
|
142
142
|
return Qnil;
|
143
143
|
}
|
144
|
-
return nl_rb_node_create(attr->owner, nl_rb_document_get(self));
|
144
|
+
return nl_rb_node_create((lxb_dom_node_t *)attr->owner, nl_rb_document_get(self));
|
145
145
|
}
|
146
146
|
|
147
147
|
/**
|
@@ -158,7 +158,7 @@ nl_attribute_previous(VALUE self)
|
|
158
158
|
if (attr->prev == NULL) {
|
159
159
|
return Qnil;
|
160
160
|
}
|
161
|
-
return nl_rb_node_create(attr->prev, nl_rb_document_get(self));
|
161
|
+
return nl_rb_node_create((lxb_dom_node_t *)attr->prev, nl_rb_document_get(self));
|
162
162
|
}
|
163
163
|
|
164
164
|
/**
|
@@ -175,7 +175,7 @@ nl_attribute_next(VALUE self)
|
|
175
175
|
if (attr->next == NULL) {
|
176
176
|
return Qnil;
|
177
177
|
}
|
178
|
-
return nl_rb_node_create(attr->next, nl_rb_document_get(self));
|
178
|
+
return nl_rb_node_create((lxb_dom_node_t *)attr->next, nl_rb_document_get(self));
|
179
179
|
}
|
180
180
|
|
181
181
|
static VALUE
|
@@ -189,7 +189,7 @@ nl_attribute_inspect(VALUE self)
|
|
189
189
|
|
190
190
|
return rb_sprintf("#<%" PRIsVALUE " %s=\"%s\">", c,
|
191
191
|
lxb_dom_attr_qualified_name(attr, &len),
|
192
|
-
attr_value == NULL ? "" : attr_value);
|
192
|
+
attr_value == NULL ? "" : (char *)attr_value);
|
193
193
|
}
|
194
194
|
|
195
195
|
void Init_nl_attribute(void)
|
data/ext/nokolexbor/nl_document.c
CHANGED
@@ -5,11 +5,6 @@ extern VALUE mNokolexbor;
|
|
5
5
|
extern VALUE cNokolexborNode;
|
6
6
|
VALUE cNokolexborDocument;
|
7
7
|
|
8
|
-
#ifdef HAVE_PTHREAD_H
|
9
|
-
#include <pthread.h>
|
10
|
-
pthread_key_t p_key_parser;
|
11
|
-
#endif
|
12
|
-
|
13
8
|
static void
|
14
9
|
free_nl_document(lxb_html_document_t *document)
|
15
10
|
{
|
@@ -20,7 +15,7 @@ const rb_data_type_t nl_document_type = {
|
|
20
15
|
"Nokolexbor::Document",
|
21
16
|
{
|
22
17
|
0,
|
23
|
-
free_nl_document,
|
18
|
+
(RUBY_DATA_FUNC)free_nl_document,
|
24
19
|
},
|
25
20
|
0,
|
26
21
|
0,
|
@@ -50,24 +45,19 @@ nl_document_parse(VALUE self, VALUE rb_string_or_io)
|
|
50
45
|
const char *html_c = StringValuePtr(rb_html);
|
51
46
|
size_t html_len = RSTRING_LEN(rb_html);
|
52
47
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
#endif
|
58
|
-
if (g_parser == NULL) {
|
59
|
-
g_parser = lxb_html_parser_create();
|
60
|
-
lxb_status_t status = lxb_html_parser_init(g_parser);
|
48
|
+
static lxb_html_parser_t *html_parser = NULL;
|
49
|
+
if (html_parser == NULL) {
|
50
|
+
html_parser = lxb_html_parser_create();
|
51
|
+
lxb_status_t status = lxb_html_parser_init(html_parser);
|
61
52
|
if (status != LXB_STATUS_OK) {
|
53
|
+
lxb_html_parser_destroy(html_parser);
|
54
|
+
html_parser = NULL;
|
62
55
|
nl_raise_lexbor_error(status);
|
63
56
|
}
|
64
|
-
|
65
|
-
#ifdef HAVE_PTHREAD_H
|
66
|
-
pthread_setspecific(p_key_parser, g_parser);
|
67
|
-
#endif
|
57
|
+
html_parser->tree->scripting = true;
|
68
58
|
}
|
69
59
|
|
70
|
-
lxb_html_document_t *document = lxb_html_parse(
|
60
|
+
lxb_html_document_t *document = lxb_html_parse(html_parser, (const lxb_char_t *)html_c, html_len);
|
71
61
|
|
72
62
|
if (document == NULL) {
|
73
63
|
rb_raise(rb_eRuntimeError, "Error parsing document");
|
@@ -104,7 +94,7 @@ static VALUE
|
|
104
94
|
nl_document_get_title(VALUE self)
|
105
95
|
{
|
106
96
|
size_t len;
|
107
|
-
lxb_char_t *str = lxb_html_document_title(nl_rb_document_unwrap(self), &len);
|
97
|
+
lxb_char_t *str = lxb_html_document_title((lxb_html_document_t *)nl_rb_document_unwrap(self), &len);
|
108
98
|
return str == NULL ? rb_str_new("", 0) : rb_utf8_str_new(str, len);
|
109
99
|
}
|
110
100
|
|
@@ -126,7 +116,7 @@ nl_document_set_title(VALUE self, VALUE rb_title)
|
|
126
116
|
{
|
127
117
|
const char *c_title = StringValuePtr(rb_title);
|
128
118
|
size_t len = RSTRING_LEN(rb_title);
|
129
|
-
|
119
|
+
lxb_html_document_title_set((lxb_html_document_t *)nl_rb_document_unwrap(self), (const lxb_char_t *)c_title, len);
|
130
120
|
return rb_title;
|
131
121
|
}
|
132
122
|
|
@@ -142,21 +132,8 @@ nl_document_root(VALUE self)
|
|
142
132
|
return nl_rb_node_create(lxb_dom_document_root(doc), self);
|
143
133
|
}
|
144
134
|
|
145
|
-
static void
|
146
|
-
free_parser(void *data)
|
147
|
-
{
|
148
|
-
lxb_html_parser_t *g_parser = (lxb_html_parser_t *)data;
|
149
|
-
if (g_parser != NULL) {
|
150
|
-
g_parser = lxb_html_parser_destroy(g_parser);
|
151
|
-
}
|
152
|
-
}
|
153
|
-
|
154
135
|
void Init_nl_document(void)
|
155
136
|
{
|
156
|
-
#ifdef HAVE_PTHREAD_H
|
157
|
-
pthread_key_create(&p_key_parser, free_parser);
|
158
|
-
#endif
|
159
|
-
|
160
137
|
cNokolexborDocument = rb_define_class_under(mNokolexbor, "Document", cNokolexborNode);
|
161
138
|
rb_define_singleton_method(cNokolexborDocument, "new", nl_document_new, 0);
|
162
139
|
rb_define_singleton_method(cNokolexborDocument, "parse", nl_document_parse, 1);
|
data/ext/nokolexbor/nl_node.c
CHANGED
@@ -159,7 +159,7 @@ nl_node_attribute(VALUE self, VALUE rb_name)
|
|
159
159
|
if (attr->owner == NULL) {
|
160
160
|
attr->owner = lxb_dom_interface_element(node);
|
161
161
|
}
|
162
|
-
return nl_rb_node_create(attr, nl_rb_document_get(self));
|
162
|
+
return nl_rb_node_create((lxb_dom_node_t *)attr, nl_rb_document_get(self));
|
163
163
|
}
|
164
164
|
|
165
165
|
/**
|
@@ -185,7 +185,7 @@ nl_node_attribute_nodes(VALUE self)
|
|
185
185
|
if (attr->owner == NULL) {
|
186
186
|
attr->owner = lxb_dom_interface_element(node);
|
187
187
|
}
|
188
|
-
rb_ary_push(ary, nl_rb_node_create(attr, rb_doc));
|
188
|
+
rb_ary_push(ary, nl_rb_node_create((lxb_dom_node_t *)attr, rb_doc));
|
189
189
|
attr = attr->next;
|
190
190
|
}
|
191
191
|
|
@@ -366,28 +366,32 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
|
366
366
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
367
367
|
|
368
368
|
lxb_status_t status;
|
369
|
-
lxb_css_parser_t *
|
370
|
-
lxb_selectors_t *selectors = NULL;
|
369
|
+
static lxb_css_parser_t *css_parser = NULL;
|
370
|
+
static lxb_selectors_t *selectors = NULL;
|
371
371
|
lxb_css_selector_list_t *list = NULL;
|
372
372
|
|
373
|
-
/*
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
373
|
+
/* CSS parser. */
|
374
|
+
if (css_parser == NULL) {
|
375
|
+
css_parser = lxb_css_parser_create();
|
376
|
+
status = lxb_css_parser_init(css_parser, NULL, NULL);
|
377
|
+
if (status != LXB_STATUS_OK) {
|
378
|
+
goto init_error;
|
379
|
+
}
|
378
380
|
}
|
379
381
|
|
380
382
|
/* Selectors. */
|
381
|
-
selectors
|
382
|
-
|
383
|
-
|
384
|
-
|
383
|
+
if (selectors == NULL) {
|
384
|
+
selectors = lxb_selectors_create();
|
385
|
+
status = lxb_selectors_init(selectors);
|
386
|
+
if (status != LXB_STATUS_OK) {
|
387
|
+
goto init_error;
|
388
|
+
}
|
385
389
|
}
|
386
390
|
|
387
391
|
/* Parse and get the log. */
|
388
|
-
list = lxb_css_selectors_parse_relative_list(
|
389
|
-
if (
|
390
|
-
status =
|
392
|
+
list = lxb_css_selectors_parse_relative_list(css_parser, (const lxb_char_t *)selector_c, selector_len);
|
393
|
+
if (css_parser->status != LXB_STATUS_OK) {
|
394
|
+
status = css_parser->status;
|
391
395
|
goto cleanup;
|
392
396
|
}
|
393
397
|
|
@@ -398,11 +402,19 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
|
398
402
|
}
|
399
403
|
|
400
404
|
cleanup:
|
405
|
+
/* Destroy all object for all CSS Selector List. */
|
406
|
+
lxb_css_selector_list_destroy_memory(list);
|
407
|
+
|
408
|
+
return status;
|
409
|
+
|
410
|
+
init_error:
|
401
411
|
/* Destroy Selectors object. */
|
402
|
-
|
412
|
+
lxb_selectors_destroy(selectors, true);
|
413
|
+
selectors = NULL;
|
403
414
|
|
404
415
|
/* Destroy resources for CSS Parser. */
|
405
|
-
|
416
|
+
lxb_css_parser_destroy(css_parser, true);
|
417
|
+
css_parser = NULL;
|
406
418
|
|
407
419
|
/* Destroy all object for all CSS Selector List. */
|
408
420
|
lxb_css_selector_list_destroy_memory(list);
|
@@ -1014,9 +1026,9 @@ static VALUE
|
|
1014
1026
|
nl_node_add_sibling(VALUE self, VALUE next_or_previous, VALUE new)
|
1015
1027
|
{
|
1016
1028
|
bool insert_after;
|
1017
|
-
if (
|
1029
|
+
if (rb_str_cmp(rb_String(next_or_previous), rb_str_new_literal("next")) == 0) {
|
1018
1030
|
insert_after = true;
|
1019
|
-
} else if (
|
1031
|
+
} else if (rb_str_cmp(rb_String(next_or_previous), rb_str_new_literal("previous")) == 0) {
|
1020
1032
|
insert_after = false;
|
1021
1033
|
} else {
|
1022
1034
|
rb_raise(rb_eArgError, "Unsupported inserting position");
|
data/ext/nokolexbor/xml_tree.c
CHANGED
@@ -339,8 +339,8 @@ nl_xmlGetNodePath(const lxb_dom_node_t *node)
|
|
339
339
|
|
340
340
|
} else if (cur->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
|
341
341
|
sep = "/@";
|
342
|
-
name = (const char *) lxb_dom_attr_qualified_name(cur, &tmp_len);
|
343
|
-
next = ((lxb_dom_attr_t_ptr)cur)->owner;
|
342
|
+
name = (const char *) lxb_dom_attr_qualified_name((lxb_dom_attr_t_ptr)cur, &tmp_len);
|
343
|
+
next = (lxb_dom_node_t *)((lxb_dom_attr_t_ptr)cur)->owner;
|
344
344
|
} else {
|
345
345
|
nl_xmlFree(buf);
|
346
346
|
nl_xmlFree(buffer);
|
data/ext/nokolexbor/xml_xpath.c
CHANGED
@@ -6389,7 +6389,7 @@ xmlXPathNodeValHash(lxb_dom_node_t_ptr node) {
|
|
6389
6389
|
return(0);
|
6390
6390
|
return(string[0] + (string[1] << 8));
|
6391
6391
|
case LXB_DOM_NODE_TYPE_ATTRIBUTE:
|
6392
|
-
string = lxb_dom_attr_value(node, &tmp_len);
|
6392
|
+
string = lxb_dom_attr_value((lxb_dom_attr_t_ptr)node, &tmp_len);
|
6393
6393
|
if (string == NULL)
|
6394
6394
|
return(0);
|
6395
6395
|
if (string[0] == 0)
|
@@ -8452,9 +8452,9 @@ nl_xmlXPathNextAttribute(xmlXPathParserContextPtr ctxt, lxb_dom_node_t_ptr cur)
|
|
8452
8452
|
if (cur == NULL) {
|
8453
8453
|
if (ctxt->context->node == (lxb_dom_node_t_ptr) ctxt->context->doc)
|
8454
8454
|
return(NULL);
|
8455
|
-
return lxb_dom_element_first_attribute(ctxt->context->node);
|
8455
|
+
return (lxb_dom_node_t_ptr)lxb_dom_element_first_attribute((lxb_dom_element_t *)ctxt->context->node);
|
8456
8456
|
}
|
8457
|
-
return(((lxb_dom_attr_t *)cur)->next);
|
8457
|
+
return (lxb_dom_node_t_ptr)(((lxb_dom_attr_t *)cur)->next);
|
8458
8458
|
}
|
8459
8459
|
|
8460
8460
|
/************************************************************************
|
data/lib/nokolexbor/node.rb
CHANGED
@@ -717,18 +717,38 @@ module Nokolexbor
|
|
717
717
|
|
718
718
|
def xpath_query_from_css_rule(rule, ns)
|
719
719
|
ensure_nokogiri
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
720
|
+
|
721
|
+
unless defined?(Gem)
|
722
|
+
require 'rubygems'
|
723
|
+
end
|
724
|
+
|
725
|
+
v_1_17_0 = Gem::Version.new("1.17.0")
|
726
|
+
current_version = Gem::Version.new(Nokogiri::VERSION)
|
727
|
+
|
728
|
+
if current_version < v_1_17_0
|
729
|
+
if defined? Nokogiri::CSS::XPathVisitor::BuiltinsConfig
|
730
|
+
visitor = Nokogiri::CSS::XPathVisitor.new(
|
731
|
+
builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::OPTIMAL,
|
732
|
+
doctype: :html4,
|
733
|
+
)
|
734
|
+
else
|
735
|
+
visitor = Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins.new
|
736
|
+
end
|
737
|
+
self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
|
738
|
+
Nokogiri::CSS.xpath_for(rule.to_s, { prefix: implied_xpath_context, ns: ns,
|
739
|
+
visitor: visitor, })
|
740
|
+
end.join(" | ")
|
725
741
|
else
|
726
|
-
|
742
|
+
self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
|
743
|
+
visitor = Nokogiri::CSS::XPathVisitor.new(
|
744
|
+
builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::OPTIMAL,
|
745
|
+
doctype: :html4,
|
746
|
+
prefix: implied_xpath_context,
|
747
|
+
namespaces: ns,
|
748
|
+
)
|
749
|
+
Nokogiri::CSS.xpath_for(rule.to_s, visitor: visitor)
|
750
|
+
end.join(" | ")
|
727
751
|
end
|
728
|
-
self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
|
729
|
-
Nokogiri::CSS.xpath_for(rule.to_s, { prefix: implied_xpath_context, ns: ns,
|
730
|
-
visitor: visitor, })
|
731
|
-
end.join(" | ")
|
732
752
|
end
|
733
753
|
|
734
754
|
def extract_params(params)
|
data/lib/nokolexbor/version.rb
CHANGED
data/patches/0004-lexbor-fix-template-clone.patch
CHANGED
@@ -16,7 +16,7 @@ index a2153f4..8a9c69f 100755
|
|
16
16
|
|
17
17
|
+ if (curr->local_name == LXB_TAG_TEMPLATE && curr->first_child != NULL && cnode->type == LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT) {
|
18
18
|
+ lxb_dom_node_remove(curr->first_child);
|
19
|
-
+ lxb_html_interface_template(curr)->content = cnode;
|
19
|
+
+ lxb_html_interface_template(curr)->content = (lxb_dom_document_fragment_t *)cnode;
|
20
20
|
+ }
|
21
21
|
+
|
22
22
|
lxb_dom_node_insert_child(curr, cnode);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokolexbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.3
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yicheng Zhou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-12-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|