nokolexbor 0.5.4 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/nokolexbor/extconf.rb +1 -9
- data/ext/nokolexbor/libxml/tree.h +1 -1
- data/ext/nokolexbor/nl_attribute.c +4 -4
- data/ext/nokolexbor/nl_document.c +10 -33
- data/ext/nokolexbor/nl_node.c +32 -20
- data/ext/nokolexbor/xml_tree.c +2 -2
- data/ext/nokolexbor/xml_xpath.c +3 -3
- data/lib/nokolexbor/node.rb +30 -10
- data/lib/nokolexbor/version.rb +1 -1
- data/patches/0004-lexbor-fix-template-clone.patch +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eddfccec75e5089344c81814159f243d5c46e1d747d7cb2351a8efcdc554e46e
|
4
|
+
data.tar.gz: 1112c8ee0d902e2ef382f582a39e306a5a908ca03bb5b5bc6837e361663552fe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66ad9e8663b0f072a308b01e98314622fc2b920d0c3523c8fb8faa81555bf86cb45bd2c7dc230aeaf6333c6964c59d508eb97162ab88db4130eab07412297db4
|
7
|
+
data.tar.gz: 60255a5d4f8beefa0b51e37480138f6c52fa0fa4f414585409f3a71cfb00ee489a734ed93a6d34689c9601d70b1101af2ab831e4c6c207e069a441c66491d668
|
data/ext/nokolexbor/extconf.rb
CHANGED
@@ -64,14 +64,6 @@ end
|
|
64
64
|
append_cflags("-DLEXBOR_STATIC")
|
65
65
|
append_cflags("-DLIBXML_STATIC")
|
66
66
|
|
67
|
-
def sys(cmd)
|
68
|
-
puts "-- #{cmd}"
|
69
|
-
unless ret = xsystem(cmd)
|
70
|
-
raise "ERROR: '#{cmd}' failed"
|
71
|
-
end
|
72
|
-
ret
|
73
|
-
end
|
74
|
-
|
75
67
|
# Thrown when we detect CMake is taking too long and we killed it
|
76
68
|
class CMakeTimeout < StandardError
|
77
69
|
end
|
@@ -138,7 +130,7 @@ Dir.chdir(LEXBOR_DIR) do
|
|
138
130
|
|
139
131
|
Dir.chdir("build") do
|
140
132
|
run_cmake(10 * 60, ".. -DCMAKE_INSTALL_PREFIX:PATH=#{INSTALL_DIR} #{lexbor_cmake_flags.join(' ')}")
|
141
|
-
|
133
|
+
system("#{MAKE}", "install")
|
142
134
|
end
|
143
135
|
end
|
144
136
|
|
data/ext/nokolexbor/libxml/tree.h
CHANGED
@@ -23,7 +23,7 @@ extern "C" {
|
|
23
23
|
#endif
|
24
24
|
|
25
25
|
static size_t tmp_len;
|
26
|
-
#define NODE_NAME(node) lxb_dom_node_name_qualified((node), &tmp_len)
|
26
|
+
#define NODE_NAME(node) lxb_dom_node_name_qualified((lxb_dom_node_t *)(node), &tmp_len)
|
27
27
|
#define NODE_NS_HREF(node) ((node)->prefix ? lxb_ns_by_id((node)->owner_document->ns, (node)->ns, &tmp_len) : NULL)
|
28
28
|
#define NODE_NS_PREFIX(node) lxb_ns_by_id((node)->owner_document->prefix, (node)->prefix, &tmp_len)
|
29
29
|
|
data/ext/nokolexbor/nl_attribute.c
CHANGED
@@ -141,7 +141,7 @@ nl_attribute_parent(VALUE self)
|
|
141
141
|
if (attr->owner == NULL) {
|
142
142
|
return Qnil;
|
143
143
|
}
|
144
|
-
return nl_rb_node_create(attr->owner, nl_rb_document_get(self));
|
144
|
+
return nl_rb_node_create((lxb_dom_node_t *)attr->owner, nl_rb_document_get(self));
|
145
145
|
}
|
146
146
|
|
147
147
|
/**
|
@@ -158,7 +158,7 @@ nl_attribute_previous(VALUE self)
|
|
158
158
|
if (attr->prev == NULL) {
|
159
159
|
return Qnil;
|
160
160
|
}
|
161
|
-
return nl_rb_node_create(attr->prev, nl_rb_document_get(self));
|
161
|
+
return nl_rb_node_create((lxb_dom_node_t *)attr->prev, nl_rb_document_get(self));
|
162
162
|
}
|
163
163
|
|
164
164
|
/**
|
@@ -175,7 +175,7 @@ nl_attribute_next(VALUE self)
|
|
175
175
|
if (attr->next == NULL) {
|
176
176
|
return Qnil;
|
177
177
|
}
|
178
|
-
return nl_rb_node_create(attr->next, nl_rb_document_get(self));
|
178
|
+
return nl_rb_node_create((lxb_dom_node_t *)attr->next, nl_rb_document_get(self));
|
179
179
|
}
|
180
180
|
|
181
181
|
static VALUE
|
@@ -189,7 +189,7 @@ nl_attribute_inspect(VALUE self)
|
|
189
189
|
|
190
190
|
return rb_sprintf("#<%" PRIsVALUE " %s=\"%s\">", c,
|
191
191
|
lxb_dom_attr_qualified_name(attr, &len),
|
192
|
-
attr_value == NULL ? "" : attr_value);
|
192
|
+
attr_value == NULL ? "" : (char *)attr_value);
|
193
193
|
}
|
194
194
|
|
195
195
|
void Init_nl_attribute(void)
|
data/ext/nokolexbor/nl_document.c
CHANGED
@@ -5,11 +5,6 @@ extern VALUE mNokolexbor;
|
|
5
5
|
extern VALUE cNokolexborNode;
|
6
6
|
VALUE cNokolexborDocument;
|
7
7
|
|
8
|
-
#ifdef HAVE_PTHREAD_H
|
9
|
-
#include <pthread.h>
|
10
|
-
pthread_key_t p_key_parser;
|
11
|
-
#endif
|
12
|
-
|
13
8
|
static void
|
14
9
|
free_nl_document(lxb_html_document_t *document)
|
15
10
|
{
|
@@ -50,24 +45,19 @@ nl_document_parse(VALUE self, VALUE rb_string_or_io)
|
|
50
45
|
const char *html_c = StringValuePtr(rb_html);
|
51
46
|
size_t html_len = RSTRING_LEN(rb_html);
|
52
47
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
#endif
|
58
|
-
if (g_parser == NULL) {
|
59
|
-
g_parser = lxb_html_parser_create();
|
60
|
-
lxb_status_t status = lxb_html_parser_init(g_parser);
|
48
|
+
static lxb_html_parser_t *html_parser = NULL;
|
49
|
+
if (html_parser == NULL) {
|
50
|
+
html_parser = lxb_html_parser_create();
|
51
|
+
lxb_status_t status = lxb_html_parser_init(html_parser);
|
61
52
|
if (status != LXB_STATUS_OK) {
|
53
|
+
lxb_html_parser_destroy(html_parser);
|
54
|
+
html_parser = NULL;
|
62
55
|
nl_raise_lexbor_error(status);
|
63
56
|
}
|
64
|
-
|
65
|
-
#ifdef HAVE_PTHREAD_H
|
66
|
-
pthread_setspecific(p_key_parser, g_parser);
|
67
|
-
#endif
|
57
|
+
html_parser->tree->scripting = true;
|
68
58
|
}
|
69
59
|
|
70
|
-
lxb_html_document_t *document = lxb_html_parse(
|
60
|
+
lxb_html_document_t *document = lxb_html_parse(html_parser, (const lxb_char_t *)html_c, html_len);
|
71
61
|
|
72
62
|
if (document == NULL) {
|
73
63
|
rb_raise(rb_eRuntimeError, "Error parsing document");
|
@@ -104,7 +94,7 @@ static VALUE
|
|
104
94
|
nl_document_get_title(VALUE self)
|
105
95
|
{
|
106
96
|
size_t len;
|
107
|
-
lxb_char_t *str = lxb_html_document_title(nl_rb_document_unwrap(self), &len);
|
97
|
+
lxb_char_t *str = lxb_html_document_title((lxb_html_document_t *)nl_rb_document_unwrap(self), &len);
|
108
98
|
return str == NULL ? rb_str_new("", 0) : rb_utf8_str_new(str, len);
|
109
99
|
}
|
110
100
|
|
@@ -126,7 +116,7 @@ nl_document_set_title(VALUE self, VALUE rb_title)
|
|
126
116
|
{
|
127
117
|
const char *c_title = StringValuePtr(rb_title);
|
128
118
|
size_t len = RSTRING_LEN(rb_title);
|
129
|
-
lxb_html_document_title_set(nl_rb_document_unwrap(self), (const lxb_char_t *)c_title, len);
|
119
|
+
lxb_html_document_title_set((lxb_html_document_t *)nl_rb_document_unwrap(self), (const lxb_char_t *)c_title, len);
|
130
120
|
return rb_title;
|
131
121
|
}
|
132
122
|
|
@@ -142,21 +132,8 @@ nl_document_root(VALUE self)
|
|
142
132
|
return nl_rb_node_create(lxb_dom_document_root(doc), self);
|
143
133
|
}
|
144
134
|
|
145
|
-
static void
|
146
|
-
free_parser(void *data)
|
147
|
-
{
|
148
|
-
lxb_html_parser_t *g_parser = (lxb_html_parser_t *)data;
|
149
|
-
if (g_parser != NULL) {
|
150
|
-
g_parser = lxb_html_parser_destroy(g_parser);
|
151
|
-
}
|
152
|
-
}
|
153
|
-
|
154
135
|
void Init_nl_document(void)
|
155
136
|
{
|
156
|
-
#ifdef HAVE_PTHREAD_H
|
157
|
-
pthread_key_create(&p_key_parser, free_parser);
|
158
|
-
#endif
|
159
|
-
|
160
137
|
cNokolexborDocument = rb_define_class_under(mNokolexbor, "Document", cNokolexborNode);
|
161
138
|
rb_define_singleton_method(cNokolexborDocument, "new", nl_document_new, 0);
|
162
139
|
rb_define_singleton_method(cNokolexborDocument, "parse", nl_document_parse, 1);
|
data/ext/nokolexbor/nl_node.c
CHANGED
@@ -159,7 +159,7 @@ nl_node_attribute(VALUE self, VALUE rb_name)
|
|
159
159
|
if (attr->owner == NULL) {
|
160
160
|
attr->owner = lxb_dom_interface_element(node);
|
161
161
|
}
|
162
|
-
return nl_rb_node_create(attr, nl_rb_document_get(self));
|
162
|
+
return nl_rb_node_create((lxb_dom_node_t *)attr, nl_rb_document_get(self));
|
163
163
|
}
|
164
164
|
|
165
165
|
/**
|
@@ -185,7 +185,7 @@ nl_node_attribute_nodes(VALUE self)
|
|
185
185
|
if (attr->owner == NULL) {
|
186
186
|
attr->owner = lxb_dom_interface_element(node);
|
187
187
|
}
|
188
|
-
rb_ary_push(ary, nl_rb_node_create(attr, rb_doc));
|
188
|
+
rb_ary_push(ary, nl_rb_node_create((lxb_dom_node_t *)attr, rb_doc));
|
189
189
|
attr = attr->next;
|
190
190
|
}
|
191
191
|
|
@@ -366,28 +366,32 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
|
366
366
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
367
367
|
|
368
368
|
lxb_status_t status;
|
369
|
-
lxb_css_parser_t *
|
370
|
-
lxb_selectors_t *selectors = NULL;
|
369
|
+
static lxb_css_parser_t *css_parser = NULL;
|
370
|
+
static lxb_selectors_t *selectors = NULL;
|
371
371
|
lxb_css_selector_list_t *list = NULL;
|
372
372
|
|
373
|
-
/*
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
373
|
+
/* CSS parser. */
|
374
|
+
if (css_parser == NULL) {
|
375
|
+
css_parser = lxb_css_parser_create();
|
376
|
+
status = lxb_css_parser_init(css_parser, NULL, NULL);
|
377
|
+
if (status != LXB_STATUS_OK) {
|
378
|
+
goto init_error;
|
379
|
+
}
|
378
380
|
}
|
379
381
|
|
380
382
|
/* Selectors. */
|
381
|
-
selectors
|
382
|
-
|
383
|
-
|
384
|
-
|
383
|
+
if (selectors == NULL) {
|
384
|
+
selectors = lxb_selectors_create();
|
385
|
+
status = lxb_selectors_init(selectors);
|
386
|
+
if (status != LXB_STATUS_OK) {
|
387
|
+
goto init_error;
|
388
|
+
}
|
385
389
|
}
|
386
390
|
|
387
391
|
/* Parse and get the log. */
|
388
|
-
list = lxb_css_selectors_parse_relative_list(
|
389
|
-
if (
|
390
|
-
status =
|
392
|
+
list = lxb_css_selectors_parse_relative_list(css_parser, (const lxb_char_t *)selector_c, selector_len);
|
393
|
+
if (css_parser->status != LXB_STATUS_OK) {
|
394
|
+
status = css_parser->status;
|
391
395
|
goto cleanup;
|
392
396
|
}
|
393
397
|
|
@@ -398,11 +402,19 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
|
398
402
|
}
|
399
403
|
|
400
404
|
cleanup:
|
405
|
+
/* Destroy all object for all CSS Selector List. */
|
406
|
+
lxb_css_selector_list_destroy_memory(list);
|
407
|
+
|
408
|
+
return status;
|
409
|
+
|
410
|
+
init_error:
|
401
411
|
/* Destroy Selectors object. */
|
402
|
-
|
412
|
+
lxb_selectors_destroy(selectors, true);
|
413
|
+
selectors = NULL;
|
403
414
|
|
404
415
|
/* Destroy resources for CSS Parser. */
|
405
|
-
|
416
|
+
lxb_css_parser_destroy(css_parser, true);
|
417
|
+
css_parser = NULL;
|
406
418
|
|
407
419
|
/* Destroy all object for all CSS Selector List. */
|
408
420
|
lxb_css_selector_list_destroy_memory(list);
|
@@ -1014,9 +1026,9 @@ static VALUE
|
|
1014
1026
|
nl_node_add_sibling(VALUE self, VALUE next_or_previous, VALUE new)
|
1015
1027
|
{
|
1016
1028
|
bool insert_after;
|
1017
|
-
if (
|
1029
|
+
if (rb_str_cmp(rb_String(next_or_previous), rb_str_new_literal("next")) == 0) {
|
1018
1030
|
insert_after = true;
|
1019
|
-
} else if (
|
1031
|
+
} else if (rb_str_cmp(rb_String(next_or_previous), rb_str_new_literal("previous")) == 0) {
|
1020
1032
|
insert_after = false;
|
1021
1033
|
} else {
|
1022
1034
|
rb_raise(rb_eArgError, "Unsupported inserting position");
|
data/ext/nokolexbor/xml_tree.c
CHANGED
@@ -339,8 +339,8 @@ nl_xmlGetNodePath(const lxb_dom_node_t *node)
|
|
339
339
|
|
340
340
|
} else if (cur->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
|
341
341
|
sep = "/@";
|
342
|
-
name = (const char *) lxb_dom_attr_qualified_name(cur, &tmp_len);
|
343
|
-
next = ((lxb_dom_attr_t_ptr)cur)->owner;
|
342
|
+
name = (const char *) lxb_dom_attr_qualified_name((lxb_dom_attr_t_ptr)cur, &tmp_len);
|
343
|
+
next = (lxb_dom_node_t *)((lxb_dom_attr_t_ptr)cur)->owner;
|
344
344
|
} else {
|
345
345
|
nl_xmlFree(buf);
|
346
346
|
nl_xmlFree(buffer);
|
data/ext/nokolexbor/xml_xpath.c
CHANGED
@@ -6389,7 +6389,7 @@ xmlXPathNodeValHash(lxb_dom_node_t_ptr node) {
|
|
6389
6389
|
return(0);
|
6390
6390
|
return(string[0] + (string[1] << 8));
|
6391
6391
|
case LXB_DOM_NODE_TYPE_ATTRIBUTE:
|
6392
|
-
string = lxb_dom_attr_value(node, &tmp_len);
|
6392
|
+
string = lxb_dom_attr_value((lxb_dom_attr_t_ptr)node, &tmp_len);
|
6393
6393
|
if (string == NULL)
|
6394
6394
|
return(0);
|
6395
6395
|
if (string[0] == 0)
|
@@ -8452,9 +8452,9 @@ nl_xmlXPathNextAttribute(xmlXPathParserContextPtr ctxt, lxb_dom_node_t_ptr cur)
|
|
8452
8452
|
if (cur == NULL) {
|
8453
8453
|
if (ctxt->context->node == (lxb_dom_node_t_ptr) ctxt->context->doc)
|
8454
8454
|
return(NULL);
|
8455
|
-
return lxb_dom_element_first_attribute(ctxt->context->node);
|
8455
|
+
return (lxb_dom_node_t_ptr)lxb_dom_element_first_attribute((lxb_dom_element_t *)ctxt->context->node);
|
8456
8456
|
}
|
8457
|
-
return(((lxb_dom_attr_t *)cur)->next);
|
8457
|
+
return (lxb_dom_node_t_ptr)(((lxb_dom_attr_t *)cur)->next);
|
8458
8458
|
}
|
8459
8459
|
|
8460
8460
|
/************************************************************************
|
data/lib/nokolexbor/node.rb
CHANGED
@@ -717,18 +717,38 @@ module Nokolexbor
|
|
717
717
|
|
718
718
|
def xpath_query_from_css_rule(rule, ns)
|
719
719
|
ensure_nokogiri
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
720
|
+
|
721
|
+
unless defined?(Gem)
|
722
|
+
require 'rubygems'
|
723
|
+
end
|
724
|
+
|
725
|
+
v_1_17_0 = Gem::Version.new("1.17.0")
|
726
|
+
current_version = Gem::Version.new(Nokogiri::VERSION)
|
727
|
+
|
728
|
+
if current_version < v_1_17_0
|
729
|
+
if defined? Nokogiri::CSS::XPathVisitor::BuiltinsConfig
|
730
|
+
visitor = Nokogiri::CSS::XPathVisitor.new(
|
731
|
+
builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::OPTIMAL,
|
732
|
+
doctype: :html4,
|
733
|
+
)
|
734
|
+
else
|
735
|
+
visitor = Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins.new
|
736
|
+
end
|
737
|
+
self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
|
738
|
+
Nokogiri::CSS.xpath_for(rule.to_s, { prefix: implied_xpath_context, ns: ns,
|
739
|
+
visitor: visitor, })
|
740
|
+
end.join(" | ")
|
725
741
|
else
|
726
|
-
|
742
|
+
self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
|
743
|
+
visitor = Nokogiri::CSS::XPathVisitor.new(
|
744
|
+
builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::OPTIMAL,
|
745
|
+
doctype: :html4,
|
746
|
+
prefix: implied_xpath_context,
|
747
|
+
namespaces: ns,
|
748
|
+
)
|
749
|
+
Nokogiri::CSS.xpath_for(rule.to_s, visitor: visitor)
|
750
|
+
end.join(" | ")
|
727
751
|
end
|
728
|
-
self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
|
729
|
-
Nokogiri::CSS.xpath_for(rule.to_s, { prefix: implied_xpath_context, ns: ns,
|
730
|
-
visitor: visitor, })
|
731
|
-
end.join(" | ")
|
732
752
|
end
|
733
753
|
|
734
754
|
def extract_params(params)
|
data/lib/nokolexbor/version.rb
CHANGED
data/patches/0004-lexbor-fix-template-clone.patch
CHANGED
@@ -16,7 +16,7 @@ index a2153f4..8a9c69f 100755
|
|
16
16
|
|
17
17
|
+ if (curr->local_name == LXB_TAG_TEMPLATE && curr->first_child != NULL && cnode->type == LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT) {
|
18
18
|
+ lxb_dom_node_remove(curr->first_child);
|
19
|
-
+ lxb_html_interface_template(curr)->content = cnode;
|
19
|
+
+ lxb_html_interface_template(curr)->content = (lxb_dom_document_fragment_t *)cnode;
|
20
20
|
+ }
|
21
21
|
+
|
22
22
|
lxb_dom_node_insert_child(curr, cnode);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokolexbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.4
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yicheng Zhou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-12-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|