nokolexbor 0.2.4 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/nokolexbor/extconf.rb +12 -6
- data/ext/nokolexbor/memory.c +7 -0
- data/ext/nokolexbor/nl_document.c +11 -1
- data/ext/nokolexbor/nl_node.c +32 -11
- data/ext/nokolexbor/nl_node_set.c +20 -6
- data/ext/nokolexbor/nl_xpath_context.c +6 -1
- data/lib/nokolexbor/version.rb +1 -1
- data/patches/0003-lexbor-attach-template-content-to-self.patch +13 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d4f935c5e81aed7b5d964e7332a9abeaa1f44dec7e2532f01c89dfd17df14ded
|
4
|
+
data.tar.gz: 91c58aca5a9b16a3cfa7e2436dfb86feeb777daa3615b8b3a95b234981cc67d0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af6ae7b07bde270ea275a5b21221c7e90923f195a6a62ed7b85174f1d9f31f017661dc50c7d8dd1c3d39096a5c1ecbb3469f4496f2e628a3b7026f82ca3994c5
|
7
|
+
data.tar.gz: eea3a38dcbd81cdb7c54996248d6eaa2d2c51b964d79d3131ba04594dd449a141b32b225bc7a879844c92418f3c04e6b63c77c9c6bef86f8ee619fa6a47b03a3
|
data/ext/nokolexbor/extconf.rb
CHANGED
@@ -1,16 +1,22 @@
|
|
1
1
|
require 'mkmf'
|
2
2
|
require 'timeout'
|
3
3
|
|
4
|
-
# For debugging
|
5
|
-
# CONFIG["optflags"] = "-O0"
|
6
|
-
# CONFIG["debugflags"] = "-ggdb3"
|
7
|
-
|
8
4
|
cmake_flags = [ ENV["CMAKE_FLAGS"] ]
|
9
5
|
cmake_flags << "-DLEXBOR_BUILD_TESTS_CPP=OFF"
|
10
6
|
cmake_flags << "-DLEXBOR_BUILD_SHARED=OFF"
|
11
7
|
cmake_flags << "-DLEXBOR_BUILD_STATIC=ON"
|
12
|
-
|
13
|
-
|
8
|
+
|
9
|
+
if ENV['NOKOLEXBOR_DEBUG'] || ENV['NOKOLEXBOR_ASAN']
|
10
|
+
CONFIG["optflags"] = "-O0"
|
11
|
+
CONFIG["debugflags"] = "-ggdb3"
|
12
|
+
cmake_flags << "-DLEXBOR_OPTIMIZATION_LEVEL='-O0 -g'"
|
13
|
+
end
|
14
|
+
|
15
|
+
if ENV['NOKOLEXBOR_ASAN']
|
16
|
+
$LDFLAGS << " -fsanitize=address"
|
17
|
+
$CFLAGS << " -fsanitize=address -DNOKOLEXBOR_ASAN"
|
18
|
+
cmake_flags << "-DLEXBOR_BUILD_WITH_ASAN=ON"
|
19
|
+
end
|
14
20
|
|
15
21
|
append_cflags("-DLEXBOR_STATIC")
|
16
22
|
append_cflags("-DLIBXML_STATIC")
|
data/ext/nokolexbor/memory.c
CHANGED
@@ -13,6 +13,11 @@
|
|
13
13
|
#include <ruby.h>
|
14
14
|
#include "lexbor/core/base.h"
|
15
15
|
|
16
|
+
// Disable using ruby memory functions when ASAN is enabled,
|
17
|
+
// otherwise memory leak info will be all about ruby which
|
18
|
+
// is useless.
|
19
|
+
#ifndef NOKOLEXBOR_ASAN
|
20
|
+
|
16
21
|
void *
|
17
22
|
lexbor_malloc(size_t size)
|
18
23
|
{
|
@@ -37,3 +42,5 @@ lexbor_free(void *dst)
|
|
37
42
|
ruby_xfree(dst);
|
38
43
|
return NULL;
|
39
44
|
}
|
45
|
+
|
46
|
+
#endif
|
data/ext/nokolexbor/nl_document.c
CHANGED
@@ -22,8 +22,18 @@ const rb_data_type_t nl_document_type = {
|
|
22
22
|
};
|
23
23
|
|
24
24
|
static VALUE
|
25
|
-
nl_document_parse(VALUE self, VALUE rb_html)
|
25
|
+
nl_document_parse(VALUE self, VALUE rb_string_or_io)
|
26
26
|
{
|
27
|
+
VALUE id_read = rb_intern("read");
|
28
|
+
VALUE rb_html;
|
29
|
+
if (rb_respond_to(rb_string_or_io, id_read))
|
30
|
+
{
|
31
|
+
rb_html = rb_funcall(rb_string_or_io, id_read, 0);
|
32
|
+
}
|
33
|
+
else
|
34
|
+
{
|
35
|
+
rb_html = rb_string_or_io;
|
36
|
+
}
|
27
37
|
const char *html_c = StringValuePtr(rb_html);
|
28
38
|
size_t html_len = RSTRING_LEN(rb_html);
|
29
39
|
|
data/ext/nokolexbor/nl_node.c
CHANGED
@@ -199,7 +199,7 @@ nl_node_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec,
|
|
199
199
|
return LXB_STATUS_OK;
|
200
200
|
}
|
201
201
|
|
202
|
-
|
202
|
+
lxb_status_t
|
203
203
|
nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
204
204
|
{
|
205
205
|
const char *selector_c = StringValuePtr(selector);
|
@@ -207,37 +207,44 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
|
207
207
|
|
208
208
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
209
209
|
|
210
|
+
lxb_status_t status;
|
211
|
+
lxb_css_parser_t *parser = NULL;
|
212
|
+
lxb_selectors_t *selectors = NULL;
|
213
|
+
lxb_css_selector_list_t *list = NULL;
|
214
|
+
|
210
215
|
/* Create CSS parser. */
|
211
|
-
|
212
|
-
|
216
|
+
parser = lxb_css_parser_create();
|
217
|
+
status = lxb_css_parser_init(parser, NULL, NULL);
|
213
218
|
if (status != LXB_STATUS_OK)
|
214
219
|
{
|
215
|
-
|
220
|
+
goto cleanup;
|
216
221
|
}
|
217
222
|
|
218
223
|
/* Selectors. */
|
219
|
-
|
224
|
+
selectors = lxb_selectors_create();
|
220
225
|
status = lxb_selectors_init(selectors);
|
221
226
|
if (status != LXB_STATUS_OK)
|
222
227
|
{
|
223
|
-
|
228
|
+
goto cleanup;
|
224
229
|
}
|
225
230
|
|
226
231
|
/* Parse and get the log. */
|
227
232
|
// TODO: Cache the list for reuse, improves performance
|
228
|
-
|
233
|
+
list = lxb_css_selectors_parse_relative_list(parser, (const lxb_char_t *)selector_c, selector_len);
|
229
234
|
if (parser->status != LXB_STATUS_OK)
|
230
235
|
{
|
231
|
-
|
236
|
+
status = parser->status;
|
237
|
+
goto cleanup;
|
232
238
|
}
|
233
239
|
|
234
240
|
/* Find HTML nodes by CSS Selectors. */
|
235
241
|
status = lxb_selectors_find(selectors, node, list, cb, ctx);
|
236
242
|
if (status != LXB_STATUS_OK)
|
237
243
|
{
|
238
|
-
|
244
|
+
goto cleanup;
|
239
245
|
}
|
240
246
|
|
247
|
+
cleanup:
|
241
248
|
/* Destroy Selectors object. */
|
242
249
|
(void)lxb_selectors_destroy(selectors, true);
|
243
250
|
|
@@ -246,6 +253,8 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
|
246
253
|
|
247
254
|
/* Destroy all object for all CSS Selector List. */
|
248
255
|
lxb_css_selector_list_destroy_memory(list);
|
256
|
+
|
257
|
+
return status;
|
249
258
|
}
|
250
259
|
|
251
260
|
static void
|
@@ -311,10 +320,17 @@ nl_node_at_css(VALUE self, VALUE selector)
|
|
311
320
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
312
321
|
lexbor_array_t *array = lexbor_array_create();
|
313
322
|
|
314
|
-
nl_node_find(self, selector, nl_node_at_css_callback, array);
|
323
|
+
lxb_status_t status = nl_node_find(self, selector, nl_node_at_css_callback, array);
|
324
|
+
|
325
|
+
if (status != LXB_STATUS_OK)
|
326
|
+
{
|
327
|
+
lexbor_array_destroy(array, true);
|
328
|
+
nl_raise_lexbor_error(status);
|
329
|
+
}
|
315
330
|
|
316
331
|
if (array->length == 0)
|
317
332
|
{
|
333
|
+
lexbor_array_destroy(array, true);
|
318
334
|
return Qnil;
|
319
335
|
}
|
320
336
|
|
@@ -333,7 +349,12 @@ nl_node_css(VALUE self, VALUE selector)
|
|
333
349
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
334
350
|
lexbor_array_t *array = lexbor_array_create();
|
335
351
|
|
336
|
-
nl_node_find(self, selector, nl_node_css_callback, array);
|
352
|
+
lxb_status_t status = nl_node_find(self, selector, nl_node_css_callback, array);
|
353
|
+
if (status != LXB_STATUS_OK)
|
354
|
+
{
|
355
|
+
lexbor_array_destroy(array, true);
|
356
|
+
nl_raise_lexbor_error(status);
|
357
|
+
}
|
337
358
|
|
338
359
|
sort_nodes_if_necessary(selector, node->owner_document, array);
|
339
360
|
|
data/ext/nokolexbor/nl_node_set.c
CHANGED
@@ -5,7 +5,7 @@ extern VALUE cNokolexborNode;
|
|
5
5
|
VALUE cNokolexborNodeSet;
|
6
6
|
extern rb_data_type_t nl_document_type;
|
7
7
|
|
8
|
-
|
8
|
+
lxb_status_t nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx);
|
9
9
|
void sort_nodes_if_necessary(VALUE selector, lxb_dom_document_t *doc, lexbor_array_t *array);
|
10
10
|
lxb_status_t nl_node_at_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx);
|
11
11
|
lxb_status_t nl_node_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx);
|
@@ -275,8 +275,8 @@ nl_node_set_union(VALUE self, VALUE other)
|
|
275
275
|
return nl_rb_node_set_create_with_data(new_array, nl_rb_document_get(self));
|
276
276
|
}
|
277
277
|
|
278
|
-
static
|
279
|
-
nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void* ctx)
|
278
|
+
static lxb_status_t
|
279
|
+
nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
280
280
|
{
|
281
281
|
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
282
282
|
if (doc == NULL)
|
@@ -319,7 +319,7 @@ nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void* ctx)
|
|
319
319
|
}
|
320
320
|
VALUE rb_frag = nl_rb_node_create(&frag->node, nl_rb_document_get(self));
|
321
321
|
|
322
|
-
nl_node_find(rb_frag, selector, cb, ctx);
|
322
|
+
lxb_status_t status = nl_node_find(rb_frag, selector, cb, ctx);
|
323
323
|
|
324
324
|
lxb_dom_document_fragment_interface_destroy(frag);
|
325
325
|
// Restore original node data
|
@@ -329,6 +329,8 @@ nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void* ctx)
|
|
329
329
|
free(backup_array->list[i]);
|
330
330
|
}
|
331
331
|
lexbor_array_destroy(backup_array, true);
|
332
|
+
|
333
|
+
return status;
|
332
334
|
}
|
333
335
|
|
334
336
|
static VALUE
|
@@ -337,10 +339,17 @@ nl_node_set_at_css(VALUE self, VALUE selector)
|
|
337
339
|
lexbor_array_t *array = lexbor_array_create();
|
338
340
|
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
339
341
|
|
340
|
-
nl_node_set_find(self, selector, nl_node_at_css_callback, array);
|
342
|
+
lxb_status_t status = nl_node_set_find(self, selector, nl_node_at_css_callback, array);
|
343
|
+
|
344
|
+
if (status != LXB_STATUS_OK)
|
345
|
+
{
|
346
|
+
lexbor_array_destroy(array, true);
|
347
|
+
nl_raise_lexbor_error(status);
|
348
|
+
}
|
341
349
|
|
342
350
|
if (array->length == 0)
|
343
351
|
{
|
352
|
+
lexbor_array_destroy(array, true);
|
344
353
|
return Qnil;
|
345
354
|
}
|
346
355
|
|
@@ -359,7 +368,12 @@ nl_node_set_css(VALUE self, VALUE selector)
|
|
359
368
|
lexbor_array_t *array = lexbor_array_create();
|
360
369
|
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
361
370
|
|
362
|
-
nl_node_set_find(self, selector, nl_node_css_callback, array);
|
371
|
+
lxb_status_t status = nl_node_set_find(self, selector, nl_node_css_callback, array);
|
372
|
+
if (status != LXB_STATUS_OK)
|
373
|
+
{
|
374
|
+
lexbor_array_destroy(array, true);
|
375
|
+
nl_raise_lexbor_error(status);
|
376
|
+
}
|
363
377
|
|
364
378
|
sort_nodes_if_necessary(selector, doc, array);
|
365
379
|
|
data/ext/nokolexbor/nl_xpath_context.c
CHANGED
@@ -205,6 +205,7 @@ nl_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
|
205
205
|
|
206
206
|
if (xpath == NULL)
|
207
207
|
{
|
208
|
+
xmlXPathFreeObject(xpath);
|
208
209
|
rb_exc_raise(rb_ary_entry(errors, 0));
|
209
210
|
}
|
210
211
|
|
@@ -214,7 +215,7 @@ nl_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
|
214
215
|
retval = rb_funcall(cNokolexborNodeSet, rb_intern("new"), 1, rb_ary_new());
|
215
216
|
}
|
216
217
|
|
217
|
-
|
218
|
+
xmlXPathFreeObject(xpath);
|
218
219
|
|
219
220
|
return retval;
|
220
221
|
}
|
@@ -244,7 +245,11 @@ nl_xpath_context_new(VALUE klass, VALUE rb_node)
|
|
244
245
|
|
245
246
|
void Init_nl_xpath_context(void)
|
246
247
|
{
|
248
|
+
#ifndef NOKOLEXBOR_ASAN
|
247
249
|
xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
|
250
|
+
#else
|
251
|
+
xmlMemSetup((xmlFreeFunc)free, (xmlMallocFunc)malloc, (xmlReallocFunc)realloc, strdup);
|
252
|
+
#endif
|
248
253
|
|
249
254
|
cNokolexborXpathContext = rb_define_class_under(mNokolexbor, "XPathContext", rb_cObject);
|
250
255
|
mNokolexborXpath = rb_define_module_under(mNokolexbor, "XPath");
|
data/lib/nokolexbor/version.rb
CHANGED
data/patches/0003-lexbor-attach-template-content-to-self.patch
CHANGED
@@ -11,3 +11,16 @@ index 884fede..29c1e69 100755
|
|
11
11
|
element->content->node.ns = LXB_NS_HTML;
|
12
12
|
element->content->host = lxb_dom_interface_element(element);
|
13
13
|
|
14
|
+
diff --git i/source/lexbor/html/serialize.c w/source/lexbor/html/serialize.c
|
15
|
+
index df9689d..5e2ff91 100755
|
16
|
+
--- i/source/lexbor/html/serialize.c
|
17
|
+
+++ w/source/lexbor/html/serialize.c
|
18
|
+
@@ -272,7 +272,7 @@ lxb_html_serialize_node_cb(lxb_dom_node_t *node,
|
19
|
+
}
|
20
|
+
}
|
21
|
+
|
22
|
+
- skip_it = lxb_html_node_is_void(node);
|
23
|
+
+ skip_it = lxb_html_node_is_void(node) || node->local_name == LXB_TAG_TEMPLATE;
|
24
|
+
|
25
|
+
if (skip_it == false && node->first_child != NULL) {
|
26
|
+
node = node->first_child;
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokolexbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.4
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yicheng Zhou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-12-
|
11
|
+
date: 2022-12-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|