nokolexbor 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nokolexbor/extconf.rb +12 -6
- data/ext/nokolexbor/memory.c +7 -0
- data/ext/nokolexbor/nl_document.c +11 -1
- data/ext/nokolexbor/nl_node.c +32 -11
- data/ext/nokolexbor/nl_node_set.c +20 -6
- data/ext/nokolexbor/nl_xpath_context.c +6 -1
- data/lib/nokolexbor/version.rb +1 -1
- data/patches/0003-lexbor-attach-template-content-to-self.patch +13 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d4f935c5e81aed7b5d964e7332a9abeaa1f44dec7e2532f01c89dfd17df14ded
|
4
|
+
data.tar.gz: 91c58aca5a9b16a3cfa7e2436dfb86feeb777daa3615b8b3a95b234981cc67d0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af6ae7b07bde270ea275a5b21221c7e90923f195a6a62ed7b85174f1d9f31f017661dc50c7d8dd1c3d39096a5c1ecbb3469f4496f2e628a3b7026f82ca3994c5
|
7
|
+
data.tar.gz: eea3a38dcbd81cdb7c54996248d6eaa2d2c51b964d79d3131ba04594dd449a141b32b225bc7a879844c92418f3c04e6b63c77c9c6bef86f8ee619fa6a47b03a3
|
data/ext/nokolexbor/extconf.rb
CHANGED
@@ -1,16 +1,22 @@
|
|
1
1
|
require 'mkmf'
|
2
2
|
require 'timeout'
|
3
3
|
|
4
|
-
# For debugging
|
5
|
-
# CONFIG["optflags"] = "-O0"
|
6
|
-
# CONFIG["debugflags"] = "-ggdb3"
|
7
|
-
|
8
4
|
cmake_flags = [ ENV["CMAKE_FLAGS"] ]
|
9
5
|
cmake_flags << "-DLEXBOR_BUILD_TESTS_CPP=OFF"
|
10
6
|
cmake_flags << "-DLEXBOR_BUILD_SHARED=OFF"
|
11
7
|
cmake_flags << "-DLEXBOR_BUILD_STATIC=ON"
|
12
|
-
|
13
|
-
|
8
|
+
|
9
|
+
if ENV['NOKOLEXBOR_DEBUG'] || ENV['NOKOLEXBOR_ASAN']
|
10
|
+
CONFIG["optflags"] = "-O0"
|
11
|
+
CONFIG["debugflags"] = "-ggdb3"
|
12
|
+
cmake_flags << "-DLEXBOR_OPTIMIZATION_LEVEL='-O0 -g'"
|
13
|
+
end
|
14
|
+
|
15
|
+
if ENV['NOKOLEXBOR_ASAN']
|
16
|
+
$LDFLAGS << " -fsanitize=address"
|
17
|
+
$CFLAGS << " -fsanitize=address -DNOKOLEXBOR_ASAN"
|
18
|
+
cmake_flags << "-DLEXBOR_BUILD_WITH_ASAN=ON"
|
19
|
+
end
|
14
20
|
|
15
21
|
append_cflags("-DLEXBOR_STATIC")
|
16
22
|
append_cflags("-DLIBXML_STATIC")
|
data/ext/nokolexbor/memory.c
CHANGED
@@ -13,6 +13,11 @@
|
|
13
13
|
#include <ruby.h>
|
14
14
|
#include "lexbor/core/base.h"
|
15
15
|
|
16
|
+
// Disable using ruby memory functions when ASAN is enabled,
|
17
|
+
// otherwise memory leak info will be all about ruby which
|
18
|
+
// is useless.
|
19
|
+
#ifndef NOKOLEXBOR_ASAN
|
20
|
+
|
16
21
|
void *
|
17
22
|
lexbor_malloc(size_t size)
|
18
23
|
{
|
@@ -37,3 +42,5 @@ lexbor_free(void *dst)
|
|
37
42
|
ruby_xfree(dst);
|
38
43
|
return NULL;
|
39
44
|
}
|
45
|
+
|
46
|
+
#endif
|
data/ext/nokolexbor/nl_document.c
CHANGED
@@ -22,8 +22,18 @@ const rb_data_type_t nl_document_type = {
|
|
22
22
|
};
|
23
23
|
|
24
24
|
static VALUE
|
25
|
-
nl_document_parse(VALUE self, VALUE rb_html)
|
25
|
+
nl_document_parse(VALUE self, VALUE rb_string_or_io)
|
26
26
|
{
|
27
|
+
VALUE id_read = rb_intern("read");
|
28
|
+
VALUE rb_html;
|
29
|
+
if (rb_respond_to(rb_string_or_io, id_read))
|
30
|
+
{
|
31
|
+
rb_html = rb_funcall(rb_string_or_io, id_read, 0);
|
32
|
+
}
|
33
|
+
else
|
34
|
+
{
|
35
|
+
rb_html = rb_string_or_io;
|
36
|
+
}
|
27
37
|
const char *html_c = StringValuePtr(rb_html);
|
28
38
|
size_t html_len = RSTRING_LEN(rb_html);
|
29
39
|
|
data/ext/nokolexbor/nl_node.c
CHANGED
@@ -199,7 +199,7 @@ nl_node_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec,
|
|
199
199
|
return LXB_STATUS_OK;
|
200
200
|
}
|
201
201
|
|
202
|
-
|
202
|
+
lxb_status_t
|
203
203
|
nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
204
204
|
{
|
205
205
|
const char *selector_c = StringValuePtr(selector);
|
@@ -207,37 +207,44 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
|
207
207
|
|
208
208
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
209
209
|
|
210
|
+
lxb_status_t status;
|
211
|
+
lxb_css_parser_t *parser = NULL;
|
212
|
+
lxb_selectors_t *selectors = NULL;
|
213
|
+
lxb_css_selector_list_t *list = NULL;
|
214
|
+
|
210
215
|
/* Create CSS parser. */
|
211
|
-
|
212
|
-
|
216
|
+
parser = lxb_css_parser_create();
|
217
|
+
status = lxb_css_parser_init(parser, NULL, NULL);
|
213
218
|
if (status != LXB_STATUS_OK)
|
214
219
|
{
|
215
|
-
|
220
|
+
goto cleanup;
|
216
221
|
}
|
217
222
|
|
218
223
|
/* Selectors. */
|
219
|
-
|
224
|
+
selectors = lxb_selectors_create();
|
220
225
|
status = lxb_selectors_init(selectors);
|
221
226
|
if (status != LXB_STATUS_OK)
|
222
227
|
{
|
223
|
-
|
228
|
+
goto cleanup;
|
224
229
|
}
|
225
230
|
|
226
231
|
/* Parse and get the log. */
|
227
232
|
// TODO: Cache the list for reuse, improves performance
|
228
|
-
|
233
|
+
list = lxb_css_selectors_parse_relative_list(parser, (const lxb_char_t *)selector_c, selector_len);
|
229
234
|
if (parser->status != LXB_STATUS_OK)
|
230
235
|
{
|
231
|
-
|
236
|
+
status = parser->status;
|
237
|
+
goto cleanup;
|
232
238
|
}
|
233
239
|
|
234
240
|
/* Find HTML nodes by CSS Selectors. */
|
235
241
|
status = lxb_selectors_find(selectors, node, list, cb, ctx);
|
236
242
|
if (status != LXB_STATUS_OK)
|
237
243
|
{
|
238
|
-
|
244
|
+
goto cleanup;
|
239
245
|
}
|
240
246
|
|
247
|
+
cleanup:
|
241
248
|
/* Destroy Selectors object. */
|
242
249
|
(void)lxb_selectors_destroy(selectors, true);
|
243
250
|
|
@@ -246,6 +253,8 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
|
246
253
|
|
247
254
|
/* Destroy all object for all CSS Selector List. */
|
248
255
|
lxb_css_selector_list_destroy_memory(list);
|
256
|
+
|
257
|
+
return status;
|
249
258
|
}
|
250
259
|
|
251
260
|
static void
|
@@ -311,10 +320,17 @@ nl_node_at_css(VALUE self, VALUE selector)
|
|
311
320
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
312
321
|
lexbor_array_t *array = lexbor_array_create();
|
313
322
|
|
314
|
-
nl_node_find(self, selector, nl_node_at_css_callback, array);
|
323
|
+
lxb_status_t status = nl_node_find(self, selector, nl_node_at_css_callback, array);
|
324
|
+
|
325
|
+
if (status != LXB_STATUS_OK)
|
326
|
+
{
|
327
|
+
lexbor_array_destroy(array, true);
|
328
|
+
nl_raise_lexbor_error(status);
|
329
|
+
}
|
315
330
|
|
316
331
|
if (array->length == 0)
|
317
332
|
{
|
333
|
+
lexbor_array_destroy(array, true);
|
318
334
|
return Qnil;
|
319
335
|
}
|
320
336
|
|
@@ -333,7 +349,12 @@ nl_node_css(VALUE self, VALUE selector)
|
|
333
349
|
lxb_dom_node_t *node = nl_rb_node_unwrap(self);
|
334
350
|
lexbor_array_t *array = lexbor_array_create();
|
335
351
|
|
336
|
-
nl_node_find(self, selector, nl_node_css_callback, array);
|
352
|
+
lxb_status_t status = nl_node_find(self, selector, nl_node_css_callback, array);
|
353
|
+
if (status != LXB_STATUS_OK)
|
354
|
+
{
|
355
|
+
lexbor_array_destroy(array, true);
|
356
|
+
nl_raise_lexbor_error(status);
|
357
|
+
}
|
337
358
|
|
338
359
|
sort_nodes_if_necessary(selector, node->owner_document, array);
|
339
360
|
|
data/ext/nokolexbor/nl_node_set.c
CHANGED
@@ -5,7 +5,7 @@ extern VALUE cNokolexborNode;
|
|
5
5
|
VALUE cNokolexborNodeSet;
|
6
6
|
extern rb_data_type_t nl_document_type;
|
7
7
|
|
8
|
-
|
8
|
+
lxb_status_t nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx);
|
9
9
|
void sort_nodes_if_necessary(VALUE selector, lxb_dom_document_t *doc, lexbor_array_t *array);
|
10
10
|
lxb_status_t nl_node_at_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx);
|
11
11
|
lxb_status_t nl_node_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx);
|
@@ -275,8 +275,8 @@ nl_node_set_union(VALUE self, VALUE other)
|
|
275
275
|
return nl_rb_node_set_create_with_data(new_array, nl_rb_document_get(self));
|
276
276
|
}
|
277
277
|
|
278
|
-
static void
|
279
|
-
nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void* ctx)
|
278
|
+
static lxb_status_t
|
279
|
+
nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
|
280
280
|
{
|
281
281
|
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
282
282
|
if (doc == NULL)
|
@@ -319,7 +319,7 @@ nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void* ctx)
|
|
319
319
|
}
|
320
320
|
VALUE rb_frag = nl_rb_node_create(&frag->node, nl_rb_document_get(self));
|
321
321
|
|
322
|
-
nl_node_find(rb_frag, selector, cb, ctx);
|
322
|
+
lxb_status_t status = nl_node_find(rb_frag, selector, cb, ctx);
|
323
323
|
|
324
324
|
lxb_dom_document_fragment_interface_destroy(frag);
|
325
325
|
// Restore original node data
|
@@ -329,6 +329,8 @@ nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void* ctx)
|
|
329
329
|
free(backup_array->list[i]);
|
330
330
|
}
|
331
331
|
lexbor_array_destroy(backup_array, true);
|
332
|
+
|
333
|
+
return status;
|
332
334
|
}
|
333
335
|
|
334
336
|
static VALUE
|
@@ -337,10 +339,17 @@ nl_node_set_at_css(VALUE self, VALUE selector)
|
|
337
339
|
lexbor_array_t *array = lexbor_array_create();
|
338
340
|
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
339
341
|
|
340
|
-
nl_node_set_find(self, selector, nl_node_at_css_callback, array);
|
342
|
+
lxb_status_t status = nl_node_set_find(self, selector, nl_node_at_css_callback, array);
|
343
|
+
|
344
|
+
if (status != LXB_STATUS_OK)
|
345
|
+
{
|
346
|
+
lexbor_array_destroy(array, true);
|
347
|
+
nl_raise_lexbor_error(status);
|
348
|
+
}
|
341
349
|
|
342
350
|
if (array->length == 0)
|
343
351
|
{
|
352
|
+
lexbor_array_destroy(array, true);
|
344
353
|
return Qnil;
|
345
354
|
}
|
346
355
|
|
@@ -359,7 +368,12 @@ nl_node_set_css(VALUE self, VALUE selector)
|
|
359
368
|
lexbor_array_t *array = lexbor_array_create();
|
360
369
|
lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
|
361
370
|
|
362
|
-
nl_node_set_find(self, selector, nl_node_css_callback, array);
|
371
|
+
lxb_status_t status = nl_node_set_find(self, selector, nl_node_css_callback, array);
|
372
|
+
if (status != LXB_STATUS_OK)
|
373
|
+
{
|
374
|
+
lexbor_array_destroy(array, true);
|
375
|
+
nl_raise_lexbor_error(status);
|
376
|
+
}
|
363
377
|
|
364
378
|
sort_nodes_if_necessary(selector, doc, array);
|
365
379
|
|
data/ext/nokolexbor/nl_xpath_context.c
CHANGED
@@ -205,6 +205,7 @@ nl_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
|
205
205
|
|
206
206
|
if (xpath == NULL)
|
207
207
|
{
|
208
|
+
xmlXPathFreeObject(xpath);
|
208
209
|
rb_exc_raise(rb_ary_entry(errors, 0));
|
209
210
|
}
|
210
211
|
|
@@ -214,7 +215,7 @@ nl_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
|
|
214
215
|
retval = rb_funcall(cNokolexborNodeSet, rb_intern("new"), 1, rb_ary_new());
|
215
216
|
}
|
216
217
|
|
217
|
-
|
218
|
+
xmlXPathFreeObject(xpath);
|
218
219
|
|
219
220
|
return retval;
|
220
221
|
}
|
@@ -244,7 +245,11 @@ nl_xpath_context_new(VALUE klass, VALUE rb_node)
|
|
244
245
|
|
245
246
|
void Init_nl_xpath_context(void)
|
246
247
|
{
|
248
|
+
#ifndef NOKOLEXBOR_ASAN
|
247
249
|
xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
|
250
|
+
#else
|
251
|
+
xmlMemSetup((xmlFreeFunc)free, (xmlMallocFunc)malloc, (xmlReallocFunc)realloc, strdup);
|
252
|
+
#endif
|
248
253
|
|
249
254
|
cNokolexborXpathContext = rb_define_class_under(mNokolexbor, "XPathContext", rb_cObject);
|
250
255
|
mNokolexborXpath = rb_define_module_under(mNokolexbor, "XPath");
|
data/lib/nokolexbor/version.rb
CHANGED
data/patches/0003-lexbor-attach-template-content-to-self.patch
CHANGED
@@ -11,3 +11,16 @@ index 884fede..29c1e69 100755
|
|
11
11
|
element->content->node.ns = LXB_NS_HTML;
|
12
12
|
element->content->host = lxb_dom_interface_element(element);
|
13
13
|
|
14
|
+
diff --git i/source/lexbor/html/serialize.c w/source/lexbor/html/serialize.c
|
15
|
+
index df9689d..5e2ff91 100755
|
16
|
+
--- i/source/lexbor/html/serialize.c
|
17
|
+
+++ w/source/lexbor/html/serialize.c
|
18
|
+
@@ -272,7 +272,7 @@ lxb_html_serialize_node_cb(lxb_dom_node_t *node,
|
19
|
+
}
|
20
|
+
}
|
21
|
+
|
22
|
+
- skip_it = lxb_html_node_is_void(node);
|
23
|
+
+ skip_it = lxb_html_node_is_void(node) || node->local_name == LXB_TAG_TEMPLATE;
|
24
|
+
|
25
|
+
if (skip_it == false && node->first_child != NULL) {
|
26
|
+
node = node->first_child;
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokolexbor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.4
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yicheng Zhou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-12-
|
11
|
+
date: 2022-12-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|