nokolexbor 0.2.4 → 0.2.5

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7e93c293e8d506960077c772b822d4f75103d6583b8864e56f343a8ad6229c7a
4
- data.tar.gz: 1baaaed733eae123b895e21021709fed7a7e185a10e8c27cf0d43105e7e4a272
3
+ metadata.gz: d4f935c5e81aed7b5d964e7332a9abeaa1f44dec7e2532f01c89dfd17df14ded
4
+ data.tar.gz: 91c58aca5a9b16a3cfa7e2436dfb86feeb777daa3615b8b3a95b234981cc67d0
5
5
  SHA512:
6
- metadata.gz: 665232217ba5f1b0a53ad67dfcbb38244685fab9242bf718b1364b59c3b623e5150bdd471284c3424fb2c319761c30f1022d7792e1edf848e3c4c0a41de89806
7
- data.tar.gz: 81cc64f435de161807ce0a6d3a0c8b0875a51f0f49307a763be8be4c70b3eefd0a5ea36fb364d2fec2e37b6b3307d549fc0f0bbd3afe35f47b6b797ea5998b96
6
+ metadata.gz: af6ae7b07bde270ea275a5b21221c7e90923f195a6a62ed7b85174f1d9f31f017661dc50c7d8dd1c3d39096a5c1ecbb3469f4496f2e628a3b7026f82ca3994c5
7
+ data.tar.gz: eea3a38dcbd81cdb7c54996248d6eaa2d2c51b964d79d3131ba04594dd449a141b32b225bc7a879844c92418f3c04e6b63c77c9c6bef86f8ee619fa6a47b03a3
@@ -1,16 +1,22 @@
1
1
  require 'mkmf'
2
2
  require 'timeout'
3
3
 
4
- # For debugging
5
- # CONFIG["optflags"] = "-O0"
6
- # CONFIG["debugflags"] = "-ggdb3"
7
-
8
4
  cmake_flags = [ ENV["CMAKE_FLAGS"] ]
9
5
  cmake_flags << "-DLEXBOR_BUILD_TESTS_CPP=OFF"
10
6
  cmake_flags << "-DLEXBOR_BUILD_SHARED=OFF"
11
7
  cmake_flags << "-DLEXBOR_BUILD_STATIC=ON"
12
- # For debugging
13
- # cmake_flags << "-DLEXBOR_OPTIMIZATION_LEVEL='-O0 -g'"
8
+
9
+ if ENV['NOKOLEXBOR_DEBUG'] || ENV['NOKOLEXBOR_ASAN']
10
+ CONFIG["optflags"] = "-O0"
11
+ CONFIG["debugflags"] = "-ggdb3"
12
+ cmake_flags << "-DLEXBOR_OPTIMIZATION_LEVEL='-O0 -g'"
13
+ end
14
+
15
+ if ENV['NOKOLEXBOR_ASAN']
16
+ $LDFLAGS << " -fsanitize=address"
17
+ $CFLAGS << " -fsanitize=address -DNOKOLEXBOR_ASAN"
18
+ cmake_flags << "-DLEXBOR_BUILD_WITH_ASAN=ON"
19
+ end
14
20
 
15
21
  append_cflags("-DLEXBOR_STATIC")
16
22
  append_cflags("-DLIBXML_STATIC")
@@ -13,6 +13,11 @@
13
13
  #include <ruby.h>
14
14
  #include "lexbor/core/base.h"
15
15
 
16
+ // Disable using ruby memory functions when ASAN is enabled,
17
+ // otherwise memory leak info will be all about ruby which
18
+ // is useless.
19
+ #ifndef NOKOLEXBOR_ASAN
20
+
16
21
  void *
17
22
  lexbor_malloc(size_t size)
18
23
  {
@@ -37,3 +42,5 @@ lexbor_free(void *dst)
37
42
  ruby_xfree(dst);
38
43
  return NULL;
39
44
  }
45
+
46
+ #endif
@@ -22,8 +22,18 @@ const rb_data_type_t nl_document_type = {
22
22
  };
23
23
 
24
24
  static VALUE
25
- nl_document_parse(VALUE self, VALUE rb_html)
25
+ nl_document_parse(VALUE self, VALUE rb_string_or_io)
26
26
  {
27
+ VALUE id_read = rb_intern("read");
28
+ VALUE rb_html;
29
+ if (rb_respond_to(rb_string_or_io, id_read))
30
+ {
31
+ rb_html = rb_funcall(rb_string_or_io, id_read, 0);
32
+ }
33
+ else
34
+ {
35
+ rb_html = rb_string_or_io;
36
+ }
27
37
  const char *html_c = StringValuePtr(rb_html);
28
38
  size_t html_len = RSTRING_LEN(rb_html);
29
39
 
@@ -199,7 +199,7 @@ nl_node_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec,
199
199
  return LXB_STATUS_OK;
200
200
  }
201
201
 
202
- void
202
+ lxb_status_t
203
203
  nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
204
204
  {
205
205
  const char *selector_c = StringValuePtr(selector);
@@ -207,37 +207,44 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
207
207
 
208
208
  lxb_dom_node_t *node = nl_rb_node_unwrap(self);
209
209
 
210
+ lxb_status_t status;
211
+ lxb_css_parser_t *parser = NULL;
212
+ lxb_selectors_t *selectors = NULL;
213
+ lxb_css_selector_list_t *list = NULL;
214
+
210
215
  /* Create CSS parser. */
211
- lxb_css_parser_t *parser = lxb_css_parser_create();
212
- lxb_status_t status = lxb_css_parser_init(parser, NULL, NULL);
216
+ parser = lxb_css_parser_create();
217
+ status = lxb_css_parser_init(parser, NULL, NULL);
213
218
  if (status != LXB_STATUS_OK)
214
219
  {
215
- nl_raise_lexbor_error(status);
220
+ goto cleanup;
216
221
  }
217
222
 
218
223
  /* Selectors. */
219
- lxb_selectors_t *selectors = lxb_selectors_create();
224
+ selectors = lxb_selectors_create();
220
225
  status = lxb_selectors_init(selectors);
221
226
  if (status != LXB_STATUS_OK)
222
227
  {
223
- nl_raise_lexbor_error(status);
228
+ goto cleanup;
224
229
  }
225
230
 
226
231
  /* Parse and get the log. */
227
232
  // TODO: Cache the list for reuse, improves performance
228
- lxb_css_selector_list_t *list = lxb_css_selectors_parse_relative_list(parser, (const lxb_char_t *)selector_c, selector_len);
233
+ list = lxb_css_selectors_parse_relative_list(parser, (const lxb_char_t *)selector_c, selector_len);
229
234
  if (parser->status != LXB_STATUS_OK)
230
235
  {
231
- nl_raise_lexbor_error(parser->status);
236
+ status = parser->status;
237
+ goto cleanup;
232
238
  }
233
239
 
234
240
  /* Find HTML nodes by CSS Selectors. */
235
241
  status = lxb_selectors_find(selectors, node, list, cb, ctx);
236
242
  if (status != LXB_STATUS_OK)
237
243
  {
238
- nl_raise_lexbor_error(status);
244
+ goto cleanup;
239
245
  }
240
246
 
247
+ cleanup:
241
248
  /* Destroy Selectors object. */
242
249
  (void)lxb_selectors_destroy(selectors, true);
243
250
 
@@ -246,6 +253,8 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
246
253
 
247
254
  /* Destroy all object for all CSS Selector List. */
248
255
  lxb_css_selector_list_destroy_memory(list);
256
+
257
+ return status;
249
258
  }
250
259
 
251
260
  static void
@@ -311,10 +320,17 @@ nl_node_at_css(VALUE self, VALUE selector)
311
320
  lxb_dom_node_t *node = nl_rb_node_unwrap(self);
312
321
  lexbor_array_t *array = lexbor_array_create();
313
322
 
314
- nl_node_find(self, selector, nl_node_at_css_callback, array);
323
+ lxb_status_t status = nl_node_find(self, selector, nl_node_at_css_callback, array);
324
+
325
+ if (status != LXB_STATUS_OK)
326
+ {
327
+ lexbor_array_destroy(array, true);
328
+ nl_raise_lexbor_error(status);
329
+ }
315
330
 
316
331
  if (array->length == 0)
317
332
  {
333
+ lexbor_array_destroy(array, true);
318
334
  return Qnil;
319
335
  }
320
336
 
@@ -333,7 +349,12 @@ nl_node_css(VALUE self, VALUE selector)
333
349
  lxb_dom_node_t *node = nl_rb_node_unwrap(self);
334
350
  lexbor_array_t *array = lexbor_array_create();
335
351
 
336
- nl_node_find(self, selector, nl_node_css_callback, array);
352
+ lxb_status_t status = nl_node_find(self, selector, nl_node_css_callback, array);
353
+ if (status != LXB_STATUS_OK)
354
+ {
355
+ lexbor_array_destroy(array, true);
356
+ nl_raise_lexbor_error(status);
357
+ }
337
358
 
338
359
  sort_nodes_if_necessary(selector, node->owner_document, array);
339
360
 
@@ -5,7 +5,7 @@ extern VALUE cNokolexborNode;
5
5
  VALUE cNokolexborNodeSet;
6
6
  extern rb_data_type_t nl_document_type;
7
7
 
8
- void nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx);
8
+ lxb_status_t nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx);
9
9
  void sort_nodes_if_necessary(VALUE selector, lxb_dom_document_t *doc, lexbor_array_t *array);
10
10
  lxb_status_t nl_node_at_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx);
11
11
  lxb_status_t nl_node_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx);
@@ -275,8 +275,8 @@ nl_node_set_union(VALUE self, VALUE other)
275
275
  return nl_rb_node_set_create_with_data(new_array, nl_rb_document_get(self));
276
276
  }
277
277
 
278
- static void
279
- nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void* ctx)
278
+ static lxb_status_t
279
+ nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
280
280
  {
281
281
  lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
282
282
  if (doc == NULL)
@@ -319,7 +319,7 @@ nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void* ctx)
319
319
  }
320
320
  VALUE rb_frag = nl_rb_node_create(&frag->node, nl_rb_document_get(self));
321
321
 
322
- nl_node_find(rb_frag, selector, cb, ctx);
322
+ lxb_status_t status = nl_node_find(rb_frag, selector, cb, ctx);
323
323
 
324
324
  lxb_dom_document_fragment_interface_destroy(frag);
325
325
  // Restore original node data
@@ -329,6 +329,8 @@ nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void* ctx)
329
329
  free(backup_array->list[i]);
330
330
  }
331
331
  lexbor_array_destroy(backup_array, true);
332
+
333
+ return status;
332
334
  }
333
335
 
334
336
  static VALUE
@@ -337,10 +339,17 @@ nl_node_set_at_css(VALUE self, VALUE selector)
337
339
  lexbor_array_t *array = lexbor_array_create();
338
340
  lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
339
341
 
340
- nl_node_set_find(self, selector, nl_node_at_css_callback, array);
342
+ lxb_status_t status = nl_node_set_find(self, selector, nl_node_at_css_callback, array);
343
+
344
+ if (status != LXB_STATUS_OK)
345
+ {
346
+ lexbor_array_destroy(array, true);
347
+ nl_raise_lexbor_error(status);
348
+ }
341
349
 
342
350
  if (array->length == 0)
343
351
  {
352
+ lexbor_array_destroy(array, true);
344
353
  return Qnil;
345
354
  }
346
355
 
@@ -359,7 +368,12 @@ nl_node_set_css(VALUE self, VALUE selector)
359
368
  lexbor_array_t *array = lexbor_array_create();
360
369
  lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
361
370
 
362
- nl_node_set_find(self, selector, nl_node_css_callback, array);
371
+ lxb_status_t status = nl_node_set_find(self, selector, nl_node_css_callback, array);
372
+ if (status != LXB_STATUS_OK)
373
+ {
374
+ lexbor_array_destroy(array, true);
375
+ nl_raise_lexbor_error(status);
376
+ }
363
377
 
364
378
  sort_nodes_if_necessary(selector, doc, array);
365
379
 
@@ -205,6 +205,7 @@ nl_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
205
205
 
206
206
  if (xpath == NULL)
207
207
  {
208
+ xmlXPathFreeObject(xpath);
208
209
  rb_exc_raise(rb_ary_entry(errors, 0));
209
210
  }
210
211
 
@@ -214,7 +215,7 @@ nl_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
214
215
  retval = rb_funcall(cNokolexborNodeSet, rb_intern("new"), 1, rb_ary_new());
215
216
  }
216
217
 
217
- xmlXPathFreeNodeSetList(xpath);
218
+ xmlXPathFreeObject(xpath);
218
219
 
219
220
  return retval;
220
221
  }
@@ -244,7 +245,11 @@ nl_xpath_context_new(VALUE klass, VALUE rb_node)
244
245
 
245
246
  void Init_nl_xpath_context(void)
246
247
  {
248
+ #ifndef NOKOLEXBOR_ASAN
247
249
  xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
250
+ #else
251
+ xmlMemSetup((xmlFreeFunc)free, (xmlMallocFunc)malloc, (xmlReallocFunc)realloc, strdup);
252
+ #endif
248
253
 
249
254
  cNokolexborXpathContext = rb_define_class_under(mNokolexbor, "XPathContext", rb_cObject);
250
255
  mNokolexborXpath = rb_define_module_under(mNokolexbor, "XPath");
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Nokolexbor
4
- VERSION = '0.2.4'
4
+ VERSION = '0.2.5'
5
5
  end
@@ -11,3 +11,16 @@ index 884fede..29c1e69 100755
11
11
  element->content->node.ns = LXB_NS_HTML;
12
12
  element->content->host = lxb_dom_interface_element(element);
13
13
 
14
+ diff --git i/source/lexbor/html/serialize.c w/source/lexbor/html/serialize.c
15
+ index df9689d..5e2ff91 100755
16
+ --- i/source/lexbor/html/serialize.c
17
+ +++ w/source/lexbor/html/serialize.c
18
+ @@ -272,7 +272,7 @@ lxb_html_serialize_node_cb(lxb_dom_node_t *node,
19
+ }
20
+ }
21
+
22
+ - skip_it = lxb_html_node_is_void(node);
23
+ + skip_it = lxb_html_node_is_void(node) || node->local_name == LXB_TAG_TEMPLATE;
24
+
25
+ if (skip_it == false && node->first_child != NULL) {
26
+ node = node->first_child;
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokolexbor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yicheng Zhou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-12-19 00:00:00.000000000 Z
11
+ date: 2022-12-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler