nokolexbor 0.2.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7e93c293e8d506960077c772b822d4f75103d6583b8864e56f343a8ad6229c7a
4
- data.tar.gz: 1baaaed733eae123b895e21021709fed7a7e185a10e8c27cf0d43105e7e4a272
3
+ metadata.gz: d4f935c5e81aed7b5d964e7332a9abeaa1f44dec7e2532f01c89dfd17df14ded
4
+ data.tar.gz: 91c58aca5a9b16a3cfa7e2436dfb86feeb777daa3615b8b3a95b234981cc67d0
5
5
  SHA512:
6
- metadata.gz: 665232217ba5f1b0a53ad67dfcbb38244685fab9242bf718b1364b59c3b623e5150bdd471284c3424fb2c319761c30f1022d7792e1edf848e3c4c0a41de89806
7
- data.tar.gz: 81cc64f435de161807ce0a6d3a0c8b0875a51f0f49307a763be8be4c70b3eefd0a5ea36fb364d2fec2e37b6b3307d549fc0f0bbd3afe35f47b6b797ea5998b96
6
+ metadata.gz: af6ae7b07bde270ea275a5b21221c7e90923f195a6a62ed7b85174f1d9f31f017661dc50c7d8dd1c3d39096a5c1ecbb3469f4496f2e628a3b7026f82ca3994c5
7
+ data.tar.gz: eea3a38dcbd81cdb7c54996248d6eaa2d2c51b964d79d3131ba04594dd449a141b32b225bc7a879844c92418f3c04e6b63c77c9c6bef86f8ee619fa6a47b03a3
@@ -1,16 +1,22 @@
1
1
  require 'mkmf'
2
2
  require 'timeout'
3
3
 
4
- # For debugging
5
- # CONFIG["optflags"] = "-O0"
6
- # CONFIG["debugflags"] = "-ggdb3"
7
-
8
4
  cmake_flags = [ ENV["CMAKE_FLAGS"] ]
9
5
  cmake_flags << "-DLEXBOR_BUILD_TESTS_CPP=OFF"
10
6
  cmake_flags << "-DLEXBOR_BUILD_SHARED=OFF"
11
7
  cmake_flags << "-DLEXBOR_BUILD_STATIC=ON"
12
- # For debugging
13
- # cmake_flags << "-DLEXBOR_OPTIMIZATION_LEVEL='-O0 -g'"
8
+
9
+ if ENV['NOKOLEXBOR_DEBUG'] || ENV['NOKOLEXBOR_ASAN']
10
+ CONFIG["optflags"] = "-O0"
11
+ CONFIG["debugflags"] = "-ggdb3"
12
+ cmake_flags << "-DLEXBOR_OPTIMIZATION_LEVEL='-O0 -g'"
13
+ end
14
+
15
+ if ENV['NOKOLEXBOR_ASAN']
16
+ $LDFLAGS << " -fsanitize=address"
17
+ $CFLAGS << " -fsanitize=address -DNOKOLEXBOR_ASAN"
18
+ cmake_flags << "-DLEXBOR_BUILD_WITH_ASAN=ON"
19
+ end
14
20
 
15
21
  append_cflags("-DLEXBOR_STATIC")
16
22
  append_cflags("-DLIBXML_STATIC")
@@ -13,6 +13,11 @@
13
13
  #include <ruby.h>
14
14
  #include "lexbor/core/base.h"
15
15
 
16
+ // Disable using ruby memory functions when ASAN is enabled,
17
+ // otherwise memory leak info will be all about ruby which
18
+ // is useless.
19
+ #ifndef NOKOLEXBOR_ASAN
20
+
16
21
  void *
17
22
  lexbor_malloc(size_t size)
18
23
  {
@@ -37,3 +42,5 @@ lexbor_free(void *dst)
37
42
  ruby_xfree(dst);
38
43
  return NULL;
39
44
  }
45
+
46
+ #endif
@@ -22,8 +22,18 @@ const rb_data_type_t nl_document_type = {
22
22
  };
23
23
 
24
24
  static VALUE
25
- nl_document_parse(VALUE self, VALUE rb_html)
25
+ nl_document_parse(VALUE self, VALUE rb_string_or_io)
26
26
  {
27
+ VALUE id_read = rb_intern("read");
28
+ VALUE rb_html;
29
+ if (rb_respond_to(rb_string_or_io, id_read))
30
+ {
31
+ rb_html = rb_funcall(rb_string_or_io, id_read, 0);
32
+ }
33
+ else
34
+ {
35
+ rb_html = rb_string_or_io;
36
+ }
27
37
  const char *html_c = StringValuePtr(rb_html);
28
38
  size_t html_len = RSTRING_LEN(rb_html);
29
39
 
@@ -199,7 +199,7 @@ nl_node_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec,
199
199
  return LXB_STATUS_OK;
200
200
  }
201
201
 
202
- void
202
+ lxb_status_t
203
203
  nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
204
204
  {
205
205
  const char *selector_c = StringValuePtr(selector);
@@ -207,37 +207,44 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
207
207
 
208
208
  lxb_dom_node_t *node = nl_rb_node_unwrap(self);
209
209
 
210
+ lxb_status_t status;
211
+ lxb_css_parser_t *parser = NULL;
212
+ lxb_selectors_t *selectors = NULL;
213
+ lxb_css_selector_list_t *list = NULL;
214
+
210
215
  /* Create CSS parser. */
211
- lxb_css_parser_t *parser = lxb_css_parser_create();
212
- lxb_status_t status = lxb_css_parser_init(parser, NULL, NULL);
216
+ parser = lxb_css_parser_create();
217
+ status = lxb_css_parser_init(parser, NULL, NULL);
213
218
  if (status != LXB_STATUS_OK)
214
219
  {
215
- nl_raise_lexbor_error(status);
220
+ goto cleanup;
216
221
  }
217
222
 
218
223
  /* Selectors. */
219
- lxb_selectors_t *selectors = lxb_selectors_create();
224
+ selectors = lxb_selectors_create();
220
225
  status = lxb_selectors_init(selectors);
221
226
  if (status != LXB_STATUS_OK)
222
227
  {
223
- nl_raise_lexbor_error(status);
228
+ goto cleanup;
224
229
  }
225
230
 
226
231
  /* Parse and get the log. */
227
232
  // TODO: Cache the list for reuse, improves performance
228
- lxb_css_selector_list_t *list = lxb_css_selectors_parse_relative_list(parser, (const lxb_char_t *)selector_c, selector_len);
233
+ list = lxb_css_selectors_parse_relative_list(parser, (const lxb_char_t *)selector_c, selector_len);
229
234
  if (parser->status != LXB_STATUS_OK)
230
235
  {
231
- nl_raise_lexbor_error(parser->status);
236
+ status = parser->status;
237
+ goto cleanup;
232
238
  }
233
239
 
234
240
  /* Find HTML nodes by CSS Selectors. */
235
241
  status = lxb_selectors_find(selectors, node, list, cb, ctx);
236
242
  if (status != LXB_STATUS_OK)
237
243
  {
238
- nl_raise_lexbor_error(status);
244
+ goto cleanup;
239
245
  }
240
246
 
247
+ cleanup:
241
248
  /* Destroy Selectors object. */
242
249
  (void)lxb_selectors_destroy(selectors, true);
243
250
 
@@ -246,6 +253,8 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
246
253
 
247
254
  /* Destroy all object for all CSS Selector List. */
248
255
  lxb_css_selector_list_destroy_memory(list);
256
+
257
+ return status;
249
258
  }
250
259
 
251
260
  static void
@@ -311,10 +320,17 @@ nl_node_at_css(VALUE self, VALUE selector)
311
320
  lxb_dom_node_t *node = nl_rb_node_unwrap(self);
312
321
  lexbor_array_t *array = lexbor_array_create();
313
322
 
314
- nl_node_find(self, selector, nl_node_at_css_callback, array);
323
+ lxb_status_t status = nl_node_find(self, selector, nl_node_at_css_callback, array);
324
+
325
+ if (status != LXB_STATUS_OK)
326
+ {
327
+ lexbor_array_destroy(array, true);
328
+ nl_raise_lexbor_error(status);
329
+ }
315
330
 
316
331
  if (array->length == 0)
317
332
  {
333
+ lexbor_array_destroy(array, true);
318
334
  return Qnil;
319
335
  }
320
336
 
@@ -333,7 +349,12 @@ nl_node_css(VALUE self, VALUE selector)
333
349
  lxb_dom_node_t *node = nl_rb_node_unwrap(self);
334
350
  lexbor_array_t *array = lexbor_array_create();
335
351
 
336
- nl_node_find(self, selector, nl_node_css_callback, array);
352
+ lxb_status_t status = nl_node_find(self, selector, nl_node_css_callback, array);
353
+ if (status != LXB_STATUS_OK)
354
+ {
355
+ lexbor_array_destroy(array, true);
356
+ nl_raise_lexbor_error(status);
357
+ }
337
358
 
338
359
  sort_nodes_if_necessary(selector, node->owner_document, array);
339
360
 
@@ -5,7 +5,7 @@ extern VALUE cNokolexborNode;
5
5
  VALUE cNokolexborNodeSet;
6
6
  extern rb_data_type_t nl_document_type;
7
7
 
8
- void nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx);
8
+ lxb_status_t nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx);
9
9
  void sort_nodes_if_necessary(VALUE selector, lxb_dom_document_t *doc, lexbor_array_t *array);
10
10
  lxb_status_t nl_node_at_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx);
11
11
  lxb_status_t nl_node_css_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t *spec, void *ctx);
@@ -275,8 +275,8 @@ nl_node_set_union(VALUE self, VALUE other)
275
275
  return nl_rb_node_set_create_with_data(new_array, nl_rb_document_get(self));
276
276
  }
277
277
 
278
- static void
279
- nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void* ctx)
278
+ static lxb_status_t
279
+ nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
280
280
  {
281
281
  lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
282
282
  if (doc == NULL)
@@ -319,7 +319,7 @@ nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void* ctx)
319
319
  }
320
320
  VALUE rb_frag = nl_rb_node_create(&frag->node, nl_rb_document_get(self));
321
321
 
322
- nl_node_find(rb_frag, selector, cb, ctx);
322
+ lxb_status_t status = nl_node_find(rb_frag, selector, cb, ctx);
323
323
 
324
324
  lxb_dom_document_fragment_interface_destroy(frag);
325
325
  // Restore original node data
@@ -329,6 +329,8 @@ nl_node_set_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void* ctx)
329
329
  free(backup_array->list[i]);
330
330
  }
331
331
  lexbor_array_destroy(backup_array, true);
332
+
333
+ return status;
332
334
  }
333
335
 
334
336
  static VALUE
@@ -337,10 +339,17 @@ nl_node_set_at_css(VALUE self, VALUE selector)
337
339
  lexbor_array_t *array = lexbor_array_create();
338
340
  lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
339
341
 
340
- nl_node_set_find(self, selector, nl_node_at_css_callback, array);
342
+ lxb_status_t status = nl_node_set_find(self, selector, nl_node_at_css_callback, array);
343
+
344
+ if (status != LXB_STATUS_OK)
345
+ {
346
+ lexbor_array_destroy(array, true);
347
+ nl_raise_lexbor_error(status);
348
+ }
341
349
 
342
350
  if (array->length == 0)
343
351
  {
352
+ lexbor_array_destroy(array, true);
344
353
  return Qnil;
345
354
  }
346
355
 
@@ -359,7 +368,12 @@ nl_node_set_css(VALUE self, VALUE selector)
359
368
  lexbor_array_t *array = lexbor_array_create();
360
369
  lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
361
370
 
362
- nl_node_set_find(self, selector, nl_node_css_callback, array);
371
+ lxb_status_t status = nl_node_set_find(self, selector, nl_node_css_callback, array);
372
+ if (status != LXB_STATUS_OK)
373
+ {
374
+ lexbor_array_destroy(array, true);
375
+ nl_raise_lexbor_error(status);
376
+ }
363
377
 
364
378
  sort_nodes_if_necessary(selector, doc, array);
365
379
 
@@ -205,6 +205,7 @@ nl_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
205
205
 
206
206
  if (xpath == NULL)
207
207
  {
208
+ xmlXPathFreeObject(xpath);
208
209
  rb_exc_raise(rb_ary_entry(errors, 0));
209
210
  }
210
211
 
@@ -214,7 +215,7 @@ nl_xpath_context_evaluate(int argc, VALUE *argv, VALUE self)
214
215
  retval = rb_funcall(cNokolexborNodeSet, rb_intern("new"), 1, rb_ary_new());
215
216
  }
216
217
 
217
- xmlXPathFreeNodeSetList(xpath);
218
+ xmlXPathFreeObject(xpath);
218
219
 
219
220
  return retval;
220
221
  }
@@ -244,7 +245,11 @@ nl_xpath_context_new(VALUE klass, VALUE rb_node)
244
245
 
245
246
  void Init_nl_xpath_context(void)
246
247
  {
248
+ #ifndef NOKOLEXBOR_ASAN
247
249
  xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
250
+ #else
251
+ xmlMemSetup((xmlFreeFunc)free, (xmlMallocFunc)malloc, (xmlReallocFunc)realloc, strdup);
252
+ #endif
248
253
 
249
254
  cNokolexborXpathContext = rb_define_class_under(mNokolexbor, "XPathContext", rb_cObject);
250
255
  mNokolexborXpath = rb_define_module_under(mNokolexbor, "XPath");
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Nokolexbor
4
- VERSION = '0.2.4'
4
+ VERSION = '0.2.5'
5
5
  end
@@ -11,3 +11,16 @@ index 884fede..29c1e69 100755
11
11
  element->content->node.ns = LXB_NS_HTML;
12
12
  element->content->host = lxb_dom_interface_element(element);
13
13
 
14
+ diff --git i/source/lexbor/html/serialize.c w/source/lexbor/html/serialize.c
15
+ index df9689d..5e2ff91 100755
16
+ --- i/source/lexbor/html/serialize.c
17
+ +++ w/source/lexbor/html/serialize.c
18
+ @@ -272,7 +272,7 @@ lxb_html_serialize_node_cb(lxb_dom_node_t *node,
19
+ }
20
+ }
21
+
22
+ - skip_it = lxb_html_node_is_void(node);
23
+ + skip_it = lxb_html_node_is_void(node) || node->local_name == LXB_TAG_TEMPLATE;
24
+
25
+ if (skip_it == false && node->first_child != NULL) {
26
+ node = node->first_child;
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokolexbor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yicheng Zhou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-12-19 00:00:00.000000000 Z
11
+ date: 2022-12-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler