nokolexbor 0.6.2 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5095dc8ed38a170bab48cc78e7a7e4475657b3066cb114a46eab1d9f4ed25c36
4
- data.tar.gz: c2a00a4ebc644dda81e0b010166063a5ad975ad7e2410739c69af2246cce560f
3
+ metadata.gz: 3615acd25f6233c5701c91d25ef6c94a73adf2de8433e2e1b6b3aa215f0581ab
4
+ data.tar.gz: ad3e9f8134a7f26924b2283a2ac10a8dfae2f5684dd650be5903faf069f595ef
5
5
  SHA512:
6
- metadata.gz: 07df8df17e08dbe9293b6f0e1f878ee70d787f3370c33a00bcf0ca71dee007a708ecf4a7b069039cb05d95e93cd741654a3bdb8a11833804f2b9f63dd5d9fb4b
7
- data.tar.gz: 31addbc3877af1b1d814cb720c17e8130d98280d35831794e135e259b790de0c719236004c202649092366323a7e82a9175003be0116ac0267639d15fd186081
6
+ metadata.gz: 2d84f341a6b3e1c4dcc8501ab9772e4376bc8c9acf2441753253c331ca381ff161c95cbb93f32568aada6ac03e3bedb01343bbc507d5201c468884f286008228
7
+ data.tar.gz: f8a85acb4fdd063714316490289bae356a731931aa8c4202a80f677b70d2f02258f69991e0d996cfaadf80dbcad8c505c61b4a6a91456514bbc972e0d7861148
@@ -23,12 +23,16 @@ nl_attribute_new(int argc, VALUE *argv, VALUE klass)
23
23
 
24
24
  rb_scan_args(argc, argv, "2*", &rb_document, &rb_name, &rest);
25
25
 
26
- if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
27
- rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
26
+ if (rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
27
+ document = nl_rb_document_unwrap(rb_document);
28
+ } else if (rb_obj_is_kind_of(rb_document, cNokolexborNode)) {
29
+ lxb_dom_node_t *node = nl_rb_node_unwrap(rb_document);
30
+ document = node->owner_document;
31
+ rb_document = nl_rb_document_get(rb_document);
32
+ } else {
33
+ rb_raise(rb_eArgError, "Expected a Document or Node, got %s", rb_class2name(CLASS_OF(rb_document)));
28
34
  }
29
35
 
30
- document = nl_rb_document_unwrap(rb_document);
31
-
32
36
  const char *c_name = StringValuePtr(rb_name);
33
37
  size_t name_len = RSTRING_LEN(rb_name);
34
38
  lxb_dom_attr_t *attr = lxb_dom_attr_interface_create(document);
@@ -1,6 +1,7 @@
1
1
  #include "nokolexbor.h"
2
2
 
3
3
  VALUE cNokolexborCData;
4
+ extern VALUE cNokolexborNode;
4
5
  extern VALUE cNokolexborText;
5
6
  extern VALUE mNokolexbor;
6
7
 
@@ -22,12 +23,16 @@ nl_cdata_new(int argc, VALUE *argv, VALUE klass)
22
23
 
23
24
  rb_scan_args(argc, argv, "2*", &rb_content, &rb_document, &rest);
24
25
 
25
- if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
26
- rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
26
+ if (rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
27
+ document = nl_rb_document_unwrap(rb_document);
28
+ } else if (rb_obj_is_kind_of(rb_document, cNokolexborNode)) {
29
+ lxb_dom_node_t *node = nl_rb_node_unwrap(rb_document);
30
+ document = node->owner_document;
31
+ rb_document = nl_rb_document_get(rb_document);
32
+ } else {
33
+ rb_raise(rb_eArgError, "Expected a Document or Node, got %s", rb_class2name(CLASS_OF(rb_document)));
27
34
  }
28
35
 
29
- document = nl_rb_document_unwrap(rb_document);
30
-
31
36
  const char *c_content = StringValuePtr(rb_content);
32
37
  size_t content_len = RSTRING_LEN(rb_content);
33
38
  lxb_dom_cdata_section_t *element = lxb_dom_document_create_cdata_section(document, (const lxb_char_t *)c_content, content_len);
@@ -1,6 +1,7 @@
1
1
  #include "nokolexbor.h"
2
2
 
3
3
  VALUE cNokolexborComment;
4
+ extern VALUE cNokolexborNode;
4
5
  extern VALUE cNokolexborCharacterData;
5
6
  extern VALUE mNokolexbor;
6
7
 
@@ -20,12 +21,16 @@ nl_comment_new(int argc, VALUE *argv, VALUE klass)
20
21
 
21
22
  rb_scan_args(argc, argv, "2*", &rb_content, &rb_document, &rest);
22
23
 
23
- if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
24
- rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
24
+ if (rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
25
+ document = nl_rb_document_unwrap(rb_document);
26
+ } else if (rb_obj_is_kind_of(rb_document, cNokolexborNode)) {
27
+ lxb_dom_node_t *node = nl_rb_node_unwrap(rb_document);
28
+ document = node->owner_document;
29
+ rb_document = nl_rb_document_get(rb_document);
30
+ } else {
31
+ rb_raise(rb_eArgError, "Expected a Document or Node, got %s", rb_class2name(CLASS_OF(rb_document)));
25
32
  }
26
33
 
27
- document = nl_rb_document_unwrap(rb_document);
28
-
29
34
  const char *c_content = StringValuePtr(rb_content);
30
35
  size_t content_len = RSTRING_LEN(rb_content);
31
36
  lxb_dom_comment_t *element = lxb_dom_document_create_comment(document, (const lxb_char_t *)c_content, content_len);
@@ -5,6 +5,11 @@ extern VALUE mNokolexbor;
5
5
  extern VALUE cNokolexborNode;
6
6
  VALUE cNokolexborDocument;
7
7
 
8
+ #ifdef HAVE_PTHREAD_H
9
+ #include <pthread.h>
10
+ pthread_key_t p_key_html_parser;
11
+ #endif
12
+
8
13
  static void
9
14
  free_nl_document(lxb_html_document_t *document)
10
15
  {
@@ -28,7 +33,11 @@ nl_document_parse_native(VALUE self, VALUE rb_html)
28
33
  const char *html_c = StringValuePtr(rb_html);
29
34
  size_t html_len = RSTRING_LEN(rb_html);
30
35
 
31
- static lxb_html_parser_t *html_parser = NULL;
36
+ #ifdef HAVE_PTHREAD_H
37
+ lxb_html_parser_t *html_parser = (lxb_html_parser_t *)pthread_getspecific(p_key_html_parser);
38
+ #else
39
+ lxb_html_parser_t *html_parser = NULL;
40
+ #endif
32
41
  if (html_parser == NULL) {
33
42
  html_parser = lxb_html_parser_create();
34
43
  lxb_status_t status = lxb_html_parser_init(html_parser);
@@ -38,10 +47,17 @@ nl_document_parse_native(VALUE self, VALUE rb_html)
38
47
  nl_raise_lexbor_error(status);
39
48
  }
40
49
  html_parser->tree->scripting = true;
50
+ #ifdef HAVE_PTHREAD_H
51
+ pthread_setspecific(p_key_html_parser, html_parser);
52
+ #endif
41
53
  }
42
54
 
43
55
  lxb_html_document_t *document = lxb_html_parse(html_parser, (const lxb_char_t *)html_c, html_len);
44
56
 
57
+ #ifndef HAVE_PTHREAD_H
58
+ lxb_html_parser_destroy(html_parser);
59
+ #endif
60
+
45
61
  if (document == NULL) {
46
62
  rb_raise(rb_eRuntimeError, "Error parsing document");
47
63
  }
@@ -115,8 +131,21 @@ nl_document_root(VALUE self)
115
131
  return nl_rb_node_create(lxb_dom_document_root(doc), self);
116
132
  }
117
133
 
134
+ static void
135
+ free_html_parser(void *data)
136
+ {
137
+ lxb_html_parser_t *html_parser = (lxb_html_parser_t *)data;
138
+ if (html_parser != NULL) {
139
+ html_parser = lxb_html_parser_destroy(html_parser);
140
+ }
141
+ }
142
+
118
143
  void Init_nl_document(void)
119
144
  {
145
+ #ifdef HAVE_PTHREAD_H
146
+ pthread_key_create(&p_key_html_parser, free_html_parser);
147
+ #endif
148
+
120
149
  cNokolexborDocument = rb_define_class_under(mNokolexbor, "Document", cNokolexborNode);
121
150
  rb_define_singleton_method(cNokolexborDocument, "new", nl_document_new, 0);
122
151
  rb_define_singleton_method(cNokolexborDocument, "parse_native", nl_document_parse_native, 1);
@@ -22,12 +22,16 @@ nl_document_fragment_new(int argc, VALUE *argv, VALUE klass)
22
22
 
23
23
  rb_scan_args(argc, argv, "1*", &rb_document, &rest);
24
24
 
25
- if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
26
- rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
25
+ if (rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
26
+ document = nl_rb_document_unwrap(rb_document);
27
+ } else if (rb_obj_is_kind_of(rb_document, cNokolexborNode)) {
28
+ lxb_dom_node_t *node = nl_rb_node_unwrap(rb_document);
29
+ document = node->owner_document;
30
+ rb_document = nl_rb_document_get(rb_document);
31
+ } else {
32
+ rb_raise(rb_eArgError, "Expected a Document or Node, got %s", rb_class2name(CLASS_OF(rb_document)));
27
33
  }
28
34
 
29
- document = nl_rb_document_unwrap(rb_document);
30
-
31
35
  lxb_dom_document_fragment_t *node = lxb_dom_document_create_document_fragment(document);
32
36
  if (node == NULL) {
33
37
  rb_raise(rb_eRuntimeError, "Error creating document fragment");
@@ -1,6 +1,13 @@
1
1
  #include "nokolexbor.h"
2
+ #include "config.h"
2
3
  #include "libxml/tree.h"
3
4
 
5
+ #ifdef HAVE_PTHREAD_H
6
+ #include <pthread.h>
7
+ pthread_key_t p_key_css_parser;
8
+ pthread_key_t p_key_selectors;
9
+ #endif
10
+
4
11
  #define SORT_NAME nl_css_result
5
12
  #define SORT_TYPE lxb_dom_node_t *
6
13
  #define SORT_CMP(x, y) (x->user >= y->user ? (x->user == y->user ? 0 : 1) : -1)
@@ -113,12 +120,16 @@ nl_node_new(int argc, VALUE *argv, VALUE klass)
113
120
 
114
121
  rb_scan_args(argc, argv, "2*", &rb_name, &rb_document, &rest);
115
122
 
116
- if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
117
- rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
123
+ if (rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
124
+ document = nl_rb_document_unwrap(rb_document);
125
+ } else if (rb_obj_is_kind_of(rb_document, cNokolexborNode)) {
126
+ lxb_dom_node_t *node = nl_rb_node_unwrap(rb_document);
127
+ document = node->owner_document;
128
+ rb_document = nl_rb_document_get(rb_document);
129
+ } else {
130
+ rb_raise(rb_eArgError, "Expected a Document or Node, got %s", rb_class2name(CLASS_OF(rb_document)));
118
131
  }
119
132
 
120
- document = nl_rb_document_unwrap(rb_document);
121
-
122
133
  const char *c_name = StringValuePtr(rb_name);
123
134
  size_t name_len = RSTRING_LEN(rb_name);
124
135
  lxb_dom_element_t *element = lxb_dom_document_create_element(document, (const lxb_char_t *)c_name, name_len, NULL);
@@ -366,8 +377,13 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
366
377
  lxb_dom_node_t *node = nl_rb_node_unwrap(self);
367
378
 
368
379
  lxb_status_t status;
369
- static lxb_css_parser_t *css_parser = NULL;
370
- static lxb_selectors_t *selectors = NULL;
380
+ #ifdef HAVE_PTHREAD_H
381
+ lxb_css_parser_t *css_parser = (lxb_css_parser_t *)pthread_getspecific(p_key_css_parser);
382
+ lxb_selectors_t *selectors = (lxb_selectors_t *)pthread_getspecific(p_key_selectors);
383
+ #else
384
+ lxb_css_parser_t *css_parser = NULL;
385
+ lxb_selectors_t *selectors = NULL;
386
+ #endif
371
387
  lxb_css_selector_list_t *list = NULL;
372
388
 
373
389
  /* CSS parser. */
@@ -377,6 +393,9 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
377
393
  if (status != LXB_STATUS_OK) {
378
394
  goto init_error;
379
395
  }
396
+ #ifdef HAVE_PTHREAD_H
397
+ pthread_setspecific(p_key_css_parser, css_parser);
398
+ #endif
380
399
  }
381
400
 
382
401
  /* Selectors. */
@@ -386,6 +405,9 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
386
405
  if (status != LXB_STATUS_OK) {
387
406
  goto init_error;
388
407
  }
408
+ #ifdef HAVE_PTHREAD_H
409
+ pthread_setspecific(p_key_selectors, selectors);
410
+ #endif
389
411
  }
390
412
 
391
413
  /* Parse and get the log. */
@@ -397,11 +419,16 @@ nl_node_find(VALUE self, VALUE selector, lxb_selectors_cb_f cb, void *ctx)
397
419
 
398
420
  /* Find HTML nodes by CSS Selectors. */
399
421
  status = lxb_selectors_find(selectors, node, list, cb, ctx);
400
- if (status != LXB_STATUS_OK) {
401
- goto cleanup;
402
- }
403
422
 
404
423
  cleanup:
424
+ #ifndef HAVE_PTHREAD_H
425
+ /* Destroy Selectors object. */
426
+ (void)lxb_selectors_destroy(selectors, true);
427
+
428
+ /* Destroy resources for CSS Parser. */
429
+ (void)lxb_css_parser_destroy(css_parser, true);
430
+ #endif
431
+
405
432
  /* Destroy all object for all CSS Selector List. */
406
433
  lxb_css_selector_list_destroy_memory(list);
407
434
 
@@ -416,8 +443,10 @@ init_error:
416
443
  lxb_css_parser_destroy(css_parser, true);
417
444
  css_parser = NULL;
418
445
 
419
- /* Destroy all object for all CSS Selector List. */
420
- lxb_css_selector_list_destroy_memory(list);
446
+ #ifdef HAVE_PTHREAD_H
447
+ pthread_setspecific(p_key_css_parser, NULL);
448
+ pthread_setspecific(p_key_selectors, NULL);
449
+ #endif
421
450
 
422
451
  return status;
423
452
  }
@@ -1173,8 +1202,30 @@ nl_node_path(VALUE self)
1173
1202
  return ret;
1174
1203
  }
1175
1204
 
1205
+ static void
1206
+ free_css_parser(void *data)
1207
+ {
1208
+ lxb_css_parser_t *css_parser = (lxb_css_parser_t *)data;
1209
+ if (css_parser != NULL) {
1210
+ lxb_css_parser_destroy(css_parser, true);
1211
+ }
1212
+ }
1213
+
1214
+ static void
1215
+ free_selectors(void *data)
1216
+ {
1217
+ lxb_selectors_t *selectors = (lxb_selectors_t *)data;
1218
+ if (selectors != NULL) {
1219
+ lxb_selectors_destroy(selectors, true);
1220
+ }
1221
+ }
1222
+
1176
1223
  void Init_nl_node(void)
1177
1224
  {
1225
+ #ifdef HAVE_PTHREAD_H
1226
+ pthread_key_create(&p_key_css_parser, free_css_parser);
1227
+ pthread_key_create(&p_key_selectors, free_selectors);
1228
+ #endif
1178
1229
  cNokolexborNode = rb_define_class_under(mNokolexbor, "Node", rb_cObject);
1179
1230
  rb_undef_alloc_func(cNokolexborNode);
1180
1231
 
@@ -21,12 +21,16 @@ nl_processing_instruction_new(int argc, VALUE *argv, VALUE klass)
21
21
 
22
22
  rb_scan_args(argc, argv, "3*", &rb_name, &rb_content, &rb_document, &rest);
23
23
 
24
- if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
25
- rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
24
+ if (rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
25
+ document = nl_rb_document_unwrap(rb_document);
26
+ } else if (rb_obj_is_kind_of(rb_document, cNokolexborNode)) {
27
+ lxb_dom_node_t *node = nl_rb_node_unwrap(rb_document);
28
+ document = node->owner_document;
29
+ rb_document = nl_rb_document_get(rb_document);
30
+ } else {
31
+ rb_raise(rb_eArgError, "Expected a Document or Node, got %s", rb_class2name(CLASS_OF(rb_document)));
26
32
  }
27
33
 
28
- document = nl_rb_document_unwrap(rb_document);
29
-
30
34
  const char *c_name = StringValuePtr(rb_name);
31
35
  size_t name_len = RSTRING_LEN(rb_name);
32
36
  const char *c_content = StringValuePtr(rb_content);
@@ -1,6 +1,7 @@
1
1
  #include "nokolexbor.h"
2
2
 
3
3
  VALUE cNokolexborText;
4
+ extern VALUE cNokolexborNode;
4
5
  extern VALUE cNokolexborCharacterData;
5
6
  extern VALUE mNokolexbor;
6
7
 
@@ -20,12 +21,16 @@ nl_text_new(int argc, VALUE *argv, VALUE klass)
20
21
 
21
22
  rb_scan_args(argc, argv, "2*", &rb_text, &rb_document, &rest);
22
23
 
23
- if (!rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
24
- rb_raise(rb_eArgError, "Document must be a Nokolexbor::Document");
24
+ if (rb_obj_is_kind_of(rb_document, cNokolexborDocument)) {
25
+ document = nl_rb_document_unwrap(rb_document);
26
+ } else if (rb_obj_is_kind_of(rb_document, cNokolexborNode)) {
27
+ lxb_dom_node_t *node = nl_rb_node_unwrap(rb_document);
28
+ document = node->owner_document;
29
+ rb_document = nl_rb_document_get(rb_document);
30
+ } else {
31
+ rb_raise(rb_eArgError, "Expected a Document or Node, got %s", rb_class2name(CLASS_OF(rb_document)));
25
32
  }
26
33
 
27
- document = nl_rb_document_unwrap(rb_document);
28
-
29
34
  const char *c_text = StringValuePtr(rb_text);
30
35
  size_t text_len = RSTRING_LEN(rb_text);
31
36
  lxb_dom_text_t *element = lxb_dom_document_create_text_node(document, (const lxb_char_t *)c_text, text_len);
@@ -11,7 +11,13 @@ module Nokolexbor
11
11
  # @return [Document]
12
12
  def self.new(document, list = [])
13
13
  obj = allocate
14
- obj.instance_variable_set(:@document, document)
14
+ if document.is_a?(Document) || document.nil?
15
+ obj.instance_variable_set(:@document, document)
16
+ elsif document.is_a?(Node)
17
+ obj.instance_variable_set(:@document, document.document)
18
+ else
19
+ raise ArgumentError, "Expected a Document or Node, got #{document.class}"
20
+ end
15
21
  list.each { |x| obj << x }
16
22
  yield obj if block_given?
17
23
  obj
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Nokolexbor
4
- VERSION = '0.6.2'
4
+ VERSION = '0.6.4'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokolexbor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.2
4
+ version: 0.6.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yicheng Zhou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-05-13 00:00:00.000000000 Z
11
+ date: 2026-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
@@ -538,7 +538,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
538
538
  - !ruby/object:Gem::Version
539
539
  version: '0'
540
540
  requirements: []
541
- rubygems_version: 3.0.3.1
541
+ rubygems_version: 3.1.6
542
542
  signing_key:
543
543
  specification_version: 4
544
544
  summary: High-performance HTML5 parser, with support for both CSS selectors and XPath.