nokolexbor 0.2.5 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/ext/nokolexbor/CMakeLists.txt +7 -4
  3. data/ext/nokolexbor/config.h.cmake.in +2 -0
  4. data/ext/nokolexbor/extconf.rb +47 -25
  5. data/ext/nokolexbor/libxml/SAX2.h +4 -4
  6. data/ext/nokolexbor/libxml/chvalid.h +21 -21
  7. data/ext/nokolexbor/libxml/dict.h +13 -13
  8. data/ext/nokolexbor/libxml/globals.h +202 -202
  9. data/ext/nokolexbor/libxml/hash.h +25 -25
  10. data/ext/nokolexbor/libxml/parser.h +5 -5
  11. data/ext/nokolexbor/libxml/parserInternals.h +4 -4
  12. data/ext/nokolexbor/libxml/pattern.h +14 -14
  13. data/ext/nokolexbor/libxml/threads.h +15 -15
  14. data/ext/nokolexbor/libxml/tree.h +5 -5
  15. data/ext/nokolexbor/libxml/xmlerror.h +5 -5
  16. data/ext/nokolexbor/libxml/xmlmemory.h +16 -16
  17. data/ext/nokolexbor/libxml/xmlstring.h +30 -30
  18. data/ext/nokolexbor/libxml/xpath.h +43 -43
  19. data/ext/nokolexbor/libxml/xpathInternals.h +128 -128
  20. data/ext/nokolexbor/memory.c +6 -6
  21. data/ext/nokolexbor/nl_cdata.c +44 -0
  22. data/ext/nokolexbor/nl_comment.c +44 -0
  23. data/ext/nokolexbor/nl_document.c +23 -9
  24. data/ext/nokolexbor/nl_node.c +191 -178
  25. data/ext/nokolexbor/nl_node_set.c +38 -73
  26. data/ext/nokolexbor/nl_text.c +44 -0
  27. data/ext/nokolexbor/nl_xpath_context.c +33 -42
  28. data/ext/nokolexbor/nokolexbor.c +7 -3
  29. data/ext/nokolexbor/nokolexbor.h +9 -7
  30. data/ext/nokolexbor/private/buf.h +1 -1
  31. data/ext/nokolexbor/private/error.h +3 -3
  32. data/ext/nokolexbor/xml_SAX2.c +8 -8
  33. data/ext/nokolexbor/xml_buf.c +19 -19
  34. data/ext/nokolexbor/xml_chvalid.c +25 -25
  35. data/ext/nokolexbor/xml_dict.c +69 -69
  36. data/ext/nokolexbor/xml_encoding.c +2 -2
  37. data/ext/nokolexbor/xml_error.c +51 -51
  38. data/ext/nokolexbor/xml_globals.c +329 -329
  39. data/ext/nokolexbor/xml_hash.c +131 -131
  40. data/ext/nokolexbor/xml_memory.c +25 -25
  41. data/ext/nokolexbor/xml_parser.c +3 -3
  42. data/ext/nokolexbor/xml_parserInternals.c +15 -15
  43. data/ext/nokolexbor/xml_pattern.c +103 -103
  44. data/ext/nokolexbor/xml_string.c +93 -93
  45. data/ext/nokolexbor/xml_threads.c +61 -61
  46. data/ext/nokolexbor/xml_tree.c +12 -12
  47. data/ext/nokolexbor/xml_xpath.c +1194 -1203
  48. data/lib/nokolexbor/document.rb +92 -1
  49. data/lib/nokolexbor/node.rb +64 -0
  50. data/lib/nokolexbor/node_set.rb +6 -5
  51. data/lib/nokolexbor/version.rb +1 -1
  52. data/lib/nokolexbor.rb +21 -1
  53. data/patches/0001-lexbor-support-text-pseudo-element.patch +1 -1
  54. metadata +7 -4
@@ -1,6 +1,97 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Nokolexbor
4
- class Document < Node
4
+ class Document < Nokolexbor::Node
5
+ def create_element(name, *contents_or_attrs, &block)
6
+ elm = Nokolexbor::Element.new(name, self, &block)
7
+ contents_or_attrs.each do |arg|
8
+ case arg
9
+ when Hash
10
+ arg.each do |k, v|
11
+ elm[k.to_s] = v.to_s
12
+ end
13
+ else
14
+ elm.content = arg
15
+ end
16
+ end
17
+ elm
18
+ end
19
+
20
+ # Create a Text Node with +string+
21
+ def create_text_node(string, &block)
22
+ Nokolexbor::Text.new(string.to_s, self, &block)
23
+ end
24
+
25
+ # Create a CDATA Node containing +string+
26
+ def create_cdata(string, &block)
27
+ Nokolexbor::CDATA.new(string.to_s, self, &block)
28
+ end
29
+
30
+ # Create a Comment Node containing +string+
31
+ def create_comment(string, &block)
32
+ Nokolexbor::Comment.new(string.to_s, self, &block)
33
+ end
34
+
35
+ # A reference to +self+
36
+ def document
37
+ self
38
+ end
39
+
40
+ def meta_encoding
41
+ if (meta = at_css("meta[charset]"))
42
+ meta[:charset]
43
+ elsif (meta = meta_content_type)
44
+ meta["content"][/charset\s*=\s*([\w-]+)/i, 1]
45
+ end
46
+ end
47
+
48
+ def meta_encoding=(encoding)
49
+ if (meta = meta_content_type)
50
+ meta["content"] = format("text/html; charset=%s", encoding)
51
+ encoding
52
+ elsif (meta = at_css("meta[charset]"))
53
+ meta["charset"] = encoding
54
+ else
55
+ meta = Nokolexbor::Node.new("meta", self)
56
+ meta["charset"] = encoding
57
+
58
+ if (head = at_css("head"))
59
+ head.prepend_child(meta)
60
+ else
61
+ set_metadata_element(meta)
62
+ end
63
+ encoding
64
+ end
65
+ end
66
+
67
+ def meta_content_type
68
+ xpath("//meta[@http-equiv and boolean(@content)]").find do |node|
69
+ node["http-equiv"] =~ /\AContent-Type\z/i
70
+ end
71
+ end
72
+ private :meta_content_type
73
+
74
+ def set_metadata_element(element)
75
+ if (head = at_css("head"))
76
+ head << element
77
+ elsif (html = at_css("html"))
78
+ head = html.prepend_child(Nokolexbor::Node.new("head", self))
79
+ head.prepend_child(element)
80
+ elsif (first = children.find do |node|
81
+ case node
82
+ when Nokolexbor::Node
83
+ true
84
+ end
85
+ end)
86
+ # We reach here only if the underlying document model
87
+ # allows <html>/<head> elements to be omitted and does not
88
+ # automatically supply them.
89
+ first.add_previous_sibling(element)
90
+ else
91
+ html = add_child(Nokolexbor::Node.new("html", self))
92
+ head = html.add_child(Nokolexbor::Node.new("head", self))
93
+ head.prepend_child(element)
94
+ end
95
+ end
5
96
  end
6
97
  end
@@ -45,6 +45,10 @@ module Nokolexbor
45
45
  type == ELEMENT_NODE
46
46
  end
47
47
 
48
+ def document?
49
+ is_a?(Nokolexbor::Document)
50
+ end
51
+
48
52
  def ancestors(selector = nil)
49
53
  return NodeSet.new(@document) unless respond_to?(:parent)
50
54
  return NodeSet.new(@document) unless parent
@@ -87,6 +91,56 @@ module Nokolexbor
87
91
  self
88
92
  end
89
93
 
94
+ def add_previous_sibling(node_or_tags)
95
+ raise ArgumentError,
96
+ "A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
97
+
98
+ add_sibling(:previous, node_or_tags)
99
+ end
100
+
101
+ def add_next_sibling(node_or_tags)
102
+ raise ArgumentError,
103
+ "A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
104
+
105
+ add_sibling(:next, node_or_tags)
106
+ end
107
+
108
+ def before(node_or_tags)
109
+ add_previous_sibling(node_or_tags)
110
+ self
111
+ end
112
+
113
+ def after(node_or_tags)
114
+ add_next_sibling(node_or_tags)
115
+ self
116
+ end
117
+
118
+ alias_method :next_sibling, :next
119
+ alias_method :previous_sibling, :previous
120
+ alias_method :next=, :add_next_sibling
121
+ alias_method :previous=, :add_previous_sibling
122
+
123
+ def <<(node_or_tags)
124
+ add_child(node_or_tags)
125
+ self
126
+ end
127
+
128
+ def prepend_child(node)
129
+ if (first = children.first)
130
+ # Mimic the error add_child would raise.
131
+ raise "Document already has a root node" if document? && !(node.comment? || node.processing_instruction?)
132
+
133
+ first.add_sibling(:previous, node)
134
+ else
135
+ add_child(node)
136
+ end
137
+ end
138
+
139
+ def traverse(&block)
140
+ children.each { |j| j.traverse(&block) }
141
+ yield(self)
142
+ end
143
+
90
144
  def matches?(selector)
91
145
  ancestors.last.css(selector).any? { |node| node == self }
92
146
  end
@@ -118,6 +172,10 @@ module Nokolexbor
118
172
  end
119
173
  end
120
174
 
175
+ def parent=(parent_node)
176
+ parent_node.add_child(self)
177
+ end
178
+
121
179
  def each
122
180
  attributes.each do |name, node|
123
181
  yield [name, node.value]
@@ -233,6 +291,12 @@ module Nokolexbor
233
291
  end
234
292
  end
235
293
 
294
+ def write_to(io, *options)
295
+ io.write(to_html(*options))
296
+ end
297
+
298
+ alias_method :write_html_to, :write_to
299
+
236
300
  private
237
301
 
238
302
  def xpath_internal(node, paths, handler, ns, binds)
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Nokolexbor
4
- class NodeSet < Node
4
+ class NodeSet < Nokolexbor::Node
5
5
  include Enumerable
6
6
 
7
7
  def self.new(document, list = [])
@@ -54,16 +54,17 @@ module Nokolexbor
54
54
  alias_method :inner_text, :content
55
55
  alias_method :to_str, :content
56
56
 
57
- def inner_html
58
- self.map(&:inner_html).join
57
+ def inner_html(*args)
58
+ self.map { |n| n.inner_html(*args) }.join
59
59
  end
60
60
 
61
- def outer_html
62
- self.map(&:outer_html).join
61
+ def outer_html(*args)
62
+ self.map { |n| n.outer_html(*args) }.join
63
63
  end
64
64
 
65
65
  alias_method :to_s, :outer_html
66
66
  alias_method :to_html, :outer_html
67
+ alias_method :serialize, :outer_html
67
68
 
68
69
  def remove
69
70
  self.each(&:remove)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Nokolexbor
4
- VERSION = '0.2.5'
4
+ VERSION = '0.3.1'
5
5
  end
data/lib/nokolexbor.rb CHANGED
@@ -1,6 +1,26 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'nokolexbor/nokolexbor'
3
+ begin
4
+ # pre-compiled extension by rake-compiler is located inside lib/nokolexbor/<ruby_version>/
5
+ RUBY_VERSION =~ /(\d+\.\d+)/
6
+ require "nokolexbor/#{Regexp.last_match(1)}/nokolexbor"
7
+ rescue LoadError => e
8
+ if /GLIBC/.match?(e.message)
9
+ warn(<<~EOM)
10
+ ERROR: It looks like you're trying to use Nokolexbor as a precompiled native gem on a system
11
+ with an unsupported version of glibc.
12
+ #{e.message}
13
+ If that's the case, then please install Nokolexbor via the `ruby` platform gem:
14
+ gem install nokolexbor --platform=ruby
15
+ or:
16
+ bundle config set force_ruby_platform true
17
+ EOM
18
+ raise e
19
+ end
20
+
21
+ require 'nokolexbor/nokolexbor'
22
+ end
23
+
4
24
  require 'nokolexbor/version'
5
25
  require 'nokolexbor/node'
6
26
  require 'nokolexbor/document'
@@ -31,7 +31,7 @@ index 398d1bb..b35bfab 100644
31
31
  {"after", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_AFTER], 5, 0},
32
32
  {"first-letter", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_FIRST_LETTER], 12, 0},
33
33
  - {NULL, NULL, 0, 0},
34
- + {"text", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_TEXT], 4, 0},
34
+ + {"text", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_TEXT], 4, 0},
35
35
  {NULL, NULL, 0, 0},
36
36
  {"grammar-error", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_GRAMMAR_ERROR], 13, 0},
37
37
  {"before", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_BEFORE], 6, 0},
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokolexbor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yicheng Zhou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-12-20 00:00:00.000000000 Z
11
+ date: 2022-12-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
@@ -38,7 +38,7 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '5.0'
41
- description: Nokolexbor is a high performance HTML5 parser, with support for both
41
+ description: Nokolexbor is a high-performance HTML5 parser, with support for both
42
42
  CSS selectors and XPath. It's API is designed to be compatible with Nokogiri.
43
43
  email: zyc9012@gmail.com
44
44
  executables: []
@@ -79,9 +79,12 @@ files:
79
79
  - ext/nokolexbor/libxml/xpathInternals.h
80
80
  - ext/nokolexbor/libxml/xpointer.h
81
81
  - ext/nokolexbor/memory.c
82
+ - ext/nokolexbor/nl_cdata.c
83
+ - ext/nokolexbor/nl_comment.c
82
84
  - ext/nokolexbor/nl_document.c
83
85
  - ext/nokolexbor/nl_node.c
84
86
  - ext/nokolexbor/nl_node_set.c
87
+ - ext/nokolexbor/nl_text.c
85
88
  - ext/nokolexbor/nl_xpath_context.c
86
89
  - ext/nokolexbor/nokolexbor.c
87
90
  - ext/nokolexbor/nokolexbor.h
@@ -555,5 +558,5 @@ requirements: []
555
558
  rubygems_version: 3.0.3.1
556
559
  signing_key:
557
560
  specification_version: 4
558
- summary: High performance HTML5 parser, with support for both CSS selectors and XPath.
561
+ summary: High-performance HTML5 parser, with support for both CSS selectors and XPath.
559
562
  test_files: []