nokolexbor 0.2.5 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/ext/nokolexbor/CMakeLists.txt +7 -4
  3. data/ext/nokolexbor/config.h.cmake.in +2 -0
  4. data/ext/nokolexbor/extconf.rb +47 -25
  5. data/ext/nokolexbor/libxml/SAX2.h +4 -4
  6. data/ext/nokolexbor/libxml/chvalid.h +21 -21
  7. data/ext/nokolexbor/libxml/dict.h +13 -13
  8. data/ext/nokolexbor/libxml/globals.h +202 -202
  9. data/ext/nokolexbor/libxml/hash.h +25 -25
  10. data/ext/nokolexbor/libxml/parser.h +5 -5
  11. data/ext/nokolexbor/libxml/parserInternals.h +4 -4
  12. data/ext/nokolexbor/libxml/pattern.h +14 -14
  13. data/ext/nokolexbor/libxml/threads.h +15 -15
  14. data/ext/nokolexbor/libxml/tree.h +5 -5
  15. data/ext/nokolexbor/libxml/xmlerror.h +5 -5
  16. data/ext/nokolexbor/libxml/xmlmemory.h +16 -16
  17. data/ext/nokolexbor/libxml/xmlstring.h +30 -30
  18. data/ext/nokolexbor/libxml/xpath.h +43 -43
  19. data/ext/nokolexbor/libxml/xpathInternals.h +128 -128
  20. data/ext/nokolexbor/memory.c +6 -6
  21. data/ext/nokolexbor/nl_cdata.c +44 -0
  22. data/ext/nokolexbor/nl_comment.c +44 -0
  23. data/ext/nokolexbor/nl_document.c +23 -9
  24. data/ext/nokolexbor/nl_node.c +191 -178
  25. data/ext/nokolexbor/nl_node_set.c +38 -73
  26. data/ext/nokolexbor/nl_text.c +44 -0
  27. data/ext/nokolexbor/nl_xpath_context.c +33 -42
  28. data/ext/nokolexbor/nokolexbor.c +7 -3
  29. data/ext/nokolexbor/nokolexbor.h +9 -7
  30. data/ext/nokolexbor/private/buf.h +1 -1
  31. data/ext/nokolexbor/private/error.h +3 -3
  32. data/ext/nokolexbor/xml_SAX2.c +8 -8
  33. data/ext/nokolexbor/xml_buf.c +19 -19
  34. data/ext/nokolexbor/xml_chvalid.c +25 -25
  35. data/ext/nokolexbor/xml_dict.c +69 -69
  36. data/ext/nokolexbor/xml_encoding.c +2 -2
  37. data/ext/nokolexbor/xml_error.c +51 -51
  38. data/ext/nokolexbor/xml_globals.c +329 -329
  39. data/ext/nokolexbor/xml_hash.c +131 -131
  40. data/ext/nokolexbor/xml_memory.c +25 -25
  41. data/ext/nokolexbor/xml_parser.c +3 -3
  42. data/ext/nokolexbor/xml_parserInternals.c +15 -15
  43. data/ext/nokolexbor/xml_pattern.c +103 -103
  44. data/ext/nokolexbor/xml_string.c +93 -93
  45. data/ext/nokolexbor/xml_threads.c +61 -61
  46. data/ext/nokolexbor/xml_tree.c +12 -12
  47. data/ext/nokolexbor/xml_xpath.c +1194 -1203
  48. data/lib/nokolexbor/document.rb +92 -1
  49. data/lib/nokolexbor/node.rb +64 -0
  50. data/lib/nokolexbor/node_set.rb +6 -5
  51. data/lib/nokolexbor/version.rb +1 -1
  52. data/lib/nokolexbor.rb +21 -1
  53. data/patches/0001-lexbor-support-text-pseudo-element.patch +1 -1
  54. metadata +7 -4
@@ -1,6 +1,97 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Nokolexbor
4
- class Document < Node
4
+ class Document < Nokolexbor::Node
5
+ def create_element(name, *contents_or_attrs, &block)
6
+ elm = Nokolexbor::Element.new(name, self, &block)
7
+ contents_or_attrs.each do |arg|
8
+ case arg
9
+ when Hash
10
+ arg.each do |k, v|
11
+ elm[k.to_s] = v.to_s
12
+ end
13
+ else
14
+ elm.content = arg
15
+ end
16
+ end
17
+ elm
18
+ end
19
+
20
+ # Create a Text Node with +string+
21
+ def create_text_node(string, &block)
22
+ Nokolexbor::Text.new(string.to_s, self, &block)
23
+ end
24
+
25
+ # Create a CDATA Node containing +string+
26
+ def create_cdata(string, &block)
27
+ Nokolexbor::CDATA.new(string.to_s, self, &block)
28
+ end
29
+
30
+ # Create a Comment Node containing +string+
31
+ def create_comment(string, &block)
32
+ Nokolexbor::Comment.new(string.to_s, self, &block)
33
+ end
34
+
35
+ # A reference to +self+
36
+ def document
37
+ self
38
+ end
39
+
40
+ def meta_encoding
41
+ if (meta = at_css("meta[charset]"))
42
+ meta[:charset]
43
+ elsif (meta = meta_content_type)
44
+ meta["content"][/charset\s*=\s*([\w-]+)/i, 1]
45
+ end
46
+ end
47
+
48
+ def meta_encoding=(encoding)
49
+ if (meta = meta_content_type)
50
+ meta["content"] = format("text/html; charset=%s", encoding)
51
+ encoding
52
+ elsif (meta = at_css("meta[charset]"))
53
+ meta["charset"] = encoding
54
+ else
55
+ meta = Nokolexbor::Node.new("meta", self)
56
+ meta["charset"] = encoding
57
+
58
+ if (head = at_css("head"))
59
+ head.prepend_child(meta)
60
+ else
61
+ set_metadata_element(meta)
62
+ end
63
+ encoding
64
+ end
65
+ end
66
+
67
+ def meta_content_type
68
+ xpath("//meta[@http-equiv and boolean(@content)]").find do |node|
69
+ node["http-equiv"] =~ /\AContent-Type\z/i
70
+ end
71
+ end
72
+ private :meta_content_type
73
+
74
+ def set_metadata_element(element)
75
+ if (head = at_css("head"))
76
+ head << element
77
+ elsif (html = at_css("html"))
78
+ head = html.prepend_child(Nokolexbor::Node.new("head", self))
79
+ head.prepend_child(element)
80
+ elsif (first = children.find do |node|
81
+ case node
82
+ when Nokolexbor::Node
83
+ true
84
+ end
85
+ end)
86
+ # We reach here only if the underlying document model
87
+ # allows <html>/<head> elements to be omitted and does not
88
+ # automatically supply them.
89
+ first.add_previous_sibling(element)
90
+ else
91
+ html = add_child(Nokolexbor::Node.new("html", self))
92
+ head = html.add_child(Nokolexbor::Node.new("head", self))
93
+ head.prepend_child(element)
94
+ end
95
+ end
5
96
  end
6
97
  end
@@ -45,6 +45,10 @@ module Nokolexbor
45
45
  type == ELEMENT_NODE
46
46
  end
47
47
 
48
+ def document?
49
+ is_a?(Nokolexbor::Document)
50
+ end
51
+
48
52
  def ancestors(selector = nil)
49
53
  return NodeSet.new(@document) unless respond_to?(:parent)
50
54
  return NodeSet.new(@document) unless parent
@@ -87,6 +91,56 @@ module Nokolexbor
87
91
  self
88
92
  end
89
93
 
94
+ def add_previous_sibling(node_or_tags)
95
+ raise ArgumentError,
96
+ "A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
97
+
98
+ add_sibling(:previous, node_or_tags)
99
+ end
100
+
101
+ def add_next_sibling(node_or_tags)
102
+ raise ArgumentError,
103
+ "A document may not have multiple root nodes." if parent&.document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
104
+
105
+ add_sibling(:next, node_or_tags)
106
+ end
107
+
108
+ def before(node_or_tags)
109
+ add_previous_sibling(node_or_tags)
110
+ self
111
+ end
112
+
113
+ def after(node_or_tags)
114
+ add_next_sibling(node_or_tags)
115
+ self
116
+ end
117
+
118
+ alias_method :next_sibling, :next
119
+ alias_method :previous_sibling, :previous
120
+ alias_method :next=, :add_next_sibling
121
+ alias_method :previous=, :add_previous_sibling
122
+
123
+ def <<(node_or_tags)
124
+ add_child(node_or_tags)
125
+ self
126
+ end
127
+
128
+ def prepend_child(node)
129
+ if (first = children.first)
130
+ # Mimic the error add_child would raise.
131
+ raise "Document already has a root node" if document? && !(node.comment? || node.processing_instruction?)
132
+
133
+ first.add_sibling(:previous, node)
134
+ else
135
+ add_child(node)
136
+ end
137
+ end
138
+
139
+ def traverse(&block)
140
+ children.each { |j| j.traverse(&block) }
141
+ yield(self)
142
+ end
143
+
90
144
  def matches?(selector)
91
145
  ancestors.last.css(selector).any? { |node| node == self }
92
146
  end
@@ -118,6 +172,10 @@ module Nokolexbor
118
172
  end
119
173
  end
120
174
 
175
+ def parent=(parent_node)
176
+ parent_node.add_child(self)
177
+ end
178
+
121
179
  def each
122
180
  attributes.each do |name, node|
123
181
  yield [name, node.value]
@@ -233,6 +291,12 @@ module Nokolexbor
233
291
  end
234
292
  end
235
293
 
294
+ def write_to(io, *options)
295
+ io.write(to_html(*options))
296
+ end
297
+
298
+ alias_method :write_html_to, :write_to
299
+
236
300
  private
237
301
 
238
302
  def xpath_internal(node, paths, handler, ns, binds)
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Nokolexbor
4
- class NodeSet < Node
4
+ class NodeSet < Nokolexbor::Node
5
5
  include Enumerable
6
6
 
7
7
  def self.new(document, list = [])
@@ -54,16 +54,17 @@ module Nokolexbor
54
54
  alias_method :inner_text, :content
55
55
  alias_method :to_str, :content
56
56
 
57
- def inner_html
58
- self.map(&:inner_html).join
57
+ def inner_html(*args)
58
+ self.map { |n| n.inner_html(*args) }.join
59
59
  end
60
60
 
61
- def outer_html
62
- self.map(&:outer_html).join
61
+ def outer_html(*args)
62
+ self.map { |n| n.outer_html(*args) }.join
63
63
  end
64
64
 
65
65
  alias_method :to_s, :outer_html
66
66
  alias_method :to_html, :outer_html
67
+ alias_method :serialize, :outer_html
67
68
 
68
69
  def remove
69
70
  self.each(&:remove)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Nokolexbor
4
- VERSION = '0.2.5'
4
+ VERSION = '0.3.1'
5
5
  end
data/lib/nokolexbor.rb CHANGED
@@ -1,6 +1,26 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'nokolexbor/nokolexbor'
3
+ begin
4
+ # pre-compiled extension by rake-compiler is located inside lib/nokolexbor/<ruby_version>/
5
+ RUBY_VERSION =~ /(\d+\.\d+)/
6
+ require "nokolexbor/#{Regexp.last_match(1)}/nokolexbor"
7
+ rescue LoadError => e
8
+ if /GLIBC/.match?(e.message)
9
+ warn(<<~EOM)
10
+ ERROR: It looks like you're trying to use Nokolexbor as a precompiled native gem on a system
11
+ with an unsupported version of glibc.
12
+ #{e.message}
13
+ If that's the case, then please install Nokolexbor via the `ruby` platform gem:
14
+ gem install nokolexbor --platform=ruby
15
+ or:
16
+ bundle config set force_ruby_platform true
17
+ EOM
18
+ raise e
19
+ end
20
+
21
+ require 'nokolexbor/nokolexbor'
22
+ end
23
+
4
24
  require 'nokolexbor/version'
5
25
  require 'nokolexbor/node'
6
26
  require 'nokolexbor/document'
@@ -31,7 +31,7 @@ index 398d1bb..b35bfab 100644
31
31
  {"after", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_AFTER], 5, 0},
32
32
  {"first-letter", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_FIRST_LETTER], 12, 0},
33
33
  - {NULL, NULL, 0, 0},
34
- + {"text", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_TEXT], 4, 0},
34
+ + {"text", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_TEXT], 4, 0},
35
35
  {NULL, NULL, 0, 0},
36
36
  {"grammar-error", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_GRAMMAR_ERROR], 13, 0},
37
37
  {"before", (void *) &lxb_css_selectors_pseudo_data_pseudo_element[LXB_CSS_SELECTOR_PSEUDO_ELEMENT_BEFORE], 6, 0},
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokolexbor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yicheng Zhou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-12-20 00:00:00.000000000 Z
11
+ date: 2022-12-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
@@ -38,7 +38,7 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '5.0'
41
- description: Nokolexbor is a high performance HTML5 parser, with support for both
41
+ description: Nokolexbor is a high-performance HTML5 parser, with support for both
42
42
  CSS selectors and XPath. Its API is designed to be compatible with Nokogiri.
43
43
  email: zyc9012@gmail.com
44
44
  executables: []
@@ -79,9 +79,12 @@ files:
79
79
  - ext/nokolexbor/libxml/xpathInternals.h
80
80
  - ext/nokolexbor/libxml/xpointer.h
81
81
  - ext/nokolexbor/memory.c
82
+ - ext/nokolexbor/nl_cdata.c
83
+ - ext/nokolexbor/nl_comment.c
82
84
  - ext/nokolexbor/nl_document.c
83
85
  - ext/nokolexbor/nl_node.c
84
86
  - ext/nokolexbor/nl_node_set.c
87
+ - ext/nokolexbor/nl_text.c
85
88
  - ext/nokolexbor/nl_xpath_context.c
86
89
  - ext/nokolexbor/nokolexbor.c
87
90
  - ext/nokolexbor/nokolexbor.h
@@ -555,5 +558,5 @@ requirements: []
555
558
  rubygems_version: 3.0.3.1
556
559
  signing_key:
557
560
  specification_version: 4
558
- summary: High performance HTML5 parser, with support for both CSS selectors and XPath.
561
+ summary: High-performance HTML5 parser, with support for both CSS selectors and XPath.
559
562
  test_files: []