nokogiri 1.12.5 → 1.14.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (156) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +41 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +23 -14
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -66
  8. data/ext/nokogiri/extconf.rb +159 -63
  9. data/ext/nokogiri/gumbo.c +21 -11
  10. data/ext/nokogiri/html4_document.c +2 -2
  11. data/ext/nokogiri/html4_element_description.c +1 -1
  12. data/ext/nokogiri/html4_entity_lookup.c +2 -2
  13. data/ext/nokogiri/html4_sax_parser_context.c +3 -9
  14. data/ext/nokogiri/html4_sax_push_parser.c +1 -1
  15. data/ext/nokogiri/nokogiri.c +38 -51
  16. data/ext/nokogiri/nokogiri.h +26 -14
  17. data/ext/nokogiri/test_global_handlers.c +1 -1
  18. data/ext/nokogiri/xml_attr.c +3 -3
  19. data/ext/nokogiri/xml_attribute_decl.c +5 -5
  20. data/ext/nokogiri/xml_cdata.c +3 -3
  21. data/ext/nokogiri/xml_comment.c +1 -1
  22. data/ext/nokogiri/xml_document.c +53 -44
  23. data/ext/nokogiri/xml_document_fragment.c +1 -3
  24. data/ext/nokogiri/xml_dtd.c +11 -11
  25. data/ext/nokogiri/xml_element_content.c +3 -3
  26. data/ext/nokogiri/xml_element_decl.c +5 -5
  27. data/ext/nokogiri/xml_encoding_handler.c +28 -14
  28. data/ext/nokogiri/xml_entity_decl.c +6 -6
  29. data/ext/nokogiri/xml_entity_reference.c +1 -1
  30. data/ext/nokogiri/xml_namespace.c +80 -14
  31. data/ext/nokogiri/xml_node.c +982 -396
  32. data/ext/nokogiri/xml_node_set.c +4 -6
  33. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  34. data/ext/nokogiri/xml_reader.c +133 -32
  35. data/ext/nokogiri/xml_relax_ng.c +1 -3
  36. data/ext/nokogiri/xml_sax_parser.c +23 -17
  37. data/ext/nokogiri/xml_sax_parser_context.c +11 -9
  38. data/ext/nokogiri/xml_sax_push_parser.c +1 -3
  39. data/ext/nokogiri/xml_schema.c +4 -6
  40. data/ext/nokogiri/xml_syntax_error.c +1 -1
  41. data/ext/nokogiri/xml_text.c +2 -2
  42. data/ext/nokogiri/xml_xpath_context.c +144 -114
  43. data/ext/nokogiri/xslt_stylesheet.c +122 -23
  44. data/gumbo-parser/Makefile +10 -0
  45. data/gumbo-parser/src/attribute.h +1 -1
  46. data/gumbo-parser/src/error.c +2 -2
  47. data/gumbo-parser/src/error.h +1 -1
  48. data/gumbo-parser/src/foreign_attrs.c +2 -2
  49. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  50. data/gumbo-parser/src/parser.c +8 -16
  51. data/gumbo-parser/src/replacement.h +1 -1
  52. data/gumbo-parser/src/string_buffer.h +1 -1
  53. data/gumbo-parser/src/string_piece.c +1 -1
  54. data/gumbo-parser/src/svg_attrs.c +2 -2
  55. data/gumbo-parser/src/svg_tags.c +2 -2
  56. data/gumbo-parser/src/tag.c +2 -1
  57. data/gumbo-parser/src/tag_lookup.c +7 -7
  58. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  59. data/gumbo-parser/src/tag_lookup.h +1 -1
  60. data/gumbo-parser/src/token_buffer.h +1 -1
  61. data/gumbo-parser/src/tokenizer.c +1 -1
  62. data/gumbo-parser/src/tokenizer.h +1 -1
  63. data/gumbo-parser/src/utf8.c +1 -1
  64. data/gumbo-parser/src/utf8.h +1 -1
  65. data/gumbo-parser/src/util.c +1 -3
  66. data/gumbo-parser/src/util.h +4 -0
  67. data/gumbo-parser/src/vector.h +1 -1
  68. data/lib/nokogiri/class_resolver.rb +67 -0
  69. data/lib/nokogiri/css/node.rb +9 -8
  70. data/lib/nokogiri/css/parser.rb +360 -341
  71. data/lib/nokogiri/css/parser.y +249 -244
  72. data/lib/nokogiri/css/parser_extras.rb +22 -20
  73. data/lib/nokogiri/css/syntax_error.rb +1 -0
  74. data/lib/nokogiri/css/tokenizer.rb +4 -3
  75. data/lib/nokogiri/css/tokenizer.rex +3 -2
  76. data/lib/nokogiri/css/xpath_visitor.rb +184 -85
  77. data/lib/nokogiri/css.rb +44 -6
  78. data/lib/nokogiri/decorators/slop.rb +8 -7
  79. data/lib/nokogiri/encoding_handler.rb +57 -0
  80. data/lib/nokogiri/extension.rb +4 -3
  81. data/lib/nokogiri/gumbo.rb +1 -0
  82. data/lib/nokogiri/html.rb +16 -10
  83. data/lib/nokogiri/html4/builder.rb +1 -0
  84. data/lib/nokogiri/html4/document.rb +56 -164
  85. data/lib/nokogiri/html4/document_fragment.rb +11 -7
  86. data/lib/nokogiri/html4/element_description.rb +1 -0
  87. data/lib/nokogiri/html4/element_description_defaults.rb +432 -532
  88. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  89. data/lib/nokogiri/html4/entity_lookup.rb +2 -1
  90. data/lib/nokogiri/html4/sax/parser.rb +5 -2
  91. data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
  92. data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
  93. data/lib/nokogiri/html4.rb +12 -5
  94. data/lib/nokogiri/html5/document.rb +126 -32
  95. data/lib/nokogiri/html5/document_fragment.rb +14 -4
  96. data/lib/nokogiri/html5/node.rb +12 -7
  97. data/lib/nokogiri/html5.rb +138 -222
  98. data/lib/nokogiri/jruby/dependencies.rb +2 -19
  99. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  100. data/lib/nokogiri/syntax_error.rb +1 -0
  101. data/lib/nokogiri/version/constant.rb +2 -1
  102. data/lib/nokogiri/version/info.rb +32 -24
  103. data/lib/nokogiri/version.rb +1 -0
  104. data/lib/nokogiri/xml/attr.rb +54 -3
  105. data/lib/nokogiri/xml/attribute_decl.rb +2 -1
  106. data/lib/nokogiri/xml/builder.rb +35 -33
  107. data/lib/nokogiri/xml/cdata.rb +2 -1
  108. data/lib/nokogiri/xml/character_data.rb +1 -0
  109. data/lib/nokogiri/xml/document.rb +232 -143
  110. data/lib/nokogiri/xml/document_fragment.rb +88 -42
  111. data/lib/nokogiri/xml/dtd.rb +3 -2
  112. data/lib/nokogiri/xml/element_content.rb +1 -0
  113. data/lib/nokogiri/xml/element_decl.rb +2 -1
  114. data/lib/nokogiri/xml/entity_decl.rb +3 -2
  115. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  116. data/lib/nokogiri/xml/namespace.rb +44 -0
  117. data/lib/nokogiri/xml/node/save_options.rb +14 -8
  118. data/lib/nokogiri/xml/node.rb +708 -383
  119. data/lib/nokogiri/xml/node_set.rb +134 -59
  120. data/lib/nokogiri/xml/notation.rb +12 -0
  121. data/lib/nokogiri/xml/parse_options.rb +140 -56
  122. data/lib/nokogiri/xml/pp/character_data.rb +8 -6
  123. data/lib/nokogiri/xml/pp/node.rb +26 -26
  124. data/lib/nokogiri/xml/pp.rb +1 -0
  125. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  126. data/lib/nokogiri/xml/reader.rb +20 -24
  127. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  128. data/lib/nokogiri/xml/sax/document.rb +20 -19
  129. data/lib/nokogiri/xml/sax/parser.rb +38 -36
  130. data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
  131. data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
  132. data/lib/nokogiri/xml/sax.rb +1 -0
  133. data/lib/nokogiri/xml/schema.rb +7 -6
  134. data/lib/nokogiri/xml/searchable.rb +93 -62
  135. data/lib/nokogiri/xml/syntax_error.rb +5 -4
  136. data/lib/nokogiri/xml/text.rb +1 -0
  137. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  138. data/lib/nokogiri/xml/xpath.rb +12 -0
  139. data/lib/nokogiri/xml/xpath_context.rb +2 -3
  140. data/lib/nokogiri/xml.rb +4 -3
  141. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  142. data/lib/nokogiri/xslt.rb +21 -13
  143. data/lib/nokogiri.rb +22 -27
  144. data/lib/xsd/xmlparser/nokogiri.rb +28 -25
  145. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  146. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
  147. data/ports/archives/libxml2-2.10.4.tar.xz +0 -0
  148. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
  149. metadata +20 -171
  150. data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
  151. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
  152. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -2511
  153. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +0 -31
  154. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
  155. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  156. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -1,17 +1,19 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
5
+ # :nodoc: all
4
6
  module PP
5
7
  module CharacterData
6
- def pretty_print pp # :nodoc:
7
- nice_name = self.class.name.split('::').last
8
- pp.group(2, "#(#{nice_name} ", ')') do
9
- pp.pp text
8
+ def pretty_print(pp)
9
+ nice_name = self.class.name.split("::").last
10
+ pp.group(2, "#(#{nice_name} ", ")") do
11
+ pp.pp(text)
10
12
  end
11
13
  end
12
14
 
13
- def inspect # :nodoc:
14
- "#<#{self.class.name}:#{sprintf("0x%x",object_id)} #{text.inspect}>"
15
+ def inspect
16
+ "#<#{self.class.name}:#{format("0x%x", object_id)} #{text.inspect}>"
15
17
  end
16
18
  end
17
19
  end
@@ -1,54 +1,54 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
5
+ # :nodoc: all
4
6
  module PP
5
7
  module Node
6
- def inspect # :nodoc:
7
- attributes = inspect_attributes.reject { |x|
8
- begin
9
- attribute = send x
10
- !attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
11
- rescue NoMethodError
12
- true
13
- end
14
- }.map { |attribute|
15
- "#{attribute.to_s.sub(/_\w+/, 's')}=#{send(attribute).inspect}"
16
- }.join ' '
17
- "#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{attributes}>"
18
- end
8
+ COLLECTIONS = [:attribute_nodes, :children]
19
9
 
20
- def pretty_print pp # :nodoc:
21
- nice_name = self.class.name.split('::').last
22
- pp.group(2, "#(#{nice_name}:#{sprintf("0x%x", object_id)} {", '})') do
10
+ def inspect
11
+ attributes = inspect_attributes.reject do |x|
12
+ attribute = send(x)
13
+ !attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
14
+ rescue NoMethodError
15
+ true
16
+ end.map do |attribute|
17
+ "#{attribute.to_s.sub(/_\w+/, "s")}=#{send(attribute).inspect}"
18
+ end.join(" ")
19
+ "#<#{self.class.name}:#{format("0x%x", object_id)} #{attributes}>"
20
+ end
23
21
 
22
+ def pretty_print(pp)
23
+ nice_name = self.class.name.split("::").last
24
+ pp.group(2, "#(#{nice_name}:#{format("0x%x", object_id)} {", "})") do
24
25
  pp.breakable
25
- attrs = inspect_attributes.map { |t|
26
+ attrs = inspect_attributes.filter_map do |t|
26
27
  [t, send(t)] if respond_to?(t)
27
- }.compact.find_all { |x|
28
+ end.find_all do |x|
28
29
  if x.last
29
- if [:attribute_nodes, :children].include? x.first
30
+ if COLLECTIONS.include?(x.first)
30
31
  !x.last.empty?
31
32
  else
32
33
  true
33
34
  end
34
35
  end
35
- }
36
+ end
36
37
 
37
38
  pp.seplist(attrs) do |v|
38
- if [:attribute_nodes, :children].include? v.first
39
- pp.group(2, "#{v.first.to_s.sub(/_\w+$/, 's')} = [", "]") do
39
+ if COLLECTIONS.include?(v.first)
40
+ pp.group(2, "#{v.first.to_s.sub(/_\w+$/, "s")} = [", "]") do
40
41
  pp.breakable
41
42
  pp.seplist(v.last) do |item|
42
- pp.pp item
43
+ pp.pp(item)
43
44
  end
44
45
  end
45
46
  else
46
- pp.text "#{v.first} = "
47
- pp.pp v.last
47
+ pp.text("#{v.first} = ")
48
+ pp.pp(v.last)
48
49
  end
49
50
  end
50
51
  pp.breakable
51
-
52
52
  end
53
53
  end
54
54
  end
@@ -1,3 +1,4 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require_relative "pp/node"
3
4
  require_relative "pp/character_data"
@@ -1,8 +1,10 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  class ProcessingInstruction < Node
5
- def initialize document, name, content
6
+ def initialize(document, name, content)
7
+ super(document, name)
6
8
  end
7
9
  end
8
10
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  ###
@@ -8,18 +9,18 @@ module Nokogiri
8
9
  #
9
10
  # Here is an example of usage:
10
11
  #
11
- # reader = Nokogiri::XML::Reader(<<-eoxml)
12
- # <x xmlns:tenderlove='http://tenderlovemaking.com/'>
13
- # <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
14
- # </x>
15
- # eoxml
12
+ # reader = Nokogiri::XML::Reader(<<-eoxml)
13
+ # <x xmlns:tenderlove='http://tenderlovemaking.com/'>
14
+ # <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
15
+ # </x>
16
+ # eoxml
16
17
  #
17
- # reader.each do |node|
18
+ # reader.each do |node|
18
19
  #
19
- # # node is an instance of Nokogiri::XML::Reader
20
- # puts node.name
20
+ # # node is an instance of Nokogiri::XML::Reader
21
+ # puts node.name
21
22
  #
22
- # end
23
+ # end
23
24
  #
24
25
  # Note that Nokogiri::XML::Reader#each can only be called once!! Once
25
26
  # the cursor moves through the entire document, you must parse the
@@ -70,37 +71,32 @@ module Nokogiri
70
71
  # A list of errors encountered while parsing
71
72
  attr_accessor :errors
72
73
 
73
- # The encoding for the document
74
- attr_reader :encoding
75
-
76
74
  # The XML source
77
75
  attr_reader :source
78
76
 
79
- alias :self_closing? :empty_element?
77
+ alias_method :self_closing?, :empty_element?
80
78
 
81
- def initialize source, url = nil, encoding = nil # :nodoc:
79
+ def initialize(source, url = nil, encoding = nil) # :nodoc:
82
80
  @source = source
83
81
  @errors = []
84
82
  @encoding = encoding
85
83
  end
86
84
  private :initialize
87
85
 
88
- ###
89
- # Get the attributes of the current node as a Hash
90
- # @return [Hash<String, String>] Attribute names and values
86
+ # Get the attributes and namespaces of the current node as a Hash.
87
+ #
88
+ # This is the union of Reader#attribute_hash and Reader#namespaces
89
+ #
90
+ # [Returns]
91
+ # (Hash<String, String>) Attribute names and values, and namespace prefixes and hrefs.
91
92
  def attributes
92
- attrs_hash = attribute_nodes.each_with_object({}) do |node, hash|
93
- hash[node.name] = node.to_s
94
- end
95
- ns = namespaces
96
- attrs_hash.merge!(ns) if ns
97
- attrs_hash
93
+ attribute_hash.merge(namespaces)
98
94
  end
99
95
 
100
96
  ###
101
97
  # Move the cursor through the document yielding the cursor to the block
102
98
  def each
103
- while cursor = self.read
99
+ while (cursor = read)
104
100
  yield cursor
105
101
  end
106
102
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  class << self
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  ###
@@ -65,7 +66,7 @@ module Nokogiri
65
66
  class Document
66
67
  ###
67
68
  # Called when an XML declaration is parsed
68
- def xmldecl version, encoding, standalone
69
+ def xmldecl(version, encoding, standalone)
69
70
  end
70
71
 
71
72
  ###
@@ -83,13 +84,13 @@ module Nokogiri
83
84
  # * +name+ is the name of the tag
84
85
  # * +attrs+ are an assoc list of namespaces and attributes, e.g.:
85
86
  # [ ["xmlns:foo", "http://sample.net"], ["size", "large"] ]
86
- def start_element name, attrs = []
87
+ def start_element(name, attrs = [])
87
88
  end
88
89
 
89
90
  ###
90
91
  # Called at the end of an element
91
92
  # +name+ is the tag name
92
- def end_element name
93
+ def end_element(name)
93
94
  end
94
95
 
95
96
  ###
@@ -99,16 +100,16 @@ module Nokogiri
99
100
  # +prefix+ is the namespace prefix for the element
100
101
  # +uri+ is the associated namespace URI
101
102
  # +ns+ is a hash of namespace prefix:urls associated with the element
102
- def start_element_namespace name, attrs = [], prefix = nil, uri = nil, ns = []
103
+ def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
103
104
  ###
104
105
  # Deal with SAX v1 interface
105
- name = [prefix, name].compact.join(':')
106
- attributes = ns.map { |ns_prefix,ns_uri|
107
- [['xmlns', ns_prefix].compact.join(':'), ns_uri]
108
- } + attrs.map { |attr|
109
- [[attr.prefix, attr.localname].compact.join(':'), attr.value]
110
- }
111
- start_element name, attributes
106
+ name = [prefix, name].compact.join(":")
107
+ attributes = ns.map do |ns_prefix, ns_uri|
108
+ [["xmlns", ns_prefix].compact.join(":"), ns_uri]
109
+ end + attrs.map do |attr|
110
+ [[attr.prefix, attr.localname].compact.join(":"), attr.value]
111
+ end
112
+ start_element(name, attributes)
112
113
  end
113
114
 
114
115
  ###
@@ -116,10 +117,10 @@ module Nokogiri
116
117
  # +name+ is the element's name
117
118
  # +prefix+ is the namespace prefix associated with the element
118
119
  # +uri+ is the associated namespace URI
119
- def end_element_namespace name, prefix = nil, uri = nil
120
+ def end_element_namespace(name, prefix = nil, uri = nil)
120
121
  ###
121
122
  # Deal with SAX v1 interface
122
- end_element [prefix, name].compact.join(':')
123
+ end_element([prefix, name].compact.join(":"))
123
124
  end
124
125
 
125
126
  ###
@@ -127,38 +128,38 @@ module Nokogiri
127
128
  # times given one contiguous string of characters.
128
129
  #
129
130
  # +string+ contains the character data
130
- def characters string
131
+ def characters(string)
131
132
  end
132
133
 
133
134
  ###
134
135
  # Called when comments are encountered
135
136
  # +string+ contains the comment data
136
- def comment string
137
+ def comment(string)
137
138
  end
138
139
 
139
140
  ###
140
141
  # Called on document warnings
141
142
  # +string+ contains the warning
142
- def warning string
143
+ def warning(string)
143
144
  end
144
145
 
145
146
  ###
146
147
  # Called on document errors
147
148
  # +string+ contains the error
148
- def error string
149
+ def error(string)
149
150
  end
150
151
 
151
152
  ###
152
153
  # Called when cdata blocks are found
153
154
  # +string+ contains the cdata content
154
- def cdata_block string
155
+ def cdata_block(string)
155
156
  end
156
157
 
157
158
  ###
158
159
  # Called when processing instructions are found
159
160
  # +name+ is the target of the instruction
160
161
  # +content+ is the value of the instruction
161
- def processing_instruction name, content
162
+ def processing_instruction(name, content)
162
163
  end
163
164
  end
164
165
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  module SAX
@@ -36,29 +37,29 @@ module Nokogiri
36
37
 
37
38
  # Encodings this parser supports
38
39
  ENCODINGS = {
39
- 'NONE' => 0, # No char encoding detected
40
- 'UTF-8' => 1, # UTF-8
41
- 'UTF16LE' => 2, # UTF-16 little endian
42
- 'UTF16BE' => 3, # UTF-16 big endian
43
- 'UCS4LE' => 4, # UCS-4 little endian
44
- 'UCS4BE' => 5, # UCS-4 big endian
45
- 'EBCDIC' => 6, # EBCDIC uh!
46
- 'UCS4-2143' => 7, # UCS-4 unusual ordering
47
- 'UCS4-3412' => 8, # UCS-4 unusual ordering
48
- 'UCS2' => 9, # UCS-2
49
- 'ISO-8859-1' => 10, # ISO-8859-1 ISO Latin 1
50
- 'ISO-8859-2' => 11, # ISO-8859-2 ISO Latin 2
51
- 'ISO-8859-3' => 12, # ISO-8859-3
52
- 'ISO-8859-4' => 13, # ISO-8859-4
53
- 'ISO-8859-5' => 14, # ISO-8859-5
54
- 'ISO-8859-6' => 15, # ISO-8859-6
55
- 'ISO-8859-7' => 16, # ISO-8859-7
56
- 'ISO-8859-8' => 17, # ISO-8859-8
57
- 'ISO-8859-9' => 18, # ISO-8859-9
58
- 'ISO-2022-JP' => 19, # ISO-2022-JP
59
- 'SHIFT-JIS' => 20, # Shift_JIS
60
- 'EUC-JP' => 21, # EUC-JP
61
- 'ASCII' => 22, # pure ASCII
40
+ "NONE" => 0, # No char encoding detected
41
+ "UTF-8" => 1, # UTF-8
42
+ "UTF16LE" => 2, # UTF-16 little endian
43
+ "UTF16BE" => 3, # UTF-16 big endian
44
+ "UCS4LE" => 4, # UCS-4 little endian
45
+ "UCS4BE" => 5, # UCS-4 big endian
46
+ "EBCDIC" => 6, # EBCDIC uh!
47
+ "UCS4-2143" => 7, # UCS-4 unusual ordering
48
+ "UCS4-3412" => 8, # UCS-4 unusual ordering
49
+ "UCS2" => 9, # UCS-2
50
+ "ISO-8859-1" => 10, # ISO-8859-1 ISO Latin 1
51
+ "ISO-8859-2" => 11, # ISO-8859-2 ISO Latin 2
52
+ "ISO-8859-3" => 12, # ISO-8859-3
53
+ "ISO-8859-4" => 13, # ISO-8859-4
54
+ "ISO-8859-5" => 14, # ISO-8859-5
55
+ "ISO-8859-6" => 15, # ISO-8859-6
56
+ "ISO-8859-7" => 16, # ISO-8859-7
57
+ "ISO-8859-8" => 17, # ISO-8859-8
58
+ "ISO-8859-9" => 18, # ISO-8859-9
59
+ "ISO-2022-JP" => 19, # ISO-2022-JP
60
+ "SHIFT-JIS" => 20, # Shift_JIS
61
+ "EUC-JP" => 21, # EUC-JP
62
+ "ASCII" => 22, # pure ASCII
62
63
  }
63
64
 
64
65
  # The Nokogiri::XML::SAX::Document where events will be sent.
@@ -68,7 +69,7 @@ module Nokogiri
68
69
  attr_accessor :encoding
69
70
 
70
71
  # Create a new Parser with +doc+ and +encoding+
71
- def initialize doc = Nokogiri::XML::SAX::Document.new, encoding = 'UTF-8'
72
+ def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = "UTF-8")
72
73
  @encoding = check_encoding(encoding)
73
74
  @document = doc
74
75
  @warned = false
@@ -77,7 +78,7 @@ module Nokogiri
77
78
  ###
78
79
  # Parse given +thing+ which may be a string containing xml, or an
79
80
  # IO object.
80
- def parse thing, &block
81
+ def parse(thing, &block)
81
82
  if thing.respond_to?(:read) && thing.respond_to?(:close)
82
83
  parse_io(thing, &block)
83
84
  else
@@ -87,34 +88,35 @@ module Nokogiri
87
88
 
88
89
  ###
89
90
  # Parse given +io+
90
- def parse_io io, encoding = 'ASCII'
91
- @encoding = check_encoding(encoding)
92
- ctx = ParserContext.io(io, ENCODINGS[@encoding])
91
+ def parse_io(io, encoding = @encoding)
92
+ ctx = ParserContext.io(io, ENCODINGS[check_encoding(encoding)])
93
93
  yield ctx if block_given?
94
- ctx.parse_with self
94
+ ctx.parse_with(self)
95
95
  end
96
96
 
97
97
  ###
98
98
  # Parse a file with +filename+
99
- def parse_file filename
99
+ def parse_file(filename)
100
100
  raise ArgumentError unless filename
101
101
  raise Errno::ENOENT unless File.exist?(filename)
102
102
  raise Errno::EISDIR if File.directory?(filename)
103
- ctx = ParserContext.file filename
103
+
104
+ ctx = ParserContext.file(filename)
104
105
  yield ctx if block_given?
105
- ctx.parse_with self
106
+ ctx.parse_with(self)
106
107
  end
107
108
 
108
- def parse_memory data
109
- ctx = ParserContext.memory data
109
+ def parse_memory(data)
110
+ ctx = ParserContext.memory(data)
110
111
  yield ctx if block_given?
111
- ctx.parse_with self
112
+ ctx.parse_with(self)
112
113
  end
113
114
 
114
115
  private
116
+
115
117
  def check_encoding(encoding)
116
118
  encoding.upcase.tap do |enc|
117
- raise ArgumentError.new("'#{enc}' is not a valid encoding") unless ENCODINGS[enc]
119
+ raise ArgumentError, "'#{enc}' is not a valid encoding" unless ENCODINGS[enc]
118
120
  end
119
121
  end
120
122
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  module SAX
@@ -7,9 +8,12 @@ module Nokogiri
7
8
  # by the user. Instead, you should be looking at
8
9
  # Nokogiri::XML::SAX::Parser
9
10
  class ParserContext
10
- def self.new thing, encoding = 'UTF-8'
11
- [:read, :close].all? { |x| thing.respond_to?(x) } ?
12
- io(thing, Parser::ENCODINGS[encoding]) : memory(thing)
11
+ def self.new(thing, encoding = "UTF-8")
12
+ if [:read, :close].all? { |x| thing.respond_to?(x) }
13
+ io(thing, Parser::ENCODINGS[encoding])
14
+ else
15
+ memory(thing)
16
+ end
13
17
  end
14
18
  end
15
19
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  module SAX
@@ -24,7 +25,6 @@ module Nokogiri
24
25
  # parser << "/div>"
25
26
  # parser.finish
26
27
  class PushParser
27
-
28
28
  # The Nokogiri::XML::SAX::Document on which the PushParser will be
29
29
  # operating
30
30
  attr_accessor :document
@@ -32,7 +32,7 @@ module Nokogiri
32
32
  ###
33
33
  # Create a new PushParser with +doc+ as the SAX Document, providing
34
34
  # an optional +file_name+ and +encoding+
35
- def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = 'UTF-8')
35
+ def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8")
36
36
  @document = doc
37
37
  @encoding = encoding
38
38
  @sax_parser = XML::SAX::Parser.new(doc)
@@ -44,16 +44,16 @@ module Nokogiri
44
44
  ###
45
45
  # Write a +chunk+ of XML to the PushParser. Any callback methods
46
46
  # that can be called will be called immediately.
47
- def write chunk, last_chunk = false
47
+ def write(chunk, last_chunk = false)
48
48
  native_write(chunk, last_chunk)
49
49
  end
50
- alias :<< :write
50
+ alias_method :<<, :write
51
51
 
52
52
  ###
53
53
  # Finish the parsing. This method is only necessary for
54
54
  # Nokogiri::XML::SAX::Document#end_document to be called.
55
55
  def finish
56
- write '', true
56
+ write("", true)
57
57
  end
58
58
  end
59
59
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require_relative "sax/document"
3
4
  require_relative "sax/parser_context"
4
5
  require_relative "sax/parser"
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  class << self
@@ -42,7 +43,7 @@ module Nokogiri
42
43
  ###
43
44
  # Create a new Nokogiri::XML::Schema object using a +string_or_io+
44
45
  # object.
45
- def self.new string_or_io, options = ParseOptions::DEFAULT_SCHEMA
46
+ def self.new(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
46
47
  from_document(Nokogiri::XML(string_or_io), options)
47
48
  end
48
49
 
@@ -51,9 +52,9 @@ module Nokogiri
51
52
  # Nokogiri::XML::Document object, or a filename. An Array of
52
53
  # Nokogiri::XML::SyntaxError objects found while validating the
53
54
  # +thing+ is returned.
54
- def validate thing
55
- if thing.is_a?(Nokogiri::XML::Document)
56
- validate_document(thing)
55
+ def validate(thing)
56
+ if thing.is_a?(Nokogiri::XML::Document)
57
+ validate_document(thing)
57
58
  elsif File.file?(thing)
58
59
  validate_file(thing)
59
60
  else
@@ -64,8 +65,8 @@ module Nokogiri
64
65
  ###
65
66
  # Returns true if +thing+ is a valid Nokogiri::XML::Document or
66
67
  # file.
67
- def valid? thing
68
- validate(thing).length == 0
68
+ def valid?(thing)
69
+ validate(thing).empty?
69
70
  end
70
71
  end
71
72
  end