nokogiri 1.12.5 → 1.14.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +41 -0
- data/LICENSE-DEPENDENCIES.md +830 -509
- data/LICENSE.md +1 -1
- data/README.md +23 -14
- data/bin/nokogiri +63 -50
- data/dependencies.yml +33 -66
- data/ext/nokogiri/extconf.rb +159 -63
- data/ext/nokogiri/gumbo.c +21 -11
- data/ext/nokogiri/html4_document.c +2 -2
- data/ext/nokogiri/html4_element_description.c +1 -1
- data/ext/nokogiri/html4_entity_lookup.c +2 -2
- data/ext/nokogiri/html4_sax_parser_context.c +3 -9
- data/ext/nokogiri/html4_sax_push_parser.c +1 -1
- data/ext/nokogiri/nokogiri.c +38 -51
- data/ext/nokogiri/nokogiri.h +26 -14
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +3 -3
- data/ext/nokogiri/xml_attribute_decl.c +5 -5
- data/ext/nokogiri/xml_cdata.c +3 -3
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +53 -44
- data/ext/nokogiri/xml_document_fragment.c +1 -3
- data/ext/nokogiri/xml_dtd.c +11 -11
- data/ext/nokogiri/xml_element_content.c +3 -3
- data/ext/nokogiri/xml_element_decl.c +5 -5
- data/ext/nokogiri/xml_encoding_handler.c +28 -14
- data/ext/nokogiri/xml_entity_decl.c +6 -6
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +80 -14
- data/ext/nokogiri/xml_node.c +982 -396
- data/ext/nokogiri/xml_node_set.c +4 -6
- data/ext/nokogiri/xml_processing_instruction.c +1 -1
- data/ext/nokogiri/xml_reader.c +133 -32
- data/ext/nokogiri/xml_relax_ng.c +1 -3
- data/ext/nokogiri/xml_sax_parser.c +23 -17
- data/ext/nokogiri/xml_sax_parser_context.c +11 -9
- data/ext/nokogiri/xml_sax_push_parser.c +1 -3
- data/ext/nokogiri/xml_schema.c +4 -6
- data/ext/nokogiri/xml_syntax_error.c +1 -1
- data/ext/nokogiri/xml_text.c +2 -2
- data/ext/nokogiri/xml_xpath_context.c +144 -114
- data/ext/nokogiri/xslt_stylesheet.c +122 -23
- data/gumbo-parser/Makefile +10 -0
- data/gumbo-parser/src/attribute.h +1 -1
- data/gumbo-parser/src/error.c +2 -2
- data/gumbo-parser/src/error.h +1 -1
- data/gumbo-parser/src/foreign_attrs.c +2 -2
- data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
- data/gumbo-parser/src/parser.c +8 -16
- data/gumbo-parser/src/replacement.h +1 -1
- data/gumbo-parser/src/string_buffer.h +1 -1
- data/gumbo-parser/src/string_piece.c +1 -1
- data/gumbo-parser/src/svg_attrs.c +2 -2
- data/gumbo-parser/src/svg_tags.c +2 -2
- data/gumbo-parser/src/tag.c +2 -1
- data/gumbo-parser/src/tag_lookup.c +7 -7
- data/gumbo-parser/src/tag_lookup.gperf +1 -0
- data/gumbo-parser/src/tag_lookup.h +1 -1
- data/gumbo-parser/src/token_buffer.h +1 -1
- data/gumbo-parser/src/tokenizer.c +1 -1
- data/gumbo-parser/src/tokenizer.h +1 -1
- data/gumbo-parser/src/utf8.c +1 -1
- data/gumbo-parser/src/utf8.h +1 -1
- data/gumbo-parser/src/util.c +1 -3
- data/gumbo-parser/src/util.h +4 -0
- data/gumbo-parser/src/vector.h +1 -1
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +9 -8
- data/lib/nokogiri/css/parser.rb +360 -341
- data/lib/nokogiri/css/parser.y +249 -244
- data/lib/nokogiri/css/parser_extras.rb +22 -20
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +184 -85
- data/lib/nokogiri/css.rb +44 -6
- data/lib/nokogiri/decorators/slop.rb +8 -7
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +4 -3
- data/lib/nokogiri/gumbo.rb +1 -0
- data/lib/nokogiri/html.rb +16 -10
- data/lib/nokogiri/html4/builder.rb +1 -0
- data/lib/nokogiri/html4/document.rb +56 -164
- data/lib/nokogiri/html4/document_fragment.rb +11 -7
- data/lib/nokogiri/html4/element_description.rb +1 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +432 -532
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +2 -1
- data/lib/nokogiri/html4/sax/parser.rb +5 -2
- data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
- data/lib/nokogiri/html4.rb +12 -5
- data/lib/nokogiri/html5/document.rb +126 -32
- data/lib/nokogiri/html5/document_fragment.rb +14 -4
- data/lib/nokogiri/html5/node.rb +12 -7
- data/lib/nokogiri/html5.rb +138 -222
- data/lib/nokogiri/jruby/dependencies.rb +2 -19
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +2 -1
- data/lib/nokogiri/version/info.rb +32 -24
- data/lib/nokogiri/version.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +54 -3
- data/lib/nokogiri/xml/attribute_decl.rb +2 -1
- data/lib/nokogiri/xml/builder.rb +35 -33
- data/lib/nokogiri/xml/cdata.rb +2 -1
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +232 -143
- data/lib/nokogiri/xml/document_fragment.rb +88 -42
- data/lib/nokogiri/xml/dtd.rb +3 -2
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +2 -1
- data/lib/nokogiri/xml/entity_decl.rb +3 -2
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +14 -8
- data/lib/nokogiri/xml/node.rb +708 -383
- data/lib/nokogiri/xml/node_set.rb +134 -59
- data/lib/nokogiri/xml/notation.rb +12 -0
- data/lib/nokogiri/xml/parse_options.rb +140 -56
- data/lib/nokogiri/xml/pp/character_data.rb +8 -6
- data/lib/nokogiri/xml/pp/node.rb +26 -26
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +3 -1
- data/lib/nokogiri/xml/reader.rb +20 -24
- data/lib/nokogiri/xml/relax_ng.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +20 -19
- data/lib/nokogiri/xml/sax/parser.rb +38 -36
- data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +7 -6
- data/lib/nokogiri/xml/searchable.rb +93 -62
- data/lib/nokogiri/xml/syntax_error.rb +5 -4
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +12 -0
- data/lib/nokogiri/xml/xpath_context.rb +2 -3
- data/lib/nokogiri/xml.rb +4 -3
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/nokogiri/xslt.rb +21 -13
- data/lib/nokogiri.rb +22 -27
- data/lib/xsd/xmlparser/nokogiri.rb +28 -25
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
- data/ports/archives/libxml2-2.10.4.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
- metadata +20 -171
- data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -2511
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +0 -31
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -1,17 +1,19 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
5
|
+
# :nodoc: all
|
4
6
|
module PP
|
5
7
|
module CharacterData
|
6
|
-
def pretty_print
|
7
|
-
nice_name = self.class.name.split(
|
8
|
-
pp.group(2, "#(#{nice_name} ",
|
9
|
-
pp.pp
|
8
|
+
def pretty_print(pp)
|
9
|
+
nice_name = self.class.name.split("::").last
|
10
|
+
pp.group(2, "#(#{nice_name} ", ")") do
|
11
|
+
pp.pp(text)
|
10
12
|
end
|
11
13
|
end
|
12
14
|
|
13
|
-
def inspect
|
14
|
-
"#<#{self.class.name}:#{
|
15
|
+
def inspect
|
16
|
+
"#<#{self.class.name}:#{format("0x%x", object_id)} #{text.inspect}>"
|
15
17
|
end
|
16
18
|
end
|
17
19
|
end
|
data/lib/nokogiri/xml/pp/node.rb
CHANGED
@@ -1,54 +1,54 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
5
|
+
# :nodoc: all
|
4
6
|
module PP
|
5
7
|
module Node
|
6
|
-
|
7
|
-
attributes = inspect_attributes.reject { |x|
|
8
|
-
begin
|
9
|
-
attribute = send x
|
10
|
-
!attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
|
11
|
-
rescue NoMethodError
|
12
|
-
true
|
13
|
-
end
|
14
|
-
}.map { |attribute|
|
15
|
-
"#{attribute.to_s.sub(/_\w+/, 's')}=#{send(attribute).inspect}"
|
16
|
-
}.join ' '
|
17
|
-
"#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{attributes}>"
|
18
|
-
end
|
8
|
+
COLLECTIONS = [:attribute_nodes, :children]
|
19
9
|
|
20
|
-
def
|
21
|
-
|
22
|
-
|
10
|
+
def inspect
|
11
|
+
attributes = inspect_attributes.reject do |x|
|
12
|
+
attribute = send(x)
|
13
|
+
!attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
|
14
|
+
rescue NoMethodError
|
15
|
+
true
|
16
|
+
end.map do |attribute|
|
17
|
+
"#{attribute.to_s.sub(/_\w+/, "s")}=#{send(attribute).inspect}"
|
18
|
+
end.join(" ")
|
19
|
+
"#<#{self.class.name}:#{format("0x%x", object_id)} #{attributes}>"
|
20
|
+
end
|
23
21
|
|
22
|
+
def pretty_print(pp)
|
23
|
+
nice_name = self.class.name.split("::").last
|
24
|
+
pp.group(2, "#(#{nice_name}:#{format("0x%x", object_id)} {", "})") do
|
24
25
|
pp.breakable
|
25
|
-
attrs = inspect_attributes.
|
26
|
+
attrs = inspect_attributes.filter_map do |t|
|
26
27
|
[t, send(t)] if respond_to?(t)
|
27
|
-
|
28
|
+
end.find_all do |x|
|
28
29
|
if x.last
|
29
|
-
if
|
30
|
+
if COLLECTIONS.include?(x.first)
|
30
31
|
!x.last.empty?
|
31
32
|
else
|
32
33
|
true
|
33
34
|
end
|
34
35
|
end
|
35
|
-
|
36
|
+
end
|
36
37
|
|
37
38
|
pp.seplist(attrs) do |v|
|
38
|
-
if
|
39
|
-
pp.group(2, "#{v.first.to_s.sub(/_\w+$/,
|
39
|
+
if COLLECTIONS.include?(v.first)
|
40
|
+
pp.group(2, "#{v.first.to_s.sub(/_\w+$/, "s")} = [", "]") do
|
40
41
|
pp.breakable
|
41
42
|
pp.seplist(v.last) do |item|
|
42
|
-
pp.pp
|
43
|
+
pp.pp(item)
|
43
44
|
end
|
44
45
|
end
|
45
46
|
else
|
46
|
-
pp.text
|
47
|
-
pp.pp
|
47
|
+
pp.text("#{v.first} = ")
|
48
|
+
pp.pp(v.last)
|
48
49
|
end
|
49
50
|
end
|
50
51
|
pp.breakable
|
51
|
-
|
52
52
|
end
|
53
53
|
end
|
54
54
|
end
|
data/lib/nokogiri/xml/pp.rb
CHANGED
data/lib/nokogiri/xml/reader.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
###
|
@@ -8,18 +9,18 @@ module Nokogiri
|
|
8
9
|
#
|
9
10
|
# Here is an example of usage:
|
10
11
|
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
12
|
+
# reader = Nokogiri::XML::Reader(<<-eoxml)
|
13
|
+
# <x xmlns:tenderlove='http://tenderlovemaking.com/'>
|
14
|
+
# <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
|
15
|
+
# </x>
|
16
|
+
# eoxml
|
16
17
|
#
|
17
|
-
#
|
18
|
+
# reader.each do |node|
|
18
19
|
#
|
19
|
-
#
|
20
|
-
#
|
20
|
+
# # node is an instance of Nokogiri::XML::Reader
|
21
|
+
# puts node.name
|
21
22
|
#
|
22
|
-
#
|
23
|
+
# end
|
23
24
|
#
|
24
25
|
# Note that Nokogiri::XML::Reader#each can only be called once!! Once
|
25
26
|
# the cursor moves through the entire document, you must parse the
|
@@ -70,37 +71,32 @@ module Nokogiri
|
|
70
71
|
# A list of errors encountered while parsing
|
71
72
|
attr_accessor :errors
|
72
73
|
|
73
|
-
# The encoding for the document
|
74
|
-
attr_reader :encoding
|
75
|
-
|
76
74
|
# The XML source
|
77
75
|
attr_reader :source
|
78
76
|
|
79
|
-
|
77
|
+
alias_method :self_closing?, :empty_element?
|
80
78
|
|
81
|
-
def initialize
|
79
|
+
def initialize(source, url = nil, encoding = nil) # :nodoc:
|
82
80
|
@source = source
|
83
81
|
@errors = []
|
84
82
|
@encoding = encoding
|
85
83
|
end
|
86
84
|
private :initialize
|
87
85
|
|
88
|
-
|
89
|
-
#
|
90
|
-
#
|
86
|
+
# Get the attributes and namespaces of the current node as a Hash.
|
87
|
+
#
|
88
|
+
# This is the union of Reader#attribute_hash and Reader#namespaces
|
89
|
+
#
|
90
|
+
# [Returns]
|
91
|
+
# (Hash<String, String>) Attribute names and values, and namespace prefixes and hrefs.
|
91
92
|
def attributes
|
92
|
-
|
93
|
-
hash[node.name] = node.to_s
|
94
|
-
end
|
95
|
-
ns = namespaces
|
96
|
-
attrs_hash.merge!(ns) if ns
|
97
|
-
attrs_hash
|
93
|
+
attribute_hash.merge(namespaces)
|
98
94
|
end
|
99
95
|
|
100
96
|
###
|
101
97
|
# Move the cursor through the document yielding the cursor to the block
|
102
98
|
def each
|
103
|
-
while cursor =
|
99
|
+
while (cursor = read)
|
104
100
|
yield cursor
|
105
101
|
end
|
106
102
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
###
|
@@ -65,7 +66,7 @@ module Nokogiri
|
|
65
66
|
class Document
|
66
67
|
###
|
67
68
|
# Called when an XML declaration is parsed
|
68
|
-
def xmldecl
|
69
|
+
def xmldecl(version, encoding, standalone)
|
69
70
|
end
|
70
71
|
|
71
72
|
###
|
@@ -83,13 +84,13 @@ module Nokogiri
|
|
83
84
|
# * +name+ is the name of the tag
|
84
85
|
# * +attrs+ are an assoc list of namespaces and attributes, e.g.:
|
85
86
|
# [ ["xmlns:foo", "http://sample.net"], ["size", "large"] ]
|
86
|
-
def start_element
|
87
|
+
def start_element(name, attrs = [])
|
87
88
|
end
|
88
89
|
|
89
90
|
###
|
90
91
|
# Called at the end of an element
|
91
92
|
# +name+ is the tag name
|
92
|
-
def end_element
|
93
|
+
def end_element(name)
|
93
94
|
end
|
94
95
|
|
95
96
|
###
|
@@ -99,16 +100,16 @@ module Nokogiri
|
|
99
100
|
# +prefix+ is the namespace prefix for the element
|
100
101
|
# +uri+ is the associated namespace URI
|
101
102
|
# +ns+ is a hash of namespace prefix:urls associated with the element
|
102
|
-
def start_element_namespace
|
103
|
+
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
|
103
104
|
###
|
104
105
|
# Deal with SAX v1 interface
|
105
|
-
name = [prefix, name].compact.join(
|
106
|
-
attributes = ns.map
|
107
|
-
[[
|
108
|
-
|
109
|
-
[[attr.prefix, attr.localname].compact.join(
|
110
|
-
|
111
|
-
start_element
|
106
|
+
name = [prefix, name].compact.join(":")
|
107
|
+
attributes = ns.map do |ns_prefix, ns_uri|
|
108
|
+
[["xmlns", ns_prefix].compact.join(":"), ns_uri]
|
109
|
+
end + attrs.map do |attr|
|
110
|
+
[[attr.prefix, attr.localname].compact.join(":"), attr.value]
|
111
|
+
end
|
112
|
+
start_element(name, attributes)
|
112
113
|
end
|
113
114
|
|
114
115
|
###
|
@@ -116,10 +117,10 @@ module Nokogiri
|
|
116
117
|
# +name+ is the element's name
|
117
118
|
# +prefix+ is the namespace prefix associated with the element
|
118
119
|
# +uri+ is the associated namespace URI
|
119
|
-
def end_element_namespace
|
120
|
+
def end_element_namespace(name, prefix = nil, uri = nil)
|
120
121
|
###
|
121
122
|
# Deal with SAX v1 interface
|
122
|
-
end_element
|
123
|
+
end_element([prefix, name].compact.join(":"))
|
123
124
|
end
|
124
125
|
|
125
126
|
###
|
@@ -127,38 +128,38 @@ module Nokogiri
|
|
127
128
|
# times given one contiguous string of characters.
|
128
129
|
#
|
129
130
|
# +string+ contains the character data
|
130
|
-
def characters
|
131
|
+
def characters(string)
|
131
132
|
end
|
132
133
|
|
133
134
|
###
|
134
135
|
# Called when comments are encountered
|
135
136
|
# +string+ contains the comment data
|
136
|
-
def comment
|
137
|
+
def comment(string)
|
137
138
|
end
|
138
139
|
|
139
140
|
###
|
140
141
|
# Called on document warnings
|
141
142
|
# +string+ contains the warning
|
142
|
-
def warning
|
143
|
+
def warning(string)
|
143
144
|
end
|
144
145
|
|
145
146
|
###
|
146
147
|
# Called on document errors
|
147
148
|
# +string+ contains the error
|
148
|
-
def error
|
149
|
+
def error(string)
|
149
150
|
end
|
150
151
|
|
151
152
|
###
|
152
153
|
# Called when cdata blocks are found
|
153
154
|
# +string+ contains the cdata content
|
154
|
-
def cdata_block
|
155
|
+
def cdata_block(string)
|
155
156
|
end
|
156
157
|
|
157
158
|
###
|
158
159
|
# Called when processing instructions are found
|
159
160
|
# +name+ is the target of the instruction
|
160
161
|
# +content+ is the value of the instruction
|
161
|
-
def processing_instruction
|
162
|
+
def processing_instruction(name, content)
|
162
163
|
end
|
163
164
|
end
|
164
165
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
module SAX
|
@@ -36,29 +37,29 @@ module Nokogiri
|
|
36
37
|
|
37
38
|
# Encodinds this parser supports
|
38
39
|
ENCODINGS = {
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
40
|
+
"NONE" => 0, # No char encoding detected
|
41
|
+
"UTF-8" => 1, # UTF-8
|
42
|
+
"UTF16LE" => 2, # UTF-16 little endian
|
43
|
+
"UTF16BE" => 3, # UTF-16 big endian
|
44
|
+
"UCS4LE" => 4, # UCS-4 little endian
|
45
|
+
"UCS4BE" => 5, # UCS-4 big endian
|
46
|
+
"EBCDIC" => 6, # EBCDIC uh!
|
47
|
+
"UCS4-2143" => 7, # UCS-4 unusual ordering
|
48
|
+
"UCS4-3412" => 8, # UCS-4 unusual ordering
|
49
|
+
"UCS2" => 9, # UCS-2
|
50
|
+
"ISO-8859-1" => 10, # ISO-8859-1 ISO Latin 1
|
51
|
+
"ISO-8859-2" => 11, # ISO-8859-2 ISO Latin 2
|
52
|
+
"ISO-8859-3" => 12, # ISO-8859-3
|
53
|
+
"ISO-8859-4" => 13, # ISO-8859-4
|
54
|
+
"ISO-8859-5" => 14, # ISO-8859-5
|
55
|
+
"ISO-8859-6" => 15, # ISO-8859-6
|
56
|
+
"ISO-8859-7" => 16, # ISO-8859-7
|
57
|
+
"ISO-8859-8" => 17, # ISO-8859-8
|
58
|
+
"ISO-8859-9" => 18, # ISO-8859-9
|
59
|
+
"ISO-2022-JP" => 19, # ISO-2022-JP
|
60
|
+
"SHIFT-JIS" => 20, # Shift_JIS
|
61
|
+
"EUC-JP" => 21, # EUC-JP
|
62
|
+
"ASCII" => 22, # pure ASCII
|
62
63
|
}
|
63
64
|
|
64
65
|
# The Nokogiri::XML::SAX::Document where events will be sent.
|
@@ -68,7 +69,7 @@ module Nokogiri
|
|
68
69
|
attr_accessor :encoding
|
69
70
|
|
70
71
|
# Create a new Parser with +doc+ and +encoding+
|
71
|
-
def initialize
|
72
|
+
def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = "UTF-8")
|
72
73
|
@encoding = check_encoding(encoding)
|
73
74
|
@document = doc
|
74
75
|
@warned = false
|
@@ -77,7 +78,7 @@ module Nokogiri
|
|
77
78
|
###
|
78
79
|
# Parse given +thing+ which may be a string containing xml, or an
|
79
80
|
# IO object.
|
80
|
-
def parse
|
81
|
+
def parse(thing, &block)
|
81
82
|
if thing.respond_to?(:read) && thing.respond_to?(:close)
|
82
83
|
parse_io(thing, &block)
|
83
84
|
else
|
@@ -87,34 +88,35 @@ module Nokogiri
|
|
87
88
|
|
88
89
|
###
|
89
90
|
# Parse given +io+
|
90
|
-
def parse_io
|
91
|
-
|
92
|
-
ctx = ParserContext.io(io, ENCODINGS[@encoding])
|
91
|
+
def parse_io(io, encoding = @encoding)
|
92
|
+
ctx = ParserContext.io(io, ENCODINGS[check_encoding(encoding)])
|
93
93
|
yield ctx if block_given?
|
94
|
-
ctx.parse_with
|
94
|
+
ctx.parse_with(self)
|
95
95
|
end
|
96
96
|
|
97
97
|
###
|
98
98
|
# Parse a file with +filename+
|
99
|
-
def parse_file
|
99
|
+
def parse_file(filename)
|
100
100
|
raise ArgumentError unless filename
|
101
101
|
raise Errno::ENOENT unless File.exist?(filename)
|
102
102
|
raise Errno::EISDIR if File.directory?(filename)
|
103
|
-
|
103
|
+
|
104
|
+
ctx = ParserContext.file(filename)
|
104
105
|
yield ctx if block_given?
|
105
|
-
ctx.parse_with
|
106
|
+
ctx.parse_with(self)
|
106
107
|
end
|
107
108
|
|
108
|
-
def parse_memory
|
109
|
-
ctx = ParserContext.memory
|
109
|
+
def parse_memory(data)
|
110
|
+
ctx = ParserContext.memory(data)
|
110
111
|
yield ctx if block_given?
|
111
|
-
ctx.parse_with
|
112
|
+
ctx.parse_with(self)
|
112
113
|
end
|
113
114
|
|
114
115
|
private
|
116
|
+
|
115
117
|
def check_encoding(encoding)
|
116
118
|
encoding.upcase.tap do |enc|
|
117
|
-
raise ArgumentError
|
119
|
+
raise ArgumentError, "'#{enc}' is not a valid encoding" unless ENCODINGS[enc]
|
118
120
|
end
|
119
121
|
end
|
120
122
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
module SAX
|
@@ -7,9 +8,12 @@ module Nokogiri
|
|
7
8
|
# by the user. Instead, you should be looking at
|
8
9
|
# Nokogiri::XML::SAX::Parser
|
9
10
|
class ParserContext
|
10
|
-
def self.new
|
11
|
-
[:read, :close].all? { |x| thing.respond_to?(x) }
|
12
|
-
io(thing, Parser::ENCODINGS[encoding])
|
11
|
+
def self.new(thing, encoding = "UTF-8")
|
12
|
+
if [:read, :close].all? { |x| thing.respond_to?(x) }
|
13
|
+
io(thing, Parser::ENCODINGS[encoding])
|
14
|
+
else
|
15
|
+
memory(thing)
|
16
|
+
end
|
13
17
|
end
|
14
18
|
end
|
15
19
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
module SAX
|
@@ -24,7 +25,6 @@ module Nokogiri
|
|
24
25
|
# parser << "/div>"
|
25
26
|
# parser.finish
|
26
27
|
class PushParser
|
27
|
-
|
28
28
|
# The Nokogiri::XML::SAX::Document on which the PushParser will be
|
29
29
|
# operating
|
30
30
|
attr_accessor :document
|
@@ -32,7 +32,7 @@ module Nokogiri
|
|
32
32
|
###
|
33
33
|
# Create a new PushParser with +doc+ as the SAX Document, providing
|
34
34
|
# an optional +file_name+ and +encoding+
|
35
|
-
def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding =
|
35
|
+
def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8")
|
36
36
|
@document = doc
|
37
37
|
@encoding = encoding
|
38
38
|
@sax_parser = XML::SAX::Parser.new(doc)
|
@@ -44,16 +44,16 @@ module Nokogiri
|
|
44
44
|
###
|
45
45
|
# Write a +chunk+ of XML to the PushParser. Any callback methods
|
46
46
|
# that can be called will be called immediately.
|
47
|
-
def write
|
47
|
+
def write(chunk, last_chunk = false)
|
48
48
|
native_write(chunk, last_chunk)
|
49
49
|
end
|
50
|
-
|
50
|
+
alias_method :<<, :write
|
51
51
|
|
52
52
|
###
|
53
53
|
# Finish the parsing. This method is only necessary for
|
54
54
|
# Nokogiri::XML::SAX::Document#end_document to be called.
|
55
55
|
def finish
|
56
|
-
write
|
56
|
+
write("", true)
|
57
57
|
end
|
58
58
|
end
|
59
59
|
end
|
data/lib/nokogiri/xml/sax.rb
CHANGED
data/lib/nokogiri/xml/schema.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
class << self
|
@@ -42,7 +43,7 @@ module Nokogiri
|
|
42
43
|
###
|
43
44
|
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
|
44
45
|
# object.
|
45
|
-
def self.new
|
46
|
+
def self.new(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
|
46
47
|
from_document(Nokogiri::XML(string_or_io), options)
|
47
48
|
end
|
48
49
|
|
@@ -51,9 +52,9 @@ module Nokogiri
|
|
51
52
|
# Nokogiri::XML::Document object, or a filename. An Array of
|
52
53
|
# Nokogiri::XML::SyntaxError objects found while validating the
|
53
54
|
# +thing+ is returned.
|
54
|
-
def validate
|
55
|
-
if thing.is_a?(Nokogiri::XML::Document)
|
56
|
-
validate_document(thing)
|
55
|
+
def validate(thing)
|
56
|
+
if thing.is_a?(Nokogiri::XML::Document)
|
57
|
+
validate_document(thing)
|
57
58
|
elsif File.file?(thing)
|
58
59
|
validate_file(thing)
|
59
60
|
else
|
@@ -64,8 +65,8 @@ module Nokogiri
|
|
64
65
|
###
|
65
66
|
# Returns true if +thing+ is a valid Nokogiri::XML::Document or
|
66
67
|
# file.
|
67
|
-
def valid?
|
68
|
-
validate(thing).
|
68
|
+
def valid?(thing)
|
69
|
+
validate(thing).empty?
|
69
70
|
end
|
70
71
|
end
|
71
72
|
end
|