nokogiri 1.16.8-java → 1.17.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +11 -21
- data/README.md +4 -0
- data/dependencies.yml +6 -6
- data/ext/java/nokogiri/Html4Document.java +3 -3
- data/ext/java/nokogiri/Html4SaxParserContext.java +47 -175
- data/ext/java/nokogiri/NokogiriService.java +2 -2
- data/ext/java/nokogiri/XmlCdata.java +3 -0
- data/ext/java/nokogiri/XmlDocument.java +7 -14
- data/ext/java/nokogiri/XmlDocumentFragment.java +4 -92
- data/ext/java/nokogiri/XmlDtd.java +2 -2
- data/ext/java/nokogiri/XmlEntityReference.java +16 -12
- data/ext/java/nokogiri/XmlNode.java +26 -47
- data/ext/java/nokogiri/XmlNodeSet.java +10 -1
- data/ext/java/nokogiri/XmlSaxParserContext.java +73 -36
- data/ext/java/nokogiri/XmlSchema.java +15 -16
- data/ext/java/nokogiri/XsltStylesheet.java +1 -1
- data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +1 -1
- data/ext/java/nokogiri/internals/NokogiriDomParser.java +3 -3
- data/ext/java/nokogiri/internals/NokogiriHandler.java +59 -15
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +1 -1
- data/ext/java/nokogiri/internals/ParserContext.java +51 -21
- data/ext/java/nokogiri/internals/ReaderNode.java +1 -1
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +8 -19
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +1 -1
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +1 -1
- data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +10 -11
- data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +5 -5
- data/ext/java/nokogiri/internals/c14n/{UtfHelpper.java → UtfHelper.java} +2 -2
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +8 -8
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +2 -2
- data/ext/nokogiri/extconf.rb +191 -137
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +25 -24
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +2 -10
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +163 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -6
- data/ext/nokogiri/xml_node.c +130 -104
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +54 -58
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +213 -131
- data/ext/nokogiri/xml_sax_push_parser.c +68 -49
- data/ext/nokogiri/xml_schema.c +50 -85
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +2 -4
- data/ext/nokogiri/xml_xpath_context.c +2 -2
- data/ext/nokogiri/xslt_stylesheet.c +8 -8
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +42 -6
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +44 -23
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -72
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +70 -26
- data/lib/nokogiri/xml/document_fragment.rb +84 -13
- data/lib/nokogiri/xml/node.rb +82 -11
- data/lib/nokogiri/xml/node_set.rb +9 -7
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +46 -13
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +6 -8
- data/lib/nokogiri/xml/syntax_error.rb +22 -0
- data/lib/nokogiri/xml.rb +13 -24
- data/lib/nokogiri/xslt.rb +3 -9
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- metadata +9 -5
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e72cb3083fa1cff82029cbbf2fec76f1a8dd39937c017633b0d944925d72ccaa
|
4
|
+
data.tar.gz: 45760600fd16493478685f69e2d122bd807f869052e1a3d2f640e2ba2d0a1451
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: db57663703ce10b22d3aa59ab5aa2a308ac5fda082cea1a4bb01245277b1b2df7b20eb103966b1fc814465276d921e52ff971adca42ea7e71b67fd20219fbeba
|
7
|
+
data.tar.gz: 979db2b053dc00919e9ea60a548af51eb30d2b447a8ae46bdcf5d66acba204ab5019b277614a321f03a0d8b7f6a62171d113b8d392f172c56af305ce343c5bdd
|
data/Gemfile
CHANGED
@@ -5,44 +5,34 @@ source "https://rubygems.org"
|
|
5
5
|
gemspec
|
6
6
|
|
7
7
|
group :development do
|
8
|
-
# ruby 3.4.0-dev removed some gems from the default set
|
9
|
-
#
|
10
|
-
# TODO: we should be able to remove these as our gem dependencies sort it out and we pull them in
|
11
|
-
# transitively.
|
12
|
-
gem "mutex_m"
|
13
|
-
|
14
8
|
# bootstrapping
|
15
9
|
gem "bundler", "~> 2.3"
|
16
|
-
gem "rake", "13.1
|
10
|
+
gem "rake", "13.2.1"
|
17
11
|
|
18
12
|
# building extensions
|
19
|
-
gem "rake-compiler", "1.2.
|
20
|
-
gem "rake-compiler-dock", "1.
|
13
|
+
gem "rake-compiler", "1.2.8"
|
14
|
+
gem "rake-compiler-dock", "1.5.2"
|
21
15
|
|
22
16
|
# parser generator
|
23
|
-
gem "rexical", "
|
17
|
+
gem "rexical", "1.0.8"
|
24
18
|
|
25
19
|
# tests
|
26
|
-
gem "minitest", "5.
|
20
|
+
gem "minitest", "5.25.2"
|
27
21
|
gem "minitest-parallel_fork", "2.0.0"
|
28
|
-
gem "ruby_memcheck", "
|
22
|
+
gem "ruby_memcheck", "3.0.0"
|
29
23
|
gem "rubyzip", "~> 2.3.2"
|
30
24
|
gem "simplecov", "= 0.21.2"
|
31
25
|
|
32
26
|
# rubocop
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
gem "rubocop-performance", "1.20.2"
|
38
|
-
gem "rubocop-rake", "= 0.6.0"
|
39
|
-
gem "rubocop-shopify", "2.14.0"
|
40
|
-
end
|
27
|
+
gem "standard", "1.42.1"
|
28
|
+
gem "rubocop-minitest", "0.36.0"
|
29
|
+
gem "rubocop-packaging", "0.5.2"
|
30
|
+
gem "rubocop-rake", "0.6.0"
|
41
31
|
end
|
42
32
|
|
43
33
|
# If Psych doesn't build, you can disable this group locally by running
|
44
34
|
# `bundle config set --local without rdoc`
|
45
35
|
# Then re-run `bundle install`.
|
46
36
|
group :rdoc do
|
47
|
-
gem "rdoc", "6.
|
37
|
+
gem "rdoc", "6.8.1"
|
48
38
|
end
|
data/README.md
CHANGED
@@ -117,6 +117,10 @@ Requirements:
|
|
117
117
|
- Ruby >= 3.0
|
118
118
|
- JRuby >= 9.4.0.0
|
119
119
|
|
120
|
+
If you are compiling the native extension against a system version of libxml2:
|
121
|
+
|
122
|
+
- libxml2 >= 2.9.2 (recommended >= 2.12.0)
|
123
|
+
|
120
124
|
|
121
125
|
### Native Gems: Faster, more reliable installation
|
122
126
|
|
data/dependencies.yml
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
---
|
2
2
|
libxml2:
|
3
|
-
version: "2.
|
4
|
-
sha256: "
|
5
|
-
# sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.
|
3
|
+
version: "2.13.5"
|
4
|
+
sha256: "74fc163217a3964257d3be39af943e08861263c4231f9ef5b496b6f6d4c7b2b6"
|
5
|
+
# sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.13/libxml2-2.13.5.sha256sum
|
6
6
|
|
7
7
|
libxslt:
|
8
|
-
version: "1.1.
|
9
|
-
sha256: "
|
10
|
-
# sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.
|
8
|
+
version: "1.1.42"
|
9
|
+
sha256: "85ca62cac0d41fc77d3f6033da9df6fd73d20ea2fc18b0a3609ffb4110e1baeb"
|
10
|
+
# sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.42.sha256sum
|
11
11
|
|
12
12
|
zlib:
|
13
13
|
version: "1.3.1"
|
@@ -30,7 +30,7 @@ public class Html4Document extends XmlDocument
|
|
30
30
|
|
31
31
|
private static final String DEFAULT_CONTENT_TYPE = "html";
|
32
32
|
private static final String DEFAULT_PUBLIC_ID = "-//W3C//DTD HTML 4.01//EN";
|
33
|
-
private static final String
|
33
|
+
private static final String DEFAULT_SYSTEM_ID = "http://www.w3.org/TR/html4/strict.dtd";
|
34
34
|
|
35
35
|
private String parsed_encoding = null;
|
36
36
|
|
@@ -88,7 +88,7 @@ public class Html4Document extends XmlDocument
|
|
88
88
|
getDocument(),
|
89
89
|
context.getRuntime().newString(DEFAULT_CONTENT_TYPE),
|
90
90
|
context.getRuntime().newString(DEFAULT_PUBLIC_ID),
|
91
|
-
context.getRuntime().newString(
|
91
|
+
context.getRuntime().newString(DEFAULT_SYSTEM_ID));
|
92
92
|
setInternalSubset(internalSubset);
|
93
93
|
}
|
94
94
|
|
@@ -132,7 +132,7 @@ public class Html4Document extends XmlDocument
|
|
132
132
|
}
|
133
133
|
|
134
134
|
public String
|
135
|
-
|
135
|
+
getParsedEncoding()
|
136
136
|
{
|
137
137
|
return parsed_encoding;
|
138
138
|
}
|
@@ -2,16 +2,12 @@ package nokogiri;
|
|
2
2
|
|
3
3
|
import java.io.ByteArrayInputStream;
|
4
4
|
import java.io.InputStream;
|
5
|
-
import java.nio.charset.Charset;
|
6
|
-
import java.nio.charset.IllegalCharsetNameException;
|
7
|
-
import java.nio.charset.UnsupportedCharsetException;
|
8
|
-
import java.util.regex.Matcher;
|
9
|
-
import java.util.regex.Pattern;
|
10
5
|
|
11
6
|
import org.apache.xerces.parsers.AbstractSAXParser;
|
12
7
|
import net.sourceforge.htmlunit.cyberneko.parsers.SAXParser;
|
13
8
|
import org.jruby.Ruby;
|
14
9
|
import org.jruby.RubyClass;
|
10
|
+
import org.jruby.RubyEncoding;
|
15
11
|
import org.jruby.RubyFixnum;
|
16
12
|
import org.jruby.RubyString;
|
17
13
|
import org.jruby.anno.JRubyClass;
|
@@ -23,6 +19,8 @@ import org.xml.sax.SAXException;
|
|
23
19
|
import nokogiri.internals.NokogiriHandler;
|
24
20
|
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
|
25
21
|
|
22
|
+
import static org.jruby.runtime.Helpers.invoke;
|
23
|
+
|
26
24
|
/**
|
27
25
|
* Class for Nokogiri::HTML4::SAX::ParserContext.
|
28
26
|
*
|
@@ -56,10 +54,9 @@ public class Html4SaxParserContext extends XmlSaxParserContext
|
|
56
54
|
SAXParser parser = new SAXParser();
|
57
55
|
|
58
56
|
try {
|
59
|
-
parser.setProperty(
|
60
|
-
|
61
|
-
parser.
|
62
|
-
"http://cyberneko.org/html/properties/names/attrs", "lower");
|
57
|
+
parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
|
58
|
+
parser.setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
|
59
|
+
parser.setFeature("http://cyberneko.org/html/features/report-errors", true);
|
63
60
|
|
64
61
|
// NekoHTML should not try to guess the encoding based on the meta
|
65
62
|
// tags or other information in the document. This is already
|
@@ -72,198 +69,73 @@ public class Html4SaxParserContext extends XmlSaxParserContext
|
|
72
69
|
}
|
73
70
|
}
|
74
71
|
|
75
|
-
@JRubyMethod(name = "
|
72
|
+
@JRubyMethod(name = "native_memory", meta = true)
|
76
73
|
public static IRubyObject
|
77
|
-
parse_memory(ThreadContext context,
|
78
|
-
IRubyObject klazz,
|
79
|
-
IRubyObject data,
|
80
|
-
IRubyObject encoding)
|
74
|
+
parse_memory(ThreadContext context, IRubyObject klazz, IRubyObject data, IRubyObject encoding)
|
81
75
|
{
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
ByteArrayInputStream istream = new ByteArrayInputStream(input.toString().getBytes());
|
87
|
-
ctx.setInputSource(istream);
|
88
|
-
ctx.getInputSource().setEncoding(javaEncoding);
|
89
|
-
}
|
90
|
-
return ctx;
|
91
|
-
}
|
92
|
-
|
93
|
-
public enum EncodingType {
|
94
|
-
NONE(0, "NONE"),
|
95
|
-
UTF_8(1, "UTF-8"),
|
96
|
-
UTF16LE(2, "UTF16LE"),
|
97
|
-
UTF16BE(3, "UTF16BE"),
|
98
|
-
UCS4LE(4, "UCS4LE"),
|
99
|
-
UCS4BE(5, "UCS4BE"),
|
100
|
-
EBCDIC(6, "EBCDIC"),
|
101
|
-
UCS4_2143(7, "ICS4-2143"),
|
102
|
-
UCS4_3412(8, "UCS4-3412"),
|
103
|
-
UCS2(9, "UCS2"),
|
104
|
-
ISO_8859_1(10, "ISO-8859-1"),
|
105
|
-
ISO_8859_2(11, "ISO-8859-2"),
|
106
|
-
ISO_8859_3(12, "ISO-8859-3"),
|
107
|
-
ISO_8859_4(13, "ISO-8859-4"),
|
108
|
-
ISO_8859_5(14, "ISO-8859-5"),
|
109
|
-
ISO_8859_6(15, "ISO-8859-6"),
|
110
|
-
ISO_8859_7(16, "ISO-8859-7"),
|
111
|
-
ISO_8859_8(17, "ISO-8859-8"),
|
112
|
-
ISO_8859_9(18, "ISO-8859-9"),
|
113
|
-
ISO_2022_JP(19, "ISO-2022-JP"),
|
114
|
-
SHIFT_JIS(20, "SHIFT-JIS"),
|
115
|
-
EUC_JP(21, "EUC-JP"),
|
116
|
-
ASCII(22, "ASCII");
|
117
|
-
|
118
|
-
private final int value;
|
119
|
-
private final String name;
|
120
|
-
|
121
|
-
EncodingType(int value, String name)
|
122
|
-
{
|
123
|
-
this.value = value;
|
124
|
-
this.name = name;
|
125
|
-
}
|
126
|
-
|
127
|
-
public int getValue()
|
128
|
-
{
|
129
|
-
return value;
|
130
|
-
}
|
131
|
-
|
132
|
-
public String toString()
|
133
|
-
{
|
134
|
-
return name;
|
135
|
-
}
|
136
|
-
|
137
|
-
private static transient EncodingType[] values;
|
138
|
-
|
139
|
-
// NOTE: assuming ordinal == value
|
140
|
-
static EncodingType get(final int ordinal)
|
141
|
-
{
|
142
|
-
EncodingType[] values = EncodingType.values;
|
143
|
-
if (values == null) {
|
144
|
-
values = EncodingType.values();
|
145
|
-
EncodingType.values = values;
|
76
|
+
String java_encoding = null;
|
77
|
+
if (encoding != context.runtime.getNil()) {
|
78
|
+
if (!(encoding instanceof RubyEncoding)) {
|
79
|
+
throw context.runtime.newTypeError("encoding must be kind_of Encoding");
|
146
80
|
}
|
147
|
-
|
148
|
-
return values[ordinal];
|
149
|
-
}
|
150
|
-
return null;
|
81
|
+
java_encoding = ((RubyEncoding)encoding).toString();
|
151
82
|
}
|
152
83
|
|
153
|
-
|
154
|
-
|
155
|
-
private static String
|
156
|
-
findEncodingName(final int value)
|
157
|
-
{
|
158
|
-
EncodingType type = EncodingType.get(value);
|
159
|
-
if (type == null) { return null; }
|
160
|
-
assert type.value == value;
|
161
|
-
return type.name;
|
162
|
-
}
|
163
|
-
|
164
|
-
private static String
|
165
|
-
findEncodingName(ThreadContext context, IRubyObject encoding)
|
166
|
-
{
|
167
|
-
String rubyEncoding = null;
|
168
|
-
if (encoding instanceof RubyString) {
|
169
|
-
rubyEncoding = rubyStringToString((RubyString) encoding);
|
170
|
-
} else if (encoding instanceof RubyFixnum) {
|
171
|
-
rubyEncoding = findEncodingName(RubyFixnum.fix2int((RubyFixnum) encoding));
|
172
|
-
}
|
173
|
-
if (rubyEncoding == null) { return null; }
|
174
|
-
try {
|
175
|
-
return Charset.forName(rubyEncoding).displayName();
|
176
|
-
} catch (UnsupportedCharsetException e) {
|
177
|
-
throw context.getRuntime().newEncodingCompatibilityError(rubyEncoding + "is not supported");
|
178
|
-
} catch (IllegalCharsetNameException e) {
|
179
|
-
throw context.getRuntime().newEncodingError(e.getMessage());
|
180
|
-
}
|
181
|
-
}
|
182
|
-
|
183
|
-
private static final Pattern CHARSET_PATTERN = Pattern.compile("charset(()|\\s)=(()|\\s)([a-z]|-|_|\\d)+",
|
184
|
-
Pattern.CASE_INSENSITIVE);
|
84
|
+
Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klazz);
|
85
|
+
ctx.setStringInputSourceNoEnc(context, data, context.runtime.getNil());
|
185
86
|
|
186
|
-
|
187
|
-
|
188
|
-
{
|
189
|
-
int start_pos = 0;
|
190
|
-
int end_pos = 0;
|
191
|
-
if (containsIgnoreCase(input, "charset")) {
|
192
|
-
Matcher m = CHARSET_PATTERN.matcher(input);
|
193
|
-
while (m.find()) {
|
194
|
-
start_pos = m.start();
|
195
|
-
end_pos = m.end();
|
196
|
-
}
|
87
|
+
if (java_encoding != null) {
|
88
|
+
ctx.getInputSource().setEncoding(java_encoding);
|
197
89
|
}
|
198
|
-
if (start_pos != end_pos) {
|
199
|
-
return new StringBuilder(input).replace(start_pos, end_pos, "charset=" + enc);
|
200
|
-
}
|
201
|
-
return input;
|
202
|
-
}
|
203
90
|
|
204
|
-
|
205
|
-
containsIgnoreCase(final String str, final String sub)
|
206
|
-
{
|
207
|
-
final int len = sub.length();
|
208
|
-
final int max = str.length() - len;
|
209
|
-
|
210
|
-
if (len == 0) { return true; }
|
211
|
-
final char c0Lower = Character.toLowerCase(sub.charAt(0));
|
212
|
-
final char c0Upper = Character.toUpperCase(sub.charAt(0));
|
213
|
-
|
214
|
-
for (int i = 0; i <= max; i++) {
|
215
|
-
final char ch = str.charAt(i);
|
216
|
-
if (ch != c0Lower && Character.toLowerCase(ch) != c0Lower && Character.toUpperCase(ch) != c0Upper) {
|
217
|
-
continue; // first char doesn't match
|
218
|
-
}
|
219
|
-
|
220
|
-
if (str.regionMatches(true, i + 1, sub, 0 + 1, len - 1)) {
|
221
|
-
return true;
|
222
|
-
}
|
223
|
-
}
|
224
|
-
return false;
|
91
|
+
return ctx;
|
225
92
|
}
|
226
93
|
|
227
|
-
@JRubyMethod(name = "
|
94
|
+
@JRubyMethod(name = "native_file", meta = true)
|
228
95
|
public static IRubyObject
|
229
|
-
parse_file(ThreadContext context,
|
230
|
-
IRubyObject klass,
|
231
|
-
IRubyObject data,
|
232
|
-
IRubyObject encoding)
|
96
|
+
parse_file(ThreadContext context, IRubyObject klass, IRubyObject data, IRubyObject encoding)
|
233
97
|
{
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
98
|
+
String java_encoding = null;
|
99
|
+
if (encoding != context.runtime.getNil()) {
|
100
|
+
if (!(encoding instanceof RubyEncoding)) {
|
101
|
+
throw context.runtime.newTypeError("encoding must be kind_of Encoding");
|
102
|
+
}
|
103
|
+
java_encoding = ((RubyEncoding)encoding).toString();
|
239
104
|
}
|
240
105
|
|
241
106
|
Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klass);
|
242
107
|
ctx.setInputSourceFile(context, data);
|
243
|
-
|
244
|
-
if (
|
245
|
-
ctx.getInputSource().setEncoding(
|
108
|
+
|
109
|
+
if (java_encoding != null) {
|
110
|
+
ctx.getInputSource().setEncoding(java_encoding);
|
246
111
|
}
|
112
|
+
|
247
113
|
return ctx;
|
248
114
|
}
|
249
115
|
|
250
|
-
@JRubyMethod(name = "
|
116
|
+
@JRubyMethod(name = "native_io", meta = true)
|
251
117
|
public static IRubyObject
|
252
|
-
parse_io(ThreadContext context,
|
253
|
-
IRubyObject klass,
|
254
|
-
IRubyObject data,
|
255
|
-
IRubyObject encoding)
|
118
|
+
parse_io(ThreadContext context, IRubyObject klazz, IRubyObject data, IRubyObject encoding)
|
256
119
|
{
|
257
|
-
if (!(
|
258
|
-
throw context.
|
120
|
+
if (!invoke(context, data, "respond_to?", context.runtime.newSymbol("read")).isTrue()) {
|
121
|
+
throw context.runtime.newTypeError("argument expected to respond to :read");
|
259
122
|
}
|
260
123
|
|
261
|
-
|
124
|
+
String java_encoding = null;
|
125
|
+
if (encoding != context.runtime.getNil()) {
|
126
|
+
if (!(encoding instanceof RubyEncoding)) {
|
127
|
+
throw context.runtime.newTypeError("encoding must be kind_of Encoding");
|
128
|
+
}
|
129
|
+
java_encoding = ((RubyEncoding)encoding).toString();
|
130
|
+
}
|
131
|
+
|
132
|
+
Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klazz);
|
262
133
|
ctx.setIOInputSource(context, data, context.nil);
|
263
|
-
|
264
|
-
if (
|
265
|
-
ctx.getInputSource().setEncoding(
|
134
|
+
|
135
|
+
if (java_encoding != null) {
|
136
|
+
ctx.getInputSource().setEncoding(java_encoding);
|
266
137
|
}
|
138
|
+
|
267
139
|
return ctx;
|
268
140
|
}
|
269
141
|
|
@@ -37,7 +37,7 @@ public class NokogiriService implements BasicLibraryService
|
|
37
37
|
}
|
38
38
|
|
39
39
|
private static Map<String, RubyClass>
|
40
|
-
|
40
|
+
populateNokogiriClassCache(Ruby ruby)
|
41
41
|
{
|
42
42
|
Map<String, RubyClass> nokogiriClassCache = new HashMap<String, RubyClass>();
|
43
43
|
nokogiriClassCache.put("Nokogiri::HTML4::Document", (RubyClass)ruby.getClassFromPath("Nokogiri::HTML4::Document"));
|
@@ -91,7 +91,7 @@ public class NokogiriService implements BasicLibraryService
|
|
91
91
|
createDocuments(ruby, xmlModule, htmlModule, xmlNode);
|
92
92
|
createSaxModule(ruby, xmlSaxModule, htmlSaxModule);
|
93
93
|
createXsltModule(ruby, xsltModule);
|
94
|
-
nokogiri.setInternalVariable("cache",
|
94
|
+
nokogiri.setInternalVariable("cache", populateNokogiriClassCache(ruby));
|
95
95
|
}
|
96
96
|
|
97
97
|
private void
|
@@ -46,6 +46,9 @@ public class XmlCdata extends XmlText
|
|
46
46
|
IRubyObject rbDocument = args[0];
|
47
47
|
content = args[1];
|
48
48
|
|
49
|
+
if (content.isNil()) {
|
50
|
+
throw context.runtime.newTypeError("expected second parameter to be a String, received NilClass");
|
51
|
+
}
|
49
52
|
if (!(rbDocument instanceof XmlNode)) {
|
50
53
|
String msg = "expected first parameter to be a Nokogiri::XML::Document, received " + rbDocument.getMetaClass();
|
51
54
|
throw context.runtime.newTypeError(msg);
|
@@ -414,20 +414,13 @@ public class XmlDocument extends XmlNode
|
|
414
414
|
return getCachedNodeOrCreate(context.runtime, rootNode);
|
415
415
|
}
|
416
416
|
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
// dup = doc.dup
|
425
|
-
// dup.equal?(dup.children[0].document)
|
426
|
-
//
|
427
|
-
// Since `dup.children[0].document' will end up creating a new
|
428
|
-
// XmlDocument. See #1060.
|
429
|
-
doc.resetCache();
|
430
|
-
return doc;
|
417
|
+
@JRubyMethod(visibility = Visibility.PROTECTED)
|
418
|
+
public IRubyObject
|
419
|
+
initialize_copy_with_args(ThreadContext context, IRubyObject other, IRubyObject level)
|
420
|
+
{
|
421
|
+
super.initialize_copy_with_args(context, other, level, null);
|
422
|
+
resetCache();
|
423
|
+
return this;
|
431
424
|
}
|
432
425
|
|
433
426
|
@JRubyMethod(name = "root=")
|
@@ -48,112 +48,24 @@ public class XmlDocumentFragment extends XmlNode
|
|
48
48
|
super(ruby, klazz);
|
49
49
|
}
|
50
50
|
|
51
|
-
@JRubyMethod(name = "
|
51
|
+
@JRubyMethod(name = "native_new", meta = true)
|
52
52
|
public static IRubyObject
|
53
|
-
rbNew(ThreadContext context, IRubyObject cls, IRubyObject
|
53
|
+
rbNew(ThreadContext context, IRubyObject cls, IRubyObject value)
|
54
54
|
{
|
55
|
-
if (
|
56
|
-
throw context.runtime.newArgumentError(args.length, 1);
|
57
|
-
}
|
58
|
-
|
59
|
-
if (!(args[0] instanceof XmlDocument)) {
|
55
|
+
if (!(value instanceof XmlDocument)) {
|
60
56
|
throw context.runtime.newArgumentError("first parameter must be a Nokogiri::XML::Document instance");
|
61
57
|
}
|
62
58
|
|
63
|
-
XmlDocument doc = (XmlDocument)
|
64
|
-
|
65
|
-
// make wellformed fragment, ignore invalid namespace, or add appropriate namespace to parse
|
66
|
-
if (args.length > 1 && args[1] instanceof RubyString) {
|
67
|
-
final RubyString arg1 = (RubyString) args[1];
|
68
|
-
if (XmlDocumentFragment.isTag(arg1)) {
|
69
|
-
args[1] = RubyString.newString(context.runtime, addNamespaceDeclIfNeeded(doc, rubyStringToString(arg1)));
|
70
|
-
}
|
71
|
-
}
|
59
|
+
XmlDocument doc = (XmlDocument) value;
|
72
60
|
|
73
61
|
XmlDocumentFragment fragment = (XmlDocumentFragment) NokogiriService.XML_DOCUMENT_FRAGMENT_ALLOCATOR.allocate(
|
74
62
|
context.runtime, (RubyClass)cls);
|
75
63
|
fragment.setDocument(context, doc);
|
76
64
|
fragment.setNode(context.runtime, doc.getDocument().createDocumentFragment());
|
77
65
|
|
78
|
-
Helpers.invoke(context, fragment, "initialize", args, block);
|
79
66
|
return fragment;
|
80
67
|
}
|
81
68
|
|
82
|
-
private static final ByteList TAG_BEG = ByteList.create("<");
|
83
|
-
private static final ByteList TAG_END = ByteList.create(">");
|
84
|
-
|
85
|
-
private static boolean
|
86
|
-
isTag(final RubyString str)
|
87
|
-
{
|
88
|
-
return str.getByteList().startsWith(TAG_BEG) && str.getByteList().endsWith(TAG_END);
|
89
|
-
}
|
90
|
-
|
91
|
-
private static boolean
|
92
|
-
isNamespaceDefined(String qName, NamedNodeMap nodeMap)
|
93
|
-
{
|
94
|
-
if (isNamespace(qName.intern())) { return true; }
|
95
|
-
for (int i = 0; i < nodeMap.getLength(); i++) {
|
96
|
-
Attr attr = (Attr)nodeMap.item(i);
|
97
|
-
if (isNamespace(attr.getNodeName())) {
|
98
|
-
String localPart = getLocalNameForNamespace(attr.getNodeName(), null);
|
99
|
-
if (getPrefix(qName).equals(localPart)) {
|
100
|
-
return true;
|
101
|
-
}
|
102
|
-
}
|
103
|
-
}
|
104
|
-
return false;
|
105
|
-
}
|
106
|
-
|
107
|
-
private static final Pattern QNAME_RE = Pattern.compile("[^</:>\\s]+:[^</:>=\\s]+");
|
108
|
-
private static final Pattern START_TAG_RE = Pattern.compile("<[^</>]+>");
|
109
|
-
|
110
|
-
private static String
|
111
|
-
addNamespaceDeclIfNeeded(XmlDocument doc, String tags)
|
112
|
-
{
|
113
|
-
if (doc.getDocument() == null) { return tags; }
|
114
|
-
if (doc.getDocument().getDocumentElement() == null) { return tags; }
|
115
|
-
Matcher matcher = START_TAG_RE.matcher(tags);
|
116
|
-
Map<CharSequence, CharSequence> rewriteTable = null;
|
117
|
-
while (matcher.find()) {
|
118
|
-
String start_tag = matcher.group();
|
119
|
-
Matcher matcher2 = QNAME_RE.matcher(start_tag);
|
120
|
-
while (matcher2.find()) {
|
121
|
-
String qName = matcher2.group();
|
122
|
-
NamedNodeMap nodeMap = doc.getDocument().getDocumentElement().getAttributes();
|
123
|
-
if (isNamespaceDefined(qName, nodeMap)) {
|
124
|
-
CharSequence namespaceDecl = getNamespaceDecl(getPrefix(qName), nodeMap);
|
125
|
-
if (namespaceDecl != null) {
|
126
|
-
if (rewriteTable == null) { rewriteTable = new HashMap<CharSequence, CharSequence>(8, 1); }
|
127
|
-
StringBuilder str = new StringBuilder(qName.length() + namespaceDecl.length() + 3);
|
128
|
-
String key = str.append('<').append(qName).append('>').toString();
|
129
|
-
str.setCharAt(key.length() - 1, ' '); // (last) '>' -> ' '
|
130
|
-
rewriteTable.put(key, str.append(namespaceDecl).append('>'));
|
131
|
-
}
|
132
|
-
}
|
133
|
-
}
|
134
|
-
}
|
135
|
-
if (rewriteTable != null) {
|
136
|
-
for (Map.Entry<CharSequence, CharSequence> e : rewriteTable.entrySet()) {
|
137
|
-
tags = tags.replace(e.getKey(), e.getValue());
|
138
|
-
}
|
139
|
-
}
|
140
|
-
|
141
|
-
return tags;
|
142
|
-
}
|
143
|
-
|
144
|
-
private static CharSequence
|
145
|
-
getNamespaceDecl(final String prefix, NamedNodeMap nodeMap)
|
146
|
-
{
|
147
|
-
for (int i = 0; i < nodeMap.getLength(); i++) {
|
148
|
-
Attr attr = (Attr) nodeMap.item(i);
|
149
|
-
if (prefix.equals(attr.getLocalName())) {
|
150
|
-
return new StringBuilder().
|
151
|
-
append(attr.getName()).append('=').append('"').append(attr.getValue()).append('"');
|
152
|
-
}
|
153
|
-
}
|
154
|
-
return null;
|
155
|
-
}
|
156
|
-
|
157
69
|
@Override
|
158
70
|
public void
|
159
71
|
relink_namespace(ThreadContext context)
|
@@ -141,7 +141,7 @@ public class XmlDtd extends XmlNode
|
|
141
141
|
*
|
142
142
|
* NekoDTD parser returns a new document node containing elements
|
143
143
|
* representing the dtd declarations. The plan is to get the root
|
144
|
-
* element and adopt it into the correct document,
|
144
|
+
* element and adopt it into the correct document, stripping the
|
145
145
|
* Document provided by NekoDTD.
|
146
146
|
*
|
147
147
|
*/
|
@@ -454,7 +454,7 @@ public class XmlDtd extends XmlNode
|
|
454
454
|
* This recursive function will not descend into an
|
455
455
|
* 'externalSubset' node, thus for an internal subset it only
|
456
456
|
* extracts nodes in the internal subset, and for an external
|
457
|
-
* subset it extracts everything and
|
457
|
+
* subset it extracts everything and assumes <code>node</code>
|
458
458
|
* and all children are part of the external subset.
|
459
459
|
*/
|
460
460
|
protected IRubyObject[]
|
@@ -61,19 +61,23 @@ public class XmlEntityReference extends XmlNode
|
|
61
61
|
public void
|
62
62
|
accept(ThreadContext context, SaveContextVisitor visitor)
|
63
63
|
{
|
64
|
+
//
|
65
|
+
// Note that when noEnt is set, we call setFeature(FEATURE_NOT_EXPAND_ENTITY, false) in
|
66
|
+
// XmlDomParserContext.
|
67
|
+
//
|
68
|
+
// See https://xerces.apache.org/xerces-j/features.html section on `create-entity-ref-nodes`
|
69
|
+
//
|
70
|
+
// When set to true (the default), then EntityReference nodes are present in the DOM tree, and
|
71
|
+
// its children represent the replacement text. When set to false, then the EntityReference is
|
72
|
+
// not present in the tree, and instead the replacement text nodes are present.
|
73
|
+
//
|
74
|
+
// So: if we are here, then noEnt must be true, and we should just serialize the EntityReference
|
75
|
+
// and not worry about the replacement text. When noEnt is false, we would never reach this and
|
76
|
+
// instead would be serializing the replacement text.
|
77
|
+
//
|
78
|
+
// https://github.com/sparklemotion/nokogiri/issues/3270
|
79
|
+
//
|
64
80
|
visitor.enter(node);
|
65
|
-
Node child = node.getFirstChild();
|
66
|
-
while (child != null) {
|
67
|
-
IRubyObject nokoNode = getCachedNodeOrCreate(context.getRuntime(), child);
|
68
|
-
if (nokoNode instanceof XmlNode) {
|
69
|
-
XmlNode cur = (XmlNode) nokoNode;
|
70
|
-
cur.accept(context, visitor);
|
71
|
-
} else if (nokoNode instanceof XmlNamespace) {
|
72
|
-
XmlNamespace cur = (XmlNamespace) nokoNode;
|
73
|
-
cur.accept(context, visitor);
|
74
|
-
}
|
75
|
-
child = child.getNextSibling();
|
76
|
-
}
|
77
81
|
visitor.leave(node);
|
78
82
|
}
|
79
83
|
}
|