nokogiri 1.11.7-java → 1.12.0.rc1-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +243 -22
- data/LICENSE.md +1 -1
- data/README.md +6 -5
- data/ext/java/nokogiri/{HtmlDocument.java → Html4Document.java} +8 -22
- data/ext/java/nokogiri/{HtmlElementDescription.java → Html4ElementDescription.java} +6 -6
- data/ext/java/nokogiri/{HtmlEntityLookup.java → Html4EntityLookup.java} +5 -5
- data/ext/java/nokogiri/{HtmlSaxParserContext.java → Html4SaxParserContext.java} +13 -13
- data/ext/java/nokogiri/{HtmlSaxPushParser.java → Html4SaxPushParser.java} +14 -14
- data/ext/java/nokogiri/NokogiriService.java +20 -20
- data/ext/java/nokogiri/XmlAttr.java +2 -2
- data/ext/java/nokogiri/XmlDocument.java +14 -14
- data/ext/java/nokogiri/XmlElementContent.java +5 -5
- data/ext/java/nokogiri/XmlNode.java +74 -74
- data/ext/java/nokogiri/XmlSaxPushParser.java +2 -2
- data/ext/java/nokogiri/XmlSyntaxError.java +1 -1
- data/ext/java/nokogiri/XmlXpathContext.java +9 -9
- data/ext/java/nokogiri/XsltStylesheet.java +8 -8
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +4 -4
- data/ext/java/nokogiri/internals/NokogiriHandler.java +1 -1
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +57 -57
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +24 -24
- data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +105 -105
- data/ext/java/nokogiri/internals/c14n/XMLUtils.java +30 -30
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +87 -87
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +95 -95
- data/ext/nokogiri/depend +35 -34
- data/ext/nokogiri/extconf.rb +181 -103
- data/ext/nokogiri/gumbo.c +611 -0
- data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
- data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +20 -18
- data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
- data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +5 -5
- data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
- data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
- data/ext/nokogiri/nokogiri.c +51 -38
- data/ext/nokogiri/xml_document.c +13 -13
- data/ext/nokogiri/xml_element_content.c +2 -0
- data/ext/nokogiri/xml_encoding_handler.c +11 -6
- data/ext/nokogiri/xml_namespace.c +2 -0
- data/ext/nokogiri/xml_node.c +102 -102
- data/ext/nokogiri/xml_node_set.c +20 -20
- data/ext/nokogiri/xml_reader.c +2 -0
- data/ext/nokogiri/xml_sax_parser.c +6 -6
- data/ext/nokogiri/xml_sax_parser_context.c +2 -0
- data/ext/nokogiri/xml_schema.c +2 -0
- data/ext/nokogiri/xml_xpath_context.c +67 -65
- data/ext/nokogiri/xslt_stylesheet.c +2 -1
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri.rb +31 -29
- data/lib/nokogiri/css.rb +14 -14
- data/lib/nokogiri/css/parser.rb +1 -1
- data/lib/nokogiri/css/parser.y +1 -1
- data/lib/nokogiri/css/syntax_error.rb +1 -1
- data/lib/nokogiri/extension.rb +2 -2
- data/lib/nokogiri/gumbo.rb +14 -0
- data/lib/nokogiri/html.rb +31 -27
- data/lib/nokogiri/html4.rb +40 -0
- data/lib/nokogiri/{html → html4}/builder.rb +2 -2
- data/lib/nokogiri/{html → html4}/document.rb +4 -4
- data/lib/nokogiri/{html → html4}/document_fragment.rb +3 -3
- data/lib/nokogiri/{html → html4}/element_description.rb +1 -1
- data/lib/nokogiri/{html → html4}/element_description_defaults.rb +1 -1
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +1 -1
- data/lib/nokogiri/{html → html4}/sax/parser.rb +11 -14
- data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +5 -5
- data/lib/nokogiri/html5.rb +473 -0
- data/lib/nokogiri/html5/document.rb +74 -0
- data/lib/nokogiri/html5/document_fragment.rb +80 -0
- data/lib/nokogiri/html5/node.rb +93 -0
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +11 -2
- data/lib/nokogiri/xml.rb +35 -36
- data/lib/nokogiri/xml/node.rb +6 -5
- data/lib/nokogiri/xml/parse_options.rb +2 -0
- data/lib/nokogiri/xml/pp.rb +2 -2
- data/lib/nokogiri/xml/sax.rb +4 -4
- data/lib/nokogiri/xml/sax/document.rb +24 -30
- data/lib/nokogiri/xml/xpath.rb +2 -2
- data/lib/nokogiri/xslt.rb +16 -16
- data/lib/nokogiri/xslt/stylesheet.rb +1 -1
- metadata +42 -42
- data/lib/nokogiri/html/sax/parser_context.rb +0 -17
@@ -16,12 +16,12 @@ import org.jruby.runtime.ThreadContext;
|
|
16
16
|
import org.jruby.runtime.builtin.IRubyObject;
|
17
17
|
|
18
18
|
/**
|
19
|
-
* Class for Nokogiri::
|
19
|
+
* Class for Nokogiri::HTML4::ElementDescription.
|
20
20
|
*
|
21
21
|
* @author Patrick Mahoney <pat@polycrystal.org>
|
22
22
|
*/
|
23
|
-
@JRubyClass(name = "Nokogiri::
|
24
|
-
public class HtmlElementDescription extends RubyObject
|
23
|
+
@JRubyClass(name = "Nokogiri::HTML4::ElementDescription")
|
24
|
+
public class Html4ElementDescription extends RubyObject
|
25
25
|
{
|
26
26
|
|
27
27
|
/**
|
@@ -38,7 +38,7 @@ public class HtmlElementDescription extends RubyObject
|
|
38
38
|
protected HTMLElements.Element element;
|
39
39
|
|
40
40
|
public
|
41
|
-
|
41
|
+
Html4ElementDescription(Ruby runtime, RubyClass rubyClass)
|
42
42
|
{
|
43
43
|
super(runtime, rubyClass);
|
44
44
|
}
|
@@ -89,8 +89,8 @@ public class HtmlElementDescription extends RubyObject
|
|
89
89
|
return context.nil;
|
90
90
|
}
|
91
91
|
|
92
|
-
|
93
|
-
new
|
92
|
+
Html4ElementDescription desc =
|
93
|
+
new Html4ElementDescription(context.getRuntime(), (RubyClass)klazz);
|
94
94
|
desc.element = elem;
|
95
95
|
return desc;
|
96
96
|
}
|
@@ -12,16 +12,16 @@ import org.jruby.runtime.ThreadContext;
|
|
12
12
|
import org.jruby.runtime.builtin.IRubyObject;
|
13
13
|
|
14
14
|
/**
|
15
|
-
* Class for Nokogiri::
|
15
|
+
* Class for Nokogiri::HTML4::EntityLookup.
|
16
16
|
*
|
17
17
|
* @author Patrick Mahoney <pat@polycrystal.org>
|
18
18
|
*/
|
19
|
-
@JRubyClass(name = "Nokogiri::
|
20
|
-
public class HtmlEntityLookup extends RubyObject
|
19
|
+
@JRubyClass(name = "Nokogiri::HTML4::EntityLookup")
|
20
|
+
public class Html4EntityLookup extends RubyObject
|
21
21
|
{
|
22
22
|
|
23
23
|
public
|
24
|
-
|
24
|
+
Html4EntityLookup(Ruby runtime, RubyClass rubyClass)
|
25
25
|
{
|
26
26
|
super(runtime, rubyClass);
|
27
27
|
}
|
@@ -41,7 +41,7 @@ public class HtmlEntityLookup extends RubyObject
|
|
41
41
|
if (val == -1) { return ruby.getNil(); }
|
42
42
|
|
43
43
|
IRubyObject edClass =
|
44
|
-
ruby.getClassFromPath("Nokogiri::
|
44
|
+
ruby.getClassFromPath("Nokogiri::HTML4::EntityDescription");
|
45
45
|
IRubyObject edObj = invoke(context, edClass, "new",
|
46
46
|
ruby.newFixnum(val), ruby.newString(name),
|
47
47
|
ruby.newString(name + " entity"));
|
@@ -24,27 +24,27 @@ import nokogiri.internals.NokogiriHandler;
|
|
24
24
|
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
|
25
25
|
|
26
26
|
/**
|
27
|
-
* Class for Nokogiri::
|
27
|
+
* Class for Nokogiri::HTML4::SAX::ParserContext.
|
28
28
|
*
|
29
29
|
* @author serabe
|
30
30
|
* @author Patrick Mahoney <pat@polycrystal.org>
|
31
31
|
* @author Yoko Harada <yokolet@gmail.com>
|
32
32
|
*/
|
33
33
|
|
34
|
-
@JRubyClass(name = "Nokogiri::
|
35
|
-
public class HtmlSaxParserContext extends XmlSaxParserContext
|
34
|
+
@JRubyClass(name = "Nokogiri::HTML4::SAX::ParserContext", parent = "Nokogiri::XML::SAX::ParserContext")
|
35
|
+
public class Html4SaxParserContext extends XmlSaxParserContext
|
36
36
|
{
|
37
37
|
|
38
|
-
static
|
38
|
+
static Html4SaxParserContext
|
39
39
|
newInstance(final Ruby runtime, final RubyClass klazz)
|
40
40
|
{
|
41
|
-
|
41
|
+
Html4SaxParserContext instance = new Html4SaxParserContext(runtime, klazz);
|
42
42
|
instance.initialize(runtime);
|
43
43
|
return instance;
|
44
44
|
}
|
45
45
|
|
46
46
|
public
|
47
|
-
|
47
|
+
Html4SaxParserContext(Ruby ruby, RubyClass rubyClass)
|
48
48
|
{
|
49
49
|
super(ruby, rubyClass);
|
50
50
|
}
|
@@ -68,7 +68,7 @@ public class HtmlSaxParserContext extends XmlSaxParserContext
|
|
68
68
|
return parser;
|
69
69
|
} catch (SAXException ex) {
|
70
70
|
throw new SAXException(
|
71
|
-
"Problem while creating
|
71
|
+
"Problem while creating HTML4 SAX Parser: " + ex.toString());
|
72
72
|
}
|
73
73
|
}
|
74
74
|
|
@@ -79,7 +79,7 @@ public class HtmlSaxParserContext extends XmlSaxParserContext
|
|
79
79
|
IRubyObject data,
|
80
80
|
IRubyObject encoding)
|
81
81
|
{
|
82
|
-
|
82
|
+
Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klazz);
|
83
83
|
String javaEncoding = findEncodingName(context, encoding);
|
84
84
|
if (javaEncoding != null) {
|
85
85
|
CharSequence input = applyEncoding(rubyStringToString(data.convertToString()), javaEncoding);
|
@@ -231,7 +231,7 @@ public class HtmlSaxParserContext extends XmlSaxParserContext
|
|
231
231
|
IRubyObject data,
|
232
232
|
IRubyObject encoding)
|
233
233
|
{
|
234
|
-
|
234
|
+
Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klass);
|
235
235
|
ctx.setInputSourceFile(context, data);
|
236
236
|
String javaEncoding = findEncodingName(context, encoding);
|
237
237
|
if (javaEncoding != null) {
|
@@ -247,7 +247,7 @@ public class HtmlSaxParserContext extends XmlSaxParserContext
|
|
247
247
|
IRubyObject data,
|
248
248
|
IRubyObject encoding)
|
249
249
|
{
|
250
|
-
|
250
|
+
Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klass);
|
251
251
|
ctx.setIOInputSource(context, data, context.nil);
|
252
252
|
String javaEncoding = findEncodingName(context, encoding);
|
253
253
|
if (javaEncoding != null) {
|
@@ -258,12 +258,12 @@ public class HtmlSaxParserContext extends XmlSaxParserContext
|
|
258
258
|
|
259
259
|
/**
|
260
260
|
* Create a new parser context that will read from a raw input stream.
|
261
|
-
* Meant to be run in a separate thread by
|
261
|
+
* Meant to be run in a separate thread by Html4SaxPushParser.
|
262
262
|
*/
|
263
|
-
static
|
263
|
+
static Html4SaxParserContext
|
264
264
|
parse_stream(final Ruby runtime, RubyClass klass, InputStream stream)
|
265
265
|
{
|
266
|
-
|
266
|
+
Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(runtime, klass);
|
267
267
|
ctx.setInputSource(stream);
|
268
268
|
return ctx;
|
269
269
|
}
|
@@ -27,13 +27,13 @@ import org.jruby.runtime.ThreadContext;
|
|
27
27
|
import org.jruby.runtime.builtin.IRubyObject;
|
28
28
|
|
29
29
|
/**
|
30
|
-
* Class for Nokogiri::
|
30
|
+
* Class for Nokogiri::HTML4::SAX::PushParser
|
31
31
|
*
|
32
32
|
* @author
|
33
33
|
* @author Piotr Szmielew <p.szmielew@ava.waw.pl> - based on Nokogiri::XML::SAX::PushParser
|
34
34
|
*/
|
35
|
-
@JRubyClass(name = "Nokogiri::
|
36
|
-
public class HtmlSaxPushParser extends RubyObject
|
35
|
+
@JRubyClass(name = "Nokogiri::HTML4::SAX::PushParser")
|
36
|
+
public class Html4SaxPushParser extends RubyObject
|
37
37
|
{
|
38
38
|
ParserContext.Options options;
|
39
39
|
IRubyObject saxParser;
|
@@ -41,11 +41,11 @@ public class HtmlSaxPushParser extends RubyObject
|
|
41
41
|
NokogiriBlockingQueueInputStream stream;
|
42
42
|
|
43
43
|
private ParserTask parserTask = null;
|
44
|
-
private FutureTask<
|
44
|
+
private FutureTask<Html4SaxParserContext> futureTask = null;
|
45
45
|
private ExecutorService executor = null;
|
46
46
|
|
47
47
|
public
|
48
|
-
|
48
|
+
Html4SaxPushParser(Ruby ruby, RubyClass rubyClass)
|
49
49
|
{
|
50
50
|
super(ruby, rubyClass);
|
51
51
|
}
|
@@ -111,7 +111,7 @@ public class HtmlSaxPushParser extends RubyObject
|
|
111
111
|
final ByteArrayInputStream data = NokogiriHelpers.stringBytesToStream(chunk);
|
112
112
|
if (data == null) {
|
113
113
|
terminateTask(context.runtime);
|
114
|
-
throw XmlSyntaxError.createHTMLSyntaxError(context.runtime).toThrowable(); // Nokogiri::
|
114
|
+
throw XmlSyntaxError.createHTMLSyntaxError(context.runtime).toThrowable(); // Nokogiri::HTML4::SyntaxError
|
115
115
|
}
|
116
116
|
|
117
117
|
int errorCount0 = parserTask.getErrorCount();
|
@@ -149,12 +149,12 @@ public class HtmlSaxPushParser extends RubyObject
|
|
149
149
|
|
150
150
|
assert saxParser != null : "saxParser null";
|
151
151
|
parserTask = new ParserTask(context, saxParser, stream);
|
152
|
-
futureTask = new FutureTask<
|
152
|
+
futureTask = new FutureTask<Html4SaxParserContext>((Callable) parserTask);
|
153
153
|
executor = Executors.newSingleThreadExecutor(new ThreadFactory() {
|
154
154
|
@Override
|
155
155
|
public Thread newThread(Runnable r) {
|
156
156
|
Thread t = new Thread(r);
|
157
|
-
t.setName("
|
157
|
+
t.setName("Html4SaxPushParser");
|
158
158
|
t.setDaemon(true);
|
159
159
|
return t;
|
160
160
|
}
|
@@ -187,14 +187,14 @@ public class HtmlSaxPushParser extends RubyObject
|
|
187
187
|
futureTask = null;
|
188
188
|
}
|
189
189
|
|
190
|
-
private static
|
190
|
+
private static Html4SaxParserContext
|
191
191
|
parse(final Ruby runtime, final InputStream stream)
|
192
192
|
{
|
193
|
-
RubyClass klazz = getNokogiriClass(runtime, "Nokogiri::
|
194
|
-
return
|
193
|
+
RubyClass klazz = getNokogiriClass(runtime, "Nokogiri::HTML4::SAX::ParserContext");
|
194
|
+
return Html4SaxParserContext.parse_stream(runtime, klazz, stream);
|
195
195
|
}
|
196
196
|
|
197
|
-
static class ParserTask extends XmlSaxPushParser.ParserTask /* <
|
197
|
+
static class ParserTask extends XmlSaxPushParser.ParserTask /* <Html4SaxPushParser> */
|
198
198
|
{
|
199
199
|
|
200
200
|
private
|
@@ -204,10 +204,10 @@ public class HtmlSaxPushParser extends RubyObject
|
|
204
204
|
}
|
205
205
|
|
206
206
|
@Override
|
207
|
-
public
|
207
|
+
public Html4SaxParserContext
|
208
208
|
call() throws Exception
|
209
209
|
{
|
210
|
-
return (
|
210
|
+
return (Html4SaxParserContext) super.call();
|
211
211
|
}
|
212
212
|
|
213
213
|
}
|
@@ -40,9 +40,9 @@ public class NokogiriService implements BasicLibraryService
|
|
40
40
|
{
|
41
41
|
Map<String, RubyClass> nokogiriClassCache = new HashMap<String, RubyClass>();
|
42
42
|
nokogiriClassCache.put("Nokogiri::EncodingHandler", (RubyClass)ruby.getClassFromPath("Nokogiri::EncodingHandler"));
|
43
|
-
nokogiriClassCache.put("Nokogiri::
|
44
|
-
nokogiriClassCache.put("Nokogiri::
|
45
|
-
(RubyClass)ruby.getClassFromPath("Nokogiri::
|
43
|
+
nokogiriClassCache.put("Nokogiri::HTML4::Document", (RubyClass)ruby.getClassFromPath("Nokogiri::HTML4::Document"));
|
44
|
+
nokogiriClassCache.put("Nokogiri::HTML4::ElementDescription",
|
45
|
+
(RubyClass)ruby.getClassFromPath("Nokogiri::HTML4::ElementDescription"));
|
46
46
|
nokogiriClassCache.put("Nokogiri::XML::Attr", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Attr"));
|
47
47
|
nokogiriClassCache.put("Nokogiri::XML::Document", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Document"));
|
48
48
|
nokogiriClassCache.put("Nokogiri::XML::DocumentFragment",
|
@@ -81,7 +81,7 @@ public class NokogiriService implements BasicLibraryService
|
|
81
81
|
RubyModule nokogiri = ruby.defineModule("Nokogiri");
|
82
82
|
RubyModule xmlModule = nokogiri.defineModuleUnder("XML");
|
83
83
|
RubyModule xmlSaxModule = xmlModule.defineModuleUnder("SAX");
|
84
|
-
RubyModule htmlModule = nokogiri.defineModuleUnder("
|
84
|
+
RubyModule htmlModule = nokogiri.defineModuleUnder("HTML4");
|
85
85
|
RubyModule htmlSaxModule = htmlModule.defineModuleUnder("SAX");
|
86
86
|
RubyModule xsltModule = nokogiri.defineModuleUnder("XSLT");
|
87
87
|
|
@@ -201,11 +201,11 @@ public class NokogiriService implements BasicLibraryService
|
|
201
201
|
{
|
202
202
|
RubyClass htmlElemDesc = htmlModule.defineClassUnder("ElementDescription", ruby.getObject(),
|
203
203
|
HTML_ELEMENT_DESCRIPTION_ALLOCATOR);
|
204
|
-
htmlElemDesc.defineAnnotatedMethods(
|
204
|
+
htmlElemDesc.defineAnnotatedMethods(Html4ElementDescription.class);
|
205
205
|
|
206
206
|
RubyClass htmlEntityLookup = htmlModule.defineClassUnder("EntityLookup", ruby.getObject(),
|
207
207
|
HTML_ENTITY_LOOKUP_ALLOCATOR);
|
208
|
-
htmlEntityLookup.defineAnnotatedMethods(
|
208
|
+
htmlEntityLookup.defineAnnotatedMethods(Html4EntityLookup.class);
|
209
209
|
}
|
210
210
|
|
211
211
|
private void
|
@@ -216,7 +216,7 @@ public class NokogiriService implements BasicLibraryService
|
|
216
216
|
|
217
217
|
//RubyModule htmlDoc = html.defineOrGetClassUnder("Document", document);
|
218
218
|
RubyModule htmlDocument = htmlModule.defineClassUnder("Document", xmlDocument, HTML_DOCUMENT_ALLOCATOR);
|
219
|
-
htmlDocument.defineAnnotatedMethods(
|
219
|
+
htmlDocument.defineAnnotatedMethods(Html4Document.class);
|
220
220
|
}
|
221
221
|
|
222
222
|
private void
|
@@ -231,11 +231,11 @@ public class NokogiriService implements BasicLibraryService
|
|
231
231
|
|
232
232
|
RubyClass htmlSaxPushParser = htmlSaxModule.defineClassUnder("PushParser", ruby.getObject(),
|
233
233
|
HTML_SAXPUSHPARSER_ALLOCATOR);
|
234
|
-
htmlSaxPushParser.defineAnnotatedMethods(
|
234
|
+
htmlSaxPushParser.defineAnnotatedMethods(Html4SaxPushParser.class);
|
235
235
|
|
236
236
|
RubyClass htmlSaxParserContext = htmlSaxModule.defineClassUnder("ParserContext", xmlSaxParserContext,
|
237
237
|
HTML_SAXPARSER_CONTEXT_ALLOCATOR);
|
238
|
-
htmlSaxParserContext.defineAnnotatedMethods(
|
238
|
+
htmlSaxParserContext.defineAnnotatedMethods(Html4SaxParserContext.class);
|
239
239
|
}
|
240
240
|
|
241
241
|
private void
|
@@ -255,30 +255,30 @@ public class NokogiriService implements BasicLibraryService
|
|
255
255
|
|
256
256
|
public static final ObjectAllocator HTML_DOCUMENT_ALLOCATOR = new ObjectAllocator()
|
257
257
|
{
|
258
|
-
private
|
258
|
+
private Html4Document htmlDocument = null;
|
259
259
|
public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
|
260
|
-
if (htmlDocument == null) { htmlDocument = new
|
260
|
+
if (htmlDocument == null) { htmlDocument = new Html4Document(runtime, klazz); }
|
261
261
|
try {
|
262
|
-
|
262
|
+
Html4Document clone = (Html4Document) htmlDocument.clone();
|
263
263
|
clone.setMetaClass(klazz);
|
264
264
|
return clone;
|
265
265
|
} catch (CloneNotSupportedException e) {
|
266
|
-
return new
|
266
|
+
return new Html4Document(runtime, klazz);
|
267
267
|
}
|
268
268
|
}
|
269
269
|
};
|
270
270
|
|
271
271
|
private static final ObjectAllocator HTML_SAXPARSER_CONTEXT_ALLOCATOR = new ObjectAllocator()
|
272
272
|
{
|
273
|
-
private
|
273
|
+
private Html4SaxParserContext htmlSaxParserContext = null;
|
274
274
|
public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
|
275
|
-
if (htmlSaxParserContext == null) { htmlSaxParserContext = new
|
275
|
+
if (htmlSaxParserContext == null) { htmlSaxParserContext = new Html4SaxParserContext(runtime, klazz); }
|
276
276
|
try {
|
277
|
-
|
277
|
+
Html4SaxParserContext clone = (Html4SaxParserContext) htmlSaxParserContext.clone();
|
278
278
|
clone.setMetaClass(klazz);
|
279
279
|
return clone;
|
280
280
|
} catch (CloneNotSupportedException e) {
|
281
|
-
return new
|
281
|
+
return new Html4SaxParserContext(runtime, klazz);
|
282
282
|
}
|
283
283
|
}
|
284
284
|
};
|
@@ -287,7 +287,7 @@ public class NokogiriService implements BasicLibraryService
|
|
287
287
|
new ObjectAllocator()
|
288
288
|
{
|
289
289
|
public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
|
290
|
-
return new
|
290
|
+
return new Html4ElementDescription(runtime, klazz);
|
291
291
|
}
|
292
292
|
};
|
293
293
|
|
@@ -295,7 +295,7 @@ public class NokogiriService implements BasicLibraryService
|
|
295
295
|
new ObjectAllocator()
|
296
296
|
{
|
297
297
|
public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
|
298
|
-
return new
|
298
|
+
return new Html4EntityLookup(runtime, klazz);
|
299
299
|
}
|
300
300
|
};
|
301
301
|
|
@@ -571,7 +571,7 @@ public class NokogiriService implements BasicLibraryService
|
|
571
571
|
private static final ObjectAllocator HTML_SAXPUSHPARSER_ALLOCATOR = new ObjectAllocator()
|
572
572
|
{
|
573
573
|
public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
|
574
|
-
return new
|
574
|
+
return new Html4SaxPushParser(runtime, klazz);
|
575
575
|
}
|
576
576
|
};
|
577
577
|
|
@@ -117,7 +117,7 @@ public class XmlAttr extends XmlNode
|
|
117
117
|
String attrName = ((Attr) node).getName();
|
118
118
|
if (attrName == null) { return context.nil; }
|
119
119
|
|
120
|
-
if (node.getNamespaceURI() != null && !(document(context.runtime) instanceof
|
120
|
+
if (node.getNamespaceURI() != null && !(document(context.runtime) instanceof Html4Document)) {
|
121
121
|
attrName = NokogiriHelpers.getLocalPart(attrName);
|
122
122
|
if (attrName == null) { return context.nil; }
|
123
123
|
}
|
@@ -137,7 +137,7 @@ public class XmlAttr extends XmlNode
|
|
137
137
|
isHtml(ThreadContext context)
|
138
138
|
{
|
139
139
|
return document(context).getMetaClass().isKindOfModule(getNokogiriClass(context.getRuntime(),
|
140
|
-
"Nokogiri::
|
140
|
+
"Nokogiri::HTML4::Document"));
|
141
141
|
}
|
142
142
|
|
143
143
|
@Override
|
@@ -280,8 +280,8 @@ public class XmlDocument extends XmlNode
|
|
280
280
|
XmlDocument xmlDocument;
|
281
281
|
try {
|
282
282
|
Document docNode = createNewDocument(runtime);
|
283
|
-
if ("Nokogiri::
|
284
|
-
xmlDocument = new
|
283
|
+
if ("Nokogiri::HTML4::Document".equals(((RubyClass)klazz).getName())) {
|
284
|
+
xmlDocument = new Html4Document(context.runtime, (RubyClass) klazz, docNode);
|
285
285
|
} else {
|
286
286
|
xmlDocument = new XmlDocument(context.runtime, (RubyClass) klazz, docNode);
|
287
287
|
}
|
@@ -443,7 +443,7 @@ public class XmlDocument extends XmlNode
|
|
443
443
|
return new_root;
|
444
444
|
}
|
445
445
|
if (!(new_root instanceof XmlNode)) {
|
446
|
-
|
446
|
+
throw context.runtime.newArgumentError("expected Nokogiri::XML::Node but received " + new_root.getType());
|
447
447
|
}
|
448
448
|
XmlNode newRoot = asXmlNode(context, new_root);
|
449
449
|
|
@@ -657,17 +657,17 @@ public class XmlDocument extends XmlNode
|
|
657
657
|
}
|
658
658
|
String algorithmURI = null;
|
659
659
|
switch (mode) {
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
660
|
+
case 0: // XML_C14N_1_0
|
661
|
+
if (with_comments) { algorithmURI = Canonicalizer.ALGO_ID_C14N_WITH_COMMENTS; }
|
662
|
+
else { algorithmURI = Canonicalizer.ALGO_ID_C14N_OMIT_COMMENTS; }
|
663
|
+
break;
|
664
|
+
case 1: // XML_C14N_EXCLUSIVE_1_0
|
665
|
+
if (with_comments) { algorithmURI = Canonicalizer.ALGO_ID_C14N_EXCL_WITH_COMMENTS; }
|
666
|
+
else { algorithmURI = Canonicalizer.ALGO_ID_C14N_EXCL_OMIT_COMMENTS; }
|
667
|
+
break;
|
668
|
+
case 2: // XML_C14N_1_1 = 2
|
669
|
+
if (with_comments) { algorithmURI = Canonicalizer.ALGO_ID_C14N11_WITH_COMMENTS; }
|
670
|
+
else { algorithmURI = Canonicalizer.ALGO_ID_C14N11_OMIT_COMMENTS; }
|
671
671
|
}
|
672
672
|
try {
|
673
673
|
Canonicalizer canonicalizer = Canonicalizer.getInstance(algorithmURI);
|
@@ -142,11 +142,11 @@ public class XmlElementContent extends RubyObject
|
|
142
142
|
right = runtime.getNil();
|
143
143
|
|
144
144
|
switch (type) {
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
145
|
+
case SEQ:
|
146
|
+
case OR:
|
147
|
+
applyGroup(runtime, klass, doc, iter);
|
148
|
+
default:
|
149
|
+
// noop
|
150
150
|
}
|
151
151
|
}
|
152
152
|
|
@@ -645,7 +645,7 @@ public class XmlNode extends RubyObject
|
|
645
645
|
|
646
646
|
final XmlDocument doc = document(context.runtime);
|
647
647
|
for (int i = 0; i < nodeMap.getLength(); i++) {
|
648
|
-
if ((doc instanceof
|
648
|
+
if ((doc instanceof Html4Document) || !NokogiriHelpers.isNamespace(nodeMap.item(i))) {
|
649
649
|
attr.append(getCachedNodeOrCreate(runtime, nodeMap.item(i)));
|
650
650
|
}
|
651
651
|
}
|
@@ -811,8 +811,8 @@ public class XmlNode extends RubyObject
|
|
811
811
|
XmlDocument document = document(runtime);
|
812
812
|
if (document == null) { return context.nil; }
|
813
813
|
|
814
|
-
if (document instanceof
|
815
|
-
klass = getNokogiriClass(runtime, "Nokogiri::
|
814
|
+
if (document instanceof Html4Document) {
|
815
|
+
klass = getNokogiriClass(runtime, "Nokogiri::HTML4::Document");
|
816
816
|
ctx = new HtmlDomParserContext(runtime, options);
|
817
817
|
((HtmlDomParserContext) ctx).enableDocumentFragment();
|
818
818
|
ctx.setStringInputSource(context, str, context.nil);
|
@@ -824,7 +824,7 @@ public class XmlNode extends RubyObject
|
|
824
824
|
|
825
825
|
// TODO: for some reason, document.getEncoding() can be null or nil (don't know why)
|
826
826
|
// run `test_parse_with_unparented_html_text_context_node' few times to see this happen
|
827
|
-
if (document instanceof
|
827
|
+
if (document instanceof Html4Document && !(document.getEncoding() == null || document.getEncoding().isNil())) {
|
828
828
|
HtmlDomParserContext htmlCtx = (HtmlDomParserContext) ctx;
|
829
829
|
htmlCtx.setEncoding(document.getEncoding().asJavaString());
|
830
830
|
}
|
@@ -1148,7 +1148,7 @@ public class XmlNode extends RubyObject
|
|
1148
1148
|
namespace(ThreadContext context)
|
1149
1149
|
{
|
1150
1150
|
final XmlDocument doc = document(context.runtime);
|
1151
|
-
if (doc instanceof
|
1151
|
+
if (doc instanceof Html4Document) { return context.nil; }
|
1152
1152
|
|
1153
1153
|
String namespaceURI = node.getNamespaceURI();
|
1154
1154
|
if (namespaceURI == null || namespaceURI.isEmpty()) {
|
@@ -1183,7 +1183,7 @@ public class XmlNode extends RubyObject
|
|
1183
1183
|
// updated.
|
1184
1184
|
final XmlDocument doc = document(context.runtime);
|
1185
1185
|
if (doc == null) { return context.runtime.newEmptyArray(); }
|
1186
|
-
if (doc instanceof
|
1186
|
+
if (doc instanceof Html4Document) { return context.runtime.newEmptyArray(); }
|
1187
1187
|
|
1188
1188
|
List<XmlNamespace> namespaces = doc.getNamespaceCache().get(node);
|
1189
1189
|
return context.runtime.newArray((List) namespaces);
|
@@ -1199,7 +1199,7 @@ public class XmlNode extends RubyObject
|
|
1199
1199
|
{
|
1200
1200
|
final XmlDocument doc = document(context.runtime);
|
1201
1201
|
if (doc == null) { return context.runtime.newEmptyArray(); }
|
1202
|
-
if (doc instanceof
|
1202
|
+
if (doc instanceof Html4Document) { return context.runtime.newEmptyArray(); }
|
1203
1203
|
|
1204
1204
|
Node previousNode;
|
1205
1205
|
if (node.getNodeType() == Node.ELEMENT_NODE) {
|
@@ -1335,7 +1335,7 @@ public class XmlNode extends RubyObject
|
|
1335
1335
|
private boolean
|
1336
1336
|
isHtmlDoc(ThreadContext context)
|
1337
1337
|
{
|
1338
|
-
return document(context).getMetaClass().isKindOfModule(getNokogiriClass(context.runtime, "Nokogiri::
|
1338
|
+
return document(context).getMetaClass().isKindOfModule(getNokogiriClass(context.runtime, "Nokogiri::HTML4::Document"));
|
1339
1339
|
}
|
1340
1340
|
|
1341
1341
|
private boolean
|
@@ -1516,56 +1516,56 @@ public class XmlNode extends RubyObject
|
|
1516
1516
|
{
|
1517
1517
|
String type;
|
1518
1518
|
switch (node.getNodeType()) {
|
1519
|
-
|
1520
|
-
|
1521
|
-
|
1522
|
-
|
1523
|
-
|
1524
|
-
|
1525
|
-
|
1526
|
-
|
1527
|
-
|
1528
|
-
|
1529
|
-
|
1530
|
-
|
1531
|
-
|
1532
|
-
|
1533
|
-
|
1534
|
-
|
1535
|
-
|
1536
|
-
|
1537
|
-
|
1538
|
-
|
1539
|
-
|
1540
|
-
|
1541
|
-
|
1542
|
-
|
1543
|
-
|
1544
|
-
|
1545
|
-
|
1546
|
-
|
1547
|
-
|
1548
|
-
|
1549
|
-
|
1550
|
-
|
1551
|
-
|
1552
|
-
|
1553
|
-
|
1554
|
-
|
1555
|
-
|
1556
|
-
|
1557
|
-
|
1558
|
-
|
1559
|
-
|
1560
|
-
|
1561
|
-
|
1562
|
-
|
1563
|
-
|
1564
|
-
|
1565
|
-
|
1566
|
-
|
1567
|
-
|
1568
|
-
|
1519
|
+
case Node.ELEMENT_NODE:
|
1520
|
+
if (this instanceof XmlElementDecl) {
|
1521
|
+
type = "ELEMENT_DECL";
|
1522
|
+
} else if (this instanceof XmlAttributeDecl) {
|
1523
|
+
type = "ATTRIBUTE_DECL";
|
1524
|
+
} else if (this instanceof XmlEntityDecl) {
|
1525
|
+
type = "ENTITY_DECL";
|
1526
|
+
} else {
|
1527
|
+
type = "ELEMENT_NODE";
|
1528
|
+
}
|
1529
|
+
break;
|
1530
|
+
case Node.ATTRIBUTE_NODE:
|
1531
|
+
type = "ATTRIBUTE_NODE";
|
1532
|
+
break;
|
1533
|
+
case Node.TEXT_NODE:
|
1534
|
+
type = "TEXT_NODE";
|
1535
|
+
break;
|
1536
|
+
case Node.CDATA_SECTION_NODE:
|
1537
|
+
type = "CDATA_SECTION_NODE";
|
1538
|
+
break;
|
1539
|
+
case Node.ENTITY_REFERENCE_NODE:
|
1540
|
+
type = "ENTITY_REF_NODE";
|
1541
|
+
break;
|
1542
|
+
case Node.ENTITY_NODE:
|
1543
|
+
type = "ENTITY_NODE";
|
1544
|
+
break;
|
1545
|
+
case Node.PROCESSING_INSTRUCTION_NODE:
|
1546
|
+
type = "PI_NODE";
|
1547
|
+
break;
|
1548
|
+
case Node.COMMENT_NODE:
|
1549
|
+
type = "COMMENT_NODE";
|
1550
|
+
break;
|
1551
|
+
case Node.DOCUMENT_NODE:
|
1552
|
+
if (this instanceof Html4Document) {
|
1553
|
+
type = "HTML_DOCUMENT_NODE";
|
1554
|
+
} else {
|
1555
|
+
type = "DOCUMENT_NODE";
|
1556
|
+
}
|
1557
|
+
break;
|
1558
|
+
case Node.DOCUMENT_TYPE_NODE:
|
1559
|
+
type = "DOCUMENT_TYPE_NODE";
|
1560
|
+
break;
|
1561
|
+
case Node.DOCUMENT_FRAGMENT_NODE:
|
1562
|
+
type = "DOCUMENT_FRAG_NODE";
|
1563
|
+
break;
|
1564
|
+
case Node.NOTATION_NODE:
|
1565
|
+
type = "NOTATION_NODE";
|
1566
|
+
break;
|
1567
|
+
default:
|
1568
|
+
return context.runtime.newFixnum(0);
|
1569
1569
|
}
|
1570
1570
|
|
1571
1571
|
return getNokogiriClass(context.runtime, "Nokogiri::XML::Node").getConstant(type);
|
@@ -1676,23 +1676,23 @@ public class XmlNode extends RubyObject
|
|
1676
1676
|
Node parent = thisNode.getParentNode();
|
1677
1677
|
|
1678
1678
|
switch (scheme) {
|
1679
|
-
|
1680
|
-
|
1681
|
-
|
1679
|
+
case CHILD:
|
1680
|
+
Node[] children = adoptAsChild(thisNode, otherNode);
|
1681
|
+
if (children.length == 1 && otherNode == children[0]) {
|
1682
|
+
break;
|
1683
|
+
} else {
|
1684
|
+
nodeOrTags = nodeArrayToRubyArray(context.runtime, children);
|
1685
|
+
}
|
1686
|
+
break;
|
1687
|
+
case PREV_SIBLING:
|
1688
|
+
adoptAsPrevSibling(context, parent, thisNode, otherNode);
|
1689
|
+
break;
|
1690
|
+
case NEXT_SIBLING:
|
1691
|
+
adoptAsNextSibling(context, parent, thisNode, otherNode);
|
1692
|
+
break;
|
1693
|
+
case REPLACEMENT:
|
1694
|
+
adoptAsReplacement(context, parent, thisNode, otherNode);
|
1682
1695
|
break;
|
1683
|
-
} else {
|
1684
|
-
nodeOrTags = nodeArrayToRubyArray(context.runtime, children);
|
1685
|
-
}
|
1686
|
-
break;
|
1687
|
-
case PREV_SIBLING:
|
1688
|
-
adoptAsPrevSibling(context, parent, thisNode, otherNode);
|
1689
|
-
break;
|
1690
|
-
case NEXT_SIBLING:
|
1691
|
-
adoptAsNextSibling(context, parent, thisNode, otherNode);
|
1692
|
-
break;
|
1693
|
-
case REPLACEMENT:
|
1694
|
-
adoptAsReplacement(context, parent, thisNode, otherNode);
|
1695
|
-
break;
|
1696
1696
|
}
|
1697
1697
|
} catch (Exception e) {
|
1698
1698
|
throw context.runtime.newRuntimeError(e.toString());
|