nokogiri 1.11.0.rc3-java → 1.11.4-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1015 -947
- data/LICENSE.md +1 -1
- data/README.md +168 -91
- data/dependencies.yml +12 -12
- data/ext/java/nokogiri/EncodingHandler.java +76 -89
- data/ext/java/nokogiri/HtmlDocument.java +135 -144
- data/ext/java/nokogiri/HtmlElementDescription.java +102 -117
- data/ext/java/nokogiri/HtmlEntityLookup.java +33 -60
- data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -222
- data/ext/java/nokogiri/HtmlSaxPushParser.java +162 -169
- data/ext/java/nokogiri/NokogiriService.java +595 -556
- data/ext/java/nokogiri/XmlAttr.java +118 -126
- data/ext/java/nokogiri/XmlAttributeDecl.java +95 -106
- data/ext/java/nokogiri/XmlCdata.java +35 -58
- data/ext/java/nokogiri/XmlComment.java +46 -67
- data/ext/java/nokogiri/XmlDocument.java +645 -572
- data/ext/java/nokogiri/XmlDocumentFragment.java +125 -137
- data/ext/java/nokogiri/XmlDtd.java +448 -414
- data/ext/java/nokogiri/XmlElement.java +23 -48
- data/ext/java/nokogiri/XmlElementContent.java +343 -316
- data/ext/java/nokogiri/XmlElementDecl.java +124 -125
- data/ext/java/nokogiri/XmlEntityDecl.java +119 -127
- data/ext/java/nokogiri/XmlEntityReference.java +49 -72
- data/ext/java/nokogiri/XmlNamespace.java +175 -175
- data/ext/java/nokogiri/XmlNode.java +1843 -1622
- data/ext/java/nokogiri/XmlNodeSet.java +361 -331
- data/ext/java/nokogiri/XmlProcessingInstruction.java +47 -69
- data/ext/java/nokogiri/XmlReader.java +513 -450
- data/ext/java/nokogiri/XmlRelaxng.java +89 -101
- data/ext/java/nokogiri/XmlSaxParserContext.java +328 -310
- data/ext/java/nokogiri/XmlSaxPushParser.java +227 -220
- data/ext/java/nokogiri/XmlSchema.java +335 -242
- data/ext/java/nokogiri/XmlSyntaxError.java +113 -119
- data/ext/java/nokogiri/XmlText.java +55 -76
- data/ext/java/nokogiri/XmlXpathContext.java +242 -210
- data/ext/java/nokogiri/XsltStylesheet.java +280 -269
- data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +201 -190
- data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
- data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
- data/ext/java/nokogiri/internals/NokogiriDomParser.java +63 -80
- data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +27 -52
- data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -286
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +736 -652
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +184 -173
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +79 -89
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +64 -79
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +84 -99
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +48 -65
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +119 -78
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -54
- data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +23 -46
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +55 -72
- data/ext/java/nokogiri/internals/ParserContext.java +206 -211
- data/ext/java/nokogiri/internals/ReaderNode.java +478 -403
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -739
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +31 -54
- data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
- data/ext/java/nokogiri/internals/XmlDeclHandler.java +3 -34
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +206 -207
- data/ext/java/nokogiri/internals/XmlSaxParser.java +22 -47
- data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
- data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
- data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
- data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
- data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
- data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
- data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
- data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
- data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
- data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
- data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
- data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
- data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
- data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
- data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
- data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
- data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -574
- data/ext/nokogiri/depend +37 -358
- data/ext/nokogiri/extconf.rb +581 -374
- data/ext/nokogiri/html_document.c +78 -82
- data/ext/nokogiri/html_element_description.c +84 -71
- data/ext/nokogiri/html_entity_lookup.c +21 -16
- data/ext/nokogiri/html_sax_parser_context.c +69 -66
- data/ext/nokogiri/html_sax_push_parser.c +42 -34
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +192 -93
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +246 -188
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +30 -27
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +17 -11
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +56 -49
- data/ext/nokogiri/xml_node.c +371 -320
- data/ext/nokogiri/xml_node_set.c +168 -156
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +191 -157
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +118 -118
- data/ext/nokogiri/xml_sax_parser_context.c +103 -86
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +95 -47
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +206 -123
- data/ext/nokogiri/xslt_stylesheet.c +158 -161
- data/lib/nokogiri.rb +3 -7
- data/lib/nokogiri/css/parser.rb +3 -3
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/xpath_visitor.rb +70 -42
- data/lib/nokogiri/extension.rb +26 -0
- data/lib/nokogiri/html/document.rb +12 -26
- data/lib/nokogiri/html/document_fragment.rb +15 -15
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +2 -149
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +205 -0
- data/lib/nokogiri/xml/document.rb +91 -35
- data/lib/nokogiri/xml/document_fragment.rb +4 -6
- data/lib/nokogiri/xml/node.rb +89 -69
- data/lib/nokogiri/xml/parse_options.rb +6 -0
- data/lib/nokogiri/xml/reader.rb +2 -9
- data/lib/nokogiri/xml/relax_ng.rb +6 -2
- data/lib/nokogiri/xml/schema.rb +12 -4
- data/lib/nokogiri/xml/searchable.rb +3 -1
- data/lib/nokogiri/xml/xpath.rb +1 -3
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
- metadata +86 -177
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/nokogiri.h +0 -134
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -63
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -1,35 +1,3 @@
|
|
1
|
-
/**
|
2
|
-
* (The MIT License)
|
3
|
-
*
|
4
|
-
* Copyright (c) 2008 - 2012:
|
5
|
-
*
|
6
|
-
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
|
-
* * {Mike Dalessio}[http://mike.daless.io]
|
8
|
-
* * {Charles Nutter}[http://blog.headius.com]
|
9
|
-
* * {Sergio Arbeo}[http://www.serabe.com]
|
10
|
-
* * {Patrick Mahoney}[http://polycrystal.org]
|
11
|
-
* * {Yoko Harada}[http://yokolet.blogspot.com]
|
12
|
-
*
|
13
|
-
* Permission is hereby granted, free of charge, to any person obtaining
|
14
|
-
* a copy of this software and associated documentation files (the
|
15
|
-
* 'Software'), to deal in the Software without restriction, including
|
16
|
-
* without limitation the rights to use, copy, modify, merge, publish,
|
17
|
-
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
-
* permit persons to whom the Software is furnished to do so, subject to
|
19
|
-
* the following conditions:
|
20
|
-
*
|
21
|
-
* The above copyright notice and this permission notice shall be
|
22
|
-
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
24
|
-
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
|
-
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
-
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
27
|
-
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
28
|
-
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
29
|
-
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
30
|
-
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
-
*/
|
32
|
-
|
33
1
|
package nokogiri;
|
34
2
|
|
35
3
|
import org.jruby.Ruby;
|
@@ -55,124 +23,147 @@ import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
|
|
55
23
|
* @author sergio
|
56
24
|
* @author Yoko Harada <yokolet@gmail.com>
|
57
25
|
*/
|
58
|
-
@JRubyClass(name="Nokogiri::HTML::Document", parent="Nokogiri::XML::Document")
|
59
|
-
public class HtmlDocument extends XmlDocument
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
26
|
+
@JRubyClass(name = "Nokogiri::HTML::Document", parent = "Nokogiri::XML::Document")
|
27
|
+
public class HtmlDocument extends XmlDocument
|
28
|
+
{
|
29
|
+
private static final String DEFAULT_CONTENT_TYPE = "html";
|
30
|
+
private static final String DEFAULT_PUBLIC_ID = "-//W3C//DTD HTML 4.01//EN";
|
31
|
+
private static final String DEFAULT_SYTEM_ID = "http://www.w3.org/TR/html4/strict.dtd";
|
32
|
+
|
33
|
+
private String parsed_encoding = null;
|
34
|
+
|
35
|
+
public
|
36
|
+
HtmlDocument(Ruby ruby, RubyClass klazz)
|
37
|
+
{
|
38
|
+
super(ruby, klazz);
|
39
|
+
}
|
40
|
+
|
41
|
+
public
|
42
|
+
HtmlDocument(Ruby runtime, Document document)
|
43
|
+
{
|
44
|
+
this(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document"), document);
|
45
|
+
}
|
46
|
+
|
47
|
+
public
|
48
|
+
HtmlDocument(Ruby ruby, RubyClass klazz, Document doc)
|
49
|
+
{
|
50
|
+
super(ruby, klazz, doc);
|
51
|
+
}
|
52
|
+
|
53
|
+
@JRubyMethod(name = "new", meta = true, rest = true, required = 0)
|
54
|
+
public static IRubyObject
|
55
|
+
rbNew(ThreadContext context, IRubyObject klazz, IRubyObject[] args)
|
56
|
+
{
|
57
|
+
final Ruby runtime = context.runtime;
|
58
|
+
HtmlDocument htmlDocument;
|
59
|
+
try {
|
60
|
+
Document docNode = createNewDocument(runtime);
|
61
|
+
htmlDocument = (HtmlDocument) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(runtime, (RubyClass) klazz);
|
62
|
+
htmlDocument.setDocumentNode(context.runtime, docNode);
|
63
|
+
} catch (Exception ex) {
|
64
|
+
throw asRuntimeError(runtime, "couldn't create document: ", ex);
|
68
65
|
}
|
69
66
|
|
70
|
-
|
71
|
-
|
67
|
+
Helpers.invoke(context, htmlDocument, "initialize", args);
|
68
|
+
|
69
|
+
return htmlDocument;
|
70
|
+
}
|
71
|
+
|
72
|
+
public IRubyObject
|
73
|
+
getInternalSubset(ThreadContext context)
|
74
|
+
{
|
75
|
+
IRubyObject internalSubset = super.getInternalSubset(context);
|
76
|
+
|
77
|
+
// html documents are expected to have a default internal subset
|
78
|
+
// the default values are the same ones used when the following
|
79
|
+
// feature is turned on
|
80
|
+
// "http://cyberneko.org/html/features/insert-doctype"
|
81
|
+
// the reason we don't turn it on, is because it overrides the document's
|
82
|
+
// declared doctype declaration.
|
83
|
+
|
84
|
+
if (internalSubset.isNil()) {
|
85
|
+
internalSubset = XmlDtd.newEmpty(context.getRuntime(),
|
86
|
+
getDocument(),
|
87
|
+
context.getRuntime().newString(DEFAULT_CONTENT_TYPE),
|
88
|
+
context.getRuntime().newString(DEFAULT_PUBLIC_ID),
|
89
|
+
context.getRuntime().newString(DEFAULT_SYTEM_ID));
|
90
|
+
setInternalSubset(internalSubset);
|
72
91
|
}
|
73
92
|
|
74
|
-
|
75
|
-
|
93
|
+
return internalSubset;
|
94
|
+
}
|
95
|
+
|
96
|
+
@Override
|
97
|
+
void
|
98
|
+
init(Ruby runtime, Document document)
|
99
|
+
{
|
100
|
+
stabilizeTextContent(document);
|
101
|
+
document.normalize();
|
102
|
+
setInstanceVariable("@decorators", runtime.getNil());
|
103
|
+
if (document.getDocumentElement() != null) {
|
104
|
+
stabilizeAttrs(document.getDocumentElement());
|
76
105
|
}
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
106
|
+
}
|
107
|
+
|
108
|
+
private static void
|
109
|
+
stabilizeAttrs(Node node)
|
110
|
+
{
|
111
|
+
if (node.hasAttributes()) {
|
112
|
+
NamedNodeMap nodeMap = node.getAttributes();
|
113
|
+
for (int i = 0; i < nodeMap.getLength(); i++) {
|
114
|
+
Node n = nodeMap.item(i);
|
115
|
+
if (n instanceof Attr) {
|
116
|
+
stabilizeAttr((Attr) n);
|
88
117
|
}
|
89
|
-
|
90
|
-
Helpers.invoke(context, htmlDocument, "initialize", args);
|
91
|
-
|
92
|
-
return htmlDocument;
|
118
|
+
}
|
93
119
|
}
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
// html documents are expected to have a default internal subset
|
99
|
-
// the default values are the same ones used when the following
|
100
|
-
// feature is turned on
|
101
|
-
// "http://cyberneko.org/html/features/insert-doctype"
|
102
|
-
// the reason we don't turn it on, is because it overrides the document's
|
103
|
-
// declared doctype declaration.
|
104
|
-
|
105
|
-
if (internalSubset.isNil()) {
|
106
|
-
internalSubset = XmlDtd.newEmpty(context.getRuntime(),
|
107
|
-
getDocument(),
|
108
|
-
context.getRuntime().newString(DEFAULT_CONTENT_TYPE),
|
109
|
-
context.getRuntime().newString(DEFAULT_PUBLIC_ID),
|
110
|
-
context.getRuntime().newString(DEFAULT_SYTEM_ID));
|
111
|
-
setInternalSubset(internalSubset);
|
112
|
-
}
|
113
|
-
|
114
|
-
return internalSubset;
|
115
|
-
}
|
116
|
-
|
117
|
-
@Override
|
118
|
-
void init(Ruby runtime, Document document) {
|
119
|
-
stabilizeTextContent(document);
|
120
|
-
document.normalize();
|
121
|
-
setInstanceVariable("@decorators", runtime.getNil());
|
122
|
-
if (document.getDocumentElement() != null) {
|
123
|
-
stabilizeAttrs(document.getDocumentElement());
|
124
|
-
}
|
125
|
-
}
|
126
|
-
|
127
|
-
private static void stabilizeAttrs(Node node) {
|
128
|
-
if (node.hasAttributes()) {
|
129
|
-
NamedNodeMap nodeMap = node.getAttributes();
|
130
|
-
for (int i=0; i<nodeMap.getLength(); i++) {
|
131
|
-
Node n = nodeMap.item(i);
|
132
|
-
if (n instanceof Attr) {
|
133
|
-
stabilizeAttr((Attr) n);
|
134
|
-
}
|
135
|
-
}
|
136
|
-
}
|
137
|
-
NodeList children = node.getChildNodes();
|
138
|
-
for (int i=0; i<children.getLength(); i++) {
|
139
|
-
stabilizeAttrs(children.item(i));
|
140
|
-
}
|
141
|
-
}
|
142
|
-
|
143
|
-
public void setParsedEncoding(String encoding) {
|
144
|
-
parsed_encoding = encoding;
|
145
|
-
}
|
146
|
-
|
147
|
-
public String getPraedEncoding() {
|
148
|
-
return parsed_encoding;
|
149
|
-
}
|
150
|
-
|
151
|
-
/*
|
152
|
-
* call-seq:
|
153
|
-
* read_io(io, url, encoding, options)
|
154
|
-
*
|
155
|
-
* Read the HTML document from +io+ with given +url+, +encoding+,
|
156
|
-
* and +options+. See Nokogiri::HTML.parse
|
157
|
-
*/
|
158
|
-
@JRubyMethod(meta = true, required = 4)
|
159
|
-
public static IRubyObject read_io(ThreadContext context, IRubyObject klass, IRubyObject[] args) {
|
160
|
-
HtmlDomParserContext ctx = new HtmlDomParserContext(context.runtime, args[2], args[3]);
|
161
|
-
ctx.setIOInputSource(context, args[0], args[1]);
|
162
|
-
return ctx.parse(context, (RubyClass) klass, args[1]);
|
163
|
-
}
|
164
|
-
|
165
|
-
/*
|
166
|
-
* call-seq:
|
167
|
-
* read_memory(string, url, encoding, options)
|
168
|
-
*
|
169
|
-
* Read the HTML document contained in +string+ with given +url+, +encoding+,
|
170
|
-
* and +options+. See Nokogiri::HTML.parse
|
171
|
-
*/
|
172
|
-
@JRubyMethod(meta = true, required = 4)
|
173
|
-
public static IRubyObject read_memory(ThreadContext context, IRubyObject klass, IRubyObject[] args) {
|
174
|
-
HtmlDomParserContext ctx = new HtmlDomParserContext(context.runtime, args[2], args[3]);
|
175
|
-
ctx.setStringInputSource(context, args[0], args[1]);
|
176
|
-
return ctx.parse(context, (RubyClass) klass, args[1]);
|
120
|
+
NodeList children = node.getChildNodes();
|
121
|
+
for (int i = 0; i < children.getLength(); i++) {
|
122
|
+
stabilizeAttrs(children.item(i));
|
177
123
|
}
|
124
|
+
}
|
125
|
+
|
126
|
+
public void
|
127
|
+
setParsedEncoding(String encoding)
|
128
|
+
{
|
129
|
+
parsed_encoding = encoding;
|
130
|
+
}
|
131
|
+
|
132
|
+
public String
|
133
|
+
getPraedEncoding()
|
134
|
+
{
|
135
|
+
return parsed_encoding;
|
136
|
+
}
|
137
|
+
|
138
|
+
/*
|
139
|
+
* call-seq:
|
140
|
+
* read_io(io, url, encoding, options)
|
141
|
+
*
|
142
|
+
* Read the HTML document from +io+ with given +url+, +encoding+,
|
143
|
+
* and +options+. See Nokogiri::HTML.parse
|
144
|
+
*/
|
145
|
+
@JRubyMethod(meta = true, required = 4)
|
146
|
+
public static IRubyObject
|
147
|
+
read_io(ThreadContext context, IRubyObject klass, IRubyObject[] args)
|
148
|
+
{
|
149
|
+
HtmlDomParserContext ctx = new HtmlDomParserContext(context.runtime, args[2], args[3]);
|
150
|
+
ctx.setIOInputSource(context, args[0], args[1]);
|
151
|
+
return ctx.parse(context, (RubyClass) klass, args[1]);
|
152
|
+
}
|
153
|
+
|
154
|
+
/*
|
155
|
+
* call-seq:
|
156
|
+
* read_memory(string, url, encoding, options)
|
157
|
+
*
|
158
|
+
* Read the HTML document contained in +string+ with given +url+, +encoding+,
|
159
|
+
* and +options+. See Nokogiri::HTML.parse
|
160
|
+
*/
|
161
|
+
@JRubyMethod(meta = true, required = 4)
|
162
|
+
public static IRubyObject
|
163
|
+
read_memory(ThreadContext context, IRubyObject klass, IRubyObject[] args)
|
164
|
+
{
|
165
|
+
HtmlDomParserContext ctx = new HtmlDomParserContext(context.runtime, args[2], args[3]);
|
166
|
+
ctx.setStringInputSource(context, args[0], args[1]);
|
167
|
+
return ctx.parse(context, (RubyClass) klass, args[1]);
|
168
|
+
}
|
178
169
|
}
|
@@ -1,35 +1,3 @@
|
|
1
|
-
/**
|
2
|
-
* (The MIT License)
|
3
|
-
*
|
4
|
-
* Copyright (c) 2008 - 2012:
|
5
|
-
*
|
6
|
-
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
|
-
* * {Mike Dalessio}[http://mike.daless.io]
|
8
|
-
* * {Charles Nutter}[http://blog.headius.com]
|
9
|
-
* * {Sergio Arbeo}[http://www.serabe.com]
|
10
|
-
* * {Patrick Mahoney}[http://polycrystal.org]
|
11
|
-
* * {Yoko Harada}[http://yokolet.blogspot.com]
|
12
|
-
*
|
13
|
-
* Permission is hereby granted, free of charge, to any person obtaining
|
14
|
-
* a copy of this software and associated documentation files (the
|
15
|
-
* 'Software'), to deal in the Software without restriction, including
|
16
|
-
* without limitation the rights to use, copy, modify, merge, publish,
|
17
|
-
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
-
* permit persons to whom the Software is furnished to do so, subject to
|
19
|
-
* the following conditions:
|
20
|
-
*
|
21
|
-
* The above copyright notice and this permission notice shall be
|
22
|
-
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
24
|
-
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
|
-
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
-
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
27
|
-
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
28
|
-
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
29
|
-
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
30
|
-
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
-
*/
|
32
|
-
|
33
1
|
package nokogiri;
|
34
2
|
|
35
3
|
import java.util.ArrayList;
|
@@ -49,100 +17,117 @@ import org.jruby.runtime.builtin.IRubyObject;
|
|
49
17
|
|
50
18
|
/**
|
51
19
|
* Class for Nokogiri::HTML::ElementDescription.
|
52
|
-
*
|
20
|
+
*
|
53
21
|
* @author Patrick Mahoney <pat@polycrystal.org>
|
54
22
|
*/
|
55
|
-
@JRubyClass(name="Nokogiri::HTML::ElementDescription")
|
56
|
-
public class HtmlElementDescription extends RubyObject
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
23
|
+
@JRubyClass(name = "Nokogiri::HTML::ElementDescription")
|
24
|
+
public class HtmlElementDescription extends RubyObject
|
25
|
+
{
|
26
|
+
|
27
|
+
/**
|
28
|
+
* Stores memoized hash of element -> list of valid subelements.
|
29
|
+
*/
|
30
|
+
static protected Map<Short, List<String>> subElements;
|
31
|
+
static
|
32
|
+
{
|
33
|
+
Map<Short, List<String>> _subElements =
|
34
|
+
new HashMap<Short, List<String>>();
|
35
|
+
subElements = Collections.synchronizedMap(_subElements);
|
36
|
+
}
|
37
|
+
|
38
|
+
protected HTMLElements.Element element;
|
39
|
+
|
40
|
+
public
|
41
|
+
HtmlElementDescription(Ruby runtime, RubyClass rubyClass)
|
42
|
+
{
|
43
|
+
super(runtime, rubyClass);
|
44
|
+
}
|
45
|
+
|
46
|
+
/**
|
47
|
+
* Lookup the list of sub elements of <code>code</code>. If not
|
48
|
+
* already stored, iterate through all elements to find valid
|
49
|
+
* subelements; save this list and return it.
|
50
|
+
*/
|
51
|
+
protected static List<String>
|
52
|
+
findSubElements(HTMLElements.Element elem)
|
53
|
+
{
|
54
|
+
List<String> subs = subElements.get(elem.code);
|
55
|
+
|
56
|
+
if (subs == null) {
|
57
|
+
subs = new ArrayList<String>();
|
58
|
+
|
59
|
+
/*
|
60
|
+
* A bit of a hack. NekoHtml source code shows that
|
61
|
+
* UNKNOWN is the highest value element. We cannot access
|
62
|
+
* the list of elements directly because it's protected.
|
63
|
+
*/
|
64
|
+
for (short c = 0; c < HTMLElements.UNKNOWN; c++) {
|
65
|
+
HTMLElements.Element maybe_sub =
|
66
|
+
HTMLElements.getElement(c);
|
67
|
+
if (maybe_sub.isParent(elem)) {
|
68
|
+
subs.add(maybe_sub.name);
|
99
69
|
}
|
70
|
+
}
|
100
71
|
|
101
|
-
|
72
|
+
subElements.put(elem.code, subs);
|
102
73
|
}
|
103
74
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
75
|
+
return subs;
|
76
|
+
}
|
77
|
+
|
78
|
+
@JRubyMethod(name = "[]", meta = true)
|
79
|
+
public static IRubyObject
|
80
|
+
get(ThreadContext context,
|
81
|
+
IRubyObject klazz, IRubyObject name)
|
82
|
+
{
|
83
|
+
|
84
|
+
// nekohtml will return an element even for invalid names, see
|
85
|
+
// http://sourceforge.net/p/nekohtml/code/HEAD/tree/trunk/src/org/cyberneko/html/HTMLElements.java#l514
|
86
|
+
// which breaks `test_fetch_nonexistent'
|
87
|
+
HTMLElements.Element elem = HTMLElements.getElement(name.asJavaString(), HTMLElements.NO_SUCH_ELEMENT);
|
88
|
+
if (elem == HTMLElements.NO_SUCH_ELEMENT) {
|
89
|
+
return context.nil;
|
119
90
|
}
|
120
91
|
|
121
|
-
|
122
|
-
|
123
|
-
|
92
|
+
HtmlElementDescription desc =
|
93
|
+
new HtmlElementDescription(context.getRuntime(), (RubyClass)klazz);
|
94
|
+
desc.element = elem;
|
95
|
+
return desc;
|
96
|
+
}
|
97
|
+
|
98
|
+
@JRubyMethod()
|
99
|
+
public IRubyObject
|
100
|
+
name(ThreadContext context)
|
101
|
+
{
|
102
|
+
return context.getRuntime().newString(element.name.toLowerCase());
|
103
|
+
}
|
104
|
+
|
105
|
+
@JRubyMethod(name = "inline?")
|
106
|
+
public IRubyObject
|
107
|
+
inline_eh(ThreadContext context)
|
108
|
+
{
|
109
|
+
return context.getRuntime().newBoolean(element.isInline());
|
110
|
+
}
|
111
|
+
|
112
|
+
@JRubyMethod(name = "empty?")
|
113
|
+
public IRubyObject
|
114
|
+
empty_eh(ThreadContext context)
|
115
|
+
{
|
116
|
+
return context.getRuntime().newBoolean(element.isEmpty());
|
117
|
+
}
|
118
|
+
|
119
|
+
@JRubyMethod()
|
120
|
+
public IRubyObject
|
121
|
+
sub_elements(ThreadContext context)
|
122
|
+
{
|
123
|
+
Ruby ruby = context.getRuntime();
|
124
|
+
List<String> subs = findSubElements(element);
|
125
|
+
IRubyObject[] ary = new IRubyObject[subs.size()];
|
126
|
+
for (int i = 0; i < subs.size(); ++i) {
|
127
|
+
ary[i] = ruby.newString(subs.get(i));
|
124
128
|
}
|
125
129
|
|
126
|
-
|
127
|
-
|
128
|
-
return context.getRuntime().newBoolean(element.isInline());
|
129
|
-
}
|
130
|
-
|
131
|
-
@JRubyMethod(name="empty?")
|
132
|
-
public IRubyObject empty_eh(ThreadContext context) {
|
133
|
-
return context.getRuntime().newBoolean(element.isEmpty());
|
134
|
-
}
|
135
|
-
|
136
|
-
@JRubyMethod()
|
137
|
-
public IRubyObject sub_elements(ThreadContext context) {
|
138
|
-
Ruby ruby = context.getRuntime();
|
139
|
-
List<String> subs = findSubElements(element);
|
140
|
-
IRubyObject[] ary = new IRubyObject[subs.size()];
|
141
|
-
for (int i = 0; i < subs.size(); ++i) {
|
142
|
-
ary[i] = ruby.newString(subs.get(i));
|
143
|
-
}
|
144
|
-
|
145
|
-
return ruby.newArray(ary);
|
146
|
-
}
|
130
|
+
return ruby.newArray(ary);
|
131
|
+
}
|
147
132
|
|
148
133
|
}
|