nokogiri-maven 1.5.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.ja.rdoc +544 -0
- data/CHANGELOG.rdoc +532 -0
- data/Manifest.txt +283 -0
- data/README.ja.rdoc +106 -0
- data/README.rdoc +174 -0
- data/Rakefile +164 -0
- data/bin/nokogiri +53 -0
- data/ext/java/nokogiri/EncodingHandler.java +124 -0
- data/ext/java/nokogiri/HtmlDocument.java +119 -0
- data/ext/java/nokogiri/HtmlElementDescription.java +145 -0
- data/ext/java/nokogiri/HtmlEntityLookup.java +79 -0
- data/ext/java/nokogiri/HtmlSaxParserContext.java +259 -0
- data/ext/java/nokogiri/NokogiriService.java +590 -0
- data/ext/java/nokogiri/XmlAttr.java +180 -0
- data/ext/java/nokogiri/XmlAttributeDecl.java +130 -0
- data/ext/java/nokogiri/XmlCdata.java +84 -0
- data/ext/java/nokogiri/XmlComment.java +86 -0
- data/ext/java/nokogiri/XmlDocument.java +519 -0
- data/ext/java/nokogiri/XmlDocumentFragment.java +223 -0
- data/ext/java/nokogiri/XmlDtd.java +469 -0
- data/ext/java/nokogiri/XmlElement.java +195 -0
- data/ext/java/nokogiri/XmlElementContent.java +382 -0
- data/ext/java/nokogiri/XmlElementDecl.java +152 -0
- data/ext/java/nokogiri/XmlEntityDecl.java +162 -0
- data/ext/java/nokogiri/XmlEntityReference.java +97 -0
- data/ext/java/nokogiri/XmlNamespace.java +183 -0
- data/ext/java/nokogiri/XmlNode.java +1378 -0
- data/ext/java/nokogiri/XmlNodeSet.java +267 -0
- data/ext/java/nokogiri/XmlProcessingInstruction.java +99 -0
- data/ext/java/nokogiri/XmlReader.java +408 -0
- data/ext/java/nokogiri/XmlRelaxng.java +144 -0
- data/ext/java/nokogiri/XmlSaxParserContext.java +367 -0
- data/ext/java/nokogiri/XmlSaxPushParser.java +184 -0
- data/ext/java/nokogiri/XmlSchema.java +324 -0
- data/ext/java/nokogiri/XmlSyntaxError.java +119 -0
- data/ext/java/nokogiri/XmlText.java +119 -0
- data/ext/java/nokogiri/XmlXpathContext.java +199 -0
- data/ext/java/nokogiri/XsltStylesheet.java +197 -0
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +204 -0
- data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +73 -0
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +86 -0
- data/ext/java/nokogiri/internals/NokogiriHandler.java +327 -0
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +639 -0
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +167 -0
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +130 -0
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +74 -0
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +121 -0
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +79 -0
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +141 -0
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +73 -0
- data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +67 -0
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +86 -0
- data/ext/java/nokogiri/internals/ParserContext.java +276 -0
- data/ext/java/nokogiri/internals/PushInputStream.java +411 -0
- data/ext/java/nokogiri/internals/ReaderNode.java +531 -0
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +567 -0
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +76 -0
- data/ext/java/nokogiri/internals/XmlDeclHandler.java +42 -0
- data/ext/java/nokogiri/internals/XmlDomParser.java +76 -0
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +244 -0
- data/ext/java/nokogiri/internals/XmlSaxParser.java +65 -0
- data/ext/java/nokogiri/internals/XsltExtensionFunction.java +72 -0
- data/ext/nokogiri/depend +358 -0
- data/ext/nokogiri/extconf.rb +124 -0
- data/ext/nokogiri/html_document.c +154 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +276 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +94 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/nokogiri.c +115 -0
- data/ext/nokogiri/nokogiri.h +160 -0
- data/ext/nokogiri/xml_attr.c +94 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +56 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +54 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +478 -0
- data/ext/nokogiri/xml_document.h +23 -0
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +202 -0
- data/ext/nokogiri/xml_dtd.h +10 -0
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_encoding_handler.c +79 -0
- data/ext/nokogiri/xml_encoding_handler.h +8 -0
- data/ext/nokogiri/xml_entity_decl.c +110 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +52 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +56 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
- data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
- data/ext/nokogiri/xml_namespace.c +84 -0
- data/ext/nokogiri/xml_namespace.h +13 -0
- data/ext/nokogiri/xml_node.c +1385 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +418 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +56 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +684 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +161 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +293 -0
- data/ext/nokogiri/xml_sax_parser.h +39 -0
- data/ext/nokogiri/xml_sax_parser_context.c +199 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +115 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +205 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +58 -0
- data/ext/nokogiri/xml_syntax_error.h +13 -0
- data/ext/nokogiri/xml_text.c +50 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath_context.c +309 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +264 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/nokogiri.rb +127 -0
- data/lib/nokogiri/css.rb +27 -0
- data/lib/nokogiri/css/node.rb +99 -0
- data/lib/nokogiri/css/parser.rb +677 -0
- data/lib/nokogiri/css/parser.y +237 -0
- data/lib/nokogiri/css/parser_extras.rb +91 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +152 -0
- data/lib/nokogiri/css/tokenizer.rex +55 -0
- data/lib/nokogiri/css/xpath_visitor.rb +171 -0
- data/lib/nokogiri/decorators/slop.rb +35 -0
- data/lib/nokogiri/html.rb +36 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +213 -0
- data/lib/nokogiri/html/document_fragment.rb +41 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/element_description_defaults.rb +671 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +52 -0
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +88 -0
- data/lib/nokogiri/xml.rb +67 -0
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +425 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +234 -0
- data/lib/nokogiri/xml/document_fragment.rb +98 -0
- data/lib/nokogiri/xml/dtd.rb +22 -0
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +19 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node.rb +915 -0
- data/lib/nokogiri/xml/node/save_options.rb +61 -0
- data/lib/nokogiri/xml/node_set.rb +357 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +93 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +112 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +4 -0
- data/lib/nokogiri/xml/sax/document.rb +164 -0
- data/lib/nokogiri/xml/sax/parser.rb +115 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +63 -0
- data/lib/nokogiri/xml/syntax_error.rb +47 -0
- data/lib/nokogiri/xml/text.rb +9 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +52 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +90 -0
- data/nokogiri_help_responses.md +40 -0
- data/tasks/cross_compile.rb +152 -0
- data/tasks/nokogiri.org.rb +18 -0
- data/tasks/test.rb +94 -0
- data/test/css/test_nthiness.rb +159 -0
- data/test/css/test_parser.rb +303 -0
- data/test/css/test_tokenizer.rb +198 -0
- data/test/css/test_xpath_visitor.rb +85 -0
- data/test/decorators/test_slop.rb +16 -0
- data/test/files/2ch.html +108 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/encoding.html +82 -0
- data/test/files/encoding.xhtml +84 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/metacharset.html +10 -0
- data/test/files/noencoding.html +47 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/helper.rb +173 -0
- data/test/html/sax/test_parser.rb +136 -0
- data/test/html/sax/test_parser_context.rb +48 -0
- data/test/html/test_builder.rb +164 -0
- data/test/html/test_document.rb +472 -0
- data/test/html/test_document_encoding.rb +138 -0
- data/test/html/test_document_fragment.rb +255 -0
- data/test/html/test_element_description.rb +100 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +190 -0
- data/test/html/test_node_encoding.rb +27 -0
- data/test/test_convert_xpath.rb +135 -0
- data/test/test_css_cache.rb +45 -0
- data/test/test_encoding_handler.rb +46 -0
- data/test/test_memory_leak.rb +72 -0
- data/test/test_nokogiri.rb +132 -0
- data/test/test_reader.rb +425 -0
- data/test/test_soap4r_sax.rb +52 -0
- data/test/test_xslt_transforms.rb +193 -0
- data/test/xml/node/test_save_options.rb +28 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +338 -0
- data/test/xml/sax/test_parser_context.rb +113 -0
- data/test/xml/sax/test_push_parser.rb +156 -0
- data/test/xml/test_attr.rb +65 -0
- data/test/xml/test_attribute_decl.rb +86 -0
- data/test/xml/test_builder.rb +227 -0
- data/test/xml/test_cdata.rb +50 -0
- data/test/xml/test_comment.rb +29 -0
- data/test/xml/test_document.rb +697 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +192 -0
- data/test/xml/test_dtd.rb +107 -0
- data/test/xml/test_dtd_encoding.rb +33 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +122 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +70 -0
- data/test/xml/test_node.rb +917 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +107 -0
- data/test/xml/test_node_reparenting.rb +334 -0
- data/test/xml/test_node_set.rb +742 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +94 -0
- data/test/xml/test_syntax_error.rb +12 -0
- data/test/xml/test_text.rb +47 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +237 -0
- data/test/xslt/test_custom_functions.rb +94 -0
- data/test/xslt/test_exception_handling.rb +37 -0
- metadata +552 -0
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* (The MIT License)
|
|
3
|
+
*
|
|
4
|
+
* Copyright (c) 2008 - 2011:
|
|
5
|
+
*
|
|
6
|
+
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
|
7
|
+
* * {Mike Dalessio}[http://mike.daless.io]
|
|
8
|
+
* * {Charles Nutter}[http://blog.headius.com]
|
|
9
|
+
* * {Sergio Arbeo}[http://www.serabe.com]
|
|
10
|
+
* * {Patrick Mahoney}[http://polycrystal.org]
|
|
11
|
+
* * {Yoko Harada}[http://yokolet.blogspot.com]
|
|
12
|
+
*
|
|
13
|
+
* Permission is hereby granted, free of charge, to any person obtaining
|
|
14
|
+
* a copy of this software and associated documentation files (the
|
|
15
|
+
* 'Software'), to deal in the Software without restriction, including
|
|
16
|
+
* without limitation the rights to use, copy, modify, merge, publish,
|
|
17
|
+
* distribute, sublicense, and/or sell copies of the Software, and to
|
|
18
|
+
* permit persons to whom the Software is furnished to do so, subject to
|
|
19
|
+
* the following conditions:
|
|
20
|
+
*
|
|
21
|
+
* The above copyright notice and this permission notice shall be
|
|
22
|
+
* included in all copies or substantial portions of the Software.
|
|
23
|
+
*
|
|
24
|
+
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
25
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
26
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
27
|
+
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
28
|
+
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
29
|
+
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
30
|
+
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
package nokogiri;
|
|
34
|
+
|
|
35
|
+
import java.util.ArrayList;
|
|
36
|
+
import java.util.Collections;
|
|
37
|
+
import java.util.HashMap;
|
|
38
|
+
import java.util.List;
|
|
39
|
+
import java.util.Map;
|
|
40
|
+
|
|
41
|
+
import org.cyberneko.html.HTMLElements;
|
|
42
|
+
import org.jruby.Ruby;
|
|
43
|
+
import org.jruby.RubyClass;
|
|
44
|
+
import org.jruby.RubyObject;
|
|
45
|
+
import org.jruby.anno.JRubyClass;
|
|
46
|
+
import org.jruby.anno.JRubyMethod;
|
|
47
|
+
import org.jruby.runtime.ThreadContext;
|
|
48
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* Class for Nokogiri::HTML::ElementDescription.
|
|
52
|
+
*
|
|
53
|
+
* @author Patrick Mahoney <pat@polycrystal.org>
|
|
54
|
+
*/
|
|
55
|
+
@JRubyClass(name="Nokogiri::HTML::ElementDescription")
|
|
56
|
+
public class HtmlElementDescription extends RubyObject {
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Stores memoized hash of element -> list of valid subelements.
|
|
60
|
+
*/
|
|
61
|
+
static protected Map<Short, List<String>> subElements;
|
|
62
|
+
static {
|
|
63
|
+
Map<Short, List<String>> _subElements =
|
|
64
|
+
new HashMap<Short, List<String>>();
|
|
65
|
+
subElements = Collections.synchronizedMap(_subElements);
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
protected HTMLElements.Element element;
|
|
69
|
+
|
|
70
|
+
public HtmlElementDescription(Ruby runtime, RubyClass rubyClass) {
|
|
71
|
+
super(runtime, rubyClass);
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
/**
|
|
75
|
+
* Lookup the list of sub elements of <code>code</code>. If not
|
|
76
|
+
* already stored, iterate through all elements to find valid
|
|
77
|
+
* subelements; save this list and return it.
|
|
78
|
+
*/
|
|
79
|
+
protected static List<String> findSubElements(HTMLElements.Element elem) {
|
|
80
|
+
List<String> subs = subElements.get(elem.code);
|
|
81
|
+
|
|
82
|
+
if (subs == null) {
|
|
83
|
+
subs = new ArrayList<String>();
|
|
84
|
+
|
|
85
|
+
/*
|
|
86
|
+
* A bit of a hack. NekoHtml source code shows that
|
|
87
|
+
* UNKNOWN is the highest value element. We cannot access
|
|
88
|
+
* the list of elements directly because it's protected.
|
|
89
|
+
*/
|
|
90
|
+
for (short c = 0; c < HTMLElements.UNKNOWN; c++) {
|
|
91
|
+
HTMLElements.Element maybe_sub =
|
|
92
|
+
HTMLElements.getElement(c);
|
|
93
|
+
if (maybe_sub.isParent(elem)) {
|
|
94
|
+
subs.add(maybe_sub.name);
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
subElements.put(elem.code, subs);
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
return subs;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
@JRubyMethod(name="[]", meta=true)
|
|
105
|
+
public static IRubyObject get(ThreadContext context,
|
|
106
|
+
IRubyObject klazz, IRubyObject name) {
|
|
107
|
+
|
|
108
|
+
HTMLElements.Element elem = HTMLElements.getElement(name.toString());
|
|
109
|
+
if (elem == HTMLElements.NO_SUCH_ELEMENT)
|
|
110
|
+
return context.getRuntime().getNil();
|
|
111
|
+
|
|
112
|
+
HtmlElementDescription desc =
|
|
113
|
+
new HtmlElementDescription(context.getRuntime(), (RubyClass)klazz);
|
|
114
|
+
desc.element = elem;
|
|
115
|
+
return desc;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
@JRubyMethod()
|
|
119
|
+
public IRubyObject name(ThreadContext context) {
|
|
120
|
+
return context.getRuntime().newString(element.name.toLowerCase());
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
@JRubyMethod(name="inline?")
|
|
124
|
+
public IRubyObject inline_eh(ThreadContext context) {
|
|
125
|
+
return context.getRuntime().newBoolean(element.isInline());
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
@JRubyMethod(name="empty?")
|
|
129
|
+
public IRubyObject empty_eh(ThreadContext context) {
|
|
130
|
+
return context.getRuntime().newBoolean(element.isEmpty());
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
@JRubyMethod()
|
|
134
|
+
public IRubyObject sub_elements(ThreadContext context) {
|
|
135
|
+
Ruby ruby = context.getRuntime();
|
|
136
|
+
List<String> subs = findSubElements(element);
|
|
137
|
+
IRubyObject[] ary = new IRubyObject[subs.size()];
|
|
138
|
+
for (int i = 0; i < subs.size(); ++i) {
|
|
139
|
+
ary[i] = ruby.newString(subs.get(i));
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
return ruby.newArray(ary);
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* (The MIT License)
|
|
3
|
+
*
|
|
4
|
+
* Copyright (c) 2008 - 2011:
|
|
5
|
+
*
|
|
6
|
+
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
|
7
|
+
* * {Mike Dalessio}[http://mike.daless.io]
|
|
8
|
+
* * {Charles Nutter}[http://blog.headius.com]
|
|
9
|
+
* * {Sergio Arbeo}[http://www.serabe.com]
|
|
10
|
+
* * {Patrick Mahoney}[http://polycrystal.org]
|
|
11
|
+
* * {Yoko Harada}[http://yokolet.blogspot.com]
|
|
12
|
+
*
|
|
13
|
+
* Permission is hereby granted, free of charge, to any person obtaining
|
|
14
|
+
* a copy of this software and associated documentation files (the
|
|
15
|
+
* 'Software'), to deal in the Software without restriction, including
|
|
16
|
+
* without limitation the rights to use, copy, modify, merge, publish,
|
|
17
|
+
* distribute, sublicense, and/or sell copies of the Software, and to
|
|
18
|
+
* permit persons to whom the Software is furnished to do so, subject to
|
|
19
|
+
* the following conditions:
|
|
20
|
+
*
|
|
21
|
+
* The above copyright notice and this permission notice shall be
|
|
22
|
+
* included in all copies or substantial portions of the Software.
|
|
23
|
+
*
|
|
24
|
+
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
25
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
26
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
27
|
+
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
28
|
+
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
29
|
+
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
30
|
+
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
package nokogiri;
|
|
34
|
+
|
|
35
|
+
import static org.jruby.javasupport.util.RuntimeHelpers.invoke;
|
|
36
|
+
|
|
37
|
+
import org.cyberneko.html.HTMLEntities;
|
|
38
|
+
import org.jruby.Ruby;
|
|
39
|
+
import org.jruby.RubyClass;
|
|
40
|
+
import org.jruby.RubyObject;
|
|
41
|
+
import org.jruby.anno.JRubyClass;
|
|
42
|
+
import org.jruby.anno.JRubyMethod;
|
|
43
|
+
import org.jruby.runtime.ThreadContext;
|
|
44
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* Class for Nokogiri::HTML::EntityLookup.
|
|
48
|
+
*
|
|
49
|
+
* @author Patrick Mahoney <pat@polycrystal.org>
|
|
50
|
+
*/
|
|
51
|
+
@JRubyClass(name="Nokogiri::HTML::EntityLookup")
|
|
52
|
+
public class HtmlEntityLookup extends RubyObject {
|
|
53
|
+
|
|
54
|
+
public HtmlEntityLookup(Ruby runtime, RubyClass rubyClass) {
|
|
55
|
+
super(runtime, rubyClass);
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Looks up an HTML entity <code>key</code>.
|
|
60
|
+
*
|
|
61
|
+
* The description is a bit lacking.
|
|
62
|
+
*/
|
|
63
|
+
@JRubyMethod()
|
|
64
|
+
public IRubyObject get(ThreadContext context, IRubyObject key) {
|
|
65
|
+
Ruby ruby = context.getRuntime();
|
|
66
|
+
String name = key.toString();
|
|
67
|
+
int val = HTMLEntities.get(name);
|
|
68
|
+
if (val == -1) return ruby.getNil();
|
|
69
|
+
|
|
70
|
+
IRubyObject edClass =
|
|
71
|
+
ruby.getClassFromPath("Nokogiri::HTML::EntityDescription");
|
|
72
|
+
IRubyObject edObj = invoke(context, edClass, "new",
|
|
73
|
+
ruby.newFixnum(val), ruby.newString(name),
|
|
74
|
+
ruby.newString(name + " entity"));
|
|
75
|
+
|
|
76
|
+
return edObj;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
}
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* (The MIT License)
|
|
3
|
+
*
|
|
4
|
+
* Copyright (c) 2008 - 2011:
|
|
5
|
+
*
|
|
6
|
+
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
|
7
|
+
* * {Mike Dalessio}[http://mike.daless.io]
|
|
8
|
+
* * {Charles Nutter}[http://blog.headius.com]
|
|
9
|
+
* * {Sergio Arbeo}[http://www.serabe.com]
|
|
10
|
+
* * {Patrick Mahoney}[http://polycrystal.org]
|
|
11
|
+
* * {Yoko Harada}[http://yokolet.blogspot.com]
|
|
12
|
+
*
|
|
13
|
+
* Permission is hereby granted, free of charge, to any person obtaining
|
|
14
|
+
* a copy of this software and associated documentation files (the
|
|
15
|
+
* 'Software'), to deal in the Software without restriction, including
|
|
16
|
+
* without limitation the rights to use, copy, modify, merge, publish,
|
|
17
|
+
* distribute, sublicense, and/or sell copies of the Software, and to
|
|
18
|
+
* permit persons to whom the Software is furnished to do so, subject to
|
|
19
|
+
* the following conditions:
|
|
20
|
+
*
|
|
21
|
+
* The above copyright notice and this permission notice shall be
|
|
22
|
+
* included in all copies or substantial portions of the Software.
|
|
23
|
+
*
|
|
24
|
+
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
25
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
26
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
27
|
+
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
28
|
+
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
29
|
+
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
30
|
+
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
package nokogiri;
|
|
34
|
+
|
|
35
|
+
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
|
|
36
|
+
|
|
37
|
+
import java.io.ByteArrayInputStream;
|
|
38
|
+
import java.io.InputStream;
|
|
39
|
+
import java.nio.charset.Charset;
|
|
40
|
+
import java.nio.charset.IllegalCharsetNameException;
|
|
41
|
+
import java.util.EnumSet;
|
|
42
|
+
import java.util.regex.Matcher;
|
|
43
|
+
import java.util.regex.Pattern;
|
|
44
|
+
|
|
45
|
+
import nokogiri.internals.NokogiriHandler;
|
|
46
|
+
|
|
47
|
+
import org.apache.xerces.parsers.AbstractSAXParser;
|
|
48
|
+
import org.cyberneko.html.parsers.SAXParser;
|
|
49
|
+
import org.jruby.Ruby;
|
|
50
|
+
import org.jruby.RubyClass;
|
|
51
|
+
import org.jruby.RubyFixnum;
|
|
52
|
+
import org.jruby.RubyString;
|
|
53
|
+
import org.jruby.anno.JRubyClass;
|
|
54
|
+
import org.jruby.anno.JRubyMethod;
|
|
55
|
+
import org.jruby.runtime.ThreadContext;
|
|
56
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
|
57
|
+
import org.xml.sax.SAXException;
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Class for Nokogiri::HTML::SAX::ParserContext.
|
|
61
|
+
*
|
|
62
|
+
* @author serabe
|
|
63
|
+
* @author Patrick Mahoney <pat@polycrystal.org>
|
|
64
|
+
* @author Yoko Harada <yokolet@gmail.com>
|
|
65
|
+
*/
|
|
66
|
+
|
|
67
|
+
@JRubyClass(name="Nokogiri::HTML::SAX::ParserContext", parent="Nokogiri::XML::SAX::ParserContext")
|
|
68
|
+
public class HtmlSaxParserContext extends XmlSaxParserContext {
|
|
69
|
+
|
|
70
|
+
/**
 * Creates an HTML SAX parser context by delegating to the superclass
 * constructor.
 *
 * @param ruby      the JRuby runtime, passed through to the superclass
 * @param rubyClass the Ruby class, passed through to the superclass
 */
public HtmlSaxParserContext(Ruby ruby, RubyClass rubyClass) {
|
|
71
|
+
super(ruby, rubyClass);
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
@Override
|
|
75
|
+
protected AbstractSAXParser createParser() throws SAXException {
|
|
76
|
+
SAXParser parser = new SAXParser();
|
|
77
|
+
|
|
78
|
+
try{
|
|
79
|
+
parser.setProperty(
|
|
80
|
+
"http://cyberneko.org/html/properties/names/elems", "lower");
|
|
81
|
+
parser.setProperty(
|
|
82
|
+
"http://cyberneko.org/html/properties/names/attrs", "lower");
|
|
83
|
+
return parser;
|
|
84
|
+
} catch(SAXException ex) {
|
|
85
|
+
throw new SAXException(
|
|
86
|
+
"Problem while creating HTML SAX Parser: " + ex.toString());
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
@JRubyMethod(name="memory", meta=true)
|
|
91
|
+
public static IRubyObject parse_memory(ThreadContext context,
|
|
92
|
+
IRubyObject klazz,
|
|
93
|
+
IRubyObject data,
|
|
94
|
+
IRubyObject encoding) {
|
|
95
|
+
HtmlSaxParserContext ctx = (HtmlSaxParserContext) NokogiriService.HTML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(context.getRuntime(), (RubyClass)klazz);
|
|
96
|
+
ctx.initialize(context.getRuntime());
|
|
97
|
+
String javaEncoding = findEncoding(context, encoding);
|
|
98
|
+
if (javaEncoding != null) {
|
|
99
|
+
String input = applyEncoding(rubyStringToString(data), javaEncoding);
|
|
100
|
+
ByteArrayInputStream istream = new ByteArrayInputStream(input.getBytes());
|
|
101
|
+
ctx.setInputSource(istream);
|
|
102
|
+
ctx.getInputSource().setEncoding(javaEncoding);
|
|
103
|
+
}
|
|
104
|
+
return ctx;
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
/**
 * Encodings that can be selected by number, each pairing an integer
 * value with an encoding name.
 *
 * NOTE(review): the numbering presumably mirrors libxml2's xmlCharEncoding
 * constants - confirm against the C extension.
 */
public enum EncodingType {
    NONE(0, "NONE"),
    UTF_8(1, "UTF-8"),
    UTF16LE(2, "UTF16LE"),
    UTF16BE(3, "UTF16BE"),
    UCS4LE(4, "UCS4LE"),
    UCS4BE(5, "UCS4BE"),
    EBCDIC(6, "EBCDIC"),
    UCS4_2143(7, "UCS4-2143"),
    UCS4_3412(8, "UCS4-3412"),
    UCS2(9, "UCS2"),
    ISO_8859_1(10, "ISO-8859-1"),
    ISO_8859_2(11, "ISO-8859-2"),
    ISO_8859_3(12, "ISO-8859-3"),
    ISO_8859_4(13, "ISO-8859-4"),
    ISO_8859_5(14, "ISO-8859-5"),
    ISO_8859_6(15, "ISO-8859-6"),
    ISO_8859_7(16, "ISO-8859-7"),
    ISO_8859_8(17, "ISO-8859-8"),
    ISO_8859_9(18, "ISO-8859-9"),
    ISO_2022_JP(19, "ISO-2022-JP"),
    SHIFT_JIS(20, "SHIFT-JIS"),
    EUC_JP(21, "EUC-JP"),
    ASCII(22, "ASCII");

    private final int value;
    private final String name;

    EncodingType(int value, String name) {
        this.value = value;
        this.name = name;
    }

    /**
     * @return the integer value paired with this encoding
     */
    public int getValue() {
        return value;
    }

    /**
     * @return the encoding name paired with this constant (not the Java
     *         identifier of the constant)
     */
    @Override
    public String toString() {
        return name;
    }
}
|
|
147
|
+
|
|
148
|
+
private static String findName(int value) {
|
|
149
|
+
EnumSet<EncodingType> set = EnumSet.allOf(EncodingType.class);
|
|
150
|
+
for (EncodingType type : set) {
|
|
151
|
+
if (type.getValue() == value) return type.toString();
|
|
152
|
+
}
|
|
153
|
+
return null;
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
private static String findEncoding(ThreadContext context, IRubyObject encoding) {
|
|
157
|
+
String rubyEncoding = null;
|
|
158
|
+
if (encoding instanceof RubyString) {
|
|
159
|
+
rubyEncoding = rubyStringToString(encoding);
|
|
160
|
+
} else if (encoding instanceof RubyFixnum) {
|
|
161
|
+
int value = (Integer)encoding.toJava(Integer.class);
|
|
162
|
+
rubyEncoding = findName(value);
|
|
163
|
+
}
|
|
164
|
+
if (rubyEncoding == null) return null;
|
|
165
|
+
try {
|
|
166
|
+
Charset charset = Charset.forName(rubyEncoding);
|
|
167
|
+
return charset.displayName();
|
|
168
|
+
} catch (IllegalCharsetNameException e) {
|
|
169
|
+
throw context.getRuntime().newEncodingCompatibilityError(
|
|
170
|
+
rubyEncoding + "is not supported in Java.");
|
|
171
|
+
} catch (IllegalArgumentException e) {
|
|
172
|
+
throw context.getRuntime().newInvalidEncoding(
|
|
173
|
+
"encoding should not be nil");
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
private static String applyEncoding(String input, String enc) {
|
|
178
|
+
String str = input.toLowerCase();
|
|
179
|
+
int start_pos = 0;
|
|
180
|
+
int end_pos = 0;
|
|
181
|
+
if (input.contains("meta") && input.contains("charset")) {
|
|
182
|
+
Pattern p = Pattern.compile("charset(()|\\s)=(()|\\s)([a-z]|-|_|\\d)+");
|
|
183
|
+
Matcher m = p.matcher(str);
|
|
184
|
+
while (m.find()) {
|
|
185
|
+
start_pos = m.start();
|
|
186
|
+
end_pos = m.end();
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
if (start_pos != end_pos) {
|
|
190
|
+
String substr = input.substring(start_pos, end_pos);
|
|
191
|
+
input = input.replace(substr, "charset=" + enc);
|
|
192
|
+
}
|
|
193
|
+
return input;
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
@JRubyMethod(name="file", meta=true)
|
|
197
|
+
public static IRubyObject parse_file(ThreadContext context,
|
|
198
|
+
IRubyObject klazz,
|
|
199
|
+
IRubyObject data,
|
|
200
|
+
IRubyObject encoding) {
|
|
201
|
+
HtmlSaxParserContext ctx = (HtmlSaxParserContext) NokogiriService.HTML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(context.getRuntime(), (RubyClass)klazz);
|
|
202
|
+
ctx.initialize(context.getRuntime());
|
|
203
|
+
ctx.setInputSourceFile(context, data);
|
|
204
|
+
String javaEncoding = findEncoding(context, encoding);
|
|
205
|
+
if (javaEncoding != null) {
|
|
206
|
+
ctx.getInputSource().setEncoding(javaEncoding);
|
|
207
|
+
}
|
|
208
|
+
return ctx;
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
@JRubyMethod(name="io", meta=true)
|
|
212
|
+
public static IRubyObject parse_io(ThreadContext context,
|
|
213
|
+
IRubyObject klazz,
|
|
214
|
+
IRubyObject data,
|
|
215
|
+
IRubyObject encoding) {
|
|
216
|
+
HtmlSaxParserContext ctx = (HtmlSaxParserContext) NokogiriService.HTML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(context.getRuntime(), (RubyClass)klazz);
|
|
217
|
+
ctx.initialize(context.getRuntime());
|
|
218
|
+
ctx.setInputSource(context, data, context.getRuntime().getNil());
|
|
219
|
+
String javaEncoding = findEncoding(context, encoding);
|
|
220
|
+
if (javaEncoding != null) {
|
|
221
|
+
ctx.getInputSource().setEncoding(javaEncoding);
|
|
222
|
+
}
|
|
223
|
+
return ctx;
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
/**
|
|
227
|
+
* Create a new parser context that will read from a raw input
|
|
228
|
+
* stream. Not a JRuby method. Meant to be run in a separate
|
|
229
|
+
* thread by XmlSaxPushParser.
|
|
230
|
+
*/
|
|
231
|
+
public static IRubyObject parse_stream(ThreadContext context,
|
|
232
|
+
IRubyObject klazz,
|
|
233
|
+
InputStream stream) {
|
|
234
|
+
HtmlSaxParserContext ctx = (HtmlSaxParserContext) NokogiriService.HTML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(context.getRuntime(), (RubyClass)klazz);
|
|
235
|
+
ctx.initialize(context.getRuntime());
|
|
236
|
+
ctx.setInputSource(stream);
|
|
237
|
+
return ctx;
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
/**
 * Pre-parse hook; in this class it deliberately does nothing, since the
 * whole body is commented out.
 *
 * NOTE(review): the disabled code below looks like an attempt to turn on
 * NekoHTML's document-fragment feature when the handler's @document is a
 * Nokogiri::XML::FragmentHandler - confirm before deleting or reviving it.
 *
 * @param context     the current thread context (unused here)
 * @param handlerRuby the Ruby-side handler object (unused here)
 * @param handler     the Java-side handler (unused here)
 */
@Override
|
|
241
|
+
protected void preParse(ThreadContext context,
|
|
242
|
+
IRubyObject handlerRuby,
|
|
243
|
+
NokogiriHandler handler) {
|
|
244
|
+
// final String path = "Nokogiri::XML::FragmentHandler";
|
|
245
|
+
// final String docFrag =
|
|
246
|
+
// "http://cyberneko.org/html/features/balance-tags/document-fragment";
|
|
247
|
+
// RubyObjectAdapter adapter = JavaEmbedUtils.newObjectAdapter();
|
|
248
|
+
// IRubyObject doc = adapter.getInstanceVariable(handlerRuby, "@document");
|
|
249
|
+
// RubyModule mod =
|
|
250
|
+
// context.getRuntime().getClassFromPath(path);
|
|
251
|
+
// try {
|
|
252
|
+
// if (doc != null && !doc.isNil() && adapter.isKindOf(doc, mod))
|
|
253
|
+
// parser.setFeature(docFrag, true);
|
|
254
|
+
// } catch (Exception e) {
|
|
255
|
+
// // ignore
|
|
256
|
+
// }
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
}
|