nokogiri 1.5.0.beta.4 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (107) hide show
  1. data/.gemtest +0 -0
  2. data/CHANGELOG.ja.rdoc +34 -0
  3. data/CHANGELOG.rdoc +40 -1
  4. data/Manifest.txt +11 -2
  5. data/README.rdoc +1 -1
  6. data/Rakefile +100 -104
  7. data/bin/nokogiri +1 -2
  8. data/ext/nokogiri/nokogiri.c +24 -1
  9. data/ext/nokogiri/xml_io.c +32 -7
  10. data/ext/nokogiri/xml_node.c +14 -13
  11. data/ext/nokogiri/xml_sax_parser.c +9 -4
  12. data/ext/nokogiri/xslt_stylesheet.c +7 -1
  13. data/lib/nokogiri.rb +3 -22
  14. data/lib/nokogiri/css.rb +4 -0
  15. data/lib/nokogiri/html/document.rb +10 -14
  16. data/lib/nokogiri/version.rb +76 -23
  17. data/lib/nokogiri/xml/builder.rb +7 -0
  18. data/lib/nokogiri/xml/document.rb +17 -1
  19. data/lib/nokogiri/xml/document_fragment.rb +14 -0
  20. data/lib/nokogiri/xml/node.rb +36 -28
  21. data/lib/nokogiri/xml/node/save_options.rb +17 -1
  22. data/lib/nokogiri/xml/node_set.rb +7 -0
  23. data/lib/nokogiri/xml/parse_options.rb +8 -0
  24. data/lib/nokogiri/xml/reader.rb +6 -6
  25. data/lib/nokogiri/xml/schema.rb +7 -1
  26. data/nokogiri_help_responses.md +40 -0
  27. data/tasks/cross_compile.rb +134 -159
  28. data/tasks/nokogiri.org.rb +18 -0
  29. data/tasks/test.rb +1 -1
  30. data/test/files/encoding.html +82 -0
  31. data/test/files/encoding.xhtml +84 -0
  32. data/test/files/metacharset.html +10 -0
  33. data/test/files/noencoding.html +47 -0
  34. data/test/helper.rb +2 -0
  35. data/test/html/test_document.rb +15 -0
  36. data/test/html/test_document_encoding.rb +13 -0
  37. data/test/test_memory_leak.rb +20 -0
  38. data/test/test_reader.rb +22 -0
  39. data/test/test_xslt_transforms.rb +6 -2
  40. data/test/xml/node/test_save_options.rb +10 -2
  41. data/test/xml/test_builder.rb +17 -0
  42. data/test/xml/test_document.rb +22 -0
  43. data/test/xml/test_node.rb +19 -1
  44. data/test/xml/test_node_reparenting.rb +16 -3
  45. data/test/xml/test_node_set.rb +34 -0
  46. data/test/xml/test_schema.rb +5 -0
  47. data/test/xslt/test_exception_handling.rb +37 -0
  48. metadata +185 -157
  49. data/deps.rip +0 -5
  50. data/ext/java/nokogiri/EncodingHandler.java +0 -124
  51. data/ext/java/nokogiri/HtmlDocument.java +0 -149
  52. data/ext/java/nokogiri/HtmlElementDescription.java +0 -145
  53. data/ext/java/nokogiri/HtmlEntityLookup.java +0 -79
  54. data/ext/java/nokogiri/HtmlSaxParserContext.java +0 -259
  55. data/ext/java/nokogiri/NokogiriService.java +0 -535
  56. data/ext/java/nokogiri/XmlAttr.java +0 -191
  57. data/ext/java/nokogiri/XmlAttributeDecl.java +0 -130
  58. data/ext/java/nokogiri/XmlCdata.java +0 -91
  59. data/ext/java/nokogiri/XmlComment.java +0 -86
  60. data/ext/java/nokogiri/XmlDocument.java +0 -529
  61. data/ext/java/nokogiri/XmlDocumentFragment.java +0 -217
  62. data/ext/java/nokogiri/XmlDtd.java +0 -467
  63. data/ext/java/nokogiri/XmlElement.java +0 -222
  64. data/ext/java/nokogiri/XmlElementContent.java +0 -382
  65. data/ext/java/nokogiri/XmlElementDecl.java +0 -148
  66. data/ext/java/nokogiri/XmlEntityDecl.java +0 -162
  67. data/ext/java/nokogiri/XmlEntityReference.java +0 -75
  68. data/ext/java/nokogiri/XmlNamespace.java +0 -128
  69. data/ext/java/nokogiri/XmlNode.java +0 -1399
  70. data/ext/java/nokogiri/XmlNodeSet.java +0 -311
  71. data/ext/java/nokogiri/XmlProcessingInstruction.java +0 -103
  72. data/ext/java/nokogiri/XmlReader.java +0 -411
  73. data/ext/java/nokogiri/XmlRelaxng.java +0 -144
  74. data/ext/java/nokogiri/XmlSaxParserContext.java +0 -367
  75. data/ext/java/nokogiri/XmlSaxPushParser.java +0 -184
  76. data/ext/java/nokogiri/XmlSchema.java +0 -319
  77. data/ext/java/nokogiri/XmlSyntaxError.java +0 -119
  78. data/ext/java/nokogiri/XmlText.java +0 -136
  79. data/ext/java/nokogiri/XmlXpathContext.java +0 -179
  80. data/ext/java/nokogiri/XsltStylesheet.java +0 -183
  81. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +0 -206
  82. data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +0 -73
  83. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +0 -86
  84. data/ext/java/nokogiri/internals/NokogiriHandler.java +0 -327
  85. data/ext/java/nokogiri/internals/NokogiriHelpers.java +0 -582
  86. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +0 -171
  87. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +0 -118
  88. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +0 -74
  89. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +0 -121
  90. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +0 -79
  91. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +0 -126
  92. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +0 -56
  93. data/ext/java/nokogiri/internals/ParserContext.java +0 -278
  94. data/ext/java/nokogiri/internals/PushInputStream.java +0 -411
  95. data/ext/java/nokogiri/internals/ReaderNode.java +0 -474
  96. data/ext/java/nokogiri/internals/SaveContext.java +0 -288
  97. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +0 -76
  98. data/ext/java/nokogiri/internals/XmlDeclHandler.java +0 -42
  99. data/ext/java/nokogiri/internals/XmlDomParser.java +0 -77
  100. data/ext/java/nokogiri/internals/XmlDomParserContext.java +0 -238
  101. data/ext/java/nokogiri/internals/XmlSaxParser.java +0 -65
  102. data/ext/java/nokogiri/internals/XsltExtensionFunction.java +0 -72
  103. data/lib/isorelax.jar +0 -0
  104. data/lib/jing.jar +0 -0
  105. data/lib/nekodtd.jar +0 -0
  106. data/lib/nekohtml.jar +0 -0
  107. data/lib/xercesImpl.jar +0 -0
@@ -1,206 +0,0 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2011:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
- package nokogiri.internals;
34
-
35
- import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
36
- import static nokogiri.internals.NokogiriHelpers.isNamespace;
37
- import nokogiri.HtmlDocument;
38
- import nokogiri.NokogiriService;
39
- import nokogiri.XmlDocument;
40
-
41
- import org.apache.xerces.parsers.DOMParser;
42
- import org.apache.xerces.xni.Augmentations;
43
- import org.apache.xerces.xni.QName;
44
- import org.apache.xerces.xni.XMLAttributes;
45
- import org.apache.xerces.xni.XNIException;
46
- import org.apache.xerces.xni.parser.XMLDocumentFilter;
47
- import org.apache.xerces.xni.parser.XMLParserConfiguration;
48
- import org.cyberneko.html.HTMLConfiguration;
49
- import org.cyberneko.html.filters.DefaultFilter;
50
- import org.jruby.Ruby;
51
- import org.jruby.RubyClass;
52
- import org.jruby.runtime.ThreadContext;
53
- import org.jruby.runtime.builtin.IRubyObject;
54
- import org.w3c.dom.Document;
55
-
56
- /**
57
- * Parser for HtmlDocument. This class actually parses HtmlDocument using NekoHtml.
58
- *
59
- * @author sergio
60
- * @author Yoko Harada <yokolet@gmail.com>
61
- */
62
- public class HtmlDomParserContext extends XmlDomParserContext {
63
-
64
- public HtmlDomParserContext(Ruby runtime, IRubyObject options) {
65
- super(runtime, options);
66
- }
67
-
68
- public HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) {
69
- super(runtime, encoding, options);
70
- }
71
-
72
- @Override
73
- protected void initErrorHandler() {
74
- if (options.strict) {
75
- errorHandler = new NokogiriStrictErrorHandler(options.noError, options.noWarning);
76
- } else {
77
- errorHandler = new NokogiriNonStrictErrorHandler4NekoHtml(options.noError, options.noWarning);
78
- }
79
- }
80
-
81
- @Override
82
- protected void initParser(Ruby runtime) {
83
- XMLParserConfiguration config = new HTMLConfiguration();
84
- XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter();
85
- XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
86
- XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter};
87
-
88
- config.setErrorHandler(this.errorHandler);
89
- parser = new DOMParser(config);
90
-
91
- // see http://nekohtml.sourceforge.net/settings.html for details
92
- setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
93
- setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
94
- setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
95
- setFeature("http://cyberneko.org/html/features/report-errors", true);
96
- setFeature("http://xml.org/sax/features/namespaces", false);
97
- setProperty("http://cyberneko.org/html/properties/filters", filters);
98
- setFeature("http://cyberneko.org/html/features/insert-doctype", true);
99
- }
100
-
101
- /**
102
- * Enable NekoHTML feature for balancing tags in a document fragment.
103
- *
104
- * This method is used in XmlNode#in_context method.
105
- */
106
- public void enableDocumentFragment() {
107
- setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
108
- }
109
-
110
- @Override
111
- protected XmlDocument getNewEmptyDocument(ThreadContext context) {
112
- IRubyObject[] args = new IRubyObject[0];
113
- return (XmlDocument) XmlDocument.rbNew(context, getNokogiriClass(context.getRuntime(), "Nokogiri::HTML::Document"), args);
114
- }
115
-
116
- @Override
117
- protected XmlDocument wrapDocument(ThreadContext context,
118
- RubyClass klazz,
119
- Document document) {
120
- HtmlDocument htmlDocument = (HtmlDocument) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), klazz);
121
- htmlDocument.setNode(context, document);
122
- htmlDocument.setEncoding(ruby_encoding);
123
- return htmlDocument;
124
- }
125
-
126
- /**
127
- * Filter to strip out attributes that pertain to XML namespaces.
128
- *
129
- * @author sergio
130
- * @author Patrick Mahoney <pat@polycrystal.org>
131
- */
132
- public static class RemoveNSAttrsFilter extends DefaultFilter {
133
- @Override
134
- public void startElement(QName element, XMLAttributes attrs,
135
- Augmentations augs) throws XNIException {
136
- int i;
137
- for (i = 0; i < attrs.getLength(); ++i) {
138
- if (isNamespace(attrs.getQName(i))) {
139
- attrs.removeAttributeAt(i);
140
- --i;
141
- }
142
- }
143
-
144
- element.uri = null;
145
- super.startElement(element, attrs, augs);
146
- }
147
- }
148
-
149
- public static class ElementValidityCheckFilter extends DefaultFilter {
150
- private NokogiriErrorHandler errorHandler;
151
-
152
- private ElementValidityCheckFilter(NokogiriErrorHandler errorHandler) {
153
- this.errorHandler = errorHandler;
154
- }
155
-
156
- // element names from xhtml1-strict.dtd
157
- private static String[][] element_names = {
158
- {"a", "abbr", "acronym", "address", "area"},
159
- {"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"},
160
- {"caption", "cite", "code", "col", "colgroup"},
161
- {"dd", "del", "dfn", "div", "dl", "dt"},
162
- {"em"},
163
- {"fieldset", "font", "form", "frame", "frameset"},
164
- {}, // g
165
- {"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"},
166
- {"i", "iframe", "img", "input", "ins"},
167
- {}, // j
168
- {"kbd"},
169
- {"label", "legend", "li", "link"},
170
- {"map", "meta"},
171
- {"noframes", "noscript"},
172
- {"object", "ol", "optgroup", "option"},
173
- {"p", "param", "pre"},
174
- {"q"},
175
- {}, // r
176
- {"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"},
177
- {"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"},
178
- {"u", "ul"},
179
- {"var"},
180
- {}, // w
181
- {}, // x
182
- {}, // y
183
- {} // z
184
- };
185
-
186
- private boolean isValid(String testee) {
187
- char[] c = testee.toCharArray();
188
- int index = new Integer(c[0]) - 97;
189
- if (index > 25) return false;
190
- for (int i=0; i<element_names[index].length; i++) {
191
- if (testee.equals(element_names[index][i])) {
192
- return true;
193
- }
194
- }
195
- return false;
196
- }
197
-
198
- @Override
199
- public void startElement(QName name, XMLAttributes attrs, Augmentations augs) throws XNIException {
200
- if (!isValid(name.rawname)) {
201
- errorHandler.getErrors().add(new Exception("Tag " + name.rawname + " invalid"));
202
- }
203
- super.startElement(name, attrs, augs);
204
- }
205
- }
206
- }
@@ -1,73 +0,0 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2011:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
- package nokogiri.internals;
34
-
35
- import java.util.Hashtable;
36
- import nokogiri.XmlDocument;
37
- import org.w3c.dom.Document;
38
-
39
- /**
40
- * Currently, this class is not used anywhere.
41
- * I'm not sure what this class was written for. (Yoko)
42
- *
43
- * @author sergio
44
- */
45
- public class NokogiriDocumentCache {
46
-
47
- private static NokogiriDocumentCache instance;
48
- protected Hashtable<Document, XmlDocument> cache;
49
-
50
- private NokogiriDocumentCache() {
51
- this.cache = new Hashtable<Document, XmlDocument>();
52
- }
53
-
54
- public static NokogiriDocumentCache getInstance() {
55
- if(instance == null) {
56
- instance = new NokogiriDocumentCache();
57
- }
58
- return instance;
59
- }
60
-
61
- public XmlDocument getXmlDocument(Document doc) {
62
- return this.cache.get(doc);
63
- }
64
-
65
- public void putDocument(Document doc, XmlDocument xmlDoc) {
66
- this.cache.put(doc, xmlDoc);
67
- }
68
-
69
- public XmlDocument removeDocument(Document doc) {
70
- return this.cache.remove(doc);
71
- }
72
-
73
- }
@@ -1,86 +0,0 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2011:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
- package nokogiri.internals;
34
-
35
- import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
36
-
37
- import java.util.ArrayList;
38
- import java.util.List;
39
-
40
- import nokogiri.NokogiriService;
41
- import nokogiri.XmlSyntaxError;
42
-
43
- import org.apache.xerces.xni.parser.XMLErrorHandler;
44
- import org.jruby.Ruby;
45
- import org.jruby.runtime.ThreadContext;
46
- import org.jruby.runtime.builtin.IRubyObject;
47
- import org.xml.sax.ErrorHandler;
48
-
49
- /**
50
- * Super class of error handlers.
51
- *
52
- * XMLErrorHandler is used by nokogiri.internals.HtmlDomParserContext since NekoHtml
53
- * uses this type of error handler.
54
- *
55
- * @author sergio
56
- * @author Yoko Harada <yokolet@gmail.com>
57
- */
58
- public abstract class NokogiriErrorHandler implements ErrorHandler, XMLErrorHandler {
59
- protected List<Exception> errors;
60
- protected boolean noerror;
61
- protected boolean nowarning;
62
-
63
- public NokogiriErrorHandler(boolean noerror, boolean nowarning) {
64
- errors = new ArrayList<Exception>();
65
- this.noerror = noerror;
66
- this.nowarning = nowarning;
67
- }
68
-
69
- public List<Exception> getErrors() { return errors; }
70
-
71
- public List<IRubyObject> getErrorsReadyForRuby(ThreadContext context) {
72
- Ruby runtime = context.getRuntime();
73
- List<IRubyObject> res = new ArrayList<IRubyObject>();
74
- for (int i = 0; i < errors.size(); i++) {
75
- XmlSyntaxError xmlSyntaxError = (XmlSyntaxError) NokogiriService.XML_SYNTAXERROR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::SyntaxError"));
76
- xmlSyntaxError.setException(errors.get(i));
77
- res.add(xmlSyntaxError);
78
- }
79
- return res;
80
- }
81
-
82
- protected boolean usesNekoHtml(String domain) {
83
- if ("http://cyberneko.org/html".equals(domain)) return true;
84
- else return false;
85
- }
86
- }
@@ -1,327 +0,0 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2011:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
- package nokogiri.internals;
34
-
35
- import static nokogiri.internals.NokogiriHelpers.getLocalPart;
36
- import static nokogiri.internals.NokogiriHelpers.getPrefix;
37
- import static nokogiri.internals.NokogiriHelpers.isNamespace;
38
- import static nokogiri.internals.NokogiriHelpers.stringOrNil;
39
-
40
- import java.util.ArrayDeque;
41
- import java.util.LinkedList;
42
-
43
- import nokogiri.XmlSyntaxError;
44
-
45
- import org.jruby.Ruby;
46
- import org.jruby.RubyArray;
47
- import org.jruby.RubyClass;
48
- import org.jruby.RubyObject;
49
- import org.jruby.javasupport.util.RuntimeHelpers;
50
- import org.jruby.runtime.ThreadContext;
51
- import org.jruby.runtime.builtin.IRubyObject;
52
- import org.xml.sax.Attributes;
53
- import org.xml.sax.Locator;
54
- import org.xml.sax.SAXException;
55
- import org.xml.sax.SAXParseException;
56
- import org.xml.sax.ext.DefaultHandler2;
57
-
58
- /**
59
- * A handler for SAX parsing.
60
- *
61
- * @author sergio
62
- * @author Yoko Harada <yokolet@gmail.com>
63
- */
64
- public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler {
65
-
66
- boolean inCDATA = false;
67
-
68
- private Ruby ruby;
69
- private RubyClass attrClass;
70
- private IRubyObject object;
71
-
72
- /**
73
- * Stores parse errors with the most-recent error last.
74
- *
75
- * TODO: should these be stored in the document 'errors' array?
76
- * Currently only string messages are stored there.
77
- */
78
- private LinkedList<XmlSyntaxError> errors = new LinkedList<XmlSyntaxError>();
79
-
80
- private Locator locator;
81
- private ArrayDeque<Integer> lines;
82
- private ArrayDeque<Integer> columns;
83
- private static String htmlParserName = "Nokogiri::HTML::SAX::Parser";
84
- private boolean needEmptyAttrCheck = false;
85
-
86
- public NokogiriHandler(Ruby runtime, IRubyObject object) {
87
- this.ruby = runtime;
88
- this.attrClass = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::SAX::Parser::Attribute");
89
- this.object = object;
90
- lines = new ArrayDeque<Integer>();
91
- columns = new ArrayDeque<Integer>();
92
- String objectName = object.getMetaClass().getName();
93
- if (htmlParserName.equals(objectName)) needEmptyAttrCheck = true;
94
- }
95
-
96
- public void setDocumentLocator(Locator locator) {
97
- this.locator = locator;
98
- }
99
-
100
- @Override
101
- public void startDocument() throws SAXException {
102
- call("start_document");
103
- }
104
-
105
- public void xmlDecl(String version, String encoding, String standalone) {
106
- call("xmldecl", stringOrNil(ruby, version),
107
- stringOrNil(ruby, encoding),
108
- stringOrNil(ruby, standalone));
109
- }
110
-
111
- @Override
112
- public void endDocument() throws SAXException {
113
- call("end_document");
114
- }
115
-
116
- /*
117
- * This has to call either "start_element" or
118
- * "start_element_namespace" depending on whether there are any
119
- * namespace attributes.
120
- *
121
- * Attributes that define namespaces are passed in a separate
122
- * array of <code>[:prefix, :uri]</code> arrays and are not
123
- * passed with the other attributes.
124
- */
125
- @Override
126
- public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException {
127
- // for attributes other than namespace attrs
128
- RubyArray rubyAttr = RubyArray.newArray(ruby);
129
- // for namespace defining attributes
130
- RubyArray rubyNSAttr = RubyArray.newArray(ruby);
131
-
132
- ThreadContext context = ruby.getCurrentContext();
133
- boolean fromFragmentHandler = false; // isFromFragmentHandler();
134
-
135
- lines.add(locator.getLineNumber());
136
- columns.add(locator.getColumnNumber() - 1); // libxml counts from 0 while java does from 1
137
-
138
- for (int i = 0; i < attrs.getLength(); i++) {
139
- String u = attrs.getURI(i);
140
- String qn = attrs.getQName(i);
141
- String ln = attrs.getLocalName(i);
142
- String val = attrs.getValue(i);
143
- String pre;
144
-
145
- pre = getPrefix(qn);
146
- if (ln == null || ln.equals("")) ln = getLocalPart(qn);
147
-
148
- if (isNamespace(qn) && !fromFragmentHandler) {
149
- // I haven't figured out the reason yet, but somewhere the
150
- // namespace is converted to an array in an array in an array, which causes a
151
- // TypeError at line 45 in fragment_handler.rb
152
- RubyArray ns = RubyArray.newArray(ruby, 2);
153
- if (ln.equals("xmlns")) ln = null;
154
- ns.add(stringOrNil(ruby, ln));
155
- ns.add(ruby.newString(val));
156
- rubyNSAttr.add(ns);
157
- } else {
158
- IRubyObject[] args = null;
159
- if (needEmptyAttrCheck) {
160
- if (isEmptyAttr(ln)) {
161
- args = new IRubyObject[3];
162
- args[0] = stringOrNil(ruby, ln);
163
- args[1] = stringOrNil(ruby, pre);
164
- args[2] = stringOrNil(ruby, u);
165
- }
166
- }
167
- if (args == null) {
168
- args = new IRubyObject[4];
169
- args[0] = stringOrNil(ruby, ln);
170
- args[1] = stringOrNil(ruby, pre);
171
- args[2] = stringOrNil(ruby, u);
172
- args[3] = stringOrNil(ruby, val);
173
- }
174
-
175
- IRubyObject attr = RuntimeHelpers.invoke(context, attrClass, "new", args);
176
- rubyAttr.add(attr);
177
- }
178
- }
179
-
180
- if (localName == null || localName.equals("")) localName = getLocalPart(qName);
181
- call("start_element_namespace",
182
- stringOrNil(ruby, localName),
183
- rubyAttr,
184
- stringOrNil(ruby, getPrefix(qName)),
185
- stringOrNil(ruby, uri),
186
- rubyNSAttr);
187
- }
188
-
189
- private static String[] emptyAttrs =
190
- {"checked", "compact", "declare", "defer", "disabled", "ismap", "multiple",
191
- "noresize", "nohref", "noshade", "nowrap", "readonly", "selected"};
192
-
193
- private boolean isEmptyAttr(String name) {
194
- for (String emptyAttr : emptyAttrs) {
195
- if (emptyAttr.equals(name)) return true;
196
- }
197
- return false;
198
- }
199
-
200
- public Integer getLine() {
201
- return lines.pop();
202
- }
203
-
204
- public Integer getColumn() {
205
- return columns.pop();
206
- }
207
-
208
- private boolean isFromFragmentHandler() {
209
- if (object != null && object instanceof RubyObject) {
210
- RubyObject rubyObj = (RubyObject)object;
211
- IRubyObject document = rubyObj.getInstanceVariable("@document");
212
- if (document != null) {
213
- String name = document.getMetaClass().getName();
214
- if ("Nokogiri::XML::FragmentHandler".equals(name)) {
215
- return true;
216
- }
217
- }
218
- }
219
- return false;
220
- }
221
-
222
- @Override
223
- public void endElement(String uri, String localName, String qName) throws SAXException {
224
- call("end_element_namespace",
225
- stringOrNil(ruby, localName),
226
- stringOrNil(ruby, getPrefix(qName)),
227
- stringOrNil(ruby, uri));
228
- }
229
-
230
- @Override
231
- public void characters(char[] ch, int start, int length) throws SAXException {
232
- String target = inCDATA ? "cdata_block" : "characters";
233
- call(target, ruby.newString(new String(ch, start, length)));
234
- }
235
-
236
- @Override
237
- public void comment(char[] ch, int start, int length) throws SAXException {
238
- call("comment", ruby.newString(new String(ch, start, length)));
239
- }
240
-
241
- @Override
242
- public void startCDATA() throws SAXException {
243
- inCDATA = true;
244
- }
245
-
246
- @Override
247
- public void endCDATA() throws SAXException {
248
- inCDATA = false;
249
- }
250
-
251
- @Override
252
- public void error(SAXParseException saxpe) {
253
- addError(XmlSyntaxError.createError(ruby, saxpe));
254
- call("error", ruby.newString(saxpe.getMessage()));
255
- }
256
-
257
- @Override
258
- public void fatalError(SAXParseException saxpe) throws SAXException
259
- {
260
- addError(XmlSyntaxError.createFatalError(ruby, saxpe));
261
- call("error", ruby.newString(saxpe.getMessage()));
262
- }
263
-
264
- @Override
265
- public void warning(SAXParseException saxpe) {
266
- //System.out.println("warning: " + saxpe);
267
- call("warning", ruby.newString(saxpe.getMessage()));
268
- }
269
-
270
- protected synchronized void addError(XmlSyntaxError e) {
271
- errors.add(e);
272
- }
273
-
274
- public synchronized int getErrorCount() {
275
- return errors.size();
276
- }
277
-
278
- public synchronized IRubyObject getLastError() {
279
- return errors.getLast();
280
- }
281
-
282
- private void call(String methodName) {
283
- ThreadContext context = ruby.getCurrentContext();
284
- RuntimeHelpers.invoke(context, document(context), methodName);
285
- }
286
-
287
- private void call(String methodName, IRubyObject argument) {
288
- ThreadContext context = ruby.getCurrentContext();
289
- RuntimeHelpers.invoke(context, document(context), methodName, argument);
290
- }
291
-
292
- private void call(String methodName, IRubyObject arg1, IRubyObject arg2) {
293
- ThreadContext context = ruby.getCurrentContext();
294
- RuntimeHelpers.invoke(context, document(context), methodName, arg1, arg2);
295
- }
296
-
297
- private void call(String methodName, IRubyObject arg1, IRubyObject arg2,
298
- IRubyObject arg3) {
299
- ThreadContext context = ruby.getCurrentContext();
300
- RuntimeHelpers.invoke(context, document(context), methodName,
301
- arg1, arg2, arg3);
302
- }
303
-
304
- private void call(String methodName,
305
- IRubyObject arg0,
306
- IRubyObject arg1,
307
- IRubyObject arg2,
308
- IRubyObject arg3,
309
- IRubyObject arg4) {
310
- IRubyObject[] args = new IRubyObject[5];
311
- args[0] = arg0;
312
- args[1] = arg1;
313
- args[2] = arg2;
314
- args[3] = arg3;
315
- args[4] = arg4;
316
- ThreadContext context = ruby.getCurrentContext();
317
- RuntimeHelpers.invoke(context, document(context), methodName, args);
318
- }
319
-
320
- private IRubyObject document(ThreadContext context) {
321
- if (object instanceof RubyObject) {
322
- return ((RubyObject)object).fastGetInstanceVariable("@document");
323
- }
324
- return context.getRuntime().getNil();
325
- }
326
-
327
- }