nokogiri 1.11.1-java → 1.11.2-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE-DEPENDENCIES.md +12 -12
  3. data/LICENSE.md +1 -1
  4. data/README.md +20 -15
  5. data/ext/java/nokogiri/EncodingHandler.java +78 -59
  6. data/ext/java/nokogiri/HtmlDocument.java +137 -114
  7. data/ext/java/nokogiri/HtmlElementDescription.java +104 -87
  8. data/ext/java/nokogiri/HtmlEntityLookup.java +31 -26
  9. data/ext/java/nokogiri/HtmlSaxParserContext.java +220 -192
  10. data/ext/java/nokogiri/HtmlSaxPushParser.java +164 -139
  11. data/ext/java/nokogiri/NokogiriService.java +597 -526
  12. data/ext/java/nokogiri/XmlAttr.java +120 -96
  13. data/ext/java/nokogiri/XmlAttributeDecl.java +97 -76
  14. data/ext/java/nokogiri/XmlCdata.java +35 -26
  15. data/ext/java/nokogiri/XmlComment.java +48 -37
  16. data/ext/java/nokogiri/XmlDocument.java +642 -540
  17. data/ext/java/nokogiri/XmlDocumentFragment.java +127 -107
  18. data/ext/java/nokogiri/XmlDtd.java +450 -384
  19. data/ext/java/nokogiri/XmlElement.java +25 -18
  20. data/ext/java/nokogiri/XmlElementContent.java +345 -286
  21. data/ext/java/nokogiri/XmlElementDecl.java +126 -95
  22. data/ext/java/nokogiri/XmlEntityDecl.java +121 -97
  23. data/ext/java/nokogiri/XmlEntityReference.java +51 -42
  24. data/ext/java/nokogiri/XmlNamespace.java +177 -145
  25. data/ext/java/nokogiri/XmlNode.java +1843 -1588
  26. data/ext/java/nokogiri/XmlNodeSet.java +361 -299
  27. data/ext/java/nokogiri/XmlProcessingInstruction.java +49 -39
  28. data/ext/java/nokogiri/XmlReader.java +513 -418
  29. data/ext/java/nokogiri/XmlRelaxng.java +91 -78
  30. data/ext/java/nokogiri/XmlSaxParserContext.java +330 -285
  31. data/ext/java/nokogiri/XmlSaxPushParser.java +229 -190
  32. data/ext/java/nokogiri/XmlSchema.java +328 -263
  33. data/ext/java/nokogiri/XmlSyntaxError.java +113 -83
  34. data/ext/java/nokogiri/XmlText.java +57 -46
  35. data/ext/java/nokogiri/XmlXpathContext.java +240 -206
  36. data/ext/java/nokogiri/XsltStylesheet.java +282 -239
  37. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  38. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +199 -168
  39. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  40. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  41. data/ext/java/nokogiri/internals/NokogiriDomParser.java +65 -50
  42. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  43. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +25 -18
  44. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -254
  45. data/ext/java/nokogiri/internals/NokogiriHelpers.java +738 -622
  46. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +186 -143
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +83 -68
  48. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +66 -49
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +86 -69
  50. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +44 -29
  51. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +118 -101
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -24
  53. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +25 -17
  54. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +57 -42
  55. data/ext/java/nokogiri/internals/ParserContext.java +206 -179
  56. data/ext/java/nokogiri/internals/ReaderNode.java +478 -371
  57. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -707
  58. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +28 -19
  59. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  60. data/ext/java/nokogiri/internals/XmlDeclHandler.java +5 -4
  61. data/ext/java/nokogiri/internals/XmlDomParserContext.java +208 -177
  62. data/ext/java/nokogiri/internals/XmlSaxParser.java +24 -17
  63. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  64. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  65. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  66. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  67. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  68. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  77. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  78. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  80. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  81. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  82. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  83. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  84. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  85. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  86. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  87. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  88. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  89. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  90. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  91. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -570
  92. data/ext/nokogiri/depend +34 -474
  93. data/ext/nokogiri/extconf.rb +253 -183
  94. data/ext/nokogiri/html_document.c +10 -15
  95. data/ext/nokogiri/html_element_description.c +84 -71
  96. data/ext/nokogiri/html_entity_lookup.c +21 -16
  97. data/ext/nokogiri/html_sax_parser_context.c +66 -65
  98. data/ext/nokogiri/html_sax_push_parser.c +29 -27
  99. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  100. data/ext/nokogiri/nokogiri.c +171 -63
  101. data/ext/nokogiri/test_global_handlers.c +3 -4
  102. data/ext/nokogiri/xml_attr.c +15 -15
  103. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  104. data/ext/nokogiri/xml_cdata.c +13 -18
  105. data/ext/nokogiri/xml_comment.c +19 -26
  106. data/ext/nokogiri/xml_document.c +221 -164
  107. data/ext/nokogiri/xml_document_fragment.c +13 -15
  108. data/ext/nokogiri/xml_dtd.c +54 -48
  109. data/ext/nokogiri/xml_element_content.c +30 -27
  110. data/ext/nokogiri/xml_element_decl.c +22 -22
  111. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  112. data/ext/nokogiri/xml_entity_decl.c +32 -30
  113. data/ext/nokogiri/xml_entity_reference.c +16 -18
  114. data/ext/nokogiri/xml_namespace.c +56 -49
  115. data/ext/nokogiri/xml_node.c +338 -286
  116. data/ext/nokogiri/xml_node_set.c +168 -156
  117. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  118. data/ext/nokogiri/xml_reader.c +191 -157
  119. data/ext/nokogiri/xml_relax_ng.c +29 -23
  120. data/ext/nokogiri/xml_sax_parser.c +117 -112
  121. data/ext/nokogiri/xml_sax_parser_context.c +100 -85
  122. data/ext/nokogiri/xml_sax_push_parser.c +34 -27
  123. data/ext/nokogiri/xml_schema.c +48 -42
  124. data/ext/nokogiri/xml_syntax_error.c +21 -23
  125. data/ext/nokogiri/xml_text.c +13 -17
  126. data/ext/nokogiri/xml_xpath_context.c +134 -127
  127. data/ext/nokogiri/xslt_stylesheet.c +157 -157
  128. data/lib/nokogiri.rb +1 -22
  129. data/lib/nokogiri/css/parser.rb +1 -1
  130. data/lib/nokogiri/extension.rb +26 -0
  131. data/lib/nokogiri/html/document_fragment.rb +15 -15
  132. data/lib/nokogiri/nokogiri.jar +0 -0
  133. data/lib/nokogiri/version/constant.rb +1 -1
  134. data/lib/nokogiri/version/info.rb +31 -8
  135. data/lib/nokogiri/xml/document.rb +31 -11
  136. data/lib/nokogiri/xml/node.rb +38 -42
  137. data/lib/nokogiri/xml/reader.rb +2 -9
  138. data/lib/nokogiri/xml/xpath.rb +1 -3
  139. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  140. metadata +7 -8
  141. data/ext/nokogiri/xml_io.c +0 -63
  142. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
@@ -17,10 +17,10 @@
17
17
  * distribute, sublicense, and/or sell copies of the Software, and to
18
18
  * permit persons to whom the Software is furnished to do so, subject to
19
19
  * the following conditions:
20
- *
20
+ *
21
21
  * The above copyright notice and this permission notice shall be
22
22
  * included in all copies or substantial portions of the Software.
23
- *
23
+ *
24
24
  * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
25
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
26
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -49,100 +49,117 @@ import org.jruby.runtime.builtin.IRubyObject;
49
49
 
50
50
  /**
51
51
  * Class for Nokogiri::HTML::ElementDescription.
52
- *
52
+ *
53
53
  * @author Patrick Mahoney <pat@polycrystal.org>
54
54
  */
55
- @JRubyClass(name="Nokogiri::HTML::ElementDescription")
56
- public class HtmlElementDescription extends RubyObject {
57
-
58
- /**
59
- * Stores memoized hash of element -> list of valid subelements.
60
- */
61
- static protected Map<Short, List<String>> subElements;
62
- static {
63
- Map<Short, List<String>> _subElements =
64
- new HashMap<Short, List<String>>();
65
- subElements = Collections.synchronizedMap(_subElements);
66
- }
67
-
68
- protected HTMLElements.Element element;
69
-
70
- public HtmlElementDescription(Ruby runtime, RubyClass rubyClass) {
71
- super(runtime, rubyClass);
72
- }
73
-
74
- /**
75
- * Lookup the list of sub elements of <code>code</code>. If not
76
- * already stored, iterate through all elements to find valid
77
- * subelements; save this list and return it.
78
- */
79
- protected static List<String> findSubElements(HTMLElements.Element elem) {
80
- List<String> subs = subElements.get(elem.code);
81
-
82
- if (subs == null) {
83
- subs = new ArrayList<String>();
84
-
85
- /*
86
- * A bit of a hack. NekoHtml source code shows that
87
- * UNKNOWN is the highest value element. We cannot access
88
- * the list of elements directly because it's protected.
89
- */
90
- for (short c = 0; c < HTMLElements.UNKNOWN; c++) {
91
- HTMLElements.Element maybe_sub =
92
- HTMLElements.getElement(c);
93
- if (maybe_sub.isParent(elem)) {
94
- subs.add(maybe_sub.name);
95
- }
96
- }
97
-
98
- subElements.put(elem.code, subs);
55
+ @JRubyClass(name = "Nokogiri::HTML::ElementDescription")
56
+ public class HtmlElementDescription extends RubyObject
57
+ {
58
+
59
+ /**
60
+ * Stores memoized hash of element -> list of valid subelements.
61
+ */
62
+ static protected Map<Short, List<String>> subElements;
63
+ static
64
+ {
65
+ Map<Short, List<String>> _subElements =
66
+ new HashMap<Short, List<String>>();
67
+ subElements = Collections.synchronizedMap(_subElements);
68
+ }
69
+
70
+ protected HTMLElements.Element element;
71
+
72
+ public
73
+ HtmlElementDescription(Ruby runtime, RubyClass rubyClass)
74
+ {
75
+ super(runtime, rubyClass);
76
+ }
77
+
78
+ /**
79
+ * Lookup the list of sub elements of <code>code</code>. If not
80
+ * already stored, iterate through all elements to find valid
81
+ * subelements; save this list and return it.
82
+ */
83
+ protected static List<String>
84
+ findSubElements(HTMLElements.Element elem)
85
+ {
86
+ List<String> subs = subElements.get(elem.code);
87
+
88
+ if (subs == null) {
89
+ subs = new ArrayList<String>();
90
+
91
+ /*
92
+ * A bit of a hack. NekoHtml source code shows that
93
+ * UNKNOWN is the highest value element. We cannot access
94
+ * the list of elements directly because it's protected.
95
+ */
96
+ for (short c = 0; c < HTMLElements.UNKNOWN; c++) {
97
+ HTMLElements.Element maybe_sub =
98
+ HTMLElements.getElement(c);
99
+ if (maybe_sub.isParent(elem)) {
100
+ subs.add(maybe_sub.name);
99
101
  }
102
+ }
100
103
 
101
- return subs;
104
+ subElements.put(elem.code, subs);
102
105
  }
103
106
 
104
- @JRubyMethod(name="[]", meta=true)
105
- public static IRubyObject get(ThreadContext context,
106
- IRubyObject klazz, IRubyObject name) {
107
-
108
- // nekohtml will return an element even for invalid names, see
109
- // http://sourceforge.net/p/nekohtml/code/HEAD/tree/trunk/src/org/cyberneko/html/HTMLElements.java#l514
110
- // which breaks `test_fetch_nonexistent'
111
- HTMLElements.Element elem = HTMLElements.getElement(name.asJavaString(), HTMLElements.NO_SUCH_ELEMENT);
112
- if (elem == HTMLElements.NO_SUCH_ELEMENT)
113
- return context.nil;
114
-
115
- HtmlElementDescription desc =
116
- new HtmlElementDescription(context.getRuntime(), (RubyClass)klazz);
117
- desc.element = elem;
118
- return desc;
107
+ return subs;
108
+ }
109
+
110
+ @JRubyMethod(name = "[]", meta = true)
111
+ public static IRubyObject
112
+ get(ThreadContext context,
113
+ IRubyObject klazz, IRubyObject name)
114
+ {
115
+
116
+ // nekohtml will return an element even for invalid names, see
117
+ // http://sourceforge.net/p/nekohtml/code/HEAD/tree/trunk/src/org/cyberneko/html/HTMLElements.java#l514
118
+ // which breaks `test_fetch_nonexistent'
119
+ HTMLElements.Element elem = HTMLElements.getElement(name.asJavaString(), HTMLElements.NO_SUCH_ELEMENT);
120
+ if (elem == HTMLElements.NO_SUCH_ELEMENT) {
121
+ return context.nil;
119
122
  }
120
123
 
121
- @JRubyMethod()
122
- public IRubyObject name(ThreadContext context) {
123
- return context.getRuntime().newString(element.name.toLowerCase());
124
+ HtmlElementDescription desc =
125
+ new HtmlElementDescription(context.getRuntime(), (RubyClass)klazz);
126
+ desc.element = elem;
127
+ return desc;
128
+ }
129
+
130
+ @JRubyMethod()
131
+ public IRubyObject
132
+ name(ThreadContext context)
133
+ {
134
+ return context.getRuntime().newString(element.name.toLowerCase());
135
+ }
136
+
137
+ @JRubyMethod(name = "inline?")
138
+ public IRubyObject
139
+ inline_eh(ThreadContext context)
140
+ {
141
+ return context.getRuntime().newBoolean(element.isInline());
142
+ }
143
+
144
+ @JRubyMethod(name = "empty?")
145
+ public IRubyObject
146
+ empty_eh(ThreadContext context)
147
+ {
148
+ return context.getRuntime().newBoolean(element.isEmpty());
149
+ }
150
+
151
+ @JRubyMethod()
152
+ public IRubyObject
153
+ sub_elements(ThreadContext context)
154
+ {
155
+ Ruby ruby = context.getRuntime();
156
+ List<String> subs = findSubElements(element);
157
+ IRubyObject[] ary = new IRubyObject[subs.size()];
158
+ for (int i = 0; i < subs.size(); ++i) {
159
+ ary[i] = ruby.newString(subs.get(i));
124
160
  }
125
161
 
126
- @JRubyMethod(name="inline?")
127
- public IRubyObject inline_eh(ThreadContext context) {
128
- return context.getRuntime().newBoolean(element.isInline());
129
- }
130
-
131
- @JRubyMethod(name="empty?")
132
- public IRubyObject empty_eh(ThreadContext context) {
133
- return context.getRuntime().newBoolean(element.isEmpty());
134
- }
135
-
136
- @JRubyMethod()
137
- public IRubyObject sub_elements(ThreadContext context) {
138
- Ruby ruby = context.getRuntime();
139
- List<String> subs = findSubElements(element);
140
- IRubyObject[] ary = new IRubyObject[subs.size()];
141
- for (int i = 0; i < subs.size(); ++i) {
142
- ary[i] = ruby.newString(subs.get(i));
143
- }
144
-
145
- return ruby.newArray(ary);
146
- }
162
+ return ruby.newArray(ary);
163
+ }
147
164
 
148
165
  }
@@ -17,10 +17,10 @@
17
17
  * distribute, sublicense, and/or sell copies of the Software, and to
18
18
  * permit persons to whom the Software is furnished to do so, subject to
19
19
  * the following conditions:
20
- *
20
+ *
21
21
  * The above copyright notice and this permission notice shall be
22
22
  * included in all copies or substantial portions of the Software.
23
- *
23
+ *
24
24
  * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
25
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
26
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -45,35 +45,40 @@ import org.jruby.runtime.builtin.IRubyObject;
45
45
 
46
46
  /**
47
47
  * Class for Nokogiri::HTML::EntityLookup.
48
- *
48
+ *
49
49
  * @author Patrick Mahoney <pat@polycrystal.org>
50
50
  */
51
- @JRubyClass(name="Nokogiri::HTML::EntityLookup")
52
- public class HtmlEntityLookup extends RubyObject {
51
+ @JRubyClass(name = "Nokogiri::HTML::EntityLookup")
52
+ public class HtmlEntityLookup extends RubyObject
53
+ {
53
54
 
54
- public HtmlEntityLookup(Ruby runtime, RubyClass rubyClass) {
55
- super(runtime, rubyClass);
56
- }
55
+ public
56
+ HtmlEntityLookup(Ruby runtime, RubyClass rubyClass)
57
+ {
58
+ super(runtime, rubyClass);
59
+ }
57
60
 
58
- /**
59
- * Looks up an HTML entity <code>key</code>.
60
- *
61
- * The description is a bit lacking.
62
- */
63
- @JRubyMethod()
64
- public IRubyObject get(ThreadContext context, IRubyObject key) {
65
- Ruby ruby = context.getRuntime();
66
- String name = key.toString();
67
- int val = HTMLEntities.get(name);
68
- if (val == -1) return ruby.getNil();
61
+ /**
62
+ * Looks up an HTML entity <code>key</code>.
63
+ *
64
+ * The description is a bit lacking.
65
+ */
66
+ @JRubyMethod()
67
+ public IRubyObject
68
+ get(ThreadContext context, IRubyObject key)
69
+ {
70
+ Ruby ruby = context.getRuntime();
71
+ String name = key.toString();
72
+ int val = HTMLEntities.get(name);
73
+ if (val == -1) { return ruby.getNil(); }
69
74
 
70
- IRubyObject edClass =
71
- ruby.getClassFromPath("Nokogiri::HTML::EntityDescription");
72
- IRubyObject edObj = invoke(context, edClass, "new",
73
- ruby.newFixnum(val), ruby.newString(name),
74
- ruby.newString(name + " entity"));
75
+ IRubyObject edClass =
76
+ ruby.getClassFromPath("Nokogiri::HTML::EntityDescription");
77
+ IRubyObject edObj = invoke(context, edClass, "new",
78
+ ruby.newFixnum(val), ruby.newString(name),
79
+ ruby.newString(name + " entity"));
75
80
 
76
- return edObj;
77
- }
81
+ return edObj;
82
+ }
78
83
 
79
84
  }
@@ -17,10 +17,10 @@
17
17
  * distribute, sublicense, and/or sell copies of the Software, and to
18
18
  * permit persons to whom the Software is furnished to do so, subject to
19
19
  * the following conditions:
20
- *
20
+ *
21
21
  * The above copyright notice and this permission notice shall be
22
22
  * included in all copies or substantial portions of the Software.
23
- *
23
+ *
24
24
  * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
25
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
26
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -63,220 +63,248 @@ import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
63
63
  * @author Yoko Harada <yokolet@gmail.com>
64
64
  */
65
65
 
66
- @JRubyClass(name="Nokogiri::HTML::SAX::ParserContext", parent="Nokogiri::XML::SAX::ParserContext")
67
- public class HtmlSaxParserContext extends XmlSaxParserContext {
66
+ @JRubyClass(name = "Nokogiri::HTML::SAX::ParserContext", parent = "Nokogiri::XML::SAX::ParserContext")
67
+ public class HtmlSaxParserContext extends XmlSaxParserContext
68
+ {
68
69
 
69
- static HtmlSaxParserContext newInstance(final Ruby runtime, final RubyClass klazz) {
70
- HtmlSaxParserContext instance = new HtmlSaxParserContext(runtime, klazz);
71
- instance.initialize(runtime);
72
- return instance;
73
- }
70
+ static HtmlSaxParserContext
71
+ newInstance(final Ruby runtime, final RubyClass klazz)
72
+ {
73
+ HtmlSaxParserContext instance = new HtmlSaxParserContext(runtime, klazz);
74
+ instance.initialize(runtime);
75
+ return instance;
76
+ }
74
77
 
75
- public HtmlSaxParserContext(Ruby ruby, RubyClass rubyClass) {
76
- super(ruby, rubyClass);
77
- }
78
+ public
79
+ HtmlSaxParserContext(Ruby ruby, RubyClass rubyClass)
80
+ {
81
+ super(ruby, rubyClass);
82
+ }
83
+
84
+ @Override
85
+ protected AbstractSAXParser
86
+ createParser() throws SAXException
87
+ {
88
+ SAXParser parser = new SAXParser();
78
89
 
79
- @Override
80
- protected AbstractSAXParser createParser() throws SAXException {
81
- SAXParser parser = new SAXParser();
82
-
83
- try{
84
- parser.setProperty(
85
- "http://cyberneko.org/html/properties/names/elems", "lower");
86
- parser.setProperty(
87
- "http://cyberneko.org/html/properties/names/attrs", "lower");
88
-
89
- // NekoHTML should not try to guess the encoding based on the meta
90
- // tags or other information in the document. This is already
91
- // handled by the EncodingReader.
92
- parser.setFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset", true);
93
- return parser;
94
- } catch(SAXException ex) {
95
- throw new SAXException(
96
- "Problem while creating HTML SAX Parser: " + ex.toString());
97
- }
90
+ try {
91
+ parser.setProperty(
92
+ "http://cyberneko.org/html/properties/names/elems", "lower");
93
+ parser.setProperty(
94
+ "http://cyberneko.org/html/properties/names/attrs", "lower");
95
+
96
+ // NekoHTML should not try to guess the encoding based on the meta
97
+ // tags or other information in the document. This is already
98
+ // handled by the EncodingReader.
99
+ parser.setFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset", true);
100
+ return parser;
101
+ } catch (SAXException ex) {
102
+ throw new SAXException(
103
+ "Problem while creating HTML SAX Parser: " + ex.toString());
98
104
  }
105
+ }
99
106
 
100
- @JRubyMethod(name="memory", meta=true)
101
- public static IRubyObject parse_memory(ThreadContext context,
102
- IRubyObject klazz,
103
- IRubyObject data,
104
- IRubyObject encoding) {
105
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klazz);
106
- String javaEncoding = findEncodingName(context, encoding);
107
- if (javaEncoding != null) {
108
- CharSequence input = applyEncoding(rubyStringToString(data.convertToString()), javaEncoding);
109
- ByteArrayInputStream istream = new ByteArrayInputStream(input.toString().getBytes());
110
- ctx.setInputSource(istream);
111
- ctx.getInputSource().setEncoding(javaEncoding);
112
- }
113
- return ctx;
107
+ @JRubyMethod(name = "memory", meta = true)
108
+ public static IRubyObject
109
+ parse_memory(ThreadContext context,
110
+ IRubyObject klazz,
111
+ IRubyObject data,
112
+ IRubyObject encoding)
113
+ {
114
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klazz);
115
+ String javaEncoding = findEncodingName(context, encoding);
116
+ if (javaEncoding != null) {
117
+ CharSequence input = applyEncoding(rubyStringToString(data.convertToString()), javaEncoding);
118
+ ByteArrayInputStream istream = new ByteArrayInputStream(input.toString().getBytes());
119
+ ctx.setInputSource(istream);
120
+ ctx.getInputSource().setEncoding(javaEncoding);
114
121
  }
122
+ return ctx;
123
+ }
124
+
125
+ public enum EncodingType {
126
+ NONE(0, "NONE"),
127
+ UTF_8(1, "UTF-8"),
128
+ UTF16LE(2, "UTF16LE"),
129
+ UTF16BE(3, "UTF16BE"),
130
+ UCS4LE(4, "UCS4LE"),
131
+ UCS4BE(5, "UCS4BE"),
132
+ EBCDIC(6, "EBCDIC"),
133
+ UCS4_2143(7, "ICS4-2143"),
134
+ UCS4_3412(8, "UCS4-3412"),
135
+ UCS2(9, "UCS2"),
136
+ ISO_8859_1(10, "ISO-8859-1"),
137
+ ISO_8859_2(11, "ISO-8859-2"),
138
+ ISO_8859_3(12, "ISO-8859-3"),
139
+ ISO_8859_4(13, "ISO-8859-4"),
140
+ ISO_8859_5(14, "ISO-8859-5"),
141
+ ISO_8859_6(15, "ISO-8859-6"),
142
+ ISO_8859_7(16, "ISO-8859-7"),
143
+ ISO_8859_8(17, "ISO-8859-8"),
144
+ ISO_8859_9(18, "ISO-8859-9"),
145
+ ISO_2022_JP(19, "ISO-2022-JP"),
146
+ SHIFT_JIS(20, "SHIFT-JIS"),
147
+ EUC_JP(21, "EUC-JP"),
148
+ ASCII(22, "ASCII");
115
149
 
116
- public enum EncodingType {
117
- NONE(0, "NONE"),
118
- UTF_8(1, "UTF-8"),
119
- UTF16LE(2, "UTF16LE"),
120
- UTF16BE(3, "UTF16BE"),
121
- UCS4LE(4, "UCS4LE"),
122
- UCS4BE(5, "UCS4BE"),
123
- EBCDIC(6, "EBCDIC"),
124
- UCS4_2143(7, "ICS4-2143"),
125
- UCS4_3412(8, "UCS4-3412"),
126
- UCS2(9, "UCS2"),
127
- ISO_8859_1(10, "ISO-8859-1"),
128
- ISO_8859_2(11, "ISO-8859-2"),
129
- ISO_8859_3(12, "ISO-8859-3"),
130
- ISO_8859_4(13, "ISO-8859-4"),
131
- ISO_8859_5(14, "ISO-8859-5"),
132
- ISO_8859_6(15, "ISO-8859-6"),
133
- ISO_8859_7(16, "ISO-8859-7"),
134
- ISO_8859_8(17, "ISO-8859-8"),
135
- ISO_8859_9(18, "ISO-8859-9"),
136
- ISO_2022_JP(19, "ISO-2022-JP"),
137
- SHIFT_JIS(20, "SHIFT-JIS"),
138
- EUC_JP(21, "EUC-JP"),
139
- ASCII(22, "ASCII");
140
-
141
- private final int value;
142
- private final String name;
143
-
144
- EncodingType(int value, String name) {
145
- this.value = value;
146
- this.name = name;
147
- }
148
-
149
- public int getValue() {
150
- return value;
151
- }
152
-
153
- public String toString() {
154
- return name;
155
- }
156
-
157
- private static transient EncodingType[] values;
158
-
159
- // NOTE: assuming ordinal == value
160
- static EncodingType get(final int ordinal) {
161
- EncodingType[] values = EncodingType.values;
162
- if (values == null) {
163
- values = EncodingType.values();
164
- EncodingType.values = values;
165
- }
166
- if (ordinal >= 0 && ordinal < values.length) {
167
- return values[ordinal];
168
- }
169
- return null;
170
- }
150
+ private final int value;
151
+ private final String name;
171
152
 
153
+ EncodingType(int value, String name)
154
+ {
155
+ this.value = value;
156
+ this.name = name;
172
157
  }
173
158
 
174
- private static String findEncodingName(final int value) {
175
- EncodingType type = EncodingType.get(value);
176
- if (type == null) return null;
177
- assert type.value == value;
178
- return type.name;
159
+ public int getValue()
160
+ {
161
+ return value;
179
162
  }
180
163
 
181
- private static String findEncodingName(ThreadContext context, IRubyObject encoding) {
182
- String rubyEncoding = null;
183
- if (encoding instanceof RubyString) {
184
- rubyEncoding = rubyStringToString((RubyString) encoding);
185
- }
186
- else if (encoding instanceof RubyFixnum) {
187
- rubyEncoding = findEncodingName(RubyFixnum.fix2int((RubyFixnum) encoding));
188
- }
189
- if (rubyEncoding == null) return null;
190
- try {
191
- return Charset.forName(rubyEncoding).displayName();
192
- }
193
- catch (UnsupportedCharsetException e) {
194
- throw context.getRuntime().newEncodingCompatibilityError(rubyEncoding + "is not supported");
195
- }
196
- catch (IllegalCharsetNameException e) {
197
- throw context.getRuntime().newEncodingError(e.getMessage());
198
- }
164
+ public String toString()
165
+ {
166
+ return name;
199
167
  }
200
168
 
201
- private static final Pattern CHARSET_PATTERN = Pattern.compile("charset(()|\\s)=(()|\\s)([a-z]|-|_|\\d)+", Pattern.CASE_INSENSITIVE);
202
-
203
- private static CharSequence applyEncoding(final String input, final String enc) {
204
- int start_pos = 0; int end_pos = 0;
205
- if (containsIgnoreCase(input, "charset")) {
206
- Matcher m = CHARSET_PATTERN.matcher(input);
207
- while (m.find()) {
208
- start_pos = m.start();
209
- end_pos = m.end();
210
- }
211
- }
212
- if (start_pos != end_pos) {
213
- return new StringBuilder(input).replace(start_pos, end_pos, "charset=" + enc);
214
- }
215
- return input;
169
+ private static transient EncodingType[] values;
170
+
171
+ // NOTE: assuming ordinal == value
172
+ static EncodingType get(final int ordinal)
173
+ {
174
+ EncodingType[] values = EncodingType.values;
175
+ if (values == null) {
176
+ values = EncodingType.values();
177
+ EncodingType.values = values;
178
+ }
179
+ if (ordinal >= 0 && ordinal < values.length) {
180
+ return values[ordinal];
181
+ }
182
+ return null;
216
183
  }
217
184
 
218
- private static boolean containsIgnoreCase(final String str, final String sub) {
219
- final int len = sub.length();
220
- final int max = str.length() - len;
221
-
222
- if (len == 0) return true;
223
- final char c0Lower = Character.toLowerCase(sub.charAt(0));
224
- final char c0Upper = Character.toUpperCase(sub.charAt(0));
225
-
226
- for (int i = 0; i <= max; i++) {
227
- final char ch = str.charAt(i);
228
- if (ch != c0Lower && Character.toLowerCase(ch) != c0Lower && Character.toUpperCase(ch) != c0Upper) {
229
- continue; // first char doesn't match
230
- }
231
-
232
- if (str.regionMatches(true, i + 1, sub, 0 + 1, len - 1)) {
233
- return true;
234
- }
235
- }
236
- return false;
185
+ }
186
+
187
+ private static String
188
+ findEncodingName(final int value)
189
+ {
190
+ EncodingType type = EncodingType.get(value);
191
+ if (type == null) { return null; }
192
+ assert type.value == value;
193
+ return type.name;
194
+ }
195
+
196
+ private static String
197
+ findEncodingName(ThreadContext context, IRubyObject encoding)
198
+ {
199
+ String rubyEncoding = null;
200
+ if (encoding instanceof RubyString) {
201
+ rubyEncoding = rubyStringToString((RubyString) encoding);
202
+ } else if (encoding instanceof RubyFixnum) {
203
+ rubyEncoding = findEncodingName(RubyFixnum.fix2int((RubyFixnum) encoding));
204
+ }
205
+ if (rubyEncoding == null) { return null; }
206
+ try {
207
+ return Charset.forName(rubyEncoding).displayName();
208
+ } catch (UnsupportedCharsetException e) {
209
+ throw context.getRuntime().newEncodingCompatibilityError(rubyEncoding + "is not supported");
210
+ } catch (IllegalCharsetNameException e) {
211
+ throw context.getRuntime().newEncodingError(e.getMessage());
237
212
  }
213
+ }
214
+
215
+ private static final Pattern CHARSET_PATTERN = Pattern.compile("charset(()|\\s)=(()|\\s)([a-z]|-|_|\\d)+",
216
+ Pattern.CASE_INSENSITIVE);
238
217
 
239
- @JRubyMethod(name="file", meta=true)
240
- public static IRubyObject parse_file(ThreadContext context,
241
- IRubyObject klass,
242
- IRubyObject data,
243
- IRubyObject encoding) {
244
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
245
- ctx.setInputSourceFile(context, data);
246
- String javaEncoding = findEncodingName(context, encoding);
247
- if (javaEncoding != null) {
248
- ctx.getInputSource().setEncoding(javaEncoding);
249
- }
250
- return ctx;
218
+ private static CharSequence
219
+ applyEncoding(final String input, final String enc)
220
+ {
221
+ int start_pos = 0;
222
+ int end_pos = 0;
223
+ if (containsIgnoreCase(input, "charset")) {
224
+ Matcher m = CHARSET_PATTERN.matcher(input);
225
+ while (m.find()) {
226
+ start_pos = m.start();
227
+ end_pos = m.end();
228
+ }
251
229
  }
230
+ if (start_pos != end_pos) {
231
+ return new StringBuilder(input).replace(start_pos, end_pos, "charset=" + enc);
232
+ }
233
+ return input;
234
+ }
235
+
236
+ private static boolean
237
+ containsIgnoreCase(final String str, final String sub)
238
+ {
239
+ final int len = sub.length();
240
+ final int max = str.length() - len;
252
241
 
253
- @JRubyMethod(name="io", meta=true)
254
- public static IRubyObject parse_io(ThreadContext context,
255
- IRubyObject klass,
256
- IRubyObject data,
257
- IRubyObject encoding) {
258
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
259
- ctx.setIOInputSource(context, data, context.nil);
260
- String javaEncoding = findEncodingName(context, encoding);
261
- if (javaEncoding != null) {
262
- ctx.getInputSource().setEncoding(javaEncoding);
263
- }
264
- return ctx;
242
+ if (len == 0) { return true; }
243
+ final char c0Lower = Character.toLowerCase(sub.charAt(0));
244
+ final char c0Upper = Character.toUpperCase(sub.charAt(0));
245
+
246
+ for (int i = 0; i <= max; i++) {
247
+ final char ch = str.charAt(i);
248
+ if (ch != c0Lower && Character.toLowerCase(ch) != c0Lower && Character.toUpperCase(ch) != c0Upper) {
249
+ continue; // first char doesn't match
250
+ }
251
+
252
+ if (str.regionMatches(true, i + 1, sub, 0 + 1, len - 1)) {
253
+ return true;
254
+ }
265
255
  }
256
+ return false;
257
+ }
266
258
 
267
- /**
268
- * Create a new parser context that will read from a raw input stream.
269
- * Meant to be run in a separate thread by HtmlSaxPushParser.
270
- */
271
- static HtmlSaxParserContext parse_stream(final Ruby runtime, RubyClass klass, InputStream stream) {
272
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(runtime, klass);
273
- ctx.setInputSource(stream);
274
- return ctx;
259
+ @JRubyMethod(name = "file", meta = true)
260
+ public static IRubyObject
261
+ parse_file(ThreadContext context,
262
+ IRubyObject klass,
263
+ IRubyObject data,
264
+ IRubyObject encoding)
265
+ {
266
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
267
+ ctx.setInputSourceFile(context, data);
268
+ String javaEncoding = findEncodingName(context, encoding);
269
+ if (javaEncoding != null) {
270
+ ctx.getInputSource().setEncoding(javaEncoding);
275
271
  }
272
+ return ctx;
273
+ }
276
274
 
277
- @Override
278
- protected void preParse(final Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler) {
279
- // this function is meant to be empty. It overrides the one in XmlSaxParserContext
275
+ @JRubyMethod(name = "io", meta = true)
276
+ public static IRubyObject
277
+ parse_io(ThreadContext context,
278
+ IRubyObject klass,
279
+ IRubyObject data,
280
+ IRubyObject encoding)
281
+ {
282
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
283
+ ctx.setIOInputSource(context, data, context.nil);
284
+ String javaEncoding = findEncodingName(context, encoding);
285
+ if (javaEncoding != null) {
286
+ ctx.getInputSource().setEncoding(javaEncoding);
280
287
  }
288
+ return ctx;
289
+ }
290
+
291
+ /**
292
+ * Create a new parser context that will read from a raw input stream.
293
+ * Meant to be run in a separate thread by HtmlSaxPushParser.
294
+ */
295
+ static HtmlSaxParserContext
296
+ parse_stream(final Ruby runtime, RubyClass klass, InputStream stream)
297
+ {
298
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(runtime, klass);
299
+ ctx.setInputSource(stream);
300
+ return ctx;
301
+ }
302
+
303
+ @Override
304
+ protected void
305
+ preParse(final Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler)
306
+ {
307
+ // this function is meant to be empty. It overrides the one in XmlSaxParserContext
308
+ }
281
309
 
282
310
  }