nokogiri 1.11.1-java → 1.11.6-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (143)
  1. checksums.yaml +4 -4
  2. data/LICENSE-DEPENDENCIES.md +12 -12
  3. data/LICENSE.md +1 -1
  4. data/README.md +21 -16
  5. data/dependencies.yml +12 -12
  6. data/ext/java/nokogiri/EncodingHandler.java +76 -89
  7. data/ext/java/nokogiri/HtmlDocument.java +135 -144
  8. data/ext/java/nokogiri/HtmlElementDescription.java +102 -117
  9. data/ext/java/nokogiri/HtmlEntityLookup.java +33 -60
  10. data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -222
  11. data/ext/java/nokogiri/HtmlSaxPushParser.java +162 -169
  12. data/ext/java/nokogiri/NokogiriService.java +595 -556
  13. data/ext/java/nokogiri/XmlAttr.java +118 -126
  14. data/ext/java/nokogiri/XmlAttributeDecl.java +95 -106
  15. data/ext/java/nokogiri/XmlCdata.java +35 -58
  16. data/ext/java/nokogiri/XmlComment.java +46 -67
  17. data/ext/java/nokogiri/XmlDocument.java +645 -572
  18. data/ext/java/nokogiri/XmlDocumentFragment.java +125 -137
  19. data/ext/java/nokogiri/XmlDtd.java +448 -414
  20. data/ext/java/nokogiri/XmlElement.java +23 -48
  21. data/ext/java/nokogiri/XmlElementContent.java +343 -316
  22. data/ext/java/nokogiri/XmlElementDecl.java +124 -125
  23. data/ext/java/nokogiri/XmlEntityDecl.java +119 -127
  24. data/ext/java/nokogiri/XmlEntityReference.java +49 -72
  25. data/ext/java/nokogiri/XmlNamespace.java +175 -175
  26. data/ext/java/nokogiri/XmlNode.java +1843 -1620
  27. data/ext/java/nokogiri/XmlNodeSet.java +361 -331
  28. data/ext/java/nokogiri/XmlProcessingInstruction.java +47 -69
  29. data/ext/java/nokogiri/XmlReader.java +513 -450
  30. data/ext/java/nokogiri/XmlRelaxng.java +85 -104
  31. data/ext/java/nokogiri/XmlSaxParserContext.java +328 -315
  32. data/ext/java/nokogiri/XmlSaxPushParser.java +227 -220
  33. data/ext/java/nokogiri/XmlSchema.java +328 -295
  34. data/ext/java/nokogiri/XmlSyntaxError.java +113 -115
  35. data/ext/java/nokogiri/XmlText.java +55 -76
  36. data/ext/java/nokogiri/XmlXpathContext.java +240 -238
  37. data/ext/java/nokogiri/XsltStylesheet.java +280 -269
  38. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  39. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +201 -202
  40. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  41. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  42. data/ext/java/nokogiri/internals/NokogiriDomParser.java +63 -80
  43. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  44. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +27 -52
  45. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -286
  46. data/ext/java/nokogiri/internals/NokogiriHelpers.java +736 -652
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +184 -173
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +81 -98
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +64 -79
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +84 -99
  51. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +48 -65
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +116 -131
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -56
  54. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +23 -46
  55. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +55 -72
  56. data/ext/java/nokogiri/internals/ParserContext.java +206 -211
  57. data/ext/java/nokogiri/internals/ReaderNode.java +478 -403
  58. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -739
  59. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +31 -54
  60. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  61. data/ext/java/nokogiri/internals/XmlDeclHandler.java +3 -34
  62. data/ext/java/nokogiri/internals/XmlDomParserContext.java +206 -207
  63. data/ext/java/nokogiri/internals/XmlSaxParser.java +22 -47
  64. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  65. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  66. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  67. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  68. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  78. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  81. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  82. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  83. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  84. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  85. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  86. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  87. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  88. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  89. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  90. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  91. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -574
  93. data/ext/nokogiri/depend +34 -474
  94. data/ext/nokogiri/extconf.rb +253 -183
  95. data/ext/nokogiri/html_document.c +10 -15
  96. data/ext/nokogiri/html_element_description.c +84 -71
  97. data/ext/nokogiri/html_entity_lookup.c +21 -16
  98. data/ext/nokogiri/html_sax_parser_context.c +66 -65
  99. data/ext/nokogiri/html_sax_push_parser.c +29 -27
  100. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  101. data/ext/nokogiri/nokogiri.c +190 -63
  102. data/ext/nokogiri/test_global_handlers.c +3 -4
  103. data/ext/nokogiri/xml_attr.c +15 -15
  104. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  105. data/ext/nokogiri/xml_cdata.c +13 -18
  106. data/ext/nokogiri/xml_comment.c +19 -26
  107. data/ext/nokogiri/xml_document.c +246 -188
  108. data/ext/nokogiri/xml_document_fragment.c +13 -15
  109. data/ext/nokogiri/xml_dtd.c +54 -48
  110. data/ext/nokogiri/xml_element_content.c +30 -27
  111. data/ext/nokogiri/xml_element_decl.c +22 -22
  112. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  113. data/ext/nokogiri/xml_entity_decl.c +32 -30
  114. data/ext/nokogiri/xml_entity_reference.c +16 -18
  115. data/ext/nokogiri/xml_namespace.c +56 -49
  116. data/ext/nokogiri/xml_node.c +385 -326
  117. data/ext/nokogiri/xml_node_set.c +168 -156
  118. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  119. data/ext/nokogiri/xml_reader.c +191 -157
  120. data/ext/nokogiri/xml_relax_ng.c +29 -23
  121. data/ext/nokogiri/xml_sax_parser.c +117 -112
  122. data/ext/nokogiri/xml_sax_parser_context.c +100 -85
  123. data/ext/nokogiri/xml_sax_push_parser.c +34 -27
  124. data/ext/nokogiri/xml_schema.c +48 -42
  125. data/ext/nokogiri/xml_syntax_error.c +21 -23
  126. data/ext/nokogiri/xml_text.c +13 -17
  127. data/ext/nokogiri/xml_xpath_context.c +134 -127
  128. data/ext/nokogiri/xslt_stylesheet.c +157 -157
  129. data/lib/nokogiri.rb +1 -22
  130. data/lib/nokogiri/css/parser.rb +1 -1
  131. data/lib/nokogiri/extension.rb +26 -0
  132. data/lib/nokogiri/html/document_fragment.rb +15 -15
  133. data/lib/nokogiri/nokogiri.jar +0 -0
  134. data/lib/nokogiri/version/constant.rb +1 -1
  135. data/lib/nokogiri/version/info.rb +32 -8
  136. data/lib/nokogiri/xml/document.rb +74 -28
  137. data/lib/nokogiri/xml/node.rb +39 -42
  138. data/lib/nokogiri/xml/reader.rb +2 -9
  139. data/lib/nokogiri/xml/xpath.rb +1 -3
  140. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  141. metadata +7 -8
  142. data/ext/nokogiri/xml_io.c +0 -63
  143. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
@@ -1,35 +1,3 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2012:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
1
  package nokogiri;
34
2
 
35
3
  import java.util.ArrayList;
@@ -49,100 +17,117 @@ import org.jruby.runtime.builtin.IRubyObject;
49
17
 
50
18
  /**
51
19
  * Class for Nokogiri::HTML::ElementDescription.
52
- *
20
+ *
53
21
  * @author Patrick Mahoney <pat@polycrystal.org>
54
22
  */
55
- @JRubyClass(name="Nokogiri::HTML::ElementDescription")
56
- public class HtmlElementDescription extends RubyObject {
57
-
58
- /**
59
- * Stores memoized hash of element -> list of valid subelements.
60
- */
61
- static protected Map<Short, List<String>> subElements;
62
- static {
63
- Map<Short, List<String>> _subElements =
64
- new HashMap<Short, List<String>>();
65
- subElements = Collections.synchronizedMap(_subElements);
66
- }
67
-
68
- protected HTMLElements.Element element;
69
-
70
- public HtmlElementDescription(Ruby runtime, RubyClass rubyClass) {
71
- super(runtime, rubyClass);
72
- }
73
-
74
- /**
75
- * Lookup the list of sub elements of <code>code</code>. If not
76
- * already stored, iterate through all elements to find valid
77
- * subelements; save this list and return it.
78
- */
79
- protected static List<String> findSubElements(HTMLElements.Element elem) {
80
- List<String> subs = subElements.get(elem.code);
81
-
82
- if (subs == null) {
83
- subs = new ArrayList<String>();
84
-
85
- /*
86
- * A bit of a hack. NekoHtml source code shows that
87
- * UNKNOWN is the highest value element. We cannot access
88
- * the list of elements directly because it's protected.
89
- */
90
- for (short c = 0; c < HTMLElements.UNKNOWN; c++) {
91
- HTMLElements.Element maybe_sub =
92
- HTMLElements.getElement(c);
93
- if (maybe_sub.isParent(elem)) {
94
- subs.add(maybe_sub.name);
95
- }
96
- }
97
-
98
- subElements.put(elem.code, subs);
23
+ @JRubyClass(name = "Nokogiri::HTML::ElementDescription")
24
+ public class HtmlElementDescription extends RubyObject
25
+ {
26
+
27
+ /**
28
+ * Stores memoized hash of element -> list of valid subelements.
29
+ */
30
+ static protected Map<Short, List<String>> subElements;
31
+ static
32
+ {
33
+ Map<Short, List<String>> _subElements =
34
+ new HashMap<Short, List<String>>();
35
+ subElements = Collections.synchronizedMap(_subElements);
36
+ }
37
+
38
+ protected HTMLElements.Element element;
39
+
40
+ public
41
+ HtmlElementDescription(Ruby runtime, RubyClass rubyClass)
42
+ {
43
+ super(runtime, rubyClass);
44
+ }
45
+
46
+ /**
47
+ * Lookup the list of sub elements of <code>code</code>. If not
48
+ * already stored, iterate through all elements to find valid
49
+ * subelements; save this list and return it.
50
+ */
51
+ protected static List<String>
52
+ findSubElements(HTMLElements.Element elem)
53
+ {
54
+ List<String> subs = subElements.get(elem.code);
55
+
56
+ if (subs == null) {
57
+ subs = new ArrayList<String>();
58
+
59
+ /*
60
+ * A bit of a hack. NekoHtml source code shows that
61
+ * UNKNOWN is the highest value element. We cannot access
62
+ * the list of elements directly because it's protected.
63
+ */
64
+ for (short c = 0; c < HTMLElements.UNKNOWN; c++) {
65
+ HTMLElements.Element maybe_sub =
66
+ HTMLElements.getElement(c);
67
+ if (maybe_sub.isParent(elem)) {
68
+ subs.add(maybe_sub.name);
99
69
  }
70
+ }
100
71
 
101
- return subs;
72
+ subElements.put(elem.code, subs);
102
73
  }
103
74
 
104
- @JRubyMethod(name="[]", meta=true)
105
- public static IRubyObject get(ThreadContext context,
106
- IRubyObject klazz, IRubyObject name) {
107
-
108
- // nekohtml will return an element even for invalid names, see
109
- // http://sourceforge.net/p/nekohtml/code/HEAD/tree/trunk/src/org/cyberneko/html/HTMLElements.java#l514
110
- // which breaks `test_fetch_nonexistent'
111
- HTMLElements.Element elem = HTMLElements.getElement(name.asJavaString(), HTMLElements.NO_SUCH_ELEMENT);
112
- if (elem == HTMLElements.NO_SUCH_ELEMENT)
113
- return context.nil;
114
-
115
- HtmlElementDescription desc =
116
- new HtmlElementDescription(context.getRuntime(), (RubyClass)klazz);
117
- desc.element = elem;
118
- return desc;
75
+ return subs;
76
+ }
77
+
78
+ @JRubyMethod(name = "[]", meta = true)
79
+ public static IRubyObject
80
+ get(ThreadContext context,
81
+ IRubyObject klazz, IRubyObject name)
82
+ {
83
+
84
+ // nekohtml will return an element even for invalid names, see
85
+ // http://sourceforge.net/p/nekohtml/code/HEAD/tree/trunk/src/org/cyberneko/html/HTMLElements.java#l514
86
+ // which breaks `test_fetch_nonexistent'
87
+ HTMLElements.Element elem = HTMLElements.getElement(name.asJavaString(), HTMLElements.NO_SUCH_ELEMENT);
88
+ if (elem == HTMLElements.NO_SUCH_ELEMENT) {
89
+ return context.nil;
119
90
  }
120
91
 
121
- @JRubyMethod()
122
- public IRubyObject name(ThreadContext context) {
123
- return context.getRuntime().newString(element.name.toLowerCase());
92
+ HtmlElementDescription desc =
93
+ new HtmlElementDescription(context.getRuntime(), (RubyClass)klazz);
94
+ desc.element = elem;
95
+ return desc;
96
+ }
97
+
98
+ @JRubyMethod()
99
+ public IRubyObject
100
+ name(ThreadContext context)
101
+ {
102
+ return context.getRuntime().newString(element.name.toLowerCase());
103
+ }
104
+
105
+ @JRubyMethod(name = "inline?")
106
+ public IRubyObject
107
+ inline_eh(ThreadContext context)
108
+ {
109
+ return context.getRuntime().newBoolean(element.isInline());
110
+ }
111
+
112
+ @JRubyMethod(name = "empty?")
113
+ public IRubyObject
114
+ empty_eh(ThreadContext context)
115
+ {
116
+ return context.getRuntime().newBoolean(element.isEmpty());
117
+ }
118
+
119
+ @JRubyMethod()
120
+ public IRubyObject
121
+ sub_elements(ThreadContext context)
122
+ {
123
+ Ruby ruby = context.getRuntime();
124
+ List<String> subs = findSubElements(element);
125
+ IRubyObject[] ary = new IRubyObject[subs.size()];
126
+ for (int i = 0; i < subs.size(); ++i) {
127
+ ary[i] = ruby.newString(subs.get(i));
124
128
  }
125
129
 
126
- @JRubyMethod(name="inline?")
127
- public IRubyObject inline_eh(ThreadContext context) {
128
- return context.getRuntime().newBoolean(element.isInline());
129
- }
130
-
131
- @JRubyMethod(name="empty?")
132
- public IRubyObject empty_eh(ThreadContext context) {
133
- return context.getRuntime().newBoolean(element.isEmpty());
134
- }
135
-
136
- @JRubyMethod()
137
- public IRubyObject sub_elements(ThreadContext context) {
138
- Ruby ruby = context.getRuntime();
139
- List<String> subs = findSubElements(element);
140
- IRubyObject[] ary = new IRubyObject[subs.size()];
141
- for (int i = 0; i < subs.size(); ++i) {
142
- ary[i] = ruby.newString(subs.get(i));
143
- }
144
-
145
- return ruby.newArray(ary);
146
- }
130
+ return ruby.newArray(ary);
131
+ }
147
132
 
148
133
  }
@@ -1,35 +1,3 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2012:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
1
  package nokogiri;
34
2
 
35
3
  import static org.jruby.runtime.Helpers.invoke;
@@ -45,35 +13,40 @@ import org.jruby.runtime.builtin.IRubyObject;
45
13
 
46
14
  /**
47
15
  * Class for Nokogiri::HTML::EntityLookup.
48
- *
16
+ *
49
17
  * @author Patrick Mahoney <pat@polycrystal.org>
50
18
  */
51
- @JRubyClass(name="Nokogiri::HTML::EntityLookup")
52
- public class HtmlEntityLookup extends RubyObject {
53
-
54
- public HtmlEntityLookup(Ruby runtime, RubyClass rubyClass) {
55
- super(runtime, rubyClass);
56
- }
57
-
58
- /**
59
- * Looks up an HTML entity <code>key</code>.
60
- *
61
- * The description is a bit lacking.
62
- */
63
- @JRubyMethod()
64
- public IRubyObject get(ThreadContext context, IRubyObject key) {
65
- Ruby ruby = context.getRuntime();
66
- String name = key.toString();
67
- int val = HTMLEntities.get(name);
68
- if (val == -1) return ruby.getNil();
69
-
70
- IRubyObject edClass =
71
- ruby.getClassFromPath("Nokogiri::HTML::EntityDescription");
72
- IRubyObject edObj = invoke(context, edClass, "new",
73
- ruby.newFixnum(val), ruby.newString(name),
74
- ruby.newString(name + " entity"));
75
-
76
- return edObj;
77
- }
19
+ @JRubyClass(name = "Nokogiri::HTML::EntityLookup")
20
+ public class HtmlEntityLookup extends RubyObject
21
+ {
22
+
23
+ public
24
+ HtmlEntityLookup(Ruby runtime, RubyClass rubyClass)
25
+ {
26
+ super(runtime, rubyClass);
27
+ }
28
+
29
+ /**
30
+ * Looks up an HTML entity <code>key</code>.
31
+ *
32
+ * The description is a bit lacking.
33
+ */
34
+ @JRubyMethod()
35
+ public IRubyObject
36
+ get(ThreadContext context, IRubyObject key)
37
+ {
38
+ Ruby ruby = context.getRuntime();
39
+ String name = key.toString();
40
+ int val = HTMLEntities.get(name);
41
+ if (val == -1) { return ruby.getNil(); }
42
+
43
+ IRubyObject edClass =
44
+ ruby.getClassFromPath("Nokogiri::HTML::EntityDescription");
45
+ IRubyObject edObj = invoke(context, edClass, "new",
46
+ ruby.newFixnum(val), ruby.newString(name),
47
+ ruby.newString(name + " entity"));
48
+
49
+ return edObj;
50
+ }
78
51
 
79
52
  }
@@ -1,35 +1,3 @@
1
- /**
2
- * (The MIT License)
3
- *
4
- * Copyright (c) 2008 - 2011:
5
- *
6
- * * {Aaron Patterson}[http://tenderlovemaking.com]
7
- * * {Mike Dalessio}[http://mike.daless.io]
8
- * * {Charles Nutter}[http://blog.headius.com]
9
- * * {Sergio Arbeo}[http://www.serabe.com]
10
- * * {Patrick Mahoney}[http://polycrystal.org]
11
- * * {Yoko Harada}[http://yokolet.blogspot.com]
12
- *
13
- * Permission is hereby granted, free of charge, to any person obtaining
14
- * a copy of this software and associated documentation files (the
15
- * 'Software'), to deal in the Software without restriction, including
16
- * without limitation the rights to use, copy, modify, merge, publish,
17
- * distribute, sublicense, and/or sell copies of the Software, and to
18
- * permit persons to whom the Software is furnished to do so, subject to
19
- * the following conditions:
20
- *
21
- * The above copyright notice and this permission notice shall be
22
- * included in all copies or substantial portions of the Software.
23
- *
24
- * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
27
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
28
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
29
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
30
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31
- */
32
-
33
1
  package nokogiri;
34
2
 
35
3
  import java.io.ByteArrayInputStream;
@@ -63,220 +31,248 @@ import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
63
31
  * @author Yoko Harada <yokolet@gmail.com>
64
32
  */
65
33
 
66
- @JRubyClass(name="Nokogiri::HTML::SAX::ParserContext", parent="Nokogiri::XML::SAX::ParserContext")
67
- public class HtmlSaxParserContext extends XmlSaxParserContext {
34
+ @JRubyClass(name = "Nokogiri::HTML::SAX::ParserContext", parent = "Nokogiri::XML::SAX::ParserContext")
35
+ public class HtmlSaxParserContext extends XmlSaxParserContext
36
+ {
68
37
 
69
- static HtmlSaxParserContext newInstance(final Ruby runtime, final RubyClass klazz) {
70
- HtmlSaxParserContext instance = new HtmlSaxParserContext(runtime, klazz);
71
- instance.initialize(runtime);
72
- return instance;
73
- }
38
+ static HtmlSaxParserContext
39
+ newInstance(final Ruby runtime, final RubyClass klazz)
40
+ {
41
+ HtmlSaxParserContext instance = new HtmlSaxParserContext(runtime, klazz);
42
+ instance.initialize(runtime);
43
+ return instance;
44
+ }
74
45
 
75
- public HtmlSaxParserContext(Ruby ruby, RubyClass rubyClass) {
76
- super(ruby, rubyClass);
77
- }
46
+ public
47
+ HtmlSaxParserContext(Ruby ruby, RubyClass rubyClass)
48
+ {
49
+ super(ruby, rubyClass);
50
+ }
51
+
52
+ @Override
53
+ protected AbstractSAXParser
54
+ createParser() throws SAXException
55
+ {
56
+ SAXParser parser = new SAXParser();
78
57
 
79
- @Override
80
- protected AbstractSAXParser createParser() throws SAXException {
81
- SAXParser parser = new SAXParser();
82
-
83
- try{
84
- parser.setProperty(
85
- "http://cyberneko.org/html/properties/names/elems", "lower");
86
- parser.setProperty(
87
- "http://cyberneko.org/html/properties/names/attrs", "lower");
88
-
89
- // NekoHTML should not try to guess the encoding based on the meta
90
- // tags or other information in the document. This is already
91
- // handled by the EncodingReader.
92
- parser.setFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset", true);
93
- return parser;
94
- } catch(SAXException ex) {
95
- throw new SAXException(
96
- "Problem while creating HTML SAX Parser: " + ex.toString());
97
- }
58
+ try {
59
+ parser.setProperty(
60
+ "http://cyberneko.org/html/properties/names/elems", "lower");
61
+ parser.setProperty(
62
+ "http://cyberneko.org/html/properties/names/attrs", "lower");
63
+
64
+ // NekoHTML should not try to guess the encoding based on the meta
65
+ // tags or other information in the document. This is already
66
+ // handled by the EncodingReader.
67
+ parser.setFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset", true);
68
+ return parser;
69
+ } catch (SAXException ex) {
70
+ throw new SAXException(
71
+ "Problem while creating HTML SAX Parser: " + ex.toString());
98
72
  }
73
+ }
99
74
 
100
- @JRubyMethod(name="memory", meta=true)
101
- public static IRubyObject parse_memory(ThreadContext context,
102
- IRubyObject klazz,
103
- IRubyObject data,
104
- IRubyObject encoding) {
105
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klazz);
106
- String javaEncoding = findEncodingName(context, encoding);
107
- if (javaEncoding != null) {
108
- CharSequence input = applyEncoding(rubyStringToString(data.convertToString()), javaEncoding);
109
- ByteArrayInputStream istream = new ByteArrayInputStream(input.toString().getBytes());
110
- ctx.setInputSource(istream);
111
- ctx.getInputSource().setEncoding(javaEncoding);
112
- }
113
- return ctx;
75
+ @JRubyMethod(name = "memory", meta = true)
76
+ public static IRubyObject
77
+ parse_memory(ThreadContext context,
78
+ IRubyObject klazz,
79
+ IRubyObject data,
80
+ IRubyObject encoding)
81
+ {
82
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klazz);
83
+ String javaEncoding = findEncodingName(context, encoding);
84
+ if (javaEncoding != null) {
85
+ CharSequence input = applyEncoding(rubyStringToString(data.convertToString()), javaEncoding);
86
+ ByteArrayInputStream istream = new ByteArrayInputStream(input.toString().getBytes());
87
+ ctx.setInputSource(istream);
88
+ ctx.getInputSource().setEncoding(javaEncoding);
114
89
  }
90
+ return ctx;
91
+ }
92
+
93
+ public enum EncodingType {
94
+ NONE(0, "NONE"),
95
+ UTF_8(1, "UTF-8"),
96
+ UTF16LE(2, "UTF16LE"),
97
+ UTF16BE(3, "UTF16BE"),
98
+ UCS4LE(4, "UCS4LE"),
99
+ UCS4BE(5, "UCS4BE"),
100
+ EBCDIC(6, "EBCDIC"),
101
+ UCS4_2143(7, "ICS4-2143"),
102
+ UCS4_3412(8, "UCS4-3412"),
103
+ UCS2(9, "UCS2"),
104
+ ISO_8859_1(10, "ISO-8859-1"),
105
+ ISO_8859_2(11, "ISO-8859-2"),
106
+ ISO_8859_3(12, "ISO-8859-3"),
107
+ ISO_8859_4(13, "ISO-8859-4"),
108
+ ISO_8859_5(14, "ISO-8859-5"),
109
+ ISO_8859_6(15, "ISO-8859-6"),
110
+ ISO_8859_7(16, "ISO-8859-7"),
111
+ ISO_8859_8(17, "ISO-8859-8"),
112
+ ISO_8859_9(18, "ISO-8859-9"),
113
+ ISO_2022_JP(19, "ISO-2022-JP"),
114
+ SHIFT_JIS(20, "SHIFT-JIS"),
115
+ EUC_JP(21, "EUC-JP"),
116
+ ASCII(22, "ASCII");
115
117
 
116
- public enum EncodingType {
117
- NONE(0, "NONE"),
118
- UTF_8(1, "UTF-8"),
119
- UTF16LE(2, "UTF16LE"),
120
- UTF16BE(3, "UTF16BE"),
121
- UCS4LE(4, "UCS4LE"),
122
- UCS4BE(5, "UCS4BE"),
123
- EBCDIC(6, "EBCDIC"),
124
- UCS4_2143(7, "ICS4-2143"),
125
- UCS4_3412(8, "UCS4-3412"),
126
- UCS2(9, "UCS2"),
127
- ISO_8859_1(10, "ISO-8859-1"),
128
- ISO_8859_2(11, "ISO-8859-2"),
129
- ISO_8859_3(12, "ISO-8859-3"),
130
- ISO_8859_4(13, "ISO-8859-4"),
131
- ISO_8859_5(14, "ISO-8859-5"),
132
- ISO_8859_6(15, "ISO-8859-6"),
133
- ISO_8859_7(16, "ISO-8859-7"),
134
- ISO_8859_8(17, "ISO-8859-8"),
135
- ISO_8859_9(18, "ISO-8859-9"),
136
- ISO_2022_JP(19, "ISO-2022-JP"),
137
- SHIFT_JIS(20, "SHIFT-JIS"),
138
- EUC_JP(21, "EUC-JP"),
139
- ASCII(22, "ASCII");
140
-
141
- private final int value;
142
- private final String name;
143
-
144
- EncodingType(int value, String name) {
145
- this.value = value;
146
- this.name = name;
147
- }
148
-
149
- public int getValue() {
150
- return value;
151
- }
152
-
153
- public String toString() {
154
- return name;
155
- }
156
-
157
- private static transient EncodingType[] values;
158
-
159
- // NOTE: assuming ordinal == value
160
- static EncodingType get(final int ordinal) {
161
- EncodingType[] values = EncodingType.values;
162
- if (values == null) {
163
- values = EncodingType.values();
164
- EncodingType.values = values;
165
- }
166
- if (ordinal >= 0 && ordinal < values.length) {
167
- return values[ordinal];
168
- }
169
- return null;
170
- }
118
+ private final int value;
119
+ private final String name;
171
120
 
121
+ EncodingType(int value, String name)
122
+ {
123
+ this.value = value;
124
+ this.name = name;
172
125
  }
173
126
 
174
- private static String findEncodingName(final int value) {
175
- EncodingType type = EncodingType.get(value);
176
- if (type == null) return null;
177
- assert type.value == value;
178
- return type.name;
127
+ public int getValue()
128
+ {
129
+ return value;
179
130
  }
180
131
 
181
- private static String findEncodingName(ThreadContext context, IRubyObject encoding) {
182
- String rubyEncoding = null;
183
- if (encoding instanceof RubyString) {
184
- rubyEncoding = rubyStringToString((RubyString) encoding);
185
- }
186
- else if (encoding instanceof RubyFixnum) {
187
- rubyEncoding = findEncodingName(RubyFixnum.fix2int((RubyFixnum) encoding));
188
- }
189
- if (rubyEncoding == null) return null;
190
- try {
191
- return Charset.forName(rubyEncoding).displayName();
192
- }
193
- catch (UnsupportedCharsetException e) {
194
- throw context.getRuntime().newEncodingCompatibilityError(rubyEncoding + "is not supported");
195
- }
196
- catch (IllegalCharsetNameException e) {
197
- throw context.getRuntime().newEncodingError(e.getMessage());
198
- }
132
+ public String toString()
133
+ {
134
+ return name;
199
135
  }
200
136
 
201
- private static final Pattern CHARSET_PATTERN = Pattern.compile("charset(()|\\s)=(()|\\s)([a-z]|-|_|\\d)+", Pattern.CASE_INSENSITIVE);
202
-
203
- private static CharSequence applyEncoding(final String input, final String enc) {
204
- int start_pos = 0; int end_pos = 0;
205
- if (containsIgnoreCase(input, "charset")) {
206
- Matcher m = CHARSET_PATTERN.matcher(input);
207
- while (m.find()) {
208
- start_pos = m.start();
209
- end_pos = m.end();
210
- }
211
- }
212
- if (start_pos != end_pos) {
213
- return new StringBuilder(input).replace(start_pos, end_pos, "charset=" + enc);
214
- }
215
- return input;
137
+ private static transient EncodingType[] values;
138
+
139
+ // NOTE: assuming ordinal == value
140
+ static EncodingType get(final int ordinal)
141
+ {
142
+ EncodingType[] values = EncodingType.values;
143
+ if (values == null) {
144
+ values = EncodingType.values();
145
+ EncodingType.values = values;
146
+ }
147
+ if (ordinal >= 0 && ordinal < values.length) {
148
+ return values[ordinal];
149
+ }
150
+ return null;
216
151
  }
217
152
 
218
- private static boolean containsIgnoreCase(final String str, final String sub) {
219
- final int len = sub.length();
220
- final int max = str.length() - len;
221
-
222
- if (len == 0) return true;
223
- final char c0Lower = Character.toLowerCase(sub.charAt(0));
224
- final char c0Upper = Character.toUpperCase(sub.charAt(0));
225
-
226
- for (int i = 0; i <= max; i++) {
227
- final char ch = str.charAt(i);
228
- if (ch != c0Lower && Character.toLowerCase(ch) != c0Lower && Character.toUpperCase(ch) != c0Upper) {
229
- continue; // first char doesn't match
230
- }
231
-
232
- if (str.regionMatches(true, i + 1, sub, 0 + 1, len - 1)) {
233
- return true;
234
- }
235
- }
236
- return false;
153
+ }
154
+
155
+ private static String
156
+ findEncodingName(final int value)
157
+ {
158
+ EncodingType type = EncodingType.get(value);
159
+ if (type == null) { return null; }
160
+ assert type.value == value;
161
+ return type.name;
162
+ }
163
+
164
+ private static String
165
+ findEncodingName(ThreadContext context, IRubyObject encoding)
166
+ {
167
+ String rubyEncoding = null;
168
+ if (encoding instanceof RubyString) {
169
+ rubyEncoding = rubyStringToString((RubyString) encoding);
170
+ } else if (encoding instanceof RubyFixnum) {
171
+ rubyEncoding = findEncodingName(RubyFixnum.fix2int((RubyFixnum) encoding));
237
172
  }
173
+ if (rubyEncoding == null) { return null; }
174
+ try {
175
+ return Charset.forName(rubyEncoding).displayName();
176
+ } catch (UnsupportedCharsetException e) {
177
+ throw context.getRuntime().newEncodingCompatibilityError(rubyEncoding + "is not supported");
178
+ } catch (IllegalCharsetNameException e) {
179
+ throw context.getRuntime().newEncodingError(e.getMessage());
180
+ }
181
+ }
182
+
183
+ private static final Pattern CHARSET_PATTERN = Pattern.compile("charset(()|\\s)=(()|\\s)([a-z]|-|_|\\d)+",
184
+ Pattern.CASE_INSENSITIVE);
238
185
 
239
- @JRubyMethod(name="file", meta=true)
240
- public static IRubyObject parse_file(ThreadContext context,
241
- IRubyObject klass,
242
- IRubyObject data,
243
- IRubyObject encoding) {
244
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
245
- ctx.setInputSourceFile(context, data);
246
- String javaEncoding = findEncodingName(context, encoding);
247
- if (javaEncoding != null) {
248
- ctx.getInputSource().setEncoding(javaEncoding);
249
- }
250
- return ctx;
186
+ private static CharSequence
187
+ applyEncoding(final String input, final String enc)
188
+ {
189
+ int start_pos = 0;
190
+ int end_pos = 0;
191
+ if (containsIgnoreCase(input, "charset")) {
192
+ Matcher m = CHARSET_PATTERN.matcher(input);
193
+ while (m.find()) {
194
+ start_pos = m.start();
195
+ end_pos = m.end();
196
+ }
197
+ }
198
+ if (start_pos != end_pos) {
199
+ return new StringBuilder(input).replace(start_pos, end_pos, "charset=" + enc);
251
200
  }
201
+ return input;
202
+ }
252
203
 
253
- @JRubyMethod(name="io", meta=true)
254
- public static IRubyObject parse_io(ThreadContext context,
255
- IRubyObject klass,
256
- IRubyObject data,
257
- IRubyObject encoding) {
258
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
259
- ctx.setIOInputSource(context, data, context.nil);
260
- String javaEncoding = findEncodingName(context, encoding);
261
- if (javaEncoding != null) {
262
- ctx.getInputSource().setEncoding(javaEncoding);
263
- }
264
- return ctx;
204
+ private static boolean
205
+ containsIgnoreCase(final String str, final String sub)
206
+ {
207
+ final int len = sub.length();
208
+ final int max = str.length() - len;
209
+
210
+ if (len == 0) { return true; }
211
+ final char c0Lower = Character.toLowerCase(sub.charAt(0));
212
+ final char c0Upper = Character.toUpperCase(sub.charAt(0));
213
+
214
+ for (int i = 0; i <= max; i++) {
215
+ final char ch = str.charAt(i);
216
+ if (ch != c0Lower && Character.toLowerCase(ch) != c0Lower && Character.toUpperCase(ch) != c0Upper) {
217
+ continue; // first char doesn't match
218
+ }
219
+
220
+ if (str.regionMatches(true, i + 1, sub, 0 + 1, len - 1)) {
221
+ return true;
222
+ }
265
223
  }
224
+ return false;
225
+ }
266
226
 
267
- /**
268
- * Create a new parser context that will read from a raw input stream.
269
- * Meant to be run in a separate thread by HtmlSaxPushParser.
270
- */
271
- static HtmlSaxParserContext parse_stream(final Ruby runtime, RubyClass klass, InputStream stream) {
272
- HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(runtime, klass);
273
- ctx.setInputSource(stream);
274
- return ctx;
227
+ @JRubyMethod(name = "file", meta = true)
228
+ public static IRubyObject
229
+ parse_file(ThreadContext context,
230
+ IRubyObject klass,
231
+ IRubyObject data,
232
+ IRubyObject encoding)
233
+ {
234
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
235
+ ctx.setInputSourceFile(context, data);
236
+ String javaEncoding = findEncodingName(context, encoding);
237
+ if (javaEncoding != null) {
238
+ ctx.getInputSource().setEncoding(javaEncoding);
275
239
  }
240
+ return ctx;
241
+ }
276
242
 
277
- @Override
278
- protected void preParse(final Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler) {
279
- // this function is meant to be empty. It overrides the one in XmlSaxParserContext
243
+ @JRubyMethod(name = "io", meta = true)
244
+ public static IRubyObject
245
+ parse_io(ThreadContext context,
246
+ IRubyObject klass,
247
+ IRubyObject data,
248
+ IRubyObject encoding)
249
+ {
250
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass);
251
+ ctx.setIOInputSource(context, data, context.nil);
252
+ String javaEncoding = findEncodingName(context, encoding);
253
+ if (javaEncoding != null) {
254
+ ctx.getInputSource().setEncoding(javaEncoding);
280
255
  }
256
+ return ctx;
257
+ }
258
+
259
+ /**
260
+ * Create a new parser context that will read from a raw input stream.
261
+ * Meant to be run in a separate thread by HtmlSaxPushParser.
262
+ */
263
+ static HtmlSaxParserContext
264
+ parse_stream(final Ruby runtime, RubyClass klass, InputStream stream)
265
+ {
266
+ HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(runtime, klass);
267
+ ctx.setInputSource(stream);
268
+ return ctx;
269
+ }
270
+
271
+ @Override
272
+ protected void
273
+ preParse(final Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler)
274
+ {
275
+ // this function is meant to be empty. It overrides the one in XmlSaxParserContext
276
+ }
281
277
 
282
278
  }