nokogiri 1.5.0.beta.1 → 1.5.0.beta.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (114) hide show
  1. data/CHANGELOG.ja.rdoc +28 -8
  2. data/CHANGELOG.rdoc +23 -0
  3. data/Manifest.txt +63 -1
  4. data/README.ja.rdoc +1 -1
  5. data/README.rdoc +22 -4
  6. data/Rakefile +6 -2
  7. data/ext/java/nokogiri/EncodingHandler.java +92 -0
  8. data/ext/java/nokogiri/HtmlDocument.java +116 -0
  9. data/ext/java/nokogiri/HtmlElementDescription.java +111 -0
  10. data/ext/java/nokogiri/HtmlEntityLookup.java +45 -0
  11. data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -0
  12. data/ext/java/nokogiri/NokogiriService.java +370 -0
  13. data/ext/java/nokogiri/XmlAttr.java +147 -0
  14. data/ext/java/nokogiri/XmlAttributeDecl.java +98 -0
  15. data/ext/java/nokogiri/XmlCdata.java +50 -0
  16. data/ext/java/nokogiri/XmlComment.java +47 -0
  17. data/ext/java/nokogiri/XmlDocument.java +463 -0
  18. data/ext/java/nokogiri/XmlDocumentFragment.java +207 -0
  19. data/ext/java/nokogiri/XmlDtd.java +427 -0
  20. data/ext/java/nokogiri/XmlElement.java +172 -0
  21. data/ext/java/nokogiri/XmlElementContent.java +350 -0
  22. data/ext/java/nokogiri/XmlElementDecl.java +115 -0
  23. data/ext/java/nokogiri/XmlEntityDecl.java +129 -0
  24. data/ext/java/nokogiri/XmlEntityReference.java +42 -0
  25. data/ext/java/nokogiri/XmlNamespace.java +77 -0
  26. data/ext/java/nokogiri/XmlNode.java +1399 -0
  27. data/ext/java/nokogiri/XmlNodeSet.java +248 -0
  28. data/ext/java/nokogiri/XmlProcessingInstruction.java +70 -0
  29. data/ext/java/nokogiri/XmlReader.java +373 -0
  30. data/ext/java/nokogiri/XmlRelaxng.java +166 -0
  31. data/ext/java/nokogiri/XmlSaxParserContext.java +308 -0
  32. data/ext/java/nokogiri/XmlSaxPushParser.java +146 -0
  33. data/ext/java/nokogiri/XmlSchema.java +142 -0
  34. data/ext/java/nokogiri/XmlSyntaxError.java +84 -0
  35. data/ext/java/nokogiri/XmlText.java +96 -0
  36. data/ext/java/nokogiri/XmlXpathContext.java +130 -0
  37. data/ext/java/nokogiri/XsltStylesheet.java +126 -0
  38. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +181 -0
  39. data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +39 -0
  40. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +42 -0
  41. data/ext/java/nokogiri/internals/NokogiriHandler.java +251 -0
  42. data/ext/java/nokogiri/internals/NokogiriHelpers.java +526 -0
  43. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +136 -0
  44. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +80 -0
  45. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +37 -0
  46. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +54 -0
  47. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +49 -0
  48. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +88 -0
  49. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +23 -0
  50. data/ext/java/nokogiri/internals/ParserContext.java +235 -0
  51. data/ext/java/nokogiri/internals/PushInputStream.java +381 -0
  52. data/ext/java/nokogiri/internals/ReaderNode.java +431 -0
  53. data/ext/java/nokogiri/internals/SaveContext.java +249 -0
  54. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +35 -0
  55. data/ext/java/nokogiri/internals/XmlDeclHandler.java +10 -0
  56. data/ext/java/nokogiri/internals/XmlDomParser.java +45 -0
  57. data/ext/java/nokogiri/internals/XmlDomParserContext.java +201 -0
  58. data/ext/java/nokogiri/internals/XmlSaxParser.java +33 -0
  59. data/ext/nokogiri/depend +32 -0
  60. data/ext/nokogiri/extconf.rb +61 -32
  61. data/ext/nokogiri/nokogiri.c +0 -5
  62. data/ext/nokogiri/nokogiri.h +2 -2
  63. data/ext/nokogiri/xml_document.c +5 -0
  64. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  65. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  66. data/ext/nokogiri/xml_node.c +56 -16
  67. data/ext/nokogiri/xml_node_set.c +7 -7
  68. data/ext/nokogiri/xml_reader.c +20 -1
  69. data/ext/nokogiri/xml_relax_ng.c +0 -7
  70. data/ext/nokogiri/xml_xpath_context.c +2 -0
  71. data/lib/isorelax.jar +0 -0
  72. data/lib/jing.jar +0 -0
  73. data/lib/nekodtd.jar +0 -0
  74. data/lib/nekohtml.jar +0 -0
  75. data/lib/nokogiri.rb +1 -2
  76. data/lib/nokogiri/css/generated_parser.rb +155 -148
  77. data/lib/nokogiri/css/generated_tokenizer.rb +2 -1
  78. data/lib/nokogiri/css/parser.y +3 -0
  79. data/lib/nokogiri/css/xpath_visitor.rb +1 -7
  80. data/lib/nokogiri/html.rb +2 -2
  81. data/lib/nokogiri/html/document_fragment.rb +7 -4
  82. data/lib/nokogiri/nokogiri.jar +0 -0
  83. data/lib/nokogiri/version.rb +3 -6
  84. data/lib/nokogiri/xml/builder.rb +1 -1
  85. data/lib/nokogiri/xml/document.rb +1 -2
  86. data/lib/nokogiri/xml/document_fragment.rb +7 -0
  87. data/lib/nokogiri/xml/node.rb +5 -3
  88. data/lib/nokogiri/xml/node_set.rb +25 -0
  89. data/lib/nokogiri/xml/reader.rb +2 -0
  90. data/lib/nokogiri/xml/sax/document.rb +3 -1
  91. data/lib/xercesImpl.jar +0 -0
  92. data/spec/helper.rb +3 -0
  93. data/spec/xml/reader_spec.rb +307 -0
  94. data/tasks/test.rb +1 -1
  95. data/test/css/test_parser.rb +11 -1
  96. data/test/html/sax/test_parser_context.rb +2 -2
  97. data/test/html/test_document.rb +2 -2
  98. data/test/html/test_document_fragment.rb +34 -6
  99. data/test/test_memory_leak.rb +2 -2
  100. data/test/test_reader.rb +28 -6
  101. data/test/test_xslt_transforms.rb +2 -3
  102. data/test/xml/test_attr.rb +31 -4
  103. data/test/xml/test_builder.rb +5 -5
  104. data/test/xml/test_cdata.rb +3 -3
  105. data/test/xml/test_document.rb +8 -8
  106. data/test/xml/test_document_fragment.rb +4 -12
  107. data/test/xml/test_node.rb +1 -1
  108. data/test/xml/test_node_reparenting.rb +26 -11
  109. data/test/xml/test_node_set.rb +38 -2
  110. data/test/xml/test_text.rb +11 -2
  111. data/test/xml/test_unparented_node.rb +1 -1
  112. data/test/xml/test_xpath.rb +11 -7
  113. metadata +68 -5
  114. data/lib/nokogiri/version_warning.rb +0 -14
@@ -0,0 +1,111 @@
1
+ package nokogiri;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.Collections;
5
+ import java.util.HashMap;
6
+ import java.util.List;
7
+ import java.util.Map;
8
+
9
+ import org.cyberneko.html.HTMLElements;
10
+ import org.jruby.Ruby;
11
+ import org.jruby.RubyClass;
12
+ import org.jruby.RubyObject;
13
+ import org.jruby.anno.JRubyClass;
14
+ import org.jruby.anno.JRubyMethod;
15
+ import org.jruby.runtime.ThreadContext;
16
+ import org.jruby.runtime.builtin.IRubyObject;
17
+
18
+ /**
19
+ * @author Patrick Mahoney <pat@polycrystal.org>
20
+ */
21
+ @JRubyClass(name="Nokogiri::HTML::ElementDescription")
22
+ public class HtmlElementDescription extends RubyObject {
23
+
24
+ /**
25
+ * Stores memoized hash of element -> list of valid subelements.
26
+ */
27
+ static protected Map<Short, List<String>> subElements;
28
+ static {
29
+ Map<Short, List<String>> _subElements =
30
+ new HashMap<Short, List<String>>();
31
+ subElements = Collections.synchronizedMap(_subElements);
32
+ }
33
+
34
+ protected HTMLElements.Element element;
35
+
36
+ public HtmlElementDescription(Ruby runtime, RubyClass rubyClass) {
37
+ super(runtime, rubyClass);
38
+ }
39
+
40
+ /**
41
+ * Lookup the list of sub elements of <code>code</code>. If not
42
+ * already stored, iterate through all elements to find valid
43
+ * subelements; save this list and return it.
44
+ */
45
+ protected static List<String> findSubElements(HTMLElements.Element elem) {
46
+ List<String> subs = subElements.get(elem.code);
47
+
48
+ if (subs == null) {
49
+ subs = new ArrayList<String>();
50
+
51
+ /*
52
+ * A bit of a hack. NekoHtml source code shows that
53
+ * UNKNOWN is the highest value element. We cannot access
54
+ * the list of elements directly because it's protected.
55
+ */
56
+ for (short c = 0; c < HTMLElements.UNKNOWN; c++) {
57
+ HTMLElements.Element maybe_sub =
58
+ HTMLElements.getElement(c);
59
+ if (maybe_sub.isParent(elem)) {
60
+ subs.add(maybe_sub.name);
61
+ }
62
+ }
63
+
64
+ subElements.put(elem.code, subs);
65
+ }
66
+
67
+ return subs;
68
+ }
69
+
70
+ @JRubyMethod(name="[]", meta=true)
71
+ public static IRubyObject get(ThreadContext context,
72
+ IRubyObject klazz, IRubyObject name) {
73
+
74
+ HTMLElements.Element elem = HTMLElements.getElement(name.toString());
75
+ if (elem == HTMLElements.NO_SUCH_ELEMENT)
76
+ return context.getRuntime().getNil();
77
+
78
+ HtmlElementDescription desc =
79
+ new HtmlElementDescription(context.getRuntime(), (RubyClass)klazz);
80
+ desc.element = elem;
81
+ return desc;
82
+ }
83
+
84
+ @JRubyMethod()
85
+ public IRubyObject name(ThreadContext context) {
86
+ return context.getRuntime().newString(element.name.toLowerCase());
87
+ }
88
+
89
+ @JRubyMethod(name="inline?")
90
+ public IRubyObject inline_eh(ThreadContext context) {
91
+ return context.getRuntime().newBoolean(element.isInline());
92
+ }
93
+
94
+ @JRubyMethod(name="empty?")
95
+ public IRubyObject empty_eh(ThreadContext context) {
96
+ return context.getRuntime().newBoolean(element.isEmpty());
97
+ }
98
+
99
+ @JRubyMethod()
100
+ public IRubyObject sub_elements(ThreadContext context) {
101
+ Ruby ruby = context.getRuntime();
102
+ List<String> subs = findSubElements(element);
103
+ IRubyObject[] ary = new IRubyObject[subs.size()];
104
+ for (int i = 0; i < subs.size(); ++i) {
105
+ ary[i] = ruby.newString(subs.get(i));
106
+ }
107
+
108
+ return ruby.newArray(ary);
109
+ }
110
+
111
+ }
@@ -0,0 +1,45 @@
1
+ package nokogiri;
2
+
3
+ import static org.jruby.javasupport.util.RuntimeHelpers.invoke;
4
+
5
+ import org.cyberneko.html.HTMLEntities;
6
+ import org.jruby.Ruby;
7
+ import org.jruby.RubyClass;
8
+ import org.jruby.RubyObject;
9
+ import org.jruby.anno.JRubyClass;
10
+ import org.jruby.anno.JRubyMethod;
11
+ import org.jruby.runtime.ThreadContext;
12
+ import org.jruby.runtime.builtin.IRubyObject;
13
+
14
+ /**
15
+ * @author Patrick Mahoney <pat@polycrystal.org>
16
+ */
17
+ @JRubyClass(name="Nokogiri::HTML::EntityLookup")
18
+ public class HtmlEntityLookup extends RubyObject {
19
+
20
+ public HtmlEntityLookup(Ruby runtime, RubyClass rubyClass) {
21
+ super(runtime, rubyClass);
22
+ }
23
+
24
+ /**
25
+ * Looks up an HTML entity <code>key</code>.
26
+ *
27
+ * The description is a bit lacking.
28
+ */
29
+ @JRubyMethod()
30
+ public IRubyObject get(ThreadContext context, IRubyObject key) {
31
+ Ruby ruby = context.getRuntime();
32
+ String name = key.toString();
33
+ int val = HTMLEntities.get(name);
34
+ if (val == -1) return ruby.getNil();
35
+
36
+ IRubyObject edClass =
37
+ ruby.getClassFromPath("Nokogiri::HTML::EntityDescription");
38
+ IRubyObject edObj = invoke(context, edClass, "new",
39
+ ruby.newFixnum(val), ruby.newString(name),
40
+ ruby.newString(name + " entity"));
41
+
42
+ return edObj;
43
+ }
44
+
45
+ }
@@ -0,0 +1,218 @@
1
+ package nokogiri;
2
+
3
+ import java.io.ByteArrayInputStream;
4
+ import java.io.InputStream;
5
+ import java.nio.charset.Charset;
6
+ import java.nio.charset.IllegalCharsetNameException;
7
+ import java.util.EnumSet;
8
+ import java.util.regex.Matcher;
9
+ import java.util.regex.Pattern;
10
+
11
+ import nokogiri.internals.NokogiriHandler;
12
+
13
+ import org.apache.xerces.parsers.AbstractSAXParser;
14
+ import org.cyberneko.html.parsers.SAXParser;
15
+ import org.jruby.Ruby;
16
+ import org.jruby.RubyClass;
17
+ import org.jruby.RubyFixnum;
18
+ import org.jruby.RubyString;
19
+ import org.jruby.anno.JRubyClass;
20
+ import org.jruby.anno.JRubyMethod;
21
+ import org.jruby.runtime.ThreadContext;
22
+ import org.jruby.runtime.builtin.IRubyObject;
23
+ import org.xml.sax.SAXException;
24
+
25
+ @JRubyClass(name="Nokogiri::HTML::SAX::ParserContext", parent="Nokogiri::XML::SAX::ParserContext")
26
+ public class HtmlSaxParserContext extends XmlSaxParserContext {
27
+ private SAXParser parser;
28
+
/**
 * Creates a parser context; both arguments are handed straight to the
 * superclass constructor.
 */
public HtmlSaxParserContext(Ruby ruby, RubyClass rubyClass) {
    super(ruby, rubyClass);
}
32
+
33
+ @Override
34
+ protected AbstractSAXParser createParser() throws SAXException {
35
+ SAXParser parser = new SAXParser();
36
+
37
+ try{
38
+ parser.setProperty(
39
+ "http://cyberneko.org/html/properties/names/elems", "lower");
40
+ parser.setProperty(
41
+ "http://cyberneko.org/html/properties/names/attrs", "lower");
42
+ return parser;
43
+ } catch(SAXException ex) {
44
+ throw new SAXException(
45
+ "Problem while creating HTML SAX Parser: " + ex.toString());
46
+ }
47
+ }
48
+
49
+ @JRubyMethod(name="memory", meta=true)
50
+ public static IRubyObject parse_memory(ThreadContext context,
51
+ IRubyObject klazz,
52
+ IRubyObject data,
53
+ IRubyObject encoding) {
54
+ HtmlSaxParserContext ctx =
55
+ new HtmlSaxParserContext(context.getRuntime(), (RubyClass) klazz);
56
+ String javaEncoding = findEncoding(context, encoding);
57
+ if (javaEncoding != null) {
58
+ String input = applyEncoding((String) data.toJava(String.class), javaEncoding);
59
+ ByteArrayInputStream istream = new ByteArrayInputStream(input.getBytes());
60
+ ctx.setInputSource(istream);
61
+ ctx.getInputSource().setEncoding(javaEncoding);
62
+ }
63
+ return ctx;
64
+ }
65
+
66
+ public static enum EncodingType {
67
+ NONE(0, "NONE"),
68
+ UTF_8(1, "UTF-8"),
69
+ UTF16LE(2, "UTF16LE"),
70
+ UTF16BE(3, "UTF16BE"),
71
+ UCS4LE(4, "UCS4LE"),
72
+ UCS4BE(5, "UCS4BE"),
73
+ EBCDIC(6, "EBCDIC"),
74
+ UCS4_2143(7, "ICS4-2143"),
75
+ UCS4_3412(8, "UCS4-3412"),
76
+ UCS2(9, "UCS2"),
77
+ ISO_8859_1(10, "ISO-8859-1"),
78
+ ISO_8859_2(11, "ISO-8859-2"),
79
+ ISO_8859_3(12, "ISO-8859-3"),
80
+ ISO_8859_4(13, "ISO-8859-4"),
81
+ ISO_8859_5(14, "ISO-8859-5"),
82
+ ISO_8859_6(15, "ISO-8859-6"),
83
+ ISO_8859_7(16, "ISO-8859-7"),
84
+ ISO_8859_8(17, "ISO-8859-8"),
85
+ ISO_8859_9(18, "ISO-8859-9"),
86
+ ISO_2022_JP(19, "ISO-2022-JP"),
87
+ SHIFT_JIS(20, "SHIFT-JIS"),
88
+ EUC_JP(21, "EUC-JP"),
89
+ ASCII(22, "ASCII");
90
+
91
+ private final int value;
92
+ private final String name;
93
+ EncodingType(int value, String name) {
94
+ this.value = value;
95
+ this.name = name;
96
+ }
97
+
98
+ public int getValue() {
99
+ return value;
100
+ }
101
+
102
+ public String toString() {
103
+ return name;
104
+ }
105
+ }
106
+
107
+ private static String findName(int value) {
108
+ EnumSet<EncodingType> set = EnumSet.allOf(EncodingType.class);
109
+ for (EncodingType type : set) {
110
+ if (type.getValue() == value) return type.toString();
111
+ }
112
+ return null;
113
+ }
114
+
115
+ private static String findEncoding(ThreadContext context, IRubyObject encoding) {
116
+ String rubyEncoding = null;
117
+ if (encoding instanceof RubyString) {
118
+ rubyEncoding = (String)encoding.toJava(String.class);
119
+ } else if (encoding instanceof RubyFixnum) {
120
+ int value = (Integer)encoding.toJava(Integer.class);
121
+ rubyEncoding = findName(value);
122
+ }
123
+ if (rubyEncoding == null) return null;
124
+ try {
125
+ Charset charset = Charset.forName(rubyEncoding);
126
+ return charset.displayName();
127
+ } catch (IllegalCharsetNameException e) {
128
+ throw context.getRuntime().newEncodingCompatibilityError(
129
+ rubyEncoding + "is not supported in Java.");
130
+ } catch (IllegalArgumentException e) {
131
+ throw context.getRuntime().newInvalidEncoding(
132
+ "encoding should not be nil");
133
+ }
134
+ }
135
+
136
+ private static String applyEncoding(String input, String enc) {
137
+ String str = input.toLowerCase();
138
+ int start_pos = 0;
139
+ int end_pos = 0;
140
+ if (input.contains("meta") && input.contains("charset")) {
141
+ Pattern p = Pattern.compile("charset(()|\\s)=(()|\\s)([a-z]|-|_|\\d)+");
142
+ Matcher m = p.matcher(str);
143
+ while (m.find()) {
144
+ start_pos = m.start();
145
+ end_pos = m.end();
146
+ }
147
+ }
148
+ if (start_pos != end_pos) {
149
+ String substr = input.substring(start_pos, end_pos);
150
+ input = input.replace(substr, "charset=" + enc);
151
+ }
152
+ return input;
153
+ }
154
+
155
+ @JRubyMethod(name="file", meta=true)
156
+ public static IRubyObject parse_file(ThreadContext context,
157
+ IRubyObject klazz,
158
+ IRubyObject data,
159
+ IRubyObject encoding) {
160
+ HtmlSaxParserContext ctx =
161
+ new HtmlSaxParserContext(context.getRuntime(), (RubyClass) klazz);
162
+ ctx.setInputSourceFile(context, data);
163
+ String javaEncoding = findEncoding(context, encoding);
164
+ if (javaEncoding != null) {
165
+ ctx.getInputSource().setEncoding(javaEncoding);
166
+ }
167
+ return ctx;
168
+ }
169
+
170
+ @JRubyMethod(name="io", meta=true)
171
+ public static IRubyObject parse_io(ThreadContext context,
172
+ IRubyObject klazz,
173
+ IRubyObject data,
174
+ IRubyObject encoding) {
175
+ HtmlSaxParserContext ctx =
176
+ new HtmlSaxParserContext(context.getRuntime(), (RubyClass) klazz);
177
+ ctx.setInputSource(context, data);
178
+ String javaEncoding = findEncoding(context, encoding);
179
+ if (javaEncoding != null) {
180
+ ctx.getInputSource().setEncoding(javaEncoding);
181
+ }
182
+ return ctx;
183
+ }
184
+
185
+ /**
186
+ * Create a new parser context that will read from a raw input
187
+ * stream. Not a JRuby method. Meant to be run in a separate
188
+ * thread by XmlSaxPushParser.
189
+ */
190
+ public static IRubyObject parse_stream(ThreadContext context,
191
+ IRubyObject klazz,
192
+ InputStream stream) {
193
+ HtmlSaxParserContext ctx =
194
+ new HtmlSaxParserContext(context.getRuntime(), (RubyClass)klazz);
195
+ ctx.setInputSource(stream);
196
+ return ctx;
197
+ }
198
+
199
+ @Override
200
+ protected void preParse(ThreadContext context,
201
+ IRubyObject handlerRuby,
202
+ NokogiriHandler handler) {
203
+ // final String path = "Nokogiri::XML::FragmentHandler";
204
+ // final String docFrag =
205
+ // "http://cyberneko.org/html/features/balance-tags/document-fragment";
206
+ // RubyObjectAdapter adapter = JavaEmbedUtils.newObjectAdapter();
207
+ // IRubyObject doc = adapter.getInstanceVariable(handlerRuby, "@document");
208
+ // RubyModule mod =
209
+ // context.getRuntime().getClassFromPath(path);
210
+ // try {
211
+ // if (doc != null && !doc.isNil() && adapter.isKindOf(doc, mod))
212
+ // parser.setFeature(docFrag, true);
213
+ // } catch (Exception e) {
214
+ // // ignore
215
+ // }
216
+ }
217
+
218
+ }
@@ -0,0 +1,370 @@
1
+ package nokogiri;
2
+
3
+ import java.util.Collections;
4
+ import java.util.List;
5
+
6
+ import org.jruby.Ruby;
7
+ import org.jruby.RubyArray;
8
+ import org.jruby.RubyClass;
9
+ import org.jruby.RubyFixnum;
10
+ import org.jruby.RubyHash;
11
+ import org.jruby.RubyModule;
12
+ import org.jruby.runtime.ObjectAllocator;
13
+ import org.jruby.runtime.builtin.IRubyObject;
14
+ import org.jruby.runtime.load.BasicLibraryService;
15
+
16
+ /**
17
+ *
18
+ * @author headius
19
+ */
20
+ public class NokogiriService implements BasicLibraryService {
21
+ public static final String nokogiriClassCacheGvarName = "$NOKOGIRI_CLASS_CACHE";
22
+
23
+ public boolean basicLoad(Ruby ruby) {
24
+ init(ruby);
25
+ createNokogiriClassCahce(ruby);
26
+ return true;
27
+ }
28
+
29
+ private void createNokogiriClassCahce(Ruby ruby) {
30
+ RubyHash nokogiriClassCache = RubyHash.newHash(ruby);
31
+ nokogiriClassCache.put("Nokogiri::EncodingHandler", (RubyClass)ruby.getClassFromPath("Nokogiri::EncodingHandler"));
32
+ nokogiriClassCache.put("Nokogiri::XML::Attr", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Attr"));
33
+ nokogiriClassCache.put("Nokogiri::XML::Document", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Document"));
34
+ nokogiriClassCache.put("Nokogiri::XML::DocumentFragment", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::DocumentFragment"));
35
+ nokogiriClassCache.put("Nokogiri::XML::DTD", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::DTD"));
36
+ nokogiriClassCache.put("Nokogiri::XML::Text", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Text"));
37
+ nokogiriClassCache.put("Nokogiri::XML::Comment", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Comment"));
38
+ nokogiriClassCache.put("Nokogiri::XML::Element", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Element"));
39
+ nokogiriClassCache.put("Nokogiri::XML::ElementContent", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::ElementContent"));
40
+ nokogiriClassCache.put("Nokogiri::XML::ElementDecl", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::ElementDecl"));
41
+ nokogiriClassCache.put("Nokogiri::XML::EntityDecl", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::EntityDecl"));
42
+ nokogiriClassCache.put("Nokogiri::XML::CDATA", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::CDATA"));
43
+ nokogiriClassCache.put("Nokogiri::XML::Node", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Node"));
44
+ nokogiriClassCache.put("Nokogiri::XML::NodeSet", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::NodeSet"));
45
+ nokogiriClassCache.put("Nokogiri::XML::Namespace", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Namespace"));
46
+ nokogiriClassCache.put("Nokogiri::XML::RelaxNG", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::RelaxNG"));
47
+ nokogiriClassCache.put("Nokogiri::HTML::Document", (RubyClass)ruby.getClassFromPath("Nokogiri::HTML::Document"));
48
+ nokogiriClassCache.put("Nokogiri::HTML::ElementDescription", (RubyClass)ruby.getClassFromPath("Nokogiri::HTML::ElementDescription"));
49
+ nokogiriClassCache.put("Nokogiri::XML::AttributeDecl", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::AttributeDecl"));
50
+ nokogiriClassCache.put("Nokogiri::XML::SAX::ParserContext", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::SAX::ParserContext"));
51
+
52
+ ruby.getGlobalVariables().set(nokogiriClassCacheGvarName, nokogiriClassCache);
53
+ }
54
+
55
+ private void init(Ruby ruby) {
56
+ RubyModule nokogiri = ruby.defineModule("Nokogiri");
57
+ RubyModule xmlModule = nokogiri.defineModuleUnder("XML");
58
+ RubyModule xmlSaxModule = xmlModule.defineModuleUnder("SAX");
59
+ RubyModule htmlModule = nokogiri.defineModuleUnder("HTML");
60
+ RubyModule htmlSaxModule = htmlModule.defineModuleUnder("SAX");
61
+ RubyModule xsltModule = nokogiri.defineModuleUnder("XSLT");
62
+
63
+ createNokogiriModule(ruby, nokogiri);
64
+ createSyntaxErrors(ruby, nokogiri, xmlModule);
65
+ RubyClass xmlNode = createXmlModule(ruby, xmlModule);
66
+ createHtmlModule(ruby, htmlModule);
67
+ createDocuments(ruby, xmlModule, htmlModule, xmlNode);
68
+ createSaxModule(ruby, xmlSaxModule, htmlSaxModule);
69
+ createXsltModule(ruby, xsltModule);
70
+ }
71
+
72
+ private void createNokogiriModule(Ruby ruby, RubyModule nokogiri) {;
73
+ RubyClass encHandler = nokogiri.defineClassUnder("EncodingHandler", ruby.getObject(), ENCODING_HANDLER_ALLOCATOR);
74
+ encHandler.defineAnnotatedMethods(EncodingHandler.class);
75
+ }
76
+
77
+ private void createSyntaxErrors(Ruby ruby, RubyModule nokogiri, RubyModule xmlModule) {
78
+ RubyClass syntaxError = nokogiri.defineClassUnder("SyntaxError", ruby.getStandardError(), ruby.getStandardError().getAllocator());
79
+ RubyClass xmlSyntaxError = xmlModule.defineClassUnder("SyntaxError", syntaxError, XML_SYNTAXERROR_ALLOCATOR);
80
+ xmlSyntaxError.defineAnnotatedMethods(XmlSyntaxError.class);
81
+ }
82
+
83
+ private RubyClass createXmlModule(Ruby ruby, RubyModule xmlModule) {
84
+ RubyClass node = xmlModule.defineClassUnder("Node", ruby.getObject(), XML_NODE_ALLOCATOR);
85
+ node.defineAnnotatedMethods(XmlNode.class);
86
+
87
+ RubyClass attr = xmlModule.defineClassUnder("Attr", node, XML_ATTR_ALLOCATOR);
88
+ attr.defineAnnotatedMethods(XmlAttr.class);
89
+
90
+ RubyClass attrDecl = xmlModule.defineClassUnder("AttributeDecl", node, XML_ATTRIBUTE_DECL_ALLOCATOR);
91
+ attrDecl.defineAnnotatedMethods(XmlAttributeDecl.class);
92
+
93
+ RubyClass characterData = xmlModule.defineClassUnder("CharacterData", node, null);
94
+
95
+ RubyClass comment = xmlModule.defineClassUnder("Comment", characterData, XML_COMMENT_ALLOCATOR);
96
+ comment.defineAnnotatedMethods(XmlComment.class);
97
+
98
+ RubyClass text = xmlModule.defineClassUnder("Text", characterData, XML_TEXT_ALLOCATOR);
99
+ text.defineAnnotatedMethods(XmlText.class);
100
+
101
+ RubyModule cdata = xmlModule.defineClassUnder("CDATA", text, XML_CDATA_ALLOCATOR);
102
+ cdata.defineAnnotatedMethods(XmlCdata.class);
103
+
104
+ RubyClass dtd = xmlModule.defineClassUnder("DTD", node, XML_DTD_ALLOCATOR);
105
+ dtd.defineAnnotatedMethods(XmlDtd.class);
106
+
107
+ RubyClass documentFragment = xmlModule.defineClassUnder("DocumentFragment", node, XML_DOCUMENT_FRAGMENT_ALLOCATOR);
108
+ documentFragment.defineAnnotatedMethods(XmlDocumentFragment.class);
109
+
110
+ RubyClass element = xmlModule.defineClassUnder("Element", node, XML_ELEMENT_ALLOCATOR);
111
+ element.defineAnnotatedMethods(XmlElement.class);
112
+
113
+ RubyClass elementContent = xmlModule.defineClassUnder("ElementContent", ruby.getObject(), XML_ELEMENT_CONTENT_ALLOCATOR);
114
+ elementContent.defineAnnotatedMethods(XmlElementContent.class);
115
+
116
+ RubyClass elementDecl = xmlModule.defineClassUnder("ElementDecl", node, XML_ELEMENT_DECL_ALLOCATOR);
117
+ elementDecl.defineAnnotatedMethods(XmlElementDecl.class);
118
+
119
+ RubyClass entityDecl = xmlModule.defineClassUnder("EntityDecl", node, XML_ENTITY_DECL_ALLOCATOR);
120
+ entityDecl.defineAnnotatedMethods(XmlEntityDecl.class);
121
+ entityDecl.defineConstant("INTERNAL_GENERAL", RubyFixnum.newFixnum(ruby, XmlEntityDecl.INTERNAL_GENERAL));
122
+ entityDecl.defineConstant("EXTERNAL_GENERAL_PARSED", RubyFixnum.newFixnum(ruby, XmlEntityDecl.EXTERNAL_GENERAL_PARSED));
123
+ entityDecl.defineConstant("EXTERNAL_GENERAL_UNPARSED", RubyFixnum.newFixnum(ruby, XmlEntityDecl.EXTERNAL_GENERAL_UNPARSED));
124
+ entityDecl.defineConstant("INTERNAL_PARAMETER", RubyFixnum.newFixnum(ruby, XmlEntityDecl.INTERNAL_PARAMETER));
125
+ entityDecl.defineConstant("EXTERNAL_PARAMETER", RubyFixnum.newFixnum(ruby, XmlEntityDecl.EXTERNAL_PARAMETER));
126
+ entityDecl.defineConstant("INTERNAL_PREDEFINED", RubyFixnum.newFixnum(ruby, XmlEntityDecl.INTERNAL_PREDEFINED));
127
+
128
+ RubyClass entref = xmlModule.defineClassUnder("EntityReference", node, XML_ENTITY_REFERENCE_ALLOCATOR);
129
+ entref.defineAnnotatedMethods(XmlEntityReference.class);
130
+
131
+ RubyClass namespace = xmlModule.defineClassUnder("Namespace", ruby.getObject(), XML_NAMESPACE_ALLOCATOR);
132
+ namespace.defineAnnotatedMethods(XmlNamespace.class);
133
+
134
+ RubyClass nodeSet = xmlModule.defineClassUnder("NodeSet", ruby.getObject(), XML_NODESET_ALLOCATOR);
135
+ nodeSet.defineAnnotatedMethods(XmlNodeSet.class);
136
+
137
+ RubyClass pi = xmlModule.defineClassUnder("ProcessingInstruction", node, XML_PROCESSING_INSTRUCTION_ALLOCATOR);
138
+ pi.defineAnnotatedMethods(XmlProcessingInstruction.class);
139
+
140
+ RubyClass reader = xmlModule.defineClassUnder("Reader", ruby.getObject(), XML_READER_ALLOCATOR);
141
+ reader.defineAnnotatedMethods(XmlReader.class);
142
+
143
+ RubyClass schema = xmlModule.defineClassUnder("Schema", ruby.getObject(), XML_SCHEMA_ALLOCATOR);
144
+ schema.defineAnnotatedMethods(XmlSchema.class);
145
+
146
+ RubyClass relaxng = xmlModule.defineClassUnder("RelaxNG", schema, XML_RELAXNG_ALLOCATOR);
147
+ relaxng.defineAnnotatedMethods(XmlRelaxng.class);
148
+
149
+ RubyClass xpathContext = xmlModule.defineClassUnder("XPathContext", ruby.getObject(), XML_XPATHCONTEXT_ALLOCATOR);
150
+ xpathContext.defineAnnotatedMethods(XmlXpathContext.class);
151
+
152
+ return node;
153
+ }
154
+
155
+ private void createHtmlModule(Ruby ruby, RubyModule htmlModule) {
156
+ RubyClass htmlElemDesc = htmlModule.defineClassUnder("ElementDescription", ruby.getObject(), HTML_ELEMENT_DESCRIPTION_ALLOCATOR);
157
+ htmlElemDesc.defineAnnotatedMethods(HtmlElementDescription.class);
158
+
159
+ RubyClass htmlEntityLookup = htmlModule.defineClassUnder("EntityLookup", ruby.getObject(), HTML_ENTITY_LOOKUP_ALLOCATOR);
160
+ htmlEntityLookup.defineAnnotatedMethods(HtmlEntityLookup.class);
161
+ }
162
+
163
+ private void createDocuments(Ruby ruby, RubyModule xmlModule, RubyModule htmlModule, RubyClass node) {
164
+ RubyClass xmlDocument = xmlModule.defineClassUnder("Document", node, XML_DOCUMENT_ALLOCATOR);
165
+ xmlDocument.defineAnnotatedMethods(XmlDocument.class);
166
+
167
+ //RubyModule htmlDoc = html.defineOrGetClassUnder("Document", document);
168
+ RubyModule htmlDocument = htmlModule.defineClassUnder("Document", xmlDocument, HTML_DOCUMENT_ALLOCATOR);
169
+ htmlDocument.defineAnnotatedMethods(HtmlDocument.class);
170
+ }
171
+
172
+ private void createSaxModule(Ruby ruby, RubyModule xmlSaxModule, RubyModule htmlSaxModule) {
173
+ RubyClass xmlSaxParserContext = xmlSaxModule.defineClassUnder("ParserContext", ruby.getObject(), XML_SAXPARSER_ALLOCATOR);
174
+ xmlSaxParserContext.defineAnnotatedMethods(XmlSaxParserContext.class);
175
+
176
+ RubyClass xmlSaxPushParser = xmlSaxModule.defineClassUnder("PushParser", ruby.getObject(), XML_SAXPUSHPARSER_ALLOCATOR);
177
+ xmlSaxPushParser.defineAnnotatedMethods(XmlSaxPushParser.class);
178
+
179
+ RubyClass htmlSaxParserContext = htmlSaxModule.defineClassUnder("ParserContext", xmlSaxParserContext, HTML_SAXPARSER_ALLOCATOR);
180
+ htmlSaxParserContext.defineAnnotatedMethods(HtmlSaxParserContext.class);
181
+ }
182
+
183
+ private void createXsltModule(Ruby ruby, RubyModule xsltModule) {
184
+ RubyClass stylesheet = xsltModule.defineClassUnder("Stylesheet", ruby.getObject(), XSLT_STYLESHEET_ALLOCATOR);
185
+ stylesheet.defineAnnotatedMethods(XsltStylesheet.class);
186
+ }
187
+
188
+ private static ObjectAllocator ENCODING_HANDLER_ALLOCATOR = new ObjectAllocator() {
189
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
190
+ return new EncodingHandler(runtime, klazz, "");
191
+ }
192
+ };
193
+
194
+ private static ObjectAllocator HTML_DOCUMENT_ALLOCATOR = new ObjectAllocator() {
195
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
196
+ return new HtmlDocument(runtime, klazz);
197
+ }
198
+ };
199
+
200
+ private static ObjectAllocator HTML_SAXPARSER_ALLOCATOR = new ObjectAllocator() {
201
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
202
+ return new HtmlSaxParserContext(runtime, klazz);
203
+ }
204
+ };
205
+
206
+ private static ObjectAllocator HTML_ELEMENT_DESCRIPTION_ALLOCATOR =
207
+ new ObjectAllocator() {
208
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
209
+ return new HtmlElementDescription(runtime, klazz);
210
+ }
211
+ };
212
+
213
+ private static ObjectAllocator HTML_ENTITY_LOOKUP_ALLOCATOR =
214
+ new ObjectAllocator() {
215
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
216
+ return new HtmlEntityLookup(runtime, klazz);
217
+ }
218
+ };
219
+
220
+ private static ObjectAllocator XML_ATTR_ALLOCATOR = new ObjectAllocator() {
221
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz){
222
+ return new XmlAttr(runtime, klazz);
223
+ }
224
+ };
225
+
226
+ private static ObjectAllocator XML_CDATA_ALLOCATOR = new ObjectAllocator() {
227
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
228
+ return new XmlCdata(runtime, klazz);
229
+ }
230
+ };
231
+
232
+ private static ObjectAllocator XML_COMMENT_ALLOCATOR = new ObjectAllocator() {
233
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
234
+ return new XmlComment(runtime, klazz);
235
+ }
236
+ };
237
+
238
+ private static ObjectAllocator XML_PROCESSING_INSTRUCTION_ALLOCATOR =
239
+ new ObjectAllocator() {
240
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
241
+ throw runtime.newNotImplementedError("not implemented");
242
+ }
243
+ };
244
+
245
+ private static ObjectAllocator XML_DOCUMENT_ALLOCATOR = new ObjectAllocator() {
246
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
247
+ throw runtime.newNotImplementedError("not implemented");
248
+ }
249
+ };
250
+
251
+ private static ObjectAllocator XML_DOCUMENT_FRAGMENT_ALLOCATOR = new ObjectAllocator() {
252
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
253
+ return new XmlDocumentFragment(runtime, klazz);
254
+ }
255
+ };
256
+
257
+ private static ObjectAllocator XML_DTD_ALLOCATOR = new ObjectAllocator() {
258
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
259
+ return new XmlDtd(runtime, klazz);
260
+ }
261
+ };
262
+
263
+ private static ObjectAllocator XML_ELEMENT_ALLOCATOR = new ObjectAllocator() {
264
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
265
+ return new XmlElement(runtime, klazz);
266
+ }
267
+ };
268
+
269
+ private static ObjectAllocator XML_ENTITY_REFERENCE_ALLOCATOR = new ObjectAllocator() {
270
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
271
+ return new XmlEntityReference(runtime, klazz);
272
+ }
273
+ };
274
+
275
+ private static ObjectAllocator XML_NAMESPACE_ALLOCATOR = new ObjectAllocator() {
276
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
277
+ return new XmlNamespace(runtime, klazz);
278
+ }
279
+ };
280
+
281
+ private static ObjectAllocator XML_NODE_ALLOCATOR = new ObjectAllocator() {
282
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
283
+ return new XmlNode(runtime, klazz);
284
+ }
285
+ };
286
+
287
+ private static ObjectAllocator XML_NODESET_ALLOCATOR = new ObjectAllocator() {
288
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
289
+ return new XmlNodeSet(runtime, klazz, RubyArray.newEmptyArray(runtime));
290
+ }
291
+ };
292
+
293
+ private static ObjectAllocator XML_READER_ALLOCATOR = new ObjectAllocator() {
294
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
295
+ return new XmlReader(runtime, klazz);
296
+ }
297
+ };
298
+
299
+ private static ObjectAllocator XML_ATTRIBUTE_DECL_ALLOCATOR = new ObjectAllocator() {
300
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
301
+ return new XmlAttributeDecl(runtime, klazz);
302
+ }
303
+ };
304
+
305
+ private static ObjectAllocator XML_ELEMENT_DECL_ALLOCATOR = new ObjectAllocator() {
306
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
307
+ return new XmlElementDecl(runtime, klazz);
308
+ }
309
+ };
310
+
311
+ private static ObjectAllocator XML_ENTITY_DECL_ALLOCATOR = new ObjectAllocator() {
312
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
313
+ return new XmlEntityDecl(runtime, klazz);
314
+ }
315
+ };
316
+
317
+ private static ObjectAllocator XML_ELEMENT_CONTENT_ALLOCATOR = new ObjectAllocator() {
318
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
319
+ throw runtime.newNotImplementedError("not implemented");
320
+ }
321
+ };
322
+
323
+ private static ObjectAllocator XML_RELAXNG_ALLOCATOR = new ObjectAllocator() {
324
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
325
+ return new XmlRelaxng(runtime, klazz);
326
+ }
327
+ };
328
+
329
+ private static ObjectAllocator XML_SAXPARSER_ALLOCATOR = new ObjectAllocator() {
330
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
331
+ return new XmlSaxParserContext(runtime, klazz);
332
+ }
333
+ };
334
+
335
+ private static ObjectAllocator XML_SAXPUSHPARSER_ALLOCATOR = new ObjectAllocator() {
336
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
337
+ return new XmlSaxPushParser(runtime, klazz);
338
+ }
339
+ };
340
+
341
+ private static ObjectAllocator XML_SCHEMA_ALLOCATOR = new ObjectAllocator() {
342
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
343
+ return new XmlSchema(runtime, klazz);
344
+ }
345
+ };
346
+
347
+ private static ObjectAllocator XML_SYNTAXERROR_ALLOCATOR = new ObjectAllocator() {
348
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
349
+ return new XmlSyntaxError(runtime, klazz);
350
+ }
351
+ };
352
+
353
+ private static ObjectAllocator XML_TEXT_ALLOCATOR = new ObjectAllocator() {
354
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
355
+ return new XmlText(runtime, klazz);
356
+ }
357
+ };
358
+
359
+ private static ObjectAllocator XML_XPATHCONTEXT_ALLOCATOR = new ObjectAllocator() {
360
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
361
+ throw runtime.newNotImplementedError("not implemented");
362
+ }
363
+ };
364
+
365
+ private static ObjectAllocator XSLT_STYLESHEET_ALLOCATOR = new ObjectAllocator() {
366
+ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
367
+ return new XsltStylesheet(runtime, klazz);
368
+ }
369
+ };
370
+ }