oga 0.1.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +13 -0
  3. data/LICENSE +19 -0
  4. data/README.md +179 -0
  5. data/doc/DCO.md +25 -0
  6. data/doc/changelog.md +20 -0
  7. data/doc/css/common.css +76 -0
  8. data/doc/migrating_from_nokogiri.md +169 -0
  9. data/ext/c/extconf.rb +13 -0
  10. data/ext/c/lexer.c +1518 -0
  11. data/ext/c/lexer.h +8 -0
  12. data/ext/c/lexer.rl +121 -0
  13. data/ext/c/liboga.c +6 -0
  14. data/ext/c/liboga.h +11 -0
  15. data/ext/java/Liboga.java +14 -0
  16. data/ext/java/org/liboga/xml/Lexer.java +829 -0
  17. data/ext/java/org/liboga/xml/Lexer.rl +151 -0
  18. data/ext/ragel/base_lexer.rl +323 -0
  19. data/lib/liboga.jar +0 -0
  20. data/lib/oga.rb +43 -0
  21. data/lib/oga/html/parser.rb +25 -0
  22. data/lib/oga/oga.rb +27 -0
  23. data/lib/oga/version.rb +3 -0
  24. data/lib/oga/xml/attribute.rb +111 -0
  25. data/lib/oga/xml/cdata.rb +17 -0
  26. data/lib/oga/xml/character_node.rb +39 -0
  27. data/lib/oga/xml/comment.rb +17 -0
  28. data/lib/oga/xml/doctype.rb +84 -0
  29. data/lib/oga/xml/document.rb +99 -0
  30. data/lib/oga/xml/element.rb +331 -0
  31. data/lib/oga/xml/lexer.rb +399 -0
  32. data/lib/oga/xml/namespace.rb +42 -0
  33. data/lib/oga/xml/node.rb +168 -0
  34. data/lib/oga/xml/node_set.rb +313 -0
  35. data/lib/oga/xml/parser.rb +556 -0
  36. data/lib/oga/xml/processing_instruction.rb +39 -0
  37. data/lib/oga/xml/pull_parser.rb +180 -0
  38. data/lib/oga/xml/querying.rb +32 -0
  39. data/lib/oga/xml/text.rb +11 -0
  40. data/lib/oga/xml/traversal.rb +48 -0
  41. data/lib/oga/xml/xml_declaration.rb +69 -0
  42. data/lib/oga/xpath/evaluator.rb +1748 -0
  43. data/lib/oga/xpath/lexer.rb +2043 -0
  44. data/lib/oga/xpath/node.rb +10 -0
  45. data/lib/oga/xpath/parser.rb +537 -0
  46. data/oga.gemspec +45 -0
  47. metadata +221 -0
data/ext/c/lexer.h ADDED
@@ -0,0 +1,8 @@
1
+ #ifndef LIBOGA_XML_LEXER_H
2
+ #define LIBOGA_XML_LEXER_H
3
+
4
+ #include "liboga.h"
5
+
6
+ extern void Init_liboga_xml_lexer();
7
+
8
+ #endif
data/ext/c/lexer.rl ADDED
@@ -0,0 +1,121 @@
1
+ #include "lexer.h"
2
+
3
+ /*
4
+ The `callback` and `callback_simple` macros let the shared Ragel grammar make
5
+ generic calls without relying on the setup of the C or Java lexer. They also
6
+ pass `self` along to the callback functions so it is not hard-coded in the
7
+ grammar. Neither macro ends in a semicolon; call sites supply their own.
8
+
9
+ The C lexer ignores `data` (the value is built from the `start`/`stop` pointers).
10
+ `oga_ivar_get`/`oga_ivar_set` read and write instance variables by name.
11
+ */
12
+
13
+ #define callback(name, data, encoding, start, stop) \
14
+ liboga_xml_lexer_callback(self, name, encoding, start, stop)
15
+
16
+ #define callback_simple(name) \
17
+ liboga_xml_lexer_callback_simple(self, name)
18
+
19
+ #define oga_ivar_get(owner, name) \
20
+ rb_ivar_get(owner, rb_intern(name))
21
+
22
+ #define oga_ivar_set(owner, name, value) \
23
+ rb_ivar_set(owner, rb_intern(name), value)
24
+
25
+ %%machine c_lexer;
26
+
27
+ /**
28
+ * Calls the Ruby method `name` on `self` with a single String argument. The
29
+ * String holds the `te - ts` bytes starting at `ts` and is tagged with the
30
+ * encoding given in `encoding`.
31
+ *
32
+ * @example
33
+ * rb_encoding *encoding = rb_enc_get(...);
34
+ * liboga_xml_lexer_callback(self, "on_string", encoding, ts, te);
35
+ */
36
+ void liboga_xml_lexer_callback(
37
+ VALUE self,
38
+ const char *name,
39
+ rb_encoding *encoding,
40
+ const char *ts,
41
+ const char *te
42
+ )
43
+ {
44
+ VALUE value = rb_enc_str_new(ts, te - ts, encoding);
45
+ ID method = rb_intern(name); /* rb_intern() yields an ID, not a VALUE */
46
+
47
+ rb_funcall(self, method, 1, value);
48
+ }
49
+
50
+ /**
51
+ * Calls the Ruby method `name` on `self` without passing it any
52
+ * arguments.
53
+ *
54
+ * @example
55
+ * liboga_xml_lexer_callback_simple(self, "on_cdata_start");
56
+ */
57
+ void liboga_xml_lexer_callback_simple(VALUE self, const char *name)
58
+ {
59
+ ID method = rb_intern(name); /* rb_intern() yields an ID, not a VALUE */
60
+
61
+ rb_funcall(self, method, 0);
62
+ }
63
+
64
+ %% write data;
65
+
66
+ /**
67
+ * Lexes the String given as the method argument, embedded NUL bytes included.
68
+ * Token values have the same encoding as the input value. Always returns nil.
69
+ *
70
+ * The Ragel state is read from and written back to the instance variables
71
+ * `@act` and `@cs`.
72
+ */
73
+ VALUE oga_xml_lexer_advance(VALUE self, VALUE data_block)
74
+ {
75
+ /* Make sure that all data passed back to Ruby has the proper encoding. */
76
+ rb_encoding *encoding = rb_enc_get(data_block);
77
+
78
+ char *data_str_val = StringValuePtr(data_block);
79
+ /* Use the Ruby-side byte length: strlen() would stop at an embedded NUL. */
80
+ const char *p = data_str_val;
81
+ const char *pe = data_str_val + RSTRING_LEN(data_block);
82
+ const char *eof = pe;
83
+ const char *ts = 0;
84
+ const char *te = 0;
85
+ const char *mark = 0;
86
+
87
+ int act = NUM2INT(oga_ivar_get(self, "@act"));
88
+ int cs = NUM2INT(oga_ivar_get(self, "@cs"));
89
+
90
+ %% write exec;
91
+
92
+ oga_ivar_set(self, "@act", INT2NUM(act));
93
+ oga_ivar_set(self, "@cs", INT2NUM(cs));
94
+
95
+ return Qnil;
96
+ }
97
+
98
+ /**
99
+ * Restores the initial lexer state (`@act` = 0, `@cs` = `c_lexer_start`) and returns nil.
100
+ */
101
+ VALUE oga_xml_lexer_reset(VALUE self)
102
+ {
103
+ rb_ivar_set(self, rb_intern("@act"), INT2NUM(0));
104
+ rb_ivar_set(self, rb_intern("@cs"), INT2NUM(c_lexer_start));
105
+
106
+ return Qnil;
107
+ }
108
+
109
+ %%{
110
+ include base_lexer "base_lexer.rl";
111
+ }%%
112
+
113
+ void Init_liboga_xml_lexer() /* binds the native lexer methods under Oga::XML */
114
+ {
115
+ VALUE oga_module = rb_const_get(rb_cObject, rb_intern("Oga"));
116
+ VALUE xml_module = rb_const_get(oga_module, rb_intern("XML"));
117
+ VALUE lexer_class = rb_define_class_under(xml_module, "Lexer", rb_cObject);
118
+ /* Both natives become instance methods of Oga::XML::Lexer. */
119
+ rb_define_method(lexer_class, "advance_native", oga_xml_lexer_advance, 1);
120
+ rb_define_method(lexer_class, "reset_native", oga_xml_lexer_reset, 0);
121
+ }
data/ext/c/liboga.c ADDED
@@ -0,0 +1,6 @@
1
+ #include "liboga.h"
2
+
3
+ void Init_liboga() /* sets up the extension by delegating to the XML lexer initializer */
4
+ {
5
+ Init_liboga_xml_lexer();
6
+ }
data/ext/c/liboga.h ADDED
@@ -0,0 +1,11 @@
1
+ #ifndef LIBOGA_H
2
+ #define LIBOGA_H
3
+
4
+ #include <ruby.h>
5
+ #include <ruby/encoding.h>
6
+
7
+ #include "lexer.h"
8
+
9
+ void Init_liboga();
10
+
11
+ #endif
@@ -0,0 +1,14 @@
1
+ package org.liboga;
2
+
3
+ import org.jruby.Ruby;
4
+
5
+ public class Liboga
6
+ {
7
+ /**
8
+ * Bootstraps the JRuby extension by loading the XML lexer class into `runtime`.
9
+ */
10
+ public static void load(final Ruby runtime)
11
+ {
12
+ org.liboga.xml.Lexer.load(runtime);
13
+ }
14
+ }
@@ -0,0 +1,829 @@
1
+
2
+ // line 1 "ext/java/org/liboga/xml/Lexer.rl"
3
+ package org.liboga.xml;
4
+
5
+
6
+ // line 4 "ext/java/org/liboga/xml/Lexer.rl"
7
+
8
+ import java.io.IOException;
9
+
10
+ import org.jcodings.Encoding;
11
+
12
+ import org.jruby.Ruby;
13
+ import org.jruby.RubyModule;
14
+ import org.jruby.RubyClass;
15
+ import org.jruby.RubyObject;
16
+ import org.jruby.RubyString;
17
+ import org.jruby.RubyFixnum;
18
+ import org.jruby.util.ByteList;
19
+ import org.jruby.anno.JRubyClass;
20
+ import org.jruby.anno.JRubyMethod;
21
+ import org.jruby.runtime.ThreadContext;
22
+ import org.jruby.runtime.ObjectAllocator;
23
+ import org.jruby.runtime.builtin.IRubyObject;
24
+
25
+ /**
26
+ * Lexer support class for JRuby.
27
+ *
28
+ * The Lexer class contains the raw Ragel loop and calls back in to Ruby land
29
+ * whenever a Ragel action is needed similar to the C extension setup.
30
+ *
31
+ * This class requires Ruby land to first define the `Oga::XML` namespace.
32
+ */
33
+ @JRubyClass(name="Oga::XML::Lexer", parent="Object")
34
+ public class Lexer extends RubyObject
35
+ {
36
+ /**
37
+ * The current Ruby runtime.
38
+ */
39
+ private Ruby runtime;
40
+
41
+
42
+ // line 43 "ext/java/org/liboga/xml/Lexer.java"
43
+ private static byte[] init__java_lexer_actions_0()
44
+ {
45
+ return new byte [] {
46
+ 0, 1, 0, 1, 1, 1, 3, 1, 4, 1, 5, 1,
47
+ 6, 1, 7, 1, 10, 1, 11, 1, 12, 1, 13, 1,
48
+ 14, 1, 15, 1, 16, 1, 17, 1, 18, 1, 19, 1,
49
+ 20, 1, 21, 1, 22, 1, 23, 1, 24, 1, 25, 1,
50
+ 26, 1, 27, 1, 28, 1, 29, 1, 30, 1, 32, 1,
51
+ 33, 1, 34, 1, 37, 1, 38, 1, 39, 1, 40, 1,
52
+ 41, 1, 42, 1, 43, 1, 44, 1, 45, 1, 46, 2,
53
+ 1, 2, 2, 4, 8, 2, 4, 9, 2, 4, 35, 2,
54
+ 4, 36, 3, 4, 0, 31
55
+ };
56
+ }
57
+
58
+ private static final byte _java_lexer_actions[] = init__java_lexer_actions_0();
59
+
60
+
61
+ private static short[] init__java_lexer_key_offsets_0()
62
+ {
63
+ return new short [] {
64
+ 0, 0, 4, 5, 6, 7, 9, 11, 13, 15, 17, 19,
65
+ 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 41,
66
+ 51, 60, 61, 62, 62, 63, 64, 65, 66, 67, 68, 69,
67
+ 70, 81, 83, 91, 100, 109, 110, 111, 127, 135, 144, 153,
68
+ 162, 171, 180, 189, 198, 207, 216, 225, 236, 237, 238, 246,
69
+ 247, 255, 264, 281, 290, 291, 292
70
+ };
71
+ }
72
+
73
+ private static final short _java_lexer_key_offsets[] = init__java_lexer_key_offsets_0();
74
+
75
+
76
+ private static char[] init__java_lexer_trans_keys_0()
77
+ {
78
+ return new char [] {
79
+ 45, 68, 91, 100, 45, 45, 45, 45, 62, 79, 111, 67,
80
+ 99, 84, 116, 89, 121, 80, 112, 69, 101, 9, 32, 67,
81
+ 68, 65, 84, 65, 91, 93, 93, 62, 93, 45, 95, 48,
82
+ 57, 65, 90, 97, 122, 45, 58, 62, 95, 48, 57, 65,
83
+ 90, 97, 122, 45, 95, 120, 48, 57, 65, 90, 97, 122,
84
+ 34, 39, 93, 34, 39, 10, 34, 39, 62, 60, 33, 45,
85
+ 47, 63, 95, 48, 57, 65, 90, 97, 122, 9, 32, 45,
86
+ 95, 48, 57, 65, 90, 97, 122, 45, 95, 109, 48, 57,
87
+ 65, 90, 97, 122, 45, 95, 108, 48, 57, 65, 90, 97,
88
+ 122, 63, 62, 9, 32, 34, 39, 45, 62, 80, 83, 91,
89
+ 95, 48, 57, 65, 90, 97, 122, 45, 95, 48, 57, 65,
90
+ 90, 97, 122, 45, 85, 95, 48, 57, 65, 90, 97, 122,
91
+ 45, 66, 95, 48, 57, 65, 90, 97, 122, 45, 76, 95,
92
+ 48, 57, 65, 90, 97, 122, 45, 73, 95, 48, 57, 65,
93
+ 90, 97, 122, 45, 67, 95, 48, 57, 65, 90, 97, 122,
94
+ 45, 89, 95, 48, 57, 65, 90, 97, 122, 45, 83, 95,
95
+ 48, 57, 65, 90, 97, 122, 45, 84, 95, 48, 57, 65,
96
+ 90, 97, 122, 45, 69, 95, 48, 57, 65, 90, 97, 122,
97
+ 45, 77, 95, 48, 57, 65, 90, 97, 122, 34, 39, 45,
98
+ 63, 95, 48, 57, 65, 90, 97, 122, 34, 39, 45, 95,
99
+ 48, 57, 65, 90, 97, 122, 62, 45, 95, 48, 57, 65,
100
+ 90, 97, 122, 45, 58, 95, 48, 57, 65, 90, 97, 122,
101
+ 9, 10, 13, 32, 34, 39, 45, 47, 61, 62, 95, 48,
102
+ 57, 65, 90, 97, 122, 45, 58, 95, 48, 57, 65, 90,
103
+ 97, 122, 60, 60, 33, 45, 60, 63, 95, 47, 57, 65,
104
+ 90, 97, 122, 0
105
+ };
106
+ }
107
+
108
+ private static final char _java_lexer_trans_keys[] = init__java_lexer_trans_keys_0();
109
+
110
+
111
+ private static byte[] init__java_lexer_single_lengths_0()
112
+ {
113
+ return new byte [] {
114
+ 0, 4, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
115
+ 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 4,
116
+ 3, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
117
+ 5, 2, 2, 3, 3, 1, 1, 10, 2, 3, 3, 3,
118
+ 3, 3, 3, 3, 3, 3, 3, 5, 1, 1, 2, 1,
119
+ 2, 3, 11, 3, 1, 1, 5
120
+ };
121
+ }
122
+
123
+ private static final byte _java_lexer_single_lengths[] = init__java_lexer_single_lengths_0();
124
+
125
+
126
+ private static byte[] init__java_lexer_range_lengths_0()
127
+ {
128
+ return new byte [] {
129
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
130
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3,
131
+ 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
132
+ 3, 0, 3, 3, 3, 0, 0, 3, 3, 3, 3, 3,
133
+ 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 3, 0,
134
+ 3, 3, 3, 3, 0, 0, 3
135
+ };
136
+ }
137
+
138
+ private static final byte _java_lexer_range_lengths[] = init__java_lexer_range_lengths_0();
139
+
140
+
141
+ private static short[] init__java_lexer_index_offsets_0()
142
+ {
143
+ return new short [] {
144
+ 0, 0, 5, 7, 9, 11, 14, 17, 20, 23, 26, 29,
145
+ 32, 35, 37, 39, 41, 43, 45, 47, 49, 51, 54, 60,
146
+ 68, 75, 77, 79, 80, 82, 84, 86, 88, 90, 92, 94,
147
+ 96, 105, 108, 114, 121, 128, 130, 132, 146, 152, 159, 166,
148
+ 173, 180, 187, 194, 201, 208, 215, 222, 231, 233, 235, 241,
149
+ 243, 249, 256, 271, 278, 280, 282
150
+ };
151
+ }
152
+
153
+ private static final short _java_lexer_index_offsets[] = init__java_lexer_index_offsets_0();
154
+
155
+
156
+ private static byte[] init__java_lexer_indicies_0()
157
+ {
158
+ return new byte [] {
159
+ 1, 2, 3, 2, 0, 4, 0, 5, 4, 6, 4, 6,
160
+ 7, 4, 8, 8, 0, 9, 9, 0, 10, 10, 0, 11,
161
+ 11, 0, 12, 12, 0, 13, 13, 0, 14, 14, 0, 15,
162
+ 0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21,
163
+ 20, 22, 20, 23, 22, 20, 24, 24, 24, 24, 24, 0,
164
+ 24, 25, 26, 24, 24, 24, 24, 0, 27, 27, 28, 27,
165
+ 27, 27, 0, 30, 29, 30, 31, 32, 33, 32, 36, 35,
166
+ 36, 37, 38, 39, 41, 40, 41, 42, 43, 39, 45, 44,
167
+ 47, 48, 25, 49, 48, 48, 48, 48, 46, 14, 14, 50,
168
+ 27, 27, 27, 27, 27, 51, 27, 27, 53, 27, 27, 27,
169
+ 52, 27, 27, 54, 27, 27, 27, 52, 56, 55, 58, 57,
170
+ 59, 59, 29, 31, 60, 61, 62, 63, 64, 60, 60, 60,
171
+ 60, 39, 60, 60, 60, 60, 60, 65, 60, 67, 60, 60,
172
+ 60, 60, 66, 60, 68, 60, 60, 60, 60, 66, 60, 69,
173
+ 60, 60, 60, 60, 66, 60, 70, 60, 60, 60, 60, 66,
174
+ 60, 71, 60, 60, 60, 60, 66, 60, 72, 60, 60, 60,
175
+ 60, 66, 60, 73, 60, 60, 60, 60, 66, 60, 74, 60,
176
+ 60, 60, 60, 66, 60, 75, 60, 60, 60, 60, 66, 60,
177
+ 71, 60, 60, 60, 60, 66, 77, 78, 79, 80, 79, 79,
178
+ 79, 79, 76, 36, 35, 36, 37, 79, 79, 79, 79, 79,
179
+ 82, 83, 81, 84, 84, 84, 84, 84, 39, 84, 86, 84,
180
+ 84, 84, 84, 85, 87, 38, 88, 87, 40, 42, 89, 90,
181
+ 87, 91, 89, 89, 89, 89, 39, 89, 93, 89, 89, 89,
182
+ 89, 92, 95, 94, 95, 94, 98, 98, 95, 98, 98, 98,
183
+ 98, 98, 94, 0
184
+ };
185
+ }
186
+
187
+ private static final byte _java_lexer_indicies[] = init__java_lexer_indicies_0();
188
+
189
+
190
+ private static byte[] init__java_lexer_trans_targs_0()
191
+ {
192
+ return new byte [] {
193
+ 35, 2, 6, 13, 3, 4, 5, 35, 7, 8, 9, 10,
194
+ 11, 12, 37, 14, 15, 16, 17, 18, 19, 20, 21, 35,
195
+ 23, 22, 35, 38, 39, 25, 43, 26, 28, 43, 55, 29,
196
+ 55, 30, 62, 0, 32, 62, 33, 62, 35, 36, 35, 1,
197
+ 35, 24, 35, 35, 35, 40, 38, 41, 42, 41, 41, 43,
198
+ 44, 43, 45, 50, 27, 43, 43, 46, 47, 48, 49, 44,
199
+ 51, 52, 53, 54, 55, 56, 57, 58, 59, 55, 55, 55,
200
+ 61, 60, 60, 62, 31, 63, 34, 62, 62, 62, 65, 66,
201
+ 64, 64, 64
202
+ };
203
+ }
204
+
205
+ private static final byte _java_lexer_trans_targs[] = init__java_lexer_trans_targs_0();
206
+
207
+
208
+ private static byte[] init__java_lexer_trans_actions_0()
209
+ {
210
+ return new byte [] {
211
+ 79, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, 0,
212
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65,
213
+ 0, 0, 69, 95, 0, 0, 17, 0, 0, 15, 37, 0,
214
+ 29, 0, 45, 0, 0, 49, 0, 53, 71, 7, 77, 0,
215
+ 67, 0, 73, 81, 75, 0, 92, 11, 0, 13, 9, 19,
216
+ 89, 21, 0, 0, 0, 25, 23, 0, 0, 0, 0, 86,
217
+ 0, 0, 0, 0, 31, 7, 7, 0, 0, 35, 33, 27,
218
+ 0, 41, 39, 43, 0, 0, 0, 51, 55, 47, 98, 1,
219
+ 61, 59, 57
220
+ };
221
+ }
222
+
223
+ private static final byte _java_lexer_trans_actions[] = init__java_lexer_trans_actions_0();
224
+
225
+
226
+ private static byte[] init__java_lexer_to_state_actions_0()
227
+ {
228
+ return new byte [] {
229
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
230
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
231
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,
232
+ 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0,
233
+ 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0,
234
+ 3, 0, 3, 0, 83, 0, 0
235
+ };
236
+ }
237
+
238
+ private static final byte _java_lexer_to_state_actions[] = init__java_lexer_to_state_actions_0();
239
+
240
+
241
+ private static byte[] init__java_lexer_from_state_actions_0()
242
+ {
243
+ return new byte [] {
244
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
245
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
246
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
247
+ 0, 0, 0, 0, 0, 5, 0, 5, 0, 0, 0, 0,
248
+ 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0,
249
+ 5, 0, 5, 0, 5, 0, 0
250
+ };
251
+ }
252
+
253
+ private static final byte _java_lexer_from_state_actions[] = init__java_lexer_from_state_actions_0();
254
+
255
+
256
+ private static short[] init__java_lexer_eof_trans_0()
257
+ {
258
+ return new short [] {
259
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
260
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
261
+ 1, 0, 0, 0, 0, 35, 35, 0, 0, 0, 0, 0,
262
+ 47, 51, 52, 53, 53, 0, 58, 0, 66, 67, 67, 67,
263
+ 67, 67, 67, 67, 67, 67, 67, 0, 82, 82, 83, 82,
264
+ 0, 86, 0, 93, 0, 97, 98
265
+ };
266
+ }
267
+
268
+ private static final short _java_lexer_eof_trans[] = init__java_lexer_eof_trans_0();
269
+
270
+
271
+ static final int java_lexer_start = 35;
272
+ static final int java_lexer_first_final = 35;
273
+ static final int java_lexer_error = 0;
274
+
275
+ static final int java_lexer_en_proc_ins_body = 41;
276
+ static final int java_lexer_en_doctype = 43;
277
+ static final int java_lexer_en_xml_decl = 55;
278
+ static final int java_lexer_en_element_name = 60;
279
+ static final int java_lexer_en_element_head = 62;
280
+ static final int java_lexer_en_text = 64;
281
+ static final int java_lexer_en_main = 35;
282
+
283
+
284
+ // line 39 "ext/java/org/liboga/xml/Lexer.rl"
285
+
286
+ /* Used by Ragel to keep track of the current state. */
287
+ int act;
288
+ int cs;
289
+
290
+ /**
291
+ * Defines `Lexer` under the existing `Oga::XML` constant and binds the annotated methods.
292
+ */
293
+ public static void load(Ruby runtime)
294
+ {
295
+ RubyModule xmlModule = (RubyModule) runtime.getModule("Oga")
296
+ .getConstant("XML");
297
+
298
+ RubyClass lexerClass = xmlModule.defineClassUnder(
299
+ "Lexer",
300
+ runtime.getObject(),
301
+ ALLOCATOR
302
+ );
303
+
304
+ lexerClass.defineAnnotatedMethods(Lexer.class);
305
+ }
306
+
307
+ private static final ObjectAllocator ALLOCATOR = new ObjectAllocator()
308
+ {
309
+ public IRubyObject allocate(Ruby ruby, RubyClass rubyClass) // builds a fresh Lexer per allocation
310
+ {
311
+ return new Lexer(ruby, rubyClass);
312
+ }
313
+ };
314
+
315
+ public Lexer(Ruby runtime, RubyClass klass) // invoked by ALLOCATOR
316
+ {
317
+ super(runtime, klass);
318
+
319
+ this.runtime = runtime; // kept so the callbacks can create Strings and fetch the current context
320
+ }
321
+
322
+ /**
323
+ * Runs the bulk of the Ragel loop and calls back in to Ruby.
324
+ *
325
+ * This method lexes the bytes of the String passed as `rb_str`; the Ragel
326
+ * state is kept in the `act` and `cs` fields between calls. The encoding of
327
+ * `rb_str` is passed along to the callbacks to make sure that token
328
+ * values share the same encoding as the input.
329
+ *
330
+ * This method always returns nil.
331
+ */
332
+ @JRubyMethod
333
+ public IRubyObject advance_native(ThreadContext context, RubyString rb_str)
334
+ {
335
+ Encoding encoding = rb_str.getEncoding();
336
+
337
+ byte[] data = rb_str.getBytes();
338
+
339
+ int ts = 0;
340
+ int te = 0;
341
+ int p = 0;
342
+ int mark = 0;
343
+ int pe = data.length;
344
+ int eof = data.length;
345
+
346
+
347
+ // line 348 "ext/java/org/liboga/xml/Lexer.java"
348
+ {
349
+ int _klen;
350
+ int _trans = 0;
351
+ int _acts;
352
+ int _nacts;
353
+ int _keys;
354
+ int _goto_targ = 0;
355
+
356
+ _goto: while (true) {
357
+ switch ( _goto_targ ) {
358
+ case 0:
359
+ if ( p == pe ) {
360
+ _goto_targ = 4;
361
+ continue _goto;
362
+ }
363
+ if ( ( this.cs) == 0 ) {
364
+ _goto_targ = 5;
365
+ continue _goto;
366
+ }
367
+ case 1:
368
+ _acts = _java_lexer_from_state_actions[( this.cs)];
369
+ _nacts = (int) _java_lexer_actions[_acts++];
370
+ while ( _nacts-- > 0 ) {
371
+ switch ( _java_lexer_actions[_acts++] ) {
372
+ case 3:
373
+ // line 1 "NONE"
374
+ {ts = p;}
375
+ break;
376
+ // line 377 "ext/java/org/liboga/xml/Lexer.java"
377
+ }
378
+ }
379
+
380
+ _match: do {
381
+ _keys = _java_lexer_key_offsets[( this.cs)];
382
+ _trans = _java_lexer_index_offsets[( this.cs)];
383
+ _klen = _java_lexer_single_lengths[( this.cs)];
384
+ if ( _klen > 0 ) {
385
+ int _lower = _keys;
386
+ int _mid;
387
+ int _upper = _keys + _klen - 1;
388
+ while (true) {
389
+ if ( _upper < _lower )
390
+ break;
391
+
392
+ _mid = _lower + ((_upper-_lower) >> 1);
393
+ if ( data[p] < _java_lexer_trans_keys[_mid] )
394
+ _upper = _mid - 1;
395
+ else if ( data[p] > _java_lexer_trans_keys[_mid] )
396
+ _lower = _mid + 1;
397
+ else {
398
+ _trans += (_mid - _keys);
399
+ break _match;
400
+ }
401
+ }
402
+ _keys += _klen;
403
+ _trans += _klen;
404
+ }
405
+
406
+ _klen = _java_lexer_range_lengths[( this.cs)];
407
+ if ( _klen > 0 ) {
408
+ int _lower = _keys;
409
+ int _mid;
410
+ int _upper = _keys + (_klen<<1) - 2;
411
+ while (true) {
412
+ if ( _upper < _lower )
413
+ break;
414
+
415
+ _mid = _lower + (((_upper-_lower) >> 1) & ~1);
416
+ if ( data[p] < _java_lexer_trans_keys[_mid] )
417
+ _upper = _mid - 2;
418
+ else if ( data[p] > _java_lexer_trans_keys[_mid+1] )
419
+ _lower = _mid + 2;
420
+ else {
421
+ _trans += ((_mid - _keys)>>1);
422
+ break _match;
423
+ }
424
+ }
425
+ _trans += _klen;
426
+ }
427
+ } while (false);
428
+
429
+ _trans = _java_lexer_indicies[_trans];
430
+ case 3:
431
+ ( this.cs) = _java_lexer_trans_targs[_trans];
432
+
433
+ if ( _java_lexer_trans_actions[_trans] != 0 ) {
434
+ _acts = _java_lexer_trans_actions[_trans];
435
+ _nacts = (int) _java_lexer_actions[_acts++];
436
+ while ( _nacts-- > 0 )
437
+ {
438
+ switch ( _java_lexer_actions[_acts++] )
439
+ {
440
+ case 0:
441
+ // line 296 "ext/ragel/base_lexer.rl"
442
+ { mark = p; }
443
+ break;
444
+ case 4:
445
+ // line 1 "NONE"
446
+ {te = p+1;}
447
+ break;
448
+ case 5:
449
+ // line 100 "ext/ragel/base_lexer.rl"
450
+ {te = p+1;{
451
+ callback("on_text", data, encoding, mark, ts);
452
+ callback_simple("on_proc_ins_end");
453
+
454
+ ( this.cs) = 35;
455
+ }}
456
+ break;
457
+ case 6:
458
+ // line 107 "ext/ragel/base_lexer.rl"
459
+ {te = p+1;}
460
+ break;
461
+ case 7:
462
+ // line 107 "ext/ragel/base_lexer.rl"
463
+ {te = p;p--;}
464
+ break;
465
+ case 8:
466
+ // line 148 "ext/ragel/base_lexer.rl"
467
+ {( this.act) = 3;}
468
+ break;
469
+ case 9:
470
+ // line 165 "ext/ragel/base_lexer.rl"
471
+ {( this.act) = 7;}
472
+ break;
473
+ case 10:
474
+ // line 154 "ext/ragel/base_lexer.rl"
475
+ {te = p+1;{
476
+ callback("on_doctype_inline", data, encoding, ts + 1, te - 1);
477
+ }}
478
+ break;
479
+ case 11:
480
+ // line 124 "ext/ragel/base_lexer.rl"
481
+ {te = p+1;{
482
+ callback("on_string", data, encoding, ts + 1, te - 1);
483
+ }}
484
+ break;
485
+ case 12:
486
+ // line 163 "ext/ragel/base_lexer.rl"
487
+ {te = p+1;}
488
+ break;
489
+ case 13:
490
+ // line 169 "ext/ragel/base_lexer.rl"
491
+ {te = p+1;{
492
+ callback_simple("on_doctype_end");
493
+ ( this.cs) = 35;
494
+ }}
495
+ break;
496
+ case 14:
497
+ // line 165 "ext/ragel/base_lexer.rl"
498
+ {te = p;p--;{
499
+ callback("on_doctype_name", data, encoding, ts, te);
500
+ }}
501
+ break;
502
+ case 15:
503
+ // line 1 "NONE"
504
+ { switch( ( this.act) ) {
505
+ case 3:
506
+ {{p = ((te))-1;}
507
+ callback("on_doctype_type", data, encoding, ts, te);
508
+ }
509
+ break;
510
+ case 7:
511
+ {{p = ((te))-1;}
512
+ callback("on_doctype_name", data, encoding, ts, te);
513
+ }
514
+ break;
515
+ }
516
+ }
517
+ break;
518
+ case 16:
519
+ // line 189 "ext/ragel/base_lexer.rl"
520
+ {te = p+1;{
521
+ callback_simple("on_xml_decl_end");
522
+ ( this.cs) = 35;
523
+ }}
524
+ break;
525
+ case 17:
526
+ // line 124 "ext/ragel/base_lexer.rl"
527
+ {te = p+1;{
528
+ callback("on_string", data, encoding, ts + 1, te - 1);
529
+ }}
530
+ break;
531
+ case 18:
532
+ // line 201 "ext/ragel/base_lexer.rl"
533
+ {te = p+1;}
534
+ break;
535
+ case 19:
536
+ // line 195 "ext/ragel/base_lexer.rl"
537
+ {te = p;p--;{
538
+ callback("on_attribute", data, encoding, ts, te);
539
+ }}
540
+ break;
541
+ case 20:
542
+ // line 201 "ext/ragel/base_lexer.rl"
543
+ {te = p;p--;}
544
+ break;
545
+ case 21:
546
+ // line 201 "ext/ragel/base_lexer.rl"
547
+ {{p = ((te))-1;}}
548
+ break;
549
+ case 22:
550
+ // line 228 "ext/ragel/base_lexer.rl"
551
+ {te = p+1;{
552
+ callback("on_element_ns", data, encoding, ts, te - 1);
553
+ }}
554
+ break;
555
+ case 23:
556
+ // line 232 "ext/ragel/base_lexer.rl"
557
+ {te = p;p--;{
558
+ callback("on_element_name", data, encoding, ts, te);
559
+ ( this.cs) = 62;
560
+ }}
561
+ break;
562
+ case 24:
563
+ // line 241 "ext/ragel/base_lexer.rl"
564
+ {te = p+1;}
565
+ break;
566
+ case 25:
567
+ // line 243 "ext/ragel/base_lexer.rl"
568
+ {te = p+1;{
569
+ callback_simple("advance_line");
570
+ }}
571
+ break;
572
+ case 26:
573
+ // line 248 "ext/ragel/base_lexer.rl"
574
+ {te = p+1;{
575
+ callback("on_attribute_ns", data, encoding, ts, te - 1);
576
+ }}
577
+ break;
578
+ case 27:
579
+ // line 124 "ext/ragel/base_lexer.rl"
580
+ {te = p+1;{
581
+ callback("on_string", data, encoding, ts + 1, te - 1);
582
+ }}
583
+ break;
584
+ case 28:
585
+ // line 260 "ext/ragel/base_lexer.rl"
586
+ {te = p+1;{
587
+ callback_simple("on_element_open_end");
588
+ ( this.cs) = 35;
589
+ }}
590
+ break;
591
+ case 29:
592
+ // line 266 "ext/ragel/base_lexer.rl"
593
+ {te = p+1;{
594
+ callback_simple("on_element_end");
595
+ ( this.cs) = 35;
596
+ }}
597
+ break;
598
+ case 30:
599
+ // line 252 "ext/ragel/base_lexer.rl"
600
+ {te = p;p--;{
601
+ callback("on_attribute", data, encoding, ts, te);
602
+ }}
603
+ break;
604
+ case 31:
605
+ // line 306 "ext/ragel/base_lexer.rl"
606
+ {( this.act) = 23;}
607
+ break;
608
+ case 32:
609
+ // line 296 "ext/ragel/base_lexer.rl"
610
+ {te = p+1;{
611
+ callback("on_text", data, encoding, ts, mark);
612
+
613
+ p = mark - 1;
614
+ mark = 0;
615
+
616
+ ( this.cs) = 35;
617
+ }}
618
+ break;
619
+ case 33:
620
+ // line 306 "ext/ragel/base_lexer.rl"
621
+ {te = p;p--;{
622
+ callback("on_text", data, encoding, ts, te);
623
+ ( this.cs) = 35;
624
+ }}
625
+ break;
626
+ case 34:
627
+ // line 1 "NONE"
628
+ { switch( ( this.act) ) {
629
+ case 0:
630
+ {{( this.cs) = 0; _goto_targ = 2; if (true) continue _goto;}}
631
+ break;
632
+ case 23:
633
+ {{p = ((te))-1;}
634
+ callback("on_text", data, encoding, ts, te);
635
+ ( this.cs) = 35;
636
+ }
637
+ break;
638
+ }
639
+ }
640
+ break;
641
+ case 35:
642
+ // line 182 "ext/ragel/base_lexer.rl"
643
+ {( this.act) = 25;}
644
+ break;
645
+ case 36:
646
+ // line 90 "ext/ragel/base_lexer.rl"
647
+ {( this.act) = 28;}
648
+ break;
649
+ case 37:
650
+ // line 56 "ext/ragel/base_lexer.rl"
651
+ {te = p+1;{
652
+ callback("on_comment", data, encoding, ts + 4, te - 3);
653
+ }}
654
+ break;
655
+ case 38:
656
+ // line 72 "ext/ragel/base_lexer.rl"
657
+ {te = p+1;{
658
+ callback("on_cdata", data, encoding, ts + 9, te - 3);
659
+ }}
660
+ break;
661
+ case 39:
662
+ // line 216 "ext/ragel/base_lexer.rl"
663
+ {te = p+1;{
664
+ callback_simple("on_element_start");
665
+ p--;
666
+ ( this.cs) = 60;
667
+ }}
668
+ break;
669
+ case 40:
670
+ // line 222 "ext/ragel/base_lexer.rl"
671
+ {te = p+1;{
672
+ callback_simple("on_element_end");
673
+ }}
674
+ break;
675
+ case 41:
676
+ // line 280 "ext/ragel/base_lexer.rl"
677
+ {te = p+1;{
678
+ p--;
679
+ ( this.cs) = 64;
680
+ }}
681
+ break;
682
+ case 42:
683
+ // line 140 "ext/ragel/base_lexer.rl"
684
+ {te = p;p--;{
685
+ callback_simple("on_doctype_start");
686
+ ( this.cs) = 43;
687
+ }}
688
+ break;
689
+ case 43:
690
+ // line 90 "ext/ragel/base_lexer.rl"
691
+ {te = p;p--;{
692
+ callback_simple("on_proc_ins_start");
693
+ callback("on_proc_ins_name", data, encoding, ts + 2, te);
694
+
695
+ mark = te;
696
+
697
+ ( this.cs) = 41;
698
+ }}
699
+ break;
700
+ case 44:
701
+ // line 280 "ext/ragel/base_lexer.rl"
702
+ {te = p;p--;{
703
+ p--;
704
+ ( this.cs) = 64;
705
+ }}
706
+ break;
707
+ case 45:
708
+ // line 280 "ext/ragel/base_lexer.rl"
709
+ {{p = ((te))-1;}{
710
+ p--;
711
+ ( this.cs) = 64;
712
+ }}
713
+ break;
714
+ case 46:
715
+ // line 1 "NONE"
716
+ { switch( ( this.act) ) {
717
+ case 25:
718
+ {{p = ((te))-1;}
719
+ callback_simple("on_xml_decl_start");
720
+ ( this.cs) = 55;
721
+ }
722
+ break;
723
+ case 28:
724
+ {{p = ((te))-1;}
725
+ callback_simple("on_proc_ins_start");
726
+ callback("on_proc_ins_name", data, encoding, ts + 2, te);
727
+
728
+ mark = te;
729
+
730
+ ( this.cs) = 41;
731
+ }
732
+ break;
733
+ }
734
+ }
735
+ break;
736
+ // line 737 "ext/java/org/liboga/xml/Lexer.java"
737
+ }
738
+ }
739
+ }
740
+
741
+ case 2:
742
+ _acts = _java_lexer_to_state_actions[( this.cs)];
743
+ _nacts = (int) _java_lexer_actions[_acts++];
744
+ while ( _nacts-- > 0 ) {
745
+ switch ( _java_lexer_actions[_acts++] ) {
746
+ case 1:
747
+ // line 1 "NONE"
748
+ {ts = -1;}
749
+ break;
750
+ case 2:
751
+ // line 1 "NONE"
752
+ {( this.act) = 0;}
753
+ break;
754
+ // line 755 "ext/java/org/liboga/xml/Lexer.java"
755
+ }
756
+ }
757
+
758
+ if ( ( this.cs) == 0 ) {
759
+ _goto_targ = 5;
760
+ continue _goto;
761
+ }
762
+ if ( ++p != pe ) {
763
+ _goto_targ = 1;
764
+ continue _goto;
765
+ }
766
+ case 4:
767
+ if ( p == eof )
768
+ {
769
+ if ( _java_lexer_eof_trans[( this.cs)] > 0 ) {
770
+ _trans = _java_lexer_eof_trans[( this.cs)] - 1;
771
+ _goto_targ = 3;
772
+ continue _goto;
773
+ }
774
+ }
775
+
776
+ case 5:
777
+ }
778
+ break; }
779
+ }
780
+
781
+ // line 101 "ext/java/org/liboga/xml/Lexer.rl"
782
+
783
+ return context.nil;
784
+ }
785
+
786
+ /**
787
+ * Restores the initial lexer state (`act` = 0, `cs` = `java_lexer_start`) and returns nil.
788
+ */
789
+ @JRubyMethod
790
+ public IRubyObject reset_native(ThreadContext context)
791
+ {
792
+ act = 0;
793
+ cs = java_lexer_start;
794
+
795
+ return context.nil;
796
+ }
797
+
798
+ /**
799
+ * Calls back in to Ruby land passing the current token value along.
800
+ *
801
+ * This method calls back in to Ruby land based on the method name
802
+ * specified in `name`. The Ruby callback should take one argument. This
803
+ * argument will be a String containing the value of the current token.
804
+ */
805
+ public void callback(String name, byte[] data, Encoding enc, int ts, int te)
806
+ {
807
+ ByteList bytelist = new ByteList(data, ts, te - ts, enc, true);
808
+
809
+ RubyString value = this.runtime.newString(bytelist);
810
+
811
+ ThreadContext context = this.runtime.getCurrentContext();
812
+
813
+ this.callMethod(context, name, value);
814
+ }
815
+
816
+ /**
817
+ * Invokes the Ruby-side lexer method called `name` with no arguments.
818
+ */
819
+ public void callback_simple(String name)
820
+ {
821
+ final ThreadContext current = runtime.getCurrentContext();
822
+
823
+ callMethod(current, name);
824
+ }
825
+ }
826
+
827
+
828
+ // line 151 "ext/java/org/liboga/xml/Lexer.rl"
829
+