oga 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +13 -0
  3. data/LICENSE +19 -0
  4. data/README.md +171 -0
  5. data/doc/DCO.md +25 -0
  6. data/doc/changelog.md +7 -0
  7. data/doc/css/common.css +76 -0
  8. data/doc/migrating_from_nokogiri.md +169 -0
  9. data/ext/c/extconf.rb +13 -0
  10. data/ext/c/lexer.c +1518 -0
  11. data/ext/c/lexer.h +8 -0
  12. data/ext/c/lexer.rl +121 -0
  13. data/ext/c/liboga.c +6 -0
  14. data/ext/c/liboga.h +11 -0
  15. data/ext/java/Liboga.java +14 -0
  16. data/ext/java/org/liboga/xml/Lexer.java +829 -0
  17. data/ext/java/org/liboga/xml/Lexer.rl +151 -0
  18. data/ext/ragel/base_lexer.rl +323 -0
  19. data/lib/oga.rb +43 -0
  20. data/lib/oga/html/parser.rb +25 -0
  21. data/lib/oga/oga.rb +27 -0
  22. data/lib/oga/version.rb +3 -0
  23. data/lib/oga/xml/attribute.rb +111 -0
  24. data/lib/oga/xml/cdata.rb +24 -0
  25. data/lib/oga/xml/character_node.rb +39 -0
  26. data/lib/oga/xml/comment.rb +24 -0
  27. data/lib/oga/xml/doctype.rb +91 -0
  28. data/lib/oga/xml/document.rb +99 -0
  29. data/lib/oga/xml/element.rb +340 -0
  30. data/lib/oga/xml/lexer.rb +399 -0
  31. data/lib/oga/xml/namespace.rb +42 -0
  32. data/lib/oga/xml/node.rb +175 -0
  33. data/lib/oga/xml/node_set.rb +313 -0
  34. data/lib/oga/xml/parser.rb +556 -0
  35. data/lib/oga/xml/processing_instruction.rb +39 -0
  36. data/lib/oga/xml/pull_parser.rb +166 -0
  37. data/lib/oga/xml/querying.rb +32 -0
  38. data/lib/oga/xml/text.rb +16 -0
  39. data/lib/oga/xml/traversal.rb +48 -0
  40. data/lib/oga/xml/xml_declaration.rb +76 -0
  41. data/lib/oga/xpath/evaluator.rb +1748 -0
  42. data/lib/oga/xpath/lexer.rb +2043 -0
  43. data/lib/oga/xpath/node.rb +10 -0
  44. data/lib/oga/xpath/parser.rb +535 -0
  45. data/oga.gemspec +45 -0
  46. metadata +221 -0
@@ -0,0 +1,8 @@
1
+ #ifndef LIBOGA_XML_LEXER_H
2
+ #define LIBOGA_XML_LEXER_H
3
+
4
+ #include "liboga.h"
5
+
6
+ extern void Init_liboga_xml_lexer();
7
+
8
+ #endif
@@ -0,0 +1,121 @@
1
+ #include "lexer.h"
2
+
3
+ /*
4
+ The `callback` and `callback_simple` macros allow the Ragel grammar to use generic
5
+ function calls without relying on the setup of the C or Java lexer. Using these macros we can
6
+ also pass along `self` to the callback functions without having to hard-code
7
+ this in to the Ragel grammar. Both expand to a plain call expression; the grammar action supplies the terminating `;`.
8
+
9
+ In the C lexer we don't need the `data` variable (since this is pulled in based
10
+ on `ts` and `te`) so the macro ignores this argument. `oga_ivar_get` and `oga_ivar_set` read and write an instance variable of `owner`, interning the C string `name` on every use.
11
+ */
12
+
13
+ #define callback(name, data, encoding, start, stop) \
14
+ liboga_xml_lexer_callback(self, name, encoding, start, stop)
15
+
16
+ #define callback_simple(name) \
17
+ liboga_xml_lexer_callback_simple(self, name)
18
+
19
+ #define oga_ivar_get(owner, name) \
20
+ rb_ivar_get(owner, rb_intern(name))
21
+
22
+ #define oga_ivar_set(owner, name, value) \
23
+ rb_ivar_set(owner, rb_intern(name), value)
24
+
25
+ %%machine c_lexer;
26
+
27
+ /**
28
+ * Calls the method `name` defined in the Ruby side of the lexer on `self`, passing one String.
29
+ * The String is created from the bytes between `ts` and `te` and uses the encoding specified
30
+ * in `encoding`. The return value of the Ruby method is discarded.
31
+ *
32
+ * @example
33
+ * rb_encoding *encoding = rb_enc_get(...);
34
+ * liboga_xml_lexer_callback(self, "on_string", encoding, ts, te);
35
+ */
36
+ void liboga_xml_lexer_callback(
37
+ VALUE self,
38
+ const char *name,
39
+ rb_encoding *encoding,
40
+ const char *ts,
41
+ const char *te
42
+ )
43
+ {
44
+ VALUE value = rb_enc_str_new(ts, te - ts, encoding);
45
+ /* rb_intern() yields an ID (a method-name handle), not a VALUE. */
46
+ ID method = rb_intern(name);
47
+ rb_funcall(self, method, 1, value);
48
+ }
49
+
50
+ /**
51
+ * Calls the method `name` defined in the Ruby side of the lexer on `self` without passing it any
52
+ * arguments. The return value of the Ruby method is discarded.
53
+ *
54
+ * @example
55
+ * liboga_xml_lexer_callback_simple(self, "on_cdata_start");
56
+ */
57
+ void liboga_xml_lexer_callback_simple(VALUE self, const char *name)
58
+ {
59
+ ID method = rb_intern(name); /* rb_intern() yields an ID, not a VALUE */
60
+
61
+ rb_funcall(self, method, 0);
62
+ }
63
+
64
+ %% write data;
65
+
66
+ /**
67
+ * Lexes the String specified as the method argument. Token values have the
68
+ * same encoding as the input value. The whole byte length of the String is
69
+ * lexed, so input that contains NUL bytes is not cut short.
70
+ * This method keeps track of an internal state using the instance variables
71
+ * `@act` and `@cs`: both are read before the Ragel run and written back after it. Always returns nil.
72
+ */
73
+ VALUE oga_xml_lexer_advance(VALUE self, VALUE data_block)
74
+ {
75
+ /* Coerce to a String first so the encoding is read from the actual String. */
76
+ char *data_str_val = StringValuePtr(data_block);
77
+
78
+ /* Make sure that all data passed back to Ruby has the proper encoding. */
79
+ rb_encoding *encoding = rb_enc_get(data_block);
80
+ const char *p = data_str_val;
81
+ const char *pe = data_str_val + RSTRING_LEN(data_block); /* byte length; strlen() would stop at the first NUL */
82
+ const char *eof = pe;
83
+ const char *ts = 0;
84
+ const char *te = 0;
85
+ const char *mark = 0;
86
+
87
+ int act = NUM2INT(oga_ivar_get(self, "@act"));
88
+ int cs = NUM2INT(oga_ivar_get(self, "@cs"));
89
+
90
+ %% write exec;
91
+
92
+ oga_ivar_set(self, "@act", INT2NUM(act));
93
+ oga_ivar_set(self, "@cs", INT2NUM(cs));
94
+
95
+ return Qnil;
96
+ }
97
+
98
+ /**
99
+ * Puts the lexer back in its initial state: `@act` is set to 0 and `@cs` to the Ragel start state. Returns nil.
100
+ */
101
+ VALUE oga_xml_lexer_reset(VALUE self)
102
+ {
103
+ rb_ivar_set(self, rb_intern("@act"), INT2NUM(0));
104
+ rb_ivar_set(self, rb_intern("@cs"), INT2NUM(c_lexer_start));
105
+
106
+ return Qnil;
107
+ }
108
+
109
+ %%{
110
+ include base_lexer "base_lexer.rl";
111
+ }%%
112
+
113
+ void Init_liboga_xml_lexer() /* Looks up Oga::XML and defines Lexer with its two native methods. */
114
+ {
115
+ VALUE oga_module = rb_const_get(rb_cObject, rb_intern("Oga"));
116
+ VALUE xml_module = rb_const_get(oga_module, rb_intern("XML"));
117
+ VALUE lexer_class = rb_define_class_under(xml_module, "Lexer", rb_cObject);
118
+ /* advance_native takes one argument, reset_native takes none. */
119
+ rb_define_method(lexer_class, "advance_native", oga_xml_lexer_advance, 1);
120
+ rb_define_method(lexer_class, "reset_native", oga_xml_lexer_reset, 0);
121
+ }
@@ -0,0 +1,6 @@
1
+ #include "liboga.h"
2
+
3
+ void Init_liboga() /* Initialises liboga by registering the native XML lexer. */
4
+ {
5
+ Init_liboga_xml_lexer();
6
+ }
@@ -0,0 +1,11 @@
1
+ #ifndef LIBOGA_H
2
+ #define LIBOGA_H
3
+
4
+ #include <ruby.h>
5
+ #include <ruby/encoding.h>
6
+
7
+ #include "lexer.h"
8
+
9
+ void Init_liboga();
10
+
11
+ #endif
@@ -0,0 +1,14 @@
1
+ package org.liboga;
2
+
3
+ import org.jruby.Ruby;
4
+
5
+ public class Liboga
6
+ {
7
+ /**
8
+ * Bootstraps the JRuby extension by delegating to org.liboga.xml.Lexer.load, passing the given runtime along.
9
+ */
10
+ public static void load(final Ruby runtime)
11
+ {
12
+ org.liboga.xml.Lexer.load(runtime);
13
+ }
14
+ }
@@ -0,0 +1,829 @@
1
+
2
+ // line 1 "ext/java/org/liboga/xml/Lexer.rl"
3
+ package org.liboga.xml;
4
+
5
+
6
+ // line 4 "ext/java/org/liboga/xml/Lexer.rl"
7
+
8
+ import java.io.IOException;
9
+
10
+ import org.jcodings.Encoding;
11
+
12
+ import org.jruby.Ruby;
13
+ import org.jruby.RubyModule;
14
+ import org.jruby.RubyClass;
15
+ import org.jruby.RubyObject;
16
+ import org.jruby.RubyString;
17
+ import org.jruby.RubyFixnum;
18
+ import org.jruby.util.ByteList;
19
+ import org.jruby.anno.JRubyClass;
20
+ import org.jruby.anno.JRubyMethod;
21
+ import org.jruby.runtime.ThreadContext;
22
+ import org.jruby.runtime.ObjectAllocator;
23
+ import org.jruby.runtime.builtin.IRubyObject;
24
+
25
+ /**
26
+ * Lexer support class for JRuby.
27
+ *
28
+ * The Lexer class contains the raw Ragel loop and calls back in to Ruby land
29
+ * whenever a Ragel action is needed similar to the C extension setup.
30
+ *
31
+ * This class requires Ruby land to first define the `Oga::XML` namespace.
32
+ */
33
+ @JRubyClass(name="Oga::XML::Lexer", parent="Object")
34
+ public class Lexer extends RubyObject
35
+ {
36
+ /**
37
+ * The current Ruby runtime.
38
+ */
39
+ private Ruby runtime;
40
+
41
+
42
+ // line 43 "ext/java/org/liboga/xml/Lexer.java"
43
+ private static byte[] init__java_lexer_actions_0()
44
+ {
45
+ return new byte [] {
46
+ 0, 1, 0, 1, 1, 1, 3, 1, 4, 1, 5, 1,
47
+ 6, 1, 7, 1, 10, 1, 11, 1, 12, 1, 13, 1,
48
+ 14, 1, 15, 1, 16, 1, 17, 1, 18, 1, 19, 1,
49
+ 20, 1, 21, 1, 22, 1, 23, 1, 24, 1, 25, 1,
50
+ 26, 1, 27, 1, 28, 1, 29, 1, 30, 1, 32, 1,
51
+ 33, 1, 34, 1, 37, 1, 38, 1, 39, 1, 40, 1,
52
+ 41, 1, 42, 1, 43, 1, 44, 1, 45, 1, 46, 2,
53
+ 1, 2, 2, 4, 8, 2, 4, 9, 2, 4, 35, 2,
54
+ 4, 36, 3, 4, 0, 31
55
+ };
56
+ }
57
+
58
+ private static final byte _java_lexer_actions[] = init__java_lexer_actions_0();
59
+
60
+
61
+ private static short[] init__java_lexer_key_offsets_0()
62
+ {
63
+ return new short [] {
64
+ 0, 0, 4, 5, 6, 7, 9, 11, 13, 15, 17, 19,
65
+ 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 41,
66
+ 51, 60, 61, 62, 62, 63, 64, 65, 66, 67, 68, 69,
67
+ 70, 81, 83, 91, 100, 109, 110, 111, 127, 135, 144, 153,
68
+ 162, 171, 180, 189, 198, 207, 216, 225, 236, 237, 238, 246,
69
+ 247, 255, 264, 281, 290, 291, 292
70
+ };
71
+ }
72
+
73
+ private static final short _java_lexer_key_offsets[] = init__java_lexer_key_offsets_0();
74
+
75
+
76
+ private static char[] init__java_lexer_trans_keys_0()
77
+ {
78
+ return new char [] {
79
+ 45, 68, 91, 100, 45, 45, 45, 45, 62, 79, 111, 67,
80
+ 99, 84, 116, 89, 121, 80, 112, 69, 101, 9, 32, 67,
81
+ 68, 65, 84, 65, 91, 93, 93, 62, 93, 45, 95, 48,
82
+ 57, 65, 90, 97, 122, 45, 58, 62, 95, 48, 57, 65,
83
+ 90, 97, 122, 45, 95, 120, 48, 57, 65, 90, 97, 122,
84
+ 34, 39, 93, 34, 39, 10, 34, 39, 62, 60, 33, 45,
85
+ 47, 63, 95, 48, 57, 65, 90, 97, 122, 9, 32, 45,
86
+ 95, 48, 57, 65, 90, 97, 122, 45, 95, 109, 48, 57,
87
+ 65, 90, 97, 122, 45, 95, 108, 48, 57, 65, 90, 97,
88
+ 122, 63, 62, 9, 32, 34, 39, 45, 62, 80, 83, 91,
89
+ 95, 48, 57, 65, 90, 97, 122, 45, 95, 48, 57, 65,
90
+ 90, 97, 122, 45, 85, 95, 48, 57, 65, 90, 97, 122,
91
+ 45, 66, 95, 48, 57, 65, 90, 97, 122, 45, 76, 95,
92
+ 48, 57, 65, 90, 97, 122, 45, 73, 95, 48, 57, 65,
93
+ 90, 97, 122, 45, 67, 95, 48, 57, 65, 90, 97, 122,
94
+ 45, 89, 95, 48, 57, 65, 90, 97, 122, 45, 83, 95,
95
+ 48, 57, 65, 90, 97, 122, 45, 84, 95, 48, 57, 65,
96
+ 90, 97, 122, 45, 69, 95, 48, 57, 65, 90, 97, 122,
97
+ 45, 77, 95, 48, 57, 65, 90, 97, 122, 34, 39, 45,
98
+ 63, 95, 48, 57, 65, 90, 97, 122, 34, 39, 45, 95,
99
+ 48, 57, 65, 90, 97, 122, 62, 45, 95, 48, 57, 65,
100
+ 90, 97, 122, 45, 58, 95, 48, 57, 65, 90, 97, 122,
101
+ 9, 10, 13, 32, 34, 39, 45, 47, 61, 62, 95, 48,
102
+ 57, 65, 90, 97, 122, 45, 58, 95, 48, 57, 65, 90,
103
+ 97, 122, 60, 60, 33, 45, 60, 63, 95, 47, 57, 65,
104
+ 90, 97, 122, 0
105
+ };
106
+ }
107
+
108
+ private static final char _java_lexer_trans_keys[] = init__java_lexer_trans_keys_0();
109
+
110
+
111
+ private static byte[] init__java_lexer_single_lengths_0()
112
+ {
113
+ return new byte [] {
114
+ 0, 4, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
115
+ 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 4,
116
+ 3, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
117
+ 5, 2, 2, 3, 3, 1, 1, 10, 2, 3, 3, 3,
118
+ 3, 3, 3, 3, 3, 3, 3, 5, 1, 1, 2, 1,
119
+ 2, 3, 11, 3, 1, 1, 5
120
+ };
121
+ }
122
+
123
+ private static final byte _java_lexer_single_lengths[] = init__java_lexer_single_lengths_0();
124
+
125
+
126
+ private static byte[] init__java_lexer_range_lengths_0()
127
+ {
128
+ return new byte [] {
129
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
130
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3,
131
+ 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
132
+ 3, 0, 3, 3, 3, 0, 0, 3, 3, 3, 3, 3,
133
+ 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 3, 0,
134
+ 3, 3, 3, 3, 0, 0, 3
135
+ };
136
+ }
137
+
138
+ private static final byte _java_lexer_range_lengths[] = init__java_lexer_range_lengths_0();
139
+
140
+
141
+ private static short[] init__java_lexer_index_offsets_0()
142
+ {
143
+ return new short [] {
144
+ 0, 0, 5, 7, 9, 11, 14, 17, 20, 23, 26, 29,
145
+ 32, 35, 37, 39, 41, 43, 45, 47, 49, 51, 54, 60,
146
+ 68, 75, 77, 79, 80, 82, 84, 86, 88, 90, 92, 94,
147
+ 96, 105, 108, 114, 121, 128, 130, 132, 146, 152, 159, 166,
148
+ 173, 180, 187, 194, 201, 208, 215, 222, 231, 233, 235, 241,
149
+ 243, 249, 256, 271, 278, 280, 282
150
+ };
151
+ }
152
+
153
+ private static final short _java_lexer_index_offsets[] = init__java_lexer_index_offsets_0();
154
+
155
+
156
+ private static byte[] init__java_lexer_indicies_0()
157
+ {
158
+ return new byte [] {
159
+ 1, 2, 3, 2, 0, 4, 0, 5, 4, 6, 4, 6,
160
+ 7, 4, 8, 8, 0, 9, 9, 0, 10, 10, 0, 11,
161
+ 11, 0, 12, 12, 0, 13, 13, 0, 14, 14, 0, 15,
162
+ 0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21,
163
+ 20, 22, 20, 23, 22, 20, 24, 24, 24, 24, 24, 0,
164
+ 24, 25, 26, 24, 24, 24, 24, 0, 27, 27, 28, 27,
165
+ 27, 27, 0, 30, 29, 30, 31, 32, 33, 32, 36, 35,
166
+ 36, 37, 38, 39, 41, 40, 41, 42, 43, 39, 45, 44,
167
+ 47, 48, 25, 49, 48, 48, 48, 48, 46, 14, 14, 50,
168
+ 27, 27, 27, 27, 27, 51, 27, 27, 53, 27, 27, 27,
169
+ 52, 27, 27, 54, 27, 27, 27, 52, 56, 55, 58, 57,
170
+ 59, 59, 29, 31, 60, 61, 62, 63, 64, 60, 60, 60,
171
+ 60, 39, 60, 60, 60, 60, 60, 65, 60, 67, 60, 60,
172
+ 60, 60, 66, 60, 68, 60, 60, 60, 60, 66, 60, 69,
173
+ 60, 60, 60, 60, 66, 60, 70, 60, 60, 60, 60, 66,
174
+ 60, 71, 60, 60, 60, 60, 66, 60, 72, 60, 60, 60,
175
+ 60, 66, 60, 73, 60, 60, 60, 60, 66, 60, 74, 60,
176
+ 60, 60, 60, 66, 60, 75, 60, 60, 60, 60, 66, 60,
177
+ 71, 60, 60, 60, 60, 66, 77, 78, 79, 80, 79, 79,
178
+ 79, 79, 76, 36, 35, 36, 37, 79, 79, 79, 79, 79,
179
+ 82, 83, 81, 84, 84, 84, 84, 84, 39, 84, 86, 84,
180
+ 84, 84, 84, 85, 87, 38, 88, 87, 40, 42, 89, 90,
181
+ 87, 91, 89, 89, 89, 89, 39, 89, 93, 89, 89, 89,
182
+ 89, 92, 95, 94, 95, 94, 98, 98, 95, 98, 98, 98,
183
+ 98, 98, 94, 0
184
+ };
185
+ }
186
+
187
+ private static final byte _java_lexer_indicies[] = init__java_lexer_indicies_0();
188
+
189
+
190
+ private static byte[] init__java_lexer_trans_targs_0()
191
+ {
192
+ return new byte [] {
193
+ 35, 2, 6, 13, 3, 4, 5, 35, 7, 8, 9, 10,
194
+ 11, 12, 37, 14, 15, 16, 17, 18, 19, 20, 21, 35,
195
+ 23, 22, 35, 38, 39, 25, 43, 26, 28, 43, 55, 29,
196
+ 55, 30, 62, 0, 32, 62, 33, 62, 35, 36, 35, 1,
197
+ 35, 24, 35, 35, 35, 40, 38, 41, 42, 41, 41, 43,
198
+ 44, 43, 45, 50, 27, 43, 43, 46, 47, 48, 49, 44,
199
+ 51, 52, 53, 54, 55, 56, 57, 58, 59, 55, 55, 55,
200
+ 61, 60, 60, 62, 31, 63, 34, 62, 62, 62, 65, 66,
201
+ 64, 64, 64
202
+ };
203
+ }
204
+
205
+ private static final byte _java_lexer_trans_targs[] = init__java_lexer_trans_targs_0();
206
+
207
+
208
+ private static byte[] init__java_lexer_trans_actions_0()
209
+ {
210
+ return new byte [] {
211
+ 79, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, 0,
212
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65,
213
+ 0, 0, 69, 95, 0, 0, 17, 0, 0, 15, 37, 0,
214
+ 29, 0, 45, 0, 0, 49, 0, 53, 71, 7, 77, 0,
215
+ 67, 0, 73, 81, 75, 0, 92, 11, 0, 13, 9, 19,
216
+ 89, 21, 0, 0, 0, 25, 23, 0, 0, 0, 0, 86,
217
+ 0, 0, 0, 0, 31, 7, 7, 0, 0, 35, 33, 27,
218
+ 0, 41, 39, 43, 0, 0, 0, 51, 55, 47, 98, 1,
219
+ 61, 59, 57
220
+ };
221
+ }
222
+
223
+ private static final byte _java_lexer_trans_actions[] = init__java_lexer_trans_actions_0();
224
+
225
+
226
+ private static byte[] init__java_lexer_to_state_actions_0()
227
+ {
228
+ return new byte [] {
229
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
230
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
231
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,
232
+ 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0,
233
+ 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0,
234
+ 3, 0, 3, 0, 83, 0, 0
235
+ };
236
+ }
237
+
238
+ private static final byte _java_lexer_to_state_actions[] = init__java_lexer_to_state_actions_0();
239
+
240
+
241
+ private static byte[] init__java_lexer_from_state_actions_0()
242
+ {
243
+ return new byte [] {
244
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
245
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
246
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
247
+ 0, 0, 0, 0, 0, 5, 0, 5, 0, 0, 0, 0,
248
+ 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0,
249
+ 5, 0, 5, 0, 5, 0, 0
250
+ };
251
+ }
252
+
253
+ private static final byte _java_lexer_from_state_actions[] = init__java_lexer_from_state_actions_0();
254
+
255
+
256
+ private static short[] init__java_lexer_eof_trans_0()
257
+ {
258
+ return new short [] {
259
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
260
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
261
+ 1, 0, 0, 0, 0, 35, 35, 0, 0, 0, 0, 0,
262
+ 47, 51, 52, 53, 53, 0, 58, 0, 66, 67, 67, 67,
263
+ 67, 67, 67, 67, 67, 67, 67, 0, 82, 82, 83, 82,
264
+ 0, 86, 0, 93, 0, 97, 98
265
+ };
266
+ }
267
+
268
+ private static final short _java_lexer_eof_trans[] = init__java_lexer_eof_trans_0();
269
+
270
+
271
+ static final int java_lexer_start = 35;
272
+ static final int java_lexer_first_final = 35;
273
+ static final int java_lexer_error = 0;
274
+
275
+ static final int java_lexer_en_proc_ins_body = 41;
276
+ static final int java_lexer_en_doctype = 43;
277
+ static final int java_lexer_en_xml_decl = 55;
278
+ static final int java_lexer_en_element_name = 60;
279
+ static final int java_lexer_en_element_head = 62;
280
+ static final int java_lexer_en_text = 64;
281
+ static final int java_lexer_en_main = 35;
282
+
283
+
284
+ // line 39 "ext/java/org/liboga/xml/Lexer.rl"
285
+
286
+ /* Used by Ragel to keep track of the current state. */
287
+ int act;
288
+ int cs;
289
+
290
+ /**
291
+ * Defines the `Lexer` class under the existing `Oga::XML` constant of the given runtime and registers this class's annotated methods on it.
292
+ */
293
+ public static void load(Ruby runtime)
294
+ {
295
+ RubyModule oga = runtime.getModule("Oga");
296
+ RubyModule xmlModule = (RubyModule) oga.getConstant("XML");
297
+ RubyClass objectClass = runtime.getObject();
298
+
299
+ RubyClass lexerClass = xmlModule.defineClassUnder(
300
+ "Lexer", objectClass, ALLOCATOR
301
+ );
302
+
303
+ // Register the methods of this class on the new Ruby class.
304
+ lexerClass.defineAnnotatedMethods(Lexer.class);
305
+ }
306
+
307
+ private static final ObjectAllocator ALLOCATOR = new ObjectAllocator() // handed to defineClassUnder in load(); builds a Lexer for the given runtime and class
308
+ {
309
+ public IRubyObject allocate(Ruby runtime, RubyClass klass)
310
+ {
311
+ return new org.liboga.xml.Lexer(runtime, klass);
312
+ }
313
+ };
314
+
315
+ public Lexer(Ruby runtime, RubyClass klass) // invoked by ALLOCATOR
316
+ {
317
+ super(runtime, klass);
318
+
319
+ this.runtime = runtime; // kept so callback() and callback_simple() can build Strings and fetch the current context
320
+ }
321
+
322
+ /**
323
+ * Runs the bulk of the Ragel loop and calls back in to Ruby.
324
+ *
325
+ * This method lexes the bytes of the String passed in as `rb_str`. The Ragel
326
+ * state (`act` and `cs`) is kept in fields of this object between calls.
327
+ * The encoding of the String is passed along to make sure that token
328
+ * values share the same encoding as the input.
329
+ *
330
+ * This method always returns nil.
331
+ */
332
+ @JRubyMethod
333
+ public IRubyObject advance_native(ThreadContext context, RubyString rb_str)
334
+ {
335
+ Encoding encoding = rb_str.getEncoding();
336
+
337
+ byte[] data = rb_str.getBytes();
338
+
339
+ int ts = 0;
340
+ int te = 0;
341
+ int p = 0;
342
+ int mark = 0;
343
+ int pe = data.length;
344
+ int eof = data.length;
345
+
346
+
347
+ // line 348 "ext/java/org/liboga/xml/Lexer.java"
348
+ {
349
+ int _klen;
350
+ int _trans = 0;
351
+ int _acts;
352
+ int _nacts;
353
+ int _keys;
354
+ int _goto_targ = 0;
355
+
356
+ _goto: while (true) {
357
+ switch ( _goto_targ ) {
358
+ case 0:
359
+ if ( p == pe ) {
360
+ _goto_targ = 4;
361
+ continue _goto;
362
+ }
363
+ if ( ( this.cs) == 0 ) {
364
+ _goto_targ = 5;
365
+ continue _goto;
366
+ }
367
+ case 1:
368
+ _acts = _java_lexer_from_state_actions[( this.cs)];
369
+ _nacts = (int) _java_lexer_actions[_acts++];
370
+ while ( _nacts-- > 0 ) {
371
+ switch ( _java_lexer_actions[_acts++] ) {
372
+ case 3:
373
+ // line 1 "NONE"
374
+ {ts = p;}
375
+ break;
376
+ // line 377 "ext/java/org/liboga/xml/Lexer.java"
377
+ }
378
+ }
379
+
380
+ _match: do {
381
+ _keys = _java_lexer_key_offsets[( this.cs)];
382
+ _trans = _java_lexer_index_offsets[( this.cs)];
383
+ _klen = _java_lexer_single_lengths[( this.cs)];
384
+ if ( _klen > 0 ) {
385
+ int _lower = _keys;
386
+ int _mid;
387
+ int _upper = _keys + _klen - 1;
388
+ while (true) {
389
+ if ( _upper < _lower )
390
+ break;
391
+
392
+ _mid = _lower + ((_upper-_lower) >> 1);
393
+ if ( data[p] < _java_lexer_trans_keys[_mid] )
394
+ _upper = _mid - 1;
395
+ else if ( data[p] > _java_lexer_trans_keys[_mid] )
396
+ _lower = _mid + 1;
397
+ else {
398
+ _trans += (_mid - _keys);
399
+ break _match;
400
+ }
401
+ }
402
+ _keys += _klen;
403
+ _trans += _klen;
404
+ }
405
+
406
+ _klen = _java_lexer_range_lengths[( this.cs)];
407
+ if ( _klen > 0 ) {
408
+ int _lower = _keys;
409
+ int _mid;
410
+ int _upper = _keys + (_klen<<1) - 2;
411
+ while (true) {
412
+ if ( _upper < _lower )
413
+ break;
414
+
415
+ _mid = _lower + (((_upper-_lower) >> 1) & ~1);
416
+ if ( data[p] < _java_lexer_trans_keys[_mid] )
417
+ _upper = _mid - 2;
418
+ else if ( data[p] > _java_lexer_trans_keys[_mid+1] )
419
+ _lower = _mid + 2;
420
+ else {
421
+ _trans += ((_mid - _keys)>>1);
422
+ break _match;
423
+ }
424
+ }
425
+ _trans += _klen;
426
+ }
427
+ } while (false);
428
+
429
+ _trans = _java_lexer_indicies[_trans];
430
+ case 3:
431
+ ( this.cs) = _java_lexer_trans_targs[_trans];
432
+
433
+ if ( _java_lexer_trans_actions[_trans] != 0 ) {
434
+ _acts = _java_lexer_trans_actions[_trans];
435
+ _nacts = (int) _java_lexer_actions[_acts++];
436
+ while ( _nacts-- > 0 )
437
+ {
438
+ switch ( _java_lexer_actions[_acts++] )
439
+ {
440
+ case 0:
441
+ // line 296 "ext/ragel/base_lexer.rl"
442
+ { mark = p; }
443
+ break;
444
+ case 4:
445
+ // line 1 "NONE"
446
+ {te = p+1;}
447
+ break;
448
+ case 5:
449
+ // line 100 "ext/ragel/base_lexer.rl"
450
+ {te = p+1;{
451
+ callback("on_text", data, encoding, mark, ts);
452
+ callback_simple("on_proc_ins_end");
453
+
454
+ ( this.cs) = 35;
455
+ }}
456
+ break;
457
+ case 6:
458
+ // line 107 "ext/ragel/base_lexer.rl"
459
+ {te = p+1;}
460
+ break;
461
+ case 7:
462
+ // line 107 "ext/ragel/base_lexer.rl"
463
+ {te = p;p--;}
464
+ break;
465
+ case 8:
466
+ // line 148 "ext/ragel/base_lexer.rl"
467
+ {( this.act) = 3;}
468
+ break;
469
+ case 9:
470
+ // line 165 "ext/ragel/base_lexer.rl"
471
+ {( this.act) = 7;}
472
+ break;
473
+ case 10:
474
+ // line 154 "ext/ragel/base_lexer.rl"
475
+ {te = p+1;{
476
+ callback("on_doctype_inline", data, encoding, ts + 1, te - 1);
477
+ }}
478
+ break;
479
+ case 11:
480
+ // line 124 "ext/ragel/base_lexer.rl"
481
+ {te = p+1;{
482
+ callback("on_string", data, encoding, ts + 1, te - 1);
483
+ }}
484
+ break;
485
+ case 12:
486
+ // line 163 "ext/ragel/base_lexer.rl"
487
+ {te = p+1;}
488
+ break;
489
+ case 13:
490
+ // line 169 "ext/ragel/base_lexer.rl"
491
+ {te = p+1;{
492
+ callback_simple("on_doctype_end");
493
+ ( this.cs) = 35;
494
+ }}
495
+ break;
496
+ case 14:
497
+ // line 165 "ext/ragel/base_lexer.rl"
498
+ {te = p;p--;{
499
+ callback("on_doctype_name", data, encoding, ts, te);
500
+ }}
501
+ break;
502
+ case 15:
503
+ // line 1 "NONE"
504
+ { switch( ( this.act) ) {
505
+ case 3:
506
+ {{p = ((te))-1;}
507
+ callback("on_doctype_type", data, encoding, ts, te);
508
+ }
509
+ break;
510
+ case 7:
511
+ {{p = ((te))-1;}
512
+ callback("on_doctype_name", data, encoding, ts, te);
513
+ }
514
+ break;
515
+ }
516
+ }
517
+ break;
518
+ case 16:
519
+ // line 189 "ext/ragel/base_lexer.rl"
520
+ {te = p+1;{
521
+ callback_simple("on_xml_decl_end");
522
+ ( this.cs) = 35;
523
+ }}
524
+ break;
525
+ case 17:
526
+ // line 124 "ext/ragel/base_lexer.rl"
527
+ {te = p+1;{
528
+ callback("on_string", data, encoding, ts + 1, te - 1);
529
+ }}
530
+ break;
531
+ case 18:
532
+ // line 201 "ext/ragel/base_lexer.rl"
533
+ {te = p+1;}
534
+ break;
535
+ case 19:
536
+ // line 195 "ext/ragel/base_lexer.rl"
537
+ {te = p;p--;{
538
+ callback("on_attribute", data, encoding, ts, te);
539
+ }}
540
+ break;
541
+ case 20:
542
+ // line 201 "ext/ragel/base_lexer.rl"
543
+ {te = p;p--;}
544
+ break;
545
+ case 21:
546
+ // line 201 "ext/ragel/base_lexer.rl"
547
+ {{p = ((te))-1;}}
548
+ break;
549
+ case 22:
550
+ // line 228 "ext/ragel/base_lexer.rl"
551
+ {te = p+1;{
552
+ callback("on_element_ns", data, encoding, ts, te - 1);
553
+ }}
554
+ break;
555
+ case 23:
556
+ // line 232 "ext/ragel/base_lexer.rl"
557
+ {te = p;p--;{
558
+ callback("on_element_name", data, encoding, ts, te);
559
+ ( this.cs) = 62;
560
+ }}
561
+ break;
562
+ case 24:
563
+ // line 241 "ext/ragel/base_lexer.rl"
564
+ {te = p+1;}
565
+ break;
566
+ case 25:
567
+ // line 243 "ext/ragel/base_lexer.rl"
568
+ {te = p+1;{
569
+ callback_simple("advance_line");
570
+ }}
571
+ break;
572
+ case 26:
573
+ // line 248 "ext/ragel/base_lexer.rl"
574
+ {te = p+1;{
575
+ callback("on_attribute_ns", data, encoding, ts, te - 1);
576
+ }}
577
+ break;
578
+ case 27:
579
+ // line 124 "ext/ragel/base_lexer.rl"
580
+ {te = p+1;{
581
+ callback("on_string", data, encoding, ts + 1, te - 1);
582
+ }}
583
+ break;
584
+ case 28:
585
+ // line 260 "ext/ragel/base_lexer.rl"
586
+ {te = p+1;{
587
+ callback_simple("on_element_open_end");
588
+ ( this.cs) = 35;
589
+ }}
590
+ break;
591
+ case 29:
592
+ // line 266 "ext/ragel/base_lexer.rl"
593
+ {te = p+1;{
594
+ callback_simple("on_element_end");
595
+ ( this.cs) = 35;
596
+ }}
597
+ break;
598
+ case 30:
599
+ // line 252 "ext/ragel/base_lexer.rl"
600
+ {te = p;p--;{
601
+ callback("on_attribute", data, encoding, ts, te);
602
+ }}
603
+ break;
604
+ case 31:
605
+ // line 306 "ext/ragel/base_lexer.rl"
606
+ {( this.act) = 23;}
607
+ break;
608
+ case 32:
609
+ // line 296 "ext/ragel/base_lexer.rl"
610
+ {te = p+1;{
611
+ callback("on_text", data, encoding, ts, mark);
612
+
613
+ p = mark - 1;
614
+ mark = 0;
615
+
616
+ ( this.cs) = 35;
617
+ }}
618
+ break;
619
+ case 33:
620
+ // line 306 "ext/ragel/base_lexer.rl"
621
+ {te = p;p--;{
622
+ callback("on_text", data, encoding, ts, te);
623
+ ( this.cs) = 35;
624
+ }}
625
+ break;
626
+ case 34:
627
+ // line 1 "NONE"
628
+ { switch( ( this.act) ) {
629
+ case 0:
630
+ {{( this.cs) = 0; _goto_targ = 2; if (true) continue _goto;}}
631
+ break;
632
+ case 23:
633
+ {{p = ((te))-1;}
634
+ callback("on_text", data, encoding, ts, te);
635
+ ( this.cs) = 35;
636
+ }
637
+ break;
638
+ }
639
+ }
640
+ break;
641
+ case 35:
642
+ // line 182 "ext/ragel/base_lexer.rl"
643
+ {( this.act) = 25;}
644
+ break;
645
+ case 36:
646
+ // line 90 "ext/ragel/base_lexer.rl"
647
+ {( this.act) = 28;}
648
+ break;
649
+ case 37:
650
+ // line 56 "ext/ragel/base_lexer.rl"
651
+ {te = p+1;{
652
+ callback("on_comment", data, encoding, ts + 4, te - 3);
653
+ }}
654
+ break;
655
+ case 38:
656
+ // line 72 "ext/ragel/base_lexer.rl"
657
+ {te = p+1;{
658
+ callback("on_cdata", data, encoding, ts + 9, te - 3);
659
+ }}
660
+ break;
661
+ case 39:
662
+ // line 216 "ext/ragel/base_lexer.rl"
663
+ {te = p+1;{
664
+ callback_simple("on_element_start");
665
+ p--;
666
+ ( this.cs) = 60;
667
+ }}
668
+ break;
669
+ case 40:
670
+ // line 222 "ext/ragel/base_lexer.rl"
671
+ {te = p+1;{
672
+ callback_simple("on_element_end");
673
+ }}
674
+ break;
675
+ case 41:
676
+ // line 280 "ext/ragel/base_lexer.rl"
677
+ {te = p+1;{
678
+ p--;
679
+ ( this.cs) = 64;
680
+ }}
681
+ break;
682
+ case 42:
683
+ // line 140 "ext/ragel/base_lexer.rl"
684
+ {te = p;p--;{
685
+ callback_simple("on_doctype_start");
686
+ ( this.cs) = 43;
687
+ }}
688
+ break;
689
+ case 43:
690
+ // line 90 "ext/ragel/base_lexer.rl"
691
+ {te = p;p--;{
692
+ callback_simple("on_proc_ins_start");
693
+ callback("on_proc_ins_name", data, encoding, ts + 2, te);
694
+
695
+ mark = te;
696
+
697
+ ( this.cs) = 41;
698
+ }}
699
+ break;
700
+ case 44:
701
+ // line 280 "ext/ragel/base_lexer.rl"
702
+ {te = p;p--;{
703
+ p--;
704
+ ( this.cs) = 64;
705
+ }}
706
+ break;
707
+ case 45:
708
+ // line 280 "ext/ragel/base_lexer.rl"
709
+ {{p = ((te))-1;}{
710
+ p--;
711
+ ( this.cs) = 64;
712
+ }}
713
+ break;
714
+ case 46:
715
+ // line 1 "NONE"
716
+ { switch( ( this.act) ) {
717
+ case 25:
718
+ {{p = ((te))-1;}
719
+ callback_simple("on_xml_decl_start");
720
+ ( this.cs) = 55;
721
+ }
722
+ break;
723
+ case 28:
724
+ {{p = ((te))-1;}
725
+ callback_simple("on_proc_ins_start");
726
+ callback("on_proc_ins_name", data, encoding, ts + 2, te);
727
+
728
+ mark = te;
729
+
730
+ ( this.cs) = 41;
731
+ }
732
+ break;
733
+ }
734
+ }
735
+ break;
736
+ // line 737 "ext/java/org/liboga/xml/Lexer.java"
737
+ }
738
+ }
739
+ }
740
+
741
+ case 2:
742
+ _acts = _java_lexer_to_state_actions[( this.cs)];
743
+ _nacts = (int) _java_lexer_actions[_acts++];
744
+ while ( _nacts-- > 0 ) {
745
+ switch ( _java_lexer_actions[_acts++] ) {
746
+ case 1:
747
+ // line 1 "NONE"
748
+ {ts = -1;}
749
+ break;
750
+ case 2:
751
+ // line 1 "NONE"
752
+ {( this.act) = 0;}
753
+ break;
754
+ // line 755 "ext/java/org/liboga/xml/Lexer.java"
755
+ }
756
+ }
757
+
758
+ if ( ( this.cs) == 0 ) {
759
+ _goto_targ = 5;
760
+ continue _goto;
761
+ }
762
+ if ( ++p != pe ) {
763
+ _goto_targ = 1;
764
+ continue _goto;
765
+ }
766
+ case 4:
767
+ if ( p == eof )
768
+ {
769
+ if ( _java_lexer_eof_trans[( this.cs)] > 0 ) {
770
+ _trans = _java_lexer_eof_trans[( this.cs)] - 1;
771
+ _goto_targ = 3;
772
+ continue _goto;
773
+ }
774
+ }
775
+
776
+ case 5:
777
+ }
778
+ break; }
779
+ }
780
+
781
+ // line 101 "ext/java/org/liboga/xml/Lexer.rl"
782
+
783
+ return context.nil;
784
+ }
785
+
786
+ /**
787
+ * Puts the lexer back in its initial state: `act` becomes 0 and `cs` the Ragel start state. Returns nil.
788
+ */
789
+ @JRubyMethod
790
+ public IRubyObject reset_native(ThreadContext context)
791
+ {
792
+ act = 0;
793
+ cs = java_lexer_start;
794
+
795
+ return context.nil;
796
+ }
797
+
798
+ /**
799
+ * Calls back in to Ruby land passing the current token value along.
800
+ *
801
+ * This method calls back in to Ruby land based on the method name
802
+ * specified in `name`. The Ruby callback should take one argument. This
803
+ * argument will be a String containing the value of the current token.
804
+ */
805
+ public void callback(String name, byte[] data, Encoding enc, int ts, int te)
806
+ {
807
+ ByteList bytelist = new ByteList(data, ts, te - ts, enc, true);
808
+
809
+ RubyString value = this.runtime.newString(bytelist);
810
+
811
+ ThreadContext context = this.runtime.getCurrentContext();
812
+
813
+ this.callMethod(context, name, value);
814
+ }
815
+
816
+ /**
817
+ * Invokes the Ruby method `name` on this object with no arguments; the return value is ignored.
818
+ */
819
+ public void callback_simple(String name)
820
+ {
821
+ // Fetch the runtime's current context and dispatch `name` on it.
822
+ ThreadContext current = runtime.getCurrentContext();
823
+ callMethod(current, name);
824
+ }
825
+ }
826
+
827
+
828
+ // line 151 "ext/java/org/liboga/xml/Lexer.rl"
829
+