rubyjedi-oga 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +13 -0
  3. data/LICENSE +362 -0
  4. data/README.md +317 -0
  5. data/doc/css/common.css +77 -0
  6. data/doc/css_selectors.md +935 -0
  7. data/doc/manually_creating_documents.md +67 -0
  8. data/doc/migrating_from_nokogiri.md +169 -0
  9. data/doc/xml_namespaces.md +63 -0
  10. data/ext/c/extconf.rb +11 -0
  11. data/ext/c/lexer.c +2595 -0
  12. data/ext/c/lexer.h +16 -0
  13. data/ext/c/lexer.rl +198 -0
  14. data/ext/c/liboga.c +6 -0
  15. data/ext/c/liboga.h +11 -0
  16. data/ext/java/Liboga.java +14 -0
  17. data/ext/java/org/liboga/xml/Lexer.java +1363 -0
  18. data/ext/java/org/liboga/xml/Lexer.rl +223 -0
  19. data/ext/ragel/base_lexer.rl +633 -0
  20. data/lib/oga.rb +57 -0
  21. data/lib/oga/blacklist.rb +40 -0
  22. data/lib/oga/css/lexer.rb +743 -0
  23. data/lib/oga/css/parser.rb +976 -0
  24. data/lib/oga/entity_decoder.rb +21 -0
  25. data/lib/oga/html/entities.rb +2150 -0
  26. data/lib/oga/html/parser.rb +25 -0
  27. data/lib/oga/html/sax_parser.rb +18 -0
  28. data/lib/oga/lru.rb +160 -0
  29. data/lib/oga/oga.rb +57 -0
  30. data/lib/oga/version.rb +3 -0
  31. data/lib/oga/whitelist.rb +20 -0
  32. data/lib/oga/xml/attribute.rb +136 -0
  33. data/lib/oga/xml/cdata.rb +17 -0
  34. data/lib/oga/xml/character_node.rb +37 -0
  35. data/lib/oga/xml/comment.rb +17 -0
  36. data/lib/oga/xml/default_namespace.rb +13 -0
  37. data/lib/oga/xml/doctype.rb +82 -0
  38. data/lib/oga/xml/document.rb +108 -0
  39. data/lib/oga/xml/element.rb +428 -0
  40. data/lib/oga/xml/entities.rb +122 -0
  41. data/lib/oga/xml/html_void_elements.rb +15 -0
  42. data/lib/oga/xml/lexer.rb +550 -0
  43. data/lib/oga/xml/namespace.rb +48 -0
  44. data/lib/oga/xml/node.rb +219 -0
  45. data/lib/oga/xml/node_set.rb +333 -0
  46. data/lib/oga/xml/parser.rb +631 -0
  47. data/lib/oga/xml/processing_instruction.rb +37 -0
  48. data/lib/oga/xml/pull_parser.rb +175 -0
  49. data/lib/oga/xml/querying.rb +56 -0
  50. data/lib/oga/xml/sax_parser.rb +192 -0
  51. data/lib/oga/xml/text.rb +66 -0
  52. data/lib/oga/xml/traversal.rb +50 -0
  53. data/lib/oga/xml/xml_declaration.rb +65 -0
  54. data/lib/oga/xpath/evaluator.rb +1798 -0
  55. data/lib/oga/xpath/lexer.rb +1958 -0
  56. data/lib/oga/xpath/parser.rb +622 -0
  57. data/oga.gemspec +45 -0
  58. metadata +227 -0
@@ -0,0 +1,16 @@
1
+ #ifndef LIBOGA_XML_LEXER_H
2
+ #define LIBOGA_XML_LEXER_H
3
+
4
+ #include "liboga.h"
5
+
6
+ extern void Init_liboga_xml_lexer();
7
+
8
+ typedef struct {
9
+ int act;
10
+ int cs;
11
+ int lines;
12
+ int stack[4];
13
+ int top;
14
+ } OgaLexerState;
15
+
16
+ #endif
@@ -0,0 +1,198 @@
1
+ #include "lexer.h"
2
+
3
+ /*
4
+ The following two macros allow the Ragel grammar to use generic function calls
5
+ without relying on the setup of the C or Java lexer. Using these macros we can
6
+ also pass along `self` to the callback functions without having to hard-code
7
+ this in to the Ragel grammar.
8
+
9
+ In the C lexer we don't need the `data` variable (since this is pulled in based
10
+ on `ts` and `te`) so the macro ignores this argument.
11
+ */
12
+
13
+ #define callback(name, data, encoding, start, stop) \
14
+ liboga_xml_lexer_callback(self, name, encoding, start, stop);
15
+
16
+ #define callback_simple(name) \
17
+ liboga_xml_lexer_callback_simple(self, name);
18
+
19
+ #define advance_line(amount) \
20
+ rb_funcall(self, id_advance_line, 1, INT2NUM(amount));
21
+
22
+ #define html_script_p() \
23
+ rb_funcall(self, id_html_script_p, 0) == Qtrue
24
+
25
+ #define html_style_p() \
26
+ rb_funcall(self, id_html_style_p, 0) == Qtrue
27
+
28
+ ID id_advance_line;
29
+ ID id_html_script_p;
30
+ ID id_html_style_p;
31
+ ID id_html_p;
32
+
33
+ %%machine c_lexer;
34
+
35
+ /**
36
+ * Calls a method defined in the Ruby side of the lexer. The String value is
37
+ * created based on the values of `ts` and `te` and uses the encoding specified
38
+ * in `encoding`.
39
+ *
40
+ * @example
41
+ * rb_encoding *encoding = rb_enc_get(...);
42
+ * liboga_xml_lexer_callback(self, "on_string", encoding, ts, te);
43
+ */
44
+ void liboga_xml_lexer_callback(
45
+ VALUE self,
46
+ ID name,
47
+ rb_encoding *encoding,
48
+ const char *ts,
49
+ const char *te
50
+ )
51
+ {
52
+ VALUE value = rb_enc_str_new(ts, te - ts, encoding);
53
+
54
+ rb_funcall(self, name, 1, value);
55
+ }
56
+
57
+ /**
58
+ * Calls a method defined in the Ruby side of the lexer without passing it any
59
+ * arguments.
60
+ *
61
+ * @example
62
+ * liboga_xml_lexer_callback_simple(self, "on_cdata_start");
63
+ */
64
+ void liboga_xml_lexer_callback_simple(VALUE self, VALUE name)
65
+ {
66
+ rb_funcall(self, name, 0);
67
+ }
68
+
69
+ %% write data;
70
+
71
+ /**
72
+ * Lexes the String specifies as the method argument. Token values have the
73
+ * same encoding as the input value.
74
+ *
75
+ * This method keeps track of an internal state using the instance variables
76
+ * `@act` and `@cs`.
77
+ */
78
+ VALUE oga_xml_lexer_advance(VALUE self, VALUE data_block)
79
+ {
80
+ OgaLexerState *state;
81
+ int lines;
82
+
83
+ /* Whether or not HTML mode is enabled */
84
+ int html_p = rb_funcall(self, id_html_p, 0) == Qtrue;
85
+
86
+ /* Make sure that all data passed back to Ruby has the proper encoding. */
87
+ rb_encoding *encoding = rb_enc_get(data_block);
88
+
89
+ char *data_str_val = StringValueCStr(data_block);
90
+
91
+ const char *p = data_str_val;
92
+ const char *pe = data_str_val + strlen(data_str_val);
93
+ const char *eof = pe;
94
+ const char *ts = 0;
95
+ const char *te = 0;
96
+ const char *mark = 0;
97
+
98
+ ID id_advance_line = rb_intern("advance_line");
99
+ ID id_on_attribute = rb_intern("on_attribute");
100
+ ID id_on_attribute_ns = rb_intern("on_attribute_ns");
101
+ ID id_on_cdata_start = rb_intern("on_cdata_start");
102
+ ID id_on_cdata_body = rb_intern("on_cdata_body");
103
+ ID id_on_cdata_end = rb_intern("on_cdata_end");
104
+ ID id_on_comment_start = rb_intern("on_comment_start");
105
+ ID id_on_comment_body = rb_intern("on_comment_body");
106
+ ID id_on_comment_end = rb_intern("on_comment_end");
107
+ ID id_on_doctype_end = rb_intern("on_doctype_end");
108
+ ID id_on_doctype_inline = rb_intern("on_doctype_inline");
109
+ ID id_on_doctype_name = rb_intern("on_doctype_name");
110
+ ID id_on_doctype_start = rb_intern("on_doctype_start");
111
+ ID id_on_doctype_type = rb_intern("on_doctype_type");
112
+ ID id_on_element_end = rb_intern("on_element_end");
113
+ ID id_on_element_name = rb_intern("on_element_name");
114
+ ID id_on_element_ns = rb_intern("on_element_ns");
115
+ ID id_on_element_open_end = rb_intern("on_element_open_end");
116
+ ID id_on_proc_ins_end = rb_intern("on_proc_ins_end");
117
+ ID id_on_proc_ins_name = rb_intern("on_proc_ins_name");
118
+ ID id_on_proc_ins_start = rb_intern("on_proc_ins_start");
119
+ ID id_on_proc_ins_body = rb_intern("on_proc_ins_body");
120
+ ID id_on_string_body = rb_intern("on_string_body");
121
+ ID id_on_string_dquote = rb_intern("on_string_dquote");
122
+ ID id_on_string_squote = rb_intern("on_string_squote");
123
+ ID id_on_text = rb_intern("on_text");
124
+ ID id_on_xml_decl_end = rb_intern("on_xml_decl_end");
125
+ ID id_on_xml_decl_start = rb_intern("on_xml_decl_start");
126
+
127
+ Data_Get_Struct(self, OgaLexerState, state);
128
+
129
+ lines = state->lines;
130
+
131
+ %% write exec;
132
+
133
+ state->lines = lines;
134
+
135
+ return Qnil;
136
+ }
137
+
138
+ /**
139
+ * Resets the internal state of the lexer.
140
+ */
141
+ VALUE oga_xml_lexer_reset(VALUE self)
142
+ {
143
+ OgaLexerState *state;
144
+
145
+ Data_Get_Struct(self, OgaLexerState, state);
146
+
147
+ state->act = 0;
148
+ state->cs = c_lexer_start;
149
+ state->lines = 0;
150
+ state->top = 0;
151
+
152
+ return Qnil;
153
+ }
154
+
/**
 * Releases the memory of a lexer state struct. Registered as the free
 * function of the wrapped object in oga_xml_lexer_allocate.
 *
 * @param state Pointer to the OgaLexerState to release.
 */
void oga_xml_lexer_free(void *state)
{
    /* free() takes a void pointer, so no cast is needed. */
    free(state);
}
162
+
163
+ /**
164
+ * Allocates and wraps the C lexer state struct. This state is used to keep
165
+ * track of the current position, line numbers, etc.
166
+ */
167
+ VALUE oga_xml_lexer_allocate(VALUE klass)
168
+ {
169
+ OgaLexerState *state = malloc(sizeof(OgaLexerState));
170
+
171
+ return Data_Wrap_Struct(klass, NULL, oga_xml_lexer_free, state);
172
+ }
173
+
174
+ %%{
175
+ include base_lexer "base_lexer.rl";
176
+
177
+ variable top state->top;
178
+ variable stack state->stack;
179
+ variable act state->act;
180
+ variable cs state->cs;
181
+ }%%
182
+
183
+ void Init_liboga_xml_lexer()
184
+ {
185
+ VALUE mOga = rb_const_get(rb_cObject, rb_intern("Oga"));
186
+ VALUE mXML = rb_const_get(mOga, rb_intern("XML"));
187
+ VALUE cLexer = rb_define_class_under(mXML, "Lexer", rb_cObject);
188
+
189
+ id_advance_line = rb_intern("advance_line");
190
+ id_html_script_p = rb_intern("html_script?");
191
+ id_html_style_p = rb_intern("html_style?");
192
+ id_html_p = rb_intern("html?");
193
+
194
+ rb_define_method(cLexer, "advance_native", oga_xml_lexer_advance, 1);
195
+ rb_define_method(cLexer, "reset_native", oga_xml_lexer_reset, 0);
196
+
197
+ rb_define_alloc_func(cLexer, oga_xml_lexer_allocate);
198
+ }
@@ -0,0 +1,6 @@
1
+ #include "liboga.h"
2
+
/**
 * Initializes the extension by setting up the XML lexer.
 *
 * NOTE(review): presumably the function Ruby runs when the `liboga` library
 * is loaded -- confirm against the extension's build configuration.
 */
void Init_liboga()
{
    Init_liboga_xml_lexer();
}
@@ -0,0 +1,11 @@
1
+ #ifndef LIBOGA_H
2
+ #define LIBOGA_H
3
+
4
+ #include <ruby.h>
5
+ #include <ruby/encoding.h>
6
+
7
+ #include "lexer.h"
8
+
9
+ void Init_liboga();
10
+
11
+ #endif
@@ -0,0 +1,14 @@
1
+ package org.liboga;
2
+
3
+ import org.jruby.Ruby;
4
+
5
+ public class Liboga
6
+ {
7
+ /**
8
+ * Bootstraps the JRuby extension.
9
+ */
10
+ public static void load(final Ruby runtime)
11
+ {
12
+ org.liboga.xml.Lexer.load(runtime);
13
+ }
14
+ }
@@ -0,0 +1,1363 @@
1
+
2
+ // line 1 "ext/java/org/liboga/xml/Lexer.rl"
3
+ package org.liboga.xml;
4
+
5
+
6
+ // line 4 "ext/java/org/liboga/xml/Lexer.rl"
7
+
8
+ import java.io.IOException;
9
+
10
+ import org.jcodings.Encoding;
11
+
12
+ import org.jruby.Ruby;
13
+ import org.jruby.RubyModule;
14
+ import org.jruby.RubyClass;
15
+ import org.jruby.RubyObject;
16
+ import org.jruby.RubyString;
17
+ import org.jruby.RubyFixnum;
18
+ import org.jruby.util.ByteList;
19
+ import org.jruby.anno.JRubyClass;
20
+ import org.jruby.anno.JRubyMethod;
21
+ import org.jruby.runtime.ThreadContext;
22
+ import org.jruby.runtime.ObjectAllocator;
23
+ import org.jruby.runtime.builtin.IRubyObject;
24
+
25
+ /**
26
+ * Lexer support class for JRuby.
27
+ *
28
+ * The Lexer class contains the raw Ragel loop and calls back in to Ruby land
29
+ * whenever a Ragel action is needed similar to the C extension setup.
30
+ *
31
+ * This class requires Ruby land to first define the `Oga::XML` namespace.
32
+ */
33
+ @JRubyClass(name="Oga::XML::Lexer", parent="Object")
34
+ public class Lexer extends RubyObject
35
+ {
36
+ /**
37
+ * The current Ruby runtime.
38
+ */
39
+ private Ruby runtime;
40
+
41
+
42
+ // line 43 "ext/java/org/liboga/xml/Lexer.java"
43
+ private static byte[] init__java_lexer_actions_0()
44
+ {
45
+ return new byte [] {
46
+ 0, 1, 0, 1, 2, 1, 3, 1, 4, 1, 5, 1,
47
+ 6, 1, 7, 1, 8, 1, 9, 1, 10, 1, 11, 1,
48
+ 12, 1, 13, 1, 14, 1, 15, 1, 16, 1, 17, 1,
49
+ 18, 1, 21, 1, 22, 1, 23, 1, 24, 1, 25, 1,
50
+ 26, 1, 27, 1, 28, 1, 29, 1, 30, 1, 34, 1,
51
+ 35, 1, 36, 1, 37, 1, 38, 1, 41, 1, 43, 1,
52
+ 44, 1, 47, 1, 48, 1, 49, 1, 50, 1, 51, 1,
53
+ 52, 1, 53, 1, 54, 1, 55, 1, 56, 1, 57, 1,
54
+ 58, 1, 59, 1, 60, 1, 61, 1, 62, 1, 63, 1,
55
+ 64, 1, 65, 1, 66, 1, 67, 1, 68, 1, 69, 1,
56
+ 70, 1, 73, 1, 74, 1, 75, 1, 76, 1, 77, 1,
57
+ 78, 1, 79, 1, 80, 1, 81, 1, 82, 2, 0, 1,
58
+ 2, 0, 33, 2, 0, 40, 2, 0, 42, 2, 4, 0,
59
+ 2, 4, 19, 2, 4, 20, 2, 4, 45, 2, 4, 46,
60
+ 2, 4, 71, 2, 4, 72, 2, 31, 0, 2, 32, 0,
61
+ 2, 39, 0
62
+ };
63
+ }
64
+
65
+ private static final byte _java_lexer_actions[] = init__java_lexer_actions_0();
66
+
67
+
68
+ private static short[] init__java_lexer_key_offsets_0()
69
+ {
70
+ return new short [] {
71
+ 0, 0, 4, 5, 7, 9, 11, 13, 15, 17, 21, 22,
72
+ 23, 24, 25, 26, 27, 37, 38, 39, 40, 41, 42, 43,
73
+ 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 63, 67,
74
+ 76, 86, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
75
+ 106, 107, 108, 109, 110, 111, 130, 131, 140, 150, 160, 170,
76
+ 180, 190, 200, 210, 220, 230, 240, 252, 261, 262, 271, 281,
77
+ 291, 301, 305, 306, 312, 319, 321, 333, 334, 344, 345, 346,
78
+ 347, 357, 367, 368, 369, 371, 372, 373, 374, 376
79
+ };
80
+ }
81
+
82
+ private static final short _java_lexer_key_offsets[] = init__java_lexer_key_offsets_0();
83
+
84
+
85
+ private static char[] init__java_lexer_trans_keys_0()
86
+ {
87
+ return new char [] {
88
+ 45, 68, 91, 100, 45, 79, 111, 67, 99, 84, 116, 89,
89
+ 121, 80, 112, 69, 101, 13, 32, 9, 10, 67, 68, 65,
90
+ 84, 65, 91, 95, 120, 45, 46, 48, 57, 65, 90, 97,
91
+ 122, 62, 62, 115, 99, 114, 105, 112, 116, 62, 115, 116,
92
+ 121, 108, 101, 62, 60, 33, 47, 63, 95, 45, 57, 65,
93
+ 90, 97, 122, 13, 32, 9, 10, 95, 45, 46, 48, 57,
94
+ 65, 90, 97, 122, 95, 109, 45, 46, 48, 57, 65, 90,
95
+ 97, 122, 95, 108, 45, 46, 48, 57, 65, 90, 97, 122,
96
+ 45, 45, 45, 93, 93, 93, 63, 63, 62, 39, 39, 34,
97
+ 34, 93, 93, 9, 10, 13, 32, 34, 39, 62, 80, 83,
98
+ 91, 95, 45, 46, 48, 57, 65, 90, 97, 122, 10, 95,
99
+ 45, 46, 48, 57, 65, 90, 97, 122, 85, 95, 45, 46,
100
+ 48, 57, 65, 90, 97, 122, 66, 95, 45, 46, 48, 57,
101
+ 65, 90, 97, 122, 76, 95, 45, 46, 48, 57, 65, 90,
102
+ 97, 122, 73, 95, 45, 46, 48, 57, 65, 90, 97, 122,
103
+ 67, 95, 45, 46, 48, 57, 65, 90, 97, 122, 89, 95,
104
+ 45, 46, 48, 57, 65, 90, 97, 122, 83, 95, 45, 46,
105
+ 48, 57, 65, 90, 97, 122, 84, 95, 45, 46, 48, 57,
106
+ 65, 90, 97, 122, 69, 95, 45, 46, 48, 57, 65, 90,
107
+ 97, 122, 77, 95, 45, 46, 48, 57, 65, 90, 97, 122,
108
+ 34, 39, 63, 95, 45, 46, 48, 57, 65, 90, 97, 122,
109
+ 95, 45, 46, 48, 57, 65, 90, 97, 122, 62, 95, 45,
110
+ 46, 48, 57, 65, 90, 97, 122, 58, 95, 45, 46, 48,
111
+ 57, 65, 90, 97, 122, 62, 95, 45, 46, 48, 57, 65,
112
+ 90, 97, 122, 58, 95, 45, 46, 48, 57, 65, 90, 97,
113
+ 122, 13, 32, 9, 10, 10, 13, 32, 34, 39, 9, 10,
114
+ 13, 32, 96, 9, 10, 60, 62, 34, 39, 10, 13, 47,
115
+ 61, 62, 95, 45, 57, 65, 90, 97, 122, 10, 58, 95,
116
+ 45, 46, 48, 57, 65, 90, 97, 122, 62, 60, 60, 33,
117
+ 60, 63, 95, 45, 57, 65, 90, 97, 122, 33, 60, 63,
118
+ 95, 45, 57, 65, 90, 97, 122, 60, 60, 47, 60, 60,
119
+ 60, 60, 47, 60, 60, 0
120
+ };
121
+ }
122
+
123
+ private static final char _java_lexer_trans_keys[] = init__java_lexer_trans_keys_0();
124
+
125
+
126
+ private static byte[] init__java_lexer_single_lengths_0()
127
+ {
128
+ return new byte [] {
129
+ 0, 4, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1,
130
+ 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1,
131
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 2, 1,
132
+ 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133
+ 1, 1, 1, 1, 1, 11, 1, 1, 2, 2, 2, 2,
134
+ 2, 2, 2, 2, 2, 2, 4, 1, 1, 1, 2, 2,
135
+ 2, 2, 1, 4, 3, 2, 6, 1, 2, 1, 1, 1,
136
+ 4, 4, 1, 1, 2, 1, 1, 1, 2, 1
137
+ };
138
+ }
139
+
140
+ private static final byte _java_lexer_single_lengths[] = init__java_lexer_single_lengths_0();
141
+
142
+
143
+ private static byte[] init__java_lexer_range_lengths_0()
144
+ {
145
+ return new byte [] {
146
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
147
+ 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0,
148
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 4,
149
+ 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
150
+ 0, 0, 0, 0, 0, 4, 0, 4, 4, 4, 4, 4,
151
+ 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 4, 4,
152
+ 4, 1, 0, 1, 2, 0, 3, 0, 4, 0, 0, 0,
153
+ 3, 3, 0, 0, 0, 0, 0, 0, 0, 0
154
+ };
155
+ }
156
+
157
+ private static final byte _java_lexer_range_lengths[] = init__java_lexer_range_lengths_0();
158
+
159
+
160
+ private static short[] init__java_lexer_index_offsets_0()
161
+ {
162
+ return new short [] {
163
+ 0, 0, 5, 7, 10, 13, 16, 19, 22, 25, 29, 31,
164
+ 33, 35, 37, 39, 41, 48, 50, 52, 54, 56, 58, 60,
165
+ 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 88, 92,
166
+ 98, 105, 112, 114, 116, 118, 120, 122, 124, 126, 128, 130,
167
+ 132, 134, 136, 138, 140, 142, 158, 160, 166, 173, 180, 187,
168
+ 194, 201, 208, 215, 222, 229, 236, 245, 251, 253, 259, 266,
169
+ 273, 280, 284, 286, 292, 298, 301, 311, 313, 320, 322, 324,
170
+ 326, 334, 342, 344, 346, 349, 351, 353, 355, 358
171
+ };
172
+ }
173
+
174
+ private static final short _java_lexer_index_offsets[] = init__java_lexer_index_offsets_0();
175
+
176
+
177
+ private static byte[] init__java_lexer_trans_targs_0()
178
+ {
179
+ return new byte [] {
180
+ 2, 3, 10, 3, 32, 32, 32, 4, 4, 32, 5, 5,
181
+ 32, 6, 6, 32, 7, 7, 32, 8, 8, 32, 9, 9,
182
+ 32, 34, 34, 34, 32, 11, 32, 12, 32, 13, 32, 14,
183
+ 32, 15, 32, 32, 32, 35, 36, 35, 35, 35, 35, 32,
184
+ 38, 38, 41, 41, 20, 86, 21, 86, 22, 86, 23, 86,
185
+ 24, 86, 25, 86, 86, 86, 27, 90, 28, 90, 29, 90,
186
+ 30, 90, 31, 90, 90, 90, 33, 32, 1, 32, 16, 32,
187
+ 32, 32, 32, 32, 34, 34, 34, 32, 35, 35, 35, 35,
188
+ 35, 32, 35, 37, 35, 35, 35, 35, 32, 35, 35, 35,
189
+ 35, 35, 35, 32, 40, 39, 38, 39, 17, 38, 43, 42,
190
+ 41, 42, 18, 41, 46, 45, 44, 45, 44, 44, 47, 48,
191
+ 47, 48, 49, 50, 49, 50, 51, 52, 51, 52, 53, 53,
192
+ 54, 53, 53, 53, 53, 56, 61, 53, 55, 55, 55, 55,
193
+ 55, 0, 53, 53, 55, 55, 55, 55, 55, 53, 57, 55,
194
+ 55, 55, 55, 55, 53, 58, 55, 55, 55, 55, 55, 53,
195
+ 59, 55, 55, 55, 55, 55, 53, 60, 55, 55, 55, 55,
196
+ 55, 53, 55, 55, 55, 55, 55, 55, 53, 62, 55, 55,
197
+ 55, 55, 55, 53, 63, 55, 55, 55, 55, 55, 53, 64,
198
+ 55, 55, 55, 55, 55, 53, 65, 55, 55, 55, 55, 55,
199
+ 53, 55, 55, 55, 55, 55, 55, 53, 66, 66, 68, 67,
200
+ 67, 67, 67, 67, 66, 67, 67, 67, 67, 67, 66, 66,
201
+ 66, 70, 70, 70, 70, 70, 0, 69, 70, 70, 70, 70,
202
+ 70, 69, 71, 72, 72, 72, 72, 72, 71, 71, 72, 72,
203
+ 72, 72, 72, 71, 74, 73, 73, 73, 73, 73, 75, 75,
204
+ 75, 75, 75, 76, 75, 75, 75, 75, 75, 76, 77, 77,
205
+ 77, 78, 79, 81, 78, 78, 80, 80, 80, 80, 78, 78,
206
+ 78, 78, 80, 80, 80, 80, 80, 78, 78, 78, 85, 83,
207
+ 84, 83, 82, 84, 82, 82, 82, 82, 82, 83, 82, 84,
208
+ 82, 82, 82, 82, 82, 83, 88, 87, 86, 87, 19, 89,
209
+ 86, 89, 86, 92, 91, 90, 91, 26, 93, 90, 93, 90,
210
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
211
+ 32, 32, 32, 32, 38, 41, 86, 86, 86, 86, 86, 86,
212
+ 86, 90, 90, 90, 90, 90, 90, 32, 32, 32, 32, 32,
213
+ 38, 38, 41, 41, 44, 44, 47, 49, 51, 53, 53, 53,
214
+ 53, 53, 53, 53, 53, 53, 53, 53, 53, 66, 66, 69,
215
+ 71, 73, 75, 78, 78, 78, 82, 82, 82, 86, 86, 86,
216
+ 90, 90, 90, 0
217
+ };
218
+ }
219
+
220
+ private static final byte _java_lexer_trans_targs[] = init__java_lexer_trans_targs_0();
221
+
222
+
223
+ private static short[] init__java_lexer_trans_actions_0()
224
+ {
225
+ return new short [] {
226
+ 0, 0, 0, 0, 137, 121, 137, 0, 0, 137, 0, 0,
227
+ 137, 0, 0, 137, 0, 0, 137, 0, 0, 137, 0, 0,
228
+ 137, 1, 1, 1, 137, 0, 137, 0, 137, 0, 137, 0,
229
+ 137, 0, 137, 123, 137, 171, 0, 171, 171, 171, 171, 137,
230
+ 9, 13, 15, 19, 0, 113, 0, 113, 0, 113, 0, 113,
231
+ 0, 113, 0, 113, 109, 113, 0, 119, 0, 119, 0, 119,
232
+ 0, 119, 0, 119, 115, 119, 7, 129, 0, 127, 0, 125,
233
+ 125, 125, 125, 135, 1, 1, 1, 131, 171, 171, 171, 171,
234
+ 171, 139, 171, 0, 171, 171, 171, 171, 133, 171, 168, 171,
235
+ 171, 171, 171, 133, 153, 1, 11, 1, 0, 11, 153, 1,
236
+ 17, 1, 0, 17, 1, 1, 23, 1, 21, 23, 25, 1,
237
+ 27, 1, 29, 1, 31, 1, 33, 1, 35, 1, 47, 45,
238
+ 0, 47, 41, 39, 43, 0, 0, 37, 159, 159, 159, 159,
239
+ 159, 0, 45, 51, 159, 159, 159, 159, 159, 53, 0, 159,
240
+ 159, 159, 159, 159, 49, 0, 159, 159, 159, 159, 159, 49,
241
+ 0, 159, 159, 159, 159, 159, 49, 0, 159, 159, 159, 159,
242
+ 159, 49, 156, 159, 159, 159, 159, 159, 49, 0, 159, 159,
243
+ 159, 159, 159, 49, 0, 159, 159, 159, 159, 159, 49, 0,
244
+ 159, 159, 159, 159, 159, 49, 0, 159, 159, 159, 159, 159,
245
+ 49, 156, 159, 159, 159, 159, 159, 49, 177, 174, 1, 1,
246
+ 1, 1, 1, 1, 144, 0, 0, 0, 0, 0, 57, 55,
247
+ 59, 0, 0, 0, 0, 0, 0, 61, 0, 0, 0, 0,
248
+ 0, 63, 180, 1, 1, 1, 1, 1, 147, 65, 0, 0,
249
+ 0, 0, 0, 67, 1, 150, 150, 69, 150, 71, 75, 75,
250
+ 73, 73, 75, 165, 77, 77, 77, 77, 77, 162, 81, 79,
251
+ 83, 85, 0, 0, 89, 91, 0, 0, 0, 0, 95, 85,
252
+ 97, 87, 0, 0, 0, 0, 0, 99, 93, 101, 141, 1,
253
+ 141, 1, 105, 141, 105, 105, 105, 105, 105, 1, 103, 141,
254
+ 103, 103, 103, 103, 103, 1, 153, 1, 111, 1, 0, 1,
255
+ 111, 1, 111, 153, 1, 117, 1, 0, 1, 117, 1, 117,
256
+ 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137,
257
+ 137, 137, 137, 137, 13, 19, 113, 113, 113, 113, 113, 113,
258
+ 113, 119, 119, 119, 119, 119, 119, 135, 131, 139, 133, 133,
259
+ 11, 11, 17, 17, 23, 23, 27, 31, 35, 51, 53, 49,
260
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 57, 59, 63,
261
+ 67, 71, 77, 97, 99, 101, 107, 107, 107, 111, 111, 111,
262
+ 117, 117, 117, 0
263
+ };
264
+ }
265
+
266
+ private static final short _java_lexer_trans_actions[] = init__java_lexer_trans_actions_0();
267
+
268
+
269
+ private static short[] init__java_lexer_to_state_actions_0()
270
+ {
271
+ return new short [] {
272
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
273
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
274
+ 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0,
275
+ 0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 3,
276
+ 0, 3, 0, 3, 0, 3, 0, 0, 0, 0, 0, 0,
277
+ 0, 0, 0, 0, 0, 0, 3, 0, 0, 3, 0, 3,
278
+ 0, 3, 0, 3, 0, 3, 3, 0, 0, 0, 3, 0,
279
+ 0, 0, 3, 0, 0, 0, 3, 0, 0, 0
280
+ };
281
+ }
282
+
283
+ private static final short _java_lexer_to_state_actions[] = init__java_lexer_to_state_actions_0();
284
+
285
+
286
+ private static short[] init__java_lexer_from_state_actions_0()
287
+ {
288
+ return new short [] {
289
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
290
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
291
+ 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0,
292
+ 0, 0, 5, 0, 0, 5, 0, 0, 5, 0, 0, 5,
293
+ 0, 5, 0, 5, 0, 5, 0, 0, 0, 0, 0, 0,
294
+ 0, 0, 0, 0, 0, 0, 5, 0, 0, 5, 0, 5,
295
+ 0, 5, 0, 5, 0, 5, 5, 0, 0, 0, 5, 0,
296
+ 0, 0, 5, 0, 0, 0, 5, 0, 0, 0
297
+ };
298
+ }
299
+
300
+ private static final short _java_lexer_from_state_actions[] = init__java_lexer_from_state_actions_0();
301
+
302
+
303
+ private static short[] init__java_lexer_eof_trans_0()
304
+ {
305
+ return new short [] {
306
+ 0, 376, 376, 376, 376, 376, 376, 376, 376, 376, 376, 376,
307
+ 376, 376, 376, 376, 376, 377, 378, 385, 385, 385, 385, 385,
308
+ 385, 385, 391, 391, 391, 391, 391, 391, 0, 392, 393, 394,
309
+ 396, 396, 0, 398, 398, 0, 400, 400, 0, 402, 402, 0,
310
+ 403, 0, 404, 0, 405, 0, 406, 407, 417, 417, 417, 417,
311
+ 417, 417, 417, 417, 417, 417, 0, 418, 419, 0, 420, 0,
312
+ 421, 0, 422, 0, 423, 0, 0, 424, 425, 426, 0, 429,
313
+ 429, 429, 0, 432, 432, 432, 0, 435, 435, 435
314
+ };
315
+ }
316
+
317
+ private static final short _java_lexer_eof_trans[] = init__java_lexer_eof_trans_0();
318
+
319
+
320
+ static final int java_lexer_start = 32;
321
+ static final int java_lexer_first_final = 32;
322
+ static final int java_lexer_error = 0;
323
+
324
+ static final int java_lexer_en_comment_body = 38;
325
+ static final int java_lexer_en_cdata_body = 41;
326
+ static final int java_lexer_en_proc_ins_body = 44;
327
+ static final int java_lexer_en_string_squote = 47;
328
+ static final int java_lexer_en_string_dquote = 49;
329
+ static final int java_lexer_en_doctype_inline = 51;
330
+ static final int java_lexer_en_doctype = 53;
331
+ static final int java_lexer_en_xml_decl = 66;
332
+ static final int java_lexer_en_element_name = 69;
333
+ static final int java_lexer_en_element_close = 71;
334
+ static final int java_lexer_en_attribute_pre = 73;
335
+ static final int java_lexer_en_html_attribute_value = 75;
336
+ static final int java_lexer_en_xml_attribute_value = 77;
337
+ static final int java_lexer_en_element_head = 78;
338
+ static final int java_lexer_en_text = 82;
339
+ static final int java_lexer_en_html_script = 86;
340
+ static final int java_lexer_en_html_style = 90;
341
+ static final int java_lexer_en_main = 32;
342
+
343
+
344
+ // line 39 "ext/java/org/liboga/xml/Lexer.rl"
345
+
346
+ /* Used by Ragel to keep track of the current state. */
347
+ int act;
348
+ int cs;
349
+ int top;
350
+ int lines;
351
+ int[] stack;
352
+
353
+ /**
354
+ * Sets up the current class in the Ruby runtime.
355
+ */
356
+ public static void load(Ruby runtime)
357
+ {
358
+ RubyModule xml = (RubyModule) runtime.getModule("Oga")
359
+ .getConstant("XML");
360
+
361
+ RubyClass lexer = xml.defineClassUnder(
362
+ "Lexer",
363
+ runtime.getObject(),
364
+ ALLOCATOR
365
+ );
366
+
367
+ lexer.defineAnnotatedMethods(Lexer.class);
368
+ }
369
+
370
+ private static final ObjectAllocator ALLOCATOR = new ObjectAllocator()
371
+ {
372
+ public IRubyObject allocate(Ruby runtime, RubyClass klass)
373
+ {
374
+ return new org.liboga.xml.Lexer(runtime, klass);
375
+ }
376
+ };
377
+
/**
 * Creates a lexer object of the given class and remembers the runtime it
 * belongs to.
 *
 * @param runtime The current Ruby runtime.
 * @param klass The Ruby class of the new object.
 */
public Lexer(Ruby runtime, RubyClass klass)
{
    super(runtime, klass);

    // Kept in a field of this class in addition to being passed to the parent.
    this.runtime = runtime;
}
384
+
385
+ /**
386
+ * Runs the bulk of the Ragel loop and calls back in to Ruby.
387
+ *
388
+ * This method lexes the String passed in as `rb_str` rather than reading an
389
+ * instance variable, so the Ruby side of the Lexer class has to supply the
390
+ * input on every call. The encoding of `rb_str` is passed along to make sure
391
+ * that token values share the same encoding as the input.
392
+ *
393
+ * This method always returns nil.
394
+ */
395
+ @JRubyMethod
396
+ public IRubyObject advance_native(ThreadContext context, RubyString rb_str)
397
+ {
398
+ Boolean html_p = this.callMethod(context, "html?").isTrue();
399
+
400
+ Encoding encoding = rb_str.getEncoding();
401
+
402
+ byte[] data = rb_str.getBytes();
403
+
404
+ int ts = 0;
405
+ int te = 0;
406
+ int p = 0;
407
+ int mark = 0;
408
+ int lines = this.lines;
409
+ int pe = data.length;
410
+ int eof = data.length;
411
+
412
+ String id_advance_line = "advance_line";
413
+ String id_on_attribute = "on_attribute";
414
+ String id_on_attribute_ns = "on_attribute_ns";
415
+ String id_on_cdata_start = "on_cdata_start";
416
+ String id_on_cdata_body = "on_cdata_body";
417
+ String id_on_cdata_end = "on_cdata_end";
418
+ String id_on_comment_start = "on_comment_start";
419
+ String id_on_comment_body = "on_comment_body";
420
+ String id_on_comment_end = "on_comment_end";
421
+ String id_on_doctype_end = "on_doctype_end";
422
+ String id_on_doctype_inline = "on_doctype_inline";
423
+ String id_on_doctype_name = "on_doctype_name";
424
+ String id_on_doctype_start = "on_doctype_start";
425
+ String id_on_doctype_type = "on_doctype_type";
426
+ String id_on_element_end = "on_element_end";
427
+ String id_on_element_name = "on_element_name";
428
+ String id_on_element_ns = "on_element_ns";
429
+ String id_on_element_open_end = "on_element_open_end";
430
+ String id_on_proc_ins_end = "on_proc_ins_end";
431
+ String id_on_proc_ins_name = "on_proc_ins_name";
432
+ String id_on_proc_ins_start = "on_proc_ins_start";
433
+ String id_on_proc_ins_body = "on_proc_ins_body";
434
+ String id_on_string_body = "on_string_body";
435
+ String id_on_string_dquote = "on_string_dquote";
436
+ String id_on_string_squote = "on_string_squote";
437
+ String id_on_text = "on_text";
438
+ String id_on_xml_decl_end = "on_xml_decl_end";
439
+ String id_on_xml_decl_start = "on_xml_decl_start";
440
+
441
+
442
+ // line 443 "ext/java/org/liboga/xml/Lexer.java"
443
+ {
444
+ int _klen;
445
+ int _trans = 0;
446
+ int _acts;
447
+ int _nacts;
448
+ int _keys;
449
+ int _goto_targ = 0;
450
+
451
+ _goto: while (true) {
452
+ switch ( _goto_targ ) {
453
+ case 0:
454
+ if ( p == pe ) {
455
+ _goto_targ = 4;
456
+ continue _goto;
457
+ }
458
+ if ( ( this.cs) == 0 ) {
459
+ _goto_targ = 5;
460
+ continue _goto;
461
+ }
462
+ case 1:
463
+ _acts = _java_lexer_from_state_actions[( this.cs)];
464
+ _nacts = (int) _java_lexer_actions[_acts++];
465
+ while ( _nacts-- > 0 ) {
466
+ switch ( _java_lexer_actions[_acts++] ) {
467
+ case 3:
468
+ // line 1 "NONE"
469
+ {ts = p;}
470
+ break;
471
+ // line 472 "ext/java/org/liboga/xml/Lexer.java"
472
+ }
473
+ }
474
+
475
+ _match: do {
476
+ _keys = _java_lexer_key_offsets[( this.cs)];
477
+ _trans = _java_lexer_index_offsets[( this.cs)];
478
+ _klen = _java_lexer_single_lengths[( this.cs)];
479
+ if ( _klen > 0 ) {
480
+ int _lower = _keys;
481
+ int _mid;
482
+ int _upper = _keys + _klen - 1;
483
+ while (true) {
484
+ if ( _upper < _lower )
485
+ break;
486
+
487
+ _mid = _lower + ((_upper-_lower) >> 1);
488
+ if ( data[p] < _java_lexer_trans_keys[_mid] )
489
+ _upper = _mid - 1;
490
+ else if ( data[p] > _java_lexer_trans_keys[_mid] )
491
+ _lower = _mid + 1;
492
+ else {
493
+ _trans += (_mid - _keys);
494
+ break _match;
495
+ }
496
+ }
497
+ _keys += _klen;
498
+ _trans += _klen;
499
+ }
500
+
501
+ _klen = _java_lexer_range_lengths[( this.cs)];
502
+ if ( _klen > 0 ) {
503
+ int _lower = _keys;
504
+ int _mid;
505
+ int _upper = _keys + (_klen<<1) - 2;
506
+ while (true) {
507
+ if ( _upper < _lower )
508
+ break;
509
+
510
+ _mid = _lower + (((_upper-_lower) >> 1) & ~1);
511
+ if ( data[p] < _java_lexer_trans_keys[_mid] )
512
+ _upper = _mid - 2;
513
+ else if ( data[p] > _java_lexer_trans_keys[_mid+1] )
514
+ _lower = _mid + 2;
515
+ else {
516
+ _trans += ((_mid - _keys)>>1);
517
+ break _match;
518
+ }
519
+ }
520
+ _trans += _klen;
521
+ }
522
+ } while (false);
523
+
524
+ case 3:
525
+ ( this.cs) = _java_lexer_trans_targs[_trans];
526
+
527
+ if ( _java_lexer_trans_actions[_trans] != 0 ) {
528
+ _acts = _java_lexer_trans_actions[_trans];
529
+ _nacts = (int) _java_lexer_actions[_acts++];
530
+ while ( _nacts-- > 0 )
531
+ {
532
+ switch ( _java_lexer_actions[_acts++] )
533
+ {
534
+ case 0:
535
+ // line 56 "ext/ragel/base_lexer.rl"
536
+ {
537
+ if ( data[p] == '\n' ) lines++;
538
+ }
539
+ break;
540
+ case 1:
541
+ // line 589 "ext/ragel/base_lexer.rl"
542
+ { mark = p; }
543
+ break;
544
+ case 4:
545
+ // line 1 "NONE"
546
+ {te = p+1;}
547
+ break;
548
+ case 5:
549
+ // line 102 "ext/ragel/base_lexer.rl"
550
+ {te = p+1;{
551
+ callback_simple(id_on_comment_end);
552
+
553
+ ( this.cs) = 32;
554
+ }}
555
+ break;
556
+ case 6:
557
+ // line 91 "ext/ragel/base_lexer.rl"
558
+ {te = p;p--;{
559
+ callback(id_on_comment_body, data, encoding, ts, te);
560
+
561
+ if ( lines > 0 )
562
+ {
563
+ advance_line(lines);
564
+
565
+ lines = 0;
566
+ }
567
+ }}
568
+ break;
569
+ case 7:
570
+ // line 91 "ext/ragel/base_lexer.rl"
571
+ {{p = ((te))-1;}{
572
+ callback(id_on_comment_body, data, encoding, ts, te);
573
+
574
+ if ( lines > 0 )
575
+ {
576
+ advance_line(lines);
577
+
578
+ lines = 0;
579
+ }
580
+ }}
581
+ break;
582
+ case 8:
583
+ // line 141 "ext/ragel/base_lexer.rl"
584
+ {te = p+1;{
585
+ callback_simple(id_on_cdata_end);
586
+
587
+ ( this.cs) = 32;
588
+ }}
589
+ break;
590
+ case 9:
591
+ // line 130 "ext/ragel/base_lexer.rl"
592
+ {te = p;p--;{
593
+ callback(id_on_cdata_body, data, encoding, ts, te);
594
+
595
+ if ( lines > 0 )
596
+ {
597
+ advance_line(lines);
598
+
599
+ lines = 0;
600
+ }
601
+ }}
602
+ break;
603
+ case 10:
604
+ // line 130 "ext/ragel/base_lexer.rl"
605
+ {{p = ((te))-1;}{
606
+ callback(id_on_cdata_body, data, encoding, ts, te);
607
+
608
+ if ( lines > 0 )
609
+ {
610
+ advance_line(lines);
611
+
612
+ lines = 0;
613
+ }
614
+ }}
615
+ break;
616
+ case 11:
617
+ // line 184 "ext/ragel/base_lexer.rl"
618
+ {te = p+1;{
619
+ callback_simple(id_on_proc_ins_end);
620
+
621
+ ( this.cs) = 32;
622
+ }}
623
+ break;
624
+ case 12:
625
+ // line 173 "ext/ragel/base_lexer.rl"
626
+ {te = p;p--;{
627
+ callback(id_on_proc_ins_body, data, encoding, ts, te);
628
+
629
+ if ( lines > 0 )
630
+ {
631
+ advance_line(lines);
632
+
633
+ lines = 0;
634
+ }
635
+ }}
636
+ break;
637
+ case 13:
638
+ // line 226 "ext/ragel/base_lexer.rl"
639
+ {te = p+1;{
640
+ callback_simple(id_on_string_squote);
641
+
642
+ {( this.cs) = ( this.stack)[--( this.top)];_goto_targ = 2; if (true) continue _goto;}
643
+ }}
644
+ break;
645
+ case 14:
646
+ // line 200 "ext/ragel/base_lexer.rl"
647
+ {te = p;p--;{
648
+ callback(id_on_string_body, data, encoding, ts, te);
649
+
650
+ if ( lines > 0 )
651
+ {
652
+ advance_line(lines);
653
+
654
+ lines = 0;
655
+ }
656
+ }}
657
+ break;
658
+ case 15:
659
+ // line 236 "ext/ragel/base_lexer.rl"
660
+ {te = p+1;{
661
+ callback_simple(id_on_string_dquote);
662
+
663
+ {( this.cs) = ( this.stack)[--( this.top)];_goto_targ = 2; if (true) continue _goto;}
664
+ }}
665
+ break;
666
+ case 16:
667
+ // line 200 "ext/ragel/base_lexer.rl"
668
+ {te = p;p--;{
669
+ callback(id_on_string_body, data, encoding, ts, te);
670
+
671
+ if ( lines > 0 )
672
+ {
673
+ advance_line(lines);
674
+
675
+ lines = 0;
676
+ }
677
+ }}
678
+ break;
679
+ case 17:
680
+ // line 281 "ext/ragel/base_lexer.rl"
681
+ {te = p+1;{ ( this.cs) = 53; }}
682
+ break;
683
+ case 18:
684
+ // line 270 "ext/ragel/base_lexer.rl"
685
+ {te = p;p--;{
686
+ callback(id_on_doctype_inline, data, encoding, ts, te);
687
+
688
+ if ( lines > 0 )
689
+ {
690
+ advance_line(lines);
691
+
692
+ lines = 0;
693
+ }
694
+ }}
695
+ break;
696
+ case 19:
697
+ // line 287 "ext/ragel/base_lexer.rl"
698
+ {( this.act) = 13;}
699
+ break;
700
+ case 20:
701
+ // line 298 "ext/ragel/base_lexer.rl"
702
+ {( this.act) = 17;}
703
+ break;
704
+ case 21:
705
+ // line 292 "ext/ragel/base_lexer.rl"
706
+ {te = p+1;{ ( this.cs) = 51; }}
707
+ break;
708
+ case 22:
709
+ // line 211 "ext/ragel/base_lexer.rl"
710
+ {te = p+1;{
711
+ callback_simple(id_on_string_squote);
712
+
713
+ {( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 47; _goto_targ = 2; if (true) continue _goto;}
714
+ }}
715
+ break;
716
+ case 23:
717
+ // line 217 "ext/ragel/base_lexer.rl"
718
+ {te = p+1;{
719
+ callback_simple(id_on_string_dquote);
720
+
721
+ {( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 49; _goto_targ = 2; if (true) continue _goto;}
722
+ }}
723
+ break;
724
+ case 24:
725
+ // line 302 "ext/ragel/base_lexer.rl"
726
+ {te = p+1;{
727
+ callback_simple(id_on_doctype_end);
728
+ ( this.cs) = 32;
729
+ }}
730
+ break;
731
+ case 25:
732
+ // line 60 "ext/ragel/base_lexer.rl"
733
+ {te = p+1;{
734
+ advance_line(1);
735
+ }}
736
+ break;
737
+ case 26:
738
+ // line 309 "ext/ragel/base_lexer.rl"
739
+ {te = p+1;}
740
+ break;
741
+ case 27:
742
+ // line 298 "ext/ragel/base_lexer.rl"
743
+ {te = p;p--;{
744
+ callback(id_on_doctype_name, data, encoding, ts, te);
745
+ }}
746
+ break;
747
+ case 28:
748
+ // line 60 "ext/ragel/base_lexer.rl"
749
+ {te = p;p--;{
750
+ advance_line(1);
751
+ }}
752
+ break;
753
+ case 29:
754
+ // line 1 "NONE"
755
+ { switch( ( this.act) ) {
756
+ case 13:
757
+ {{p = ((te))-1;}
758
+ callback(id_on_doctype_type, data, encoding, ts, te);
759
+ }
760
+ break;
761
+ case 17:
762
+ {{p = ((te))-1;}
763
+ callback(id_on_doctype_name, data, encoding, ts, te);
764
+ }
765
+ break;
766
+ }
767
+ }
768
+ break;
769
+ case 30:
770
+ // line 326 "ext/ragel/base_lexer.rl"
771
+ {te = p+1;{
772
+ if ( lines > 0 )
773
+ {
774
+ advance_line(lines);
775
+
776
+ lines = 0;
777
+ }
778
+
779
+ callback_simple(id_on_xml_decl_end);
780
+
781
+ ( this.cs) = 32;
782
+ }}
783
+ break;
784
+ case 31:
785
+ // line 211 "ext/ragel/base_lexer.rl"
786
+ {te = p+1;{
787
+ callback_simple(id_on_string_squote);
788
+
789
+ {( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 47; _goto_targ = 2; if (true) continue _goto;}
790
+ }}
791
+ break;
792
+ case 32:
793
+ // line 217 "ext/ragel/base_lexer.rl"
794
+ {te = p+1;{
795
+ callback_simple(id_on_string_dquote);
796
+
797
+ {( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 49; _goto_targ = 2; if (true) continue _goto;}
798
+ }}
799
+ break;
800
+ case 33:
801
+ // line 354 "ext/ragel/base_lexer.rl"
802
+ {te = p+1;}
803
+ break;
804
+ case 34:
805
+ // line 340 "ext/ragel/base_lexer.rl"
806
+ {te = p;p--;{
807
+ if ( lines > 0 )
808
+ {
809
+ advance_line(lines);
810
+
811
+ lines = 0;
812
+ }
813
+
814
+ callback(id_on_attribute, data, encoding, ts, te);
815
+ }}
816
+ break;
817
+ case 35:
818
+ // line 354 "ext/ragel/base_lexer.rl"
819
+ {te = p;p--;}
820
+ break;
821
+ case 36:
822
+ // line 390 "ext/ragel/base_lexer.rl"
823
+ {te = p+1;{
824
+ callback(id_on_element_ns, data, encoding, ts, te - 1);
825
+ }}
826
+ break;
827
+ case 37:
828
+ // line 394 "ext/ragel/base_lexer.rl"
829
+ {te = p;p--;{
830
+ callback(id_on_element_name, data, encoding, ts, te);
831
+ ( this.cs) = 78;
832
+ }}
833
+ break;
834
+ case 38:
835
+ // line 404 "ext/ragel/base_lexer.rl"
836
+ {te = p+1;}
837
+ break;
838
+ case 39:
839
+ // line 408 "ext/ragel/base_lexer.rl"
840
+ {te = p+1;{
841
+ if ( lines > 0 )
842
+ {
843
+ advance_line(lines);
844
+
845
+ lines = 0;
846
+ }
847
+
848
+ ( this.cs) = 32;
849
+ }}
850
+ break;
851
+ case 40:
852
+ // line 419 "ext/ragel/base_lexer.rl"
853
+ {te = p+1;}
854
+ break;
855
+ case 41:
856
+ // line 375 "ext/ragel/base_lexer.rl"
857
+ {te = p;p--;{
858
+ callback(id_on_element_end, data, encoding, ts, te);
859
+ }}
860
+ break;
861
+ case 42:
862
+ // line 432 "ext/ragel/base_lexer.rl"
863
+ {te = p+1;}
864
+ break;
865
+ case 43:
866
+ // line 434 "ext/ragel/base_lexer.rl"
867
+ {te = p+1;{
868
+ p--;
869
+
870
+ if ( lines > 0 )
871
+ {
872
+ advance_line(lines);
873
+
874
+ lines = 0;
875
+ }
876
+
877
+ if ( html_p )
878
+ {
879
+ ( this.cs) = 75;
880
+ }
881
+ else
882
+ {
883
+ ( this.cs) = 77;
884
+ }
885
+ }}
886
+ break;
887
+ case 44:
888
+ // line 432 "ext/ragel/base_lexer.rl"
889
+ {te = p;p--;}
890
+ break;
891
+ case 45:
892
+ // line 464 "ext/ragel/base_lexer.rl"
893
+ {( this.act) = 35;}
894
+ break;
895
+ case 46:
896
+ // line 64 "ext/ragel/base_lexer.rl"
897
+ {( this.act) = 36;}
898
+ break;
899
+ case 47:
900
+ // line 457 "ext/ragel/base_lexer.rl"
901
+ {te = p+1;{
902
+ p--;
903
+ ( this.cs) = 77;
904
+ }}
905
+ break;
906
+ case 48:
907
+ // line 64 "ext/ragel/base_lexer.rl"
908
+ {te = p+1;{
909
+ p--;
910
+ {( this.cs) = ( this.stack)[--( this.top)];_goto_targ = 2; if (true) continue _goto;}
911
+ }}
912
+ break;
913
+ case 49:
914
+ // line 1 "NONE"
915
+ { switch( ( this.act) ) {
916
+ case 35:
917
+ {{p = ((te))-1;}
918
+ callback_simple(id_on_string_squote);
919
+
920
+ callback(id_on_string_body, data, encoding, ts, te);
921
+
922
+ callback_simple(id_on_string_squote);
923
+ }
924
+ break;
925
+ case 36:
926
+ {{p = ((te))-1;}
927
+ p--;
928
+ {( this.cs) = ( this.stack)[--( this.top)];_goto_targ = 2; if (true) continue _goto;}
929
+ }
930
+ break;
931
+ }
932
+ }
933
+ break;
934
+ case 50:
935
+ // line 480 "ext/ragel/base_lexer.rl"
936
+ {te = p+1;{
937
+ callback_simple(id_on_string_squote);
938
+
939
+ ( this.cs) = 47;
940
+ }}
941
+ break;
942
+ case 51:
943
+ // line 486 "ext/ragel/base_lexer.rl"
944
+ {te = p+1;{
945
+ callback_simple(id_on_string_dquote);
946
+
947
+ ( this.cs) = 49;
948
+ }}
949
+ break;
950
+ case 52:
951
+ // line 64 "ext/ragel/base_lexer.rl"
952
+ {te = p+1;{
953
+ p--;
954
+ {( this.cs) = ( this.stack)[--( this.top)];_goto_targ = 2; if (true) continue _goto;}
955
+ }}
956
+ break;
957
+ case 53:
958
+ // line 60 "ext/ragel/base_lexer.rl"
959
+ {te = p+1;{
960
+ advance_line(1);
961
+ }}
962
+ break;
963
+ case 54:
964
+ // line 501 "ext/ragel/base_lexer.rl"
965
+ {te = p+1;{
966
+ callback(id_on_attribute_ns, data, encoding, ts, te - 1);
967
+ }}
968
+ break;
969
+ case 55:
970
+ // line 510 "ext/ragel/base_lexer.rl"
971
+ {te = p+1;{
972
+ {( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 73; _goto_targ = 2; if (true) continue _goto;}
973
+ }}
974
+ break;
975
+ case 56:
976
+ // line 515 "ext/ragel/base_lexer.rl"
977
+ {te = p+1;{
978
+ callback_simple(id_on_element_open_end);
979
+
980
+ if ( html_script_p() )
981
+ {
982
+ ( this.cs) = 86;
983
+ }
984
+ else if ( html_style_p() )
985
+ {
986
+ ( this.cs) = 90;
987
+ }
988
+ else
989
+ {
990
+ ( this.cs) = 32;
991
+ }
992
+ }}
993
+ break;
994
+ case 57:
995
+ // line 533 "ext/ragel/base_lexer.rl"
996
+ {te = p+1;{
997
+ callback_simple(id_on_element_end);
998
+ ( this.cs) = 32;
999
+ }}
1000
+ break;
1001
+ case 58:
1002
+ // line 538 "ext/ragel/base_lexer.rl"
1003
+ {te = p+1;}
1004
+ break;
1005
+ case 59:
1006
+ // line 60 "ext/ragel/base_lexer.rl"
1007
+ {te = p;p--;{
1008
+ advance_line(1);
1009
+ }}
1010
+ break;
1011
+ case 60:
1012
+ // line 505 "ext/ragel/base_lexer.rl"
1013
+ {te = p;p--;{
1014
+ callback(id_on_attribute, data, encoding, ts, te);
1015
+ }}
1016
+ break;
1017
+ case 61:
1018
+ // line 538 "ext/ragel/base_lexer.rl"
1019
+ {te = p;p--;}
1020
+ break;
1021
+ case 62:
1022
+ // line 575 "ext/ragel/base_lexer.rl"
1023
+ {te = p+1;{
1024
+ callback(id_on_text, data, encoding, ts, te);
1025
+
1026
+ if ( lines > 0 )
1027
+ {
1028
+ advance_line(lines);
1029
+
1030
+ lines = 0;
1031
+ }
1032
+
1033
+ ( this.cs) = 32;
1034
+ }}
1035
+ break;
1036
+ case 63:
1037
+ // line 589 "ext/ragel/base_lexer.rl"
1038
+ {te = p+1;{
1039
+ callback(id_on_text, data, encoding, ts, mark);
1040
+
1041
+ p = mark - 1;
1042
+ mark = 0;
1043
+
1044
+ if ( lines > 0 )
1045
+ {
1046
+ advance_line(lines);
1047
+
1048
+ lines = 0;
1049
+ }
1050
+
1051
+ ( this.cs) = 32;
1052
+ }}
1053
+ break;
1054
+ case 64:
1055
+ // line 575 "ext/ragel/base_lexer.rl"
1056
+ {te = p;p--;{
1057
+ callback(id_on_text, data, encoding, ts, te);
1058
+
1059
+ if ( lines > 0 )
1060
+ {
1061
+ advance_line(lines);
1062
+
1063
+ lines = 0;
1064
+ }
1065
+
1066
+ ( this.cs) = 32;
1067
+ }}
1068
+ break;
1069
+ case 65:
1070
+ // line 379 "ext/ragel/base_lexer.rl"
1071
+ {te = p+1;{
1072
+ callback_simple(id_on_element_end);
1073
+
1074
+ ( this.cs) = 32;
1075
+ }}
1076
+ break;
1077
+ case 66:
1078
+ // line 563 "ext/ragel/base_lexer.rl"
1079
+ {te = p;p--;{
1080
+ callback(id_on_text, data, encoding, ts, te);
1081
+
1082
+ if ( lines > 0 )
1083
+ {
1084
+ advance_line(lines);
1085
+
1086
+ lines = 0;
1087
+ }
1088
+ }}
1089
+ break;
1090
+ case 67:
1091
+ // line 563 "ext/ragel/base_lexer.rl"
1092
+ {{p = ((te))-1;}{
1093
+ callback(id_on_text, data, encoding, ts, te);
1094
+
1095
+ if ( lines > 0 )
1096
+ {
1097
+ advance_line(lines);
1098
+
1099
+ lines = 0;
1100
+ }
1101
+ }}
1102
+ break;
1103
+ case 68:
1104
+ // line 379 "ext/ragel/base_lexer.rl"
1105
+ {te = p+1;{
1106
+ callback_simple(id_on_element_end);
1107
+
1108
+ ( this.cs) = 32;
1109
+ }}
1110
+ break;
1111
+ case 69:
1112
+ // line 563 "ext/ragel/base_lexer.rl"
1113
+ {te = p;p--;{
1114
+ callback(id_on_text, data, encoding, ts, te);
1115
+
1116
+ if ( lines > 0 )
1117
+ {
1118
+ advance_line(lines);
1119
+
1120
+ lines = 0;
1121
+ }
1122
+ }}
1123
+ break;
1124
+ case 70:
1125
+ // line 563 "ext/ragel/base_lexer.rl"
1126
+ {{p = ((te))-1;}{
1127
+ callback(id_on_text, data, encoding, ts, te);
1128
+
1129
+ if ( lines > 0 )
1130
+ {
1131
+ advance_line(lines);
1132
+
1133
+ lines = 0;
1134
+ }
1135
+ }}
1136
+ break;
1137
+ case 71:
1138
+ // line 319 "ext/ragel/base_lexer.rl"
1139
+ {( this.act) = 54;}
1140
+ break;
1141
+ case 72:
1142
+ // line 165 "ext/ragel/base_lexer.rl"
1143
+ {( this.act) = 57;}
1144
+ break;
1145
+ case 73:
1146
+ // line 84 "ext/ragel/base_lexer.rl"
1147
+ {te = p+1;{
1148
+ callback_simple(id_on_comment_start);
1149
+
1150
+ ( this.cs) = 38;
1151
+ }}
1152
+ break;
1153
+ case 74:
1154
+ // line 123 "ext/ragel/base_lexer.rl"
1155
+ {te = p+1;{
1156
+ callback_simple(id_on_cdata_start);
1157
+
1158
+ ( this.cs) = 41;
1159
+ }}
1160
+ break;
1161
+ case 75:
1162
+ // line 366 "ext/ragel/base_lexer.rl"
1163
+ {te = p+1;{
1164
+ p--;
1165
+ ( this.cs) = 69;
1166
+ }}
1167
+ break;
1168
+ case 76:
1169
+ // line 371 "ext/ragel/base_lexer.rl"
1170
+ {te = p+1;{
1171
+ ( this.cs) = 71;
1172
+ }}
1173
+ break;
1174
+ case 77:
1175
+ // line 549 "ext/ragel/base_lexer.rl"
1176
+ {te = p+1;{
1177
+ p--;
1178
+ ( this.cs) = 82;
1179
+ }}
1180
+ break;
1181
+ case 78:
1182
+ // line 255 "ext/ragel/base_lexer.rl"
1183
+ {te = p;p--;{
1184
+ callback_simple(id_on_doctype_start);
1185
+
1186
+ if ( lines > 0 )
1187
+ {
1188
+ advance_line(lines);
1189
+
1190
+ lines = 0;
1191
+ }
1192
+
1193
+ ( this.cs) = 53;
1194
+ }}
1195
+ break;
1196
+ case 79:
1197
+ // line 165 "ext/ragel/base_lexer.rl"
1198
+ {te = p;p--;{
1199
+ callback_simple(id_on_proc_ins_start);
1200
+ callback(id_on_proc_ins_name, data, encoding, ts + 2, te);
1201
+
1202
+ ( this.cs) = 44;
1203
+ }}
1204
+ break;
1205
+ case 80:
1206
+ // line 549 "ext/ragel/base_lexer.rl"
1207
+ {te = p;p--;{
1208
+ p--;
1209
+ ( this.cs) = 82;
1210
+ }}
1211
+ break;
1212
+ case 81:
1213
+ // line 549 "ext/ragel/base_lexer.rl"
1214
+ {{p = ((te))-1;}{
1215
+ p--;
1216
+ ( this.cs) = 82;
1217
+ }}
1218
+ break;
1219
+ case 82:
1220
+ // line 1 "NONE"
1221
+ { switch( ( this.act) ) {
1222
+ case 54:
1223
+ {{p = ((te))-1;}
1224
+ callback_simple(id_on_xml_decl_start);
1225
+ ( this.cs) = 66;
1226
+ }
1227
+ break;
1228
+ case 57:
1229
+ {{p = ((te))-1;}
1230
+ callback_simple(id_on_proc_ins_start);
1231
+ callback(id_on_proc_ins_name, data, encoding, ts + 2, te);
1232
+
1233
+ ( this.cs) = 44;
1234
+ }
1235
+ break;
1236
+ }
1237
+ }
1238
+ break;
1239
+ // line 1240 "ext/java/org/liboga/xml/Lexer.java"
1240
+ }
1241
+ }
1242
+ }
1243
+
1244
+ case 2:
1245
+ _acts = _java_lexer_to_state_actions[( this.cs)];
1246
+ _nacts = (int) _java_lexer_actions[_acts++];
1247
+ while ( _nacts-- > 0 ) {
1248
+ switch ( _java_lexer_actions[_acts++] ) {
1249
+ case 2:
1250
+ // line 1 "NONE"
1251
+ {ts = -1;}
1252
+ break;
1253
+ // line 1254 "ext/java/org/liboga/xml/Lexer.java"
1254
+ }
1255
+ }
1256
+
1257
+ if ( ( this.cs) == 0 ) {
1258
+ _goto_targ = 5;
1259
+ continue _goto;
1260
+ }
1261
+ if ( ++p != pe ) {
1262
+ _goto_targ = 1;
1263
+ continue _goto;
1264
+ }
1265
+ case 4:
1266
+ if ( p == eof )
1267
+ {
1268
+ if ( _java_lexer_eof_trans[( this.cs)] > 0 ) {
1269
+ _trans = _java_lexer_eof_trans[( this.cs)] - 1;
1270
+ _goto_targ = 3;
1271
+ continue _goto;
1272
+ }
1273
+ }
1274
+
1275
+ case 5:
1276
+ }
1277
+ break; }
1278
+ }
1279
+
1280
+ // line 136 "ext/java/org/liboga/xml/Lexer.rl"
1281
+
1282
+ this.lines = lines;
1283
+
1284
+ return context.nil;
1285
+ }
1286
+
1287
+ /**
1288
+ * Resets the internal state of the lexer: zeroes `act` and `top`, allocates a fresh four-slot `stack`, sets `cs` back to `java_lexer_start`, and returns nil.
1289
+ */
1290
+ @JRubyMethod
1291
+ public IRubyObject reset_native(ThreadContext context)
1292
+ {
1293
+ this.act = 0; // forget the last `act` id recorded by the scanner actions
1294
+ this.top = 0; // empty the saved-state stack
1295
+ this.stack = new int[4]; // NOTE(review): fixed capacity; the scanner pushes via stack[top++] with no visible bounds check -- confirm nesting never exceeds 4
1296
+ this.cs = java_lexer_start; // current state goes back to the generated start constant
1297
+
1298
+ return context.nil;
1299
+ }
1300
+
1301
+ /**
1302
+ * Calls back into Ruby land, passing the current token value along.
1303
+ *
1304
+ * Invokes the Ruby method named by `name` on this lexer object with exactly
1305
+ * one argument: a String built from the `te - ts` bytes of `data` that start
1306
+ * at offset `ts`, created with encoding `enc`. The method returns nothing.
1307
+ */
1308
+ public void callback(String name, byte[] data, Encoding enc, int ts, int te)
1309
+ {
1310
+ ByteList bytelist = new ByteList(data, ts, te - ts, enc, true); // token bytes [ts, te); the trailing `true` presumably requests a private copy -- confirm against ByteList
1311
+
1312
+ RubyString value = this.runtime.newString(bytelist); // wrap the bytes as a Ruby String
1313
+
1314
+ ThreadContext context = this.runtime.getCurrentContext();
1315
+
1316
+ this.callMethod(context, name, value); // dispatch to the Ruby method called `name`
1317
+ }
1318
+
1319
+ /**
1320
+ * Calls back into Ruby land without passing any arguments: invokes the Ruby method named by `name` on this lexer object.
1321
+ */
1322
+ public void callback_simple(String name)
1323
+ {
1324
+ ThreadContext context = this.runtime.getCurrentContext();
1325
+
1326
+ this.callMethod(context, name); // zero-argument dispatch; any return value is discarded
1327
+ }
1328
+
1329
+ /**
1330
+ * Advances the line number by `amount` lines by calling the Ruby method `advance_line` on this lexer object with `amount` as a Fixnum.
1331
+ */
1332
+ public void advance_line(int amount)
1333
+ {
1334
+ ThreadContext context = this.runtime.getCurrentContext();
1335
+ RubyFixnum lines = this.runtime.newFixnum(amount); // local `lines` is just the boxed `amount`, not the `this.lines` field
1336
+
1337
+ this.callMethod(context, "advance_line", lines);
1338
+ }
1339
+
1340
+ /**
1341
+ * @see Oga::XML::Lexer#html_script?
1342
+ */
1343
+ public Boolean html_script_p() // Java-side wrapper around the Ruby predicate `html_script?`
1344
+ {
1345
+ ThreadContext context = this.runtime.getCurrentContext();
1346
+
1347
+ return this.callMethod(context, "html_script?").isTrue(); // the scanner uses this after on_element_open_end to choose the next state
1348
+ }
1349
+
1350
+ /**
1351
+ * @see Oga::XML::Lexer#html_style?
1352
+ */
1353
+ public Boolean html_style_p() // Java-side wrapper around the Ruby predicate `html_style?`
1354
+ {
1355
+ ThreadContext context = this.runtime.getCurrentContext();
1356
+
1357
+ return this.callMethod(context, "html_style?").isTrue(); // consulted by the scanner only when html_script_p() was false
1358
+ }
1359
+ }
1360
+
1361
+
1362
+ // line 223 "ext/java/org/liboga/xml/Lexer.rl"
1363
+