rubyjedi-oga 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +13 -0
  3. data/LICENSE +362 -0
  4. data/README.md +317 -0
  5. data/doc/css/common.css +77 -0
  6. data/doc/css_selectors.md +935 -0
  7. data/doc/manually_creating_documents.md +67 -0
  8. data/doc/migrating_from_nokogiri.md +169 -0
  9. data/doc/xml_namespaces.md +63 -0
  10. data/ext/c/extconf.rb +11 -0
  11. data/ext/c/lexer.c +2595 -0
  12. data/ext/c/lexer.h +16 -0
  13. data/ext/c/lexer.rl +198 -0
  14. data/ext/c/liboga.c +6 -0
  15. data/ext/c/liboga.h +11 -0
  16. data/ext/java/Liboga.java +14 -0
  17. data/ext/java/org/liboga/xml/Lexer.java +1363 -0
  18. data/ext/java/org/liboga/xml/Lexer.rl +223 -0
  19. data/ext/ragel/base_lexer.rl +633 -0
  20. data/lib/oga.rb +57 -0
  21. data/lib/oga/blacklist.rb +40 -0
  22. data/lib/oga/css/lexer.rb +743 -0
  23. data/lib/oga/css/parser.rb +976 -0
  24. data/lib/oga/entity_decoder.rb +21 -0
  25. data/lib/oga/html/entities.rb +2150 -0
  26. data/lib/oga/html/parser.rb +25 -0
  27. data/lib/oga/html/sax_parser.rb +18 -0
  28. data/lib/oga/lru.rb +160 -0
  29. data/lib/oga/oga.rb +57 -0
  30. data/lib/oga/version.rb +3 -0
  31. data/lib/oga/whitelist.rb +20 -0
  32. data/lib/oga/xml/attribute.rb +136 -0
  33. data/lib/oga/xml/cdata.rb +17 -0
  34. data/lib/oga/xml/character_node.rb +37 -0
  35. data/lib/oga/xml/comment.rb +17 -0
  36. data/lib/oga/xml/default_namespace.rb +13 -0
  37. data/lib/oga/xml/doctype.rb +82 -0
  38. data/lib/oga/xml/document.rb +108 -0
  39. data/lib/oga/xml/element.rb +428 -0
  40. data/lib/oga/xml/entities.rb +122 -0
  41. data/lib/oga/xml/html_void_elements.rb +15 -0
  42. data/lib/oga/xml/lexer.rb +550 -0
  43. data/lib/oga/xml/namespace.rb +48 -0
  44. data/lib/oga/xml/node.rb +219 -0
  45. data/lib/oga/xml/node_set.rb +333 -0
  46. data/lib/oga/xml/parser.rb +631 -0
  47. data/lib/oga/xml/processing_instruction.rb +37 -0
  48. data/lib/oga/xml/pull_parser.rb +175 -0
  49. data/lib/oga/xml/querying.rb +56 -0
  50. data/lib/oga/xml/sax_parser.rb +192 -0
  51. data/lib/oga/xml/text.rb +66 -0
  52. data/lib/oga/xml/traversal.rb +50 -0
  53. data/lib/oga/xml/xml_declaration.rb +65 -0
  54. data/lib/oga/xpath/evaluator.rb +1798 -0
  55. data/lib/oga/xpath/lexer.rb +1958 -0
  56. data/lib/oga/xpath/parser.rb +622 -0
  57. data/oga.gemspec +45 -0
  58. metadata +227 -0
@@ -0,0 +1,16 @@
1
+ #ifndef LIBOGA_XML_LEXER_H
2
+ #define LIBOGA_XML_LEXER_H
3
+
4
+ #include "liboga.h"
5
+
6
/* Sets up the native methods of Oga::XML::Lexer; takes no arguments. */
extern void Init_liboga_xml_lexer(void);
7
+
8
/* Number of entries in the state stack kept in OgaLexerState. */
enum { OGA_LEXER_STACK_SIZE = 4 };

/**
 * Lexer state that has to survive between calls in to the native lexer.
 *
 * `act`, `cs`, `top` and `stack` are handed to Ragel as the variables of the
 * same name; `lines` is copied in to a local at the start of a lexing run and
 * written back when the run finishes.
 */
typedef struct {
    int act;                         /* Ragel `act` variable */
    int cs;                          /* Ragel `cs` (current state) variable */
    int lines;                       /* line counter carried between runs */
    int stack[OGA_LEXER_STACK_SIZE]; /* Ragel `stack` variable */
    int top;                         /* Ragel `top` variable (index in to `stack`) */
} OgaLexerState;
15
+
16
+ #endif
@@ -0,0 +1,198 @@
1
+ #include "lexer.h"
2
+
3
/*
The following macros allow the Ragel grammar to use generic function calls
without relying on the setup of the C or Java lexer. Using these macros we can
also pass along `self` to the callback functions without having to hard-code
this in to the Ragel grammar.

In the C lexer we don't need the `data` variable (since this is pulled in based
on `ts` and `te`) so the macro ignores this argument.

None of the macros end in a semicolon: the call site supplies it, exactly like
a regular function call. The two predicate macros are wrapped in parentheses so
that they stay a single expression when combined with operators such as `!`.
*/

#define callback(name, data, encoding, start, stop) \
    liboga_xml_lexer_callback(self, name, encoding, start, stop)

#define callback_simple(name) \
    liboga_xml_lexer_callback_simple(self, name)

#define advance_line(amount) \
    rb_funcall(self, id_advance_line, 1, INT2NUM(amount))

#define html_script_p() \
    (rb_funcall(self, id_html_script_p, 0) == Qtrue)

#define html_style_p() \
    (rb_funcall(self, id_html_style_p, 0) == Qtrue)
27
+
28
+ ID id_advance_line;
29
+ ID id_html_script_p;
30
+ ID id_html_style_p;
31
+ ID id_html_p;
32
+
33
+ %%machine c_lexer;
34
+
35
+ /**
36
+ * Calls a method defined in the Ruby side of the lexer. The String value is
37
+ * created based on the values of `ts` and `te` and uses the encoding specified
38
+ * in `encoding`.
39
+ *
40
+ * @example
41
+ * rb_encoding *encoding = rb_enc_get(...);
42
+ * liboga_xml_lexer_callback(self, "on_string", encoding, ts, te);
43
+ */
44
+ void liboga_xml_lexer_callback(
45
+ VALUE self,
46
+ ID name,
47
+ rb_encoding *encoding,
48
+ const char *ts,
49
+ const char *te
50
+ )
51
+ {
52
+ VALUE value = rb_enc_str_new(ts, te - ts, encoding);
53
+
54
+ rb_funcall(self, name, 1, value);
55
+ }
56
+
57
+ /**
58
+ * Calls a method defined in the Ruby side of the lexer without passing it any
59
+ * arguments.
60
+ *
61
+ * @example
62
+ * liboga_xml_lexer_callback_simple(self, "on_cdata_start");
63
+ */
64
+ void liboga_xml_lexer_callback_simple(VALUE self, VALUE name)
65
+ {
66
+ rb_funcall(self, name, 0);
67
+ }
68
+
69
+ %% write data;
70
+
71
+ /**
72
+ * Lexes the String specifies as the method argument. Token values have the
73
+ * same encoding as the input value.
74
+ *
75
+ * This method keeps track of an internal state using the instance variables
76
+ * `@act` and `@cs`.
77
+ */
78
+ VALUE oga_xml_lexer_advance(VALUE self, VALUE data_block)
79
+ {
80
+ OgaLexerState *state;
81
+ int lines;
82
+
83
+ /* Whether or not HTML mode is enabled */
84
+ int html_p = rb_funcall(self, id_html_p, 0) == Qtrue;
85
+
86
+ /* Make sure that all data passed back to Ruby has the proper encoding. */
87
+ rb_encoding *encoding = rb_enc_get(data_block);
88
+
89
+ char *data_str_val = StringValueCStr(data_block);
90
+
91
+ const char *p = data_str_val;
92
+ const char *pe = data_str_val + strlen(data_str_val);
93
+ const char *eof = pe;
94
+ const char *ts = 0;
95
+ const char *te = 0;
96
+ const char *mark = 0;
97
+
98
+ ID id_advance_line = rb_intern("advance_line");
99
+ ID id_on_attribute = rb_intern("on_attribute");
100
+ ID id_on_attribute_ns = rb_intern("on_attribute_ns");
101
+ ID id_on_cdata_start = rb_intern("on_cdata_start");
102
+ ID id_on_cdata_body = rb_intern("on_cdata_body");
103
+ ID id_on_cdata_end = rb_intern("on_cdata_end");
104
+ ID id_on_comment_start = rb_intern("on_comment_start");
105
+ ID id_on_comment_body = rb_intern("on_comment_body");
106
+ ID id_on_comment_end = rb_intern("on_comment_end");
107
+ ID id_on_doctype_end = rb_intern("on_doctype_end");
108
+ ID id_on_doctype_inline = rb_intern("on_doctype_inline");
109
+ ID id_on_doctype_name = rb_intern("on_doctype_name");
110
+ ID id_on_doctype_start = rb_intern("on_doctype_start");
111
+ ID id_on_doctype_type = rb_intern("on_doctype_type");
112
+ ID id_on_element_end = rb_intern("on_element_end");
113
+ ID id_on_element_name = rb_intern("on_element_name");
114
+ ID id_on_element_ns = rb_intern("on_element_ns");
115
+ ID id_on_element_open_end = rb_intern("on_element_open_end");
116
+ ID id_on_proc_ins_end = rb_intern("on_proc_ins_end");
117
+ ID id_on_proc_ins_name = rb_intern("on_proc_ins_name");
118
+ ID id_on_proc_ins_start = rb_intern("on_proc_ins_start");
119
+ ID id_on_proc_ins_body = rb_intern("on_proc_ins_body");
120
+ ID id_on_string_body = rb_intern("on_string_body");
121
+ ID id_on_string_dquote = rb_intern("on_string_dquote");
122
+ ID id_on_string_squote = rb_intern("on_string_squote");
123
+ ID id_on_text = rb_intern("on_text");
124
+ ID id_on_xml_decl_end = rb_intern("on_xml_decl_end");
125
+ ID id_on_xml_decl_start = rb_intern("on_xml_decl_start");
126
+
127
+ Data_Get_Struct(self, OgaLexerState, state);
128
+
129
+ lines = state->lines;
130
+
131
+ %% write exec;
132
+
133
+ state->lines = lines;
134
+
135
+ return Qnil;
136
+ }
137
+
138
+ /**
139
+ * Resets the internal state of the lexer.
140
+ */
141
+ VALUE oga_xml_lexer_reset(VALUE self)
142
+ {
143
+ OgaLexerState *state;
144
+
145
+ Data_Get_Struct(self, OgaLexerState, state);
146
+
147
+ state->act = 0;
148
+ state->cs = c_lexer_start;
149
+ state->lines = 0;
150
+ state->top = 0;
151
+
152
+ return Qnil;
153
+ }
154
+
155
/**
 * Releases the memory of a lexer state struct. This function is registered as
 * the free callback when the struct is wrapped in a Ruby object.
 */
void oga_xml_lexer_free(void *state)
{
    free(state);
}
162
+
163
+ /**
164
+ * Allocates and wraps the C lexer state struct. This state is used to keep
165
+ * track of the current position, line numbers, etc.
166
+ */
167
+ VALUE oga_xml_lexer_allocate(VALUE klass)
168
+ {
169
+ OgaLexerState *state = malloc(sizeof(OgaLexerState));
170
+
171
+ return Data_Wrap_Struct(klass, NULL, oga_xml_lexer_free, state);
172
+ }
173
+
174
+ %%{
175
+ include base_lexer "base_lexer.rl";
176
+
177
+ variable top state->top;
178
+ variable stack state->stack;
179
+ variable act state->act;
180
+ variable cs state->cs;
181
+ }%%
182
+
183
+ void Init_liboga_xml_lexer()
184
+ {
185
+ VALUE mOga = rb_const_get(rb_cObject, rb_intern("Oga"));
186
+ VALUE mXML = rb_const_get(mOga, rb_intern("XML"));
187
+ VALUE cLexer = rb_define_class_under(mXML, "Lexer", rb_cObject);
188
+
189
+ id_advance_line = rb_intern("advance_line");
190
+ id_html_script_p = rb_intern("html_script?");
191
+ id_html_style_p = rb_intern("html_style?");
192
+ id_html_p = rb_intern("html?");
193
+
194
+ rb_define_method(cLexer, "advance_native", oga_xml_lexer_advance, 1);
195
+ rb_define_method(cLexer, "reset_native", oga_xml_lexer_reset, 0);
196
+
197
+ rb_define_alloc_func(cLexer, oga_xml_lexer_allocate);
198
+ }
@@ -0,0 +1,6 @@
1
+ #include "liboga.h"
2
+
3
/**
 * Entry point of the extension: sets up the XML lexer. Takes no arguments.
 */
void Init_liboga(void)
{
    Init_liboga_xml_lexer();
}
@@ -0,0 +1,11 @@
1
+ #ifndef LIBOGA_H
2
+ #define LIBOGA_H
3
+
4
+ #include <ruby.h>
5
+ #include <ruby/encoding.h>
6
+
7
+ #include "lexer.h"
8
+
9
/* Entry point of the extension; takes no arguments. */
void Init_liboga(void);
10
+
11
+ #endif
@@ -0,0 +1,14 @@
1
+ package org.liboga;
2
+
3
+ import org.jruby.Ruby;
4
+
5
+ public class Liboga
6
+ {
7
+ /**
8
+ * Bootstraps the JRuby extension.
9
+ */
10
+ public static void load(final Ruby runtime)
11
+ {
12
+ org.liboga.xml.Lexer.load(runtime);
13
+ }
14
+ }
@@ -0,0 +1,1363 @@
1
+
2
+ // line 1 "ext/java/org/liboga/xml/Lexer.rl"
3
+ package org.liboga.xml;
4
+
5
+
6
+ // line 4 "ext/java/org/liboga/xml/Lexer.rl"
7
+
8
+ import java.io.IOException;
9
+
10
+ import org.jcodings.Encoding;
11
+
12
+ import org.jruby.Ruby;
13
+ import org.jruby.RubyModule;
14
+ import org.jruby.RubyClass;
15
+ import org.jruby.RubyObject;
16
+ import org.jruby.RubyString;
17
+ import org.jruby.RubyFixnum;
18
+ import org.jruby.util.ByteList;
19
+ import org.jruby.anno.JRubyClass;
20
+ import org.jruby.anno.JRubyMethod;
21
+ import org.jruby.runtime.ThreadContext;
22
+ import org.jruby.runtime.ObjectAllocator;
23
+ import org.jruby.runtime.builtin.IRubyObject;
24
+
25
+ /**
26
+ * Lexer support class for JRuby.
27
+ *
28
+ * The Lexer class contains the raw Ragel loop and calls back in to Ruby land
29
+ * whenever a Ragel action is needed similar to the C extension setup.
30
+ *
31
+ * This class requires Ruby land to first define the `Oga::XML` namespace.
32
+ */
33
+ @JRubyClass(name="Oga::XML::Lexer", parent="Object")
34
+ public class Lexer extends RubyObject
35
+ {
36
+ /**
37
+ * The current Ruby runtime.
38
+ */
39
+ private Ruby runtime;
40
+
41
+
42
+ // line 43 "ext/java/org/liboga/xml/Lexer.java"
43
+ private static byte[] init__java_lexer_actions_0()
44
+ {
45
+ return new byte [] {
46
+ 0, 1, 0, 1, 2, 1, 3, 1, 4, 1, 5, 1,
47
+ 6, 1, 7, 1, 8, 1, 9, 1, 10, 1, 11, 1,
48
+ 12, 1, 13, 1, 14, 1, 15, 1, 16, 1, 17, 1,
49
+ 18, 1, 21, 1, 22, 1, 23, 1, 24, 1, 25, 1,
50
+ 26, 1, 27, 1, 28, 1, 29, 1, 30, 1, 34, 1,
51
+ 35, 1, 36, 1, 37, 1, 38, 1, 41, 1, 43, 1,
52
+ 44, 1, 47, 1, 48, 1, 49, 1, 50, 1, 51, 1,
53
+ 52, 1, 53, 1, 54, 1, 55, 1, 56, 1, 57, 1,
54
+ 58, 1, 59, 1, 60, 1, 61, 1, 62, 1, 63, 1,
55
+ 64, 1, 65, 1, 66, 1, 67, 1, 68, 1, 69, 1,
56
+ 70, 1, 73, 1, 74, 1, 75, 1, 76, 1, 77, 1,
57
+ 78, 1, 79, 1, 80, 1, 81, 1, 82, 2, 0, 1,
58
+ 2, 0, 33, 2, 0, 40, 2, 0, 42, 2, 4, 0,
59
+ 2, 4, 19, 2, 4, 20, 2, 4, 45, 2, 4, 46,
60
+ 2, 4, 71, 2, 4, 72, 2, 31, 0, 2, 32, 0,
61
+ 2, 39, 0
62
+ };
63
+ }
64
+
65
+ private static final byte _java_lexer_actions[] = init__java_lexer_actions_0();
66
+
67
+
68
+ private static short[] init__java_lexer_key_offsets_0()
69
+ {
70
+ return new short [] {
71
+ 0, 0, 4, 5, 7, 9, 11, 13, 15, 17, 21, 22,
72
+ 23, 24, 25, 26, 27, 37, 38, 39, 40, 41, 42, 43,
73
+ 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 63, 67,
74
+ 76, 86, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
75
+ 106, 107, 108, 109, 110, 111, 130, 131, 140, 150, 160, 170,
76
+ 180, 190, 200, 210, 220, 230, 240, 252, 261, 262, 271, 281,
77
+ 291, 301, 305, 306, 312, 319, 321, 333, 334, 344, 345, 346,
78
+ 347, 357, 367, 368, 369, 371, 372, 373, 374, 376
79
+ };
80
+ }
81
+
82
+ private static final short _java_lexer_key_offsets[] = init__java_lexer_key_offsets_0();
83
+
84
+
85
+ private static char[] init__java_lexer_trans_keys_0()
86
+ {
87
+ return new char [] {
88
+ 45, 68, 91, 100, 45, 79, 111, 67, 99, 84, 116, 89,
89
+ 121, 80, 112, 69, 101, 13, 32, 9, 10, 67, 68, 65,
90
+ 84, 65, 91, 95, 120, 45, 46, 48, 57, 65, 90, 97,
91
+ 122, 62, 62, 115, 99, 114, 105, 112, 116, 62, 115, 116,
92
+ 121, 108, 101, 62, 60, 33, 47, 63, 95, 45, 57, 65,
93
+ 90, 97, 122, 13, 32, 9, 10, 95, 45, 46, 48, 57,
94
+ 65, 90, 97, 122, 95, 109, 45, 46, 48, 57, 65, 90,
95
+ 97, 122, 95, 108, 45, 46, 48, 57, 65, 90, 97, 122,
96
+ 45, 45, 45, 93, 93, 93, 63, 63, 62, 39, 39, 34,
97
+ 34, 93, 93, 9, 10, 13, 32, 34, 39, 62, 80, 83,
98
+ 91, 95, 45, 46, 48, 57, 65, 90, 97, 122, 10, 95,
99
+ 45, 46, 48, 57, 65, 90, 97, 122, 85, 95, 45, 46,
100
+ 48, 57, 65, 90, 97, 122, 66, 95, 45, 46, 48, 57,
101
+ 65, 90, 97, 122, 76, 95, 45, 46, 48, 57, 65, 90,
102
+ 97, 122, 73, 95, 45, 46, 48, 57, 65, 90, 97, 122,
103
+ 67, 95, 45, 46, 48, 57, 65, 90, 97, 122, 89, 95,
104
+ 45, 46, 48, 57, 65, 90, 97, 122, 83, 95, 45, 46,
105
+ 48, 57, 65, 90, 97, 122, 84, 95, 45, 46, 48, 57,
106
+ 65, 90, 97, 122, 69, 95, 45, 46, 48, 57, 65, 90,
107
+ 97, 122, 77, 95, 45, 46, 48, 57, 65, 90, 97, 122,
108
+ 34, 39, 63, 95, 45, 46, 48, 57, 65, 90, 97, 122,
109
+ 95, 45, 46, 48, 57, 65, 90, 97, 122, 62, 95, 45,
110
+ 46, 48, 57, 65, 90, 97, 122, 58, 95, 45, 46, 48,
111
+ 57, 65, 90, 97, 122, 62, 95, 45, 46, 48, 57, 65,
112
+ 90, 97, 122, 58, 95, 45, 46, 48, 57, 65, 90, 97,
113
+ 122, 13, 32, 9, 10, 10, 13, 32, 34, 39, 9, 10,
114
+ 13, 32, 96, 9, 10, 60, 62, 34, 39, 10, 13, 47,
115
+ 61, 62, 95, 45, 57, 65, 90, 97, 122, 10, 58, 95,
116
+ 45, 46, 48, 57, 65, 90, 97, 122, 62, 60, 60, 33,
117
+ 60, 63, 95, 45, 57, 65, 90, 97, 122, 33, 60, 63,
118
+ 95, 45, 57, 65, 90, 97, 122, 60, 60, 47, 60, 60,
119
+ 60, 60, 47, 60, 60, 0
120
+ };
121
+ }
122
+
123
+ private static final char _java_lexer_trans_keys[] = init__java_lexer_trans_keys_0();
124
+
125
+
126
+ private static byte[] init__java_lexer_single_lengths_0()
127
+ {
128
+ return new byte [] {
129
+ 0, 4, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1,
130
+ 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1,
131
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 2, 1,
132
+ 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133
+ 1, 1, 1, 1, 1, 11, 1, 1, 2, 2, 2, 2,
134
+ 2, 2, 2, 2, 2, 2, 4, 1, 1, 1, 2, 2,
135
+ 2, 2, 1, 4, 3, 2, 6, 1, 2, 1, 1, 1,
136
+ 4, 4, 1, 1, 2, 1, 1, 1, 2, 1
137
+ };
138
+ }
139
+
140
+ private static final byte _java_lexer_single_lengths[] = init__java_lexer_single_lengths_0();
141
+
142
+
143
+ private static byte[] init__java_lexer_range_lengths_0()
144
+ {
145
+ return new byte [] {
146
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
147
+ 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0,
148
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 4,
149
+ 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
150
+ 0, 0, 0, 0, 0, 4, 0, 4, 4, 4, 4, 4,
151
+ 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 4, 4,
152
+ 4, 1, 0, 1, 2, 0, 3, 0, 4, 0, 0, 0,
153
+ 3, 3, 0, 0, 0, 0, 0, 0, 0, 0
154
+ };
155
+ }
156
+
157
+ private static final byte _java_lexer_range_lengths[] = init__java_lexer_range_lengths_0();
158
+
159
+
160
+ private static short[] init__java_lexer_index_offsets_0()
161
+ {
162
+ return new short [] {
163
+ 0, 0, 5, 7, 10, 13, 16, 19, 22, 25, 29, 31,
164
+ 33, 35, 37, 39, 41, 48, 50, 52, 54, 56, 58, 60,
165
+ 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 88, 92,
166
+ 98, 105, 112, 114, 116, 118, 120, 122, 124, 126, 128, 130,
167
+ 132, 134, 136, 138, 140, 142, 158, 160, 166, 173, 180, 187,
168
+ 194, 201, 208, 215, 222, 229, 236, 245, 251, 253, 259, 266,
169
+ 273, 280, 284, 286, 292, 298, 301, 311, 313, 320, 322, 324,
170
+ 326, 334, 342, 344, 346, 349, 351, 353, 355, 358
171
+ };
172
+ }
173
+
174
+ private static final short _java_lexer_index_offsets[] = init__java_lexer_index_offsets_0();
175
+
176
+
177
+ private static byte[] init__java_lexer_trans_targs_0()
178
+ {
179
+ return new byte [] {
180
+ 2, 3, 10, 3, 32, 32, 32, 4, 4, 32, 5, 5,
181
+ 32, 6, 6, 32, 7, 7, 32, 8, 8, 32, 9, 9,
182
+ 32, 34, 34, 34, 32, 11, 32, 12, 32, 13, 32, 14,
183
+ 32, 15, 32, 32, 32, 35, 36, 35, 35, 35, 35, 32,
184
+ 38, 38, 41, 41, 20, 86, 21, 86, 22, 86, 23, 86,
185
+ 24, 86, 25, 86, 86, 86, 27, 90, 28, 90, 29, 90,
186
+ 30, 90, 31, 90, 90, 90, 33, 32, 1, 32, 16, 32,
187
+ 32, 32, 32, 32, 34, 34, 34, 32, 35, 35, 35, 35,
188
+ 35, 32, 35, 37, 35, 35, 35, 35, 32, 35, 35, 35,
189
+ 35, 35, 35, 32, 40, 39, 38, 39, 17, 38, 43, 42,
190
+ 41, 42, 18, 41, 46, 45, 44, 45, 44, 44, 47, 48,
191
+ 47, 48, 49, 50, 49, 50, 51, 52, 51, 52, 53, 53,
192
+ 54, 53, 53, 53, 53, 56, 61, 53, 55, 55, 55, 55,
193
+ 55, 0, 53, 53, 55, 55, 55, 55, 55, 53, 57, 55,
194
+ 55, 55, 55, 55, 53, 58, 55, 55, 55, 55, 55, 53,
195
+ 59, 55, 55, 55, 55, 55, 53, 60, 55, 55, 55, 55,
196
+ 55, 53, 55, 55, 55, 55, 55, 55, 53, 62, 55, 55,
197
+ 55, 55, 55, 53, 63, 55, 55, 55, 55, 55, 53, 64,
198
+ 55, 55, 55, 55, 55, 53, 65, 55, 55, 55, 55, 55,
199
+ 53, 55, 55, 55, 55, 55, 55, 53, 66, 66, 68, 67,
200
+ 67, 67, 67, 67, 66, 67, 67, 67, 67, 67, 66, 66,
201
+ 66, 70, 70, 70, 70, 70, 0, 69, 70, 70, 70, 70,
202
+ 70, 69, 71, 72, 72, 72, 72, 72, 71, 71, 72, 72,
203
+ 72, 72, 72, 71, 74, 73, 73, 73, 73, 73, 75, 75,
204
+ 75, 75, 75, 76, 75, 75, 75, 75, 75, 76, 77, 77,
205
+ 77, 78, 79, 81, 78, 78, 80, 80, 80, 80, 78, 78,
206
+ 78, 78, 80, 80, 80, 80, 80, 78, 78, 78, 85, 83,
207
+ 84, 83, 82, 84, 82, 82, 82, 82, 82, 83, 82, 84,
208
+ 82, 82, 82, 82, 82, 83, 88, 87, 86, 87, 19, 89,
209
+ 86, 89, 86, 92, 91, 90, 91, 26, 93, 90, 93, 90,
210
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
211
+ 32, 32, 32, 32, 38, 41, 86, 86, 86, 86, 86, 86,
212
+ 86, 90, 90, 90, 90, 90, 90, 32, 32, 32, 32, 32,
213
+ 38, 38, 41, 41, 44, 44, 47, 49, 51, 53, 53, 53,
214
+ 53, 53, 53, 53, 53, 53, 53, 53, 53, 66, 66, 69,
215
+ 71, 73, 75, 78, 78, 78, 82, 82, 82, 86, 86, 86,
216
+ 90, 90, 90, 0
217
+ };
218
+ }
219
+
220
+ private static final byte _java_lexer_trans_targs[] = init__java_lexer_trans_targs_0();
221
+
222
+
223
+ private static short[] init__java_lexer_trans_actions_0()
224
+ {
225
+ return new short [] {
226
+ 0, 0, 0, 0, 137, 121, 137, 0, 0, 137, 0, 0,
227
+ 137, 0, 0, 137, 0, 0, 137, 0, 0, 137, 0, 0,
228
+ 137, 1, 1, 1, 137, 0, 137, 0, 137, 0, 137, 0,
229
+ 137, 0, 137, 123, 137, 171, 0, 171, 171, 171, 171, 137,
230
+ 9, 13, 15, 19, 0, 113, 0, 113, 0, 113, 0, 113,
231
+ 0, 113, 0, 113, 109, 113, 0, 119, 0, 119, 0, 119,
232
+ 0, 119, 0, 119, 115, 119, 7, 129, 0, 127, 0, 125,
233
+ 125, 125, 125, 135, 1, 1, 1, 131, 171, 171, 171, 171,
234
+ 171, 139, 171, 0, 171, 171, 171, 171, 133, 171, 168, 171,
235
+ 171, 171, 171, 133, 153, 1, 11, 1, 0, 11, 153, 1,
236
+ 17, 1, 0, 17, 1, 1, 23, 1, 21, 23, 25, 1,
237
+ 27, 1, 29, 1, 31, 1, 33, 1, 35, 1, 47, 45,
238
+ 0, 47, 41, 39, 43, 0, 0, 37, 159, 159, 159, 159,
239
+ 159, 0, 45, 51, 159, 159, 159, 159, 159, 53, 0, 159,
240
+ 159, 159, 159, 159, 49, 0, 159, 159, 159, 159, 159, 49,
241
+ 0, 159, 159, 159, 159, 159, 49, 0, 159, 159, 159, 159,
242
+ 159, 49, 156, 159, 159, 159, 159, 159, 49, 0, 159, 159,
243
+ 159, 159, 159, 49, 0, 159, 159, 159, 159, 159, 49, 0,
244
+ 159, 159, 159, 159, 159, 49, 0, 159, 159, 159, 159, 159,
245
+ 49, 156, 159, 159, 159, 159, 159, 49, 177, 174, 1, 1,
246
+ 1, 1, 1, 1, 144, 0, 0, 0, 0, 0, 57, 55,
247
+ 59, 0, 0, 0, 0, 0, 0, 61, 0, 0, 0, 0,
248
+ 0, 63, 180, 1, 1, 1, 1, 1, 147, 65, 0, 0,
249
+ 0, 0, 0, 67, 1, 150, 150, 69, 150, 71, 75, 75,
250
+ 73, 73, 75, 165, 77, 77, 77, 77, 77, 162, 81, 79,
251
+ 83, 85, 0, 0, 89, 91, 0, 0, 0, 0, 95, 85,
252
+ 97, 87, 0, 0, 0, 0, 0, 99, 93, 101, 141, 1,
253
+ 141, 1, 105, 141, 105, 105, 105, 105, 105, 1, 103, 141,
254
+ 103, 103, 103, 103, 103, 1, 153, 1, 111, 1, 0, 1,
255
+ 111, 1, 111, 153, 1, 117, 1, 0, 1, 117, 1, 117,
256
+ 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137, 137,
257
+ 137, 137, 137, 137, 13, 19, 113, 113, 113, 113, 113, 113,
258
+ 113, 119, 119, 119, 119, 119, 119, 135, 131, 139, 133, 133,
259
+ 11, 11, 17, 17, 23, 23, 27, 31, 35, 51, 53, 49,
260
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 57, 59, 63,
261
+ 67, 71, 77, 97, 99, 101, 107, 107, 107, 111, 111, 111,
262
+ 117, 117, 117, 0
263
+ };
264
+ }
265
+
266
+ private static final short _java_lexer_trans_actions[] = init__java_lexer_trans_actions_0();
267
+
268
+
269
+ private static short[] init__java_lexer_to_state_actions_0()
270
+ {
271
+ return new short [] {
272
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
273
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
274
+ 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0,
275
+ 0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 3,
276
+ 0, 3, 0, 3, 0, 3, 0, 0, 0, 0, 0, 0,
277
+ 0, 0, 0, 0, 0, 0, 3, 0, 0, 3, 0, 3,
278
+ 0, 3, 0, 3, 0, 3, 3, 0, 0, 0, 3, 0,
279
+ 0, 0, 3, 0, 0, 0, 3, 0, 0, 0
280
+ };
281
+ }
282
+
283
+ private static final short _java_lexer_to_state_actions[] = init__java_lexer_to_state_actions_0();
284
+
285
+
286
+ private static short[] init__java_lexer_from_state_actions_0()
287
+ {
288
+ return new short [] {
289
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
290
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
291
+ 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0,
292
+ 0, 0, 5, 0, 0, 5, 0, 0, 5, 0, 0, 5,
293
+ 0, 5, 0, 5, 0, 5, 0, 0, 0, 0, 0, 0,
294
+ 0, 0, 0, 0, 0, 0, 5, 0, 0, 5, 0, 5,
295
+ 0, 5, 0, 5, 0, 5, 5, 0, 0, 0, 5, 0,
296
+ 0, 0, 5, 0, 0, 0, 5, 0, 0, 0
297
+ };
298
+ }
299
+
300
+ private static final short _java_lexer_from_state_actions[] = init__java_lexer_from_state_actions_0();
301
+
302
+
303
+ private static short[] init__java_lexer_eof_trans_0()
304
+ {
305
+ return new short [] {
306
+ 0, 376, 376, 376, 376, 376, 376, 376, 376, 376, 376, 376,
307
+ 376, 376, 376, 376, 376, 377, 378, 385, 385, 385, 385, 385,
308
+ 385, 385, 391, 391, 391, 391, 391, 391, 0, 392, 393, 394,
309
+ 396, 396, 0, 398, 398, 0, 400, 400, 0, 402, 402, 0,
310
+ 403, 0, 404, 0, 405, 0, 406, 407, 417, 417, 417, 417,
311
+ 417, 417, 417, 417, 417, 417, 0, 418, 419, 0, 420, 0,
312
+ 421, 0, 422, 0, 423, 0, 0, 424, 425, 426, 0, 429,
313
+ 429, 429, 0, 432, 432, 432, 0, 435, 435, 435
314
+ };
315
+ }
316
+
317
+ private static final short _java_lexer_eof_trans[] = init__java_lexer_eof_trans_0();
318
+
319
+
320
+ static final int java_lexer_start = 32;
321
+ static final int java_lexer_first_final = 32;
322
+ static final int java_lexer_error = 0;
323
+
324
+ static final int java_lexer_en_comment_body = 38;
325
+ static final int java_lexer_en_cdata_body = 41;
326
+ static final int java_lexer_en_proc_ins_body = 44;
327
+ static final int java_lexer_en_string_squote = 47;
328
+ static final int java_lexer_en_string_dquote = 49;
329
+ static final int java_lexer_en_doctype_inline = 51;
330
+ static final int java_lexer_en_doctype = 53;
331
+ static final int java_lexer_en_xml_decl = 66;
332
+ static final int java_lexer_en_element_name = 69;
333
+ static final int java_lexer_en_element_close = 71;
334
+ static final int java_lexer_en_attribute_pre = 73;
335
+ static final int java_lexer_en_html_attribute_value = 75;
336
+ static final int java_lexer_en_xml_attribute_value = 77;
337
+ static final int java_lexer_en_element_head = 78;
338
+ static final int java_lexer_en_text = 82;
339
+ static final int java_lexer_en_html_script = 86;
340
+ static final int java_lexer_en_html_style = 90;
341
+ static final int java_lexer_en_main = 32;
342
+
343
+
344
+ // line 39 "ext/java/org/liboga/xml/Lexer.rl"
345
+
346
+ /* Used by Ragel to keep track of the current state. */
347
+ int act;
348
+ int cs;
349
+ int top;
350
+ int lines;
351
+ int[] stack;
352
+
353
+ /**
354
+ * Sets up the current class in the Ruby runtime.
355
+ */
356
+ public static void load(Ruby runtime)
357
+ {
358
+ RubyModule xml = (RubyModule) runtime.getModule("Oga")
359
+ .getConstant("XML");
360
+
361
+ RubyClass lexer = xml.defineClassUnder(
362
+ "Lexer",
363
+ runtime.getObject(),
364
+ ALLOCATOR
365
+ );
366
+
367
+ lexer.defineAnnotatedMethods(Lexer.class);
368
+ }
369
+
370
+ private static final ObjectAllocator ALLOCATOR = new ObjectAllocator()
371
+ {
372
+ public IRubyObject allocate(Ruby runtime, RubyClass klass)
373
+ {
374
+ return new org.liboga.xml.Lexer(runtime, klass);
375
+ }
376
+ };
377
+
378
/**
 * Creates a lexer object of the given Ruby class and keeps a reference to
 * the runtime it was created in.
 *
 * @param runtime The current JRuby runtime.
 * @param klass The Ruby class of the new object.
 */
public Lexer(Ruby runtime, RubyClass klass)
{
    super(runtime, klass);

    this.runtime = runtime;
}
384
+
385
+ /**
386
+ * Runs the bulk of the Ragel loop and calls back in to Ruby.
387
+ *
388
+ * This method lexes the bytes of the String passed in as the `rb_str`
389
+ * argument; it does not read the data from an instance variable. The
390
+ * encoding of that String is passed along to the callbacks to make sure
391
+ * that token values share the same encoding as the input.
392
+ *
393
+ * This method always returns nil.
394
+ */
395
+ @JRubyMethod
396
+ public IRubyObject advance_native(ThreadContext context, RubyString rb_str)
397
+ {
398
+ Boolean html_p = this.callMethod(context, "html?").isTrue();
399
+
400
+ Encoding encoding = rb_str.getEncoding();
401
+
402
+ byte[] data = rb_str.getBytes();
403
+
404
+ int ts = 0;
405
+ int te = 0;
406
+ int p = 0;
407
+ int mark = 0;
408
+ int lines = this.lines;
409
+ int pe = data.length;
410
+ int eof = data.length;
411
+
412
+ String id_advance_line = "advance_line";
413
+ String id_on_attribute = "on_attribute";
414
+ String id_on_attribute_ns = "on_attribute_ns";
415
+ String id_on_cdata_start = "on_cdata_start";
416
+ String id_on_cdata_body = "on_cdata_body";
417
+ String id_on_cdata_end = "on_cdata_end";
418
+ String id_on_comment_start = "on_comment_start";
419
+ String id_on_comment_body = "on_comment_body";
420
+ String id_on_comment_end = "on_comment_end";
421
+ String id_on_doctype_end = "on_doctype_end";
422
+ String id_on_doctype_inline = "on_doctype_inline";
423
+ String id_on_doctype_name = "on_doctype_name";
424
+ String id_on_doctype_start = "on_doctype_start";
425
+ String id_on_doctype_type = "on_doctype_type";
426
+ String id_on_element_end = "on_element_end";
427
+ String id_on_element_name = "on_element_name";
428
+ String id_on_element_ns = "on_element_ns";
429
+ String id_on_element_open_end = "on_element_open_end";
430
+ String id_on_proc_ins_end = "on_proc_ins_end";
431
+ String id_on_proc_ins_name = "on_proc_ins_name";
432
+ String id_on_proc_ins_start = "on_proc_ins_start";
433
+ String id_on_proc_ins_body = "on_proc_ins_body";
434
+ String id_on_string_body = "on_string_body";
435
+ String id_on_string_dquote = "on_string_dquote";
436
+ String id_on_string_squote = "on_string_squote";
437
+ String id_on_text = "on_text";
438
+ String id_on_xml_decl_end = "on_xml_decl_end";
439
+ String id_on_xml_decl_start = "on_xml_decl_start";
440
+
441
+
442
+ // line 443 "ext/java/org/liboga/xml/Lexer.java"
443
+ {
444
+ int _klen;
445
+ int _trans = 0;
446
+ int _acts;
447
+ int _nacts;
448
+ int _keys;
449
+ int _goto_targ = 0;
450
+
451
+ _goto: while (true) {
452
+ switch ( _goto_targ ) {
453
+ case 0:
454
+ if ( p == pe ) {
455
+ _goto_targ = 4;
456
+ continue _goto;
457
+ }
458
+ if ( ( this.cs) == 0 ) {
459
+ _goto_targ = 5;
460
+ continue _goto;
461
+ }
462
+ case 1:
463
+ _acts = _java_lexer_from_state_actions[( this.cs)];
464
+ _nacts = (int) _java_lexer_actions[_acts++];
465
+ while ( _nacts-- > 0 ) {
466
+ switch ( _java_lexer_actions[_acts++] ) {
467
+ case 3:
468
+ // line 1 "NONE"
469
+ {ts = p;}
470
+ break;
471
+ // line 472 "ext/java/org/liboga/xml/Lexer.java"
472
+ }
473
+ }
474
+
475
+ _match: do {
476
+ _keys = _java_lexer_key_offsets[( this.cs)];
477
+ _trans = _java_lexer_index_offsets[( this.cs)];
478
+ _klen = _java_lexer_single_lengths[( this.cs)];
479
+ if ( _klen > 0 ) {
480
+ int _lower = _keys;
481
+ int _mid;
482
+ int _upper = _keys + _klen - 1;
483
+ while (true) {
484
+ if ( _upper < _lower )
485
+ break;
486
+
487
+ _mid = _lower + ((_upper-_lower) >> 1);
488
+ if ( data[p] < _java_lexer_trans_keys[_mid] )
489
+ _upper = _mid - 1;
490
+ else if ( data[p] > _java_lexer_trans_keys[_mid] )
491
+ _lower = _mid + 1;
492
+ else {
493
+ _trans += (_mid - _keys);
494
+ break _match;
495
+ }
496
+ }
497
+ _keys += _klen;
498
+ _trans += _klen;
499
+ }
500
+
501
+ _klen = _java_lexer_range_lengths[( this.cs)];
502
+ if ( _klen > 0 ) {
503
+ int _lower = _keys;
504
+ int _mid;
505
+ int _upper = _keys + (_klen<<1) - 2;
506
+ while (true) {
507
+ if ( _upper < _lower )
508
+ break;
509
+
510
+ _mid = _lower + (((_upper-_lower) >> 1) & ~1);
511
+ if ( data[p] < _java_lexer_trans_keys[_mid] )
512
+ _upper = _mid - 2;
513
+ else if ( data[p] > _java_lexer_trans_keys[_mid+1] )
514
+ _lower = _mid + 2;
515
+ else {
516
+ _trans += ((_mid - _keys)>>1);
517
+ break _match;
518
+ }
519
+ }
520
+ _trans += _klen;
521
+ }
522
+ } while (false);
523
+
524
+ case 3:
525
+ ( this.cs) = _java_lexer_trans_targs[_trans];
526
+
527
+ if ( _java_lexer_trans_actions[_trans] != 0 ) {
528
+ _acts = _java_lexer_trans_actions[_trans];
529
+ _nacts = (int) _java_lexer_actions[_acts++];
530
+ while ( _nacts-- > 0 )
531
+ {
532
+ switch ( _java_lexer_actions[_acts++] )
533
+ {
534
+ case 0:
535
+ // line 56 "ext/ragel/base_lexer.rl"
536
+ {
537
+ if ( data[p] == '\n' ) lines++;
538
+ }
539
+ break;
540
+ case 1:
541
+ // line 589 "ext/ragel/base_lexer.rl"
542
+ { mark = p; }
543
+ break;
544
+ case 4:
545
+ // line 1 "NONE"
546
+ {te = p+1;}
547
+ break;
548
+ case 5:
549
+ // line 102 "ext/ragel/base_lexer.rl"
550
+ {te = p+1;{
551
+ callback_simple(id_on_comment_end);
552
+
553
+ ( this.cs) = 32;
554
+ }}
555
+ break;
556
+ case 6:
557
+ // line 91 "ext/ragel/base_lexer.rl"
558
+ {te = p;p--;{
559
+ callback(id_on_comment_body, data, encoding, ts, te);
560
+
561
+ if ( lines > 0 )
562
+ {
563
+ advance_line(lines);
564
+
565
+ lines = 0;
566
+ }
567
+ }}
568
+ break;
569
+ case 7:
570
+ // line 91 "ext/ragel/base_lexer.rl"
571
+ {{p = ((te))-1;}{
572
+ callback(id_on_comment_body, data, encoding, ts, te);
573
+
574
+ if ( lines > 0 )
575
+ {
576
+ advance_line(lines);
577
+
578
+ lines = 0;
579
+ }
580
+ }}
581
+ break;
582
+ case 8:
583
+ // line 141 "ext/ragel/base_lexer.rl"
584
+ {te = p+1;{
585
+ callback_simple(id_on_cdata_end);
586
+
587
+ ( this.cs) = 32;
588
+ }}
589
+ break;
590
+ case 9:
591
+ // line 130 "ext/ragel/base_lexer.rl"
592
+ {te = p;p--;{
593
+ callback(id_on_cdata_body, data, encoding, ts, te);
594
+
595
+ if ( lines > 0 )
596
+ {
597
+ advance_line(lines);
598
+
599
+ lines = 0;
600
+ }
601
+ }}
602
+ break;
603
+ case 10:
604
+ // line 130 "ext/ragel/base_lexer.rl"
605
+ {{p = ((te))-1;}{
606
+ callback(id_on_cdata_body, data, encoding, ts, te);
607
+
608
+ if ( lines > 0 )
609
+ {
610
+ advance_line(lines);
611
+
612
+ lines = 0;
613
+ }
614
+ }}
615
+ break;
616
+ case 11:
617
+ // line 184 "ext/ragel/base_lexer.rl"
618
+ {te = p+1;{
619
+ callback_simple(id_on_proc_ins_end);
620
+
621
+ ( this.cs) = 32;
622
+ }}
623
+ break;
624
+ case 12:
625
+ // line 173 "ext/ragel/base_lexer.rl"
626
+ {te = p;p--;{
627
+ callback(id_on_proc_ins_body, data, encoding, ts, te);
628
+
629
+ if ( lines > 0 )
630
+ {
631
+ advance_line(lines);
632
+
633
+ lines = 0;
634
+ }
635
+ }}
636
+ break;
637
+ case 13:
638
+ // line 226 "ext/ragel/base_lexer.rl"
639
+ {te = p+1;{
640
+ callback_simple(id_on_string_squote);
641
+
642
+ {( this.cs) = ( this.stack)[--( this.top)];_goto_targ = 2; if (true) continue _goto;}
643
+ }}
644
+ break;
645
+ case 14:
646
+ // line 200 "ext/ragel/base_lexer.rl"
647
+ {te = p;p--;{
648
+ callback(id_on_string_body, data, encoding, ts, te);
649
+
650
+ if ( lines > 0 )
651
+ {
652
+ advance_line(lines);
653
+
654
+ lines = 0;
655
+ }
656
+ }}
657
+ break;
658
+ case 15:
659
+ // line 236 "ext/ragel/base_lexer.rl"
660
+ {te = p+1;{
661
+ callback_simple(id_on_string_dquote);
662
+
663
+ {( this.cs) = ( this.stack)[--( this.top)];_goto_targ = 2; if (true) continue _goto;}
664
+ }}
665
+ break;
666
+ case 16:
667
+ // line 200 "ext/ragel/base_lexer.rl"
668
+ {te = p;p--;{
669
+ callback(id_on_string_body, data, encoding, ts, te);
670
+
671
+ if ( lines > 0 )
672
+ {
673
+ advance_line(lines);
674
+
675
+ lines = 0;
676
+ }
677
+ }}
678
+ break;
679
+ case 17:
680
+ // line 281 "ext/ragel/base_lexer.rl"
681
+ {te = p+1;{ ( this.cs) = 53; }}
682
+ break;
683
+ case 18:
684
+ // line 270 "ext/ragel/base_lexer.rl"
685
+ {te = p;p--;{
686
+ callback(id_on_doctype_inline, data, encoding, ts, te);
687
+
688
+ if ( lines > 0 )
689
+ {
690
+ advance_line(lines);
691
+
692
+ lines = 0;
693
+ }
694
+ }}
695
+ break;
696
+ case 19:
697
+ // line 287 "ext/ragel/base_lexer.rl"
698
+ {( this.act) = 13;}
699
+ break;
700
+ case 20:
701
+ // line 298 "ext/ragel/base_lexer.rl"
702
+ {( this.act) = 17;}
703
+ break;
704
+ case 21:
705
+ // line 292 "ext/ragel/base_lexer.rl"
706
+ {te = p+1;{ ( this.cs) = 51; }}
707
+ break;
708
+ case 22:
709
+ // line 211 "ext/ragel/base_lexer.rl"
710
+ {te = p+1;{
711
+ callback_simple(id_on_string_squote);
712
+
713
+ {( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 47; _goto_targ = 2; if (true) continue _goto;}
714
+ }}
715
+ break;
716
+ case 23:
717
+ // line 217 "ext/ragel/base_lexer.rl"
718
+ {te = p+1;{
719
+ callback_simple(id_on_string_dquote);
720
+
721
+ {( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 49; _goto_targ = 2; if (true) continue _goto;}
722
+ }}
723
+ break;
724
+ case 24:
725
+ // line 302 "ext/ragel/base_lexer.rl"
726
+ {te = p+1;{
727
+ callback_simple(id_on_doctype_end);
728
+ ( this.cs) = 32;
729
+ }}
730
+ break;
731
+ case 25:
732
+ // line 60 "ext/ragel/base_lexer.rl"
733
+ {te = p+1;{
734
+ advance_line(1);
735
+ }}
736
+ break;
737
+ case 26:
738
+ // line 309 "ext/ragel/base_lexer.rl"
739
+ {te = p+1;}
740
+ break;
741
+ case 27:
742
+ // line 298 "ext/ragel/base_lexer.rl"
743
+ {te = p;p--;{
744
+ callback(id_on_doctype_name, data, encoding, ts, te);
745
+ }}
746
+ break;
747
+ case 28:
748
+ // line 60 "ext/ragel/base_lexer.rl"
749
+ {te = p;p--;{
750
+ advance_line(1);
751
+ }}
752
+ break;
753
+ case 29:
754
+ // line 1 "NONE"
755
+ { switch( ( this.act) ) {
756
+ case 13:
757
+ {{p = ((te))-1;}
758
+ callback(id_on_doctype_type, data, encoding, ts, te);
759
+ }
760
+ break;
761
+ case 17:
762
+ {{p = ((te))-1;}
763
+ callback(id_on_doctype_name, data, encoding, ts, te);
764
+ }
765
+ break;
766
+ }
767
+ }
768
+ break;
769
+ case 30:
770
+ // line 326 "ext/ragel/base_lexer.rl"
771
+ {te = p+1;{
772
+ if ( lines > 0 )
773
+ {
774
+ advance_line(lines);
775
+
776
+ lines = 0;
777
+ }
778
+
779
+ callback_simple(id_on_xml_decl_end);
780
+
781
+ ( this.cs) = 32;
782
+ }}
783
+ break;
784
+ case 31:
785
+ // line 211 "ext/ragel/base_lexer.rl"
786
+ {te = p+1;{
787
+ callback_simple(id_on_string_squote);
788
+
789
+ {( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 47; _goto_targ = 2; if (true) continue _goto;}
790
+ }}
791
+ break;
792
+ case 32:
793
+ // line 217 "ext/ragel/base_lexer.rl"
794
+ {te = p+1;{
795
+ callback_simple(id_on_string_dquote);
796
+
797
+ {( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 49; _goto_targ = 2; if (true) continue _goto;}
798
+ }}
799
+ break;
800
+ case 33:
801
+ // line 354 "ext/ragel/base_lexer.rl"
802
+ {te = p+1;}
803
+ break;
804
+ case 34:
805
+ // line 340 "ext/ragel/base_lexer.rl"
806
+ {te = p;p--;{
807
+ if ( lines > 0 )
808
+ {
809
+ advance_line(lines);
810
+
811
+ lines = 0;
812
+ }
813
+
814
+ callback(id_on_attribute, data, encoding, ts, te);
815
+ }}
816
+ break;
817
+ case 35:
818
+ // line 354 "ext/ragel/base_lexer.rl"
819
+ {te = p;p--;}
820
+ break;
821
+ case 36:
822
+ // line 390 "ext/ragel/base_lexer.rl"
823
+ {te = p+1;{
824
+ callback(id_on_element_ns, data, encoding, ts, te - 1);
825
+ }}
826
+ break;
827
+ case 37:
828
+ // line 394 "ext/ragel/base_lexer.rl"
829
+ {te = p;p--;{
830
+ callback(id_on_element_name, data, encoding, ts, te);
831
+ ( this.cs) = 78;
832
+ }}
833
+ break;
834
+ case 38:
835
+ // line 404 "ext/ragel/base_lexer.rl"
836
+ {te = p+1;}
837
+ break;
838
+ case 39:
839
+ // line 408 "ext/ragel/base_lexer.rl"
840
+ {te = p+1;{
841
+ if ( lines > 0 )
842
+ {
843
+ advance_line(lines);
844
+
845
+ lines = 0;
846
+ }
847
+
848
+ ( this.cs) = 32;
849
+ }}
850
+ break;
851
+ case 40:
852
+ // line 419 "ext/ragel/base_lexer.rl"
853
+ {te = p+1;}
854
+ break;
855
+ case 41:
856
+ // line 375 "ext/ragel/base_lexer.rl"
857
+ {te = p;p--;{
858
+ callback(id_on_element_end, data, encoding, ts, te);
859
+ }}
860
+ break;
861
+ case 42:
862
+ // line 432 "ext/ragel/base_lexer.rl"
863
+ {te = p+1;}
864
+ break;
865
+ case 43:
866
+ // line 434 "ext/ragel/base_lexer.rl"
867
+ {te = p+1;{
868
+ p--;
869
+
870
+ if ( lines > 0 )
871
+ {
872
+ advance_line(lines);
873
+
874
+ lines = 0;
875
+ }
876
+
877
+ if ( html_p )
878
+ {
879
+ ( this.cs) = 75;
880
+ }
881
+ else
882
+ {
883
+ ( this.cs) = 77;
884
+ }
885
+ }}
886
+ break;
887
+ case 44:
888
+ // line 432 "ext/ragel/base_lexer.rl"
889
+ {te = p;p--;}
890
+ break;
891
+ case 45:
892
+ // line 464 "ext/ragel/base_lexer.rl"
893
+ {( this.act) = 35;}
894
+ break;
895
+ case 46:
896
+ // line 64 "ext/ragel/base_lexer.rl"
897
+ {( this.act) = 36;}
898
+ break;
899
+ case 47:
900
+ // line 457 "ext/ragel/base_lexer.rl"
901
+ {te = p+1;{
902
+ p--;
903
+ ( this.cs) = 77;
904
+ }}
905
+ break;
906
+ case 48:
907
+ // line 64 "ext/ragel/base_lexer.rl"
908
+ {te = p+1;{
909
+ p--;
910
+ {( this.cs) = ( this.stack)[--( this.top)];_goto_targ = 2; if (true) continue _goto;}
911
+ }}
912
+ break;
913
+ case 49:
914
+ // line 1 "NONE"
915
+ { switch( ( this.act) ) {
916
+ case 35:
917
+ {{p = ((te))-1;}
918
+ callback_simple(id_on_string_squote);
919
+
920
+ callback(id_on_string_body, data, encoding, ts, te);
921
+
922
+ callback_simple(id_on_string_squote);
923
+ }
924
+ break;
925
+ case 36:
926
+ {{p = ((te))-1;}
927
+ p--;
928
+ {( this.cs) = ( this.stack)[--( this.top)];_goto_targ = 2; if (true) continue _goto;}
929
+ }
930
+ break;
931
+ }
932
+ }
933
+ break;
934
+ case 50:
935
+ // line 480 "ext/ragel/base_lexer.rl"
936
+ {te = p+1;{
937
+ callback_simple(id_on_string_squote);
938
+
939
+ ( this.cs) = 47;
940
+ }}
941
+ break;
942
+ case 51:
943
+ // line 486 "ext/ragel/base_lexer.rl"
944
+ {te = p+1;{
945
+ callback_simple(id_on_string_dquote);
946
+
947
+ ( this.cs) = 49;
948
+ }}
949
+ break;
950
+ case 52:
951
+ // line 64 "ext/ragel/base_lexer.rl"
952
+ {te = p+1;{
953
+ p--;
954
+ {( this.cs) = ( this.stack)[--( this.top)];_goto_targ = 2; if (true) continue _goto;}
955
+ }}
956
+ break;
957
+ case 53:
958
+ // line 60 "ext/ragel/base_lexer.rl"
959
+ {te = p+1;{
960
+ advance_line(1);
961
+ }}
962
+ break;
963
+ case 54:
964
+ // line 501 "ext/ragel/base_lexer.rl"
965
+ {te = p+1;{
966
+ callback(id_on_attribute_ns, data, encoding, ts, te - 1);
967
+ }}
968
+ break;
969
+ case 55:
970
+ // line 510 "ext/ragel/base_lexer.rl"
971
+ {te = p+1;{
972
+ {( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 73; _goto_targ = 2; if (true) continue _goto;}
973
+ }}
974
+ break;
975
+ case 56:
976
+ // line 515 "ext/ragel/base_lexer.rl"
977
+ {te = p+1;{
978
+ callback_simple(id_on_element_open_end);
979
+
980
+ if ( html_script_p() )
981
+ {
982
+ ( this.cs) = 86;
983
+ }
984
+ else if ( html_style_p() )
985
+ {
986
+ ( this.cs) = 90;
987
+ }
988
+ else
989
+ {
990
+ ( this.cs) = 32;
991
+ }
992
+ }}
993
+ break;
994
+ case 57:
995
+ // line 533 "ext/ragel/base_lexer.rl"
996
+ {te = p+1;{
997
+ callback_simple(id_on_element_end);
998
+ ( this.cs) = 32;
999
+ }}
1000
+ break;
1001
+ case 58:
1002
+ // line 538 "ext/ragel/base_lexer.rl"
1003
+ {te = p+1;}
1004
+ break;
1005
+ case 59:
1006
+ // line 60 "ext/ragel/base_lexer.rl"
1007
+ {te = p;p--;{
1008
+ advance_line(1);
1009
+ }}
1010
+ break;
1011
+ case 60:
1012
+ // line 505 "ext/ragel/base_lexer.rl"
1013
+ {te = p;p--;{
1014
+ callback(id_on_attribute, data, encoding, ts, te);
1015
+ }}
1016
+ break;
1017
+ case 61:
1018
+ // line 538 "ext/ragel/base_lexer.rl"
1019
+ {te = p;p--;}
1020
+ break;
1021
+ case 62:
1022
+ // line 575 "ext/ragel/base_lexer.rl"
1023
+ {te = p+1;{
1024
+ callback(id_on_text, data, encoding, ts, te);
1025
+
1026
+ if ( lines > 0 )
1027
+ {
1028
+ advance_line(lines);
1029
+
1030
+ lines = 0;
1031
+ }
1032
+
1033
+ ( this.cs) = 32;
1034
+ }}
1035
+ break;
1036
+ case 63:
1037
+ // line 589 "ext/ragel/base_lexer.rl"
1038
+ {te = p+1;{
1039
+ callback(id_on_text, data, encoding, ts, mark);
1040
+
1041
+ p = mark - 1;
1042
+ mark = 0;
1043
+
1044
+ if ( lines > 0 )
1045
+ {
1046
+ advance_line(lines);
1047
+
1048
+ lines = 0;
1049
+ }
1050
+
1051
+ ( this.cs) = 32;
1052
+ }}
1053
+ break;
1054
+ case 64:
1055
+ // line 575 "ext/ragel/base_lexer.rl"
1056
+ {te = p;p--;{
1057
+ callback(id_on_text, data, encoding, ts, te);
1058
+
1059
+ if ( lines > 0 )
1060
+ {
1061
+ advance_line(lines);
1062
+
1063
+ lines = 0;
1064
+ }
1065
+
1066
+ ( this.cs) = 32;
1067
+ }}
1068
+ break;
1069
+ case 65:
1070
+ // line 379 "ext/ragel/base_lexer.rl"
1071
+ {te = p+1;{
1072
+ callback_simple(id_on_element_end);
1073
+
1074
+ ( this.cs) = 32;
1075
+ }}
1076
+ break;
1077
+ case 66:
1078
+ // line 563 "ext/ragel/base_lexer.rl"
1079
+ {te = p;p--;{
1080
+ callback(id_on_text, data, encoding, ts, te);
1081
+
1082
+ if ( lines > 0 )
1083
+ {
1084
+ advance_line(lines);
1085
+
1086
+ lines = 0;
1087
+ }
1088
+ }}
1089
+ break;
1090
+ case 67:
1091
+ // line 563 "ext/ragel/base_lexer.rl"
1092
+ {{p = ((te))-1;}{
1093
+ callback(id_on_text, data, encoding, ts, te);
1094
+
1095
+ if ( lines > 0 )
1096
+ {
1097
+ advance_line(lines);
1098
+
1099
+ lines = 0;
1100
+ }
1101
+ }}
1102
+ break;
1103
+ case 68:
1104
+ // line 379 "ext/ragel/base_lexer.rl"
1105
+ {te = p+1;{
1106
+ callback_simple(id_on_element_end);
1107
+
1108
+ ( this.cs) = 32;
1109
+ }}
1110
+ break;
1111
+ case 69:
1112
+ // line 563 "ext/ragel/base_lexer.rl"
1113
+ {te = p;p--;{
1114
+ callback(id_on_text, data, encoding, ts, te);
1115
+
1116
+ if ( lines > 0 )
1117
+ {
1118
+ advance_line(lines);
1119
+
1120
+ lines = 0;
1121
+ }
1122
+ }}
1123
+ break;
1124
+ case 70:
1125
+ // line 563 "ext/ragel/base_lexer.rl"
1126
+ {{p = ((te))-1;}{
1127
+ callback(id_on_text, data, encoding, ts, te);
1128
+
1129
+ if ( lines > 0 )
1130
+ {
1131
+ advance_line(lines);
1132
+
1133
+ lines = 0;
1134
+ }
1135
+ }}
1136
+ break;
1137
+ case 71:
1138
+ // line 319 "ext/ragel/base_lexer.rl"
1139
+ {( this.act) = 54;}
1140
+ break;
1141
+ case 72:
1142
+ // line 165 "ext/ragel/base_lexer.rl"
1143
+ {( this.act) = 57;}
1144
+ break;
1145
+ case 73:
1146
+ // line 84 "ext/ragel/base_lexer.rl"
1147
+ {te = p+1;{
1148
+ callback_simple(id_on_comment_start);
1149
+
1150
+ ( this.cs) = 38;
1151
+ }}
1152
+ break;
1153
+ case 74:
1154
+ // line 123 "ext/ragel/base_lexer.rl"
1155
+ {te = p+1;{
1156
+ callback_simple(id_on_cdata_start);
1157
+
1158
+ ( this.cs) = 41;
1159
+ }}
1160
+ break;
1161
+ case 75:
1162
+ // line 366 "ext/ragel/base_lexer.rl"
1163
+ {te = p+1;{
1164
+ p--;
1165
+ ( this.cs) = 69;
1166
+ }}
1167
+ break;
1168
+ case 76:
1169
+ // line 371 "ext/ragel/base_lexer.rl"
1170
+ {te = p+1;{
1171
+ ( this.cs) = 71;
1172
+ }}
1173
+ break;
1174
+ case 77:
1175
+ // line 549 "ext/ragel/base_lexer.rl"
1176
+ {te = p+1;{
1177
+ p--;
1178
+ ( this.cs) = 82;
1179
+ }}
1180
+ break;
1181
+ case 78:
1182
+ // line 255 "ext/ragel/base_lexer.rl"
1183
+ {te = p;p--;{
1184
+ callback_simple(id_on_doctype_start);
1185
+
1186
+ if ( lines > 0 )
1187
+ {
1188
+ advance_line(lines);
1189
+
1190
+ lines = 0;
1191
+ }
1192
+
1193
+ ( this.cs) = 53;
1194
+ }}
1195
+ break;
1196
+ case 79:
1197
+ // line 165 "ext/ragel/base_lexer.rl"
1198
+ {te = p;p--;{
1199
+ callback_simple(id_on_proc_ins_start);
1200
+ callback(id_on_proc_ins_name, data, encoding, ts + 2, te);
1201
+
1202
+ ( this.cs) = 44;
1203
+ }}
1204
+ break;
1205
+ case 80:
1206
+ // line 549 "ext/ragel/base_lexer.rl"
1207
+ {te = p;p--;{
1208
+ p--;
1209
+ ( this.cs) = 82;
1210
+ }}
1211
+ break;
1212
+ case 81:
1213
+ // line 549 "ext/ragel/base_lexer.rl"
1214
+ {{p = ((te))-1;}{
1215
+ p--;
1216
+ ( this.cs) = 82;
1217
+ }}
1218
+ break;
1219
+ case 82:
1220
+ // line 1 "NONE"
1221
+ { switch( ( this.act) ) {
1222
+ case 54:
1223
+ {{p = ((te))-1;}
1224
+ callback_simple(id_on_xml_decl_start);
1225
+ ( this.cs) = 66;
1226
+ }
1227
+ break;
1228
+ case 57:
1229
+ {{p = ((te))-1;}
1230
+ callback_simple(id_on_proc_ins_start);
1231
+ callback(id_on_proc_ins_name, data, encoding, ts + 2, te);
1232
+
1233
+ ( this.cs) = 44;
1234
+ }
1235
+ break;
1236
+ }
1237
+ }
1238
+ break;
1239
+ // line 1240 "ext/java/org/liboga/xml/Lexer.java"
1240
+ }
1241
+ }
1242
+ }
1243
+
1244
+ case 2:
1245
+ _acts = _java_lexer_to_state_actions[( this.cs)];
1246
+ _nacts = (int) _java_lexer_actions[_acts++];
1247
+ while ( _nacts-- > 0 ) {
1248
+ switch ( _java_lexer_actions[_acts++] ) {
1249
+ case 2:
1250
+ // line 1 "NONE"
1251
+ {ts = -1;}
1252
+ break;
1253
+ // line 1254 "ext/java/org/liboga/xml/Lexer.java"
1254
+ }
1255
+ }
1256
+
1257
+ if ( ( this.cs) == 0 ) {
1258
+ _goto_targ = 5;
1259
+ continue _goto;
1260
+ }
1261
+ if ( ++p != pe ) {
1262
+ _goto_targ = 1;
1263
+ continue _goto;
1264
+ }
1265
+ case 4:
1266
+ if ( p == eof )
1267
+ {
1268
+ if ( _java_lexer_eof_trans[( this.cs)] > 0 ) {
1269
+ _trans = _java_lexer_eof_trans[( this.cs)] - 1;
1270
+ _goto_targ = 3;
1271
+ continue _goto;
1272
+ }
1273
+ }
1274
+
1275
+ case 5:
1276
+ }
1277
+ break; }
1278
+ }
1279
+
1280
+ // line 136 "ext/java/org/liboga/xml/Lexer.rl"
1281
+
1282
+ this.lines = lines;
1283
+
1284
+ return context.nil;
1285
+ }
1286
+
1287
+ /**
1288
+ * Resets the internal state of the lexer: zeroes `act` and `top`, allocates a fresh `stack` and puts `cs` back on `java_lexer_start`. Always returns nil.
1289
+ */
1290
+ @JRubyMethod
1291
+ public IRubyObject reset_native(ThreadContext context)
1292
+ {
1293
+ this.act = 0;
1294
+ this.top = 0; // index of the next free slot in `stack` (the state machine pushes with stack[top++] and pops with stack[--top])
1295
+ this.stack = new int[4]; // NOTE(review): fixed at 4 entries and the generated pushes do no bounds check - confirm the nesting depth never exceeds 4
1296
+ this.cs = java_lexer_start; // current state returns to java_lexer_start
1297
+
1298
+ return context.nil;
1299
+ }
1300
+
1301
+ /**
1302
+ * Calls back into Ruby land passing the current token value along.
1303
+ *
1304
+ * This method calls back into Ruby land based on the method name
1305
+ * specified in `name`. The Ruby callback should take one argument. This
1306
+ * argument will be a String containing the value of the current token, i.e. the `te - ts` bytes of `data` starting at offset `ts`, tagged with encoding `enc`.
1307
+ */
1308
+ public void callback(String name, byte[] data, Encoding enc, int ts, int te)
1309
+ {
1310
+ ByteList bytelist = new ByteList(data, ts, te - ts, enc, true); // `te` is exclusive: the length passed is te - ts. NOTE(review): the trailing `true` is presumably ByteList's copy flag - confirm against the ByteList API
1311
+
1312
+ RubyString value = this.runtime.newString(bytelist);
1313
+
1314
+ ThreadContext context = this.runtime.getCurrentContext();
1315
+
1316
+ this.callMethod(context, name, value); // invokes the method called `name` on this object with the token string as its only argument
1317
+ }
1318
+
1319
+ /**
1320
+ * Calls back into Ruby land without passing any arguments: invokes the method called `name` on this object using the runtime's current thread context.
1321
+ */
1322
+ public void callback_simple(String name)
1323
+ {
1324
+ ThreadContext context = this.runtime.getCurrentContext();
1325
+
1326
+ this.callMethod(context, name); // the return value of the Ruby method is discarded
1327
+ }
1328
+
1329
+ /**
1330
+ * Advances the line number by `amount` lines by calling the Ruby method `advance_line` on this object with `amount` as its only argument.
1331
+ */
1332
+ public void advance_line(int amount)
1333
+ {
1334
+ ThreadContext context = this.runtime.getCurrentContext();
1335
+ RubyFixnum lines = this.runtime.newFixnum(amount); // wrap the Java int as a Ruby Fixnum before handing it to Ruby
1336
+
1337
+ this.callMethod(context, "advance_line", lines);
1338
+ }
1339
+
1340
+ /** Calls the Ruby method `html_script?` on this object and returns whether its result is true. The state machine checks this right after emitting `on_element_open_end` to choose the next state.
1341
+ * @see Oga::XML::Lexer#html_script?
1342
+ */
1343
+ public Boolean html_script_p()
1344
+ {
1345
+ ThreadContext context = this.runtime.getCurrentContext();
1346
+
1347
+ return this.callMethod(context, "html_script?").isTrue(); // NOTE(review): declared as boxed Boolean; a primitive boolean would presumably suffice - confirm no caller relies on the boxed type
1348
+ }
1349
+
1350
+ /** Calls the Ruby method `html_style?` on this object and returns whether its result is true. The state machine checks this, after `html_script_p()`, once `on_element_open_end` has been emitted.
1351
+ * @see Oga::XML::Lexer#html_style?
1352
+ */
1353
+ public Boolean html_style_p()
1354
+ {
1355
+ ThreadContext context = this.runtime.getCurrentContext();
1356
+
1357
+ return this.callMethod(context, "html_style?").isTrue(); // NOTE(review): declared as boxed Boolean; a primitive boolean would presumably suffice - confirm no caller relies on the boxed type
1358
+ }
1359
+ }
1360
+
1361
+
1362
+ // line 223 "ext/java/org/liboga/xml/Lexer.rl"
1363
+