oga 0.1.3 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +57 -0
- data/doc/changelog.md +128 -0
- data/doc/css/common.css +5 -4
- data/doc/css_selectors.md +935 -0
- data/doc/manually_creating_documents.md +67 -0
- data/doc/xml_namespaces.md +63 -0
- data/ext/c/lexer.c +745 -628
- data/ext/c/lexer.h +8 -0
- data/ext/c/lexer.rl +44 -7
- data/ext/java/org/liboga/xml/Lexer.java +351 -232
- data/ext/java/org/liboga/xml/Lexer.rl +29 -8
- data/ext/ragel/base_lexer.rl +68 -18
- data/lib/oga.rb +4 -1
- data/lib/oga/css/lexer.rb +743 -0
- data/lib/oga/css/parser.rb +828 -0
- data/lib/oga/version.rb +1 -1
- data/lib/oga/xml/attribute.rb +3 -1
- data/lib/oga/xml/element.rb +15 -1
- data/lib/oga/xml/entities.rb +60 -0
- data/lib/oga/xml/html_void_elements.rb +2 -0
- data/lib/oga/xml/lexer.rb +36 -28
- data/lib/oga/xml/node_set.rb +22 -0
- data/lib/oga/xml/parser.rb +149 -128
- data/lib/oga/xml/querying.rb +24 -0
- data/lib/oga/xml/sax_parser.rb +55 -1
- data/lib/oga/xml/text.rb +6 -1
- data/lib/oga/xpath/evaluator.rb +138 -101
- data/lib/oga/xpath/lexer.rb +1205 -1294
- data/lib/oga/xpath/parser.rb +228 -204
- metadata +9 -4
- data/lib/oga/xpath/node.rb +0 -10
data/ext/c/lexer.h
CHANGED
data/ext/c/lexer.rl
CHANGED
@@ -22,6 +22,9 @@ on `ts` and `te`) so the macro ignores this argument.
|
|
22
22
|
#define oga_ivar_set(owner, name, value) \
|
23
23
|
rb_ivar_set(owner, rb_intern(name), value)
|
24
24
|
|
25
|
+
/*
 * Forwards `amount` lines to the Ruby-side `advance_line` method of the
 * lexer. A `VALUE self` must be in scope wherever the macro is expanded.
 *
 * The expansion intentionally carries no trailing semicolon: the `;` written
 * at the call site terminates the statement, so the macro behaves like a
 * single expression (e.g. inside an unbraced `if ... else`).
 */
#define advance_line(amount) \
    rb_funcall(self, rb_intern("advance_line"), 1, INT2NUM(amount))
|
27
|
+
|
25
28
|
%%machine c_lexer;
|
26
29
|
|
27
30
|
/**
|
@@ -72,10 +75,14 @@ void liboga_xml_lexer_callback_simple(VALUE self, const char *name)
|
|
72
75
|
*/
|
73
76
|
VALUE oga_xml_lexer_advance(VALUE self, VALUE data_block)
|
74
77
|
{
|
78
|
+
OgaLexerState *state;
|
79
|
+
|
75
80
|
/* Make sure that all data passed back to Ruby has the proper encoding. */
|
76
81
|
rb_encoding *encoding = rb_enc_get(data_block);
|
77
82
|
|
78
|
-
char *data_str_val =
|
83
|
+
char *data_str_val = StringValueCStr(data_block);
|
84
|
+
|
85
|
+
Data_Get_Struct(self, OgaLexerState, state);
|
79
86
|
|
80
87
|
const char *p = data_str_val;
|
81
88
|
const char *pe = data_str_val + strlen(data_str_val);
|
@@ -84,13 +91,11 @@ VALUE oga_xml_lexer_advance(VALUE self, VALUE data_block)
|
|
84
91
|
const char *te = 0;
|
85
92
|
const char *mark = 0;
|
86
93
|
|
87
|
-
int
|
88
|
-
int cs = NUM2INT(oga_ivar_get(self, "@cs"));
|
94
|
+
int lines = state->lines;
|
89
95
|
|
90
96
|
%% write exec;
|
91
97
|
|
92
|
-
|
93
|
-
oga_ivar_set(self, "@cs", INT2NUM(cs));
|
98
|
+
state->lines = lines;
|
94
99
|
|
95
100
|
return Qnil;
|
96
101
|
}
|
@@ -100,14 +105,44 @@ VALUE oga_xml_lexer_advance(VALUE self, VALUE data_block)
|
|
100
105
|
*/
|
101
106
|
VALUE oga_xml_lexer_reset(VALUE self)
{
    OgaLexerState *lexer_state;

    /* Fetch the OgaLexerState struct wrapped by this lexer object. */
    Data_Get_Struct(self, OgaLexerState, lexer_state);

    /* Rewind the Ragel machine to its start state with an empty call stack. */
    lexer_state->cs  = c_lexer_start;
    lexer_state->top = 0;
    lexer_state->act = 0;

    /* Forget any line count that was still pending. */
    lexer_state->lines = 0;

    return Qnil;
}
|
108
119
|
|
120
|
+
/**
|
121
|
+
* Frees the associated lexer state struct.
|
122
|
+
*/
|
123
|
+
void oga_xml_lexer_free(void *state)
|
124
|
+
{
|
125
|
+
free((OgaLexerState *) state);
|
126
|
+
}
|
127
|
+
|
128
|
+
/**
|
129
|
+
* Allocates and wraps the C lexer state struct. This state is used to keep
|
130
|
+
* track of the current position, line numbers, etc.
|
131
|
+
*/
|
132
|
+
VALUE oga_xml_lexer_allocate(VALUE klass)
|
133
|
+
{
|
134
|
+
OgaLexerState *state = malloc(sizeof(OgaLexerState));
|
135
|
+
|
136
|
+
return Data_Wrap_Struct(klass, NULL, oga_xml_lexer_free, state);
|
137
|
+
}
|
138
|
+
|
109
139
|
%%{
|
110
140
|
include base_lexer "base_lexer.rl";
|
141
|
+
|
142
|
+
variable top state->top;
|
143
|
+
variable stack state->stack;
|
144
|
+
variable act state->act;
|
145
|
+
variable cs state->cs;
|
111
146
|
}%%
|
112
147
|
|
113
148
|
void Init_liboga_xml_lexer()
|
@@ -118,4 +153,6 @@ void Init_liboga_xml_lexer()
|
|
118
153
|
|
119
154
|
rb_define_method(cLexer, "advance_native", oga_xml_lexer_advance, 1);
|
120
155
|
rb_define_method(cLexer, "reset_native", oga_xml_lexer_reset, 0);
|
156
|
+
|
157
|
+
rb_define_alloc_func(cLexer, oga_xml_lexer_allocate);
|
121
158
|
}
|
@@ -43,14 +43,16 @@ public class Lexer extends RubyObject
|
|
43
43
|
private static byte[] init__java_lexer_actions_0()
|
44
44
|
{
|
45
45
|
return new byte [] {
|
46
|
-
0, 1, 0, 1,
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
46
|
+
0, 1, 0, 1, 2, 1, 3, 1, 4, 1, 5, 1,
|
47
|
+
6, 1, 7, 1, 8, 1, 9, 1, 10, 1, 11, 1,
|
48
|
+
14, 1, 15, 1, 16, 1, 17, 1, 18, 1, 19, 1,
|
49
|
+
20, 1, 21, 1, 22, 1, 23, 1, 24, 1, 25, 1,
|
50
|
+
26, 1, 27, 1, 28, 1, 29, 1, 30, 1, 31, 1,
|
51
|
+
32, 1, 33, 1, 34, 1, 35, 1, 36, 1, 37, 1,
|
52
|
+
38, 1, 39, 1, 42, 1, 43, 1, 44, 1, 45, 1,
|
53
|
+
46, 1, 47, 1, 48, 1, 49, 1, 50, 1, 51, 2,
|
54
|
+
0, 1, 2, 4, 12, 2, 4, 13, 2, 4, 40, 2,
|
55
|
+
4, 41
|
54
56
|
};
|
55
57
|
}
|
56
58
|
|
@@ -62,10 +64,10 @@ private static short[] init__java_lexer_key_offsets_0()
|
|
62
64
|
return new short [] {
|
63
65
|
0, 0, 4, 5, 6, 7, 9, 11, 13, 15, 17, 19,
|
64
66
|
21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 41,
|
65
|
-
51, 60,
|
66
|
-
|
67
|
-
|
68
|
-
|
67
|
+
51, 60, 60, 61, 62, 63, 64, 75, 77, 85, 94, 103,
|
68
|
+
104, 105, 106, 107, 108, 109, 125, 133, 142, 151, 160, 169,
|
69
|
+
178, 187, 196, 205, 214, 223, 234, 242, 243, 251, 260, 277,
|
70
|
+
286, 287, 288, 299
|
69
71
|
};
|
70
72
|
}
|
71
73
|
|
@@ -80,28 +82,27 @@ private static char[] init__java_lexer_trans_keys_0()
|
|
80
82
|
68, 65, 84, 65, 91, 93, 93, 62, 93, 45, 95, 48,
|
81
83
|
57, 65, 90, 97, 122, 45, 58, 62, 95, 48, 57, 65,
|
82
84
|
90, 97, 122, 45, 95, 120, 48, 57, 65, 90, 97, 122,
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
97, 122, 0
|
85
|
+
93, 10, 62, 60, 33, 45, 47, 63, 95, 48, 57, 65,
|
86
|
+
90, 97, 122, 9, 32, 45, 95, 48, 57, 65, 90, 97,
|
87
|
+
122, 45, 95, 109, 48, 57, 65, 90, 97, 122, 45, 95,
|
88
|
+
108, 48, 57, 65, 90, 97, 122, 63, 62, 39, 39, 34,
|
89
|
+
34, 9, 32, 34, 39, 45, 62, 80, 83, 91, 95, 48,
|
90
|
+
57, 65, 90, 97, 122, 45, 95, 48, 57, 65, 90, 97,
|
91
|
+
122, 45, 85, 95, 48, 57, 65, 90, 97, 122, 45, 66,
|
92
|
+
95, 48, 57, 65, 90, 97, 122, 45, 76, 95, 48, 57,
|
93
|
+
65, 90, 97, 122, 45, 73, 95, 48, 57, 65, 90, 97,
|
94
|
+
122, 45, 67, 95, 48, 57, 65, 90, 97, 122, 45, 89,
|
95
|
+
95, 48, 57, 65, 90, 97, 122, 45, 83, 95, 48, 57,
|
96
|
+
65, 90, 97, 122, 45, 84, 95, 48, 57, 65, 90, 97,
|
97
|
+
122, 45, 69, 95, 48, 57, 65, 90, 97, 122, 45, 77,
|
98
|
+
95, 48, 57, 65, 90, 97, 122, 34, 39, 45, 63, 95,
|
99
|
+
48, 57, 65, 90, 97, 122, 45, 95, 48, 57, 65, 90,
|
100
|
+
97, 122, 62, 45, 95, 48, 57, 65, 90, 97, 122, 45,
|
101
|
+
58, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32,
|
102
|
+
34, 39, 45, 47, 61, 62, 95, 48, 57, 65, 90, 97,
|
103
|
+
122, 45, 58, 95, 48, 57, 65, 90, 97, 122, 60, 60,
|
104
|
+
33, 45, 60, 63, 95, 47, 57, 65, 90, 97, 122, 33,
|
105
|
+
45, 60, 63, 95, 47, 57, 65, 90, 97, 122, 0
|
105
106
|
};
|
106
107
|
}
|
107
108
|
|
@@ -113,10 +114,10 @@ private static byte[] init__java_lexer_single_lengths_0()
|
|
113
114
|
return new byte [] {
|
114
115
|
0, 4, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
|
115
116
|
2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 4,
|
116
|
-
3,
|
117
|
-
|
118
|
-
3, 3, 3, 3, 3,
|
119
|
-
|
117
|
+
3, 0, 1, 1, 1, 1, 5, 2, 2, 3, 3, 1,
|
118
|
+
1, 1, 1, 1, 1, 10, 2, 3, 3, 3, 3, 3,
|
119
|
+
3, 3, 3, 3, 3, 5, 2, 1, 2, 3, 11, 3,
|
120
|
+
1, 1, 5, 5
|
120
121
|
};
|
121
122
|
}
|
122
123
|
|
@@ -128,10 +129,10 @@ private static byte[] init__java_lexer_range_lengths_0()
|
|
128
129
|
return new byte [] {
|
129
130
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
130
131
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3,
|
131
|
-
3, 0, 0, 0, 0, 0,
|
132
|
-
|
133
|
-
3, 3, 3, 3, 3, 3, 3,
|
134
|
-
|
132
|
+
3, 0, 0, 0, 0, 0, 3, 0, 3, 3, 3, 0,
|
133
|
+
0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3,
|
134
|
+
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3,
|
135
|
+
0, 0, 3, 3
|
135
136
|
};
|
136
137
|
}
|
137
138
|
|
@@ -143,10 +144,10 @@ private static short[] init__java_lexer_index_offsets_0()
|
|
143
144
|
return new short [] {
|
144
145
|
0, 0, 5, 7, 9, 11, 14, 17, 20, 23, 26, 29,
|
145
146
|
32, 35, 37, 39, 41, 43, 45, 47, 49, 51, 54, 60,
|
146
|
-
68, 75,
|
147
|
-
|
148
|
-
|
149
|
-
|
147
|
+
68, 75, 76, 78, 80, 82, 84, 93, 96, 102, 109, 116,
|
148
|
+
118, 120, 122, 124, 126, 128, 142, 148, 155, 162, 169, 176,
|
149
|
+
183, 190, 197, 204, 211, 218, 227, 233, 235, 241, 248, 263,
|
150
|
+
270, 272, 274, 283
|
150
151
|
};
|
151
152
|
}
|
152
153
|
|
@@ -162,26 +163,25 @@ private static byte[] init__java_lexer_indicies_0()
|
|
162
163
|
0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21,
|
163
164
|
20, 22, 20, 23, 22, 20, 24, 24, 24, 24, 24, 0,
|
164
165
|
24, 25, 26, 24, 24, 24, 24, 0, 27, 27, 28, 27,
|
165
|
-
27, 27, 0, 30, 29,
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
0
|
166
|
+
27, 27, 0, 29, 30, 29, 31, 32, 33, 32, 35, 34,
|
167
|
+
37, 38, 25, 39, 38, 38, 38, 38, 36, 14, 14, 40,
|
168
|
+
27, 27, 27, 27, 27, 41, 27, 27, 43, 27, 27, 27,
|
169
|
+
42, 27, 27, 44, 27, 27, 27, 42, 46, 45, 48, 47,
|
170
|
+
50, 49, 51, 49, 53, 52, 54, 52, 55, 55, 56, 57,
|
171
|
+
58, 59, 60, 61, 62, 58, 58, 58, 58, 32, 58, 58,
|
172
|
+
58, 58, 58, 63, 58, 65, 58, 58, 58, 58, 64, 58,
|
173
|
+
66, 58, 58, 58, 58, 64, 58, 67, 58, 58, 58, 58,
|
174
|
+
64, 58, 68, 58, 58, 58, 58, 64, 58, 69, 58, 58,
|
175
|
+
58, 58, 64, 58, 70, 58, 58, 58, 58, 64, 58, 71,
|
176
|
+
58, 58, 58, 58, 64, 58, 72, 58, 58, 58, 58, 64,
|
177
|
+
58, 73, 58, 58, 58, 58, 64, 58, 69, 58, 58, 58,
|
178
|
+
58, 64, 75, 76, 77, 78, 77, 77, 77, 77, 74, 77,
|
179
|
+
77, 77, 77, 77, 79, 81, 80, 82, 82, 82, 82, 82,
|
180
|
+
32, 82, 84, 82, 82, 82, 82, 83, 85, 31, 86, 85,
|
181
|
+
87, 88, 89, 90, 85, 91, 89, 89, 89, 89, 32, 89,
|
182
|
+
93, 89, 89, 89, 89, 92, 95, 94, 97, 94, 98, 98,
|
183
|
+
97, 98, 98, 98, 98, 98, 94, 99, 99, 97, 99, 99,
|
184
|
+
99, 99, 99, 94, 0
|
185
185
|
};
|
186
186
|
}
|
187
187
|
|
@@ -191,15 +191,15 @@ private static final byte _java_lexer_indicies[] = init__java_lexer_indicies_0()
|
|
191
191
|
private static byte[] init__java_lexer_trans_targs_0()
|
192
192
|
{
|
193
193
|
return new byte [] {
|
194
|
-
|
195
|
-
11, 12,
|
196
|
-
23, 22,
|
197
|
-
|
198
|
-
35,
|
199
|
-
|
200
|
-
51, 52, 53,
|
201
|
-
|
202
|
-
|
194
|
+
29, 2, 6, 13, 3, 4, 5, 29, 7, 8, 9, 10,
|
195
|
+
11, 12, 31, 14, 15, 16, 17, 18, 19, 20, 21, 29,
|
196
|
+
23, 22, 29, 32, 33, 26, 41, 58, 0, 58, 29, 30,
|
197
|
+
29, 1, 29, 24, 29, 29, 29, 34, 32, 35, 36, 35,
|
198
|
+
35, 38, 37, 37, 40, 39, 39, 41, 41, 41, 42, 41,
|
199
|
+
43, 48, 25, 41, 41, 44, 45, 46, 47, 42, 49, 50,
|
200
|
+
51, 52, 53, 53, 53, 54, 55, 53, 53, 53, 57, 56,
|
201
|
+
56, 58, 27, 58, 58, 59, 28, 58, 58, 58, 61, 63,
|
202
|
+
60, 62, 60, 60
|
203
203
|
};
|
204
204
|
}
|
205
205
|
|
@@ -209,15 +209,15 @@ private static final byte _java_lexer_trans_targs[] = init__java_lexer_trans_tar
|
|
209
209
|
private static byte[] init__java_lexer_trans_actions_0()
|
210
210
|
{
|
211
211
|
return new byte [] {
|
212
|
-
|
213
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
214
|
-
0, 0,
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
0, 0,
|
219
|
-
|
220
|
-
|
212
|
+
91, 0, 0, 0, 0, 0, 0, 75, 0, 0, 0, 0,
|
213
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77,
|
214
|
+
0, 0, 81, 107, 0, 0, 23, 55, 0, 65, 83, 7,
|
215
|
+
89, 0, 79, 0, 85, 93, 87, 0, 104, 11, 0, 13,
|
216
|
+
9, 1, 15, 17, 1, 19, 21, 29, 27, 25, 101, 31,
|
217
|
+
0, 0, 0, 35, 33, 0, 0, 0, 0, 98, 0, 0,
|
218
|
+
0, 0, 43, 41, 39, 0, 0, 45, 47, 37, 0, 51,
|
219
|
+
49, 53, 0, 61, 59, 0, 0, 63, 67, 57, 1, 95,
|
220
|
+
73, 95, 71, 69
|
221
221
|
};
|
222
222
|
}
|
223
223
|
|
@@ -229,10 +229,10 @@ private static byte[] init__java_lexer_to_state_actions_0()
|
|
229
229
|
return new byte [] {
|
230
230
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
231
231
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
232
|
-
0, 0, 0, 0, 0,
|
233
|
-
0,
|
234
|
-
0, 0, 0, 0, 0, 0, 0, 3, 0,
|
235
|
-
3, 0,
|
232
|
+
0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 3,
|
233
|
+
0, 3, 0, 3, 0, 3, 0, 0, 0, 0, 0, 0,
|
234
|
+
0, 0, 0, 0, 0, 3, 0, 0, 3, 0, 3, 0,
|
235
|
+
3, 0, 0, 0
|
236
236
|
};
|
237
237
|
}
|
238
238
|
|
@@ -244,10 +244,10 @@ private static byte[] init__java_lexer_from_state_actions_0()
|
|
244
244
|
return new byte [] {
|
245
245
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
246
246
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
247
|
-
0, 0, 0, 0, 0,
|
248
|
-
0,
|
249
|
-
0, 0, 0, 0, 0, 0, 0, 5, 0,
|
250
|
-
5, 0,
|
247
|
+
0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 5,
|
248
|
+
0, 5, 0, 5, 0, 5, 0, 0, 0, 0, 0, 0,
|
249
|
+
0, 0, 0, 0, 0, 5, 0, 0, 5, 0, 5, 0,
|
250
|
+
5, 0, 0, 0
|
251
251
|
};
|
252
252
|
}
|
253
253
|
|
@@ -259,27 +259,29 @@ private static short[] init__java_lexer_eof_trans_0()
|
|
259
259
|
return new short [] {
|
260
260
|
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
261
261
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
262
|
-
1, 0, 0, 0, 0,
|
263
|
-
|
264
|
-
|
265
|
-
0,
|
262
|
+
1, 0, 0, 0, 0, 0, 37, 41, 42, 43, 43, 0,
|
263
|
+
48, 0, 52, 0, 55, 0, 64, 65, 65, 65, 65, 65,
|
264
|
+
65, 65, 65, 65, 65, 0, 80, 81, 0, 84, 0, 93,
|
265
|
+
0, 97, 97, 97
|
266
266
|
};
|
267
267
|
}
|
268
268
|
|
269
269
|
private static final short _java_lexer_eof_trans[] = init__java_lexer_eof_trans_0();
|
270
270
|
|
271
271
|
|
272
|
-
static final int java_lexer_start =
|
273
|
-
static final int java_lexer_first_final =
|
272
|
+
static final int java_lexer_start = 29;
|
273
|
+
static final int java_lexer_first_final = 29;
|
274
274
|
static final int java_lexer_error = 0;
|
275
275
|
|
276
|
-
static final int java_lexer_en_proc_ins_body =
|
277
|
-
static final int
|
278
|
-
static final int
|
279
|
-
static final int
|
280
|
-
static final int
|
281
|
-
static final int
|
282
|
-
static final int
|
276
|
+
static final int java_lexer_en_proc_ins_body = 35;
|
277
|
+
static final int java_lexer_en_string_squote = 37;
|
278
|
+
static final int java_lexer_en_string_dquote = 39;
|
279
|
+
static final int java_lexer_en_doctype = 41;
|
280
|
+
static final int java_lexer_en_xml_decl = 53;
|
281
|
+
static final int java_lexer_en_element_name = 56;
|
282
|
+
static final int java_lexer_en_element_head = 58;
|
283
|
+
static final int java_lexer_en_text = 60;
|
284
|
+
static final int java_lexer_en_main = 29;
|
283
285
|
|
284
286
|
|
285
287
|
// line 39 "ext/java/org/liboga/xml/Lexer.rl"
|
@@ -287,6 +289,9 @@ static final int java_lexer_en_main = 35;
|
|
287
289
|
/* Used by Ragel to keep track of the current state. */
|
288
290
|
int act;
|
289
291
|
int cs;
|
292
|
+
int top;
|
293
|
+
int lines;
|
294
|
+
int[] stack;
|
290
295
|
|
291
296
|
/**
|
292
297
|
* Sets up the current class in the Ruby runtime.
|
@@ -337,15 +342,16 @@ static final int java_lexer_en_main = 35;
|
|
337
342
|
|
338
343
|
byte[] data = rb_str.getBytes();
|
339
344
|
|
340
|
-
int ts
|
341
|
-
int te
|
342
|
-
int p
|
343
|
-
int mark
|
344
|
-
int
|
345
|
-
int
|
345
|
+
int ts = 0;
|
346
|
+
int te = 0;
|
347
|
+
int p = 0;
|
348
|
+
int mark = 0;
|
349
|
+
int lines = this.lines;
|
350
|
+
int pe = data.length;
|
351
|
+
int eof = data.length;
|
346
352
|
|
347
353
|
|
348
|
-
// line
|
354
|
+
// line 355 "ext/java/org/liboga/xml/Lexer.java"
|
349
355
|
{
|
350
356
|
int _klen;
|
351
357
|
int _trans = 0;
|
@@ -370,11 +376,11 @@ case 1:
|
|
370
376
|
_nacts = (int) _java_lexer_actions[_acts++];
|
371
377
|
while ( _nacts-- > 0 ) {
|
372
378
|
switch ( _java_lexer_actions[_acts++] ) {
|
373
|
-
case
|
379
|
+
case 3:
|
374
380
|
// line 1 "NONE"
|
375
381
|
{ts = p;}
|
376
382
|
break;
|
377
|
-
// line
|
383
|
+
// line 384 "ext/java/org/liboga/xml/Lexer.java"
|
378
384
|
}
|
379
385
|
}
|
380
386
|
|
@@ -439,76 +445,136 @@ case 3:
|
|
439
445
|
switch ( _java_lexer_actions[_acts++] )
|
440
446
|
{
|
441
447
|
case 0:
|
442
|
-
// line
|
448
|
+
// line 40 "ext/ragel/base_lexer.rl"
|
449
|
+
{
|
450
|
+
if ( data[p] == '\n' ) lines++;
|
451
|
+
}
|
452
|
+
break;
|
453
|
+
case 1:
|
454
|
+
// line 353 "ext/ragel/base_lexer.rl"
|
443
455
|
{ mark = p; }
|
444
456
|
break;
|
445
|
-
case
|
457
|
+
case 4:
|
446
458
|
// line 1 "NONE"
|
447
459
|
{te = p+1;}
|
448
460
|
break;
|
449
|
-
case
|
450
|
-
// line
|
461
|
+
case 5:
|
462
|
+
// line 105 "ext/ragel/base_lexer.rl"
|
451
463
|
{te = p+1;{
|
452
464
|
callback("on_text", data, encoding, mark, ts);
|
453
465
|
callback_simple("on_proc_ins_end");
|
454
466
|
|
455
|
-
|
467
|
+
mark = 0;
|
468
|
+
|
469
|
+
( this.cs) = 29;
|
456
470
|
}}
|
457
471
|
break;
|
458
|
-
case 5:
|
459
|
-
// line 107 "ext/ragel/base_lexer.rl"
|
460
|
-
{te = p+1;}
|
461
|
-
break;
|
462
472
|
case 6:
|
463
|
-
// line
|
464
|
-
{te = p;
|
473
|
+
// line 114 "ext/ragel/base_lexer.rl"
|
474
|
+
{te = p+1;}
|
465
475
|
break;
|
466
476
|
case 7:
|
467
|
-
// line
|
468
|
-
{
|
477
|
+
// line 114 "ext/ragel/base_lexer.rl"
|
478
|
+
{te = p;p--;}
|
469
479
|
break;
|
470
480
|
case 8:
|
471
|
-
// line
|
472
|
-
{
|
481
|
+
// line 152 "ext/ragel/base_lexer.rl"
|
482
|
+
{te = p+1;{
|
483
|
+
callback_simple("on_string_squote");
|
484
|
+
|
485
|
+
{( this.cs) = ( this.stack)[--( this.top)];_goto_targ = 2; if (true) continue _goto;}
|
486
|
+
}}
|
473
487
|
break;
|
474
488
|
case 9:
|
475
|
-
// line
|
489
|
+
// line 126 "ext/ragel/base_lexer.rl"
|
490
|
+
{te = p;p--;{
|
491
|
+
callback("on_string_body", data, encoding, ts, te);
|
492
|
+
|
493
|
+
if ( lines > 0 )
|
494
|
+
{
|
495
|
+
advance_line(lines);
|
496
|
+
|
497
|
+
lines = 0;
|
498
|
+
}
|
499
|
+
}}
|
500
|
+
break;
|
501
|
+
case 10:
|
502
|
+
// line 162 "ext/ragel/base_lexer.rl"
|
503
|
+
{te = p+1;{
|
504
|
+
callback_simple("on_string_dquote");
|
505
|
+
|
506
|
+
{( this.cs) = ( this.stack)[--( this.top)];_goto_targ = 2; if (true) continue _goto;}
|
507
|
+
}}
|
508
|
+
break;
|
509
|
+
case 11:
|
510
|
+
// line 126 "ext/ragel/base_lexer.rl"
|
511
|
+
{te = p;p--;{
|
512
|
+
callback("on_string_body", data, encoding, ts, te);
|
513
|
+
|
514
|
+
if ( lines > 0 )
|
515
|
+
{
|
516
|
+
advance_line(lines);
|
517
|
+
|
518
|
+
lines = 0;
|
519
|
+
}
|
520
|
+
}}
|
521
|
+
break;
|
522
|
+
case 12:
|
523
|
+
// line 189 "ext/ragel/base_lexer.rl"
|
524
|
+
{( this.act) = 7;}
|
525
|
+
break;
|
526
|
+
case 13:
|
527
|
+
// line 207 "ext/ragel/base_lexer.rl"
|
528
|
+
{( this.act) = 12;}
|
529
|
+
break;
|
530
|
+
case 14:
|
531
|
+
// line 195 "ext/ragel/base_lexer.rl"
|
476
532
|
{te = p+1;{
|
477
533
|
callback("on_doctype_inline", data, encoding, ts + 1, te - 1);
|
478
534
|
}}
|
479
535
|
break;
|
480
|
-
case
|
481
|
-
// line
|
536
|
+
case 15:
|
537
|
+
// line 137 "ext/ragel/base_lexer.rl"
|
482
538
|
{te = p+1;{
|
483
|
-
|
539
|
+
callback_simple("on_string_squote");
|
540
|
+
|
541
|
+
{( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 37; _goto_targ = 2; if (true) continue _goto;}
|
484
542
|
}}
|
485
543
|
break;
|
486
|
-
case
|
487
|
-
// line
|
544
|
+
case 16:
|
545
|
+
// line 143 "ext/ragel/base_lexer.rl"
|
546
|
+
{te = p+1;{
|
547
|
+
callback_simple("on_string_dquote");
|
548
|
+
|
549
|
+
{( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 39; _goto_targ = 2; if (true) continue _goto;}
|
550
|
+
}}
|
551
|
+
break;
|
552
|
+
case 17:
|
553
|
+
// line 205 "ext/ragel/base_lexer.rl"
|
488
554
|
{te = p+1;}
|
489
555
|
break;
|
490
|
-
case
|
491
|
-
// line
|
556
|
+
case 18:
|
557
|
+
// line 211 "ext/ragel/base_lexer.rl"
|
492
558
|
{te = p+1;{
|
493
559
|
callback_simple("on_doctype_end");
|
494
|
-
( this.cs) =
|
560
|
+
( this.cs) = 29;
|
495
561
|
}}
|
496
562
|
break;
|
497
|
-
case
|
498
|
-
// line
|
563
|
+
case 19:
|
564
|
+
// line 207 "ext/ragel/base_lexer.rl"
|
499
565
|
{te = p;p--;{
|
500
566
|
callback("on_doctype_name", data, encoding, ts, te);
|
501
567
|
}}
|
502
568
|
break;
|
503
|
-
case
|
569
|
+
case 20:
|
504
570
|
// line 1 "NONE"
|
505
571
|
{ switch( ( this.act) ) {
|
506
|
-
case
|
572
|
+
case 7:
|
507
573
|
{{p = ((te))-1;}
|
508
574
|
callback("on_doctype_type", data, encoding, ts, te);
|
509
575
|
}
|
510
576
|
break;
|
511
|
-
case
|
577
|
+
case 12:
|
512
578
|
{{p = ((te))-1;}
|
513
579
|
callback("on_doctype_name", data, encoding, ts, te);
|
514
580
|
}
|
@@ -516,214 +582,252 @@ case 3:
|
|
516
582
|
}
|
517
583
|
}
|
518
584
|
break;
|
519
|
-
case
|
520
|
-
// line
|
585
|
+
case 21:
|
586
|
+
// line 231 "ext/ragel/base_lexer.rl"
|
521
587
|
{te = p+1;{
|
522
588
|
callback_simple("on_xml_decl_end");
|
523
|
-
( this.cs) =
|
589
|
+
( this.cs) = 29;
|
524
590
|
}}
|
525
591
|
break;
|
526
|
-
case
|
527
|
-
// line
|
592
|
+
case 22:
|
593
|
+
// line 137 "ext/ragel/base_lexer.rl"
|
528
594
|
{te = p+1;{
|
529
|
-
|
595
|
+
callback_simple("on_string_squote");
|
596
|
+
|
597
|
+
{( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 37; _goto_targ = 2; if (true) continue _goto;}
|
530
598
|
}}
|
531
599
|
break;
|
532
|
-
case
|
533
|
-
// line
|
600
|
+
case 23:
|
601
|
+
// line 143 "ext/ragel/base_lexer.rl"
|
602
|
+
{te = p+1;{
|
603
|
+
callback_simple("on_string_dquote");
|
604
|
+
|
605
|
+
{( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 39; _goto_targ = 2; if (true) continue _goto;}
|
606
|
+
}}
|
607
|
+
break;
|
608
|
+
case 24:
|
609
|
+
// line 244 "ext/ragel/base_lexer.rl"
|
534
610
|
{te = p+1;}
|
535
611
|
break;
|
536
|
-
case
|
537
|
-
// line
|
612
|
+
case 25:
|
613
|
+
// line 237 "ext/ragel/base_lexer.rl"
|
538
614
|
{te = p;p--;{
|
539
615
|
callback("on_attribute", data, encoding, ts, te);
|
540
616
|
}}
|
541
617
|
break;
|
542
|
-
case
|
543
|
-
// line
|
618
|
+
case 26:
|
619
|
+
// line 244 "ext/ragel/base_lexer.rl"
|
544
620
|
{te = p;p--;}
|
545
621
|
break;
|
546
|
-
case
|
547
|
-
// line
|
548
|
-
{{p = ((te))-1;}}
|
549
|
-
break;
|
550
|
-
case 21:
|
551
|
-
// line 228 "ext/ragel/base_lexer.rl"
|
622
|
+
case 27:
|
623
|
+
// line 271 "ext/ragel/base_lexer.rl"
|
552
624
|
{te = p+1;{
|
553
625
|
callback("on_element_ns", data, encoding, ts, te - 1);
|
554
626
|
}}
|
555
627
|
break;
|
556
|
-
case
|
557
|
-
// line
|
628
|
+
case 28:
|
629
|
+
// line 275 "ext/ragel/base_lexer.rl"
|
558
630
|
{te = p;p--;{
|
559
631
|
callback("on_element_name", data, encoding, ts, te);
|
560
|
-
( this.cs) =
|
632
|
+
( this.cs) = 58;
|
561
633
|
}}
|
562
634
|
break;
|
563
|
-
case
|
564
|
-
// line
|
635
|
+
case 29:
|
636
|
+
// line 284 "ext/ragel/base_lexer.rl"
|
565
637
|
{te = p+1;}
|
566
638
|
break;
|
567
|
-
case
|
568
|
-
// line
|
639
|
+
case 30:
|
640
|
+
// line 286 "ext/ragel/base_lexer.rl"
|
569
641
|
{te = p+1;{
|
570
642
|
callback_simple("advance_line");
|
571
643
|
}}
|
572
644
|
break;
|
573
|
-
case
|
574
|
-
// line
|
645
|
+
case 31:
|
646
|
+
// line 291 "ext/ragel/base_lexer.rl"
|
575
647
|
{te = p+1;{
|
576
648
|
callback("on_attribute_ns", data, encoding, ts, te - 1);
|
577
649
|
}}
|
578
650
|
break;
|
579
|
-
case
|
580
|
-
// line
|
651
|
+
case 32:
|
652
|
+
// line 137 "ext/ragel/base_lexer.rl"
|
581
653
|
{te = p+1;{
|
582
|
-
|
654
|
+
callback_simple("on_string_squote");
|
655
|
+
|
656
|
+
{( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 37; _goto_targ = 2; if (true) continue _goto;}
|
583
657
|
}}
|
584
658
|
break;
|
585
|
-
case
|
586
|
-
// line
|
659
|
+
case 33:
|
660
|
+
// line 143 "ext/ragel/base_lexer.rl"
|
661
|
+
{te = p+1;{
|
662
|
+
callback_simple("on_string_dquote");
|
663
|
+
|
664
|
+
{( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 39; _goto_targ = 2; if (true) continue _goto;}
|
665
|
+
}}
|
666
|
+
break;
|
667
|
+
case 34:
|
668
|
+
// line 304 "ext/ragel/base_lexer.rl"
|
587
669
|
{te = p+1;{
|
588
670
|
callback_simple("on_element_open_end");
|
589
|
-
( this.cs) =
|
671
|
+
( this.cs) = 29;
|
590
672
|
}}
|
591
673
|
break;
|
592
|
-
case
|
593
|
-
// line
|
674
|
+
case 35:
|
675
|
+
// line 310 "ext/ragel/base_lexer.rl"
|
594
676
|
{te = p+1;{
|
595
677
|
callback_simple("on_element_end");
|
596
|
-
( this.cs) =
|
678
|
+
( this.cs) = 29;
|
597
679
|
}}
|
598
680
|
break;
|
599
|
-
case
|
600
|
-
// line
|
681
|
+
case 36:
|
682
|
+
// line 295 "ext/ragel/base_lexer.rl"
|
601
683
|
{te = p;p--;{
|
602
684
|
callback("on_attribute", data, encoding, ts, te);
|
603
685
|
}}
|
604
686
|
break;
|
605
|
-
case
|
606
|
-
// line
|
687
|
+
case 37:
|
688
|
+
// line 339 "ext/ragel/base_lexer.rl"
|
607
689
|
{te = p+1;{
|
608
690
|
callback("on_text", data, encoding, ts, te);
|
609
691
|
|
610
|
-
(
|
692
|
+
if ( lines > 0 )
|
693
|
+
{
|
694
|
+
advance_line(lines);
|
695
|
+
|
696
|
+
lines = 0;
|
697
|
+
}
|
698
|
+
|
699
|
+
( this.cs) = 29;
|
611
700
|
}}
|
612
701
|
break;
|
613
|
-
case
|
614
|
-
// line
|
702
|
+
case 38:
|
703
|
+
// line 353 "ext/ragel/base_lexer.rl"
|
615
704
|
{te = p+1;{
|
616
705
|
callback("on_text", data, encoding, ts, mark);
|
617
706
|
|
618
707
|
p = mark - 1;
|
619
708
|
mark = 0;
|
620
709
|
|
621
|
-
(
|
710
|
+
if ( lines > 0 )
|
711
|
+
{
|
712
|
+
advance_line(lines);
|
713
|
+
|
714
|
+
lines = 0;
|
715
|
+
}
|
716
|
+
|
717
|
+
( this.cs) = 29;
|
622
718
|
}}
|
623
719
|
break;
|
624
|
-
case
|
625
|
-
// line
|
720
|
+
case 39:
|
721
|
+
// line 339 "ext/ragel/base_lexer.rl"
|
626
722
|
{te = p;p--;{
|
627
723
|
callback("on_text", data, encoding, ts, te);
|
628
|
-
|
724
|
+
|
725
|
+
if ( lines > 0 )
|
726
|
+
{
|
727
|
+
advance_line(lines);
|
728
|
+
|
729
|
+
lines = 0;
|
730
|
+
}
|
731
|
+
|
732
|
+
( this.cs) = 29;
|
629
733
|
}}
|
630
734
|
break;
|
631
|
-
case
|
632
|
-
// line
|
633
|
-
{( this.act) =
|
735
|
+
case 40:
|
736
|
+
// line 224 "ext/ragel/base_lexer.rl"
|
737
|
+
{( this.act) = 32;}
|
634
738
|
break;
|
635
|
-
case
|
636
|
-
// line
|
637
|
-
{( this.act) =
|
739
|
+
case 41:
|
740
|
+
// line 95 "ext/ragel/base_lexer.rl"
|
741
|
+
{( this.act) = 35;}
|
638
742
|
break;
|
639
|
-
case
|
640
|
-
// line
|
743
|
+
case 42:
|
744
|
+
// line 61 "ext/ragel/base_lexer.rl"
|
641
745
|
{te = p+1;{
|
642
746
|
callback("on_comment", data, encoding, ts + 4, te - 3);
|
643
747
|
}}
|
644
748
|
break;
|
645
|
-
case
|
646
|
-
// line
|
749
|
+
case 43:
|
750
|
+
// line 77 "ext/ragel/base_lexer.rl"
|
647
751
|
{te = p+1;{
|
648
752
|
callback("on_cdata", data, encoding, ts + 9, te - 3);
|
649
753
|
}}
|
650
754
|
break;
|
651
|
-
case
|
652
|
-
// line
|
755
|
+
case 44:
|
756
|
+
// line 259 "ext/ragel/base_lexer.rl"
|
653
757
|
{te = p+1;{
|
654
758
|
callback_simple("on_element_start");
|
655
759
|
p--;
|
656
|
-
( this.cs) =
|
760
|
+
( this.cs) = 56;
|
657
761
|
}}
|
658
762
|
break;
|
659
|
-
case
|
660
|
-
// line
|
763
|
+
case 45:
|
764
|
+
// line 265 "ext/ragel/base_lexer.rl"
|
661
765
|
{te = p+1;{
|
662
766
|
callback_simple("on_element_end");
|
663
767
|
}}
|
664
768
|
break;
|
665
|
-
case
|
666
|
-
// line
|
769
|
+
case 46:
|
770
|
+
// line 324 "ext/ragel/base_lexer.rl"
|
667
771
|
{te = p+1;{
|
668
772
|
p--;
|
669
|
-
( this.cs) =
|
773
|
+
( this.cs) = 60;
|
670
774
|
}}
|
671
775
|
break;
|
672
|
-
case
|
673
|
-
// line
|
776
|
+
case 47:
|
777
|
+
// line 181 "ext/ragel/base_lexer.rl"
|
674
778
|
{te = p;p--;{
|
675
779
|
callback_simple("on_doctype_start");
|
676
|
-
( this.cs) =
|
780
|
+
( this.cs) = 41;
|
677
781
|
}}
|
678
782
|
break;
|
679
|
-
case
|
680
|
-
// line
|
783
|
+
case 48:
|
784
|
+
// line 95 "ext/ragel/base_lexer.rl"
|
681
785
|
{te = p;p--;{
|
682
786
|
callback_simple("on_proc_ins_start");
|
683
787
|
callback("on_proc_ins_name", data, encoding, ts + 2, te);
|
684
788
|
|
685
789
|
mark = te;
|
686
790
|
|
687
|
-
( this.cs) =
|
791
|
+
( this.cs) = 35;
|
688
792
|
}}
|
689
793
|
break;
|
690
|
-
case
|
691
|
-
// line
|
794
|
+
case 49:
|
795
|
+
// line 324 "ext/ragel/base_lexer.rl"
|
692
796
|
{te = p;p--;{
|
693
797
|
p--;
|
694
|
-
( this.cs) =
|
798
|
+
( this.cs) = 60;
|
695
799
|
}}
|
696
800
|
break;
|
697
|
-
case
|
698
|
-
// line
|
801
|
+
case 50:
|
802
|
+
// line 324 "ext/ragel/base_lexer.rl"
|
699
803
|
{{p = ((te))-1;}{
|
700
804
|
p--;
|
701
|
-
( this.cs) =
|
805
|
+
( this.cs) = 60;
|
702
806
|
}}
|
703
807
|
break;
|
704
|
-
case
|
808
|
+
case 51:
|
705
809
|
// line 1 "NONE"
|
706
810
|
{ switch( ( this.act) ) {
|
707
|
-
case
|
811
|
+
case 32:
|
708
812
|
{{p = ((te))-1;}
|
709
813
|
callback_simple("on_xml_decl_start");
|
710
|
-
( this.cs) =
|
814
|
+
( this.cs) = 53;
|
711
815
|
}
|
712
816
|
break;
|
713
|
-
case
|
817
|
+
case 35:
|
714
818
|
{{p = ((te))-1;}
|
715
819
|
callback_simple("on_proc_ins_start");
|
716
820
|
callback("on_proc_ins_name", data, encoding, ts + 2, te);
|
717
821
|
|
718
822
|
mark = te;
|
719
823
|
|
720
|
-
( this.cs) =
|
824
|
+
( this.cs) = 35;
|
721
825
|
}
|
722
826
|
break;
|
723
827
|
}
|
724
828
|
}
|
725
829
|
break;
|
726
|
-
// line
|
830
|
+
// line 831 "ext/java/org/liboga/xml/Lexer.java"
|
727
831
|
}
|
728
832
|
}
|
729
833
|
}
|
@@ -733,11 +837,11 @@ case 2:
|
|
733
837
|
_nacts = (int) _java_lexer_actions[_acts++];
|
734
838
|
while ( _nacts-- > 0 ) {
|
735
839
|
switch ( _java_lexer_actions[_acts++] ) {
|
736
|
-
case
|
840
|
+
case 2:
|
737
841
|
// line 1 "NONE"
|
738
842
|
{ts = -1;}
|
739
843
|
break;
|
740
|
-
// line
|
844
|
+
// line 845 "ext/java/org/liboga/xml/Lexer.java"
|
741
845
|
}
|
742
846
|
}
|
743
847
|
|
@@ -764,7 +868,9 @@ case 5:
|
|
764
868
|
break; }
|
765
869
|
}
|
766
870
|
|
767
|
-
// line
|
871
|
+
// line 105 "ext/java/org/liboga/xml/Lexer.rl"
|
872
|
+
|
873
|
+
this.lines = lines;
|
768
874
|
|
769
875
|
return context.nil;
|
770
876
|
}
|
@@ -775,8 +881,10 @@ case 5:
|
|
775
881
|
@JRubyMethod
|
776
882
|
public IRubyObject reset_native(ThreadContext context)
|
777
883
|
{
|
778
|
-
this.act
|
779
|
-
this.
|
884
|
+
this.act = 0;
|
885
|
+
this.top = 0;
|
886
|
+
this.stack = new int[4];
|
887
|
+
this.cs = java_lexer_start;
|
780
888
|
|
781
889
|
return context.nil;
|
782
890
|
}
|
@@ -808,8 +916,19 @@ case 5:
|
|
808
916
|
|
809
917
|
this.callMethod(context, name);
|
810
918
|
}
|
919
|
+
|
920
|
+
/**
|
921
|
+
* Advances the line number by `amount` lines.
|
922
|
+
*/
|
923
|
+
public void advance_line(int amount)
|
924
|
+
{
|
925
|
+
ThreadContext context = this.runtime.getCurrentContext();
|
926
|
+
RubyFixnum lines = this.runtime.newFixnum(amount);
|
927
|
+
|
928
|
+
this.callMethod(context, "advance_line", lines);
|
929
|
+
}
|
811
930
|
}
|
812
931
|
|
813
932
|
|
814
|
-
// line
|
933
|
+
// line 172 "ext/java/org/liboga/xml/Lexer.rl"
|
815
934
|
|