oga 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +57 -0
- data/doc/changelog.md +128 -0
- data/doc/css/common.css +5 -4
- data/doc/css_selectors.md +935 -0
- data/doc/manually_creating_documents.md +67 -0
- data/doc/xml_namespaces.md +63 -0
- data/ext/c/lexer.c +745 -628
- data/ext/c/lexer.h +8 -0
- data/ext/c/lexer.rl +44 -7
- data/ext/java/org/liboga/xml/Lexer.java +351 -232
- data/ext/java/org/liboga/xml/Lexer.rl +29 -8
- data/ext/ragel/base_lexer.rl +68 -18
- data/lib/oga.rb +4 -1
- data/lib/oga/css/lexer.rb +743 -0
- data/lib/oga/css/parser.rb +828 -0
- data/lib/oga/version.rb +1 -1
- data/lib/oga/xml/attribute.rb +3 -1
- data/lib/oga/xml/element.rb +15 -1
- data/lib/oga/xml/entities.rb +60 -0
- data/lib/oga/xml/html_void_elements.rb +2 -0
- data/lib/oga/xml/lexer.rb +36 -28
- data/lib/oga/xml/node_set.rb +22 -0
- data/lib/oga/xml/parser.rb +149 -128
- data/lib/oga/xml/querying.rb +24 -0
- data/lib/oga/xml/sax_parser.rb +55 -1
- data/lib/oga/xml/text.rb +6 -1
- data/lib/oga/xpath/evaluator.rb +138 -101
- data/lib/oga/xpath/lexer.rb +1205 -1294
- data/lib/oga/xpath/parser.rb +228 -204
- metadata +9 -4
- data/lib/oga/xpath/node.rb +0 -10
data/ext/c/lexer.h
CHANGED
data/ext/c/lexer.rl
CHANGED
@@ -22,6 +22,9 @@ on `ts` and `te`) so the macro ignores this argument.
|
|
22
22
|
#define oga_ivar_set(owner, name, value) \
|
23
23
|
rb_ivar_set(owner, rb_intern(name), value)
|
24
24
|
|
25
|
+
#define advance_line(amount) \
|
26
|
+
rb_funcall(self, rb_intern("advance_line"), 1, INT2NUM(amount));
|
27
|
+
|
25
28
|
%%machine c_lexer;
|
26
29
|
|
27
30
|
/**
|
@@ -72,10 +75,14 @@ void liboga_xml_lexer_callback_simple(VALUE self, const char *name)
|
|
72
75
|
*/
|
73
76
|
VALUE oga_xml_lexer_advance(VALUE self, VALUE data_block)
|
74
77
|
{
|
78
|
+
OgaLexerState *state;
|
79
|
+
|
75
80
|
/* Make sure that all data passed back to Ruby has the proper encoding. */
|
76
81
|
rb_encoding *encoding = rb_enc_get(data_block);
|
77
82
|
|
78
|
-
char *data_str_val =
|
83
|
+
char *data_str_val = StringValueCStr(data_block);
|
84
|
+
|
85
|
+
Data_Get_Struct(self, OgaLexerState, state);
|
79
86
|
|
80
87
|
const char *p = data_str_val;
|
81
88
|
const char *pe = data_str_val + strlen(data_str_val);
|
@@ -84,13 +91,11 @@ VALUE oga_xml_lexer_advance(VALUE self, VALUE data_block)
|
|
84
91
|
const char *te = 0;
|
85
92
|
const char *mark = 0;
|
86
93
|
|
87
|
-
int
|
88
|
-
int cs = NUM2INT(oga_ivar_get(self, "@cs"));
|
94
|
+
int lines = state->lines;
|
89
95
|
|
90
96
|
%% write exec;
|
91
97
|
|
92
|
-
|
93
|
-
oga_ivar_set(self, "@cs", INT2NUM(cs));
|
98
|
+
state->lines = lines;
|
94
99
|
|
95
100
|
return Qnil;
|
96
101
|
}
|
@@ -100,14 +105,44 @@ VALUE oga_xml_lexer_advance(VALUE self, VALUE data_block)
|
|
100
105
|
*/
|
101
106
|
VALUE oga_xml_lexer_reset(VALUE self)
|
102
107
|
{
|
103
|
-
|
104
|
-
|
108
|
+
OgaLexerState *state;
|
109
|
+
|
110
|
+
Data_Get_Struct(self, OgaLexerState, state);
|
111
|
+
|
112
|
+
state->act = 0;
|
113
|
+
state->cs = c_lexer_start;
|
114
|
+
state->lines = 0;
|
115
|
+
state->top = 0;
|
105
116
|
|
106
117
|
return Qnil;
|
107
118
|
}
|
108
119
|
|
120
|
+
/**
|
121
|
+
* Frees the associated lexer state struct.
|
122
|
+
*/
|
123
|
+
void oga_xml_lexer_free(void *state)
|
124
|
+
{
|
125
|
+
free((OgaLexerState *) state);
|
126
|
+
}
|
127
|
+
|
128
|
+
/**
|
129
|
+
* Allocates and wraps the C lexer state struct. This state is used to keep
|
130
|
+
* track of the current position, line numbers, etc.
|
131
|
+
*/
|
132
|
+
VALUE oga_xml_lexer_allocate(VALUE klass)
|
133
|
+
{
|
134
|
+
OgaLexerState *state = malloc(sizeof(OgaLexerState));
|
135
|
+
|
136
|
+
return Data_Wrap_Struct(klass, NULL, oga_xml_lexer_free, state);
|
137
|
+
}
|
138
|
+
|
109
139
|
%%{
|
110
140
|
include base_lexer "base_lexer.rl";
|
141
|
+
|
142
|
+
variable top state->top;
|
143
|
+
variable stack state->stack;
|
144
|
+
variable act state->act;
|
145
|
+
variable cs state->cs;
|
111
146
|
}%%
|
112
147
|
|
113
148
|
void Init_liboga_xml_lexer()
|
@@ -118,4 +153,6 @@ void Init_liboga_xml_lexer()
|
|
118
153
|
|
119
154
|
rb_define_method(cLexer, "advance_native", oga_xml_lexer_advance, 1);
|
120
155
|
rb_define_method(cLexer, "reset_native", oga_xml_lexer_reset, 0);
|
156
|
+
|
157
|
+
rb_define_alloc_func(cLexer, oga_xml_lexer_allocate);
|
121
158
|
}
|
@@ -43,14 +43,16 @@ public class Lexer extends RubyObject
|
|
43
43
|
private static byte[] init__java_lexer_actions_0()
|
44
44
|
{
|
45
45
|
return new byte [] {
|
46
|
-
0, 1, 0, 1,
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
46
|
+
0, 1, 0, 1, 2, 1, 3, 1, 4, 1, 5, 1,
|
47
|
+
6, 1, 7, 1, 8, 1, 9, 1, 10, 1, 11, 1,
|
48
|
+
14, 1, 15, 1, 16, 1, 17, 1, 18, 1, 19, 1,
|
49
|
+
20, 1, 21, 1, 22, 1, 23, 1, 24, 1, 25, 1,
|
50
|
+
26, 1, 27, 1, 28, 1, 29, 1, 30, 1, 31, 1,
|
51
|
+
32, 1, 33, 1, 34, 1, 35, 1, 36, 1, 37, 1,
|
52
|
+
38, 1, 39, 1, 42, 1, 43, 1, 44, 1, 45, 1,
|
53
|
+
46, 1, 47, 1, 48, 1, 49, 1, 50, 1, 51, 2,
|
54
|
+
0, 1, 2, 4, 12, 2, 4, 13, 2, 4, 40, 2,
|
55
|
+
4, 41
|
54
56
|
};
|
55
57
|
}
|
56
58
|
|
@@ -62,10 +64,10 @@ private static short[] init__java_lexer_key_offsets_0()
|
|
62
64
|
return new short [] {
|
63
65
|
0, 0, 4, 5, 6, 7, 9, 11, 13, 15, 17, 19,
|
64
66
|
21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 41,
|
65
|
-
51, 60,
|
66
|
-
|
67
|
-
|
68
|
-
|
67
|
+
51, 60, 60, 61, 62, 63, 64, 75, 77, 85, 94, 103,
|
68
|
+
104, 105, 106, 107, 108, 109, 125, 133, 142, 151, 160, 169,
|
69
|
+
178, 187, 196, 205, 214, 223, 234, 242, 243, 251, 260, 277,
|
70
|
+
286, 287, 288, 299
|
69
71
|
};
|
70
72
|
}
|
71
73
|
|
@@ -80,28 +82,27 @@ private static char[] init__java_lexer_trans_keys_0()
|
|
80
82
|
68, 65, 84, 65, 91, 93, 93, 62, 93, 45, 95, 48,
|
81
83
|
57, 65, 90, 97, 122, 45, 58, 62, 95, 48, 57, 65,
|
82
84
|
90, 97, 122, 45, 95, 120, 48, 57, 65, 90, 97, 122,
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
97, 122, 0
|
85
|
+
93, 10, 62, 60, 33, 45, 47, 63, 95, 48, 57, 65,
|
86
|
+
90, 97, 122, 9, 32, 45, 95, 48, 57, 65, 90, 97,
|
87
|
+
122, 45, 95, 109, 48, 57, 65, 90, 97, 122, 45, 95,
|
88
|
+
108, 48, 57, 65, 90, 97, 122, 63, 62, 39, 39, 34,
|
89
|
+
34, 9, 32, 34, 39, 45, 62, 80, 83, 91, 95, 48,
|
90
|
+
57, 65, 90, 97, 122, 45, 95, 48, 57, 65, 90, 97,
|
91
|
+
122, 45, 85, 95, 48, 57, 65, 90, 97, 122, 45, 66,
|
92
|
+
95, 48, 57, 65, 90, 97, 122, 45, 76, 95, 48, 57,
|
93
|
+
65, 90, 97, 122, 45, 73, 95, 48, 57, 65, 90, 97,
|
94
|
+
122, 45, 67, 95, 48, 57, 65, 90, 97, 122, 45, 89,
|
95
|
+
95, 48, 57, 65, 90, 97, 122, 45, 83, 95, 48, 57,
|
96
|
+
65, 90, 97, 122, 45, 84, 95, 48, 57, 65, 90, 97,
|
97
|
+
122, 45, 69, 95, 48, 57, 65, 90, 97, 122, 45, 77,
|
98
|
+
95, 48, 57, 65, 90, 97, 122, 34, 39, 45, 63, 95,
|
99
|
+
48, 57, 65, 90, 97, 122, 45, 95, 48, 57, 65, 90,
|
100
|
+
97, 122, 62, 45, 95, 48, 57, 65, 90, 97, 122, 45,
|
101
|
+
58, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32,
|
102
|
+
34, 39, 45, 47, 61, 62, 95, 48, 57, 65, 90, 97,
|
103
|
+
122, 45, 58, 95, 48, 57, 65, 90, 97, 122, 60, 60,
|
104
|
+
33, 45, 60, 63, 95, 47, 57, 65, 90, 97, 122, 33,
|
105
|
+
45, 60, 63, 95, 47, 57, 65, 90, 97, 122, 0
|
105
106
|
};
|
106
107
|
}
|
107
108
|
|
@@ -113,10 +114,10 @@ private static byte[] init__java_lexer_single_lengths_0()
|
|
113
114
|
return new byte [] {
|
114
115
|
0, 4, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
|
115
116
|
2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 4,
|
116
|
-
3,
|
117
|
-
|
118
|
-
3, 3, 3, 3, 3,
|
119
|
-
|
117
|
+
3, 0, 1, 1, 1, 1, 5, 2, 2, 3, 3, 1,
|
118
|
+
1, 1, 1, 1, 1, 10, 2, 3, 3, 3, 3, 3,
|
119
|
+
3, 3, 3, 3, 3, 5, 2, 1, 2, 3, 11, 3,
|
120
|
+
1, 1, 5, 5
|
120
121
|
};
|
121
122
|
}
|
122
123
|
|
@@ -128,10 +129,10 @@ private static byte[] init__java_lexer_range_lengths_0()
|
|
128
129
|
return new byte [] {
|
129
130
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
130
131
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3,
|
131
|
-
3, 0, 0, 0, 0, 0,
|
132
|
-
|
133
|
-
3, 3, 3, 3, 3, 3, 3,
|
134
|
-
|
132
|
+
3, 0, 0, 0, 0, 0, 3, 0, 3, 3, 3, 0,
|
133
|
+
0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3,
|
134
|
+
3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3,
|
135
|
+
0, 0, 3, 3
|
135
136
|
};
|
136
137
|
}
|
137
138
|
|
@@ -143,10 +144,10 @@ private static short[] init__java_lexer_index_offsets_0()
|
|
143
144
|
return new short [] {
|
144
145
|
0, 0, 5, 7, 9, 11, 14, 17, 20, 23, 26, 29,
|
145
146
|
32, 35, 37, 39, 41, 43, 45, 47, 49, 51, 54, 60,
|
146
|
-
68, 75,
|
147
|
-
|
148
|
-
|
149
|
-
|
147
|
+
68, 75, 76, 78, 80, 82, 84, 93, 96, 102, 109, 116,
|
148
|
+
118, 120, 122, 124, 126, 128, 142, 148, 155, 162, 169, 176,
|
149
|
+
183, 190, 197, 204, 211, 218, 227, 233, 235, 241, 248, 263,
|
150
|
+
270, 272, 274, 283
|
150
151
|
};
|
151
152
|
}
|
152
153
|
|
@@ -162,26 +163,25 @@ private static byte[] init__java_lexer_indicies_0()
|
|
162
163
|
0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21,
|
163
164
|
20, 22, 20, 23, 22, 20, 24, 24, 24, 24, 24, 0,
|
164
165
|
24, 25, 26, 24, 24, 24, 24, 0, 27, 27, 28, 27,
|
165
|
-
27, 27, 0, 30, 29,
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
0
|
166
|
+
27, 27, 0, 29, 30, 29, 31, 32, 33, 32, 35, 34,
|
167
|
+
37, 38, 25, 39, 38, 38, 38, 38, 36, 14, 14, 40,
|
168
|
+
27, 27, 27, 27, 27, 41, 27, 27, 43, 27, 27, 27,
|
169
|
+
42, 27, 27, 44, 27, 27, 27, 42, 46, 45, 48, 47,
|
170
|
+
50, 49, 51, 49, 53, 52, 54, 52, 55, 55, 56, 57,
|
171
|
+
58, 59, 60, 61, 62, 58, 58, 58, 58, 32, 58, 58,
|
172
|
+
58, 58, 58, 63, 58, 65, 58, 58, 58, 58, 64, 58,
|
173
|
+
66, 58, 58, 58, 58, 64, 58, 67, 58, 58, 58, 58,
|
174
|
+
64, 58, 68, 58, 58, 58, 58, 64, 58, 69, 58, 58,
|
175
|
+
58, 58, 64, 58, 70, 58, 58, 58, 58, 64, 58, 71,
|
176
|
+
58, 58, 58, 58, 64, 58, 72, 58, 58, 58, 58, 64,
|
177
|
+
58, 73, 58, 58, 58, 58, 64, 58, 69, 58, 58, 58,
|
178
|
+
58, 64, 75, 76, 77, 78, 77, 77, 77, 77, 74, 77,
|
179
|
+
77, 77, 77, 77, 79, 81, 80, 82, 82, 82, 82, 82,
|
180
|
+
32, 82, 84, 82, 82, 82, 82, 83, 85, 31, 86, 85,
|
181
|
+
87, 88, 89, 90, 85, 91, 89, 89, 89, 89, 32, 89,
|
182
|
+
93, 89, 89, 89, 89, 92, 95, 94, 97, 94, 98, 98,
|
183
|
+
97, 98, 98, 98, 98, 98, 94, 99, 99, 97, 99, 99,
|
184
|
+
99, 99, 99, 94, 0
|
185
185
|
};
|
186
186
|
}
|
187
187
|
|
@@ -191,15 +191,15 @@ private static final byte _java_lexer_indicies[] = init__java_lexer_indicies_0()
|
|
191
191
|
private static byte[] init__java_lexer_trans_targs_0()
|
192
192
|
{
|
193
193
|
return new byte [] {
|
194
|
-
|
195
|
-
11, 12,
|
196
|
-
23, 22,
|
197
|
-
|
198
|
-
35,
|
199
|
-
|
200
|
-
51, 52, 53,
|
201
|
-
|
202
|
-
|
194
|
+
29, 2, 6, 13, 3, 4, 5, 29, 7, 8, 9, 10,
|
195
|
+
11, 12, 31, 14, 15, 16, 17, 18, 19, 20, 21, 29,
|
196
|
+
23, 22, 29, 32, 33, 26, 41, 58, 0, 58, 29, 30,
|
197
|
+
29, 1, 29, 24, 29, 29, 29, 34, 32, 35, 36, 35,
|
198
|
+
35, 38, 37, 37, 40, 39, 39, 41, 41, 41, 42, 41,
|
199
|
+
43, 48, 25, 41, 41, 44, 45, 46, 47, 42, 49, 50,
|
200
|
+
51, 52, 53, 53, 53, 54, 55, 53, 53, 53, 57, 56,
|
201
|
+
56, 58, 27, 58, 58, 59, 28, 58, 58, 58, 61, 63,
|
202
|
+
60, 62, 60, 60
|
203
203
|
};
|
204
204
|
}
|
205
205
|
|
@@ -209,15 +209,15 @@ private static final byte _java_lexer_trans_targs[] = init__java_lexer_trans_tar
|
|
209
209
|
private static byte[] init__java_lexer_trans_actions_0()
|
210
210
|
{
|
211
211
|
return new byte [] {
|
212
|
-
|
213
|
-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
214
|
-
0, 0,
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
0, 0,
|
219
|
-
|
220
|
-
|
212
|
+
91, 0, 0, 0, 0, 0, 0, 75, 0, 0, 0, 0,
|
213
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77,
|
214
|
+
0, 0, 81, 107, 0, 0, 23, 55, 0, 65, 83, 7,
|
215
|
+
89, 0, 79, 0, 85, 93, 87, 0, 104, 11, 0, 13,
|
216
|
+
9, 1, 15, 17, 1, 19, 21, 29, 27, 25, 101, 31,
|
217
|
+
0, 0, 0, 35, 33, 0, 0, 0, 0, 98, 0, 0,
|
218
|
+
0, 0, 43, 41, 39, 0, 0, 45, 47, 37, 0, 51,
|
219
|
+
49, 53, 0, 61, 59, 0, 0, 63, 67, 57, 1, 95,
|
220
|
+
73, 95, 71, 69
|
221
221
|
};
|
222
222
|
}
|
223
223
|
|
@@ -229,10 +229,10 @@ private static byte[] init__java_lexer_to_state_actions_0()
|
|
229
229
|
return new byte [] {
|
230
230
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
231
231
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
232
|
-
0, 0, 0, 0, 0,
|
233
|
-
0,
|
234
|
-
0, 0, 0, 0, 0, 0, 0, 3, 0,
|
235
|
-
3, 0,
|
232
|
+
0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 3,
|
233
|
+
0, 3, 0, 3, 0, 3, 0, 0, 0, 0, 0, 0,
|
234
|
+
0, 0, 0, 0, 0, 3, 0, 0, 3, 0, 3, 0,
|
235
|
+
3, 0, 0, 0
|
236
236
|
};
|
237
237
|
}
|
238
238
|
|
@@ -244,10 +244,10 @@ private static byte[] init__java_lexer_from_state_actions_0()
|
|
244
244
|
return new byte [] {
|
245
245
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
246
246
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
247
|
-
0, 0, 0, 0, 0,
|
248
|
-
0,
|
249
|
-
0, 0, 0, 0, 0, 0, 0, 5, 0,
|
250
|
-
5, 0,
|
247
|
+
0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 5,
|
248
|
+
0, 5, 0, 5, 0, 5, 0, 0, 0, 0, 0, 0,
|
249
|
+
0, 0, 0, 0, 0, 5, 0, 0, 5, 0, 5, 0,
|
250
|
+
5, 0, 0, 0
|
251
251
|
};
|
252
252
|
}
|
253
253
|
|
@@ -259,27 +259,29 @@ private static short[] init__java_lexer_eof_trans_0()
|
|
259
259
|
return new short [] {
|
260
260
|
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
261
261
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
262
|
-
1, 0, 0, 0, 0,
|
263
|
-
|
264
|
-
|
265
|
-
0,
|
262
|
+
1, 0, 0, 0, 0, 0, 37, 41, 42, 43, 43, 0,
|
263
|
+
48, 0, 52, 0, 55, 0, 64, 65, 65, 65, 65, 65,
|
264
|
+
65, 65, 65, 65, 65, 0, 80, 81, 0, 84, 0, 93,
|
265
|
+
0, 97, 97, 97
|
266
266
|
};
|
267
267
|
}
|
268
268
|
|
269
269
|
private static final short _java_lexer_eof_trans[] = init__java_lexer_eof_trans_0();
|
270
270
|
|
271
271
|
|
272
|
-
static final int java_lexer_start =
|
273
|
-
static final int java_lexer_first_final =
|
272
|
+
static final int java_lexer_start = 29;
|
273
|
+
static final int java_lexer_first_final = 29;
|
274
274
|
static final int java_lexer_error = 0;
|
275
275
|
|
276
|
-
static final int java_lexer_en_proc_ins_body =
|
277
|
-
static final int
|
278
|
-
static final int
|
279
|
-
static final int
|
280
|
-
static final int
|
281
|
-
static final int
|
282
|
-
static final int
|
276
|
+
static final int java_lexer_en_proc_ins_body = 35;
|
277
|
+
static final int java_lexer_en_string_squote = 37;
|
278
|
+
static final int java_lexer_en_string_dquote = 39;
|
279
|
+
static final int java_lexer_en_doctype = 41;
|
280
|
+
static final int java_lexer_en_xml_decl = 53;
|
281
|
+
static final int java_lexer_en_element_name = 56;
|
282
|
+
static final int java_lexer_en_element_head = 58;
|
283
|
+
static final int java_lexer_en_text = 60;
|
284
|
+
static final int java_lexer_en_main = 29;
|
283
285
|
|
284
286
|
|
285
287
|
// line 39 "ext/java/org/liboga/xml/Lexer.rl"
|
@@ -287,6 +289,9 @@ static final int java_lexer_en_main = 35;
|
|
287
289
|
/* Used by Ragel to keep track of the current state. */
|
288
290
|
int act;
|
289
291
|
int cs;
|
292
|
+
int top;
|
293
|
+
int lines;
|
294
|
+
int[] stack;
|
290
295
|
|
291
296
|
/**
|
292
297
|
* Sets up the current class in the Ruby runtime.
|
@@ -337,15 +342,16 @@ static final int java_lexer_en_main = 35;
|
|
337
342
|
|
338
343
|
byte[] data = rb_str.getBytes();
|
339
344
|
|
340
|
-
int ts
|
341
|
-
int te
|
342
|
-
int p
|
343
|
-
int mark
|
344
|
-
int
|
345
|
-
int
|
345
|
+
int ts = 0;
|
346
|
+
int te = 0;
|
347
|
+
int p = 0;
|
348
|
+
int mark = 0;
|
349
|
+
int lines = this.lines;
|
350
|
+
int pe = data.length;
|
351
|
+
int eof = data.length;
|
346
352
|
|
347
353
|
|
348
|
-
// line
|
354
|
+
// line 355 "ext/java/org/liboga/xml/Lexer.java"
|
349
355
|
{
|
350
356
|
int _klen;
|
351
357
|
int _trans = 0;
|
@@ -370,11 +376,11 @@ case 1:
|
|
370
376
|
_nacts = (int) _java_lexer_actions[_acts++];
|
371
377
|
while ( _nacts-- > 0 ) {
|
372
378
|
switch ( _java_lexer_actions[_acts++] ) {
|
373
|
-
case
|
379
|
+
case 3:
|
374
380
|
// line 1 "NONE"
|
375
381
|
{ts = p;}
|
376
382
|
break;
|
377
|
-
// line
|
383
|
+
// line 384 "ext/java/org/liboga/xml/Lexer.java"
|
378
384
|
}
|
379
385
|
}
|
380
386
|
|
@@ -439,76 +445,136 @@ case 3:
|
|
439
445
|
switch ( _java_lexer_actions[_acts++] )
|
440
446
|
{
|
441
447
|
case 0:
|
442
|
-
// line
|
448
|
+
// line 40 "ext/ragel/base_lexer.rl"
|
449
|
+
{
|
450
|
+
if ( data[p] == '\n' ) lines++;
|
451
|
+
}
|
452
|
+
break;
|
453
|
+
case 1:
|
454
|
+
// line 353 "ext/ragel/base_lexer.rl"
|
443
455
|
{ mark = p; }
|
444
456
|
break;
|
445
|
-
case
|
457
|
+
case 4:
|
446
458
|
// line 1 "NONE"
|
447
459
|
{te = p+1;}
|
448
460
|
break;
|
449
|
-
case
|
450
|
-
// line
|
461
|
+
case 5:
|
462
|
+
// line 105 "ext/ragel/base_lexer.rl"
|
451
463
|
{te = p+1;{
|
452
464
|
callback("on_text", data, encoding, mark, ts);
|
453
465
|
callback_simple("on_proc_ins_end");
|
454
466
|
|
455
|
-
|
467
|
+
mark = 0;
|
468
|
+
|
469
|
+
( this.cs) = 29;
|
456
470
|
}}
|
457
471
|
break;
|
458
|
-
case 5:
|
459
|
-
// line 107 "ext/ragel/base_lexer.rl"
|
460
|
-
{te = p+1;}
|
461
|
-
break;
|
462
472
|
case 6:
|
463
|
-
// line
|
464
|
-
{te = p;
|
473
|
+
// line 114 "ext/ragel/base_lexer.rl"
|
474
|
+
{te = p+1;}
|
465
475
|
break;
|
466
476
|
case 7:
|
467
|
-
// line
|
468
|
-
{
|
477
|
+
// line 114 "ext/ragel/base_lexer.rl"
|
478
|
+
{te = p;p--;}
|
469
479
|
break;
|
470
480
|
case 8:
|
471
|
-
// line
|
472
|
-
{
|
481
|
+
// line 152 "ext/ragel/base_lexer.rl"
|
482
|
+
{te = p+1;{
|
483
|
+
callback_simple("on_string_squote");
|
484
|
+
|
485
|
+
{( this.cs) = ( this.stack)[--( this.top)];_goto_targ = 2; if (true) continue _goto;}
|
486
|
+
}}
|
473
487
|
break;
|
474
488
|
case 9:
|
475
|
-
// line
|
489
|
+
// line 126 "ext/ragel/base_lexer.rl"
|
490
|
+
{te = p;p--;{
|
491
|
+
callback("on_string_body", data, encoding, ts, te);
|
492
|
+
|
493
|
+
if ( lines > 0 )
|
494
|
+
{
|
495
|
+
advance_line(lines);
|
496
|
+
|
497
|
+
lines = 0;
|
498
|
+
}
|
499
|
+
}}
|
500
|
+
break;
|
501
|
+
case 10:
|
502
|
+
// line 162 "ext/ragel/base_lexer.rl"
|
503
|
+
{te = p+1;{
|
504
|
+
callback_simple("on_string_dquote");
|
505
|
+
|
506
|
+
{( this.cs) = ( this.stack)[--( this.top)];_goto_targ = 2; if (true) continue _goto;}
|
507
|
+
}}
|
508
|
+
break;
|
509
|
+
case 11:
|
510
|
+
// line 126 "ext/ragel/base_lexer.rl"
|
511
|
+
{te = p;p--;{
|
512
|
+
callback("on_string_body", data, encoding, ts, te);
|
513
|
+
|
514
|
+
if ( lines > 0 )
|
515
|
+
{
|
516
|
+
advance_line(lines);
|
517
|
+
|
518
|
+
lines = 0;
|
519
|
+
}
|
520
|
+
}}
|
521
|
+
break;
|
522
|
+
case 12:
|
523
|
+
// line 189 "ext/ragel/base_lexer.rl"
|
524
|
+
{( this.act) = 7;}
|
525
|
+
break;
|
526
|
+
case 13:
|
527
|
+
// line 207 "ext/ragel/base_lexer.rl"
|
528
|
+
{( this.act) = 12;}
|
529
|
+
break;
|
530
|
+
case 14:
|
531
|
+
// line 195 "ext/ragel/base_lexer.rl"
|
476
532
|
{te = p+1;{
|
477
533
|
callback("on_doctype_inline", data, encoding, ts + 1, te - 1);
|
478
534
|
}}
|
479
535
|
break;
|
480
|
-
case
|
481
|
-
// line
|
536
|
+
case 15:
|
537
|
+
// line 137 "ext/ragel/base_lexer.rl"
|
482
538
|
{te = p+1;{
|
483
|
-
|
539
|
+
callback_simple("on_string_squote");
|
540
|
+
|
541
|
+
{( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 37; _goto_targ = 2; if (true) continue _goto;}
|
484
542
|
}}
|
485
543
|
break;
|
486
|
-
case
|
487
|
-
// line
|
544
|
+
case 16:
|
545
|
+
// line 143 "ext/ragel/base_lexer.rl"
|
546
|
+
{te = p+1;{
|
547
|
+
callback_simple("on_string_dquote");
|
548
|
+
|
549
|
+
{( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 39; _goto_targ = 2; if (true) continue _goto;}
|
550
|
+
}}
|
551
|
+
break;
|
552
|
+
case 17:
|
553
|
+
// line 205 "ext/ragel/base_lexer.rl"
|
488
554
|
{te = p+1;}
|
489
555
|
break;
|
490
|
-
case
|
491
|
-
// line
|
556
|
+
case 18:
|
557
|
+
// line 211 "ext/ragel/base_lexer.rl"
|
492
558
|
{te = p+1;{
|
493
559
|
callback_simple("on_doctype_end");
|
494
|
-
( this.cs) =
|
560
|
+
( this.cs) = 29;
|
495
561
|
}}
|
496
562
|
break;
|
497
|
-
case
|
498
|
-
// line
|
563
|
+
case 19:
|
564
|
+
// line 207 "ext/ragel/base_lexer.rl"
|
499
565
|
{te = p;p--;{
|
500
566
|
callback("on_doctype_name", data, encoding, ts, te);
|
501
567
|
}}
|
502
568
|
break;
|
503
|
-
case
|
569
|
+
case 20:
|
504
570
|
// line 1 "NONE"
|
505
571
|
{ switch( ( this.act) ) {
|
506
|
-
case
|
572
|
+
case 7:
|
507
573
|
{{p = ((te))-1;}
|
508
574
|
callback("on_doctype_type", data, encoding, ts, te);
|
509
575
|
}
|
510
576
|
break;
|
511
|
-
case
|
577
|
+
case 12:
|
512
578
|
{{p = ((te))-1;}
|
513
579
|
callback("on_doctype_name", data, encoding, ts, te);
|
514
580
|
}
|
@@ -516,214 +582,252 @@ case 3:
|
|
516
582
|
}
|
517
583
|
}
|
518
584
|
break;
|
519
|
-
case
|
520
|
-
// line
|
585
|
+
case 21:
|
586
|
+
// line 231 "ext/ragel/base_lexer.rl"
|
521
587
|
{te = p+1;{
|
522
588
|
callback_simple("on_xml_decl_end");
|
523
|
-
( this.cs) =
|
589
|
+
( this.cs) = 29;
|
524
590
|
}}
|
525
591
|
break;
|
526
|
-
case
|
527
|
-
// line
|
592
|
+
case 22:
|
593
|
+
// line 137 "ext/ragel/base_lexer.rl"
|
528
594
|
{te = p+1;{
|
529
|
-
|
595
|
+
callback_simple("on_string_squote");
|
596
|
+
|
597
|
+
{( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 37; _goto_targ = 2; if (true) continue _goto;}
|
530
598
|
}}
|
531
599
|
break;
|
532
|
-
case
|
533
|
-
// line
|
600
|
+
case 23:
|
601
|
+
// line 143 "ext/ragel/base_lexer.rl"
|
602
|
+
{te = p+1;{
|
603
|
+
callback_simple("on_string_dquote");
|
604
|
+
|
605
|
+
{( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 39; _goto_targ = 2; if (true) continue _goto;}
|
606
|
+
}}
|
607
|
+
break;
|
608
|
+
case 24:
|
609
|
+
// line 244 "ext/ragel/base_lexer.rl"
|
534
610
|
{te = p+1;}
|
535
611
|
break;
|
536
|
-
case
|
537
|
-
// line
|
612
|
+
case 25:
|
613
|
+
// line 237 "ext/ragel/base_lexer.rl"
|
538
614
|
{te = p;p--;{
|
539
615
|
callback("on_attribute", data, encoding, ts, te);
|
540
616
|
}}
|
541
617
|
break;
|
542
|
-
case
|
543
|
-
// line
|
618
|
+
case 26:
|
619
|
+
// line 244 "ext/ragel/base_lexer.rl"
|
544
620
|
{te = p;p--;}
|
545
621
|
break;
|
546
|
-
case
|
547
|
-
// line
|
548
|
-
{{p = ((te))-1;}}
|
549
|
-
break;
|
550
|
-
case 21:
|
551
|
-
// line 228 "ext/ragel/base_lexer.rl"
|
622
|
+
case 27:
|
623
|
+
// line 271 "ext/ragel/base_lexer.rl"
|
552
624
|
{te = p+1;{
|
553
625
|
callback("on_element_ns", data, encoding, ts, te - 1);
|
554
626
|
}}
|
555
627
|
break;
|
556
|
-
case
|
557
|
-
// line
|
628
|
+
case 28:
|
629
|
+
// line 275 "ext/ragel/base_lexer.rl"
|
558
630
|
{te = p;p--;{
|
559
631
|
callback("on_element_name", data, encoding, ts, te);
|
560
|
-
( this.cs) =
|
632
|
+
( this.cs) = 58;
|
561
633
|
}}
|
562
634
|
break;
|
563
|
-
case
|
564
|
-
// line
|
635
|
+
case 29:
|
636
|
+
// line 284 "ext/ragel/base_lexer.rl"
|
565
637
|
{te = p+1;}
|
566
638
|
break;
|
567
|
-
case
|
568
|
-
// line
|
639
|
+
case 30:
|
640
|
+
// line 286 "ext/ragel/base_lexer.rl"
|
569
641
|
{te = p+1;{
|
570
642
|
callback_simple("advance_line");
|
571
643
|
}}
|
572
644
|
break;
|
573
|
-
case
|
574
|
-
// line
|
645
|
+
case 31:
|
646
|
+
// line 291 "ext/ragel/base_lexer.rl"
|
575
647
|
{te = p+1;{
|
576
648
|
callback("on_attribute_ns", data, encoding, ts, te - 1);
|
577
649
|
}}
|
578
650
|
break;
|
579
|
-
case
|
580
|
-
// line
|
651
|
+
case 32:
|
652
|
+
// line 137 "ext/ragel/base_lexer.rl"
|
581
653
|
{te = p+1;{
|
582
|
-
|
654
|
+
callback_simple("on_string_squote");
|
655
|
+
|
656
|
+
{( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 37; _goto_targ = 2; if (true) continue _goto;}
|
583
657
|
}}
|
584
658
|
break;
|
585
|
-
case
|
586
|
-
// line
|
659
|
+
case 33:
|
660
|
+
// line 143 "ext/ragel/base_lexer.rl"
|
661
|
+
{te = p+1;{
|
662
|
+
callback_simple("on_string_dquote");
|
663
|
+
|
664
|
+
{( this.stack)[( this.top)++] = ( this.cs); ( this.cs) = 39; _goto_targ = 2; if (true) continue _goto;}
|
665
|
+
}}
|
666
|
+
break;
|
667
|
+
case 34:
|
668
|
+
// line 304 "ext/ragel/base_lexer.rl"
|
587
669
|
{te = p+1;{
|
588
670
|
callback_simple("on_element_open_end");
|
589
|
-
( this.cs) =
|
671
|
+
( this.cs) = 29;
|
590
672
|
}}
|
591
673
|
break;
|
592
|
-
case
|
593
|
-
// line
|
674
|
+
case 35:
|
675
|
+
// line 310 "ext/ragel/base_lexer.rl"
|
594
676
|
{te = p+1;{
|
595
677
|
callback_simple("on_element_end");
|
596
|
-
( this.cs) =
|
678
|
+
( this.cs) = 29;
|
597
679
|
}}
|
598
680
|
break;
|
599
|
-
case
|
600
|
-
// line
|
681
|
+
case 36:
|
682
|
+
// line 295 "ext/ragel/base_lexer.rl"
|
601
683
|
{te = p;p--;{
|
602
684
|
callback("on_attribute", data, encoding, ts, te);
|
603
685
|
}}
|
604
686
|
break;
|
605
|
-
case
|
606
|
-
// line
|
687
|
+
case 37:
|
688
|
+
// line 339 "ext/ragel/base_lexer.rl"
|
607
689
|
{te = p+1;{
|
608
690
|
callback("on_text", data, encoding, ts, te);
|
609
691
|
|
610
|
-
(
|
692
|
+
if ( lines > 0 )
|
693
|
+
{
|
694
|
+
advance_line(lines);
|
695
|
+
|
696
|
+
lines = 0;
|
697
|
+
}
|
698
|
+
|
699
|
+
( this.cs) = 29;
|
611
700
|
}}
|
612
701
|
break;
|
613
|
-
case
|
614
|
-
// line
|
702
|
+
case 38:
|
703
|
+
// line 353 "ext/ragel/base_lexer.rl"
|
615
704
|
{te = p+1;{
|
616
705
|
callback("on_text", data, encoding, ts, mark);
|
617
706
|
|
618
707
|
p = mark - 1;
|
619
708
|
mark = 0;
|
620
709
|
|
621
|
-
(
|
710
|
+
if ( lines > 0 )
|
711
|
+
{
|
712
|
+
advance_line(lines);
|
713
|
+
|
714
|
+
lines = 0;
|
715
|
+
}
|
716
|
+
|
717
|
+
( this.cs) = 29;
|
622
718
|
}}
|
623
719
|
break;
|
624
|
-
case
|
625
|
-
// line
|
720
|
+
case 39:
|
721
|
+
// line 339 "ext/ragel/base_lexer.rl"
|
626
722
|
{te = p;p--;{
|
627
723
|
callback("on_text", data, encoding, ts, te);
|
628
|
-
|
724
|
+
|
725
|
+
if ( lines > 0 )
|
726
|
+
{
|
727
|
+
advance_line(lines);
|
728
|
+
|
729
|
+
lines = 0;
|
730
|
+
}
|
731
|
+
|
732
|
+
( this.cs) = 29;
|
629
733
|
}}
|
630
734
|
break;
|
631
|
-
case
|
632
|
-
// line
|
633
|
-
{( this.act) =
|
735
|
+
case 40:
|
736
|
+
// line 224 "ext/ragel/base_lexer.rl"
|
737
|
+
{( this.act) = 32;}
|
634
738
|
break;
|
635
|
-
case
|
636
|
-
// line
|
637
|
-
{( this.act) =
|
739
|
+
case 41:
|
740
|
+
// line 95 "ext/ragel/base_lexer.rl"
|
741
|
+
{( this.act) = 35;}
|
638
742
|
break;
|
639
|
-
case
|
640
|
-
// line
|
743
|
+
case 42:
|
744
|
+
// line 61 "ext/ragel/base_lexer.rl"
|
641
745
|
{te = p+1;{
|
642
746
|
callback("on_comment", data, encoding, ts + 4, te - 3);
|
643
747
|
}}
|
644
748
|
break;
|
645
|
-
case
|
646
|
-
// line
|
749
|
+
case 43:
|
750
|
+
// line 77 "ext/ragel/base_lexer.rl"
|
647
751
|
{te = p+1;{
|
648
752
|
callback("on_cdata", data, encoding, ts + 9, te - 3);
|
649
753
|
}}
|
650
754
|
break;
|
651
|
-
case
|
652
|
-
// line
|
755
|
+
case 44:
|
756
|
+
// line 259 "ext/ragel/base_lexer.rl"
|
653
757
|
{te = p+1;{
|
654
758
|
callback_simple("on_element_start");
|
655
759
|
p--;
|
656
|
-
( this.cs) =
|
760
|
+
( this.cs) = 56;
|
657
761
|
}}
|
658
762
|
break;
|
659
|
-
case
|
660
|
-
// line
|
763
|
+
case 45:
|
764
|
+
// line 265 "ext/ragel/base_lexer.rl"
|
661
765
|
{te = p+1;{
|
662
766
|
callback_simple("on_element_end");
|
663
767
|
}}
|
664
768
|
break;
|
665
|
-
case
|
666
|
-
// line
|
769
|
+
case 46:
|
770
|
+
// line 324 "ext/ragel/base_lexer.rl"
|
667
771
|
{te = p+1;{
|
668
772
|
p--;
|
669
|
-
( this.cs) =
|
773
|
+
( this.cs) = 60;
|
670
774
|
}}
|
671
775
|
break;
|
672
|
-
case
|
673
|
-
// line
|
776
|
+
case 47:
|
777
|
+
// line 181 "ext/ragel/base_lexer.rl"
|
674
778
|
{te = p;p--;{
|
675
779
|
callback_simple("on_doctype_start");
|
676
|
-
( this.cs) =
|
780
|
+
( this.cs) = 41;
|
677
781
|
}}
|
678
782
|
break;
|
679
|
-
case
|
680
|
-
// line
|
783
|
+
case 48:
|
784
|
+
// line 95 "ext/ragel/base_lexer.rl"
|
681
785
|
{te = p;p--;{
|
682
786
|
callback_simple("on_proc_ins_start");
|
683
787
|
callback("on_proc_ins_name", data, encoding, ts + 2, te);
|
684
788
|
|
685
789
|
mark = te;
|
686
790
|
|
687
|
-
( this.cs) =
|
791
|
+
( this.cs) = 35;
|
688
792
|
}}
|
689
793
|
break;
|
690
|
-
case
|
691
|
-
// line
|
794
|
+
case 49:
|
795
|
+
// line 324 "ext/ragel/base_lexer.rl"
|
692
796
|
{te = p;p--;{
|
693
797
|
p--;
|
694
|
-
( this.cs) =
|
798
|
+
( this.cs) = 60;
|
695
799
|
}}
|
696
800
|
break;
|
697
|
-
case
|
698
|
-
// line
|
801
|
+
case 50:
|
802
|
+
// line 324 "ext/ragel/base_lexer.rl"
|
699
803
|
{{p = ((te))-1;}{
|
700
804
|
p--;
|
701
|
-
( this.cs) =
|
805
|
+
( this.cs) = 60;
|
702
806
|
}}
|
703
807
|
break;
|
704
|
-
case
|
808
|
+
case 51:
|
705
809
|
// line 1 "NONE"
|
706
810
|
{ switch( ( this.act) ) {
|
707
|
-
case
|
811
|
+
case 32:
|
708
812
|
{{p = ((te))-1;}
|
709
813
|
callback_simple("on_xml_decl_start");
|
710
|
-
( this.cs) =
|
814
|
+
( this.cs) = 53;
|
711
815
|
}
|
712
816
|
break;
|
713
|
-
case
|
817
|
+
case 35:
|
714
818
|
{{p = ((te))-1;}
|
715
819
|
callback_simple("on_proc_ins_start");
|
716
820
|
callback("on_proc_ins_name", data, encoding, ts + 2, te);
|
717
821
|
|
718
822
|
mark = te;
|
719
823
|
|
720
|
-
( this.cs) =
|
824
|
+
( this.cs) = 35;
|
721
825
|
}
|
722
826
|
break;
|
723
827
|
}
|
724
828
|
}
|
725
829
|
break;
|
726
|
-
// line
|
830
|
+
// line 831 "ext/java/org/liboga/xml/Lexer.java"
|
727
831
|
}
|
728
832
|
}
|
729
833
|
}
|
@@ -733,11 +837,11 @@ case 2:
|
|
733
837
|
_nacts = (int) _java_lexer_actions[_acts++];
|
734
838
|
while ( _nacts-- > 0 ) {
|
735
839
|
switch ( _java_lexer_actions[_acts++] ) {
|
736
|
-
case
|
840
|
+
case 2:
|
737
841
|
// line 1 "NONE"
|
738
842
|
{ts = -1;}
|
739
843
|
break;
|
740
|
-
// line
|
844
|
+
// line 845 "ext/java/org/liboga/xml/Lexer.java"
|
741
845
|
}
|
742
846
|
}
|
743
847
|
|
@@ -764,7 +868,9 @@ case 5:
|
|
764
868
|
break; }
|
765
869
|
}
|
766
870
|
|
767
|
-
// line
|
871
|
+
// line 105 "ext/java/org/liboga/xml/Lexer.rl"
|
872
|
+
|
873
|
+
this.lines = lines;
|
768
874
|
|
769
875
|
return context.nil;
|
770
876
|
}
|
@@ -775,8 +881,10 @@ case 5:
|
|
775
881
|
@JRubyMethod
|
776
882
|
public IRubyObject reset_native(ThreadContext context)
|
777
883
|
{
|
778
|
-
this.act
|
779
|
-
this.
|
884
|
+
this.act = 0;
|
885
|
+
this.top = 0;
|
886
|
+
this.stack = new int[4];
|
887
|
+
this.cs = java_lexer_start;
|
780
888
|
|
781
889
|
return context.nil;
|
782
890
|
}
|
@@ -808,8 +916,19 @@ case 5:
|
|
808
916
|
|
809
917
|
this.callMethod(context, name);
|
810
918
|
}
|
919
|
+
|
920
|
+
/**
|
921
|
+
* Advances the line number by `amount` lines.
|
922
|
+
*/
|
923
|
+
public void advance_line(int amount)
|
924
|
+
{
|
925
|
+
ThreadContext context = this.runtime.getCurrentContext();
|
926
|
+
RubyFixnum lines = this.runtime.newFixnum(amount);
|
927
|
+
|
928
|
+
this.callMethod(context, "advance_line", lines);
|
929
|
+
}
|
811
930
|
}
|
812
931
|
|
813
932
|
|
814
|
-
// line
|
933
|
+
// line 172 "ext/java/org/liboga/xml/Lexer.rl"
|
815
934
|
|