oga 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +57 -0
- data/doc/changelog.md +128 -0
- data/doc/css/common.css +5 -4
- data/doc/css_selectors.md +935 -0
- data/doc/manually_creating_documents.md +67 -0
- data/doc/xml_namespaces.md +63 -0
- data/ext/c/lexer.c +745 -628
- data/ext/c/lexer.h +8 -0
- data/ext/c/lexer.rl +44 -7
- data/ext/java/org/liboga/xml/Lexer.java +351 -232
- data/ext/java/org/liboga/xml/Lexer.rl +29 -8
- data/ext/ragel/base_lexer.rl +68 -18
- data/lib/oga.rb +4 -1
- data/lib/oga/css/lexer.rb +743 -0
- data/lib/oga/css/parser.rb +828 -0
- data/lib/oga/version.rb +1 -1
- data/lib/oga/xml/attribute.rb +3 -1
- data/lib/oga/xml/element.rb +15 -1
- data/lib/oga/xml/entities.rb +60 -0
- data/lib/oga/xml/html_void_elements.rb +2 -0
- data/lib/oga/xml/lexer.rb +36 -28
- data/lib/oga/xml/node_set.rb +22 -0
- data/lib/oga/xml/parser.rb +149 -128
- data/lib/oga/xml/querying.rb +24 -0
- data/lib/oga/xml/sax_parser.rb +55 -1
- data/lib/oga/xml/text.rb +6 -1
- data/lib/oga/xpath/evaluator.rb +138 -101
- data/lib/oga/xpath/lexer.rb +1205 -1294
- data/lib/oga/xpath/parser.rb +228 -204
- metadata +9 -4
- data/lib/oga/xpath/node.rb +0 -10
@@ -40,6 +40,9 @@ public class Lexer extends RubyObject
|
|
40
40
|
/* Used by Ragel to keep track of the current state. */
|
41
41
|
int act;
|
42
42
|
int cs;
|
43
|
+
int top;
|
44
|
+
int lines;
|
45
|
+
int[] stack;
|
43
46
|
|
44
47
|
/**
|
45
48
|
* Sets up the current class in the Ruby runtime.
|
@@ -90,15 +93,18 @@ public class Lexer extends RubyObject
|
|
90
93
|
|
91
94
|
byte[] data = rb_str.getBytes();
|
92
95
|
|
93
|
-
int ts
|
94
|
-
int te
|
95
|
-
int p
|
96
|
-
int mark
|
97
|
-
int
|
98
|
-
int
|
96
|
+
int ts = 0;
|
97
|
+
int te = 0;
|
98
|
+
int p = 0;
|
99
|
+
int mark = 0;
|
100
|
+
int lines = this.lines;
|
101
|
+
int pe = data.length;
|
102
|
+
int eof = data.length;
|
99
103
|
|
100
104
|
%% write exec;
|
101
105
|
|
106
|
+
this.lines = lines;
|
107
|
+
|
102
108
|
return context.nil;
|
103
109
|
}
|
104
110
|
|
@@ -108,8 +114,10 @@ public class Lexer extends RubyObject
|
|
108
114
|
@JRubyMethod
|
109
115
|
public IRubyObject reset_native(ThreadContext context)
|
110
116
|
{
|
111
|
-
this.act
|
112
|
-
this.
|
117
|
+
this.act = 0;
|
118
|
+
this.top = 0;
|
119
|
+
this.stack = new int[4];
|
120
|
+
this.cs = java_lexer_start;
|
113
121
|
|
114
122
|
return context.nil;
|
115
123
|
}
|
@@ -141,11 +149,24 @@ public class Lexer extends RubyObject
|
|
141
149
|
|
142
150
|
this.callMethod(context, name);
|
143
151
|
}
|
152
|
+
|
153
|
+
/**
|
154
|
+
* Advances the line number by `amount` lines.
|
155
|
+
*/
|
156
|
+
public void advance_line(int amount)
|
157
|
+
{
|
158
|
+
ThreadContext context = this.runtime.getCurrentContext();
|
159
|
+
RubyFixnum lines = this.runtime.newFixnum(amount);
|
160
|
+
|
161
|
+
this.callMethod(context, "advance_line", lines);
|
162
|
+
}
|
144
163
|
}
|
145
164
|
|
146
165
|
%%{
|
147
166
|
variable act this.act;
|
148
167
|
variable cs this.cs;
|
168
|
+
variable stack this.stack;
|
169
|
+
variable top this.top;
|
149
170
|
|
150
171
|
include base_lexer "base_lexer.rl";
|
151
172
|
}%%
|
data/ext/ragel/base_lexer.rl
CHANGED
@@ -35,7 +35,12 @@
|
|
35
35
|
# stack.
|
36
36
|
#
|
37
37
|
|
38
|
-
newline
|
38
|
+
newline = '\n' | '\r\n';
|
39
|
+
|
40
|
+
action count_newlines {
|
41
|
+
if ( fc == '\n' ) lines++;
|
42
|
+
}
|
43
|
+
|
39
44
|
whitespace = [ \t];
|
40
45
|
ident_char = [a-zA-Z0-9\-_];
|
41
46
|
identifier = ident_char+;
|
@@ -101,6 +106,8 @@
|
|
101
106
|
callback("on_text", data, encoding, mark, ts);
|
102
107
|
callback_simple("on_proc_ins_end");
|
103
108
|
|
109
|
+
mark = 0;
|
110
|
+
|
104
111
|
fnext main;
|
105
112
|
};
|
106
113
|
|
@@ -116,15 +123,49 @@
|
|
116
123
|
dquote = '"';
|
117
124
|
squote = "'";
|
118
125
|
|
119
|
-
|
120
|
-
|
126
|
+
action emit_string {
|
127
|
+
callback("on_string_body", data, encoding, ts, te);
|
121
128
|
|
122
|
-
|
129
|
+
if ( lines > 0 )
|
130
|
+
{
|
131
|
+
advance_line(lines);
|
123
132
|
|
124
|
-
|
125
|
-
|
133
|
+
lines = 0;
|
134
|
+
}
|
126
135
|
}
|
127
136
|
|
137
|
+
action start_string_squote {
|
138
|
+
callback_simple("on_string_squote");
|
139
|
+
|
140
|
+
fcall string_squote;
|
141
|
+
}
|
142
|
+
|
143
|
+
action start_string_dquote {
|
144
|
+
callback_simple("on_string_dquote");
|
145
|
+
|
146
|
+
fcall string_dquote;
|
147
|
+
}
|
148
|
+
|
149
|
+
string_squote := |*
|
150
|
+
^squote* $count_newlines => emit_string;
|
151
|
+
|
152
|
+
squote => {
|
153
|
+
callback_simple("on_string_squote");
|
154
|
+
|
155
|
+
fret;
|
156
|
+
};
|
157
|
+
*|;
|
158
|
+
|
159
|
+
string_dquote := |*
|
160
|
+
^dquote* $count_newlines => emit_string;
|
161
|
+
|
162
|
+
dquote => {
|
163
|
+
callback_simple("on_string_dquote");
|
164
|
+
|
165
|
+
fret;
|
166
|
+
};
|
167
|
+
*|;
|
168
|
+
|
128
169
|
# DOCTYPES
|
129
170
|
#
|
130
171
|
# http://www.w3.org/TR/html-markup/syntax.html#doctype-syntax
|
@@ -156,7 +197,8 @@
|
|
156
197
|
};
|
157
198
|
|
158
199
|
# Lex the public/system IDs as regular strings.
|
159
|
-
|
200
|
+
squote => start_string_squote;
|
201
|
+
dquote => start_string_dquote;
|
160
202
|
|
161
203
|
# Whitespace inside doctypes is ignored since there's no point in
|
162
204
|
# including it.
|
@@ -196,7 +238,8 @@
|
|
196
238
|
callback("on_attribute", data, encoding, ts, te);
|
197
239
|
};
|
198
240
|
|
199
|
-
|
241
|
+
squote => start_string_squote;
|
242
|
+
dquote => start_string_dquote;
|
200
243
|
|
201
244
|
any;
|
202
245
|
*|;
|
@@ -254,7 +297,8 @@
|
|
254
297
|
};
|
255
298
|
|
256
299
|
# Attribute values.
|
257
|
-
|
300
|
+
squote => start_string_squote;
|
301
|
+
dquote => start_string_dquote;
|
258
302
|
|
259
303
|
# We're done with the open tag of the element.
|
260
304
|
'>' => {
|
@@ -289,14 +333,19 @@
|
|
289
333
|
# long. Because of this "<!" is used instead of "<!--".
|
290
334
|
|
291
335
|
terminate_text = '</' | '<!' | '<?' | element_start;
|
292
|
-
allowed_text = any* -- terminate_text;
|
336
|
+
allowed_text = (any* -- terminate_text) $count_newlines;
|
293
337
|
|
294
338
|
text := |*
|
295
|
-
|
296
|
-
# rules below, but only if those don't match.
|
297
|
-
terminate_text => {
|
339
|
+
terminate_text | allowed_text => {
|
298
340
|
callback("on_text", data, encoding, ts, te);
|
299
341
|
|
342
|
+
if ( lines > 0 )
|
343
|
+
{
|
344
|
+
advance_line(lines);
|
345
|
+
|
346
|
+
lines = 0;
|
347
|
+
}
|
348
|
+
|
300
349
|
fnext main;
|
301
350
|
};
|
302
351
|
|
@@ -307,12 +356,13 @@
|
|
307
356
|
p = mark - 1;
|
308
357
|
mark = 0;
|
309
358
|
|
310
|
-
|
311
|
-
|
359
|
+
if ( lines > 0 )
|
360
|
+
{
|
361
|
+
advance_line(lines);
|
362
|
+
|
363
|
+
lines = 0;
|
364
|
+
}
|
312
365
|
|
313
|
-
# Just regular text.
|
314
|
-
allowed_text => {
|
315
|
-
callback("on_text", data, encoding, ts, te);
|
316
366
|
fnext main;
|
317
367
|
};
|
318
368
|
*|;
|
data/lib/oga.rb
CHANGED
@@ -21,6 +21,7 @@ end
|
|
21
21
|
#:nocov:
|
22
22
|
|
23
23
|
require_relative 'oga/xml/html_void_elements'
|
24
|
+
require_relative 'oga/xml/entities'
|
24
25
|
require_relative 'oga/xml/querying'
|
25
26
|
require_relative 'oga/xml/traversal'
|
26
27
|
require_relative 'oga/xml/node'
|
@@ -43,7 +44,9 @@ require_relative 'oga/xml/pull_parser'
|
|
43
44
|
require_relative 'oga/html/parser'
|
44
45
|
require_relative 'oga/html/sax_parser'
|
45
46
|
|
46
|
-
require_relative 'oga/xpath/node'
|
47
47
|
require_relative 'oga/xpath/lexer'
|
48
48
|
require_relative 'oga/xpath/parser'
|
49
49
|
require_relative 'oga/xpath/evaluator'
|
50
|
+
|
51
|
+
require_relative 'oga/css/lexer'
|
52
|
+
require_relative 'oga/css/parser'
|
@@ -0,0 +1,743 @@
|
|
1
|
+
|
2
|
+
# line 1 "lib/oga/css/lexer.rl"
|
3
|
+
|
4
|
+
# line 3 "lib/oga/css/lexer.rl"
|
5
|
+
module Oga
|
6
|
+
module CSS
|
7
|
+
##
|
8
|
+
# Lexer for turning CSS expressions into a sequence of tokens. Tokens are
|
9
|
+
# returned as arrays with every array having two values:
|
10
|
+
#
|
11
|
+
# 1. The token type as a Symbol
|
12
|
+
# 2. The token value, or nil if there is no value.
|
13
|
+
#
|
14
|
+
# ## Thread Safety
|
15
|
+
#
|
16
|
+
# Similar to the XPath lexer this lexer keeps track of an internal state. As
|
17
|
+
# a result it's not safe to share the same instance of this lexer between
|
18
|
+
# multiple threads. However, no global state is used so you can use separate
|
19
|
+
# instances in threads just fine.
|
20
|
+
#
|
21
|
+
class Lexer
|
22
|
+
|
23
|
+
# line 24 "lib/oga/css/lexer.rb"
|
24
|
+
class << self
|
25
|
+
attr_accessor :_css_lexer_trans_keys
|
26
|
+
private :_css_lexer_trans_keys, :_css_lexer_trans_keys=
|
27
|
+
end
|
28
|
+
self._css_lexer_trans_keys = [
|
29
|
+
0, 0, 43, 57, 118, 118,
|
30
|
+
101, 101, 110, 110, 100,
|
31
|
+
100, 100, 100, 34, 34,
|
32
|
+
61, 61, 39, 39, 61, 61,
|
33
|
+
61, 61, 61, 61, 9,
|
34
|
+
126, 9, 44, 9, 32,
|
35
|
+
0, 0, 9, 32, 0, 0,
|
36
|
+
0, 0, 9, 32, 45,
|
37
|
+
122, 9, 32, 9, 122,
|
38
|
+
9, 32, 48, 57, 43, 57,
|
39
|
+
45, 122, 9, 126, 9,
|
40
|
+
32, 61, 61, 45, 122,
|
41
|
+
0
|
42
|
+
]
|
43
|
+
|
44
|
+
class << self
|
45
|
+
attr_accessor :_css_lexer_key_spans
|
46
|
+
private :_css_lexer_key_spans, :_css_lexer_key_spans=
|
47
|
+
end
|
48
|
+
self._css_lexer_key_spans = [
|
49
|
+
0, 15, 1, 1, 1, 1, 1, 1,
|
50
|
+
1, 1, 1, 1, 1, 118, 36, 24,
|
51
|
+
0, 24, 0, 0, 24, 78, 24, 114,
|
52
|
+
24, 10, 15, 78, 118, 24, 1, 78
|
53
|
+
]
|
54
|
+
|
55
|
+
class << self
|
56
|
+
attr_accessor :_css_lexer_index_offsets
|
57
|
+
private :_css_lexer_index_offsets, :_css_lexer_index_offsets=
|
58
|
+
end
|
59
|
+
self._css_lexer_index_offsets = [
|
60
|
+
0, 0, 16, 18, 20, 22, 24, 26,
|
61
|
+
28, 30, 32, 34, 36, 38, 157, 194,
|
62
|
+
219, 220, 245, 246, 247, 272, 351, 376,
|
63
|
+
491, 516, 527, 543, 622, 741, 766, 768
|
64
|
+
]
|
65
|
+
|
66
|
+
class << self
|
67
|
+
attr_accessor :_css_lexer_indicies
|
68
|
+
private :_css_lexer_indicies, :_css_lexer_indicies=
|
69
|
+
end
|
70
|
+
self._css_lexer_indicies = [
|
71
|
+
1, 0, 1, 0, 0, 2, 2, 2,
|
72
|
+
2, 2, 2, 2, 2, 2, 2, 0,
|
73
|
+
3, 4, 5, 4, 6, 4, 7, 4,
|
74
|
+
8, 4, 10, 9, 11, 4, 10, 12,
|
75
|
+
13, 4, 14, 4, 15, 4, 17, 16,
|
76
|
+
16, 16, 16, 16, 16, 16, 16, 16,
|
77
|
+
16, 16, 16, 16, 16, 16, 16, 16,
|
78
|
+
16, 16, 16, 16, 16, 17, 16, 16,
|
79
|
+
18, 16, 16, 16, 16, 19, 16, 20,
|
80
|
+
21, 22, 16, 23, 16, 16, 16, 16,
|
81
|
+
16, 16, 16, 16, 16, 16, 16, 24,
|
82
|
+
16, 16, 16, 25, 16, 16, 26, 26,
|
83
|
+
26, 26, 26, 26, 26, 26, 26, 26,
|
84
|
+
26, 26, 26, 26, 26, 26, 26, 26,
|
85
|
+
26, 26, 26, 26, 26, 26, 26, 26,
|
86
|
+
27, 16, 16, 16, 26, 16, 26, 26,
|
87
|
+
26, 26, 26, 26, 26, 26, 26, 26,
|
88
|
+
26, 26, 26, 26, 26, 26, 26, 26,
|
89
|
+
26, 26, 26, 26, 26, 26, 26, 26,
|
90
|
+
16, 28, 16, 29, 16, 17, 30, 30,
|
91
|
+
30, 30, 30, 30, 30, 30, 30, 30,
|
92
|
+
30, 30, 30, 30, 30, 30, 30, 30,
|
93
|
+
30, 30, 30, 30, 17, 30, 30, 30,
|
94
|
+
30, 30, 30, 30, 30, 30, 30, 30,
|
95
|
+
22, 30, 22, 31, 31, 31, 31, 31,
|
96
|
+
31, 31, 31, 31, 31, 31, 31, 31,
|
97
|
+
31, 31, 31, 31, 31, 31, 31, 31,
|
98
|
+
31, 22, 31, 32, 21, 33, 33, 33,
|
99
|
+
33, 33, 33, 33, 33, 33, 33, 33,
|
100
|
+
33, 33, 33, 33, 33, 33, 33, 33,
|
101
|
+
33, 33, 33, 21, 33, 34, 35, 25,
|
102
|
+
36, 36, 36, 36, 36, 36, 36, 36,
|
103
|
+
36, 36, 36, 36, 36, 36, 36, 36,
|
104
|
+
36, 36, 36, 36, 36, 36, 25, 36,
|
105
|
+
26, 37, 37, 26, 26, 26, 26, 26,
|
106
|
+
26, 26, 26, 26, 26, 37, 37, 37,
|
107
|
+
37, 37, 37, 37, 26, 26, 26, 26,
|
108
|
+
26, 26, 26, 26, 26, 26, 26, 26,
|
109
|
+
26, 26, 26, 26, 26, 26, 26, 26,
|
110
|
+
26, 26, 26, 26, 26, 26, 37, 37,
|
111
|
+
37, 37, 26, 37, 26, 26, 26, 26,
|
112
|
+
26, 26, 26, 26, 26, 26, 26, 26,
|
113
|
+
26, 26, 26, 26, 26, 26, 26, 26,
|
114
|
+
26, 26, 26, 26, 26, 26, 37, 29,
|
115
|
+
38, 38, 38, 38, 38, 38, 38, 38,
|
116
|
+
38, 38, 38, 38, 38, 38, 38, 38,
|
117
|
+
38, 38, 38, 38, 38, 38, 29, 38,
|
118
|
+
39, 4, 4, 4, 4, 4, 4, 4,
|
119
|
+
4, 4, 4, 4, 4, 4, 4, 4,
|
120
|
+
4, 4, 4, 4, 4, 4, 4, 39,
|
121
|
+
4, 4, 4, 4, 4, 4, 4, 4,
|
122
|
+
40, 41, 1, 4, 42, 4, 4, 2,
|
123
|
+
2, 2, 2, 2, 2, 2, 2, 2,
|
124
|
+
2, 4, 4, 4, 4, 4, 4, 4,
|
125
|
+
43, 43, 43, 43, 43, 43, 43, 43,
|
126
|
+
43, 43, 43, 43, 43, 43, 43, 43,
|
127
|
+
43, 43, 43, 43, 43, 43, 43, 43,
|
128
|
+
43, 43, 4, 4, 4, 4, 43, 4,
|
129
|
+
43, 43, 43, 43, 44, 43, 43, 43,
|
130
|
+
43, 43, 43, 43, 43, 45, 46, 43,
|
131
|
+
43, 43, 43, 43, 43, 43, 43, 43,
|
132
|
+
43, 43, 4, 39, 47, 47, 47, 47,
|
133
|
+
47, 47, 47, 47, 47, 47, 47, 47,
|
134
|
+
47, 47, 47, 47, 47, 47, 47, 47,
|
135
|
+
47, 47, 39, 47, 2, 2, 2, 2,
|
136
|
+
2, 2, 2, 2, 2, 2, 48, 1,
|
137
|
+
49, 1, 49, 49, 2, 2, 2, 2,
|
138
|
+
2, 2, 2, 2, 2, 2, 49, 43,
|
139
|
+
50, 50, 43, 43, 43, 43, 43, 43,
|
140
|
+
43, 43, 43, 43, 50, 50, 50, 50,
|
141
|
+
50, 50, 50, 43, 43, 43, 43, 43,
|
142
|
+
43, 43, 43, 43, 43, 43, 43, 43,
|
143
|
+
43, 43, 43, 43, 43, 43, 43, 43,
|
144
|
+
43, 43, 43, 43, 43, 50, 50, 50,
|
145
|
+
50, 43, 50, 43, 43, 43, 43, 43,
|
146
|
+
43, 43, 43, 43, 43, 43, 43, 43,
|
147
|
+
43, 43, 43, 43, 43, 43, 43, 43,
|
148
|
+
43, 43, 43, 43, 43, 50, 51, 4,
|
149
|
+
4, 4, 4, 4, 4, 4, 4, 4,
|
150
|
+
4, 4, 4, 4, 4, 4, 4, 4,
|
151
|
+
4, 4, 4, 4, 4, 51, 4, 9,
|
152
|
+
4, 52, 4, 4, 12, 4, 4, 53,
|
153
|
+
4, 4, 4, 4, 4, 4, 4, 4,
|
154
|
+
4, 4, 4, 4, 4, 4, 4, 4,
|
155
|
+
4, 4, 54, 4, 4, 4, 55, 55,
|
156
|
+
55, 55, 55, 55, 55, 55, 55, 55,
|
157
|
+
55, 55, 55, 55, 55, 55, 55, 55,
|
158
|
+
55, 55, 55, 55, 55, 55, 55, 55,
|
159
|
+
4, 4, 56, 57, 55, 4, 55, 55,
|
160
|
+
55, 55, 55, 55, 55, 55, 55, 55,
|
161
|
+
55, 55, 55, 55, 55, 55, 55, 55,
|
162
|
+
55, 55, 55, 55, 55, 55, 55, 55,
|
163
|
+
4, 58, 4, 59, 4, 51, 60, 60,
|
164
|
+
60, 60, 60, 60, 60, 60, 60, 60,
|
165
|
+
60, 60, 60, 60, 60, 60, 60, 60,
|
166
|
+
60, 60, 60, 60, 51, 60, 62, 61,
|
167
|
+
55, 61, 61, 55, 55, 55, 55, 55,
|
168
|
+
55, 55, 55, 55, 55, 61, 61, 61,
|
169
|
+
61, 61, 61, 61, 55, 55, 55, 55,
|
170
|
+
55, 55, 55, 55, 55, 55, 55, 55,
|
171
|
+
55, 55, 55, 55, 55, 55, 55, 55,
|
172
|
+
55, 55, 55, 55, 55, 55, 61, 61,
|
173
|
+
61, 61, 55, 61, 55, 55, 55, 55,
|
174
|
+
55, 55, 55, 55, 55, 55, 55, 55,
|
175
|
+
55, 55, 55, 55, 55, 55, 55, 55,
|
176
|
+
55, 55, 55, 55, 55, 55, 61, 0
|
177
|
+
]
|
178
|
+
|
179
|
+
class << self
|
180
|
+
attr_accessor :_css_lexer_trans_targs
|
181
|
+
private :_css_lexer_trans_targs, :_css_lexer_trans_targs=
|
182
|
+
end
|
183
|
+
self._css_lexer_trans_targs = [
|
184
|
+
23, 1, 25, 3, 0, 4, 23, 6,
|
185
|
+
23, 7, 28, 28, 9, 28, 28, 28,
|
186
|
+
13, 14, 16, 13, 13, 17, 15, 18,
|
187
|
+
19, 20, 21, 13, 13, 22, 13, 13,
|
188
|
+
13, 13, 13, 13, 13, 13, 13, 24,
|
189
|
+
23, 23, 26, 27, 2, 23, 5, 23,
|
190
|
+
23, 23, 23, 29, 8, 30, 28, 31,
|
191
|
+
28, 10, 11, 12, 28, 28, 28
|
192
|
+
]
|
193
|
+
|
194
|
+
class << self
|
195
|
+
attr_accessor :_css_lexer_trans_actions
|
196
|
+
private :_css_lexer_trans_actions, :_css_lexer_trans_actions=
|
197
|
+
end
|
198
|
+
self._css_lexer_trans_actions = [
|
199
|
+
1, 0, 0, 0, 0, 0, 2, 0,
|
200
|
+
3, 0, 4, 5, 0, 6, 7, 8,
|
201
|
+
11, 0, 0, 12, 13, 0, 0, 0,
|
202
|
+
0, 0, 0, 14, 15, 0, 16, 17,
|
203
|
+
18, 19, 20, 21, 22, 23, 24, 0,
|
204
|
+
26, 27, 28, 0, 0, 29, 0, 30,
|
205
|
+
31, 32, 33, 0, 0, 0, 34, 0,
|
206
|
+
35, 0, 0, 0, 36, 37, 38
|
207
|
+
]
|
208
|
+
|
209
|
+
class << self
|
210
|
+
attr_accessor :_css_lexer_to_state_actions
|
211
|
+
private :_css_lexer_to_state_actions, :_css_lexer_to_state_actions=
|
212
|
+
end
|
213
|
+
self._css_lexer_to_state_actions = [
|
214
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
215
|
+
0, 0, 0, 0, 0, 9, 0, 0,
|
216
|
+
0, 0, 0, 0, 0, 0, 0, 25,
|
217
|
+
0, 0, 0, 0, 9, 0, 0, 0
|
218
|
+
]
|
219
|
+
|
220
|
+
class << self
|
221
|
+
attr_accessor :_css_lexer_from_state_actions
|
222
|
+
private :_css_lexer_from_state_actions, :_css_lexer_from_state_actions=
|
223
|
+
end
|
224
|
+
self._css_lexer_from_state_actions = [
|
225
|
+
0, 0, 0, 0, 0, 0, 0, 0,
|
226
|
+
0, 0, 0, 0, 0, 10, 0, 0,
|
227
|
+
0, 0, 0, 0, 0, 0, 0, 10,
|
228
|
+
0, 0, 0, 0, 10, 0, 0, 0
|
229
|
+
]
|
230
|
+
|
231
|
+
class << self
|
232
|
+
attr_accessor :_css_lexer_eof_trans
|
233
|
+
private :_css_lexer_eof_trans, :_css_lexer_eof_trans=
|
234
|
+
end
|
235
|
+
self._css_lexer_eof_trans = [
|
236
|
+
0, 1, 0, 0, 0, 0, 0, 0,
|
237
|
+
0, 0, 0, 0, 0, 0, 31, 32,
|
238
|
+
33, 34, 35, 36, 37, 38, 39, 0,
|
239
|
+
48, 49, 50, 51, 0, 61, 62, 62
|
240
|
+
]
|
241
|
+
|
242
|
+
class << self
|
243
|
+
attr_accessor :css_lexer_start
|
244
|
+
end
|
245
|
+
self.css_lexer_start = 13;
|
246
|
+
class << self
|
247
|
+
attr_accessor :css_lexer_first_final
|
248
|
+
end
|
249
|
+
self.css_lexer_first_final = 13;
|
250
|
+
class << self
|
251
|
+
attr_accessor :css_lexer_error
|
252
|
+
end
|
253
|
+
self.css_lexer_error = 0;
|
254
|
+
|
255
|
+
class << self
|
256
|
+
attr_accessor :css_lexer_en_pseudo_args
|
257
|
+
end
|
258
|
+
self.css_lexer_en_pseudo_args = 23;
|
259
|
+
class << self
|
260
|
+
attr_accessor :css_lexer_en_predicate
|
261
|
+
end
|
262
|
+
self.css_lexer_en_predicate = 28;
|
263
|
+
class << self
|
264
|
+
attr_accessor :css_lexer_en_main
|
265
|
+
end
|
266
|
+
self.css_lexer_en_main = 13;
|
267
|
+
|
268
|
+
|
269
|
+
# line 21 "lib/oga/css/lexer.rl"
|
270
|
+
|
271
|
+
# % fix highlight
|
272
|
+
|
273
|
+
##
|
274
|
+
# @param [String] data The data to lex.
|
275
|
+
#
|
276
|
+
def initialize(data)
|
277
|
+
@data = data
|
278
|
+
end
|
279
|
+
|
280
|
+
##
|
281
|
+
# Gathers all the tokens for the input and returns them as an Array.
|
282
|
+
#
|
283
|
+
# @see [#advance]
|
284
|
+
# @return [Array]
|
285
|
+
#
|
286
|
+
def lex
|
287
|
+
tokens = []
|
288
|
+
|
289
|
+
advance do |type, value|
|
290
|
+
tokens << [type, value]
|
291
|
+
end
|
292
|
+
|
293
|
+
return tokens
|
294
|
+
end
|
295
|
+
|
296
|
+
##
|
297
|
+
# Advances through the input and generates the corresponding tokens. Each
|
298
|
+
# token is yielded to the supplied block.
|
299
|
+
#
|
300
|
+
# This method stores the supplied block in `@block` and resets it after
|
301
|
+
# the lexer loop has finished.
|
302
|
+
#
|
303
|
+
# @see [#add_token]
|
304
|
+
#
|
305
|
+
def advance(&block)
|
306
|
+
@block = block
|
307
|
+
|
308
|
+
data = @data # saves ivar lookups while lexing.
|
309
|
+
ts = nil
|
310
|
+
te = nil
|
311
|
+
stack = []
|
312
|
+
top = 0
|
313
|
+
cs = self.class.css_lexer_start
|
314
|
+
act = 0
|
315
|
+
eof = @data.bytesize
|
316
|
+
p = 0
|
317
|
+
pe = eof
|
318
|
+
|
319
|
+
_css_lexer_eof_trans = self.class.send(:_css_lexer_eof_trans)
|
320
|
+
_css_lexer_from_state_actions = self.class.send(:_css_lexer_from_state_actions)
|
321
|
+
_css_lexer_index_offsets = self.class.send(:_css_lexer_index_offsets)
|
322
|
+
_css_lexer_indicies = self.class.send(:_css_lexer_indicies)
|
323
|
+
_css_lexer_key_spans = self.class.send(:_css_lexer_key_spans)
|
324
|
+
_css_lexer_to_state_actions = self.class.send(:_css_lexer_to_state_actions)
|
325
|
+
_css_lexer_trans_actions = self.class.send(:_css_lexer_trans_actions)
|
326
|
+
_css_lexer_trans_keys = self.class.send(:_css_lexer_trans_keys)
|
327
|
+
_css_lexer_trans_targs = self.class.send(:_css_lexer_trans_targs)
|
328
|
+
|
329
|
+
|
330
|
+
# line 331 "lib/oga/css/lexer.rb"
|
331
|
+
begin
|
332
|
+
testEof = false
|
333
|
+
_slen, _trans, _keys, _inds, _acts, _nacts = nil
|
334
|
+
_goto_level = 0
|
335
|
+
_resume = 10
|
336
|
+
_eof_trans = 15
|
337
|
+
_again = 20
|
338
|
+
_test_eof = 30
|
339
|
+
_out = 40
|
340
|
+
while true
|
341
|
+
if _goto_level <= 0
|
342
|
+
if p == pe
|
343
|
+
_goto_level = _test_eof
|
344
|
+
next
|
345
|
+
end
|
346
|
+
if cs == 0
|
347
|
+
_goto_level = _out
|
348
|
+
next
|
349
|
+
end
|
350
|
+
end
|
351
|
+
if _goto_level <= _resume
|
352
|
+
case _css_lexer_from_state_actions[cs]
|
353
|
+
when 10 then
|
354
|
+
# line 1 "NONE"
|
355
|
+
begin
|
356
|
+
ts = p
|
357
|
+
end
|
358
|
+
# line 359 "lib/oga/css/lexer.rb"
|
359
|
+
end
|
360
|
+
_keys = cs << 1
|
361
|
+
_inds = _css_lexer_index_offsets[cs]
|
362
|
+
_slen = _css_lexer_key_spans[cs]
|
363
|
+
_wide = ( _wide)
|
364
|
+
_wide = (data.getbyte(p) || 0)
|
365
|
+
_trans = if ( _slen > 0 &&
|
366
|
+
_css_lexer_trans_keys[_keys] <= _wide &&
|
367
|
+
_wide <= _css_lexer_trans_keys[_keys + 1]
|
368
|
+
) then
|
369
|
+
_css_lexer_indicies[ _inds + _wide - _css_lexer_trans_keys[_keys] ]
|
370
|
+
else
|
371
|
+
_css_lexer_indicies[ _inds + _slen ]
|
372
|
+
end
|
373
|
+
end
|
374
|
+
if _goto_level <= _eof_trans
|
375
|
+
cs = _css_lexer_trans_targs[_trans]
|
376
|
+
if _css_lexer_trans_actions[_trans] != 0
|
377
|
+
case _css_lexer_trans_actions[_trans]
|
378
|
+
when 29 then
|
379
|
+
# line 249 "lib/oga/css/lexer.rl"
|
380
|
+
begin
|
381
|
+
te = p+1
|
382
|
+
begin add_token(:T_NTH) end
|
383
|
+
end
|
384
|
+
when 3 then
|
385
|
+
# line 251 "lib/oga/css/lexer.rl"
|
386
|
+
begin
|
387
|
+
te = p+1
|
388
|
+
begin add_token(:T_ODD) end
|
389
|
+
end
|
390
|
+
when 2 then
|
391
|
+
# line 252 "lib/oga/css/lexer.rl"
|
392
|
+
begin
|
393
|
+
te = p+1
|
394
|
+
begin add_token(:T_EVEN) end
|
395
|
+
end
|
396
|
+
when 26 then
|
397
|
+
# line 233 "lib/oga/css/lexer.rl"
|
398
|
+
begin
|
399
|
+
te = p+1
|
400
|
+
begin
|
401
|
+
add_token(:T_RPAREN)
|
402
|
+
|
403
|
+
cs = 13;
|
404
|
+
end
|
405
|
+
end
|
406
|
+
when 27 then
|
407
|
+
# line 160 "lib/oga/css/lexer.rl"
|
408
|
+
begin
|
409
|
+
te = p+1
|
410
|
+
begin
|
411
|
+
emit(:T_IDENT, ts, te)
|
412
|
+
end
|
413
|
+
end
|
414
|
+
when 30 then
|
415
|
+
# line 244 "lib/oga/css/lexer.rl"
|
416
|
+
begin
|
417
|
+
te = p
|
418
|
+
p = p - 1; end
|
419
|
+
when 32 then
|
420
|
+
# line 250 "lib/oga/css/lexer.rl"
|
421
|
+
begin
|
422
|
+
te = p
|
423
|
+
p = p - 1; begin add_token(:T_MINUS) end
|
424
|
+
end
|
425
|
+
when 31 then
|
426
|
+
# line 193 "lib/oga/css/lexer.rl"
|
427
|
+
begin
|
428
|
+
te = p
|
429
|
+
p = p - 1; begin
|
430
|
+
value = slice_input(ts, te).to_i
|
431
|
+
|
432
|
+
add_token(:T_INT, value)
|
433
|
+
end
|
434
|
+
end
|
435
|
+
when 33 then
|
436
|
+
# line 160 "lib/oga/css/lexer.rl"
|
437
|
+
begin
|
438
|
+
te = p
|
439
|
+
p = p - 1; begin
|
440
|
+
emit(:T_IDENT, ts, te)
|
441
|
+
end
|
442
|
+
end
|
443
|
+
when 1 then
|
444
|
+
# line 1 "NONE"
|
445
|
+
begin
|
446
|
+
case act
|
447
|
+
when 0 then
|
448
|
+
begin begin
|
449
|
+
cs = 0
|
450
|
+
_goto_level = _again
|
451
|
+
next
|
452
|
+
end
|
453
|
+
end
|
454
|
+
when 3 then
|
455
|
+
begin begin p = ((te))-1; end
|
456
|
+
add_token(:T_MINUS) end
|
457
|
+
end
|
458
|
+
end
|
459
|
+
when 34 then
|
460
|
+
# line 290 "lib/oga/css/lexer.rl"
|
461
|
+
begin
|
462
|
+
te = p+1
|
463
|
+
begin add_token(:T_EQ) end
|
464
|
+
end
|
465
|
+
when 8 then
|
466
|
+
# line 291 "lib/oga/css/lexer.rl"
|
467
|
+
begin
|
468
|
+
te = p+1
|
469
|
+
begin add_token(:T_SPACE_IN) end
|
470
|
+
end
|
471
|
+
when 6 then
|
472
|
+
# line 292 "lib/oga/css/lexer.rl"
|
473
|
+
begin
|
474
|
+
te = p+1
|
475
|
+
begin add_token(:T_STARTS_WITH) end
|
476
|
+
end
|
477
|
+
when 5 then
|
478
|
+
# line 293 "lib/oga/css/lexer.rl"
|
479
|
+
begin
|
480
|
+
te = p+1
|
481
|
+
begin add_token(:T_ENDS_WITH) end
|
482
|
+
end
|
483
|
+
when 38 then
|
484
|
+
# line 294 "lib/oga/css/lexer.rl"
|
485
|
+
begin
|
486
|
+
te = p+1
|
487
|
+
begin add_token(:T_IN) end
|
488
|
+
end
|
489
|
+
when 7 then
|
490
|
+
# line 295 "lib/oga/css/lexer.rl"
|
491
|
+
begin
|
492
|
+
te = p+1
|
493
|
+
begin add_token(:T_HYPHEN_IN) end
|
494
|
+
end
|
495
|
+
when 35 then
|
496
|
+
# line 272 "lib/oga/css/lexer.rl"
|
497
|
+
begin
|
498
|
+
te = p+1
|
499
|
+
begin
|
500
|
+
add_token(:T_RBRACK)
|
501
|
+
|
502
|
+
cs = 13;
|
503
|
+
end
|
504
|
+
end
|
505
|
+
when 4 then
|
506
|
+
# line 212 "lib/oga/css/lexer.rl"
|
507
|
+
begin
|
508
|
+
te = p+1
|
509
|
+
begin
|
510
|
+
emit(:T_STRING, ts + 1, te - 1)
|
511
|
+
end
|
512
|
+
end
|
513
|
+
when 36 then
|
514
|
+
# line 280 "lib/oga/css/lexer.rl"
|
515
|
+
begin
|
516
|
+
te = p
|
517
|
+
p = p - 1; end
|
518
|
+
when 37 then
|
519
|
+
# line 160 "lib/oga/css/lexer.rl"
|
520
|
+
begin
|
521
|
+
te = p
|
522
|
+
p = p - 1; begin
|
523
|
+
emit(:T_IDENT, ts, te)
|
524
|
+
end
|
525
|
+
end
|
526
|
+
when 14 then
|
527
|
+
# line 266 "lib/oga/css/lexer.rl"
|
528
|
+
begin
|
529
|
+
te = p+1
|
530
|
+
begin
|
531
|
+
add_token(:T_LBRACK)
|
532
|
+
|
533
|
+
cs = 28;
|
534
|
+
end
|
535
|
+
end
|
536
|
+
when 15 then
|
537
|
+
# line 145 "lib/oga/css/lexer.rl"
|
538
|
+
begin
|
539
|
+
te = p+1
|
540
|
+
begin
|
541
|
+
add_token(:T_PIPE)
|
542
|
+
end
|
543
|
+
end
|
544
|
+
when 12 then
|
545
|
+
# line 227 "lib/oga/css/lexer.rl"
|
546
|
+
begin
|
547
|
+
te = p+1
|
548
|
+
begin
|
549
|
+
add_token(:T_LPAREN)
|
550
|
+
|
551
|
+
cs = 23;
|
552
|
+
end
|
553
|
+
end
|
554
|
+
when 13 then
|
555
|
+
# line 160 "lib/oga/css/lexer.rl"
|
556
|
+
begin
|
557
|
+
te = p+1
|
558
|
+
begin
|
559
|
+
emit(:T_IDENT, ts, te)
|
560
|
+
end
|
561
|
+
end
|
562
|
+
when 11 then
|
563
|
+
# line 315 "lib/oga/css/lexer.rl"
|
564
|
+
begin
|
565
|
+
te = p+1
|
566
|
+
end
|
567
|
+
when 22 then
|
568
|
+
# line 304 "lib/oga/css/lexer.rl"
|
569
|
+
begin
|
570
|
+
te = p
|
571
|
+
p = p - 1; begin add_token(:T_GREATER) end
|
572
|
+
end
|
573
|
+
when 19 then
|
574
|
+
# line 305 "lib/oga/css/lexer.rl"
|
575
|
+
begin
|
576
|
+
te = p
|
577
|
+
p = p - 1; begin add_token(:T_PLUS) end
|
578
|
+
end
|
579
|
+
when 24 then
|
580
|
+
# line 306 "lib/oga/css/lexer.rl"
|
581
|
+
begin
|
582
|
+
te = p
|
583
|
+
p = p - 1; begin add_token(:T_TILDE) end
|
584
|
+
end
|
585
|
+
when 17 then
|
586
|
+
# line 149 "lib/oga/css/lexer.rl"
|
587
|
+
begin
|
588
|
+
te = p
|
589
|
+
p = p - 1; begin
|
590
|
+
add_token(:T_COMMA)
|
591
|
+
end
|
592
|
+
end
|
593
|
+
when 16 then
|
594
|
+
# line 135 "lib/oga/css/lexer.rl"
|
595
|
+
begin
|
596
|
+
te = p
|
597
|
+
p = p - 1; begin
|
598
|
+
add_token(:T_SPACE)
|
599
|
+
end
|
600
|
+
end
|
601
|
+
when 23 then
|
602
|
+
# line 160 "lib/oga/css/lexer.rl"
|
603
|
+
begin
|
604
|
+
te = p
|
605
|
+
p = p - 1; begin
|
606
|
+
emit(:T_IDENT, ts, te)
|
607
|
+
end
|
608
|
+
end
|
609
|
+
when 18 then
|
610
|
+
# line 139 "lib/oga/css/lexer.rl"
|
611
|
+
begin
|
612
|
+
add_token(:T_HASH) end
|
613
|
+
# line 302 "lib/oga/css/lexer.rl"
|
614
|
+
begin
|
615
|
+
te = p
|
616
|
+
p = p - 1; end
|
617
|
+
when 20 then
|
618
|
+
# line 140 "lib/oga/css/lexer.rl"
|
619
|
+
begin
|
620
|
+
add_token(:T_DOT) end
|
621
|
+
# line 302 "lib/oga/css/lexer.rl"
|
622
|
+
begin
|
623
|
+
te = p
|
624
|
+
p = p - 1; end
|
625
|
+
when 21 then
|
626
|
+
# line 141 "lib/oga/css/lexer.rl"
|
627
|
+
begin
|
628
|
+
add_token(:T_COLON) end
|
629
|
+
# line 302 "lib/oga/css/lexer.rl"
|
630
|
+
begin
|
631
|
+
te = p
|
632
|
+
p = p - 1; end
|
633
|
+
when 28 then
|
634
|
+
# line 1 "NONE"
|
635
|
+
begin
|
636
|
+
te = p+1
|
637
|
+
end
|
638
|
+
# line 250 "lib/oga/css/lexer.rl"
|
639
|
+
begin
|
640
|
+
act = 3; end
|
641
|
+
# line 641 "lib/oga/css/lexer.rb"
|
642
|
+
end
|
643
|
+
end
|
644
|
+
end
|
645
|
+
if _goto_level <= _again
|
646
|
+
case _css_lexer_to_state_actions[cs]
|
647
|
+
when 9 then
|
648
|
+
# line 1 "NONE"
|
649
|
+
begin
|
650
|
+
ts = nil; end
|
651
|
+
when 25 then
|
652
|
+
# line 1 "NONE"
|
653
|
+
begin
|
654
|
+
ts = nil; end
|
655
|
+
# line 1 "NONE"
|
656
|
+
begin
|
657
|
+
act = 0
|
658
|
+
end
|
659
|
+
# line 659 "lib/oga/css/lexer.rb"
|
660
|
+
end
|
661
|
+
|
662
|
+
if cs == 0
|
663
|
+
_goto_level = _out
|
664
|
+
next
|
665
|
+
end
|
666
|
+
p += 1
|
667
|
+
if p != pe
|
668
|
+
_goto_level = _resume
|
669
|
+
next
|
670
|
+
end
|
671
|
+
end
|
672
|
+
if _goto_level <= _test_eof
|
673
|
+
if p == eof
|
674
|
+
if _css_lexer_eof_trans[cs] > 0
|
675
|
+
_trans = _css_lexer_eof_trans[cs] - 1;
|
676
|
+
_goto_level = _eof_trans
|
677
|
+
next;
|
678
|
+
end
|
679
|
+
end
|
680
|
+
|
681
|
+
end
|
682
|
+
if _goto_level <= _out
|
683
|
+
break
|
684
|
+
end
|
685
|
+
end
|
686
|
+
end
|
687
|
+
|
688
|
+
# line 81 "lib/oga/css/lexer.rl"
|
689
|
+
|
690
|
+
# % fix highlight
|
691
|
+
ensure
|
692
|
+
@block = nil
|
693
|
+
end
|
694
|
+
|
695
|
+
private
|
696
|
+
|
697
|
+
##
|
698
|
+
# Emits a token of which the value is based on the supplied start/stop
|
699
|
+
# position.
|
700
|
+
#
|
701
|
+
# @param [Symbol] type The token type.
|
702
|
+
# @param [Fixnum] start
|
703
|
+
# @param [Fixnum] stop
|
704
|
+
#
|
705
|
+
# @see [#text]
|
706
|
+
# @see [#add_token]
|
707
|
+
#
|
708
|
+
def emit(type, start, stop)
|
709
|
+
value = slice_input(start, stop)
|
710
|
+
|
711
|
+
add_token(type, value)
|
712
|
+
end
|
713
|
+
|
714
|
+
##
|
715
|
+
# Returns the text between the specified start and stop position.
|
716
|
+
#
|
717
|
+
# @param [Fixnum] start
|
718
|
+
# @param [Fixnum] stop
|
719
|
+
# @return [String]
|
720
|
+
#
|
721
|
+
def slice_input(start, stop)
|
722
|
+
return @data.byteslice(start, stop - start)
|
723
|
+
end
|
724
|
+
|
725
|
+
##
|
726
|
+
# Yields a new token to the supplied block.
|
727
|
+
#
|
728
|
+
# @param [Symbol] type The token type.
|
729
|
+
# @param [String] value The token value.
|
730
|
+
#
|
731
|
+
# @yieldparam [Symbol] type
|
732
|
+
# @yieldparam [String|NilClass] value
|
733
|
+
#
|
734
|
+
def add_token(type, value = nil)
|
735
|
+
@block.call(type, value)
|
736
|
+
end
|
737
|
+
|
738
|
+
|
739
|
+
# line 317 "lib/oga/css/lexer.rl"
|
740
|
+
|
741
|
+
end # Lexer
|
742
|
+
end # CSS
|
743
|
+
end # Oga
|