rubyjedi-oga 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +13 -0
- data/LICENSE +362 -0
- data/README.md +317 -0
- data/doc/css/common.css +77 -0
- data/doc/css_selectors.md +935 -0
- data/doc/manually_creating_documents.md +67 -0
- data/doc/migrating_from_nokogiri.md +169 -0
- data/doc/xml_namespaces.md +63 -0
- data/ext/c/extconf.rb +11 -0
- data/ext/c/lexer.c +2595 -0
- data/ext/c/lexer.h +16 -0
- data/ext/c/lexer.rl +198 -0
- data/ext/c/liboga.c +6 -0
- data/ext/c/liboga.h +11 -0
- data/ext/java/Liboga.java +14 -0
- data/ext/java/org/liboga/xml/Lexer.java +1363 -0
- data/ext/java/org/liboga/xml/Lexer.rl +223 -0
- data/ext/ragel/base_lexer.rl +633 -0
- data/lib/oga.rb +57 -0
- data/lib/oga/blacklist.rb +40 -0
- data/lib/oga/css/lexer.rb +743 -0
- data/lib/oga/css/parser.rb +976 -0
- data/lib/oga/entity_decoder.rb +21 -0
- data/lib/oga/html/entities.rb +2150 -0
- data/lib/oga/html/parser.rb +25 -0
- data/lib/oga/html/sax_parser.rb +18 -0
- data/lib/oga/lru.rb +160 -0
- data/lib/oga/oga.rb +57 -0
- data/lib/oga/version.rb +3 -0
- data/lib/oga/whitelist.rb +20 -0
- data/lib/oga/xml/attribute.rb +136 -0
- data/lib/oga/xml/cdata.rb +17 -0
- data/lib/oga/xml/character_node.rb +37 -0
- data/lib/oga/xml/comment.rb +17 -0
- data/lib/oga/xml/default_namespace.rb +13 -0
- data/lib/oga/xml/doctype.rb +82 -0
- data/lib/oga/xml/document.rb +108 -0
- data/lib/oga/xml/element.rb +428 -0
- data/lib/oga/xml/entities.rb +122 -0
- data/lib/oga/xml/html_void_elements.rb +15 -0
- data/lib/oga/xml/lexer.rb +550 -0
- data/lib/oga/xml/namespace.rb +48 -0
- data/lib/oga/xml/node.rb +219 -0
- data/lib/oga/xml/node_set.rb +333 -0
- data/lib/oga/xml/parser.rb +631 -0
- data/lib/oga/xml/processing_instruction.rb +37 -0
- data/lib/oga/xml/pull_parser.rb +175 -0
- data/lib/oga/xml/querying.rb +56 -0
- data/lib/oga/xml/sax_parser.rb +192 -0
- data/lib/oga/xml/text.rb +66 -0
- data/lib/oga/xml/traversal.rb +50 -0
- data/lib/oga/xml/xml_declaration.rb +65 -0
- data/lib/oga/xpath/evaluator.rb +1798 -0
- data/lib/oga/xpath/lexer.rb +1958 -0
- data/lib/oga/xpath/parser.rb +622 -0
- data/oga.gemspec +45 -0
- metadata +227 -0
data/lib/oga.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'ast'
|
2
|
+
require 'set'
|
3
|
+
require 'stringio'
|
4
|
+
require 'thread'
|
5
|
+
|
6
|
+
require 'oga/version'
|
7
|
+
require 'oga/oga'
|
8
|
+
require 'oga/lru'
|
9
|
+
require 'oga/entity_decoder'
|
10
|
+
require 'oga/blacklist'
|
11
|
+
require 'oga/whitelist'
|
12
|
+
|
13
|
+
# Load these first so that the native extensions don't have to define the
|
14
|
+
# Oga::XML namespace.
|
15
|
+
require 'oga/xml/lexer'
|
16
|
+
require 'oga/xml/parser'
|
17
|
+
|
18
|
+
require 'liboga'
|
19
|
+
|
20
|
+
#:nocov:
|
21
|
+
if RUBY_PLATFORM == 'java'
|
22
|
+
org.liboga.Liboga.load(JRuby.runtime)
|
23
|
+
end
|
24
|
+
#:nocov:
|
25
|
+
|
26
|
+
require 'oga/xml/html_void_elements'
|
27
|
+
require 'oga/xml/entities'
|
28
|
+
require 'oga/xml/querying'
|
29
|
+
require 'oga/xml/traversal'
|
30
|
+
require 'oga/xml/node'
|
31
|
+
require 'oga/xml/document'
|
32
|
+
require 'oga/xml/character_node'
|
33
|
+
require 'oga/xml/text'
|
34
|
+
require 'oga/xml/comment'
|
35
|
+
require 'oga/xml/cdata'
|
36
|
+
require 'oga/xml/xml_declaration'
|
37
|
+
require 'oga/xml/processing_instruction'
|
38
|
+
require 'oga/xml/doctype'
|
39
|
+
require 'oga/xml/namespace'
|
40
|
+
require 'oga/xml/default_namespace'
|
41
|
+
require 'oga/xml/attribute'
|
42
|
+
require 'oga/xml/element'
|
43
|
+
require 'oga/xml/node_set'
|
44
|
+
|
45
|
+
require 'oga/xml/sax_parser'
|
46
|
+
require 'oga/xml/pull_parser'
|
47
|
+
|
48
|
+
require 'oga/html/parser'
|
49
|
+
require 'oga/html/sax_parser'
|
50
|
+
require 'oga/html/entities'
|
51
|
+
|
52
|
+
require 'oga/xpath/lexer'
|
53
|
+
require 'oga/xpath/parser'
|
54
|
+
require 'oga/xpath/evaluator'
|
55
|
+
|
56
|
+
require 'oga/css/lexer'
|
57
|
+
require 'oga/css/parser'
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Oga
  ##
  # A set of names that are *rejected* by {#allow?}; any name not in the set
  # is allowed. Every name is stored both as given and in its upper-case form,
  # so a lookup matches either spelling.
  #
  # @api private
  #
  class Blacklist
    # @return [Set] The stored names (original and upper-cased variants).
    attr_reader :names

    ##
    # @param [Array|Set] names Names to reject. Any collection that responds
    #  to `+` and `map` works; {#+} passes a Set here.
    #
    def initialize(names)
      @names = Set.new(names + names.map(&:upcase))
    end

    ##
    # Yields every stored name. When called without a block an Enumerator is
    # returned instead of raising LocalJumpError.
    #
    # @yieldparam [String]
    # @return [Set|Enumerator] The underlying Set when a block is given,
    #  otherwise an Enumerator over the names.
    #
    def each(&block)
      return to_enum(:each) unless block

      names.each(&block)
    end

    ##
    # @param [String] name
    # @return [TrueClass|FalseClass] `true` when the name is NOT in the list.
    #
    def allow?(name)
      !names.include?(name)
    end

    ##
    # Builds a new list containing the names of both operands.
    #
    # @param [Oga::Blacklist] other
    # @return [Oga::Blacklist]
    #
    def +(other)
      self.class.new(names + other.names)
    end
  end # Blacklist
end # Oga
|
@@ -0,0 +1,743 @@
|
|
1
|
+
|
2
|
+
# line 1 "lib/oga/css/lexer.rl"
|
3
|
+
|
4
|
+
# line 3 "lib/oga/css/lexer.rl"
|
5
|
+
module Oga
  module CSS
    ##
    # Lexer for turning CSS expressions into a sequence of tokens. Tokens are
    # returned as arrays with every array having two values:
    #
    # 1. The token type as a Symbol
    # 2. The token value, or nil if there is no value.
    #
    # The transition tables and the body of the state machine in {#advance}
    # carry `# line N "lib/oga/css/lexer.rl"` markers, i.e. they appear to be
    # generated from that grammar file; change the grammar rather than editing
    # the tables by hand.
    #
    # ## Thread Safety
    #
    # Similar to the XPath lexer this lexer keeps track of an internal state. As
    # a result it's not safe to share the same instance of this lexer between
    # multiple threads. However, no global state is used so you can use separate
    # instances in threads just fine.
    #
    # @api private
    #
    class Lexer

      # line 26 "lib/oga/css/lexer.rb"
      # Per-state [low, high] byte range; indexed with `cs << 1`.
      class << self
        attr_accessor :_css_lexer_trans_keys
        private :_css_lexer_trans_keys, :_css_lexer_trans_keys=
      end
      self._css_lexer_trans_keys = [
        0, 0, 43, 57, 118, 118,
        101, 101, 110, 110, 100,
        100, 100, 100, 34, 34,
        61, 61, 39, 39, 61, 61,
        61, 61, 61, 61, 9,
        126, 9, 44, 9, 32,
        0, 0, 9, 32, 0, 0,
        0, 0, 9, 32, 45,
        122, 9, 32, 9, 122,
        9, 32, 48, 57, 43, 57,
        45, 122, 9, 126, 9,
        32, 61, 61, 45, 122,
        0
      ]

      # Per-state width of the byte range described by the key table.
      class << self
        attr_accessor :_css_lexer_key_spans
        private :_css_lexer_key_spans, :_css_lexer_key_spans=
      end
      self._css_lexer_key_spans = [
        0, 15, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 118, 36, 24,
        0, 24, 0, 0, 24, 78, 24, 114,
        24, 10, 15, 78, 118, 24, 1, 78
      ]

      # Per-state start offset into the indicies table.
      class << self
        attr_accessor :_css_lexer_index_offsets
        private :_css_lexer_index_offsets, :_css_lexer_index_offsets=
      end
      self._css_lexer_index_offsets = [
        0, 0, 16, 18, 20, 22, 24, 26,
        28, 30, 32, 34, 36, 38, 157, 194,
        219, 220, 245, 246, 247, 272, 351, 376,
        491, 516, 527, 543, 622, 741, 766, 768
      ]

      # Maps (state offset + byte - low key) to a transition number; the entry
      # at (offset + span) is the state's default transition.
      class << self
        attr_accessor :_css_lexer_indicies
        private :_css_lexer_indicies, :_css_lexer_indicies=
      end
      self._css_lexer_indicies = [
        1, 0, 1, 0, 0, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 0,
        3, 4, 5, 4, 6, 4, 7, 4,
        8, 4, 10, 9, 11, 4, 10, 12,
        13, 4, 14, 4, 15, 4, 17, 16,
        16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 17, 16, 16,
        18, 16, 16, 16, 16, 19, 16, 20,
        21, 22, 16, 23, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 24,
        16, 16, 16, 25, 16, 16, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26,
        27, 16, 16, 16, 26, 16, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26,
        16, 28, 16, 29, 16, 17, 30, 30,
        30, 30, 30, 30, 30, 30, 30, 30,
        30, 30, 30, 30, 30, 30, 30, 30,
        30, 30, 30, 30, 17, 30, 30, 30,
        30, 30, 30, 30, 30, 30, 30, 30,
        22, 30, 22, 31, 31, 31, 31, 31,
        31, 31, 31, 31, 31, 31, 31, 31,
        31, 31, 31, 31, 31, 31, 31, 31,
        31, 22, 31, 32, 21, 33, 33, 33,
        33, 33, 33, 33, 33, 33, 33, 33,
        33, 33, 33, 33, 33, 33, 33, 33,
        33, 33, 33, 21, 33, 34, 35, 25,
        36, 36, 36, 36, 36, 36, 36, 36,
        36, 36, 36, 36, 36, 36, 36, 36,
        36, 36, 36, 36, 36, 36, 25, 36,
        26, 37, 37, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 37, 37, 37,
        37, 37, 37, 37, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 37, 37,
        37, 37, 26, 37, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 26, 26,
        26, 26, 26, 26, 26, 26, 37, 29,
        38, 38, 38, 38, 38, 38, 38, 38,
        38, 38, 38, 38, 38, 38, 38, 38,
        38, 38, 38, 38, 38, 38, 29, 38,
        39, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 39,
        4, 4, 4, 4, 4, 4, 4, 4,
        40, 41, 1, 4, 42, 4, 4, 2,
        2, 2, 2, 2, 2, 2, 2, 2,
        2, 4, 4, 4, 4, 4, 4, 4,
        43, 43, 43, 43, 43, 43, 43, 43,
        43, 43, 43, 43, 43, 43, 43, 43,
        43, 43, 43, 43, 43, 43, 43, 43,
        43, 43, 4, 4, 4, 4, 43, 4,
        43, 43, 43, 43, 44, 43, 43, 43,
        43, 43, 43, 43, 43, 45, 46, 43,
        43, 43, 43, 43, 43, 43, 43, 43,
        43, 43, 4, 39, 47, 47, 47, 47,
        47, 47, 47, 47, 47, 47, 47, 47,
        47, 47, 47, 47, 47, 47, 47, 47,
        47, 47, 39, 47, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 48, 1,
        49, 1, 49, 49, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 49, 43,
        50, 50, 43, 43, 43, 43, 43, 43,
        43, 43, 43, 43, 50, 50, 50, 50,
        50, 50, 50, 43, 43, 43, 43, 43,
        43, 43, 43, 43, 43, 43, 43, 43,
        43, 43, 43, 43, 43, 43, 43, 43,
        43, 43, 43, 43, 43, 50, 50, 50,
        50, 43, 50, 43, 43, 43, 43, 43,
        43, 43, 43, 43, 43, 43, 43, 43,
        43, 43, 43, 43, 43, 43, 43, 43,
        43, 43, 43, 43, 43, 50, 51, 4,
        4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 51, 4, 9,
        4, 52, 4, 4, 12, 4, 4, 53,
        4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 54, 4, 4, 4, 55, 55,
        55, 55, 55, 55, 55, 55, 55, 55,
        55, 55, 55, 55, 55, 55, 55, 55,
        55, 55, 55, 55, 55, 55, 55, 55,
        4, 4, 56, 57, 55, 4, 55, 55,
        55, 55, 55, 55, 55, 55, 55, 55,
        55, 55, 55, 55, 55, 55, 55, 55,
        55, 55, 55, 55, 55, 55, 55, 55,
        4, 58, 4, 59, 4, 51, 60, 60,
        60, 60, 60, 60, 60, 60, 60, 60,
        60, 60, 60, 60, 60, 60, 60, 60,
        60, 60, 60, 60, 51, 60, 62, 61,
        55, 61, 61, 55, 55, 55, 55, 55,
        55, 55, 55, 55, 55, 61, 61, 61,
        61, 61, 61, 61, 55, 55, 55, 55,
        55, 55, 55, 55, 55, 55, 55, 55,
        55, 55, 55, 55, 55, 55, 55, 55,
        55, 55, 55, 55, 55, 55, 61, 61,
        61, 61, 55, 61, 55, 55, 55, 55,
        55, 55, 55, 55, 55, 55, 55, 55,
        55, 55, 55, 55, 55, 55, 55, 55,
        55, 55, 55, 55, 55, 55, 61, 0
      ]

      # Target state for every transition number.
      class << self
        attr_accessor :_css_lexer_trans_targs
        private :_css_lexer_trans_targs, :_css_lexer_trans_targs=
      end
      self._css_lexer_trans_targs = [
        23, 1, 25, 3, 0, 4, 23, 6,
        23, 7, 28, 28, 9, 28, 28, 28,
        13, 14, 16, 13, 13, 17, 15, 18,
        19, 20, 21, 13, 13, 22, 13, 13,
        13, 13, 13, 13, 13, 13, 13, 24,
        23, 23, 26, 27, 2, 23, 5, 23,
        23, 23, 23, 29, 8, 30, 28, 31,
        28, 10, 11, 12, 28, 28, 28
      ]

      # Action number for every transition number (0 means "no action").
      class << self
        attr_accessor :_css_lexer_trans_actions
        private :_css_lexer_trans_actions, :_css_lexer_trans_actions=
      end
      self._css_lexer_trans_actions = [
        1, 0, 0, 0, 0, 0, 2, 0,
        3, 0, 4, 5, 0, 6, 7, 8,
        11, 0, 0, 12, 13, 0, 0, 0,
        0, 0, 0, 14, 15, 0, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 0,
        26, 27, 28, 0, 0, 29, 0, 30,
        31, 32, 33, 0, 0, 0, 34, 0,
        35, 0, 0, 0, 36, 37, 38
      ]

      # Action run after entering a state (indexed by state).
      class << self
        attr_accessor :_css_lexer_to_state_actions
        private :_css_lexer_to_state_actions, :_css_lexer_to_state_actions=
      end
      self._css_lexer_to_state_actions = [
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 9, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 25,
        0, 0, 0, 0, 9, 0, 0, 0
      ]

      # Action run before leaving a state (indexed by state).
      class << self
        attr_accessor :_css_lexer_from_state_actions
        private :_css_lexer_from_state_actions, :_css_lexer_from_state_actions=
      end
      self._css_lexer_from_state_actions = [
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 10, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 10,
        0, 0, 0, 0, 10, 0, 0, 0
      ]

      # Transition (1-based) taken when the input ends in a state; 0 = none.
      class << self
        attr_accessor :_css_lexer_eof_trans
        private :_css_lexer_eof_trans, :_css_lexer_eof_trans=
      end
      self._css_lexer_eof_trans = [
        0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 31, 32,
        33, 34, 35, 36, 37, 38, 39, 0,
        48, 49, 50, 51, 0, 61, 62, 62
      ]

      class << self
        attr_accessor :css_lexer_start
      end
      self.css_lexer_start = 13;
      class << self
        attr_accessor :css_lexer_first_final
      end
      self.css_lexer_first_final = 13;
      class << self
        attr_accessor :css_lexer_error
      end
      self.css_lexer_error = 0;

      # Entry states of the three sub-machines; the actions in #advance switch
      # between them by assigning these numbers to `cs`.
      class << self
        attr_accessor :css_lexer_en_pseudo_args
      end
      self.css_lexer_en_pseudo_args = 23;
      class << self
        attr_accessor :css_lexer_en_predicate
      end
      self.css_lexer_en_predicate = 28;
      class << self
        attr_accessor :css_lexer_en_main
      end
      self.css_lexer_en_main = 13;


      # line 23 "lib/oga/css/lexer.rl"

      # % fix highlight

      ##
      # @param [String] data The data to lex.
      #
      def initialize(data)
        @data = data
      end

      ##
      # Gathers all the tokens for the input and returns them as an Array.
      #
      # @see #advance
      # @return [Array] Array of `[type, value]` pairs.
      #
      def lex
        tokens = []

        advance { |type, value| tokens << [type, value] }

        tokens
      end

      ##
      # Advances through the input and generates the corresponding tokens. Each
      # token is yielded to the supplied block.
      #
      # This method stores the supplied block in `@block` and resets it after
      # the lexer loop has finished (also when an error is raised).
      #
      # When no block is given an Enumerator over the tokens is returned instead
      # of failing with a NoMethodError on the first token.
      #
      # @see #add_token
      # @yieldparam [Symbol] type
      # @yieldparam [String|Integer|NilClass] value
      # @return [Enumerator|NilClass] An Enumerator when called without a block.
      #
      def advance(&block)
        return to_enum(:advance) unless block

        @block = block

        data  = @data # saves ivar lookups while lexing.
        ts    = nil
        te    = nil
        stack = []
        top   = 0
        cs    = self.class.css_lexer_start
        act   = 0
        eof   = data.bytesize
        p     = 0
        pe    = eof

        # The tables are private class-level accessors, hence the use of `send`.
        _css_lexer_eof_trans          = self.class.send(:_css_lexer_eof_trans)
        _css_lexer_from_state_actions = self.class.send(:_css_lexer_from_state_actions)
        _css_lexer_index_offsets      = self.class.send(:_css_lexer_index_offsets)
        _css_lexer_indicies           = self.class.send(:_css_lexer_indicies)
        _css_lexer_key_spans          = self.class.send(:_css_lexer_key_spans)
        _css_lexer_to_state_actions   = self.class.send(:_css_lexer_to_state_actions)
        _css_lexer_trans_actions      = self.class.send(:_css_lexer_trans_actions)
        _css_lexer_trans_keys         = self.class.send(:_css_lexer_trans_keys)
        _css_lexer_trans_targs        = self.class.send(:_css_lexer_trans_targs)


        # line 333 "lib/oga/css/lexer.rb"
        # State machine body. `_goto_level` emulates goto: each numbered label
        # below is a stage of the loop and `next` re-enters at that stage.
        begin
          testEof = false
          _slen, _trans, _keys, _inds, _acts, _nacts = nil
          _goto_level = 0
          _resume = 10
          _eof_trans = 15
          _again = 20
          _test_eof = 30
          _out = 40
          while true
            if _goto_level <= 0
              if p == pe
                _goto_level = _test_eof
                next
              end
              if cs == 0
                _goto_level = _out
                next
              end
            end
            if _goto_level <= _resume
              case _css_lexer_from_state_actions[cs]
              when 10 then
                # line 1 "NONE"
                begin
                  ts = p
                end
                # line 361 "lib/oga/css/lexer.rb"
              end
              _keys = cs << 1
              _inds = _css_lexer_index_offsets[cs]
              _slen = _css_lexer_key_spans[cs]
              _trans = if ( _slen > 0 &&
                  _css_lexer_trans_keys[_keys] <= ( (data.getbyte(p) || 0)) &&
                  ( (data.getbyte(p) || 0)) <= _css_lexer_trans_keys[_keys + 1]
                ) then
                _css_lexer_indicies[ _inds + ( (data.getbyte(p) || 0)) - _css_lexer_trans_keys[_keys] ]
              else
                _css_lexer_indicies[ _inds + _slen ]
              end
            end
            if _goto_level <= _eof_trans
              cs = _css_lexer_trans_targs[_trans]
              if _css_lexer_trans_actions[_trans] != 0
                case _css_lexer_trans_actions[_trans]
                when 29 then
                  # line 251 "lib/oga/css/lexer.rl"
                  begin
                    te = p+1
                    begin add_token(:T_NTH) end
                  end
                when 3 then
                  # line 253 "lib/oga/css/lexer.rl"
                  begin
                    te = p+1
                    begin add_token(:T_ODD) end
                  end
                when 2 then
                  # line 254 "lib/oga/css/lexer.rl"
                  begin
                    te = p+1
                    begin add_token(:T_EVEN) end
                  end
                when 26 then
                  # line 235 "lib/oga/css/lexer.rl"
                  begin
                    te = p+1
                    begin
                      add_token(:T_RPAREN)

                      cs = 13;
                    end
                  end
                when 27 then
                  # line 162 "lib/oga/css/lexer.rl"
                  begin
                    te = p+1
                    begin
                      emit(:T_IDENT, ts, te)
                    end
                  end
                when 30 then
                  # line 246 "lib/oga/css/lexer.rl"
                  begin
                    te = p
                    p = p - 1; end
                when 32 then
                  # line 252 "lib/oga/css/lexer.rl"
                  begin
                    te = p
                    p = p - 1; begin add_token(:T_MINUS) end
                  end
                when 31 then
                  # line 195 "lib/oga/css/lexer.rl"
                  begin
                    te = p
                    p = p - 1; begin
                      value = slice_input(ts, te).to_i

                      add_token(:T_INT, value)
                    end
                  end
                when 33 then
                  # line 162 "lib/oga/css/lexer.rl"
                  begin
                    te = p
                    p = p - 1; begin
                      emit(:T_IDENT, ts, te)
                    end
                  end
                when 1 then
                  # line 1 "NONE"
                  begin
                    case act
                    when 0 then
                      begin begin
                        cs = 0
                        _goto_level = _again
                        next
                      end
                      end
                    when 3 then
                      begin begin p = ((te))-1; end
                        add_token(:T_MINUS) end
                    end
                  end
                when 34 then
                  # line 292 "lib/oga/css/lexer.rl"
                  begin
                    te = p+1
                    begin add_token(:T_EQ) end
                  end
                when 8 then
                  # line 293 "lib/oga/css/lexer.rl"
                  begin
                    te = p+1
                    begin add_token(:T_SPACE_IN) end
                  end
                when 6 then
                  # line 294 "lib/oga/css/lexer.rl"
                  begin
                    te = p+1
                    begin add_token(:T_STARTS_WITH) end
                  end
                when 5 then
                  # line 295 "lib/oga/css/lexer.rl"
                  begin
                    te = p+1
                    begin add_token(:T_ENDS_WITH) end
                  end
                when 38 then
                  # line 296 "lib/oga/css/lexer.rl"
                  begin
                    te = p+1
                    begin add_token(:T_IN) end
                  end
                when 7 then
                  # line 297 "lib/oga/css/lexer.rl"
                  begin
                    te = p+1
                    begin add_token(:T_HYPHEN_IN) end
                  end
                when 35 then
                  # line 274 "lib/oga/css/lexer.rl"
                  begin
                    te = p+1
                    begin
                      add_token(:T_RBRACK)

                      cs = 13;
                    end
                  end
                when 4 then
                  # line 214 "lib/oga/css/lexer.rl"
                  begin
                    te = p+1
                    begin
                      emit(:T_STRING, ts + 1, te - 1)
                    end
                  end
                when 36 then
                  # line 282 "lib/oga/css/lexer.rl"
                  begin
                    te = p
                    p = p - 1; end
                when 37 then
                  # line 162 "lib/oga/css/lexer.rl"
                  begin
                    te = p
                    p = p - 1; begin
                      emit(:T_IDENT, ts, te)
                    end
                  end
                when 14 then
                  # line 268 "lib/oga/css/lexer.rl"
                  begin
                    te = p+1
                    begin
                      add_token(:T_LBRACK)

                      cs = 28;
                    end
                  end
                when 15 then
                  # line 147 "lib/oga/css/lexer.rl"
                  begin
                    te = p+1
                    begin
                      add_token(:T_PIPE)
                    end
                  end
                when 12 then
                  # line 229 "lib/oga/css/lexer.rl"
                  begin
                    te = p+1
                    begin
                      add_token(:T_LPAREN)

                      cs = 23;
                    end
                  end
                when 13 then
                  # line 162 "lib/oga/css/lexer.rl"
                  begin
                    te = p+1
                    begin
                      emit(:T_IDENT, ts, te)
                    end
                  end
                when 11 then
                  # line 317 "lib/oga/css/lexer.rl"
                  begin
                    te = p+1
                  end
                when 22 then
                  # line 306 "lib/oga/css/lexer.rl"
                  begin
                    te = p
                    p = p - 1; begin add_token(:T_GREATER) end
                  end
                when 19 then
                  # line 307 "lib/oga/css/lexer.rl"
                  begin
                    te = p
                    p = p - 1; begin add_token(:T_PLUS) end
                  end
                when 24 then
                  # line 308 "lib/oga/css/lexer.rl"
                  begin
                    te = p
                    p = p - 1; begin add_token(:T_TILDE) end
                  end
                when 17 then
                  # line 151 "lib/oga/css/lexer.rl"
                  begin
                    te = p
                    p = p - 1; begin
                      add_token(:T_COMMA)
                    end
                  end
                when 16 then
                  # line 137 "lib/oga/css/lexer.rl"
                  begin
                    te = p
                    p = p - 1; begin
                      add_token(:T_SPACE)
                    end
                  end
                when 23 then
                  # line 162 "lib/oga/css/lexer.rl"
                  begin
                    te = p
                    p = p - 1; begin
                      emit(:T_IDENT, ts, te)
                    end
                  end
                when 18 then
                  # line 141 "lib/oga/css/lexer.rl"
                  begin
                    add_token(:T_HASH) end
                  # line 304 "lib/oga/css/lexer.rl"
                  begin
                    te = p
                    p = p - 1; end
                when 20 then
                  # line 142 "lib/oga/css/lexer.rl"
                  begin
                    add_token(:T_DOT) end
                  # line 304 "lib/oga/css/lexer.rl"
                  begin
                    te = p
                    p = p - 1; end
                when 21 then
                  # line 143 "lib/oga/css/lexer.rl"
                  begin
                    add_token(:T_COLON) end
                  # line 304 "lib/oga/css/lexer.rl"
                  begin
                    te = p
                    p = p - 1; end
                when 28 then
                  # line 1 "NONE"
                  begin
                    te = p+1
                  end
                  # line 252 "lib/oga/css/lexer.rl"
                  begin
                    act = 3; end
                  # line 642 "lib/oga/css/lexer.rb"
                end
              end
            end
            if _goto_level <= _again
              case _css_lexer_to_state_actions[cs]
              when 9 then
                # line 1 "NONE"
                begin
                  ts = nil; end
              when 25 then
                # line 1 "NONE"
                begin
                  ts = nil; end
                # line 1 "NONE"
                begin
                  act = 0
                end
                # line 660 "lib/oga/css/lexer.rb"
              end

              if cs == 0
                _goto_level = _out
                next
              end
              p += 1
              if p != pe
                _goto_level = _resume
                next
              end
            end
            if _goto_level <= _test_eof
              if p == eof
                if _css_lexer_eof_trans[cs] > 0
                  _trans = _css_lexer_eof_trans[cs] - 1;
                  _goto_level = _eof_trans
                  next;
                end
              end

            end
            if _goto_level <= _out
              break
            end
          end
        end

        # line 83 "lib/oga/css/lexer.rl"

        # % fix highlight
      ensure
        @block = nil
      end

      private

      ##
      # Emits a token of which the value is based on the supplied start/stop
      # position.
      #
      # @param [Symbol] type The token type.
      # @param [Integer] start Byte offset of the first byte (inclusive).
      # @param [Integer] stop Byte offset just past the last byte (exclusive).
      #
      # @see #slice_input
      # @see #add_token
      #
      def emit(type, start, stop)
        value = slice_input(start, stop)

        add_token(type, value)
      end

      ##
      # Returns the text between the specified start and stop position.
      # Positions are byte offsets into the input.
      #
      # @param [Integer] start
      # @param [Integer] stop
      # @return [String]
      #
      def slice_input(start, stop)
        @data.byteslice(start, stop - start)
      end

      ##
      # Yields a new token to the supplied block.
      #
      # @param [Symbol] type The token type.
      # @param [String] value The token value.
      #
      # @yieldparam [Symbol] type
      # @yieldparam [String|NilClass] value
      #
      def add_token(type, value = nil)
        @block.call(type, value)
      end


      # line 319 "lib/oga/css/lexer.rl"

    end # Lexer
  end # CSS
end # Oga
|