nokogiri 1.11.0.rc1-x86-linux

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (145) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE-DEPENDENCIES.md +1614 -0
  3. data/LICENSE.md +9 -0
  4. data/README.md +200 -0
  5. data/bin/nokogiri +118 -0
  6. data/dependencies.yml +74 -0
  7. data/ext/nokogiri/depend +358 -0
  8. data/ext/nokogiri/extconf.rb +695 -0
  9. data/ext/nokogiri/html_document.c +170 -0
  10. data/ext/nokogiri/html_document.h +10 -0
  11. data/ext/nokogiri/html_element_description.c +279 -0
  12. data/ext/nokogiri/html_element_description.h +10 -0
  13. data/ext/nokogiri/html_entity_lookup.c +32 -0
  14. data/ext/nokogiri/html_entity_lookup.h +8 -0
  15. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  16. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  17. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  18. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  19. data/ext/nokogiri/nokogiri.c +147 -0
  20. data/ext/nokogiri/nokogiri.h +122 -0
  21. data/ext/nokogiri/xml_attr.c +103 -0
  22. data/ext/nokogiri/xml_attr.h +9 -0
  23. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  24. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  25. data/ext/nokogiri/xml_cdata.c +62 -0
  26. data/ext/nokogiri/xml_cdata.h +9 -0
  27. data/ext/nokogiri/xml_comment.c +69 -0
  28. data/ext/nokogiri/xml_comment.h +9 -0
  29. data/ext/nokogiri/xml_document.c +617 -0
  30. data/ext/nokogiri/xml_document.h +23 -0
  31. data/ext/nokogiri/xml_document_fragment.c +48 -0
  32. data/ext/nokogiri/xml_document_fragment.h +10 -0
  33. data/ext/nokogiri/xml_dtd.c +202 -0
  34. data/ext/nokogiri/xml_dtd.h +10 -0
  35. data/ext/nokogiri/xml_element_content.c +123 -0
  36. data/ext/nokogiri/xml_element_content.h +10 -0
  37. data/ext/nokogiri/xml_element_decl.c +69 -0
  38. data/ext/nokogiri/xml_element_decl.h +9 -0
  39. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  40. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  41. data/ext/nokogiri/xml_entity_decl.c +110 -0
  42. data/ext/nokogiri/xml_entity_decl.h +10 -0
  43. data/ext/nokogiri/xml_entity_reference.c +52 -0
  44. data/ext/nokogiri/xml_entity_reference.h +9 -0
  45. data/ext/nokogiri/xml_io.c +61 -0
  46. data/ext/nokogiri/xml_io.h +11 -0
  47. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  48. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  49. data/ext/nokogiri/xml_namespace.c +111 -0
  50. data/ext/nokogiri/xml_namespace.h +14 -0
  51. data/ext/nokogiri/xml_node.c +1773 -0
  52. data/ext/nokogiri/xml_node.h +13 -0
  53. data/ext/nokogiri/xml_node_set.c +486 -0
  54. data/ext/nokogiri/xml_node_set.h +12 -0
  55. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  56. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  57. data/ext/nokogiri/xml_reader.c +668 -0
  58. data/ext/nokogiri/xml_reader.h +10 -0
  59. data/ext/nokogiri/xml_relax_ng.c +161 -0
  60. data/ext/nokogiri/xml_relax_ng.h +9 -0
  61. data/ext/nokogiri/xml_sax_parser.c +310 -0
  62. data/ext/nokogiri/xml_sax_parser.h +39 -0
  63. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  64. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  65. data/ext/nokogiri/xml_sax_push_parser.c +159 -0
  66. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  67. data/ext/nokogiri/xml_schema.c +205 -0
  68. data/ext/nokogiri/xml_schema.h +9 -0
  69. data/ext/nokogiri/xml_syntax_error.c +64 -0
  70. data/ext/nokogiri/xml_syntax_error.h +13 -0
  71. data/ext/nokogiri/xml_text.c +52 -0
  72. data/ext/nokogiri/xml_text.h +9 -0
  73. data/ext/nokogiri/xml_xpath_context.c +298 -0
  74. data/ext/nokogiri/xml_xpath_context.h +10 -0
  75. data/ext/nokogiri/xslt_stylesheet.c +266 -0
  76. data/ext/nokogiri/xslt_stylesheet.h +14 -0
  77. data/lib/nokogiri.rb +127 -0
  78. data/lib/nokogiri/2.4/nokogiri.so +0 -0
  79. data/lib/nokogiri/2.5/nokogiri.so +0 -0
  80. data/lib/nokogiri/2.6/nokogiri.so +0 -0
  81. data/lib/nokogiri/2.7/nokogiri.so +0 -0
  82. data/lib/nokogiri/css.rb +28 -0
  83. data/lib/nokogiri/css/node.rb +53 -0
  84. data/lib/nokogiri/css/parser.rb +751 -0
  85. data/lib/nokogiri/css/parser.y +272 -0
  86. data/lib/nokogiri/css/parser_extras.rb +92 -0
  87. data/lib/nokogiri/css/syntax_error.rb +8 -0
  88. data/lib/nokogiri/css/tokenizer.rb +154 -0
  89. data/lib/nokogiri/css/tokenizer.rex +55 -0
  90. data/lib/nokogiri/css/xpath_visitor.rb +232 -0
  91. data/lib/nokogiri/decorators/slop.rb +43 -0
  92. data/lib/nokogiri/html.rb +38 -0
  93. data/lib/nokogiri/html/builder.rb +36 -0
  94. data/lib/nokogiri/html/document.rb +336 -0
  95. data/lib/nokogiri/html/document_fragment.rb +50 -0
  96. data/lib/nokogiri/html/element_description.rb +24 -0
  97. data/lib/nokogiri/html/element_description_defaults.rb +672 -0
  98. data/lib/nokogiri/html/entity_lookup.rb +14 -0
  99. data/lib/nokogiri/html/sax/parser.rb +63 -0
  100. data/lib/nokogiri/html/sax/parser_context.rb +17 -0
  101. data/lib/nokogiri/html/sax/push_parser.rb +37 -0
  102. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  103. data/lib/nokogiri/syntax_error.rb +5 -0
  104. data/lib/nokogiri/version.rb +149 -0
  105. data/lib/nokogiri/xml.rb +76 -0
  106. data/lib/nokogiri/xml/attr.rb +15 -0
  107. data/lib/nokogiri/xml/attribute_decl.rb +19 -0
  108. data/lib/nokogiri/xml/builder.rb +447 -0
  109. data/lib/nokogiri/xml/cdata.rb +12 -0
  110. data/lib/nokogiri/xml/character_data.rb +8 -0
  111. data/lib/nokogiri/xml/document.rb +280 -0
  112. data/lib/nokogiri/xml/document_fragment.rb +161 -0
  113. data/lib/nokogiri/xml/dtd.rb +33 -0
  114. data/lib/nokogiri/xml/element_content.rb +37 -0
  115. data/lib/nokogiri/xml/element_decl.rb +14 -0
  116. data/lib/nokogiri/xml/entity_decl.rb +20 -0
  117. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  118. data/lib/nokogiri/xml/namespace.rb +14 -0
  119. data/lib/nokogiri/xml/node.rb +916 -0
  120. data/lib/nokogiri/xml/node/save_options.rb +62 -0
  121. data/lib/nokogiri/xml/node_set.rb +372 -0
  122. data/lib/nokogiri/xml/notation.rb +7 -0
  123. data/lib/nokogiri/xml/parse_options.rb +121 -0
  124. data/lib/nokogiri/xml/pp.rb +3 -0
  125. data/lib/nokogiri/xml/pp/character_data.rb +19 -0
  126. data/lib/nokogiri/xml/pp/node.rb +57 -0
  127. data/lib/nokogiri/xml/processing_instruction.rb +9 -0
  128. data/lib/nokogiri/xml/reader.rb +116 -0
  129. data/lib/nokogiri/xml/relax_ng.rb +33 -0
  130. data/lib/nokogiri/xml/sax.rb +5 -0
  131. data/lib/nokogiri/xml/sax/document.rb +172 -0
  132. data/lib/nokogiri/xml/sax/parser.rb +123 -0
  133. data/lib/nokogiri/xml/sax/parser_context.rb +17 -0
  134. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  135. data/lib/nokogiri/xml/schema.rb +64 -0
  136. data/lib/nokogiri/xml/searchable.rb +231 -0
  137. data/lib/nokogiri/xml/syntax_error.rb +71 -0
  138. data/lib/nokogiri/xml/text.rb +10 -0
  139. data/lib/nokogiri/xml/xpath.rb +11 -0
  140. data/lib/nokogiri/xml/xpath/syntax_error.rb +12 -0
  141. data/lib/nokogiri/xml/xpath_context.rb +17 -0
  142. data/lib/nokogiri/xslt.rb +57 -0
  143. data/lib/nokogiri/xslt/stylesheet.rb +26 -0
  144. data/lib/xsd/xmlparser/nokogiri.rb +103 -0
  145. metadata +482 -0
@@ -0,0 +1,272 @@
1
# Racc grammar for CSS selectors. Each action builds Nokogiri::CSS::Node
# objects (or plain arrays/symbols) from the tokens supplied by the tokenizer.
class Nokogiri::CSS::Parser

token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT
token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL
token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE HAS

rule
  # Top level: a comma separated list of selectors, flattened into one array.
  selector
    : selector COMMA simple_selector_1toN {
        result = [val.first, val.last].flatten
      }
    | prefixless_combinator_selector { result = val.flatten }
    | optional_S simple_selector_1toN { result = [val.last].flatten }
    ;
  # Maps a combinator token to the symbol used as the resulting node type.
  combinator
    : PLUS { result = :DIRECT_ADJACENT_SELECTOR }
    | GREATER { result = :CHILD_SELECTOR }
    | TILDE { result = :FOLLOWING_SELECTOR }
    | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
    | SLASH { result = :CHILD_SELECTOR }
    ;
  # One compound selector: an element name and/or a chain of conditions.
  simple_selector
    : element_name hcap_0toN {
        result =  if val[1].nil?
                    val.first
                  else
                    Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
                  end
      }
    | function
    | function pseudo {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | function attrib {
        result = Node.new(:CONDITIONAL_SELECTOR, val)
      }
    | hcap_1toN {
        # No element name was given, so the conditions apply to '*'.
        result = Node.new(:CONDITIONAL_SELECTOR,
          [Node.new(:ELEMENT_NAME, ['*']), val.first]
        )
      }
    ;
  # A selector that starts with a combinator; the left operand is nil.
  prefixless_combinator_selector
    : combinator simple_selector_1toN {
        result = Node.new(val.first, [nil, val.last])
      }
    ;
  # One or more simple selectors joined by combinators or whitespace.
  simple_selector_1toN
    : simple_selector combinator simple_selector_1toN {
        result = Node.new(val[1], [val.first, val.last])
      }
    | simple_selector S simple_selector_1toN {
        result = Node.new(:DESCENDANT_SELECTOR, [val.first, val.last])
      }
    | simple_selector
    ;
  class
    : '.' IDENT { result = Node.new(:CLASS_CONDITION, [unescape_css_identifier(val[1])]) }
    ;
  element_name
    : namespaced_ident
    | '*' { result = Node.new(:ELEMENT_NAME, val) }
    ;
  # Element names: "ns|name" becomes "ns:name"; a bare name gets the
  # "xmlns:" prefix when @namespaces has an 'xmlns' key.
  namespaced_ident
    : namespace '|' IDENT {
        result = Node.new(:ELEMENT_NAME,
          [[val.first, val.last].compact.join(':')]
        )
      }
    | IDENT {
        name = @namespaces.key?('xmlns') ? "xmlns:#{val.first}" : val.first
        result = Node.new(:ELEMENT_NAME, [name])
      }
    ;
  namespace
    : IDENT { result = val[0] }
    |
    ;
  # Bracketed conditions: [name], [name op value], [function op value], [N].
  attrib
    : LSQUARE attrib_name attrib_val_0or1 RSQUARE {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [val[1]] + (val[2] || [])
        )
      }
    | LSQUARE function attrib_val_0or1 RSQUARE {
        result = Node.new(:ATTRIBUTE_CONDITION,
          [val[1]] + (val[2] || [])
        )
      }
    | LSQUARE NUMBER RSQUARE {
        # Non standard, but hpricot supports it.
        result = Node.new(:PSEUDO_CLASS,
          [Node.new(:FUNCTION, ['nth-child(', val[1]])]
        )
      }
    ;
  attrib_name
    : namespace '|' IDENT {
        result = Node.new(:ELEMENT_NAME,
          [[val.first, val.last].compact.join(':')]
        )
      }
    | IDENT {
        # Default namespace is not applied to attributes.
        # So we don't add prefix "xmlns:" as in namespaced_ident.
        result = Node.new(:ELEMENT_NAME, [val.first])
      }
    ;
  # Function calls; the first child is the stripped "name(" token text.
  function
    : FUNCTION RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip])
      }
    | FUNCTION expr RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | FUNCTION nth RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | NOT expr RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    | HAS selector RPAREN {
        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
      }
    ;
  # Function arguments: a comma separated mix of numbers, strings and idents.
  expr
    : NUMBER COMMA expr { result = [val.first, val.last] }
    | STRING COMMA expr { result = [val.first, val.last] }
    | IDENT COMMA expr { result = [val.first, val.last] }
    | NUMBER
    | STRING
    | IDENT # even, odd
      {
        case val[0]
        when 'even'
          result = Node.new(:NTH, ['2','n','+','0'])
        when 'odd'
          result = Node.new(:NTH, ['2','n','+','1'])
        when 'n'
          result = Node.new(:NTH, ['1','n','+','0'])
        else
          # This is not CSS standard.  It allows us to support this:
          # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
          # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
          # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
          result = val
        end
      }
    ;
  # "an+b" expressions, normalised to an :NTH node with children [a, 'n', sign, b].
  nth
    : NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3
      {
        if val[1] == 'n'
          result = Node.new(:NTH, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    | IDENT PLUS NUMBER {    # n+3, -n+3
        if val[0] == 'n'
          val.unshift("1")
          result = Node.new(:NTH, val)
        elsif val[0] == '-n'
          val[0] = 'n'
          val.unshift("-1")
          result = Node.new(:NTH, val)
        else
          # The IDENT is the first value of this alternative (val[1] is PLUS),
          # so report val[0] as the offending token.
          raise Racc::ParseError, "parse error on IDENT '#{val[0]}'"
        end
      }
    | NUMBER IDENT {         # 5n, -5n, 10n-1
        n = val[1]
        if n[0, 2] == 'n-'
          val[1] = 'n'
          val << "-"
          # b is contained in n as n is the string "n-b"
          val << n[2, n.size]
          result = Node.new(:NTH, val)
        elsif n == 'n'
          val << "+"
          val << "0"
          result = Node.new(:NTH, val)
        else
          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
        end
      }
    ;
  pseudo
    : ':' function {
        result = Node.new(:PSEUDO_CLASS, [val[1]])
      }
    | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
    ;
  # "hcap" chains: zero or more of id / class / attrib / pseudo / negation.
  hcap_0toN
    : hcap_1toN
    |
    ;
  hcap_1toN
    : attribute_id hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | class hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attrib hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | pseudo hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | negation hcap_1toN {
        result = Node.new(:COMBINATOR, val)
      }
    | attribute_id
    | class
    | attrib
    | pseudo
    | negation
    ;
  attribute_id
    : HASH { result = Node.new(:ID, [unescape_css_identifier(val.first)]) }
    ;
  # Optional "operator value" tail of an attribute condition.
  attrib_val_0or1
    : eql_incl_dash IDENT { result = [val.first, unescape_css_identifier(val[1])] }
    | eql_incl_dash STRING { result = [val.first, unescape_css_string(val[1])] }
    | eql_incl_dash NUMBER { result = [val.first, val[1]] }
    |
    ;
  # Maps an attribute comparison token to its symbol.
  eql_incl_dash
    : EQUAL           { result = :equal }
    | PREFIXMATCH     { result = :prefix_match }
    | SUFFIXMATCH     { result = :suffix_match }
    | SUBSTRINGMATCH  { result = :substring_match }
    | NOT_EQUAL       { result = :not_equal }
    | INCLUDES        { result = :includes }
    | DASHMATCH       { result = :dash_match }
    ;
  negation
    : NOT negation_arg RPAREN {
        result = Node.new(:NOT, [val[1]])
      }
    ;
  negation_arg
    : element_name
    | element_name hcap_1toN
    | hcap_1toN
    ;
  optional_S
    : S
    |
    ;
end
253
+
254
+ ---- header
255
+
256
+ require 'nokogiri/css/parser_extras'
257
+
258
+ ---- inner
259
+
260
# Resolve CSS backslash escapes in +identifier+.
#
# A backslash followed by a non-hex character yields that character; a
# backslash followed by one to six hex digits (plus one optional trailing
# whitespace character) yields the character with that code point.
def unescape_css_identifier(identifier)
  identifier.gsub(/\\(?:([^0-9a-fA-F])|([0-9a-fA-F]{1,6})\s?)/) do
    literal, hex_digits = Regexp.last_match.captures
    literal || [hex_digits.hex].pack('U')
  end
end
263
+
264
# Resolve CSS backslash escapes in the string token +str+.
#
# Works like the identifier variant, except that a backslash followed by a
# newline is removed entirely.
def unescape_css_string(str)
  str.gsub(/\\(?:([^0-9a-fA-F])|([0-9a-fA-F]{1,6})\s?)/) do
    literal, hex_digits = Regexp.last_match.captures
    next '' if literal == "\n"

    literal || [hex_digits.hex].pack('U')
  end
end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+ require 'thread'
3
+
4
module Nokogiri
  module CSS
    # Hand-written half of the CSS selector parser: a class-level cache of
    # parse results guarded by a mutex, plus the instance methods that feed
    # tokens to the Racc parser and convert selectors to XPath.
    class Parser < Racc::Parser
      @cache_on = true
      @cache    = {}
      @mutex    = Mutex.new

      class << self
        # Turn on CSS parse caching
        attr_accessor :cache_on
        alias :cache_on? :cache_on
        alias :set_cache :cache_on=

        # Get the css selector in +string+ from the cache.
        # Returns nil when caching is off or the key is missing.
        def [](string)
          return unless @cache_on

          @mutex.synchronize { @cache[string] }
        end

        # Set the css selector in +string+ in the cache to +value+.
        # Returns +value+ either way; nothing is stored when caching is off.
        def []=(string, value)
          return value unless @cache_on

          @mutex.synchronize { @cache[string] = value }
        end

        # Clear the cache
        def clear_cache
          @mutex.synchronize { @cache = {} }
        end

        # Execute +block+ with caching switched off and return the block's
        # value. The previous setting is restored in an +ensure+ clause so
        # that a block that raises cannot leave caching disabled.
        def without_cache(&block)
          previous_setting = @cache_on
          @cache_on = false
          block.call
        ensure
          @cache_on = previous_setting
        end

        ###
        # Parse this CSS selector in +selector+.  Returns an AST.
        # Prints a deprecation notice to $stderr the first time it is called.
        def parse(selector)
          @warned ||= false
          unless @warned
            $stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse(), this will be removed August 1st or version 1.4.0 (whichever is first)')
            @warned = true
          end
          new.parse selector
        end
      end

      # Create a new CSS parser with respect to +namespaces+
      def initialize(namespaces = {})
        @tokenizer  = Tokenizer.new
        @namespaces = namespaces
        super()
      end

      # Tokenize +string+ and run the generated parser over it.
      def parse(string)
        @tokenizer.scan_setup string
        do_parse
      end

      # Hand the next token from the tokenizer to the Racc runtime.
      def next_token
        @tokenizer.next_token
      end

      # Get the xpath for +string+ using +options+
      #
      # Recognised options are :ns, :prefix (defaults to '//') and :visitor
      # (defaults to a new XPathVisitor). Returns one XPath per parsed AST.
      def xpath_for(string, options = {})
        # NOTE(review): the key is built from the selector, :ns and :prefix
        # only; :visitor is not part of it. Confirm that callers passing a
        # custom visitor do not rely on cached results.
        key = "#{string}#{options[:ns]}#{options[:prefix]}"
        v = self.class[key]
        return v if v

        args = [
          options[:prefix] || '//',
          options[:visitor] || XPathVisitor.new
        ]
        self.class[key] = parse(string).map { |ast|
          ast.to_xpath(*args)
        }
      end

      # On CSS parser error, raise an exception
      def on_error(error_token_id, error_value, value_stack)
        after = value_stack.compact.last
        raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+ require 'nokogiri/syntax_error'
3
module Nokogiri
  module CSS
    # Error type for CSS selector problems; a Nokogiri::SyntaxError subclass
    # with no behaviour of its own.
    class SyntaxError < ::Nokogiri::SyntaxError; end
  end
end
@@ -0,0 +1,154 @@
1
+ # frozen_string_literal: true
2
+ #--
3
+ # DO NOT MODIFY!!!!
4
+ # This file is automatically generated by rex 1.0.7
5
+ # from lexical definition file "lib/nokogiri/css/tokenizer.rex".
6
+ #++
7
+
8
module Nokogiri
  module CSS
    # Lexer for CSS selectors built on StringScanner. Each token is a
    # two-element array: [type, matched_text].
    #
    # NOTE: the file banner says this class is generated by rex from
    # "lib/nokogiri/css/tokenizer.rex"; mirror any change there.
    class Tokenizer # :nodoc:
      require 'strscan'

      class ScanError < StandardError ; end

      # Lexical rules, tried strictly in this order; the first pattern that
      # matches at the scan position wins. A nil type means the matched text
      # itself is used as the token type.
      LEXICAL_RULES = [
        [/has\([\s]*/, :HAS],
        [/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/, :FUNCTION],
        [/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/, :IDENT],
        [/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/, :HASH],
        [/[\s]*~=[\s]*/, :INCLUDES],
        [/[\s]*\|=[\s]*/, :DASHMATCH],
        [/[\s]*\^=[\s]*/, :PREFIXMATCH],
        [/[\s]*\$=[\s]*/, :SUFFIXMATCH],
        [/[\s]*\*=[\s]*/, :SUBSTRINGMATCH],
        [/[\s]*!=[\s]*/, :NOT_EQUAL],
        [/[\s]*=[\s]*/, :EQUAL],
        [/[\s]*\)/, :RPAREN],
        [/\[[\s]*/, :LSQUARE],
        [/[\s]*\]/, :RSQUARE],
        [/[\s]*\+[\s]*/, :PLUS],
        [/[\s]*>[\s]*/, :GREATER],
        [/[\s]*,[\s]*/, :COMMA],
        [/[\s]*~[\s]*/, :TILDE],
        [/\:not\([\s]*/, :NOT],
        [/-?([0-9]+|[0-9]*\.[0-9]+)/, :NUMBER],
        [/[\s]*\/\/[\s]*/, :DOUBLESLASH],
        [/[\s]*\/[\s]*/, :SLASH],
        [/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/, :UNICODE_RANGE],
        [/[\s]+/, :S],
        [/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/, :STRING],
        [/./, nil]
      ].freeze
      private_constant :LEXICAL_RULES

      attr_reader   :lineno
      attr_reader   :filename
      attr_accessor :state

      # Start scanning +str+ from its beginning: line counter back to 1,
      # lexer state cleared.
      def scan_setup(str)
        @ss     = StringScanner.new(str)
        @lineno = 1
        @state  = nil
      end

      # Run a rule's action block and return its value.
      def action
        yield
      end

      # Prepare +str+ for scanning, then call +do_parse+ (not defined in
      # this class).
      def scan_str(str)
        scan_setup(str)
        do_parse
      end
      alias :scan :scan_str

      # Remember +filename+ and prepare the file's contents for scanning.
      def load_file( filename )
        @filename = filename
        File.open(filename, "r") { |io| scan_setup(io.read) }
      end

      # Load +filename+, then call +do_parse+ (not defined in this class).
      def scan_file( filename )
        load_file(filename)
        do_parse
      end

      # Return the next token, or nil once the input is exhausted.
      def next_token
        return if @ss.eos?

        # skips empty actions
        token = nil
        loop do
          token = _next_token
          break if token || @ss.eos?
        end
        token
      end

      # Match one token at the current scan position.
      #
      # Raises ScanError when the lexer state is not nil, or when no rule
      # matches the remaining input.
      def _next_token
        upcoming = @ss.peek(1)
        @lineno += 1 if upcoming == "\n"

        unless @state.nil?
          raise ScanError, "undefined state: '" + state.to_s + "'"
        end

        LEXICAL_RULES.each do |pattern, type|
          lexeme = @ss.scan(pattern)
          next unless lexeme

          return action { [type || lexeme, lexeme] }
        end

        remainder = @ss.string[@ss.pos .. -1]
        raise ScanError, "can not match: '" + remainder + "'"
      end # def _next_token

    end # class
  end
end