coderay 1.0.0.598.pre → 1.0.0.738.pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. data/bin/coderay +1 -1
  2. data/lib/coderay.rb +38 -32
  3. data/lib/coderay/duo.rb +1 -54
  4. data/lib/coderay/encoder.rb +31 -33
  5. data/lib/coderay/encoders/_map.rb +4 -2
  6. data/lib/coderay/encoders/comment_filter.rb +0 -61
  7. data/lib/coderay/encoders/count.rb +2 -23
  8. data/lib/coderay/encoders/debug.rb +11 -60
  9. data/lib/coderay/encoders/filter.rb +0 -46
  10. data/lib/coderay/encoders/html.rb +83 -91
  11. data/lib/coderay/encoders/html/css.rb +1 -6
  12. data/lib/coderay/encoders/html/numbering.rb +18 -21
  13. data/lib/coderay/encoders/html/output.rb +10 -52
  14. data/lib/coderay/encoders/json.rb +19 -39
  15. data/lib/coderay/encoders/lines_of_code.rb +7 -52
  16. data/lib/coderay/encoders/null.rb +6 -13
  17. data/lib/coderay/encoders/statistic.rb +30 -93
  18. data/lib/coderay/encoders/terminal.rb +3 -4
  19. data/lib/coderay/encoders/text.rb +1 -23
  20. data/lib/coderay/encoders/token_kind_filter.rb +0 -58
  21. data/lib/coderay/helpers/file_type.rb +119 -240
  22. data/lib/coderay/helpers/gzip.rb +41 -0
  23. data/lib/coderay/helpers/plugin.rb +237 -307
  24. data/lib/coderay/scanner.rb +112 -88
  25. data/lib/coderay/scanners/_map.rb +3 -3
  26. data/lib/coderay/scanners/c.rb +7 -7
  27. data/lib/coderay/scanners/clojure.rb +204 -0
  28. data/lib/coderay/scanners/css.rb +10 -20
  29. data/lib/coderay/scanners/debug.rb +9 -55
  30. data/lib/coderay/scanners/diff.rb +21 -4
  31. data/lib/coderay/scanners/html.rb +65 -18
  32. data/lib/coderay/scanners/java.rb +3 -2
  33. data/lib/coderay/scanners/java_script.rb +3 -3
  34. data/lib/coderay/scanners/json.rb +7 -6
  35. data/lib/coderay/scanners/php.rb +2 -1
  36. data/lib/coderay/scanners/rhtml.rb +6 -2
  37. data/lib/coderay/scanners/ruby.rb +193 -193
  38. data/lib/coderay/scanners/ruby/patterns.rb +15 -82
  39. data/lib/coderay/scanners/ruby/string_state.rb +71 -0
  40. data/lib/coderay/scanners/sql.rb +1 -1
  41. data/lib/coderay/scanners/yaml.rb +4 -2
  42. data/lib/coderay/styles/_map.rb +2 -2
  43. data/lib/coderay/styles/alpha.rb +48 -38
  44. data/lib/coderay/styles/cycnus.rb +2 -1
  45. data/lib/coderay/token_kinds.rb +88 -86
  46. data/lib/coderay/tokens.rb +88 -112
  47. data/test/functional/basic.rb +184 -5
  48. data/test/functional/examples.rb +4 -4
  49. data/test/functional/for_redcloth.rb +3 -2
  50. data/test/functional/suite.rb +7 -6
  51. metadata +11 -24
  52. data/lib/coderay/helpers/gzip_simple.rb +0 -123
  53. data/test/functional/load_plugin_scanner.rb +0 -11
  54. data/test/functional/vhdl.rb +0 -126
  55. data/test/functional/word_list.rb +0 -79
@@ -1,7 +1,12 @@
1
- module CodeRay
1
+ # encoding: utf-8
2
+ require 'strscan'
2
3
 
3
- require 'coderay/helpers/plugin'
4
+ module CodeRay
4
5
 
6
+ autoload :WordList, 'coderay/helpers/word_list'
7
+ # FIXME: Rename CaseIgnoringWordList to WordList::CaseIgnoring.
8
+ autoload :CaseIgnoringWordList, 'coderay/helpers/word_list'
9
+
5
10
  # = Scanners
6
11
  #
7
12
  # This module holds the Scanner class and its subclasses.
@@ -16,9 +21,8 @@ module CodeRay
16
21
  module Scanners
17
22
  extend PluginHost
18
23
  plugin_path File.dirname(__FILE__), 'scanners'
19
-
20
- require 'strscan'
21
-
24
+
25
+
22
26
  # = Scanner
23
27
  #
24
28
  # The base class for all Scanners.
@@ -46,61 +50,83 @@ module CodeRay
46
50
 
47
51
  extend Plugin
48
52
  plugin_host Scanners
49
-
53
+
50
54
  # Raised if a Scanner fails while scanning
51
- ScanError = Class.new(Exception)
52
-
53
- require 'coderay/helpers/word_list'
54
-
55
+ ScanError = Class.new Exception
56
+
55
57
  # The default options for all scanner classes.
56
58
  #
57
59
  # Define @default_options for subclasses.
58
60
  DEFAULT_OPTIONS = { }
59
61
 
60
62
  KINDS_NOT_LOC = [:comment, :doctype, :docstring]
61
-
63
+
62
64
  class << self
63
-
64
- def normify code
65
- code = code.to_s.dup
66
- # try using UTF-8
67
- if code.respond_to? :force_encoding
68
- debug, $DEBUG = $DEBUG, false
69
- begin
70
- code.force_encoding 'UTF-8'
71
- code[/\z/] # raises an ArgumentError when code contains a non-UTF-8 char
72
- rescue ArgumentError
73
- code.force_encoding 'binary'
74
- ensure
75
- $DEBUG = debug
76
- end
65
+
66
+ # Normalizes the given code into a string with UNIX newlines, in the
67
+ # scanner's internal encoding, with invalid and undefined characters
68
+ # replaced by placeholders. Always returns a new object.
69
+ def normalize code
70
+ original = code
71
+ code = code.to_s unless code.is_a? ::String
72
+ if code.respond_to? :encoding
73
+ code = encode_with_encoding code, self.encoding
74
+ else
75
+ code = to_unix code if code.index ?\r
77
76
  end
78
- # convert the string to UNIX newline format
79
- code.gsub!(/\r\n?/, "\n") if code.index ?\r
77
+ # code = code.dup if code.eql? original
80
78
  code
81
79
  end
82
80
 
83
- def file_extension extension = nil
84
- if extension
85
- @file_extension = extension.to_s
81
+ # The typical filename suffix for this scanner's language.
82
+ def file_extension extension = plugin_id
83
+ @file_extension ||= extension.to_s
84
+ end
85
+
86
+ # The encoding used internally by this scanner.
87
+ def encoding name = 'UTF-8'
88
+ @encoding ||= defined?(Encoding.find) && Encoding.find(name)
89
+ end
90
+
91
+ # The lang of this Scanner class, which is equal to its Plugin ID.
92
+ alias lang plugin_id
93
+
94
+ protected
95
+
96
+ def encode_with_encoding code, target_encoding
97
+ if code.encoding == target_encoding
98
+ if code.valid_encoding?
99
+ return to_unix code
100
+ else
101
+ source_encoding = guess_encoding code
102
+ end
86
103
  else
87
- @file_extension ||= plugin_id.to_s
104
+ source_encoding = code.encoding
88
105
  end
106
+ # print "encode_with_encoding from #{source_encoding} to #{target_encoding}"
107
+ code.encode target_encoding, source_encoding, :universal_newline => true, :undef => :replace, :invalid => :replace
89
108
  end
90
-
109
+
110
+ def to_unix code
111
+ code.gsub(/\r\n?/, "\n")
112
+ end
113
+
114
+ def guess_encoding s
115
+ #:nocov:
116
+ IO.popen("file -b --mime -", "w+") do |file|
117
+ file.write s[0, 1024]
118
+ file.close_write
119
+ begin
120
+ Encoding.find file.gets[/charset=([-\w]+)/, 1]
121
+ rescue ArgumentError
122
+ Encoding::BINARY
123
+ end
124
+ end
125
+ #:nocov:
126
+ end
127
+
91
128
  end
92
-
93
- =begin
94
- ## Excluded for speed reasons; protected seems to make methods slow.
95
-
96
- # Save the StringScanner methods from being called.
97
- # This would not be useful for highlighting.
98
- strscan_public_methods =
99
- StringScanner.instance_methods -
100
- StringScanner.ancestors[1].instance_methods
101
- protected(*strscan_public_methods)
102
- =end
103
-
129
+
104
130
  # Create a new Scanner.
105
131
  #
106
132
  # * +code+ is the input String and is handled by the superclass
@@ -110,43 +136,46 @@ module CodeRay
110
136
  # overwrite default options here.)
111
137
  #
112
138
  # Else, a Tokens object is used.
113
- def initialize code='', options = {}
114
- raise "I am only the basic Scanner class. I can't scan "\
115
- "anything. :( Use my subclasses." if self.class == Scanner
139
+ def initialize code = '', options = {}
140
+ if self.class == Scanner
141
+ raise NotImplementedError, "I am only the basic Scanner class. I can't scan anything. :( Use my subclasses."
142
+ end
116
143
 
117
144
  @options = self.class::DEFAULT_OPTIONS.merge options
118
-
119
- super Scanner.normify(code)
120
-
145
+
146
+ super self.class.normalize(code)
147
+
121
148
  @tokens = options[:tokens] || Tokens.new
122
149
  @tokens.scanner = self if @tokens.respond_to? :scanner=
123
-
150
+
124
151
  setup
125
152
  end
126
153
 
127
- # Sets back the scanner. Subclasses are to define the reset_instance
128
- # method.
154
+ # Sets back the scanner. Subclasses should define the reset_instance
155
+ # method instead of this one.
129
156
  def reset
130
157
  super
131
158
  reset_instance
132
159
  end
133
-
160
+
161
+ # Set a new string to be scanned.
134
162
  def string= code
135
- code = Scanner.normify(code)
163
+ code = self.class.normalize(code)
136
164
  super code
137
165
  reset_instance
138
166
  end
139
-
140
- # More mnemonic accessor name for the input string.
141
- alias code string
142
- alias code= string=
143
-
144
- # Returns the Plugin ID for this scanner.
167
+
168
+ # the Plugin ID for this scanner
145
169
  def lang
146
- self.class.plugin_id.to_s
170
+ self.class.lang
147
171
  end
148
-
149
- # Scans the code and returns all tokens in a Tokens object.
172
+
173
+ # the default file extension for this scanner
174
+ def file_extension
175
+ self.class.file_extension
176
+ end
177
+
178
+ # Scan the code and return all tokens in a Tokens object.
150
179
  def tokenize source = nil, options = {}
151
180
  options = @options.merge(options)
152
181
  @tokens = options[:tokens] || @tokens || Tokens.new
@@ -170,18 +199,18 @@ module CodeRay
170
199
  end
171
200
  end
172
201
 
173
- # Caches the result of tokenize.
202
+ # Cache the result of tokenize.
174
203
  def tokens
175
204
  @cached_tokens ||= tokenize
176
205
  end
177
206
 
178
- # Traverses the tokens.
207
+ # Traverse the tokens.
179
208
  def each &block
180
209
  tokens.each(&block)
181
210
  end
182
211
  include Enumerable
183
-
184
- # The current line position of the scanner.
212
+
213
+ # The current line position of the scanner. See also #column.
185
214
  #
186
215
  # Beware, this is implemented inefficiently. It should be used
187
216
  # for debugging only.
@@ -189,27 +218,23 @@ module CodeRay
189
218
  string[0..pos].count("\n") + 1
190
219
  end
191
220
 
192
- # The current column position of the scanner. See #line.
221
+ # The current column position of the scanner. See also #line.
222
+ #
223
+ # Beware, this is implemented inefficiently. It should be used
224
+ # for debugging only.
193
225
  def column pos = self.pos
194
226
  return 0 if pos <= 0
195
- string = string()
196
- if string.respond_to?(:bytesize) && (defined?(@bin_string) || string.bytesize != string.size)
197
- @bin_string ||= string.dup.force_encoding('binary')
198
- string = @bin_string
227
+ string = self.string
228
+ if string.respond_to?(:bytesize) && string.bytesize != string.size
229
+ #:nocov:
230
+ string = string.dup.force_encoding('binary')
231
+ #:nocov:
199
232
  end
200
233
  pos - (string.rindex(?\n, pos) || 0)
201
234
  end
202
235
 
203
- def marshal_dump # :nodoc:
204
- @options
205
- end
206
-
207
- def marshal_load options # :nodoc:
208
- @options = options
209
- end
210
-
211
236
  protected
212
-
237
+
213
238
  # Can be implemented by subclasses to do some initialization
214
239
  # that has to be done once per instance.
215
240
  #
@@ -217,15 +242,14 @@ module CodeRay
217
242
  # scan.
218
243
  def setup # :doc:
219
244
  end
220
-
245
+
221
246
  # This is the central method, and commonly the only one a
222
247
  # subclass implements.
223
248
  #
224
249
  # Subclasses must implement this method; it must return +tokens+
225
250
  # and must only use Tokens#<< for storing scanned tokens!
226
251
  def scan_tokens tokens, options # :doc:
227
- raise NotImplementedError,
228
- "#{self.class}#scan_tokens not implemented."
252
+ raise NotImplementedError, "#{self.class}#scan_tokens not implemented."
229
253
  end
230
254
 
231
255
  # Resets the scanner.
@@ -234,7 +258,7 @@ module CodeRay
234
258
  @cached_tokens = nil
235
259
  @bin_string = nil if defined? @bin_string
236
260
  end
237
-
261
+
238
262
  # Scanner error with additional status information
239
263
  def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
240
264
  raise ScanError, <<-EOE % [
@@ -266,8 +290,8 @@ surrounding code:
266
290
  string[pos, ambit],
267
291
  ]
268
292
  end
269
-
293
+
270
294
  end
271
-
295
+
272
296
  end
273
297
  end
@@ -1,6 +1,6 @@
1
1
  module CodeRay
2
2
  module Scanners
3
-
3
+
4
4
  map \
5
5
  :cplusplus => :cpp,
6
6
  :'c++' => :cpp,
@@ -15,8 +15,8 @@ module Scanners
15
15
  :plain => :plaintext,
16
16
  :xhtml => :html,
17
17
  :yml => :yaml
18
-
18
+
19
19
  default :plain
20
-
20
+
21
21
  end
22
22
  end
@@ -65,10 +65,6 @@ module Scanners
65
65
  elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
66
66
  encoder.text_token match, :comment
67
67
 
68
- elsif match = scan(/ \# \s* if \s* 0 /x)
69
- match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
70
- encoder.text_token match, :comment
71
-
72
68
  elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
73
69
  label_expected = match =~ /[;\{\}]/
74
70
  if case_expected
@@ -93,9 +89,6 @@ module Scanners
93
89
  end
94
90
  encoder.text_token match, kind
95
91
 
96
- elsif match = scan(/\$/)
97
- encoder.text_token match, :ident
98
-
99
92
  elsif match = scan(/L?"/)
100
93
  encoder.begin_group :string
101
94
  if match[0] == ?L
@@ -105,6 +98,10 @@ module Scanners
105
98
  encoder.text_token match, :delimiter
106
99
  state = :string
107
100
 
101
+ elsif match = scan(/ \# \s* if \s* 0 /x)
102
+ match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
103
+ encoder.text_token match, :comment
104
+
108
105
  elsif match = scan(/#[ \t]*(\w*)/)
109
106
  encoder.text_token match, :preprocessor
110
107
  in_preproc_line = true
@@ -115,6 +112,9 @@ module Scanners
115
112
  label_expected = false
116
113
  encoder.text_token match, :char
117
114
 
115
+ elsif match = scan(/\$/)
116
+ encoder.text_token match, :ident
117
+
118
118
  elsif match = scan(/0[xX][0-9A-Fa-f]+/)
119
119
  label_expected = false
120
120
  encoder.text_token match, :hex
@@ -0,0 +1,204 @@
1
+ # encoding: utf-8
2
+ module CodeRay
3
+ module Scanners
4
+
5
+ # Clojure scanner by Licenser.
6
+ class Clojure < Scanner
7
+
8
+ register_for :clojure
9
+ file_extension 'clj'
10
+
11
+ SPECIAL_FORMS = %w[
12
+ def if do let quote var fn loop recur throw try catch monitor-enter monitor-exit .
13
+ new
14
+ ] # :nodoc:
15
+
16
+ CORE_FORMS = %w[
17
+ + - -> ->> .. / * < <= = == > >= accessor aclone add-classpath add-watch
18
+ agent agent-error agent-errors aget alength alias all-ns alter alter-meta!
19
+ alter-var-root amap ancestors and apply areduce array-map aset aset-boolean
20
+ aset-byte aset-char aset-double aset-float aset-int aset-long aset-short
21
+ assert assoc assoc! assoc-in associative? atom await await-for bases bean
22
+ bigdec bigint binding bit-and bit-and-not bit-clear bit-flip bit-not bit-or
23
+ bit-set bit-shift-left bit-shift-right bit-test bit-xor boolean boolean-array
24
+ booleans bound-fn bound-fn* bound? butlast byte byte-array bytes case cast char
25
+ char-array char-escape-string char-name-string char? chars class class?
26
+ clear-agent-errors clojure-version coll? comment commute comp comparator
27
+ compare compare-and-set! compile complement concat cond condp conj conj!
28
+ cons constantly construct-proxy contains? count counted? create-ns
29
+ create-struct cycle dec decimal? declare definline defmacro defmethod defmulti
30
+ defn defn- defonce defprotocol defrecord defstruct deftype delay delay?
31
+ deliver denominator deref derive descendants disj disj! dissoc dissoc!
32
+ distinct distinct? doall doc dorun doseq dosync dotimes doto double
33
+ double-array doubles drop drop-last drop-while empty empty? ensure
34
+ enumeration-seq error-handler error-mode eval even? every? extend
35
+ extend-protocol extend-type extenders extends? false? ffirst file-seq
36
+ filter find find-doc find-ns find-var first float float-array float?
37
+ floats flush fn fn? fnext for force format future future-call future-cancel
38
+ future-cancelled? future-done? future? gen-class gen-interface gensym get
39
+ get-in get-method get-proxy-class get-thread-bindings get-validator hash
40
+ hash-map hash-set identical? identity if-let if-not ifn? import in-ns
41
+ inc init-proxy instance? int int-array integer? interleave intern
42
+ interpose into into-array ints io! isa? iterate iterator-seq juxt key
43
+ keys keyword keyword? last lazy-cat lazy-seq let letfn line-seq list list*
44
+ list? load load-file load-reader load-string loaded-libs locking long
45
+ long-array longs loop macroexpand macroexpand-1 make-array make-hierarchy
46
+ map map? mapcat max max-key memfn memoize merge merge-with meta methods
47
+ min min-key mod name namespace neg? newline next nfirst nil? nnext not
48
+ not-any? not-empty not-every? not= ns ns-aliases ns-imports ns-interns
49
+ ns-map ns-name ns-publics ns-refers ns-resolve ns-unalias ns-unmap nth
50
+ nthnext num number? numerator object-array odd? or parents partial
51
+ partition pcalls peek persistent! pmap pop pop! pop-thread-bindings
52
+ pos? pr pr-str prefer-method prefers print print-namespace-doc
53
+ print-str printf println println-str prn prn-str promise proxy
54
+ proxy-mappings proxy-super push-thread-bindings pvalues quot rand
55
+ rand-int range ratio? rationalize re-find re-groups re-matcher
56
+ re-matches re-pattern re-seq read read-line read-string reduce ref
57
+ ref-history-count ref-max-history ref-min-history ref-set refer
58
+ refer-clojure reify release-pending-sends rem remove remove-all-methods
59
+ remove-method remove-ns remove-watch repeat repeatedly replace replicate
60
+ require reset! reset-meta! resolve rest restart-agent resultset-seq
61
+ reverse reversible? rseq rsubseq satisfies? second select-keys send
62
+ send-off seq seq? seque sequence sequential? set set-error-handler!
63
+ set-error-mode! set-validator! set? short short-array shorts
64
+ shutdown-agents slurp some sort sort-by sorted-map sorted-map-by
65
+ sorted-set sorted-set-by sorted? special-form-anchor special-symbol?
66
+ split-at split-with str string? struct struct-map subs subseq subvec
67
+ supers swap! symbol symbol? sync syntax-symbol-anchor take take-last
68
+ take-nth take-while test the-ns thread-bound? time to-array to-array-2d
69
+ trampoline transient tree-seq true? type unchecked-add unchecked-dec
70
+ unchecked-divide unchecked-inc unchecked-multiply unchecked-negate
71
+ unchecked-remainder unchecked-subtract underive update-in update-proxy
72
+ use val vals var-get var-set var? vary-meta vec vector vector-of vector?
73
+ when when-first when-let when-not while with-bindings with-bindings*
74
+ with-in-str with-local-vars with-meta with-open with-out-str
75
+ with-precision xml-seq zero? zipmap
76
+ ] # :nodoc:
77
+ PREDEFINED_CONSTANTS = %w[
78
+ true false *1 *2 *3 *agent* *clojure-version* *command-line-args*
79
+ *compile-files* *compile-path* *e *err* *file* *flush-on-newline*
80
+ *in* *ns* *out* *print-dup* *print-length* *print-level* *print-meta*
81
+ *print-readably* *read-eval* *warn-on-reflection*
82
+ ] # :nodoc:
83
+
84
+ IDENT_KIND = CaseIgnoringWordList.new(:ident).
85
+ add(SPECIAL_FORMS, :reserved).
86
+ add(CORE_FORMS, :reserved).
87
+ add(PREDEFINED_CONSTANTS, :pre_constant)
88
+
89
+ BASIC_IDENTIFIER = /[a-zA-Z$%*\/_+!?&<>\-=][a-zA-Z0-9$&*+!\/_?<>\-\#]*/
90
+ IDENTIFIER = /(?:[@']?(?:#{BASIC_IDENTIFIER}\.)*#{BASIC_IDENTIFIER}(?:\/#{BASIC_IDENTIFIER})?\.?)|\.\.?/
91
+ SYMBOL = /::?#{IDENTIFIER}/o
92
+ DIGIT = /\d/
93
+ DIGIT10 = DIGIT
94
+ DIGIT16 = /[0-9a-f]/i
95
+ DIGIT8 = /[0-7]/
96
+ DIGIT2 = /[01]/
97
+ RADIX16 = /\#x/i
98
+ RADIX8 = /\#o/i
99
+ RADIX2 = /\#b/i
100
+ RADIX10 = /\#d/i
101
+ EXACTNESS = /#i|#e/i
102
+ SIGN = /[\+-]?/
103
+ EXP_MARK = /[esfdl]/i
104
+ EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
105
+ SUFFIX = /#{EXP}?/
106
+ PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
107
+ PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
108
+ PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
109
+ PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
110
+ UINT10 = /#{DIGIT10}+#*/
111
+ UINT16 = /#{DIGIT16}+#*/
112
+ UINT8 = /#{DIGIT8}+#*/
113
+ UINT2 = /#{DIGIT2}+#*/
114
+ DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
115
+ UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
116
+ UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
117
+ UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
118
+ UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
119
+ REAL10 = /#{SIGN}#{UREAL10}/
120
+ REAL16 = /#{SIGN}#{UREAL16}/
121
+ REAL8 = /#{SIGN}#{UREAL8}/
122
+ REAL2 = /#{SIGN}#{UREAL2}/
123
+ IMAG10 = /i|#{UREAL10}i/
124
+ IMAG16 = /i|#{UREAL16}i/
125
+ IMAG8 = /i|#{UREAL8}i/
126
+ IMAG2 = /i|#{UREAL2}i/
127
+ COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
128
+ COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
129
+ COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
130
+ COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
131
+ NUM10 = /#{PREFIX10}?#{COMPLEX10}/
132
+ NUM16 = /#{PREFIX16}#{COMPLEX16}/
133
+ NUM8 = /#{PREFIX8}#{COMPLEX8}/
134
+ NUM2 = /#{PREFIX2}#{COMPLEX2}/
135
+ NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/
136
+
137
+ protected
138
+
139
+ def scan_tokens encoder, options
140
+
141
+ state = :initial
142
+ ident_kind = IDENT_KIND
143
+
144
+ until eos?
145
+
146
+ case state
147
+ when :initial
148
+ if match = scan(/ \s+ | \\\n | , /x)
149
+ encoder.text_token match, :space
150
+ elsif match = scan(/['`\(\[\)\]\{\}]|\#[({]|~@?|@/)
151
+ encoder.text_token match, :operator # FIXME: was :operator_fat
152
+ elsif match = scan(/;.*/)
153
+ encoder.text_token match, :comment
154
+ elsif match = scan(/\#\\(?:newline|space|.?)/)
155
+ encoder.text_token match, :char
156
+ elsif match = scan(/\#[ft]/)
157
+ encoder.text_token match, :pre_constant
158
+ elsif match = scan(/#{IDENTIFIER}/o)
159
+ encoder.text_token match, ident_kind[matched]
160
+ elsif match = scan(/#{SYMBOL}/o)
161
+ encoder.text_token match, :symbol
162
+ elsif match = scan(/\./)
163
+ encoder.text_token match, :operator
164
+ elsif match = scan(/ \# \^ #{IDENTIFIER} /ox)
165
+ encoder.text_token match, :type
166
+ elsif match = scan(/ (\#)? " /x)
167
+ state = self[1] ? :regexp : :string
168
+ encoder.begin_group state
169
+ encoder.text_token match, :delimiter
170
+ elsif match = scan(/#{NUM}/o) and not matched.empty?
171
+ encoder.text_token match, match[/[.e]/i] ? :float : :integer
172
+ else
173
+ encoder.text_token getch, :error
174
+ end
175
+
176
+ when :string, :regexp
177
+ if match = scan(/[^"\\]+|\\.?/)
178
+ encoder.text_token match, :content
179
+ elsif match = scan(/"/)
180
+ encoder.text_token match, :delimiter
181
+ encoder.end_group state
182
+ state = :initial
183
+ else
184
+ raise_inspect "else case \" reached; %p not handled." % peek(1),
185
+ encoder, state
186
+ end
187
+
188
+ else
189
+ raise 'else case reached'
190
+
191
+ end
192
+
193
+ end
194
+
195
+ if [:string, :regexp].include? state
196
+ encoder.end_group state
197
+ end
198
+
199
+ encoder
200
+
201
+ end
202
+ end
203
+ end
204
+ end