coderay 1.0.0.598.pre → 1.0.0.738.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. data/bin/coderay +1 -1
  2. data/lib/coderay.rb +38 -32
  3. data/lib/coderay/duo.rb +1 -54
  4. data/lib/coderay/encoder.rb +31 -33
  5. data/lib/coderay/encoders/_map.rb +4 -2
  6. data/lib/coderay/encoders/comment_filter.rb +0 -61
  7. data/lib/coderay/encoders/count.rb +2 -23
  8. data/lib/coderay/encoders/debug.rb +11 -60
  9. data/lib/coderay/encoders/filter.rb +0 -46
  10. data/lib/coderay/encoders/html.rb +83 -91
  11. data/lib/coderay/encoders/html/css.rb +1 -6
  12. data/lib/coderay/encoders/html/numbering.rb +18 -21
  13. data/lib/coderay/encoders/html/output.rb +10 -52
  14. data/lib/coderay/encoders/json.rb +19 -39
  15. data/lib/coderay/encoders/lines_of_code.rb +7 -52
  16. data/lib/coderay/encoders/null.rb +6 -13
  17. data/lib/coderay/encoders/statistic.rb +30 -93
  18. data/lib/coderay/encoders/terminal.rb +3 -4
  19. data/lib/coderay/encoders/text.rb +1 -23
  20. data/lib/coderay/encoders/token_kind_filter.rb +0 -58
  21. data/lib/coderay/helpers/file_type.rb +119 -240
  22. data/lib/coderay/helpers/gzip.rb +41 -0
  23. data/lib/coderay/helpers/plugin.rb +237 -307
  24. data/lib/coderay/scanner.rb +112 -88
  25. data/lib/coderay/scanners/_map.rb +3 -3
  26. data/lib/coderay/scanners/c.rb +7 -7
  27. data/lib/coderay/scanners/clojure.rb +204 -0
  28. data/lib/coderay/scanners/css.rb +10 -20
  29. data/lib/coderay/scanners/debug.rb +9 -55
  30. data/lib/coderay/scanners/diff.rb +21 -4
  31. data/lib/coderay/scanners/html.rb +65 -18
  32. data/lib/coderay/scanners/java.rb +3 -2
  33. data/lib/coderay/scanners/java_script.rb +3 -3
  34. data/lib/coderay/scanners/json.rb +7 -6
  35. data/lib/coderay/scanners/php.rb +2 -1
  36. data/lib/coderay/scanners/rhtml.rb +6 -2
  37. data/lib/coderay/scanners/ruby.rb +193 -193
  38. data/lib/coderay/scanners/ruby/patterns.rb +15 -82
  39. data/lib/coderay/scanners/ruby/string_state.rb +71 -0
  40. data/lib/coderay/scanners/sql.rb +1 -1
  41. data/lib/coderay/scanners/yaml.rb +4 -2
  42. data/lib/coderay/styles/_map.rb +2 -2
  43. data/lib/coderay/styles/alpha.rb +48 -38
  44. data/lib/coderay/styles/cycnus.rb +2 -1
  45. data/lib/coderay/token_kinds.rb +88 -86
  46. data/lib/coderay/tokens.rb +88 -112
  47. data/test/functional/basic.rb +184 -5
  48. data/test/functional/examples.rb +4 -4
  49. data/test/functional/for_redcloth.rb +3 -2
  50. data/test/functional/suite.rb +7 -6
  51. metadata +11 -24
  52. data/lib/coderay/helpers/gzip_simple.rb +0 -123
  53. data/test/functional/load_plugin_scanner.rb +0 -11
  54. data/test/functional/vhdl.rb +0 -126
  55. data/test/functional/word_list.rb +0 -79
@@ -1,7 +1,12 @@
1
- module CodeRay
1
+ # encoding: utf-8
2
+ require 'strscan'
2
3
 
3
- require 'coderay/helpers/plugin'
4
+ module CodeRay
4
5
 
6
+ autoload :WordList, 'coderay/helpers/word_list'
7
+ # FIXME: Rename CaseIgnoringWordList to WordList::CaseIgnoring.
8
+ autoload :CaseIgnoringWordList, 'coderay/helpers/word_list'
9
+
5
10
  # = Scanners
6
11
  #
7
12
  # This module holds the Scanner class and its subclasses.
@@ -16,9 +21,8 @@ module CodeRay
16
21
  module Scanners
17
22
  extend PluginHost
18
23
  plugin_path File.dirname(__FILE__), 'scanners'
19
-
20
- require 'strscan'
21
-
24
+
25
+
22
26
  # = Scanner
23
27
  #
24
28
  # The base class for all Scanners.
@@ -46,61 +50,83 @@ module CodeRay
46
50
 
47
51
  extend Plugin
48
52
  plugin_host Scanners
49
-
53
+
50
54
  # Raised if a Scanner fails while scanning
51
- ScanError = Class.new(Exception)
52
-
53
- require 'coderay/helpers/word_list'
54
-
55
+ ScanError = Class.new Exception
56
+
55
57
  # The default options for all scanner classes.
56
58
  #
57
59
  # Define @default_options for subclasses.
58
60
  DEFAULT_OPTIONS = { }
59
61
 
60
62
  KINDS_NOT_LOC = [:comment, :doctype, :docstring]
61
-
63
+
62
64
  class << self
63
-
64
- def normify code
65
- code = code.to_s.dup
66
- # try using UTF-8
67
- if code.respond_to? :force_encoding
68
- debug, $DEBUG = $DEBUG, false
69
- begin
70
- code.force_encoding 'UTF-8'
71
- code[/\z/] # raises an ArgumentError when code contains a non-UTF-8 char
72
- rescue ArgumentError
73
- code.force_encoding 'binary'
74
- ensure
75
- $DEBUG = debug
76
- end
65
+
66
+ # Normalizes the given code into a string with UNIX newlines, in the
67
+ # scanner's internal encoding, with invalid and undefined characters
68
+ # replaced by placeholders. Always returns a new object.
69
+ def normalize code
70
+ original = code
71
+ code = code.to_s unless code.is_a? ::String
72
+ if code.respond_to? :encoding
73
+ code = encode_with_encoding code, self.encoding
74
+ else
75
+ code = to_unix code if code.index ?\r
77
76
  end
78
- # convert the string to UNIX newline format
79
- code.gsub!(/\r\n?/, "\n") if code.index ?\r
77
+ # code = code.dup if code.eql? original
80
78
  code
81
79
  end
82
80
 
83
- def file_extension extension = nil
84
- if extension
85
- @file_extension = extension.to_s
81
+ # The typical filename suffix for this scanner's language.
82
+ def file_extension extension = plugin_id
83
+ @file_extension ||= extension.to_s
84
+ end
85
+
86
+ # The encoding used internally by this scanner.
87
+ def encoding name = 'UTF-8'
88
+ @encoding ||= defined?(Encoding.find) && Encoding.find(name)
89
+ end
90
+
91
+ # The lang of this Scanner class, which is equal to its Plugin ID.
92
+ alias lang plugin_id
93
+
94
+ protected
95
+
96
+ def encode_with_encoding code, target_encoding
97
+ if code.encoding == target_encoding
98
+ if code.valid_encoding?
99
+ return to_unix code
100
+ else
101
+ source_encoding = guess_encoding code
102
+ end
86
103
  else
87
- @file_extension ||= plugin_id.to_s
104
+ source_encoding = code.encoding
88
105
  end
106
+ # print "encode_with_encoding from #{source_encoding} to #{target_encoding}"
107
+ code.encode target_encoding, source_encoding, :universal_newline => true, :undef => :replace, :invalid => :replace
89
108
  end
90
-
109
+
110
+ def to_unix code
111
+ code.gsub(/\r\n?/, "\n")
112
+ end
113
+
114
+ def guess_encoding s
115
+ #:nocov:
116
+ IO.popen("file -b --mime -", "w+") do |file|
117
+ file.write s[0, 1024]
118
+ file.close_write
119
+ begin
120
+ Encoding.find file.gets[/charset=([-\w]+)/, 1]
121
+ rescue ArgumentError
122
+ Encoding::BINARY
123
+ end
124
+ end
125
+ #:nocov:
126
+ end
127
+
91
128
  end
92
-
93
- =begin
94
- ## Excluded for speed reasons; protected seems to make methods slow.
95
-
96
- # Save the StringScanner methods from being called.
97
- # This would not be useful for highlighting.
98
- strscan_public_methods =
99
- StringScanner.instance_methods -
100
- StringScanner.ancestors[1].instance_methods
101
- protected(*strscan_public_methods)
102
- =end
103
-
129
+
104
130
  # Create a new Scanner.
105
131
  #
106
132
  # * +code+ is the input String and is handled by the superclass
@@ -110,43 +136,46 @@ module CodeRay
110
136
  # overwrite default options here.)
111
137
  #
112
138
  # Else, a Tokens object is used.
113
- def initialize code='', options = {}
114
- raise "I am only the basic Scanner class. I can't scan "\
115
- "anything. :( Use my subclasses." if self.class == Scanner
139
+ def initialize code = '', options = {}
140
+ if self.class == Scanner
141
+ raise NotImplementedError, "I am only the basic Scanner class. I can't scan anything. :( Use my subclasses."
142
+ end
116
143
 
117
144
  @options = self.class::DEFAULT_OPTIONS.merge options
118
-
119
- super Scanner.normify(code)
120
-
145
+
146
+ super self.class.normalize(code)
147
+
121
148
  @tokens = options[:tokens] || Tokens.new
122
149
  @tokens.scanner = self if @tokens.respond_to? :scanner=
123
-
150
+
124
151
  setup
125
152
  end
126
153
 
127
- # Sets back the scanner. Subclasses are to define the reset_instance
128
- # method.
154
+ # Sets back the scanner. Subclasses should define the reset_instance
155
+ # method instead of this one.
129
156
  def reset
130
157
  super
131
158
  reset_instance
132
159
  end
133
-
160
+
161
+ # Set a new string to be scanned.
134
162
  def string= code
135
- code = Scanner.normify(code)
163
+ code = self.class.normalize(code)
136
164
  super code
137
165
  reset_instance
138
166
  end
139
-
140
- # More mnemonic accessor name for the input string.
141
- alias code string
142
- alias code= string=
143
-
144
- # Returns the Plugin ID for this scanner.
167
+
168
+ # the Plugin ID for this scanner
145
169
  def lang
146
- self.class.plugin_id.to_s
170
+ self.class.lang
147
171
  end
148
-
149
- # Scans the code and returns all tokens in a Tokens object.
172
+
173
+ # the default file extension for this scanner
174
+ def file_extension
175
+ self.class.file_extension
176
+ end
177
+
178
+ # Scan the code and return all tokens in a Tokens object.
150
179
  def tokenize source = nil, options = {}
151
180
  options = @options.merge(options)
152
181
  @tokens = options[:tokens] || @tokens || Tokens.new
@@ -170,18 +199,18 @@ module CodeRay
170
199
  end
171
200
  end
172
201
 
173
- # Caches the result of tokenize.
202
+ # Cache the result of tokenize.
174
203
  def tokens
175
204
  @cached_tokens ||= tokenize
176
205
  end
177
206
 
178
- # Traverses the tokens.
207
+ # Traverse the tokens.
179
208
  def each &block
180
209
  tokens.each(&block)
181
210
  end
182
211
  include Enumerable
183
-
184
- # The current line position of the scanner.
212
+
213
+ # The current line position of the scanner. See also #column.
185
214
  #
186
215
  # Beware, this is implemented inefficiently. It should be used
187
216
  # for debugging only.
@@ -189,27 +218,23 @@ module CodeRay
189
218
  string[0..pos].count("\n") + 1
190
219
  end
191
220
 
192
- # The current column position of the scanner. See #line.
221
+ # The current column position of the scanner. See also #line.
222
+ #
223
+ # Beware, this is implemented inefficiently. It should be used
224
+ # for debugging only.
193
225
  def column pos = self.pos
194
226
  return 0 if pos <= 0
195
- string = string()
196
- if string.respond_to?(:bytesize) && (defined?(@bin_string) || string.bytesize != string.size)
197
- @bin_string ||= string.dup.force_encoding('binary')
198
- string = @bin_string
227
+ string = self.string
228
+ if string.respond_to?(:bytesize) && string.bytesize != string.size
229
+ #:nocov:
230
+ string = string.dup.force_encoding('binary')
231
+ #:nocov:
199
232
  end
200
233
  pos - (string.rindex(?\n, pos) || 0)
201
234
  end
202
235
 
203
- def marshal_dump # :nodoc:
204
- @options
205
- end
206
-
207
- def marshal_load options # :nodoc:
208
- @options = options
209
- end
210
-
211
236
  protected
212
-
237
+
213
238
  # Can be implemented by subclasses to do some initialization
214
239
  # that has to be done once per instance.
215
240
  #
@@ -217,15 +242,14 @@ module CodeRay
217
242
  # scan.
218
243
  def setup # :doc:
219
244
  end
220
-
245
+
221
246
  # This is the central method, and commonly the only one a
222
247
  # subclass implements.
223
248
  #
224
249
  # Subclasses must implement this method; it must return +tokens+
225
250
  # and must only use Tokens#<< for storing scanned tokens!
226
251
  def scan_tokens tokens, options # :doc:
227
- raise NotImplementedError,
228
- "#{self.class}#scan_tokens not implemented."
252
+ raise NotImplementedError, "#{self.class}#scan_tokens not implemented."
229
253
  end
230
254
 
231
255
  # Resets the scanner.
@@ -234,7 +258,7 @@ module CodeRay
234
258
  @cached_tokens = nil
235
259
  @bin_string = nil if defined? @bin_string
236
260
  end
237
-
261
+
238
262
  # Scanner error with additional status information
239
263
  def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
240
264
  raise ScanError, <<-EOE % [
@@ -266,8 +290,8 @@ surrounding code:
266
290
  string[pos, ambit],
267
291
  ]
268
292
  end
269
-
293
+
270
294
  end
271
-
295
+
272
296
  end
273
297
  end
@@ -1,6 +1,6 @@
1
1
  module CodeRay
2
2
  module Scanners
3
-
3
+
4
4
  map \
5
5
  :cplusplus => :cpp,
6
6
  :'c++' => :cpp,
@@ -15,8 +15,8 @@ module Scanners
15
15
  :plain => :plaintext,
16
16
  :xhtml => :html,
17
17
  :yml => :yaml
18
-
18
+
19
19
  default :plain
20
-
20
+
21
21
  end
22
22
  end
@@ -65,10 +65,6 @@ module Scanners
65
65
  elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
66
66
  encoder.text_token match, :comment
67
67
 
68
- elsif match = scan(/ \# \s* if \s* 0 /x)
69
- match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
70
- encoder.text_token match, :comment
71
-
72
68
  elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
73
69
  label_expected = match =~ /[;\{\}]/
74
70
  if case_expected
@@ -93,9 +89,6 @@ module Scanners
93
89
  end
94
90
  encoder.text_token match, kind
95
91
 
96
- elsif match = scan(/\$/)
97
- encoder.text_token match, :ident
98
-
99
92
  elsif match = scan(/L?"/)
100
93
  encoder.begin_group :string
101
94
  if match[0] == ?L
@@ -105,6 +98,10 @@ module Scanners
105
98
  encoder.text_token match, :delimiter
106
99
  state = :string
107
100
 
101
+ elsif match = scan(/ \# \s* if \s* 0 /x)
102
+ match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
103
+ encoder.text_token match, :comment
104
+
108
105
  elsif match = scan(/#[ \t]*(\w*)/)
109
106
  encoder.text_token match, :preprocessor
110
107
  in_preproc_line = true
@@ -115,6 +112,9 @@ module Scanners
115
112
  label_expected = false
116
113
  encoder.text_token match, :char
117
114
 
115
+ elsif match = scan(/\$/)
116
+ encoder.text_token match, :ident
117
+
118
118
  elsif match = scan(/0[xX][0-9A-Fa-f]+/)
119
119
  label_expected = false
120
120
  encoder.text_token match, :hex
@@ -0,0 +1,204 @@
1
+ # encoding: utf-8
2
+ module CodeRay
3
+ module Scanners
4
+
5
+ # Clojure scanner by Licenser.
6
+ class Clojure < Scanner
7
+
8
+ register_for :clojure
9
+ file_extension 'clj'
10
+
11
+ SPECIAL_FORMS = %w[
12
+ def if do let quote var fn loop recur throw try catch monitor-enter monitor-exit .
13
+ new
14
+ ] # :nodoc:
15
+
16
+ CORE_FORMS = %w[
17
+ + - -> ->> .. / * < <= = == > >= accessor aclone add-classpath add-watch
18
+ agent agent-error agent-errors aget alength alias all-ns alter alter-meta!
19
+ alter-var-root amap ancestors and apply areduce array-map aset aset-boolean
20
+ aset-byte aset-char aset-double aset-float aset-int aset-long aset-short
21
+ assert assoc assoc! assoc-in associative? atom await await-for bases bean
22
+ bigdec bigint binding bit-and bit-and-not bit-clear bit-flip bit-not bit-or
23
+ bit-set bit-shift-left bit-shift-right bit-test bit-xor boolean boolean-array
24
+ booleans bound-fn bound-fn* bound? butlast byte byte-array bytes case cast char
25
+ char-array char-escape-string char-name-string char? chars class class?
26
+ clear-agent-errors clojure-version coll? comment commute comp comparator
27
+ compare compare-and-set! compile complement concat cond condp conj conj!
28
+ cons constantly construct-proxy contains? count counted? create-ns
29
+ create-struct cycle dec decimal? declare definline defmacro defmethod defmulti
30
+ defn defn- defonce defprotocol defrecord defstruct deftype delay delay?
31
+ deliver denominator deref derive descendants disj disj! dissoc dissoc!
32
+ distinct distinct? doall doc dorun doseq dosync dotimes doto double
33
+ double-array doubles drop drop-last drop-while empty empty? ensure
34
+ enumeration-seq error-handler error-mode eval even? every? extend
35
+ extend-protocol extend-type extenders extends? false? ffirst file-seq
36
+ filter find find-doc find-ns find-var first float float-array float?
37
+ floats flush fn fn? fnext for force format future future-call future-cancel
38
+ future-cancelled? future-done? future? gen-class gen-interface gensym get
39
+ get-in get-method get-proxy-class get-thread-bindings get-validator hash
40
+ hash-map hash-set identical? identity if-let if-not ifn? import in-ns
41
+ inc init-proxy instance? int int-array integer? interleave intern
42
+ interpose into into-array ints io! isa? iterate iterator-seq juxt key
43
+ keys keyword keyword? last lazy-cat lazy-seq let letfn line-seq list list*
44
+ list? load load-file load-reader load-string loaded-libs locking long
45
+ long-array longs loop macroexpand macroexpand-1 make-array make-hierarchy
46
+ map map? mapcat max max-key memfn memoize merge merge-with meta methods
47
+ min min-key mod name namespace neg? newline next nfirst nil? nnext not
48
+ not-any? not-empty not-every? not= ns ns-aliases ns-imports ns-interns
49
+ ns-map ns-name ns-publics ns-refers ns-resolve ns-unalias ns-unmap nth
50
+ nthnext num number? numerator object-array odd? or parents partial
51
+ partition pcalls peek persistent! pmap pop pop! pop-thread-bindings
52
+ pos? pr pr-str prefer-method prefers print print-namespace-doc
53
+ print-str printf println println-str prn prn-str promise proxy
54
+ proxy-mappings proxy-super push-thread-bindings pvalues quot rand
55
+ rand-int range ratio? rationalize re-find re-groups re-matcher
56
+ re-matches re-pattern re-seq read read-line read-string reduce ref
57
+ ref-history-count ref-max-history ref-min-history ref-set refer
58
+ refer-clojure reify release-pending-sends rem remove remove-all-methods
59
+ remove-method remove-ns remove-watch repeat repeatedly replace replicate
60
+ require reset! reset-meta! resolve rest restart-agent resultset-seq
61
+ reverse reversible? rseq rsubseq satisfies? second select-keys send
62
+ send-off seq seq? seque sequence sequential? set set-error-handler!
63
+ set-error-mode! set-validator! set? short short-array shorts
64
+ shutdown-agents slurp some sort sort-by sorted-map sorted-map-by
65
+ sorted-set sorted-set-by sorted? special-form-anchor special-symbol?
66
+ split-at split-with str string? struct struct-map subs subseq subvec
67
+ supers swap! symbol symbol? sync syntax-symbol-anchor take take-last
68
+ take-nth take-while test the-ns thread-bound? time to-array to-array-2d
69
+ trampoline transient tree-seq true? type unchecked-add unchecked-dec
70
+ unchecked-divide unchecked-inc unchecked-multiply unchecked-negate
71
+ unchecked-remainder unchecked-subtract underive update-in update-proxy
72
+ use val vals var-get var-set var? vary-meta vec vector vector-of vector?
73
+ when when-first when-let when-not while with-bindings with-bindings*
74
+ with-in-str with-local-vars with-meta with-open with-out-str
75
+ with-precision xml-seq zero? zipmap
76
+ ] # :nodoc:
77
+ PREDEFINED_CONSTANTS = %w[
78
+ true false *1 *2 *3 *agent* *clojure-version* *command-line-args*
79
+ *compile-files* *compile-path* *e *err* *file* *flush-on-newline*
80
+ *in* *ns* *out* *print-dup* *print-length* *print-level* *print-meta*
81
+ *print-readably* *read-eval* *warn-on-reflection*
82
+ ] # :nodoc:
83
+
84
+ IDENT_KIND = CaseIgnoringWordList.new(:ident).
85
+ add(SPECIAL_FORMS, :reserved).
86
+ add(CORE_FORMS, :reserved).
87
+ add(PREDEFINED_CONSTANTS, :pre_constant)
88
+
89
+ BASIC_IDENTIFIER = /[a-zA-Z$%*\/_+!?&<>\-=][a-zA-Z0-9$&*+!\/_?<>\-\#]*/
90
+ IDENTIFIER = /(?:[@']?(?:#{BASIC_IDENTIFIER}\.)*#{BASIC_IDENTIFIER}(?:\/#{BASIC_IDENTIFIER})?\.?)|\.\.?/
91
+ SYMBOL = /::?#{IDENTIFIER}/o
92
+ DIGIT = /\d/
93
+ DIGIT10 = DIGIT
94
+ DIGIT16 = /[0-9a-f]/i
95
+ DIGIT8 = /[0-7]/
96
+ DIGIT2 = /[01]/
97
+ RADIX16 = /\#x/i
98
+ RADIX8 = /\#o/i
99
+ RADIX2 = /\#b/i
100
+ RADIX10 = /\#d/i
101
+ EXACTNESS = /#i|#e/i
102
+ SIGN = /[\+-]?/
103
+ EXP_MARK = /[esfdl]/i
104
+ EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
105
+ SUFFIX = /#{EXP}?/
106
+ PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
107
+ PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
108
+ PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
109
+ PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
110
+ UINT10 = /#{DIGIT10}+#*/
111
+ UINT16 = /#{DIGIT16}+#*/
112
+ UINT8 = /#{DIGIT8}+#*/
113
+ UINT2 = /#{DIGIT2}+#*/
114
+ DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
115
+ UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
116
+ UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
117
+ UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
118
+ UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
119
+ REAL10 = /#{SIGN}#{UREAL10}/
120
+ REAL16 = /#{SIGN}#{UREAL16}/
121
+ REAL8 = /#{SIGN}#{UREAL8}/
122
+ REAL2 = /#{SIGN}#{UREAL2}/
123
+ IMAG10 = /i|#{UREAL10}i/
124
+ IMAG16 = /i|#{UREAL16}i/
125
+ IMAG8 = /i|#{UREAL8}i/
126
+ IMAG2 = /i|#{UREAL2}i/
127
+ COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
128
+ COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
129
+ COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
130
+ COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
131
+ NUM10 = /#{PREFIX10}?#{COMPLEX10}/
132
+ NUM16 = /#{PREFIX16}#{COMPLEX16}/
133
+ NUM8 = /#{PREFIX8}#{COMPLEX8}/
134
+ NUM2 = /#{PREFIX2}#{COMPLEX2}/
135
+ NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/
136
+
137
+ protected
138
+
139
+ def scan_tokens encoder, options
140
+
141
+ state = :initial
142
+ ident_kind = IDENT_KIND
143
+
144
+ until eos?
145
+
146
+ case state
147
+ when :initial
148
+ if match = scan(/ \s+ | \\\n | , /x)
149
+ encoder.text_token match, :space
150
+ elsif match = scan(/['`\(\[\)\]\{\}]|\#[({]|~@?|@/)
151
+ encoder.text_token match, :operator # FIXME: was :operator_fat
152
+ elsif match = scan(/;.*/)
153
+ encoder.text_token match, :comment
154
+ elsif match = scan(/\#\\(?:newline|space|.?)/)
155
+ encoder.text_token match, :char
156
+ elsif match = scan(/\#[ft]/)
157
+ encoder.text_token match, :pre_constant
158
+ elsif match = scan(/#{IDENTIFIER}/o)
159
+ encoder.text_token match, ident_kind[matched]
160
+ elsif match = scan(/#{SYMBOL}/o)
161
+ encoder.text_token match, :symbol
162
+ elsif match = scan(/\./)
163
+ encoder.text_token match, :operator
164
+ elsif match = scan(/ \# \^ #{IDENTIFIER} /ox)
165
+ encoder.text_token match, :type
166
+ elsif match = scan(/ (\#)? " /x)
167
+ state = self[1] ? :regexp : :string
168
+ encoder.begin_group state
169
+ encoder.text_token match, :delimiter
170
+ elsif match = scan(/#{NUM}/o) and not matched.empty?
171
+ encoder.text_token match, match[/[.e]/i] ? :float : :integer
172
+ else
173
+ encoder.text_token getch, :error
174
+ end
175
+
176
+ when :string, :regexp
177
+ if match = scan(/[^"\\]+|\\.?/)
178
+ encoder.text_token match, :content
179
+ elsif match = scan(/"/)
180
+ encoder.text_token match, :delimiter
181
+ encoder.end_group state
182
+ state = :initial
183
+ else
184
+ raise_inspect "else case \" reached; %p not handled." % peek(1),
185
+ encoder, state
186
+ end
187
+
188
+ else
189
+ raise 'else case reached'
190
+
191
+ end
192
+
193
+ end
194
+
195
+ if [:string, :regexp].include? state
196
+ encoder.end_group state
197
+ end
198
+
199
+ encoder
200
+
201
+ end
202
+ end
203
+ end
204
+ end