coderay 0.9.8 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87)
  1. data/{lib/README → README_INDEX.rdoc} +10 -21
  2. data/Rakefile +6 -6
  3. data/bin/coderay +193 -64
  4. data/lib/coderay.rb +61 -105
  5. data/lib/coderay/duo.rb +17 -21
  6. data/lib/coderay/encoder.rb +100 -112
  7. data/lib/coderay/encoders/_map.rb +12 -7
  8. data/lib/coderay/encoders/comment_filter.rb +12 -30
  9. data/lib/coderay/encoders/count.rb +29 -11
  10. data/lib/coderay/encoders/debug.rb +32 -20
  11. data/lib/coderay/encoders/div.rb +13 -9
  12. data/lib/coderay/encoders/filter.rb +34 -51
  13. data/lib/coderay/encoders/html.rb +155 -161
  14. data/lib/coderay/encoders/html/css.rb +4 -9
  15. data/lib/coderay/encoders/html/numbering.rb +115 -0
  16. data/lib/coderay/encoders/html/output.rb +22 -70
  17. data/lib/coderay/encoders/json.rb +59 -45
  18. data/lib/coderay/encoders/lines_of_code.rb +12 -57
  19. data/lib/coderay/encoders/null.rb +6 -14
  20. data/lib/coderay/encoders/page.rb +13 -9
  21. data/lib/coderay/encoders/span.rb +13 -9
  22. data/lib/coderay/encoders/statistic.rb +58 -39
  23. data/lib/coderay/encoders/terminal.rb +179 -0
  24. data/lib/coderay/encoders/text.rb +31 -17
  25. data/lib/coderay/encoders/token_kind_filter.rb +111 -0
  26. data/lib/coderay/encoders/xml.rb +19 -18
  27. data/lib/coderay/encoders/yaml.rb +37 -9
  28. data/lib/coderay/for_redcloth.rb +4 -4
  29. data/lib/coderay/helpers/file_type.rb +127 -246
  30. data/lib/coderay/helpers/gzip.rb +41 -0
  31. data/lib/coderay/helpers/plugin.rb +241 -306
  32. data/lib/coderay/helpers/word_list.rb +65 -126
  33. data/lib/coderay/scanner.rb +173 -156
  34. data/lib/coderay/scanners/_map.rb +18 -17
  35. data/lib/coderay/scanners/c.rb +63 -77
  36. data/lib/coderay/scanners/clojure.rb +217 -0
  37. data/lib/coderay/scanners/cpp.rb +71 -84
  38. data/lib/coderay/scanners/css.rb +103 -120
  39. data/lib/coderay/scanners/debug.rb +47 -44
  40. data/lib/coderay/scanners/delphi.rb +70 -76
  41. data/lib/coderay/scanners/diff.rb +141 -50
  42. data/lib/coderay/scanners/erb.rb +81 -0
  43. data/lib/coderay/scanners/groovy.rb +104 -113
  44. data/lib/coderay/scanners/haml.rb +168 -0
  45. data/lib/coderay/scanners/html.rb +181 -110
  46. data/lib/coderay/scanners/java.rb +73 -75
  47. data/lib/coderay/scanners/java/builtin_types.rb +2 -0
  48. data/lib/coderay/scanners/java_script.rb +90 -101
  49. data/lib/coderay/scanners/json.rb +40 -53
  50. data/lib/coderay/scanners/php.rb +123 -147
  51. data/lib/coderay/scanners/python.rb +93 -91
  52. data/lib/coderay/scanners/raydebug.rb +66 -0
  53. data/lib/coderay/scanners/ruby.rb +343 -326
  54. data/lib/coderay/scanners/ruby/patterns.rb +40 -106
  55. data/lib/coderay/scanners/ruby/string_state.rb +71 -0
  56. data/lib/coderay/scanners/sql.rb +80 -66
  57. data/lib/coderay/scanners/text.rb +26 -0
  58. data/lib/coderay/scanners/xml.rb +1 -1
  59. data/lib/coderay/scanners/yaml.rb +74 -73
  60. data/lib/coderay/style.rb +10 -7
  61. data/lib/coderay/styles/_map.rb +3 -3
  62. data/lib/coderay/styles/alpha.rb +143 -0
  63. data/lib/coderay/token_kinds.rb +90 -0
  64. data/lib/coderay/tokens.rb +102 -277
  65. data/lib/coderay/tokens_proxy.rb +55 -0
  66. data/lib/coderay/version.rb +3 -0
  67. data/test/functional/basic.rb +200 -18
  68. data/test/functional/examples.rb +130 -0
  69. data/test/functional/for_redcloth.rb +15 -8
  70. data/test/functional/suite.rb +9 -6
  71. metadata +103 -123
  72. data/FOLDERS +0 -53
  73. data/bin/coderay_stylesheet +0 -4
  74. data/lib/coderay/encoders/html/numerization.rb +0 -133
  75. data/lib/coderay/encoders/term.rb +0 -158
  76. data/lib/coderay/encoders/token_class_filter.rb +0 -84
  77. data/lib/coderay/helpers/gzip_simple.rb +0 -123
  78. data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
  79. data/lib/coderay/scanners/plaintext.rb +0 -20
  80. data/lib/coderay/scanners/rhtml.rb +0 -78
  81. data/lib/coderay/scanners/scheme.rb +0 -145
  82. data/lib/coderay/styles/cycnus.rb +0 -152
  83. data/lib/coderay/styles/murphy.rb +0 -134
  84. data/lib/coderay/token_classes.rb +0 -86
  85. data/test/functional/load_plugin_scanner.rb +0 -11
  86. data/test/functional/vhdl.rb +0 -126
  87. data/test/functional/word_list.rb +0 -79
@@ -1,23 +1,24 @@
1
1
  module CodeRay
2
2
  module Scanners
3
-
3
+
4
4
  map \
5
- :h => :c,
6
- :cplusplus => :cpp,
7
- :'c++' => :cpp,
8
- :ecma => :java_script,
9
- :ecmascript => :java_script,
5
+ :'c++' => :cpp,
6
+ :cplusplus => :cpp,
7
+ :ecmascript => :java_script,
10
8
  :ecma_script => :java_script,
11
- :irb => :ruby,
12
- :javascript => :java_script,
13
- :js => :java_script,
14
- :nitro => :nitro_xhtml,
15
- :pascal => :delphi,
16
- :plain => :plaintext,
17
- :xhtml => :html,
18
- :yml => :yaml
19
-
20
- default :plain
21
-
9
+ :rhtml => :erb,
10
+ :eruby => :erb,
11
+ :irb => :ruby,
12
+ :javascript => :java_script,
13
+ :js => :java_script,
14
+ :pascal => :delphi,
15
+ :patch => :diff,
16
+ :plain => :text,
17
+ :plaintext => :text,
18
+ :xhtml => :html,
19
+ :yml => :yaml
20
+
21
+ default :text
22
+
22
23
  end
23
24
  end
@@ -1,46 +1,47 @@
1
1
  module CodeRay
2
2
  module Scanners
3
-
3
+
4
+ # Scanner for C.
4
5
  class C < Scanner
5
6
 
6
- include Streamable
7
-
8
7
  register_for :c
9
8
  file_extension 'c'
10
-
11
- RESERVED_WORDS = [
9
+
10
+ KEYWORDS = [
12
11
  'asm', 'break', 'case', 'continue', 'default', 'do',
13
12
  'else', 'enum', 'for', 'goto', 'if', 'return',
14
13
  'sizeof', 'struct', 'switch', 'typedef', 'union', 'while',
15
14
  'restrict', # added in C99
16
- ]
15
+ ] # :nodoc:
17
16
 
18
17
  PREDEFINED_TYPES = [
19
18
  'int', 'long', 'short', 'char',
20
19
  'signed', 'unsigned', 'float', 'double',
21
20
  'bool', 'complex', # added in C99
22
- ]
21
+ ] # :nodoc:
23
22
 
24
23
  PREDEFINED_CONSTANTS = [
25
24
  'EOF', 'NULL',
26
25
  'true', 'false', # added in C99
27
- ]
26
+ ] # :nodoc:
28
27
  DIRECTIVES = [
29
28
  'auto', 'extern', 'register', 'static', 'void',
30
29
  'const', 'volatile', # added in C89
31
30
  'inline', # added in C99
32
- ]
31
+ ] # :nodoc:
33
32
 
34
33
  IDENT_KIND = WordList.new(:ident).
35
- add(RESERVED_WORDS, :reserved).
36
- add(PREDEFINED_TYPES, :pre_type).
34
+ add(KEYWORDS, :keyword).
35
+ add(PREDEFINED_TYPES, :predefined_type).
37
36
  add(DIRECTIVES, :directive).
38
- add(PREDEFINED_CONSTANTS, :pre_constant)
39
-
40
- ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
41
- UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
37
+ add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc:
42
38
 
43
- def scan_tokens tokens, options
39
+ ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
40
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
41
+
42
+ protected
43
+
44
+ def scan_tokens encoder, options
44
45
 
45
46
  state = :initial
46
47
  label_expected = true
@@ -50,9 +51,6 @@ module Scanners
50
51
 
51
52
  until eos?
52
53
 
53
- kind = nil
54
- match = nil
55
-
56
54
  case state
57
55
 
58
56
  when :initial
@@ -62,15 +60,10 @@ module Scanners
62
60
  in_preproc_line = false
63
61
  label_expected = label_expected_before_preproc_line
64
62
  end
65
- tokens << [match, :space]
66
- next
63
+ encoder.text_token match, :space
67
64
 
68
- elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
69
- kind = :comment
70
-
71
- elsif match = scan(/ \# \s* if \s* 0 /x)
72
- match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
73
- kind = :comment
65
+ elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
66
+ encoder.text_token match, :comment
74
67
 
75
68
  elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
76
69
  label_expected = match =~ /[;\{\}]/
@@ -78,7 +71,7 @@ module Scanners
78
71
  label_expected = true if match == ':'
79
72
  case_expected = false
80
73
  end
81
- kind = :operator
74
+ encoder.text_token match, :operator
82
75
 
83
76
  elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
84
77
  kind = IDENT_KIND[match]
@@ -87,114 +80,107 @@ module Scanners
87
80
  match << matched
88
81
  else
89
82
  label_expected = false
90
- if kind == :reserved
83
+ if kind == :keyword
91
84
  case match
92
85
  when 'case', 'default'
93
86
  case_expected = true
94
87
  end
95
88
  end
96
89
  end
90
+ encoder.text_token match, kind
97
91
 
98
- elsif scan(/\$/)
99
- kind = :ident
100
-
101
92
  elsif match = scan(/L?"/)
102
- tokens << [:open, :string]
93
+ encoder.begin_group :string
103
94
  if match[0] == ?L
104
- tokens << ['L', :modifier]
95
+ encoder.text_token 'L', :modifier
105
96
  match = '"'
106
97
  end
98
+ encoder.text_token match, :delimiter
107
99
  state = :string
108
- kind = :delimiter
109
100
 
110
- elsif scan(/#[ \t]*(\w*)/)
111
- kind = :preprocessor
101
+ elsif match = scan(/ \# \s* if \s* 0 /x)
102
+ match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
103
+ encoder.text_token match, :comment
104
+
105
+ elsif match = scan(/#[ \t]*(\w*)/)
106
+ encoder.text_token match, :preprocessor
112
107
  in_preproc_line = true
113
108
  label_expected_before_preproc_line = label_expected
114
109
  state = :include_expected if self[1] == 'include'
115
110
 
116
- elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
111
+ elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
117
112
  label_expected = false
118
- kind = :char
113
+ encoder.text_token match, :char
119
114
 
120
- elsif scan(/0[xX][0-9A-Fa-f]+/)
115
+ elsif match = scan(/\$/)
116
+ encoder.text_token match, :ident
117
+
118
+ elsif match = scan(/0[xX][0-9A-Fa-f]+/)
121
119
  label_expected = false
122
- kind = :hex
120
+ encoder.text_token match, :hex
123
121
 
124
- elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
122
+ elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/)
125
123
  label_expected = false
126
- kind = :oct
124
+ encoder.text_token match, :octal
127
125
 
128
- elsif scan(/(?:\d+)(?![.eEfF])L?L?/)
126
+ elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/)
129
127
  label_expected = false
130
- kind = :integer
128
+ encoder.text_token match, :integer
131
129
 
132
- elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
130
+ elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
133
131
  label_expected = false
134
- kind = :float
132
+ encoder.text_token match, :float
135
133
 
136
134
  else
137
- getch
138
- kind = :error
135
+ encoder.text_token getch, :error
139
136
 
140
137
  end
141
138
 
142
139
  when :string
143
- if scan(/[^\\\n"]+/)
144
- kind = :content
145
- elsif scan(/"/)
146
- tokens << ['"', :delimiter]
147
- tokens << [:close, :string]
140
+ if match = scan(/[^\\\n"]+/)
141
+ encoder.text_token match, :content
142
+ elsif match = scan(/"/)
143
+ encoder.text_token match, :delimiter
144
+ encoder.end_group :string
148
145
  state = :initial
149
146
  label_expected = false
150
- next
151
- elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
152
- kind = :char
153
- elsif scan(/ \\ | $ /x)
154
- tokens << [:close, :string]
155
- kind = :error
147
+ elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
148
+ encoder.text_token match, :char
149
+ elsif match = scan(/ \\ | $ /x)
150
+ encoder.end_group :string
151
+ encoder.text_token match, :error
156
152
  state = :initial
157
153
  label_expected = false
158
154
  else
159
- raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
155
+ raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
160
156
  end
161
157
 
162
158
  when :include_expected
163
- if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
164
- kind = :include
159
+ if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
160
+ encoder.text_token match, :include
165
161
  state = :initial
166
162
 
167
163
  elsif match = scan(/\s+/)
168
- kind = :space
164
+ encoder.text_token match, :space
169
165
  state = :initial if match.index ?\n
170
166
 
171
167
  else
172
168
  state = :initial
173
- next
174
169
 
175
170
  end
176
171
 
177
172
  else
178
- raise_inspect 'Unknown state', tokens
179
-
180
- end
173
+ raise_inspect 'Unknown state', encoder
181
174
 
182
- match ||= matched
183
- if $CODERAY_DEBUG and not kind
184
- raise_inspect 'Error token %p in line %d' %
185
- [[match, kind], line], tokens
186
175
  end
187
- raise_inspect 'Empty token', tokens unless match
188
-
189
- tokens << [match, kind]
190
176
 
191
177
  end
192
178
 
193
179
  if state == :string
194
- tokens << [:close, :string]
180
+ encoder.end_group :string
195
181
  end
196
182
 
197
- tokens
183
+ encoder
198
184
  end
199
185
 
200
186
  end
@@ -0,0 +1,217 @@
1
+ # encoding: utf-8
2
+ module CodeRay
3
+ module Scanners
4
+
5
+ # Clojure scanner by Licenser.
6
+ class Clojure < Scanner
7
+
8
+ register_for :clojure
9
+ file_extension 'clj'
10
+
11
+ SPECIAL_FORMS = %w[
12
+ def if do let quote var fn loop recur throw try catch monitor-enter monitor-exit .
13
+ new
14
+ ] # :nodoc:
15
+
16
+ CORE_FORMS = %w[
17
+ + - -> ->> .. / * <= < = == >= > accessor aclone add-classpath add-watch
18
+ agent agent-error agent-errors aget alength alias all-ns alter alter-meta!
19
+ alter-var-root amap ancestors and apply areduce array-map aset aset-boolean
20
+ aset-byte aset-char aset-double aset-float aset-int aset-long aset-short
21
+ assert assoc assoc! assoc-in associative? atom await await-for bases bean
22
+ bigdec bigint binding bit-and bit-and-not bit-clear bit-flip bit-not bit-or
23
+ bit-set bit-shift-left bit-shift-right bit-test bit-xor boolean boolean-array
24
+ booleans bound-fn bound-fn* bound? butlast byte byte-array bytes case cast char
25
+ char-array char-escape-string char-name-string char? chars class class?
26
+ clear-agent-errors clojure-version coll? comment commute comp comparator
27
+ compare compare-and-set! compile complement concat cond condp conj conj!
28
+ cons constantly construct-proxy contains? count counted? create-ns
29
+ create-struct cycle dec decimal? declare definline defmacro defmethod defmulti
30
+ defn defn- defonce defprotocol defrecord defstruct deftype delay delay?
31
+ deliver denominator deref derive descendants disj disj! dissoc dissoc!
32
+ distinct distinct? doall doc dorun doseq dosync dotimes doto double
33
+ double-array doubles drop drop-last drop-while empty empty? ensure
34
+ enumeration-seq error-handler error-mode eval even? every? extend
35
+ extend-protocol extend-type extenders extends? false? ffirst file-seq
36
+ filter find find-doc find-ns find-var first float float-array float?
37
+ floats flush fn fn? fnext for force format future future-call future-cancel
38
+ future-cancelled? future-done? future? gen-class gen-interface gensym get
39
+ get-in get-method get-proxy-class get-thread-bindings get-validator hash
40
+ hash-map hash-set identical? identity if-let if-not ifn? import in-ns
41
+ inc init-proxy instance? int int-array integer? interleave intern
42
+ interpose into into-array ints io! isa? iterate iterator-seq juxt key
43
+ keys keyword keyword? last lazy-cat lazy-seq let letfn line-seq list list*
44
+ list? load load-file load-reader load-string loaded-libs locking long
45
+ long-array longs loop macroexpand macroexpand-1 make-array make-hierarchy
46
+ map map? mapcat max max-key memfn memoize merge merge-with meta methods
47
+ min min-key mod name namespace neg? newline next nfirst nil? nnext not
48
+ not-any? not-empty not-every? not= ns ns-aliases ns-imports ns-interns
49
+ ns-map ns-name ns-publics ns-refers ns-resolve ns-unalias ns-unmap nth
50
+ nthnext num number? numerator object-array odd? or parents partial
51
+ partition pcalls peek persistent! pmap pop pop! pop-thread-bindings
52
+ pos? pr pr-str prefer-method prefers print print-namespace-doc
53
+ print-str printf println println-str prn prn-str promise proxy
54
+ proxy-mappings proxy-super push-thread-bindings pvalues quot rand
55
+ rand-int range ratio? rationalize re-find re-groups re-matcher
56
+ re-matches re-pattern re-seq read read-line read-string reduce ref
57
+ ref-history-count ref-max-history ref-min-history ref-set refer
58
+ refer-clojure reify release-pending-sends rem remove remove-all-methods
59
+ remove-method remove-ns remove-watch repeat repeatedly replace replicate
60
+ require reset! reset-meta! resolve rest restart-agent resultset-seq
61
+ reverse reversible? rseq rsubseq satisfies? second select-keys send
62
+ send-off seq seq? seque sequence sequential? set set-error-handler!
63
+ set-error-mode! set-validator! set? short short-array shorts
64
+ shutdown-agents slurp some sort sort-by sorted-map sorted-map-by
65
+ sorted-set sorted-set-by sorted? special-form-anchor special-symbol?
66
+ split-at split-with str string? struct struct-map subs subseq subvec
67
+ supers swap! symbol symbol? sync syntax-symbol-anchor take take-last
68
+ take-nth take-while test the-ns thread-bound? time to-array to-array-2d
69
+ trampoline transient tree-seq true? type unchecked-add unchecked-dec
70
+ unchecked-divide unchecked-inc unchecked-multiply unchecked-negate
71
+ unchecked-remainder unchecked-subtract underive update-in update-proxy
72
+ use val vals var-get var-set var? vary-meta vec vector vector-of vector?
73
+ when when-first when-let when-not while with-bindings with-bindings*
74
+ with-in-str with-local-vars with-meta with-open with-out-str
75
+ with-precision xml-seq zero? zipmap
76
+ ] # :nodoc:
77
+
78
+ PREDEFINED_CONSTANTS = %w[
79
+ true false nil *1 *2 *3 *agent* *clojure-version* *command-line-args*
80
+ *compile-files* *compile-path* *e *err* *file* *flush-on-newline*
81
+ *in* *ns* *out* *print-dup* *print-length* *print-level* *print-meta*
82
+ *print-readably* *read-eval* *warn-on-reflection*
83
+ ] # :nodoc:
84
+
85
+ IDENT_KIND = WordList.new(:ident).
86
+ add(SPECIAL_FORMS, :keyword).
87
+ add(CORE_FORMS, :keyword).
88
+ add(PREDEFINED_CONSTANTS, :predefined_constant)
89
+
90
+ KEYWORD_NEXT_TOKEN_KIND = WordList.new(nil).
91
+ add(%w[ def defn defn- definline defmacro defmulti defmethod defstruct defonce declare ], :function).
92
+ add(%w[ ns ], :namespace).
93
+ add(%w[ defprotocol defrecord ], :class)
94
+
95
+ BASIC_IDENTIFIER = /[a-zA-Z$%*\/_+!?&<>\-=]=?[a-zA-Z0-9$&*+!\/_?<>\-\#]*/
96
+ IDENTIFIER = /(?!-\d)(?:(?:#{BASIC_IDENTIFIER}\.)*#{BASIC_IDENTIFIER}(?:\/#{BASIC_IDENTIFIER})?\.?)|\.\.?/
97
+ SYMBOL = /::?#{IDENTIFIER}/o
98
+ DIGIT = /\d/
99
+ DIGIT10 = DIGIT
100
+ DIGIT16 = /[0-9a-f]/i
101
+ DIGIT8 = /[0-7]/
102
+ DIGIT2 = /[01]/
103
+ RADIX16 = /\#x/i
104
+ RADIX8 = /\#o/i
105
+ RADIX2 = /\#b/i
106
+ RADIX10 = /\#d/i
107
+ EXACTNESS = /#i|#e/i
108
+ SIGN = /[\+-]?/
109
+ EXP_MARK = /[esfdl]/i
110
+ EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
111
+ SUFFIX = /#{EXP}?/
112
+ PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
113
+ PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
114
+ PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
115
+ PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
116
+ UINT10 = /#{DIGIT10}+#*/
117
+ UINT16 = /#{DIGIT16}+#*/
118
+ UINT8 = /#{DIGIT8}+#*/
119
+ UINT2 = /#{DIGIT2}+#*/
120
+ DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
121
+ UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
122
+ UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
123
+ UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
124
+ UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
125
+ REAL10 = /#{SIGN}#{UREAL10}/
126
+ REAL16 = /#{SIGN}#{UREAL16}/
127
+ REAL8 = /#{SIGN}#{UREAL8}/
128
+ REAL2 = /#{SIGN}#{UREAL2}/
129
+ IMAG10 = /i|#{UREAL10}i/
130
+ IMAG16 = /i|#{UREAL16}i/
131
+ IMAG8 = /i|#{UREAL8}i/
132
+ IMAG2 = /i|#{UREAL2}i/
133
+ COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
134
+ COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
135
+ COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
136
+ COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
137
+ NUM10 = /#{PREFIX10}?#{COMPLEX10}/
138
+ NUM16 = /#{PREFIX16}#{COMPLEX16}/
139
+ NUM8 = /#{PREFIX8}#{COMPLEX8}/
140
+ NUM2 = /#{PREFIX2}#{COMPLEX2}/
141
+ NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/
142
+
143
+ protected
144
+
145
+ def scan_tokens encoder, options
146
+
147
+ state = :initial
148
+ kind = nil
149
+
150
+ until eos?
151
+
152
+ case state
153
+ when :initial
154
+ if match = scan(/ \s+ | \\\n | , /x)
155
+ encoder.text_token match, :space
156
+ elsif match = scan(/['`\(\[\)\]\{\}]|\#[({]|~@?|[@\^]/)
157
+ encoder.text_token match, :operator
158
+ elsif match = scan(/;.*/)
159
+ encoder.text_token match, :comment # TODO: recognize (comment ...) too
160
+ elsif match = scan(/\#?\\(?:newline|space|.?)/)
161
+ encoder.text_token match, :char
162
+ elsif match = scan(/\#[ft]/)
163
+ encoder.text_token match, :predefined_constant
164
+ elsif match = scan(/#{IDENTIFIER}/o)
165
+ kind = IDENT_KIND[match]
166
+ encoder.text_token match, kind
167
+ if rest? && kind == :keyword
168
+ if kind = KEYWORD_NEXT_TOKEN_KIND[match]
169
+ encoder.text_token match, :space if match = scan(/\s+/o)
170
+ encoder.text_token match, kind if match = scan(/#{IDENTIFIER}/o)
171
+ end
172
+ end
173
+ elsif match = scan(/#{SYMBOL}/o)
174
+ encoder.text_token match, :symbol
175
+ elsif match = scan(/\./)
176
+ encoder.text_token match, :operator
177
+ elsif match = scan(/ \# \^ #{IDENTIFIER} /ox)
178
+ encoder.text_token match, :type
179
+ elsif match = scan(/ (\#)? " /x)
180
+ state = self[1] ? :regexp : :string
181
+ encoder.begin_group state
182
+ encoder.text_token match, :delimiter
183
+ elsif match = scan(/#{NUM}/o) and not matched.empty?
184
+ encoder.text_token match, match[/[.e\/]/i] ? :float : :integer
185
+ else
186
+ encoder.text_token getch, :error
187
+ end
188
+
189
+ when :string, :regexp
190
+ if match = scan(/[^"\\]+|\\.?/)
191
+ encoder.text_token match, :content
192
+ elsif match = scan(/"/)
193
+ encoder.text_token match, :delimiter
194
+ encoder.end_group state
195
+ state = :initial
196
+ else
197
+ raise_inspect "else case \" reached; %p not handled." % peek(1),
198
+ encoder, state
199
+ end
200
+
201
+ else
202
+ raise 'else case reached'
203
+
204
+ end
205
+
206
+ end
207
+
208
+ if [:string, :regexp].include? state
209
+ encoder.end_group state
210
+ end
211
+
212
+ encoder
213
+
214
+ end
215
+ end
216
+ end
217
+ end