coderay 0.9.8 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. data/{lib/README → README_INDEX.rdoc} +10 -21
  2. data/Rakefile +6 -6
  3. data/bin/coderay +193 -64
  4. data/lib/coderay.rb +61 -105
  5. data/lib/coderay/duo.rb +17 -21
  6. data/lib/coderay/encoder.rb +100 -112
  7. data/lib/coderay/encoders/_map.rb +12 -7
  8. data/lib/coderay/encoders/comment_filter.rb +12 -30
  9. data/lib/coderay/encoders/count.rb +29 -11
  10. data/lib/coderay/encoders/debug.rb +32 -20
  11. data/lib/coderay/encoders/div.rb +13 -9
  12. data/lib/coderay/encoders/filter.rb +34 -51
  13. data/lib/coderay/encoders/html.rb +155 -161
  14. data/lib/coderay/encoders/html/css.rb +4 -9
  15. data/lib/coderay/encoders/html/numbering.rb +115 -0
  16. data/lib/coderay/encoders/html/output.rb +22 -70
  17. data/lib/coderay/encoders/json.rb +59 -45
  18. data/lib/coderay/encoders/lines_of_code.rb +12 -57
  19. data/lib/coderay/encoders/null.rb +6 -14
  20. data/lib/coderay/encoders/page.rb +13 -9
  21. data/lib/coderay/encoders/span.rb +13 -9
  22. data/lib/coderay/encoders/statistic.rb +58 -39
  23. data/lib/coderay/encoders/terminal.rb +179 -0
  24. data/lib/coderay/encoders/text.rb +31 -17
  25. data/lib/coderay/encoders/token_kind_filter.rb +111 -0
  26. data/lib/coderay/encoders/xml.rb +19 -18
  27. data/lib/coderay/encoders/yaml.rb +37 -9
  28. data/lib/coderay/for_redcloth.rb +4 -4
  29. data/lib/coderay/helpers/file_type.rb +127 -246
  30. data/lib/coderay/helpers/gzip.rb +41 -0
  31. data/lib/coderay/helpers/plugin.rb +241 -306
  32. data/lib/coderay/helpers/word_list.rb +65 -126
  33. data/lib/coderay/scanner.rb +173 -156
  34. data/lib/coderay/scanners/_map.rb +18 -17
  35. data/lib/coderay/scanners/c.rb +63 -77
  36. data/lib/coderay/scanners/clojure.rb +217 -0
  37. data/lib/coderay/scanners/cpp.rb +71 -84
  38. data/lib/coderay/scanners/css.rb +103 -120
  39. data/lib/coderay/scanners/debug.rb +47 -44
  40. data/lib/coderay/scanners/delphi.rb +70 -76
  41. data/lib/coderay/scanners/diff.rb +141 -50
  42. data/lib/coderay/scanners/erb.rb +81 -0
  43. data/lib/coderay/scanners/groovy.rb +104 -113
  44. data/lib/coderay/scanners/haml.rb +168 -0
  45. data/lib/coderay/scanners/html.rb +181 -110
  46. data/lib/coderay/scanners/java.rb +73 -75
  47. data/lib/coderay/scanners/java/builtin_types.rb +2 -0
  48. data/lib/coderay/scanners/java_script.rb +90 -101
  49. data/lib/coderay/scanners/json.rb +40 -53
  50. data/lib/coderay/scanners/php.rb +123 -147
  51. data/lib/coderay/scanners/python.rb +93 -91
  52. data/lib/coderay/scanners/raydebug.rb +66 -0
  53. data/lib/coderay/scanners/ruby.rb +343 -326
  54. data/lib/coderay/scanners/ruby/patterns.rb +40 -106
  55. data/lib/coderay/scanners/ruby/string_state.rb +71 -0
  56. data/lib/coderay/scanners/sql.rb +80 -66
  57. data/lib/coderay/scanners/text.rb +26 -0
  58. data/lib/coderay/scanners/xml.rb +1 -1
  59. data/lib/coderay/scanners/yaml.rb +74 -73
  60. data/lib/coderay/style.rb +10 -7
  61. data/lib/coderay/styles/_map.rb +3 -3
  62. data/lib/coderay/styles/alpha.rb +143 -0
  63. data/lib/coderay/token_kinds.rb +90 -0
  64. data/lib/coderay/tokens.rb +102 -277
  65. data/lib/coderay/tokens_proxy.rb +55 -0
  66. data/lib/coderay/version.rb +3 -0
  67. data/test/functional/basic.rb +200 -18
  68. data/test/functional/examples.rb +130 -0
  69. data/test/functional/for_redcloth.rb +15 -8
  70. data/test/functional/suite.rb +9 -6
  71. metadata +103 -123
  72. data/FOLDERS +0 -53
  73. data/bin/coderay_stylesheet +0 -4
  74. data/lib/coderay/encoders/html/numerization.rb +0 -133
  75. data/lib/coderay/encoders/term.rb +0 -158
  76. data/lib/coderay/encoders/token_class_filter.rb +0 -84
  77. data/lib/coderay/helpers/gzip_simple.rb +0 -123
  78. data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
  79. data/lib/coderay/scanners/plaintext.rb +0 -20
  80. data/lib/coderay/scanners/rhtml.rb +0 -78
  81. data/lib/coderay/scanners/scheme.rb +0 -145
  82. data/lib/coderay/styles/cycnus.rb +0 -152
  83. data/lib/coderay/styles/murphy.rb +0 -134
  84. data/lib/coderay/token_classes.rb +0 -86
  85. data/test/functional/load_plugin_scanner.rb +0 -11
  86. data/test/functional/vhdl.rb +0 -126
  87. data/test/functional/word_list.rb +0 -79
@@ -1,23 +1,24 @@
1
1
  module CodeRay
2
2
  module Scanners
3
-
3
+
4
4
  map \
5
- :h => :c,
6
- :cplusplus => :cpp,
7
- :'c++' => :cpp,
8
- :ecma => :java_script,
9
- :ecmascript => :java_script,
5
+ :'c++' => :cpp,
6
+ :cplusplus => :cpp,
7
+ :ecmascript => :java_script,
10
8
  :ecma_script => :java_script,
11
- :irb => :ruby,
12
- :javascript => :java_script,
13
- :js => :java_script,
14
- :nitro => :nitro_xhtml,
15
- :pascal => :delphi,
16
- :plain => :plaintext,
17
- :xhtml => :html,
18
- :yml => :yaml
19
-
20
- default :plain
21
-
9
+ :rhtml => :erb,
10
+ :eruby => :erb,
11
+ :irb => :ruby,
12
+ :javascript => :java_script,
13
+ :js => :java_script,
14
+ :pascal => :delphi,
15
+ :patch => :diff,
16
+ :plain => :text,
17
+ :plaintext => :text,
18
+ :xhtml => :html,
19
+ :yml => :yaml
20
+
21
+ default :text
22
+
22
23
  end
23
24
  end
@@ -1,46 +1,47 @@
1
1
  module CodeRay
2
2
  module Scanners
3
-
3
+
4
+ # Scanner for C.
4
5
  class C < Scanner
5
6
 
6
- include Streamable
7
-
8
7
  register_for :c
9
8
  file_extension 'c'
10
-
11
- RESERVED_WORDS = [
9
+
10
+ KEYWORDS = [
12
11
  'asm', 'break', 'case', 'continue', 'default', 'do',
13
12
  'else', 'enum', 'for', 'goto', 'if', 'return',
14
13
  'sizeof', 'struct', 'switch', 'typedef', 'union', 'while',
15
14
  'restrict', # added in C99
16
- ]
15
+ ] # :nodoc:
17
16
 
18
17
  PREDEFINED_TYPES = [
19
18
  'int', 'long', 'short', 'char',
20
19
  'signed', 'unsigned', 'float', 'double',
21
20
  'bool', 'complex', # added in C99
22
- ]
21
+ ] # :nodoc:
23
22
 
24
23
  PREDEFINED_CONSTANTS = [
25
24
  'EOF', 'NULL',
26
25
  'true', 'false', # added in C99
27
- ]
26
+ ] # :nodoc:
28
27
  DIRECTIVES = [
29
28
  'auto', 'extern', 'register', 'static', 'void',
30
29
  'const', 'volatile', # added in C89
31
30
  'inline', # added in C99
32
- ]
31
+ ] # :nodoc:
33
32
 
34
33
  IDENT_KIND = WordList.new(:ident).
35
- add(RESERVED_WORDS, :reserved).
36
- add(PREDEFINED_TYPES, :pre_type).
34
+ add(KEYWORDS, :keyword).
35
+ add(PREDEFINED_TYPES, :predefined_type).
37
36
  add(DIRECTIVES, :directive).
38
- add(PREDEFINED_CONSTANTS, :pre_constant)
39
-
40
- ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
41
- UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
37
+ add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc:
42
38
 
43
- def scan_tokens tokens, options
39
+ ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
40
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
41
+
42
+ protected
43
+
44
+ def scan_tokens encoder, options
44
45
 
45
46
  state = :initial
46
47
  label_expected = true
@@ -50,9 +51,6 @@ module Scanners
50
51
 
51
52
  until eos?
52
53
 
53
- kind = nil
54
- match = nil
55
-
56
54
  case state
57
55
 
58
56
  when :initial
@@ -62,15 +60,10 @@ module Scanners
62
60
  in_preproc_line = false
63
61
  label_expected = label_expected_before_preproc_line
64
62
  end
65
- tokens << [match, :space]
66
- next
63
+ encoder.text_token match, :space
67
64
 
68
- elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
69
- kind = :comment
70
-
71
- elsif match = scan(/ \# \s* if \s* 0 /x)
72
- match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
73
- kind = :comment
65
+ elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
66
+ encoder.text_token match, :comment
74
67
 
75
68
  elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
76
69
  label_expected = match =~ /[;\{\}]/
@@ -78,7 +71,7 @@ module Scanners
78
71
  label_expected = true if match == ':'
79
72
  case_expected = false
80
73
  end
81
- kind = :operator
74
+ encoder.text_token match, :operator
82
75
 
83
76
  elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
84
77
  kind = IDENT_KIND[match]
@@ -87,114 +80,107 @@ module Scanners
87
80
  match << matched
88
81
  else
89
82
  label_expected = false
90
- if kind == :reserved
83
+ if kind == :keyword
91
84
  case match
92
85
  when 'case', 'default'
93
86
  case_expected = true
94
87
  end
95
88
  end
96
89
  end
90
+ encoder.text_token match, kind
97
91
 
98
- elsif scan(/\$/)
99
- kind = :ident
100
-
101
92
  elsif match = scan(/L?"/)
102
- tokens << [:open, :string]
93
+ encoder.begin_group :string
103
94
  if match[0] == ?L
104
- tokens << ['L', :modifier]
95
+ encoder.text_token 'L', :modifier
105
96
  match = '"'
106
97
  end
98
+ encoder.text_token match, :delimiter
107
99
  state = :string
108
- kind = :delimiter
109
100
 
110
- elsif scan(/#[ \t]*(\w*)/)
111
- kind = :preprocessor
101
+ elsif match = scan(/ \# \s* if \s* 0 /x)
102
+ match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
103
+ encoder.text_token match, :comment
104
+
105
+ elsif match = scan(/#[ \t]*(\w*)/)
106
+ encoder.text_token match, :preprocessor
112
107
  in_preproc_line = true
113
108
  label_expected_before_preproc_line = label_expected
114
109
  state = :include_expected if self[1] == 'include'
115
110
 
116
- elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
111
+ elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
117
112
  label_expected = false
118
- kind = :char
113
+ encoder.text_token match, :char
119
114
 
120
- elsif scan(/0[xX][0-9A-Fa-f]+/)
115
+ elsif match = scan(/\$/)
116
+ encoder.text_token match, :ident
117
+
118
+ elsif match = scan(/0[xX][0-9A-Fa-f]+/)
121
119
  label_expected = false
122
- kind = :hex
120
+ encoder.text_token match, :hex
123
121
 
124
- elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
122
+ elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/)
125
123
  label_expected = false
126
- kind = :oct
124
+ encoder.text_token match, :octal
127
125
 
128
- elsif scan(/(?:\d+)(?![.eEfF])L?L?/)
126
+ elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/)
129
127
  label_expected = false
130
- kind = :integer
128
+ encoder.text_token match, :integer
131
129
 
132
- elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
130
+ elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
133
131
  label_expected = false
134
- kind = :float
132
+ encoder.text_token match, :float
135
133
 
136
134
  else
137
- getch
138
- kind = :error
135
+ encoder.text_token getch, :error
139
136
 
140
137
  end
141
138
 
142
139
  when :string
143
- if scan(/[^\\\n"]+/)
144
- kind = :content
145
- elsif scan(/"/)
146
- tokens << ['"', :delimiter]
147
- tokens << [:close, :string]
140
+ if match = scan(/[^\\\n"]+/)
141
+ encoder.text_token match, :content
142
+ elsif match = scan(/"/)
143
+ encoder.text_token match, :delimiter
144
+ encoder.end_group :string
148
145
  state = :initial
149
146
  label_expected = false
150
- next
151
- elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
152
- kind = :char
153
- elsif scan(/ \\ | $ /x)
154
- tokens << [:close, :string]
155
- kind = :error
147
+ elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
148
+ encoder.text_token match, :char
149
+ elsif match = scan(/ \\ | $ /x)
150
+ encoder.end_group :string
151
+ encoder.text_token match, :error
156
152
  state = :initial
157
153
  label_expected = false
158
154
  else
159
- raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
155
+ raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
160
156
  end
161
157
 
162
158
  when :include_expected
163
- if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
164
- kind = :include
159
+ if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
160
+ encoder.text_token match, :include
165
161
  state = :initial
166
162
 
167
163
  elsif match = scan(/\s+/)
168
- kind = :space
164
+ encoder.text_token match, :space
169
165
  state = :initial if match.index ?\n
170
166
 
171
167
  else
172
168
  state = :initial
173
- next
174
169
 
175
170
  end
176
171
 
177
172
  else
178
- raise_inspect 'Unknown state', tokens
179
-
180
- end
173
+ raise_inspect 'Unknown state', encoder
181
174
 
182
- match ||= matched
183
- if $CODERAY_DEBUG and not kind
184
- raise_inspect 'Error token %p in line %d' %
185
- [[match, kind], line], tokens
186
175
  end
187
- raise_inspect 'Empty token', tokens unless match
188
-
189
- tokens << [match, kind]
190
176
 
191
177
  end
192
178
 
193
179
  if state == :string
194
- tokens << [:close, :string]
180
+ encoder.end_group :string
195
181
  end
196
182
 
197
- tokens
183
+ encoder
198
184
  end
199
185
 
200
186
  end
@@ -0,0 +1,217 @@
1
+ # encoding: utf-8
2
+ module CodeRay
3
+ module Scanners
4
+
5
+ # Clojure scanner by Licenser.
6
+ class Clojure < Scanner
7
+
8
+ register_for :clojure
9
+ file_extension 'clj'
10
+
11
+ SPECIAL_FORMS = %w[
12
+ def if do let quote var fn loop recur throw try catch monitor-enter monitor-exit .
13
+ new
14
+ ] # :nodoc:
15
+
16
+ CORE_FORMS = %w[
17
+ + - -> ->> .. / * <= < = == >= > accessor aclone add-classpath add-watch
18
+ agent agent-error agent-errors aget alength alias all-ns alter alter-meta!
19
+ alter-var-root amap ancestors and apply areduce array-map aset aset-boolean
20
+ aset-byte aset-char aset-double aset-float aset-int aset-long aset-short
21
+ assert assoc assoc! assoc-in associative? atom await await-for bases bean
22
+ bigdec bigint binding bit-and bit-and-not bit-clear bit-flip bit-not bit-or
23
+ bit-set bit-shift-left bit-shift-right bit-test bit-xor boolean boolean-array
24
+ booleans bound-fn bound-fn* bound? butlast byte byte-array bytes case cast char
25
+ char-array char-escape-string char-name-string char? chars class class?
26
+ clear-agent-errors clojure-version coll? comment commute comp comparator
27
+ compare compare-and-set! compile complement concat cond condp conj conj!
28
+ cons constantly construct-proxy contains? count counted? create-ns
29
+ create-struct cycle dec decimal? declare definline defmacro defmethod defmulti
30
+ defn defn- defonce defprotocol defrecord defstruct deftype delay delay?
31
+ deliver denominator deref derive descendants disj disj! dissoc dissoc!
32
+ distinct distinct? doall doc dorun doseq dosync dotimes doto double
33
+ double-array doubles drop drop-last drop-while empty empty? ensure
34
+ enumeration-seq error-handler error-mode eval even? every? extend
35
+ extend-protocol extend-type extenders extends? false? ffirst file-seq
36
+ filter find find-doc find-ns find-var first float float-array float?
37
+ floats flush fn fn? fnext for force format future future-call future-cancel
38
+ future-cancelled? future-done? future? gen-class gen-interface gensym get
39
+ get-in get-method get-proxy-class get-thread-bindings get-validator hash
40
+ hash-map hash-set identical? identity if-let if-not ifn? import in-ns
41
+ inc init-proxy instance? int int-array integer? interleave intern
42
+ interpose into into-array ints io! isa? iterate iterator-seq juxt key
43
+ keys keyword keyword? last lazy-cat lazy-seq let letfn line-seq list list*
44
+ list? load load-file load-reader load-string loaded-libs locking long
45
+ long-array longs loop macroexpand macroexpand-1 make-array make-hierarchy
46
+ map map? mapcat max max-key memfn memoize merge merge-with meta methods
47
+ min min-key mod name namespace neg? newline next nfirst nil? nnext not
48
+ not-any? not-empty not-every? not= ns ns-aliases ns-imports ns-interns
49
+ ns-map ns-name ns-publics ns-refers ns-resolve ns-unalias ns-unmap nth
50
+ nthnext num number? numerator object-array odd? or parents partial
51
+ partition pcalls peek persistent! pmap pop pop! pop-thread-bindings
52
+ pos? pr pr-str prefer-method prefers print print-namespace-doc
53
+ print-str printf println println-str prn prn-str promise proxy
54
+ proxy-mappings proxy-super push-thread-bindings pvalues quot rand
55
+ rand-int range ratio? rationalize re-find re-groups re-matcher
56
+ re-matches re-pattern re-seq read read-line read-string reduce ref
57
+ ref-history-count ref-max-history ref-min-history ref-set refer
58
+ refer-clojure reify release-pending-sends rem remove remove-all-methods
59
+ remove-method remove-ns remove-watch repeat repeatedly replace replicate
60
+ require reset! reset-meta! resolve rest restart-agent resultset-seq
61
+ reverse reversible? rseq rsubseq satisfies? second select-keys send
62
+ send-off seq seq? seque sequence sequential? set set-error-handler!
63
+ set-error-mode! set-validator! set? short short-array shorts
64
+ shutdown-agents slurp some sort sort-by sorted-map sorted-map-by
65
+ sorted-set sorted-set-by sorted? special-form-anchor special-symbol?
66
+ split-at split-with str string? struct struct-map subs subseq subvec
67
+ supers swap! symbol symbol? sync syntax-symbol-anchor take take-last
68
+ take-nth take-while test the-ns thread-bound? time to-array to-array-2d
69
+ trampoline transient tree-seq true? type unchecked-add unchecked-dec
70
+ unchecked-divide unchecked-inc unchecked-multiply unchecked-negate
71
+ unchecked-remainder unchecked-subtract underive update-in update-proxy
72
+ use val vals var-get var-set var? vary-meta vec vector vector-of vector?
73
+ when when-first when-let when-not while with-bindings with-bindings*
74
+ with-in-str with-local-vars with-meta with-open with-out-str
75
+ with-precision xml-seq zero? zipmap
76
+ ] # :nodoc:
77
+
78
+ PREDEFINED_CONSTANTS = %w[
79
+ true false nil *1 *2 *3 *agent* *clojure-version* *command-line-args*
80
+ *compile-files* *compile-path* *e *err* *file* *flush-on-newline*
81
+ *in* *ns* *out* *print-dup* *print-length* *print-level* *print-meta*
82
+ *print-readably* *read-eval* *warn-on-reflection*
83
+ ] # :nodoc:
84
+
85
+ IDENT_KIND = WordList.new(:ident).
86
+ add(SPECIAL_FORMS, :keyword).
87
+ add(CORE_FORMS, :keyword).
88
+ add(PREDEFINED_CONSTANTS, :predefined_constant)
89
+
90
+ KEYWORD_NEXT_TOKEN_KIND = WordList.new(nil).
91
+ add(%w[ def defn defn- definline defmacro defmulti defmethod defstruct defonce declare ], :function).
92
+ add(%w[ ns ], :namespace).
93
+ add(%w[ defprotocol defrecord ], :class)
94
+
95
+ BASIC_IDENTIFIER = /[a-zA-Z$%*\/_+!?&<>\-=]=?[a-zA-Z0-9$&*+!\/_?<>\-\#]*/
96
+ IDENTIFIER = /(?!-\d)(?:(?:#{BASIC_IDENTIFIER}\.)*#{BASIC_IDENTIFIER}(?:\/#{BASIC_IDENTIFIER})?\.?)|\.\.?/
97
+ SYMBOL = /::?#{IDENTIFIER}/o
98
+ DIGIT = /\d/
99
+ DIGIT10 = DIGIT
100
+ DIGIT16 = /[0-9a-f]/i
101
+ DIGIT8 = /[0-7]/
102
+ DIGIT2 = /[01]/
103
+ RADIX16 = /\#x/i
104
+ RADIX8 = /\#o/i
105
+ RADIX2 = /\#b/i
106
+ RADIX10 = /\#d/i
107
+ EXACTNESS = /#i|#e/i
108
+ SIGN = /[\+-]?/
109
+ EXP_MARK = /[esfdl]/i
110
+ EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
111
+ SUFFIX = /#{EXP}?/
112
+ PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
113
+ PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
114
+ PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
115
+ PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
116
+ UINT10 = /#{DIGIT10}+#*/
117
+ UINT16 = /#{DIGIT16}+#*/
118
+ UINT8 = /#{DIGIT8}+#*/
119
+ UINT2 = /#{DIGIT2}+#*/
120
+ DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
121
+ UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
122
+ UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
123
+ UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
124
+ UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
125
+ REAL10 = /#{SIGN}#{UREAL10}/
126
+ REAL16 = /#{SIGN}#{UREAL16}/
127
+ REAL8 = /#{SIGN}#{UREAL8}/
128
+ REAL2 = /#{SIGN}#{UREAL2}/
129
+ IMAG10 = /i|#{UREAL10}i/
130
+ IMAG16 = /i|#{UREAL16}i/
131
+ IMAG8 = /i|#{UREAL8}i/
132
+ IMAG2 = /i|#{UREAL2}i/
133
+ COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
134
+ COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
135
+ COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
136
+ COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
137
+ NUM10 = /#{PREFIX10}?#{COMPLEX10}/
138
+ NUM16 = /#{PREFIX16}#{COMPLEX16}/
139
+ NUM8 = /#{PREFIX8}#{COMPLEX8}/
140
+ NUM2 = /#{PREFIX2}#{COMPLEX2}/
141
+ NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/
142
+
143
+ protected
144
+
145
+ def scan_tokens encoder, options
146
+
147
+ state = :initial
148
+ kind = nil
149
+
150
+ until eos?
151
+
152
+ case state
153
+ when :initial
154
+ if match = scan(/ \s+ | \\\n | , /x)
155
+ encoder.text_token match, :space
156
+ elsif match = scan(/['`\(\[\)\]\{\}]|\#[({]|~@?|[@\^]/)
157
+ encoder.text_token match, :operator
158
+ elsif match = scan(/;.*/)
159
+ encoder.text_token match, :comment # TODO: recognize (comment ...) too
160
+ elsif match = scan(/\#?\\(?:newline|space|.?)/)
161
+ encoder.text_token match, :char
162
+ elsif match = scan(/\#[ft]/)
163
+ encoder.text_token match, :predefined_constant
164
+ elsif match = scan(/#{IDENTIFIER}/o)
165
+ kind = IDENT_KIND[match]
166
+ encoder.text_token match, kind
167
+ if rest? && kind == :keyword
168
+ if kind = KEYWORD_NEXT_TOKEN_KIND[match]
169
+ encoder.text_token match, :space if match = scan(/\s+/o)
170
+ encoder.text_token match, kind if match = scan(/#{IDENTIFIER}/o)
171
+ end
172
+ end
173
+ elsif match = scan(/#{SYMBOL}/o)
174
+ encoder.text_token match, :symbol
175
+ elsif match = scan(/\./)
176
+ encoder.text_token match, :operator
177
+ elsif match = scan(/ \# \^ #{IDENTIFIER} /ox)
178
+ encoder.text_token match, :type
179
+ elsif match = scan(/ (\#)? " /x)
180
+ state = self[1] ? :regexp : :string
181
+ encoder.begin_group state
182
+ encoder.text_token match, :delimiter
183
+ elsif match = scan(/#{NUM}/o) and not matched.empty?
184
+ encoder.text_token match, match[/[.e\/]/i] ? :float : :integer
185
+ else
186
+ encoder.text_token getch, :error
187
+ end
188
+
189
+ when :string, :regexp
190
+ if match = scan(/[^"\\]+|\\.?/)
191
+ encoder.text_token match, :content
192
+ elsif match = scan(/"/)
193
+ encoder.text_token match, :delimiter
194
+ encoder.end_group state
195
+ state = :initial
196
+ else
197
+ raise_inspect "else case \" reached; %p not handled." % peek(1),
198
+ encoder, state
199
+ end
200
+
201
+ else
202
+ raise 'else case reached'
203
+
204
+ end
205
+
206
+ end
207
+
208
+ if [:string, :regexp].include? state
209
+ encoder.end_group state
210
+ end
211
+
212
+ encoder
213
+
214
+ end
215
+ end
216
+ end
217
+ end