coderay 0.9.8 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/{lib/README → README_INDEX.rdoc} +10 -21
- data/Rakefile +6 -6
- data/bin/coderay +193 -64
- data/lib/coderay.rb +61 -105
- data/lib/coderay/duo.rb +17 -21
- data/lib/coderay/encoder.rb +100 -112
- data/lib/coderay/encoders/_map.rb +12 -7
- data/lib/coderay/encoders/comment_filter.rb +12 -30
- data/lib/coderay/encoders/count.rb +29 -11
- data/lib/coderay/encoders/debug.rb +32 -20
- data/lib/coderay/encoders/div.rb +13 -9
- data/lib/coderay/encoders/filter.rb +34 -51
- data/lib/coderay/encoders/html.rb +155 -161
- data/lib/coderay/encoders/html/css.rb +4 -9
- data/lib/coderay/encoders/html/numbering.rb +115 -0
- data/lib/coderay/encoders/html/output.rb +22 -70
- data/lib/coderay/encoders/json.rb +59 -45
- data/lib/coderay/encoders/lines_of_code.rb +12 -57
- data/lib/coderay/encoders/null.rb +6 -14
- data/lib/coderay/encoders/page.rb +13 -9
- data/lib/coderay/encoders/span.rb +13 -9
- data/lib/coderay/encoders/statistic.rb +58 -39
- data/lib/coderay/encoders/terminal.rb +179 -0
- data/lib/coderay/encoders/text.rb +31 -17
- data/lib/coderay/encoders/token_kind_filter.rb +111 -0
- data/lib/coderay/encoders/xml.rb +19 -18
- data/lib/coderay/encoders/yaml.rb +37 -9
- data/lib/coderay/for_redcloth.rb +4 -4
- data/lib/coderay/helpers/file_type.rb +127 -246
- data/lib/coderay/helpers/gzip.rb +41 -0
- data/lib/coderay/helpers/plugin.rb +241 -306
- data/lib/coderay/helpers/word_list.rb +65 -126
- data/lib/coderay/scanner.rb +173 -156
- data/lib/coderay/scanners/_map.rb +18 -17
- data/lib/coderay/scanners/c.rb +63 -77
- data/lib/coderay/scanners/clojure.rb +217 -0
- data/lib/coderay/scanners/cpp.rb +71 -84
- data/lib/coderay/scanners/css.rb +103 -120
- data/lib/coderay/scanners/debug.rb +47 -44
- data/lib/coderay/scanners/delphi.rb +70 -76
- data/lib/coderay/scanners/diff.rb +141 -50
- data/lib/coderay/scanners/erb.rb +81 -0
- data/lib/coderay/scanners/groovy.rb +104 -113
- data/lib/coderay/scanners/haml.rb +168 -0
- data/lib/coderay/scanners/html.rb +181 -110
- data/lib/coderay/scanners/java.rb +73 -75
- data/lib/coderay/scanners/java/builtin_types.rb +2 -0
- data/lib/coderay/scanners/java_script.rb +90 -101
- data/lib/coderay/scanners/json.rb +40 -53
- data/lib/coderay/scanners/php.rb +123 -147
- data/lib/coderay/scanners/python.rb +93 -91
- data/lib/coderay/scanners/raydebug.rb +66 -0
- data/lib/coderay/scanners/ruby.rb +343 -326
- data/lib/coderay/scanners/ruby/patterns.rb +40 -106
- data/lib/coderay/scanners/ruby/string_state.rb +71 -0
- data/lib/coderay/scanners/sql.rb +80 -66
- data/lib/coderay/scanners/text.rb +26 -0
- data/lib/coderay/scanners/xml.rb +1 -1
- data/lib/coderay/scanners/yaml.rb +74 -73
- data/lib/coderay/style.rb +10 -7
- data/lib/coderay/styles/_map.rb +3 -3
- data/lib/coderay/styles/alpha.rb +143 -0
- data/lib/coderay/token_kinds.rb +90 -0
- data/lib/coderay/tokens.rb +102 -277
- data/lib/coderay/tokens_proxy.rb +55 -0
- data/lib/coderay/version.rb +3 -0
- data/test/functional/basic.rb +200 -18
- data/test/functional/examples.rb +130 -0
- data/test/functional/for_redcloth.rb +15 -8
- data/test/functional/suite.rb +9 -6
- metadata +103 -123
- data/FOLDERS +0 -53
- data/bin/coderay_stylesheet +0 -4
- data/lib/coderay/encoders/html/numerization.rb +0 -133
- data/lib/coderay/encoders/term.rb +0 -158
- data/lib/coderay/encoders/token_class_filter.rb +0 -84
- data/lib/coderay/helpers/gzip_simple.rb +0 -123
- data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
- data/lib/coderay/scanners/plaintext.rb +0 -20
- data/lib/coderay/scanners/rhtml.rb +0 -78
- data/lib/coderay/scanners/scheme.rb +0 -145
- data/lib/coderay/styles/cycnus.rb +0 -152
- data/lib/coderay/styles/murphy.rb +0 -134
- data/lib/coderay/token_classes.rb +0 -86
- data/test/functional/load_plugin_scanner.rb +0 -11
- data/test/functional/vhdl.rb +0 -126
- data/test/functional/word_list.rb +0 -79
@@ -1,23 +1,24 @@
|
|
1
1
|
module CodeRay
|
2
2
|
module Scanners
|
3
|
-
|
3
|
+
|
4
4
|
map \
|
5
|
-
:
|
6
|
-
:cplusplus
|
7
|
-
:
|
8
|
-
:ecma => :java_script,
|
9
|
-
:ecmascript => :java_script,
|
5
|
+
:'c++' => :cpp,
|
6
|
+
:cplusplus => :cpp,
|
7
|
+
:ecmascript => :java_script,
|
10
8
|
:ecma_script => :java_script,
|
11
|
-
:
|
12
|
-
:
|
13
|
-
:
|
14
|
-
:
|
15
|
-
:
|
16
|
-
:
|
17
|
-
:
|
18
|
-
:
|
19
|
-
|
20
|
-
|
21
|
-
|
9
|
+
:rhtml => :erb,
|
10
|
+
:eruby => :erb,
|
11
|
+
:irb => :ruby,
|
12
|
+
:javascript => :java_script,
|
13
|
+
:js => :java_script,
|
14
|
+
:pascal => :delphi,
|
15
|
+
:patch => :diff,
|
16
|
+
:plain => :text,
|
17
|
+
:plaintext => :text,
|
18
|
+
:xhtml => :html,
|
19
|
+
:yml => :yaml
|
20
|
+
|
21
|
+
default :text
|
22
|
+
|
22
23
|
end
|
23
24
|
end
|
data/lib/coderay/scanners/c.rb
CHANGED
@@ -1,46 +1,47 @@
|
|
1
1
|
module CodeRay
|
2
2
|
module Scanners
|
3
|
-
|
3
|
+
|
4
|
+
# Scanner for C.
|
4
5
|
class C < Scanner
|
5
6
|
|
6
|
-
include Streamable
|
7
|
-
|
8
7
|
register_for :c
|
9
8
|
file_extension 'c'
|
10
|
-
|
11
|
-
|
9
|
+
|
10
|
+
KEYWORDS = [
|
12
11
|
'asm', 'break', 'case', 'continue', 'default', 'do',
|
13
12
|
'else', 'enum', 'for', 'goto', 'if', 'return',
|
14
13
|
'sizeof', 'struct', 'switch', 'typedef', 'union', 'while',
|
15
14
|
'restrict', # added in C99
|
16
|
-
]
|
15
|
+
] # :nodoc:
|
17
16
|
|
18
17
|
PREDEFINED_TYPES = [
|
19
18
|
'int', 'long', 'short', 'char',
|
20
19
|
'signed', 'unsigned', 'float', 'double',
|
21
20
|
'bool', 'complex', # added in C99
|
22
|
-
]
|
21
|
+
] # :nodoc:
|
23
22
|
|
24
23
|
PREDEFINED_CONSTANTS = [
|
25
24
|
'EOF', 'NULL',
|
26
25
|
'true', 'false', # added in C99
|
27
|
-
]
|
26
|
+
] # :nodoc:
|
28
27
|
DIRECTIVES = [
|
29
28
|
'auto', 'extern', 'register', 'static', 'void',
|
30
29
|
'const', 'volatile', # added in C89
|
31
30
|
'inline', # added in C99
|
32
|
-
]
|
31
|
+
] # :nodoc:
|
33
32
|
|
34
33
|
IDENT_KIND = WordList.new(:ident).
|
35
|
-
add(
|
36
|
-
add(PREDEFINED_TYPES, :
|
34
|
+
add(KEYWORDS, :keyword).
|
35
|
+
add(PREDEFINED_TYPES, :predefined_type).
|
37
36
|
add(DIRECTIVES, :directive).
|
38
|
-
add(PREDEFINED_CONSTANTS, :
|
39
|
-
|
40
|
-
ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
|
41
|
-
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
|
37
|
+
add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc:
|
42
38
|
|
43
|
-
|
39
|
+
ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
|
40
|
+
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
|
41
|
+
|
42
|
+
protected
|
43
|
+
|
44
|
+
def scan_tokens encoder, options
|
44
45
|
|
45
46
|
state = :initial
|
46
47
|
label_expected = true
|
@@ -50,9 +51,6 @@ module Scanners
|
|
50
51
|
|
51
52
|
until eos?
|
52
53
|
|
53
|
-
kind = nil
|
54
|
-
match = nil
|
55
|
-
|
56
54
|
case state
|
57
55
|
|
58
56
|
when :initial
|
@@ -62,15 +60,10 @@ module Scanners
|
|
62
60
|
in_preproc_line = false
|
63
61
|
label_expected = label_expected_before_preproc_line
|
64
62
|
end
|
65
|
-
|
66
|
-
next
|
63
|
+
encoder.text_token match, :space
|
67
64
|
|
68
|
-
elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
|
69
|
-
|
70
|
-
|
71
|
-
elsif match = scan(/ \# \s* if \s* 0 /x)
|
72
|
-
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
|
73
|
-
kind = :comment
|
65
|
+
elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
|
66
|
+
encoder.text_token match, :comment
|
74
67
|
|
75
68
|
elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
|
76
69
|
label_expected = match =~ /[;\{\}]/
|
@@ -78,7 +71,7 @@ module Scanners
|
|
78
71
|
label_expected = true if match == ':'
|
79
72
|
case_expected = false
|
80
73
|
end
|
81
|
-
|
74
|
+
encoder.text_token match, :operator
|
82
75
|
|
83
76
|
elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
|
84
77
|
kind = IDENT_KIND[match]
|
@@ -87,114 +80,107 @@ module Scanners
|
|
87
80
|
match << matched
|
88
81
|
else
|
89
82
|
label_expected = false
|
90
|
-
if kind == :
|
83
|
+
if kind == :keyword
|
91
84
|
case match
|
92
85
|
when 'case', 'default'
|
93
86
|
case_expected = true
|
94
87
|
end
|
95
88
|
end
|
96
89
|
end
|
90
|
+
encoder.text_token match, kind
|
97
91
|
|
98
|
-
elsif scan(/\$/)
|
99
|
-
kind = :ident
|
100
|
-
|
101
92
|
elsif match = scan(/L?"/)
|
102
|
-
|
93
|
+
encoder.begin_group :string
|
103
94
|
if match[0] == ?L
|
104
|
-
|
95
|
+
encoder.text_token 'L', :modifier
|
105
96
|
match = '"'
|
106
97
|
end
|
98
|
+
encoder.text_token match, :delimiter
|
107
99
|
state = :string
|
108
|
-
kind = :delimiter
|
109
100
|
|
110
|
-
elsif scan(
|
111
|
-
|
101
|
+
elsif match = scan(/ \# \s* if \s* 0 /x)
|
102
|
+
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
|
103
|
+
encoder.text_token match, :comment
|
104
|
+
|
105
|
+
elsif match = scan(/#[ \t]*(\w*)/)
|
106
|
+
encoder.text_token match, :preprocessor
|
112
107
|
in_preproc_line = true
|
113
108
|
label_expected_before_preproc_line = label_expected
|
114
109
|
state = :include_expected if self[1] == 'include'
|
115
110
|
|
116
|
-
elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
|
111
|
+
elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
|
117
112
|
label_expected = false
|
118
|
-
|
113
|
+
encoder.text_token match, :char
|
119
114
|
|
120
|
-
elsif scan(
|
115
|
+
elsif match = scan(/\$/)
|
116
|
+
encoder.text_token match, :ident
|
117
|
+
|
118
|
+
elsif match = scan(/0[xX][0-9A-Fa-f]+/)
|
121
119
|
label_expected = false
|
122
|
-
|
120
|
+
encoder.text_token match, :hex
|
123
121
|
|
124
|
-
elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
|
122
|
+
elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/)
|
125
123
|
label_expected = false
|
126
|
-
|
124
|
+
encoder.text_token match, :octal
|
127
125
|
|
128
|
-
elsif scan(/(?:\d+)(?![.eEfF])L?L?/)
|
126
|
+
elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/)
|
129
127
|
label_expected = false
|
130
|
-
|
128
|
+
encoder.text_token match, :integer
|
131
129
|
|
132
|
-
elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
|
130
|
+
elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
|
133
131
|
label_expected = false
|
134
|
-
|
132
|
+
encoder.text_token match, :float
|
135
133
|
|
136
134
|
else
|
137
|
-
getch
|
138
|
-
kind = :error
|
135
|
+
encoder.text_token getch, :error
|
139
136
|
|
140
137
|
end
|
141
138
|
|
142
139
|
when :string
|
143
|
-
if scan(/[^\\\n"]+/)
|
144
|
-
|
145
|
-
elsif scan(/"/)
|
146
|
-
|
147
|
-
|
140
|
+
if match = scan(/[^\\\n"]+/)
|
141
|
+
encoder.text_token match, :content
|
142
|
+
elsif match = scan(/"/)
|
143
|
+
encoder.text_token match, :delimiter
|
144
|
+
encoder.end_group :string
|
148
145
|
state = :initial
|
149
146
|
label_expected = false
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
kind = :error
|
147
|
+
elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
|
148
|
+
encoder.text_token match, :char
|
149
|
+
elsif match = scan(/ \\ | $ /x)
|
150
|
+
encoder.end_group :string
|
151
|
+
encoder.text_token match, :error
|
156
152
|
state = :initial
|
157
153
|
label_expected = false
|
158
154
|
else
|
159
|
-
raise_inspect "else case \" reached; %p not handled." % peek(1),
|
155
|
+
raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
|
160
156
|
end
|
161
157
|
|
162
158
|
when :include_expected
|
163
|
-
if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
|
164
|
-
|
159
|
+
if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
|
160
|
+
encoder.text_token match, :include
|
165
161
|
state = :initial
|
166
162
|
|
167
163
|
elsif match = scan(/\s+/)
|
168
|
-
|
164
|
+
encoder.text_token match, :space
|
169
165
|
state = :initial if match.index ?\n
|
170
166
|
|
171
167
|
else
|
172
168
|
state = :initial
|
173
|
-
next
|
174
169
|
|
175
170
|
end
|
176
171
|
|
177
172
|
else
|
178
|
-
raise_inspect 'Unknown state',
|
179
|
-
|
180
|
-
end
|
173
|
+
raise_inspect 'Unknown state', encoder
|
181
174
|
|
182
|
-
match ||= matched
|
183
|
-
if $CODERAY_DEBUG and not kind
|
184
|
-
raise_inspect 'Error token %p in line %d' %
|
185
|
-
[[match, kind], line], tokens
|
186
175
|
end
|
187
|
-
raise_inspect 'Empty token', tokens unless match
|
188
|
-
|
189
|
-
tokens << [match, kind]
|
190
176
|
|
191
177
|
end
|
192
178
|
|
193
179
|
if state == :string
|
194
|
-
|
180
|
+
encoder.end_group :string
|
195
181
|
end
|
196
182
|
|
197
|
-
|
183
|
+
encoder
|
198
184
|
end
|
199
185
|
|
200
186
|
end
|
@@ -0,0 +1,217 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module CodeRay
|
3
|
+
module Scanners
|
4
|
+
|
5
|
+
# Clojure scanner by Licenser.
|
6
|
+
class Clojure < Scanner
|
7
|
+
|
8
|
+
register_for :clojure
|
9
|
+
file_extension 'clj'
|
10
|
+
|
11
|
+
SPECIAL_FORMS = %w[
|
12
|
+
def if do let quote var fn loop recur throw try catch monitor-enter monitor-exit .
|
13
|
+
new
|
14
|
+
] # :nodoc:
|
15
|
+
|
16
|
+
CORE_FORMS = %w[
|
17
|
+
+ - -> ->> .. / * <= < = == >= > accessor aclone add-classpath add-watch
|
18
|
+
agent agent-error agent-errors aget alength alias all-ns alter alter-meta!
|
19
|
+
alter-var-root amap ancestors and apply areduce array-map aset aset-boolean
|
20
|
+
aset-byte aset-char aset-double aset-float aset-int aset-long aset-short
|
21
|
+
assert assoc assoc! assoc-in associative? atom await await-for bases bean
|
22
|
+
bigdec bigint binding bit-and bit-and-not bit-clear bit-flip bit-not bit-or
|
23
|
+
bit-set bit-shift-left bit-shift-right bit-test bit-xor boolean boolean-array
|
24
|
+
booleans bound-fn bound-fn* bound? butlast byte byte-array bytes case cast char
|
25
|
+
char-array char-escape-string char-name-string char? chars class class?
|
26
|
+
clear-agent-errors clojure-version coll? comment commute comp comparator
|
27
|
+
compare compare-and-set! compile complement concat cond condp conj conj!
|
28
|
+
cons constantly construct-proxy contains? count counted? create-ns
|
29
|
+
create-struct cycle dec decimal? declare definline defmacro defmethod defmulti
|
30
|
+
defn defn- defonce defprotocol defrecord defstruct deftype delay delay?
|
31
|
+
deliver denominator deref derive descendants disj disj! dissoc dissoc!
|
32
|
+
distinct distinct? doall doc dorun doseq dosync dotimes doto double
|
33
|
+
double-array doubles drop drop-last drop-while empty empty? ensure
|
34
|
+
enumeration-seq error-handler error-mode eval even? every? extend
|
35
|
+
extend-protocol extend-type extenders extends? false? ffirst file-seq
|
36
|
+
filter find find-doc find-ns find-var first float float-array float?
|
37
|
+
floats flush fn fn? fnext for force format future future-call future-cancel
|
38
|
+
future-cancelled? future-done? future? gen-class gen-interface gensym get
|
39
|
+
get-in get-method get-proxy-class get-thread-bindings get-validator hash
|
40
|
+
hash-map hash-set identical? identity if-let if-not ifn? import in-ns
|
41
|
+
inc init-proxy instance? int int-array integer? interleave intern
|
42
|
+
interpose into into-array ints io! isa? iterate iterator-seq juxt key
|
43
|
+
keys keyword keyword? last lazy-cat lazy-seq let letfn line-seq list list*
|
44
|
+
list? load load-file load-reader load-string loaded-libs locking long
|
45
|
+
long-array longs loop macroexpand macroexpand-1 make-array make-hierarchy
|
46
|
+
map map? mapcat max max-key memfn memoize merge merge-with meta methods
|
47
|
+
min min-key mod name namespace neg? newline next nfirst nil? nnext not
|
48
|
+
not-any? not-empty not-every? not= ns ns-aliases ns-imports ns-interns
|
49
|
+
ns-map ns-name ns-publics ns-refers ns-resolve ns-unalias ns-unmap nth
|
50
|
+
nthnext num number? numerator object-array odd? or parents partial
|
51
|
+
partition pcalls peek persistent! pmap pop pop! pop-thread-bindings
|
52
|
+
pos? pr pr-str prefer-method prefers print print-namespace-doc
|
53
|
+
print-str printf println println-str prn prn-str promise proxy
|
54
|
+
proxy-mappings proxy-super push-thread-bindings pvalues quot rand
|
55
|
+
rand-int range ratio? rationalize re-find re-groups re-matcher
|
56
|
+
re-matches re-pattern re-seq read read-line read-string reduce ref
|
57
|
+
ref-history-count ref-max-history ref-min-history ref-set refer
|
58
|
+
refer-clojure reify release-pending-sends rem remove remove-all-methods
|
59
|
+
remove-method remove-ns remove-watch repeat repeatedly replace replicate
|
60
|
+
require reset! reset-meta! resolve rest restart-agent resultset-seq
|
61
|
+
reverse reversible? rseq rsubseq satisfies? second select-keys send
|
62
|
+
send-off seq seq? seque sequence sequential? set set-error-handler!
|
63
|
+
set-error-mode! set-validator! set? short short-array shorts
|
64
|
+
shutdown-agents slurp some sort sort-by sorted-map sorted-map-by
|
65
|
+
sorted-set sorted-set-by sorted? special-form-anchor special-symbol?
|
66
|
+
split-at split-with str string? struct struct-map subs subseq subvec
|
67
|
+
supers swap! symbol symbol? sync syntax-symbol-anchor take take-last
|
68
|
+
take-nth take-while test the-ns thread-bound? time to-array to-array-2d
|
69
|
+
trampoline transient tree-seq true? type unchecked-add unchecked-dec
|
70
|
+
unchecked-divide unchecked-inc unchecked-multiply unchecked-negate
|
71
|
+
unchecked-remainder unchecked-subtract underive update-in update-proxy
|
72
|
+
use val vals var-get var-set var? vary-meta vec vector vector-of vector?
|
73
|
+
when when-first when-let when-not while with-bindings with-bindings*
|
74
|
+
with-in-str with-local-vars with-meta with-open with-out-str
|
75
|
+
with-precision xml-seq zero? zipmap
|
76
|
+
] # :nodoc:
|
77
|
+
|
78
|
+
PREDEFINED_CONSTANTS = %w[
|
79
|
+
true false nil *1 *2 *3 *agent* *clojure-version* *command-line-args*
|
80
|
+
*compile-files* *compile-path* *e *err* *file* *flush-on-newline*
|
81
|
+
*in* *ns* *out* *print-dup* *print-length* *print-level* *print-meta*
|
82
|
+
*print-readably* *read-eval* *warn-on-reflection*
|
83
|
+
] # :nodoc:
|
84
|
+
|
85
|
+
IDENT_KIND = WordList.new(:ident).
|
86
|
+
add(SPECIAL_FORMS, :keyword).
|
87
|
+
add(CORE_FORMS, :keyword).
|
88
|
+
add(PREDEFINED_CONSTANTS, :predefined_constant)
|
89
|
+
|
90
|
+
KEYWORD_NEXT_TOKEN_KIND = WordList.new(nil).
|
91
|
+
add(%w[ def defn defn- definline defmacro defmulti defmethod defstruct defonce declare ], :function).
|
92
|
+
add(%w[ ns ], :namespace).
|
93
|
+
add(%w[ defprotocol defrecord ], :class)
|
94
|
+
|
95
|
+
BASIC_IDENTIFIER = /[a-zA-Z$%*\/_+!?&<>\-=]=?[a-zA-Z0-9$&*+!\/_?<>\-\#]*/
|
96
|
+
IDENTIFIER = /(?!-\d)(?:(?:#{BASIC_IDENTIFIER}\.)*#{BASIC_IDENTIFIER}(?:\/#{BASIC_IDENTIFIER})?\.?)|\.\.?/
|
97
|
+
SYMBOL = /::?#{IDENTIFIER}/o
|
98
|
+
DIGIT = /\d/
|
99
|
+
DIGIT10 = DIGIT
|
100
|
+
DIGIT16 = /[0-9a-f]/i
|
101
|
+
DIGIT8 = /[0-7]/
|
102
|
+
DIGIT2 = /[01]/
|
103
|
+
RADIX16 = /\#x/i
|
104
|
+
RADIX8 = /\#o/i
|
105
|
+
RADIX2 = /\#b/i
|
106
|
+
RADIX10 = /\#d/i
|
107
|
+
EXACTNESS = /#i|#e/i
|
108
|
+
SIGN = /[\+-]?/
|
109
|
+
EXP_MARK = /[esfdl]/i
|
110
|
+
EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
|
111
|
+
SUFFIX = /#{EXP}?/
|
112
|
+
PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
|
113
|
+
PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
|
114
|
+
PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
|
115
|
+
PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
|
116
|
+
UINT10 = /#{DIGIT10}+#*/
|
117
|
+
UINT16 = /#{DIGIT16}+#*/
|
118
|
+
UINT8 = /#{DIGIT8}+#*/
|
119
|
+
UINT2 = /#{DIGIT2}+#*/
|
120
|
+
DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
|
121
|
+
UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
|
122
|
+
UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
|
123
|
+
UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
|
124
|
+
UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
|
125
|
+
REAL10 = /#{SIGN}#{UREAL10}/
|
126
|
+
REAL16 = /#{SIGN}#{UREAL16}/
|
127
|
+
REAL8 = /#{SIGN}#{UREAL8}/
|
128
|
+
REAL2 = /#{SIGN}#{UREAL2}/
|
129
|
+
IMAG10 = /i|#{UREAL10}i/
|
130
|
+
IMAG16 = /i|#{UREAL16}i/
|
131
|
+
IMAG8 = /i|#{UREAL8}i/
|
132
|
+
IMAG2 = /i|#{UREAL2}i/
|
133
|
+
COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
|
134
|
+
COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
|
135
|
+
COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
|
136
|
+
COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
|
137
|
+
NUM10 = /#{PREFIX10}?#{COMPLEX10}/
|
138
|
+
NUM16 = /#{PREFIX16}#{COMPLEX16}/
|
139
|
+
NUM8 = /#{PREFIX8}#{COMPLEX8}/
|
140
|
+
NUM2 = /#{PREFIX2}#{COMPLEX2}/
|
141
|
+
NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/
|
142
|
+
|
143
|
+
protected
|
144
|
+
|
145
|
+
def scan_tokens encoder, options
|
146
|
+
|
147
|
+
state = :initial
|
148
|
+
kind = nil
|
149
|
+
|
150
|
+
until eos?
|
151
|
+
|
152
|
+
case state
|
153
|
+
when :initial
|
154
|
+
if match = scan(/ \s+ | \\\n | , /x)
|
155
|
+
encoder.text_token match, :space
|
156
|
+
elsif match = scan(/['`\(\[\)\]\{\}]|\#[({]|~@?|[@\^]/)
|
157
|
+
encoder.text_token match, :operator
|
158
|
+
elsif match = scan(/;.*/)
|
159
|
+
encoder.text_token match, :comment # TODO: recognize (comment ...) too
|
160
|
+
elsif match = scan(/\#?\\(?:newline|space|.?)/)
|
161
|
+
encoder.text_token match, :char
|
162
|
+
elsif match = scan(/\#[ft]/)
|
163
|
+
encoder.text_token match, :predefined_constant
|
164
|
+
elsif match = scan(/#{IDENTIFIER}/o)
|
165
|
+
kind = IDENT_KIND[match]
|
166
|
+
encoder.text_token match, kind
|
167
|
+
if rest? && kind == :keyword
|
168
|
+
if kind = KEYWORD_NEXT_TOKEN_KIND[match]
|
169
|
+
encoder.text_token match, :space if match = scan(/\s+/o)
|
170
|
+
encoder.text_token match, kind if match = scan(/#{IDENTIFIER}/o)
|
171
|
+
end
|
172
|
+
end
|
173
|
+
elsif match = scan(/#{SYMBOL}/o)
|
174
|
+
encoder.text_token match, :symbol
|
175
|
+
elsif match = scan(/\./)
|
176
|
+
encoder.text_token match, :operator
|
177
|
+
elsif match = scan(/ \# \^ #{IDENTIFIER} /ox)
|
178
|
+
encoder.text_token match, :type
|
179
|
+
elsif match = scan(/ (\#)? " /x)
|
180
|
+
state = self[1] ? :regexp : :string
|
181
|
+
encoder.begin_group state
|
182
|
+
encoder.text_token match, :delimiter
|
183
|
+
elsif match = scan(/#{NUM}/o) and not matched.empty?
|
184
|
+
encoder.text_token match, match[/[.e\/]/i] ? :float : :integer
|
185
|
+
else
|
186
|
+
encoder.text_token getch, :error
|
187
|
+
end
|
188
|
+
|
189
|
+
when :string, :regexp
|
190
|
+
if match = scan(/[^"\\]+|\\.?/)
|
191
|
+
encoder.text_token match, :content
|
192
|
+
elsif match = scan(/"/)
|
193
|
+
encoder.text_token match, :delimiter
|
194
|
+
encoder.end_group state
|
195
|
+
state = :initial
|
196
|
+
else
|
197
|
+
raise_inspect "else case \" reached; %p not handled." % peek(1),
|
198
|
+
encoder, state
|
199
|
+
end
|
200
|
+
|
201
|
+
else
|
202
|
+
raise 'else case reached'
|
203
|
+
|
204
|
+
end
|
205
|
+
|
206
|
+
end
|
207
|
+
|
208
|
+
if [:string, :regexp].include? state
|
209
|
+
encoder.end_group state
|
210
|
+
end
|
211
|
+
|
212
|
+
encoder
|
213
|
+
|
214
|
+
end
|
215
|
+
end
|
216
|
+
end
|
217
|
+
end
|