coderay 0.9.8 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/{lib/README → README_INDEX.rdoc} +10 -21
- data/Rakefile +6 -6
- data/bin/coderay +193 -64
- data/lib/coderay.rb +61 -105
- data/lib/coderay/duo.rb +17 -21
- data/lib/coderay/encoder.rb +100 -112
- data/lib/coderay/encoders/_map.rb +12 -7
- data/lib/coderay/encoders/comment_filter.rb +12 -30
- data/lib/coderay/encoders/count.rb +29 -11
- data/lib/coderay/encoders/debug.rb +32 -20
- data/lib/coderay/encoders/div.rb +13 -9
- data/lib/coderay/encoders/filter.rb +34 -51
- data/lib/coderay/encoders/html.rb +155 -161
- data/lib/coderay/encoders/html/css.rb +4 -9
- data/lib/coderay/encoders/html/numbering.rb +115 -0
- data/lib/coderay/encoders/html/output.rb +22 -70
- data/lib/coderay/encoders/json.rb +59 -45
- data/lib/coderay/encoders/lines_of_code.rb +12 -57
- data/lib/coderay/encoders/null.rb +6 -14
- data/lib/coderay/encoders/page.rb +13 -9
- data/lib/coderay/encoders/span.rb +13 -9
- data/lib/coderay/encoders/statistic.rb +58 -39
- data/lib/coderay/encoders/terminal.rb +179 -0
- data/lib/coderay/encoders/text.rb +31 -17
- data/lib/coderay/encoders/token_kind_filter.rb +111 -0
- data/lib/coderay/encoders/xml.rb +19 -18
- data/lib/coderay/encoders/yaml.rb +37 -9
- data/lib/coderay/for_redcloth.rb +4 -4
- data/lib/coderay/helpers/file_type.rb +127 -246
- data/lib/coderay/helpers/gzip.rb +41 -0
- data/lib/coderay/helpers/plugin.rb +241 -306
- data/lib/coderay/helpers/word_list.rb +65 -126
- data/lib/coderay/scanner.rb +173 -156
- data/lib/coderay/scanners/_map.rb +18 -17
- data/lib/coderay/scanners/c.rb +63 -77
- data/lib/coderay/scanners/clojure.rb +217 -0
- data/lib/coderay/scanners/cpp.rb +71 -84
- data/lib/coderay/scanners/css.rb +103 -120
- data/lib/coderay/scanners/debug.rb +47 -44
- data/lib/coderay/scanners/delphi.rb +70 -76
- data/lib/coderay/scanners/diff.rb +141 -50
- data/lib/coderay/scanners/erb.rb +81 -0
- data/lib/coderay/scanners/groovy.rb +104 -113
- data/lib/coderay/scanners/haml.rb +168 -0
- data/lib/coderay/scanners/html.rb +181 -110
- data/lib/coderay/scanners/java.rb +73 -75
- data/lib/coderay/scanners/java/builtin_types.rb +2 -0
- data/lib/coderay/scanners/java_script.rb +90 -101
- data/lib/coderay/scanners/json.rb +40 -53
- data/lib/coderay/scanners/php.rb +123 -147
- data/lib/coderay/scanners/python.rb +93 -91
- data/lib/coderay/scanners/raydebug.rb +66 -0
- data/lib/coderay/scanners/ruby.rb +343 -326
- data/lib/coderay/scanners/ruby/patterns.rb +40 -106
- data/lib/coderay/scanners/ruby/string_state.rb +71 -0
- data/lib/coderay/scanners/sql.rb +80 -66
- data/lib/coderay/scanners/text.rb +26 -0
- data/lib/coderay/scanners/xml.rb +1 -1
- data/lib/coderay/scanners/yaml.rb +74 -73
- data/lib/coderay/style.rb +10 -7
- data/lib/coderay/styles/_map.rb +3 -3
- data/lib/coderay/styles/alpha.rb +143 -0
- data/lib/coderay/token_kinds.rb +90 -0
- data/lib/coderay/tokens.rb +102 -277
- data/lib/coderay/tokens_proxy.rb +55 -0
- data/lib/coderay/version.rb +3 -0
- data/test/functional/basic.rb +200 -18
- data/test/functional/examples.rb +130 -0
- data/test/functional/for_redcloth.rb +15 -8
- data/test/functional/suite.rb +9 -6
- metadata +103 -123
- data/FOLDERS +0 -53
- data/bin/coderay_stylesheet +0 -4
- data/lib/coderay/encoders/html/numerization.rb +0 -133
- data/lib/coderay/encoders/term.rb +0 -158
- data/lib/coderay/encoders/token_class_filter.rb +0 -84
- data/lib/coderay/helpers/gzip_simple.rb +0 -123
- data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
- data/lib/coderay/scanners/plaintext.rb +0 -20
- data/lib/coderay/scanners/rhtml.rb +0 -78
- data/lib/coderay/scanners/scheme.rb +0 -145
- data/lib/coderay/styles/cycnus.rb +0 -152
- data/lib/coderay/styles/murphy.rb +0 -134
- data/lib/coderay/token_classes.rb +0 -86
- data/test/functional/load_plugin_scanner.rb +0 -11
- data/test/functional/vhdl.rb +0 -126
- data/test/functional/word_list.rb +0 -79
@@ -1,23 +1,24 @@
|
|
1
1
|
module CodeRay
|
2
2
|
module Scanners
|
3
|
-
|
3
|
+
|
4
4
|
map \
|
5
|
-
:
|
6
|
-
:cplusplus
|
7
|
-
:
|
8
|
-
:ecma => :java_script,
|
9
|
-
:ecmascript => :java_script,
|
5
|
+
:'c++' => :cpp,
|
6
|
+
:cplusplus => :cpp,
|
7
|
+
:ecmascript => :java_script,
|
10
8
|
:ecma_script => :java_script,
|
11
|
-
:
|
12
|
-
:
|
13
|
-
:
|
14
|
-
:
|
15
|
-
:
|
16
|
-
:
|
17
|
-
:
|
18
|
-
:
|
19
|
-
|
20
|
-
|
21
|
-
|
9
|
+
:rhtml => :erb,
|
10
|
+
:eruby => :erb,
|
11
|
+
:irb => :ruby,
|
12
|
+
:javascript => :java_script,
|
13
|
+
:js => :java_script,
|
14
|
+
:pascal => :delphi,
|
15
|
+
:patch => :diff,
|
16
|
+
:plain => :text,
|
17
|
+
:plaintext => :text,
|
18
|
+
:xhtml => :html,
|
19
|
+
:yml => :yaml
|
20
|
+
|
21
|
+
default :text
|
22
|
+
|
22
23
|
end
|
23
24
|
end
|
data/lib/coderay/scanners/c.rb
CHANGED
@@ -1,46 +1,47 @@
|
|
1
1
|
module CodeRay
|
2
2
|
module Scanners
|
3
|
-
|
3
|
+
|
4
|
+
# Scanner for C.
|
4
5
|
class C < Scanner
|
5
6
|
|
6
|
-
include Streamable
|
7
|
-
|
8
7
|
register_for :c
|
9
8
|
file_extension 'c'
|
10
|
-
|
11
|
-
|
9
|
+
|
10
|
+
KEYWORDS = [
|
12
11
|
'asm', 'break', 'case', 'continue', 'default', 'do',
|
13
12
|
'else', 'enum', 'for', 'goto', 'if', 'return',
|
14
13
|
'sizeof', 'struct', 'switch', 'typedef', 'union', 'while',
|
15
14
|
'restrict', # added in C99
|
16
|
-
]
|
15
|
+
] # :nodoc:
|
17
16
|
|
18
17
|
PREDEFINED_TYPES = [
|
19
18
|
'int', 'long', 'short', 'char',
|
20
19
|
'signed', 'unsigned', 'float', 'double',
|
21
20
|
'bool', 'complex', # added in C99
|
22
|
-
]
|
21
|
+
] # :nodoc:
|
23
22
|
|
24
23
|
PREDEFINED_CONSTANTS = [
|
25
24
|
'EOF', 'NULL',
|
26
25
|
'true', 'false', # added in C99
|
27
|
-
]
|
26
|
+
] # :nodoc:
|
28
27
|
DIRECTIVES = [
|
29
28
|
'auto', 'extern', 'register', 'static', 'void',
|
30
29
|
'const', 'volatile', # added in C89
|
31
30
|
'inline', # added in C99
|
32
|
-
]
|
31
|
+
] # :nodoc:
|
33
32
|
|
34
33
|
IDENT_KIND = WordList.new(:ident).
|
35
|
-
add(
|
36
|
-
add(PREDEFINED_TYPES, :
|
34
|
+
add(KEYWORDS, :keyword).
|
35
|
+
add(PREDEFINED_TYPES, :predefined_type).
|
37
36
|
add(DIRECTIVES, :directive).
|
38
|
-
add(PREDEFINED_CONSTANTS, :
|
39
|
-
|
40
|
-
ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
|
41
|
-
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
|
37
|
+
add(PREDEFINED_CONSTANTS, :predefined_constant) # :nodoc:
|
42
38
|
|
43
|
-
|
39
|
+
ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
|
40
|
+
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x # :nodoc:
|
41
|
+
|
42
|
+
protected
|
43
|
+
|
44
|
+
def scan_tokens encoder, options
|
44
45
|
|
45
46
|
state = :initial
|
46
47
|
label_expected = true
|
@@ -50,9 +51,6 @@ module Scanners
|
|
50
51
|
|
51
52
|
until eos?
|
52
53
|
|
53
|
-
kind = nil
|
54
|
-
match = nil
|
55
|
-
|
56
54
|
case state
|
57
55
|
|
58
56
|
when :initial
|
@@ -62,15 +60,10 @@ module Scanners
|
|
62
60
|
in_preproc_line = false
|
63
61
|
label_expected = label_expected_before_preproc_line
|
64
62
|
end
|
65
|
-
|
66
|
-
next
|
63
|
+
encoder.text_token match, :space
|
67
64
|
|
68
|
-
elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
|
69
|
-
|
70
|
-
|
71
|
-
elsif match = scan(/ \# \s* if \s* 0 /x)
|
72
|
-
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
|
73
|
-
kind = :comment
|
65
|
+
elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
|
66
|
+
encoder.text_token match, :comment
|
74
67
|
|
75
68
|
elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
|
76
69
|
label_expected = match =~ /[;\{\}]/
|
@@ -78,7 +71,7 @@ module Scanners
|
|
78
71
|
label_expected = true if match == ':'
|
79
72
|
case_expected = false
|
80
73
|
end
|
81
|
-
|
74
|
+
encoder.text_token match, :operator
|
82
75
|
|
83
76
|
elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
|
84
77
|
kind = IDENT_KIND[match]
|
@@ -87,114 +80,107 @@ module Scanners
|
|
87
80
|
match << matched
|
88
81
|
else
|
89
82
|
label_expected = false
|
90
|
-
if kind == :
|
83
|
+
if kind == :keyword
|
91
84
|
case match
|
92
85
|
when 'case', 'default'
|
93
86
|
case_expected = true
|
94
87
|
end
|
95
88
|
end
|
96
89
|
end
|
90
|
+
encoder.text_token match, kind
|
97
91
|
|
98
|
-
elsif scan(/\$/)
|
99
|
-
kind = :ident
|
100
|
-
|
101
92
|
elsif match = scan(/L?"/)
|
102
|
-
|
93
|
+
encoder.begin_group :string
|
103
94
|
if match[0] == ?L
|
104
|
-
|
95
|
+
encoder.text_token 'L', :modifier
|
105
96
|
match = '"'
|
106
97
|
end
|
98
|
+
encoder.text_token match, :delimiter
|
107
99
|
state = :string
|
108
|
-
kind = :delimiter
|
109
100
|
|
110
|
-
elsif scan(
|
111
|
-
|
101
|
+
elsif match = scan(/ \# \s* if \s* 0 /x)
|
102
|
+
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
|
103
|
+
encoder.text_token match, :comment
|
104
|
+
|
105
|
+
elsif match = scan(/#[ \t]*(\w*)/)
|
106
|
+
encoder.text_token match, :preprocessor
|
112
107
|
in_preproc_line = true
|
113
108
|
label_expected_before_preproc_line = label_expected
|
114
109
|
state = :include_expected if self[1] == 'include'
|
115
110
|
|
116
|
-
elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
|
111
|
+
elsif match = scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
|
117
112
|
label_expected = false
|
118
|
-
|
113
|
+
encoder.text_token match, :char
|
119
114
|
|
120
|
-
elsif scan(
|
115
|
+
elsif match = scan(/\$/)
|
116
|
+
encoder.text_token match, :ident
|
117
|
+
|
118
|
+
elsif match = scan(/0[xX][0-9A-Fa-f]+/)
|
121
119
|
label_expected = false
|
122
|
-
|
120
|
+
encoder.text_token match, :hex
|
123
121
|
|
124
|
-
elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
|
122
|
+
elsif match = scan(/(?:0[0-7]+)(?![89.eEfF])/)
|
125
123
|
label_expected = false
|
126
|
-
|
124
|
+
encoder.text_token match, :octal
|
127
125
|
|
128
|
-
elsif scan(/(?:\d+)(?![.eEfF])L?L?/)
|
126
|
+
elsif match = scan(/(?:\d+)(?![.eEfF])L?L?/)
|
129
127
|
label_expected = false
|
130
|
-
|
128
|
+
encoder.text_token match, :integer
|
131
129
|
|
132
|
-
elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
|
130
|
+
elsif match = scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
|
133
131
|
label_expected = false
|
134
|
-
|
132
|
+
encoder.text_token match, :float
|
135
133
|
|
136
134
|
else
|
137
|
-
getch
|
138
|
-
kind = :error
|
135
|
+
encoder.text_token getch, :error
|
139
136
|
|
140
137
|
end
|
141
138
|
|
142
139
|
when :string
|
143
|
-
if scan(/[^\\\n"]+/)
|
144
|
-
|
145
|
-
elsif scan(/"/)
|
146
|
-
|
147
|
-
|
140
|
+
if match = scan(/[^\\\n"]+/)
|
141
|
+
encoder.text_token match, :content
|
142
|
+
elsif match = scan(/"/)
|
143
|
+
encoder.text_token match, :delimiter
|
144
|
+
encoder.end_group :string
|
148
145
|
state = :initial
|
149
146
|
label_expected = false
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
kind = :error
|
147
|
+
elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
|
148
|
+
encoder.text_token match, :char
|
149
|
+
elsif match = scan(/ \\ | $ /x)
|
150
|
+
encoder.end_group :string
|
151
|
+
encoder.text_token match, :error
|
156
152
|
state = :initial
|
157
153
|
label_expected = false
|
158
154
|
else
|
159
|
-
raise_inspect "else case \" reached; %p not handled." % peek(1),
|
155
|
+
raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
|
160
156
|
end
|
161
157
|
|
162
158
|
when :include_expected
|
163
|
-
if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
|
164
|
-
|
159
|
+
if match = scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
|
160
|
+
encoder.text_token match, :include
|
165
161
|
state = :initial
|
166
162
|
|
167
163
|
elsif match = scan(/\s+/)
|
168
|
-
|
164
|
+
encoder.text_token match, :space
|
169
165
|
state = :initial if match.index ?\n
|
170
166
|
|
171
167
|
else
|
172
168
|
state = :initial
|
173
|
-
next
|
174
169
|
|
175
170
|
end
|
176
171
|
|
177
172
|
else
|
178
|
-
raise_inspect 'Unknown state',
|
179
|
-
|
180
|
-
end
|
173
|
+
raise_inspect 'Unknown state', encoder
|
181
174
|
|
182
|
-
match ||= matched
|
183
|
-
if $CODERAY_DEBUG and not kind
|
184
|
-
raise_inspect 'Error token %p in line %d' %
|
185
|
-
[[match, kind], line], tokens
|
186
175
|
end
|
187
|
-
raise_inspect 'Empty token', tokens unless match
|
188
|
-
|
189
|
-
tokens << [match, kind]
|
190
176
|
|
191
177
|
end
|
192
178
|
|
193
179
|
if state == :string
|
194
|
-
|
180
|
+
encoder.end_group :string
|
195
181
|
end
|
196
182
|
|
197
|
-
|
183
|
+
encoder
|
198
184
|
end
|
199
185
|
|
200
186
|
end
|
@@ -0,0 +1,217 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module CodeRay
|
3
|
+
module Scanners
|
4
|
+
|
5
|
+
# Clojure scanner by Licenser.
|
6
|
+
class Clojure < Scanner
|
7
|
+
|
8
|
+
register_for :clojure
|
9
|
+
file_extension 'clj'
|
10
|
+
|
11
|
+
SPECIAL_FORMS = %w[
|
12
|
+
def if do let quote var fn loop recur throw try catch monitor-enter monitor-exit .
|
13
|
+
new
|
14
|
+
] # :nodoc:
|
15
|
+
|
16
|
+
CORE_FORMS = %w[
|
17
|
+
+ - -> ->> .. / * <= < = == >= > accessor aclone add-classpath add-watch
|
18
|
+
agent agent-error agent-errors aget alength alias all-ns alter alter-meta!
|
19
|
+
alter-var-root amap ancestors and apply areduce array-map aset aset-boolean
|
20
|
+
aset-byte aset-char aset-double aset-float aset-int aset-long aset-short
|
21
|
+
assert assoc assoc! assoc-in associative? atom await await-for bases bean
|
22
|
+
bigdec bigint binding bit-and bit-and-not bit-clear bit-flip bit-not bit-or
|
23
|
+
bit-set bit-shift-left bit-shift-right bit-test bit-xor boolean boolean-array
|
24
|
+
booleans bound-fn bound-fn* bound? butlast byte byte-array bytes case cast char
|
25
|
+
char-array char-escape-string char-name-string char? chars class class?
|
26
|
+
clear-agent-errors clojure-version coll? comment commute comp comparator
|
27
|
+
compare compare-and-set! compile complement concat cond condp conj conj!
|
28
|
+
cons constantly construct-proxy contains? count counted? create-ns
|
29
|
+
create-struct cycle dec decimal? declare definline defmacro defmethod defmulti
|
30
|
+
defn defn- defonce defprotocol defrecord defstruct deftype delay delay?
|
31
|
+
deliver denominator deref derive descendants disj disj! dissoc dissoc!
|
32
|
+
distinct distinct? doall doc dorun doseq dosync dotimes doto double
|
33
|
+
double-array doubles drop drop-last drop-while empty empty? ensure
|
34
|
+
enumeration-seq error-handler error-mode eval even? every? extend
|
35
|
+
extend-protocol extend-type extenders extends? false? ffirst file-seq
|
36
|
+
filter find find-doc find-ns find-var first float float-array float?
|
37
|
+
floats flush fn fn? fnext for force format future future-call future-cancel
|
38
|
+
future-cancelled? future-done? future? gen-class gen-interface gensym get
|
39
|
+
get-in get-method get-proxy-class get-thread-bindings get-validator hash
|
40
|
+
hash-map hash-set identical? identity if-let if-not ifn? import in-ns
|
41
|
+
inc init-proxy instance? int int-array integer? interleave intern
|
42
|
+
interpose into into-array ints io! isa? iterate iterator-seq juxt key
|
43
|
+
keys keyword keyword? last lazy-cat lazy-seq let letfn line-seq list list*
|
44
|
+
list? load load-file load-reader load-string loaded-libs locking long
|
45
|
+
long-array longs loop macroexpand macroexpand-1 make-array make-hierarchy
|
46
|
+
map map? mapcat max max-key memfn memoize merge merge-with meta methods
|
47
|
+
min min-key mod name namespace neg? newline next nfirst nil? nnext not
|
48
|
+
not-any? not-empty not-every? not= ns ns-aliases ns-imports ns-interns
|
49
|
+
ns-map ns-name ns-publics ns-refers ns-resolve ns-unalias ns-unmap nth
|
50
|
+
nthnext num number? numerator object-array odd? or parents partial
|
51
|
+
partition pcalls peek persistent! pmap pop pop! pop-thread-bindings
|
52
|
+
pos? pr pr-str prefer-method prefers print print-namespace-doc
|
53
|
+
print-str printf println println-str prn prn-str promise proxy
|
54
|
+
proxy-mappings proxy-super push-thread-bindings pvalues quot rand
|
55
|
+
rand-int range ratio? rationalize re-find re-groups re-matcher
|
56
|
+
re-matches re-pattern re-seq read read-line read-string reduce ref
|
57
|
+
ref-history-count ref-max-history ref-min-history ref-set refer
|
58
|
+
refer-clojure reify release-pending-sends rem remove remove-all-methods
|
59
|
+
remove-method remove-ns remove-watch repeat repeatedly replace replicate
|
60
|
+
require reset! reset-meta! resolve rest restart-agent resultset-seq
|
61
|
+
reverse reversible? rseq rsubseq satisfies? second select-keys send
|
62
|
+
send-off seq seq? seque sequence sequential? set set-error-handler!
|
63
|
+
set-error-mode! set-validator! set? short short-array shorts
|
64
|
+
shutdown-agents slurp some sort sort-by sorted-map sorted-map-by
|
65
|
+
sorted-set sorted-set-by sorted? special-form-anchor special-symbol?
|
66
|
+
split-at split-with str string? struct struct-map subs subseq subvec
|
67
|
+
supers swap! symbol symbol? sync syntax-symbol-anchor take take-last
|
68
|
+
take-nth take-while test the-ns thread-bound? time to-array to-array-2d
|
69
|
+
trampoline transient tree-seq true? type unchecked-add unchecked-dec
|
70
|
+
unchecked-divide unchecked-inc unchecked-multiply unchecked-negate
|
71
|
+
unchecked-remainder unchecked-subtract underive update-in update-proxy
|
72
|
+
use val vals var-get var-set var? vary-meta vec vector vector-of vector?
|
73
|
+
when when-first when-let when-not while with-bindings with-bindings*
|
74
|
+
with-in-str with-local-vars with-meta with-open with-out-str
|
75
|
+
with-precision xml-seq zero? zipmap
|
76
|
+
] # :nodoc:
|
77
|
+
|
78
|
+
PREDEFINED_CONSTANTS = %w[
|
79
|
+
true false nil *1 *2 *3 *agent* *clojure-version* *command-line-args*
|
80
|
+
*compile-files* *compile-path* *e *err* *file* *flush-on-newline*
|
81
|
+
*in* *ns* *out* *print-dup* *print-length* *print-level* *print-meta*
|
82
|
+
*print-readably* *read-eval* *warn-on-reflection*
|
83
|
+
] # :nodoc:
|
84
|
+
|
85
|
+
IDENT_KIND = WordList.new(:ident).
|
86
|
+
add(SPECIAL_FORMS, :keyword).
|
87
|
+
add(CORE_FORMS, :keyword).
|
88
|
+
add(PREDEFINED_CONSTANTS, :predefined_constant)
|
89
|
+
|
90
|
+
KEYWORD_NEXT_TOKEN_KIND = WordList.new(nil).
|
91
|
+
add(%w[ def defn defn- definline defmacro defmulti defmethod defstruct defonce declare ], :function).
|
92
|
+
add(%w[ ns ], :namespace).
|
93
|
+
add(%w[ defprotocol defrecord ], :class)
|
94
|
+
|
95
|
+
BASIC_IDENTIFIER = /[a-zA-Z$%*\/_+!?&<>\-=]=?[a-zA-Z0-9$&*+!\/_?<>\-\#]*/
|
96
|
+
IDENTIFIER = /(?!-\d)(?:(?:#{BASIC_IDENTIFIER}\.)*#{BASIC_IDENTIFIER}(?:\/#{BASIC_IDENTIFIER})?\.?)|\.\.?/
|
97
|
+
SYMBOL = /::?#{IDENTIFIER}/o
|
98
|
+
DIGIT = /\d/
|
99
|
+
DIGIT10 = DIGIT
|
100
|
+
DIGIT16 = /[0-9a-f]/i
|
101
|
+
DIGIT8 = /[0-7]/
|
102
|
+
DIGIT2 = /[01]/
|
103
|
+
RADIX16 = /\#x/i
|
104
|
+
RADIX8 = /\#o/i
|
105
|
+
RADIX2 = /\#b/i
|
106
|
+
RADIX10 = /\#d/i
|
107
|
+
EXACTNESS = /#i|#e/i
|
108
|
+
SIGN = /[\+-]?/
|
109
|
+
EXP_MARK = /[esfdl]/i
|
110
|
+
EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
|
111
|
+
SUFFIX = /#{EXP}?/
|
112
|
+
PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
|
113
|
+
PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
|
114
|
+
PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
|
115
|
+
PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
|
116
|
+
UINT10 = /#{DIGIT10}+#*/
|
117
|
+
UINT16 = /#{DIGIT16}+#*/
|
118
|
+
UINT8 = /#{DIGIT8}+#*/
|
119
|
+
UINT2 = /#{DIGIT2}+#*/
|
120
|
+
DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
|
121
|
+
UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
|
122
|
+
UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
|
123
|
+
UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
|
124
|
+
UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
|
125
|
+
REAL10 = /#{SIGN}#{UREAL10}/
|
126
|
+
REAL16 = /#{SIGN}#{UREAL16}/
|
127
|
+
REAL8 = /#{SIGN}#{UREAL8}/
|
128
|
+
REAL2 = /#{SIGN}#{UREAL2}/
|
129
|
+
IMAG10 = /i|#{UREAL10}i/
|
130
|
+
IMAG16 = /i|#{UREAL16}i/
|
131
|
+
IMAG8 = /i|#{UREAL8}i/
|
132
|
+
IMAG2 = /i|#{UREAL2}i/
|
133
|
+
COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
|
134
|
+
COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
|
135
|
+
COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
|
136
|
+
COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
|
137
|
+
NUM10 = /#{PREFIX10}?#{COMPLEX10}/
|
138
|
+
NUM16 = /#{PREFIX16}#{COMPLEX16}/
|
139
|
+
NUM8 = /#{PREFIX8}#{COMPLEX8}/
|
140
|
+
NUM2 = /#{PREFIX2}#{COMPLEX2}/
|
141
|
+
NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/
|
142
|
+
|
143
|
+
protected
|
144
|
+
|
145
|
+
def scan_tokens encoder, options
|
146
|
+
|
147
|
+
state = :initial
|
148
|
+
kind = nil
|
149
|
+
|
150
|
+
until eos?
|
151
|
+
|
152
|
+
case state
|
153
|
+
when :initial
|
154
|
+
if match = scan(/ \s+ | \\\n | , /x)
|
155
|
+
encoder.text_token match, :space
|
156
|
+
elsif match = scan(/['`\(\[\)\]\{\}]|\#[({]|~@?|[@\^]/)
|
157
|
+
encoder.text_token match, :operator
|
158
|
+
elsif match = scan(/;.*/)
|
159
|
+
encoder.text_token match, :comment # TODO: recognize (comment ...) too
|
160
|
+
elsif match = scan(/\#?\\(?:newline|space|.?)/)
|
161
|
+
encoder.text_token match, :char
|
162
|
+
elsif match = scan(/\#[ft]/)
|
163
|
+
encoder.text_token match, :predefined_constant
|
164
|
+
elsif match = scan(/#{IDENTIFIER}/o)
|
165
|
+
kind = IDENT_KIND[match]
|
166
|
+
encoder.text_token match, kind
|
167
|
+
if rest? && kind == :keyword
|
168
|
+
if kind = KEYWORD_NEXT_TOKEN_KIND[match]
|
169
|
+
encoder.text_token match, :space if match = scan(/\s+/o)
|
170
|
+
encoder.text_token match, kind if match = scan(/#{IDENTIFIER}/o)
|
171
|
+
end
|
172
|
+
end
|
173
|
+
elsif match = scan(/#{SYMBOL}/o)
|
174
|
+
encoder.text_token match, :symbol
|
175
|
+
elsif match = scan(/\./)
|
176
|
+
encoder.text_token match, :operator
|
177
|
+
elsif match = scan(/ \# \^ #{IDENTIFIER} /ox)
|
178
|
+
encoder.text_token match, :type
|
179
|
+
elsif match = scan(/ (\#)? " /x)
|
180
|
+
state = self[1] ? :regexp : :string
|
181
|
+
encoder.begin_group state
|
182
|
+
encoder.text_token match, :delimiter
|
183
|
+
elsif match = scan(/#{NUM}/o) and not matched.empty?
|
184
|
+
encoder.text_token match, match[/[.e\/]/i] ? :float : :integer
|
185
|
+
else
|
186
|
+
encoder.text_token getch, :error
|
187
|
+
end
|
188
|
+
|
189
|
+
when :string, :regexp
|
190
|
+
if match = scan(/[^"\\]+|\\.?/)
|
191
|
+
encoder.text_token match, :content
|
192
|
+
elsif match = scan(/"/)
|
193
|
+
encoder.text_token match, :delimiter
|
194
|
+
encoder.end_group state
|
195
|
+
state = :initial
|
196
|
+
else
|
197
|
+
raise_inspect "else case \" reached; %p not handled." % peek(1),
|
198
|
+
encoder, state
|
199
|
+
end
|
200
|
+
|
201
|
+
else
|
202
|
+
raise 'else case reached'
|
203
|
+
|
204
|
+
end
|
205
|
+
|
206
|
+
end
|
207
|
+
|
208
|
+
if [:string, :regexp].include? state
|
209
|
+
encoder.end_group state
|
210
|
+
end
|
211
|
+
|
212
|
+
encoder
|
213
|
+
|
214
|
+
end
|
215
|
+
end
|
216
|
+
end
|
217
|
+
end
|