coderay 1.0.0.598.pre → 1.0.0.738.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/coderay +1 -1
- data/lib/coderay.rb +38 -32
- data/lib/coderay/duo.rb +1 -54
- data/lib/coderay/encoder.rb +31 -33
- data/lib/coderay/encoders/_map.rb +4 -2
- data/lib/coderay/encoders/comment_filter.rb +0 -61
- data/lib/coderay/encoders/count.rb +2 -23
- data/lib/coderay/encoders/debug.rb +11 -60
- data/lib/coderay/encoders/filter.rb +0 -46
- data/lib/coderay/encoders/html.rb +83 -91
- data/lib/coderay/encoders/html/css.rb +1 -6
- data/lib/coderay/encoders/html/numbering.rb +18 -21
- data/lib/coderay/encoders/html/output.rb +10 -52
- data/lib/coderay/encoders/json.rb +19 -39
- data/lib/coderay/encoders/lines_of_code.rb +7 -52
- data/lib/coderay/encoders/null.rb +6 -13
- data/lib/coderay/encoders/statistic.rb +30 -93
- data/lib/coderay/encoders/terminal.rb +3 -4
- data/lib/coderay/encoders/text.rb +1 -23
- data/lib/coderay/encoders/token_kind_filter.rb +0 -58
- data/lib/coderay/helpers/file_type.rb +119 -240
- data/lib/coderay/helpers/gzip.rb +41 -0
- data/lib/coderay/helpers/plugin.rb +237 -307
- data/lib/coderay/scanner.rb +112 -88
- data/lib/coderay/scanners/_map.rb +3 -3
- data/lib/coderay/scanners/c.rb +7 -7
- data/lib/coderay/scanners/clojure.rb +204 -0
- data/lib/coderay/scanners/css.rb +10 -20
- data/lib/coderay/scanners/debug.rb +9 -55
- data/lib/coderay/scanners/diff.rb +21 -4
- data/lib/coderay/scanners/html.rb +65 -18
- data/lib/coderay/scanners/java.rb +3 -2
- data/lib/coderay/scanners/java_script.rb +3 -3
- data/lib/coderay/scanners/json.rb +7 -6
- data/lib/coderay/scanners/php.rb +2 -1
- data/lib/coderay/scanners/rhtml.rb +6 -2
- data/lib/coderay/scanners/ruby.rb +193 -193
- data/lib/coderay/scanners/ruby/patterns.rb +15 -82
- data/lib/coderay/scanners/ruby/string_state.rb +71 -0
- data/lib/coderay/scanners/sql.rb +1 -1
- data/lib/coderay/scanners/yaml.rb +4 -2
- data/lib/coderay/styles/_map.rb +2 -2
- data/lib/coderay/styles/alpha.rb +48 -38
- data/lib/coderay/styles/cycnus.rb +2 -1
- data/lib/coderay/token_kinds.rb +88 -86
- data/lib/coderay/tokens.rb +88 -112
- data/test/functional/basic.rb +184 -5
- data/test/functional/examples.rb +4 -4
- data/test/functional/for_redcloth.rb +3 -2
- data/test/functional/suite.rb +7 -6
- metadata +11 -24
- data/lib/coderay/helpers/gzip_simple.rb +0 -123
- data/test/functional/load_plugin_scanner.rb +0 -11
- data/test/functional/vhdl.rb +0 -126
- data/test/functional/word_list.rb +0 -79
data/lib/coderay/scanner.rb
CHANGED
@@ -1,7 +1,12 @@
|
|
1
|
-
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'strscan'
|
2
3
|
|
3
|
-
|
4
|
+
module CodeRay
|
4
5
|
|
6
|
+
autoload :WordList, 'coderay/helpers/word_list'
|
7
|
+
# FIXME: Rename CaseIgnoringWordList to WordList::CaseIgnoring.
|
8
|
+
autoload :CaseIgnoringWordList, 'coderay/helpers/word_list'
|
9
|
+
|
5
10
|
# = Scanners
|
6
11
|
#
|
7
12
|
# This module holds the Scanner class and its subclasses.
|
@@ -16,9 +21,8 @@ module CodeRay
|
|
16
21
|
module Scanners
|
17
22
|
extend PluginHost
|
18
23
|
plugin_path File.dirname(__FILE__), 'scanners'
|
19
|
-
|
20
|
-
|
21
|
-
|
24
|
+
|
25
|
+
|
22
26
|
# = Scanner
|
23
27
|
#
|
24
28
|
# The base class for all Scanners.
|
@@ -46,61 +50,83 @@ module CodeRay
|
|
46
50
|
|
47
51
|
extend Plugin
|
48
52
|
plugin_host Scanners
|
49
|
-
|
53
|
+
|
50
54
|
# Raised if a Scanner fails while scanning
|
51
|
-
ScanError = Class.new
|
52
|
-
|
53
|
-
require 'coderay/helpers/word_list'
|
54
|
-
|
55
|
+
ScanError = Class.new Exception
|
56
|
+
|
55
57
|
# The default options for all scanner classes.
|
56
58
|
#
|
57
59
|
# Define @default_options for subclasses.
|
58
60
|
DEFAULT_OPTIONS = { }
|
59
61
|
|
60
62
|
KINDS_NOT_LOC = [:comment, :doctype, :docstring]
|
61
|
-
|
63
|
+
|
62
64
|
class << self
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
ensure
|
75
|
-
$DEBUG = debug
|
76
|
-
end
|
65
|
+
|
66
|
+
# Normalizes the given code into a string with UNIX newlines, in the
|
67
|
+
# scanner's internal encoding, with invalid and undefined characters
|
68
|
+
# replaced by placeholders. Always returns a new object.
|
69
|
+
def normalize code
|
70
|
+
original = code
|
71
|
+
code = code.to_s unless code.is_a? ::String
|
72
|
+
if code.respond_to? :encoding
|
73
|
+
code = encode_with_encoding code, self.encoding
|
74
|
+
else
|
75
|
+
code = to_unix code if code.index ?\r
|
77
76
|
end
|
78
|
-
#
|
79
|
-
code.gsub!(/\r\n?/, "\n") if code.index ?\r
|
77
|
+
# code = code.dup if code.eql? original
|
80
78
|
code
|
81
79
|
end
|
82
80
|
|
83
|
-
|
84
|
-
|
85
|
-
|
81
|
+
# The typical filename suffix for this scanner's language.
|
82
|
+
def file_extension extension = plugin_id
|
83
|
+
@file_extension ||= extension.to_s
|
84
|
+
end
|
85
|
+
|
86
|
+
# The encoding used internally by this scanner.
|
87
|
+
def encoding name = 'UTF-8'
|
88
|
+
@encoding ||= defined?(Encoding.find) && Encoding.find(name)
|
89
|
+
end
|
90
|
+
|
91
|
+
# The lang of this Scanner class, which is equal to its Plugin ID.
|
92
|
+
alias lang plugin_id
|
93
|
+
|
94
|
+
protected
|
95
|
+
|
96
|
+
def encode_with_encoding code, target_encoding
|
97
|
+
if code.encoding == target_encoding
|
98
|
+
if code.valid_encoding?
|
99
|
+
return to_unix code
|
100
|
+
else
|
101
|
+
source_encoding = guess_encoding code
|
102
|
+
end
|
86
103
|
else
|
87
|
-
|
104
|
+
source_encoding = code.encoding
|
88
105
|
end
|
106
|
+
# print "encode_with_encoding from #{source_encoding} to #{target_encoding}"
|
107
|
+
code.encode target_encoding, source_encoding, :universal_newline => true, :undef => :replace, :invalid => :replace
|
89
108
|
end
|
90
|
-
|
109
|
+
|
110
|
+
def to_unix code
|
111
|
+
code.gsub(/\r\n?/, "\n")
|
112
|
+
end
|
113
|
+
|
114
|
+
def guess_encoding s
|
115
|
+
#:nocov:
|
116
|
+
IO.popen("file -b --mime -", "w+") do |file|
|
117
|
+
file.write s[0, 1024]
|
118
|
+
file.close_write
|
119
|
+
begin
|
120
|
+
Encoding.find file.gets[/charset=([-\w]+)/, 1]
|
121
|
+
rescue ArgumentError
|
122
|
+
Encoding::BINARY
|
123
|
+
end
|
124
|
+
end
|
125
|
+
#:nocov:
|
126
|
+
end
|
127
|
+
|
91
128
|
end
|
92
|
-
|
93
|
-
=begin
|
94
|
-
## Excluded for speed reasons; protected seems to make methods slow.
|
95
|
-
|
96
|
-
# Save the StringScanner methods from being called.
|
97
|
-
# This would not be useful for highlighting.
|
98
|
-
strscan_public_methods =
|
99
|
-
StringScanner.instance_methods -
|
100
|
-
StringScanner.ancestors[1].instance_methods
|
101
|
-
protected(*strscan_public_methods)
|
102
|
-
=end
|
103
|
-
|
129
|
+
|
104
130
|
# Create a new Scanner.
|
105
131
|
#
|
106
132
|
# * +code+ is the input String and is handled by the superclass
|
@@ -110,43 +136,46 @@ module CodeRay
|
|
110
136
|
# overwrite default options here.)
|
111
137
|
#
|
112
138
|
# Else, a Tokens object is used.
|
113
|
-
def initialize code='', options = {}
|
114
|
-
|
115
|
-
"anything. :( Use my subclasses."
|
139
|
+
def initialize code = '', options = {}
|
140
|
+
if self.class == Scanner
|
141
|
+
raise NotImplementedError, "I am only the basic Scanner class. I can't scan anything. :( Use my subclasses."
|
142
|
+
end
|
116
143
|
|
117
144
|
@options = self.class::DEFAULT_OPTIONS.merge options
|
118
|
-
|
119
|
-
super
|
120
|
-
|
145
|
+
|
146
|
+
super self.class.normalize(code)
|
147
|
+
|
121
148
|
@tokens = options[:tokens] || Tokens.new
|
122
149
|
@tokens.scanner = self if @tokens.respond_to? :scanner=
|
123
|
-
|
150
|
+
|
124
151
|
setup
|
125
152
|
end
|
126
153
|
|
127
|
-
# Sets back the scanner. Subclasses
|
128
|
-
# method.
|
154
|
+
# Sets back the scanner. Subclasses should define the reset_instance
|
155
|
+
# method instead of this one.
|
129
156
|
def reset
|
130
157
|
super
|
131
158
|
reset_instance
|
132
159
|
end
|
133
|
-
|
160
|
+
|
161
|
+
# Set a new string to be scanned.
|
134
162
|
def string= code
|
135
|
-
code =
|
163
|
+
code = self.class.normalize(code)
|
136
164
|
super code
|
137
165
|
reset_instance
|
138
166
|
end
|
139
|
-
|
140
|
-
#
|
141
|
-
alias code string
|
142
|
-
alias code= string=
|
143
|
-
|
144
|
-
# Returns the Plugin ID for this scanner.
|
167
|
+
|
168
|
+
# the Plugin ID for this scanner
|
145
169
|
def lang
|
146
|
-
self.class.
|
170
|
+
self.class.lang
|
147
171
|
end
|
148
|
-
|
149
|
-
#
|
172
|
+
|
173
|
+
# the default file extension for this scanner
|
174
|
+
def file_extension
|
175
|
+
self.class.file_extension
|
176
|
+
end
|
177
|
+
|
178
|
+
# Scans the code and returns all tokens in a Tokens object.
|
150
179
|
def tokenize source = nil, options = {}
|
151
180
|
options = @options.merge(options)
|
152
181
|
@tokens = options[:tokens] || @tokens || Tokens.new
|
@@ -170,18 +199,18 @@ module CodeRay
|
|
170
199
|
end
|
171
200
|
end
|
172
201
|
|
173
|
-
#
|
202
|
+
# Cache the result of tokenize.
|
174
203
|
def tokens
|
175
204
|
@cached_tokens ||= tokenize
|
176
205
|
end
|
177
206
|
|
178
|
-
#
|
207
|
+
# Traverse the tokens.
|
179
208
|
def each &block
|
180
209
|
tokens.each(&block)
|
181
210
|
end
|
182
211
|
include Enumerable
|
183
|
-
|
184
|
-
# The current line position of the scanner.
|
212
|
+
|
213
|
+
# The current line position of the scanner. See also #column.
|
185
214
|
#
|
186
215
|
# Beware, this is implemented inefficiently. It should be used
|
187
216
|
# for debugging only.
|
@@ -189,27 +218,23 @@ module CodeRay
|
|
189
218
|
string[0..pos].count("\n") + 1
|
190
219
|
end
|
191
220
|
|
192
|
-
# The current column position of the scanner. See #line.
|
221
|
+
# The current column position of the scanner. See also #line.
|
222
|
+
#
|
223
|
+
# Beware, this is implemented inefficiently. It should be used
|
224
|
+
# for debugging only.
|
193
225
|
def column pos = self.pos
|
194
226
|
return 0 if pos <= 0
|
195
|
-
string = string
|
196
|
-
if string.respond_to?(:bytesize) &&
|
197
|
-
|
198
|
-
string =
|
227
|
+
string = self.string
|
228
|
+
if string.respond_to?(:bytesize) && string.bytesize != string.size
|
229
|
+
#:nocov:
|
230
|
+
string = string.dup.force_encoding('binary')
|
231
|
+
#:nocov:
|
199
232
|
end
|
200
233
|
pos - (string.rindex(?\n, pos) || 0)
|
201
234
|
end
|
202
235
|
|
203
|
-
def marshal_dump # :nodoc:
|
204
|
-
@options
|
205
|
-
end
|
206
|
-
|
207
|
-
def marshal_load options # :nodoc:
|
208
|
-
@options = options
|
209
|
-
end
|
210
|
-
|
211
236
|
protected
|
212
|
-
|
237
|
+
|
213
238
|
# Can be implemented by subclasses to do some initialization
|
214
239
|
# that has to be done once per instance.
|
215
240
|
#
|
@@ -217,15 +242,14 @@ module CodeRay
|
|
217
242
|
# scan.
|
218
243
|
def setup # :doc:
|
219
244
|
end
|
220
|
-
|
245
|
+
|
221
246
|
# This is the central method, and commonly the only one a
|
222
247
|
# subclass implements.
|
223
248
|
#
|
224
249
|
# Subclasses must implement this method; it must return +tokens+
|
225
250
|
# and must only use Tokens#<< for storing scanned tokens!
|
226
251
|
def scan_tokens tokens, options # :doc:
|
227
|
-
raise NotImplementedError,
|
228
|
-
"#{self.class}#scan_tokens not implemented."
|
252
|
+
raise NotImplementedError, "#{self.class}#scan_tokens not implemented."
|
229
253
|
end
|
230
254
|
|
231
255
|
# Resets the scanner.
|
@@ -234,7 +258,7 @@ module CodeRay
|
|
234
258
|
@cached_tokens = nil
|
235
259
|
@bin_string = nil if defined? @bin_string
|
236
260
|
end
|
237
|
-
|
261
|
+
|
238
262
|
# Scanner error with additional status information
|
239
263
|
def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
|
240
264
|
raise ScanError, <<-EOE % [
|
@@ -266,8 +290,8 @@ surrounding code:
|
|
266
290
|
string[pos, ambit],
|
267
291
|
]
|
268
292
|
end
|
269
|
-
|
293
|
+
|
270
294
|
end
|
271
|
-
|
295
|
+
|
272
296
|
end
|
273
297
|
end
|
data/lib/coderay/scanners/c.rb
CHANGED
@@ -65,10 +65,6 @@ module Scanners
|
|
65
65
|
elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
|
66
66
|
encoder.text_token match, :comment
|
67
67
|
|
68
|
-
elsif match = scan(/ \# \s* if \s* 0 /x)
|
69
|
-
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
|
70
|
-
encoder.text_token match, :comment
|
71
|
-
|
72
68
|
elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
|
73
69
|
label_expected = match =~ /[;\{\}]/
|
74
70
|
if case_expected
|
@@ -93,9 +89,6 @@ module Scanners
|
|
93
89
|
end
|
94
90
|
encoder.text_token match, kind
|
95
91
|
|
96
|
-
elsif match = scan(/\$/)
|
97
|
-
encoder.text_token match, :ident
|
98
|
-
|
99
92
|
elsif match = scan(/L?"/)
|
100
93
|
encoder.begin_group :string
|
101
94
|
if match[0] == ?L
|
@@ -105,6 +98,10 @@ module Scanners
|
|
105
98
|
encoder.text_token match, :delimiter
|
106
99
|
state = :string
|
107
100
|
|
101
|
+
elsif match = scan(/ \# \s* if \s* 0 /x)
|
102
|
+
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
|
103
|
+
encoder.text_token match, :comment
|
104
|
+
|
108
105
|
elsif match = scan(/#[ \t]*(\w*)/)
|
109
106
|
encoder.text_token match, :preprocessor
|
110
107
|
in_preproc_line = true
|
@@ -115,6 +112,9 @@ module Scanners
|
|
115
112
|
label_expected = false
|
116
113
|
encoder.text_token match, :char
|
117
114
|
|
115
|
+
elsif match = scan(/\$/)
|
116
|
+
encoder.text_token match, :ident
|
117
|
+
|
118
118
|
elsif match = scan(/0[xX][0-9A-Fa-f]+/)
|
119
119
|
label_expected = false
|
120
120
|
encoder.text_token match, :hex
|
@@ -0,0 +1,204 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module CodeRay
|
3
|
+
module Scanners
|
4
|
+
|
5
|
+
# Clojure scanner by Licenser.
|
6
|
+
class Clojure < Scanner
|
7
|
+
|
8
|
+
register_for :clojure
|
9
|
+
file_extension 'clj'
|
10
|
+
|
11
|
+
SPECIAL_FORMS = %w[
|
12
|
+
def if do let quote var fn loop recur throw try catch monitor-enter monitor-exit .
|
13
|
+
new
|
14
|
+
] # :nodoc:
|
15
|
+
|
16
|
+
CORE_FORMS = %w[
|
17
|
+
+ - -> ->> .. / * < <= = == > >= accessor aclone add-classpath add-watch
|
18
|
+
agent agent-error agent-errors aget alength alias all-ns alter alter-meta!
|
19
|
+
alter-var-root amap ancestors and apply areduce array-map aset aset-boolean
|
20
|
+
aset-byte aset-char aset-double aset-float aset-int aset-long aset-short
|
21
|
+
assert assoc assoc! assoc-in associative? atom await await-for bases bean
|
22
|
+
bigdec bigint binding bit-and bit-and-not bit-clear bit-flip bit-not bit-or
|
23
|
+
bit-set bit-shift-left bit-shift-right bit-test bit-xor boolean boolean-array
|
24
|
+
booleans bound-fn bound-fn* bound? butlast byte byte-array bytes case cast char
|
25
|
+
char-array char-escape-string char-name-string char? chars class class?
|
26
|
+
clear-agent-errors clojure-version coll? comment commute comp comparator
|
27
|
+
compare compare-and-set! compile complement concat cond condp conj conj!
|
28
|
+
cons constantly construct-proxy contains? count counted? create-ns
|
29
|
+
create-struct cycle dec decimal? declare definline defmacro defmethod defmulti
|
30
|
+
defn defn- defonce defprotocol defrecord defstruct deftype delay delay?
|
31
|
+
deliver denominator deref derive descendants disj disj! dissoc dissoc!
|
32
|
+
distinct distinct? doall doc dorun doseq dosync dotimes doto double
|
33
|
+
double-array doubles drop drop-last drop-while empty empty? ensure
|
34
|
+
enumeration-seq error-handler error-mode eval even? every? extend
|
35
|
+
extend-protocol extend-type extenders extends? false? ffirst file-seq
|
36
|
+
filter find find-doc find-ns find-var first float float-array float?
|
37
|
+
floats flush fn fn? fnext for force format future future-call future-cancel
|
38
|
+
future-cancelled? future-done? future? gen-class gen-interface gensym get
|
39
|
+
get-in get-method get-proxy-class get-thread-bindings get-validator hash
|
40
|
+
hash-map hash-set identical? identity if-let if-not ifn? import in-ns
|
41
|
+
inc init-proxy instance? int int-array integer? interleave intern
|
42
|
+
interpose into into-array ints io! isa? iterate iterator-seq juxt key
|
43
|
+
keys keyword keyword? last lazy-cat lazy-seq let letfn line-seq list list*
|
44
|
+
list? load load-file load-reader load-string loaded-libs locking long
|
45
|
+
long-array longs loop macroexpand macroexpand-1 make-array make-hierarchy
|
46
|
+
map map? mapcat max max-key memfn memoize merge merge-with meta methods
|
47
|
+
min min-key mod name namespace neg? newline next nfirst nil? nnext not
|
48
|
+
not-any? not-empty not-every? not= ns ns-aliases ns-imports ns-interns
|
49
|
+
ns-map ns-name ns-publics ns-refers ns-resolve ns-unalias ns-unmap nth
|
50
|
+
nthnext num number? numerator object-array odd? or parents partial
|
51
|
+
partition pcalls peek persistent! pmap pop pop! pop-thread-bindings
|
52
|
+
pos? pr pr-str prefer-method prefers print print-namespace-doc
|
53
|
+
print-str printf println println-str prn prn-str promise proxy
|
54
|
+
proxy-mappings proxy-super push-thread-bindings pvalues quot rand
|
55
|
+
rand-int range ratio? rationalize re-find re-groups re-matcher
|
56
|
+
re-matches re-pattern re-seq read read-line read-string reduce ref
|
57
|
+
ref-history-count ref-max-history ref-min-history ref-set refer
|
58
|
+
refer-clojure reify release-pending-sends rem remove remove-all-methods
|
59
|
+
remove-method remove-ns remove-watch repeat repeatedly replace replicate
|
60
|
+
require reset! reset-meta! resolve rest restart-agent resultset-seq
|
61
|
+
reverse reversible? rseq rsubseq satisfies? second select-keys send
|
62
|
+
send-off seq seq? seque sequence sequential? set set-error-handler!
|
63
|
+
set-error-mode! set-validator! set? short short-array shorts
|
64
|
+
shutdown-agents slurp some sort sort-by sorted-map sorted-map-by
|
65
|
+
sorted-set sorted-set-by sorted? special-form-anchor special-symbol?
|
66
|
+
split-at split-with str string? struct struct-map subs subseq subvec
|
67
|
+
supers swap! symbol symbol? sync syntax-symbol-anchor take take-last
|
68
|
+
take-nth take-while test the-ns thread-bound? time to-array to-array-2d
|
69
|
+
trampoline transient tree-seq true? type unchecked-add unchecked-dec
|
70
|
+
unchecked-divide unchecked-inc unchecked-multiply unchecked-negate
|
71
|
+
unchecked-remainder unchecked-subtract underive update-in update-proxy
|
72
|
+
use val vals var-get var-set var? vary-meta vec vector vector-of vector?
|
73
|
+
when when-first when-let when-not while with-bindings with-bindings*
|
74
|
+
with-in-str with-local-vars with-meta with-open with-out-str
|
75
|
+
with-precision xml-seq zero? zipmap
|
76
|
+
] # :nodoc:
|
77
|
+
PREDEFINED_CONSTANTS = %w[
|
78
|
+
true false *1 *2 *3 *agent* *clojure-version* *command-line-args*
|
79
|
+
*compile-files* *compile-path* *e *err* *file* *flush-on-newline*
|
80
|
+
*in* *ns* *out* *print-dup* *print-length* *print-level* *print-meta*
|
81
|
+
*print-readably* *read-eval* *warn-on-reflection*
|
82
|
+
] # :nodoc:
|
83
|
+
|
84
|
+
IDENT_KIND = CaseIgnoringWordList.new(:ident).
|
85
|
+
add(SPECIAL_FORMS, :reserved).
|
86
|
+
add(CORE_FORMS, :reserved).
|
87
|
+
add(PREDEFINED_CONSTANTS, :pre_constant)
|
88
|
+
|
89
|
+
BASIC_IDENTIFIER = /[a-zA-Z$%*\/_+!?&<>\-=][a-zA-Z0-9$&*+!\/_?<>\-\#]*/
|
90
|
+
IDENTIFIER = /(?:[@']?(?:#{BASIC_IDENTIFIER}\.)*#{BASIC_IDENTIFIER}(?:\/#{BASIC_IDENTIFIER})?\.?)|\.\.?/
|
91
|
+
SYMBOL = /::?#{IDENTIFIER}/o
|
92
|
+
DIGIT = /\d/
|
93
|
+
DIGIT10 = DIGIT
|
94
|
+
DIGIT16 = /[0-9a-f]/i
|
95
|
+
DIGIT8 = /[0-7]/
|
96
|
+
DIGIT2 = /[01]/
|
97
|
+
RADIX16 = /\#x/i
|
98
|
+
RADIX8 = /\#o/i
|
99
|
+
RADIX2 = /\#b/i
|
100
|
+
RADIX10 = /\#d/i
|
101
|
+
EXACTNESS = /#i|#e/i
|
102
|
+
SIGN = /[\+-]?/
|
103
|
+
EXP_MARK = /[esfdl]/i
|
104
|
+
EXP = /#{EXP_MARK}#{SIGN}#{DIGIT}+/
|
105
|
+
SUFFIX = /#{EXP}?/
|
106
|
+
PREFIX10 = /#{RADIX10}?#{EXACTNESS}?|#{EXACTNESS}?#{RADIX10}?/
|
107
|
+
PREFIX16 = /#{RADIX16}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX16}/
|
108
|
+
PREFIX8 = /#{RADIX8}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX8}/
|
109
|
+
PREFIX2 = /#{RADIX2}#{EXACTNESS}?|#{EXACTNESS}?#{RADIX2}/
|
110
|
+
UINT10 = /#{DIGIT10}+#*/
|
111
|
+
UINT16 = /#{DIGIT16}+#*/
|
112
|
+
UINT8 = /#{DIGIT8}+#*/
|
113
|
+
UINT2 = /#{DIGIT2}+#*/
|
114
|
+
DECIMAL = /#{DIGIT10}+#+\.#*#{SUFFIX}|#{DIGIT10}+\.#{DIGIT10}*#*#{SUFFIX}|\.#{DIGIT10}+#*#{SUFFIX}|#{UINT10}#{EXP}/
|
115
|
+
UREAL10 = /#{UINT10}\/#{UINT10}|#{DECIMAL}|#{UINT10}/
|
116
|
+
UREAL16 = /#{UINT16}\/#{UINT16}|#{UINT16}/
|
117
|
+
UREAL8 = /#{UINT8}\/#{UINT8}|#{UINT8}/
|
118
|
+
UREAL2 = /#{UINT2}\/#{UINT2}|#{UINT2}/
|
119
|
+
REAL10 = /#{SIGN}#{UREAL10}/
|
120
|
+
REAL16 = /#{SIGN}#{UREAL16}/
|
121
|
+
REAL8 = /#{SIGN}#{UREAL8}/
|
122
|
+
REAL2 = /#{SIGN}#{UREAL2}/
|
123
|
+
IMAG10 = /i|#{UREAL10}i/
|
124
|
+
IMAG16 = /i|#{UREAL16}i/
|
125
|
+
IMAG8 = /i|#{UREAL8}i/
|
126
|
+
IMAG2 = /i|#{UREAL2}i/
|
127
|
+
COMPLEX10 = /#{REAL10}@#{REAL10}|#{REAL10}\+#{IMAG10}|#{REAL10}-#{IMAG10}|\+#{IMAG10}|-#{IMAG10}|#{REAL10}/
|
128
|
+
COMPLEX16 = /#{REAL16}@#{REAL16}|#{REAL16}\+#{IMAG16}|#{REAL16}-#{IMAG16}|\+#{IMAG16}|-#{IMAG16}|#{REAL16}/
|
129
|
+
COMPLEX8 = /#{REAL8}@#{REAL8}|#{REAL8}\+#{IMAG8}|#{REAL8}-#{IMAG8}|\+#{IMAG8}|-#{IMAG8}|#{REAL8}/
|
130
|
+
COMPLEX2 = /#{REAL2}@#{REAL2}|#{REAL2}\+#{IMAG2}|#{REAL2}-#{IMAG2}|\+#{IMAG2}|-#{IMAG2}|#{REAL2}/
|
131
|
+
NUM10 = /#{PREFIX10}?#{COMPLEX10}/
|
132
|
+
NUM16 = /#{PREFIX16}#{COMPLEX16}/
|
133
|
+
NUM8 = /#{PREFIX8}#{COMPLEX8}/
|
134
|
+
NUM2 = /#{PREFIX2}#{COMPLEX2}/
|
135
|
+
NUM = /#{NUM10}|#{NUM16}|#{NUM8}|#{NUM2}/
|
136
|
+
|
137
|
+
protected
|
138
|
+
|
139
|
+
def scan_tokens encoder, options
|
140
|
+
|
141
|
+
state = :initial
|
142
|
+
ident_kind = IDENT_KIND
|
143
|
+
|
144
|
+
until eos?
|
145
|
+
|
146
|
+
case state
|
147
|
+
when :initial
|
148
|
+
if match = scan(/ \s+ | \\\n | , /x)
|
149
|
+
encoder.text_token match, :space
|
150
|
+
elsif match = scan(/['`\(\[\)\]\{\}]|\#[({]|~@?|@/)
|
151
|
+
encoder.text_token match, :operator # FIXME: was :operator_fat
|
152
|
+
elsif match = scan(/;.*/)
|
153
|
+
encoder.text_token match, :comment
|
154
|
+
elsif match = scan(/\#\\(?:newline|space|.?)/)
|
155
|
+
encoder.text_token match, :char
|
156
|
+
elsif match = scan(/\#[ft]/)
|
157
|
+
encoder.text_token match, :pre_constant
|
158
|
+
elsif match = scan(/#{IDENTIFIER}/o)
|
159
|
+
encoder.text_token match, ident_kind[matched]
|
160
|
+
elsif match = scan(/#{SYMBOL}/o)
|
161
|
+
encoder.text_token match, :symbol
|
162
|
+
elsif match = scan(/\./)
|
163
|
+
encoder.text_token match, :operator
|
164
|
+
elsif match = scan(/ \# \^ #{IDENTIFIER} /ox)
|
165
|
+
encoder.text_token match, :type
|
166
|
+
elsif match = scan(/ (\#)? " /x)
|
167
|
+
state = self[1] ? :regexp : :string
|
168
|
+
encoder.begin_group state
|
169
|
+
encoder.text_token match, :delimiter
|
170
|
+
elsif match = scan(/#{NUM}/o) and not matched.empty?
|
171
|
+
encoder.text_token match, match[/[.e]/i] ? :float : :integer
|
172
|
+
else
|
173
|
+
encoder.text_token getch, :error
|
174
|
+
end
|
175
|
+
|
176
|
+
when :string, :regexp
|
177
|
+
if match = scan(/[^"\\]+|\\.?/)
|
178
|
+
encoder.text_token match, :content
|
179
|
+
elsif match = scan(/"/)
|
180
|
+
encoder.text_token match, :delimiter
|
181
|
+
encoder.end_group state
|
182
|
+
state = :initial
|
183
|
+
else
|
184
|
+
raise_inspect "else case \" reached; %p not handled." % peek(1),
|
185
|
+
encoder, state
|
186
|
+
end
|
187
|
+
|
188
|
+
else
|
189
|
+
raise 'else case reached'
|
190
|
+
|
191
|
+
end
|
192
|
+
|
193
|
+
end
|
194
|
+
|
195
|
+
if [:string, :regexp].include? state
|
196
|
+
encoder.end_group state
|
197
|
+
end
|
198
|
+
|
199
|
+
encoder
|
200
|
+
|
201
|
+
end
|
202
|
+
end
|
203
|
+
end
|
204
|
+
end
|