coderay 0.9.8 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/{lib/README → README_INDEX.rdoc} +10 -21
- data/Rakefile +6 -6
- data/bin/coderay +193 -64
- data/lib/coderay.rb +61 -105
- data/lib/coderay/duo.rb +17 -21
- data/lib/coderay/encoder.rb +100 -112
- data/lib/coderay/encoders/_map.rb +12 -7
- data/lib/coderay/encoders/comment_filter.rb +12 -30
- data/lib/coderay/encoders/count.rb +29 -11
- data/lib/coderay/encoders/debug.rb +32 -20
- data/lib/coderay/encoders/div.rb +13 -9
- data/lib/coderay/encoders/filter.rb +34 -51
- data/lib/coderay/encoders/html.rb +155 -161
- data/lib/coderay/encoders/html/css.rb +4 -9
- data/lib/coderay/encoders/html/numbering.rb +115 -0
- data/lib/coderay/encoders/html/output.rb +22 -70
- data/lib/coderay/encoders/json.rb +59 -45
- data/lib/coderay/encoders/lines_of_code.rb +12 -57
- data/lib/coderay/encoders/null.rb +6 -14
- data/lib/coderay/encoders/page.rb +13 -9
- data/lib/coderay/encoders/span.rb +13 -9
- data/lib/coderay/encoders/statistic.rb +58 -39
- data/lib/coderay/encoders/terminal.rb +179 -0
- data/lib/coderay/encoders/text.rb +31 -17
- data/lib/coderay/encoders/token_kind_filter.rb +111 -0
- data/lib/coderay/encoders/xml.rb +19 -18
- data/lib/coderay/encoders/yaml.rb +37 -9
- data/lib/coderay/for_redcloth.rb +4 -4
- data/lib/coderay/helpers/file_type.rb +127 -246
- data/lib/coderay/helpers/gzip.rb +41 -0
- data/lib/coderay/helpers/plugin.rb +241 -306
- data/lib/coderay/helpers/word_list.rb +65 -126
- data/lib/coderay/scanner.rb +173 -156
- data/lib/coderay/scanners/_map.rb +18 -17
- data/lib/coderay/scanners/c.rb +63 -77
- data/lib/coderay/scanners/clojure.rb +217 -0
- data/lib/coderay/scanners/cpp.rb +71 -84
- data/lib/coderay/scanners/css.rb +103 -120
- data/lib/coderay/scanners/debug.rb +47 -44
- data/lib/coderay/scanners/delphi.rb +70 -76
- data/lib/coderay/scanners/diff.rb +141 -50
- data/lib/coderay/scanners/erb.rb +81 -0
- data/lib/coderay/scanners/groovy.rb +104 -113
- data/lib/coderay/scanners/haml.rb +168 -0
- data/lib/coderay/scanners/html.rb +181 -110
- data/lib/coderay/scanners/java.rb +73 -75
- data/lib/coderay/scanners/java/builtin_types.rb +2 -0
- data/lib/coderay/scanners/java_script.rb +90 -101
- data/lib/coderay/scanners/json.rb +40 -53
- data/lib/coderay/scanners/php.rb +123 -147
- data/lib/coderay/scanners/python.rb +93 -91
- data/lib/coderay/scanners/raydebug.rb +66 -0
- data/lib/coderay/scanners/ruby.rb +343 -326
- data/lib/coderay/scanners/ruby/patterns.rb +40 -106
- data/lib/coderay/scanners/ruby/string_state.rb +71 -0
- data/lib/coderay/scanners/sql.rb +80 -66
- data/lib/coderay/scanners/text.rb +26 -0
- data/lib/coderay/scanners/xml.rb +1 -1
- data/lib/coderay/scanners/yaml.rb +74 -73
- data/lib/coderay/style.rb +10 -7
- data/lib/coderay/styles/_map.rb +3 -3
- data/lib/coderay/styles/alpha.rb +143 -0
- data/lib/coderay/token_kinds.rb +90 -0
- data/lib/coderay/tokens.rb +102 -277
- data/lib/coderay/tokens_proxy.rb +55 -0
- data/lib/coderay/version.rb +3 -0
- data/test/functional/basic.rb +200 -18
- data/test/functional/examples.rb +130 -0
- data/test/functional/for_redcloth.rb +15 -8
- data/test/functional/suite.rb +9 -6
- metadata +103 -123
- data/FOLDERS +0 -53
- data/bin/coderay_stylesheet +0 -4
- data/lib/coderay/encoders/html/numerization.rb +0 -133
- data/lib/coderay/encoders/term.rb +0 -158
- data/lib/coderay/encoders/token_class_filter.rb +0 -84
- data/lib/coderay/helpers/gzip_simple.rb +0 -123
- data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
- data/lib/coderay/scanners/plaintext.rb +0 -20
- data/lib/coderay/scanners/rhtml.rb +0 -78
- data/lib/coderay/scanners/scheme.rb +0 -145
- data/lib/coderay/styles/cycnus.rb +0 -152
- data/lib/coderay/styles/murphy.rb +0 -134
- data/lib/coderay/token_classes.rb +0 -86
- data/test/functional/load_plugin_scanner.rb +0 -11
- data/test/functional/vhdl.rb +0 -126
- data/test/functional/word_list.rb +0 -79
@@ -1,138 +1,77 @@
|
|
1
1
|
module CodeRay
|
2
|
-
|
3
|
-
# = WordList
|
4
|
-
#
|
5
|
-
# <b>A Hash subclass designed for mapping word lists to token types.</b>
|
6
|
-
#
|
7
|
-
# Copyright (c) 2006 by murphy (Kornelius Kalnbach) <murphy rubychan de>
|
8
|
-
#
|
9
|
-
# License:: LGPL / ask the author
|
10
|
-
# Version:: 1.1 (2006-Oct-19)
|
11
|
-
#
|
12
|
-
# A WordList is a Hash with some additional features.
|
13
|
-
# It is intended to be used for keyword recognition.
|
14
|
-
#
|
15
|
-
# WordList is highly optimized to be used in Scanners,
|
16
|
-
# typically to decide whether a given ident is a special token.
|
17
|
-
#
|
18
|
-
# For case insensitive words use CaseIgnoringWordList.
|
19
|
-
#
|
20
|
-
# Example:
|
21
|
-
#
|
22
|
-
# # define word arrays
|
23
|
-
# RESERVED_WORDS = %w[
|
24
|
-
# asm break case continue default do else
|
25
|
-
# ...
|
26
|
-
# ]
|
27
|
-
#
|
28
|
-
# PREDEFINED_TYPES = %w[
|
29
|
-
# int long short char void
|
30
|
-
# ...
|
31
|
-
# ]
|
32
|
-
#
|
33
|
-
# PREDEFINED_CONSTANTS = %w[
|
34
|
-
# EOF NULL ...
|
35
|
-
# ]
|
36
|
-
#
|
37
|
-
# # make a WordList
|
38
|
-
# IDENT_KIND = WordList.new(:ident).
|
39
|
-
# add(RESERVED_WORDS, :reserved).
|
40
|
-
# add(PREDEFINED_TYPES, :pre_type).
|
41
|
-
# add(PREDEFINED_CONSTANTS, :pre_constant)
|
42
|
-
#
|
43
|
-
# ...
|
44
|
-
#
|
45
|
-
# def scan_tokens tokens, options
|
46
|
-
# ...
|
47
|
-
#
|
48
|
-
# elsif scan(/[A-Za-z_][A-Za-z_0-9]*/)
|
49
|
-
# # use it
|
50
|
-
# kind = IDENT_KIND[match]
|
51
|
-
# ...
|
52
|
-
class WordList < Hash
|
53
|
-
|
54
|
-
# Creates a new WordList with +default+ as default value.
|
55
|
-
#
|
56
|
-
# You can activate +caching+ to store the results for every [] request.
|
2
|
+
|
3
|
+
# = WordList
|
57
4
|
#
|
58
|
-
#
|
59
|
-
# as you expect. Therefore, it is recommended to use the [] method only.
|
60
|
-
def initialize default = false, caching = false, &block
|
61
|
-
if block
|
62
|
-
raise ArgumentError, 'Can\'t combine block with caching.' if caching
|
63
|
-
super(&block)
|
64
|
-
else
|
65
|
-
if caching
|
66
|
-
super() do |h, k|
|
67
|
-
h[k] = h.fetch k, default
|
68
|
-
end
|
69
|
-
else
|
70
|
-
super default
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
# Add words to the list and associate them with +kind+.
|
5
|
+
# <b>A Hash subclass designed for mapping word lists to token types.</b>
|
76
6
|
#
|
77
|
-
#
|
78
|
-
|
79
|
-
|
80
|
-
|
7
|
+
# Copyright (c) 2006-2011 by murphy (Kornelius Kalnbach) <murphy rubychan de>
|
8
|
+
#
|
9
|
+
# License:: LGPL / ask the author
|
10
|
+
# Version:: 2.0 (2011-05-08)
|
11
|
+
#
|
12
|
+
# A WordList is a Hash with some additional features.
|
13
|
+
# It is intended to be used for keyword recognition.
|
14
|
+
#
|
15
|
+
# WordList is optimized to be used in Scanners,
|
16
|
+
# typically to decide whether a given ident is a special token.
|
17
|
+
#
|
18
|
+
# For case insensitive words use WordList::CaseIgnoring.
|
19
|
+
#
|
20
|
+
# Example:
|
21
|
+
#
|
22
|
+
# # define word arrays
|
23
|
+
# RESERVED_WORDS = %w[
|
24
|
+
# asm break case continue default do else
|
25
|
+
# ]
|
26
|
+
#
|
27
|
+
# PREDEFINED_TYPES = %w[
|
28
|
+
# int long short char void
|
29
|
+
# ]
|
30
|
+
#
|
31
|
+
# # make a WordList
|
32
|
+
# IDENT_KIND = WordList.new(:ident).
|
33
|
+
# add(RESERVED_WORDS, :reserved).
|
34
|
+
# add(PREDEFINED_TYPES, :predefined_type)
|
35
|
+
#
|
36
|
+
# ...
|
37
|
+
#
|
38
|
+
# def scan_tokens tokens, options
|
39
|
+
# ...
|
40
|
+
#
|
41
|
+
# elsif scan(/[A-Za-z_][A-Za-z_0-9]*/)
|
42
|
+
# # use it
|
43
|
+
# kind = IDENT_KIND[match]
|
44
|
+
# ...
|
45
|
+
class WordList < Hash
|
46
|
+
|
47
|
+
# Create a new WordList with +default+ as default value.
|
48
|
+
def initialize default = false
|
49
|
+
super default
|
81
50
|
end
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
# keys are compared case-insensitively.
|
90
|
-
#
|
91
|
-
# Ignoring the text case is realized by sending the +downcase+ message to
|
92
|
-
# all keys.
|
93
|
-
#
|
94
|
-
# Caching usually makes a CaseIgnoringWordList faster, but it has to be
|
95
|
-
# activated explicitly.
|
96
|
-
class CaseIgnoringWordList < WordList
|
97
|
-
|
98
|
-
# Creates a new case-insensitive WordList with +default+ as default value.
|
99
|
-
#
|
100
|
-
# You can activate caching to store the results for every [] request.
|
101
|
-
# This speeds up subsequent lookups for the same word, but also
|
102
|
-
# uses memory.
|
103
|
-
def initialize default = false, caching = false
|
104
|
-
if caching
|
105
|
-
super(default, false) do |h, k|
|
106
|
-
h[k] = h.fetch k.downcase, default
|
107
|
-
end
|
108
|
-
else
|
109
|
-
super(default, false)
|
110
|
-
extend Uncached
|
51
|
+
|
52
|
+
# Add words to the list and associate them with +value+.
|
53
|
+
#
|
54
|
+
# Returns +self+, so you can chain add calls.
|
55
|
+
def add words, value = true
|
56
|
+
words.each { |word| self[word] = value }
|
57
|
+
self
|
111
58
|
end
|
59
|
+
|
112
60
|
end
|
113
61
|
|
114
|
-
|
62
|
+
|
63
|
+
# A CaseIgnoring WordList is like a WordList, only that
|
64
|
+
# keys are compared case-insensitively (normalizing keys using +downcase+).
|
65
|
+
class WordList::CaseIgnoring < WordList
|
66
|
+
|
115
67
|
def [] key
|
116
|
-
super
|
68
|
+
super key.downcase
|
117
69
|
end
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
def add words, kind = true
|
122
|
-
words.each do |word|
|
123
|
-
self[word.downcase] = kind
|
70
|
+
|
71
|
+
def []= key, value
|
72
|
+
super key.downcase, value
|
124
73
|
end
|
125
|
-
|
74
|
+
|
126
75
|
end
|
127
|
-
|
128
|
-
end
|
129
|
-
|
76
|
+
|
130
77
|
end
|
131
|
-
|
132
|
-
__END__
|
133
|
-
# check memory consumption
|
134
|
-
END {
|
135
|
-
ObjectSpace.each_object(CodeRay::CaseIgnoringWordList) do |wl|
|
136
|
-
p wl.inject(0) { |memo, key, value| memo + key.size + 24 }
|
137
|
-
end
|
138
|
-
}
|
data/lib/coderay/scanner.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
|
-
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'strscan'
|
2
3
|
|
3
|
-
|
4
|
+
module CodeRay
|
4
5
|
|
6
|
+
autoload :WordList, 'coderay/helpers/word_list'
|
7
|
+
|
5
8
|
# = Scanners
|
6
9
|
#
|
7
10
|
# This module holds the Scanner class and its subclasses.
|
@@ -16,9 +19,8 @@ module CodeRay
|
|
16
19
|
module Scanners
|
17
20
|
extend PluginHost
|
18
21
|
plugin_path File.dirname(__FILE__), 'scanners'
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
+
|
23
|
+
|
22
24
|
# = Scanner
|
23
25
|
#
|
24
26
|
# The base class for all Scanners.
|
@@ -46,64 +48,89 @@ module CodeRay
|
|
46
48
|
|
47
49
|
extend Plugin
|
48
50
|
plugin_host Scanners
|
49
|
-
|
51
|
+
|
50
52
|
# Raised if a Scanner fails while scanning
|
51
|
-
ScanError = Class.new
|
52
|
-
|
53
|
-
require 'coderay/helpers/word_list'
|
54
|
-
|
53
|
+
ScanError = Class.new StandardError
|
54
|
+
|
55
55
|
# The default options for all scanner classes.
|
56
56
|
#
|
57
57
|
# Define @default_options for subclasses.
|
58
|
-
DEFAULT_OPTIONS = {
|
58
|
+
DEFAULT_OPTIONS = { }
|
59
|
+
|
60
|
+
KINDS_NOT_LOC = [:comment, :doctype, :docstring]
|
61
|
+
|
62
|
+
attr_accessor :state
|
59
63
|
|
60
|
-
KINDS_NOT_LOC = [:comment, :doctype]
|
61
|
-
|
62
64
|
class << self
|
63
|
-
|
64
|
-
#
|
65
|
-
|
66
|
-
|
65
|
+
|
66
|
+
# Normalizes the given code into a string with UNIX newlines, in the
|
67
|
+
# scanner's internal encoding, with invalid and undefined characters
|
68
|
+
# replaced by placeholders. Always returns a new object.
|
69
|
+
def normalize code
|
70
|
+
# original = code
|
71
|
+
code = code.to_s unless code.is_a? ::String
|
72
|
+
return code if code.empty?
|
73
|
+
|
74
|
+
if code.respond_to? :encoding
|
75
|
+
code = encode_with_encoding code, self.encoding
|
76
|
+
else
|
77
|
+
code = to_unix code
|
78
|
+
end
|
79
|
+
# code = code.dup if code.eql? original
|
80
|
+
code
|
67
81
|
end
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
82
|
+
|
83
|
+
# The typical filename suffix for this scanner's language.
|
84
|
+
def file_extension extension = lang
|
85
|
+
@file_extension ||= extension.to_s
|
86
|
+
end
|
87
|
+
|
88
|
+
# The encoding used internally by this scanner.
|
89
|
+
def encoding name = 'UTF-8'
|
90
|
+
@encoding ||= defined?(Encoding.find) && Encoding.find(name)
|
91
|
+
end
|
92
|
+
|
93
|
+
# The lang of this Scanner class, which is equal to its Plugin ID.
|
94
|
+
def lang
|
95
|
+
@plugin_id
|
96
|
+
end
|
97
|
+
|
98
|
+
protected
|
99
|
+
|
100
|
+
def encode_with_encoding code, target_encoding
|
101
|
+
if code.encoding == target_encoding
|
102
|
+
if code.valid_encoding?
|
103
|
+
return to_unix(code)
|
104
|
+
else
|
105
|
+
source_encoding = guess_encoding code
|
81
106
|
end
|
107
|
+
else
|
108
|
+
source_encoding = code.encoding
|
82
109
|
end
|
83
|
-
|
110
|
+
# print "encode_with_encoding from #{source_encoding} to #{target_encoding}"
|
111
|
+
code.encode target_encoding, source_encoding, :universal_newline => true, :undef => :replace, :invalid => :replace
|
84
112
|
end
|
85
113
|
|
86
|
-
def
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
114
|
+
def to_unix code
|
115
|
+
code.index(?\r) ? code.gsub(/\r\n?/, "\n") : code
|
116
|
+
end
|
117
|
+
|
118
|
+
def guess_encoding s
|
119
|
+
#:nocov:
|
120
|
+
IO.popen("file -b --mime -", "w+") do |file|
|
121
|
+
file.write s[0, 1024]
|
122
|
+
file.close_write
|
123
|
+
begin
|
124
|
+
Encoding.find file.gets[/charset=([-\w]+)/, 1]
|
125
|
+
rescue ArgumentError
|
126
|
+
Encoding::BINARY
|
127
|
+
end
|
91
128
|
end
|
129
|
+
#:nocov:
|
92
130
|
end
|
93
|
-
|
131
|
+
|
94
132
|
end
|
95
|
-
|
96
|
-
=begin
|
97
|
-
## Excluded for speed reasons; protected seems to make methods slow.
|
98
|
-
|
99
|
-
# Save the StringScanner methods from being called.
|
100
|
-
# This would not be useful for highlighting.
|
101
|
-
strscan_public_methods =
|
102
|
-
StringScanner.instance_methods -
|
103
|
-
StringScanner.ancestors[1].instance_methods
|
104
|
-
protected(*strscan_public_methods)
|
105
|
-
=end
|
106
|
-
|
133
|
+
|
107
134
|
# Create a new Scanner.
|
108
135
|
#
|
109
136
|
# * +code+ is the input String and is handled by the superclass
|
@@ -111,146 +138,147 @@ module CodeRay
|
|
111
138
|
# * +options+ is a Hash with Symbols as keys.
|
112
139
|
# It is merged with the default options of the class (you can
|
113
140
|
# overwrite default options here.)
|
114
|
-
# * +block+ is the callback for streamed highlighting.
|
115
|
-
#
|
116
|
-
# If you set :stream to +true+ in the options, the Scanner uses a
|
117
|
-
# TokenStream with the +block+ as callback to handle the tokens.
|
118
141
|
#
|
119
142
|
# Else, a Tokens object is used.
|
120
|
-
def initialize code='', options = {}
|
121
|
-
|
122
|
-
"anything. :( Use my subclasses."
|
143
|
+
def initialize code = '', options = {}
|
144
|
+
if self.class == Scanner
|
145
|
+
raise NotImplementedError, "I am only the basic Scanner class. I can't scan anything. :( Use my subclasses."
|
146
|
+
end
|
123
147
|
|
124
148
|
@options = self.class::DEFAULT_OPTIONS.merge options
|
125
|
-
|
126
|
-
super
|
127
|
-
|
128
|
-
@tokens = options[:tokens]
|
129
|
-
if @
|
130
|
-
|
131
|
-
"but no block was given" unless block_given?
|
132
|
-
raise NotStreamableError, self unless kind_of? Streamable
|
133
|
-
@tokens ||= TokenStream.new(&block)
|
134
|
-
else
|
135
|
-
warn "warning in CodeRay::Scanner.new: Block given, "\
|
136
|
-
"but :stream is #{@options[:stream]}" if block_given?
|
137
|
-
@tokens ||= Tokens.new
|
138
|
-
end
|
139
|
-
@tokens.scanner = self
|
140
|
-
|
149
|
+
|
150
|
+
super self.class.normalize(code)
|
151
|
+
|
152
|
+
@tokens = options[:tokens] || Tokens.new
|
153
|
+
@tokens.scanner = self if @tokens.respond_to? :scanner=
|
154
|
+
|
141
155
|
setup
|
142
156
|
end
|
143
|
-
|
157
|
+
|
158
|
+
# Resets the scanner. Subclasses should redefine the reset_instance
|
159
|
+
# method instead of this one.
|
144
160
|
def reset
|
145
161
|
super
|
146
162
|
reset_instance
|
147
163
|
end
|
148
|
-
|
164
|
+
|
165
|
+
# Set a new string to be scanned.
|
149
166
|
def string= code
|
150
|
-
code =
|
151
|
-
|
152
|
-
reset_state
|
153
|
-
@string = code
|
154
|
-
else
|
155
|
-
super code
|
156
|
-
end
|
167
|
+
code = self.class.normalize(code)
|
168
|
+
super code
|
157
169
|
reset_instance
|
158
170
|
end
|
159
|
-
|
160
|
-
#
|
161
|
-
alias code string
|
162
|
-
alias code= string=
|
163
|
-
|
164
|
-
# Returns the Plugin ID for this scanner.
|
171
|
+
|
172
|
+
# the Plugin ID for this scanner
|
165
173
|
def lang
|
166
|
-
self.class.
|
174
|
+
self.class.lang
|
167
175
|
end
|
168
|
-
|
169
|
-
#
|
170
|
-
def
|
176
|
+
|
177
|
+
# the default file extension for this scanner
|
178
|
+
def file_extension
|
179
|
+
self.class.file_extension
|
180
|
+
end
|
181
|
+
|
182
|
+
# Scans the code and returns all tokens in a Tokens object.
|
183
|
+
def tokenize source = nil, options = {}
|
171
184
|
options = @options.merge(options)
|
172
|
-
|
173
|
-
@
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
185
|
+
@tokens = options[:tokens] || @tokens || Tokens.new
|
186
|
+
@tokens.scanner = self if @tokens.respond_to? :scanner=
|
187
|
+
case source
|
188
|
+
when Array
|
189
|
+
self.string = self.class.normalize(source.join)
|
190
|
+
when nil
|
191
|
+
reset
|
192
|
+
else
|
193
|
+
self.string = self.class.normalize(source)
|
194
|
+
end
|
195
|
+
|
196
|
+
begin
|
197
|
+
scan_tokens @tokens, options
|
198
|
+
rescue => e
|
199
|
+
message = "Error in %s#scan_tokens, initial state was: %p" % [self.class, defined?(state) && state]
|
200
|
+
raise_inspect e.message, @tokens, message, 30, e.backtrace
|
201
|
+
end
|
202
|
+
|
203
|
+
@cached_tokens = @tokens
|
204
|
+
if source.is_a? Array
|
205
|
+
@tokens.split_into_parts(*source.map { |part| part.size })
|
206
|
+
else
|
207
|
+
@tokens
|
208
|
+
end
|
181
209
|
end
|
182
|
-
|
210
|
+
|
211
|
+
# Cache the result of tokenize.
|
183
212
|
def tokens
|
184
213
|
@cached_tokens ||= tokenize
|
185
214
|
end
|
186
215
|
|
187
|
-
#
|
188
|
-
def streaming?
|
189
|
-
!!@options[:stream]
|
190
|
-
end
|
191
|
-
|
192
|
-
# Traverses the tokens.
|
216
|
+
# Traverse the tokens.
|
193
217
|
def each &block
|
194
|
-
raise ArgumentError,
|
195
|
-
'Cannot traverse TokenStream.' if @options[:stream]
|
196
218
|
tokens.each(&block)
|
197
219
|
end
|
198
220
|
include Enumerable
|
199
|
-
|
200
|
-
# The current line position of the scanner.
|
221
|
+
|
222
|
+
# The current line position of the scanner, starting with 1.
|
223
|
+
# See also: #column.
|
201
224
|
#
|
202
225
|
# Beware, this is implemented inefficiently. It should be used
|
203
226
|
# for debugging only.
|
204
|
-
def line
|
205
|
-
|
227
|
+
def line pos = self.pos
|
228
|
+
return 1 if pos <= 0
|
229
|
+
binary_string[0...pos].count("\n") + 1
|
206
230
|
end
|
207
231
|
|
232
|
+
# The current column position of the scanner, starting with 1.
|
233
|
+
# See also: #line.
|
208
234
|
def column pos = self.pos
|
209
|
-
return
|
210
|
-
|
211
|
-
if string.respond_to?(:bytesize) && (defined?(@bin_string) || string.bytesize != string.size)
|
212
|
-
@bin_string ||= string.dup.force_encoding('binary')
|
213
|
-
string = @bin_string
|
214
|
-
end
|
215
|
-
pos - (string.rindex(?\n, pos) || 0)
|
235
|
+
return 1 if pos <= 0
|
236
|
+
pos - (binary_string.rindex(?\n, pos - 1) || -1)
|
216
237
|
end
|
217
238
|
|
218
|
-
|
219
|
-
|
239
|
+
# The string in binary encoding.
|
240
|
+
#
|
241
|
+
# To be used with #pos, which is the index of the byte the scanner
|
242
|
+
# will scan next.
|
243
|
+
def binary_string
|
244
|
+
@binary_string ||=
|
245
|
+
if string.respond_to?(:bytesize) && string.bytesize != string.size
|
246
|
+
#:nocov:
|
247
|
+
string.dup.force_encoding('binary')
|
248
|
+
#:nocov:
|
249
|
+
else
|
250
|
+
string
|
251
|
+
end
|
220
252
|
end
|
221
253
|
|
222
|
-
def marshal_load options
|
223
|
-
@options = options
|
224
|
-
end
|
225
|
-
|
226
254
|
protected
|
227
|
-
|
255
|
+
|
228
256
|
# Can be implemented by subclasses to do some initialization
|
229
257
|
# that has to be done once per instance.
|
230
258
|
#
|
231
259
|
# Use reset for initialization that has to be done once per
|
232
260
|
# scan.
|
233
|
-
def setup
|
261
|
+
def setup # :doc:
|
234
262
|
end
|
235
|
-
|
263
|
+
|
236
264
|
# This is the central method, and commonly the only one a
|
237
265
|
# subclass implements.
|
238
266
|
#
|
239
267
|
# Subclasses must implement this method; it must return +tokens+
|
240
268
|
# and must only use Tokens#<< for storing scanned tokens!
|
241
|
-
def scan_tokens tokens, options
|
242
|
-
raise NotImplementedError,
|
243
|
-
"#{self.class}#scan_tokens not implemented."
|
269
|
+
def scan_tokens tokens, options # :doc:
|
270
|
+
raise NotImplementedError, "#{self.class}#scan_tokens not implemented."
|
244
271
|
end
|
245
|
-
|
272
|
+
|
273
|
+
# Resets the scanner.
|
246
274
|
def reset_instance
|
247
|
-
@tokens.clear
|
275
|
+
@tokens.clear if @tokens.respond_to?(:clear) && !@options[:keep_tokens]
|
248
276
|
@cached_tokens = nil
|
249
|
-
@
|
277
|
+
@binary_string = nil if defined? @binary_string
|
250
278
|
end
|
251
|
-
|
279
|
+
|
252
280
|
# Scanner error with additional status information
|
253
|
-
def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
|
281
|
+
def raise_inspect msg, tokens, state = self.state || 'No state given!', ambit = 30, backtrace = caller
|
254
282
|
raise ScanError, <<-EOE % [
|
255
283
|
|
256
284
|
|
@@ -272,13 +300,13 @@ surrounding code:
|
|
272
300
|
EOE
|
273
301
|
File.basename(caller[0]),
|
274
302
|
msg,
|
275
|
-
tokens.size,
|
276
|
-
tokens.last(10).map { |t| t.inspect }.join("\n"),
|
303
|
+
tokens.respond_to?(:size) ? tokens.size : 0,
|
304
|
+
tokens.respond_to?(:last) ? tokens.last(10).map { |t| t.inspect }.join("\n") : '',
|
277
305
|
line, column, pos,
|
278
306
|
matched, state, bol?, eos?,
|
279
|
-
|
280
|
-
|
281
|
-
]
|
307
|
+
binary_string[pos - ambit, ambit],
|
308
|
+
binary_string[pos, ambit],
|
309
|
+
], backtrace
|
282
310
|
end
|
283
311
|
|
284
312
|
# Shorthand for scan_until(/\z/).
|
@@ -288,19 +316,8 @@ surrounding code:
|
|
288
316
|
terminate
|
289
317
|
rest
|
290
318
|
end
|
291
|
-
|
292
|
-
end
|
293
|
-
|
294
|
-
end
|
295
|
-
end
|
296
|
-
|
297
|
-
class String
|
298
|
-
# I love this hack. It seems to silence all dos/unix/mac newline problems.
|
299
|
-
def to_unix
|
300
|
-
if index ?\r
|
301
|
-
gsub(/\r\n?/, "\n")
|
302
|
-
else
|
303
|
-
self
|
319
|
+
|
304
320
|
end
|
321
|
+
|
305
322
|
end
|
306
|
-
end
|
323
|
+
end
|