coderay 0.9.8 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/{lib/README → README_INDEX.rdoc} +10 -21
- data/Rakefile +6 -6
- data/bin/coderay +193 -64
- data/lib/coderay.rb +61 -105
- data/lib/coderay/duo.rb +17 -21
- data/lib/coderay/encoder.rb +100 -112
- data/lib/coderay/encoders/_map.rb +12 -7
- data/lib/coderay/encoders/comment_filter.rb +12 -30
- data/lib/coderay/encoders/count.rb +29 -11
- data/lib/coderay/encoders/debug.rb +32 -20
- data/lib/coderay/encoders/div.rb +13 -9
- data/lib/coderay/encoders/filter.rb +34 -51
- data/lib/coderay/encoders/html.rb +155 -161
- data/lib/coderay/encoders/html/css.rb +4 -9
- data/lib/coderay/encoders/html/numbering.rb +115 -0
- data/lib/coderay/encoders/html/output.rb +22 -70
- data/lib/coderay/encoders/json.rb +59 -45
- data/lib/coderay/encoders/lines_of_code.rb +12 -57
- data/lib/coderay/encoders/null.rb +6 -14
- data/lib/coderay/encoders/page.rb +13 -9
- data/lib/coderay/encoders/span.rb +13 -9
- data/lib/coderay/encoders/statistic.rb +58 -39
- data/lib/coderay/encoders/terminal.rb +179 -0
- data/lib/coderay/encoders/text.rb +31 -17
- data/lib/coderay/encoders/token_kind_filter.rb +111 -0
- data/lib/coderay/encoders/xml.rb +19 -18
- data/lib/coderay/encoders/yaml.rb +37 -9
- data/lib/coderay/for_redcloth.rb +4 -4
- data/lib/coderay/helpers/file_type.rb +127 -246
- data/lib/coderay/helpers/gzip.rb +41 -0
- data/lib/coderay/helpers/plugin.rb +241 -306
- data/lib/coderay/helpers/word_list.rb +65 -126
- data/lib/coderay/scanner.rb +173 -156
- data/lib/coderay/scanners/_map.rb +18 -17
- data/lib/coderay/scanners/c.rb +63 -77
- data/lib/coderay/scanners/clojure.rb +217 -0
- data/lib/coderay/scanners/cpp.rb +71 -84
- data/lib/coderay/scanners/css.rb +103 -120
- data/lib/coderay/scanners/debug.rb +47 -44
- data/lib/coderay/scanners/delphi.rb +70 -76
- data/lib/coderay/scanners/diff.rb +141 -50
- data/lib/coderay/scanners/erb.rb +81 -0
- data/lib/coderay/scanners/groovy.rb +104 -113
- data/lib/coderay/scanners/haml.rb +168 -0
- data/lib/coderay/scanners/html.rb +181 -110
- data/lib/coderay/scanners/java.rb +73 -75
- data/lib/coderay/scanners/java/builtin_types.rb +2 -0
- data/lib/coderay/scanners/java_script.rb +90 -101
- data/lib/coderay/scanners/json.rb +40 -53
- data/lib/coderay/scanners/php.rb +123 -147
- data/lib/coderay/scanners/python.rb +93 -91
- data/lib/coderay/scanners/raydebug.rb +66 -0
- data/lib/coderay/scanners/ruby.rb +343 -326
- data/lib/coderay/scanners/ruby/patterns.rb +40 -106
- data/lib/coderay/scanners/ruby/string_state.rb +71 -0
- data/lib/coderay/scanners/sql.rb +80 -66
- data/lib/coderay/scanners/text.rb +26 -0
- data/lib/coderay/scanners/xml.rb +1 -1
- data/lib/coderay/scanners/yaml.rb +74 -73
- data/lib/coderay/style.rb +10 -7
- data/lib/coderay/styles/_map.rb +3 -3
- data/lib/coderay/styles/alpha.rb +143 -0
- data/lib/coderay/token_kinds.rb +90 -0
- data/lib/coderay/tokens.rb +102 -277
- data/lib/coderay/tokens_proxy.rb +55 -0
- data/lib/coderay/version.rb +3 -0
- data/test/functional/basic.rb +200 -18
- data/test/functional/examples.rb +130 -0
- data/test/functional/for_redcloth.rb +15 -8
- data/test/functional/suite.rb +9 -6
- metadata +103 -123
- data/FOLDERS +0 -53
- data/bin/coderay_stylesheet +0 -4
- data/lib/coderay/encoders/html/numerization.rb +0 -133
- data/lib/coderay/encoders/term.rb +0 -158
- data/lib/coderay/encoders/token_class_filter.rb +0 -84
- data/lib/coderay/helpers/gzip_simple.rb +0 -123
- data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
- data/lib/coderay/scanners/plaintext.rb +0 -20
- data/lib/coderay/scanners/rhtml.rb +0 -78
- data/lib/coderay/scanners/scheme.rb +0 -145
- data/lib/coderay/styles/cycnus.rb +0 -152
- data/lib/coderay/styles/murphy.rb +0 -134
- data/lib/coderay/token_classes.rb +0 -86
- data/test/functional/load_plugin_scanner.rb +0 -11
- data/test/functional/vhdl.rb +0 -126
- data/test/functional/word_list.rb +0 -79
@@ -1,138 +1,77 @@
|
|
1
1
|
module CodeRay
|
2
|
-
|
3
|
-
# = WordList
|
4
|
-
#
|
5
|
-
# <b>A Hash subclass designed for mapping word lists to token types.</b>
|
6
|
-
#
|
7
|
-
# Copyright (c) 2006 by murphy (Kornelius Kalnbach) <murphy rubychan de>
|
8
|
-
#
|
9
|
-
# License:: LGPL / ask the author
|
10
|
-
# Version:: 1.1 (2006-Oct-19)
|
11
|
-
#
|
12
|
-
# A WordList is a Hash with some additional features.
|
13
|
-
# It is intended to be used for keyword recognition.
|
14
|
-
#
|
15
|
-
# WordList is highly optimized to be used in Scanners,
|
16
|
-
# typically to decide whether a given ident is a special token.
|
17
|
-
#
|
18
|
-
# For case insensitive words use CaseIgnoringWordList.
|
19
|
-
#
|
20
|
-
# Example:
|
21
|
-
#
|
22
|
-
# # define word arrays
|
23
|
-
# RESERVED_WORDS = %w[
|
24
|
-
# asm break case continue default do else
|
25
|
-
# ...
|
26
|
-
# ]
|
27
|
-
#
|
28
|
-
# PREDEFINED_TYPES = %w[
|
29
|
-
# int long short char void
|
30
|
-
# ...
|
31
|
-
# ]
|
32
|
-
#
|
33
|
-
# PREDEFINED_CONSTANTS = %w[
|
34
|
-
# EOF NULL ...
|
35
|
-
# ]
|
36
|
-
#
|
37
|
-
# # make a WordList
|
38
|
-
# IDENT_KIND = WordList.new(:ident).
|
39
|
-
# add(RESERVED_WORDS, :reserved).
|
40
|
-
# add(PREDEFINED_TYPES, :pre_type).
|
41
|
-
# add(PREDEFINED_CONSTANTS, :pre_constant)
|
42
|
-
#
|
43
|
-
# ...
|
44
|
-
#
|
45
|
-
# def scan_tokens tokens, options
|
46
|
-
# ...
|
47
|
-
#
|
48
|
-
# elsif scan(/[A-Za-z_][A-Za-z_0-9]*/)
|
49
|
-
# # use it
|
50
|
-
# kind = IDENT_KIND[match]
|
51
|
-
# ...
|
52
|
-
class WordList < Hash
|
53
|
-
|
54
|
-
# Creates a new WordList with +default+ as default value.
|
55
|
-
#
|
56
|
-
# You can activate +caching+ to store the results for every [] request.
|
2
|
+
|
3
|
+
# = WordList
|
57
4
|
#
|
58
|
-
#
|
59
|
-
# as you expect. Therefore, it is recommended to use the [] method only.
|
60
|
-
def initialize default = false, caching = false, &block
|
61
|
-
if block
|
62
|
-
raise ArgumentError, 'Can\'t combine block with caching.' if caching
|
63
|
-
super(&block)
|
64
|
-
else
|
65
|
-
if caching
|
66
|
-
super() do |h, k|
|
67
|
-
h[k] = h.fetch k, default
|
68
|
-
end
|
69
|
-
else
|
70
|
-
super default
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
# Add words to the list and associate them with +kind+.
|
5
|
+
# <b>A Hash subclass designed for mapping word lists to token types.</b>
|
76
6
|
#
|
77
|
-
#
|
78
|
-
|
79
|
-
|
80
|
-
|
7
|
+
# Copyright (c) 2006-2011 by murphy (Kornelius Kalnbach) <murphy rubychan de>
|
8
|
+
#
|
9
|
+
# License:: LGPL / ask the author
|
10
|
+
# Version:: 2.0 (2011-05-08)
|
11
|
+
#
|
12
|
+
# A WordList is a Hash with some additional features.
|
13
|
+
# It is intended to be used for keyword recognition.
|
14
|
+
#
|
15
|
+
# WordList is optimized to be used in Scanners,
|
16
|
+
# typically to decide whether a given ident is a special token.
|
17
|
+
#
|
18
|
+
# For case insensitive words use WordList::CaseIgnoring.
|
19
|
+
#
|
20
|
+
# Example:
|
21
|
+
#
|
22
|
+
# # define word arrays
|
23
|
+
# RESERVED_WORDS = %w[
|
24
|
+
# asm break case continue default do else
|
25
|
+
# ]
|
26
|
+
#
|
27
|
+
# PREDEFINED_TYPES = %w[
|
28
|
+
# int long short char void
|
29
|
+
# ]
|
30
|
+
#
|
31
|
+
# # make a WordList
|
32
|
+
# IDENT_KIND = WordList.new(:ident).
|
33
|
+
# add(RESERVED_WORDS, :reserved).
|
34
|
+
# add(PREDEFINED_TYPES, :predefined_type)
|
35
|
+
#
|
36
|
+
# ...
|
37
|
+
#
|
38
|
+
# def scan_tokens tokens, options
|
39
|
+
# ...
|
40
|
+
#
|
41
|
+
# elsif scan(/[A-Za-z_][A-Za-z_0-9]*/)
|
42
|
+
# # use it
|
43
|
+
# kind = IDENT_KIND[match]
|
44
|
+
# ...
|
45
|
+
class WordList < Hash
|
46
|
+
|
47
|
+
# Create a new WordList with +default+ as default value.
|
48
|
+
def initialize default = false
|
49
|
+
super default
|
81
50
|
end
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
# keys are compared case-insensitively.
|
90
|
-
#
|
91
|
-
# Ignoring the text case is realized by sending the +downcase+ message to
|
92
|
-
# all keys.
|
93
|
-
#
|
94
|
-
# Caching usually makes a CaseIgnoringWordList faster, but it has to be
|
95
|
-
# activated explicitely.
|
96
|
-
class CaseIgnoringWordList < WordList
|
97
|
-
|
98
|
-
# Creates a new case-insensitive WordList with +default+ as default value.
|
99
|
-
#
|
100
|
-
# You can activate caching to store the results for every [] request.
|
101
|
-
# This speeds up subsequent lookups for the same word, but also
|
102
|
-
# uses memory.
|
103
|
-
def initialize default = false, caching = false
|
104
|
-
if caching
|
105
|
-
super(default, false) do |h, k|
|
106
|
-
h[k] = h.fetch k.downcase, default
|
107
|
-
end
|
108
|
-
else
|
109
|
-
super(default, false)
|
110
|
-
extend Uncached
|
51
|
+
|
52
|
+
# Add words to the list and associate them with +value+.
|
53
|
+
#
|
54
|
+
# Returns +self+, so you can chain add calls.
|
55
|
+
def add words, value = true
|
56
|
+
words.each { |word| self[word] = value }
|
57
|
+
self
|
111
58
|
end
|
59
|
+
|
112
60
|
end
|
113
61
|
|
114
|
-
|
62
|
+
|
63
|
+
# A CaseIgnoring WordList is like a WordList, only that
|
64
|
+
# keys are compared case-insensitively (normalizing keys using +downcase+).
|
65
|
+
class WordList::CaseIgnoring < WordList
|
66
|
+
|
115
67
|
def [] key
|
116
|
-
super
|
68
|
+
super key.downcase
|
117
69
|
end
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
def add words, kind = true
|
122
|
-
words.each do |word|
|
123
|
-
self[word.downcase] = kind
|
70
|
+
|
71
|
+
def []= key, value
|
72
|
+
super key.downcase, value
|
124
73
|
end
|
125
|
-
|
74
|
+
|
126
75
|
end
|
127
|
-
|
128
|
-
end
|
129
|
-
|
76
|
+
|
130
77
|
end
|
131
|
-
|
132
|
-
__END__
|
133
|
-
# check memory consumption
|
134
|
-
END {
|
135
|
-
ObjectSpace.each_object(CodeRay::CaseIgnoringWordList) do |wl|
|
136
|
-
p wl.inject(0) { |memo, key, value| memo + key.size + 24 }
|
137
|
-
end
|
138
|
-
}
|
data/lib/coderay/scanner.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
|
-
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'strscan'
|
2
3
|
|
3
|
-
|
4
|
+
module CodeRay
|
4
5
|
|
6
|
+
autoload :WordList, 'coderay/helpers/word_list'
|
7
|
+
|
5
8
|
# = Scanners
|
6
9
|
#
|
7
10
|
# This module holds the Scanner class and its subclasses.
|
@@ -16,9 +19,8 @@ module CodeRay
|
|
16
19
|
module Scanners
|
17
20
|
extend PluginHost
|
18
21
|
plugin_path File.dirname(__FILE__), 'scanners'
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
+
|
23
|
+
|
22
24
|
# = Scanner
|
23
25
|
#
|
24
26
|
# The base class for all Scanners.
|
@@ -46,64 +48,89 @@ module CodeRay
|
|
46
48
|
|
47
49
|
extend Plugin
|
48
50
|
plugin_host Scanners
|
49
|
-
|
51
|
+
|
50
52
|
# Raised if a Scanner fails while scanning
|
51
|
-
ScanError = Class.new
|
52
|
-
|
53
|
-
require 'coderay/helpers/word_list'
|
54
|
-
|
53
|
+
ScanError = Class.new StandardError
|
54
|
+
|
55
55
|
# The default options for all scanner classes.
|
56
56
|
#
|
57
57
|
# Define @default_options for subclasses.
|
58
|
-
DEFAULT_OPTIONS = {
|
58
|
+
DEFAULT_OPTIONS = { }
|
59
|
+
|
60
|
+
KINDS_NOT_LOC = [:comment, :doctype, :docstring]
|
61
|
+
|
62
|
+
attr_accessor :state
|
59
63
|
|
60
|
-
KINDS_NOT_LOC = [:comment, :doctype]
|
61
|
-
|
62
64
|
class << self
|
63
|
-
|
64
|
-
#
|
65
|
-
|
66
|
-
|
65
|
+
|
66
|
+
# Normalizes the given code into a string with UNIX newlines, in the
|
67
|
+
# scanner's internal encoding, with invalid and undefined characters
|
68
|
+
# replaced by placeholders. Always returns a new object.
|
69
|
+
def normalize code
|
70
|
+
# original = code
|
71
|
+
code = code.to_s unless code.is_a? ::String
|
72
|
+
return code if code.empty?
|
73
|
+
|
74
|
+
if code.respond_to? :encoding
|
75
|
+
code = encode_with_encoding code, self.encoding
|
76
|
+
else
|
77
|
+
code = to_unix code
|
78
|
+
end
|
79
|
+
# code = code.dup if code.eql? original
|
80
|
+
code
|
67
81
|
end
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
82
|
+
|
83
|
+
# The typical filename suffix for this scanner's language.
|
84
|
+
def file_extension extension = lang
|
85
|
+
@file_extension ||= extension.to_s
|
86
|
+
end
|
87
|
+
|
88
|
+
# The encoding used internally by this scanner.
|
89
|
+
def encoding name = 'UTF-8'
|
90
|
+
@encoding ||= defined?(Encoding.find) && Encoding.find(name)
|
91
|
+
end
|
92
|
+
|
93
|
+
# The lang of this Scanner class, which is equal to its Plugin ID.
|
94
|
+
def lang
|
95
|
+
@plugin_id
|
96
|
+
end
|
97
|
+
|
98
|
+
protected
|
99
|
+
|
100
|
+
def encode_with_encoding code, target_encoding
|
101
|
+
if code.encoding == target_encoding
|
102
|
+
if code.valid_encoding?
|
103
|
+
return to_unix(code)
|
104
|
+
else
|
105
|
+
source_encoding = guess_encoding code
|
81
106
|
end
|
107
|
+
else
|
108
|
+
source_encoding = code.encoding
|
82
109
|
end
|
83
|
-
|
110
|
+
# print "encode_with_encoding from #{source_encoding} to #{target_encoding}"
|
111
|
+
code.encode target_encoding, source_encoding, :universal_newline => true, :undef => :replace, :invalid => :replace
|
84
112
|
end
|
85
113
|
|
86
|
-
def
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
114
|
+
def to_unix code
|
115
|
+
code.index(?\r) ? code.gsub(/\r\n?/, "\n") : code
|
116
|
+
end
|
117
|
+
|
118
|
+
def guess_encoding s
|
119
|
+
#:nocov:
|
120
|
+
IO.popen("file -b --mime -", "w+") do |file|
|
121
|
+
file.write s[0, 1024]
|
122
|
+
file.close_write
|
123
|
+
begin
|
124
|
+
Encoding.find file.gets[/charset=([-\w]+)/, 1]
|
125
|
+
rescue ArgumentError
|
126
|
+
Encoding::BINARY
|
127
|
+
end
|
91
128
|
end
|
129
|
+
#:nocov:
|
92
130
|
end
|
93
|
-
|
131
|
+
|
94
132
|
end
|
95
|
-
|
96
|
-
=begin
|
97
|
-
## Excluded for speed reasons; protected seems to make methods slow.
|
98
|
-
|
99
|
-
# Save the StringScanner methods from being called.
|
100
|
-
# This would not be useful for highlighting.
|
101
|
-
strscan_public_methods =
|
102
|
-
StringScanner.instance_methods -
|
103
|
-
StringScanner.ancestors[1].instance_methods
|
104
|
-
protected(*strscan_public_methods)
|
105
|
-
=end
|
106
|
-
|
133
|
+
|
107
134
|
# Create a new Scanner.
|
108
135
|
#
|
109
136
|
# * +code+ is the input String and is handled by the superclass
|
@@ -111,146 +138,147 @@ module CodeRay
|
|
111
138
|
# * +options+ is a Hash with Symbols as keys.
|
112
139
|
# It is merged with the default options of the class (you can
|
113
140
|
# overwrite default options here.)
|
114
|
-
# * +block+ is the callback for streamed highlighting.
|
115
|
-
#
|
116
|
-
# If you set :stream to +true+ in the options, the Scanner uses a
|
117
|
-
# TokenStream with the +block+ as callback to handle the tokens.
|
118
141
|
#
|
119
142
|
# Else, a Tokens object is used.
|
120
|
-
def initialize code='', options = {}
|
121
|
-
|
122
|
-
"anything. :( Use my subclasses."
|
143
|
+
def initialize code = '', options = {}
|
144
|
+
if self.class == Scanner
|
145
|
+
raise NotImplementedError, "I am only the basic Scanner class. I can't scan anything. :( Use my subclasses."
|
146
|
+
end
|
123
147
|
|
124
148
|
@options = self.class::DEFAULT_OPTIONS.merge options
|
125
|
-
|
126
|
-
super
|
127
|
-
|
128
|
-
@tokens = options[:tokens]
|
129
|
-
if @
|
130
|
-
|
131
|
-
"but no block was given" unless block_given?
|
132
|
-
raise NotStreamableError, self unless kind_of? Streamable
|
133
|
-
@tokens ||= TokenStream.new(&block)
|
134
|
-
else
|
135
|
-
warn "warning in CodeRay::Scanner.new: Block given, "\
|
136
|
-
"but :stream is #{@options[:stream]}" if block_given?
|
137
|
-
@tokens ||= Tokens.new
|
138
|
-
end
|
139
|
-
@tokens.scanner = self
|
140
|
-
|
149
|
+
|
150
|
+
super self.class.normalize(code)
|
151
|
+
|
152
|
+
@tokens = options[:tokens] || Tokens.new
|
153
|
+
@tokens.scanner = self if @tokens.respond_to? :scanner=
|
154
|
+
|
141
155
|
setup
|
142
156
|
end
|
143
|
-
|
157
|
+
|
158
|
+
# Resets the scanner. Subclasses should redefine the reset_instance
|
159
|
+
# method instead of this one.
|
144
160
|
def reset
|
145
161
|
super
|
146
162
|
reset_instance
|
147
163
|
end
|
148
|
-
|
164
|
+
|
165
|
+
# Set a new string to be scanned.
|
149
166
|
def string= code
|
150
|
-
code =
|
151
|
-
|
152
|
-
reset_state
|
153
|
-
@string = code
|
154
|
-
else
|
155
|
-
super code
|
156
|
-
end
|
167
|
+
code = self.class.normalize(code)
|
168
|
+
super code
|
157
169
|
reset_instance
|
158
170
|
end
|
159
|
-
|
160
|
-
#
|
161
|
-
alias code string
|
162
|
-
alias code= string=
|
163
|
-
|
164
|
-
# Returns the Plugin ID for this scanner.
|
171
|
+
|
172
|
+
# the Plugin ID for this scanner
|
165
173
|
def lang
|
166
|
-
self.class.
|
174
|
+
self.class.lang
|
167
175
|
end
|
168
|
-
|
169
|
-
#
|
170
|
-
def
|
176
|
+
|
177
|
+
# the default file extension for this scanner
|
178
|
+
def file_extension
|
179
|
+
self.class.file_extension
|
180
|
+
end
|
181
|
+
|
182
|
+
# Scans the code and returns all tokens in a Tokens object.
|
183
|
+
def tokenize source = nil, options = {}
|
171
184
|
options = @options.merge(options)
|
172
|
-
|
173
|
-
@
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
185
|
+
@tokens = options[:tokens] || @tokens || Tokens.new
|
186
|
+
@tokens.scanner = self if @tokens.respond_to? :scanner=
|
187
|
+
case source
|
188
|
+
when Array
|
189
|
+
self.string = self.class.normalize(source.join)
|
190
|
+
when nil
|
191
|
+
reset
|
192
|
+
else
|
193
|
+
self.string = self.class.normalize(source)
|
194
|
+
end
|
195
|
+
|
196
|
+
begin
|
197
|
+
scan_tokens @tokens, options
|
198
|
+
rescue => e
|
199
|
+
message = "Error in %s#scan_tokens, initial state was: %p" % [self.class, defined?(state) && state]
|
200
|
+
raise_inspect e.message, @tokens, message, 30, e.backtrace
|
201
|
+
end
|
202
|
+
|
203
|
+
@cached_tokens = @tokens
|
204
|
+
if source.is_a? Array
|
205
|
+
@tokens.split_into_parts(*source.map { |part| part.size })
|
206
|
+
else
|
207
|
+
@tokens
|
208
|
+
end
|
181
209
|
end
|
182
|
-
|
210
|
+
|
211
|
+
# Cache the result of tokenize.
|
183
212
|
def tokens
|
184
213
|
@cached_tokens ||= tokenize
|
185
214
|
end
|
186
215
|
|
187
|
-
#
|
188
|
-
def streaming?
|
189
|
-
!!@options[:stream]
|
190
|
-
end
|
191
|
-
|
192
|
-
# Traverses the tokens.
|
216
|
+
# Traverse the tokens.
|
193
217
|
def each &block
|
194
|
-
raise ArgumentError,
|
195
|
-
'Cannot traverse TokenStream.' if @options[:stream]
|
196
218
|
tokens.each(&block)
|
197
219
|
end
|
198
220
|
include Enumerable
|
199
|
-
|
200
|
-
# The current line position of the scanner.
|
221
|
+
|
222
|
+
# The current line position of the scanner, starting with 1.
|
223
|
+
# See also: #column.
|
201
224
|
#
|
202
225
|
# Beware, this is implemented inefficiently. It should be used
|
203
226
|
# for debugging only.
|
204
|
-
def line
|
205
|
-
|
227
|
+
def line pos = self.pos
|
228
|
+
return 1 if pos <= 0
|
229
|
+
binary_string[0...pos].count("\n") + 1
|
206
230
|
end
|
207
231
|
|
232
|
+
# The current column position of the scanner, starting with 1.
|
233
|
+
# See also: #line.
|
208
234
|
def column pos = self.pos
|
209
|
-
return
|
210
|
-
|
211
|
-
if string.respond_to?(:bytesize) && (defined?(@bin_string) || string.bytesize != string.size)
|
212
|
-
@bin_string ||= string.dup.force_encoding('binary')
|
213
|
-
string = @bin_string
|
214
|
-
end
|
215
|
-
pos - (string.rindex(?\n, pos) || 0)
|
235
|
+
return 1 if pos <= 0
|
236
|
+
pos - (binary_string.rindex(?\n, pos - 1) || -1)
|
216
237
|
end
|
217
238
|
|
218
|
-
|
219
|
-
|
239
|
+
# The string in binary encoding.
|
240
|
+
#
|
241
|
+
# To be used with #pos, which is the index of the byte the scanner
|
242
|
+
# will scan next.
|
243
|
+
def binary_string
|
244
|
+
@binary_string ||=
|
245
|
+
if string.respond_to?(:bytesize) && string.bytesize != string.size
|
246
|
+
#:nocov:
|
247
|
+
string.dup.force_encoding('binary')
|
248
|
+
#:nocov:
|
249
|
+
else
|
250
|
+
string
|
251
|
+
end
|
220
252
|
end
|
221
253
|
|
222
|
-
def marshal_load options
|
223
|
-
@options = options
|
224
|
-
end
|
225
|
-
|
226
254
|
protected
|
227
|
-
|
255
|
+
|
228
256
|
# Can be implemented by subclasses to do some initialization
|
229
257
|
# that has to be done once per instance.
|
230
258
|
#
|
231
259
|
# Use reset for initialization that has to be done once per
|
232
260
|
# scan.
|
233
|
-
def setup
|
261
|
+
def setup # :doc:
|
234
262
|
end
|
235
|
-
|
263
|
+
|
236
264
|
# This is the central method, and commonly the only one a
|
237
265
|
# subclass implements.
|
238
266
|
#
|
239
267
|
# Subclasses must implement this method; it must return +tokens+
|
240
268
|
# and must only use Tokens#<< for storing scanned tokens!
|
241
|
-
def scan_tokens tokens, options
|
242
|
-
raise NotImplementedError,
|
243
|
-
"#{self.class}#scan_tokens not implemented."
|
269
|
+
def scan_tokens tokens, options # :doc:
|
270
|
+
raise NotImplementedError, "#{self.class}#scan_tokens not implemented."
|
244
271
|
end
|
245
|
-
|
272
|
+
|
273
|
+
# Resets the scanner.
|
246
274
|
def reset_instance
|
247
|
-
@tokens.clear
|
275
|
+
@tokens.clear if @tokens.respond_to?(:clear) && !@options[:keep_tokens]
|
248
276
|
@cached_tokens = nil
|
249
|
-
@
|
277
|
+
@binary_string = nil if defined? @binary_string
|
250
278
|
end
|
251
|
-
|
279
|
+
|
252
280
|
# Scanner error with additional status information
|
253
|
-
def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
|
281
|
+
def raise_inspect msg, tokens, state = self.state || 'No state given!', ambit = 30, backtrace = caller
|
254
282
|
raise ScanError, <<-EOE % [
|
255
283
|
|
256
284
|
|
@@ -272,13 +300,13 @@ surrounding code:
|
|
272
300
|
EOE
|
273
301
|
File.basename(caller[0]),
|
274
302
|
msg,
|
275
|
-
tokens.size,
|
276
|
-
tokens.last(10).map { |t| t.inspect }.join("\n"),
|
303
|
+
tokens.respond_to?(:size) ? tokens.size : 0,
|
304
|
+
tokens.respond_to?(:last) ? tokens.last(10).map { |t| t.inspect }.join("\n") : '',
|
277
305
|
line, column, pos,
|
278
306
|
matched, state, bol?, eos?,
|
279
|
-
|
280
|
-
|
281
|
-
]
|
307
|
+
binary_string[pos - ambit, ambit],
|
308
|
+
binary_string[pos, ambit],
|
309
|
+
], backtrace
|
282
310
|
end
|
283
311
|
|
284
312
|
# Shorthand for scan_until(/\z/).
|
@@ -288,19 +316,8 @@ surrounding code:
|
|
288
316
|
terminate
|
289
317
|
rest
|
290
318
|
end
|
291
|
-
|
292
|
-
end
|
293
|
-
|
294
|
-
end
|
295
|
-
end
|
296
|
-
|
297
|
-
class String
|
298
|
-
# I love this hack. It seems to silence all dos/unix/mac newline problems.
|
299
|
-
def to_unix
|
300
|
-
if index ?\r
|
301
|
-
gsub(/\r\n?/, "\n")
|
302
|
-
else
|
303
|
-
self
|
319
|
+
|
304
320
|
end
|
321
|
+
|
305
322
|
end
|
306
|
-
end
|
323
|
+
end
|