coderay 0.9.8 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. data/{lib/README → README_INDEX.rdoc} +10 -21
  2. data/Rakefile +6 -6
  3. data/bin/coderay +193 -64
  4. data/lib/coderay.rb +61 -105
  5. data/lib/coderay/duo.rb +17 -21
  6. data/lib/coderay/encoder.rb +100 -112
  7. data/lib/coderay/encoders/_map.rb +12 -7
  8. data/lib/coderay/encoders/comment_filter.rb +12 -30
  9. data/lib/coderay/encoders/count.rb +29 -11
  10. data/lib/coderay/encoders/debug.rb +32 -20
  11. data/lib/coderay/encoders/div.rb +13 -9
  12. data/lib/coderay/encoders/filter.rb +34 -51
  13. data/lib/coderay/encoders/html.rb +155 -161
  14. data/lib/coderay/encoders/html/css.rb +4 -9
  15. data/lib/coderay/encoders/html/numbering.rb +115 -0
  16. data/lib/coderay/encoders/html/output.rb +22 -70
  17. data/lib/coderay/encoders/json.rb +59 -45
  18. data/lib/coderay/encoders/lines_of_code.rb +12 -57
  19. data/lib/coderay/encoders/null.rb +6 -14
  20. data/lib/coderay/encoders/page.rb +13 -9
  21. data/lib/coderay/encoders/span.rb +13 -9
  22. data/lib/coderay/encoders/statistic.rb +58 -39
  23. data/lib/coderay/encoders/terminal.rb +179 -0
  24. data/lib/coderay/encoders/text.rb +31 -17
  25. data/lib/coderay/encoders/token_kind_filter.rb +111 -0
  26. data/lib/coderay/encoders/xml.rb +19 -18
  27. data/lib/coderay/encoders/yaml.rb +37 -9
  28. data/lib/coderay/for_redcloth.rb +4 -4
  29. data/lib/coderay/helpers/file_type.rb +127 -246
  30. data/lib/coderay/helpers/gzip.rb +41 -0
  31. data/lib/coderay/helpers/plugin.rb +241 -306
  32. data/lib/coderay/helpers/word_list.rb +65 -126
  33. data/lib/coderay/scanner.rb +173 -156
  34. data/lib/coderay/scanners/_map.rb +18 -17
  35. data/lib/coderay/scanners/c.rb +63 -77
  36. data/lib/coderay/scanners/clojure.rb +217 -0
  37. data/lib/coderay/scanners/cpp.rb +71 -84
  38. data/lib/coderay/scanners/css.rb +103 -120
  39. data/lib/coderay/scanners/debug.rb +47 -44
  40. data/lib/coderay/scanners/delphi.rb +70 -76
  41. data/lib/coderay/scanners/diff.rb +141 -50
  42. data/lib/coderay/scanners/erb.rb +81 -0
  43. data/lib/coderay/scanners/groovy.rb +104 -113
  44. data/lib/coderay/scanners/haml.rb +168 -0
  45. data/lib/coderay/scanners/html.rb +181 -110
  46. data/lib/coderay/scanners/java.rb +73 -75
  47. data/lib/coderay/scanners/java/builtin_types.rb +2 -0
  48. data/lib/coderay/scanners/java_script.rb +90 -101
  49. data/lib/coderay/scanners/json.rb +40 -53
  50. data/lib/coderay/scanners/php.rb +123 -147
  51. data/lib/coderay/scanners/python.rb +93 -91
  52. data/lib/coderay/scanners/raydebug.rb +66 -0
  53. data/lib/coderay/scanners/ruby.rb +343 -326
  54. data/lib/coderay/scanners/ruby/patterns.rb +40 -106
  55. data/lib/coderay/scanners/ruby/string_state.rb +71 -0
  56. data/lib/coderay/scanners/sql.rb +80 -66
  57. data/lib/coderay/scanners/text.rb +26 -0
  58. data/lib/coderay/scanners/xml.rb +1 -1
  59. data/lib/coderay/scanners/yaml.rb +74 -73
  60. data/lib/coderay/style.rb +10 -7
  61. data/lib/coderay/styles/_map.rb +3 -3
  62. data/lib/coderay/styles/alpha.rb +143 -0
  63. data/lib/coderay/token_kinds.rb +90 -0
  64. data/lib/coderay/tokens.rb +102 -277
  65. data/lib/coderay/tokens_proxy.rb +55 -0
  66. data/lib/coderay/version.rb +3 -0
  67. data/test/functional/basic.rb +200 -18
  68. data/test/functional/examples.rb +130 -0
  69. data/test/functional/for_redcloth.rb +15 -8
  70. data/test/functional/suite.rb +9 -6
  71. metadata +103 -123
  72. data/FOLDERS +0 -53
  73. data/bin/coderay_stylesheet +0 -4
  74. data/lib/coderay/encoders/html/numerization.rb +0 -133
  75. data/lib/coderay/encoders/term.rb +0 -158
  76. data/lib/coderay/encoders/token_class_filter.rb +0 -84
  77. data/lib/coderay/helpers/gzip_simple.rb +0 -123
  78. data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
  79. data/lib/coderay/scanners/plaintext.rb +0 -20
  80. data/lib/coderay/scanners/rhtml.rb +0 -78
  81. data/lib/coderay/scanners/scheme.rb +0 -145
  82. data/lib/coderay/styles/cycnus.rb +0 -152
  83. data/lib/coderay/styles/murphy.rb +0 -134
  84. data/lib/coderay/token_classes.rb +0 -86
  85. data/test/functional/load_plugin_scanner.rb +0 -11
  86. data/test/functional/vhdl.rb +0 -126
  87. data/test/functional/word_list.rb +0 -79
@@ -1,138 +1,77 @@
1
1
  module CodeRay
2
-
3
- # = WordList
4
- #
5
- # <b>A Hash subclass designed for mapping word lists to token types.</b>
6
- #
7
- # Copyright (c) 2006 by murphy (Kornelius Kalnbach) <murphy rubychan de>
8
- #
9
- # License:: LGPL / ask the author
10
- # Version:: 1.1 (2006-Oct-19)
11
- #
12
- # A WordList is a Hash with some additional features.
13
- # It is intended to be used for keyword recognition.
14
- #
15
- # WordList is highly optimized to be used in Scanners,
16
- # typically to decide whether a given ident is a special token.
17
- #
18
- # For case insensitive words use CaseIgnoringWordList.
19
- #
20
- # Example:
21
- #
22
- # # define word arrays
23
- # RESERVED_WORDS = %w[
24
- # asm break case continue default do else
25
- # ...
26
- # ]
27
- #
28
- # PREDEFINED_TYPES = %w[
29
- # int long short char void
30
- # ...
31
- # ]
32
- #
33
- # PREDEFINED_CONSTANTS = %w[
34
- # EOF NULL ...
35
- # ]
36
- #
37
- # # make a WordList
38
- # IDENT_KIND = WordList.new(:ident).
39
- # add(RESERVED_WORDS, :reserved).
40
- # add(PREDEFINED_TYPES, :pre_type).
41
- # add(PREDEFINED_CONSTANTS, :pre_constant)
42
- #
43
- # ...
44
- #
45
- # def scan_tokens tokens, options
46
- # ...
47
- #
48
- # elsif scan(/[A-Za-z_][A-Za-z_0-9]*/)
49
- # # use it
50
- # kind = IDENT_KIND[match]
51
- # ...
52
- class WordList < Hash
53
-
54
- # Creates a new WordList with +default+ as default value.
55
- #
56
- # You can activate +caching+ to store the results for every [] request.
2
+
3
+ # = WordList
57
4
  #
58
- # With caching, methods like +include?+ or +delete+ may no longer behave
59
- # as you expect. Therefore, it is recommended to use the [] method only.
60
- def initialize default = false, caching = false, &block
61
- if block
62
- raise ArgumentError, 'Can\'t combine block with caching.' if caching
63
- super(&block)
64
- else
65
- if caching
66
- super() do |h, k|
67
- h[k] = h.fetch k, default
68
- end
69
- else
70
- super default
71
- end
72
- end
73
- end
74
-
75
- # Add words to the list and associate them with +kind+.
5
+ # <b>A Hash subclass designed for mapping word lists to token types.</b>
76
6
  #
77
- # Returns +self+, so you can concat add calls.
78
- def add words, kind = true
79
- words.each do |word|
80
- self[word] = kind
7
+ # Copyright (c) 2006-2011 by murphy (Kornelius Kalnbach) <murphy rubychan de>
8
+ #
9
+ # License:: LGPL / ask the author
10
+ # Version:: 2.0 (2011-05-08)
11
+ #
12
+ # A WordList is a Hash with some additional features.
13
+ # It is intended to be used for keyword recognition.
14
+ #
15
+ # WordList is optimized to be used in Scanners,
16
+ # typically to decide whether a given ident is a special token.
17
+ #
18
+ # For case insensitive words use WordList::CaseIgnoring.
19
+ #
20
+ # Example:
21
+ #
22
+ # # define word arrays
23
+ # RESERVED_WORDS = %w[
24
+ # asm break case continue default do else
25
+ # ]
26
+ #
27
+ # PREDEFINED_TYPES = %w[
28
+ # int long short char void
29
+ # ]
30
+ #
31
+ # # make a WordList
32
+ # IDENT_KIND = WordList.new(:ident).
33
+ # add(RESERVED_WORDS, :reserved).
34
+ # add(PREDEFINED_TYPES, :predefined_type)
35
+ #
36
+ # ...
37
+ #
38
+ # def scan_tokens tokens, options
39
+ # ...
40
+ #
41
+ # elsif scan(/[A-Za-z_][A-Za-z_0-9]*/)
42
+ # # use it
43
+ # kind = IDENT_KIND[match]
44
+ # ...
45
+ class WordList < Hash
46
+
47
+ # Create a new WordList with +default+ as default value.
48
+ def initialize default = false
49
+ super default
81
50
  end
82
- self
83
- end
84
-
85
- end
86
-
87
-
88
- # A CaseIgnoringWordList is like a WordList, only that
89
- # keys are compared case-insensitively.
90
- #
91
- # Ignoring the text case is realized by sending the +downcase+ message to
92
- # all keys.
93
- #
94
- # Caching usually makes a CaseIgnoringWordList faster, but it has to be
95
- # activated explicitely.
96
- class CaseIgnoringWordList < WordList
97
-
98
- # Creates a new case-insensitive WordList with +default+ as default value.
99
- #
100
- # You can activate caching to store the results for every [] request.
101
- # This speeds up subsequent lookups for the same word, but also
102
- # uses memory.
103
- def initialize default = false, caching = false
104
- if caching
105
- super(default, false) do |h, k|
106
- h[k] = h.fetch k.downcase, default
107
- end
108
- else
109
- super(default, false)
110
- extend Uncached
51
+
52
+ # Add words to the list and associate them with +value+.
53
+ #
54
+ # Returns +self+, so you can concat add calls.
55
+ def add words, value = true
56
+ words.each { |word| self[word] = value }
57
+ self
111
58
  end
59
+
112
60
  end
113
61
 
114
- module Uncached # :nodoc:
62
+
63
+ # A CaseIgnoring WordList is like a WordList, only that
64
+ # keys are compared case-insensitively (normalizing keys using +downcase+).
65
+ class WordList::CaseIgnoring < WordList
66
+
115
67
  def [] key
116
- super(key.downcase)
68
+ super key.downcase
117
69
  end
118
- end
119
-
120
- # Add +words+ to the list and associate them with +kind+.
121
- def add words, kind = true
122
- words.each do |word|
123
- self[word.downcase] = kind
70
+
71
+ def []= key, value
72
+ super key.downcase, value
124
73
  end
125
- self
74
+
126
75
  end
127
-
128
- end
129
-
76
+
130
77
  end
131
-
132
- __END__
133
- # check memory consumption
134
- END {
135
- ObjectSpace.each_object(CodeRay::CaseIgnoringWordList) do |wl|
136
- p wl.inject(0) { |memo, key, value| memo + key.size + 24 }
137
- end
138
- }
@@ -1,7 +1,10 @@
1
- module CodeRay
1
+ # encoding: utf-8
2
+ require 'strscan'
2
3
 
3
- require 'coderay/helpers/plugin'
4
+ module CodeRay
4
5
 
6
+ autoload :WordList, 'coderay/helpers/word_list'
7
+
5
8
  # = Scanners
6
9
  #
7
10
  # This module holds the Scanner class and its subclasses.
@@ -16,9 +19,8 @@ module CodeRay
16
19
  module Scanners
17
20
  extend PluginHost
18
21
  plugin_path File.dirname(__FILE__), 'scanners'
19
-
20
- require 'strscan'
21
-
22
+
23
+
22
24
  # = Scanner
23
25
  #
24
26
  # The base class for all Scanners.
@@ -46,64 +48,89 @@ module CodeRay
46
48
 
47
49
  extend Plugin
48
50
  plugin_host Scanners
49
-
51
+
50
52
  # Raised if a Scanner fails while scanning
51
- ScanError = Class.new(Exception)
52
-
53
- require 'coderay/helpers/word_list'
54
-
53
+ ScanError = Class.new StandardError
54
+
55
55
  # The default options for all scanner classes.
56
56
  #
57
57
  # Define @default_options for subclasses.
58
- DEFAULT_OPTIONS = { :stream => false }
58
+ DEFAULT_OPTIONS = { }
59
+
60
+ KINDS_NOT_LOC = [:comment, :doctype, :docstring]
61
+
62
+ attr_accessor :state
59
63
 
60
- KINDS_NOT_LOC = [:comment, :doctype]
61
-
62
64
  class << self
63
-
64
- # Returns if the Scanner can be used in streaming mode.
65
- def streamable?
66
- is_a? Streamable
65
+
66
+ # Normalizes the given code into a string with UNIX newlines, in the
67
+ # scanner's internal encoding, with invalid and undefined charachters
68
+ # replaced by placeholders. Always returns a new object.
69
+ def normalize code
70
+ # original = code
71
+ code = code.to_s unless code.is_a? ::String
72
+ return code if code.empty?
73
+
74
+ if code.respond_to? :encoding
75
+ code = encode_with_encoding code, self.encoding
76
+ else
77
+ code = to_unix code
78
+ end
79
+ # code = code.dup if code.eql? original
80
+ code
67
81
  end
68
-
69
- def normify code
70
- code = code.to_s
71
- if code.respond_to?(:encoding) && (code.encoding.name != 'UTF-8' || !code.valid_encoding?)
72
- code = code.dup
73
- original_encoding = code.encoding
74
- code.force_encoding 'Windows-1252'
75
- unless code.valid_encoding?
76
- code.force_encoding original_encoding
77
- if code.encoding.name == 'UTF-8'
78
- code.encode! 'UTF-16BE', :invalid => :replace, :undef => :replace, :replace => '?'
79
- end
80
- code.encode! 'UTF-8', :invalid => :replace, :undef => :replace, :replace => '?'
82
+
83
+ # The typical filename suffix for this scanner's language.
84
+ def file_extension extension = lang
85
+ @file_extension ||= extension.to_s
86
+ end
87
+
88
+ # The encoding used internally by this scanner.
89
+ def encoding name = 'UTF-8'
90
+ @encoding ||= defined?(Encoding.find) && Encoding.find(name)
91
+ end
92
+
93
+ # The lang of this Scanner class, which is equal to its Plugin ID.
94
+ def lang
95
+ @plugin_id
96
+ end
97
+
98
+ protected
99
+
100
+ def encode_with_encoding code, target_encoding
101
+ if code.encoding == target_encoding
102
+ if code.valid_encoding?
103
+ return to_unix(code)
104
+ else
105
+ source_encoding = guess_encoding code
81
106
  end
107
+ else
108
+ source_encoding = code.encoding
82
109
  end
83
- code.to_unix
110
+ # print "encode_with_encoding from #{source_encoding} to #{target_encoding}"
111
+ code.encode target_encoding, source_encoding, :universal_newline => true, :undef => :replace, :invalid => :replace
84
112
  end
85
113
 
86
- def file_extension extension = nil
87
- if extension
88
- @file_extension = extension.to_s
89
- else
90
- @file_extension ||= plugin_id.to_s
114
+ def to_unix code
115
+ code.index(?\r) ? code.gsub(/\r\n?/, "\n") : code
116
+ end
117
+
118
+ def guess_encoding s
119
+ #:nocov:
120
+ IO.popen("file -b --mime -", "w+") do |file|
121
+ file.write s[0, 1024]
122
+ file.close_write
123
+ begin
124
+ Encoding.find file.gets[/charset=([-\w]+)/, 1]
125
+ rescue ArgumentError
126
+ Encoding::BINARY
127
+ end
91
128
  end
129
+ #:nocov:
92
130
  end
93
-
131
+
94
132
  end
95
-
96
- =begin
97
- ## Excluded for speed reasons; protected seems to make methods slow.
98
-
99
- # Save the StringScanner methods from being called.
100
- # This would not be useful for highlighting.
101
- strscan_public_methods =
102
- StringScanner.instance_methods -
103
- StringScanner.ancestors[1].instance_methods
104
- protected(*strscan_public_methods)
105
- =end
106
-
133
+
107
134
  # Create a new Scanner.
108
135
  #
109
136
  # * +code+ is the input String and is handled by the superclass
@@ -111,146 +138,147 @@ module CodeRay
111
138
  # * +options+ is a Hash with Symbols as keys.
112
139
  # It is merged with the default options of the class (you can
113
140
  # overwrite default options here.)
114
- # * +block+ is the callback for streamed highlighting.
115
- #
116
- # If you set :stream to +true+ in the options, the Scanner uses a
117
- # TokenStream with the +block+ as callback to handle the tokens.
118
141
  #
119
142
  # Else, a Tokens object is used.
120
- def initialize code='', options = {}, &block
121
- raise "I am only the basic Scanner class. I can't scan "\
122
- "anything. :( Use my subclasses." if self.class == Scanner
143
+ def initialize code = '', options = {}
144
+ if self.class == Scanner
145
+ raise NotImplementedError, "I am only the basic Scanner class. I can't scan anything. :( Use my subclasses."
146
+ end
123
147
 
124
148
  @options = self.class::DEFAULT_OPTIONS.merge options
125
-
126
- super Scanner.normify(code)
127
-
128
- @tokens = options[:tokens]
129
- if @options[:stream]
130
- warn "warning in CodeRay::Scanner.new: :stream is set, "\
131
- "but no block was given" unless block_given?
132
- raise NotStreamableError, self unless kind_of? Streamable
133
- @tokens ||= TokenStream.new(&block)
134
- else
135
- warn "warning in CodeRay::Scanner.new: Block given, "\
136
- "but :stream is #{@options[:stream]}" if block_given?
137
- @tokens ||= Tokens.new
138
- end
139
- @tokens.scanner = self
140
-
149
+
150
+ super self.class.normalize(code)
151
+
152
+ @tokens = options[:tokens] || Tokens.new
153
+ @tokens.scanner = self if @tokens.respond_to? :scanner=
154
+
141
155
  setup
142
156
  end
143
-
157
+
158
+ # Sets back the scanner. Subclasses should redefine the reset_instance
159
+ # method instead of this one.
144
160
  def reset
145
161
  super
146
162
  reset_instance
147
163
  end
148
-
164
+
165
+ # Set a new string to be scanned.
149
166
  def string= code
150
- code = Scanner.normify(code)
151
- if defined?(RUBY_DESCRIPTION) && RUBY_DESCRIPTION['rubinius 1.0.1']
152
- reset_state
153
- @string = code
154
- else
155
- super code
156
- end
167
+ code = self.class.normalize(code)
168
+ super code
157
169
  reset_instance
158
170
  end
159
-
160
- # More mnemonic accessor name for the input string.
161
- alias code string
162
- alias code= string=
163
-
164
- # Returns the Plugin ID for this scanner.
171
+
172
+ # the Plugin ID for this scanner
165
173
  def lang
166
- self.class.plugin_id
174
+ self.class.lang
167
175
  end
168
-
169
- # Scans the code and returns all tokens in a Tokens object.
170
- def tokenize new_string=nil, options = {}
176
+
177
+ # the default file extension for this scanner
178
+ def file_extension
179
+ self.class.file_extension
180
+ end
181
+
182
+ # Scan the code and returns all tokens in a Tokens object.
183
+ def tokenize source = nil, options = {}
171
184
  options = @options.merge(options)
172
- self.string = new_string if new_string
173
- @cached_tokens =
174
- if @options[:stream] # :stream must have been set already
175
- reset unless new_string
176
- scan_tokens @tokens, options
177
- @tokens
178
- else
179
- scan_tokens @tokens, options
180
- end
185
+ @tokens = options[:tokens] || @tokens || Tokens.new
186
+ @tokens.scanner = self if @tokens.respond_to? :scanner=
187
+ case source
188
+ when Array
189
+ self.string = self.class.normalize(source.join)
190
+ when nil
191
+ reset
192
+ else
193
+ self.string = self.class.normalize(source)
194
+ end
195
+
196
+ begin
197
+ scan_tokens @tokens, options
198
+ rescue => e
199
+ message = "Error in %s#scan_tokens, initial state was: %p" % [self.class, defined?(state) && state]
200
+ raise_inspect e.message, @tokens, message, 30, e.backtrace
201
+ end
202
+
203
+ @cached_tokens = @tokens
204
+ if source.is_a? Array
205
+ @tokens.split_into_parts(*source.map { |part| part.size })
206
+ else
207
+ @tokens
208
+ end
181
209
  end
182
-
210
+
211
+ # Cache the result of tokenize.
183
212
  def tokens
184
213
  @cached_tokens ||= tokenize
185
214
  end
186
215
 
187
- # Whether the scanner is in streaming mode.
188
- def streaming?
189
- !!@options[:stream]
190
- end
191
-
192
- # Traverses the tokens.
216
+ # Traverse the tokens.
193
217
  def each &block
194
- raise ArgumentError,
195
- 'Cannot traverse TokenStream.' if @options[:stream]
196
218
  tokens.each(&block)
197
219
  end
198
220
  include Enumerable
199
-
200
- # The current line position of the scanner.
221
+
222
+ # The current line position of the scanner, starting with 1.
223
+ # See also: #column.
201
224
  #
202
225
  # Beware, this is implemented inefficiently. It should be used
203
226
  # for debugging only.
204
- def line
205
- string[0..pos].count("\n") + 1
227
+ def line pos = self.pos
228
+ return 1 if pos <= 0
229
+ binary_string[0...pos].count("\n") + 1
206
230
  end
207
231
 
232
+ # The current column position of the scanner, starting with 1.
233
+ # See also: #line.
208
234
  def column pos = self.pos
209
- return 0 if pos <= 0
210
- string = string()
211
- if string.respond_to?(:bytesize) && (defined?(@bin_string) || string.bytesize != string.size)
212
- @bin_string ||= string.dup.force_encoding('binary')
213
- string = @bin_string
214
- end
215
- pos - (string.rindex(?\n, pos) || 0)
235
+ return 1 if pos <= 0
236
+ pos - (binary_string.rindex(?\n, pos - 1) || -1)
216
237
  end
217
238
 
218
- def marshal_dump
219
- @options
239
+ # The string in binary encoding.
240
+ #
241
+ # To be used with #pos, which is the index of the byte the scanner
242
+ # will scan next.
243
+ def binary_string
244
+ @binary_string ||=
245
+ if string.respond_to?(:bytesize) && string.bytesize != string.size
246
+ #:nocov:
247
+ string.dup.force_encoding('binary')
248
+ #:nocov:
249
+ else
250
+ string
251
+ end
220
252
  end
221
253
 
222
- def marshal_load options
223
- @options = options
224
- end
225
-
226
254
  protected
227
-
255
+
228
256
  # Can be implemented by subclasses to do some initialization
229
257
  # that has to be done once per instance.
230
258
  #
231
259
  # Use reset for initialization that has to be done once per
232
260
  # scan.
233
- def setup
261
+ def setup # :doc:
234
262
  end
235
-
263
+
236
264
  # This is the central method, and commonly the only one a
237
265
  # subclass implements.
238
266
  #
239
267
  # Subclasses must implement this method; it must return +tokens+
240
268
  # and must only use Tokens#<< for storing scanned tokens!
241
- def scan_tokens tokens, options
242
- raise NotImplementedError,
243
- "#{self.class}#scan_tokens not implemented."
269
+ def scan_tokens tokens, options # :doc:
270
+ raise NotImplementedError, "#{self.class}#scan_tokens not implemented."
244
271
  end
245
-
272
+
273
+ # Resets the scanner.
246
274
  def reset_instance
247
- @tokens.clear unless @options[:keep_tokens]
275
+ @tokens.clear if @tokens.respond_to?(:clear) && !@options[:keep_tokens]
248
276
  @cached_tokens = nil
249
- @bin_string = nil if defined? @bin_string
277
+ @binary_string = nil if defined? @binary_string
250
278
  end
251
-
279
+
252
280
  # Scanner error with additional status information
253
- def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
281
+ def raise_inspect msg, tokens, state = self.state || 'No state given!', ambit = 30, backtrace = caller
254
282
  raise ScanError, <<-EOE % [
255
283
 
256
284
 
@@ -272,13 +300,13 @@ surrounding code:
272
300
  EOE
273
301
  File.basename(caller[0]),
274
302
  msg,
275
- tokens.size,
276
- tokens.last(10).map { |t| t.inspect }.join("\n"),
303
+ tokens.respond_to?(:size) ? tokens.size : 0,
304
+ tokens.respond_to?(:last) ? tokens.last(10).map { |t| t.inspect }.join("\n") : '',
277
305
  line, column, pos,
278
306
  matched, state, bol?, eos?,
279
- string[pos - ambit, ambit],
280
- string[pos, ambit],
281
- ]
307
+ binary_string[pos - ambit, ambit],
308
+ binary_string[pos, ambit],
309
+ ], backtrace
282
310
  end
283
311
 
284
312
  # Shorthand for scan_until(/\z/).
@@ -288,19 +316,8 @@ surrounding code:
288
316
  terminate
289
317
  rest
290
318
  end
291
-
292
- end
293
-
294
- end
295
- end
296
-
297
- class String
298
- # I love this hack. It seems to silence all dos/unix/mac newline problems.
299
- def to_unix
300
- if index ?\r
301
- gsub(/\r\n?/, "\n")
302
- else
303
- self
319
+
304
320
  end
321
+
305
322
  end
306
- end
323
+ end