coderay 0.9.8 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. data/{lib/README → README_INDEX.rdoc} +10 -21
  2. data/Rakefile +6 -6
  3. data/bin/coderay +193 -64
  4. data/lib/coderay.rb +61 -105
  5. data/lib/coderay/duo.rb +17 -21
  6. data/lib/coderay/encoder.rb +100 -112
  7. data/lib/coderay/encoders/_map.rb +12 -7
  8. data/lib/coderay/encoders/comment_filter.rb +12 -30
  9. data/lib/coderay/encoders/count.rb +29 -11
  10. data/lib/coderay/encoders/debug.rb +32 -20
  11. data/lib/coderay/encoders/div.rb +13 -9
  12. data/lib/coderay/encoders/filter.rb +34 -51
  13. data/lib/coderay/encoders/html.rb +155 -161
  14. data/lib/coderay/encoders/html/css.rb +4 -9
  15. data/lib/coderay/encoders/html/numbering.rb +115 -0
  16. data/lib/coderay/encoders/html/output.rb +22 -70
  17. data/lib/coderay/encoders/json.rb +59 -45
  18. data/lib/coderay/encoders/lines_of_code.rb +12 -57
  19. data/lib/coderay/encoders/null.rb +6 -14
  20. data/lib/coderay/encoders/page.rb +13 -9
  21. data/lib/coderay/encoders/span.rb +13 -9
  22. data/lib/coderay/encoders/statistic.rb +58 -39
  23. data/lib/coderay/encoders/terminal.rb +179 -0
  24. data/lib/coderay/encoders/text.rb +31 -17
  25. data/lib/coderay/encoders/token_kind_filter.rb +111 -0
  26. data/lib/coderay/encoders/xml.rb +19 -18
  27. data/lib/coderay/encoders/yaml.rb +37 -9
  28. data/lib/coderay/for_redcloth.rb +4 -4
  29. data/lib/coderay/helpers/file_type.rb +127 -246
  30. data/lib/coderay/helpers/gzip.rb +41 -0
  31. data/lib/coderay/helpers/plugin.rb +241 -306
  32. data/lib/coderay/helpers/word_list.rb +65 -126
  33. data/lib/coderay/scanner.rb +173 -156
  34. data/lib/coderay/scanners/_map.rb +18 -17
  35. data/lib/coderay/scanners/c.rb +63 -77
  36. data/lib/coderay/scanners/clojure.rb +217 -0
  37. data/lib/coderay/scanners/cpp.rb +71 -84
  38. data/lib/coderay/scanners/css.rb +103 -120
  39. data/lib/coderay/scanners/debug.rb +47 -44
  40. data/lib/coderay/scanners/delphi.rb +70 -76
  41. data/lib/coderay/scanners/diff.rb +141 -50
  42. data/lib/coderay/scanners/erb.rb +81 -0
  43. data/lib/coderay/scanners/groovy.rb +104 -113
  44. data/lib/coderay/scanners/haml.rb +168 -0
  45. data/lib/coderay/scanners/html.rb +181 -110
  46. data/lib/coderay/scanners/java.rb +73 -75
  47. data/lib/coderay/scanners/java/builtin_types.rb +2 -0
  48. data/lib/coderay/scanners/java_script.rb +90 -101
  49. data/lib/coderay/scanners/json.rb +40 -53
  50. data/lib/coderay/scanners/php.rb +123 -147
  51. data/lib/coderay/scanners/python.rb +93 -91
  52. data/lib/coderay/scanners/raydebug.rb +66 -0
  53. data/lib/coderay/scanners/ruby.rb +343 -326
  54. data/lib/coderay/scanners/ruby/patterns.rb +40 -106
  55. data/lib/coderay/scanners/ruby/string_state.rb +71 -0
  56. data/lib/coderay/scanners/sql.rb +80 -66
  57. data/lib/coderay/scanners/text.rb +26 -0
  58. data/lib/coderay/scanners/xml.rb +1 -1
  59. data/lib/coderay/scanners/yaml.rb +74 -73
  60. data/lib/coderay/style.rb +10 -7
  61. data/lib/coderay/styles/_map.rb +3 -3
  62. data/lib/coderay/styles/alpha.rb +143 -0
  63. data/lib/coderay/token_kinds.rb +90 -0
  64. data/lib/coderay/tokens.rb +102 -277
  65. data/lib/coderay/tokens_proxy.rb +55 -0
  66. data/lib/coderay/version.rb +3 -0
  67. data/test/functional/basic.rb +200 -18
  68. data/test/functional/examples.rb +130 -0
  69. data/test/functional/for_redcloth.rb +15 -8
  70. data/test/functional/suite.rb +9 -6
  71. metadata +103 -123
  72. data/FOLDERS +0 -53
  73. data/bin/coderay_stylesheet +0 -4
  74. data/lib/coderay/encoders/html/numerization.rb +0 -133
  75. data/lib/coderay/encoders/term.rb +0 -158
  76. data/lib/coderay/encoders/token_class_filter.rb +0 -84
  77. data/lib/coderay/helpers/gzip_simple.rb +0 -123
  78. data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
  79. data/lib/coderay/scanners/plaintext.rb +0 -20
  80. data/lib/coderay/scanners/rhtml.rb +0 -78
  81. data/lib/coderay/scanners/scheme.rb +0 -145
  82. data/lib/coderay/styles/cycnus.rb +0 -152
  83. data/lib/coderay/styles/murphy.rb +0 -134
  84. data/lib/coderay/token_classes.rb +0 -86
  85. data/test/functional/load_plugin_scanner.rb +0 -11
  86. data/test/functional/vhdl.rb +0 -126
  87. data/test/functional/word_list.rb +0 -79
@@ -1,138 +1,77 @@
1
1
  module CodeRay
2
-
3
- # = WordList
4
- #
5
- # <b>A Hash subclass designed for mapping word lists to token types.</b>
6
- #
7
- # Copyright (c) 2006 by murphy (Kornelius Kalnbach) <murphy rubychan de>
8
- #
9
- # License:: LGPL / ask the author
10
- # Version:: 1.1 (2006-Oct-19)
11
- #
12
- # A WordList is a Hash with some additional features.
13
- # It is intended to be used for keyword recognition.
14
- #
15
- # WordList is highly optimized to be used in Scanners,
16
- # typically to decide whether a given ident is a special token.
17
- #
18
- # For case insensitive words use CaseIgnoringWordList.
19
- #
20
- # Example:
21
- #
22
- # # define word arrays
23
- # RESERVED_WORDS = %w[
24
- # asm break case continue default do else
25
- # ...
26
- # ]
27
- #
28
- # PREDEFINED_TYPES = %w[
29
- # int long short char void
30
- # ...
31
- # ]
32
- #
33
- # PREDEFINED_CONSTANTS = %w[
34
- # EOF NULL ...
35
- # ]
36
- #
37
- # # make a WordList
38
- # IDENT_KIND = WordList.new(:ident).
39
- # add(RESERVED_WORDS, :reserved).
40
- # add(PREDEFINED_TYPES, :pre_type).
41
- # add(PREDEFINED_CONSTANTS, :pre_constant)
42
- #
43
- # ...
44
- #
45
- # def scan_tokens tokens, options
46
- # ...
47
- #
48
- # elsif scan(/[A-Za-z_][A-Za-z_0-9]*/)
49
- # # use it
50
- # kind = IDENT_KIND[match]
51
- # ...
52
- class WordList < Hash
53
-
54
- # Creates a new WordList with +default+ as default value.
55
- #
56
- # You can activate +caching+ to store the results for every [] request.
2
+
3
+ # = WordList
57
4
  #
58
- # With caching, methods like +include?+ or +delete+ may no longer behave
59
- # as you expect. Therefore, it is recommended to use the [] method only.
60
- def initialize default = false, caching = false, &block
61
- if block
62
- raise ArgumentError, 'Can\'t combine block with caching.' if caching
63
- super(&block)
64
- else
65
- if caching
66
- super() do |h, k|
67
- h[k] = h.fetch k, default
68
- end
69
- else
70
- super default
71
- end
72
- end
73
- end
74
-
75
- # Add words to the list and associate them with +kind+.
5
+ # <b>A Hash subclass designed for mapping word lists to token types.</b>
76
6
  #
77
- # Returns +self+, so you can concat add calls.
78
- def add words, kind = true
79
- words.each do |word|
80
- self[word] = kind
7
+ # Copyright (c) 2006-2011 by murphy (Kornelius Kalnbach) <murphy rubychan de>
8
+ #
9
+ # License:: LGPL / ask the author
10
+ # Version:: 2.0 (2011-05-08)
11
+ #
12
+ # A WordList is a Hash with some additional features.
13
+ # It is intended to be used for keyword recognition.
14
+ #
15
+ # WordList is optimized to be used in Scanners,
16
+ # typically to decide whether a given ident is a special token.
17
+ #
18
+ # For case insensitive words use WordList::CaseIgnoring.
19
+ #
20
+ # Example:
21
+ #
22
+ # # define word arrays
23
+ # RESERVED_WORDS = %w[
24
+ # asm break case continue default do else
25
+ # ]
26
+ #
27
+ # PREDEFINED_TYPES = %w[
28
+ # int long short char void
29
+ # ]
30
+ #
31
+ # # make a WordList
32
+ # IDENT_KIND = WordList.new(:ident).
33
+ # add(RESERVED_WORDS, :reserved).
34
+ # add(PREDEFINED_TYPES, :predefined_type)
35
+ #
36
+ # ...
37
+ #
38
+ # def scan_tokens tokens, options
39
+ # ...
40
+ #
41
+ # elsif scan(/[A-Za-z_][A-Za-z_0-9]*/)
42
+ # # use it
43
+ # kind = IDENT_KIND[match]
44
+ # ...
45
+ class WordList < Hash
46
+
47
+ # Create a new WordList with +default+ as default value.
48
+ def initialize default = false
49
+ super default
81
50
  end
82
- self
83
- end
84
-
85
- end
86
-
87
-
88
- # A CaseIgnoringWordList is like a WordList, only that
89
- # keys are compared case-insensitively.
90
- #
91
- # Ignoring the text case is realized by sending the +downcase+ message to
92
- # all keys.
93
- #
94
- # Caching usually makes a CaseIgnoringWordList faster, but it has to be
95
- # activated explicitely.
96
- class CaseIgnoringWordList < WordList
97
-
98
- # Creates a new case-insensitive WordList with +default+ as default value.
99
- #
100
- # You can activate caching to store the results for every [] request.
101
- # This speeds up subsequent lookups for the same word, but also
102
- # uses memory.
103
- def initialize default = false, caching = false
104
- if caching
105
- super(default, false) do |h, k|
106
- h[k] = h.fetch k.downcase, default
107
- end
108
- else
109
- super(default, false)
110
- extend Uncached
51
+
52
+ # Add words to the list and associate them with +value+.
53
+ #
54
+ # Returns +self+, so you can concat add calls.
55
+ def add words, value = true
56
+ words.each { |word| self[word] = value }
57
+ self
111
58
  end
59
+
112
60
  end
113
61
 
114
- module Uncached # :nodoc:
62
+
63
+ # A CaseIgnoring WordList is like a WordList, only that
64
+ # keys are compared case-insensitively (normalizing keys using +downcase+).
65
+ class WordList::CaseIgnoring < WordList
66
+
115
67
  def [] key
116
- super(key.downcase)
68
+ super key.downcase
117
69
  end
118
- end
119
-
120
- # Add +words+ to the list and associate them with +kind+.
121
- def add words, kind = true
122
- words.each do |word|
123
- self[word.downcase] = kind
70
+
71
+ def []= key, value
72
+ super key.downcase, value
124
73
  end
125
- self
74
+
126
75
  end
127
-
128
- end
129
-
76
+
130
77
  end
131
-
132
- __END__
133
- # check memory consumption
134
- END {
135
- ObjectSpace.each_object(CodeRay::CaseIgnoringWordList) do |wl|
136
- p wl.inject(0) { |memo, key, value| memo + key.size + 24 }
137
- end
138
- }
@@ -1,7 +1,10 @@
1
- module CodeRay
1
+ # encoding: utf-8
2
+ require 'strscan'
2
3
 
3
- require 'coderay/helpers/plugin'
4
+ module CodeRay
4
5
 
6
+ autoload :WordList, 'coderay/helpers/word_list'
7
+
5
8
  # = Scanners
6
9
  #
7
10
  # This module holds the Scanner class and its subclasses.
@@ -16,9 +19,8 @@ module CodeRay
16
19
  module Scanners
17
20
  extend PluginHost
18
21
  plugin_path File.dirname(__FILE__), 'scanners'
19
-
20
- require 'strscan'
21
-
22
+
23
+
22
24
  # = Scanner
23
25
  #
24
26
  # The base class for all Scanners.
@@ -46,64 +48,89 @@ module CodeRay
46
48
 
47
49
  extend Plugin
48
50
  plugin_host Scanners
49
-
51
+
50
52
  # Raised if a Scanner fails while scanning
51
- ScanError = Class.new(Exception)
52
-
53
- require 'coderay/helpers/word_list'
54
-
53
+ ScanError = Class.new StandardError
54
+
55
55
  # The default options for all scanner classes.
56
56
  #
57
57
  # Define @default_options for subclasses.
58
- DEFAULT_OPTIONS = { :stream => false }
58
+ DEFAULT_OPTIONS = { }
59
+
60
+ KINDS_NOT_LOC = [:comment, :doctype, :docstring]
61
+
62
+ attr_accessor :state
59
63
 
60
- KINDS_NOT_LOC = [:comment, :doctype]
61
-
62
64
  class << self
63
-
64
- # Returns if the Scanner can be used in streaming mode.
65
- def streamable?
66
- is_a? Streamable
65
+
66
+ # Normalizes the given code into a string with UNIX newlines, in the
67
+ # scanner's internal encoding, with invalid and undefined characters
68
+ # replaced by placeholders. Always returns a new object.
69
+ def normalize code
70
+ # original = code
71
+ code = code.to_s unless code.is_a? ::String
72
+ return code if code.empty?
73
+
74
+ if code.respond_to? :encoding
75
+ code = encode_with_encoding code, self.encoding
76
+ else
77
+ code = to_unix code
78
+ end
79
+ # code = code.dup if code.eql? original
80
+ code
67
81
  end
68
-
69
- def normify code
70
- code = code.to_s
71
- if code.respond_to?(:encoding) && (code.encoding.name != 'UTF-8' || !code.valid_encoding?)
72
- code = code.dup
73
- original_encoding = code.encoding
74
- code.force_encoding 'Windows-1252'
75
- unless code.valid_encoding?
76
- code.force_encoding original_encoding
77
- if code.encoding.name == 'UTF-8'
78
- code.encode! 'UTF-16BE', :invalid => :replace, :undef => :replace, :replace => '?'
79
- end
80
- code.encode! 'UTF-8', :invalid => :replace, :undef => :replace, :replace => '?'
82
+
83
+ # The typical filename suffix for this scanner's language.
84
+ def file_extension extension = lang
85
+ @file_extension ||= extension.to_s
86
+ end
87
+
88
+ # The encoding used internally by this scanner.
89
+ def encoding name = 'UTF-8'
90
+ @encoding ||= defined?(Encoding.find) && Encoding.find(name)
91
+ end
92
+
93
+ # The lang of this Scanner class, which is equal to its Plugin ID.
94
+ def lang
95
+ @plugin_id
96
+ end
97
+
98
+ protected
99
+
100
+ def encode_with_encoding code, target_encoding
101
+ if code.encoding == target_encoding
102
+ if code.valid_encoding?
103
+ return to_unix(code)
104
+ else
105
+ source_encoding = guess_encoding code
81
106
  end
107
+ else
108
+ source_encoding = code.encoding
82
109
  end
83
- code.to_unix
110
+ # print "encode_with_encoding from #{source_encoding} to #{target_encoding}"
111
+ code.encode target_encoding, source_encoding, :universal_newline => true, :undef => :replace, :invalid => :replace
84
112
  end
85
113
 
86
- def file_extension extension = nil
87
- if extension
88
- @file_extension = extension.to_s
89
- else
90
- @file_extension ||= plugin_id.to_s
114
+ def to_unix code
115
+ code.index(?\r) ? code.gsub(/\r\n?/, "\n") : code
116
+ end
117
+
118
+ def guess_encoding s
119
+ #:nocov:
120
+ IO.popen("file -b --mime -", "w+") do |file|
121
+ file.write s[0, 1024]
122
+ file.close_write
123
+ begin
124
+ Encoding.find file.gets[/charset=([-\w]+)/, 1]
125
+ rescue ArgumentError
126
+ Encoding::BINARY
127
+ end
91
128
  end
129
+ #:nocov:
92
130
  end
93
-
131
+
94
132
  end
95
-
96
- =begin
97
- ## Excluded for speed reasons; protected seems to make methods slow.
98
-
99
- # Save the StringScanner methods from being called.
100
- # This would not be useful for highlighting.
101
- strscan_public_methods =
102
- StringScanner.instance_methods -
103
- StringScanner.ancestors[1].instance_methods
104
- protected(*strscan_public_methods)
105
- =end
106
-
133
+
107
134
  # Create a new Scanner.
108
135
  #
109
136
  # * +code+ is the input String and is handled by the superclass
@@ -111,146 +138,147 @@ module CodeRay
111
138
  # * +options+ is a Hash with Symbols as keys.
112
139
  # It is merged with the default options of the class (you can
113
140
  # overwrite default options here.)
114
- # * +block+ is the callback for streamed highlighting.
115
- #
116
- # If you set :stream to +true+ in the options, the Scanner uses a
117
- # TokenStream with the +block+ as callback to handle the tokens.
118
141
  #
119
142
  # Else, a Tokens object is used.
120
- def initialize code='', options = {}, &block
121
- raise "I am only the basic Scanner class. I can't scan "\
122
- "anything. :( Use my subclasses." if self.class == Scanner
143
+ def initialize code = '', options = {}
144
+ if self.class == Scanner
145
+ raise NotImplementedError, "I am only the basic Scanner class. I can't scan anything. :( Use my subclasses."
146
+ end
123
147
 
124
148
  @options = self.class::DEFAULT_OPTIONS.merge options
125
-
126
- super Scanner.normify(code)
127
-
128
- @tokens = options[:tokens]
129
- if @options[:stream]
130
- warn "warning in CodeRay::Scanner.new: :stream is set, "\
131
- "but no block was given" unless block_given?
132
- raise NotStreamableError, self unless kind_of? Streamable
133
- @tokens ||= TokenStream.new(&block)
134
- else
135
- warn "warning in CodeRay::Scanner.new: Block given, "\
136
- "but :stream is #{@options[:stream]}" if block_given?
137
- @tokens ||= Tokens.new
138
- end
139
- @tokens.scanner = self
140
-
149
+
150
+ super self.class.normalize(code)
151
+
152
+ @tokens = options[:tokens] || Tokens.new
153
+ @tokens.scanner = self if @tokens.respond_to? :scanner=
154
+
141
155
  setup
142
156
  end
143
-
157
+
158
+ # Sets back the scanner. Subclasses should redefine the reset_instance
159
+ # method instead of this one.
144
160
  def reset
145
161
  super
146
162
  reset_instance
147
163
  end
148
-
164
+
165
+ # Set a new string to be scanned.
149
166
  def string= code
150
- code = Scanner.normify(code)
151
- if defined?(RUBY_DESCRIPTION) && RUBY_DESCRIPTION['rubinius 1.0.1']
152
- reset_state
153
- @string = code
154
- else
155
- super code
156
- end
167
+ code = self.class.normalize(code)
168
+ super code
157
169
  reset_instance
158
170
  end
159
-
160
- # More mnemonic accessor name for the input string.
161
- alias code string
162
- alias code= string=
163
-
164
- # Returns the Plugin ID for this scanner.
171
+
172
+ # the Plugin ID for this scanner
165
173
  def lang
166
- self.class.plugin_id
174
+ self.class.lang
167
175
  end
168
-
169
- # Scans the code and returns all tokens in a Tokens object.
170
- def tokenize new_string=nil, options = {}
176
+
177
+ # the default file extension for this scanner
178
+ def file_extension
179
+ self.class.file_extension
180
+ end
181
+
182
+ # Scan the code and return all tokens in a Tokens object.
183
+ def tokenize source = nil, options = {}
171
184
  options = @options.merge(options)
172
- self.string = new_string if new_string
173
- @cached_tokens =
174
- if @options[:stream] # :stream must have been set already
175
- reset unless new_string
176
- scan_tokens @tokens, options
177
- @tokens
178
- else
179
- scan_tokens @tokens, options
180
- end
185
+ @tokens = options[:tokens] || @tokens || Tokens.new
186
+ @tokens.scanner = self if @tokens.respond_to? :scanner=
187
+ case source
188
+ when Array
189
+ self.string = self.class.normalize(source.join)
190
+ when nil
191
+ reset
192
+ else
193
+ self.string = self.class.normalize(source)
194
+ end
195
+
196
+ begin
197
+ scan_tokens @tokens, options
198
+ rescue => e
199
+ message = "Error in %s#scan_tokens, initial state was: %p" % [self.class, defined?(state) && state]
200
+ raise_inspect e.message, @tokens, message, 30, e.backtrace
201
+ end
202
+
203
+ @cached_tokens = @tokens
204
+ if source.is_a? Array
205
+ @tokens.split_into_parts(*source.map { |part| part.size })
206
+ else
207
+ @tokens
208
+ end
181
209
  end
182
-
210
+
211
+ # Cache the result of tokenize.
183
212
  def tokens
184
213
  @cached_tokens ||= tokenize
185
214
  end
186
215
 
187
- # Whether the scanner is in streaming mode.
188
- def streaming?
189
- !!@options[:stream]
190
- end
191
-
192
- # Traverses the tokens.
216
+ # Traverse the tokens.
193
217
  def each &block
194
- raise ArgumentError,
195
- 'Cannot traverse TokenStream.' if @options[:stream]
196
218
  tokens.each(&block)
197
219
  end
198
220
  include Enumerable
199
-
200
- # The current line position of the scanner.
221
+
222
+ # The current line position of the scanner, starting with 1.
223
+ # See also: #column.
201
224
  #
202
225
  # Beware, this is implemented inefficiently. It should be used
203
226
  # for debugging only.
204
- def line
205
- string[0..pos].count("\n") + 1
227
+ def line pos = self.pos
228
+ return 1 if pos <= 0
229
+ binary_string[0...pos].count("\n") + 1
206
230
  end
207
231
 
232
+ # The current column position of the scanner, starting with 1.
233
+ # See also: #line.
208
234
  def column pos = self.pos
209
- return 0 if pos <= 0
210
- string = string()
211
- if string.respond_to?(:bytesize) && (defined?(@bin_string) || string.bytesize != string.size)
212
- @bin_string ||= string.dup.force_encoding('binary')
213
- string = @bin_string
214
- end
215
- pos - (string.rindex(?\n, pos) || 0)
235
+ return 1 if pos <= 0
236
+ pos - (binary_string.rindex(?\n, pos - 1) || -1)
216
237
  end
217
238
 
218
- def marshal_dump
219
- @options
239
+ # The string in binary encoding.
240
+ #
241
+ # To be used with #pos, which is the index of the byte the scanner
242
+ # will scan next.
243
+ def binary_string
244
+ @binary_string ||=
245
+ if string.respond_to?(:bytesize) && string.bytesize != string.size
246
+ #:nocov:
247
+ string.dup.force_encoding('binary')
248
+ #:nocov:
249
+ else
250
+ string
251
+ end
220
252
  end
221
253
 
222
- def marshal_load options
223
- @options = options
224
- end
225
-
226
254
  protected
227
-
255
+
228
256
  # Can be implemented by subclasses to do some initialization
229
257
  # that has to be done once per instance.
230
258
  #
231
259
  # Use reset for initialization that has to be done once per
232
260
  # scan.
233
- def setup
261
+ def setup # :doc:
234
262
  end
235
-
263
+
236
264
  # This is the central method, and commonly the only one a
237
265
  # subclass implements.
238
266
  #
239
267
  # Subclasses must implement this method; it must return +tokens+
240
268
  # and must only use Tokens#<< for storing scanned tokens!
241
- def scan_tokens tokens, options
242
- raise NotImplementedError,
243
- "#{self.class}#scan_tokens not implemented."
269
+ def scan_tokens tokens, options # :doc:
270
+ raise NotImplementedError, "#{self.class}#scan_tokens not implemented."
244
271
  end
245
-
272
+
273
+ # Resets the scanner.
246
274
  def reset_instance
247
- @tokens.clear unless @options[:keep_tokens]
275
+ @tokens.clear if @tokens.respond_to?(:clear) && !@options[:keep_tokens]
248
276
  @cached_tokens = nil
249
- @bin_string = nil if defined? @bin_string
277
+ @binary_string = nil if defined? @binary_string
250
278
  end
251
-
279
+
252
280
  # Scanner error with additional status information
253
- def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
281
+ def raise_inspect msg, tokens, state = self.state || 'No state given!', ambit = 30, backtrace = caller
254
282
  raise ScanError, <<-EOE % [
255
283
 
256
284
 
@@ -272,13 +300,13 @@ surrounding code:
272
300
  EOE
273
301
  File.basename(caller[0]),
274
302
  msg,
275
- tokens.size,
276
- tokens.last(10).map { |t| t.inspect }.join("\n"),
303
+ tokens.respond_to?(:size) ? tokens.size : 0,
304
+ tokens.respond_to?(:last) ? tokens.last(10).map { |t| t.inspect }.join("\n") : '',
277
305
  line, column, pos,
278
306
  matched, state, bol?, eos?,
279
- string[pos - ambit, ambit],
280
- string[pos, ambit],
281
- ]
307
+ binary_string[pos - ambit, ambit],
308
+ binary_string[pos, ambit],
309
+ ], backtrace
282
310
  end
283
311
 
284
312
  # Shorthand for scan_until(/\z/).
@@ -288,19 +316,8 @@ surrounding code:
288
316
  terminate
289
317
  rest
290
318
  end
291
-
292
- end
293
-
294
- end
295
- end
296
-
297
- class String
298
- # I love this hack. It seems to silence all dos/unix/mac newline problems.
299
- def to_unix
300
- if index ?\r
301
- gsub(/\r\n?/, "\n")
302
- else
303
- self
319
+
304
320
  end
321
+
305
322
  end
306
- end
323
+ end