antelope 0.1.8 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +2 -0
  3. data/CONTRIBUTING.md +4 -4
  4. data/GENERATORS.md +61 -19
  5. data/README.md +84 -9
  6. data/TODO.md +58 -0
  7. data/examples/deterministic.ace +21 -9
  8. data/examples/example.ace +16 -10
  9. data/examples/example.output +213 -146
  10. data/examples/simple.ace +1 -1
  11. data/lib/antelope/ace/compiler.rb +52 -15
  12. data/lib/antelope/ace/errors.rb +7 -0
  13. data/lib/antelope/ace/grammar/generation.rb +3 -3
  14. data/lib/antelope/ace/grammar/precedences.rb +5 -7
  15. data/lib/antelope/ace/grammar/productions.rb +36 -11
  16. data/lib/antelope/ace/grammar/{terminals.rb → symbols.rb} +25 -2
  17. data/lib/antelope/ace/grammar.rb +12 -3
  18. data/lib/antelope/ace/precedence.rb +4 -0
  19. data/lib/antelope/ace/scanner/argument.rb +57 -0
  20. data/lib/antelope/ace/scanner/first.rb +32 -6
  21. data/lib/antelope/ace/scanner/second.rb +23 -8
  22. data/lib/antelope/ace/scanner.rb +32 -26
  23. data/lib/antelope/ace/token.rb +21 -2
  24. data/lib/antelope/cli.rb +22 -2
  25. data/lib/antelope/generation/constructor/first.rb +1 -1
  26. data/lib/antelope/generation/constructor.rb +2 -0
  27. data/lib/antelope/generation/null.rb +13 -0
  28. data/lib/antelope/generation/recognizer/rule.rb +4 -3
  29. data/lib/antelope/generation/recognizer/state.rb +18 -3
  30. data/lib/antelope/generation/recognizer.rb +19 -24
  31. data/lib/antelope/generation/tableizer.rb +30 -2
  32. data/lib/antelope/generation.rb +1 -0
  33. data/lib/antelope/generator/base.rb +150 -13
  34. data/lib/antelope/generator/c.rb +11 -0
  35. data/lib/antelope/generator/c_header.rb +105 -0
  36. data/lib/antelope/generator/c_source.rb +39 -0
  37. data/lib/antelope/generator/null.rb +5 -0
  38. data/lib/antelope/generator/output.rb +3 -3
  39. data/lib/antelope/generator/ruby.rb +23 -5
  40. data/lib/antelope/generator/templates/c_header.ant +36 -0
  41. data/lib/antelope/generator/templates/c_source.ant +202 -0
  42. data/lib/antelope/generator/templates/output.ant +68 -0
  43. data/lib/antelope/generator/templates/ruby.ant +146 -0
  44. data/lib/antelope/generator.rb +15 -3
  45. data/lib/antelope/template/compiler.rb +78 -0
  46. data/lib/antelope/template/errors.rb +9 -0
  47. data/lib/antelope/template/scanner.rb +111 -0
  48. data/lib/antelope/template.rb +60 -0
  49. data/lib/antelope/version.rb +1 -1
  50. data/lib/antelope.rb +1 -0
  51. data/spec/antelope/template_spec.rb +39 -0
  52. data/subl/Ace (Ruby).JSON-tmLanguage +94 -0
  53. data/subl/Ace (Ruby).tmLanguage +153 -0
  54. metadata +21 -8
  55. data/examples/deterministic.output +0 -131
  56. data/examples/simple.output +0 -121
  57. data/lib/antelope/generator/templates/output.erb +0 -56
  58. data/lib/antelope/generator/templates/ruby.erb +0 -63
@@ -14,8 +14,8 @@ module Antelope
14
14
  # - `:directive` (2 arguments)
15
15
  # - `:copy` (1 argument)
16
16
  # - `:second` (no arguments)
17
- # - `:label` (1 argument)
18
- # - `:part` (1 argument)
17
+ # - `:label` (2 arguments)
18
+ # - `:part` (2 arguments)
19
19
  # - `:or` (no arguments)
20
20
  # - `:prec` (1 argument)
21
21
  # - `:block` (1 argument)
@@ -92,14 +92,21 @@ module Antelope
92
92
  @rules = []
93
93
  @current = nil
94
94
  @current_label = nil
95
- @options = { :terminals => [], :prec => [], :extra => {} }
95
+ @options = {
96
+ :terminals => [],
97
+ :nonterminals => [],
98
+ :prec => [],
99
+ :type => nil,
100
+ :extra => Hashie::Extensions::IndifferentAccess.
101
+ inject!({})
102
+ }
96
103
  end
97
104
 
98
105
  # Pretty inspect.
99
106
  #
100
107
  # @return [String]
101
108
  def inspect
102
- "#<#{self.class} state=#{@state.inspect} options=#{@options.inspect}>"
109
+ "#<#{self.class} state=#{@state.inspect} options=#{options.inspect}>"
103
110
  end
104
111
 
105
112
  # Runs the compiler on the input tokens. For each token,
@@ -152,19 +159,35 @@ module Antelope
152
159
  name = name.intern
153
160
  case name
154
161
  when :terminal, :token
155
- @options[:terminals] << [args[0].intern, args[1]]
162
+ handle_token(args)
156
163
  when :require
157
164
  compare_versions(args[0])
158
165
  when :left, :right, :nonassoc
159
- @options[:prec] << [name, *args.map(&:intern)]
166
+ options[:prec] << [name, *args.map(&:intern)]
167
+ when :language, :generator, :"grammar.type"
168
+ options[:type] = args[0].downcase
160
169
  when :type
161
- @options[:type] = args[0]
170
+ raise SyntaxError, "%type directive requires first " \
171
+ "argument to be caret" unless args[0].caret?
172
+
173
+ options[:nonterminals] <<
174
+ [args[0], args[1..-1].map(&:intern)]
175
+ when :define
176
+ compile_extra(args[0], args[1..-1])
162
177
  else
163
- @options[:extra][name] = args
164
- $stderr.puts "Unknown Directive: #{name}"
178
+ compile_extra(name, args)
165
179
  end
166
180
  end
167
181
 
182
+ def compile_extra(name, args)
183
+ matching = Generator.directives[name.to_s]
184
+
185
+ raise NoDirectiveError, "no directive named #{name}" \
186
+ unless matching
187
+
188
+ options[:extra][name] = args
189
+ end
190
+
168
191
  # Compiles a copy token. A copy token basically copies its
169
192
  # argument directly into the body. Used in both the first
170
193
  # and third parts.
@@ -192,16 +215,18 @@ module Antelope
192
215
  # @param label [String] the left-hand side of the rule; it
193
216
  # should be a nonterminal.
194
217
  # @return [void]
195
- def compile_label(label)
218
+ def compile_label(label, val)
196
219
  require_state! :second
197
220
  if @current
198
221
  @rules << @current
199
222
  end
200
223
 
201
- @current_label = label.intern
224
+ label = label.intern
225
+ @current_label = [label, val]
202
226
 
203
227
  @current = {
204
- label: @current_label,
228
+ label: label,
229
+ label_id: val,
205
230
  set: [],
206
231
  block: "",
207
232
  prec: ""
@@ -213,9 +238,9 @@ module Antelope
213
238
  # It adds the first argument to the set of the current rule.
214
239
  #
215
240
  # @param text [String] the symbol to append to the current rule.
216
- def compile_part(text)
241
+ def compile_part(text, val)
217
242
  require_state! :second
218
- @current[:set] << text.intern
243
+ @current[:set] << [text.intern, val]
219
244
  end
220
245
 
221
246
  # Compiles an or. This should only occur in a rule definition,
@@ -225,7 +250,7 @@ module Antelope
225
250
  # @return [void]
226
251
  # @see #compile_label
227
252
  def compile_or
228
- compile_label(@current_label)
253
+ compile_label(*@current_label)
229
254
  end
230
255
 
231
256
  # Compiles the precedence operator. This should only occur in a
@@ -265,6 +290,18 @@ module Antelope
265
290
 
266
291
  private
267
292
 
293
+ def handle_token(args)
294
+ type = ""
295
+ if args[0].caret?
296
+ type = args.shift
297
+ end
298
+
299
+ name = args.shift
300
+ value = args.shift
301
+
302
+ options[:terminals] << [name.intern, type, nil, value]
303
+ end
304
+
268
305
  # Checks the current state against the given states.
269
306
  #
270
307
  # @raise [InvalidStateError] if none of the given states match
@@ -37,5 +37,12 @@ module Antelope
37
37
  # generator to use for the generation, it raises this.
38
38
  class NoTypeError < Error
39
39
  end
40
+
41
+ # Primarily used in the {Compiler}, it is raised if it encounters
42
+ # a directive it cannot handle. This is more to warn the
43
+ # developer that a directive they wrote may not be accepted by any
44
+ # generator.
45
+ class NoDirectiveError < Error
46
+ end
40
47
  end
41
48
  end
@@ -33,14 +33,14 @@ module Antelope
33
33
  mods = modifiers.map(&:last).
34
34
  map { |x| x.new(self) }
35
35
  mods.each do |mod|
36
- puts "Running mod #{mod.class}..."
36
+ puts "Running mod #{mod.class}..." if options[:verbose]
37
37
  mod.call
38
38
  end
39
39
  hash = Hash[modifiers.map(&:first).zip(mods)]
40
40
  # This is when we'd generate
41
41
 
42
42
  find_generators(generators, options).each do |gen|
43
- puts "Running generator #{gen}..."
43
+ puts "Running generator #{gen}..." if options[:verbose]
44
44
  gen.new(self, hash).generate
45
45
  end
46
46
  end
@@ -71,7 +71,7 @@ module Antelope
71
71
 
72
72
  generators
73
73
 
74
- rescue KeyError => e
74
+ rescue KeyError
75
75
  raise NoTypeError, "Undefined type #{type}"
76
76
  end
77
77
  end
@@ -27,14 +27,12 @@ module Antelope
27
27
  def precedence_for(token)
28
28
  token = token.name if token.is_a?(Token)
29
29
 
30
- set = Set.new([token, :_])
31
-
32
30
  prec = precedence.
33
- select { |pr| set.intersection(pr.tokens).any? }.
34
- first
31
+ select { |pr| pr.tokens.include?(token) }.first
35
32
 
36
- if token == :modifier
37
- p prec
33
+ unless prec
34
+ prec = precedence.
35
+ select { |pr| pr.tokens.include?(:_) }.first
38
36
  end
39
37
 
40
38
  prec
@@ -56,7 +54,7 @@ module Antelope
56
54
  end
57
55
 
58
56
  precedence <<
59
- Ace::Precedence.new(:nonassoc, [:"$"].to_set, 0) <<
57
+ Ace::Precedence.new(:nonassoc, [:$end].to_set, 0) <<
60
58
  Ace::Precedence.new(:nonassoc, [:_].to_set, 1)
61
59
  precedence.sort_by { |_| _.level }.reverse
62
60
  end
@@ -35,8 +35,10 @@ module Antelope
35
35
  # @return [Token]
36
36
  def find_token(value)
37
37
  value = value.intern
38
+
38
39
  if productions.key?(value)
39
- Token::Nonterminal.new(value)
40
+ typed_nonterminals.find { |term| term.name == value } ||
41
+ Token::Nonterminal.new(value)
40
42
  elsif terminal = terminals.
41
43
  find { |term| term.name == value }
42
44
  terminal
@@ -45,7 +47,8 @@ module Antelope
45
47
  elsif [:nothing, :ε].include?(value)
46
48
  Token::Epsilon.new
47
49
  else
48
- raise UndefinedTokenError, "Could not find a token named #{value.inspect}"
50
+ raise UndefinedTokenError, "Could not find a token " \
51
+ "named #{value.inspect}"
49
52
  end
50
53
  end
51
54
 
@@ -88,22 +91,44 @@ module Antelope
88
91
  # @param id [Numeric] the id for the production.
89
92
  # @return [Production]
90
93
  def generate_production_for(rule, id)
91
- left = rule[:label]
92
- items = rule[:set].map { |_| find_token(_) }
94
+ left = Token::Nonterminal.new(rule[:label])
95
+ items = rule[:set].map { |_| find_token(_[0]) }
93
96
  prec = if rule[:prec].empty?
94
97
  items.select(&:terminal?).last
95
98
  else
96
- find_token(rule[:prec])
99
+ rule[:prec].intern
97
100
  end
98
101
 
99
- unless rule[:prec].empty?
100
- puts "PREC, #{prec.inspect}"
102
+ prec = precedence_for(prec)
103
+ left.type = type_for(rule[:label])
104
+ left.id = rule[:label_id]
105
+
106
+ rule[:set].each_with_index do |tok, i|
107
+ items[i] = items[i].dup
108
+ items[i].id = tok[1]
101
109
  end
102
110
 
103
- prec = precedence_for(prec)
111
+ Production.new(left, items, rule[:block], prec, id + 1)
112
+ end
113
+
114
+ # Returns the defined type for the given token name.
115
+ # Uses the `%type` directive to infer the corresponding types.
116
+ #
117
+ # @param token [Symbol] the token to check for
118
+ # types.
119
+ def type_for(token)
120
+ token = find_token(token) unless token.is_a?(Token)
104
121
 
105
- Production.new(Token::Nonterminal.new(left), items,
106
- rule[:block], prec, id + 1)
122
+ case token
123
+ when Token::Nonterminal
124
+ token.type
125
+ when Token::Terminal
126
+ token.type
127
+ when Token::Epsilon
128
+ ""
129
+ when Token::Error
130
+ ""
131
+ end
107
132
  end
108
133
 
109
134
  # Creates the default production for the grammar. The left
@@ -116,7 +141,7 @@ module Antelope
116
141
  def default_production
117
142
  Production.new(Token::Nonterminal.new(:$start), [
118
143
  Token::Nonterminal.new(@compiler.rules.first[:label]),
119
- Token::Terminal.new(:"$")
144
+ Token::Terminal.new(:$end)
120
145
  ], "", precedence.last, 0)
121
146
  end
122
147
  end
@@ -4,8 +4,8 @@ module Antelope
4
4
  module Ace
5
5
  class Grammar
6
6
 
7
- # Manages a list of the terminals in the grammar.
8
- module Terminals
7
+ # Manages a list of the symbols in the grammar.
8
+ module Symbols
9
9
 
10
10
  # A list of all terminals in the grammar. Checks the compiler
11
11
  # options for terminals, and then returns an array of
@@ -28,6 +28,21 @@ module Antelope
28
28
  @_nonterminals ||= productions.keys
29
29
  end
30
30
 
31
+ # A list of all nonterminals, with types.
32
+ #
33
+ # @return [Array<Token::Nonterminal>]
34
+ def typed_nonterminals
35
+ @_typed_nonterminals ||= begin
36
+ typed = []
37
+ compiler.options[:nonterminals].each do |data|
38
+ data[1].each do |nonterm|
39
+ typed << Token::Nonterminal.new(nonterm, data[0])
40
+ end
41
+ end
42
+ typed
43
+ end
44
+ end
45
+
31
46
  # A list of all symbols in the grammar; includes both
32
47
  # terminals and nonterminals.
33
48
  #
@@ -37,6 +52,14 @@ module Antelope
37
52
  def symbols
38
53
  @_symbols ||= terminals + nonterminals
39
54
  end
55
+
56
+ # Checks to see if the grammar uses the `error` terminal
57
+ # anywhere.
58
+ #
59
+ # @return [Boolean]
60
+ def contains_error_token?
61
+ all_productions.any? { |_| _.items.any?(&:error?) }
62
+ end
40
63
  end
41
64
  end
42
65
  end
@@ -1,6 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
- require "antelope/ace/grammar/terminals"
3
+ require "hashie"
4
+ require "antelope/ace/grammar/symbols"
4
5
  require "antelope/ace/grammar/productions"
5
6
  require "antelope/ace/grammar/precedences"
6
7
  require "antelope/ace/grammar/loading"
@@ -10,11 +11,11 @@ module Antelope
10
11
  module Ace
11
12
 
12
13
  # Defines a grammar from an Ace file. This handles setting up
13
- # productions, loading from files, terminals, precedence, and
14
+ # productions, loading from files, symbols, precedence, and
14
15
  # generation.
15
16
  class Grammar
16
17
 
17
- include Terminals
18
+ include Symbols
18
19
  include Productions
19
20
  include Precedences
20
21
  include Loading
@@ -55,6 +56,14 @@ module Antelope
55
56
  @output = Pathname.new(output)
56
57
  @compiler = compiler
57
58
  end
59
+
60
+ # Extra options from the compiler. This can be used by
61
+ # generators for output information.
62
+ #
63
+ # @return [Hash]
64
+ def options
65
+ compiler.options[:extra]
66
+ end
58
67
  end
59
68
  end
60
69
  end
@@ -49,6 +49,10 @@ module Antelope
49
49
  end
50
50
  end
51
51
 
52
+ # Converts the precedence into a representative string, denoting
53
+ # the type and the level.
54
+ #
55
+ # @return [String]
52
56
  def to_s
53
57
  "#{type.to_s[0]}#{level}"
54
58
  end
@@ -0,0 +1,57 @@
1
+ module Antelope
2
+ module Ace
3
+ class Scanner
4
+
5
+ # Represents an argument to a directive. It encapsulates a
6
+ # string object, which is the value of the argument.
7
+ class Argument < String
8
+
9
+ # Initialize the argument.
10
+ #
11
+ # @param type [Symbol] the type of argument it is; it can be
12
+ # a `:block`, `:text`, or `:caret`. The type is defined by
13
+ # the encapsulating characters. If the encapsulating
14
+ # characters are `{` and `}`, it's a `:block`; if they are
15
+ # `<` and `>`, it's a `:caret`; otherwise, it's a `:text`.
16
+ # @param value [String] the value of the argument.
17
+ def initialize(type, value)
18
+ @type = type
19
+ super(value)
20
+ end
21
+
22
+ # If this argument is type `:block`.
23
+ #
24
+ # @return [Boolean]
25
+ # @see type?
26
+ def block?
27
+ type? :block
28
+ end
29
+
30
+ # If this argument is type `:text`.
31
+ #
32
+ # @return [Boolean]
33
+ # @see type?
34
+ def text?
35
+ type? :text
36
+ end
37
+
38
+ # If this argument is type `:caret`.
39
+ #
40
+ # @return [Boolean]
41
+ # @see type?
42
+ def caret?
43
+ type? :caret
44
+ end
45
+
46
+ # Checks to see if any of the given arguments match the type
47
+ # of this argument.
48
+ #
49
+ # @param inc [Array<Symbol>]
50
+ # @return [Boolean]
51
+ def type?(*inc)
52
+ inc.include?(@type)
53
+ end
54
+ end
55
+ end
56
+ end
57
+ end
@@ -45,17 +45,43 @@ module Antelope
45
45
  #
46
46
  # @return [Boolean] if it matched.
47
47
  def scan_first_directive
48
- if @scanner.scan(/%([A-Za-z_-]+) ?/)
48
+ if @scanner.scan(/%(#{IDENTIFIER}) ?/)
49
49
  directive = @scanner[1]
50
- arguments = []
51
- until @scanner.check(/\n/)
50
+ arguments = scan_first_directive_arguments
51
+
52
+ tokens << [:directive, directive, arguments]
53
+ end
54
+ end
55
+
56
+ # Scan the arguments for a directive. It keeps attempting to
57
+ # scan arguments until the first newline that was not in a
58
+ # block. Arguments can be blocks, carets, or text; blocks are
59
+ # encapsulated with `{` and `}`, carets are encapsulated with
60
+ # `<` and `>`, and text is encapsulated with quotes or
61
+ # nothing.
62
+ #
63
+ # @return [Array<Argument>]
64
+ def scan_first_directive_arguments
65
+ arguments = []
66
+ until @scanner.check(/\n/)
67
+ if @scanner.scan(/\{/)
68
+ argument =
69
+ Argument.new(:block, _scan_block[1..-2])
70
+ elsif @scanner.scan(/</)
71
+ @scanner.scan(/((?:\\>|[^>])*)\>/)
72
+ argument =
73
+ Argument.new(:caret, @scanner[1])
74
+ else
52
75
  @scanner.scan(/#{VALUE}/x) or error!
53
- arguments.push(@scanner[2] || @scanner[3])
54
- @scanner.scan(/ */)
76
+ argument = Argument.new(:text,
77
+ @scanner[2] || @scanner[3])
55
78
  end
56
79
 
57
- tokens << [:directive, directive, arguments]
80
+ arguments.push(argument)
81
+ @scanner.scan(/ */)
58
82
  end
83
+
84
+ arguments
59
85
  end
60
86
  end
61
87
  end
@@ -45,7 +45,7 @@ module Antelope
45
45
  # @see #scan_second_rule_body
46
46
  # @see #error!
47
47
  def scan_second_rule
48
- if @scanner.check(/([a-z._-]+):/)
48
+ if @scanner.check(/(#{IDENTIFIER})(\[#{IDENTIFIER}\])?:/)
49
49
  scan_second_rule_label or error!
50
50
  scan_second_rule_body
51
51
  true
@@ -57,8 +57,8 @@ module Antelope
57
57
  #
58
58
  # @return [Boolean] if it matched.
59
59
  def scan_second_rule_label
60
- if @scanner.scan(/([a-z._-]+): ?/)
61
- tokens << [:label, @scanner[1]]
60
+ if @scanner.scan(/(#{IDENTIFIER})(?:\[(#{IDENTIFIER})\])?: ?/)
61
+ tokens << [:label, @scanner[1], @scanner[2]]
62
62
  end
63
63
  end
64
64
 
@@ -88,8 +88,8 @@ module Antelope
88
88
  #
89
89
  # @return [Boolean] if it matched.
90
90
  def scan_second_rule_part
91
- if @scanner.scan(/([A-Za-z._-]+)(?!\:|[A-Za-z._-])/)
92
- tokens << [:part, @scanner[1]]
91
+ if @scanner.scan(/(#{IDENTIFIER})(?:\[(#{IDENTIFIER})\])?(?!\:|[A-Za-z._])/)
92
+ tokens << [:part, @scanner[1], @scanner[2]]
93
93
  end
94
94
  end
95
95
 
@@ -107,7 +107,7 @@ module Antelope
107
107
  #
108
108
  # @return [Boolean] if it matched.
109
109
  def scan_second_rule_prec
110
- if @scanner.scan(/%prec ([A-Za-z._-]+)/)
110
+ if @scanner.scan(/%prec (#{IDENTIFIER})/)
111
111
  tokens << [:prec, @scanner[1]]
112
112
  end
113
113
  end
@@ -139,17 +139,32 @@ module Antelope
139
139
  def _scan_block
140
140
  brack = 1
141
141
  body = "{"
142
+ scan_for = %r{
143
+ (
144
+ (?: " ( \\\\ | \\" | [^"] )* "? )
145
+ | (?: ' ( \\\\ | \\' | [^'] )* '? )
146
+ | (?: // .*? \n )
147
+ | (?: \# .*? \n )
148
+ | (?: /\* [\s\S]+? \*/ )
149
+ | (?: \} )
150
+ | (?: \{ )
151
+ )
152
+ }x
142
153
 
143
154
  until brack.zero?
144
- if part = @scanner.scan_until(/(\}|\{)/)
155
+ if part = @scanner.scan_until(scan_for)
145
156
  body << part
146
157
 
158
+
147
159
  if @scanner[1] == "}"
148
160
  brack -= 1
149
- else
161
+ elsif @scanner[1] == "{"
150
162
  brack += 1
151
163
  end
152
164
  else
165
+ if @scanner.scan(/(.+)/m)
166
+ @line += @scanner[1].count("\n")
167
+ end
153
168
  error!
154
169
  end
155
170
  end
@@ -1,6 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require "strscan"
4
+ require "antelope/ace/scanner/argument"
4
5
  require "antelope/ace/scanner/first"
5
6
  require "antelope/ace/scanner/second"
6
7
  require "antelope/ace/scanner/third"
@@ -8,16 +9,19 @@ require "antelope/ace/scanner/third"
8
9
  module Antelope
9
10
  module Ace
10
11
 
11
- # Scans a given input. The input should be a properly formatted ACE file;
12
- # see the Ace module for more information. This scanner uses the
13
- # StringScanner class internally; see the ruby documentation for more on
14
- # that. This scanner seperates scanning into three seperate stages:
15
- # First, Second, and Third, for each section of the file, respectively.
12
+ # Scans a given input. The input should be a properly formatted
13
+ # ACE file; see the Ace module for more information. This scanner
14
+ # uses the StringScanner class internally; see the ruby
15
+ # documentation for more on that. This scanner separates scanning
16
+ # into three separate stages: First, Second, and Third, for each
17
+ # section of the file, respectively.
16
18
  #
17
19
  # @see Ace
18
20
  # @see http://ruby-doc.org/stdlib-2.1.2/libdoc/strscan/rdoc/StringScanner.html
19
21
  class Scanner
20
22
 
23
+ IDENTIFIER = "[a-zA-Z_.][a-zA-Z0-9_.-]*"
24
+
21
25
  include First
22
26
  include Second
23
27
  include Third
@@ -32,30 +36,30 @@ module Antelope
32
36
  # @return [Array<Array<(Symbol, Object, ...)>>]
33
37
  attr_reader :tokens
34
38
 
35
- # The boundry between each section. Placed here to be easily modifiable.
36
- # **MUST** be a regular expression.
39
+ # The boundary between each section. Placed here to be easily
40
+ # modifiable. **MUST** be a regular expression.
37
41
  #
38
42
  # @return [RegExp]
39
43
  CONTENT_BOUNDRY = /%%/
40
44
 
41
- # The value regular expression. It should match values; for example,
42
- # things quoted in strings or word letters without quotes. Must respond
43
- # to #to_s, since it is embedded within other regular expressions. The
44
- # regular expression should place the contents of the value in the
45
- # groups 2 or 3.
45
+ # The value regular expression. It should match values; for
46
+ # example, things quoted in strings or word letters without
47
+ # quotes. Must respond to #to_s, since it is embedded within
48
+ # other regular expressions. The regular expression should
49
+ # place the contents of the value in the groups 2 or 3.
46
50
  #
47
51
  # @return [#to_s]
48
52
  VALUE = %q{(?:
49
53
  (?:("|')((?:\\\\|\\"|\\'|.)+?)\\1)
50
- | ([[:word:]]+)
54
+ | ([A-Za-z0-9_.<>*-]+)
51
55
  )}
52
56
 
53
57
  # Scans a file. It returns the tokens resulting from scanning.
54
58
  #
55
- # @param source [String] the source to scan. This should be compatible
56
- # with StringScanner.
57
- # @param name [String] the name of the source file. This is primarily
58
- # used in backtrace information.
59
+ # @param source [String] the source to scan. This should be
60
+ # compatible with StringScanner.
61
+ # @param name [String] the name of the source file. This is
62
+ # primarily used in backtrace information.
59
63
  # @return [Array<Array<(Symbol, Object, ...)>>]
60
64
  # @see #tokens
61
65
  def self.scan(source, name = "(ace file)")
@@ -92,8 +96,15 @@ module Antelope
92
96
  start = [@scanner.pos - 8, 0].max
93
97
  stop = [@scanner.pos + 8, @scanner.string.length].min
94
98
  snip = @scanner.string[start..stop].strip.inspect
95
- char = @scanner.string[@scanner.pos].inspect
96
- new_line = "#{@source}:#{@line}:unexpected #{char} (near #{snip})"
99
+ char = @scanner.string[@scanner.pos]
100
+ char = if char
101
+ char.inspect
102
+ else
103
+ "EOF"
104
+ end
105
+
106
+ new_line = "#{@source}:#{@line}: unexpected #{char} " \
107
+ "(near #{snip})"
97
108
 
98
109
  raise e, e.message, [new_line, *e.backtrace]
99
110
  end
@@ -111,17 +122,12 @@ module Antelope
111
122
 
112
123
  private
113
124
 
114
- # Raises an error; first creates a small snippet to give the developer
115
- # some context.
125
+ # Raises an error.
116
126
  #
117
127
  # @raise [SyntaxError] always.
118
128
  # @return [void]
119
129
  def error!
120
- start = [@scanner.pos - 8, 0].max
121
- stop = [@scanner.pos + 8, @scanner.string.length].min
122
- snip = @scanner.string[start..stop].strip
123
- char = @scanner.string[@scanner.pos]
124
- raise SyntaxError, "invalid syntax"# near `#{snip.inspect}' (#{char.inspect})"
130
+ raise SyntaxError, "invalid syntax"
125
131
  end
126
132
  end
127
133
  end