antelope 0.1.8 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +2 -0
  3. data/CONTRIBUTING.md +4 -4
  4. data/GENERATORS.md +61 -19
  5. data/README.md +84 -9
  6. data/TODO.md +58 -0
  7. data/examples/deterministic.ace +21 -9
  8. data/examples/example.ace +16 -10
  9. data/examples/example.output +213 -146
  10. data/examples/simple.ace +1 -1
  11. data/lib/antelope/ace/compiler.rb +52 -15
  12. data/lib/antelope/ace/errors.rb +7 -0
  13. data/lib/antelope/ace/grammar/generation.rb +3 -3
  14. data/lib/antelope/ace/grammar/precedences.rb +5 -7
  15. data/lib/antelope/ace/grammar/productions.rb +36 -11
  16. data/lib/antelope/ace/grammar/{terminals.rb → symbols.rb} +25 -2
  17. data/lib/antelope/ace/grammar.rb +12 -3
  18. data/lib/antelope/ace/precedence.rb +4 -0
  19. data/lib/antelope/ace/scanner/argument.rb +57 -0
  20. data/lib/antelope/ace/scanner/first.rb +32 -6
  21. data/lib/antelope/ace/scanner/second.rb +23 -8
  22. data/lib/antelope/ace/scanner.rb +32 -26
  23. data/lib/antelope/ace/token.rb +21 -2
  24. data/lib/antelope/cli.rb +22 -2
  25. data/lib/antelope/generation/constructor/first.rb +1 -1
  26. data/lib/antelope/generation/constructor.rb +2 -0
  27. data/lib/antelope/generation/null.rb +13 -0
  28. data/lib/antelope/generation/recognizer/rule.rb +4 -3
  29. data/lib/antelope/generation/recognizer/state.rb +18 -3
  30. data/lib/antelope/generation/recognizer.rb +19 -24
  31. data/lib/antelope/generation/tableizer.rb +30 -2
  32. data/lib/antelope/generation.rb +1 -0
  33. data/lib/antelope/generator/base.rb +150 -13
  34. data/lib/antelope/generator/c.rb +11 -0
  35. data/lib/antelope/generator/c_header.rb +105 -0
  36. data/lib/antelope/generator/c_source.rb +39 -0
  37. data/lib/antelope/generator/null.rb +5 -0
  38. data/lib/antelope/generator/output.rb +3 -3
  39. data/lib/antelope/generator/ruby.rb +23 -5
  40. data/lib/antelope/generator/templates/c_header.ant +36 -0
  41. data/lib/antelope/generator/templates/c_source.ant +202 -0
  42. data/lib/antelope/generator/templates/output.ant +68 -0
  43. data/lib/antelope/generator/templates/ruby.ant +146 -0
  44. data/lib/antelope/generator.rb +15 -3
  45. data/lib/antelope/template/compiler.rb +78 -0
  46. data/lib/antelope/template/errors.rb +9 -0
  47. data/lib/antelope/template/scanner.rb +111 -0
  48. data/lib/antelope/template.rb +60 -0
  49. data/lib/antelope/version.rb +1 -1
  50. data/lib/antelope.rb +1 -0
  51. data/spec/antelope/template_spec.rb +39 -0
  52. data/subl/Ace (Ruby).JSON-tmLanguage +94 -0
  53. data/subl/Ace (Ruby).tmLanguage +153 -0
  54. metadata +21 -8
  55. data/examples/deterministic.output +0 -131
  56. data/examples/simple.output +0 -121
  57. data/lib/antelope/generator/templates/output.erb +0 -56
  58. data/lib/antelope/generator/templates/ruby.erb +0 -63
@@ -14,8 +14,8 @@ module Antelope
14
14
  # - `:directive` (2 arguments)
15
15
  # - `:copy` (1 argument)
16
16
  # - `:second` (no arguments)
17
- # - `:label` (1 argument)
18
- # - `:part` (1 argument)
17
+ # - `:label` (2 arguments)
18
+ # - `:part` (2 arguments)
19
19
  # - `:or` (no arguments)
20
20
  # - `:prec` (1 argument)
21
21
  # - `:block` (1 argument)
@@ -92,14 +92,21 @@ module Antelope
92
92
  @rules = []
93
93
  @current = nil
94
94
  @current_label = nil
95
- @options = { :terminals => [], :prec => [], :extra => {} }
95
+ @options = {
96
+ :terminals => [],
97
+ :nonterminals => [],
98
+ :prec => [],
99
+ :type => nil,
100
+ :extra => Hashie::Extensions::IndifferentAccess.
101
+ inject!({})
102
+ }
96
103
  end
97
104
 
98
105
  # Pretty inspect.
99
106
  #
100
107
  # @return [String]
101
108
  def inspect
102
- "#<#{self.class} state=#{@state.inspect} options=#{@options.inspect}>"
109
+ "#<#{self.class} state=#{@state.inspect} options=#{options.inspect}>"
103
110
  end
104
111
 
105
112
  # Runs the compiler on the input tokens. For each token,
@@ -152,19 +159,35 @@ module Antelope
152
159
  name = name.intern
153
160
  case name
154
161
  when :terminal, :token
155
- @options[:terminals] << [args[0].intern, args[1]]
162
+ handle_token(args)
156
163
  when :require
157
164
  compare_versions(args[0])
158
165
  when :left, :right, :nonassoc
159
- @options[:prec] << [name, *args.map(&:intern)]
166
+ options[:prec] << [name, *args.map(&:intern)]
167
+ when :language, :generator, :"grammar.type"
168
+ options[:type] = args[0].downcase
160
169
  when :type
161
- @options[:type] = args[0]
170
+ raise SyntaxError, "%type directive requires first " \
171
+ "argument to be caret" unless args[0].caret?
172
+
173
+ options[:nonterminals] <<
174
+ [args[0], args[1..-1].map(&:intern)]
175
+ when :define
176
+ compile_extra(args[0], args[1..-1])
162
177
  else
163
- @options[:extra][name] = args
164
- $stderr.puts "Unknown Directive: #{name}"
178
+ compile_extra(name, args)
165
179
  end
166
180
  end
167
181
 
182
+ def compile_extra(name, args)
183
+ matching = Generator.directives[name.to_s]
184
+
185
+ raise NoDirectiveError, "no directive named #{name}" \
186
+ unless matching
187
+
188
+ options[:extra][name] = args
189
+ end
190
+
168
191
  # Compiles a copy token. A copy token basically copies its
169
192
  # argument directly into the body. Used in both the first
170
193
  # and third parts.
@@ -192,16 +215,18 @@ module Antelope
192
215
  # @param label [String] the left-hand side of the rule; it
193
216
  # should be a nonterminal.
194
217
  # @return [void]
195
- def compile_label(label)
218
+ def compile_label(label, val)
196
219
  require_state! :second
197
220
  if @current
198
221
  @rules << @current
199
222
  end
200
223
 
201
- @current_label = label.intern
224
+ label = label.intern
225
+ @current_label = [label, val]
202
226
 
203
227
  @current = {
204
- label: @current_label,
228
+ label: label,
229
+ label_id: val,
205
230
  set: [],
206
231
  block: "",
207
232
  prec: ""
@@ -213,9 +238,9 @@ module Antelope
213
238
  # It adds the first argument to the set of the current rule.
214
239
  #
215
240
  # @param text [String] the symbol to append to the current rule.
216
- def compile_part(text)
241
+ def compile_part(text, val)
217
242
  require_state! :second
218
- @current[:set] << text.intern
243
+ @current[:set] << [text.intern, val]
219
244
  end
220
245
 
221
246
  # Compiles an or. This should only occur in a rule definition,
@@ -225,7 +250,7 @@ module Antelope
225
250
  # @return [void]
226
251
  # @see #compile_label
227
252
  def compile_or
228
- compile_label(@current_label)
253
+ compile_label(*@current_label)
229
254
  end
230
255
 
231
256
  # Compiles the precedence operator. This should only occur in a
@@ -265,6 +290,18 @@ module Antelope
265
290
 
266
291
  private
267
292
 
293
+ def handle_token(args)
294
+ type = ""
295
+ if args[0].caret?
296
+ type = args.shift
297
+ end
298
+
299
+ name = args.shift
300
+ value = args.shift
301
+
302
+ options[:terminals] << [name.intern, type, nil, value]
303
+ end
304
+
268
305
  # Checks the current state against the given states.
269
306
  #
270
307
  # @raise [InvalidStateError] if none of the given states match
@@ -37,5 +37,12 @@ module Antelope
37
37
  # generator to use for the generation, it raises this.
38
38
  class NoTypeError < Error
39
39
  end
40
+
41
+ # Primarily used in the {Compiler}, it is raised if it encounters
42
+ # a directive it cannot handle. This is more to warn the
43
+ # developer that a directive they wrote may not be accepted by any
44
+ # generator.
45
+ class NoDirectiveError < Error
46
+ end
40
47
  end
41
48
  end
@@ -33,14 +33,14 @@ module Antelope
33
33
  mods = modifiers.map(&:last).
34
34
  map { |x| x.new(self) }
35
35
  mods.each do |mod|
36
- puts "Running mod #{mod.class}..."
36
+ puts "Running mod #{mod.class}..." if options[:verbose]
37
37
  mod.call
38
38
  end
39
39
  hash = Hash[modifiers.map(&:first).zip(mods)]
40
40
  # This is when we'd generate
41
41
 
42
42
  find_generators(generators, options).each do |gen|
43
- puts "Running generator #{gen}..."
43
+ puts "Running generator #{gen}..." if options[:verbose]
44
44
  gen.new(self, hash).generate
45
45
  end
46
46
  end
@@ -71,7 +71,7 @@ module Antelope
71
71
 
72
72
  generators
73
73
 
74
- rescue KeyError => e
74
+ rescue KeyError
75
75
  raise NoTypeError, "Undefined type #{type}"
76
76
  end
77
77
  end
@@ -27,14 +27,12 @@ module Antelope
27
27
  def precedence_for(token)
28
28
  token = token.name if token.is_a?(Token)
29
29
 
30
- set = Set.new([token, :_])
31
-
32
30
  prec = precedence.
33
- select { |pr| set.intersection(pr.tokens).any? }.
34
- first
31
+ select { |pr| pr.tokens.include?(token) }.first
35
32
 
36
- if token == :modifier
37
- p prec
33
+ unless prec
34
+ prec = precedence.
35
+ select { |pr| pr.tokens.include?(:_) }.first
38
36
  end
39
37
 
40
38
  prec
@@ -56,7 +54,7 @@ module Antelope
56
54
  end
57
55
 
58
56
  precedence <<
59
- Ace::Precedence.new(:nonassoc, [:"$"].to_set, 0) <<
57
+ Ace::Precedence.new(:nonassoc, [:$end].to_set, 0) <<
60
58
  Ace::Precedence.new(:nonassoc, [:_].to_set, 1)
61
59
  precedence.sort_by { |_| _.level }.reverse
62
60
  end
@@ -35,8 +35,10 @@ module Antelope
35
35
  # @return [Token]
36
36
  def find_token(value)
37
37
  value = value.intern
38
+
38
39
  if productions.key?(value)
39
- Token::Nonterminal.new(value)
40
+ typed_nonterminals.find { |term| term.name == value } ||
41
+ Token::Nonterminal.new(value)
40
42
  elsif terminal = terminals.
41
43
  find { |term| term.name == value }
42
44
  terminal
@@ -45,7 +47,8 @@ module Antelope
45
47
  elsif [:nothing, :ε].include?(value)
46
48
  Token::Epsilon.new
47
49
  else
48
- raise UndefinedTokenError, "Could not find a token named #{value.inspect}"
50
+ raise UndefinedTokenError, "Could not find a token " \
51
+ "named #{value.inspect}"
49
52
  end
50
53
  end
51
54
 
@@ -88,22 +91,44 @@ module Antelope
88
91
  # @param id [Numeric] the id for the production.
89
92
  # @return [Production]
90
93
  def generate_production_for(rule, id)
91
- left = rule[:label]
92
- items = rule[:set].map { |_| find_token(_) }
94
+ left = Token::Nonterminal.new(rule[:label])
95
+ items = rule[:set].map { |_| find_token(_[0]) }
93
96
  prec = if rule[:prec].empty?
94
97
  items.select(&:terminal?).last
95
98
  else
96
- find_token(rule[:prec])
99
+ rule[:prec].intern
97
100
  end
98
101
 
99
- unless rule[:prec].empty?
100
- puts "PREC, #{prec.inspect}"
102
+ prec = precedence_for(prec)
103
+ left.type = type_for(rule[:label])
104
+ left.id = rule[:label_id]
105
+
106
+ rule[:set].each_with_index do |tok, i|
107
+ items[i] = items[i].dup
108
+ items[i].id = tok[1]
101
109
  end
102
110
 
103
- prec = precedence_for(prec)
111
+ Production.new(left, items, rule[:block], prec, id + 1)
112
+ end
113
+
114
+ # Returns the defined type for the given token name.
115
+ # Uses the `%type` directive to infer the corresponding types.
116
+ #
117
+ # @param token [Symbol] the token to check for
118
+ # types.
119
+ def type_for(token)
120
+ token = find_token(token) unless token.is_a?(Token)
104
121
 
105
- Production.new(Token::Nonterminal.new(left), items,
106
- rule[:block], prec, id + 1)
122
+ case token
123
+ when Token::Nonterminal
124
+ token.type
125
+ when Token::Terminal
126
+ token.type
127
+ when Token::Epsilon
128
+ ""
129
+ when Token::Error
130
+ ""
131
+ end
107
132
  end
108
133
 
109
134
  # Creates the default production for the grammar. The left
@@ -116,7 +141,7 @@ module Antelope
116
141
  def default_production
117
142
  Production.new(Token::Nonterminal.new(:$start), [
118
143
  Token::Nonterminal.new(@compiler.rules.first[:label]),
119
- Token::Terminal.new(:"$")
144
+ Token::Terminal.new(:$end)
120
145
  ], "", precedence.last, 0)
121
146
  end
122
147
  end
@@ -4,8 +4,8 @@ module Antelope
4
4
  module Ace
5
5
  class Grammar
6
6
 
7
- # Manages a list of the terminals in the grammar.
8
- module Terminals
7
+ # Manages a list of the symbols in the grammar.
8
+ module Symbols
9
9
 
10
10
  # A list of all terminals in the grammar. Checks the compiler
11
11
  # options for terminals, and then returns an array of
@@ -28,6 +28,21 @@ module Antelope
28
28
  @_nonterminals ||= productions.keys
29
29
  end
30
30
 
31
+ # A list of all nonterminals, with types.
32
+ #
33
+ # @return [Array<Token::Nonterminal>]
34
+ def typed_nonterminals
35
+ @_typed_nonterminals ||= begin
36
+ typed = []
37
+ compiler.options[:nonterminals].each do |data|
38
+ data[1].each do |nonterm|
39
+ typed << Token::Nonterminal.new(nonterm, data[0])
40
+ end
41
+ end
42
+ typed
43
+ end
44
+ end
45
+
31
46
  # A list of all symbols in the grammar; includes both
32
47
  # terminals and nonterminals.
33
48
  #
@@ -37,6 +52,14 @@ module Antelope
37
52
  def symbols
38
53
  @_symbols ||= terminals + nonterminals
39
54
  end
55
+
56
+ # Checks to see if the grammar uses the `error` terminal
57
+ # anywhere.
58
+ #
59
+ # @return [Boolean]
60
+ def contains_error_token?
61
+ all_productions.any? { |_| _.items.any?(&:error?) }
62
+ end
40
63
  end
41
64
  end
42
65
  end
@@ -1,6 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
- require "antelope/ace/grammar/terminals"
3
+ require "hashie"
4
+ require "antelope/ace/grammar/symbols"
4
5
  require "antelope/ace/grammar/productions"
5
6
  require "antelope/ace/grammar/precedences"
6
7
  require "antelope/ace/grammar/loading"
@@ -10,11 +11,11 @@ module Antelope
10
11
  module Ace
11
12
 
12
13
  # Defines a grammar from an Ace file. This handles setting up
13
- # productions, loading from files, terminals, precedence, and
14
+ # productions, loading from files, symbols, precedence, and
14
15
  # generation.
15
16
  class Grammar
16
17
 
17
- include Terminals
18
+ include Symbols
18
19
  include Productions
19
20
  include Precedences
20
21
  include Loading
@@ -55,6 +56,14 @@ module Antelope
55
56
  @output = Pathname.new(output)
56
57
  @compiler = compiler
57
58
  end
59
+
60
+ # Extra options from the compiler. This can be used by
61
+ # generators for output information.
62
+ #
63
+ # @return [Hash]
64
+ def options
65
+ compiler.options[:extra]
66
+ end
58
67
  end
59
68
  end
60
69
  end
@@ -49,6 +49,10 @@ module Antelope
49
49
  end
50
50
  end
51
51
 
52
+ # Converts the precedence into a representative string, denoting
53
+ # the type and the level.
54
+ #
55
+ # @return [String]
52
56
  def to_s
53
57
  "#{type.to_s[0]}#{level}"
54
58
  end
@@ -0,0 +1,57 @@
1
+ module Antelope
2
+ module Ace
3
+ class Scanner
4
+
5
+ # Represents an argument to a directive. It encapsulates a
6
+ # string object, which is the value of the argument.
7
+ class Argument < String
8
+
9
+ # Initialize the argument.
10
+ #
11
+ # @param type [Symbol] the type of argument it is; it can be
12
+ # a `:block`, `:text`, or `:caret`. The type is defined by
13
+ # the encapsulating characters. If the encapsulating
14
+ # characters are `{` and `}`, it's a `:block`; if they are
15
+ # `<` and `>`, it's a `:caret`; otherwise, it's a `:text`.
16
+ # @param value [String] the value of the argument.
17
+ def initialize(type, value)
18
+ @type = type
19
+ super(value)
20
+ end
21
+
22
+ # If this argument is type `:block`.
23
+ #
24
+ # @return [Boolean]
25
+ # @see type?
26
+ def block?
27
+ type? :block
28
+ end
29
+
30
+ # If this argument is type `:text`.
31
+ #
32
+ # @return [Boolean]
33
+ # @see type?
34
+ def text?
35
+ type? :text
36
+ end
37
+
38
+ # If this argument is type `:caret`.
39
+ #
40
+ # @return [Boolean]
41
+ # @see type?
42
+ def caret?
43
+ type? :caret
44
+ end
45
+
46
+ # Checks to see if any of the given arguments match the type
47
+ # of this argument.
48
+ #
49
+ # @param inc [Array<Symbol>]
50
+ # @return [Boolean]
51
+ def type?(*inc)
52
+ inc.include?(@type)
53
+ end
54
+ end
55
+ end
56
+ end
57
+ end
@@ -45,17 +45,43 @@ module Antelope
45
45
  #
46
46
  # @return [Boolean] if it matched.
47
47
  def scan_first_directive
48
- if @scanner.scan(/%([A-Za-z_-]+) ?/)
48
+ if @scanner.scan(/%(#{IDENTIFIER}) ?/)
49
49
  directive = @scanner[1]
50
- arguments = []
51
- until @scanner.check(/\n/)
50
+ arguments = scan_first_directive_arguments
51
+
52
+ tokens << [:directive, directive, arguments]
53
+ end
54
+ end
55
+
56
+ # Scan the arguments for a directive. It keeps attempting to
57
+ # scan arguments until the first newline that was not in a
58
+ # block. Arguments can be blocks, carets, or text; blocks are
59
+ # encapsulated with `{` and `}`, carets are encapsulated with
60
+ # `<` and `>`, and text is encapsulated with quotes or
61
+ # nothing.
62
+ #
63
+ # @return [Array<Argument>]
64
+ def scan_first_directive_arguments
65
+ arguments = []
66
+ until @scanner.check(/\n/)
67
+ if @scanner.scan(/\{/)
68
+ argument =
69
+ Argument.new(:block, _scan_block[1..-2])
70
+ elsif @scanner.scan(/</)
71
+ @scanner.scan(/((?:\\>|[^>])*)\>/)
72
+ argument =
73
+ Argument.new(:caret, @scanner[1])
74
+ else
52
75
  @scanner.scan(/#{VALUE}/x) or error!
53
- arguments.push(@scanner[2] || @scanner[3])
54
- @scanner.scan(/ */)
76
+ argument = Argument.new(:text,
77
+ @scanner[2] || @scanner[3])
55
78
  end
56
79
 
57
- tokens << [:directive, directive, arguments]
80
+ arguments.push(argument)
81
+ @scanner.scan(/ */)
58
82
  end
83
+
84
+ arguments
59
85
  end
60
86
  end
61
87
  end
@@ -45,7 +45,7 @@ module Antelope
45
45
  # @see #scan_second_rule_body
46
46
  # @see #error!
47
47
  def scan_second_rule
48
- if @scanner.check(/([a-z._-]+):/)
48
+ if @scanner.check(/(#{IDENTIFIER})(\[#{IDENTIFIER}\])?:/)
49
49
  scan_second_rule_label or error!
50
50
  scan_second_rule_body
51
51
  true
@@ -57,8 +57,8 @@ module Antelope
57
57
  #
58
58
  # @return [Boolean] if it matched.
59
59
  def scan_second_rule_label
60
- if @scanner.scan(/([a-z._-]+): ?/)
61
- tokens << [:label, @scanner[1]]
60
+ if @scanner.scan(/(#{IDENTIFIER})(?:\[(#{IDENTIFIER})\])?: ?/)
61
+ tokens << [:label, @scanner[1], @scanner[2]]
62
62
  end
63
63
  end
64
64
 
@@ -88,8 +88,8 @@ module Antelope
88
88
  #
89
89
  # @return [Boolean] if it matched.
90
90
  def scan_second_rule_part
91
- if @scanner.scan(/([A-Za-z._-]+)(?!\:|[A-Za-z._-])/)
92
- tokens << [:part, @scanner[1]]
91
+ if @scanner.scan(/(#{IDENTIFIER})(?:\[(#{IDENTIFIER})\])?(?!\:|[A-Za-z._])/)
92
+ tokens << [:part, @scanner[1], @scanner[2]]
93
93
  end
94
94
  end
95
95
 
@@ -107,7 +107,7 @@ module Antelope
107
107
  #
108
108
  # @return [Boolean] if it matched.
109
109
  def scan_second_rule_prec
110
- if @scanner.scan(/%prec ([A-Za-z._-]+)/)
110
+ if @scanner.scan(/%prec (#{IDENTIFIER})/)
111
111
  tokens << [:prec, @scanner[1]]
112
112
  end
113
113
  end
@@ -139,17 +139,32 @@ module Antelope
139
139
  def _scan_block
140
140
  brack = 1
141
141
  body = "{"
142
+ scan_for = %r{
143
+ (
144
+ (?: " ( \\\\ | \\" | [^"] )* "? )
145
+ | (?: ' ( \\\\ | \\' | [^'] )* '? )
146
+ | (?: // .*? \n )
147
+ | (?: \# .*? \n )
148
+ | (?: /\* [\s\S]+? \*/ )
149
+ | (?: \} )
150
+ | (?: \{ )
151
+ )
152
+ }x
142
153
 
143
154
  until brack.zero?
144
- if part = @scanner.scan_until(/(\}|\{)/)
155
+ if part = @scanner.scan_until(scan_for)
145
156
  body << part
146
157
 
158
+
147
159
  if @scanner[1] == "}"
148
160
  brack -= 1
149
- else
161
+ elsif @scanner[1] == "{"
150
162
  brack += 1
151
163
  end
152
164
  else
165
+ if @scanner.scan(/(.+)/m)
166
+ @line += @scanner[1].count("\n")
167
+ end
153
168
  error!
154
169
  end
155
170
  end
@@ -1,6 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require "strscan"
4
+ require "antelope/ace/scanner/argument"
4
5
  require "antelope/ace/scanner/first"
5
6
  require "antelope/ace/scanner/second"
6
7
  require "antelope/ace/scanner/third"
@@ -8,16 +9,19 @@ require "antelope/ace/scanner/third"
8
9
  module Antelope
9
10
  module Ace
10
11
 
11
- # Scans a given input. The input should be a properly formatted ACE file;
12
- # see the Ace module for more information. This scanner uses the
13
- # StringScanner class internally; see the ruby documentation for more on
14
- # that. This scanner seperates scanning into three seperate stages:
15
- # First, Second, and Third, for each section of the file, respectively.
12
+ # Scans a given input. The input should be a properly formatted
13
+ # ACE file; see the Ace module for more information. This scanner
14
+ # uses the StringScanner class internally; see the ruby
15
+ # documentation for more on that. This scanner separates scanning
16
+ # into three separate stages: First, Second, and Third, for each
17
+ # section of the file, respectively.
16
18
  #
17
19
  # @see Ace
18
20
  # @see http://ruby-doc.org/stdlib-2.1.2/libdoc/strscan/rdoc/StringScanner.html
19
21
  class Scanner
20
22
 
23
+ IDENTIFIER = "[a-zA-Z_.][a-zA-Z0-9_.-]*"
24
+
21
25
  include First
22
26
  include Second
23
27
  include Third
@@ -32,30 +36,30 @@ module Antelope
32
36
  # @return [Array<Array<(Symbol, Object, ...)>>]
33
37
  attr_reader :tokens
34
38
 
35
- # The boundry between each section. Placed here to be easily modifiable.
36
- # **MUST** be a regular expression.
39
+ # The boundary between each section. Placed here to be easily
40
+ # modifiable. **MUST** be a regular expression.
37
41
  #
38
42
  # @return [RegExp]
39
43
  CONTENT_BOUNDRY = /%%/
40
44
 
41
- # The value regular expression. It should match values; for example,
42
- # things quoted in strings or word letters without quotes. Must respond
43
- # to #to_s, since it is embedded within other regular expressions. The
44
- # regular expression should place the contents of the value in the
45
- # groups 2 or 3.
45
+ # The value regular expression. It should match values; for
46
+ # example, things quoted in strings or word letters without
47
+ # quotes. Must respond to #to_s, since it is embedded within
48
+ # other regular expressions. The regular expression should
49
+ # place the contents of the value in the groups 2 or 3.
46
50
  #
47
51
  # @return [#to_s]
48
52
  VALUE = %q{(?:
49
53
  (?:("|')((?:\\\\|\\"|\\'|.)+?)\\1)
50
- | ([[:word:]]+)
54
+ | ([A-Za-z0-9_.<>*-]+)
51
55
  )}
52
56
 
53
57
  # Scans a file. It returns the tokens resulting from scanning.
54
58
  #
55
- # @param source [String] the source to scan. This should be compatible
56
- # with StringScanner.
57
- # @param name [String] the name of the source file. This is primarily
58
- # used in backtrace information.
59
+ # @param source [String] the source to scan. This should be
60
+ # compatible with StringScanner.
61
+ # @param name [String] the name of the source file. This is
62
+ # primarily used in backtrace information.
59
63
  # @return [Array<Array<(Symbol, Object, ...)>>]
60
64
  # @see #tokens
61
65
  def self.scan(source, name = "(ace file)")
@@ -92,8 +96,15 @@ module Antelope
92
96
  start = [@scanner.pos - 8, 0].max
93
97
  stop = [@scanner.pos + 8, @scanner.string.length].min
94
98
  snip = @scanner.string[start..stop].strip.inspect
95
- char = @scanner.string[@scanner.pos].inspect
96
- new_line = "#{@source}:#{@line}:unexpected #{char} (near #{snip})"
99
+ char = @scanner.string[@scanner.pos]
100
+ char = if char
101
+ char.inspect
102
+ else
103
+ "EOF"
104
+ end
105
+
106
+ new_line = "#{@source}:#{@line}: unexpected #{char} " \
107
+ "(near #{snip})"
97
108
 
98
109
  raise e, e.message, [new_line, *e.backtrace]
99
110
  end
@@ -111,17 +122,12 @@ module Antelope
111
122
 
112
123
  private
113
124
 
114
- # Raises an error; first creates a small snippet to give the developer
115
- # some context.
125
+ # Raises an error.
116
126
  #
117
127
  # @raise [SyntaxError] always.
118
128
  # @return [void]
119
129
  def error!
120
- start = [@scanner.pos - 8, 0].max
121
- stop = [@scanner.pos + 8, @scanner.string.length].min
122
- snip = @scanner.string[start..stop].strip
123
- char = @scanner.string[@scanner.pos]
124
- raise SyntaxError, "invalid syntax"# near `#{snip.inspect}' (#{char.inspect})"
130
+ raise SyntaxError, "invalid syntax"
125
131
  end
126
132
  end
127
133
  end