antelope 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +23 -0
  3. data/.rspec +3 -0
  4. data/.yardopts +4 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +29 -0
  8. data/Rakefile +2 -0
  9. data/antelope.gemspec +30 -0
  10. data/bin/antelope +24 -0
  11. data/examples/deterministic.ace +27 -0
  12. data/examples/deterministic.output +229 -0
  13. data/examples/example.ace +45 -0
  14. data/examples/example.output +610 -0
  15. data/examples/simple.ace +26 -0
  16. data/examples/simple.output +194 -0
  17. data/lib/antelope/ace/compiler.rb +290 -0
  18. data/lib/antelope/ace/errors.rb +27 -0
  19. data/lib/antelope/ace/grammar/generation.rb +47 -0
  20. data/lib/antelope/ace/grammar/loading.rb +51 -0
  21. data/lib/antelope/ace/grammar/presidence.rb +59 -0
  22. data/lib/antelope/ace/grammar/production.rb +47 -0
  23. data/lib/antelope/ace/grammar/productions.rb +119 -0
  24. data/lib/antelope/ace/grammar/terminals.rb +41 -0
  25. data/lib/antelope/ace/grammar.rb +59 -0
  26. data/lib/antelope/ace/presidence.rb +51 -0
  27. data/lib/antelope/ace/scanner/first.rb +61 -0
  28. data/lib/antelope/ace/scanner/second.rb +160 -0
  29. data/lib/antelope/ace/scanner/third.rb +25 -0
  30. data/lib/antelope/ace/scanner.rb +110 -0
  31. data/lib/antelope/ace/token/epsilon.rb +22 -0
  32. data/lib/antelope/ace/token/error.rb +24 -0
  33. data/lib/antelope/ace/token/nonterminal.rb +15 -0
  34. data/lib/antelope/ace/token/terminal.rb +15 -0
  35. data/lib/antelope/ace/token.rb +171 -0
  36. data/lib/antelope/ace.rb +50 -0
  37. data/lib/antelope/automaton.rb +36 -0
  38. data/lib/antelope/generation/conflictor/conflict.rb +7 -0
  39. data/lib/antelope/generation/conflictor.rb +45 -0
  40. data/lib/antelope/generation/constructor/first.rb +52 -0
  41. data/lib/antelope/generation/constructor/follow.rb +46 -0
  42. data/lib/antelope/generation/constructor/lookahead.rb +42 -0
  43. data/lib/antelope/generation/constructor/nullable.rb +40 -0
  44. data/lib/antelope/generation/constructor.rb +81 -0
  45. data/lib/antelope/generation/recognizer/rule.rb +93 -0
  46. data/lib/antelope/generation/recognizer/state.rb +56 -0
  47. data/lib/antelope/generation/recognizer.rb +152 -0
  48. data/lib/antelope/generation/tableizer.rb +80 -0
  49. data/lib/antelope/generation.rb +12 -0
  50. data/lib/antelope/generator/output.rb +30 -0
  51. data/lib/antelope/generator/ruby.rb +57 -0
  52. data/lib/antelope/generator/templates/output.erb +49 -0
  53. data/lib/antelope/generator/templates/ruby.erb +62 -0
  54. data/lib/antelope/generator.rb +84 -0
  55. data/lib/antelope/version.rb +4 -0
  56. data/lib/antelope.rb +9 -0
  57. data/spec/antelope/ace/compiler_spec.rb +50 -0
  58. data/spec/antelope/ace/scanner_spec.rb +27 -0
  59. data/spec/antelope/automaton_spec.rb +29 -0
  60. data/spec/spec_helper.rb +38 -0
  61. data/spec/support/benchmark_helper.rb +5 -0
  62. metadata +223 -0
@@ -0,0 +1,47 @@
1
module Antelope
  module Ace
    class Grammar

      # A single production (rule) of the grammar.
      #
      # @!attribute [rw] label
      #   The label (left-hand side) of the production. This should
      #   identify a nonterminal.
      #
      #   @return [Symbol]
      # @!attribute [rw] items
      #   The body (right-hand side) of the production: an array of
      #   terminals and nonterminals.
      #
      #   @return [Array<Token>]
      # @!attribute [rw] block
      #   The code to be executed when the production's right-hand
      #   side is reduced.
      #
      #   @return [String]
      # @!attribute [rw] prec
      #   The presidence declaration for the production.
      #
      #   @return [Ace::Presidence]
      # @!attribute [rw] id
      #   The ID of the production. The starting production always
      #   has an ID of 0.
      #
      #   @return [Numeric]
      class Production < Struct.new(:label, :items, :block, :prec, :id)

        # Builds a production out of a hash whose keys name the
        # attributes of this class. Every attribute is looked up under
        # its symbol key first, falling back to the string key when
        # the symbol lookup yields nil or false.
        #
        # @param hash [Hash<(Symbol, Object)>]
        # @return [Production]
        def self.from_hash(hash)
          values = members.map do |attribute|
            hash[attribute] || hash[attribute.to_s]
          end

          new(*values)
        end
      end
    end
  end
end
@@ -0,0 +1,119 @@
1
module Antelope
  module Ace
    class Grammar

      # Manages the productions of the grammar.
      module Productions

        # Returns a hash of all of the productions, keyed by the label
        # of the nonterminal they belong to. The result is cached.
        #
        # @return [Hash<(Symbol, Array<Production>)>]
        def productions
          @_productions || generate_productions
        end

        # Returns all productions for all nonterminals, sorted by id.
        #
        # @return [Array<Production>]
        def all_productions
          productions.values.flatten.sort_by(&:id)
        end

        private

        # Actually generates the productions from the compiler's
        # rules. Makes two passes over the rules: the first registers
        # every label so that {#find_token} can recognise
        # nonterminals, and the second constructs the productions.
        # Finally the `:$start` production is added.
        #
        # The cache is assigned before the passes run because
        # {#find_token} reads {#productions} while generating. If
        # anything fails part-way, the cache is cleared again so that
        # a half-built hash is never handed out by a later call.
        #
        # @return [Hash<(Symbol, Array<Production>)>]
        def generate_productions
          @_productions = {}
          rules = @compiler.rules

          rules.each { |rule| productions[rule[:label]] = [] }
          rules.each_with_index do |rule, id|
            productions[rule[:label]] << generate_production_for(rule, id)
          end

          productions[:$start] = [default_production]

          productions
        rescue StandardError
          # Do not leave a partially generated result cached.
          @_productions = nil
          raise
        end

        # Generates a production for a given compiler rule. Converts
        # the symbols in the rule's set to their {Token}
        # counterparts, and then determines the presidence. If the
        # rule's presidence declaration is empty, the last terminal
        # of the body is used to look up the presidence; otherwise
        # the declared token is used.
        #
        # @param rule [Hash] the compiler's rule.
        # @param id [Numeric] the zero-based index of the rule; the
        #   production receives `id + 1` because id 0 is reserved for
        #   the `:$start` production.
        # @return [Production]
        def generate_production_for(rule, id)
          left  = rule[:label]
          items = rule[:set].map { |name| find_token(name) }
          prec  = if rule[:prec].empty?
                    items.select(&:terminal?).last
                  else
                    find_token(rule[:prec])
                  end

          prec = presidence_for(prec)

          Production.new(Token::Nonterminal.new(left), items,
                         rule[:block], prec, id + 1)
        end

        # Creates the default production for the grammar. The left
        # hand side is the `:$start` symbol; the right hand side is
        # the first rule's label followed by the terminal `$`. It is
        # given the last presidence and an id of 0.
        #
        # @return [Production]
        def default_production
          Production.new(Token::Nonterminal.new(:$start), [
            Token::Nonterminal.new(@compiler.rules.first[:label]),
            Token::Terminal.new(:"$")
          ], "", presidence.last, 0)
        end

        # Finds a token based on its corresponding symbol. First
        # checks the productions, to see if it's a nonterminal; then
        # tries to find it in the terminals; otherwise, if the symbol
        # is `error`, it returns a {Token::Error}; if the symbol is
        # `nothing` or `ε`, it returns a {Token::Epsilon}; if it's
        # none of those, it raises an {UndefinedTokenError}.
        #
        # @raise [UndefinedTokenError] if the token doesn't exist.
        # @param value [String, Symbol, #intern] the token's symbol to
        #   check.
        # @return [Token]
        def find_token(value)
          value = value.intern

          return Token::Nonterminal.new(value) if productions.key?(value)

          terminal = terminals.find { |term| term.name == value }
          return terminal if terminal

          case value
          when :error
            Token::Error.new
          when :nothing, :ε
            Token::Epsilon.new
          else
            raise UndefinedTokenError, "Could not find a token named #{value.inspect}"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
module Antelope
  module Ace
    class Grammar

      # Manages a list of the terminals in the grammar.
      module Terminals

        # Every terminal of the grammar. Reads the `:terminals` entry
        # of the compiler options (an empty list when absent) and
        # wraps each entry in a {Token::Terminal}. The array is built
        # once and then reused.
        #
        # @return [Array<Token::Terminal>]
        def terminals
          return @_terminals if @_terminals

          declared = @compiler.options.fetch(:terminals, [])
          @_terminals = declared.map { |args| Token::Terminal.new(*args) }
        end

        # Every nonterminal of the grammar, i.e. the keys of the
        # productions hash. Memoized.
        #
        # @return [Array<Symbol>]
        # @see #productions
        def nonterminals
          @_nonterminals ||= productions.keys
        end

        # Every symbol of the grammar: the terminals followed by the
        # nonterminals. Memoized.
        #
        # @return [Array<Token::Terminal, Symbol>]
        # @see #terminals
        # @see #nonterminals
        def symbols
          @_symbols ||= [*terminals, *nonterminals]
        end
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
require "pathname"

require "antelope/ace/grammar/terminals"
require "antelope/ace/grammar/productions"
require "antelope/ace/grammar/presidence"
require "antelope/ace/grammar/loading"
require "antelope/ace/grammar/generation"
require "antelope/ace/grammar/production"
7
+
8
module Antelope
  module Ace

    # Defines a grammar from an Ace file. This handles setting up
    # productions, loading from files, terminals, presidence, and
    # generation; each concern lives in one of the included modules.
    class Grammar

      include Terminals
      include Productions
      include Presidence
      include Loading
      include Grammar::Generation

      # Used by a generation class; this is all the generated states
      # of the grammar.
      #
      # @return [Set<Generation::Recognizer::State>]
      # @see Generation::Recognizer
      attr_accessor :states

      # The name of the grammar. This is normally assumed from a file
      # name.
      #
      # @return [String]
      attr_accessor :name

      # The output directory for the grammar. This is normally the
      # same directory as the Ace file.
      #
      # @return [Pathname]
      attr_accessor :output

      # The compiler for the Ace file.
      #
      # @return [Compiler]
      attr_reader :compiler

      # Initialize.
      #
      # @param name [String]
      # @param output [String, Pathname] the output directory. It is
      #   always wrapped in a Pathname (this relies on the standard
      #   library's `pathname` being loaded).
      # @param compiler [Compiler]
      def initialize(name, output, compiler)
        @name     = name
        @output   = Pathname.new(output)
        @compiler = compiler
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
module Antelope
  module Ace

    # Defines a presidence. A presidence has a type, tokens, and a
    # level.
    class Presidence < Struct.new(:type, :tokens, :level)

      # @!attribute [rw] type
      #   The type of presidence level. This should be one of
      #   `:left`, `:right`, or `:nonassoc`.
      #
      #   @return [Symbol] the type.
      # @!attribute [rw] tokens
      #   A set of tokens that are on this specific presidence
      #   level. The tokens are identified as symbols. The special
      #   symbol, `:_`, represents any token.
      #
      #   @return [Set<Symbol>] the tokens on this level.
      # @!attribute [rw] level
      #   The level we're on. The higher the level, the higher the
      #   presidence.

      include Comparable

      # Orders this presidence relative to another one.
      #
      # Anything that is not a {Presidence} is incomparable (nil).
      # When the two levels differ, the result is the comparison of
      # the levels. When they are equal, the associativity of *this*
      # presidence decides: left associative yields 1, right
      # associative yields -1, and anything else (nonassociative)
      # yields 0.
      #
      # @param other [Object] the object to compare to this one.
      # @return [Numeric?]
      def <=>(other)
        return nil unless other.is_a? Presidence
        return level <=> other.level unless level == other.level

        case type
        when :left  then 1
        when :right then -1
        else 0
        end
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
module Antelope
  module Ace
    class Scanner

      # Scans the first section of the file. This contains directives and
      # small blocks that can be copied directly into the body of the output.
      # The blocks are formatted as `%{ ... %}`; however, the ending tag _must_
      # be on its own line. The directive is formatted as `%<name> <value>`,
      # with `<name>` being the key, and `<value>` being the value. The value
      # can be a piece of straight-up text (no quotes), or it can be quoted.
      # There can be any number of values to a directive.
      module First

        # Consumes input up to (but not including) the first content
        # boundary. Each step must be a copy block, a directive, or
        # whitespace; anything else is a syntax error.
        #
        # @raise [SyntaxError] if it encounters anything but whitespace, a
        #   block, or a directive.
        # @return [void]
        def scan_first_part
          until @scanner.check(CONTENT_BOUNDRY)
            next if scan_first_copy || scan_first_directive || scan_whitespace

            error!
          end
        end

        # Scans a `%{ ... %}` block. It is called `copy` because the
        # contents of the block are _copied_ directly into the body.
        # Emits a `:copy` token carrying the block's contents.
        #
        # @return [Boolean] if it matched.
        def scan_first_copy
          return unless @scanner.scan(/%{([\s\S]+?)\n\s*%}/)

          tokens << [:copy, @scanner[1]]
        end

        # Scans a directive. A directive has one _name_ followed, on
        # the same line, by any number of arguments. The name is made
        # of letters, underscores, and dashes; every argument must
        # match the VALUE pattern (a bare word or a quoted string).
        # Emits a `:directive` token with the name (String) and the
        # arguments (Array<String>).
        #
        # @return [Boolean] if it matched.
        def scan_first_directive
          return unless @scanner.scan(/%([A-Za-z_-]+) ?/)

          name  = @scanner[1]
          value = /#{VALUE}/x
          args  = []

          until @scanner.check(/\n/)
            @scanner.scan(value) || error!
            # Group 2 holds a quoted value, group 3 a bare word.
            args << (@scanner[2] || @scanner[3])
            @scanner.scan(/ */)
          end

          tokens << [:directive, name, args]
        end
      end
    end
  end
end
@@ -0,0 +1,160 @@
1
module Antelope
  module Ace
    class Scanner

      # Scans the second part of the file. The second part of the
      # file _only_ contains productions (or rules). Rules have a
      # label and a body; the label may be any lowercase alphabetical
      # identifier followed by a colon; the body consists of "parts",
      # an "or", a "prec", and/or a "block". The part may consist
      # of any alphabetical characters. An or is just a vertical bar
      # (`|`). A prec is a presidence declaration, which is `%prec `
      # followed by any alphabetical characters. A block is a `{`,
      # followed by code, followed by a terminating `}`. Rules _may_
      # be terminated by a semicolon, but this is optional.
      module Second

        # Scans the second part of the file. The scanner must be
        # positioned at the first content boundary; if it is not, an
        # error is raised. It then scans rules and whitespace until
        # the next content boundary.
        #
        # @raise [SyntaxError] if no content boundary was found, or if
        #   the scanner encounters anything but a rule or whitespace.
        # @return [void]
        # @see #scan_second_rule
        # @see #scan_whitespace
        # @see #error!
        def scan_second_part
          @scanner.scan(CONTENT_BOUNDRY) || error!
          tokens << [:second]

          until @scanner.check(CONTENT_BOUNDRY)
            scan_second_rule || scan_whitespace || error!
          end
        end

        # Scans a rule. A rule consists of a label (the nonterminal
        # the production is for) and a body, optionally followed by
        # a semicolon.
        #
        # @return [Boolean] if it matched
        # @see #scan_second_rule_label
        # @see #scan_second_rule_body
        # @see #error!
        def scan_second_rule
          return unless @scanner.check(/([a-z]+):/)

          scan_second_rule_label || error!
          scan_second_rule_body
          true
        end

        # Scans the label for a rule. It should contain only lower
        # case letters and a colon. Emits a `:label` token.
        #
        # @return [Boolean] if it matched.
        def scan_second_rule_label
          return unless @scanner.scan(/([a-z]+): ?/)

          tokens << [:label, @scanner[1]]
        end

        # The body can contain parts, ors, precs, or blocks (or
        # whitespace). Scans all of them until none matches, and
        # then attempts to scan a semicolon.
        #
        # @return [void]
        # @see #scan_second_rule_part
        # @see #scan_second_rule_or
        # @see #scan_second_rule_prec
        # @see #scan_second_rule_block
        # @see #scan_whitespace
        def scan_second_rule_body
          loop do
            break unless scan_second_rule_part || scan_second_rule_or ||
                         scan_second_rule_prec || scan_second_rule_block ||
                         scan_whitespace
          end

          @scanner.scan(/;/)
        end

        # Attempts to scan a "part". A part is a whole run of
        # alphabetical characters that is not followed by a colon.
        #
        # The lookahead rejects a following letter as well as a
        # colon. With a colon-only lookahead the greedy match would
        # backtrack and accept a prefix of the next rule's label
        # (`term:` would yield the part `ter`) whenever the previous
        # rule is not terminated by a semicolon.
        #
        # @return [Boolean] if it matched.
        def scan_second_rule_part
          return unless @scanner.scan(/([A-Za-z]+)(?![A-Za-z:])/)

          tokens << [:part, @scanner[1]]
        end

        # Attempts to scan an "or". It's just a vertical bar.
        #
        # @return [Boolean] if it matched.
        def scan_second_rule_or
          return unless @scanner.scan(/\|/)

          tokens << [:or]
        end

        # Attempts to scan a presidence definition. A presidence
        # definition is "%prec " followed by alphabetical characters
        # naming a terminal or nonterminal.
        #
        # @return [Boolean] if it matched.
        def scan_second_rule_prec
          return unless @scanner.scan(/%prec ([A-Za-z]+)/)

          tokens << [:prec, @scanner[1]]
        end

        # Attempts to scan a block. This balances brackets; however,
        # a bracket that appears within a string still counts as a
        # bracket that needs to be balanced. So, having extensive
        # code within a block is not a good idea.
        #
        # @return [Boolean] if it matched.
        def scan_second_rule_block
          return unless @scanner.scan(/\{/)

          tokens << [:block, _scan_block]
        end

        private

        # Scans the remainder of a block whose opening bracket has
        # already been consumed. Every further opening bracket
        # increments the depth and every closing bracket decrements
        # it; scanning stops once the depth reaches zero.
        #
        # @return [String] the block's body, including the outermost
        #   brackets.
        # @raise [SyntaxError] if it reaches the end before the final
        #   bracket is closed.
        def _scan_block
          depth = 1
          body  = "{"

          until depth.zero?
            part = @scanner.scan_until(/(\}|\{)/) || error!
            body << part
            depth += @scanner[1] == "}" ? -1 : 1
          end

          body
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
module Antelope
  module Ace
    class Scanner

      # Scans the third part. Everything after the content
      # boundary is copied directly into the output.
      module Third

        # Scans the third part. The scanner must be positioned at a
        # content boundary; an error is raised otherwise. Everything
        # up to the end of the input is then emitted as a single
        # `:copy` token (an empty string when nothing is left).
        #
        # @raise [SyntaxError] if somehow there is no content
        #   boundary.
        # @return [void]
        def scan_third_part
          @scanner.scan(CONTENT_BOUNDRY) || error!

          remainder = @scanner.scan(/[\s\S]+/m)
          tokens.push([:third], [:copy, remainder || ""])
        end
      end
    end
  end
end
@@ -0,0 +1,110 @@
1
+ require "strscan"
2
+ require "antelope/ace/scanner/first"
3
+ require "antelope/ace/scanner/second"
4
+ require "antelope/ace/scanner/third"
5
+
6
module Antelope
  module Ace

    # Scans a given input. The input should be a properly formatted ACE file;
    # see the Ace module for more information. This scanner uses the
    # StringScanner class internally; see the ruby documentation for more on
    # that. This scanner separates scanning into three separate stages:
    # First, Second, and Third, for each section of the file, respectively.
    #
    # @see Ace
    # @see http://ruby-doc.org/stdlib-2.1.2/libdoc/strscan/rdoc/StringScanner.html
    class Scanner

      include First
      include Second
      include Third

      # The string scanner that we're using to scan the string with.
      #
      # @return [StringScanner]
      attr_reader :scanner

      # An array of the tokens that the scanner scanned.
      #
      # @return [Array<Array<(Symbol, Object, ...)>>]
      attr_reader :tokens

      # The boundary between each section. Placed here to be easily
      # modifiable. **MUST** be a regular expression.
      #
      # @return [Regexp]
      CONTENT_BOUNDRY = /%%/

      # The value regular expression source. It should match values; for
      # example, things quoted in strings or word letters without quotes.
      # Must respond to #to_s, since it is embedded within other regular
      # expressions (in extended mode). The contents of the value end up in
      # group 2 (quoted) or group 3 (bare word).
      #
      # @return [#to_s]
      VALUE = %q{(?:
                  (?:("|')((?:\\\\|\\"|\\'|.)+?)\\1)
                | ([[:word:]]+)
                )}

      # Scans a source string and returns the resulting tokens.
      #
      # @param source [String] the source to scan. This should be compatible
      #   with StringScanner.
      # @return [Array<Array<(Symbol, Object, ...)>>]
      # @see #tokens
      def self.scan(source)
        instance = new(source)
        instance.scan_file
      end

      # Initialize the scanner with the input.
      #
      # @param input [String] The source to scan.
      def initialize(input)
        @tokens  = []
        @scanner = StringScanner.new(input)
      end

      # Scans the three parts of the file, in order.
      #
      # @raise [SyntaxError] if the source is malformed in some way.
      # @return [Array<Array<(Symbol, Object, ...)>>] the tokens that
      #   were scanned in this file.
      # @see #scan_first_part
      # @see #scan_second_part
      # @see #scan_third_part
      # @see #tokens
      def scan_file
        scan_first_part
        scan_second_part
        scan_third_part

        tokens
      end

      # Consumes a run of whitespace at the current position, if there
      # is one.
      #
      # @return [Boolean] if any whitespace was matched.
      def scan_whitespace
        @scanner.scan(/\s+/)
      end

      private

      # Raises an error. The message carries a snippet of the source
      # around the current position (8 characters either side, stripped)
      # and the character at the current position, to give the developer
      # some context.
      #
      # @raise [SyntaxError] always.
      # @return [void]
      def error!
        source   = @scanner.string
        position = @scanner.pos
        from     = [0, position - 8].max
        to       = [source.length, position + 8].min
        context  = source[from..to].strip
        current  = source[position]

        raise SyntaxError, "invalid syntax near `#{context.inspect}' (#{current.inspect})"
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module Antelope
  module Ace
    class Token

      # Defines an epsilon token. An epsilon token represents
      # nothing. This is used to say that a nonterminal can
      # reduce to nothing.
      class Epsilon < Token

        # Initialize. Any arguments given are accepted and ignored;
        # the token's name is always `:epsilon`.
        def initialize(*_ignored)
          super(:epsilon)
        end

        # (see Token#epsilon?)
        def epsilon?
          true
        end
      end
    end
  end
end