antelope 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +23 -0
  3. data/.rspec +3 -0
  4. data/.yardopts +4 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +29 -0
  8. data/Rakefile +2 -0
  9. data/antelope.gemspec +30 -0
  10. data/bin/antelope +24 -0
  11. data/examples/deterministic.ace +27 -0
  12. data/examples/deterministic.output +229 -0
  13. data/examples/example.ace +45 -0
  14. data/examples/example.output +610 -0
  15. data/examples/simple.ace +26 -0
  16. data/examples/simple.output +194 -0
  17. data/lib/antelope/ace/compiler.rb +290 -0
  18. data/lib/antelope/ace/errors.rb +27 -0
  19. data/lib/antelope/ace/grammar/generation.rb +47 -0
  20. data/lib/antelope/ace/grammar/loading.rb +51 -0
  21. data/lib/antelope/ace/grammar/presidence.rb +59 -0
  22. data/lib/antelope/ace/grammar/production.rb +47 -0
  23. data/lib/antelope/ace/grammar/productions.rb +119 -0
  24. data/lib/antelope/ace/grammar/terminals.rb +41 -0
  25. data/lib/antelope/ace/grammar.rb +59 -0
  26. data/lib/antelope/ace/presidence.rb +51 -0
  27. data/lib/antelope/ace/scanner/first.rb +61 -0
  28. data/lib/antelope/ace/scanner/second.rb +160 -0
  29. data/lib/antelope/ace/scanner/third.rb +25 -0
  30. data/lib/antelope/ace/scanner.rb +110 -0
  31. data/lib/antelope/ace/token/epsilon.rb +22 -0
  32. data/lib/antelope/ace/token/error.rb +24 -0
  33. data/lib/antelope/ace/token/nonterminal.rb +15 -0
  34. data/lib/antelope/ace/token/terminal.rb +15 -0
  35. data/lib/antelope/ace/token.rb +171 -0
  36. data/lib/antelope/ace.rb +50 -0
  37. data/lib/antelope/automaton.rb +36 -0
  38. data/lib/antelope/generation/conflictor/conflict.rb +7 -0
  39. data/lib/antelope/generation/conflictor.rb +45 -0
  40. data/lib/antelope/generation/constructor/first.rb +52 -0
  41. data/lib/antelope/generation/constructor/follow.rb +46 -0
  42. data/lib/antelope/generation/constructor/lookahead.rb +42 -0
  43. data/lib/antelope/generation/constructor/nullable.rb +40 -0
  44. data/lib/antelope/generation/constructor.rb +81 -0
  45. data/lib/antelope/generation/recognizer/rule.rb +93 -0
  46. data/lib/antelope/generation/recognizer/state.rb +56 -0
  47. data/lib/antelope/generation/recognizer.rb +152 -0
  48. data/lib/antelope/generation/tableizer.rb +80 -0
  49. data/lib/antelope/generation.rb +12 -0
  50. data/lib/antelope/generator/output.rb +30 -0
  51. data/lib/antelope/generator/ruby.rb +57 -0
  52. data/lib/antelope/generator/templates/output.erb +49 -0
  53. data/lib/antelope/generator/templates/ruby.erb +62 -0
  54. data/lib/antelope/generator.rb +84 -0
  55. data/lib/antelope/version.rb +4 -0
  56. data/lib/antelope.rb +9 -0
  57. data/spec/antelope/ace/compiler_spec.rb +50 -0
  58. data/spec/antelope/ace/scanner_spec.rb +27 -0
  59. data/spec/antelope/automaton_spec.rb +29 -0
  60. data/spec/spec_helper.rb +38 -0
  61. data/spec/support/benchmark_helper.rb +5 -0
  62. metadata +223 -0
@@ -0,0 +1,47 @@
1
module Antelope
  module Ace
    class Grammar

      # A single production (rule) of the grammar.
      #
      # @!attribute [rw] label
      #   The left-hand side of the production. This should be a
      #   nonterminal.
      #
      #   @return [Symbol]
      # @!attribute [rw] items
      #   The right-hand side (body) of the production: an array of
      #   terminals and nonterminals.
      #
      #   @return [Array<Token>]
      # @!attribute [rw] block
      #   The code to run when the right-hand side of the production
      #   is reduced.
      #
      #   @return [String]
      # @!attribute [rw] prec
      #   The presidence declaration attached to the production.
      #
      #   @return [Ace::Presidence]
      # @!attribute [rw] id
      #   The ID of the production. The starting production always
      #   has an ID of 0.
      #
      #   @return [Numeric]
      class Production < Struct.new(:label, :items, :block, :prec, :id)

        # Builds a production out of a hash whose keys name the
        # attributes of this class. Every attribute is looked up by
        # its Symbol key first; the String key is consulted only when
        # the Symbol lookup yields nil or false.
        #
        # @param hash [Hash<(Symbol, Object)>]
        # @return [Production]
        def self.from_hash(hash)
          values = members.map do |attribute|
            hash[attribute] || hash[attribute.to_s]
          end

          new(*values)
        end
      end
    end
  end
end
@@ -0,0 +1,119 @@
1
module Antelope
  module Ace
    class Grammar

      # Manages the productions of the grammar.
      module Productions

        # Returns a hash of all of the productions, keyed by label.
        # The result is cached after the first successful build.
        #
        # @return [Hash<(Symbol, Array<Production>)>]
        def productions
          @_productions || generate_productions
        end

        # Returns all productions for all nonterminals, sorted by id.
        #
        # @return [Array<Production>]
        def all_productions
          productions.values.flatten.sort_by(&:id)
        end

        private

        # Actually generates the productions from the compiler's
        # rules. Two passes are made over the rules: the first
        # registers every label so that {#find_token} can recognise a
        # nonterminal before its productions exist; the second
        # constructs the productions themselves. Finally the
        # `:$start` production is added.
        #
        # If anything raises while the table is being built, the
        # cache is cleared again so that a later call to
        # {#productions} retries instead of silently returning a
        # half-built table.
        #
        # @return [Hash<(Symbol, Array<Production>)>]
        def generate_productions
          # The cache has to be assigned up front: #find_token reads
          # it (through #productions) while the table is being built.
          @_productions = {}
          complete      = false
          rules         = @compiler.rules

          rules.each { |rule| productions[rule[:label]] = [] }

          rules.each_with_index do |rule, id|
            productions[rule[:label]] << generate_production_for(rule, id)
          end

          productions[:$start] = [default_production]
          complete = true

          productions
        ensure
          @_productions = nil unless complete
        end

        # Generates a production for a given compiler rule. Converts
        # the tokens in the set to their {Token} counterparts, and
        # then sets the presidence for the production. If the
        # presidence declaration from the compiler rule is empty, the
        # last terminal of the set is used to look up the presidence;
        # otherwise the declared token is used.
        #
        # @param rule [Hash] the compiler's rule.
        # @param id [Numeric] the zero-based index of the rule; the
        #   production receives `id + 1`, since 0 is used by the
        #   `:$start` production.
        # @return [Production]
        def generate_production_for(rule, id)
          left  = rule[:label]
          items = rule[:set].map { |name| find_token(name) }
          prec  = if rule[:prec].empty?
                    items.select(&:terminal?).last
                  else
                    find_token(rule[:prec])
                  end

          prec = presidence_for(prec)

          Production.new(Token::Nonterminal.new(left), items,
                         rule[:block], prec, id + 1)
        end

        # Creates the default production for the grammar. The left
        # hand side of the production is the `:$start` symbol, with
        # the right hand side being the first rule's left-hand side
        # followed by the terminal `$`. This production is given the
        # last presidence, and an id of 0.
        #
        # @return [Production]
        def default_production
          Production.new(Token::Nonterminal.new(:$start), [
            Token::Nonterminal.new(@compiler.rules.first[:label]),
            Token::Terminal.new(:"$")
          ], "", presidence.last, 0)
        end

        # Finds a token based on its corresponding symbol. First
        # checks the productions, to see if it's a nonterminal; then
        # tries to find it in the terminals; otherwise, if the symbol
        # is `error`, it returns a {Token::Error}; if the symbol is
        # `nothing` or `ε`, it returns a {Token::Epsilon}; if it's
        # none of those, it raises an {UndefinedTokenError}.
        #
        # @raise [UndefinedTokenError] if the token doesn't exist.
        # @param value [String, Symbol, #intern] the token's symbol to
        #   check.
        # @return [Token]
        def find_token(value)
          value = value.intern
          if productions.key?(value)
            Token::Nonterminal.new(value)
          elsif (terminal = terminals.find { |term| term.name == value })
            terminal
          elsif value == :error
            Token::Error.new
          elsif [:nothing, :ε].include?(value)
            Token::Epsilon.new
          else
            raise UndefinedTokenError, "Could not find a token named #{value.inspect}"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
module Antelope
  module Ace
    class Grammar

      # Keeps track of the terminals (and, by extension, all symbols)
      # of the grammar.
      module Terminals

        # Every terminal of the grammar. The terminals are read from
        # the `:terminals` compiler option (none when the option is
        # absent), each entry being splatted into
        # {Token::Terminal.new}. The resulting array is memoized.
        #
        # @return [Array<Token::Terminal>]
        def terminals
          return @_terminals if @_terminals

          declared    = @compiler.options.fetch(:terminals, [])
          @_terminals = declared.map { |arguments| Token::Terminal.new(*arguments) }
        end

        # Every nonterminal of the grammar, i.e. the keys of
        # {#productions}. Memoized.
        #
        # @return [Array<Symbol>]
        # @see #productions
        def nonterminals
          return @_nonterminals if @_nonterminals

          @_nonterminals = productions.keys
        end

        # Every symbol of the grammar: the terminals followed by the
        # nonterminals. Memoized.
        #
        # @return [Array<Token::Terminal, Symbol>]
        # @see #terminals
        # @see #nonterminals
        def symbols
          return @_symbols if @_symbols

          @_symbols = terminals + nonterminals
        end
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
+ require "antelope/ace/grammar/terminals"
2
+ require "antelope/ace/grammar/productions"
3
+ require "antelope/ace/grammar/presidence"
4
+ require "antelope/ace/grammar/loading"
5
+ require "antelope/ace/grammar/generation"
6
+ require "antelope/ace/grammar/production"
7
+
8
# Grammar#initialize wraps the output directory in a Pathname, so make
# sure the class is loaded regardless of what was required before this
# file.
require "pathname"

module Antelope
  module Ace

    # Defines a grammar from an Ace file. This handles setting up
    # productions, loading from files, terminals, presidence, and
    # generation.
    class Grammar

      include Terminals
      include Productions
      include Presidence
      include Loading
      include Grammar::Generation

      # Used by a generation class; this is all the generated states
      # of the grammar.
      #
      # @return [Set<Generation::Recognizer::State>]
      # @see Generation::Recognizer
      attr_accessor :states

      # The name of the grammar. This is normally assumed from a file
      # name.
      #
      # @return [String]
      attr_accessor :name

      # The output directory for the grammar. This is normally the
      # same directory as the Ace file.
      #
      # @return [Pathname]
      attr_accessor :output

      # The compiler for the Ace file.
      #
      # @return [Compiler]
      attr_reader :compiler

      # Initialize.
      #
      # @param name [String]
      # @param output [String] the output directory. Automagically
      #   turned into a Pathname.
      # @param compiler [Compiler]
      def initialize(name, output, compiler)
        @name     = name
        @output   = Pathname.new(output)
        @compiler = compiler
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
module Antelope
  module Ace

    # Defines a presidence. A presidence has a type, tokens, and a
    # level.
    class Presidence < Struct.new(:type, :tokens, :level)

      # @!attribute [rw] type
      #   The type of presidence level. This should be one of
      #   `:left`, `:right`, or `:nonassoc`.
      #
      #   @return [Symbol] the type.
      # @!attribute [rw] tokens
      #   A set of tokens that are on this specific presidence
      #   level. The tokens are identified as symbols. The special
      #   symbol, `:_`, represents any token.
      #
      #   @return [Set<Symbol>] the tokens on this level.
      # @!attribute [rw] level
      #   The level we're on. The higher the level, the higher the
      #   presidence.

      include Comparable

      # Orders this presidence relative to another one.
      #
      # * Anything that is not a {Presidence} is incomparable (nil).
      # * Presidences on different levels are ordered by level.
      # * On the same level the associativity of the receiver
      #   decides: left associative yields 1 (greater than the
      #   other), right associative yields -1 (less than the other),
      #   and anything else (nonassociative) yields 0 (equal).
      #
      # @param other [Object] the object to compare to this one.
      # @return [Numeric?]
      def <=>(other)
        return nil unless other.is_a? Presidence
        return level <=> other.level if level != other.level

        case type
        when :left  then 1
        when :right then -1
        else 0
        end
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
module Antelope
  module Ace
    class Scanner

      # Scans the first section of the file. This contains directives and
      # small blocks that can be copied directly into the body of the output.
      # The blocks are formatted as `%{ ... %}`; however, the ending tag _must_
      # be on its own line. The directive is formatted as `%<name> <value>`,
      # with `<name>` being the key, and `<value>` being the value. The value
      # can be a piece of straight-up text (no quotes), or it can be quoted.
      # There can be any number of values to a directive.
      module First

        # Scans until the first content boundary. If it encounters anything
        # but a block or a directive (or whitespace), it will raise an error.
        #
        # @raise [SyntaxError] if it encounters anything but whitespace, a
        #   block, or a directive.
        # @return [void]
        def scan_first_part
          until @scanner.check(CONTENT_BOUNDRY)
            scan_first_copy || scan_first_directive ||
              scan_whitespace || error!
          end
        end

        # Scans for a block. It is called `copy` instead of `block` because
        # the contents of the block are _copied_ directly into the body.
        # Emits a `:copy` token carrying the text between the delimiters.
        #
        # @return [Boolean] if it matched.
        def scan_first_copy
          return unless @scanner.scan(/%{([\s\S]+?)\n\s*%}/)

          tokens << [:copy, @scanner[1]]
        end

        # Scans a directive. A directive has one _name_, and any number of
        # arguments. Every argument is a _value_. The name can be any
        # combination of alphabetical characters, underscores, and dashes;
        # the value can be word characters, or a quote-delimited string.
        # It emits a `:directive` token with the directive (String) as an
        # argument, and the passed arguments (Array<String>).
        #
        # The name and the arguments may be separated by any run of spaces
        # or tabs, and the directive ends at the end of the line (either
        # `\n` or `\r\n`). The line terminator itself is not consumed.
        #
        # @raise [SyntaxError] if something other than a value follows the
        #   name before the end of the line.
        # @return [Boolean] if it matched.
        def scan_first_directive
          return unless @scanner.scan(/%([A-Za-z_-]+)[ \t]*/)

          directive = @scanner[1]
          arguments = []
          # Built once per directive rather than once per argument.
          value     = /#{VALUE}/x

          until @scanner.check(/\r?\n/)
            @scanner.scan(value) || error!
            # Group 2 holds the contents of a quoted value, group 3 a
            # bare word.
            arguments.push(@scanner[2] || @scanner[3])
            @scanner.scan(/[ \t]*/)
          end

          tokens << [:directive, directive, arguments]
        end
      end
    end
  end
end
@@ -0,0 +1,160 @@
1
module Antelope
  module Ace
    class Scanner

      # Scans the second part of the file. The second part of the
      # file _only_ contains productions (or rules). Rules have a
      # label and a body; the label may be any lowercase alphabetical
      # identifier followed by a colon; the body consists of "parts",
      # an "or", a "prec", and/or a "block". The part may consist
      # of any alphabetical characters. An or is just a vertical bar
      # (`|`). A prec is a presidence declaration, which is `%prec `
      # followed by any alphabetical characters. A block is a `{`,
      # followed by code, followed by a terminating `}`. Rules _may_
      # be terminated by a semicolon, but this is optional.
      module Second

        # Scans the second part of the file. This should be from just
        # before the first content boundary; if the scanner doesn't
        # find a content boundary, it will error. It then scans rules
        # (and whitespace) up to, but not including, the next content
        # boundary.
        #
        # @raise [SyntaxError] if no content boundary was found, or if
        #   the scanner encounters anything but a rule or whitespace.
        # @return [void]
        # @see #scan_second_rule
        # @see #scan_whitespace
        # @see #error!
        def scan_second_part
          @scanner.scan(CONTENT_BOUNDRY) || error!
          tokens << [:second]

          until @scanner.check(CONTENT_BOUNDRY)
            scan_second_rule || scan_whitespace || error!
          end
        end

        # Scans a rule. A rule consists of a label (the nonterminal
        # the production is for), a body, and a block; and then,
        # an optional semicolon.
        #
        # @return [Boolean] if it matched
        # @see #scan_second_rule_label
        # @see #scan_second_rule_body
        # @see #error!
        def scan_second_rule
          return unless @scanner.check(/([a-z]+):/)

          scan_second_rule_label || error!
          scan_second_rule_body
          true
        end

        # Scans the label for a rule. It should contain only lower
        # case letters and a colon. Emits a `:label` token.
        #
        # @return [Boolean] if it matched.
        def scan_second_rule_label
          return unless @scanner.scan(/([a-z]+): ?/)

          tokens << [:label, @scanner[1]]
        end

        # The body can contain parts, ors, precs, or blocks (or
        # whitespaces). Scans all of them until none matches any
        # more, and then attempts to scan a semicolon.
        #
        # @return [void]
        # @see #scan_second_rule_part
        # @see #scan_second_rule_or
        # @see #scan_second_rule_prec
        # @see #scan_second_rule_block
        # @see #scan_whitespace
        def scan_second_rule_body
          body = true
          while body
            scan_second_rule_part || scan_second_rule_or ||
              scan_second_rule_prec || scan_second_rule_block ||
              scan_whitespace || (body = false)
          end
          @scanner.scan(/;/)
        end

        # Attempts to scan a "part". A part is any series of
        # alphabetical characters that are not followed by a
        # colon. Emits a `:part` token.
        #
        # The lookahead rejects a following letter as well as a
        # colon. Without that, the regular expression engine would
        # backtrack on `term:` and accept the prefix `ter` as a part,
        # tearing apart the label of the next rule whenever the
        # previous rule is not terminated by a semicolon.
        #
        # @return [Boolean] if it matched.
        def scan_second_rule_part
          return unless @scanner.scan(/([A-Za-z]+)(?![A-Za-z:])/)

          tokens << [:part, @scanner[1]]
        end

        # Attempts to scan an "or". It's just a vertical bar.
        #
        # @return [Boolean] if it matched.
        def scan_second_rule_or
          return unless @scanner.scan(/\|/)

          tokens << [:or]
        end

        # Attempts to scan a presidence definition. A presidence
        # definition is "%prec " followed by a terminal or nonterminal.
        #
        # @return [Boolean] if it matched.
        def scan_second_rule_prec
          return unless @scanner.scan(/%prec ([A-Za-z]+)/)

          tokens << [:prec, @scanner[1]]
        end

        # Attempts to scan a block. This correctly balances brackets;
        # however, if a bracket is opened/closed within a string, it
        # still counts that as a bracket that needs to be balanced.
        # So, having extensive code within a block is not a good idea.
        #
        # @raise [SyntaxError] if the block is never closed.
        # @return [Boolean] if it matched.
        def scan_second_rule_block
          return unless @scanner.scan(/\{/)

          tokens << [:block, _scan_block]
        end

        private

        # Scans the block; it scans until it encounters enough closing
        # brackets to match the opening brackets. If it encounters
        # an opening bracket, it increments the bracket counter by
        # one; if it encounters a closing bracket, it decrements by
        # one. It will error if it reaches the end before the
        # brackets are fully closed.
        #
        # @return [String] the block's body, including the outermost
        #   brackets.
        # @raise [SyntaxError] if it reaches the end before the final
        #   bracket is closed.
        def _scan_block
          # The opening bracket was already consumed by the caller.
          brack = 1
          body  = "{"

          until brack.zero?
            part = @scanner.scan_until(/(\}|\{)/)
            error! unless part

            body << part
            brack += @scanner[1] == "}" ? -1 : 1
          end

          body
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
module Antelope
  module Ace
    class Scanner

      # Handles the third section of the file: everything that
      # follows the second content boundary is copied verbatim into
      # the output.
      module Third

        # Scans the third part. The scanner is expected to sit right
        # in front of a content boundary; an error is raised when it
        # does not. A `:third` token is emitted, followed by a
        # `:copy` token holding the remainder of the input (an empty
        # string when nothing follows the boundary).
        #
        # @raise [SyntaxError] if somehow there is no content
        #   boundary.
        # @return [void]
        def scan_third_part
          error! unless @scanner.scan(CONTENT_BOUNDRY)

          tokens << [:third]

          remainder = @scanner.scan(/[\s\S]+/m)
          tokens << [:copy, remainder || ""]
        end
      end
    end
  end
end
@@ -0,0 +1,110 @@
1
+ require "strscan"
2
+ require "antelope/ace/scanner/first"
3
+ require "antelope/ace/scanner/second"
4
+ require "antelope/ace/scanner/third"
5
+
6
module Antelope
  module Ace

    # Scans a given input. The input should be a properly formatted ACE file;
    # see the Ace module for more information. This scanner uses the
    # StringScanner class internally; see the ruby documentation for more on
    # that. This scanner separates scanning into three separate stages:
    # First, Second, and Third, for each section of the file, respectively.
    #
    # @see Ace
    # @see http://ruby-doc.org/stdlib-2.1.2/libdoc/strscan/rdoc/StringScanner.html
    class Scanner

      include First
      include Second
      include Third

      # The string scanner that we're using to scan the string with.
      #
      # @return [StringScanner]
      attr_reader :scanner

      # An array of the tokens that the scanner scanned.
      #
      # @return [Array<Array<(Symbol, Object, ...)>>]
      attr_reader :tokens

      # The boundary between each section. Placed here to be easily
      # modifiable. **MUST** be a regular expression.
      #
      # @return [RegExp]
      CONTENT_BOUNDRY = /%%/

      # The value regular expression. It should match values; for example,
      # things quoted in strings or word letters without quotes. Must respond
      # to #to_s, since it is embedded within other regular expressions. The
      # regular expression should place the contents of the value in the
      # groups 2 or 3.
      #
      # @return [#to_s]
      VALUE = %q{(?:
                  (?:("|')((?:\\\\|\\"|\\'|.)+?)\\1)
                | ([[:word:]]+)
              )}

      # Scans a file. It returns the tokens resulting from scanning.
      #
      # @param source [String] the source to scan. This should be compatible
      #   with StringScanner.
      # @return [Array<Array<(Symbol, Object, ...)>>]
      # @see #tokens
      def self.scan(source)
        instance = new(source)
        instance.scan_file
      end

      # Initialize the scanner with the input.
      #
      # @param input [String] The source to scan.
      def initialize(input)
        @tokens  = []
        @scanner = StringScanner.new(input)
      end

      # Scans the file in parts: the first, second, and third section,
      # in that order.
      #
      # @raise [SyntaxError] if the source is malformed in some way.
      # @return [Array<Array<(Symbol, Object, ...)>>] the tokens that
      #   were scanned in this file.
      # @see #scan_first_part
      # @see #scan_second_part
      # @see #scan_third_part
      # @see #tokens
      def scan_file
        scan_first_part
        scan_second_part
        scan_third_part

        tokens
      end

      # Scans for whitespace. If the next character is whitespace, it
      # will consume all whitespace until the next non-whitespace
      # character.
      #
      # @return [Boolean] if any whitespace was matched.
      def scan_whitespace
        @scanner.scan(/\s+/)
      end

      private

      # Raises an error. The message carries a small snippet of the
      # source (up to eight characters on either side of the current
      # position, stripped of surrounding whitespace) and the
      # character at the current position, to give the developer some
      # context.
      #
      # @raise [SyntaxError] always.
      # @return [void]
      def error!
        source   = @scanner.string
        position = @scanner.pos

        first   = [position - 8, 0].max
        last    = [position + 8, source.length].min
        context = source[first..last].strip
        current = source[position]

        raise SyntaxError, "invalid syntax near `#{context.inspect}' (#{current.inspect})"
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module Antelope
  module Ace
    class Token

      # The epsilon token, which stands for "nothing". It is used to
      # express that a nonterminal can reduce to nothing.
      class Epsilon < Token

        # Initialize. Any arguments given are accepted and discarded;
        # the name of the token is always `:epsilon`.
        def initialize(*_ignored)
          super(:epsilon)
        end

        # (see Token#epsilon?)
        def epsilon?
          true
        end
      end
    end
  end
end