antelope 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +23 -0
- data/.rspec +3 -0
- data/.yardopts +4 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +2 -0
- data/antelope.gemspec +30 -0
- data/bin/antelope +24 -0
- data/examples/deterministic.ace +27 -0
- data/examples/deterministic.output +229 -0
- data/examples/example.ace +45 -0
- data/examples/example.output +610 -0
- data/examples/simple.ace +26 -0
- data/examples/simple.output +194 -0
- data/lib/antelope/ace/compiler.rb +290 -0
- data/lib/antelope/ace/errors.rb +27 -0
- data/lib/antelope/ace/grammar/generation.rb +47 -0
- data/lib/antelope/ace/grammar/loading.rb +51 -0
- data/lib/antelope/ace/grammar/presidence.rb +59 -0
- data/lib/antelope/ace/grammar/production.rb +47 -0
- data/lib/antelope/ace/grammar/productions.rb +119 -0
- data/lib/antelope/ace/grammar/terminals.rb +41 -0
- data/lib/antelope/ace/grammar.rb +59 -0
- data/lib/antelope/ace/presidence.rb +51 -0
- data/lib/antelope/ace/scanner/first.rb +61 -0
- data/lib/antelope/ace/scanner/second.rb +160 -0
- data/lib/antelope/ace/scanner/third.rb +25 -0
- data/lib/antelope/ace/scanner.rb +110 -0
- data/lib/antelope/ace/token/epsilon.rb +22 -0
- data/lib/antelope/ace/token/error.rb +24 -0
- data/lib/antelope/ace/token/nonterminal.rb +15 -0
- data/lib/antelope/ace/token/terminal.rb +15 -0
- data/lib/antelope/ace/token.rb +171 -0
- data/lib/antelope/ace.rb +50 -0
- data/lib/antelope/automaton.rb +36 -0
- data/lib/antelope/generation/conflictor/conflict.rb +7 -0
- data/lib/antelope/generation/conflictor.rb +45 -0
- data/lib/antelope/generation/constructor/first.rb +52 -0
- data/lib/antelope/generation/constructor/follow.rb +46 -0
- data/lib/antelope/generation/constructor/lookahead.rb +42 -0
- data/lib/antelope/generation/constructor/nullable.rb +40 -0
- data/lib/antelope/generation/constructor.rb +81 -0
- data/lib/antelope/generation/recognizer/rule.rb +93 -0
- data/lib/antelope/generation/recognizer/state.rb +56 -0
- data/lib/antelope/generation/recognizer.rb +152 -0
- data/lib/antelope/generation/tableizer.rb +80 -0
- data/lib/antelope/generation.rb +12 -0
- data/lib/antelope/generator/output.rb +30 -0
- data/lib/antelope/generator/ruby.rb +57 -0
- data/lib/antelope/generator/templates/output.erb +49 -0
- data/lib/antelope/generator/templates/ruby.erb +62 -0
- data/lib/antelope/generator.rb +84 -0
- data/lib/antelope/version.rb +4 -0
- data/lib/antelope.rb +9 -0
- data/spec/antelope/ace/compiler_spec.rb +50 -0
- data/spec/antelope/ace/scanner_spec.rb +27 -0
- data/spec/antelope/automaton_spec.rb +29 -0
- data/spec/spec_helper.rb +38 -0
- data/spec/support/benchmark_helper.rb +5 -0
- metadata +223 -0
@@ -0,0 +1,47 @@
|
|
1
|
+
module Antelope
|
2
|
+
module Ace
|
3
|
+
class Grammar
|
4
|
+
|
5
|
+
# Defines a production.
|
6
|
+
#
|
7
|
+
# @!attribute [rw] label
|
8
|
+
# The label (or left-hand side) of the production. This
|
9
|
+
# should be a nonterminal.
|
10
|
+
#
|
11
|
+
# @return [Symbol]
|
12
|
+
# @!attribute [rw] items
|
13
|
+
# The body (or right-hand side) of the production. This can
|
14
|
+
# be an array of terminals and nonterminals.
|
15
|
+
#
|
16
|
+
# @return [Array<Token>]
|
17
|
+
# @!attribute [rw] block
|
18
|
+
# The block of code to be executed when the production's right
|
19
|
+
# hand side is reduced.
|
20
|
+
#
|
21
|
+
# @return [String]
|
22
|
+
# @!attribute [rw] prec
|
23
|
+
# The presidence declaration for the production.
|
24
|
+
#
|
25
|
+
# @return [Ace::Presidence]
|
26
|
+
# @!attribute [rw] id
|
27
|
+
# The ID of the production. The starting production always
|
28
|
+
# has an ID of 0.
|
29
|
+
#
|
30
|
+
# @return [Numeric]
|
31
|
+
class Production < Struct.new(:label, :items, :block, :prec, :id)

  # Builds a production out of a hash.  Each struct member is looked
  # up first under its symbol key and then under the equivalent
  # string key.
  #
  # @param hash [Hash<(Symbol, Object)>]
  # @return [Production]
  def self.from_hash(hash)
    values = members.map { |field| hash[field] || hash[field.to_s] }
    new(*values)
  end
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
module Antelope
|
2
|
+
module Ace
|
3
|
+
class Grammar
|
4
|
+
|
5
|
+
# Manages the productions of the grammar.
|
6
|
+
module Productions
|
7
|
+
|
8
|
+
# Returns a hash of all of the productions. The result is
|
9
|
+
# cached.
|
10
|
+
#
|
11
|
+
# @return [Hash<(Symbol, Array<Production>)>]
|
12
|
+
def productions
  # Serve the cached hash when it has already been built.
  return @_productions if @_productions

  generate_productions
end
|
15
|
+
|
16
|
+
# Returns all productions for all nonterminals, sorted by id.
|
17
|
+
#
|
18
|
+
# @return [Array<Production>]
|
19
|
+
def all_productions
  everything = productions.values.flatten
  everything.sort_by { |production| production.id }
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
# Actually generates the productions. Uses the rules from the
|
26
|
+
# compiler to construct the productions. Makes two loops over
|
27
|
+
# the compiler's rules; the first to tell the grammar that the
|
28
|
+
# nonterminal does exist, and the second to actually construct
|
29
|
+
# the productions. The first loop is for {#find_token},
|
30
|
+
# because otherwise it wouldn't be able to return a
|
31
|
+
# nonterminal properly.
|
32
|
+
#
|
33
|
+
# @return [Hash<(Symbol, Array<Production>)>]
|
34
|
+
def generate_productions
  @_productions = {}
  rules = @compiler.rules

  # First pass: register every label so that find_token can already
  # recognise nonterminals while the bodies are being built.
  rules.each { |rule| productions[rule[:label]] = [] }

  # Second pass: build the actual productions, numbered by rule index.
  rules.each_with_index do |rule, index|
    productions[rule[:label]] << generate_production_for(rule, index)
  end

  productions[:$start] = [default_production]
  productions
end
|
48
|
+
|
49
|
+
# Generates a production for a given compiler rule. Converts
|
50
|
+
# the tokens in the set to their {Token} counterparts,
|
51
|
+
# and then sets the presidence for the production. If the
|
52
|
+
# presidence declaration from the compiler rule is empty,
|
53
|
+
# then it'll use the last terminal from the set to check for
|
54
|
+
# presidence; otherwise, it'll use the presidence declaration.
|
55
|
+
# This is to make sure that every production has a presidence
|
56
|
+
# declaration.
|
57
|
+
#
|
58
|
+
# @param rule [Hash] the compiler's rule.
|
59
|
+
# @param id [Numeric] the id for the production.
|
60
|
+
# @return [Production]
|
61
|
+
def generate_production_for(rule, id)
  items = rule[:set].map { |symbol| find_token(symbol) }

  # Without an explicit %prec, the last terminal of the body decides.
  prec_token =
    if rule[:prec].empty?
      items.select(&:terminal?).last
    else
      find_token(rule[:prec])
    end
  presidence = presidence_for(prec_token)

  # Rule indices are shifted by one; id 0 is used by the default
  # ($start) production.
  Production.new(Token::Nonterminal.new(rule[:label]), items,
                 rule[:block], presidence, id + 1)
end
|
75
|
+
|
76
|
+
# Creates the default production for the grammar. The left
|
77
|
+
# hand side of the production is the `:$start` symbol, with
|
78
|
+
# the right hand side being the first rule's left-hand side
|
79
|
+
# and the terminal `$`. This production is automagically
|
80
|
+
# given the last presidence, and an id of 0.
|
81
|
+
#
|
82
|
+
# @return [Production]
|
83
|
+
def default_production
  start_symbol = Token::Nonterminal.new(:$start)
  first_label  = @compiler.rules.first[:label]
  body         = [Token::Nonterminal.new(first_label),
                  Token::Terminal.new(:"$")]

  Production.new(start_symbol, body, "", presidence.last, 0)
end
|
89
|
+
|
90
|
+
# Finds a token based on its corresponding symbol. First
|
91
|
+
# checks the productions, to see if it's a nonterminal; then,
|
92
|
+
# tries to find it in the terminals; otherwise, if the symbol
|
93
|
+
# is `error`, it returns a {Token::Error}; if the symbol is
|
94
|
+
# `nothing` or `ε`, it returns a {Token::Epsilon}; if it's
|
95
|
+
# none of those, it raises an {UndefinedTokenError}.
|
96
|
+
#
|
97
|
+
# @raise [UndefinedTokenError] if the token doesn't exist.
|
98
|
+
# @param value [String, Symbol, #intern] the token's symbol to
|
99
|
+
# check.
|
100
|
+
# @return [Token]
|
101
|
+
def find_token(value)
  name = value.intern

  # Nonterminals take priority over everything else.
  return Token::Nonterminal.new(name) if productions.key?(name)

  declared = terminals.find { |term| term.name == name }
  return declared if declared

  # Special built-in symbols.
  return Token::Error.new if name == :error
  return Token::Epsilon.new if [:nothing, :ε].include?(name)

  raise UndefinedTokenError, "Could not find a token named #{name.inspect}"
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Antelope
|
2
|
+
module Ace
|
3
|
+
class Grammar
|
4
|
+
|
5
|
+
# Manages a list of the terminals in the grammar.
|
6
|
+
module Terminals
|
7
|
+
|
8
|
+
# A list of all terminals in the grammar. Checks the compiler
|
9
|
+
# options for terminals, and then returns an array of
|
10
|
+
# terminals. Caches the result.
|
11
|
+
#
|
12
|
+
# @return [Array<Token::Terminal>]
|
13
|
+
def terminals
  return @_terminals if @_terminals

  # Each declaration is an argument list for Token::Terminal.new.
  declared = @compiler.options.fetch(:terminals, [])
  @_terminals = declared.map { |args| Token::Terminal.new(*args) }
end
|
20
|
+
|
21
|
+
# A list of all nonterminals in the grammar.
|
22
|
+
#
|
23
|
+
# @return [Array<Symbol>]
|
24
|
+
# @see #productions
|
25
|
+
def nonterminals
  # Computed once from the production labels, then reused.
  @_nonterminals = productions.keys unless @_nonterminals
  @_nonterminals
end
|
28
|
+
|
29
|
+
# A list of all symbols in the grammar; includes both
|
30
|
+
# terminals and nonterminals.
|
31
|
+
#
|
32
|
+
# @return [Array<Token::Terminal, Symbol>]
|
33
|
+
# @see #terminals
|
34
|
+
# @see #nonterminals
|
35
|
+
def symbols
  return @_symbols if @_symbols

  # Terminals first, followed by the nonterminal labels.
  @_symbols = terminals.dup.concat(nonterminals)
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require "antelope/ace/grammar/terminals"
|
2
|
+
require "antelope/ace/grammar/productions"
|
3
|
+
require "antelope/ace/grammar/presidence"
|
4
|
+
require "antelope/ace/grammar/loading"
|
5
|
+
require "antelope/ace/grammar/generation"
|
6
|
+
require "antelope/ace/grammar/production"
|
7
|
+
|
8
|
+
module Antelope
|
9
|
+
module Ace
|
10
|
+
|
11
|
+
# Defines a grammar from an Ace file. This handles setting up
|
12
|
+
# productions, loading from files, terminals, presidence, and
|
13
|
+
# generation.
|
14
|
+
class Grammar
|
15
|
+
|
16
|
+
include Terminals
|
17
|
+
include Productions
|
18
|
+
include Presidence
|
19
|
+
include Loading
|
20
|
+
include Grammar::Generation
|
21
|
+
|
22
|
+
# Used by a generation class; this is all the generated states
|
23
|
+
# of the grammar.
|
24
|
+
#
|
25
|
+
# @return [Set<Generation::Recognizer::State>]
|
26
|
+
# @see Generation::Recognizer
|
27
|
+
attr_accessor :states
|
28
|
+
|
29
|
+
# The name of the grammar. This is normally assumed from a file
|
30
|
+
# name.
|
31
|
+
#
|
32
|
+
# @return [String]
|
33
|
+
attr_accessor :name
|
34
|
+
|
35
|
+
# The output directory for the grammar. This is normally the
|
36
|
+
# same directory as the Ace file.
|
37
|
+
#
|
38
|
+
# @return [Pathname]
|
39
|
+
attr_accessor :output
|
40
|
+
|
41
|
+
# The compiler for the Ace file.
|
42
|
+
#
|
43
|
+
# @return [Compiler]
|
44
|
+
attr_reader :compiler
|
45
|
+
|
46
|
+
# Initialize.
|
47
|
+
#
|
48
|
+
# @param name [String]
|
49
|
+
# @param output [String] the output directory. Automagically
|
50
|
+
# turned into a Pathname.
|
51
|
+
# @param compiler [Compiler]
|
52
|
+
def initialize(name, output, compiler)
  @name     = name
  # Always stored as a Pathname, whatever path-like value was given.
  @output   = Pathname.new(output)
  @compiler = compiler
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module Antelope
|
2
|
+
module Ace
|
3
|
+
|
4
|
+
# Defines a presidence. A presidence has a type, tokens, and a
|
5
|
+
# level.
|
6
|
+
class Presidence < Struct.new(:type, :tokens, :level)

  # @!attribute [rw] type
  #   The kind of presidence level.  This should be one of
  #   `:left`, `:right`, or `:nonassoc`.
  #
  #   @return [Symbol] the type.
  # @!attribute [rw] tokens
  #   A set of tokens that sit on this specific presidence level.
  #   The tokens are identified as symbols.  The special symbol,
  #   `:_`, represents any token.
  #
  #   @return [Set<Symbol>] the tokens on this level.
  # @!attribute [rw] level
  #   The level we're on.  The higher the level, the higher the
  #   presidence.

  include Comparable

  # Orders this presidence relative to another object.  Anything that
  # is not a {Presidence} is incomparable (nil).  Different levels
  # are ordered by level.  On the same level the type of *this*
  # presidence decides: left associative gives 1, right associative
  # gives -1, and anything else gives 0.
  #
  # @param other [Object] the object to compare to this one.
  # @return [Numeric, nil]
  def <=>(other)
    return nil unless other.is_a? Presidence
    return level <=> other.level if level != other.level

    case type
    when :left  then 1
    when :right then -1
    else 0
    end
  end
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module Antelope
|
2
|
+
module Ace
|
3
|
+
class Scanner
|
4
|
+
|
5
|
+
# Scans the first section of the file. This contains directives and
|
6
|
+
# small blocks that can be copied directly into the body of the output.
|
7
|
+
# The blocks are formatted as `%{ ... %}`; however, the ending tag _must_
|
8
|
+
# be on its own line. The directive is formatted as `%<name> <value>`,
|
9
|
+
# with `<name>` being the key, and `<value>` being the value. The value
|
10
|
+
# can be a piece of straight-up text (no quotes), or it can be quoted.
|
11
|
+
# There can be any number of values to a directive.
|
12
|
+
module First
|
13
|
+
|
14
|
+
# Scans until the first content boundry. If it encounters anything but
|
15
|
+
# a block or a directive (or whitespace), it will raise an error.
|
16
|
+
#
|
17
|
+
# @raise [SyntaxError] if it encounters anything but whitespace, a
|
18
|
+
# block, or a directive.
|
19
|
+
# @return [void]
|
20
|
+
def scan_first_part
  until @scanner.check(CONTENT_BOUNDRY)
    # Try each recogniser in turn; the first one that matches wins.
    next if scan_first_copy
    next if scan_first_directive
    next if scan_whitespace

    error!
  end
end
|
26
|
+
|
27
|
+
# Scans for a block. It is called `copy` instead of `block` because
|
28
|
+
# contents of the block is _copied_ directly into the body.
|
29
|
+
#
|
30
|
+
# @return [Boolean] if it matched.
|
31
|
+
def scan_first_copy
  return unless @scanner.scan(/%{([\s\S]+?)\n\s*%}/)

  # Group 1 is the text between the opening tag and the closing line.
  tokens << [:copy, @scanner[1]]
end
|
36
|
+
|
37
|
+
# Scans a directive. A directive has one _name_, and any number of
|
38
|
+
# arguments. Every argument is a _value_. The name can be any
|
39
|
+
# combinations of alphabetical characters, underscores, and dashes;
|
40
|
+
# the value can be word characters, or a quote-delimited string.
|
41
|
+
# It emits a `:directive` token with the directive (String) as an
|
42
|
+
# argument, and the passed arguments (Array<String>).
|
43
|
+
#
|
44
|
+
# @return [Boolean] if it matched.
|
45
|
+
def scan_first_directive
  return unless @scanner.scan(/%([A-Za-z_-]+) ?/)

  directive = @scanner[1]
  arguments = []

  # Collect values up to (but not including) the end of the line.
  until @scanner.check(/\n/)
    error! unless @scanner.scan(/#{VALUE}/x)
    # Group 2 holds a quoted value's contents, group 3 a bare word.
    arguments << (@scanner[2] || @scanner[3])
    @scanner.scan(/ */)
  end

  tokens << [:directive, directive, arguments]
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,160 @@
|
|
1
|
+
module Antelope
|
2
|
+
module Ace
|
3
|
+
class Scanner
|
4
|
+
|
5
|
+
# Scans the second part of the file. The second part of the
|
6
|
+
# file _only_ contains productions (or rules). Rules have a
|
7
|
+
# label and a body; the label may be any lowercase alphabetical
|
8
|
+
# identifier followed by a colon; the body consists of "parts",
|
9
|
+
# an "or", a "prec", and/or a "block". The part may consist
|
10
|
+
# of any alphabetical characters. An or is just a vertical bar
|
11
|
+
# (`|`). A prec is a presidence declaration, which is `%prec `
|
12
|
+
# followed by any alphabetical characters. A block is a `{`,
|
13
|
+
# followed by code, followed by a terminating `}`. Rules _may_
|
14
|
+
# be terminated by a semicolon, but this is optional.
|
15
|
+
module Second
|
16
|
+
|
17
|
+
# Scans the second part of the file. This should be from just
|
18
|
+
# before the first content boundry; if the scanner doesn't
|
19
|
+
# find a content boundry, it will error. It will then check
|
20
|
+
# for a rule.
|
21
|
+
#
|
22
|
+
# @raise [SyntaxError] if no content boundry was found, or if
|
23
|
+
# the scanner encounters anything but a rule or whitespace.
|
24
|
+
# @return [void]
|
25
|
+
# @see #scan_second_rule
|
26
|
+
# @see #scan_whitespace
|
27
|
+
# @see #error!
|
28
|
+
def scan_second_part
  # The section has to open with the content boundary.
  error! unless scanner.scan(CONTENT_BOUNDRY)
  tokens.push([:second])

  until @scanner.check(CONTENT_BOUNDRY)
    next if scan_second_rule
    next if scan_whitespace

    error!
  end
end
|
36
|
+
|
37
|
+
# Scans a rule. A rule consists of a label (the nonterminal
|
38
|
+
# the production is for), a body, and a block; and then,
|
39
|
+
# an optional semicolon.
|
40
|
+
#
|
41
|
+
# @return [Boolean] if it matched
|
42
|
+
# @see #scan_second_rule_label
|
43
|
+
# @see #scan_second_rule_body
|
44
|
+
# @see #error!
|
45
|
+
def scan_second_rule
  # Peek only: the label itself is consumed by scan_second_rule_label.
  return unless @scanner.check(/([a-z]+):/)

  error! unless scan_second_rule_label
  scan_second_rule_body
  true
end
|
52
|
+
|
53
|
+
# Scans the label for a rule. It should contain only lower
|
54
|
+
# case letters and a colon.
|
55
|
+
#
|
56
|
+
# @return [Boolean] if it matched.
|
57
|
+
def scan_second_rule_label
  return unless @scanner.scan(/([a-z]+): ?/)

  tokens << [:label, @scanner[1]]
end
|
62
|
+
|
63
|
+
# The body can contain parts, ors, precs, or blocks (or
|
64
|
+
# whitespaces). Scans all of them, and then attempts to
|
65
|
+
# scan a semicolon.
|
66
|
+
#
|
67
|
+
# @return [void]
|
68
|
+
# @see #scan_second_rule_part
|
69
|
+
# @see #scan_second_rule_or
|
70
|
+
# @see #scan_second_rule_prec
|
71
|
+
# @see #scan_second_rule_block
|
72
|
+
# @see #scan_whitespace
|
73
|
+
def scan_second_rule_body
  # Recognisers in priority order; each returns a truthy value when it
  # consumed input.
  steps = [:scan_second_rule_part, :scan_second_rule_or,
           :scan_second_rule_prec, :scan_second_rule_block,
           :scan_whitespace]

  # Keep going for as long as at least one recogniser matches.
  loop do
    break unless steps.any? { |step| send(step) }
  end

  # The terminating semicolon is optional.
  @scanner.scan(/;/)
end
|
82
|
+
|
83
|
+
# Attempts to scan a "part". A part is a complete run of
# alphabetical characters that is not followed by a colon; a run
# that is followed by a colon is the label of the next rule and is
# left untouched.
#
# @return [Boolean] if it matched.
def scan_second_rule_part
  # The lookahead rejects a following letter as well as a colon.
  # With only `(?!:)` the greedy `+` would backtrack by one letter
  # and match a prefix of a label (`exp` out of `expr:`).
  return unless @scanner.scan(/([A-Za-z]+)(?![A-Za-z:])/)

  tokens << [:part, @scanner[1]]
end
|
93
|
+
|
94
|
+
# Attempts to scan an "or". It's just a vertical bar.
|
95
|
+
#
|
96
|
+
# @return [Boolean] if it matched.
|
97
|
+
def scan_second_rule_or
  return unless @scanner.scan(/\|/)

  tokens << [:or]
end
|
102
|
+
|
103
|
+
# Attempts to scan a presidence definition. A presidence
|
104
|
+
# definition is "%prec " followed by a terminal or nonterminal.
|
105
|
+
#
|
106
|
+
# @return [Boolean] if it matched.
|
107
|
+
def scan_second_rule_prec
  return unless @scanner.scan(/%prec ([A-Za-z]+)/)

  tokens << [:prec, @scanner[1]]
end
|
112
|
+
|
113
|
+
# Attempts to scan a block. This correctly balances brackets;
|
114
|
+
# however, if a bracket is opened/closed within a string, it
|
115
|
+
# still counts that as a bracket that needs to be balanced.
|
116
|
+
# So, having extensive code within a block is not a good idea.
|
117
|
+
#
|
118
|
+
# @return [Boolean] if it matched.
|
119
|
+
def scan_second_rule_block
  # Only the opening bracket is consumed here; _scan_block reads the
  # remainder of the block.
  return unless @scanner.scan(/\{/)

  tokens << [:block, _scan_block]
end
|
124
|
+
|
125
|
+
private
|
126
|
+
|
127
|
+
# Scans the block; it scans until it encounters enough closing
|
128
|
+
# brackets to match the opening brackets. If it encounters
|
129
|
+
# an opening brackets, it increments the bracket counter by
|
130
|
+
# one; if it encounters a closing bracket, it decrements by
|
131
|
+
# one. It will error if it reaches the end before the
|
132
|
+
# brackets are fully closed.
|
133
|
+
#
|
134
|
+
# @return [String] the block's body.
|
135
|
+
# @raise [SyntaxError] if it reaches the end before the final
|
136
|
+
# bracket is closed.
|
137
|
+
def _scan_block
  # The opening bracket was already consumed by the caller.
  depth = 1
  body  = "{"

  while depth > 0
    chunk = @scanner.scan_until(/(\}|\{)/)
    # Running out of input before the brackets balance is an error.
    error! unless chunk

    body << chunk
    depth += @scanner[1] == "}" ? -1 : 1
  end

  body
end
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end
|
160
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Antelope
|
2
|
+
module Ace
|
3
|
+
class Scanner
|
4
|
+
|
5
|
+
# Scans the third part. Everything after the content
|
6
|
+
# boundry is copied directly into the output.
|
7
|
+
module Third
|
8
|
+
|
9
|
+
# Scans the third part. It should start with a content
|
10
|
+
# boundry; raises an error if it does not. It then scans
|
11
|
+
# until the end of the file.
|
12
|
+
#
|
13
|
+
# @raise [SyntaxError] if somehow there is no content
|
14
|
+
# boundry.
|
15
|
+
# @return [void]
|
16
|
+
def scan_third_part
  error! unless @scanner.scan(CONTENT_BOUNDRY)

  tokens.push([:third])
  # Everything that is left is copied verbatim; an empty remainder
  # becomes an empty string.
  remainder = @scanner.scan(/[\s\S]+/m) || ""
  tokens.push([:copy, remainder])
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
require "strscan"
|
2
|
+
require "antelope/ace/scanner/first"
|
3
|
+
require "antelope/ace/scanner/second"
|
4
|
+
require "antelope/ace/scanner/third"
|
5
|
+
|
6
|
+
module Antelope
|
7
|
+
module Ace
|
8
|
+
|
9
|
+
# Scans a given input. The input should be a properly formatted ACE file;
|
10
|
+
# see the Ace module for more information. This scanner uses the
|
11
|
+
# StringScanner class internally; see the ruby documentation for more on
|
12
|
+
# that. This scanner separates scanning into three separate stages:
|
13
|
+
# First, Second, and Third, for each section of the file, respectively.
|
14
|
+
#
|
15
|
+
# @see Ace
|
16
|
+
# @see http://ruby-doc.org/stdlib-2.1.2/libdoc/strscan/rdoc/StringScanner.html
|
17
|
+
class Scanner
|
18
|
+
|
19
|
+
include First
|
20
|
+
include Second
|
21
|
+
include Third
|
22
|
+
|
23
|
+
# The string scanner that we're using to scan the string with.
|
24
|
+
#
|
25
|
+
# @return [StringScanner]
|
26
|
+
attr_reader :scanner
|
27
|
+
|
28
|
+
# An array of the tokens that the scanner scanned.
|
29
|
+
#
|
30
|
+
# @return [Array<Array<(Symbol, Object, ...)>>]
|
31
|
+
attr_reader :tokens
|
32
|
+
|
33
|
+
# The boundry between each section. Placed here to be easily modifiable.
|
34
|
+
# **MUST** be a regular expression.
|
35
|
+
#
|
36
|
+
# @return [Regexp]
|
37
|
+
CONTENT_BOUNDRY = /%%/
|
38
|
+
|
39
|
+
# The value regular expression. It should match values; for example,
|
40
|
+
# things quoted in strings or word letters without quotes. Must respond
|
41
|
+
# to #to_s, since it is embedded within other regular expressions. The
|
42
|
+
# regular expression should place the contents of the value in the
|
43
|
+
# groups 2 or 3.
|
44
|
+
#
|
45
|
+
# @return [#to_s]
|
46
|
+
VALUE = %q{(?:
|
47
|
+
(?:("|')((?:\\\\|\\"|\\'|.)+?)\\1)
|
48
|
+
| ([[:word:]]+)
|
49
|
+
)}
|
50
|
+
|
51
|
+
# Scans a file. It returns the tokens resulting from scanning.
|
52
|
+
#
|
53
|
+
# @param source [String] the source to scan. This should be compatible
|
54
|
+
# with StringScanner.
|
55
|
+
# @return [Array<Array<(Symbol, Object, ...)>>]
|
56
|
+
# @see #tokens
|
57
|
+
def self.scan(source)
  instance = new(source)
  instance.scan_file
end
|
60
|
+
|
61
|
+
# Initialize the scanner with the input.
|
62
|
+
#
|
63
|
+
# @param input [String] The source to scan.
|
64
|
+
def initialize(input)
  # A fresh scanner over the input and an empty token list.
  @scanner, @tokens = StringScanner.new(input), []
end
|
68
|
+
|
69
|
+
# Scans the file in parts.
|
70
|
+
#
|
71
|
+
# @raise [SyntaxError] if the source is malformed in some way.
|
72
|
+
# @return [Array<Array<(Symbol, Object, ...)>>] the tokens that
|
73
|
+
# were scanned in this file.
|
74
|
+
# @see #scan_first_part
|
75
|
+
# @see #scan_second_part
|
76
|
+
# @see #scan_third_part
|
77
|
+
# @see #tokens
|
78
|
+
def scan_file
  # The three sections are scanned strictly in file order.
  [:scan_first_part, :scan_second_part, :scan_third_part].each do |stage|
    send(stage)
  end

  tokens
end
|
84
|
+
|
85
|
+
# Scans for whitespace. If the next character is whitespace, it
|
86
|
+
# will consume all whitespace until the next non-whitespace
|
87
|
+
# character.
|
88
|
+
#
|
89
|
+
# @return [Boolean] if any whitespace was matched.
|
90
|
+
def scan_whitespace
  blank_run = /\s+/
  @scanner.scan(blank_run)
end
|
93
|
+
|
94
|
+
private
|
95
|
+
|
96
|
+
# Raises an error; first creates a small snippet to give the developer
|
97
|
+
# some context.
|
98
|
+
#
|
99
|
+
# @raise [SyntaxError] always.
|
100
|
+
# @return [void]
|
101
|
+
def error!
  source   = @scanner.string
  position = @scanner.pos

  # Up to eight characters of context on each side of the position,
  # clamped to the bounds of the input.
  from    = [position - 8, 0].max
  to      = [position + 8, source.length].min
  context = source[from..to].strip

  raise SyntaxError,
        "invalid syntax near `#{context.inspect}' (#{source[position].inspect})"
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Antelope
|
2
|
+
module Ace
|
3
|
+
class Token
|
4
|
+
|
5
|
+
# Defines an epsilon token. An epsilon token represents
|
6
|
+
# nothing. This is used to say that a nonterminal can
|
7
|
+
# reduce to nothing.
|
8
|
+
class Epsilon < Token

  # Initialize.  Any arguments given are ignored; the token is always
  # named `:epsilon`.
  def initialize(*)
    super(:epsilon)
  end

  # (see Token#epsilon?)
  def epsilon?
    true
  end
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|