antelope 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +23 -0
- data/.rspec +3 -0
- data/.yardopts +4 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +2 -0
- data/antelope.gemspec +30 -0
- data/bin/antelope +24 -0
- data/examples/deterministic.ace +27 -0
- data/examples/deterministic.output +229 -0
- data/examples/example.ace +45 -0
- data/examples/example.output +610 -0
- data/examples/simple.ace +26 -0
- data/examples/simple.output +194 -0
- data/lib/antelope/ace/compiler.rb +290 -0
- data/lib/antelope/ace/errors.rb +27 -0
- data/lib/antelope/ace/grammar/generation.rb +47 -0
- data/lib/antelope/ace/grammar/loading.rb +51 -0
- data/lib/antelope/ace/grammar/presidence.rb +59 -0
- data/lib/antelope/ace/grammar/production.rb +47 -0
- data/lib/antelope/ace/grammar/productions.rb +119 -0
- data/lib/antelope/ace/grammar/terminals.rb +41 -0
- data/lib/antelope/ace/grammar.rb +59 -0
- data/lib/antelope/ace/presidence.rb +51 -0
- data/lib/antelope/ace/scanner/first.rb +61 -0
- data/lib/antelope/ace/scanner/second.rb +160 -0
- data/lib/antelope/ace/scanner/third.rb +25 -0
- data/lib/antelope/ace/scanner.rb +110 -0
- data/lib/antelope/ace/token/epsilon.rb +22 -0
- data/lib/antelope/ace/token/error.rb +24 -0
- data/lib/antelope/ace/token/nonterminal.rb +15 -0
- data/lib/antelope/ace/token/terminal.rb +15 -0
- data/lib/antelope/ace/token.rb +171 -0
- data/lib/antelope/ace.rb +50 -0
- data/lib/antelope/automaton.rb +36 -0
- data/lib/antelope/generation/conflictor/conflict.rb +7 -0
- data/lib/antelope/generation/conflictor.rb +45 -0
- data/lib/antelope/generation/constructor/first.rb +52 -0
- data/lib/antelope/generation/constructor/follow.rb +46 -0
- data/lib/antelope/generation/constructor/lookahead.rb +42 -0
- data/lib/antelope/generation/constructor/nullable.rb +40 -0
- data/lib/antelope/generation/constructor.rb +81 -0
- data/lib/antelope/generation/recognizer/rule.rb +93 -0
- data/lib/antelope/generation/recognizer/state.rb +56 -0
- data/lib/antelope/generation/recognizer.rb +152 -0
- data/lib/antelope/generation/tableizer.rb +80 -0
- data/lib/antelope/generation.rb +12 -0
- data/lib/antelope/generator/output.rb +30 -0
- data/lib/antelope/generator/ruby.rb +57 -0
- data/lib/antelope/generator/templates/output.erb +49 -0
- data/lib/antelope/generator/templates/ruby.erb +62 -0
- data/lib/antelope/generator.rb +84 -0
- data/lib/antelope/version.rb +4 -0
- data/lib/antelope.rb +9 -0
- data/spec/antelope/ace/compiler_spec.rb +50 -0
- data/spec/antelope/ace/scanner_spec.rb +27 -0
- data/spec/antelope/automaton_spec.rb +29 -0
- data/spec/spec_helper.rb +38 -0
- data/spec/support/benchmark_helper.rb +5 -0
- metadata +223 -0
@@ -0,0 +1,47 @@
|
|
1
|
+
module Antelope
|
2
|
+
module Ace
|
3
|
+
class Grammar
|
4
|
+
|
5
|
+
# A single production (rule) of the grammar, stored as a struct.
#
# @!attribute [rw] label
#   The label (or left-hand side) of the production. This
#   should be a nonterminal.
#
#   @return [Symbol]
# @!attribute [rw] items
#   The body (or right-hand side) of the production. This can
#   be an array of terminals and nonterminals.
#
#   @return [Array<Token>]
# @!attribute [rw] block
#   The block of code to be executed when the production's right
#   hand side is reduced.
#
#   @return [String]
# @!attribute [rw] prec
#   The presidence declaration for the production.
#
#   @return [Ace::Presidence]
# @!attribute [rw] id
#   The ID of the production. The starting production always
#   has an ID of 0.
#
#   @return [Numeric]
class Production < Struct.new(:label, :items, :block, :prec, :id)

  # Builds a production from a hash keyed by the struct's member
  # names. Each member is read from its Symbol key first; the
  # String key is used when that lookup yields nil or false.
  #
  # @param hash [Hash<(Symbol, Object)>]
  # @return [Production]
  def self.from_hash(hash)
    new(*members.map { |member| hash[member] || hash[member.to_s] })
  end
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
module Antelope
|
2
|
+
module Ace
|
3
|
+
class Grammar
|
4
|
+
|
5
|
+
# Manages the productions of the grammar.
|
6
|
+
module Productions
|
7
|
+
|
8
|
+
# The productions of the grammar, keyed by label. They are built by
# {#generate_productions} the first time they are needed and read
# back from `@_productions` afterwards.
#
# @return [Hash<(Symbol, Array<Production>)>]
def productions
  return @_productions if @_productions

  generate_productions
end
|
15
|
+
|
16
|
+
# Collects the productions of every label into one flat list,
# ordered by their id.
#
# @return [Array<Production>]
def all_productions
  every_production = productions.values.flatten
  every_production.sort_by { |production| production.id }
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
# Builds the production table from the compiler's rules and stores
# it in `@_productions`. The rules are walked twice: the first pass
# registers an empty list under every label, so that {#find_token}
# can already recognise those labels as nonterminals; the second
# pass builds the actual productions, passing each rule's position
# along as its id. The `:$start` entry is added last.
#
# @return [Hash<(Symbol, Array<Production>)>]
def generate_productions
  table = (@_productions = {})
  rules = @compiler.rules

  rules.each { |rule| table[rule[:label]] = [] }

  rules.each_with_index do |rule, index|
    table[rule[:label]] << generate_production_for(rule, index)
  end

  table[:$start] = [default_production]
  table
end
|
48
|
+
|
49
|
+
# Builds the production for one compiler rule. Every symbol in the
# rule's set is turned into its {Token} via {#find_token}. The
# presidence comes from the rule's own declaration when it has
# one; when that declaration is empty, the last terminal in the
# body is used instead. The production's id is the given id plus
# one.
#
# @param rule [Hash] the compiler's rule.
# @param id [Numeric] the id for the production.
# @return [Production]
def generate_production_for(rule, id)
  label = rule[:label]
  body  = rule[:set].map { |symbol| find_token(symbol) }

  prec_token =
    if rule[:prec].empty?
      body.select(&:terminal?).last
    else
      find_token(rule[:prec])
    end

  level = presidence_for(prec_token)

  Production.new(Token::Nonterminal.new(label), body, rule[:block],
    level, id + 1)
end
|
75
|
+
|
76
|
+
# Builds the grammar's default production: `:$start` on the left
# hand side; on the right, the label of the compiler's first rule
# followed by the terminal `$`. Its block is empty, its presidence
# is the last entry of `presidence`, and its id is 0.
#
# @return [Production]
def default_production
  start_symbol = Token::Nonterminal.new(:$start)
  first_label  = Token::Nonterminal.new(@compiler.rules.first[:label])
  end_marker   = Token::Terminal.new(:"$")

  Production.new(start_symbol, [first_label, end_marker], "",
    presidence.last, 0)
end
|
89
|
+
|
90
|
+
# Resolves a symbol to its token. The checks run in this order:
# a label present in the productions yields a new
# {Token::Nonterminal}; a terminal with a matching name is returned
# as-is; `error` yields a {Token::Error}; `nothing` or `ε` yields a
# {Token::Epsilon}. Anything else raises an {UndefinedTokenError}.
#
# @raise [UndefinedTokenError] if the token doesn't exist.
# @param value [String, Symbol, #intern] the token's symbol to
#   check.
# @return [Token]
def find_token(value)
  name = value.intern
  return Token::Nonterminal.new(name) if productions.key?(name)

  declared = terminals.find { |candidate| candidate.name == name }
  return declared if declared
  return Token::Error.new if name == :error
  return Token::Epsilon.new if [:nothing, :ε].include?(name)

  raise UndefinedTokenError, "Could not find a token named #{name.inspect}"
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Antelope
|
2
|
+
module Ace
|
3
|
+
class Grammar
|
4
|
+
|
5
|
+
# Manages a list of the terminals in the grammar.
|
6
|
+
module Terminals
|
7
|
+
|
8
|
+
# Every terminal declared for the grammar. Each entry under the
# compiler's `:terminals` option (none when the option is absent)
# is splatted into `Token::Terminal.new`. The resulting array is
# memoized in `@_terminals`.
#
# @return [Array<Token::Terminal>]
def terminals
  @_terminals ||= @compiler.options.fetch(:terminals, []).map { |arguments|
    Token::Terminal.new(*arguments)
  }
end
|
20
|
+
|
21
|
+
# Every nonterminal of the grammar, i.e. the keys of the production
# table. Memoized in `@_nonterminals`.
#
# @return [Array<Symbol>]
# @see #productions
def nonterminals
  return @_nonterminals if @_nonterminals

  @_nonterminals = productions.keys
end
|
28
|
+
|
29
|
+
# Every symbol of the grammar: the terminals followed by the
# nonterminals. Memoized in `@_symbols`.
#
# @return [Array<Token::Terminal, Symbol>]
# @see #terminals
# @see #nonterminals
def symbols
  return @_symbols if @_symbols

  @_symbols = terminals + nonterminals
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require "antelope/ace/grammar/terminals"
|
2
|
+
require "antelope/ace/grammar/productions"
|
3
|
+
require "antelope/ace/grammar/presidence"
|
4
|
+
require "antelope/ace/grammar/loading"
|
5
|
+
require "antelope/ace/grammar/generation"
|
6
|
+
require "antelope/ace/grammar/production"
|
7
|
+
|
8
|
+
module Antelope
|
9
|
+
module Ace
|
10
|
+
|
11
|
+
# A grammar built from an Ace file. The behaviour lives in the
# mixed-in modules: terminals, productions, presidence, loading
# from files, and generation.
class Grammar

  include Terminals
  include Productions
  include Presidence
  include Loading
  include Grammar::Generation

  # All of the generated states of the grammar; used by a
  # generation class.
  #
  # @return [Set<Generation::Recognizer::State>]
  # @see Generation::Recognizer
  attr_accessor :states

  # The grammar's name, normally assumed from a file name.
  #
  # @return [String]
  attr_accessor :name

  # The directory output is written to; normally the directory the
  # Ace file lives in.
  #
  # @return [Pathname]
  attr_accessor :output

  # The compiler for the Ace file.
  #
  # @return [Compiler]
  attr_reader :compiler

  # Sets up the grammar.
  #
  # @param name [String]
  # @param output [String] the output directory; it is wrapped in
  #   a Pathname.
  # @param compiler [Compiler]
  def initialize(name, output, compiler)
    @name, @compiler = name, compiler
    @output = Pathname.new(output)
  end
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module Antelope
|
2
|
+
module Ace
|
3
|
+
|
4
|
+
# A presidence declaration, made up of a type, a set of tokens,
# and a level.
class Presidence < Struct.new(:type, :tokens, :level)

  # @!attribute [rw] type
  #   The type of presidence level. This should be one of
  #   `:left`, `:right`, or `:nonassoc`.
  #
  #   @return [Symbol] the type.
  # @!attribute [rw] tokens
  #   A set of tokens that are on this specific presidence
  #   level. The tokens are identified as symbols. The special
  #   symbol, `:_`, represents any token.
  #
  #   @return [Set<Symbol>] the tokens on this level.
  # @!attribute [rw] level
  #   The level we're on. The higher the level, the higher the
  #   presidence.

  include Comparable

  # Orders this presidence against another object. Anything that
  # is not a {Presidence} is incomparable (nil). Presidences on
  # different levels are ordered by level. On the same level the
  # receiver's type decides: `:left` makes it greater (1),
  # `:right` makes it lesser (-1), and any other type makes the
  # two equal (0).
  #
  # @param other [Object] the object to compare to this one.
  # @return [Numeric?]
  def <=>(other)
    return nil unless other.is_a? Presidence
    return level <=> other.level if level != other.level

    case type
    when :left  then 1
    when :right then -1
    else 0
    end
  end
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module Antelope
|
2
|
+
module Ace
|
3
|
+
class Scanner
|
4
|
+
|
5
|
+
# Scans the first section of the file. This contains directives and
|
6
|
+
# small blocks that can be copied directly into the body of the output.
|
7
|
+
# The blocks are formatted as `%{ ... %}`; however, the ending tag _must_
|
8
|
+
# be on its own line. The directive is formatted as `%<name> <value>`,
|
9
|
+
# with `<name>` being the key, and `<value>` being the value. The value
|
10
|
+
# can be a piece of straight-up text (no quotes), or it can be quoted.
|
11
|
+
# There can be any number of values to a directive.
|
12
|
+
module First
|
13
|
+
|
14
|
+
# Consumes input up to (but not including) the first content
# boundry. At every position it tries, in order, a copy block, a
# directive, and whitespace; if none of them match it raises.
#
# @raise [SyntaxError] if it encounters anything but whitespace, a
#   block, or a directive.
# @return [void]
def scan_first_part
  until @scanner.check(CONTENT_BOUNDRY)
    matched = scan_first_copy || scan_first_directive || scan_whitespace
    error! unless matched
  end
end
|
26
|
+
|
27
|
+
# Tries to scan a `%{ ... %}` block whose closing tag sits on its
# own line, and emits a `:copy` token holding the text between the
# tags. It is named `copy` because that text is copied directly
# into the body of the output.
#
# @return [Array, nil] the token list if it matched, nil otherwise.
def scan_first_copy
  return unless @scanner.scan(/%{([\s\S]+?)\n\s*%}/)

  tokens.push([:copy, @scanner[1]])
end
|
36
|
+
|
37
|
+
# Tries to scan a directive: a `%` followed by a name made of
# letters, underscores and dashes, then any number of values up to
# the end of the line. Each value must match {VALUE}, i.e. it is
# either a run of word characters or a quote-delimited string.
# Emits a `:directive` token with the name (String) and the values
# (Array<String>).
#
# @raise [SyntaxError] if something on the line is not a value.
# @return [Array, nil] the token list if it matched, nil otherwise.
def scan_first_directive
  return unless @scanner.scan(/%([A-Za-z_-]+) ?/)

  name = @scanner[1]
  value_pattern = /#{VALUE}/x
  values = []

  until @scanner.check(/\n/)
    error! unless @scanner.scan(value_pattern)
    # Group 2 holds a quoted value's contents, group 3 a bare word.
    values << (@scanner[2] || @scanner[3])
    @scanner.scan(/ */)
  end

  tokens << [:directive, name, values]
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,160 @@
|
|
1
|
+
module Antelope
|
2
|
+
module Ace
|
3
|
+
class Scanner
|
4
|
+
|
5
|
+
# Scans the second part of the file. The second part of the
|
6
|
+
# file _only_ contains productions (or rules). Rules have a
|
7
|
+
# label and a body; the label may be any lowercase alphabetical
|
8
|
+
# identifier followed by a colon; the body consists of "parts",
|
9
|
+
# an "or", a "prec", and/or a "block". The part may consist
|
10
|
+
# of any alphabetical characters. An or is just a vertical bar
|
11
|
+
# (`|`). A prec is a presidence declaration, which is `%prec `
|
12
|
+
# followed by any alphabetical characters. A block is a `{`,
|
13
|
+
# followed by code, followed by a terminating `}`. Rules _may_
|
14
|
+
# be terminated by a semicolon, but this is optional.
|
15
|
+
module Second
|
16
|
+
|
17
|
+
# Scans the second part of the file. The scanner must be sitting
# on the first content boundry, which is consumed; a `:second`
# token is emitted, and then rules and whitespace are scanned
# until the next content boundry comes up.
#
# @raise [SyntaxError] if no content boundry was found, or if
#   the scanner encounters anything but a rule or whitespace.
# @return [void]
# @see #scan_second_rule
# @see #scan_whitespace
# @see #error!
def scan_second_part
  # Read the instance variable directly, like every other scanning
  # method does, rather than going through the `scanner` accessor.
  error! unless @scanner.scan(CONTENT_BOUNDRY)
  tokens << [:second]

  until @scanner.check(CONTENT_BOUNDRY)
    scan_second_rule || scan_whitespace || error!
  end
end
|
36
|
+
|
37
|
+
# Tries to scan a whole rule. When the input at the current
# position looks like a label (lowercase letters followed by a
# colon), the label and then the rule's body are scanned.
#
# @return [Boolean] true if it matched, nil otherwise.
# @see #scan_second_rule_label
# @see #scan_second_rule_body
# @see #error!
def scan_second_rule
  return unless @scanner.check(/([a-z]+):/)

  error! unless scan_second_rule_label
  scan_second_rule_body
  true
end
|
52
|
+
|
53
|
+
# Tries to scan a rule's label: lowercase letters, a colon, and
# at most one following space. Emits a `:label` token carrying the
# letters.
#
# @return [Array, nil] the token list if it matched, nil otherwise.
def scan_second_rule_label
  return unless @scanner.scan(/([a-z]+): ?/)

  tokens.push([:label, @scanner[1]])
end
|
62
|
+
|
63
|
+
# Scans the body of a rule: any mix of parts, ors, precs, blocks
# and whitespace, for as long as one of them keeps matching. Once
# nothing matches any more, a single optional semicolon is
# consumed.
#
# @return [void]
# @see #scan_second_rule_part
# @see #scan_second_rule_or
# @see #scan_second_rule_prec
# @see #scan_second_rule_block
# @see #scan_whitespace
def scan_second_rule_body
  while scan_second_rule_part || scan_second_rule_or ||
        scan_second_rule_prec || scan_second_rule_block ||
        scan_whitespace
    # Each successful scan has already advanced the scanner.
  end

  @scanner.scan(/;/)
end
|
82
|
+
|
83
|
+
# Attempts to scan a "part": a whole run of alphabetical
# characters that is not followed by a colon. Emits a `:part`
# token carrying the run.
#
# The run is matched atomically. With a plain greedy `+`, the
# regexp engine would satisfy the "no colon" lookahead by giving
# back the last letter, so `expr:` would be scanned as the part
# `exp` and the following label would be corrupted.
#
# @return [Array, nil] the token list if it matched, nil otherwise.
def scan_second_rule_part
  if @scanner.scan(/((?>[A-Za-z]+))(?!\:)/)
    tokens << [:part, @scanner[1]]
  end
end
|
93
|
+
|
94
|
+
# Tries to scan an "or", which is a single vertical bar, and emits
# an `:or` token for it.
#
# @return [Array, nil] the token list if it matched, nil otherwise.
def scan_second_rule_or
  return unless @scanner.scan(/\|/)

  tokens.push([:or])
end
|
102
|
+
|
103
|
+
# Tries to scan a presidence definition: `%prec ` followed by an
# alphabetical name. Emits a `:prec` token carrying that name.
#
# @return [Array, nil] the token list if it matched, nil otherwise.
def scan_second_rule_prec
  return unless @scanner.scan(/%prec ([A-Za-z]+)/)

  tokens.push([:prec, @scanner[1]])
end
|
112
|
+
|
113
|
+
# Tries to scan a code block. After the opening `{` the rest is
# read by {#_scan_block}, which balances brackets. A bracket that
# appears inside a string in the code is still counted, so
# extensive code within a block is not a good idea. Emits a
# `:block` token carrying the block's text.
#
# @return [Array, nil] the token list if it matched, nil otherwise.
def scan_second_rule_block
  return unless @scanner.scan(/\{/)

  tokens.push([:block, _scan_block])
end
|
124
|
+
|
125
|
+
private
|
126
|
+
|
127
|
+
# Scans the remainder of a block whose opening bracket has already
# been consumed. Input is read up to each following bracket: an
# opening bracket raises the nesting depth by one, a closing
# bracket lowers it by one, and scanning stops once the depth is
# back at zero. The returned text includes the outer brackets.
#
# @return [String] the block's body.
# @raise [SyntaxError] if it reaches the end before the final
#   bracket is closed.
def _scan_block
  depth = 1
  # Start from a fresh buffer: appending to a string literal would
  # raise once frozen string literals are in effect.
  body = String.new("{")

  until depth.zero?
    chunk = @scanner.scan_until(/(\}|\{)/)
    error! unless chunk

    body << chunk
    depth += (@scanner[1] == "}" ? -1 : 1)
  end

  body
end
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end
|
160
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Antelope
|
2
|
+
module Ace
|
3
|
+
class Scanner
|
4
|
+
|
5
|
+
# Scans the third part. Everything after the content
|
6
|
+
# boundry is copied directly into the output.
|
7
|
+
module Third
|
8
|
+
|
9
|
+
# Scans the third part. The scanner must be sitting on a content
# boundry, which is consumed. A `:third` token is emitted, followed
# by a `:copy` token holding everything up to the end of the input
# (an empty string if nothing is left).
#
# @raise [SyntaxError] if somehow there is no content
#   boundry.
# @return [void]
def scan_third_part
  error! unless @scanner.scan(CONTENT_BOUNDRY)

  tokens << [:third]
  remainder = @scanner.scan(/[\s\S]+/m) || ""
  tokens << [:copy, remainder]
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
require "strscan"
|
2
|
+
require "antelope/ace/scanner/first"
|
3
|
+
require "antelope/ace/scanner/second"
|
4
|
+
require "antelope/ace/scanner/third"
|
5
|
+
|
6
|
+
module Antelope
|
7
|
+
module Ace
|
8
|
+
|
9
|
+
# Scans a given input. The input should be a properly formatted ACE file;
|
10
|
+
# see the Ace module for more information. This scanner uses the
|
11
|
+
# StringScanner class internally; see the ruby documentation for more on
|
12
|
+
# that. This scanner separates scanning into three separate stages:
|
13
|
+
# First, Second, and Third, for each section of the file, respectively.
|
14
|
+
#
|
15
|
+
# @see Ace
|
16
|
+
# @see http://ruby-doc.org/stdlib-2.1.2/libdoc/strscan/rdoc/StringScanner.html
|
17
|
+
class Scanner
|
18
|
+
|
19
|
+
include First
|
20
|
+
include Second
|
21
|
+
include Third
|
22
|
+
|
23
|
+
# The string scanner that we're using to scan the string with.
|
24
|
+
#
|
25
|
+
# @return [StringScanner]
|
26
|
+
attr_reader :scanner
|
27
|
+
|
28
|
+
# An array of the tokens that the scanner scanned.
|
29
|
+
#
|
30
|
+
# @return [Array<Array<(Symbol, Object, ...)>>]
|
31
|
+
attr_reader :tokens
|
32
|
+
|
33
|
+
# The boundry between each section. Placed here to be easily modifiable.
# **MUST** be a regular expression.
#
# @return [Regexp]
CONTENT_BOUNDRY = /%%/

# The value regular expression, kept as a String because it is
# interpolated into other regular expressions (with the extended
# flag, so the whitespace below is insignificant). It matches either
# a quote-delimited string, whose contents land in group 2, or a run
# of word characters, which lands in group 3. Group 1 is the quote
# character. Frozen so the shared pattern cannot be altered by
# accident.
#
# @return [#to_s]
VALUE = %q{(?:
  (?:("|')((?:\\\\|\\"|\\'|.)+?)\\1)
  | ([[:word:]]+)
)}.freeze
|
50
|
+
|
51
|
+
# Convenience entry point: builds a scanner for the source and
# scans the whole file with it.
#
# @param source [String] the source to scan. This should be compatible
#   with StringScanner.
# @return [Array<Array<(Symbol, Object, ...)>>]
# @see #tokens
def self.scan(source)
  instance = new(source)
  instance.scan_file
end
|
60
|
+
|
61
|
+
# Sets the scanner up: wraps the input in a StringScanner and
# starts with an empty token list.
#
# @param input [String] The source to scan.
def initialize(input)
  @scanner, @tokens = StringScanner.new(input), []
end
|
68
|
+
|
69
|
+
# Scans the whole file by running the three stages in order, and
# hands back the tokens collected along the way.
#
# @raise [SyntaxError] if the source is malformed in some way.
# @return [Array<Array<(Symbol, Object, ...)>>] the tokens that
#   were scanned in this file.
# @see #scan_first_part
# @see #scan_second_part
# @see #scan_third_part
# @see #tokens
def scan_file
  %i[scan_first_part scan_second_part scan_third_part].each do |stage|
    send(stage)
  end

  tokens
end
|
84
|
+
|
85
|
+
# Consumes a run of whitespace at the current position, stopping
# at the next non-whitespace character.
#
# @return [String, nil] the whitespace that was consumed, or nil
#   if the next character is not whitespace.
def scan_whitespace
  @scanner.scan(/\s+/)
end
|
93
|
+
|
94
|
+
private
|
95
|
+
|
96
|
+
# Raises a syntax error. The message quotes a small snippet of the
# source (up to eight characters on either side of the current
# position, stripped of surrounding whitespace) and the character
# at the current position, to give the developer some context.
#
# The position is taken in characters rather than bytes, because
# it is used to index into the source string; a byte offset would
# point at the wrong place after any multibyte character.
#
# @raise [SyntaxError] always.
# @return [void]
def error!
  source = @scanner.string
  cursor = @scanner.charpos
  first  = [cursor - 8, 0].max
  last   = [cursor + 8, source.length].min
  snip   = source[first..last].strip
  char   = source[cursor]

  raise SyntaxError, "invalid syntax near `#{snip.inspect}' (#{char.inspect})"
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Antelope
|
2
|
+
module Ace
|
3
|
+
class Token
|
4
|
+
|
5
|
+
# The epsilon token, which stands for nothing. It is used to say
# that a nonterminal can reduce to nothing.
class Epsilon < Token
  # Accepts and ignores any arguments; the token's name is always
  # `:epsilon`.
  def initialize(*)
    super(:epsilon)
  end

  # (see Token#epsilon?)
  def epsilon?
    true
  end
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|