antelope 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +25 -25
  3. data/.rspec +3 -3
  4. data/.travis.yml +10 -10
  5. data/.yardopts +7 -7
  6. data/CONTRIBUTING.md +50 -38
  7. data/GENERATORS.md +180 -124
  8. data/Gemfile +7 -7
  9. data/LICENSE.txt +22 -22
  10. data/README.md +240 -104
  11. data/Rakefile +2 -2
  12. data/TODO.md +58 -58
  13. data/antelope.gemspec +29 -28
  14. data/bin/antelope +7 -7
  15. data/examples/deterministic.ace +35 -35
  16. data/examples/example.ace +52 -51
  17. data/examples/example.ace.err +192 -192
  18. data/examples/example.ace.inf +432 -432
  19. data/examples/example.ate +70 -70
  20. data/examples/example.ate.err +192 -192
  21. data/examples/example.ate.inf +432 -432
  22. data/examples/liquidscript.ace +233 -233
  23. data/examples/simple.ace +22 -22
  24. data/lib/antelope/ace/compiler.rb +334 -334
  25. data/lib/antelope/ace/errors.rb +30 -30
  26. data/lib/antelope/ace/scanner/argument.rb +57 -57
  27. data/lib/antelope/ace/scanner/first.rb +89 -89
  28. data/lib/antelope/ace/scanner/second.rb +178 -178
  29. data/lib/antelope/ace/scanner/third.rb +27 -27
  30. data/lib/antelope/ace/scanner.rb +144 -144
  31. data/lib/antelope/ace.rb +47 -47
  32. data/lib/antelope/cli.rb +60 -60
  33. data/lib/antelope/errors.rb +25 -25
  34. data/lib/antelope/generation/constructor/first.rb +86 -86
  35. data/lib/antelope/generation/constructor/follow.rb +105 -105
  36. data/lib/antelope/generation/constructor/nullable.rb +64 -64
  37. data/lib/antelope/generation/constructor.rb +127 -127
  38. data/lib/antelope/generation/errors.rb +17 -17
  39. data/lib/antelope/generation/null.rb +13 -13
  40. data/lib/antelope/generation/recognizer/rule.rb +216 -216
  41. data/lib/antelope/generation/recognizer/state.rb +129 -129
  42. data/lib/antelope/generation/recognizer.rb +177 -177
  43. data/lib/antelope/generation/tableizer.rb +176 -176
  44. data/lib/antelope/generation.rb +15 -15
  45. data/lib/antelope/generator/base/coerce.rb +115 -0
  46. data/lib/antelope/generator/base/extra.rb +50 -0
  47. data/lib/antelope/generator/base.rb +134 -264
  48. data/lib/antelope/generator/c.rb +11 -11
  49. data/lib/antelope/generator/c_header.rb +105 -105
  50. data/lib/antelope/generator/c_source.rb +39 -39
  51. data/lib/antelope/generator/error.rb +34 -34
  52. data/lib/antelope/generator/group.rb +60 -57
  53. data/lib/antelope/generator/html.rb +51 -51
  54. data/lib/antelope/generator/info.rb +47 -47
  55. data/lib/antelope/generator/null.rb +18 -18
  56. data/lib/antelope/generator/output.rb +17 -17
  57. data/lib/antelope/generator/ruby.rb +112 -79
  58. data/lib/antelope/generator/templates/c_header.ant +36 -36
  59. data/lib/antelope/generator/templates/c_source.ant +202 -202
  60. data/lib/antelope/generator/templates/error.erb +40 -0
  61. data/lib/antelope/generator/templates/html/antelope.css +53 -1
  62. data/lib/antelope/generator/templates/html/antelope.html +82 -1
  63. data/lib/antelope/generator/templates/html/antelope.js +9 -1
  64. data/lib/antelope/generator/templates/html/css.ant +53 -53
  65. data/lib/antelope/generator/templates/html/html.ant +82 -82
  66. data/lib/antelope/generator/templates/html/js.ant +9 -9
  67. data/lib/antelope/generator/templates/info.erb +61 -0
  68. data/lib/antelope/generator/templates/{ruby.ant → ruby.erb} +171 -178
  69. data/lib/antelope/generator.rb +62 -66
  70. data/lib/antelope/grammar/generation.rb +76 -76
  71. data/lib/antelope/grammar/loading.rb +84 -84
  72. data/lib/antelope/grammar/precedence.rb +59 -59
  73. data/lib/antelope/grammar/precedences.rb +64 -64
  74. data/lib/antelope/grammar/production.rb +56 -56
  75. data/lib/antelope/grammar/productions.rb +154 -154
  76. data/lib/antelope/grammar/symbols.rb +64 -64
  77. data/lib/antelope/grammar/token/epsilon.rb +23 -23
  78. data/lib/antelope/grammar/token/error.rb +24 -24
  79. data/lib/antelope/grammar/token/nonterminal.rb +15 -15
  80. data/lib/antelope/grammar/token/terminal.rb +15 -15
  81. data/lib/antelope/grammar/token.rb +231 -231
  82. data/lib/antelope/grammar.rb +68 -68
  83. data/lib/antelope/version.rb +6 -6
  84. data/lib/antelope.rb +18 -19
  85. data/optimizations.txt +42 -42
  86. data/spec/antelope/ace/compiler_spec.rb +60 -60
  87. data/spec/antelope/ace/scanner_spec.rb +27 -27
  88. data/spec/antelope/generation/constructor_spec.rb +131 -131
  89. data/spec/fixtures/simple.ace +22 -22
  90. data/spec/spec_helper.rb +39 -39
  91. data/spec/support/benchmark_helper.rb +5 -5
  92. data/spec/support/grammar_helper.rb +14 -14
  93. data/subl/Ace (Ruby).JSON-tmLanguage +94 -94
  94. data/subl/Ace (Ruby).tmLanguage +153 -153
  95. metadata +22 -11
  96. data/lib/antelope/generator/templates/error.ant +0 -34
  97. data/lib/antelope/generator/templates/info.ant +0 -53
  98. data/lib/antelope/template/compiler.rb +0 -78
  99. data/lib/antelope/template/errors.rb +0 -9
  100. data/lib/antelope/template/scanner.rb +0 -109
  101. data/lib/antelope/template.rb +0 -64
  102. data/spec/antelope/template_spec.rb +0 -50
@@ -1,154 +1,154 @@
1
- # encoding: utf-8
2
-
3
- module Antelope
4
- class Grammar
5
-
6
- # Manages the productions of the grammar.
7
- module Productions
8
-
9
- # Returns a hash of all of the productions. The result is
10
- # cached.
11
- #
12
- # @return [Hash<(Symbol, Array<Production>)>]
13
- def productions
14
- @_productions || generate_productions
15
- end
16
-
17
- # Returns all productions for all nonterminals, sorted by id.
18
- #
19
- # @return [Array<Production>]
20
- def all_productions
21
- productions.values.flatten.sort_by(&:id)
22
- end
23
-
24
- # Finds a token based on its corresponding symbol. First
25
- # checks the productions, to see if it's a nonterminal; then,
26
- # tries to find it in the terminals; otherwise, if the symbol
27
- # is `error` or `$error`, it returns a {Token::Error}; if it is
28
- # `nothing`, `ε` or `%empty`, it returns a {Token::Epsilon}; if
29
- # it's none of those, it raises an {UndefinedTokenError}.
30
- #
31
- # @raise [UndefinedTokenError] if the token doesn't exist.
32
- # @param value [String, Symbol, #intern] the token's symbol to
33
- # check.
34
- # @return [Token]
35
- def find_token(value)
36
- value = value.intern
37
-
38
- if productions.key?(value)
39
- typed_nonterminals.find { |term| term.name == value } ||
40
- Token::Nonterminal.new(value)
41
- elsif terminal = terminals.
42
- find { |term| term.name == value }
43
- terminal
44
- elsif value == :$error || value == :error
45
- Token::Error.new
46
- elsif [:nothing, :ε, :"%empty"].include?(value)
47
- Token::Epsilon.new
48
- else
49
- raise UndefinedTokenError, "Could not find a token " \
50
- "named #{value.inspect}"
51
- end
52
- end
53
-
54
- private
55
-
56
- # Actually generates the productions. Uses the rules from the
57
- # compiler to construct the productions. Makes two loops over
58
- # the compiler's rules; the first to tell the grammar that the
59
- # nonterminal does exist, and the second to actually construct
60
- # the productions. The first loop is for {#find_token},
61
- # because otherwise it wouldn't be able to return a
62
- # nonterminal properly.
63
- #
64
- # @return [Hash<(Symbol, Array<Production>)>]
65
- def generate_productions
66
- @_productions = {}
67
- index = 0
68
-
69
- rules = @compiler.rules.each do |rule|
70
- productions[rule[:label]] = []
71
- end
72
-
73
- while index < rules.size
74
- rule = rules[index]
75
- productions[rule[:label]] <<
76
- generate_production_for(rule, index)
77
- index += 1
78
- end
79
-
80
- productions[:$start] = [default_production]
81
-
82
- productions
83
- end
84
-
85
- # Generates a production for a given compiler rule. Converts
86
- # the tokens in the set to their {Token} counterparts,
87
- # and then sets the precedence for the production. If the
88
- # precedence declaration from the compiler rule is empty,
89
- # then it'll use the first terminal from the set to check for
90
- # precedence; otherwise, it'll use the precedence declaration.
91
- # This is to make sure that every production has a precedence
92
- # declaration.
93
- #
94
- # @param rule [Hash] the compiler's rule.
95
- # @param id [Numeric] the id for the production.
96
- # @return [Production]
97
- def generate_production_for(rule, id)
98
- left = Token::Nonterminal.new(rule[:label])
99
- items = rule[:set].map { |_| find_token(_[0]) }
100
- prec = if rule[:prec].empty?
101
- items.select(&:terminal?).first
102
- else
103
- rule[:prec].intern
104
- end
105
-
106
- prec = precedence_for(prec)
107
- left.type = type_for(rule[:label])
108
- left.id = rule[:label_id]
109
-
110
- rule[:set].each_with_index do |tok, i|
111
- items[i] = items[i].dup
112
- items[i].id = tok[1]
113
- end
114
- items.delete_if(&:epsilon?)
115
-
116
- Production.new(left, items, rule[:block], prec, id + 1)
117
- end
118
-
119
- # Returns the defined type for the given token name.
120
- # Uses the `%type` directive to infer the corresponding types.
121
- #
122
- # @param token [Symbol] the token to check for
123
- # types.
124
- def type_for(token)
125
- token = find_token(token) unless token.is_a?(Token)
126
-
127
- case token
128
- when Token::Nonterminal
129
- token.type
130
- when Token::Terminal
131
- token.type
132
- when Token::Epsilon
133
- ""
134
- when Token::Error
135
- ""
136
- end
137
- end
138
-
139
- # Creates the default production for the grammar. The left
140
- # hand side of the production is the `:$start` symbol, with
141
- # the right hand side being the first rule's left-hand side
142
- # and the terminal `$end`. This production is automagically
143
- # given the last precedence, and an id of 0.
144
- #
145
- # @return [Production]
146
- def default_production
147
- Production.new(Token::Nonterminal.new(:$start), [
148
- Token::Nonterminal.new(@compiler.rules.first[:label]),
149
- Token::Terminal.new(:$end)
150
- ], "", precedence.last, 0)
151
- end
152
- end
153
- end
154
- end
1
+ # encoding: utf-8
2
+
3
+ module Antelope
4
+ class Grammar
5
+
6
+ # Manages the productions of the grammar.
7
+ module Productions
8
+
9
+ # Returns a hash of all of the productions. The result is
10
+ # cached.
11
+ #
12
+ # @return [Hash<(Symbol, Array<Production>)>]
13
+ def productions
14
+ @_productions || generate_productions
15
+ end
16
+
17
+ # Returns all productions for all nonterminals, sorted by id.
18
+ #
19
+ # @return [Array<Production>]
20
+ def all_productions
21
+ productions.values.flatten.sort_by(&:id)
22
+ end
23
+
24
+ # Finds a token based on its corresponding symbol. First
25
+ # checks the productions, to see if it's a nonterminal; then,
26
+ # tries to find it in the terminals; otherwise, if the symbol
27
+ # is `error` or `$error`, it returns a {Token::Error}; if it is
28
+ # `nothing`, `ε` or `%empty`, it returns a {Token::Epsilon}; if
29
+ # it's none of those, it raises an {UndefinedTokenError}.
30
+ #
31
+ # @raise [UndefinedTokenError] if the token doesn't exist.
32
+ # @param value [String, Symbol, #intern] the token's symbol to
33
+ # check.
34
+ # @return [Token]
35
+ def find_token(value)
36
+ value = value.intern
37
+
38
+ if productions.key?(value)
39
+ typed_nonterminals.find { |term| term.name == value } ||
40
+ Token::Nonterminal.new(value)
41
+ elsif terminal = terminals.
42
+ find { |term| term.name == value }
43
+ terminal
44
+ elsif value == :$error || value == :error
45
+ Token::Error.new
46
+ elsif [:nothing, :ε, :"%empty"].include?(value)
47
+ Token::Epsilon.new
48
+ else
49
+ raise UndefinedTokenError, "Could not find a token " \
50
+ "named #{value.inspect}"
51
+ end
52
+ end
53
+
54
+ private
55
+
56
+ # Actually generates the productions. Uses the rules from the
57
+ # compiler to construct the productions. Makes two loops over
58
+ # the compiler's rules; the first to tell the grammar that the
59
+ # nonterminal does exist, and the second to actually construct
60
+ # the productions. The first loop is for {#find_token},
61
+ # because otherwise it wouldn't be able to return a
62
+ # nonterminal properly.
63
+ #
64
+ # @return [Hash<(Symbol, Array<Production>)>]
65
+ def generate_productions
66
+ @_productions = {}
67
+ index = 0
68
+
69
+ rules = @compiler.rules.each do |rule|
70
+ productions[rule[:label]] = []
71
+ end
72
+
73
+ while index < rules.size
74
+ rule = rules[index]
75
+ productions[rule[:label]] <<
76
+ generate_production_for(rule, index)
77
+ index += 1
78
+ end
79
+
80
+ productions[:$start] = [default_production]
81
+
82
+ productions
83
+ end
84
+
85
+ # Generates a production for a given compiler rule. Converts
86
+ # the tokens in the set to their {Token} counterparts,
87
+ # and then sets the precedence for the production. If the
88
+ # precedence declaration from the compiler rule is empty,
89
+ # then it'll use the first terminal from the set to check for
90
+ # precedence; otherwise, it'll use the precedence declaration.
91
+ # This is to make sure that every production has a precedence
92
+ # declaration.
93
+ #
94
+ # @param rule [Hash] the compiler's rule.
95
+ # @param id [Numeric] the id for the production.
96
+ # @return [Production]
97
+ def generate_production_for(rule, id)
98
+ left = Token::Nonterminal.new(rule[:label])
99
+ items = rule[:set].map { |_| find_token(_[0]) }
100
+ prec = if rule[:prec].empty?
101
+ items.select(&:terminal?).first
102
+ else
103
+ rule[:prec].intern
104
+ end
105
+
106
+ prec = precedence_for(prec)
107
+ left.type = type_for(rule[:label])
108
+ left.id = rule[:label_id]
109
+
110
+ rule[:set].each_with_index do |tok, i|
111
+ items[i] = items[i].dup
112
+ items[i].id = tok[1]
113
+ end
114
+ items.delete_if(&:epsilon?)
115
+
116
+ Production.new(left, items, rule[:block], prec, id + 1)
117
+ end
118
+
119
+ # Returns the defined type for the given token name.
120
+ # Uses the `%type` directive to infer the corresponding types.
121
+ #
122
+ # @param token [Symbol] the token to check for
123
+ # types.
124
+ def type_for(token)
125
+ token = find_token(token) unless token.is_a?(Token)
126
+
127
+ case token
128
+ when Token::Nonterminal
129
+ token.type
130
+ when Token::Terminal
131
+ token.type
132
+ when Token::Epsilon
133
+ ""
134
+ when Token::Error
135
+ ""
136
+ end
137
+ end
138
+
139
+ # Creates the default production for the grammar. The left
140
+ # hand side of the production is the `:$start` symbol, with
141
+ # the right hand side being the first rule's left-hand side
142
+ # and the terminal `$end`. This production is automagically
143
+ # given the last precedence, and an id of 0.
144
+ #
145
+ # @return [Production]
146
+ def default_production
147
+ Production.new(Token::Nonterminal.new(:$start), [
148
+ Token::Nonterminal.new(@compiler.rules.first[:label]),
149
+ Token::Terminal.new(:$end)
150
+ ], "", precedence.last, 0)
151
+ end
152
+ end
153
+ end
154
+ end
@@ -1,64 +1,64 @@
1
- # encoding: utf-8
2
-
3
- module Antelope
4
- class Grammar
5
-
6
- # Manages a list of the symbols in the grammar.
7
- module Symbols
8
-
9
- # A list of all terminals in the grammar. Checks the compiler
10
- # options for terminals, and then returns an array of
11
- # terminals. Caches the result.
12
- #
13
- # @return [Array<Token::Terminal>]
14
- def terminals
15
- @_terminals ||= begin
16
- @compiler.options.fetch(:terminals) { [] }.map do |v|
17
- Token::Terminal.new(*v)
18
- end
19
- end
20
- end
21
-
22
- # A list of all nonterminals in the grammar.
23
- #
24
- # @return [Array<Symbol>]
25
- # @see #productions
26
- def nonterminals
27
- @_nonterminals ||= productions.keys
28
- end
29
-
30
- # A list of all nonterminals, with types.
31
- #
32
- # @return [Array<Token::Nonterminal>]
33
- def typed_nonterminals
34
- @_typed_nonterminals ||= begin
35
- typed = []
36
- compiler.options[:nonterminals].each do |data|
37
- data[1].each do |nonterm|
38
- typed << Token::Nonterminal.new(nonterm, data[0])
39
- end
40
- end
41
- typed
42
- end
43
- end
44
-
45
- # A list of all symbols in the grammar; includes both
46
- # terminals and nonterminals.
47
- #
48
- # @return [Array<Token::Terminal, Symbol>]
49
- # @see #terminals
50
- # @see #nonterminals
51
- def symbols
52
- @_symbols ||= terminals + nonterminals
53
- end
54
-
55
- # Checks to see if the grammar uses the `error` terminal
56
- # anywhere.
57
- #
58
- # @return [Boolean]
59
- def contains_error_token?
60
- all_productions.any? { |_| _.items.any?(&:error?) }
61
- end
62
- end
63
- end
64
- end
1
+ # encoding: utf-8
2
+
3
+ module Antelope
4
+ class Grammar
5
+
6
+ # Manages a list of the symbols in the grammar.
7
+ module Symbols
8
+
9
+ # A list of all terminals in the grammar. Checks the compiler
10
+ # options for terminals, and then returns an array of
11
+ # terminals. Caches the result.
12
+ #
13
+ # @return [Array<Token::Terminal>]
14
+ def terminals
15
+ @_terminals ||= begin
16
+ @compiler.options.fetch(:terminals) { [] }.map do |v|
17
+ Token::Terminal.new(*v)
18
+ end
19
+ end
20
+ end
21
+
22
+ # A list of all nonterminals in the grammar.
23
+ #
24
+ # @return [Array<Symbol>]
25
+ # @see #productions
26
+ def nonterminals
27
+ @_nonterminals ||= productions.keys
28
+ end
29
+
30
+ # A list of all nonterminals, with types.
31
+ #
32
+ # @return [Array<Token::Nonterminal>]
33
+ def typed_nonterminals
34
+ @_typed_nonterminals ||= begin
35
+ typed = []
36
+ compiler.options[:nonterminals].each do |data|
37
+ data[1].each do |nonterm|
38
+ typed << Token::Nonterminal.new(nonterm, data[0])
39
+ end
40
+ end
41
+ typed
42
+ end
43
+ end
44
+
45
+ # A list of all symbols in the grammar; includes both
46
+ # terminals and nonterminals.
47
+ #
48
+ # @return [Array<Token::Terminal, Symbol>]
49
+ # @see #terminals
50
+ # @see #nonterminals
51
+ def symbols
52
+ @_symbols ||= terminals + nonterminals
53
+ end
54
+
55
+ # Checks to see if the grammar uses the `error` terminal
56
+ # anywhere.
57
+ #
58
+ # @return [Boolean]
59
+ def contains_error_token?
60
+ all_productions.any? { |_| _.items.any?(&:error?) }
61
+ end
62
+ end
63
+ end
64
+ end
@@ -1,23 +1,23 @@
1
- # encoding: utf-8
2
-
3
- module Antelope
4
- class Grammar
5
- class Token
6
- # Defines an epsilon token. An epsilon token represents
7
- # nothing. This is used to say that a nonterminal can
8
- # reduce to nothing.
9
- class Epsilon < Token
10
- # Initialize. Technically takes no arguments. Sets
11
- # the name of the token to be `:$empty`.
12
- def initialize(*)
13
- super :"$empty"
14
- end
15
-
16
- # (see Token#epsilon?)
17
- def epsilon?
18
- true
19
- end
20
- end
21
- end
22
- end
23
- end
1
+ # encoding: utf-8
2
+
3
+ module Antelope
4
+ class Grammar
5
+ class Token
6
+ # Defines an epsilon token. An epsilon token represents
7
+ # nothing. This is used to say that a nonterminal can
8
+ # reduce to nothing.
9
+ class Epsilon < Token
10
+ # Initialize. Technically takes no arguments. Sets
11
+ # the name of the token to be `:$empty`.
12
+ def initialize(*)
13
+ super :"$empty"
14
+ end
15
+
16
+ # (see Token#epsilon?)
17
+ def epsilon?
18
+ true
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
@@ -1,24 +1,24 @@
1
- # encoding: utf-8
2
-
3
- module Antelope
4
- class Grammar
5
- class Token
6
- # Defines an error token. This may be used internally by the
7
- # parser when it enters panic mode; any tokens following this
8
- # are the synchronisation tokens. This is considered a terminal
9
- # for the purposes of rule definitions.
10
- class Error < Terminal
11
- # Initialize the error token. Technically takes no arguments.
12
- # Sets the name to be `:$error`.
13
- def initialize(*)
14
- super :$error
15
- end
16
-
17
- # (see Token#error?)
18
- def error?
19
- true
20
- end
21
- end
22
- end
23
- end
24
- end
1
+ # encoding: utf-8
2
+
3
+ module Antelope
4
+ class Grammar
5
+ class Token
6
+ # Defines an error token. This may be used internally by the
7
+ # parser when it enters panic mode; any tokens following this
8
+ # are the synchronisation tokens. This is considered a terminal
9
+ # for the purposes of rule definitions.
10
+ class Error < Terminal
11
+ # Initialize the error token. Technically takes no arguments.
12
+ # Sets the name to be `:$error`.
13
+ def initialize(*)
14
+ super :$error
15
+ end
16
+
17
+ # (see Token#error?)
18
+ def error?
19
+ true
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -1,15 +1,15 @@
1
- # encoding: utf-8
2
-
3
- module Antelope
4
- class Grammar
5
- class Token
6
- # Defines a nonterminal token.
7
- class Nonterminal < Token
8
- # (see Token#nonterminal?)
9
- def nonterminal?
10
- true
11
- end
12
- end
13
- end
14
- end
15
- end
1
+ # encoding: utf-8
2
+
3
+ module Antelope
4
+ class Grammar
5
+ class Token
6
+ # Defines a nonterminal token.
7
+ class Nonterminal < Token
8
+ # (see Token#nonterminal?)
9
+ def nonterminal?
10
+ true
11
+ end
12
+ end
13
+ end
14
+ end
15
+ end
@@ -1,15 +1,15 @@
1
- # encoding: utf-8
2
-
3
- module Antelope
4
- class Grammar
5
- class Token
6
- # Defines a terminal token.
7
- class Terminal < Token
8
- # (see Token#terminal?)
9
- def terminal?
10
- true
11
- end
12
- end
13
- end
14
- end
15
- end
1
+ # encoding: utf-8
2
+
3
+ module Antelope
4
+ class Grammar
5
+ class Token
6
+ # Defines a terminal token.
7
+ class Terminal < Token
8
+ # (see Token#terminal?)
9
+ def terminal?
10
+ true
11
+ end
12
+ end
13
+ end
14
+ end
15
+ end