antelope 0.3.2 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +25 -25
  3. data/.rspec +3 -3
  4. data/.travis.yml +10 -10
  5. data/.yardopts +7 -7
  6. data/CONTRIBUTING.md +50 -38
  7. data/GENERATORS.md +180 -124
  8. data/Gemfile +7 -7
  9. data/LICENSE.txt +22 -22
  10. data/README.md +240 -104
  11. data/Rakefile +2 -2
  12. data/TODO.md +58 -58
  13. data/antelope.gemspec +29 -28
  14. data/bin/antelope +7 -7
  15. data/examples/deterministic.ace +35 -35
  16. data/examples/example.ace +52 -51
  17. data/examples/example.ace.err +192 -192
  18. data/examples/example.ace.inf +432 -432
  19. data/examples/example.ate +70 -70
  20. data/examples/example.ate.err +192 -192
  21. data/examples/example.ate.inf +432 -432
  22. data/examples/liquidscript.ace +233 -233
  23. data/examples/simple.ace +22 -22
  24. data/lib/antelope/ace/compiler.rb +334 -334
  25. data/lib/antelope/ace/errors.rb +30 -30
  26. data/lib/antelope/ace/scanner/argument.rb +57 -57
  27. data/lib/antelope/ace/scanner/first.rb +89 -89
  28. data/lib/antelope/ace/scanner/second.rb +178 -178
  29. data/lib/antelope/ace/scanner/third.rb +27 -27
  30. data/lib/antelope/ace/scanner.rb +144 -144
  31. data/lib/antelope/ace.rb +47 -47
  32. data/lib/antelope/cli.rb +60 -60
  33. data/lib/antelope/errors.rb +25 -25
  34. data/lib/antelope/generation/constructor/first.rb +86 -86
  35. data/lib/antelope/generation/constructor/follow.rb +105 -105
  36. data/lib/antelope/generation/constructor/nullable.rb +64 -64
  37. data/lib/antelope/generation/constructor.rb +127 -127
  38. data/lib/antelope/generation/errors.rb +17 -17
  39. data/lib/antelope/generation/null.rb +13 -13
  40. data/lib/antelope/generation/recognizer/rule.rb +216 -216
  41. data/lib/antelope/generation/recognizer/state.rb +129 -129
  42. data/lib/antelope/generation/recognizer.rb +177 -177
  43. data/lib/antelope/generation/tableizer.rb +176 -176
  44. data/lib/antelope/generation.rb +15 -15
  45. data/lib/antelope/generator/base/coerce.rb +115 -0
  46. data/lib/antelope/generator/base/extra.rb +50 -0
  47. data/lib/antelope/generator/base.rb +134 -264
  48. data/lib/antelope/generator/c.rb +11 -11
  49. data/lib/antelope/generator/c_header.rb +105 -105
  50. data/lib/antelope/generator/c_source.rb +39 -39
  51. data/lib/antelope/generator/error.rb +34 -34
  52. data/lib/antelope/generator/group.rb +60 -57
  53. data/lib/antelope/generator/html.rb +51 -51
  54. data/lib/antelope/generator/info.rb +47 -47
  55. data/lib/antelope/generator/null.rb +18 -18
  56. data/lib/antelope/generator/output.rb +17 -17
  57. data/lib/antelope/generator/ruby.rb +112 -79
  58. data/lib/antelope/generator/templates/c_header.ant +36 -36
  59. data/lib/antelope/generator/templates/c_source.ant +202 -202
  60. data/lib/antelope/generator/templates/error.erb +40 -0
  61. data/lib/antelope/generator/templates/html/antelope.css +53 -1
  62. data/lib/antelope/generator/templates/html/antelope.html +82 -1
  63. data/lib/antelope/generator/templates/html/antelope.js +9 -1
  64. data/lib/antelope/generator/templates/html/css.ant +53 -53
  65. data/lib/antelope/generator/templates/html/html.ant +82 -82
  66. data/lib/antelope/generator/templates/html/js.ant +9 -9
  67. data/lib/antelope/generator/templates/info.erb +61 -0
  68. data/lib/antelope/generator/templates/{ruby.ant → ruby.erb} +171 -178
  69. data/lib/antelope/generator.rb +62 -66
  70. data/lib/antelope/grammar/generation.rb +76 -76
  71. data/lib/antelope/grammar/loading.rb +84 -84
  72. data/lib/antelope/grammar/precedence.rb +59 -59
  73. data/lib/antelope/grammar/precedences.rb +64 -64
  74. data/lib/antelope/grammar/production.rb +56 -56
  75. data/lib/antelope/grammar/productions.rb +154 -154
  76. data/lib/antelope/grammar/symbols.rb +64 -64
  77. data/lib/antelope/grammar/token/epsilon.rb +23 -23
  78. data/lib/antelope/grammar/token/error.rb +24 -24
  79. data/lib/antelope/grammar/token/nonterminal.rb +15 -15
  80. data/lib/antelope/grammar/token/terminal.rb +15 -15
  81. data/lib/antelope/grammar/token.rb +231 -231
  82. data/lib/antelope/grammar.rb +68 -68
  83. data/lib/antelope/version.rb +6 -6
  84. data/lib/antelope.rb +18 -19
  85. data/optimizations.txt +42 -42
  86. data/spec/antelope/ace/compiler_spec.rb +60 -60
  87. data/spec/antelope/ace/scanner_spec.rb +27 -27
  88. data/spec/antelope/generation/constructor_spec.rb +131 -131
  89. data/spec/fixtures/simple.ace +22 -22
  90. data/spec/spec_helper.rb +39 -39
  91. data/spec/support/benchmark_helper.rb +5 -5
  92. data/spec/support/grammar_helper.rb +14 -14
  93. data/subl/Ace (Ruby).JSON-tmLanguage +94 -94
  94. data/subl/Ace (Ruby).tmLanguage +153 -153
  95. metadata +22 -11
  96. data/lib/antelope/generator/templates/error.ant +0 -34
  97. data/lib/antelope/generator/templates/info.ant +0 -53
  98. data/lib/antelope/template/compiler.rb +0 -78
  99. data/lib/antelope/template/errors.rb +0 -9
  100. data/lib/antelope/template/scanner.rb +0 -109
  101. data/lib/antelope/template.rb +0 -64
  102. data/spec/antelope/template_spec.rb +0 -50
@@ -1,154 +1,154 @@
1
- # encoding: utf-8
2
-
3
- module Antelope
4
- class Grammar
5
-
6
- # Manages the productions of the grammar.
7
- module Productions
8
-
9
- # Returns a hash of all of the productions. The result is
10
- # cached.
11
- #
12
- # @return [Hash<(Symbol, Array<Production>)>]
13
- def productions
14
- @_productions || generate_productions
15
- end
16
-
17
- # Returns all productions for all nonterminals, sorted by id.
18
- #
19
- # @return [Array<Production>]
20
- def all_productions
21
- productions.values.flatten.sort_by(&:id)
22
- end
23
-
24
- # Finds a token based on its corresponding symbol. First
25
- # checks the productions, to see if it's a nonterminal; then,
26
- # tries to find it in the terminals; otherwise, if the symbol
27
- # is `error`, it returns a {Token::Error}; if the symbol is
28
- # `nothing` or `ε`, it returns a {Token::Epsilon}; if it's
29
- # none of those, it raises an {UndefinedTokenError}.
30
- #
31
- # @raise [UndefinedTokenError] if the token doesn't exist.
32
- # @param value [String, Symbol, #intern] the token's symbol to
33
- # check.
34
- # @return [Token]
35
- def find_token(value)
36
- value = value.intern
37
-
38
- if productions.key?(value)
39
- typed_nonterminals.find { |term| term.name == value } ||
40
- Token::Nonterminal.new(value)
41
- elsif terminal = terminals.
42
- find { |term| term.name == value }
43
- terminal
44
- elsif value == :$error || value == :error
45
- Token::Error.new
46
- elsif [:nothing, :ε, :"%empty"].include?(value)
47
- Token::Epsilon.new
48
- else
49
- raise UndefinedTokenError, "Could not find a token " \
50
- "named #{value.inspect}"
51
- end
52
- end
53
-
54
- private
55
-
56
- # Actually generates the productions. Uses the rules from the
57
- # compiler to construct the productions. Makes two loops over
58
- # the compiler's rules; the first to tell the grammar that the
59
- # nonterminal does exist, and the second to actually construct
60
- # the productions. The first loop is for {#find_token},
61
- # because otherwise it wouldn't be able to return a
62
- # nonterminal properly.
63
- #
64
- # @return [Hash<(Symbol, Array<Production>)>]
65
- def generate_productions
66
- @_productions = {}
67
- index = 0
68
-
69
- rules = @compiler.rules.each do |rule|
70
- productions[rule[:label]] = []
71
- end
72
-
73
- while index < rules.size
74
- rule = rules[index]
75
- productions[rule[:label]] <<
76
- generate_production_for(rule, index)
77
- index += 1
78
- end
79
-
80
- productions[:$start] = [default_production]
81
-
82
- productions
83
- end
84
-
85
- # Generates a production for a given compiler rule. Converts
86
- # the tokens in the set to their {Token} counterparts,
87
- # and then sets the precedence for the production. If the
88
- # precedence declaration from the compiler rule is empty,
89
- # then it'll use the last terminal from the set to check for
90
- # precedence; otherwise, it'll use the precedence declaration.
91
- # This is to make sure that every production has a precedence
92
- # declaration.
93
- #
94
- # @param rule [Hash] the compiler's rule.
95
- # @param id [Numeric] the id for the production.
96
- # @return [Production]
97
- def generate_production_for(rule, id)
98
- left = Token::Nonterminal.new(rule[:label])
99
- items = rule[:set].map { |_| find_token(_[0]) }
100
- prec = if rule[:prec].empty?
101
- items.select(&:terminal?).first
102
- else
103
- rule[:prec].intern
104
- end
105
-
106
- prec = precedence_for(prec)
107
- left.type = type_for(rule[:label])
108
- left.id = rule[:label_id]
109
-
110
- rule[:set].each_with_index do |tok, i|
111
- items[i] = items[i].dup
112
- items[i].id = tok[1]
113
- end
114
- items.delete_if(&:epsilon?)
115
-
116
- Production.new(left, items, rule[:block], prec, id + 1)
117
- end
118
-
119
- # Returns the defined type for the given token name.
120
- # Uses the `%type` directive to infer the corresponding types.
121
- #
122
- # @param token [Symbol] the token to check for
123
- # types.
124
- def type_for(token)
125
- token = find_token(token) unless token.is_a?(Token)
126
-
127
- case token
128
- when Token::Nonterminal
129
- token.type
130
- when Token::Terminal
131
- token.type
132
- when Token::Epsilon
133
- ""
134
- when Token::Error
135
- ""
136
- end
137
- end
138
-
139
- # Creates the default production for the grammar. The left
140
- # hand side of the production is the `:$start` symbol, with
141
- # the right hand side being the first rule's left-hand side
142
- # and the terminal `$`. This production is automagically
143
- # given the last precedence, and an id of 0.
144
- #
145
- # @return [Production]
146
- def default_production
147
- Production.new(Token::Nonterminal.new(:$start), [
148
- Token::Nonterminal.new(@compiler.rules.first[:label]),
149
- Token::Terminal.new(:$end)
150
- ], "", precedence.last, 0)
151
- end
152
- end
153
- end
154
- end
1
+ # encoding: utf-8
2
+
3
+ module Antelope
4
+ class Grammar
5
+
6
+ # Manages the productions of the grammar.
7
+ module Productions
8
+
9
+ # Returns a hash of all of the productions. The result is
10
+ # cached.
11
+ #
12
+ # @return [Hash<(Symbol, Array<Production>)>]
13
+ def productions
14
+ @_productions || generate_productions
15
+ end
16
+
17
+ # Returns all productions for all nonterminals, sorted by id.
18
+ #
19
+ # @return [Array<Production>]
20
+ def all_productions
21
+ productions.values.flatten.sort_by(&:id)
22
+ end
23
+
24
+ # Finds a token based on its corresponding symbol. First
25
+ # checks the productions, to see if it's a nonterminal; then,
26
+ # tries to find it in the terminals; otherwise, if the symbol
27
+ # is `error` or `$error`, it returns a {Token::Error}; if the symbol
28
+ # is `nothing`, `ε` or `%empty`, it returns a {Token::Epsilon}; if it's
29
+ # none of those, it raises an {UndefinedTokenError}.
30
+ #
31
+ # @raise [UndefinedTokenError] if the token doesn't exist.
32
+ # @param value [String, Symbol, #intern] the token's symbol to
33
+ # check.
34
+ # @return [Token]
35
+ def find_token(value)
36
+ value = value.intern
37
+
38
+ if productions.key?(value)
39
+ typed_nonterminals.find { |term| term.name == value } ||
40
+ Token::Nonterminal.new(value)
41
+ elsif terminal = terminals.
42
+ find { |term| term.name == value }
43
+ terminal
44
+ elsif value == :$error || value == :error
45
+ Token::Error.new
46
+ elsif [:nothing, :ε, :"%empty"].include?(value)
47
+ Token::Epsilon.new
48
+ else
49
+ raise UndefinedTokenError, "Could not find a token " \
50
+ "named #{value.inspect}"
51
+ end
52
+ end
53
+
54
+ private
55
+
56
+ # Actually generates the productions. Uses the rules from the
57
+ # compiler to construct the productions. Makes two loops over
58
+ # the compiler's rules; the first to tell the grammar that the
59
+ # nonterminal does exist, and the second to actually construct
60
+ # the productions. The first loop is for {#find_token},
61
+ # because otherwise it wouldn't be able to return a
62
+ # nonterminal properly.
63
+ #
64
+ # @return [Hash<(Symbol, Array<Production>)>]
65
+ def generate_productions
66
+ @_productions = {}
67
+ index = 0
68
+
69
+ rules = @compiler.rules.each do |rule|
70
+ productions[rule[:label]] = []
71
+ end
72
+
73
+ while index < rules.size
74
+ rule = rules[index]
75
+ productions[rule[:label]] <<
76
+ generate_production_for(rule, index)
77
+ index += 1
78
+ end
79
+
80
+ productions[:$start] = [default_production]
81
+
82
+ productions
83
+ end
84
+
85
+ # Generates a production for a given compiler rule. Converts
86
+ # the tokens in the set to their {Token} counterparts,
87
+ # and then sets the precedence for the production. If the
88
+ # precedence declaration from the compiler rule is empty,
89
+ # then it'll use the first terminal from the set to check for
90
+ # precedence; otherwise, it'll use the precedence declaration.
91
+ # This is to make sure that every production has a precedence
92
+ # declaration.
93
+ #
94
+ # @param rule [Hash] the compiler's rule.
95
+ # @param id [Numeric] the id for the production.
96
+ # @return [Production]
97
+ def generate_production_for(rule, id)
98
+ left = Token::Nonterminal.new(rule[:label])
99
+ items = rule[:set].map { |_| find_token(_[0]) }
100
+ prec = if rule[:prec].empty?
101
+ items.select(&:terminal?).first
102
+ else
103
+ rule[:prec].intern
104
+ end
105
+
106
+ prec = precedence_for(prec)
107
+ left.type = type_for(rule[:label])
108
+ left.id = rule[:label_id]
109
+
110
+ rule[:set].each_with_index do |tok, i|
111
+ items[i] = items[i].dup
112
+ items[i].id = tok[1]
113
+ end
114
+ items.delete_if(&:epsilon?)
115
+
116
+ Production.new(left, items, rule[:block], prec, id + 1)
117
+ end
118
+
119
+ # Returns the defined type for the given token name.
120
+ # Uses the `%type` directive to infer the corresponding types.
121
+ #
122
+ # @param token [Symbol] the token to check for
123
+ # types.
124
+ def type_for(token)
125
+ token = find_token(token) unless token.is_a?(Token)
126
+
127
+ case token
128
+ when Token::Nonterminal
129
+ token.type
130
+ when Token::Terminal
131
+ token.type
132
+ when Token::Epsilon
133
+ ""
134
+ when Token::Error
135
+ ""
136
+ end
137
+ end
138
+
139
+ # Creates the default production for the grammar. The left
140
+ # hand side of the production is the `:$start` symbol, with
141
+ # the right hand side being the first rule's left-hand side
142
+ # and the terminal `$end`. This production is automagically
143
+ # given the last precedence, and an id of 0.
144
+ #
145
+ # @return [Production]
146
+ def default_production
147
+ Production.new(Token::Nonterminal.new(:$start), [
148
+ Token::Nonterminal.new(@compiler.rules.first[:label]),
149
+ Token::Terminal.new(:$end)
150
+ ], "", precedence.last, 0)
151
+ end
152
+ end
153
+ end
154
+ end
@@ -1,64 +1,64 @@
1
- # encoding: utf-8
2
-
3
- module Antelope
4
- class Grammar
5
-
6
- # Manages a list of the symbols in the grammar.
7
- module Symbols
8
-
9
- # A list of all terminals in the grammar. Checks the compiler
10
- # options for terminals, and then returns an array of
11
- # terminals. Caches the result.
12
- #
13
- # @return [Array<Token::Terminal>]
14
- def terminals
15
- @_terminals ||= begin
16
- @compiler.options.fetch(:terminals) { [] }.map do |v|
17
- Token::Terminal.new(*v)
18
- end
19
- end
20
- end
21
-
22
- # A list of all nonterminals in the grammar.
23
- #
24
- # @return [Array<Symbol>]
25
- # @see #productions
26
- def nonterminals
27
- @_nonterminals ||= productions.keys
28
- end
29
-
30
- # A list of all nonterminals, with types.
31
- #
32
- # @return [Array<Token::Nonterminal>>]
33
- def typed_nonterminals
34
- @_typed_nonterminals ||= begin
35
- typed = []
36
- compiler.options[:nonterminals].each do |data|
37
- data[1].each do |nonterm|
38
- typed << Token::Nonterminal.new(nonterm, data[0])
39
- end
40
- end
41
- typed
42
- end
43
- end
44
-
45
- # A list of all symbols in the grammar; includes both
46
- # terminals and nonterminals.
47
- #
48
- # @return [Array<Token::Terminal, Symbol>]
49
- # @see #terminals
50
- # @see #nonterminals
51
- def symbols
52
- @_symbols ||= terminals + nonterminals
53
- end
54
-
55
- # Checks to see if the grammar uses the `error` terminal
56
- # anywhere.
57
- #
58
- # @return [Boolean]
59
- def contains_error_token?
60
- all_productions.any? { |_| _.items.any?(&:error?) }
61
- end
62
- end
63
- end
64
- end
1
+ # encoding: utf-8
2
+
3
+ module Antelope
4
+ class Grammar
5
+
6
+ # Manages a list of the symbols in the grammar.
7
+ module Symbols
8
+
9
+ # A list of all terminals in the grammar. Checks the compiler
10
+ # options for terminals, and then returns an array of
11
+ # terminals. Caches the result.
12
+ #
13
+ # @return [Array<Token::Terminal>]
14
+ def terminals
15
+ @_terminals ||= begin
16
+ @compiler.options.fetch(:terminals) { [] }.map do |v|
17
+ Token::Terminal.new(*v)
18
+ end
19
+ end
20
+ end
21
+
22
+ # A list of all nonterminals in the grammar.
23
+ #
24
+ # @return [Array<Symbol>]
25
+ # @see #productions
26
+ def nonterminals
27
+ @_nonterminals ||= productions.keys
28
+ end
29
+
30
+ # A list of all nonterminals, with types.
31
+ #
32
+ # @return [Array<Token::Nonterminal>]
33
+ def typed_nonterminals
34
+ @_typed_nonterminals ||= begin
35
+ typed = []
36
+ compiler.options[:nonterminals].each do |data|
37
+ data[1].each do |nonterm|
38
+ typed << Token::Nonterminal.new(nonterm, data[0])
39
+ end
40
+ end
41
+ typed
42
+ end
43
+ end
44
+
45
+ # A list of all symbols in the grammar; includes both
46
+ # terminals and nonterminals.
47
+ #
48
+ # @return [Array<Token::Terminal, Symbol>]
49
+ # @see #terminals
50
+ # @see #nonterminals
51
+ def symbols
52
+ @_symbols ||= terminals + nonterminals
53
+ end
54
+
55
+ # Checks to see if the grammar uses the `error` terminal
56
+ # anywhere.
57
+ #
58
+ # @return [Boolean]
59
+ def contains_error_token?
60
+ all_productions.any? { |_| _.items.any?(&:error?) }
61
+ end
62
+ end
63
+ end
64
+ end
@@ -1,23 +1,23 @@
1
- # encoding: utf-8
2
-
3
- module Antelope
4
- class Grammar
5
- class Token
6
- # Defines an epsilon token. An epsilon token represents
7
- # nothing. This is used to say that a nonterminal can
8
- # reduce to nothing.
9
- class Epsilon < Token
10
- # Initialize. Technically takes no arguments. Sets
11
- # the name of the token to be `:$empty`.
12
- def initialize(*)
13
- super :"$empty"
14
- end
15
-
16
- # (see Token#epsilon?)
17
- def epsilon?
18
- true
19
- end
20
- end
21
- end
22
- end
23
- end
1
+ # encoding: utf-8
2
+
3
+ module Antelope
4
+ class Grammar
5
+ class Token
6
+ # Defines an epsilon token. An epsilon token represents
7
+ # nothing. This is used to say that a nonterminal can
8
+ # reduce to nothing.
9
+ class Epsilon < Token
10
+ # Initialize. Technically takes no arguments. Sets
11
+ # the name of the token to be `:$empty`.
12
+ def initialize(*)
13
+ super :"$empty"
14
+ end
15
+
16
+ # (see Token#epsilon?)
17
+ def epsilon?
18
+ true
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
@@ -1,24 +1,24 @@
1
- # encoding: utf-8
2
-
3
- module Antelope
4
- class Grammar
5
- class Token
6
- # Defines an error token. This may be used internally by the
7
- # parser when it enters panic mode; any tokens following this
8
- # are the synchronisation tokens. This is considered a terminal
9
- # for the purposes of rule definitions.
10
- class Error < Terminal
11
- # Initialize the error token. Technically takes no arguments.
12
- # Sets the name to be `:$error`.
13
- def initialize(*)
14
- super :$error
15
- end
16
-
17
- # (see Token#error?)
18
- def error?
19
- true
20
- end
21
- end
22
- end
23
- end
24
- end
1
+ # encoding: utf-8
2
+
3
+ module Antelope
4
+ class Grammar
5
+ class Token
6
+ # Defines an error token. This may be used internally by the
7
+ # parser when it enters panic mode; any tokens following this
8
+ # are the synchronisation tokens. This is considered a terminal
9
+ # for the purposes of rule definitions.
10
+ class Error < Terminal
11
+ # Initialize the error token. Technically takes no arguments.
12
+ # Sets the name to be `:$error`.
13
+ def initialize(*)
14
+ super :$error
15
+ end
16
+
17
+ # (see Token#error?)
18
+ def error?
19
+ true
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -1,15 +1,15 @@
1
- # encoding: utf-8
2
-
3
- module Antelope
4
- class Grammar
5
- class Token
6
- # Defines a nonterminal token.
7
- class Nonterminal < Token
8
- # (see Token#nonterminal?)
9
- def nonterminal?
10
- true
11
- end
12
- end
13
- end
14
- end
15
- end
1
+ # encoding: utf-8
2
+
3
+ module Antelope
4
+ class Grammar
5
+ class Token
6
+ # Defines a nonterminal token.
7
+ class Nonterminal < Token
8
+ # (see Token#nonterminal?)
9
+ def nonterminal?
10
+ true
11
+ end
12
+ end
13
+ end
14
+ end
15
+ end
@@ -1,15 +1,15 @@
1
- # encoding: utf-8
2
-
3
- module Antelope
4
- class Grammar
5
- class Token
6
- # Defines a terminal token.
7
- class Terminal < Token
8
- # (see Token#terminal?)
9
- def terminal?
10
- true
11
- end
12
- end
13
- end
14
- end
15
- end
1
+ # encoding: utf-8
2
+
3
+ module Antelope
4
+ class Grammar
5
+ class Token
6
+ # Defines a terminal token.
7
+ class Terminal < Token
8
+ # (see Token#terminal?)
9
+ def terminal?
10
+ true
11
+ end
12
+ end
13
+ end
14
+ end
15
+ end