antelope 0.3.2 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +25 -25
  3. data/.rspec +3 -3
  4. data/.travis.yml +10 -10
  5. data/.yardopts +7 -7
  6. data/CONTRIBUTING.md +50 -38
  7. data/GENERATORS.md +180 -124
  8. data/Gemfile +7 -7
  9. data/LICENSE.txt +22 -22
  10. data/README.md +240 -104
  11. data/Rakefile +2 -2
  12. data/TODO.md +58 -58
  13. data/antelope.gemspec +29 -28
  14. data/bin/antelope +7 -7
  15. data/examples/deterministic.ace +35 -35
  16. data/examples/example.ace +52 -51
  17. data/examples/example.ace.err +192 -192
  18. data/examples/example.ace.inf +432 -432
  19. data/examples/example.ate +70 -70
  20. data/examples/example.ate.err +192 -192
  21. data/examples/example.ate.inf +432 -432
  22. data/examples/liquidscript.ace +233 -233
  23. data/examples/simple.ace +22 -22
  24. data/lib/antelope/ace/compiler.rb +334 -334
  25. data/lib/antelope/ace/errors.rb +30 -30
  26. data/lib/antelope/ace/scanner/argument.rb +57 -57
  27. data/lib/antelope/ace/scanner/first.rb +89 -89
  28. data/lib/antelope/ace/scanner/second.rb +178 -178
  29. data/lib/antelope/ace/scanner/third.rb +27 -27
  30. data/lib/antelope/ace/scanner.rb +144 -144
  31. data/lib/antelope/ace.rb +47 -47
  32. data/lib/antelope/cli.rb +60 -60
  33. data/lib/antelope/errors.rb +25 -25
  34. data/lib/antelope/generation/constructor/first.rb +86 -86
  35. data/lib/antelope/generation/constructor/follow.rb +105 -105
  36. data/lib/antelope/generation/constructor/nullable.rb +64 -64
  37. data/lib/antelope/generation/constructor.rb +127 -127
  38. data/lib/antelope/generation/errors.rb +17 -17
  39. data/lib/antelope/generation/null.rb +13 -13
  40. data/lib/antelope/generation/recognizer/rule.rb +216 -216
  41. data/lib/antelope/generation/recognizer/state.rb +129 -129
  42. data/lib/antelope/generation/recognizer.rb +177 -177
  43. data/lib/antelope/generation/tableizer.rb +176 -176
  44. data/lib/antelope/generation.rb +15 -15
  45. data/lib/antelope/generator/base/coerce.rb +115 -0
  46. data/lib/antelope/generator/base/extra.rb +50 -0
  47. data/lib/antelope/generator/base.rb +134 -264
  48. data/lib/antelope/generator/c.rb +11 -11
  49. data/lib/antelope/generator/c_header.rb +105 -105
  50. data/lib/antelope/generator/c_source.rb +39 -39
  51. data/lib/antelope/generator/error.rb +34 -34
  52. data/lib/antelope/generator/group.rb +60 -57
  53. data/lib/antelope/generator/html.rb +51 -51
  54. data/lib/antelope/generator/info.rb +47 -47
  55. data/lib/antelope/generator/null.rb +18 -18
  56. data/lib/antelope/generator/output.rb +17 -17
  57. data/lib/antelope/generator/ruby.rb +112 -79
  58. data/lib/antelope/generator/templates/c_header.ant +36 -36
  59. data/lib/antelope/generator/templates/c_source.ant +202 -202
  60. data/lib/antelope/generator/templates/error.erb +40 -0
  61. data/lib/antelope/generator/templates/html/antelope.css +53 -1
  62. data/lib/antelope/generator/templates/html/antelope.html +82 -1
  63. data/lib/antelope/generator/templates/html/antelope.js +9 -1
  64. data/lib/antelope/generator/templates/html/css.ant +53 -53
  65. data/lib/antelope/generator/templates/html/html.ant +82 -82
  66. data/lib/antelope/generator/templates/html/js.ant +9 -9
  67. data/lib/antelope/generator/templates/info.erb +61 -0
  68. data/lib/antelope/generator/templates/{ruby.ant → ruby.erb} +171 -178
  69. data/lib/antelope/generator.rb +62 -66
  70. data/lib/antelope/grammar/generation.rb +76 -76
  71. data/lib/antelope/grammar/loading.rb +84 -84
  72. data/lib/antelope/grammar/precedence.rb +59 -59
  73. data/lib/antelope/grammar/precedences.rb +64 -64
  74. data/lib/antelope/grammar/production.rb +56 -56
  75. data/lib/antelope/grammar/productions.rb +154 -154
  76. data/lib/antelope/grammar/symbols.rb +64 -64
  77. data/lib/antelope/grammar/token/epsilon.rb +23 -23
  78. data/lib/antelope/grammar/token/error.rb +24 -24
  79. data/lib/antelope/grammar/token/nonterminal.rb +15 -15
  80. data/lib/antelope/grammar/token/terminal.rb +15 -15
  81. data/lib/antelope/grammar/token.rb +231 -231
  82. data/lib/antelope/grammar.rb +68 -68
  83. data/lib/antelope/version.rb +6 -6
  84. data/lib/antelope.rb +18 -19
  85. data/optimizations.txt +42 -42
  86. data/spec/antelope/ace/compiler_spec.rb +60 -60
  87. data/spec/antelope/ace/scanner_spec.rb +27 -27
  88. data/spec/antelope/generation/constructor_spec.rb +131 -131
  89. data/spec/fixtures/simple.ace +22 -22
  90. data/spec/spec_helper.rb +39 -39
  91. data/spec/support/benchmark_helper.rb +5 -5
  92. data/spec/support/grammar_helper.rb +14 -14
  93. data/subl/Ace (Ruby).JSON-tmLanguage +94 -94
  94. data/subl/Ace (Ruby).tmLanguage +153 -153
  95. metadata +22 -11
  96. data/lib/antelope/generator/templates/error.ant +0 -34
  97. data/lib/antelope/generator/templates/info.ant +0 -53
  98. data/lib/antelope/template/compiler.rb +0 -78
  99. data/lib/antelope/template/errors.rb +0 -9
  100. data/lib/antelope/template/scanner.rb +0 -109
  101. data/lib/antelope/template.rb +0 -64
  102. data/spec/antelope/template_spec.rb +0 -50
data/README.md CHANGED
@@ -1,104 +1,240 @@
1
- # Antelope
2
-
3
- _Antelope_ is a parser generator that can generate parsers for any language*. In the sense of actually creating a parser, it works kind of like [_Bison_][bison] - you give it an input file, say, `language.ace`, and it generates a parser for it, in, say, `language.rb`. Only, instead of _Bison_'s support only for C, C++, and Java, _Antelope_ is meant to generate parsers for multiple languages. _Antelope_ is also written in Ruby for understandability.
4
-
5
- Enough about that, though, let's get into _Antelope_.
6
-
7
- ## Installation
8
-
9
- Since you'll only typically use _Antelope_ from the command line, I suggest you install it like so:
10
-
11
- $ gem install antelope
12
-
13
- If, however, you plan on using it in an application, or need it as a part of a library, you can add `gem "antelope"` to your Gemfile.
14
-
15
- ## Usage
16
-
17
- _Antelope_ is fairly simple to use; you define an `.ace` file, compile it, and use the proper API for the generated language.
18
-
19
- ### How Antelope Works
20
-
21
- Before getting into Ace files, however, you have to understand how _Antelope_ works. _Antelope_ generates a LALR(1) parser, like _Bison_; for your benefit, however, there are some terms here to understand:
22
-
23
- - LL: A type of parser. If followed by parenthesis, the number (or letter) in the parenthesis denotes the number of tokens of lookahead; i.e., LL(0) has 0 tokens of lookahead. Standing for **L** eft to Right, **L** eftmost Derivation, these tend to be handwritten as Recursive Decent parsers. LL parsers can be represented normally by a set of _productions_. LL parsers are _Top-Down parsers_, meaning they start with only the _Starting symbol_ and try to match the input. A look at an LL parser is given [here][ll-parser]; check out [this post][tumblr-ll-parser] for more information about LL parsers. _Antelope_ does not generate LL parsers.
24
- - Production: In parsing, a production associates a _nonterminal_ with a _string_ of _nonterminals_ and _terminals_. It is said that the string of nonterminals and terminals _reduces to_ the nonterminal. Productions take the form of `A -> y`, with _A_ being the left hand side (and the _nonterminal_), and _y_ being the string.
25
- - Starting symbol: In parsing, it is the _nonterminal_ that is used to represent any kind of valid input.
26
- - Symbol: A _nonterminal_ or a _terminal_.
27
- - Nonterminal: In parsing, a nonterminal is an abstraction used to represent any number of _strings_ of _nonterminals_ and _terminals_.
28
- - String: In parsing, an ordered set of _symbols_.
29
- - Terminal: In parsing, it is a concrete value given by the lexer.
30
- - LR: A family of types of parsers. If followed by parenthesis, the number (or letter) in the parenthesis denotes the number of tokens of lookahead; i.e., LR(0) has 0 tokens of lookahead. Standing for **L** eft to Right, **R** ightmost Derivation, they tend to be more complicated than their LL brethren. LR parsers work by starting with the entire input and finding _Handles_ from that input, eventually ending up at the _Starting symbol_. LR parsers typically do this by splitting the input into two parts: the _stack_, and the _input_. LR(0), LR(1), SLR(1), and LALR(1) are all examples of LR parsers.
31
- - Handle: In a LR parser, it is the _Leftmost complete cluster of leaf nodes_ (in a representative AST). When a handle is found, a _reduction_ is performed.
32
- - Stack: Initially empty, it can contain any _symbol_, and is primarily used to represent what the parser has seen. Finding handles will purely occur at the top of the stack.
33
- - Reduction/Reduce: In a LR parser, this is an action corresponding to replacing the right side of a _production_ with its left side. This purely occurs at the top of the _stack_, and correlates to finding a _Handle_.
34
- - Input: Initially containing the full input, it can contain only _terminals_; it primarily contains what the parser has yet to see.
35
- - LR(0): In parsing, it is a type of LR parser that uses no lookahead. It essentially uses a deterministic finite automaton to find _possible_ handles. It does no checking to make sure that the _possible_ handles are legitimate.
36
- - SLR(1): A part of the LR family of parsers, it upgrades LR(0) by checking to make sure that the reduction that it will make (as a part of finding a handle) is valid in the context; basically, for every reduction that it can make, it defines a set of terminals that can _FOLLOW_ the corresponding nonterminal.
37
- - FOLLOW(A) set: In parsing, it defines a set of terminals that can _follow_ the nonterminal _A_ anywhere in the grammar.
38
- - LALR(1): A part of the LR family of parsers, it upgrades SLR by using a more precise _FOLLOW_ set, called _LA_.
39
- - LA set: LA(q, A -> y) = { t | S =>* _aAtw_ and _ay_ reaches _q_ }
40
- - Panic mode: In parsing, this is the mode that a parser can go in for recovery, if it encounters a terminal that it was not expecting. In panic mode, the parser pops terminals off of the input until it reaches a valid _synchronization token_. In order to utilize panic mode, at least one production must have the special _error_ terminal in it. If the parser encounters an error, it will attempt to find a production it can use to resynchronize; if it cannot resynchronize, it will error. It then attempts to resynchronize by continuously pop terminals off of the input and discarding them, attempting to find a synchronization token. A synchronization token is a token that follows an _error_ terminal.
41
- - Shift/reduce conflict: This occurs when the parser is unable to decide if it should shift the next token over from the input to the stack, or to reduce the top token on the stack. If a shift/reduce conflict cannot be solved by changing the grammar, then precedence rules may be used (see `examples/example.ace`).
42
- - Reduce/reduce conflict: This occurs when the parser is unable to decide which production to reduce. This cannot be solved by precedence.
43
- - Precedence: In some grammars, the _Antelope_ runs into _Shift/reduce conflicts_ when attempting to construct a parser. To resolve these conflicts, _Antelope_ provides precedence declarations. Precedence is separated into levels, which each have a type; levels can be _left-associative_, _right-associative_, or _non-associative_. The higher the level, the higher the precedence. Think of the Order of Operations here; the operations multiply and divide are left associative, and on a higher level than add and subtract, which are still left-associative:
44
-
45
- MULTIPLY, DIVIDE (left-associative)
46
- ADD, SUBTRACT (left-associative)
47
-
48
- Exponentiation, however, is right-associative, and is higher than MULTIPLY or DIVIDE; basically, `2**2**2` would be parsed as `2**(2**2)`, instead of the left-associative `(2**2)**2`. For an example of a grammar that uses precedence, see `examples/example.ace`.
49
-
50
- ### Defining the Ace file
51
-
52
- The Ace file format is very similar to _Bison_'s _y_ files; this was intentional, to make transitions between the two easy. The Ace file should be formatted like so:
53
-
54
- ```
55
- <directives>
56
- %%
57
- <rules>
58
- %%
59
- <code>
60
- ```
61
-
62
- Both `%%` (internally called _content boundaries_) are required; the minimum file that is _technically_ accepted by _Antelope_ is therefore two content boundaries separated by a newline.
63
-
64
- In the `<directives>` section, there can be any number and combinations of _code blocks_ and _directives_. _Code blocks_ are blocks of code delimited by `%{` and `%}`, with the ending delimiter on its own line. These are copied into the output of the file directly. _Directives_ tell _Antelope_ information about the grammar. An example directive would be the `token` or `terminal` directive; this lets _Antelope_ know that a terminal by the given name exists. Directives take the form `%<name> [<value>]*`, with `<name>` being the directive name, and `<value>` being a string delimited by braces, angle brackets, quotes, or nothing at all. An example of a directive would be `%token ADD "+"`. The available directives are determined by the code generators available to _Antelope_ at the time that the Ace file is being compiled. Some directives, however, are always available:
65
-
66
- - `require` (1 argument): This makes _Antelope_ check its version against the first argument to this. If the versions do _not_ match, _Antelope_ will raise an error and fail to parse the file. It is recommended to at least require the minor version of _Antelope_ (i.e. `%require "~> 0.1").
67
- - `token`, `terminal` (1-2 arguments): Defines a terminal. The first argument defines its name; the second argument defines its value. Its value isn't used anywhere but the `.output` file, to make it easier to read.
68
- - `left`, `right`, `nonassoc` (1+ arguments): Defines a precedence level, and sets the type of the level based on the directive name used.
69
- - `type`: The code generator to use. Currently, the possible values for this can be `null`, `ruby`, and `output`.
70
- - `define` (1+ arguments): Sets a key to a value. This would do the exact same thing that using the key as a directive would do, i.e. `%define something "value"` does the same thing as `%something "value"`. _(note: This is not entirely true. If the key were one of the above, it would most likely raise an error, complaining that there is no directive named that.)_
71
- - `panic-mode` (0-1 arguments): Enables/disables panic mode being put in the output code. Not included by default, but should be.
72
-
73
- In the `<rules>` section, there can be any number of rules (which are definitions for productions). Rules have this syntax:
74
-
75
- ```
76
- <head>: <body> ["|" <body>]* [";"]
77
- ```
78
-
79
- With `<head>` being the nonterminal that the production(s) reduce to, and `<body>` being one or more symbols followed by an optional block that is executed when is a reduction is made using that production. A semicolon terminating the rule is optional. Rules are what make up the grammar. `error`, `nothing`, and `ε` are all special symbols; the first one defines the special `error` terminal (used for panic mode, ignored otherwise), whereas the second two are used to literally mean nothing (i.e., the rule reduces to nothing). It is not always a good idea to use the `nothing` symbol, since most rules can be written without it.
80
-
81
- In the `<code>` section, custom code used to wrap the generated parser can be placed. In order to embed the generated parser, you must place `%{write}` where you want the generated parser.
82
-
83
- ### Compiling the Ace file
84
-
85
- Compiling the Ace file is somewhat straightforward; `antelope compile /path/to/file.ace` will cover most use cases. If you want to override the type in the Ace file, you can use the `--type=` command option. If it is giving an error, and you're not sure what's causing it, you can use the `--verbose` command option to see a backtrace. If there are any conflicts in the
86
-
87
- By default, _Antelope_ always includes the `Output` generator as a part of the output. This means that an `.output` file will always be generated along with any other files. The `.output` file contains information about the parser, like the productions that were used, precedence levels, states, and lookahead sets.
88
-
89
- ### Language API
90
-
91
- todo.
92
-
93
- ## Contributing
94
-
95
- 1. Fork it (<https://github.com/medcat/antelope/fork>)
96
- 2. Create your feature branch (`git checkout -b my-new-feature`)
97
- 3. Commit your changes (`git commit -am 'Add some feature'`)
98
- 4. Push to the branch (`git push origin my-new-feature`)
99
- 5. Create a new Pull Request
100
-
101
- * Only if there's a generator for it.
102
- [bison]: http://www.gnu.org/software/bison/
103
- [ll-parser]: http://i.imgur.com/XhJKrDW.png
104
- [tumblr-ll-parser]: http://redjazz96.tumblr.com/post/88336053195/what-antelope-does-and-what-i-hope-it-will-do-part
1
+ # Antelope
2
+
3
+ _Antelope_ is a parser generator that can generate parsers for any
4
+ language*. In the sense of actually creating a parser, it works
5
+ kind of like [_Bison_][bison] - you give it an input file, say,
6
+ `language.ace`, and it generates a parser for it, in, say,
7
+ `language.rb`. Only, instead of _Bison_'s support only for C, C++,
8
+ and Java, _Antelope_ is meant to generate parsers for multiple
9
+ languages. _Antelope_ is also written in Ruby for understandability.
10
+
11
+ Enough about that, though, let's get into _Antelope_.
12
+
13
+ ## Installation
14
+
15
+ Since you'll only typically use _Antelope_ from the command line, I
16
+ suggest you install it like so:
17
+
18
+ $ gem install antelope
19
+
20
+ If, however, you plan on using it in an application, or need it as a
21
+ part of a library, you can add `gem "antelope"` to your Gemfile.
22
+
23
+ ## Usage
24
+
25
+ _Antelope_ is fairly simple to use; you define an `.ace` file, compile
26
+ it, and use the proper API for the generated language.
27
+
28
+ ### How Antelope Works
29
+
30
+ Before getting into Ace files, however, you have to understand how
31
+ _Antelope_ works. _Antelope_ generates a LALR(1) parser, like
32
+ _Bison_; for your benefit, however, there are some terms here to
33
+ understand:
34
+
35
+ - LL: A type of parser. If followed by parentheses, the number (or
36
+ letter) in the parentheses denotes the number of tokens of
37
+ lookahead; i.e., LL(0) has 0 tokens of lookahead. Standing for
38
+ **L**eft to Right, **L**eftmost Derivation, these tend to be
39
+ handwritten as Recursive Descent parsers. LL parsers can be
40
+ represented normally by a set of _productions_. LL parsers are
41
+ _Top-Down parsers_, meaning they start with only the
42
+ _Starting symbol_ and try to match the input. A look at an LL
43
+ parser is given [here][ll-parser]; check out
44
+ [this post][tumblr-ll-parser] for more information about LL parsers.
45
+ _Antelope_ does not generate LL parsers.
46
+ - Production: In parsing, a production associates a _nonterminal_ with
47
+ a _string_ of _nonterminals_ and _terminals_. It is said that the
48
+ string of nonterminals and terminals _reduces to_ the nonterminal.
49
+ Productions take the form of `A -> y`, with _A_ being the left hand
50
+ side (and the _nonterminal_), and _y_ being the string.
51
+ - Starting symbol: In parsing, it is the _nonterminal_ that is used to
52
+ represent any kind of valid input.
53
+ - Symbol: A _nonterminal_ or a _terminal_.
54
+ - Nonterminal: In parsing, a nonterminal is an abstraction used to
55
+ represent any number of _strings_ of _nonterminals_ and _terminals_.
56
+ - String: In parsing, an ordered set of _symbols_.
57
+ - Terminal: In parsing, it is a concrete value given by the lexer.
58
+ - LR: A family of types of parsers. If followed by parentheses, the
59
+ number (or letter) in the parentheses denotes the number of tokens
60
+ of lookahead; i.e., LR(0) has 0 tokens of lookahead. Standing for
61
+ **L**eft to Right, **R**ightmost Derivation, they tend to be more
62
+ complicated than their LL brethren. LR parsers work by starting
63
+ with the entire input and finding _Handles_ from that input,
64
+ eventually ending up at the _Starting symbol_. LR parsers typically
65
+ do this by splitting the input into two parts: the _stack_, and the
66
+ _input_. LR(0), LR(1), SLR(1), and LALR(1) are all examples of LR
67
+ parsers.
68
+ - Handle: In an LR parser, it is the _Leftmost complete cluster of leaf
69
+ nodes_ (in a representative AST). When a handle is found, a
70
+ _reduction_ is performed.
71
+ - Stack: Initially empty, it can contain any _symbol_, and is
72
+ primarily used to represent what the parser has seen. Finding handles
73
+ will purely occur at the top of the stack.
74
+ - Reduction/Reduce: In an LR parser, this is an action corresponding to
75
+ replacing the right side of a _production_ with its left side. This
76
+ purely occurs at the top of the _stack_, and correlates to finding a
77
+ _Handle_.
78
+ - Input: Initially containing the full input, it can contain only
79
+ _terminals_; it primarily contains what the parser has yet to see.
80
+ - LR(0): In parsing, it is a type of LR parser that uses no lookahead.
81
+ It essentially uses a deterministic finite automaton to find
82
+ _possible_ handles. It does no checking to make sure that the
83
+ _possible_ handles are legitimate.
84
+ - SLR(1): A part of the LR family of parsers, it upgrades LR(0) by
85
+ checking to make sure that the reduction that it will make (as a
86
+ part of finding a handle) is valid in the context; basically, for
87
+ every reduction that it can make, it defines a set of terminals that
88
+ can _FOLLOW_ the corresponding nonterminal.
89
+ - FOLLOW(A) set: In parsing, it defines a set of terminals that can
90
+ _follow_ the nonterminal _A_ anywhere in the grammar.
91
+ - LALR(1): A part of the LR family of parsers, it upgrades SLR by
92
+ using a more precise _FOLLOW_ set, called _LA_.
93
+ - LA set: LA(q, A -> y) = { t | S =>* _aAtw_ and _ay_ reaches _q_ }
94
+ - Panic mode: In parsing, this is the mode that a parser can go in for
95
+ recovery, if it encounters a terminal that it was not expecting. In
96
+ panic mode, the parser pops terminals off of the input until it
97
+ reaches a valid _synchronization token_. In order to utilize panic
98
+ mode, at least one production must have the special _error_ terminal
99
+ in it. If the parser encounters an error, it will attempt to find a
100
+ production it can use to resynchronize; if it cannot resynchronize,
101
+ it will error. It then attempts to resynchronize by continuously
102
+ popping terminals off of the input and discarding them, attempting to
103
+ find a synchronization token. A synchronization token is a token
104
+ that follows an _error_ terminal.
105
+ - Shift/reduce conflict: This occurs when the parser is unable to
106
+ decide if it should shift the next token over from the input to the
107
+ stack, or to reduce the top token on the stack. If a shift/reduce
108
+ conflict cannot be solved by changing the grammar, then precedence
109
+ rules may be used (see `examples/example.ace`).
110
+ - Reduce/reduce conflict: This occurs when the parser is unable to
111
+ decide which production to reduce. This cannot be solved by
112
+ precedence.
113
+ - Precedence: In some grammars, _Antelope_ runs into _Shift/reduce
114
+ conflicts_ when attempting to construct a parser. To resolve these
115
+ conflicts, _Antelope_ provides precedence declarations. Precedence
116
+ is separated into levels, which each have a type; levels can be
117
+ _left-associative_, _right-associative_, or _non-associative_. The
118
+ higher the level, the higher the precedence. Think of the Order of
119
+ Operations here; the operations multiply and divide are left
120
+ associative, and on a higher level than add and subtract, which are
121
+ still left-associative:
122
+
123
+ MULTIPLY, DIVIDE (left-associative)
124
+ ADD, SUBTRACT (left-associative)
125
+
126
+ Exponentiation, however, is right-associative, and is higher than
127
+ MULTIPLY or DIVIDE; basically, `2**2**2` would be parsed as
128
+ `2**(2**2)`, instead of the left-associative `(2**2)**2`. For an
129
+ example of a grammar that uses precedence, see
130
+ `examples/example.ace`.
131
+
132
+ ### Defining the Ace file
133
+
134
+ The Ace file format is very similar to _Bison_'s _y_ files; this was
135
+ intentional, to make transitions between the two easy. The Ace file
136
+ should be formatted like so:
137
+
138
+ ```
139
+ <directives>
140
+ %%
141
+ <rules>
142
+ %%
143
+ <code>
144
+ ```
145
+
146
+ Both `%%` (internally called _content boundaries_) are required; the
147
+ minimum file that is _technically_ accepted by _Antelope_ is therefore
148
+ two content boundaries separated by a newline.
149
+
150
+ In the `<directives>` section, there can be any number and
151
+ combinations of _code blocks_ and _directives_. _Code blocks_ are
152
+ blocks of code delimited by `%{` and `%}`, with the ending delimiter
153
+ on its own line. These are copied into the output of the file
154
+ directly. _Directives_ tell _Antelope_ information about the grammar.
155
+ An example directive would be the `token` or `terminal` directive;
156
+ this lets _Antelope_ know that a terminal by the given name exists.
157
+ Directives take the form `%<name> [<value>]*`, with `<name>` being the
158
+ directive name, and `<value>` being a string delimited by braces,
159
+ angle brackets, quotes, or nothing at all. An example of a directive
160
+ would be `%token ADD "+"`. The available directives are determined by
161
+ the code generators available to _Antelope_ at the time that the Ace
162
+ file is being compiled. Some directives, however, are always
163
+ available:
164
+
165
+ - `require` (1 argument): This makes _Antelope_ check its version
166
+ against the first argument to this. If the versions do _not_ match,
167
+ _Antelope_ will raise an error and fail to parse the file. It is
168
+ recommended to at least require the minor version of _Antelope_
169
+ (i.e. `%require "~> 0.1"`).
170
+ - `token`, `terminal` (1-2 arguments): Defines a terminal. The first
171
+ argument defines its name; the second argument defines its value.
172
+ Its value isn't used anywhere but the `.output` file, to make it
173
+ easier to read.
174
+ - `left`, `right`, `nonassoc` (1+ arguments): Defines a precedence
175
+ level, and sets the type of the level based on the directive name
176
+ used.
177
+ - `type`: The code generator to use. Currently, the possible values
178
+ for this can be `null`, `ruby`, and `output`.
179
+ - `define` (1+ arguments): Sets a key to a value. This would do the
180
+ exact same thing that using the key as a directive would do, i.e.
181
+ `%define something "value"` does the same thing as
182
+ `%something "value"`. _(note: This is not entirely true. If the key
183
+ were one of the above, it would most likely raise an error,
184
+ complaining that there is no directive named that.)_
185
+ - `panic-mode` (0-1 arguments): Enables/disables panic mode being put
186
+ in the output code. Not included by default, but should be.
187
+
188
+ In the `<rules>` section, there can be any number of rules (which are
189
+ definitions for productions). Rules have this syntax:
190
+
191
+ ```
192
+ <head>: <body> ["|" <body>]* [";"]
193
+ ```
194
+
195
+ With `<head>` being the nonterminal that the production(s) reduce to,
196
+ and `<body>` being one or more symbols followed by an optional block
197
+ that is executed when a reduction is made using that production. A
198
+ semicolon terminating the rule is optional. Rules are what make up
199
+ the grammar. `error`, `nothing`, and `ε` are all special symbols; the
200
+ first one defines the special `error` terminal (used for panic mode,
201
+ ignored otherwise), whereas the second two are used to literally
202
+ mean nothing (i.e., the rule reduces to nothing). It is not always
203
+ a good idea to use the `nothing` symbol, since most rules can be
204
+ written without it.
205
+
206
+ In the `<code>` section, custom code used to wrap the generated parser
207
+ can be placed. In order to embed the generated parser, you must place
208
+ `%{write}` where you want the generated parser.
209
+
210
+ ### Compiling the Ace file
211
+
212
+ Compiling the Ace file is somewhat straightforward;
213
+ `antelope compile /path/to/file.ace` will cover most use cases. If
214
+ you want to override the type in the Ace file, you can use the
215
+ `--type=` command option. If it is giving an error, and you're not
216
+ sure what's causing it, you can use the `--verbose` command option to
217
+ see a backtrace.
218
+
219
+ By default, _Antelope_ always includes the `Output` generator as a
220
+ part of the output. This means that an `.output` file will always be
221
+ generated along with any other files. The `.output` file contains
222
+ information about the parser, like the productions that were used,
223
+ precedence levels, states, and lookahead sets.
224
+
225
+ ### Language API
226
+
227
+ todo.
228
+
229
+ ## Contributing
230
+
231
+ 1. Fork it (<https://github.com/medcat/antelope/fork>)
232
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
233
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
234
+ 4. Push to the branch (`git push origin my-new-feature`)
235
+ 5. Create a new Pull Request
236
+
237
+ \* Only if there's a generator for it.
238
+ [bison]: http://www.gnu.org/software/bison/
239
+ [ll-parser]: http://i.imgur.com/XhJKrDW.png
240
+ [tumblr-ll-parser]: http://redjazz96.tumblr.com/post/88336053195/what-antelope-does-and-what-i-hope-it-will-do-part
data/Rakefile CHANGED
@@ -1,2 +1,2 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
data/TODO.md CHANGED
@@ -1,58 +1,58 @@
1
- # Todo
2
-
3
- - Documentation
4
- - Language API files
5
-
6
- ## Undocumented:
7
-
8
- - `Antelope::Ace::Precedence#to_s`
9
- - `Antelope::Ace::Scanner::Argument#method_missing`
10
- - `Antelope::Ace::Scanner::Argument#==`
11
- - `Antelope::Ace::Scanner::Argument`
12
- - `Antelope::Ace::Scanner::First#scan_first_directive_arguments`
13
- - `Antelope::Ace::Token#inspect`
14
- - `Antelope::CLI`
15
- - `Antelope::CLI#compile_file`
16
- - `Antelope::CLI#compile`
17
- - `Antelope::Generation::Recognizer::State#===`
18
- - `Antelope::Generator#directives`
19
- - `Antelope::Generator::Base.register_as`
20
- - `Antelope::Generator::Base::Boolean`
21
- - `Antelope::Generator::C`
22
- - `Antelope::Generator::CHeader#guard_name`
23
- - `Antelope::Generator::CHeader`
24
- - `Antelope::Generator::CHeader#generate`
25
- - `Antelope::Generator::CHeader#namespace`
26
- - `Antelope::Generator::CSource`
27
- - `Antelope::Generator::CSource#namespace`
28
- - `Antelope::Generator::CSource#guard_name`
29
- - `Antelope::Generator::CSource#generate`
30
- - `Antelope::Generator::Output#unused_symbols`
31
- - `Antelope::Generator::Ruby#error_class`
32
- - `Antelope::Template#parse`
33
- - `Antelope::Template::NO_SOURCE`
34
- - `Antelope::Template`
35
- - `Antelope::Template#determine_source`
36
- - `Antelope::Template#normalize_input`
37
- - `Antelope::Template#result`
38
- - `Antelope::Template::Compiler#parse_comment_tag`
39
- - `Antelope::Template::Compiler`
40
- - `Antelope::Template::Compiler#parse_tag`
41
- - `Antelope::Template::Compiler#parse_output_tag`
42
- - `Antelope::Template::Compiler#compile`
43
- - `Antelope::Template::Compiler#parse_text`
44
- - `Antelope::Template::Compiler#merge_text_tokens`
45
- - `Antelope::Template::Error`
46
- - `Antelope::Template::NoTokenError`
47
- - `Antelope::Template::SyntaxError`
48
- - `Antelope::Template::Scanner#scan_ending`
49
- - `Antelope::Template::Scanner#scan_until_brace`
50
- - `Antelope::Template::Scanner#scan_text`
51
- - `Antelope::Template::Scanner#scan_everything`
52
- - `Antelope::Template::Scanner#scan_tag_type`
53
- - `Antelope::Template::Scanner#scan_tag_contents`
54
- - `Antelope::Template::Scanner#scan_tag`
55
- - `Antelope::Template::Scanner#scan_escaped`
56
- - `Antelope::Template::Scanner`
57
- - `Antelope::Template::Scanner#scan`
58
-
1
+ # Todo
2
+
3
+ - Documentation
4
+ - Language API files
5
+
6
+ ## Undocumented:
7
+
8
+ - `Antelope::Ace::Precedence#to_s`
9
+ - `Antelope::Ace::Scanner::Argument#method_missing`
10
+ - `Antelope::Ace::Scanner::Argument#==`
11
+ - `Antelope::Ace::Scanner::Argument`
12
+ - `Antelope::Ace::Scanner::First#scan_first_directive_arguments`
13
+ - `Antelope::Ace::Token#inspect`
14
+ - `Antelope::CLI`
15
+ - `Antelope::CLI#compile_file`
16
+ - `Antelope::CLI#compile`
17
+ - `Antelope::Generation::Recognizer::State#===`
18
+ - `Antelope::Generator#directives`
19
+ - `Antelope::Generator::Base.register_as`
20
+ - `Antelope::Generator::Base::Boolean`
21
+ - `Antelope::Generator::C`
22
+ - `Antelope::Generator::CHeader#guard_name`
23
+ - `Antelope::Generator::CHeader`
24
+ - `Antelope::Generator::CHeader#generate`
25
+ - `Antelope::Generator::CHeader#namespace`
26
+ - `Antelope::Generator::CSource`
27
+ - `Antelope::Generator::CSource#namespace`
28
+ - `Antelope::Generator::CSource#guard_name`
29
+ - `Antelope::Generator::CSource#generate`
30
+ - `Antelope::Generator::Output#unused_symbols`
31
+ - `Antelope::Generator::Ruby#error_class`
32
+ - `Antelope::Template#parse`
33
+ - `Antelope::Template::NO_SOURCE`
34
+ - `Antelope::Template`
35
+ - `Antelope::Template#determine_source`
36
+ - `Antelope::Template#normalize_input`
37
+ - `Antelope::Template#result`
38
+ - `Antelope::Template::Compiler#parse_comment_tag`
39
+ - `Antelope::Template::Compiler`
40
+ - `Antelope::Template::Compiler#parse_tag`
41
+ - `Antelope::Template::Compiler#parse_output_tag`
42
+ - `Antelope::Template::Compiler#compile`
43
+ - `Antelope::Template::Compiler#parse_text`
44
+ - `Antelope::Template::Compiler#merge_text_tokens`
45
+ - `Antelope::Template::Error`
46
+ - `Antelope::Template::NoTokenError`
47
+ - `Antelope::Template::SyntaxError`
48
+ - `Antelope::Template::Scanner#scan_ending`
49
+ - `Antelope::Template::Scanner#scan_until_brace`
50
+ - `Antelope::Template::Scanner#scan_text`
51
+ - `Antelope::Template::Scanner#scan_everything`
52
+ - `Antelope::Template::Scanner#scan_tag_type`
53
+ - `Antelope::Template::Scanner#scan_tag_contents`
54
+ - `Antelope::Template::Scanner#scan_tag`
55
+ - `Antelope::Template::Scanner#scan_escaped`
56
+ - `Antelope::Template::Scanner`
57
+ - `Antelope::Template::Scanner#scan`
58
+
data/antelope.gemspec CHANGED
@@ -1,28 +1,29 @@
1
- # coding: utf-8
2
- lib = File.expand_path('../lib', __FILE__)
3
- $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require 'antelope/version'
5
-
6
- Gem::Specification.new do |spec|
7
- spec.name = "antelope"
8
- spec.version = Antelope::VERSION
9
- spec.authors = ["Jeremy Rodi"]
10
- spec.email = ["redjazz96@gmail.com"]
11
- spec.summary = %q{A compiler compiler, written in ruby.}
12
- spec.description = %q{A compiler compiler, written in ruby.}
13
- spec.homepage = "https://github.com/medcat/antelope"
14
- spec.license = "MIT"
15
-
16
- spec.files = `git ls-files -z`.split("\x0")
17
- spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
- spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
- spec.require_paths = ["lib"]
20
-
21
- spec.add_dependency "hashie", "~> 3.0"
22
- spec.add_dependency "thor", "~> 0.19"
23
-
24
- spec.add_development_dependency "bundler", "~> 1.6"
25
- spec.add_development_dependency "rake"
26
- spec.add_development_dependency "rspec"
27
- spec.add_development_dependency "rspec-its"
28
- end
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'antelope/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = 'antelope'
8
+ spec.version = Antelope::VERSION
9
+ spec.authors = ['Jeremy Rodi']
10
+ spec.email = ['redjazz96@gmail.com']
11
+ spec.summary = 'A compiler compiler, written in ruby.'
12
+ spec.description = 'A compiler compiler, written in ruby.'
13
+ spec.homepage = 'https://github.com/medcat/antelope'
14
+ spec.license = 'MIT'
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(/^bin\//) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(/^spec\//)
19
+ spec.require_paths = ['lib']
20
+
21
+ spec.add_dependency 'hashie', '~> 3.0'
22
+ spec.add_dependency 'thor', '~> 0.19'
23
+ spec.add_dependency 'mote', '~> 1.1'
24
+
25
+ spec.add_development_dependency 'bundler', '~> 1.6'
26
+ spec.add_development_dependency 'rake'
27
+ spec.add_development_dependency 'rspec'
28
+ spec.add_development_dependency 'rspec-its'
29
+ end
data/bin/antelope CHANGED
@@ -1,7 +1,7 @@
1
- #!/usr/bin/env ruby
2
-
3
- $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
4
- require 'antelope'
5
- require 'antelope/cli'
6
-
7
- Antelope::CLI.start
1
+ #!/usr/bin/env ruby
2
+
3
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
4
+ require 'antelope'
5
+ require 'antelope/cli'
6
+
7
+ Antelope::CLI.start
@@ -1,35 +1,35 @@
1
- %require "~> 0.1"
2
- %generator "ruby"
3
-
4
- %define api.push-pull pull
5
- %define panic-mode true
6
- %token <lex> NUMBER
7
- %token <lex> SEMICOLON ";"
8
- %token <lex> ADD "+"
9
- %token <lex> LPAREN "("
10
- %token <lex> RPAREN ")"
11
-
12
- %type <node> s e t
13
-
14
- %null.data api.prefix "antelope_"
15
- %union {
16
- struct slip_parser_node* node;
17
- struct slip_lex_token* lex;
18
- struct slip_parser_list* list;
19
- }
20
-
21
- %%
22
-
23
- s: e
24
- e: t[a] SEMICOLON[b] { $$ = $1 }
25
- | t[a] ADD[b] e[c] { $$ = $1 + $2 }
26
- | error[a]
27
-
28
- t: NUMBER
29
- | LPAREN e RPAREN { $$ = $2 }
30
-
31
- %%
32
-
33
- class DeterministicParser < Antelope::Parser
34
- %{write}
35
- end
1
+ %require "~> 0.1"
2
+ %generator "ruby"
3
+
4
+ %define api.push-pull pull
5
+ %define panic-mode true
6
+ %token <lex> NUMBER
7
+ %token <lex> SEMICOLON ";"
8
+ %token <lex> ADD "+"
9
+ %token <lex> LPAREN "("
10
+ %token <lex> RPAREN ")"
11
+
12
+ %type <node> s e t
13
+
14
+ %null.data api.prefix "antelope_"
15
+ %union {
16
+ struct slip_parser_node* node;
17
+ struct slip_lex_token* lex;
18
+ struct slip_parser_list* list;
19
+ }
20
+
21
+ %%
22
+
23
+ s: e
24
+ e: t[a] SEMICOLON[b] { $$ = $1 }
25
+ | t[a] ADD[b] e[c] { $$ = $1 + $2 }
26
+ | error[a]
27
+
28
+ t: NUMBER
29
+ | LPAREN e RPAREN { $$ = $2 }
30
+
31
+ %%
32
+
33
+ class DeterministicParser < Antelope::Parser
34
+ %{write}
35
+ end