antelope 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +25 -23
- data/.rspec +3 -3
- data/.travis.yml +10 -9
- data/.yardopts +7 -7
- data/CONTRIBUTING.md +38 -38
- data/GENERATORS.md +124 -124
- data/Gemfile +7 -7
- data/LICENSE.txt +22 -22
- data/README.md +104 -104
- data/Rakefile +2 -2
- data/TODO.md +58 -58
- data/antelope.gemspec +28 -28
- data/bin/antelope +7 -7
- data/examples/deterministic.ace +35 -35
- data/examples/example.ace +51 -50
- data/examples/example.err +192 -0
- data/examples/{example.output → example.inf} +384 -385
- data/examples/liquidscript.ace +233 -162
- data/examples/simple.ace +22 -22
- data/lib/antelope/ace/compiler.rb +334 -334
- data/lib/antelope/ace/errors.rb +48 -48
- data/lib/antelope/ace/grammar/generation.rb +80 -80
- data/lib/antelope/ace/grammar/loading.rb +53 -53
- data/lib/antelope/ace/grammar/precedences.rb +68 -65
- data/lib/antelope/ace/grammar/productions.rb +156 -150
- data/lib/antelope/ace/grammar/symbols.rb +66 -66
- data/lib/antelope/ace/grammar.rb +69 -69
- data/lib/antelope/ace/precedence.rb +61 -61
- data/lib/antelope/ace/production.rb +57 -57
- data/lib/antelope/ace/scanner/argument.rb +57 -57
- data/lib/antelope/ace/scanner/first.rb +89 -89
- data/lib/antelope/ace/scanner/second.rb +177 -177
- data/lib/antelope/ace/scanner/third.rb +27 -27
- data/lib/antelope/ace/scanner.rb +134 -134
- data/lib/antelope/ace/token/epsilon.rb +24 -24
- data/lib/antelope/ace/token/error.rb +26 -26
- data/lib/antelope/ace/token/nonterminal.rb +17 -17
- data/lib/antelope/ace/token/terminal.rb +17 -17
- data/lib/antelope/ace/token.rb +238 -238
- data/lib/antelope/ace.rb +53 -53
- data/lib/antelope/cli.rb +55 -55
- data/lib/antelope/errors.rb +8 -8
- data/lib/antelope/generation/constructor/first.rb +88 -88
- data/lib/antelope/generation/constructor/follow.rb +103 -103
- data/lib/antelope/generation/constructor/nullable.rb +64 -64
- data/lib/antelope/generation/constructor.rb +126 -126
- data/lib/antelope/generation/errors.rb +17 -17
- data/lib/antelope/generation/null.rb +13 -13
- data/lib/antelope/generation/recognizer/rule.rb +216 -216
- data/lib/antelope/generation/recognizer/state.rb +130 -130
- data/lib/antelope/generation/recognizer.rb +180 -180
- data/lib/antelope/generation/tableizer.rb +175 -154
- data/lib/antelope/generation.rb +15 -15
- data/lib/antelope/generator/base.rb +264 -264
- data/lib/antelope/generator/c.rb +11 -11
- data/lib/antelope/generator/c_header.rb +105 -105
- data/lib/antelope/generator/c_source.rb +39 -39
- data/lib/antelope/generator/error.rb +34 -0
- data/lib/antelope/generator/group.rb +57 -57
- data/lib/antelope/generator/html.rb +51 -0
- data/lib/antelope/generator/info.rb +47 -0
- data/lib/antelope/generator/null.rb +18 -18
- data/lib/antelope/generator/output.rb +17 -49
- data/lib/antelope/generator/ruby.rb +79 -79
- data/lib/antelope/generator/templates/c_header.ant +36 -36
- data/lib/antelope/generator/templates/c_source.ant +202 -202
- data/lib/antelope/generator/templates/error.ant +33 -0
- data/lib/antelope/generator/templates/html/antelope.css +1 -0
- data/lib/antelope/generator/templates/html/antelope.html +1 -0
- data/lib/antelope/generator/templates/html/antelope.js +1 -0
- data/lib/antelope/generator/templates/html/css.ant +53 -0
- data/lib/antelope/generator/templates/html/html.ant +82 -0
- data/lib/antelope/generator/templates/html/js.ant +9 -0
- data/lib/antelope/generator/templates/info.ant +53 -0
- data/lib/antelope/generator/templates/ruby.ant +178 -146
- data/lib/antelope/generator.rb +66 -63
- data/lib/antelope/template/compiler.rb +78 -78
- data/lib/antelope/template/errors.rb +9 -9
- data/lib/antelope/template/scanner.rb +109 -109
- data/lib/antelope/template.rb +65 -60
- data/lib/antelope/version.rb +6 -6
- data/lib/antelope.rb +13 -13
- data/optimizations.txt +42 -0
- data/spec/antelope/ace/compiler_spec.rb +60 -60
- data/spec/antelope/ace/scanner_spec.rb +27 -27
- data/spec/antelope/constructor_spec.rb +133 -136
- data/spec/antelope/template_spec.rb +50 -49
- data/spec/fixtures/simple.ace +22 -22
- data/spec/spec_helper.rb +39 -39
- data/spec/support/benchmark_helper.rb +5 -5
- data/spec/support/grammar_helper.rb +15 -15
- data/subl/Ace (Ruby).JSON-tmLanguage +94 -94
- data/subl/Ace (Ruby).tmLanguage +153 -153
- metadata +17 -6
- data/lib/antelope/generator/templates/output.ant +0 -68
@@ -1,177 +1,177 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module Antelope
|
4
|
-
module Ace
|
5
|
-
class Scanner
|
6
|
-
|
7
|
-
# Scans the second part of the file. The second part of the
|
8
|
-
# file _only_ contains productions (or rules). Rules have a
|
9
|
-
# label and a body; the label may be any lowercase alphabetical
|
10
|
-
# identifier followed by a colon; the body consists of "parts",
|
11
|
-
# an "or", a "prec", and/or a "block". The part may consist
|
12
|
-
# of any alphabetical characters. An or is just a vertical bar
|
13
|
-
# (`|`). A prec is a precedence declaraction, which is `%prec `
|
14
|
-
# followed by any alphabetical characters. A block is a `{`,
|
15
|
-
# followed by code, followed by a terminating `}`. Rules _may_
|
16
|
-
# be terminated by a semicolon, but this is optional.
|
17
|
-
module Second
|
18
|
-
|
19
|
-
# Scans the second part of the file. This should be from just
|
20
|
-
# before the first content boundry; if the scanner doesn't
|
21
|
-
# find a content boundry, it will error. It will then check
|
22
|
-
# for a rule.
|
23
|
-
#
|
24
|
-
# @raise [SyntaxError] if no content boundry was found, or if
|
25
|
-
# the scanner encounters anything but a rule or whitespace.
|
26
|
-
# @return [void]
|
27
|
-
# @see #scan_second_rule
|
28
|
-
# @see #scan_whitespace
|
29
|
-
# @see #error!
|
30
|
-
def scan_second_part
|
31
|
-
scanner.scan(CONTENT_BOUNDRY) or error!
|
32
|
-
tokens << [:second]
|
33
|
-
|
34
|
-
until @scanner.check(CONTENT_BOUNDRY)
|
35
|
-
scan_second_rule || scan_whitespace || error!
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
# Scans a rule. A rule consists of a label (the nonterminal
|
40
|
-
# the production is for), a body, and a block; and then,
|
41
|
-
# an optional semicolon.
|
42
|
-
#
|
43
|
-
# @return [Boolean] if it matched
|
44
|
-
# @see #scan_second_rule_label
|
45
|
-
# @see #scan_second_rule_body
|
46
|
-
# @see #error!
|
47
|
-
def scan_second_rule
|
48
|
-
if @scanner.check(/(#{IDENTIFIER})(\[#{IDENTIFIER}\])?:/)
|
49
|
-
scan_second_rule_label or error!
|
50
|
-
scan_second_rule_body
|
51
|
-
true
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
# Scans the label for a rule. It should contain only lower
|
56
|
-
# case letters and a colon.
|
57
|
-
#
|
58
|
-
# @return [Boolean] if it matched.
|
59
|
-
def scan_second_rule_label
|
60
|
-
if @scanner.scan(/(#{IDENTIFIER})(?:\[(#{IDENTIFIER})\])?: ?/)
|
61
|
-
tokens << [:label, @scanner[1], @scanner[2]]
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
# The body can contain parts, ors, precs, or blocks (or
|
66
|
-
# whitespaces). Scans all of them, and then attempts to
|
67
|
-
# scan a semicolon.
|
68
|
-
#
|
69
|
-
# @return [void]
|
70
|
-
# @see #scan_second_rule_part
|
71
|
-
# @see #scan_second_rule_or
|
72
|
-
# @see #scan_second_rule_prec
|
73
|
-
# @see #scan_second_rule_block
|
74
|
-
# @see #scan_whitespace
|
75
|
-
def scan_second_rule_body
|
76
|
-
body = true
|
77
|
-
while body
|
78
|
-
|
79
|
-
|
80
|
-
scan_whitespace || (body = false)
|
81
|
-
end
|
82
|
-
@scanner.scan(/;/)
|
83
|
-
end
|
84
|
-
|
85
|
-
# Attempts to scan a "part". A part is any series of
|
86
|
-
# alphabetical characters that are not followed by a
|
87
|
-
# colon.
|
88
|
-
#
|
89
|
-
# @return [Boolean] if it matched.
|
90
|
-
def scan_second_rule_part
|
91
|
-
if @scanner.scan(/(
|
92
|
-
tokens << [:part, @scanner[1], @scanner[2]]
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
# Attempts to scan an "or". It's just a vertical bar.
|
97
|
-
#
|
98
|
-
# @return [Boolean] if it matched.
|
99
|
-
def scan_second_rule_or
|
100
|
-
if @scanner.scan(/\|/)
|
101
|
-
tokens << [:or]
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
# Attempts to scan a precedence definition. A precedence
|
106
|
-
# definition is "%prec " followed by a terminal or nonterminal.
|
107
|
-
#
|
108
|
-
# @return [Boolean] if it matched.
|
109
|
-
def scan_second_rule_prec
|
110
|
-
if @scanner.scan(/%prec (#{IDENTIFIER})/)
|
111
|
-
tokens << [:prec, @scanner[1]]
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
# Attempts to scan a block. This correctly balances brackets;
|
116
|
-
# however, if a bracket is opened/closed within a string, it
|
117
|
-
# still counts that as a bracket that needs to be balanced.
|
118
|
-
# So, having extensive code within a block is not a good idea.
|
119
|
-
#
|
120
|
-
# @return [Boolean] if it matched.
|
121
|
-
def scan_second_rule_block
|
122
|
-
if @scanner.scan(/\{/)
|
123
|
-
tokens << [:block, _scan_block]
|
124
|
-
end
|
125
|
-
end
|
126
|
-
|
127
|
-
private
|
128
|
-
|
129
|
-
# Scans the block; it scans until it encounters enough closing
|
130
|
-
# brackets to match the opening brackets. If it encounters
|
131
|
-
# an opening brackets, it increments the bracket counter by
|
132
|
-
# one; if it encounters a closing bracket, it decrements by
|
133
|
-
# one. It will error if it reaches the end before the
|
134
|
-
# brackets are fully closed.
|
135
|
-
#
|
136
|
-
# @return [String] the block's body.
|
137
|
-
# @raise [SyntaxError] if it reaches the end before the final
|
138
|
-
# bracket is closed.
|
139
|
-
def _scan_block
|
140
|
-
brack = 1
|
141
|
-
body = "{"
|
142
|
-
scan_for = %r{
|
143
|
-
(
|
144
|
-
(?: " ( \\\\ | \\" | [^"] )* "? )
|
145
|
-
| (?: ' ( \\\\ | \\' | [^'] )* '? )
|
146
|
-
| (?: // .*? \n )
|
147
|
-
| (?: \# .*? \n )
|
148
|
-
| (?: /\* [\s\S]+? \*/ )
|
149
|
-
| (?: \} )
|
150
|
-
| (?: \{ )
|
151
|
-
)
|
152
|
-
}x
|
153
|
-
|
154
|
-
until brack.zero?
|
155
|
-
if part = @scanner.scan_until(scan_for)
|
156
|
-
body << part
|
157
|
-
|
158
|
-
|
159
|
-
if @scanner[1] == "}"
|
160
|
-
brack -= 1
|
161
|
-
elsif @scanner[1] == "{"
|
162
|
-
brack += 1
|
163
|
-
end
|
164
|
-
else
|
165
|
-
if @scanner.scan(/(.+)/m)
|
166
|
-
@line += @scanner[1].count("\n")
|
167
|
-
end
|
168
|
-
error!
|
169
|
-
end
|
170
|
-
end
|
171
|
-
|
172
|
-
body
|
173
|
-
end
|
174
|
-
end
|
175
|
-
end
|
176
|
-
end
|
177
|
-
end
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Antelope
|
4
|
+
module Ace
|
5
|
+
class Scanner
|
6
|
+
|
7
|
+
# Scans the second part of the file. The second part of the
|
8
|
+
# file _only_ contains productions (or rules). Rules have a
|
9
|
+
# label and a body; the label may be any lowercase alphabetical
|
10
|
+
# identifier followed by a colon; the body consists of "parts",
|
11
|
+
# an "or", a "prec", and/or a "block". The part may consist
|
12
|
+
# of any alphabetical characters. An or is just a vertical bar
|
13
|
+
# (`|`). A prec is a precedence declaraction, which is `%prec `
|
14
|
+
# followed by any alphabetical characters. A block is a `{`,
|
15
|
+
# followed by code, followed by a terminating `}`. Rules _may_
|
16
|
+
# be terminated by a semicolon, but this is optional.
|
17
|
+
module Second
|
18
|
+
|
19
|
+
# Scans the second part of the file. This should be from just
|
20
|
+
# before the first content boundry; if the scanner doesn't
|
21
|
+
# find a content boundry, it will error. It will then check
|
22
|
+
# for a rule.
|
23
|
+
#
|
24
|
+
# @raise [SyntaxError] if no content boundry was found, or if
|
25
|
+
# the scanner encounters anything but a rule or whitespace.
|
26
|
+
# @return [void]
|
27
|
+
# @see #scan_second_rule
|
28
|
+
# @see #scan_whitespace
|
29
|
+
# @see #error!
|
30
|
+
def scan_second_part
|
31
|
+
scanner.scan(CONTENT_BOUNDRY) or error!
|
32
|
+
tokens << [:second]
|
33
|
+
|
34
|
+
until @scanner.check(CONTENT_BOUNDRY)
|
35
|
+
scan_second_rule || scan_whitespace || error!
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
# Scans a rule. A rule consists of a label (the nonterminal
|
40
|
+
# the production is for), a body, and a block; and then,
|
41
|
+
# an optional semicolon.
|
42
|
+
#
|
43
|
+
# @return [Boolean] if it matched
|
44
|
+
# @see #scan_second_rule_label
|
45
|
+
# @see #scan_second_rule_body
|
46
|
+
# @see #error!
|
47
|
+
def scan_second_rule
|
48
|
+
if @scanner.check(/(#{IDENTIFIER})(\[#{IDENTIFIER}\])?:/)
|
49
|
+
scan_second_rule_label or error!
|
50
|
+
scan_second_rule_body
|
51
|
+
true
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
# Scans the label for a rule. It should contain only lower
|
56
|
+
# case letters and a colon.
|
57
|
+
#
|
58
|
+
# @return [Boolean] if it matched.
|
59
|
+
def scan_second_rule_label
|
60
|
+
if @scanner.scan(/(#{IDENTIFIER})(?:\[(#{IDENTIFIER})\])?: ?/)
|
61
|
+
tokens << [:label, @scanner[1], @scanner[2]]
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
# The body can contain parts, ors, precs, or blocks (or
|
66
|
+
# whitespaces). Scans all of them, and then attempts to
|
67
|
+
# scan a semicolon.
|
68
|
+
#
|
69
|
+
# @return [void]
|
70
|
+
# @see #scan_second_rule_part
|
71
|
+
# @see #scan_second_rule_or
|
72
|
+
# @see #scan_second_rule_prec
|
73
|
+
# @see #scan_second_rule_block
|
74
|
+
# @see #scan_whitespace
|
75
|
+
def scan_second_rule_body
|
76
|
+
body = true
|
77
|
+
while body
|
78
|
+
scan_second_rule_prec || scan_second_rule_part ||
|
79
|
+
scan_second_rule_or || scan_second_rule_block ||
|
80
|
+
scan_whitespace || (body = false)
|
81
|
+
end
|
82
|
+
@scanner.scan(/;/)
|
83
|
+
end
|
84
|
+
|
85
|
+
# Attempts to scan a "part". A part is any series of
|
86
|
+
# alphabetical characters that are not followed by a
|
87
|
+
# colon.
|
88
|
+
#
|
89
|
+
# @return [Boolean] if it matched.
|
90
|
+
def scan_second_rule_part
|
91
|
+
if @scanner.scan(/(%?#{IDENTIFIER})(?:\[(#{IDENTIFIER})\])?(?!\:|[A-Za-z._])/)
|
92
|
+
tokens << [:part, @scanner[1], @scanner[2]]
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
# Attempts to scan an "or". It's just a vertical bar.
|
97
|
+
#
|
98
|
+
# @return [Boolean] if it matched.
|
99
|
+
def scan_second_rule_or
|
100
|
+
if @scanner.scan(/\|/)
|
101
|
+
tokens << [:or]
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
# Attempts to scan a precedence definition. A precedence
|
106
|
+
# definition is "%prec " followed by a terminal or nonterminal.
|
107
|
+
#
|
108
|
+
# @return [Boolean] if it matched.
|
109
|
+
def scan_second_rule_prec
|
110
|
+
if @scanner.scan(/%prec (#{IDENTIFIER})/)
|
111
|
+
tokens << [:prec, @scanner[1]]
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
# Attempts to scan a block. This correctly balances brackets;
|
116
|
+
# however, if a bracket is opened/closed within a string, it
|
117
|
+
# still counts that as a bracket that needs to be balanced.
|
118
|
+
# So, having extensive code within a block is not a good idea.
|
119
|
+
#
|
120
|
+
# @return [Boolean] if it matched.
|
121
|
+
def scan_second_rule_block
|
122
|
+
if @scanner.scan(/\{/)
|
123
|
+
tokens << [:block, _scan_block]
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
private
|
128
|
+
|
129
|
+
# Scans the block; it scans until it encounters enough closing
|
130
|
+
# brackets to match the opening brackets. If it encounters
|
131
|
+
# an opening brackets, it increments the bracket counter by
|
132
|
+
# one; if it encounters a closing bracket, it decrements by
|
133
|
+
# one. It will error if it reaches the end before the
|
134
|
+
# brackets are fully closed.
|
135
|
+
#
|
136
|
+
# @return [String] the block's body.
|
137
|
+
# @raise [SyntaxError] if it reaches the end before the final
|
138
|
+
# bracket is closed.
|
139
|
+
def _scan_block
|
140
|
+
brack = 1
|
141
|
+
body = "{"
|
142
|
+
scan_for = %r{
|
143
|
+
(
|
144
|
+
(?: " ( \\\\ | \\" | [^"] )* "? )
|
145
|
+
| (?: ' ( \\\\ | \\' | [^'] )* '? )
|
146
|
+
| (?: // .*? \n )
|
147
|
+
| (?: \# .*? \n )
|
148
|
+
| (?: /\* [\s\S]+? \*/ )
|
149
|
+
| (?: \} )
|
150
|
+
| (?: \{ )
|
151
|
+
)
|
152
|
+
}x
|
153
|
+
|
154
|
+
until brack.zero?
|
155
|
+
if part = @scanner.scan_until(scan_for)
|
156
|
+
body << part
|
157
|
+
|
158
|
+
|
159
|
+
if @scanner[1] == "}"
|
160
|
+
brack -= 1
|
161
|
+
elsif @scanner[1] == "{"
|
162
|
+
brack += 1
|
163
|
+
end
|
164
|
+
else
|
165
|
+
if @scanner.scan(/(.+)/m)
|
166
|
+
@line += @scanner[1].count("\n")
|
167
|
+
end
|
168
|
+
error!
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
body
|
173
|
+
end
|
174
|
+
end
|
175
|
+
end
|
176
|
+
end
|
177
|
+
end
|
@@ -1,27 +1,27 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module Antelope
|
4
|
-
module Ace
|
5
|
-
class Scanner
|
6
|
-
|
7
|
-
# Scans the third part. Everything after the content
|
8
|
-
# boundry is copied directly into the output.
|
9
|
-
module Third
|
10
|
-
|
11
|
-
# Scans the third part. It should start with a content
|
12
|
-
# boundry; raises an error if it does not. It then scans
|
13
|
-
# until the end of the file.
|
14
|
-
#
|
15
|
-
# @raise [SyntaxError] if somehow there is no content
|
16
|
-
# boundry.
|
17
|
-
# @return [void]
|
18
|
-
def scan_third_part
|
19
|
-
@scanner.scan(CONTENT_BOUNDRY) or error!
|
20
|
-
|
21
|
-
tokens << [:third]
|
22
|
-
tokens << [:copy, @scanner.scan(/[\s\S]+/m) || ""]
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Antelope
|
4
|
+
module Ace
|
5
|
+
class Scanner
|
6
|
+
|
7
|
+
# Scans the third part. Everything after the content
|
8
|
+
# boundry is copied directly into the output.
|
9
|
+
module Third
|
10
|
+
|
11
|
+
# Scans the third part. It should start with a content
|
12
|
+
# boundry; raises an error if it does not. It then scans
|
13
|
+
# until the end of the file.
|
14
|
+
#
|
15
|
+
# @raise [SyntaxError] if somehow there is no content
|
16
|
+
# boundry.
|
17
|
+
# @return [void]
|
18
|
+
def scan_third_part
|
19
|
+
@scanner.scan(CONTENT_BOUNDRY) or error!
|
20
|
+
|
21
|
+
tokens << [:third]
|
22
|
+
tokens << [:copy, @scanner.scan(/[\s\S]+/m) || ""]
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
data/lib/antelope/ace/scanner.rb
CHANGED
@@ -1,134 +1,134 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require "strscan"
|
4
|
-
require "antelope/ace/scanner/argument"
|
5
|
-
require "antelope/ace/scanner/first"
|
6
|
-
require "antelope/ace/scanner/second"
|
7
|
-
require "antelope/ace/scanner/third"
|
8
|
-
|
9
|
-
module Antelope
|
10
|
-
module Ace
|
11
|
-
|
12
|
-
# Scans a given input. The input should be a properly formatted
|
13
|
-
# ACE file; see the Ace module for more information. This scanner
|
14
|
-
# uses the StringScanner class internally; see the ruby
|
15
|
-
# documentation for more on that. This scanner seperates scanning
|
16
|
-
# into three seperate stages: First, Second, and Third, for each
|
17
|
-
# section of the file, respectively.
|
18
|
-
#
|
19
|
-
# @see Ace
|
20
|
-
# @see http://ruby-doc.org/stdlib-2.1.2/libdoc/strscan/rdoc/StringScanner.html
|
21
|
-
class Scanner
|
22
|
-
|
23
|
-
IDENTIFIER = "[a-zA-Z_.][a-zA-Z0-9_.-]*"
|
24
|
-
|
25
|
-
include First
|
26
|
-
include Second
|
27
|
-
include Third
|
28
|
-
|
29
|
-
# The string scanner that we're using to scan the string with.
|
30
|
-
#
|
31
|
-
# @return [StringScanner]
|
32
|
-
attr_reader :scanner
|
33
|
-
|
34
|
-
# An array of the tokens that the scanner scanned.
|
35
|
-
#
|
36
|
-
# @return [Array<Array<(Symbol, Object, ...)>>]
|
37
|
-
attr_reader :tokens
|
38
|
-
|
39
|
-
# The boundry between each section. Placed here to be easily.
|
40
|
-
# modifiable. **MUST** be a regular expression.
|
41
|
-
#
|
42
|
-
# @return [RegExp]
|
43
|
-
CONTENT_BOUNDRY = /%%/
|
44
|
-
|
45
|
-
# The value regular expression. It should match values; for
|
46
|
-
# example, things quoted in strings or word letters without
|
47
|
-
# quotes. Must respond to #to_s, since it is embedded within
|
48
|
-
# other regular expressions. The regular expression should
|
49
|
-
# place the contents of the value in the groups 2 or 3.
|
50
|
-
#
|
51
|
-
# @return [#to_s]
|
52
|
-
VALUE = %q{(?:
|
53
|
-
(?:("|')((?:\\\\|\\"|\\'|.)+?)\\1)
|
54
|
-
| ([A-Za-z0-9_.<>*-]+)
|
55
|
-
)}
|
56
|
-
|
57
|
-
# Scans a file. It returns the tokens resulting from scanning.
|
58
|
-
#
|
59
|
-
# @param source [String] the source to scan. This should be
|
60
|
-
# compatible with StringScanner.
|
61
|
-
# @param name [String] the name of the source file. This is
|
62
|
-
# primarilyused in backtrace information.
|
63
|
-
# @return [Array<Array<(Symbol, Object, ...)>>]
|
64
|
-
# @see #tokens
|
65
|
-
def self.scan(source, name = "(ace file)")
|
66
|
-
new(source, name).scan_file
|
67
|
-
end
|
68
|
-
|
69
|
-
# Initialize the scanner with the input.
|
70
|
-
#
|
71
|
-
# @param input [String] The source to scan.
|
72
|
-
# @param source [String] the source file. This is primarily
|
73
|
-
# used in backtrace information.
|
74
|
-
def initialize(input, source = "(ace file)")
|
75
|
-
@source = source
|
76
|
-
@scanner = StringScanner.new(input)
|
77
|
-
@tokens = []
|
78
|
-
end
|
79
|
-
|
80
|
-
# Scans the file in parts.
|
81
|
-
#
|
82
|
-
# @raise [SyntaxError] if the source is malformed in some way.
|
83
|
-
# @return [Array<Array<(Symbol, Object, ...)>>] the tokens that
|
84
|
-
# were scanned in this file.
|
85
|
-
# @see #scan_first_part
|
86
|
-
# @see #scan_second_part
|
87
|
-
# @see #scan_third_part
|
88
|
-
# @see #tokens
|
89
|
-
def scan_file
|
90
|
-
@line = 1
|
91
|
-
scan_first_part
|
92
|
-
scan_second_part
|
93
|
-
scan_third_part
|
94
|
-
tokens
|
95
|
-
rescue SyntaxError => e
|
96
|
-
start = [@scanner.pos - 8, 0].max
|
97
|
-
stop = [@scanner.pos + 8, @scanner.string.length].min
|
98
|
-
snip = @scanner.string[start..stop].strip.inspect
|
99
|
-
char = @scanner.string[@scanner.pos]
|
100
|
-
char = if char
|
101
|
-
char.inspect
|
102
|
-
else
|
103
|
-
"EOF"
|
104
|
-
end
|
105
|
-
|
106
|
-
new_line = "#{@source}:#{@line}: unexpected #{char} " \
|
107
|
-
"(near #{snip})"
|
108
|
-
|
109
|
-
raise e, e.message, [new_line, *e.backtrace]
|
110
|
-
end
|
111
|
-
|
112
|
-
# Scans for whitespace. If the next character is whitespace, it
|
113
|
-
# will consume all whitespace until the next non-whitespace
|
114
|
-
# character.
|
115
|
-
#
|
116
|
-
# @return [Boolean] if any whitespace was matched.
|
117
|
-
def scan_whitespace
|
118
|
-
if @scanner.scan(/(\s+)/)
|
119
|
-
@line += @scanner[1].count("\n")
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
private
|
124
|
-
|
125
|
-
# Raises an error.
|
126
|
-
#
|
127
|
-
# @raise [SyntaxError] always.
|
128
|
-
# @return [void]
|
129
|
-
def error!
|
130
|
-
raise SyntaxError, "invalid syntax"
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end
|
134
|
-
end
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require "strscan"
|
4
|
+
require "antelope/ace/scanner/argument"
|
5
|
+
require "antelope/ace/scanner/first"
|
6
|
+
require "antelope/ace/scanner/second"
|
7
|
+
require "antelope/ace/scanner/third"
|
8
|
+
|
9
|
+
module Antelope
|
10
|
+
module Ace
|
11
|
+
|
12
|
+
# Scans a given input. The input should be a properly formatted
|
13
|
+
# ACE file; see the Ace module for more information. This scanner
|
14
|
+
# uses the StringScanner class internally; see the ruby
|
15
|
+
# documentation for more on that. This scanner seperates scanning
|
16
|
+
# into three seperate stages: First, Second, and Third, for each
|
17
|
+
# section of the file, respectively.
|
18
|
+
#
|
19
|
+
# @see Ace
|
20
|
+
# @see http://ruby-doc.org/stdlib-2.1.2/libdoc/strscan/rdoc/StringScanner.html
|
21
|
+
class Scanner
|
22
|
+
|
23
|
+
IDENTIFIER = "[a-zA-Z_.][a-zA-Z0-9_.-]*"
|
24
|
+
|
25
|
+
include First
|
26
|
+
include Second
|
27
|
+
include Third
|
28
|
+
|
29
|
+
# The string scanner that we're using to scan the string with.
|
30
|
+
#
|
31
|
+
# @return [StringScanner]
|
32
|
+
attr_reader :scanner
|
33
|
+
|
34
|
+
# An array of the tokens that the scanner scanned.
|
35
|
+
#
|
36
|
+
# @return [Array<Array<(Symbol, Object, ...)>>]
|
37
|
+
attr_reader :tokens
|
38
|
+
|
39
|
+
# The boundry between each section. Placed here to be easily.
|
40
|
+
# modifiable. **MUST** be a regular expression.
|
41
|
+
#
|
42
|
+
# @return [RegExp]
|
43
|
+
CONTENT_BOUNDRY = /%%/
|
44
|
+
|
45
|
+
# The value regular expression. It should match values; for
|
46
|
+
# example, things quoted in strings or word letters without
|
47
|
+
# quotes. Must respond to #to_s, since it is embedded within
|
48
|
+
# other regular expressions. The regular expression should
|
49
|
+
# place the contents of the value in the groups 2 or 3.
|
50
|
+
#
|
51
|
+
# @return [#to_s]
|
52
|
+
VALUE = %q{(?:
|
53
|
+
(?:("|')((?:\\\\|\\"|\\'|.)+?)\\1)
|
54
|
+
| ([A-Za-z0-9_.<>*-]+)
|
55
|
+
)}
|
56
|
+
|
57
|
+
# Scans a file. It returns the tokens resulting from scanning.
|
58
|
+
#
|
59
|
+
# @param source [String] the source to scan. This should be
|
60
|
+
# compatible with StringScanner.
|
61
|
+
# @param name [String] the name of the source file. This is
|
62
|
+
# primarilyused in backtrace information.
|
63
|
+
# @return [Array<Array<(Symbol, Object, ...)>>]
|
64
|
+
# @see #tokens
|
65
|
+
def self.scan(source, name = "(ace file)")
|
66
|
+
new(source, name).scan_file
|
67
|
+
end
|
68
|
+
|
69
|
+
# Initialize the scanner with the input.
|
70
|
+
#
|
71
|
+
# @param input [String] The source to scan.
|
72
|
+
# @param source [String] the source file. This is primarily
|
73
|
+
# used in backtrace information.
|
74
|
+
def initialize(input, source = "(ace file)")
|
75
|
+
@source = source
|
76
|
+
@scanner = StringScanner.new(input)
|
77
|
+
@tokens = []
|
78
|
+
end
|
79
|
+
|
80
|
+
# Scans the file in parts.
|
81
|
+
#
|
82
|
+
# @raise [SyntaxError] if the source is malformed in some way.
|
83
|
+
# @return [Array<Array<(Symbol, Object, ...)>>] the tokens that
|
84
|
+
# were scanned in this file.
|
85
|
+
# @see #scan_first_part
|
86
|
+
# @see #scan_second_part
|
87
|
+
# @see #scan_third_part
|
88
|
+
# @see #tokens
|
89
|
+
def scan_file
|
90
|
+
@line = 1
|
91
|
+
scan_first_part
|
92
|
+
scan_second_part
|
93
|
+
scan_third_part
|
94
|
+
tokens
|
95
|
+
rescue SyntaxError => e
|
96
|
+
start = [@scanner.pos - 8, 0].max
|
97
|
+
stop = [@scanner.pos + 8, @scanner.string.length].min
|
98
|
+
snip = @scanner.string[start..stop].strip.inspect
|
99
|
+
char = @scanner.string[@scanner.pos]
|
100
|
+
char = if char
|
101
|
+
char.inspect
|
102
|
+
else
|
103
|
+
"EOF"
|
104
|
+
end
|
105
|
+
|
106
|
+
new_line = "#{@source}:#{@line}: unexpected #{char} " \
|
107
|
+
"(near #{snip})"
|
108
|
+
|
109
|
+
raise e, e.message, [new_line, *e.backtrace]
|
110
|
+
end
|
111
|
+
|
112
|
+
# Scans for whitespace. If the next character is whitespace, it
|
113
|
+
# will consume all whitespace until the next non-whitespace
|
114
|
+
# character.
|
115
|
+
#
|
116
|
+
# @return [Boolean] if any whitespace was matched.
|
117
|
+
def scan_whitespace
|
118
|
+
if @scanner.scan(/(\s+)/)
|
119
|
+
@line += @scanner[1].count("\n")
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
private
|
124
|
+
|
125
|
+
# Raises an error.
|
126
|
+
#
|
127
|
+
# @raise [SyntaxError] always.
|
128
|
+
# @return [void]
|
129
|
+
def error!
|
130
|
+
raise SyntaxError, "invalid syntax"
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|