antelope 0.2.0 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (96) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +25 -23
  3. data/.rspec +3 -3
  4. data/.travis.yml +10 -9
  5. data/.yardopts +7 -7
  6. data/CONTRIBUTING.md +38 -38
  7. data/GENERATORS.md +124 -124
  8. data/Gemfile +7 -7
  9. data/LICENSE.txt +22 -22
  10. data/README.md +104 -104
  11. data/Rakefile +2 -2
  12. data/TODO.md +58 -58
  13. data/antelope.gemspec +28 -28
  14. data/bin/antelope +7 -7
  15. data/examples/deterministic.ace +35 -35
  16. data/examples/example.ace +51 -50
  17. data/examples/example.err +192 -0
  18. data/examples/{example.output → example.inf} +384 -385
  19. data/examples/liquidscript.ace +233 -162
  20. data/examples/simple.ace +22 -22
  21. data/lib/antelope/ace/compiler.rb +334 -334
  22. data/lib/antelope/ace/errors.rb +48 -48
  23. data/lib/antelope/ace/grammar/generation.rb +80 -80
  24. data/lib/antelope/ace/grammar/loading.rb +53 -53
  25. data/lib/antelope/ace/grammar/precedences.rb +68 -65
  26. data/lib/antelope/ace/grammar/productions.rb +156 -150
  27. data/lib/antelope/ace/grammar/symbols.rb +66 -66
  28. data/lib/antelope/ace/grammar.rb +69 -69
  29. data/lib/antelope/ace/precedence.rb +61 -61
  30. data/lib/antelope/ace/production.rb +57 -57
  31. data/lib/antelope/ace/scanner/argument.rb +57 -57
  32. data/lib/antelope/ace/scanner/first.rb +89 -89
  33. data/lib/antelope/ace/scanner/second.rb +177 -177
  34. data/lib/antelope/ace/scanner/third.rb +27 -27
  35. data/lib/antelope/ace/scanner.rb +134 -134
  36. data/lib/antelope/ace/token/epsilon.rb +24 -24
  37. data/lib/antelope/ace/token/error.rb +26 -26
  38. data/lib/antelope/ace/token/nonterminal.rb +17 -17
  39. data/lib/antelope/ace/token/terminal.rb +17 -17
  40. data/lib/antelope/ace/token.rb +238 -238
  41. data/lib/antelope/ace.rb +53 -53
  42. data/lib/antelope/cli.rb +55 -55
  43. data/lib/antelope/errors.rb +8 -8
  44. data/lib/antelope/generation/constructor/first.rb +88 -88
  45. data/lib/antelope/generation/constructor/follow.rb +103 -103
  46. data/lib/antelope/generation/constructor/nullable.rb +64 -64
  47. data/lib/antelope/generation/constructor.rb +126 -126
  48. data/lib/antelope/generation/errors.rb +17 -17
  49. data/lib/antelope/generation/null.rb +13 -13
  50. data/lib/antelope/generation/recognizer/rule.rb +216 -216
  51. data/lib/antelope/generation/recognizer/state.rb +130 -130
  52. data/lib/antelope/generation/recognizer.rb +180 -180
  53. data/lib/antelope/generation/tableizer.rb +175 -154
  54. data/lib/antelope/generation.rb +15 -15
  55. data/lib/antelope/generator/base.rb +264 -264
  56. data/lib/antelope/generator/c.rb +11 -11
  57. data/lib/antelope/generator/c_header.rb +105 -105
  58. data/lib/antelope/generator/c_source.rb +39 -39
  59. data/lib/antelope/generator/error.rb +34 -0
  60. data/lib/antelope/generator/group.rb +57 -57
  61. data/lib/antelope/generator/html.rb +51 -0
  62. data/lib/antelope/generator/info.rb +47 -0
  63. data/lib/antelope/generator/null.rb +18 -18
  64. data/lib/antelope/generator/output.rb +17 -49
  65. data/lib/antelope/generator/ruby.rb +79 -79
  66. data/lib/antelope/generator/templates/c_header.ant +36 -36
  67. data/lib/antelope/generator/templates/c_source.ant +202 -202
  68. data/lib/antelope/generator/templates/error.ant +33 -0
  69. data/lib/antelope/generator/templates/html/antelope.css +1 -0
  70. data/lib/antelope/generator/templates/html/antelope.html +1 -0
  71. data/lib/antelope/generator/templates/html/antelope.js +1 -0
  72. data/lib/antelope/generator/templates/html/css.ant +53 -0
  73. data/lib/antelope/generator/templates/html/html.ant +82 -0
  74. data/lib/antelope/generator/templates/html/js.ant +9 -0
  75. data/lib/antelope/generator/templates/info.ant +53 -0
  76. data/lib/antelope/generator/templates/ruby.ant +178 -146
  77. data/lib/antelope/generator.rb +66 -63
  78. data/lib/antelope/template/compiler.rb +78 -78
  79. data/lib/antelope/template/errors.rb +9 -9
  80. data/lib/antelope/template/scanner.rb +109 -109
  81. data/lib/antelope/template.rb +65 -60
  82. data/lib/antelope/version.rb +6 -6
  83. data/lib/antelope.rb +13 -13
  84. data/optimizations.txt +42 -0
  85. data/spec/antelope/ace/compiler_spec.rb +60 -60
  86. data/spec/antelope/ace/scanner_spec.rb +27 -27
  87. data/spec/antelope/constructor_spec.rb +133 -136
  88. data/spec/antelope/template_spec.rb +50 -49
  89. data/spec/fixtures/simple.ace +22 -22
  90. data/spec/spec_helper.rb +39 -39
  91. data/spec/support/benchmark_helper.rb +5 -5
  92. data/spec/support/grammar_helper.rb +15 -15
  93. data/subl/Ace (Ruby).JSON-tmLanguage +94 -94
  94. data/subl/Ace (Ruby).tmLanguage +153 -153
  95. metadata +17 -6
  96. data/lib/antelope/generator/templates/output.ant +0 -68
@@ -1,177 +1,177 @@
1
- # encoding: utf-8
2
-
3
- module Antelope
4
- module Ace
5
- class Scanner
6
-
7
- # Scans the second part of the file. The second part of the
8
- # file _only_ contains productions (or rules). Rules have a
9
- # label and a body; the label may be any lowercase alphabetical
10
- # identifier followed by a colon; the body consists of "parts",
11
- # an "or", a "prec", and/or a "block". The part may consist
12
- # of any alphabetical characters. An or is just a vertical bar
13
- # (`|`). A prec is a precedence declaraction, which is `%prec `
14
- # followed by any alphabetical characters. A block is a `{`,
15
- # followed by code, followed by a terminating `}`. Rules _may_
16
- # be terminated by a semicolon, but this is optional.
17
- module Second
18
-
19
- # Scans the second part of the file. This should be from just
20
- # before the first content boundry; if the scanner doesn't
21
- # find a content boundry, it will error. It will then check
22
- # for a rule.
23
- #
24
- # @raise [SyntaxError] if no content boundry was found, or if
25
- # the scanner encounters anything but a rule or whitespace.
26
- # @return [void]
27
- # @see #scan_second_rule
28
- # @see #scan_whitespace
29
- # @see #error!
30
- def scan_second_part
31
- scanner.scan(CONTENT_BOUNDRY) or error!
32
- tokens << [:second]
33
-
34
- until @scanner.check(CONTENT_BOUNDRY)
35
- scan_second_rule || scan_whitespace || error!
36
- end
37
- end
38
-
39
- # Scans a rule. A rule consists of a label (the nonterminal
40
- # the production is for), a body, and a block; and then,
41
- # an optional semicolon.
42
- #
43
- # @return [Boolean] if it matched
44
- # @see #scan_second_rule_label
45
- # @see #scan_second_rule_body
46
- # @see #error!
47
- def scan_second_rule
48
- if @scanner.check(/(#{IDENTIFIER})(\[#{IDENTIFIER}\])?:/)
49
- scan_second_rule_label or error!
50
- scan_second_rule_body
51
- true
52
- end
53
- end
54
-
55
- # Scans the label for a rule. It should contain only lower
56
- # case letters and a colon.
57
- #
58
- # @return [Boolean] if it matched.
59
- def scan_second_rule_label
60
- if @scanner.scan(/(#{IDENTIFIER})(?:\[(#{IDENTIFIER})\])?: ?/)
61
- tokens << [:label, @scanner[1], @scanner[2]]
62
- end
63
- end
64
-
65
- # The body can contain parts, ors, precs, or blocks (or
66
- # whitespaces). Scans all of them, and then attempts to
67
- # scan a semicolon.
68
- #
69
- # @return [void]
70
- # @see #scan_second_rule_part
71
- # @see #scan_second_rule_or
72
- # @see #scan_second_rule_prec
73
- # @see #scan_second_rule_block
74
- # @see #scan_whitespace
75
- def scan_second_rule_body
76
- body = true
77
- while body
78
- scan_second_rule_part || scan_second_rule_or ||
79
- scan_second_rule_prec || scan_second_rule_block ||
80
- scan_whitespace || (body = false)
81
- end
82
- @scanner.scan(/;/)
83
- end
84
-
85
- # Attempts to scan a "part". A part is any series of
86
- # alphabetical characters that are not followed by a
87
- # colon.
88
- #
89
- # @return [Boolean] if it matched.
90
- def scan_second_rule_part
91
- if @scanner.scan(/(#{IDENTIFIER})(?:\[(#{IDENTIFIER})\])?(?!\:|[A-Za-z._])/)
92
- tokens << [:part, @scanner[1], @scanner[2]]
93
- end
94
- end
95
-
96
- # Attempts to scan an "or". It's just a vertical bar.
97
- #
98
- # @return [Boolean] if it matched.
99
- def scan_second_rule_or
100
- if @scanner.scan(/\|/)
101
- tokens << [:or]
102
- end
103
- end
104
-
105
- # Attempts to scan a precedence definition. A precedence
106
- # definition is "%prec " followed by a terminal or nonterminal.
107
- #
108
- # @return [Boolean] if it matched.
109
- def scan_second_rule_prec
110
- if @scanner.scan(/%prec (#{IDENTIFIER})/)
111
- tokens << [:prec, @scanner[1]]
112
- end
113
- end
114
-
115
- # Attempts to scan a block. This correctly balances brackets;
116
- # however, if a bracket is opened/closed within a string, it
117
- # still counts that as a bracket that needs to be balanced.
118
- # So, having extensive code within a block is not a good idea.
119
- #
120
- # @return [Boolean] if it matched.
121
- def scan_second_rule_block
122
- if @scanner.scan(/\{/)
123
- tokens << [:block, _scan_block]
124
- end
125
- end
126
-
127
- private
128
-
129
- # Scans the block; it scans until it encounters enough closing
130
- # brackets to match the opening brackets. If it encounters
131
- # an opening brackets, it increments the bracket counter by
132
- # one; if it encounters a closing bracket, it decrements by
133
- # one. It will error if it reaches the end before the
134
- # brackets are fully closed.
135
- #
136
- # @return [String] the block's body.
137
- # @raise [SyntaxError] if it reaches the end before the final
138
- # bracket is closed.
139
- def _scan_block
140
- brack = 1
141
- body = "{"
142
- scan_for = %r{
143
- (
144
- (?: " ( \\\\ | \\" | [^"] )* "? )
145
- | (?: ' ( \\\\ | \\' | [^'] )* '? )
146
- | (?: // .*? \n )
147
- | (?: \# .*? \n )
148
- | (?: /\* [\s\S]+? \*/ )
149
- | (?: \} )
150
- | (?: \{ )
151
- )
152
- }x
153
-
154
- until brack.zero?
155
- if part = @scanner.scan_until(scan_for)
156
- body << part
157
-
158
-
159
- if @scanner[1] == "}"
160
- brack -= 1
161
- elsif @scanner[1] == "{"
162
- brack += 1
163
- end
164
- else
165
- if @scanner.scan(/(.+)/m)
166
- @line += @scanner[1].count("\n")
167
- end
168
- error!
169
- end
170
- end
171
-
172
- body
173
- end
174
- end
175
- end
176
- end
177
- end
1
+ # encoding: utf-8
2
+
3
+ module Antelope
4
+ module Ace
5
+ class Scanner
6
+
7
+ # Scans the second part of the file. The second part of the
8
+ # file _only_ contains productions (or rules). Rules have a
9
+ # label and a body; the label may be any lowercase alphabetical
10
+ # identifier followed by a colon; the body consists of "parts",
11
+ # an "or", a "prec", and/or a "block". The part may consist
12
+ # of any alphabetical characters. An or is just a vertical bar
13
+ # (`|`). A prec is a precedence declaration, which is `%prec `
14
+ # followed by any alphabetical characters. A block is a `{`,
15
+ # followed by code, followed by a terminating `}`. Rules _may_
16
+ # be terminated by a semicolon, but this is optional.
17
+ module Second
18
+
19
+ # Scans the second part of the file. This should be from just
20
+ # before the first content boundary; if the scanner doesn't
21
+ # find a content boundary, it will error. It will then check
22
+ # for a rule.
23
+ #
24
+ # @raise [SyntaxError] if no content boundary was found, or if
25
+ # the scanner encounters anything but a rule or whitespace.
26
+ # @return [void]
27
+ # @see #scan_second_rule
28
+ # @see #scan_whitespace
29
+ # @see #error!
30
+ def scan_second_part
31
+ scanner.scan(CONTENT_BOUNDRY) or error!
32
+ tokens << [:second]
33
+
34
+ until @scanner.check(CONTENT_BOUNDRY)
35
+ scan_second_rule || scan_whitespace || error!
36
+ end
37
+ end
38
+
39
+ # Scans a rule. A rule consists of a label (the nonterminal
40
+ # the production is for), a body, and a block; and then,
41
+ # an optional semicolon.
42
+ #
43
+ # @return [Boolean] if it matched
44
+ # @see #scan_second_rule_label
45
+ # @see #scan_second_rule_body
46
+ # @see #error!
47
+ def scan_second_rule
48
+ if @scanner.check(/(#{IDENTIFIER})(\[#{IDENTIFIER}\])?:/)
49
+ scan_second_rule_label or error!
50
+ scan_second_rule_body
51
+ true
52
+ end
53
+ end
54
+
55
+ # Scans the label for a rule. It should be an identifier,
56
+ # optionally followed by a bracketed identifier, and a colon.
57
+ #
58
+ # @return [Boolean] if it matched.
59
+ def scan_second_rule_label
60
+ if @scanner.scan(/(#{IDENTIFIER})(?:\[(#{IDENTIFIER})\])?: ?/)
61
+ tokens << [:label, @scanner[1], @scanner[2]]
62
+ end
63
+ end
64
+
65
+ # The body can contain parts, ors, precs, or blocks (or
66
+ # whitespaces). Scans all of them, and then attempts to
67
+ # scan a semicolon.
68
+ #
69
+ # @return [void]
70
+ # @see #scan_second_rule_part
71
+ # @see #scan_second_rule_or
72
+ # @see #scan_second_rule_prec
73
+ # @see #scan_second_rule_block
74
+ # @see #scan_whitespace
75
+ def scan_second_rule_body
76
+ body = true
77
+ while body
78
+ scan_second_rule_prec || scan_second_rule_part ||
79
+ scan_second_rule_or || scan_second_rule_block ||
80
+ scan_whitespace || (body = false)
81
+ end
82
+ @scanner.scan(/;/)
83
+ end
84
+
85
+ # Attempts to scan a "part". A part is an identifier, optionally
86
+ # prefixed with `%` and optionally followed by a bracketed
87
+ # identifier, that is not followed by a colon.
88
+ #
89
+ # @return [Boolean] if it matched.
90
+ def scan_second_rule_part
91
+ if @scanner.scan(/(%?#{IDENTIFIER})(?:\[(#{IDENTIFIER})\])?(?!\:|[A-Za-z._])/)
92
+ tokens << [:part, @scanner[1], @scanner[2]]
93
+ end
94
+ end
95
+
96
+ # Attempts to scan an "or". It's just a vertical bar.
97
+ #
98
+ # @return [Boolean] if it matched.
99
+ def scan_second_rule_or
100
+ if @scanner.scan(/\|/)
101
+ tokens << [:or]
102
+ end
103
+ end
104
+
105
+ # Attempts to scan a precedence definition. A precedence
106
+ # definition is "%prec " followed by a terminal or nonterminal.
107
+ #
108
+ # @return [Boolean] if it matched.
109
+ def scan_second_rule_prec
110
+ if @scanner.scan(/%prec (#{IDENTIFIER})/)
111
+ tokens << [:prec, @scanner[1]]
112
+ end
113
+ end
114
+
115
+ # Attempts to scan a block. This correctly balances brackets;
116
+ # however, if a bracket is opened/closed within a string, it
117
+ # still counts that as a bracket that needs to be balanced.
118
+ # So, having extensive code within a block is not a good idea.
119
+ #
120
+ # @return [Boolean] if it matched.
121
+ def scan_second_rule_block
122
+ if @scanner.scan(/\{/)
123
+ tokens << [:block, _scan_block]
124
+ end
125
+ end
126
+
127
+ private
128
+
129
+ # Scans the block; it scans until it encounters enough closing
130
+ # brackets to match the opening brackets. If it encounters
131
+ # an opening bracket, it increments the bracket counter by
132
+ # one; if it encounters a closing bracket, it decrements by
133
+ # one. It will error if it reaches the end before the
134
+ # brackets are fully closed.
135
+ #
136
+ # @return [String] the block's body.
137
+ # @raise [SyntaxError] if it reaches the end before the final
138
+ # bracket is closed.
139
+ def _scan_block
140
+ brack = 1
141
+ body = "{"
142
+ scan_for = %r{
143
+ (
144
+ (?: " ( \\\\ | \\" | [^"] )* "? )
145
+ | (?: ' ( \\\\ | \\' | [^'] )* '? )
146
+ | (?: // .*? \n )
147
+ | (?: \# .*? \n )
148
+ | (?: /\* [\s\S]+? \*/ )
149
+ | (?: \} )
150
+ | (?: \{ )
151
+ )
152
+ }x
153
+
154
+ until brack.zero?
155
+ if part = @scanner.scan_until(scan_for)
156
+ body << part
157
+
158
+
159
+ if @scanner[1] == "}"
160
+ brack -= 1
161
+ elsif @scanner[1] == "{"
162
+ brack += 1
163
+ end
164
+ else
165
+ if @scanner.scan(/(.+)/m)
166
+ @line += @scanner[1].count("\n")
167
+ end
168
+ error!
169
+ end
170
+ end
171
+
172
+ body
173
+ end
174
+ end
175
+ end
176
+ end
177
+ end
@@ -1,27 +1,27 @@
1
- # encoding: utf-8
2
-
3
- module Antelope
4
- module Ace
5
- class Scanner
6
-
7
- # Scans the third part. Everything after the content
8
- # boundry is copied directly into the output.
9
- module Third
10
-
11
- # Scans the third part. It should start with a content
12
- # boundry; raises an error if it does not. It then scans
13
- # until the end of the file.
14
- #
15
- # @raise [SyntaxError] if somehow there is no content
16
- # boundry.
17
- # @return [void]
18
- def scan_third_part
19
- @scanner.scan(CONTENT_BOUNDRY) or error!
20
-
21
- tokens << [:third]
22
- tokens << [:copy, @scanner.scan(/[\s\S]+/m) || ""]
23
- end
24
- end
25
- end
26
- end
27
- end
1
+ # encoding: utf-8
2
+
3
+ module Antelope
4
+ module Ace
5
+ class Scanner
6
+
7
+ # Scans the third part. Everything after the content
8
+ # boundary is copied directly into the output.
9
+ module Third
10
+
11
+ # Scans the third part. It should start with a content
12
+ # boundary; raises an error if it does not. It then scans
13
+ # until the end of the file.
14
+ #
15
+ # @raise [SyntaxError] if somehow there is no content
16
+ # boundary.
17
+ # @return [void]
18
+ def scan_third_part
19
+ @scanner.scan(CONTENT_BOUNDRY) or error!
20
+
21
+ tokens << [:third]
22
+ tokens << [:copy, @scanner.scan(/[\s\S]+/m) || ""]
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -1,134 +1,134 @@
1
- # encoding: utf-8
2
-
3
- require "strscan"
4
- require "antelope/ace/scanner/argument"
5
- require "antelope/ace/scanner/first"
6
- require "antelope/ace/scanner/second"
7
- require "antelope/ace/scanner/third"
8
-
9
- module Antelope
10
- module Ace
11
-
12
- # Scans a given input. The input should be a properly formatted
13
- # ACE file; see the Ace module for more information. This scanner
14
- # uses the StringScanner class internally; see the ruby
15
- # documentation for more on that. This scanner seperates scanning
16
- # into three seperate stages: First, Second, and Third, for each
17
- # section of the file, respectively.
18
- #
19
- # @see Ace
20
- # @see http://ruby-doc.org/stdlib-2.1.2/libdoc/strscan/rdoc/StringScanner.html
21
- class Scanner
22
-
23
- IDENTIFIER = "[a-zA-Z_.][a-zA-Z0-9_.-]*"
24
-
25
- include First
26
- include Second
27
- include Third
28
-
29
- # The string scanner that we're using to scan the string with.
30
- #
31
- # @return [StringScanner]
32
- attr_reader :scanner
33
-
34
- # An array of the tokens that the scanner scanned.
35
- #
36
- # @return [Array<Array<(Symbol, Object, ...)>>]
37
- attr_reader :tokens
38
-
39
- # The boundry between each section. Placed here to be easily.
40
- # modifiable. **MUST** be a regular expression.
41
- #
42
- # @return [RegExp]
43
- CONTENT_BOUNDRY = /%%/
44
-
45
- # The value regular expression. It should match values; for
46
- # example, things quoted in strings or word letters without
47
- # quotes. Must respond to #to_s, since it is embedded within
48
- # other regular expressions. The regular expression should
49
- # place the contents of the value in the groups 2 or 3.
50
- #
51
- # @return [#to_s]
52
- VALUE = %q{(?:
53
- (?:("|')((?:\\\\|\\"|\\'|.)+?)\\1)
54
- | ([A-Za-z0-9_.<>*-]+)
55
- )}
56
-
57
- # Scans a file. It returns the tokens resulting from scanning.
58
- #
59
- # @param source [String] the source to scan. This should be
60
- # compatible with StringScanner.
61
- # @param name [String] the name of the source file. This is
62
- # primarilyused in backtrace information.
63
- # @return [Array<Array<(Symbol, Object, ...)>>]
64
- # @see #tokens
65
- def self.scan(source, name = "(ace file)")
66
- new(source, name).scan_file
67
- end
68
-
69
- # Initialize the scanner with the input.
70
- #
71
- # @param input [String] The source to scan.
72
- # @param source [String] the source file. This is primarily
73
- # used in backtrace information.
74
- def initialize(input, source = "(ace file)")
75
- @source = source
76
- @scanner = StringScanner.new(input)
77
- @tokens = []
78
- end
79
-
80
- # Scans the file in parts.
81
- #
82
- # @raise [SyntaxError] if the source is malformed in some way.
83
- # @return [Array<Array<(Symbol, Object, ...)>>] the tokens that
84
- # were scanned in this file.
85
- # @see #scan_first_part
86
- # @see #scan_second_part
87
- # @see #scan_third_part
88
- # @see #tokens
89
- def scan_file
90
- @line = 1
91
- scan_first_part
92
- scan_second_part
93
- scan_third_part
94
- tokens
95
- rescue SyntaxError => e
96
- start = [@scanner.pos - 8, 0].max
97
- stop = [@scanner.pos + 8, @scanner.string.length].min
98
- snip = @scanner.string[start..stop].strip.inspect
99
- char = @scanner.string[@scanner.pos]
100
- char = if char
101
- char.inspect
102
- else
103
- "EOF"
104
- end
105
-
106
- new_line = "#{@source}:#{@line}: unexpected #{char} " \
107
- "(near #{snip})"
108
-
109
- raise e, e.message, [new_line, *e.backtrace]
110
- end
111
-
112
- # Scans for whitespace. If the next character is whitespace, it
113
- # will consume all whitespace until the next non-whitespace
114
- # character.
115
- #
116
- # @return [Boolean] if any whitespace was matched.
117
- def scan_whitespace
118
- if @scanner.scan(/(\s+)/)
119
- @line += @scanner[1].count("\n")
120
- end
121
- end
122
-
123
- private
124
-
125
- # Raises an error.
126
- #
127
- # @raise [SyntaxError] always.
128
- # @return [void]
129
- def error!
130
- raise SyntaxError, "invalid syntax"
131
- end
132
- end
133
- end
134
- end
1
+ # encoding: utf-8
2
+
3
+ require "strscan"
4
+ require "antelope/ace/scanner/argument"
5
+ require "antelope/ace/scanner/first"
6
+ require "antelope/ace/scanner/second"
7
+ require "antelope/ace/scanner/third"
8
+
9
+ module Antelope
10
+ module Ace
11
+
12
+ # Scans a given input. The input should be a properly formatted
13
+ # ACE file; see the Ace module for more information. This scanner
14
+ # uses the StringScanner class internally; see the ruby
15
+ # documentation for more on that. This scanner separates scanning
16
+ # into three separate stages: First, Second, and Third, for each
17
+ # section of the file, respectively.
18
+ #
19
+ # @see Ace
20
+ # @see http://ruby-doc.org/stdlib-2.1.2/libdoc/strscan/rdoc/StringScanner.html
21
+ class Scanner
22
+
23
+ IDENTIFIER = "[a-zA-Z_.][a-zA-Z0-9_.-]*"
24
+
25
+ include First
26
+ include Second
27
+ include Third
28
+
29
+ # The string scanner that we're using to scan the string with.
30
+ #
31
+ # @return [StringScanner]
32
+ attr_reader :scanner
33
+
34
+ # An array of the tokens that the scanner scanned.
35
+ #
36
+ # @return [Array<Array<(Symbol, Object, ...)>>]
37
+ attr_reader :tokens
38
+
39
+ # The boundary between each section. Placed here to be easily
40
+ # modifiable. **MUST** be a regular expression.
41
+ #
42
+ # @return [RegExp]
43
+ CONTENT_BOUNDRY = /%%/
44
+
45
+ # The value regular expression. It should match values; for
46
+ # example, things quoted in strings or word letters without
47
+ # quotes. Must respond to #to_s, since it is embedded within
48
+ # other regular expressions. The regular expression should
49
+ # place the contents of the value in the groups 2 or 3.
50
+ #
51
+ # @return [#to_s]
52
+ VALUE = %q{(?:
53
+ (?:("|')((?:\\\\|\\"|\\'|.)+?)\\1)
54
+ | ([A-Za-z0-9_.<>*-]+)
55
+ )}
56
+
57
+ # Scans a file. It returns the tokens resulting from scanning.
58
+ #
59
+ # @param source [String] the source to scan. This should be
60
+ # compatible with StringScanner.
61
+ # @param name [String] the name of the source file. This is
62
+ # primarily used in backtrace information.
63
+ # @return [Array<Array<(Symbol, Object, ...)>>]
64
+ # @see #tokens
65
+ def self.scan(source, name = "(ace file)")
66
+ new(source, name).scan_file
67
+ end
68
+
69
+ # Initialize the scanner with the input.
70
+ #
71
+ # @param input [String] The source to scan.
72
+ # @param source [String] the source file. This is primarily
73
+ # used in backtrace information.
74
+ def initialize(input, source = "(ace file)")
75
+ @source = source
76
+ @scanner = StringScanner.new(input)
77
+ @tokens = []
78
+ end
79
+
80
+ # Scans the file in parts.
81
+ #
82
+ # @raise [SyntaxError] if the source is malformed in some way.
83
+ # @return [Array<Array<(Symbol, Object, ...)>>] the tokens that
84
+ # were scanned in this file.
85
+ # @see #scan_first_part
86
+ # @see #scan_second_part
87
+ # @see #scan_third_part
88
+ # @see #tokens
89
+ def scan_file
90
+ @line = 1
91
+ scan_first_part
92
+ scan_second_part
93
+ scan_third_part
94
+ tokens
95
+ rescue SyntaxError => e
96
+ start = [@scanner.pos - 8, 0].max
97
+ stop = [@scanner.pos + 8, @scanner.string.length].min
98
+ snip = @scanner.string[start..stop].strip.inspect
99
+ char = @scanner.string[@scanner.pos]
100
+ char = if char
101
+ char.inspect
102
+ else
103
+ "EOF"
104
+ end
105
+
106
+ new_line = "#{@source}:#{@line}: unexpected #{char} " \
107
+ "(near #{snip})"
108
+
109
+ raise e, e.message, [new_line, *e.backtrace]
110
+ end
111
+
112
+ # Scans for whitespace. If the next character is whitespace, it
113
+ # will consume all whitespace until the next non-whitespace
114
+ # character.
115
+ #
116
+ # @return [Boolean] if any whitespace was matched.
117
+ def scan_whitespace
118
+ if @scanner.scan(/(\s+)/)
119
+ @line += @scanner[1].count("\n")
120
+ end
121
+ end
122
+
123
+ private
124
+
125
+ # Raises an error.
126
+ #
127
+ # @raise [SyntaxError] always.
128
+ # @return [void]
129
+ def error!
130
+ raise SyntaxError, "invalid syntax"
131
+ end
132
+ end
133
+ end
134
+ end