antelope 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96):
  1. checksums.yaml +4 -4
  2. data/.gitignore +25 -23
  3. data/.rspec +3 -3
  4. data/.travis.yml +10 -9
  5. data/.yardopts +7 -7
  6. data/CONTRIBUTING.md +38 -38
  7. data/GENERATORS.md +124 -124
  8. data/Gemfile +7 -7
  9. data/LICENSE.txt +22 -22
  10. data/README.md +104 -104
  11. data/Rakefile +2 -2
  12. data/TODO.md +58 -58
  13. data/antelope.gemspec +28 -28
  14. data/bin/antelope +7 -7
  15. data/examples/deterministic.ace +35 -35
  16. data/examples/example.ace +51 -50
  17. data/examples/example.err +192 -0
  18. data/examples/{example.output → example.inf} +384 -385
  19. data/examples/liquidscript.ace +233 -162
  20. data/examples/simple.ace +22 -22
  21. data/lib/antelope/ace/compiler.rb +334 -334
  22. data/lib/antelope/ace/errors.rb +48 -48
  23. data/lib/antelope/ace/grammar/generation.rb +80 -80
  24. data/lib/antelope/ace/grammar/loading.rb +53 -53
  25. data/lib/antelope/ace/grammar/precedences.rb +68 -65
  26. data/lib/antelope/ace/grammar/productions.rb +156 -150
  27. data/lib/antelope/ace/grammar/symbols.rb +66 -66
  28. data/lib/antelope/ace/grammar.rb +69 -69
  29. data/lib/antelope/ace/precedence.rb +61 -61
  30. data/lib/antelope/ace/production.rb +57 -57
  31. data/lib/antelope/ace/scanner/argument.rb +57 -57
  32. data/lib/antelope/ace/scanner/first.rb +89 -89
  33. data/lib/antelope/ace/scanner/second.rb +177 -177
  34. data/lib/antelope/ace/scanner/third.rb +27 -27
  35. data/lib/antelope/ace/scanner.rb +134 -134
  36. data/lib/antelope/ace/token/epsilon.rb +24 -24
  37. data/lib/antelope/ace/token/error.rb +26 -26
  38. data/lib/antelope/ace/token/nonterminal.rb +17 -17
  39. data/lib/antelope/ace/token/terminal.rb +17 -17
  40. data/lib/antelope/ace/token.rb +238 -238
  41. data/lib/antelope/ace.rb +53 -53
  42. data/lib/antelope/cli.rb +55 -55
  43. data/lib/antelope/errors.rb +8 -8
  44. data/lib/antelope/generation/constructor/first.rb +88 -88
  45. data/lib/antelope/generation/constructor/follow.rb +103 -103
  46. data/lib/antelope/generation/constructor/nullable.rb +64 -64
  47. data/lib/antelope/generation/constructor.rb +126 -126
  48. data/lib/antelope/generation/errors.rb +17 -17
  49. data/lib/antelope/generation/null.rb +13 -13
  50. data/lib/antelope/generation/recognizer/rule.rb +216 -216
  51. data/lib/antelope/generation/recognizer/state.rb +130 -130
  52. data/lib/antelope/generation/recognizer.rb +180 -180
  53. data/lib/antelope/generation/tableizer.rb +175 -154
  54. data/lib/antelope/generation.rb +15 -15
  55. data/lib/antelope/generator/base.rb +264 -264
  56. data/lib/antelope/generator/c.rb +11 -11
  57. data/lib/antelope/generator/c_header.rb +105 -105
  58. data/lib/antelope/generator/c_source.rb +39 -39
  59. data/lib/antelope/generator/error.rb +34 -0
  60. data/lib/antelope/generator/group.rb +57 -57
  61. data/lib/antelope/generator/html.rb +51 -0
  62. data/lib/antelope/generator/info.rb +47 -0
  63. data/lib/antelope/generator/null.rb +18 -18
  64. data/lib/antelope/generator/output.rb +17 -49
  65. data/lib/antelope/generator/ruby.rb +79 -79
  66. data/lib/antelope/generator/templates/c_header.ant +36 -36
  67. data/lib/antelope/generator/templates/c_source.ant +202 -202
  68. data/lib/antelope/generator/templates/error.ant +33 -0
  69. data/lib/antelope/generator/templates/html/antelope.css +1 -0
  70. data/lib/antelope/generator/templates/html/antelope.html +1 -0
  71. data/lib/antelope/generator/templates/html/antelope.js +1 -0
  72. data/lib/antelope/generator/templates/html/css.ant +53 -0
  73. data/lib/antelope/generator/templates/html/html.ant +82 -0
  74. data/lib/antelope/generator/templates/html/js.ant +9 -0
  75. data/lib/antelope/generator/templates/info.ant +53 -0
  76. data/lib/antelope/generator/templates/ruby.ant +178 -146
  77. data/lib/antelope/generator.rb +66 -63
  78. data/lib/antelope/template/compiler.rb +78 -78
  79. data/lib/antelope/template/errors.rb +9 -9
  80. data/lib/antelope/template/scanner.rb +109 -109
  81. data/lib/antelope/template.rb +65 -60
  82. data/lib/antelope/version.rb +6 -6
  83. data/lib/antelope.rb +13 -13
  84. data/optimizations.txt +42 -0
  85. data/spec/antelope/ace/compiler_spec.rb +60 -60
  86. data/spec/antelope/ace/scanner_spec.rb +27 -27
  87. data/spec/antelope/constructor_spec.rb +133 -136
  88. data/spec/antelope/template_spec.rb +50 -49
  89. data/spec/fixtures/simple.ace +22 -22
  90. data/spec/spec_helper.rb +39 -39
  91. data/spec/support/benchmark_helper.rb +5 -5
  92. data/spec/support/grammar_helper.rb +15 -15
  93. data/subl/Ace (Ruby).JSON-tmLanguage +94 -94
  94. data/subl/Ace (Ruby).tmLanguage +153 -153
  95. metadata +17 -6
  96. data/lib/antelope/generator/templates/output.ant +0 -68
@@ -1,177 +1,177 @@
1
- # encoding: utf-8
2
-
3
- module Antelope
4
- module Ace
5
- class Scanner
6
-
7
- # Scans the second part of the file. The second part of the
8
- # file _only_ contains productions (or rules). Rules have a
9
- # label and a body; the label may be any lowercase alphabetical
10
- # identifier followed by a colon; the body consists of "parts",
11
- # an "or", a "prec", and/or a "block". The part may consist
12
- # of any alphabetical characters. An or is just a vertical bar
13
- # (`|`). A prec is a precedence declaraction, which is `%prec `
14
- # followed by any alphabetical characters. A block is a `{`,
15
- # followed by code, followed by a terminating `}`. Rules _may_
16
- # be terminated by a semicolon, but this is optional.
17
- module Second
18
-
19
- # Scans the second part of the file. This should be from just
20
- # before the first content boundry; if the scanner doesn't
21
- # find a content boundry, it will error. It will then check
22
- # for a rule.
23
- #
24
- # @raise [SyntaxError] if no content boundry was found, or if
25
- # the scanner encounters anything but a rule or whitespace.
26
- # @return [void]
27
- # @see #scan_second_rule
28
- # @see #scan_whitespace
29
- # @see #error!
30
- def scan_second_part
31
- scanner.scan(CONTENT_BOUNDRY) or error!
32
- tokens << [:second]
33
-
34
- until @scanner.check(CONTENT_BOUNDRY)
35
- scan_second_rule || scan_whitespace || error!
36
- end
37
- end
38
-
39
- # Scans a rule. A rule consists of a label (the nonterminal
40
- # the production is for), a body, and a block; and then,
41
- # an optional semicolon.
42
- #
43
- # @return [Boolean] if it matched
44
- # @see #scan_second_rule_label
45
- # @see #scan_second_rule_body
46
- # @see #error!
47
- def scan_second_rule
48
- if @scanner.check(/(#{IDENTIFIER})(\[#{IDENTIFIER}\])?:/)
49
- scan_second_rule_label or error!
50
- scan_second_rule_body
51
- true
52
- end
53
- end
54
-
55
- # Scans the label for a rule. It should contain only lower
56
- # case letters and a colon.
57
- #
58
- # @return [Boolean] if it matched.
59
- def scan_second_rule_label
60
- if @scanner.scan(/(#{IDENTIFIER})(?:\[(#{IDENTIFIER})\])?: ?/)
61
- tokens << [:label, @scanner[1], @scanner[2]]
62
- end
63
- end
64
-
65
- # The body can contain parts, ors, precs, or blocks (or
66
- # whitespaces). Scans all of them, and then attempts to
67
- # scan a semicolon.
68
- #
69
- # @return [void]
70
- # @see #scan_second_rule_part
71
- # @see #scan_second_rule_or
72
- # @see #scan_second_rule_prec
73
- # @see #scan_second_rule_block
74
- # @see #scan_whitespace
75
- def scan_second_rule_body
76
- body = true
77
- while body
78
- scan_second_rule_part || scan_second_rule_or ||
79
- scan_second_rule_prec || scan_second_rule_block ||
80
- scan_whitespace || (body = false)
81
- end
82
- @scanner.scan(/;/)
83
- end
84
-
85
- # Attempts to scan a "part". A part is any series of
86
- # alphabetical characters that are not followed by a
87
- # colon.
88
- #
89
- # @return [Boolean] if it matched.
90
- def scan_second_rule_part
91
- if @scanner.scan(/(#{IDENTIFIER})(?:\[(#{IDENTIFIER})\])?(?!\:|[A-Za-z._])/)
92
- tokens << [:part, @scanner[1], @scanner[2]]
93
- end
94
- end
95
-
96
- # Attempts to scan an "or". It's just a vertical bar.
97
- #
98
- # @return [Boolean] if it matched.
99
- def scan_second_rule_or
100
- if @scanner.scan(/\|/)
101
- tokens << [:or]
102
- end
103
- end
104
-
105
- # Attempts to scan a precedence definition. A precedence
106
- # definition is "%prec " followed by a terminal or nonterminal.
107
- #
108
- # @return [Boolean] if it matched.
109
- def scan_second_rule_prec
110
- if @scanner.scan(/%prec (#{IDENTIFIER})/)
111
- tokens << [:prec, @scanner[1]]
112
- end
113
- end
114
-
115
- # Attempts to scan a block. This correctly balances brackets;
116
- # however, if a bracket is opened/closed within a string, it
117
- # still counts that as a bracket that needs to be balanced.
118
- # So, having extensive code within a block is not a good idea.
119
- #
120
- # @return [Boolean] if it matched.
121
- def scan_second_rule_block
122
- if @scanner.scan(/\{/)
123
- tokens << [:block, _scan_block]
124
- end
125
- end
126
-
127
- private
128
-
129
- # Scans the block; it scans until it encounters enough closing
130
- # brackets to match the opening brackets. If it encounters
131
- # an opening brackets, it increments the bracket counter by
132
- # one; if it encounters a closing bracket, it decrements by
133
- # one. It will error if it reaches the end before the
134
- # brackets are fully closed.
135
- #
136
- # @return [String] the block's body.
137
- # @raise [SyntaxError] if it reaches the end before the final
138
- # bracket is closed.
139
- def _scan_block
140
- brack = 1
141
- body = "{"
142
- scan_for = %r{
143
- (
144
- (?: " ( \\\\ | \\" | [^"] )* "? )
145
- | (?: ' ( \\\\ | \\' | [^'] )* '? )
146
- | (?: // .*? \n )
147
- | (?: \# .*? \n )
148
- | (?: /\* [\s\S]+? \*/ )
149
- | (?: \} )
150
- | (?: \{ )
151
- )
152
- }x
153
-
154
- until brack.zero?
155
- if part = @scanner.scan_until(scan_for)
156
- body << part
157
-
158
-
159
- if @scanner[1] == "}"
160
- brack -= 1
161
- elsif @scanner[1] == "{"
162
- brack += 1
163
- end
164
- else
165
- if @scanner.scan(/(.+)/m)
166
- @line += @scanner[1].count("\n")
167
- end
168
- error!
169
- end
170
- end
171
-
172
- body
173
- end
174
- end
175
- end
176
- end
177
- end
1
+ # encoding: utf-8
2
+
3
+ module Antelope
4
+ module Ace
5
+ class Scanner
6
+
7
+ # Scans the second part of the file. The second part of the
8
+ # file _only_ contains productions (or rules). Rules have a
9
+ # label and a body; the label may be any lowercase alphabetical
10
+ # identifier followed by a colon; the body consists of "parts",
11
+ # an "or", a "prec", and/or a "block". The part may consist
12
+ # of any alphabetical characters. An or is just a vertical bar
13
+ # (`|`). A prec is a precedence declaraction, which is `%prec `
14
+ # followed by any alphabetical characters. A block is a `{`,
15
+ # followed by code, followed by a terminating `}`. Rules _may_
16
+ # be terminated by a semicolon, but this is optional.
17
+ module Second
18
+
19
+ # Scans the second part of the file. This should be from just
20
+ # before the first content boundry; if the scanner doesn't
21
+ # find a content boundry, it will error. It will then check
22
+ # for a rule.
23
+ #
24
+ # @raise [SyntaxError] if no content boundry was found, or if
25
+ # the scanner encounters anything but a rule or whitespace.
26
+ # @return [void]
27
+ # @see #scan_second_rule
28
+ # @see #scan_whitespace
29
+ # @see #error!
30
+ def scan_second_part
31
+ scanner.scan(CONTENT_BOUNDRY) or error!
32
+ tokens << [:second]
33
+
34
+ until @scanner.check(CONTENT_BOUNDRY)
35
+ scan_second_rule || scan_whitespace || error!
36
+ end
37
+ end
38
+
39
+ # Scans a rule. A rule consists of a label (the nonterminal
40
+ # the production is for), a body, and a block; and then,
41
+ # an optional semicolon.
42
+ #
43
+ # @return [Boolean] if it matched
44
+ # @see #scan_second_rule_label
45
+ # @see #scan_second_rule_body
46
+ # @see #error!
47
+ def scan_second_rule
48
+ if @scanner.check(/(#{IDENTIFIER})(\[#{IDENTIFIER}\])?:/)
49
+ scan_second_rule_label or error!
50
+ scan_second_rule_body
51
+ true
52
+ end
53
+ end
54
+
55
+ # Scans the label for a rule. It should contain only lower
56
+ # case letters and a colon.
57
+ #
58
+ # @return [Boolean] if it matched.
59
+ def scan_second_rule_label
60
+ if @scanner.scan(/(#{IDENTIFIER})(?:\[(#{IDENTIFIER})\])?: ?/)
61
+ tokens << [:label, @scanner[1], @scanner[2]]
62
+ end
63
+ end
64
+
65
+ # The body can contain parts, ors, precs, or blocks (or
66
+ # whitespaces). Scans all of them, and then attempts to
67
+ # scan a semicolon.
68
+ #
69
+ # @return [void]
70
+ # @see #scan_second_rule_part
71
+ # @see #scan_second_rule_or
72
+ # @see #scan_second_rule_prec
73
+ # @see #scan_second_rule_block
74
+ # @see #scan_whitespace
75
+ def scan_second_rule_body
76
+ body = true
77
+ while body
78
+ scan_second_rule_prec || scan_second_rule_part ||
79
+ scan_second_rule_or || scan_second_rule_block ||
80
+ scan_whitespace || (body = false)
81
+ end
82
+ @scanner.scan(/;/)
83
+ end
84
+
85
+ # Attempts to scan a "part". A part is any series of
86
+ # alphabetical characters that are not followed by a
87
+ # colon.
88
+ #
89
+ # @return [Boolean] if it matched.
90
+ def scan_second_rule_part
91
+ if @scanner.scan(/(%?#{IDENTIFIER})(?:\[(#{IDENTIFIER})\])?(?!\:|[A-Za-z._])/)
92
+ tokens << [:part, @scanner[1], @scanner[2]]
93
+ end
94
+ end
95
+
96
+ # Attempts to scan an "or". It's just a vertical bar.
97
+ #
98
+ # @return [Boolean] if it matched.
99
+ def scan_second_rule_or
100
+ if @scanner.scan(/\|/)
101
+ tokens << [:or]
102
+ end
103
+ end
104
+
105
+ # Attempts to scan a precedence definition. A precedence
106
+ # definition is "%prec " followed by a terminal or nonterminal.
107
+ #
108
+ # @return [Boolean] if it matched.
109
+ def scan_second_rule_prec
110
+ if @scanner.scan(/%prec (#{IDENTIFIER})/)
111
+ tokens << [:prec, @scanner[1]]
112
+ end
113
+ end
114
+
115
+ # Attempts to scan a block. This correctly balances brackets;
116
+ # however, if a bracket is opened/closed within a string, it
117
+ # still counts that as a bracket that needs to be balanced.
118
+ # So, having extensive code within a block is not a good idea.
119
+ #
120
+ # @return [Boolean] if it matched.
121
+ def scan_second_rule_block
122
+ if @scanner.scan(/\{/)
123
+ tokens << [:block, _scan_block]
124
+ end
125
+ end
126
+
127
+ private
128
+
129
+ # Scans the block; it scans until it encounters enough closing
130
+ # brackets to match the opening brackets. If it encounters
131
+ # an opening brackets, it increments the bracket counter by
132
+ # one; if it encounters a closing bracket, it decrements by
133
+ # one. It will error if it reaches the end before the
134
+ # brackets are fully closed.
135
+ #
136
+ # @return [String] the block's body.
137
+ # @raise [SyntaxError] if it reaches the end before the final
138
+ # bracket is closed.
139
+ def _scan_block
140
+ brack = 1
141
+ body = "{"
142
+ scan_for = %r{
143
+ (
144
+ (?: " ( \\\\ | \\" | [^"] )* "? )
145
+ | (?: ' ( \\\\ | \\' | [^'] )* '? )
146
+ | (?: // .*? \n )
147
+ | (?: \# .*? \n )
148
+ | (?: /\* [\s\S]+? \*/ )
149
+ | (?: \} )
150
+ | (?: \{ )
151
+ )
152
+ }x
153
+
154
+ until brack.zero?
155
+ if part = @scanner.scan_until(scan_for)
156
+ body << part
157
+
158
+
159
+ if @scanner[1] == "}"
160
+ brack -= 1
161
+ elsif @scanner[1] == "{"
162
+ brack += 1
163
+ end
164
+ else
165
+ if @scanner.scan(/(.+)/m)
166
+ @line += @scanner[1].count("\n")
167
+ end
168
+ error!
169
+ end
170
+ end
171
+
172
+ body
173
+ end
174
+ end
175
+ end
176
+ end
177
+ end
@@ -1,27 +1,27 @@
1
- # encoding: utf-8
2
-
3
- module Antelope
4
- module Ace
5
- class Scanner
6
-
7
- # Scans the third part. Everything after the content
8
- # boundry is copied directly into the output.
9
- module Third
10
-
11
- # Scans the third part. It should start with a content
12
- # boundry; raises an error if it does not. It then scans
13
- # until the end of the file.
14
- #
15
- # @raise [SyntaxError] if somehow there is no content
16
- # boundry.
17
- # @return [void]
18
- def scan_third_part
19
- @scanner.scan(CONTENT_BOUNDRY) or error!
20
-
21
- tokens << [:third]
22
- tokens << [:copy, @scanner.scan(/[\s\S]+/m) || ""]
23
- end
24
- end
25
- end
26
- end
27
- end
1
+ # encoding: utf-8
2
+
3
+ module Antelope
4
+ module Ace
5
+ class Scanner
6
+
7
+ # Scans the third part. Everything after the content
8
+ # boundry is copied directly into the output.
9
+ module Third
10
+
11
+ # Scans the third part. It should start with a content
12
+ # boundry; raises an error if it does not. It then scans
13
+ # until the end of the file.
14
+ #
15
+ # @raise [SyntaxError] if somehow there is no content
16
+ # boundry.
17
+ # @return [void]
18
+ def scan_third_part
19
+ @scanner.scan(CONTENT_BOUNDRY) or error!
20
+
21
+ tokens << [:third]
22
+ tokens << [:copy, @scanner.scan(/[\s\S]+/m) || ""]
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -1,134 +1,134 @@
1
- # encoding: utf-8
2
-
3
- require "strscan"
4
- require "antelope/ace/scanner/argument"
5
- require "antelope/ace/scanner/first"
6
- require "antelope/ace/scanner/second"
7
- require "antelope/ace/scanner/third"
8
-
9
- module Antelope
10
- module Ace
11
-
12
- # Scans a given input. The input should be a properly formatted
13
- # ACE file; see the Ace module for more information. This scanner
14
- # uses the StringScanner class internally; see the ruby
15
- # documentation for more on that. This scanner seperates scanning
16
- # into three seperate stages: First, Second, and Third, for each
17
- # section of the file, respectively.
18
- #
19
- # @see Ace
20
- # @see http://ruby-doc.org/stdlib-2.1.2/libdoc/strscan/rdoc/StringScanner.html
21
- class Scanner
22
-
23
- IDENTIFIER = "[a-zA-Z_.][a-zA-Z0-9_.-]*"
24
-
25
- include First
26
- include Second
27
- include Third
28
-
29
- # The string scanner that we're using to scan the string with.
30
- #
31
- # @return [StringScanner]
32
- attr_reader :scanner
33
-
34
- # An array of the tokens that the scanner scanned.
35
- #
36
- # @return [Array<Array<(Symbol, Object, ...)>>]
37
- attr_reader :tokens
38
-
39
- # The boundry between each section. Placed here to be easily.
40
- # modifiable. **MUST** be a regular expression.
41
- #
42
- # @return [RegExp]
43
- CONTENT_BOUNDRY = /%%/
44
-
45
- # The value regular expression. It should match values; for
46
- # example, things quoted in strings or word letters without
47
- # quotes. Must respond to #to_s, since it is embedded within
48
- # other regular expressions. The regular expression should
49
- # place the contents of the value in the groups 2 or 3.
50
- #
51
- # @return [#to_s]
52
- VALUE = %q{(?:
53
- (?:("|')((?:\\\\|\\"|\\'|.)+?)\\1)
54
- | ([A-Za-z0-9_.<>*-]+)
55
- )}
56
-
57
- # Scans a file. It returns the tokens resulting from scanning.
58
- #
59
- # @param source [String] the source to scan. This should be
60
- # compatible with StringScanner.
61
- # @param name [String] the name of the source file. This is
62
- # primarilyused in backtrace information.
63
- # @return [Array<Array<(Symbol, Object, ...)>>]
64
- # @see #tokens
65
- def self.scan(source, name = "(ace file)")
66
- new(source, name).scan_file
67
- end
68
-
69
- # Initialize the scanner with the input.
70
- #
71
- # @param input [String] The source to scan.
72
- # @param source [String] the source file. This is primarily
73
- # used in backtrace information.
74
- def initialize(input, source = "(ace file)")
75
- @source = source
76
- @scanner = StringScanner.new(input)
77
- @tokens = []
78
- end
79
-
80
- # Scans the file in parts.
81
- #
82
- # @raise [SyntaxError] if the source is malformed in some way.
83
- # @return [Array<Array<(Symbol, Object, ...)>>] the tokens that
84
- # were scanned in this file.
85
- # @see #scan_first_part
86
- # @see #scan_second_part
87
- # @see #scan_third_part
88
- # @see #tokens
89
- def scan_file
90
- @line = 1
91
- scan_first_part
92
- scan_second_part
93
- scan_third_part
94
- tokens
95
- rescue SyntaxError => e
96
- start = [@scanner.pos - 8, 0].max
97
- stop = [@scanner.pos + 8, @scanner.string.length].min
98
- snip = @scanner.string[start..stop].strip.inspect
99
- char = @scanner.string[@scanner.pos]
100
- char = if char
101
- char.inspect
102
- else
103
- "EOF"
104
- end
105
-
106
- new_line = "#{@source}:#{@line}: unexpected #{char} " \
107
- "(near #{snip})"
108
-
109
- raise e, e.message, [new_line, *e.backtrace]
110
- end
111
-
112
- # Scans for whitespace. If the next character is whitespace, it
113
- # will consume all whitespace until the next non-whitespace
114
- # character.
115
- #
116
- # @return [Boolean] if any whitespace was matched.
117
- def scan_whitespace
118
- if @scanner.scan(/(\s+)/)
119
- @line += @scanner[1].count("\n")
120
- end
121
- end
122
-
123
- private
124
-
125
- # Raises an error.
126
- #
127
- # @raise [SyntaxError] always.
128
- # @return [void]
129
- def error!
130
- raise SyntaxError, "invalid syntax"
131
- end
132
- end
133
- end
134
- end
1
+ # encoding: utf-8
2
+
3
+ require "strscan"
4
+ require "antelope/ace/scanner/argument"
5
+ require "antelope/ace/scanner/first"
6
+ require "antelope/ace/scanner/second"
7
+ require "antelope/ace/scanner/third"
8
+
9
+ module Antelope
10
+ module Ace
11
+
12
+ # Scans a given input. The input should be a properly formatted
13
+ # ACE file; see the Ace module for more information. This scanner
14
+ # uses the StringScanner class internally; see the ruby
15
+ # documentation for more on that. This scanner seperates scanning
16
+ # into three seperate stages: First, Second, and Third, for each
17
+ # section of the file, respectively.
18
+ #
19
+ # @see Ace
20
+ # @see http://ruby-doc.org/stdlib-2.1.2/libdoc/strscan/rdoc/StringScanner.html
21
+ class Scanner
22
+
23
+ IDENTIFIER = "[a-zA-Z_.][a-zA-Z0-9_.-]*"
24
+
25
+ include First
26
+ include Second
27
+ include Third
28
+
29
+ # The string scanner that we're using to scan the string with.
30
+ #
31
+ # @return [StringScanner]
32
+ attr_reader :scanner
33
+
34
+ # An array of the tokens that the scanner scanned.
35
+ #
36
+ # @return [Array<Array<(Symbol, Object, ...)>>]
37
+ attr_reader :tokens
38
+
39
+ # The boundry between each section. Placed here to be easily.
40
+ # modifiable. **MUST** be a regular expression.
41
+ #
42
+ # @return [RegExp]
43
+ CONTENT_BOUNDRY = /%%/
44
+
45
+ # The value regular expression. It should match values; for
46
+ # example, things quoted in strings or word letters without
47
+ # quotes. Must respond to #to_s, since it is embedded within
48
+ # other regular expressions. The regular expression should
49
+ # place the contents of the value in the groups 2 or 3.
50
+ #
51
+ # @return [#to_s]
52
+ VALUE = %q{(?:
53
+ (?:("|')((?:\\\\|\\"|\\'|.)+?)\\1)
54
+ | ([A-Za-z0-9_.<>*-]+)
55
+ )}
56
+
57
+ # Scans a file. It returns the tokens resulting from scanning.
58
+ #
59
+ # @param source [String] the source to scan. This should be
60
+ # compatible with StringScanner.
61
+ # @param name [String] the name of the source file. This is
62
+ # primarilyused in backtrace information.
63
+ # @return [Array<Array<(Symbol, Object, ...)>>]
64
+ # @see #tokens
65
+ def self.scan(source, name = "(ace file)")
66
+ new(source, name).scan_file
67
+ end
68
+
69
+ # Initialize the scanner with the input.
70
+ #
71
+ # @param input [String] The source to scan.
72
+ # @param source [String] the source file. This is primarily
73
+ # used in backtrace information.
74
+ def initialize(input, source = "(ace file)")
75
+ @source = source
76
+ @scanner = StringScanner.new(input)
77
+ @tokens = []
78
+ end
79
+
80
+ # Scans the file in parts.
81
+ #
82
+ # @raise [SyntaxError] if the source is malformed in some way.
83
+ # @return [Array<Array<(Symbol, Object, ...)>>] the tokens that
84
+ # were scanned in this file.
85
+ # @see #scan_first_part
86
+ # @see #scan_second_part
87
+ # @see #scan_third_part
88
+ # @see #tokens
89
+ def scan_file
90
+ @line = 1
91
+ scan_first_part
92
+ scan_second_part
93
+ scan_third_part
94
+ tokens
95
+ rescue SyntaxError => e
96
+ start = [@scanner.pos - 8, 0].max
97
+ stop = [@scanner.pos + 8, @scanner.string.length].min
98
+ snip = @scanner.string[start..stop].strip.inspect
99
+ char = @scanner.string[@scanner.pos]
100
+ char = if char
101
+ char.inspect
102
+ else
103
+ "EOF"
104
+ end
105
+
106
+ new_line = "#{@source}:#{@line}: unexpected #{char} " \
107
+ "(near #{snip})"
108
+
109
+ raise e, e.message, [new_line, *e.backtrace]
110
+ end
111
+
112
+ # Scans for whitespace. If the next character is whitespace, it
113
+ # will consume all whitespace until the next non-whitespace
114
+ # character.
115
+ #
116
+ # @return [Boolean] if any whitespace was matched.
117
+ def scan_whitespace
118
+ if @scanner.scan(/(\s+)/)
119
+ @line += @scanner[1].count("\n")
120
+ end
121
+ end
122
+
123
+ private
124
+
125
+ # Raises an error.
126
+ #
127
+ # @raise [SyntaxError] always.
128
+ # @return [void]
129
+ def error!
130
+ raise SyntaxError, "invalid syntax"
131
+ end
132
+ end
133
+ end
134
+ end