ebnf 0.3.9 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -2
- data/VERSION +1 -1
- data/etc/ebnf.ebnf +1 -1
- data/etc/ebnf.html +2 -1
- data/etc/ebnf.ll1.sxp +1 -1
- data/etc/ebnf.rb +1 -1
- data/etc/ebnf.sxp +1 -1
- data/lib/ebnf/ll1/lexer.rb +11 -3
- data/lib/ebnf/ll1/parser.rb +2 -0
- data/lib/ebnf/ll1/scanner.rb +22 -11
- data/lib/ebnf/rule.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 80ecfa0591fe3d22e557724d70f4ba384f0809bd
|
4
|
+
data.tar.gz: 96c0de9edaa30e4d8a063cc9f635a2157d85ba7e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5e952a2ccac175a29d4860f802eb3e5eb28ad41db4a36fca9eac98370d93b4543c12c775d698d9492ab28c876851117b9ea51f35824a040df20d06c52fb3f8b5
|
7
|
+
data.tar.gz: c8d2135602e8cfe704e7cb677dcea4e9ec500067a54bb41cb44f0c7fe4f99df9eba59e4a1b51717e5b7559bff6cf159e309e5224678cef1e63b3654b14c9d529
|
data/README.md
CHANGED
@@ -1,8 +1,12 @@
|
|
1
|
-
# EBNF
|
2
|
-
|
1
|
+
# EBNF
|
3
2
|
|
4
3
|
[EBNF][] parser and generic parser generator.
|
5
4
|
|
5
|
+
[Gem Version](http://badge.fury.io/rb/ebnf)
|
6
|
+
[Build Status](http://travis-ci.org/gkellogg/ebnf)
|
7
|
+
[Coverage Status](https://coveralls.io/r/gkellogg/ebnf)
|
8
|
+
[Dependency Status](https://gemnasium.com/gkellogg/ebnf)
|
9
|
+
|
6
10
|
## Description
|
7
11
|
This is a [Ruby][] implementation of an [EBNF][] and [BNF][] parser and parser generator.
|
8
12
|
It parses [EBNF][] grammars to [BNF][], generates [First/Follow and Branch][] tables for
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.3.9
|
1
|
+
1.0.0
|
data/etc/ebnf.ebnf
CHANGED
data/etc/ebnf.html
CHANGED
@@ -155,7 +155,8 @@
|
|
155
155
|
<td>::=</td>
|
156
156
|
<td>
|
157
157
|
"<code class="grammar-literal">'</code>"
|
158
|
-
|
158
|
+
(<a href="#grammar-production-CHAR">CHAR</a> <code>-</code> "<code class="grammar-literal">'</code>")<code>*</code>
|
159
|
+
"<code class="grammar-literal">'</code>"
|
159
160
|
</td>
|
160
161
|
</tr>
|
161
162
|
<tr id='grammar-production-CHAR'>
|
data/etc/ebnf.ll1.sxp
CHANGED
@@ -149,7 +149,7 @@
|
|
149
149
|
(terminal O_RANGE "15"
|
150
150
|
(seq "[^" (plus (alt (seq R_BEGIN (alt HEX R_CHAR)) (alt HEX R_CHAR))) "]"))
|
151
151
|
(terminal STRING1 "16" (seq "\"" (star (diff CHAR "\"")) "\""))
|
152
|
-
(terminal STRING2 "17" (seq "'" (diff CHAR "'")))
|
152
|
+
(terminal STRING2 "17" (seq "'" (star (diff CHAR "'")) "'"))
|
153
153
|
(terminal CHAR "18" (alt HEX (range "#x20#x21#x22") (range "#x24-#x00FFFFFF")))
|
154
154
|
(terminal R_CHAR "19" (diff CHAR "]"))
|
155
155
|
(terminal R_BEGIN "20" (seq (alt HEX R_CHAR) "-"))
|
data/etc/ebnf.rb
CHANGED
data/etc/ebnf.sxp
CHANGED
@@ -19,7 +19,7 @@
|
|
19
19
|
(terminal O_RANGE "15"
|
20
20
|
(seq "[^" (plus (alt (seq R_BEGIN (alt HEX R_CHAR)) (alt HEX R_CHAR))) "]"))
|
21
21
|
(terminal STRING1 "16" (seq "\"" (star (diff CHAR "\"")) "\""))
|
22
|
-
(terminal STRING2 "17" (seq "'" (diff CHAR "'")))
|
22
|
+
(terminal STRING2 "17" (seq "'" (star (diff CHAR "'")) "'"))
|
23
23
|
(terminal CHAR "18" (alt HEX (range "#x20#x21#x22") (range "#x24-#x00FFFFFF")))
|
24
24
|
(terminal R_CHAR "19" (diff CHAR "]"))
|
25
25
|
(terminal R_BEGIN "20" (seq (alt HEX R_CHAR) "-"))
|
data/lib/ebnf/ll1/lexer.rb
CHANGED
@@ -113,6 +113,8 @@ module EBNF::LL1
|
|
113
113
|
# @param [Hash{Symbol => Object}] options
|
114
114
|
# @option options [Regexp] :whitespace
|
115
115
|
# Whitespace between tokens, including comments
|
116
|
+
# @option options[Integer] :high_water passed to scanner
|
117
|
+
# @option options[Integer] :low_water passed to scanner
|
116
118
|
def initialize(input = nil, terminals = nil, options = {})
|
117
119
|
@options = options.dup
|
118
120
|
@whitespace = @options[:whitespace]
|
@@ -123,7 +125,7 @@ module EBNF::LL1
|
|
123
125
|
raise Error, "Terminal patterns not defined" unless @terminals && @terminals.length > 0
|
124
126
|
|
125
127
|
@lineno = 1
|
126
|
-
@scanner = Scanner.new(input)
|
128
|
+
@scanner = Scanner.new(input, options)
|
127
129
|
end
|
128
130
|
|
129
131
|
##
|
@@ -261,6 +263,10 @@ module EBNF::LL1
|
|
261
263
|
def match_token
|
262
264
|
@terminals.each do |term|
|
263
265
|
#STDERR.puts "match[#{term.type}] #{scanner.rest[0..100].inspect} against #{term.regexp.inspect}" #if term.type == :STRING_LITERAL_SINGLE_QUOTE
|
266
|
+
if term.partial_regexp && scanner.match?(term.partial_regexp) && !scanner.match?(term.regexp)
|
267
|
+
scanner.ensure_buffer_full
|
268
|
+
end
|
269
|
+
|
264
270
|
if matched = scanner.scan(term.regexp)
|
265
271
|
#STDERR.puts " matched #{term.type.inspect}: #{matched.inspect}"
|
266
272
|
tok = token(term.type, term.canonicalize(matched))
|
@@ -278,6 +284,7 @@ module EBNF::LL1
|
|
278
284
|
class Terminal
|
279
285
|
attr_reader :type
|
280
286
|
attr_reader :regexp
|
287
|
+
attr_reader :partial_regexp
|
281
288
|
|
282
289
|
# @param [Symbol, nil] type
|
283
290
|
# @param [Regexp] regexp
|
@@ -287,8 +294,11 @@ module EBNF::LL1
|
|
287
294
|
# their canonical value
|
288
295
|
# @option options [Boolean] :unescape
|
289
296
|
# Cause strings and codepoints to be unescaped.
|
297
|
+
# @option options [Regexp] :partial_regexp
|
298
|
+
# A regular expression matching the beginning of this terminal; useful for terminals that match things longer than the scanner low water mark.
|
290
299
|
def initialize(type, regexp, options = {})
|
291
300
|
@type, @regexp, @options = type, regexp, options
|
301
|
+
@partial_regexp = options[:partial_regexp]
|
292
302
|
@map = options.fetch(:map, {})
|
293
303
|
end
|
294
304
|
|
@@ -327,8 +337,6 @@ module EBNF::LL1
|
|
327
337
|
|
328
338
|
end
|
329
339
|
|
330
|
-
protected
|
331
|
-
|
332
340
|
##
|
333
341
|
# Constructs a new token object annotated with the current line number.
|
334
342
|
#
|
data/lib/ebnf/ll1/parser.rb
CHANGED
@@ -206,6 +206,8 @@ module EBNF::LL1
|
|
206
206
|
# Detailed debug output
|
207
207
|
# @option options [Boolean] :reset_on_start
|
208
208
|
# Reset the parser state if the start token set with `prod` is found in a production. This reduces the production stack depth growth, which is appropriate for some grammars.
|
209
|
+
# @option options[Integer] :high_water passed to lexer
|
210
|
+
# @option options[Integer] :low_water passed to lexer
|
209
211
|
# @yield [context, *data]
|
210
212
|
# Yields for to return data to parser
|
211
213
|
# @yieldparam [:statement, :trace] context
|
data/lib/ebnf/ll1/scanner.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
require 'strscan' unless defined?(StringScanner)
|
2
3
|
|
3
4
|
module EBNF::LL1
|
@@ -9,8 +10,8 @@ module EBNF::LL1
|
|
9
10
|
#
|
10
11
|
# FIXME: Only implements the subset required by the Lexer for now.
|
11
12
|
class Scanner < StringScanner
|
12
|
-
HIGH_WATER =
|
13
|
-
LOW_WATER =
|
13
|
+
HIGH_WATER = 512 * 1024 # Hopefully large enough to deal with long multi-line comments
|
14
|
+
LOW_WATER = 4 * 1024
|
14
15
|
|
15
16
|
##
|
16
17
|
# @return [IO, StringIO]
|
@@ -25,14 +26,14 @@ module EBNF::LL1
|
|
25
26
|
# @option options[Integer] :low_water (LOW_WATER)
|
26
27
|
# @return [Scanner]
|
27
28
|
def initialize(input, options = {})
|
28
|
-
@options = options.merge(:
|
29
|
+
@options = options.merge(high_water: HIGH_WATER, low_water: LOW_WATER)
|
29
30
|
|
30
31
|
if input.respond_to?(:read)
|
31
32
|
@input = input
|
32
33
|
super("")
|
33
34
|
feed_me
|
34
35
|
else
|
35
|
-
super(input.to_s)
|
36
|
+
super(encode_utf8 input.to_s)
|
36
37
|
end
|
37
38
|
end
|
38
39
|
|
@@ -95,12 +96,12 @@ module EBNF::LL1
|
|
95
96
|
feed_me
|
96
97
|
encode_utf8 super
|
97
98
|
end
|
98
|
-
|
99
|
-
|
100
|
-
#
|
101
|
-
def
|
102
|
-
|
103
|
-
|
99
|
+
|
100
|
+
##
|
101
|
+
# Ensures that the input buffer is full to the high water mark, or end of file. Useful when matching tokens that may be longer than the low water mark
|
102
|
+
def ensure_buffer_full
|
103
|
+
# Read up to high-water mark ensuring we're at an end of line
|
104
|
+
if @input && !@input.eof?
|
104
105
|
diff = @options[:high_water] - rest_size
|
105
106
|
string = encode_utf8(@input.read(diff))
|
106
107
|
string << encode_utf8(@input.gets) unless @input.eof?
|
@@ -108,9 +109,19 @@ module EBNF::LL1
|
|
108
109
|
end
|
109
110
|
end
|
110
111
|
|
112
|
+
private
|
113
|
+
# Maintain low-water mark
|
114
|
+
def feed_me
|
115
|
+
ensure_buffer_full if rest_size < @options[:low_water]
|
116
|
+
end
|
117
|
+
|
111
118
|
# Perform UTF-8 encoding of input
|
112
119
|
def encode_utf8(string)
|
113
|
-
string
|
120
|
+
if string && string.encoding != Encoding::UTF_8
|
121
|
+
string = string.dup if string.frozen?
|
122
|
+
string.force_encoding(Encoding::UTF_8)
|
123
|
+
end
|
124
|
+
string
|
114
125
|
end
|
115
126
|
end
|
116
127
|
end
|
data/lib/ebnf/rule.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ebnf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.9
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gregg Kellogg
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sxp
|
@@ -175,7 +175,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
175
175
|
version: '0'
|
176
176
|
requirements: []
|
177
177
|
rubyforge_project:
|
178
|
-
rubygems_version: 2.4.
|
178
|
+
rubygems_version: 2.4.5.1
|
179
179
|
signing_key:
|
180
180
|
specification_version: 4
|
181
181
|
summary: EBNF parser and parser generator.
|