ebnf 0.3.9 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +6 -2
- data/VERSION +1 -1
- data/etc/ebnf.ebnf +1 -1
- data/etc/ebnf.html +2 -1
- data/etc/ebnf.ll1.sxp +1 -1
- data/etc/ebnf.rb +1 -1
- data/etc/ebnf.sxp +1 -1
- data/lib/ebnf/ll1/lexer.rb +11 -3
- data/lib/ebnf/ll1/parser.rb +2 -0
- data/lib/ebnf/ll1/scanner.rb +22 -11
- data/lib/ebnf/rule.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 80ecfa0591fe3d22e557724d70f4ba384f0809bd
|
4
|
+
data.tar.gz: 96c0de9edaa30e4d8a063cc9f635a2157d85ba7e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5e952a2ccac175a29d4860f802eb3e5eb28ad41db4a36fca9eac98370d93b4543c12c775d698d9492ab28c876851117b9ea51f35824a040df20d06c52fb3f8b5
|
7
|
+
data.tar.gz: c8d2135602e8cfe704e7cb677dcea4e9ec500067a54bb41cb44f0c7fe4f99df9eba59e4a1b51717e5b7559bff6cf159e309e5224678cef1e63b3654b14c9d529
|
data/README.md
CHANGED
@@ -1,8 +1,12 @@
|
|
1
|
-
# EBNF
|
2
|
-
|
1
|
+
# EBNF
|
3
2
|
|
4
3
|
[EBNF][] parser and generic parser generator.
|
5
4
|
|
5
|
+
[![Gem Version](https://badge.fury.io/rb/ebnf.png)](http://badge.fury.io/rb/ebnf)
|
6
|
+
[![Build Status](https://secure.travis-ci.org/gkellogg/ebnf.png?branch=master)](http://travis-ci.org/gkellogg/ebnf)
|
7
|
+
[![Coverage Status](https://coveralls.io/repos/gkellogg/ebnf/badge.svg)](https://coveralls.io/r/gkellogg/ebnf)
|
8
|
+
[![Dependency Status](https://gemnasium.com/gkellogg/ebnf.png)](https://gemnasium.com/gkellogg/ebnf)
|
9
|
+
|
6
10
|
## Description
|
7
11
|
This is a [Ruby][] implementation of an [EBNF][] and [BNF][] parser and parser generator.
|
8
12
|
It parses [EBNF][] grammars to [BNF][], generates [First/Follow and Branch][] tables for
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.3.9
|
1
|
+
1.0.0
|
data/etc/ebnf.ebnf
CHANGED
data/etc/ebnf.html
CHANGED
@@ -155,7 +155,8 @@
|
|
155
155
|
<td>::=</td>
|
156
156
|
<td>
|
157
157
|
"<code class="grammar-literal">'</code>"
|
158
|
-
|
158
|
+
(<a href="#grammar-production-CHAR">CHAR</a> <code>-</code> "<code class="grammar-literal">'</code>")<code>*</code>
|
159
|
+
"<code class="grammar-literal">'</code>"
|
159
160
|
</td>
|
160
161
|
</tr>
|
161
162
|
<tr id='grammar-production-CHAR'>
|
data/etc/ebnf.ll1.sxp
CHANGED
@@ -149,7 +149,7 @@
|
|
149
149
|
(terminal O_RANGE "15"
|
150
150
|
(seq "[^" (plus (alt (seq R_BEGIN (alt HEX R_CHAR)) (alt HEX R_CHAR))) "]"))
|
151
151
|
(terminal STRING1 "16" (seq "\"" (star (diff CHAR "\"")) "\""))
|
152
|
-
(terminal STRING2 "17" (seq "'" (diff CHAR "'")))
|
152
|
+
(terminal STRING2 "17" (seq "'" (star (diff CHAR "'")) "'"))
|
153
153
|
(terminal CHAR "18" (alt HEX (range "#x20#x21#x22") (range "#x24-#x00FFFFFF")))
|
154
154
|
(terminal R_CHAR "19" (diff CHAR "]"))
|
155
155
|
(terminal R_BEGIN "20" (seq (alt HEX R_CHAR) "-"))
|
data/etc/ebnf.rb
CHANGED
data/etc/ebnf.sxp
CHANGED
@@ -19,7 +19,7 @@
|
|
19
19
|
(terminal O_RANGE "15"
|
20
20
|
(seq "[^" (plus (alt (seq R_BEGIN (alt HEX R_CHAR)) (alt HEX R_CHAR))) "]"))
|
21
21
|
(terminal STRING1 "16" (seq "\"" (star (diff CHAR "\"")) "\""))
|
22
|
-
(terminal STRING2 "17" (seq "'" (diff CHAR "'")))
|
22
|
+
(terminal STRING2 "17" (seq "'" (star (diff CHAR "'")) "'"))
|
23
23
|
(terminal CHAR "18" (alt HEX (range "#x20#x21#x22") (range "#x24-#x00FFFFFF")))
|
24
24
|
(terminal R_CHAR "19" (diff CHAR "]"))
|
25
25
|
(terminal R_BEGIN "20" (seq (alt HEX R_CHAR) "-"))
|
data/lib/ebnf/ll1/lexer.rb
CHANGED
@@ -113,6 +113,8 @@ module EBNF::LL1
|
|
113
113
|
# @param [Hash{Symbol => Object}] options
|
114
114
|
# @option options [Regexp] :whitespace
|
115
115
|
# Whitespace between tokens, including comments
|
116
|
+
# @option options[Integer] :high_water passed to scanner
|
117
|
+
# @option options[Integer] :low_water passed to scanner
|
116
118
|
def initialize(input = nil, terminals = nil, options = {})
|
117
119
|
@options = options.dup
|
118
120
|
@whitespace = @options[:whitespace]
|
@@ -123,7 +125,7 @@ module EBNF::LL1
|
|
123
125
|
raise Error, "Terminal patterns not defined" unless @terminals && @terminals.length > 0
|
124
126
|
|
125
127
|
@lineno = 1
|
126
|
-
@scanner = Scanner.new(input)
|
128
|
+
@scanner = Scanner.new(input, options)
|
127
129
|
end
|
128
130
|
|
129
131
|
##
|
@@ -261,6 +263,10 @@ module EBNF::LL1
|
|
261
263
|
def match_token
|
262
264
|
@terminals.each do |term|
|
263
265
|
#STDERR.puts "match[#{term.type}] #{scanner.rest[0..100].inspect} against #{term.regexp.inspect}" #if term.type == :STRING_LITERAL_SINGLE_QUOTE
|
266
|
+
if term.partial_regexp && scanner.match?(term.partial_regexp) && !scanner.match?(term.regexp)
|
267
|
+
scanner.ensure_buffer_full
|
268
|
+
end
|
269
|
+
|
264
270
|
if matched = scanner.scan(term.regexp)
|
265
271
|
#STDERR.puts " matched #{term.type.inspect}: #{matched.inspect}"
|
266
272
|
tok = token(term.type, term.canonicalize(matched))
|
@@ -278,6 +284,7 @@ module EBNF::LL1
|
|
278
284
|
class Terminal
|
279
285
|
attr_reader :type
|
280
286
|
attr_reader :regexp
|
287
|
+
attr_reader :partial_regexp
|
281
288
|
|
282
289
|
# @param [Symbol, nil] type
|
283
290
|
# @param [Regexp] regexp
|
@@ -287,8 +294,11 @@ module EBNF::LL1
|
|
287
294
|
# their canonical value
|
288
295
|
# @option options [Boolean] :unescape
|
289
296
|
# Cause strings and codepoints to be unescaped.
|
297
|
+
# @option options [Regexp] :partial_regexp
|
298
|
+
# A regular expression matching the beginning of this terminal; useful for terminals that match things longer than the scanner low water mark.
|
290
299
|
def initialize(type, regexp, options = {})
|
291
300
|
@type, @regexp, @options = type, regexp, options
|
301
|
+
@partial_regexp = options[:partial_regexp]
|
292
302
|
@map = options.fetch(:map, {})
|
293
303
|
end
|
294
304
|
|
@@ -327,8 +337,6 @@ module EBNF::LL1
|
|
327
337
|
|
328
338
|
end
|
329
339
|
|
330
|
-
protected
|
331
|
-
|
332
340
|
##
|
333
341
|
# Constructs a new token object annotated with the current line number.
|
334
342
|
#
|
data/lib/ebnf/ll1/parser.rb
CHANGED
@@ -206,6 +206,8 @@ module EBNF::LL1
|
|
206
206
|
# Detailed debug output
|
207
207
|
# @option options [Boolean] :reset_on_start
|
208
208
|
# Reset the parser state if the start token set with `prod` is found in a production. This reduces the production stack depth growth, which is appropriate for some grammars.
|
209
|
+
# @option options[Integer] :high_water passed to lexer
|
210
|
+
# @option options[Integer] :low_water passed to lexer
|
209
211
|
# @yield [context, *data]
|
210
212
|
# Yields for to return data to parser
|
211
213
|
# @yieldparam [:statement, :trace] context
|
data/lib/ebnf/ll1/scanner.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
require 'strscan' unless defined?(StringScanner)
|
2
3
|
|
3
4
|
module EBNF::LL1
|
@@ -9,8 +10,8 @@ module EBNF::LL1
|
|
9
10
|
#
|
10
11
|
# FIXME: Only implements the subset required by the Lexer for now.
|
11
12
|
class Scanner < StringScanner
|
12
|
-
HIGH_WATER =
|
13
|
-
LOW_WATER =
|
13
|
+
HIGH_WATER = 512 * 1024 # Hopefully large enough to deal with long multi-line comments
|
14
|
+
LOW_WATER = 4 * 1024
|
14
15
|
|
15
16
|
##
|
16
17
|
# @return [IO, StringIO]
|
@@ -25,14 +26,14 @@ module EBNF::LL1
|
|
25
26
|
# @option options[Integer] :low_water (LOW_WATER)
|
26
27
|
# @return [Scanner]
|
27
28
|
def initialize(input, options = {})
|
28
|
-
@options = options.merge(:
|
29
|
+
@options = options.merge(high_water: HIGH_WATER, low_water: LOW_WATER)
|
29
30
|
|
30
31
|
if input.respond_to?(:read)
|
31
32
|
@input = input
|
32
33
|
super("")
|
33
34
|
feed_me
|
34
35
|
else
|
35
|
-
super(input.to_s)
|
36
|
+
super(encode_utf8 input.to_s)
|
36
37
|
end
|
37
38
|
end
|
38
39
|
|
@@ -95,12 +96,12 @@ module EBNF::LL1
|
|
95
96
|
feed_me
|
96
97
|
encode_utf8 super
|
97
98
|
end
|
98
|
-
|
99
|
-
|
100
|
-
#
|
101
|
-
def
|
102
|
-
|
103
|
-
|
99
|
+
|
100
|
+
##
|
101
|
+
# Ensures that the input buffer is full to the high water mark, or end of file. Useful when matching tokens that may be longer than the low water mark
|
102
|
+
def ensure_buffer_full
|
103
|
+
# Read up to high-water mark ensuring we're at an end of line
|
104
|
+
if @input && !@input.eof?
|
104
105
|
diff = @options[:high_water] - rest_size
|
105
106
|
string = encode_utf8(@input.read(diff))
|
106
107
|
string << encode_utf8(@input.gets) unless @input.eof?
|
@@ -108,9 +109,19 @@ module EBNF::LL1
|
|
108
109
|
end
|
109
110
|
end
|
110
111
|
|
112
|
+
private
|
113
|
+
# Maintain low-water mark
|
114
|
+
def feed_me
|
115
|
+
ensure_buffer_full if rest_size < @options[:low_water]
|
116
|
+
end
|
117
|
+
|
111
118
|
# Perform UTF-8 encoding of input
|
112
119
|
def encode_utf8(string)
|
113
|
-
string
|
120
|
+
if string && string.encoding != Encoding::UTF_8
|
121
|
+
string = string.dup if string.frozen?
|
122
|
+
string.force_encoding(Encoding::UTF_8)
|
123
|
+
end
|
124
|
+
string
|
114
125
|
end
|
115
126
|
end
|
116
127
|
end
|
data/lib/ebnf/rule.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ebnf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.9
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gregg Kellogg
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sxp
|
@@ -175,7 +175,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
175
175
|
version: '0'
|
176
176
|
requirements: []
|
177
177
|
rubyforge_project:
|
178
|
-
rubygems_version: 2.4.
|
178
|
+
rubygems_version: 2.4.5.1
|
179
179
|
signing_key:
|
180
180
|
specification_version: 4
|
181
181
|
summary: EBNF parser and parser generator.
|