ebnf 0.3.9 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b0a5b5d879cad3c68665f4cc360299ec06934f91
4
- data.tar.gz: 3310fb9e5dc956f6d51cde5d6a4cc816ef78fc4e
3
+ metadata.gz: 80ecfa0591fe3d22e557724d70f4ba384f0809bd
4
+ data.tar.gz: 96c0de9edaa30e4d8a063cc9f635a2157d85ba7e
5
5
  SHA512:
6
- metadata.gz: c4f1450e2035d5f0457b98a00d7e89b4a397f6899d16fa0bcfa78c4cb88d287eaff5e4ef5435c2a06612b3d972b544770300ce1608f743215cde3aee0b275385
7
- data.tar.gz: a44d8ed56897dcb5eb224c7dba62d5876e50e979eb0d95459fa7cd2714626d4414f11d27b1661f3fefe755766362ba7048ee3bb4256b4bc888d3fe6703e7b44c
6
+ metadata.gz: 5e952a2ccac175a29d4860f802eb3e5eb28ad41db4a36fca9eac98370d93b4543c12c775d698d9492ab28c876851117b9ea51f35824a040df20d06c52fb3f8b5
7
+ data.tar.gz: c8d2135602e8cfe704e7cb677dcea4e9ec500067a54bb41cb44f0c7fe4f99df9eba59e4a1b51717e5b7559bff6cf159e309e5224678cef1e63b3654b14c9d529
data/README.md CHANGED
@@ -1,8 +1,12 @@
1
- # EBNF [![Build Status](https://secure.travis-ci.org/gkellogg/ebnf.png?branch=master)](http://travis-ci.org/gkellogg/ebnf)
2
-
1
+ # EBNF
3
2
 
4
3
  [EBNF][] parser and generic parser generator.
5
4
 
5
+ [![Gem Version](https://badge.fury.io/rb/ebnf.png)](http://badge.fury.io/rb/ebnf)
6
+ [![Build Status](https://secure.travis-ci.org/gkellogg/ebnf.png?branch=master)](http://travis-ci.org/gkellogg/ebnf)
7
+ [![Coverage Status](https://coveralls.io/repos/gkellogg/ebnf/badge.svg)](https://coveralls.io/r/gkellogg/ebnf)
8
+ [![Dependency Status](https://gemnasium.com/gkellogg/ebnf.png)](https://gemnasium.com/gkellogg/ebnf)
9
+
6
10
  ## Description
7
11
  This is a [Ruby][] implementation of an [EBNF][] and [BNF][] parser and parser generator.
8
12
  It parses [EBNF][] grammars to [BNF][], generates [First/Follow and Branch][] tables for
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.9
1
+ 1.0.0
@@ -45,7 +45,7 @@
45
45
  # Strings are unescaped Unicode, excepting control characters and hash (#)
46
46
  [16] STRING1 ::= '"' (CHAR - '"')* '"'
47
47
 
48
- [17] STRING2 ::= "'" (CHAR - "'"))* "'"
48
+ [17] STRING2 ::= "'" (CHAR - "'")* "'"
49
49
 
50
50
  [18] CHAR ::= HEX
51
51
  | [#x20#x21#x22]
@@ -155,7 +155,8 @@
155
155
  <td>::=</td>
156
156
  <td>
157
157
  "<code class="grammar-literal">'</code>"
158
- <code>(</code> <a href="#grammar-production-CHAR">CHAR</a> <code>-</code> "<code class="grammar-literal">'</code>"<code>)</code>
158
+ (<a href="#grammar-production-CHAR">CHAR</a> <code>-</code> "<code class="grammar-literal">'</code>")<code>*</code>
159
+ "<code class="grammar-literal">'</code>"
159
160
  </td>
160
161
  </tr>
161
162
  <tr id='grammar-production-CHAR'>
@@ -149,7 +149,7 @@
149
149
  (terminal O_RANGE "15"
150
150
  (seq "[^" (plus (alt (seq R_BEGIN (alt HEX R_CHAR)) (alt HEX R_CHAR))) "]"))
151
151
  (terminal STRING1 "16" (seq "\"" (star (diff CHAR "\"")) "\""))
152
- (terminal STRING2 "17" (seq "'" (diff CHAR "'")))
152
+ (terminal STRING2 "17" (seq "'" (star (diff CHAR "'")) "'"))
153
153
  (terminal CHAR "18" (alt HEX (range "#x20#x21#x22") (range "#x24-#x00FFFFFF")))
154
154
  (terminal R_CHAR "19" (diff CHAR "]"))
155
155
  (terminal R_BEGIN "20" (seq (alt HEX R_CHAR) "-"))
@@ -1,4 +1,4 @@
1
- # This file is automatically generated by bin/ebnf
1
+ # This file is automatically generated by /Users/gregg/Projects/ebnf/lib/ebnf/base.rb
2
2
  # BRANCH derived from etc/ebnf.ebnf
3
3
  module Branch
4
4
  START = :ebnf
@@ -19,7 +19,7 @@
19
19
  (terminal O_RANGE "15"
20
20
  (seq "[^" (plus (alt (seq R_BEGIN (alt HEX R_CHAR)) (alt HEX R_CHAR))) "]"))
21
21
  (terminal STRING1 "16" (seq "\"" (star (diff CHAR "\"")) "\""))
22
- (terminal STRING2 "17" (seq "'" (diff CHAR "'")))
22
+ (terminal STRING2 "17" (seq "'" (star (diff CHAR "'")) "'"))
23
23
  (terminal CHAR "18" (alt HEX (range "#x20#x21#x22") (range "#x24-#x00FFFFFF")))
24
24
  (terminal R_CHAR "19" (diff CHAR "]"))
25
25
  (terminal R_BEGIN "20" (seq (alt HEX R_CHAR) "-"))
@@ -113,6 +113,8 @@ module EBNF::LL1
113
113
  # @param [Hash{Symbol => Object}] options
114
114
  # @option options [Regexp] :whitespace
115
115
  # Whitespace between tokens, including comments
116
+ # @option options [Integer] :high_water passed to scanner
117
+ # @option options [Integer] :low_water passed to scanner
116
118
  def initialize(input = nil, terminals = nil, options = {})
117
119
  @options = options.dup
118
120
  @whitespace = @options[:whitespace]
@@ -123,7 +125,7 @@ module EBNF::LL1
123
125
  raise Error, "Terminal patterns not defined" unless @terminals && @terminals.length > 0
124
126
 
125
127
  @lineno = 1
126
- @scanner = Scanner.new(input)
128
+ @scanner = Scanner.new(input, options)
127
129
  end
128
130
 
129
131
  ##
@@ -261,6 +263,10 @@ module EBNF::LL1
261
263
  def match_token
262
264
  @terminals.each do |term|
263
265
  #STDERR.puts "match[#{term.type}] #{scanner.rest[0..100].inspect} against #{term.regexp.inspect}" #if term.type == :STRING_LITERAL_SINGLE_QUOTE
266
+ if term.partial_regexp && scanner.match?(term.partial_regexp) && !scanner.match?(term.regexp)
267
+ scanner.ensure_buffer_full
268
+ end
269
+
264
270
  if matched = scanner.scan(term.regexp)
265
271
  #STDERR.puts " matched #{term.type.inspect}: #{matched.inspect}"
266
272
  tok = token(term.type, term.canonicalize(matched))
@@ -278,6 +284,7 @@ module EBNF::LL1
278
284
  class Terminal
279
285
  attr_reader :type
280
286
  attr_reader :regexp
287
+ attr_reader :partial_regexp
281
288
 
282
289
  # @param [Symbol, nil] type
283
290
  # @param [Regexp] regexp
@@ -287,8 +294,11 @@ module EBNF::LL1
287
294
  # their canonical value
288
295
  # @option options [Boolean] :unescape
289
296
  # Cause strings and codepoints to be unescaped.
297
+ # @option options [Regexp] :partial_regexp
298
+ # A regular expression matching the beginning of this terminal; useful for terminals that match things longer than the scanner low water mark.
290
299
  def initialize(type, regexp, options = {})
291
300
  @type, @regexp, @options = type, regexp, options
301
+ @partial_regexp = options[:partial_regexp]
292
302
  @map = options.fetch(:map, {})
293
303
  end
294
304
 
@@ -327,8 +337,6 @@ module EBNF::LL1
327
337
 
328
338
  end
329
339
 
330
- protected
331
-
332
340
  ##
333
341
  # Constructs a new token object annotated with the current line number.
334
342
  #
@@ -206,6 +206,8 @@ module EBNF::LL1
206
206
  # Detailed debug output
207
207
  # @option options [Boolean] :reset_on_start
208
208
  # Reset the parser state if the start token set with `prod` is found in a production. This reduces the production stack depth growth, which is appropriate for some grammars.
209
+ # @option options [Integer] :high_water passed to lexer
210
+ # @option options [Integer] :low_water passed to lexer
209
211
  # @yield [context, *data]
210
212
  # Yields to return data to the parser
211
213
  # @yieldparam [:statement, :trace] context
@@ -1,3 +1,4 @@
1
+ # coding: utf-8
1
2
  require 'strscan' unless defined?(StringScanner)
2
3
 
3
4
  module EBNF::LL1
@@ -9,8 +10,8 @@ module EBNF::LL1
9
10
  #
10
11
  # FIXME: Only implements the subset required by the Lexer for now.
11
12
  class Scanner < StringScanner
12
- HIGH_WATER = 10240
13
- LOW_WATER = 2048 # Hopefully large enough to deal with long multi-line comments
13
+ HIGH_WATER = 512 * 1024 # Hopefully large enough to deal with long multi-line comments
14
+ LOW_WATER = 4 * 1024
14
15
 
15
16
  ##
16
17
  # @return [IO, StringIO]
@@ -25,14 +26,14 @@ module EBNF::LL1
25
26
  # @option options[Integer] :low_water (LOW_WATER)
26
27
  # @return [Scanner]
27
28
  def initialize(input, options = {})
28
- @options = options.merge(:high_water => HIGH_WATER, :low_water => LOW_WATER)
29
+ @options = options.merge(high_water: HIGH_WATER, low_water: LOW_WATER)
29
30
 
30
31
  if input.respond_to?(:read)
31
32
  @input = input
32
33
  super("")
33
34
  feed_me
34
35
  else
35
- super(input.to_s)
36
+ super(encode_utf8 input.to_s)
36
37
  end
37
38
  end
38
39
 
@@ -95,12 +96,12 @@ module EBNF::LL1
95
96
  feed_me
96
97
  encode_utf8 super
97
98
  end
98
-
99
- private
100
- # Maintain low-water mark
101
- def feed_me
102
- if rest_size < @options[:low_water] && @input && !@input.eof?
103
- # Read up to high-water mark ensuring we're at an end of line
99
+
100
+ ##
101
+ # Ensures that the input buffer is full to the high-water mark, or end of file. Useful when matching tokens that may be longer than the low-water mark.
102
+ def ensure_buffer_full
103
+ # Read up to high-water mark ensuring we're at an end of line
104
+ if @input && !@input.eof?
104
105
  diff = @options[:high_water] - rest_size
105
106
  string = encode_utf8(@input.read(diff))
106
107
  string << encode_utf8(@input.gets) unless @input.eof?
@@ -108,9 +109,19 @@ module EBNF::LL1
108
109
  end
109
110
  end
110
111
 
112
+ private
113
+ # Maintain low-water mark
114
+ def feed_me
115
+ ensure_buffer_full if rest_size < @options[:low_water]
116
+ end
117
+
111
118
  # Perform UTF-8 encoding of input
112
119
  def encode_utf8(string)
113
- string.respond_to?(:force_encoding) ? string.force_encoding(Encoding::UTF_8) : string
120
+ if string && string.encoding != Encoding::UTF_8
121
+ string = string.dup if string.frozen?
122
+ string.force_encoding(Encoding::UTF_8)
123
+ end
124
+ string
114
125
  end
115
126
  end
116
127
  end
@@ -142,7 +142,7 @@ module EBNF
142
142
  # Serializes this rule to Turtle
143
143
  # @return [String]
144
144
  def to_ttl
145
- @ebnf.debug("to_ttl") {inspect}
145
+ @ebnf.debug("to_ttl") {inspect} if @ebnf
146
146
  comment = orig.strip.
147
147
  gsub(/"""/, '\"\"\"').
148
148
  gsub("\\", "\\\\").
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ebnf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.9
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gregg Kellogg
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-01 00:00:00.000000000 Z
11
+ date: 2015-10-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sxp
@@ -175,7 +175,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
175
175
  version: '0'
176
176
  requirements: []
177
177
  rubyforge_project:
178
- rubygems_version: 2.4.3
178
+ rubygems_version: 2.4.5.1
179
179
  signing_key:
180
180
  specification_version: 4
181
181
  summary: EBNF parser and parser generator.