ebnf 2.1.2 → 2.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/VERSION +1 -1
- data/bin/ebnf +5 -6
- data/etc/doap.ttl +1 -4
- data/etc/ebnf.html +22 -117
- data/etc/ebnf.ll1.rb +1 -1
- data/etc/ebnf.peg.rb +1 -1
- data/lib/ebnf/base.rb +5 -3
- data/lib/ebnf/ll1/lexer.rb +4 -28
- data/lib/ebnf/native.rb +2 -2
- data/lib/ebnf/peg/parser.rb +19 -4
- data/lib/ebnf/peg/rule.rb +39 -12
- data/lib/ebnf/rule.rb +4 -2
- data/lib/ebnf/unescape.rb +62 -0
- data/lib/ebnf/writer.rb +39 -17
- data/lib/ebnf.rb +1 -0
- metadata +46 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c56b57d125d152b6bfcd4c77b3a2d160ba1e7e11fc538c182085868fe5b2d4a4
|
4
|
+
data.tar.gz: c7c3987ce05152cc1efa031f0ecc03f2bc111442fbcbbcb3d2cf82b85f59e4cf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9692cedc65cfde26fd21781f01a3f44c52f69de178893b63ce94b74e9a9ef24218979a9693eec18c02469bf69ea9b15a9b248817958b5c6a6874ec078ac02091
|
7
|
+
data.tar.gz: dc8c937d4ba0f70e4b981cd4c91a40b2cd59c6803e1b5409f5bdc8f5dea6acc4af2c176f26e1cde805077c035aa93baf3d8e0b15858238380e2a1c25a6f1c32b
|
data/README.md
CHANGED
@@ -3,8 +3,9 @@
|
|
3
3
|
[EBNF][] parser and generic parser generator.
|
4
4
|
|
5
5
|
[![Gem Version](https://badge.fury.io/rb/ebnf.png)](https://badge.fury.io/rb/ebnf)
|
6
|
-
[![Build Status](https://
|
7
|
-
[![Coverage Status](https://coveralls.io/repos/dryruby/ebnf/badge.svg)](https://coveralls.io/r/dryruby/ebnf)
|
6
|
+
[![Build Status](https://github.com/dryruby/ebnf/workflows/CI/badge.svg?branch=develop)](https://github.com/dryruby/ebnf/actions?query=workflow%3ACI)
|
7
|
+
[![Coverage Status](https://coveralls.io/repos/dryruby/ebnf/badge.svg?branch=develop)](https://coveralls.io/r/dryruby/ebnf?branch=develop)
|
8
|
+
[![Gitter chat](https://badges.gitter.im/ruby-rdf/rdf.png)](https://gitter.im/ruby-rdf/rdf)
|
8
9
|
|
9
10
|
## Description
|
10
11
|
This is a [Ruby][] implementation of an [EBNF][] and [BNF][] parser and parser generator.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.3.0
|
data/bin/ebnf
CHANGED
@@ -6,7 +6,6 @@
|
|
6
6
|
# * And with First/Follow rules
|
7
7
|
|
8
8
|
$:.unshift(File.expand_path(File.join(File.dirname(__FILE__), "..", 'lib')))
|
9
|
-
require "bundler/setup"
|
10
9
|
require 'rubygems'
|
11
10
|
require 'getoptlong'
|
12
11
|
require 'ebnf'
|
@@ -34,7 +33,7 @@ OPT_ARGS = [
|
|
34
33
|
["--prefix", "-p", GetoptLong::REQUIRED_ARGUMENT,"Prefix to use when generating Turtle"],
|
35
34
|
["--progress", "-v", GetoptLong::NO_ARGUMENT, "Detail on execution"],
|
36
35
|
["--renumber", GetoptLong::NO_ARGUMENT, "Renumber parsed reules"],
|
37
|
-
["--validate", GetoptLong::NO_ARGUMENT, "Validate grammar"],
|
36
|
+
["--validate", GetoptLong::NO_ARGUMENT, "Validate grammar and any generated HTML"],
|
38
37
|
["--help", "-?", GetoptLong::NO_ARGUMENT, "This message"]
|
39
38
|
]
|
40
39
|
def usage
|
@@ -67,7 +66,7 @@ opts.each do |opt, arg|
|
|
67
66
|
end
|
68
67
|
options[:format] = arg.to_sym
|
69
68
|
when '--format'
|
70
|
-
unless %w(abnf abnfh ebnf html isoebnf isoebnfh rb sxp).include?(arg)
|
69
|
+
unless %w(abnf abnfh ebnf html isoebnf isoebnfh rb sxp ttl).include?(arg)
|
71
70
|
STDERR.puts("unrecognized output format #{arg}")
|
72
71
|
usage
|
73
72
|
end
|
@@ -99,11 +98,11 @@ ebnf.renumber! if options[:renumber]
|
|
99
98
|
|
100
99
|
res = case options[:output_format]
|
101
100
|
when :abnf then ebnf.to_s(format: :abnf)
|
102
|
-
when :abnfh then ebnf.to_html(format: :abnf)
|
101
|
+
when :abnfh then ebnf.to_html(format: :abnf, validate: options[:validate])
|
103
102
|
when :ebnf then ebnf.to_s
|
104
|
-
when :html then ebnf.to_html
|
103
|
+
when :html then ebnf.to_html(validate: options[:validate])
|
105
104
|
when :isoebnf then ebnf.to_s(format: :isoebnf)
|
106
|
-
when :isoebnfh then ebnf.to_html(format: :isoebnf)
|
105
|
+
when :isoebnfh then ebnf.to_html(format: :isoebnf, validate: options[:validate])
|
107
106
|
when :sxp then ebnf.to_sxp
|
108
107
|
when :ttl then ebnf.to_ttl(options[:prefix], options[:namespace])
|
109
108
|
when :rb then ebnf.to_ruby(out, grammarFile: ARGV[0], **options)
|
data/etc/doap.ttl
CHANGED
@@ -12,7 +12,7 @@
|
|
12
12
|
doap:name "ebnf" ;
|
13
13
|
doap:homepage <https://github.com/dryruby/ebnf> ;
|
14
14
|
doap:license <https://unlicense.org/1.0/> ;
|
15
|
-
doap:shortdesc "EBNF parser and parser generator"@en ;
|
15
|
+
doap:shortdesc "EBNF parser and parser generator in Ruby."@en ;
|
16
16
|
doap:description "EBNF is a Ruby parser for W3C EBNF and a parser generator for PEG and LL(1). Also includes parsing modes for ISO EBNF and ABNF."@en ;
|
17
17
|
doap:created "2011-08-29"^^xsd:date ;
|
18
18
|
doap:programming-language "Ruby" ;
|
@@ -34,7 +34,4 @@
|
|
34
34
|
doap:maintainer <https://greggkellogg.net/foaf#me> ;
|
35
35
|
doap:documenter <https://greggkellogg.net/foaf#me> ;
|
36
36
|
foaf:maker <https://greggkellogg.net/foaf#me> ;
|
37
|
-
dc:title "ebnf" ;
|
38
|
-
dc:description "EBNF is a Ruby parser for W3C EBNF and a parser generator for PEG and LL(1). Also includes parsing modes for ISO EBNF and ABNF."@en ;
|
39
|
-
dc:date "2011-08-29"^^xsd:date ;
|
40
37
|
dc:creator <https://greggkellogg.net/foaf#me> .
|
data/etc/ebnf.html
CHANGED
@@ -11,7 +11,7 @@
|
|
11
11
|
<td>[2]</td>
|
12
12
|
<td><code>declaration</code></td>
|
13
13
|
<td>::=</td>
|
14
|
-
<td
|
14
|
+
<td>"@terminals" <code>|</code> <a href="#grammar-production-pass">pass</a></td>
|
15
15
|
</tr>
|
16
16
|
<tr id="grammar-production-rule">
|
17
17
|
<td>[3]</td>
|
@@ -53,61 +53,24 @@
|
|
53
53
|
<td>[9]</td>
|
54
54
|
<td><code>primary</code></td>
|
55
55
|
<td>::=</td>
|
56
|
-
<td><a href="#grammar-production-HEX">HEX</a
|
57
|
-
</tr>
|
58
|
-
<tr>
|
59
|
-
<td>[9]</td>
|
60
|
-
<td><code></code></td>
|
61
|
-
<td>|</td>
|
62
|
-
<td><a href="#grammar-production-SYMBOL">SYMBOL</a></td>
|
63
|
-
</tr>
|
64
|
-
<tr>
|
65
|
-
<td>[9]</td>
|
66
|
-
<td><code></code></td>
|
67
|
-
<td>|</td>
|
68
|
-
<td><a href="#grammar-production-O_RANGE">O_RANGE</a></td>
|
69
|
-
</tr>
|
70
|
-
<tr>
|
71
|
-
<td>[9]</td>
|
72
|
-
<td><code></code></td>
|
73
|
-
<td>|</td>
|
74
|
-
<td><a href="#grammar-production-RANGE">RANGE</a></td>
|
75
|
-
</tr>
|
76
|
-
<tr>
|
77
|
-
<td>[9]</td>
|
78
|
-
<td><code></code></td>
|
79
|
-
<td>|</td>
|
80
|
-
<td><a href="#grammar-production-STRING1">STRING1</a></td>
|
81
|
-
</tr>
|
82
|
-
<tr>
|
83
|
-
<td>[9]</td>
|
84
|
-
<td><code></code></td>
|
85
|
-
<td>|</td>
|
86
|
-
<td><a href="#grammar-production-STRING2">STRING2</a></td>
|
87
|
-
</tr>
|
88
|
-
<tr>
|
89
|
-
<td>[9]</td>
|
90
|
-
<td><code></code></td>
|
91
|
-
<td>|</td>
|
92
|
-
<td><code>(</code> "<code class="grammar-literal">(</code>" <a href="#grammar-production-expression">expression</a> "<code class="grammar-literal">)</code>"<code>)</code> </td>
|
56
|
+
<td><a href="#grammar-production-HEX">HEX</a> <code>|</code> <a href="#grammar-production-SYMBOL">SYMBOL</a> <code>|</code> <a href="#grammar-production-O_RANGE">O_RANGE</a> <code>|</code> <a href="#grammar-production-RANGE">RANGE</a> <code>|</code> <a href="#grammar-production-STRING1">STRING1</a> <code>|</code> <a href="#grammar-production-STRING2">STRING2</a> <code>|</code> <code>(</code> "<code class="grammar-literal">(</code>" <a href="#grammar-production-expression">expression</a> "<code class="grammar-literal">)</code>"<code>)</code> </td>
|
93
57
|
</tr>
|
94
58
|
<tr id="grammar-production-pass">
|
95
59
|
<td>[10]</td>
|
96
60
|
<td><code>pass</code></td>
|
97
61
|
<td>::=</td>
|
98
|
-
<td
|
62
|
+
<td>"@pass" <a href="#grammar-production-expression">expression</a></td>
|
99
63
|
</tr>
|
100
|
-
<tr
|
101
|
-
<td>@terminals</td>
|
102
|
-
<td><code></code></td>
|
64
|
+
<tr>
|
65
|
+
<td colspan=2>@terminals</td>
|
103
66
|
<td></td>
|
104
|
-
<td><strong
|
67
|
+
<td><strong># Productions for terminals</strong></td>
|
105
68
|
</tr>
|
106
69
|
<tr id="grammar-production-LHS">
|
107
70
|
<td>[11]</td>
|
108
71
|
<td><code>LHS</code></td>
|
109
72
|
<td>::=</td>
|
110
|
-
<td><code>(</code> "<code class="grammar-literal">[</code>" <a href="#grammar-production-SYMBOL">SYMBOL</a> "<code class="grammar-literal">]</code>" <code class="grammar-char-escape"><abbr title="space">#x20</abbr></code><code>+</code> <code>)</code> <code>?</code> <a href="#grammar-production-SYMBOL">SYMBOL</a> <code class="grammar-char-escape"><abbr title="space">#x20</abbr></code><code>*</code>
|
73
|
+
<td><code>(</code> "<code class="grammar-literal">[</code>" <a href="#grammar-production-SYMBOL">SYMBOL</a> "<code class="grammar-literal">]</code>" <code class="grammar-char-escape"><abbr title="space">#x20</abbr></code><code>+</code> <code>)</code> <code>?</code> <a href="#grammar-production-SYMBOL">SYMBOL</a> <code class="grammar-char-escape"><abbr title="space">#x20</abbr></code><code>*</code> "::="</td>
|
111
74
|
</tr>
|
112
75
|
<tr id="grammar-production-SYMBOL">
|
113
76
|
<td>[12]</td>
|
@@ -119,91 +82,37 @@
|
|
119
82
|
<td>[13]</td>
|
120
83
|
<td><code>HEX</code></td>
|
121
84
|
<td>::=</td>
|
122
|
-
<td
|
85
|
+
<td>"#x" <code>(</code> <code>[</code> <code class="grammar-literal">a-f</code><code>]</code> <code>|</code> <code>[</code> <code class="grammar-literal">A-F</code><code>]</code> <code>|</code> <code>[</code> <code class="grammar-literal">0-9</code><code>]</code> <code>)</code> <code>+</code> </td>
|
123
86
|
</tr>
|
124
87
|
<tr id="grammar-production-RANGE">
|
125
88
|
<td>[14]</td>
|
126
89
|
<td><code>RANGE</code></td>
|
127
90
|
<td>::=</td>
|
128
|
-
<td>"<code class="grammar-literal">[</code>"</td>
|
129
|
-
</tr>
|
130
|
-
<tr id="grammar-production-">
|
131
|
-
<td>[14]</td>
|
132
|
-
<td><code></code></td>
|
133
|
-
<td></td>
|
134
|
-
<td><code>(</code> <code>(</code> <a href="#grammar-production-R_CHAR">R_CHAR</a> "<code class="grammar-literal">-</code>" <a href="#grammar-production-R_CHAR">R_CHAR</a><code>)</code><code>(</code> <a href="#grammar-production-HEX">HEX</a> "<code class="grammar-literal">-</code>" <a href="#grammar-production-HEX">HEX</a><code>)</code> <code>|</code> <a href="#grammar-production-R_CHAR">R_CHAR</a> <code>|</code> <a href="#grammar-production-HEX">HEX</a><code>)</code> <code>+</code></td>
|
135
|
-
</tr>
|
136
|
-
<tr id="grammar-production-">
|
137
|
-
<td>[14]</td>
|
138
|
-
<td><code></code></td>
|
139
|
-
<td></td>
|
140
|
-
<td>"<code class="grammar-literal">-</code>"<code>?</code></td>
|
141
|
-
</tr>
|
142
|
-
<tr id="grammar-production-">
|
143
|
-
<td>[14]</td>
|
144
|
-
<td><code></code></td>
|
145
|
-
<td></td>
|
146
|
-
<td><code>(</code> "<code class="grammar-literal">]</code>" <code>-</code> <a href="#grammar-production-LHS">LHS</a><code>)</code> </td>
|
91
|
+
<td>"<code class="grammar-literal">[</code>" <code>(</code> <code>(</code> <a href="#grammar-production-R_CHAR">R_CHAR</a> "<code class="grammar-literal">-</code>" <a href="#grammar-production-R_CHAR">R_CHAR</a><code>)</code> <code>|</code> <code>(</code> <a href="#grammar-production-HEX">HEX</a> "<code class="grammar-literal">-</code>" <a href="#grammar-production-HEX">HEX</a><code>)</code> <code>|</code> <a href="#grammar-production-R_CHAR">R_CHAR</a> <code>|</code> <a href="#grammar-production-HEX">HEX</a><code>)</code> <code>+</code> "<code class="grammar-literal">-</code>"<code>?</code> <code>(</code> "<code class="grammar-literal">]</code>" <code>-</code> <a href="#grammar-production-LHS">LHS</a><code>)</code> </td>
|
147
92
|
</tr>
|
148
93
|
<tr id="grammar-production-O_RANGE">
|
149
94
|
<td>[15]</td>
|
150
95
|
<td><code>O_RANGE</code></td>
|
151
96
|
<td>::=</td>
|
152
|
-
<td>"
|
153
|
-
</tr>
|
154
|
-
<tr id="grammar-production-">
|
155
|
-
<td>[15]</td>
|
156
|
-
<td><code></code></td>
|
157
|
-
<td></td>
|
158
|
-
<td><code>(</code> <code>(</code> <a href="#grammar-production-R_CHAR">R_CHAR</a> "<code class="grammar-literal">-</code>" <a href="#grammar-production-R_CHAR">R_CHAR</a><code>)</code><code>(</code> <a href="#grammar-production-HEX">HEX</a> "<code class="grammar-literal">-</code>" <a href="#grammar-production-HEX">HEX</a><code>)</code> <code>|</code> <a href="#grammar-production-R_CHAR">R_CHAR</a> <code>|</code> <a href="#grammar-production-HEX">HEX</a><code>)</code> <code>+</code></td>
|
159
|
-
</tr>
|
160
|
-
<tr id="grammar-production-">
|
161
|
-
<td>[15]</td>
|
162
|
-
<td><code></code></td>
|
163
|
-
<td></td>
|
164
|
-
<td>"<code class="grammar-literal">-</code>"<code>?</code></td>
|
165
|
-
</tr>
|
166
|
-
<tr id="grammar-production-">
|
167
|
-
<td>[15]</td>
|
168
|
-
<td><code></code></td>
|
169
|
-
<td></td>
|
170
|
-
<td>"<code class="grammar-literal">]</code>"</td>
|
97
|
+
<td>"[^" <code>(</code> <code>(</code> <a href="#grammar-production-R_CHAR">R_CHAR</a> "<code class="grammar-literal">-</code>" <a href="#grammar-production-R_CHAR">R_CHAR</a><code>)</code> <code>|</code> <code>(</code> <a href="#grammar-production-HEX">HEX</a> "<code class="grammar-literal">-</code>" <a href="#grammar-production-HEX">HEX</a><code>)</code> <code>|</code> <a href="#grammar-production-R_CHAR">R_CHAR</a> <code>|</code> <a href="#grammar-production-HEX">HEX</a><code>)</code> <code>+</code> "<code class="grammar-literal">-</code>"<code>?</code> "<code class="grammar-literal">]</code>"</td>
|
171
98
|
</tr>
|
172
99
|
<tr id="grammar-production-STRING1">
|
173
100
|
<td>[16]</td>
|
174
101
|
<td><code>STRING1</code></td>
|
175
102
|
<td>::=</td>
|
176
|
-
<td>'<code class="grammar-literal"
|
103
|
+
<td>'<code class="grammar-literal">"</code>' <code>(</code> <a href="#grammar-production-CHAR">CHAR</a> <code>-</code> '<code class="grammar-literal">"</code>'<code>)</code> <code>*</code> '<code class="grammar-literal">"</code>'</td>
|
177
104
|
</tr>
|
178
105
|
<tr id="grammar-production-STRING2">
|
179
106
|
<td>[17]</td>
|
180
107
|
<td><code>STRING2</code></td>
|
181
108
|
<td>::=</td>
|
182
|
-
<td>"<code class="grammar-literal"
|
109
|
+
<td>"<code class="grammar-literal">'</code>" <code>(</code> <a href="#grammar-production-CHAR">CHAR</a> <code>-</code> "<code class="grammar-literal">'</code>"<code>)</code> <code>*</code> "<code class="grammar-literal">'</code>"</td>
|
183
110
|
</tr>
|
184
111
|
<tr id="grammar-production-CHAR">
|
185
112
|
<td>[18]</td>
|
186
113
|
<td><code>CHAR</code></td>
|
187
114
|
<td>::=</td>
|
188
|
-
<td><code>[</code> <code class="grammar-char-escape"><abbr title="horizontal tab">#x09</abbr></code><code class="grammar-char-escape"><abbr title="new line">#x0A</abbr></code><code class="grammar-char-escape"><abbr title="carriage return">#x0D</abbr></code><code>]</code></td>
|
189
|
-
</tr>
|
190
|
-
<tr>
|
191
|
-
<td>[18]</td>
|
192
|
-
<td><code></code></td>
|
193
|
-
<td>|</td>
|
194
|
-
<td><code>[</code> <code class="grammar-char-escape"><abbr title="space">#x20</abbr></code><code class="grammar-literal">-</code><code class="grammar-char-escape"><abbr title="unicode ''">#xD7FF</abbr></code><code>]</code></td>
|
195
|
-
</tr>
|
196
|
-
<tr>
|
197
|
-
<td>[18]</td>
|
198
|
-
<td><code></code></td>
|
199
|
-
<td>|</td>
|
200
|
-
<td><code>[</code> <code class="grammar-char-escape"><abbr title="unicode ''">#xE000</abbr></code><code class="grammar-literal">-</code><code class="grammar-char-escape"><abbr title="unicode '�'">#xFFFD</abbr></code><code>]</code></td>
|
201
|
-
</tr>
|
202
|
-
<tr>
|
203
|
-
<td>[18]</td>
|
204
|
-
<td><code></code></td>
|
205
|
-
<td>|</td>
|
206
|
-
<td><code>[</code> <code class="grammar-char-escape"><abbr title="unicode '𐀀'">#x00010000</abbr></code><code class="grammar-literal">-</code><code class="grammar-char-escape"><abbr title="unicode ''">#x0010FFFF</abbr></code><code>]</code> </td>
|
115
|
+
<td><code>[</code> <code class="grammar-char-escape"><abbr title="horizontal tab">#x09</abbr></code><code class="grammar-char-escape"><abbr title="new line">#x0A</abbr></code><code class="grammar-char-escape"><abbr title="carriage return">#x0D</abbr></code><code>]</code> <code>|</code> <code>[</code> <code class="grammar-char-escape"><abbr title="space">#x20</abbr></code><code class="grammar-literal">-</code><code class="grammar-char-escape"><abbr title="unicode 'Reserved'">#xD7FF</abbr></code><code>]</code> <code>|</code> <code>[</code> <code class="grammar-char-escape"><abbr title="unicode 'Private-use'">#xE000</abbr></code><code class="grammar-literal">-</code><code class="grammar-char-escape"><abbr title="unicode 'Graphic'">#xFFFD</abbr></code><code>]</code> <code>|</code> <code>[</code> <code class="grammar-char-escape"><abbr title="unicode 'Graphic'">#x00010000</abbr></code><code class="grammar-literal">-</code><code class="grammar-char-escape"><abbr title="unicode 'Noncharacter'">#x0010FFFF</abbr></code><code>]</code> </td>
|
207
116
|
</tr>
|
208
117
|
<tr id="grammar-production-R_CHAR">
|
209
118
|
<td>[19]</td>
|
@@ -224,28 +133,24 @@
|
|
224
133
|
<td><code>[</code> <code class="grammar-char-escape"><abbr title="horizontal tab">#x09</abbr></code><code class="grammar-char-escape"><abbr title="new line">#x0A</abbr></code><code class="grammar-char-escape"><abbr title="carriage return">#x0D</abbr></code><code class="grammar-char-escape"><abbr title="space">#x20</abbr></code><code>]</code></td>
|
225
134
|
</tr>
|
226
135
|
<tr>
|
227
|
-
<td
|
228
|
-
<td><code></code></td>
|
136
|
+
<td colspan=2></td>
|
229
137
|
<td>|</td>
|
230
|
-
<td><code>(</code> <code>(</code> <code>(</code> "<code class="grammar-literal">#</code>" <code>-</code>
|
138
|
+
<td><code>(</code> <code>(</code> <code>(</code> "<code class="grammar-literal">#</code>" <code>-</code> "#x"<code>)</code> <code>|</code> "//"<code>)</code> <code>[</code> <code class="grammar-literal">^</code><code class="grammar-char-escape"><abbr title="new line">#x0A</abbr></code><code class="grammar-char-escape"><abbr title="carriage return">#x0D</abbr></code><code>]</code> <code>*</code> <code>)</code></td>
|
231
139
|
</tr>
|
232
140
|
<tr>
|
233
|
-
<td
|
234
|
-
<td><code></code></td>
|
141
|
+
<td colspan=2></td>
|
235
142
|
<td>|</td>
|
236
|
-
<td><code>(</code>
|
143
|
+
<td><code>(</code> "/*" <code>(</code> <code>(</code> "<code class="grammar-literal">*</code>" <code>[</code> <code class="grammar-literal">^/</code><code>]</code> <code>)</code> <code>?</code> <code>|</code> <code>[</code> <code class="grammar-literal">^*</code><code>]</code> <code>)</code> <code>*</code> "*/"<code>)</code></td>
|
237
144
|
</tr>
|
238
145
|
<tr>
|
239
|
-
<td
|
240
|
-
<td><code></code></td>
|
146
|
+
<td colspan=2></td>
|
241
147
|
<td>|</td>
|
242
|
-
<td><code>(</code>
|
148
|
+
<td><code>(</code> "(*" <code>(</code> <code>(</code> "<code class="grammar-literal">*</code>" <code>[</code> <code class="grammar-literal">^)</code><code>]</code> <code>)</code> <code>?</code> <code>|</code> <code>[</code> <code class="grammar-literal">^*</code><code>]</code> <code>)</code> <code>*</code> "*)"<code>)</code> </td>
|
243
149
|
</tr>
|
244
|
-
<tr
|
245
|
-
<td>@pass</td>
|
246
|
-
<td><code></code></td>
|
247
|
-
<td></td>
|
150
|
+
<tr>
|
151
|
+
<td colspan=2>@pass</td>
|
248
152
|
<td></td>
|
153
|
+
<td><a href="#grammar-production-PASS">PASS</a></td>
|
249
154
|
</tr>
|
250
155
|
</tbody>
|
251
156
|
</table>
|
data/etc/ebnf.ll1.rb
CHANGED
data/etc/ebnf.peg.rb
CHANGED
data/lib/ebnf/base.rb
CHANGED
@@ -201,8 +201,9 @@ module EBNF
|
|
201
201
|
|
202
202
|
##
|
203
203
|
# Write out parsed syntax string as an S-Expression
|
204
|
+
#
|
204
205
|
# @return [String]
|
205
|
-
def to_sxp
|
206
|
+
def to_sxp(**options)
|
206
207
|
require 'sxp' unless defined?(SXP)
|
207
208
|
SXP::Generator.string(ast.map(&:for_sxp))
|
208
209
|
end
|
@@ -220,9 +221,10 @@ module EBNF
|
|
220
221
|
# Output formatted EBNF as HTML
|
221
222
|
#
|
222
223
|
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
224
|
+
# @param [Boolean] validate (false) validate generated HTML.
|
223
225
|
# @return [String]
|
224
|
-
def to_html(format: :ebnf)
|
225
|
-
Writer.html(*ast, format: format)
|
226
|
+
def to_html(format: :ebnf, validate: false)
|
227
|
+
Writer.html(*ast, format: format, validate: validate)
|
226
228
|
end
|
227
229
|
|
228
230
|
##
|
data/lib/ebnf/ll1/lexer.rb
CHANGED
@@ -32,21 +32,7 @@ module EBNF::LL1
|
|
32
32
|
# @see https://en.wikipedia.org/wiki/Lexical_analysis
|
33
33
|
class Lexer
|
34
34
|
include Enumerable
|
35
|
-
|
36
|
-
ESCAPE_CHARS = {
|
37
|
-
'\\t' => "\t", # \u0009 (tab)
|
38
|
-
'\\n' => "\n", # \u000A (line feed)
|
39
|
-
'\\r' => "\r", # \u000D (carriage return)
|
40
|
-
'\\b' => "\b", # \u0008 (backspace)
|
41
|
-
'\\f' => "\f", # \u000C (form feed)
|
42
|
-
'\\"' => '"', # \u0022 (quotation mark, double quote mark)
|
43
|
-
"\\'" => '\'', # \u0027 (apostrophe-quote, single quote mark)
|
44
|
-
'\\\\' => '\\' # \u005C (backslash)
|
45
|
-
}.freeze
|
46
|
-
ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})/u.freeze # \uXXXX
|
47
|
-
ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})/u.freeze # \UXXXXXXXX
|
48
|
-
ECHAR = /\\./u.freeze # More liberal unescaping
|
49
|
-
UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/n.freeze
|
35
|
+
include ::EBNF::Unescape
|
50
36
|
|
51
37
|
##
|
52
38
|
# @return [Regexp] defines whitespace, including comments, otherwise whitespace must be explicit in terminals
|
@@ -61,17 +47,7 @@ module EBNF::LL1
|
|
61
47
|
# @return [String]
|
62
48
|
# @see https://www.w3.org/TR/rdf-sparql-query/#codepointEscape
|
63
49
|
def self.unescape_codepoints(string)
|
64
|
-
string
|
65
|
-
string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding)
|
66
|
-
|
67
|
-
# Decode \uXXXX and \UXXXXXXXX code points:
|
68
|
-
string = string.gsub(UCHAR) do |c|
|
69
|
-
s = [(c[2..-1]).hex].pack('U*')
|
70
|
-
s.respond_to?(:force_encoding) ? s.force_encoding(Encoding::ASCII_8BIT) : s
|
71
|
-
end
|
72
|
-
|
73
|
-
string.force_encoding(Encoding::UTF_8) if string.respond_to?(:force_encoding)
|
74
|
-
string
|
50
|
+
::EBNF::Unescape.unescape_codepoints(string)
|
75
51
|
end
|
76
52
|
|
77
53
|
##
|
@@ -83,7 +59,7 @@ module EBNF::LL1
|
|
83
59
|
# @return [String]
|
84
60
|
# @see https://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
|
85
61
|
def self.unescape_string(input)
|
86
|
-
|
62
|
+
::EBNF::Unescape.unescape_string(input)
|
87
63
|
end
|
88
64
|
|
89
65
|
##
|
@@ -338,7 +314,7 @@ module EBNF::LL1
|
|
338
314
|
# @return [String]
|
339
315
|
def unescape(string)
|
340
316
|
if @options[:unescape]
|
341
|
-
|
317
|
+
EBNF::Unescape.unescape(string)
|
342
318
|
else
|
343
319
|
string
|
344
320
|
end
|
data/lib/ebnf/native.rb
CHANGED
@@ -287,10 +287,10 @@ module EBNF
|
|
287
287
|
case m = s[0,1]
|
288
288
|
when '"', "'" # STRING1 or STRING2
|
289
289
|
l, s = s[1..-1].split(m.rstrip, 2)
|
290
|
-
[
|
290
|
+
[Unescape.unescape_string(l), s]
|
291
291
|
when '[' # RANGE, O_RANGE
|
292
292
|
l, s = s[1..-1].split(/(?<=[^\\])\]/, 2)
|
293
|
-
[[:range,
|
293
|
+
[[:range, Unescape.unescape_string(l)], s]
|
294
294
|
when '#' # HEX
|
295
295
|
s.match(/(#x\h+)(.*)$/)
|
296
296
|
l, s = $1, $2
|
data/lib/ebnf/peg/parser.rb
CHANGED
@@ -55,6 +55,7 @@ module EBNF::PEG
|
|
55
55
|
def production_handlers; (@production_handlers ||= {}); end
|
56
56
|
def terminal_handlers; (@terminal_handlers ||= {}); end
|
57
57
|
def terminal_regexps; (@terminal_regexps ||= {}); end
|
58
|
+
def terminal_options; (@terminal_options ||= {}); end
|
58
59
|
|
59
60
|
##
|
60
61
|
# Defines the pattern for a terminal node and a block to be invoked
|
@@ -72,9 +73,8 @@ module EBNF::PEG
|
|
72
73
|
# defaults to the expression defined in the associated rule.
|
73
74
|
# If unset, the terminal rule is used for matching.
|
74
75
|
# @param [Hash] options
|
75
|
-
# @option options [
|
76
|
-
#
|
77
|
-
# their canonical value
|
76
|
+
# @option options [Boolean] :unescape
|
77
|
+
# Cause strings and codepoints to be unescaped.
|
78
78
|
# @yield [value, prod]
|
79
79
|
# @yieldparam [String] value
|
80
80
|
# The scanned terminal value.
|
@@ -86,6 +86,7 @@ module EBNF::PEG
|
|
86
86
|
def terminal(term, regexp = nil, **options, &block)
|
87
87
|
terminal_regexps[term] = regexp if regexp
|
88
88
|
terminal_handlers[term] = block if block_given?
|
89
|
+
terminal_options[term] = options.freeze
|
89
90
|
end
|
90
91
|
|
91
92
|
##
|
@@ -100,6 +101,8 @@ module EBNF::PEG
|
|
100
101
|
# Options which are returned from {Parser#onStart}.
|
101
102
|
# @option options [Boolean] :as_hash (false)
|
102
103
|
# If the production is a `seq`, causes the value to be represented as a single hash, rather than an array of individual hashes for each sub-production. Note that this is not always advisable due to the possibility of repeated productions within the sequence.
|
104
|
+
# @option options [:upper, :lower] :insensitive_strings
|
105
|
+
# Perform case-insensitive match of strings not defined as terminals, and map to either upper or lower case.
|
103
106
|
# @yield [data, block]
|
104
107
|
# @yieldparam [Hash] data
|
105
108
|
# A Hash defined for the current production, during :start
|
@@ -182,6 +185,8 @@ module EBNF::PEG
|
|
182
185
|
# @option options[Integer] :high_water passed to lexer
|
183
186
|
# @option options [Logger] :logger for errors/progress/debug.
|
184
187
|
# @option options[Integer] :low_water passed to lexer
|
188
|
+
# @option options[Boolean] :seq_hash (false)
|
189
|
+
# If `true`, sets the default for the value sent to a production handler that is for a `seq` to a hash composed of the flattened constituent hashes that are otherwise provided.
|
185
190
|
# @option options [Symbol, Regexp] :whitespace
|
186
191
|
# Symbol of whitespace rule (defaults to `@pass`), or a regular expression
|
187
192
|
# for eating whitespace between non-terminal rules (strongly encouraged).
|
@@ -195,6 +200,7 @@ module EBNF::PEG
|
|
195
200
|
# @raise [Exception] Raises exceptions for parsing errors
|
196
201
|
# or errors raised during processing callbacks. Internal
|
197
202
|
# errors are raised using {Error}.
|
203
|
+
# @todo FIXME implement seq_hash
|
198
204
|
def parse(input = nil, start = nil, rules = nil, **options, &block)
|
199
205
|
start ||= options[:start]
|
200
206
|
rules ||= options[:rules] || []
|
@@ -467,10 +473,19 @@ module EBNF::PEG
|
|
467
473
|
#
|
468
474
|
# @param [Symbol] sym
|
469
475
|
# @return [Regexp]
|
470
|
-
def
|
476
|
+
def terminal_regexp(sym)
|
471
477
|
self.class.terminal_regexps[sym]
|
472
478
|
end
|
473
479
|
|
480
|
+
##
|
481
|
+
# Find the options defined for a terminal
|
482
|
+
#
|
483
|
+
# @param [Symbol] sym
|
484
|
+
# @return [Hash]
|
485
|
+
def terminal_options(sym)
|
486
|
+
self.class.terminal_options[sym]
|
487
|
+
end
|
488
|
+
|
474
489
|
##
|
475
490
|
# Record furthest failure.
|
476
491
|
#
|
data/lib/ebnf/peg/rule.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
module EBNF::PEG
|
2
2
|
# Behavior for parsing a PEG rule
|
3
3
|
module Rule
|
4
|
+
include ::EBNF::Unescape
|
5
|
+
|
4
6
|
##
|
5
7
|
# Initialized by parser when loading rules.
|
6
8
|
# Used for finding rules and invoking elements of the parse process.
|
@@ -45,9 +47,18 @@ module EBNF::PEG
|
|
45
47
|
# If the terminal is defined with a regular expression,
|
46
48
|
# use that to match the input,
|
47
49
|
# otherwise,
|
48
|
-
if regexp = parser.
|
49
|
-
|
50
|
+
if regexp = parser.terminal_regexp(sym)
|
51
|
+
term_opts = parser.terminal_options(sym)
|
52
|
+
if matched = input.scan(regexp)
|
53
|
+
# Optionally map matched
|
54
|
+
matched = term_opts.fetch(:map, {}).fetch(matched.downcase, matched)
|
55
|
+
|
56
|
+
# Optionally unescape matched
|
57
|
+
matched = unescape(matched) if term_opts[:unescape]
|
58
|
+
end
|
59
|
+
|
50
60
|
result = parser.onTerminal(sym, (matched ? matched : :unmatched))
|
61
|
+
|
51
62
|
# Update furthest failure for strings and terminals
|
52
63
|
parser.update_furthest_failure(input.pos, input.lineno, sym) if result == :unmatched
|
53
64
|
parser.packrat[sym][pos] = {
|
@@ -61,6 +72,7 @@ module EBNF::PEG
|
|
61
72
|
eat_whitespace(input)
|
62
73
|
end
|
63
74
|
start_options = parser.onStart(sym)
|
75
|
+
string_regexp_opts = start_options[:insensitive_strings] ? Regexp::IGNORECASE : 0
|
64
76
|
|
65
77
|
result = case expr.first
|
66
78
|
when :alt
|
@@ -74,7 +86,12 @@ module EBNF::PEG
|
|
74
86
|
raise "No rule found for #{prod}" unless rule
|
75
87
|
rule.parse(input)
|
76
88
|
when String
|
77
|
-
input.scan(Regexp.new(Regexp.quote(prod)))
|
89
|
+
s = input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts))
|
90
|
+
case start_options[:insensitive_strings]
|
91
|
+
when :lower then s && s.downcase
|
92
|
+
when :upper then s && s.upcase
|
93
|
+
else s
|
94
|
+
end || :unmatched
|
78
95
|
end
|
79
96
|
if alt == :unmatched
|
80
97
|
# Update furthest failure for strings and terminals
|
@@ -112,7 +129,7 @@ module EBNF::PEG
|
|
112
129
|
raise "No rule found for #{prod}" unless rule
|
113
130
|
rule.parse(input)
|
114
131
|
when String
|
115
|
-
input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
|
132
|
+
input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts)) || :unmatched
|
116
133
|
end
|
117
134
|
if res != :unmatched
|
118
135
|
# Update furthest failure for terminals
|
@@ -123,7 +140,7 @@ module EBNF::PEG
|
|
123
140
|
end
|
124
141
|
when :opt
|
125
142
|
# Result is the matched value or nil
|
126
|
-
opt = rept(input, 0, 1, expr[1])
|
143
|
+
opt = rept(input, 0, 1, expr[1], string_regexp_opts, **start_options)
|
127
144
|
|
128
145
|
# Update furthest failure for strings and terminals
|
129
146
|
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
@@ -131,7 +148,7 @@ module EBNF::PEG
|
|
131
148
|
when :plus
|
132
149
|
# Result is an array of all expressions while they match,
|
133
150
|
# at least one must match
|
134
|
-
plus = rept(input, 1, '*', expr[1])
|
151
|
+
plus = rept(input, 1, '*', expr[1], string_regexp_opts)
|
135
152
|
|
136
153
|
# Update furthest failure for strings and terminals
|
137
154
|
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
@@ -146,7 +163,7 @@ module EBNF::PEG
|
|
146
163
|
when :rept
|
147
164
|
# Result is an array of all expressions while they match,
|
148
165
|
# an empty array of none match
|
149
|
-
rept = rept(input, expr[1], expr[2], expr[3])
|
166
|
+
rept = rept(input, expr[1], expr[2], expr[3], string_regexp_opts)
|
150
167
|
|
151
168
|
# # Update furthest failure for strings and terminals
|
152
169
|
parser.update_furthest_failure(input.pos, input.lineno, expr[3]) if terminal?
|
@@ -161,7 +178,12 @@ module EBNF::PEG
|
|
161
178
|
raise "No rule found for #{prod}" unless rule
|
162
179
|
rule.parse(input)
|
163
180
|
when String
|
164
|
-
input.scan(Regexp.new(Regexp.quote(prod)))
|
181
|
+
s = input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts))
|
182
|
+
case start_options[:insensitive_strings]
|
183
|
+
when :lower then s && s.downcase
|
184
|
+
when :upper then s && s.upcase
|
185
|
+
else s
|
186
|
+
end || :unmatched
|
165
187
|
end
|
166
188
|
if res == :unmatched
|
167
189
|
# Update furthest failure for strings and terminals
|
@@ -182,7 +204,7 @@ module EBNF::PEG
|
|
182
204
|
when :star
|
183
205
|
# Result is an array of all expressions while they match,
|
184
206
|
# an empty array of none match
|
185
|
-
star = rept(input, 0, '*', expr[1])
|
207
|
+
star = rept(input, 0, '*', expr[1], string_regexp_opts)
|
186
208
|
|
187
209
|
# Update furthest failure for strings and terminals
|
188
210
|
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
@@ -214,8 +236,9 @@ module EBNF::PEG
|
|
214
236
|
# @param [Integer] max
|
215
237
|
# If it is an integer, it stops matching after max entries.
|
216
238
|
# @param [Symbol, String] prod
|
239
|
+
# @param [Integer] string_regexp_opts
|
217
240
|
# @return [:unmatched, Array]
|
218
|
-
def rept(input, min, max, prod)
|
241
|
+
def rept(input, min, max, prod, string_regexp_opts, **options)
|
219
242
|
result = []
|
220
243
|
|
221
244
|
case prod
|
@@ -227,9 +250,13 @@ module EBNF::PEG
|
|
227
250
|
result << res
|
228
251
|
end
|
229
252
|
when String
|
230
|
-
while (res = input.scan(Regexp.new(Regexp.quote(prod)))) && (max == '*' || result.length < max)
|
253
|
+
while (res = input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts))) && (max == '*' || result.length < max)
|
231
254
|
eat_whitespace(input) unless terminal?
|
232
|
-
result <<
|
255
|
+
result << case options[:insensitive_strings]
|
256
|
+
when :lower then res.downcase
|
257
|
+
when :upper then res.upcase
|
258
|
+
else res
|
259
|
+
end
|
233
260
|
end
|
234
261
|
end
|
235
262
|
|
data/lib/ebnf/rule.rb
CHANGED
@@ -206,10 +206,11 @@ module EBNF
|
|
206
206
|
end
|
207
207
|
|
208
208
|
# Return SXP representation of this rule
|
209
|
+
#
|
209
210
|
# @return [String]
|
210
|
-
def to_sxp
|
211
|
+
def to_sxp(**options)
|
211
212
|
require 'sxp' unless defined?(SXP)
|
212
|
-
for_sxp.to_sxp
|
213
|
+
for_sxp.to_sxp(**options)
|
213
214
|
end
|
214
215
|
|
215
216
|
alias_method :to_s, :to_sxp
|
@@ -416,6 +417,7 @@ module EBNF
|
|
416
417
|
# @param [Rule] other
|
417
418
|
# @return [Boolean]
|
418
419
|
def ==(other)
|
420
|
+
other.is_a?(Rule) &&
|
419
421
|
sym == other.sym &&
|
420
422
|
kind == other.kind &&
|
421
423
|
expr == other.expr
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
# Unescape strings
|
3
|
+
module EBNF::Unescape
|
4
|
+
ESCAPE_CHARS = {
|
5
|
+
'\\t' => "\t", # \u0009 (tab)
|
6
|
+
'\\n' => "\n", # \u000A (line feed)
|
7
|
+
'\\r' => "\r", # \u000D (carriage return)
|
8
|
+
'\\b' => "\b", # \u0008 (backspace)
|
9
|
+
'\\f' => "\f", # \u000C (form feed)
|
10
|
+
'\\"' => '"', # \u0022 (quotation mark, double quote mark)
|
11
|
+
"\\'" => '\'', # \u0027 (apostrophe-quote, single quote mark)
|
12
|
+
'\\\\' => '\\' # \u005C (backslash)
|
13
|
+
}.freeze
|
14
|
+
ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})/u.freeze # \uXXXX
|
15
|
+
ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})/u.freeze # \UXXXXXXXX
|
16
|
+
ECHAR = /\\./u.freeze # More liberal unescaping
|
17
|
+
UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/n.freeze
|
18
|
+
|
19
|
+
##
|
20
|
+
# Returns a copy of the given `string` with all `\uXXXX` and
|
21
|
+
# `\UXXXXXXXX` Unicode codepoint escape sequences replaced with their
|
22
|
+
# unescaped UTF-8 character counterparts.
|
23
|
+
#
|
24
|
+
# @param [String] string
|
25
|
+
# @return [String]
|
26
|
+
# @see https://www.w3.org/TR/rdf-sparql-query/#codepointEscape
|
27
|
+
def unescape_codepoints(string)
|
28
|
+
string = string.dup
|
29
|
+
string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding)
|
30
|
+
|
31
|
+
# Decode \uXXXX and \UXXXXXXXX code points:
|
32
|
+
string = string.gsub(UCHAR) do |c|
|
33
|
+
s = [(c[2..-1]).hex].pack('U*')
|
34
|
+
s.respond_to?(:force_encoding) ? s.force_encoding(Encoding::ASCII_8BIT) : s
|
35
|
+
end
|
36
|
+
|
37
|
+
string.force_encoding(Encoding::UTF_8) if string.respond_to?(:force_encoding)
|
38
|
+
string
|
39
|
+
end
|
40
|
+
module_function :unescape_codepoints
|
41
|
+
|
42
|
+
##
|
43
|
+
# Returns a copy of the given `input` string with all string escape
|
44
|
+
# sequences (e.g. `\n` and `\t`) replaced with their unescaped UTF-8
|
45
|
+
# character counterparts.
|
46
|
+
#
|
47
|
+
# @param [String] input
|
48
|
+
# @return [String]
|
49
|
+
# @see https://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
|
50
|
+
def unescape_string(input)
|
51
|
+
input.gsub(ECHAR) { |escaped| ESCAPE_CHARS[escaped] || escaped[1..-1]}
|
52
|
+
end
|
53
|
+
module_function :unescape_string
|
54
|
+
|
55
|
+
# Perform string and codepoint unescaping if defined for this terminal
|
56
|
+
# @param [String] string
|
57
|
+
# @return [String]
|
58
|
+
def unescape(string)
|
59
|
+
unescape_string(unescape_codepoints(string))
|
60
|
+
end
|
61
|
+
module_function :unescape
|
62
|
+
end
|
data/lib/ebnf/writer.rb
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
require 'rdf'
|
3
3
|
require 'strscan' unless defined?(StringScanner)
|
4
4
|
require "ostruct"
|
5
|
+
require 'unicode/types'
|
5
6
|
|
6
7
|
##
|
7
8
|
# Serialize ruleset back to EBNF
|
@@ -86,22 +87,23 @@ module EBNF
|
|
86
87
|
#
|
87
88
|
# @param [Array<Rule>] rules
|
88
89
|
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
90
|
+
# @param [Boolean] validate (false) validate generated HTML.
|
89
91
|
# @return [Object]
|
90
|
-
def self.html(*rules, format: :ebnf)
|
92
|
+
def self.html(*rules, format: :ebnf, validate: false)
|
91
93
|
require 'stringio' unless defined?(StringIO)
|
92
94
|
buf = StringIO.new
|
93
|
-
Writer.new(rules, out: buf, html: true, format: format)
|
95
|
+
Writer.new(rules, out: buf, html: true, format: format, validate: validate)
|
94
96
|
buf.string
|
95
97
|
end
|
96
98
|
|
97
99
|
##
|
98
100
|
# @param [Array<Rule>] rules
|
101
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
102
|
+
# @param [Boolean] html (false) generate HTML output
|
103
|
+
# @param [Boolean] validate (false) validate generated HTML.
|
99
104
|
# @param [Hash{Symbol => Object}] options
|
100
105
|
# @param [#write] out ($stdout)
|
101
|
-
|
102
|
-
# @option options [Symbol] format
|
103
|
-
# @option options [Boolean] html (false)
|
104
|
-
def initialize(rules, out: $stdout, html: false, format: :ebnf, **options)
|
106
|
+
def initialize(rules, out: $stdout, html: false, format: :ebnf, validate: false, **options)
|
105
107
|
@options = options.merge(html: html)
|
106
108
|
return if rules.empty?
|
107
109
|
|
@@ -174,7 +176,21 @@ module EBNF
|
|
174
176
|
end
|
175
177
|
end
|
176
178
|
end.flatten
|
177
|
-
|
179
|
+
|
180
|
+
html_result = eruby.evaluate(format: format, rules: formatted_rules)
|
181
|
+
|
182
|
+
if validate
|
183
|
+
begin
|
184
|
+
# Validate the output HTML
|
185
|
+
doc = Nokogiri::HTML5("<!DOCTYPE html>" + html_result, max_errors: 10)
|
186
|
+
raise EncodingError, "Errors found in generated HTML:\n " +
|
187
|
+
doc.errors.map(&:to_s).join("\n ") unless doc.errors.empty?
|
188
|
+
rescue LoadError, NoMethodError
|
189
|
+
# Skip
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
out.write html_result
|
178
194
|
return
|
179
195
|
rescue LoadError
|
180
196
|
$stderr.puts "Generating HTML requires erubis and htmlentities gems to be loaded"
|
@@ -347,16 +363,20 @@ module EBNF
|
|
347
363
|
end
|
348
364
|
char = fmt % u.ord
|
349
365
|
if @options[:html]
|
350
|
-
if u.ord <= 0x20
|
351
|
-
|
366
|
+
char = if u.ord <= 0x20
|
367
|
+
%(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{@coder.encode char}</abbr>)
|
368
|
+
elsif u.ord == 0x22
|
369
|
+
%(<abbr title="quot">>"</abbr>)
|
352
370
|
elsif u.ord < 0x7F
|
353
|
-
|
371
|
+
%(<abbr title="ascii '#{@coder.encode u}'">#{@coder.encode char}</abbr>)
|
354
372
|
elsif u.ord == 0x7F
|
355
|
-
|
373
|
+
%(<abbr title="delete">#{@coder.encode char}</abbr>)
|
356
374
|
elsif u.ord <= 0xFF
|
357
|
-
|
375
|
+
%(<abbr title="extended ascii '#{@coder.encode char}'">#{char}</abbr>)
|
376
|
+
elsif (%w(Control Private-use Surrogate Noncharacter Reserved) - ::Unicode::Types.of(u)).empty?
|
377
|
+
%(<abbr title="unicode '#{u}'">#{char}</abbr>)
|
358
378
|
else
|
359
|
-
|
379
|
+
%(<abbr title="unicode '#{::Unicode::Types.of(u).first}'">#{char}</abbr>)
|
360
380
|
end
|
361
381
|
%(<code class="grammar-char-escape">#{char}</code>)
|
362
382
|
else
|
@@ -455,7 +475,7 @@ module EBNF
|
|
455
475
|
# Format a single-character string, preferring hex for non-main ASCII
|
456
476
|
def format_abnf_char(c)
|
457
477
|
if /[\x20-\x21\x23-\x7E]/.match?(c)
|
458
|
-
c.inspect
|
478
|
+
@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : c.inspect
|
459
479
|
else
|
460
480
|
escape_abnf_hex(c)
|
461
481
|
end
|
@@ -536,14 +556,16 @@ module EBNF
|
|
536
556
|
if @options[:html]
|
537
557
|
if u.ord <= 0x20
|
538
558
|
char = %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{@coder.encode char}</abbr>)
|
539
|
-
elsif u.ord
|
559
|
+
elsif u.ord == 0x22
|
560
|
+
%(<abbr title="quot">>"</abbr>)
|
561
|
+
elsif u.ord < 0x7F
|
540
562
|
char = %(<abbr title="ascii '#{u}'">#{@coder.encode char}</abbr>)
|
541
563
|
elsif u.ord == 0x7F
|
542
564
|
char = %(<abbr title="delete">#{@coder.encode char}</abbr>)
|
543
565
|
elsif u.ord <= 0xFF
|
544
566
|
char = %(<abbr title="extended ascii '#{u}'">#{char}</abbr>)
|
545
567
|
else
|
546
|
-
char = %(<abbr title="unicode '#{u}'">#{char}</abbr>)
|
568
|
+
char = %(<abbr title="unicode '#{u.unicode_normaliz}'">#{char}</abbr>)
|
547
569
|
end
|
548
570
|
%(<code class="grammar-char-escape">#{char}</code>)
|
549
571
|
else
|
@@ -686,7 +708,7 @@ module EBNF
|
|
686
708
|
<table class="grammar">
|
687
709
|
<tbody id="grammar-productions" class="<%= @format %>">
|
688
710
|
<% for rule in @rules %>
|
689
|
-
<tr<%= %{ id="grammar-production-#{rule.sym}"} unless %w(=/ |).include?(rule.assign)
|
711
|
+
<tr<%= %{ id="grammar-production-#{rule.sym}"} unless %w(=/ |).include?(rule.assign) || rule.sym.nil?%>>
|
690
712
|
<% if rule.id %>
|
691
713
|
<td<%= " colspan=2" unless rule.sym %>><%= rule.id %></td>
|
692
714
|
<% end %>
|
data/lib/ebnf.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ebnf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gregg Kellogg
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-12-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sxp
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.2'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
26
|
+
version: '1.2'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: scanf
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '3.
|
47
|
+
version: '3.2'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '3.
|
54
|
+
version: '3.2'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: htmlentities
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,48 +66,76 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '4.3'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: unicode-types
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.7'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.7'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: amazing_print
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.4'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.4'
|
69
97
|
- !ruby/object:Gem::Dependency
|
70
98
|
name: rdf-spec
|
71
99
|
requirement: !ruby/object:Gem::Requirement
|
72
100
|
requirements:
|
73
101
|
- - "~>"
|
74
102
|
- !ruby/object:Gem::Version
|
75
|
-
version: '3.
|
103
|
+
version: '3.2'
|
76
104
|
type: :development
|
77
105
|
prerelease: false
|
78
106
|
version_requirements: !ruby/object:Gem::Requirement
|
79
107
|
requirements:
|
80
108
|
- - "~>"
|
81
109
|
- !ruby/object:Gem::Version
|
82
|
-
version: '3.
|
110
|
+
version: '3.2'
|
83
111
|
- !ruby/object:Gem::Dependency
|
84
112
|
name: rdf-turtle
|
85
113
|
requirement: !ruby/object:Gem::Requirement
|
86
114
|
requirements:
|
87
115
|
- - "~>"
|
88
116
|
- !ruby/object:Gem::Version
|
89
|
-
version: '3.
|
117
|
+
version: '3.2'
|
90
118
|
type: :development
|
91
119
|
prerelease: false
|
92
120
|
version_requirements: !ruby/object:Gem::Requirement
|
93
121
|
requirements:
|
94
122
|
- - "~>"
|
95
123
|
- !ruby/object:Gem::Version
|
96
|
-
version: '3.
|
124
|
+
version: '3.2'
|
97
125
|
- !ruby/object:Gem::Dependency
|
98
126
|
name: nokogiri
|
99
127
|
requirement: !ruby/object:Gem::Requirement
|
100
128
|
requirements:
|
101
129
|
- - "~>"
|
102
130
|
- !ruby/object:Gem::Version
|
103
|
-
version: '1.
|
131
|
+
version: '1.12'
|
104
132
|
type: :development
|
105
133
|
prerelease: false
|
106
134
|
version_requirements: !ruby/object:Gem::Requirement
|
107
135
|
requirements:
|
108
136
|
- - "~>"
|
109
137
|
- !ruby/object:Gem::Version
|
110
|
-
version: '1.
|
138
|
+
version: '1.12'
|
111
139
|
- !ruby/object:Gem::Dependency
|
112
140
|
name: erubis
|
113
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -128,14 +156,14 @@ dependencies:
|
|
128
156
|
requirements:
|
129
157
|
- - "~>"
|
130
158
|
- !ruby/object:Gem::Version
|
131
|
-
version: '3.
|
159
|
+
version: '3.10'
|
132
160
|
type: :development
|
133
161
|
prerelease: false
|
134
162
|
version_requirements: !ruby/object:Gem::Requirement
|
135
163
|
requirements:
|
136
164
|
- - "~>"
|
137
165
|
- !ruby/object:Gem::Version
|
138
|
-
version: '3.
|
166
|
+
version: '3.10'
|
139
167
|
- !ruby/object:Gem::Dependency
|
140
168
|
name: rspec-its
|
141
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -240,6 +268,7 @@ files:
|
|
240
268
|
- lib/ebnf/peg/rule.rb
|
241
269
|
- lib/ebnf/rule.rb
|
242
270
|
- lib/ebnf/terminals.rb
|
271
|
+
- lib/ebnf/unescape.rb
|
243
272
|
- lib/ebnf/version.rb
|
244
273
|
- lib/ebnf/writer.rb
|
245
274
|
homepage: https://github.com/dryruby/ebnf
|
@@ -254,15 +283,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
254
283
|
requirements:
|
255
284
|
- - ">="
|
256
285
|
- !ruby/object:Gem::Version
|
257
|
-
version: '2.
|
286
|
+
version: '2.6'
|
258
287
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
259
288
|
requirements:
|
260
289
|
- - ">="
|
261
290
|
- !ruby/object:Gem::Version
|
262
291
|
version: '0'
|
263
292
|
requirements: []
|
264
|
-
rubygems_version: 3.
|
293
|
+
rubygems_version: 3.3.3
|
265
294
|
signing_key:
|
266
295
|
specification_version: 4
|
267
|
-
summary: EBNF parser and parser generator.
|
296
|
+
summary: EBNF parser and parser generator in Ruby.
|
268
297
|
test_files: []
|