ebnf 2.1.1 → 2.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +7 -3
- data/VERSION +1 -1
- data/bin/ebnf +5 -5
- data/etc/doap.ttl +1 -4
- data/etc/ebnf.html +22 -117
- data/etc/ebnf.ll1.rb +1 -1
- data/etc/ebnf.peg.rb +1 -1
- data/lib/ebnf/base.rb +3 -2
- data/lib/ebnf/ll1/lexer.rb +4 -28
- data/lib/ebnf/native.rb +2 -2
- data/lib/ebnf/peg/parser.rb +19 -4
- data/lib/ebnf/peg/rule.rb +47 -11
- data/lib/ebnf/unescape.rb +62 -0
- data/lib/ebnf/writer.rb +46 -19
- data/lib/ebnf.rb +1 -0
- metadata +38 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 675c1d0315518a2a8159fb1cdfb3bd0054e2a4eac1a12adae63dbbe02cf2d611
|
4
|
+
data.tar.gz: '0229b65832d59f84c39bfc9770555fb50a3500f5d551ea400bd3ae57df1b408b'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0e13f53ed30fad026c5fd901a449805089e948838e6aae38b4e3fe87d274f07e19ae9c197a93cba0dc90a57cea4f8b341f13904265ac47a91631a5cc7801e6ec
|
7
|
+
data.tar.gz: 9bb40f43a8a9ff95bbd9d097729db694033217ba1226a403844e9bf5b20ee852b57f8fef54dd200d3ee98847da7a21efcaa675a34fb607c799a901f4798f3085
|
data/README.md
CHANGED
@@ -3,8 +3,9 @@
|
|
3
3
|
[EBNF][] parser and generic parser generator.
|
4
4
|
|
5
5
|
[![Gem Version](https://badge.fury.io/rb/ebnf.png)](https://badge.fury.io/rb/ebnf)
|
6
|
-
[![Build Status](https://
|
7
|
-
[![Coverage Status](https://coveralls.io/repos/dryruby/ebnf/badge.svg)](https://coveralls.io/r/dryruby/ebnf)
|
6
|
+
[![Build Status](https://github.com/dryruby/ebnf/workflows/CI/badge.svg?branch=develop)](https://github.com/dryruby/ebnf/actions?query=workflow%3ACI)
|
7
|
+
[![Coverage Status](https://coveralls.io/repos/dryruby/ebnf/badge.svg?branch=develop)](https://coveralls.io/r/dryruby/ebnf?branch=develop)
|
8
|
+
[![Gitter chat](https://badges.gitter.im/ruby-rdf/rdf.png)](https://gitter.im/ruby-rdf/rdf)
|
8
9
|
|
9
10
|
## Description
|
10
11
|
This is a [Ruby][] implementation of an [EBNF][] and [BNF][] parser and parser generator.
|
@@ -101,6 +102,8 @@ On a parsing failure, and exception is raised with information that may be usefu
|
|
101
102
|
The [EBNF][] variant used here is based on [W3C](https://w3.org/) [EBNF][] (see {file:etc/ebnf.ebnf EBNF grammar}) as defined in the
|
102
103
|
[XML 1.0 recommendation](https://www.w3.org/TR/REC-xml/), with minor extensions:
|
103
104
|
|
105
|
+
Note that the grammar includes an optional `[identifier]` in front of rule names, which can be in conflict with the `RANGE` terminal. It is typically not a problem, but if it comes up, try parsing with the `native` parser, or add comments or sequences to disambiguate. EBNF does not have beginning of line checks as all whitespace is treated the same, so the common practice of identifying each rule inherently leads to such ambiguity.
|
106
|
+
|
104
107
|
The character set for EBNF is UTF-8.
|
105
108
|
|
106
109
|
The general form of a rule is:
|
@@ -259,7 +262,8 @@ This repository uses [Git Flow](https://github.com/nvie/gitflow) to mange develo
|
|
259
262
|
list in the `README`. Alphabetical order applies.
|
260
263
|
* Do note that in order for us to merge any non-trivial changes (as a rule
|
261
264
|
of thumb, additions larger than about 15 lines of code), we need an
|
262
|
-
explicit [public domain dedication][PDD] on record from you
|
265
|
+
explicit [public domain dedication][PDD] on record from you,
|
266
|
+
which you will be asked to agree to on the first commit to a repo within the organization.
|
263
267
|
|
264
268
|
## License
|
265
269
|
This is free and unencumbered public domain software. For more information,
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.2.1
|
data/bin/ebnf
CHANGED
@@ -34,7 +34,7 @@ OPT_ARGS = [
|
|
34
34
|
["--prefix", "-p", GetoptLong::REQUIRED_ARGUMENT,"Prefix to use when generating Turtle"],
|
35
35
|
["--progress", "-v", GetoptLong::NO_ARGUMENT, "Detail on execution"],
|
36
36
|
["--renumber", GetoptLong::NO_ARGUMENT, "Renumber parsed reules"],
|
37
|
-
["--validate", GetoptLong::NO_ARGUMENT, "Validate grammar"],
|
37
|
+
["--validate", GetoptLong::NO_ARGUMENT, "Validate grammar and any generated HTML"],
|
38
38
|
["--help", "-?", GetoptLong::NO_ARGUMENT, "This message"]
|
39
39
|
]
|
40
40
|
def usage
|
@@ -67,7 +67,7 @@ opts.each do |opt, arg|
|
|
67
67
|
end
|
68
68
|
options[:format] = arg.to_sym
|
69
69
|
when '--format'
|
70
|
-
unless %w(abnf abnfh ebnf html isoebnf isoebnfh rb sxp).include?(arg)
|
70
|
+
unless %w(abnf abnfh ebnf html isoebnf isoebnfh rb sxp ttl).include?(arg)
|
71
71
|
STDERR.puts("unrecognized output format #{arg}")
|
72
72
|
usage
|
73
73
|
end
|
@@ -99,11 +99,11 @@ ebnf.renumber! if options[:renumber]
|
|
99
99
|
|
100
100
|
res = case options[:output_format]
|
101
101
|
when :abnf then ebnf.to_s(format: :abnf)
|
102
|
-
when :abnfh then ebnf.to_html(format: :abnf)
|
102
|
+
when :abnfh then ebnf.to_html(format: :abnf, validate: options[:validate])
|
103
103
|
when :ebnf then ebnf.to_s
|
104
|
-
when :html then ebnf.to_html
|
104
|
+
when :html then ebnf.to_html(validate: options[:validate])
|
105
105
|
when :isoebnf then ebnf.to_s(format: :isoebnf)
|
106
|
-
when :isoebnfh then ebnf.to_html(format: :isoebnf)
|
106
|
+
when :isoebnfh then ebnf.to_html(format: :isoebnf, validate: options[:validate])
|
107
107
|
when :sxp then ebnf.to_sxp
|
108
108
|
when :ttl then ebnf.to_ttl(options[:prefix], options[:namespace])
|
109
109
|
when :rb then ebnf.to_ruby(out, grammarFile: ARGV[0], **options)
|
data/etc/doap.ttl
CHANGED
@@ -12,7 +12,7 @@
|
|
12
12
|
doap:name "ebnf" ;
|
13
13
|
doap:homepage <https://github.com/dryruby/ebnf> ;
|
14
14
|
doap:license <https://unlicense.org/1.0/> ;
|
15
|
-
doap:shortdesc "EBNF parser and parser generator"@en ;
|
15
|
+
doap:shortdesc "EBNF parser and parser generator in Ruby."@en ;
|
16
16
|
doap:description "EBNF is a Ruby parser for W3C EBNF and a parser generator for PEG and LL(1). Also includes parsing modes for ISO EBNF and ABNF."@en ;
|
17
17
|
doap:created "2011-08-29"^^xsd:date ;
|
18
18
|
doap:programming-language "Ruby" ;
|
@@ -34,7 +34,4 @@
|
|
34
34
|
doap:maintainer <https://greggkellogg.net/foaf#me> ;
|
35
35
|
doap:documenter <https://greggkellogg.net/foaf#me> ;
|
36
36
|
foaf:maker <https://greggkellogg.net/foaf#me> ;
|
37
|
-
dc:title "ebnf" ;
|
38
|
-
dc:description "EBNF is a Ruby parser for W3C EBNF and a parser generator for PEG and LL(1). Also includes parsing modes for ISO EBNF and ABNF."@en ;
|
39
|
-
dc:date "2011-08-29"^^xsd:date ;
|
40
37
|
dc:creator <https://greggkellogg.net/foaf#me> .
|
data/etc/ebnf.html
CHANGED
@@ -11,7 +11,7 @@
|
|
11
11
|
<td>[2]</td>
|
12
12
|
<td><code>declaration</code></td>
|
13
13
|
<td>::=</td>
|
14
|
-
<td
|
14
|
+
<td>"@terminals" <code>|</code> <a href="#grammar-production-pass">pass</a></td>
|
15
15
|
</tr>
|
16
16
|
<tr id="grammar-production-rule">
|
17
17
|
<td>[3]</td>
|
@@ -53,61 +53,24 @@
|
|
53
53
|
<td>[9]</td>
|
54
54
|
<td><code>primary</code></td>
|
55
55
|
<td>::=</td>
|
56
|
-
<td><a href="#grammar-production-HEX">HEX</a
|
57
|
-
</tr>
|
58
|
-
<tr>
|
59
|
-
<td>[9]</td>
|
60
|
-
<td><code></code></td>
|
61
|
-
<td>|</td>
|
62
|
-
<td><a href="#grammar-production-SYMBOL">SYMBOL</a></td>
|
63
|
-
</tr>
|
64
|
-
<tr>
|
65
|
-
<td>[9]</td>
|
66
|
-
<td><code></code></td>
|
67
|
-
<td>|</td>
|
68
|
-
<td><a href="#grammar-production-O_RANGE">O_RANGE</a></td>
|
69
|
-
</tr>
|
70
|
-
<tr>
|
71
|
-
<td>[9]</td>
|
72
|
-
<td><code></code></td>
|
73
|
-
<td>|</td>
|
74
|
-
<td><a href="#grammar-production-RANGE">RANGE</a></td>
|
75
|
-
</tr>
|
76
|
-
<tr>
|
77
|
-
<td>[9]</td>
|
78
|
-
<td><code></code></td>
|
79
|
-
<td>|</td>
|
80
|
-
<td><a href="#grammar-production-STRING1">STRING1</a></td>
|
81
|
-
</tr>
|
82
|
-
<tr>
|
83
|
-
<td>[9]</td>
|
84
|
-
<td><code></code></td>
|
85
|
-
<td>|</td>
|
86
|
-
<td><a href="#grammar-production-STRING2">STRING2</a></td>
|
87
|
-
</tr>
|
88
|
-
<tr>
|
89
|
-
<td>[9]</td>
|
90
|
-
<td><code></code></td>
|
91
|
-
<td>|</td>
|
92
|
-
<td><code>(</code> "<code class="grammar-literal">(</code>" <a href="#grammar-production-expression">expression</a> "<code class="grammar-literal">)</code>"<code>)</code> </td>
|
56
|
+
<td><a href="#grammar-production-HEX">HEX</a> <code>|</code> <a href="#grammar-production-SYMBOL">SYMBOL</a> <code>|</code> <a href="#grammar-production-O_RANGE">O_RANGE</a> <code>|</code> <a href="#grammar-production-RANGE">RANGE</a> <code>|</code> <a href="#grammar-production-STRING1">STRING1</a> <code>|</code> <a href="#grammar-production-STRING2">STRING2</a> <code>|</code> <code>(</code> "<code class="grammar-literal">(</code>" <a href="#grammar-production-expression">expression</a> "<code class="grammar-literal">)</code>"<code>)</code> </td>
|
93
57
|
</tr>
|
94
58
|
<tr id="grammar-production-pass">
|
95
59
|
<td>[10]</td>
|
96
60
|
<td><code>pass</code></td>
|
97
61
|
<td>::=</td>
|
98
|
-
<td
|
62
|
+
<td>"@pass" <a href="#grammar-production-expression">expression</a></td>
|
99
63
|
</tr>
|
100
|
-
<tr
|
101
|
-
<td>@terminals</td>
|
102
|
-
<td><code></code></td>
|
64
|
+
<tr>
|
65
|
+
<td colspan=2>@terminals</td>
|
103
66
|
<td></td>
|
104
|
-
<td><strong
|
67
|
+
<td><strong># Productions for terminals</strong></td>
|
105
68
|
</tr>
|
106
69
|
<tr id="grammar-production-LHS">
|
107
70
|
<td>[11]</td>
|
108
71
|
<td><code>LHS</code></td>
|
109
72
|
<td>::=</td>
|
110
|
-
<td><code>(</code> "<code class="grammar-literal">[</code>" <a href="#grammar-production-SYMBOL">SYMBOL</a> "<code class="grammar-literal">]</code>" <code class="grammar-char-escape"><abbr title="space">#x20</abbr></code><code>+</code> <code>)</code> <code>?</code> <a href="#grammar-production-SYMBOL">SYMBOL</a> <code class="grammar-char-escape"><abbr title="space">#x20</abbr></code><code>*</code>
|
73
|
+
<td><code>(</code> "<code class="grammar-literal">[</code>" <a href="#grammar-production-SYMBOL">SYMBOL</a> "<code class="grammar-literal">]</code>" <code class="grammar-char-escape"><abbr title="space">#x20</abbr></code><code>+</code> <code>)</code> <code>?</code> <a href="#grammar-production-SYMBOL">SYMBOL</a> <code class="grammar-char-escape"><abbr title="space">#x20</abbr></code><code>*</code> "::="</td>
|
111
74
|
</tr>
|
112
75
|
<tr id="grammar-production-SYMBOL">
|
113
76
|
<td>[12]</td>
|
@@ -119,91 +82,37 @@
|
|
119
82
|
<td>[13]</td>
|
120
83
|
<td><code>HEX</code></td>
|
121
84
|
<td>::=</td>
|
122
|
-
<td
|
85
|
+
<td>"#x" <code>(</code> <code>[</code> <code class="grammar-literal">a-f</code><code>]</code> <code>|</code> <code>[</code> <code class="grammar-literal">A-F</code><code>]</code> <code>|</code> <code>[</code> <code class="grammar-literal">0-9</code><code>]</code> <code>)</code> <code>+</code> </td>
|
123
86
|
</tr>
|
124
87
|
<tr id="grammar-production-RANGE">
|
125
88
|
<td>[14]</td>
|
126
89
|
<td><code>RANGE</code></td>
|
127
90
|
<td>::=</td>
|
128
|
-
<td>"<code class="grammar-literal">[</code>"</td>
|
129
|
-
</tr>
|
130
|
-
<tr id="grammar-production-">
|
131
|
-
<td>[14]</td>
|
132
|
-
<td><code></code></td>
|
133
|
-
<td></td>
|
134
|
-
<td><code>(</code> <code>(</code> <a href="#grammar-production-R_CHAR">R_CHAR</a> "<code class="grammar-literal">-</code>" <a href="#grammar-production-R_CHAR">R_CHAR</a><code>)</code><code>(</code> <a href="#grammar-production-HEX">HEX</a> "<code class="grammar-literal">-</code>" <a href="#grammar-production-HEX">HEX</a><code>)</code> <code>|</code> <a href="#grammar-production-R_CHAR">R_CHAR</a> <code>|</code> <a href="#grammar-production-HEX">HEX</a><code>)</code> <code>+</code></td>
|
135
|
-
</tr>
|
136
|
-
<tr id="grammar-production-">
|
137
|
-
<td>[14]</td>
|
138
|
-
<td><code></code></td>
|
139
|
-
<td></td>
|
140
|
-
<td>"<code class="grammar-literal">-</code>"<code>?</code></td>
|
141
|
-
</tr>
|
142
|
-
<tr id="grammar-production-">
|
143
|
-
<td>[14]</td>
|
144
|
-
<td><code></code></td>
|
145
|
-
<td></td>
|
146
|
-
<td><code>(</code> "<code class="grammar-literal">]</code>" <code>-</code> <a href="#grammar-production-LHS">LHS</a><code>)</code> </td>
|
91
|
+
<td>"<code class="grammar-literal">[</code>" <code>(</code> <code>(</code> <a href="#grammar-production-R_CHAR">R_CHAR</a> "<code class="grammar-literal">-</code>" <a href="#grammar-production-R_CHAR">R_CHAR</a><code>)</code> <code>|</code> <code>(</code> <a href="#grammar-production-HEX">HEX</a> "<code class="grammar-literal">-</code>" <a href="#grammar-production-HEX">HEX</a><code>)</code> <code>|</code> <a href="#grammar-production-R_CHAR">R_CHAR</a> <code>|</code> <a href="#grammar-production-HEX">HEX</a><code>)</code> <code>+</code> "<code class="grammar-literal">-</code>"<code>?</code> <code>(</code> "<code class="grammar-literal">]</code>" <code>-</code> <a href="#grammar-production-LHS">LHS</a><code>)</code> </td>
|
147
92
|
</tr>
|
148
93
|
<tr id="grammar-production-O_RANGE">
|
149
94
|
<td>[15]</td>
|
150
95
|
<td><code>O_RANGE</code></td>
|
151
96
|
<td>::=</td>
|
152
|
-
<td>"
|
153
|
-
</tr>
|
154
|
-
<tr id="grammar-production-">
|
155
|
-
<td>[15]</td>
|
156
|
-
<td><code></code></td>
|
157
|
-
<td></td>
|
158
|
-
<td><code>(</code> <code>(</code> <a href="#grammar-production-R_CHAR">R_CHAR</a> "<code class="grammar-literal">-</code>" <a href="#grammar-production-R_CHAR">R_CHAR</a><code>)</code><code>(</code> <a href="#grammar-production-HEX">HEX</a> "<code class="grammar-literal">-</code>" <a href="#grammar-production-HEX">HEX</a><code>)</code> <code>|</code> <a href="#grammar-production-R_CHAR">R_CHAR</a> <code>|</code> <a href="#grammar-production-HEX">HEX</a><code>)</code> <code>+</code></td>
|
159
|
-
</tr>
|
160
|
-
<tr id="grammar-production-">
|
161
|
-
<td>[15]</td>
|
162
|
-
<td><code></code></td>
|
163
|
-
<td></td>
|
164
|
-
<td>"<code class="grammar-literal">-</code>"<code>?</code></td>
|
165
|
-
</tr>
|
166
|
-
<tr id="grammar-production-">
|
167
|
-
<td>[15]</td>
|
168
|
-
<td><code></code></td>
|
169
|
-
<td></td>
|
170
|
-
<td>"<code class="grammar-literal">]</code>"</td>
|
97
|
+
<td>"[^" <code>(</code> <code>(</code> <a href="#grammar-production-R_CHAR">R_CHAR</a> "<code class="grammar-literal">-</code>" <a href="#grammar-production-R_CHAR">R_CHAR</a><code>)</code> <code>|</code> <code>(</code> <a href="#grammar-production-HEX">HEX</a> "<code class="grammar-literal">-</code>" <a href="#grammar-production-HEX">HEX</a><code>)</code> <code>|</code> <a href="#grammar-production-R_CHAR">R_CHAR</a> <code>|</code> <a href="#grammar-production-HEX">HEX</a><code>)</code> <code>+</code> "<code class="grammar-literal">-</code>"<code>?</code> "<code class="grammar-literal">]</code>"</td>
|
171
98
|
</tr>
|
172
99
|
<tr id="grammar-production-STRING1">
|
173
100
|
<td>[16]</td>
|
174
101
|
<td><code>STRING1</code></td>
|
175
102
|
<td>::=</td>
|
176
|
-
<td>'<code class="grammar-literal"
|
103
|
+
<td>'<code class="grammar-literal">"</code>' <code>(</code> <a href="#grammar-production-CHAR">CHAR</a> <code>-</code> '<code class="grammar-literal">"</code>'<code>)</code> <code>*</code> '<code class="grammar-literal">"</code>'</td>
|
177
104
|
</tr>
|
178
105
|
<tr id="grammar-production-STRING2">
|
179
106
|
<td>[17]</td>
|
180
107
|
<td><code>STRING2</code></td>
|
181
108
|
<td>::=</td>
|
182
|
-
<td>"<code class="grammar-literal"
|
109
|
+
<td>"<code class="grammar-literal">'</code>" <code>(</code> <a href="#grammar-production-CHAR">CHAR</a> <code>-</code> "<code class="grammar-literal">'</code>"<code>)</code> <code>*</code> "<code class="grammar-literal">'</code>"</td>
|
183
110
|
</tr>
|
184
111
|
<tr id="grammar-production-CHAR">
|
185
112
|
<td>[18]</td>
|
186
113
|
<td><code>CHAR</code></td>
|
187
114
|
<td>::=</td>
|
188
|
-
<td><code>[</code> <code class="grammar-char-escape"><abbr title="horizontal tab">#x09</abbr></code><code class="grammar-char-escape"><abbr title="new line">#x0A</abbr></code><code class="grammar-char-escape"><abbr title="carriage return">#x0D</abbr></code><code>]</code></td>
|
189
|
-
</tr>
|
190
|
-
<tr>
|
191
|
-
<td>[18]</td>
|
192
|
-
<td><code></code></td>
|
193
|
-
<td>|</td>
|
194
|
-
<td><code>[</code> <code class="grammar-char-escape"><abbr title="space">#x20</abbr></code><code class="grammar-literal">-</code><code class="grammar-char-escape"><abbr title="unicode ''">#xD7FF</abbr></code><code>]</code></td>
|
195
|
-
</tr>
|
196
|
-
<tr>
|
197
|
-
<td>[18]</td>
|
198
|
-
<td><code></code></td>
|
199
|
-
<td>|</td>
|
200
|
-
<td><code>[</code> <code class="grammar-char-escape"><abbr title="unicode ''">#xE000</abbr></code><code class="grammar-literal">-</code><code class="grammar-char-escape"><abbr title="unicode '�'">#xFFFD</abbr></code><code>]</code></td>
|
201
|
-
</tr>
|
202
|
-
<tr>
|
203
|
-
<td>[18]</td>
|
204
|
-
<td><code></code></td>
|
205
|
-
<td>|</td>
|
206
|
-
<td><code>[</code> <code class="grammar-char-escape"><abbr title="unicode '𐀀'">#x00010000</abbr></code><code class="grammar-literal">-</code><code class="grammar-char-escape"><abbr title="unicode ''">#x0010FFFF</abbr></code><code>]</code> </td>
|
115
|
+
<td><code>[</code> <code class="grammar-char-escape"><abbr title="horizontal tab">#x09</abbr></code><code class="grammar-char-escape"><abbr title="new line">#x0A</abbr></code><code class="grammar-char-escape"><abbr title="carriage return">#x0D</abbr></code><code>]</code> <code>|</code> <code>[</code> <code class="grammar-char-escape"><abbr title="space">#x20</abbr></code><code class="grammar-literal">-</code><code class="grammar-char-escape"><abbr title="unicode 'Reserved'">#xD7FF</abbr></code><code>]</code> <code>|</code> <code>[</code> <code class="grammar-char-escape"><abbr title="unicode 'Private-use'">#xE000</abbr></code><code class="grammar-literal">-</code><code class="grammar-char-escape"><abbr title="unicode 'Graphic'">#xFFFD</abbr></code><code>]</code> <code>|</code> <code>[</code> <code class="grammar-char-escape"><abbr title="unicode 'Graphic'">#x00010000</abbr></code><code class="grammar-literal">-</code><code class="grammar-char-escape"><abbr title="unicode 'Noncharacter'">#x0010FFFF</abbr></code><code>]</code> </td>
|
207
116
|
</tr>
|
208
117
|
<tr id="grammar-production-R_CHAR">
|
209
118
|
<td>[19]</td>
|
@@ -224,28 +133,24 @@
|
|
224
133
|
<td><code>[</code> <code class="grammar-char-escape"><abbr title="horizontal tab">#x09</abbr></code><code class="grammar-char-escape"><abbr title="new line">#x0A</abbr></code><code class="grammar-char-escape"><abbr title="carriage return">#x0D</abbr></code><code class="grammar-char-escape"><abbr title="space">#x20</abbr></code><code>]</code></td>
|
225
134
|
</tr>
|
226
135
|
<tr>
|
227
|
-
<td
|
228
|
-
<td><code></code></td>
|
136
|
+
<td colspan=2></td>
|
229
137
|
<td>|</td>
|
230
|
-
<td><code>(</code> <code>(</code> <code>(</code> "<code class="grammar-literal">#</code>" <code>-</code>
|
138
|
+
<td><code>(</code> <code>(</code> <code>(</code> "<code class="grammar-literal">#</code>" <code>-</code> "#x"<code>)</code> <code>|</code> "//"<code>)</code> <code>[</code> <code class="grammar-literal">^</code><code class="grammar-char-escape"><abbr title="new line">#x0A</abbr></code><code class="grammar-char-escape"><abbr title="carriage return">#x0D</abbr></code><code>]</code> <code>*</code> <code>)</code></td>
|
231
139
|
</tr>
|
232
140
|
<tr>
|
233
|
-
<td
|
234
|
-
<td><code></code></td>
|
141
|
+
<td colspan=2></td>
|
235
142
|
<td>|</td>
|
236
|
-
<td><code>(</code>
|
143
|
+
<td><code>(</code> "/*" <code>(</code> <code>(</code> "<code class="grammar-literal">*</code>" <code>[</code> <code class="grammar-literal">^/</code><code>]</code> <code>)</code> <code>?</code> <code>|</code> <code>[</code> <code class="grammar-literal">^*</code><code>]</code> <code>)</code> <code>*</code> "*/"<code>)</code></td>
|
237
144
|
</tr>
|
238
145
|
<tr>
|
239
|
-
<td
|
240
|
-
<td><code></code></td>
|
146
|
+
<td colspan=2></td>
|
241
147
|
<td>|</td>
|
242
|
-
<td><code>(</code>
|
148
|
+
<td><code>(</code> "(*" <code>(</code> <code>(</code> "<code class="grammar-literal">*</code>" <code>[</code> <code class="grammar-literal">^)</code><code>]</code> <code>)</code> <code>?</code> <code>|</code> <code>[</code> <code class="grammar-literal">^*</code><code>]</code> <code>)</code> <code>*</code> "*)"<code>)</code> </td>
|
243
149
|
</tr>
|
244
|
-
<tr
|
245
|
-
<td>@pass</td>
|
246
|
-
<td><code></code></td>
|
247
|
-
<td></td>
|
150
|
+
<tr>
|
151
|
+
<td colspan=2>@pass</td>
|
248
152
|
<td></td>
|
153
|
+
<td><a href="#grammar-production-PASS">PASS</a></td>
|
249
154
|
</tr>
|
250
155
|
</tbody>
|
251
156
|
</table>
|
data/etc/ebnf.ll1.rb
CHANGED
data/etc/ebnf.peg.rb
CHANGED
data/lib/ebnf/base.rb
CHANGED
@@ -220,9 +220,10 @@ module EBNF
|
|
220
220
|
# Output formatted EBNF as HTML
|
221
221
|
#
|
222
222
|
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
223
|
+
# @param [Boolean] validate (false) validate generated HTML.
|
223
224
|
# @return [String]
|
224
|
-
def to_html(format: :ebnf)
|
225
|
-
Writer.html(*ast, format: format)
|
225
|
+
def to_html(format: :ebnf, validate: false)
|
226
|
+
Writer.html(*ast, format: format, validate: validate)
|
226
227
|
end
|
227
228
|
|
228
229
|
##
|
data/lib/ebnf/ll1/lexer.rb
CHANGED
@@ -32,21 +32,7 @@ module EBNF::LL1
|
|
32
32
|
# @see https://en.wikipedia.org/wiki/Lexical_analysis
|
33
33
|
class Lexer
|
34
34
|
include Enumerable
|
35
|
-
|
36
|
-
ESCAPE_CHARS = {
|
37
|
-
'\\t' => "\t", # \u0009 (tab)
|
38
|
-
'\\n' => "\n", # \u000A (line feed)
|
39
|
-
'\\r' => "\r", # \u000D (carriage return)
|
40
|
-
'\\b' => "\b", # \u0008 (backspace)
|
41
|
-
'\\f' => "\f", # \u000C (form feed)
|
42
|
-
'\\"' => '"', # \u0022 (quotation mark, double quote mark)
|
43
|
-
"\\'" => '\'', # \u0027 (apostrophe-quote, single quote mark)
|
44
|
-
'\\\\' => '\\' # \u005C (backslash)
|
45
|
-
}.freeze
|
46
|
-
ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})/u.freeze # \uXXXX
|
47
|
-
ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})/u.freeze # \UXXXXXXXX
|
48
|
-
ECHAR = /\\./u.freeze # More liberal unescaping
|
49
|
-
UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/n.freeze
|
35
|
+
include ::EBNF::Unescape
|
50
36
|
|
51
37
|
##
|
52
38
|
# @return [Regexp] defines whitespace, including comments, otherwise whitespace must be explicit in terminals
|
@@ -61,17 +47,7 @@ module EBNF::LL1
|
|
61
47
|
# @return [String]
|
62
48
|
# @see https://www.w3.org/TR/rdf-sparql-query/#codepointEscape
|
63
49
|
def self.unescape_codepoints(string)
|
64
|
-
string
|
65
|
-
string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding)
|
66
|
-
|
67
|
-
# Decode \uXXXX and \UXXXXXXXX code points:
|
68
|
-
string = string.gsub(UCHAR) do |c|
|
69
|
-
s = [(c[2..-1]).hex].pack('U*')
|
70
|
-
s.respond_to?(:force_encoding) ? s.force_encoding(Encoding::ASCII_8BIT) : s
|
71
|
-
end
|
72
|
-
|
73
|
-
string.force_encoding(Encoding::UTF_8) if string.respond_to?(:force_encoding)
|
74
|
-
string
|
50
|
+
::EBNF::Unescape.unescape_codepoints(string)
|
75
51
|
end
|
76
52
|
|
77
53
|
##
|
@@ -83,7 +59,7 @@ module EBNF::LL1
|
|
83
59
|
# @return [String]
|
84
60
|
# @see https://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
|
85
61
|
def self.unescape_string(input)
|
86
|
-
|
62
|
+
::EBNF::Unescape.unescape_string(input)
|
87
63
|
end
|
88
64
|
|
89
65
|
##
|
@@ -338,7 +314,7 @@ module EBNF::LL1
|
|
338
314
|
# @return [String]
|
339
315
|
def unescape(string)
|
340
316
|
if @options[:unescape]
|
341
|
-
|
317
|
+
EBNF::Unescape.unescape(string)
|
342
318
|
else
|
343
319
|
string
|
344
320
|
end
|
data/lib/ebnf/native.rb
CHANGED
@@ -287,10 +287,10 @@ module EBNF
|
|
287
287
|
case m = s[0,1]
|
288
288
|
when '"', "'" # STRING1 or STRING2
|
289
289
|
l, s = s[1..-1].split(m.rstrip, 2)
|
290
|
-
[
|
290
|
+
[Unescape.unescape_string(l), s]
|
291
291
|
when '[' # RANGE, O_RANGE
|
292
292
|
l, s = s[1..-1].split(/(?<=[^\\])\]/, 2)
|
293
|
-
[[:range,
|
293
|
+
[[:range, Unescape.unescape_string(l)], s]
|
294
294
|
when '#' # HEX
|
295
295
|
s.match(/(#x\h+)(.*)$/)
|
296
296
|
l, s = $1, $2
|
data/lib/ebnf/peg/parser.rb
CHANGED
@@ -55,6 +55,7 @@ module EBNF::PEG
|
|
55
55
|
def production_handlers; (@production_handlers ||= {}); end
|
56
56
|
def terminal_handlers; (@terminal_handlers ||= {}); end
|
57
57
|
def terminal_regexps; (@terminal_regexps ||= {}); end
|
58
|
+
def terminal_options; (@terminal_options ||= {}); end
|
58
59
|
|
59
60
|
##
|
60
61
|
# Defines the pattern for a terminal node and a block to be invoked
|
@@ -72,9 +73,8 @@ module EBNF::PEG
|
|
72
73
|
# defaults to the expression defined in the associated rule.
|
73
74
|
# If unset, the terminal rule is used for matching.
|
74
75
|
# @param [Hash] options
|
75
|
-
# @option options [
|
76
|
-
#
|
77
|
-
# their canonical value
|
76
|
+
# @option options [Boolean] :unescape
|
77
|
+
# Cause strings and codepoints to be unescaped.
|
78
78
|
# @yield [value, prod]
|
79
79
|
# @yieldparam [String] value
|
80
80
|
# The scanned terminal value.
|
@@ -86,6 +86,7 @@ module EBNF::PEG
|
|
86
86
|
def terminal(term, regexp = nil, **options, &block)
|
87
87
|
terminal_regexps[term] = regexp if regexp
|
88
88
|
terminal_handlers[term] = block if block_given?
|
89
|
+
terminal_options[term] = options.freeze
|
89
90
|
end
|
90
91
|
|
91
92
|
##
|
@@ -100,6 +101,8 @@ module EBNF::PEG
|
|
100
101
|
# Options which are returned from {Parser#onStart}.
|
101
102
|
# @option options [Boolean] :as_hash (false)
|
102
103
|
# If the production is a `seq`, causes the value to be represented as a single hash, rather than an array of individual hashes for each sub-production. Note that this is not always advisable due to the possibility of repeated productions within the sequence.
|
104
|
+
# @option options[:upper, :lower] :insensitive_strings
|
105
|
+
# Perform case-insensitive match of strings not defined as terminals, and map to either upper or lower case.
|
103
106
|
# @yield [data, block]
|
104
107
|
# @yieldparam [Hash] data
|
105
108
|
# A Hash defined for the current production, during :start
|
@@ -182,6 +185,8 @@ module EBNF::PEG
|
|
182
185
|
# @option options[Integer] :high_water passed to lexer
|
183
186
|
# @option options [Logger] :logger for errors/progress/debug.
|
184
187
|
# @option options[Integer] :low_water passed to lexer
|
188
|
+
# @option options[Boolean] :seq_hash (false)
|
189
|
+
# If `true`, sets the default for the value sent to a production handler that is for a `seq` to a hash composed of the flattened constituent hashes that are otherwise provided.
|
185
190
|
# @option options [Symbol, Regexp] :whitespace
|
186
191
|
# Symbol of whitespace rule (defaults to `@pass`), or a regular expression
|
187
192
|
# for eating whitespace between non-terminal rules (strongly encouraged).
|
@@ -195,6 +200,7 @@ module EBNF::PEG
|
|
195
200
|
# @raise [Exception] Raises exceptions for parsing errors
|
196
201
|
# or errors raised during processing callbacks. Internal
|
197
202
|
# errors are raised using {Error}.
|
203
|
+
# @todo FIXME implement seq_hash
|
198
204
|
def parse(input = nil, start = nil, rules = nil, **options, &block)
|
199
205
|
start ||= options[:start]
|
200
206
|
rules ||= options[:rules] || []
|
@@ -467,10 +473,19 @@ module EBNF::PEG
|
|
467
473
|
#
|
468
474
|
# @param [Symbol] sym
|
469
475
|
# @return [Regexp]
|
470
|
-
def
|
476
|
+
def terminal_regexp(sym)
|
471
477
|
self.class.terminal_regexps[sym]
|
472
478
|
end
|
473
479
|
|
480
|
+
##
|
481
|
+
# Find the options defined for a terminal
|
482
|
+
#
|
483
|
+
# @param [Symbol] sym
|
484
|
+
# @return [Hash]
|
485
|
+
def terminal_options(sym)
|
486
|
+
self.class.terminal_options[sym]
|
487
|
+
end
|
488
|
+
|
474
489
|
##
|
475
490
|
# Record furthest failure.
|
476
491
|
#
|
data/lib/ebnf/peg/rule.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
module EBNF::PEG
|
2
2
|
# Behavior for parsing a PEG rule
|
3
3
|
module Rule
|
4
|
+
include ::EBNF::Unescape
|
5
|
+
|
4
6
|
##
|
5
7
|
# Initialized by parser when loading rules.
|
6
8
|
# Used for finding rules and invoking elements of the parse process.
|
@@ -24,6 +26,7 @@ module EBNF::PEG
|
|
24
26
|
# * `opt`: returns the value matched, or `nil` if unmatched.
|
25
27
|
# * `plus`: returns an array of the values matched for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.
|
26
28
|
# * `range`: returns a string composed of the values matched, or `:unmatched`, if less than `min` are matched.
|
29
|
+
# * `rept`: returns an array of the values matched for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.
|
27
30
|
# * `seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values. Via option in a `production` or definition, the result can be a single hash with values for each matched production; note that this is not always possible due to the possibility of repeated productions within the sequence.
|
28
31
|
# * `star`: returns an array of the values matched for the specified production. For Terminals, these are concatenated into a single string.
|
29
32
|
#
|
@@ -44,9 +47,18 @@ module EBNF::PEG
|
|
44
47
|
# If the terminal is defined with a regular expression,
|
45
48
|
# use that to match the input,
|
46
49
|
# otherwise,
|
47
|
-
if regexp = parser.
|
48
|
-
|
50
|
+
if regexp = parser.terminal_regexp(sym)
|
51
|
+
term_opts = parser.terminal_options(sym)
|
52
|
+
if matched = input.scan(regexp)
|
53
|
+
# Optionally map matched
|
54
|
+
matched = term_opts.fetch(:map, {}).fetch(matched.downcase, matched)
|
55
|
+
|
56
|
+
# Optionally unescape matched
|
57
|
+
matched = unescape(matched) if term_opts[:unescape]
|
58
|
+
end
|
59
|
+
|
49
60
|
result = parser.onTerminal(sym, (matched ? matched : :unmatched))
|
61
|
+
|
50
62
|
# Update furthest failure for strings and terminals
|
51
63
|
parser.update_furthest_failure(input.pos, input.lineno, sym) if result == :unmatched
|
52
64
|
parser.packrat[sym][pos] = {
|
@@ -60,6 +72,7 @@ module EBNF::PEG
|
|
60
72
|
eat_whitespace(input)
|
61
73
|
end
|
62
74
|
start_options = parser.onStart(sym)
|
75
|
+
string_regexp_opts = start_options[:insensitive_strings] ? Regexp::IGNORECASE : 0
|
63
76
|
|
64
77
|
result = case expr.first
|
65
78
|
when :alt
|
@@ -73,7 +86,12 @@ module EBNF::PEG
|
|
73
86
|
raise "No rule found for #{prod}" unless rule
|
74
87
|
rule.parse(input)
|
75
88
|
when String
|
76
|
-
input.scan(Regexp.new(Regexp.quote(prod)))
|
89
|
+
s = input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts))
|
90
|
+
case start_options[:insensitive_strings]
|
91
|
+
when :lower then s && s.downcase
|
92
|
+
when :upper then s && s.upcase
|
93
|
+
else s
|
94
|
+
end || :unmatched
|
77
95
|
end
|
78
96
|
if alt == :unmatched
|
79
97
|
# Update furthest failure for strings and terminals
|
@@ -111,7 +129,7 @@ module EBNF::PEG
|
|
111
129
|
raise "No rule found for #{prod}" unless rule
|
112
130
|
rule.parse(input)
|
113
131
|
when String
|
114
|
-
input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
|
132
|
+
input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts)) || :unmatched
|
115
133
|
end
|
116
134
|
if res != :unmatched
|
117
135
|
# Update furthest failure for terminals
|
@@ -122,7 +140,7 @@ module EBNF::PEG
|
|
122
140
|
end
|
123
141
|
when :opt
|
124
142
|
# Result is the matched value or nil
|
125
|
-
opt = rept(input, 0, 1, expr[1])
|
143
|
+
opt = rept(input, 0, 1, expr[1], string_regexp_opts, **start_options)
|
126
144
|
|
127
145
|
# Update furthest failure for strings and terminals
|
128
146
|
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
@@ -130,7 +148,7 @@ module EBNF::PEG
|
|
130
148
|
when :plus
|
131
149
|
# Result is an array of all expressions while they match,
|
132
150
|
# at least one must match
|
133
|
-
plus = rept(input, 1, '*', expr[1])
|
151
|
+
plus = rept(input, 1, '*', expr[1], string_regexp_opts)
|
134
152
|
|
135
153
|
# Update furthest failure for strings and terminals
|
136
154
|
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
@@ -142,6 +160,14 @@ module EBNF::PEG
|
|
142
160
|
parser.update_furthest_failure(input.pos, input.lineno, expr[1])
|
143
161
|
:unmatched
|
144
162
|
end
|
163
|
+
when :rept
|
164
|
+
# Result is an array of all expressions while they match,
|
165
|
+
# an empty array if none match
|
166
|
+
rept = rept(input, expr[1], expr[2], expr[3], string_regexp_opts)
|
167
|
+
|
168
|
+
# Update furthest failure for strings and terminals
|
169
|
+
parser.update_furthest_failure(input.pos, input.lineno, expr[3]) if terminal?
|
170
|
+
rept.is_a?(Array) && terminal? ? rept.join("") : rept
|
145
171
|
when :seq
|
146
172
|
# Evaluate each expression into an array of hashes where each hash contains a key from the associated production and the value is the parsed value of that production. Returns :unmatched if the input does not match the production. Value ordering is ensured by native Hash ordering.
|
147
173
|
seq = expr[1..-1].each_with_object([]) do |prod, accumulator|
|
@@ -152,7 +178,12 @@ module EBNF::PEG
|
|
152
178
|
raise "No rule found for #{prod}" unless rule
|
153
179
|
rule.parse(input)
|
154
180
|
when String
|
155
|
-
input.scan(Regexp.new(Regexp.quote(prod)))
|
181
|
+
s = input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts))
|
182
|
+
case start_options[:insensitive_strings]
|
183
|
+
when :lower then s && s.downcase
|
184
|
+
when :upper then s && s.upcase
|
185
|
+
else s
|
186
|
+
end || :unmatched
|
156
187
|
end
|
157
188
|
if res == :unmatched
|
158
189
|
# Update furthest failure for strings and terminals
|
@@ -173,7 +204,7 @@ module EBNF::PEG
|
|
173
204
|
when :star
|
174
205
|
# Result is an array of all expressions while they match,
|
175
206
|
# an empty array if none match
|
176
|
-
star = rept(input, 0, '*', expr[1])
|
207
|
+
star = rept(input, 0, '*', expr[1], string_regexp_opts)
|
177
208
|
|
178
209
|
# Update furthest failure for strings and terminals
|
179
210
|
parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
|
@@ -205,8 +236,9 @@ module EBNF::PEG
|
|
205
236
|
# @param [Integer] max
|
206
237
|
# If it is an integer, it stops matching after max entries.
|
207
238
|
# @param [Symbol, String] prod
|
239
|
+
# @param [Integer] string_regexp_opts
|
208
240
|
# @return [:unmatched, Array]
|
209
|
-
def rept(input, min, max, prod)
|
241
|
+
def rept(input, min, max, prod, string_regexp_opts, **options)
|
210
242
|
result = []
|
211
243
|
|
212
244
|
case prod
|
@@ -218,9 +250,13 @@ module EBNF::PEG
|
|
218
250
|
result << res
|
219
251
|
end
|
220
252
|
when String
|
221
|
-
while (res = input.scan(Regexp.new(Regexp.quote(prod)))) && (max == '*' || result.length < max)
|
253
|
+
while (res = input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts))) && (max == '*' || result.length < max)
|
222
254
|
eat_whitespace(input) unless terminal?
|
223
|
-
result <<
|
255
|
+
result << case options[:insensitive_strings]
|
256
|
+
when :lower then res.downcase
|
257
|
+
when :upper then res.upcase
|
258
|
+
else res
|
259
|
+
end
|
224
260
|
end
|
225
261
|
end
|
226
262
|
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
# Unescape strings
|
3
|
+
module EBNF::Unescape
|
4
|
+
ESCAPE_CHARS = {
|
5
|
+
'\\t' => "\t", # \u0009 (tab)
|
6
|
+
'\\n' => "\n", # \u000A (line feed)
|
7
|
+
'\\r' => "\r", # \u000D (carriage return)
|
8
|
+
'\\b' => "\b", # \u0008 (backspace)
|
9
|
+
'\\f' => "\f", # \u000C (form feed)
|
10
|
+
'\\"' => '"', # \u0022 (quotation mark, double quote mark)
|
11
|
+
"\\'" => '\'', # \u0027 (apostrophe-quote, single quote mark)
|
12
|
+
'\\\\' => '\\' # \u005C (backslash)
|
13
|
+
}.freeze
|
14
|
+
ESCAPE_CHAR4 = /\\u(?:[0-9A-Fa-f]{4,4})/u.freeze # \uXXXX
|
15
|
+
ESCAPE_CHAR8 = /\\U(?:[0-9A-Fa-f]{8,8})/u.freeze # \UXXXXXXXX
|
16
|
+
ECHAR = /\\./u.freeze # More liberal unescaping
|
17
|
+
UCHAR = /#{ESCAPE_CHAR4}|#{ESCAPE_CHAR8}/n.freeze
|
18
|
+
|
19
|
+
##
|
20
|
+
# Returns a copy of the given `string` with all `\uXXXX` and
|
21
|
+
# `\UXXXXXXXX` Unicode codepoint escape sequences replaced with their
|
22
|
+
# unescaped UTF-8 character counterparts.
|
23
|
+
#
|
24
|
+
# @param [String] string
|
25
|
+
# @return [String]
|
26
|
+
# @see https://www.w3.org/TR/rdf-sparql-query/#codepointEscape
|
27
|
+
def unescape_codepoints(string)
|
28
|
+
string = string.dup
|
29
|
+
string.force_encoding(Encoding::ASCII_8BIT) if string.respond_to?(:force_encoding)
|
30
|
+
|
31
|
+
# Decode \uXXXX and \UXXXXXXXX code points:
|
32
|
+
string = string.gsub(UCHAR) do |c|
|
33
|
+
s = [(c[2..-1]).hex].pack('U*')
|
34
|
+
s.respond_to?(:force_encoding) ? s.force_encoding(Encoding::ASCII_8BIT) : s
|
35
|
+
end
|
36
|
+
|
37
|
+
string.force_encoding(Encoding::UTF_8) if string.respond_to?(:force_encoding)
|
38
|
+
string
|
39
|
+
end
|
40
|
+
module_function :unescape_codepoints
|
41
|
+
|
42
|
+
##
|
43
|
+
# Returns a copy of the given `input` string with all string escape
|
44
|
+
# sequences (e.g. `\n` and `\t`) replaced with their unescaped UTF-8
|
45
|
+
# character counterparts.
|
46
|
+
#
|
47
|
+
# @param [String] input
|
48
|
+
# @return [String]
|
49
|
+
# @see https://www.w3.org/TR/rdf-sparql-query/#grammarEscapes
|
50
|
+
def unescape_string(input)
|
51
|
+
input.gsub(ECHAR) { |escaped| ESCAPE_CHARS[escaped] || escaped[1..-1]}
|
52
|
+
end
|
53
|
+
module_function :unescape_string
|
54
|
+
|
55
|
+
# Perform string and codepoint unescaping if defined for this terminal
|
56
|
+
# @param [String] string
|
57
|
+
# @return [String]
|
58
|
+
def unescape(string)
|
59
|
+
unescape_string(unescape_codepoints(string))
|
60
|
+
end
|
61
|
+
module_function :unescape
|
62
|
+
end
|
data/lib/ebnf/writer.rb
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
require 'rdf'
|
3
3
|
require 'strscan' unless defined?(StringScanner)
|
4
4
|
require "ostruct"
|
5
|
+
require 'unicode/types'
|
5
6
|
|
6
7
|
##
|
7
8
|
# Serialize ruleset back to EBNF
|
@@ -86,22 +87,23 @@ module EBNF
|
|
86
87
|
#
|
87
88
|
# @param [Array<Rule>] rules
|
88
89
|
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
90
|
+
# @param [Boolean] validate (false) validate generated HTML.
|
89
91
|
# @return [Object]
|
90
|
-
def self.html(*rules, format: :ebnf)
|
92
|
+
def self.html(*rules, format: :ebnf, validate: false)
|
91
93
|
require 'stringio' unless defined?(StringIO)
|
92
94
|
buf = StringIO.new
|
93
|
-
Writer.new(rules, out: buf, html: true, format: format)
|
95
|
+
Writer.new(rules, out: buf, html: true, format: format, validate: validate)
|
94
96
|
buf.string
|
95
97
|
end
|
96
98
|
|
97
99
|
##
|
98
100
|
# @param [Array<Rule>] rules
|
101
|
+
# @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
|
102
|
+
# @param [Boolean] html (false) generate HTML output
|
103
|
+
# @param [Boolean] validate (false) validate generated HTML.
|
99
104
|
# @param [Hash{Symbol => Object}] options
|
100
105
|
# @param [#write] out ($stdout)
|
101
|
-
|
102
|
-
# @option options [Symbol] format
|
103
|
-
# @option options [Boolean] html (false)
|
104
|
-
def initialize(rules, out: $stdout, html: false, format: :ebnf, **options)
|
106
|
+
def initialize(rules, out: $stdout, html: false, format: :ebnf, validate: false, **options)
|
105
107
|
@options = options.merge(html: html)
|
106
108
|
return if rules.empty?
|
107
109
|
|
@@ -133,7 +135,10 @@ module EBNF
|
|
133
135
|
OpenStruct.new(id: ("@#{rule.kind}"),
|
134
136
|
sym: nil,
|
135
137
|
assign: nil,
|
136
|
-
formatted: (
|
138
|
+
formatted: (
|
139
|
+
rule.kind == :terminals ?
|
140
|
+
"<strong># Productions for terminals</strong>" :
|
141
|
+
self.send(format_meth, rule.expr)))
|
137
142
|
else
|
138
143
|
formatted_expr = self.send(format_meth, rule.expr)
|
139
144
|
# Measure text without markup
|
@@ -171,7 +176,21 @@ module EBNF
|
|
171
176
|
end
|
172
177
|
end
|
173
178
|
end.flatten
|
174
|
-
|
179
|
+
|
180
|
+
html_result = eruby.evaluate(format: format, rules: formatted_rules)
|
181
|
+
|
182
|
+
if validate
|
183
|
+
begin
|
184
|
+
# Validate the output HTML
|
185
|
+
doc = Nokogiri::HTML5("<!DOCTYPE html>" + html_result, max_errors: 10)
|
186
|
+
raise EncodingError, "Errors found in generated HTML:\n " +
|
187
|
+
doc.errors.map(&:to_s).join("\n ") unless doc.errors.empty?
|
188
|
+
rescue LoadError, NoMethodError
|
189
|
+
# Skip
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
out.write html_result
|
175
194
|
return
|
176
195
|
rescue LoadError
|
177
196
|
$stderr.puts "Generating HTML requires erubis and htmlentities gems to be loaded"
|
@@ -344,16 +363,20 @@ module EBNF
|
|
344
363
|
end
|
345
364
|
char = fmt % u.ord
|
346
365
|
if @options[:html]
|
347
|
-
if u.ord <= 0x20
|
348
|
-
|
366
|
+
char = if u.ord <= 0x20
|
367
|
+
%(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{@coder.encode char}</abbr>)
|
368
|
+
elsif u.ord == 0x22
|
369
|
+
%(<abbr title="quot">>"</abbr>)
|
349
370
|
elsif u.ord < 0x7F
|
350
|
-
|
371
|
+
%(<abbr title="ascii '#{@coder.encode u}'">#{@coder.encode char}</abbr>)
|
351
372
|
elsif u.ord == 0x7F
|
352
|
-
|
373
|
+
%(<abbr title="delete">#{@coder.encode char}</abbr>)
|
353
374
|
elsif u.ord <= 0xFF
|
354
|
-
|
375
|
+
%(<abbr title="extended ascii '#{@coder.encode char}'">#{char}</abbr>)
|
376
|
+
elsif (%w(Control Private-use Surrogate Noncharacter Reserved) - ::Unicode::Types.of(u)).empty?
|
377
|
+
%(<abbr title="unicode '#{u}'">#{char}</abbr>)
|
355
378
|
else
|
356
|
-
|
379
|
+
%(<abbr title="unicode '#{::Unicode::Types.of(u).first}'">#{char}</abbr>)
|
357
380
|
end
|
358
381
|
%(<code class="grammar-char-escape">#{char}</code>)
|
359
382
|
else
|
@@ -452,7 +475,7 @@ module EBNF
|
|
452
475
|
# Format a single-character string, preferring hex for non-main ASCII
|
453
476
|
def format_abnf_char(c)
|
454
477
|
if /[\x20-\x21\x23-\x7E]/.match?(c)
|
455
|
-
c.inspect
|
478
|
+
@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : c.inspect
|
456
479
|
else
|
457
480
|
escape_abnf_hex(c)
|
458
481
|
end
|
@@ -533,14 +556,16 @@ module EBNF
|
|
533
556
|
if @options[:html]
|
534
557
|
if u.ord <= 0x20
|
535
558
|
char = %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{@coder.encode char}</abbr>)
|
536
|
-
elsif u.ord
|
559
|
+
elsif u.ord == 0x22
|
560
|
+
%(<abbr title="quot">>"</abbr>)
|
561
|
+
elsif u.ord < 0x7F
|
537
562
|
char = %(<abbr title="ascii '#{u}'">#{@coder.encode char}</abbr>)
|
538
563
|
elsif u.ord == 0x7F
|
539
564
|
char = %(<abbr title="delete">#{@coder.encode char}</abbr>)
|
540
565
|
elsif u.ord <= 0xFF
|
541
566
|
char = %(<abbr title="extended ascii '#{u}'">#{char}</abbr>)
|
542
567
|
else
|
543
|
-
char = %(<abbr title="unicode '#{u}'">#{char}</abbr>)
|
568
|
+
char = %(<abbr title="unicode '#{u.unicode_normaliz}'">#{char}</abbr>)
|
544
569
|
end
|
545
570
|
%(<code class="grammar-char-escape">#{char}</code>)
|
546
571
|
else
|
@@ -683,11 +708,13 @@ module EBNF
|
|
683
708
|
<table class="grammar">
|
684
709
|
<tbody id="grammar-productions" class="<%= @format %>">
|
685
710
|
<% for rule in @rules %>
|
686
|
-
<tr<%= %{ id="grammar-production-#{rule.sym}"} unless %w(=/ |).include?(rule.assign)
|
711
|
+
<tr<%= %{ id="grammar-production-#{rule.sym}"} unless %w(=/ |).include?(rule.assign) || rule.sym.nil?%>>
|
687
712
|
<% if rule.id %>
|
688
|
-
<td
|
713
|
+
<td<%= " colspan=2" unless rule.sym %>><%= rule.id %></td>
|
689
714
|
<% end %>
|
715
|
+
<% if rule.sym %>
|
690
716
|
<td><code><%== rule.sym %></code></td>
|
717
|
+
<% end %>
|
691
718
|
<td><%= rule.assign %></td>
|
692
719
|
<td><%= rule.formatted %></td>
|
693
720
|
</tr>
|
data/lib/ebnf.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ebnf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gregg Kellogg
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-08-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sxp
|
@@ -66,6 +66,34 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '4.3'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: unicode-types
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.6'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.6'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: amazing_print
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.2'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.2'
|
69
97
|
- !ruby/object:Gem::Dependency
|
70
98
|
name: rdf-spec
|
71
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -128,14 +156,14 @@ dependencies:
|
|
128
156
|
requirements:
|
129
157
|
- - "~>"
|
130
158
|
- !ruby/object:Gem::Version
|
131
|
-
version: '3.
|
159
|
+
version: '3.10'
|
132
160
|
type: :development
|
133
161
|
prerelease: false
|
134
162
|
version_requirements: !ruby/object:Gem::Requirement
|
135
163
|
requirements:
|
136
164
|
- - "~>"
|
137
165
|
- !ruby/object:Gem::Version
|
138
|
-
version: '3.
|
166
|
+
version: '3.10'
|
139
167
|
- !ruby/object:Gem::Dependency
|
140
168
|
name: rspec-its
|
141
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -240,13 +268,14 @@ files:
|
|
240
268
|
- lib/ebnf/peg/rule.rb
|
241
269
|
- lib/ebnf/rule.rb
|
242
270
|
- lib/ebnf/terminals.rb
|
271
|
+
- lib/ebnf/unescape.rb
|
243
272
|
- lib/ebnf/version.rb
|
244
273
|
- lib/ebnf/writer.rb
|
245
274
|
homepage: https://github.com/dryruby/ebnf
|
246
275
|
licenses:
|
247
276
|
- Unlicense
|
248
277
|
metadata: {}
|
249
|
-
post_install_message:
|
278
|
+
post_install_message:
|
250
279
|
rdoc_options: []
|
251
280
|
require_paths:
|
252
281
|
- lib
|
@@ -261,8 +290,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
261
290
|
- !ruby/object:Gem::Version
|
262
291
|
version: '0'
|
263
292
|
requirements: []
|
264
|
-
rubygems_version: 3.
|
265
|
-
signing_key:
|
293
|
+
rubygems_version: 3.2.15
|
294
|
+
signing_key:
|
266
295
|
specification_version: 4
|
267
|
-
summary: EBNF parser and parser generator.
|
296
|
+
summary: EBNF parser and parser generator in Ruby.
|
268
297
|
test_files: []
|