abnftt 0.2.4 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 152ef34be3428f0a7cda4f157ec08a1986f5cf295436be8186ee53d61eddb386
4
- data.tar.gz: fdc3504304d4e9c5244dcfce7a2e103e2801d3f6087de307bcd976a099fe0b5d
3
+ metadata.gz: 267bcebab4bd13c22da536e87b6ccd3444b214db589a5e9d4b054a56ffe3a686
4
+ data.tar.gz: cf4c222a9825e210e635989837ba43432e87661ba6e5e98a9a52aedcfbdad055
5
5
  SHA512:
6
- metadata.gz: e5b786e95c96464bf516e2d6ba00f1363a6a720d0dfd01b83401fef3ade2dc51116fda365881fb770fa9756f0ec71a14e7c6e85e7e3a7bc3589a930e8b9e5381
7
- data.tar.gz: '08eb748abf720cfdfe4a742e675e7b5b0144a60cdd2fce90f85783a2b7eac2d856e138f6536394dcccc123e72eb6fa8439b472b1e8ff26fff7630938445b2316'
6
+ metadata.gz: 28fe16c851f050e8072bdf939b45569fde9233fbeb26a2e43169fa04b1c66e85e43e893056faab3b87a4e5c49f2f7cea08274dd852c26f8d42688a85198eab57
7
+ data.tar.gz: 0f4977a6b13b8b4a621b594a31ad87d6759b5780bb59b98f7afc3ab4ac3e70fd2f9ee5cb9f710b8db1ce79e8f411734ec1517ad829456f3cd0f4a383f683d721
data/abnftt.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "abnftt"
3
- s.version = "0.2.4"
3
+ s.version = "0.2.6"
4
4
  s.summary = "RFC 5234+7405 ABNF to Treetop"
5
5
  s.description = %q{Less shifty support for tools based on IETF's ABNF}
6
6
  s.author = "Carsten Bormann"
data/bin/abnfrob ADDED
@@ -0,0 +1,146 @@
1
#!/usr/bin/env ruby -Ku
# abnfrob: parse an ABNF grammar file and write its rules back out as ABNF,
# JSON, YAML or a pretty-printed Ruby structure. Optionally "squashes" the
# grammar under an application prefix first (--squash) and/or pipes the
# generated ABNF through the external `bap` tool (--bap).
require 'pp'
require 'yaml'
require 'pathname' # Pathname is used below to build the output base name

Encoding.default_external = Encoding::UTF_8
require 'optparse'
require 'ostruct'

$options = OpenStruct.new
begin
  op = OptionParser.new do |opts|
    # Looking up the gem spec fails (NoMethodError on nil) when there is no
    # loaded 'abnftt' spec; fall back to a placeholder version in that case.
    opts.version =
      begin
        "(from abnftt #{Gem.loaded_specs['abnftt'].version})"
      rescue StandardError
        "unknown-version"
      end

    opts.banner = "Usage: abnfrob [options] file.abnf"
    opts.on("-b", "--bap=[OPTIONS]", "Pretty-print using bap") do |v|
      $options.bap = true
      $options.bap_options = v ? " #{v}" : ""
      # warn "** bap_options #{$options.bap_options.inspect}"
    end
    opts.on("-tFMT", "--to=FMT", [:abnf, :json, :pp, :yaml], "Target format") do |v|
      $options.target = v
    end
    opts.on("-y", "--yaml", "Output separate YAML copy of rules") do |v|
      $options.yaml = v
    end
    opts.on("-a", "--asr33", "Line-break to fit on teletype") do |v|
      $options.asr33 = v
    end
    opts.on("--squash=PREFIX", String, "Squash to app-prefix") do |v|
      $options.squash = v
    end
  end
  op.parse!
rescue StandardError => e
  warn e
  exit 1
end

require 'abnftt'

# Exactly one input file is required and its extension must start with
# ".abnf". This is a prefix match, so e.g. ".abnftt" is accepted as before;
# the dot is escaped only to make the intent explicit.
fn = ARGV[0]
unless fn && File.extname(fn) =~ /\A\.abnf/
  warn op
  exit 1
end
# Output base name: the input path with its extension removed.
outfn = (Pathname.new(File.dirname(fn)) + File.basename(fn, ".*")).to_s

parser = ABNFGrammarParser.new
abnf_file = File.read(fn)
ast = parser.parse abnf_file
unless ast
  # Report the parse failure: full reason, the "Expected ..." part with
  # newlines made visible, the offending source line and a column marker.
  reason = parser.failure_reason
  puts reason
  # The reason does not necessarily match the pattern; only print the
  # "Expected ..." line when it does instead of crashing on a nil capture.
  if (expected = reason.to_s[/^(Expected .+) after/m, 1])
    puts "#{expected.gsub("\n", '<<<NEWLINE>>>')}:"
  end
  puts abnf_file.lines.to_a[parser.failure_line - 1]
  puts "#{'~' * (parser.failure_column - 1)}^"
  exit 1
end

abnf = ABNF.new(ast)

if $options.yaml
  File.open("#{outfn}.yaml", "w") do |f|
    f.puts abnf.rules.to_yaml
  end
end

if $options.squash
  require 'abnftt/abnf-flattener.rb'
  require 'abnftt/abnf-util.rb'
  require 'abnftt/abnf-squasher.rb'

  prefix = $options.squash + "-"
  # Matches an already-present prefix (or nothing) at the start of a name, so
  # that sub() leaves every name with the prefix exactly once. The prefix is
  # user input, hence escaped before being embedded in the pattern.
  prefix_re = /^(?:#{Regexp.escape(prefix)})?/

  abnf.flatten_ops
  abnf.flatten_strings

  abnf.squash_edn_levels
  abnf.char_range_to_string

  abnf.share_hex("sq")

  abnf.share_alt("sq")

  # Prefix every rule name, both on the left-hand side and wherever a rule
  # is referenced (String nodes) inside a production.
  renamed = abnf.rules.map do |k, v|
    [k.sub(prefix_re, prefix),
     abnf.visit(v) do |prod|
       if String === prod
         [true, prod.sub(prefix_re, prefix)]
       end
     end]
  end
  abnf.rules.replace(Hash[renamed])

  # Put a new outer rule in front that wraps the first rule in
  # <squash-prefix>' ... ' delimiters.
  rule1 = abnf.rules.first
  outer_name = "sq-#{rule1[0]}"
  outer_elements = ["seq",
                    ["cs", $options.squash + "'"],
                    rule1[0],
                    ["cs", "'"]]
  abnf.rules.replace(Hash[[[outer_name, outer_elements],
                           *abnf.rules.to_a]])
  File.open("#{outfn}-sq.yaml", "w") do |f|
    f.puts abnf.rules.to_yaml
  end
end

## Work on abnf.rules

case $options.target
when :pp
  pp abnf.rules
when :json
  require 'neatjson'
  puts JSON.neat_generate(abnf.rules, after_comma: 1, after_colon: 1)
when :yaml
  puts abnf.rules.to_yaml
when :abnf, nil
  require_relative '../lib/abnftt/abnf-writer.rb'
  result = abnf.to_s

  if $options.bap
    require 'open3'
    # Feed the generated ABNF to bap on stdin; its stdout replaces the result.
    result, err, status =
      Open3.capture3("bap -o RFC7405#{$options.bap_options}",
                     stdin_data: result)
    warn err.gsub(/^/, "** ") unless err == ""
    unless status.success?
      warn "*** Giving up"
      exit 1
    end
  end

  if $options.asr33
    result = abnf.breaker(result)
  end

  puts result
  # NOTE(review): the "-sq.abnf" copy is written whether or not --squash was
  # given; kept as in the original.
  File.open("#{outfn}-sq.abnf", "w") do |f|
    f.puts result
  end
else
  warn ["Unknown target format: ", $options.target].inspect
end
data/bin/abnfrob~ ADDED
@@ -0,0 +1,40 @@
1
#!/usr/bin/env ruby -Ku

# Parse the grammar file named on the command line and write three sibling
# files next to it: <base>.yaml (the parsed tree), <base>.treetop (a Treetop
# grammar) and <base>.abnf (the cleaned-up ABNF text). On a parse failure a
# diagnostic is printed instead.
require 'pp'
require 'yaml'

Encoding.default_external = Encoding::UTF_8

require 'abnftt'

unless fn = ARGV[0]
  warn "Usage: abnftt grammar.abnftt"
  exit 1
end
# Output base name: the input name without a trailing ".abnftt".
outfn = fn.sub(/\.abnftt\z/, "")

parser = ABNFGrammarParser.new
abnf_file = File.read(fn)
ast = parser.parse abnf_file
if ast
  # p ast
  abnf = ABNF.new(ast)
  File.open("#{outfn}.yaml", "w") do |f|
    f.puts abnf.tree.to_yaml
  end
  # pp ast.ast
  File.open("#{outfn}.treetop", "w") do |f|
    # Module name: base file name with "-" turned into "_", all other
    # characters outside [_a-zA-Z0-9] dropped, then upcased.
    modname = File.basename(outfn).gsub("-", "_").gsub(/[^_a-zA-Z0-9]/, "").upcase
    f.puts abnf.to_treetop(modname)
  end
  File.open("#{outfn}.abnf", "w") do |f|
    f.puts ast.clean_abnf.lines.map(&:rstrip).join("\n")
  end
else
  reason = parser.failure_reason
  puts reason
  # Only print the "Expected ..." summary when the reason actually has that
  # shape; the original dereferenced a nil capture otherwise.
  if (expected = reason.to_s[/^(Expected .+) after/m, 1])
    puts "#{expected.gsub("\n", '<<<NEWLINE>>>')}:"
  end
  puts abnf_file.lines.to_a[parser.failure_line - 1]
  puts "#{'~' * (parser.failure_column - 1)}^"
end
@@ -0,0 +1,32 @@
1
+ require "abnftt/abnf-visitor.rb"
2
+
3
class ABNF
  # Append the elements of +s+ to +out+ (which starts out as [op]), splicing
  # in the children of any element that is itself an +op+ node so that nested
  # nodes of the same operator collapse into one level. Every other element
  # is flattened recursively via flatten_ops_1. Returns +out+.
  def expand_op_into(s, op, out = [op])
    s.each_with_object(out) do |el, acc|
      if (el in [^op, *inner])
        expand_op_into(inner, op, acc)
      else
        acc << flatten_ops_1(el)
      end
    end
  end

  # Return a copy of production +prod+ in which directly nested "seq" within
  # "seq" and "alt" within "alt" nodes have been merged.
  def flatten_ops_1(prod)
    visit(prod) do |node|
      if (node in ["seq" | "alt" => op, *children])
        [true, expand_op_into(children, op)]
      else
        false
      end
    end
  end

  # Apply flatten_ops_1 to every rule, replacing each production in place.
  def flatten_ops
    rules.each do |rule_name, production|
      rules[rule_name] = flatten_ops_1(production)
    end
  end
end
@@ -0,0 +1,57 @@
1
+ require "abnftt"
2
+ require "abnftt/abnf-flattener"
3
+ require "abnftt/abnf-util"
4
+ require "abnftt/abnf-visitor"
5
+
6
class ABNF

  # Character ranges, as [first, last] pairs of one-character UTF-8 strings,
  # built from the code point pairs below. The gaps in the list are
  # 0x27 ("'"), 0x5c ("\") and the surrogate block 0xd800..0xdfff.
  # Earlier variants of the list, kept for reference:
  #   [[0xA, 0xA], [0x20, 0x21], [0x23, 0x26], -- but DQUOTE is allowed
  #   [0x28, 0x5b], [0x5d, 0x7e], [0xa0, 0xd7ff], -- but JSON allows 7F-9F
  UNESCAPED_SQSTR_RANGES =
    [[0xA, 0xA], [0x20, 0x26],     # "'"
     [0x28, 0x5b], [0x5d, 0xd7ff], # \
     [0xe000, 0x10ffff]].map do |first, last|
      [first.chr(Encoding::UTF_8), last.chr(Encoding::UTF_8)]
    end

  # Pairs of [character, letter]: the character is matched as a backslash
  # followed by the letter.
  ESCAPED_SQSTR_MAPPINGS = [
    ["\x08", "b"],
    ["\x09", "t"],
    ["\x0A", "n"],
    ["\x0C", "f"],
    ["\x0D", "r"],
    ["\x27", "'"],
    ["\x2F", "/"],
    ["\x5C", "\\"]]

  # Replace every "char-range" node in +prod+ by an alternation of
  #   * the parts of the range that overlap UNESCAPED_SQSTR_RANGES,
  #   * backslash-letter sequences for the ESCAPED_SQSTR_MAPPINGS characters
  #     that fall inside the range, and
  #   * a "\u" sequence followed by either the legacy or the modern hex form
  #     of the range,
  # then flatten nested seq/alt operators in the result.
  def squash_edn_levels_1(prod)
    squashed = visit(prod) do |node|
      next false unless (node in ["char-range", c1, c2])

      literal = UNESCAPED_SQSTR_RANGES.filter_map { |u1, u2| overlap(node, u1, u2) }
      escaped = ESCAPED_SQSTR_MAPPINGS.filter_map do |cv, ev|
        ["seq", ["char-range", "\\", "\\"], ["char-range", ev, ev]] if cv >= c1 && cv <= c2
      end
      legacy_hex = alt_ranges_legacy(c1.ord, c2.ord)
      modern_hex = alt_ranges_modern(c1.ord, c2.ord)
      unicode_escape = ["seq",
                        ["cs", "\\u"],
                        wrap_flat("alt", [legacy_hex, modern_hex])]
      [true, wrap_flat("alt", [*literal, *escaped, unicode_escape].sort)]
    end
    flatten_ops_1(squashed)
  end

  # Apply squash_edn_levels_1 to every rule, replacing each production.
  def squash_edn_levels
    rules.each do |rule_name, production|
      rules[rule_name] = squash_edn_levels_1(production)
    end
  end

end
@@ -0,0 +1,371 @@
1
+ require "abnftt/abnf-visitor"
2
+ require "abnftt/abnf-flattener"
3
+
4
class ABNF

  # Return the only element of +all+ if there is exactly one, otherwise a
  # node [head, *all].
  def wrap(head, all)
    if all.size == 1
      all.first
    else
      [head, *all]
    end
  end

  # Like wrap, but when building the node, children that are themselves
  # +head+ nodes are spliced in (one level deep) instead of being nested.
  def wrap_flat(head, all)
    return all.first if all.size == 1

    [head, *all.flat_map { |el|
      case el
      in [^head, *rest]
        rest
      else
        [el]
      end
    }]
  end

  # Intersect the char-range node +cr+ (["char-range", first, last]) with the
  # range l..r. Returns the overlapping char-range node, or nil if the two
  # ranges do not overlap.
  def overlap(cr, l, r)
    return unless cr[2] >= l && cr[1] <= r

    ["char-range", [cr[1], l].max, [cr[2], r].min]
  end

  # Utilities for creating hexadecimal rules from unsigned integers

  # Build a production matching one hex digit whose value lies in l..r
  # (0..15): a "0".."9" part where the range covers decimal digits, plus
  # upper- and lower-case letter parts where it covers 10..15.
  def hexdig_range(l, r)
    alt = []
    if l < 10
      alt << ["char-range",
              (l + 0x30).chr(Encoding::UTF_8),
              ([r, 9].min + 0x30).chr(Encoding::UTF_8)]
    end
    if r >= 10
      first_letter = [l, 10].max
      alt << ["char-range", (first_letter + 0x41 - 0xA).chr(Encoding::UTF_8),
              (r + 0x41 - 0xA).chr(Encoding::UTF_8)]
      alt << ["char-range", (first_letter + 0x61 - 0xA).chr(Encoding::UTF_8),
              (r + 0x61 - 0xA).chr(Encoding::UTF_8)]
    end
    wrap("alt", alt)
  end

  # Build a sequence of hex-digit productions, most significant digit first,
  # pairing digit i of l with digit i of r. +ndig+ fixes the number of digits
  # (zero padded); when false, the number of hex digits of r is used.
  # This assumes l and r are preprocessed to have single or full ranges except in one place
  def hex_ranges(l, r, ndig = false)
    ld = l.digits(16) # least significant digit first
    rd = r.digits(16)
    ndig ||= rd.size
    seq = (0...ndig).map do |dig|
      hexdig_range(ld[dig] || 0, rd[dig] || 0)
    end
    wrap("seq", seq.reverse)
  end

  # Yield (first, last, column) triples that partition l..r into sub-ranges
  # aligned on +step+-bit digit boundaries.
  # split range into passages that have the property needed for hex_ranges
  def do_range(l, r, step = 4)
    column = 0
    while l <= r
      mask = (1 << step * (column + 1)) - 1
      new_r = l | mask # l with the low (column + 1) digits all set
      if new_r > r # right hand side: come down from mountain
        while column >= 0
          mask >>= step
          new_r = (r + 1) & ~mask
          yield l, new_r - 1, column + 1 if l != new_r
          l = new_r
          column -= 1
        end
        return
      else
        column += 1
        if (l & mask) != 0
          yield l, new_r, column
          l = new_r + 1
        end
      end
    end
  end

  # Support legacy JSON \u/\u\u and \u{...} hex unicode

  # Build an alternation of hex-digit sequences that together match the
  # numbers l..r (l and r may be integers or one-character strings; both
  # respond to #ord). +ndig+ is passed on to hex_ranges.
  def alt_ranges(l, r, step = 4, ndig = false)
    alt = []
    do_range(l.ord, r.ord, step) do |lo, hi, _column|
      alt << hex_ranges(lo, hi, ndig)
    end
    wrap("alt", alt.reverse) # work around prioritized choice
  end

  # Hex productions for code points l..r in the legacy form: four hex digits
  # for values below 0x10000, and for values from 0x10000 up a pair of
  # four-digit values offset from 0xD800 and 0xDC00, separated by "\u".
  def alt_ranges_legacy(l, r)
    alt = []
    if l < 0x10000
      alt << ["alt", alt_ranges(l, [r, 0xFFFF].min, 4, 4)]
    end
    if r >= 0x10000
      l1 = [l, 0x10000].max - 0x10000
      r1 = r - 0x10000
      # Split on 10-bit boundaries: high 10 bits go into the first value,
      # low 10 bits into the second.
      do_range(l1, r1, 10) do |l2, r2, _column|
        alt << ["seq",
                alt_ranges((l2 >> 10) + 0xD800, (r2 >> 10) + 0xD800, 4, 4),
                expand_string("\\u"),
                alt_ranges((l2 & 0x3FF) + 0xDC00, (r2 & 0x3FF) + 0xDC00, 4, 4)]
      end
    end
    wrap_flat("alt", alt)
  end

  # Hex productions for code points l..r in the braced form: "{", any number
  # of leading "0" characters, the hex digits, "}".
  def alt_ranges_modern(l, r, step = 4)
    ["seq",
     expand_string("{"),
     ["rep", 0, true, ["cs", "0"]],
     alt_ranges(l, r, 4, false),
     expand_string("}")]
  end

  # flatten_strings: reduce all strings to char-range/seq/alt

  # Turn string +s+ into a sequence of single-character char-range nodes.
  # With +case_fold+, a character whose upper- and lower-case forms differ
  # becomes an alternation of the two forms (upper-case first).
  def expand_string(s, case_fold = false)
    wrap("seq",
         s.chars.map do |ch|
           if case_fold &&
              (u = ch.upcase; d = ch.downcase; u != d)
             ["alt", expand_string(u), expand_string(d)]
           else
             ["char-range", ch, ch]
           end
         end)
  end

  # Replace "cs" (case-sensitive) and "ci" (case-insensitive) string nodes in
  # +prod+ by char-range structures, then flatten operators and merge
  # adjacent ranges.
  def flatten_strings_1(prod)
    f1 = visit(prod) do |here|
      case here
      in ["cs", string]
        [true, expand_string(string, false)]
      in ["ci", string]
        [true, expand_string(string, true)]
      else
        false
      end
    end
    merge_strings_1(flatten_ops_1(f1))
  end

  # Within every "alt" node of +prod+, sort each run of consecutive
  # char-range children and merge ranges that are directly adjacent
  # (next.first == previous.last + 1). Other children are processed
  # recursively. The input tree is left untouched; a new node is returned.
  def merge_strings_1(prod)
    visit(prod) do |here|
      case here
      in ["alt", *rest]
        # Work on a copy so the node handed in by the caller is not modified.
        merged = ["alt", *rest]
        ranges = [] # [first, last] index pairs (into rest) of char-range runs
        i = 0
        while i < rest.size
          case rest[i]
          in ["char-range", _, _]
            j = i
            while j + 1 < rest.size && (rest[j + 1] in ["char-range", _, _])
              j += 1
            end
            ranges << [i, j] if i != j # inclusive right
            i = j
          else
            merged[i + 1] = merge_strings_1(rest[i]) # XXX could be part of a range
          end
          i += 1
        end
        # Back to front, so that shrinking one run does not shift the
        # indices of the runs still to be processed.
        ranges.reverse_each do |first, last|
          sorted = merged[(first + 1)..(last + 1)].sort
          idx = sorted.length
          while idx > 1
            idx -= 1 # index to last item
            if sorted[idx][1].ord == sorted[idx - 1][2].ord + 1 # merge:
              sorted[(idx - 1)..idx] = [["char-range", sorted[idx - 1][1], sorted[idx][2]]]
            end
          end
          merged[(first + 1)..(last + 1)] = sorted
        end
        [true, merged]
      else
        false
      end
    end
  end

  # Apply flatten_strings_1 to every rule, replacing each production.
  def flatten_strings
    rules.each do |name, prod|
      rules[name] = flatten_strings_1(prod)
    end
  end

  # Cleanup operations

  # Same splicing as the flattener's expand_op_into, but non-matching
  # elements are processed with char_range_to_string1. Returns +out+.
  def expand_range_into(s, op, out = [op])
    s.each do |el|
      case el
      in [^op, *inner]
        expand_range_into(inner, op, out)
      else
        out << char_range_to_string1(el)
      end
    end
    out
  end

  # Turn single-character char-ranges in " ".."!" or "#".."~" back into "cs"
  # strings and join neighbouring "cs" strings inside a sequence.
  def char_range_to_string1(prod)
    visit(prod) do |here|
      case here
      in ["seq", *rest]
        rest = expand_range_into(rest, "seq") # => ["seq", *elements]
        i = rest.size
        # Walk from the end, concatenating each pair of adjacent "cs" nodes.
        while i > 1
          if (rest[i - 1] in ["cs", s2]) && (rest[i - 2] in ["cs", s1])
            rest[(i - 2)..(i - 1)] = [["cs", s1 + s2]]
          end
          i -= 1
        end
        [true, rest]
      in ["char-range", chr, ^chr] if chr.between?(" ", "!") || chr.between?("#", "~")
        [true, ["cs", chr]]
      else
        false
      end
    end
  end

  # Run the string clean-up pipeline (char_range_to_string1, detect_ci,
  # ci_cs_merge) over every rule.
  def char_range_to_string
    rules.each do |name, prod|
      rules[name] = ci_cs_merge(detect_ci(char_range_to_string1(prod)))
    end
  end

  # Replace an alternation of exactly two "cs" strings that are each other's
  # upper-/lower-case form (upper-case first) by one "ci" string.
  def detect_ci(prod)
    visit(prod) do |here|
      case here
      in ["alt", ["cs", c1], ["cs", c2]] if c1.downcase == c2 && c2.upcase == c1
        [true, ["ci", c1]]
      else
        false
      end
    end
  end

  # Return the string of +prod+ if it can take part in a case-insensitive
  # string (a "ci" node, or a "cs" node without ASCII letters), else nil.
  def ci_compat(prod)
    case prod
    in ["ci", s]
      s
    in ["cs", s] if s =~ /\A[^A-Za-z]*\z/
      s
    else
      nil
    end
  end

  # Inside sequences, join neighbouring nodes accepted by ci_compat into a
  # single "ci" string.
  def ci_cs_merge(prod)
    visit(prod) do |here|
      case here
      in ["seq", *rest]
        rest = rest.map { |x| ci_cs_merge(x) }
        i = rest.size
        while i > 1
          if (s2 = ci_compat(rest[i - 1])) && (s1 = ci_compat(rest[i - 2]))
            rest[(i - 2)..(i - 1)] = [["ci", s1 + s2]]
          end
          i -= 1
        end
        [true, wrap_flat("seq", rest)]
      else
        false
      end
    end
  end

  # Inside sequences, replace each run of n > 1 equal consecutive elements
  # by ["rep", n, n, element].
  def seq_rep(prod)
    visit(prod) do |here|
      case here
      in ["seq", *rest]
        rest = rest.map { |x| seq_rep(x) }
        i = rest.size # behind last element
        while i > 1
          j = i - 1 # end of range
          s_end = rest[j]
          k = j # start of range
          while k > 0 && rest[k - 1] == s_end
            k -= 1
          end
          if k != j
            n = j - k + 1
            rest[k..j] = [["rep", n, n, s_end]]
          end
          i = k # skip element k
        end
        [true, wrap_flat("seq", rest)]
      else
        false
      end
    end
  end

  # sharing

  # Count, in +counter+, every "alt" node occurring in +prod+ (nested ones
  # included), keyed by the node itself.
  def count_alt(counter, prod)
    visit(prod) do |here|
      case here
      in ["alt", *rest]
        rest.each { |pr| count_alt(counter, pr) }
        counter[here] += 1
      else
        false
      end
    end
  end

  # Move every "alt" node that occurs more than twice into a rule of its own
  # named "<prefix>-a<i>" (most frequent first) and replace its occurrences
  # by that name; seq_rep is applied to each resulting production.
  def share_alt(prefix)
    counter = Hash.new(0)
    rules.each do |_name, prod|
      count_alt(counter, prod)
    end
    subs = {}
    counter.to_a.select { |_k, v| v > 2 }.sort_by { |_k, v| -v }.each_with_index do |(el, _count), i|
      name = "#{prefix}-a#{i}"
      rules[name] = el
      subs[el] = name
    end
    # (The original counted all rules a second time here; nothing read the
    # counter afterwards, so that pass has been dropped.)
    rules.replace(Hash[rules.map do |k, v|
      [k, seq_rep(visit(v) do |prod|
        # Do not replace the body of a shared rule by its own name.
        if (s = subs[prod]) && k != s
          [true, s]
        end
      end)]
    end])
  end

  # Replace hex-digit alternations in +prod+ by a rule name "x<first><last>",
  # recording the replaced node under that name in +rules+:
  #   * <c>-9 / A-<X> / a-<x> becomes "x<c><x>",
  #   * <A>-<X> / <a>-<x> with letters in a..f becomes "x<a><x>".
  def share_hex_1(prod, rules)
    visit(prod) do |here|
      case here
      in ["alt",
          ["char-range", c3l, "9"],
          ["char-range", "A", c4r],
          ["char-range", "a", c6r]] if c4r == c6r.upcase && c3l >= "0" && c6r <= "f"
        name = "x#{c3l}#{c6r}"
        rules[name] ||= here
        [true, name]
      in ["alt",
          ["char-range", c4l, c4r],
          ["char-range", c6l, c6r]] if c4r == c6r.upcase &&
                                       c4l == c6l.upcase &&
                                       c6l.between?("a", "f") &&
                                       c6r.between?("a", "f")
        name = "x#{c6l}#{c6r}"
        rules[name] ||= here
        [true, name]
      else
        false
      end
    end
  end

  # Apply share_hex_1 to every rule and add the collected hex-digit rules,
  # sorted by name, to the rule set. +_prefix+ is unused.
  def share_hex(_prefix)
    newrules = {}
    rules.each do |name, prod|
      rules[name] = share_hex_1(prod, newrules)
    end
    rules.merge!(Hash[newrules.sort])
  end
end
@@ -0,0 +1,26 @@
1
+ require_relative "../abnftt.rb"
2
+
3
class ABNF
  # Visit every production in +prod_array+ with +block+ and return the
  # results as a new array.
  def visit_all(prod_array, &block)
    prod_array.map { |item| visit(item, &block) }
  end

  # Generic tree walk over a production.
  #
  # The block is called with the current node and answers with either a
  # falsy value ("not handled": descend into the node's children) or a pair
  # [done, replacement]; when +done+ is truthy the replacement is returned
  # and the node's children are not visited.
  #
  # Unhandled "alt", "tadd" and "seq" nodes are rebuilt from their visited
  # children, "rep" nodes from their visited body; anything else is returned
  # unchanged.
  def visit(prod, &block)
    done, ret = block.call(prod, &block)
    return ret if done

    case prod
    in ["alt" | "tadd" | "seq" => op, *children]
      [op, *visit_all(children, &block)]
    in ["rep", min, max, body]
      ["rep", min, max, visit(body, &block)]
    else
      prod
    end
  end
end
@@ -0,0 +1,106 @@
1
class ABNF

  # Render +s+ as a double-quoted ABNF string. Fails for strings containing
  # anything outside space, "!" and "#".."~" (so no double quote either).
  def stringify(s)
    fail "Can't stringify #{s.inspect} yet" unless s =~ /\A[ !#-~]*\z/
    %{"#{s}"}
  end

  # Render the rule name on the left-hand side of a rule (currently as is).
  def write_lhs(k)
    k
  end

  # precedence:
  # 1: / alt -> (type1)
  # 2: »« seq
  # 4: atomic

  # Parenthesize +inner+ (rendered at precedence +prec+) when the context
  # asks for +targetprec+ and targetprec >= prec.
  def prec_check(inner, targetprec, prec)
    if targetprec >= prec
      "(#{inner})"
    else
      inner
    end
  end

  # Render production +v+ as ABNF text, parenthesized as required by
  # +targetprec+ (see prec_check). Fails on node types it does not know.
  def write_rhs(v, targetprec = 0)
    prec, ret =
      case v
      in String # this should really be ["name", id]
        [4, v]
      in ["name", id]
        [4, id]
      in ["alt" | "tadd", *types]
        [1, types.map { write_rhs(_1, 1) }.join(" / ")]
      in ["seq", *groups]
        case groups.size
        when 0 then [4, ""] # XXX
        else
          [2, "#{groups.map { write_rhs(_1, 2) }.join(" ")}"]
        end
      in ["ci", s]
        [4, stringify(s)]
      in ["cs", s]
        if s =~ /\A[^A-Za-z]*\z/
          [4, stringify(s)] # reduce noise if no alphabetics
        else
          # Built by interpolation: appending to the "%s" literal with <<
          # fails when string literals are frozen.
          [4, "%s#{stringify(s)}"]
        end
      in ["char-range", c1, c2]
        nc1 = "%02x" % c1.ord
        nc2 = "%02x" % c2.ord
        nc2add = "-#{nc2}" if nc2 != nc1 # single character: no "-xx" part
        [4, "%x#{nc1}#{nc2add}"]
      in ["rep", s, e, group]
        if s == 0 && e == 1
          [4, "[#{write_rhs(group)}]"]
        else
          # e == true stands for "no upper bound".
          occur = case [s, e]
                  in [1, 1] then ""
                  in [0, true] then "*"
                  in [n, ^n] then n.to_s
                  else
                    "#{s}*#{e != true ? e : ""}"
                  end
          [4, "#{occur}#{write_rhs(group, 4)}"]
        end
      else
        fail [:WRITE_NOMATCH, v].inspect
      end
    prec_check(ret, targetprec, prec)
  end

  # Render one rule as "name = rhs", or "name =/ rhs" for a "tadd" node.
  def write_rule(k, v)
    case v
    in ["tadd", *_rest]
      assign = "=/"
    else
      assign = "="
    end
    "#{write_lhs(k)} #{assign} #{write_rhs(v, 0)}"
  end

  # Render all rules, one per line, with a trailing newline added when the
  # text does not already end in one (empty output stays empty).
  def to_s
    rules.map { |k, v| write_rule(k, v) }.join("\n").sub(/.\z/) { "#{$&}\n" }
  end

  # primitively break down lines so they fit on a teletype
  #
  # Each line of +s+ longer than +col+ characters is split at the last space
  # within its first +col+ characters, as long as that space is beyond
  # column 4; the remainder is indented and checked again.
  def breaker(s, col = 69)
    ret = +"" # unfrozen accumulator
    s.each_line do |*l|
      # l holds the pieces of the current line; only the last piece can
      # still be too long.
      while l[-1].size > col
        breakpoint = l[-1][0...col].rindex(' ')
        break unless breakpoint && breakpoint > 4
        l[-1..-1] = [
          l[-1][0...breakpoint],
          " #{l[-1][breakpoint + 1..-1]}"
        ]
      end
      ret << l.join("\n")
    end
    ret
  end

end
data/lib/abnftt.rb CHANGED
@@ -53,11 +53,17 @@ class ABNF
53
53
  ABNF.new(ast)
54
54
  end
55
55
 
56
# Alternate constructor: build an ABNF object around an existing rules hash
# +r+ by passing nil as the AST to ABNF.new, so no parse tree is involved.
+ def self.from_rules(r)
57
+ ABNF.new(nil, r)
58
+ end
59
+
56
60
  attr_accessor :ast, :rules, :tree
57
- def initialize(ast_)
58
- @ast = ast_
59
- @tree = ast.ast
60
- @rules = {}
61
+ def initialize(ast_ = nil, rules_ = {})
62
+ if ast_
63
+ @ast = ast_
64
+ @tree = ast.ast
65
+ end
66
+ @rules = rules_
61
67
  @tree.each do |x|
62
68
  op, name, val, rest = x
63
69
  fail rest if rest
@@ -72,7 +78,7 @@ class ABNF
72
78
  else
73
79
  val
74
80
  end
75
- end
81
+ end if @tree
76
82
  # warn "** rules #{rules.inspect}"
77
83
  end
78
84
 
metadata CHANGED
@@ -1,28 +1,35 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: abnftt
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Carsten Bormann
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-08-26 00:00:00.000000000 Z
11
+ date: 2024-12-09 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Less shifty support for tools based on IETF's ABNF
14
14
  email: cabo@tzi.org
15
15
  executables:
16
+ - abnfrob
17
+ - abnfrob~
16
18
  - abnftt
17
- - abnftt~
18
19
  extensions: []
19
20
  extra_rdoc_files: []
20
21
  files:
21
22
  - abnftt.gemspec
23
+ - bin/abnfrob
24
+ - bin/abnfrob~
22
25
  - bin/abnftt
23
- - bin/abnftt~
24
26
  - lib/abnfgrammar.rb
25
27
  - lib/abnftt.rb
28
+ - lib/abnftt/abnf-flattener.rb
29
+ - lib/abnftt/abnf-squasher.rb
30
+ - lib/abnftt/abnf-util.rb
31
+ - lib/abnftt/abnf-visitor.rb
32
+ - lib/abnftt/abnf-writer.rb
26
33
  homepage: http://github.com/cabo/abnftt
27
34
  licenses:
28
35
  - MIT
@@ -42,7 +49,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
42
49
  - !ruby/object:Gem::Version
43
50
  version: '0'
44
51
  requirements: []
45
- rubygems_version: 3.4.10
52
+ rubygems_version: 3.5.14
46
53
  signing_key:
47
54
  specification_version: 4
48
55
  summary: RFC 5234+7405 ABNF to Treetop
data/bin/abnftt~ DELETED
@@ -1,135 +0,0 @@
1
- #!/usr/bin/env ruby -Ku
2
-
3
- require 'pp'
4
- require 'yaml'
5
- require 'treetop'
6
-
7
- Encoding.default_external = Encoding::UTF_8
8
-
9
- require 'abnf'
10
-
11
- class Treetop::Runtime::SyntaxNode
12
- def clean_abnf
13
- if elements
14
- elements.map {|el| el.clean_abnf}.join
15
- else
16
- text_value
17
- end
18
- end
19
- def ast
20
- fail "undefined_ast #{inspect}"
21
- end
22
- def ast_from_percent(base, first, second)
23
- c1 = first.to_i(base).chr(Encoding::UTF_8)
24
- case second[0]
25
- when nil
26
- ["cs", c1]
27
- when "-"
28
- c2 = second[1..-1].to_i(base).chr(Encoding::UTF_8)
29
- ["char-range", c1, c2]
30
- when "."
31
- el = second.split(".")
32
- el[0] = first
33
- ["cs", el.map {|c| c.to_i(base).chr(Encoding::UTF_8)}.join]
34
- else
35
- fail "ast_from_percent"
36
- end
37
- end
38
- end
39
-
40
- def to_treetop(ast)
41
- <<~EOS
42
- # Encoding: UTF-8
43
- grammar TESTME
44
- #{ast.map {|x| to_treetop0(x)}.join}
45
- end
46
- EOS
47
- end
48
- def to_treetop0(ast)
49
- fail ast.inspect unless ast[0] == "="
50
- <<~EOS
51
- rule #{to_treetop1(ast[1])}
52
- #{to_treetop1(ast[2])}
53
- end
54
- EOS
55
- end
56
- FIXUP_NAMES = Hash.new {|h, k| k}
57
- FIXUP_NAMES.merge!({
58
- "rule" => "r__rule",
59
- })
60
- def to_treetop1(ast)
61
- case ast
62
- when String
63
- FIXUP_NAMES[ast].gsub("-", "_")
64
- when Array
65
- case ast[0]
66
- when "alt" # ["alt", *a]
67
- "(#{ast[1..-1].map {|x| to_treetop1(x)}.join(" / ")})"
68
- when "seq" # ["seq", *a]
69
- "(#{ast[1..-1].map {|x| to_treetop1(x)}.join(" ")})"
70
- when "rep" # ["rep", s, e, a]
71
- t = to_treetop1(ast[3]) || "@@@"
72
- case [ast[1], ast[2]]
73
- when [0, 1]
74
- t + "?"
75
- when [0, true]
76
- t + "*"
77
- when [1, true]
78
- t + "+"
79
- else
80
- t + " #{ast[1]}..#{ast[2] == true ? '' : ast[2]}"
81
- end
82
- when "prose" # ["prose", text]
83
- fail "prose not implemented #{ast.inspect}"
84
- when "ci" # ["ci", text]
85
- s = ast[1]
86
- if s =~ /\A[^A-Za-z]*\z/
87
- s.inspect
88
- else
89
- s.inspect << "i" # could do this always, but reduce noise
90
- end
91
- when "cs" # ["cs", text]
92
- ast[1].inspect
93
- when "char-range" # ["char-range", c1, c2]
94
- c1 = Regexp.quote(ast[1])
95
- c2 = Regexp.quote(ast[2])
96
- "[#{c1}-#{c2}]" # XXX does that always work
97
- when "im" # ["im", a, text]
98
- to_treetop1(ast[1]) + " " + ast[2]
99
- else
100
- fail "to_treetop(#{ast.inspect})"
101
- end
102
- else
103
- fail "to_treetop(#{ast.inspect})"
104
- end
105
- end
106
-
107
- unless fn = ARGV[0]
108
- warn "Usage: abnftt grammar.abnftt"
109
- exit 1
110
- end
111
- outfn = fn.sub(/\.abnftt\z/, "")
112
-
113
- parser = ABNFParser.new
114
- abnf_file = File.read(fn)
115
- ast = parser.parse abnf_file
116
- if ast
117
- # p ast
118
- File.open("#{outfn}.yaml", "w") do |f|
119
- f.puts ast.ast.to_yaml
120
- end
121
- # pp ast.ast
122
- File.open("#{outfn}.treetop", "w") do |f|
123
- f.puts to_treetop(ast.ast)
124
- end
125
- File.open("#{outfn}.abnf", "w") do |f|
126
- f.puts ast.clean_abnf
127
- end
128
- else
129
-
130
- puts parser.failure_reason
131
- parser.failure_reason =~ /^(Expected .+) after/m
132
- puts "#{$1.gsub("\n", '<<<NEWLINE>>>')}:"
133
- puts abnf_file.lines.to_a[parser.failure_line - 1]
134
- puts "#{'~' * (parser.failure_column - 1)}^"
135
- end