abnftt 0.2.4 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 152ef34be3428f0a7cda4f157ec08a1986f5cf295436be8186ee53d61eddb386
4
- data.tar.gz: fdc3504304d4e9c5244dcfce7a2e103e2801d3f6087de307bcd976a099fe0b5d
3
+ metadata.gz: 436afd21fea00a7369bf5dfdfdcc0a61b33ee0b14f482a6d1c00488a520c93c1
4
+ data.tar.gz: 93cb2255e048e4482c4245e9beb6bf635a79b253124359a8c0debbd2abb204b2
5
5
  SHA512:
6
- metadata.gz: e5b786e95c96464bf516e2d6ba00f1363a6a720d0dfd01b83401fef3ade2dc51116fda365881fb770fa9756f0ec71a14e7c6e85e7e3a7bc3589a930e8b9e5381
7
- data.tar.gz: '08eb748abf720cfdfe4a742e675e7b5b0144a60cdd2fce90f85783a2b7eac2d856e138f6536394dcccc123e72eb6fa8439b472b1e8ff26fff7630938445b2316'
6
+ metadata.gz: '09621b3970222f8de758894c6fa6c58821fca89980612234ca31cdedae11fcc1c2c9b2e1fd9edb873865169b640926e049388888a82da7baa4b1dfa972f37dde'
7
+ data.tar.gz: 4f77b32132f12ae7a2f6c91b250df5213fdb386c21ecd6fe02c6470115be7996448eefbd94f0e9cd833cc446a7a451c7ff65567069cd15695e6093c11505bd43
data/abnftt.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "abnftt"
3
- s.version = "0.2.4"
3
+ s.version = "0.2.7"
4
4
  s.summary = "RFC 5234+7405 ABNF to Treetop"
5
5
  s.description = %q{Less shifty support for tools based on IETF's ABNF}
6
6
  s.author = "Carsten Bormann"
data/bin/abnfrob ADDED
@@ -0,0 +1,149 @@
1
+ #!/usr/bin/env ruby -Ku
2
+ require 'pp'
3
+ require 'yaml'
4
+
5
+ Encoding.default_external = Encoding::UTF_8
6
+ require 'optparse'
7
+ require 'ostruct'
8
+
9
# Command-line handling: results are collected in the global $options struct,
# and the parser itself stays available as `op` for later usage messages.
$options = OpenStruct.new
begin
  op = OptionParser.new do |cli|
    cli.version =
      begin
        "(from abnftt #{Gem.loaded_specs['abnftt'].version})"
      rescue StandardError
        # abnftt is not loaded as a gem, so no version can be looked up
        "unknown-version"
      end

    cli.banner = "Usage: abnfrob [options] file.abnf"
    cli.on("-b", "--bap=[OPTIONS]", "Pretty-print using bap") do |extra|
      $options.bap = true
      # extra options are appended to the bap command line, separated by a blank
      $options.bap_options = extra ? " #{extra}" : ""
    end
    cli.on("-tFMT", "--to=FMT", [:abnf, :json, :pp, :yaml], "Target format") { |fmt| $options.target = fmt }
    cli.on("-y", "--yaml", "Output separate YAML copy of rules") { |flag| $options.yaml = flag }
    cli.on("-a", "--asr33", "Line-break to fit on teletype") { |flag| $options.asr33 = flag }
    cli.on("--ascii", "No escaping of ASCII printables %x20-7e") { |flag| $options.ascii = flag }
    cli.on("--squash=PREFIX", String, "Squash to app-prefix") { |name| $options.squash = name }
  end
  op.parse!
rescue StandardError => e
  # report the problem (e.g. an invalid option) and give up
  warn e
  exit 1
end
41
+
42
+ require 'abnftt'
43
+
44
# Pathname is used below; load it explicitly instead of relying on some other
# library having required it already.
require 'pathname'

# The grammar file is the first positional argument; without it, print usage.
unless fn = ARGV[0]
  warn op
  exit 1
end
# Only extensions starting with ".abnf" are accepted (prefix match, as before;
# the dot is now matched literally).
unless File.extname(fn) =~ /\A\.abnf/
  warn op
  exit 1
end
# Base name for all output files: the input path with its extension removed.
outfn = (Pathname.new(File.dirname(fn)) + File.basename(fn, ".*")).to_s
53
+
54
# Parse the grammar file; on failure print a diagnostic with a marker under
# the offending column and exit with status 1.
parser = ABNFGrammarParser.new
abnf_file = File.read(fn)
ast = parser.parse abnf_file
unless ast
  # failure_reason may be nil, or may lack the " after ..." part the pattern
  # below needs; in both cases $1 would be nil and the old `$1.gsub` crashed
  # with NoMethodError instead of printing the diagnostic.
  reason = parser.failure_reason.to_s
  puts reason
  expected = reason[/^(Expected .+) after/m, 1] || reason
  puts "#{expected.gsub("\n", '<<<NEWLINE>>>')}:"
  puts abnf_file.lines.to_a[parser.failure_line - 1]
  puts "#{'~' * (parser.failure_column - 1)}^"
  exit 1
end
65
+
66
# Build the rule set from the parse tree.
abnf = ABNF.new(ast)

# With --yaml, keep a YAML copy of the rules next to the input file.
if $options.yaml
  File.open("#{outfn}.yaml", "w") { |out| out.puts abnf.rules.to_yaml }
end
73
+
74
# --squash=PREFIX: run the squashing pipeline over the rules, prefix every
# rule name, add an outer quoting rule and dump the result as YAML.
if $options.squash
  require 'abnftt/abnf-flattener.rb'
  require 'abnftt/abnf-util.rb'
  require 'abnftt/abnf-squasher.rb'

  prefix = $options.squash + "-"

  abnf.flatten_ops
  abnf.flatten_strings
  abnf.squash_edn_levels(ascii: $options.ascii)
  abnf.char_range_to_string
  abnf.share_hex("sq")
  abnf.share_alt("sq")

  # Put the prefix in front of every rule name and every reference to a rule
  # (bare strings inside a production); an existing prefix is not doubled.
  add_prefix = ->(name) { name.sub(/^(?:#{prefix})?/, prefix) }
  renamed = abnf.rules.to_h do |name, prod|
    [add_prefix.call(name),
     abnf.visit(prod) { |node| [true, add_prefix.call(node)] if node.is_a?(String) }]
  end
  abnf.rules.replace(renamed)

  # New first rule: PREFIX' <first rule> '
  rule1 = abnf.rules.first
  outer_name = "sq-#{rule1[0]}"
  outer_elements = ["seq", ["cs", $options.squash+"'"], rule1[0], ["cs", "'"]]
  abnf.rules.replace({ outer_name => outer_elements }.merge(abnf.rules))

  File.open("#{outfn}-sq.yaml", "w") { |out| out.puts abnf.rules.to_yaml }
end
112
+
113
+ ## Work on abnf.rules
114
+
115
# Emit the rules in the format selected with --to (ABNF when none is given).
case $options.target
when :pp then pp abnf.rules
when :json
  require 'neatjson'
  puts JSON.neat_generate(abnf.rules, after_comma: 1, after_colon: 1)
when :yaml then puts abnf.rules.to_yaml
when :abnf, nil
  require_relative '../lib/abnftt/abnf-writer.rb'
  result = abnf.to_s

  if $options.bap
    # pipe the ABNF text through the external `bap` tool
    require 'open3'
    result, err, status =
      Open3.capture3("bap -o RFC7405#{$options.bap_options}", stdin_data: result)
    warn err.gsub(/^/, "** ") unless err == ""
    unless status.success?
      warn "*** Giving up"
      exit 1
    end
  end

  result = abnf.breaker(result) if $options.asr33

  puts result
  File.open("#{outfn}-sq.abnf", "w") { |out| out.puts result }
else
  warn ["Unknown target format: ", $options.target].inspect
end
data/bin/abnfrob~ ADDED
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/env ruby -Ku
2
+
3
+ require 'pp'
4
+ require 'yaml'
5
+
6
+ Encoding.default_external = Encoding::UTF_8
7
+
8
+ require 'abnftt'
9
+
10
+ unless fn = ARGV[0]
11
+ warn "Usage: abnftt grammar.abnftt"
12
+ exit 1
13
+ end
14
+ outfn = fn.sub(/\.abnftt\z/, "")
15
+
16
+ parser = ABNFGrammarParser.new
17
+ abnf_file = File.read(fn)
18
+ ast = parser.parse abnf_file
19
+ if ast
20
+ # p ast
21
+ abnf = ABNF.new(ast)
22
+ File.open("#{outfn}.yaml", "w") do |f|
23
+ f.puts abnf.tree.to_yaml
24
+ end
25
+ # pp ast.ast
26
+ File.open("#{outfn}.treetop", "w") do |f|
27
+ modname = File.basename(outfn).gsub("-", "_").gsub(/[^_a-zA-Z0-9]/, "").upcase
28
+ f.puts abnf.to_treetop(modname)
29
+ end
30
+ File.open("#{outfn}.abnf", "w") do |f|
31
+ f.puts ast.clean_abnf.lines.map(&:rstrip).join("\n")
32
+ end
33
+ else
34
+
35
+ puts parser.failure_reason
36
+ parser.failure_reason =~ /^(Expected .+) after/m
37
+ puts "#{$1.gsub("\n", '<<<NEWLINE>>>')}:"
38
+ puts abnf_file.lines.to_a[parser.failure_line - 1]
39
+ puts "#{'~' * (parser.failure_column - 1)}^"
40
+ end
@@ -0,0 +1,32 @@
1
+ require "abnftt/abnf-visitor.rb"
2
+
3
class ABNF
  # Appends the elements of +s+ to +out+ (which starts as [op]). Elements that
  # are themselves an +op+ node are spliced in recursively; everything else is
  # flattened on its own via flatten_ops_1. Returns +out+.
  def expand_op_into(s, op, out = [op])
    s.each_with_object(out) do |member, acc|
      case member
      in [^op, *nested] then expand_op_into(nested, op, acc)
      else acc << flatten_ops_1(member)
      end
    end
  end

  # Returns +prod+ with directly nested "seq"-in-"seq" and "alt"-in-"alt"
  # nodes merged into their parent.
  def flatten_ops_1(prod)
    visit(prod) do |node|
      case node
      in ["seq", *members] then [true, expand_op_into(members, "seq")]
      in ["alt", *members] then [true, expand_op_into(members, "alt")]
      else false
      end
    end
  end

  # Applies flatten_ops_1 to every rule, replacing the rule in place.
  def flatten_ops
    rules.each_key { |name| rules[name] = flatten_ops_1(rules[name]) }
  end
end
@@ -0,0 +1,65 @@
1
+ require "abnftt"
2
+ require "abnftt/abnf-flattener"
3
+ require "abnftt/abnf-util"
4
+ require "abnftt/abnf-visitor"
5
+
6
class ABNF

  # Character ranges (as pairs of one-character UTF-8 strings) that are
  # copied through without escaping.
  UNESCAPED_SQSTR_RANGES = [
    [0xA, 0xA],
    [0x20, 0x26],       # stops before "'" (DQUOTE is allowed)
    [0x28, 0x5b],       # stops before "\"
    [0x5d, 0xd7ff],     # no gap at 7F-9F: JSON allows those
    [0xe000, 0x10ffff]
  ].map { |first, last| [first.chr(Encoding::UTF_8), last.chr(Encoding::UTF_8)] }

  # Characters with a single-letter backslash escape: [character, letter].
  ESCAPED_SQSTR_MAPPINGS = [
    ["\x08", "b"],
    ["\x09", "t"],
    ["\x0A", "n"],
    ["\x0C", "f"],
    ["\x0D", "r"],
    ["\x27", "'"],
    ["\x2F", "/"],
    ["\x5C", "\\"]
  ]

  # Rewrites every char-range in +prod+ into an alternation of
  #  * the parts of the range that may stay unescaped,
  #  * backslash-letter escapes for the mapped characters inside the range,
  #  * "\u" followed by the legacy and the braced hex forms; with
  #    options[:ascii] only for the parts of the range outside " ".."~".
  # The result is flattened with flatten_ops_1.
  def squash_edn_levels_1(prod, **options)
    squashed = visit(prod) do |node|
      next false unless (node in ["char-range", lo, hi])

      literals = UNESCAPED_SQSTR_RANGES.filter_map { |u1, u2| overlap(node, u1, u2) }
      backslashed = ESCAPED_SQSTR_MAPPINGS.filter_map do |raw, letter|
        next unless raw >= lo && raw <= hi
        ["seq", ["char-range", "\\", "\\"], ["char-range", letter, letter]]
      end

      hex_escapes = []
      add_escapes = lambda do |first, last|
        hex_escapes << alt_ranges_legacy(first, last) # old
        hex_escapes << alt_ranges_modern(first, last) # new
      end
      if options[:ascii]
        do_ranges_outside(node, " ", "~", &add_escapes)
      else
        add_escapes.call(lo.ord, hi.ord)
      end

      unicode = ["seq", ["cs", "\\u"], wrap_flat("alt", hex_escapes)] unless hex_escapes.empty?
      [true, wrap_flat("alt", [*literals, *backslashed, unicode].compact.sort)]
    end
    flatten_ops_1(squashed)
  end

  # Applies squash_edn_levels_1 to every rule, replacing the rule in place.
  def squash_edn_levels(**options)
    rules.each_key { |name| rules[name] = squash_edn_levels_1(rules[name], **options) }
  end

end
@@ -0,0 +1,388 @@
1
+ require "abnftt/abnf-visitor"
2
+ require "abnftt/abnf-flattener"
3
+
4
+ class ABNF
5
+
6
# Returns the only element of +all+ when there is exactly one; otherwise a
# node [head, *all].
def wrap(head, all)
  return all.first if all.size == 1

  [head, *all]
end
13
+
14
# Like wrap, but elements that are themselves +head+ nodes contribute their
# members directly (one level only) instead of being nested.
def wrap_flat(head, all)
  return all.first if all.size == 1

  all.each_with_object([head]) do |el, out|
    case el
    in [^head, *rest] then out.concat(rest)
    else out << el
    end
  end
end
28
+
29
# Intersection of the char-range node +cr+ with the range l..r, as a new
# char-range node; nil when the two do not intersect.
def overlap(cr, l, r)
  _tag, lo, hi = cr
  return nil if hi < l || lo > r

  ["char-range", [lo, l].max, [hi, r].min]
end
34
+
35
# Yields, as pairs of code points, the parts of the char-range node +cr+ that
# lie outside l..r: the whole range if it is disjoint from l..r, otherwise
# the piece below +l+ and/or the piece above +r+ (if any).
def do_ranges_outside(cr, l, r)
  lo = cr[1]
  hi = cr[2]
  return yield(lo.ord, hi.ord) if hi < l || lo > r # entirely outside

  yield lo.ord, l.ord - 1 if lo < l
  yield r.ord + 1, hi.ord if hi > r
end
47
+
48
+ # Utilities for creating hexadecimal rules from unsigned integers
49
+
50
# Production matching one hex digit whose value is in l..r (0..15): a
# char-range over "0".."9" for the part below 10, plus an upper-case and a
# lower-case letter range for the part from 10 upwards.
def hexdig_range(l, r)
  char_at = ->(value, base) { (value + base).chr(Encoding::UTF_8) }
  parts = []
  parts << ["char-range", char_at.call(l, 0x30), char_at.call([r, 9].min, 0x30)] if l < 10
  if r >= 10
    first_letter = [l, 10].max - 0xA
    last_letter = r - 0xA
    [0x41, 0x61].each do |base| # "A", then "a"
      parts << ["char-range", char_at.call(first_letter, base), char_at.call(last_letter, base)]
    end
  end
  wrap("alt", parts)
end
65
+
66
+ # This assumes l and r are preprocessed to have single or full ranges except in one place
67
# Builds a sequence of hexdig_range productions, one per hex digit position,
# from the hex digits of +l+ and +r+ (most significant digit first). +ndig+
# fixes the number of digits; by default it is the digit count of +r+.
# Missing digits count as 0.
def hex_ranges(l, r, ndig = false)
  low_digits = l.digits(16)   # least significant digit first
  high_digits = r.digits(16)
  width = ndig || high_digits.size
  columns = Array.new(width) do |pos|
    hexdig_range(low_digits[pos] || 0, high_digits[pos] || 0)
  end
  wrap("seq", columns.reverse)
end
77
+
78
+ # split range into passages that have the property needed for hex_ranges
79
# Splits the integer range l..r into consecutive pieces and yields each as
# (first, last, column). Pieces are cut at multiples of 2**step,
# 2**(2*step), ...: first climbing to coarser boundaries while the piece
# still fits below +r+, then descending again towards +r+.
def do_range(l, r, step = 4)
  level = 0
  until l > r
    span = (1 << step * (level + 1)) - 1
    ceiling = l | span
    if ceiling > r # right hand side: come down from mountain
      level.downto(0) do |depth|
        span >>= step
        boundary = (r + 1) & ~span
        yield l, boundary - 1, depth + 1 unless l == boundary
        l = boundary
      end
      return
    end
    level += 1
    next if (l & span).zero?

    yield l, ceiling, level
    l = ceiling + 1
  end
end
102
+
103
+ # Support legacy JSON \u/\u\u and \u{...} hex unicode
104
+
105
# Alternation of hex_ranges productions covering l..r (characters or code
# points), one per piece produced by do_range. The pieces are listed in
# reverse order to work around prioritized choice.
def alt_ranges(l, r, step = 4, ndig = false)
  pieces = []
  do_range(l.ord, r.ord, step) do |lo, hi, _column|
    pieces.unshift(hex_ranges(lo, hi, ndig))
  end
  wrap("alt", pieces)
end
112
+
113
# Four-hex-digit forms for the code points l..r: the part below 0x10000 is
# written directly; the part from 0x10000 upwards is written as two
# four-digit groups (offset by 0xD800 and 0xDC00) joined by "\u".
def alt_ranges_legacy(l, r)
  limit = 0x10000
  branches = []
  branches << ["alt", alt_ranges(l, [r, limit - 1].min, 4, 4)] if l < limit
  if r >= limit
    first = [l, limit].max - limit
    last = r - limit
    do_range(first, last, 10) do |lo, hi, _column|
      high = alt_ranges((lo >> 10) + 0xD800, (hi >> 10) + 0xD800, 4, 4)
      joiner = expand_string("\\u")
      low = alt_ranges((lo & 0x3FF) + 0xDC00, (hi & 0x3FF) + 0xDC00, 4, 4)
      branches << ["seq", high, joiner, low]
    end
  end
  wrap_flat("alt", branches)
end
130
+
131
# Braced hex form for the code points l..r: "{", any number of leading
# zeros, the hex digits, "}". (+step+ is accepted but not used.)
def alt_ranges_modern(l, r, step = 4)
  opening = expand_string("{")
  leading_zeros = ["rep", 0, true, ["cs", "0"]]
  digits = alt_ranges(l, r, 4, false)
  ["seq", opening, leading_zeros, digits, expand_string("}")]
end
138
+
139
+ # flatten_strings: reduce all strings to char-range/seq/alt
140
+
141
# Turns the string +s+ into a sequence of single-character char-range nodes.
# With +case_fold+, a character whose upper- and lower-case forms differ
# becomes an alternation of the two forms instead.
def expand_string(s, case_fold = false)
  atoms = s.each_char.map do |ch|
    upper = ch.upcase
    lower = ch.downcase
    if case_fold && upper != lower
      ["alt", expand_string(upper), expand_string(lower)]
    else
      ["char-range", ch, ch]
    end
  end
  wrap("seq", atoms)
end
152
+
153
# Replaces every "cs" (case-sensitive) and "ci" (case-insensitive) string in
# +prod+ by its expand_string form, then flattens nested operators and
# merges adjacent char-ranges.
def flatten_strings_1(prod)
  expanded = visit(prod) do |node|
    case node
    in ["cs" | "ci" => kind, text]
      [true, expand_string(text, kind == "ci")]
    else
      false
    end
  end
  merge_strings_1(flatten_ops_1(expanded))
end
166
+
167
+
168
# Within every "alt" node, sorts each run of two or more neighbouring
# char-range members and fuses ranges that are directly adjacent (the next
# range starts one code point after the previous one ends). Other members
# are processed recursively. The "alt" node is modified in place.
def merge_strings_1(prod)
  visit(prod) do |node|
    next false unless (node in ["alt", *members])

    # Pass 1: find runs of char-ranges; recurse into everything else.
    runs = []
    pos = 0
    while pos < members.size
      if (members[pos] in ["char-range", _, _])
        last = pos
        last += 1 while last + 1 < members.size && (members[last + 1] in ["char-range", _, _])
        runs << [pos, last] unless pos == last # inclusive right
        pos = last
      else
        node[pos + 1] = merge_strings_1(members[pos]) # XXX could be part of a range
      end
      pos += 1
    end

    # Pass 2: rewrite the runs back to front, so earlier indexes stay valid.
    runs.reverse_each do |first, last|
      merged = node[first + 1..last + 1].sort
      (merged.length - 1).downto(1) do |idx|
        next unless merged[idx][1].ord == merged[idx - 1][2].ord + 1

        merged[idx - 1..idx] = [["char-range", merged[idx - 1][1], merged[idx][2]]]
      end
      node[first + 1..last + 1] = merged
    end
    [true, node]
  end
end
205
+
206
# Applies flatten_strings_1 to every rule, replacing the rule in place.
def flatten_strings
  rules.each_key { |name| rules[name] = flatten_strings_1(rules[name]) }
end
211
+
212
+ # Cleanup operations
213
+
214
# Appends the elements of +s+ to +out+ (which starts as [op]). Nested +op+
# nodes are spliced in recursively; every other element is converted with
# char_range_to_string1. Returns +out+.
def expand_range_into(s, op, out = [op])
  s.each_with_object(out) do |member, acc|
    case member
    in [^op, *nested] then expand_range_into(nested, op, acc)
    else acc << char_range_to_string1(member)
    end
  end
end
225
# Converts single-character char-ranges over " ", "!" or "#".."~" back into
# "cs" strings, and joins neighbouring "cs" strings inside a (flattened)
# sequence into one string.
def char_range_to_string1(prod)
  visit(prod) do |node|
    case node
    in ["seq", *members]
      flat = expand_range_into(members, "seq")
      # walk from the end so that joining does not disturb indexes still to come
      (flat.size - 1).downto(1) do |idx|
        next unless (flat[idx] in ["cs", tail]) && (flat[idx - 1] in ["cs", head])

        flat[idx - 1..idx] = [["cs", head + tail]]
      end
      [true, flat]
    in ["char-range", chr, ^chr] if chr.between?(" ", "!") || chr.between?("#", "~")
      [true, ["cs", chr]]
    else
      false
    end
  end
end
245
# For every rule: convert char-ranges back to strings, detect
# case-insensitive pairs, then merge neighbouring strings; the rule is
# replaced in place.
def char_range_to_string
  rules.each_key do |name|
    with_strings = char_range_to_string1(rules[name])
    rules[name] = ci_cs_merge(detect_ci(with_strings))
  end
end
250
+
251
# Replaces an alternation of exactly two "cs" strings, the first being the
# upper-case and the second the lower-case form of the other, by one "ci"
# string (holding the first form).
def detect_ci(prod)
  visit(prod) do |node|
    if (node in ["alt", ["cs", upper], ["cs", lower]]) &&
       upper.downcase == lower && lower.upcase == upper
      [true, ["ci", upper]]
    else
      false
    end
  end
end
261
# Returns the text of +prod+ if it can be part of a case-insensitive string:
# any "ci" string, or a "cs" string without ASCII letters. Otherwise nil.
def ci_compat(prod)
  return nil unless (prod in ["ci" | "cs" => kind, text])

  text if kind == "ci" || text =~ /\A[^A-Za-z]*\z/
end
271
# Inside every sequence, joins neighbouring members that are both
# ci_compat into a single "ci" string. Members are processed recursively
# first; the result is re-wrapped with wrap_flat.
def ci_cs_merge(prod)
  visit(prod) do |node|
    case node
    in ["seq", *members]
      merged = members.map { |member| ci_cs_merge(member) }
      # back to front, so joining does not disturb indexes still to come
      (merged.size - 1).downto(1) do |idx|
        tail = ci_compat(merged[idx])
        head = tail && ci_compat(merged[idx - 1])
        merged[idx - 1..idx] = [["ci", head + tail]] if head
      end
      [true, wrap_flat("seq", merged)]
    else
      false
    end
  end
end
289
+
290
# Inside every sequence, replaces each run of n > 1 equal neighbouring
# members by ["rep", n, n, member]. Members are processed recursively first;
# the result is re-wrapped with wrap_flat.
def seq_rep(prod)
  visit(prod) do |node|
    case node
    in ["seq", *members]
      collapsed = members
                  .map { |member| seq_rep(member) }
                  .chunk_while { |left, right| left == right }
                  .map { |run| run.size > 1 ? ["rep", run.size, run.size, run.last] : run.first }
      [true, wrap_flat("seq", collapsed)]
    else
      false
    end
  end
end
315
+
316
+ # sharing
317
# Counts in +counter+ (a hash keyed by node) how often each "alt" node
# occurs in +prod+, including "alt" nodes nested inside other "alt" nodes.
# Only the effect on +counter+ is meaningful.
def count_alt(counter, prod)
  visit(prod) do |node|
    next false unless (node in ["alt", *branches])

    branches.each { |branch| count_alt(counter, branch) }
    counter[node] += 1
  end
end
328
+
329
# Moves every "alt" node that occurs more than twice across all rules into a
# rule of its own, named "#{prefix}-a0", "#{prefix}-a1", ... (most frequent
# first), and replaces its occurrences by that name. Each rule is finally
# passed through seq_rep.
#
# A second counting pass over the rules, whose result was never read, has
# been removed.
def share_alt(prefix)
  counter = Hash.new(0)
  rules.each_value { |prod| count_alt(counter, prod) }

  subs = {}
  frequent = counter.to_a.select { |_alt, n| n > 2 }.sort_by { |_alt, n| -n }
  frequent.each_with_index do |(alt, _n), i|
    name = "#{prefix}-a#{i}"
    rules[name] = alt
    subs[alt] = name
  end

  shared = rules.to_h do |rule_name, prod|
    replaced = visit(prod) do |node|
      # a shared rule must keep its own body: do not replace it by its own name
      if (shared_name = subs[node]) && rule_name != shared_name
        [true, shared_name]
      end
    end
    [rule_name, seq_rep(replaced)]
  end
  rules.replace(shared)
end
351
+
352
# Replaces hex-digit productions in +prod+ by references to shared rules
# named "x<first><last>", recording each new rule in +rules+ (first
# definition wins). Three shapes are recognised:
#  * digits up to "9" plus matching "A".. and "a".. letter ranges,
#  * a matching upper-/lower-case pair of letter ranges within "a".."f",
#  * a char-range within "0".."9".
def share_hex_1(prod, rules)
  share = lambda do |name, node|
    rules[name] ||= node
    [true, name]
  end

  visit(prod) do |node|
    case node
    in ["alt",
        ["char-range", digit_lo, "9"],
        ["char-range", "A", upper_hi],
        ["char-range", "a", lower_hi]] if upper_hi == lower_hi.upcase &&
                                         digit_lo >= "0" && lower_hi <= "f"
      share.call("x#{digit_lo}#{lower_hi}", node)
    in ["alt",
        ["char-range", upper_lo, upper_hi],
        ["char-range", lower_lo, lower_hi]] if upper_hi == lower_hi.upcase &&
                                              upper_lo == lower_lo.upcase &&
                                              lower_lo.between?("a", "f") &&
                                              lower_hi.between?("a", "f")
      share.call("x#{lower_lo}#{lower_hi}", node)
    in ["char-range", lo, hi] if lo >= "0" && hi <= "9"
      share.call("x#{lo}#{hi}", node)
    else
      false
    end
  end
end
380
+
381
# Runs share_hex_1 over every rule and then adds the shared hex rules it
# collected, sorted by name, to the rule set. (+_prefix+ is not used.)
def share_hex(_prefix)
  newrules = {}
  rules.each_key { |name| rules[name] = share_hex_1(rules[name], newrules) }
  rules.merge!(newrules.sort.to_h)
end
388
+ end
@@ -0,0 +1,26 @@
1
+ require_relative "../abnftt.rb"
2
+
3
class ABNF
  # Visits each production of +prod_array+ and returns the results.
  def visit_all(prod_array, &block)
    prod_array.map { |prod| visit(prod, &block) }
  end

  # Generic tree rewriter. The block is called with +prod+ and returns
  # [handled, replacement]: when +handled+ is truthy, +replacement+ is the
  # result. Otherwise the children of "alt", "tadd", "seq" and "rep" nodes
  # are visited and a rebuilt node is returned; any other production is
  # returned unchanged.
  def visit(prod, &block)
    handled, replacement = block.call(prod, &block)
    return replacement if handled

    case prod
    in ["alt", *children] then ["alt", *visit_all(children, &block)]
    in ["tadd", *children] then ["tadd", *visit_all(children, &block)]
    in ["seq", *children] then ["seq", *visit_all(children, &block)]
    in ["rep", min, max, body] then ["rep", min, max, visit(body, &block)]
    else prod
    end
  end
end
@@ -0,0 +1,106 @@
1
class ABNF

  # Quotes +s+ as an ABNF string; fails for anything but space, "!" and
  # "#".."~" (i.e. no double quotes, no non-printables).
  def stringify(s)
    fail "Can't stringify #{s.inspect} yet" unless s =~ /\A[ !#-~]*\z/

    %{"#{s}"}
  end

  # Left-hand side of a rule: the rule name as it is.
  def write_lhs(k)
    k
  end

  # precedence:
  # 1: / alt -> (type1)
  # 2: »« seq
  # 4: atomic

  # Parenthesizes +inner+ (of precedence +prec+) unless +prec+ is higher
  # than the precedence +targetprec+ of the context.
  def prec_check(inner, targetprec, prec)
    targetprec >= prec ? "(#{inner})" : inner
  end

  # ABNF text for the production +v+, parenthesized as needed for a context
  # of precedence +targetprec+. Fails on an unknown production.
  def write_rhs(v, targetprec = 0)
    prec, text =
      case v
      in String then [4, v] # this should really be ["name", id]
      in ["name", id] then [4, id]
      in ["alt" | "tadd", *branches]
        [1, branches.map { |branch| write_rhs(branch, 1) }.join(" / ")]
      in ["seq"] then [4, ""] # XXX
      in ["seq", *items]
        [2, items.map { |item| write_rhs(item, 2) }.join(" ")]
      in ["ci", s] then [4, stringify(s)]
      in ["cs", s]
        # reduce noise: %s is only needed when there are alphabetics
        plain = (s =~ /\A[^A-Za-z]*\z/)
        [4, (plain ? "" : "%s") + stringify(s)]
      in ["char-range", c1, c2]
        first, last = [c1, c2].map { |c| format("%02x", c.ord) }
        [4, first == last ? "%x#{first}" : "%x#{first}-#{last}"]
      in ["rep", 0, 1, group]
        [4, "[#{write_rhs(group)}]"]
      in ["rep", s, e, group]
        occur =
          case [s, e]
          in [1, 1] then ""
          in [0, true] then "*"
          in [n, ^n] then n.to_s
          else "#{s}*#{e != true ? e : ""}"
          end
        [4, "#{occur}#{write_rhs(group, 4)}"]
      else
        fail [:WRITE_NOMATCH, v].inspect
      end
    prec_check(text, targetprec, prec)
  end

  # One rule as text; a "tadd" production is written with "=/".
  def write_rule(k, v)
    assign = (v in ["tadd", *]) ? "=/" : "="
    "#{write_lhs(k)} #{assign} #{write_rhs(v, 0)}"
  end

  # All rules, one per line, with a final newline (unless there is no text).
  def to_s
    text = rules.map { |name, prod| write_rule(name, prod) }.join("\n")
    text.sub(/.\z/) { |last_char| last_char + "\n" }
  end

  # primitively break down lines so they fit on a teletype: lines longer
  # than +col+ are split at the last blank before +col+ (if that is beyond
  # position 4) and continued on a new, indented line.
  def breaker(s, col = 69)
    s.each_line.each_with_object(+"") do |line, out|
      pieces = []
      remainder = line
      while remainder.size > col
        cut = remainder[0...col].rindex(' ')
        break unless cut && cut > 4

        pieces << remainder[0...cut]
        remainder = " " + remainder[(cut + 1)..]
      end
      out << [*pieces, remainder].join("\n")
    end
  end

end
data/lib/abnftt.rb CHANGED
@@ -53,11 +53,17 @@ class ABNF
53
53
  ABNF.new(ast)
54
54
  end
55
55
 
56
+ def self.from_rules(r)
57
+ ABNF.new(nil, r)
58
+ end
59
+
56
60
  attr_accessor :ast, :rules, :tree
57
- def initialize(ast_)
58
- @ast = ast_
59
- @tree = ast.ast
60
- @rules = {}
61
+ def initialize(ast_ = nil, rules_ = {})
62
+ if ast_
63
+ @ast = ast_
64
+ @tree = ast.ast
65
+ end
66
+ @rules = rules_
61
67
  @tree.each do |x|
62
68
  op, name, val, rest = x
63
69
  fail rest if rest
@@ -72,7 +78,7 @@ class ABNF
72
78
  else
73
79
  val
74
80
  end
75
- end
81
+ end if @tree
76
82
  # warn "** rules #{rules.inspect}"
77
83
  end
78
84
 
metadata CHANGED
@@ -1,28 +1,35 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: abnftt
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Carsten Bormann
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-08-26 00:00:00.000000000 Z
11
+ date: 2025-01-11 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Less shifty support for tools based on IETF's ABNF
14
14
  email: cabo@tzi.org
15
15
  executables:
16
+ - abnfrob
17
+ - abnfrob~
16
18
  - abnftt
17
- - abnftt~
18
19
  extensions: []
19
20
  extra_rdoc_files: []
20
21
  files:
21
22
  - abnftt.gemspec
23
+ - bin/abnfrob
24
+ - bin/abnfrob~
22
25
  - bin/abnftt
23
- - bin/abnftt~
24
26
  - lib/abnfgrammar.rb
25
27
  - lib/abnftt.rb
28
+ - lib/abnftt/abnf-flattener.rb
29
+ - lib/abnftt/abnf-squasher.rb
30
+ - lib/abnftt/abnf-util.rb
31
+ - lib/abnftt/abnf-visitor.rb
32
+ - lib/abnftt/abnf-writer.rb
26
33
  homepage: http://github.com/cabo/abnftt
27
34
  licenses:
28
35
  - MIT
@@ -42,7 +49,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
42
49
  - !ruby/object:Gem::Version
43
50
  version: '0'
44
51
  requirements: []
45
- rubygems_version: 3.4.10
52
+ rubygems_version: 3.5.14
46
53
  signing_key:
47
54
  specification_version: 4
48
55
  summary: RFC 5234+7405 ABNF to Treetop
data/bin/abnftt~ DELETED
@@ -1,135 +0,0 @@
1
- #!/usr/bin/env ruby -Ku
2
-
3
- require 'pp'
4
- require 'yaml'
5
- require 'treetop'
6
-
7
- Encoding.default_external = Encoding::UTF_8
8
-
9
- require 'abnf'
10
-
11
- class Treetop::Runtime::SyntaxNode
12
- def clean_abnf
13
- if elements
14
- elements.map {|el| el.clean_abnf}.join
15
- else
16
- text_value
17
- end
18
- end
19
- def ast
20
- fail "undefined_ast #{inspect}"
21
- end
22
- def ast_from_percent(base, first, second)
23
- c1 = first.to_i(base).chr(Encoding::UTF_8)
24
- case second[0]
25
- when nil
26
- ["cs", c1]
27
- when "-"
28
- c2 = second[1..-1].to_i(base).chr(Encoding::UTF_8)
29
- ["char-range", c1, c2]
30
- when "."
31
- el = second.split(".")
32
- el[0] = first
33
- ["cs", el.map {|c| c.to_i(base).chr(Encoding::UTF_8)}.join]
34
- else
35
- fail "ast_from_percent"
36
- end
37
- end
38
- end
39
-
40
- def to_treetop(ast)
41
- <<~EOS
42
- # Encoding: UTF-8
43
- grammar TESTME
44
- #{ast.map {|x| to_treetop0(x)}.join}
45
- end
46
- EOS
47
- end
48
- def to_treetop0(ast)
49
- fail ast.inspect unless ast[0] == "="
50
- <<~EOS
51
- rule #{to_treetop1(ast[1])}
52
- #{to_treetop1(ast[2])}
53
- end
54
- EOS
55
- end
56
- FIXUP_NAMES = Hash.new {|h, k| k}
57
- FIXUP_NAMES.merge!({
58
- "rule" => "r__rule",
59
- })
60
- def to_treetop1(ast)
61
- case ast
62
- when String
63
- FIXUP_NAMES[ast].gsub("-", "_")
64
- when Array
65
- case ast[0]
66
- when "alt" # ["alt", *a]
67
- "(#{ast[1..-1].map {|x| to_treetop1(x)}.join(" / ")})"
68
- when "seq" # ["seq", *a]
69
- "(#{ast[1..-1].map {|x| to_treetop1(x)}.join(" ")})"
70
- when "rep" # ["rep", s, e, a]
71
- t = to_treetop1(ast[3]) || "@@@"
72
- case [ast[1], ast[2]]
73
- when [0, 1]
74
- t + "?"
75
- when [0, true]
76
- t + "*"
77
- when [1, true]
78
- t + "+"
79
- else
80
- t + " #{ast[1]}..#{ast[2] == true ? '' : ast[2]}"
81
- end
82
- when "prose" # ["prose", text]
83
- fail "prose not implemented #{ast.inspect}"
84
- when "ci" # ["ci", text]
85
- s = ast[1]
86
- if s =~ /\A[^A-Za-z]*\z/
87
- s.inspect
88
- else
89
- s.inspect << "i" # could do this always, but reduce noise
90
- end
91
- when "cs" # ["cs", text]
92
- ast[1].inspect
93
- when "char-range" # ["char-range", c1, c2]
94
- c1 = Regexp.quote(ast[1])
95
- c2 = Regexp.quote(ast[2])
96
- "[#{c1}-#{c2}]" # XXX does that always work
97
- when "im" # ["im", a, text]
98
- to_treetop1(ast[1]) + " " + ast[2]
99
- else
100
- fail "to_treetop(#{ast.inspect})"
101
- end
102
- else
103
- fail "to_treetop(#{ast.inspect})"
104
- end
105
- end
106
-
107
- unless fn = ARGV[0]
108
- warn "Usage: abnftt grammar.abnftt"
109
- exit 1
110
- end
111
- outfn = fn.sub(/\.abnftt\z/, "")
112
-
113
- parser = ABNFParser.new
114
- abnf_file = File.read(fn)
115
- ast = parser.parse abnf_file
116
- if ast
117
- # p ast
118
- File.open("#{outfn}.yaml", "w") do |f|
119
- f.puts ast.ast.to_yaml
120
- end
121
- # pp ast.ast
122
- File.open("#{outfn}.treetop", "w") do |f|
123
- f.puts to_treetop(ast.ast)
124
- end
125
- File.open("#{outfn}.abnf", "w") do |f|
126
- f.puts ast.clean_abnf
127
- end
128
- else
129
-
130
- puts parser.failure_reason
131
- parser.failure_reason =~ /^(Expected .+) after/m
132
- puts "#{$1.gsub("\n", '<<<NEWLINE>>>')}:"
133
- puts abnf_file.lines.to_a[parser.failure_line - 1]
134
- puts "#{'~' * (parser.failure_column - 1)}^"
135
- end