abnftt 0.2.4 → 0.2.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 152ef34be3428f0a7cda4f157ec08a1986f5cf295436be8186ee53d61eddb386
4
- data.tar.gz: fdc3504304d4e9c5244dcfce7a2e103e2801d3f6087de307bcd976a099fe0b5d
3
+ metadata.gz: 436afd21fea00a7369bf5dfdfdcc0a61b33ee0b14f482a6d1c00488a520c93c1
4
+ data.tar.gz: 93cb2255e048e4482c4245e9beb6bf635a79b253124359a8c0debbd2abb204b2
5
5
  SHA512:
6
- metadata.gz: e5b786e95c96464bf516e2d6ba00f1363a6a720d0dfd01b83401fef3ade2dc51116fda365881fb770fa9756f0ec71a14e7c6e85e7e3a7bc3589a930e8b9e5381
7
- data.tar.gz: '08eb748abf720cfdfe4a742e675e7b5b0144a60cdd2fce90f85783a2b7eac2d856e138f6536394dcccc123e72eb6fa8439b472b1e8ff26fff7630938445b2316'
6
+ metadata.gz: '09621b3970222f8de758894c6fa6c58821fca89980612234ca31cdedae11fcc1c2c9b2e1fd9edb873865169b640926e049388888a82da7baa4b1dfa972f37dde'
7
+ data.tar.gz: 4f77b32132f12ae7a2f6c91b250df5213fdb386c21ecd6fe02c6470115be7996448eefbd94f0e9cd833cc446a7a451c7ff65567069cd15695e6093c11505bd43
data/abnftt.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "abnftt"
3
- s.version = "0.2.4"
3
+ s.version = "0.2.7"
4
4
  s.summary = "RFC 5234+7405 ABNF to Treetop"
5
5
  s.description = %q{Less shifty support for tools based on IETF's ABNF}
6
6
  s.author = "Carsten Bormann"
data/bin/abnfrob ADDED
@@ -0,0 +1,149 @@
1
#!/usr/bin/env ruby -Ku
# abnfrob: parse an ABNF grammar file and write its rules back out as ABNF,
# JSON, YAML or a pretty-printed Ruby structure, optionally after "squashing"
# the grammar (--squash) and/or piping the ABNF through the external `bap`
# pretty-printer (--bap).
require 'pp'
require 'yaml'
require 'pathname' # Pathname is used below to build the output file stem

Encoding.default_external = Encoding::UTF_8
require 'optparse'
require 'ostruct'

$options = OpenStruct.new
begin
  op = OptionParser.new do |opts|
    # The gem spec is only available when running as an installed gem;
    # fall back to a placeholder otherwise.
    opts.version =
      begin
        "(from abnftt #{Gem.loaded_specs['abnftt'].version})"
      rescue StandardError
        "unknown-version"
      end

    opts.banner = "Usage: abnfrob [options] file.abnf"
    opts.on("-b", "--bap=[OPTIONS]", "Pretty-print using bap") do |v|
      $options.bap = true
      $options.bap_options = v ? " #{v}" : ""
      # warn "** bap_options #{$options.bap_options.inspect}"
    end
    opts.on("-tFMT", "--to=FMT", [:abnf, :json, :pp, :yaml], "Target format") do |v|
      $options.target = v
    end
    opts.on("-y", "--yaml", "Output separate YAML copy of rules") do |v|
      $options.yaml = v
    end
    opts.on("-a", "--asr33", "Line-break to fit on teletype") do |v|
      $options.asr33 = v
    end
    opts.on("--ascii", "No escaping of ASCII printables %x20-7e") do |v|
      $options.ascii = v
    end
    opts.on("--squash=PREFIX", String, "Squash to app-prefix") do |v|
      $options.squash = v
    end
  end
  op.parse!
rescue StandardError => e
  warn e
  exit 1
end

require 'abnftt'

fn = ARGV[0]
# An input file is mandatory and its extension must start with ".abnf".
# (Prefix match kept from the original check, which also lets ".abnftt"
# through -- NOTE(review): confirm that is intended.)
unless fn && File.extname(fn).start_with?(".abnf")
  warn op
  exit 1
end
# Output files are written next to the input, named after it minus extension.
outfn = (Pathname.new(File.dirname(fn)) + File.basename(fn, ".*")).to_s

parser = ABNFGrammarParser.new
abnf_file = File.read(fn)
ast = parser.parse abnf_file
unless ast
  reason = parser.failure_reason.to_s
  puts reason
  # Only print the condensed "Expected ..." line when the reason has that
  # shape; otherwise the full reason above has to do.
  if (expected = reason[/^(Expected .+) after/m, 1])
    puts "#{expected.gsub("\n", '<<<NEWLINE>>>')}:"
  end
  puts abnf_file.lines[parser.failure_line - 1]
  puts "#{'~' * (parser.failure_column - 1)}^"
  exit 1
end

abnf = ABNF.new(ast)

if $options.yaml
  File.open("#{outfn}.yaml", "w") do |f|
    f.puts abnf.rules.to_yaml
  end
end

if $options.squash
  require 'abnftt/abnf-flattener.rb'
  require 'abnftt/abnf-util.rb'
  require 'abnftt/abnf-squasher.rb'

  prefix = $options.squash + "-"
  # Prepend the prefix unless the name already carries it.  Done with plain
  # string operations so that regexp metacharacters in PREFIX are harmless.
  add_prefix = ->(name) { name.start_with?(prefix) ? name : prefix + name }

  abnf.flatten_ops
  abnf.flatten_strings

  abnf.squash_edn_levels(ascii: $options.ascii)
  abnf.char_range_to_string

  abnf.share_hex("sq")

  abnf.share_alt("sq")

  # Rename every rule and every rule reference (bare strings in a production).
  prefixed = abnf.rules.map do |name, prod|
    [add_prefix.(name),
     abnf.visit(prod) { |node| [true, add_prefix.(node)] if String === node }]
  end
  abnf.rules.replace(prefixed.to_h)

  # Put a new first rule in front that wraps the old first rule in
  # PREFIX' ... ' delimiters.
  first_name = abnf.rules.first[0]
  outer_name = "sq-#{first_name}"
  outer_elements = ["seq",
                    ["cs", $options.squash + "'"],
                    first_name,
                    ["cs", "'"]]
  abnf.rules.replace({outer_name => outer_elements}.merge(abnf.rules))
  File.open("#{outfn}-sq.yaml", "w") do |f|
    f.puts abnf.rules.to_yaml
  end
end

## Work on abnf.rules

case $options.target
when :pp
  pp abnf.rules
when :json
  require 'neatjson'
  puts JSON.neat_generate(abnf.rules, after_comma: 1, after_colon: 1)
when :yaml
  puts abnf.rules.to_yaml
when :abnf, nil
  require_relative '../lib/abnftt/abnf-writer.rb'
  result = abnf.to_s

  if $options.bap
    require 'open3'
    result, err, status =
      Open3.capture3("bap -o RFC7405#{$options.bap_options}",
                     stdin_data: result)
    warn err.gsub(/^/, "** ") unless err == ""
    unless status.success?
      warn "*** Giving up"
      exit 1
    end
  end

  if $options.asr33
    result = abnf.breaker(result)
  end

  puts result
  # NOTE(review): this file is written whether or not --squash was given,
  # despite the "-sq" in its name -- confirm that is intended.
  File.open("#{outfn}-sq.abnf", "w") do |f|
    f.puts result
  end
else
  # OptionParser already restricts --to to the list above, so this is a
  # safety net; still report failure through the exit status.
  warn ["Unknown target format: ", $options.target].inspect
  exit 1
end
data/bin/abnfrob~ ADDED
@@ -0,0 +1,40 @@
1
#!/usr/bin/env ruby -Ku

# NOTE(review): the "~" suffix in this file's name suggests an editor backup
# that was packaged by accident; its usage text still says "abnftt" --
# confirm whether it should ship at all.
#
# Parses the grammar file named on the command line and writes three files
# next to it: <name>.yaml (parse tree), <name>.treetop (Treetop grammar) and
# <name>.abnf (cleaned-up ABNF).
require 'pp'
require 'yaml'

Encoding.default_external = Encoding::UTF_8

require 'abnftt'

unless fn = ARGV[0]
  warn "Usage: abnftt grammar.abnftt"
  exit 1
end
outfn = fn.sub(/\.abnftt\z/, "")

parser = ABNFGrammarParser.new
abnf_file = File.read(fn)
ast = parser.parse abnf_file

unless ast
  reason = parser.failure_reason.to_s
  puts reason
  # Print the condensed "Expected ..." line only when the reason has that
  # shape, instead of crashing on a nil match.
  if (expected = reason[/^(Expected .+) after/m, 1])
    puts "#{expected.gsub("\n", '<<<NEWLINE>>>')}:"
  end
  puts abnf_file.lines[parser.failure_line - 1]
  puts "#{'~' * (parser.failure_column - 1)}^"
  exit 1 # a failed parse must not look like success to the caller
end

# p ast
abnf = ABNF.new(ast)
File.open("#{outfn}.yaml", "w") do |f|
  f.puts abnf.tree.to_yaml
end
# pp ast.ast
File.open("#{outfn}.treetop", "w") do |f|
  # Derive a module name from the file name: "-" becomes "_", anything that
  # is not alphanumeric or "_" is dropped, and the result is upcased.
  modname = File.basename(outfn).gsub("-", "_").gsub(/[^_a-zA-Z0-9]/, "").upcase
  f.puts abnf.to_treetop(modname)
end
File.open("#{outfn}.abnf", "w") do |f|
  f.puts ast.clean_abnf.lines.map(&:rstrip).join("\n")
end
@@ -0,0 +1,32 @@
1
+ require "abnftt/abnf-visitor.rb"
2
+
3
class ABNF
  # Append the elements of +s+ to +out+ (which starts out as [op]).
  # An element that is itself an +op+ node has its members spliced in
  # recursively; every other element is first run through #flatten_ops_1.
  # Returns +out+.
  def expand_op_into(s, op, out = [op])
    s.each_with_object(out) do |element, acc|
      case element
      in [^op, *members]
        expand_op_into(members, op, acc)
      else
        acc << flatten_ops_1(element)
      end
    end
  end

  # Return a copy of +prod+ in which directly nested "seq"-in-"seq" and
  # "alt"-in-"alt" nodes are merged into their parent.
  def flatten_ops_1(prod)
    visit(prod) do |node|
      case node
      in ["seq", *members]
        [true, expand_op_into(members, "seq")]
      in ["alt", *members]
        [true, expand_op_into(members, "alt")]
      else
        false
      end
    end
  end

  # Apply #flatten_ops_1 to every rule, replacing each production in place.
  def flatten_ops
    rules.each_key do |name|
      rules[name] = flatten_ops_1(rules[name])
    end
  end
end
@@ -0,0 +1,65 @@
1
+ require "abnftt"
2
+ require "abnftt/abnf-flattener"
3
+ require "abnftt/abnf-util"
4
+ require "abnftt/abnf-visitor"
5
+
6
class ABNF

  # Inclusive character ranges (as pairs of one-character UTF-8 strings) that
  # are passed through literally by #squash_edn_levels_1.
  UNESCAPED_SQSTR_RANGES = [
    [0xA, 0xA],
    # [0x20, 0x21], [0x23, 0x26] -- but DQUOTE is allowed, so one range:
    [0x20, 0x26],       # stops before "'"
    [0x28, 0x5b],       # stops before "\"
    # [0x5d, 0x7e], [0xa0, 0xd7ff] -- but JSON allows 7F-9F, so one range:
    [0x5d, 0xd7ff],
    [0xe000, 0x10ffff]
  ].map { |bounds| bounds.map { |cp| cp.chr(Encoding::UTF_8) } }

  # Pairs of [character, letter]: a character in the first column is
  # offered as a backslash followed by the letter in the second column.
  ESCAPED_SQSTR_MAPPINGS = [
    ["\b", "b"],   # 0x08
    ["\t", "t"],   # 0x09
    ["\n", "n"],   # 0x0A
    ["\f", "f"],   # 0x0C
    ["\r", "r"],   # 0x0D
    ["'", "'"],    # 0x27
    ["/", "/"],    # 0x2F
    ["\\", "\\"]   # 0x5C
  ]

  # Rewrite every "char-range" node in +prod+ into a sorted alternation of
  #   * its overlaps with UNESCAPED_SQSTR_RANGES,
  #   * backslash-letter sequences for each ESCAPED_SQSTR_MAPPINGS character
  #     inside the range, and
  #   * a "\u" prefix followed by the legacy and modern hex alternatives.
  # With options[:ascii] truthy, the hex alternatives are generated only for
  # the parts of the range outside " ".."~"; otherwise for the whole range.
  # The result is passed through #flatten_ops_1.
  # NOTE(review): presumably this yields the grammar as it appears inside a
  # single-quoted string literal -- confirm against the callers.
  def squash_edn_levels_1(prod, **options)
    squashed = visit(prod) do |node|
      case node
      in ["char-range", c1, c2]
        literal = UNESCAPED_SQSTR_RANGES.filter_map { |lo, hi| overlap(node, lo, hi) }
        escaped = ESCAPED_SQSTR_MAPPINGS.filter_map do |raw, letter|
          next unless raw.between?(c1, c2)
          ["seq", ["char-range", "\\", "\\"], ["char-range", letter, letter]]
        end

        # Collect the code point spans that need \u escapes ...
        spans = []
        if options[:ascii]
          do_ranges_outside(node, " ", "~") { |lo, hi| spans << [lo, hi] }
        else
          spans << [c1.ord, c2.ord]
        end
        # ... and offer each in both the old and the new notation.
        u_escapes = spans.flat_map do |lo, hi|
          [alt_ranges_legacy(lo, hi), alt_ranges_modern(lo, hi)]
        end
        oldnew =
          unless u_escapes.empty?
            ["seq", ["cs", "\\u"], wrap_flat("alt", u_escapes)]
          end

        [true, wrap_flat("alt", [*literal, *escaped, oldnew].compact.sort)]
      else
        false
      end
    end
    flatten_ops_1(squashed)
  end

  # Apply #squash_edn_levels_1 (with the same options) to every rule,
  # replacing each production in place.
  def squash_edn_levels(**options)
    rules.each_key do |name|
      rules[name] = squash_edn_levels_1(rules[name], **options)
    end
  end

end
@@ -0,0 +1,388 @@
1
+ require "abnftt/abnf-visitor"
2
+ require "abnftt/abnf-flattener"
3
+
4
class ABNF

  # Build the node [head, *all]; a one-element list is returned as that
  # element itself, without a wrapper.
  def wrap(head, all)
    if all.size == 1
      all.first
    else
      [head, *all]
    end
  end

  # Like #wrap, but elements that are themselves [head, ...] nodes have
  # their members spliced in (one level) instead of being nested.
  def wrap_flat(head, all)
    if all.size == 1
      all.first
    else
      [head, *all.flat_map { |el|
        case el
        in [^head, *rest]
          rest
        else
          [el]
        end
      }]
    end
  end

  # Intersection of the "char-range" node +cr+ with l..r, as a new
  # "char-range" node, or nil when they do not overlap.
  def overlap(cr, l, r)
    if cr[2] >= l && cr[1] <= r
      ["char-range", [cr[1], l].max, [cr[2], r].min]
    end
  end

  # Yield, as pairs of integer code points, the parts of the "char-range"
  # node +cr+ that lie outside l..r (zero, one or two pairs).
  def do_ranges_outside(cr, l, r)
    if cr[2] < l || cr[1] > r # outside
      yield cr[1].ord, cr[2].ord
    else
      if cr[1] < l
        yield cr[1].ord, l.ord - 1
      end
      if cr[2] > r
        yield r.ord + 1, cr[2].ord
      end
    end
  end

  # Utilities for creating hexadecimal rules from unsigned integers

  # Production matching one hex digit whose value is in l..r (0..15);
  # values 10 and up are offered in both upper and lower case.
  def hexdig_range(l, r)
    alt = []
    if l < 10
      alt << ["char-range",
              (l+0x30).chr(Encoding::UTF_8),
              ([r, 9].min+0x30).chr(Encoding::UTF_8)]
    end
    if r >= 10
      alt << ["char-range", ([l, 10].max+0x41-0xA).chr(Encoding::UTF_8),
              (r+0x41-0xA).chr(Encoding::UTF_8)]
      alt << ["char-range", ([l, 10].max+0x61-0xA).chr(Encoding::UTF_8),
              (r+0x61-0xA).chr(Encoding::UTF_8)]
    end
    wrap("alt", alt)
  end

  # Sequence of per-digit productions for the hex numbers l..r, most
  # significant digit first; +ndig+ digits, or as many as +r+ has when
  # +ndig+ is false.
  # This assumes l and r are preprocessed to have single or full ranges except in one place
  def hex_ranges(l, r, ndig = false)
    ld = l.digits(16) # least significant digit first
    rd = r.digits(16)
    ndig ||= rd.size
    seq = []
    (0...ndig).each do |dig|
      seq << hexdig_range(ld[dig] || 0, rd[dig] || 0)
    end
    wrap("seq", seq.reverse)
  end

  # split range into passages that have the property needed for hex_ranges
  # Yields (first, last, column) for each passage; +step+ is the number of
  # bits per digit column.
  def do_range(l, r, step = 4)
    column = 0
    while l <= r
      mask = (1 << step * (column + 1)) - 1
      new_r = l | mask
      if new_r > r # right hand side: come down from mountain
        while column >= 0
          mask >>= step
          new_r = (r + 1) & ~mask
          yield l, new_r - 1, column + 1 if l != new_r
          l = new_r
          column -= 1
        end
        return
      else
        column += 1
        if (l & mask) != 0
          yield l, new_r, column
          l = new_r + 1
        end
      end
    end
  end

  # Support legacy JSON \u/\u\u and \u{...} hex unicode

  # Alternation of #hex_ranges productions that together cover l..r.
  # +l+ and +r+ may be integers or one-character strings (#ord is applied).
  def alt_ranges(l, r, step = 4, ndig = false)
    alt = []
    do_range(l.ord, r.ord, step) do |lo, hi, _column|
      alt << hex_ranges(lo, hi, ndig)
    end
    wrap("alt", alt.reverse) # work around prioritized choice
  end

  # Four-hex-digit alternatives for the code points l..r; code points from
  # 0x10000 up are expressed as a 0xD800-based unit, a literal "\u", and a
  # 0xDC00-based unit (10 bits each).
  def alt_ranges_legacy(l, r)
    alt = []
    if l < 0x10000
      alt << ["alt", alt_ranges(l, [r, 0xFFFF].min, 4, 4)]
    end
    if r >= 0x10000
      l1 = [l, 0x10000].max - 0x10000
      r1 = r - 0x10000
      do_range(l1, r1, 10) do |l2, r2, _column|
        alt << ["seq",
                alt_ranges((l2 >> 10) + 0xD800, (r2 >> 10) + 0xD800, 4, 4),
                expand_string("\\u"),
                alt_ranges((l2 & 0x3FF) + 0xDC00, (r2 & 0x3FF) + 0xDC00, 4, 4)]
      end
    end
    wrap_flat("alt", alt)
  end

  # "{" + any number of "0" + hex digits for l..r + "}".
  # +step+ is accepted but not used; the digits are always generated with a
  # step of 4 bits, matching the base-16 digits #hex_ranges works on.
  def alt_ranges_modern(l, r, step = 4)
    ["seq",
     expand_string("{"),
     ["rep", 0, true, ["cs", "0"]],
     alt_ranges(l, r, 4, false),
     expand_string("}")]
  end

  # flatten_strings: reduce all strings to char-range/seq/alt

  # Turn the string +s+ into a sequence of single-character "char-range"
  # nodes.  With +case_fold+, a character whose upcase and downcase differ
  # becomes an alternation of the two forms (upper case first).
  def expand_string(s, case_fold = false)
    wrap("seq",
         s.chars.map do |ch|
           if case_fold &&
              (u = ch.upcase; d = ch.downcase; u != d)
             ["alt", expand_string(u), expand_string(d)]
           else
             ["char-range", ch, ch]
           end
         end)
  end

  # Replace "cs"/"ci" string nodes in +prod+ by their #expand_string form,
  # then flatten nested operators and merge adjacent character ranges.
  def flatten_strings_1(prod)
    f1 = visit(prod) do |here|
      case here
      in ["cs", string]
        [true, expand_string(string, false)]
      in ["ci", string]
        [true, expand_string(string, true)]
      else
        false
      end
    end
    merge_strings_1(flatten_ops_1(f1))
  end


  # Within each "alt", sort every run of neighbouring "char-range" members
  # and merge ranges that are exactly adjacent.  Note that the "alt" node
  # being visited is modified in place.
  def merge_strings_1(prod)
    visit(prod) do |here|
      case here
      in ["alt", *rest]
        ranges = []
        i = 0
        while i < rest.size
          case rest[i]
          in ["char-range", _ic1, _ic2]
            j = i
            while j+1 < rest.size && (rest[j+1] in ["char-range", _jc1, _jc2])
              j += 1
            end
            ranges << [i, j] if i != j # inclusive right
            i = j
          else
            here[i+1] = merge_strings_1(rest[i]) # XXX could be part of a range
          end
          i += 1
        end
        # Back to front, so that shrinking one run leaves the indices of
        # the runs still to be processed valid.
        ranges.reverse.each do |first, last|
          sorted = here[first+1..last+1].sort
          l = sorted.length
          while l > 1
            l -= 1 # index to last item
            if sorted[l][1].ord == sorted[l-1][2].ord+1 # merge:
              sorted[l-1..l] = [["char-range", sorted[l-1][1], sorted[l][2]]]
            end
          end
          here[first+1..last+1] = sorted
        end
        [true, here]
      else
        false
      end
    end
  end

  # Apply #flatten_strings_1 to every rule, replacing each production in place.
  def flatten_strings
    rules.each do |name, prod|
      rules[name] = flatten_strings_1(prod)
    end
  end

  # Cleanup operations

  # Counterpart of expand_op_into that runs non-+op+ elements through
  # #char_range_to_string1.  Returns +out+, which starts out as [op].
  def expand_range_into(s, op, out = [op])
    s.each do |el|
      case el
      in [^op, *inner]
        expand_range_into(inner, op, out)
      else
        out << char_range_to_string1(el)
      end
    end
    out
  end

  # Turn single-character ranges of printable ASCII other than DQUOTE back
  # into "cs" strings, and join neighbouring "cs" strings within a "seq".
  def char_range_to_string1(prod)
    visit(prod) do |here|
      case here
      in ["seq", *rest]
        rest = expand_range_into(rest, "seq") # rest[0] is now "seq"
        i = rest.size
        while i > 1
          if (rest[i-1] in ["cs", s2]) && (rest[i-2] in ["cs", s1])
            rest[i-2..i-1] = [["cs", s1 + s2]]
          end
          i -= 1
        end
        [true, rest]
      in ["char-range", chr, ^chr] if chr.between?(" ", "!") || chr.between?("#", "~")
        [true, ["cs", chr]]
      else
        false
      end
    end
  end

  # Run #char_range_to_string1, #detect_ci and #ci_cs_merge over every
  # rule, replacing each production in place.
  def char_range_to_string
    rules.each do |name, prod|
      rules[name] = ci_cs_merge(detect_ci(char_range_to_string1(prod)))
    end
  end

  # Replace an alternation of exactly an upper-case "cs" string and its
  # lower-case twin by a single "ci" string.
  def detect_ci(prod)
    visit(prod) do |here|
      case here
      in ["alt", ["cs", c1], ["cs", c2]] if c1.downcase == c2 && c2.upcase == c1
        [true, ["ci", c1]]
      else
        false
      end
    end
  end

  # The string of +prod+ if it can become part of a "ci" string (a "ci"
  # node, or a "cs" node without ASCII letters); nil otherwise.
  def ci_compat(prod)
    case prod
    in ["ci", s]
      s
    in ["cs", s] if s =~ /\A[^A-Za-z]*\z/
      s
    else
      nil
    end
  end

  # Within each "seq", join neighbouring members that are both #ci_compat
  # into one "ci" string.
  def ci_cs_merge(prod)
    visit(prod) do |here|
      case here
      in ["seq", *rest]
        rest = rest.map{|x| ci_cs_merge(x)}
        i = rest.size
        while i > 1
          if (s2 = ci_compat(rest[i-1])) && (s1 = ci_compat(rest[i-2]))
            rest[i-2..i-1] = [["ci", s1 + s2]]
          end
          i -= 1
        end
        [true, wrap_flat("seq", rest)]
      else
        false
      end
    end
  end

  # Within each "seq", replace a run of n > 1 equal neighbouring members by
  # ["rep", n, n, member].
  def seq_rep(prod)
    visit(prod) do |here|
      case here
      in ["seq", *rest]
        rest = rest.map{|x| seq_rep(x)}
        i = rest.size              # behind last element
        while i > 1
          j = i - 1                # end of range
          s_end = rest[j]
          k = j                    # start of range
          while k > 0 && rest[k-1] == s_end
            k -= 1
          end
          if k != j
            n = j - k + 1
            rest[k..j] = [["rep", n, n, s_end]]
          end
          i = k                    # skip element k
        end
        [true, wrap_flat("seq", rest)]
      else
        false
      end
    end
  end

  # sharing

  # Add one to counter[node] for every "alt" node in +prod+, including
  # those nested inside other "alt" nodes.
  def count_alt(counter, prod)
    visit(prod) do |here|
      case here
      in ["alt", *rest]
        rest.each {|pr| count_alt(counter, pr)}
        counter[here] += 1
        [true, here] # members were handled above; stop the visitor here
      else
        false
      end
    end
  end

  # Give every "alt" production that occurs more than twice across all
  # rules its own rule named "<prefix>-a<i>" (most frequent first), replace
  # its occurrences by that name, and run #seq_rep over every rule.
  def share_alt(prefix)
    counter = Hash.new(0)
    rules.each do |name, prod|
      count_alt(counter, prod)
    end
    subs = {}
    counter.to_a.select{|k, v| v > 2}.sort_by{|k, v| -v}.each_with_index do |(el, _count), i|
      name = "#{prefix}-a#{i}"
      rules[name] = el
      subs[el] = name
    end
    rules.replace(Hash[rules.map do |k, v|
                         [k, seq_rep(visit(v) do |prod|
                                       # k != s: keep the body of the shared
                                       # rule itself intact
                                       if (s = subs[prod]) && k != s
                                         [true, s]
                                       end
                                     end)]
                       end])
  end

  # Replace hex-digit shaped productions in +prod+ by references to rules
  # named "x<first><last>", recording each such rule in +rules+ the first
  # time its name comes up.
  def share_hex_1(prod, rules)
    visit(prod) do |here|
      case here
      in ["alt",
          ["char-range", c3l, "9"],
          ["char-range", "A", c4r],
          ["char-range", "a", c6r]] if c4r == c6r.upcase && c3l >= "0" && c6r <= "f"
        name = "x#{c3l}#{c6r}"
        rules[name] ||= here
        [true, name]
      in ["alt",
          ["char-range", c4l, c4r],
          ["char-range", c6l, c6r]] if c4r == c6r.upcase &&
                                       c4l == c6l.upcase &&
                                       c6l.between?("a", "f") &&
                                       c6r.between?("a", "f")
        name = "x#{c6l}#{c6r}"
        rules[name] ||= here
        [true, name]
      in ["char-range", l, r] if l >= "0" && r <= "9"
        name = "x#{l}#{r}"
        rules[name] ||= here
        [true, name]
      else
        false
      end
    end
  end

  # Apply #share_hex_1 to every rule, then merge the newly created rules in,
  # sorted by name.  The argument is not used.
  def share_hex(_prefix)
    newrules = {}
    rules.each do |name, prod|
      rules[name] = share_hex_1(prod, newrules)
    end
    rules.merge!(Hash[newrules.sort])
  end
end
@@ -0,0 +1,26 @@
1
+ require_relative "../abnftt.rb"
2
+
3
class ABNF
  # Run #visit with the same block over each production in +prod_array+
  # and return the results as a new array.
  def visit_all(prod_array, &block)
    prod_array.map { |member| visit(member, &block) }
  end

  # Generic tree walk over a production.
  #
  # The block is called with the current node first.  When it returns a pair
  # whose first element is truthy, the second element is the result for this
  # node and its children are not visited.  Otherwise "alt", "tadd" and "seq"
  # nodes are rebuilt from their visited members, "rep" nodes from their
  # visited body, and anything else is returned unchanged.
  def visit(prod, &block)
    done, replacement = block.call(prod, &block)
    return replacement if done

    case prod
    in [("alt" | "tadd" | "seq") => op, *members]
      # dup: the rebuilt node gets a head string of its own
      [op.dup, *visit_all(members, &block)]
    in ["rep", s, e, body]
      ["rep", s, e, visit(body, &block)]
    else
      prod
    end
  end
end
@@ -0,0 +1,106 @@
1
class ABNF

  # Render +s+ as a double-quoted ABNF string.  Only space, "!" and the
  # characters "#" through "~" are supported; anything else raises.
  def stringify(s)
    fail "Can't stringify #{s.inspect} yet" unless s =~ /\A[ !#-~]*\z/
    %{"#{s}"}
  end

  # Left-hand side of a rule: the rule name as is.
  def write_lhs(k)
    k
  end

  # precedence:
  # 1: / alt -> (type1)
  # 2: »« seq
  # 4: atomic

  # Parenthesize +inner+ (whose own precedence is +prec+) when the context
  # asks for +targetprec+ or tighter.
  def prec_check(inner, targetprec, prec)
    if targetprec >= prec
      "(#{inner})"
    else
      inner
    end
  end

  # Render the production +v+ as ABNF text, parenthesized if its precedence
  # does not exceed +targetprec+.  Raises on a node it does not know.
  def write_rhs(v, targetprec = 0)
    # each branch yields [precedence, text]
    prec, ret =
      case v
      in String                 # this should really be ["name", id]
        [4, v]
      in ["name", id]
        [4, id]
      in ["alt" | "tadd", *types]
        [1, types.map{write_rhs(_1, 1)}.join(" / ")]
      in ["seq", *groups]
        case groups.size
        when 0; [4, ""]         # XXX
        else
          [2, "#{groups.map{write_rhs(_1, 2)}.join(" ")}"]
        end
      in ["ci", s]
        [4, stringify(s)]
      in ["cs", s]
        if s =~ /\A[^A-Za-z]*\z/
          [4, stringify(s)]     # reduce noise if no alphabetics
        else
          # interpolation rather than "%s" << ...: no string literal is
          # mutated, so this also works with frozen/chilled literals
          [4, "%s#{stringify(s)}"]
        end
      in ["char-range", c1, c2]
        nc1 = "%02x" % c1.ord
        nc2 = "%02x" % c2.ord
        nc2add = "-#{nc2}" if nc2 != nc1
        [4, "%x#{nc1}#{nc2add}"]
      in ["rep", s, e, group]
        if s == 0 && e == 1
          [4, "[#{write_rhs(group)}]"]
        else
          occur = case [s, e]
                  in [1, 1];      ""
                  in [0, true];   "*"
                  in [n, ^n];     n.to_s
                  else
                    "#{s}*#{e != true ? e : ""}"
                  end
          [4, "#{occur}#{write_rhs(group, 4)}"]
        end
      else
        fail [:WRITE_NOMATCH, v].inspect
      end
    prec_check(ret, targetprec, prec)
  end

  # One rule as ABNF text; a top-level "tadd" production is written with
  # "=/" instead of "=".
  def write_rule(k, v)
    case v
    in ["tadd", *_rest]
      assign = "=/"
    else
      assign = "="
    end
    "#{write_lhs(k)} #{assign} #{write_rhs(v, 0)}"
  end

  # All rules, one per line; a final newline is added unless the text is
  # empty or already ends in one.
  def to_s
    rules.map {|k, v| write_rule(k, v) }.join("\n").sub(/.\z/) { |last| "#{last}\n" }
  end

  # primitively break down lines so they fit on a teletype
  # Lines of +s+ longer than +col+ are split at the last space within the
  # first +col+ characters (if that space lies beyond index 4); the
  # remainder continues on a new line that starts with a space.
  def breaker(s, col = 69)
    s.each_line.map do |line|
      pieces = [line]
      while pieces[-1].size > col
        breakpoint = pieces[-1][0...col].rindex(' ')
        break unless breakpoint && breakpoint > 4 # leaves only the while loop
        pieces[-1..-1] = [
          pieces[-1][0...breakpoint],
          " #{pieces[-1][breakpoint+1..-1]}"
        ]
      end
      pieces.join("\n")
    end.join
  end

end
data/lib/abnftt.rb CHANGED
@@ -53,11 +53,17 @@ class ABNF
53
53
  ABNF.new(ast)
54
54
  end
55
55
 
56
+ def self.from_rules(r)
57
+ ABNF.new(nil, r)
58
+ end
59
+
56
60
  attr_accessor :ast, :rules, :tree
57
- def initialize(ast_)
58
- @ast = ast_
59
- @tree = ast.ast
60
- @rules = {}
61
+ def initialize(ast_ = nil, rules_ = {})
62
+ if ast_
63
+ @ast = ast_
64
+ @tree = ast.ast
65
+ end
66
+ @rules = rules_
61
67
  @tree.each do |x|
62
68
  op, name, val, rest = x
63
69
  fail rest if rest
@@ -72,7 +78,7 @@ class ABNF
72
78
  else
73
79
  val
74
80
  end
75
- end
81
+ end if @tree
76
82
  # warn "** rules #{rules.inspect}"
77
83
  end
78
84
 
metadata CHANGED
@@ -1,28 +1,35 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: abnftt
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Carsten Bormann
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-08-26 00:00:00.000000000 Z
11
+ date: 2025-01-11 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Less shifty support for tools based on IETF's ABNF
14
14
  email: cabo@tzi.org
15
15
  executables:
16
+ - abnfrob
17
+ - abnfrob~
16
18
  - abnftt
17
- - abnftt~
18
19
  extensions: []
19
20
  extra_rdoc_files: []
20
21
  files:
21
22
  - abnftt.gemspec
23
+ - bin/abnfrob
24
+ - bin/abnfrob~
22
25
  - bin/abnftt
23
- - bin/abnftt~
24
26
  - lib/abnfgrammar.rb
25
27
  - lib/abnftt.rb
28
+ - lib/abnftt/abnf-flattener.rb
29
+ - lib/abnftt/abnf-squasher.rb
30
+ - lib/abnftt/abnf-util.rb
31
+ - lib/abnftt/abnf-visitor.rb
32
+ - lib/abnftt/abnf-writer.rb
26
33
  homepage: http://github.com/cabo/abnftt
27
34
  licenses:
28
35
  - MIT
@@ -42,7 +49,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
42
49
  - !ruby/object:Gem::Version
43
50
  version: '0'
44
51
  requirements: []
45
- rubygems_version: 3.4.10
52
+ rubygems_version: 3.5.14
46
53
  signing_key:
47
54
  specification_version: 4
48
55
  summary: RFC 5234+7405 ABNF to Treetop
data/bin/abnftt~ DELETED
@@ -1,135 +0,0 @@
1
- #!/usr/bin/env ruby -Ku
2
-
3
- require 'pp'
4
- require 'yaml'
5
- require 'treetop'
6
-
7
- Encoding.default_external = Encoding::UTF_8
8
-
9
- require 'abnf'
10
-
11
- class Treetop::Runtime::SyntaxNode
12
- def clean_abnf
13
- if elements
14
- elements.map {|el| el.clean_abnf}.join
15
- else
16
- text_value
17
- end
18
- end
19
- def ast
20
- fail "undefined_ast #{inspect}"
21
- end
22
- def ast_from_percent(base, first, second)
23
- c1 = first.to_i(base).chr(Encoding::UTF_8)
24
- case second[0]
25
- when nil
26
- ["cs", c1]
27
- when "-"
28
- c2 = second[1..-1].to_i(base).chr(Encoding::UTF_8)
29
- ["char-range", c1, c2]
30
- when "."
31
- el = second.split(".")
32
- el[0] = first
33
- ["cs", el.map {|c| c.to_i(base).chr(Encoding::UTF_8)}.join]
34
- else
35
- fail "ast_from_percent"
36
- end
37
- end
38
- end
39
-
40
- def to_treetop(ast)
41
- <<~EOS
42
- # Encoding: UTF-8
43
- grammar TESTME
44
- #{ast.map {|x| to_treetop0(x)}.join}
45
- end
46
- EOS
47
- end
48
- def to_treetop0(ast)
49
- fail ast.inspect unless ast[0] == "="
50
- <<~EOS
51
- rule #{to_treetop1(ast[1])}
52
- #{to_treetop1(ast[2])}
53
- end
54
- EOS
55
- end
56
- FIXUP_NAMES = Hash.new {|h, k| k}
57
- FIXUP_NAMES.merge!({
58
- "rule" => "r__rule",
59
- })
60
- def to_treetop1(ast)
61
- case ast
62
- when String
63
- FIXUP_NAMES[ast].gsub("-", "_")
64
- when Array
65
- case ast[0]
66
- when "alt" # ["alt", *a]
67
- "(#{ast[1..-1].map {|x| to_treetop1(x)}.join(" / ")})"
68
- when "seq" # ["seq", *a]
69
- "(#{ast[1..-1].map {|x| to_treetop1(x)}.join(" ")})"
70
- when "rep" # ["rep", s, e, a]
71
- t = to_treetop1(ast[3]) || "@@@"
72
- case [ast[1], ast[2]]
73
- when [0, 1]
74
- t + "?"
75
- when [0, true]
76
- t + "*"
77
- when [1, true]
78
- t + "+"
79
- else
80
- t + " #{ast[1]}..#{ast[2] == true ? '' : ast[2]}"
81
- end
82
- when "prose" # ["prose", text]
83
- fail "prose not implemented #{ast.inspect}"
84
- when "ci" # ["ci", text]
85
- s = ast[1]
86
- if s =~ /\A[^A-Za-z]*\z/
87
- s.inspect
88
- else
89
- s.inspect << "i" # could do this always, but reduce noise
90
- end
91
- when "cs" # ["cs", text]
92
- ast[1].inspect
93
- when "char-range" # ["char-range", c1, c2]
94
- c1 = Regexp.quote(ast[1])
95
- c2 = Regexp.quote(ast[2])
96
- "[#{c1}-#{c2}]" # XXX does that always work
97
- when "im" # ["im", a, text]
98
- to_treetop1(ast[1]) + " " + ast[2]
99
- else
100
- fail "to_treetop(#{ast.inspect})"
101
- end
102
- else
103
- fail "to_treetop(#{ast.inspect})"
104
- end
105
- end
106
-
107
- unless fn = ARGV[0]
108
- warn "Usage: abnftt grammar.abnftt"
109
- exit 1
110
- end
111
- outfn = fn.sub(/\.abnftt\z/, "")
112
-
113
- parser = ABNFParser.new
114
- abnf_file = File.read(fn)
115
- ast = parser.parse abnf_file
116
- if ast
117
- # p ast
118
- File.open("#{outfn}.yaml", "w") do |f|
119
- f.puts ast.ast.to_yaml
120
- end
121
- # pp ast.ast
122
- File.open("#{outfn}.treetop", "w") do |f|
123
- f.puts to_treetop(ast.ast)
124
- end
125
- File.open("#{outfn}.abnf", "w") do |f|
126
- f.puts ast.clean_abnf
127
- end
128
- else
129
-
130
- puts parser.failure_reason
131
- parser.failure_reason =~ /^(Expected .+) after/m
132
- puts "#{$1.gsub("\n", '<<<NEWLINE>>>')}:"
133
- puts abnf_file.lines.to_a[parser.failure_line - 1]
134
- puts "#{'~' * (parser.failure_column - 1)}^"
135
- end