abnftt 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 152ef34be3428f0a7cda4f157ec08a1986f5cf295436be8186ee53d61eddb386
4
- data.tar.gz: fdc3504304d4e9c5244dcfce7a2e103e2801d3f6087de307bcd976a099fe0b5d
3
+ metadata.gz: 267bcebab4bd13c22da536e87b6ccd3444b214db589a5e9d4b054a56ffe3a686
4
+ data.tar.gz: cf4c222a9825e210e635989837ba43432e87661ba6e5e98a9a52aedcfbdad055
5
5
  SHA512:
6
- metadata.gz: e5b786e95c96464bf516e2d6ba00f1363a6a720d0dfd01b83401fef3ade2dc51116fda365881fb770fa9756f0ec71a14e7c6e85e7e3a7bc3589a930e8b9e5381
7
- data.tar.gz: '08eb748abf720cfdfe4a742e675e7b5b0144a60cdd2fce90f85783a2b7eac2d856e138f6536394dcccc123e72eb6fa8439b472b1e8ff26fff7630938445b2316'
6
+ metadata.gz: 28fe16c851f050e8072bdf939b45569fde9233fbeb26a2e43169fa04b1c66e85e43e893056faab3b87a4e5c49f2f7cea08274dd852c26f8d42688a85198eab57
7
+ data.tar.gz: 0f4977a6b13b8b4a621b594a31ad87d6759b5780bb59b98f7afc3ab4ac3e70fd2f9ee5cb9f710b8db1ce79e8f411734ec1517ad829456f3cd0f4a383f683d721
data/abnftt.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "abnftt"
3
- s.version = "0.2.4"
3
+ s.version = "0.2.6"
4
4
  s.summary = "RFC 5234+7405 ABNF to Treetop"
5
5
  s.description = %q{Less shifty support for tools based on IETF's ABNF}
6
6
  s.author = "Carsten Bormann"
data/bin/abnfrob ADDED
@@ -0,0 +1,146 @@
1
#!/usr/bin/env ruby -Ku
# abnfrob: parse an ABNF grammar file, optionally rewrite ("squash") its rules
# under an application prefix, and emit the rules as ABNF, JSON, YAML or a
# pretty-printed Ruby structure.
require 'pp'
require 'yaml'
require 'pathname' # Pathname is used below to derive the output file stem

Encoding.default_external = Encoding::UTF_8
require 'optparse'
require 'ostruct'

$options = OpenStruct.new
begin
  op = OptionParser.new do |opts|
    # Report the gem version when the gem spec is loaded; otherwise fall back
    # to a fixed marker instead of relying on a rescue modifier.
    spec = defined?(Gem) && Gem.loaded_specs['abnftt']
    opts.version = spec ? "(from abnftt #{spec.version})" : "unknown-version"

    opts.banner = "Usage: abnfrob [options] file.abnf"
    opts.on("-b", "--bap=[OPTIONS]", "Pretty-print using bap") do |v|
      $options.bap = true
      $options.bap_options = v ? " #{v}" : ""
      # warn "** bap_options #{$options.bap_options.inspect}"
    end
    opts.on("-tFMT", "--to=FMT", [:abnf, :json, :pp, :yaml], "Target format") do |v|
      $options.target = v
    end
    opts.on("-y", "--yaml", "Output separate YAML copy of rules") do |v|
      $options.yaml = v
    end
    opts.on("-a", "--asr33", "Line-break to fit on teletype") do |v|
      $options.asr33 = v
    end
    opts.on("--squash=PREFIX", String, "Squash to app-prefix") do |v|
      $options.squash = v
    end
  end
  op.parse!
rescue StandardError => e
  warn e
  exit 1
end

require 'abnftt'

# A file name is mandatory and its extension has to start with ".abnf";
# File.extname returns either "" or a string beginning with ".".
fn = ARGV[0]
unless fn && File.extname(fn).start_with?(".abnf")
  warn op
  exit 1
end
# Output files are named after the input file with its extension removed.
outfn = (Pathname.new(File.dirname(fn)) + File.basename(fn, ".*")).to_s

parser = ABNFGrammarParser.new
abnf_file = File.read(fn)
ast = parser.parse abnf_file
unless ast
  # Show the parser's message, then the offending source line with a caret
  # under the failure column.
  puts parser.failure_reason
  # The shortened "Expected ..." summary is only printed when the reason has
  # the expected shape; previously a non-matching reason crashed on nil.
  if parser.failure_reason =~ /^(Expected .+) after/m
    puts "#{$1.gsub("\n", '<<<NEWLINE>>>')}:"
  end
  puts abnf_file.lines[parser.failure_line - 1]
  puts "#{'~' * (parser.failure_column - 1)}^"
  exit 1
end

abnf = ABNF.new(ast)

if $options.yaml
  File.open("#{outfn}.yaml", "w") do |f|
    f.puts abnf.rules.to_yaml
  end
end

if $options.squash
  require 'abnftt/abnf-flattener.rb'
  require 'abnftt/abnf-util.rb'
  require 'abnftt/abnf-squasher.rb'

  prefix = $options.squash + "-"
  # Matches an already-present prefix at the start of a name (or the empty
  # string), so that substituting it with the prefix never doubles it.
  # The prefix is escaped because it comes straight from the command line.
  optional_prefix = /\A(?:#{Regexp.escape(prefix)})?/

  abnf.flatten_ops
  abnf.flatten_strings

  abnf.squash_edn_levels
  abnf.char_range_to_string

  abnf.share_hex("sq")

  abnf.share_alt("sq")

  # Prefix every rule name, both where it is defined and where it is
  # referenced (references are the bare strings inside a production).
  prefixed = abnf.rules.map do |k, v|
    renamed = abnf.visit(v) do |prod|
      [true, prod.sub(optional_prefix, prefix)] if String === prod
    end
    [k.sub(optional_prefix, prefix), renamed]
  end
  abnf.rules.replace(prefixed.to_h)

  # Put a new first rule in front that wraps the former first rule in
  # PREFIX' ... ' delimiters.
  rule1 = abnf.rules.first
  outer_name = "sq-#{rule1[0]}"
  outer_elements = ["seq",
                    ["cs", $options.squash + "'"],
                    rule1[0],
                    ["cs", "'"]]
  abnf.rules.replace([[outer_name, outer_elements], *abnf.rules.to_a].to_h)
  File.open("#{outfn}-sq.yaml", "w") do |f|
    f.puts abnf.rules.to_yaml
  end
end

## Work on abnf.rules

case $options.target
when :pp
  pp abnf.rules
when :json
  require 'neatjson'
  puts JSON.neat_generate(abnf.rules, after_comma: 1, after_colon: 1)
when :yaml
  puts abnf.rules.to_yaml
when :abnf, nil
  require_relative '../lib/abnftt/abnf-writer.rb'
  result = abnf.to_s

  if $options.bap
    # Pipe the generated ABNF through the external "bap" tool; its stderr is
    # relayed with a "** " marker on every line.
    require 'open3'
    result, err, status =
      Open3.capture3("bap -o RFC7405#{$options.bap_options}",
                     stdin_data: result)
    warn err.gsub(/^/, "** ") unless err == ""
    unless status.success?
      warn "*** Giving up"
      exit 1
    end
  end

  if $options.asr33
    result = abnf.breaker(result)
  end

  puts result
  # NOTE(review): the "-sq" suffix is used even when --squash was not given;
  # confirm whether that is intended.
  File.open("#{outfn}-sq.abnf", "w") do |f|
    f.puts result
  end
else
  warn ["Unknown target format: ", $options.target].inspect
end
data/bin/abnfrob~ ADDED
@@ -0,0 +1,40 @@
1
#!/usr/bin/env ruby -Ku
# Parses the grammar file named on the command line and writes three files
# next to it: <name>.yaml (the parsed tree), <name>.treetop (generated Treetop
# grammar) and <name>.abnf (cleaned-up ABNF).
# NOTE(review): the "~" suffix of this file and the "abnftt" usage text
# suggest an editor backup copy that was packaged unintentionally -- confirm.

require 'pp'
require 'yaml'

Encoding.default_external = Encoding::UTF_8

require 'abnftt'

unless fn = ARGV[0]
  warn "Usage: abnftt grammar.abnftt"
  exit 1
end
outfn = fn.sub(/\.abnftt\z/, "")

parser = ABNFGrammarParser.new
abnf_file = File.read(fn)
ast = parser.parse abnf_file

unless ast
  # Show the parser's message, then the offending source line with a caret
  # under the failure column.
  puts parser.failure_reason
  # Only print the shortened summary when the reason has the expected shape;
  # previously a non-matching reason crashed on nil.
  if parser.failure_reason =~ /^(Expected .+) after/m
    puts "#{$1.gsub("\n", '<<<NEWLINE>>>')}:"
  end
  puts abnf_file.lines[parser.failure_line - 1]
  puts "#{'~' * (parser.failure_column - 1)}^"
  # Signal the failure to the caller, as bin/abnfrob does.
  exit 1
end

# p ast
abnf = ABNF.new(ast)
File.open("#{outfn}.yaml", "w") do |f|
  f.puts abnf.tree.to_yaml
end
# pp ast.ast
File.open("#{outfn}.treetop", "w") do |f|
  # Module name: base name with "-" turned into "_", all other characters
  # outside [_a-zA-Z0-9] dropped, upper-cased.
  modname = File.basename(outfn).gsub("-", "_").gsub(/[^_a-zA-Z0-9]/, "").upcase
  f.puts abnf.to_treetop(modname)
end
File.open("#{outfn}.abnf", "w") do |f|
  # Trailing whitespace is removed from every line of the cleaned ABNF.
  f.puts ast.clean_abnf.lines.map(&:rstrip).join("\n")
end
@@ -0,0 +1,32 @@
1
+ require "abnftt/abnf-visitor.rb"
2
+
3
class ABNF
  # Append the members of +s+ to +out+. A member that is itself an +op+ node
  # has its own members spliced in recursively instead of being nested; every
  # other member is flattened on its own via flatten_ops_1.
  # +out+ starts as [op], so the result is a complete +op+ node.
  def expand_op_into(s, op, out = [op])
    s.each do |member|
      same_op = member.is_a?(Array) && !member.empty? && op === member.first
      if same_op
        expand_op_into(member.drop(1), op, out)
      else
        out << flatten_ops_1(member)
      end
    end
    out
  end

  # Return +prod+ with directly nested "seq" inside "seq" and "alt" inside
  # "alt" merged into a single node.
  def flatten_ops_1(prod)
    visit(prod) do |node|
      head = node.first if node.is_a?(Array)
      case head
      when "seq" then [true, expand_op_into(node.drop(1), "seq")]
      when "alt" then [true, expand_op_into(node.drop(1), "alt")]
      else false
      end
    end
  end

  # Apply flatten_ops_1 to every rule, storing the result back under the
  # same rule name.
  def flatten_ops
    rules.each_pair do |name, prod|
      rules.store(name, flatten_ops_1(prod))
    end
  end
end
@@ -0,0 +1,57 @@
1
+ require "abnftt"
2
+ require "abnftt/abnf-flattener"
3
+ require "abnftt/abnf-util"
4
+ require "abnftt/abnf-visitor"
5
+
6
class ABNF

  # Character ranges (as pairs of one-character strings) that are copied
  # literally; the code points of "'" (0x27) and "\" (0x5c) are left out.
  #   [[0xA, 0xA], [0x20, 0x21], [0x23, 0x26], -- but DQUOTE is allowed
  #   [0x28, 0x5b], [0x5d, 0x7e], [0xa0, 0xd7ff], -- but JSON allows 7F-9F
  UNESCAPED_SQSTR_RANGES = [
    [0xA, 0xA], [0x20, 0x26],     # "'"
    [0x28, 0x5b], [0x5d, 0xd7ff], # \
    [0xe000, 0x10ffff]
  ].map { |bounds| bounds.map { |cp| cp.chr(Encoding::UTF_8) } }

  # Pairs of [character, letter]: the character is represented as a
  # backslash followed by the letter.
  ESCAPED_SQSTR_MAPPINGS = [
    ["\x08", "b"],
    ["\x09", "t"],
    ["\x0A", "n"],
    ["\x0C", "f"],
    ["\x0D", "r"],
    ["\x27", "'"],
    ["\x2F", "/"],
    ["\x5C", "\\"]
  ]

  # Replace every "char-range" node in +prod+ by an "alt" of
  # * the parts of the range that overlap UNESCAPED_SQSTR_RANGES,
  # * a backslash-plus-letter sequence for each ESCAPED_SQSTR_MAPPINGS
  #   character inside the range, and
  # * a "\u" sequence followed by the legacy or modern hex form of the range,
  # sorted, then flatten the result with flatten_ops_1.
  def squash_edn_levels_1(prod)
    squashed = visit(prod) do |node|
      is_range = node.is_a?(Array) && node.size == 3 && node[0] == "char-range"
      next false unless is_range

      lo = node[1]
      hi = node[2]
      literal = UNESCAPED_SQSTR_RANGES.filter_map { |u1, u2| overlap(node, u1, u2) }
      escaped = ESCAPED_SQSTR_MAPPINGS.filter_map do |raw, letter|
        next unless raw.between?(lo, hi)

        ["seq", ["char-range", "\\", "\\"], ["char-range", letter, letter]]
      end
      legacy = alt_ranges_legacy(lo.ord, hi.ord)
      modern = alt_ranges_modern(lo.ord, hi.ord)
      unicode_escape = ["seq",
                        ["cs", "\\u"],
                        wrap_flat("alt", [legacy, modern])]
      [true, wrap_flat("alt", (literal + escaped + [unicode_escape]).sort)]
    end
    flatten_ops_1(squashed)
  end

  # Apply squash_edn_levels_1 to every rule, storing the result back under
  # the same rule name.
  def squash_edn_levels
    rules.each_pair do |name, prod|
      rules.store(name, squash_edn_levels_1(prod))
    end
  end

end
@@ -0,0 +1,371 @@
1
+ require "abnftt/abnf-visitor"
2
+ require "abnftt/abnf-flattener"
3
+
4
class ABNF

  # Return the only element of +all+ as is; otherwise build the node
  # [head, *all].
  def wrap(head, all)
    if all.size == 1
      all.first
    else
      [head, *all]
    end
  end

  # Like #wrap, but elements that are themselves +head+ nodes have their
  # members spliced in rather than nested.
  def wrap_flat(head, all)
    if all.size == 1
      all.first
    else
      [head, *all.flat_map {|el|
         case el
         in [^head, *rest]
           rest
         else
           [el]
         end
       }]
    end
  end

  # Intersection of the "char-range" node +cr+ with the range l..r, as a new
  # "char-range" node; nil when the two do not overlap.
  def overlap(cr, l, r)
    if cr[2] >= l && cr[1] <= r
      ["char-range", [cr[1], l].max, [cr[2], r].min]
    end
  end

  # Utilities for creating hexadecimal rules from unsigned integers

  # Production matching one hex digit whose value lies in l..r (0..15):
  # a decimal-digit range for the part below 10, plus an upper-case and a
  # lower-case letter range for the part from 10 upwards.
  def hexdig_range(l, r)
    alt = []
    if l < 10
      alt << ["char-range",
              (l+0x30).chr(Encoding::UTF_8),
              ([r, 9].min+0x30).chr(Encoding::UTF_8)]
    end
    if r >= 10
      alt << ["char-range", ([l, 10].max+0x41-0xA).chr(Encoding::UTF_8),
              (r+0x41-0xA).chr(Encoding::UTF_8)]
      alt << ["char-range", ([l, 10].max+0x61-0xA).chr(Encoding::UTF_8),
              (r+0x61-0xA).chr(Encoding::UTF_8)]
    end
    wrap("alt", alt)
  end

  # This assumes l and r are preprocessed to have single or full ranges except in one place
  #
  # Builds a sequence of per-digit hexdig_range productions, most significant
  # digit first. +ndig+ fixes the number of digits; when false, the number of
  # hex digits of +r+ is used. Missing high digits count as 0.
  def hex_ranges(l, r, ndig = false)
    ld = l.digits(16)
    rd = r.digits(16)
    ndig ||= rd.size
    seq = []
    (0...ndig).each do |dig|
      seq << hexdig_range(ld[dig] || 0, rd[dig] || 0)
    end
    wrap("seq", seq.reverse)
  end

  # split range into passages that have the property needed for hex_ranges
  #
  # Yields (first, last, column) for each passage; +step+ is the number of
  # bits per column (4 for one hex digit).
  def do_range(l, r, step = 4)
    column = 0
    while l <= r
      mask = (1 << step * (column + 1)) - 1
      new_r = l | mask
      if new_r > r # right hand side: come down from mountain
        while column >= 0
          mask >>= step
          new_r = (r + 1) & ~mask
          yield l, new_r - 1, column + 1 if l != new_r
          l = new_r
          column -= 1
        end
        return
      else
        column += 1
        if (l & mask) != 0
          yield l, new_r, column
          l = new_r + 1
        end
      end
    end
  end

  # Support legacy JSON \u/\u\u and \u{...} hex unicode

  # Alternatives of hex-digit sequences that together match the numbers
  # l..r, one alternative per passage produced by do_range.
  def alt_ranges(l, r, step = 4, ndig = false)
    alt = []
    # Distinct block parameter names: the passage bounds are not the method's
    # own l and r.
    do_range(l.ord, r.ord, step) do |lo, hi, _column|
      alt << hex_ranges(lo, hi, ndig)
    end
    wrap("alt", alt.reverse)    # work around prioritized choice
  end

  # Four-hex-digit forms for the code points l..r: the part below 0x10000
  # directly, the part from 0x10000 upwards as a 0xD800-based unit, the
  # characters "\u", and a 0xDC00-based unit (10 bits each).
  def alt_ranges_legacy(l, r)
    alt = []
    if l < 0x10000
      alt << ["alt", alt_ranges(l, [r, 0xFFFF].min, 4, 4)]
    end
    if r >= 0x10000
      l1 = [l, 0x10000].max - 0x10000
      r1 = r - 0x10000
      do_range(l1, r1, 10) do |l2, r2, _column|
        alt << ["seq",
                alt_ranges((l2 >> 10) + 0xD800, (r2 >> 10) + 0xD800, 4, 4),
                expand_string("\\u"),
                alt_ranges((l2 & 0x3FF) + 0xDC00, (r2 & 0x3FF) + 0xDC00, 4, 4)]
      end
    end
    wrap_flat("alt", alt)
  end

  # Braced form for the code points l..r: "{", any number of "0", the hex
  # digits, "}". The +step+ parameter is accepted but not used.
  def alt_ranges_modern(l, r, step = 4)
    ["seq",
     expand_string("{"),
     ["rep", 0, true, ["cs","0"]],
     alt_ranges(l, r, 4, false),
     expand_string("}")]
  end

  # flatten_strings: reduce all strings to char-range/seq/alt

  # Turn the string +s+ into single-character "char-range" nodes (a "seq"
  # when there are several). With +case_fold+, a character whose upper- and
  # lower-case forms differ becomes an "alt" of both.
  def expand_string(s, case_fold = false)
    wrap("seq",
         s.chars.map do |ch|
           if case_fold &&
              (u = ch.upcase; d = ch.downcase; u != d)
             ["alt", expand_string(u), expand_string(d)]
           else
             ["char-range", ch, ch]
           end
         end)
  end

  # Replace every "cs"/"ci" string in +prod+ by its expansion, then flatten
  # nested operators and merge adjacent character ranges.
  def flatten_strings_1(prod)
    f1 = visit(prod) do |here|
      case here
      in ["cs", string]
        [true, expand_string(string, false)]
      in ["ci", string]
        [true, expand_string(string, true)]
      else
        false
      end
    end
    merge_strings_1(flatten_ops_1(f1))
  end


  # Within each "alt" node, sort every run of consecutive "char-range"
  # members and fuse ranges whose code points are directly adjacent.
  # Other members are processed recursively. The "alt" node is modified in
  # place.
  def merge_strings_1(prod)
    visit(prod) do |here|
      case here
      in ["alt", *rest]
        ranges = []
        i = 0
        while i < rest.size
          case rest[i]
          in ["char-range", _ic1, _ic2]
            j = i
            while j+1 < rest.size && (rest[j+1] in ["char-range", _jc1, _jc2])
              j += 1
            end
            ranges << [i, j] if i != j # inclusive right
            i = j
          else
            # rest is offset by one against here (here[0] is "alt")
            here[i+1] = merge_strings_1(rest[i]) # XXX could be part of a range
          end
          i += 1
        end
        # Work from the back so that shrinking one run does not shift the
        # indices of the runs still to be handled.
        ranges.reverse.each do |first, last|
          sorted = here[first+1..last+1].sort
          idx = sorted.length
          while idx > 1
            idx -= 1                # index to last item
            if sorted[idx][1].ord == sorted[idx-1][2].ord+1 # merge:
              sorted[idx-1..idx] = [["char-range", sorted[idx-1][1], sorted[idx][2]]]
            end
          end
          here[first+1..last+1] = sorted
        end
        [true, here]
      else
        false
      end
    end
  end

  # Apply flatten_strings_1 to every rule.
  def flatten_strings
    rules.each do |name, prod|
      rules[name] = flatten_strings_1(prod)
    end
  end

  # Cleanup operations

  # Counterpart of expand_op_into that converts each non-+op+ member with
  # char_range_to_string1 while splicing nested +op+ nodes into +out+.
  def expand_range_into(s, op, out = [op])
    s.each do |el|
      case el
      in [^op, *inner]
        expand_range_into(inner, op, out)
      else
        out << char_range_to_string1(el)
      end
    end
    out
  end

  # Turn single-character "char-range" nodes for " ", "!" and "#".."~" back
  # into "cs" strings, and join neighbouring "cs" strings inside a "seq".
  def char_range_to_string1(prod)
    visit(prod) do |here|
      case here
      in ["seq", *rest]
        rest = expand_range_into(rest, "seq")
        # rest[0] is "seq" here; walk backwards joining adjacent "cs" nodes.
        i = rest.size
        while i > 1
          if (rest[i-1] in ["cs", s2]) && (rest[i-2] in ["cs", s1])
            rest[i-2..i-1] = [["cs", s1 + s2]]
          end
          i -= 1
        end
        [true, rest]
      in ["char-range", chr, ^chr] if chr.between?(" ", "!") || chr.between?("#", "~")
        [true, ["cs", chr]]
      else
        false
      end
    end
  end

  # Apply char_range_to_string1, detect_ci and ci_cs_merge to every rule.
  def char_range_to_string
    rules.each do |name, prod|
      rules[name] = ci_cs_merge(detect_ci(char_range_to_string1(prod)))
    end
  end

  # Replace an "alt" of exactly two "cs" strings that are each other's
  # upper-/lower-case form (upper first) by a single "ci" string.
  def detect_ci(prod)
    visit(prod) do |here|
      case here
      in ["alt", ["cs", c1], ["cs", c2]] if c1.downcase == c2 && c2.upcase == c1
        [true, ["ci", c1]]
      else
        false
      end
    end
  end

  # The string of +prod+ if it can be part of a "ci" string: any "ci" node,
  # or a "cs" node without ASCII letters. nil otherwise.
  def ci_compat(prod)
    case prod
    in ["ci", s]
      s
    in ["cs", s] if s =~ /\A[^A-Za-z]*\z/
      s
    else
      nil
    end
  end

  # Join neighbouring members of a "seq" that are both ci_compat into one
  # "ci" string.
  def ci_cs_merge(prod)
    visit(prod) do |here|
      case here
      in ["seq", *rest]
        rest = rest.map{|x| ci_cs_merge(x)}
        i = rest.size
        while i > 1
          if (s2 = ci_compat(rest[i-1])) && (s1 = ci_compat(rest[i-2]))
            rest[i-2..i-1] = [["ci", s1 + s2]]
          end
          i -= 1
        end
        [true, wrap_flat("seq", rest)]
      else
        false
      end
    end
  end

  # Replace each run of n > 1 equal neighbouring members of a "seq" by
  # ["rep", n, n, member].
  def seq_rep(prod)
    visit(prod) do |here|
      case here
      in ["seq", *rest]
        rest = rest.map{|x| seq_rep(x)}
        i = rest.size           # behind last element
        while i > 1
          j = i - 1             # end of range
          s_end = rest[j]
          k = j                 # start of range
          while k > 0 && rest[k-1] == s_end
            k -= 1
          end
          if k != j
            n = j - k + 1
            rest[k..j] = [["rep", n, n, s_end]]
          end
          i = k                 # skip element k
        end
        [true, wrap_flat("seq", rest)]
      else
        false
      end
    end
  end

  # sharing

  # Count in +counter+ how often each "alt" node occurs in +prod+, including
  # "alt" nodes nested inside other "alt" nodes. Used for its side effect on
  # +counter+ only.
  def count_alt(counter, prod)
    visit(prod) do |here|
      case here
      in ["alt", *rest]
        rest.each {|pr| count_alt(counter, pr)}
        counter[here] += 1
        # The members were counted above, so tell visit not to descend again.
        [true, nil]
      else
        false
      end
    end
  end

  # Give every "alt" node that occurs more than twice its own rule named
  # "<prefix>-a<i>" (most frequent first), replace its occurrences by that
  # name, and run seq_rep over all rules.
  def share_alt(prefix)
    counter = Hash.new(0)
    rules.each do |name, prod|
      count_alt(counter, prod)
    end
    subs = {}
    counter.to_a.select{|_alt, n| n > 2}.sort_by{|_alt, n| -n}.each_with_index do |(el, _count), i|
      name = "#{prefix}-a#{i}"
      rules[name] = el
      subs[el] = name
    end
    # (A second counting pass used to follow here; its result was never
    # read, so it has been dropped.)
    rules.replace(Hash[rules.map do |k, v|
                         [k, seq_rep(visit(v) do |prod|
                                       # k != s keeps the body of the new rule itself intact
                                       if (s = subs[prod]) && k != s
                                         [true, s]
                                       end
                                     end)]
                       end])
  end

  # Replace hex-digit alternatives in +prod+ by rule names and record the
  # replaced node under that name in +rules+ (first definition wins):
  # * digit range up to "9" + "A".. + "a".. becomes "x<first digit><last letter>"
  # * upper-case + lower-case letter range becomes "x<first letter><last letter>"
  def share_hex_1(prod, rules)
    visit(prod) do |here|
      case here
      in ["alt",
          ["char-range", c3l, "9"],
          ["char-range", "A", c4r],
          ["char-range", "a", c6r]] if c4r == c6r.upcase && c3l >= "0" && c6r <= "f"
        name = "x#{c3l}#{c6r}"
        rules[name] ||= here
        [true, name]
      in ["alt",
          ["char-range", c4l, c4r],
          ["char-range", c6l, c6r]] if c4r == c6r.upcase &&
                                       c4l == c6l.upcase &&
                                       c6l.between?("a", "f") &&
                                       c6r.between?("a", "f")
        name = "x#{c6l}#{c6r}"
        rules[name] ||= here
        [true, name]
      else
        false
      end
    end
  end

  # Apply share_hex_1 to every rule, then add the collected hex rules,
  # sorted by name. The +_prefix+ argument is not used.
  def share_hex(_prefix)
    newrules = {}
    rules.each do |name, prod|
      rules[name] = share_hex_1(prod, newrules)
    end
    rules.merge!(Hash[newrules.sort])
  end
end
@@ -0,0 +1,26 @@
1
+ require_relative "../abnftt.rb"
2
+
3
class ABNF
  # Run #visit with the same block over each production in +prod_array+ and
  # return the results as an array.
  def visit_all(prod_array, &block)
    prod_array.map { |member| visit(member, &block) }
  end

  # Rebuild the production +prod+, giving the block the chance to replace
  # any node.
  #
  # The block is called with the node. When it returns a pair whose first
  # element is truthy, the second element is used in place of the node and
  # the node's children are not visited. Otherwise the children of "alt",
  # "tadd" and "seq" nodes and the body of a four-element "rep" node are
  # visited in turn; any other node is returned as is.
  def visit(prod, &block)
    handled, replacement = block.call(prod, &block)
    return replacement if handled
    return prod unless prod.is_a?(Array)

    head, *children = prod
    case head
    when "alt"
      ["alt", *visit_all(children, &block)]
    when "tadd"
      ["tadd", *visit_all(children, &block)]
    when "seq"
      ["seq", *visit_all(children, &block)]
    when "rep"
      return prod unless children.size == 3

      min, max, body = children
      ["rep", min, max, visit(body, &block)]
    else
      prod
    end
  end
end
@@ -0,0 +1,106 @@
1
class ABNF

  # Serialise the rules back into ABNF text.
  #
  # None of these methods modify a string literal in place, so they keep
  # working when string literals are frozen.

  # Quote +s+ as an ABNF string. Only space, "!" and "#".."~" are accepted;
  # anything else raises.
  def stringify(s)
    fail "Can't stringify #{s.inspect} yet" unless s =~ /\A[ !#-~]*\z/
    %{"#{s}"}
  end

  # Left-hand side of a rule: the rule name, unchanged.
  def write_lhs(k)
    k
  end

  # precedence:
  # 1: / alt -> (type1)
  # 2: »« seq
  # 4: atomic

  # Put +inner+ in parentheses when its own precedence +prec+ does not
  # exceed the precedence +targetprec+ required by the context.
  def prec_check(inner, targetprec, prec)
    if targetprec >= prec
      "(#{inner})"
    else
      inner
    end
  end

  # ABNF text for the production +v+, parenthesised as needed for a context
  # of precedence +targetprec+. Raises on a node it does not know.
  def write_rhs(v, targetprec = 0)
    prec, ret =
      case v
      in String                 # this should really be ["name", id]
        [4, v]
      in ["name", id]
        [4, id]
      in ["alt" | "tadd", *types]
        [1, types.map{write_rhs(_1, 1)}.join(" / ")]
      in ["seq", *groups]
        case groups.size
        when 0; [4, ""]         # XXX
        else
          [2, "#{groups.map{write_rhs(_1, 2)}.join(" ")}"]
        end
      in ["ci", s]
        [4, stringify(s)]
      in ["cs", s]
        if s =~ /\A[^A-Za-z]*\z/
          [4, stringify(s)]     # reduce noise if no alphabetics
        else
          [4, "%s#{stringify(s)}"]
        end
      in ["char-range", c1, c2]
        nc1 = "%02x" % c1.ord
        nc2 = "%02x" % c2.ord
        # A range of one character is written as a single value.
        nc2add = "-#{nc2}" if nc2 != nc1
        [4, "%x#{nc1}#{nc2add}"]
      in ["rep", s, e, group]
        if s == 0 && e == 1
          [4, "[#{write_rhs(group)}]"]
        else
          # e == true stands for "no upper bound"
          occur = case [s, e]
                  in [1, 1];    ""
                  in [0, true]; "*"
                  in [n, ^n];   n.to_s
                  else
                    "#{s}*#{e != true ? e : ""}"
                  end
          [4, "#{occur}#{write_rhs(group, 4)}"]
        end
      else
        fail [:WRITE_NOMATCH, v].inspect
      end
    prec_check(ret, targetprec, prec)
  end

  # One rule as "name = production"; "=/" is used when the production is a
  # "tadd" node.
  def write_rule(k, v)
    case v
    in ["tadd", *_rest]
      assign = "=/"
    else
      assign = "="
    end
    "#{write_lhs(k)} #{assign} #{write_rhs(v, 0)}"
  end

  # All rules, one per line; a final newline is added unless the text is
  # empty.
  def to_s
    rules.map {|k, v| write_rule(k, v) }.join("\n").sub(/.\z/) { "#{$&}\n" }
  end

  # primitively break down lines so they fit on a teletype
  #
  # Lines longer than +col+ characters are split at the last space before
  # that column; the remainder continues on a new line that starts with a
  # space. A line without a usable space (none, or only within the first
  # five characters) is left as it is.
  def breaker(s, col = 69)
    ret = +""
    s.each_line do |*l|
      while l[-1].size > col
        breakpoint = l[-1][0...col].rindex(' ')
        break unless breakpoint && breakpoint > 4
        l[-1..-1] = [
          l[-1][0...breakpoint],
          " #{l[-1][breakpoint+1..-1]}"
        ]
      end
      ret << l.join("\n")
    end
    ret
  end

end
data/lib/abnftt.rb CHANGED
@@ -53,11 +53,17 @@ class ABNF
53
53
  ABNF.new(ast)
54
54
  end
55
55
 
56
# Build an ABNF object from an existing rules collection +r+ instead of a
# parse result: the constructor receives nil in place of the AST and +r+ as
# the rules.
def self.from_rules(r)
  ast = nil
  ABNF.new(ast, r)
end
59
+
56
60
  attr_accessor :ast, :rules, :tree
57
- def initialize(ast_)
58
- @ast = ast_
59
- @tree = ast.ast
60
- @rules = {}
61
+ def initialize(ast_ = nil, rules_ = {})
62
+ if ast_
63
+ @ast = ast_
64
+ @tree = ast.ast
65
+ end
66
+ @rules = rules_
61
67
  @tree.each do |x|
62
68
  op, name, val, rest = x
63
69
  fail rest if rest
@@ -72,7 +78,7 @@ class ABNF
72
78
  else
73
79
  val
74
80
  end
75
- end
81
+ end if @tree
76
82
  # warn "** rules #{rules.inspect}"
77
83
  end
78
84
 
metadata CHANGED
@@ -1,28 +1,35 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: abnftt
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Carsten Bormann
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-08-26 00:00:00.000000000 Z
11
+ date: 2024-12-09 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Less shifty support for tools based on IETF's ABNF
14
14
  email: cabo@tzi.org
15
15
  executables:
16
+ - abnfrob
17
+ - abnfrob~
16
18
  - abnftt
17
- - abnftt~
18
19
  extensions: []
19
20
  extra_rdoc_files: []
20
21
  files:
21
22
  - abnftt.gemspec
23
+ - bin/abnfrob
24
+ - bin/abnfrob~
22
25
  - bin/abnftt
23
- - bin/abnftt~
24
26
  - lib/abnfgrammar.rb
25
27
  - lib/abnftt.rb
28
+ - lib/abnftt/abnf-flattener.rb
29
+ - lib/abnftt/abnf-squasher.rb
30
+ - lib/abnftt/abnf-util.rb
31
+ - lib/abnftt/abnf-visitor.rb
32
+ - lib/abnftt/abnf-writer.rb
26
33
  homepage: http://github.com/cabo/abnftt
27
34
  licenses:
28
35
  - MIT
@@ -42,7 +49,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
42
49
  - !ruby/object:Gem::Version
43
50
  version: '0'
44
51
  requirements: []
45
- rubygems_version: 3.4.10
52
+ rubygems_version: 3.5.14
46
53
  signing_key:
47
54
  specification_version: 4
48
55
  summary: RFC 5234+7405 ABNF to Treetop
data/bin/abnftt~ DELETED
@@ -1,135 +0,0 @@
1
- #!/usr/bin/env ruby -Ku
2
-
3
- require 'pp'
4
- require 'yaml'
5
- require 'treetop'
6
-
7
- Encoding.default_external = Encoding::UTF_8
8
-
9
- require 'abnf'
10
-
11
- class Treetop::Runtime::SyntaxNode
12
- def clean_abnf
13
- if elements
14
- elements.map {|el| el.clean_abnf}.join
15
- else
16
- text_value
17
- end
18
- end
19
- def ast
20
- fail "undefined_ast #{inspect}"
21
- end
22
- def ast_from_percent(base, first, second)
23
- c1 = first.to_i(base).chr(Encoding::UTF_8)
24
- case second[0]
25
- when nil
26
- ["cs", c1]
27
- when "-"
28
- c2 = second[1..-1].to_i(base).chr(Encoding::UTF_8)
29
- ["char-range", c1, c2]
30
- when "."
31
- el = second.split(".")
32
- el[0] = first
33
- ["cs", el.map {|c| c.to_i(base).chr(Encoding::UTF_8)}.join]
34
- else
35
- fail "ast_from_percent"
36
- end
37
- end
38
- end
39
-
40
- def to_treetop(ast)
41
- <<~EOS
42
- # Encoding: UTF-8
43
- grammar TESTME
44
- #{ast.map {|x| to_treetop0(x)}.join}
45
- end
46
- EOS
47
- end
48
- def to_treetop0(ast)
49
- fail ast.inspect unless ast[0] == "="
50
- <<~EOS
51
- rule #{to_treetop1(ast[1])}
52
- #{to_treetop1(ast[2])}
53
- end
54
- EOS
55
- end
56
- FIXUP_NAMES = Hash.new {|h, k| k}
57
- FIXUP_NAMES.merge!({
58
- "rule" => "r__rule",
59
- })
60
- def to_treetop1(ast)
61
- case ast
62
- when String
63
- FIXUP_NAMES[ast].gsub("-", "_")
64
- when Array
65
- case ast[0]
66
- when "alt" # ["alt", *a]
67
- "(#{ast[1..-1].map {|x| to_treetop1(x)}.join(" / ")})"
68
- when "seq" # ["seq", *a]
69
- "(#{ast[1..-1].map {|x| to_treetop1(x)}.join(" ")})"
70
- when "rep" # ["rep", s, e, a]
71
- t = to_treetop1(ast[3]) || "@@@"
72
- case [ast[1], ast[2]]
73
- when [0, 1]
74
- t + "?"
75
- when [0, true]
76
- t + "*"
77
- when [1, true]
78
- t + "+"
79
- else
80
- t + " #{ast[1]}..#{ast[2] == true ? '' : ast[2]}"
81
- end
82
- when "prose" # ["prose", text]
83
- fail "prose not implemented #{ast.inspect}"
84
- when "ci" # ["ci", text]
85
- s = ast[1]
86
- if s =~ /\A[^A-Za-z]*\z/
87
- s.inspect
88
- else
89
- s.inspect << "i" # could do this always, but reduce noise
90
- end
91
- when "cs" # ["cs", text]
92
- ast[1].inspect
93
- when "char-range" # ["char-range", c1, c2]
94
- c1 = Regexp.quote(ast[1])
95
- c2 = Regexp.quote(ast[2])
96
- "[#{c1}-#{c2}]" # XXX does that always work
97
- when "im" # ["im", a, text]
98
- to_treetop1(ast[1]) + " " + ast[2]
99
- else
100
- fail "to_treetop(#{ast.inspect})"
101
- end
102
- else
103
- fail "to_treetop(#{ast.inspect})"
104
- end
105
- end
106
-
107
- unless fn = ARGV[0]
108
- warn "Usage: abnftt grammar.abnftt"
109
- exit 1
110
- end
111
- outfn = fn.sub(/\.abnftt\z/, "")
112
-
113
- parser = ABNFParser.new
114
- abnf_file = File.read(fn)
115
- ast = parser.parse abnf_file
116
- if ast
117
- # p ast
118
- File.open("#{outfn}.yaml", "w") do |f|
119
- f.puts ast.ast.to_yaml
120
- end
121
- # pp ast.ast
122
- File.open("#{outfn}.treetop", "w") do |f|
123
- f.puts to_treetop(ast.ast)
124
- end
125
- File.open("#{outfn}.abnf", "w") do |f|
126
- f.puts ast.clean_abnf
127
- end
128
- else
129
-
130
- puts parser.failure_reason
131
- parser.failure_reason =~ /^(Expected .+) after/m
132
- puts "#{$1.gsub("\n", '<<<NEWLINE>>>')}:"
133
- puts abnf_file.lines.to_a[parser.failure_line - 1]
134
- puts "#{'~' * (parser.failure_column - 1)}^"
135
- end