abnftt 0.2.4 → 0.2.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/abnftt.gemspec +1 -1
- data/bin/abnfrob +146 -0
- data/bin/abnfrob~ +40 -0
- data/lib/abnftt/abnf-flattener.rb +32 -0
- data/lib/abnftt/abnf-squasher.rb +57 -0
- data/lib/abnftt/abnf-util.rb +371 -0
- data/lib/abnftt/abnf-visitor.rb +26 -0
- data/lib/abnftt/abnf-writer.rb +106 -0
- data/lib/abnftt.rb +11 -5
- metadata +12 -5
- data/bin/abnftt~ +0 -135
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 267bcebab4bd13c22da536e87b6ccd3444b214db589a5e9d4b054a56ffe3a686
|
4
|
+
data.tar.gz: cf4c222a9825e210e635989837ba43432e87661ba6e5e98a9a52aedcfbdad055
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28fe16c851f050e8072bdf939b45569fde9233fbeb26a2e43169fa04b1c66e85e43e893056faab3b87a4e5c49f2f7cea08274dd852c26f8d42688a85198eab57
|
7
|
+
data.tar.gz: 0f4977a6b13b8b4a621b594a31ad87d6759b5780bb59b98f7afc3ab4ac3e70fd2f9ee5cb9f710b8db1ce79e8f411734ec1517ad829456f3cd0f4a383f683d721
|
data/abnftt.gemspec
CHANGED
data/bin/abnfrob
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
#!/usr/bin/env ruby -Ku
# abnfrob: read an ABNF grammar file and write it back out as ABNF, JSON,
# YAML or pretty-printed Ruby, optionally "squashing" it under an
# application prefix first (--squash).
require 'pp'
require 'yaml'
require 'pathname' # Pathname builds the output file stem below

Encoding.default_external = Encoding::UTF_8
require 'optparse'
require 'ostruct'

$options = OpenStruct.new
begin
  op = OptionParser.new do |opts|
    # The gem spec is only available when running from an activated gem.
    spec = Gem.loaded_specs['abnftt']
    opts.version = spec ? "(from abnftt #{spec.version})" : "unknown-version"

    opts.banner = "Usage: abnfrob [options] file.abnf"
    opts.on("-b", "--bap=[OPTIONS]", "Pretty-print using bap") do |v|
      $options.bap = true
      $options.bap_options = v ? " #{v}" : ""
      # warn "** bap_options #{$options.bap_options.inspect}"
    end
    opts.on("-tFMT", "--to=FMT", [:abnf, :json, :pp, :yaml], "Target format") do |v|
      $options.target = v
    end
    opts.on("-y", "--yaml", "Output separate YAML copy of rules") do |v|
      $options.yaml = v
    end
    opts.on("-a", "--asr33", "Line-break to fit on teletype") do |v|
      $options.asr33 = v
    end
    opts.on("--squash=PREFIX", String, "Squash to app-prefix") do |v|
      $options.squash = v
    end
  end
  op.parse!
rescue StandardError => e
  warn e
  exit 1
end

require 'abnftt'

unless fn = ARGV[0]
  warn op
  exit 1
end
# The extension must start with ".abnf"; the dot is matched literally.
unless File.extname(fn) =~ /\A\.abnf/
  warn op
  exit 1
end
# Input path without its extension; all output names derive from it.
outfn = (Pathname.new(File.dirname(fn)) + File.basename(fn, ".*")).to_s

parser = ABNFGrammarParser.new
abnf_file = File.read(fn)
ast = parser.parse abnf_file
unless ast
  reason = parser.failure_reason
  puts reason
  # Only echo the "Expected ..." summary when the message has that shape;
  # previously a non-matching message crashed on nil.
  if reason =~ /^(Expected .+) after/m
    puts "#{$1.gsub("\n", '<<<NEWLINE>>>')}:"
  end
  puts abnf_file.lines.to_a[parser.failure_line - 1]
  puts "#{'~' * (parser.failure_column - 1)}^"
  exit 1
end

abnf = ABNF.new(ast)

if $options.yaml
  File.open("#{outfn}.yaml", "w") do |f|
    f.puts abnf.rules.to_yaml
  end
end

if $options.squash
  require 'abnftt/abnf-flattener.rb'
  require 'abnftt/abnf-util.rb'
  require 'abnftt/abnf-squasher.rb'

  prefix = $options.squash + "-"
  # Matches an already-present prefix (or nothing) at the start of a name,
  # so that substituting `prefix` yields exactly one prefix. The prefix is
  # user input and is therefore escaped before being used in a regexp.
  prefix_re = /\A(?:#{Regexp.escape(prefix)})?/

  abnf.flatten_ops
  abnf.flatten_strings

  abnf.squash_edn_levels
  abnf.char_range_to_string

  abnf.share_hex("sq")

  abnf.share_alt("sq")

  # Prefix every rule name and every rule reference (bare String nodes).
  prefixed = abnf.rules.map do |k, v|
    renamed = abnf.visit(v) do |prod|
      [true, prod.sub(prefix_re, prefix)] if String === prod
    end
    [k.sub(prefix_re, prefix), renamed]
  end
  abnf.rules.replace(Hash[prefixed])

  # Wrap the first rule in an outer rule: PREFIX' first-rule '
  rule1 = abnf.rules.first
  outer_name = "sq-#{rule1[0]}"
  outer_elements = ["seq",
                    ["cs", $options.squash + "'"],
                    rule1[0],
                    ["cs", "'"]]
  abnf.rules.replace(Hash[[[outer_name, outer_elements],
                           *abnf.rules.to_a]])
  File.open("#{outfn}-sq.yaml", "w") do |f|
    f.puts abnf.rules.to_yaml
  end
end

## Work on abnf.rules

case $options.target
when :pp
  pp abnf.rules
when :json
  require 'neatjson'
  puts JSON.neat_generate(abnf.rules, after_comma: 1, after_colon: 1)
when :yaml
  puts abnf.rules.to_yaml
when :abnf, nil
  require_relative '../lib/abnftt/abnf-writer.rb'
  result = abnf.to_s

  if $options.bap
    require 'open3'
    # bap_options comes straight from the command line and is passed
    # through the shell so that several bap options can be given at once.
    result, err, status =
      Open3.capture3("bap -o RFC7405#{$options.bap_options}",
                     stdin_data: result)
    warn err.gsub(/^/, "** ") unless err == ""
    unless status.success?
      warn "*** Giving up"
      exit 1
    end
  end

  if $options.asr33
    result = abnf.breaker(result)
  end

  puts result
  # NOTE(review): this file is written even when --squash was not given,
  # and still carries the "-sq" suffix -- confirm that this is intended.
  File.open("#{outfn}-sq.abnf", "w") do |f|
    f.puts result
  end
else
  warn ["Unknown target format: ", $options.target].inspect
end
|
data/bin/abnfrob~
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
#!/usr/bin/env ruby -Ku
# Parse the grammar file named on the command line and write three files
# next to it: <name>.yaml (tree), <name>.treetop and <name>.abnf.

require 'pp'
require 'yaml'

Encoding.default_external = Encoding::UTF_8

require 'abnftt'

unless fn = ARGV[0]
  warn "Usage: abnftt grammar.abnftt"
  exit 1
end
outfn = fn.sub(/\.abnftt\z/, "")

parser = ABNFGrammarParser.new
abnf_file = File.read(fn)
ast = parser.parse abnf_file

unless ast
  reason = parser.failure_reason
  puts reason
  # Guarded: a failure message without the "Expected ... after" shape used
  # to crash here with NoMethodError on nil.
  if reason =~ /^(Expected .+) after/m
    puts "#{$1.gsub("\n", '<<<NEWLINE>>>')}:"
  end
  puts abnf_file.lines.to_a[parser.failure_line - 1]
  puts "#{'~' * (parser.failure_column - 1)}^"
  exit 1 # report the parse failure to the caller, as bin/abnfrob does
end

# p ast
abnf = ABNF.new(ast)
File.open("#{outfn}.yaml", "w") do |f|
  f.puts abnf.tree.to_yaml
end
# pp ast.ast
File.open("#{outfn}.treetop", "w") do |f|
  # Module name: basename with "-" -> "_", other punctuation dropped, upcased.
  modname = File.basename(outfn).gsub("-", "_").gsub(/[^_a-zA-Z0-9]/, "").upcase
  f.puts abnf.to_treetop(modname)
end
File.open("#{outfn}.abnf", "w") do |f|
  f.puts ast.clean_abnf.lines.map(&:rstrip).join("\n")
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require "abnftt/abnf-visitor.rb"
|
2
|
+
|
3
|
+
class ABNF
  # Append the elements of +s+ to +out+, splicing in the children of any
  # element that is itself an +op+ node (recursively); every other element
  # is flattened on its own first. Returns +out+, which starts as [op].
  def expand_op_into(s, op, out = [op])
    s.each_with_object(out) do |el, acc|
      case el
      in [^op, *inner]
        expand_op_into(inner, op, acc)
      else
        acc << flatten_ops_1(el)
      end
    end
  end

  # Return +prod+ with directly nested "seq" in "seq" and "alt" in "alt"
  # merged into a single level.
  def flatten_ops_1(prod)
    visit(prod) do |here|
      case here
      in ["seq" | "alt" => op, *rest]
        [true, expand_op_into(rest, op)]
      else
        false
      end
    end
  end

  # Flatten every rule in place.
  def flatten_ops
    rules.transform_values! { |prod| flatten_ops_1(prod) }
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require "abnftt"
|
2
|
+
require "abnftt/abnf-flattener"
|
3
|
+
require "abnftt/abnf-util"
|
4
|
+
require "abnftt/abnf-visitor"
|
5
|
+
|
6
|
+
class ABNF

  # Character ranges that may appear unescaped; "'" (0x27) and "\" (0x5c)
  # are left out. DQUOTE is allowed, and so is 7F-9F (as in JSON).
  # Each entry is a pair of one-character UTF-8 strings.
  UNESCAPED_SQSTR_RANGES = [
    [0xA, 0xA],
    [0x20, 0x26],      # stops before "'"
    [0x28, 0x5b],      # stops before "\"
    [0x5d, 0xd7ff],
    [0xe000, 0x10ffff]
  ].map { |bounds| bounds.map { |cp| cp.chr(Encoding::UTF_8) } }

  # [character, letter that follows the backslash in its escape]
  ESCAPED_SQSTR_MAPPINGS = [
    ["\x08", "b"],
    ["\x09", "t"],
    ["\x0A", "n"],
    ["\x0C", "f"],
    ["\x0D", "r"],
    ["\x27", "'"],
    ["\x2F", "/"],
    ["\x5C", "\\"]]

  # Replace every char-range in +prod+ by an alternation of: the unescaped
  # sub-ranges it overlaps, the backslash escapes of characters it covers,
  # and "\u" followed by the legacy or modern hex forms of the range.
  def squash_edn_levels_1(prod)
    squashed = visit(prod) do |here|
      next false unless (here in ["char-range", c1, c2])

      literal = UNESCAPED_SQSTR_RANGES.filter_map { |lo, hi| overlap(here, lo, hi) }
      escaped = ESCAPED_SQSTR_MAPPINGS.filter_map do |raw, letter|
        ["seq", ["char-range", "\\", "\\"], ["char-range", letter, letter]] if raw.between?(c1, c2)
      end
      legacy = alt_ranges_legacy(c1.ord, c2.ord)
      modern = alt_ranges_modern(c1.ord, c2.ord)
      unicode = ["seq",
                 ["cs", "\\u"],
                 wrap_flat("alt", [legacy, modern])]
      [true, wrap_flat("alt", [*literal, *escaped, unicode].sort)]
    end
    flatten_ops_1(squashed)
  end

  # Apply squash_edn_levels_1 to every rule in place.
  def squash_edn_levels
    rules.transform_values! { |prod| squash_edn_levels_1(prod) }
  end

end
|
@@ -0,0 +1,371 @@
|
|
1
|
+
require "abnftt/abnf-visitor"
|
2
|
+
require "abnftt/abnf-flattener"
|
3
|
+
|
4
|
+
class ABNF
|
5
|
+
|
6
|
+
# Build the node [head, *all], except that a single operand is returned
# bare instead of being wrapped.
def wrap(head, all)
  return all.first if all.size == 1

  [head, *all]
end
|
13
|
+
|
14
|
+
# Like wrap, but operands that are themselves +head+ nodes have their
# children spliced in (one level only). A single operand is returned bare,
# without being flattened.
def wrap_flat(head, all)
  return all.first if all.size == 1

  spliced = all.flat_map do |el|
    case el
    in [^head, *inner]
      inner
    else
      [el]
    end
  end
  [head, *spliced]
end
|
28
|
+
|
29
|
+
# Intersect the char-range node +cr+ with the inclusive interval l..r.
# Returns a new char-range node, or nil when they do not overlap.
def overlap(cr, l, r)
  _tag, low, high = cr
  return nil if high < l || low > r

  ["char-range", [low, l].max, [high, r].min]
end
|
34
|
+
|
35
|
+
# Utilities for creating hexadecimal rules from unsigned integers
|
36
|
+
|
37
|
+
# Build the production matching one hexadecimal digit whose value lies in
# l..r (integers 0..15): a "0"-"9" part and, for values >= 10, both an
# upper-case and a lower-case letter part.
def hexdig_range(l, r)
  glyph = ->(value, offset) { (value + offset).chr(Encoding::UTF_8) }
  parts = []
  parts << ["char-range", glyph[l, 0x30], glyph[[r, 9].min, 0x30]] if l < 10
  if r >= 10
    first_letter = [l, 10].max
    # 0x41 is "A", 0x61 is "a"; value 10 maps onto the base letter.
    [0x41, 0x61].each do |base|
      parts << ["char-range", glyph[first_letter, base - 0xA], glyph[r, base - 0xA]]
    end
  end
  wrap("alt", parts)
end
|
52
|
+
|
53
|
+
# This assumes l and r are preprocessed to have single or full ranges except in one place
|
54
|
+
# Build a sequence of per-digit productions for the hex numbers l..r,
# most significant digit first. +ndig+ fixes the number of digits (missing
# high digits count as 0); when falsy, the digit count of +r+ is used.
def hex_ranges(l, r, ndig = false)
  low_digits = l.digits(16)   # least significant first
  high_digits = r.digits(16)
  ndig ||= high_digits.size
  per_digit = Array.new(ndig) do |pos|
    hexdig_range(low_digits[pos] || 0, high_digits[pos] || 0)
  end
  wrap("seq", per_digit.reverse)
end
|
64
|
+
|
65
|
+
# split range into passages that have the property needed for hex_ranges
|
66
|
+
# Split l..r into consecutive sub-ranges aligned to +step+-bit digit
# boundaries and yield each as (first, last, column). Sub-ranges are
# yielded in ascending order. Returns nil.
def do_range(l, r, step = 4)
  column = 0
  while l <= r
    mask = (1 << step * (column + 1)) - 1
    block_end = l | mask

    if block_end <= r
      # Still going up: the block aligned at this column fits entirely.
      column += 1
      unless (l & mask).zero?
        yield l, block_end, column
        l = block_end + 1
      end
      next
    end

    # right hand side: come down from mountain
    column.downto(0) do |col|
      mask >>= step
      edge = (r + 1) & ~mask
      yield l, edge - 1, col + 1 if l != edge
      l = edge
    end
    return
  end
end
|
89
|
+
|
90
|
+
# Support legacy JSON \u/\u\u and \u{...} hex unicode
|
91
|
+
|
92
|
+
# Build an alternation of hex-digit sequences covering l..r. +l+ and +r+
# may be integers or one-character strings (.ord is applied to both).
def alt_ranges(l, r, step = 4, ndig = false)
  pieces = []
  do_range(l.ord, r.ord, step) do |first, last, _column|
    pieces << hex_ranges(first, last, ndig)
  end
  wrap("alt", pieces.reverse) # work around prioritized choice
end
|
99
|
+
|
100
|
+
# Hex forms for code points l..r in the legacy style: four hex digits for
# the part below 0x10000, and a surrogate pair (high, "\u", low) for the
# part at or above 0x10000.
def alt_ranges_legacy(l, r)
  choices = []
  choices << ["alt", alt_ranges(l, [r, 0xFFFF].min, 4, 4)] if l < 0x10000
  if r >= 0x10000
    first = [l, 0x10000].max - 0x10000
    last = r - 0x10000
    # step 10: the two surrogates each carry 10 bits of the offset
    do_range(first, last, 10) do |lo, hi, _column|
      choices << ["seq",
                  alt_ranges((lo >> 10) + 0xD800, (hi >> 10) + 0xD800, 4, 4),
                  expand_string("\\u"),
                  alt_ranges((lo & 0x3FF) + 0xDC00, (hi & 0x3FF) + 0xDC00, 4, 4)]
    end
  end
  wrap_flat("alt", choices)
end
|
117
|
+
|
118
|
+
# Hex form for code points l..r in the braced style: "{", any number of
# leading zeros, the hex digits, "}".
# The +step+ argument is accepted but not used; digits are always built
# with a step of 4 bits.
def alt_ranges_modern(l, r, step = 4)
  opening = expand_string("{")
  leading_zeros = ["rep", 0, true, ["cs", "0"]]
  digits = alt_ranges(l, r, 4, false)
  closing = expand_string("}")
  ["seq", opening, leading_zeros, digits, closing]
end
|
125
|
+
|
126
|
+
# flatten_strings: reduce all strings to char-range/seq/alt
|
127
|
+
|
128
|
+
# Turn the string +s+ into a sequence of single-character char-ranges.
# With +case_fold+, a character whose upper and lower case forms differ
# becomes an alternation of both forms (upper case first).
def expand_string(s, case_fold = false)
  per_char = s.chars.map do |ch|
    upper = ch.upcase
    lower = ch.downcase
    if case_fold && upper != lower
      ["alt", expand_string(upper), expand_string(lower)]
    else
      ["char-range", ch, ch]
    end
  end
  wrap("seq", per_char)
end
|
139
|
+
|
140
|
+
# Replace every "cs" and "ci" string node in +prod+ by its char-range
# expansion (case-folded for "ci"), then flatten nested operators and
# merge adjacent char-ranges.
def flatten_strings_1(prod)
  expanded = visit(prod) do |here|
    case here
    in ["cs", string]
      [true, expand_string(string, false)]
    in ["ci", string]
      [true, expand_string(string, true)]
    else
      false
    end
  end
  flattened = flatten_ops_1(expanded)
  merge_strings_1(flattened)
end
|
153
|
+
|
154
|
+
|
155
|
+
# Within every "alt" node of +prod+, sort each run of two or more
# consecutive char-range alternatives and merge ranges that are directly
# adjacent (next start == previous end + 1). Other alternatives are
# processed recursively. The "alt" node is modified in place and returned.
def merge_strings_1(prod)
  visit(prod) do |here|
    next false unless (here in ["alt", *rest])

    is_range = ->(el) { el in ["char-range", _, _] }

    # Pass 1: recurse into non-range alternatives, remember range runs.
    runs = []
    idx = 0
    while idx < rest.size
      if is_range.(rest[idx])
        last = idx
        last += 1 while last + 1 < rest.size && is_range.(rest[last + 1])
        runs << [idx, last] unless idx == last # inclusive right
        idx = last
      else
        here[idx + 1] = merge_strings_1(rest[idx]) # XXX could be part of a range
      end
      idx += 1
    end

    # Pass 2: rewrite the runs back to front so earlier indices stay valid.
    # Positions in +here+ are shifted by one because of the "alt" head.
    runs.reverse_each do |first, last|
      merged = here[first + 1..last + 1].sort
      (merged.length - 1).downto(1) do |pos|
        next unless merged[pos][1].ord == merged[pos - 1][2].ord + 1

        merged[pos - 1..pos] = [["char-range", merged[pos - 1][1], merged[pos][2]]]
      end
      here[first + 1..last + 1] = merged
    end
    [true, here]
  end
end
|
192
|
+
|
193
|
+
# Apply flatten_strings_1 to every rule in place.
def flatten_strings
  rules.transform_values! { |prod| flatten_strings_1(prod) }
end
|
198
|
+
|
199
|
+
# Cleanup operations
|
200
|
+
|
201
|
+
# Append the elements of +s+ to +out+, splicing in the children of nested
# +op+ nodes (recursively); every other element goes through
# char_range_to_string1 first. Returns +out+, which starts as [op].
def expand_range_into(s, op, out = [op])
  s.each_with_object(out) do |el, acc|
    case el
    in [^op, *inner]
      expand_range_into(inner, op, acc)
    else
      acc << char_range_to_string1(el)
    end
  end
end
|
212
|
+
# Turn single-character char-ranges for printable characters other than
# DQUOTE (" ".."!" and "#".."~") back into "cs" strings, and join
# neighbouring "cs" strings inside a "seq" into one string.
def char_range_to_string1(prod)
  visit(prod) do |here|
    case here
    in ["seq", *rest]
      flat = expand_range_into(rest, "seq") # flat[0] is the "seq" head
      # Walk right to left so a freshly joined string can absorb the
      # element to its left on the next step.
      (flat.size - 1).downto(1) do |pos|
        next unless (flat[pos] in ["cs", tail]) && (flat[pos - 1] in ["cs", head])

        flat[pos - 1..pos] = [["cs", head + tail]]
      end
      [true, flat]
    in ["char-range", chr, ^chr] if chr.between?(" ", "!") || chr.between?("#", "~")
      [true, ["cs", chr]]
    else
      false
    end
  end
end
|
232
|
+
# For every rule, in place: rebuild strings from char-ranges, detect
# case-insensitive pairs, then merge compatible neighbouring strings.
def char_range_to_string
  rules.transform_values! do |prod|
    with_strings = char_range_to_string1(prod)
    ci_cs_merge(detect_ci(with_strings))
  end
end
|
237
|
+
|
238
|
+
# Replace an alternation of exactly two "cs" strings, the first being the
# upper-case and the second the lower-case form of the other, by a single
# "ci" node holding the upper-case string.
def detect_ci(prod)
  visit(prod) do |here|
    if (here in ["alt", ["cs", upper], ["cs", lower]]) &&
       upper.downcase == lower && lower.upcase == upper
      [true, ["ci", upper]]
    else
      false
    end
  end
end
|
248
|
+
# Return the text of +prod+ if it can be part of a case-insensitive
# string: any "ci" node, or a "cs" node without ASCII letters.
# Returns nil for everything else.
def ci_compat(prod)
  case prod
  in ["ci", text]
    text
  in ["cs", text] if text =~ /\A[^A-Za-z]*\z/
    text
  else
    nil
  end
end
|
258
|
+
# Inside every "seq", join neighbouring elements that are both
# ci-compatible (see ci_compat) into one "ci" string. A sequence that
# ends up with a single element is replaced by that element.
def ci_cs_merge(prod)
  visit(prod) do |here|
    next false unless (here in ["seq", *rest])

    parts = rest.map { |el| ci_cs_merge(el) }
    # Right to left, so a merged string can absorb its left neighbour next.
    (parts.size - 1).downto(1) do |pos|
      tail = ci_compat(parts[pos])
      head = tail && ci_compat(parts[pos - 1])
      parts[pos - 1..pos] = [["ci", head + tail]] if head
    end
    [true, wrap_flat("seq", parts)]
  end
end
|
276
|
+
|
277
|
+
# Inside every "seq", replace each run of two or more equal consecutive
# elements by ["rep", n, n, element]. A sequence that ends up with a
# single element is replaced by that element.
def seq_rep(prod)
  visit(prod) do |here|
    next false unless (here in ["seq", *rest])

    items = rest.map { |el| seq_rep(el) }
    stop = items.size # one past the last element still to examine
    while stop > 1
      last = stop - 1
      first = last
      # Extend the run to the left while elements equal the run's last one.
      first -= 1 while first > 0 && items[first - 1] == items[last]
      unless first == last
        count = last - first + 1
        items[first..last] = [["rep", count, count, items[last]]]
      end
      stop = first # continue left of the run
    end
    [true, wrap_flat("seq", items)]
  end
end
|
302
|
+
|
303
|
+
# sharing
|
304
|
+
# Count every "alt" node in +prod+ (including alts nested inside other
# alts) in +counter+, keyed by the node itself.
def count_alt(counter, prod)
  visit(prod) do |here|
    case here
    in ["alt", *rest]
      rest.each { |alternative| count_alt(counter, alternative) }
      counter[here] += 1
      # Handled here (children were counted above); the node's
      # replacement value is nil and is not used by share_alt.
      [true, nil]
    else
      false
    end
  end
end
|
315
|
+
|
316
|
+
# Factor out "alt" nodes that occur more than twice across all rules.
# Each such node becomes a new rule named "<prefix>-a<N>" (N counts from 0
# in order of decreasing frequency) and every occurrence elsewhere is
# replaced by that name. Repeated neighbours in sequences are then
# collapsed with seq_rep. Modifies +rules+ in place.
def share_alt(prefix)
  counter = Hash.new(0)
  rules.each_value { |prod| count_alt(counter, prod) }

  subs = {}
  frequent = counter.select { |_alt, count| count > 2 }.sort_by { |_alt, count| -count }
  frequent.each_with_index do |(alt, _count), i|
    name = "#{prefix}-a#{i}"
    rules[name] = alt
    subs[alt] = name
  end

  # (A second counting pass used to run here; its result was never read.)

  rewritten = rules.map do |name, prod|
    shared = visit(prod) do |here|
      replacement = subs[here]
      # Do not replace the body of the shared rule by its own name.
      [true, replacement] if replacement && name != replacement
    end
    [name, seq_rep(shared)]
  end
  rules.replace(Hash[rewritten])
end
|
338
|
+
|
339
|
+
# Replace hex-digit alternations in +prod+ by rule names and record the
# replaced node in +rules+ under that name (first definition wins):
#   X-"9" / "A"-Y / "a"-y   =>  "x<X><y>"   (X >= "0", y <= "f")
#   L-Y / l-y (both a..f)   =>  "x<l><y>"
def share_hex_1(prod, rules)
  visit(prod) do |here|
    name =
      case here
      in ["alt",
          ["char-range", c3l, "9"],
          ["char-range", "A", c4r],
          ["char-range", "a", c6r]] if c4r == c6r.upcase && c3l >= "0" && c6r <= "f"
        "x#{c3l}#{c6r}"
      in ["alt",
          ["char-range", c4l, c4r],
          ["char-range", c6l, c6r]] if c4r == c6r.upcase &&
                                       c4l == c6l.upcase &&
                                       c6l.between?("a", "f") &&
                                       c6r.between?("a", "f")
        "x#{c6l}#{c6r}"
      else
        nil
      end
    next false unless name

    rules[name] ||= here
    [true, name]
  end
end
|
363
|
+
|
364
|
+
# Replace hex-digit alternations in all rules by shared rule names (see
# share_hex_1), then add the shared rules, sorted by name. The argument
# is not used.
def share_hex(_prefix)
  shared = {}
  rules.transform_values! { |prod| share_hex_1(prod, shared) }
  rules.merge!(Hash[shared.sort])
end
|
371
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require_relative "../abnftt.rb"
|
2
|
+
|
3
|
+
class ABNF
  # Visit each production in +prod_array+; returns the array of results.
  def visit_all(prod_array, &block)
    prod_array.map { |child| visit(child, &block) }
  end

  # Rebuild +prod+ top-down. The block is called with each node (and with
  # itself as its own block) and returns either a falsy value, meaning
  # "descend into the children", or [true, replacement].
  # Only "alt", "tadd", "seq" and "rep" nodes have their children visited;
  # any other node is returned unchanged.
  def visit(prod, &block)
    done, ret = block.call(prod, &block)
    return ret if done

    case prod
    in ["alt" | "tadd" | "seq" => op, *prods]
      [op, *visit_all(prods, &block)]
    in ["rep", s, e, inner]
      ["rep", s, e, visit(inner, &block)]
    else
      prod
    end
  end
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
class ABNF
|
2
|
+
|
3
|
+
# return [precedence ((2 if seq needed)), string]
|
4
|
+
|
5
|
+
def stringify(s)
|
6
|
+
fail "Can't stringify #{s.inspect} yet" unless s =~ /\A[ !#-~]*\z/
|
7
|
+
%{"#{s}"}
|
8
|
+
end
|
9
|
+
|
10
|
+
def write_lhs(k)
|
11
|
+
k
|
12
|
+
end
|
13
|
+
|
14
|
+
# precedence:
|
15
|
+
# 1: / alt -> (type1)
|
16
|
+
# 2: »« seq
|
17
|
+
# 4: atomic
|
18
|
+
|
19
|
+
def prec_check(inner, targetprec, prec)
|
20
|
+
if targetprec >= prec
|
21
|
+
"(#{inner})"
|
22
|
+
else
|
23
|
+
inner
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def write_rhs(v, targetprec = 0)
|
28
|
+
prec, ret =
|
29
|
+
case v
|
30
|
+
in String # this should really be ["name", id]
|
31
|
+
[4, v]
|
32
|
+
in ["name", id]
|
33
|
+
[4, id]
|
34
|
+
in ["alt" | "tadd", *types]
|
35
|
+
[1, types.map{write_rhs(_1, 1)}.join(" / ")]
|
36
|
+
in ["seq", *groups]
|
37
|
+
case groups.size
|
38
|
+
when 0; [4, ""] # XXX
|
39
|
+
else
|
40
|
+
[2, "#{groups.map{write_rhs(_1, 2)}.join(" ")}"]
|
41
|
+
end
|
42
|
+
in ["ci", s]
|
43
|
+
[4, stringify(s)]
|
44
|
+
in ["cs", s]
|
45
|
+
if s =~ /\A[^A-Za-z]*\z/
|
46
|
+
[4, stringify(s)]
|
47
|
+
else
|
48
|
+
[4, "%s" << stringify(s)] # reduce noise if no alphabetics
|
49
|
+
end
|
50
|
+
in ["char-range", c1, c2]
|
51
|
+
nc1 = "%02x" % c1.ord
|
52
|
+
|
53
|
+
nc2 = "%02x" % c2.ord
|
54
|
+
nc2add = "-#{nc2}" if nc2 != nc1
|
55
|
+
[4, "%x#{nc1}#{nc2add}"]
|
56
|
+
in ["rep", s, e, group]
|
57
|
+
if s == 0 && e == 1
|
58
|
+
[4, "[#{write_rhs(group)}]"]
|
59
|
+
else
|
60
|
+
occur = case [s, e]
|
61
|
+
in [1, 1]; ""
|
62
|
+
in [0, true]; "*"
|
63
|
+
in [n, ^n]; n.to_s
|
64
|
+
else
|
65
|
+
"#{s}*#{e != true ? e : ""}"
|
66
|
+
end
|
67
|
+
[4, "#{occur}#{write_rhs(group, 4)}"]
|
68
|
+
end
|
69
|
+
else
|
70
|
+
fail [:WRITE_NOMATCH, v].inspect
|
71
|
+
end
|
72
|
+
prec_check(ret, targetprec, prec)
|
73
|
+
end
|
74
|
+
|
75
|
+
def write_rule(k, v)
|
76
|
+
case v
|
77
|
+
in ["tadd", *_rest]
|
78
|
+
assign = "=/"
|
79
|
+
else
|
80
|
+
assign = "="
|
81
|
+
end
|
82
|
+
"#{write_lhs(k)} #{assign} #{write_rhs(v, 0)}"
|
83
|
+
end
|
84
|
+
|
85
|
+
def to_s
|
86
|
+
rules.map {|k, v| write_rule(k, v) }.join("\n").sub(/.\z/) {$& << "\n"}
|
87
|
+
end
|
88
|
+
|
89
|
+
# primitively break down lines so they fit on a teletype
|
90
|
+
def breaker(s, col = 69)
|
91
|
+
ret = ""
|
92
|
+
s.each_line do |*l|
|
93
|
+
while l[-1].size > col
|
94
|
+
breakpoint = l[-1][0...col].rindex(' ')
|
95
|
+
break unless breakpoint && breakpoint > 4
|
96
|
+
l[-1..-1] = [
|
97
|
+
l[-1][0...breakpoint],
|
98
|
+
" " << l[-1][breakpoint+1..-1]
|
99
|
+
]
|
100
|
+
end
|
101
|
+
ret << l.join("\n")
|
102
|
+
end
|
103
|
+
ret
|
104
|
+
end
|
105
|
+
|
106
|
+
end
|
data/lib/abnftt.rb
CHANGED
@@ -53,11 +53,17 @@ class ABNF
|
|
53
53
|
ABNF.new(ast)
|
54
54
|
end
|
55
55
|
|
56
|
+
# Build an ABNF object directly from the rules hash +r+, passing nil as
# the AST (no parse tree is involved).
def self.from_rules(r)
|
57
|
+
ABNF.new(nil, r)
|
58
|
+
end
|
59
|
+
|
56
60
|
attr_accessor :ast, :rules, :tree
|
57
|
-
def initialize(ast_)
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
+
def initialize(ast_ = nil, rules_ = {})
|
62
|
+
if ast_
|
63
|
+
@ast = ast_
|
64
|
+
@tree = ast.ast
|
65
|
+
end
|
66
|
+
@rules = rules_
|
61
67
|
@tree.each do |x|
|
62
68
|
op, name, val, rest = x
|
63
69
|
fail rest if rest
|
@@ -72,7 +78,7 @@ class ABNF
|
|
72
78
|
else
|
73
79
|
val
|
74
80
|
end
|
75
|
-
end
|
81
|
+
end if @tree
|
76
82
|
# warn "** rules #{rules.inspect}"
|
77
83
|
end
|
78
84
|
|
metadata
CHANGED
@@ -1,28 +1,35 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: abnftt
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Carsten Bormann
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-12-09 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Less shifty support for tools based on IETF's ABNF
|
14
14
|
email: cabo@tzi.org
|
15
15
|
executables:
|
16
|
+
- abnfrob
|
17
|
+
- abnfrob~
|
16
18
|
- abnftt
|
17
|
-
- abnftt~
|
18
19
|
extensions: []
|
19
20
|
extra_rdoc_files: []
|
20
21
|
files:
|
21
22
|
- abnftt.gemspec
|
23
|
+
- bin/abnfrob
|
24
|
+
- bin/abnfrob~
|
22
25
|
- bin/abnftt
|
23
|
-
- bin/abnftt~
|
24
26
|
- lib/abnfgrammar.rb
|
25
27
|
- lib/abnftt.rb
|
28
|
+
- lib/abnftt/abnf-flattener.rb
|
29
|
+
- lib/abnftt/abnf-squasher.rb
|
30
|
+
- lib/abnftt/abnf-util.rb
|
31
|
+
- lib/abnftt/abnf-visitor.rb
|
32
|
+
- lib/abnftt/abnf-writer.rb
|
26
33
|
homepage: http://github.com/cabo/abnftt
|
27
34
|
licenses:
|
28
35
|
- MIT
|
@@ -42,7 +49,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
42
49
|
- !ruby/object:Gem::Version
|
43
50
|
version: '0'
|
44
51
|
requirements: []
|
45
|
-
rubygems_version: 3.
|
52
|
+
rubygems_version: 3.5.14
|
46
53
|
signing_key:
|
47
54
|
specification_version: 4
|
48
55
|
summary: RFC 5234+7405 ABNF to Treetop
|
data/bin/abnftt~
DELETED
@@ -1,135 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby -Ku
|
2
|
-
|
3
|
-
require 'pp'
|
4
|
-
require 'yaml'
|
5
|
-
require 'treetop'
|
6
|
-
|
7
|
-
Encoding.default_external = Encoding::UTF_8
|
8
|
-
|
9
|
-
require 'abnf'
|
10
|
-
|
11
|
-
class Treetop::Runtime::SyntaxNode
  # Concatenated clean_abnf of all child elements, or this node's own
  # text when it has no elements.
  def clean_abnf
    return text_value unless elements

    elements.map { |el| el.clean_abnf }.join
  end

  # Default for nodes that do not define their own ast: always raises.
  def ast
    fail "undefined_ast #{inspect}"
  end

  # Build the AST node for a numeric value in the given +base+.
  # +first+ holds the digits of the first number, +second+ the rest of
  # the value:
  #   ""          -> ["cs", char]
  #   "-digits"   -> ["char-range", char1, char2]
  #   ".d.d..."   -> ["cs", string of all characters]
  # Raises RuntimeError for any other +second+.
  def ast_from_percent(base, first, second)
    decode = ->(digits) { digits.to_i(base).chr(Encoding::UTF_8) }
    c1 = decode.(first)
    case second[0]
    when nil
      ["cs", c1]
    when "-"
      ["char-range", c1, decode.(second[1..-1])]
    when "."
      # +second+ starts with ".", so split leaves an empty first field;
      # put the leading number there.
      el = second.split(".")
      el[0] = first
      ["cs", el.map { |c| decode.(c) }.join]
    else
      fail "ast_from_percent"
    end
  end
end
|
39
|
-
|
40
|
-
# Render the list of rules +ast+ as Treetop grammar text: an encoding
# line, a grammar header with the fixed name TESTME, the to_treetop0
# output of every rule, and a closing "end".
# NOTE(review): the heredoc's indentation is not visible here -- confirm
# the intended layout before reformatting.
def to_treetop(ast)
|
41
|
-
<<~EOS
|
42
|
-
# Encoding: UTF-8
|
43
|
-
grammar TESTME
|
44
|
-
#{ast.map {|x| to_treetop0(x)}.join}
|
45
|
-
end
|
46
|
-
EOS
|
47
|
-
end
|
48
|
-
# Render one rule as a Treetop "rule ... end" block. +ast+ must be
# ["=", name, production]; anything else raises RuntimeError carrying the
# inspected node.
# NOTE(review): the heredoc's indentation is not visible here -- confirm
# the intended layout before reformatting.
def to_treetop0(ast)
|
49
|
-
fail ast.inspect unless ast[0] == "="
|
50
|
-
<<~EOS
|
51
|
-
rule #{to_treetop1(ast[1])}
|
52
|
-
#{to_treetop1(ast[2])}
|
53
|
-
end
|
54
|
-
EOS
|
55
|
-
end
|
56
|
-
# Rule names that must be renamed in the generated grammar; any name not
# listed maps to itself.
FIXUP_NAMES = Hash.new { |_hash, name| name }
FIXUP_NAMES["rule"] = "r__rule"
|
60
|
-
# Render one production as a Treetop expression.
# A String is a rule name ("-" becomes "_", after FIXUP_NAMES renaming);
# an Array is a node tagged by its first element. "prose" nodes and
# unknown shapes raise RuntimeError.
def to_treetop1(ast)
  return FIXUP_NAMES[ast].gsub("-", "_") if String === ast
  fail "to_treetop(#{ast.inspect})" unless Array === ast

  joined = ->(separator) { "(#{ast[1..-1].map { |x| to_treetop1(x) }.join(separator)})" }

  case ast[0]
  when "alt" # ["alt", *a]
    joined.(" / ")
  when "seq" # ["seq", *a]
    joined.(" ")
  when "rep" # ["rep", s, e, a]
    t = to_treetop1(ast[3]) || "@@@"
    suffix =
      case [ast[1], ast[2]]
      when [0, 1] then "?"
      when [0, true] then "*"
      when [1, true] then "+"
      else " #{ast[1]}..#{ast[2] == true ? '' : ast[2]}"
      end
    t + suffix
  when "prose" # ["prose", text]
    fail "prose not implemented #{ast.inspect}"
  when "ci" # ["ci", text]
    quoted = ast[1].inspect
    # could do this always, but reduce noise
    quoted << "i" unless ast[1] =~ /\A[^A-Za-z]*\z/
    quoted
  when "cs" # ["cs", text]
    ast[1].inspect
  when "char-range" # ["char-range", c1, c2]
    "[#{Regexp.quote(ast[1])}-#{Regexp.quote(ast[2])}]" # XXX does that always work
  when "im" # ["im", a, text]
    to_treetop1(ast[1]) + " " + ast[2]
  else
    fail "to_treetop(#{ast.inspect})"
  end
end
|
106
|
-
|
107
|
-
# Driver: parse the grammar file named on the command line and write
# <name>.yaml, <name>.treetop and <name>.abnf next to it.
unless fn = ARGV[0]
  warn "Usage: abnftt grammar.abnftt"
  exit 1
end
outfn = fn.sub(/\.abnftt\z/, "")

parser = ABNFParser.new
abnf_file = File.read(fn)
ast = parser.parse abnf_file

unless ast
  reason = parser.failure_reason
  puts reason
  # Guarded: a failure message without the "Expected ... after" shape used
  # to crash here with NoMethodError on nil.
  if reason =~ /^(Expected .+) after/m
    puts "#{$1.gsub("\n", '<<<NEWLINE>>>')}:"
  end
  puts abnf_file.lines.to_a[parser.failure_line - 1]
  puts "#{'~' * (parser.failure_column - 1)}^"
  exit 1 # report the parse failure to the caller
end

# p ast
File.open("#{outfn}.yaml", "w") do |f|
  f.puts ast.ast.to_yaml
end
# pp ast.ast
File.open("#{outfn}.treetop", "w") do |f|
  f.puts to_treetop(ast.ast)
end
File.open("#{outfn}.abnf", "w") do |f|
  f.puts ast.clean_abnf
end
|