abnftt 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/abnftt.gemspec +1 -1
- data/bin/abnfrob +146 -0
- data/bin/abnfrob~ +40 -0
- data/lib/abnftt/abnf-flattener.rb +32 -0
- data/lib/abnftt/abnf-squasher.rb +57 -0
- data/lib/abnftt/abnf-util.rb +371 -0
- data/lib/abnftt/abnf-visitor.rb +26 -0
- data/lib/abnftt/abnf-writer.rb +106 -0
- data/lib/abnftt.rb +11 -5
- metadata +12 -5
- data/bin/abnftt~ +0 -135
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 267bcebab4bd13c22da536e87b6ccd3444b214db589a5e9d4b054a56ffe3a686
|
4
|
+
data.tar.gz: cf4c222a9825e210e635989837ba43432e87661ba6e5e98a9a52aedcfbdad055
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28fe16c851f050e8072bdf939b45569fde9233fbeb26a2e43169fa04b1c66e85e43e893056faab3b87a4e5c49f2f7cea08274dd852c26f8d42688a85198eab57
|
7
|
+
data.tar.gz: 0f4977a6b13b8b4a621b594a31ad87d6759b5780bb59b98f7afc3ab4ac3e70fd2f9ee5cb9f710b8db1ce79e8f411734ec1517ad829456f3cd0f4a383f683d721
|
data/abnftt.gemspec
CHANGED
data/bin/abnfrob
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
#!/usr/bin/env ruby -Ku
|
2
|
+
require 'pp'
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
Encoding.default_external = Encoding::UTF_8
|
6
|
+
require 'optparse'
|
7
|
+
require 'ostruct'
|
8
|
+
|
9
|
+
# Command-line handling: switches are collected in the global $options
# struct. OptionParser#parse! removes the switches it recognises from ARGV,
# so only positional arguments are left for the rest of the script. Any
# error raised while parsing is printed to stderr and ends the run with
# exit status 1.
$options = OpenStruct.new
begin
  op = OptionParser.new do |opts|
    # Gem.loaded_specs has no 'abnftt' entry unless that gem is activated;
    # the resulting error is turned into a placeholder version string.
    opts.version =
      begin
        "(from abnftt #{Gem.loaded_specs['abnftt'].version})"
      rescue StandardError
        "unknown-version"
      end

    opts.banner = "Usage: abnfrob [options] file.abnf"

    opts.on("-b", "--bap=[OPTIONS]", "Pretty-print using bap") do |bap_args|
      $options.bap = true
      # The leading blank lets the text be appended to the bap command line.
      $options.bap_options = bap_args ? " #{bap_args}" : ""
    end
    opts.on("-tFMT", "--to=FMT", [:abnf, :json, :pp, :yaml], "Target format") { |fmt| $options.target = fmt }
    opts.on("-y", "--yaml", "Output separate YAML copy of rules") { |flag| $options.yaml = flag }
    opts.on("-a", "--asr33", "Line-break to fit on teletype") { |flag| $options.asr33 = flag }
    opts.on("--squash=PREFIX", String, "Squash to app-prefix") { |name| $options.squash = name }
  end
  op.parse!
rescue StandardError => e
  warn e
  exit 1
end
|
38
|
+
|
39
|
+
require 'abnftt'
|
40
|
+
|
41
|
+
# Pathname is used below; load it here rather than relying on another
# library having pulled it in.
require 'pathname'

# The first positional argument names the grammar file. The usage text is
# printed and the run ends with status 1 when it is missing or when its
# extension does not start with ".abnf".
# NOTE(review): this is a prefix match, so ".abnftt" is accepted too --
# confirm that is intended before anchoring the pattern at the end.
fn = ARGV[0]
unless fn && File.extname(fn).match?(/\A\.abnf/)
  warn op
  exit 1
end

# Base path for generated files: the input path without its extension.
outfn = (Pathname.new(File.dirname(fn)) + File.basename(fn, ".*")).to_s
|
50
|
+
|
51
|
+
# Parse the grammar file. On failure, print the parser's reason, a one-line
# "Expected ..." summary when the reason contains one, the offending input
# line and a caret under the failure column, then exit with status 1.
parser = ABNFGrammarParser.new
abnf_file = File.read(fn)
ast = parser.parse abnf_file
unless ast
  reason = parser.failure_reason
  puts reason
  # The summary is optional: a reason that does not have the
  # "Expected ... after" shape (or no reason at all) must not raise here and
  # hide the actual parse error.
  expected = reason.to_s[/^(Expected .+) after/m, 1]
  puts "#{expected.gsub("\n", '<<<NEWLINE>>>')}:" if expected
  puts abnf_file.lines.to_a[parser.failure_line - 1]
  puts "#{'~' * (parser.failure_column - 1)}^"
  exit 1
end
|
62
|
+
|
63
|
+
# Build the ABNF object from the parse tree; with --yaml, also write its
# rule table to "<outfn>.yaml".
abnf = ABNF.new(ast)

File.open("#{outfn}.yaml", "w") { |yaml_out| yaml_out.puts abnf.rules.to_yaml } if $options.yaml
|
70
|
+
|
71
|
+
# --squash=PREFIX: run the flatten/squash/share passes over the rules,
# put "PREFIX-" in front of every rule name and rule reference, prepend an
# outer rule that wraps the first rule in PREFIX'...', and write the result
# to "<outfn>-sq.yaml".
if $options.squash
  require 'abnftt/abnf-flattener.rb'
  require 'abnftt/abnf-util.rb'
  require 'abnftt/abnf-squasher.rb'

  prefix = "#{$options.squash}-"
  # Matches an already present prefix (or nothing) at the start of a name,
  # so substituting the prefix never doubles it. The prefix comes from the
  # command line and is therefore escaped before it becomes part of a regex.
  optional_prefix = /\A(?:#{Regexp.escape(prefix)})?/
  add_prefix = ->(name) { name.sub(optional_prefix, prefix) }

  abnf.flatten_ops
  abnf.flatten_strings

  abnf.squash_edn_levels
  abnf.char_range_to_string

  abnf.share_hex("sq")

  abnf.share_alt("sq")

  # Bare strings seen by the visitor are rule references; rename them along
  # with the rule names themselves.
  renamed = abnf.rules.to_h do |name, prod|
    [add_prefix.(name),
     abnf.visit(prod) { |node| [true, add_prefix.(node)] if String === node }]
  end
  abnf.rules.replace(renamed)

  first_name = abnf.rules.first[0]
  outer_rule = ["seq",
                ["cs", "#{$options.squash}'"],
                first_name,
                ["cs", "'"]]
  # The outer rule has to come first, ahead of all existing rules.
  abnf.rules.replace({ "sq-#{first_name}" => outer_rule }.merge(abnf.rules))

  File.open("#{outfn}-sq.yaml", "w") do |f|
    f.puts abnf.rules.to_yaml
  end
end
|
109
|
+
|
110
|
+
## Work on abnf.rules
|
111
|
+
|
112
|
+
# Print the rules in the format selected with --to (ABNF when none given).
target = $options.target
if target == :pp
  pp abnf.rules
elsif target == :json
  require 'neatjson'
  puts JSON.neat_generate(abnf.rules, after_comma: 1, after_colon: 1)
elsif target == :yaml
  puts abnf.rules.to_yaml
elsif target.nil? || target == :abnf
  require_relative '../lib/abnftt/abnf-writer.rb'
  result = abnf.to_s

  if $options.bap
    require 'open3'
    # Feed the ABNF text to the external "bap" command; its standard output
    # replaces the text, its standard error is echoed with a "** " prefix.
    result, bap_stderr, bap_status =
      Open3.capture3("bap -o RFC7405#{$options.bap_options}", stdin_data: result)
    warn bap_stderr.gsub(/^/, "** ") unless bap_stderr == ""
    unless bap_status.success?
      warn "*** Giving up"
      exit 1
    end
  end

  result = abnf.breaker(result) if $options.asr33

  puts result
  # NOTE(review): the copy is always named "<outfn>-sq.abnf", also when
  # --squash was not given -- confirm that is intended.
  File.open("#{outfn}-sq.abnf", "w") { |abnf_out| abnf_out.puts result }
else
  warn ["Unknown target format: ", target].inspect
end
|
data/bin/abnfrob~
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
#!/usr/bin/env ruby -Ku
|
2
|
+
|
3
|
+
require 'pp'
|
4
|
+
require 'yaml'
|
5
|
+
|
6
|
+
Encoding.default_external = Encoding::UTF_8
|
7
|
+
|
8
|
+
require 'abnftt'
|
9
|
+
|
10
|
+
# Parses the grammar named on the command line and writes three files next
# to it: "<base>.yaml" (tree), "<base>.treetop" and "<base>.abnf".
# On a parse failure the reason and the offending line are printed instead.
fn = ARGV[0]
unless fn
  warn "Usage: abnftt grammar.abnftt"
  exit 1
end
outfn = fn.sub(/\.abnftt\z/, "")

parser = ABNFGrammarParser.new
abnf_file = File.read(fn)
ast = parser.parse abnf_file
if ast
  abnf = ABNF.new(ast)
  File.open("#{outfn}.yaml", "w") do |f|
    f.puts abnf.tree.to_yaml
  end
  File.open("#{outfn}.treetop", "w") do |f|
    # Module name: base name with "-" turned into "_", every other character
    # outside [_a-zA-Z0-9] dropped, upper-cased.
    modname = File.basename(outfn).gsub("-", "_").gsub(/[^_a-zA-Z0-9]/, "").upcase
    f.puts abnf.to_treetop(modname)
  end
  File.open("#{outfn}.abnf", "w") do |f|
    f.puts ast.clean_abnf.lines.map(&:rstrip).join("\n")
  end
else
  reason = parser.failure_reason
  puts reason
  # Only print the summary when the reason has the "Expected ... after"
  # shape; otherwise the capture is nil and must not be dereferenced.
  expected = reason.to_s[/^(Expected .+) after/m, 1]
  puts "#{expected.gsub("\n", '<<<NEWLINE>>>')}:" if expected
  puts abnf_file.lines.to_a[parser.failure_line - 1]
  puts "#{'~' * (parser.failure_column - 1)}^"
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require "abnftt/abnf-visitor.rb"
|
2
|
+
|
3
|
+
# Operator flattening: a "seq" directly inside a "seq" (or an "alt" directly
# inside an "alt") is spliced into its parent node.
class ABNF
  # Appends the elements of +s+ to +out+ and returns +out+. Elements that
  # are themselves +op+ nodes contribute their children (recursively);
  # every other element is flattened on its own with #flatten_ops_1.
  # +out+ starts as [op], so the result is a complete +op+ node.
  def expand_op_into(s, op, out = [op])
    s.each_with_object(out) do |el, acc|
      case el
      in [^op, *inner]
        expand_op_into(inner, op, acc)
      else
        acc << flatten_ops_1(el)
      end
    end
  end

  # Returns +prod+ with nested same-operator "seq"/"alt" nodes merged.
  def flatten_ops_1(prod)
    visit(prod) do |here|
      case here
      in ["seq", *rest] then [true, expand_op_into(rest, "seq")]
      in ["alt", *rest] then [true, expand_op_into(rest, "alt")]
      else false
      end
    end
  end

  # Applies #flatten_ops_1 to every rule, replacing each rule in place.
  def flatten_ops
    rules.each_key do |name|
      rules[name] = flatten_ops_1(rules[name])
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require "abnftt"
|
2
|
+
require "abnftt/abnf-flattener"
|
3
|
+
require "abnftt/abnf-util"
|
4
|
+
require "abnftt/abnf-visitor"
|
5
|
+
|
6
|
+
class ABNF

  # Character ranges that #squash_edn_levels_1 keeps as literal ranges,
  # as [first_char, last_char] pairs. Code points 0x27 ("'") and 0x5C ("\")
  # are left out, as is 0xD800-0xDFFF.
  # Earlier variants also excluded 0x22 (but DQUOTE is allowed) and
  # 0x7F-0x9F (but JSON allows 7F-9F).
  UNESCAPED_SQSTR_RANGES = [
    [0xA, 0xA],
    [0x20, 0x26],        # stops before "'"
    [0x28, 0x5b],        # stops before "\"
    [0x5d, 0xd7ff],
    [0xe000, 0x10ffff]
  ].map { |pair| pair.map { |code_point| code_point.chr(Encoding::UTF_8) } }

  # [character, letter] pairs: the character may be written as a backslash
  # followed by the letter.
  ESCAPED_SQSTR_MAPPINGS = [
    ["\x08", "b"], ["\x09", "t"], ["\x0A", "n"], ["\x0C", "f"],
    ["\x0D", "r"], ["\x27", "'"], ["\x2F", "/"], ["\x5C", "\\"]
  ]

  # Replaces every "char-range" node in +prod+ by a sorted "alt" of
  #  * the parts of the range that overlap UNESCAPED_SQSTR_RANGES,
  #  * a backslash-letter sequence for each mapped character in the range,
  #  * "\u" followed by the legacy or the modern hex form of the range,
  # then flattens nested operators in the result.
  def squash_edn_levels_1(prod)
    squashed = visit(prod) do |here|
      case here
      in ["char-range", c1, c2]
        literal = UNESCAPED_SQSTR_RANGES.filter_map { |lo, hi| overlap(here, lo, hi) }
        escaped = ESCAPED_SQSTR_MAPPINGS.filter_map do |raw, letter|
          ["seq", ["char-range", "\\", "\\"], ["char-range", letter, letter]] if raw >= c1 && raw <= c2
        end
        legacy = alt_ranges_legacy(c1.ord, c2.ord)
        modern = alt_ranges_modern(c1.ord, c2.ord)
        unicode = ["seq", ["cs", "\\u"], wrap_flat("alt", [legacy, modern])]
        [true, wrap_flat("alt", [*literal, *escaped, unicode].sort)]
      else
        false
      end
    end
    flatten_ops_1(squashed)
  end

  # Applies #squash_edn_levels_1 to every rule, replacing each rule in place.
  def squash_edn_levels
    rules.each_key do |name|
      rules[name] = squash_edn_levels_1(rules[name])
    end
  end

end
|
@@ -0,0 +1,371 @@
|
|
1
|
+
require "abnftt/abnf-visitor"
|
2
|
+
require "abnftt/abnf-flattener"
|
3
|
+
|
4
|
+
class ABNF
|
5
|
+
|
6
|
+
# Combines the nodes in +all+ under operator +head+: a single node is
# returned as is; any other count (including none) yields [head, *all].
def wrap(head, all)
  return all.first if all.size == 1

  [head, *all]
end
|
13
|
+
|
14
|
+
# Like #wrap, but when more than one node is combined, any node that is
# itself a +head+ node is replaced by its children (one level only).
# A single node is returned untouched, even if it is a +head+ node.
def wrap_flat(head, all)
  return all.first if all.size == 1

  children = all.flat_map do |el|
    case el
    in [^head, *rest] then rest
    else [el]
    end
  end
  [head, *children]
end
|
28
|
+
|
29
|
+
# Intersects the "char-range" node +cr+ with the range +l+..+r+.
# Returns the intersection as a new "char-range" node, or nil when the two
# ranges do not share any character.
def overlap(cr, l, r)
  _tag, first, last = cr
  return unless last >= l && first <= r

  ["char-range", [first, l].max, [last, r].min]
end
|
34
|
+
|
35
|
+
# Utilities for creating hexadecimal rules from unsigned integers
|
36
|
+
|
37
|
+
# Builds the node matching one hex digit whose value lies in +l+..+r+
# (both 0..15). Values below 10 become a range of decimal digits; values
# from 10 up become two letter ranges, upper case first, then lower case.
def hexdig_range(l, r)
  utf8 = Encoding::UTF_8
  pieces = []
  pieces << ["char-range", (0x30 + l).chr(utf8), (0x30 + [r, 9].min).chr(utf8)] if l < 10
  if r >= 10
    first_letter = [l, 10].max - 0xA
    last_letter = r - 0xA
    # 0x41 is "A", 0x61 is "a"
    [0x41, 0x61].each do |base|
      pieces << ["char-range", (base + first_letter).chr(utf8), (base + last_letter).chr(utf8)]
    end
  end
  wrap("alt", pieces)
end
|
52
|
+
|
53
|
+
# Builds the sequence of per-digit matchers (most significant digit first)
# for the hex spellings of +l+..+r+. Each digit position is matched
# independently from the corresponding digits of +l+ and +r+, so the
# result is only exact when l and r are preprocessed to have single or
# full digit ranges except in one place (see #do_range).
# +ndig+ fixes the number of digits, with missing high digits counting as
# 0; when it is false the number of hex digits of +r+ is used.
def hex_ranges(l, r, ndig = false)
  low_digits = l.digits(16) # least significant digit first
  high_digits = r.digits(16)
  width = ndig || high_digits.size
  per_digit = Array.new(width) do |pos|
    hexdig_range(low_digits[pos] || 0, high_digits[pos] || 0)
  end
  wrap("seq", per_digit.reverse)
end
|
64
|
+
|
65
|
+
# Splits +l+..+r+ into consecutive passages that have the property needed
# for #hex_ranges and yields each as (first, last, column). +step+ is the
# number of bits per digit.
#
# While the block of (column + 1) low digits starting at +l+ still fits
# below +r+, the loop climbs: it yields the rest of that block whenever
# +l+ is not already aligned to it. Once a block would overshoot +r+, it
# descends one digit at a time, yielding the passage up to each finer
# boundary at or below r + 1, and stops.
def do_range(l, r, step = 4)
  level = 0
  until l > r
    mask = (1 << (step * (level + 1))) - 1
    block_end = l | mask
    if block_end <= r
      level += 1
      next if (l & mask).zero? # aligned already: try a bigger block

      yield l, block_end, level
      l = block_end + 1
    else
      # right hand side: come down from mountain
      level.downto(0) do |lvl|
        mask >>= step
        boundary = (r + 1) & ~mask
        yield l, boundary - 1, lvl + 1 unless l == boundary
        l = boundary
      end
      return
    end
  end
end
|
89
|
+
|
90
|
+
# Support legacy JSON \u/\u\u and \u{...} hex unicode
|
91
|
+
|
92
|
+
# Builds an "alt" of hex-digit sequences covering +l+..+r+ (integers, or
# single characters converted with #ord). The range is split with
# #do_range using +step+ bits per digit and each passage is rendered by
# #hex_ranges with +ndig+ digits. The passages are listed in reverse
# order to work around prioritized choice.
def alt_ranges(l, r, step = 4, ndig = false)
  choices = []
  do_range(l.ord, r.ord, step) do |lo, hi, _column|
    choices.unshift(hex_ranges(lo, hi, ndig))
  end
  wrap("alt", choices)
end
|
99
|
+
|
100
|
+
# Builds the four-hex-digit ("legacy") form for code points +l+..+r+:
# values below 0x10000 are matched directly; values from 0x10000 up are
# matched as a 0xD800-based half, the characters "\u", and a 0xDC00-based
# half, one such sequence per passage of 10-bit digits.
def alt_ranges_legacy(l, r)
  supplementary = 0x10000
  choices = []
  choices << ["alt", alt_ranges(l, [r, 0xFFFF].min, 4, 4)] if l < supplementary
  if r >= supplementary
    first = [l, supplementary].max - supplementary
    last = r - supplementary
    do_range(first, last, 10) do |lo, hi, _column|
      high_half = alt_ranges((lo >> 10) + 0xD800, (hi >> 10) + 0xD800, 4, 4)
      low_half = alt_ranges((lo & 0x3FF) + 0xDC00, (hi & 0x3FF) + 0xDC00, 4, 4)
      choices << ["seq", high_half, expand_string("\\u"), low_half]
    end
  end
  wrap_flat("alt", choices)
end
|
117
|
+
|
118
|
+
# Builds the braced ("modern") form for code points +l+..+r+: "{", any
# number of "0" characters, the hex digits of the value, "}".
# +step+ is accepted but not used; the digits are always built with 4 bits
# per digit.
def alt_ranges_modern(l, r, step = 4)
  leading_zeros = ["rep", 0, true, ["cs", "0"]]
  ["seq",
   expand_string("{"),
   leading_zeros,
   alt_ranges(l, r, 4, false),
   expand_string("}")]
end
|
125
|
+
|
126
|
+
# flatten_strings: reduce all strings to char-range/seq/alt
|
127
|
+
|
128
|
+
# Turns the string +s+ into a sequence of single-character "char-range"
# nodes. With +case_fold+, every character whose upper- and lower-case
# forms differ becomes an "alt" of the (case-sensitive) expansions of
# both forms.
def expand_string(s, case_fold = false)
  per_char = s.chars.map do |ch|
    upper = ch.upcase
    lower = ch.downcase
    if case_fold && upper != lower
      ["alt", expand_string(upper), expand_string(lower)]
    else
      ["char-range", ch, ch]
    end
  end
  wrap("seq", per_char)
end
|
139
|
+
|
140
|
+
# Replaces every "cs" (case-sensitive) and "ci" (case-insensitive) string
# node in +prod+ by its character-level expansion, then flattens nested
# operators and merges adjacent character ranges.
def flatten_strings_1(prod)
  expanded = visit(prod) do |here|
    case here
    in ["cs" | "ci" => kind, string]
      [true, expand_string(string, kind == "ci")]
    else
      false
    end
  end
  merge_strings_1(flatten_ops_1(expanded))
end
|
153
|
+
|
154
|
+
|
155
|
+
# Tidies the "char-range" members of every "alt" node in +prod+: each run
# of two or more consecutive "char-range" members is sorted, and ranges
# that are directly adjacent (one starts at the code point after the
# previous one ends) are merged. Other members are processed recursively.
# The "alt" node is modified in place and returned.
def merge_strings_1(prod)
  visit(prod) do |here|
    case here
    in ["alt", *members]
      runs = [] # [first, last] member indexes of each run, inclusive right
      idx = 0
      while idx < members.size
        if members[idx] in ["char-range", _, _]
          last = idx
          last += 1 while last + 1 < members.size && (members[last + 1] in ["char-range", _, _])
          runs << [idx, last] unless idx == last
          idx = last
        else
          # XXX could be part of a range
          here[idx + 1] = merge_strings_1(members[idx])
        end
        idx += 1
      end
      # Back to front, so that shrinking a run leaves earlier indexes valid.
      # Members sit one position further right in +here+ (after the "alt").
      runs.reverse_each do |first, last|
        span = (first + 1)..(last + 1)
        sorted = here[span].sort
        pos = sorted.length - 1
        while pos >= 1
          if sorted[pos][1].ord == sorted[pos - 1][2].ord + 1
            sorted[pos - 1..pos] = [["char-range", sorted[pos - 1][1], sorted[pos][2]]]
          end
          pos -= 1
        end
        here[span] = sorted
      end
      [true, here]
    else
      false
    end
  end
end
|
192
|
+
|
193
|
+
# Applies #flatten_strings_1 to every rule, replacing each rule in place.
def flatten_strings
  rules.each_key do |name|
    rules[name] = flatten_strings_1(rules[name])
  end
end
|
198
|
+
|
199
|
+
# Cleanup operations
|
200
|
+
|
201
|
+
# Appends the elements of +s+ to +out+ and returns +out+. Elements that
# are themselves +op+ nodes contribute their children (recursively);
# every other element is converted with #char_range_to_string1.
# +out+ starts as [op], so the result is a complete +op+ node.
def expand_range_into(s, op, out = [op])
  s.each_with_object(out) do |el, acc|
    case el
    in [^op, *inner]
      expand_range_into(inner, op, acc)
    else
      acc << char_range_to_string1(el)
    end
  end
end
|
212
|
+
# Turns single-character "char-range" nodes back into "cs" strings when
# the character lies in " ".."!" or "#".."~", and joins neighbouring "cs"
# members of a "seq" into one string.
def char_range_to_string1(prod)
  visit(prod) do |here|
    case here
    in ["seq", *parts]
      node = expand_range_into(parts, "seq") # node[0] is the "seq" tag
      # Right to left; after a merge the joined string sits at the lower
      # index and is compared with its left neighbour in the next round.
      (node.size - 1).downto(1) do |hi|
        next unless (node[hi] in ["cs", tail]) && (node[hi - 1] in ["cs", head])

        node[hi - 1..hi] = [["cs", head + tail]]
      end
      [true, node]
    in ["char-range", chr, ^chr] if chr.between?(" ", "!") || chr.between?("#", "~")
      [true, ["cs", chr]]
    else
      false
    end
  end
end
|
232
|
+
# For every rule: rebuild strings from character ranges, detect
# case-insensitive alternatives, and merge compatible neighbours. Each
# rule is replaced in place.
def char_range_to_string
  rules.each_key do |name|
    with_strings = char_range_to_string1(rules[name])
    rules[name] = ci_cs_merge(detect_ci(with_strings))
  end
end
|
237
|
+
|
238
|
+
# Replaces an "alt" of exactly two "cs" strings, where the second is the
# downcased first and the first is the upcased second, by one "ci" node
# carrying the first string.
def detect_ci(prod)
  visit(prod) do |here|
    if (here in ["alt", ["cs", upper], ["cs", lower]]) &&
       upper.downcase == lower && lower.upcase == upper
      [true, ["ci", upper]]
    else
      false
    end
  end
end
|
248
|
+
# Returns the text of +prod+ if it can take part in a case-insensitive
# string: any "ci" node, or a "cs" node without ASCII letters. Returns nil
# for everything else.
def ci_compat(prod)
  case prod
  in ["ci", text]
    text
  in ["cs", text] if text =~ /\A[^A-Za-z]*\z/
    text
  else
    nil
  end
end
|
258
|
+
# Within every "seq", joins neighbouring members that are both acceptable
# to #ci_compat into a single "ci" string. Members are processed
# recursively first.
def ci_cs_merge(prod)
  visit(prod) do |here|
    case here
    in ["seq", *parts]
      items = parts.map { |part| ci_cs_merge(part) }
      # Right to left; a joined string is compared with its left neighbour
      # in the following round.
      (items.size - 1).downto(1) do |hi|
        tail = ci_compat(items[hi])
        head = tail && ci_compat(items[hi - 1])
        items[hi - 1..hi] = [["ci", head + tail]] if head
      end
      [true, wrap_flat("seq", items)]
    else
      false
    end
  end
end
|
276
|
+
|
277
|
+
# Within every "seq", replaces each run of two or more equal neighbouring
# members by one ["rep", n, n, member] node, n being the run length.
# Members are processed recursively first.
def seq_rep(prod)
  visit(prod) do |here|
    case here
    in ["seq", *parts]
      items = parts.map { |part| seq_rep(part) }
      stop = items.size # index just behind the run being examined
      while stop > 1
        run_end = stop - 1
        repeated = items[run_end]
        run_start = run_end
        run_start -= 1 while run_start > 0 && items[run_start - 1] == repeated
        unless run_start == run_end
          count = run_end - run_start + 1
          items[run_start..run_end] = [["rep", count, count, repeated]]
        end
        stop = run_start # continue to the left of this run
      end
      [true, wrap_flat("seq", items)]
    else
      false
    end
  end
end
|
302
|
+
|
303
|
+
# sharing
|
304
|
+
# Adds one to counter[node] for every "alt" node found in +prod+,
# including "alt" nodes nested inside other "alt" nodes. Used for its
# effect on +counter+.
def count_alt(counter, prod)
  visit(prod) do |here|
    if here in ["alt", *choices]
      choices.each { |choice| count_alt(counter, choice) }
      counter[here] += 1
    else
      false
    end
  end
end
|
315
|
+
|
316
|
+
# Moves "alt" nodes that occur more than twice across all rules into rules
# of their own, named "<prefix>-a0", "<prefix>-a1", ... in order of
# decreasing use count, and replaces the occurrences by references to
# those rules. Afterwards runs of repeated members are folded with
# #seq_rep. The rule table is replaced in place.
def share_alt(prefix)
  counter = Hash.new(0)
  rules.each_value { |prod| count_alt(counter, prod) }

  subs = {}
  frequent = counter.select { |_alt, uses| uses > 2 }.sort_by { |_alt, uses| -uses }
  frequent.each_with_index do |(alt, _uses), i|
    name = "#{prefix}-a#{i}"
    rules[name] = alt
    subs[alt] = name
  end

  shared = rules.to_h do |rule_name, prod|
    replaced = visit(prod) do |node|
      substitute = subs[node]
      # A shared rule must keep its own body instead of referring to itself.
      [true, substitute] if substitute && rule_name != substitute
    end
    [rule_name, seq_rep(replaced)]
  end
  rules.replace(shared)
end
|
338
|
+
|
339
|
+
# Replaces hex-digit alternatives in +prod+ by references to shared rules
# and records those rules in the hash +rules+ (first definition wins).
# Two shapes are recognised:
#  * digits up to "9" plus "A".. and "a".. letter ranges ending on the
#    same letter (at most "f"): rule "x<first digit><last lower letter>"
#  * an upper-case letter range plus the matching lower-case range, both
#    within "a".."f": rule "x<first lower letter><last lower letter>"
def share_hex_1(prod, rules)
  share = lambda do |first, last, node|
    name = "x#{first}#{last}"
    rules[name] ||= node
    [true, name]
  end

  visit(prod) do |here|
    case here
    in ["alt",
        ["char-range", digit_lo, "9"],
        ["char-range", "A", upper_hi],
        ["char-range", "a", lower_hi]] if upper_hi == lower_hi.upcase && digit_lo >= "0" && lower_hi <= "f"
      share.(digit_lo, lower_hi, here)
    in ["alt",
        ["char-range", upper_lo, upper_hi],
        ["char-range", lower_lo, lower_hi]] if upper_hi == lower_hi.upcase &&
                                               upper_lo == lower_lo.upcase &&
                                               lower_lo.between?("a", "f") &&
                                               lower_hi.between?("a", "f")
      share.(lower_lo, lower_hi, here)
    else
      false
    end
  end
end
|
363
|
+
|
364
|
+
# Runs #share_hex_1 over every rule and then adds the shared hex rules it
# collected, sorted by name, to the rule table. The argument is not used.
def share_hex(_prefix)
  shared = {}
  rules.each_key do |name|
    rules[name] = share_hex_1(rules[name], shared)
  end
  rules.merge!(shared.sort.to_h)
end
|
371
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require_relative "../abnftt.rb"
|
2
|
+
|
3
|
+
# Generic tree rewriting for rule bodies.
class ABNF
  # Visits each production in +prod_array+ with the same block and returns
  # the array of results.
  def visit_all(prod_array, &block)
    prod_array.map { |member| visit(member, &block) }
  end

  # Calls the block with +prod+ (and with the block itself as its block).
  # If the block answers [truthy, replacement], the replacement is returned
  # and nothing below +prod+ is visited. Otherwise "alt", "tadd" and "seq"
  # nodes are rebuilt from their visited members, a "rep" node from its
  # visited body, and anything else is returned as is.
  def visit(prod, &block)
    done, replacement = block.call(prod, &block)
    return replacement if done

    case prod
    in ["alt", *members] then ["alt", *visit_all(members, &block)]
    in ["tadd", *members] then ["tadd", *visit_all(members, &block)]
    in ["seq", *members] then ["seq", *visit_all(members, &block)]
    in ["rep", min, max, body] then ["rep", min, max, visit(body, &block)]
    else prod
    end
  end
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
class ABNF
|
2
|
+
|
3
|
+
# return [precedence ((2 if seq needed)), string]
|
4
|
+
|
5
|
+
def stringify(s)
|
6
|
+
fail "Can't stringify #{s.inspect} yet" unless s =~ /\A[ !#-~]*\z/
|
7
|
+
%{"#{s}"}
|
8
|
+
end
|
9
|
+
|
10
|
+
def write_lhs(k)
|
11
|
+
k
|
12
|
+
end
|
13
|
+
|
14
|
+
# precedence:
|
15
|
+
# 1: / alt -> (type1)
|
16
|
+
# 2: »« seq
|
17
|
+
# 4: atomic
|
18
|
+
|
19
|
+
def prec_check(inner, targetprec, prec)
|
20
|
+
if targetprec >= prec
|
21
|
+
"(#{inner})"
|
22
|
+
else
|
23
|
+
inner
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def write_rhs(v, targetprec = 0)
|
28
|
+
prec, ret =
|
29
|
+
case v
|
30
|
+
in String # this should really be ["name", id]
|
31
|
+
[4, v]
|
32
|
+
in ["name", id]
|
33
|
+
[4, id]
|
34
|
+
in ["alt" | "tadd", *types]
|
35
|
+
[1, types.map{write_rhs(_1, 1)}.join(" / ")]
|
36
|
+
in ["seq", *groups]
|
37
|
+
case groups.size
|
38
|
+
when 0; [4, ""] # XXX
|
39
|
+
else
|
40
|
+
[2, "#{groups.map{write_rhs(_1, 2)}.join(" ")}"]
|
41
|
+
end
|
42
|
+
in ["ci", s]
|
43
|
+
[4, stringify(s)]
|
44
|
+
in ["cs", s]
|
45
|
+
if s =~ /\A[^A-Za-z]*\z/
|
46
|
+
[4, stringify(s)]
|
47
|
+
else
|
48
|
+
[4, "%s" << stringify(s)] # reduce noise if no alphabetics
|
49
|
+
end
|
50
|
+
in ["char-range", c1, c2]
|
51
|
+
nc1 = "%02x" % c1.ord
|
52
|
+
|
53
|
+
nc2 = "%02x" % c2.ord
|
54
|
+
nc2add = "-#{nc2}" if nc2 != nc1
|
55
|
+
[4, "%x#{nc1}#{nc2add}"]
|
56
|
+
in ["rep", s, e, group]
|
57
|
+
if s == 0 && e == 1
|
58
|
+
[4, "[#{write_rhs(group)}]"]
|
59
|
+
else
|
60
|
+
occur = case [s, e]
|
61
|
+
in [1, 1]; ""
|
62
|
+
in [0, true]; "*"
|
63
|
+
in [n, ^n]; n.to_s
|
64
|
+
else
|
65
|
+
"#{s}*#{e != true ? e : ""}"
|
66
|
+
end
|
67
|
+
[4, "#{occur}#{write_rhs(group, 4)}"]
|
68
|
+
end
|
69
|
+
else
|
70
|
+
fail [:WRITE_NOMATCH, v].inspect
|
71
|
+
end
|
72
|
+
prec_check(ret, targetprec, prec)
|
73
|
+
end
|
74
|
+
|
75
|
+
def write_rule(k, v)
|
76
|
+
case v
|
77
|
+
in ["tadd", *_rest]
|
78
|
+
assign = "=/"
|
79
|
+
else
|
80
|
+
assign = "="
|
81
|
+
end
|
82
|
+
"#{write_lhs(k)} #{assign} #{write_rhs(v, 0)}"
|
83
|
+
end
|
84
|
+
|
85
|
+
def to_s
|
86
|
+
rules.map {|k, v| write_rule(k, v) }.join("\n").sub(/.\z/) {$& << "\n"}
|
87
|
+
end
|
88
|
+
|
89
|
+
# primitively break down lines so they fit on a teletype
|
90
|
+
def breaker(s, col = 69)
|
91
|
+
ret = ""
|
92
|
+
s.each_line do |*l|
|
93
|
+
while l[-1].size > col
|
94
|
+
breakpoint = l[-1][0...col].rindex(' ')
|
95
|
+
break unless breakpoint && breakpoint > 4
|
96
|
+
l[-1..-1] = [
|
97
|
+
l[-1][0...breakpoint],
|
98
|
+
" " << l[-1][breakpoint+1..-1]
|
99
|
+
]
|
100
|
+
end
|
101
|
+
ret << l.join("\n")
|
102
|
+
end
|
103
|
+
ret
|
104
|
+
end
|
105
|
+
|
106
|
+
end
|
data/lib/abnftt.rb
CHANGED
@@ -53,11 +53,17 @@ class ABNF
|
|
53
53
|
ABNF.new(ast)
|
54
54
|
end
|
55
55
|
|
56
|
+
# Alternate constructor: builds an ABNF object from the rule table +r+,
# passing nil in place of a parse tree.
def self.from_rules(r)
  no_ast = nil
  ABNF.new(no_ast, r)
end
|
59
|
+
|
56
60
|
attr_accessor :ast, :rules, :tree
|
57
|
-
def initialize(ast_)
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
+
def initialize(ast_ = nil, rules_ = {})
|
62
|
+
if ast_
|
63
|
+
@ast = ast_
|
64
|
+
@tree = ast.ast
|
65
|
+
end
|
66
|
+
@rules = rules_
|
61
67
|
@tree.each do |x|
|
62
68
|
op, name, val, rest = x
|
63
69
|
fail rest if rest
|
@@ -72,7 +78,7 @@ class ABNF
|
|
72
78
|
else
|
73
79
|
val
|
74
80
|
end
|
75
|
-
end
|
81
|
+
end if @tree
|
76
82
|
# warn "** rules #{rules.inspect}"
|
77
83
|
end
|
78
84
|
|
metadata
CHANGED
@@ -1,28 +1,35 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: abnftt
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Carsten Bormann
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-12-09 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Less shifty support for tools based on IETF's ABNF
|
14
14
|
email: cabo@tzi.org
|
15
15
|
executables:
|
16
|
+
- abnfrob
|
17
|
+
- abnfrob~
|
16
18
|
- abnftt
|
17
|
-
- abnftt~
|
18
19
|
extensions: []
|
19
20
|
extra_rdoc_files: []
|
20
21
|
files:
|
21
22
|
- abnftt.gemspec
|
23
|
+
- bin/abnfrob
|
24
|
+
- bin/abnfrob~
|
22
25
|
- bin/abnftt
|
23
|
-
- bin/abnftt~
|
24
26
|
- lib/abnfgrammar.rb
|
25
27
|
- lib/abnftt.rb
|
28
|
+
- lib/abnftt/abnf-flattener.rb
|
29
|
+
- lib/abnftt/abnf-squasher.rb
|
30
|
+
- lib/abnftt/abnf-util.rb
|
31
|
+
- lib/abnftt/abnf-visitor.rb
|
32
|
+
- lib/abnftt/abnf-writer.rb
|
26
33
|
homepage: http://github.com/cabo/abnftt
|
27
34
|
licenses:
|
28
35
|
- MIT
|
@@ -42,7 +49,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
42
49
|
- !ruby/object:Gem::Version
|
43
50
|
version: '0'
|
44
51
|
requirements: []
|
45
|
-
rubygems_version: 3.
|
52
|
+
rubygems_version: 3.5.14
|
46
53
|
signing_key:
|
47
54
|
specification_version: 4
|
48
55
|
summary: RFC 5234+7405 ABNF to Treetop
|
data/bin/abnftt~
DELETED
@@ -1,135 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby -Ku
|
2
|
-
|
3
|
-
require 'pp'
|
4
|
-
require 'yaml'
|
5
|
-
require 'treetop'
|
6
|
-
|
7
|
-
Encoding.default_external = Encoding::UTF_8
|
8
|
-
|
9
|
-
require 'abnf'
|
10
|
-
|
11
|
-
class Treetop::Runtime::SyntaxNode
|
12
|
-
def clean_abnf
|
13
|
-
if elements
|
14
|
-
elements.map {|el| el.clean_abnf}.join
|
15
|
-
else
|
16
|
-
text_value
|
17
|
-
end
|
18
|
-
end
|
19
|
-
def ast
|
20
|
-
fail "undefined_ast #{inspect}"
|
21
|
-
end
|
22
|
-
def ast_from_percent(base, first, second)
|
23
|
-
c1 = first.to_i(base).chr(Encoding::UTF_8)
|
24
|
-
case second[0]
|
25
|
-
when nil
|
26
|
-
["cs", c1]
|
27
|
-
when "-"
|
28
|
-
c2 = second[1..-1].to_i(base).chr(Encoding::UTF_8)
|
29
|
-
["char-range", c1, c2]
|
30
|
-
when "."
|
31
|
-
el = second.split(".")
|
32
|
-
el[0] = first
|
33
|
-
["cs", el.map {|c| c.to_i(base).chr(Encoding::UTF_8)}.join]
|
34
|
-
else
|
35
|
-
fail "ast_from_percent"
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
def to_treetop(ast)
|
41
|
-
<<~EOS
|
42
|
-
# Encoding: UTF-8
|
43
|
-
grammar TESTME
|
44
|
-
#{ast.map {|x| to_treetop0(x)}.join}
|
45
|
-
end
|
46
|
-
EOS
|
47
|
-
end
|
48
|
-
def to_treetop0(ast)
|
49
|
-
fail ast.inspect unless ast[0] == "="
|
50
|
-
<<~EOS
|
51
|
-
rule #{to_treetop1(ast[1])}
|
52
|
-
#{to_treetop1(ast[2])}
|
53
|
-
end
|
54
|
-
EOS
|
55
|
-
end
|
56
|
-
FIXUP_NAMES = Hash.new {|h, k| k}
|
57
|
-
FIXUP_NAMES.merge!({
|
58
|
-
"rule" => "r__rule",
|
59
|
-
})
|
60
|
-
def to_treetop1(ast)
|
61
|
-
case ast
|
62
|
-
when String
|
63
|
-
FIXUP_NAMES[ast].gsub("-", "_")
|
64
|
-
when Array
|
65
|
-
case ast[0]
|
66
|
-
when "alt" # ["alt", *a]
|
67
|
-
"(#{ast[1..-1].map {|x| to_treetop1(x)}.join(" / ")})"
|
68
|
-
when "seq" # ["seq", *a]
|
69
|
-
"(#{ast[1..-1].map {|x| to_treetop1(x)}.join(" ")})"
|
70
|
-
when "rep" # ["rep", s, e, a]
|
71
|
-
t = to_treetop1(ast[3]) || "@@@"
|
72
|
-
case [ast[1], ast[2]]
|
73
|
-
when [0, 1]
|
74
|
-
t + "?"
|
75
|
-
when [0, true]
|
76
|
-
t + "*"
|
77
|
-
when [1, true]
|
78
|
-
t + "+"
|
79
|
-
else
|
80
|
-
t + " #{ast[1]}..#{ast[2] == true ? '' : ast[2]}"
|
81
|
-
end
|
82
|
-
when "prose" # ["prose", text]
|
83
|
-
fail "prose not implemented #{ast.inspect}"
|
84
|
-
when "ci" # ["ci", text]
|
85
|
-
s = ast[1]
|
86
|
-
if s =~ /\A[^A-Za-z]*\z/
|
87
|
-
s.inspect
|
88
|
-
else
|
89
|
-
s.inspect << "i" # could do this always, but reduce noise
|
90
|
-
end
|
91
|
-
when "cs" # ["cs", text]
|
92
|
-
ast[1].inspect
|
93
|
-
when "char-range" # ["char-range", c1, c2]
|
94
|
-
c1 = Regexp.quote(ast[1])
|
95
|
-
c2 = Regexp.quote(ast[2])
|
96
|
-
"[#{c1}-#{c2}]" # XXX does that always work
|
97
|
-
when "im" # ["im", a, text]
|
98
|
-
to_treetop1(ast[1]) + " " + ast[2]
|
99
|
-
else
|
100
|
-
fail "to_treetop(#{ast.inspect})"
|
101
|
-
end
|
102
|
-
else
|
103
|
-
fail "to_treetop(#{ast.inspect})"
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
unless fn = ARGV[0]
|
108
|
-
warn "Usage: abnftt grammar.abnftt"
|
109
|
-
exit 1
|
110
|
-
end
|
111
|
-
outfn = fn.sub(/\.abnftt\z/, "")
|
112
|
-
|
113
|
-
parser = ABNFParser.new
|
114
|
-
abnf_file = File.read(fn)
|
115
|
-
ast = parser.parse abnf_file
|
116
|
-
if ast
|
117
|
-
# p ast
|
118
|
-
File.open("#{outfn}.yaml", "w") do |f|
|
119
|
-
f.puts ast.ast.to_yaml
|
120
|
-
end
|
121
|
-
# pp ast.ast
|
122
|
-
File.open("#{outfn}.treetop", "w") do |f|
|
123
|
-
f.puts to_treetop(ast.ast)
|
124
|
-
end
|
125
|
-
File.open("#{outfn}.abnf", "w") do |f|
|
126
|
-
f.puts ast.clean_abnf
|
127
|
-
end
|
128
|
-
else
|
129
|
-
|
130
|
-
puts parser.failure_reason
|
131
|
-
parser.failure_reason =~ /^(Expected .+) after/m
|
132
|
-
puts "#{$1.gsub("\n", '<<<NEWLINE>>>')}:"
|
133
|
-
puts abnf_file.lines.to_a[parser.failure_line - 1]
|
134
|
-
puts "#{'~' * (parser.failure_column - 1)}^"
|
135
|
-
end
|