ace-compressor 0.24.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.ace-defaults/compressor/config.yml +11 -0
- data/.ace-defaults/nav/protocols/tmpl-sources/ace-compressor.yml +10 -0
- data/CHANGELOG.md +357 -0
- data/README.md +46 -0
- data/Rakefile +15 -0
- data/exe/ace-compressor +13 -0
- data/handbook/templates/agent/minify-single-source.template.md +34 -0
- data/lib/ace/compressor/atoms/canonical_block_transformer.rb +341 -0
- data/lib/ace/compressor/atoms/compact_policy_classifier.rb +130 -0
- data/lib/ace/compressor/atoms/markdown_parser.rb +190 -0
- data/lib/ace/compressor/atoms/retention_reporter.rb +111 -0
- data/lib/ace/compressor/cli/commands/benchmark.rb +51 -0
- data/lib/ace/compressor/cli/commands/compress.rb +89 -0
- data/lib/ace/compressor/cli.rb +23 -0
- data/lib/ace/compressor/models/context_pack.rb +175 -0
- data/lib/ace/compressor/molecules/cache_store.rb +301 -0
- data/lib/ace/compressor/molecules/input_resolver.rb +98 -0
- data/lib/ace/compressor/organisms/agent_compressor.rb +325 -0
- data/lib/ace/compressor/organisms/benchmark_runner.rb +172 -0
- data/lib/ace/compressor/organisms/compact_compressor.rb +470 -0
- data/lib/ace/compressor/organisms/compression_runner.rb +315 -0
- data/lib/ace/compressor/organisms/exact_compressor.rb +187 -0
- data/lib/ace/compressor/version.rb +7 -0
- data/lib/ace/compressor.rb +109 -0
- metadata +156 -0
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "digest"
|
|
4
|
+
require "fileutils"
|
|
5
|
+
require "json"
|
|
6
|
+
require "pathname"
|
|
7
|
+
|
|
8
|
+
module Ace
  module Compressor
    module Molecules
      # Derives cache keys and on-disk locations for compressed packs and
      # reads/writes the pack file together with its JSON metadata sidecar.
      #
      # Two cache locations are supported:
      # * a per-project cache rooted at +cache_root+ (always available), and
      # * an optional shared cache rooted at the configured "shared_cache_dir",
      #   used only for a single workflow source.
      class CacheStore
        PACK_EXTENSION = ".pack"
        METADATA_EXTENSION = ".json"
        SHORT_KEY_LENGTH = 12
        EXACT_CACHE_CONTRACT = "exact-list-shell-v3"
        COMPACT_CACHE_CONTRACT = "compact-list-shell-v3"
        AGENT_CACHE_CONTRACT = "agent-payload-rewrite-v7"

        # @param cache_root [String, nil] cache directory; when nil the configured
        #   "cache_dir" (or ".ace-local/compressor") is used. Relative values are
        #   resolved against +project_root+.
        # @param project_root [String] directory that source paths are made relative to
        def initialize(cache_root: nil, project_root: Dir.pwd)
          @cache_root = File.expand_path(cache_root || default_cache_root, project_root)
          @project_root = File.expand_path(project_root)
        end

        # Builds the cache manifest for a set of sources.
        #
        # @param mode [String, Symbol] compression mode
        # @param sources [Array<String, Hash>, String, Hash] file paths or resolved-input hashes
        # @return [Hash] "key" (SHA-256 hex digest), "sources" (per-source path,
        #   sha256, bytes, lines; sorted by path), "original_bytes", "original_lines"
        def manifest(mode:, sources:)
          source_entries = Array(sources)
            .map { |source| normalize_source_entry(source) }
            .sort_by { |entry| entry.fetch("path") }
            .map { |entry| fingerprint(entry) }

          # Insertion order is part of the digest input: JSON.generate emits keys in
          # this order, so reordering them would change every cache key.
          payload = {
            "schema" => Ace::Compressor::Models::ContextPack::SCHEMA,
            "mode" => mode,
            "sources" => source_entries
          }
          payload["mode_contract"] = mode_contract_for(mode)

          {
            "key" => Digest::SHA256.hexdigest(JSON.generate(payload)),
            "sources" => source_entries,
            "original_bytes" => source_entries.sum { |entry| entry["bytes"] },
            "original_lines" => source_entries.sum { |entry| entry["lines"] }
          }
        end

        # Locations of the pack and metadata files inside the project cache.
        #
        # @return [Hash] :pack_path, :metadata_path, :short_key
        def canonical_paths(mode:, sources:, manifest_key:)
          pack_paths(root: @cache_root, stem: default_stem_for(sources), mode: mode, manifest_key: manifest_key)
        end

        # @return [Boolean] true when a non-blank "shared_cache_dir" is configured
        def shared_cache_enabled?
          !shared_cache_root.to_s.strip.empty?
        end

        # The shared cache applies only when it is enabled, its scope is
        # "workflow_only", and exactly one workflow source is given.
        #
        # @return [Boolean]
        def shared_cache_eligible?(sources)
          return false unless shared_cache_enabled?
          return false unless shared_cache_scope == "workflow_only"

          values = Array(sources)
          values.size == 1 && workflow_source?(values.first)
        end

        # Manifest for the shared cache. Unlike #manifest, the digest input does
        # not include the source path, only the content fingerprint.
        #
        # @return [Hash, nil] {"key" => digest}, or nil when the sources are not eligible
        def shared_manifest(mode:, sources:)
          return nil unless shared_cache_eligible?(sources)

          source = normalize_source_entry(Array(sources).first)
          content = File.binread(source.fetch("content_path"))
          # Key order feeds the digest; keep it stable.
          payload = {
            "schema" => Ace::Compressor::Models::ContextPack::SCHEMA,
            "mode" => mode,
            "mode_contract" => mode_contract_for(mode),
            "source_kind" => "workflow",
            "sha256" => Digest::SHA256.hexdigest(content),
            "bytes" => content.bytesize,
            "lines" => line_count(content)
          }

          {"key" => Digest::SHA256.hexdigest(JSON.generate(payload))}
        end

        # Locations of the pack and metadata files inside the shared cache.
        #
        # @return [Hash, nil] :pack_path, :metadata_path, :short_key; nil when not eligible
        def shared_canonical_paths(mode:, sources:, manifest_key:)
          return nil unless shared_cache_eligible?(sources)

          source = normalize_source_entry(Array(sources).first)
          pack_paths(root: shared_cache_root, stem: shared_stem_for(source), mode: mode, manifest_key: manifest_key)
        end

        # Resolves where the pack should be written for the caller.
        #
        # @param output [String, Pathname, nil] blank => canonical cache path;
        #   a directory (existing, or written with a trailing separator) => a
        #   generated file name inside it; anything else => that file path
        # @return [String] absolute output path
        def output_path_for(output:, mode:, sources:, manifest_key:)
          paths = canonical_paths(mode: mode, sources: sources, manifest_key: manifest_key)
          requested = output.to_s
          return paths[:pack_path] if requested.strip.empty?

          expanded = File.expand_path(requested)
          return directory_output_path(expanded, paths[:short_key], mode, sources) if directory_target?(requested, expanded)

          expanded
        end

        # @return [Boolean] true only when both the pack and its metadata file exist
        def cache_hit?(pack_path:, metadata_path:)
          File.file?(pack_path) && File.file?(metadata_path)
        end

        # @return [String] contents of the pack file
        def read_pack(pack_path)
          File.read(pack_path)
        end

        # @return [Object] parsed JSON metadata
        def read_metadata(metadata_path)
          JSON.parse(File.read(metadata_path))
        end

        # Writes the pack and then its metadata (pretty-printed JSON), creating
        # parent directories as needed.
        def write_cache(pack_path:, metadata_path:, content:, metadata:)
          ensure_parent_dir(pack_path)
          ensure_parent_dir(metadata_path)
          File.write(pack_path, content)
          File.write(metadata_path, JSON.pretty_generate(metadata))
        end

        # Writes +content+ to +output_path+, creating parent directories as needed.
        def write_output(output_path, content)
          ensure_parent_dir(output_path)
          File.write(output_path, content)
        end

        # Human-readable multi-line summary of a compression run.
        #
        # @param metadata [Hash] must contain "file_count", "original_bytes",
        #   "original_lines", "packed_bytes" and "packed_lines"
        # @return [String]
        # @raise [KeyError] when a required metadata key is missing
        def stats_block(mode:, cache_hit:, output_path:, metadata:)
          file_count = metadata.fetch("file_count")
          original_bytes = metadata.fetch("original_bytes")
          original_lines = metadata.fetch("original_lines")
          packed_bytes = metadata.fetch("packed_bytes")
          packed_lines = metadata.fetch("packed_lines")

          [
            "Cache: #{cache_hit ? "hit" : "miss"}",
            "Output: #{output_path}",
            "Sources: #{file_label(file_count)}",
            "Mode: #{mode}",
            "Original: #{format_bytes(original_bytes)}, #{line_label(original_lines)}",
            "Packed: #{format_bytes(packed_bytes)}, #{line_label(packed_lines)}",
            "Change: #{format_change(original_bytes, packed_bytes, "bytes")}, #{format_change(original_lines, packed_lines, "lines")}"
          ].join("\n")
        end

        private

        def default_cache_root
          Ace::Compressor.config["cache_dir"] || ".ace-local/compressor"
        end

        # @return [String] absolute shared cache directory, or "" when not configured
        def shared_cache_root
          value = Ace::Compressor.config["shared_cache_dir"]
          return "" if value.to_s.strip.empty?

          File.expand_path(value.to_s)
        end

        def shared_cache_scope
          (Ace::Compressor.config["shared_cache_scope"] || "workflow_only").to_s
        end

        # @return [String, nil] contract tag mixed into cache keys; nil for unknown modes
        def mode_contract_for(mode)
          case mode.to_s
          when "exact" then EXACT_CACHE_CONTRACT
          when "compact" then COMPACT_CACHE_CONTRACT
          when "agent" then AGENT_CACHE_CONTRACT
          end
        end

        # Content fingerprint of one normalized source entry.
        def fingerprint(entry)
          content = File.binread(entry.fetch("content_path"))
          {
            "path" => entry.fetch("path"),
            "sha256" => Digest::SHA256.hexdigest(content),
            "bytes" => content.bytesize,
            "lines" => line_count(content)
          }
        end

        # Shared layout for both caches: <root>/<mode>/<stem>.<short_key>.<mode>.pack
        # with the metadata file next to it. +mode+ is coerced to a String because
        # File.join rejects Symbols.
        def pack_paths(root:, stem:, mode:, manifest_key:)
          short_key = manifest_key[0, SHORT_KEY_LENGTH]
          mode_name = mode.to_s
          pack_path = File.join(root, mode_name, "#{stem}.#{short_key}.#{mode_name}#{PACK_EXTENSION}")

          {
            pack_path: pack_path,
            metadata_path: pack_path.sub(/#{Regexp.escape(PACK_EXTENSION)}\z/o, METADATA_EXTENSION),
            short_key: short_key
          }
        end

        # Stem (relative path without extension) used for the cache file name.
        # Anything other than exactly one source maps to "multi".
        def default_stem_for(sources)
          entries = Array(sources)
          return "multi" unless entries.size == 1

          source_path = normalize_source_entry(entries.first).fetch("path")
          relative = logical_source?(source_path) ? sanitize_logical_source(source_path) : relative_to_project(source_path).to_s
          stem = strip_extension(relative)
          stem.empty? ? "multi" : stem
        end

        # Stem used inside the shared cache for a workflow source.
        def shared_stem_for(source)
          if source.fetch("source_kind") == "workflow" && source.fetch("path").start_with?("wfi://")
            workflow_path = source.fetch("path").sub(/\Awfi:\/\//, "")
            return File.join("workflow", workflow_path)
          end

          expanded = File.expand_path(source.fetch("content_path"))
          if (match = expanded.match(%r{/(ace-[^/]+)/handbook/workflow-instructions/(.+)\.wf\.md\z}))
            return File.join(match[1], "workflow-instructions", match[2])
          end

          basename = strip_extension(File.basename(source.fetch("path")))
          File.join("workflow", basename.empty? ? "source" : basename)
        end

        # Removes a trailing ".ext" from the last path segment only. The character
        # class excludes "/" so a dot in a directory name ("docs.d/readme") is
        # never mistaken for the start of an extension.
        def strip_extension(path)
          path.sub(/\.[^.\/]+\z/, "")
        end

        # Path relative to the project root, or just the basename when the path
        # lies outside the project (or no relative path can be computed).
        def relative_to_project(path)
          expanded = File.expand_path(path)
          relative = Pathname.new(expanded).relative_path_from(Pathname.new(@project_root)).to_s
          # Only a leading ".." *segment* means "outside"; a name such as "..notes"
          # is an ordinary entry inside the project.
          return File.basename(expanded) if relative == ".." || relative.start_with?("../")

          relative
        rescue ArgumentError
          File.basename(path.to_s)
        end

        # +raw_output+ may be a Pathname, hence the to_s before end_with?.
        def directory_target?(raw_output, expanded_output)
          raw_output.to_s.end_with?(File::SEPARATOR) || Dir.exist?(expanded_output)
        end

        def directory_output_path(directory, short_key, mode, sources)
          label = File.basename(default_stem_for(sources))
          File.join(directory, "#{label}.#{short_key}.#{mode}#{PACK_EXTENSION}")
        end

        # Normalizes a source into {"content_path", "path", "source_kind"}.
        # Hashes may use symbol or string keys; any other value is treated as a
        # file path.
        def normalize_source_entry(source)
          unless source.is_a?(Hash)
            expanded = File.expand_path(source.to_s)
            return {
              "content_path" => expanded,
              "path" => expanded,
              "source_kind" => "file"
            }
          end

          content_path = File.expand_path(source.fetch(:content_path) { source.fetch("content_path") })
          source_path = source.fetch(:source_path) { source.fetch("source_path") }
          source_kind = source.fetch(:source_kind) { source.fetch("source_kind", "file") }

          {
            "content_path" => content_path,
            "path" => source_path.to_s,
            "source_kind" => source_kind.to_s
          }
        end

        def ensure_parent_dir(path)
          directory = File.dirname(path)
          FileUtils.mkdir_p(directory) unless directory == "."
        end

        def line_count(content)
          content.each_line.count
        end

        # True for "scheme://..." values and for any non-absolute path.
        def logical_source?(value)
          value.to_s.match?(%r{\A[a-z][a-z0-9+\-.]*://}i) || !Pathname.new(value.to_s).absolute?
        rescue ArgumentError
          true
        end

        # Turns "scheme://rest" into "scheme/rest", replaces runs of characters
        # outside [A-Za-z0-9._/-] with "_" and drops leading slashes.
        def sanitize_logical_source(value)
          value.to_s.sub(%r{\A([a-z][a-z0-9+\-.]*)://}i, "\\1/")
            .gsub(/[^A-Za-z0-9._\/-]+/, "_")
            .sub(/\A\/+/, "")
        end

        def format_bytes(bytes)
          "#{format_number(bytes)} B"
        end

        # Integer with thousands separators, e.g. 1234567 => "1,234,567".
        def format_number(number)
          number.to_i.to_s.gsub(/(\d)(?=(\d{3})+(?!\d))/, '\\1,')
        end

        def file_label(count)
          "#{format_number(count)} #{(count == 1) ? "file" : "files"}"
        end

        def line_label(count)
          "#{format_number(count)} #{(count == 1) ? "line" : "lines"}"
        end

        # Signed percentage change from +original+ to +packed+; a zero original
        # yields "0.0% <label>" to avoid dividing by zero.
        def format_change(original, packed, label)
          return "0.0% #{label}" if original.to_f.zero?

          percent = ((packed.to_f - original.to_f) / original.to_f) * 100.0
          format("%+.1f%% %s", percent, label)
        end

        # A source counts as a workflow when tagged as such or when its content
        # path matches .../handbook/workflow-instructions/*.wf.md.
        def workflow_source?(source)
          entry = normalize_source_entry(source)
          return true if entry.fetch("source_kind") == "workflow"

          File.expand_path(entry.fetch("content_path")).match?(%r{/handbook/workflow-instructions/.+\.wf\.md\z})
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
require "open3"
|
|
5
|
+
require "tmpdir"
|
|
6
|
+
|
|
7
|
+
module Ace
  module Compressor
    module Molecules
      # Turns raw CLI inputs (files, directories, bundle configs, protocol URIs,
      # preset names) into resolved-input hashes with :content_path,
      # :source_path and :source_kind.
      #
      # Inputs that are not plain files or directories are materialized by
      # running `ace-bundle <input> --output <temp file>`.
      class InputResolver
        CONFIG_EXTENSIONS = %w[.yml .yaml].freeze

        # @param inputs [Array<#to_s>, #to_s] raw inputs
        # @param shell_runner [#call, nil] receives the command as an Array and
        #   returns [stdout, stderr, status]; defaults to Open3.capture3
        # @param temp_root [String, nil] directory for bundle output. When nil, a
        #   temporary directory is created on first use and removed by #cleanup.
        def initialize(inputs, shell_runner: nil, temp_root: nil)
          @inputs = Array(inputs)
          @shell_runner = shell_runner || method(:default_shell_runner)
          # Created lazily (see #temp_root) so resolving only plain files or
          # directories never leaves a stray temp directory behind.
          @temp_root = temp_root
          @owns_temp_root = temp_root.nil?
        end

        # @return [Array<Hash>] one resolved-input hash per input, in input order
        # @raise [Ace::Compressor::Error] when an input cannot be found or resolved
        def call
          @inputs.each_with_index.map { |raw_input, index| resolve_one(raw_input.to_s, index) }
        end

        # Removes the temporary directory, but only if this resolver created it.
        def cleanup
          return unless @owns_temp_root && @temp_root

          FileUtils.rm_rf(@temp_root) if Dir.exist?(@temp_root)
          # Forget the removed directory so a later #call gets a fresh one.
          @temp_root = nil
        end

        private

        # Directory that receives bundle output, created on demand when the
        # caller did not supply one.
        def temp_root
          @temp_root ||= Dir.mktmpdir("ace_compressor_inputs")
        end

        # Classification order: existing file (config => bundle, else file),
        # existing directory, protocol URI, path-looking but missing => error,
        # anything else is treated as a preset name.
        def resolve_one(raw_input, index)
          expanded = File.expand_path(raw_input)

          if File.file?(expanded)
            if config_extension?(raw_input)
              return resolve_with_bundle(raw_input, index, source_path: expanded, source_kind: "bundle_config")
            end

            return resolved_input(content_path: expanded, source_path: expanded, source_kind: "file")
          end

          if File.directory?(expanded)
            return resolved_input(content_path: expanded, source_path: expanded, source_kind: "directory")
          end

          if protocol_input?(raw_input)
            return resolve_with_bundle(raw_input, index, source_path: raw_input, source_kind: protocol_source_kind(raw_input))
          end

          raise Ace::Compressor::Error, "Input source not found: #{raw_input}" if looks_like_path?(raw_input)

          resolve_with_bundle(raw_input, index, source_path: raw_input, source_kind: "preset")
        end

        # Runs ace-bundle for +raw_input+ and returns the resolved entry. When the
        # bundle writes "<output>.meta.json", its parsed content is attached as
        # :bundle_compression_stats.
        #
        # @raise [Ace::Compressor::Error] when the command exits unsuccessfully
        def resolve_with_bundle(raw_input, index, source_path:, source_kind:)
          output_path = File.join(temp_root, "resolved_#{index + 1}.md")
          stdout, stderr, status = @shell_runner.call(["ace-bundle", raw_input, "--output", output_path])

          unless status.success?
            # Prefer stderr for the error detail; fall back to stdout when it is blank.
            details = stderr.to_s.strip
            details = stdout.to_s.strip if details.empty?
            raise Ace::Compressor::Error, "Failed to resolve input '#{raw_input}': #{details}"
          end

          result = resolved_input(content_path: output_path, source_path: source_path, source_kind: source_kind)
          meta_path = "#{output_path}.meta.json"
          if File.exist?(meta_path)
            # Loaded on demand: JSON is only needed when a stats sidecar exists.
            require "json"
            result[:bundle_compression_stats] = JSON.parse(File.read(meta_path))
          end
          result
        end

        def resolved_input(content_path:, source_path:, source_kind:)
          {
            content_path: content_path,
            source_path: source_path,
            source_kind: source_kind
          }
        end

        # Heuristic: starts with ".", "/" or "~", contains a path separator, or
        # has a config extension. Protocol URIs are never treated as paths.
        def looks_like_path?(value)
          return false if protocol_input?(value)

          value.start_with?(".", "/", "~") || value.include?(File::SEPARATOR) || config_extension?(value)
        end

        def config_extension?(value)
          CONFIG_EXTENSIONS.include?(File.extname(value).downcase)
        end

        def protocol_input?(value)
          value.match?(%r{\A[a-z][a-z0-9+\-.]*://}i)
        end

        def protocol_source_kind(value)
          value.to_s.start_with?("wfi://") ? "workflow" : "protocol"
        end

        # The command is passed as an argument vector, so no shell is involved.
        def default_shell_runner(command)
          Open3.capture3(*command)
        end
      end
    end
  end
end
|