ace-compressor 0.24.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.ace-defaults/compressor/config.yml +11 -0
- data/.ace-defaults/nav/protocols/tmpl-sources/ace-compressor.yml +10 -0
- data/CHANGELOG.md +357 -0
- data/README.md +46 -0
- data/Rakefile +15 -0
- data/exe/ace-compressor +13 -0
- data/handbook/templates/agent/minify-single-source.template.md +34 -0
- data/lib/ace/compressor/atoms/canonical_block_transformer.rb +341 -0
- data/lib/ace/compressor/atoms/compact_policy_classifier.rb +130 -0
- data/lib/ace/compressor/atoms/markdown_parser.rb +190 -0
- data/lib/ace/compressor/atoms/retention_reporter.rb +111 -0
- data/lib/ace/compressor/cli/commands/benchmark.rb +51 -0
- data/lib/ace/compressor/cli/commands/compress.rb +89 -0
- data/lib/ace/compressor/cli.rb +23 -0
- data/lib/ace/compressor/models/context_pack.rb +175 -0
- data/lib/ace/compressor/molecules/cache_store.rb +301 -0
- data/lib/ace/compressor/molecules/input_resolver.rb +98 -0
- data/lib/ace/compressor/organisms/agent_compressor.rb +325 -0
- data/lib/ace/compressor/organisms/benchmark_runner.rb +172 -0
- data/lib/ace/compressor/organisms/compact_compressor.rb +470 -0
- data/lib/ace/compressor/organisms/compression_runner.rb +315 -0
- data/lib/ace/compressor/organisms/exact_compressor.rb +187 -0
- data/lib/ace/compressor/version.rb +7 -0
- data/lib/ace/compressor.rb +109 -0
- metadata +156 -0
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ace
|
|
4
|
+
module Compressor
|
|
5
|
+
module Organisms
|
|
6
|
+
class CompressionRunner
|
|
7
|
+
SUPPORTED_FORMATS = %w[path stdio stats].freeze
|
|
8
|
+
SUPPORTED_MODES = %w[exact compact agent].freeze
|
|
9
|
+
SUPPORTED_SOURCE_SCOPES = %w[merged per-source].freeze
|
|
10
|
+
|
|
11
|
+
# Builds a runner for a single compression request.
#
# @param paths [String, Array<String>] one or more inputs (coerced with Array())
# @param mode [String, Symbol] compression mode; validated against SUPPORTED_MODES in #call
# @param source_scope [String, Symbol] "merged" or "per-source"; validated in #call
# @param output [String, nil] optional explicit output target
# @param format [String, Symbol, nil] console format; when nil the configured
#   "default_format" is used, and "path" if that is unset too
# @param verbose [Boolean] forwarded to the mode-specific compressor
def initialize(paths, mode:, source_scope: "merged", output: nil, format: nil, verbose: false)
  @paths = Array(paths)
  # Normalized like source_scope/format so a Symbol mode (:exact) is not
  # rejected by the String comparison against SUPPORTED_MODES.
  @mode = mode.to_s
  @source_scope = source_scope.to_s
  @output = output
  @format = (format || Ace::Compressor.config["default_format"] || "path").to_s
  @verbose = verbose
  @cache_store = Ace::Compressor::Molecules::CacheStore.new
end
|
|
20
|
+
|
|
21
|
+
# Validates the request options, resolves the inputs and runs the compression.
#
# @return [Hash] the result hash of #run_per_source or #run_for_sources
# @raise [Ace::Compressor::Error] when format, mode or source scope is unsupported
def call
  unless SUPPORTED_FORMATS.include?(@format)
    raise Ace::Compressor::Error, "Unsupported format '#{@format}'. Use --format path, stdio, or stats"
  end

  unless SUPPORTED_MODES.include?(@mode)
    raise Ace::Compressor::Error,
      "Unsupported mode '#{@mode}'. Use --mode exact, --mode compact, or --mode agent"
  end

  unless SUPPORTED_SOURCE_SCOPES.include?(@source_scope)
    raise Ace::Compressor::Error,
      "Unsupported source scope '#{@source_scope}'. Use --source-scope merged or --source-scope per-source"
  end

  input_resolver = Ace::Compressor::Molecules::InputResolver.new(@paths)
  begin
    inputs = input_resolver.call
    if @source_scope == "per-source"
      run_per_source(inputs)
    else
      run_for_sources(inputs)
    end
  ensure
    # Resolvers that offer cleanup are always given the chance to run it.
    input_resolver.cleanup if input_resolver.respond_to?(:cleanup)
  end
end
|
|
42
|
+
|
|
43
|
+
private
|
|
44
|
+
|
|
45
|
+
# Compresses the given resolved inputs into one pack, reusing the local or
# shared cache when possible.
#
# @param resolved_inputs [Array<Hash>] entries carrying at least :content_path
# @return [Hash] :console_output, :ignored_paths, :output_path, :cache_hit,
#   :metadata, :refusal_lines, :fallback_lines and :exit_code (1 when the
#   pack contains REFUSAL| lines, otherwise 0)
def run_for_sources(resolved_inputs)
  resolved_paths = Array(resolved_inputs).map { |entry| entry.fetch(:content_path) }
  compressor = compressor_for_mode(resolved_paths)
  sources = compressor.resolve_sources
  source_metadata = build_source_metadata(sources, resolved_inputs)
  manifest = @cache_store.manifest(mode: @mode, sources: source_metadata)

  # If ace-bundle already compressed this content, override original sizes with
  # the true uncompressed values and pass through without re-compression.
  bundle_stats = extract_bundle_compression_stats(resolved_inputs)
  if bundle_stats
    manifest["original_bytes"] = bundle_stats["original_bytes"]
    manifest["original_lines"] = bundle_stats["original_lines"]
  end

  canonical = @cache_store.canonical_paths(mode: @mode, sources: source_metadata, manifest_key: manifest["key"])
  shared_manifest = @cache_store.shared_manifest(mode: @mode, sources: source_metadata)
  shared_canonical = if shared_manifest
    @cache_store.shared_canonical_paths(mode: @mode, sources: source_metadata, manifest_key: shared_manifest["key"])
  end

  local_hit = @cache_store.cache_hit?(pack_path: canonical[:pack_path], metadata_path: canonical[:metadata_path])
  # Probe the shared cache BEFORE anything is written: the build helpers below
  # populate the shared location, so probing afterwards would report a hit for
  # a pack that was only just created.
  shared_hit = !local_hit && shared_cache_hit?(shared_canonical)

  content, metadata =
    if local_hit
      cached = @cache_store.read_pack(canonical[:pack_path])
      hydrated = hydrate_metadata(
        @cache_store.read_metadata(canonical[:metadata_path]),
        manifest,
        canonical,
        cached
      )
      [cached, hydrated]
    elsif shared_hit
      hydrate_from_shared_cache(manifest, canonical, shared_canonical)
    elsif bundle_stats
      build_passthrough_entry(sources, manifest, canonical, shared_canonical)
    else
      build_cache_entry(compressor, sources, source_metadata, manifest, canonical, shared_canonical)
    end
  cache_hit = local_hit || shared_hit

  output_path = @cache_store.output_path_for(
    output: @output,
    mode: @mode,
    sources: source_metadata,
    manifest_key: manifest["key"]
  )

  # The canonical pack already holds the content; only copy to other targets.
  @cache_store.write_output(output_path, content) unless output_path == canonical[:pack_path]
  refusals = refusal_lines(content)
  fallbacks = fallback_lines(content)

  {
    console_output: format_console_output(
      content: content,
      cache_hit: cache_hit,
      output_path: output_path,
      metadata: metadata
    ),
    ignored_paths: compressor.ignored_paths,
    output_path: output_path,
    cache_hit: cache_hit,
    metadata: metadata,
    refusal_lines: refusals,
    fallback_lines: fallbacks,
    exit_code: refusals.empty? ? 0 : 1
  }
end
|
|
112
|
+
|
|
113
|
+
# Runs one independent compression per resolved input and merges the results.
#
# @param resolved_inputs [Array<Hash>] resolved input entries
# @return [Hash] aggregated result; :cache_hit is true only when every run hit
#   the cache, and :exit_code is 1 when any run reported a non-zero exit code
# @raise [Ace::Compressor::Error] when several inputs target a single output file
def run_per_source(resolved_inputs)
  several_inputs = resolved_inputs.size > 1
  if several_inputs && output_file_target?
    raise Ace::Compressor::Error,
      "Per-source mode with multiple inputs requires --output to be a directory path"
  end

  runs = resolved_inputs.map { |input| run_for_sources([input]) }
  any_failed = runs.any? { |run| run[:exit_code].to_i.nonzero? }

  {
    console_output: join_console_outputs(runs.map { |run| run[:console_output] }),
    ignored_paths: runs.flat_map { |run| run[:ignored_paths] }.uniq,
    output_path: runs.first&.dig(:output_path),
    output_paths: runs.map { |run| run[:output_path] },
    cache_hit: runs.all? { |run| run[:cache_hit] },
    metadata: runs.map { |run| run[:metadata] },
    refusal_lines: runs.flat_map { |run| run[:refusal_lines] },
    fallback_lines: runs.flat_map { |run| run[:fallback_lines] },
    exit_code: any_failed ? 1 : 0
  }
end
|
|
132
|
+
|
|
133
|
+
# Joins the non-empty console outputs of several runs into one string.
# Stats blocks are separated by a blank line, everything else by a newline.
#
# @param outputs [Array<#to_s>, #to_s, nil] per-run console outputs
# @return [String] joined text, or "" when nothing remains
def join_console_outputs(outputs)
  chunks = Array(outputs).filter_map do |output|
    text = output.to_s
    text unless text.empty?
  end
  return "" if chunks.empty?

  glue = (@format == "stats") ? "\n\n" : "\n"
  chunks.join(glue)
end
|
|
140
|
+
|
|
141
|
+
# Tells whether @output names a single file rather than a directory.
#
# A blank output, an output ending in a path separator, or an output that is
# an existing directory is treated as "not a file target".
#
# @return [Boolean]
def output_file_target?
  # Work on the String form throughout so a Pathname (or any other #to_s
  # object) does not fail on String-only methods such as #end_with?.
  target = @output.to_s
  return false if target.strip.empty?

  separators = [File::SEPARATOR, File::ALT_SEPARATOR].compact
  return false if target.end_with?(*separators)

  !Dir.exist?(File.expand_path(target))
end
|
|
147
|
+
|
|
148
|
+
# Stores the sources unchanged (header + raw file contents) as the cache
# entry, without running a compressor.
#
# @param sources [Array<String>] files whose contents are joined with "\n"
# @param manifest [Hash] manifest passed through to #cache_metadata
# @param canonical [Hash] :pack_path, :metadata_path and :short_key of the local entry
# @param shared_canonical [Hash, nil] same keys for the shared entry, when present
# @return [Array(String, Hash)] the pack content and the local metadata
def build_passthrough_entry(sources, manifest, canonical, shared_canonical = nil)
  body = sources.map { |path| File.read(path) }.join("\n")
  content = "#{Ace::Compressor::Models::ContextPack.header(@mode)}\n#{body}"
  metadata = cache_metadata(manifest, canonical[:short_key], content)

  @cache_store.write_cache(
    pack_path: canonical[:pack_path],
    metadata_path: canonical[:metadata_path],
    content: content,
    metadata: metadata
  )
  return [content, metadata] unless shared_canonical

  # Mirror the entry into the shared location, tagged with its scope.
  shared_seed = metadata.merge("cache_scope" => "shared")
  @cache_store.write_cache(
    pack_path: shared_canonical[:pack_path],
    metadata_path: shared_canonical[:metadata_path],
    content: content,
    metadata: cache_metadata(manifest, shared_canonical[:short_key], content, shared_seed)
  )
  [content, metadata]
end
|
|
170
|
+
|
|
171
|
+
# Returns the first truthy :bundle_compression_stats value found among the
# resolved inputs.
#
# @param resolved_inputs [Array<Hash>, nil] resolved input entries
# @return [Object, nil] the stats value, or nil when no entry carries one
def extract_bundle_compression_stats(resolved_inputs)
  Array(resolved_inputs)
    .lazy
    .filter_map { |entry| entry[:bundle_compression_stats] }
    .first
end
|
|
178
|
+
|
|
179
|
+
# Compresses the sources and stores the result in the local cache and, when a
# shared location is given, in the shared cache as well.
#
# @param compressor [#compress_sources] mode-specific compressor
# @param sources [Array<String>] source files to compress
# @param source_metadata [Array<Hash>] per-source metadata (see #build_source_metadata)
# @param manifest [Hash] manifest passed through to #cache_metadata
# @param canonical [Hash] :pack_path, :metadata_path and :short_key of the local entry
# @param shared_canonical [Hash, nil] same keys for the shared entry, when present
# @return [Array(String, Hash)] the pack content and the local metadata
def build_cache_entry(compressor, sources, source_metadata, manifest, canonical, shared_canonical = nil)
  content = compress_content(compressor, sources, source_metadata)
  metadata = cache_metadata(manifest, canonical[:short_key], content)

  @cache_store.write_cache(
    pack_path: canonical[:pack_path],
    metadata_path: canonical[:metadata_path],
    content: content,
    metadata: metadata
  )
  return [content, metadata] unless shared_canonical

  # Mirror the entry into the shared location, tagged with its scope.
  shared_seed = metadata.merge("cache_scope" => "shared")
  @cache_store.write_cache(
    pack_path: shared_canonical[:pack_path],
    metadata_path: shared_canonical[:metadata_path],
    content: content,
    metadata: cache_metadata(manifest, shared_canonical[:short_key], content, shared_seed)
  )
  [content, metadata]
end
|
|
199
|
+
|
|
200
|
+
# Invokes the compressor, handing it a content-path => source-path map when
# its #compress_sources accepts a `source_paths:` keyword (or any keywords).
#
# @param compressor [#compress_sources] mode-specific compressor
# @param sources [Array<String>] source files to compress
# @param source_metadata [Array<Hash>] entries with :content_path and :source_path
# @return [Object] whatever the compressor's #compress_sources returns
def compress_content(compressor, sources, source_metadata)
  display_paths = source_metadata.to_h do |entry|
    [entry.fetch(:content_path), entry.fetch(:source_path)]
  end

  # Inspect the signature so compressors without the keyword keep working.
  signature = compressor.method(:compress_sources).parameters
  accepts_source_paths = signature.any? do |kind, name|
    kind == :keyrest || (%i[key keyreq].include?(kind) && name == :source_paths)
  end

  if accepts_source_paths
    compressor.compress_sources(sources, source_paths: display_paths)
  else
    compressor.compress_sources(sources)
  end
end
|
|
214
|
+
|
|
215
|
+
# Pairs every compressor source with the resolved input it came from.
#
# Sources that match a resolved input (by expanded content path) inherit its
# :source_path and :source_kind; all others are described as plain files.
#
# @param sources [Array<String>] source paths reported by the compressor
# @param resolved_inputs [Array<Hash>] entries with :content_path, :source_path, :source_kind
# @return [Array<Hash>] one {content_path:, source_path:, source_kind:} hash per source
def build_source_metadata(sources, resolved_inputs)
  by_content_path = Array(resolved_inputs).to_h do |entry|
    [File.expand_path(entry.fetch(:content_path)), entry]
  end

  Array(sources).map do |source|
    absolute = File.expand_path(source)
    origin = by_content_path[absolute]
    {
      content_path: absolute,
      source_path: origin ? origin.fetch(:source_path) : absolute,
      source_kind: origin ? origin.fetch(:source_kind) : "file"
    }
  end
end
|
|
238
|
+
|
|
239
|
+
# Picks what is printed to the console for the configured format:
# the pack itself ("stdio"), a stats block ("stats"), or the output path.
#
# @param content [String] pack content
# @param cache_hit [Boolean] whether the pack came from a cache
# @param output_path [String] where the pack was written
# @param metadata [Hash] pack metadata
# @return [Object] content, the cache store's stats block, or output_path
def format_console_output(content:, cache_hit:, output_path:, metadata:)
  return content if @format == "stdio"
  return output_path unless @format == "stats"

  @cache_store.stats_block(mode: @mode, cache_hit: cache_hit, output_path: output_path, metadata: metadata)
end
|
|
249
|
+
|
|
250
|
+
# Builds the metadata hash stored next to a pack. Keys already present in
# existing_metadata are kept unless one of the computed keys overrides them.
#
# @param manifest [Hash] provides "key", "sources", "original_bytes", "original_lines"
# @param short_key [String] short cache key of the entry
# @param content [String] pack content used for the packed size figures
# @param existing_metadata [Hash, nil] previous metadata to carry over
# @return [Hash] merged metadata
def cache_metadata(manifest, short_key, content, existing_metadata = nil)
  computed = {
    "schema" => Ace::Compressor::Models::ContextPack::SCHEMA,
    "mode" => @mode,
    "key" => manifest["key"],
    "short_key" => short_key,
    "sources" => manifest["sources"],
    "file_count" => manifest["sources"].size,
    "original_bytes" => manifest["original_bytes"],
    "original_lines" => manifest["original_lines"],
    "packed_bytes" => content.bytesize,
    "packed_lines" => content.lines.count
  }
  carried_over = existing_metadata || {}
  carried_over.merge(computed)
end
|
|
265
|
+
|
|
266
|
+
# Recomputes the metadata of a cached pack and rewrites the cache entry when
# the recomputed metadata differs from what was stored.
#
# @param existing_metadata [Hash, nil] metadata read from the cache
# @param manifest [Hash] current manifest
# @param canonical [Hash] :pack_path, :metadata_path and :short_key of the entry
# @param content [String] cached pack content
# @return [Hash] the up-to-date metadata
def hydrate_metadata(existing_metadata, manifest, canonical, content)
  refreshed = cache_metadata(manifest, canonical[:short_key], content, existing_metadata)

  unless refreshed == existing_metadata
    @cache_store.write_cache(
      pack_path: canonical[:pack_path],
      metadata_path: canonical[:metadata_path],
      content: content,
      metadata: refreshed
    )
  end

  refreshed
end
|
|
278
|
+
|
|
279
|
+
# Instantiates the compressor that implements the configured mode.
# Any mode other than "compact" or "agent" is served by ExactCompressor.
#
# @param paths [Array<String>] content paths to compress
# @return [CompactCompressor, AgentCompressor, ExactCompressor]
def compressor_for_mode(paths)
  if @mode == "compact"
    CompactCompressor.new(paths, verbose: @verbose)
  elsif @mode == "agent"
    AgentCompressor.new(paths, verbose: @verbose)
  else
    ExactCompressor.new(paths, verbose: @verbose, mode_label: @mode)
  end
end
|
|
285
|
+
|
|
286
|
+
# Collects the stripped lines of the pack that start with "REFUSAL|".
#
# @param content [#to_s] pack content
# @return [Array<String>] refusal records, in order of appearance
def refusal_lines(content)
  content.to_s.each_line.filter_map do |line|
    record = line.strip
    record if record.start_with?("REFUSAL|")
  end
end
|
|
289
|
+
|
|
290
|
+
# Collects the stripped lines of the pack that start with "FALLBACK|".
#
# @param content [#to_s] pack content
# @return [Array<String>] fallback records, in order of appearance
def fallback_lines(content)
  content.to_s.each_line.filter_map do |line|
    record = line.strip
    record if record.start_with?("FALLBACK|")
  end
end
|
|
293
|
+
|
|
294
|
+
# Asks the cache store whether the shared cache entry is present.
#
# @param shared_canonical [Hash, nil] :pack_path and :metadata_path of the shared entry
# @return [Object] false when no shared location is given, otherwise the
#   result of the cache store's #cache_hit?
def shared_cache_hit?(shared_canonical)
  if shared_canonical
    @cache_store.cache_hit?(
      pack_path: shared_canonical[:pack_path],
      metadata_path: shared_canonical[:metadata_path]
    )
  else
    false
  end
end
|
|
299
|
+
|
|
300
|
+
# Copies a shared cache entry into the local cache location.
#
# The shared metadata is tagged with "cache_scope" => "shared" and then
# refreshed against the current manifest before being written locally.
#
# @param manifest [Hash] current manifest
# @param canonical [Hash] :pack_path, :metadata_path and :short_key of the local entry
# @param shared_canonical [Hash] :pack_path and :metadata_path of the shared entry
# @return [Array(String, Hash)] the pack content and the local metadata
def hydrate_from_shared_cache(manifest, canonical, shared_canonical)
  content = @cache_store.read_pack(shared_canonical[:pack_path])
  seed = @cache_store
    .read_metadata(shared_canonical[:metadata_path])
    .merge("cache_scope" => "shared")
  metadata = cache_metadata(manifest, canonical[:short_key], content, seed)

  @cache_store.write_cache(
    pack_path: canonical[:pack_path],
    metadata_path: canonical[:metadata_path],
    content: content,
    metadata: metadata
  )

  [content, metadata]
end
|
|
312
|
+
end
|
|
313
|
+
end
|
|
314
|
+
end
|
|
315
|
+
end
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "find"
|
|
4
|
+
require "pathname"
|
|
5
|
+
|
|
6
|
+
module Ace
|
|
7
|
+
module Compressor
|
|
8
|
+
module Organisms
|
|
9
|
+
class ExactCompressor
|
|
10
|
+
SUPPORTED_EXTENSIONS = %w[.md .markdown .mdown .mkd .txt .text].freeze
|
|
11
|
+
BINARY_SNIFF_BYTES = 4096
|
|
12
|
+
|
|
13
|
+
attr_reader :ignored_paths
|
|
14
|
+
|
|
15
|
+
# Sets up an exact-mode compressor for the given inputs.
#
# @param paths [String, Array<String>] files and/or directories (coerced with Array())
# @param verbose [Boolean] when true, skipped directory entries are recorded in #ignored_paths
# @param mode_label [String] mode name used in error messages
def initialize(paths, verbose: false, mode_label: "exact")
  @paths = Array(paths)
  @verbose = verbose
  @mode_label = mode_label
  @ignored_paths = []
  # The transformer is kept as a class: one instance is created per source.
  @transformer = Ace::Compressor::Atoms::CanonicalBlockTransformer
  @parser = Ace::Compressor::Atoms::MarkdownParser.new
end
|
|
23
|
+
|
|
24
|
+
# Resolves the configured inputs and compresses them in one step.
#
# @return [String] the pack text produced by #compress_sources
def call
  sources = resolve_sources
  compress_sources(sources)
end
|
|
27
|
+
|
|
28
|
+
# Expands the configured paths into a sorted, de-duplicated list of source
# files.
#
# Directories contribute every supported file found by
# #collect_supported_directory_files; explicit files are checked with
# #validate_explicit_file! first.
#
# @return [Array<String>] absolute source paths, sorted
# @raise [Ace::Compressor::Error] when a path does not exist, a directory has
#   no supported sources, or nothing is left to compress
def resolve_sources
  # Plain Array#uniq replaces the former Set bookkeeping: the file never
  # required "set", which is only loaded automatically from Ruby 3.2 on.
  collected = @paths.flat_map do |raw_path|
    path = raw_path.to_s
    expanded = File.expand_path(path)

    if File.directory?(expanded)
      directory_files = collect_supported_directory_files(expanded)
      if directory_files.empty?
        raise Ace::Compressor::Error,
          "Directory has no supported markdown/text sources: #{path}. Supported extensions: #{SUPPORTED_EXTENSIONS.join(", ")}"
      end
      directory_files
    elsif File.file?(expanded)
      validate_explicit_file!(expanded, path)
      [expanded]
    else
      raise Ace::Compressor::Error, "Input source not found: #{path}"
    end
  end

  sorted = collected.uniq.sort
  if sorted.empty?
    raise Ace::Compressor::Error,
      "No supported markdown/text sources found. Supported extensions: #{SUPPORTED_EXTENSIONS.join(", ")}"
  end

  sorted
end
|
|
67
|
+
|
|
68
|
+
# Builds the pack text for the given source files.
#
# Each source contributes a file line followed by its transformed blocks; when
# the parser yields no blocks the raw text is emitted instead.
#
# @param sources [Array<String>] readable source file paths
# @param source_paths [Hash, nil] optional map from content path to display path
# @return [String] header plus per-source records, joined with "\n"
# @raise [Ace::Compressor::Error] when a source file contains only whitespace
def compress_sources(sources, source_paths: nil)
  pack = [Ace::Compressor::Models::ContextPack.header("exact")]

  sources.each do |source|
    label = source_label(display_source(source, source_paths))
    pack << Ace::Compressor::Models::ContextPack.file_line(label)

    body = File.read(source)
    if body.strip.empty?
      raise Ace::Compressor::Error, "Input file is empty. #{mode_title} mode requires content: #{source}"
    end

    parsed = @parser.call(body)
    if parsed.empty?
      pack << body
    else
      pack.concat(transformed_lines(label, parsed))
    end
  end

  pack.join("\n")
end
|
|
88
|
+
|
|
89
|
+
# Compresses an in-memory string; no file is read.
# The result carries the file line and records only, never the header line.
# Blank text, or text the parser yields no blocks for, is returned unchanged.
# @param text [String] markdown/text content to compress
# @param label [String] display label for the source (e.g. original file path)
# @return [String] compressed ContextPack records (no header)
def compress_text(text, label:)
  return text if text.to_s.strip.empty?

  parsed = @parser.call(text)
  return text if parsed.empty?

  records = [Ace::Compressor::Models::ContextPack.file_line(label)]
  records.concat(@transformer.new(label).call(parsed))
  records.join("\n")
end
|
|
105
|
+
|
|
106
|
+
private
|
|
107
|
+
|
|
108
|
+
# Runs a fresh transformer instance for one source over its parsed blocks.
#
# @param source [String] source label handed to the transformer
# @param blocks [Array] parsed blocks of that source
# @return [Object] the transformer's result for the blocks
def transformed_lines(source, blocks)
  @transformer.new(source).call(blocks)
end
|
|
112
|
+
|
|
113
|
+
# Looks up the display path for a source, trying its expanded path first and
# the path as given second; falls back to the source itself.
#
# @param source [String] content path of the source
# @param source_paths [Hash, nil] map from content path to display path
# @return [String] the display path, or source when no mapping applies
def display_source(source, source_paths)
  if source_paths
    mapped = source_paths[File.expand_path(source)]
    mapped ||= source_paths[source]
    mapped || source
  else
    source
  end
end
|
|
118
|
+
|
|
119
|
+
# Walks a directory tree and gathers every file that has a supported
# extension and is not binary. In verbose mode all other files are recorded
# in @ignored_paths.
#
# @param directory [String] directory to walk recursively
# @return [Array<String>] expanded paths of the accepted files, sorted
def collect_supported_directory_files(directory)
  accepted = []

  Find.find(directory) do |entry|
    next unless File.file?(entry)

    absolute = File.expand_path(entry)
    # The binary sniff only runs for files that passed the extension check.
    if supported_extension?(entry) && !binary_file?(absolute)
      accepted << absolute
    elsif @verbose
      @ignored_paths << absolute
    end
  end

  accepted.sort
end
|
|
139
|
+
|
|
140
|
+
# Rejects an explicitly named file that cannot be compressed.
#
# Binary content is reported first, whatever the extension; a non-binary file
# with an unsupported extension is reported second.
#
# @param expanded [String] absolute path of the file
# @param original [String] the path as given by the caller, used in messages
# @return [nil] when the file is acceptable
# @raise [Ace::Compressor::Error] for binary or unsupported files
def validate_explicit_file!(expanded, original)
  recognized = supported_extension?(expanded)

  if binary_file?(expanded)
    raise Ace::Compressor::Error, "Binary input is not supported in #{mode_label} mode: #{original}"
  end
  return if recognized

  raise Ace::Compressor::Error,
    "Unsupported explicit file: #{original}. Supported extensions: #{SUPPORTED_EXTENSIONS.join(", ")}"
end
|
|
154
|
+
|
|
155
|
+
# Checks the file extension (case-insensitively) against SUPPORTED_EXTENSIONS.
#
# @param path [String] file path
# @return [Boolean]
def supported_extension?(path)
  extension = File.extname(path).downcase
  SUPPORTED_EXTENSIONS.include?(extension)
end
|
|
158
|
+
|
|
159
|
+
# Sniffs the first BINARY_SNIFF_BYTES bytes of a file for a NUL byte.
#
# @param path [String] file to inspect
# @return [Boolean] true when a NUL byte is found; false otherwise, including
#   when the file cannot be read (any StandardError is swallowed)
def binary_file?(path)
  head = File.binread(path, BINARY_SNIFF_BYTES)
  # binread yields nil for an empty file; nil.to_s gives the empty sample.
  head.to_s.include?("\x00")
rescue
  false
end
|
|
165
|
+
|
|
166
|
+
# Produces the label shown for a source: its path relative to the current
# working directory when it lies inside it, otherwise the path unchanged.
#
# @param source [String] source path
# @return [String] relative path inside Dir.pwd, or source itself
def source_label(source)
  relative = Pathname.new(source).relative_path_from(Pathname.new(Dir.pwd)).to_s
  # Only a leading ".." path SEGMENT means "outside the working directory";
  # a file that is merely named "..notes.md" is still inside it.
  outside = relative == ".." || relative.start_with?("../")
  outside ? source : relative
rescue ArgumentError
  # relative_path_from cannot relate the two paths (e.g. a relative source).
  source
end
|
|
176
|
+
|
|
177
|
+
# Mode name used in messages; a blank label falls back to "exact".
#
# @return [Object] the configured label, or "exact" when it is blank
def mode_label
  return "exact" if @mode_label.to_s.strip.empty?

  @mode_label
end
|
|
180
|
+
|
|
181
|
+
# Capitalized form of #mode_label for use at the start of messages.
#
# @return [String, Symbol] the result of calling #capitalize on the label
def mode_title
  label = mode_label
  label.capitalize
end
|
|
184
|
+
end
|
|
185
|
+
end
|
|
186
|
+
end
|
|
187
|
+
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
require "tmpdir"
|
|
5
|
+
require "yaml"
|
|
6
|
+
require "ace/support/config"
|
|
7
|
+
|
|
8
|
+
require_relative "compressor/version"
|
|
9
|
+
require_relative "compressor/models/context_pack"
|
|
10
|
+
require_relative "compressor/atoms/markdown_parser"
|
|
11
|
+
require_relative "compressor/atoms/canonical_block_transformer"
|
|
12
|
+
require_relative "compressor/atoms/compact_policy_classifier"
|
|
13
|
+
require_relative "compressor/atoms/retention_reporter"
|
|
14
|
+
require_relative "compressor/molecules/cache_store"
|
|
15
|
+
require_relative "compressor/molecules/input_resolver"
|
|
16
|
+
require_relative "compressor/organisms/exact_compressor"
|
|
17
|
+
require_relative "compressor/organisms/compact_compressor"
|
|
18
|
+
require_relative "compressor/organisms/agent_compressor"
|
|
19
|
+
require_relative "compressor/organisms/compression_runner"
|
|
20
|
+
require_relative "compressor/organisms/benchmark_runner"
|
|
21
|
+
require_relative "compressor/cli"
|
|
22
|
+
|
|
23
|
+
module Ace
|
|
24
|
+
module Compressor
|
|
25
|
+
class Error < StandardError; end
|
|
26
|
+
|
|
27
|
+
# Ruby API for other gems: compresses a string without going through the CLI.
# "exact" is handled in memory by ExactCompressor#compress_text; "agent" goes
# through compress_text_via_file.
# @param text [String] markdown/text content
# @param label [String] display label (original file path)
# @param mode [String] compression mode ("exact" or "agent")
# @return [String] compressed ContextPack records
# @raise [Error] for any other mode
def self.compress_text(text, label:, mode: "exact")
  if mode == "exact"
    Organisms::ExactCompressor.new([], mode_label: mode).compress_text(text, label: label)
  elsif mode == "agent"
    compress_text_via_file(text, label: label, mode: mode)
  else
    raise Error, "compress_text only supports exact and agent modes, got: #{mode}"
  end
end
|
|
43
|
+
|
|
44
|
+
# Resolved "compressor" configuration, computed once and memoized.
#
# The namespace is resolved through Ace::Support::Config; when that yields an
# empty hash the defaults are loaded via load_gem_defaults_fallback instead.
#
# @return [Hash] configuration values
def self.config
  return @config if @config

  gem_root = Gem.loaded_specs["ace-compressor"]&.gem_dir
  gem_root ||= File.expand_path("../..", __dir__)

  resolver = Ace::Support::Config.create(
    config_dir: ".ace",
    defaults_dir: ".ace-defaults",
    gem_path: gem_root
  )
  resolved = resolver.resolve_namespace("compressor").to_h

  @config = resolved.empty? ? load_gem_defaults_fallback(gem_root) : resolved
end
|
|
59
|
+
|
|
60
|
+
# Drops the memoized configuration so the next call to config resolves it again.
#
# @return [nil]
def self.reset_config!
  @config = nil
end
|
|
63
|
+
|
|
64
|
+
# Reads .ace-defaults/compressor/config.yml from the gem directory.
#
# @param gem_root [String, nil] gem directory; when nil the loaded gem spec's
#   directory is used, and the directory two levels above this file otherwise
# @return [Hash] parsed defaults, or {} when the file is missing, empty, or
#   cannot be loaded (any StandardError is swallowed)
def self.load_gem_defaults_fallback(gem_root = nil)
  base_dir = gem_root
  base_dir ||= Gem.loaded_specs["ace-compressor"]&.gem_dir
  base_dir ||= File.expand_path("../..", __dir__)

  config_file = File.join(base_dir, ".ace-defaults", "compressor", "config.yml")
  if File.exist?(config_file)
    YAML.safe_load_file(config_file, aliases: true) || {}
  else
    {}
  end
rescue
  {}
end
|
|
74
|
+
|
|
75
|
+
# Compresses a string with a file-based compressor by writing it to a
# temporary file first, then strips the pack header from the result.
#
# @param text [String] content to compress; blank text is returned unchanged
# @param label [#to_s] display label, used as the relative path of the temp file
# @param mode [String] compressor mode handed to build_text_compressor
# @return [String] compressor output without the ContextPack header line
def self.compress_text_via_file(text, label:, mode:)
  return text if text.to_s.strip.empty?

  # Keep the temp file inside the temp directory whatever the label looks
  # like: drop empty, "." and ".." segments so a label such as "../x.md"
  # cannot write outside (and escape cleanup of) the directory.
  separators = [File::SEPARATOR, File::ALT_SEPARATOR].compact
  segments = label.to_s.split(Regexp.union(separators)).reject do |segment|
    segment.empty? || segment == "." || segment == ".."
  end
  # A label with no usable segment would otherwise point at the directory itself.
  segments = ["input.md"] if segments.empty?

  Dir.mktmpdir("ace_compressor_text") do |tmpdir|
    source = File.join(tmpdir, *segments)
    FileUtils.mkdir_p(File.dirname(source))
    File.write(source, text)

    compressor = build_text_compressor(mode, source)
    output = compressor.compress_sources([source])
    strip_context_pack_header(output)
  end
end
|
|
88
|
+
private_class_method :compress_text_via_file
|
|
89
|
+
|
|
90
|
+
# Creates the file-based compressor used by compress_text_via_file.
# Only the "agent" mode is available here.
#
# @param mode [String] compression mode
# @param source [String] path of the temporary source file
# @return [Organisms::AgentCompressor]
# @raise [Error] for any mode other than "agent"
def self.build_text_compressor(mode, source)
  raise Error, "Unsupported text compressor mode: #{mode}" unless mode == "agent"

  Organisms::AgentCompressor.new([source])
end
|
|
98
|
+
private_class_method :build_text_compressor
|
|
99
|
+
|
|
100
|
+
# Removes a leading "H|ContextPack/" header line, if present, and trims
# surrounding whitespace from what remains.
#
# @param output [#to_s] compressor output
# @return [String] output without the header line
def self.strip_context_pack_header(output)
  rows = output.to_s.lines
  rows = rows.drop(1) if rows.first&.start_with?("H|ContextPack/")
  rows.join.strip
end
|
|
105
|
+
private_class_method :strip_context_pack_header
|
|
106
|
+
|
|
107
|
+
private_class_method :load_gem_defaults_fallback
|
|
108
|
+
end
|
|
109
|
+
end
|