ace-compressor 0.24.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,315 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ace
4
+ module Compressor
5
+ module Organisms
6
+ class CompressionRunner
7
+ SUPPORTED_FORMATS = %w[path stdio stats].freeze
8
+ SUPPORTED_MODES = %w[exact compact agent].freeze
9
+ SUPPORTED_SOURCE_SCOPES = %w[merged per-source].freeze
10
+
11
# Builds a runner for one compression request.
#
# @param paths [String, Array<String>] input path(s); coerced with Array()
# @param mode [String, Symbol] compression mode; checked against SUPPORTED_MODES in #call
# @param source_scope [String, Symbol] checked against SUPPORTED_SOURCE_SCOPES in #call
# @param output [Object, nil] explicit output target handed to the cache store
# @param format [String, Symbol, nil] console format; falls back to the
#   "default_format" config entry and finally to "path"
# @param verbose [Boolean] forwarded to the selected compressor
def initialize(paths, mode:, source_scope: "merged", output: nil, format: nil, verbose: false)
  @paths = Array(paths)
  # Normalised like source_scope and format so a Symbol mode (e.g. :exact)
  # passes the String-based SUPPORTED_MODES check and the `@mode == "..."`
  # comparisons used when picking a compressor.
  @mode = mode.to_s
  @source_scope = source_scope.to_s
  @output = output
  @format = (format || Ace::Compressor.config["default_format"] || "path").to_s
  @verbose = verbose
  @cache_store = Ace::Compressor::Molecules::CacheStore.new
end
20
+
21
# Validates the configured format, mode and source scope, resolves the input
# paths and runs the compression in merged or per-source fashion.
#
# @return [Hash] the result hash produced by run_per_source / run_for_sources
# @raise [Ace::Compressor::Error] when format, mode or source scope is unsupported
def call
  unless SUPPORTED_FORMATS.include?(@format)
    raise Ace::Compressor::Error, "Unsupported format '#{@format}'. Use --format path, stdio, or stats"
  end
  unless SUPPORTED_MODES.include?(@mode)
    raise Ace::Compressor::Error, "Unsupported mode '#{@mode}'. Use --mode exact, --mode compact, or --mode agent"
  end
  unless SUPPORTED_SOURCE_SCOPES.include?(@source_scope)
    raise Ace::Compressor::Error, "Unsupported source scope '#{@source_scope}'. Use --source-scope merged or --source-scope per-source"
  end

  input_resolver = Ace::Compressor::Molecules::InputResolver.new(@paths)
  begin
    inputs = input_resolver.call
    if @source_scope == "per-source"
      run_per_source(inputs)
    else
      run_for_sources(inputs)
    end
  ensure
    # Cleanup is optional on the resolver, hence the respond_to? probe.
    input_resolver.cleanup if input_resolver.respond_to?(:cleanup)
  end
end
42
+
43
+ private
44
+
45
# Compresses the given resolved inputs as one pack, reusing the local or
# shared cache when possible, and writes the pack to its output path.
#
# @param resolved_inputs [Array<Hash>] entries with at least :content_path
# @return [Hash] :console_output, :ignored_paths, :output_path, :cache_hit,
#   :metadata, :refusal_lines, :fallback_lines and :exit_code (1 when the
#   pack contains REFUSAL| lines, otherwise 0)
def run_for_sources(resolved_inputs)
  resolved_paths = Array(resolved_inputs).map { |entry| entry.fetch(:content_path) }
  compressor = compressor_for_mode(resolved_paths)
  sources = compressor.resolve_sources
  source_metadata = build_source_metadata(sources, resolved_inputs)
  manifest = @cache_store.manifest(mode: @mode, sources: source_metadata)

  # If ace-bundle already compressed this content, override original sizes with
  # the true uncompressed values and pass through without re-compression.
  bundle_stats = extract_bundle_compression_stats(resolved_inputs)
  if bundle_stats
    manifest["original_bytes"] = bundle_stats["original_bytes"]
    manifest["original_lines"] = bundle_stats["original_lines"]
  end

  canonical = @cache_store.canonical_paths(mode: @mode, sources: source_metadata, manifest_key: manifest["key"])
  shared_manifest = @cache_store.shared_manifest(mode: @mode, sources: source_metadata)
  shared_canonical = if shared_manifest
    @cache_store.shared_canonical_paths(mode: @mode, sources: source_metadata, manifest_key: shared_manifest["key"])
  end

  local_hit = @cache_store.cache_hit?(pack_path: canonical[:pack_path], metadata_path: canonical[:metadata_path])
  # Probe the shared cache exactly once and BEFORE any build step: both build
  # helpers write the shared entry, so probing afterwards would report a
  # freshly built pack as a cache hit.
  shared_hit = !local_hit && shared_cache_hit?(shared_canonical)

  content, metadata =
    if local_hit
      cached = @cache_store.read_pack(canonical[:pack_path])
      stored = @cache_store.read_metadata(canonical[:metadata_path])
      [cached, hydrate_metadata(stored, manifest, canonical, cached)]
    elsif shared_hit
      hydrate_from_shared_cache(manifest, canonical, shared_canonical)
    elsif bundle_stats
      build_passthrough_entry(sources, manifest, canonical, shared_canonical)
    else
      build_cache_entry(compressor, sources, source_metadata, manifest, canonical, shared_canonical)
    end
  cache_hit = local_hit || shared_hit

  output_path = @cache_store.output_path_for(
    output: @output,
    mode: @mode,
    sources: source_metadata,
    manifest_key: manifest["key"]
  )

  # The canonical pack file already holds the content; only copy elsewhere.
  @cache_store.write_output(output_path, content) unless output_path == canonical[:pack_path]
  refusals = refusal_lines(content)
  fallbacks = fallback_lines(content)

  {
    console_output: format_console_output(
      content: content,
      cache_hit: cache_hit,
      output_path: output_path,
      metadata: metadata
    ),
    ignored_paths: compressor.ignored_paths,
    output_path: output_path,
    cache_hit: cache_hit,
    metadata: metadata,
    refusal_lines: refusals,
    fallback_lines: fallbacks,
    exit_code: refusals.empty? ? 0 : 1
  }
end
112
+
113
# Runs run_for_sources once per resolved input and merges the individual
# result hashes into one aggregate result.
#
# @param resolved_inputs [Array<Hash>] resolved input entries
# @return [Hash] aggregate result; :output_path is the first run's path,
#   :output_paths lists all of them, :cache_hit is true only when every run
#   hit the cache, :exit_code is 1 when any run had a non-zero exit code
# @raise [Ace::Compressor::Error] when several inputs target a single output file
def run_per_source(resolved_inputs)
  if resolved_inputs.size > 1 && output_file_target?
    raise Ace::Compressor::Error, "Per-source mode with multiple inputs requires --output to be a directory path"
  end

  results = resolved_inputs.map { |entry| run_for_sources([entry]) }
  first_result = results.first
  any_failure = results.any? { |result| result[:exit_code].to_i.nonzero? }

  {
    console_output: join_console_outputs(results.map { |result| result[:console_output] }),
    ignored_paths: results.flat_map { |result| result[:ignored_paths] }.uniq,
    output_path: first_result && first_result[:output_path],
    output_paths: results.map { |result| result[:output_path] },
    cache_hit: results.all? { |result| result[:cache_hit] },
    metadata: results.map { |result| result[:metadata] },
    refusal_lines: results.flat_map { |result| result[:refusal_lines] },
    fallback_lines: results.flat_map { |result| result[:fallback_lines] },
    exit_code: any_failure ? 1 : 0
  }
end
132
+
133
# Joins the non-empty console outputs of several runs into one string.
# Stats blocks are separated by a blank line, every other format by a newline.
#
# @param outputs [Array<#to_s>, nil] per-run console outputs
# @return [String] joined text, or "" when nothing remains
def join_console_outputs(outputs)
  kept = Array(outputs).each_with_object([]) do |output, acc|
    text = output.to_s
    acc << text unless text.empty?
  end
  return "" if kept.empty?

  if @format == "stats"
    kept.join("\n\n")
  else
    kept.join("\n")
  end
end
140
+
141
# Tells whether @output names a single file rather than a directory.
#
# The target is converted with to_s once and that String is used for every
# check, so Pathname (or other non-String) outputs no longer fail on
# `end_with?`.
#
# @return [Boolean] false when no output is set, when it ends with a path
#   separator, or when it is an existing directory; true otherwise
def output_file_target?
  target = @output.to_s
  return false if target.strip.empty?

  # ALT_SEPARATOR is nil on platforms without an alternate separator.
  separators = [File::SEPARATOR, File::ALT_SEPARATOR].compact
  return false if target.end_with?(*separators)

  !Dir.exist?(File.expand_path(target))
end
147
+
148
# Builds a pack from the raw source contents without compressing them and
# stores it in the local cache and, when given, the shared cache.
#
# @param sources [Array<String>] files whose contents are concatenated
# @param manifest [Hash] manifest used to derive the metadata
# @param canonical [Hash] local cache paths (:pack_path, :metadata_path, :short_key)
# @param shared_canonical [Hash, nil] shared cache paths, same keys
# @return [Array(String, Hash)] pack content and its local metadata
def build_passthrough_entry(sources, manifest, canonical, shared_canonical = nil)
  bodies = sources.map { |path| File.read(path) }
  pack_header = Ace::Compressor::Models::ContextPack.header(@mode)
  content = "#{pack_header}\n#{bodies.join("\n")}"

  metadata = cache_metadata(manifest, canonical[:short_key], content)
  @cache_store.write_cache(
    pack_path: canonical[:pack_path],
    metadata_path: canonical[:metadata_path],
    content: content,
    metadata: metadata
  )
  return [content, metadata] unless shared_canonical

  # The shared copy carries the same metadata plus a "cache_scope" marker.
  shared_seed = metadata.merge("cache_scope" => "shared")
  @cache_store.write_cache(
    pack_path: shared_canonical[:pack_path],
    metadata_path: shared_canonical[:metadata_path],
    content: content,
    metadata: cache_metadata(manifest, shared_canonical[:short_key], content, shared_seed)
  )
  [content, metadata]
end
170
+
171
# Returns the :bundle_compression_stats value of the first resolved input
# that carries one.
#
# @param resolved_inputs [Array<Hash>, nil] resolved input entries
# @return [Object, nil] the first truthy stats value, or nil when none exists
def extract_bundle_compression_stats(resolved_inputs)
  carrier = Array(resolved_inputs).find { |entry| entry[:bundle_compression_stats] }
  carrier && carrier[:bundle_compression_stats]
end
178
+
179
# Compresses the sources and stores the resulting pack in the local cache
# and, when given, the shared cache.
#
# @param compressor [Object] compressor handed to compress_content
# @param sources [Array<String>] source files to compress
# @param source_metadata [Array<Hash>] per-source metadata entries
# @param manifest [Hash] manifest used to derive the metadata
# @param canonical [Hash] local cache paths (:pack_path, :metadata_path, :short_key)
# @param shared_canonical [Hash, nil] shared cache paths, same keys
# @return [Array(String, Hash)] pack content and its local metadata
def build_cache_entry(compressor, sources, source_metadata, manifest, canonical, shared_canonical = nil)
  content = compress_content(compressor, sources, source_metadata)

  metadata = cache_metadata(manifest, canonical[:short_key], content)
  @cache_store.write_cache(
    pack_path: canonical[:pack_path],
    metadata_path: canonical[:metadata_path],
    content: content,
    metadata: metadata
  )
  return [content, metadata] unless shared_canonical

  # The shared copy carries the same metadata plus a "cache_scope" marker.
  shared_seed = metadata.merge("cache_scope" => "shared")
  @cache_store.write_cache(
    pack_path: shared_canonical[:pack_path],
    metadata_path: shared_canonical[:metadata_path],
    content: content,
    metadata: cache_metadata(manifest, shared_canonical[:short_key], content, shared_seed)
  )
  [content, metadata]
end
199
+
200
# Invokes the compressor, passing a content-path => source-path map when the
# compressor's compress_sources accepts a source_paths keyword.
#
# @param compressor [#compress_sources] compressor to run
# @param sources [Array<String>] source files to compress
# @param source_metadata [Array<Hash>] entries with :content_path and :source_path
# @return [Object] whatever compress_sources returns
def compress_content(compressor, sources, source_metadata)
  display_paths = source_metadata.to_h do |entry|
    [entry.fetch(:content_path), entry.fetch(:source_path)]
  end

  # A `source_paths:` keyword (optional or required) or a `**rest` catch-all
  # both mean the keyword can be passed safely.
  accepts_source_paths = compressor.method(:compress_sources).parameters.any? do |kind, name|
    next true if kind == :keyrest

    (kind == :key || kind == :keyreq) && name == :source_paths
  end

  if accepts_source_paths
    compressor.compress_sources(sources, source_paths: display_paths)
  else
    compressor.compress_sources(sources)
  end
end
214
+
215
# Pairs every compressor source with the resolved input it came from.
# Sources without a matching resolved input are described as plain files
# whose source path equals their content path.
#
# @param sources [Array<String>] source paths reported by the compressor
# @param resolved_inputs [Array<Hash>] entries with :content_path,
#   :source_path and :source_kind
# @return [Array<Hash>] hashes with :content_path, :source_path, :source_kind
def build_source_metadata(sources, resolved_inputs)
  by_content_path = Array(resolved_inputs).to_h do |entry|
    [File.expand_path(entry.fetch(:content_path)), entry]
  end

  Array(sources).map do |source|
    absolute = File.expand_path(source)
    origin = by_content_path[absolute]
    {
      content_path: absolute,
      source_path: origin ? origin.fetch(:source_path) : absolute,
      source_kind: origin ? origin.fetch(:source_kind) : "file"
    }
  end
end
238
+
239
# Chooses what to print for one run according to @format: the pack content
# for "stdio", the cache store's stats block for "stats", and the output
# path for anything else.
#
# @param content [String] pack content
# @param cache_hit [Boolean] whether the pack came from a cache
# @param output_path [String] where the pack was written
# @param metadata [Hash] pack metadata
# @return [Object] the value to print
def format_console_output(content:, cache_hit:, output_path:, metadata:)
  return content if @format == "stdio"
  return output_path unless @format == "stats"

  @cache_store.stats_block(mode: @mode, cache_hit: cache_hit, output_path: output_path, metadata: metadata)
end
249
+
250
# Produces the metadata hash for a pack. Values computed here override
# same-named keys in existing_metadata; other existing keys are kept.
#
# @param manifest [Hash] provides "key", "sources", "original_bytes", "original_lines"
# @param short_key [Object] stored under "short_key"
# @param content [String] pack content, measured for "packed_bytes"/"packed_lines"
# @param existing_metadata [Hash, nil] metadata to start from
# @return [Hash] a new hash; existing_metadata is not modified
def cache_metadata(manifest, short_key, content, existing_metadata = nil)
  sources = manifest["sources"]
  computed = {
    "schema" => Ace::Compressor::Models::ContextPack::SCHEMA,
    "mode" => @mode,
    "key" => manifest["key"],
    "short_key" => short_key,
    "sources" => sources,
    "file_count" => sources.size,
    "original_bytes" => manifest["original_bytes"],
    "original_lines" => manifest["original_lines"],
    "packed_bytes" => content.bytesize,
    "packed_lines" => content.lines.count
  }
  (existing_metadata || {}).merge(computed)
end
265
+
266
# Recomputes the metadata of a locally cached pack and rewrites the cache
# entry when the recomputed metadata differs from what was stored.
#
# @param existing_metadata [Hash, nil] metadata read from the cache
# @param manifest [Hash] current manifest
# @param canonical [Hash] local cache paths (:pack_path, :metadata_path, :short_key)
# @param content [String] cached pack content
# @return [Hash] the recomputed metadata
def hydrate_metadata(existing_metadata, manifest, canonical, content)
  refreshed = cache_metadata(manifest, canonical[:short_key], content, existing_metadata)

  unless refreshed == existing_metadata
    @cache_store.write_cache(
      pack_path: canonical[:pack_path],
      metadata_path: canonical[:metadata_path],
      content: content,
      metadata: refreshed
    )
  end

  refreshed
end
278
+
279
# Instantiates the compressor matching @mode: CompactCompressor for
# "compact", AgentCompressor for "agent", ExactCompressor for anything else.
#
# @param paths [Array<String>] content paths to compress
# @return [Object] the compressor instance
def compressor_for_mode(paths)
  case @mode
  when "compact"
    CompactCompressor.new(paths, verbose: @verbose)
  when "agent"
    AgentCompressor.new(paths, verbose: @verbose)
  else
    ExactCompressor.new(paths, verbose: @verbose, mode_label: @mode)
  end
end
285
+
286
# Collects the whitespace-stripped lines of the pack that begin with "REFUSAL|".
#
# @param content [#to_s] pack content
# @return [Array<String>] matching lines in their original order
def refusal_lines(content)
  content.to_s.each_line.filter_map do |line|
    stripped = line.strip
    stripped if stripped.start_with?("REFUSAL|")
  end
end
289
+
290
# Collects the whitespace-stripped lines of the pack that begin with "FALLBACK|".
#
# @param content [#to_s] pack content
# @return [Array<String>] matching lines in their original order
def fallback_lines(content)
  stripped_lines = content.to_s.each_line.map { |line| line.strip }
  stripped_lines.select { |line| line.start_with?("FALLBACK|") }
end
293
+
294
# Asks the cache store whether the shared cache entry is present.
#
# @param shared_canonical [Hash, nil] shared cache paths (:pack_path, :metadata_path)
# @return [Boolean, Object] false when no shared paths are given, otherwise
#   the cache store's cache_hit? answer
def shared_cache_hit?(shared_canonical)
  if shared_canonical
    @cache_store.cache_hit?(
      pack_path: shared_canonical[:pack_path],
      metadata_path: shared_canonical[:metadata_path]
    )
  else
    false
  end
end
299
+
300
# Copies a pack from the shared cache into the local cache, recomputing its
# metadata for the local entry and tagging it with "cache_scope" => "shared".
#
# @param manifest [Hash] current manifest
# @param canonical [Hash] local cache paths (:pack_path, :metadata_path, :short_key)
# @param shared_canonical [Hash] shared cache paths (:pack_path, :metadata_path)
# @return [Array(String, Hash)] pack content and the metadata written locally
def hydrate_from_shared_cache(manifest, canonical, shared_canonical)
  content = @cache_store.read_pack(shared_canonical[:pack_path])
  shared_seed = @cache_store.read_metadata(shared_canonical[:metadata_path]).merge("cache_scope" => "shared")
  metadata = cache_metadata(manifest, canonical[:short_key], content, shared_seed)

  @cache_store.write_cache(
    pack_path: canonical[:pack_path],
    metadata_path: canonical[:metadata_path],
    content: content,
    metadata: metadata
  )

  [content, metadata]
end
312
+ end
313
+ end
314
+ end
315
+ end
@@ -0,0 +1,187 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "find"
4
+ require "pathname"
5
+
6
+ module Ace
7
+ module Compressor
8
+ module Organisms
9
+ class ExactCompressor
10
+ SUPPORTED_EXTENSIONS = %w[.md .markdown .mdown .mkd .txt .text].freeze
11
+ BINARY_SNIFF_BYTES = 4096
12
+
13
+ attr_reader :ignored_paths
14
+
15
# Sets up an exact-mode compressor.
#
# @param paths [String, Array<String>] input files or directories; coerced with Array()
# @param verbose [Boolean] when true, skipped directory entries are recorded in ignored_paths
# @param mode_label [String] mode name used in error messages
def initialize(paths, verbose: false, mode_label: "exact")
  @paths, @verbose, @mode_label = Array(paths), verbose, mode_label
  @parser = Ace::Compressor::Atoms::MarkdownParser.new
  # The transformer is kept as a class; an instance is created per source.
  @transformer = Ace::Compressor::Atoms::CanonicalBlockTransformer
  @ignored_paths = []
end
23
+
24
# Resolves the configured paths and compresses the resulting sources.
#
# @return [String] the pack text returned by compress_sources
def call
  sources = resolve_sources
  compress_sources(sources)
end
27
+
28
# Expands the configured paths into a sorted, de-duplicated list of absolute
# source file paths. Directories contribute their supported files, explicit
# files are validated individually.
#
# De-duplication uses Array#uniq rather than Set, because this file does not
# require "set" and Set is not auto-loaded on older Ruby versions.
#
# @return [Array<String>] sorted absolute file paths
# @raise [Ace::Compressor::Error] when a path does not exist, a directory has
#   no supported files, an explicit file fails validation, or nothing remains
def resolve_sources
  resolved = @paths.flat_map do |raw_path|
    path = raw_path.to_s
    expanded = File.expand_path(path)

    if File.directory?(expanded)
      directory_files = collect_supported_directory_files(expanded)
      if directory_files.empty?
        raise Ace::Compressor::Error,
          "Directory has no supported markdown/text sources: #{path}. Supported extensions: #{SUPPORTED_EXTENSIONS.join(", ")}"
      end
      directory_files
    elsif File.file?(expanded)
      # Validation runs for every explicit mention, including repeats.
      validate_explicit_file!(expanded, path)
      [expanded]
    else
      raise Ace::Compressor::Error, "Input source not found: #{path}"
    end
  end

  sorted = resolved.uniq.sort
  if sorted.empty?
    raise Ace::Compressor::Error,
      "No supported markdown/text sources found. Supported extensions: #{SUPPORTED_EXTENSIONS.join(", ")}"
  end

  sorted
end
67
+
68
# Builds the pack text for the given source files: a header line followed,
# per source, by a file line and either the transformed blocks or, when the
# parser finds no blocks, the raw file text.
#
# @param sources [Array<String>] files to read and compress
# @param source_paths [Hash, nil] optional content-path => display-path map
# @return [String] pack records joined with newlines
# @raise [Ace::Compressor::Error] when a source file contains only whitespace
def compress_sources(sources, source_paths: nil)
  header = Ace::Compressor::Models::ContextPack.header("exact")

  records = sources.each_with_object([header]) do |source, acc|
    label = source_label(display_source(source, source_paths))
    acc << Ace::Compressor::Models::ContextPack.file_line(label)

    text = File.read(source)
    if text.strip.empty?
      raise Ace::Compressor::Error, "Input file is empty. #{mode_title} mode requires content: #{source}"
    end

    blocks = @parser.call(text)
    if blocks.empty?
      acc << text
    else
      acc.concat(transformed_lines(label, blocks))
    end
  end

  records.join("\n")
end
88
+
89
# Compresses an in-memory string; no file is read.
# The result carries no ContextPack header line. Blank input, and input for
# which the parser yields no blocks, is handed back unchanged.
# @param text [String] markdown/text content to compress
# @param label [String] display label for the source (e.g. original file path)
# @return [String] file line plus transformed records, or +text+ itself
def compress_text(text, label:)
  return text if text.to_s.strip.empty?

  blocks = @parser.call(text)
  return text if blocks.empty?

  records = [Ace::Compressor::Models::ContextPack.file_line(label)]
  records.concat(@transformer.new(label).call(blocks))
  records.join("\n")
end
105
+
106
+ private
107
+
108
# Runs a fresh transformer, created for this source label, over the blocks.
#
# @param source [String] source label passed to the transformer constructor
# @param blocks [Array] parsed blocks
# @return [Object] whatever the transformer's call returns
def transformed_lines(source, blocks)
  @transformer.new(source).call(blocks)
end
112
+
113
# Looks up the display path for a source, trying the expanded path first
# and the path as given second.
#
# @param source [String] source file path
# @param source_paths [Hash, nil] content-path => display-path map
# @return [Object] the mapped display path, or +source+ when there is no
#   map or no truthy entry for it
def display_source(source, source_paths)
  if source_paths
    [File.expand_path(source), source].each do |key|
      mapped = source_paths[key]
      return mapped if mapped
    end
  end

  source
end
118
+
119
# Walks a directory tree and gathers the regular files that have a supported
# extension and are not binary. In verbose mode every other regular file is
# appended to @ignored_paths.
#
# @param directory [String] directory to walk
# @return [Array<String>] sorted absolute paths of accepted files
def collect_supported_directory_files(directory)
  accepted = []

  Find.find(directory) do |entry|
    next unless File.file?(entry)

    absolute = File.expand_path(entry)
    # The binary sniff only runs for files whose extension is supported.
    if supported_extension?(entry) && !binary_file?(absolute)
      accepted << absolute
    elsif @verbose
      @ignored_paths << absolute
    end
  end

  accepted.sort
end
139
+
140
# Rejects an explicitly named file that is binary or has an unsupported
# extension. A binary file is reported as binary regardless of its extension.
#
# @param expanded [String] absolute path used for the checks
# @param original [String] path as given by the caller, used in messages
# @return [nil] when the file is acceptable
# @raise [Ace::Compressor::Error] for binary or unsupported files
def validate_explicit_file!(expanded, original)
  if binary_file?(expanded)
    raise Ace::Compressor::Error, "Binary input is not supported in #{mode_label} mode: #{original}"
  end
  return if supported_extension?(expanded)

  raise Ace::Compressor::Error,
    "Unsupported explicit file: #{original}. Supported extensions: #{SUPPORTED_EXTENSIONS.join(", ")}"
end
154
+
155
# Checks the file extension, case-insensitively, against SUPPORTED_EXTENSIONS.
#
# @param path [String] file path
# @return [Boolean] true when the extension is supported
def supported_extension?(path)
  extension = File.extname(path).downcase
  SUPPORTED_EXTENSIONS.include?(extension)
end
158
+
159
# Treats a file as binary when its first BINARY_SNIFF_BYTES bytes contain a
# NUL byte. Any error while reading yields false.
#
# @param path [String] file to inspect
# @return [Boolean] true when a NUL byte is found in the sniffed prefix
def binary_file?(path)
  head = File.binread(path, BINARY_SNIFF_BYTES)
  # binread with a length returns nil for an empty file.
  head.nil? ? false : head.include?("\x00")
rescue StandardError
  false
end
165
+
166
# Returns the label used for a source in the pack: the path relative to the
# current working directory when the source lies inside it, otherwise the
# source as given.
#
# "Outside" is detected by a leading ".." path SEGMENT, not by a ".." string
# prefix, so in-project entries such as "..notes/a.md" keep their relative
# label.
#
# @param source [String] source path
# @return [String] relative label, or +source+ when it is outside the working
#   directory or cannot be made relative (ArgumentError from Pathname)
def source_label(source)
  pathname = Pathname.new(source)
  project_root = Pathname.new(Dir.pwd)
  relative = pathname.relative_path_from(project_root).to_s
  outside = relative == ".." || relative.start_with?("../")
  return relative unless outside

  source
rescue ArgumentError
  source
end
176
+
177
# Mode name used in messages; falls back to "exact" when the configured
# label is nil or blank.
#
# @return [Object] the configured label, or "exact"
def mode_label
  return "exact" if @mode_label.to_s.strip.empty?

  @mode_label
end
180
+
181
# Capitalised form of mode_label, used at the start of a sentence.
#
# @return [Object] the result of calling capitalize on mode_label
def mode_title
  label = mode_label
  label.capitalize
end
184
+ end
185
+ end
186
+ end
187
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module Ace
  module Compressor
    # Version string of this gem release.
    VERSION = "0.24.1"
  end
end
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fileutils"
4
+ require "tmpdir"
5
+ require "yaml"
6
+ require "ace/support/config"
7
+
8
+ require_relative "compressor/version"
9
+ require_relative "compressor/models/context_pack"
10
+ require_relative "compressor/atoms/markdown_parser"
11
+ require_relative "compressor/atoms/canonical_block_transformer"
12
+ require_relative "compressor/atoms/compact_policy_classifier"
13
+ require_relative "compressor/atoms/retention_reporter"
14
+ require_relative "compressor/molecules/cache_store"
15
+ require_relative "compressor/molecules/input_resolver"
16
+ require_relative "compressor/organisms/exact_compressor"
17
+ require_relative "compressor/organisms/compact_compressor"
18
+ require_relative "compressor/organisms/agent_compressor"
19
+ require_relative "compressor/organisms/compression_runner"
20
+ require_relative "compressor/organisms/benchmark_runner"
21
+ require_relative "compressor/cli"
22
+
23
+ module Ace
24
+ module Compressor
25
+ class Error < StandardError; end
26
+
27
# Ruby API for other gems: compresses a string without going through the CLI.
# "exact" is handled by ExactCompressor#compress_text; "agent" goes through
# compress_text_via_file.
# @param text [String] markdown/text content
# @param label [String] display label (original file path)
# @param mode [String] compression mode ("exact" or "agent")
# @return [String] compressed ContextPack records
# @raise [Error] for any other mode
def self.compress_text(text, label:, mode: "exact")
  if mode == "exact"
    Organisms::ExactCompressor.new([], mode_label: mode).compress_text(text, label: label)
  elsif mode == "agent"
    compress_text_via_file(text, label: label, mode: mode)
  else
    raise Error, "compress_text only supports exact and agent modes, got: #{mode}"
  end
end
43
+
44
# Memoised "compressor" configuration namespace. When the resolved namespace
# is empty, the gem's bundled defaults file is used instead.
#
# @return [Hash] configuration values
def self.config
  return @config if @config

  gem_root = Gem.loaded_specs["ace-compressor"]&.gem_dir
  # Not running from an installed gem: derive the root from this file's location.
  gem_root ||= File.expand_path("../..", __dir__)

  resolver = Ace::Support::Config.create(
    config_dir: ".ace",
    defaults_dir: ".ace-defaults",
    gem_path: gem_root
  )
  resolved = resolver.resolve_namespace("compressor").to_h

  @config = resolved.empty? ? load_gem_defaults_fallback(gem_root) : resolved
end
59
+
60
# Drops the memoised configuration so the next call to config resolves it again.
#
# @return [nil]
def self.reset_config!
  @config = nil
end
63
+
64
# Reads ".ace-defaults/compressor/config.yml" below the gem root.
# Best effort: a missing file, an empty document or any error while loading
# results in an empty hash.
#
# @param gem_root [String, nil] gem directory; when nil it is taken from the
#   loaded gem spec or derived from this file's location
# @return [Object] the parsed YAML document, or {} as described above
def self.load_gem_defaults_fallback(gem_root = nil)
  base_dir = gem_root
  base_dir ||= Gem.loaded_specs["ace-compressor"]&.gem_dir
  base_dir ||= File.expand_path("../..", __dir__)

  config_file = File.join(base_dir, ".ace-defaults", "compressor", "config.yml")
  if File.exist?(config_file)
    YAML.safe_load_file(config_file, aliases: true) || {}
  else
    {}
  end
rescue StandardError
  {}
end
74
+
75
# Compresses a string with a file-based compressor by writing it to a
# temporary file named after the label, then strips the pack header from the
# compressor's output. The temporary directory is removed afterwards.
#
# The label is reduced to its real path segments before it is joined to the
# temporary directory: empty, "." and ".." segments are dropped so a label
# such as "../../notes.md" cannot place the file outside that directory, and
# an empty label no longer targets the directory itself.
#
# @param text [String] content to compress
# @param label [#to_s] display label; its segments name the temporary file
# @param mode [String] mode handed to build_text_compressor
# @return [String] compressed records without header; +text+ itself when blank
def self.compress_text_via_file(text, label:, mode:)
  return text if text.to_s.strip.empty?

  # ALT_SEPARATOR is nil on platforms without an alternate separator.
  separators = Regexp.union(*[File::SEPARATOR, File::ALT_SEPARATOR].compact)
  segments = label.to_s.split(separators).reject do |segment|
    segment.empty? || segment == "." || segment == ".."
  end
  # NOTE(review): the ".md" fallback name assumes the compressor accepts
  # markdown files - confirm against AgentCompressor.
  segments = ["text.md"] if segments.empty?

  Dir.mktmpdir("ace_compressor_text") do |tmpdir|
    source = File.join(tmpdir, *segments)
    FileUtils.mkdir_p(File.dirname(source))
    File.write(source, text)

    compressor = build_text_compressor(mode, source)
    output = compressor.compress_sources([source])
    strip_context_pack_header(output)
  end
end
88
+ private_class_method :compress_text_via_file
89
+
90
# Creates the file-based compressor used for text compression.
# Only "agent" mode is available here.
#
# @param mode [String] compression mode
# @param source [String] path of the file to compress
# @return [Organisms::AgentCompressor] compressor for the single source
# @raise [Error] for any mode other than "agent"
def self.build_text_compressor(mode, source)
  raise Error, "Unsupported text compressor mode: #{mode}" unless mode == "agent"

  Organisms::AgentCompressor.new([source])
end
98
+ private_class_method :build_text_compressor
99
+
100
# Removes a leading "H|ContextPack/" header line, if present, and trims
# surrounding whitespace from what remains.
#
# @param output [#to_s] compressor output
# @return [String] output without header line
def self.strip_context_pack_header(output)
  rows = output.to_s.lines
  rows = rows.drop(1) if rows.first&.start_with?("H|ContextPack/")
  rows.join.strip
end
105
+ private_class_method :strip_context_pack_header
106
+
107
+ private_class_method :load_gem_defaults_fallback
108
+ end
109
+ end