ace-compressor 0.24.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,325 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "open3"
5
+
6
module Ace
  module Compressor
    module Organisms
      # Agent mode keeps the exact ContextPack structure and asks the LLM to
      # rewrite only compressible payloads. The model never emits headers, file
      # markers, or section markers.
      #
      # Flow: the exact-mode output is produced first; SUMMARY/FACT lines and
      # sufficiently large LIST lines are sent to the agent as JSON records, and
      # each accepted rewrite replaces its original line. A failed agent command,
      # unparseable JSON, or a response of an unexpected shape leaves the
      # original lines in place.
      class AgentCompressor
        # Line prefixes whose payload text may be rewritten by the agent.
        PAYLOAD_PREFIXES = ["SUMMARY|", "FACT|"].freeze
        # Line prefixes that are always passed through untouched.
        PROTECTED_PREFIXES = [
          "RULE|", "CONSTRAINT|", "CMD|", "TABLE|", "U|", "CODE|", "PROBLEMS|",
          "EXAMPLE|", "EXAMPLE_REF|", "FILES|", "TREE|", "LOSS|"
        ].freeze
        # A LIST line is offered for rewriting when it reaches either threshold.
        LIST_REWRITE_MIN_ITEMS = 5
        LIST_REWRITE_MIN_BYTES = 140
        # Tokens dropped from rewritten list items.
        LIST_STOPWORDS = %w[
          a an and as at by for from in into is of on or that the this to via with within
        ].freeze
        # Token abbreviations applied to rewritten list items.
        LIST_TOKEN_MAP = {
          "architecture" => "arch",
          "architectural" => "arch",
          "configuration" => "config",
          "documentation" => "docs",
          "generation" => "gen",
          "management" => "mgmt",
          "repository" => "repo",
          "repositories" => "repos",
          "development" => "dev",
          "integration" => "integr",
          "execution" => "exec",
          "reporting" => "reports",
          "organization" => "org",
          "organizations" => "orgs",
          "capabilities" => "caps",
          "capability" => "cap",
          "foundation" => "base",
          "tracking" => "track",
          "powered" => "pwr",
          "detected" => "detect",
          "matching" => "match"
        }.freeze

        # @param paths [Object] forwarded unchanged to ExactCompressor
        # @param verbose [Boolean] forwarded to ExactCompressor
        # @param shell_runner [#call, nil] callable receiving an argv Array and
        #   returning +[stdout, stderr, status]+; defaults to Open3.capture3
        def initialize(paths, verbose: false, shell_runner: nil)
          @exact = ExactCompressor.new(paths, verbose: verbose, mode_label: "agent")
          @shell_runner = shell_runner || method(:default_shell_runner)
        end

        # Resolves the sources and compresses them in agent mode.
        #
        # @return [String] the rebuilt pack, or the exact output when it is empty
        def call
          compress_sources(resolve_sources)
        end

        # Delegates source resolution to the wrapped exact compressor.
        def resolve_sources
          @exact.resolve_sources
        end

        # Delegates to the wrapped exact compressor's ignored paths.
        def ignored_paths
          @exact.ignored_paths
        end

        # Runs exact compression, then splices agent rewrites into its output.
        #
        # @param sources [Object] forwarded to ExactCompressor#compress_sources
        # @param source_paths [Object, nil] forwarded to ExactCompressor#compress_sources
        # @return [String] newline-joined pack lines; the untouched exact output
        #   when it contains no non-blank lines
        def compress_sources(sources, source_paths: nil)
          exact_output = @exact.compress_sources(sources, source_paths: source_paths)
          exact_lines = normalize_output_lines(exact_output)
          return exact_output if exact_lines.empty?

          job = build_rewrite_job(exact_lines)
          rewrites = rewrite_payloads(job[:records])
          rebuild_output(job[:entries], rewrites)
        end

        private

        # Splits the exact lines into output entries and rewrite records.
        # The first exact line is replaced by the agent-mode header. An entry is
        # either a literal line (String) or a placeholder Hash pointing at a record.
        #
        # @return [Hash] +{entries: Array, records: Array<Hash>}+
        def build_rewrite_job(exact_lines)
          entries = [Ace::Compressor::Models::ContextPack.header("agent")]
          records = []
          current_file = nil
          current_section = nil
          next_id = 1

          Array(exact_lines).drop(1).each do |line|
            if line.start_with?("FILE|")
              current_file = line.sub("FILE|", "").strip
              entries << line
              next
            end

            if line.start_with?("SEC|")
              current_section = line.sub("SEC|", "").strip
              entries << line
              next
            end

            record = rewrite_record_for(line, current_file, current_section, next_id)
            if record
              records << record
              entries << {rewrite_id: record[:id], original_line: line}
              next_id += 1
            else
              entries << line
            end
          end

          {entries: entries, records: records}
        end

        # Builds the rewrite record for a line, or nil when the line must be
        # kept verbatim (protected prefix, unknown prefix, or a small LIST).
        def rewrite_record_for(line, current_file, current_section, next_id)
          return nil if line.start_with?(*PROTECTED_PREFIXES)

          if line.start_with?(*PAYLOAD_PREFIXES)
            type = line.split("|", 2).first
            return {
              id: record_id(next_id),
              type: type,
              file: current_file,
              section: current_section,
              payload: record_payload(line)
            }
          end

          return nil unless line.start_with?("LIST|")

          name, items = parse_list_line(line)
          return nil unless list_rewrite_eligible?(line, items)

          {
            id: record_id(next_id),
            type: "LIST",
            file: current_file,
            section: current_section,
            name: name,
            items: items
          }
        end

        # A non-empty list qualifies when it has enough items or enough bytes.
        def list_rewrite_eligible?(line, items)
          return false if items.empty?

          items.length >= LIST_REWRITE_MIN_ITEMS || line.bytesize >= LIST_REWRITE_MIN_BYTES
        end

        # Asks the agent to rewrite the records.
        #
        # @return [Hash{String => Hash}] accepted rewrites keyed by record id;
        #   empty when there is nothing to rewrite or the agent step fails
        def rewrite_payloads(records)
          return {} if records.empty?

          prompt = compose_prompt(records)
          response = invoke_agent(prompt)
          extract_rewrites(records, response)
        rescue Ace::Compressor::Error, JSON::ParserError
          # Best effort: fall back to the exact lines.
          {}
        end

        # Template text followed by the records wrapped in <records_json>.
        def compose_prompt(records)
          <<~PROMPT
            #{agent_template}

            <records_json>
            #{JSON.pretty_generate("records" => prompt_records(records))}
            </records_json>
          PROMPT
        end

        # String-keyed view of the records sent to the agent. The list name is
        # deliberately not sent; only the items are offered for rewriting.
        def prompt_records(records)
          Array(records).map do |record|
            base = {
              "id" => record[:id],
              "type" => record[:type],
              "file" => record[:file],
              "section" => record[:section]
            }

            if record[:type] == "LIST"
              base.merge("items" => record[:items])
            else
              base.merge("payload" => record[:payload])
            end
          end
        end

        # Output of `ace-bundle <template uri>`, fetched once per instance.
        def agent_template
          @agent_template ||= execute_command(["ace-bundle", agent_template_uri]).strip
        end

        # Runs `ace-llm <model> <prompt>` and returns its stripped stdout.
        def invoke_agent(prompt)
          execute_command(["ace-llm", agent_model, prompt]).strip
        end

        # Maps the agent response back onto the known records.
        #
        # Entries that are not JSON objects, reference unknown ids, or fail
        # normalization are skipped, so a partially usable response still
        # contributes its valid rewrites.
        #
        # @raise [JSON::ParserError] when the response is not valid JSON
        def extract_rewrites(records, response)
          candidates = response_records(parse_agent_response(response))
          records_by_id = Array(records).each_with_object({}) { |record, hash| hash[record[:id]] = record }

          candidates.each_with_object({}) do |candidate, rewrites|
            # The model may emit null or scalar entries; indexing them would raise.
            next unless candidate.is_a?(Hash)

            original = records_by_id[candidate["id"]]
            next unless original

            rewrite = normalized_rewrite(original, candidate)
            rewrites[original[:id]] = rewrite if rewrite
          end
        end

        # Extracts the candidate list from parsed JSON of any shape.
        # Only an object carrying "records" is meaningful; a lone record object
        # is wrapped rather than exploded into key/value pairs.
        def response_records(parsed)
          return [] unless parsed.is_a?(Hash)

          case (candidates = parsed["records"])
          when Array then candidates
          when Hash then [candidates]
          else []
          end
        end

        # Parses the response as JSON after removing an optional Markdown code fence.
        def parse_agent_response(response)
          payload = response.to_s.strip
          payload = payload.sub(/\A```(?:json)?\s*/i, "")
          payload = payload.sub(/\s*```\z/, "")
          JSON.parse(payload)
        end

        # Validates and normalizes one candidate against its original record.
        #
        # @return [Hash, nil] nil when the payload is blank, the item count
        #   differs from the original, or any item normalizes to an empty string
        def normalized_rewrite(original, candidate)
          case original[:type]
          when "SUMMARY", "FACT"
            payload = normalize_payload_text(candidate["payload"])
            return nil if payload.empty?

            {type: original[:type], payload: payload}
          when "LIST"
            items = Array(candidate["items"]).map { |item| normalize_list_item(item) }
            return nil unless items.length == original[:items].length
            return nil if items.any?(&:empty?)

            {type: "LIST", name: original[:name], items: items}
          end
        end

        # Renders entries back to text, substituting accepted rewrites.
        def rebuild_output(entries, rewrites)
          Array(entries).map do |entry|
            next entry if entry.is_a?(String)

            rewrite = rewrites[entry[:rewrite_id]]
            rewrite ? render_rewrite(rewrite) : entry[:original_line]
          end.join("\n")
        end

        # @raise [Ace::Compressor::Error] for a rewrite type that cannot be rendered
        def render_rewrite(rewrite)
          case rewrite[:type]
          when "SUMMARY", "FACT"
            "#{rewrite[:type]}|#{rewrite[:payload]}"
          when "LIST"
            "LIST|#{rewrite[:name]}|[#{rewrite[:items].join(",")}]"
          else
            raise Ace::Compressor::Error, "Unsupported agent rewrite type: #{rewrite[:type]}"
          end
        end

        # Splits "LIST|name|[a,b]" into the name and its non-blank items.
        def parse_list_line(line)
          _prefix, name, raw_items = line.split("|", 3)
          items = raw_items.to_s.sub(/\A\[/, "").sub(/\]\z/, "").split(",").map(&:strip).reject(&:empty?)
          [name.to_s, items]
        end

        def record_id(index)
          "r#{index}"
        end

        # Stripped, non-blank lines of the output.
        def normalize_output_lines(output)
          output.to_s.lines.map(&:strip).reject(&:empty?)
        end

        # Collapses whitespace runs (including newlines) so the payload stays on one line.
        def normalize_payload_text(text)
          text.to_s.gsub(/\s+/, " ").strip
        end

        # Lower-cases the item, turns it into underscore-joined tokens, and compacts them.
        def normalize_list_item(text)
          tokens = text.to_s.downcase.gsub(/[^a-z0-9]+/, "_").split("_").reject(&:empty?)
          tokens = compact_list_tokens(tokens)
          tokens.join("_")
        end

        # Drops stopwords, abbreviates known tokens, and removes adjacent
        # duplicates. When every token was a stopword the first one is kept so
        # the item does not vanish.
        def compact_list_tokens(tokens)
          compacted = Array(tokens).filter_map do |token|
            next if LIST_STOPWORDS.include?(token)

            LIST_TOKEN_MAP.fetch(token, token)
          end

          compacted = compacted.each_with_object([]) do |token, result|
            result << token unless result.last == token
          end

          compacted.empty? ? Array(tokens).first(1) : compacted
        end

        # Everything after the first "|" of the line.
        def record_payload(line)
          line.to_s.split("|", 2).last.to_s
        end

        # Configured model name: "agent_model", falling back to "agent_provider".
        #
        # @raise [Ace::Compressor::Error] when neither setting is present
        def agent_model
          @agent_model ||= begin
            config = Ace::Compressor.config
            model = config["agent_model"].to_s.strip
            model = config["agent_provider"].to_s.strip if model.empty?
            raise Ace::Compressor::Error, "Agent model not configured: set compressor.agent_model" if model.empty?
            model
          end
        end

        # @raise [Ace::Compressor::Error] when "agent_template_uri" is not configured
        def agent_template_uri
          @agent_template_uri ||= begin
            template_uri = Ace::Compressor.config["agent_template_uri"].to_s.strip
            raise Ace::Compressor::Error, "Agent template URI not configured: set compressor.agent_template_uri" if template_uri.empty?
            template_uri
          end
        end

        # Runs the command through the shell runner and returns stdout.
        #
        # @raise [Ace::Compressor::Error] on a non-successful status, carrying
        #   stderr (or stdout when stderr is blank)
        def execute_command(command)
          stdout, stderr, status = @shell_runner.call(command)
          return stdout if status.success?

          details = stderr.to_s.strip
          details = stdout.to_s.strip if details.empty?
          raise Ace::Compressor::Error, "#{command.first} failed: #{details}"
        end

        # Argv form: no shell is involved, so arguments are not interpolated.
        def default_shell_runner(command)
          Open3.capture3(*command)
        end
      end
    end
  end
end
@@ -0,0 +1,172 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "pathname"
5
+
6
module Ace
  module Compressor
    module Organisms
      # Runs each requested compression mode against every resolved source and
      # reports size changes and retention metrics relative to the exact-mode
      # output, either as a Hash/JSON report or as a plain-text table.
      class BenchmarkRunner
        SUPPORTED_FORMATS = %w[table json].freeze
        SUPPORTED_MODES = CompressionRunner::SUPPORTED_MODES

        # @param paths [Object] source paths; forwarded as given to ExactCompressor
        # @param modes [String, Array, nil] mode names, comma-separated values
        #   allowed; nil or blank selects every supported mode
        # @param format [String, nil] "table" or "json"; validated in #call
        # @param verbose [Boolean] forwarded to the compressors
        # @raise [Ace::Compressor::Error] when an unsupported mode is requested
        def initialize(paths, modes: nil, format: "table", verbose: false)
          @paths = Array(paths)
          @modes = parse_modes(modes)
          @format = (format || "table").to_s
          @verbose = verbose
          @resolver = ExactCompressor.new(paths, verbose: verbose)
          @retention = Ace::Compressor::Atoms::RetentionReporter.new
        end

        # Benchmarks every resolved source.
        #
        # @return [Hash] +{"sources" => [...], "summary" => {...}}+
        # @raise [Ace::Compressor::Error] when the configured format is unsupported
        def call
          unless SUPPORTED_FORMATS.include?(@format)
            raise Ace::Compressor::Error, "Unsupported format '#{@format}'. Use --format table or json"
          end

          reports = @resolver.resolve_sources.map { |source| benchmark_source(source) }
          {"sources" => reports, "summary" => summarize(reports)}
        end

        # Renders a report produced by #call in the configured format.
        #
        # @return [String] pretty JSON, or an aligned text table of the sources
        def render(report)
          if @format == "json"
            JSON.pretty_generate(report)
          else
            render_table(report.fetch("sources"))
          end
        end

        private

        # Normalizes the requested modes into a de-duplicated list of names.
        def parse_modes(modes)
          requested = Array(modes || SUPPORTED_MODES)
            .flat_map { |value| value.to_s.split(",") }
            .map(&:strip)
            .reject(&:empty?)
          requested = SUPPORTED_MODES if requested.empty?

          unsupported = requested - SUPPORTED_MODES
          return requested.uniq if unsupported.empty?

          raise Ace::Compressor::Error, "Unsupported modes: #{unsupported.join(", ")}"
        end

        # Runs exact mode once as the reference, then reports each requested mode.
        def benchmark_source(source)
          exact_run = run_mode(source, "exact")
          reference = exact_run["content"]

          {
            "source" => relative_source(source),
            "modes" => @modes.map { |mode| mode_report(source, mode, reference, exact_run) }
          }
        end

        # Builds the report row for one mode. The exact run is reused rather
        # than executed twice; a run that is not "ok" is returned as-is with
        # the mode name added.
        def mode_report(source, mode, reference_content, exact_result)
          outcome = (mode == "exact") ? exact_result : run_mode(source, mode)
          return outcome.merge("mode" => mode) if outcome["status"] != "ok"

          coverage = @retention.compare(
            reference_content: reference_content,
            candidate_content: outcome.fetch("content")
          )
          metadata = outcome.fetch("metadata")
          cache_state = outcome.fetch("cache_hit") ? "hit" : "miss"
          output_path = outcome.fetch("output_path")
          original_bytes = metadata.fetch("original_bytes")
          original_lines = metadata.fetch("original_lines")
          packed_bytes = metadata.fetch("packed_bytes")
          packed_lines = metadata.fetch("packed_lines")

          {
            "mode" => mode,
            "status" => "ok",
            "cache" => cache_state,
            "output_path" => output_path,
            "original_bytes" => original_bytes,
            "original_lines" => original_lines,
            "packed_bytes" => packed_bytes,
            "packed_lines" => packed_lines,
            "byte_change_percent" => percent_change(original_bytes, packed_bytes),
            "line_change_percent" => percent_change(original_lines, packed_lines),
            "coverage" => coverage
          }
        end

        # Executes one mode for one source through CompressionRunner and reads
        # the produced file. Any StandardError becomes an "error" result.
        def run_mode(source, mode)
          runner = CompressionRunner.new([source], mode: mode, format: "path", verbose: @verbose)
          outcome = runner.call
          status = outcome[:exit_code].to_i.zero? ? "ok" : "nonzero"

          {
            "status" => status,
            "cache_hit" => outcome[:cache_hit],
            "metadata" => outcome[:metadata],
            "output_path" => outcome[:output_path],
            "content" => File.read(outcome[:output_path]),
            "refusal_lines" => outcome[:refusal_lines],
            "fallback_lines" => outcome[:fallback_lines]
          }
        rescue => error
          {"status" => "error", "error" => error.message}
        end

        # One table row per source/mode pair, below a header row.
        def render_table(per_source)
          header = ["Source", "Mode", "Cache", "Bytes", "Lines", "Secs", "Prot", "Struct", "Loss", "Status"]
          body = per_source.flat_map do |entry|
            entry.fetch("modes").map { |mode| table_row(entry, mode) }
          end

          render_rows([header] + body)
        end

        # Cells for a single mode; runs that are not "ok" show dashes for every
        # metric column. The "Secs" column carries the sections coverage.
        def table_row(entry, mode)
          if mode.fetch("status") != "ok"
            return [entry.fetch("source"), mode.fetch("mode"), *Array.new(7, "-"), mode.fetch("status")]
          end

          coverage = mode.fetch("coverage")
          [
            entry.fetch("source"),
            mode.fetch("mode"),
            mode.fetch("cache"),
            format_change(mode.fetch("byte_change_percent")),
            format_change(mode.fetch("line_change_percent")),
            coverage_ratio(coverage.fetch("sections")),
            coverage_ratio(coverage.fetch("protected")),
            coverage_ratio(coverage.fetch("structured")),
            total_loss_markers(coverage.fetch("loss_markers")),
            mode.fetch("status")
          ]
        end

        # Left-aligns every column to its widest cell and underlines the first row.
        def render_rows(rows)
          widths = rows.transpose.map { |column| column.map { |cell| cell.to_s.length }.max }
          underline = widths.map { |width| "-" * width }.join(" ")

          rows.each_with_index.flat_map do |row, index|
            text = row.zip(widths).map { |cell, width| cell.to_s.ljust(width) }.join(" ")
            index.zero? ? [text, underline] : [text]
          end.join("\n")
        end

        # "retained/total (percent%)" for one coverage metric.
        def coverage_ratio(metric)
          retained = metric.fetch("retained")
          total = metric.fetch("total")
          percent = metric.fetch("percent")
          "#{retained}/#{total} (#{percent}%)"
        end

        # Sum of all loss-marker counts.
        def total_loss_markers(markers)
          markers.values.sum
        end

        # Signed percentage change from original to packed, rounded to one
        # decimal; 0.0 when the original size is zero.
        def percent_change(original, packed)
          base = original.to_f
          return 0.0 if base.zero?

          delta = packed.to_f - base
          (delta / base * 100.0).round(1)
        end

        # Always-signed percentage with one decimal, e.g. "-12.5%".
        def format_change(value)
          sprintf("%+.1f%%", value)
        end

        # Counts the sources and the rows that finished with status "ok".
        def summarize(per_source)
          ok_rows = per_source
            .flat_map { |entry| entry.fetch("modes") }
            .count { |row| row.fetch("status") == "ok" }

          {"sources" => per_source.size, "rows" => ok_rows}
        end

        # Source path relative to the current working directory; the source is
        # returned unchanged when an ArgumentError is raised while computing it.
        def relative_source(source)
          absolute = Pathname.new(File.expand_path(source))
          working_dir = Pathname.new(Dir.pwd)
          absolute.relative_path_from(working_dir).to_s
        rescue ArgumentError
          source
        end
      end
    end
  end
end