ace-compressor 0.24.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ace
4
+ module Compressor
5
+ module Atoms
6
require "set" # Set is only autoloaded from Ruby 3.2 on; load it explicitly for older runtimes.

# Measures how much of a reference document survives in a candidate document.
#
# Both inputs are read line by line; the text before the first "|" of each
# non-blank line is treated as the record type. Retention is reported for
# three groups of record types plus a tally of loss-marker records.
class RetentionReporter
  # Records that must reappear verbatim (whole-line comparison).
  LITERAL_PROTECTED_TYPES = %w[RULE CONSTRAINT CMD U].freeze
  # Records that are only compared by how many of them exist.
  COUNT_PROTECTED_TYPES = %w[CODE TABLE].freeze
  # Records compared by a key derived from the line (see #structured_key).
  STRUCTURED_TYPES = %w[LIST PROBLEMS FILES TREE EXAMPLE].freeze
  # Records counted in the candidate and reported under "loss_markers".
  LOSS_MARKER_TYPES = %w[LOSS EXAMPLE_REF REFUSAL FALLBACK].freeze

  # Builds the retention report.
  #
  # @param reference_content [#to_s] the document used as the baseline
  # @param candidate_content [#to_s] the document being evaluated
  # @return [Hash] with keys "sections", "protected" and "structured" (each a
  #   hash of "retained", "total", "percent") and "loss_markers" (downcased
  #   marker type => number of such lines in the candidate)
  def compare(reference_content:, candidate_content:)
    reference = parse(reference_content)
    candidate = parse(candidate_content)

    {
      "sections" => coverage_for_sections(reference, candidate),
      "protected" => coverage_for_protected(reference, candidate),
      "structured" => coverage_for_structured(reference, candidate),
      "loss_markers" => LOSS_MARKER_TYPES.to_h { |type| [type.downcase, candidate.fetch("counts").fetch(type, 0)] }
    }
  end

  private

  # Indexes a document by record type.
  #
  # @param content [#to_s] raw document text (nil is treated as empty)
  # @return [Hash] "counts" (type => number of lines), "sections" (Set of
  #   SEC payloads), "literal" (type => Set of full lines) and
  #   "structured_keys" (type => Set of keys)
  def parse(content)
    lines = content.to_s.lines.map(&:strip).reject(&:empty?)
    counts = Hash.new(0)
    literal = Hash.new { |hash, key| hash[key] = Set.new }
    sections = Set.new
    structured_keys = Hash.new { |hash, key| hash[key] = Set.new }

    lines.each do |line|
      type = line.split("|", 2).first
      # A line that starts with "|" has an empty type and is ignored.
      next if type.to_s.empty?

      counts[type] += 1
      sections << line.delete_prefix("SEC|") if type == "SEC"
      literal[type] << line if LITERAL_PROTECTED_TYPES.include?(type)
      structured_keys[type] << structured_key(type, line) if STRUCTURED_TYPES.include?(type)
    end

    {
      "counts" => counts,
      "sections" => sections,
      "literal" => literal,
      "structured_keys" => structured_keys
    }
  end

  # Coverage of reference section titles that also appear in the candidate.
  def coverage_for_sections(reference, candidate)
    total = reference.fetch("sections").size
    retained = reference.fetch("sections").intersection(candidate.fetch("sections")).size
    coverage_hash(retained, total)
  end

  # Coverage of protected records: literal types are matched line-for-line,
  # count types contribute min(reference count, candidate count).
  def coverage_for_protected(reference, candidate)
    literal_total = 0
    literal_retained = 0

    LITERAL_PROTECTED_TYPES.each do |type|
      # fetch with a default avoids triggering the default block, so the
      # parsed hashes are not populated as a side effect of reading them.
      ref_lines = reference.fetch("literal").fetch(type, Set.new)
      cand_lines = candidate.fetch("literal").fetch(type, Set.new)
      literal_total += ref_lines.size
      literal_retained += ref_lines.intersection(cand_lines).size
    end

    count_total = COUNT_PROTECTED_TYPES.sum { |type| reference.fetch("counts").fetch(type, 0) }
    count_retained = COUNT_PROTECTED_TYPES.sum do |type|
      [reference.fetch("counts").fetch(type, 0), candidate.fetch("counts").fetch(type, 0)].min
    end

    coverage_hash(literal_retained + count_retained, literal_total + count_total)
  end

  # Coverage of structured records, matched by their derived keys.
  def coverage_for_structured(reference, candidate)
    total = 0
    retained = 0

    STRUCTURED_TYPES.each do |type|
      ref_keys = reference.fetch("structured_keys").fetch(type, Set.new)
      cand_keys = candidate.fetch("structured_keys").fetch(type, Set.new)
      total += ref_keys.size
      retained += ref_keys.intersection(cand_keys).size
    end

    coverage_hash(retained, total)
  end

  # Derives the comparison key for a structured record.
  #
  # LIST/FILES/TREE use their second field, EXAMPLE uses everything after the
  # type, and any other type (PROBLEMS) uses the whole line.
  def structured_key(type, line)
    case type
    when "LIST", "FILES", "TREE"
      line.split("|", 3)[1].to_s
    when "EXAMPLE"
      line.split("|", 2)[1].to_s
    else
      line
    end
  end

  # Packs a retained/total pair with its percentage rounded to one decimal.
  # An empty reference (total of zero) is reported as 100.0 percent.
  def coverage_hash(retained, total)
    {
      "retained" => retained,
      "total" => total,
      "percent" => total.zero? ? 100.0 : ((retained.to_f / total.to_f) * 100.0).round(1)
    }
  end
end
109
+ end
110
+ end
111
+ end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ace/support/cli"
4
+ require "ace/core"
5
+
6
+ module Ace
7
+ module Compressor
8
+ module CLI
9
+ module Commands
10
# CLI command that hands the given source paths to
# Organisms::BenchmarkRunner and prints the rendered report.
class Benchmark < Ace::Support::Cli::Command
  include Ace::Support::Cli::Base

  desc "Compare exact, compact, and agent output on live sources"

  argument :sources, required: false, type: :array, desc: "File or directory paths"
  option :modes, type: :string, desc: "Comma-delimited modes: exact,compact,agent"
  option :format, type: :string, aliases: ["-f"], desc: "Benchmark output format: table|json"
  option :verbose, type: :boolean, aliases: ["-v"], default: false, desc: "Show verbose output"

  # Runs the benchmark and writes the rendered report to stdout.
  #
  # @param options [Hash] parsed CLI options (:sources, :modes, :format, :verbose)
  # @return [Integer] 0 once the report has been printed
  # @raise [Ace::Support::Cli::Error] when no source path is given, or when
  #   an Ace::Compressor::Error is raised while running (message is reused)
  def call(**options)
    paths = normalize_sources(options[:sources] || [])
    if paths.empty?
      usage = "Missing input path. Usage: ace-compressor benchmark <file-or-dir> [more-paths...]"
      raise Ace::Support::Cli::Error, usage
    end

    benchmark = Ace::Compressor::Organisms::BenchmarkRunner.new(
      paths,
      modes: options[:modes],
      format: options[:format],
      verbose: options[:verbose] ? true : false
    )
    puts benchmark.render(benchmark.call)
    0
  rescue Ace::Compressor::Error => e
    raise Ace::Support::Cli::Error, e.message
  end

  private

  # Returns a fresh array of sources with a leading "benchmark" entry removed.
  def normalize_sources(sources)
    list = Array(sources)
    list.first == "benchmark" ? list.drop(1) : list.dup
  end
end
48
+ end
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ace/support/cli"
4
+ require "ace/core"
5
+
6
+ module Ace
7
+ module Compressor
8
+ module CLI
9
+ module Commands
10
# CLI command that validates its options, delegates the work to
# Organisms::CompressionRunner and prints the runner's console output.
class Compress < Ace::Support::Cli::Command
  include Ace::Support::Cli::Base

  SUPPORTED_MODES = %w[exact compact agent].freeze
  SUPPORTED_SOURCE_SCOPES = %w[merged per-source].freeze

  desc "Compress markdown/text files into ContextPack/3 records"

  argument :sources, required: false, type: :array, desc: "File or directory paths"
  option :mode, type: :string, default: "exact", desc: "Compression mode (exact|compact|agent)"
  option :source_scope, type: :string, default: "merged", desc: "Source handling mode (merged|per-source)"
  option :output, type: :string, aliases: ["-o"], desc: "Save output to file or directory path"
  option :format, type: :string, aliases: ["-f"], desc: "Console output format: path|stdio|stats"
  option :version, type: :boolean, desc: "Show version information"
  option :quiet, type: :boolean, aliases: ["-q"], default: false, desc: "Suppress non-essential output"
  option :verbose, type: :boolean, aliases: ["-v"], default: false, desc: "Show verbose output"
  option :debug, type: :boolean, aliases: ["-d"], default: false, desc: "Show debug output"

  # Runs the compression for the given sources.
  #
  # @param options [Hash] parsed CLI options (see the option declarations above)
  # @return [Integer] 0 on success, both for --version and for a normal run
  # @raise [Ace::Support::Cli::Error] on missing sources, an unsupported
  #   mode or source scope, a non-zero runner exit code, or when an
  #   Ace::Compressor::Error is raised while running (message is reused)
  def call(**options)
    if options[:version]
      puts "ace-compressor #{Ace::Compressor::VERSION}"
      return 0
    end

    sources = normalize_sources(options[:sources] || [])
    if sources.empty?
      raise Ace::Support::Cli::Error,
        "Missing input path. Usage: ace-compressor <file-or-dir> [more-paths...] --mode <exact|compact|agent>"
    end

    mode = (options[:mode] || "exact").to_s
    unless SUPPORTED_MODES.include?(mode)
      raise Ace::Support::Cli::Error, "Unsupported mode '#{mode}'. Use --mode exact, --mode compact, or --mode agent"
    end

    source_scope = (options[:source_scope] || "merged").to_s
    unless SUPPORTED_SOURCE_SCOPES.include?(source_scope)
      raise Ace::Support::Cli::Error,
        "Unsupported source scope '#{source_scope}'. Use --source-scope merged or --source-scope per-source"
    end

    runner = Ace::Compressor::Organisms::CompressionRunner.new(
      sources,
      mode: mode,
      source_scope: source_scope,
      output: options[:output],
      format: options[:format],
      verbose: !!options[:verbose]
    )
    result = runner.call
    if options[:verbose]
      result[:ignored_paths].each { |path| warn "Ignoring unsupported file: #{path}" }
    end
    # The output is printed before the exit code is checked, so it is still
    # shown when the run is then reported as a refusal.
    puts result[:console_output]
    if result[:exit_code].to_i.nonzero?
      raise Ace::Support::Cli::Error, refusal_message_for(mode)
    end

    # Return an explicit success code, matching the --version branch above
    # instead of falling through with nil.
    0
  rescue Ace::Compressor::Error => e
    raise Ace::Support::Cli::Error, e.message
  end

  private

  # Returns a copy of the sources with a leading "compress" entry removed.
  def normalize_sources(sources)
    values = Array(sources).dup
    values.shift if values.first == "compress"
    values
  end

  # Message raised when the runner reports a non-zero exit code.
  # The wording is the same for every mode; only the mode name is interpolated.
  def refusal_message_for(mode)
    "One or more sources were refused in #{mode} mode. Retry refused sources with --mode exact"
  end
end
86
+ end
87
+ end
88
+ end
89
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ace/support/cli"
4
+ require "ace/core"
5
+
6
+ require_relative "cli/commands/compress"
7
+ require_relative "cli/commands/benchmark"
8
+ require_relative "version"
9
+
10
+ module Ace
11
+ module Compressor
12
+ module CLI
13
# Entry point for the command line: picks the command class from the first
# argument and runs it through Ace::Support::Cli::Runner.
#
# With no arguments, "--help" is passed to the Compress command. A leading
# "benchmark" selects the Benchmark command and is removed from its args;
# anything else is passed unchanged to Compress.
def self.start(args)
  argv = args.empty? ? ["--help"] : args

  command, command_args =
    if argv.first == "benchmark"
      [Commands::Benchmark, argv.drop(1)]
    else
      [Commands::Compress, argv]
    end

  Ace::Support::Cli::Runner.new(command).call(args: command_args)
end
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,175 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ace
4
+ module Compressor
5
+ module Models
6
# Builders for the pipe-delimited record lines of a ContextPack document.
#
# Each builder returns one String whose first field is the record type
# (H, FILE, SEC, RULE, TABLE, ...). Values are passed through .escape so an
# embedded "|" or newline cannot break the one-record-per-line layout.
class ContextPack
  SCHEMA = "ContextPack/3"

  # Makes a value safe to embed in a record field: "|" becomes "\|",
  # newlines become spaces and surrounding whitespace is removed.
  #
  # @param value [#to_s]
  # @return [String]
  def self.escape(value)
    value.to_s.gsub("|", "\\|").tr("\n", " ").strip
  end

  # Header record carrying the schema name and the mode.
  def self.header(mode)
    "H|#{SCHEMA}|#{escape(mode)}"
  end

  # FILE record for a source.
  def self.file_line(source)
    "FILE|#{escape(source)}"
  end

  # POLICY record with the document class and the chosen action.
  def self.policy_line(doc_class:, action:)
    "POLICY|class=#{escape(doc_class)}|action=#{escape(action)}"
  end

  # FIDELITY record; the details field is appended only when non-blank.
  def self.fidelity_line(source:, status:, check:, details: nil)
    line = "FIDELITY|source=#{escape(source)}|status=#{escape(status)}|check=#{escape(check)}"
    details_text = details.to_s.strip
    return line if details_text.empty?

    "#{line}|details=#{escape(details_text)}"
  end

  # REFUSAL record with the reason and the check that failed.
  def self.refusal_line(source:, reason:, failed_check:)
    "REFUSAL|source=#{escape(source)}|reason=#{escape(reason)}|failed_check=#{escape(failed_check)}"
  end

  # GUIDANCE record with the value to retry with.
  def self.guidance_line(source:, retry_with:)
    "GUIDANCE|source=#{escape(source)}|retry_with=#{escape(retry_with)}"
  end

  # FALLBACK record; check and details are appended only when non-blank.
  def self.fallback_line(source:, from:, to:, reason:, check: nil, details: nil)
    line = "FALLBACK|source=#{escape(source)}|from=#{escape(from)}|to=#{escape(to)}|reason=#{escape(reason)}"
    line += "|check=#{escape(check)}" unless check.to_s.strip.empty?
    line += "|details=#{escape(details)}" unless details.to_s.strip.empty?
    line
  end

  # SEC record for a section title.
  def self.section_line(title)
    "SEC|#{escape(title)}"
  end

  # SUMMARY record.
  def self.summary_line(text)
    "SUMMARY|#{escape(text)}"
  end

  # FACT record.
  def self.fact_line(text)
    "FACT|#{escape(text)}"
  end

  # RULE record.
  def self.rule_line(text)
    "RULE|#{escape(text)}"
  end

  # CONSTRAINT record.
  def self.constraint_line(text)
    "CONSTRAINT|#{escape(text)}"
  end

  # PROBLEMS record: escaped items joined with "," inside brackets.
  def self.problems_line(items)
    values = Array(items).map { |item| escape(item) }.join(",")
    "PROBLEMS|[#{values}]"
  end

  # LIST record; a blank key falls back to "items".
  def self.list_line(list_key, items)
    values = Array(items).map { |item| escape(item) }.join(",")
    key = list_key.to_s.strip
    key = "items" if key.empty?
    "LIST|#{escape(key)}|[#{values}]"
  end

  # EXAMPLE record of the form "EXAMPLE|tool=<tool>".
  def self.example_line(tool)
    "EXAMPLE|#{escape("tool=#{tool}")}"
  end

  # CMD record.
  def self.cmd_line(command)
    "CMD|#{escape(command)}"
  end

  # FILES record: a label followed by the bracketed, comma-joined files.
  def self.files_line(label, files)
    "FILES|#{escape(label)}|[#{Array(files).map { |value| escape(value) }.join(",")}]"
  end

  # TREE record: a label followed by the tree text (newlines become spaces).
  def self.tree_line(label, tree)
    "TREE|#{escape(label)}|#{escape(tree)}"
  end

  # CODE record; a blank language is recorded as "code".
  def self.code_line(language, code)
    language_value = language.to_s.strip.empty? ? "code" : language.to_s.strip
    "CODE|#{escape(language_value)}|#{escape(code)}"
  end

  # TABLE record built from table rows.
  #
  # @param rows [Array<#to_s>] table rows, the first being the header row
  # @param table_id [#to_s, nil] emitted as id= when non-blank
  # @param strategy [#to_s, nil] emitted as strategy= when non-blank
  # @return [String] "TABLE|" followed by the optional id/strategy/cols
  #   fields and a rows= field (always present)
  def self.table_line(rows, table_id: nil, strategy: nil)
    fields = []
    columns, data_rows = normalize_table_rows(rows)
    fields << "id=#{escape(table_id)}" unless table_id.to_s.strip.empty?
    fields << "strategy=#{escape(strategy)}" unless strategy.to_s.strip.empty?
    fields << "cols=#{escape(columns.join(","))}" unless columns.empty?
    fields << "rows=#{escape(encode_table_data_rows(data_rows))}"
    "TABLE|#{fields.join("|")}"
  end

  # LOSS record describing how much of a target was dropped.
  #
  # The unit name is sanitised to [a-z0-9_] (case-insensitive) and used as
  # the suffix of the original_/retained_/dropped_ field names; a blank unit
  # becomes "items". The dropped count never goes below zero.
  def self.loss_line(kind:, target:, strategy:, original:, retained:, unit:, source: nil, details: nil)
    unit_key = unit.to_s.strip
    unit_key = "items" if unit_key.empty?
    unit_key = unit_key.gsub(/[^a-z0-9_]/i, "_")

    original_count = original.to_i
    retained_count = retained.to_i
    dropped_count = [original_count - retained_count, 0].max

    line = [
      "LOSS|kind=#{escape(kind)}",
      "target=#{escape(target)}",
      "strategy=#{escape(strategy)}",
      "original_#{unit_key}=#{escape(original_count)}",
      "retained_#{unit_key}=#{escape(retained_count)}",
      "dropped_#{unit_key}=#{escape(dropped_count)}"
    ].join("|")

    line += "|source=#{escape(source)}" unless source.to_s.strip.empty?
    line += "|details=#{escape(details)}" unless details.to_s.strip.empty?
    line
  end

  # EXAMPLE_REF record; the reason defaults to "duplicate".
  def self.example_ref_line(tool:, source:, original_source:, reason: "duplicate")
    "EXAMPLE_REF|tool=#{escape(tool)}|source=#{escape(source)}|original_source=#{escape(original_source)}|reason=#{escape(reason)}"
  end

  # U record holding a kind and the raw text.
  def self.unresolved_line(kind, raw)
    "U|#{escape(kind)}|#{escape(raw)}"
  end

  # Splits table rows into header cells and data rows.
  #
  # @param rows [Array<#to_s>, nil]
  # @return [Array(Array<String>, Array)] [header_cells, data_rows]; both are
  #   empty when there are no rows or every row is blank. A single row
  #   without any "|" is returned as one data row with no header.
  def self.normalize_table_rows(rows)
    row_values = Array(rows)
    return [[], []] if row_values.empty?

    return [[], []] if row_values.all? { |row| row.to_s.strip.empty? }

    if row_values.length == 1 && !row_values.first.to_s.include?("|")
      return [[], [row_values.first.to_s]]
    end

    header_cells = parse_table_cells(row_values[0])
    data_rows = row_values[1..].to_a.reject { |row| table_separator_row?(row) }.map { |row| parse_table_cells(row) }
    [header_cells, data_rows]
  end

  # Encodes data rows: cells are joined with ">" and rows with ";".
  def self.encode_table_data_rows(rows)
    Array(rows).map { |cells| Array(cells).map { |cell| escape_table_cell(cell) }.join(">") }.join(";")
  end

  # Escapes the characters that have meaning inside an encoded table row:
  # the escape character itself, the cell separator ">" and the row
  # separator ";". Each is prefixed with a backslash.
  #
  # The block form of gsub is required here: in a replacement *string*, two
  # backslashes are read as one escaped backslash, so
  # gsub("\\", "\\\\") would leave the backslashes unchanged.
  #
  # @param value [#to_s]
  # @return [String] the escaped cell with surrounding whitespace removed
  def self.escape_table_cell(value)
    value.to_s.gsub(/[\\>;]/) { |char| "\\#{char}" }.strip
  end

  # Splits a row on "|" into stripped cells; empty cells are discarded.
  def self.parse_table_cells(row)
    row.to_s.split("|").map(&:strip).reject(&:empty?)
  end

  # True when the row consists only of "-", ":", "|" and whitespace.
  def self.table_separator_row?(row)
    row.to_s.strip.match?(/\A\|?[-\s:|]+\|?\z/)
  end
end
173
+ end
174
+ end
175
+ end