ace-compressor 0.24.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.ace-defaults/compressor/config.yml +11 -0
- data/.ace-defaults/nav/protocols/tmpl-sources/ace-compressor.yml +10 -0
- data/CHANGELOG.md +357 -0
- data/README.md +46 -0
- data/Rakefile +15 -0
- data/exe/ace-compressor +13 -0
- data/handbook/templates/agent/minify-single-source.template.md +34 -0
- data/lib/ace/compressor/atoms/canonical_block_transformer.rb +341 -0
- data/lib/ace/compressor/atoms/compact_policy_classifier.rb +130 -0
- data/lib/ace/compressor/atoms/markdown_parser.rb +190 -0
- data/lib/ace/compressor/atoms/retention_reporter.rb +111 -0
- data/lib/ace/compressor/cli/commands/benchmark.rb +51 -0
- data/lib/ace/compressor/cli/commands/compress.rb +89 -0
- data/lib/ace/compressor/cli.rb +23 -0
- data/lib/ace/compressor/models/context_pack.rb +175 -0
- data/lib/ace/compressor/molecules/cache_store.rb +301 -0
- data/lib/ace/compressor/molecules/input_resolver.rb +98 -0
- data/lib/ace/compressor/organisms/agent_compressor.rb +325 -0
- data/lib/ace/compressor/organisms/benchmark_runner.rb +172 -0
- data/lib/ace/compressor/organisms/compact_compressor.rb +470 -0
- data/lib/ace/compressor/organisms/compression_runner.rb +315 -0
- data/lib/ace/compressor/organisms/exact_compressor.rb +187 -0
- data/lib/ace/compressor/version.rb +7 -0
- data/lib/ace/compressor.rb +109 -0
- metadata +156 -0
|
@@ -0,0 +1,470 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
module Ace
|
|
6
|
+
module Compressor
|
|
7
|
+
module Organisms
|
|
8
|
+
# Orchestrates compact mode with a tiered section ladder. In agent mode the
|
|
9
|
+
# same ladder is used, but eligible narrative sections may be rewritten by
|
|
10
|
+
# AgentCompressor while rule-bearing sections stay deterministic.
|
|
11
|
+
class CompactCompressor
|
|
12
|
+
# A fact or table row is "dense" when it contains a digit or a normative
# keyword. The keyword group is anchored on both sides so that words such as
# "whenever" or "commonly" do not count as "never" / "only".
DENSE_FACT_RE = /\d|\b(?:must|must not|never|required|should|shall|cannot|can't|do not|only)\b/i
# Phrases in RULE/CONSTRAINT/FACT lines that require examples to be kept verbatim.
MIMICRY_RE = /\b(?:exact output|required format|must match|mimic|verbatim|follow exactly)\b/i
# Keywords that make a table "sensitive", so that all of its rows are preserved.
SENSITIVE_TABLE_RE = /\b(?:must|must not|never|required|only|shall|cannot|do not|policy|constraint)\b/i
# Markdown delimiter row. Accepts both the compact form ("|---|---|") and the
# spaced form ("| --- | :---: |"), including whitespace before the closing pipe.
TABLE_SEPARATOR_RE = /\A\|?\s*:?-{3,}:?(?:\s*\|\s*:?-{3,}:?)*\s*\|?\s*\z/
# Separator between rows inside a non-structured TABLE payload (escaped "||ROW||").
TABLE_ROW_SEPARATOR_ESCAPED_RE = /\s+\\\|\\\|ROW\\\|\\\|\s+/
# Record prefixes treated as the payload of an EXAMPLE record.
EXAMPLE_PAYLOAD_PREFIXES = ["CMD|", "FILES|", "TREE|", "CODE|"].freeze
# Tables with at most this many data rows are always kept whole.
PRESERVE_TABLE_MAX_ROWS = 2
# Tables with at most this many data rows use the "schema_plus_key_rows" strategy.
SCHEMA_KEY_ROWS_MAX_ROWS = 6
# Number of data rows retained by "schema_plus_key_rows".
SCHEMA_KEY_ROWS_LIMIT = 2
# Number of data rows retained by "summarize_with_loss".
SUMMARY_ROWS_LIMIT = 1
# A section containing any record with one of these prefixes is kept exactly.
EXACT_SECTION_PREFIXES = ["RULE|", "CONSTRAINT|", "CMD|", "U|"].freeze
|
|
23
|
+
|
|
24
|
+
# Refusal entries recorded during the most recent compression run.
# `ignored_paths` is deliberately not listed here: the class defines an
# explicit #ignored_paths that delegates to the resolver, and @ignored_paths
# is never assigned, so a generated reader would only be a shadowed duplicate.
attr_reader :refused_sources
|
|
25
|
+
|
|
26
|
+
# Builds a compressor for the given input paths.
#
# @param paths [String, Array<String>] input path(s); stored via Array()
# @param verbose [Boolean] forwarded to the ExactCompressor used for resolution
# @param mode_label [String] mode name; "agent" switches #compress_sources to agent records
# @param agent_rewriter [#rewrite_section, nil] optional rewriter for eligible sections
def initialize(paths, verbose: false, mode_label: "compact", agent_rewriter: nil)
  atoms = Ace::Compressor::Atoms

  @paths = Array(paths)
  @mode_label = mode_label
  # Source resolution (and ignored-path tracking) is delegated to ExactCompressor.
  @resolver = ExactCompressor.new(paths, verbose: verbose, mode_label: mode_label)
  @parser = atoms::MarkdownParser.new
  # Kept as a class: one transformer is instantiated per source label.
  @transformer = atoms::CanonicalBlockTransformer
  @classifier = atoms::CompactPolicyClassifier.new
  @agent_rewriter = agent_rewriter
  @refused_sources = []
  @example_registry = {}
end
|
|
37
|
+
|
|
38
|
+
# Resolves the configured inputs and compresses them into one pack.
#
# @return [String] the newline-joined records produced by #compress_sources
def call
  resolved = resolve_sources
  compress_sources(resolved)
end
|
|
41
|
+
|
|
42
|
+
# Delegates source resolution to the wrapped resolver.
#
# @return whatever the resolver's #resolve_sources returns
def resolve_sources
  resolver = @resolver
  resolver.resolve_sources
end
|
|
45
|
+
|
|
46
|
+
# Paths skipped during resolution, as reported by the wrapped resolver.
#
# @return whatever the resolver's #ignored_paths returns
def ignored_paths
  resolver = @resolver
  resolver.ignored_paths
end
|
|
49
|
+
|
|
50
|
+
# Compresses each source file into pack records and joins them with newlines.
#
# Resets the refusal list and the example registry, then for every source
# emits a file line, a policy line and the mode-specific records.
#
# @param sources [Array<String>, String] files to read
# @param source_paths [Hash, nil] optional mapping from a source (or its
#   expanded path) to the path that should be displayed
# @return [String] header plus all records, joined with "\n"
# @raise [Ace::Compressor::Error] when a file is blank or parses to no blocks
def compress_sources(sources, source_paths: nil)
  pack = Ace::Compressor::Models::ContextPack
  @refused_sources = []
  @example_registry = {}
  lines = [pack.header(@mode_label)]

  Array(sources).each do |source|
    label = source_label(display_source(source, source_paths))
    lines << pack.file_line(label)

    text = File.read(source)
    if text.strip.empty?
      raise Ace::Compressor::Error, "Input file is empty. #{@mode_label.capitalize} mode requires content: #{source}"
    end

    blocks = @parser.call(text)
    if blocks.empty?
      raise Ace::Compressor::Error, "Input file is empty after frontmatter removal. #{@mode_label.capitalize} mode requires content: #{source}"
    end

    policy = @classifier.call(source: label, blocks: blocks)
    action = policy.fetch("action")
    doc_class = policy.fetch("class")
    lines << pack.policy_line(doc_class: doc_class, action: action)
    transformed = @transformer.new(label).call(blocks)

    records =
      if @mode_label == "agent"
        agent_records(transformed, policy: policy, source_label: label)
      else
        deterministic_records(transformed, action: action, source_label: label, policy_class: doc_class)
      end
    lines.concat(records)
  end

  lines.join("\n")
end
|
|
78
|
+
|
|
79
|
+
private
|
|
80
|
+
|
|
81
|
+
# Produces the records for one source in non-agent modes.
#
# "refuse_compact" yields a refusal; "compact_with_exact_rule_sections"
# compacts per section and refuses if any RULE/CONSTRAINT record went missing;
# every other action goes through #compact_records.
#
# @return [Array<String>] pack records for the source
def deterministic_records(transformed, action:, source_label:, policy_class:)
  if action == "refuse_compact"
    return refusal_for_source(source_label, policy_class, "compact_preflight", "rule-heavy source requires exact mode")
  end

  unless action == "compact_with_exact_rule_sections"
    return compact_records(transformed, action, source_label: source_label)
  end

  mixed = compact_with_exact_rule_sections(transformed, source_label: source_label)
  verdict = mixed_fidelity(transformed, mixed)
  unless verdict.fetch(:status) == "pass"
    return refusal_for_source(source_label, policy_class, "exact_rule_sections", verdict.fetch(:details))
  end

  passed = Ace::Compressor::Models::ContextPack.fidelity_line(source: source_label, status: "pass", check: "exact_rule_sections", details: verdict.fetch(:details))
  [passed, *mixed]
end
|
|
97
|
+
|
|
98
|
+
# Produces the records for one source in agent mode.
#
# Each section gets a deterministic rendering; for agent-eligible sections the
# rewriter's output replaces it only when the rewrite reports :ok and is
# strictly smaller. If no section used a rewrite, the source is recorded as
# refused ("no_win") while the deterministic records are still emitted.
#
# @return [Array<String>] fidelity line followed by section records (plus
#   refusal and guidance lines when no rewrite was applied)
def agent_records(transformed, policy:, source_label:)
  pack = Ace::Compressor::Models::ContextPack
  agent_applied = false
  result = []

  split_sections(transformed).each do |section|
    strategy = section_strategy(section, policy_class: policy.fetch("class"))
    fallback = deterministic_section_records(section, strategy: strategy, source_label: source_label)
    chosen = fallback

    if agent_eligible?(strategy) && @agent_rewriter
      attempt = @agent_rewriter.rewrite_section(section, source_label: source_label)
      if attempt[:ok] && section_improves?(attempt[:lines], fallback)
        chosen = attempt[:lines]
        agent_applied = true
      end
    end

    result.concat(chosen)
  end

  if agent_applied
    return [pack.fidelity_line(source: source_label, status: "pass", check: "agent_value", details: "agent_sections_applied"), *result]
  end

  @refused_sources << {"source" => source_label, "reason" => "no_win", "failed_check" => "agent_value"}
  [
    pack.fidelity_line(source: source_label, status: "fail", check: "agent_value", details: "no_agent_section_beat_deterministic"),
    *result,
    pack.refusal_line(source: source_label, reason: "no_win", failed_check: "agent_value"),
    pack.guidance_line(source: source_label, retry_with: "--mode compact")
  ]
end
|
|
131
|
+
|
|
132
|
+
# Groups records into sections, each starting with its "SEC|" line.
#
# Records that appear before the first "SEC|" line are discarded.
#
# @param records [Array<String>, nil]
# @return [Array<Array<String>>] one array per section, header first
def split_sections(records)
  header = ->(record) { record.start_with?("SEC|") }

  Array(records)
    .drop_while { |record| !header.call(record) }
    .slice_before(&header)
    .to_a
end
|
|
146
|
+
|
|
147
|
+
# Chooses how a section is compacted.
#
# Record content wins over the document class: any exact-prefix record forces
# :exact, then any TABLE record forces :hybrid; only then is the policy class
# consulted.
#
# @param section_records [Array<String>, nil] records of one section
# @param policy_class [String] document class from the classifier
# @return [Symbol] :exact, :hybrid, :lossy or :deterministic
def section_strategy(section_records, policy_class:)
  body = Array(section_records).reject { |record| record.start_with?("SEC|") }

  if body.any? { |record| record.start_with?(*EXACT_SECTION_PREFIXES) }
    :exact
  elsif body.any? { |record| record.start_with?("TABLE|") }
    :hybrid
  else
    case policy_class
    when "narrative-heavy", "mixed"
      :lossy
    when "overview", "architecture", "reference", "guide", "vision"
      :hybrid
    else
      :deterministic
    end
  end
end
|
|
156
|
+
|
|
157
|
+
# Whether a section with this strategy may be handed to the agent rewriter.
#
# @param strategy [Symbol]
# @return [Boolean] true only for :lossy and :hybrid
def agent_eligible?(strategy)
  strategy == :lossy || strategy == :hybrid
end
|
|
160
|
+
|
|
161
|
+
# Renders one section without the agent.
#
# :exact sections are returned untouched; :lossy sections use the aggressive
# compactor; every other strategy uses the conservative one.
#
# @return [Array<String>]
def deterministic_section_records(section_records, strategy:, source_label:)
  case strategy
  when :exact
    section_records
  when :lossy
    compact_records(section_records, "aggressive_compact", source_label: source_label)
  else
    compact_records(section_records, "conservative_compact", source_label: source_label)
  end
end
|
|
167
|
+
|
|
168
|
+
# Whether the candidate rendering is strictly smaller than the deterministic
# one, measured in bytes of the newline-joined records.
#
# @param candidate_lines [Array<String>, nil]
# @param deterministic_lines [Array<String>, nil]
# @return [Boolean]
def section_improves?(candidate_lines, deterministic_lines)
  joined_size = ->(records) { Array(records).join("\n").bytesize }
  candidate_size = joined_size.call(candidate_lines)
  baseline_size = joined_size.call(deterministic_lines)
  candidate_size < baseline_size
end
|
|
171
|
+
|
|
172
|
+
# Dispatches to the compactor named by the action.
#
# Only "aggressive_compact" selects the aggressive path; any other action
# value falls back to the conservative compactor.
#
# @return [Array<String>]
def compact_records(records, action, source_label:)
  return aggressive_compact(records, source_label: source_label) if action == "aggressive_compact"

  conservative_compact(records, source_label: source_label)
end
|
|
179
|
+
|
|
180
|
+
# Lossy compaction of a record stream.
#
# Per section (records before any "SEC|" line share the "__root__" key):
# * only the first SUMMARY is kept, reduced by #compact_summary_line;
# * the first FACT is kept, later FACTs only when they match DENSE_FACT_RE;
# * RULE/CONSTRAINT/PROBLEMS/LIST/EXAMPLE/U/CMD/FILES/TREE/CODE/TABLE records
#   are always kept;
# * records with any other prefix are dropped.
# A section header is emitted lazily, just before the first record kept for it,
# so sections that keep nothing leave no header behind. The surviving records
# are then passed through #post_process_structured_records.
#
# @return [Array<String>]
def aggressive_compact(records, source_label:)
  section_key = "__root__"
  unflushed_header = nil
  summarized = {}
  has_fact = {}
  kept = []

  # Appends a record, preceded by the section header if it is still pending.
  emit = lambda do |record|
    kept << unflushed_header unless unflushed_header.nil?
    unflushed_header = nil
    kept << record
  end

  Array(records).each do |record|
    case record
    when /\ASEC\|/
      section_key = record
      unflushed_header = record
    when /\ASUMMARY\|/
      unless summarized[section_key]
        summarized[section_key] = true
        emit.call(compact_summary_line(record))
      end
    when /\A(?:RULE|CONSTRAINT|PROBLEMS|LIST|EXAMPLE|U|CMD|FILES|TREE|CODE|TABLE)\|/
      emit.call(record)
    when /\AFACT\|/
      body = record.sub(/\AFACT\|/, "")
      redundant = has_fact[section_key] && !DENSE_FACT_RE.match?(body)
      unless redundant
        has_fact[section_key] = true
        emit.call(record)
      end
    end
  end

  post_process_structured_records(kept, source_label: source_label)
end
|
|
215
|
+
|
|
216
|
+
# Compacts section by section, choosing each section's strategy as if the
# document class were "mixed", so rule-bearing sections stay exact while the
# remaining sections are compacted.
#
# @return [Array<String>] concatenated records of all sections
def compact_with_exact_rule_sections(records, source_label:)
  split_sections(records).each_with_object([]) do |section, combined|
    strategy = section_strategy(section, policy_class: "mixed")
    combined.concat(deterministic_section_records(section, strategy: strategy, source_label: source_label))
  end
end
|
|
223
|
+
|
|
224
|
+
# Checks that every RULE/CONSTRAINT record of the original stream is still
# present, verbatim, in the compacted stream.
#
# @return [Hash] {status: "pass"|"fail", details: String}
def mixed_fidelity(original_records, compacted_records)
  # Array difference keeps every original rule that has no equal counterpart.
  lost = rule_records(original_records) - rule_records(compacted_records)
  return {status: "pass", details: "all_rule_records_preserved"} if lost.empty?

  {status: "fail", details: "missing_rule_records=#{lost.size}"}
end
|
|
235
|
+
|
|
236
|
+
# Filters a record stream down to its RULE and CONSTRAINT records.
#
# @param records [Array<String>, nil]
# @return [Array<String>] matching records in their original order
def rule_records(records)
  prefixes = ["RULE|", "CONSTRAINT|"]
  Array(records).select { |record| record.start_with?(*prefixes) }
end
|
|
239
|
+
|
|
240
|
+
# Records a refusal for the source and returns the records announcing it.
#
# Appends an entry to @refused_sources, then builds a failed fidelity line, a
# refusal line and a guidance line that suggests retrying with "--mode exact".
#
# @return [Array<String>] the three records, in that order
def refusal_for_source(source_label, reason, failed_check, details)
  pack = Ace::Compressor::Models::ContextPack
  @refused_sources << {"source" => source_label, "reason" => reason, "failed_check" => failed_check}

  failed = pack.fidelity_line(source: source_label, status: "fail", check: failed_check, details: details)
  refusal = pack.refusal_line(source: source_label, reason: reason, failed_check: failed_check)
  guidance = pack.guidance_line(source: source_label, retry_with: "--mode exact")
  [failed, refusal, guidance]
end
|
|
248
|
+
|
|
249
|
+
# Conservative compaction: drops records that exactly repeat an earlier one
# (first occurrence wins, order preserved), then applies
# #post_process_structured_records.
#
# @return [Array<String>]
def conservative_compact(records, source_label:)
  unique_records = Array(records).uniq
  post_process_structured_records(unique_records, source_label: source_label)
end
|
|
258
|
+
|
|
259
|
+
# Second pass over kept records: collapses repeated examples and compacts tables.
#
# State carried while scanning:
# * mimicry - set once a RULE/CONSTRAINT/FACT line in the current section
#   matches MIMICRY_RE; reset at every "SEC|". While set, examples are kept
#   verbatim (and registered).
# * dropping_payload - set after an EXAMPLE was replaced by a reference; the
#   example-payload lines that directly follow are dropped. Any other record
#   clears it.
# An EXAMPLE whose tool is already in @example_registry becomes a reference
# line plus a loss line; otherwise it is registered and kept. EXAMPLE records
# without a tool are kept untouched. TABLE records are replaced by the output
# of #compact_table_records, numbered in order of appearance.
#
# @return [Array<String>]
def post_process_structured_records(records, source_label:)
  pack = Ace::Compressor::Models::ContextPack
  tables_seen = 0
  mimicry = false
  dropping_payload = false
  output = []

  Array(records).each do |record|
    case record
    when /\ASEC\|/
      mimicry = false
      dropping_payload = false
      output << record
    when /\A(?:RULE|CONSTRAINT|FACT)\|/
      mimicry ||= record.match?(MIMICRY_RE)
      dropping_payload = false
      output << record
    when /\AEXAMPLE\|/
      dropping_payload = false
      tool = example_tool(record)
      # Only a named example outside a mimicry section can be collapsed.
      prior = (tool.empty? || mimicry) ? nil : @example_registry[tool]

      if prior
        origin = prior.fetch("source")
        output << pack.example_ref_line(tool: tool, source: source_label, original_source: origin, reason: "duplicate_example")
        output << pack.loss_line(kind: "example", target: tool, strategy: "reference", original: 1, retained: 0, unit: "examples", source: source_label, details: "collapsed_to=#{origin}")
        dropping_payload = true
      else
        register_example(tool, source_label) unless tool.empty?
        output << record
      end
    when /\ATABLE\|/
      dropping_payload = false
      tables_seen += 1
      output.concat(compact_table_records(record, source_label: source_label, table_index: tables_seen))
    else
      payload = example_payload_line?(record)
      next if dropping_payload && payload

      dropping_payload = false unless payload
      output << record
    end
  end

  output
end
|
|
307
|
+
|
|
308
|
+
# Rewrites one TABLE record according to the strategy chosen for it.
#
# The record is returned unchanged when no rows can be parsed from it.
# Otherwise the header rows plus the retained data rows become a new table
# line, followed by a loss line when data rows were dropped.
#
# @return [Array<String>] one or two records
def compact_table_records(line, source_label:, table_index:)
  rows = parse_table_rows(line)
  return [line] if rows.empty?

  pack = Ace::Compressor::Models::ContextPack
  header_rows, data_rows = split_table_rows(rows)
  table_id = table_record_id(source_label, table_index)
  strategy, kept_rows = select_table_strategy(rows, data_rows)
  table_record = pack.table_line(header_rows + kept_rows, table_id: table_id, strategy: strategy)
  return [table_record] unless data_rows.size > kept_rows.size

  loss_record = pack.loss_line(kind: "table", target: table_id, strategy: strategy, original: data_rows.size, retained: kept_rows.size, unit: "rows", source: source_label, details: "data_rows_only")
  [table_record, loss_record]
end
|
|
324
|
+
|
|
325
|
+
# Picks the table strategy and the data rows it retains.
#
# Small tables (up to PRESERVE_TABLE_MAX_ROWS data rows) and sensitive tables
# are preserved whole; tables of up to SCHEMA_KEY_ROWS_MAX_ROWS data rows keep
# SCHEMA_KEY_ROWS_LIMIT key rows; larger ones keep SUMMARY_ROWS_LIMIT.
#
# @param all_rows [Array<String>] header and data rows (used for the sensitivity check)
# @param data_rows [Array<String>]
# @return [Array(String, Array<String>)] strategy name and retained data rows
def select_table_strategy(all_rows, data_rows)
  count = data_rows.size
  return ["preserve", data_rows] if count <= PRESERVE_TABLE_MAX_ROWS || sensitive_table?(all_rows)

  label, limit =
    if count <= SCHEMA_KEY_ROWS_MAX_ROWS
      ["schema_plus_key_rows", SCHEMA_KEY_ROWS_LIMIT]
    else
      ["summarize_with_loss", SUMMARY_ROWS_LIMIT]
    end
  [label, select_key_rows(data_rows, limit: limit)]
end
|
|
335
|
+
|
|
336
|
+
# Splits table rows into header rows and data rows.
#
# The header runs up to and including the first row matching
# TABLE_SEPARATOR_RE. Without such a row, the first row alone is the header.
#
# @param rows [Array<String>]
# @return [Array(Array<String>, Array<String>)] header rows, data rows
def split_table_rows(rows)
  separator_at = rows.find_index { |row| row.match?(TABLE_SEPARATOR_RE) }
  header_size = (separator_at || 0) + 1
  [rows.take(header_size), rows.drop(header_size)]
end
|
|
344
|
+
|
|
345
|
+
# Extracts the rows of a TABLE record as markdown-style row strings.
#
# A payload containing "rows=" is treated as the structured form and handed
# to #parse_structured_table_rows. Otherwise the payload is split on the
# escaped row separator and each row has its "\|" sequences turned back
# into "|".
#
# @param line [String] a "TABLE|..." record
# @return [Array<String>] rows; empty when the payload is blank
def parse_table_rows(line)
  payload = line.delete_prefix("TABLE|")
  return [] if payload.strip.empty?

  if payload.include?("rows=")
    parse_structured_table_rows(payload)
  else
    payload.split(TABLE_ROW_SEPARATOR_ESCAPED_RE).map { |row| row.gsub("\\|", "|").strip }
  end
end
|
|
353
|
+
|
|
354
|
+
# Rebuilds markdown-style rows from a structured TABLE payload made of
# "key=value" fields separated by "|". Only the "cols" (comma-separated column
# names) and "rows" fields are read.
#
# Fields are split on unescaped pipes only: a backslash escapes the character
# that follows it, so "\|" inside a column name or cell stays part of its
# field instead of cutting the field (and the table data) short.
#
# @param payload [String] the record text after the "TABLE|" prefix
# @return [Array<String>] header row and "---" delimiter row (when columns are
#   present) followed by one row per decoded data row
def parse_structured_table_rows(payload)
  # Each token is a run of escape pairs and non-pipe characters; fields
  # without "=" are ignored.
  fields = payload.scan(/(?:\\.|[^|])+/m).each_with_object({}) do |field, hash|
    key, value = field.split("=", 2)
    next if value.nil?

    hash[key] = value
  end

  columns = fields.fetch("cols", "").split(",").map { |cell| cell.gsub("\\|", "|").strip }.reject(&:empty?)
  data_rows = decode_structured_table_rows(fields.fetch("rows", ""))

  rows = []
  unless columns.empty?
    rows << "| #{columns.join(" | ")} |"
    rows << "|#{Array.new(columns.size, "---").join("|")}|"
  end
  rows.concat(data_rows.map { |cells| "| #{cells.join(" | ")} |" })
end

# Decodes the "rows" field of a structured table: ">" separates cells, ";"
# separates rows, and a backslash makes the next character literal. Cells are
# stripped of surrounding whitespace. A trailing row is emitted only when it
# has content, so a terminating ";" adds no empty row.
#
# @param value [String, nil]
# @return [Array<Array<String>>] cells grouped by row
def decode_structured_table_rows(value)
  rows = []
  row = []
  cell = +""
  escaping = false

  value.to_s.each_char do |char|
    if escaping
      cell << char
      escaping = false
      next
    end

    case char
    when "\\"
      escaping = true
    when ">"
      row << cell.strip
      cell = +""
    when ";"
      row << cell.strip
      rows << row
      row = []
      cell = +""
    else
      cell << char
    end
  end

  rows << (row << cell.strip) unless cell.empty? && row.empty?
  rows
end
|
|
403
|
+
|
|
404
|
+
# Picks up to `limit` representative rows, in their original order.
#
# Candidates are, in priority order: the first row, the first row matching
# DENSE_FACT_RE, and the last row. Remaining slots are filled with the
# earliest rows not yet chosen. The input is returned as-is when it already
# fits within the limit.
#
# @param rows [Array<String>]
# @param limit [Integer]
# @return [Array<String>]
def select_key_rows(rows, limit:)
  return rows if rows.size <= limit

  dense_at = rows.find_index { |row| row.match?(DENSE_FACT_RE) }
  picks = [0, dense_at, rows.size - 1].compact.uniq

  rows.each_index do |index|
    break if picks.size >= limit

    picks << index unless picks.include?(index)
  end

  picks.sort.take(limit).map { |index| rows[index] }
end
|
|
418
|
+
|
|
419
|
+
# Whether any row of the table matches SENSITIVE_TABLE_RE.
#
# @param rows [Array<String>, nil]
# @return [Boolean]
def sensitive_table?(rows)
  first_hit = Array(rows).find_index { |row| row.match?(SENSITIVE_TABLE_RE) }
  !first_hit.nil?
end
|
|
422
|
+
|
|
423
|
+
# Builds a table identifier from the source's file name and the table's index.
#
# The base is the file name without extension, lowercased, with every run of
# characters outside a-z/0-9 replaced by "_" and leading/trailing underscores
# removed; "source" is used when nothing remains.
#
# @param source_label [String, nil]
# @param table_index [Integer]
# @return [String] e.g. "my_file_t2"
def table_record_id(source_label, table_index)
  label = source_label.to_s
  stem = File.basename(label, File.extname(label))
  slug = stem.downcase.gsub(/[^a-z0-9]+/, "_").gsub(/\A_+|_+\z/, "")
  slug = "source" if slug.empty?
  "#{slug}_t#{table_index}"
end
|
|
428
|
+
|
|
429
|
+
# Whether the record starts with one of EXAMPLE_PAYLOAD_PREFIXES.
#
# @param line [String]
# @return [Boolean]
def example_payload_line?(line)
  EXAMPLE_PAYLOAD_PREFIXES.any? { |prefix| line.start_with?(prefix) }
end
|
|
432
|
+
|
|
433
|
+
# Extracts the tool name from an "EXAMPLE|tool=..." record.
#
# The value runs up to the next unescaped "|"; a backslash keeps the following
# character inside the value, so an escaped pipe ("\|") does not cut the name
# short. Escaped pipes are turned back into "|" in the result.
#
# @param line [String]
# @return [String] the tool name, or "" when the record has no leading tool field
def example_tool(line)
  escaped_name = line[/\AEXAMPLE\|tool=((?:\\.|[^|])+)/, 1].to_s
  escaped_name.gsub("\\|", "|")
end
|
|
436
|
+
|
|
437
|
+
# Remembers which source first showed an example for the tool.
#
# An existing entry is never overwritten, so the first source wins.
#
# @return [Hash] the registry entry for the tool ({"source" => label})
def register_example(tool, source_label)
  known = @example_registry[tool]
  return known if known

  @example_registry[tool] = {"source" => source_label}
end
|
|
440
|
+
|
|
441
|
+
# Resolves the path to show for a source.
#
# Without a mapping the source itself is used. With a mapping, the entry for
# the expanded path is preferred, then the entry for the source as given, and
# finally the source itself.
#
# @param source [String]
# @param source_paths [Hash, nil]
# @return [String]
def display_source(source, source_paths)
  return source unless source_paths

  by_expanded = source_paths[File.expand_path(source)]
  return by_expanded if by_expanded

  source_paths[source] || source
end
|
|
446
|
+
|
|
447
|
+
# Label used for a source in the pack: its path relative to the current
# working directory when it lies inside it, otherwise the source as given.
#
# A relative path counts as "outside" only when it is ".." or starts with a
# "../" segment, so in-project names that merely begin with two dots (for
# example "..notes.md") are still shortened.
#
# @param source [String]
# @return [String]
def source_label(source)
  project_root = Pathname.new(Dir.pwd)
  relative = Pathname.new(source).relative_path_from(project_root).to_s
  outside_project = relative == ".." || relative.start_with?("../")
  outside_project ? source : relative
rescue ArgumentError
  # Raised when the two paths share no common base (e.g. a relative source
  # against the absolute working directory); fall back to the source as given.
  source
end
|
|
456
|
+
|
|
457
|
+
# Appends the pending section header to `lines`, if there is one.
#
# @param lines [Array<String>] mutated in place
# @param pending_section [String, nil]
# @return [Array<String>, nil] `lines` when a header was appended, else nil
def flush_pending_section!(lines, pending_section)
  return if pending_section.nil?

  lines << pending_section
end
|
|
460
|
+
|
|
461
|
+
# Shortens a SUMMARY record to its first sentence.
#
# The text is split after ".", "!" or "?" followed by whitespace; when the
# first piece is blank, the full text is used instead.
#
# @param line [String] a "SUMMARY|..." record
# @return the summary line built by ContextPack.summary_line
def compact_summary_line(line)
  text = line.delete_prefix("SUMMARY|")
  lead = text.split(/(?<=[.!?])\s+/).first.to_s.strip
  chosen = lead.empty? ? text : lead
  Ace::Compressor::Models::ContextPack.summary_line(chosen)
end
|
|
467
|
+
end
|
|
468
|
+
end
|
|
469
|
+
end
|
|
470
|
+
end
|