@stupify/cli 0.0.5 → 0.0.7
This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/dist/cache.js +0 -2
- package/dist/checks.js +1 -1
- package/dist/constants.d.ts +1 -1
- package/dist/constants.js +1 -1
- package/dist/counter-scout.js +1 -1
- package/dist/model.js +3 -3
- package/dist/render.js +1 -1
- package/dist/stupify.js +147 -19
- package/dist/types.d.ts +2 -0
- package/package.json +1 -1
- package/src/cache.ts +0 -2
- package/src/checks.ts +1 -1
- package/src/constants.ts +1 -1
- package/src/counter-scout.ts +1 -1
- package/src/model.ts +3 -3
- package/src/render.ts +1 -1
- package/src/stupify.ts +200 -28
- package/src/types.ts +2 -0
package/dist/cache.js
CHANGED
|
@@ -10,11 +10,9 @@ export async function cachedJson(namespace, key, compute) {
|
|
|
10
10
|
const filePath = cachePath(namespace, key);
|
|
11
11
|
try {
|
|
12
12
|
const value = JSON.parse(await readFile(filePath, "utf8"));
|
|
13
|
-
console.error(`cache hit ${namespace} ${key.slice(0, 12)}`);
|
|
14
13
|
return value;
|
|
15
14
|
}
|
|
16
15
|
catch {
|
|
17
|
-
console.error(`cache miss ${namespace} ${key.slice(0, 12)}`);
|
|
18
16
|
}
|
|
19
17
|
const value = await compute();
|
|
20
18
|
await writeCache(filePath, value).catch(() => undefined);
|
package/dist/checks.js
CHANGED
|
@@ -173,7 +173,7 @@ Prefer no match over a weak match.`,
|
|
|
173
173
|
"helper is domain-specific or used by multiple local call sites",
|
|
174
174
|
],
|
|
175
175
|
hookMode: "warn",
|
|
176
|
-
searchPrompt: "Find only tiny generic utility functions that recreate common helpers such as clamp, debounce, throttle, slugify,
|
|
176
|
+
searchPrompt: "Find only tiny generic utility functions that recreate common helpers such as clamp, debounce, throttle, slugify, sort, pick, omit, uniq, or shuffle without domain-specific behavior. Do not match group/resolve/parse/format helpers, domain formatting, feature constants, or helpers with multiple obvious call sites.",
|
|
177
177
|
searchExamples: {
|
|
178
178
|
match: [
|
|
179
179
|
"clampValue returns min, max, or value.",
|
package/dist/constants.d.ts
CHANGED
package/dist/constants.js
CHANGED
package/dist/counter-scout.js
CHANGED
|
@@ -120,7 +120,7 @@ function lintBypassSignal(value) {
|
|
|
120
120
|
}
|
|
121
121
|
function reinventedUtilitySignal(change) {
|
|
122
122
|
const name = change.entityName;
|
|
123
|
-
if (!/^(clamp|debounce|throttle|slug|slugify|
|
|
123
|
+
if (!/^(clamp|debounce|throttle|slug|slugify|sort|shuffle|memoize|pick|omit|uniq)/i.test(name))
|
|
124
124
|
return false;
|
|
125
125
|
const content = change.afterContent ?? "";
|
|
126
126
|
if (/currency|invoice|refund|subscription|tier|domain/i.test(`${name}\n${content}`))
|
package/dist/model.js
CHANGED
|
@@ -36,7 +36,7 @@ export async function loadLocalModel(modelPath, modelId, profile = "scout") {
|
|
|
36
36
|
if (runningModel !== modelId)
|
|
37
37
|
await stopManagedServer(runtime);
|
|
38
38
|
if (runningModel === modelId) {
|
|
39
|
-
console.error(`Using
|
|
39
|
+
console.error(`Using local model: ${selectedModel.name}`);
|
|
40
40
|
return {
|
|
41
41
|
id: modelId,
|
|
42
42
|
name: selectedModel.name,
|
|
@@ -107,7 +107,7 @@ async function startLlamaServer(modelPath, modelId, modelName, runtime) {
|
|
|
107
107
|
const logPath = path.join(logDir, "llama-server.log");
|
|
108
108
|
const out = await open(logPath, "a");
|
|
109
109
|
const err = await open(logPath, "a");
|
|
110
|
-
console.error(`Starting local
|
|
110
|
+
console.error(`Starting local model server: ${modelName}`);
|
|
111
111
|
console.error(`llama-server log: ${logPath}`);
|
|
112
112
|
const args = [
|
|
113
113
|
"-m",
|
|
@@ -158,7 +158,7 @@ async function stopManagedServer(runtime) {
|
|
|
158
158
|
throw new Error(`A llama-server is already running with ${runningModel ?? "another model"}.
|
|
159
159
|
Stop it before switching models, or use STUPIFY_LLAMA_SERVER_URL for that server.`);
|
|
160
160
|
}
|
|
161
|
-
console.error(
|
|
161
|
+
console.error("Restarting local model server for selected model.");
|
|
162
162
|
try {
|
|
163
163
|
process.kill(pid, "SIGTERM");
|
|
164
164
|
}
|
package/dist/render.js
CHANGED
|
@@ -12,7 +12,7 @@ ${run.stats.inputTokenCap ?? "unknown"} tokens
|
|
|
12
12
|
Stupify skipped the search rather than review truncated context.
|
|
13
13
|
Nothing was blocked.
|
|
14
14
|
Try:
|
|
15
|
-
|
|
15
|
+
rerun with ${sourceHint(command)} --max-search-input-tokens ${Math.max((run.stats.inputTokens ?? 12_000) + 1, (run.stats.inputTokenCap ?? 12_000) * 2)}`;
|
|
16
16
|
}
|
|
17
17
|
if (run.stats.skipped && run.stats.skipReason === "no_candidates") {
|
|
18
18
|
return `🧙 stupify 🪄
|
package/dist/stupify.js
CHANGED
|
@@ -48,12 +48,9 @@ export async function runSearchCommand(command, startedAt) {
|
|
|
48
48
|
const t = createTracer({
|
|
49
49
|
writeLine: () => undefined,
|
|
50
50
|
onEvent: (event) => {
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
if (event.detail)
|
|
55
|
-
parts.push(event.detail);
|
|
56
|
-
console.error(parts.join(" "));
|
|
51
|
+
if (command.json)
|
|
52
|
+
return;
|
|
53
|
+
console.error(formatStep(event.name, event.ms, event.count, event.detail));
|
|
57
54
|
},
|
|
58
55
|
});
|
|
59
56
|
const profile = await loadSearchProfile(command.searchProfilePath);
|
|
@@ -61,12 +58,12 @@ export async function runSearchCommand(command, startedAt) {
|
|
|
61
58
|
const patternIds = checks.map((check) => check.id);
|
|
62
59
|
const maxCandidates = effectiveMaxCandidates(command.maxCandidates, profile);
|
|
63
60
|
const maxSearchInputTokens = effectiveMaxSearchInputTokens(command.maxSearchInputTokens, profile);
|
|
61
|
+
printRunPlan(command, patternIds);
|
|
64
62
|
const { value: changeSet } = await t.trace("entity.diff", () => semChangeSetForCommand(command), {
|
|
65
63
|
count: (v) => v.summary.total,
|
|
66
64
|
detail: (v) => `${v.summary.fileCount} files`,
|
|
67
65
|
});
|
|
68
66
|
try {
|
|
69
|
-
printRunPlan(command, changeSet.summary.fileCount, changeSet.summary.total, patternIds);
|
|
70
67
|
const candidates = counterScoutTargets(changeSet, checks, maxCandidates);
|
|
71
68
|
const contexts = entityContextsFromChanges(candidates, changeSet.changes);
|
|
72
69
|
const targetsByPattern = countTargetsByPattern(contexts);
|
|
@@ -136,11 +133,18 @@ export async function runSearchCommand(command, startedAt) {
|
|
|
136
133
|
const pack = profile?.context === "sem" || searchContexts.length === contexts.length
|
|
137
134
|
? initialPack
|
|
138
135
|
: await repomixContextPack(changeSet.contextCwd, searchContexts, changeSet.changes, baseRepomixConfig);
|
|
139
|
-
const
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
136
|
+
const batches = await buildSearchBatches({
|
|
137
|
+
command,
|
|
138
|
+
changeSet,
|
|
139
|
+
contexts: searchContexts,
|
|
140
|
+
initialPack: pack,
|
|
141
|
+
checks,
|
|
142
|
+
profile,
|
|
143
|
+
includeCounterReasonInPrompt: command.includeCounterReasonInPrompt,
|
|
144
|
+
maxSearchInputTokens,
|
|
145
|
+
baseRepomixConfig,
|
|
146
|
+
});
|
|
147
|
+
if (batches.batches.length === 0) {
|
|
144
148
|
return {
|
|
145
149
|
schemaVersion: "search.v1",
|
|
146
150
|
mode: "search",
|
|
@@ -150,7 +154,7 @@ export async function runSearchCommand(command, startedAt) {
|
|
|
150
154
|
stats: {
|
|
151
155
|
elapsedMs: Date.now() - startedAt,
|
|
152
156
|
modelCalls: 0,
|
|
153
|
-
inputTokens,
|
|
157
|
+
inputTokens: batches.estimatedInputTokens,
|
|
154
158
|
inputTokenCap: maxSearchInputTokens,
|
|
155
159
|
skipped: true,
|
|
156
160
|
skipReason: "input_too_large",
|
|
@@ -161,6 +165,8 @@ export async function runSearchCommand(command, startedAt) {
|
|
|
161
165
|
repomixFiles: pack.filePaths.length,
|
|
162
166
|
repomixTokens: pack.totalTokens,
|
|
163
167
|
repomixConfig: pack.config,
|
|
168
|
+
searchBatches: 0,
|
|
169
|
+
skippedTargets: batches.skippedTargets,
|
|
164
170
|
profileId: profile?.id,
|
|
165
171
|
targetsByPattern: countTargetsByPattern(searchContexts),
|
|
166
172
|
targetsPreview: previewTargets(searchContexts),
|
|
@@ -168,7 +174,33 @@ export async function runSearchCommand(command, startedAt) {
|
|
|
168
174
|
matches: [],
|
|
169
175
|
};
|
|
170
176
|
}
|
|
171
|
-
|
|
177
|
+
if (batches.wasSplit && !command.json) {
|
|
178
|
+
console.error(`Search input is large; queued ${batches.batches.length} smaller search batches.`);
|
|
179
|
+
if (batches.skippedTargets > 0) {
|
|
180
|
+
console.error(`Skipped ${batches.skippedTargets} oversized targets that could not fit alone.`);
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
const modelPath = await firstRunModelBootstrap(command.model);
|
|
184
|
+
const model = await loadLocalModel(modelPath, command.model, "scout");
|
|
185
|
+
const matches = [];
|
|
186
|
+
let modelCalls = 0;
|
|
187
|
+
let inputTokens = 0;
|
|
188
|
+
let exactSkippedTargets = batches.skippedTargets;
|
|
189
|
+
for (const batch of batches.batches) {
|
|
190
|
+
const batchInputTokens = await countPromptTokens(model, batch.request.prompt);
|
|
191
|
+
inputTokens += batchInputTokens;
|
|
192
|
+
if (batchInputTokens > maxSearchInputTokens) {
|
|
193
|
+
exactSkippedTargets += batch.contexts.length;
|
|
194
|
+
if (!command.json) {
|
|
195
|
+
console.error(`Skipped ${batch.contexts.length} targets after exact token count exceeded the limit.`);
|
|
196
|
+
}
|
|
197
|
+
continue;
|
|
198
|
+
}
|
|
199
|
+
const { value } = await t.trace("search.model", () => runSearch(model, batch.request), { count: (v) => v.length });
|
|
200
|
+
modelCalls += 1;
|
|
201
|
+
matches.push(...value);
|
|
202
|
+
}
|
|
203
|
+
const uniqueMatches = dedupeMatches(matches);
|
|
172
204
|
return {
|
|
173
205
|
schemaVersion: "search.v1",
|
|
174
206
|
mode: "search",
|
|
@@ -177,7 +209,7 @@ export async function runSearchCommand(command, startedAt) {
|
|
|
177
209
|
patterns: patternIds,
|
|
178
210
|
stats: {
|
|
179
211
|
elapsedMs: Date.now() - startedAt,
|
|
180
|
-
modelCalls
|
|
212
|
+
modelCalls,
|
|
181
213
|
inputTokens,
|
|
182
214
|
inputTokenCap: maxSearchInputTokens,
|
|
183
215
|
filesChanged: changeSet.summary.fileCount,
|
|
@@ -187,17 +219,91 @@ export async function runSearchCommand(command, startedAt) {
|
|
|
187
219
|
repomixFiles: pack.filePaths.length,
|
|
188
220
|
repomixTokens: pack.totalTokens,
|
|
189
221
|
repomixConfig: pack.config,
|
|
222
|
+
searchBatches: batches.batches.length,
|
|
223
|
+
skippedTargets: exactSkippedTargets,
|
|
190
224
|
profileId: profile?.id,
|
|
191
225
|
targetsByPattern: countTargetsByPattern(searchContexts),
|
|
192
226
|
targetsPreview: previewTargets(searchContexts),
|
|
193
227
|
},
|
|
194
|
-
matches,
|
|
228
|
+
matches: uniqueMatches,
|
|
195
229
|
};
|
|
196
230
|
}
|
|
197
231
|
finally {
|
|
198
232
|
await changeSet.cleanup();
|
|
199
233
|
}
|
|
200
234
|
}
|
|
235
|
+
function dedupeMatches(matches) {
|
|
236
|
+
const seen = new Set();
|
|
237
|
+
return matches.filter((match) => {
|
|
238
|
+
const key = `${match.patternId}\n${match.proof.trim()}`;
|
|
239
|
+
if (seen.has(key))
|
|
240
|
+
return false;
|
|
241
|
+
seen.add(key);
|
|
242
|
+
return true;
|
|
243
|
+
});
|
|
244
|
+
}
|
|
245
|
+
async function buildSearchBatches(input) {
|
|
246
|
+
const first = makeSearchBatch(input, input.contexts, input.initialPack);
|
|
247
|
+
if (first.estimatedInputTokens <= input.maxSearchInputTokens) {
|
|
248
|
+
return {
|
|
249
|
+
batches: [first],
|
|
250
|
+
estimatedInputTokens: first.estimatedInputTokens,
|
|
251
|
+
skippedTargets: 0,
|
|
252
|
+
wasSplit: false,
|
|
253
|
+
};
|
|
254
|
+
}
|
|
255
|
+
const batches = [];
|
|
256
|
+
let skippedTargets = 0;
|
|
257
|
+
let currentContexts = [];
|
|
258
|
+
let currentBatch = null;
|
|
259
|
+
for (const context of input.contexts) {
|
|
260
|
+
const candidateContexts = [...currentContexts, context];
|
|
261
|
+
const candidateBatch = await makeSearchBatchWithPack(input, candidateContexts);
|
|
262
|
+
if (candidateBatch.estimatedInputTokens <= input.maxSearchInputTokens) {
|
|
263
|
+
currentContexts = candidateContexts;
|
|
264
|
+
currentBatch = candidateBatch;
|
|
265
|
+
continue;
|
|
266
|
+
}
|
|
267
|
+
if (currentBatch) {
|
|
268
|
+
batches.push(currentBatch);
|
|
269
|
+
currentContexts = [];
|
|
270
|
+
currentBatch = null;
|
|
271
|
+
}
|
|
272
|
+
const singleBatch = candidateContexts.length === 1
|
|
273
|
+
? candidateBatch
|
|
274
|
+
: await makeSearchBatchWithPack(input, [context]);
|
|
275
|
+
if (singleBatch.estimatedInputTokens <= input.maxSearchInputTokens) {
|
|
276
|
+
currentContexts = [context];
|
|
277
|
+
currentBatch = singleBatch;
|
|
278
|
+
}
|
|
279
|
+
else {
|
|
280
|
+
skippedTargets += 1;
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
if (currentBatch)
|
|
284
|
+
batches.push(currentBatch);
|
|
285
|
+
return {
|
|
286
|
+
batches,
|
|
287
|
+
estimatedInputTokens: first.estimatedInputTokens,
|
|
288
|
+
skippedTargets,
|
|
289
|
+
wasSplit: true,
|
|
290
|
+
};
|
|
291
|
+
}
|
|
292
|
+
function makeSearchBatch(input, contexts, pack) {
|
|
293
|
+
const request = buildSearchRequest(input.changeSet, contexts, pack, input.checks, input.profile, input.includeCounterReasonInPrompt);
|
|
294
|
+
return {
|
|
295
|
+
contexts,
|
|
296
|
+
pack,
|
|
297
|
+
request,
|
|
298
|
+
estimatedInputTokens: estimatePromptTokens(request.prompt),
|
|
299
|
+
};
|
|
300
|
+
}
|
|
301
|
+
async function makeSearchBatchWithPack(input, contexts) {
|
|
302
|
+
const pack = input.profile?.context === "sem"
|
|
303
|
+
? emptyContextPack()
|
|
304
|
+
: await repomixContextPack(input.changeSet.contextCwd, contexts, input.changeSet.changes, input.baseRepomixConfig);
|
|
305
|
+
return makeSearchBatch(input, contexts, pack);
|
|
306
|
+
}
|
|
201
307
|
function buildSearchRequest(changeSet, contexts, pack, patterns, profile, includeCounterReasonInPrompt) {
|
|
202
308
|
return searchRequest({
|
|
203
309
|
changeSet,
|
|
@@ -207,14 +313,36 @@ function buildSearchRequest(changeSet, contexts, pack, patterns, profile, includ
|
|
|
207
313
|
includeCounterReasonInPrompt: profile?.includeCounterReasonInPrompt ?? includeCounterReasonInPrompt,
|
|
208
314
|
});
|
|
209
315
|
}
|
|
210
|
-
function printRunPlan(command,
|
|
316
|
+
function printRunPlan(command, patternIds) {
|
|
211
317
|
if (command.json)
|
|
212
318
|
return;
|
|
213
319
|
console.error("🧙 stupify 🪄");
|
|
214
|
-
console.error(`
|
|
215
|
-
console.error(`Sem: ${filesChanged} files, ${entitiesScanned} changed entities`);
|
|
320
|
+
console.error(`Search: ${sourceLabel(command)}`);
|
|
216
321
|
console.error(`Patterns: ${patternIds.join(", ")}`);
|
|
217
322
|
}
|
|
323
|
+
function formatStep(name, ms, count, detail) {
|
|
324
|
+
if (name === "entity.diff")
|
|
325
|
+
return `Diff: ${detail ?? "changed files"}, ${count ?? 0} changed entities (${ms}ms)`;
|
|
326
|
+
if (name === "context.pack")
|
|
327
|
+
return `Context: ${count ?? 0} files, ${detail ?? "0 tokens"} (${ms}ms)`;
|
|
328
|
+
if (name === "search.model")
|
|
329
|
+
return `Model: ${count ?? 0} matches (${ms}ms)`;
|
|
330
|
+
return `${name}: ${ms}ms`;
|
|
331
|
+
}
|
|
332
|
+
function sourceLabel(command) {
|
|
333
|
+
if (command.kind === "since")
|
|
334
|
+
return `since ${command.since}`;
|
|
335
|
+
if (command.kind === "commit")
|
|
336
|
+
return `commit ${command.commit}`;
|
|
337
|
+
if (command.kind === "commits")
|
|
338
|
+
return `last ${command.count} commits`;
|
|
339
|
+
if (command.kind === "staged")
|
|
340
|
+
return "staged changes";
|
|
341
|
+
return "stdin diff";
|
|
342
|
+
}
|
|
343
|
+
function estimatePromptTokens(prompt) {
|
|
344
|
+
return Math.ceil(prompt.length / 3);
|
|
345
|
+
}
|
|
218
346
|
function countTargetsByPattern(contexts) {
|
|
219
347
|
const counts = {};
|
|
220
348
|
for (const context of contexts)
|
package/dist/types.d.ts
CHANGED
|
@@ -219,6 +219,8 @@ export type SearchRunJson = Readonly<{
|
|
|
219
219
|
repomixTokens?: number;
|
|
220
220
|
repomixConfig?: RepomixSearchConfig;
|
|
221
221
|
searchTargets?: number;
|
|
222
|
+
searchBatches?: number;
|
|
223
|
+
skippedTargets?: number;
|
|
222
224
|
profileId?: string;
|
|
223
225
|
targetsByPattern?: Readonly<Record<string, number>>;
|
|
224
226
|
targetsPreview?: readonly SearchTargetPreview[];
|
package/package.json
CHANGED
package/src/cache.ts
CHANGED
|
@@ -16,10 +16,8 @@ export async function cachedJson<T>(
|
|
|
16
16
|
const filePath = cachePath(namespace, key);
|
|
17
17
|
try {
|
|
18
18
|
const value = JSON.parse(await readFile(filePath, "utf8")) as T;
|
|
19
|
-
console.error(`cache hit ${namespace} ${key.slice(0, 12)}`);
|
|
20
19
|
return value;
|
|
21
20
|
} catch {
|
|
22
|
-
console.error(`cache miss ${namespace} ${key.slice(0, 12)}`);
|
|
23
21
|
}
|
|
24
22
|
|
|
25
23
|
const value = await compute();
|
package/src/checks.ts
CHANGED
|
@@ -174,7 +174,7 @@ Prefer no match over a weak match.`,
|
|
|
174
174
|
"helper is domain-specific or used by multiple local call sites",
|
|
175
175
|
],
|
|
176
176
|
hookMode: "warn",
|
|
177
|
-
searchPrompt: "Find only tiny generic utility functions that recreate common helpers such as clamp, debounce, throttle, slugify,
|
|
177
|
+
searchPrompt: "Find only tiny generic utility functions that recreate common helpers such as clamp, debounce, throttle, slugify, sort, pick, omit, uniq, or shuffle without domain-specific behavior. Do not match group/resolve/parse/format helpers, domain formatting, feature constants, or helpers with multiple obvious call sites.",
|
|
178
178
|
searchExamples: {
|
|
179
179
|
match: [
|
|
180
180
|
"clampValue returns min, max, or value.",
|
package/src/constants.ts
CHANGED
package/src/counter-scout.ts
CHANGED
|
@@ -140,7 +140,7 @@ function lintBypassSignal(value: string): boolean {
|
|
|
140
140
|
|
|
141
141
|
function reinventedUtilitySignal(change: SemChange): boolean {
|
|
142
142
|
const name = change.entityName;
|
|
143
|
-
if (!/^(clamp|debounce|throttle|slug|slugify|
|
|
143
|
+
if (!/^(clamp|debounce|throttle|slug|slugify|sort|shuffle|memoize|pick|omit|uniq)/i.test(name)) return false;
|
|
144
144
|
const content = change.afterContent ?? "";
|
|
145
145
|
if (/currency|invoice|refund|subscription|tier|domain/i.test(`${name}\n${content}`)) return false;
|
|
146
146
|
return true;
|
package/src/model.ts
CHANGED
|
@@ -85,7 +85,7 @@ export async function loadLocalModel(
|
|
|
85
85
|
if (runningModel !== modelId) await stopManagedServer(runtime);
|
|
86
86
|
if (runningModel === modelId) {
|
|
87
87
|
console.error(
|
|
88
|
-
`Using
|
|
88
|
+
`Using local model: ${selectedModel.name}`,
|
|
89
89
|
);
|
|
90
90
|
return {
|
|
91
91
|
id: modelId,
|
|
@@ -166,7 +166,7 @@ async function startLlamaServer(
|
|
|
166
166
|
const out = await open(logPath, "a");
|
|
167
167
|
const err = await open(logPath, "a");
|
|
168
168
|
|
|
169
|
-
console.error(`Starting local
|
|
169
|
+
console.error(`Starting local model server: ${modelName}`);
|
|
170
170
|
console.error(`llama-server log: ${logPath}`);
|
|
171
171
|
|
|
172
172
|
const args = [
|
|
@@ -215,7 +215,7 @@ Stop it before switching models, or use STUPIFY_LLAMA_SERVER_URL for that server
|
|
|
215
215
|
}
|
|
216
216
|
|
|
217
217
|
console.error(
|
|
218
|
-
|
|
218
|
+
"Restarting local model server for selected model.",
|
|
219
219
|
);
|
|
220
220
|
try {
|
|
221
221
|
process.kill(pid, "SIGTERM");
|
package/src/render.ts
CHANGED
|
@@ -14,7 +14,7 @@ ${run.stats.inputTokenCap ?? "unknown"} tokens
|
|
|
14
14
|
Stupify skipped the search rather than review truncated context.
|
|
15
15
|
Nothing was blocked.
|
|
16
16
|
Try:
|
|
17
|
-
|
|
17
|
+
rerun with ${sourceHint(command)} --max-search-input-tokens ${Math.max((run.stats.inputTokens ?? 12_000) + 1, (run.stats.inputTokenCap ?? 12_000) * 2)}`;
|
|
18
18
|
}
|
|
19
19
|
|
|
20
20
|
if (run.stats.skipped && run.stats.skipReason === "no_candidates") {
|
package/src/stupify.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import { realpathSync } from "node:fs";
|
|
4
4
|
import { fileURLToPath } from "node:url";
|
|
5
|
-
import { countPromptTokens, runSearch, searchRequest } from "./analysis.ts";
|
|
5
|
+
import { countPromptTokens, runSearch, searchRequest, type SearchRequest } from "./analysis.ts";
|
|
6
6
|
import { searchChecks } from "./checks.ts";
|
|
7
7
|
import { parseCommand } from "./command.ts";
|
|
8
8
|
import { counterScoutTargets } from "./counter-scout.ts";
|
|
@@ -58,10 +58,8 @@ export async function runSearchCommand(command: SearchCommand, startedAt: number
|
|
|
58
58
|
const t = createTracer({
|
|
59
59
|
writeLine: () => undefined,
|
|
60
60
|
onEvent: (event) => {
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
if (event.detail) parts.push(event.detail);
|
|
64
|
-
console.error(parts.join(" "));
|
|
61
|
+
if (command.json) return;
|
|
62
|
+
console.error(formatStep(event.name, event.ms, event.count, event.detail));
|
|
65
63
|
},
|
|
66
64
|
});
|
|
67
65
|
|
|
@@ -70,6 +68,7 @@ export async function runSearchCommand(command: SearchCommand, startedAt: number
|
|
|
70
68
|
const patternIds = checks.map((check) => check.id);
|
|
71
69
|
const maxCandidates = effectiveMaxCandidates(command.maxCandidates, profile);
|
|
72
70
|
const maxSearchInputTokens = effectiveMaxSearchInputTokens(command.maxSearchInputTokens, profile);
|
|
71
|
+
printRunPlan(command, patternIds);
|
|
73
72
|
const { value: changeSet } = await t.trace(
|
|
74
73
|
"entity.diff",
|
|
75
74
|
() => semChangeSetForCommand(command),
|
|
@@ -80,7 +79,6 @@ export async function runSearchCommand(command: SearchCommand, startedAt: number
|
|
|
80
79
|
);
|
|
81
80
|
|
|
82
81
|
try {
|
|
83
|
-
printRunPlan(command, changeSet.summary.fileCount, changeSet.summary.total, patternIds);
|
|
84
82
|
const candidates = counterScoutTargets(changeSet, checks, maxCandidates);
|
|
85
83
|
const contexts = entityContextsFromChanges(candidates, changeSet.changes);
|
|
86
84
|
const targetsByPattern = countTargetsByPattern(contexts);
|
|
@@ -155,19 +153,19 @@ export async function runSearchCommand(command: SearchCommand, startedAt: number
|
|
|
155
153
|
const pack = profile?.context === "sem" || searchContexts.length === contexts.length
|
|
156
154
|
? initialPack
|
|
157
155
|
: await repomixContextPack(changeSet.contextCwd, searchContexts, changeSet.changes, baseRepomixConfig);
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
const model = await loadLocalModel(modelPath, command.model, "scout");
|
|
161
|
-
const request = buildSearchRequest(
|
|
156
|
+
const batches = await buildSearchBatches({
|
|
157
|
+
command,
|
|
162
158
|
changeSet,
|
|
163
|
-
searchContexts,
|
|
164
|
-
pack,
|
|
159
|
+
contexts: searchContexts,
|
|
160
|
+
initialPack: pack,
|
|
165
161
|
checks,
|
|
166
162
|
profile,
|
|
167
|
-
command.includeCounterReasonInPrompt,
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
163
|
+
includeCounterReasonInPrompt: command.includeCounterReasonInPrompt,
|
|
164
|
+
maxSearchInputTokens,
|
|
165
|
+
baseRepomixConfig,
|
|
166
|
+
});
|
|
167
|
+
|
|
168
|
+
if (batches.batches.length === 0) {
|
|
171
169
|
return {
|
|
172
170
|
schemaVersion: "search.v1",
|
|
173
171
|
mode: "search",
|
|
@@ -177,7 +175,7 @@ export async function runSearchCommand(command: SearchCommand, startedAt: number
|
|
|
177
175
|
stats: {
|
|
178
176
|
elapsedMs: Date.now() - startedAt,
|
|
179
177
|
modelCalls: 0,
|
|
180
|
-
inputTokens,
|
|
178
|
+
inputTokens: batches.estimatedInputTokens,
|
|
181
179
|
inputTokenCap: maxSearchInputTokens,
|
|
182
180
|
skipped: true,
|
|
183
181
|
skipReason: "input_too_large",
|
|
@@ -188,6 +186,8 @@ export async function runSearchCommand(command: SearchCommand, startedAt: number
|
|
|
188
186
|
repomixFiles: pack.filePaths.length,
|
|
189
187
|
repomixTokens: pack.totalTokens,
|
|
190
188
|
repomixConfig: pack.config,
|
|
189
|
+
searchBatches: 0,
|
|
190
|
+
skippedTargets: batches.skippedTargets,
|
|
191
191
|
profileId: profile?.id,
|
|
192
192
|
targetsByPattern: countTargetsByPattern(searchContexts),
|
|
193
193
|
targetsPreview: previewTargets(searchContexts),
|
|
@@ -196,11 +196,38 @@ export async function runSearchCommand(command: SearchCommand, startedAt: number
|
|
|
196
196
|
};
|
|
197
197
|
}
|
|
198
198
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
(
|
|
202
|
-
|
|
203
|
-
|
|
199
|
+
if (batches.wasSplit && !command.json) {
|
|
200
|
+
console.error(`Search input is large; queued ${batches.batches.length} smaller search batches.`);
|
|
201
|
+
if (batches.skippedTargets > 0) {
|
|
202
|
+
console.error(`Skipped ${batches.skippedTargets} oversized targets that could not fit alone.`);
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
const modelPath = await firstRunModelBootstrap(command.model);
|
|
207
|
+
const model = await loadLocalModel(modelPath, command.model, "scout");
|
|
208
|
+
const matches = [];
|
|
209
|
+
let modelCalls = 0;
|
|
210
|
+
let inputTokens = 0;
|
|
211
|
+
let exactSkippedTargets = batches.skippedTargets;
|
|
212
|
+
for (const batch of batches.batches) {
|
|
213
|
+
const batchInputTokens = await countPromptTokens(model, batch.request.prompt);
|
|
214
|
+
inputTokens += batchInputTokens;
|
|
215
|
+
if (batchInputTokens > maxSearchInputTokens) {
|
|
216
|
+
exactSkippedTargets += batch.contexts.length;
|
|
217
|
+
if (!command.json) {
|
|
218
|
+
console.error(`Skipped ${batch.contexts.length} targets after exact token count exceeded the limit.`);
|
|
219
|
+
}
|
|
220
|
+
continue;
|
|
221
|
+
}
|
|
222
|
+
const { value } = await t.trace(
|
|
223
|
+
"search.model",
|
|
224
|
+
() => runSearch(model, batch.request),
|
|
225
|
+
{ count: (v) => v.length },
|
|
226
|
+
);
|
|
227
|
+
modelCalls += 1;
|
|
228
|
+
matches.push(...value);
|
|
229
|
+
}
|
|
230
|
+
const uniqueMatches = dedupeMatches(matches);
|
|
204
231
|
|
|
205
232
|
return {
|
|
206
233
|
schemaVersion: "search.v1",
|
|
@@ -210,7 +237,7 @@ export async function runSearchCommand(command: SearchCommand, startedAt: number
|
|
|
210
237
|
patterns: patternIds,
|
|
211
238
|
stats: {
|
|
212
239
|
elapsedMs: Date.now() - startedAt,
|
|
213
|
-
modelCalls
|
|
240
|
+
modelCalls,
|
|
214
241
|
inputTokens,
|
|
215
242
|
inputTokenCap: maxSearchInputTokens,
|
|
216
243
|
filesChanged: changeSet.summary.fileCount,
|
|
@@ -220,17 +247,146 @@ export async function runSearchCommand(command: SearchCommand, startedAt: number
|
|
|
220
247
|
repomixFiles: pack.filePaths.length,
|
|
221
248
|
repomixTokens: pack.totalTokens,
|
|
222
249
|
repomixConfig: pack.config,
|
|
250
|
+
searchBatches: batches.batches.length,
|
|
251
|
+
skippedTargets: exactSkippedTargets,
|
|
223
252
|
profileId: profile?.id,
|
|
224
253
|
targetsByPattern: countTargetsByPattern(searchContexts),
|
|
225
254
|
targetsPreview: previewTargets(searchContexts),
|
|
226
255
|
},
|
|
227
|
-
matches,
|
|
256
|
+
matches: uniqueMatches,
|
|
228
257
|
};
|
|
229
258
|
} finally {
|
|
230
259
|
await changeSet.cleanup();
|
|
231
260
|
}
|
|
232
261
|
}
|
|
233
262
|
|
|
263
|
+
function dedupeMatches<T extends { targetId: string; patternId: string; proof: string }>(matches: readonly T[]): readonly T[] {
|
|
264
|
+
const seen = new Set<string>();
|
|
265
|
+
return matches.filter((match) => {
|
|
266
|
+
const key = `${match.patternId}\n${match.proof.trim()}`;
|
|
267
|
+
if (seen.has(key)) return false;
|
|
268
|
+
seen.add(key);
|
|
269
|
+
return true;
|
|
270
|
+
});
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
type SearchBatch = Readonly<{
|
|
274
|
+
contexts: readonly SemContext[];
|
|
275
|
+
pack: SemContextPack;
|
|
276
|
+
request: SearchRequest;
|
|
277
|
+
estimatedInputTokens: number;
|
|
278
|
+
}>;
|
|
279
|
+
|
|
280
|
+
async function buildSearchBatches(input: Readonly<{
|
|
281
|
+
command: SearchCommand;
|
|
282
|
+
changeSet: Parameters<typeof searchRequest>[0]["changeSet"];
|
|
283
|
+
contexts: readonly SemContext[];
|
|
284
|
+
initialPack: SemContextPack;
|
|
285
|
+
checks: readonly StupifyCheck[];
|
|
286
|
+
profile: SearchProfile | null;
|
|
287
|
+
includeCounterReasonInPrompt: boolean;
|
|
288
|
+
maxSearchInputTokens: number;
|
|
289
|
+
baseRepomixConfig: Parameters<typeof repomixContextPack>[3];
|
|
290
|
+
}>): Promise<Readonly<{
|
|
291
|
+
batches: readonly SearchBatch[];
|
|
292
|
+
estimatedInputTokens: number;
|
|
293
|
+
skippedTargets: number;
|
|
294
|
+
wasSplit: boolean;
|
|
295
|
+
}>> {
|
|
296
|
+
const first = makeSearchBatch(input, input.contexts, input.initialPack);
|
|
297
|
+
if (first.estimatedInputTokens <= input.maxSearchInputTokens) {
|
|
298
|
+
return {
|
|
299
|
+
batches: [first],
|
|
300
|
+
estimatedInputTokens: first.estimatedInputTokens,
|
|
301
|
+
skippedTargets: 0,
|
|
302
|
+
wasSplit: false,
|
|
303
|
+
};
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
const batches: SearchBatch[] = [];
|
|
307
|
+
let skippedTargets = 0;
|
|
308
|
+
let currentContexts: readonly SemContext[] = [];
|
|
309
|
+
let currentBatch: SearchBatch | null = null;
|
|
310
|
+
|
|
311
|
+
for (const context of input.contexts) {
|
|
312
|
+
const candidateContexts = [...currentContexts, context];
|
|
313
|
+
const candidateBatch = await makeSearchBatchWithPack(input, candidateContexts);
|
|
314
|
+
if (candidateBatch.estimatedInputTokens <= input.maxSearchInputTokens) {
|
|
315
|
+
currentContexts = candidateContexts;
|
|
316
|
+
currentBatch = candidateBatch;
|
|
317
|
+
continue;
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
if (currentBatch) {
|
|
321
|
+
batches.push(currentBatch);
|
|
322
|
+
currentContexts = [];
|
|
323
|
+
currentBatch = null;
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
const singleBatch = candidateContexts.length === 1
|
|
327
|
+
? candidateBatch
|
|
328
|
+
: await makeSearchBatchWithPack(input, [context]);
|
|
329
|
+
if (singleBatch.estimatedInputTokens <= input.maxSearchInputTokens) {
|
|
330
|
+
currentContexts = [context];
|
|
331
|
+
currentBatch = singleBatch;
|
|
332
|
+
} else {
|
|
333
|
+
skippedTargets += 1;
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
if (currentBatch) batches.push(currentBatch);
|
|
338
|
+
|
|
339
|
+
return {
|
|
340
|
+
batches,
|
|
341
|
+
estimatedInputTokens: first.estimatedInputTokens,
|
|
342
|
+
skippedTargets,
|
|
343
|
+
wasSplit: true,
|
|
344
|
+
};
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
function makeSearchBatch(
|
|
348
|
+
input: Readonly<{
|
|
349
|
+
changeSet: Parameters<typeof searchRequest>[0]["changeSet"];
|
|
350
|
+
checks: readonly StupifyCheck[];
|
|
351
|
+
profile: SearchProfile | null;
|
|
352
|
+
includeCounterReasonInPrompt: boolean;
|
|
353
|
+
}>,
|
|
354
|
+
contexts: readonly SemContext[],
|
|
355
|
+
pack: SemContextPack,
|
|
356
|
+
): SearchBatch {
|
|
357
|
+
const request = buildSearchRequest(
|
|
358
|
+
input.changeSet,
|
|
359
|
+
contexts,
|
|
360
|
+
pack,
|
|
361
|
+
input.checks,
|
|
362
|
+
input.profile,
|
|
363
|
+
input.includeCounterReasonInPrompt,
|
|
364
|
+
);
|
|
365
|
+
return {
|
|
366
|
+
contexts,
|
|
367
|
+
pack,
|
|
368
|
+
request,
|
|
369
|
+
estimatedInputTokens: estimatePromptTokens(request.prompt),
|
|
370
|
+
};
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
async function makeSearchBatchWithPack(
|
|
374
|
+
input: Readonly<{
|
|
375
|
+
command: SearchCommand;
|
|
376
|
+
changeSet: Parameters<typeof searchRequest>[0]["changeSet"];
|
|
377
|
+
checks: readonly StupifyCheck[];
|
|
378
|
+
profile: SearchProfile | null;
|
|
379
|
+
includeCounterReasonInPrompt: boolean;
|
|
380
|
+
baseRepomixConfig: Parameters<typeof repomixContextPack>[3];
|
|
381
|
+
}>,
|
|
382
|
+
contexts: readonly SemContext[],
|
|
383
|
+
): Promise<SearchBatch> {
|
|
384
|
+
const pack = input.profile?.context === "sem"
|
|
385
|
+
? emptyContextPack()
|
|
386
|
+
: await repomixContextPack(input.changeSet.contextCwd, contexts, input.changeSet.changes, input.baseRepomixConfig);
|
|
387
|
+
return makeSearchBatch(input, contexts, pack);
|
|
388
|
+
}
|
|
389
|
+
|
|
234
390
|
function buildSearchRequest(
|
|
235
391
|
changeSet: Parameters<typeof searchRequest>[0]["changeSet"],
|
|
236
392
|
contexts: Parameters<typeof searchRequest>[0]["contexts"],
|
|
@@ -250,17 +406,33 @@ function buildSearchRequest(
|
|
|
250
406
|
|
|
251
407
|
function printRunPlan(
|
|
252
408
|
command: SearchCommand,
|
|
253
|
-
filesChanged: number,
|
|
254
|
-
entitiesScanned: number,
|
|
255
409
|
patternIds: readonly string[],
|
|
256
410
|
): void {
|
|
257
411
|
if (command.json) return;
|
|
258
412
|
console.error("🧙 stupify 🪄");
|
|
259
|
-
console.error(`
|
|
260
|
-
console.error(`Sem: ${filesChanged} files, ${entitiesScanned} changed entities`);
|
|
413
|
+
console.error(`Search: ${sourceLabel(command)}`);
|
|
261
414
|
console.error(`Patterns: ${patternIds.join(", ")}`);
|
|
262
415
|
}
|
|
263
416
|
|
|
417
|
+
function formatStep(name: string, ms: number, count?: number, detail?: string): string {
|
|
418
|
+
if (name === "entity.diff") return `Diff: ${detail ?? "changed files"}, ${count ?? 0} changed entities (${ms}ms)`;
|
|
419
|
+
if (name === "context.pack") return `Context: ${count ?? 0} files, ${detail ?? "0 tokens"} (${ms}ms)`;
|
|
420
|
+
if (name === "search.model") return `Model: ${count ?? 0} matches (${ms}ms)`;
|
|
421
|
+
return `${name}: ${ms}ms`;
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
function sourceLabel(command: SearchCommand): string {
|
|
425
|
+
if (command.kind === "since") return `since ${command.since}`;
|
|
426
|
+
if (command.kind === "commit") return `commit ${command.commit}`;
|
|
427
|
+
if (command.kind === "commits") return `last ${command.count} commits`;
|
|
428
|
+
if (command.kind === "staged") return "staged changes";
|
|
429
|
+
return "stdin diff";
|
|
430
|
+
}
|
|
431
|
+
|
|
432
|
+
function estimatePromptTokens(prompt: string): number {
|
|
433
|
+
return Math.ceil(prompt.length / 3);
|
|
434
|
+
}
|
|
435
|
+
|
|
264
436
|
function countTargetsByPattern(contexts: readonly SemContext[]): Record<string, number> {
|
|
265
437
|
const counts: Record<string, number> = {};
|
|
266
438
|
for (const context of contexts) counts[context.checkId] = (counts[context.checkId] ?? 0) + 1;
|
package/src/types.ts
CHANGED
|
@@ -216,6 +216,8 @@ export type SearchRunJson = Readonly<{
|
|
|
216
216
|
repomixTokens?: number;
|
|
217
217
|
repomixConfig?: RepomixSearchConfig;
|
|
218
218
|
searchTargets?: number;
|
|
219
|
+
searchBatches?: number;
|
|
220
|
+
skippedTargets?: number;
|
|
219
221
|
profileId?: string;
|
|
220
222
|
targetsByPattern?: Readonly<Record<string, number>>;
|
|
221
223
|
targetsPreview?: readonly SearchTargetPreview[];
|