@stupify/cli 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cache.js CHANGED
@@ -10,11 +10,9 @@ export async function cachedJson(namespace, key, compute) {
10
10
  const filePath = cachePath(namespace, key);
11
11
  try {
12
12
  const value = JSON.parse(await readFile(filePath, "utf8"));
13
- console.error(`cache hit ${namespace} ${key.slice(0, 12)}`);
14
13
  return value;
15
14
  }
16
15
  catch {
17
- console.error(`cache miss ${namespace} ${key.slice(0, 12)}`);
18
16
  }
19
17
  const value = await compute();
20
18
  await writeCache(filePath, value).catch(() => undefined);
package/dist/checks.js CHANGED
@@ -173,7 +173,7 @@ Prefer no match over a weak match.`,
173
173
  "helper is domain-specific or used by multiple local call sites",
174
174
  ],
175
175
  hookMode: "warn",
176
- searchPrompt: "Find only tiny generic utility functions that recreate common helpers such as clamp, debounce, throttle, slugify, group, sort, pick, omit, uniq, or shuffle without domain-specific behavior. Do not match resolve/parse/format helpers, domain formatting, feature constants, or helpers with multiple obvious call sites.",
176
+ searchPrompt: "Find only tiny generic utility functions that recreate common helpers such as clamp, debounce, throttle, slugify, sort, pick, omit, uniq, or shuffle without domain-specific behavior. Do not match group/resolve/parse/format helpers, domain formatting, feature constants, or helpers with multiple obvious call sites.",
177
177
  searchExamples: {
178
178
  match: [
179
179
  "clampValue returns min, max, or value.",
package/dist/constants.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- export declare const VERSION = "0.0.5";
1
+ export declare const VERSION = "0.0.7";
2
2
  import type { ModelConfig, ModelId } from "./types.ts";
3
3
  export declare const DEFAULT_MODEL_ID: ModelId;
4
4
  export declare const MODEL_REGISTRY: Record<ModelId, ModelConfig>;
package/dist/constants.js CHANGED
@@ -1,4 +1,4 @@
1
- export const VERSION = "0.0.5";
1
+ export const VERSION = "0.0.7";
2
2
  export const DEFAULT_MODEL_ID = "gemma-4-e2b";
3
3
  export const MODEL_REGISTRY = {
4
4
  "gemma-4-e2b": {
@@ -120,7 +120,7 @@ function lintBypassSignal(value) {
120
120
  }
121
121
  function reinventedUtilitySignal(change) {
122
122
  const name = change.entityName;
123
- if (!/^(clamp|debounce|throttle|slug|slugify|group|sort|shuffle|memoize|pick|omit|uniq)/i.test(name))
123
+ if (!/^(clamp|debounce|throttle|slug|slugify|sort|shuffle|memoize|pick|omit|uniq)/i.test(name))
124
124
  return false;
125
125
  const content = change.afterContent ?? "";
126
126
  if (/currency|invoice|refund|subscription|tier|domain/i.test(`${name}\n${content}`))
package/dist/model.js CHANGED
@@ -36,7 +36,7 @@ export async function loadLocalModel(modelPath, modelId, profile = "scout") {
36
36
  if (runningModel !== modelId)
37
37
  await stopManagedServer(runtime);
38
38
  if (runningModel === modelId) {
39
- console.error(`Using already-loaded local ${profile} model: ${selectedModel.name}`);
39
+ console.error(`Using local model: ${selectedModel.name}`);
40
40
  return {
41
41
  id: modelId,
42
42
  name: selectedModel.name,
@@ -107,7 +107,7 @@ async function startLlamaServer(modelPath, modelId, modelName, runtime) {
107
107
  const logPath = path.join(logDir, "llama-server.log");
108
108
  const out = await open(logPath, "a");
109
109
  const err = await open(logPath, "a");
110
- console.error(`Starting local ${runtime.profile} model server: ${modelName}`);
110
+ console.error(`Starting local model server: ${modelName}`);
111
111
  console.error(`llama-server log: ${logPath}`);
112
112
  const args = [
113
113
  "-m",
@@ -158,7 +158,7 @@ async function stopManagedServer(runtime) {
158
158
  throw new Error(`A llama-server is already running with ${runningModel ?? "another model"}.
159
159
  Stop it before switching models, or use STUPIFY_LLAMA_SERVER_URL for that server.`);
160
160
  }
161
- console.error(`Restarting local ${runtime.profile} model server for selected model.`);
161
+ console.error("Restarting local model server for selected model.");
162
162
  try {
163
163
  process.kill(pid, "SIGTERM");
164
164
  }
package/dist/render.js CHANGED
@@ -12,7 +12,7 @@ ${run.stats.inputTokenCap ?? "unknown"} tokens
12
12
  Stupify skipped the search rather than review truncated context.
13
13
  Nothing was blocked.
14
14
  Try:
15
- stupify ${sourceHint(command)} --max-search-input-tokens ${Math.max((run.stats.inputTokens ?? 12_000) + 1, (run.stats.inputTokenCap ?? 12_000) * 2)}`;
15
+ rerun with ${sourceHint(command)} --max-search-input-tokens ${Math.max((run.stats.inputTokens ?? 12_000) + 1, (run.stats.inputTokenCap ?? 12_000) * 2)}`;
16
16
  }
17
17
  if (run.stats.skipped && run.stats.skipReason === "no_candidates") {
18
18
  return `🧙 stupify 🪄
package/dist/stupify.js CHANGED
@@ -48,12 +48,9 @@ export async function runSearchCommand(command, startedAt) {
48
48
  const t = createTracer({
49
49
  writeLine: () => undefined,
50
50
  onEvent: (event) => {
51
- const parts = [`trace ${event.name}`, `${event.ms}ms`];
52
- if (event.count !== undefined)
53
- parts.push(`count=${event.count}`);
54
- if (event.detail)
55
- parts.push(event.detail);
56
- console.error(parts.join(" "));
51
+ if (command.json)
52
+ return;
53
+ console.error(formatStep(event.name, event.ms, event.count, event.detail));
57
54
  },
58
55
  });
59
56
  const profile = await loadSearchProfile(command.searchProfilePath);
@@ -61,12 +58,12 @@ export async function runSearchCommand(command, startedAt) {
61
58
  const patternIds = checks.map((check) => check.id);
62
59
  const maxCandidates = effectiveMaxCandidates(command.maxCandidates, profile);
63
60
  const maxSearchInputTokens = effectiveMaxSearchInputTokens(command.maxSearchInputTokens, profile);
61
+ printRunPlan(command, patternIds);
64
62
  const { value: changeSet } = await t.trace("entity.diff", () => semChangeSetForCommand(command), {
65
63
  count: (v) => v.summary.total,
66
64
  detail: (v) => `${v.summary.fileCount} files`,
67
65
  });
68
66
  try {
69
- printRunPlan(command, changeSet.summary.fileCount, changeSet.summary.total, patternIds);
70
67
  const candidates = counterScoutTargets(changeSet, checks, maxCandidates);
71
68
  const contexts = entityContextsFromChanges(candidates, changeSet.changes);
72
69
  const targetsByPattern = countTargetsByPattern(contexts);
@@ -136,11 +133,18 @@ export async function runSearchCommand(command, startedAt) {
136
133
  const pack = profile?.context === "sem" || searchContexts.length === contexts.length
137
134
  ? initialPack
138
135
  : await repomixContextPack(changeSet.contextCwd, searchContexts, changeSet.changes, baseRepomixConfig);
139
- const modelPath = await firstRunModelBootstrap(command.model);
140
- const model = await loadLocalModel(modelPath, command.model, "scout");
141
- const request = buildSearchRequest(changeSet, searchContexts, pack, checks, profile, command.includeCounterReasonInPrompt);
142
- const inputTokens = await countPromptTokens(model, request.prompt);
143
- if (inputTokens > maxSearchInputTokens) {
136
+ const batches = await buildSearchBatches({
137
+ command,
138
+ changeSet,
139
+ contexts: searchContexts,
140
+ initialPack: pack,
141
+ checks,
142
+ profile,
143
+ includeCounterReasonInPrompt: command.includeCounterReasonInPrompt,
144
+ maxSearchInputTokens,
145
+ baseRepomixConfig,
146
+ });
147
+ if (batches.batches.length === 0) {
144
148
  return {
145
149
  schemaVersion: "search.v1",
146
150
  mode: "search",
@@ -150,7 +154,7 @@ export async function runSearchCommand(command, startedAt) {
150
154
  stats: {
151
155
  elapsedMs: Date.now() - startedAt,
152
156
  modelCalls: 0,
153
- inputTokens,
157
+ inputTokens: batches.estimatedInputTokens,
154
158
  inputTokenCap: maxSearchInputTokens,
155
159
  skipped: true,
156
160
  skipReason: "input_too_large",
@@ -161,6 +165,8 @@ export async function runSearchCommand(command, startedAt) {
161
165
  repomixFiles: pack.filePaths.length,
162
166
  repomixTokens: pack.totalTokens,
163
167
  repomixConfig: pack.config,
168
+ searchBatches: 0,
169
+ skippedTargets: batches.skippedTargets,
164
170
  profileId: profile?.id,
165
171
  targetsByPattern: countTargetsByPattern(searchContexts),
166
172
  targetsPreview: previewTargets(searchContexts),
@@ -168,7 +174,33 @@ export async function runSearchCommand(command, startedAt) {
168
174
  matches: [],
169
175
  };
170
176
  }
171
- const { value: matches } = await t.trace("search.model", () => runSearch(model, request), { count: (v) => v.length });
177
+ if (batches.wasSplit && !command.json) {
178
+ console.error(`Search input is large; queued ${batches.batches.length} smaller search batches.`);
179
+ if (batches.skippedTargets > 0) {
180
+ console.error(`Skipped ${batches.skippedTargets} oversized targets that could not fit alone.`);
181
+ }
182
+ }
183
+ const modelPath = await firstRunModelBootstrap(command.model);
184
+ const model = await loadLocalModel(modelPath, command.model, "scout");
185
+ const matches = [];
186
+ let modelCalls = 0;
187
+ let inputTokens = 0;
188
+ let exactSkippedTargets = batches.skippedTargets;
189
+ for (const batch of batches.batches) {
190
+ const batchInputTokens = await countPromptTokens(model, batch.request.prompt);
191
+ inputTokens += batchInputTokens;
192
+ if (batchInputTokens > maxSearchInputTokens) {
193
+ exactSkippedTargets += batch.contexts.length;
194
+ if (!command.json) {
195
+ console.error(`Skipped ${batch.contexts.length} targets after exact token count exceeded the limit.`);
196
+ }
197
+ continue;
198
+ }
199
+ const { value } = await t.trace("search.model", () => runSearch(model, batch.request), { count: (v) => v.length });
200
+ modelCalls += 1;
201
+ matches.push(...value);
202
+ }
203
+ const uniqueMatches = dedupeMatches(matches);
172
204
  return {
173
205
  schemaVersion: "search.v1",
174
206
  mode: "search",
@@ -177,7 +209,7 @@ export async function runSearchCommand(command, startedAt) {
177
209
  patterns: patternIds,
178
210
  stats: {
179
211
  elapsedMs: Date.now() - startedAt,
180
- modelCalls: 1,
212
+ modelCalls,
181
213
  inputTokens,
182
214
  inputTokenCap: maxSearchInputTokens,
183
215
  filesChanged: changeSet.summary.fileCount,
@@ -187,17 +219,91 @@ export async function runSearchCommand(command, startedAt) {
187
219
  repomixFiles: pack.filePaths.length,
188
220
  repomixTokens: pack.totalTokens,
189
221
  repomixConfig: pack.config,
222
+ searchBatches: batches.batches.length,
223
+ skippedTargets: exactSkippedTargets,
190
224
  profileId: profile?.id,
191
225
  targetsByPattern: countTargetsByPattern(searchContexts),
192
226
  targetsPreview: previewTargets(searchContexts),
193
227
  },
194
- matches,
228
+ matches: uniqueMatches,
195
229
  };
196
230
  }
197
231
  finally {
198
232
  await changeSet.cleanup();
199
233
  }
200
234
  }
235
+ function dedupeMatches(matches) {
236
+ const seen = new Set();
237
+ return matches.filter((match) => {
238
+ const key = `${match.patternId}\n${match.proof.trim()}`;
239
+ if (seen.has(key))
240
+ return false;
241
+ seen.add(key);
242
+ return true;
243
+ });
244
+ }
245
+ async function buildSearchBatches(input) {
246
+ const first = makeSearchBatch(input, input.contexts, input.initialPack);
247
+ if (first.estimatedInputTokens <= input.maxSearchInputTokens) {
248
+ return {
249
+ batches: [first],
250
+ estimatedInputTokens: first.estimatedInputTokens,
251
+ skippedTargets: 0,
252
+ wasSplit: false,
253
+ };
254
+ }
255
+ const batches = [];
256
+ let skippedTargets = 0;
257
+ let currentContexts = [];
258
+ let currentBatch = null;
259
+ for (const context of input.contexts) {
260
+ const candidateContexts = [...currentContexts, context];
261
+ const candidateBatch = await makeSearchBatchWithPack(input, candidateContexts);
262
+ if (candidateBatch.estimatedInputTokens <= input.maxSearchInputTokens) {
263
+ currentContexts = candidateContexts;
264
+ currentBatch = candidateBatch;
265
+ continue;
266
+ }
267
+ if (currentBatch) {
268
+ batches.push(currentBatch);
269
+ currentContexts = [];
270
+ currentBatch = null;
271
+ }
272
+ const singleBatch = candidateContexts.length === 1
273
+ ? candidateBatch
274
+ : await makeSearchBatchWithPack(input, [context]);
275
+ if (singleBatch.estimatedInputTokens <= input.maxSearchInputTokens) {
276
+ currentContexts = [context];
277
+ currentBatch = singleBatch;
278
+ }
279
+ else {
280
+ skippedTargets += 1;
281
+ }
282
+ }
283
+ if (currentBatch)
284
+ batches.push(currentBatch);
285
+ return {
286
+ batches,
287
+ estimatedInputTokens: first.estimatedInputTokens,
288
+ skippedTargets,
289
+ wasSplit: true,
290
+ };
291
+ }
292
+ function makeSearchBatch(input, contexts, pack) {
293
+ const request = buildSearchRequest(input.changeSet, contexts, pack, input.checks, input.profile, input.includeCounterReasonInPrompt);
294
+ return {
295
+ contexts,
296
+ pack,
297
+ request,
298
+ estimatedInputTokens: estimatePromptTokens(request.prompt),
299
+ };
300
+ }
301
+ async function makeSearchBatchWithPack(input, contexts) {
302
+ const pack = input.profile?.context === "sem"
303
+ ? emptyContextPack()
304
+ : await repomixContextPack(input.changeSet.contextCwd, contexts, input.changeSet.changes, input.baseRepomixConfig);
305
+ return makeSearchBatch(input, contexts, pack);
306
+ }
201
307
  function buildSearchRequest(changeSet, contexts, pack, patterns, profile, includeCounterReasonInPrompt) {
202
308
  return searchRequest({
203
309
  changeSet,
@@ -207,14 +313,36 @@ function buildSearchRequest(changeSet, contexts, pack, patterns, profile, includ
207
313
  includeCounterReasonInPrompt: profile?.includeCounterReasonInPrompt ?? includeCounterReasonInPrompt,
208
314
  });
209
315
  }
210
- function printRunPlan(command, filesChanged, entitiesScanned, patternIds) {
316
+ function printRunPlan(command, patternIds) {
211
317
  if (command.json)
212
318
  return;
213
319
  console.error("🧙 stupify 🪄");
214
- console.error(`Mode: search (${command.source})`);
215
- console.error(`Sem: ${filesChanged} files, ${entitiesScanned} changed entities`);
320
+ console.error(`Search: ${sourceLabel(command)}`);
216
321
  console.error(`Patterns: ${patternIds.join(", ")}`);
217
322
  }
323
+ function formatStep(name, ms, count, detail) {
324
+ if (name === "entity.diff")
325
+ return `Diff: ${detail ?? "changed files"}, ${count ?? 0} changed entities (${ms}ms)`;
326
+ if (name === "context.pack")
327
+ return `Context: ${count ?? 0} files, ${detail ?? "0 tokens"} (${ms}ms)`;
328
+ if (name === "search.model")
329
+ return `Model: ${count ?? 0} matches (${ms}ms)`;
330
+ return `${name}: ${ms}ms`;
331
+ }
332
+ function sourceLabel(command) {
333
+ if (command.kind === "since")
334
+ return `since ${command.since}`;
335
+ if (command.kind === "commit")
336
+ return `commit ${command.commit}`;
337
+ if (command.kind === "commits")
338
+ return `last ${command.count} commits`;
339
+ if (command.kind === "staged")
340
+ return "staged changes";
341
+ return "stdin diff";
342
+ }
343
+ function estimatePromptTokens(prompt) {
344
+ return Math.ceil(prompt.length / 3);
345
+ }
218
346
  function countTargetsByPattern(contexts) {
219
347
  const counts = {};
220
348
  for (const context of contexts)
package/dist/types.d.ts CHANGED
@@ -219,6 +219,8 @@ export type SearchRunJson = Readonly<{
219
219
  repomixTokens?: number;
220
220
  repomixConfig?: RepomixSearchConfig;
221
221
  searchTargets?: number;
222
+ searchBatches?: number;
223
+ skippedTargets?: number;
222
224
  profileId?: string;
223
225
  targetsByPattern?: Readonly<Record<string, number>>;
224
226
  targetsPreview?: readonly SearchTargetPreview[];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stupify/cli",
3
- "version": "0.0.5",
3
+ "version": "0.0.7",
4
4
  "description": "Local-only diagnostic CLI for checking whether AI is making you dumber.",
5
5
  "private": false,
6
6
  "type": "module",
package/src/cache.ts CHANGED
@@ -16,10 +16,8 @@ export async function cachedJson<T>(
16
16
  const filePath = cachePath(namespace, key);
17
17
  try {
18
18
  const value = JSON.parse(await readFile(filePath, "utf8")) as T;
19
- console.error(`cache hit ${namespace} ${key.slice(0, 12)}`);
20
19
  return value;
21
20
  } catch {
22
- console.error(`cache miss ${namespace} ${key.slice(0, 12)}`);
23
21
  }
24
22
 
25
23
  const value = await compute();
package/src/checks.ts CHANGED
@@ -174,7 +174,7 @@ Prefer no match over a weak match.`,
174
174
  "helper is domain-specific or used by multiple local call sites",
175
175
  ],
176
176
  hookMode: "warn",
177
- searchPrompt: "Find only tiny generic utility functions that recreate common helpers such as clamp, debounce, throttle, slugify, group, sort, pick, omit, uniq, or shuffle without domain-specific behavior. Do not match resolve/parse/format helpers, domain formatting, feature constants, or helpers with multiple obvious call sites.",
177
+ searchPrompt: "Find only tiny generic utility functions that recreate common helpers such as clamp, debounce, throttle, slugify, sort, pick, omit, uniq, or shuffle without domain-specific behavior. Do not match group/resolve/parse/format helpers, domain formatting, feature constants, or helpers with multiple obvious call sites.",
178
178
  searchExamples: {
179
179
  match: [
180
180
  "clampValue returns min, max, or value.",
package/src/constants.ts CHANGED
@@ -1,4 +1,4 @@
1
- export const VERSION = "0.0.5";
1
+ export const VERSION = "0.0.7";
2
2
  import type { ModelConfig, ModelId } from "./types.ts";
3
3
 
4
4
  export const DEFAULT_MODEL_ID: ModelId = "gemma-4-e2b";
@@ -140,7 +140,7 @@ function lintBypassSignal(value: string): boolean {
140
140
 
141
141
  function reinventedUtilitySignal(change: SemChange): boolean {
142
142
  const name = change.entityName;
143
- if (!/^(clamp|debounce|throttle|slug|slugify|group|sort|shuffle|memoize|pick|omit|uniq)/i.test(name)) return false;
143
+ if (!/^(clamp|debounce|throttle|slug|slugify|sort|shuffle|memoize|pick|omit|uniq)/i.test(name)) return false;
144
144
  const content = change.afterContent ?? "";
145
145
  if (/currency|invoice|refund|subscription|tier|domain/i.test(`${name}\n${content}`)) return false;
146
146
  return true;
package/src/model.ts CHANGED
@@ -85,7 +85,7 @@ export async function loadLocalModel(
85
85
  if (runningModel !== modelId) await stopManagedServer(runtime);
86
86
  if (runningModel === modelId) {
87
87
  console.error(
88
- `Using already-loaded local ${profile} model: ${selectedModel.name}`,
88
+ `Using local model: ${selectedModel.name}`,
89
89
  );
90
90
  return {
91
91
  id: modelId,
@@ -166,7 +166,7 @@ async function startLlamaServer(
166
166
  const out = await open(logPath, "a");
167
167
  const err = await open(logPath, "a");
168
168
 
169
- console.error(`Starting local ${runtime.profile} model server: ${modelName}`);
169
+ console.error(`Starting local model server: ${modelName}`);
170
170
  console.error(`llama-server log: ${logPath}`);
171
171
 
172
172
  const args = [
@@ -215,7 +215,7 @@ Stop it before switching models, or use STUPIFY_LLAMA_SERVER_URL for that server
215
215
  }
216
216
 
217
217
  console.error(
218
- `Restarting local ${runtime.profile} model server for selected model.`,
218
+ "Restarting local model server for selected model.",
219
219
  );
220
220
  try {
221
221
  process.kill(pid, "SIGTERM");
package/src/render.ts CHANGED
@@ -14,7 +14,7 @@ ${run.stats.inputTokenCap ?? "unknown"} tokens
14
14
  Stupify skipped the search rather than review truncated context.
15
15
  Nothing was blocked.
16
16
  Try:
17
- stupify ${sourceHint(command)} --max-search-input-tokens ${Math.max((run.stats.inputTokens ?? 12_000) + 1, (run.stats.inputTokenCap ?? 12_000) * 2)}`;
17
+ rerun with ${sourceHint(command)} --max-search-input-tokens ${Math.max((run.stats.inputTokens ?? 12_000) + 1, (run.stats.inputTokenCap ?? 12_000) * 2)}`;
18
18
  }
19
19
 
20
20
  if (run.stats.skipped && run.stats.skipReason === "no_candidates") {
package/src/stupify.ts CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  import { realpathSync } from "node:fs";
4
4
  import { fileURLToPath } from "node:url";
5
- import { countPromptTokens, runSearch, searchRequest } from "./analysis.ts";
5
+ import { countPromptTokens, runSearch, searchRequest, type SearchRequest } from "./analysis.ts";
6
6
  import { searchChecks } from "./checks.ts";
7
7
  import { parseCommand } from "./command.ts";
8
8
  import { counterScoutTargets } from "./counter-scout.ts";
@@ -58,10 +58,8 @@ export async function runSearchCommand(command: SearchCommand, startedAt: number
58
58
  const t = createTracer({
59
59
  writeLine: () => undefined,
60
60
  onEvent: (event) => {
61
- const parts = [`trace ${event.name}`, `${event.ms}ms`];
62
- if (event.count !== undefined) parts.push(`count=${event.count}`);
63
- if (event.detail) parts.push(event.detail);
64
- console.error(parts.join(" "));
61
+ if (command.json) return;
62
+ console.error(formatStep(event.name, event.ms, event.count, event.detail));
65
63
  },
66
64
  });
67
65
 
@@ -70,6 +68,7 @@ export async function runSearchCommand(command: SearchCommand, startedAt: number
70
68
  const patternIds = checks.map((check) => check.id);
71
69
  const maxCandidates = effectiveMaxCandidates(command.maxCandidates, profile);
72
70
  const maxSearchInputTokens = effectiveMaxSearchInputTokens(command.maxSearchInputTokens, profile);
71
+ printRunPlan(command, patternIds);
73
72
  const { value: changeSet } = await t.trace(
74
73
  "entity.diff",
75
74
  () => semChangeSetForCommand(command),
@@ -80,7 +79,6 @@ export async function runSearchCommand(command: SearchCommand, startedAt: number
80
79
  );
81
80
 
82
81
  try {
83
- printRunPlan(command, changeSet.summary.fileCount, changeSet.summary.total, patternIds);
84
82
  const candidates = counterScoutTargets(changeSet, checks, maxCandidates);
85
83
  const contexts = entityContextsFromChanges(candidates, changeSet.changes);
86
84
  const targetsByPattern = countTargetsByPattern(contexts);
@@ -155,19 +153,19 @@ export async function runSearchCommand(command: SearchCommand, startedAt: number
155
153
  const pack = profile?.context === "sem" || searchContexts.length === contexts.length
156
154
  ? initialPack
157
155
  : await repomixContextPack(changeSet.contextCwd, searchContexts, changeSet.changes, baseRepomixConfig);
158
-
159
- const modelPath = await firstRunModelBootstrap(command.model);
160
- const model = await loadLocalModel(modelPath, command.model, "scout");
161
- const request = buildSearchRequest(
156
+ const batches = await buildSearchBatches({
157
+ command,
162
158
  changeSet,
163
- searchContexts,
164
- pack,
159
+ contexts: searchContexts,
160
+ initialPack: pack,
165
161
  checks,
166
162
  profile,
167
- command.includeCounterReasonInPrompt,
168
- );
169
- const inputTokens = await countPromptTokens(model, request.prompt);
170
- if (inputTokens > maxSearchInputTokens) {
163
+ includeCounterReasonInPrompt: command.includeCounterReasonInPrompt,
164
+ maxSearchInputTokens,
165
+ baseRepomixConfig,
166
+ });
167
+
168
+ if (batches.batches.length === 0) {
171
169
  return {
172
170
  schemaVersion: "search.v1",
173
171
  mode: "search",
@@ -177,7 +175,7 @@ export async function runSearchCommand(command: SearchCommand, startedAt: number
177
175
  stats: {
178
176
  elapsedMs: Date.now() - startedAt,
179
177
  modelCalls: 0,
180
- inputTokens,
178
+ inputTokens: batches.estimatedInputTokens,
181
179
  inputTokenCap: maxSearchInputTokens,
182
180
  skipped: true,
183
181
  skipReason: "input_too_large",
@@ -188,6 +186,8 @@ export async function runSearchCommand(command: SearchCommand, startedAt: number
188
186
  repomixFiles: pack.filePaths.length,
189
187
  repomixTokens: pack.totalTokens,
190
188
  repomixConfig: pack.config,
189
+ searchBatches: 0,
190
+ skippedTargets: batches.skippedTargets,
191
191
  profileId: profile?.id,
192
192
  targetsByPattern: countTargetsByPattern(searchContexts),
193
193
  targetsPreview: previewTargets(searchContexts),
@@ -196,11 +196,38 @@ export async function runSearchCommand(command: SearchCommand, startedAt: number
196
196
  };
197
197
  }
198
198
 
199
- const { value: matches } = await t.trace(
200
- "search.model",
201
- () => runSearch(model, request),
202
- { count: (v) => v.length },
203
- );
199
+ if (batches.wasSplit && !command.json) {
200
+ console.error(`Search input is large; queued ${batches.batches.length} smaller search batches.`);
201
+ if (batches.skippedTargets > 0) {
202
+ console.error(`Skipped ${batches.skippedTargets} oversized targets that could not fit alone.`);
203
+ }
204
+ }
205
+
206
+ const modelPath = await firstRunModelBootstrap(command.model);
207
+ const model = await loadLocalModel(modelPath, command.model, "scout");
208
+ const matches = [];
209
+ let modelCalls = 0;
210
+ let inputTokens = 0;
211
+ let exactSkippedTargets = batches.skippedTargets;
212
+ for (const batch of batches.batches) {
213
+ const batchInputTokens = await countPromptTokens(model, batch.request.prompt);
214
+ inputTokens += batchInputTokens;
215
+ if (batchInputTokens > maxSearchInputTokens) {
216
+ exactSkippedTargets += batch.contexts.length;
217
+ if (!command.json) {
218
+ console.error(`Skipped ${batch.contexts.length} targets after exact token count exceeded the limit.`);
219
+ }
220
+ continue;
221
+ }
222
+ const { value } = await t.trace(
223
+ "search.model",
224
+ () => runSearch(model, batch.request),
225
+ { count: (v) => v.length },
226
+ );
227
+ modelCalls += 1;
228
+ matches.push(...value);
229
+ }
230
+ const uniqueMatches = dedupeMatches(matches);
204
231
 
205
232
  return {
206
233
  schemaVersion: "search.v1",
@@ -210,7 +237,7 @@ export async function runSearchCommand(command: SearchCommand, startedAt: number
210
237
  patterns: patternIds,
211
238
  stats: {
212
239
  elapsedMs: Date.now() - startedAt,
213
- modelCalls: 1,
240
+ modelCalls,
214
241
  inputTokens,
215
242
  inputTokenCap: maxSearchInputTokens,
216
243
  filesChanged: changeSet.summary.fileCount,
@@ -220,17 +247,146 @@ export async function runSearchCommand(command: SearchCommand, startedAt: number
220
247
  repomixFiles: pack.filePaths.length,
221
248
  repomixTokens: pack.totalTokens,
222
249
  repomixConfig: pack.config,
250
+ searchBatches: batches.batches.length,
251
+ skippedTargets: exactSkippedTargets,
223
252
  profileId: profile?.id,
224
253
  targetsByPattern: countTargetsByPattern(searchContexts),
225
254
  targetsPreview: previewTargets(searchContexts),
226
255
  },
227
- matches,
256
+ matches: uniqueMatches,
228
257
  };
229
258
  } finally {
230
259
  await changeSet.cleanup();
231
260
  }
232
261
  }
233
262
 
263
+ function dedupeMatches<T extends { targetId: string; patternId: string; proof: string }>(matches: readonly T[]): readonly T[] {
264
+ const seen = new Set<string>();
265
+ return matches.filter((match) => {
266
+ const key = `${match.patternId}\n${match.proof.trim()}`;
267
+ if (seen.has(key)) return false;
268
+ seen.add(key);
269
+ return true;
270
+ });
271
+ }
272
+
273
+ type SearchBatch = Readonly<{
274
+ contexts: readonly SemContext[];
275
+ pack: SemContextPack;
276
+ request: SearchRequest;
277
+ estimatedInputTokens: number;
278
+ }>;
279
+
280
+ async function buildSearchBatches(input: Readonly<{
281
+ command: SearchCommand;
282
+ changeSet: Parameters<typeof searchRequest>[0]["changeSet"];
283
+ contexts: readonly SemContext[];
284
+ initialPack: SemContextPack;
285
+ checks: readonly StupifyCheck[];
286
+ profile: SearchProfile | null;
287
+ includeCounterReasonInPrompt: boolean;
288
+ maxSearchInputTokens: number;
289
+ baseRepomixConfig: Parameters<typeof repomixContextPack>[3];
290
+ }>): Promise<Readonly<{
291
+ batches: readonly SearchBatch[];
292
+ estimatedInputTokens: number;
293
+ skippedTargets: number;
294
+ wasSplit: boolean;
295
+ }>> {
296
+ const first = makeSearchBatch(input, input.contexts, input.initialPack);
297
+ if (first.estimatedInputTokens <= input.maxSearchInputTokens) {
298
+ return {
299
+ batches: [first],
300
+ estimatedInputTokens: first.estimatedInputTokens,
301
+ skippedTargets: 0,
302
+ wasSplit: false,
303
+ };
304
+ }
305
+
306
+ const batches: SearchBatch[] = [];
307
+ let skippedTargets = 0;
308
+ let currentContexts: readonly SemContext[] = [];
309
+ let currentBatch: SearchBatch | null = null;
310
+
311
+ for (const context of input.contexts) {
312
+ const candidateContexts = [...currentContexts, context];
313
+ const candidateBatch = await makeSearchBatchWithPack(input, candidateContexts);
314
+ if (candidateBatch.estimatedInputTokens <= input.maxSearchInputTokens) {
315
+ currentContexts = candidateContexts;
316
+ currentBatch = candidateBatch;
317
+ continue;
318
+ }
319
+
320
+ if (currentBatch) {
321
+ batches.push(currentBatch);
322
+ currentContexts = [];
323
+ currentBatch = null;
324
+ }
325
+
326
+ const singleBatch = candidateContexts.length === 1
327
+ ? candidateBatch
328
+ : await makeSearchBatchWithPack(input, [context]);
329
+ if (singleBatch.estimatedInputTokens <= input.maxSearchInputTokens) {
330
+ currentContexts = [context];
331
+ currentBatch = singleBatch;
332
+ } else {
333
+ skippedTargets += 1;
334
+ }
335
+ }
336
+
337
+ if (currentBatch) batches.push(currentBatch);
338
+
339
+ return {
340
+ batches,
341
+ estimatedInputTokens: first.estimatedInputTokens,
342
+ skippedTargets,
343
+ wasSplit: true,
344
+ };
345
+ }
346
+
347
+ function makeSearchBatch(
348
+ input: Readonly<{
349
+ changeSet: Parameters<typeof searchRequest>[0]["changeSet"];
350
+ checks: readonly StupifyCheck[];
351
+ profile: SearchProfile | null;
352
+ includeCounterReasonInPrompt: boolean;
353
+ }>,
354
+ contexts: readonly SemContext[],
355
+ pack: SemContextPack,
356
+ ): SearchBatch {
357
+ const request = buildSearchRequest(
358
+ input.changeSet,
359
+ contexts,
360
+ pack,
361
+ input.checks,
362
+ input.profile,
363
+ input.includeCounterReasonInPrompt,
364
+ );
365
+ return {
366
+ contexts,
367
+ pack,
368
+ request,
369
+ estimatedInputTokens: estimatePromptTokens(request.prompt),
370
+ };
371
+ }
372
+
373
+ async function makeSearchBatchWithPack(
374
+ input: Readonly<{
375
+ command: SearchCommand;
376
+ changeSet: Parameters<typeof searchRequest>[0]["changeSet"];
377
+ checks: readonly StupifyCheck[];
378
+ profile: SearchProfile | null;
379
+ includeCounterReasonInPrompt: boolean;
380
+ baseRepomixConfig: Parameters<typeof repomixContextPack>[3];
381
+ }>,
382
+ contexts: readonly SemContext[],
383
+ ): Promise<SearchBatch> {
384
+ const pack = input.profile?.context === "sem"
385
+ ? emptyContextPack()
386
+ : await repomixContextPack(input.changeSet.contextCwd, contexts, input.changeSet.changes, input.baseRepomixConfig);
387
+ return makeSearchBatch(input, contexts, pack);
388
+ }
389
+
234
390
  function buildSearchRequest(
235
391
  changeSet: Parameters<typeof searchRequest>[0]["changeSet"],
236
392
  contexts: Parameters<typeof searchRequest>[0]["contexts"],
@@ -250,17 +406,33 @@ function buildSearchRequest(
250
406
 
251
407
  function printRunPlan(
252
408
  command: SearchCommand,
253
- filesChanged: number,
254
- entitiesScanned: number,
255
409
  patternIds: readonly string[],
256
410
  ): void {
257
411
  if (command.json) return;
258
412
  console.error("🧙 stupify 🪄");
259
- console.error(`Mode: search (${command.source})`);
260
- console.error(`Sem: ${filesChanged} files, ${entitiesScanned} changed entities`);
413
+ console.error(`Search: ${sourceLabel(command)}`);
261
414
  console.error(`Patterns: ${patternIds.join(", ")}`);
262
415
  }
263
416
 
417
+ function formatStep(name: string, ms: number, count?: number, detail?: string): string {
418
+ if (name === "entity.diff") return `Diff: ${detail ?? "changed files"}, ${count ?? 0} changed entities (${ms}ms)`;
419
+ if (name === "context.pack") return `Context: ${count ?? 0} files, ${detail ?? "0 tokens"} (${ms}ms)`;
420
+ if (name === "search.model") return `Model: ${count ?? 0} matches (${ms}ms)`;
421
+ return `${name}: ${ms}ms`;
422
+ }
423
+
424
+ function sourceLabel(command: SearchCommand): string {
425
+ if (command.kind === "since") return `since ${command.since}`;
426
+ if (command.kind === "commit") return `commit ${command.commit}`;
427
+ if (command.kind === "commits") return `last ${command.count} commits`;
428
+ if (command.kind === "staged") return "staged changes";
429
+ return "stdin diff";
430
+ }
431
+
432
+ function estimatePromptTokens(prompt: string): number {
433
+ return Math.ceil(prompt.length / 3);
434
+ }
435
+
264
436
  function countTargetsByPattern(contexts: readonly SemContext[]): Record<string, number> {
265
437
  const counts: Record<string, number> = {};
266
438
  for (const context of contexts) counts[context.checkId] = (counts[context.checkId] ?? 0) + 1;
package/src/types.ts CHANGED
@@ -216,6 +216,8 @@ export type SearchRunJson = Readonly<{
216
216
  repomixTokens?: number;
217
217
  repomixConfig?: RepomixSearchConfig;
218
218
  searchTargets?: number;
219
+ searchBatches?: number;
220
+ skippedTargets?: number;
219
221
  profileId?: string;
220
222
  targetsByPattern?: Readonly<Record<string, number>>;
221
223
  targetsPreview?: readonly SearchTargetPreview[];