sweet-search 2.5.12 → 2.5.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/eval/agent-read-workflows/bin/_ss-argparse.mjs +194 -0
- package/eval/agent-read-workflows/bin/_ss-helpers.mjs +127 -60
- package/eval/agent-read-workflows/bin/ss-find +1 -1
- package/eval/agent-read-workflows/bin/ss-grep +1 -1
- package/eval/agent-read-workflows/bin/ss-read +2 -0
- package/package.json +8 -7
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
// Pure argument-parsing helpers for the ss-* CLI wrappers.
|
|
2
|
+
//
|
|
3
|
+
// Extracted from _ss-helpers.mjs so they can be unit-tested without triggering
|
|
4
|
+
// the CLI's top-level IIFE (which runs on import). NOTHING here touches
|
|
5
|
+
// process.* or the filesystem — every function is a pure transform over an
|
|
6
|
+
// args array (some mutate the array in place, by design, and return a value).
|
|
7
|
+
|
|
8
|
+
// --- value-flag parsers (mutate `args`, returning the consumed value) --------
|
|
9
|
+
|
|
10
|
+
/**
 * Consume a `name value` pair from `args` (mutates `args` in place).
 *
 * @param {Array} args - Argument list; the flag and the token after it are removed.
 * @param {string} name - Exact flag token to look for (first occurrence only).
 * @param {*} fallback - Returned untouched when the flag is absent.
 * @returns {*} The token that followed the flag, `undefined` when the flag was
 *   the last token, or `fallback` when the flag does not appear at all.
 */
export function parseFlag(args, name, fallback) {
  const at = args.indexOf(name);
  if (at < 0) {
    return fallback;
  }
  // splice hands back what it removed: [flag, value] — or just [flag] when the
  // flag was the final token, in which case `value` is undefined.
  const [, value] = args.splice(at, 2);
  return value;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Consume the first `alias value` pair found for any of several flag aliases
 * (mutates `args` in place).
 *
 * Aliases are tried in the order given; the first alias that appears anywhere
 * in `args` wins, even if a later alias occurs earlier in the list of tokens.
 *
 * @param {Array} args - Argument list; the matched flag and its value are removed.
 * @param {Iterable<string>} names - Flag aliases, in priority order.
 * @param {*} fallback - Returned when none of the aliases is present.
 * @returns {*} The token following the matched alias (`undefined` if the alias
 *   was the last token), or `fallback`.
 */
export function parseShortFlag(args, names, fallback) {
  for (const alias of names) {
    const at = args.indexOf(alias);
    if (at === -1) {
      continue;
    }
    const [, value] = args.splice(at, 2);
    return value;
  }
  return fallback;
}
|
|
25
|
+
|
|
26
|
+
// Boolean (value-less) flag: remove every occurrence, return whether any present.
|
|
27
|
+
/**
 * Remove every occurrence of a value-less flag from the option section of
 * `args` (mutates `args` in place) and report whether any was present.
 *
 * Tokens after the first `--` are positional, so they are left alone: without
 * this, `ss-grep -- -i` would swallow the literal pattern `-i` as a flag.
 *
 * @param {Array} args - Argument list to scan and prune.
 * @param {Iterable<string>} names - All spellings of the flag (e.g. `-i`, `--ignore-case`).
 * @returns {boolean} `true` when at least one spelling was found before `--`.
 */
export function parseBoolFlag(args, names) {
  const wanted = new Set(names);
  const sep = args.indexOf('--');
  const limit = sep === -1 ? args.length : sep;
  let present = false;
  // Walk backwards so a splice never shifts an index that is still to be visited.
  for (let i = limit - 1; i >= 0; i--) {
    if (wanted.has(args[i])) {
      args.splice(i, 1);
      present = true;
    }
  }
  return present;
}
|
|
35
|
+
|
|
36
|
+
// --- pattern construction ----------------------------------------------------
|
|
37
|
+
|
|
38
|
+
/**
 * Backslash-escape regex metacharacters so the input matches literally.
 *
 * @param {*} s - Value to escape; coerced with `String()` first.
 * @returns {string} The input with each of `. * + ? ^ $ { } ( ) | [ ] \`
 *   preceded by a backslash.
 */
export function escapeRegex(s) {
  const text = String(s);
  return text.replace(/[.*+?^${}()|[\]\\]/g, (meta) => `\\${meta}`);
}
|
|
41
|
+
|
|
42
|
+
// Translate the grep-family pattern flags into a single regex — no engine change
|
|
43
|
+
// needed. `-F` escapes the pattern so metacharacters are literal; `-w` wraps it
|
|
44
|
+
// in word boundaries; `-i` prepends the `(?i)` inline flag the planner already
|
|
45
|
+
// honours end-to-end (hasCaseInsensitiveRegexFlag → ripgrep prefilter + Rust
|
|
46
|
+
// gram+grep). Order matters: escape (literal) → word-wrap → case flag.
|
|
47
|
+
/**
 * Fold the grep-style pattern flags into a single regex string.
 *
 * Steps are applied in a fixed order: literal escaping (`fixedString`), then
 * word-boundary wrapping (`wordBound`), then the `(?i)` inline flag
 * (`ignoreCase`). The inline flag is skipped when the pattern already opens
 * with an inline-flag group that mentions `i`.
 *
 * @param {string} pattern - Raw pattern; a falsy value is returned unchanged.
 * @param {object} [options]
 * @param {boolean} [options.ignoreCase=false] - Prepend `(?i)`.
 * @param {boolean} [options.wordBound=false] - Wrap as `\b(?:…)\b`.
 * @param {boolean} [options.fixedString=false] - Escape regex metacharacters.
 * @returns {string} The combined pattern.
 */
export function buildGrepPattern(pattern, { ignoreCase = false, wordBound = false, fixedString = false } = {}) {
  if (!pattern) {
    return pattern;
  }
  const literal = fixedString ? escapeRegex(pattern) : pattern;
  const bounded = wordBound ? `\\b(?:${literal})\\b` : literal;
  if (!ignoreCase) {
    return bounded;
  }
  const alreadyInsensitive = /^\(\?[a-z-]*i[a-z-]*[:)]/.test(bounded);
  return alreadyInsensitive ? bounded : `(?i)${bounded}`;
}
|
|
54
|
+
|
|
55
|
+
// --- inert flags (always true for ss-*, safe to accept as no-ops) ------------
|
|
56
|
+
// These never change which lines match: we always print file:line, always
|
|
57
|
+
// search the whole index, never colourise. Stripping them lets reflexive grep
|
|
58
|
+
// muscle-memory pass without a wasted call — UNLIKE semantic flags (-w/-F/-v/
|
|
59
|
+
// -C…), which we either implement or reject, never silently drop.
|
|
60
|
+
// Exact-match spellings of grep flags that are accepted and discarded.
export const INERT_FLAGS = new Set([
  '-n', '--line-number', '-H', '--with-filename', '--no-filename',
  '-r', '-R', '--recursive', '--color', '--colour',
]);

/**
 * Drop no-op grep flags from the option section of `args` (mutates in place).
 *
 * A token is removed when it is one of INERT_FLAGS or has the form
 * `--color=…` / `--colour=…`. Non-string entries are ignored. Tokens after the
 * first `--` are positional and are never removed, so `ss-grep -- -n` still
 * searches for the literal pattern `-n`.
 *
 * @param {Array} args - Argument list to prune.
 * @returns {void}
 */
export function stripInertFlags(args) {
  const sep = args.indexOf('--');
  const limit = sep === -1 ? args.length : sep;
  // Iterate from the end so removals do not disturb indices still to be checked.
  for (let i = limit - 1; i >= 0; i--) {
    const tok = args[i];
    if (typeof tok !== 'string') continue;
    if (INERT_FLAGS.has(tok) || /^--colou?r=/.test(tok)) {
      args.splice(i, 1);
    }
  }
}
|
|
73
|
+
|
|
74
|
+
// --- normalisation: make agent-typed forms canonical before parsing ----------
|
|
75
|
+
// Short flags that consume a following value, and value-less boolean shorts.
|
|
76
|
+
// Used to split attached/bundled forms (-k5, -iw, -iwk5) the way getopt would,
|
|
77
|
+
// so they parse instead of being mistaken for an unknown flag or the pattern.
|
|
78
|
+
// Short flags that take a value (`-k 5`).
export const VALUE_SHORTS = new Set(['k']);
// Short flags that are value-less booleans and may be bundled (`-iw`).
export const BOOL_SHORTS = new Set(['i', 'w', 'F']);
// Long flags that take a value; only these are split from `--name=value`.
export const VALUE_LONGS = new Set([
  '--top', '--regex', '--mode', '--max-tokens',
  '--in', '--file', '--query', '--hint', '--depth', '--budget',
]);

/**
 * Rewrite attached and bundled flag spellings into the canonical
 * space-separated form, returning a NEW array (`args` is not modified).
 *
 * Rewrites performed, for tokens before the first `--` only:
 *   - `--name=value` → `--name`, `value` when `--name` is in VALUE_LONGS.
 *     The value may be empty and may span several lines.
 *   - `-k5` → `-k`, `5` when the letter is in VALUE_SHORTS.
 *   - `-iw` / `-iwk5` → `-i`, `-w`[, `-k`, `5`] when the bundle starts with a
 *     BOOL_SHORTS letter and every letter is known; a value short ends the
 *     bundle and takes the remainder (if any) as its value.
 * Every other token — unknown long options, bundles containing an unknown
 * letter, non-strings, and everything from `--` onward — is copied unchanged.
 *
 * @param {Iterable} args - Raw argument tokens.
 * @returns {Array} The normalised token list.
 */
export function normalizeArgs(args) {
  const out = [];
  let positionalOnly = false;
  for (const tok of args) {
    if (positionalOnly || typeof tok !== 'string') { out.push(tok); continue; }
    if (tok === '--') { out.push(tok); positionalOnly = true; continue; }

    // --name=value → --name value, but only for known value flags. Unknown
    // long options stay intact so the caller can reject the whole token.
    // The `s` (dotAll) flag lets the value contain newlines; without it a
    // multi-line value would not be split and would leak through as a positional.
    let m = /^(--[A-Za-z][\w-]*)=(.*)$/s.exec(tok);
    if (m && VALUE_LONGS.has(m[1])) { out.push(m[1], m[2]); continue; }

    // attached short value or boolean bundle: -k5, -iw, -iwk5
    m = /^-([A-Za-z])(.+)$/.exec(tok);
    if (m) {
      const first = m[1];
      if (VALUE_SHORTS.has(first)) { out.push('-' + first, m[2]); continue; } // -k5 → -k 5
      if (BOOL_SHORTS.has(first)) {
        const chars = tok.slice(1);
        const expanded = [];
        let i = 0;
        let ok = true;
        while (i < chars.length) {
          const ch = chars[i];
          if (BOOL_SHORTS.has(ch)) { expanded.push('-' + ch); i++; }
          else if (VALUE_SHORTS.has(ch)) { // value short ends the bundle
            const val = chars.slice(i + 1);
            expanded.push('-' + ch);
            // An empty remainder means the value is the next token.
            if (val) expanded.push(val);
            i = chars.length;
          } else { ok = false; break; } // unknown char → leave token intact
        }
        if (ok) { out.push(...expanded); continue; }
      }
    }
    out.push(tok);
  }
  return out;
}
|
|
124
|
+
|
|
125
|
+
// A token that looks like a real CLI option, as opposed to a regex/query that
|
|
126
|
+
// merely begins with '-' (e.g. `-?\d+`, `-->`). Narrow on purpose: single short
|
|
127
|
+
// letter, pure-letter bundle, or GNU long flag. Anything containing regex
|
|
128
|
+
// metacharacters falls through and is treated as the positional pattern, so a
|
|
129
|
+
// dash-leading pattern works WITHOUT the agent needing to know about `--`.
|
|
130
|
+
/**
 * Decide whether a token has the shape of a CLI option rather than a pattern
 * that merely starts with a dash.
 *
 * Recognised shapes:
 *   - a dash followed by a letter and then only letters/digits (`-i`, `-iw`, `-C2`);
 *   - a GNU long option, optionally with `=value` (`--ignore-case`, `--foo=bar`).
 *     The value part may contain newlines.
 * The bare `-` and `--` tokens and all non-strings are never options.
 *
 * @param {*} tok - Token to classify.
 * @returns {boolean} `true` when the token is option-shaped.
 */
export function looksLikeOption(tok) {
  if (typeof tok !== 'string' || tok === '-' || tok === '--') return false;
  return /^-[A-Za-z][A-Za-z0-9]*$/.test(tok)       // -i, -iw, -C2
    // `s` (dotAll) so a multi-line value cannot hide an unknown long option.
    || /^--[A-Za-z][\w-]*(?:=.*)?$/s.test(tok);    // --ignore-case, --foo=bar
}
|
|
135
|
+
|
|
136
|
+
/**
 * Consume a `flag value` pair for the first alias found in the option section
 * of `args`, reporting problems instead of throwing or exiting.
 *
 * Aliases are tried in the order given. A flag that only appears after the
 * first `--` is positional text and is ignored. On success the flag and its
 * value are removed from `args`; on error `args` is left untouched.
 *
 * @param {Array} args - Argument list (mutated on success).
 * @param {string|string[]} names - One flag spelling or a list of aliases.
 * @param {*} fallback - Value reported when the flag is absent or invalid.
 * @param {object} [options]
 * @param {boolean} [options.allowOptionValue=false] - Accept a value that is
 *   itself option-shaped (for flags whose value may start with `-`).
 * @returns {{value: *, flag: (string|null), error: (string|null)}}
 *   `flag` is the alias that matched (or `null`); `error` is a message when the
 *   flag has no following token or the token looks like another option.
 */
export function parseValueFlag(args, names, fallback, { allowOptionValue = false } = {}) {
  const allNames = Array.isArray(names) ? names : [names];
  const sep = args.indexOf('--');
  for (const n of allNames) {
    const i = args.indexOf(n);
    // Absent, or present only after `--` where everything is positional.
    if (i === -1 || (sep !== -1 && i > sep)) continue;
    const v = args[i + 1];
    if (v == null || (!allowOptionValue && looksLikeOption(v))) {
      return { value: fallback, flag: n, error: `${n} requires a value` };
    }
    args.splice(i, 2);
    return { value: v, flag: n, error: null };
  }
  return { value: fallback, flag: null, error: null };
}
|
|
150
|
+
|
|
151
|
+
/**
 * Consume an integer-valued flag via `parseValueFlag` and validate its value.
 *
 * @param {Array} args - Argument list (mutated when the flag/value pair is consumed).
 * @param {string|string[]} names - One flag spelling or a list of aliases.
 * @param {*} fallback - Value reported when the flag is absent or invalid.
 * @param {object} [options]
 * @param {number} [options.min=1] - Smallest accepted value.
 * @returns {{value: *, flag: (string|null), error: (string|null)}}
 *   The `parseValueFlag` result when that already failed; otherwise `value` is
 *   the parsed number, or `fallback` with an `error` message when the text is
 *   not an integer or is below `min`.
 */
export function parsePositiveIntFlag(args, names, fallback, { min = 1 } = {}) {
  const raw = parseValueFlag(args, names, fallback);
  if (raw.error) {
    return raw;
  }
  const { flag } = raw;
  if (flag == null) {
    // Flag not supplied: report the caller's default.
    return { ...raw, value: fallback };
  }
  const count = Number(raw.value);
  const tooSmall = count < min;
  const valid = Number.isInteger(count) && !tooSmall;
  return valid
    ? { value: count, flag, error: null }
    : { value: fallback, flag, error: `${flag} must be an integer >= ${min}` };
}
|
|
161
|
+
|
|
162
|
+
// Parse a line range supplied as a single positional token — `10-20`, `10:20`
|
|
163
|
+
// or `10,20` (sed/bat/"lines 10-20" muscle memory). Returns { start, end } only
|
|
164
|
+
// for a well-formed ascending range; null otherwise (so the caller falls back to
|
|
165
|
+
// the plain numeric path or its own validation). Deliberately strict: both ends
|
|
166
|
+
// required, no open-ended `10-` (which previously caused accidental over-reads).
|
|
167
|
+
/**
 * Parse a single-token line range: `start-end`, `start:end` or `start,end`.
 *
 * Both ends are required and must be plain decimal digits.
 *
 * @param {*} token - Candidate range token.
 * @returns {{start: number, end: number}|null} The range when `start >= 1` and
 *   `end >= start`; `null` for non-strings, malformed tokens, a zero start, or
 *   a descending range.
 */
export function parseLineRange(token) {
  const match = typeof token === 'string' ? token.match(/^(\d+)[-:,](\d+)$/) : null;
  if (match === null) {
    return null;
  }
  const [start, end] = match.slice(1).map(Number);
  const ascending = start >= 1 && end >= start;
  return ascending ? { start, end } : null;
}
|
|
176
|
+
|
|
177
|
+
// After known flags are consumed, resolve the positional pattern. `--` ends
|
|
178
|
+
// option parsing (everything after is positional). Any remaining option-shaped
|
|
179
|
+
// token is an unsupported flag → reported, not silently dropped and not
|
|
180
|
+
// mistaken for the pattern. Returns { pattern, unknownFlag }; the caller decides
|
|
181
|
+
// how to surface the error (kept side-effect-free for testability).
|
|
182
|
+
/**
 * Resolve the positional pattern once known flags have been consumed.
 *
 * When `args` contains `--`, only the tokens before it are checked for
 * leftover option-shaped tokens and the pattern is the first token after it.
 * Without `--`, every token is checked and the pattern is the first token.
 * Nothing is mutated and nothing is printed; the caller decides how to report
 * an unknown flag.
 *
 * @param {Array} args - Remaining argument tokens.
 * @returns {{pattern: *, unknownFlag: (string|null)}} `unknownFlag` is the first
 *   option-shaped leftover (with `pattern` undefined), otherwise `null`.
 */
export function extractPositional(args) {
  const sep = args.indexOf('--');
  const hasSeparator = sep !== -1;
  const optionZone = hasSeparator ? args.slice(0, sep) : args;
  const unknownFlag = optionZone.find(looksLikeOption);
  if (unknownFlag) {
    return { pattern: undefined, unknownFlag };
  }
  const positionals = hasSeparator ? args.slice(sep + 1) : args;
  return { pattern: positionals[0], unknownFlag: null };
}
|
|
@@ -13,6 +13,11 @@ import path from 'node:path';
|
|
|
13
13
|
import { createHash } from 'node:crypto';
|
|
14
14
|
import { existsSync, readFileSync } from 'node:fs';
|
|
15
15
|
import { fileURLToPath } from 'node:url';
|
|
16
|
+
import {
|
|
17
|
+
parseBoolFlag, parseValueFlag, parsePositiveIntFlag,
|
|
18
|
+
buildGrepPattern, stripInertFlags, normalizeArgs, extractPositional,
|
|
19
|
+
parseLineRange, looksLikeOption,
|
|
20
|
+
} from './_ss-argparse.mjs';
|
|
16
21
|
|
|
17
22
|
// 8-char SHA1 prefix is enough for grouping identical queries across
|
|
18
23
|
// benchmark runs without bloating artifacts.
|
|
@@ -40,19 +45,39 @@ process.env.SWEET_SEARCH_PROJECT_ROOT = PROJECT_ROOT;
|
|
|
40
45
|
const subcommand = process.argv[2];
|
|
41
46
|
const rest = process.argv.slice(3);
|
|
42
47
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
const i = args.indexOf(n);
|
|
53
|
-
if (i !== -1) { const v = args[i + 1]; args.splice(i, 2); return v; }
|
|
48
|
+
// Pure arg-parsing helpers (parseFlag/parseShortFlag/parseBoolFlag/
|
|
49
|
+
// buildGrepPattern/stripInertFlags/normalizeArgs/extractPositional) live in
|
|
50
|
+
// ./_ss-argparse.mjs so they can be unit-tested without this file's top-level
|
|
51
|
+
// IIFE firing. resolvePositional wraps the side-effect-free extractPositional
|
|
52
|
+
// with the CLI's loud-error exit.
|
|
53
|
+
function resolvePositional(args, usage) {
|
|
54
|
+
const { pattern, unknownFlag } = extractPositional(args);
|
|
55
|
+
if (unknownFlag) {
|
|
56
|
+
failUsage(`unrecognised option "${unknownFlag}"`, usage);
|
|
54
57
|
}
|
|
55
|
-
return
|
|
58
|
+
return pattern;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/**
 * Print a tagged error followed by the usage text to stderr, then terminate
 * the process with exit code 2.
 *
 * @param {string} message - What went wrong.
 * @param {string} usage - Usage text printed on the following line.
 */
function failUsage(message, usage) {
  const report = `[ss] ${message}\n${usage}\n`;
  process.stderr.write(report);
  process.exit(2);
}
|
|
65
|
+
|
|
66
|
+
/**
 * CLI-facing wrapper around `parsePositiveIntFlag`: a parse error is passed to
 * `failUsage` together with the usage text; otherwise the parsed value is returned.
 *
 * @param {Array} args - Argument list (mutated when the flag is consumed).
 * @param {string|string[]} names - Flag spelling(s).
 * @param {*} fallback - Value used when the flag is absent.
 * @param {string} usage - Usage text shown alongside an error.
 * @returns {*} The parsed integer, or `fallback`.
 */
function readPositiveIntFlag(args, names, fallback, usage) {
  const { error, value } = parsePositiveIntFlag(args, names, fallback);
  if (error) {
    failUsage(error, usage);
  }
  return value;
}
|
|
71
|
+
|
|
72
|
+
/**
 * CLI-facing wrapper around `parseValueFlag`: a parse error is passed to
 * `failUsage` together with the usage text; otherwise the flag's value is returned.
 *
 * @param {Array} args - Argument list (mutated when the flag is consumed).
 * @param {string|string[]} names - Flag spelling(s).
 * @param {*} fallback - Value used when the flag is absent.
 * @param {string} usage - Usage text shown alongside an error.
 * @param {object} [opts={}] - Forwarded unchanged to `parseValueFlag`.
 * @returns {*} The flag's value, or `fallback`.
 */
function readValueFlag(args, names, fallback, usage, opts = {}) {
  const { error, value } = parseValueFlag(args, names, fallback, opts);
  if (error) {
    failUsage(error, usage);
  }
  return value;
}
|
|
77
|
+
|
|
78
|
+
function rejectUnknownOptions(args, usage) {
|
|
79
|
+
const bad = args.find(looksLikeOption);
|
|
80
|
+
if (bad) failUsage(`unrecognised option "${bad}"`, usage);
|
|
56
81
|
}
|
|
57
82
|
|
|
58
83
|
async function getSweetSearch() {
|
|
@@ -79,11 +104,17 @@ async function ensureWarmServerReady({ timeoutMs = 60000, intervalMs = 500 } = {
|
|
|
79
104
|
|
|
80
105
|
// --- subcommands ----------------------------------------------------------
|
|
81
106
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
const
|
|
107
|
+
const GREP_USAGE = 'Usage: ss-grep <regex> [-i|--ignore-case] [-w|--word-regexp] [-F|--fixed-strings] [-k N]';
|
|
108
|
+
async function cmdGrep(rawArgs) {
|
|
109
|
+
const args = normalizeArgs(rawArgs);
|
|
110
|
+
const ignoreCase = parseBoolFlag(args, ['-i', '--ignore-case']);
|
|
111
|
+
const wordBound = parseBoolFlag(args, ['-w', '--word-regexp']);
|
|
112
|
+
const fixedString = parseBoolFlag(args, ['-F', '--fixed-strings']);
|
|
113
|
+
const k = readPositiveIntFlag(args, ['-k', '--top'], 20, GREP_USAGE);
|
|
114
|
+
stripInertFlags(args);
|
|
115
|
+
const regex = buildGrepPattern(resolvePositional(args, GREP_USAGE), { ignoreCase, wordBound, fixedString });
|
|
85
116
|
if (!regex) {
|
|
86
|
-
process.stderr.write(
|
|
117
|
+
process.stderr.write(GREP_USAGE + '\n');
|
|
87
118
|
process.exit(2);
|
|
88
119
|
}
|
|
89
120
|
const s = await getSweetSearch();
|
|
@@ -109,27 +140,34 @@ async function cmdGrep(args) {
|
|
|
109
140
|
process.exit(0);
|
|
110
141
|
}
|
|
111
142
|
|
|
112
|
-
async function cmdFind(
|
|
143
|
+
async function cmdFind(rawArgs) {
|
|
144
|
+
const args = normalizeArgs(rawArgs);
|
|
113
145
|
// ColGrep pattern search with token-budgeted agent packaging — returns the
|
|
114
146
|
// FULL useful answer (ranked code blocks + confidence + sufficiency), the same
|
|
115
147
|
// agent packaging ss-search emits. ss-grep is the short/locator counterpart, so
|
|
116
148
|
// ss-find defaults to the full answer: it saves the follow-up read entirely.
|
|
117
149
|
// (Mirrors the agent-in-the-loop H2H adapter eval/agent-eval/tools/
|
|
118
150
|
// pattern-agent-tools.js, which calls search(...,{format:'agent'}).)
|
|
151
|
+
const FIND_USAGE = 'Usage: ss-find "<query>" --regex "<regex>" [-i|--ignore-case] [-w|--word-regexp] [-F|--fixed-strings] [--full|--xl] [-k N]';
|
|
119
152
|
let format = 'agent';
|
|
120
153
|
if (args.includes('--full')) { format = 'agent_full'; args.splice(args.indexOf('--full'), 1); }
|
|
121
154
|
if (args.includes('--xl')) { format = 'agent_full_xl'; args.splice(args.indexOf('--xl'), 1); }
|
|
122
|
-
const
|
|
123
|
-
const
|
|
124
|
-
const
|
|
155
|
+
const ignoreCase = parseBoolFlag(args, ['-i', '--ignore-case']);
|
|
156
|
+
const wordBound = parseBoolFlag(args, ['-w', '--word-regexp']);
|
|
157
|
+
const fixedString = parseBoolFlag(args, ['-F', '--fixed-strings']);
|
|
158
|
+
const k = readPositiveIntFlag(args, ['-k', '--top'], 6, FIND_USAGE);
|
|
159
|
+
const regex = readValueFlag(args, '--regex', '', FIND_USAGE, { allowOptionValue: true });
|
|
160
|
+
stripInertFlags(args);
|
|
161
|
+
const query = resolvePositional(args, FIND_USAGE);
|
|
125
162
|
if (!query) {
|
|
126
|
-
process.stderr.write(
|
|
163
|
+
process.stderr.write(FIND_USAGE + '\n');
|
|
127
164
|
process.exit(2);
|
|
128
165
|
}
|
|
129
166
|
// Budget-sweep experiment hook: lets the bench pin the response token budget
|
|
130
167
|
// per-process without changing the agent-visible tool surface.
|
|
131
168
|
const envFindBudget = Number(process.env.SS_SMOKE_FIND_BUDGET || '') || null;
|
|
132
|
-
|
|
169
|
+
// Pattern flags apply to the regex candidate generator; the NL query is untouched.
|
|
170
|
+
const effectiveRegex = buildGrepPattern(regex || '', { ignoreCase, wordBound, fixedString });
|
|
133
171
|
const s = await getSweetSearch();
|
|
134
172
|
if (!s.hasLateInteractionIndex) {
|
|
135
173
|
process.stderr.write(`[ss-find] no late-interaction index — falling back to ss-grep\n`);
|
|
@@ -172,12 +210,26 @@ async function cmdFind(args) {
|
|
|
172
210
|
process.exit(0);
|
|
173
211
|
}
|
|
174
212
|
|
|
213
|
+
// ss-read takes NO flags — only positional <file> [start] [end] (or a single
|
|
214
|
+
// "start-end" / "start:end" / "start,end" range token). Unlike ss-grep, a stray
|
|
215
|
+
// flag here can never silently corrupt the result: the line slots are validated
|
|
216
|
+
// as numbers, so a misuse is already a loud error. These hints exist only to
|
|
217
|
+
// turn that error into a self-correcting one (the M++ prompt, which we may not
|
|
218
|
+
// touch, documents the positional form, not these recovery messages).
|
|
219
|
+
const READ_USAGE =
|
|
220
|
+
'Usage: ss-read <file> # whole file\n' +
|
|
221
|
+
' ss-read <file> <start> # ONE line\n' +
|
|
222
|
+
' ss-read <file> <start> <end>\n' +
|
|
223
|
+
' ss-read <file> 10-20 # range (also 10:20, 10,20)\n' +
|
|
224
|
+
'Note: ss-read has no flags (no -n/--limit/-r); line selection is positional.';
|
|
175
225
|
async function cmdRead(args) {
|
|
176
226
|
const file = args[0];
|
|
177
227
|
if (!file) {
|
|
178
|
-
process.stderr.write(
|
|
179
|
-
process.
|
|
180
|
-
|
|
228
|
+
process.stderr.write(READ_USAGE + '\n');
|
|
229
|
+
process.exit(2);
|
|
230
|
+
}
|
|
231
|
+
if (looksLikeOption(file)) {
|
|
232
|
+
process.stderr.write(`[ss-read] "${file}" looks like a flag, but ss-read takes a file path first.\n${READ_USAGE}\n`);
|
|
181
233
|
process.exit(2);
|
|
182
234
|
}
|
|
183
235
|
// If start is provided and end is omitted, read EXACTLY that one line —
|
|
@@ -185,19 +237,27 @@ async function cmdRead(args) {
|
|
|
185
237
|
// caused accidental over-reading on large files).
|
|
186
238
|
let start = null, end = null;
|
|
187
239
|
if (args[1] != null) {
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
240
|
+
// Accept a single-token range (10-20 / 10:20 / 10,20) before the plain
|
|
241
|
+
// numeric path, so "lines 10-20" muscle memory works without a wasted call.
|
|
242
|
+
const range = parseLineRange(args[1]);
|
|
243
|
+
if (range && args[2] == null) {
|
|
244
|
+
start = range.start;
|
|
245
|
+
end = range.end;
|
|
246
|
+
} else {
|
|
247
|
+
start = +args[1];
|
|
248
|
+
if (!Number.isFinite(start) || start < 1) {
|
|
249
|
+
process.stderr.write(`[ss-read] invalid start line: "${args[1]}" (expected a line number, e.g. 10, or a range like 10-20)\n${READ_USAGE}\n`);
|
|
197
250
|
process.exit(2);
|
|
198
251
|
}
|
|
199
|
-
|
|
200
|
-
|
|
252
|
+
if (args[2] != null) {
|
|
253
|
+
end = +args[2];
|
|
254
|
+
if (!Number.isFinite(end) || end < start) {
|
|
255
|
+
process.stderr.write(`[ss-read] invalid end line: "${args[2]}" (must be ≥ start ${start})\n`);
|
|
256
|
+
process.exit(2);
|
|
257
|
+
}
|
|
258
|
+
} else {
|
|
259
|
+
end = start; // single-line read
|
|
260
|
+
}
|
|
201
261
|
}
|
|
202
262
|
}
|
|
203
263
|
const { readFile } = await import(path.join(REPO_ROOT, 'core/search/search-read.js'));
|
|
@@ -212,7 +272,9 @@ async function cmdRead(args) {
|
|
|
212
272
|
process.exit(0);
|
|
213
273
|
}
|
|
214
274
|
|
|
215
|
-
|
|
275
|
+
const SEARCH_USAGE = 'Usage: ss-search "<query>" [--full|--xl] [-k N] [--mode auto|lexical|semantic|hybrid]';
|
|
276
|
+
async function cmdAgentSearch(rawArgs) {
|
|
277
|
+
const args = normalizeArgs(rawArgs);
|
|
216
278
|
// Main sweet-search auto/CatBoost search with token-budgeted agent packaging.
|
|
217
279
|
//
|
|
218
280
|
// Usage:
|
|
@@ -229,11 +291,11 @@ async function cmdAgentSearch(args) {
|
|
|
229
291
|
let format = 'agent';
|
|
230
292
|
if (args.includes('--full')) { format = 'agent_full'; args.splice(args.indexOf('--full'), 1); }
|
|
231
293
|
if (args.includes('--xl')) { format = 'agent_full_xl'; args.splice(args.indexOf('--xl'), 1); }
|
|
232
|
-
const k =
|
|
233
|
-
const mode =
|
|
234
|
-
const query = args
|
|
294
|
+
const k = readPositiveIntFlag(args, ['-k', '--top'], 5, SEARCH_USAGE);
|
|
295
|
+
const mode = readValueFlag(args, '--mode', 'auto', SEARCH_USAGE);
|
|
296
|
+
const query = resolvePositional(args, SEARCH_USAGE);
|
|
235
297
|
if (!query) {
|
|
236
|
-
process.stderr.write(
|
|
298
|
+
process.stderr.write(SEARCH_USAGE + '\n');
|
|
237
299
|
process.exit(2);
|
|
238
300
|
}
|
|
239
301
|
|
|
@@ -383,18 +445,21 @@ async function cmdAgentSearch(args) {
|
|
|
383
445
|
process.exit(0);
|
|
384
446
|
}
|
|
385
447
|
|
|
386
|
-
|
|
448
|
+
const SEMANTIC_USAGE = 'Usage: ss-semantic <file> "<question>" [--max-tokens N]';
|
|
449
|
+
async function cmdSemantic(rawArgs) {
|
|
450
|
+
const args = normalizeArgs(rawArgs);
|
|
451
|
+
// Default 600 (was 800) per the 2026-06 budget sweep — scaled with the 3k
|
|
452
|
+
// preview tier. Env hook overrides the default for sweeps; an explicit
|
|
453
|
+
// --max-tokens flag from the agent always wins.
|
|
454
|
+
const maxTokens = readPositiveIntFlag(args, '--max-tokens',
|
|
455
|
+
Number(process.env.SS_SMOKE_SEMANTIC_MAXTOKENS || '') || 600, SEMANTIC_USAGE);
|
|
456
|
+
rejectUnknownOptions(args, SEMANTIC_USAGE);
|
|
387
457
|
const file = args[0];
|
|
388
458
|
const query = args[1];
|
|
389
459
|
if (!file || !query) {
|
|
390
|
-
process.stderr.write(
|
|
460
|
+
process.stderr.write(SEMANTIC_USAGE + '\n');
|
|
391
461
|
process.exit(2);
|
|
392
462
|
}
|
|
393
|
-
// Default 600 (was 800) per the 2026-06 budget sweep — scaled with the 3k
|
|
394
|
-
// preview tier. Env hook overrides the default for sweeps; an explicit
|
|
395
|
-
// --max-tokens flag from the agent always wins.
|
|
396
|
-
const maxTokens = +parseFlag(args.slice(2), '--max-tokens',
|
|
397
|
-
Number(process.env.SS_SMOKE_SEMANTIC_MAXTOKENS || '') || 600);
|
|
398
463
|
const { readSemantic } = await import(path.join(REPO_ROOT, 'core/search/search-read-semantic.js'));
|
|
399
464
|
const r = await readSemantic({
|
|
400
465
|
path: file, query, projectRoot: PROJECT_ROOT,
|
|
@@ -413,29 +478,31 @@ async function cmdSemantic(args) {
|
|
|
413
478
|
process.exit(0);
|
|
414
479
|
}
|
|
415
480
|
|
|
416
|
-
|
|
481
|
+
const TRACE_USAGE = 'Usage: ss-trace <symbol> [--in <file>] [--query <hint>] [--depth N] [--budget N]';
|
|
482
|
+
async function cmdTrace(rawArgs) {
|
|
483
|
+
const args = normalizeArgs(rawArgs);
|
|
417
484
|
let json = false;
|
|
418
485
|
if (args.includes('--json')) {
|
|
419
486
|
json = true;
|
|
420
487
|
args.splice(args.indexOf('--json'), 1);
|
|
421
488
|
}
|
|
422
|
-
const symbol = args[0];
|
|
423
|
-
if (!symbol) {
|
|
424
|
-
process.stderr.write('Usage: ss-trace <symbol> [--in <file>] [--query <hint>] [--depth N] [--budget N]\n');
|
|
425
|
-
process.exit(2);
|
|
426
|
-
}
|
|
427
489
|
const { traceSymbol, formatStructuralContext } = await import(path.join(REPO_ROOT, 'core/search/search-trace.js'));
|
|
428
490
|
|
|
429
491
|
const opts = { projectRoot: PROJECT_ROOT };
|
|
430
|
-
const file =
|
|
431
|
-
const queryHint =
|
|
432
|
-
const depth =
|
|
433
|
-
const budget =
|
|
492
|
+
const file = readValueFlag(args, ['--in', '--file'], null, TRACE_USAGE);
|
|
493
|
+
const queryHint = readValueFlag(args, ['--query', '--hint'], '', TRACE_USAGE, { allowOptionValue: true });
|
|
494
|
+
const depth = readPositiveIntFlag(args, '--depth', null, TRACE_USAGE);
|
|
495
|
+
const budget = readPositiveIntFlag(args, '--budget', null, TRACE_USAGE);
|
|
496
|
+
const symbol = resolvePositional(args, TRACE_USAGE);
|
|
497
|
+
if (!symbol) {
|
|
498
|
+
process.stderr.write(TRACE_USAGE + '\n');
|
|
499
|
+
process.exit(2);
|
|
500
|
+
}
|
|
434
501
|
if (file) opts.filePath = file;
|
|
435
502
|
if (queryHint) opts.queryHint = queryHint;
|
|
436
|
-
if (depth != null) opts.maxDepth =
|
|
503
|
+
if (depth != null) opts.maxDepth = depth;
|
|
437
504
|
// Budget-sweep experiment hook: env sets the default; explicit --budget wins.
|
|
438
|
-
if (budget != null) opts.tokenBudget =
|
|
505
|
+
if (budget != null) opts.tokenBudget = budget;
|
|
439
506
|
else if (Number(process.env.SS_SMOKE_TRACE_BUDGET || '') > 0) opts.tokenBudget = Number(process.env.SS_SMOKE_TRACE_BUDGET);
|
|
440
507
|
|
|
441
508
|
const response = traceSymbol(symbol, opts);
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
# Use for behavioural / semantic questions where lexical alone won't pinpoint the
|
|
6
6
|
# chunk. (ss-grep is the short file:line locator.)
|
|
7
7
|
#
|
|
8
|
-
# Usage: ss-find "<query>" --regex "<regex>" [--full|--xl] [-k N]
|
|
8
|
+
# Usage: ss-find "<query>" --regex "<regex>" [-i|--ignore-case] [-w|--word-regexp] [-F|--fixed-strings] [--full|--xl] [-k N]
|
|
9
9
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
|
10
10
|
TMPERR=$(mktemp)
|
|
11
11
|
node "$DIR/_ss-helpers.mjs" find "$@" 2>"$TMPERR"
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
# ss-grep: indexed bare grep (gram-prefiltered) over the cwd's Sweet Search index.
|
|
3
3
|
# Compact agent-friendly output: file:line matchText
|
|
4
4
|
#
|
|
5
|
-
# Usage: ss-grep <regex> [-k N]
|
|
5
|
+
# Usage: ss-grep <regex> [-i|--ignore-case] [-w|--word-regexp] [-F|--fixed-strings] [-k N]
|
|
6
6
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
|
7
7
|
TMPERR=$(mktemp)
|
|
8
8
|
node "$DIR/_ss-helpers.mjs" grep "$@" 2>"$TMPERR"
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
# ss-read <file> # whole file
|
|
4
4
|
# ss-read <file> <start> # ONE line (NOT start-to-EOF)
|
|
5
5
|
# ss-read <file> <start> <end> # explicit range
|
|
6
|
+
# ss-read <file> 10-20 # range token (also 10:20, 10,20)
|
|
7
|
+
# No flags: ss-read takes no -n/--limit/-r — line selection is positional.
|
|
6
8
|
# Open-ended start-to-EOF is intentionally not supported in the bench wrapper
|
|
7
9
|
# to prevent accidental over-reading. To pull a span, give an explicit end.
|
|
8
10
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sweet-search",
|
|
3
|
-
"version": "2.5.
|
|
3
|
+
"version": "2.5.14",
|
|
4
4
|
"description": "Sweet Search - SOTA Hybrid Code Search Engine with WASM CatBoost Query Router, Semantic/Lexical/Structural Search, and Multilingual Support",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "core/search/sweet-search.js",
|
|
@@ -76,6 +76,7 @@
|
|
|
76
76
|
"eval/agent-read-workflows/bin/ss-trace",
|
|
77
77
|
"eval/agent-read-workflows/bin/ss-read",
|
|
78
78
|
"eval/agent-read-workflows/bin/_ss-helpers.mjs",
|
|
79
|
+
"eval/agent-read-workflows/bin/_ss-argparse.mjs",
|
|
79
80
|
"crates/wasm-router/pkg/",
|
|
80
81
|
"LICENSE",
|
|
81
82
|
"NOTICE"
|
|
@@ -164,12 +165,12 @@
|
|
|
164
165
|
},
|
|
165
166
|
"optionalDependencies": {
|
|
166
167
|
"usearch": "^2.21.4",
|
|
167
|
-
"@sweet-search/native-darwin-arm64": "2.5.
|
|
168
|
-
"@sweet-search/native-darwin-x64": "2.5.
|
|
169
|
-
"@sweet-search/native-linux-arm64-gnu": "2.5.
|
|
170
|
-
"@sweet-search/native-linux-arm64-gnu-cuda": "2.5.
|
|
171
|
-
"@sweet-search/native-linux-x64-gnu": "2.5.
|
|
172
|
-
"@sweet-search/native-linux-x64-gnu-cuda": "2.5.
|
|
168
|
+
"@sweet-search/native-darwin-arm64": "2.5.14",
|
|
169
|
+
"@sweet-search/native-darwin-x64": "2.5.14",
|
|
170
|
+
"@sweet-search/native-linux-arm64-gnu": "2.5.14",
|
|
171
|
+
"@sweet-search/native-linux-arm64-gnu-cuda": "2.5.14",
|
|
172
|
+
"@sweet-search/native-linux-x64-gnu": "2.5.14",
|
|
173
|
+
"@sweet-search/native-linux-x64-gnu-cuda": "2.5.14"
|
|
173
174
|
},
|
|
174
175
|
"engines": {
|
|
175
176
|
"node": ">=18.0.0"
|