sweet-search 2.5.13 → 2.5.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -77,6 +77,10 @@ export function stripInertFlags(args) {
|
|
|
77
77
|
// so they parse instead of being mistaken for an unknown flag or the pattern.
|
|
78
78
|
export const VALUE_SHORTS = new Set(['k']);
|
|
79
79
|
export const BOOL_SHORTS = new Set(['i', 'w', 'F']);
|
|
80
|
+
export const VALUE_LONGS = new Set([
|
|
81
|
+
'--top', '--regex', '--mode', '--max-tokens',
|
|
82
|
+
'--in', '--file', '--query', '--hint', '--depth', '--budget',
|
|
83
|
+
]);
|
|
80
84
|
|
|
81
85
|
export function normalizeArgs(args) {
|
|
82
86
|
const out = [];
|
|
@@ -85,9 +89,11 @@ export function normalizeArgs(args) {
|
|
|
85
89
|
if (positionalOnly || typeof tok !== 'string') { out.push(tok); continue; }
|
|
86
90
|
if (tok === '--') { out.push(tok); positionalOnly = true; continue; }
|
|
87
91
|
|
|
88
|
-
// --name=value → --name value
|
|
92
|
+
// --name=value → --name value, but only for known value flags. Unknown
|
|
93
|
+
// long options stay intact so the guard can reject the whole token, and
|
|
94
|
+
// optional-value no-ops like --color=always can be stripped atomically.
|
|
89
95
|
let m = /^(--[A-Za-z][\w-]*)=(.*)$/.exec(tok);
|
|
90
|
-
if (m) { out.push(m[1], m[2]); continue; }
|
|
96
|
+
if (m && VALUE_LONGS.has(m[1])) { out.push(m[1], m[2]); continue; }
|
|
91
97
|
|
|
92
98
|
// attached short value or boolean bundle: -k5, -iw, -iwk5
|
|
93
99
|
m = /^-([A-Za-z])(.+)$/.exec(tok);
|
|
@@ -123,9 +129,49 @@ export function normalizeArgs(args) {
|
|
|
123
129
|
// dash-leading pattern works WITHOUT the agent needing to know about `--`.
|
|
124
130
|
export function looksLikeOption(tok) {
|
|
125
131
|
if (typeof tok !== 'string' || tok === '-' || tok === '--') return false;
|
|
126
|
-
return /^-[A-Za-z]
|
|
127
|
-
||
|
|
128
|
-
|
|
132
|
+
return /^-[A-Za-z][A-Za-z0-9]*$/.test(tok) // -i, -iw, -C2
|
|
133
|
+
|| /^--[A-Za-z][\w-]*(?:=.*)?$/.test(tok); // --ignore-case, --foo=bar
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
export function parseValueFlag(args, names, fallback, { allowOptionValue = false } = {}) {
|
|
137
|
+
const allNames = Array.isArray(names) ? names : [names];
|
|
138
|
+
for (const n of allNames) {
|
|
139
|
+
const i = args.indexOf(n);
|
|
140
|
+
if (i === -1) continue;
|
|
141
|
+
const v = args[i + 1];
|
|
142
|
+
if (v == null || (!allowOptionValue && looksLikeOption(v))) {
|
|
143
|
+
return { value: fallback, flag: n, error: `${n} requires a value` };
|
|
144
|
+
}
|
|
145
|
+
args.splice(i, 2);
|
|
146
|
+
return { value: v, flag: n, error: null };
|
|
147
|
+
}
|
|
148
|
+
return { value: fallback, flag: null, error: null };
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
export function parsePositiveIntFlag(args, names, fallback, { min = 1 } = {}) {
|
|
152
|
+
const parsed = parseValueFlag(args, names, fallback);
|
|
153
|
+
if (parsed.error) return parsed;
|
|
154
|
+
if (parsed.flag == null) return { ...parsed, value: fallback };
|
|
155
|
+
const n = Number(parsed.value);
|
|
156
|
+
if (!Number.isInteger(n) || n < min) {
|
|
157
|
+
return { value: fallback, flag: parsed.flag, error: `${parsed.flag} must be an integer >= ${min}` };
|
|
158
|
+
}
|
|
159
|
+
return { value: n, flag: parsed.flag, error: null };
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
// Parse a line range supplied as a single positional token — `10-20`, `10:20`
|
|
163
|
+
// or `10,20` (sed/bat/"lines 10-20" muscle memory). Returns { start, end } only
|
|
164
|
+
// for a well-formed ascending range; null otherwise (so the caller falls back to
|
|
165
|
+
// the plain numeric path or its own validation). Deliberately strict: both ends
|
|
166
|
+
// required, no open-ended `10-` (which previously caused accidental over-reads).
|
|
167
|
+
export function parseLineRange(token) {
|
|
168
|
+
if (typeof token !== 'string') return null;
|
|
169
|
+
const m = /^(\d+)[-:,](\d+)$/.exec(token);
|
|
170
|
+
if (!m) return null;
|
|
171
|
+
const start = +m[1];
|
|
172
|
+
const end = +m[2];
|
|
173
|
+
if (start < 1 || end < start) return null;
|
|
174
|
+
return { start, end };
|
|
129
175
|
}
|
|
130
176
|
|
|
131
177
|
// After known flags are consumed, resolve the positional pattern. `--` ends
|
|
@@ -14,8 +14,9 @@ import { createHash } from 'node:crypto';
|
|
|
14
14
|
import { existsSync, readFileSync } from 'node:fs';
|
|
15
15
|
import { fileURLToPath } from 'node:url';
|
|
16
16
|
import {
|
|
17
|
-
|
|
17
|
+
parseBoolFlag, parseValueFlag, parsePositiveIntFlag,
|
|
18
18
|
buildGrepPattern, stripInertFlags, normalizeArgs, extractPositional,
|
|
19
|
+
parseLineRange, looksLikeOption,
|
|
19
20
|
} from './_ss-argparse.mjs';
|
|
20
21
|
|
|
21
22
|
// 8-char SHA1 prefix is enough for grouping identical queries across
|
|
@@ -52,12 +53,33 @@ const rest = process.argv.slice(3);
|
|
|
52
53
|
function resolvePositional(args, usage) {
|
|
53
54
|
const { pattern, unknownFlag } = extractPositional(args);
|
|
54
55
|
if (unknownFlag) {
|
|
55
|
-
|
|
56
|
-
process.exit(2);
|
|
56
|
+
failUsage(`unrecognised option "${unknownFlag}"`, usage);
|
|
57
57
|
}
|
|
58
58
|
return pattern;
|
|
59
59
|
}
|
|
60
60
|
|
|
61
|
+
function failUsage(message, usage) {
|
|
62
|
+
process.stderr.write(`[ss] ${message}\n${usage}\n`);
|
|
63
|
+
process.exit(2);
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
function readPositiveIntFlag(args, names, fallback, usage) {
|
|
67
|
+
const parsed = parsePositiveIntFlag(args, names, fallback);
|
|
68
|
+
if (parsed.error) failUsage(parsed.error, usage);
|
|
69
|
+
return parsed.value;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
function readValueFlag(args, names, fallback, usage, opts = {}) {
|
|
73
|
+
const parsed = parseValueFlag(args, names, fallback, opts);
|
|
74
|
+
if (parsed.error) failUsage(parsed.error, usage);
|
|
75
|
+
return parsed.value;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
function rejectUnknownOptions(args, usage) {
|
|
79
|
+
const bad = args.find(looksLikeOption);
|
|
80
|
+
if (bad) failUsage(`unrecognised option "${bad}"`, usage);
|
|
81
|
+
}
|
|
82
|
+
|
|
61
83
|
async function getSweetSearch() {
|
|
62
84
|
const { SweetSearch } = await import(path.join(REPO_ROOT, 'core/search/sweet-search.js'));
|
|
63
85
|
const s = new SweetSearch({ projectRoot: PROJECT_ROOT });
|
|
@@ -88,7 +110,7 @@ async function cmdGrep(rawArgs) {
|
|
|
88
110
|
const ignoreCase = parseBoolFlag(args, ['-i', '--ignore-case']);
|
|
89
111
|
const wordBound = parseBoolFlag(args, ['-w', '--word-regexp']);
|
|
90
112
|
const fixedString = parseBoolFlag(args, ['-F', '--fixed-strings']);
|
|
91
|
-
const k =
|
|
113
|
+
const k = readPositiveIntFlag(args, ['-k', '--top'], 20, GREP_USAGE);
|
|
92
114
|
stripInertFlags(args);
|
|
93
115
|
const regex = buildGrepPattern(resolvePositional(args, GREP_USAGE), { ignoreCase, wordBound, fixedString });
|
|
94
116
|
if (!regex) {
|
|
@@ -133,8 +155,8 @@ async function cmdFind(rawArgs) {
|
|
|
133
155
|
const ignoreCase = parseBoolFlag(args, ['-i', '--ignore-case']);
|
|
134
156
|
const wordBound = parseBoolFlag(args, ['-w', '--word-regexp']);
|
|
135
157
|
const fixedString = parseBoolFlag(args, ['-F', '--fixed-strings']);
|
|
136
|
-
const k =
|
|
137
|
-
const regex =
|
|
158
|
+
const k = readPositiveIntFlag(args, ['-k', '--top'], 6, FIND_USAGE);
|
|
159
|
+
const regex = readValueFlag(args, '--regex', '', FIND_USAGE, { allowOptionValue: true });
|
|
138
160
|
stripInertFlags(args);
|
|
139
161
|
const query = resolvePositional(args, FIND_USAGE);
|
|
140
162
|
if (!query) {
|
|
@@ -188,12 +210,26 @@ async function cmdFind(rawArgs) {
|
|
|
188
210
|
process.exit(0);
|
|
189
211
|
}
|
|
190
212
|
|
|
213
|
+
// ss-read takes NO flags — only positional <file> [start] [end] (or a single
|
|
214
|
+
// "start-end" / "start:end" / "start,end" range token). Unlike ss-grep, a stray
|
|
215
|
+
// flag here can never silently corrupt the result: the line slots are validated
|
|
216
|
+
// as numbers, so a misuse is already a loud error. These hints exist only to
|
|
217
|
+
// turn that error into a self-correcting one (the M++ prompt, which we may not
|
|
218
|
+
// touch, documents the positional form, not these recovery messages).
|
|
219
|
+
const READ_USAGE =
|
|
220
|
+
'Usage: ss-read <file> # whole file\n' +
|
|
221
|
+
' ss-read <file> <start> # ONE line\n' +
|
|
222
|
+
' ss-read <file> <start> <end>\n' +
|
|
223
|
+
' ss-read <file> 10-20 # range (also 10:20, 10,20)\n' +
|
|
224
|
+
'Note: ss-read has no flags (no -n/--limit/-r); line selection is positional.';
|
|
191
225
|
async function cmdRead(args) {
|
|
192
226
|
const file = args[0];
|
|
193
227
|
if (!file) {
|
|
194
|
-
process.stderr.write(
|
|
195
|
-
process.
|
|
196
|
-
|
|
228
|
+
process.stderr.write(READ_USAGE + '\n');
|
|
229
|
+
process.exit(2);
|
|
230
|
+
}
|
|
231
|
+
if (looksLikeOption(file)) {
|
|
232
|
+
process.stderr.write(`[ss-read] "${file}" looks like a flag, but ss-read takes a file path first.\n${READ_USAGE}\n`);
|
|
197
233
|
process.exit(2);
|
|
198
234
|
}
|
|
199
235
|
// If start is provided and end is omitted, read EXACTLY that one line —
|
|
@@ -201,19 +237,27 @@ async function cmdRead(args) {
|
|
|
201
237
|
// caused accidental over-reading on large files).
|
|
202
238
|
let start = null, end = null;
|
|
203
239
|
if (args[1] != null) {
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
240
|
+
// Accept a single-token range (10-20 / 10:20 / 10,20) before the plain
|
|
241
|
+
// numeric path, so "lines 10-20" muscle memory works without a wasted call.
|
|
242
|
+
const range = parseLineRange(args[1]);
|
|
243
|
+
if (range && args[2] == null) {
|
|
244
|
+
start = range.start;
|
|
245
|
+
end = range.end;
|
|
246
|
+
} else {
|
|
247
|
+
start = +args[1];
|
|
248
|
+
if (!Number.isFinite(start) || start < 1) {
|
|
249
|
+
process.stderr.write(`[ss-read] invalid start line: "${args[1]}" (expected a line number, e.g. 10, or a range like 10-20)\n${READ_USAGE}\n`);
|
|
213
250
|
process.exit(2);
|
|
214
251
|
}
|
|
215
|
-
|
|
216
|
-
|
|
252
|
+
if (args[2] != null) {
|
|
253
|
+
end = +args[2];
|
|
254
|
+
if (!Number.isFinite(end) || end < start) {
|
|
255
|
+
process.stderr.write(`[ss-read] invalid end line: "${args[2]}" (must be ≥ start ${start})\n`);
|
|
256
|
+
process.exit(2);
|
|
257
|
+
}
|
|
258
|
+
} else {
|
|
259
|
+
end = start; // single-line read
|
|
260
|
+
}
|
|
217
261
|
}
|
|
218
262
|
}
|
|
219
263
|
const { readFile } = await import(path.join(REPO_ROOT, 'core/search/search-read.js'));
|
|
@@ -228,7 +272,9 @@ async function cmdRead(args) {
|
|
|
228
272
|
process.exit(0);
|
|
229
273
|
}
|
|
230
274
|
|
|
231
|
-
|
|
275
|
+
const SEARCH_USAGE = 'Usage: ss-search "<query>" [--full|--xl] [-k N] [--mode auto|lexical|semantic|hybrid]';
|
|
276
|
+
async function cmdAgentSearch(rawArgs) {
|
|
277
|
+
const args = normalizeArgs(rawArgs);
|
|
232
278
|
// Main sweet-search auto/CatBoost search with token-budgeted agent packaging.
|
|
233
279
|
//
|
|
234
280
|
// Usage:
|
|
@@ -245,11 +291,11 @@ async function cmdAgentSearch(args) {
|
|
|
245
291
|
let format = 'agent';
|
|
246
292
|
if (args.includes('--full')) { format = 'agent_full'; args.splice(args.indexOf('--full'), 1); }
|
|
247
293
|
if (args.includes('--xl')) { format = 'agent_full_xl'; args.splice(args.indexOf('--xl'), 1); }
|
|
248
|
-
const k =
|
|
249
|
-
const mode =
|
|
250
|
-
const query = args
|
|
294
|
+
const k = readPositiveIntFlag(args, ['-k', '--top'], 5, SEARCH_USAGE);
|
|
295
|
+
const mode = readValueFlag(args, '--mode', 'auto', SEARCH_USAGE);
|
|
296
|
+
const query = resolvePositional(args, SEARCH_USAGE);
|
|
251
297
|
if (!query) {
|
|
252
|
-
process.stderr.write(
|
|
298
|
+
process.stderr.write(SEARCH_USAGE + '\n');
|
|
253
299
|
process.exit(2);
|
|
254
300
|
}
|
|
255
301
|
|
|
@@ -399,18 +445,21 @@ async function cmdAgentSearch(args) {
|
|
|
399
445
|
process.exit(0);
|
|
400
446
|
}
|
|
401
447
|
|
|
402
|
-
|
|
448
|
+
const SEMANTIC_USAGE = 'Usage: ss-semantic <file> "<question>" [--max-tokens N]';
|
|
449
|
+
async function cmdSemantic(rawArgs) {
|
|
450
|
+
const args = normalizeArgs(rawArgs);
|
|
451
|
+
// Default 600 (was 800) per the 2026-06 budget sweep — scaled with the 3k
|
|
452
|
+
// preview tier. Env hook overrides the default for sweeps; an explicit
|
|
453
|
+
// --max-tokens flag from the agent always wins.
|
|
454
|
+
const maxTokens = readPositiveIntFlag(args, '--max-tokens',
|
|
455
|
+
Number(process.env.SS_SMOKE_SEMANTIC_MAXTOKENS || '') || 600, SEMANTIC_USAGE);
|
|
456
|
+
rejectUnknownOptions(args, SEMANTIC_USAGE);
|
|
403
457
|
const file = args[0];
|
|
404
458
|
const query = args[1];
|
|
405
459
|
if (!file || !query) {
|
|
406
|
-
process.stderr.write(
|
|
460
|
+
process.stderr.write(SEMANTIC_USAGE + '\n');
|
|
407
461
|
process.exit(2);
|
|
408
462
|
}
|
|
409
|
-
// Default 600 (was 800) per the 2026-06 budget sweep — scaled with the 3k
|
|
410
|
-
// preview tier. Env hook overrides the default for sweeps; an explicit
|
|
411
|
-
// --max-tokens flag from the agent always wins.
|
|
412
|
-
const maxTokens = +parseFlag(args.slice(2), '--max-tokens',
|
|
413
|
-
Number(process.env.SS_SMOKE_SEMANTIC_MAXTOKENS || '') || 600);
|
|
414
463
|
const { readSemantic } = await import(path.join(REPO_ROOT, 'core/search/search-read-semantic.js'));
|
|
415
464
|
const r = await readSemantic({
|
|
416
465
|
path: file, query, projectRoot: PROJECT_ROOT,
|
|
@@ -429,29 +478,31 @@ async function cmdSemantic(args) {
|
|
|
429
478
|
process.exit(0);
|
|
430
479
|
}
|
|
431
480
|
|
|
432
|
-
|
|
481
|
+
const TRACE_USAGE = 'Usage: ss-trace <symbol> [--in <file>] [--query <hint>] [--depth N] [--budget N]';
|
|
482
|
+
async function cmdTrace(rawArgs) {
|
|
483
|
+
const args = normalizeArgs(rawArgs);
|
|
433
484
|
let json = false;
|
|
434
485
|
if (args.includes('--json')) {
|
|
435
486
|
json = true;
|
|
436
487
|
args.splice(args.indexOf('--json'), 1);
|
|
437
488
|
}
|
|
438
|
-
const symbol = args[0];
|
|
439
|
-
if (!symbol) {
|
|
440
|
-
process.stderr.write('Usage: ss-trace <symbol> [--in <file>] [--query <hint>] [--depth N] [--budget N]\n');
|
|
441
|
-
process.exit(2);
|
|
442
|
-
}
|
|
443
489
|
const { traceSymbol, formatStructuralContext } = await import(path.join(REPO_ROOT, 'core/search/search-trace.js'));
|
|
444
490
|
|
|
445
491
|
const opts = { projectRoot: PROJECT_ROOT };
|
|
446
|
-
const file =
|
|
447
|
-
const queryHint =
|
|
448
|
-
const depth =
|
|
449
|
-
const budget =
|
|
492
|
+
const file = readValueFlag(args, ['--in', '--file'], null, TRACE_USAGE);
|
|
493
|
+
const queryHint = readValueFlag(args, ['--query', '--hint'], '', TRACE_USAGE, { allowOptionValue: true });
|
|
494
|
+
const depth = readPositiveIntFlag(args, '--depth', null, TRACE_USAGE);
|
|
495
|
+
const budget = readPositiveIntFlag(args, '--budget', null, TRACE_USAGE);
|
|
496
|
+
const symbol = resolvePositional(args, TRACE_USAGE);
|
|
497
|
+
if (!symbol) {
|
|
498
|
+
process.stderr.write(TRACE_USAGE + '\n');
|
|
499
|
+
process.exit(2);
|
|
500
|
+
}
|
|
450
501
|
if (file) opts.filePath = file;
|
|
451
502
|
if (queryHint) opts.queryHint = queryHint;
|
|
452
|
-
if (depth != null) opts.maxDepth =
|
|
503
|
+
if (depth != null) opts.maxDepth = depth;
|
|
453
504
|
// Budget-sweep experiment hook: env sets the default; explicit --budget wins.
|
|
454
|
-
if (budget != null) opts.tokenBudget =
|
|
505
|
+
if (budget != null) opts.tokenBudget = budget;
|
|
455
506
|
else if (Number(process.env.SS_SMOKE_TRACE_BUDGET || '') > 0) opts.tokenBudget = Number(process.env.SS_SMOKE_TRACE_BUDGET);
|
|
456
507
|
|
|
457
508
|
const response = traceSymbol(symbol, opts);
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
# ss-read <file> # whole file
|
|
4
4
|
# ss-read <file> <start> # ONE line (NOT start-to-EOF)
|
|
5
5
|
# ss-read <file> <start> <end> # explicit range
|
|
6
|
+
# ss-read <file> 10-20 # range token (also 10:20, 10,20)
|
|
7
|
+
# No flags: ss-read takes no -n/--limit/-r — line selection is positional.
|
|
6
8
|
# Open-ended start-to-EOF is intentionally not supported in the bench wrapper
|
|
7
9
|
# to prevent accidental over-reading. To pull a span, give an explicit end.
|
|
8
10
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sweet-search",
|
|
3
|
-
"version": "2.5.
|
|
3
|
+
"version": "2.5.14",
|
|
4
4
|
"description": "Sweet Search - SOTA Hybrid Code Search Engine with WASM CatBoost Query Router, Semantic/Lexical/Structural Search, and Multilingual Support",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "core/search/sweet-search.js",
|
|
@@ -165,12 +165,12 @@
|
|
|
165
165
|
},
|
|
166
166
|
"optionalDependencies": {
|
|
167
167
|
"usearch": "^2.21.4",
|
|
168
|
-
"@sweet-search/native-darwin-arm64": "2.5.
|
|
169
|
-
"@sweet-search/native-darwin-x64": "2.5.
|
|
170
|
-
"@sweet-search/native-linux-arm64-gnu": "2.5.
|
|
171
|
-
"@sweet-search/native-linux-arm64-gnu-cuda": "2.5.
|
|
172
|
-
"@sweet-search/native-linux-x64-gnu": "2.5.
|
|
173
|
-
"@sweet-search/native-linux-x64-gnu-cuda": "2.5.
|
|
168
|
+
"@sweet-search/native-darwin-arm64": "2.5.14",
|
|
169
|
+
"@sweet-search/native-darwin-x64": "2.5.14",
|
|
170
|
+
"@sweet-search/native-linux-arm64-gnu": "2.5.14",
|
|
171
|
+
"@sweet-search/native-linux-arm64-gnu-cuda": "2.5.14",
|
|
172
|
+
"@sweet-search/native-linux-x64-gnu": "2.5.14",
|
|
173
|
+
"@sweet-search/native-linux-x64-gnu-cuda": "2.5.14"
|
|
174
174
|
},
|
|
175
175
|
"engines": {
|
|
176
176
|
"node": ">=18.0.0"
|