sweet-search 2.5.13 → 2.5.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -77,6 +77,10 @@ export function stripInertFlags(args) {
77
77
  // so they parse instead of being mistaken for an unknown flag or the pattern.
78
78
  export const VALUE_SHORTS = new Set(['k']);
79
79
  export const BOOL_SHORTS = new Set(['i', 'w', 'F']);
80
+ export const VALUE_LONGS = new Set([
81
+ '--top', '--regex', '--mode', '--max-tokens',
82
+ '--in', '--file', '--query', '--hint', '--depth', '--budget',
83
+ ]);
80
84
 
81
85
  export function normalizeArgs(args) {
82
86
  const out = [];
@@ -85,9 +89,11 @@ export function normalizeArgs(args) {
85
89
  if (positionalOnly || typeof tok !== 'string') { out.push(tok); continue; }
86
90
  if (tok === '--') { out.push(tok); positionalOnly = true; continue; }
87
91
 
88
- // --name=value → --name value
92
+ // --name=value → --name value, but only for known value flags. Unknown
93
+ // long options stay intact so the guard can reject the whole token, and
94
+ // optional-value no-ops like --color=always can be stripped atomically.
89
95
  let m = /^(--[A-Za-z][\w-]*)=(.*)$/.exec(tok);
90
- if (m) { out.push(m[1], m[2]); continue; }
96
+ if (m && VALUE_LONGS.has(m[1])) { out.push(m[1], m[2]); continue; }
91
97
 
92
98
  // attached short value or boolean bundle: -k5, -iw, -iwk5
93
99
  m = /^-([A-Za-z])(.+)$/.exec(tok);
@@ -123,9 +129,49 @@ export function normalizeArgs(args) {
123
129
  // dash-leading pattern works WITHOUT the agent needing to know about `--`.
124
130
  export function looksLikeOption(tok) {
125
131
  if (typeof tok !== 'string' || tok === '-' || tok === '--') return false;
126
- return /^-[A-Za-z]$/.test(tok) // -i
127
- || /^-[A-Za-z]{2,}$/.test(tok) // -iw (pure-letter bundle)
128
- || /^--[A-Za-z][\w-]*$/.test(tok); // --ignore-case
132
+ return /^-[A-Za-z][A-Za-z0-9]*$/.test(tok) // -i, -iw, -C2
133
+ || /^--[A-Za-z][\w-]*(?:=.*)?$/.test(tok); // --ignore-case, --foo=bar
134
+ }
135
+
136
/**
 * Consume a value-taking flag (`--name value`) from `args`.
 *
 * The candidate names are tried in the order given; for the first one that
 * is present, the flag and the token after it are removed from `args`
 * (in place) and the value is returned. Only tokens that appear before the
 * first bare `--` are considered: everything after `--` is positional, so a
 * pattern such as `-k` passed as `-- -k` is never mistaken for the flag.
 *
 * @param {Array} args - Argument list; mutated when a flag is consumed.
 * @param {string|string[]} names - Flag name or list of aliases.
 * @param {*} fallback - Value returned when the flag is absent or invalid.
 * @param {{allowOptionValue?: boolean}} [options] - When `allowOptionValue`
 *   is true, a value that itself looks like an option (e.g. `--regex -foo`)
 *   is accepted instead of being reported as a missing value.
 * @returns {{value: *, flag: (string|null), error: (string|null)}}
 *   `flag` is the matched name (null when absent); `error` is a message when
 *   the flag is present but has no usable value, otherwise null.
 */
export function parseValueFlag(args, names, fallback, { allowOptionValue = false } = {}) {
  const allNames = Array.isArray(names) ? names : [names];
  // Flags are only recognised before the end-of-options marker.
  const terminator = args.indexOf('--');
  const limit = terminator === -1 ? args.length : terminator;
  for (const name of allNames) {
    const i = args.indexOf(name);
    if (i === -1 || i >= limit) continue;
    const v = args[i + 1];
    if (v == null || (!allowOptionValue && looksLikeOption(v))) {
      // Leave args untouched so the caller can report the original input.
      return { value: fallback, flag: name, error: `${name} requires a value` };
    }
    args.splice(i, 2);
    return { value: v, flag: name, error: null };
  }
  return { value: fallback, flag: null, error: null };
}
150
+
151
/**
 * Consume an integer-valued flag from `args` and validate it.
 *
 * Delegates extraction to `parseValueFlag`, then requires the value to be a
 * plain decimal integer (optional sign, digits only) that is a safe integer
 * and not below `min`. Spellings that `Number()` would silently coerce —
 * an empty string, surrounding whitespace, hex/octal/binary prefixes,
 * exponent notation, fractions — are rejected rather than accepted.
 *
 * @param {Array} args - Argument list; mutated when the flag is consumed.
 * @param {string|string[]} names - Flag name or list of aliases.
 * @param {*} fallback - Value returned when the flag is absent or invalid.
 * @param {{min?: number}} [options] - Smallest accepted value (default 1).
 * @returns {{value: *, flag: (string|null), error: (string|null)}}
 *   On success `value` is a number; on failure `value` is `fallback` and
 *   `error` describes the problem.
 */
export function parsePositiveIntFlag(args, names, fallback, { min = 1 } = {}) {
  const parsed = parseValueFlag(args, names, fallback);
  if (parsed.error) return parsed;
  if (parsed.flag == null) return { ...parsed, value: fallback };

  // Validate the spelling first: Number('') is 0, Number('0x10') is 16 and
  // Number('1e3') is 1000, none of which a user means by "an integer".
  const raw = String(parsed.value);
  const n = /^[+-]?\d+$/.test(raw) ? Number(raw) : Number.NaN;
  if (!Number.isSafeInteger(n) || n < min) {
    return { value: fallback, flag: parsed.flag, error: `${parsed.flag} must be an integer >= ${min}` };
  }
  return { value: n, flag: parsed.flag, error: null };
}
161
+
162
/**
 * Interpret a single positional token as a line range.
 *
 * Accepted spellings are `10-20`, `10:20` and `10,20`. Both ends are
 * mandatory (an open-ended `10-` is not a range), the start must be at
 * least 1 and the end must not precede the start.
 *
 * @param {*} token - Candidate token.
 * @returns {{start: number, end: number}|null} The range, or null when the
 *   token is not a well-formed ascending range (callers then fall back to
 *   their own numeric handling or validation).
 */
export function parseLineRange(token) {
  if (typeof token !== 'string') return null;

  const match = token.match(/^(\d+)[-:,](\d+)$/);
  if (match === null) return null;

  const [, first, last] = match;
  const start = Number(first);
  const end = Number(last);
  const isAscending = start >= 1 && end >= start;
  return isAscending ? { start, end } : null;
}
130
176
 
131
177
  // After known flags are consumed, resolve the positional pattern. `--` ends
@@ -14,8 +14,9 @@ import { createHash } from 'node:crypto';
14
14
  import { existsSync, readFileSync } from 'node:fs';
15
15
  import { fileURLToPath } from 'node:url';
16
16
  import {
17
- parseFlag, parseShortFlag, parseBoolFlag,
17
+ parseBoolFlag, parseValueFlag, parsePositiveIntFlag,
18
18
  buildGrepPattern, stripInertFlags, normalizeArgs, extractPositional,
19
+ parseLineRange, looksLikeOption,
19
20
  } from './_ss-argparse.mjs';
20
21
 
21
22
  // 8-char SHA1 prefix is enough for grouping identical queries across
@@ -52,12 +53,33 @@ const rest = process.argv.slice(3);
52
53
  function resolvePositional(args, usage) {
53
54
  const { pattern, unknownFlag } = extractPositional(args);
54
55
  if (unknownFlag) {
55
- process.stderr.write(`[ss] unrecognised option "${unknownFlag}"\n${usage}\n`);
56
- process.exit(2);
56
+ failUsage(`unrecognised option "${unknownFlag}"`, usage);
57
57
  }
58
58
  return pattern;
59
59
  }
60
60
 
61
/**
 * Report a usage error and terminate the process.
 *
 * Writes `[ss] <message>` followed by the usage text to stderr, then exits
 * with status 2. Does not return.
 *
 * @param {string} message - Description of what was wrong.
 * @param {string} usage - Usage text for the command.
 */
function failUsage(message, usage) {
  const report = `[ss] ${message}\n${usage}\n`;
  process.stderr.write(report);
  process.exit(2);
}
65
+
66
/**
 * Read an integer flag via `parsePositiveIntFlag`, exiting on invalid input.
 *
 * @param {Array} args - Argument list handed to `parsePositiveIntFlag`.
 * @param {string|string[]} names - Flag name or list of aliases.
 * @param {*} fallback - Value used when the flag is absent.
 * @param {string} usage - Usage text passed to `failUsage` on error.
 * @returns {*} The parsed value, or `fallback` when the flag is absent.
 */
function readPositiveIntFlag(args, names, fallback, usage) {
  const { value, error } = parsePositiveIntFlag(args, names, fallback);
  if (error) failUsage(error, usage);
  return value;
}
71
+
72
/**
 * Read a value flag via `parseValueFlag`, exiting on a missing value.
 *
 * @param {Array} args - Argument list handed to `parseValueFlag`.
 * @param {string|string[]} names - Flag name or list of aliases.
 * @param {*} fallback - Value used when the flag is absent.
 * @param {string} usage - Usage text passed to `failUsage` on error.
 * @param {object} [opts] - Options forwarded unchanged to `parseValueFlag`.
 * @returns {*} The flag's value, or `fallback` when the flag is absent.
 */
function readValueFlag(args, names, fallback, usage, opts = {}) {
  const { value, error } = parseValueFlag(args, names, fallback, opts);
  if (error) failUsage(error, usage);
  return value;
}
77
+
78
/**
 * Abort with a usage error if any remaining argument is shaped like an
 * option (as judged by `looksLikeOption`). Reports the first such token.
 *
 * @param {Array} args - Arguments left after known flags were consumed.
 * @param {string} usage - Usage text passed to `failUsage`.
 */
function rejectUnknownOptions(args, usage) {
  const offender = args.find((tok) => looksLikeOption(tok));
  if (offender) {
    failUsage(`unrecognised option "${offender}"`, usage);
  }
}
82
+
61
83
  async function getSweetSearch() {
62
84
  const { SweetSearch } = await import(path.join(REPO_ROOT, 'core/search/sweet-search.js'));
63
85
  const s = new SweetSearch({ projectRoot: PROJECT_ROOT });
@@ -88,7 +110,7 @@ async function cmdGrep(rawArgs) {
88
110
  const ignoreCase = parseBoolFlag(args, ['-i', '--ignore-case']);
89
111
  const wordBound = parseBoolFlag(args, ['-w', '--word-regexp']);
90
112
  const fixedString = parseBoolFlag(args, ['-F', '--fixed-strings']);
91
- const k = +parseShortFlag(args, ['-k', '--top'], 20);
113
+ const k = readPositiveIntFlag(args, ['-k', '--top'], 20, GREP_USAGE);
92
114
  stripInertFlags(args);
93
115
  const regex = buildGrepPattern(resolvePositional(args, GREP_USAGE), { ignoreCase, wordBound, fixedString });
94
116
  if (!regex) {
@@ -133,8 +155,8 @@ async function cmdFind(rawArgs) {
133
155
  const ignoreCase = parseBoolFlag(args, ['-i', '--ignore-case']);
134
156
  const wordBound = parseBoolFlag(args, ['-w', '--word-regexp']);
135
157
  const fixedString = parseBoolFlag(args, ['-F', '--fixed-strings']);
136
- const k = +parseShortFlag(args, ['-k', '--top'], 6);
137
- const regex = parseFlag(args, '--regex', '');
158
+ const k = readPositiveIntFlag(args, ['-k', '--top'], 6, FIND_USAGE);
159
+ const regex = readValueFlag(args, '--regex', '', FIND_USAGE, { allowOptionValue: true });
138
160
  stripInertFlags(args);
139
161
  const query = resolvePositional(args, FIND_USAGE);
140
162
  if (!query) {
@@ -188,12 +210,26 @@ async function cmdFind(rawArgs) {
188
210
  process.exit(0);
189
211
  }
190
212
 
213
+ // ss-read takes NO flags — only positional <file> [start] [end] (or a single
214
+ // "start-end" / "start:end" / "start,end" range token). Unlike ss-grep, a stray
215
+ // flag here can never silently corrupt the result: the line slots are validated
216
+ // as numbers, so a misuse is already a loud error. These hints exist only to
217
+ // turn that error into a self-correcting one (the M++ prompt, which we may not
218
+ // touch, documents the positional form, not these recovery messages).
219
+ const READ_USAGE =
220
+ 'Usage: ss-read <file> # whole file\n' +
221
+ ' ss-read <file> <start> # ONE line\n' +
222
+ ' ss-read <file> <start> <end>\n' +
223
+ ' ss-read <file> 10-20 # range (also 10:20, 10,20)\n' +
224
+ 'Note: ss-read has no flags (no -n/--limit/-r); line selection is positional.';
191
225
  async function cmdRead(args) {
192
226
  const file = args[0];
193
227
  if (!file) {
194
- process.stderr.write('Usage: ss-read <file> # whole file\n');
195
- process.stderr.write(' ss-read <file> <start> # ONE line\n');
196
- process.stderr.write(' ss-read <file> <start> <end>\n');
228
+ process.stderr.write(READ_USAGE + '\n');
229
+ process.exit(2);
230
+ }
231
+ if (looksLikeOption(file)) {
232
+ process.stderr.write(`[ss-read] "${file}" looks like a flag, but ss-read takes a file path first.\n${READ_USAGE}\n`);
197
233
  process.exit(2);
198
234
  }
199
235
  // If start is provided and end is omitted, read EXACTLY that one line —
@@ -201,19 +237,27 @@ async function cmdRead(args) {
201
237
  // caused accidental over-reading on large files).
202
238
  let start = null, end = null;
203
239
  if (args[1] != null) {
204
- start = +args[1];
205
- if (!Number.isFinite(start) || start < 1) {
206
- process.stderr.write(`[ss-read] invalid start line: "${args[1]}"\n`);
207
- process.exit(2);
208
- }
209
- if (args[2] != null) {
210
- end = +args[2];
211
- if (!Number.isFinite(end) || end < start) {
212
- process.stderr.write(`[ss-read] invalid end line: "${args[2]}" (must be ≥ start ${start})\n`);
240
+ // Accept a single-token range (10-20 / 10:20 / 10,20) before the plain
241
+ // numeric path, so "lines 10-20" muscle memory works without a wasted call.
242
+ const range = parseLineRange(args[1]);
243
+ if (range && args[2] == null) {
244
+ start = range.start;
245
+ end = range.end;
246
+ } else {
247
+ start = +args[1];
248
+ if (!Number.isFinite(start) || start < 1) {
249
+ process.stderr.write(`[ss-read] invalid start line: "${args[1]}" (expected a line number, e.g. 10, or a range like 10-20)\n${READ_USAGE}\n`);
213
250
  process.exit(2);
214
251
  }
215
- } else {
216
- end = start; // single-line read
252
+ if (args[2] != null) {
253
+ end = +args[2];
254
+ if (!Number.isFinite(end) || end < start) {
255
+ process.stderr.write(`[ss-read] invalid end line: "${args[2]}" (must be ≥ start ${start})\n`);
256
+ process.exit(2);
257
+ }
258
+ } else {
259
+ end = start; // single-line read
260
+ }
217
261
  }
218
262
  }
219
263
  const { readFile } = await import(path.join(REPO_ROOT, 'core/search/search-read.js'));
@@ -228,7 +272,9 @@ async function cmdRead(args) {
228
272
  process.exit(0);
229
273
  }
230
274
 
231
- async function cmdAgentSearch(args) {
275
+ const SEARCH_USAGE = 'Usage: ss-search "<query>" [--full|--xl] [-k N] [--mode auto|lexical|semantic|hybrid]';
276
+ async function cmdAgentSearch(rawArgs) {
277
+ const args = normalizeArgs(rawArgs);
232
278
  // Main sweet-search auto/CatBoost search with token-budgeted agent packaging.
233
279
  //
234
280
  // Usage:
@@ -245,11 +291,11 @@ async function cmdAgentSearch(args) {
245
291
  let format = 'agent';
246
292
  if (args.includes('--full')) { format = 'agent_full'; args.splice(args.indexOf('--full'), 1); }
247
293
  if (args.includes('--xl')) { format = 'agent_full_xl'; args.splice(args.indexOf('--xl'), 1); }
248
- const k = +parseShortFlag(args, ['-k', '--top'], 5);
249
- const mode = parseFlag(args, '--mode', 'auto');
250
- const query = args[0];
294
+ const k = readPositiveIntFlag(args, ['-k', '--top'], 5, SEARCH_USAGE);
295
+ const mode = readValueFlag(args, '--mode', 'auto', SEARCH_USAGE);
296
+ const query = resolvePositional(args, SEARCH_USAGE);
251
297
  if (!query) {
252
- process.stderr.write('Usage: ss-search "<query>" [--full|--xl] [-k N] [--mode auto|lexical|semantic|hybrid]\n');
298
+ process.stderr.write(SEARCH_USAGE + '\n');
253
299
  process.exit(2);
254
300
  }
255
301
 
@@ -399,18 +445,21 @@ async function cmdAgentSearch(args) {
399
445
  process.exit(0);
400
446
  }
401
447
 
402
- async function cmdSemantic(args) {
448
+ const SEMANTIC_USAGE = 'Usage: ss-semantic <file> "<question>" [--max-tokens N]';
449
+ async function cmdSemantic(rawArgs) {
450
+ const args = normalizeArgs(rawArgs);
451
+ // Default 600 (was 800) per the 2026-06 budget sweep — scaled with the 3k
452
+ // preview tier. Env hook overrides the default for sweeps; an explicit
453
+ // --max-tokens flag from the agent always wins.
454
+ const maxTokens = readPositiveIntFlag(args, '--max-tokens',
455
+ Number(process.env.SS_SMOKE_SEMANTIC_MAXTOKENS || '') || 600, SEMANTIC_USAGE);
456
+ rejectUnknownOptions(args, SEMANTIC_USAGE);
403
457
  const file = args[0];
404
458
  const query = args[1];
405
459
  if (!file || !query) {
406
- process.stderr.write('Usage: ss-semantic <file> "<question>" [--max-tokens N]\n');
460
+ process.stderr.write(SEMANTIC_USAGE + '\n');
407
461
  process.exit(2);
408
462
  }
409
- // Default 600 (was 800) per the 2026-06 budget sweep — scaled with the 3k
410
- // preview tier. Env hook overrides the default for sweeps; an explicit
411
- // --max-tokens flag from the agent always wins.
412
- const maxTokens = +parseFlag(args.slice(2), '--max-tokens',
413
- Number(process.env.SS_SMOKE_SEMANTIC_MAXTOKENS || '') || 600);
414
463
  const { readSemantic } = await import(path.join(REPO_ROOT, 'core/search/search-read-semantic.js'));
415
464
  const r = await readSemantic({
416
465
  path: file, query, projectRoot: PROJECT_ROOT,
@@ -429,29 +478,31 @@ async function cmdSemantic(args) {
429
478
  process.exit(0);
430
479
  }
431
480
 
432
- async function cmdTrace(args) {
481
+ const TRACE_USAGE = 'Usage: ss-trace <symbol> [--in <file>] [--query <hint>] [--depth N] [--budget N]';
482
+ async function cmdTrace(rawArgs) {
483
+ const args = normalizeArgs(rawArgs);
433
484
  let json = false;
434
485
  if (args.includes('--json')) {
435
486
  json = true;
436
487
  args.splice(args.indexOf('--json'), 1);
437
488
  }
438
- const symbol = args[0];
439
- if (!symbol) {
440
- process.stderr.write('Usage: ss-trace <symbol> [--in <file>] [--query <hint>] [--depth N] [--budget N]\n');
441
- process.exit(2);
442
- }
443
489
  const { traceSymbol, formatStructuralContext } = await import(path.join(REPO_ROOT, 'core/search/search-trace.js'));
444
490
 
445
491
  const opts = { projectRoot: PROJECT_ROOT };
446
- const file = parseFlag(args, '--in', null) || parseFlag(args, '--file', null);
447
- const queryHint = parseFlag(args, '--query', '') || parseFlag(args, '--hint', '');
448
- const depth = parseFlag(args, '--depth', null);
449
- const budget = parseFlag(args, '--budget', null);
492
+ const file = readValueFlag(args, ['--in', '--file'], null, TRACE_USAGE);
493
+ const queryHint = readValueFlag(args, ['--query', '--hint'], '', TRACE_USAGE, { allowOptionValue: true });
494
+ const depth = readPositiveIntFlag(args, '--depth', null, TRACE_USAGE);
495
+ const budget = readPositiveIntFlag(args, '--budget', null, TRACE_USAGE);
496
+ const symbol = resolvePositional(args, TRACE_USAGE);
497
+ if (!symbol) {
498
+ process.stderr.write(TRACE_USAGE + '\n');
499
+ process.exit(2);
500
+ }
450
501
  if (file) opts.filePath = file;
451
502
  if (queryHint) opts.queryHint = queryHint;
452
- if (depth != null) opts.maxDepth = +depth;
503
+ if (depth != null) opts.maxDepth = depth;
453
504
  // Budget-sweep experiment hook: env sets the default; explicit --budget wins.
454
- if (budget != null) opts.tokenBudget = +budget;
505
+ if (budget != null) opts.tokenBudget = budget;
455
506
  else if (Number(process.env.SS_SMOKE_TRACE_BUDGET || '') > 0) opts.tokenBudget = Number(process.env.SS_SMOKE_TRACE_BUDGET);
456
507
 
457
508
  const response = traceSymbol(symbol, opts);
@@ -3,6 +3,8 @@
3
3
  # ss-read <file> # whole file
4
4
  # ss-read <file> <start> # ONE line (NOT start-to-EOF)
5
5
  # ss-read <file> <start> <end> # explicit range
6
+ # ss-read <file> 10-20 # range token (also 10:20, 10,20)
7
+ # No flags: ss-read takes no -n/--limit/-r — line selection is positional.
6
8
  # Open-ended start-to-EOF is intentionally not supported in the bench wrapper
7
9
  # to prevent accidental over-reading. To pull a span, give an explicit end.
8
10
  DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sweet-search",
3
- "version": "2.5.13",
3
+ "version": "2.5.14",
4
4
  "description": "Sweet Search - SOTA Hybrid Code Search Engine with WASM CatBoost Query Router, Semantic/Lexical/Structural Search, and Multilingual Support",
5
5
  "type": "module",
6
6
  "main": "core/search/sweet-search.js",
@@ -165,12 +165,12 @@
165
165
  },
166
166
  "optionalDependencies": {
167
167
  "usearch": "^2.21.4",
168
- "@sweet-search/native-darwin-arm64": "2.5.13",
169
- "@sweet-search/native-darwin-x64": "2.5.13",
170
- "@sweet-search/native-linux-arm64-gnu": "2.5.13",
171
- "@sweet-search/native-linux-arm64-gnu-cuda": "2.5.13",
172
- "@sweet-search/native-linux-x64-gnu": "2.5.13",
173
- "@sweet-search/native-linux-x64-gnu-cuda": "2.5.13"
168
+ "@sweet-search/native-darwin-arm64": "2.5.14",
169
+ "@sweet-search/native-darwin-x64": "2.5.14",
170
+ "@sweet-search/native-linux-arm64-gnu": "2.5.14",
171
+ "@sweet-search/native-linux-arm64-gnu-cuda": "2.5.14",
172
+ "@sweet-search/native-linux-x64-gnu": "2.5.14",
173
+ "@sweet-search/native-linux-x64-gnu-cuda": "2.5.14"
174
174
  },
175
175
  "engines": {
176
176
  "node": ">=18.0.0"