sweet-search 2.5.12 → 2.5.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,148 @@
1
+ // Pure argument-parsing helpers for the ss-* CLI wrappers.
2
+ //
3
+ // Extracted from _ss-helpers.mjs so they can be unit-tested without triggering
4
+ // the CLI's top-level IIFE (which runs on import). NOTHING here touches
5
+ // process.* or the filesystem — every function is a pure transform over an
6
+ // args array (some mutate the array in place, by design, and return a value).
7
+
8
+ // --- value-flag parsers (mutate `args`, returning the consumed value) --------
9
+
10
/**
 * Consume a `name value` option pair from `args` and return the value.
 *
 * Mutates `args` in place: the flag token (and its value, when present) are
 * removed so later positional resolution does not see them.
 *
 * Only tokens before a `--` terminator are considered; everything after `--`
 * is positional and is left untouched.
 *
 * @param {Array} args - Argument tokens; modified in place.
 * @param {string} name - Exact flag token to look for (e.g. `--regex`).
 * @param {*} fallback - Returned when the flag is absent or has no value.
 * @returns {*} The token following the flag, or `fallback`.
 */
export function parseFlag(args, name, fallback) {
  const terminator = args.indexOf('--');
  const limit = terminator === -1 ? args.length : terminator;

  const i = args.indexOf(name);
  // indexOf yields the first occurrence, so if that one sits past `--`
  // there is no earlier option-position occurrence to consume.
  if (i === -1 || i >= limit) return fallback;

  // Dangling flag (last token, or directly followed by `--`): drop the flag
  // itself and report the fallback rather than `undefined`.
  if (i + 1 >= limit) {
    args.splice(i, 1);
    return fallback;
  }

  const [, value] = args.splice(i, 2);
  return value;
}
17
+
18
/**
 * Consume a value-taking option that may be spelled several ways
 * (e.g. `['-k', '--top']`) and return its value.
 *
 * Aliases are tried in the order given; the first alias that is present with
 * a value wins, and its flag + value are removed from `args` in place.
 * An alias that is present but has no value (last token, or directly followed
 * by `--`) is removed and the search continues with the next alias.
 *
 * Only tokens before a `--` terminator are considered; everything after `--`
 * is positional and is left untouched.
 *
 * @param {Array} args - Argument tokens; modified in place.
 * @param {Iterable<string>} names - Accepted spellings of the flag.
 * @param {*} fallback - Returned when no alias supplies a value.
 * @returns {*} The token following the matched flag, or `fallback`.
 */
export function parseShortFlag(args, names, fallback) {
  for (const name of names) {
    // Recomputed per alias because a previous iteration may have spliced.
    const terminator = args.indexOf('--');
    const limit = terminator === -1 ? args.length : terminator;

    const i = args.indexOf(name);
    if (i === -1 || i >= limit) continue;

    if (i + 1 >= limit) {
      // Dangling flag: discard it so it is not later mistaken for the pattern.
      args.splice(i, 1);
      continue;
    }

    const [, value] = args.splice(i, 2);
    return value;
  }
  return fallback;
}
25
+
26
/**
 * Boolean (value-less) flag: remove every occurrence of any spelling in
 * `names` and report whether at least one was present.
 *
 * Mutates `args` in place. Only tokens before a `--` terminator are
 * considered, so a positional such as the `-i` in `-- -i` is preserved.
 *
 * @param {Array} args - Argument tokens; modified in place.
 * @param {Iterable<string>} names - Accepted spellings (e.g. `['-i', '--ignore-case']`).
 * @returns {boolean} `true` if any spelling was found (and removed).
 */
export function parseBoolFlag(args, names) {
  const wanted = new Set(names);
  const terminator = args.indexOf('--');
  const limit = terminator === -1 ? args.length : terminator;

  let present = false;
  // Walk backwards so splicing never shifts an index we have yet to visit.
  for (let i = limit - 1; i >= 0; i--) {
    if (wanted.has(args[i])) {
      args.splice(i, 1);
      present = true;
    }
  }
  return present;
}
35
+
36
+ // --- pattern construction ----------------------------------------------------
37
+
38
/**
 * Escape every regex metacharacter in `s` so the result matches `s` literally.
 *
 * @param {*} s - Value to escape; coerced with `String()`.
 * @returns {string} `s` with `. * + ? ^ $ { } ( ) | [ ] \` backslash-escaped.
 */
export function escapeRegex(s) {
  return String(s).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Translate the grep-family pattern flags into a single regex string.
 *
 * Order matters: escape (literal) → word-wrap → case flag.
 *   - `fixedString` escapes the pattern so metacharacters are literal.
 *   - `wordBound` wraps it as `\b(?:…)\b`.
 *   - `ignoreCase` prepends the inline `(?i)` flag, unless the pattern already
 *     starts with a bare flag group that turns `i` on for the whole pattern,
 *     such as `(?i)` or `(?si)`. A scoped group (`(?i:…)`) or a negated flag
 *     (`(?-i)`) does not count: neither makes the full pattern
 *     case-insensitive, so `(?i)` is still prepended.
 *
 * @param {string} pattern - User-supplied pattern; falsy values are returned as-is.
 * @param {object} [opts]
 * @param {boolean} [opts.ignoreCase=false]
 * @param {boolean} [opts.wordBound=false]
 * @param {boolean} [opts.fixedString=false]
 * @returns {string} The combined pattern (or the original falsy value).
 */
export function buildGrepPattern(pattern, { ignoreCase = false, wordBound = false, fixedString = false } = {}) {
  if (!pattern) return pattern;

  let p = fixedString ? escapeRegex(pattern) : pattern;
  if (wordBound) p = `\\b(?:${p})\\b`;

  // Matches only a leading *unscoped* flag group whose enabled flags (the
  // letters before any '-') include `i`.
  const alreadyInsensitive = /^\(\?[A-Za-z]*i[A-Za-z]*(?:-[A-Za-z]*)?\)/.test(p);
  if (ignoreCase && !alreadyInsensitive) p = `(?i)${p}`;

  return p;
}
54
+
55
+ // --- inert flags (always true for ss-*, safe to accept as no-ops) ------------
56
+ // These never change which lines match: we always print file:line, always
57
+ // search the whole index, never colourise. Stripping them lets reflexive grep
58
+ // muscle-memory pass without a wasted call — UNLIKE semantic flags (-w/-F/-v/
59
+ // -C…), which we either implement or reject, never silently drop.
60
/** Value-less grep flags that are accepted and discarded as no-ops. */
export const INERT_FLAGS = new Set([
  '-n', '--line-number', '-H', '--with-filename', '--no-filename',
  '-r', '-R', '--recursive', '--color', '--colour',
]);

/**
 * Remove inert flags from `args` in place.
 *
 * A token is removed when it is a member of `INERT_FLAGS` or has the attached
 * form `--color=…` / `--colour=…`. Only tokens before a `--` terminator are
 * examined, so a positional such as the `-n` in `-- -n` is preserved.
 *
 * @param {Array} args - Argument tokens; modified in place.
 * @returns {void}
 */
export function stripInertFlags(args) {
  const terminator = args.indexOf('--');
  const limit = terminator === -1 ? args.length : terminator;

  // Walk backwards so splicing never shifts an index we have yet to visit.
  for (let i = limit - 1; i >= 0; i--) {
    const tok = args[i];
    if (typeof tok !== 'string') continue;
    if (INERT_FLAGS.has(tok) || /^--colou?r=/.test(tok)) {
      args.splice(i, 1);
    }
  }
}
73
+
74
+ // --- normalisation: make agent-typed forms canonical before parsing ----------
75
+ // Short flags that consume a following value, and value-less boolean shorts.
76
+ // Used to split attached/bundled forms (-k5, -iw, -iwk5) the way getopt would,
77
+ // so they parse instead of being mistaken for an unknown flag or the pattern.
78
/** Short option letters that consume a following value. */
export const VALUE_SHORTS = new Set(['k']);
/** Short option letters that are value-less booleans. */
export const BOOL_SHORTS = new Set(['i', 'w', 'F']);

/**
 * Split an attached short value or boolean bundle (`-k5`, `-iw`, `-iwk5`)
 * into separate tokens. Returns `null` when `tok` is not such a bundle
 * (wrong shape, or it contains a letter that is neither a known boolean nor
 * a known value short), meaning the token must be left intact.
 */
function expandShortBundle(tok) {
  if (!/^-[A-Za-z].+$/.test(tok)) return null;

  const chars = tok.slice(1);
  const first = chars[0];
  if (VALUE_SHORTS.has(first)) return [`-${first}`, chars.slice(1)]; // -k5 → -k 5
  if (!BOOL_SHORTS.has(first)) return null;

  const expanded = [];
  for (let i = 0; i < chars.length; i++) {
    const ch = chars[i];
    if (BOOL_SHORTS.has(ch)) {
      expanded.push(`-${ch}`);
      continue;
    }
    if (!VALUE_SHORTS.has(ch)) return null; // unknown char → leave token intact
    // A value short ends the bundle; whatever follows is its value.
    expanded.push(`-${ch}`);
    const val = chars.slice(i + 1);
    if (val) expanded.push(val);
    break;
  }
  return expanded;
}

/**
 * Make commonly typed argument forms canonical before flag parsing.
 *
 * Returns a new array (the input is not modified) in which:
 *   - `--name=value` becomes `--name`, `value` (the value may contain newlines);
 *   - `-k5` becomes `-k`, `5`;
 *   - boolean bundles such as `-iw` / `-iwk5` are split into single flags.
 *
 * Left untouched: non-string tokens, every token after a `--` terminator,
 * tokens containing an unknown short letter, and the attached colour form
 * `--color=…` / `--colour=…`. The colour form is kept whole because it is a
 * value-less no-op flag: splitting it would leave its value behind as a
 * stray positional once the bare `--color` is stripped.
 *
 * @param {Array} args - Raw argument tokens.
 * @returns {Array} Normalised copy of `args`.
 */
export function normalizeArgs(args) {
  const out = [];
  let positionalOnly = false;

  for (const tok of args) {
    if (positionalOnly || typeof tok !== 'string') {
      out.push(tok);
      continue;
    }
    if (tok === '--') {
      out.push(tok);
      positionalOnly = true;
      continue;
    }

    // Inert colour flag with attached value: keep as one token.
    if (/^--colou?r=/.test(tok)) {
      out.push(tok);
      continue;
    }

    // --name=value → --name value
    const long = /^(--[A-Za-z][\w-]*)=([\s\S]*)$/.exec(tok);
    if (long) {
      out.push(long[1], long[2]);
      continue;
    }

    const bundle = expandShortBundle(tok);
    if (bundle) out.push(...bundle);
    else out.push(tok);
  }
  return out;
}
118
+
119
+ // A token that looks like a real CLI option, as opposed to a regex/query that
120
+ // merely begins with '-' (e.g. `-?\d+`, `-->`). Narrow on purpose: single short
121
+ // letter, pure-letter bundle, or GNU long flag. Anything containing regex
122
+ // metacharacters falls through and is treated as the positional pattern, so a
123
+ // dash-leading pattern works WITHOUT the agent needing to know about `--`.
124
/**
 * Report whether `tok` has the shape of a CLI option: a short letter or
 * pure-letter bundle (`-i`, `-iw`), or a GNU long flag (`--ignore-case`).
 * Non-strings, the bare `-`, and the `--` terminator are never options.
 *
 * @param {*} tok - Candidate token.
 * @returns {boolean}
 */
export function looksLikeOption(tok) {
  if (typeof tok !== 'string' || tok === '-' || tok === '--') return false;
  return /^-[A-Za-z]+$/.test(tok)             // -i, -iw
    || /^--[A-Za-z][\w-]*$/.test(tok);        // --ignore-case
}

/**
 * Resolve the positional pattern once known flags have been consumed.
 *
 * `--` ends option parsing: tokens after it are positional regardless of
 * shape. Any option-shaped token before `--` (or anywhere, when there is no
 * `--`) is an unsupported flag and is reported via `unknownFlag` rather than
 * being dropped or mistaken for the pattern. Otherwise the pattern is the
 * first positional, counting positionals before `--` ahead of those after it,
 * so `foo --` still resolves to `foo`.
 *
 * Side-effect free: `args` is not modified and nothing is printed; the caller
 * decides how to surface an unknown flag.
 *
 * @param {Array} args - Remaining argument tokens.
 * @returns {{pattern: *, unknownFlag: (string|null)}}
 */
export function extractPositional(args) {
  const sep = args.indexOf('--');
  const before = sep === -1 ? args : args.slice(0, sep);
  const after = sep === -1 ? [] : args.slice(sep + 1);

  const bad = before.find((tok) => looksLikeOption(tok));
  if (bad) return { pattern: undefined, unknownFlag: bad };

  const [pattern] = [...before, ...after];
  return { pattern, unknownFlag: null };
}
@@ -13,6 +13,10 @@ import path from 'node:path';
13
13
  import { createHash } from 'node:crypto';
14
14
  import { existsSync, readFileSync } from 'node:fs';
15
15
  import { fileURLToPath } from 'node:url';
16
+ import {
17
+ parseFlag, parseShortFlag, parseBoolFlag,
18
+ buildGrepPattern, stripInertFlags, normalizeArgs, extractPositional,
19
+ } from './_ss-argparse.mjs';
16
20
 
17
21
  // 8-char SHA1 prefix is enough for grouping identical queries across
18
22
  // benchmark runs without bloating artifacts.
@@ -40,19 +44,18 @@ process.env.SWEET_SEARCH_PROJECT_ROOT = PROJECT_ROOT;
40
44
  const subcommand = process.argv[2];
41
45
  const rest = process.argv.slice(3);
42
46
 
43
- function parseFlag(args, name, fallback) {
44
- const i = args.indexOf(name);
45
- if (i === -1) return fallback;
46
- const v = args[i + 1];
47
- args.splice(i, 2);
48
- return v;
49
- }
50
- function parseShortFlag(args, names, fallback) {
51
- for (const n of names) {
52
- const i = args.indexOf(n);
53
- if (i !== -1) { const v = args[i + 1]; args.splice(i, 2); return v; }
47
+ // Pure arg-parsing helpers (parseFlag/parseShortFlag/parseBoolFlag/
48
+ // buildGrepPattern/stripInertFlags/normalizeArgs/extractPositional) live in
49
+ // ./_ss-argparse.mjs so they can be unit-tested without this file's top-level
50
+ // IIFE firing. resolvePositional wraps the side-effect-free extractPositional
51
+ // with the CLI's loud-error exit.
52
+ function resolvePositional(args, usage) {
53
+ const { pattern, unknownFlag } = extractPositional(args);
54
+ if (unknownFlag) {
55
+ process.stderr.write(`[ss] unrecognised option "${unknownFlag}"\n${usage}\n`);
56
+ process.exit(2);
54
57
  }
55
- return fallback;
58
+ return pattern;
56
59
  }
57
60
 
58
61
  async function getSweetSearch() {
@@ -79,11 +82,17 @@ async function ensureWarmServerReady({ timeoutMs = 60000, intervalMs = 500 } = {
79
82
 
80
83
  // --- subcommands ----------------------------------------------------------
81
84
 
82
- async function cmdGrep(args) {
85
+ const GREP_USAGE = 'Usage: ss-grep <regex> [-i|--ignore-case] [-w|--word-regexp] [-F|--fixed-strings] [-k N]';
86
+ async function cmdGrep(rawArgs) {
87
+ const args = normalizeArgs(rawArgs);
88
+ const ignoreCase = parseBoolFlag(args, ['-i', '--ignore-case']);
89
+ const wordBound = parseBoolFlag(args, ['-w', '--word-regexp']);
90
+ const fixedString = parseBoolFlag(args, ['-F', '--fixed-strings']);
83
91
  const k = +parseShortFlag(args, ['-k', '--top'], 20);
84
- const regex = args[0];
92
+ stripInertFlags(args);
93
+ const regex = buildGrepPattern(resolvePositional(args, GREP_USAGE), { ignoreCase, wordBound, fixedString });
85
94
  if (!regex) {
86
- process.stderr.write('Usage: ss-grep <regex> [-k N]\n');
95
+ process.stderr.write(GREP_USAGE + '\n');
87
96
  process.exit(2);
88
97
  }
89
98
  const s = await getSweetSearch();
@@ -109,27 +118,34 @@ async function cmdGrep(args) {
109
118
  process.exit(0);
110
119
  }
111
120
 
112
- async function cmdFind(args) {
121
+ async function cmdFind(rawArgs) {
122
+ const args = normalizeArgs(rawArgs);
113
123
  // ColGrep pattern search with token-budgeted agent packaging — returns the
114
124
  // FULL useful answer (ranked code blocks + confidence + sufficiency), the same
115
125
  // agent packaging ss-search emits. ss-grep is the short/locator counterpart, so
116
126
  // ss-find defaults to the full answer: it saves the follow-up read entirely.
117
127
  // (Mirrors the agent-in-the-loop H2H adapter eval/agent-eval/tools/
118
128
  // pattern-agent-tools.js, which calls search(...,{format:'agent'}).)
129
+ const FIND_USAGE = 'Usage: ss-find "<query>" --regex "<regex>" [-i|--ignore-case] [-w|--word-regexp] [-F|--fixed-strings] [--full|--xl] [-k N]';
119
130
  let format = 'agent';
120
131
  if (args.includes('--full')) { format = 'agent_full'; args.splice(args.indexOf('--full'), 1); }
121
132
  if (args.includes('--xl')) { format = 'agent_full_xl'; args.splice(args.indexOf('--xl'), 1); }
133
+ const ignoreCase = parseBoolFlag(args, ['-i', '--ignore-case']);
134
+ const wordBound = parseBoolFlag(args, ['-w', '--word-regexp']);
135
+ const fixedString = parseBoolFlag(args, ['-F', '--fixed-strings']);
122
136
  const k = +parseShortFlag(args, ['-k', '--top'], 6);
123
137
  const regex = parseFlag(args, '--regex', '');
124
- const query = args[0];
138
+ stripInertFlags(args);
139
+ const query = resolvePositional(args, FIND_USAGE);
125
140
  if (!query) {
126
- process.stderr.write('Usage: ss-find "<query>" --regex "<regex>" [--full|--xl] [-k N]\n');
141
+ process.stderr.write(FIND_USAGE + '\n');
127
142
  process.exit(2);
128
143
  }
129
144
  // Budget-sweep experiment hook: lets the bench pin the response token budget
130
145
  // per-process without changing the agent-visible tool surface.
131
146
  const envFindBudget = Number(process.env.SS_SMOKE_FIND_BUDGET || '') || null;
132
- const effectiveRegex = regex || '';
147
+ // Pattern flags apply to the regex candidate generator; the NL query is untouched.
148
+ const effectiveRegex = buildGrepPattern(regex || '', { ignoreCase, wordBound, fixedString });
133
149
  const s = await getSweetSearch();
134
150
  if (!s.hasLateInteractionIndex) {
135
151
  process.stderr.write(`[ss-find] no late-interaction index — falling back to ss-grep\n`);
@@ -5,7 +5,7 @@
5
5
  # Use for behavioural / semantic questions where lexical alone won't pinpoint the
6
6
  # chunk. (ss-grep is the short file:line locator.)
7
7
  #
8
- # Usage: ss-find "<query>" --regex "<regex>" [--full|--xl] [-k N]
8
+ # Usage: ss-find "<query>" --regex "<regex>" [-i|--ignore-case] [-w|--word-regexp] [-F|--fixed-strings] [--full|--xl] [-k N]
9
9
  DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
10
10
  TMPERR=$(mktemp)
11
11
  node "$DIR/_ss-helpers.mjs" find "$@" 2>"$TMPERR"
@@ -2,7 +2,7 @@
2
2
  # ss-grep: indexed bare grep (gram-prefiltered) over the cwd's Sweet Search index.
3
3
  # Compact agent-friendly output: file:line matchText
4
4
  #
5
- # Usage: ss-grep <regex> [-k N]
5
+ # Usage: ss-grep <regex> [-i|--ignore-case] [-w|--word-regexp] [-F|--fixed-strings] [-k N]
6
6
  DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
7
7
  TMPERR=$(mktemp)
8
8
  node "$DIR/_ss-helpers.mjs" grep "$@" 2>"$TMPERR"
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sweet-search",
3
- "version": "2.5.12",
3
+ "version": "2.5.13",
4
4
  "description": "Sweet Search - SOTA Hybrid Code Search Engine with WASM CatBoost Query Router, Semantic/Lexical/Structural Search, and Multilingual Support",
5
5
  "type": "module",
6
6
  "main": "core/search/sweet-search.js",
@@ -76,6 +76,7 @@
76
76
  "eval/agent-read-workflows/bin/ss-trace",
77
77
  "eval/agent-read-workflows/bin/ss-read",
78
78
  "eval/agent-read-workflows/bin/_ss-helpers.mjs",
79
+ "eval/agent-read-workflows/bin/_ss-argparse.mjs",
79
80
  "crates/wasm-router/pkg/",
80
81
  "LICENSE",
81
82
  "NOTICE"
@@ -164,12 +165,12 @@
164
165
  },
165
166
  "optionalDependencies": {
166
167
  "usearch": "^2.21.4",
167
- "@sweet-search/native-darwin-arm64": "2.5.12",
168
- "@sweet-search/native-darwin-x64": "2.5.12",
169
- "@sweet-search/native-linux-arm64-gnu": "2.5.12",
170
- "@sweet-search/native-linux-arm64-gnu-cuda": "2.5.12",
171
- "@sweet-search/native-linux-x64-gnu": "2.5.12",
172
- "@sweet-search/native-linux-x64-gnu-cuda": "2.5.12"
168
+ "@sweet-search/native-darwin-arm64": "2.5.13",
169
+ "@sweet-search/native-darwin-x64": "2.5.13",
170
+ "@sweet-search/native-linux-arm64-gnu": "2.5.13",
171
+ "@sweet-search/native-linux-arm64-gnu-cuda": "2.5.13",
172
+ "@sweet-search/native-linux-x64-gnu": "2.5.13",
173
+ "@sweet-search/native-linux-x64-gnu-cuda": "2.5.13"
173
174
  },
174
175
  "engines": {
175
176
  "node": ">=18.0.0"