sweet-search 2.5.12 → 2.5.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
// Pure argument-parsing helpers for the ss-* CLI wrappers.
|
|
2
|
+
//
|
|
3
|
+
// Extracted from _ss-helpers.mjs so they can be unit-tested without triggering
|
|
4
|
+
// the CLI's top-level IIFE (which runs on import). NOTHING here touches
|
|
5
|
+
// process.* or the filesystem — every function is a pure transform over an
|
|
6
|
+
// args array (some mutate the array in place, by design, and return a value).
|
|
7
|
+
|
|
8
|
+
// --- value-flag parsers (mutate `args`, returning the consumed value) --------
|
|
9
|
+
|
|
10
|
+
/**
 * Consume a `name value` pair from `args` and return the value.
 *
 * Mutates `args` in place: the flag token and the token that follows it are
 * spliced out. Only the option region is searched — once a standalone `--`
 * appears, everything after it is positional (normalizeArgs and
 * extractPositional treat it the same way), so a flag-shaped token after
 * `--` is left untouched.
 *
 * @param {Array} args - Argument tokens; mutated when the flag is found.
 * @param {string} name - Exact flag token to look for (e.g. `--regex`).
 * @param {*} fallback - Returned when the flag is not in the option region.
 * @returns {*} The token following the flag (`undefined` when the flag is the
 *   last token), or `fallback` when the flag is absent.
 */
export function parseFlag(args, name, fallback) {
  const terminator = args.indexOf('--');
  const i = args.indexOf(name);
  // indexOf yields the first occurrence, so if that one sits after `--`
  // there is no occurrence in the option region at all.
  if (i === -1 || (terminator !== -1 && i > terminator)) return fallback;
  const v = args[i + 1];
  args.splice(i, 2);
  return v;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Consume the first present alias of a value flag (e.g. `-k` / `--top`).
 *
 * Aliases are tried in the order given; the first one found in the option
 * region wins and is spliced out of `args` together with the token after it.
 * Tokens after a standalone `--` are positional and are never consumed, in
 * line with how normalizeArgs and extractPositional treat `--`.
 *
 * @param {Array} args - Argument tokens; mutated when an alias is found.
 * @param {Iterable<string>} names - Alias tokens, in priority order.
 * @param {*} fallback - Returned when no alias is in the option region.
 * @returns {*} The token following the matched alias (`undefined` when the
 *   alias is the last token), or `fallback` when none is present.
 */
export function parseShortFlag(args, names, fallback) {
  const terminator = args.indexOf('--');
  for (const n of names) {
    const i = args.indexOf(n);
    // Absent, or only present after the `--` terminator → try the next alias.
    if (i === -1 || (terminator !== -1 && i > terminator)) continue;
    const v = args[i + 1];
    args.splice(i, 2);
    return v;
  }
  return fallback;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Boolean (value-less) flag: remove every occurrence in the option region and
 * report whether any was present.
 *
 * Tokens after a standalone `--` are positional, so they are left alone even
 * when they are spelled like the flag. This keeps `-- -i` usable as a literal
 * pattern and matches how normalizeArgs and extractPositional treat `--`.
 *
 * @param {Array} args - Argument tokens; matching tokens are spliced out.
 * @param {Iterable<string>} names - Every spelling of the flag.
 * @returns {boolean} True when at least one spelling was found and removed.
 */
export function parseBoolFlag(args, names) {
  const spellings = new Set(names);
  const terminator = args.indexOf('--');
  const end = terminator === -1 ? args.length : terminator;
  let present = false;
  // Walk backwards so splicing never shifts an index still to be visited.
  for (let i = end - 1; i >= 0; i--) {
    if (spellings.has(args[i])) {
      args.splice(i, 1);
      present = true;
    }
  }
  return present;
}
|
|
35
|
+
|
|
36
|
+
// --- pattern construction ----------------------------------------------------
|
|
37
|
+
|
|
38
|
+
/**
 * Backslash-escape every regex metacharacter so the text matches literally.
 *
 * @param {*} s - Value to escape; coerced with String() first.
 * @returns {string} The input text with each of . * + ? ^ $ { } ( ) | [ ] \
 *   preceded by a backslash.
 */
export function escapeRegex(s) {
  const text = String(s);
  return text.replace(/[.*+?^${}()|[\]\\]/g, (meta) => '\\' + meta);
}
|
|
41
|
+
|
|
42
|
+
// Translate the grep-family pattern flags into a single regex — no engine change
|
|
43
|
+
// needed. `-F` escapes the pattern so metacharacters are literal; `-w` wraps it
|
|
44
|
+
// in word boundaries; `-i` prepends the `(?i)` inline flag the planner already
|
|
45
|
+
// honours end-to-end (hasCaseInsensitiveRegexFlag → ripgrep prefilter + Rust
|
|
46
|
+
// gram+grep). Order matters: escape (literal) → word-wrap → case flag.
|
|
47
|
+
/**
 * Fold the grep-style pattern flags into one regex source string.
 *
 * Steps run in a fixed order: literal escaping, then word-boundary wrapping,
 * then the case-insensitive prefix. A falsy pattern is returned unchanged.
 *
 * @param {*} pattern - The user's pattern.
 * @param {object} [options]
 * @param {boolean} [options.ignoreCase=false] - Prefix `(?i)` unless the
 *   built pattern already opens with an inline flag group containing `i`.
 * @param {boolean} [options.wordBound=false] - Wrap the pattern in a
 *   non-capturing group between `\b` anchors.
 * @param {boolean} [options.fixedString=false] - Escape regex metacharacters
 *   via escapeRegex so the pattern matches literally.
 * @returns {*} The combined pattern, or the original falsy value.
 */
export function buildGrepPattern(pattern, { ignoreCase = false, wordBound = false, fixedString = false } = {}) {
  if (!pattern) return pattern;

  let built = pattern;
  if (fixedString) {
    built = escapeRegex(built);
  }
  if (wordBound) {
    built = `\\b(?:${built})\\b`;
  }
  if (!ignoreCase) {
    return built;
  }

  // Skip the prefix when the pattern already starts with an inline flag
  // group that mentions `i`, e.g. `(?i)` or `(?si:`.
  const hasInlineCaseFlag = /^\(\?[a-z-]*i[a-z-]*[:)]/.test(built);
  return hasInlineCaseFlag ? built : `(?i)${built}`;
}
|
|
54
|
+
|
|
55
|
+
// --- inert flags (always true for ss-*, safe to accept as no-ops) ------------
|
|
56
|
+
// These never change which lines match: we always print file:line, always
|
|
57
|
+
// search the whole index, never colourise. Stripping them lets reflexive grep
|
|
58
|
+
// muscle-memory pass without a wasted call — UNLIKE semantic flags (-w/-F/-v/
|
|
59
|
+
// -C…), which we either implement or reject, never silently drop.
|
|
60
|
+
/** Flag spellings that are accepted and discarded without any effect. */
export const INERT_FLAGS = new Set([
  '-n', '--line-number', '-H', '--with-filename', '--no-filename',
  '-r', '-R', '--recursive', '--color', '--colour',
]);

/**
 * Remove inert flags from `args` in place.
 *
 * Drops every string token in the option region that is in INERT_FLAGS or
 * has the `--color=…` / `--colour=…` form. Tokens after a standalone `--`
 * are positional and are kept as-is, so `-- -r` still delivers `-r` as the
 * pattern (normalizeArgs and extractPositional treat `--` the same way).
 *
 * @param {Array} args - Argument tokens; mutated in place.
 * @returns {void}
 */
export function stripInertFlags(args) {
  const terminator = args.indexOf('--');
  const end = terminator === -1 ? args.length : terminator;
  // Walk backwards so splicing never shifts an index still to be visited.
  for (let i = end - 1; i >= 0; i--) {
    const a = args[i];
    if (typeof a === 'string' && (INERT_FLAGS.has(a) || /^--colou?r=/.test(a))) {
      args.splice(i, 1);
    }
  }
}
|
|
73
|
+
|
|
74
|
+
// --- normalisation: make agent-typed forms canonical before parsing ----------
|
|
75
|
+
// Short flags that consume a following value, and value-less boolean shorts.
|
|
76
|
+
// Used to split attached/bundled forms (-k5, -iw, -iwk5) the way getopt would,
|
|
77
|
+
// so they parse instead of being mistaken for an unknown flag or the pattern.
|
|
78
|
+
/** Short option letters that consume the following value (e.g. `-k 5`). */
export const VALUE_SHORTS = new Set(['k']);
/** Short option letters that are value-less booleans. */
export const BOOL_SHORTS = new Set(['i', 'w', 'F']);

/**
 * Rewrite attached and bundled option spellings into canonical separate
 * tokens, returning a new array (the input is not mutated).
 *
 * Rewrites applied to string tokens before a standalone `--`:
 *   - `--name=value` becomes `--name`, `value`
 *   - `-k5` becomes `-k`, `5` (first letter is a value short)
 *   - `-iw` / `-iwk5` become one token per boolean letter; a value short ends
 *     the bundle and takes the remaining characters, if any, as its value
 *
 * Left untouched: non-string tokens, `--` and everything after it, bundles
 * containing a letter in neither set, and `--color=…` / `--colour=…`. The
 * colour form must stay one token: split in two, only the bare `--color`
 * would be recognised as inert and the orphaned value could be taken for the
 * positional pattern.
 *
 * @param {Array} args - Raw argument tokens.
 * @returns {Array} A new array of normalised tokens.
 */
export function normalizeArgs(args) {
  const out = [];
  let positionalOnly = false;
  for (const tok of args) {
    if (positionalOnly || typeof tok !== 'string') { out.push(tok); continue; }
    if (tok === '--') { out.push(tok); positionalOnly = true; continue; }

    // Inert colour option carrying its own value: keep it whole so that
    // stripInertFlags can drop flag and value together.
    if (/^--colou?r=/.test(tok)) { out.push(tok); continue; }

    // --name=value → --name value
    let m = /^(--[A-Za-z][\w-]*)=(.*)$/.exec(tok);
    if (m) { out.push(m[1], m[2]); continue; }

    // attached short value or boolean bundle: -k5, -iw, -iwk5
    m = /^-([A-Za-z])(.+)$/.exec(tok);
    if (m) {
      const first = m[1];
      if (VALUE_SHORTS.has(first)) { out.push('-' + first, m[2]); continue; } // -k5 → -k 5
      if (BOOL_SHORTS.has(first)) {
        const chars = tok.slice(1);
        const expanded = [];
        let i = 0;
        let ok = true;
        while (i < chars.length) {
          const ch = chars[i];
          if (BOOL_SHORTS.has(ch)) {
            expanded.push('-' + ch);
            i++;
          } else if (VALUE_SHORTS.has(ch)) {
            // A value short ends the bundle; the rest (if any) is its value.
            const val = chars.slice(i + 1);
            expanded.push('-' + ch);
            if (val) expanded.push(val);
            i = chars.length;
          } else {
            // Unknown letter → not a bundle we understand; keep token intact.
            ok = false;
            break;
          }
        }
        if (ok) { out.push(...expanded); continue; }
      }
    }
    out.push(tok);
  }
  return out;
}
|
|
118
|
+
|
|
119
|
+
// A token that looks like a real CLI option, as opposed to a regex/query that
|
|
120
|
+
// merely begins with '-' (e.g. `-?\d+`, `-->`). Narrow on purpose: single short
|
|
121
|
+
// letter, pure-letter bundle, or GNU long flag. Anything containing regex
|
|
122
|
+
// metacharacters falls through and is treated as the positional pattern, so a
|
|
123
|
+
// dash-leading pattern works WITHOUT the agent needing to know about `--`.
|
|
124
|
+
/**
 * Decide whether a token is shaped like a CLI option rather than a pattern
 * that merely begins with a dash.
 *
 * @param {*} tok - Token to classify.
 * @returns {boolean} True for a single short letter (`-i`), a pure-letter
 *   bundle (`-iw`) or a long flag (`--ignore-case`); false for non-strings,
 *   for `-` and `--`, and for anything else.
 */
export function looksLikeOption(tok) {
  if (typeof tok !== 'string') return false;
  if (tok === '-' || tok === '--') return false;

  const optionShapes = [
    /^-[A-Za-z]$/,          // -i
    /^-[A-Za-z]{2,}$/,      // -iw (pure-letter bundle)
    /^--[A-Za-z][\w-]*$/,   // --ignore-case
  ];
  return optionShapes.some((shape) => shape.test(tok));
}
|
|
130
|
+
|
|
131
|
+
// After known flags are consumed, resolve the positional pattern. `--` ends
|
|
132
|
+
// option parsing (everything after is positional). Any remaining option-shaped
|
|
133
|
+
// token is an unsupported flag → reported, not silently dropped and not
|
|
134
|
+
// mistaken for the pattern. Returns { pattern, unknownFlag }; the caller decides
|
|
135
|
+
// how to surface the error (kept side-effect-free for testability).
|
|
136
|
+
/**
 * Pick the positional pattern out of what is left once known flags have been
 * consumed, without side effects.
 *
 * With a `--` separator, only the tokens before it are checked for
 * option-shaped leftovers and the pattern is the first token after it.
 * Without one, all tokens are checked and the pattern is the first token.
 *
 * @param {Array} args - Remaining argument tokens (not mutated).
 * @returns {{pattern: *, unknownFlag: (string|null)}} `unknownFlag` is the
 *   first option-shaped token found (with `pattern` undefined), or null.
 */
export function extractPositional(args) {
  const sep = args.indexOf('--');
  const hasSeparator = sep !== -1;

  const optionRegion = hasSeparator ? args.slice(0, sep) : args;
  const positionals = hasSeparator ? args.slice(sep + 1) : args;

  const unknownFlag = optionRegion.find(looksLikeOption);
  if (unknownFlag) {
    return { pattern: undefined, unknownFlag };
  }
  return { pattern: positionals[0], unknownFlag: null };
}
|
|
@@ -13,6 +13,10 @@ import path from 'node:path';
|
|
|
13
13
|
import { createHash } from 'node:crypto';
|
|
14
14
|
import { existsSync, readFileSync } from 'node:fs';
|
|
15
15
|
import { fileURLToPath } from 'node:url';
|
|
16
|
+
import {
|
|
17
|
+
parseFlag, parseShortFlag, parseBoolFlag,
|
|
18
|
+
buildGrepPattern, stripInertFlags, normalizeArgs, extractPositional,
|
|
19
|
+
} from './_ss-argparse.mjs';
|
|
16
20
|
|
|
17
21
|
// 8-char SHA1 prefix is enough for grouping identical queries across
|
|
18
22
|
// benchmark runs without bloating artifacts.
|
|
@@ -40,19 +44,18 @@ process.env.SWEET_SEARCH_PROJECT_ROOT = PROJECT_ROOT;
|
|
|
40
44
|
const subcommand = process.argv[2];
|
|
41
45
|
const rest = process.argv.slice(3);
|
|
42
46
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
if (i !== -1) { const v = args[i + 1]; args.splice(i, 2); return v; }
|
|
47
|
+
// Pure arg-parsing helpers (parseFlag/parseShortFlag/parseBoolFlag/
|
|
48
|
+
// buildGrepPattern/stripInertFlags/normalizeArgs/extractPositional) live in
|
|
49
|
+
// ./_ss-argparse.mjs so they can be unit-tested without this file's top-level
|
|
50
|
+
// IIFE firing. resolvePositional wraps the side-effect-free extractPositional
|
|
51
|
+
// with the CLI's loud-error exit.
|
|
52
|
+
/**
 * CLI wrapper around extractPositional: on an unsupported option, write an
 * error line plus the usage string to stderr and exit with status 2.
 *
 * @param {Array} args - Remaining argument tokens after known flags.
 * @param {string} usage - Usage text printed after the error line.
 * @returns {*} The positional pattern reported by extractPositional.
 */
function resolvePositional(args, usage) {
  const parsed = extractPositional(args);
  if (parsed.unknownFlag) {
    const message = `[ss] unrecognised option "${parsed.unknownFlag}"\n${usage}\n`;
    process.stderr.write(message);
    process.exit(2);
  }
  return parsed.pattern;
}
|
|
57
60
|
|
|
58
61
|
async function getSweetSearch() {
|
|
@@ -79,11 +82,17 @@ async function ensureWarmServerReady({ timeoutMs = 60000, intervalMs = 500 } = {
|
|
|
79
82
|
|
|
80
83
|
// --- subcommands ----------------------------------------------------------
|
|
81
84
|
|
|
82
|
-
|
|
85
|
+
const GREP_USAGE = 'Usage: ss-grep <regex> [-i|--ignore-case] [-w|--word-regexp] [-F|--fixed-strings] [-k N]';
|
|
86
|
+
async function cmdGrep(rawArgs) {
|
|
87
|
+
const args = normalizeArgs(rawArgs);
|
|
88
|
+
const ignoreCase = parseBoolFlag(args, ['-i', '--ignore-case']);
|
|
89
|
+
const wordBound = parseBoolFlag(args, ['-w', '--word-regexp']);
|
|
90
|
+
const fixedString = parseBoolFlag(args, ['-F', '--fixed-strings']);
|
|
83
91
|
const k = +parseShortFlag(args, ['-k', '--top'], 20);
|
|
84
|
-
|
|
92
|
+
stripInertFlags(args);
|
|
93
|
+
const regex = buildGrepPattern(resolvePositional(args, GREP_USAGE), { ignoreCase, wordBound, fixedString });
|
|
85
94
|
if (!regex) {
|
|
86
|
-
process.stderr.write(
|
|
95
|
+
process.stderr.write(GREP_USAGE + '\n');
|
|
87
96
|
process.exit(2);
|
|
88
97
|
}
|
|
89
98
|
const s = await getSweetSearch();
|
|
@@ -109,27 +118,34 @@ async function cmdGrep(args) {
|
|
|
109
118
|
process.exit(0);
|
|
110
119
|
}
|
|
111
120
|
|
|
112
|
-
async function cmdFind(
|
|
121
|
+
async function cmdFind(rawArgs) {
|
|
122
|
+
const args = normalizeArgs(rawArgs);
|
|
113
123
|
// ColGrep pattern search with token-budgeted agent packaging — returns the
|
|
114
124
|
// FULL useful answer (ranked code blocks + confidence + sufficiency), the same
|
|
115
125
|
// agent packaging ss-search emits. ss-grep is the short/locator counterpart, so
|
|
116
126
|
// ss-find defaults to the full answer: it saves the follow-up read entirely.
|
|
117
127
|
// (Mirrors the agent-in-the-loop H2H adapter eval/agent-eval/tools/
|
|
118
128
|
// pattern-agent-tools.js, which calls search(...,{format:'agent'}).)
|
|
129
|
+
const FIND_USAGE = 'Usage: ss-find "<query>" --regex "<regex>" [-i|--ignore-case] [-w|--word-regexp] [-F|--fixed-strings] [--full|--xl] [-k N]';
|
|
119
130
|
let format = 'agent';
|
|
120
131
|
if (args.includes('--full')) { format = 'agent_full'; args.splice(args.indexOf('--full'), 1); }
|
|
121
132
|
if (args.includes('--xl')) { format = 'agent_full_xl'; args.splice(args.indexOf('--xl'), 1); }
|
|
133
|
+
const ignoreCase = parseBoolFlag(args, ['-i', '--ignore-case']);
|
|
134
|
+
const wordBound = parseBoolFlag(args, ['-w', '--word-regexp']);
|
|
135
|
+
const fixedString = parseBoolFlag(args, ['-F', '--fixed-strings']);
|
|
122
136
|
const k = +parseShortFlag(args, ['-k', '--top'], 6);
|
|
123
137
|
const regex = parseFlag(args, '--regex', '');
|
|
124
|
-
|
|
138
|
+
stripInertFlags(args);
|
|
139
|
+
const query = resolvePositional(args, FIND_USAGE);
|
|
125
140
|
if (!query) {
|
|
126
|
-
process.stderr.write(
|
|
141
|
+
process.stderr.write(FIND_USAGE + '\n');
|
|
127
142
|
process.exit(2);
|
|
128
143
|
}
|
|
129
144
|
// Budget-sweep experiment hook: lets the bench pin the response token budget
|
|
130
145
|
// per-process without changing the agent-visible tool surface.
|
|
131
146
|
const envFindBudget = Number(process.env.SS_SMOKE_FIND_BUDGET || '') || null;
|
|
132
|
-
|
|
147
|
+
// Pattern flags apply to the regex candidate generator; the NL query is untouched.
|
|
148
|
+
const effectiveRegex = buildGrepPattern(regex || '', { ignoreCase, wordBound, fixedString });
|
|
133
149
|
const s = await getSweetSearch();
|
|
134
150
|
if (!s.hasLateInteractionIndex) {
|
|
135
151
|
process.stderr.write(`[ss-find] no late-interaction index — falling back to ss-grep\n`);
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
# Use for behavioural / semantic questions where lexical alone won't pinpoint the
|
|
6
6
|
# chunk. (ss-grep is the short file:line locator.)
|
|
7
7
|
#
|
|
8
|
-
# Usage: ss-find "<query>" --regex "<regex>" [--full|--xl] [-k N]
|
|
8
|
+
# Usage: ss-find "<query>" --regex "<regex>" [-i|--ignore-case] [-w|--word-regexp] [-F|--fixed-strings] [--full|--xl] [-k N]
|
|
9
9
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
|
10
10
|
TMPERR=$(mktemp)
|
|
11
11
|
node "$DIR/_ss-helpers.mjs" find "$@" 2>"$TMPERR"
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
# ss-grep: indexed bare grep (gram-prefiltered) over the cwd's Sweet Search index.
|
|
3
3
|
# Compact agent-friendly output: file:line matchText
|
|
4
4
|
#
|
|
5
|
-
# Usage: ss-grep <regex> [-k N]
|
|
5
|
+
# Usage: ss-grep <regex> [-i|--ignore-case] [-w|--word-regexp] [-F|--fixed-strings] [-k N]
|
|
6
6
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
|
7
7
|
TMPERR=$(mktemp)
|
|
8
8
|
node "$DIR/_ss-helpers.mjs" grep "$@" 2>"$TMPERR"
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sweet-search",
|
|
3
|
-
"version": "2.5.
|
|
3
|
+
"version": "2.5.13",
|
|
4
4
|
"description": "Sweet Search - SOTA Hybrid Code Search Engine with WASM CatBoost Query Router, Semantic/Lexical/Structural Search, and Multilingual Support",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "core/search/sweet-search.js",
|
|
@@ -76,6 +76,7 @@
|
|
|
76
76
|
"eval/agent-read-workflows/bin/ss-trace",
|
|
77
77
|
"eval/agent-read-workflows/bin/ss-read",
|
|
78
78
|
"eval/agent-read-workflows/bin/_ss-helpers.mjs",
|
|
79
|
+
"eval/agent-read-workflows/bin/_ss-argparse.mjs",
|
|
79
80
|
"crates/wasm-router/pkg/",
|
|
80
81
|
"LICENSE",
|
|
81
82
|
"NOTICE"
|
|
@@ -164,12 +165,12 @@
|
|
|
164
165
|
},
|
|
165
166
|
"optionalDependencies": {
|
|
166
167
|
"usearch": "^2.21.4",
|
|
167
|
-
"@sweet-search/native-darwin-arm64": "2.5.
|
|
168
|
-
"@sweet-search/native-darwin-x64": "2.5.
|
|
169
|
-
"@sweet-search/native-linux-arm64-gnu": "2.5.
|
|
170
|
-
"@sweet-search/native-linux-arm64-gnu-cuda": "2.5.
|
|
171
|
-
"@sweet-search/native-linux-x64-gnu": "2.5.
|
|
172
|
-
"@sweet-search/native-linux-x64-gnu-cuda": "2.5.
|
|
168
|
+
"@sweet-search/native-darwin-arm64": "2.5.13",
|
|
169
|
+
"@sweet-search/native-darwin-x64": "2.5.13",
|
|
170
|
+
"@sweet-search/native-linux-arm64-gnu": "2.5.13",
|
|
171
|
+
"@sweet-search/native-linux-arm64-gnu-cuda": "2.5.13",
|
|
172
|
+
"@sweet-search/native-linux-x64-gnu": "2.5.13",
|
|
173
|
+
"@sweet-search/native-linux-x64-gnu-cuda": "2.5.13"
|
|
173
174
|
},
|
|
174
175
|
"engines": {
|
|
175
176
|
"node": ">=18.0.0"
|