ruvector 0.2.21 → 0.2.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +160 -0
- package/dist/core/router-wrapper.d.ts +20 -7
- package/dist/core/router-wrapper.d.ts.map +1 -1
- package/dist/core/router-wrapper.js +52 -18
- package/package.json +1 -1
- package/src/decompiler/api-prober.js +302 -0
- package/src/decompiler/model-decompiler.js +423 -0
- package/dist/core/onnx/loader.js +0 -348
- package/dist/core/onnx/pkg/LICENSE +0 -21
- package/dist/core/onnx/pkg/loader.js +0 -348
- package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm.d.ts +0 -112
- package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm.js +0 -5
- package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_bg.js +0 -638
- package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_bg.wasm +0 -0
- package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_bg.wasm.d.ts +0 -29
- package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_cjs.js +0 -127
- package/dist/core/onnx-llm.d.ts +0 -206
- package/dist/core/onnx-llm.d.ts.map +0 -1
- package/dist/core/onnx-llm.js +0 -430
package/bin/cli.js
CHANGED
@@ -8935,13 +8935,52 @@ const decompileCmd = program
   .option('-q, --quiet', 'Suppress progress output')
   .option('--version-pkg <ver>', 'Package version (alternative to @version syntax)')
   .option('--diff <version>', 'Compare against another version')
+  .option('--model <file>', 'Decompile LLM model weight file (.gguf, .safetensors)')
+  .option('--api <model-id>', 'Probe remote LLM API to discover architecture')
+  .option('--api-key <key>', 'API key for --api mode (or use env vars)')
   .action(async (target, opts) => {
+    // Model weight decompilation mode (ADR-138)
+    if (opts.model) {
+      try {
+        const modelDecompiler = require('../src/decompiler/model-decompiler.js');
+        const result = await modelDecompiler.decompileModelFile(opts.model);
+        if (opts.json) {
+          console.log(JSON.stringify(result, null, 2));
+        } else {
+          modelDecompiler.printModelResult(result);
+        }
+      } catch (err) {
+        console.error(chalk.red(`Model decompilation failed: ${err.message}`));
+        process.exit(1);
+      }
+      return;
+    }
+
+    // API probing mode (ADR-138)
+    if (opts.api) {
+      try {
+        const apiProber = require('../src/decompiler/api-prober.js');
+        const result = await apiProber.probeModel(opts.api, { apiKey: opts.apiKey });
+        if (opts.json) {
+          console.log(JSON.stringify(result, null, 2));
+        } else {
+          apiProber.printProbeResult(result);
+        }
+      } catch (err) {
+        console.error(chalk.red(`API probe failed: ${err.message}`));
+        process.exit(1);
+      }
+      return;
+    }
+
     if (!target) {
       console.log(chalk.cyan('\nUsage:'));
       console.log(chalk.white('  ruvector decompile <package>           Decompile npm package'));
       console.log(chalk.white('  ruvector decompile <pkg>@<ver>         Specific version'));
       console.log(chalk.white('  ruvector decompile ./bundle.js         Local file'));
       console.log(chalk.white('  ruvector decompile https://unpkg.com/x URL'));
+      console.log(chalk.white('  ruvector decompile --model <file.gguf> LLM weight file'));
+      console.log(chalk.white('  ruvector decompile --api <model-id>    Probe remote API'));
       console.log(chalk.dim('\nOptions:'));
       console.log(chalk.dim('  -o, --output <dir>   Output directory'));
       console.log(chalk.dim('  -f, --format <type>  modules | single | json'));
@@ -8949,6 +8988,9 @@ const decompileCmd = program
       console.log(chalk.dim('  --no-witness         Skip witness chain'));
       console.log(chalk.dim('  --json               JSON to stdout'));
       console.log(chalk.dim('  --diff <version>     Diff against another version'));
+      console.log(chalk.dim('  --model <file>       Decompile .gguf/.safetensors'));
+      console.log(chalk.dim('  --api <model-id>     Probe LLM API'));
+      console.log(chalk.dim('  --api-key <key>      API key (or set env var)'));
       console.log('');
       return;
     }
@@ -9061,6 +9103,124 @@ const decompileCmd = program
   }
 });

+// =============================================================================
+// Optimize Commands — Claude Code profile optimization (ADR-139)
+// =============================================================================
+
+const optimizeCmd = program.command('optimize')
+  .description('Optimize Claude Code configuration per task type (ADR-139)')
+  .option('-p, --profile <type>', 'Task profile: coding|research|quickfix|planning|background|swarm|review|ci')
+  .option('-s, --show', 'Show current optimization status')
+  .option('-l, --list', 'List all available profiles')
+  .option('--generate-settings', 'Output optimal .claude/settings.json')
+  .option('--detect <prompt>', 'Auto-detect task type from a prompt')
+  .option('--apply', 'Apply profile env vars to current process (for hooks)')
+  .option('--json', 'JSON output')
+  .action(async (opts) => {
+    let optimizerMod;
+    try {
+      optimizerMod = require('../src/optimizer/index.js');
+    } catch (e) {
+      console.error(chalk.red('Error: Failed to load optimizer module.'));
+      console.error(chalk.dim(`  ${e.message}`));
+      process.exit(1);
+    }
+
+    // --list: show all profiles
+    if (opts.list) {
+      const profiles = optimizerMod.listProfiles();
+      if (opts.json) {
+        const data = {};
+        for (const name of profiles) {
+          data[name] = optimizerMod.getProfile(name);
+        }
+        console.log(JSON.stringify(data, null, 2));
+        return;
+      }
+      console.log(chalk.bold.cyan('\n  RVAgent Optimizer Profiles (ADR-139)\n'));
+      console.log(chalk.dim('  Based on decompiled Claude Code v2.1.91 intelligence\n'));
+      for (const name of profiles) {
+        const p = optimizerMod.getProfile(name);
+        const envCount = Object.keys(p.env).length;
+        console.log(`  ${chalk.bold.white(name.padEnd(12))} ${chalk.dim(p.description)}`);
+        console.log(chalk.dim(`${''.padEnd(14)}Permission: ${p.permissionMode}, Env vars: ${envCount}`));
+      }
+      console.log('');
+      console.log(chalk.dim('  Usage: ruvector optimize --profile <type>'));
+      console.log(chalk.dim('         ruvector optimize --generate-settings --profile coding'));
+      console.log('');
+      return;
+    }
+
+    // --detect: infer task type from prompt
+    if (opts.detect) {
+      const detected = optimizerMod.detectTaskType(opts.detect);
+      if (opts.json) {
+        console.log(JSON.stringify({ prompt: opts.detect, taskType: detected }));
+        return;
+      }
+      console.log(chalk.cyan(`  Detected task type: ${chalk.bold(detected)}`));
+      return;
+    }
+
+    // Determine profile to use
+    const profileName = opts.profile || 'coding';
+    const profile = optimizerMod.getProfile(profileName);
+
+    if (!profile) {
+      console.error(chalk.red(`  Unknown profile: ${profileName}`));
+      console.error(chalk.yellow(`  Available: ${optimizerMod.listProfiles().join(', ')}`));
+      process.exit(1);
+    }
+
+    // --generate-settings: output settings.json
+    if (opts.generateSettings) {
+      const { generateSettings, formatSettings } = require('../src/optimizer/settings-generator.js');
+      const settings = generateSettings({ ...profile, taskType: profileName });
+      if (opts.json) {
+        console.log(formatSettings(settings));
+      } else {
+        console.log(chalk.bold.cyan(`\n  Generated settings.json for profile: ${profileName}\n`));
+        console.log(formatSettings(settings));
+        console.log('');
+        console.log(chalk.dim('  Save to .claude/settings.json to activate.'));
+        console.log('');
+      }
+      return;
+    }
+
+    // --show: display profile details
+    if (opts.show) {
+      if (opts.json) {
+        console.log(JSON.stringify({ profile: profileName, ...profile }, null, 2));
+        return;
+      }
+      console.log(chalk.bold.cyan(`\n  Profile: ${profileName}\n`));
+      console.log(`  ${chalk.dim('Description:')} ${profile.description}`);
+      console.log(`  ${chalk.dim('Permission:')}  ${profile.permissionMode}`);
+      console.log(`  ${chalk.dim('Env vars:')}`);
+      for (const [key, val] of Object.entries(profile.env)) {
+        console.log(`    ${chalk.white(key)}=${chalk.green(val)}`);
+      }
+      console.log('');
+      return;
+    }
+
+    // --apply or default: apply env vars
+    const result = optimizerMod.applyProfile(profileName);
+    if (opts.json) {
+      console.log(JSON.stringify(result, null, 2));
+      return;
+    }
+    console.log(chalk.bold.cyan(`\n  Applied profile: ${profileName}`));
+    console.log(chalk.dim(`  ${profile.description}\n`));
+    for (const [key, val] of Object.entries(result.applied)) {
+      console.log(`  ${chalk.green('+')} ${key}=${val}`);
+    }
+    console.log(`\n  ${chalk.dim('Permission mode:')} ${result.permissionMode}`);
+    console.log('');
+  });
+
 program.parse();
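For orientation, the new CLI surface added in this version can be exercised as follows (the file path and model id are illustrative, not part of the package):

  # Decompile a local model weight file (ADR-138)
  ruvector decompile --model ./model.Q4.gguf --json

  # Probe a remote LLM API; without --api-key the prober falls back to
  # ANTHROPIC_API_KEY / OPENAI_API_KEY / GOOGLE_AI_API_KEY (see api-prober.js below)
  ruvector decompile --api claude-sonnet-4-20250514

  # Claude Code profile optimization (ADR-139)
  ruvector optimize --list
  ruvector optimize --generate-settings --profile coding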
package/dist/core/router-wrapper.d.ts
CHANGED
@@ -22,25 +22,38 @@ export declare class SemanticRouter {
     private inner;
     private routes;
     constructor(options?: {
-
+        dimension?: number;
         threshold?: number;
     });
     /**
-     *
+     * Set the embedder function for converting text to vectors.
+     * Required before match() can accept string input.
      */
-
+    setEmbedder(embedder: (text: string) => Promise<Float32Array>): void;
+    /**
+     * Add a route with example utterances (sync, requires pre-computed embedding)
+     */
+    addRoute(name: string, utterances: string[], metadata?: Record<string, any>, embedding?: Float32Array | number[]): void;
+    /**
+     * Add a route with automatic embedding computation (requires setEmbedder)
+     */
+    addRouteAsync(name: string, utterances: string[], metadata?: Record<string, any>): Promise<void>;
     /**
      * Add multiple routes at once
      */
     addRoutes(routes: Route[]): void;
     /**
-     * Match input to best route
+     * Match input to best route (async, accepts string if embedder is set, or Float32Array)
+     */
+    match(input: string | Float32Array): Promise<RouteMatch | null>;
+    /**
+     * Get top-k route matches (async)
      */
-
+    matchTopK(input: string | Float32Array, k?: number): Promise<RouteMatch[]>;
     /**
-     *
+     * Match with a pre-computed embedding (synchronous)
      */
-
+    matchWithEmbedding(embedding: Float32Array, k?: number): RouteMatch[];
     /**
      * Get all registered routes
      */
package/dist/core/router-wrapper.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"router-wrapper.d.ts","sourceRoot":"","sources":["../../src/core/router-wrapper.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAqBH,wBAAgB,iBAAiB,IAAI,OAAO,CAO3C;AAED,MAAM,WAAW,KAAK;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAED,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAED;;GAEG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,KAAK,CAAM;IACnB,OAAO,CAAC,MAAM,CAAiC;gBAEnC,OAAO,GAAE;QAAE,
+{"version":3,"file":"router-wrapper.d.ts","sourceRoot":"","sources":["../../src/core/router-wrapper.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAqBH,wBAAgB,iBAAiB,IAAI,OAAO,CAO3C;AAED,MAAM,WAAW,KAAK;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAED,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAED;;GAEG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,KAAK,CAAM;IACnB,OAAO,CAAC,MAAM,CAAiC;gBAEnC,OAAO,GAAE;QAAE,SAAS,CAAC,EAAE,MAAM,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAA;KAAO;IAQpE;;;OAGG;IACH,WAAW,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,YAAY,CAAC,GAAG,IAAI;IAIpE;;OAEG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,SAAS,CAAC,EAAE,YAAY,GAAG,MAAM,EAAE,GAAG,IAAI;IAUvH;;OAEG;IACG,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAStG;;OAEG;IACH,SAAS,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,IAAI;IAMhC;;OAEG;IACG,KAAK,CAAC,KAAK,EAAE,MAAM,GAAG,YAAY,GAAG,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAWrE;;OAEG;IACG,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,YAAY,EAAE,CAAC,GAAE,MAAU,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC;IASnF;;OAEG;IACH,kBAAkB,CAAC,SAAS,EAAE,YAAY,EAAE,CAAC,GAAE,MAAU,GAAG,UAAU,EAAE;IASxE;;OAEG;IACH,SAAS,IAAI,KAAK,EAAE;IAIpB;;OAEG;IACH,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO;IAMlC;;OAEG;IACH,KAAK,IAAI,IAAI;CAId;AAED;;GAEG;AACH,wBAAgB,iBAAiB,IAAI,cAAc,CA8FlD;AAED,eAAe,cAAc,CAAC"}
package/dist/core/router-wrapper.js
CHANGED
@@ -43,16 +43,39 @@ class SemanticRouter {
         this.routes = new Map();
         const router = getRouterModule();
         this.inner = new router.SemanticRouter({
-
+            dimension: options.dimension ?? 384,
             threshold: options.threshold ?? 0.7,
         });
     }
     /**
-     *
+     * Set the embedder function for converting text to vectors.
+     * Required before match() can accept string input.
      */
-
+    setEmbedder(embedder) {
+        this.inner.setEmbedder(embedder);
+    }
+    /**
+     * Add a route with example utterances (sync, requires pre-computed embedding)
+     */
+    addRoute(name, utterances, metadata, embedding) {
+        this.routes.set(name, { name, utterances, metadata });
+        this.inner.addIntent({
+            name,
+            utterances,
+            metadata,
+            embedding,
+        });
+    }
+    /**
+     * Add a route with automatic embedding computation (requires setEmbedder)
+     */
+    async addRouteAsync(name, utterances, metadata) {
         this.routes.set(name, { name, utterances, metadata });
-        this.inner.
+        await this.inner.addIntentAsync({
+            name,
+            utterances,
+            metadata,
+        });
     }
     /**
      * Add multiple routes at once
@@ -63,27 +86,38 @@ class SemanticRouter {
         }
     }
     /**
-     * Match input to best route
+     * Match input to best route (async, accepts string if embedder is set, or Float32Array)
      */
-    match(input) {
-        const
-        if (!
+    async match(input) {
+        const results = await this.inner.route(input, 1);
+        if (!results || results.length === 0)
            return null;
        return {
-            route:
-            score:
-            metadata:
+            route: results[0].intent,
+            score: results[0].score,
+            metadata: results[0].metadata,
        };
    }
    /**
-     * Get top-k route matches
+     * Get top-k route matches (async)
+     */
+    async matchTopK(input, k = 3) {
+        const results = await this.inner.route(input, k);
+        return (results || []).map((r) => ({
+            route: r.intent,
+            score: r.score,
+            metadata: r.metadata,
+        }));
+    }
+    /**
+     * Match with a pre-computed embedding (synchronous)
      */
-
-        const results = this.inner.
-        return results.map((r) => ({
-            route: r.
+    matchWithEmbedding(embedding, k = 1) {
+        const results = this.inner.routeWithEmbedding(embedding, k);
+        return (results || []).map((r) => ({
+            route: r.intent,
            score: r.score,
-            metadata: r.metadata
+            metadata: r.metadata,
        }));
    }
    /**
@@ -99,7 +133,7 @@ class SemanticRouter {
        if (!this.routes.has(name))
            return false;
        this.routes.delete(name);
-        return this.inner.
+        return this.inner.removeIntent(name);
    }
    /**
     * Clear all routes
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "ruvector",
-  "version": "0.2.21",
+  "version": "0.2.22",
   "description": "Self-learning vector database for Node.js — hybrid search, Graph RAG, FlashAttention-3, DiskANN, 50+ attention mechanisms",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
package/src/decompiler/api-prober.js
ADDED
@@ -0,0 +1,302 @@
+'use strict';
+
+/**
+ * LLM API prober -- discovers model architecture by probing remote APIs.
+ * Detects capabilities, token limits, tokenizer behavior, and model fingerprints.
+ * See ADR-138.
+ */
+
+// ── Provider detection ───────────────────────────────────────────────────
+
+const PROVIDERS = {
+  anthropic: {
+    endpoint: 'https://api.anthropic.com/v1/messages',
+    envKey: 'ANTHROPIC_API_KEY',
+    models: ['claude-sonnet-4-6', 'claude-sonnet-4-20250514', 'claude-haiku-4-20250414', 'claude-opus-4-20250514'],
+  },
+  openai: {
+    endpoint: 'https://api.openai.com/v1/chat/completions',
+    envKey: 'OPENAI_API_KEY',
+    models: ['gpt-4o', 'gpt-4o-mini', 'gpt-4-turbo', 'o1', 'o1-mini'],
+  },
+  google: {
+    endpoint: 'https://generativelanguage.googleapis.com/v1beta/models',
+    envKey: 'GOOGLE_AI_API_KEY',
+    models: ['gemini-2.5-flash', 'gemini-2.5-pro', 'gemini-2.0-flash'],
+  },
+};
+
+function detectProvider(modelId) {
+  modelId = modelId.toLowerCase();
+  if (modelId.startsWith('claude')) return 'anthropic';
+  if (modelId.startsWith('gpt') || modelId.startsWith('o1') || modelId.startsWith('o3')) return 'openai';
+  if (modelId.startsWith('gemini')) return 'google';
+  return 'unknown';
+}
+
+// ── Main probe ───────────────────────────────────────────────────────────
+
+async function probeModel(modelId, opts = {}) {
+  const provider = detectProvider(modelId);
+  const providerConfig = PROVIDERS[provider];
+  if (!providerConfig && provider === 'unknown') {
+    throw new Error(`Unknown provider for model: ${modelId}. Supported: claude-*, gpt-*, gemini-*`);
+  }
+
+  const apiKey = opts.apiKey || process.env[providerConfig?.envKey || ''];
+  if (!apiKey) {
+    throw new Error(
+      `No API key found. Set ${providerConfig?.envKey || 'API_KEY'} env var or pass --api-key`
+    );
+  }
+
+  const result = {
+    model: modelId,
+    provider,
+    capabilities: {},
+    tokenizer: {},
+    limits: {},
+    fingerprint: {},
+    latency: {},
+  };
+
+  const send = buildSender(provider, modelId, apiKey);
+
+  // 1. Basic probe -- verify model is reachable and measure latency
+  const start = Date.now();
+  const basicResp = await send('Say exactly: PROBE_OK');
+  result.latency.first_token_ms = Date.now() - start;
+  result.capabilities.reachable = !!basicResp;
+
+  if (!basicResp) {
+    result.capabilities.error = 'Model unreachable or invalid API key';
+    return result;
+  }
+
+  // 2. Capability probes (run in parallel for speed)
+  const [streamResp, toolResp, sysResp] = await Promise.allSettled([
+    testStreaming(send),
+    testToolUse(provider, modelId, apiKey),
+    send('What is 2+2? Reply with just the number.', { systemPrompt: 'You are a calculator.' }),
+  ]);
+
+  result.capabilities.streaming = streamResp.status === 'fulfilled' && streamResp.value;
+  result.capabilities.tools = toolResp.status === 'fulfilled' && toolResp.value;
+  result.capabilities.system_prompt = sysResp.status === 'fulfilled' && !!sysResp.value;
+
+  // 3. Tokenizer probe -- send known strings, analyze responses
+  const tokenizerResult = await probeTokenizer(send);
+  result.tokenizer = tokenizerResult;
+
+  // 4. Model fingerprint -- specific prompts that distinguish families
+  const fingerprint = await fingerprintModel(send, provider);
+  result.fingerprint = fingerprint;
+
+  // 5. Measure response speed
+  const speedStart = Date.now();
+  const longResp = await send('Count from 1 to 20, one per line.');
+  const speedMs = Date.now() - speedStart;
+  const outputTokens = longResp ? longResp.split(/\s+/).length : 0;
+  result.latency.generation_ms = speedMs;
+  result.latency.est_tokens_per_sec = speedMs > 0 ? Math.round((outputTokens / speedMs) * 1000) : 0;
+
+  return result;
+}
+
+// ── Provider-specific request builders ───────────────────────────────────
+
+function buildSender(provider, modelId, apiKey) {
+  return async (prompt, opts = {}) => {
+    try {
+      if (provider === 'anthropic') return await sendAnthropic(modelId, apiKey, prompt, opts);
+      if (provider === 'openai') return await sendOpenAI(modelId, apiKey, prompt, opts);
+      if (provider === 'google') return await sendGoogle(modelId, apiKey, prompt, opts);
+      throw new Error(`Unsupported provider: ${provider}`);
+    } catch (err) {
+      // Return null on API errors (model may not support the feature)
+      if (err.message?.includes('API error')) return null;
+      throw err;
+    }
+  };
+}
+
+async function sendAnthropic(model, apiKey, prompt, opts = {}) {
+  const body = {
+    model,
+    max_tokens: opts.maxTokens || 100,
+    messages: [{ role: 'user', content: prompt }],
+  };
+  if (opts.systemPrompt) body.system = opts.systemPrompt;
+
+  const resp = await fetch('https://api.anthropic.com/v1/messages', {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'x-api-key': apiKey,
+      'anthropic-version': '2023-06-01',
+    },
+    body: JSON.stringify(body),
+  });
+  if (!resp.ok) throw new Error(`API error ${resp.status}: ${await resp.text()}`);
+  const data = await resp.json();
+  return data.content?.[0]?.text || '';
+}
+
+async function sendOpenAI(model, apiKey, prompt, opts = {}) {
+  const messages = [];
+  if (opts.systemPrompt) messages.push({ role: 'system', content: opts.systemPrompt });
+  messages.push({ role: 'user', content: prompt });
+
+  const resp = await fetch('https://api.openai.com/v1/chat/completions', {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'Authorization': `Bearer ${apiKey}`,
+    },
+    body: JSON.stringify({ model, messages, max_tokens: opts.maxTokens || 100 }),
+  });
+  if (!resp.ok) throw new Error(`API error ${resp.status}: ${await resp.text()}`);
+  const data = await resp.json();
+  return data.choices?.[0]?.message?.content || '';
+}
+
+async function sendGoogle(model, apiKey, prompt, opts = {}) {
+  const url = `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent?key=${apiKey}`;
+  const body = {
+    contents: [{ parts: [{ text: prompt }] }],
+    generationConfig: { maxOutputTokens: opts.maxTokens || 100 },
+  };
+  if (opts.systemPrompt) {
+    body.systemInstruction = { parts: [{ text: opts.systemPrompt }] };
+  }
+
+  const resp = await fetch(url, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify(body),
+  });
+  if (!resp.ok) throw new Error(`API error ${resp.status}: ${await resp.text()}`);
+  const data = await resp.json();
+  return data.candidates?.[0]?.content?.parts?.[0]?.text || '';
+}
+
+// ── Feature probes ───────────────────────────────────────────────────────
+
+async function testStreaming(send) {
+  // Streaming support is provider-dependent; we just check if the model responds
+  const resp = await send('Say "stream test"');
+  return !!resp;
+}
+
+async function testToolUse(provider, modelId, apiKey) {
+  try {
+    if (provider === 'anthropic') {
+      const resp = await fetch('https://api.anthropic.com/v1/messages', {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+          'x-api-key': apiKey,
+          'anthropic-version': '2023-06-01',
+        },
+        body: JSON.stringify({
+          model: modelId,
+          max_tokens: 100,
+          messages: [{ role: 'user', content: 'What is the weather in SF?' }],
+          tools: [{
+            name: 'get_weather',
+            description: 'Get weather for a location',
+            input_schema: { type: 'object', properties: { location: { type: 'string' } } },
+          }],
+        }),
+      });
+      return resp.ok;
+    }
+    return true; // Assume supported for other providers
+  } catch {
+    return false;
+  }
+}
+
+// ── Tokenizer probing ────────────────────────────────────────────────────
+
+async function probeTokenizer(send) {
+  // Send known strings and analyze how the model interprets them
+  const testStr = 'antidisestablishmentarianism';
+  const resp = await send(
+    `How many tokens does the word "${testStr}" require? Just give the number.`
+  );
+  const tokenCount = resp ? parseInt(resp.match(/\d+/)?.[0] || '0', 10) : 0;
+
+  // Detect BPE vs SentencePiece by checking token boundary behavior
+  const bpeResp = await send(
+    'Split "unhappiness" into its BPE tokens. List each token on a line.'
+  );
+
+  let type = 'unknown';
+  if (bpeResp) {
+    if (bpeResp.includes('un') && bpeResp.includes('happiness')) type = 'BPE';
+    if (bpeResp.includes('_un') || bpeResp.includes('\u2581un')) type = 'SentencePiece';
+  }
+
+  return {
+    type,
+    estimated_tokens_for_test_word: tokenCount,
+    test_word: testStr,
+  };
+}
+
+// ── Model fingerprinting ─────────────────────────────────────────────────
+
+async function fingerprintModel(send, provider) {
+  // Ask the model to identify itself
+  const identResp = await send(
+    'What LLM are you? Reply in format: "I am [model name] by [company]"'
+  );
+
+  // Test for specific behaviors
+  const mathResp = await send('What is 7 * 8? Reply with just the number.');
+
+  return {
+    self_identification: identResp || 'unknown',
+    provider_detected: provider,
+    math_correct: mathResp?.trim() === '56',
+    timestamp: new Date().toISOString(),
+  };
+}
+
+// ── Pretty printer ───────────────────────────────────────────────────────
+
+function printProbeResult(result) {
+  const _chalk = require('chalk');
+  const chalk = _chalk.default || _chalk;
+
+  console.log(chalk.bold.cyan('\n  LLM API Probe Results'));
+  console.log(chalk.white(`  Model:    ${result.model}`));
+  console.log(chalk.white(`  Provider: ${result.provider}`));
+  console.log('');
+
+  console.log(chalk.bold('  Capabilities:'));
+  console.log(chalk.white(`    Reachable:     ${result.capabilities.reachable ? 'Yes' : 'No'}`));
+  console.log(chalk.white(`    Streaming:     ${result.capabilities.streaming ? 'Yes' : 'No'}`));
+  console.log(chalk.white(`    Tool use:      ${result.capabilities.tools ? 'Yes' : 'No'}`));
+  console.log(chalk.white(`    System prompt: ${result.capabilities.system_prompt ? 'Yes' : 'No'}`));
+  console.log('');
+
+  console.log(chalk.bold('  Latency:'));
+  console.log(chalk.white(`    First token:  ${result.latency.first_token_ms} ms`));
+  console.log(chalk.white(`    Generation:   ${result.latency.generation_ms} ms`));
+  console.log(chalk.white(`    Est. tok/sec: ${result.latency.est_tokens_per_sec}`));
+  console.log('');
+
+  console.log(chalk.bold('  Tokenizer:'));
+  console.log(chalk.white(`    Type:      ${result.tokenizer.type}`));
+  console.log(chalk.white(`    Test word: "${result.tokenizer.test_word}" -> ${result.tokenizer.estimated_tokens_for_test_word} tokens`));
+  console.log('');
+
+  console.log(chalk.bold('  Fingerprint:'));
+  console.log(chalk.white(`    Self-ID:      ${result.fingerprint.self_identification?.slice(0, 80)}`));
+  console.log(chalk.white(`    Math correct: ${result.fingerprint.math_correct ? 'Yes' : 'No'}`));
+  console.log('');
+}
+
+module.exports = { probeModel, printProbeResult, detectProvider };
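A sketch of driving the prober directly rather than through the CLI; the require path assumes the file ships unbundled under src/ as the file summary above indicates, the model id is one of the Anthropic ids from the PROVIDERS table, and Node 18+ is assumed for the global fetch the module relies on:

  const { probeModel, printProbeResult } = require('ruvector/src/decompiler/api-prober.js');

  (async () => {
    // apiKey may be omitted; probeModel falls back to the provider's env var.
    const result = await probeModel('claude-sonnet-4-20250514', {
      apiKey: process.env.ANTHROPIC_API_KEY,
    });
    printProbeResult(result);                        // human-readable summary
    // console.log(JSON.stringify(result, null, 2))  // equivalent of the CLI's --json
  })();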