agent-working-memory 0.5.4 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -46
- package/dist/api/routes.d.ts.map +1 -1
- package/dist/api/routes.js +21 -5
- package/dist/api/routes.js.map +1 -1
- package/dist/cli.js +67 -67
- package/dist/coordination/index.d.ts +11 -0
- package/dist/coordination/index.d.ts.map +1 -0
- package/dist/coordination/index.js +39 -0
- package/dist/coordination/index.js.map +1 -0
- package/dist/coordination/mcp-tools.d.ts +8 -0
- package/dist/coordination/mcp-tools.d.ts.map +1 -0
- package/dist/coordination/mcp-tools.js +216 -0
- package/dist/coordination/mcp-tools.js.map +1 -0
- package/dist/coordination/routes.d.ts +9 -0
- package/dist/coordination/routes.d.ts.map +1 -0
- package/dist/coordination/routes.js +434 -0
- package/dist/coordination/routes.js.map +1 -0
- package/dist/coordination/schema.d.ts +12 -0
- package/dist/coordination/schema.d.ts.map +1 -0
- package/dist/coordination/schema.js +91 -0
- package/dist/coordination/schema.js.map +1 -0
- package/dist/coordination/schemas.d.ts +208 -0
- package/dist/coordination/schemas.d.ts.map +1 -0
- package/dist/coordination/schemas.js +109 -0
- package/dist/coordination/schemas.js.map +1 -0
- package/dist/coordination/stale.d.ts +25 -0
- package/dist/coordination/stale.d.ts.map +1 -0
- package/dist/coordination/stale.js +53 -0
- package/dist/coordination/stale.js.map +1 -0
- package/dist/index.js +21 -3
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +90 -79
- package/dist/mcp.js.map +1 -1
- package/dist/storage/sqlite.d.ts +3 -0
- package/dist/storage/sqlite.d.ts.map +1 -1
- package/dist/storage/sqlite.js +285 -281
- package/dist/storage/sqlite.js.map +1 -1
- package/package.json +55 -55
- package/src/api/index.ts +3 -3
- package/src/api/routes.ts +551 -536
- package/src/cli.ts +397 -397
- package/src/coordination/index.ts +47 -0
- package/src/coordination/mcp-tools.ts +313 -0
- package/src/coordination/routes.ts +656 -0
- package/src/coordination/schema.ts +94 -0
- package/src/coordination/schemas.ts +136 -0
- package/src/coordination/stale.ts +89 -0
- package/src/core/decay.ts +63 -63
- package/src/core/embeddings.ts +88 -88
- package/src/core/hebbian.ts +93 -93
- package/src/core/index.ts +5 -5
- package/src/core/logger.ts +36 -36
- package/src/core/query-expander.ts +66 -66
- package/src/core/reranker.ts +101 -101
- package/src/engine/activation.ts +656 -656
- package/src/engine/connections.ts +103 -103
- package/src/engine/consolidation-scheduler.ts +125 -125
- package/src/engine/eval.ts +102 -102
- package/src/engine/eviction.ts +101 -101
- package/src/engine/index.ts +8 -8
- package/src/engine/retraction.ts +100 -100
- package/src/engine/staging.ts +74 -74
- package/src/index.ts +137 -121
- package/src/mcp.ts +1024 -1013
- package/src/storage/index.ts +3 -3
- package/src/storage/sqlite.ts +968 -963
- package/src/types/agent.ts +67 -67
- package/src/types/checkpoint.ts +46 -46
- package/src/types/engram.ts +217 -217
- package/src/types/eval.ts +100 -100
- package/src/types/index.ts +6 -6
package/src/core/hebbian.ts
CHANGED
|
@@ -1,93 +1,93 @@
|
|
|
1
|
-
// Copyright 2026 Robert Winter / Complete Ideas
|
|
2
|
-
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
/**
|
|
4
|
-
* Hebbian Learning — "neurons that fire together wire together"
|
|
5
|
-
*
|
|
6
|
-
* When two engrams are co-activated (retrieved together in the same
|
|
7
|
-
* activation query), their association weight increases.
|
|
8
|
-
*
|
|
9
|
-
* Log-space weight update prevents runaway growth:
|
|
10
|
-
* logNew = log(w) + signal * log(1 + rate)
|
|
11
|
-
*
|
|
12
|
-
* Associations decay symmetrically when unused.
|
|
13
|
-
*/
|
|
14
|
-
|
|
15
|
-
const MIN_WEIGHT = 0.001;
|
|
16
|
-
const MAX_WEIGHT = 5.0; // Cap at 5 to prevent graph walk explosion
|
|
17
|
-
|
|
18
|
-
/**
|
|
19
|
-
* Strengthen an association weight after co-activation.
|
|
20
|
-
*/
|
|
21
|
-
export function strengthenAssociation(
|
|
22
|
-
currentWeight: number,
|
|
23
|
-
signal: number = 1.0,
|
|
24
|
-
rate: number = 0.25
|
|
25
|
-
): number {
|
|
26
|
-
const logW = Math.log(Math.max(currentWeight, MIN_WEIGHT));
|
|
27
|
-
const logNew = logW + signal * Math.log(1 + rate);
|
|
28
|
-
return Math.min(Math.exp(logNew), MAX_WEIGHT);
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
/**
|
|
32
|
-
* Weaken an association weight due to lack of co-activation.
|
|
33
|
-
* Called periodically by the connection engine.
|
|
34
|
-
*/
|
|
35
|
-
export function decayAssociation(
|
|
36
|
-
currentWeight: number,
|
|
37
|
-
daysSinceActivation: number,
|
|
38
|
-
halfLife: number = 7.0 // days
|
|
39
|
-
): number {
|
|
40
|
-
const decayFactor = Math.pow(0.5, daysSinceActivation / halfLife);
|
|
41
|
-
return Math.max(currentWeight * decayFactor, MIN_WEIGHT);
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
/**
|
|
45
|
-
* Ring buffer for tracking recent co-activations.
|
|
46
|
-
* Feeds the Hebbian worker — when two engrams appear in the buffer
|
|
47
|
-
* within a window, their association is strengthened.
|
|
48
|
-
*/
|
|
49
|
-
export class CoActivationBuffer {
|
|
50
|
-
private buffer: { engramId: string; timestamp: number }[] = [];
|
|
51
|
-
private maxSize: number;
|
|
52
|
-
|
|
53
|
-
constructor(maxSize: number = 50) {
|
|
54
|
-
this.maxSize = maxSize;
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
push(engramId: string): void {
|
|
58
|
-
this.buffer.push({ engramId, timestamp: Date.now() });
|
|
59
|
-
if (this.buffer.length > this.maxSize) {
|
|
60
|
-
this.buffer.shift();
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
pushBatch(engramIds: string[]): void {
|
|
65
|
-
for (const id of engramIds) {
|
|
66
|
-
this.push(id);
|
|
67
|
-
}
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
/**
|
|
71
|
-
* Get all pairs of engrams that were co-activated within windowMs.
|
|
72
|
-
*/
|
|
73
|
-
getCoActivatedPairs(windowMs: number = 5000): [string, string][] {
|
|
74
|
-
const pairs: [string, string][] = [];
|
|
75
|
-
for (let i = 0; i < this.buffer.length; i++) {
|
|
76
|
-
for (let j = i + 1; j < this.buffer.length; j++) {
|
|
77
|
-
const a = this.buffer[i];
|
|
78
|
-
const b = this.buffer[j];
|
|
79
|
-
if (
|
|
80
|
-
a.engramId !== b.engramId &&
|
|
81
|
-
Math.abs(a.timestamp - b.timestamp) <= windowMs
|
|
82
|
-
) {
|
|
83
|
-
pairs.push([a.engramId, b.engramId]);
|
|
84
|
-
}
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
return pairs;
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
clear(): void {
|
|
91
|
-
this.buffer = [];
|
|
92
|
-
}
|
|
93
|
-
}
|
|
1
|
+
// Copyright 2026 Robert Winter / Complete Ideas
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
/**
|
|
4
|
+
* Hebbian Learning — "neurons that fire together wire together"
|
|
5
|
+
*
|
|
6
|
+
* When two engrams are co-activated (retrieved together in the same
|
|
7
|
+
* activation query), their association weight increases.
|
|
8
|
+
*
|
|
9
|
+
* Log-space weight update prevents runaway growth:
|
|
10
|
+
* logNew = log(w) + signal * log(1 + rate)
|
|
11
|
+
*
|
|
12
|
+
* Associations decay symmetrically when unused.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
const MIN_WEIGHT = 0.001;
|
|
16
|
+
const MAX_WEIGHT = 5.0; // Cap at 5 to prevent graph walk explosion
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Strengthen an association weight after co-activation.
|
|
20
|
+
*/
|
|
21
|
+
export function strengthenAssociation(
|
|
22
|
+
currentWeight: number,
|
|
23
|
+
signal: number = 1.0,
|
|
24
|
+
rate: number = 0.25
|
|
25
|
+
): number {
|
|
26
|
+
const logW = Math.log(Math.max(currentWeight, MIN_WEIGHT));
|
|
27
|
+
const logNew = logW + signal * Math.log(1 + rate);
|
|
28
|
+
return Math.min(Math.exp(logNew), MAX_WEIGHT);
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
* Weaken an association weight due to lack of co-activation.
|
|
33
|
+
* Called periodically by the connection engine.
|
|
34
|
+
*/
|
|
35
|
+
export function decayAssociation(
|
|
36
|
+
currentWeight: number,
|
|
37
|
+
daysSinceActivation: number,
|
|
38
|
+
halfLife: number = 7.0 // days
|
|
39
|
+
): number {
|
|
40
|
+
const decayFactor = Math.pow(0.5, daysSinceActivation / halfLife);
|
|
41
|
+
return Math.max(currentWeight * decayFactor, MIN_WEIGHT);
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
* Ring buffer for tracking recent co-activations.
|
|
46
|
+
* Feeds the Hebbian worker — when two engrams appear in the buffer
|
|
47
|
+
* within a window, their association is strengthened.
|
|
48
|
+
*/
|
|
49
|
+
export class CoActivationBuffer {
|
|
50
|
+
private buffer: { engramId: string; timestamp: number }[] = [];
|
|
51
|
+
private maxSize: number;
|
|
52
|
+
|
|
53
|
+
constructor(maxSize: number = 50) {
|
|
54
|
+
this.maxSize = maxSize;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
push(engramId: string): void {
|
|
58
|
+
this.buffer.push({ engramId, timestamp: Date.now() });
|
|
59
|
+
if (this.buffer.length > this.maxSize) {
|
|
60
|
+
this.buffer.shift();
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
pushBatch(engramIds: string[]): void {
|
|
65
|
+
for (const id of engramIds) {
|
|
66
|
+
this.push(id);
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
/**
|
|
71
|
+
* Get all pairs of engrams that were co-activated within windowMs.
|
|
72
|
+
*/
|
|
73
|
+
getCoActivatedPairs(windowMs: number = 5000): [string, string][] {
|
|
74
|
+
const pairs: [string, string][] = [];
|
|
75
|
+
for (let i = 0; i < this.buffer.length; i++) {
|
|
76
|
+
for (let j = i + 1; j < this.buffer.length; j++) {
|
|
77
|
+
const a = this.buffer[i];
|
|
78
|
+
const b = this.buffer[j];
|
|
79
|
+
if (
|
|
80
|
+
a.engramId !== b.engramId &&
|
|
81
|
+
Math.abs(a.timestamp - b.timestamp) <= windowMs
|
|
82
|
+
) {
|
|
83
|
+
pairs.push([a.engramId, b.engramId]);
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
return pairs;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
clear(): void {
|
|
91
|
+
this.buffer = [];
|
|
92
|
+
}
|
|
93
|
+
}
|
package/src/core/index.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
// Copyright 2026 Robert Winter / Complete Ideas
|
|
2
|
-
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
export * from './decay.js';
|
|
4
|
-
export * from './hebbian.js';
|
|
5
|
-
export * from './salience.js';
|
|
1
|
+
// Copyright 2026 Robert Winter / Complete Ideas
// SPDX-License-Identifier: Apache-2.0
// Barrel module: re-exports the core learning primitives
// (time-based decay, Hebbian association updates, salience scoring).
export * from './decay.js';
export * from './hebbian.js';
export * from './salience.js';
|
package/src/core/logger.ts
CHANGED
|
@@ -1,36 +1,36 @@
|
|
|
1
|
-
// Copyright 2026 Robert Winter / Complete Ideas
|
|
2
|
-
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
/**
|
|
4
|
-
* Simple file logger for AWM activity.
|
|
5
|
-
*
|
|
6
|
-
* Appends one line per event to data/awm.log (next to memory.db).
|
|
7
|
-
* Format: ISO timestamp | agent | event | detail
|
|
8
|
-
*
|
|
9
|
-
* Designed for dev pilot observability — know at a glance what's happening.
|
|
10
|
-
*/
|
|
11
|
-
|
|
12
|
-
import { appendFileSync, mkdirSync } from 'node:fs';
|
|
13
|
-
import { dirname, resolve } from 'node:path';
|
|
14
|
-
|
|
15
|
-
let logPath: string | null = null;
|
|
16
|
-
|
|
17
|
-
export function initLogger(dbPath: string): void {
|
|
18
|
-
const dir = dirname(resolve(dbPath));
|
|
19
|
-
mkdirSync(dir, { recursive: true });
|
|
20
|
-
logPath = resolve(dir, 'awm.log');
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
export function log(agentId: string, event: string, detail: string): void {
|
|
24
|
-
if (!logPath) return;
|
|
25
|
-
const ts = new Date().toISOString();
|
|
26
|
-
const line = `${ts} | ${agentId} | ${event} | ${detail}\n`;
|
|
27
|
-
try {
|
|
28
|
-
appendFileSync(logPath, line);
|
|
29
|
-
} catch {
|
|
30
|
-
// Logging should never crash the server
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
export function getLogPath(): string | null {
|
|
35
|
-
return logPath;
|
|
36
|
-
}
|
|
1
|
+
// Copyright 2026 Robert Winter / Complete Ideas
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
/**
|
|
4
|
+
* Simple file logger for AWM activity.
|
|
5
|
+
*
|
|
6
|
+
* Appends one line per event to data/awm.log (next to memory.db).
|
|
7
|
+
* Format: ISO timestamp | agent | event | detail
|
|
8
|
+
*
|
|
9
|
+
* Designed for dev pilot observability — know at a glance what's happening.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { appendFileSync, mkdirSync } from 'node:fs';
|
|
13
|
+
import { dirname, resolve } from 'node:path';
|
|
14
|
+
|
|
15
|
+
let logPath: string | null = null;
|
|
16
|
+
|
|
17
|
+
export function initLogger(dbPath: string): void {
|
|
18
|
+
const dir = dirname(resolve(dbPath));
|
|
19
|
+
mkdirSync(dir, { recursive: true });
|
|
20
|
+
logPath = resolve(dir, 'awm.log');
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
export function log(agentId: string, event: string, detail: string): void {
|
|
24
|
+
if (!logPath) return;
|
|
25
|
+
const ts = new Date().toISOString();
|
|
26
|
+
const line = `${ts} | ${agentId} | ${event} | ${detail}\n`;
|
|
27
|
+
try {
|
|
28
|
+
appendFileSync(logPath, line);
|
|
29
|
+
} catch {
|
|
30
|
+
// Logging should never crash the server
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export function getLogPath(): string | null {
|
|
35
|
+
return logPath;
|
|
36
|
+
}
|
|
@@ -1,66 +1,66 @@
|
|
|
1
|
-
// Copyright 2026 Robert Winter / Complete Ideas
|
|
2
|
-
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
/**
|
|
4
|
-
* Query Expander — rewrites queries with synonyms and related terms.
|
|
5
|
-
*
|
|
6
|
-
* Uses Xenova/flan-t5-small (~80MB ONNX) to expand search queries
|
|
7
|
-
* with related terms that improve BM25 recall.
|
|
8
|
-
*
|
|
9
|
-
* Example: "What is Caroline's identity?" →
|
|
10
|
-
* "What is Caroline's identity? Caroline personal gender transgender self"
|
|
11
|
-
*
|
|
12
|
-
* Singleton pattern — call getExpander() to get the shared instance.
|
|
13
|
-
*/
|
|
14
|
-
|
|
15
|
-
import { pipeline, type Text2TextGenerationPipeline } from '@huggingface/transformers';
|
|
16
|
-
|
|
17
|
-
const MODEL_ID = 'Xenova/flan-t5-small';
|
|
18
|
-
|
|
19
|
-
let instance: Text2TextGenerationPipeline | null = null;
|
|
20
|
-
let initPromise: Promise<Text2TextGenerationPipeline> | null = null;
|
|
21
|
-
|
|
22
|
-
/**
|
|
23
|
-
* Get or initialize the text generation pipeline (singleton).
|
|
24
|
-
* First call downloads the model (~80MB), subsequent calls are instant.
|
|
25
|
-
*/
|
|
26
|
-
export async function getExpander(): Promise<Text2TextGenerationPipeline> {
|
|
27
|
-
if (instance) return instance;
|
|
28
|
-
if (initPromise) return initPromise;
|
|
29
|
-
|
|
30
|
-
initPromise = pipeline('text2text-generation', MODEL_ID, {
|
|
31
|
-
dtype: 'fp32',
|
|
32
|
-
}).then(pipe => {
|
|
33
|
-
instance = pipe as Text2TextGenerationPipeline;
|
|
34
|
-
console.log(`Query expander loaded: ${MODEL_ID}`);
|
|
35
|
-
return instance;
|
|
36
|
-
});
|
|
37
|
-
|
|
38
|
-
return initPromise;
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
/**
|
|
42
|
-
* Expand a query with related terms and synonyms.
|
|
43
|
-
* Returns the original query + generated expansion terms.
|
|
44
|
-
* Falls back to the original query on any error.
|
|
45
|
-
*/
|
|
46
|
-
export async function expandQuery(originalQuery: string): Promise<string> {
|
|
47
|
-
try {
|
|
48
|
-
const expander = await getExpander();
|
|
49
|
-
const prompt = `Expand this search query with synonyms and related terms. Only output the additional terms, not the original query. Query: ${originalQuery}. Additional terms:`;
|
|
50
|
-
|
|
51
|
-
const result = await expander(prompt, {
|
|
52
|
-
max_new_tokens: 25,
|
|
53
|
-
no_repeat_ngram_size: 2,
|
|
54
|
-
});
|
|
55
|
-
|
|
56
|
-
const expanded = Array.isArray(result) ? (result[0] as any)?.generated_text ?? '' : '';
|
|
57
|
-
const cleanExpanded = expanded.trim();
|
|
58
|
-
|
|
59
|
-
if (cleanExpanded && cleanExpanded.length > 2) {
|
|
60
|
-
return `${originalQuery} ${cleanExpanded}`;
|
|
61
|
-
}
|
|
62
|
-
return originalQuery;
|
|
63
|
-
} catch {
|
|
64
|
-
return originalQuery;
|
|
65
|
-
}
|
|
66
|
-
}
|
|
1
|
+
// Copyright 2026 Robert Winter / Complete Ideas
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
/**
|
|
4
|
+
* Query Expander — rewrites queries with synonyms and related terms.
|
|
5
|
+
*
|
|
6
|
+
* Uses Xenova/flan-t5-small (~80MB ONNX) to expand search queries
|
|
7
|
+
* with related terms that improve BM25 recall.
|
|
8
|
+
*
|
|
9
|
+
* Example: "What is Caroline's identity?" →
|
|
10
|
+
* "What is Caroline's identity? Caroline personal gender transgender self"
|
|
11
|
+
*
|
|
12
|
+
* Singleton pattern — call getExpander() to get the shared instance.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { pipeline, type Text2TextGenerationPipeline } from '@huggingface/transformers';
|
|
16
|
+
|
|
17
|
+
const MODEL_ID = 'Xenova/flan-t5-small';
|
|
18
|
+
|
|
19
|
+
let instance: Text2TextGenerationPipeline | null = null;
|
|
20
|
+
let initPromise: Promise<Text2TextGenerationPipeline> | null = null;
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Get or initialize the text generation pipeline (singleton).
|
|
24
|
+
* First call downloads the model (~80MB), subsequent calls are instant.
|
|
25
|
+
*/
|
|
26
|
+
export async function getExpander(): Promise<Text2TextGenerationPipeline> {
|
|
27
|
+
if (instance) return instance;
|
|
28
|
+
if (initPromise) return initPromise;
|
|
29
|
+
|
|
30
|
+
initPromise = pipeline('text2text-generation', MODEL_ID, {
|
|
31
|
+
dtype: 'fp32',
|
|
32
|
+
}).then(pipe => {
|
|
33
|
+
instance = pipe as Text2TextGenerationPipeline;
|
|
34
|
+
console.log(`Query expander loaded: ${MODEL_ID}`);
|
|
35
|
+
return instance;
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
return initPromise;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/**
|
|
42
|
+
* Expand a query with related terms and synonyms.
|
|
43
|
+
* Returns the original query + generated expansion terms.
|
|
44
|
+
* Falls back to the original query on any error.
|
|
45
|
+
*/
|
|
46
|
+
export async function expandQuery(originalQuery: string): Promise<string> {
|
|
47
|
+
try {
|
|
48
|
+
const expander = await getExpander();
|
|
49
|
+
const prompt = `Expand this search query with synonyms and related terms. Only output the additional terms, not the original query. Query: ${originalQuery}. Additional terms:`;
|
|
50
|
+
|
|
51
|
+
const result = await expander(prompt, {
|
|
52
|
+
max_new_tokens: 25,
|
|
53
|
+
no_repeat_ngram_size: 2,
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
const expanded = Array.isArray(result) ? (result[0] as any)?.generated_text ?? '' : '';
|
|
57
|
+
const cleanExpanded = expanded.trim();
|
|
58
|
+
|
|
59
|
+
if (cleanExpanded && cleanExpanded.length > 2) {
|
|
60
|
+
return `${originalQuery} ${cleanExpanded}`;
|
|
61
|
+
}
|
|
62
|
+
return originalQuery;
|
|
63
|
+
} catch {
|
|
64
|
+
return originalQuery;
|
|
65
|
+
}
|
|
66
|
+
}
|
package/src/core/reranker.ts
CHANGED
|
@@ -1,101 +1,101 @@
|
|
|
1
|
-
// Copyright 2026 Robert Winter / Complete Ideas
|
|
2
|
-
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
/**
|
|
4
|
-
* Cross-Encoder Re-Ranker — scores (query, passage) pairs for relevance.
|
|
5
|
-
*
|
|
6
|
-
* Uses Xenova/ms-marco-MiniLM-L-6-v2 (~22MB ONNX) which is trained on
|
|
7
|
-
* MS-MARCO passage ranking. Unlike bi-encoders, cross-encoders see both
|
|
8
|
-
* query and passage together via full attention — much better at judging
|
|
9
|
-
* if a passage actually answers a question.
|
|
10
|
-
*
|
|
11
|
-
* Uses direct tokenizer + model inference (NOT the text-classification
|
|
12
|
-
* pipeline, which doesn't support text_pair and returns identical scores).
|
|
13
|
-
*
|
|
14
|
-
* Singleton pattern — call getReranker() to get the shared instance.
|
|
15
|
-
*/
|
|
16
|
-
|
|
17
|
-
import {
|
|
18
|
-
AutoTokenizer,
|
|
19
|
-
AutoModelForSequenceClassification,
|
|
20
|
-
type PreTrainedTokenizer,
|
|
21
|
-
type PreTrainedModel,
|
|
22
|
-
} from '@huggingface/transformers';
|
|
23
|
-
|
|
24
|
-
const DEFAULT_MODEL = 'Xenova/ms-marco-MiniLM-L-6-v2';
|
|
25
|
-
const MODEL_ID = process.env.AWM_RERANKER_MODEL || DEFAULT_MODEL;
|
|
26
|
-
|
|
27
|
-
let tokenizer: PreTrainedTokenizer | null = null;
|
|
28
|
-
let model: PreTrainedModel | null = null;
|
|
29
|
-
let initPromise: Promise<void> | null = null;
|
|
30
|
-
|
|
31
|
-
async function ensureLoaded(): Promise<void> {
|
|
32
|
-
if (tokenizer && model) return;
|
|
33
|
-
if (initPromise) return initPromise;
|
|
34
|
-
|
|
35
|
-
initPromise = (async () => {
|
|
36
|
-
tokenizer = await AutoTokenizer.from_pretrained(MODEL_ID);
|
|
37
|
-
model = await AutoModelForSequenceClassification.from_pretrained(MODEL_ID, {
|
|
38
|
-
dtype: 'fp32',
|
|
39
|
-
});
|
|
40
|
-
console.log(`Re-ranker model loaded: ${MODEL_ID}`);
|
|
41
|
-
})();
|
|
42
|
-
|
|
43
|
-
return initPromise;
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
/** Kept for backwards compat — returns the model (unused externally). */
|
|
47
|
-
export async function getReranker(): Promise<any> {
|
|
48
|
-
await ensureLoaded();
|
|
49
|
-
return model;
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
export interface RerankResult {
|
|
53
|
-
index: number;
|
|
54
|
-
score: number; // sigmoid-normalized relevance (0-1)
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
function sigmoid(x: number): number {
|
|
58
|
-
return 1 / (1 + Math.exp(-x));
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
/**
|
|
62
|
-
* Re-rank candidate passages against a query using the cross-encoder.
|
|
63
|
-
* Returns results sorted by relevance score (descending).
|
|
64
|
-
*/
|
|
65
|
-
export async function rerank(
|
|
66
|
-
query: string,
|
|
67
|
-
passages: string[],
|
|
68
|
-
): Promise<RerankResult[]> {
|
|
69
|
-
if (passages.length === 0) return [];
|
|
70
|
-
|
|
71
|
-
await ensureLoaded();
|
|
72
|
-
|
|
73
|
-
const results: RerankResult[] = [];
|
|
74
|
-
|
|
75
|
-
for (let i = 0; i < passages.length; i++) {
|
|
76
|
-
try {
|
|
77
|
-
// Tokenize as a query-passage PAIR using text_pair
|
|
78
|
-
const inputs = tokenizer!(query, {
|
|
79
|
-
text_pair: passages[i],
|
|
80
|
-
padding: true,
|
|
81
|
-
truncation: true,
|
|
82
|
-
return_tensors: 'pt',
|
|
83
|
-
});
|
|
84
|
-
|
|
85
|
-
const output = await model!(inputs);
|
|
86
|
-
|
|
87
|
-
// Model outputs raw logits — extract the single relevance logit
|
|
88
|
-
const logits = output.logits ?? output.last_hidden_state;
|
|
89
|
-
const rawLogit = logits.data[0] as number;
|
|
90
|
-
|
|
91
|
-
// Apply sigmoid to map to 0-1 probability
|
|
92
|
-
results.push({ index: i, score: sigmoid(rawLogit) });
|
|
93
|
-
} catch {
|
|
94
|
-
results.push({ index: i, score: 0 });
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
// Sort by score descending
|
|
99
|
-
results.sort((a, b) => b.score - a.score);
|
|
100
|
-
return results;
|
|
101
|
-
}
|
|
1
|
+
// Copyright 2026 Robert Winter / Complete Ideas
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
/**
|
|
4
|
+
* Cross-Encoder Re-Ranker — scores (query, passage) pairs for relevance.
|
|
5
|
+
*
|
|
6
|
+
* Uses Xenova/ms-marco-MiniLM-L-6-v2 (~22MB ONNX) which is trained on
|
|
7
|
+
* MS-MARCO passage ranking. Unlike bi-encoders, cross-encoders see both
|
|
8
|
+
* query and passage together via full attention — much better at judging
|
|
9
|
+
* if a passage actually answers a question.
|
|
10
|
+
*
|
|
11
|
+
* Uses direct tokenizer + model inference (NOT the text-classification
|
|
12
|
+
* pipeline, which doesn't support text_pair and returns identical scores).
|
|
13
|
+
*
|
|
14
|
+
* Singleton pattern — call getReranker() to get the shared instance.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import {
|
|
18
|
+
AutoTokenizer,
|
|
19
|
+
AutoModelForSequenceClassification,
|
|
20
|
+
type PreTrainedTokenizer,
|
|
21
|
+
type PreTrainedModel,
|
|
22
|
+
} from '@huggingface/transformers';
|
|
23
|
+
|
|
24
|
+
const DEFAULT_MODEL = 'Xenova/ms-marco-MiniLM-L-6-v2';
|
|
25
|
+
const MODEL_ID = process.env.AWM_RERANKER_MODEL || DEFAULT_MODEL;
|
|
26
|
+
|
|
27
|
+
let tokenizer: PreTrainedTokenizer | null = null;
|
|
28
|
+
let model: PreTrainedModel | null = null;
|
|
29
|
+
let initPromise: Promise<void> | null = null;
|
|
30
|
+
|
|
31
|
+
async function ensureLoaded(): Promise<void> {
|
|
32
|
+
if (tokenizer && model) return;
|
|
33
|
+
if (initPromise) return initPromise;
|
|
34
|
+
|
|
35
|
+
initPromise = (async () => {
|
|
36
|
+
tokenizer = await AutoTokenizer.from_pretrained(MODEL_ID);
|
|
37
|
+
model = await AutoModelForSequenceClassification.from_pretrained(MODEL_ID, {
|
|
38
|
+
dtype: 'fp32',
|
|
39
|
+
});
|
|
40
|
+
console.log(`Re-ranker model loaded: ${MODEL_ID}`);
|
|
41
|
+
})();
|
|
42
|
+
|
|
43
|
+
return initPromise;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
/** Kept for backwards compat — returns the model (unused externally). */
|
|
47
|
+
export async function getReranker(): Promise<any> {
|
|
48
|
+
await ensureLoaded();
|
|
49
|
+
return model;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
export interface RerankResult {
|
|
53
|
+
index: number;
|
|
54
|
+
score: number; // sigmoid-normalized relevance (0-1)
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
function sigmoid(x: number): number {
|
|
58
|
+
return 1 / (1 + Math.exp(-x));
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/**
|
|
62
|
+
* Re-rank candidate passages against a query using the cross-encoder.
|
|
63
|
+
* Returns results sorted by relevance score (descending).
|
|
64
|
+
*/
|
|
65
|
+
export async function rerank(
|
|
66
|
+
query: string,
|
|
67
|
+
passages: string[],
|
|
68
|
+
): Promise<RerankResult[]> {
|
|
69
|
+
if (passages.length === 0) return [];
|
|
70
|
+
|
|
71
|
+
await ensureLoaded();
|
|
72
|
+
|
|
73
|
+
const results: RerankResult[] = [];
|
|
74
|
+
|
|
75
|
+
for (let i = 0; i < passages.length; i++) {
|
|
76
|
+
try {
|
|
77
|
+
// Tokenize as a query-passage PAIR using text_pair
|
|
78
|
+
const inputs = tokenizer!(query, {
|
|
79
|
+
text_pair: passages[i],
|
|
80
|
+
padding: true,
|
|
81
|
+
truncation: true,
|
|
82
|
+
return_tensors: 'pt',
|
|
83
|
+
});
|
|
84
|
+
|
|
85
|
+
const output = await model!(inputs);
|
|
86
|
+
|
|
87
|
+
// Model outputs raw logits — extract the single relevance logit
|
|
88
|
+
const logits = output.logits ?? output.last_hidden_state;
|
|
89
|
+
const rawLogit = logits.data[0] as number;
|
|
90
|
+
|
|
91
|
+
// Apply sigmoid to map to 0-1 probability
|
|
92
|
+
results.push({ index: i, score: sigmoid(rawLogit) });
|
|
93
|
+
} catch {
|
|
94
|
+
results.push({ index: i, score: 0 });
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
// Sort by score descending
|
|
99
|
+
results.sort((a, b) => b.score - a.score);
|
|
100
|
+
return results;
|
|
101
|
+
}
|