opengauge 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -0
- package/bin/opengauge.js +70 -0
- package/dist/core/optimizer/checkpoint.d.ts +37 -0
- package/dist/core/optimizer/checkpoint.d.ts.map +1 -0
- package/dist/core/optimizer/checkpoint.js +81 -0
- package/dist/core/optimizer/checkpoint.js.map +1 -0
- package/dist/core/optimizer/compressor.d.ts +41 -0
- package/dist/core/optimizer/compressor.d.ts.map +1 -0
- package/dist/core/optimizer/compressor.js +134 -0
- package/dist/core/optimizer/compressor.js.map +1 -0
- package/dist/core/optimizer/dedup.d.ts +48 -0
- package/dist/core/optimizer/dedup.d.ts.map +1 -0
- package/dist/core/optimizer/dedup.js +147 -0
- package/dist/core/optimizer/dedup.js.map +1 -0
- package/dist/core/providers/adapter.d.ts +48 -0
- package/dist/core/providers/adapter.d.ts.map +1 -0
- package/dist/core/providers/adapter.js +22 -0
- package/dist/core/providers/adapter.js.map +1 -0
- package/dist/core/providers/anthropic.d.ts +12 -0
- package/dist/core/providers/anthropic.d.ts.map +1 -0
- package/dist/core/providers/anthropic.js +155 -0
- package/dist/core/providers/anthropic.js.map +1 -0
- package/dist/core/providers/gemini.d.ts +13 -0
- package/dist/core/providers/gemini.d.ts.map +1 -0
- package/dist/core/providers/gemini.js +154 -0
- package/dist/core/providers/gemini.js.map +1 -0
- package/dist/core/providers/ollama.d.ts +11 -0
- package/dist/core/providers/ollama.d.ts.map +1 -0
- package/dist/core/providers/ollama.js +119 -0
- package/dist/core/providers/ollama.js.map +1 -0
- package/dist/core/providers/openai.d.ts +12 -0
- package/dist/core/providers/openai.d.ts.map +1 -0
- package/dist/core/providers/openai.js +169 -0
- package/dist/core/providers/openai.js.map +1 -0
- package/dist/core/rag/assembler.d.ts +47 -0
- package/dist/core/rag/assembler.d.ts.map +1 -0
- package/dist/core/rag/assembler.js +178 -0
- package/dist/core/rag/assembler.js.map +1 -0
- package/dist/core/rag/embedder.d.ts +16 -0
- package/dist/core/rag/embedder.d.ts.map +1 -0
- package/dist/core/rag/embedder.js +223 -0
- package/dist/core/rag/embedder.js.map +1 -0
- package/dist/core/rag/retriever.d.ts +20 -0
- package/dist/core/rag/retriever.d.ts.map +1 -0
- package/dist/core/rag/retriever.js +71 -0
- package/dist/core/rag/retriever.js.map +1 -0
- package/dist/db/index.d.ts +5 -0
- package/dist/db/index.d.ts.map +1 -0
- package/dist/db/index.js +48 -0
- package/dist/db/index.js.map +1 -0
- package/dist/db/queries.d.ts +72 -0
- package/dist/db/queries.d.ts.map +1 -0
- package/dist/db/queries.js +169 -0
- package/dist/db/queries.js.map +1 -0
- package/dist/db/schema.d.ts +3 -0
- package/dist/db/schema.d.ts.map +1 -0
- package/dist/db/schema.js +71 -0
- package/dist/db/schema.js.map +1 -0
- package/dist/server/config.d.ts +25 -0
- package/dist/server/config.d.ts.map +1 -0
- package/dist/server/config.js +69 -0
- package/dist/server/config.js.map +1 -0
- package/dist/server/index.d.ts +5 -0
- package/dist/server/index.d.ts.map +1 -0
- package/dist/server/index.js +61 -0
- package/dist/server/index.js.map +1 -0
- package/dist/server/routes/index.d.ts +6 -0
- package/dist/server/routes/index.d.ts.map +1 -0
- package/dist/server/routes/index.js +272 -0
- package/dist/server/routes/index.js.map +1 -0
- package/dist/server/sse.d.ts +21 -0
- package/dist/server/sse.d.ts.map +1 -0
- package/dist/server/sse.js +40 -0
- package/dist/server/sse.js.map +1 -0
- package/dist/ui/static/app.js +515 -0
- package/dist/ui/static/index.html +13 -0
- package/dist/ui/static/styles.css +506 -0
- package/dist/ui/static/vendor.js +26 -0
- package/package.json +49 -0
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// TypeScript-emitted interop helper: exposes `source[key]` on `target` under
// `alias` (or under `key` when no alias is given). Reuses an already-installed
// helper on `this` when one exists.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        const exportName = alias === undefined ? key : alias;
        let descriptor = Object.getOwnPropertyDescriptor(source, key);
        // Install a forwarding getter unless the source property is an accessor
        // on an ES module, or a read-only, non-configurable data property.
        const needsForwardingGetter = !descriptor ||
            ("get" in descriptor ? !source.__esModule : descriptor.writable || descriptor.configurable);
        if (needsForwardingGetter) {
            descriptor = { enumerable: true, get: () => source[key] };
        }
        Object.defineProperty(target, exportName, descriptor);
    }
    : function (target, source, key, alias) {
        // Fallback for engines without Object.create: plain value copy.
        target[alias === undefined ? key : alias] = source[key];
    });
|
|
13
|
+
// TypeScript-emitted interop helper: stores `value` as the `default` export of
// the namespace object `target`.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        Object.defineProperty(target, "default", { enumerable: true, value: value });
    }
    : function (target, value) {
        // Fallback for engines without Object.create: plain assignment.
        target["default"] = value;
    });
|
|
18
|
+
// TypeScript-emitted interop helper for `import * as ns` / dynamic `import()`.
// ES modules are returned untouched; anything else is wrapped in a fresh
// namespace object that re-exports every own property except "default" and
// carries the original module as its `default`.
var __importStar = (this && this.__importStar) || (() => {
    const hasOwn = Object.prototype.hasOwnProperty;
    // Prefer the native own-key listing; fall back to a for-in scan.
    const listOwnKeys = Object.getOwnPropertyNames || ((obj) => {
        const names = [];
        for (const key in obj) {
            if (hasOwn.call(obj, key)) names.push(key);
        }
        return names;
    });
    return (mod) => {
        if (mod && mod.__esModule) {
            return mod;
        }
        const namespace = {};
        if (mod != null) {
            for (const key of listOwnKeys(mod)) {
                if (key !== "default") __createBinding(namespace, mod, key);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.OpenAIProvider = void 0;
|
|
37
|
+
/**
 * Provider adapter for the OpenAI Chat Completions HTTP API.
 *
 * Talks to `<baseUrl>/v1/chat/completions` with a Bearer API key and exposes
 * a one-shot call (`chat`), a streaming call (`chatStream`) and a token
 * counter (`countTokens`).
 */
class OpenAIProvider {
    name = 'openai';
    defaultModel;
    apiKey;
    baseUrl;
    /**
     * @param {{api_key?: string, base_url?: string, default_model?: string}} config
     *   Missing values fall back to an empty key, `https://api.openai.com`
     *   and `gpt-4o` respectively.
     */
    constructor(config) {
        this.apiKey = config.api_key || '';
        this.baseUrl = config.base_url || 'https://api.openai.com';
        this.defaultModel = config.default_model || 'gpt-4o';
    }
    /**
     * Build the JSON request body shared by `chat` and `chatStream`.
     *
     * @param {object} request - Chat request (`messages`, optional `maxTokens`, `temperature`).
     * @param {string} model - Model name to send.
     * @param {object} [extra] - Additional top-level fields (e.g. streaming flags).
     * @returns {object} Request body ready for `JSON.stringify`.
     */
    #buildBody(request, model, extra = {}) {
        const body = {
            model,
            messages: request.messages.map((m) => ({
                role: m.role,
                content: m.content,
            })),
            max_tokens: request.maxTokens || 4096,
            ...extra,
        };
        // Temperature is only sent when the caller specified one (0 is valid).
        if (request.temperature !== undefined) {
            body.temperature = request.temperature;
        }
        return body;
    }
    /**
     * POST a body to the chat-completions endpoint.
     *
     * @param {object} body - Request body.
     * @returns {Promise<Response>} The successful (2xx) response.
     * @throws {Error} `OpenAI API error (<status>): <body text>` on a non-2xx response.
     */
    async #postChatCompletion(body) {
        // Tolerate a configured base URL that ends in "/" so the path does not
        // become "//v1/...".
        let base = this.baseUrl;
        while (base.endsWith('/')) {
            base = base.slice(0, -1);
        }
        const response = await fetch(`${base}/v1/chat/completions`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                Authorization: `Bearer ${this.apiKey}`,
            },
            body: JSON.stringify(body),
        });
        if (!response.ok) {
            const err = await response.text();
            throw new Error(`OpenAI API error (${response.status}): ${err}`);
        }
        return response;
    }
    /**
     * Send a non-streaming chat completion request.
     *
     * @param {object} request - Chat request; `request.model` overrides the default model.
     * @returns {Promise<{content: string, tokensIn: number, tokensOut: number, model: string, provider: string}>}
     *   First choice's message content ('' when absent) plus usage counts (0 when absent).
     * @throws {Error} On a non-2xx HTTP response.
     */
    async chat(request) {
        const model = request.model || this.defaultModel;
        const response = await this.#postChatCompletion(this.#buildBody(request, model));
        const data = await response.json();
        const content = data.choices?.[0]?.message?.content || '';
        return {
            content,
            tokensIn: data.usage?.prompt_tokens || 0,
            tokensOut: data.usage?.completion_tokens || 0,
            model,
            provider: this.name,
        };
    }
    /**
     * Send a streaming chat completion request and yield content deltas.
     *
     * Yields `{ content, done: false }` for every non-empty delta, then a final
     * `{ content: '', done: true, tokensIn, tokensOut }` carrying the usage
     * reported by the server (0 when none was reported).
     *
     * @param {object} request - Chat request; `request.model` overrides the default model.
     * @throws {Error} On a non-2xx HTTP response or when the response has no body.
     */
    async *chatStream(request) {
        const model = request.model || this.defaultModel;
        const response = await this.#postChatCompletion(this.#buildBody(request, model, {
            stream: true,
            stream_options: { include_usage: true },
        }));
        const reader = response.body?.getReader();
        if (!reader)
            throw new Error('No response body');
        const decoder = new TextDecoder();
        let buffer = '';
        let tokensIn = 0;
        let tokensOut = 0;
        let streamEnded = false;
        try {
            while (!streamEnded) {
                const { done, value } = await reader.read();
                streamEnded = done;
                // On the final read, flush any bytes the decoder is still holding.
                buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });
                const lines = buffer.split('\n');
                // Keep the unterminated tail for the next chunk; once the stream
                // is over the tail is a complete line and must be processed too
                // (otherwise a final event without a trailing newline is lost).
                buffer = done ? '' : lines.pop() || '';
                for (const line of lines) {
                    // SSE allows "data:" with or without a following space.
                    if (!line.startsWith('data:'))
                        continue;
                    const data = line.slice(5).trim();
                    if (data === '[DONE]')
                        continue;
                    let event;
                    try {
                        event = JSON.parse(data);
                    }
                    catch {
                        // Skip malformed JSON
                        continue;
                    }
                    const delta = event?.choices?.[0]?.delta?.content;
                    if (delta) {
                        yield { content: delta, done: false };
                    }
                    if (event?.usage) {
                        tokensIn = event.usage.prompt_tokens || 0;
                        tokensOut = event.usage.completion_tokens || 0;
                    }
                }
            }
        }
        finally {
            if (!streamEnded) {
                // The consumer stopped early or reading failed: cancel the body
                // so the underlying HTTP connection is not left open.
                await reader.cancel().catch(() => { });
            }
            reader.releaseLock();
        }
        yield { content: '', done: true, tokensIn, tokensOut };
    }
    /**
     * Count tokens in `text` with tiktoken's `gpt-4o` encoding.
     *
     * Falls back to a rough estimate of one token per 4 characters when
     * tiktoken cannot be loaded or encoding fails.
     *
     * @param {string} text - Text to measure.
     * @returns {Promise<number>} Token count or estimate.
     */
    async countTokens(text) {
        // Use tiktoken for accurate OpenAI token counting
        try {
            const { encoding_for_model } = await Promise.resolve().then(() => __importStar(require('tiktoken')));
            const enc = encoding_for_model('gpt-4o');
            try {
                return enc.encode(text).length;
            }
            finally {
                // Always release the encoder, even when encode() throws.
                enc.free();
            }
        }
        catch {
            // Fallback: ~4 chars per token
            return Math.ceil(text.length / 4);
        }
    }
}
|
|
168
|
+
exports.OpenAIProvider = OpenAIProvider;
|
|
169
|
+
//# sourceMappingURL=openai.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openai.js","sourceRoot":"","sources":["../../../src/core/providers/openai.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAQA,MAAa,cAAc;IACzB,IAAI,GAAG,QAAQ,CAAC;IAChB,YAAY,CAAS;IACb,MAAM,CAAS;IACf,OAAO,CAAS;IAExB,YAAY,MAAsB;QAChC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC;QACnC,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,QAAQ,IAAI,wBAAwB,CAAC;QAC3D,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,aAAa,IAAI,QAAQ,CAAC;IACvD,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,OAAoB;QAC7B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC;QAEjD,MAAM,IAAI,GAAQ;YAChB,KAAK;YACL,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACrC,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,OAAO,EAAE,CAAC,CAAC,OAAO;aACnB,CAAC,CAAC;YACH,UAAU,EAAE,OAAO,CAAC,SAAS,IAAI,IAAI;SACtC,CAAC;QAEF,IAAI,OAAO,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;YACtC,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;QACzC,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,IAAI,CAAC,OAAO,sBAAsB,EAAE;YAClE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE;aACvC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;SAC3B,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAClC,MAAM,IAAI,KAAK,CAAC,qBAAqB,QAAQ,CAAC,MAAM,MAAM,GAAG,EAAE,CAAC,CAAC;QACnE,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAS,CAAC;QAC1C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,IAAI,EAAE,CAAC;QAE1D,OAAO;YACL,OAAO;YACP,QAAQ,EAAE,IAAI,CAAC,KAAK,EAAE,aAAa,IAAI,CAAC;YACxC,SAAS,EAAE,IAAI,CAAC,KAAK,EAAE,iBAAiB,IAAI,CAAC;YAC7C,KAAK;YACL,QAAQ,EAAE,IAAI,CAAC,IAAI;SACpB,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,CAAC,UAAU,CAAC,OAAoB;QACpC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC;QAEjD,MAAM,IAAI,GAAQ;YAChB,KAAK;YACL,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACrC,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,OAAO,EAAE,CAAC,CAAC,OAAO;aACnB,CAAC,CAAC;YACH,UAAU,EAAE,OAAO,CAAC,SAAS,IAAI,IAAI;YACrC,MAAM,EAAE,IAAI;YACZ,cAAc,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE;SACx
C,CAAC;QAEF,IAAI,OAAO,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;YACtC,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;QACzC,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,IAAI,CAAC,OAAO,sBAAsB,EAAE;YAClE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE;aACvC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;SAC3B,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAClC,MAAM,IAAI,KAAK,CAAC,qBAAqB,QAAQ,CAAC,MAAM,MAAM,GAAG,EAAE,CAAC,CAAC;QACnE,CAAC;QAED,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,EAAE,SAAS,EAAE,CAAC;QAC1C,IAAI,CAAC,MAAM;YAAE,MAAM,IAAI,KAAK,CAAC,kBAAkB,CAAC,CAAC;QAEjD,MAAM,OAAO,GAAG,IAAI,WAAW,EAAE,CAAC;QAClC,IAAI,MAAM,GAAG,EAAE,CAAC;QAChB,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,IAAI,SAAS,GAAG,CAAC,CAAC;QAElB,IAAI,CAAC;YACH,OAAO,IAAI,EAAE,CAAC;gBACZ,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBAC5C,IAAI,IAAI;oBAAE,MAAM;gBAEhB,MAAM,IAAI,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;gBAClD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACjC,MAAM,GAAG,KAAK,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;gBAE3B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;oBACzB,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC;wBAAE,SAAS;oBACzC,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;oBAClC,IAAI,IAAI,KAAK,QAAQ;wBAAE,SAAS;oBAEhC,IAAI,CAAC;wBACH,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;wBAC/B,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC;wBAEjD,IAAI,KAAK,EAAE,CAAC;4BACV,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;wBACxC,CAAC;wBAED,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;4BAChB,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC,aAAa,IAAI,CAAC,CAAC;4BAC1C,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,iBAAiB,IAAI,CAAC,CAAC;wBACjD,CAAC;oBACH,CAAC;oBAAC,MAAM,CAAC;wBACP,sBAAsB;oBACxB,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;gBAAS,CAAC;YACT,MAAM,CAAC,WAAW,EAAE,CAAC;QACvB,CAAC;QAED,MAAM,EAAE,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC;IACzD,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,IAAY;QAC5B,kDAAkD;
QAClD,IAAI,CAAC;YACH,MAAM,EAAE,kBAAkB,EAAE,GAAG,wDAAa,UAAU,GAAC,CAAC;YACxD,MAAM,GAAG,GAAG,kBAAkB,CAAC,QAAe,CAAC,CAAC;YAChD,MAAM,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAChC,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC;YAC5B,GAAG,CAAC,IAAI,EAAE,CAAC;YACX,OAAO,KAAK,CAAC;QACf,CAAC;QAAC,MAAM,CAAC;YACP,+BAA+B;YAC/B,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACpC,CAAC;IACH,CAAC;CACF;AAlJD,wCAkJC"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context Assembler — Builds the optimal context window for LLM requests
|
|
3
|
+
*
|
|
4
|
+
* Combines:
|
|
5
|
+
* 1. System prompt
|
|
6
|
+
* 2. Retrieved relevant context (RAG)
|
|
7
|
+
* 3. Last N verbatim turns
|
|
8
|
+
* 4. Compressed checkpoint (if exists)
|
|
9
|
+
*
|
|
10
|
+
* Applies token budget to ensure we fit within the model's context window.
|
|
11
|
+
*/
|
|
12
|
+
import { ChatMessage } from '../providers/adapter';
|
|
13
|
+
import { Queries } from '../../db/queries';
|
|
14
|
+
export interface AssemblerConfig {
|
|
15
|
+
/** Maximum tokens for the context window. Default: 8000 */
|
|
16
|
+
maxContextTokens: number;
|
|
17
|
+
/** Tokens reserved for the model's response. Default: 2000 */
|
|
18
|
+
responseReserve: number;
|
|
19
|
+
/** Number of recent messages to keep verbatim. Default: 6 */
|
|
20
|
+
recentMessageCount: number;
|
|
21
|
+
/** Number of RAG results to retrieve. Default: 15 */
|
|
22
|
+
ragTopK: number;
|
|
23
|
+
/** Compression aggressiveness (0-1). Default: 0.3 */
|
|
24
|
+
compressionLevel: number;
|
|
25
|
+
/** Desired savings percentage against raw prompt tokens. Default: 50 */
|
|
26
|
+
targetSavingsPercent: number;
|
|
27
|
+
/** Minimum token budget to preserve quality/context intelligence. Default: 320 */
|
|
28
|
+
qualityFloorTokens: number;
|
|
29
|
+
}
|
|
30
|
+
export declare const DEFAULT_ASSEMBLER_CONFIG: AssemblerConfig;
|
|
31
|
+
/**
|
|
32
|
+
* Assemble the context window for an LLM request.
|
|
33
|
+
*
|
|
34
|
+
* @param userMessage - The current user message.
|
|
35
|
+
* @param conversationId - The conversation ID.
|
|
36
|
+
* @param systemPrompt - The system prompt.
|
|
37
|
+
* @param queries - Database queries instance.
|
|
38
|
+
* @param checkpointSummary - Latest checkpoint summary, if any.
|
|
39
|
+
* @param config - Assembler configuration.
|
|
40
|
+
*/
|
|
41
|
+
export declare function assembleContext(userMessage: string, conversationId: string, systemPrompt: string | null, queries: Queries, checkpointSummary: string | null, config?: AssemblerConfig): Promise<{
|
|
42
|
+
messages: ChatMessage[];
|
|
43
|
+
tokensRaw: number;
|
|
44
|
+
tokensSent: number;
|
|
45
|
+
ragResultCount: number;
|
|
46
|
+
}>;
|
|
47
|
+
//# sourceMappingURL=assembler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"assembler.d.ts","sourceRoot":"","sources":["../../../src/core/rag/assembler.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACnD,OAAO,EAAE,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAK3C,MAAM,WAAW,eAAe;IAC9B,2DAA2D;IAC3D,gBAAgB,EAAE,MAAM,CAAC;IACzB,8DAA8D;IAC9D,eAAe,EAAE,MAAM,CAAC;IACxB,8DAA8D;IAC9D,kBAAkB,EAAE,MAAM,CAAC;IAC3B,qDAAqD;IACrD,OAAO,EAAE,MAAM,CAAC;IAChB,qDAAqD;IACrD,gBAAgB,EAAE,MAAM,CAAC;IACzB,wEAAwE;IACxE,oBAAoB,EAAE,MAAM,CAAC;IAC7B,kFAAkF;IAClF,kBAAkB,EAAE,MAAM,CAAC;CAC5B;AAED,eAAO,MAAM,wBAAwB,EAAE,eAQtC,CAAC;AAMF;;;;;;;;;GASG;AACH,wBAAsB,eAAe,CACnC,WAAW,EAAE,MAAM,EACnB,cAAc,EAAE,MAAM,EACtB,YAAY,EAAE,MAAM,GAAG,IAAI,EAC3B,OAAO,EAAE,OAAO,EAChB,iBAAiB,EAAE,MAAM,GAAG,IAAI,EAChC,MAAM,GAAE,eAA0C,GACjD,OAAO,CAAC;IACT,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,EAAE,MAAM,CAAC;CACxB,CAAC,CAuLD"}
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Context Assembler — Builds the optimal context window for LLM requests
|
|
4
|
+
*
|
|
5
|
+
* Combines:
|
|
6
|
+
* 1. System prompt
|
|
7
|
+
* 2. Retrieved relevant context (RAG)
|
|
8
|
+
* 3. Last N verbatim turns
|
|
9
|
+
* 4. Compressed checkpoint (if exists)
|
|
10
|
+
*
|
|
11
|
+
* Applies token budget to ensure we fit within the model's context window.
|
|
12
|
+
*/
|
|
13
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
14
|
+
exports.DEFAULT_ASSEMBLER_CONFIG = void 0;
|
|
15
|
+
exports.assembleContext = assembleContext;
|
|
16
|
+
const retriever_1 = require("./retriever");
|
|
17
|
+
const compressor_1 = require("../optimizer/compressor");
|
|
18
|
+
const dedup_1 = require("../optimizer/dedup");
|
|
19
|
+
exports.DEFAULT_ASSEMBLER_CONFIG = {
|
|
20
|
+
maxContextTokens: 8000,
|
|
21
|
+
responseReserve: 2000,
|
|
22
|
+
recentMessageCount: 6,
|
|
23
|
+
ragTopK: 15,
|
|
24
|
+
compressionLevel: 0.3,
|
|
25
|
+
targetSavingsPercent: 50,
|
|
26
|
+
qualityFloorTokens: 320,
|
|
27
|
+
};
|
|
28
|
+
/**
 * Rough token estimate for budgeting: number of whitespace-delimited words
 * multiplied by 1.3, rounded up.
 *
 * Only non-empty words are counted, so empty or whitespace-only text is
 * 0 tokens and leading/trailing whitespace does not inflate the estimate.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokens(text) {
    const words = text.match(/\S+/g);
    return words ? Math.ceil(words.length * 1.3) : 0;
}
|
|
31
|
+
/**
 * Assemble the context window for an LLM request.
 *
 * Messages are emitted in this order: system prompt, checkpoint summary,
 * a block of retrieved (RAG) context, compressed older history, recent
 * history, and finally the current user message. The current user message is
 * always included exactly once, whether or not it is already stored as the
 * last history row. Because it is appended without a budget check, tokensSent
 * can exceed the effective budget by the size of that message.
 *
 * @param userMessage - The current user message.
 * @param conversationId - The conversation ID.
 * @param systemPrompt - The system prompt.
 * @param queries - Database queries instance.
 * @param checkpointSummary - Latest checkpoint summary, if any.
 * @param config - Assembler configuration.
 * @returns messages to send, the estimated raw token count (system prompt,
 *   checkpoint, full history and the current user message, each counted
 *   once), the estimated tokens actually sent, and the number of RAG results
 *   that were not already part of the deduplicated history.
 */
async function assembleContext(userMessage, conversationId, systemPrompt, queries, checkpointSummary, config = exports.DEFAULT_ASSEMBLER_CONFIG) {
    const tokenBudget = config.maxContextTokens - config.responseReserve;
    const currentUserText = userMessage.trim();
    const isCurrentUserTurn = (msg) => msg != null && msg.role === 'user' && msg.content.trim() === currentUserText;
    let tokensUsed = 0;
    let tokensRaw = 0;
    const assembled = [];
    // 1. System prompt (always included)
    if (systemPrompt) {
        const sysTokens = estimateTokens(systemPrompt);
        assembled.push({ role: 'system', content: systemPrompt });
        tokensUsed += sysTokens;
        tokensRaw += sysTokens;
    }
    // 2. Checkpoint summary (if exists)
    if (checkpointSummary) {
        const cpTokens = estimateTokens(checkpointSummary);
        assembled.push({
            role: 'system',
            content: `[Conversation Summary]\n${checkpointSummary}\n[End Summary]`,
        });
        tokensUsed += cpTokens;
        tokensRaw += cpTokens;
    }
    // 3. Retrieve relevant past context via RAG (best effort)
    let ragResults = [];
    try {
        ragResults = await (0, retriever_1.retrieveSimilar)(userMessage, conversationId, queries, config.ragTopK);
    }
    catch {
        // RAG may not be available; continue without retrieved context.
    }
    // 4. Get all messages and calculate raw token count
    const allMessages = queries.getMessages(conversationId);
    for (const msg of allMessages) {
        tokensRaw += estimateTokens(msg.content);
    }
    // Count the current user message once if history does not already hold it.
    // This is the only place it is added to tokensRaw.
    if (!isCurrentUserTurn(allMessages[allMessages.length - 1])) {
        tokensRaw += estimateTokens(userMessage);
    }
    const savingsPercent = Math.min(95, Math.max(0, config.targetSavingsPercent));
    const targetSentBySavings = Math.floor(tokensRaw * (1 - savingsPercent / 100));
    const effectiveBudget = Math.min(tokenBudget, Math.max(config.qualityFloorTokens, targetSentBySavings));
    // 5. Deduplicate messages (Stage 2 of optimizer pipeline)
    const dedupedMessages = (0, dedup_1.deduplicateByText)(allMessages.map((m) => ({
        id: m.id,
        role: m.role,
        content: m.content,
        created_at: m.created_at,
    })), 0.7).kept;
    const allKeptIds = new Set(dedupedMessages.map((m) => m.id));
    const hasCurrentUserInHistory = isCurrentUserTurn(dedupedMessages[dedupedMessages.length - 1]);
    // The current user turn is handled separately (step 10) so it can never be
    // dropped by the budget or emitted twice; take it out of the history here.
    const priorMessages = hasCurrentUserInHistory ? dedupedMessages.slice(0, -1) : dedupedMessages;
    // Split into recent (verbatim) and older (compressible). At most 2 slots
    // are verbatim; when the current user turn came from history it occupies
    // one of them. Clamped so a zero/negative/non-integer setting cannot make
    // the two slices overlap.
    const verbatimCount = Math.max(0, Math.min(2, Math.trunc(config.recentMessageCount) || 0));
    const priorVerbatimCount = Math.max(0, verbatimCount - (hasCurrentUserInHistory ? 1 : 0));
    const splitIndex = Math.max(0, priorMessages.length - priorVerbatimCount);
    const olderMessages = priorMessages.slice(0, splitIndex);
    const recentMessages = priorMessages.slice(splitIndex);
    // 6. Compress older messages (Stage 1 of optimizer pipeline)
    const compressedOlder = (0, compressor_1.compressMessages)(olderMessages.map((m) => ({ role: m.role, content: m.content })), 0, // don't preserve any recent within this set — they're already separated
    config.compressionLevel);
    // 7. Add RAG results that aren't already part of the kept history
    const relevantRag = ragResults.filter((r) => !allKeptIds.has(r.id));
    const ragTokenBudget = Math.floor((effectiveBudget - tokensUsed) * 0.2);
    let ragTokensUsed = 0;
    const ragContextParts = [];
    for (const rag of relevantRag) {
        const compressed = (0, compressor_1.compressPrompt)(rag.content, config.compressionLevel);
        const tokens = estimateTokens(compressed.compressed);
        if (ragTokensUsed + tokens <= ragTokenBudget) {
            ragContextParts.push(`[${rag.role}]: ${compressed.compressed}`);
            ragTokensUsed += tokens;
        }
    }
    if (ragContextParts.length > 0) {
        assembled.push({
            role: 'system',
            content: `[Relevant Earlier Context]\n${ragContextParts.join('\n')}\n[End Context]`,
        });
        tokensUsed += ragTokensUsed;
    }
    // 8. Add compressed older messages
    const olderBudget = Math.floor((effectiveBudget - tokensUsed) * 0.45);
    let olderTokensUsed = 0;
    for (const msg of compressedOlder) {
        const tokens = estimateTokens(msg.content);
        if (olderTokensUsed + tokens <= olderBudget) {
            assembled.push({
                role: msg.role,
                content: msg.content,
            });
            olderTokensUsed += tokens;
        }
    }
    tokensUsed += olderTokensUsed;
    // 9. Add recent messages (adaptive compression in aggressive mode).
    // Only assistant messages are ever compressed; system and user messages
    // stay verbatim.
    const remainingBudget = effectiveBudget - tokensUsed;
    let recentTokensUsed = 0;
    const aggressiveMode = config.targetSavingsPercent >= 70;
    for (const msg of recentMessages) {
        let candidateContent = msg.content;
        if (aggressiveMode && msg.role === 'assistant') {
            candidateContent = (0, compressor_1.compressPrompt)(msg.content, Math.min(0.95, config.compressionLevel + 0.1)).compressed;
        }
        const tokens = estimateTokens(candidateContent);
        if (recentTokensUsed + tokens <= remainingBudget) {
            assembled.push({
                role: msg.role,
                content: candidateContent,
            });
            recentTokensUsed += tokens;
        }
    }
    tokensUsed += recentTokensUsed;
    // 10. The current user message always goes last, verbatim, regardless of
    // the remaining budget — a request without it would be meaningless.
    assembled.push({ role: 'user', content: userMessage });
    tokensUsed += estimateTokens(userMessage);
    return {
        messages: assembled,
        tokensRaw,
        tokensSent: tokensUsed,
        ragResultCount: relevantRag.length,
    };
}
|
|
178
|
+
//# sourceMappingURL=assembler.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"assembler.js","sourceRoot":"","sources":["../../../src/core/rag/assembler.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;GAUG;;;AAiDH,0CAmMC;AAhPD,2CAAgE;AAChE,wDAA2E;AAC3E,8CAAuD;AAmB1C,QAAA,wBAAwB,GAAoB;IACvD,gBAAgB,EAAE,IAAI;IACtB,eAAe,EAAE,IAAI;IACrB,kBAAkB,EAAE,CAAC;IACrB,OAAO,EAAE,EAAE;IACX,gBAAgB,EAAE,GAAG;IACrB,oBAAoB,EAAE,EAAE;IACxB,kBAAkB,EAAE,GAAG;CACxB,CAAC;AAEF,SAAS,cAAc,CAAC,IAAY;IAClC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC;AACnD,CAAC;AAED;;;;;;;;;GASG;AACI,KAAK,UAAU,eAAe,CACnC,WAAmB,EACnB,cAAsB,EACtB,YAA2B,EAC3B,OAAgB,EAChB,iBAAgC,EAChC,SAA0B,gCAAwB;IAOlD,MAAM,WAAW,GAAG,MAAM,CAAC,gBAAgB,GAAG,MAAM,CAAC,eAAe,CAAC;IACrE,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,MAAM,SAAS,GAAkB,EAAE,CAAC;IAEpC,qCAAqC;IACrC,IAAI,YAAY,EAAE,CAAC;QACjB,MAAM,SAAS,GAAG,cAAc,CAAC,YAAY,CAAC,CAAC;QAC/C,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC;QAC1D,UAAU,IAAI,SAAS,CAAC;QACxB,SAAS,IAAI,SAAS,CAAC;IACzB,CAAC;IAED,oCAAoC;IACpC,IAAI,iBAAiB,EAAE,CAAC;QACtB,MAAM,QAAQ,GAAG,cAAc,CAAC,iBAAiB,CAAC,CAAC;QACnD,SAAS,CAAC,IAAI,CAAC;YACb,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,2BAA2B,iBAAiB,iBAAiB;SACvE,CAAC,CAAC;QACH,UAAU,IAAI,QAAQ,CAAC;QACvB,SAAS,IAAI,QAAQ,CAAC;IACxB,CAAC;IAED,4CAA4C;IAC5C,IAAI,UAAU,GAAuB,EAAE,CAAC;IACxC,IAAI,CAAC;QACH,UAAU,GAAG,MAAM,IAAA,2BAAe,EAChC,WAAW,EACX,cAAc,EACd,OAAO,EACP,MAAM,CAAC,OAAO,CACf,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,2BAA2B;IAC7B,CAAC;IAED,oDAAoD;IACpD,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC,cAAc,CAAC,CAAC;IACxD,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;QAC9B,SAAS,IAAI,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3C,CAAC;IAED,MAAM,4BAA4B,GAChC,WAAW,CAAC,MAAM,GAAG,CAAC;QACtB,WAAW,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM;QACnD,WAAW,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,WAAW,CAAC,IAAI,EAAE,CAAC;IAE5E,IAAI,CAAC,4BAA4B,EAAE,CAAC;QAClC,SAAS,IAAI,cAAc,CAAC,WAAW,CAAC,CAAC;IAC3C,CAAC;IAED,MAAM,mBAAmB,GAAG,IAAI,CAAC,KAAK,CACpC,SAAS,GAAG,CAAC,CAA
C,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,oBAAoB,CAAC,CAAC,GAAG,GAAG,CAAC,CAC/E,CAAC;IAEF,MAAM,eAAe,GAAG,IAAI,CAAC,GAAG,CAC9B,WAAW,EACX,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,kBAAkB,EAAE,mBAAmB,CAAC,CACzD,CAAC;IAEF,0DAA0D;IAC1D,MAAM,WAAW,GAAG,IAAA,yBAAiB,EACnC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACtB,EAAE,EAAE,CAAC,CAAC,EAAE;QACR,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,UAAU,EAAE,CAAC,CAAC,UAAU;KACzB,CAAC,CAAC,EACH,GAAG,CACJ,CAAC;IACF,MAAM,eAAe,GAAG,WAAW,CAAC,IAAI,CAAC;IACzC,MAAM,oBAAoB,GAAG,eAAe,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACzE,MAAM,uBAAuB,GAC3B,oBAAoB,EAAE,IAAI,KAAK,MAAM;QACrC,oBAAoB,EAAE,OAAO,CAAC,IAAI,EAAE,KAAK,WAAW,CAAC,IAAI,EAAE,CAAC;IAE9D,wDAAwD;IACxD,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,kBAAkB,CAAC,CAAC;IAC7D,MAAM,cAAc,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC,aAAa,CAAC,CAAC;IAC7D,MAAM,aAAa,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,aAAa,IAAI,SAAS,CAAC,CAAC;IAC5E,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAE3D,6DAA6D;IAC7D,MAAM,eAAe,GAAG,IAAA,6BAAgB,EACtC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,EAChE,CAAC,EAAE,wEAAwE;IAC3E,MAAM,CAAC,gBAAgB,CACxB,CAAC;IAEF,uEAAuE;IACvE,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,eAAe,GAAG,UAAU,CAAC,GAAG,GAAG,CAAC,CAAC;IACxE,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,SAAS,EAAE,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAE9E,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1B,MAAM,WAAW,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAEpE,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3B,MAAM,eAAe,GAAa,EAAE,CAAC;YAErC,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;gBAC9B,MAAM,UAAU,GAAG,IAAA,2BAAc,EAAC,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,gBAAgB,CAAC,CAAC;gBACxE,MAAM,MAAM,GAAG,cAAc,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC;gBA
ErD,IAAI,aAAa,GAAG,MAAM,IAAI,cAAc,EAAE,CAAC;oBAC7C,eAAe,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,IAAI,MAAM,UAAU,CAAC,UAAU,EAAE,CAAC,CAAC;oBAChE,aAAa,IAAI,MAAM,CAAC;gBAC1B,CAAC;YACH,CAAC;YAED,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC/B,SAAS,CAAC,IAAI,CAAC;oBACb,IAAI,EAAE,QAAQ;oBACd,OAAO,EAAE,+BAA+B,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,iBAAiB;iBACpF,CAAC,CAAC;gBACH,UAAU,IAAI,aAAa,CAAC;YAC9B,CAAC;QACH,CAAC;IACH,CAAC;IAED,mCAAmC;IACnC,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,eAAe,GAAG,UAAU,CAAC,GAAG,IAAI,CAAC,CAAC;IACtE,IAAI,eAAe,GAAG,CAAC,CAAC;IAExB,KAAK,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QAClC,MAAM,MAAM,GAAG,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAC3C,IAAI,eAAe,GAAG,MAAM,IAAI,WAAW,EAAE,CAAC;YAC5C,SAAS,CAAC,IAAI,CAAC;gBACb,IAAI,EAAE,GAAG,CAAC,IAAuC;gBACjD,OAAO,EAAE,GAAG,CAAC,OAAO;aACrB,CAAC,CAAC;YACH,eAAe,IAAI,MAAM,CAAC;QAC5B,CAAC;IACH,CAAC;IACD,UAAU,IAAI,eAAe,CAAC;IAE9B,mEAAmE;IACnE,MAAM,eAAe,GAAG,eAAe,GAAG,UAAU,CAAC;IACrD,IAAI,gBAAgB,GAAG,CAAC,CAAC;IACzB,MAAM,cAAc,GAAG,MAAM,CAAC,oBAAoB,IAAI,EAAE,CAAC;IAEzD,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,cAAc,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE,CAAC;QAC3D,MAAM,GAAG,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QAClC,MAAM,sBAAsB,GAAG,KAAK,KAAK,cAAc,CAAC,MAAM,GAAG,CAAC,CAAC;QAEnE,0EAA0E;QAC1E,MAAM,sBAAsB,GAC1B,GAAG,CAAC,IAAI,KAAK,QAAQ,IAAI,CAAC,sBAAsB,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC;QAE3E,IAAI,gBAAgB,GAAG,GAAG,CAAC,OAAO,CAAC;QACnC,IAAI,CAAC,sBAAsB,IAAI,cAAc,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;YAC1E,gBAAgB,GAAG,IAAA,2BAAc,EAC/B,GAAG,CAAC,OAAO,EACX,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,MAAM,CAAC,gBAAgB,GAAG,GAAG,CAAC,CAC9C,CAAC,UAAU,CAAC;QACf,CAAC;QAED,MAAM,MAAM,GAAG,cAAc,CAAC,gBAAgB,CAAC,CAAC;QAChD,IAAI,gBAAgB,GAAG,MAAM,IAAI,eAAe,EAAE,CAAC;YACjD,SAAS,CAAC,IAAI,CAAC;gBACb,IAAI,EAAE,GAAG,CAAC,IAAuC;gBACjD,OAAO,EAAE,gBAAgB;aAC1B,CAAC,CAAC;YACH,gBAAgB,IAAI,MAAM,CAAC;QAC7B,CAAC;IACH,CAAC;IACD,UAAU,IAAI,gBAAgB,CAAC;IAE/B,wEAAwE;IACxE,IAAI,CAAC,uBAAuB,EAAE,CAAC;QAC7B,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,CAAC,CAAC;QACvD,UAAU,IAAI,cAAc,CAAC,WAAW,CAAC,
CAAC;QAC1C,SAAS,IAAI,cAAc,CAAC,WAAW,CAAC,CAAC;IAC3C,CAAC;IAED,OAAO;QACL,QAAQ,EAAE,SAAS;QACnB,SAAS;QACT,UAAU,EAAE,UAAU;QACtB,cAAc,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM;KACvE,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedder — Local embedding using all-MiniLM-L6-v2 via onnxruntime-node
|
|
3
|
+
*
|
|
4
|
+
* Zero API cost, ~23ms per embedding, fully offline.
|
|
5
|
+
* Output dimension: 384
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Generate embedding for a text using the ONNX model.
|
|
9
|
+
* Falls back to a character n-gram hash embedding if the model is not available.
|
|
10
|
+
*/
|
|
11
|
+
export declare function embed(text: string): Promise<Float32Array | null>;
|
|
12
|
+
/**
|
|
13
|
+
* Batch embed multiple texts.
|
|
14
|
+
*/
|
|
15
|
+
export declare function embedBatch(texts: string[]): Promise<(Float32Array | null)[]>;
|
|
16
|
+
//# sourceMappingURL=embedder.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedder.d.ts","sourceRoot":"","sources":["../../../src/core/rag/embedder.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAmGH;;;GAGG;AACH,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC,CAuDtE;AA2CD;;GAEG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,CAAC,YAAY,GAAG,IAAI,CAAC,EAAE,CAAC,CAMlF"}
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Embedder — Local embedding using all-MiniLM-L6-v2 via onnxruntime-node
|
|
4
|
+
*
|
|
5
|
+
* Zero API cost, ~23ms per embedding, fully offline.
|
|
6
|
+
* Output dimension: 384
|
|
7
|
+
*/
|
|
8
|
+
// Compiler-emitted interop helper: exposes property `k` of module object `m`
// on target `o` under the name `k2` (`k2` defaults to `k`).
// An already-defined `this.__createBinding` is reused when present.
// When Object.create exists, the property is installed with defineProperty:
// m's own descriptor is reused unless it is missing, is an accessor on an
// object without `__esModule`, or is a writable/configurable data property;
// in those cases an enumerable getter that reads m[k] at access time is
// installed instead. Without Object.create the value is copied by assignment.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
9
|
+
if (k2 === undefined) k2 = k;
|
|
10
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
11
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
12
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
13
|
+
}
|
|
14
|
+
Object.defineProperty(o, k2, desc);
|
|
15
|
+
}) : (function(o, m, k, k2) {
|
|
16
|
+
if (k2 === undefined) k2 = k;
|
|
17
|
+
o[k2] = m[k];
|
|
18
|
+
}));
|
|
19
|
+
// Compiler-emitted interop helper: sets `o.default` to `v`.
// An already-defined `this.__setModuleDefault` is reused when present.
// With Object.create available the property is defined as enumerable via
// defineProperty; otherwise it is set by plain assignment.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
20
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
21
|
+
}) : function(o, v) {
|
|
22
|
+
o["default"] = v;
|
|
23
|
+
});
|
|
24
|
+
// Compiler-emitted interop helper used for namespace-style imports.
// An already-defined `this.__importStar` is reused when present.
// The returned function passes through modules flagged `__esModule`; for any
// other value it builds a fresh object that binds every own property name
// except "default" via __createBinding, then sets `default` to the module
// itself via __setModuleDefault.
// `ownKeys` replaces itself on first call with Object.getOwnPropertyNames, or
// with a for-in/hasOwnProperty fallback when that function is missing.
var __importStar = (this && this.__importStar) || (function () {
|
|
25
|
+
var ownKeys = function(o) {
|
|
26
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
27
|
+
var ar = [];
|
|
28
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
29
|
+
return ar;
|
|
30
|
+
};
|
|
31
|
+
return ownKeys(o);
|
|
32
|
+
};
|
|
33
|
+
return function (mod) {
|
|
34
|
+
if (mod && mod.__esModule) return mod;
|
|
35
|
+
var result = {};
|
|
36
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
37
|
+
__setModuleDefault(result, mod);
|
|
38
|
+
return result;
|
|
39
|
+
};
|
|
40
|
+
})();
|
|
41
|
+
// Compiler-emitted interop helper for default imports: returns `mod` as-is
// when it is flagged `__esModule`, otherwise wraps it as `{ default: mod }`.
// An already-defined `this.__importDefault` is reused when present.
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
42
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
43
|
+
};
|
|
44
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
45
|
+
exports.embed = embed;
|
|
46
|
+
exports.embedBatch = embedBatch;
|
|
47
|
+
const path_1 = __importDefault(require("path"));
|
|
48
|
+
const fs_1 = __importDefault(require("fs"));
|
|
49
|
+
const os_1 = __importDefault(require("os"));
|
|
50
|
+
let session = null;
|
|
51
|
+
let sessionChecked = false;
|
|
52
|
+
let tokenizer = null;
|
|
53
|
+
// Simple word-piece tokenizer for MiniLM
|
|
54
|
+
// In production, you'd use the actual tokenizer from the model
|
|
55
|
+
// For now, we use a simplified approach
|
|
56
|
+
const MAX_SEQ_LENGTH = 256;
|
|
57
|
+
/**
|
|
58
|
+
* Initialize the ONNX runtime session with MiniLM-L6-v2.
|
|
59
|
+
* Caches the result (including "not available") to avoid repeated checks.
|
|
60
|
+
*/
|
|
61
|
+
async function getSession() {
|
|
62
|
+
if (session)
|
|
63
|
+
return session;
|
|
64
|
+
if (sessionChecked)
|
|
65
|
+
return null; // Already checked, model not available
|
|
66
|
+
sessionChecked = true;
|
|
67
|
+
try {
|
|
68
|
+
const ort = await Promise.resolve().then(() => __importStar(require('onnxruntime-node')));
|
|
69
|
+
// Check for model in ~/.opengauge/models/
|
|
70
|
+
const modelDir = path_1.default.join(os_1.default.homedir(), '.opengauge', 'models');
|
|
71
|
+
const modelPath = path_1.default.join(modelDir, 'all-MiniLM-L6-v2.onnx');
|
|
72
|
+
if (!fs_1.default.existsSync(modelPath)) {
|
|
73
|
+
console.log('[OpenGauge] Embedding model not found. Using fallback hash embeddings.');
|
|
74
|
+
console.log(` To enable full semantic search, place all-MiniLM-L6-v2.onnx in ${modelDir}`);
|
|
75
|
+
return null;
|
|
76
|
+
}
|
|
77
|
+
session = await ort.InferenceSession.create(modelPath, {
|
|
78
|
+
executionProviders: ['cpu'],
|
|
79
|
+
});
|
|
80
|
+
console.log('[OpenGauge] ONNX embedding model loaded successfully.');
|
|
81
|
+
return session;
|
|
82
|
+
}
|
|
83
|
+
catch (error) {
|
|
84
|
+
console.warn('[OpenGauge] Failed to initialize ONNX embedding model. Using fallback.', error);
|
|
85
|
+
return null;
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
/**
 * Turn raw text into the three int64 input arrays fed to the model.
 *
 * The text is lower-cased, every character that is neither a word character
 * nor whitespace becomes a space, and the result is split on whitespace.
 * At most MAX_SEQ_LENGTH - 2 words are kept so the sequence, framed by the
 * start id 101 and the end id 102, never exceeds MAX_SEQ_LENGTH entries.
 * Word ids are pseudo ids from hashWord, not real vocabulary lookups.
 *
 * @param {string} text - Input text.
 * @returns {{inputIds: BigInt64Array, attentionMask: BigInt64Array, tokenTypeIds: BigInt64Array}}
 *   Arrays of equal length; the attention mask is all ones and the token
 *   type ids are all zeros.
 */
function simpleTokenize(text) {
    const lowered = text.toLowerCase();
    const stripped = lowered.replace(/[^\w\s]/g, ' ');
    const tokens = stripped
        .split(/\s+/)
        .filter(Boolean)
        .slice(0, MAX_SEQ_LENGTH - 2);
    // Two extra slots: one leading [CLS] and one trailing [SEP].
    const total = tokens.length + 2;
    const inputIds = new BigInt64Array(total);
    inputIds[0] = BigInt(101);
    tokens.forEach((token, position) => {
        inputIds[position + 1] = BigInt(hashWord(token));
    });
    inputIds[total - 1] = BigInt(102);
    // No padding is produced, so every position is attended to.
    const attentionMask = new BigInt64Array(total).fill(BigInt(1));
    // Zero-initialised: all tokens are in a single segment.
    const tokenTypeIds = new BigInt64Array(total);
    return { inputIds, attentionMask, tokenTypeIds };
}
|
|
118
|
+
/**
 * Map a word to a deterministic pseudo token id.
 *
 * A 15-bit rolling hash (hash * 31 + char code) is folded into the id range
 * [1000, 30522). The upper bound is the size of the BERT-uncased WordPiece
 * vocabulary that all-MiniLM-L6-v2 is built on; ids at or above it would
 * index past the end of the model's embedding table.
 *
 * @param {string} word - Word to hash.
 * @returns {number} Integer id with 1000 <= id < 30522.
 */
function hashWord(word) {
    const VOCAB_SIZE = 30522;
    const FIRST_ID = 1000; // ids below this are left to special/reserved tokens
    let hash = 0;
    for (let i = 0; i < word.length; i++) {
        hash = ((hash << 5) - hash + word.charCodeAt(i)) & 0x7fff;
    }
    return (hash % (VOCAB_SIZE - FIRST_ID)) + FIRST_ID; // Keep in vocab range
}
|
|
125
|
+
/**
|
|
126
|
+
* Generate embedding for a text using the ONNX model.
|
|
127
|
+
* Returns null if the model is not available.
|
|
128
|
+
*/
|
|
129
|
+
async function embed(text) {
|
|
130
|
+
const sess = await getSession();
|
|
131
|
+
if (!sess) {
|
|
132
|
+
// Fallback: generate a simple TF-IDF-like hash embedding
|
|
133
|
+
return fallbackEmbed(text);
|
|
134
|
+
}
|
|
135
|
+
try {
|
|
136
|
+
const ort = await Promise.resolve().then(() => __importStar(require('onnxruntime-node')));
|
|
137
|
+
const { inputIds, attentionMask, tokenTypeIds } = simpleTokenize(text);
|
|
138
|
+
const feeds = {
|
|
139
|
+
input_ids: new ort.Tensor('int64', inputIds, [1, inputIds.length]),
|
|
140
|
+
attention_mask: new ort.Tensor('int64', attentionMask, [1, attentionMask.length]),
|
|
141
|
+
token_type_ids: new ort.Tensor('int64', tokenTypeIds, [1, tokenTypeIds.length]),
|
|
142
|
+
};
|
|
143
|
+
const results = await sess.run(feeds);
|
|
144
|
+
// Mean pooling over token embeddings
|
|
145
|
+
const output = results['last_hidden_state'] || results['token_embeddings'] || Object.values(results)[0];
|
|
146
|
+
const data = output.data;
|
|
147
|
+
const hiddenSize = 384;
|
|
148
|
+
const numTokens = inputIds.length;
|
|
149
|
+
const embedding = new Float32Array(hiddenSize);
|
|
150
|
+
for (let i = 0; i < numTokens; i++) {
|
|
151
|
+
for (let j = 0; j < hiddenSize; j++) {
|
|
152
|
+
embedding[j] += data[i * hiddenSize + j];
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
// Average
|
|
156
|
+
for (let j = 0; j < hiddenSize; j++) {
|
|
157
|
+
embedding[j] /= numTokens;
|
|
158
|
+
}
|
|
159
|
+
// L2 normalize
|
|
160
|
+
let norm = 0;
|
|
161
|
+
for (let j = 0; j < hiddenSize; j++) {
|
|
162
|
+
norm += embedding[j] * embedding[j];
|
|
163
|
+
}
|
|
164
|
+
norm = Math.sqrt(norm);
|
|
165
|
+
if (norm > 0) {
|
|
166
|
+
for (let j = 0; j < hiddenSize; j++) {
|
|
167
|
+
embedding[j] /= norm;
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
return embedding;
|
|
171
|
+
}
|
|
172
|
+
catch (error) {
|
|
173
|
+
console.warn('Embedding failed, using fallback:', error);
|
|
174
|
+
return fallbackEmbed(text);
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
/**
 * Model-free embedding built from hashed character n-grams.
 *
 * The text is lower-cased, stripped of everything except word characters and
 * whitespace, and split on whitespace. Every 1-, 2- and 3-character n-gram of
 * each word is hashed into one of 384 buckets and counted; the count vector
 * is then L2-normalized. Lower quality than MiniLM, but it allows basic
 * similarity search when the ONNX model cannot be used.
 *
 * @param {string} text - Text to embed.
 * @returns {Float32Array} A 384-element vector; all zeros when the text
 *   contains no word characters.
 */
function fallbackEmbed(text) {
    const DIM = 384;
    const vec = new Float32Array(DIM);
    // Bucket index for the n-gram token[start .. start + size).
    const bucketFor = (token, start, size) => {
        let bucket = 0;
        for (let pos = start; pos < start + size; pos += 1) {
            bucket = (bucket * 31 + token.charCodeAt(pos)) % DIM;
        }
        return bucket;
    };
    const tokens = text
        .toLowerCase()
        .replace(/[^\w\s]/g, '')
        .split(/\s+/);
    tokens.forEach((token) => {
        for (let size = 1; size <= 3; size += 1) {
            const lastStart = token.length - size;
            for (let start = 0; start <= lastStart; start += 1) {
                vec[bucketFor(token, start, size)] += 1;
            }
        }
    });
    // Scale to unit length; an all-zero vector is left untouched.
    const magnitude = Math.sqrt(vec.reduce((sum, value) => sum + value * value, 0));
    if (magnitude > 0) {
        vec.forEach((value, idx) => {
            vec[idx] = value / magnitude;
        });
    }
    return vec;
}
|
|
213
|
+
/**
 * Embed several texts, one after another.
 *
 * Each text is passed to embed() only after the previous embedding has
 * settled, and results are returned in input order.
 *
 * @param {string[]} texts - Texts to embed.
 * @returns {Promise<Array>} One embed() result per input text.
 */
async function embedBatch(texts) {
    const vectors = [];
    for (let idx = 0; idx < texts.length; idx += 1) {
        const vector = await embed(texts[idx]);
        vectors.push(vector);
    }
    return vectors;
}
|
|
223
|
+
//# sourceMappingURL=embedder.js.map
|