opengauge 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/README.md +64 -0
  2. package/bin/opengauge.js +70 -0
  3. package/dist/core/optimizer/checkpoint.d.ts +37 -0
  4. package/dist/core/optimizer/checkpoint.d.ts.map +1 -0
  5. package/dist/core/optimizer/checkpoint.js +81 -0
  6. package/dist/core/optimizer/checkpoint.js.map +1 -0
  7. package/dist/core/optimizer/compressor.d.ts +41 -0
  8. package/dist/core/optimizer/compressor.d.ts.map +1 -0
  9. package/dist/core/optimizer/compressor.js +134 -0
  10. package/dist/core/optimizer/compressor.js.map +1 -0
  11. package/dist/core/optimizer/dedup.d.ts +48 -0
  12. package/dist/core/optimizer/dedup.d.ts.map +1 -0
  13. package/dist/core/optimizer/dedup.js +147 -0
  14. package/dist/core/optimizer/dedup.js.map +1 -0
  15. package/dist/core/providers/adapter.d.ts +48 -0
  16. package/dist/core/providers/adapter.d.ts.map +1 -0
  17. package/dist/core/providers/adapter.js +22 -0
  18. package/dist/core/providers/adapter.js.map +1 -0
  19. package/dist/core/providers/anthropic.d.ts +12 -0
  20. package/dist/core/providers/anthropic.d.ts.map +1 -0
  21. package/dist/core/providers/anthropic.js +155 -0
  22. package/dist/core/providers/anthropic.js.map +1 -0
  23. package/dist/core/providers/gemini.d.ts +13 -0
  24. package/dist/core/providers/gemini.d.ts.map +1 -0
  25. package/dist/core/providers/gemini.js +154 -0
  26. package/dist/core/providers/gemini.js.map +1 -0
  27. package/dist/core/providers/ollama.d.ts +11 -0
  28. package/dist/core/providers/ollama.d.ts.map +1 -0
  29. package/dist/core/providers/ollama.js +119 -0
  30. package/dist/core/providers/ollama.js.map +1 -0
  31. package/dist/core/providers/openai.d.ts +12 -0
  32. package/dist/core/providers/openai.d.ts.map +1 -0
  33. package/dist/core/providers/openai.js +169 -0
  34. package/dist/core/providers/openai.js.map +1 -0
  35. package/dist/core/rag/assembler.d.ts +47 -0
  36. package/dist/core/rag/assembler.d.ts.map +1 -0
  37. package/dist/core/rag/assembler.js +178 -0
  38. package/dist/core/rag/assembler.js.map +1 -0
  39. package/dist/core/rag/embedder.d.ts +16 -0
  40. package/dist/core/rag/embedder.d.ts.map +1 -0
  41. package/dist/core/rag/embedder.js +223 -0
  42. package/dist/core/rag/embedder.js.map +1 -0
  43. package/dist/core/rag/retriever.d.ts +20 -0
  44. package/dist/core/rag/retriever.d.ts.map +1 -0
  45. package/dist/core/rag/retriever.js +71 -0
  46. package/dist/core/rag/retriever.js.map +1 -0
  47. package/dist/db/index.d.ts +5 -0
  48. package/dist/db/index.d.ts.map +1 -0
  49. package/dist/db/index.js +48 -0
  50. package/dist/db/index.js.map +1 -0
  51. package/dist/db/queries.d.ts +72 -0
  52. package/dist/db/queries.d.ts.map +1 -0
  53. package/dist/db/queries.js +169 -0
  54. package/dist/db/queries.js.map +1 -0
  55. package/dist/db/schema.d.ts +3 -0
  56. package/dist/db/schema.d.ts.map +1 -0
  57. package/dist/db/schema.js +71 -0
  58. package/dist/db/schema.js.map +1 -0
  59. package/dist/server/config.d.ts +25 -0
  60. package/dist/server/config.d.ts.map +1 -0
  61. package/dist/server/config.js +69 -0
  62. package/dist/server/config.js.map +1 -0
  63. package/dist/server/index.d.ts +5 -0
  64. package/dist/server/index.d.ts.map +1 -0
  65. package/dist/server/index.js +61 -0
  66. package/dist/server/index.js.map +1 -0
  67. package/dist/server/routes/index.d.ts +6 -0
  68. package/dist/server/routes/index.d.ts.map +1 -0
  69. package/dist/server/routes/index.js +272 -0
  70. package/dist/server/routes/index.js.map +1 -0
  71. package/dist/server/sse.d.ts +21 -0
  72. package/dist/server/sse.d.ts.map +1 -0
  73. package/dist/server/sse.js +40 -0
  74. package/dist/server/sse.js.map +1 -0
  75. package/dist/ui/static/app.js +515 -0
  76. package/dist/ui/static/index.html +13 -0
  77. package/dist/ui/static/styles.css +506 -0
  78. package/dist/ui/static/vendor.js +26 -0
  79. package/package.json +49 -0
@@ -0,0 +1,169 @@
1
+ "use strict";
2
// Module-interop helpers (these look like the standard TypeScript-emitted
// CommonJS helpers — TODO confirm against the build configuration). Each one
// reuses an implementation already present on `this` before defining its own.
//
// __createBinding(o, m, k, k2): exposes m[k] on o under the name k2 (k2
// defaults to k). m's own property descriptor is reused unless it is missing,
// is an accessor on a module without __esModule, or is a writable/configurable
// data property; in those cases an enumerable getter returning m[k] is used.
// Without Object.create the value is simply copied.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// __setModuleDefault(o, v): stores v as the enumerable "default" property of o.
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
// __importStar(mod): returns mod unchanged when mod.__esModule is truthy.
// Otherwise builds a fresh object that binds every own property of mod except
// "default", and sets mod itself as its "default".
+ var __importStar = (this && this.__importStar) || (function () {
19
// ownKeys replaces itself on first call with Object.getOwnPropertyNames, or
// with a hasOwnProperty-filtered for-in loop when that function is absent.
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.OpenAIProvider = void 0;
37
/**
 * Chat provider that talks to the OpenAI Chat Completions endpoint
 * (`<base_url>/v1/chat/completions`).
 *
 * Exposes a one-shot `chat`, a streaming `chatStream` async generator and a
 * `countTokens` helper.
 */
class OpenAIProvider {
    name = 'openai';
    defaultModel;
    apiKey;
    baseUrl;

    /**
     * @param {{ api_key?: string, base_url?: string, default_model?: string }} config
     *   Missing values fall back to an empty key, `https://api.openai.com`
     *   and `gpt-4o` respectively.
     */
    constructor(config) {
        this.apiKey = config.api_key || '';
        // Drop trailing slashes so the request URL never contains `//v1/...`.
        this.baseUrl = (config.base_url || 'https://api.openai.com').replace(/\/+$/, '');
        this.defaultModel = config.default_model || 'gpt-4o';
    }

    /**
     * Build the JSON request body shared by `chat` and `chatStream`.
     *
     * @param {object} request - Chat request (`messages`, optional `maxTokens`, `temperature`).
     * @param {string} model - Model name to send.
     * @param {object} [extra] - Additional top-level fields (e.g. streaming flags).
     * @returns {object} Request body ready for `JSON.stringify`.
     */
    #buildBody(request, model, extra = {}) {
        const body = {
            model,
            messages: request.messages.map(({ role, content }) => ({ role, content })),
            max_tokens: request.maxTokens || 4096,
            ...extra,
        };
        // Only forward temperature when the caller set it (0 is a valid value).
        if (request.temperature !== undefined) {
            body.temperature = request.temperature;
        }
        return body;
    }

    /**
     * POST a body to the chat completions endpoint.
     *
     * @param {object} body - Request body.
     * @returns {Promise<Response>} The successful response.
     * @throws {Error} `OpenAI API error (<status>): <response text>` on a non-2xx status.
     */
    async #post(body) {
        const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                Authorization: `Bearer ${this.apiKey}`,
            },
            body: JSON.stringify(body),
        });
        if (!response.ok) {
            const err = await response.text();
            throw new Error(`OpenAI API error (${response.status}): ${err}`);
        }
        return response;
    }

    /**
     * Extract the JSON payload of one server-sent-event line.
     *
     * @param {string} line - A single line of the event stream.
     * @returns {object|null} Parsed event, or null for non-data lines, the
     *   `[DONE]` sentinel, empty payloads and malformed JSON.
     */
    static #parseSseData(line) {
        // The SSE format allows `data:` with or without a following space.
        if (!line.startsWith('data:')) {
            return null;
        }
        const payload = line.slice(5).trim();
        if (payload === '' || payload === '[DONE]') {
            return null;
        }
        try {
            const event = JSON.parse(payload);
            return event !== null && typeof event === 'object' ? event : null;
        }
        catch {
            // Malformed JSON lines are skipped rather than aborting the stream.
            return null;
        }
    }

    /**
     * Send a chat request and wait for the complete answer.
     *
     * @param {object} request - `messages` plus optional `model`, `maxTokens`, `temperature`.
     * @returns {Promise<{content: string, tokensIn: number, tokensOut: number, model: string, provider: string}>}
     * @throws {Error} When the API answers with a non-2xx status.
     */
    async chat(request) {
        const model = request.model || this.defaultModel;
        const response = await this.#post(this.#buildBody(request, model));
        const data = await response.json();
        return {
            content: data.choices?.[0]?.message?.content || '',
            tokensIn: data.usage?.prompt_tokens || 0,
            tokensOut: data.usage?.completion_tokens || 0,
            model,
            provider: this.name,
        };
    }

    /**
     * Send a chat request and stream the answer.
     *
     * Yields `{ content, done: false }` for every non-empty delta and finishes
     * with a single `{ content: '', done: true, tokensIn, tokensOut }` chunk.
     *
     * @param {object} request - `messages` plus optional `model`, `maxTokens`, `temperature`.
     * @throws {Error} When the API answers with a non-2xx status or has no body.
     */
    async *chatStream(request) {
        const model = request.model || this.defaultModel;
        const response = await this.#post(this.#buildBody(request, model, {
            stream: true,
            stream_options: { include_usage: true },
        }));
        const reader = response.body?.getReader();
        if (!reader)
            throw new Error('No response body');
        const decoder = new TextDecoder();
        let buffer = '';
        let tokensIn = 0;
        let tokensOut = 0;
        let finished = false;
        try {
            let streamEnded = false;
            while (!streamEnded) {
                const { done, value } = await reader.read();
                streamEnded = done;
                // On the final pass flush the decoder so a multi-byte character
                // split across chunks is not lost.
                buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });
                const lines = buffer.split('\n');
                // Keep the trailing partial line for the next chunk; at end of
                // stream there is no next chunk, so process it as well.
                buffer = done ? '' : (lines.pop() || '');
                for (const line of lines) {
                    const event = OpenAIProvider.#parseSseData(line);
                    if (!event)
                        continue;
                    const delta = event.choices?.[0]?.delta?.content;
                    if (delta) {
                        yield { content: delta, done: false };
                    }
                    if (event.usage) {
                        tokensIn = event.usage.prompt_tokens || 0;
                        tokensOut = event.usage.completion_tokens || 0;
                    }
                }
            }
            finished = true;
        }
        finally {
            if (!finished) {
                // The consumer stopped early or an error occurred: cancel the
                // body so the underlying connection is released.
                await reader.cancel().catch(() => { });
            }
            reader.releaseLock();
        }
        yield { content: '', done: true, tokensIn, tokensOut };
    }

    /**
     * Count the tokens of `text` with tiktoken's `gpt-4o` encoding.
     *
     * @param {string} text - Text to measure.
     * @returns {Promise<number>} Token count, or `ceil(length / 4)` when
     *   tiktoken cannot be loaded or used.
     */
    async countTokens(text) {
        let enc;
        try {
            const { encoding_for_model } = await Promise.resolve().then(() => __importStar(require('tiktoken')));
            enc = encoding_for_model('gpt-4o');
            return enc.encode(text).length;
        }
        catch {
            // Fallback: ~4 chars per token
            return Math.ceil(text.length / 4);
        }
        finally {
            // Always release the encoder, even when encode() throws.
            enc?.free();
        }
    }
}
168
+ exports.OpenAIProvider = OpenAIProvider;
169
+ //# sourceMappingURL=openai.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"openai.js","sourceRoot":"","sources":["../../../src/core/providers/openai.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAQA,MAAa,cAAc;IACzB,IAAI,GAAG,QAAQ,CAAC;IAChB,YAAY,CAAS;IACb,MAAM,CAAS;IACf,OAAO,CAAS;IAExB,YAAY,MAAsB;QAChC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC;QACnC,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,QAAQ,IAAI,wBAAwB,CAAC;QAC3D,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,aAAa,IAAI,QAAQ,CAAC;IACvD,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,OAAoB;QAC7B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC;QAEjD,MAAM,IAAI,GAAQ;YAChB,KAAK;YACL,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACrC,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,OAAO,EAAE,CAAC,CAAC,OAAO;aACnB,CAAC,CAAC;YACH,UAAU,EAAE,OAAO,CAAC,SAAS,IAAI,IAAI;SACtC,CAAC;QAEF,IAAI,OAAO,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;YACtC,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;QACzC,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,IAAI,CAAC,OAAO,sBAAsB,EAAE;YAClE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE;aACvC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;SAC3B,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAClC,MAAM,IAAI,KAAK,CAAC,qBAAqB,QAAQ,CAAC,MAAM,MAAM,GAAG,EAAE,CAAC,CAAC;QACnE,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAS,CAAC;QAC1C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,IAAI,EAAE,CAAC;QAE1D,OAAO;YACL,OAAO;YACP,QAAQ,EAAE,IAAI,CAAC,KAAK,EAAE,aAAa,IAAI,CAAC;YACxC,SAAS,EAAE,IAAI,CAAC,KAAK,EAAE,iBAAiB,IAAI,CAAC;YAC7C,KAAK;YACL,QAAQ,EAAE,IAAI,CAAC,IAAI;SACpB,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,CAAC,UAAU,CAAC,OAAoB;QACpC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC;QAEjD,MAAM,IAAI,GAAQ;YAChB,KAAK;YACL,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACrC,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,OAAO,EAAE,CAAC,CAAC,OAAO;aACnB,CAAC,CAAC;YACH,UAAU,EAAE,OAAO,CAAC,SAAS,IAAI,IAAI;YACrC,MAAM,EAAE,IAAI;YACZ,cAAc,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE;SA
CxC,CAAC;QAEF,IAAI,OAAO,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;YACtC,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;QACzC,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,IAAI,CAAC,OAAO,sBAAsB,EAAE;YAClE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE;aACvC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;SAC3B,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAClC,MAAM,IAAI,KAAK,CAAC,qBAAqB,QAAQ,CAAC,MAAM,MAAM,GAAG,EAAE,CAAC,CAAC;QACnE,CAAC;QAED,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,EAAE,SAAS,EAAE,CAAC;QAC1C,IAAI,CAAC,MAAM;YAAE,MAAM,IAAI,KAAK,CAAC,kBAAkB,CAAC,CAAC;QAEjD,MAAM,OAAO,GAAG,IAAI,WAAW,EAAE,CAAC;QAClC,IAAI,MAAM,GAAG,EAAE,CAAC;QAChB,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,IAAI,SAAS,GAAG,CAAC,CAAC;QAElB,IAAI,CAAC;YACH,OAAO,IAAI,EAAE,CAAC;gBACZ,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBAC5C,IAAI,IAAI;oBAAE,MAAM;gBAEhB,MAAM,IAAI,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;gBAClD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACjC,MAAM,GAAG,KAAK,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;gBAE3B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;oBACzB,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC;wBAAE,SAAS;oBACzC,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;oBAClC,IAAI,IAAI,KAAK,QAAQ;wBAAE,SAAS;oBAEhC,IAAI,CAAC;wBACH,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;wBAC/B,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC;wBAEjD,IAAI,KAAK,EAAE,CAAC;4BACV,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;wBACxC,CAAC;wBAED,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;4BAChB,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC,aAAa,IAAI,CAAC,CAAC;4BAC1C,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,iBAAiB,IAAI,CAAC,CAAC;wBACjD,CAAC;oBACH,CAAC;oBAAC,MAAM,CAAC;wBACP,sBAAsB;oBACxB,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;gBAAS,CAAC;YACT,MAAM,CAAC,WAAW,EAAE,CAAC;QACvB,CAAC;QAED,MAAM,EAAE,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC;IACzD,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,IAAY;QAC5B,kDAAk
D;QAClD,IAAI,CAAC;YACH,MAAM,EAAE,kBAAkB,EAAE,GAAG,wDAAa,UAAU,GAAC,CAAC;YACxD,MAAM,GAAG,GAAG,kBAAkB,CAAC,QAAe,CAAC,CAAC;YAChD,MAAM,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAChC,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC;YAC5B,GAAG,CAAC,IAAI,EAAE,CAAC;YACX,OAAO,KAAK,CAAC;QACf,CAAC;QAAC,MAAM,CAAC;YACP,+BAA+B;YAC/B,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACpC,CAAC;IACH,CAAC;CACF;AAlJD,wCAkJC"}
@@ -0,0 +1,47 @@
1
+ /**
2
+ * Context Assembler — Builds the optimal context window for LLM requests
3
+ *
4
+ * Combines:
5
+ * 1. System prompt
6
+ * 2. Retrieved relevant context (RAG)
7
+ * 3. Last N verbatim turns
8
+ * 4. Compressed checkpoint (if exists)
9
+ *
10
+ * Applies token budget to ensure we fit within the model's context window.
11
+ */
12
+ import { ChatMessage } from '../providers/adapter';
13
+ import { Queries } from '../../db/queries';
14
+ export interface AssemblerConfig {
15
+ /** Maximum tokens for the context window. Default: 8000 */
16
+ maxContextTokens: number;
17
+ /** Tokens reserved for the model's response. Default: 2000 */
18
+ responseReserve: number;
19
+ /** Number of recent messages to keep verbatim. Default: 6 */
20
+ recentMessageCount: number;
21
+ /** Number of RAG results to retrieve. Default: 15 */
22
+ ragTopK: number;
23
+ /** Compression aggressiveness (0-1). Default: 0.3 */
24
+ compressionLevel: number;
25
+ /** Desired savings percentage against raw prompt tokens. Default: 50 */
26
+ targetSavingsPercent: number;
27
+ /** Minimum token budget to preserve quality/context intelligence. Default: 320 */
28
+ qualityFloorTokens: number;
29
+ }
30
+ export declare const DEFAULT_ASSEMBLER_CONFIG: AssemblerConfig;
31
+ /**
32
+ * Assemble the context window for an LLM request.
33
+ *
34
+ * @param userMessage - The current user message.
35
+ * @param conversationId - The conversation ID.
36
+ * @param systemPrompt - The system prompt.
37
+ * @param queries - Database queries instance.
38
+ * @param checkpointSummary - Latest checkpoint summary, if any.
39
+ * @param config - Assembler configuration.
40
+ */
41
+ export declare function assembleContext(userMessage: string, conversationId: string, systemPrompt: string | null, queries: Queries, checkpointSummary: string | null, config?: AssemblerConfig): Promise<{
42
+ messages: ChatMessage[];
43
+ tokensRaw: number;
44
+ tokensSent: number;
45
+ ragResultCount: number;
46
+ }>;
47
+ //# sourceMappingURL=assembler.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"assembler.d.ts","sourceRoot":"","sources":["../../../src/core/rag/assembler.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACnD,OAAO,EAAE,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAK3C,MAAM,WAAW,eAAe;IAC9B,2DAA2D;IAC3D,gBAAgB,EAAE,MAAM,CAAC;IACzB,8DAA8D;IAC9D,eAAe,EAAE,MAAM,CAAC;IACxB,8DAA8D;IAC9D,kBAAkB,EAAE,MAAM,CAAC;IAC3B,qDAAqD;IACrD,OAAO,EAAE,MAAM,CAAC;IAChB,qDAAqD;IACrD,gBAAgB,EAAE,MAAM,CAAC;IACzB,wEAAwE;IACxE,oBAAoB,EAAE,MAAM,CAAC;IAC7B,kFAAkF;IAClF,kBAAkB,EAAE,MAAM,CAAC;CAC5B;AAED,eAAO,MAAM,wBAAwB,EAAE,eAQtC,CAAC;AAMF;;;;;;;;;GASG;AACH,wBAAsB,eAAe,CACnC,WAAW,EAAE,MAAM,EACnB,cAAc,EAAE,MAAM,EACtB,YAAY,EAAE,MAAM,GAAG,IAAI,EAC3B,OAAO,EAAE,OAAO,EAChB,iBAAiB,EAAE,MAAM,GAAG,IAAI,EAChC,MAAM,GAAE,eAA0C,GACjD,OAAO,CAAC;IACT,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,EAAE,MAAM,CAAC;CACxB,CAAC,CAuLD"}
@@ -0,0 +1,178 @@
1
+ "use strict";
2
+ /**
3
+ * Context Assembler — Builds the optimal context window for LLM requests
4
+ *
5
+ * Combines:
6
+ * 1. System prompt
7
+ * 2. Retrieved relevant context (RAG)
8
+ * 3. Last N verbatim turns
9
+ * 4. Compressed checkpoint (if exists)
10
+ *
11
+ * Applies token budget to ensure we fit within the model's context window.
12
+ */
13
+ Object.defineProperty(exports, "__esModule", { value: true });
14
+ exports.DEFAULT_ASSEMBLER_CONFIG = void 0;
15
+ exports.assembleContext = assembleContext;
16
+ const retriever_1 = require("./retriever");
17
+ const compressor_1 = require("../optimizer/compressor");
18
+ const dedup_1 = require("../optimizer/dedup");
19
// Default assembler settings; assembleContext uses this object when the caller
// passes no `config` argument.
+ exports.DEFAULT_ASSEMBLER_CONFIG = {
20
// Context window size in tokens; the prompt budget is this minus responseReserve.
+ maxContextTokens: 8000,
21
+ responseReserve: 2000,
22
// NOTE(review): assembleContext applies Math.min(2, recentMessageCount), so
// values above 2 currently have no additional effect — confirm this is intended.
+ recentMessageCount: 6,
23
+ ragTopK: 15,
24
+ compressionLevel: 0.3,
25
// Percentage; assembleContext clamps it to the range 0-95 before use.
+ targetSavingsPercent: 50,
26
// Lower bound for the savings-derived budget (still capped by the prompt budget).
+ qualityFloorTokens: 320,
27
+ };
28
/**
 * Rough token estimate: whitespace-separated word count times 1.3, rounded up.
 *
 * Empty, whitespace-only and non-string input yields 0; leading and trailing
 * whitespace does not add phantom words.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokens(text) {
    if (typeof text !== 'string') {
        return 0;
    }
    const trimmed = text.trim();
    if (trimmed === '') {
        // ''.split(/\s+/) is [''], which would otherwise count as one word.
        return 0;
    }
    return Math.ceil(trimmed.split(/\s+/).length * 1.3);
}
31
/**
 * Assemble the context window for an LLM request.
 *
 * Messages are emitted in this order: system prompt, checkpoint summary, one
 * system message holding retrieved (RAG) context, compressed older history,
 * the most recent history messages, and finally the current user message when
 * the stored history does not already end with it.
 *
 * The current user turn is always included, even when the budget is exhausted.
 *
 * @param userMessage - The current user message.
 * @param conversationId - The conversation ID.
 * @param systemPrompt - The system prompt.
 * @param queries - Database queries instance.
 * @param checkpointSummary - Latest checkpoint summary, if any.
 * @param config - Assembler configuration.
 * @returns `messages` to send, `tokensRaw` (estimate for the uncompressed
 *   conversation), `tokensSent` (estimate for what was assembled) and
 *   `ragResultCount` (retrieved results not already part of the kept history).
 */
async function assembleContext(userMessage, conversationId, systemPrompt, queries, checkpointSummary, config = exports.DEFAULT_ASSEMBLER_CONFIG) {
    const tokenBudget = config.maxContextTokens - config.responseReserve;
    let tokensUsed = 0;
    let tokensRaw = 0;
    const assembled = [];
    // 1. System prompt (always included)
    if (systemPrompt) {
        const sysTokens = estimateTokens(systemPrompt);
        assembled.push({ role: 'system', content: systemPrompt });
        tokensUsed += sysTokens;
        tokensRaw += sysTokens;
    }
    // 2. Checkpoint summary (if exists)
    if (checkpointSummary) {
        const cpTokens = estimateTokens(checkpointSummary);
        assembled.push({
            role: 'system',
            content: `[Conversation Summary]\n${checkpointSummary}\n[End Summary]`,
        });
        tokensUsed += cpTokens;
        tokensRaw += cpTokens;
    }
    // 3. Retrieve relevant past context via RAG
    let ragResults = [];
    try {
        ragResults = await (0, retriever_1.retrieveSimilar)(userMessage, conversationId, queries, config.ragTopK);
    }
    catch {
        // Best effort: RAG may not be available, so continue without it.
    }
    // 4. Get all messages and calculate raw token count
    const allMessages = queries.getMessages(conversationId);
    for (const msg of allMessages) {
        tokensRaw += estimateTokens(msg.content);
    }
    const lastStored = allMessages[allMessages.length - 1];
    const historyAlreadyHasCurrentUser = lastStored?.role === 'user' &&
        lastStored.content.trim() === userMessage.trim();
    if (!historyAlreadyHasCurrentUser) {
        // This is the only place the current message is added to tokensRaw;
        // counting it again later would inflate the reported savings.
        tokensRaw += estimateTokens(userMessage);
    }
    const savingsPercent = Math.min(95, Math.max(0, config.targetSavingsPercent));
    const targetSentBySavings = Math.floor(tokensRaw * (1 - savingsPercent / 100));
    const effectiveBudget = Math.min(tokenBudget, Math.max(config.qualityFloorTokens, targetSentBySavings));
    // 5. Deduplicate messages (Stage 2 of optimizer pipeline)
    const dedupResult = (0, dedup_1.deduplicateByText)(allMessages.map((m) => ({
        id: m.id,
        role: m.role,
        content: m.content,
        created_at: m.created_at,
    })), 0.7);
    const dedupedMessages = dedupResult.kept;
    const latestHistoryMessage = dedupedMessages[dedupedMessages.length - 1];
    const hasCurrentUserInHistory = latestHistoryMessage?.role === 'user' &&
        latestHistoryMessage?.content.trim() === userMessage.trim();
    // Split into recent (verbatim) and older (compressible).
    // The count is clamped to 0..2. An explicit split index is used because
    // slice(-0) would return the whole array and duplicate every message.
    let verbatimCount = Math.max(0, Math.min(2, Math.trunc(config.recentMessageCount) || 0));
    if (hasCurrentUserInHistory) {
        // The current user turn must stay in the recent set so it is never
        // compressed or dropped.
        verbatimCount = Math.max(1, verbatimCount);
    }
    const splitIndex = Math.max(0, dedupedMessages.length - verbatimCount);
    const recentMessages = dedupedMessages.slice(splitIndex);
    const olderMessages = dedupedMessages.slice(0, splitIndex);
    const recentIds = new Set(recentMessages.map((m) => m.id));
    // 6. Compress older messages (Stage 1 of optimizer pipeline)
    const compressedOlder = (0, compressor_1.compressMessages)(olderMessages.map((m) => ({ role: m.role, content: m.content })), 0, // don't preserve any recent within this set — they're already separated
    config.compressionLevel);
    // 7. Add RAG results (that aren't already in recent or older messages)
    const ragTokenBudget = Math.floor((effectiveBudget - tokensUsed) * 0.2);
    let ragTokensUsed = 0;
    const allKeptIds = new Set([...recentIds, ...olderMessages.map((m) => m.id)]);
    const relevantRag = ragResults.filter((r) => !allKeptIds.has(r.id));
    const ragContextParts = [];
    for (const rag of relevantRag) {
        const compressed = (0, compressor_1.compressPrompt)(rag.content, config.compressionLevel);
        const tokens = estimateTokens(compressed.compressed);
        if (ragTokensUsed + tokens <= ragTokenBudget) {
            ragContextParts.push(`[${rag.role}]: ${compressed.compressed}`);
            ragTokensUsed += tokens;
        }
    }
    if (ragContextParts.length > 0) {
        assembled.push({
            role: 'system',
            content: `[Relevant Earlier Context]\n${ragContextParts.join('\n')}\n[End Context]`,
        });
        tokensUsed += ragTokensUsed;
    }
    // 8. Add compressed older messages
    const olderBudget = Math.floor((effectiveBudget - tokensUsed) * 0.45);
    let olderTokensUsed = 0;
    for (const msg of compressedOlder) {
        const tokens = estimateTokens(msg.content);
        if (olderTokensUsed + tokens <= olderBudget) {
            assembled.push({
                role: msg.role,
                content: msg.content,
            });
            olderTokensUsed += tokens;
        }
    }
    tokensUsed += olderTokensUsed;
    // 9. Add recent messages (adaptive compression in aggressive mode)
    const remainingBudget = effectiveBudget - tokensUsed;
    let recentTokensUsed = 0;
    const aggressiveMode = config.targetSavingsPercent >= 70;
    for (let index = 0; index < recentMessages.length; index++) {
        const msg = recentMessages[index];
        const isNewestHistoryMessage = index === recentMessages.length - 1;
        // Preserve newest user message from history verbatim for intent fidelity.
        const shouldPreserveVerbatim = msg.role === 'system' || (isNewestHistoryMessage && msg.role === 'user');
        let candidateContent = msg.content;
        if (!shouldPreserveVerbatim && aggressiveMode && msg.role === 'assistant') {
            candidateContent = (0, compressor_1.compressPrompt)(msg.content, Math.min(0.95, config.compressionLevel + 0.1)).compressed;
        }
        const tokens = estimateTokens(candidateContent);
        // The current user turn is never dropped for budget reasons: step 10
        // will not re-add it, so skipping it here would send no prompt at all.
        const isCurrentUserTurn = isNewestHistoryMessage && hasCurrentUserInHistory;
        if (isCurrentUserTurn || recentTokensUsed + tokens <= remainingBudget) {
            assembled.push({
                role: msg.role,
                content: candidateContent,
            });
            recentTokensUsed += tokens;
        }
    }
    tokensUsed += recentTokensUsed;
    // 10. Add the current user message if not already included from history
    if (!hasCurrentUserInHistory) {
        assembled.push({ role: 'user', content: userMessage });
        tokensUsed += estimateTokens(userMessage);
        // tokensRaw already accounts for this message (see step 4).
    }
    return {
        messages: assembled,
        tokensRaw,
        tokensSent: tokensUsed,
        ragResultCount: relevantRag.length,
    };
}
178
+ //# sourceMappingURL=assembler.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"assembler.js","sourceRoot":"","sources":["../../../src/core/rag/assembler.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;GAUG;;;AAiDH,0CAmMC;AAhPD,2CAAgE;AAChE,wDAA2E;AAC3E,8CAAuD;AAmB1C,QAAA,wBAAwB,GAAoB;IACvD,gBAAgB,EAAE,IAAI;IACtB,eAAe,EAAE,IAAI;IACrB,kBAAkB,EAAE,CAAC;IACrB,OAAO,EAAE,EAAE;IACX,gBAAgB,EAAE,GAAG;IACrB,oBAAoB,EAAE,EAAE;IACxB,kBAAkB,EAAE,GAAG;CACxB,CAAC;AAEF,SAAS,cAAc,CAAC,IAAY;IAClC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC;AACnD,CAAC;AAED;;;;;;;;;GASG;AACI,KAAK,UAAU,eAAe,CACnC,WAAmB,EACnB,cAAsB,EACtB,YAA2B,EAC3B,OAAgB,EAChB,iBAAgC,EAChC,SAA0B,gCAAwB;IAOlD,MAAM,WAAW,GAAG,MAAM,CAAC,gBAAgB,GAAG,MAAM,CAAC,eAAe,CAAC;IACrE,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,MAAM,SAAS,GAAkB,EAAE,CAAC;IAEpC,qCAAqC;IACrC,IAAI,YAAY,EAAE,CAAC;QACjB,MAAM,SAAS,GAAG,cAAc,CAAC,YAAY,CAAC,CAAC;QAC/C,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC;QAC1D,UAAU,IAAI,SAAS,CAAC;QACxB,SAAS,IAAI,SAAS,CAAC;IACzB,CAAC;IAED,oCAAoC;IACpC,IAAI,iBAAiB,EAAE,CAAC;QACtB,MAAM,QAAQ,GAAG,cAAc,CAAC,iBAAiB,CAAC,CAAC;QACnD,SAAS,CAAC,IAAI,CAAC;YACb,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,2BAA2B,iBAAiB,iBAAiB;SACvE,CAAC,CAAC;QACH,UAAU,IAAI,QAAQ,CAAC;QACvB,SAAS,IAAI,QAAQ,CAAC;IACxB,CAAC;IAED,4CAA4C;IAC5C,IAAI,UAAU,GAAuB,EAAE,CAAC;IACxC,IAAI,CAAC;QACH,UAAU,GAAG,MAAM,IAAA,2BAAe,EAChC,WAAW,EACX,cAAc,EACd,OAAO,EACP,MAAM,CAAC,OAAO,CACf,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,2BAA2B;IAC7B,CAAC;IAED,oDAAoD;IACpD,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC,cAAc,CAAC,CAAC;IACxD,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;QAC9B,SAAS,IAAI,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3C,CAAC;IAED,MAAM,4BAA4B,GAChC,WAAW,CAAC,MAAM,GAAG,CAAC;QACtB,WAAW,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM;QACnD,WAAW,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,WAAW,CAAC,IAAI,EAAE,CAAC;IAE5E,IAAI,CAAC,4BAA4B,EAAE,CAAC;QAClC,SAAS,IAAI,cAAc,CAAC,WAAW,CAAC,CAAC;IAC3C,CAAC;IAED,MAAM,mBAAmB,GAAG,IAAI,CAAC,KAAK,CACpC,SAAS,GAAG,CAAC,C
AAC,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,oBAAoB,CAAC,CAAC,GAAG,GAAG,CAAC,CAC/E,CAAC;IAEF,MAAM,eAAe,GAAG,IAAI,CAAC,GAAG,CAC9B,WAAW,EACX,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,kBAAkB,EAAE,mBAAmB,CAAC,CACzD,CAAC;IAEF,0DAA0D;IAC1D,MAAM,WAAW,GAAG,IAAA,yBAAiB,EACnC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACtB,EAAE,EAAE,CAAC,CAAC,EAAE;QACR,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,UAAU,EAAE,CAAC,CAAC,UAAU;KACzB,CAAC,CAAC,EACH,GAAG,CACJ,CAAC;IACF,MAAM,eAAe,GAAG,WAAW,CAAC,IAAI,CAAC;IACzC,MAAM,oBAAoB,GAAG,eAAe,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACzE,MAAM,uBAAuB,GAC3B,oBAAoB,EAAE,IAAI,KAAK,MAAM;QACrC,oBAAoB,EAAE,OAAO,CAAC,IAAI,EAAE,KAAK,WAAW,CAAC,IAAI,EAAE,CAAC;IAE9D,wDAAwD;IACxD,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,kBAAkB,CAAC,CAAC;IAC7D,MAAM,cAAc,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC,aAAa,CAAC,CAAC;IAC7D,MAAM,aAAa,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,aAAa,IAAI,SAAS,CAAC,CAAC;IAC5E,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAE3D,6DAA6D;IAC7D,MAAM,eAAe,GAAG,IAAA,6BAAgB,EACtC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,EAChE,CAAC,EAAE,wEAAwE;IAC3E,MAAM,CAAC,gBAAgB,CACxB,CAAC;IAEF,uEAAuE;IACvE,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,eAAe,GAAG,UAAU,CAAC,GAAG,GAAG,CAAC,CAAC;IACxE,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,SAAS,EAAE,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAE9E,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1B,MAAM,WAAW,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAEpE,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3B,MAAM,eAAe,GAAa,EAAE,CAAC;YAErC,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;gBAC9B,MAAM,UAAU,GAAG,IAAA,2BAAc,EAAC,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,gBAAgB,CAAC,CAAC;gBACxE,MAAM,MAAM,GAAG,cAAc,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC;g
BAErD,IAAI,aAAa,GAAG,MAAM,IAAI,cAAc,EAAE,CAAC;oBAC7C,eAAe,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,IAAI,MAAM,UAAU,CAAC,UAAU,EAAE,CAAC,CAAC;oBAChE,aAAa,IAAI,MAAM,CAAC;gBAC1B,CAAC;YACH,CAAC;YAED,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC/B,SAAS,CAAC,IAAI,CAAC;oBACb,IAAI,EAAE,QAAQ;oBACd,OAAO,EAAE,+BAA+B,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,iBAAiB;iBACpF,CAAC,CAAC;gBACH,UAAU,IAAI,aAAa,CAAC;YAC9B,CAAC;QACH,CAAC;IACH,CAAC;IAED,mCAAmC;IACnC,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,eAAe,GAAG,UAAU,CAAC,GAAG,IAAI,CAAC,CAAC;IACtE,IAAI,eAAe,GAAG,CAAC,CAAC;IAExB,KAAK,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QAClC,MAAM,MAAM,GAAG,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAC3C,IAAI,eAAe,GAAG,MAAM,IAAI,WAAW,EAAE,CAAC;YAC5C,SAAS,CAAC,IAAI,CAAC;gBACb,IAAI,EAAE,GAAG,CAAC,IAAuC;gBACjD,OAAO,EAAE,GAAG,CAAC,OAAO;aACrB,CAAC,CAAC;YACH,eAAe,IAAI,MAAM,CAAC;QAC5B,CAAC;IACH,CAAC;IACD,UAAU,IAAI,eAAe,CAAC;IAE9B,mEAAmE;IACnE,MAAM,eAAe,GAAG,eAAe,GAAG,UAAU,CAAC;IACrD,IAAI,gBAAgB,GAAG,CAAC,CAAC;IACzB,MAAM,cAAc,GAAG,MAAM,CAAC,oBAAoB,IAAI,EAAE,CAAC;IAEzD,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,cAAc,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE,CAAC;QAC3D,MAAM,GAAG,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QAClC,MAAM,sBAAsB,GAAG,KAAK,KAAK,cAAc,CAAC,MAAM,GAAG,CAAC,CAAC;QAEnE,0EAA0E;QAC1E,MAAM,sBAAsB,GAC1B,GAAG,CAAC,IAAI,KAAK,QAAQ,IAAI,CAAC,sBAAsB,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC;QAE3E,IAAI,gBAAgB,GAAG,GAAG,CAAC,OAAO,CAAC;QACnC,IAAI,CAAC,sBAAsB,IAAI,cAAc,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;YAC1E,gBAAgB,GAAG,IAAA,2BAAc,EAC/B,GAAG,CAAC,OAAO,EACX,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,MAAM,CAAC,gBAAgB,GAAG,GAAG,CAAC,CAC9C,CAAC,UAAU,CAAC;QACf,CAAC;QAED,MAAM,MAAM,GAAG,cAAc,CAAC,gBAAgB,CAAC,CAAC;QAChD,IAAI,gBAAgB,GAAG,MAAM,IAAI,eAAe,EAAE,CAAC;YACjD,SAAS,CAAC,IAAI,CAAC;gBACb,IAAI,EAAE,GAAG,CAAC,IAAuC;gBACjD,OAAO,EAAE,gBAAgB;aAC1B,CAAC,CAAC;YACH,gBAAgB,IAAI,MAAM,CAAC;QAC7B,CAAC;IACH,CAAC;IACD,UAAU,IAAI,gBAAgB,CAAC;IAE/B,wEAAwE;IACxE,IAAI,CAAC,uBAAuB,EAAE,CAAC;QAC7B,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,CAAC,CAAC;QACvD,UAAU,IAAI,cAAc,CAAC,WAAW,CAA
C,CAAC;QAC1C,SAAS,IAAI,cAAc,CAAC,WAAW,CAAC,CAAC;IAC3C,CAAC;IAED,OAAO;QACL,QAAQ,EAAE,SAAS;QACnB,SAAS;QACT,UAAU,EAAE,UAAU;QACtB,cAAc,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM;KACvE,CAAC;AACJ,CAAC"}
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Embedder — Local embedding using all-MiniLM-L6-v2 via onnxruntime-node
3
+ *
4
+ * Zero API cost, ~23ms per embedding, fully offline.
5
+ * Output dimension: 384
6
+ */
7
+ /**
8
+ * Generate embedding for a text using the ONNX model.
9
+ * Returns null if the model is not available.
10
+ */
11
+ export declare function embed(text: string): Promise<Float32Array | null>;
12
+ /**
13
+ * Batch embed multiple texts.
14
+ */
15
+ export declare function embedBatch(texts: string[]): Promise<(Float32Array | null)[]>;
16
+ //# sourceMappingURL=embedder.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedder.d.ts","sourceRoot":"","sources":["../../../src/core/rag/embedder.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAmGH;;;GAGG;AACH,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC,CAuDtE;AA2CD;;GAEG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,CAAC,YAAY,GAAG,IAAI,CAAC,EAAE,CAAC,CAMlF"}
@@ -0,0 +1,223 @@
1
+ "use strict";
2
+ /**
3
+ * Embedder — Local embedding using all-MiniLM-L6-v2 via onnxruntime-node
4
+ *
5
+ * Zero API cost, ~23ms per embedding, fully offline.
6
+ * Output dimension: 384
7
+ */
8
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
9
+ if (k2 === undefined) k2 = k;
10
+ var desc = Object.getOwnPropertyDescriptor(m, k);
11
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
12
+ desc = { enumerable: true, get: function() { return m[k]; } };
13
+ }
14
+ Object.defineProperty(o, k2, desc);
15
+ }) : (function(o, m, k, k2) {
16
+ if (k2 === undefined) k2 = k;
17
+ o[k2] = m[k];
18
+ }));
19
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
20
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
21
+ }) : function(o, v) {
22
+ o["default"] = v;
23
+ });
24
+ var __importStar = (this && this.__importStar) || (function () {
25
+ var ownKeys = function(o) {
26
+ ownKeys = Object.getOwnPropertyNames || function (o) {
27
+ var ar = [];
28
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
29
+ return ar;
30
+ };
31
+ return ownKeys(o);
32
+ };
33
+ return function (mod) {
34
+ if (mod && mod.__esModule) return mod;
35
+ var result = {};
36
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
37
+ __setModuleDefault(result, mod);
38
+ return result;
39
+ };
40
+ })();
41
+ var __importDefault = (this && this.__importDefault) || function (mod) {
42
+ return (mod && mod.__esModule) ? mod : { "default": mod };
43
+ };
44
+ Object.defineProperty(exports, "__esModule", { value: true });
45
+ exports.embed = embed;
46
+ exports.embedBatch = embedBatch;
47
+ const path_1 = __importDefault(require("path"));
48
+ const fs_1 = __importDefault(require("fs"));
49
+ const os_1 = __importDefault(require("os"));
50
+ let session = null;
51
+ let sessionChecked = false;
52
+ let tokenizer = null;
53
+ // Simple word-piece tokenizer for MiniLM
54
+ // In production, you'd use the actual tokenizer from the model
55
+ // For now, we use a simplified approach
56
+ const MAX_SEQ_LENGTH = 256;
57
/**
 * Memoized result of the one-time model load. Holding the promise (rather
 * than only the resolved session) lets callers that arrive while the load is
 * still in flight wait for the same outcome instead of being told "no model".
 */
let sessionLoadPromise = null;

/**
 * Try to create an ONNX inference session for all-MiniLM-L6-v2.
 *
 * Looks for `all-MiniLM-L6-v2.onnx` under `~/.opengauge/models/`.
 * Never rejects: a missing model file or any initialization error is logged
 * and reported as `null`.
 *
 * @returns {Promise<object|null>} The inference session, or null when the
 *   model is not available.
 */
async function loadSession() {
    try {
        const ort = await Promise.resolve().then(() => __importStar(require('onnxruntime-node')));
        // Check for model in ~/.opengauge/models/
        const modelDir = path_1.default.join(os_1.default.homedir(), '.opengauge', 'models');
        const modelPath = path_1.default.join(modelDir, 'all-MiniLM-L6-v2.onnx');
        if (!fs_1.default.existsSync(modelPath)) {
            console.log('[OpenGauge] Embedding model not found. Using fallback hash embeddings.');
            console.log(` To enable full semantic search, place all-MiniLM-L6-v2.onnx in ${modelDir}`);
            return null;
        }
        const loaded = await ort.InferenceSession.create(modelPath, {
            executionProviders: ['cpu'],
        });
        console.log('[OpenGauge] ONNX embedding model loaded successfully.');
        return loaded;
    }
    catch (error) {
        console.warn('[OpenGauge] Failed to initialize ONNX embedding model. Using fallback.', error);
        return null;
    }
}

/**
 * Initialize the ONNX runtime session with MiniLM-L6-v2.
 *
 * The load is attempted at most once per process; its result (including
 * "not available") is cached. Concurrent callers share the same in-flight
 * load, so none of them observes a spurious `null` while the model is still
 * being created.
 *
 * @returns {Promise<object|null>} The cached session, or null when the model
 *   could not be loaded.
 */
async function getSession() {
    if (session) {
        return session;
    }
    if (sessionLoadPromise === null) {
        sessionChecked = true;
        sessionLoadPromise = loadSession().then((loaded) => {
            session = loaded;
            return loaded;
        });
    }
    return sessionLoadPromise;
}
88
/**
 * Build model inputs from raw text with a naive tokenizer.
 *
 * The text is lower-cased, punctuation is turned into spaces, and the result
 * is split on whitespace. At most MAX_SEQ_LENGTH - 2 words are kept so the
 * sequence, once wrapped in [CLS] (101) and [SEP] (102), fits the limit.
 * Word IDs are pseudo IDs produced by hashWord, not real vocabulary lookups.
 *
 * @param {string} text Input text.
 * @returns {{inputIds: BigInt64Array, attentionMask: BigInt64Array, tokenTypeIds: BigInt64Array}}
 *   Three equally sized arrays of length `words + 2`.
 */
function simpleTokenize(text) {
    const lowered = text.toLowerCase();
    const withoutPunctuation = lowered.replace(/[^\w\s]/g, ' ');
    const tokens = withoutPunctuation
        .split(/\s+/)
        .filter((piece) => piece.length > 0)
        .slice(0, MAX_SEQ_LENGTH - 2);
    const total = tokens.length + 2; // [CLS] + words + [SEP]
    const inputIds = new BigInt64Array(total);
    // Every position holds a real token (no padding), so the mask is all ones.
    const attentionMask = new BigInt64Array(total).fill(BigInt(1));
    // Single-segment input: token type ids stay zero.
    const tokenTypeIds = new BigInt64Array(total);
    inputIds[0] = BigInt(101); // [CLS]
    tokens.forEach((token, position) => {
        inputIds[position + 1] = BigInt(hashWord(token));
    });
    inputIds[total - 1] = BigInt(102); // [SEP]
    return { inputIds, attentionMask, tokenTypeIds };
}
118
/**
 * Map a word to a deterministic pseudo token ID.
 *
 * Uses a 31-multiplier rolling hash truncated to 15 bits, then folds the
 * result into the half-open range [1000, 30522). The upper bound is the size
 * of the BERT uncased WordPiece vocabulary that MiniLM-L6-v2 is built on, so
 * the ID is always a valid row of the model's embedding table. IDs below 1000
 * are avoided because that region holds special and unused tokens.
 *
 * @param {string} word Word to hash.
 * @returns {number} Integer in [1000, 30522).
 */
function hashWord(word) {
    const VOCAB_SIZE = 30522;
    const FIRST_WORD_ID = 1000;
    let hash = 0;
    for (let i = 0; i < word.length; i++) {
        hash = ((hash << 5) - hash + word.charCodeAt(i)) & 0x7fff;
    }
    // Fold into the usable ID span so the result never reaches VOCAB_SIZE.
    return FIRST_WORD_ID + (hash % (VOCAB_SIZE - FIRST_WORD_ID));
}
125
/**
 * Generate an embedding for a text.
 *
 * When the ONNX session is available the text is tokenized with
 * simpleTokenize, run through the model, mean-pooled over all tokens and
 * L2-normalized into a 384-dimensional vector. When the session is not
 * available, when the model output is smaller than expected, or when
 * inference throws, the hash-based fallbackEmbed result is returned instead.
 *
 * @param {string} text Text to embed.
 * @returns {Promise<Float32Array>} 384-dimensional embedding. None of the
 *   paths in this function resolves to null.
 */
async function embed(text) {
    const sess = await getSession();
    if (!sess) {
        // Fallback: generate a simple hash embedding
        return fallbackEmbed(text);
    }
    try {
        const ort = await Promise.resolve().then(() => __importStar(require('onnxruntime-node')));
        const { inputIds, attentionMask, tokenTypeIds } = simpleTokenize(text);
        const feeds = {
            input_ids: new ort.Tensor('int64', inputIds, [1, inputIds.length]),
            attention_mask: new ort.Tensor('int64', attentionMask, [1, attentionMask.length]),
            token_type_ids: new ort.Tensor('int64', tokenTypeIds, [1, tokenTypeIds.length]),
        };
        const results = await sess.run(feeds);
        // Prefer the per-token outputs by name; otherwise take the first output.
        const output = results['last_hidden_state'] || results['token_embeddings'] || Object.values(results)[0];
        const hiddenSize = 384;
        const numTokens = inputIds.length;
        const expectedLength = numTokens * hiddenSize;
        const data = output ? output.data : undefined;
        // Pooling below indexes data[token * hiddenSize + dim]. A shorter buffer
        // (e.g. an already-pooled output) would be read out of bounds and turn
        // the whole vector into NaN, so use the fallback instead.
        if (!data || data.length < expectedLength) {
            const actualLength = data ? data.length : 0;
            console.warn('Embedding failed, using fallback:', new Error(`Unexpected model output size: expected at least ${expectedLength} values, got ${actualLength}`));
            return fallbackEmbed(text);
        }
        // Mean pooling over token embeddings. The attention mask is all ones
        // (no padding), so every token contributes equally.
        const embedding = new Float32Array(hiddenSize);
        for (let i = 0; i < numTokens; i++) {
            for (let j = 0; j < hiddenSize; j++) {
                embedding[j] += data[i * hiddenSize + j];
            }
        }
        for (let j = 0; j < hiddenSize; j++) {
            embedding[j] /= numTokens;
        }
        // L2 normalize (skipped for an all-zero vector to avoid dividing by zero)
        let norm = 0;
        for (let j = 0; j < hiddenSize; j++) {
            norm += embedding[j] * embedding[j];
        }
        norm = Math.sqrt(norm);
        if (norm > 0) {
            for (let j = 0; j < hiddenSize; j++) {
                embedding[j] /= norm;
            }
        }
        return embedding;
    }
    catch (error) {
        console.warn('Embedding failed, using fallback:', error);
        return fallbackEmbed(text);
    }
}
177
/**
 * Fallback embedding using character n-gram hashing.
 *
 * The text is lower-cased, everything except letters, combining marks,
 * digits, underscores and whitespace is removed, and the remainder is split
 * on whitespace. Every 1-, 2- and 3-character n-gram of each word is hashed
 * into one of 384 buckets, and the bucket counts are L2-normalized.
 *
 * Letters and digits are matched with Unicode property classes, so
 * non-Latin text (e.g. CJK, Cyrillic, accented Latin) keeps its characters
 * instead of collapsing to an all-zero vector. ASCII input is treated exactly
 * as with the ASCII-only `\w` class.
 *
 * Not as good as MiniLM but enables basic similarity search without the
 * ONNX model.
 *
 * @param {string} text Text to embed.
 * @returns {Float32Array} 384-dimensional vector; unit length unless the
 *   text contains no hashable characters, in which case it is all zeros.
 */
function fallbackEmbed(text) {
    const DIM = 384;
    const embedding = new Float32Array(DIM);
    const normalized = text.toLowerCase().replace(/[^\p{L}\p{M}\p{N}_\s]/gu, '');
    const words = normalized.split(/\s+/);
    // Hash words into the embedding dimensions
    for (const word of words) {
        for (let n = 1; n <= 3; n++) {
            for (let i = 0; i <= word.length - n; i++) {
                const ngram = word.substring(i, i + n);
                let hash = 0;
                for (let c = 0; c < ngram.length; c++) {
                    hash = (hash * 31 + ngram.charCodeAt(c)) % DIM;
                }
                embedding[hash] += 1;
            }
        }
    }
    // L2 normalize (skipped for an all-zero vector to avoid dividing by zero)
    let norm = 0;
    for (let i = 0; i < DIM; i++) {
        norm += embedding[i] * embedding[i];
    }
    norm = Math.sqrt(norm);
    if (norm > 0) {
        for (let i = 0; i < DIM; i++) {
            embedding[i] /= norm;
        }
    }
    return embedding;
}
213
/**
 * Embed several texts, one after another.
 *
 * Each text is awaited before the next one starts, and the output keeps the
 * input order.
 *
 * @param {string[]} texts Texts to embed.
 * @returns {Promise<Array>} One embed() result per input text.
 */
async function embedBatch(texts) {
    const embeddings = [];
    for (let index = 0; index < texts.length; index += 1) {
        const vector = await embed(texts[index]);
        embeddings.push(vector);
    }
    return embeddings;
}
223
+ //# sourceMappingURL=embedder.js.map