@meller/tokentalos 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +121 -0
- package/api/api/v1/analytics.js +153 -0
- package/api/api/v1/opv.js +36 -0
- package/api/api/v1/usage.js +318 -0
- package/api/index.js +111 -0
- package/api/middleware/auth.js +45 -0
- package/api/package.json +38 -0
- package/bin/tokentalos.js +221 -0
- package/index.js +151 -0
- package/lib/engine/ai_analyzer.js +66 -0
- package/lib/engine/analyzer.js +117 -0
- package/lib/engine/cache.js +30 -0
- package/lib/engine/db.js +307 -0
- package/lib/engine/index.js +320 -0
- package/lib/engine/llm_clients.js +255 -0
- package/lib/engine/opv.js +96 -0
- package/lib/engine/parameterizer.js +68 -0
- package/lib/engine/pii_detector.js +73 -0
- package/lib/engine/pricing.js +106 -0
- package/lib/engine/processor.js +157 -0
- package/lib/engine/security.js +101 -0
- package/lib/engine/tokenizers.js +40 -0
- package/package.json +63 -0
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
import { initDb, getDb } from './db.js';
|
|
2
|
+
import { processPromptParts } from './processor.js';
|
|
3
|
+
import { TokenTalosPrompt } from './parameterizer.js';
|
|
4
|
+
import { getCostCalculator } from './pricing.js';
|
|
5
|
+
import { getLLMGateway } from './llm_clients.js';
|
|
6
|
+
import { PromptCache } from './cache.js';
|
|
7
|
+
import { OPVService } from './opv.js';
|
|
8
|
+
import { runHeuristicAnalysis } from './analyzer.js';
|
|
9
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
10
|
+
|
|
11
|
+
/**
 * Coordinates prompt processing, response caching, LLM execution and usage
 * persistence for TokenTalos.
 *
 * Collaborators (`initDb`/`getDb`, `processPromptParts`, `TokenTalosPrompt`,
 * `getCostCalculator`, `getLLMGateway`, `PromptCache`, `OPVService`,
 * `runHeuristicAnalysis`, `uuidv4`) are imported at the top of the module.
 */
export class TokenTalosEngine {
  /**
   * @param {object} [config] - Engine options. Caller-supplied keys override
   *   the feature defaults listed below.
   */
  constructor(config = {}) {
    this.config = {
      formattingFeatures: ['pii', 'neutralize'],
      intelligenceFeatures: ['cache', 'explain'],
      securityFeatures: ['injection', 'secrets'],
      securityAction: 'warn',
      piiAction: 'mask',
      ...config
    };
    this.managedMode = this.config.managedMode || false;
    this.orgId = this.config.orgId || 'default_org';
    this.projectId = this.config.projectId || 'default';
    this.db = null;
    this.cache = null;
    this.opv = null;
    this.initialized = false;
    // In-flight initialisation, shared by concurrent init() callers.
    this._initPromise = null;
  }

  /**
   * Opens the database, seeds the default records and builds the cache and
   * OPV services. Safe to call repeatedly and concurrently: the setup runs
   * once, and a failed attempt may be retried by calling init() again.
   *
   * @returns {Promise<void>}
   */
  async init() {
    if (this.initialized) return;
    if (!this._initPromise) {
      this._initPromise = this._initialize().catch((err) => {
        // Drop the failed attempt so a later init() can retry.
        this._initPromise = null;
        throw err;
      });
    }
    await this._initPromise;
  }

  /** Performs the one-time setup behind init(). */
  async _initialize() {
    // Spread first so the SQLite fallbacks win even when the caller passed
    // `databaseType: undefined` / `sqlitePath: undefined` explicitly.
    const dbConfig = {
      ...this.config,
      databaseType: this.config.databaseType || 'sqlite',
      sqlitePath: this.config.sqlitePath || ':memory:'
    };

    this.db = await initDb(dbConfig);

    // Create default org if it doesn't exist
    await this.ensureDefaultOrg();

    this.cache = new PromptCache(getDb());
    this.opv = new OPVService(this.config, getLLMGateway(this.config));
    this.initialized = true;
  }

  /**
   * Best-effort seeding of the default organization, local user and their
   * membership. Failures are reported with a warning but never thrown.
   *
   * @returns {Promise<void>}
   */
  async ensureDefaultOrg() {
    const db = getDb();
    try {
      // 1. Ensure Default Organization
      await db.run('INSERT INTO organizations (id, name) VALUES (?, ?) ON CONFLICT DO NOTHING', ['default_org', 'Default Organization']);

      // 2. Ensure Default User (for local mode)
      await db.run('INSERT INTO users (id, email, name) VALUES (?, ?, ?) ON CONFLICT DO NOTHING', ['local_user', 'dev@tokentalos.local', 'Local Developer']);

      // 3. Ensure Membership
      await db.run('INSERT INTO organization_members (org_id, user_id, role) VALUES (?, ?, ?) ON CONFLICT DO NOTHING', ['default_org', 'local_user', 'admin']);
    } catch (e) {
      // Still non-fatal (schema may not be ready, or a unique constraint on a
      // non-ID column fired), but no longer invisible.
      console.warn('[TokenTalos] Could not seed default organization:', e?.message ?? e);
    }
  }

  /**
   * Looks up the organization that owns an API key.
   *
   * @param {string} key - Value compared against `api_keys.key_hash`.
   * @returns {Promise<*>} The matching `org_id`, or null when the key is
   *   missing or unknown.
   * @throws {Error} When the engine has not been initialized.
   */
  async validateApiKey(key) {
    if (!key) return null;
    const db = this.getDb();
    // In a real system, we'd hash the key here before lookup
    // NOTE(review): the raw key is compared to the key_hash column as-is.
    const keyRecord = await db.get('SELECT org_id FROM api_keys WHERE key_hash = ?', [key]);
    return keyRecord ? keyRecord.org_id : null;
  }

  /**
   * Delegates reasoning verification to the OPV service, initializing the
   * engine first when necessary.
   *
   * @param {object} params - Passed through to `OPVService.verifyReasoning`.
   * @returns {Promise<*>} Whatever the OPV service returns.
   */
  async verifyReasoning(params) {
    if (!this.initialized) await this.init();
    return await this.opv.verifyReasoning(params);
  }

  /**
   * @returns {*} The shared database handle from `getDb()`.
   * @throws {Error} When the engine has not been initialized.
   */
  getDb() {
    if (!this.initialized) throw new Error('TokenTalos Engine not initialized. Call init() first.');
    return getDb();
  }

  /**
   * Runs the prompt parts through `processPromptParts` with this engine's
   * configuration.
   *
   * @param {object} parts - Named prompt parts.
   * @returns {Promise<*>} The processor's result.
   */
  async process(parts) {
    return processPromptParts(parts, this.config);
  }

  /**
   * Creates an empty prompt for the given (or configured default) provider
   * and model.
   *
   * @param {string} [provider]
   * @param {string} [model]
   * @returns {TokenTalosPrompt}
   */
  createPrompt(provider, model) {
    const target = this._resolveTarget(provider, model);
    return new TokenTalosPrompt(target.provider, target.model);
  }

  /** Applies the provider/model fallbacks: argument, then config, then built-in default. */
  _resolveTarget(provider, model) {
    return {
      provider: provider || this.config.llmProvider || 'gemini',
      model: model || this.config.defaultModel || 'gemini-3-flash-preview'
    };
  }

  /** Stores one `prompt_variables` row per tracked prompt variable. */
  async _persistVariables(db, usageId, variables) {
    for (const v of variables) {
      await db.run(`
        INSERT INTO prompt_variables (usage_id, name, content, original_content, token_count, char_count, position)
        VALUES (?, ?, ?, ?, ?, ?, ?)
      `, [usageId, v.name, v.content, v.original_content, v.token_count, v.char_count, v.position]);
    }
  }

  /** Stores one `variable_actions` row per processing action. */
  async _persistActions(db, usageId, actions) {
    for (const action of actions) {
      await db.run(`
        INSERT INTO variable_actions (usage_id, variable_name, action_type, action_method, details)
        VALUES (?, ?, ?, ?, ?)
      `, [
        usageId,
        action.target,
        action.type,
        action.method || null,
        JSON.stringify(action)
      ]);
    }
  }

  /**
   * Processes the prompt parts, answers from the cache when possible,
   * otherwise calls the LLM gateway, and records usage in the database.
   *
   * @param {object} params
   * @param {string} [params.provider] - Falls back to config, then 'gemini'.
   * @param {string} [params.model] - Falls back to config, then the built-in default.
   * @param {object} params.parts - Named prompt parts.
   * @param {string} [params.endpoint] - Stored with the usage row.
   * @param {object} [params.options] - Passed to the gateway.
   * @param {boolean} [params.bypassCache=false] - Skip the cache lookup.
   * @param {string} [params.orgId] - Overrides the engine's org id.
   * @param {string} [params.projectId] - Overrides the engine's project id.
   * @returns {Promise<object>} `{ id, content, cached, ... }`; cache hits add
   *   `saved_tokens`/`saved_cost`, fresh executions add `usage`.
   */
  async execute(params) {
    const { provider, model, parts, endpoint, options = {}, bypassCache = false, orgId, projectId } = params;

    // Same auto-init as verifyReasoning(); without it `this.cache` is null and
    // the first call fails with an opaque TypeError.
    if (!this.initialized) await this.init();

    const startTime = Date.now();

    const finalOrgId = orgId || this.orgId;
    const finalProjectId = projectId || this.projectId;

    // 1. Process Parts
    const { processedParts, metadata } = await this.process(parts);
    const actions = metadata.actions_taken || [];

    const { provider: finalProvider, model: finalModel } = this._resolveTarget(provider, model);
    const prompt = this.createPrompt(finalProvider, finalModel);

    // Add processed parts to the prompt (processed text plus the original)
    for (const key of Object.keys(processedParts)) {
      if (key === 'system') prompt.addSystem(processedParts[key], parts[key]);
      else if (key === 'context') prompt.addContext(processedParts[key], parts[key]);
      else if (key === 'history') prompt.addHistory(processedParts[key], parts[key]);
      else if (key === 'user_query') prompt.addUserQuery(processedParts[key], parts[key]);
      else prompt.add(key, processedParts[key], parts[key]);
    }

    const fullPromptString = prompt.toString();
    const promptHash = this.cache.generateHash(fullPromptString);

    // Track compression savings
    const compressionAction = actions.find((a) => a.type === 'compress');
    const savedChars = compressionAction?.saved_chars || 0;
    const savedTokens = Math.ceil(savedChars / 4); // Heuristic
    const calculator = getCostCalculator();
    const [savedCompressionCost] = calculator.calculateCost(finalProvider, finalModel, savedTokens, 0);

    // 2. Cache Check
    if (!bypassCache && this.config.intelligenceFeatures?.includes('cache')) {
      const cached = await this.cache.get(promptHash);
      if (cached) {
        // Log the cache hit as a usage event with 0 cost but record saved tokens
        const hitId = uuidv4();
        const db = this.getDb();
        const trackingData = prompt.getTrackingData();

        // Calculate what it WOULD have cost
        const [savedInputCost] = calculator.calculateCost(finalProvider, finalModel, trackingData.total_tokens, 0);

        await db.run(`
          INSERT INTO usage_data (id, org_id, project_id, type, provider, model, full_prompt, response_content, input_tokens, total_tokens, saved_tokens, saved_cost, input_cost, total_cost, endpoint, latency_ms, timestamp)
          VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        `, [
          hitId, finalOrgId, finalProjectId, 'cache_hit', finalProvider, finalModel, fullPromptString, cached.response_content, 0, 0,
          trackingData.total_tokens, savedInputCost, 0, 0, endpoint ?? null, Date.now() - startTime, trackingData.timestamp
        ]);

        await this._persistVariables(db, hitId, trackingData.variables);
        await this._persistActions(db, hitId, actions);

        return {
          id: cached.usage_id,
          content: cached.response_content,
          cached: true,
          saved_tokens: trackingData.total_tokens,
          saved_cost: savedInputCost,
          metadata: { ...metadata, latency_ms: Date.now() - startTime }
        };
      }
    }

    // 3. Execution
    const gateway = getLLMGateway(this.config);
    const messages = prompt.toMessages();
    const result = await gateway.execute(finalProvider, finalModel, messages, options);
    const latencyMs = Date.now() - startTime;

    // 4. Persistence
    const trackingData = prompt.getTrackingData();
    const [inputCost, outputCost] = calculator.calculateCost(
      finalProvider, finalModel, result.input_tokens, result.output_tokens
    );

    const db = this.getDb();
    const totalTokens = result.input_tokens + result.output_tokens;
    const limitExceeded = totalTokens > (this.config.maxTokens || 32000);

    await db.run(`
      INSERT INTO usage_data (id, org_id, project_id, type, provider, model, full_prompt, response_content, input_tokens, output_tokens, total_tokens, saved_tokens, saved_cost, input_cost, output_cost, total_cost, endpoint, latency_ms, token_limit_exceeded, timestamp)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `, [
      trackingData.id, finalOrgId, finalProjectId, 'execution', finalProvider, finalModel, fullPromptString, result.content, result.input_tokens, result.output_tokens,
      totalTokens, savedTokens, savedCompressionCost, inputCost, outputCost, inputCost + outputCost,
      endpoint ?? null, latencyMs, limitExceeded ? 1 : 0, trackingData.timestamp
    ]);

    await this._persistVariables(db, trackingData.id, trackingData.variables);
    await this._persistActions(db, trackingData.id, actions);

    // Persist Security Alerts
    for (const finding of (metadata.security_findings || [])) {
      await db.run(`
        INSERT INTO security_alerts (usage_id, type, description, severity, action_taken)
        VALUES (?, ?, ?, ?, ?)
      `, [
        trackingData.id,
        finding.type,
        finding.description,
        finding.severity,
        // Read from the engine config; a bare `config` is not defined here.
        this.config.securityAction || 'warn'
      ]);
    }

    // 5. Heuristic Analysis
    const analysis = runHeuristicAnalysis({
      total_tokens: totalTokens,
      input_tokens: result.input_tokens,
      output_tokens: result.output_tokens,
      total_cost: inputCost + outputCost,
      provider: finalProvider,
      model: finalModel
    }, trackingData.variables);
    if (analysis) {
      const planId = uuidv4();
      await db.run(`
        INSERT INTO explain_plans (
          id, usage_id, variable_analysis, detected_issues, optimization_suggestions,
          estimated_savings_pct, estimated_savings_usd,
          mce_best_alternative_model, mce_best_alternative_provider, mce_best_alternative_cost, mce_savings_pct
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
      `, [
        planId,
        trackingData.id,
        JSON.stringify(analysis.variable_analysis),
        JSON.stringify(analysis.detected_issues),
        JSON.stringify(analysis.optimization_suggestions),
        analysis.estimated_savings_pct,
        analysis.estimated_savings_usd,
        analysis.mce_best_alternative_model || null,
        analysis.mce_best_alternative_provider || null,
        analysis.mce_best_alternative_cost || 0,
        analysis.mce_savings_pct || 0
      ]);
    }

    // Save to Cache
    await this.cache.set(promptHash, result.content, trackingData.id);

    return {
      id: trackingData.id,
      content: result.content,
      cached: false,
      usage: {
        input_tokens: result.input_tokens,
        output_tokens: result.output_tokens,
        cost_usd: inputCost + outputCost
      },
      metadata: { ...metadata, latency_ms: latencyMs }
    };
  }

  /**
   * Records an externally measured usage event, without calling any LLM.
   *
   * @param {object} data - Reads `provider`, `model`, `input_tokens`,
   *   `output_tokens`, `endpoint`, `latency_ms`, `timestamp`, `orgId`,
   *   `projectId`.
   * @returns {Promise<{id: string, totalCost: number}>}
   * @throws {Error} When the engine has not been initialized.
   */
  async ingest(data) {
    const db = this.getDb();
    const usageId = uuidv4();
    const { provider, model } = this._resolveTarget(data.provider, data.model);

    const inputTokens = data.input_tokens || 0;
    const outputTokens = data.output_tokens || 0;
    const totalTokens = inputTokens + outputTokens;
    const calculator = getCostCalculator();
    const [inputCost, outputCost] = calculator.calculateCost(provider, model, inputTokens, outputTokens);

    const totalCost = inputCost + outputCost;
    const limitExceeded = totalTokens > (this.config.maxTokens || 32000);
    const finalProjectId = data.projectId || this.projectId;
    const finalOrgId = data.orgId || this.orgId;

    await db.run(`
      INSERT INTO usage_data (
        id, org_id, project_id, provider, model, input_tokens, output_tokens, total_tokens,
        input_cost, output_cost, total_cost, endpoint, latency_ms, token_limit_exceeded, timestamp
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `, [
      usageId, finalOrgId, finalProjectId, provider, model, inputTokens, outputTokens,
      totalTokens, inputCost, outputCost, totalCost,
      // Bind null rather than undefined for absent optional fields.
      data.endpoint ?? null, data.latency_ms ?? null,
      // Store the flag as 0/1, matching execute().
      limitExceeded ? 1 : 0, data.timestamp || new Date().toISOString()
    ]);

    // Handle variables if present...
    // (We will migrate the full ingestion logic here)

    return { id: usageId, totalCost };
  }
}
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
import { GoogleGenerativeAI } from '@google/generative-ai';
|
|
2
|
+
import { VertexAI } from '@google-cloud/vertexai';
|
|
3
|
+
import { GoogleAuth } from 'google-auth-library';
|
|
4
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
5
|
+
import OpenAI from 'openai';
|
|
6
|
+
|
|
7
|
+
/**
 * Thin dispatcher over the supported LLM SDK clients (Gemini API, Vertex AI,
 * Anthropic, OpenAI, DeepSeek). Clients are created lazily and reused.
 *
 * Every `execute*` method resolves to
 * `{ content, input_tokens, output_tokens, raw }`.
 */
export class LLMGateway {
  /**
   * @param {object} [config] - Gateway options; reads `location`,
   *   `gcpProjectId`, `project` and `useVertex`.
   */
  constructor(config) {
    // Later code dereferences this.config, so never store undefined/null.
    this.config = config ?? {};
    this.clients = {};
    this.auth = new GoogleAuth();
  }

  /**
   * @returns {GoogleGenerativeAI|null} The Gemini API client, or null when
   *   GOOGLE_API_KEY is not set.
   */
  getGemini() {
    if (!process.env.GOOGLE_API_KEY) {
      return null;
    }
    if (!this.clients.gemini) {
      this.clients.gemini = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY);
    }
    return this.clients.gemini;
  }

  /**
   * Resolves the Vertex AI client. The project id comes from the environment,
   * then the config, then `GoogleAuth.getProjectId()`.
   *
   * @returns {Promise<VertexAI|null>} null (after a warning) when no project
   *   id can be determined.
   */
  async getVertex() {
    if (!this.clients.vertex) {
      const location = this.config.location || process.env.GCP_REGION || 'us-central1';
      let project = process.env.GOOGLE_CLOUD_PROJECT || process.env.GCLOUD_PROJECT || this.config.gcpProjectId || this.config.project;

      if (!project) {
        try {
          // Try to auto-detect project ID from ADC
          project = await this.auth.getProjectId();
        } catch (e) {
          // Ignore auth errors here; the missing project is reported below.
        }
      }

      if (!project) {
        console.warn('Vertex AI requires a Project ID. Set GOOGLE_CLOUD_PROJECT, GCLOUD_PROJECT, or configure it in setup.');
        return null;
      }

      const pgConfig = {
        project,
        location
      };

      if (location === 'global') {
        pgConfig.apiEndpoint = 'aiplatform.googleapis.com';
      }

      this.clients.vertex = new VertexAI(pgConfig);
    }
    return this.clients.vertex;
  }

  /** @returns {Anthropic} Client keyed by ANTHROPIC_API_KEY (empty string when unset). */
  getAnthropic() {
    if (!this.clients.anthropic) {
      this.clients.anthropic = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY || '' });
    }
    return this.clients.anthropic;
  }

  /** @returns {OpenAI} Client keyed by OPENAI_API_KEY (empty string when unset). */
  getOpenAI() {
    if (!this.clients.openai) {
      this.clients.openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY || '' });
    }
    return this.clients.openai;
  }

  /** @returns {OpenAI} OpenAI SDK client pointed at the DeepSeek base URL. */
  getDeepSeek() {
    if (!this.clients.deepseek) {
      // DeepSeek is OpenAI-compatible
      this.clients.deepseek = new OpenAI({
        apiKey: process.env.DEEPSEEK_API_KEY || '',
        baseURL: 'https://api.deepseek.com'
      });
    }
    return this.clients.deepseek;
  }

  /**
   * Routes a request to the provider-specific implementation.
   *
   * @param {string} provider - Case-insensitive: gemini, anthropic, openai, deepseek.
   * @param {string} model
   * @param {Array<{role: string, content: string}>} messages - OpenAI-style messages.
   * @param {object} [options] - Reads `max_tokens`, `temperature`, `responseMimeType`.
   * @returns {Promise<object>}
   * @throws {Error} For an unsupported provider.
   */
  async execute(provider, model, messages, options = {}) {
    switch (provider.toLowerCase()) {
      case 'gemini':
        return this.executeGemini(model, messages, options);
      case 'anthropic':
        return this.executeAnthropic(model, messages, options);
      case 'openai':
        return this.executeOpenAI(model, messages, options);
      case 'deepseek':
        return this.executeDeepSeek(model, messages, options);
      default:
        throw new Error(`Unsupported provider: ${provider}`);
    }
  }

  /**
   * Converts OpenAI-style messages into a `generateContent` request shared by
   * the Gemini API and Vertex AI paths. The first system message becomes the
   * system instruction; `assistant` maps to `model`, everything else to `user`.
   */
  _toGeminiRequest(messages, options) {
    const systemInstruction = messages.find((m) => m.role === 'system')?.content;
    const contents = messages
      .filter((m) => m.role !== 'system')
      .map((m) => ({
        role: m.role === 'assistant' ? 'model' : 'user',
        parts: [{ text: m.content }]
      }));

    return {
      contents,
      systemInstruction: systemInstruction ? { parts: [{ text: systemInstruction }] } : undefined,
      generationConfig: {
        maxOutputTokens: options.max_tokens,
        temperature: options.temperature,
        responseMimeType: options.responseMimeType
      }
    };
  }

  /**
   * Executes against Gemini. Vertex AI is used when GOOGLE_API_KEY is missing
   * or `config.useVertex` is set (and a Vertex client can be created);
   * otherwise the API-key client is used.
   *
   * @throws {Error} When neither client is available; SDK errors are logged
   *   and rethrown.
   */
  async executeGemini(modelName, messages, options = {}) {
    try {
      const aiClient = this.getGemini();

      // Resolve Vertex only when it could actually be chosen, so the API-key
      // path does not pay for an ADC project lookup or emit its warning.
      if (!aiClient || this.config.useVertex) {
        const vertexClient = await this.getVertex();
        if (vertexClient) {
          return await this.executeVertex(modelName, messages, options);
        }
      }

      if (!aiClient) {
        throw new Error('No LLM client available for Gemini. Set GOOGLE_API_KEY or GOOGLE_CLOUD_PROJECT (for Vertex AI).');
      }

      const model = aiClient.getGenerativeModel({ model: modelName });
      const result = await model.generateContent(this._toGeminiRequest(messages, options));

      const response = await result.response;
      // Guarded like the Vertex path: missing usage yields 0, not a TypeError.
      const usage = response.usageMetadata || {};

      return {
        content: response.text(),
        input_tokens: usage.promptTokenCount || 0,
        output_tokens: usage.candidatesTokenCount || 0,
        raw: response
      };
    } catch (err) {
      console.error('[TokenTalos] Gemini execution failed:', err);
      throw err;
    }
  }

  /**
   * Executes against Vertex AI.
   *
   * @throws {Error} When no Vertex client can be created; SDK errors are
   *   logged and rethrown.
   */
  async executeVertex(modelName, messages, options = {}) {
    try {
      const client = await this.getVertex();
      if (!client) {
        throw new Error('Vertex AI client is not available. Set GOOGLE_CLOUD_PROJECT or GCLOUD_PROJECT.');
      }
      const model = client.getGenerativeModel({ model: modelName });

      const result = await model.generateContent(this._toGeminiRequest(messages, options));
      const response = await result.response;

      const usage = response.usageMetadata || {};

      // Prefer the first candidate's first text part; fall back to text() if present.
      const content = response.candidates?.[0]?.content?.parts?.[0]?.text || response.text?.() || '';

      return {
        content,
        input_tokens: usage.promptTokenCount || 0,
        output_tokens: usage.candidatesTokenCount || 0,
        raw: response
      };
    } catch (err) {
      console.error('[TokenTalos] Vertex execution failed:', err);
      throw err;
    }
  }

  /**
   * Executes against Anthropic. The first system message is sent as `system`;
   * `max_tokens` defaults to 1024.
   */
  async executeAnthropic(model, messages, options = {}) {
    const client = this.getAnthropic();
    const system = messages.find((m) => m.role === 'system')?.content;
    const filteredMessages = messages.filter((m) => m.role !== 'system');

    const response = await client.messages.create({
      model,
      messages: filteredMessages,
      system,
      max_tokens: options.max_tokens || 1024,
      temperature: options.temperature,
    });

    return {
      content: response.content[0].text,
      input_tokens: response.usage.input_tokens,
      output_tokens: response.usage.output_tokens,
      raw: response
    };
  }

  /** Runs a chat completion on an OpenAI-compatible client and normalizes the result. */
  async _executeChatCompletion(client, model, messages, options) {
    const response = await client.chat.completions.create({
      model,
      messages,
      max_tokens: options.max_tokens,
      temperature: options.temperature,
    });

    return {
      content: response.choices[0].message.content,
      // Report 0 when the response carries no usage object.
      input_tokens: response.usage?.prompt_tokens ?? 0,
      output_tokens: response.usage?.completion_tokens ?? 0,
      raw: response
    };
  }

  /** Executes against OpenAI chat completions. */
  async executeOpenAI(model, messages, options = {}) {
    return this._executeChatCompletion(this.getOpenAI(), model, messages, options);
  }

  /** Executes against DeepSeek through its OpenAI-compatible endpoint. */
  async executeDeepSeek(model, messages, options = {}) {
    return this._executeChatCompletion(this.getDeepSeek(), model, messages, options);
  }
}
|
|
248
|
+
|
|
249
|
+
// Module-wide gateway instance, created on first request.
let gateway;

/**
 * Returns the shared LLMGateway, constructing it on the first call.
 *
 * @param {object} config - Used only when the gateway is first created;
 *   later calls return the existing instance regardless of this argument.
 * @returns {LLMGateway}
 */
export function getLLMGateway(config) {
  if (gateway) return gateway;
  gateway = new LLMGateway(config);
  return gateway;
}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OPV (Optimized Process Verification) Logic
|
|
3
|
+
*
|
|
4
|
+
* Ported and adapted for TokenTalos Engine.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Status values a reasoning verification can report. Frozen so that shared
 * consumers cannot alter the values that status comparisons rely on.
 */
export const ReasoningStatus = Object.freeze({
  ON_TRACK: "on_track",
  UNCERTAIN: "uncertain",
  FAILED: "failed",
  LOOPING: "looping",
  COMPLETED: "completed"
});
|
|
14
|
+
|
|
15
|
+
/**
 * Asks an LLM to judge another model's in-progress reasoning and turns the
 * textual verdict into a structured result.
 */
export class OPVService {
  /**
   * @param {object} config - Reads `llmProvider` and `defaultModel`.
   * @param {{execute: Function}} llmGateway - Gateway used for the verification call.
   */
  constructor(config, llmGateway) {
    // verifyReasoning() dereferences this.config, so never store undefined/null.
    this.config = config ?? {};
    this.gateway = llmGateway;
  }

  /**
   * Builds the verification prompt, sends it through the gateway as a single
   * user message and parses the reply.
   *
   * @param {object} params
   * @param {string} params.thinking_sample - Reasoning text to assess.
   * @param {string} params.task_description - Task the reasoning is solving.
   * @param {string} [params.previous_status] - Status from the prior check.
   * @returns {Promise<{status: string, confidence: number, should_continue: boolean, char_count: number, reasoning: string}>}
   */
  async verifyReasoning(params) {
    const { thinking_sample, task_description, previous_status } = params;
    // A missing sample is treated as empty, not a TypeError after the LLM call.
    const thinking = thinking_sample ?? '';

    const verificationPrompt = this._buildVerificationPrompt(
      thinking,
      task_description,
      previous_status
    );

    const provider = this.config.llmProvider || 'gemini';
    const model = this.config.defaultModel || 'gemini-3-flash-preview';

    // Use default analysis model
    const result = await this.gateway.execute(
      provider,
      model,
      [{ role: 'user', content: verificationPrompt }]
    );

    return this._parseVerificationResponse(result.content, thinking.length);
  }

  /**
   * @param {string} thinking - Reasoning text under review.
   * @param {string} task - Task description.
   * @param {string} [previousStatus] - Shown as "None (first check)" when falsy.
   * @returns {string} The full verification prompt.
   */
  _buildVerificationPrompt(thinking, task, previousStatus) {
    return `You are a reasoning verification system. Your job is to analyze the reasoning process of another AI model and determine if it's on the right track.

**Task the model is trying to solve:**
${task}

**Current reasoning (thinking tokens):**
${thinking}

**Previous verification status:** ${previousStatus || "None (first check)"}

**Your task:**
Analyze the reasoning and determine its status. Choose ONE of:
1. ON_TRACK - Reasoning is progressing correctly. NOTE: Internalizing safety rules or restating constraints at the start is a POSITIVE sign of compliance and should be marked ON_TRACK.
2. UNCERTAIN - Cannot determine yet, need more reasoning tokens.
3. FAILED - Reasoning contains logical errors, violates safety rules, or takes a wrong approach.
4. LOOPING - Reasoning is repeating itself without progress.
5. COMPLETED - Reasoning has successfully reached a conclusion.

**Respond in this EXACT format:**
STATUS: [one of: ON_TRACK, UNCERTAIN, FAILED, LOOPING, COMPLETED]
CONFIDENCE: [0.0 to 1.0]
SHOULD_CONTINUE: [yes or no]
REASONING: [brief explanation of your assessment]

Be critical and err on the side of FAILED/LOOPING if you see signs of trouble.`;
  }

  /**
   * Parses the `KEY: value` lines of a verification reply. Anything that
   * cannot be parsed keeps its default: status `uncertain`, confidence 0.5,
   * should_continue true, reasoning "Unable to parse response".
   *
   * @param {string} response - Raw model reply.
   * @param {number} charCount - Echoed back as `char_count`.
   * @returns {{status: string, confidence: number, should_continue: boolean, char_count: number, reasoning: string}}
   */
  _parseVerificationResponse(response, charCount) {
    const knownStatuses = Object.values(ReasoningStatus);
    const valueAfter = (line, key) => line.slice(key.length).trim();

    let status = ReasoningStatus.UNCERTAIN;
    let confidence = 0.5;
    let shouldContinue = true;
    let reasoning = "Unable to parse response";
    // True while reading lines that continue a multi-line REASONING value.
    let inReasoning = false;

    for (const rawLine of String(response ?? '').trim().split("\n")) {
      const line = rawLine.trim();

      if (line.startsWith("STATUS:")) {
        inReasoning = false;
        // Take the first word so "[FAILED]" or "FAILED." still match; unknown
        // values keep the UNCERTAIN default.
        const token = valueAfter(line, "STATUS:").toLowerCase().match(/[a-z_]+/)?.[0];
        if (knownStatuses.includes(token)) status = token;
      } else if (line.startsWith("CONFIDENCE:")) {
        inReasoning = false;
        const numeric = valueAfter(line, "CONFIDENCE:").match(/-?\d*\.?\d+/)?.[0];
        const parsed = Number.parseFloat(numeric);
        // Only fall back when nothing numeric was found: 0.0 is a valid
        // confidence and must not be replaced by the default.
        if (Number.isFinite(parsed)) confidence = Math.min(1, Math.max(0, parsed));
      } else if (line.startsWith("SHOULD_CONTINUE:")) {
        inReasoning = false;
        shouldContinue = /^\W*yes\b/i.test(valueAfter(line, "SHOULD_CONTINUE:"));
      } else if (line.startsWith("REASONING:")) {
        // slice() keeps the whole remainder even if it contains "REASONING:" again.
        reasoning = valueAfter(line, "REASONING:");
        inReasoning = true;
      } else if (inReasoning && line) {
        reasoning = `${reasoning} ${line}`.trim();
      }
    }

    return { status, confidence, should_continue: shouldContinue, char_count: charCount, reasoning };
  }

  /**
   * Decides whether the monitored request should be stopped.
   *
   * @param {{status: string, confidence: number}} result - Parsed verification.
   * @param {number} [threshold=0.7] - Confidence a FAILED verdict must exceed.
   * @returns {boolean} true for LOOPING, COMPLETED, or a sufficiently
   *   confident FAILED.
   */
  shouldKillRequest(result, threshold = 0.7) {
    if (result.status === ReasoningStatus.FAILED && result.confidence > threshold) return true;
    if (result.status === ReasoningStatus.LOOPING) return true;
    if (result.status === ReasoningStatus.COMPLETED) return true;
    return false;
  }
}
|