@meller/tokentalos 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,320 @@
1
+ import { initDb, getDb } from './db.js';
2
+ import { processPromptParts } from './processor.js';
3
+ import { TokenTalosPrompt } from './parameterizer.js';
4
+ import { getCostCalculator } from './pricing.js';
5
+ import { getLLMGateway } from './llm_clients.js';
6
+ import { PromptCache } from './cache.js';
7
+ import { OPVService } from './opv.js';
8
+ import { runHeuristicAnalysis } from './analyzer.js';
9
+ import { v4 as uuidv4 } from 'uuid';
10
+
11
/**
 * Orchestrates the TokenTalos pipeline: prompt-part processing, prompt
 * assembly, response caching, LLM execution, and persistence of usage data.
 *
 * The engine initialises lazily: `execute`, `ingest`, `validateApiKey`, and
 * `verifyReasoning` call `init()` on first use. The synchronous `getDb()`
 * accessor still throws when the engine has not been initialised.
 */
export class TokenTalosEngine {
  /**
   * @param {object} [config] - Engine configuration. Keys supplied here
   *   override the built-in feature defaults below.
   */
  constructor(config = {}) {
    this.config = {
      formattingFeatures: ['pii', 'neutralize'],
      intelligenceFeatures: ['cache', 'explain'],
      securityFeatures: ['injection', 'secrets'],
      securityAction: 'warn',
      piiAction: 'mask',
      ...config
    };
    this.managedMode = this.config.managedMode || false;
    this.orgId = this.config.orgId || 'default_org';
    this.projectId = this.config.projectId || 'default';
    this.db = null;
    this.cache = null;
    this.opv = null;
    this.initialized = false;
  }

  /**
   * Opens the database, seeds the default organisation, and builds the cache
   * and OPV services. Calling it again after success is a no-op.
   *
   * @returns {Promise<void>}
   */
  async init() {
    if (this.initialized) return;

    // Spread the user config first so the computed fallbacks always win over
    // a key that is present but empty (e.g. `databaseType: undefined`).
    const dbConfig = {
      ...this.config,
      databaseType: this.config.databaseType || 'sqlite',
      sqlitePath: this.config.sqlitePath || ':memory:'
    };

    this.db = await initDb(dbConfig);

    // Create default org if it doesn't exist
    await this.ensureDefaultOrg();

    this.cache = new PromptCache(getDb());
    this.opv = new OPVService(this.config, getLLMGateway(this.config));
    this.initialized = true;
  }

  /**
   * Inserts the default organisation, the local user, and that user's admin
   * membership if they are not already present.
   *
   * This is best-effort: a failure is logged and never propagated, so `init()`
   * can still complete when the schema is not ready.
   *
   * @returns {Promise<void>}
   */
  async ensureDefaultOrg() {
    const db = getDb();
    try {
      // 1. Ensure Default Organization
      await db.run(
        'INSERT INTO organizations (id, name) VALUES (?, ?) ON CONFLICT DO NOTHING',
        ['default_org', 'Default Organization']
      );

      // 2. Ensure Default User (for local mode)
      await db.run(
        'INSERT INTO users (id, email, name) VALUES (?, ?, ?) ON CONFLICT DO NOTHING',
        ['local_user', 'dev@tokentalos.local', 'Local Developer']
      );

      // 3. Ensure Membership
      await db.run(
        'INSERT INTO organization_members (org_id, user_id, role) VALUES (?, ?, ?) ON CONFLICT DO NOTHING',
        ['default_org', 'local_user', 'admin']
      );
    } catch (err) {
      // Schema may not be ready, or a non-ID unique constraint fired. Keep
      // going, but leave a trace instead of swallowing the error silently.
      console.warn('[TokenTalos] Could not seed default organization:', err?.message ?? err);
    }
  }

  /**
   * Looks up the organisation that owns an API key.
   *
   * NOTE: the key is compared as-is against the `key_hash` column; in a real
   * system the key would be hashed before the lookup.
   *
   * @param {string} key - API key presented by the caller.
   * @returns {Promise<string|null>} The owning org ID, or `null` when the key
   *   is empty or unknown.
   */
  async validateApiKey(key) {
    if (!key) return null;
    if (!this.initialized) await this.init();
    const db = this.getDb();
    const keyRecord = await db.get('SELECT org_id FROM api_keys WHERE key_hash = ?', [key]);
    return keyRecord ? keyRecord.org_id : null;
  }

  /**
   * Delegates a reasoning check to the OPV service, initialising first if
   * needed.
   *
   * @param {object} params - Forwarded unchanged to `OPVService.verifyReasoning`.
   * @returns {Promise<object>} The OPV verification result.
   */
  async verifyReasoning(params) {
    if (!this.initialized) await this.init();
    return await this.opv.verifyReasoning(params);
  }

  /**
   * Returns the shared database handle.
   *
   * @returns {object} The handle returned by the db module's `getDb()`.
   * @throws {Error} When `init()` has not completed.
   */
  getDb() {
    if (!this.initialized) throw new Error('TokenTalos Engine not initialized. Call init() first.');
    return getDb();
  }

  /**
   * Runs the prompt parts through the processor with this engine's config.
   *
   * @param {object} parts - Prompt parts keyed by name.
   * @returns {Promise<object>} Whatever `processPromptParts` resolves to;
   *   `execute` reads `processedParts` and `metadata` from it.
   */
  async process(parts) {
    return processPromptParts(parts, this.config);
  }

  /**
   * Creates an empty prompt for the given provider and model, falling back to
   * the configured defaults and then to Gemini.
   *
   * @param {string} [provider]
   * @param {string} [model]
   * @returns {TokenTalosPrompt}
   */
  createPrompt(provider, model) {
    const finalProvider = provider || this.config.llmProvider || 'gemini';
    const finalModel = model || this.config.defaultModel || 'gemini-3-flash-preview';
    return new TokenTalosPrompt(finalProvider, finalModel);
  }

  /**
   * Processes the prompt parts, serves the response from cache when possible,
   * otherwise calls the LLM, and records usage, variables, actions, security
   * alerts, and the heuristic explain plan.
   *
   * @param {object} params
   * @param {string} [params.provider] - LLM provider; defaults from config.
   * @param {string} [params.model] - Model name; defaults from config.
   * @param {object} params.parts - Raw prompt parts keyed by name.
   * @param {string} [params.endpoint] - Caller label stored with the usage row.
   * @param {object} [params.options] - Options forwarded to the LLM gateway.
   * @param {boolean} [params.bypassCache=false] - Skip the cache lookup.
   * @param {string} [params.orgId] - Overrides the engine's org ID.
   * @param {string} [params.projectId] - Overrides the engine's project ID.
   * @returns {Promise<object>} On a cache hit:
   *   `{ id, content, cached: true, saved_tokens, saved_cost, metadata }`.
   *   Otherwise: `{ id, content, cached: false, usage, metadata }`.
   */
  async execute(params) {
    const { provider, model, parts, endpoint, options = {}, bypassCache = false, orgId, projectId } = params;

    // Without this the cache is still null on first use and the call fails.
    if (!this.initialized) await this.init();

    const startTime = Date.now();
    const finalOrgId = orgId || this.orgId;
    const finalProjectId = projectId || this.projectId;
    const storedEndpoint = endpoint ?? null; // bind NULL rather than undefined

    // 1. Process Parts
    const { processedParts, metadata } = await this.process(parts);
    const actions = metadata.actions_taken ?? [];

    const finalProvider = provider || this.config.llmProvider || 'gemini';
    const finalModel = model || this.config.defaultModel || 'gemini-3-flash-preview';

    const prompt = this.createPrompt(finalProvider, finalModel);

    // Add processed parts to the prompt, keeping the raw part alongside.
    for (const key of Object.keys(processedParts)) {
      if (key === 'system') prompt.addSystem(processedParts[key], parts[key]);
      else if (key === 'context') prompt.addContext(processedParts[key], parts[key]);
      else if (key === 'history') prompt.addHistory(processedParts[key], parts[key]);
      else if (key === 'user_query') prompt.addUserQuery(processedParts[key], parts[key]);
      else prompt.add(key, processedParts[key], parts[key]);
    }

    const fullPromptString = prompt.toString();
    const promptHash = this.cache.generateHash(fullPromptString);

    // Track compression savings
    const compressionAction = actions.find((a) => a.type === 'compress');
    const savedChars = compressionAction?.saved_chars || 0;
    const savedTokens = Math.ceil(savedChars / 4); // Heuristic
    const calculator = getCostCalculator();
    const [savedCompressionCost] = calculator.calculateCost(finalProvider, finalModel, savedTokens, 0);

    const db = this.getDb();

    // 2. Cache Check
    if (!bypassCache && this.config.intelligenceFeatures?.includes('cache')) {
      const cached = await this.cache.get(promptHash);
      if (cached) {
        // Log the cache hit as a usage event with 0 cost but record saved tokens.
        const hitId = uuidv4();
        // Take one snapshot so every value below comes from the same data.
        const trackingData = prompt.getTrackingData();
        const promptTokens = trackingData.total_tokens;

        // Calculate what it WOULD have cost
        const [savedInputCost] = calculator.calculateCost(finalProvider, finalModel, promptTokens, 0);

        await db.run(`
          INSERT INTO usage_data (id, org_id, project_id, type, provider, model, full_prompt, response_content, input_tokens, total_tokens, saved_tokens, saved_cost, input_cost, total_cost, endpoint, latency_ms, timestamp)
          VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        `, [
          hitId, finalOrgId, finalProjectId, 'cache_hit', finalProvider, finalModel, fullPromptString, cached.response_content, 0, 0,
          promptTokens, savedInputCost, 0, 0, storedEndpoint, Date.now() - startTime, trackingData.timestamp
        ]);

        await this._persistVariables(db, hitId, trackingData.variables);
        await this._persistActions(db, hitId, actions);

        return {
          id: cached.usage_id,
          content: cached.response_content,
          cached: true,
          saved_tokens: promptTokens,
          saved_cost: savedInputCost,
          metadata: { ...metadata, latency_ms: Date.now() - startTime }
        };
      }
    }

    // 3. Execution
    const gateway = getLLMGateway(this.config);
    const messages = prompt.toMessages();
    const result = await gateway.execute(finalProvider, finalModel, messages, options);
    const latencyMs = Date.now() - startTime;

    // 4. Persistence
    const trackingData = prompt.getTrackingData();
    const [inputCost, outputCost] = calculator.calculateCost(
      finalProvider, finalModel, result.input_tokens, result.output_tokens
    );
    const totalTokens = result.input_tokens + result.output_tokens;
    const totalCost = inputCost + outputCost;
    const limitExceeded = totalTokens > (this.config.maxTokens || 32000);

    await db.run(`
      INSERT INTO usage_data (id, org_id, project_id, type, provider, model, full_prompt, response_content, input_tokens, output_tokens, total_tokens, saved_tokens, saved_cost, input_cost, output_cost, total_cost, endpoint, latency_ms, token_limit_exceeded, timestamp)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `, [
      trackingData.id, finalOrgId, finalProjectId, 'execution', finalProvider, finalModel, fullPromptString, result.content, result.input_tokens, result.output_tokens,
      totalTokens, savedTokens, savedCompressionCost, inputCost, outputCost, totalCost,
      storedEndpoint, latencyMs, limitExceeded ? 1 : 0, trackingData.timestamp
    ]);

    await this._persistVariables(db, trackingData.id, trackingData.variables);
    await this._persistActions(db, trackingData.id, actions);

    // Persist Security Alerts
    for (const finding of (metadata.security_findings || [])) {
      await db.run(`
        INSERT INTO security_alerts (usage_id, type, description, severity, action_taken)
        VALUES (?, ?, ?, ?, ?)
      `, [
        trackingData.id,
        finding.type,
        finding.description,
        finding.severity,
        // Read from this.config: there is no local `config` binding here.
        this.config.securityAction || 'warn'
      ]);
    }

    // 5. Heuristic Analysis
    const analysis = runHeuristicAnalysis({
      total_tokens: totalTokens,
      input_tokens: result.input_tokens,
      output_tokens: result.output_tokens,
      total_cost: totalCost,
      provider: finalProvider,
      model: finalModel
    }, trackingData.variables);
    if (analysis) {
      const planId = uuidv4();
      await db.run(`
        INSERT INTO explain_plans (
          id, usage_id, variable_analysis, detected_issues, optimization_suggestions,
          estimated_savings_pct, estimated_savings_usd,
          mce_best_alternative_model, mce_best_alternative_provider, mce_best_alternative_cost, mce_savings_pct
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
      `, [
        planId,
        trackingData.id,
        JSON.stringify(analysis.variable_analysis),
        JSON.stringify(analysis.detected_issues),
        JSON.stringify(analysis.optimization_suggestions),
        analysis.estimated_savings_pct,
        analysis.estimated_savings_usd,
        analysis.mce_best_alternative_model || null,
        analysis.mce_best_alternative_provider || null,
        analysis.mce_best_alternative_cost || 0,
        analysis.mce_savings_pct || 0
      ]);
    }

    // Save to Cache
    await this.cache.set(promptHash, result.content, trackingData.id);

    return {
      id: trackingData.id,
      content: result.content,
      cached: false,
      usage: {
        input_tokens: result.input_tokens,
        output_tokens: result.output_tokens,
        cost_usd: totalCost
      },
      metadata: { ...metadata, latency_ms: latencyMs }
    };
  }

  /**
   * Records an externally measured usage event without calling an LLM.
   *
   * @param {object} data
   * @param {string} [data.provider] - Defaults from config, then 'gemini'.
   * @param {string} [data.model] - Defaults from config.
   * @param {number} [data.input_tokens=0]
   * @param {number} [data.output_tokens=0]
   * @param {string} [data.endpoint]
   * @param {number} [data.latency_ms]
   * @param {string} [data.timestamp] - Defaults to the current ISO timestamp.
   * @param {string} [data.orgId] - Overrides the engine's org ID.
   * @param {string} [data.projectId] - Overrides the engine's project ID.
   * @returns {Promise<{id: string, totalCost: number}>}
   */
  async ingest(data) {
    if (!this.initialized) await this.init();
    const db = this.getDb();
    const usageId = uuidv4();
    const provider = data.provider || this.config.llmProvider || 'gemini';
    const model = data.model || this.config.defaultModel || 'gemini-3-flash-preview';

    const inputTokens = data.input_tokens || 0;
    const outputTokens = data.output_tokens || 0;
    const totalTokens = inputTokens + outputTokens;
    const calculator = getCostCalculator();
    const [inputCost, outputCost] = calculator.calculateCost(provider, model, inputTokens, outputTokens);

    const totalCost = inputCost + outputCost;
    const limitExceeded = totalTokens > (this.config.maxTokens || 32000);
    const finalProjectId = data.projectId || this.projectId;
    const finalOrgId = data.orgId || this.orgId;

    await db.run(`
      INSERT INTO usage_data (
        id, org_id, project_id, provider, model, input_tokens, output_tokens, total_tokens,
        input_cost, output_cost, total_cost, endpoint, latency_ms, token_limit_exceeded, timestamp
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `, [
      usageId, finalOrgId, finalProjectId, provider, model, inputTokens, outputTokens,
      totalTokens, inputCost, outputCost, totalCost,
      // Bind NULL for absent optionals and 1/0 for the flag, as execute() does.
      data.endpoint ?? null, data.latency_ms ?? null,
      limitExceeded ? 1 : 0, data.timestamp || new Date().toISOString()
    ]);

    // TODO: variables attached to ingested events are not persisted yet.

    return { id: usageId, totalCost };
  }

  /**
   * Writes one `prompt_variables` row per tracked prompt variable.
   *
   * @param {object} db - Database handle exposing `run(sql, params)`.
   * @param {string} usageId - ID of the owning `usage_data` row.
   * @param {Array<object>} [variables] - Tracking entries from the prompt.
   * @returns {Promise<void>}
   */
  async _persistVariables(db, usageId, variables) {
    for (const v of (variables ?? [])) {
      await db.run(`
        INSERT INTO prompt_variables (usage_id, name, content, original_content, token_count, char_count, position)
        VALUES (?, ?, ?, ?, ?, ?, ?)
      `, [usageId, v.name, v.content, v.original_content, v.token_count, v.char_count, v.position]);
    }
  }

  /**
   * Writes one `variable_actions` row per processing action; the full action
   * object is stored as JSON in the `details` column.
   *
   * @param {object} db - Database handle exposing `run(sql, params)`.
   * @param {string} usageId - ID of the owning `usage_data` row.
   * @param {Array<object>} [actions] - Actions reported by the processor.
   * @returns {Promise<void>}
   */
  async _persistActions(db, usageId, actions) {
    for (const action of (actions ?? [])) {
      await db.run(`
        INSERT INTO variable_actions (usage_id, variable_name, action_type, action_method, details)
        VALUES (?, ?, ?, ?, ?)
      `, [
        usageId,
        action.target,
        action.type,
        action.method || null,
        JSON.stringify(action)
      ]);
    }
  }
}
@@ -0,0 +1,255 @@
1
+ import { GoogleGenerativeAI } from '@google/generative-ai';
2
+ import { VertexAI } from '@google-cloud/vertexai';
3
+ import { GoogleAuth } from 'google-auth-library';
4
+ import Anthropic from '@anthropic-ai/sdk';
5
+ import OpenAI from 'openai';
6
+
7
/**
 * Provider-agnostic gateway over the Gemini, Vertex AI, Anthropic, OpenAI,
 * and DeepSeek SDK clients.
 *
 * Clients are created on first use and cached on `this.clients`. Every
 * `execute*` method takes OpenAI-style `{ role, content }` messages and
 * resolves to `{ content, input_tokens, output_tokens, raw }`.
 */
export class LLMGateway {
  /**
   * @param {object} [config] - Gateway configuration. Keys read here:
   *   `location`, `gcpProjectId`, `project`, `useVertex`.
   */
  constructor(config = {}) {
    this.config = config ?? {};
    this.clients = {};
    this.auth = new GoogleAuth();
  }

  /**
   * @returns {object|null} The cached Gemini API-key client, or `null` when
   *   `GOOGLE_API_KEY` is not set.
   */
  getGemini() {
    if (!process.env.GOOGLE_API_KEY) {
      return null;
    }
    if (!this.clients.gemini) {
      this.clients.gemini = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY);
    }
    return this.clients.gemini;
  }

  /**
   * Builds (once) and returns the Vertex AI client.
   *
   * The project ID comes from `GOOGLE_CLOUD_PROJECT`, `GCLOUD_PROJECT`, the
   * config, or finally from `GoogleAuth#getProjectId()`.
   *
   * @returns {Promise<object|null>} The client, or `null` (after a warning)
   *   when no project ID can be determined.
   */
  async getVertex() {
    if (!this.clients.vertex) {
      const location = this.config.location || process.env.GCP_REGION || 'us-central1';
      let project = process.env.GOOGLE_CLOUD_PROJECT || process.env.GCLOUD_PROJECT || this.config.gcpProjectId || this.config.project;

      if (!project) {
        try {
          // Try to auto-detect project ID from ADC
          project = await this.auth.getProjectId();
        } catch (err) {
          // A failed lookup only means "no project"; that case is reported below.
        }
      }

      if (!project) {
        console.warn('Vertex AI requires a Project ID. Set GOOGLE_CLOUD_PROJECT, GCLOUD_PROJECT, or configure it in setup.');
        return null;
      }

      const vertexConfig = { project, location };

      // The 'global' location is given an explicit API endpoint.
      if (location === 'global') {
        vertexConfig.apiEndpoint = 'aiplatform.googleapis.com';
      }

      this.clients.vertex = new VertexAI(vertexConfig);
    }
    return this.clients.vertex;
  }

  /** @returns {object} The cached Anthropic client (key from `ANTHROPIC_API_KEY`). */
  getAnthropic() {
    if (!this.clients.anthropic) {
      this.clients.anthropic = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY || '' });
    }
    return this.clients.anthropic;
  }

  /** @returns {object} The cached OpenAI client (key from `OPENAI_API_KEY`). */
  getOpenAI() {
    if (!this.clients.openai) {
      this.clients.openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY || '' });
    }
    return this.clients.openai;
  }

  /** @returns {object} The cached DeepSeek client (key from `DEEPSEEK_API_KEY`). */
  getDeepSeek() {
    if (!this.clients.deepseek) {
      // DeepSeek is OpenAI-compatible
      this.clients.deepseek = new OpenAI({
        apiKey: process.env.DEEPSEEK_API_KEY || '',
        baseURL: 'https://api.deepseek.com'
      });
    }
    return this.clients.deepseek;
  }

  /**
   * Dispatches a chat request to the named provider (case-insensitive).
   *
   * @param {string} provider - 'gemini', 'anthropic', 'openai', or 'deepseek'.
   * @param {string} model - Provider-specific model name.
   * @param {Array<{role: string, content: string}>} messages
   * @param {object} [options] - `max_tokens`, `temperature`, and (Gemini and
   *   Vertex only) `responseMimeType`.
   * @returns {Promise<{content: string, input_tokens: number, output_tokens: number, raw: object}>}
   * @throws {Error} When the provider is missing or not supported.
   */
  async execute(provider, model, messages, options = {}) {
    // A missing or non-string provider reports "Unsupported provider" instead
    // of failing with a TypeError on toLowerCase().
    const providerKey = typeof provider === 'string' ? provider.toLowerCase() : '';
    switch (providerKey) {
      case 'gemini':
        return this.executeGemini(model, messages, options);
      case 'anthropic':
        return this.executeAnthropic(model, messages, options);
      case 'openai':
        return this.executeOpenAI(model, messages, options);
      case 'deepseek':
        return this.executeDeepSeek(model, messages, options);
      default:
        throw new Error(`Unsupported provider: ${provider}`);
    }
  }

  /**
   * Runs a Gemini request. Vertex AI is used when `GOOGLE_API_KEY` is missing
   * or `config.useVertex` is set and a Vertex client is available; otherwise
   * the API-key client is used.
   *
   * @param {string} modelName
   * @param {Array<{role: string, content: string}>} messages
   * @param {object} options
   * @returns {Promise<object>} The normalised result.
   * @throws {Error} When neither client is available; SDK errors are logged
   *   and rethrown.
   */
  async executeGemini(modelName, messages, options) {
    try {
      const aiClient = this.getGemini();

      // Resolve Vertex only when it could be chosen. This avoids an ADC
      // lookup and a spurious warning when the API-key client is used.
      if (!aiClient || this.config.useVertex) {
        const vertexClient = await this.getVertex();
        if (vertexClient) {
          return await this.executeVertex(modelName, messages, options);
        }
      }

      if (!aiClient) {
        throw new Error('No LLM client available for Gemini. Set GOOGLE_API_KEY or GOOGLE_CLOUD_PROJECT (for Vertex AI).');
      }

      const model = aiClient.getGenerativeModel({ model: modelName });
      const result = await model.generateContent(this._toGeminiRequest(messages, options));

      const response = await result.response;
      // usageMetadata may be absent; report 0 instead of throwing.
      const usage = response.usageMetadata ?? {};

      return {
        content: response.text(),
        input_tokens: usage.promptTokenCount ?? 0,
        output_tokens: usage.candidatesTokenCount ?? 0,
        raw: response
      };
    } catch (err) {
      console.error('[TokenTalos] Gemini execution failed:', err);
      throw err;
    }
  }

  /**
   * Runs a Gemini model through Vertex AI.
   *
   * @param {string} modelName
   * @param {Array<{role: string, content: string}>} messages
   * @param {object} options
   * @returns {Promise<object>} The normalised result; `content` joins the
   *   text of every part of the first candidate.
   * @throws {Error} When no Vertex client can be built; SDK errors are logged
   *   and rethrown.
   */
  async executeVertex(modelName, messages, options) {
    try {
      const client = await this.getVertex();
      if (!client) {
        throw new Error('Vertex AI client is not available. Set GOOGLE_CLOUD_PROJECT, GCLOUD_PROJECT, or configure a project ID.');
      }
      const model = client.getGenerativeModel({ model: modelName });
      const result = await model.generateContent(this._toGeminiRequest(messages, options));

      const response = await result.response;
      const usage = response.usageMetadata ?? {};

      // A candidate can carry several parts; join them all so the text is
      // not truncated to the first part.
      const parts = response.candidates?.[0]?.content?.parts ?? [];
      const joined = parts.map((part) => part.text ?? '').join('');
      const content = joined || response.text?.() || '';

      return {
        content,
        input_tokens: usage.promptTokenCount ?? 0,
        output_tokens: usage.candidatesTokenCount ?? 0,
        raw: response
      };
    } catch (err) {
      console.error('[TokenTalos] Vertex execution failed:', err);
      throw err;
    }
  }

  /**
   * Runs an Anthropic request. The first system message is passed as the
   * `system` field; `max_tokens` defaults to 1024.
   *
   * @param {string} model
   * @param {Array<{role: string, content: string}>} messages
   * @param {object} options
   * @returns {Promise<object>} The normalised result; `content` joins every
   *   response block that carries text.
   */
  async executeAnthropic(model, messages, options) {
    const client = this.getAnthropic();
    const system = messages.find((m) => m.role === 'system')?.content;
    const filteredMessages = messages.filter((m) => m.role !== 'system');

    const response = await client.messages.create({
      model,
      messages: filteredMessages,
      system,
      max_tokens: options.max_tokens || 1024,
      temperature: options.temperature,
    });

    // Blocks without text (or an empty block list) yield '' instead of
    // undefined or a TypeError.
    const content = (response.content ?? [])
      .filter((block) => typeof block.text === 'string')
      .map((block) => block.text)
      .join('');

    return {
      content,
      input_tokens: response.usage?.input_tokens ?? 0,
      output_tokens: response.usage?.output_tokens ?? 0,
      raw: response
    };
  }

  /**
   * Runs an OpenAI chat-completion request.
   *
   * @param {string} model
   * @param {Array<{role: string, content: string}>} messages
   * @param {object} options
   * @returns {Promise<object>} The normalised result.
   */
  async executeOpenAI(model, messages, options) {
    return this._executeOpenAICompatible(this.getOpenAI(), model, messages, options);
  }

  /**
   * Runs a DeepSeek request through its OpenAI-compatible API.
   *
   * @param {string} model
   * @param {Array<{role: string, content: string}>} messages
   * @param {object} options
   * @returns {Promise<object>} The normalised result.
   */
  async executeDeepSeek(model, messages, options) {
    return this._executeOpenAICompatible(this.getDeepSeek(), model, messages, options);
  }

  /**
   * Converts OpenAI-style messages into a Gemini/Vertex `generateContent`
   * request: the first system message becomes `systemInstruction`, and the
   * 'assistant' role is renamed to 'model'.
   *
   * @param {Array<{role: string, content: string}>} messages
   * @param {object} options
   * @returns {object} The request object for `generateContent`.
   */
  _toGeminiRequest(messages, options) {
    const systemInstruction = messages.find((m) => m.role === 'system')?.content;
    const contents = messages
      .filter((m) => m.role !== 'system')
      .map((m) => ({
        role: m.role === 'assistant' ? 'model' : 'user',
        parts: [{ text: m.content }]
      }));

    return {
      contents,
      systemInstruction: systemInstruction ? { parts: [{ text: systemInstruction }] } : undefined,
      generationConfig: {
        maxOutputTokens: options.max_tokens,
        temperature: options.temperature,
        responseMimeType: options.responseMimeType
      }
    };
  }

  /**
   * Sends a chat-completion request through an OpenAI-compatible client and
   * normalises the response.
   *
   * @param {object} client - Client exposing `chat.completions.create`.
   * @param {string} model
   * @param {Array<{role: string, content: string}>} messages
   * @param {object} options
   * @returns {Promise<object>} The normalised result.
   */
  async _executeOpenAICompatible(client, model, messages, options) {
    const response = await client.chat.completions.create({
      model,
      messages,
      max_tokens: options.max_tokens,
      temperature: options.temperature,
    });

    return {
      content: response.choices[0].message.content,
      input_tokens: response.usage?.prompt_tokens ?? 0,
      output_tokens: response.usage?.completion_tokens ?? 0,
      raw: response
    };
  }
}
248
+
249
// Module-level singleton; created on the first getLLMGateway() call.
let gateway;

/**
 * Returns the shared LLMGateway, constructing it on first use.
 *
 * The `config` argument is only used when the gateway is first created;
 * later calls return the existing instance and ignore it.
 *
 * @param {object} config - Configuration for the first construction.
 * @returns {LLMGateway} The shared gateway instance.
 */
export function getLLMGateway(config) {
  if (gateway) {
    return gateway;
  }
  gateway = new LLMGateway(config);
  return gateway;
}
@@ -0,0 +1,96 @@
1
+ /**
2
+ * OPV (Optimized Process Verification) Logic
3
+ *
4
+ * Ported and adapted for TokenTalos Engine.
5
+ */
6
+
7
/**
 * Status values a reasoning verification can resolve to. These are the
 * lowercase forms of the labels the verification prompt asks the model to
 * choose from. Frozen so the shared table cannot be mutated by accident.
 */
export const ReasoningStatus = Object.freeze({
  ON_TRACK: "on_track",
  UNCERTAIN: "uncertain",
  FAILED: "failed",
  LOOPING: "looping",
  COMPLETED: "completed"
});
14
+
15
/**
 * Asks an LLM to judge a sample of another model's reasoning and parses the
 * structured verdict it returns.
 */
export class OPVService {
  /**
   * @param {object} [config] - Reads `llmProvider` and `defaultModel`.
   * @param {object} llmGateway - Object exposing
   *   `execute(provider, model, messages)`.
   */
  constructor(config, llmGateway) {
    this.config = config ?? {};
    this.gateway = llmGateway;
  }

  /**
   * Builds the verification prompt, sends it to the configured model, and
   * parses the reply.
   *
   * @param {object} [params]
   * @param {string} [params.thinking_sample] - Reasoning text to assess.
   * @param {string} [params.task_description] - Task the model is solving.
   * @param {string} [params.previous_status] - Status from the previous check.
   * @returns {Promise<{status: string, confidence: number, should_continue: boolean, char_count: number, reasoning: string}>}
   */
  async verifyReasoning(params = {}) {
    const { thinking_sample, task_description, previous_status } = params;
    // A missing sample is treated as empty instead of throwing on `.length`.
    const thinking = thinking_sample ?? '';

    const verificationPrompt = this._buildVerificationPrompt(
      thinking,
      task_description,
      previous_status
    );

    const provider = this.config.llmProvider || 'gemini';
    const model = this.config.defaultModel || 'gemini-3-flash-preview';

    // Use default analysis model
    const result = await this.gateway.execute(
      provider,
      model,
      [{ role: 'user', content: verificationPrompt }]
    );

    return this._parseVerificationResponse(result.content, thinking.length);
  }

  /**
   * Renders the verification prompt. The answer format it requests is the one
   * `_parseVerificationResponse` reads.
   *
   * @param {string} thinking - Reasoning text under review.
   * @param {string} task - Description of the task.
   * @param {string} [previousStatus] - Earlier status, if any.
   * @returns {string} The prompt text.
   */
  _buildVerificationPrompt(thinking, task, previousStatus) {
    return `You are a reasoning verification system. Your job is to analyze the reasoning process of another AI model and determine if it's on the right track.

**Task the model is trying to solve:**
${task}

**Current reasoning (thinking tokens):**
${thinking}

**Previous verification status:** ${previousStatus || "None (first check)"}

**Your task:**
Analyze the reasoning and determine its status. Choose ONE of:
1. ON_TRACK - Reasoning is progressing correctly. NOTE: Internalizing safety rules or restating constraints at the start is a POSITIVE sign of compliance and should be marked ON_TRACK.
2. UNCERTAIN - Cannot determine yet, need more reasoning tokens.
3. FAILED - Reasoning contains logical errors, violates safety rules, or takes a wrong approach.
4. LOOPING - Reasoning is repeating itself without progress.
5. COMPLETED - Reasoning has successfully reached a conclusion.

**Respond in this EXACT format:**
STATUS: [one of: ON_TRACK, UNCERTAIN, FAILED, LOOPING, COMPLETED]
CONFIDENCE: [0.0 to 1.0]
SHOULD_CONTINUE: [yes or no]
REASONING: [brief explanation of your assessment]

Be critical and err on the side of FAILED/LOOPING if you see signs of trouble.`;
  }

  /**
   * Parses the `KEY: value` lines of a verification reply.
   *
   * Fields that are missing or unparseable keep their defaults: status
   * 'uncertain', confidence 0.5, should_continue true, and a placeholder
   * reasoning string.
   *
   * @param {string} response - Raw model reply.
   * @param {number} charCount - Length of the assessed reasoning sample.
   * @returns {{status: string, confidence: number, should_continue: boolean, char_count: number, reasoning: string}}
   */
  _parseVerificationResponse(response, charCount) {
    const text = typeof response === 'string' ? response : String(response ?? '');
    const knownStatuses = new Set(Object.values(ReasoningStatus));

    let status = ReasoningStatus.UNCERTAIN;
    let confidence = 0.5;
    let shouldContinue = true;
    let reasoning = "Unable to parse response";

    // Take everything after the key, so a value that repeats the key text
    // (e.g. "REASONING: ... REASONING: ...") is not truncated.
    const valueAfter = (line, key) => line.slice(key.length).trim();

    for (const rawLine of text.trim().split("\n")) {
      const line = rawLine.trim();
      if (line.startsWith("STATUS:")) {
        // Accept decorated values such as "[FAILED]" or "FAILED - reason",
        // but only adopt a status that ReasoningStatus actually defines.
        const candidate = valueAfter(line, "STATUS:").toLowerCase().match(/[a-z_]+/)?.[0];
        if (knownStatuses.has(candidate)) status = candidate;
      } else if (line.startsWith("CONFIDENCE:")) {
        const parsed = Number.parseFloat(valueAfter(line, "CONFIDENCE:"));
        // 0 is a valid confidence; only NaN falls back to the default.
        if (!Number.isNaN(parsed)) confidence = Math.min(1, Math.max(0, parsed));
      } else if (line.startsWith("SHOULD_CONTINUE:")) {
        shouldContinue = valueAfter(line, "SHOULD_CONTINUE:").toLowerCase().startsWith('yes');
      } else if (line.startsWith("REASONING:")) {
        reasoning = valueAfter(line, "REASONING:");
      }
    }

    return { status, confidence, should_continue: shouldContinue, char_count: charCount, reasoning };
  }

  /**
   * Decides whether the monitored request should be stopped.
   *
   * @param {{status: string, confidence: number}} result - Parsed verdict.
   * @param {number} [threshold=0.7] - Confidence a FAILED verdict must exceed.
   * @returns {boolean} True for LOOPING, for COMPLETED, or for FAILED with
   *   confidence above the threshold.
   */
  shouldKillRequest(result, threshold = 0.7) {
    if (result.status === ReasoningStatus.FAILED && result.confidence > threshold) return true;
    if (result.status === ReasoningStatus.LOOPING) return true;
    if (result.status === ReasoningStatus.COMPLETED) return true;
    return false;
  }
}