@meller/tokentalos 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +121 -0
- package/api/api/v1/analytics.js +153 -0
- package/api/api/v1/opv.js +36 -0
- package/api/api/v1/usage.js +318 -0
- package/api/index.js +111 -0
- package/api/middleware/auth.js +45 -0
- package/api/package.json +38 -0
- package/bin/tokentalos.js +221 -0
- package/index.js +151 -0
- package/lib/engine/ai_analyzer.js +66 -0
- package/lib/engine/analyzer.js +117 -0
- package/lib/engine/cache.js +30 -0
- package/lib/engine/db.js +307 -0
- package/lib/engine/index.js +320 -0
- package/lib/engine/llm_clients.js +255 -0
- package/lib/engine/opv.js +96 -0
- package/lib/engine/parameterizer.js +68 -0
- package/lib/engine/pii_detector.js +73 -0
- package/lib/engine/pricing.js +106 -0
- package/lib/engine/processor.js +157 -0
- package/lib/engine/security.js +101 -0
- package/lib/engine/tokenizers.js +40 -0
- package/package.json +63 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Asaf Meller
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# TokenTalos 🛡️
|
|
2
|
+
|
|
3
|
+
**The "ORM for LLMs"** — A library-first LLM Gateway and Proxy that empowers developers to build modular, tracked, and cost-optimized prompts with built-in safety filtering and reasoning verification.
|
|
4
|
+
|
|
5
|
+
Think of TokenTalos as an **Object-Relational Mapper (ORM)**, but for your Large Language Model interactions. Instead of sending raw, expensive, and potentially insecure strings to providers, you define **parameterized prompt parts** that TokenTalos binds, compresses, and secures before execution.
|
|
6
|
+
|
|
7
|
+
## 🚀 Key Features
|
|
8
|
+
|
|
9
|
+
### 1. Active Guard (Security & Safety)
|
|
10
|
+
* **Prompt Injection Scanning (LLM01):** Heuristic detection of jailbreaks, "Ignore instructions", and system overrides.
|
|
11
|
+
* **Secret Detection (LLM06):** Automatic scanning for API keys, AWS secrets, and high-entropy strings.
|
|
12
|
+
* **PII Redaction:** Automatic masking or rejection of sensitive data (Emails, Keys) before logging or execution.
|
|
13
|
+
* **Input Neutralization:** Automatic XML-wrapping of untrusted data with injected security instructions for the LLM.
|
|
14
|
+
|
|
15
|
+
### 2. Efficiency & Cost Optimization
|
|
16
|
+
* **Semantic Caching:** Skip redundant LLM calls for identical prompt constructions.
|
|
17
|
+
* **Lossless Compression:** Automatic minification of whitespace and JSON blocks within prompts.
|
|
18
|
+
* **Cost Analysis:** Detailed token attribution and real-time provider cost comparisons.
|
|
19
|
+
* **Optimization Recommendations:** Heuristic-based suggestions for model switching and prompt pruning to reduce token weight.
|
|
20
|
+
* **Tokenizer Discrepancy Detection:** Advanced analysis of local vs. provider token counting.
|
|
21
|
+
|
|
22
|
+
### 3. Intelligence & Observability
|
|
23
|
+
* **Streaming OPV (Optimized Process Verification):** Real-time analysis of "thinking tokens" to verify if reasoning is on-track or looping.
|
|
24
|
+
* **Variable Attribution:** Granular tracking of token weight per prompt part (e.g., system vs context vs query).
|
|
25
|
+
* **Engine Insights:** Heuristic-based recommendations for semantic summarization and prompt pruning.
|
|
26
|
+
|
|
27
|
+
## 🚀 Quick Start (SDK)
|
|
28
|
+
|
|
29
|
+
TokenTalos is primarily a developer tool. You can use it as a standalone library (direct DB access) or as a client to a remote Gateway.
|
|
30
|
+
|
|
31
|
+
### Standalone Mode (Library-First)
|
|
32
|
+
Ideal for local development or Node.js backends where you want Zero-Install tracking.
|
|
33
|
+
|
|
34
|
+
```javascript
|
|
35
|
+
import TokenTalos from 'tokentalos';
|
|
36
|
+
|
|
37
|
+
const tt = new TokenTalos({
|
|
38
|
+
mode: 'standalone',
|
|
39
|
+
projectId: 'my-project-id',
|
|
40
|
+
config: {
|
|
41
|
+
// Database and Persistence
|
|
42
|
+
databaseType: 'sqlite',
|
|
43
|
+
sqlitePath: './tokentalos.db',
|
|
44
|
+
|
|
45
|
+
// Regional and Provider Settings
|
|
46
|
+
location: 'us-central1', // GCP/Vertex Region
|
|
47
|
+
|
|
48
|
+
// Feature and Policy Configuration
|
|
49
|
+
securityFeatures: ['injection', 'secrets'],
|
|
50
|
+
formattingFeatures: ['pii', 'neutralize'],
|
|
51
|
+
intelligenceFeatures: ['cache', 'explain'],
|
|
52
|
+
piiAction: 'mask' // Automatic PII masking
|
|
53
|
+
}
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
await tt.init();
|
|
57
|
+
|
|
58
|
+
const result = await tt.execute({
|
|
59
|
+
provider: 'gemini',
|
|
60
|
+
model: 'gemini-3-flash-preview',
|
|
61
|
+
parts: {
|
|
62
|
+
system: 'You are a technical writer.',
|
|
63
|
+
user_query: 'Explain TokenTalos in 20 words.'
|
|
64
|
+
}
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
console.log(result.content);
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
> **Note:** TokenTalos includes native support for Google Gemini. To use other providers (OpenAI, Anthropic, etc.), ensure you have their respective API keys configured and dependencies installed.
|
|
71
|
+
|
|
72
|
+
### Proxy Mode (Gateway)
|
|
73
|
+
Ideal for production environments or non-Node.js apps (PHP, Python, Go) connecting to a central TokenTalos server.
|
|
74
|
+
|
|
75
|
+
```javascript
|
|
76
|
+
const tt = new TokenTalos({
|
|
77
|
+
mode: 'proxy',
|
|
78
|
+
apiUrl: 'https://your-gateway.com/api/v1',
|
|
79
|
+
apiKey: 'your-secret-key'
|
|
80
|
+
});
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### 🌍 Cross-Language Support
|
|
84
|
+
TokenTalos is designed as a language-agnostic Gateway. You can use standard HTTP clients in any language (PHP, Python, Go, etc.) to communicate with the TokenTalos Proxy.
|
|
85
|
+
|
|
86
|
+
Check out the [examples/](./examples) directory for a **PHP cURL** example.
|
|
87
|
+
|
|
88
|
+
## 🛠️ Installation
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
# As a project dependency
|
|
92
|
+
npm install tokentalos
|
|
93
|
+
|
|
94
|
+
# For CLI and Dashboard access
|
|
95
|
+
npx tokentalos setup
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## 🏗️ Architecture

TokenTalos is composed of three parts: a shared engine library (`lib/engine`) that handles parameterization, security checks, caching, and pricing; a Collector API (`api/`) that exposes the gateway and analytics endpoints; and a CLI/Dashboard (`bin/tokentalos.js`) for setup, operation, and reporting.
|
|
99
|
+
|
|
100
|
+
## 💻 CLI Usage
|
|
101
|
+
|
|
102
|
+
| Command | Description |
|
|
103
|
+
| :--- | :--- |
|
|
104
|
+
| `tokentalos setup` | Run the interactive configuration wizard. |
|
|
105
|
+
| `tokentalos start` | Start the full service (Collector + Dashboard). |
|
|
106
|
+
| `tokentalos start collector` | Start only the API ingestion service. |
|
|
107
|
+
| `tokentalos start dashboard` | Start only the visual interface. |
|
|
108
|
+
| `tokentalos stop` | Stop all services. |
|
|
109
|
+
| `tokentalos stats` | Show aggregate token and cost statistics in the terminal. |
|
|
110
|
+
| `tokentalos list` | Display a table of recent prompt logs. |
|
|
111
|
+
| `tokentalos export` | Export usage logs to JSONL or LangSmith formats. |
|
|
112
|
+
|
|
113
|
+
## 🧩 Special Variables
|
|
114
|
+
|
|
115
|
+
TokenTalos recognizes specific variable names to enable enhanced features:
|
|
116
|
+
* `safety_guardrails`: Used as ground-truth context for OPV verification.
|
|
117
|
+
* `thinking` / `reasoning`: Targeted for chain-of-thought analysis.
|
|
118
|
+
* `system` / `context` / `history`: Recognized for specialized tracking and bloating analysis.
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
*For a full list of variable behaviors, see [VARIABLES.md](./VARIABLES.md).*
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import express from 'express';
|
|
2
|
+
import { getDb } from '../../../lib/engine/db.js';
|
|
3
|
+
import { authMiddleware } from '../../middleware/auth.js';
|
|
4
|
+
import { getCostCalculator } from '../../../lib/engine/pricing.js';
|
|
5
|
+
|
|
6
|
+
const router = express.Router();
|
|
7
|
+
|
|
8
|
+
// GET /api/v1/analytics/stats
// Aggregate token/cost totals, savings, per-provider/per-model breakdowns,
// and security counters for the authenticated org (optionally one project).
router.get('/stats', authMiddleware, async (req, res) => {
  const db = getDb();
  const { projectId } = req.query;
  const orgId = req.orgId;

  try {
    // Scope every query to the caller's org; narrow to a project if requested.
    let scopeSql = ' WHERE org_id = ?';
    const bindings = [orgId];

    if (projectId) {
      scopeSql += ' AND project_id = ?';
      bindings.push(projectId);
    }

    const totals = await db.get(`
      SELECT
        SUM(total_tokens) as total_tokens,
        SUM(total_cost) as total_cost,
        SUM(CASE WHEN type = 'cache_hit' THEN saved_tokens ELSE 0 END) as cache_saved_tokens,
        SUM(CASE WHEN type = 'execution' THEN saved_tokens ELSE 0 END) as compression_saved_tokens,
        SUM(saved_cost) as total_saved_cost,
        COUNT(id) as total_requests,
        COUNT(CASE WHEN type = 'cache_hit' THEN 1 END) as total_cache_hits
      FROM usage_data
      ${scopeSql}
    `, bindings);

    // Potential savings: compare the 100 most recent executions against the
    // cheapest comparable provider/model combination.
    const recentExecutions = await db.all(`
      SELECT provider, model, input_tokens, output_tokens, total_cost
      FROM usage_data
      ${scopeSql} AND type = 'execution'
      ORDER BY timestamp DESC LIMIT 100
    `, bindings);

    const calculator = getCostCalculator();
    const totalPotentialSavings = recentExecutions.reduce((sum, row) => {
      const alt = calculator.getBestAlternative(row.provider, row.model, row.input_tokens, row.output_tokens);
      // Only count alternatives that are strictly cheaper than what was paid.
      return alt && alt.cost < row.total_cost ? sum + (row.total_cost - alt.cost) : sum;
    }, 0);

    const byProvider = await db.all(`
      SELECT provider, SUM(total_tokens) as total_tokens, SUM(total_cost) as total_cost
      FROM usage_data
      ${scopeSql}
      GROUP BY provider
    `, bindings);

    const byModel = await db.all(`
      SELECT model, SUM(total_tokens) as total_tokens, SUM(total_cost) as total_cost
      FROM usage_data
      ${scopeSql}
      GROUP BY model
    `, bindings);

    const piiHits = await db.get(`
      SELECT COUNT(*) as count FROM pii_hits
      JOIN usage_data ON pii_hits.usage_id = usage_data.id
      ${scopeSql}
    `, bindings);

    const securityAlerts = await db.get(`
      SELECT COUNT(*) as count FROM security_alerts
      JOIN usage_data ON security_alerts.usage_id = usage_data.id
      ${scopeSql}
    `, bindings);

    // Null SUM/COUNT results (empty org) are coerced to 0 for the response.
    res.json({
      total_tokens: totals.total_tokens || 0,
      total_cost: totals.total_cost || 0,
      total_saved_tokens: (totals.cache_saved_tokens || 0) + (totals.compression_saved_tokens || 0),
      cache_saved_tokens: totals.cache_saved_tokens || 0,
      compression_saved_tokens: totals.compression_saved_tokens || 0,
      total_cache_hits: totals.total_cache_hits || 0,
      total_saved_cost: totals.total_saved_cost || 0,
      total_requests: totals.total_requests || 0,
      pii_hits: piiHits.count || 0,
      security_alerts: securityAlerts.count || 0,
      potential_savings: totalPotentialSavings,
      by_provider: byProvider,
      by_model: byModel
    });
  } catch (err) {
    console.error('Stats error:', err);
    res.status(500).json({ error: 'Failed to fetch statistics' });
  }
});
|
|
100
|
+
|
|
101
|
+
// GET /api/v1/analytics/heatmap
// Token-weight heatmap per prompt variable over a trailing time window.
router.get('/heatmap', authMiddleware, async (req, res) => {
  const db = getDb();
  // `days` is interpolated into the SQL text below, so it MUST be a
  // validated positive integer — never the raw query-string value.
  // (Previously `req.query.days` was spliced in verbatim: SQL injection.)
  const parsedDays = Number.parseInt(req.query.days, 10);
  const days = Number.isInteger(parsedDays) && parsedDays > 0 ? parsedDays : 30;
  const projectId = req.query.projectId;
  const orgId = req.orgId;

  try {
    // SQLite and Postgres spell relative-date arithmetic differently.
    const timestampFilter = db.type === 'sqlite'
      ? `usage_data.timestamp >= datetime('now', '-${days} days')`
      : `usage_data.timestamp >= CURRENT_TIMESTAMP - INTERVAL '${days} days'`;

    let filterClause = ` WHERE ${timestampFilter} AND usage_data.org_id = ?`;
    let params = [orgId];

    if (projectId) {
      filterClause += ` AND usage_data.project_id = ?`;
      params.push(projectId);
    }

    // 'system' and 'context' are excluded: they dominate token counts and
    // would drown out the per-variable signal the heatmap is meant to show.
    const heatmap = await db.all(`
      SELECT
        name as variable_name,
        SUM(token_count) as total_tokens,
        COUNT(usage_id) as request_count
      FROM prompt_variables
      JOIN usage_data ON prompt_variables.usage_id = usage_data.id
      ${filterClause}
      AND name NOT IN ('system', 'context')
      GROUP BY name
      ORDER BY total_tokens DESC
    `, params);

    res.json({ heatmap });
  } catch (err) {
    console.error('Heatmap error:', err);
    res.status(500).json({ error: 'Failed to fetch heatmap data' });
  }
});
|
|
140
|
+
|
|
141
|
+
// GET /api/v1/analytics/projects
// List the distinct project ids that have recorded usage for this org.
router.get('/projects', authMiddleware, async (req, res) => {
  const db = getDb();
  try {
    const rows = await db.all('SELECT DISTINCT project_id FROM usage_data WHERE org_id = ?', [req.orgId]);
    const projectIds = rows.map((row) => row.project_id);
    res.json(projectIds);
  } catch (err) {
    res.status(500).json({ error: 'Failed to fetch projects' });
  }
});
|
|
152
|
+
|
|
153
|
+
export default router;
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import express from 'express';
|
|
2
|
+
import { TokenTalosEngine } from '../../../lib/engine/index.js';
|
|
3
|
+
|
|
4
|
+
const router = express.Router();
|
|
5
|
+
let config = {};
|
|
6
|
+
|
|
7
|
+
/**
 * Inject the server-wide configuration used when constructing engines
 * for OPV heartbeat requests.
 * @param {object} newConfig - Resolved TokenTalos configuration object.
 */
export function setConfig(newConfig) {
  config = newConfig;
}
|
|
10
|
+
|
|
11
|
+
// POST /api/v1/opv/heartbeat - Real-time reasoning analysis
// Checks a sample of "thinking" tokens against the task description and
// reports whether the reasoning looks on-track.
router.post('/heartbeat', async (req, res) => {
  const { thinking_sample, task_description, previous_status } = req.body;

  // Both the reasoning sample and the task it is judged against are mandatory.
  if (!thinking_sample || !task_description) {
    return res.status(400).json({ error: 'thinking_sample and task_description are required' });
  }

  try {
    // NOTE(review): a fresh engine is constructed per request — confirm
    // init() is cheap before relying on this under high traffic.
    const engine = new TokenTalosEngine(config);
    await engine.init();

    const verdict = await engine.verifyReasoning({
      thinking_sample,
      task_description,
      previous_status
    });

    res.json(verdict);
  } catch (err) {
    console.error('OPV heartbeat error:', err);
    res.status(500).json({ error: 'Failed to verify reasoning', details: err.message });
  }
});
|
|
35
|
+
|
|
36
|
+
export default router;
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
import express from 'express';
|
|
2
|
+
import { getDb } from '../../../lib/engine/db.js';
|
|
3
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
4
|
+
import { TokenTalosPrompt } from '../../../lib/engine/parameterizer.js';
|
|
5
|
+
import { getCostCalculator } from '../../../lib/engine/pricing.js';
|
|
6
|
+
import { processPromptParts } from '../../../lib/engine/processor.js';
|
|
7
|
+
import { detectPII } from '../../../lib/engine/pii_detector.js';
|
|
8
|
+
import { getLLMGateway } from '../../../lib/engine/llm_clients.js';
|
|
9
|
+
import { TokenTalosEngine } from '../../../lib/engine/index.js';
|
|
10
|
+
import { authMiddleware } from '../../middleware/auth.js';
|
|
11
|
+
|
|
12
|
+
const router = express.Router();
|
|
13
|
+
let config = {};
|
|
14
|
+
|
|
15
|
+
/**
 * Store the server-wide configuration consumed by the usage routes
 * (pricing defaults, PII features, token limits).
 * @param {object} resolvedConfig - Resolved TokenTalos configuration object.
 */
export function setConfig(resolvedConfig) {
  config = resolvedConfig;
}
|
|
18
|
+
|
|
19
|
+
// POST /api/v1/usage/ingest - Ingest usage data from external sources
// Records a usage row (plus per-variable rows and action/PII logs) reported
// by a non-gateway client, pricing it with the local cost calculator.
router.post('/ingest', authMiddleware, async (req, res) => {
  const db = getDb();
  const data = req.body;
  const usageId = uuidv4();
  const projectId = data.projectId || 'default';
  const orgId = req.orgId;

  const provider = data.provider || config.llmProvider || 'gemini';
  const model = data.model || config.defaultModel || 'gemini-3-flash-preview';

  const inputTokens = data.input_tokens || 0;
  const outputTokens = data.output_tokens || 0;
  const totalTokens = inputTokens + outputTokens;
  const calculator = getCostCalculator();
  const [inputCost, outputCost] = calculator.calculateCost(
    provider,
    model,
    inputTokens,
    outputTokens
  );

  const totalCost = inputCost + outputCost;
  const limitExceeded = totalTokens > (config.maxTokens || 32000);

  // Optional fields: bind explicit NULLs rather than `undefined`, which is
  // not a valid SQL bind value for common drivers. 0 is a valid latency.
  const endpoint = data.endpoint || null;
  const latencyMs = typeof data.latency_ms === 'number' ? data.latency_ms : null;

  try {
    await db.run(`
      INSERT INTO usage_data (
        id, org_id, project_id, type, provider, model, full_prompt, response_content, input_tokens, output_tokens, total_tokens,
        input_cost, output_cost, total_cost, endpoint, latency_ms, token_limit_exceeded, timestamp
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `, [
      usageId, orgId, projectId, 'ingested', provider, model, data.full_prompt || null, data.response_content || null,
      inputTokens, outputTokens,
      totalTokens, inputCost, outputCost, totalCost, endpoint, latencyMs,
      limitExceeded ? 1 : 0, data.timestamp || new Date().toISOString()
    ]);

    if (data.variables && Array.isArray(data.variables)) {
      for (const v of data.variables) {
        // Normalize once so the stored value and the PII scan see the same
        // text, and a missing `content` field cannot reach detectPII as
        // undefined.
        const content = v.content || '';
        await db.run(`
          INSERT INTO prompt_variables (usage_id, name, content, original_content, token_count, char_count, position)
          VALUES (?, ?, ?, ?, ?, ?, ?)
        `, [
          usageId,
          v.name,
          content,
          v.original_content || content,
          v.token_count || 0,
          v.char_count || 0,
          v.position || 0
        ]);

        if (config.formattingFeatures && config.formattingFeatures.includes('pii')) {
          const findings = detectPII(content);
          for (const finding of findings) {
            await db.run(`
              INSERT INTO pii_hits (usage_id, variable_name, pii_type, action_taken)
              VALUES (?, ?, ?, ?)
            `, [usageId, v.name, finding.type, 'ingested_warning']);
          }
        }
      }
    }

    if (data.actions_taken && Array.isArray(data.actions_taken)) {
      for (const action of data.actions_taken) {
        await db.run(`
          INSERT INTO variable_actions (usage_id, variable_name, action_type, action_method, details)
          VALUES (?, ?, ?, ?, ?)
        `, [
          usageId,
          action.target,
          action.type,
          action.method || null,
          JSON.stringify(action)
        ]);
      }
    }

    res.status(201).json({ id: usageId, message: 'Usage data ingested successfully' });
  } catch (err) {
    console.error('Ingestion error:', err);
    res.status(500).json({ error: 'Failed to ingest usage data' });
  }
});
|
|
105
|
+
|
|
106
|
+
// GET /api/v1/usage/recent - Get recent usage records
// Returns the newest usage rows for the org, each enriched with its prompt
// variables and (when present) its parsed explain plan.
router.get('/recent', authMiddleware, async (req, res) => {
  const db = getDb();
  // Validate the limit: previously `parseInt(limit)` had no radix and no NaN
  // guard, so `?limit=abc` would bind NaN to LIMIT.
  const parsedLimit = Number.parseInt(req.query.limit, 10);
  const limit = Number.isInteger(parsedLimit) && parsedLimit > 0 ? parsedLimit : 50;
  const projectId = req.query.projectId;
  const orgId = req.orgId;

  // Parse a stored JSON column defensively: one corrupt row must not turn
  // the whole listing into a 500.
  const parseJsonColumn = (text) => {
    try {
      return JSON.parse(text);
    } catch (err) {
      console.error('Corrupt JSON column in explain_plans:', err);
      return null;
    }
  };

  try {
    let sql = 'SELECT * FROM usage_data WHERE org_id = ?';
    let params = [orgId];

    if (projectId) {
      sql += ' AND project_id = ?';
      params.push(projectId);
    }

    sql += ' ORDER BY timestamp DESC LIMIT ?';
    params.push(limit);

    const usageRecords = await db.all(sql, params);

    // N+1 queries per record; acceptable at the default limit of 50.
    for (const record of usageRecords) {
      record.variables = await db.all(`
        SELECT name, content, original_content, token_count, char_count, position
        FROM prompt_variables
        WHERE usage_id = ?
      `, [record.id]);

      record.explain_plan = await db.get(`
        SELECT * FROM explain_plans WHERE usage_id = ?
      `, [record.id]);

      if (record.explain_plan) {
        if (record.explain_plan.variable_analysis) record.explain_plan.variable_analysis = parseJsonColumn(record.explain_plan.variable_analysis);
        if (record.explain_plan.detected_issues) record.explain_plan.detected_issues = parseJsonColumn(record.explain_plan.detected_issues);
        if (record.explain_plan.optimization_suggestions) record.explain_plan.optimization_suggestions = parseJsonColumn(record.explain_plan.optimization_suggestions);

        // On-the-fly MCE calculation if missing from DB (for existing records)
        if (!record.explain_plan.mce_best_alternative_model) {
          const calculator = getCostCalculator();
          const bestAlt = calculator.getBestAlternative(record.provider, record.model, record.input_tokens, record.output_tokens);
          if (bestAlt) {
            const savingsPct = record.total_cost > 0 ? ((record.total_cost - bestAlt.cost) / record.total_cost) * 100 : 0;
            // Only surface alternatives worth acting on (>10% cheaper).
            if (savingsPct > 10) {
              record.explain_plan.mce_best_alternative_model = bestAlt.model;
              record.explain_plan.mce_best_alternative_provider = bestAlt.provider;
              record.explain_plan.mce_best_alternative_cost = bestAlt.cost;
              record.explain_plan.mce_savings_pct = savingsPct;
            }
          }
        }
      }
    }

    res.json(usageRecords);
  } catch (err) {
    console.error('Recent usage error:', err);
    res.status(500).json({ error: 'Failed to fetch recent usage' });
  }
});
|
|
166
|
+
|
|
167
|
+
// POST /api/v1/usage/execute - Gateway execution
// Thin wrapper: scope the request to the caller's org/project and hand the
// full payload to the engine for construction + provider execution.
router.post('/execute', authMiddleware, async (req, res) => {
  try {
    const engine = new TokenTalosEngine(config);
    await engine.init();

    const payload = {
      ...req.body,
      orgId: req.orgId,
      projectId: req.body.projectId || req.query.projectId,
      // Cache bypass is opt-in via the query string only.
      bypassCache: req.query.bypassCache === 'true'
    };

    const result = await engine.execute(payload);
    res.json(result);
  } catch (err) {
    console.error('Execution error:', err);
    res.status(500).json({ error: 'Failed to execute prompt', details: err.message });
  }
});
|
|
186
|
+
|
|
187
|
+
// POST /api/v1/usage/prompt/construct - Active orchestration
// Builds a provider-ready message list from named prompt parts: runs the
// configured security/formatting pipeline, enforces the token-limit policy,
// records the construction (variables, actions, PII hits, explain plan),
// and returns the assembled messages without executing them.
router.post('/prompt/construct', authMiddleware, async (req, res) => {
  const { provider, model, parts, endpoint, projectId } = req.body;
  const orgId = req.orgId;

  // NOTE(review): this await runs OUTSIDE the try below — a failure in
  // processPromptParts bubbles to Express's default error handler instead
  // of returning this route's 500 payload. Confirm that is intended.
  const { processedParts, metadata } = await processPromptParts(parts, config);

  const finalProvider = provider || config.llmProvider || 'gemini';
  const finalModel = model || config.defaultModel || 'gemini-3-flash-preview';

  const prompt = new TokenTalosPrompt(finalProvider, finalModel);

  // Route well-known part names to their dedicated builders; anything else
  // is added as a generic named part. Both the processed and original text
  // are passed so the tracker can attribute savings per part.
  for (const key in processedParts) {
    if (key === 'system') prompt.addSystem(processedParts[key], parts[key]);
    else if (key === 'context') prompt.addContext(processedParts[key], parts[key]);
    else if (key === 'history') prompt.addHistory(processedParts[key], parts[key]);
    else if (key === 'user_query') prompt.addUserQuery(processedParts[key], parts[key]);
    else prompt.add(key, processedParts[key], parts[key]);
  }

  const messages = prompt.toMessages();
  const trackingData = prompt.getTrackingData();

  const maxTokens = config.maxTokens || 32000;
  const thresholdAction = config.thresholdAction || 'warning';

  // Policy gate: only the 'reject' action blocks the construction; the
  // default 'warning' action still records it (flagged below).
  if (trackingData.total_tokens > maxTokens && thresholdAction === 'reject') {
    return res.status(400).json({
      error: 'Token limit exceeded',
      total_tokens: trackingData.total_tokens,
      max_tokens: maxTokens,
      message: 'Construction rejected by policy. Truncate parts or increase limit in setup.'
    });
  }

  const db = getDb();
  const calculator = getCostCalculator();
  // Output tokens are 0 here: nothing has been executed yet, so only the
  // input side of the cost can be estimated.
  const [inputCost] = calculator.calculateCost(finalProvider, finalModel, trackingData.total_tokens, 0);

  const limitExceeded = trackingData.total_tokens > maxTokens;
  const finalProjectId = projectId || 'default';

  try {
    // input_tokens == total_tokens and input_cost == total_cost by design:
    // a 'construction' row has no output component.
    await db.run(`
      INSERT INTO usage_data (id, org_id, project_id, type, provider, model, input_tokens, total_tokens, input_cost, total_cost, endpoint, token_limit_exceeded, timestamp)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `, [
      trackingData.id, orgId, finalProjectId, 'construction', finalProvider, finalModel, trackingData.total_tokens, trackingData.total_tokens,
      inputCost, inputCost, endpoint, limitExceeded ? 1 : 0, trackingData.timestamp
    ]);

    // 1. Per-variable token attribution rows.
    for (const v of trackingData.variables) {
      await db.run(`
        INSERT INTO prompt_variables (usage_id, name, content, original_content, token_count, char_count, position)
        VALUES (?, ?, ?, ?, ?, ?, ?)
      `, [trackingData.id, v.name, v.content, v.original_content, v.token_count, v.char_count, v.position]);
    }

    for (const action of metadata.actions_taken) {
      // ... same generic action log ...
      await db.run(`
        INSERT INTO variable_actions (usage_id, variable_name, action_type, action_method, details)
        VALUES (?, ?, ?, ?, ?)
      `, [
        trackingData.id,
        action.target,
        action.type,
        action.method || null,
        JSON.stringify(action)
      ]);

      // 2. Legacy PII hit log (for backward compat/dashboards)
      if (action.type === 'pii') {
        for (const finding of action.findings) {
          await db.run(`
            INSERT INTO pii_hits (usage_id, variable_name, pii_type, action_taken)
            VALUES (?, ?, ?, ?)
          `, [trackingData.id, action.target, finding.type, action.method]);
        }
      }
    }

    // 3. Heuristic Analysis
    // FIXME: runHeuristicAnalysis is not imported anywhere in this file
    // (presumably it lives in lib/engine/analyzer.js — verify). As written
    // this line throws ReferenceError at runtime, which the catch below
    // converts into a 500 even though the row was already inserted.
    const analysis = runHeuristicAnalysis({
      total_tokens: trackingData.total_tokens,
      total_cost: inputCost,
      provider: finalProvider,
      model: finalModel
    }, trackingData.variables);

    if (analysis) {
      const planId = uuidv4();
      await db.run(`
        INSERT INTO explain_plans (
          id, usage_id, variable_analysis, detected_issues, optimization_suggestions,
          estimated_savings_pct, estimated_savings_usd,
          mce_best_alternative_model, mce_best_alternative_provider, mce_best_alternative_cost, mce_savings_pct
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
      `, [
        planId,
        trackingData.id,
        JSON.stringify(analysis.variable_analysis),
        JSON.stringify(analysis.detected_issues),
        JSON.stringify(analysis.optimization_suggestions),
        analysis.estimated_savings_pct,
        analysis.estimated_savings_usd,
        analysis.mce_best_alternative_model || null,
        analysis.mce_best_alternative_provider || null,
        analysis.mce_best_alternative_cost || 0,
        analysis.mce_savings_pct || 0
      ]);
    }

    // The caller receives the assembled messages plus everything needed to
    // decide whether/how to execute them.
    res.json({
      id: trackingData.id,
      messages: messages,
      full_prompt_string: prompt.toString(),
      total_tokens: trackingData.total_tokens,
      estimated_input_cost: inputCost,
      token_limit_exceeded: limitExceeded,
      max_tokens: maxTokens,
      threshold_action: thresholdAction,
      checks_run: metadata.checks_run,
      actions_taken: metadata.actions_taken
    });
  } catch (err) {
    console.error('Construction error:', err);
    res.status(500).json({ error: 'Failed to construct prompt' });
  }
});
|
|
317
|
+
|
|
318
|
+
export default router;
|