@papyruslabsai/seshat-mcp 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bootstrap.d.ts +29 -0
- package/dist/bootstrap.js +187 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.js +74 -5
- package/dist/supabase.d.ts +73 -0
- package/dist/supabase.js +124 -0
- package/dist/tools/functors.d.ts +19 -1
- package/dist/tools/functors.js +166 -2
- package/package.json +1 -1
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
 * Auto-Bootstrap — extract a .seshat/ bundle on first run.
 *
 * When the MCP server starts and finds no .seshat/_bundle.json, this module
 * spawns `npx @papyruslabsai/seshat-extract` to generate one on the fly.
 *
 * CI continues to regenerate the bundle on every merge to main, overwriting
 * the bootstrap result. The bootstrap only runs when .seshat/ doesn't exist.
 *
 * Env vars:
 *   SESHAT_BOOTSTRAP_TIMEOUT — spawn timeout in ms (default: 120000)
 */
/** Outcome of one auto-bootstrap attempt. */
export interface BootstrapResult {
    /** True when the extractor produced a usable bundle. */
    success: boolean;
    /** Entities reported by the extractor; 0 when unknown. */
    entityCount: number;
    /** Languages reported by the extractor; empty when unknown. */
    languages: string[];
    /** Wall-clock duration of the attempt, in milliseconds. */
    durationMs: number;
    /** Human-readable failure reason; present only when success is false. */
    error?: string;
}
/**
 * Auto-bootstrap: extract a .seshat/ bundle for the given project directory.
 *
 * Spawns `npx @papyruslabsai/seshat-extract <dir> <dir>/.seshat <name>`
 * and waits for it to complete.
 *
 * @param projectDir - Absolute path to the project root
 * @returns Bootstrap result with entity count, languages, duration, and any error
 */
export declare function bootstrap(projectDir: string): Promise<BootstrapResult>;
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Auto-Bootstrap — extract a .seshat/ bundle on first run.
|
|
3
|
+
*
|
|
4
|
+
* When the MCP server starts and finds no .seshat/_bundle.json, this module
|
|
5
|
+
* spawns `npx @papyruslabsai/seshat-extract` to generate one on the fly.
|
|
6
|
+
*
|
|
7
|
+
* CI continues to regenerate the bundle on every merge to main, overwriting
|
|
8
|
+
* the bootstrap result. The bootstrap only runs when .seshat/ doesn't exist.
|
|
9
|
+
*
|
|
10
|
+
* Env vars:
|
|
11
|
+
* SESHAT_BOOTSTRAP_TIMEOUT — spawn timeout in ms (default: 120000)
|
|
12
|
+
*/
|
|
13
|
+
import { spawn } from 'child_process';
|
|
14
|
+
import fs from 'fs';
|
|
15
|
+
import path from 'path';
|
|
16
|
+
import { execSync } from 'child_process';
|
|
17
|
+
// Files/dirs whose presence marks a directory as a code project.
const PROJECT_MARKERS = [
    '.git',
    'package.json',
    'go.mod',
    'Cargo.toml',
    'pyproject.toml',
    'pom.xml',
    'build.gradle',
    'Makefile',
    'CMakeLists.txt',
];
/**
 * Heuristic: does this directory look like a code project?
 * True when any known marker file exists, or when the directory contains
 * a *.sln solution file (C# solutions have no fixed file name).
 */
function isCodeProject(dir) {
    const hasMarker = PROJECT_MARKERS.some((name) => fs.existsSync(path.join(dir, name)));
    if (hasMarker) {
        return true;
    }
    // Solutions (*.sln) require scanning the directory listing.
    let listing;
    try {
        listing = fs.readdirSync(dir);
    }
    catch {
        // Unreadable/missing directory — treat as "not a project".
        return false;
    }
    return listing.some((entry) => entry.endsWith('.sln'));
}
|
|
47
|
+
/**
 * Infer a human-friendly project name for a directory.
 * Priority: package.json "name" (scope stripped) → git origin remote → basename.
 */
function inferProjectName(dir) {
    // 1. package.json "name", with any @scope/ prefix removed.
    try {
        const manifest = JSON.parse(fs.readFileSync(path.join(dir, 'package.json'), 'utf-8'));
        if (manifest.name) {
            return manifest.name.replace(/^@[^/]+\//, '');
        }
    }
    catch { }
    // 2. Last path segment of the git origin URL, minus any trailing ".git".
    try {
        const originUrl = execSync('git remote get-url origin', {
            cwd: dir, encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe']
        }).trim();
        const tail = originUrl.match(/\/([^/]+?)(?:\.git)?$/);
        if (tail) {
            return tail[1];
        }
    }
    catch { }
    // 3. Fall back to the directory's own name.
    return path.basename(dir);
}
|
|
74
|
+
/**
 * Auto-bootstrap: extract a .seshat/ bundle for the given project directory.
 *
 * Spawns `npx @papyruslabsai/seshat-extract <dir> <dir>/.seshat <name>`
 * and waits for it to complete, forwarding the extractor's stderr as progress.
 * Never rejects — every failure mode resolves with { success: false, error }.
 *
 * @param projectDir - Absolute path to the project root
 * @returns Bootstrap result with entity count, languages, duration, and any error
 */
export async function bootstrap(projectDir) {
    const startTime = Date.now();
    const timeoutMs = parseInt(process.env.SESHAT_BOOTSTRAP_TIMEOUT || '120000', 10);
    // Uniform failure shape; duration captured at the moment of failure.
    const failure = (error) => ({
        success: false,
        entityCount: 0,
        languages: [],
        durationMs: Date.now() - startTime,
        error,
    });
    // Sanity check: refuse to extract from directories that don't look like code.
    if (!isCodeProject(projectDir)) {
        return failure(`${projectDir} does not appear to be a code project (no package.json, .git, go.mod, etc.)`);
    }
    const seshatDir = path.join(projectDir, '.seshat');
    const projectName = inferProjectName(projectDir);
    process.stderr.write(`[seshat-mcp] Auto-bootstrap: extracting ${projectName} from ${projectDir}\n`);
    return new Promise((resolve) => {
        // On Windows, npx is npx.cmd and needs shell: true to execute.
        // NOTE(review): with shell: true, arguments containing spaces may need
        // quoting on Windows — confirm with a projectDir that contains spaces.
        const isWindows = process.platform === 'win32';
        const npxCmd = isWindows ? 'npx.cmd' : 'npx';
        const child = spawn(npxCmd, ['-y', '@papyruslabsai/seshat-extract', projectDir, seshatDir, projectName], {
            cwd: projectDir,
            shell: isWindows, // Windows needs shell: true for .cmd files
            stdio: ['pipe', 'pipe', 'pipe'],
            timeout: timeoutMs, // Node kills the child when this elapses
        });
        let stdout = '';
        let stderr = '';
        child.stdout?.on('data', (data) => {
            stdout += data.toString();
        });
        child.stderr?.on('data', (data) => {
            const text = data.toString();
            stderr += text;
            // Forward progress to parent stderr
            process.stderr.write(`[seshat-extract] ${text}`);
        });
        child.on('error', (err) => {
            resolve(failure(`Failed to spawn seshat-extract: ${err.message}`));
        });
        child.on('close', (code, signal) => {
            const durationMs = Date.now() - startTime;
            if (code !== 0) {
                // code === null means the child was killed; with the `timeout`
                // option set, that is most likely the timeout firing. The old
                // message ("exited with code null") hid this.
                const reason = code === null
                    ? `was killed${signal ? ` by ${signal}` : ''} (possibly timed out after ${timeoutMs}ms)`
                    : `exited with code ${code}`;
                resolve({
                    success: false,
                    entityCount: 0,
                    languages: [],
                    durationMs,
                    error: `seshat-extract ${reason}. stderr: ${stderr.slice(-500)}`,
                });
                return;
            }
            // The extractor prints a single JSON result object on stdout.
            try {
                const result = JSON.parse(stdout.trim());
                if (result.ok) {
                    process.stderr.write(`[seshat-mcp] Bootstrap complete: ${result.entities} entities, ${result.languages?.join(', ')} in ${(durationMs / 1000).toFixed(1)}s\n`);
                    resolve({
                        success: true,
                        entityCount: result.entities || 0,
                        languages: result.languages || [],
                        durationMs,
                    });
                }
                else {
                    resolve({
                        success: false,
                        entityCount: 0,
                        languages: [],
                        durationMs,
                        error: result.error || 'Unknown extraction error',
                    });
                }
            }
            catch {
                // stdout wasn't valid JSON — the bundle may still have been
                // written, in which case we proceed anyway.
                const bundlePath = path.join(seshatDir, '_bundle.json');
                if (fs.existsSync(bundlePath)) {
                    process.stderr.write(`[seshat-mcp] Bootstrap produced bundle but JSON result was malformed. Proceeding.\n`);
                    resolve({
                        success: true,
                        entityCount: 0,
                        languages: [],
                        durationMs,
                    });
                }
                else {
                    resolve({
                        success: false,
                        entityCount: 0,
                        languages: [],
                        durationMs,
                        error: `seshat-extract succeeded but produced no parseable result. stdout: ${stdout.slice(-200)}`,
                    });
                }
            }
        });
    });
}
package/dist/index.d.ts
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
* Single-project mode (default):
|
|
14
14
|
* When SESHAT_PROJECTS is not set, loads from CWD. No `project` param needed.
|
|
15
15
|
*
|
|
16
|
-
* Tools (8 core +
|
|
16
|
+
* Tools (8 core + 9 interpretation functors + 1 meta):
|
|
17
17
|
* list_projects — Show loaded projects with entity counts
|
|
18
18
|
* query_entities — Search entities by name, layer, module, language
|
|
19
19
|
* get_entity — Full 9D coordinate dump for one entity
|
|
@@ -31,6 +31,7 @@
|
|
|
31
31
|
* get_test_coverage — Entities exercised by tests vs uncovered
|
|
32
32
|
* get_optimal_context — Greedy knapsack: max relevance per token for LLM context
|
|
33
33
|
* estimate_task_cost — Pre-work token burn projection from blast radius + source tokens
|
|
34
|
+
* report_actual_burn — Close calibration loop: actual tokens vs prediction, drift stats
|
|
34
35
|
*
|
|
35
36
|
* Usage:
|
|
36
37
|
* npx @papyruslabs/seshat-mcp # single project (CWD)
|
package/dist/index.js
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
* Single-project mode (default):
|
|
14
14
|
* When SESHAT_PROJECTS is not set, loads from CWD. No `project` param needed.
|
|
15
15
|
*
|
|
16
|
-
* Tools (8 core +
|
|
16
|
+
* Tools (8 core + 9 interpretation functors + 1 meta):
|
|
17
17
|
* list_projects — Show loaded projects with entity counts
|
|
18
18
|
* query_entities — Search entities by name, layer, module, language
|
|
19
19
|
* get_entity — Full 9D coordinate dump for one entity
|
|
@@ -31,6 +31,7 @@
|
|
|
31
31
|
* get_test_coverage — Entities exercised by tests vs uncovered
|
|
32
32
|
* get_optimal_context — Greedy knapsack: max relevance per token for LLM context
|
|
33
33
|
* estimate_task_cost — Pre-work token burn projection from blast radius + source tokens
|
|
34
|
+
* report_actual_burn — Close calibration loop: actual tokens vs prediction, drift stats
|
|
34
35
|
*
|
|
35
36
|
* Usage:
|
|
36
37
|
* npx @papyruslabs/seshat-mcp # single project (CWD)
|
|
@@ -43,8 +44,9 @@ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
|
43
44
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
44
45
|
import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
|
|
45
46
|
import { MultiLoader } from './loader.js';
|
|
47
|
+
import { bootstrap } from './bootstrap.js';
|
|
46
48
|
import { initTools, queryEntities, getEntity, getDependencies, getDataFlow, findByConstraint, getBlastRadius, listModules, getTopology, } from './tools/index.js';
|
|
47
|
-
import { findDeadCode, findLayerViolations, getCouplingMetrics, getAuthMatrix, findErrorGaps, getTestCoverage, getOptimalContext, estimateTaskCost, } from './tools/functors.js';
|
|
49
|
+
import { findDeadCode, findLayerViolations, getCouplingMetrics, getAuthMatrix, findErrorGaps, getTestCoverage, getOptimalContext, estimateTaskCost, reportActualBurn, } from './tools/functors.js';
|
|
48
50
|
// ─── Project Discovery ───────────────────────────────────────────
|
|
49
51
|
/**
|
|
50
52
|
* Discover project directories from SESHAT_PROJECTS env var.
|
|
@@ -358,18 +360,82 @@ const TOOLS = [
|
|
|
358
360
|
required: ['target_entities'],
|
|
359
361
|
},
|
|
360
362
|
},
|
|
363
|
+
{
|
|
364
|
+
name: 'report_actual_burn',
|
|
365
|
+
description: 'Close the calibration feedback loop: report actual token usage against a prior prediction from estimate_task_cost. Computes drift (actual - predicted) / predicted. Also supports listing recent predictions with aggregate calibration stats, or abandoning predictions for cancelled tasks.',
|
|
366
|
+
inputSchema: {
|
|
367
|
+
type: 'object',
|
|
368
|
+
properties: {
|
|
369
|
+
prediction_id: {
|
|
370
|
+
type: 'string',
|
|
371
|
+
description: 'The prediction ID returned by estimate_task_cost. Required for complete/abandon actions.',
|
|
372
|
+
},
|
|
373
|
+
actual_input_tokens: {
|
|
374
|
+
type: 'number',
|
|
375
|
+
description: 'Actual input tokens consumed (from LLM usage metadata).',
|
|
376
|
+
},
|
|
377
|
+
actual_output_tokens: {
|
|
378
|
+
type: 'number',
|
|
379
|
+
description: 'Actual output tokens consumed (from LLM usage metadata).',
|
|
380
|
+
},
|
|
381
|
+
actual_total_tokens: {
|
|
382
|
+
type: 'number',
|
|
383
|
+
description: 'Actual total tokens consumed (input + output).',
|
|
384
|
+
},
|
|
385
|
+
model: {
|
|
386
|
+
type: 'string',
|
|
387
|
+
description: 'Model used (e.g. claude-opus-4-6, claude-sonnet-4-6).',
|
|
388
|
+
},
|
|
389
|
+
action: {
|
|
390
|
+
type: 'string',
|
|
391
|
+
enum: ['complete', 'abandon', 'list'],
|
|
392
|
+
description: 'Action: "complete" reports actuals (default), "abandon" marks prediction as cancelled, "list" shows recent predictions with calibration stats.',
|
|
393
|
+
},
|
|
394
|
+
project: projectParam,
|
|
395
|
+
notes: {
|
|
396
|
+
type: 'string',
|
|
397
|
+
description: 'Optional notes about the task outcome.',
|
|
398
|
+
},
|
|
399
|
+
},
|
|
400
|
+
},
|
|
401
|
+
},
|
|
361
402
|
];
|
|
362
403
|
// ─── Server Setup ─────────────────────────────────────────────────
|
|
363
404
|
async function main() {
|
|
364
405
|
// Discover and load projects
|
|
365
406
|
const projectDirs = discoverProjects();
|
|
366
|
-
|
|
407
|
+
let loader = new MultiLoader(projectDirs);
|
|
367
408
|
try {
|
|
368
409
|
loader.load();
|
|
369
410
|
}
|
|
370
411
|
catch (err) {
|
|
371
412
|
process.stderr.write(`Warning: ${err.message}\n`);
|
|
372
413
|
}
|
|
414
|
+
// Auto-bootstrap: if no projects loaded, try to extract from CWD
|
|
415
|
+
if (!loader.isLoaded() && !process.env.SESHAT_PROJECTS) {
|
|
416
|
+
const cwd = process.cwd();
|
|
417
|
+
const seshatDir = path.join(cwd, '.seshat');
|
|
418
|
+
// Only bootstrap when .seshat/ doesn't exist at all (not corruption)
|
|
419
|
+
if (!fs.existsSync(seshatDir)) {
|
|
420
|
+
process.stderr.write(`[seshat-mcp] No .seshat/ found — attempting auto-bootstrap...\n`);
|
|
421
|
+
const result = await bootstrap(cwd);
|
|
422
|
+
if (result.success) {
|
|
423
|
+
// Re-create loader and retry
|
|
424
|
+
loader = new MultiLoader([cwd]);
|
|
425
|
+
try {
|
|
426
|
+
loader.load();
|
|
427
|
+
process.stderr.write(`[seshat-mcp] Auto-bootstrap succeeded: ${loader.totalEntities()} entities loaded\n`);
|
|
428
|
+
}
|
|
429
|
+
catch (err) {
|
|
430
|
+
process.stderr.write(`[seshat-mcp] Auto-bootstrap produced files but load failed: ${err.message}\n`);
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
else {
|
|
434
|
+
process.stderr.write(`[seshat-mcp] Auto-bootstrap failed: ${result.error || 'unknown error'}\n`);
|
|
435
|
+
process.stderr.write(`[seshat-mcp] Starting with 0 entities. Run extraction manually or push to trigger CI.\n`);
|
|
436
|
+
}
|
|
437
|
+
}
|
|
438
|
+
}
|
|
373
439
|
initTools(loader);
|
|
374
440
|
// Build server name
|
|
375
441
|
const projectNames = loader.getProjectNames();
|
|
@@ -388,7 +454,7 @@ async function main() {
|
|
|
388
454
|
}
|
|
389
455
|
const server = new Server({
|
|
390
456
|
name: serverLabel,
|
|
391
|
-
version: '0.
|
|
457
|
+
version: '0.4.0',
|
|
392
458
|
}, {
|
|
393
459
|
capabilities: {
|
|
394
460
|
tools: {},
|
|
@@ -473,7 +539,10 @@ async function main() {
|
|
|
473
539
|
result = getOptimalContext(args);
|
|
474
540
|
break;
|
|
475
541
|
case 'estimate_task_cost':
|
|
476
|
-
result = estimateTaskCost(args);
|
|
542
|
+
result = await estimateTaskCost(args);
|
|
543
|
+
break;
|
|
544
|
+
case 'report_actual_burn':
|
|
545
|
+
result = await reportActualBurn(args);
|
|
477
546
|
break;
|
|
478
547
|
default:
|
|
479
548
|
result = { error: `Unknown tool: ${name}` };
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
 * Lightweight Supabase REST client for the calibration feedback loop.
 *
 * Uses Node 20+ native fetch — zero external dependencies.
 * Gracefully degrades: if SESHAT_SUPABASE_URL / SESHAT_SUPABASE_KEY are not set,
 * all operations silently no-op so the MCP server works offline.
 *
 * Environment variables (checks both prefixed and standard names):
 *   SUPABASE_URL              — e.g. https://nlsrjceluztfllbqfpkm.supabase.co
 *   SUPABASE_SERVICE_ROLE_KEY — service_role key (preferred)
 *   SESHAT_SUPABASE_URL       — override (optional)
 *   SESHAT_SUPABASE_KEY       — override (optional)
 */
/** Whether Supabase logging is available. */
export declare function isSupabaseConfigured(): boolean;
/** Payload for logging a new token-cost prediction. */
export interface TokenPredictionInsert {
    project: string;
    target_entities: string[];
    predicted_context_load: number;
    predicted_iteration_mult: number;
    predicted_total: number;
    affected_entities: number;
    affected_files: number;
    affected_layers: string[];
    estimator_used: 'syntactic' | 'charDiv4' | 'heuristic';
    context_budget: number;
    session_id?: string;
}
/** Actual token usage reported after a predicted task finishes. */
export interface ActualBurnUpdate {
    actual_input_tokens: number;
    actual_output_tokens: number;
    actual_total_tokens: number;
    model: string;
    notes?: string;
}
/** A stored prediction row; actual_* and drift fields are null until reported. */
export interface TokenPredictionRow {
    id: string;
    created_at: string;
    project: string;
    target_entities: string[];
    predicted_context_load: number;
    predicted_iteration_mult: number;
    predicted_total: number;
    affected_entities: number;
    affected_files: number;
    affected_layers: string[];
    estimator_used: string;
    context_budget: number;
    actual_input_tokens: number | null;
    actual_output_tokens: number | null;
    actual_total_tokens: number | null;
    model: string | null;
    drift_ratio: number | null;
    status: string;
    session_id: string | null;
    notes: string | null;
}
/**
 * Insert a prediction row. Returns the row ID or null on failure.
 */
export declare function insertPrediction(row: TokenPredictionInsert): Promise<string | null>;
/**
 * Report actual token burn for a prediction. Computes drift and sets status=completed.
 */
export declare function updateActualBurn(predictionId: string, actual: ActualBurnUpdate): Promise<TokenPredictionRow | null>;
/**
 * Abandon a prediction (task was cancelled or not completed).
 */
export declare function abandonPrediction(predictionId: string): Promise<boolean>;
/**
 * List recent predictions for a project (for calibration analysis).
 */
export declare function listPredictions(project?: string, limit?: number): Promise<TokenPredictionRow[]>;
|
package/dist/supabase.js
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lightweight Supabase REST client for the calibration feedback loop.
|
|
3
|
+
*
|
|
4
|
+
* Uses Node 20+ native fetch — zero external dependencies.
|
|
5
|
+
* Gracefully degrades: if SESHAT_SUPABASE_URL / SESHAT_SUPABASE_KEY are not set,
|
|
6
|
+
* all operations silently no-op so the MCP server works offline.
|
|
7
|
+
*
|
|
8
|
+
* Environment variables (checks both prefixed and standard names):
|
|
9
|
+
* SUPABASE_URL — e.g. https://nlsrjceluztfllbqfpkm.supabase.co
|
|
10
|
+
* SUPABASE_SERVICE_ROLE_KEY — service_role key (preferred)
|
|
11
|
+
* SESHAT_SUPABASE_URL — override (optional)
|
|
12
|
+
* SESHAT_SUPABASE_KEY — override (optional)
|
|
13
|
+
*/
|
|
14
|
+
const SUPABASE_URL = (process.env.SESHAT_SUPABASE_URL || process.env.SUPABASE_URL || '').replace(/\/$/, '');
|
|
15
|
+
const SUPABASE_KEY = process.env.SESHAT_SUPABASE_KEY || process.env.SUPABASE_SERVICE_ROLE_KEY || '';
|
|
16
|
+
/** Whether Supabase logging is available. */
|
|
17
|
+
export function isSupabaseConfigured() {
|
|
18
|
+
return SUPABASE_URL.length > 0 && SUPABASE_KEY.length > 0;
|
|
19
|
+
}
|
|
20
|
+
// ─── REST helpers ────────────────────────────────────────────────
|
|
21
|
+
const TABLE = 'mcp_token_predictions';
|
|
22
|
+
async function supabaseRequest(method, path, body, headers) {
|
|
23
|
+
if (!isSupabaseConfigured()) {
|
|
24
|
+
return { ok: false, status: 0, error: 'Supabase not configured' };
|
|
25
|
+
}
|
|
26
|
+
const url = `${SUPABASE_URL}/rest/v1/${path}`;
|
|
27
|
+
const reqHeaders = {
|
|
28
|
+
'apikey': SUPABASE_KEY,
|
|
29
|
+
'Authorization': `Bearer ${SUPABASE_KEY}`,
|
|
30
|
+
'Content-Type': 'application/json',
|
|
31
|
+
'Prefer': 'return=representation',
|
|
32
|
+
...headers,
|
|
33
|
+
};
|
|
34
|
+
try {
|
|
35
|
+
const res = await fetch(url, {
|
|
36
|
+
method,
|
|
37
|
+
headers: reqHeaders,
|
|
38
|
+
body: body ? JSON.stringify(body) : undefined,
|
|
39
|
+
});
|
|
40
|
+
const text = await res.text();
|
|
41
|
+
let data;
|
|
42
|
+
try {
|
|
43
|
+
data = JSON.parse(text);
|
|
44
|
+
}
|
|
45
|
+
catch {
|
|
46
|
+
data = text;
|
|
47
|
+
}
|
|
48
|
+
if (!res.ok) {
|
|
49
|
+
return { ok: false, status: res.status, error: `${res.status}: ${text}` };
|
|
50
|
+
}
|
|
51
|
+
return { ok: true, status: res.status, data };
|
|
52
|
+
}
|
|
53
|
+
catch (err) {
|
|
54
|
+
return { ok: false, status: 0, error: err.message };
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
// ─── Public API ──────────────────────────────────────────────────
|
|
58
|
+
/**
 * Insert a prediction row. Returns the new row's ID, or null when the
 * insert fails (logged to stderr) or Supabase is unavailable.
 */
export async function insertPrediction(row) {
    const res = await supabaseRequest('POST', TABLE, row);
    if (!res.ok) {
        process.stderr.write(`[seshat] Prediction log failed: ${res.error}\n`);
        return null;
    }
    // Prefer: return=representation echoes the inserted row(s).
    const inserted = res.data;
    return inserted?.[0]?.id ?? null;
}
|
|
70
|
+
/**
 * Report actual token burn for a prediction. Computes the drift ratio
 * (actual - predicted) / predicted, rounded to 3 decimals, and marks the
 * row completed. Returns the updated row, or null when the prediction is
 * missing, already settled, or any request fails.
 */
export async function updateActualBurn(predictionId, actual) {
    // Load the stored prediction so drift can be computed against it.
    const lookup = await supabaseRequest('GET', `${TABLE}?id=eq.${predictionId}&select=predicted_total,status`);
    if (!lookup.ok) {
        process.stderr.write(`[seshat] Fetch prediction failed: ${lookup.error}\n`);
        return null;
    }
    const found = lookup.data;
    if (!found || found.length === 0) {
        return null; // no such prediction
    }
    const prior = found[0];
    if (prior.status !== 'predicted') {
        return null; // already completed or abandoned — don't overwrite
    }
    // Drift ratio is undefined when the prediction was zero.
    const drift = prior.predicted_total > 0
        ? Math.round(((actual.actual_total_tokens - prior.predicted_total) / prior.predicted_total) * 1000) / 1000
        : null;
    const patch = {
        actual_input_tokens: actual.actual_input_tokens,
        actual_output_tokens: actual.actual_output_tokens,
        actual_total_tokens: actual.actual_total_tokens,
        model: actual.model,
        drift_ratio: drift,
        status: 'completed',
        ...(actual.notes ? { notes: actual.notes } : {}),
    };
    const updated = await supabaseRequest('PATCH', `${TABLE}?id=eq.${predictionId}`, patch);
    if (!updated.ok) {
        process.stderr.write(`[seshat] Update actual burn failed: ${updated.error}\n`);
        return null;
    }
    return updated.data?.[0] ?? null;
}
|
|
108
|
+
/**
 * Abandon a prediction (task was cancelled or not completed).
 * Only rows still in status 'predicted' are eligible.
 *
 * @returns true only when a row was actually transitioned to 'abandoned'.
 */
export async function abandonPrediction(predictionId) {
    const result = await supabaseRequest('PATCH', `${TABLE}?id=eq.${predictionId}&status=eq.predicted`, { status: 'abandoned' });
    // With Prefer: return=representation, PostgREST echoes the updated rows.
    // A PATCH that matches zero rows (wrong ID, or already completed/abandoned)
    // still returns HTTP 200 with an empty array — the previous `result.ok`
    // check reported success in that case.
    return result.ok && Array.isArray(result.data) && result.data.length > 0;
}
|
|
115
|
+
/**
 * List recent predictions (newest first) for calibration analysis.
 * Returns [] when Supabase is unavailable or the request fails.
 */
export async function listPredictions(project, limit = 20) {
    const projectFilter = project ? `&project=eq.${encodeURIComponent(project)}` : '';
    const res = await supabaseRequest('GET', `${TABLE}?select=*${projectFilter}&order=created_at.desc&limit=${limit}`);
    return res.ok ? (res.data || []) : [];
}
|
package/dist/tools/functors.d.ts
CHANGED
|
@@ -42,9 +42,27 @@ export declare function getOptimalContext(args: {
|
|
|
42
42
|
/**
|
|
43
43
|
* Estimate token cost of a code change BEFORE starting work.
|
|
44
44
|
* Computes blast radius, sums source token counts, and projects total burn.
|
|
45
|
+
* Logs prediction to Supabase when configured (for calibration feedback loop).
|
|
45
46
|
*/
|
|
46
47
|
export declare function estimateTaskCost(args: {
|
|
47
48
|
target_entities: string[];
|
|
48
49
|
context_budget?: number;
|
|
49
50
|
project?: string;
|
|
50
|
-
}): unknown
|
|
51
|
+
}): Promise<unknown>;
|
|
52
|
+
/**
 * Close the calibration feedback loop by reporting actual token usage
 * against a prior prediction from estimate_task_cost.
 *
 * Can also abandon a prediction (task was cancelled/not completed),
 * or list recent predictions for calibration analysis.
 */
export declare function reportActualBurn(args: {
    /** Prediction ID from estimate_task_cost; required for complete/abandon. */
    prediction_id?: string;
    actual_input_tokens?: number;
    actual_output_tokens?: number;
    actual_total_tokens?: number;
    /** Model used for the task (e.g. claude-sonnet-4-6). */
    model?: string;
    /** Defaults to 'complete' when omitted. */
    action?: 'complete' | 'abandon' | 'list';
    project?: string;
    notes?: string;
}): Promise<unknown>;
|
package/dist/tools/functors.js
CHANGED
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
*/
|
|
8
8
|
import { computeBlastRadius } from '../graph.js';
|
|
9
9
|
import { getLoader, getGraph, validateProject, entityLayer, entitySummary, } from './index.js';
|
|
10
|
+
import { isSupabaseConfigured, insertPrediction, updateActualBurn, abandonPrediction, listPredictions, } from '../supabase.js';
|
|
10
11
|
// ─── Layer ordering for violation detection ──────────────────────
|
|
11
12
|
const LAYER_ORDER = {
|
|
12
13
|
route: 0,
|
|
@@ -49,6 +50,36 @@ export function estimateTokens(e) {
|
|
|
49
50
|
}
|
|
50
51
|
return tokens;
|
|
51
52
|
}
|
|
53
|
+
/**
 * Detect which token estimator produced an entity's sourceTokens.estimated:
 *   'syntactic' — tree-sitter leaf-node count,
 *   'charDiv4'  — chars/4 heuristic (also the ambiguous default),
 *   'heuristic' — 9D fallback when no estimate is present.
 */
function detectEstimator(e) {
    const st = e.sourceTokens;
    if (!st?.estimated) {
        return 'heuristic';
    }
    if (st.syntactic && st.estimated === st.syntactic) {
        return 'syntactic';
    }
    // Either a charDiv4 match, or estimated is present but indistinguishable —
    // both cases report 'charDiv4'.
    return 'charDiv4';
}
/**
 * Determine the dominant estimator across a set of entities.
 * Ties break toward 'syntactic', then 'charDiv4'.
 */
function dominantEstimator(entities) {
    const tally = { syntactic: 0, charDiv4: 0, heuristic: 0 };
    for (const entity of entities) {
        tally[detectEstimator(entity)] += 1;
    }
    if (tally.syntactic >= tally.charDiv4 && tally.syntactic >= tally.heuristic) {
        return 'syntactic';
    }
    return tally.charDiv4 >= tally.heuristic ? 'charDiv4' : 'heuristic';
}
|
|
52
83
|
// ─── Functor 1: find_dead_code ───────────────────────────────────
|
|
53
84
|
export function findDeadCode(args) {
|
|
54
85
|
const projErr = validateProject(args.project);
|
|
@@ -571,8 +602,9 @@ export function getOptimalContext(args) {
|
|
|
571
602
|
/**
|
|
572
603
|
* Estimate token cost of a code change BEFORE starting work.
|
|
573
604
|
* Computes blast radius, sums source token counts, and projects total burn.
|
|
605
|
+
* Logs prediction to Supabase when configured (for calibration feedback loop).
|
|
574
606
|
*/
|
|
575
|
-
export function estimateTaskCost(args) {
|
|
607
|
+
export async function estimateTaskCost(args) {
|
|
576
608
|
const projErr = validateProject(args.project);
|
|
577
609
|
if (projErr)
|
|
578
610
|
return { error: projErr };
|
|
@@ -682,7 +714,10 @@ export function estimateTaskCost(args) {
|
|
|
682
714
|
sourceTokens: estimateTokens(e),
|
|
683
715
|
}));
|
|
684
716
|
const affectedLayers = [...layerSet].sort();
|
|
685
|
-
|
|
717
|
+
const estimator = dominantEstimator(affectedEntities);
|
|
718
|
+
// Resolve project name for logging
|
|
719
|
+
const projectName = args.project || loader.getProjectNames()[0] || 'unknown';
|
|
720
|
+
const result = {
|
|
686
721
|
targets: targetSummaries,
|
|
687
722
|
...(unresolvedNames.length > 0 ? { unresolved: unresolvedNames } : {}),
|
|
688
723
|
affectedEntities: allAffectedIds.size,
|
|
@@ -692,6 +727,7 @@ export function estimateTaskCost(args) {
|
|
|
692
727
|
contextLoad,
|
|
693
728
|
iterationMultiplier: Math.round(iterationMultiplier * 10) / 10,
|
|
694
729
|
projectedTotal,
|
|
730
|
+
estimatorUsed: estimator,
|
|
695
731
|
},
|
|
696
732
|
feasibility: {
|
|
697
733
|
contextBudget: context_budget,
|
|
@@ -701,4 +737,132 @@ export function estimateTaskCost(args) {
|
|
|
701
737
|
breakdown: breakdown.slice(0, 30),
|
|
702
738
|
_summary: `Changing ${target_entities.join(', ')} affects ${allAffectedIds.size} entities across ${fileTokens.size} files. Context load: ~${Math.round(contextLoad / 1000)}K tokens. Projected total with ${Math.round(iterationMultiplier * 10) / 10}x iteration: ~${Math.round(projectedTotal / 1000)}K tokens. ${fitsInSinglePass ? `Fits in ${Math.round(context_budget / 1000)}K budget (${passesRequired} pass).` : `Exceeds ${Math.round(context_budget / 1000)}K budget — needs ${passesRequired} passes.`}`,
|
|
703
739
|
};
|
|
740
|
+
// Log prediction to Supabase if configured (calibration feedback loop)
|
|
741
|
+
if (isSupabaseConfigured()) {
|
|
742
|
+
try {
|
|
743
|
+
const predictionId = await insertPrediction({
|
|
744
|
+
project: projectName,
|
|
745
|
+
target_entities,
|
|
746
|
+
predicted_context_load: contextLoad,
|
|
747
|
+
predicted_iteration_mult: Math.round(iterationMultiplier * 10) / 10,
|
|
748
|
+
predicted_total: projectedTotal,
|
|
749
|
+
affected_entities: allAffectedIds.size,
|
|
750
|
+
affected_files: fileTokens.size,
|
|
751
|
+
affected_layers: affectedLayers,
|
|
752
|
+
estimator_used: estimator,
|
|
753
|
+
context_budget,
|
|
754
|
+
});
|
|
755
|
+
if (predictionId) {
|
|
756
|
+
result.predictionId = predictionId;
|
|
757
|
+
result._summary += ` Prediction logged (${predictionId.slice(0, 8)}…).`;
|
|
758
|
+
}
|
|
759
|
+
}
|
|
760
|
+
catch {
|
|
761
|
+
// Silently swallow — prediction logging is best-effort
|
|
762
|
+
}
|
|
763
|
+
}
|
|
764
|
+
return result;
|
|
765
|
+
}
|
|
766
|
+
// ─── Functor 9: report_actual_burn ───────────────────────────────
|
|
767
|
+
/**
|
|
768
|
+
* Close the calibration feedback loop by reporting actual token usage
|
|
769
|
+
* against a prior prediction from estimate_task_cost.
|
|
770
|
+
*
|
|
771
|
+
* Can also abandon a prediction (task was cancelled/not completed),
|
|
772
|
+
* or list recent predictions for calibration analysis.
|
|
773
|
+
*/
|
|
774
|
+
export async function reportActualBurn(args) {
|
|
775
|
+
if (!isSupabaseConfigured()) {
|
|
776
|
+
return {
|
|
777
|
+
error: 'Supabase not configured. Set SESHAT_SUPABASE_URL and SESHAT_SUPABASE_KEY env vars.',
|
|
778
|
+
hint: 'The calibration feedback loop requires a Supabase connection to store predictions.',
|
|
779
|
+
};
|
|
780
|
+
}
|
|
781
|
+
const { action = 'complete' } = args;
|
|
782
|
+
// List mode: show recent predictions for calibration analysis
|
|
783
|
+
if (action === 'list') {
|
|
784
|
+
const rows = await listPredictions(args.project);
|
|
785
|
+
if (rows.length === 0) {
|
|
786
|
+
return { message: 'No predictions found.', predictions: [] };
|
|
787
|
+
}
|
|
788
|
+
const summary = rows.map((r) => ({
|
|
789
|
+
id: r.id,
|
|
790
|
+
project: r.project,
|
|
791
|
+
targets: r.target_entities,
|
|
792
|
+
predicted: r.predicted_total,
|
|
793
|
+
actual: r.actual_total_tokens,
|
|
794
|
+
drift: r.drift_ratio,
|
|
795
|
+
estimator: r.estimator_used,
|
|
796
|
+
status: r.status,
|
|
797
|
+
createdAt: r.created_at,
|
|
798
|
+
}));
|
|
799
|
+
// Compute aggregate calibration stats for completed predictions
|
|
800
|
+
const completed = rows.filter((r) => r.status === 'completed' && r.drift_ratio != null);
|
|
801
|
+
let calibration;
|
|
802
|
+
if (completed.length >= 3) {
|
|
803
|
+
const drifts = completed.map((r) => r.drift_ratio);
|
|
804
|
+
const meanDrift = drifts.reduce((a, b) => a + b, 0) / drifts.length;
|
|
805
|
+
const sortedDrifts = [...drifts].sort((a, b) => a - b);
|
|
806
|
+
const medianDrift = sortedDrifts[Math.floor(sortedDrifts.length / 2)];
|
|
807
|
+
const maxOvershoot = Math.max(...drifts);
|
|
808
|
+
const maxUndershoot = Math.min(...drifts);
|
|
809
|
+
calibration = {
|
|
810
|
+
completedSamples: completed.length,
|
|
811
|
+
meanDrift: Math.round(meanDrift * 1000) / 1000,
|
|
812
|
+
medianDrift: Math.round(medianDrift * 1000) / 1000,
|
|
813
|
+
maxOvershoot: Math.round(maxOvershoot * 1000) / 1000,
|
|
814
|
+
maxUndershoot: Math.round(maxUndershoot * 1000) / 1000,
|
|
815
|
+
_interpretation: meanDrift > 0.2
|
|
816
|
+
? 'Predictions underestimate — consider increasing iteration multiplier.'
|
|
817
|
+
: meanDrift < -0.2
|
|
818
|
+
? 'Predictions overestimate — consider decreasing iteration multiplier.'
|
|
819
|
+
: 'Predictions are well-calibrated (within 20% mean drift).',
|
|
820
|
+
};
|
|
821
|
+
}
|
|
822
|
+
return {
|
|
823
|
+
total: rows.length,
|
|
824
|
+
predictions: summary,
|
|
825
|
+
...(calibration ? { calibration } : {}),
|
|
826
|
+
};
|
|
827
|
+
}
|
|
828
|
+
// Complete or abandon requires prediction_id
|
|
829
|
+
if (!args.prediction_id) {
|
|
830
|
+
return {
|
|
831
|
+
error: 'prediction_id is required for complete/abandon actions.',
|
|
832
|
+
hint: 'Use estimate_task_cost first to get a predictionId, then pass it here.',
|
|
833
|
+
};
|
|
834
|
+
}
|
|
835
|
+
// Abandon mode
|
|
836
|
+
if (action === 'abandon') {
|
|
837
|
+
const ok = await abandonPrediction(args.prediction_id);
|
|
838
|
+
return ok
|
|
839
|
+
? { status: 'abandoned', predictionId: args.prediction_id }
|
|
840
|
+
: { error: `Failed to abandon prediction ${args.prediction_id}. It may already be completed or not exist.` };
|
|
841
|
+
}
|
|
842
|
+
// Complete mode: requires actual token counts
|
|
843
|
+
if (!args.actual_input_tokens || !args.actual_output_tokens || !args.actual_total_tokens || !args.model) {
|
|
844
|
+
return {
|
|
845
|
+
error: 'actual_input_tokens, actual_output_tokens, actual_total_tokens, and model are required to complete a prediction.',
|
|
846
|
+
};
|
|
847
|
+
}
|
|
848
|
+
const updated = await updateActualBurn(args.prediction_id, {
|
|
849
|
+
actual_input_tokens: args.actual_input_tokens,
|
|
850
|
+
actual_output_tokens: args.actual_output_tokens,
|
|
851
|
+
actual_total_tokens: args.actual_total_tokens,
|
|
852
|
+
model: args.model,
|
|
853
|
+
notes: args.notes,
|
|
854
|
+
});
|
|
855
|
+
if (!updated) {
|
|
856
|
+
return {
|
|
857
|
+
error: `Failed to update prediction ${args.prediction_id}. It may not exist or may already be completed.`,
|
|
858
|
+
};
|
|
859
|
+
}
|
|
860
|
+
return {
|
|
861
|
+
status: 'completed',
|
|
862
|
+
predictionId: updated.id,
|
|
863
|
+
predicted: updated.predicted_total,
|
|
864
|
+
actual: updated.actual_total_tokens,
|
|
865
|
+
drift: updated.drift_ratio,
|
|
866
|
+
_summary: `Prediction ${updated.id.slice(0, 8)}… closed. Predicted ${updated.predicted_total} tokens, actual ${updated.actual_total_tokens} tokens. Drift: ${updated.drift_ratio != null ? `${(updated.drift_ratio * 100).toFixed(1)}%` : 'N/A'}.`,
|
|
867
|
+
};
|
|
704
868
|
}
|