@papyruslabsai/seshat-mcp 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -13,7 +13,7 @@
13
13
  * Single-project mode (default):
14
14
  * When SESHAT_PROJECTS is not set, loads from CWD. No `project` param needed.
15
15
  *
16
- * Tools (8 core + 7 interpretation functors + 1 meta):
16
+ * Tools (8 core + 9 interpretation functors + 1 meta):
17
17
  * list_projects — Show loaded projects with entity counts
18
18
  * query_entities — Search entities by name, layer, module, language
19
19
  * get_entity — Full 9D coordinate dump for one entity
@@ -30,6 +30,8 @@
30
30
  * find_error_gaps — Fallible callees whose callers lack try/catch
31
31
  * get_test_coverage — Entities exercised by tests vs uncovered
32
32
  * get_optimal_context — Greedy knapsack: max relevance per token for LLM context
33
+ * estimate_task_cost — Pre-work token burn projection from blast radius + source tokens
34
+ * report_actual_burn — Close calibration loop: actual tokens vs prediction, drift stats
33
35
  *
34
36
  * Usage:
35
37
  * npx @papyruslabsai/seshat-mcp # single project (CWD)
package/dist/index.js CHANGED
@@ -13,7 +13,7 @@
13
13
  * Single-project mode (default):
14
14
  * When SESHAT_PROJECTS is not set, loads from CWD. No `project` param needed.
15
15
  *
16
- * Tools (8 core + 7 interpretation functors + 1 meta):
16
+ * Tools (8 core + 9 interpretation functors + 1 meta):
17
17
  * list_projects — Show loaded projects with entity counts
18
18
  * query_entities — Search entities by name, layer, module, language
19
19
  * get_entity — Full 9D coordinate dump for one entity
@@ -30,6 +30,8 @@
30
30
  * find_error_gaps — Fallible callees whose callers lack try/catch
31
31
  * get_test_coverage — Entities exercised by tests vs uncovered
32
32
  * get_optimal_context — Greedy knapsack: max relevance per token for LLM context
33
+ * estimate_task_cost — Pre-work token burn projection from blast radius + source tokens
34
+ * report_actual_burn — Close calibration loop: actual tokens vs prediction, drift stats
33
35
  *
34
36
  * Usage:
35
37
  * npx @papyruslabsai/seshat-mcp # single project (CWD)
@@ -43,7 +45,7 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
43
45
  import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
44
46
  import { MultiLoader } from './loader.js';
45
47
  import { initTools, queryEntities, getEntity, getDependencies, getDataFlow, findByConstraint, getBlastRadius, listModules, getTopology, } from './tools/index.js';
46
- import { findDeadCode, findLayerViolations, getCouplingMetrics, getAuthMatrix, findErrorGaps, getTestCoverage, getOptimalContext, } from './tools/functors.js';
48
+ import { findDeadCode, findLayerViolations, getCouplingMetrics, getAuthMatrix, findErrorGaps, getTestCoverage, getOptimalContext, estimateTaskCost, reportActualBurn, } from './tools/functors.js';
47
49
  // ─── Project Discovery ───────────────────────────────────────────
48
50
  /**
49
51
  * Discover project directories from SESHAT_PROJECTS env var.
@@ -337,6 +339,65 @@ const TOOLS = [
337
339
  required: ['target_entity'],
338
340
  },
339
341
  },
342
+ {
343
+ name: 'estimate_task_cost',
344
+ description: 'Estimate token cost of a code change BEFORE starting work. Computes blast radius, sums source token counts across affected entities, and projects total token burn including iteration cycles. Call this before planning to check if a task fits within your token budget.',
345
+ inputSchema: {
346
+ type: 'object',
347
+ properties: {
348
+ project: projectParam,
349
+ target_entities: {
350
+ type: 'array',
351
+ items: { type: 'string' },
352
+ description: 'Entity IDs or names that will be modified',
353
+ },
354
+ context_budget: {
355
+ type: 'number',
356
+ description: 'LLM context window token budget (default: 200000)',
357
+ },
358
+ },
359
+ required: ['target_entities'],
360
+ },
361
+ },
362
+ {
363
+ name: 'report_actual_burn',
364
+ description: 'Close the calibration feedback loop: report actual token usage against a prior prediction from estimate_task_cost. Computes drift (actual - predicted) / predicted. Also supports listing recent predictions with aggregate calibration stats, or abandoning predictions for cancelled tasks.',
365
+ inputSchema: {
366
+ type: 'object',
367
+ properties: {
368
+ prediction_id: {
369
+ type: 'string',
370
+ description: 'The prediction ID returned by estimate_task_cost. Required for complete/abandon actions.',
371
+ },
372
+ actual_input_tokens: {
373
+ type: 'number',
374
+ description: 'Actual input tokens consumed (from LLM usage metadata).',
375
+ },
376
+ actual_output_tokens: {
377
+ type: 'number',
378
+ description: 'Actual output tokens consumed (from LLM usage metadata).',
379
+ },
380
+ actual_total_tokens: {
381
+ type: 'number',
382
+ description: 'Actual total tokens consumed (input + output).',
383
+ },
384
+ model: {
385
+ type: 'string',
386
+ description: 'Model used (e.g. claude-opus-4-6, claude-sonnet-4-6).',
387
+ },
388
+ action: {
389
+ type: 'string',
390
+ enum: ['complete', 'abandon', 'list'],
391
+ description: 'Action: "complete" reports actuals (default), "abandon" marks prediction as cancelled, "list" shows recent predictions with calibration stats.',
392
+ },
393
+ project: projectParam,
394
+ notes: {
395
+ type: 'string',
396
+ description: 'Optional notes about the task outcome.',
397
+ },
398
+ },
399
+ },
400
+ },
340
401
  ];
341
402
  // ─── Server Setup ─────────────────────────────────────────────────
342
403
  async function main() {
@@ -367,7 +428,7 @@ async function main() {
367
428
  }
368
429
  const server = new Server({
369
430
  name: serverLabel,
370
- version: '0.3.1',
431
+ version: '0.3.3',
371
432
  }, {
372
433
  capabilities: {
373
434
  tools: {},
@@ -451,6 +512,12 @@ async function main() {
451
512
  case 'get_optimal_context':
452
513
  result = getOptimalContext(args);
453
514
  break;
515
+ case 'estimate_task_cost':
516
+ result = await estimateTaskCost(args);
517
+ break;
518
+ case 'report_actual_burn':
519
+ result = await reportActualBurn(args);
520
+ break;
454
521
  default:
455
522
  result = { error: `Unknown tool: ${name}` };
456
523
  }
@@ -0,0 +1,73 @@
1
+ /**
2
+ * Lightweight Supabase REST client for the calibration feedback loop.
3
+ *
4
+ * Uses Node 20+ native fetch — zero external dependencies.
5
+ * Gracefully degrades: if no Supabase URL/key pair is found in the environment (see below),
6
+ * all operations silently no-op so the MCP server works offline.
7
+ *
8
+ * Environment variables (checks both prefixed and standard names):
9
+ * SUPABASE_URL — e.g. https://nlsrjceluztfllbqfpkm.supabase.co
10
+ * SUPABASE_SERVICE_ROLE_KEY — service_role key (preferred)
11
+ * SESHAT_SUPABASE_URL — override (optional)
12
+ * SESHAT_SUPABASE_KEY — override (optional)
13
+ */
14
+ /** Whether Supabase logging is available. */
15
+ export declare function isSupabaseConfigured(): boolean;
16
+ export interface TokenPredictionInsert {
17
+ project: string;
18
+ target_entities: string[];
19
+ predicted_context_load: number;
20
+ predicted_iteration_mult: number;
21
+ predicted_total: number;
22
+ affected_entities: number;
23
+ affected_files: number;
24
+ affected_layers: string[];
25
+ estimator_used: 'syntactic' | 'charDiv4' | 'heuristic';
26
+ context_budget: number;
27
+ session_id?: string;
28
+ }
29
+ export interface ActualBurnUpdate {
30
+ actual_input_tokens: number;
31
+ actual_output_tokens: number;
32
+ actual_total_tokens: number;
33
+ model: string;
34
+ notes?: string;
35
+ }
36
+ export interface TokenPredictionRow {
37
+ id: string;
38
+ created_at: string;
39
+ project: string;
40
+ target_entities: string[];
41
+ predicted_context_load: number;
42
+ predicted_iteration_mult: number;
43
+ predicted_total: number;
44
+ affected_entities: number;
45
+ affected_files: number;
46
+ affected_layers: string[];
47
+ estimator_used: string;
48
+ context_budget: number;
49
+ actual_input_tokens: number | null;
50
+ actual_output_tokens: number | null;
51
+ actual_total_tokens: number | null;
52
+ model: string | null;
53
+ drift_ratio: number | null;
54
+ status: string;
55
+ session_id: string | null;
56
+ notes: string | null;
57
+ }
58
+ /**
59
+ * Insert a prediction row. Returns the row ID or null on failure.
60
+ */
61
+ export declare function insertPrediction(row: TokenPredictionInsert): Promise<string | null>;
62
+ /**
63
+ * Report actual token burn for a prediction. Computes drift and sets status=completed.
64
+ */
65
+ export declare function updateActualBurn(predictionId: string, actual: ActualBurnUpdate): Promise<TokenPredictionRow | null>;
66
+ /**
67
+ * Abandon a prediction (task was cancelled or not completed).
68
+ */
69
+ export declare function abandonPrediction(predictionId: string): Promise<boolean>;
70
+ /**
71
+ * List recent predictions for a project (for calibration analysis).
72
+ */
73
+ export declare function listPredictions(project?: string, limit?: number): Promise<TokenPredictionRow[]>;
@@ -0,0 +1,124 @@
1
+ /**
2
+ * Lightweight Supabase REST client for the calibration feedback loop.
3
+ *
4
+ * Uses Node 20+ native fetch — zero external dependencies.
5
+ * Gracefully degrades: if no Supabase URL/key pair is found in the environment (see below),
6
+ * all operations silently no-op so the MCP server works offline.
7
+ *
8
+ * Environment variables (checks both prefixed and standard names):
9
+ * SUPABASE_URL — e.g. https://nlsrjceluztfllbqfpkm.supabase.co
10
+ * SUPABASE_SERVICE_ROLE_KEY — service_role key (preferred)
11
+ * SESHAT_SUPABASE_URL — override (optional)
12
+ * SESHAT_SUPABASE_KEY — override (optional)
13
+ */
14
+ const SUPABASE_URL = (process.env.SESHAT_SUPABASE_URL || process.env.SUPABASE_URL || '').replace(/\/$/, '');
15
+ const SUPABASE_KEY = process.env.SESHAT_SUPABASE_KEY || process.env.SUPABASE_SERVICE_ROLE_KEY || '';
/**
 * Report whether calibration logging can reach Supabase.
 *
 * @returns {boolean} true only when both the base URL and the API key
 *   resolved to non-empty strings.
 */
export function isSupabaseConfigured() {
    const hasUrl = SUPABASE_URL.length > 0;
    const hasKey = SUPABASE_KEY.length > 0;
    return hasUrl && hasKey;
}
20
+ // ─── REST helpers ────────────────────────────────────────────────
21
+ const TABLE = 'mcp_token_predictions';
/**
 * Perform one request against the Supabase REST endpoint and normalise the outcome.
 *
 * Never throws: missing configuration, network failures, timeouts and non-2xx
 * responses are all reported through the returned object, so callers can treat
 * calibration logging as best-effort.
 *
 * @param {string} method - HTTP verb (GET, POST, PATCH, ...).
 * @param {string} path - Table name plus optional query string, appended to `/rest/v1/`.
 * @param {unknown} [body] - JSON-serialisable payload; no request body is sent when falsy.
 * @param {Record<string, string>} [headers] - Extra headers; they override the defaults.
 * @returns {Promise<{ok: boolean, status: number, data?: unknown, error?: string}>}
 *   `status` is 0 when no HTTP response was obtained; `data` is the parsed JSON
 *   body, the raw text when it is not JSON, or null when the body is empty.
 */
async function supabaseRequest(method, path, body, headers) {
    if (!isSupabaseConfigured()) {
        return { ok: false, status: 0, error: 'Supabase not configured' };
    }
    // Logging is best-effort: bound the wait so a stalled connection cannot
    // hang the tool call that triggered it.
    const requestTimeoutMs = 10000;
    const url = `${SUPABASE_URL}/rest/v1/${path}`;
    const reqHeaders = {
        'apikey': SUPABASE_KEY,
        'Authorization': `Bearer ${SUPABASE_KEY}`,
        'Content-Type': 'application/json',
        'Prefer': 'return=representation',
        ...headers,
    };
    try {
        const res = await fetch(url, {
            method,
            headers: reqHeaders,
            body: body ? JSON.stringify(body) : undefined,
            signal: AbortSignal.timeout(requestTimeoutMs),
        });
        const text = await res.text();
        if (!res.ok) {
            return { ok: false, status: res.status, error: `${res.status}: ${text}` };
        }
        let data = null; // empty body (e.g. 204) carries no payload
        if (text.length > 0) {
            try {
                data = JSON.parse(text);
            }
            catch {
                data = text; // not JSON: hand back the raw text
            }
        }
        return { ok: true, status: res.status, data };
    }
    catch (err) {
        // Anything can be thrown; only Error instances are guaranteed a message.
        const reason = err instanceof Error ? err.message : String(err);
        return { ok: false, status: 0, error: reason };
    }
}
57
+ // ─── Public API ──────────────────────────────────────────────────
/**
 * Store a new prediction row.
 *
 * @param {object} row - Prediction fields to insert.
 * @returns {Promise<string | null>} id of the first returned row, or null when
 *   the request failed (the failure is written to stderr) or no id came back.
 */
export async function insertPrediction(row) {
    const { ok, error, data } = await supabaseRequest('POST', TABLE, row);
    if (ok) {
        const created = data?.[0];
        return created?.id ?? null;
    }
    process.stderr.write(`[seshat] Prediction log failed: ${error}\n`);
    return null;
}
/**
 * Report actual token burn for a prediction: computes the drift ratio
 * `(actual - predicted) / predicted` and marks the row `completed`.
 *
 * @param {string} predictionId - Row id of the prediction to close.
 * @param {{actual_input_tokens: number, actual_output_tokens: number,
 *   actual_total_tokens: number, model: string, notes?: string}} actual
 * @returns {Promise<object | null>} the updated row, or null when the row does
 *   not exist, is no longer in status `predicted`, or a request failed
 *   (request failures are written to stderr).
 */
export async function updateActualBurn(predictionId, actual) {
    // The id is caller-supplied text: encode it so it cannot inject extra
    // filters or parameters into the query string.
    const idFilter = `id=eq.${encodeURIComponent(predictionId)}`;
    // First fetch the prediction to compute drift
    const fetchResult = await supabaseRequest('GET', `${TABLE}?${idFilter}&select=predicted_total,status`);
    if (!fetchResult.ok) {
        process.stderr.write(`[seshat] Fetch prediction failed: ${fetchResult.error}\n`);
        return null;
    }
    const rows = fetchResult.data;
    if (!Array.isArray(rows) || rows.length === 0) {
        return null; // not found
    }
    const predicted = rows[0];
    if (predicted.status !== 'predicted') {
        return null; // already completed or abandoned
    }
    // Drift ratio rounded to 3 decimals; undefined for a non-positive prediction
    const driftRatio = predicted.predicted_total > 0
        ? Math.round(((actual.actual_total_tokens - predicted.predicted_total) / predicted.predicted_total) * 1000) / 1000
        : null;
    const updateBody = {
        actual_input_tokens: actual.actual_input_tokens,
        actual_output_tokens: actual.actual_output_tokens,
        actual_total_tokens: actual.actual_total_tokens,
        model: actual.model,
        drift_ratio: driftRatio,
        status: 'completed',
        ...(actual.notes ? { notes: actual.notes } : {}),
    };
    // Repeat the status guard on the write itself so a row that was completed
    // or abandoned between the read above and this update is not overwritten.
    const updateResult = await supabaseRequest('PATCH', `${TABLE}?${idFilter}&status=eq.predicted`, updateBody);
    if (!updateResult.ok) {
        process.stderr.write(`[seshat] Update actual burn failed: ${updateResult.error}\n`);
        return null;
    }
    return updateResult.data?.[0] ?? null;
}
/**
 * Abandon a prediction (task was cancelled or not completed).
 *
 * Only rows still in status `predicted` are eligible.
 *
 * @param {string} predictionId - Row id of the prediction to abandon.
 * @returns {Promise<boolean>} true only when a row was actually updated; false
 *   when the request failed or no eligible row matched the id.
 */
export async function abandonPrediction(predictionId) {
    // Encode the caller-supplied id so it cannot alter the query string.
    const idFilter = `id=eq.${encodeURIComponent(predictionId)}`;
    const result = await supabaseRequest('PATCH', `${TABLE}?${idFilter}&status=eq.predicted`, { status: 'abandoned' });
    // A filtered update that matches nothing still succeeds at the HTTP level,
    // so success is judged by the returned representation, not by `ok` alone.
    return result.ok && Array.isArray(result.data) && result.data.length > 0;
}
/**
 * List recent predictions, newest first (for calibration analysis).
 *
 * @param {string} [project] - Restrict the listing to this project name.
 * @param {number} [limit=20] - Maximum rows to return; anything that is not a
 *   positive integer falls back to 20.
 * @returns {Promise<object[]>} matching rows, or an empty array when the
 *   request failed or returned no row list.
 */
export async function listPredictions(project, limit = 20) {
    // `limit` ends up in the query string: accept only a positive integer.
    const safeLimit = Number.isInteger(limit) && limit > 0 ? limit : 20;
    const filter = project ? `&project=eq.${encodeURIComponent(project)}` : '';
    const result = await supabaseRequest('GET', `${TABLE}?select=*${filter}&order=created_at.desc&limit=${safeLimit}`);
    if (!result.ok)
        return [];
    return Array.isArray(result.data) ? result.data : [];
}
@@ -5,6 +5,13 @@
5
5
  * onto a domain-specific judgment. These are composite analyses built
6
6
  * from the primitive dimensions (sigma, epsilon, delta, kappa, chi, tau, rho).
7
7
  */
8
+ import type { JstfEntity } from '../types.js';
9
+ /**
10
+ * Estimate the token cost of loading an entity's source code into an LLM context.
11
+ * Uses real sourceTokens from the extraction pipeline when available (v0.3.2+),
12
+ * falls back to heuristic estimation from 9D coordinates for older bundles.
13
+ */
14
+ export declare function estimateTokens(e: JstfEntity): number;
8
15
  export declare function findDeadCode(args: {
9
16
  include_tests?: boolean;
10
17
  project?: string;
@@ -32,3 +39,30 @@ export declare function getOptimalContext(args: {
32
39
  strategy?: 'bfs' | 'blast_radius';
33
40
  project?: string;
34
41
  }): unknown;
42
+ /**
43
+ * Estimate token cost of a code change BEFORE starting work.
44
+ * Computes blast radius, sums source token counts, and projects total burn.
45
+ * Logs prediction to Supabase when configured (for calibration feedback loop).
46
+ */
47
+ export declare function estimateTaskCost(args: {
48
+ target_entities: string[];
49
+ context_budget?: number;
50
+ project?: string;
51
+ }): Promise<unknown>;
52
+ /**
53
+ * Close the calibration feedback loop by reporting actual token usage
54
+ * against a prior prediction from estimate_task_cost.
55
+ *
56
+ * Can also abandon a prediction (task was cancelled/not completed),
57
+ * or list recent predictions for calibration analysis.
58
+ */
59
+ export declare function reportActualBurn(args: {
60
+ prediction_id?: string;
61
+ actual_input_tokens?: number;
62
+ actual_output_tokens?: number;
63
+ actual_total_tokens?: number;
64
+ model?: string;
65
+ action?: 'complete' | 'abandon' | 'list';
66
+ project?: string;
67
+ notes?: string;
68
+ }): Promise<unknown>;
@@ -7,6 +7,7 @@
7
7
  */
8
8
  import { computeBlastRadius } from '../graph.js';
9
9
  import { getLoader, getGraph, validateProject, entityLayer, entitySummary, } from './index.js';
10
+ import { isSupabaseConfigured, insertPrediction, updateActualBurn, abandonPrediction, listPredictions, } from '../supabase.js';
10
11
  // ─── Layer ordering for violation detection ──────────────────────
11
12
  const LAYER_ORDER = {
12
13
  route: 0,
@@ -20,6 +21,65 @@ const LAYER_ORDER = {
20
21
  utility: 8,
21
22
  component: 1, // UI components are peers to controllers
22
23
  };
24
+ // ─── Shared: Token estimation ────────────────────────────────────
/**
 * Approximate how many tokens an entity costs when loaded into an LLM context.
 *
 * A truthy `sourceTokens.estimated` on the entity is returned as-is. Otherwise
 * the figure is assembled from the entity's coordinates: a fixed base plus
 * weighted counts of parameters, calls, imports and data inputs, and a flat
 * allowance for a structured signature and for a constraints record.
 *
 * @param {object} e - Entity to size.
 * @returns {number} estimated token count.
 */
export function estimateTokens(e) {
    const measured = e.sourceTokens?.estimated;
    if (measured)
        return measured;
    // Fallback path: no usable source token count on the entity
    const hasStructuredSignature = Boolean(e.struct) && typeof e.struct !== 'string';
    const hasConstraintRecord = Boolean(e.constraints)
        && typeof e.constraints === 'object'
        && !Array.isArray(e.constraints);
    const parts = [
        50, // base: name, id, layer
        hasStructuredSignature ? 20 + (e.struct.params?.length || 0) * 10 : 0,
        e.edges?.calls ? e.edges.calls.length * 8 : 0,
        e.edges?.imports ? e.edges.imports.length * 6 : 0,
        e.data?.inputs ? e.data.inputs.length * 10 : 0,
        hasConstraintRecord ? 30 : 0,
    ];
    return parts.reduce((sum, part) => sum + part, 0);
}
/**
 * Classify which estimator produced an entity's token count.
 *
 * @param {object} e - Entity to inspect.
 * @returns {'syntactic' | 'charDiv4' | 'heuristic'} 'heuristic' when the entity
 *   has no truthy `sourceTokens.estimated`; 'syntactic' when the estimate equals
 *   a truthy `syntactic` count; 'charDiv4' in every other case, including when
 *   the origin of the estimate cannot be told apart.
 */
function detectEstimator(e) {
    const counts = e.sourceTokens;
    if (!counts?.estimated)
        return 'heuristic';
    const matchesSyntactic = Boolean(counts.syntactic) && counts.estimated === counts.syntactic;
    return matchesSyntactic ? 'syntactic' : 'charDiv4';
}
/**
 * Pick the estimator that accounts for the most entities in a set.
 *
 * Ties resolve in the order syntactic, charDiv4, heuristic.
 *
 * @param {Iterable<object>} entities - Entities to classify.
 * @returns {'syntactic' | 'charDiv4' | 'heuristic'} the most frequent estimator.
 */
function dominantEstimator(entities) {
    const tally = { syntactic: 0, charDiv4: 0, heuristic: 0 };
    for (const entity of entities) {
        tally[detectEstimator(entity)] += 1;
    }
    const { syntactic, charDiv4, heuristic } = tally;
    const syntacticLeads = syntactic >= charDiv4 && syntactic >= heuristic;
    if (syntacticLeads)
        return 'syntactic';
    return charDiv4 >= heuristic ? 'charDiv4' : 'heuristic';
}
23
83
  // ─── Functor 1: find_dead_code ───────────────────────────────────
24
84
  export function findDeadCode(args) {
25
85
  const projErr = validateProject(args.project);
@@ -207,6 +267,7 @@ export function getCouplingMetrics(args) {
207
267
  cohesion: Math.round(cohesion * 1000) / 1000,
208
268
  coupling,
209
269
  instability: Math.round(instability * 1000) / 1000,
270
+ _summary: `${groupName}: ${internalEdges} internal / ${totalExternal} external edges, cohesion ${Math.round(cohesion * 1000) / 1000} (${size} entities)`,
210
271
  });
211
272
  }
212
273
  // Sort by coupling (most coupled first)
@@ -338,6 +399,7 @@ export function findErrorGaps(args) {
338
399
  return {
339
400
  totalFallible: fallibleIds.size,
340
401
  errorGaps: gaps.length,
402
+ _summary: `${gaps.length} error handling gaps across ${fallibleIds.size} fallible entities`,
341
403
  gaps: gaps.slice(0, 100),
342
404
  };
343
405
  }
@@ -384,14 +446,16 @@ export function getTestCoverage(args) {
384
446
  }
385
447
  const covered = productionEntities.filter(e => exercised.has(e.id));
386
448
  const uncovered = productionEntities.filter(e => !exercised.has(e.id));
449
+ const coveragePercent = productionEntities.length > 0
450
+ ? Math.round((covered.length / productionEntities.length) * 1000) / 10
451
+ : 0;
387
452
  const result = {
388
453
  totalProduction: productionEntities.length,
389
454
  totalTests: testIds.size,
390
455
  coveredCount: covered.length,
391
456
  uncoveredCount: uncovered.length,
392
- coveragePercent: productionEntities.length > 0
393
- ? Math.round((covered.length / productionEntities.length) * 1000) / 10
394
- : 0,
457
+ coveragePercent,
458
+ _summary: `${testIds.size} test entities exercise ${covered.length} of ${productionEntities.length} production entities (${coveragePercent}% coverage)`,
395
459
  };
396
460
  if (weight_by_blast_radius && uncovered.length > 0) {
397
461
  // Compute blast radius for each uncovered entity to prioritize what to test
@@ -431,24 +495,6 @@ export function getOptimalContext(args) {
431
495
  return { error: `Entity not found: ${target_entity}` };
432
496
  }
433
497
  const targetId = entity.id;
434
- // Estimate tokens for an entity based on its dimensions
435
- function estimateTokens(e) {
436
- let tokens = 50; // Base: name, id, layer
437
- if (e.struct && typeof e.struct !== 'string') {
438
- tokens += 20; // signature
439
- tokens += (e.struct.params?.length || 0) * 10;
440
- }
441
- if (e.edges?.calls)
442
- tokens += e.edges.calls.length * 8;
443
- if (e.edges?.imports)
444
- tokens += e.edges.imports.length * 6;
445
- if (e.data?.inputs)
446
- tokens += e.data.inputs.length * 10;
447
- if (e.constraints && typeof e.constraints === 'object' && !Array.isArray(e.constraints)) {
448
- tokens += 30;
449
- }
450
- return tokens;
451
- }
452
498
  const candidates = [];
453
499
  if (strategy === 'blast_radius') {
454
500
  // Use blast radius to get all related entities with depth
@@ -552,3 +598,271 @@ export function getOptimalContext(args) {
552
598
  context: selected,
553
599
  };
554
600
  }
601
+ // ─── Functor 8: estimate_task_cost ───────────────────────────────
/**
 * Estimate token cost of a code change BEFORE starting work.
 *
 * Resolves the targets, computes their blast radius, sums per-entity token
 * estimates, and scales the sum by an iteration multiplier (base 1.5, +0.5 each
 * for 3+ layers touched, an affected module whose outgoing share of
 * cross-module calls exceeds 0.8, and more than 50 affected entities; capped at 4.0).
 * When Supabase is configured the prediction is logged and its id is attached
 * to the result; logging failures never fail the estimate.
 *
 * @param {{target_entities: string[], context_budget?: number, project?: string}} args
 *   `target_entities` are entity ids or names; `context_budget` defaults to 200000.
 * @returns {Promise<object>} the estimate, or `{ error, hint? }` when the project
 *   is invalid, the arguments are malformed, or no target could be resolved.
 */
export async function estimateTaskCost(args) {
    const projErr = validateProject(args.project);
    if (projErr)
        return { error: projErr };
    const { target_entities, context_budget = 200000 } = args;
    // Arguments arrive from an external tool call: reject shapes that would
    // otherwise throw or yield Infinity/NaN pass counts.
    if (!Array.isArray(target_entities)) {
        return {
            error: 'target_entities must be an array of entity IDs or names.',
            hint: 'Use query_entities to find the correct entity IDs.',
        };
    }
    if (typeof context_budget !== 'number' || !Number.isFinite(context_budget) || context_budget <= 0) {
        return { error: 'context_budget must be a positive number of tokens.' };
    }
    const loader = getLoader();
    const g = getGraph(args.project);
    // Resolve target entities (id lookup first, then name lookup)
    const resolvedTargets = [];
    const unresolvedNames = [];
    const changedIds = new Set();
    for (const name of target_entities) {
        const entity = loader.getEntityById(name, args.project)
            || loader.getEntityByName(name, args.project);
        if (entity) {
            resolvedTargets.push(entity);
            changedIds.add(entity.id);
        }
        else {
            unresolvedNames.push(name);
        }
    }
    if (resolvedTargets.length === 0) {
        return {
            error: `No entities resolved. Unresolved: ${unresolvedNames.join(', ')}`,
            hint: 'Use query_entities to find the correct entity IDs.',
        };
    }
    // Compute blast radius across all targets
    const br = computeBlastRadius(g, changedIds);
    // Collect all affected entities (targets + blast radius)
    const allAffectedIds = new Set([...changedIds, ...br.affected]);
    const affectedEntities = [];
    for (const id of allAffectedIds) {
        const e = g.entityById.get(id);
        if (e)
            affectedEntities.push(e);
    }
    // Sum token estimates, grouped per source file
    let contextLoad = 0;
    const fileTokens = new Map();
    const layerSet = new Set();
    for (const e of affectedEntities) {
        const tokens = estimateTokens(e);
        contextLoad += tokens;
        const file = e._sourceFile || 'unknown';
        if (!fileTokens.has(file))
            fileTokens.set(file, { entities: 0, tokens: 0 });
        const ft = fileTokens.get(file);
        ft.entities++;
        ft.tokens += tokens;
        layerSet.add(entityLayer(e));
    }
    // Compute iteration multiplier
    let iterationMultiplier = 1.5; // base: read + write pass
    // +0.5 if cross-cutting (3+ layers)
    if (layerSet.size >= 3)
        iterationMultiplier += 0.5;
    // +0.5 if any affected module has high instability
    const affectedModules = new Set();
    for (const e of affectedEntities) {
        if (e.context?.module)
            affectedModules.add(e.context.module);
    }
    if (affectedModules.size > 0) {
        // Index module membership in a single pass over the project instead of
        // re-filtering the whole entity list once per affected module.
        const idsByModule = new Map();
        for (const e of loader.getEntities(args.project)) {
            const mod = e.context?.module;
            if (!affectedModules.has(mod))
                continue;
            if (!idsByModule.has(mod))
                idsByModule.set(mod, new Set());
            idsByModule.get(mod).add(e.id);
        }
        // Quick instability check: outgoing / (outgoing + incoming) cross-module calls
        for (const mod of affectedModules) {
            const modIds = idsByModule.get(mod) ?? new Set();
            let outgoing = 0;
            let incoming = 0;
            for (const id of modIds) {
                for (const cid of g.callees.get(id) ?? []) {
                    if (!modIds.has(cid))
                        outgoing++;
                }
                for (const cid of g.callers.get(id) ?? []) {
                    if (!modIds.has(cid))
                        incoming++;
                }
            }
            const total = outgoing + incoming;
            if (total > 0 && outgoing / total > 0.8) {
                iterationMultiplier += 0.5;
                break; // only add once
            }
        }
    }
    // +0.5 if large blast radius
    if (allAffectedIds.size > 50)
        iterationMultiplier += 0.5;
    // Cap at 4.0
    iterationMultiplier = Math.min(4.0, iterationMultiplier);
    const roundedMultiplier = Math.round(iterationMultiplier * 10) / 10;
    const projectedTotal = Math.round(contextLoad * iterationMultiplier);
    const fitsInSinglePass = contextLoad <= context_budget;
    const passesRequired = fitsInSinglePass ? 1 : Math.ceil(contextLoad / context_budget);
    // Build file breakdown sorted by token count
    const breakdown = [...fileTokens.entries()]
        .map(([file, data]) => ({ file, entities: data.entities, tokens: data.tokens }))
        .sort((a, b) => b.tokens - a.tokens);
    // Build target summaries with token estimates
    const targetSummaries = resolvedTargets.map(e => ({
        ...entitySummary(e),
        sourceTokens: estimateTokens(e),
    }));
    const affectedLayers = [...layerSet].sort();
    const estimator = dominantEstimator(affectedEntities);
    // Resolve project name for logging
    const projectName = args.project || loader.getProjectNames()[0] || 'unknown';
    const result = {
        targets: targetSummaries,
        ...(unresolvedNames.length > 0 ? { unresolved: unresolvedNames } : {}),
        affectedEntities: allAffectedIds.size,
        affectedFiles: fileTokens.size,
        affectedLayers,
        tokenEstimate: {
            contextLoad,
            iterationMultiplier: roundedMultiplier,
            projectedTotal,
            estimatorUsed: estimator,
        },
        feasibility: {
            contextBudget: context_budget,
            fitsInSinglePass,
            passesRequired,
        },
        breakdown: breakdown.slice(0, 30),
        _summary: `Changing ${target_entities.join(', ')} affects ${allAffectedIds.size} entities across ${fileTokens.size} files. Context load: ~${Math.round(contextLoad / 1000)}K tokens. Projected total with ${roundedMultiplier}x iteration: ~${Math.round(projectedTotal / 1000)}K tokens. ${fitsInSinglePass ? `Fits in ${Math.round(context_budget / 1000)}K budget (${passesRequired} pass).` : `Exceeds ${Math.round(context_budget / 1000)}K budget — needs ${passesRequired} passes.`}`,
    };
    // Log prediction to Supabase if configured (calibration feedback loop)
    if (isSupabaseConfigured()) {
        try {
            const predictionId = await insertPrediction({
                project: projectName,
                target_entities,
                predicted_context_load: contextLoad,
                predicted_iteration_mult: roundedMultiplier,
                predicted_total: projectedTotal,
                affected_entities: allAffectedIds.size,
                affected_files: fileTokens.size,
                affected_layers: affectedLayers,
                estimator_used: estimator,
                context_budget,
            });
            if (predictionId) {
                result.predictionId = predictionId;
                result._summary += ` Prediction logged (${String(predictionId).slice(0, 8)}…).`;
            }
        }
        catch (err) {
            // Logging is best-effort: the estimate is still returned, but the
            // failure is recorded on stderr instead of vanishing.
            const reason = err instanceof Error ? err.message : String(err);
            process.stderr.write(`[seshat] Prediction log failed: ${reason}\n`);
        }
    }
    return result;
}
766
+ // ─── Functor 9: report_actual_burn ───────────────────────────────
/**
 * Close the calibration feedback loop by reporting actual token usage
 * against a prior prediction from estimate_task_cost.
 *
 * Actions:
 *  - "complete" (default): store the actual counts and return predicted vs actual with drift.
 *  - "abandon": mark the prediction as cancelled.
 *  - "list": return recent predictions, plus aggregate drift statistics once at
 *    least 3 completed predictions with a drift value are available.
 *
 * @param {{prediction_id?: string, actual_input_tokens?: number,
 *   actual_output_tokens?: number, actual_total_tokens?: number, model?: string,
 *   action?: 'complete' | 'abandon' | 'list', project?: string, notes?: string}} args
 * @returns {Promise<object>} the action's result, or `{ error, hint? }` when
 *   Supabase is not configured, required arguments are missing, or the update failed.
 */
export async function reportActualBurn(args) {
    if (!isSupabaseConfigured()) {
        return {
            error: 'Supabase not configured. Set SESHAT_SUPABASE_URL and SESHAT_SUPABASE_KEY env vars.',
            hint: 'The calibration feedback loop requires a Supabase connection to store predictions.',
        };
    }
    const { action = 'complete' } = args;
    // List mode: show recent predictions for calibration analysis
    if (action === 'list') {
        const rows = await listPredictions(args.project);
        if (rows.length === 0) {
            return { message: 'No predictions found.', predictions: [] };
        }
        const summary = rows.map((r) => ({
            id: r.id,
            project: r.project,
            targets: r.target_entities,
            predicted: r.predicted_total,
            actual: r.actual_total_tokens,
            drift: r.drift_ratio,
            estimator: r.estimator_used,
            status: r.status,
            createdAt: r.created_at,
        }));
        // Compute aggregate calibration stats for completed predictions
        const completed = rows.filter((r) => r.status === 'completed' && r.drift_ratio != null);
        let calibration;
        if (completed.length >= 3) {
            const drifts = completed.map((r) => r.drift_ratio);
            const meanDrift = drifts.reduce((a, b) => a + b, 0) / drifts.length;
            const sortedDrifts = [...drifts].sort((a, b) => a - b);
            const mid = Math.floor(sortedDrifts.length / 2);
            // Even sample sizes have no single middle element: average the two central values.
            const medianDrift = sortedDrifts.length % 2 === 0
                ? (sortedDrifts[mid - 1] + sortedDrifts[mid]) / 2
                : sortedDrifts[mid];
            const maxOvershoot = Math.max(...drifts);
            const maxUndershoot = Math.min(...drifts);
            calibration = {
                completedSamples: completed.length,
                meanDrift: Math.round(meanDrift * 1000) / 1000,
                medianDrift: Math.round(medianDrift * 1000) / 1000,
                maxOvershoot: Math.round(maxOvershoot * 1000) / 1000,
                maxUndershoot: Math.round(maxUndershoot * 1000) / 1000,
                _interpretation: meanDrift > 0.2
                    ? 'Predictions underestimate — consider increasing iteration multiplier.'
                    : meanDrift < -0.2
                        ? 'Predictions overestimate — consider decreasing iteration multiplier.'
                        : 'Predictions are well-calibrated (within 20% mean drift).',
            };
        }
        return {
            total: rows.length,
            predictions: summary,
            ...(calibration ? { calibration } : {}),
        };
    }
    // Complete or abandon requires prediction_id
    if (!args.prediction_id) {
        return {
            error: 'prediction_id is required for complete/abandon actions.',
            hint: 'Use estimate_task_cost first to get a predictionId, then pass it here.',
        };
    }
    // Abandon mode
    if (action === 'abandon') {
        const ok = await abandonPrediction(args.prediction_id);
        return ok
            ? { status: 'abandoned', predictionId: args.prediction_id }
            : { error: `Failed to abandon prediction ${args.prediction_id}. It may already be completed or not exist.` };
    }
    // Complete mode: requires actual token counts. Zero is a legitimate count,
    // so test for "is a non-negative finite number" rather than truthiness.
    const isTokenCount = (value) => typeof value === 'number' && Number.isFinite(value) && value >= 0;
    const hasModel = typeof args.model === 'string' && args.model.length > 0;
    if (!isTokenCount(args.actual_input_tokens)
        || !isTokenCount(args.actual_output_tokens)
        || !isTokenCount(args.actual_total_tokens)
        || !hasModel) {
        return {
            error: 'actual_input_tokens, actual_output_tokens, actual_total_tokens, and model are required to complete a prediction.',
        };
    }
    const updated = await updateActualBurn(args.prediction_id, {
        actual_input_tokens: args.actual_input_tokens,
        actual_output_tokens: args.actual_output_tokens,
        actual_total_tokens: args.actual_total_tokens,
        model: args.model,
        notes: args.notes,
    });
    if (!updated) {
        return {
            error: `Failed to update prediction ${args.prediction_id}. It may not exist or may already be completed.`,
        };
    }
    return {
        status: 'completed',
        predictionId: updated.id,
        predicted: updated.predicted_total,
        actual: updated.actual_total_tokens,
        drift: updated.drift_ratio,
        _summary: `Prediction ${String(updated.id).slice(0, 8)}… closed. Predicted ${updated.predicted_total} tokens, actual ${updated.actual_total_tokens} tokens. Drift: ${updated.drift_ratio != null ? `${(updated.drift_ratio * 100).toFixed(1)}%` : 'N/A'}.`,
    };
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@papyruslabsai/seshat-mcp",
3
- "version": "0.3.1",
3
+ "version": "0.3.3",
4
4
  "description": "Semantic MCP server — exposes a codebase's 9D JSTF-T coordinate space as queryable tools",
5
5
  "type": "module",
6
6
  "bin": {