@rosh100yx/outlier 0.4.24 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/cli.ts CHANGED
@@ -20,6 +20,74 @@ const ASCII_LOGO = `
20
20
 
21
21
  let finalReceipt = '';
22
22
 
23
/**
 * Print the audit as a single JSON document on stdout and nothing else.
 *
 * This is the machine-readable contract for agents, swarms, and CI: the same
 * facts the human receipt shows, as plain JSON. The three collectors run in
 * parallel; a collector that rejects yields `null`, and the corresponding
 * section of the output is then `null` rather than failing the whole run.
 */
async function emitJson() {
  const manifest = require('../package.json');

  const [commits, usage, surface] = await Promise.all([
    getAuthorshipStats().catch(() => null),
    getCarbonStats().catch(() => null),
    getCapabilitiesStats().catch(() => null),
  ]);

  // Rounds to a fixed number of decimals and converts back to a number.
  const rounded = (value: number, digits: number): number => Number(value.toFixed(digits));

  // With no authorship data the ratio is treated as 0 for the policy check.
  const aiRatio = commits ? commits.ratio : 0;
  const cap = 0.70;

  // Tools whose reach lets them change something (money, shell, deploys, writes).
  const riskyReach = ['money', 'exec', 'deploy', 'write-remote', 'write-local'];
  let writeOrDeploy = 0;
  if (surface) {
    writeOrDeploy = surface.mcps.filter((m: any) => riskyReach.includes(m.reach)).length;
  }

  const authorship = commits
    ? {
        aiPercent: rounded(commits.ratio * 100, 1),
        aiRatio: commits.ratio,
        totalCommits: commits.total,
        aiCommits: commits.ai,
        nonMergePercent: rounded(commits.ratioNoMerges * 100, 1),
        provenance: 'proxy',
        note: 'git Co-Authored-By trailers; under-counts if the agent omits the trailer',
      }
    : null;

  const cost = usage
    ? {
        totalTokens: usage.totalTokens,
        outputTokens: usage.outputTokens,
        // Guard the division: zero total tokens reports 0% reuse.
        cacheReusePercent: usage.totalTokens
          ? rounded((usage.cacheReadTokens / usage.totalTokens) * 100, 1)
          : 0,
        estUsd: rounded(usage.estUsd, 2),
        costIsReal: usage.costIsReal,
        provenance: usage.tokenProvenance,
        source: usage.sourceLabel,
      }
    : null;

  const carbon = usage
    ? {
        energyKwh: rounded(usage.energyKwh, 4),
        co2Kg: rounded(usage.localCo2Kg, 4),
        region: usage.localRegion,
        provenance: usage.carbonProvenance,
        note: 'counterfactual: cloud inference runs on the provider grid, not yours',
      }
    : null;

  const reach = surface
    ? {
        blastRadius: surface.blastRadius,
        reasons: surface.blastReasons,
        toolCount: surface.mcps.length,
        writeOrDeployCount: writeOrDeploy,
        tools: surface.mcps,
        subagents: surface.subagents,
        hooks: surface.hooks,
        skills: surface.skills.length,
        orchestration: surface.hasOrchestration,
      }
    : null;

  // Key order is part of the printed document, so it is assembled explicitly.
  const out = {
    tool: 'outlier',
    version: manifest.version,
    repo: process.cwd().split('/').pop(),
    generatedAt: new Date().toISOString(),
    localFirst: true,
    authorship,
    cost,
    carbon,
    reach,
    policy: {
      aiCapPercent: cap * 100,
      status: aiRatio > cap ? 'over' : 'within',
    },
  };

  // Only JSON on stdout — nothing else.
  process.stdout.write(`${JSON.stringify(out, null, 2)}\n`);
}
90
+
23
91
  async function runOnboarding() {
24
92
  console.log(pc.cyan(ASCII_LOGO));
25
93
  intro(pc.inverse(' outlier: Welcome '));
@@ -78,24 +146,38 @@ async function main() {
78
146
  action = 'status';
79
147
  }
80
148
 
149
+ // Agent / CI / swarm contract: --json emits a structured audit and nothing else
150
+ // (no logo, no spinner, no ANSI). This is how an agent perceives outlier.
151
+ if (process.argv.includes('--json')) {
152
+ await emitJson();
153
+ process.exit(0);
154
+ }
155
+
81
156
  console.log(pc.cyan(ASCII_LOGO));
82
157
  const pkg = require('../package.json');
83
158
  console.log(pc.dim(` Outlier v${pkg.version} · AI Code Reliance & Telemetry Engine\n`));
84
-
85
-
159
+
160
+
86
161
  if (action === '--help' || action === '-h' || action === 'help') {
87
- console.log(pc.bold('\nCOMMANDS:'));
88
- console.log(` ${pc.cyan('outlier')} Interactive menu (Onboarding for first-timers)`);
89
- console.log(` ${pc.cyan('outlier status')} Run full AI reliance & capability audit`);
90
- console.log(` ${pc.cyan('outlier authorship')} Scan git history for AI co-authorship ratio`);
91
- console.log(` ${pc.cyan('outlier carbon')} Scan local logs for token waste & carbon cost`);
92
- console.log(` ${pc.cyan('outlier policy')} Configure CI/CD guardrails and thresholds`);
93
- console.log(` ${pc.cyan('outlier impact')} See the compounding horizon of AI Deskilling`);
94
- console.log(` ${pc.cyan('outlier knowledge')} Explore references, graphs, and the core literature`);
95
- console.log(` ${pc.cyan('outlier participate')} Help build the academic literature on AI deskilling`);
96
- console.log(` ${pc.cyan('outlier init')} Install the once-per-day shell greeting`);
97
- console.log(` ${pc.cyan('outlier uninit')} Remove the shell greeting`);
98
- console.log('\n' + pc.dim('Run without arguments to start the interactive wizard.'));
162
+ console.log(pc.bold('\nWHAT OUTLIER DOES'));
163
+ console.log(pc.dim(' Reads your local git history and AI logs — on your machine — to show'));
164
+ console.log(pc.dim(' how much of your code AI wrote, what it cost, and how to keep your skill.\n'));
165
+ console.log(pc.bold('COMMANDS:'));
166
+ console.log(` ${pc.cyan('outlier')} Run the audit (the default same as 'status')`);
167
+ console.log(` ${pc.cyan('outlier status')} Full audit: who wrote the code, what it cost, your limit`);
168
+ console.log(` ${pc.cyan('outlier status --save')} Save the audit to ./outlier-audit.txt`);
169
+ console.log(` ${pc.cyan('outlier --json')} Machine-readable audit (for agents, CI, swarms)`);
170
+ console.log(` ${pc.cyan('outlier authorship')} Just the AI-vs-human commit breakdown`);
171
+ console.log(` ${pc.cyan('outlier carbon')} Just the token spend, cache waste & carbon`);
172
+ console.log(` ${pc.cyan('outlier capabilities')} What tools & skills your agents can reach`);
173
+ console.log(` ${pc.cyan('outlier policy')} Set an AI-authorship limit (local git hook / CI)`);
174
+ console.log(` ${pc.cyan('outlier impact')} What AI reliance compounds to over time`);
175
+ console.log(` ${pc.cyan('outlier knowledge')} The research behind the metrics`);
176
+ console.log(` ${pc.cyan('outlier participate')} Share anonymous feedback for the deskilling study`);
177
+ console.log(` ${pc.cyan('outlier init')} Show a once-per-day reliance greeting in new shells`);
178
+ console.log(` ${pc.cyan('outlier uninit')} Remove that greeting`);
179
+ console.log('\n' + pc.dim('Local-first: nothing ever leaves your machine.'));
180
+ console.log(pc.dim('How it works → https://github.com/rosh100yx/outlier#how-it-works'));
99
181
  process.exit(0);
100
182
  }
101
183
 
@@ -275,22 +357,45 @@ Conservative Floor: ${color(nmPct + '%')}`,
275
357
 
276
358
  try {
277
359
  let authPct = '0%';
360
+ let nmFloorStr = '';
278
361
  let ruleFailures = 0;
279
362
 
280
363
  if (gitStats) {
281
364
  authPct = `${(gitStats.ratio * 100).toFixed(1)}%`;
365
+ // Conservative floor: non-merge commits only (merges often lack the trailer).
366
+ nmFloorStr = ` ${pc.dim(`(${(gitStats.ratioNoMerges * 100).toFixed(0)}% excl. merges)`)}`;
282
367
  if (gitStats.ratio > 0.7) ruleFailures++;
283
368
  }
284
369
 
370
+ // Honesty: a very low ratio alongside heavy token use usually means the agent
371
+ // doesn't tag commits, not that the human wrote everything.
372
+ const lowTrailerWarn =
373
+ gitStats && gitStats.ratio < 0.1 && carbon && carbon.totalTokens > 1_000_000
374
+ ? `\n ${pc.dim('│')} ${pc.dim('Low %? Your agent may not tag commits — outlier counts only')}\n ${pc.dim('│')} ${pc.dim('commits with a Co-Authored-By trailer.')}`
375
+ : '';
376
+
285
377
  let cachePct = '0';
286
378
  let co2Str = '0.0kg';
287
379
  let regionStr = 'Global Average';
380
+ let sourceLabel = 'no local AI logs found';
381
+ let noData = true;
288
382
  if (carbon) {
289
383
  if (carbon.totalTokens > 0) {
290
384
  cachePct = ((carbon.cacheReadTokens / carbon.totalTokens) * 100).toFixed(1);
385
+ noData = false;
291
386
  }
292
387
  co2Str = `${carbon.localCo2Kg.toFixed(2)}kg CO2`;
293
388
  regionStr = carbon.localRegion;
389
+ sourceLabel = carbon.sourceLabel;
390
+ }
391
+
392
+ // One-line agent-reach summary (full detail in `outlier capabilities`).
393
+ let reachStr = pc.dim('run: outlier capabilities');
394
+ if (capabilities) {
395
+ const rc = capabilities.blastRadius;
396
+ const col = rc === 'CRITICAL' || rc === 'HIGH' ? pc.red : rc === 'MEDIUM' ? pc.yellow : pc.green;
397
+ const risky = capabilities.mcps.filter((m: any) => ['money','exec','deploy','write-remote','write-local'].includes(m.reach)).length;
398
+ reachStr = `${col(pc.bold(rc))} · ${capabilities.mcps.length} tools` + (risky ? pc.dim(`, ${risky} can write/deploy`) : '');
294
399
  }
295
400
 
296
401
  // The thermal receipt below is the single canonical output for `status`.
@@ -344,25 +449,32 @@ Conservative Floor: ${color(nmPct + '%')}`,
344
449
  ${pc.dim('│')} ${pc.cyan('█▄█ █▄█ ░█░ █▄▄ █ ██▄ █▀▄')} ${pc.dim(`:: ${repoName} · ${dateStr}`)}
345
450
  ${pc.dim('├────────────────────────────────────────────────────────')}
346
451
  ${pc.dim('│')} ${pc.bold(pc.bgBlue(' WHO WROTE THE CODE '))}
347
- ${pc.dim('│')} AI ${aiBar} ${authorshipStr}
452
+ ${pc.dim('│')} AI ${aiBar} ${authorshipStr}${nmFloorStr}
348
453
  ${pc.dim('│')} You ${humanBar} ${pc.bold(humanSov)}
454
+ ${pc.dim('│')} ${pc.dim('Typical: solo devs 10–40% · AI-framework repos up to ~80%')}
349
455
  ${pc.dim('│')}
350
- ${pc.dim('│')} ${verdictZone} — ${verdictText.split('\n').join('\n ' + pc.dim('│') + ' ')}
456
+ ${pc.dim('│')} ${verdictZone} — ${verdictText.split('\n').join('\n ' + pc.dim('│') + ' ')}${lowTrailerWarn}
351
457
  ${pc.dim('├────────────────────────────────────────────────────────')}
352
458
  ${pc.dim('│')} ${pc.bold(pc.bgMagenta(' WHAT IT COST '))}
353
459
  ${pc.dim('│')} Tokens used ${pc.bold(totalTokensStr)}
354
460
  ${pc.dim('│')} Est. spend ${pc.bold(estUsdStr)}
355
461
  ${pc.dim('│')} Re-used context ${cacheBar} ${pc.bold(cachePct + '%')}
356
- ${pc.dim('│')} Energy ${pc.bold(co2Str)} ${pc.dim(`(${regionStr} grid, rough)`)}
462
+ ${pc.dim('│')} Energy ${pc.bold(co2Str)} ${pc.dim(`(${regionStr} grid)`)}
463
+ ${pc.dim('│')} ${pc.dim(`Source: ${sourceLabel}`)}
357
464
  ${pc.dim('│')}
358
465
  ${pc.dim('│')} ${cacheVerdict} — ${cacheText.split('\n').join('\n ' + pc.dim('│') + ' ')}
359
466
  ${pc.dim('├────────────────────────────────────────────────────────')}
467
+ ${pc.dim('│')} ${pc.bold(pc.bgCyan(pc.black(' WHAT YOUR AGENTS CAN REACH ')))}
468
+ ${pc.dim('│')} Blast radius ${reachStr}
469
+ ${pc.dim('│')} ${pc.dim('Full map (deploy/push/write tools): outlier capabilities')}
470
+ ${pc.dim('├────────────────────────────────────────────────────────')}
360
471
  ${pc.dim('│')} ${pc.bold(pc.bgYellow(pc.black(' YOUR LIMIT ')))}
361
472
  ${pc.dim('│')} AI cap ${pc.bold('70%')} ${pc.dim('· change with: outlier policy')}
362
473
  ${pc.dim('│')} Status ${policyStatus} ${pc.dim('·')} ${policyAction}
363
474
  ${pc.dim('├────────────────────────────────────────────────────────')}
364
- ${pc.dim('│')} ${pc.dim(pc.italic('Run this before you start. Keep the skill while you'))}
365
- ${pc.dim('│')} ${pc.dim(pc.italic('use the speed.'))}
475
+ ${pc.dim('│')} ${pc.dim('Numbers are local estimates authorship is a proxy and')}
476
+ ${pc.dim('│')} ${pc.dim('carbon is rough. How it works: outlier --help')}
477
+ ${pc.dim('│')} ${pc.dim(pc.italic('Run this before you start. Keep the skill while you use the speed.'))}
366
478
  ${pc.dim('└────────────────────────────────────────────────────────')}`;
367
479
  } else {
368
480
  note(
@@ -376,24 +488,42 @@ Conservative Floor: ${color(nmPct + '%')}`,
376
488
  }
377
489
 
378
490
  } else if (action === 'capabilities') {
379
- s.start('Auditing AI surface area (MCPs, Skills, Orchestrators)...');
491
+ s.start('Mapping what your agents can reach...');
380
492
  try {
381
493
  const caps = await getCapabilitiesStats();
382
- s.stop('Capabilities Scan Complete');
494
+ s.stop('Reach map complete');
495
+
496
+ const radiusColor = caps.blastRadius === 'CRITICAL' ? pc.red
497
+ : caps.blastRadius === 'HIGH' ? pc.red
498
+ : caps.blastRadius === 'MEDIUM' ? pc.yellow : pc.green;
499
+
500
+ // Group tools by reach so the risky ones stand out.
501
+ const order: string[] = ['money', 'exec', 'deploy', 'write-remote', 'write-local', 'data', 'network', 'model', 'read'];
502
+ const reachLabel: Record<string, string> = {
503
+ money: 'can move money', exec: 'can run shell', deploy: 'can deploy', 'write-remote': 'can push to repos',
504
+ 'write-local': 'can write files', data: 'data stores', network: 'network', model: 'models', read: 'read-only',
505
+ };
506
+ const riskyReaches = new Set(['money', 'exec', 'deploy', 'write-remote', 'write-local']);
507
+ const toolLines = caps.mcps.length === 0 ? ' None detected'
508
+ : order.filter(r => caps.mcps.some(m => m.reach === r)).map(r => {
509
+ const names = caps.mcps.filter(m => m.reach === r).map(m => m.name).join(', ');
510
+ const tag = riskyReaches.has(r) ? pc.red(`[${reachLabel[r]}]`) : pc.dim(`[${reachLabel[r]}]`);
511
+ return ` ${tag} ${names}`;
512
+ }).join('\n');
383
513
 
384
514
  note(
385
- `Orchestration Policy: ${caps.hasOrchestration ? pc.green('Detected (AGENTS.md)') : pc.yellow('None')}
515
+ `${pc.bold('BLAST RADIUS:')} ${radiusColor(pc.bold(caps.blastRadius))} ${pc.dim('— if an agent or a prompt injection drives your tools')}
516
+ ${caps.blastReasons.length ? caps.blastReasons.map(r => ` ${pc.red('•')} ${r}`).join('\n') : pc.green(' • read-only — limited reach')}
386
517
 
387
- Active Skills (${caps.skills.length}):
388
- ${caps.skills.length > 0 ? pc.cyan(caps.skills.map(s => ` • ${s}`).join('\n')) : ' None'}
518
+ ${pc.bold(`What your agents can reach (${caps.mcps.length} MCP tools):`)}
519
+ ${toolLines}
389
520
 
390
- Active MCP Servers (${caps.mcps.length}):
391
- ${caps.mcps.length > 0 ? pc.magenta(caps.mcps.map(m => ` • ${m}`).join('\n')) : ' None'}
521
+ ${pc.bold('Automation & agents:')}
522
+ Hooks that fire for you: ${caps.hooks.length ? pc.yellow(caps.hooks.join(', ')) : 'none'}
523
+ Sub-agents: ${caps.subagents} Skills: ${caps.skills.length} Orchestration policy: ${caps.hasOrchestration ? pc.green('yes') : pc.yellow('no')}
392
524
 
393
- ${pc.bold('Governance Assessment:')}
394
- This repository provides agents with ${caps.mcps.length} toolsets and ${caps.skills.length} skills.
395
- ${caps.skills.length > 5 ? pc.red('⚠ High Surface Area: Ensure strict authorship review is enabled.') : pc.green('✓ Low Surface Area: Risk contained.')}`,
396
- 'AI Capabilities Map'
525
+ ${pc.dim('This is your attack surface. Fewer write/deploy tools per session = smaller blast radius.')}`,
526
+ 'Agent Reach & Blast Radius'
397
527
  );
398
528
  } catch (e: any) {
399
529
  s.stop('Audit failed');
@@ -566,10 +696,20 @@ Artifact: ${pc.cyan(reportPath)}`,
566
696
  console.log(`\nRead the full academic foundation at: ${pc.underline('https://github.com/rosh100yx/outlier')}\n`);
567
697
  }
568
698
 
569
- outro('Local telemetry run completed. No data left your machine.');
699
+ outro('Done nothing left your machine. (How it works: outlier --help)');
570
700
 
571
701
  if (typeof finalReceipt !== 'undefined' && finalReceipt) {
572
702
  console.log(finalReceipt);
703
+
704
+ // --save: write a plain-text (no color) copy of the receipt next to the repo.
705
+ if (process.argv.includes('--save')) {
706
+ const stripAnsi = (s: string) => s.replace(/\x1b\[[0-9;]*m/g, '');
707
+ const savePath = join(process.cwd(), 'outlier-audit.txt');
708
+ try {
709
+ writeFileSync(savePath, stripAnsi(finalReceipt).trimStart() + '\n');
710
+ console.log(pc.dim(`\n 💾 Saved to ${savePath}`));
711
+ } catch {}
712
+ }
573
713
  }
574
714
 
575
715
  if (action === 'status') {
@@ -586,13 +726,20 @@ Artifact: ${pc.cyan(reportPath)}`,
586
726
  }
587
727
  console.log('');
588
728
  console.log(
589
- pc.bold(pc.cyan(' Research: ')) + 'Contribute to the AI deskilling study ➔ ' + pc.bold('outlier participate')
729
+ pc.bold(pc.green(' 📸 Share: ')) + 'Screenshot this receipt, or post your score ➔ ' +
730
+ pc.underline('https://x.com/intent/tweet?text=I+just+audited+my+codebase+with+%23Outlier')
590
731
  );
591
732
  console.log(
592
- pc.bold(pc.green(' Share: ')) + pc.underline('https://x.com/intent/tweet?text=I+just+audited+my+codebase+with+%23Outlier')
733
+ pc.bold(pc.cyan(' 🔬 Research: ')) + 'Help the AI-deskilling study — type: ' + pc.bold('outlier participate')
734
+ );
735
+ if (!process.argv.includes('--save')) {
736
+ console.log(pc.dim(' 💾 Save: outlier status --save'));
737
+ }
738
+ console.log(
739
+ pc.dim('\n outlier does more than this audit — see how you adopt AI, what it')
593
740
  );
594
741
  console.log(
595
- pc.dim('\n (To see all local governance modules, run: ') + pc.dim(pc.bold('outlier --help')) + pc.dim(')')
742
+ pc.dim(' costs, and what is actually working: ') + pc.bold(pc.cyan('outlier --help'))
596
743
  );
597
744
  }
598
745
  }
@@ -0,0 +1,69 @@
1
+ // Offline, model-aware emissions engine.
2
+ //
3
+ // Local-first means NO network: no Electricity Maps / WattTime API calls. We bundle
4
+ // the coefficients and look them up. This is the same approach CodeCarbon uses for its
5
+ // offline tracker. Two inputs:
6
+ // 1. per-model energy (kWh per 1M output tokens) — output tokens dominate inference cost
7
+ // 2. grid carbon intensity (gCO2 per kWh) for the assumed region
8
+ //
9
+ // All numbers are estimates with wide uncertainty (inference energy varies ~4-20x in the
10
+ // literature). We expose the method so the UI can label provenance honestly. We never
11
+ // claim precision we don't have.
12
+
13
/**
 * Energy per 1M OUTPUT tokens (kWh), keyed by model class.
 *
 * Anchor, per the original note: the paper measured ~10 kWh across 15.1M output
 * tokens on an Opus-class model (~0.66 kWh per 1M). Smaller/faster models use
 * materially less. These are order-of-magnitude class estimates, not vendor
 * figures.
 */
const MODEL_ENERGY_KWH_PER_M_OUTPUT: Record<string, number> = {
  // Anthropic-style tiers (also used as size buckets for other vendors)
  opus: 0.66, // large frontier (Claude Opus, GPT-4 class)
  sonnet: 0.3, // mid (Claude Sonnet, GPT-4o)
  haiku: 0.1, // small/fast (Claude Haiku, GPT-4o-mini)

  // OpenAI families
  'gpt-4': 0.55,
  'gpt-4o': 0.3,
  'gpt-5': 0.45,

  // Google families
  gemini: 0.35, // Gemini Pro class
  flash: 0.1, // Gemini Flash class

  // Everything else
  local: 0.5, // self-hosted / unknown open weights
  default: 0.45, // unknown model -> conservative mid
};
28
+
29
/**
 * Map a raw model id (e.g. "claude-opus-4-8", "gpt-4o-mini", "gemini-2.5-flash")
 * to an energy class name.
 *
 * Matching is case-insensitive and first-match-wins, so the order of the checks
 * is significant: the small/fast check runs before the vendor-family checks so
 * that "gpt-4o-mini" or "gemini-2.5-flash" land in the small class rather than
 * in their family's full-size class.
 *
 * @param modelId Raw model identifier; empty or missing values are tolerated.
 * @returns One of 'opus', 'sonnet', 'haiku', 'gpt-5', 'gpt-4o', 'gpt-4',
 *          'gemini', 'local', or 'default' when nothing matches.
 */
export function modelClass(modelId: string): string {
  const m = (modelId || '').toLowerCase();
  if (m.includes('opus')) return 'opus';
  if (m.includes('sonnet')) return 'sonnet';
  if (m.includes('haiku')) return 'haiku';

  // "mini" must be a standalone token. A plain substring test also matches the
  // "mini" inside "gemini", which sent every non-flash Gemini id to the small
  // class and made the 'gemini' branch below unreachable.
  const isMiniVariant = /(?:^|[^a-z])mini(?![a-z])/.test(m);
  if (m.includes('flash') || isMiniVariant) return 'haiku';

  if (m.includes('gpt-5')) return 'gpt-5';
  if (m.includes('gpt-4o')) return 'gpt-4o';
  if (m.includes('gpt-4')) return 'gpt-4';
  if (m.includes('gemini')) return 'gemini';
  if (m.includes('llama') || m.includes('qwen') || m.includes('mistral') || m.includes('local')) return 'local';
  return 'default';
}
43
+
44
/**
 * Estimate the energy used to generate `outputTokens` tokens on a given model.
 *
 * The model id is reduced to a class, the class is looked up in the bundled
 * per-class coefficient table, and the coefficient (kWh per 1M output tokens)
 * is scaled by the token count. A class missing from the table falls back to
 * 0.45 kWh per 1M tokens.
 *
 * @param modelId Raw model identifier.
 * @param outputTokens Number of output tokens.
 * @returns Estimated energy in kWh.
 */
export function energyKwhForModel(modelId: string, outputTokens: number): number {
  const kwhPerMillion = MODEL_ENERGY_KWH_PER_M_OUTPUT[modelClass(modelId)] ?? 0.45;
  const millionsOfTokens = outputTokens / 1_000_000;
  return millionsOfTokens * kwhPerMillion;
}
49
+
50
/**
 * Total the estimated energy over a per-model breakdown of output tokens.
 *
 * Each entry is converted with `energyKwhForModel`, so an unrecognized model id
 * is priced with that function's fallback coefficient.
 *
 * @param outputByModel Map of raw model id to output-token count.
 * @returns Total estimated energy in kWh (0 for an empty map).
 */
export function energyKwhByModel(outputByModel: Record<string, number>): number {
  return Object.entries(outputByModel).reduce(
    (totalKwh, [modelId, tokens]) => totalKwh + energyKwhForModel(modelId, tokens),
    0,
  );
}
59
+
60
/** Outcome of an emissions estimate, with enough context to label it honestly. */
export interface EmissionsResult {
  /** Estimated energy, in kWh. */
  energyKwh: number;
  /** Estimated emissions, in kg of CO2. */
  co2Kg: number;
  /** Grid carbon-intensity factor used (presumably gCO2 per kWh — confirm with the producer). */
  gridFactor: number;
  /** Human-readable provenance for the UI. */
  method: string;
}
66
+
67
/**
 * Convert energy to emitted CO2 for a given grid carbon intensity.
 *
 * @param energyKwh Energy in kWh.
 * @param gridFactorGPerKwh Grid carbon intensity in grams of CO2 per kWh.
 * @returns Emissions in kilograms of CO2.
 */
export function co2FromEnergy(energyKwh: number, gridFactorGPerKwh: number): number {
  const grams = energyKwh * gridFactorGPerKwh;
  return grams / 1000; // g -> kg
}
package/src/sources.ts ADDED
@@ -0,0 +1,110 @@
1
+ // Source Detector — the foundation for being tool-agnostic.
2
+ //
3
+ // outlier reads whatever AI telemetry the developer's tools already leave on disk, then
4
+ // uses the richest source per metric and labels its provenance. This keeps us local-first
5
+ // (we never call a tool's API — we read the local trace it writes) and lets us add new
6
+ // tools without changing the receipt.
7
+ //
8
+ // Provenance ladder (per metric): MEASURED > ESTIMATED > PROXY > NONE.
9
+
10
+ import { homedir } from 'os';
11
+ import { join } from 'path';
12
+ import { existsSync } from 'fs';
13
+ import { execSync } from 'child_process';
14
+
15
/** How much a reported metric can be trusted, strongest first; 'none' means no data. */
export type Provenance =
  | 'measured'
  | 'estimated'
  | 'proxy'
  | 'none';

/** A named data source together with the provenance of what it yields. */
interface SourceInfo {
  name: string;
  provenance: Provenance;
}

/** What `detectSources` found on this machine, and which source backs each metric. */
export interface DetectedSources {
  /** Tools/CLIs found on this machine. */
  tools: string[];
  /** Source used for token and cost figures. */
  tokenSource: SourceInfo;
  /** Source used for carbon figures. */
  carbonSource: SourceInfo;
  /** Source used for the agent capability map. */
  capabilitySource: SourceInfo;
}
23
+
24
+ const HOME = homedir();
25
+
26
/**
 * Report whether a command with the given name can be resolved by the shell.
 *
 * The lookup never runs the target itself. On POSIX systems it uses the shell
 * builtin `command -v`; on Windows, where `cmd.exe` has no such builtin, it
 * uses `where`. Any failure of the probe (non-zero exit, no shell) counts as
 * "not found".
 *
 * @param cmd Bare command name, e.g. "claude". Because the name is interpolated
 *            into a shell command line, anything that is not a plain command
 *            name (letters, digits, `.`, `_`, `+`, `-`) is rejected up front.
 * @returns `true` when the command resolves, `false` otherwise.
 */
function hasCli(cmd: string): boolean {
  // Never hand shell metacharacters, whitespace, or an option-like name to the shell.
  if (!/^[A-Za-z0-9][A-Za-z0-9._+-]*$/.test(cmd)) return false;

  const probe = process.platform === 'win32' ? `where ${cmd}` : `command -v ${cmd}`;
  try {
    execSync(probe, { stdio: 'ignore' });
    return true;
  } catch {
    return false;
  }
}
35
+
36
/**
 * Report whether a filesystem entry exists at `p`.
 *
 * @param p Path to check.
 * @returns `true` if the path exists; `false` if it does not or the check throws.
 */
function hasPath(p: string): boolean {
  let present = false;
  try {
    present = existsSync(p);
  } catch {
    present = false;
  }
  return present;
}
39
+
40
/**
 * Fingerprint the local environment: which AI tools are present and which local
 * source backs each metric.
 *
 * Only existence checks are made (command lookups and path checks); no file
 * contents are read here.
 *
 * @param cwd Repository directory to inspect; defaults to the current working directory.
 * @returns The detected tools (each listed once, in discovery order) and the
 *          chosen token, carbon, and capability sources with their provenance.
 */
export function detectSources(cwd: string = process.cwd()): DetectedSources {
  // Insertion-ordered and duplicate-free: a tool found both on PATH and via its
  // config entry is listed once, at the position it was first seen.
  const seen = new Set<string>();

  // AI coding agents available as a CLI on PATH.
  for (const cli of ['claude', 'cursor', 'aider', 'gemini', 'opencode', 'cody', 'continue', 'codex']) {
    if (hasCli(cli)) seen.add(cli);
  }

  // AI coding agents that left a config entry in the home directory.
  const homeMarkers: Array<[string, string]> = [
    ['claude', '.claude'],
    ['cursor', '.cursor'],
    ['gemini', '.gemini'],
    ['codeium', '.codeium'],
    ['continue', '.continue'],
    ['aider', '.aider.conf.yml'],
  ];
  for (const [tool, entry] of homeMarkers) {
    if (hasPath(join(HOME, entry))) seen.add(tool);
  }

  // Carbon/cost tooling that writes local data we can trust.
  for (const cli of ['codecarbon', 'ccusage']) {
    if (hasCli(cli)) seen.add(cli);
  }

  const tools = Array.from(seen);

  // ---- Token / cost source (richest first) ----
  const pickTokenSource = (): DetectedSources['tokenSource'] => {
    // Custom Stop hook log: carries a real cost_usd field -> measured cost.
    if (hasPath(join(HOME, '.claude', 'tokenomics-log.jsonl'))) {
      return { name: 'caveman tokenomics log', provenance: 'measured' };
    }
    // Standard transcripts, stored under a directory named after the cwd with
    // every "/" replaced by "-": real tokens, estimated cost.
    if (hasPath(join(HOME, '.claude', 'projects', cwd.replace(/\//g, '-')))) {
      return { name: 'Claude Code transcripts', provenance: 'estimated' };
    }
    if (seen.has('ccusage')) {
      return { name: 'ccusage', provenance: 'estimated' };
    }
    return { name: 'none', provenance: 'none' };
  };
  const tokenSource = pickTokenSource();

  // ---- Carbon source ----
  // Baseline is the bundled offline model+grid ESTIMATE. "measured" is claimed
  // only when an emissions.csv actually exists, never because the CodeCarbon
  // CLI happens to be installed.
  const pickCarbonSource = (): DetectedSources['carbonSource'] => {
    const emissionsCsvCandidates = [
      join(cwd, 'emissions.csv'),
      join(HOME, '.codecarbon', 'emissions.csv'),
    ];
    if (emissionsCsvCandidates.some((candidate) => hasPath(candidate))) {
      return { name: 'CodeCarbon emissions.csv', provenance: 'measured' };
    }
    if (tokenSource.provenance !== 'none') {
      return { name: 'model+grid estimate', provenance: 'estimated' };
    }
    return { name: 'none', provenance: 'none' };
  };
  const carbonSource = pickCarbonSource();

  // ---- Capability source ----
  const capabilityConfigs = [
    join(HOME, '.claude', 'settings.json'),
    join(cwd, 'AGENTS.md'),
    join(cwd, '.mcp.json'),
  ];
  const capabilitySource: DetectedSources['capabilitySource'] = capabilityConfigs.some((candidate) => hasPath(candidate))
    ? { name: 'local config (settings/AGENTS/MCP)', provenance: 'measured' }
    : { name: 'none', provenance: 'none' };

  return { tools, tokenSource, carbonSource, capabilitySource };
}
105
+
106
/**
 * Build the short receipt label for a source, e.g.
 * "measured · caveman tokenomics log".
 *
 * @param s Source name and its provenance.
 * @returns "no local data" when the provenance is 'none'; otherwise
 *          "<provenance> · <name>".
 */
export function provLabel(s: { name: string; provenance: Provenance }): string {
  const { name, provenance } = s;
  return provenance === 'none' ? 'no local data' : `${provenance} · ${name}`;
}