@wundam/orchex 1.0.0-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. package/LICENSE +65 -0
  2. package/README.md +332 -0
  3. package/bin/orchex.js +2 -0
  4. package/dist/artifacts.d.ts +132 -0
  5. package/dist/artifacts.js +832 -0
  6. package/dist/claude-executor.d.ts +31 -0
  7. package/dist/claude-executor.js +200 -0
  8. package/dist/commands.d.ts +36 -0
  9. package/dist/commands.js +264 -0
  10. package/dist/config.d.ts +100 -0
  11. package/dist/config.js +172 -0
  12. package/dist/context-builder.d.ts +46 -0
  13. package/dist/context-builder.js +506 -0
  14. package/dist/cost.d.ts +29 -0
  15. package/dist/cost.js +60 -0
  16. package/dist/execution-broadcaster.d.ts +18 -0
  17. package/dist/execution-broadcaster.js +17 -0
  18. package/dist/executors/base.d.ts +99 -0
  19. package/dist/executors/base.js +206 -0
  20. package/dist/executors/circuit-breaker.d.ts +36 -0
  21. package/dist/executors/circuit-breaker.js +109 -0
  22. package/dist/executors/deepseek-executor.d.ts +22 -0
  23. package/dist/executors/deepseek-executor.js +145 -0
  24. package/dist/executors/gemini-executor.d.ts +20 -0
  25. package/dist/executors/gemini-executor.js +176 -0
  26. package/dist/executors/index.d.ts +81 -0
  27. package/dist/executors/index.js +193 -0
  28. package/dist/executors/ollama-executor.d.ts +25 -0
  29. package/dist/executors/ollama-executor.js +184 -0
  30. package/dist/executors/openai-executor.d.ts +22 -0
  31. package/dist/executors/openai-executor.js +142 -0
  32. package/dist/index.d.ts +1 -0
  33. package/dist/index.js +115 -0
  34. package/dist/intelligence/anti-pattern-detector.d.ts +117 -0
  35. package/dist/intelligence/anti-pattern-detector.js +327 -0
  36. package/dist/intelligence/budget-enforcer.d.ts +119 -0
  37. package/dist/intelligence/budget-enforcer.js +226 -0
  38. package/dist/intelligence/context-optimizer.d.ts +111 -0
  39. package/dist/intelligence/context-optimizer.js +282 -0
  40. package/dist/intelligence/cost-tracker.d.ts +114 -0
  41. package/dist/intelligence/cost-tracker.js +183 -0
  42. package/dist/intelligence/deliverable-extractor.d.ts +134 -0
  43. package/dist/intelligence/deliverable-extractor.js +909 -0
  44. package/dist/intelligence/dependency-inferrer.d.ts +87 -0
  45. package/dist/intelligence/dependency-inferrer.js +403 -0
  46. package/dist/intelligence/diagnostics.d.ts +25 -0
  47. package/dist/intelligence/diagnostics.js +36 -0
  48. package/dist/intelligence/error-analyzer.d.ts +7 -0
  49. package/dist/intelligence/error-analyzer.js +76 -0
  50. package/dist/intelligence/file-chunker.d.ts +15 -0
  51. package/dist/intelligence/file-chunker.js +64 -0
  52. package/dist/intelligence/fix-stream-manager.d.ts +59 -0
  53. package/dist/intelligence/fix-stream-manager.js +212 -0
  54. package/dist/intelligence/heuristics.d.ts +23 -0
  55. package/dist/intelligence/heuristics.js +124 -0
  56. package/dist/intelligence/learning-engine.d.ts +157 -0
  57. package/dist/intelligence/learning-engine.js +433 -0
  58. package/dist/intelligence/learning-feedback.d.ts +96 -0
  59. package/dist/intelligence/learning-feedback.js +202 -0
  60. package/dist/intelligence/pattern-analyzer.d.ts +35 -0
  61. package/dist/intelligence/pattern-analyzer.js +189 -0
  62. package/dist/intelligence/plan-parser.d.ts +124 -0
  63. package/dist/intelligence/plan-parser.js +498 -0
  64. package/dist/intelligence/planner.d.ts +29 -0
  65. package/dist/intelligence/planner.js +86 -0
  66. package/dist/intelligence/self-healer.d.ts +16 -0
  67. package/dist/intelligence/self-healer.js +84 -0
  68. package/dist/intelligence/slicing-metrics.d.ts +62 -0
  69. package/dist/intelligence/slicing-metrics.js +202 -0
  70. package/dist/intelligence/slicing-templates.d.ts +81 -0
  71. package/dist/intelligence/slicing-templates.js +420 -0
  72. package/dist/intelligence/split-suggester.d.ts +69 -0
  73. package/dist/intelligence/split-suggester.js +176 -0
  74. package/dist/intelligence/stream-generator.d.ts +90 -0
  75. package/dist/intelligence/stream-generator.js +452 -0
  76. package/dist/logger.d.ts +34 -0
  77. package/dist/logger.js +83 -0
  78. package/dist/logging.d.ts +5 -0
  79. package/dist/logging.js +38 -0
  80. package/dist/manifest.d.ts +56 -0
  81. package/dist/manifest.js +254 -0
  82. package/dist/metrics.d.ts +35 -0
  83. package/dist/metrics.js +75 -0
  84. package/dist/orchestrator.d.ts +35 -0
  85. package/dist/orchestrator.js +723 -0
  86. package/dist/ownership.d.ts +44 -0
  87. package/dist/ownership.js +250 -0
  88. package/dist/semaphore.d.ts +12 -0
  89. package/dist/semaphore.js +34 -0
  90. package/dist/telemetry/telemetry-types.d.ts +85 -0
  91. package/dist/telemetry/telemetry-types.js +1 -0
  92. package/dist/tier-gating.d.ts +24 -0
  93. package/dist/tier-gating.js +88 -0
  94. package/dist/tiers.d.ts +92 -0
  95. package/dist/tiers.js +108 -0
  96. package/dist/tools.d.ts +18 -0
  97. package/dist/tools.js +1363 -0
  98. package/dist/types.d.ts +740 -0
  99. package/dist/types.js +160 -0
  100. package/dist/utils/ownership-validator.d.ts +6 -0
  101. package/dist/utils/ownership-validator.js +21 -0
  102. package/dist/waves.d.ts +21 -0
  103. package/dist/waves.js +146 -0
  104. package/package.json +120 -0
@@ -0,0 +1,111 @@
1
/** Token budget allocation across the layers of a single prompt. */
export interface TokenBudget {
    /** Total prompt token budget (model-dependent). */
    totalTokens: number;
    /** Tokens allotted to project-level context (file tree, metadata). */
    projectContext: number;
    /** Tokens allotted to the stream's own file contents. */
    streamContext: number;
    /** Tokens allotted to dependency artifacts. */
    dependencyContext: number;
    /** Tokens allotted to the task instructions. */
    instructions: number;
}
/** Result of optimizing the context for one stream. */
export interface ContextOptimization {
    /** Files kept in the stream's context. */
    includedFiles: string[];
    /** Files pruned from the context. */
    excludedFiles: string[];
    /** Token budget allocation for the kept context. */
    tokenBudget: TokenBudget;
    /** Prompt-caching hints derived from the context layers. */
    cachingHints: CachingHint[];
    /** Estimated token savings as a whole percentage (0–100). */
    estimatedSavings: number;
    /** Warnings raised during optimization (e.g. circular imports). */
    warnings?: string[];
    /** Detailed optimization metrics. */
    metrics?: OptimizationMetrics;
}
/** A span of prompt content that is a candidate for provider-side caching. */
export interface CachingHint {
    /** The cacheable content itself. */
    content: string;
    /** Which context layer the content belongs to. */
    type: 'system_prompt' | 'project_context' | 'stream_context';
    /** Whether the content is expected to be reused across requests. */
    reusable: boolean;
    /** Estimated token count of the content. */
    estimatedTokens: number;
}
/** Summary statistics for one context-optimization pass. */
export interface OptimizationMetrics {
    /** Number of files available before pruning. */
    totalFiles: number;
    /** Number of files kept. */
    includedFiles: number;
    /** Number of files pruned. */
    excludedFiles: number;
    /** Estimated tokens of the full, unpruned context. */
    originalTokens: number;
    /** Estimated tokens of the pruned context. */
    optimizedTokens: number;
    /** optimizedTokens / originalTokens (0 when originalTokens is 0). */
    compressionRatio: number;
}
/**
 * Default token budgets by model.
 *
 * NOTE(review): the implementation's values (8192 / 4096) look like Anthropic
 * max *output* token limits rather than full context-window sizes, despite the
 * original "context window sizes" wording — confirm intended semantics.
 */
export declare const MODEL_TOKEN_LIMITS: Record<string, number>;
/**
 * Analyze the import graph and return the files actually needed for a stream.
 * Prunes files that are not transitively imported.
 *
 * Uses breadth-first search over the import graph starting from the owned
 * files, tracking the chain of imports from a root to each discovered file.
 * Circular dependencies are reported via `warnings`.
 *
 * @param ownedFiles - Files the stream owns; BFS roots, always included.
 * @param readFiles - Explicitly requested read files; always included.
 * @param fileContents - Map of file path to file contents for the project.
 * @returns Needed/pruned file lists plus optional cycle warnings.
 */
export declare function pruneUnusedFiles(ownedFiles: string[], readFiles: string[], fileContents: Record<string, string>): {
    needed: string[];
    pruned: string[];
    warnings?: string[];
};
/**
 * Allocate a total token budget across context layers.
 *
 * Priority: instructions > stream context > dependency context > project context.
 * - Instructions: 15% of the total, capped at 1000 tokens.
 * - Stream context: up to 50% of the remainder, bounded by actual file sizes
 *   when contents are provided, else roughly 500 tokens per file.
 * - Dependency context: up to 30% of what is then left, ~300 tokens/artifact.
 * - Project context: everything remaining.
 *
 * @param totalTokens - Total prompt token budget.
 * @param streamFiles - Files whose contents make up the stream context.
 * @param dependencyArtifacts - Number of dependency artifacts to include.
 * @param fileContents - Optional path-to-contents map for accurate sizing.
 * @returns Floored token allocation for each layer.
 */
export declare function allocateTokenBudget(totalTokens: number, streamFiles: string[], dependencyArtifacts: number, fileContents?: Record<string, string>): TokenBudget;
/**
 * Generate caching hints for Anthropic's prompt caching.
 *
 * Prompt caching can substantially reduce costs for repeated context.
 * See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
 *
 * Caching strategy:
 * - System prompts: highly reusable across all streams.
 * - Project context: reusable within an orchestration run.
 * - Stream context: reusable across retries of the same stream.
 *
 * Hints are only generated for content exceeding minimum length thresholds to
 * avoid cache overhead on small contexts.
 *
 * @param projectContext - Project-level context string.
 * @param streamContext - Stream-level context string.
 * @param instructions - Task instruction string.
 * @returns Hints in fixed order: system prompt, project, then stream context.
 */
export declare function generateCachingHints(projectContext: string, streamContext: string, instructions: string): CachingHint[];
/**
 * Estimate token count for a string using content-aware heuristics.
 *
 * Different content types use different chars-per-token densities:
 * - Code: ~3.5 (dense, lots of symbols)
 * - Comments: ~4.5 (prose-like)
 * - Whitespace: ~6.0 (sparse)
 * - JSON: ~3.8 (structured data)
 * - Markdown: ~4.2 (formatted prose)
 *
 * More accurate than a flat chars/4 estimate, but still a heuristic — no real
 * tokenizer is invoked.
 *
 * @param text - The text to estimate.
 * @returns Estimated token count (0 for empty input).
 */
export declare function estimateTokens(text: string): number;
/**
 * Full context optimization for a stream — the main entry point. It:
 * 1. Prunes unused files based on import-graph analysis.
 * 2. Allocates the token budget across context layers.
 * 3. Generates prompt-caching hints for cost optimization.
 * 4. Calculates optimization metrics and savings estimates.
 *
 * @param ownedFiles - Files this stream owns (always included).
 * @param readFiles - Files this stream needs to read (always included).
 * @param fileContents - Map of file paths to their contents.
 * @param dependencyArtifacts - Number of dependency artifacts to include.
 * @param model - Model name used to look up the total token budget.
 * @returns Complete optimization analysis with files, budgets, and hints.
 */
export declare function optimizeContext(ownedFiles: string[], readFiles: string[], fileContents: Record<string, string>, dependencyArtifacts: number, model?: string): ContextOptimization;
@@ -0,0 +1,282 @@
1
+ import { extractImports, resolveImportPath } from './heuristics.js';
2
/**
 * Per-model token budgets used as the total prompt budget by optimizeContext.
 *
 * NOTE(review): despite the original "context window sizes" wording, these
 * values (8192 / 4096) match Anthropic max *output* token limits for the
 * listed models rather than their full context windows — confirm intent
 * before raising them.
 */
export const MODEL_TOKEN_LIMITS = {
    'claude-sonnet-4-20250514': 8192,
    'claude-3-5-sonnet-20241022': 8192,
    'claude-3-opus-20240229': 4096,
    // Fallback for model names not listed above (see optimizeContext).
    default: 8192,
};
12
/**
 * Token-estimation densities (characters per token) by content category.
 * Stated as empirically derived; consumed only by estimateTokens, which
 * decides which category applies to a given piece of text.
 */
const TOKEN_DENSITY = {
    CODE: 3.5, // dense: identifiers, operators, punctuation
    COMMENTS: 4.5, // prose-like natural language
    WHITESPACE: 6.0, // sparse: blank lines and indentation
    JSON: 3.8, // structured data
    MARKDOWN: 4.2, // formatted prose
};
23
/**
 * Analyze the import graph and return the files actually needed for a stream.
 * Prunes files that are not transitively imported.
 *
 * Uses breadth-first search over the import graph starting from the owned
 * files, tracking the chain of imports from a root to each discovered file.
 *
 * A circular-dependency warning is emitted only when an import points back to
 * a file on the *current* import chain (a genuine back edge). The previous
 * behavior warned on any edge to an already-visited file, which falsely
 * reported diamond/cross imports (A→B, A→C, C→B) as cycles.
 *
 * @param ownedFiles - Files the stream owns; BFS roots, always included.
 * @param readFiles - Explicitly requested read files; always included.
 * @param fileContents - Map of file path → file contents for the project.
 * @returns needed/pruned file lists plus optional cycle warnings.
 */
export function pruneUnusedFiles(ownedFiles, readFiles, fileContents) {
    const needed = new Set(ownedFiles);
    const visited = new Set();
    const queue = [...ownedFiles];
    const warnings = [];
    // Import path from a root (owned file) to each discovered file; used both
    // for cycle reporting and to distinguish real cycles from diamond imports.
    const importChain = new Map();
    for (const file of ownedFiles) {
        importChain.set(file, []);
    }
    // BFS through the import graph. An index-based cursor avoids the O(n^2)
    // cost of Array.prototype.shift() on large graphs.
    let head = 0;
    while (head < queue.length) {
        const file = queue[head++];
        if (visited.has(file))
            continue; // already processed
        visited.add(file);
        const content = fileContents[file];
        if (!content)
            continue; // no contents available — cannot follow imports
        for (const imp of extractImports(content)) {
            const resolved = resolveImportPath(file, imp);
            if (!resolved || !fileContents[resolved])
                continue; // unresolvable or external import
            const chain = importChain.get(file) ?? [];
            if (resolved === file || chain.includes(resolved)) {
                // Genuine cycle: the import target is an ancestor on the
                // current chain (or the file imports itself).
                warnings.push(`Circular dependency detected: ${[...chain, file, resolved].join(' → ')}`);
            }
            else if (!needed.has(resolved)) {
                // Newly discovered file — include it and keep traversing.
                needed.add(resolved);
                queue.push(resolved);
                importChain.set(resolved, [...chain, file]);
            }
        }
    }
    // Explicitly requested read files are always included.
    for (const f of readFiles) {
        needed.add(f);
    }
    const pruned = Object.keys(fileContents).filter(f => !needed.has(f));
    return {
        needed: [...needed],
        pruned,
        warnings: warnings.length > 0 ? warnings : undefined,
    };
}
85
/**
 * Allocate a total token budget across context layers.
 *
 * Priority: instructions > stream context > dependency context > project context.
 * - Instructions: 15% of the total, capped at 1000 tokens.
 * - Stream context: up to 50% of the remainder, bounded by the (actual or
 *   estimated) size of the stream's files.
 * - Dependency context: up to 30% of what is then left, ~300 tokens/artifact.
 * - Project context: everything remaining.
 *
 * @param totalTokens - Total prompt token budget.
 * @param streamFiles - Files whose contents make up the stream context.
 * @param dependencyArtifacts - Number of dependency artifacts to include.
 * @param fileContents - Optional path → contents map for accurate sizing.
 * @returns Floored token allocation for each layer.
 */
export function allocateTokenBudget(totalTokens, streamFiles, dependencyArtifacts, fileContents) {
    // Instructions get a fixed slice: 15% of the budget, capped at 1000 tokens.
    const instructions = Math.min(1000, totalTokens * 0.15);
    const remaining = totalTokens - instructions;
    // Size the stream context from real contents when available, otherwise
    // assume roughly 500 tokens per file.
    const fileTokens = fileContents
        ? streamFiles.reduce((sum, f) => sum + estimateTokens(fileContents[f] || ''), 0)
        : streamFiles.length * 500;
    const streamContext = Math.min(remaining * 0.5, fileTokens);
    const afterStream = remaining - streamContext;
    // Dependencies: up to 30% of what remains, ~300 tokens per artifact.
    const dependencyContext = Math.min(afterStream * 0.3, dependencyArtifacts * 300);
    // Whatever is left belongs to project-level context.
    const projectContext = afterStream - dependencyContext;
    return {
        totalTokens,
        projectContext: Math.floor(projectContext),
        streamContext: Math.floor(streamContext),
        dependencyContext: Math.floor(dependencyContext),
        instructions: Math.floor(instructions),
    };
}
128
/**
 * Generate caching hints for Anthropic's prompt caching.
 *
 * Prompt caching can substantially reduce costs for repeated context.
 * See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
 *
 * Reuse expectations:
 * - System prompt / instructions: reusable across all streams.
 * - Project context: reusable within one orchestration run.
 * - Stream context: reusable across retries of the same stream.
 *
 * Content must exceed a minimum length (100 chars for instructions, 500 for
 * the context layers) to be worth the caching overhead; shorter spans are
 * skipped.
 *
 * @param projectContext - Project-level context string.
 * @param streamContext - Stream-level context string.
 * @param instructions - Task instruction string.
 * @returns Hints in fixed order: system prompt, project, then stream context.
 */
export function generateCachingHints(projectContext, streamContext, instructions) {
    // [content, hint type, minimum length to bother caching]
    const candidates = [
        [instructions, 'system_prompt', 100],
        [projectContext, 'project_context', 500],
        [streamContext, 'stream_context', 500],
    ];
    const hints = [];
    for (const [content, type, minLength] of candidates) {
        if (content.length > minLength) {
            hints.push({
                content,
                type,
                reusable: true,
                estimatedTokens: estimateTokens(content),
            });
        }
    }
    return hints;
}
173
/**
 * Estimate the token count of a string using content-aware heuristics.
 *
 * JSON-looking and Markdown-looking documents get a flat density; everything
 * else is treated as source code and classified line by line into code,
 * comment, and whitespace characters, each with its own chars-per-token
 * density. More accurate than a flat chars/4 estimate, but still a heuristic —
 * no real tokenizer is invoked.
 *
 * @param text - The text to estimate.
 * @returns Estimated token count (0 for empty input).
 */
export function estimateTokens(text) {
    if (!text || text.length === 0) {
        return 0;
    }
    // Empirical chars-per-token densities by content category.
    const CHARS_PER_TOKEN = { code: 3.5, comment: 4.5, whitespace: 6.0, json: 3.8, markdown: 4.2 };
    const head = text.trim();
    // Whole-document shortcuts: JSON-ish, then Markdown-ish content.
    if (head.startsWith('{') || head.startsWith('[')) {
        return Math.ceil(text.length / CHARS_PER_TOKEN.json);
    }
    if (/^#+\s|^-\s|^\*\s|^\d+\.\s/m.test(text)) {
        return Math.ceil(text.length / CHARS_PER_TOKEN.markdown);
    }
    // Otherwise classify each line's characters as code, comment, or whitespace.
    let totalChars = 0;
    let codeChars = 0;
    let commentChars = 0;
    let whitespaceChars = 0;
    for (const line of text.split('\n')) {
        totalChars += line.length;
        const body = line.trim();
        if (body.length === 0) {
            // Blank or indentation-only line.
            whitespaceChars += line.length;
        }
        else if (body.startsWith('//') || body.startsWith('/*') || body.startsWith('*') || body.startsWith('#')) {
            // Comment line (rough heuristic: C-style or hash comments).
            commentChars += line.length;
        }
        else {
            // Code line; leading indentation counts as whitespace.
            const indent = line.length - body.length;
            whitespaceChars += indent;
            codeChars += line.length - indent;
        }
    }
    // Zero by construction today; kept so a future classification change
    // cannot silently drop characters from the estimate.
    const leftoverChars = totalChars - codeChars - commentChars - whitespaceChars;
    return Math.ceil(codeChars / CHARS_PER_TOKEN.code) +
        Math.ceil(commentChars / CHARS_PER_TOKEN.comment) +
        Math.ceil(whitespaceChars / CHARS_PER_TOKEN.whitespace) +
        Math.ceil(leftoverChars / CHARS_PER_TOKEN.comment);
}
231
/**
 * Full context optimization for a stream — the main entry point. It:
 * 1. Prunes unused files based on import-graph analysis.
 * 2. Allocates the token budget across context layers.
 * 3. Generates prompt-caching hints for cost optimization.
 * 4. Calculates optimization metrics and savings estimates.
 *
 * @param ownedFiles - Files this stream owns (always included).
 * @param readFiles - Files this stream needs to read (always included).
 * @param fileContents - Map of file paths to their contents.
 * @param dependencyArtifacts - Number of dependency artifacts to include.
 * @param model - Model name used to look up the total token budget.
 * @returns Complete optimization analysis with files, budgets, and hints.
 */
export function optimizeContext(ownedFiles, readFiles, fileContents, dependencyArtifacts, model = 'default') {
    const totalTokens = MODEL_TOKEN_LIMITS[model] ?? MODEL_TOKEN_LIMITS.default;
    // 1. Drop files not reachable through the import graph.
    const { needed, pruned, warnings } = pruneUnusedFiles(ownedFiles, readFiles, fileContents);
    // 2. Build the three context layers used for budgeting and caching.
    const streamContext = needed.map(f => fileContents[f] || '').join('\n');
    const projectContext = `Project files: ${Object.keys(fileContents).length}\nIncluded: ${needed.length}`;
    const instructions = `You are implementing stream changes. Output an orchex-artifact block.`;
    // 3. Allocate the budget from real contents and derive caching hints.
    const tokenBudget = allocateTokenBudget(totalTokens, needed, dependencyArtifacts, fileContents);
    const cachingHints = generateCachingHints(projectContext, streamContext, instructions);
    // 4. Savings: compare the pruned stream context against everything.
    const originalTokens = estimateTokens(Object.values(fileContents).join('\n'));
    const optimizedTokens = estimateTokens(streamContext);
    const estimatedSavings = originalTokens > 0
        ? Math.round((1 - optimizedTokens / originalTokens) * 100)
        : 0;
    return {
        includedFiles: needed,
        excludedFiles: pruned,
        tokenBudget,
        cachingHints,
        estimatedSavings,
        warnings,
        metrics: {
            totalFiles: Object.keys(fileContents).length,
            includedFiles: needed.length,
            excludedFiles: pruned.length,
            originalTokens,
            optimizedTokens,
            compressionRatio: originalTokens > 0 ? optimizedTokens / originalTokens : 0,
        },
    };
}
@@ -0,0 +1,114 @@
1
/**
 * Token cost tracking and estimation for Orchex Learn.
 * Provides cost estimation and aggregation for multi-provider token usage.
 */
import type { TelemetryEvent } from '../telemetry/telemetry-types.js';
/**
 * Token costs per 1000 tokens (in USD).
 * Prices as of February 2026 - should be updated periodically.
 *
 * Reference (per 1M tokens):
 * - DeepSeek V3.2: $0.28 / $0.42 (~95% cheaper than Claude)
 * - Gemini 2.5 Pro: $1.25 / $10.00 (~60% cheaper than Claude)
 * - Claude Sonnet: $3.00 / $15.00 (baseline)
 * - OpenAI GPT-4o: $5.00 / $15.00 (~40% more than Claude)
 * - Claude Opus: $15.00 / $75.00 (5x Claude Sonnet)
 */
export declare const TOKEN_COSTS: Record<string, {
    input: number;
    output: number;
}>;
/**
 * Cache discount rate for Anthropic prompt caching.
 * Cached tokens cost 90% less than regular input tokens.
 */
export declare const CACHE_DISCOUNT_RATE = 0.9;
/**
 * Cost estimate for a single execution.
 */
export interface CostEstimate {
    /** Number of input tokens. */
    inputTokens: number;
    /** Number of output tokens. */
    outputTokens: number;
    /** Cost for input tokens (USD). */
    inputCost: number;
    /** Cost for output tokens (USD). */
    outputCost: number;
    /** Total cost before cache savings (USD). */
    totalCost: number;
    /** Tokens read from cache (if applicable). */
    cacheHitTokens?: number;
    /** Estimated savings from cache (USD). */
    cacheDiscount?: number;
    /** Final cost after cache discount (USD). */
    finalCost: number;
    /** Model used for this estimate. */
    model: string;
}
/**
 * Aggregated cost summary across multiple executions.
 */
export interface CostSummary {
    /** Total cost across all executions, before cache savings (USD). */
    totalCost: number;
    /** Total input tokens. */
    totalInputTokens: number;
    /** Total output tokens. */
    totalOutputTokens: number;
    /** Total cache hit tokens. */
    totalCacheHitTokens: number;
    /** Total savings from cache (USD). */
    totalCacheSavings: number;
    /** Final cost after cache discounts (USD). */
    finalCost: number;
    /** Cost breakdown by provider (final, post-discount costs). */
    byProvider: Record<string, number>;
    /** Cost breakdown by model (final, post-discount costs). */
    byModel: Record<string, number>;
    /** Number of executions that carried token data. */
    executionCount: number;
    /** Average final cost per execution (USD). */
    avgCostPerExecution: number;
}
/**
 * Get token costs for a model, falling back to a family match and then to the
 * `default` entry when the model is not listed exactly.
 */
export declare function getModelCosts(model: string): {
    input: number;
    output: number;
};
/**
 * Estimate cost for a single execution.
 *
 * @param inputTokens - Number of input tokens
 * @param outputTokens - Number of output tokens
 * @param model - Model identifier
 * @param cacheHitTokens - Optional tokens read from cache
 * @returns Cost estimate with breakdown
 */
export declare function estimateCost(inputTokens: number, outputTokens: number, model: string, cacheHitTokens?: number): CostEstimate;
/**
 * Aggregate costs from multiple telemetry events.
 * Events without any token data are skipped.
 *
 * @param events - Array of telemetry events with token data
 * @returns Aggregated cost summary
 */
export declare function aggregateCosts(events: TelemetryEvent[]): CostSummary;
/**
 * Format a cost value for display.
 *
 * @param cost - Cost in USD
 * @returns Formatted string like "$0.0123", "<$0.01", or "$0.00" for zero
 */
export declare function formatCost(cost: number): string;
/**
 * Estimate cost for a planned execution based on token estimates.
 * Useful for budget warnings before execution.
 *
 * @param estimatedInputTokens - Estimated input tokens
 * @param model - Model to use
 * @param estimatedOutputRatio - Expected output/input ratio (default 0.3)
 * @returns Cost estimate
 */
export declare function estimatePlannedCost(estimatedInputTokens: number, model: string, estimatedOutputRatio?: number): CostEstimate;
@@ -0,0 +1,183 @@
1
+ /**
2
+ * Token cost tracking and estimation for Orchex Learn.
3
+ * Provides cost estimation and aggregation for multi-provider token usage.
4
+ */
5
/**
 * Token costs per 1000 tokens (in USD).
 * Prices as of February 2026 - should be updated periodically.
 *
 * Reference (per 1M tokens):
 * - DeepSeek V3.2: $0.28 / $0.42 (~95% cheaper than Claude)
 * - Gemini 2.5 Pro: $1.25 / $10.00 (~60% cheaper than Claude)
 * - Claude Sonnet: $3.00 / $15.00 (baseline)
 * - OpenAI GPT-4o: $5.00 / $15.00 (~40% more than Claude)
 * - Claude Opus: $15.00 / $75.00 (5x Claude Sonnet)
 *
 * NOTE: getModelCosts consults this table for fuzzy fallback matching of
 * versioned model names; entry order can influence which family wins when
 * several match equally, so do not reorder entries casually.
 */
export const TOKEN_COSTS = {
    // Anthropic models (Feb 2026)
    'claude-opus-4-5-20251101': { input: 0.015, output: 0.075 },
    'claude-sonnet-4-5-20250929': { input: 0.003, output: 0.015 },
    'claude-sonnet-4-20250514': { input: 0.003, output: 0.015 },
    'claude-3-5-sonnet-20241022': { input: 0.003, output: 0.015 },
    'claude-3-opus-20240229': { input: 0.015, output: 0.075 },
    'claude-3-haiku-20240307': { input: 0.00025, output: 0.00125 },
    // OpenAI models (Feb 2026)
    'gpt-4.5-turbo': { input: 0.005, output: 0.015 },
    'gpt-4-turbo': { input: 0.01, output: 0.03 },
    'gpt-4-turbo-preview': { input: 0.01, output: 0.03 },
    'gpt-4o': { input: 0.005, output: 0.015 },
    'gpt-4o-mini': { input: 0.00015, output: 0.0006 },
    'o1-preview': { input: 0.015, output: 0.06 },
    'o1-mini': { input: 0.003, output: 0.012 },
    'o3-mini': { input: 0.0011, output: 0.0044 },
    'gpt-3.5-turbo': { input: 0.0005, output: 0.0015 },
    // Google models (Feb 2026)
    'gemini-2.5-pro': { input: 0.00125, output: 0.01 },
    'gemini-2.0-flash': { input: 0.0001, output: 0.0004 },
    'gemini-1.5-pro': { input: 0.00125, output: 0.005 },
    'gemini-1.5-flash': { input: 0.000075, output: 0.0003 },
    'gemini-pro': { input: 0.000125, output: 0.000375 },
    // DeepSeek models (Feb 2026) — ~95% cheaper than Claude
    'deepseek-chat': { input: 0.00028, output: 0.00042 },
    'deepseek-coder': { input: 0.00028, output: 0.00042 },
    'deepseek-reasoner': { input: 0.00055, output: 0.00219 },
    // Ollama / local models: free, but tracked for comparison
    'llama3.3:70b': { input: 0, output: 0 },
    'llama3.2:latest': { input: 0, output: 0 },
    'mistral:latest': { input: 0, output: 0 },
    // Conservative fallback for unknown models (Claude Sonnet pricing)
    default: { input: 0.003, output: 0.015 },
};
51
/**
 * Cache discount rate for Anthropic prompt caching: cached input tokens are
 * billed at 90% less than regular input tokens (see estimateCost).
 */
export const CACHE_DISCOUNT_RATE = 0.9;
56
/**
 * Get token costs for a model, falling back to default if unknown.
 *
 * Lookup order:
 * 1. Exact key in TOKEN_COSTS.
 * 2. Best family match: the known model sharing the longest dash-aligned name
 *    prefix with the requested model, e.g. 'gpt-4o-2024-05-13' matches
 *    'gpt-4o' rather than 'gpt-4-turbo'. (The previous substring check
 *    returned the first key whose two-segment prefix appeared anywhere in the
 *    name, which depended on table order and mispriced versioned names such
 *    as dated gpt-4o or claude-3-opus variants.)
 * 3. TOKEN_COSTS.default.
 *
 * @param model - Model identifier as reported by the provider.
 * @returns Input/output cost per 1000 tokens (USD).
 */
export function getModelCosts(model) {
    // Exact match wins outright.
    if (TOKEN_COSTS[model]) {
        return TOKEN_COSTS[model];
    }
    const modelLower = model.toLowerCase();
    // Score a known key by how much of its name lines up with the model name.
    const score = (keyLower) => {
        // Whole key is a dash-aligned prefix of the model: strongest match.
        if (modelLower.startsWith(keyLower) &&
            (modelLower.length === keyLower.length || modelLower[keyLower.length] === '-')) {
            return keyLower.length + 1000;
        }
        // Otherwise: length of the common character prefix, trimmed back to
        // the last completed dash-separated segment on a mid-segment mismatch
        // (so 'gpt-4-turbo' does not score against 'gpt-4o-...').
        const max = Math.min(keyLower.length, modelLower.length);
        let i = 0;
        while (i < max && keyLower[i] === modelLower[i])
            i++;
        if (i < keyLower.length && i < modelLower.length) {
            const cut = keyLower.lastIndexOf('-', i - 1);
            i = cut === -1 ? 0 : cut;
        }
        return i;
    };
    let best = null;
    let bestScore = 0;
    for (const [key, costs] of Object.entries(TOKEN_COSTS)) {
        if (key === 'default')
            continue;
        const keyLower = key.toLowerCase();
        // The match must cover at least the two-segment model family
        // (e.g. 'gpt-4o', 'claude-3') to rule out one-letter coincidences.
        const familyLength = keyLower.split('-').slice(0, 2).join('-').length;
        const s = score(keyLower);
        if (s >= familyLength && s > bestScore) {
            best = costs;
            bestScore = s;
        }
    }
    return best ?? TOKEN_COSTS.default;
}
73
/**
 * Estimate cost for a single execution.
 *
 * Costs are computed per 1000 tokens from the model's rate card. When cache
 * hit tokens are supplied, those tokens are assumed to be part of the input
 * and their cost is reduced by CACHE_DISCOUNT_RATE.
 *
 * @param inputTokens - Number of input tokens.
 * @param outputTokens - Number of output tokens.
 * @param model - Model identifier.
 * @param cacheHitTokens - Optional tokens read from cache.
 * @returns Cost estimate with per-component breakdown.
 */
export function estimateCost(inputTokens, outputTokens, model, cacheHitTokens) {
    const { input, output } = getModelCosts(model);
    // Base costs, rated per 1000 tokens.
    const inputCost = (inputTokens / 1000) * input;
    const outputCost = (outputTokens / 1000) * output;
    const totalCost = inputCost + outputCost;
    // Cache hits save CACHE_DISCOUNT_RATE of the input rate for those tokens.
    let cacheDiscount;
    if (cacheHitTokens && cacheHitTokens > 0) {
        cacheDiscount = (cacheHitTokens / 1000) * input * CACHE_DISCOUNT_RATE;
    }
    return {
        inputTokens,
        outputTokens,
        inputCost,
        outputCost,
        totalCost,
        cacheHitTokens,
        cacheDiscount,
        finalCost: totalCost - (cacheDiscount ?? 0),
        model,
    };
}
107
/**
 * Aggregate costs from multiple telemetry events.
 *
 * Events with neither input nor output token counts are skipped entirely;
 * every other event is priced via estimateCost and folded into the summary.
 * Per-provider and per-model breakdowns use the post-discount final cost.
 *
 * @param events - Array of telemetry events with token data.
 * @returns Aggregated cost summary across all priced events.
 */
export function aggregateCosts(events) {
    const byProvider = {};
    const byModel = {};
    let inputTokenTotal = 0;
    let outputTokenTotal = 0;
    let cacheTokenTotal = 0;
    let grossCost = 0;
    let cacheSavings = 0;
    let pricedEvents = 0;
    for (const event of events) {
        // No token data at all — nothing to price.
        if (!event.tokensInput && !event.tokensOutput) {
            continue;
        }
        const model = event.model ?? 'default';
        const provider = event.provider ?? 'unknown';
        const estimate = estimateCost(event.tokensInput ?? 0, event.tokensOutput ?? 0, model, event.cacheReadTokens ?? 0);
        inputTokenTotal += estimate.inputTokens;
        outputTokenTotal += estimate.outputTokens;
        cacheTokenTotal += estimate.cacheHitTokens ?? 0;
        grossCost += estimate.totalCost;
        cacheSavings += estimate.cacheDiscount ?? 0;
        pricedEvents += 1;
        byProvider[provider] = (byProvider[provider] ?? 0) + estimate.finalCost;
        byModel[model] = (byModel[model] ?? 0) + estimate.finalCost;
    }
    const finalCost = grossCost - cacheSavings;
    return {
        totalCost: grossCost,
        totalInputTokens: inputTokenTotal,
        totalOutputTokens: outputTokenTotal,
        totalCacheHitTokens: cacheTokenTotal,
        totalCacheSavings: cacheSavings,
        finalCost,
        byProvider,
        byModel,
        executionCount: pricedEvents,
        avgCostPerExecution: pricedEvents > 0 ? finalCost / pricedEvents : 0,
    };
}
158
/**
 * Format a cost value for display.
 *
 * Exact zero renders as "$0.00"; positive sub-cent amounts are summarized as
 * "<$0.01" rather than shown with false precision; everything else is shown
 * with four decimal places.
 *
 * @param cost - Cost in USD.
 * @returns Formatted string like "$0.0123", "<$0.01", or "$0.00".
 */
export function formatCost(cost) {
    if (cost === 0) {
        return '$0.00';
    }
    return cost < 0.01 ? '<$0.01' : `$${cost.toFixed(4)}`;
}
171
/**
 * Estimate cost for a planned execution based on token estimates.
 * Useful for budget warnings before execution.
 *
 * @param estimatedInputTokens - Estimated input tokens.
 * @param model - Model to use.
 * @param estimatedOutputRatio - Expected output/input ratio (default 0.3).
 * @returns Cost estimate for the projected input and output volumes.
 */
export function estimatePlannedCost(estimatedInputTokens, model, estimatedOutputRatio = 0.3) {
    // Project output volume from the input estimate, rounding up.
    const plannedOutputTokens = Math.ceil(estimatedInputTokens * estimatedOutputRatio);
    return estimateCost(estimatedInputTokens, plannedOutputTokens, model);
}