@wundam/orchex 1.0.0-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +65 -0
- package/README.md +332 -0
- package/bin/orchex.js +2 -0
- package/dist/artifacts.d.ts +132 -0
- package/dist/artifacts.js +832 -0
- package/dist/claude-executor.d.ts +31 -0
- package/dist/claude-executor.js +200 -0
- package/dist/commands.d.ts +36 -0
- package/dist/commands.js +264 -0
- package/dist/config.d.ts +100 -0
- package/dist/config.js +172 -0
- package/dist/context-builder.d.ts +46 -0
- package/dist/context-builder.js +506 -0
- package/dist/cost.d.ts +29 -0
- package/dist/cost.js +60 -0
- package/dist/execution-broadcaster.d.ts +18 -0
- package/dist/execution-broadcaster.js +17 -0
- package/dist/executors/base.d.ts +99 -0
- package/dist/executors/base.js +206 -0
- package/dist/executors/circuit-breaker.d.ts +36 -0
- package/dist/executors/circuit-breaker.js +109 -0
- package/dist/executors/deepseek-executor.d.ts +22 -0
- package/dist/executors/deepseek-executor.js +145 -0
- package/dist/executors/gemini-executor.d.ts +20 -0
- package/dist/executors/gemini-executor.js +176 -0
- package/dist/executors/index.d.ts +81 -0
- package/dist/executors/index.js +193 -0
- package/dist/executors/ollama-executor.d.ts +25 -0
- package/dist/executors/ollama-executor.js +184 -0
- package/dist/executors/openai-executor.d.ts +22 -0
- package/dist/executors/openai-executor.js +142 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +115 -0
- package/dist/intelligence/anti-pattern-detector.d.ts +117 -0
- package/dist/intelligence/anti-pattern-detector.js +327 -0
- package/dist/intelligence/budget-enforcer.d.ts +119 -0
- package/dist/intelligence/budget-enforcer.js +226 -0
- package/dist/intelligence/context-optimizer.d.ts +111 -0
- package/dist/intelligence/context-optimizer.js +282 -0
- package/dist/intelligence/cost-tracker.d.ts +114 -0
- package/dist/intelligence/cost-tracker.js +183 -0
- package/dist/intelligence/deliverable-extractor.d.ts +134 -0
- package/dist/intelligence/deliverable-extractor.js +909 -0
- package/dist/intelligence/dependency-inferrer.d.ts +87 -0
- package/dist/intelligence/dependency-inferrer.js +403 -0
- package/dist/intelligence/diagnostics.d.ts +25 -0
- package/dist/intelligence/diagnostics.js +36 -0
- package/dist/intelligence/error-analyzer.d.ts +7 -0
- package/dist/intelligence/error-analyzer.js +76 -0
- package/dist/intelligence/file-chunker.d.ts +15 -0
- package/dist/intelligence/file-chunker.js +64 -0
- package/dist/intelligence/fix-stream-manager.d.ts +59 -0
- package/dist/intelligence/fix-stream-manager.js +212 -0
- package/dist/intelligence/heuristics.d.ts +23 -0
- package/dist/intelligence/heuristics.js +124 -0
- package/dist/intelligence/learning-engine.d.ts +157 -0
- package/dist/intelligence/learning-engine.js +433 -0
- package/dist/intelligence/learning-feedback.d.ts +96 -0
- package/dist/intelligence/learning-feedback.js +202 -0
- package/dist/intelligence/pattern-analyzer.d.ts +35 -0
- package/dist/intelligence/pattern-analyzer.js +189 -0
- package/dist/intelligence/plan-parser.d.ts +124 -0
- package/dist/intelligence/plan-parser.js +498 -0
- package/dist/intelligence/planner.d.ts +29 -0
- package/dist/intelligence/planner.js +86 -0
- package/dist/intelligence/self-healer.d.ts +16 -0
- package/dist/intelligence/self-healer.js +84 -0
- package/dist/intelligence/slicing-metrics.d.ts +62 -0
- package/dist/intelligence/slicing-metrics.js +202 -0
- package/dist/intelligence/slicing-templates.d.ts +81 -0
- package/dist/intelligence/slicing-templates.js +420 -0
- package/dist/intelligence/split-suggester.d.ts +69 -0
- package/dist/intelligence/split-suggester.js +176 -0
- package/dist/intelligence/stream-generator.d.ts +90 -0
- package/dist/intelligence/stream-generator.js +452 -0
- package/dist/logger.d.ts +34 -0
- package/dist/logger.js +83 -0
- package/dist/logging.d.ts +5 -0
- package/dist/logging.js +38 -0
- package/dist/manifest.d.ts +56 -0
- package/dist/manifest.js +254 -0
- package/dist/metrics.d.ts +35 -0
- package/dist/metrics.js +75 -0
- package/dist/orchestrator.d.ts +35 -0
- package/dist/orchestrator.js +723 -0
- package/dist/ownership.d.ts +44 -0
- package/dist/ownership.js +250 -0
- package/dist/semaphore.d.ts +12 -0
- package/dist/semaphore.js +34 -0
- package/dist/telemetry/telemetry-types.d.ts +85 -0
- package/dist/telemetry/telemetry-types.js +1 -0
- package/dist/tier-gating.d.ts +24 -0
- package/dist/tier-gating.js +88 -0
- package/dist/tiers.d.ts +92 -0
- package/dist/tiers.js +108 -0
- package/dist/tools.d.ts +18 -0
- package/dist/tools.js +1363 -0
- package/dist/types.d.ts +740 -0
- package/dist/types.js +160 -0
- package/dist/utils/ownership-validator.d.ts +6 -0
- package/dist/utils/ownership-validator.js +21 -0
- package/dist/waves.d.ts +21 -0
- package/dist/waves.js +146 -0
- package/package.json +120 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/** Token budget allocation across the layers of a prompt context. */
export interface TokenBudget {
    /** Total tokens available for the whole prompt. */
    totalTokens: number;
    /** Tokens reserved for project-level context (file tree, metadata). */
    projectContext: number;
    /** Tokens reserved for the stream's own file contents. */
    streamContext: number;
    /** Tokens reserved for dependency artifacts. */
    dependencyContext: number;
    /** Tokens reserved for the task instructions. */
    instructions: number;
}
/** Result of optimizing the prompt context for a single stream. */
export interface ContextOptimization {
    /** Files kept in the optimized context. */
    includedFiles: string[];
    /** Files pruned from the context. */
    excludedFiles: string[];
    /** Token budget allocated across context layers. */
    tokenBudget: TokenBudget;
    /** Context segments that are candidates for prompt caching. */
    cachingHints: CachingHint[];
    /** Estimated savings (in percentage points) versus including every file. */
    estimatedSavings: number;
    /** Non-fatal issues found during optimization (e.g. circular imports). */
    warnings?: string[];
    /** Detailed before/after statistics, when available. */
    metrics?: OptimizationMetrics;
}
/** A context segment that is a candidate for prompt caching. */
export interface CachingHint {
    /** The exact text to cache. */
    content: string;
    /** Which context layer this segment belongs to. */
    type: 'system_prompt' | 'project_context' | 'stream_context';
    /** Whether the segment is expected to be reused across requests. */
    reusable: boolean;
    /** Estimated token count of the segment. */
    estimatedTokens: number;
}
/** Before/after statistics for one context-optimization pass. */
export interface OptimizationMetrics {
    /** Total files available in the project snapshot. */
    totalFiles: number;
    /** Files kept in the optimized context. */
    includedFiles: number;
    /** Files pruned from the context. */
    excludedFiles: number;
    /** Estimated tokens if every file were included. */
    originalTokens: number;
    /** Estimated tokens of the optimized context. */
    optimizedTokens: number;
    /** optimizedTokens / originalTokens (0 when originalTokens is 0). */
    compressionRatio: number;
}
/**
 * Default token budgets by model.
 * NOTE(review): the values (4096/8192) look like Anthropic max-OUTPUT-token
 * limits rather than context window sizes (~200k) — confirm intent.
 */
export declare const MODEL_TOKEN_LIMITS: Record<string, number>;
/**
 * Analyze import graph and return files actually needed for a stream.
 * Prunes files that are not transitively imported.
 *
 * Uses breadth-first search to traverse the import graph starting from owned files.
 * Detects circular dependencies and includes them in warnings.
 *
 * Algorithm:
 * - BFS from owned files, following imports
 * - Track the import chain (path from root) for each discovered file
 * - Detect cycles when we find an import to an already-visited file
 */
export declare function pruneUnusedFiles(ownedFiles: string[], readFiles: string[], fileContents: Record<string, string>): {
    needed: string[];
    pruned: string[];
    warnings?: string[];
};
/**
 * Allocate token budget across context layers with intelligent prioritization.
 *
 * Priority: instructions > stream context > dependency context > project context
 *
 * Strategy:
 * - Instructions: Fixed 15% or 1000 tokens (whichever is smaller) for task description
 * - Stream context: Up to 50% of remaining, based on actual file sizes
 * - Dependency context: Up to 30% of remaining, ~300 tokens per artifact
 * - Project context: Remaining budget for file tree and metadata
 *
 * When file contents are provided, uses actual token estimates for more accurate allocation.
 */
export declare function allocateTokenBudget(totalTokens: number, streamFiles: string[], dependencyArtifacts: number, fileContents?: Record<string, string>): TokenBudget;
/**
 * Generate caching hints for Anthropic's prompt caching.
 *
 * Prompt caching can reduce costs by up to 90% for repeated context.
 * See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
 *
 * Caching strategy:
 * - System prompts: Highly reusable across all streams
 * - Project context: Reusable within an orchestration run
 * - Stream context: Reusable across retries of the same stream
 *
 * Only generates hints for content exceeding minimum thresholds to avoid
 * cache overhead on small contexts.
 */
export declare function generateCachingHints(projectContext: string, streamContext: string, instructions: string): CachingHint[];
/**
 * Estimate token count for a string using content-aware heuristics.
 *
 * Different content types have different token densities:
 * - Code: ~3.5 chars/token (dense, lots of symbols)
 * - Comments: ~4.5 chars/token (prose, more natural language)
 * - Whitespace: ~6.0 chars/token (sparse)
 * - JSON: ~3.8 chars/token (structured data)
 * - Markdown: ~4.2 chars/token (formatted prose)
 *
 * This provides better accuracy than simple char/4 estimation.
 */
export declare function estimateTokens(text: string): number;
/**
 * Full context optimization for a stream.
 *
 * This is the main entry point for context optimization. It:
 * 1. Prunes unused files based on import graph analysis
 * 2. Allocates token budget across context layers
 * 3. Generates prompt caching hints for cost optimization
 * 4. Calculates optimization metrics and savings estimates
 *
 * @param ownedFiles - Files this stream owns (will always be included)
 * @param readFiles - Files this stream needs to read (will always be included)
 * @param fileContents - Map of file paths to their contents
 * @param dependencyArtifacts - Number of dependency artifacts to include
 * @param model - Claude model name for token limit lookup
 * @returns Complete optimization analysis with included/excluded files, budgets, and hints
 */
export declare function optimizeContext(ownedFiles: string[], readFiles: string[], fileContents: Record<string, string>, dependencyArtifacts: number, model?: string): ContextOptimization;
|
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
import { extractImports, resolveImportPath } from './heuristics.js';
|
|
2
|
+
/**
 * Default token budgets by model.
 * NOTE(review): these values (4096/8192) match Anthropic max-OUTPUT-token
 * limits, not context window sizes (~200k) — confirm which was intended.
 */
export const MODEL_TOKEN_LIMITS = {
    'claude-sonnet-4-20250514': 8192,
    'claude-3-5-sonnet-20241022': 8192,
    'claude-3-opus-20240229': 4096,
    // Fallback for model names not listed above.
    default: 8192,
};
|
|
12
|
+
/**
 * Token estimation constants based on empirical analysis.
 * Different content types have different token densities.
 * Each value is the average number of characters per token, so a HIGHER
 * value means the content produces FEWER tokens per character.
 */
const TOKEN_DENSITY = {
    CODE: 3.5, // chars per token for code
    COMMENTS: 4.5, // chars per token for comments/prose
    WHITESPACE: 6.0, // chars per token for whitespace
    JSON: 3.8, // chars per token for JSON
    MARKDOWN: 4.2, // chars per token for markdown
};
|
|
23
|
+
/**
 * Analyze import graph and return files actually needed for a stream.
 * Prunes files that are not transitively imported.
 *
 * Uses breadth-first search to traverse the import graph starting from owned files.
 * Detects circular dependencies and includes them in warnings.
 *
 * Algorithm:
 * - BFS from owned files, following imports
 * - Track the import chain (path from root) for each discovered file
 * - Report a cycle only when an import points back to an ANCESTOR of the
 *   importing file. A plain edge to an already-visited file is usually a
 *   shared (diamond) dependency, not a cycle, and must not be flagged.
 *   (Cycles entered only via cross-edges between separate branches may
 *   still go unreported; warnings are best-effort and do not affect the
 *   needed/pruned result.)
 *
 * @param ownedFiles - Files the stream owns; BFS roots, always included.
 * @param readFiles - Explicitly requested read files, always included.
 * @param fileContents - Map of file path -> file content for the project.
 * @returns needed/pruned file lists plus optional cycle warnings.
 */
export function pruneUnusedFiles(ownedFiles, readFiles, fileContents) {
    const needed = new Set([...ownedFiles]);
    const visited = new Set();
    const queue = [...ownedFiles];
    const warnings = [];
    const importChain = new Map(); // file -> list of ancestors from its BFS root
    // Owned files are roots: they have no ancestors.
    for (const file of ownedFiles) {
        importChain.set(file, []);
    }
    // BFS through the import graph.
    while (queue.length > 0) {
        const file = queue.shift();
        if (visited.has(file))
            continue; // Already processed
        visited.add(file);
        const content = fileContents[file];
        if (!content)
            continue; // Unknown file — cannot follow its imports
        const imports = extractImports(content);
        for (const imp of imports) {
            const resolved = resolveImportPath(file, imp);
            if (resolved && fileContents[resolved]) {
                if (visited.has(resolved)) {
                    // Edge to a visited file: only a true cycle if the target is
                    // an ancestor of the importing file (or the file itself).
                    const currentChain = importChain.get(file) || [];
                    if (resolved === file || currentChain.includes(resolved)) {
                        warnings.push(`Circular dependency detected: ${[...currentChain, file, resolved].join(' → ')}`);
                    }
                }
                else if (!needed.has(resolved)) {
                    // New file discovered - add to queue
                    needed.add(resolved);
                    queue.push(resolved);
                    // Track import chain for cycle reporting
                    const currentChain = importChain.get(file) || [];
                    importChain.set(resolved, [...currentChain, file]);
                }
            }
        }
    }
    // Add explicitly requested read files
    for (const f of readFiles) {
        needed.add(f);
    }
    const pruned = Object.keys(fileContents).filter(f => !needed.has(f));
    return {
        needed: [...needed],
        pruned,
        warnings: warnings.length > 0 ? warnings : undefined
    };
}
|
|
85
|
+
/**
 * Allocate token budget across context layers with intelligent prioritization.
 *
 * Priority order: instructions > stream context > dependency context > project context.
 *
 * - Instructions receive a fixed slice: 15% of the total, capped at 1000 tokens.
 * - Stream context may use up to half of what remains, limited by the actual
 *   (or estimated) size of the stream's files.
 * - Dependency context may use up to 30% of the remainder, at ~300 tokens
 *   per artifact.
 * - Project context (file tree, metadata) absorbs whatever is left.
 *
 * When fileContents is supplied, real token estimates per file are used;
 * otherwise each file is assumed to cost ~500 tokens.
 *
 * @param totalTokens - Total tokens available for the prompt.
 * @param streamFiles - File paths belonging to the stream.
 * @param dependencyArtifacts - Count of dependency artifacts to include.
 * @param fileContents - Optional map of path -> content for exact sizing.
 * @returns Floor-rounded per-layer token budget.
 */
export function allocateTokenBudget(totalTokens, streamFiles, dependencyArtifacts, fileContents) {
    // Instructions: fixed 15% slice, never more than 1000 tokens.
    const instructionTokens = Math.min(1000, totalTokens * 0.15);
    let available = totalTokens - instructionTokens;
    // Work out how many tokens the stream's files would actually need.
    const streamDemand = fileContents
        ? streamFiles.reduce((acc, path) => acc + estimateTokens(fileContents[path] || ''), 0)
        : streamFiles.length * 500; // ~500 tokens per file when contents unknown
    const streamTokens = Math.min(available * 0.5, streamDemand);
    available -= streamTokens;
    // Dependencies: capped at 30% of the remainder, ~300 tokens per artifact.
    const dependencyTokens = Math.min(available * 0.3, dependencyArtifacts * 300);
    available -= dependencyTokens;
    // Everything left over goes to project-level context.
    return {
        totalTokens,
        projectContext: Math.floor(available),
        streamContext: Math.floor(streamTokens),
        dependencyContext: Math.floor(dependencyTokens),
        instructions: Math.floor(instructionTokens),
    };
}
|
|
128
|
+
/**
 * Generate caching hints for Anthropic's prompt caching.
 *
 * Prompt caching can reduce costs by up to 90% for repeated context.
 * See: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
 *
 * Reusability by segment:
 * - System prompt / instructions: shared across all streams.
 * - Project context: shared within one orchestration run.
 * - Stream context: shared across retries of the same stream.
 *
 * Segments at or below their minimum length are skipped, since caching
 * tiny contexts costs more than it saves.
 *
 * @param projectContext - Project-level context string (file tree, deps).
 * @param streamContext - Concatenated stream file contents.
 * @param instructions - Task instructions / system prompt text.
 * @returns Hints for each segment worth caching, in priority order.
 */
export function generateCachingHints(projectContext, streamContext, instructions) {
    const hints = [];
    // Candidates in priority order, each with its own size threshold.
    const candidates = [
        { content: instructions, type: 'system_prompt', minLength: 100 },
        { content: projectContext, type: 'project_context', minLength: 500 },
        { content: streamContext, type: 'stream_context', minLength: 500 },
    ];
    for (const { content, type, minLength } of candidates) {
        if (content.length > minLength) {
            hints.push({
                content,
                type,
                reusable: true,
                estimatedTokens: estimateTokens(content),
            });
        }
    }
    return hints;
}
|
|
173
|
+
/**
 * Estimate token count for a string using content-aware heuristics.
 *
 * Different content types have different token densities:
 * - Code: ~3.5 chars/token (dense, lots of symbols)
 * - Comments: ~4.5 chars/token (prose, more natural language)
 * - Whitespace: ~6.0 chars/token (sparse)
 * - JSON: ~3.8 chars/token (structured data)
 * - Markdown: ~4.2 chars/token (formatted prose)
 *
 * This provides better accuracy than simple char/4 estimation.
 *
 * @param text - Content to estimate; empty/falsy input yields 0.
 * @returns Estimated token count (ceiling-rounded per category).
 */
export function estimateTokens(text) {
    if (!text || text.length === 0) {
        return 0;
    }
    // JSON heuristic: content opening with '{' or '[' after trimming.
    const leading = text.trim();
    if (leading.startsWith('{') || leading.startsWith('[')) {
        return Math.ceil(text.length / TOKEN_DENSITY.JSON);
    }
    // Markdown heuristic: any line starting with a heading, bullet, or number.
    if (/^#+\s|^-\s|^\*\s|^\d+\.\s/m.test(text)) {
        return Math.ceil(text.length / TOKEN_DENSITY.MARKDOWN);
    }
    // Otherwise treat as source code and classify each line's characters.
    let totalChars = 0;
    let codeChars = 0;
    let commentChars = 0;
    let whitespaceChars = 0;
    for (const line of text.split('\n')) {
        totalChars += line.length;
        const stripped = line.trim();
        if (stripped.length === 0) {
            // Blank line: everything is whitespace.
            whitespaceChars += line.length;
        }
        else if (/^(\/\/|\/\*|\*|#)/.test(stripped)) {
            // Comment line (C-style or shell/Python-style marker).
            commentChars += line.length;
        }
        else {
            // Code line: indentation counts as whitespace, the rest as code.
            whitespaceChars += line.length - stripped.length;
            codeChars += stripped.length;
        }
    }
    // Any residue is priced like prose.
    const remainderChars = totalChars - codeChars - commentChars - whitespaceChars;
    return (
        Math.ceil(codeChars / TOKEN_DENSITY.CODE) +
        Math.ceil(commentChars / TOKEN_DENSITY.COMMENTS) +
        Math.ceil(whitespaceChars / TOKEN_DENSITY.WHITESPACE) +
        Math.ceil(remainderChars / TOKEN_DENSITY.COMMENTS)
    );
}
|
|
231
|
+
/**
 * Full context optimization for a stream.
 *
 * Main entry point for context optimization. Pipeline:
 * 1. Prune files not reachable through the import graph.
 * 2. Allocate the token budget across context layers.
 * 3. Produce prompt-caching hints for reusable segments.
 * 4. Compute savings metrics against the everything-included baseline.
 *
 * @param ownedFiles - Files this stream owns (will always be included)
 * @param readFiles - Files this stream needs to read (will always be included)
 * @param fileContents - Map of file paths to their contents
 * @param dependencyArtifacts - Number of dependency artifacts to include
 * @param model - Claude model name for token limit lookup
 * @returns Complete optimization analysis with included/excluded files, budgets, and hints
 */
export function optimizeContext(ownedFiles, readFiles, fileContents, dependencyArtifacts, model = 'default') {
    const budgetCeiling = MODEL_TOKEN_LIMITS[model] ?? MODEL_TOKEN_LIMITS.default;
    // Step 1: drop files that nothing in this stream transitively imports.
    const { needed, pruned, warnings } = pruneUnusedFiles(ownedFiles, readFiles, fileContents);
    // Step 2: assemble the context strings from the surviving files.
    const streamContext = needed.map(path => fileContents[path] || '').join('\n');
    const projectContext = `Project files: ${Object.keys(fileContents).length}\nIncluded: ${needed.length}`;
    const instructions = `You are implementing stream changes. Output an orchex-artifact block.`;
    // Step 3: split the token budget using the real file sizes.
    const tokenBudget = allocateTokenBudget(budgetCeiling, needed, dependencyArtifacts, fileContents);
    // Step 4: flag reusable segments for prompt caching.
    const cachingHints = generateCachingHints(projectContext, streamContext, instructions);
    // Step 5: measure the win versus including every file verbatim.
    const originalTokens = estimateTokens(Object.values(fileContents).join('\n'));
    const optimizedTokens = estimateTokens(streamContext);
    const estimatedSavings = originalTokens > 0
        ? Math.round((1 - optimizedTokens / originalTokens) * 100)
        : 0;
    return {
        includedFiles: needed,
        excludedFiles: pruned,
        tokenBudget,
        cachingHints,
        estimatedSavings,
        warnings,
        metrics: {
            totalFiles: Object.keys(fileContents).length,
            includedFiles: needed.length,
            excludedFiles: pruned.length,
            originalTokens,
            optimizedTokens,
            compressionRatio: originalTokens > 0 ? optimizedTokens / originalTokens : 0,
        },
    };
}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/**
 * Token cost tracking and estimation for Orchex Learn.
 * Provides cost estimation and aggregation for multi-provider token usage.
 */
import type { TelemetryEvent } from '../telemetry/telemetry-types.js';
/**
 * Token costs per 1000 tokens (in USD).
 * Prices as of February 2026 - should be updated periodically.
 *
 * Reference (per 1M tokens):
 * - DeepSeek V3.2: $0.28 / $0.42 (~95% cheaper than Claude)
 * - Gemini 2.5 Pro: $1.25 / $10.00 (~60% cheaper than Claude)
 * - Claude Sonnet: $3.00 / $15.00 (baseline)
 * - OpenAI GPT-4o: $5.00 / $15.00 (~40% more than Claude)
 * - Claude Opus: $15.00 / $75.00 (5x Claude Sonnet)
 */
export declare const TOKEN_COSTS: Record<string, {
    input: number;
    output: number;
}>;
/**
 * Cache discount rate for Anthropic prompt caching.
 * Cached tokens cost 90% less than regular input tokens.
 * NOTE(review): this models cache READS only; Anthropic charges a premium
 * for cache writes — confirm against current pricing before trusting savings.
 */
export declare const CACHE_DISCOUNT_RATE = 0.9;
/**
 * Cost estimate for a single execution.
 */
export interface CostEstimate {
    /** Number of input tokens */
    inputTokens: number;
    /** Number of output tokens */
    outputTokens: number;
    /** Cost for input tokens (USD) */
    inputCost: number;
    /** Cost for output tokens (USD) */
    outputCost: number;
    /** Total cost before cache savings (USD) */
    totalCost: number;
    /** Tokens read from cache (if applicable) */
    cacheHitTokens?: number;
    /** Estimated savings from cache (USD) */
    cacheDiscount?: number;
    /** Final cost after cache discount (USD) */
    finalCost: number;
    /** Model used for this estimate */
    model: string;
}
/**
 * Aggregated cost summary across multiple executions.
 */
export interface CostSummary {
    /** Total cost across all executions (USD) */
    totalCost: number;
    /** Total input tokens */
    totalInputTokens: number;
    /** Total output tokens */
    totalOutputTokens: number;
    /** Total cache hit tokens */
    totalCacheHitTokens: number;
    /** Total savings from cache (USD) */
    totalCacheSavings: number;
    /** Final cost after cache discounts (USD) */
    finalCost: number;
    /** Cost breakdown by provider */
    byProvider: Record<string, number>;
    /** Cost breakdown by model */
    byModel: Record<string, number>;
    /** Number of executions */
    executionCount: number;
    /** Average cost per execution (USD) */
    avgCostPerExecution: number;
}
/**
 * Get token costs for a model, falling back to default if unknown.
 */
export declare function getModelCosts(model: string): {
    input: number;
    output: number;
};
/**
 * Estimate cost for a single execution.
 *
 * @param inputTokens - Number of input tokens
 * @param outputTokens - Number of output tokens
 * @param model - Model identifier
 * @param cacheHitTokens - Optional tokens read from cache
 * @returns Cost estimate with breakdown
 */
export declare function estimateCost(inputTokens: number, outputTokens: number, model: string, cacheHitTokens?: number): CostEstimate;
/**
 * Aggregate costs from multiple telemetry events.
 *
 * @param events - Array of telemetry events with token data
 * @returns Aggregated cost summary
 */
export declare function aggregateCosts(events: TelemetryEvent[]): CostSummary;
/**
 * Format a cost value for display.
 *
 * @param cost - Cost in USD
 * @returns Formatted string like "$0.0123" or "<$0.01"
 */
export declare function formatCost(cost: number): string;
/**
 * Estimate cost for a planned execution based on token estimates.
 * Useful for budget warnings before execution.
 *
 * @param estimatedInputTokens - Estimated input tokens
 * @param model - Model to use
 * @param estimatedOutputRatio - Expected output/input ratio (default 0.3)
 * @returns Cost estimate
 */
export declare function estimatePlannedCost(estimatedInputTokens: number, model: string, estimatedOutputRatio?: number): CostEstimate;
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token cost tracking and estimation for Orchex Learn.
|
|
3
|
+
* Provides cost estimation and aggregation for multi-provider token usage.
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Token costs per 1000 tokens (in USD).
|
|
7
|
+
* Prices as of February 2026 - should be updated periodically.
|
|
8
|
+
*
|
|
9
|
+
* Reference (per 1M tokens):
|
|
10
|
+
* - DeepSeek V3.2: $0.28 / $0.42 (~95% cheaper than Claude)
|
|
11
|
+
* - Gemini 2.5 Pro: $1.25 / $10.00 (~60% cheaper than Claude)
|
|
12
|
+
* - Claude Sonnet: $3.00 / $15.00 (baseline)
|
|
13
|
+
* - OpenAI GPT-4o: $5.00 / $15.00 (~40% more than Claude)
|
|
14
|
+
* - Claude Opus: $15.00 / $75.00 (5x Claude Sonnet)
|
|
15
|
+
*/
|
|
16
|
+
export const TOKEN_COSTS = {
|
|
17
|
+
// Anthropic models (Feb 2026)
|
|
18
|
+
'claude-opus-4-5-20251101': { input: 0.015, output: 0.075 },
|
|
19
|
+
'claude-sonnet-4-5-20250929': { input: 0.003, output: 0.015 },
|
|
20
|
+
'claude-sonnet-4-20250514': { input: 0.003, output: 0.015 },
|
|
21
|
+
'claude-3-5-sonnet-20241022': { input: 0.003, output: 0.015 },
|
|
22
|
+
'claude-3-opus-20240229': { input: 0.015, output: 0.075 },
|
|
23
|
+
'claude-3-haiku-20240307': { input: 0.00025, output: 0.00125 },
|
|
24
|
+
// OpenAI models (Feb 2026)
|
|
25
|
+
'gpt-4.5-turbo': { input: 0.005, output: 0.015 },
|
|
26
|
+
'gpt-4-turbo': { input: 0.01, output: 0.03 },
|
|
27
|
+
'gpt-4-turbo-preview': { input: 0.01, output: 0.03 },
|
|
28
|
+
'gpt-4o': { input: 0.005, output: 0.015 },
|
|
29
|
+
'gpt-4o-mini': { input: 0.00015, output: 0.0006 },
|
|
30
|
+
'o1-preview': { input: 0.015, output: 0.06 },
|
|
31
|
+
'o1-mini': { input: 0.003, output: 0.012 },
|
|
32
|
+
'o3-mini': { input: 0.0011, output: 0.0044 },
|
|
33
|
+
'gpt-3.5-turbo': { input: 0.0005, output: 0.0015 },
|
|
34
|
+
// Google models (Feb 2026)
|
|
35
|
+
'gemini-2.5-pro': { input: 0.00125, output: 0.01 },
|
|
36
|
+
'gemini-2.0-flash': { input: 0.0001, output: 0.0004 },
|
|
37
|
+
'gemini-1.5-pro': { input: 0.00125, output: 0.005 },
|
|
38
|
+
'gemini-1.5-flash': { input: 0.000075, output: 0.0003 },
|
|
39
|
+
'gemini-pro': { input: 0.000125, output: 0.000375 },
|
|
40
|
+
// DeepSeek models (Feb 2026) — ~95% cheaper than Claude!
|
|
41
|
+
'deepseek-chat': { input: 0.00028, output: 0.00042 },
|
|
42
|
+
'deepseek-coder': { input: 0.00028, output: 0.00042 },
|
|
43
|
+
'deepseek-reasoner': { input: 0.00055, output: 0.00219 },
|
|
44
|
+
// Ollama/Local (free but track for comparison)
|
|
45
|
+
'llama3.3:70b': { input: 0, output: 0 },
|
|
46
|
+
'llama3.2:latest': { input: 0, output: 0 },
|
|
47
|
+
'mistral:latest': { input: 0, output: 0 },
|
|
48
|
+
// Default fallback (conservative estimate)
|
|
49
|
+
default: { input: 0.003, output: 0.015 },
|
|
50
|
+
};
|
|
51
|
+
/**
 * Discount applied to Anthropic prompt-cache reads.
 * Tokens served from the cache are billed at a 90% reduction
 * versus regular input tokens.
 */
export const CACHE_DISCOUNT_RATE = 0.9;
|
|
56
|
+
/**
 * Resolve per-1K-token pricing for a model.
 *
 * Tries an exact key lookup in TOKEN_COSTS first, then a prefix match
 * (the first two dash-separated segments of each known key) so that
 * versioned model names still resolve, and finally falls back to the
 * conservative `default` entry.
 */
export function getModelCosts(model) {
    // Exact match wins outright.
    const exact = TOKEN_COSTS[model];
    if (exact) {
        return exact;
    }
    // Otherwise scan known keys for a prefix contained in the model name
    // (handles versioned/suffixed model identifiers).
    const normalized = model.toLowerCase();
    for (const [knownModel, pricing] of Object.entries(TOKEN_COSTS)) {
        if (knownModel === 'default') {
            continue;
        }
        const prefix = knownModel.split('-').slice(0, 2).join('-');
        if (normalized.includes(prefix)) {
            return pricing;
        }
    }
    return TOKEN_COSTS.default;
}
|
|
73
|
+
/**
 * Estimate the dollar cost of a single model execution.
 *
 * @param inputTokens - Number of input tokens
 * @param outputTokens - Number of output tokens
 * @param model - Model identifier used to look up pricing
 * @param cacheHitTokens - Optional count of input tokens served from cache
 * @returns Cost estimate with input/output/total breakdown, any cache
 *          discount, and the discounted final cost
 */
export function estimateCost(inputTokens, outputTokens, model, cacheHitTokens) {
    const pricing = getModelCosts(model);
    // Pricing is expressed per 1000 tokens.
    const inputCost = (inputTokens / 1000) * pricing.input;
    const outputCost = (outputTokens / 1000) * pricing.output;
    const totalCost = inputCost + outputCost;
    // Cache hits refund CACHE_DISCOUNT_RATE of the input cost for those tokens;
    // stays undefined when no cache reads occurred.
    let cacheDiscount;
    if (cacheHitTokens && cacheHitTokens > 0) {
        cacheDiscount = (cacheHitTokens / 1000) * pricing.input * CACHE_DISCOUNT_RATE;
    }
    return {
        inputTokens,
        outputTokens,
        inputCost,
        outputCost,
        totalCost,
        cacheHitTokens,
        cacheDiscount,
        finalCost: totalCost - (cacheDiscount ?? 0),
        model,
    };
}
|
|
107
|
+
/**
 * Aggregate token costs across a set of telemetry events.
 *
 * Events carrying no token data at all (neither input nor output counts)
 * are ignored entirely.
 *
 * @param events - Telemetry events with token/model/provider fields
 * @returns Totals, cache savings, per-provider and per-model cost
 *          breakdowns, and the average cost per counted execution
 */
export function aggregateCosts(events) {
    const byProvider = {};
    const byModel = {};
    let sumInput = 0;
    let sumOutput = 0;
    let sumCacheHits = 0;
    let sumCost = 0;
    let sumSavings = 0;
    let counted = 0;
    for (const event of events) {
        // Events without any token data contribute nothing.
        if (!event.tokensInput && !event.tokensOutput) {
            continue;
        }
        const inTokens = event.tokensInput ?? 0;
        const outTokens = event.tokensOutput ?? 0;
        const cachedTokens = event.cacheReadTokens ?? 0;
        const model = event.model ?? 'default';
        const provider = event.provider ?? 'unknown';
        const estimate = estimateCost(inTokens, outTokens, model, cachedTokens);
        sumInput += inTokens;
        sumOutput += outTokens;
        sumCacheHits += cachedTokens;
        sumCost += estimate.totalCost;
        sumSavings += estimate.cacheDiscount ?? 0;
        counted += 1;
        // Breakdown buckets accumulate the discounted (final) cost.
        byProvider[provider] = (byProvider[provider] ?? 0) + estimate.finalCost;
        byModel[model] = (byModel[model] ?? 0) + estimate.finalCost;
    }
    const finalCost = sumCost - sumSavings;
    return {
        totalCost: sumCost,
        totalInputTokens: sumInput,
        totalOutputTokens: sumOutput,
        totalCacheHitTokens: sumCacheHits,
        totalCacheSavings: sumSavings,
        finalCost,
        byProvider,
        byModel,
        executionCount: counted,
        avgCostPerExecution: counted > 0 ? finalCost / counted : 0,
    };
}
|
|
158
|
+
/**
 * Render a USD cost for human display.
 *
 * Zero renders as "$0.00", positive sub-cent values as "<$0.01", and
 * anything else with four decimal places (e.g. "$0.0123").
 *
 * @param cost - Cost in USD
 * @returns Formatted currency string
 */
export function formatCost(cost) {
    if (cost === 0) {
        return '$0.00';
    }
    return cost < 0.01 ? '<$0.01' : `$${cost.toFixed(4)}`;
}
|
|
171
|
+
/**
 * Project the cost of a planned execution before it runs — useful for
 * budget warnings.
 *
 * Output size is derived from the input size via a configurable ratio
 * (30% by default), then priced through estimateCost.
 *
 * @param estimatedInputTokens - Predicted number of input tokens
 * @param model - Model the execution will use
 * @param estimatedOutputRatio - Expected output/input token ratio (default 0.3)
 * @returns Cost estimate for the planned execution
 */
export function estimatePlannedCost(estimatedInputTokens, model, estimatedOutputRatio = 0.3) {
    const projectedOutputTokens = Math.ceil(estimatedInputTokens * estimatedOutputRatio);
    return estimateCost(estimatedInputTokens, projectedOutputTokens, model);
}
|