omgkit 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/plugin/skills/SKILL_STANDARDS.md +743 -0
- package/plugin/skills/databases/mongodb/SKILL.md +797 -28
- package/plugin/skills/databases/postgresql/SKILL.md +494 -18
- package/plugin/skills/databases/prisma/SKILL.md +776 -30
- package/plugin/skills/databases/redis/SKILL.md +885 -25
- package/plugin/skills/devops/aws/SKILL.md +686 -28
- package/plugin/skills/devops/docker/SKILL.md +466 -18
- package/plugin/skills/devops/github-actions/SKILL.md +684 -29
- package/plugin/skills/devops/kubernetes/SKILL.md +621 -24
- package/plugin/skills/frameworks/django/SKILL.md +920 -20
- package/plugin/skills/frameworks/express/SKILL.md +1361 -35
- package/plugin/skills/frameworks/fastapi/SKILL.md +1260 -33
- package/plugin/skills/frameworks/laravel/SKILL.md +1244 -31
- package/plugin/skills/frameworks/nestjs/SKILL.md +1005 -26
- package/plugin/skills/frameworks/nextjs/SKILL.md +407 -44
- package/plugin/skills/frameworks/rails/SKILL.md +594 -28
- package/plugin/skills/frameworks/react/SKILL.md +1006 -32
- package/plugin/skills/frameworks/spring/SKILL.md +528 -35
- package/plugin/skills/frameworks/vue/SKILL.md +1296 -27
- package/plugin/skills/frontend/accessibility/SKILL.md +1108 -34
- package/plugin/skills/frontend/frontend-design/SKILL.md +1304 -26
- package/plugin/skills/frontend/responsive/SKILL.md +847 -21
- package/plugin/skills/frontend/shadcn-ui/SKILL.md +976 -38
- package/plugin/skills/frontend/tailwindcss/SKILL.md +831 -35
- package/plugin/skills/frontend/threejs/SKILL.md +1298 -29
- package/plugin/skills/languages/javascript/SKILL.md +935 -31
- package/plugin/skills/languages/python/SKILL.md +489 -25
- package/plugin/skills/languages/typescript/SKILL.md +379 -30
- package/plugin/skills/methodology/brainstorming/SKILL.md +597 -23
- package/plugin/skills/methodology/defense-in-depth/SKILL.md +832 -34
- package/plugin/skills/methodology/dispatching-parallel-agents/SKILL.md +665 -31
- package/plugin/skills/methodology/executing-plans/SKILL.md +556 -24
- package/plugin/skills/methodology/finishing-development-branch/SKILL.md +595 -25
- package/plugin/skills/methodology/problem-solving/SKILL.md +429 -61
- package/plugin/skills/methodology/receiving-code-review/SKILL.md +536 -24
- package/plugin/skills/methodology/requesting-code-review/SKILL.md +632 -21
- package/plugin/skills/methodology/root-cause-tracing/SKILL.md +641 -30
- package/plugin/skills/methodology/sequential-thinking/SKILL.md +262 -3
- package/plugin/skills/methodology/systematic-debugging/SKILL.md +571 -32
- package/plugin/skills/methodology/test-driven-development/SKILL.md +779 -24
- package/plugin/skills/methodology/testing-anti-patterns/SKILL.md +691 -29
- package/plugin/skills/methodology/token-optimization/SKILL.md +598 -29
- package/plugin/skills/methodology/verification-before-completion/SKILL.md +543 -22
- package/plugin/skills/methodology/writing-plans/SKILL.md +590 -18
- package/plugin/skills/omega/omega-architecture/SKILL.md +838 -39
- package/plugin/skills/omega/omega-coding/SKILL.md +636 -39
- package/plugin/skills/omega/omega-sprint/SKILL.md +855 -48
- package/plugin/skills/omega/omega-testing/SKILL.md +940 -41
- package/plugin/skills/omega/omega-thinking/SKILL.md +703 -50
- package/plugin/skills/security/better-auth/SKILL.md +1065 -28
- package/plugin/skills/security/oauth/SKILL.md +968 -31
- package/plugin/skills/security/owasp/SKILL.md +894 -33
- package/plugin/skills/testing/playwright/SKILL.md +764 -38
- package/plugin/skills/testing/pytest/SKILL.md +873 -36
- package/plugin/skills/testing/vitest/SKILL.md +980 -35
|
@@ -1,51 +1,620 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: token-optimization
|
|
3
|
-
description:
|
|
3
|
+
description: Optimize AI token usage for efficient, cost-effective interactions while maintaining quality
|
|
4
|
+
category: methodology
|
|
5
|
+
triggers:
|
|
6
|
+
- token optimization
|
|
7
|
+
- cost optimization
|
|
8
|
+
- efficient prompts
|
|
9
|
+
- reduce tokens
|
|
10
|
+
- AI efficiency
|
|
11
|
+
- context management
|
|
12
|
+
- prompt efficiency
|
|
4
13
|
---
|
|
5
14
|
|
|
6
|
-
# Token Optimization
|
|
15
|
+
# Token Optimization
|
|
7
16
|
|
|
8
|
-
|
|
17
|
+
Master **AI token usage optimization** for efficient, cost-effective interactions while maintaining output quality. This skill provides strategies to reduce costs by 30-70% without sacrificing results.
|
|
9
18
|
|
|
10
|
-
|
|
19
|
+
## Purpose
|
|
20
|
+
|
|
21
|
+
Maximize AI efficiency and minimize costs:
|
|
22
|
+
|
|
23
|
+
- Reduce token usage by 30-70%
|
|
24
|
+
- Maintain or improve output quality
|
|
25
|
+
- Optimize prompt and context design
|
|
26
|
+
- Use strategic caching and batching
|
|
27
|
+
- Choose appropriate models for tasks
|
|
28
|
+
- Manage conversation context efficiently
|
|
29
|
+
- Track and measure token economics
|
|
30
|
+
|
|
31
|
+
## Features
|
|
32
|
+
|
|
33
|
+
### 1. Token Economics Framework
|
|
34
|
+
|
|
35
|
+
```markdown
|
|
36
|
+
## Understanding Token Costs
|
|
37
|
+
|
|
38
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
39
|
+
│ TOKEN ECONOMICS │
|
|
40
|
+
├─────────────────────────────────────────────────────────────────────────┤
|
|
41
|
+
│ │
|
|
42
|
+
│ INPUT TOKENS (Context) OUTPUT TOKENS (Response) │
|
|
43
|
+
│ ═══════════════════════ ════════════════════════ │
|
|
44
|
+
│ • System prompt • Generated content │
|
|
45
|
+
│ • Conversation history • Code output │
|
|
46
|
+
│ • File contents read • Explanations │
|
|
47
|
+
│ • Tool results • Structured data │
|
|
48
|
+
│ │
|
|
49
|
+
│ TYPICAL RATIOS: │
|
|
50
|
+
│ ──────────────── │
|
|
51
|
+
│ • Exploration: 90% input, 10% output │
|
|
52
|
+
│ • Code generation: 60% input, 40% output │
|
|
53
|
+
│ • Documentation: 30% input, 70% output │
|
|
54
|
+
│ │
|
|
55
|
+
│ OPTIMIZATION PRIORITY: │
|
|
56
|
+
│ ────────────────────── │
|
|
57
|
+
│ 1. Reduce unnecessary context (highest impact) │
|
|
58
|
+
│ 2. Use efficient prompts │
|
|
59
|
+
│ 3. Request concise outputs │
|
|
60
|
+
│ 4. Choose right model for task │
|
|
61
|
+
│ │
|
|
62
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
63
|
+
|
|
64
|
+
## Token Estimation
|
|
65
|
+
|
|
66
|
+
Rough estimates (varies by language/content):
|
|
67
|
+
• English: ~4 characters = 1 token
|
|
68
|
+
• Code: ~3-4 characters = 1 token
|
|
69
|
+
• JSON: ~3 characters = 1 token
|
|
11
70
|
```
|
|
12
|
-
# Bad
|
|
13
|
-
Can you please help me understand what this function does
|
|
14
|
-
and explain it in detail with examples?
|
|
15
71
|
|
|
16
|
-
|
|
17
|
-
|
|
72
|
+
### 2. Prompt Optimization Techniques
|
|
73
|
+
|
|
74
|
+
```typescript
|
|
75
|
+
/**
|
|
76
|
+
* Prompt Optimization: Write efficient prompts
|
|
77
|
+
*/
|
|
78
|
+
|
|
79
|
+
// ❌ INEFFICIENT: Verbose, repetitive prompt
|
|
80
|
+
const inefficientPrompt = `
|
|
81
|
+
I would really appreciate it if you could help me with this task.
|
|
82
|
+
What I need you to do is to please analyze this code and look for
|
|
83
|
+
any bugs or issues that might be present. Could you please check
|
|
84
|
+
if there are any problems with error handling, and also please
|
|
85
|
+
make sure to look for any security vulnerabilities. When you find
|
|
86
|
+
issues, please explain them in detail so I can understand what's
|
|
87
|
+
wrong. Please also suggest how to fix each issue you find.
|
|
88
|
+
Thank you so much for your help with this!
|
|
89
|
+
`;
|
|
90
|
+
// Tokens: ~120
|
|
91
|
+
|
|
92
|
+
// ✅ EFFICIENT: Direct, clear prompt
|
|
93
|
+
const efficientPrompt = `
|
|
94
|
+
Analyze for bugs, error handling issues, and security vulnerabilities.
|
|
95
|
+
For each issue: location, problem, fix.
|
|
96
|
+
`;
|
|
97
|
+
// Tokens: ~25 (79% reduction)
|
|
98
|
+
|
|
99
|
+
// Optimization techniques:
|
|
100
|
+
const promptOptimizations = {
|
|
101
|
+
// 1. Use direct commands, not polite requests
|
|
102
|
+
before: "Could you please help me understand what this function does?",
|
|
103
|
+
after: "Explain this function.",
|
|
104
|
+
|
|
105
|
+
// 2. Remove filler words
|
|
106
|
+
before: "I think we might need to maybe consider refactoring this code",
|
|
107
|
+
after: "Refactor this code",
|
|
108
|
+
|
|
109
|
+
// 3. Use structured output requests
|
|
110
|
+
before: "Please provide the results in a nice format with the name, description, and priority for each item",
|
|
111
|
+
after: "Output: JSON array with {name, description, priority}",
|
|
112
|
+
|
|
113
|
+
// 4. Be specific about scope
|
|
114
|
+
before: "Look at the codebase and find issues",
|
|
115
|
+
after: "Check src/auth/*.ts for SQL injection",
|
|
116
|
+
|
|
117
|
+
// 5. Use abbreviations in technical contexts
|
|
118
|
+
before: "Application Programming Interface",
|
|
119
|
+
after: "API"
|
|
120
|
+
};
|
|
18
121
|
```
|
|
19
122
|
|
|
20
|
-
###
|
|
123
|
+
### 3. Context Management Strategies
|
|
124
|
+
|
|
125
|
+
```typescript
|
|
126
|
+
/**
|
|
127
|
+
* Context Management: Minimize unnecessary input tokens
|
|
128
|
+
*/
|
|
129
|
+
|
|
130
|
+
// Strategy 1: Targeted file reading
|
|
131
|
+
class ContextOptimizer {
|
|
132
|
+
// ❌ Reading entire files
|
|
133
|
+
async readEntireFile(path: string): Promise<string> {
|
|
134
|
+
return fs.readFile(path, 'utf-8');
|
|
135
|
+
// Could be 1000+ lines = 10,000+ tokens
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
// ✅ Read only what's needed
|
|
139
|
+
async readRelevantSection(
|
|
140
|
+
path: string,
|
|
141
|
+
options: { startLine?: number; endLine?: number; searchPattern?: string }
|
|
142
|
+
): Promise<string> {
|
|
143
|
+
const content = await fs.readFile(path, 'utf-8');
|
|
144
|
+
const lines = content.split('\n');
|
|
145
|
+
|
|
146
|
+
if (options.startLine !== undefined && options.endLine !== undefined) {
|
|
147
|
+
// Read specific line range
|
|
148
|
+
return lines.slice(options.startLine - 1, options.endLine).join('\n');
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
if (options.searchPattern) {
|
|
152
|
+
// Find and return context around pattern
|
|
153
|
+
const regex = new RegExp(options.searchPattern);
|
|
154
|
+
for (let i = 0; i < lines.length; i++) {
|
|
155
|
+
if (regex.test(lines[i])) {
|
|
156
|
+
// Return 10 lines before the match and 9 after (slice end is exclusive; use i + 11 for a symmetric window)
|
|
157
|
+
const start = Math.max(0, i - 10);
|
|
158
|
+
const end = Math.min(lines.length, i + 10);
|
|
159
|
+
return lines.slice(start, end).join('\n');
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
// Fallback: return summary
|
|
165
|
+
return this.summarizeFile(lines);
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
private summarizeFile(lines: string[]): string {
|
|
169
|
+
// Return file structure, not full content
|
|
170
|
+
const summary = [];
|
|
171
|
+
for (const line of lines) {
|
|
172
|
+
if (this.isStructuralLine(line)) {
|
|
173
|
+
summary.push(line);
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
return summary.join('\n');
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
private isStructuralLine(line: string): boolean {
|
|
180
|
+
const patterns = [
|
|
181
|
+
/^(export\s+)?(async\s+)?function\s+\w+/, // Function definitions
|
|
182
|
+
/^(export\s+)?(class|interface|type)\s+\w+/, // Type definitions
|
|
183
|
+
/^(import|export)\s+/, // Imports/exports
|
|
184
|
+
/^\s*\/\*\*/, // JSDoc start
|
|
185
|
+
];
|
|
186
|
+
return patterns.some(p => p.test(line));
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
// Strategy 2: Progressive context loading
|
|
191
|
+
async function progressiveContext(query: string): Promise<Context> {
|
|
192
|
+
// Start with minimal context
|
|
193
|
+
let context = await loadMinimalContext();
|
|
194
|
+
|
|
195
|
+
// Check if sufficient
|
|
196
|
+
if (await isContextSufficient(context, query)) {
|
|
197
|
+
return context;
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
// Progressively load more
|
|
201
|
+
context = await loadExpandedContext(context, query);
|
|
202
|
+
|
|
203
|
+
if (await isContextSufficient(context, query)) {
|
|
204
|
+
return context;
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
// Only load full context if necessary
|
|
208
|
+
return await loadFullContext(context, query);
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
// Strategy 3: Context compression
|
|
212
|
+
function compressContext(context: string, maxTokens: number): string {
|
|
213
|
+
const currentTokens = estimateTokens(context);
|
|
214
|
+
|
|
215
|
+
if (currentTokens <= maxTokens) {
|
|
216
|
+
return context;
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
// Remove low-value content
|
|
220
|
+
let compressed = context;
|
|
221
|
+
|
|
222
|
+
// 1. Remove comments (often duplicates code meaning)
|
|
223
|
+
compressed = compressed.replace(/\/\/.*$/gm, '');
|
|
224
|
+
compressed = compressed.replace(/\/\*[\s\S]*?\*\//g, '');
|
|
225
|
+
|
|
226
|
+
// 2. Remove blank lines
|
|
227
|
+
compressed = compressed.replace(/\n\s*\n/g, '\n');
|
|
228
|
+
|
|
229
|
+
// 3. Remove import statements if types are inferrable
|
|
230
|
+
compressed = compressed.replace(/^import.*from.*$/gm, '');
|
|
231
|
+
|
|
232
|
+
// 4. Truncate if still too long
|
|
233
|
+
if (estimateTokens(compressed) > maxTokens) {
|
|
234
|
+
compressed = truncateToTokens(compressed, maxTokens);
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
return compressed;
|
|
238
|
+
}
|
|
21
239
|
```
|
|
22
|
-
# Bad
|
|
23
|
-
Read("entire-large-file.ts")
|
|
24
240
|
|
|
25
|
-
|
|
26
|
-
|
|
241
|
+
### 4. Efficient Search Patterns
|
|
242
|
+
|
|
243
|
+
```typescript
|
|
244
|
+
/**
|
|
245
|
+
* Search Optimization: Find information efficiently
|
|
246
|
+
*/
|
|
247
|
+
|
|
248
|
+
// ❌ INEFFICIENT: Broad searches that return too much
|
|
249
|
+
const inefficientSearch = {
|
|
250
|
+
grep: {
|
|
251
|
+
pattern: '.*', // Matches everything
|
|
252
|
+
path: '/', // Searches entire filesystem
|
|
253
|
+
},
|
|
254
|
+
glob: {
|
|
255
|
+
pattern: '**/*', // All files
|
|
256
|
+
}
|
|
257
|
+
};
|
|
258
|
+
|
|
259
|
+
// ✅ EFFICIENT: Targeted searches
|
|
260
|
+
const efficientSearch = {
|
|
261
|
+
// Specify file types
|
|
262
|
+
grep: {
|
|
263
|
+
pattern: 'handleAuth',
|
|
264
|
+
path: 'src/',
|
|
265
|
+
glob: '*.ts',
|
|
266
|
+
// Only get file names first
|
|
267
|
+
outputMode: 'files_with_matches'
|
|
268
|
+
},
|
|
269
|
+
|
|
270
|
+
// Use specific patterns
|
|
271
|
+
glob: {
|
|
272
|
+
pattern: 'src/services/*Service.ts'
|
|
273
|
+
},
|
|
274
|
+
|
|
275
|
+
// Limit results
|
|
276
|
+
headLimit: 10
|
|
277
|
+
};
|
|
278
|
+
|
|
279
|
+
// Search strategy hierarchy
|
|
280
|
+
const searchStrategies = [
|
|
281
|
+
{
|
|
282
|
+
name: 'Exact match',
|
|
283
|
+
when: 'You know the exact term',
|
|
284
|
+
example: 'grep "function calculateTax" src/tax.ts',
|
|
285
|
+
tokens: 'Low (single file, exact match)'
|
|
286
|
+
},
|
|
287
|
+
{
|
|
288
|
+
name: 'Scoped search',
|
|
289
|
+
when: 'You know the directory',
|
|
290
|
+
example: 'grep "validate" src/validators/ --type ts',
|
|
291
|
+
tokens: 'Medium (limited scope)'
|
|
292
|
+
},
|
|
293
|
+
{
|
|
294
|
+
name: 'File pattern search',
|
|
295
|
+
when: 'You know the file naming convention',
|
|
296
|
+
example: 'glob "**/test/*.spec.ts"',
|
|
297
|
+
tokens: 'Medium (structured results)'
|
|
298
|
+
},
|
|
299
|
+
{
|
|
300
|
+
name: 'Broad search with limits',
|
|
301
|
+
when: 'You need to explore',
|
|
302
|
+
example: 'grep "TODO" --head-limit 20',
|
|
303
|
+
tokens: 'Medium (capped results)'
|
|
304
|
+
},
|
|
305
|
+
{
|
|
306
|
+
name: 'Agent delegation',
|
|
307
|
+
when: 'Complex multi-step search',
|
|
308
|
+
example: 'spawn explore agent with specific query',
|
|
309
|
+
tokens: 'High but contained in subagent'
|
|
310
|
+
}
|
|
311
|
+
];
|
|
27
312
|
```
|
|
28
313
|
|
|
29
|
-
###
|
|
314
|
+
### 5. Model Selection Strategy
|
|
315
|
+
|
|
316
|
+
```typescript
|
|
317
|
+
/**
|
|
318
|
+
* Model Selection: Right model for the task
|
|
319
|
+
*/
|
|
320
|
+
|
|
321
|
+
type ModelTier = 'haiku' | 'sonnet' | 'opus';
|
|
322
|
+
|
|
323
|
+
interface TaskProfile {
|
|
324
|
+
complexity: 'simple' | 'moderate' | 'complex';
|
|
325
|
+
requiresReasoning: boolean;
|
|
326
|
+
requiresCreativity: boolean;
|
|
327
|
+
outputLength: 'short' | 'medium' | 'long';
|
|
328
|
+
qualityCritical: boolean;
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
function selectModel(task: TaskProfile): ModelTier {
|
|
332
|
+
// Simple, short tasks → Haiku (fastest, cheapest)
|
|
333
|
+
if (
|
|
334
|
+
task.complexity === 'simple' &&
|
|
335
|
+
!task.requiresReasoning &&
|
|
336
|
+
!task.qualityCritical
|
|
337
|
+
) {
|
|
338
|
+
return 'haiku';
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
// Complex reasoning or quality-critical → Opus
|
|
342
|
+
if (
|
|
343
|
+
task.complexity === 'complex' ||
|
|
344
|
+
(task.requiresReasoning && task.qualityCritical)
|
|
345
|
+
) {
|
|
346
|
+
return 'opus';
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
// Default: Sonnet (balanced)
|
|
350
|
+
return 'sonnet';
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
// Task examples by model
|
|
354
|
+
const modelUseCases = {
|
|
355
|
+
haiku: [
|
|
356
|
+
'Simple code formatting',
|
|
357
|
+
'Syntax error detection',
|
|
358
|
+
'File renaming suggestions',
|
|
359
|
+
'Basic text extraction',
|
|
360
|
+
'Quick lookups'
|
|
361
|
+
],
|
|
362
|
+
sonnet: [
|
|
363
|
+
'Feature implementation',
|
|
364
|
+
'Bug fixing',
|
|
365
|
+
'Code review',
|
|
366
|
+
'Documentation generation',
|
|
367
|
+
'Test writing'
|
|
368
|
+
],
|
|
369
|
+
opus: [
|
|
370
|
+
'Architecture design',
|
|
371
|
+
'Complex debugging',
|
|
372
|
+
'Security analysis',
|
|
373
|
+
'Performance optimization',
|
|
374
|
+
'Critical code generation'
|
|
375
|
+
]
|
|
376
|
+
};
|
|
30
377
|
```
|
|
31
|
-
# Bad
|
|
32
|
-
Grep(".*") in all files
|
|
33
378
|
|
|
34
|
-
|
|
35
|
-
|
|
379
|
+
### 6. Batching and Caching
|
|
380
|
+
|
|
381
|
+
```typescript
|
|
382
|
+
/**
|
|
383
|
+
* Batching: Combine related operations
|
|
384
|
+
*/
|
|
385
|
+
|
|
386
|
+
// ❌ INEFFICIENT: Multiple separate calls
|
|
387
|
+
async function inefficientApproach() {
|
|
388
|
+
const file1 = await readFile('src/a.ts'); // Call 1
|
|
389
|
+
const file2 = await readFile('src/b.ts'); // Call 2
|
|
390
|
+
const file3 = await readFile('src/c.ts'); // Call 3
|
|
391
|
+
// Each call has overhead
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
// ✅ EFFICIENT: Batch related operations
|
|
395
|
+
async function efficientApproach() {
|
|
396
|
+
const files = await Promise.all([
|
|
397
|
+
readFile('src/a.ts'),
|
|
398
|
+
readFile('src/b.ts'),
|
|
399
|
+
readFile('src/c.ts')
|
|
400
|
+
]);
|
|
401
|
+
// Single logical operation
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
// ❌ INEFFICIENT: Repeated context
|
|
405
|
+
async function repeatedContext() {
|
|
406
|
+
await ask("Given this code: [100 lines], find bugs");
|
|
407
|
+
await ask("Given this code: [same 100 lines], suggest improvements");
|
|
408
|
+
await ask("Given this code: [same 100 lines], add types");
|
|
409
|
+
// 300 lines of context sent in total; 200 of them are duplicates
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
// ✅ EFFICIENT: Combined request
|
|
413
|
+
async function combinedRequest() {
|
|
414
|
+
await ask(`
|
|
415
|
+
Given this code: [100 lines]
|
|
416
|
+
1. Find bugs
|
|
417
|
+
2. Suggest improvements
|
|
418
|
+
3. Add types
|
|
419
|
+
`);
|
|
420
|
+
// 100 lines + small overhead
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
/**
|
|
424
|
+
* Caching: Avoid redundant operations
|
|
425
|
+
*/
|
|
426
|
+
|
|
427
|
+
class ResultCache {
|
|
428
|
+
private cache: Map<string, CacheEntry> = new Map();
|
|
429
|
+
private readonly TTL_MS = 5 * 60 * 1000; // 5 minutes
|
|
430
|
+
|
|
431
|
+
async getOrCompute<T>(
|
|
432
|
+
key: string,
|
|
433
|
+
compute: () => Promise<T>
|
|
434
|
+
): Promise<T> {
|
|
435
|
+
const cached = this.cache.get(key);
|
|
436
|
+
|
|
437
|
+
if (cached && Date.now() - cached.timestamp < this.TTL_MS) {
|
|
438
|
+
return cached.value as T;
|
|
439
|
+
}
|
|
440
|
+
|
|
441
|
+
const result = await compute();
|
|
442
|
+
this.cache.set(key, {
|
|
443
|
+
value: result,
|
|
444
|
+
timestamp: Date.now()
|
|
445
|
+
});
|
|
446
|
+
|
|
447
|
+
return result;
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
// Cache file analysis results
|
|
451
|
+
async analyzeFile(path: string): Promise<Analysis> {
|
|
452
|
+
const stat = await fs.stat(path);
|
|
453
|
+
const cacheKey = `${path}:${stat.mtimeMs}`;
|
|
454
|
+
|
|
455
|
+
return this.getOrCompute(cacheKey, async () => {
|
|
456
|
+
const content = await fs.readFile(path, 'utf-8');
|
|
457
|
+
return performAnalysis(content);
|
|
458
|
+
});
|
|
459
|
+
}
|
|
460
|
+
}
|
|
36
461
|
```
|
|
37
462
|
|
|
38
|
-
###
|
|
463
|
+
### 7. Output Optimization
|
|
464
|
+
|
|
465
|
+
```typescript
|
|
466
|
+
/**
|
|
467
|
+
* Output Optimization: Request efficient responses
|
|
468
|
+
*/
|
|
469
|
+
|
|
470
|
+
// ❌ INEFFICIENT: Verbose output request
|
|
471
|
+
const verboseRequest = `
|
|
472
|
+
Please explain your reasoning step by step, and provide a detailed
|
|
473
|
+
analysis of each issue you find. Make sure to include examples and
|
|
474
|
+
context for each point you make.
|
|
475
|
+
`;
|
|
476
|
+
|
|
477
|
+
// ✅ EFFICIENT: Structured, concise output
|
|
478
|
+
const efficientRequest = `
|
|
479
|
+
Output format:
|
|
480
|
+
- issue: [one line]
|
|
481
|
+
- fix: [one line]
|
|
482
|
+
`;
|
|
483
|
+
|
|
484
|
+
// Response length control
|
|
485
|
+
const responseLengthStrategies = {
|
|
486
|
+
minimal: {
|
|
487
|
+
description: 'Bare essentials only',
|
|
488
|
+
example: 'Answer: yes/no',
|
|
489
|
+
useCase: 'Binary decisions, confirmations'
|
|
490
|
+
},
|
|
491
|
+
concise: {
|
|
492
|
+
description: 'Key points only',
|
|
493
|
+
example: 'List: 3-5 bullet points',
|
|
494
|
+
useCase: 'Summaries, overviews'
|
|
495
|
+
},
|
|
496
|
+
standard: {
|
|
497
|
+
description: 'Normal explanation',
|
|
498
|
+
example: 'Explain with one example',
|
|
499
|
+
useCase: 'Most tasks'
|
|
500
|
+
},
|
|
501
|
+
detailed: {
|
|
502
|
+
description: 'Comprehensive response',
|
|
503
|
+
example: 'Full analysis with examples',
|
|
504
|
+
useCase: 'Complex problems, documentation'
|
|
505
|
+
}
|
|
506
|
+
};
|
|
507
|
+
|
|
508
|
+
// Format optimization
|
|
509
|
+
const formatOptimization = {
|
|
510
|
+
// Use structured formats for parsing
|
|
511
|
+
json: {
|
|
512
|
+
pros: 'Easy to parse, compact',
|
|
513
|
+
cons: 'Less readable',
|
|
514
|
+
best_for: 'Data extraction, API responses'
|
|
515
|
+
},
|
|
516
|
+
yaml: {
|
|
517
|
+
pros: 'Readable, compact',
|
|
518
|
+
cons: 'Parsing overhead',
|
|
519
|
+
best_for: 'Configuration, simple structures'
|
|
520
|
+
},
|
|
521
|
+
markdown: {
|
|
522
|
+
pros: 'Human readable',
|
|
523
|
+
cons: 'More tokens than JSON',
|
|
524
|
+
best_for: 'Documentation, explanations'
|
|
525
|
+
},
|
|
526
|
+
plain: {
|
|
527
|
+
pros: 'Minimal overhead',
|
|
528
|
+
cons: 'Harder to parse',
|
|
529
|
+
best_for: 'Simple text responses'
|
|
530
|
+
}
|
|
531
|
+
};
|
|
39
532
|
```
|
|
40
|
-
# Bad
|
|
41
|
-
Multiple separate tool calls
|
|
42
533
|
|
|
43
|
-
|
|
44
|
-
|
|
534
|
+
## Use Cases
|
|
535
|
+
|
|
536
|
+
### Optimizing a Code Review Task
|
|
537
|
+
|
|
538
|
+
```typescript
|
|
539
|
+
// Before optimization: ~16,000 tokens
|
|
540
|
+
const beforeOptimization = {
|
|
541
|
+
input: {
|
|
542
|
+
systemPrompt: 500, // Detailed instructions
|
|
543
|
+
fullFiles: 10000, // 3 complete files
|
|
544
|
+
conversationHistory: 3000,
|
|
545
|
+
prompt: 500
|
|
546
|
+
},
|
|
547
|
+
output: {
|
|
548
|
+
verboseAnalysis: 2000
|
|
549
|
+
},
|
|
550
|
+
total: 16000
|
|
551
|
+
};
|
|
552
|
+
|
|
553
|
+
// After optimization: ~3,000 tokens (~81% reduction)
|
|
554
|
+
const afterOptimization = {
|
|
555
|
+
input: {
|
|
556
|
+
systemPrompt: 200, // Concise instructions
|
|
557
|
+
relevantSections: 2000, // Only changed code + context
|
|
558
|
+
noHistory: 0, // Fresh context
|
|
559
|
+
prompt: 100 // Direct request
|
|
560
|
+
},
|
|
561
|
+
output: {
|
|
562
|
+
structuredList: 700
|
|
563
|
+
},
|
|
564
|
+
total: 3000
|
|
565
|
+
};
|
|
566
|
+
|
|
567
|
+
// Implementation
|
|
568
|
+
async function optimizedCodeReview(prDiff: string): Promise<Review> {
|
|
569
|
+
// 1. Extract only the diff (not full files)
|
|
570
|
+
const relevantCode = extractDiff(prDiff);
|
|
571
|
+
|
|
572
|
+
// 2. Use concise prompt
|
|
573
|
+
const prompt = `
|
|
574
|
+
Review this diff. Output JSON:
|
|
575
|
+
{issues: [{line, severity, message, fix}]}
|
|
576
|
+
`;
|
|
577
|
+
|
|
578
|
+
// 3. Use appropriate model
|
|
579
|
+
const model = selectModel({
|
|
580
|
+
complexity: 'moderate',
|
|
581
|
+
requiresReasoning: true,
|
|
582
|
+
qualityCritical: true,
|
|
583
|
+
outputLength: 'medium'
|
|
584
|
+
}); // Returns 'opus' (requiresReasoning && qualityCritical)
|
|
585
|
+
|
|
586
|
+
// 4. Request structured output
|
|
587
|
+
return await analyze(relevantCode, prompt, { model });
|
|
588
|
+
}
|
|
45
589
|
```
|
|
46
590
|
|
|
47
|
-
##
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
-
|
|
591
|
+
## Best Practices
|
|
592
|
+
|
|
593
|
+
### Do's
|
|
594
|
+
|
|
595
|
+
- **Read only what's needed** - use line ranges and targeted searches
|
|
596
|
+
- **Use direct language** - commands over requests
|
|
597
|
+
- **Structure outputs** - JSON/YAML over prose
|
|
598
|
+
- **Batch operations** - combine related requests
|
|
599
|
+
- **Cache results** - avoid redundant computation
|
|
600
|
+
- **Choose right model** - Haiku for simple, Opus for complex
|
|
601
|
+
- **Limit search results** - use head_limit parameters
|
|
602
|
+
- **Progressive loading** - start minimal, expand if needed
|
|
603
|
+
|
|
604
|
+
### Don'ts
|
|
605
|
+
|
|
606
|
+
- Don't read entire files when you need one function
|
|
607
|
+
- Don't include full conversation history when not needed
|
|
608
|
+
- Don't use verbose, polite language in prompts
|
|
609
|
+
- Don't request detailed explanations for simple tasks
|
|
610
|
+
- Don't repeat context across multiple requests
|
|
611
|
+
- Don't use the most powerful model for simple tasks
|
|
612
|
+
- Don't search without scope limits
|
|
613
|
+
- Don't ignore token usage in your workflow
|
|
614
|
+
|
|
615
|
+
## References
|
|
616
|
+
|
|
617
|
+
- [OpenAI Tokenizer](https://platform.openai.com/tokenizer)
|
|
618
|
+
- [Anthropic Token Counting](https://docs.anthropic.com/claude/docs/counting-tokens)
|
|
619
|
+
- [Prompt Engineering Guide](https://www.promptingguide.ai/)
|
|
620
|
+
- [AI Cost Optimization Strategies](https://docs.anthropic.com/claude/docs/reducing-costs)
|