@git.zone/tsdoc 1.4.5 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist_ts/00_commitinfo_data.js +3 -3
- package/dist_ts/aidocs_classes/commit.js +16 -10
- package/dist_ts/aidocs_classes/description.js +14 -6
- package/dist_ts/aidocs_classes/projectcontext.d.ts +22 -0
- package/dist_ts/aidocs_classes/projectcontext.js +41 -1
- package/dist_ts/aidocs_classes/readme.js +10 -4
- package/dist_ts/classes.aidoc.d.ts +22 -0
- package/dist_ts/classes.aidoc.js +31 -1
- package/dist_ts/cli.js +110 -1
- package/dist_ts/context/config-manager.d.ts +58 -0
- package/dist_ts/context/config-manager.js +183 -0
- package/dist_ts/context/context-trimmer.d.ts +52 -0
- package/dist_ts/context/context-trimmer.js +199 -0
- package/dist_ts/context/enhanced-context.d.ts +75 -0
- package/dist_ts/context/enhanced-context.js +272 -0
- package/dist_ts/context/index.d.ts +7 -0
- package/dist_ts/context/index.js +8 -0
- package/dist_ts/context/task-context-factory.d.ts +46 -0
- package/dist_ts/context/task-context-factory.js +109 -0
- package/dist_ts/context/types.d.ts +89 -0
- package/dist_ts/context/types.js +2 -0
- package/dist_ts/plugins.d.ts +2 -1
- package/dist_ts/plugins.js +3 -2
- package/npmextra.json +10 -9
- package/package.json +12 -10
- package/readme.md +588 -174
- package/readme.plan.md +314 -0
- package/ts/00_commitinfo_data.ts +2 -2
- package/ts/aidocs_classes/commit.ts +21 -9
- package/ts/aidocs_classes/description.ts +16 -5
- package/ts/aidocs_classes/projectcontext.ts +43 -0
- package/ts/aidocs_classes/readme.ts +11 -3
- package/ts/classes.aidoc.ts +33 -0
- package/ts/cli.ts +128 -0
- package/ts/context/config-manager.ts +209 -0
- package/ts/context/context-trimmer.ts +246 -0
- package/ts/context/enhanced-context.ts +343 -0
- package/ts/context/index.ts +32 -0
- package/ts/context/task-context-factory.ts +138 -0
- package/ts/context/types.ts +95 -0
- package/ts/plugins.ts +2 -1
package/readme.plan.md
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
# TSDocs Context Optimization Plan
|
|
2
|
+
|
|
3
|
+
## Problem Statement
|
|
4
|
+
|
|
5
|
+
For large TypeScript projects, the context generated for AI-based documentation creation becomes too large, potentially exceeding even o4-mini's 200K token limit. This affects the ability to effectively generate:
|
|
6
|
+
|
|
7
|
+
- Project documentation (README.md)
|
|
8
|
+
- API descriptions and keywords
|
|
9
|
+
- Commit messages and changelogs
|
|
10
|
+
|
|
11
|
+
The current implementation simply includes all TypeScript files and key project files; it lacks any intelligent selection, prioritization, or content-reduction mechanisms.
|
|
12
|
+
|
|
13
|
+
## Analysis of Approaches
|
|
14
|
+
|
|
15
|
+
### 1. Smart Content Selection
|
|
16
|
+
|
|
17
|
+
**Description:** Intelligently select only files that are necessary for the specific task being performed, using heuristic rules.
|
|
18
|
+
|
|
19
|
+
**Advantages:**
|
|
20
|
+
- Simple to implement
|
|
21
|
+
- Predictable behavior
|
|
22
|
+
- Can be fine-tuned for different operations
|
|
23
|
+
|
|
24
|
+
**Disadvantages:**
|
|
25
|
+
- Requires manual tuning of rules
|
|
26
|
+
- May miss important context in complex projects
|
|
27
|
+
- Static approach lacks adaptability
|
|
28
|
+
|
|
29
|
+
**Implementation Complexity:** Medium
|
|
30
|
+
|
|
31
|
+
### 2. File Prioritization
|
|
32
|
+
|
|
33
|
+
**Description:** Rank files by relevance using git history, file size, import/export analysis, and relationship to the current task.
|
|
34
|
+
|
|
35
|
+
**Advantages:**
|
|
36
|
+
- Adaptively includes the most relevant files first
|
|
37
|
+
- Maintains context for frequently changed or central files
|
|
38
|
+
- Can leverage git history for additional signals
|
|
39
|
+
|
|
40
|
+
**Disadvantages:**
|
|
41
|
+
- Complexity in determining accurate relevance scores
|
|
42
|
+
- Requires analyzing project structure
|
|
43
|
+
- May require scanning imports/exports for dependency analysis
|
|
44
|
+
|
|
45
|
+
**Implementation Complexity:** High
|
|
46
|
+
|
|
47
|
+
### 3. Chunking Strategy
|
|
48
|
+
|
|
49
|
+
**Description:** Process the project in logical segments, generating intermediate results that are then combined to create the final output.
|
|
50
|
+
|
|
51
|
+
**Advantages:**
|
|
52
|
+
- Can handle projects of any size
|
|
53
|
+
- Focused context for each specific part
|
|
54
|
+
- May improve quality by focusing on specific areas deeply
|
|
55
|
+
|
|
56
|
+
**Disadvantages:**
|
|
57
|
+
- Complex orchestration of multiple AI calls
|
|
58
|
+
- Challenge in maintaining consistency across chunks
|
|
59
|
+
- May increase time and cost for processing
|
|
60
|
+
|
|
61
|
+
**Implementation Complexity:** High
|
|
62
|
+
|
|
63
|
+
### 4. Dynamic Context Trimming
|
|
64
|
+
|
|
65
|
+
**Description:** Automatically reduce context by removing non-essential code while preserving structure. Techniques include:
|
|
66
|
+
- Removing implementation details but keeping interfaces and type definitions
|
|
67
|
+
- Truncating large functions while keeping signatures
|
|
68
|
+
- Removing comments and whitespace (except JSDoc)
|
|
69
|
+
- Keeping only imports/exports for context files
|
|
70
|
+
|
|
71
|
+
**Advantages:**
|
|
72
|
+
- Preserves full project structure
|
|
73
|
+
- Flexible token usage based on importance
|
|
74
|
+
- Good balance between completeness and token efficiency
|
|
75
|
+
|
|
76
|
+
**Disadvantages:**
|
|
77
|
+
- Potential to remove important implementation details
|
|
78
|
+
- Risk of missing context needed for specific tasks
|
|
79
|
+
- Complex rules for what to trim vs keep
|
|
80
|
+
|
|
81
|
+
**Implementation Complexity:** Medium
|
|
82
|
+
|
|
83
|
+
### 5. Embeddings-Based Retrieval
|
|
84
|
+
|
|
85
|
+
**Description:** Create vector embeddings of project files and retrieve only the most relevant ones for a specific task using semantic similarity.
|
|
86
|
+
|
|
87
|
+
**Advantages:**
|
|
88
|
+
- Highly adaptive to different types of requests
|
|
89
|
+
- Leverages semantic understanding of content
|
|
90
|
+
- Can scale to extremely large projects
|
|
91
|
+
|
|
92
|
+
**Disadvantages:**
|
|
93
|
+
- Requires setting up and managing embeddings database
|
|
94
|
+
- Added complexity of running vector similarity searches
|
|
95
|
+
- Higher resource requirements for maintaining embeddings
|
|
96
|
+
|
|
97
|
+
**Implementation Complexity:** Very High
|
|
98
|
+
|
|
99
|
+
### 6. Task-Specific Contexts
|
|
100
|
+
|
|
101
|
+
**Description:** Create separate optimized contexts for different tasks (readme, commit messages, etc.) with distinct file selection and processing strategies.
|
|
102
|
+
|
|
103
|
+
**Advantages:**
|
|
104
|
+
- Highly optimized for each specific task
|
|
105
|
+
- Efficient token usage for each operation
|
|
106
|
+
- Improved quality through task-focused contexts
|
|
107
|
+
|
|
108
|
+
**Disadvantages:**
|
|
109
|
+
- Maintenance of multiple context building strategies
|
|
110
|
+
- More complex configuration
|
|
111
|
+
- Potential duplication in implementation
|
|
112
|
+
|
|
113
|
+
**Implementation Complexity:** Medium
|
|
114
|
+
|
|
115
|
+
### 7. Recursive Summarization
|
|
116
|
+
|
|
117
|
+
**Description:** Summarize larger files first, then include these summaries in the final context along with smaller files included in full.
|
|
118
|
+
|
|
119
|
+
**Advantages:**
|
|
120
|
+
- Can handle arbitrary project sizes
|
|
121
|
+
- Preserves essential information from all files
|
|
122
|
+
- Balanced approach to token usage
|
|
123
|
+
|
|
124
|
+
**Disadvantages:**
|
|
125
|
+
- Quality loss from summarization
|
|
126
|
+
- Increased processing time from multiple AI calls
|
|
127
|
+
- Complex orchestration logic
|
|
128
|
+
|
|
129
|
+
**Implementation Complexity:** High
|
|
130
|
+
|
|
131
|
+
## Implementation Strategy
|
|
132
|
+
|
|
133
|
+
We propose a phased implementation, starting with the most impactful and straightforward techniques and building toward more complex solutions as needed:
|
|
134
|
+
|
|
135
|
+
### Phase 1: Foundation (1-2 weeks)
|
|
136
|
+
|
|
137
|
+
1. **Implement Dynamic Context Trimming**
|
|
138
|
+
- Create a `ContextProcessor` class that takes SmartFile objects and applies trimming rules
|
|
139
|
+
- Implement configurable trimming rules (remove implementations, keep signatures)
|
|
140
|
+
- Add a configuration option to control trimming aggressiveness
|
|
141
|
+
- Support preserving JSDoc comments while removing other comments
|
|
142
|
+
|
|
143
|
+
2. **Enhance Token Monitoring**
|
|
144
|
+
- Track token usage per file to identify problematic files
|
|
145
|
+
- Implement token budgeting to stay within limits
|
|
146
|
+
- Add detailed token reporting for optimization
|
|
147
|
+
|
|
148
|
+
### Phase 2: Smart Selection (2-3 weeks)
|
|
149
|
+
|
|
150
|
+
3. **Implement Task-Specific Contexts**
|
|
151
|
+
- Create specialized context builders for readme, commit messages, and descriptions
|
|
152
|
+
- Customize file selection rules for each task
|
|
153
|
+
- Add configuration options for task-specific settings
|
|
154
|
+
|
|
155
|
+
4. **Add Smart Content Selection**
|
|
156
|
+
- Implement heuristic rules for file importance
|
|
157
|
+
- Create configuration for inclusion/exclusion patterns
|
|
158
|
+
- Add ability to focus on specific directories or modules
|
|
159
|
+
|
|
160
|
+
### Phase 3: Advanced Techniques (3-4 weeks)
|
|
161
|
+
|
|
162
|
+
5. **Implement File Prioritization**
|
|
163
|
+
- Add git history analysis to identify frequently changed files
|
|
164
|
+
- Implement dependency analysis to identify central files
|
|
165
|
+
- Create a scoring system for file relevance
|
|
166
|
+
|
|
167
|
+
6. **Add Optional Recursive Summarization**
|
|
168
|
+
- Implement file summarization for large files
|
|
169
|
+
- Create a hybrid approach that mixes full files and summaries
|
|
170
|
+
- Add configuration to control summarization thresholds
|
|
171
|
+
|
|
172
|
+
### Phase 4: Research-Based Approaches (Future Consideration)
|
|
173
|
+
|
|
174
|
+
7. **Research and Evaluate Embeddings-Based Retrieval**
|
|
175
|
+
- Prototype embeddings creation for TypeScript files
|
|
176
|
+
- Evaluate performance and accuracy
|
|
177
|
+
- Implement if benefits justify the complexity
|
|
178
|
+
|
|
179
|
+
8. **Explore Chunking Strategies**
|
|
180
|
+
- Research effective chunking approaches for documentation
|
|
181
|
+
- Prototype and evaluate performance
|
|
182
|
+
- Implement if benefits justify the complexity
|
|
183
|
+
|
|
184
|
+
## Technical Design
|
|
185
|
+
|
|
186
|
+
### Core Components
|
|
187
|
+
|
|
188
|
+
1. **ContextBuilder** - Enhanced version of current ProjectContext
|
|
189
|
+
```typescript
|
|
190
|
+
interface IContextBuilder {
|
|
191
|
+
buildContext(): Promise<string>;
|
|
192
|
+
getTokenCount(): number;
|
|
193
|
+
setContextMode(mode: 'normal' | 'trimmed' | 'summarized'): void;
|
|
194
|
+
setTokenBudget(maxTokens: number): void;
|
|
195
|
+
setPrioritizationStrategy(strategy: IPrioritizationStrategy): void;
|
|
196
|
+
}
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
2. **FileProcessor** - Handles per-file processing and trimming
|
|
200
|
+
```typescript
|
|
201
|
+
interface IFileProcessor {
|
|
202
|
+
processFile(file: SmartFile): Promise<string>;
|
|
203
|
+
setProcessingMode(mode: 'full' | 'trim' | 'summarize'): void;
|
|
204
|
+
getTokenCount(): number;
|
|
205
|
+
}
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
3. **PrioritizationStrategy** - Ranks files by importance
|
|
209
|
+
```typescript
|
|
210
|
+
interface IPrioritizationStrategy {
|
|
211
|
+
rankFiles(files: SmartFile[], context: string): Promise<SmartFile[]>;
|
|
212
|
+
setImportanceMetrics(metrics: IImportanceMetrics): void;
|
|
213
|
+
}
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
4. **TaskContextFactory** - Creates optimized contexts for specific tasks
|
|
217
|
+
```typescript
|
|
218
|
+
interface ITaskContextFactory {
|
|
219
|
+
createContextForReadme(projectDir: string): Promise<string>;
|
|
220
|
+
createContextForCommit(projectDir: string, diff: string): Promise<string>;
|
|
221
|
+
createContextForDescription(projectDir: string): Promise<string>;
|
|
222
|
+
}
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
### Configuration Options
|
|
226
|
+
|
|
227
|
+
The system will support configuration via a new section in `npmextra.json`:
|
|
228
|
+
|
|
229
|
+
```json
|
|
230
|
+
{
|
|
231
|
+
"tsdoc": {
|
|
232
|
+
"context": {
|
|
233
|
+
"maxTokens": 190000,
|
|
234
|
+
"defaultMode": "dynamic",
|
|
235
|
+
"taskSpecificSettings": {
|
|
236
|
+
"readme": {
|
|
237
|
+
"mode": "full",
|
|
238
|
+
"includePaths": ["src/", "lib/"],
|
|
239
|
+
"excludePaths": ["test/", "examples/"]
|
|
240
|
+
},
|
|
241
|
+
"commit": {
|
|
242
|
+
"mode": "trimmed",
|
|
243
|
+
"focusOnChangedFiles": true
|
|
244
|
+
},
|
|
245
|
+
"description": {
|
|
246
|
+
"mode": "summarized",
|
|
247
|
+
"includePackageInfo": true
|
|
248
|
+
}
|
|
249
|
+
},
|
|
250
|
+
"trimming": {
|
|
251
|
+
"removeImplementations": true,
|
|
252
|
+
"preserveInterfaces": true,
|
|
253
|
+
"preserveTypeDefs": true,
|
|
254
|
+
"preserveJSDoc": true,
|
|
255
|
+
"maxFunctionLines": 5
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
## Cost-Benefit Analysis
|
|
263
|
+
|
|
264
|
+
### Cost Considerations
|
|
265
|
+
|
|
266
|
+
1. **Development costs**
|
|
267
|
+
- Initial implementation of foundational components (~30-40 hours)
|
|
268
|
+
- Testing and validation across different project sizes (~10-15 hours)
|
|
269
|
+
- Documentation and configuration examples (~5 hours)
|
|
270
|
+
|
|
271
|
+
2. **Operational costs**
|
|
272
|
+
- Potential increased processing time for context preparation
|
|
273
|
+
- Additional API calls for summarization or embeddings approaches
|
|
274
|
+
- Monitoring and maintenance of the system
|
|
275
|
+
|
|
276
|
+
### Benefits
|
|
277
|
+
|
|
278
|
+
1. **Scalability**
|
|
279
|
+
- Support for projects of any size, up to and beyond o4-mini's 200K token limit
|
|
280
|
+
- Future-proof design that can adapt to different models and token limits
|
|
281
|
+
|
|
282
|
+
2. **Quality improvements**
|
|
283
|
+
- More focused contexts lead to better AI outputs
|
|
284
|
+
- Task-specific optimization improves relevance
|
|
285
|
+
- Consistent performance regardless of project size
|
|
286
|
+
|
|
287
|
+
3. **User experience**
|
|
288
|
+
- Predictable behavior for all project sizes
|
|
289
|
+
- Transparent token usage reporting
|
|
290
|
+
- Configuration options for different usage patterns
|
|
291
|
+
|
|
292
|
+
## First Deliverable
|
|
293
|
+
|
|
294
|
+
For immediate improvements, we recommend implementing Dynamic Context Trimming and Task-Specific Contexts first, as these offer the best balance of impact and implementation complexity.
|
|
295
|
+
|
|
296
|
+
### Implementation Plan for Dynamic Context Trimming
|
|
297
|
+
|
|
298
|
+
1. Create a basic `ContextTrimmer` class that processes TypeScript files:
|
|
299
|
+
- Remove function bodies but keep signatures
|
|
300
|
+
- Preserve interface and type definitions
|
|
301
|
+
- Keep imports and exports
|
|
302
|
+
- Preserve JSDoc comments
|
|
303
|
+
|
|
304
|
+
2. Integrate with the existing ProjectContext class:
|
|
305
|
+
- Add a trimming mode option
|
|
306
|
+
- Apply trimming during the context building process
|
|
307
|
+
- Track and report token savings
|
|
308
|
+
|
|
309
|
+
3. Modify the CLI to support trimming options:
|
|
310
|
+
- Add a `--trim` flag to enable trimming
|
|
311
|
+
- Add a `--trim-level` option for controlling aggressiveness
|
|
312
|
+
- Show token usage with and without trimming
|
|
313
|
+
|
|
314
|
+
This approach could reduce token usage by 40-70% while preserving the essential structure of the codebase, making it suitable for large projects while maintaining high-quality AI outputs.
|
package/ts/00_commitinfo_data.ts
CHANGED
|
@@ -3,6 +3,6 @@
|
|
|
3
3
|
*/
|
|
4
4
|
export const commitinfo = {
|
|
5
5
|
name: '@git.zone/tsdoc',
|
|
6
|
-
version: '1.
|
|
7
|
-
description: '
|
|
6
|
+
version: '1.5.0',
|
|
7
|
+
description: 'A comprehensive TypeScript documentation tool that leverages AI to generate and enhance project documentation, including dynamic README creation, API docs via TypeDoc, and smart commit message generation.'
|
|
8
8
|
}
|
|
@@ -31,15 +31,27 @@ export class Commit {
|
|
|
31
31
|
'pnpm-lock.yaml',
|
|
32
32
|
'package-lock.json',
|
|
33
33
|
]);
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
34
|
+
// Use the new TaskContextFactory for optimized context
|
|
35
|
+
const taskContextFactory = new (await import('../context/index.js')).TaskContextFactory(this.projectDir);
|
|
36
|
+
await taskContextFactory.initialize();
|
|
37
|
+
|
|
38
|
+
// Generate context specifically for commit task
|
|
39
|
+
const contextResult = await taskContextFactory.createContextForCommit(
|
|
40
|
+
diffStringArray[0] ? diffStringArray.join('\n\n') : 'No changes.'
|
|
41
|
+
);
|
|
42
|
+
|
|
43
|
+
// Get the optimized context string
|
|
44
|
+
let contextString = contextResult.context;
|
|
45
|
+
|
|
46
|
+
// Log token usage statistics
|
|
47
|
+
console.log(`Token usage - Context: ${contextResult.tokenCount}, Files: ${contextResult.includedFiles.length + contextResult.trimmedFiles.length}, Savings: ${contextResult.tokenSavings}`);
|
|
48
|
+
|
|
49
|
+
// Check for token overflow against model limits
|
|
50
|
+
const MODEL_TOKEN_LIMIT = 200000; // o4-mini
|
|
51
|
+
if (contextResult.tokenCount > MODEL_TOKEN_LIMIT * 0.9) {
|
|
52
|
+
console.log(`⚠️ Warning: Context size (${contextResult.tokenCount} tokens) is close to or exceeds model limit (${MODEL_TOKEN_LIMIT} tokens).`);
|
|
53
|
+
console.log(`The model may not be able to process all information effectively.`);
|
|
54
|
+
}
|
|
43
55
|
|
|
44
56
|
let result = await this.aiDocsRef.openaiInstance.chat({
|
|
45
57
|
systemMessage: `
|
|
@@ -18,9 +18,16 @@ export class Description {
|
|
|
18
18
|
}
|
|
19
19
|
|
|
20
20
|
public async build() {
|
|
21
|
-
//
|
|
22
|
-
const
|
|
23
|
-
|
|
21
|
+
// Use the new TaskContextFactory for optimized context
|
|
22
|
+
const taskContextFactory = new (await import('../context/index.js')).TaskContextFactory(this.projectDir);
|
|
23
|
+
await taskContextFactory.initialize();
|
|
24
|
+
|
|
25
|
+
// Generate context specifically for description task
|
|
26
|
+
const contextResult = await taskContextFactory.createContextForDescription();
|
|
27
|
+
const contextString = contextResult.context;
|
|
28
|
+
|
|
29
|
+
// Log token usage statistics
|
|
30
|
+
console.log(`Token usage - Context: ${contextResult.tokenCount}, Files: ${contextResult.includedFiles.length + contextResult.trimmedFiles.length}, Savings: ${contextResult.tokenSavings}`);
|
|
24
31
|
|
|
25
32
|
let result = await this.aiDocsRef.openaiInstance.chat({
|
|
26
33
|
systemMessage: `
|
|
@@ -48,7 +55,11 @@ Don't wrap the JSON in three ticks json!!!
|
|
|
48
55
|
result.message.replace('```json', '').replace('```', ''),
|
|
49
56
|
);
|
|
50
57
|
|
|
51
|
-
|
|
58
|
+
// Create a standard ProjectContext instance for file operations
|
|
59
|
+
const projectContext = new ProjectContext(this.projectDir);
|
|
60
|
+
const files = await projectContext.gatherFiles();
|
|
61
|
+
|
|
62
|
+
const npmextraJson = files.smartfilesNpmextraJSON;
|
|
52
63
|
const npmextraJsonContent = JSON.parse(npmextraJson.contents.toString());
|
|
53
64
|
|
|
54
65
|
npmextraJsonContent.gitzone.module.description = resultObject.description;
|
|
@@ -58,7 +69,7 @@ Don't wrap the JSON in three ticks json!!!
|
|
|
58
69
|
await npmextraJson.write();
|
|
59
70
|
|
|
60
71
|
// do the same with packageJson
|
|
61
|
-
const packageJson =
|
|
72
|
+
const packageJson = files.smartfilePackageJSON;
|
|
62
73
|
const packageJsonContent = JSON.parse(packageJson.contents.toString());
|
|
63
74
|
packageJsonContent.description = resultObject.description;
|
|
64
75
|
packageJsonContent.keywords = resultObject.keywords;
|
|
@@ -5,6 +5,8 @@ export class ProjectContext {
|
|
|
5
5
|
|
|
6
6
|
// INSTANCE
|
|
7
7
|
public projectDir: string;
|
|
8
|
+
private tokenCount: number = 0;
|
|
9
|
+
private contextString: string = '';
|
|
8
10
|
|
|
9
11
|
constructor(projectDirArg: string) {
|
|
10
12
|
this.projectDir = projectDirArg;
|
|
@@ -63,6 +65,24 @@ ${smartfile.contents.toString()}
|
|
|
63
65
|
.join('\n');
|
|
64
66
|
}
|
|
65
67
|
|
|
68
|
+
/**
|
|
69
|
+
* Calculate the token count for a string using the GPT tokenizer
|
|
70
|
+
* @param text The text to count tokens for
|
|
71
|
+
* @param model The model name (default: gpt-3.5-turbo); note that it is not currently passed to the tokenizer call below
|
|
72
|
+
* @returns The number of tokens in the text
|
|
73
|
+
*/
|
|
74
|
+
public countTokens(text: string, model: string = 'gpt-3.5-turbo'): number {
|
|
75
|
+
try {
|
|
76
|
+
// Use the gpt-tokenizer library to count tokens
|
|
77
|
+
const tokens = plugins.gptTokenizer.encode(text);
|
|
78
|
+
return tokens.length;
|
|
79
|
+
} catch (error) {
|
|
80
|
+
console.error('Error counting tokens:', error);
|
|
81
|
+
// Provide a rough estimate (4 chars per token) if tokenization fails
|
|
82
|
+
return Math.ceil(text.length / 4);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
|
|
66
86
|
private async buildContext(dirArg: string) {
|
|
67
87
|
const files = await this.gatherFiles();
|
|
68
88
|
let context = await this.convertFilesToContext([
|
|
@@ -73,10 +93,33 @@ ${smartfile.contents.toString()}
|
|
|
73
93
|
...files.smartfilesMod,
|
|
74
94
|
...files.smartfilesTest,
|
|
75
95
|
]);
|
|
96
|
+
// Count tokens in the context
|
|
97
|
+
this.contextString = context;
|
|
98
|
+
this.tokenCount = this.countTokens(context);
|
|
99
|
+
|
|
76
100
|
// console.log(context);
|
|
77
101
|
return context;
|
|
78
102
|
}
|
|
79
103
|
|
|
104
|
+
/**
|
|
105
|
+
* Get the token count for the current context
|
|
106
|
+
* @returns The number of tokens in the context
|
|
107
|
+
*/
|
|
108
|
+
public getTokenCount(): number {
|
|
109
|
+
return this.tokenCount;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
/**
|
|
113
|
+
* Get both the context string and its token count
|
|
114
|
+
* @returns An object containing the context string and token count
|
|
115
|
+
*/
|
|
116
|
+
public getContextWithTokenCount(): { context: string; tokenCount: number } {
|
|
117
|
+
return {
|
|
118
|
+
context: this.contextString,
|
|
119
|
+
tokenCount: this.tokenCount
|
|
120
|
+
};
|
|
121
|
+
}
|
|
122
|
+
|
|
80
123
|
public async update() {
|
|
81
124
|
const result = await this.buildContext(this.projectDir);
|
|
82
125
|
return result;
|
|
@@ -17,11 +17,19 @@ export class Readme {
|
|
|
17
17
|
public async build() {
|
|
18
18
|
let finalReadmeString = ``;
|
|
19
19
|
|
|
20
|
-
//
|
|
21
|
-
const
|
|
22
|
-
|
|
20
|
+
// Use the new TaskContextFactory for optimized context
|
|
21
|
+
const taskContextFactory = new (await import('../context/index.js')).TaskContextFactory(this.projectDir);
|
|
22
|
+
await taskContextFactory.initialize();
|
|
23
|
+
|
|
24
|
+
// Generate context specifically for readme task
|
|
25
|
+
const contextResult = await taskContextFactory.createContextForReadme();
|
|
26
|
+
const contextString = contextResult.context;
|
|
27
|
+
|
|
28
|
+
// Log token usage statistics
|
|
29
|
+
console.log(`Token usage - Context: ${contextResult.tokenCount}, Files: ${contextResult.includedFiles.length + contextResult.trimmedFiles.length}, Savings: ${contextResult.tokenSavings}`);
|
|
23
30
|
|
|
24
31
|
// let's first check legal before introducing any cost
|
|
32
|
+
const projectContext = new ProjectContext(this.projectDir);
|
|
25
33
|
const npmExtraJson = JSON.parse(
|
|
26
34
|
(await projectContext.gatherFiles()).smartfilesNpmextraJSON.contents.toString()
|
|
27
35
|
);
|
package/ts/classes.aidoc.ts
CHANGED
|
@@ -94,4 +94,37 @@ export class AiDoc {
|
|
|
94
94
|
const projectContextInstance = new aiDocsClasses.ProjectContext(projectDirArg);
|
|
95
95
|
return await projectContextInstance.gatherFiles();
|
|
96
96
|
}
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* Get the context with token count information
|
|
100
|
+
* @param projectDirArg The path to the project directory
|
|
101
|
+
* @returns An object containing the context string and its token count
|
|
102
|
+
*/
|
|
103
|
+
public async getProjectContextWithTokenCount(projectDirArg: string) {
|
|
104
|
+
const projectContextInstance = new aiDocsClasses.ProjectContext(projectDirArg);
|
|
105
|
+
await projectContextInstance.update();
|
|
106
|
+
return projectContextInstance.getContextWithTokenCount();
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* Get just the token count for a project's context
|
|
111
|
+
* @param projectDirArg The path to the project directory
|
|
112
|
+
* @returns The number of tokens in the project context
|
|
113
|
+
*/
|
|
114
|
+
public async getProjectContextTokenCount(projectDirArg: string) {
|
|
115
|
+
const projectContextInstance = new aiDocsClasses.ProjectContext(projectDirArg);
|
|
116
|
+
await projectContextInstance.update();
|
|
117
|
+
return projectContextInstance.getTokenCount();
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
/**
|
|
121
|
+
* Count tokens in a text string using GPT tokenizer
|
|
122
|
+
* @param text The text to count tokens for
|
|
123
|
+
* @param model The model to use for tokenization (default: gpt-3.5-turbo)
|
|
124
|
+
* @returns The number of tokens in the text
|
|
125
|
+
*/
|
|
126
|
+
public countTokens(text: string, model: string = 'gpt-3.5-turbo'): number {
|
|
127
|
+
const projectContextInstance = new aiDocsClasses.ProjectContext('');
|
|
128
|
+
return projectContextInstance.countTokens(text, model);
|
|
129
|
+
}
|
|
97
130
|
}
|
package/ts/cli.ts
CHANGED
|
@@ -4,6 +4,7 @@ import { logger } from './logging.js';
|
|
|
4
4
|
|
|
5
5
|
import { TypeDoc } from './classes.typedoc.js';
|
|
6
6
|
import { AiDoc } from './classes.aidoc.js';
|
|
7
|
+
import * as context from './context/index.js';
|
|
7
8
|
|
|
8
9
|
export const run = async () => {
|
|
9
10
|
const tsdocCli = new plugins.smartcli.Smartcli();
|
|
@@ -30,6 +31,18 @@ export const run = async () => {
|
|
|
30
31
|
tsdocCli.addCommand('aidoc').subscribe(async (argvArg) => {
|
|
31
32
|
const aidocInstance = new AiDoc();
|
|
32
33
|
await aidocInstance.start();
|
|
34
|
+
|
|
35
|
+
// Get context token count if requested
|
|
36
|
+
if (argvArg.tokens || argvArg.showTokens) {
|
|
37
|
+
logger.log('info', `Calculating context token count...`);
|
|
38
|
+
const tokenCount = await aidocInstance.getProjectContextTokenCount(paths.cwd);
|
|
39
|
+
logger.log('ok', `Total context token count: ${tokenCount}`);
|
|
40
|
+
|
|
41
|
+
if (argvArg.tokensOnly) {
|
|
42
|
+
return; // Exit early if we only want token count
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
|
|
33
46
|
logger.log('info', `Generating new readme...`);
|
|
34
47
|
logger.log('info', `This may take some time...`);
|
|
35
48
|
await aidocInstance.buildReadme(paths.cwd);
|
|
@@ -38,6 +51,121 @@ export const run = async () => {
|
|
|
38
51
|
await aidocInstance.buildDescription(paths.cwd);
|
|
39
52
|
});
|
|
40
53
|
|
|
54
|
+
tsdocCli.addCommand('tokens').subscribe(async (argvArg) => {
|
|
55
|
+
const aidocInstance = new AiDoc();
|
|
56
|
+
await aidocInstance.start();
|
|
57
|
+
|
|
58
|
+
logger.log('info', `Calculating context token count...`);
|
|
59
|
+
|
|
60
|
+
// Determine context mode based on args
|
|
61
|
+
let contextMode: context.ContextMode = 'full';
|
|
62
|
+
if (argvArg.trim || argvArg.trimmed) {
|
|
63
|
+
contextMode = 'trimmed';
|
|
64
|
+
} else if (argvArg.summarize || argvArg.summarized) {
|
|
65
|
+
contextMode = 'summarized';
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
// Get task type if specified
|
|
69
|
+
let taskType: context.TaskType | undefined = undefined;
|
|
70
|
+
if (argvArg.task) {
|
|
71
|
+
if (['readme', 'commit', 'description'].includes(argvArg.task)) {
|
|
72
|
+
taskType = argvArg.task as context.TaskType;
|
|
73
|
+
} else {
|
|
74
|
+
logger.log('warn', `Unknown task type: ${argvArg.task}. Using default context.`);
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
// Use enhanced context
|
|
79
|
+
const taskFactory = new context.TaskContextFactory(paths.cwd);
|
|
80
|
+
await taskFactory.initialize();
|
|
81
|
+
|
|
82
|
+
let contextResult: context.IContextResult;
|
|
83
|
+
|
|
84
|
+
if (argvArg.all) {
|
|
85
|
+
// Show stats for all task types
|
|
86
|
+
const stats = await taskFactory.getTokenStats();
|
|
87
|
+
|
|
88
|
+
logger.log('ok', 'Token statistics by task:');
|
|
89
|
+
for (const [task, data] of Object.entries(stats)) {
|
|
90
|
+
logger.log('info', `\n${task.toUpperCase()}:`);
|
|
91
|
+
logger.log('info', ` Tokens: ${data.tokenCount}`);
|
|
92
|
+
logger.log('info', ` Token savings: ${data.savings}`);
|
|
93
|
+
logger.log('info', ` Files: ${data.includedFiles} included, ${data.trimmedFiles} trimmed, ${data.excludedFiles} excluded`);
|
|
94
|
+
|
|
95
|
+
// Calculate percentage of model context
|
|
96
|
+
const o4MiniPercentage = (data.tokenCount / 200000 * 100).toFixed(2);
|
|
97
|
+
logger.log('info', ` Context usage: ${o4MiniPercentage}% of o4-mini (200K tokens)`);
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
return;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
if (taskType) {
|
|
104
|
+
// Get context for specific task
|
|
105
|
+
contextResult = await taskFactory.createContextForTask(taskType);
|
|
106
|
+
} else {
|
|
107
|
+
// Get generic context with specified mode
|
|
108
|
+
const enhancedContext = new context.EnhancedContext(paths.cwd);
|
|
109
|
+
await enhancedContext.initialize();
|
|
110
|
+
enhancedContext.setContextMode(contextMode);
|
|
111
|
+
|
|
112
|
+
if (argvArg.maxTokens) {
|
|
113
|
+
enhancedContext.setTokenBudget(parseInt(argvArg.maxTokens, 10));
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
contextResult = await enhancedContext.buildContext();
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
// Display results
|
|
120
|
+
logger.log('ok', `Total context token count: ${contextResult.tokenCount}`);
|
|
121
|
+
logger.log('info', `Files included: ${contextResult.includedFiles.length}`);
|
|
122
|
+
logger.log('info', `Files trimmed: ${contextResult.trimmedFiles.length}`);
|
|
123
|
+
logger.log('info', `Files excluded: ${contextResult.excludedFiles.length}`);
|
|
124
|
+
logger.log('info', `Token savings: ${contextResult.tokenSavings}`);
|
|
125
|
+
|
|
126
|
+
if (argvArg.detailed) {
|
|
127
|
+
// Show more detailed info about the context and token usage
|
|
128
|
+
const o4MiniPercentage = (contextResult.tokenCount / 200000 * 100).toFixed(2);
|
|
129
|
+
logger.log('info', `Token usage: ${o4MiniPercentage}% of o4-mini 200K token context window`);
|
|
130
|
+
|
|
131
|
+
if (argvArg.model) {
|
|
132
|
+
// Show percentages for different models
|
|
133
|
+
if (argvArg.model === 'gpt4') {
|
|
134
|
+
const gpt4Percentage = (contextResult.tokenCount / 8192 * 100).toFixed(2);
|
|
135
|
+
logger.log('info', `Token usage (GPT-4): ${gpt4Percentage}% of 8192 token context window`);
|
|
136
|
+
} else if (argvArg.model === 'gpt35') {
|
|
137
|
+
const gpt35Percentage = (contextResult.tokenCount / 4096 * 100).toFixed(2);
|
|
138
|
+
logger.log('info', `Token usage (GPT-3.5): ${gpt35Percentage}% of 4096 token context window`);
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// Estimate cost (approximate values)
|
|
143
|
+
const o4MiniInputCost = 0.00005; // per 1K tokens for o4-mini
|
|
144
|
+
const estimatedCost = (contextResult.tokenCount / 1000 * o4MiniInputCost).toFixed(6);
|
|
145
|
+
logger.log('info', `Estimated input cost: $${estimatedCost} (o4-mini)`);
|
|
146
|
+
|
|
147
|
+
if (argvArg.listFiles) {
|
|
148
|
+
// List files included in context
|
|
149
|
+
logger.log('info', '\nIncluded files:');
|
|
150
|
+
contextResult.includedFiles.forEach(file => {
|
|
151
|
+
logger.log('info', ` ${file.relativePath} (${file.tokenCount} tokens)`);
|
|
152
|
+
});
|
|
153
|
+
|
|
154
|
+
logger.log('info', '\nTrimmed files:');
|
|
155
|
+
contextResult.trimmedFiles.forEach(file => {
|
|
156
|
+
logger.log('info', ` ${file.relativePath} (${file.tokenCount} tokens)`);
|
|
157
|
+
});
|
|
158
|
+
|
|
159
|
+
if (contextResult.excludedFiles.length > 0) {
|
|
160
|
+
logger.log('info', '\nExcluded files:');
|
|
161
|
+
contextResult.excludedFiles.forEach(file => {
|
|
162
|
+
logger.log('info', ` ${file.relativePath} (${file.tokenCount} tokens)`);
|
|
163
|
+
});
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
});
|
|
168
|
+
|
|
41
169
|
tsdocCli.addCommand('test').subscribe((argvArg) => {
|
|
42
170
|
tsdocCli.triggerCommand('typedoc', argvArg);
|
|
43
171
|
process.on('exit', async () => {
|