@afterxleep/doc-bot 1.17.0 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/package.json +7 -4
  2. package/src/__tests__/temp-docs-1756129972061/test.md +5 -0
  3. package/src/__tests__/temp-docs-1756129972071/test.md +5 -0
  4. package/src/__tests__/temp-docs-1756129972075/test.md +5 -0
  5. package/src/__tests__/temp-docs-1756129972077/test.md +5 -0
  6. package/src/__tests__/temp-docs-1756129972079/test.md +5 -0
  7. package/src/__tests__/temp-docs-1756130189361/test.md +5 -0
  8. package/src/__tests__/temp-docs-1756130189372/test.md +5 -0
  9. package/src/__tests__/temp-docs-1756130189375/test.md +5 -0
  10. package/src/__tests__/temp-docs-1756130189378/test.md +5 -0
  11. package/src/__tests__/temp-docs-1756130189379/test.md +5 -0
  12. package/src/__tests__/temp-docs-1756130271128/test.md +5 -0
  13. package/src/__tests__/temp-docs-1756130271139/test.md +5 -0
  14. package/src/__tests__/temp-docs-1756130271142/test.md +5 -0
  15. package/src/__tests__/temp-docs-1756130271145/test.md +5 -0
  16. package/src/__tests__/temp-docs-1756130271146/test.md +5 -0
  17. package/src/__tests__/temp-docs-1756130687030/test.md +5 -0
  18. package/src/__tests__/temp-docs-1756130687044/test.md +5 -0
  19. package/src/__tests__/temp-docs-1756130687048/test.md +5 -0
  20. package/src/__tests__/temp-docs-1756130687051/test.md +5 -0
  21. package/src/__tests__/temp-docs-1756130687053/test.md +5 -0
  22. package/src/__tests__/temp-docs-1756131694925/test.md +5 -0
  23. package/src/__tests__/temp-docs-1756131694937/test.md +5 -0
  24. package/src/__tests__/temp-docs-1756131694941/test.md +5 -0
  25. package/src/__tests__/temp-docs-1756131694944/test.md +5 -0
  26. package/src/__tests__/temp-docs-1756131694946/test.md +5 -0
  27. package/src/__tests__/temp-docs-1756133998710/test.md +5 -0
  28. package/src/__tests__/temp-docs-1756133998721/test.md +5 -0
  29. package/src/__tests__/temp-docs-1756133998724/test.md +5 -0
  30. package/src/__tests__/temp-docs-1756133998727/test.md +5 -0
  31. package/src/__tests__/temp-docs-1756133998729/test.md +5 -0
  32. package/src/__tests__/temp-docs-1756134345935/test.md +5 -0
  33. package/src/__tests__/temp-docs-1756134345948/test.md +5 -0
  34. package/src/__tests__/temp-docs-1756134345952/test.md +5 -0
  35. package/src/__tests__/temp-docs-1756134345954/test.md +5 -0
  36. package/src/__tests__/temp-docs-1756134345957/test.md +5 -0
  37. package/src/__tests__/temp-docsets-1756129972079/2e443167/Mock.docset/Contents/Info.plist +10 -0
  38. package/src/__tests__/temp-docsets-1756129972079/2e443167/Mock.docset/Contents/Resources/docSet.dsidx +0 -0
  39. package/src/__tests__/temp-docsets-1756129972079/Mock.docset/Contents/Info.plist +10 -0
  40. package/src/__tests__/temp-docsets-1756129972079/Mock.docset/Contents/Resources/docSet.dsidx +0 -0
  41. package/src/__tests__/temp-docsets-1756129972079/docsets.json +10 -0
  42. package/src/__tests__/temp-docsets-1756130189379/Mock.docset/Contents/Info.plist +10 -0
  43. package/src/__tests__/temp-docsets-1756130189379/Mock.docset/Contents/Resources/docSet.dsidx +0 -0
  44. package/src/__tests__/temp-docsets-1756130189379/a4934c14/Mock.docset/Contents/Info.plist +10 -0
  45. package/src/__tests__/temp-docsets-1756130189379/a4934c14/Mock.docset/Contents/Resources/docSet.dsidx +0 -0
  46. package/src/__tests__/temp-docsets-1756130189379/docsets.json +10 -0
  47. package/src/__tests__/temp-docsets-1756130271146/3f8acbb2/Mock.docset/Contents/Info.plist +10 -0
  48. package/src/__tests__/temp-docsets-1756130271146/3f8acbb2/Mock.docset/Contents/Resources/docSet.dsidx +0 -0
  49. package/src/__tests__/temp-docsets-1756130271146/Mock.docset/Contents/Info.plist +10 -0
  50. package/src/__tests__/temp-docsets-1756130271146/Mock.docset/Contents/Resources/docSet.dsidx +0 -0
  51. package/src/__tests__/temp-docsets-1756130271146/docsets.json +10 -0
  52. package/src/__tests__/temp-docsets-1756130687053/6810e6bd/Mock.docset/Contents/Info.plist +10 -0
  53. package/src/__tests__/temp-docsets-1756130687053/6810e6bd/Mock.docset/Contents/Resources/docSet.dsidx +0 -0
  54. package/src/__tests__/temp-docsets-1756130687053/Mock.docset/Contents/Info.plist +10 -0
  55. package/src/__tests__/temp-docsets-1756130687053/Mock.docset/Contents/Resources/docSet.dsidx +0 -0
  56. package/src/__tests__/temp-docsets-1756130687053/docsets.json +10 -0
  57. package/src/__tests__/temp-docsets-1756131694946/Mock.docset/Contents/Info.plist +10 -0
  58. package/src/__tests__/temp-docsets-1756131694946/Mock.docset/Contents/Resources/docSet.dsidx +0 -0
  59. package/src/__tests__/temp-docsets-1756131694946/dd703046/Mock.docset/Contents/Info.plist +10 -0
  60. package/src/__tests__/temp-docsets-1756131694946/dd703046/Mock.docset/Contents/Resources/docSet.dsidx +0 -0
  61. package/src/__tests__/temp-docsets-1756131694946/docsets.json +10 -0
  62. package/src/__tests__/temp-docsets-1756133998729/9e061136/Mock.docset/Contents/Info.plist +10 -0
  63. package/src/__tests__/temp-docsets-1756133998729/9e061136/Mock.docset/Contents/Resources/docSet.dsidx +0 -0
  64. package/src/__tests__/temp-docsets-1756133998729/Mock.docset/Contents/Info.plist +10 -0
  65. package/src/__tests__/temp-docsets-1756133998729/Mock.docset/Contents/Resources/docSet.dsidx +0 -0
  66. package/src/__tests__/temp-docsets-1756133998729/docsets.json +10 -0
  67. package/src/__tests__/temp-docsets-1756134345957/03e730af/Mock.docset/Contents/Info.plist +10 -0
  68. package/src/__tests__/temp-docsets-1756134345957/03e730af/Mock.docset/Contents/Resources/docSet.dsidx +0 -0
  69. package/src/__tests__/temp-docsets-1756134345957/Mock.docset/Contents/Info.plist +10 -0
  70. package/src/__tests__/temp-docsets-1756134345957/Mock.docset/Contents/Resources/docSet.dsidx +0 -0
  71. package/src/__tests__/temp-docsets-1756134345957/docsets.json +10 -0
  72. package/src/index.js +269 -63
  73. package/src/services/DocumentationService.js +26 -1
  74. package/src/services/PaginationService.js +378 -0
  75. package/src/services/__tests__/PaginationService.integration.test.js +185 -0
  76. package/src/services/__tests__/PaginationService.test.js +398 -0
  77. package/src/utils/TokenEstimator.js +134 -0
  78. package/prompts/file-docs.md +0 -69
  79. package/prompts/global-rules.md +0 -142
  80. package/prompts/mandatory-rules.md +0 -90
  81. package/prompts/search-results.md +0 -59
  82. package/prompts/system-prompt.md +0 -270
  83. package/src/__tests__/docset-integration.test.js +0 -146
  84. package/src/services/__tests__/DocumentationService.test.js +0 -318
  85. package/src/services/__tests__/UnifiedSearchService.test.js +0 -302
  86. package/src/services/docset/__tests__/EnhancedDocsetDatabase.test.js +0 -324
package/src/services/__tests__/PaginationService.test.js
@@ -0,0 +1,398 @@
+ import { PaginationService } from '../PaginationService.js';
+
+ describe('PaginationService', () => {
+   let paginationService;
+
+   beforeEach(() => {
+     paginationService = new PaginationService();
+   });
+
+   describe('estimateTokens', () => {
+     it('should estimate tokens using realistic tokenization', () => {
+       expect(paginationService.estimateTokens('test')).toBe(2); // Realistic: single word + overhead
+       expect(paginationService.estimateTokens('a'.repeat(100))).toBeGreaterThan(25); // More realistic than 4:1 ratio
+       expect(paginationService.estimateTokens('a'.repeat(1000))).toBeGreaterThan(250); // More realistic than 4:1 ratio
+     });
+
+     it('should handle empty or null input', () => {
+       expect(paginationService.estimateTokens('')).toBe(0);
+       expect(paginationService.estimateTokens(null)).toBe(0);
+       expect(paginationService.estimateTokens(undefined)).toBe(0);
+     });
+   });
+
+   describe('needsPagination', () => {
+     it('should return true for content over 24000 tokens', () => {
+       const largeContent = 'a'.repeat(100000); // 25000 tokens
+       expect(paginationService.needsPagination(largeContent)).toBe(true);
+     });
+
+     it('should return false for content under 24000 tokens', () => {
+       const smallContent = 'a'.repeat(50000); // 12500 tokens
+       expect(paginationService.needsPagination(smallContent)).toBe(false);
+     });
+   });
+
+   describe('paginateArray', () => {
+     const testItems = Array.from({ length: 25 }, (_, i) => ({ id: i + 1, name: `Item ${i + 1}` }));
+
+     it('should paginate array with default page size', () => {
+       const result = paginationService.paginateArray(testItems, 1);
+
+       expect(result.items).toHaveLength(10);
+       expect(result.page).toBe(1);
+       expect(result.pageSize).toBe(10);
+       expect(result.totalPages).toBe(3);
+       expect(result.totalItems).toBe(25);
+       expect(result.hasMore).toBe(true);
+       expect(result.nextPage).toBe(2);
+       expect(result.prevPage).toBe(null);
+     });
+
+     it('should paginate array with custom page size', () => {
+       const result = paginationService.paginateArray(testItems, 1, 5);
+
+       expect(result.items).toHaveLength(5);
+       expect(result.pageSize).toBe(5);
+       expect(result.totalPages).toBe(5);
+       expect(result.items[0].id).toBe(1);
+       expect(result.items[4].id).toBe(5);
+     });
+
+     it('should handle page 2 correctly', () => {
+       const result = paginationService.paginateArray(testItems, 2, 10);
+
+       expect(result.items).toHaveLength(10);
+       expect(result.page).toBe(2);
+       expect(result.items[0].id).toBe(11);
+       expect(result.items[9].id).toBe(20);
+       expect(result.prevPage).toBe(1);
+       expect(result.nextPage).toBe(3);
+     });
+
+     it('should handle last page correctly', () => {
+       const result = paginationService.paginateArray(testItems, 3, 10);
+
+       expect(result.items).toHaveLength(5);
+       expect(result.page).toBe(3);
+       expect(result.hasMore).toBe(false);
+       expect(result.nextPage).toBe(null);
+       expect(result.prevPage).toBe(2);
+     });
+
+     it('should handle empty array', () => {
+       const result = paginationService.paginateArray([], 1, 10);
+
+       expect(result.items).toHaveLength(0);
+       expect(result.totalPages).toBe(0);
+       expect(result.totalItems).toBe(0);
+       expect(result.hasMore).toBe(false);
+     });
+
+     it('should handle out of bounds page number', () => {
+       const result = paginationService.paginateArray(testItems, 10, 10);
+
+       expect(result.page).toBe(3); // Should cap at max page
+       expect(result.items).toHaveLength(5);
+     });
+
+     it('should handle negative page number', () => {
+       const result = paginationService.paginateArray(testItems, -1, 10);
+
+       expect(result.page).toBe(1); // Should default to page 1
+       expect(result.items[0].id).toBe(1);
+     });
+   });
+
+   describe('smartPaginate', () => {
+     const createLargeItems = (count) => {
+       return Array.from({ length: count }, (_, i) => ({
+         id: i + 1,
+         content: 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. '.repeat(100) // ~1400 chars each
+       }));
+     };
+
+     const formatter = (items) => {
+       return items.map(item => `Item ${item.id}: ${item.content}`).join('\n\n');
+     };
+
+     it('should auto-fit items within token limit when no page size specified', () => {
+       const items = createLargeItems(50);
+       const result = paginationService.smartPaginate(items, formatter, 1);
+
+       expect(result.pagination.totalItems).toBe(50);
+       expect(result.pagination.page).toBe(1);
+       expect(result.pagination.hasMore).toBe(true);
+
+       // Content should be under 20000 tokens
+       const estimatedTokens = paginationService.estimateTokens(result.content);
+       expect(estimatedTokens).toBeLessThanOrEqual(20000);
+       expect(estimatedTokens).toBeGreaterThan(0);
+     });
+
+     it('should use specified page size when provided', () => {
+       const items = createLargeItems(20);
+       const result = paginationService.smartPaginate(items, formatter, 1, 5);
+
+       expect(result.pagination.page).toBe(1);
+       expect(result.pagination.pageSize).toBe(5);
+       expect(result.pagination.totalPages).toBe(4);
+       expect(result.pagination.totalItems).toBe(20);
+     });
+
+     it('should handle page navigation in smart mode', () => {
+       const items = createLargeItems(100);
+       const page1 = paginationService.smartPaginate(items, formatter, 1);
+       const page2 = paginationService.smartPaginate(items, formatter, 2);
+
+       expect(page1.pagination.page).toBe(1);
+       expect(page1.pagination.nextPage).toBe(2);
+       expect(page1.pagination.prevPage).toBe(null);
+
+       expect(page2.pagination.page).toBe(2);
+       expect(page2.pagination.prevPage).toBe(1);
+     });
+
+     it('should handle empty items array', () => {
+       const result = paginationService.smartPaginate([], formatter, 1);
+
+       expect(result.content).toBe('No items found.');
+       expect(result.pagination.totalItems).toBe(0);
+       expect(result.pagination.hasMore).toBe(false);
+     });
+
+     it('should handle single small item', () => {
+       const items = [{ id: 1, content: 'Small content' }];
+       const result = paginationService.smartPaginate(items, formatter, 1);
+
+       expect(result.pagination.itemsInPage).toBe(1);
+       expect(result.pagination.totalItems).toBe(1);
+       expect(result.pagination.hasMore).toBe(false);
+     });
+   });
+
+   describe('formatPaginationInfo', () => {
+     it('should format pagination info with total pages', () => {
+       const pagination = {
+         page: 2,
+         totalPages: 5,
+         itemsInPage: 10,
+         totalItems: 50,
+         hasMore: true,
+         nextPage: 3,
+         prevPage: 1
+       };
+
+       const formatted = paginationService.formatPaginationInfo(pagination);
+
+       expect(formatted).toContain('Page 2 of 5');
+       expect(formatted).toContain('Showing 10 of 50 items');
+       expect(formatted).toContain('Previous: Add `page: 1`');
+       expect(formatted).toContain('Next: Add `page: 3`');
+     });
+
+     it('should format pagination info with estimated pages', () => {
+       const pagination = {
+         page: 1,
+         estimatedTotalPages: 3,
+         itemsInPage: 15,
+         totalItems: 45,
+         hasMore: true,
+         nextPage: 2,
+         prevPage: null
+       };
+
+       const formatted = paginationService.formatPaginationInfo(pagination);
+
+       expect(formatted).toContain('Page 1 of ~3');
+       expect(formatted).not.toContain('Previous:');
+       expect(formatted).toContain('Next: Add `page: 2`');
+     });
+
+     it('should format pagination info without navigation on last page', () => {
+       const pagination = {
+         page: 3,
+         totalPages: 3,
+         pageSize: 10,
+         totalItems: 25,
+         hasMore: false,
+         nextPage: null,
+         prevPage: 2
+       };
+
+       const formatted = paginationService.formatPaginationInfo(pagination);
+
+       expect(formatted).toContain('Page 3 of 3');
+       expect(formatted).toContain('Previous: Add `page: 2`');
+       expect(formatted).not.toContain('Next:');
+     });
+
+     it('should handle pageSize display correctly', () => {
+       const pagination = {
+         page: 2,
+         pageSize: 20,
+         totalPages: 3,
+         totalItems: 50,
+         hasMore: true,
+         nextPage: 3,
+         prevPage: 1
+       };
+
+       const formatted = paginationService.formatPaginationInfo(pagination);
+
+       expect(formatted).toContain('Showing items 21-40 of 50');
+     });
+   });
+
+   describe('chunkText', () => {
+     it('should not chunk text under token limit', () => {
+       const text = 'a'.repeat(50000); // 12500 tokens
+       const chunks = paginationService.chunkText(text);
+
+       expect(chunks).toHaveLength(1);
+       expect(chunks[0]).toBe(text);
+     });
+
+     it('should chunk large text into multiple parts', () => {
+       const text = 'a'.repeat(100000); // 25000 tokens
+       const chunks = paginationService.chunkText(text);
+
+       expect(chunks).toHaveLength(2);
+       chunks.forEach(chunk => {
+         const tokens = paginationService.estimateTokens(chunk);
+         expect(tokens).toBeLessThanOrEqual(20500); // Allow small buffer for realistic tokenization
+       });
+     });
+
+     it('should preserve line breaks when chunking', () => {
+       const lines = Array.from({ length: 1000 }, (_, i) => 'a'.repeat(100)).join('\n');
+       const chunks = paginationService.chunkText(lines);
+
+       expect(chunks.length).toBeGreaterThan(1);
+       chunks.forEach(chunk => {
+         // Each chunk should be properly formatted (allow trimming)
+         expect(chunk.length).toBeGreaterThan(0);
+         expect(typeof chunk).toBe('string');
+       });
+     });
+
+     it('should handle very long single lines', () => {
+       const longLine = 'word '.repeat(20000); // Single line with ~100000 chars
+       const chunks = paginationService.chunkText(longLine);
+
+       expect(chunks.length).toBeGreaterThan(1);
+       chunks.forEach(chunk => {
+         const tokens = paginationService.estimateTokens(chunk);
+         expect(tokens).toBeLessThanOrEqual(20500); // Allow small buffer for realistic tokenization
+       });
+     });
+
+     it('should handle empty text', () => {
+       const chunks = paginationService.chunkText('');
+       expect(chunks).toEqual(['']);
+     });
+
+     it('should handle null text', () => {
+       const chunks = paginationService.chunkText(null);
+       expect(chunks).toEqual([null]);
+     });
+
+     it('should respect target token parameter', () => {
+       const text = 'word '.repeat(50000); // More complex text that tokenizes differently
+       const chunks = paginationService.chunkText(text, 10000); // 10000 token target
+
+       expect(chunks.length).toBeGreaterThan(1); // Should need chunking
+       chunks.forEach(chunk => {
+         const tokens = paginationService.estimateTokens(chunk);
+         expect(tokens).toBeLessThanOrEqual(10500); // Allow small buffer
+       });
+     });
+   });
+
+   describe('Edge Cases', () => {
+     it('should handle mixed content sizes in smart pagination', () => {
+       const items = [
+         { id: 1, content: 'a'.repeat(50000) }, // Large item
+         { id: 2, content: 'b'.repeat(10) }, // Small item
+         { id: 3, content: 'c'.repeat(30000) }, // Medium item
+       ];
+
+       const formatter = (items) => items.map(i => i.content).join('');
+       const result = paginationService.smartPaginate(items, formatter, 1);
+
+       // Should include at least the first item, maybe more
+       expect(result.pagination.itemsInPage).toBeGreaterThanOrEqual(1);
+       const tokens = paginationService.estimateTokens(result.content);
+       expect(tokens).toBeLessThanOrEqual(20000);
+     });
+
+     it('should handle single item exceeding token limit', () => {
+       const items = [
+         { id: 1, content: 'x'.repeat(100000) } // 25000 tokens - exceeds limit
+       ];
+
+       const formatter = (items) => items.map(i => i.content).join('');
+       const result = paginationService.smartPaginate(items, formatter, 1);
+
+       // Should still include the item even if it exceeds limit
+       expect(result.pagination.itemsInPage).toBe(1);
+       expect(result.pagination.hasMore).toBe(false);
+     });
+
+     it('should handle unicode characters correctly', () => {
+       const text = '你好世界🌍'.repeat(1000);
+       const tokens = paginationService.estimateTokens(text);
+
+       // Unicode chars should still be counted
+       expect(tokens).toBeGreaterThan(0);
+
+       const chunks = paginationService.chunkText(text.repeat(10));
+       expect(chunks.length).toBeGreaterThanOrEqual(1);
+     });
+
+     it('should handle formatter that returns empty string', () => {
+       const items = [{ id: 1 }, { id: 2 }];
+       const formatter = () => '';
+
+       const result = paginationService.smartPaginate(items, formatter, 1);
+       expect(result.content).toBe('');
+       expect(result.pagination.itemsInPage).toBe(1); // New pagination logic: 1 item per page
+     });
+
+     it('should handle formatter that throws error gracefully', () => {
+       const items = [{ id: 1 }];
+       const formatter = () => {
+         throw new Error('Formatter error');
+       };
+
+       expect(() => {
+         paginationService.smartPaginate(items, formatter, 1);
+       }).toThrow('Formatter error');
+     });
+   });
+
+   describe('Performance', () => {
+     it('should handle very large arrays efficiently', () => {
+       const largeArray = Array.from({ length: 10000 }, (_, i) => ({ id: i }));
+
+       const startTime = Date.now();
+       const result = paginationService.paginateArray(largeArray, 1, 100);
+       const endTime = Date.now();
+
+       expect(result.items).toHaveLength(100);
+       expect(result.totalItems).toBe(10000);
+       expect(endTime - startTime).toBeLessThan(100); // Should be fast
+     });
+
+     it('should handle very large text efficiently', () => {
+       const largeText = 'a'.repeat(1000000); // 1M chars
+
+       const startTime = Date.now();
+       const chunks = paginationService.chunkText(largeText);
+       const endTime = Date.now();
+
+       expect(chunks.length).toBeGreaterThanOrEqual(12); // 1M chars / 80K = ~12.5 chunks
+       expect(endTime - startTime).toBeLessThan(500); // Should complete quickly
+     });
+   });
+ });
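The spec above exercises the new PaginationService API end to end: fixed-size paging via `paginateArray(items, page, pageSize)`, token-budgeted paging via `smartPaginate(items, formatter, page, pageSize)`, plus `estimateTokens`, `needsPagination`, `chunkText`, and `formatPaginationInfo`. The sketch below shows how a caller might combine them; it is inferred from the test expectations only, and the import path and logged values are illustrative rather than taken from the shipped implementation.

```javascript
// Usage sketch inferred from the spec above; the import path is an assumption.
import { PaginationService } from './src/services/PaginationService.js';

const paginationService = new PaginationService();

// Fixed-size pagination over an in-memory array.
const items = Array.from({ length: 25 }, (_, i) => ({ id: i + 1, name: `Item ${i + 1}` }));
const page2 = paginationService.paginateArray(items, 2, 10);
console.log(page2.items.length, page2.totalPages, page2.nextPage); // 10 3 3, per the tests above

// Token-aware pagination: a formatter renders the page, and smartPaginate
// fits as many items as the token budget allows when no pageSize is given.
const formatter = (pageItems) =>
  pageItems.map((item) => `Item ${item.id}: ${item.name}`).join('\n\n');
const smart = paginationService.smartPaginate(items, formatter, 1);

console.log(paginationService.estimateTokens(smart.content));        // heuristic token count of the rendered page
console.log(paginationService.formatPaginationInfo(smart.pagination)); // "Page 1 of N"-style footer
```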
package/src/utils/TokenEstimator.js
@@ -0,0 +1,134 @@
+ /**
+  * TokenEstimator - Realistic token counting for LLM content
+  *
+  * Implements proper tokenization estimation based on modern LLM patterns.
+  * Much more accurate than naive character-count approaches.
+  */
+ export class TokenEstimator {
+
+   /**
+    * Estimate token count using realistic tokenization patterns
+    * Based on GPT-style tokenization rules and observed patterns
+    *
+    * @param {string} text - Text to analyze
+    * @returns {number} Estimated token count
+    */
+   static estimateTokens(text) {
+     if (!text) return 0;
+
+     let tokens = 0;
+
+     // Split by whitespace and punctuation patterns
+     const words = text.split(/(\s+|[^\w\s])/);
+
+     for (const word of words) {
+       if (!word) continue;
+
+       // Whitespace is often merged with adjacent tokens
+       if (/^\s+$/.test(word)) {
+         continue; // Don't count pure whitespace as tokens
+       }
+
+       // Single punctuation marks are usually 1 token
+       if (/^[^\w\s]$/.test(word)) {
+         tokens += 1;
+         continue;
+       }
+
+       // Handle different word types
+       if (/^\w+$/.test(word)) {
+         // Regular words: estimate based on length and common patterns
+         if (word.length <= 3) {
+           tokens += 1; // Short words: 1 token
+         } else if (word.length <= 6) {
+           tokens += 1; // Medium words: usually 1 token
+         } else if (word.length <= 10) {
+           tokens += Math.ceil(word.length / 5); // Longer words: ~5 chars per token
+         } else {
+           // Very long words (often technical terms): ~4 chars per token
+           tokens += Math.ceil(word.length / 4);
+         }
+       } else {
+         // Mixed content (URLs, emails, code, etc.)
+         // These are often tokenized more aggressively
+         if (word.includes('://') || word.includes('@')) {
+           // URLs and emails: roughly 3-4 chars per token
+           tokens += Math.ceil(word.length / 3.5);
+         } else if (/[A-Z]{2,}/.test(word) || /\d+/.test(word)) {
+           // Acronyms and numbers: often 2-3 chars per token
+           tokens += Math.ceil(word.length / 2.5);
+         } else {
+           // Other mixed content: 4 chars per token
+           tokens += Math.ceil(word.length / 4);
+         }
+       }
+     }
+
+     // Account for special sequences that are tokenized differently
+     // Code blocks, markdown, JSON, etc. tend to have more tokens
+     const specialPatterns = [
+       /```[\s\S]*?```/g, // Code blocks
+       /`[^`]+`/g, // Inline code
+       /\[[^\]]*\]\([^)]*\)/g, // Markdown links
+       /\*\*[^*]+\*\*/g, // Bold text
+       /\*[^*]+\*/g, // Italic text
+       /{[^}]*}/g, // JSON-like structures
+       /\([^)]*\)/g, // Parenthetical content
+     ];
+
+     let specialTokens = 0;
+     for (const pattern of specialPatterns) {
+       const matches = text.match(pattern);
+       if (matches) {
+         for (const match of matches) {
+           // Special content has higher token density
+           specialTokens += Math.ceil(match.length / 3);
+         }
+       }
+     }
+
+     // Use the higher of the two estimates (word-based vs special-pattern-based)
+     // This accounts for content that's heavily formatted vs plain text
+     const wordBasedEstimate = tokens;
+     const specialContentRatio = specialTokens / Math.max(1, text.length);
+
+     if (specialContentRatio > 0.1) {
+       // High special content - use pattern-based estimate with adjustment
+       tokens = Math.max(wordBasedEstimate, Math.ceil(text.length / 3.2));
+     } else {
+       // Regular content - use word-based estimate
+       tokens = wordBasedEstimate;
+     }
+
+     // Add buffer for control tokens, formatting, etc. (5-10% overhead)
+     tokens = Math.ceil(tokens * 1.08);
+
+     return tokens;
+   }
+
+   /**
+    * Get the average characters per token for specific text
+    * Useful for chunking operations
+    */
+   static getAvgCharsPerToken(text) {
+     if (!text) return 4; // Fallback
+     const tokens = this.estimateTokens(text);
+     return tokens > 0 ? text.length / tokens : 4;
+   }
+
+   /**
+    * Check if text exceeds a token limit
+    */
+   static exceedsLimit(text, maxTokens) {
+     return this.estimateTokens(text) > maxTokens;
+   }
+
+   /**
+    * Estimate how many characters would fit within a token budget
+    * for a given text style
+    */
+   static estimateCharsForTokens(sampleText, targetTokens) {
+     const avgCharsPerToken = this.getAvgCharsPerToken(sampleText);
+     return Math.floor(targetTokens * avgCharsPerToken);
+   }
+ }
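TokenEstimator replaces a naive 4-characters-per-token rule with a word- and pattern-based heuristic plus a roughly 8% overhead buffer, and all of its methods are static. A brief usage sketch follows, with an assumed import path and illustrative inputs; the exact counts depend on the heuristics shown above.

```javascript
// Illustrative only; numbers vary with the heuristic rules in the class above.
import { TokenEstimator } from './src/utils/TokenEstimator.js';

const doc = '# Title\n\nProse with `inline code` and a [link](https://example.com).';

const tokens = TokenEstimator.estimateTokens(doc);                  // heuristic token count
const avgChars = TokenEstimator.getAvgCharsPerToken(doc);           // used when sizing chunks
const overBudget = TokenEstimator.exceedsLimit(doc, 24000);         // compare against a token budget
const fitChars = TokenEstimator.estimateCharsForTokens(doc, 20000); // chars that fit ~20k tokens

console.log({ tokens, avgChars, overBudget, fitChars });
```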
package/prompts/file-docs.md
@@ -1,69 +0,0 @@
- # FILE CONTEXT DOCUMENTATION
-
- **File**: `${filePath}`
-
- ## CONTEXTUAL STANDARDS
-
- ${docsContent}
-
- ## SMART IMPLEMENTATION CHECKLIST
-
- ### Quick Analysis (< 10 seconds)
- 1. **Scan Patterns** - What patterns does this file use?
- 2. **Check Dependencies** - What does it import?
- 3. **Note Conventions** - Naming, structure, style
-
- ### Implementation Rules
- 1. **Match Style** - Follow the file's existing patterns
- 2. **Preserve Logic** - Don't break existing functionality
- 3. **Minimal Changes** - Small, focused modifications
-
- ## DECISION FRAMEWORK
-
- ### Need More Context?
-
- ```javascript
- if (changeIsSimple) {
-   // Just make the change
-   applyDirectFix();
- } else if (needsPatternContext) {
-   // One quick search
-   search_documentation(specificPattern);
- } else {
-   // Use file's existing patterns
-   followLocalConventions();
- }
- ```
-
- ## PERFORMANCE CONSIDERATIONS
-
- **For Hot Paths:**
- - Keep complexity at current level or better
- - Don't introduce blocking operations
- - Maintain existing optimizations
-
- **For Regular Code:**
- - Prioritize readability
- - Follow SOLID principles
- - Keep it simple
-
- ## VALIDATION
-
- Before committing:
- - ✓ Follows file conventions
- - ✓ Maintains contracts
- - ✓ Preserves performance
- - ✓ Doesn't break tests
-
- ## QUICK WINS
-
- **Common tasks that need NO documentation search:**
- - Adding logging/debugging
- - Fixing typos or syntax
- - Adding comments
- - Simple refactoring
- - Error handling with try/catch
-
- ## THE LOCAL PATTERN RULE
-
- When in doubt, copy what the file already does. Local consistency > global perfection.