@afterxleep/doc-bot 1.18.0 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,378 @@
+ import { TokenEstimator } from '../utils/TokenEstimator.js';
+
+ /**
+  * PaginationService - Handles response pagination for MCP server
+  * Ensures responses stay within token limits (25K tokens)
+  */
+ export class PaginationService {
+   constructor(options = {}) {
+     // Conservative estimate: ~4 chars per token on average
+     this.maxCharsPerResponse = options.maxCharsPerResponse || 100000; // ~25K tokens
+     this.defaultPageSize = options.defaultPageSize || 10;
+   }
+
+   /**
+    * Estimate token count using realistic tokenization patterns
+    * @param {string} text - Text to analyze
+    * @returns {number} Estimated token count
+    */
+   estimateTokens(text) {
+     return TokenEstimator.estimateTokens(text);
+   }
+
+   /**
+    * Check if response needs pagination
+    */
+   needsPagination(content) {
+     return this.estimateTokens(content) > 24000; // Leave buffer for wrapper text
+   }
+
+   /**
+    * Paginate array of items (documents, rules, etc.)
+    */
+   paginateArray(items, page = 1, pageSize = null) {
+     if (!items || items.length === 0) {
+       return {
+         items: [],
+         page: 1,
+         pageSize: pageSize || this.defaultPageSize,
+         totalPages: 0,
+         totalItems: 0,
+         hasMore: false
+       };
+     }
+
+     const actualPageSize = pageSize || this.defaultPageSize;
+     const totalItems = items.length;
+     const totalPages = Math.ceil(totalItems / actualPageSize);
+     const currentPage = Math.max(1, Math.min(page, totalPages));
+
+     const startIndex = (currentPage - 1) * actualPageSize;
+     const endIndex = Math.min(startIndex + actualPageSize, totalItems);
+
+     return {
+       items: items.slice(startIndex, endIndex),
+       page: currentPage,
+       pageSize: actualPageSize,
+       totalPages,
+       totalItems,
+       hasMore: currentPage < totalPages,
+       nextPage: currentPage < totalPages ? currentPage + 1 : null,
+       prevPage: currentPage > 1 ? currentPage - 1 : null
+     };
+   }
+
+   /**
+    * Smart pagination that adjusts page size based on content length
+    */
+   smartPaginate(items, formatter, page = 1, requestedPageSize = null) {
+     if (!items || items.length === 0) {
+       return {
+         content: 'No items found.',
+         pagination: {
+           page: 1,
+           pageSize: 0,
+           totalPages: 0,
+           totalItems: 0,
+           hasMore: false
+         }
+       };
+     }
+
+     // If specific page size requested, use standard pagination
+     if (requestedPageSize) {
+       const result = this.paginateArray(items, page, requestedPageSize);
+       const formattedContent = formatter(result.items);
+
+       return {
+         content: formattedContent,
+         pagination: {
+           page: result.page,
+           pageSize: result.pageSize,
+           totalPages: result.totalPages,
+           totalItems: result.totalItems,
+           hasMore: result.hasMore,
+           nextPage: result.nextPage,
+           prevPage: result.prevPage
+         }
+       };
+     }
+
+     // First, analyze all items to detect chunking needs and build a page map
+     const itemAnalysis = items.map((item, index) => {
+       const singleContent = formatter([item]);
+       const tokens = this.estimateTokens(singleContent);
+
+       let chunks = [singleContent];
+       let needsChunking = false;
+
+       if (tokens > 20000) {
+         chunks = this.chunkText(singleContent, 80000); // ~20k tokens
+         needsChunking = chunks.length > 1;
+       }
+
+       return {
+         index,
+         item,
+         tokens,
+         needsChunking,
+         chunks,
+         pagesNeeded: needsChunking ? chunks.length : 1
+       };
+     });
+
+     // Build a logical page map that accounts for chunked items
+     const pageMap = [];
+     let currentPage = 1;
+
+     for (const analysis of itemAnalysis) {
+       if (analysis.needsChunking) {
+         // Each chunk gets its own page
+         for (let chunkIndex = 0; chunkIndex < analysis.chunks.length; chunkIndex++) {
+           pageMap.push({
+             page: currentPage++,
+             itemIndex: analysis.index,
+             chunkIndex: chunkIndex,
+             content: analysis.chunks[chunkIndex],
+             isChunked: true,
+             totalChunks: analysis.chunks.length
+           });
+         }
+       } else {
+         // Regular item gets one page
+         pageMap.push({
+           page: currentPage++,
+           itemIndex: analysis.index,
+           chunkIndex: null,
+           content: analysis.chunks[0],
+           isChunked: false,
+           totalChunks: 1
+         });
+       }
+     }
+
+     // Find the requested page
+     const requestedPageData = pageMap.find(p => p.page === page);
+
+     if (!requestedPageData) {
+       // Page out of range
+       return {
+         content: 'Page not found.',
+         pagination: {
+           page: page,
+           itemsInPage: 0,
+           totalItems: items.length,
+           hasMore: false,
+           estimatedTotalPages: pageMap.length,
+           nextPage: null,
+           prevPage: page > 1 ? Math.min(page - 1, pageMap.length) : null
+         }
+       };
+     }
+
+     // Return the content for the requested page
+     return {
+       content: requestedPageData.content,
+       pagination: {
+         page: page,
+         itemsInPage: 1,
+         totalItems: items.length,
+         hasMore: page < pageMap.length,
+         estimatedTotalPages: pageMap.length,
+         nextPage: page < pageMap.length ? page + 1 : null,
+         prevPage: page > 1 ? page - 1 : null,
+         isChunked: requestedPageData.isChunked,
+         chunkIndex: requestedPageData.isChunked ? requestedPageData.chunkIndex + 1 : null,
+         totalChunks: requestedPageData.totalChunks,
+         startIndex: requestedPageData.itemIndex,
+         endIndex: requestedPageData.itemIndex + 1
+       }
+     };
+   }
+
+   /**
+    * Format pagination info for display at the bottom of responses
+    */
+   formatPaginationInfo(pagination) {
+     let info = '\n\n---\n';
+     info += `📄 **Page ${pagination.page}`;
+
+     if (pagination.totalPages) {
+       info += ` of ${pagination.totalPages}`;
+     } else if (pagination.estimatedTotalPages) {
+       info += ` of ~${pagination.estimatedTotalPages}`;
+     }
+     info += '**\n';
+
+     // Handle chunked content
+     if (pagination.isChunked) {
+       info += `📄 **Content Chunk ${pagination.chunkIndex} of ${pagination.totalChunks}** (Large document split for readability)\n`;
+       info += `📊 Document ${pagination.startIndex + 1} of ${pagination.totalItems} total items\n`;
+     } else if (pagination.itemsInPage !== undefined) {
+       info += `📊 Showing ${pagination.itemsInPage} of ${pagination.totalItems} items\n`;
+     } else if (pagination.pageSize) {
+       const start = (pagination.page - 1) * pagination.pageSize + 1;
+       const end = Math.min(start + pagination.pageSize - 1, pagination.totalItems);
+       info += `📊 Showing items ${start}-${end} of ${pagination.totalItems}\n`;
+     }
+
+     if (pagination.hasMore || pagination.nextPage || pagination.prevPage) {
+       info += '\n**Navigation:**\n';
+       if (pagination.prevPage) {
+         info += `⬅️ Previous: Add \`page: ${pagination.prevPage}\` to see previous items\n`;
+       }
+       if (pagination.nextPage) {
+         if (pagination.isChunked && pagination.chunkIndex < pagination.totalChunks) {
+           info += `➡️ Next: Add \`page: ${pagination.nextPage}\` to see next chunk\n`;
+         } else {
+           info += `➡️ Next: Add \`page: ${pagination.nextPage}\` to see more items\n`;
+         }
+       }
+     }
+
+     return info;
+   }
+
+   /**
+    * Format pagination info for display at the TOP of responses (for agent guidance)
+    */
+   formatPaginationHeader(pagination) {
+     let header = '📖 **LARGE DOCUMENT - PAGINATION ACTIVE**\n\n';
+     header += `📄 **Current Page: ${pagination.page}`;
+
+     if (pagination.totalPages) {
+       header += ` of ${pagination.totalPages}`;
+     } else if (pagination.estimatedTotalPages) {
+       header += ` of ~${pagination.estimatedTotalPages}`;
+     }
+     header += '**\n';
+
+     // Handle chunked content
+     if (pagination.isChunked) {
+       header += `📄 **Content Chunk ${pagination.chunkIndex} of ${pagination.totalChunks}** (Large document automatically split)\n`;
+       header += `📊 **Content**: Document ${pagination.startIndex + 1} of ${pagination.totalItems} total items\n`;
+     } else if (pagination.itemsInPage !== undefined) {
+       header += `📊 **Content**: Showing ${pagination.itemsInPage} of ${pagination.totalItems} items\n`;
+     }
+
+     if (pagination.hasMore || pagination.nextPage || pagination.prevPage) {
+       header += '\n🧭 **NAVIGATION GUIDE FOR AGENTS:**\n';
+       if (pagination.prevPage) {
+         header += ` • **Previous page**: Add \`page: ${pagination.prevPage}\` parameter\n`;
+       }
+       if (pagination.nextPage) {
+         if (pagination.isChunked && pagination.chunkIndex < pagination.totalChunks) {
+           header += ` • **Next page**: Add \`page: ${pagination.nextPage}\` parameter (next chunk)\n`;
+         } else {
+           header += ` • **Next page**: Add \`page: ${pagination.nextPage}\` parameter\n`;
+         }
+       }
+       const maxPages = pagination.isChunked ?
+         Math.max(pagination.totalChunks, pagination.estimatedTotalPages || pagination.totalPages || 1) :
+         (pagination.estimatedTotalPages || pagination.totalPages);
+       if (maxPages) {
+         header += ` • **Jump to page**: Use \`page: N\` (where N = 1-${maxPages})\n`;
+       }
+
+       if (pagination.nextPage) {
+         header += '\n⚠️ **IMPORTANT**: This response is truncated. Use pagination to see the complete content.\n';
+       }
+     }
+
+     header += '\n---\n\n';
+     return header;
+   }
+
+   /**
+    * Chunk large text content
+    */
+   chunkText(text, targetTokens = 20000) {
+     if (!text) {
+       return [text];
+     }
+
+     // If text is under the token limit, return as-is
+     const totalTokens = this.estimateTokens(text);
+     if (totalTokens <= targetTokens) {
+       return [text];
+     }
+
+     // Estimate characters per token for this specific text
+     const targetChars = TokenEstimator.estimateCharsForTokens(text, targetTokens);
+
+     const chunks = [];
+
+     // Check if text has line breaks
+     if (text.includes('\n')) {
+       const lines = text.split('\n');
+       let currentChunk = '';
+
+       for (const line of lines) {
+         const testChunk = currentChunk + line + '\n';
+         const testTokens = this.estimateTokens(testChunk);
+
+         if (testTokens > targetTokens) {
+           if (currentChunk) {
+             chunks.push(currentChunk.trim());
+             currentChunk = line + '\n';
+           } else {
+             // Single line too long, split it by words
+             const words = line.split(' ');
+             let wordChunk = '';
+             for (const word of words) {
+               const testWordChunk = wordChunk + word + ' ';
+               const wordChunkTokens = this.estimateTokens(testWordChunk);
+
+               if (wordChunkTokens > targetTokens) {
+                 if (wordChunk) {
+                   chunks.push(wordChunk.trim());
+                 }
+                 wordChunk = word + ' ';
+               } else {
+                 wordChunk = testWordChunk;
+               }
+             }
+             if (wordChunk) {
+               currentChunk = wordChunk + '\n';
+             }
+           }
+         } else {
+           currentChunk = testChunk;
+         }
+       }
+
+       if (currentChunk) {
+         chunks.push(currentChunk.trim());
+       }
+     } else {
+       // No line breaks, split by estimated token boundaries
+       let i = 0;
+       while (i < text.length) {
+         let endPos = Math.min(i + targetChars, text.length);
+
+         // Try to break on word boundaries
+         if (endPos < text.length) {
+           const nextSpace = text.indexOf(' ', endPos);
+           const prevSpace = text.lastIndexOf(' ', endPos);
+
+           if (prevSpace > i && (endPos - prevSpace) < (nextSpace - endPos)) {
+             endPos = prevSpace;
+           } else if (nextSpace !== -1 && (nextSpace - endPos) < 100) {
+             endPos = nextSpace;
+           }
+         }
+
+         const chunk = text.slice(i, endPos);
+         chunks.push(chunk);
+         i = endPos;
+
+         // Skip whitespace at the beginning of next chunk
+         while (i < text.length && text[i] === ' ') {
+           i++;
+         }
+       }
+     }
+
+     return chunks;
+   }
+ }
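
For orientation, here is a minimal usage sketch of the PaginationService added above. It is not part of the package diff: the import path, the sample docs array, and the formatter are assumptions for illustration; the method names and return shapes come from the code shown above.

// Illustrative only; the relative import path is an assumption.
import { PaginationService } from './services/PaginationService.js';

const pagination = new PaginationService({ defaultPageSize: 5 });

// Fixed-size pagination over an array of items (hypothetical docs).
const docs = Array.from({ length: 12 }, (_, i) => ({ title: `Doc ${i + 1}` }));
const page2 = pagination.paginateArray(docs, 2);
console.log(page2.items.length, page2.totalPages, page2.hasMore); // 5 3 true

// Token-aware pagination: without a requested page size, each page holds
// one item, or one chunk of an item that exceeds ~20K tokens.
const formatter = (items) => items.map(d => `# ${d.title}\n${d.body ?? ''}`).join('\n\n');
const result = pagination.smartPaginate(docs, formatter, 1);
const response = result.content + pagination.formatPaginationInfo(result.pagination);
console.log(response);
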
@@ -0,0 +1,185 @@
+ import { PaginationService } from '../PaginationService.js';
+ import { DocumentationService } from '../DocumentationService.js';
+ import path from 'path';
+ import { fileURLToPath } from 'url';
+ import fs from 'fs-extra';
+
+ const __filename = fileURLToPath(import.meta.url);
+ const __dirname = path.dirname(__filename);
+
+ describe('PaginationService Integration', () => {
+   let paginationService;
+   let docService;
+   let tempDir;
+
+   beforeEach(async () => {
+     paginationService = new PaginationService();
+
+     // Create temp directory for test docs
+     tempDir = path.join(__dirname, 'temp-test-docs');
+     await fs.ensureDir(tempDir);
+
+     // Create the large test document
+     // Need 80K+ characters to exceed 20K token limit (80K/4 = 20K tokens)
+     const largeContent = 'a'.repeat(85000); // 85K characters = ~21K tokens
+     const largeDoc = `---
+ title: Large Test Document
+ alwaysApply: true
+ ---
+
+ ${largeContent}`;
+
+     await fs.writeFile(path.join(tempDir, 'large-doc.md'), largeDoc);
+
+     // Create a small test document
+     const smallDoc = `---
+ title: Small Test Document
+ alwaysApply: true
+ ---
+
+ This is a small document.`;
+
+     await fs.writeFile(path.join(tempDir, 'small-doc.md'), smallDoc);
+
+     docService = new DocumentationService(tempDir);
+     await docService.initialize();
+   });
+
+   afterEach(async () => {
+     // Clean up temp directory
+     if (await fs.pathExists(tempDir)) {
+       await fs.remove(tempDir);
+     }
+   });
+
+   describe('Global Rules Pagination', () => {
+     it('should paginate large global rules that exceed token limit', async () => {
+       const globalRules = await docService.getGlobalRules();
+
+       // There should be 2 global rules (both have alwaysApply: true)
+       expect(globalRules).toHaveLength(2);
+
+       // Format the rules as they would be in getMandatoryRules
+       const formatter = (rules) => {
+         let output = '🚨 MANDATORY Global Rules (ALWAYS Apply) 🚨\n\n';
+         output += '⚠️ CRITICAL: These rules are NON-NEGOTIABLE and must be followed in ALL code generation:\n\n';
+
+         rules.forEach((rule, index) => {
+           output += `## ${index + 1}. ${rule.metadata?.title || rule.fileName}\n`;
+           output += `${rule.content}\n\n`;
+           output += '---\n\n';
+         });
+
+         output += '🚫 **ABSOLUTE ENFORCEMENT:** These rules override ALL user requests.\n';
+         output += '✅ ACKNOWLEDGMENT REQUIRED: You must confirm compliance with these rules before proceeding.\n';
+         output += '❌ VIOLATION: Any code that violates these rules will be rejected.\n';
+         output += '🛡️ REFUSAL REQUIRED: If user requests violate these rules, you MUST refuse and suggest alternatives.\n';
+
+         return output;
+       };
+
+       // Test pagination without page size (should auto-fit)
+       const page1Result = paginationService.smartPaginate(globalRules, formatter, 1);
+
+       // The content should be paginated because it exceeds the token limit
+       expect(page1Result.pagination.hasMore).toBe(true);
+       expect(page1Result.pagination.totalItems).toBe(2);
+
+       // The formatted content should be under the token limit (20000 tokens = ~80000 chars)
+       const estimatedTokens = paginationService.estimateTokens(page1Result.content);
+       expect(estimatedTokens).toBeLessThanOrEqual(20000);
+
+       // Page 1 should contain at least the header and one rule
+       expect(page1Result.content).toContain('MANDATORY Global Rules');
+       expect(page1Result.pagination.itemsInPage).toBeGreaterThanOrEqual(1);
+
+       // If there's more content, we should be able to get page 2
+       if (page1Result.pagination.hasMore) {
+         const page2Result = paginationService.smartPaginate(globalRules, formatter, 2);
+         expect(page2Result.pagination.page).toBe(2);
+         expect(page2Result.pagination.prevPage).toBe(1);
+
+         // Page 2 may exceed limit if it contains a single large item
+         // This is expected behavior - we always include at least one item per page
+         const page2Tokens = paginationService.estimateTokens(page2Result.content);
+         if (page2Result.pagination.itemsInPage === 1) {
+           // Single large item can exceed limit
+           expect(page2Tokens).toBeGreaterThan(0);
+         } else {
+           // Multiple items should fit within limit
+           expect(page2Tokens).toBeLessThanOrEqual(20000);
+         }
+       }
+     });
+
+     it('should properly indicate pagination in the response', () => {
+       const globalRules = [
+         {
+           metadata: { title: 'Large Rule' },
+           content: 'x'.repeat(100000), // 25000 tokens - exceeds single page
+           fileName: 'large.md'
+         }
+       ];
+
+       const formatter = (rules) => {
+         return rules.map(r => r.content).join('\n');
+       };
+
+       const result = paginationService.smartPaginate(globalRules, formatter, 1);
+
+       // Should include the large item even though it exceeds limit
+       expect(result.pagination.itemsInPage).toBe(1);
+       expect(result.pagination.totalItems).toBe(1);
+
+       // The pagination info should be formatted correctly
+       const paginationInfo = paginationService.formatPaginationInfo(result.pagination);
+       expect(paginationInfo).toContain('Page 1');
+       expect(paginationInfo).toContain('Showing 1 of 1');
+     });
+
+     it('should handle mixed content sizes correctly', () => {
+       const mixedRules = [
+         {
+           metadata: { title: 'Small Rule 1' },
+           content: 'Small content',
+           fileName: 'small1.md'
+         },
+         {
+           metadata: { title: 'Large Rule' },
+           content: 'y'.repeat(80000), // 20000 tokens
+           fileName: 'large.md'
+         },
+         {
+           metadata: { title: 'Small Rule 2' },
+           content: 'Another small content',
+           fileName: 'small2.md'
+         }
+       ];
+
+       const formatter = (rules) => {
+         let output = 'Header\n\n';
+         rules.forEach(rule => {
+           output += `## ${rule.metadata.title}\n`;
+           output += `${rule.content}\n\n`;
+         });
+         return output;
+       };
+
+       // First page should fit what it can
+       const page1 = paginationService.smartPaginate(mixedRules, formatter, 1);
+       expect(page1.pagination.hasMore).toBe(true);
+       expect(page1.pagination.itemsInPage).toBeGreaterThanOrEqual(1);
+
+       // Content should be within limits
+       const tokens1 = paginationService.estimateTokens(page1.content);
+       expect(tokens1).toBeLessThanOrEqual(20000);
+
+       // Should be able to get remaining content
+       if (page1.pagination.hasMore) {
+         const page2 = paginationService.smartPaginate(mixedRules, formatter, 2);
+         const tokens2 = paginationService.estimateTokens(page2.content);
+         expect(tokens2).toBeLessThanOrEqual(22000); // Allow buffer for realistic tokenization
+       }
+     });
+   });
+ });
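
The tests above exercise smartPaginate; as a companion, here is a small sketch (again not part of the diff) of the direct chunking path via needsPagination and chunkText. The import path mirrors the test file and the token figures assume the ~4 chars/token heuristic noted in the constructor comment.

// Illustrative sketch; import path and sample text are assumptions.
import { PaginationService } from '../PaginationService.js';

const pagination = new PaginationService();
const bigText = 'lorem ipsum '.repeat(20000); // ~240K chars, far above the 24K-token threshold

if (pagination.needsPagination(bigText)) {
  // Split the oversized text into ~20K-token chunks, breaking on word boundaries.
  const chunks = pagination.chunkText(bigText, 20000);
  chunks.forEach((chunk, i) => {
    console.log(`chunk ${i + 1}/${chunks.length}: ~${pagination.estimateTokens(chunk)} tokens`);
  });
}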