agents-reverse-engineer 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +238 -0
  3. package/dist/change-detection/detector.d.ts +24 -0
  4. package/dist/change-detection/detector.d.ts.map +1 -0
  5. package/dist/change-detection/detector.js +114 -0
  6. package/dist/change-detection/detector.js.map +1 -0
  7. package/dist/change-detection/index.d.ts +9 -0
  8. package/dist/change-detection/index.d.ts.map +1 -0
  9. package/dist/change-detection/index.js +8 -0
  10. package/dist/change-detection/index.js.map +1 -0
  11. package/dist/change-detection/types.d.ts +39 -0
  12. package/dist/change-detection/types.d.ts.map +1 -0
  13. package/dist/change-detection/types.js +5 -0
  14. package/dist/change-detection/types.js.map +1 -0
  15. package/dist/cli/discover.d.ts +52 -0
  16. package/dist/cli/discover.d.ts.map +1 -0
  17. package/dist/cli/discover.js +125 -0
  18. package/dist/cli/discover.js.map +1 -0
  19. package/dist/cli/generate.d.ts +41 -0
  20. package/dist/cli/generate.d.ts.map +1 -0
  21. package/dist/cli/generate.js +179 -0
  22. package/dist/cli/generate.js.map +1 -0
  23. package/dist/cli/index.d.ts +12 -0
  24. package/dist/cli/index.d.ts.map +1 -0
  25. package/dist/cli/index.js +182 -0
  26. package/dist/cli/index.js.map +1 -0
  27. package/dist/cli/init.d.ts +38 -0
  28. package/dist/cli/init.d.ts.map +1 -0
  29. package/dist/cli/init.js +94 -0
  30. package/dist/cli/init.js.map +1 -0
  31. package/dist/cli/update.d.ts +28 -0
  32. package/dist/cli/update.d.ts.map +1 -0
  33. package/dist/cli/update.js +296 -0
  34. package/dist/cli/update.js.map +1 -0
  35. package/dist/config/defaults.d.ts +38 -0
  36. package/dist/config/defaults.d.ts.map +1 -0
  37. package/dist/config/defaults.js +89 -0
  38. package/dist/config/defaults.js.map +1 -0
  39. package/dist/config/loader.d.ts +66 -0
  40. package/dist/config/loader.d.ts.map +1 -0
  41. package/dist/config/loader.js +158 -0
  42. package/dist/config/loader.js.map +1 -0
  43. package/dist/config/schema.d.ts +235 -0
  44. package/dist/config/schema.d.ts.map +1 -0
  45. package/dist/config/schema.js +80 -0
  46. package/dist/config/schema.js.map +1 -0
  47. package/dist/discovery/filters/binary.d.ts +46 -0
  48. package/dist/discovery/filters/binary.d.ts.map +1 -0
  49. package/dist/discovery/filters/binary.js +157 -0
  50. package/dist/discovery/filters/binary.js.map +1 -0
  51. package/dist/discovery/filters/custom.d.ts +26 -0
  52. package/dist/discovery/filters/custom.d.ts.map +1 -0
  53. package/dist/discovery/filters/custom.js +50 -0
  54. package/dist/discovery/filters/custom.js.map +1 -0
  55. package/dist/discovery/filters/gitignore.d.ts +24 -0
  56. package/dist/discovery/filters/gitignore.d.ts.map +1 -0
  57. package/dist/discovery/filters/gitignore.js +53 -0
  58. package/dist/discovery/filters/gitignore.js.map +1 -0
  59. package/dist/discovery/filters/index.d.ts +85 -0
  60. package/dist/discovery/filters/index.d.ts.map +1 -0
  61. package/dist/discovery/filters/index.js +98 -0
  62. package/dist/discovery/filters/index.js.map +1 -0
  63. package/dist/discovery/filters/vendor.d.ts +30 -0
  64. package/dist/discovery/filters/vendor.d.ts.map +1 -0
  65. package/dist/discovery/filters/vendor.js +57 -0
  66. package/dist/discovery/filters/vendor.js.map +1 -0
  67. package/dist/discovery/types.d.ts +66 -0
  68. package/dist/discovery/types.d.ts.map +1 -0
  69. package/dist/discovery/types.js +8 -0
  70. package/dist/discovery/types.js.map +1 -0
  71. package/dist/discovery/walker.d.ts +24 -0
  72. package/dist/discovery/walker.d.ts.map +1 -0
  73. package/dist/discovery/walker.js +35 -0
  74. package/dist/discovery/walker.js.map +1 -0
  75. package/dist/generation/budget/chunker.d.ts +38 -0
  76. package/dist/generation/budget/chunker.d.ts.map +1 -0
  77. package/dist/generation/budget/chunker.js +73 -0
  78. package/dist/generation/budget/chunker.js.map +1 -0
  79. package/dist/generation/budget/counter.d.ts +26 -0
  80. package/dist/generation/budget/counter.d.ts.map +1 -0
  81. package/dist/generation/budget/counter.js +45 -0
  82. package/dist/generation/budget/counter.js.map +1 -0
  83. package/dist/generation/budget/index.d.ts +4 -0
  84. package/dist/generation/budget/index.d.ts.map +1 -0
  85. package/dist/generation/budget/index.js +4 -0
  86. package/dist/generation/budget/index.js.map +1 -0
  87. package/dist/generation/budget/tracker.d.ts +63 -0
  88. package/dist/generation/budget/tracker.d.ts.map +1 -0
  89. package/dist/generation/budget/tracker.js +96 -0
  90. package/dist/generation/budget/tracker.js.map +1 -0
  91. package/dist/generation/complexity.d.ts +43 -0
  92. package/dist/generation/complexity.d.ts.map +1 -0
  93. package/dist/generation/complexity.js +156 -0
  94. package/dist/generation/complexity.js.map +1 -0
  95. package/dist/generation/detection/detector.d.ts +23 -0
  96. package/dist/generation/detection/detector.d.ts.map +1 -0
  97. package/dist/generation/detection/detector.js +62 -0
  98. package/dist/generation/detection/detector.js.map +1 -0
  99. package/dist/generation/detection/patterns.d.ts +21 -0
  100. package/dist/generation/detection/patterns.d.ts.map +1 -0
  101. package/dist/generation/detection/patterns.js +115 -0
  102. package/dist/generation/detection/patterns.js.map +1 -0
  103. package/dist/generation/executor.d.ts +95 -0
  104. package/dist/generation/executor.d.ts.map +1 -0
  105. package/dist/generation/executor.js +352 -0
  106. package/dist/generation/executor.js.map +1 -0
  107. package/dist/generation/orchestrator.d.ts +126 -0
  108. package/dist/generation/orchestrator.d.ts.map +1 -0
  109. package/dist/generation/orchestrator.js +222 -0
  110. package/dist/generation/orchestrator.js.map +1 -0
  111. package/dist/generation/prompts/builder.d.ts +31 -0
  112. package/dist/generation/prompts/builder.d.ts.map +1 -0
  113. package/dist/generation/prompts/builder.js +136 -0
  114. package/dist/generation/prompts/builder.js.map +1 -0
  115. package/dist/generation/prompts/index.d.ts +5 -0
  116. package/dist/generation/prompts/index.d.ts.map +1 -0
  117. package/dist/generation/prompts/index.js +4 -0
  118. package/dist/generation/prompts/index.js.map +1 -0
  119. package/dist/generation/prompts/templates.d.ts +11 -0
  120. package/dist/generation/prompts/templates.d.ts.map +1 -0
  121. package/dist/generation/prompts/templates.js +247 -0
  122. package/dist/generation/prompts/templates.js.map +1 -0
  123. package/dist/generation/prompts/types.d.ts +71 -0
  124. package/dist/generation/prompts/types.d.ts.map +1 -0
  125. package/dist/generation/prompts/types.js +23 -0
  126. package/dist/generation/prompts/types.js.map +1 -0
  127. package/dist/generation/types.d.ts +72 -0
  128. package/dist/generation/types.d.ts.map +1 -0
  129. package/dist/generation/types.js +5 -0
  130. package/dist/generation/types.js.map +1 -0
  131. package/dist/generation/writers/agents-md.d.ts +63 -0
  132. package/dist/generation/writers/agents-md.d.ts.map +1 -0
  133. package/dist/generation/writers/agents-md.js +235 -0
  134. package/dist/generation/writers/agents-md.js.map +1 -0
  135. package/dist/generation/writers/claude-md.d.ts +13 -0
  136. package/dist/generation/writers/claude-md.d.ts.map +1 -0
  137. package/dist/generation/writers/claude-md.js +33 -0
  138. package/dist/generation/writers/claude-md.js.map +1 -0
  139. package/dist/generation/writers/index.d.ts +5 -0
  140. package/dist/generation/writers/index.d.ts.map +1 -0
  141. package/dist/generation/writers/index.js +5 -0
  142. package/dist/generation/writers/index.js.map +1 -0
  143. package/dist/generation/writers/sum.d.ts +37 -0
  144. package/dist/generation/writers/sum.d.ts.map +1 -0
  145. package/dist/generation/writers/sum.js +98 -0
  146. package/dist/generation/writers/sum.js.map +1 -0
  147. package/dist/generation/writers/supplementary.d.ts +53 -0
  148. package/dist/generation/writers/supplementary.d.ts.map +1 -0
  149. package/dist/generation/writers/supplementary.js +195 -0
  150. package/dist/generation/writers/supplementary.js.map +1 -0
  151. package/dist/integration/detect.d.ts +28 -0
  152. package/dist/integration/detect.d.ts.map +1 -0
  153. package/dist/integration/detect.js +64 -0
  154. package/dist/integration/detect.js.map +1 -0
  155. package/dist/integration/generate.d.ts +36 -0
  156. package/dist/integration/generate.d.ts.map +1 -0
  157. package/dist/integration/generate.js +107 -0
  158. package/dist/integration/generate.js.map +1 -0
  159. package/dist/integration/templates.d.ts +42 -0
  160. package/dist/integration/templates.d.ts.map +1 -0
  161. package/dist/integration/templates.js +203 -0
  162. package/dist/integration/templates.js.map +1 -0
  163. package/dist/integration/types.d.ts +44 -0
  164. package/dist/integration/types.d.ts.map +1 -0
  165. package/dist/integration/types.js +8 -0
  166. package/dist/integration/types.js.map +1 -0
  167. package/dist/output/logger.d.ts +86 -0
  168. package/dist/output/logger.d.ts.map +1 -0
  169. package/dist/output/logger.js +107 -0
  170. package/dist/output/logger.js.map +1 -0
  171. package/dist/state/database.d.ts +9 -0
  172. package/dist/state/database.d.ts.map +1 -0
  173. package/dist/state/database.js +66 -0
  174. package/dist/state/database.js.map +1 -0
  175. package/dist/state/index.d.ts +8 -0
  176. package/dist/state/index.d.ts.map +1 -0
  177. package/dist/state/index.js +7 -0
  178. package/dist/state/index.js.map +1 -0
  179. package/dist/state/migrations.d.ts +12 -0
  180. package/dist/state/migrations.d.ts.map +1 -0
  181. package/dist/state/migrations.js +39 -0
  182. package/dist/state/migrations.js.map +1 -0
  183. package/dist/state/types.d.ts +54 -0
  184. package/dist/state/types.d.ts.map +1 -0
  185. package/dist/state/types.js +2 -0
  186. package/dist/state/types.js.map +1 -0
  187. package/dist/types/index.d.ts +39 -0
  188. package/dist/types/index.d.ts.map +1 -0
  189. package/dist/types/index.js +5 -0
  190. package/dist/types/index.js.map +1 -0
  191. package/dist/update/index.d.ts +10 -0
  192. package/dist/update/index.d.ts.map +1 -0
  193. package/dist/update/index.js +9 -0
  194. package/dist/update/index.js.map +1 -0
  195. package/dist/update/orchestrator.d.ts +91 -0
  196. package/dist/update/orchestrator.d.ts.map +1 -0
  197. package/dist/update/orchestrator.js +204 -0
  198. package/dist/update/orchestrator.js.map +1 -0
  199. package/dist/update/orphan-cleaner.d.ts +30 -0
  200. package/dist/update/orphan-cleaner.d.ts.map +1 -0
  201. package/dist/update/orphan-cleaner.js +151 -0
  202. package/dist/update/orphan-cleaner.js.map +1 -0
  203. package/dist/update/types.d.ts +59 -0
  204. package/dist/update/types.d.ts.map +1 -0
  205. package/dist/update/types.js +2 -0
  206. package/dist/update/types.js.map +1 -0
  207. package/package.json +60 -0
@@ -0,0 +1,53 @@
1
+ /**
2
+ * Gitignore pattern filter for file discovery.
3
+ *
4
+ * Uses the `ignore` library to parse and match .gitignore patterns.
5
+ * This filter loads the root .gitignore file and checks paths against
6
+ * the patterns to determine exclusion.
7
+ */
8
+ import ignore from 'ignore';
9
+ import fs from 'node:fs/promises';
10
+ import path from 'node:path';
11
/**
 * Builds a discovery filter backed by the project's root `.gitignore`.
 *
 * `<root>/.gitignore` is read once, when the filter is created. If the file
 * cannot be read (for example because it does not exist) the filter is still
 * returned, but it holds no patterns and therefore excludes nothing.
 *
 * @param root - Directory containing the `.gitignore` file; resolved to an
 *               absolute path before use
 * @returns A filter named `'gitignore'` whose `shouldExclude(absolutePath)`
 *          returns `true` when the path matches a loaded pattern. Paths that
 *          are the root itself or lie outside the root are never excluded.
 *
 * @example
 * ```typescript
 * const filter = await createGitignoreFilter('/path/to/project');
 * if (filter.shouldExclude('/path/to/project/dist/bundle.js')) {
 *   console.log('File is gitignored');
 * }
 * ```
 */
export async function createGitignoreFilter(root) {
    const ig = ignore();
    const normalizedRoot = path.resolve(root);
    const gitignorePath = path.join(normalizedRoot, '.gitignore');
    try {
        const content = await fs.readFile(gitignorePath, 'utf-8');
        ig.add(content);
    }
    catch {
        // Deliberate best effort: an unreadable or missing .gitignore simply
        // leaves the matcher empty, so the filter passes everything through.
    }
    return {
        name: 'gitignore',
        shouldExclude(absolutePath) {
            // The ignore library matches paths relative to the .gitignore location.
            const relativePath = path.relative(normalizedRoot, absolutePath);
            // Empty means the path IS the root; nothing to match.
            if (!relativePath) {
                return false;
            }
            // A path is outside the root only when its first segment is exactly
            // '..'. A plain startsWith('..') would also catch in-root names such
            // as '..cache'. path.relative can also hand back an absolute path
            // (e.g. a different drive on Windows), which is outside the root too
            // and must not be passed to the matcher.
            const isOutsideRoot = relativePath === '..' ||
                relativePath.startsWith(`..${path.sep}`) ||
                path.isAbsolute(relativePath);
            if (isOutsideRoot) {
                return false;
            }
            // No trailing slash is appended: the path is matched as a file.
            // Directory paths would need a trailing slash to match
            // directory-only patterns.
            return ig.ignores(relativePath);
        },
    };
}
53
+ //# sourceMappingURL=gitignore.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gitignore.js","sourceRoot":"","sources":["../../../src/discovery/filters/gitignore.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,MAAuB,MAAM,QAAQ,CAAC;AAC7C,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAG7B;;;;;;;;;;;;;GAaG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CAAC,IAAY;IACtD,MAAM,EAAE,GAAW,MAAM,EAAE,CAAC;IAC5B,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAE1C,yCAAyC;IACzC,MAAM,aAAa,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,YAAY,CAAC,CAAC;IAC9D,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;QAC1D,EAAE,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAClB,CAAC;IAAC,MAAM,CAAC;QACP,0DAA0D;IAC5D,CAAC;IAED,OAAO;QACL,IAAI,EAAE,WAAW;QAEjB,aAAa,CAAC,YAAoB;YAChC,oEAAoE;YACpE,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,cAAc,EAAE,YAAY,CAAC,CAAC;YAEjE,sEAAsE;YACtE,IAAI,CAAC,YAAY,IAAI,YAAY,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;gBACnD,OAAO,KAAK,CAAC;YACf,CAAC;YAED,gFAAgF;YAChF,qFAAqF;YACrF,kEAAkE;YAClE,OAAO,EAAE,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;QAClC,CAAC;KACF,CAAC;AACJ,CAAC"}
@@ -0,0 +1,85 @@
1
+ /**
2
+ * Filter chain orchestration for file discovery.
3
+ *
4
+ * This module exports all filter creators and provides the applyFilters
5
+ * function that runs files through the filter chain, recording which
6
+ * filter excluded each file.
7
+ */
8
+ import type { FileFilter, FilterResult } from '../types.js';
9
+ export { createGitignoreFilter } from './gitignore.js';
10
+ export { createVendorFilter, DEFAULT_VENDOR_DIRS } from './vendor.js';
11
+ export { createBinaryFilter, BINARY_EXTENSIONS, type BinaryFilterOptions } from './binary.js';
12
+ export { createCustomFilter } from './custom.js';
13
/**
 * Runs every file through an ordered filter chain.
 *
 * For each file the filters are consulted in array order and evaluation
 * stops at the first filter that asks for exclusion. Files no filter rejects
 * end up in `included`; rejected files end up in `excluded` together with
 * the rejecting filter's name and a human-readable reason.
 *
 * @param files - Absolute file paths to classify
 * @param filters - Filters to consult, in priority order
 * @returns Promise resolving to the included paths and the exclusion records
 *
 * @example
 * ```typescript
 * const filters = [
 *   createVendorFilter(['node_modules']),
 *   createBinaryFilter({}),
 * ];
 * const result = await applyFilters(['/a/b.js', '/a/node_modules/c.js'], filters);
 * // result.included: ['/a/b.js']
 * // result.excluded: [{ path: '/a/node_modules/c.js', filter: 'vendor', reason: '...' }]
 * ```
 */
export declare function applyFilters(
    files: string[],
    filters: FileFilter[],
): Promise<FilterResult>;
36
/**
 * Optional settings accepted by `createDefaultFilters`.
 *
 * Every field may be omitted, in which case the default noted on the field
 * is used.
 */
export interface DefaultFilterConfig {
    /** Directory names the vendor filter should exclude. Default: `DEFAULT_VENDOR_DIRS`. */
    vendorDirs?: string[];
    /** Extra exclusion patterns, written in gitignore syntax. Default: `[]`. */
    patterns?: string[];
    /** Size limit in bytes; larger files are excluded. Default: 1MB (1048576). */
    maxFileSize?: number;
    /** Further file extensions to treat as binary. Default: `[]`. */
    additionalBinaryExtensions?: string[];
}
61
/**
 * Builds the standard filter chain, ready to hand to `applyFilters`.
 *
 * The returned array is ordered as follows:
 * 1. Gitignore - patterns from the root `.gitignore`
 * 2. Vendor - vendor directories (node_modules, etc.)
 * 3. Binary - binary files
 * 4. Custom - user-specified patterns
 *
 * @param root - Root directory used for gitignore and custom pattern matching
 * @param config - Optional overrides for the individual filters
 * @returns Promise resolving to the filters in the order listed above
 *
 * @example
 * ```typescript
 * const filters = await createDefaultFilters('/path/to/project', {
 *   vendorDirs: ['node_modules', 'vendor'],
 *   patterns: ['*.log', 'tmp/**'],
 *   maxFileSize: 500000,
 * });
 * const result = await applyFilters(files, filters);
 * ```
 */
export declare function createDefaultFilters(
    root: string,
    config?: DefaultFilterConfig,
): Promise<FileFilter[]>;
85
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/discovery/filters/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,UAAU,EAAE,YAAY,EAAgB,MAAM,aAAa,CAAC;AAO1E,OAAO,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AACvD,OAAO,EAAE,kBAAkB,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC;AACtE,OAAO,EAAE,kBAAkB,EAAE,iBAAiB,EAAE,KAAK,mBAAmB,EAAE,MAAM,aAAa,CAAC;AAC9F,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAEjD;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,wBAAsB,YAAY,CAChC,KAAK,EAAE,MAAM,EAAE,EACf,OAAO,EAAE,UAAU,EAAE,GACpB,OAAO,CAAC,YAAY,CAAC,CA4BvB;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC;;;OAGG;IACH,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IAEtB;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IAEpB;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB;;;OAGG;IACH,0BAA0B,CAAC,EAAE,MAAM,EAAE,CAAC;CACvC;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAsB,oBAAoB,CACxC,IAAI,EAAE,MAAM,EACZ,MAAM,GAAE,mBAAwB,GAC/B,OAAO,CAAC,UAAU,EAAE,CAAC,CAkBvB"}
@@ -0,0 +1,98 @@
1
+ /**
2
+ * Filter chain orchestration for file discovery.
3
+ *
4
+ * This module exports all filter creators and provides the applyFilters
5
+ * function that runs files through the filter chain, recording which
6
+ * filter excluded each file.
7
+ */
8
+ import { createGitignoreFilter } from './gitignore.js';
9
+ import { createVendorFilter, DEFAULT_VENDOR_DIRS } from './vendor.js';
10
+ import { createBinaryFilter } from './binary.js';
11
+ import { createCustomFilter } from './custom.js';
12
+ // Re-export all filter creators
13
+ export { createGitignoreFilter } from './gitignore.js';
14
+ export { createVendorFilter, DEFAULT_VENDOR_DIRS } from './vendor.js';
15
+ export { createBinaryFilter, BINARY_EXTENSIONS } from './binary.js';
16
+ export { createCustomFilter } from './custom.js';
17
+ /**
18
+ * Applies a chain of filters to a list of files.
19
+ *
20
+ * Each file is run through filters in order until one excludes it
21
+ * (short-circuit evaluation). The result includes both included files
22
+ * and excluded files with the reason and responsible filter name.
23
+ *
24
+ * @param files - Array of absolute file paths to filter
25
+ * @param filters - Array of filters to apply in order
26
+ * @returns Promise resolving to FilterResult with included and excluded lists
27
+ *
28
+ * @example
29
+ * ```typescript
30
+ * const filters = [
31
+ * createVendorFilter(['node_modules']),
32
+ * createBinaryFilter({}),
33
+ * ];
34
+ * const result = await applyFilters(['/a/b.js', '/a/node_modules/c.js'], filters);
35
+ * // result.included: ['/a/b.js']
36
+ * // result.excluded: [{ path: '/a/node_modules/c.js', filter: 'vendor', reason: '...' }]
37
+ * ```
38
+ */
39
+ export async function applyFilters(files, filters) {
40
+ const included = [];
41
+ const excluded = [];
42
+ for (const file of files) {
43
+ let wasExcluded = false;
44
+ // Run through filters in order, stop at first exclusion
45
+ for (const filter of filters) {
46
+ const shouldExclude = await filter.shouldExclude(file);
47
+ if (shouldExclude) {
48
+ excluded.push({
49
+ path: file,
50
+ reason: `Excluded by ${filter.name} filter`,
51
+ filter: filter.name,
52
+ });
53
+ wasExcluded = true;
54
+ break; // Short-circuit: stop checking other filters
55
+ }
56
+ }
57
+ if (!wasExcluded) {
58
+ included.push(file);
59
+ }
60
+ }
61
+ return { included, excluded };
62
+ }
63
/**
 * Builds the standard filter chain, ready to hand to `applyFilters`.
 *
 * The returned array is ordered as follows:
 * 1. Gitignore - `createGitignoreFilter(root)`
 * 2. Vendor - `createVendorFilter`, given `vendorDirs`
 * 3. Binary - `createBinaryFilter`, given the size limit and extra extensions
 * 4. Custom - `createCustomFilter`, given `patterns` and `root`
 *
 * Omitted config fields fall back to: a copy of `DEFAULT_VENDOR_DIRS`,
 * no custom patterns, a 1 MiB size limit, and no extra binary extensions.
 *
 * @param root - Root directory used for gitignore and custom pattern matching
 * @param config - Optional overrides for the individual filters
 * @returns Promise resolving to the four filters in the order listed above
 *
 * @example
 * ```typescript
 * const filters = await createDefaultFilters('/path/to/project', {
 *   vendorDirs: ['node_modules', 'vendor'],
 *   patterns: ['*.log', 'tmp/**'],
 *   maxFileSize: 500000,
 * });
 * const result = await applyFilters(files, filters);
 * ```
 */
export async function createDefaultFilters(root, config = {}) {
    // Only `undefined` triggers a default, matching destructuring semantics.
    const vendorDirs = config.vendorDirs === undefined
        ? [...DEFAULT_VENDOR_DIRS]
        : config.vendorDirs;
    const patterns = config.patterns === undefined ? [] : config.patterns;
    const maxFileSize = config.maxFileSize === undefined
        ? 1024 * 1024
        : config.maxFileSize;
    const additionalExtensions = config.additionalBinaryExtensions === undefined
        ? []
        : config.additionalBinaryExtensions;
    // Array elements are evaluated left to right, so the (async) gitignore
    // filter is created first and the rest follow in chain order.
    return [
        await createGitignoreFilter(root),
        createVendorFilter(vendorDirs),
        createBinaryFilter({ maxFileSize, additionalExtensions }),
        createCustomFilter(patterns, root),
    ];
}
98
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/discovery/filters/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAGH,OAAO,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AACvD,OAAO,EAAE,kBAAkB,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC;AACtE,OAAO,EAAE,kBAAkB,EAA4B,MAAM,aAAa,CAAC;AAC3E,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAEjD,gCAAgC;AAChC,OAAO,EAAE,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AACvD,OAAO,EAAE,kBAAkB,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC;AACtE,OAAO,EAAE,kBAAkB,EAAE,iBAAiB,EAA4B,MAAM,aAAa,CAAC;AAC9F,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAEjD;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,KAAe,EACf,OAAqB;IAErB,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,MAAM,QAAQ,GAAmB,EAAE,CAAC;IAEpC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,WAAW,GAAG,KAAK,CAAC;QAExB,wDAAwD;QACxD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,MAAM,aAAa,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;YAEvD,IAAI,aAAa,EAAE,CAAC;gBAClB,QAAQ,CAAC,IAAI,CAAC;oBACZ,IAAI,EAAE,IAAI;oBACV,MAAM,EAAE,eAAe,MAAM,CAAC,IAAI,SAAS;oBAC3C,MAAM,EAAE,MAAM,CAAC,IAAI;iBACpB,CAAC,CAAC;gBACH,WAAW,GAAG,IAAI,CAAC;gBACnB,MAAM,CAAC,6CAA6C;YACtD,CAAC;QACH,CAAC;QAED,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC;AAChC,CAAC;AA+BD;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,IAAY,EACZ,SAA8B,EAAE;IAEhC,MAAM,EACJ,UAAU,GAAG,CAAC,GAAG,mBAAmB,CAAC,EACrC,QAAQ,GAAG,EAAE,EACb,WAAW,GAAG,IAAI,GAAG,IAAI,EACzB,0BAA0B,GAAG,EAAE,GAChC,GAAG,MAAM,CAAC;IAEX,mCAAmC;IACnC,MAAM,eAAe,GAAG,MAAM,qBAAqB,CAAC,IAAI,CAAC,CAAC;IAC1D,MAAM,YAAY,GAAG,kBAAkB,CAAC,UAAU,CAAC,CAAC;IACpD,MAAM,YAAY,GAAG,kBAAkB,CAAC;QACtC,WAAW;QACX,oBAAoB,EAAE,0BAA0B;KACjD,CAAC,CAAC;IACH,MAAM,YAAY,GAAG,kBAAkB,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;IAExD,OAAO,CAAC,eAAe,EAAE,YAAY,EAAE,YAAY,EAAE,YAAY,CAAC,CAAC;AACrE,CAAC"}
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Vendor directory filter for file discovery.
3
+ *
4
+ * Excludes files within common vendor/dependency directories that typically
5
+ * contain third-party code not relevant for documentation purposes.
6
+ */
7
+ import type { FileFilter } from '../types.js';
8
/**
 * Directory names excluded by default.
 *
 * These are common directories holding third-party code, build output,
 * or generated files that should not be analyzed.
 */
export declare const DEFAULT_VENDOR_DIRS: readonly [
    "node_modules",
    "vendor",
    ".git",
    "dist",
    "build",
    "__pycache__",
    ".next",
    "venv",
    ".venv",
    "target",
];
14
/**
 * Builds a filter that rejects any path containing one of the given
 * directory names as a path segment.
 *
 * @param vendorDirs - Directory names to exclude; a file nested at any depth
 *                     beneath a directory with one of these names is excluded
 * @returns A FileFilter that reports whether a path lies in a vendor directory
 *
 * @example
 * ```typescript
 * const filter = createVendorFilter(['node_modules', 'vendor']);
 * filter.shouldExclude('/project/node_modules/lodash/index.js'); // true
 * filter.shouldExclude('/project/src/utils.js'); // false
 * ```
 */
export declare function createVendorFilter(
    vendorDirs: string[],
): FileFilter;
30
+ //# sourceMappingURL=vendor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vendor.d.ts","sourceRoot":"","sources":["../../../src/discovery/filters/vendor.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAE9C;;;;GAIG;AACH,eAAO,MAAM,mBAAmB,iHAWtB,CAAC;AAEX;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,kBAAkB,CAAC,UAAU,EAAE,MAAM,EAAE,GAAG,UAAU,CAoBnE"}
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Vendor directory filter for file discovery.
3
+ *
4
+ * Excludes files within common vendor/dependency directories that typically
5
+ * contain third-party code not relevant for documentation purposes.
6
+ */
7
+ import path from 'node:path';
8
/**
 * Directory names excluded by default.
 *
 * These are common directories holding third-party code, build output,
 * or generated files that should not be analyzed.
 */
export const DEFAULT_VENDOR_DIRS = [
    'node_modules', 'vendor', '.git',
    'dist', 'build',
    '__pycache__', '.next',
    'venv', '.venv',
    'target',
];
25
/**
 * Builds a filter that rejects any path containing one of the given
 * directory names as a path segment.
 *
 * Matching is by exact segment name, so `my_vendor` does not match `vendor`.
 * Every segment is checked, including the last one.
 *
 * @param vendorDirs - Directory names to exclude; a file nested at any depth
 *                     beneath a directory with one of these names is excluded
 * @returns A filter named `'vendor'` whose `shouldExclude(absolutePath)`
 *          returns `true` when any segment of the path is in `vendorDirs`
 *
 * @example
 * ```typescript
 * const filter = createVendorFilter(['node_modules', 'vendor']);
 * filter.shouldExclude('/project/node_modules/lodash/index.js'); // true
 * filter.shouldExclude('/project/src/utils.js'); // false
 * ```
 */
export function createVendorFilter(vendorDirs) {
    // Set gives O(1) membership checks per segment.
    const vendorSet = new Set(vendorDirs);
    // Always split on '/', and additionally on the platform separator when it
    // differs. Splitting on path.sep alone misses every vendor directory on
    // Windows when the path uses forward slashes.
    // NOTE(review): fast-glob, which the walker uses, is documented to emit
    // forward-slash paths on all platforms - confirm against the walker.
    const separatorPattern = path.sep === '/' ? /\// : /[\\/]/;
    return {
        name: 'vendor',
        shouldExclude(absolutePath) {
            return absolutePath
                .split(separatorPattern)
                .some((segment) => vendorSet.has(segment));
        },
    };
}
57
+ //# sourceMappingURL=vendor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vendor.js","sourceRoot":"","sources":["../../../src/discovery/filters/vendor.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,IAAI,MAAM,WAAW,CAAC;AAG7B;;;;GAIG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAG;IACjC,cAAc;IACd,QAAQ;IACR,MAAM;IACN,MAAM;IACN,OAAO;IACP,aAAa;IACb,OAAO;IACP,MAAM;IACN,OAAO;IACP,QAAQ;CACA,CAAC;AAEX;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,kBAAkB,CAAC,UAAoB;IACrD,iCAAiC;IACjC,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC;IAEtC,OAAO;QACL,IAAI,EAAE,QAAQ;QAEd,aAAa,CAAC,YAAoB;YAChC,uEAAuE;YACvE,MAAM,QAAQ,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAE9C,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;gBAC/B,IAAI,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;oBAC3B,OAAO,IAAI,CAAC;gBACd,CAAC;YACH,CAAC;YAED,OAAO,KAAK,CAAC;QACf,CAAC;KACF,CAAC;AACJ,CAAC"}
@@ -0,0 +1,66 @@
1
+ /**
2
+ * Discovery types for the agents-reverse file discovery system.
3
+ *
4
+ * This module defines the core interfaces and types used by the directory
5
+ * walker and filter chain for file discovery operations.
6
+ */
7
+ import type { Stats } from 'node:fs';
8
/**
 * Contract implemented by every filter in the discovery pipeline.
 *
 * A filter is asked, per discovered file, whether that file should be left
 * out of analysis. The answer may be given synchronously or as a promise.
 *
 * Examples: GitignoreFilter, BinaryFilter, VendorFilter, CustomPatternFilter
 */
export interface FileFilter {
    /** Filter name, used to report which filter excluded a file. */
    readonly name: string;
    /**
     * Decides whether a file is excluded from discovery.
     *
     * @param path - Absolute path to the file
     * @param stats - Optional file stats (for size-based filtering, etc.)
     * @returns `true` to exclude the file, `false` to keep it
     */
    shouldExclude(path: string, stats?: Stats): boolean | Promise<boolean>;
}
29
/**
 * One exclusion record: which file was dropped, by which filter, and why.
 */
export interface ExcludedFile {
    /** Absolute path of the file that was excluded. */
    path: string;
    /** Human-readable explanation of the exclusion. */
    reason: string;
    /** Name of the filter responsible for the exclusion. */
    filter: string;
}
40
/**
 * Outcome of running discovered files through the filter chain.
 */
export interface FilterResult {
    /** Paths that passed every filter and should be analyzed. */
    included: string[];
    /** Records for the files that were excluded, each with its reason. */
    excluded: ExcludedFile[];
}
49
/**
 * Settings that control the directory walker.
 */
export interface WalkerOptions {
    /** Root directory to walk (absolute path). */
    cwd: string;
    /**
     * Follow symbolic links while walking.
     * Default: false (per CONTEXT.md - skip symlinks by default)
     */
    followSymlinks?: boolean;
    /**
     * Include dotfiles (files starting with `.`).
     * Default: true (include dotfiles for analysis)
     */
    dot?: boolean;
}
66
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/discovery/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,SAAS,CAAC;AAErC;;;;;;;;GAQG;AACH,MAAM,WAAW,UAAU;IACzB,kEAAkE;IAClE,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAEtB;;;;;;OAMG;IACH,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,OAAO,CAAC;CACxE;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,yCAAyC;IACzC,IAAI,EAAE,MAAM,CAAC;IACb,0CAA0C;IAC1C,MAAM,EAAE,MAAM,CAAC;IACf,iDAAiD;IACjD,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,2DAA2D;IAC3D,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,4CAA4C;IAC5C,QAAQ,EAAE,YAAY,EAAE,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,6CAA6C;IAC7C,GAAG,EAAE,MAAM,CAAC;IAEZ;;;OAGG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;;OAGG;IACH,GAAG,CAAC,EAAE,OAAO,CAAC;CACf"}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Discovery types for the agents-reverse file discovery system.
3
+ *
4
+ * This module defines the core interfaces and types used by the directory
5
+ * walker and filter chain for file discovery operations.
6
+ */
7
+ export {};
8
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/discovery/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG"}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Directory walker for the agents-reverse file discovery system.
3
+ *
4
+ * Uses fast-glob to traverse directories and return all candidate files.
5
+ * Filters are applied separately via the filter chain (not in this module).
6
+ */
7
+ import type { WalkerOptions } from './types.js';
8
/**
 * Walk a directory tree and return all files.
 *
 * This walker returns ALL files in the directory tree. Filtering happens
 * separately via the filter chain (gitignore, binary, vendor, custom patterns).
 *
 * Two exceptions to "all files": anything under a `.git` directory is always
 * skipped, and entries that cannot be read (for example because of
 * permission errors) are silently omitted instead of rejecting the promise.
 *
 * @param options - Walker configuration
 * @returns Array of absolute file paths
 *
 * @example
 * ```typescript
 * const files = await walkDirectory({ cwd: '/path/to/repo' });
 * console.log(`Found ${files.length} files`);
 * ```
 */
export declare function walkDirectory(options: WalkerOptions): Promise<string[]>;
24
+ //# sourceMappingURL=walker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"walker.d.ts","sourceRoot":"","sources":["../../src/discovery/walker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAEhD;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,aAAa,CAAC,OAAO,EAAE,aAAa,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAW7E"}
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Directory walker for the agents-reverse file discovery system.
3
+ *
4
+ * Uses fast-glob to traverse directories and return all candidate files.
5
+ * Filters are applied separately via the filter chain (not in this module).
6
+ */
7
+ import fg from 'fast-glob';
8
/**
 * Enumerate the files beneath a root directory.
 *
 * The walker deliberately applies no project-specific filtering: gitignore,
 * binary, vendor and custom-pattern exclusions are handled later by the
 * filter chain. The only paths dropped here are those under `.git`
 * directories, which are skipped for performance.
 *
 * @param options - Walker configuration (`cwd`, plus optional `dot` and
 *   `followSymlinks` flags)
 * @returns Array of absolute file paths (directories are not included)
 *
 * @example
 * ```typescript
 * const files = await walkDirectory({ cwd: '/path/to/repo' });
 * console.log(`Found ${files.length} files`);
 * ```
 */
export async function walkDirectory(options) {
    const rootDir = options.cwd;
    // Dotfiles are included unless the caller opts out.
    const includeDotfiles = options.dot ?? true;
    // Symlinks are not followed unless the caller opts in.
    const traverseSymlinks = options.followSymlinks ?? false;
    const globOptions = {
        cwd: rootDir,
        absolute: true,
        onlyFiles: true,
        dot: includeDotfiles,
        followSymbolicLinks: traverseSymlinks,
        // Unreadable entries (e.g. permission errors) are skipped, not thrown.
        suppressErrors: true,
        // .git internals are never interesting and are expensive to traverse.
        ignore: ['**/.git/**'],
    };
    const matchedFiles = await fg.glob('**/*', globOptions);
    return matchedFiles;
}
35
+ //# sourceMappingURL=walker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"walker.js","sourceRoot":"","sources":["../../src/discovery/walker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,MAAM,WAAW,CAAC;AAG3B;;;;;;;;;;;;;;GAcG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,OAAsB;IACxD,OAAO,EAAE,CAAC,IAAI,CAAC,MAAM,EAAE;QACrB,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,QAAQ,EAAE,IAAI;QACd,SAAS,EAAE,IAAI;QACf,GAAG,EAAE,OAAO,CAAC,GAAG,IAAI,IAAI;QACxB,mBAAmB,EAAE,OAAO,CAAC,cAAc,IAAI,KAAK;QACpD,cAAc,EAAE,IAAI,EAAE,qDAAqD;QAC3E,gDAAgD;QAChD,MAAM,EAAE,CAAC,YAAY,CAAC;KACvB,CAAC,CAAC;AACL,CAAC"}
@@ -0,0 +1,38 @@
1
/**
 * One slice of a file produced by `chunkFile`.
 */
export interface Chunk {
    /** Zero-based position of this chunk in the returned array */
    index: number;
    /** The chunk's lines joined with `\n` */
    content: string;
    /** Token count recorded for this chunk, including any overlap lines */
    tokens: number;
    /** Zero-based index of the chunk's first line within the file */
    startLine: number;
    /** Zero-based index of the chunk's last line within the file (inclusive) */
    endLine: number;
}
8
/**
 * Tuning knobs for `chunkFile`. Both fields are optional and fall back to the
 * defaults noted on each field.
 */
export interface ChunkOptions {
    /** Target tokens per chunk (default: 3000) */
    chunkSize?: number;
    /** Lines of overlap between chunks (default: 10) */
    overlapLines?: number;
}
14
/**
 * Check if a file needs to be chunked for processing.
 *
 * The comparison is strict: content whose token count equals `threshold`
 * does not need chunking.
 *
 * @param content - File content
 * @param threshold - Token threshold for chunking (default: 4000)
 * @returns true if file should be chunked
 */
export declare function needsChunking(content: string, threshold?: number): boolean;
22
/**
 * Split a large file into overlapping chunks for map-reduce summarization.
 *
 * Each chunk includes some overlap with the previous chunk to maintain
 * context continuity. The overlap uses line-based boundaries to avoid
 * cutting in the middle of statements.
 *
 * The content is split on `\n` and lines are never broken apart, so a single
 * line larger than the chunk size still ends up whole inside one chunk.
 *
 * @param content - File content to chunk
 * @param options - Chunking options
 * @returns Array of chunks with metadata
 */
export declare function chunkFile(content: string, options?: ChunkOptions): Chunk[];
34
/**
 * Get total tokens across all chunks.
 *
 * This is the plain sum of each chunk's `tokens` field, so lines shared by
 * neighbouring chunks as overlap are counted once per chunk.
 *
 * @param chunks - Chunks to total
 * @returns Sum of the `tokens` fields (0 for an empty array)
 */
export declare function getTotalChunkTokens(chunks: Chunk[]): number;
38
+ //# sourceMappingURL=chunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../../src/generation/budget/chunker.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,KAAK;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,YAAY;IAC3B,8CAA8C;IAC9C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oDAAoD;IACpD,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAKD;;;;;;GAMG;AACH,wBAAgB,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,SAAS,GAAE,MAAa,GAAG,OAAO,CAEhF;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,SAAS,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,YAAiB,GAAG,KAAK,EAAE,CAoD9E;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,CAE3D"}
@@ -0,0 +1,73 @@
1
+ import { countTokens } from './counter.js';
2
+ const DEFAULT_CHUNK_SIZE = 3000;
3
+ const DEFAULT_OVERLAP_LINES = 10;
4
/**
 * Decide whether a file is large enough to require chunking.
 *
 * The file qualifies only when its token count is strictly greater than the
 * threshold.
 *
 * @param content - File content
 * @param threshold - Token threshold for chunking (default: 4000)
 * @returns true if file should be chunked
 */
export function needsChunking(content, threshold = 4000) {
    const tokenCount = countTokens(content);
    return tokenCount > threshold;
}
14
/**
 * Split a large file into overlapping chunks for map-reduce summarization.
 *
 * Lines are accumulated until adding the next one would push the running
 * token count above `chunkSize`. The accumulated lines are then emitted as a
 * chunk, and the next chunk is seeded with the trailing `overlapLines` lines
 * so that neighbouring chunks share context. Lines are never split, so a
 * single line larger than `chunkSize` still produces an oversized chunk.
 *
 * The carried-over overlap is bounded in two ways so that chunking always
 * makes progress:
 *  - it is always shorter than the chunk it was taken from, so a chunk can
 *    never be a superset of its predecessor;
 *  - it is trimmed from the front until the incoming line fits next to it
 *    within `chunkSize` (or until no overlap is left).
 * When the overlap is small compared to `chunkSize` (the usual case) neither
 * bound applies and the full `overlapLines` lines are carried over.
 *
 * @param content - File content to chunk
 * @param options - Chunking options
 * @returns Array of chunks with metadata; `startLine`/`endLine` are
 *   zero-based, inclusive line indices
 */
export function chunkFile(content, options = {}) {
    const { chunkSize = DEFAULT_CHUNK_SIZE, overlapLines = DEFAULT_OVERLAP_LINES, } = options;
    const lines = content.split('\n');
    const chunks = [];
    let currentLines = [];
    let currentTokens = 0;
    let startLine = 0;
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        const lineTokens = countTokens(line + '\n');
        // Check if adding this line would exceed chunk size
        if (currentTokens + lineTokens > chunkSize && currentLines.length > 0) {
            // Save current chunk
            chunks.push({
                index: chunks.length,
                content: currentLines.join('\n'),
                tokens: currentTokens,
                startLine,
                endLine: i - 1,
            });
            // Carry over at most `overlapLines` lines, and always fewer lines
            // than were just emitted; otherwise the next chunk would repeat
            // the whole previous chunk.
            const carryCount = Math.max(0, Math.min(overlapLines, currentLines.length - 1));
            let overlapContent = currentLines.slice(currentLines.length - carryCount);
            let overlapTokens = countTokens(overlapContent.join('\n'));
            // Drop the oldest overlap lines until the incoming line fits, so
            // the overlap alone cannot force the new chunk over the limit.
            while (overlapContent.length > 0 && overlapTokens + lineTokens > chunkSize) {
                overlapContent = overlapContent.slice(1);
                overlapTokens = countTokens(overlapContent.join('\n'));
            }
            // Start new chunk with overlap from previous
            currentLines = overlapContent;
            currentTokens = overlapTokens;
            startLine = i - overlapContent.length;
        }
        currentLines.push(line);
        currentTokens += lineTokens;
    }
    // Add final chunk if there's remaining content
    if (currentLines.length > 0) {
        chunks.push({
            index: chunks.length,
            content: currentLines.join('\n'),
            tokens: currentTokens,
            startLine,
            endLine: lines.length - 1,
        });
    }
    return chunks;
}
67
/**
 * Sum the `tokens` field of every chunk.
 *
 * Overlap lines shared by neighbouring chunks are counted once per chunk.
 *
 * @param chunks - Chunks to total
 * @returns Combined token count (0 for an empty array)
 */
export function getTotalChunkTokens(chunks) {
    let total = 0;
    chunks.forEach((chunk) => {
        total += chunk.tokens;
    });
    return total;
}
73
+ //# sourceMappingURL=chunker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../../src/generation/budget/chunker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAiB3C,MAAM,kBAAkB,GAAG,IAAI,CAAC;AAChC,MAAM,qBAAqB,GAAG,EAAE,CAAC;AAEjC;;;;;;GAMG;AACH,MAAM,UAAU,aAAa,CAAC,OAAe,EAAE,YAAoB,IAAI;IACrE,OAAO,WAAW,CAAC,OAAO,CAAC,GAAG,SAAS,CAAC;AAC1C,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,SAAS,CAAC,OAAe,EAAE,UAAwB,EAAE;IACnE,MAAM,EACJ,SAAS,GAAG,kBAAkB,EAC9B,YAAY,GAAG,qBAAqB,GACrC,GAAG,OAAO,CAAC;IAEZ,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,MAAM,GAAY,EAAE,CAAC;IAE3B,IAAI,YAAY,GAAa,EAAE,CAAC;IAChC,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,UAAU,GAAG,WAAW,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;QAE5C,oDAAoD;QACpD,IAAI,aAAa,GAAG,UAAU,GAAG,SAAS,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtE,qBAAqB;YACrB,MAAM,CAAC,IAAI,CAAC;gBACV,KAAK,EAAE,MAAM,CAAC,MAAM;gBACpB,OAAO,EAAE,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC;gBAChC,MAAM,EAAE,aAAa;gBACrB,SAAS;gBACT,OAAO,EAAE,CAAC,GAAG,CAAC;aACf,CAAC,CAAC;YAEH,6CAA6C;YAC7C,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,YAAY,CAAC,MAAM,GAAG,YAAY,CAAC,CAAC;YACrE,MAAM,cAAc,GAAG,YAAY,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;YACxD,YAAY,GAAG,cAAc,CAAC;YAC9B,aAAa,GAAG,WAAW,CAAC,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;YACvD,SAAS,GAAG,CAAC,GAAG,cAAc,CAAC,MAAM,CAAC;QACxC,CAAC;QAED,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxB,aAAa,IAAI,UAAU,CAAC;IAC9B,CAAC;IAED,+CAA+C;IAC/C,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,MAAM,CAAC,IAAI,CAAC;YACV,KAAK,EAAE,MAAM,CAAC,MAAM;YACpB,OAAO,EAAE,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC;YAChC,MAAM,EAAE,aAAa;YACrB,SAAS;YACT,OAAO,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC;SAC1B,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,MAAe;IACjD,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;AAC9D,CAAC"}
@@ -0,0 +1,26 @@
1
/**
 * Count tokens in content using BPE tokenization.
 * Uses cl100k_base encoding (compatible with Claude/GPT-4).
 *
 * NOTE(review): cl100k_base is an OpenAI encoding; counts for other model
 * families are presumably approximations - confirm before relying on exact
 * limits.
 *
 * @param content - Text to count tokens in
 * @returns Token count
 */
export declare function countTokens(content: string): number;
9
/**
 * Check if content fits within token limit without fully encoding.
 * More efficient than counting when you only need a boolean check.
 *
 * NOTE(review): whether content of exactly `limit` tokens counts as within
 * the limit is not visible from this declaration - confirm against the
 * implementation.
 *
 * @param content - Text to check
 * @param limit - Maximum allowed tokens
 * @returns true if content is within limit
 */
export declare function isWithinLimit(content: string, limit: number): boolean;
18
/**
 * Estimate prompt overhead for a given file type.
 * Includes template tokens + system prompt portion.
 *
 * NOTE(review): the accepted `fileType` values and the result for an
 * unrecognised type are not visible from this declaration - confirm against
 * the implementation.
 *
 * @param fileType - Type of file being summarized
 * @returns Estimated overhead in tokens
 */
export declare function estimatePromptOverhead(fileType: string): number;
26
+ //# sourceMappingURL=counter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"counter.d.ts","sourceRoot":"","sources":["../../../src/generation/budget/counter.ts"],"names":[],"mappings":"AAEA;;;;;;GAMG;AACH,wBAAgB,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAEnD;AAED;;;;;;;GAOG;AACH,wBAAgB,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAGrE;AAED;;;;;;GAMG;AACH,wBAAgB,sBAAsB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAe/D"}