codevault 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. package/.env.example +75 -40
  2. package/README.md +112 -345
  3. package/package.json +4 -3
  4. package/dist/chunking/file-grouper.d.ts +0 -39
  5. package/dist/chunking/file-grouper.d.ts.map +0 -1
  6. package/dist/chunking/file-grouper.js +0 -181
  7. package/dist/chunking/file-grouper.js.map +0 -1
  8. package/dist/chunking/semantic-chunker.d.ts +0 -37
  9. package/dist/chunking/semantic-chunker.d.ts.map +0 -1
  10. package/dist/chunking/semantic-chunker.js +0 -172
  11. package/dist/chunking/semantic-chunker.js.map +0 -1
  12. package/dist/chunking/token-counter.d.ts +0 -28
  13. package/dist/chunking/token-counter.d.ts.map +0 -1
  14. package/dist/chunking/token-counter.js +0 -207
  15. package/dist/chunking/token-counter.js.map +0 -1
  16. package/dist/cli/commands/ask-cmd.d.ts +0 -3
  17. package/dist/cli/commands/ask-cmd.d.ts.map +0 -1
  18. package/dist/cli/commands/ask-cmd.js +0 -130
  19. package/dist/cli/commands/ask-cmd.js.map +0 -1
  20. package/dist/cli/commands/config-cmd.d.ts +0 -3
  21. package/dist/cli/commands/config-cmd.d.ts.map +0 -1
  22. package/dist/cli/commands/config-cmd.js +0 -245
  23. package/dist/cli/commands/config-cmd.js.map +0 -1
  24. package/dist/cli/commands/context.d.ts +0 -3
  25. package/dist/cli/commands/context.d.ts.map +0 -1
  26. package/dist/cli/commands/context.js +0 -98
  27. package/dist/cli/commands/context.js.map +0 -1
  28. package/dist/cli/commands/interactive-config.d.ts +0 -2
  29. package/dist/cli/commands/interactive-config.d.ts.map +0 -1
  30. package/dist/cli/commands/interactive-config.js +0 -274
  31. package/dist/cli/commands/interactive-config.js.map +0 -1
  32. package/dist/cli.d.ts +0 -3
  33. package/dist/cli.d.ts.map +0 -1
  34. package/dist/cli.js +0 -398
  35. package/dist/cli.js.map +0 -1
  36. package/dist/codemap/io.d.ts +0 -5
  37. package/dist/codemap/io.d.ts.map +0 -1
  38. package/dist/codemap/io.js +0 -30
  39. package/dist/codemap/io.js.map +0 -1
  40. package/dist/config/apply-env.d.ts +0 -15
  41. package/dist/config/apply-env.d.ts.map +0 -1
  42. package/dist/config/apply-env.js +0 -91
  43. package/dist/config/apply-env.js.map +0 -1
  44. package/dist/config/loader.d.ts +0 -57
  45. package/dist/config/loader.d.ts.map +0 -1
  46. package/dist/config/loader.js +0 -279
  47. package/dist/config/loader.js.map +0 -1
  48. package/dist/config/types.d.ts +0 -46
  49. package/dist/config/types.d.ts.map +0 -1
  50. package/dist/config/types.js +0 -2
  51. package/dist/config/types.js.map +0 -1
  52. package/dist/context/packs.d.ts +0 -33
  53. package/dist/context/packs.d.ts.map +0 -1
  54. package/dist/context/packs.js +0 -180
  55. package/dist/context/packs.js.map +0 -1
  56. package/dist/core/batch-indexer.d.ts +0 -44
  57. package/dist/core/batch-indexer.d.ts.map +0 -1
  58. package/dist/core/batch-indexer.js +0 -161
  59. package/dist/core/batch-indexer.js.map +0 -1
  60. package/dist/core/indexer.d.ts +0 -3
  61. package/dist/core/indexer.d.ts.map +0 -1
  62. package/dist/core/indexer.js +0 -624
  63. package/dist/core/indexer.js.map +0 -1
  64. package/dist/core/metadata.d.ts +0 -19
  65. package/dist/core/metadata.d.ts.map +0 -1
  66. package/dist/core/metadata.js +0 -161
  67. package/dist/core/metadata.js.map +0 -1
  68. package/dist/core/search.d.ts +0 -7
  69. package/dist/core/search.d.ts.map +0 -1
  70. package/dist/core/search.js +0 -542
  71. package/dist/core/search.js.map +0 -1
  72. package/dist/core/symbol-extractor.d.ts +0 -3
  73. package/dist/core/symbol-extractor.d.ts.map +0 -1
  74. package/dist/core/symbol-extractor.js +0 -78
  75. package/dist/core/symbol-extractor.js.map +0 -1
  76. package/dist/core/types.d.ts +0 -104
  77. package/dist/core/types.d.ts.map +0 -1
  78. package/dist/core/types.js +0 -2
  79. package/dist/core/types.js.map +0 -1
  80. package/dist/database/db.d.ts +0 -63
  81. package/dist/database/db.d.ts.map +0 -1
  82. package/dist/database/db.js +0 -205
  83. package/dist/database/db.js.map +0 -1
  84. package/dist/indexer/merkle.d.ts +0 -13
  85. package/dist/indexer/merkle.d.ts.map +0 -1
  86. package/dist/indexer/merkle.js +0 -86
  87. package/dist/indexer/merkle.js.map +0 -1
  88. package/dist/indexer/update.d.ts +0 -19
  89. package/dist/indexer/update.d.ts.map +0 -1
  90. package/dist/indexer/update.js +0 -40
  91. package/dist/indexer/update.js.map +0 -1
  92. package/dist/indexer/watch.d.ts +0 -21
  93. package/dist/indexer/watch.d.ts.map +0 -1
  94. package/dist/indexer/watch.js +0 -222
  95. package/dist/indexer/watch.js.map +0 -1
  96. package/dist/languages/rules.d.ts +0 -11
  97. package/dist/languages/rules.d.ts.map +0 -1
  98. package/dist/languages/rules.js +0 -371
  99. package/dist/languages/rules.js.map +0 -1
  100. package/dist/languages/tree-sitter-loader.d.ts +0 -27
  101. package/dist/languages/tree-sitter-loader.d.ts.map +0 -1
  102. package/dist/languages/tree-sitter-loader.js +0 -76
  103. package/dist/languages/tree-sitter-loader.js.map +0 -1
  104. package/dist/mcp/tools/ask-codebase.d.ts +0 -85
  105. package/dist/mcp/tools/ask-codebase.d.ts.map +0 -1
  106. package/dist/mcp/tools/ask-codebase.js +0 -125
  107. package/dist/mcp/tools/ask-codebase.js.map +0 -1
  108. package/dist/mcp/tools/use-context-pack.d.ts +0 -57
  109. package/dist/mcp/tools/use-context-pack.d.ts.map +0 -1
  110. package/dist/mcp/tools/use-context-pack.js +0 -91
  111. package/dist/mcp/tools/use-context-pack.js.map +0 -1
  112. package/dist/mcp-server.d.ts +0 -3
  113. package/dist/mcp-server.d.ts.map +0 -1
  114. package/dist/mcp-server.js +0 -518
  115. package/dist/mcp-server.js.map +0 -1
  116. package/dist/providers/base.d.ts +0 -39
  117. package/dist/providers/base.d.ts.map +0 -1
  118. package/dist/providers/base.js +0 -198
  119. package/dist/providers/base.js.map +0 -1
  120. package/dist/providers/chat-llm.d.ts +0 -30
  121. package/dist/providers/chat-llm.d.ts.map +0 -1
  122. package/dist/providers/chat-llm.js +0 -82
  123. package/dist/providers/chat-llm.js.map +0 -1
  124. package/dist/providers/index.d.ts +0 -5
  125. package/dist/providers/index.d.ts.map +0 -1
  126. package/dist/providers/index.js +0 -12
  127. package/dist/providers/index.js.map +0 -1
  128. package/dist/providers/ollama.d.ts +0 -13
  129. package/dist/providers/ollama.d.ts.map +0 -1
  130. package/dist/providers/ollama.js +0 -50
  131. package/dist/providers/ollama.js.map +0 -1
  132. package/dist/providers/openai.d.ts +0 -14
  133. package/dist/providers/openai.d.ts.map +0 -1
  134. package/dist/providers/openai.js +0 -122
  135. package/dist/providers/openai.js.map +0 -1
  136. package/dist/providers/token-counter.d.ts +0 -2
  137. package/dist/providers/token-counter.d.ts.map +0 -1
  138. package/dist/providers/token-counter.js +0 -18
  139. package/dist/providers/token-counter.js.map +0 -1
  140. package/dist/ranking/api-reranker.d.ts +0 -18
  141. package/dist/ranking/api-reranker.d.ts.map +0 -1
  142. package/dist/ranking/api-reranker.js +0 -134
  143. package/dist/ranking/api-reranker.js.map +0 -1
  144. package/dist/ranking/symbol-boost.d.ts +0 -15
  145. package/dist/ranking/symbol-boost.d.ts.map +0 -1
  146. package/dist/ranking/symbol-boost.js +0 -154
  147. package/dist/ranking/symbol-boost.js.map +0 -1
  148. package/dist/search/bm25.d.ts +0 -17
  149. package/dist/search/bm25.d.ts.map +0 -1
  150. package/dist/search/bm25.js +0 -56
  151. package/dist/search/bm25.js.map +0 -1
  152. package/dist/search/hybrid.d.ts +0 -21
  153. package/dist/search/hybrid.d.ts.map +0 -1
  154. package/dist/search/hybrid.js +0 -50
  155. package/dist/search/hybrid.js.map +0 -1
  156. package/dist/search/scope.d.ts +0 -5
  157. package/dist/search/scope.d.ts.map +0 -1
  158. package/dist/search/scope.js +0 -107
  159. package/dist/search/scope.js.map +0 -1
  160. package/dist/storage/encrypted-chunks.d.ts +0 -40
  161. package/dist/storage/encrypted-chunks.d.ts.map +0 -1
  162. package/dist/storage/encrypted-chunks.js +0 -237
  163. package/dist/storage/encrypted-chunks.js.map +0 -1
  164. package/dist/symbols/extract.d.ts +0 -15
  165. package/dist/symbols/extract.d.ts.map +0 -1
  166. package/dist/symbols/extract.js +0 -187
  167. package/dist/symbols/extract.js.map +0 -1
  168. package/dist/symbols/graph.d.ts +0 -3
  169. package/dist/symbols/graph.d.ts.map +0 -1
  170. package/dist/symbols/graph.js +0 -89
  171. package/dist/symbols/graph.js.map +0 -1
  172. package/dist/synthesis/markdown-formatter.d.ts +0 -13
  173. package/dist/synthesis/markdown-formatter.d.ts.map +0 -1
  174. package/dist/synthesis/markdown-formatter.js +0 -104
  175. package/dist/synthesis/markdown-formatter.js.map +0 -1
  176. package/dist/synthesis/prompt-builder.d.ts +0 -21
  177. package/dist/synthesis/prompt-builder.d.ts.map +0 -1
  178. package/dist/synthesis/prompt-builder.js +0 -129
  179. package/dist/synthesis/prompt-builder.js.map +0 -1
  180. package/dist/synthesis/synthesizer.d.ts +0 -30
  181. package/dist/synthesis/synthesizer.d.ts.map +0 -1
  182. package/dist/synthesis/synthesizer.js +0 -210
  183. package/dist/synthesis/synthesizer.js.map +0 -1
  184. package/dist/types/ast.d.ts +0 -3
  185. package/dist/types/ast.d.ts.map +0 -1
  186. package/dist/types/ast.js +0 -2
  187. package/dist/types/ast.js.map +0 -1
  188. package/dist/types/codemap.d.ts +0 -58
  189. package/dist/types/codemap.d.ts.map +0 -1
  190. package/dist/types/codemap.js +0 -224
  191. package/dist/types/codemap.js.map +0 -1
  192. package/dist/types/context-pack.d.ts +0 -47
  193. package/dist/types/context-pack.d.ts.map +0 -1
  194. package/dist/types/context-pack.js +0 -44
  195. package/dist/types/context-pack.js.map +0 -1
  196. package/dist/types/search.d.ts +0 -15
  197. package/dist/types/search.d.ts.map +0 -1
  198. package/dist/types/search.js +0 -11
  199. package/dist/types/search.js.map +0 -1
  200. package/dist/utils/cli-ui.d.ts +0 -44
  201. package/dist/utils/cli-ui.d.ts.map +0 -1
  202. package/dist/utils/cli-ui.js +0 -139
  203. package/dist/utils/cli-ui.js.map +0 -1
  204. package/dist/utils/indexer-with-progress.d.ts +0 -10
  205. package/dist/utils/indexer-with-progress.d.ts.map +0 -1
  206. package/dist/utils/indexer-with-progress.js +0 -58
  207. package/dist/utils/indexer-with-progress.js.map +0 -1
  208. package/dist/utils/rate-limiter.d.ts +0 -34
  209. package/dist/utils/rate-limiter.d.ts.map +0 -1
  210. package/dist/utils/rate-limiter.js +0 -178
  211. package/dist/utils/rate-limiter.js.map +0 -1
@@ -1,181 +0,0 @@
1
- import { batchAnalyzeCodeSize } from './token-counter.js';
2
- function getSizeLimits(profile) {
3
- if (profile.useTokens && profile.tokenCounter) {
4
- return {
5
- optimal: profile.optimalTokens,
6
- min: profile.minChunkTokens,
7
- max: profile.maxChunkTokens,
8
- overlap: profile.overlapTokens,
9
- unit: 'tokens'
10
- };
11
- }
12
- return {
13
- optimal: profile.optimalChars,
14
- min: profile.minChunkChars,
15
- max: profile.maxChunkChars,
16
- overlap: profile.overlapChars,
17
- unit: 'characters'
18
- };
19
- }
20
- async function batchAnalyzeNodesInternal(nodes, source, profile) {
21
- const codes = nodes.map(node => source.slice(node.startIndex, node.endIndex));
22
- const limits = getSizeLimits(profile);
23
- if (profile.useTokens && profile.tokenCounter) {
24
- const analyses = await batchAnalyzeCodeSize(codes, limits, profile.tokenCounter, false);
25
- return nodes.map((node, i) => ({
26
- node,
27
- size: analyses[i].size,
28
- code: codes[i]
29
- }));
30
- }
31
- return nodes.map((node, i) => ({
32
- node,
33
- size: codes[i].length,
34
- code: codes[i]
35
- }));
36
- }
37
- function isContainerNode(node, rule) {
38
- const containerTypes = [
39
- 'class_declaration',
40
- 'class_definition',
41
- 'interface_declaration',
42
- 'module_declaration',
43
- 'namespace_declaration',
44
- 'trait_declaration',
45
- 'enum_declaration'
46
- ];
47
- return containerTypes.includes(node.type);
48
- }
49
- function identifySemanticGroups(nodes, source, nodeAnalyses, rule) {
50
- const groups = [];
51
- let currentGroup = {
52
- type: 'file_section',
53
- nodes: [],
54
- analyses: [],
55
- parentNode: null
56
- };
57
- for (let i = 0; i < nodes.length; i++) {
58
- const node = nodes[i];
59
- const analysis = nodeAnalyses[i];
60
- if (isContainerNode(node, rule)) {
61
- if (currentGroup.nodes.length > 0) {
62
- groups.push(currentGroup);
63
- }
64
- currentGroup = {
65
- type: 'container',
66
- containerType: node.type,
67
- nodes: [node],
68
- analyses: [analysis],
69
- parentNode: node
70
- };
71
- groups.push(currentGroup);
72
- currentGroup = {
73
- type: 'file_section',
74
- nodes: [],
75
- analyses: [],
76
- parentNode: null
77
- };
78
- }
79
- else {
80
- currentGroup.nodes.push(node);
81
- currentGroup.analyses.push(analysis);
82
- }
83
- }
84
- if (currentGroup.nodes.length > 0) {
85
- groups.push(currentGroup);
86
- }
87
- return groups;
88
- }
89
- async function combineGroupsToOptimalSize(semanticGroups, source, profile, limits) {
90
- const optimalGroups = [];
91
- let currentCombinedGroup = {
92
- nodes: [],
93
- totalSize: 0,
94
- groupInfo: []
95
- };
96
- for (const group of semanticGroups) {
97
- const groupTotalSize = group.analyses.reduce((sum, a) => sum + a.size, 0);
98
- if (groupTotalSize > limits.optimal) {
99
- if (currentCombinedGroup.nodes.length > 0) {
100
- optimalGroups.push(currentCombinedGroup);
101
- }
102
- optimalGroups.push({
103
- nodes: group.nodes,
104
- totalSize: groupTotalSize,
105
- groupInfo: [group]
106
- });
107
- currentCombinedGroup = {
108
- nodes: [],
109
- totalSize: 0,
110
- groupInfo: []
111
- };
112
- continue;
113
- }
114
- if (currentCombinedGroup.totalSize + groupTotalSize > limits.max) {
115
- if (currentCombinedGroup.nodes.length > 0) {
116
- optimalGroups.push(currentCombinedGroup);
117
- }
118
- currentCombinedGroup = {
119
- nodes: group.nodes,
120
- totalSize: groupTotalSize,
121
- groupInfo: [group]
122
- };
123
- continue;
124
- }
125
- currentCombinedGroup.nodes.push(...group.nodes);
126
- currentCombinedGroup.totalSize += groupTotalSize;
127
- currentCombinedGroup.groupInfo.push(group);
128
- if (currentCombinedGroup.totalSize >= limits.optimal * 0.9) {
129
- optimalGroups.push(currentCombinedGroup);
130
- currentCombinedGroup = {
131
- nodes: [],
132
- totalSize: 0,
133
- groupInfo: []
134
- };
135
- }
136
- }
137
- if (currentCombinedGroup.nodes.length > 0) {
138
- optimalGroups.push(currentCombinedGroup);
139
- }
140
- return optimalGroups;
141
- }
142
- export async function groupNodesForChunking(nodes, source, profile, rule) {
143
- if (!nodes || nodes.length === 0)
144
- return [];
145
- const limits = getSizeLimits(profile);
146
- if (nodes.length <= 10) {
147
- return nodes.map(node => ({
148
- nodes: [node],
149
- totalSize: 0,
150
- groupInfo: []
151
- }));
152
- }
153
- const nodeAnalyses = await batchAnalyzeNodesInternal(nodes, source, profile);
154
- const semanticGroups = identifySemanticGroups(nodes, source, nodeAnalyses, rule);
155
- const optimalGroups = await combineGroupsToOptimalSize(semanticGroups, source, profile, limits);
156
- return optimalGroups;
157
- }
158
- export function createCombinedChunk(nodeGroup, source, filerel) {
159
- if (!nodeGroup.nodes || nodeGroup.nodes.length === 0) {
160
- return null;
161
- }
162
- const codes = nodeGroup.nodes.map(node => source.slice(node.startIndex, node.endIndex));
163
- const combinedCode = codes.join('\n\n');
164
- const firstNode = nodeGroup.nodes[0];
165
- const lastNode = nodeGroup.nodes[nodeGroup.nodes.length - 1];
166
- return {
167
- code: combinedCode,
168
- node: {
169
- ...firstNode,
170
- type: `${firstNode.type}_group_${nodeGroup.nodes.length}`,
171
- endIndex: lastNode.endIndex
172
- },
173
- metadata: {
174
- isGroup: true,
175
- nodeCount: nodeGroup.nodes.length,
176
- totalSize: nodeGroup.totalSize,
177
- groupTypes: nodeGroup.groupInfo?.map(g => g.type) || ['combined']
178
- }
179
- };
180
- }
181
- //# sourceMappingURL=file-grouper.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"file-grouper.js","sourceRoot":"","sources":["../../src/chunking/file-grouper.ts"],"names":[],"mappings":"AAAA,OAAO,EAAmB,oBAAoB,EAAyB,MAAM,oBAAoB,CAAC;AAqClG,SAAS,aAAa,CAAC,OAAqB;IAC1C,IAAI,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,YAAY,EAAE,CAAC;QAC9C,OAAO;YACL,OAAO,EAAE,OAAO,CAAC,aAAa;YAC9B,GAAG,EAAE,OAAO,CAAC,cAAc;YAC3B,GAAG,EAAE,OAAO,CAAC,cAAc;YAC3B,OAAO,EAAE,OAAO,CAAC,aAAa;YAC9B,IAAI,EAAE,QAAQ;SACf,CAAC;IACJ,CAAC;IACD,OAAO;QACL,OAAO,EAAE,OAAO,CAAC,YAAY;QAC7B,GAAG,EAAE,OAAO,CAAC,aAAa;QAC1B,GAAG,EAAE,OAAO,CAAC,aAAa;QAC1B,OAAO,EAAE,OAAO,CAAC,YAAY;QAC7B,IAAI,EAAE,YAAY;KACnB,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,yBAAyB,CAAC,KAAuB,EAAE,MAAc,EAAE,OAAqB;IACrG,MAAM,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC9E,MAAM,MAAM,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;IAEtC,IAAI,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,YAAY,EAAE,CAAC;QAC9C,MAAM,QAAQ,GAAG,MAAM,oBAAoB,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC;QACxF,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;YAC7B,IAAI;YACJ,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI;YACtB,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC;SACf,CAAC,CAAC,CAAC;IACN,CAAC;IAED,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QAC7B,IAAI;QACJ,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM;QACrB,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC;KACf,CAAC,CAAC,CAAC;AACN,CAAC;AAED,SAAS,eAAe,CAAC,IAAoB,EAAE,IAAkB;IAC/D,MAAM,cAAc,GAAG;QACrB,mBAAmB;QACnB,kBAAkB;QAClB,uBAAuB;QACvB,oBAAoB;QACpB,uBAAuB;QACvB,mBAAmB;QACnB,kBAAkB;KACnB,CAAC;IAEF,OAAO,cAAc,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC5C,CAAC;AAED,SAAS,sBAAsB,CAAC,KAAuB,EAAE,MAAc,EAAE,YAA4B,EAAE,IAAkB;IACvH,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,IAAI,YAAY,GAAkB;QAChC,IAAI,EAAE,cAAc;QACpB,KAAK,EAAE,EAAE;QACT,QAAQ,EAAE,EAAE;QACZ,UAAU,EAAE,IAAI;KACjB,CAAC;IAEF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,QAAQ,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;QAEjC,IAAI,eAAe,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,CAAC;YAChC,IAAI,YAAY,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAClC,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YAC5B,CAAC;YAED,YAAY,GAAG;gBACb,IAAI,EAAE,WAAW;gBACjB,aAAa,EAAE,IAAI,CAAC,IAAI;gBACxB,KAAK,EAAE,CAAC,IAAI,CAAC;gBACb,QAAQ,EAAE,CAAC,QAAQ,CAAC;gBACpB,UAAU,EAAE,IAAI;aACjB,CAAC;YAEF,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YAE1B,YAAY,GAAG;gBACb,IAAI,EAAE,cAAc;gBACpB,KAAK,EAAE,EAAE;gBACT,QAAQ,EAAE,EAAE;gBACZ,UAAU,EAAE,IAAI;aACjB,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC9B,YAAY,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACvC,CAAC;IACH,CAAC;IAED,IAAI,YAAY,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAClC,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAC5B,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,KAAK,UAAU,0BAA0B,CACvC,cAA+B,EAC/B,MAAc,EACd,OAAqB,EACrB,MAAkB;IAElB,MAAM,aAAa,GAAgB,EAAE,CAAC;IACtC,IAAI,oBAAoB,GAAc;QACpC,KAAK,EAAE,EAAE;QACT,SAAS,EAAE,CAAC;QACZ,SAAS,EAAE,EAAE;KACd,CAAC;IAEF,KAAK,MAAM,KAAK,IAAI,cAAc,EAAE,CAAC;QACnC,MAAM,cAAc,GAAG,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAE1E,IAAI,cAAc,GAAG,MAAM,CAAC,OAAO,EAAE,CAAC;YACpC,IAAI,oBAAoB,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1C,aAAa,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;YAC3C,CAAC;YAED,aAAa,CAAC,IAAI,CAAC;gBACjB,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,SAAS,EAAE,cAAc;gBACzB,SAAS,EAAE,CAAC,KAAK,CAAC;aACnB,CAAC,CAAC;YAEH,oBAAoB,GAAG;gBACrB,KAAK,EAAE,EAAE;gBACT,SAAS,EAAE,CAAC;gBACZ,SAAS,EAAE,EAAE;aACd,CAAC;YACF,SAAS;QACX,CAAC;QAED,IAAI,oBAAoB,CAAC,SAAS,GAAG,cAAc,GAAG,MAAM,CAAC,GAAG,EAAE,CAAC;YACjE,IAAI,oBAAoB,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1C,aAAa,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;YAC3C,CAAC;YAED,oBAAoB,GAAG;gBACrB,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,SAAS,EAAE,cAAc;gBACzB,SAAS,EAAE,CAAC,KAAK,CAAC;aACnB,CAAC;YACF,SAAS;QACX,CAAC;QAED,oBAAoB,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC;QAChD,oBAAoB,CAAC,SAAS,IAAI,cAAc,CAAC;QACjD,oBAAoB,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAE3C,IAAI,oBAAoB,CAAC,SAAS,IAAI,MAAM,CAAC,OAAO,GAAG,GAAG,EAAE,CAAC;YAC3D,aAAa,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;YACzC,oBAAoB,GAAG;gBACrB,KAAK,EAAE,EAAE;gBACT,SAAS,EAAE,CAAC;gBACZ,SAAS,EAAE,EAAE;aACd,CAAC;QACJ,CAAC;IACH,CAAC;IAED,IAAI,oBAAoB,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1C,aAAa,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;IAC3C,CAAC;IAED,OAAO,aAAa,CAAC;AACvB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,KAAuB,EACvB,MAAc,EACd,OAAqB,EACrB,IAAkB;IAElB,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE5C,MAAM,MAAM,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;IAEtC,IAAI,KAAK,CAAC,MAAM,IAAI,EAAE,EAAE,CAAC;QACvB,OAAO,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACxB,KAAK,EAAE,CAAC,IAAI,CAAC;YACb,SAAS,EAAE,CAAC;YACZ,SAAS,EAAE,EAAE;SACd,CAAC,CAAC,CAAC;IACN,CAAC;IAED,MAAM,YAAY,GAAG,MAAM,yBAAyB,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;IAC7E,MAAM,cAAc,GAAG,sBAAsB,CAAC,KAAK,EAAE,MAAM,EAAE,YAAY,EAAE,IAAI,CAAC,CAAC;IACjF,MAAM,aAAa,GAAG,MAAM,0BAA0B,CAAC,cAAc,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;IAEhG,OAAO,aAAa,CAAC;AACvB,CAAC;AAaD,MAAM,UAAU,mBAAmB,CAAC,SAAoB,EAAE,MAAc,EAAE,OAAe;IACvF,IAAI,CAAC,SAAS,CAAC,KAAK,IAAI,SAAS,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CACvC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,QAAQ,CAAC,CAC7C,CAAC;IAEF,MAAM,YAAY,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAExC,MAAM,SAAS,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACrC,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAE7D,OAAO;QACL,IAAI,EAAE,YAAY;QAClB,IAAI,EAAE;YACJ,GAAG,SAAS;YACZ,IAAI,EAAE,GAAG,SAAS,CAAC,IAAI,UAAU,SAAS,CAAC,KAAK,CAAC,MAAM,EAAE;YACzD,QAAQ,EAAE,QAAQ,CAAC,QAAQ;SAC5B;QACD,QAAQ,EAAE;YACR,OAAO,EAAE,IAAI;YACb,SAAS,EAAE,SAAS,CAAC,KAAK,CAAC,MAAM;YACjC,SAAS,EAAE,SAAS,CAAC,SAAS;YAC9B,UAAU,EAAE,SAAS,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC;SAClE;KACF,CAAC;AACJ,CAAC"}
@@ -1,37 +0,0 @@
1
- import type { ModelProfile } from '../providers/base.js';
2
- import type { TreeSitterNode } from '../types/ast.js';
3
- interface LanguageRule {
4
- subdivisionTypes?: Record<string, string[]>;
5
- [key: string]: any;
6
- }
7
- export declare function findSemanticSubdivisions(node: TreeSitterNode, rule: LanguageRule): TreeSitterNode[];
8
- export declare function findLastCompleteBoundary(code: string, maxSize: number): number;
9
- export declare function extractSignature(node: TreeSitterNode, source: string): string;
10
- export declare function extractLinesBeforeNode(node: TreeSitterNode, source: string, numLines: number): string;
11
- export declare function extractParentContext(node: TreeSitterNode, source: string): {
12
- signature: string;
13
- startLine: number;
14
- endLine: number;
15
- };
16
- export declare function getLineNumber(byteOffset: number, source: string): number;
17
- export interface NodeAnalysis {
18
- isSingleChunk: boolean;
19
- needsSubdivision: boolean;
20
- subdivisionCandidates: TreeSitterNode[];
21
- size: number;
22
- unit: string;
23
- method: string;
24
- estimatedSubchunks: number;
25
- }
26
- export declare function analyzeNodeForChunking(node: TreeSitterNode, source: string, rule: LanguageRule, profile: ModelProfile): Promise<NodeAnalysis>;
27
- export declare function batchAnalyzeNodes(nodes: TreeSitterNode[], source: string, rule: LanguageRule, profile: ModelProfile, isSubdivision?: boolean): Promise<Array<NodeAnalysis & {
28
- node: TreeSitterNode;
29
- }>>;
30
- export interface StatementChunk {
31
- code: string;
32
- size: number;
33
- unit: string;
34
- }
35
- export declare function yieldStatementChunks(node: TreeSitterNode, source: string, maxSize: number, overlapSize: number, profile: ModelProfile): Promise<StatementChunk[]>;
36
- export {};
37
- //# sourceMappingURL=semantic-chunker.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"semantic-chunker.d.ts","sourceRoot":"","sources":["../../src/chunking/semantic-chunker.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAEtD,UAAU,YAAY;IACpB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;IAC5C,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAC;CACpB;AAUD,wBAAgB,wBAAwB,CAAC,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,YAAY,GAAG,cAAc,EAAE,CAwBnG;AAED,wBAAgB,wBAAwB,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAgB9E;AAED,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,GAAG,MAAM,CAO7E;AAED,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,CAIrG;AAED,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,GAAG;IAC1E,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;CACjB,CAMA;AAED,wBAAgB,aAAa,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,MAAM,CAGxE;AAqBD,MAAM,WAAW,YAAY;IAC3B,aAAa,EAAE,OAAO,CAAC;IACvB,gBAAgB,EAAE,OAAO,CAAC;IAC1B,qBAAqB,EAAE,cAAc,EAAE,CAAC;IACxC,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,kBAAkB,EAAE,MAAM,CAAC;CAC5B;AAED,wBAAsB,sBAAsB,CAC1C,IAAI,EAAE,cAAc,EACpB,MAAM,EAAE,MAAM,EACd,IAAI,EAAE,YAAY,EAClB,OAAO,EAAE,YAAY,GACpB,OAAO,CAAC,YAAY,CAAC,CA2BvB;AAED,wBAAsB,iBAAiB,CACrC,KAAK,EAAE,cAAc,EAAE,EACvB,MAAM,EAAE,MAAM,EACd,IAAI,EAAE,YAAY,EAClB,OAAO,EAAE,YAAY,EACrB,aAAa,UAAQ,GACpB,OAAO,CAAC,KAAK,CAAC,YAAY,GAAG;IAAE,IAAI,EAAE,cAAc,CAAA;CAAE,CAAC,CAAC,CAgCzD;AAED,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd;AAED,wBAAsB,oBAAoB,CACxC,IAAI,EAAE,cAAc,EACpB,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,MAAM,EACnB,OAAO,EAAE,YAAY,GACpB,OAAO,CAAC,cAAc,EAAE,CAAC,CAwC3B"}
@@ -1,172 +0,0 @@
1
- import { analyzeCodeSize, batchAnalyzeCodeSize } from './token-counter.js';
2
- export function findSemanticSubdivisions(node, rule) {
3
- if (!node || !rule)
4
- return [];
5
- const subdivisionTypes = rule.subdivisionTypes?.[node.type] || [];
6
- if (subdivisionTypes.length === 0)
7
- return [];
8
- const candidates = [];
9
- function walk(n, depth = 0) {
10
- if (depth > 0 && subdivisionTypes.includes(n.type)) {
11
- candidates.push(n);
12
- return;
13
- }
14
- for (let i = 0; i < n.childCount; i++) {
15
- const child = n.child(i);
16
- if (child) {
17
- walk(child, depth + 1);
18
- }
19
- }
20
- }
21
- walk(node);
22
- return candidates;
23
- }
24
- export function findLastCompleteBoundary(code, maxSize) {
25
- const boundaries = [
26
- { pattern: /\n\s*}\s*$/gm, priority: 1 },
27
- { pattern: /;\s*$/gm, priority: 2 },
28
- { pattern: /\n\s*$/gm, priority: 3 }
29
- ];
30
- for (const boundary of boundaries) {
31
- const matches = [...code.substring(0, maxSize).matchAll(boundary.pattern)];
32
- if (matches.length > 0) {
33
- const lastMatch = matches[matches.length - 1];
34
- return lastMatch.index + lastMatch[0].length;
35
- }
36
- }
37
- return maxSize;
38
- }
39
- export function extractSignature(node, source) {
40
- const code = source.slice(node.startIndex, node.endIndex);
41
- const firstBrace = code.indexOf('{');
42
- if (firstBrace !== -1) {
43
- return code.substring(0, firstBrace).trim() + ' {';
44
- }
45
- return code.split('\n')[0];
46
- }
47
- export function extractLinesBeforeNode(node, source, numLines) {
48
- const beforeCode = source.substring(0, node.startIndex);
49
- const lines = beforeCode.split('\n');
50
- return lines.slice(-numLines).join('\n');
51
- }
52
- export function extractParentContext(node, source) {
53
- return {
54
- signature: extractSignature(node, source),
55
- startLine: getLineNumber(node.startIndex, source),
56
- endLine: getLineNumber(node.endIndex, source)
57
- };
58
- }
59
- export function getLineNumber(byteOffset, source) {
60
- const before = source.substring(0, byteOffset);
61
- return before.split('\n').length;
62
- }
63
- function getSizeLimits(profile) {
64
- if (profile.useTokens && profile.tokenCounter) {
65
- return {
66
- optimal: profile.optimalTokens,
67
- min: profile.minChunkTokens,
68
- max: profile.maxChunkTokens,
69
- overlap: profile.overlapTokens,
70
- unit: 'tokens'
71
- };
72
- }
73
- return {
74
- optimal: profile.optimalChars,
75
- min: profile.minChunkChars,
76
- max: profile.maxChunkChars,
77
- overlap: profile.overlapChars,
78
- unit: 'characters'
79
- };
80
- }
81
- export async function analyzeNodeForChunking(node, source, rule, profile) {
82
- const code = source.slice(node.startIndex, node.endIndex);
83
- const limits = getSizeLimits(profile);
84
- let actualSize;
85
- let method;
86
- if (profile.useTokens && profile.tokenCounter) {
87
- const analysis = await analyzeCodeSize(code, limits, profile.tokenCounter);
88
- actualSize = analysis.size;
89
- method = analysis.method;
90
- }
91
- else {
92
- actualSize = code.length;
93
- method = 'chars';
94
- }
95
- const subdivisionThreshold = limits.max;
96
- return {
97
- isSingleChunk: actualSize <= subdivisionThreshold,
98
- needsSubdivision: actualSize > subdivisionThreshold,
99
- subdivisionCandidates: findSemanticSubdivisions(node, rule),
100
- size: actualSize,
101
- unit: limits.unit,
102
- method,
103
- estimatedSubchunks: Math.ceil(actualSize / limits.optimal)
104
- };
105
- }
106
- export async function batchAnalyzeNodes(nodes, source, rule, profile, isSubdivision = false) {
107
- const codes = nodes.map(node => source.slice(node.startIndex, node.endIndex));
108
- const limits = getSizeLimits(profile);
109
- let analyses;
110
- if (profile.useTokens && profile.tokenCounter) {
111
- analyses = await batchAnalyzeCodeSize(codes, limits, profile.tokenCounter, isSubdivision);
112
- }
113
- else {
114
- analyses = codes.map(code => ({
115
- size: code.length,
116
- decision: code.length < limits.min ? 'too_small'
117
- : code.length > limits.max ? 'too_large'
118
- : code.length <= limits.optimal ? 'optimal'
119
- : 'needs_tokenization',
120
- method: 'chars'
121
- }));
122
- }
123
- return nodes.map((node, i) => {
124
- const analysis = analyses[i];
125
- const subdivisionThreshold = limits.max;
126
- return {
127
- node,
128
- isSingleChunk: analysis.size <= subdivisionThreshold,
129
- needsSubdivision: analysis.size > subdivisionThreshold,
130
- subdivisionCandidates: findSemanticSubdivisions(node, rule),
131
- size: analysis.size,
132
- unit: limits.unit,
133
- method: analysis.method,
134
- estimatedSubchunks: Math.ceil(analysis.size / limits.optimal)
135
- };
136
- });
137
- }
138
- export async function yieldStatementChunks(node, source, maxSize, overlapSize, profile) {
139
- const code = source.slice(node.startIndex, node.endIndex);
140
- const lines = code.split('\n');
141
- const chunks = [];
142
- let currentChunk = [];
143
- let currentSize = 0;
144
- for (const line of lines) {
145
- const lineSize = profile.useTokens && profile.tokenCounter
146
- ? await profile.tokenCounter(line)
147
- : line.length;
148
- if (currentSize + lineSize > maxSize && currentChunk.length > 0) {
149
- chunks.push({
150
- code: currentChunk.join('\n'),
151
- size: currentSize,
152
- unit: profile.useTokens ? 'tokens' : 'characters'
153
- });
154
- const overlapLines = Math.floor(currentChunk.length * 0.2);
155
- currentChunk = currentChunk.slice(-overlapLines);
156
- currentSize = profile.useTokens && profile.tokenCounter
157
- ? await profile.tokenCounter(currentChunk.join('\n'))
158
- : currentChunk.join('\n').length;
159
- }
160
- currentChunk.push(line);
161
- currentSize += lineSize;
162
- }
163
- if (currentChunk.length > 0) {
164
- chunks.push({
165
- code: currentChunk.join('\n'),
166
- size: currentSize,
167
- unit: profile.useTokens ? 'tokens' : 'characters'
168
- });
169
- }
170
- return chunks;
171
- }
172
- //# sourceMappingURL=semantic-chunker.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"semantic-chunker.js","sourceRoot":"","sources":["../../src/chunking/semantic-chunker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,oBAAoB,EAAyB,MAAM,oBAAoB,CAAC;AAiBlG,MAAM,UAAU,wBAAwB,CAAC,IAAoB,EAAE,IAAkB;IAC/E,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IAE9B,MAAM,gBAAgB,GAAG,IAAI,CAAC,gBAAgB,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;IAClE,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE7C,MAAM,UAAU,GAAqB,EAAE,CAAC;IAExC,SAAS,IAAI,CAAC,CAAiB,EAAE,KAAK,GAAG,CAAC;QACxC,IAAI,KAAK,GAAG,CAAC,IAAI,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC;YACnD,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACnB,OAAO;QACT,CAAC;QAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YACzB,IAAI,KAAK,EAAE,CAAC;gBACV,IAAI,CAAC,KAAK,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,CAAC;IACX,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,IAAY,EAAE,OAAe;IACpE,MAAM,UAAU,GAAG;QACjB,EAAE,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,EAAE;QACxC,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC,EAAE;QACnC,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC,EAAE;KACrC,CAAC;IAEF,KAAK,MAAM,QAAQ,IAAI,UAAU,EAAE,CAAC;QAClC,MAAM,OAAO,GAAG,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;QAC3E,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,MAAM,SAAS,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YAC9C,OAAO,SAAS,CAAC,KAAM,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QAChD,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,IAAoB,EAAE,MAAc;IACnE,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC1D,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACrC,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;QACtB,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,IAAI,EAAE,GAAG,IAAI,CAAC;IACrD,CAAC;IACD,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;AAC7B,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,IAAoB,EAAE,MAAc,EAAE,QAAgB;IAC3F,MAAM,UAAU,GAAG,MAAM,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;IACxD,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACrC,OAAO,KAAK,CAAC,KAAK,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC3C,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,IAAoB,EAAE,MAAc;IAKvE,OAAO;QACL,SAAS,EAAE,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC;QACzC,SAAS,EAAE,aAAa,CAAC,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC;QACjD,OAAO,EAAE,aAAa,CAAC,IAAI,CAAC,QAAQ,EAAE,MAAM,CAAC;KAC9C,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,UAAkB,EAAE,MAAc;IAC9D,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;IAC/C,OAAO,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;AACnC,CAAC;AAED,SAAS,aAAa,CAAC,OAAqB;IAC1C,IAAI,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,YAAY,EAAE,CAAC;QAC9C,OAAO;YACL,OAAO,EAAE,OAAO,CAAC,aAAa;YAC9B,GAAG,EAAE,OAAO,CAAC,cAAc;YAC3B,GAAG,EAAE,OAAO,CAAC,cAAc;YAC3B,OAAO,EAAE,OAAO,CAAC,aAAa;YAC9B,IAAI,EAAE,QAAQ;SACf,CAAC;IACJ,CAAC;IACD,OAAO;QACL,OAAO,EAAE,OAAO,CAAC,YAAY;QAC7B,GAAG,EAAE,OAAO,CAAC,aAAa;QAC1B,GAAG,EAAE,OAAO,CAAC,aAAa;QAC1B,OAAO,EAAE,OAAO,CAAC,YAAY;QAC7B,IAAI,EAAE,YAAY;KACnB,CAAC;AACJ,CAAC;AAYD,MAAM,CAAC,KAAK,UAAU,sBAAsB,CAC1C,IAAoB,EACpB,MAAc,EACd,IAAkB,EAClB,OAAqB;IAErB,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC1D,MAAM,MAAM,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;IAEtC,IAAI,UAAkB,CAAC;IACvB,IAAI,MAAc,CAAC;IAEnB,IAAI,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,YAAY,EAAE,CAAC;QAC9C,MAAM,QAAQ,GAAG,MAAM,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC;QAC3E,UAAU,GAAG,QAAQ,CAAC,IAAI,CAAC;QAC3B,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;IAC3B,CAAC;SAAM,CAAC;QACN,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC;QACzB,MAAM,GAAG,OAAO,CAAC;IACnB,CAAC;IAED,MAAM,oBAAoB,GAAG,MAAM,CAAC,GAAG,CAAC;IAExC,OAAO;QACL,aAAa,EAAE,UAAU,IAAI,oBAAoB;QACjD,gBAAgB,EAAE,UAAU,GAAG,oBAAoB;QACnD,qBAAqB,EAAE,wBAAwB,CAAC,IAAI,EAAE,IAAI,CAAC;QAC3D,IAAI,EAAE,UAAU;QAChB,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,MAAM;QACN,kBAAkB,EAAE,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC,OAAO,CAAC;KAC3D,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,KAAuB,EACvB,MAAc,EACd,IAAkB,EAClB,OAAqB,EACrB,aAAa,GAAG,KAAK;IAErB,MAAM,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC9E,MAAM,MAAM,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;IAEtC,IAAI,QAA4B,CAAC;IACjC,IAAI,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,YAAY,EAAE,CAAC;QAC9C,QAAQ,GAAG,MAAM,oBAAoB,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,YAAY,EAAE,aAAa,CAAC,CAAC;IAC5F,CAAC;SAAM,CAAC;QACN,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAC5B,IAAI,EAAE,IAAI,CAAC,MAAM;YACjB,QAAQ,EAAE,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,WAAoB;gBACnD,CAAC,CAAC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,WAAoB;oBACjD,CAAC,CAAC,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,SAAkB;wBACpD,CAAC,CAAC,oBAA6B;YACrC,MAAM,EAAE,OAAO;SAChB,CAAC,CAAC,CAAC;IACN,CAAC;IAED,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;QAC3B,MAAM,QAAQ,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,oBAAoB,GAAG,MAAM,CAAC,GAAG,CAAC;QACxC,OAAO;YACL,IAAI;YACJ,aAAa,EAAE,QAAQ,CAAC,IAAI,IAAI,oBAAoB;YACpD,gBAAgB,EAAE,QAAQ,CAAC,IAAI,GAAG,oBAAoB;YACtD,qBAAqB,EAAE,wBAAwB,CAAC,IAAI,EAAE,IAAI,CAAC;YAC3D,IAAI,EAAE,QAAQ,CAAC,IAAI;YACnB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,kBAAkB,EAAE,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,GAAG,MAAM,CAAC,OAAO,CAAC;SAC9D,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC;AAQD,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,IAAoB,EACpB,MAAc,EACd,OAAe,EACf,WAAmB,EACnB,OAAqB;IAErB,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC1D,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAE/B,MAAM,MAAM,GAAqB,EAAE,CAAC;IACpC,IAAI,YAAY,GAAa,EAAE,CAAC;IAChC,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,YAAY;YACxD,CAAC,CAAC,MAAM,OAAO,CAAC,YAAY,CAAC,IAAI,CAAC;YAClC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC;QAEhB,IAAI,WAAW,GAAG,QAAQ,GAAG,OAAO,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAChE,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC;gBAC7B,IAAI,EAAE,WAAW;gBACjB,IAAI,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,YAAY;aAClD,CAAC,CAAC;YAEH,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC;YAC3D,YAAY,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC;YACjD,WAAW,GAAG,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,YAAY;gBACrD,CAAC,CAAC,MAAM,OAAO,CAAC,YAAY,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACrD,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;QACrC,CAAC;QAED,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxB,WAAW,IAAI,QAAQ,CAAC;IAC1B,CAAC;IAED,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,MAAM,CAAC,IAAI,CAAC;YACV,IAAI,EAAE,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC;YAC7B,IAAI,EAAE,WAAW;YACjB,IAAI,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,YAAY;SAClD,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -1,28 +0,0 @@
1
- interface TokenCountStats {
2
- totalRequests: number;
3
- cacheHits: number;
4
- charFilterSkips: number;
5
- actualTokenizations: number;
6
- batchTokenizations: number;
7
- cacheHitRate: string;
8
- charFilterRate: string;
9
- tokenizationRate: string;
10
- }
11
- interface SizeLimits {
12
- min: number;
13
- max: number;
14
- optimal: number;
15
- }
16
- type Decision = 'too_small' | 'too_large' | 'needs_tokenization' | 'optimal';
17
- export interface CodeSizeAnalysis {
18
- size: number;
19
- decision: Decision;
20
- method: string;
21
- }
22
- export declare function analyzeCodeSize(code: string, limits: SizeLimits, tokenCounter: (text: string) => number | Promise<number>, allowEstimateForSkip?: boolean): Promise<CodeSizeAnalysis>;
23
- export declare function batchAnalyzeCodeSize(codeSnippets: string[], limits: SizeLimits, tokenCounter: (text: string) => number | Promise<number>, allowEstimateForSkip?: boolean): Promise<CodeSizeAnalysis[]>;
24
- export declare function getTokenCountStats(): TokenCountStats;
25
- export declare function resetTokenCountStats(): void;
26
- export declare function clearTokenCache(): void;
27
- export {};
28
- //# sourceMappingURL=token-counter.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"token-counter.d.ts","sourceRoot":"","sources":["../../src/chunking/token-counter.ts"],"names":[],"mappings":"AAqCA,UAAU,eAAe;IACvB,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,kBAAkB,EAAE,MAAM,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,CAAC;IACvB,gBAAgB,EAAE,MAAM,CAAC;CAC1B;AAeD,UAAU,UAAU;IAClB,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,KAAK,QAAQ,GAAG,WAAW,GAAG,WAAW,GAAG,oBAAoB,GAAG,SAAS,CAAC;AAwF7E,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,QAAQ,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,wBAAsB,eAAe,CACnC,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,UAAU,EAClB,YAAY,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,EACxD,oBAAoB,UAAQ,GAC3B,OAAO,CAAC,gBAAgB,CAAC,CAgC3B;AAED,wBAAsB,oBAAoB,CACxC,YAAY,EAAE,MAAM,EAAE,EACtB,MAAM,EAAE,UAAU,EAClB,YAAY,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,EACxD,oBAAoB,UAAQ,GAC3B,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAiD7B;AAED,wBAAgB,kBAAkB,IAAI,eAAe,CAapD;AAED,wBAAgB,oBAAoB,IAAI,IAAI,CAM3C;AAED,wBAAgB,eAAe,IAAI,IAAI,CAEtC"}
@@ -1,207 +0,0 @@
1
- class LRUCache {
2
- maxSize;
3
- cache = new Map();
4
- constructor(maxSize = 1000) {
5
- this.maxSize = maxSize;
6
- }
7
- get(key) {
8
- if (!this.cache.has(key))
9
- return undefined;
10
- const value = this.cache.get(key);
11
- this.cache.delete(key);
12
- this.cache.set(key, value);
13
- return value;
14
- }
15
- set(key, value) {
16
- if (this.cache.has(key)) {
17
- this.cache.delete(key);
18
- }
19
- this.cache.set(key, value);
20
- if (this.cache.size > this.maxSize) {
21
- const firstKey = this.cache.keys().next().value;
22
- if (firstKey !== undefined) {
23
- this.cache.delete(firstKey);
24
- }
25
- }
26
- }
27
- clear() {
28
- this.cache.clear();
29
- }
30
- }
31
- const tokenCountCache = new LRUCache(1000);
32
- const stats = {
33
- totalRequests: 0,
34
- cacheHits: 0,
35
- charFilterSkips: 0,
36
- actualTokenizations: 0,
37
- batchTokenizations: 0
38
- };
39
- function estimateTokensFromChars(charCount) {
40
- return Math.ceil(charCount / 4);
41
- }
42
- function preFilterByChars(code, limits) {
43
- const charCount = code.length;
44
- const estimatedTokens = estimateTokensFromChars(charCount);
45
- const minEstimate = limits.min * 0.8;
46
- const maxEstimate = limits.max * 1.2;
47
- const optimalLow = limits.optimal * 0.8;
48
- const optimalHigh = limits.optimal * 1.2;
49
- if (estimatedTokens < minEstimate) {
50
- return { decision: 'too_small', estimate: estimatedTokens };
51
- }
52
- if (estimatedTokens > maxEstimate) {
53
- return { decision: 'too_large', estimate: estimatedTokens };
54
- }
55
- if (estimatedTokens >= optimalLow && estimatedTokens <= optimalHigh) {
56
- return { decision: 'optimal', estimate: estimatedTokens };
57
- }
58
- return { decision: 'needs_tokenization', estimate: estimatedTokens };
59
- }
60
- async function countTokensWithCache(code, tokenCounter) {
61
- stats.totalRequests++;
62
- const cached = tokenCountCache.get(code);
63
- if (cached !== undefined) {
64
- stats.cacheHits++;
65
- return cached;
66
- }
67
- stats.actualTokenizations++;
68
- const result = tokenCounter(code);
69
- const count = result instanceof Promise ? await result : result;
70
- tokenCountCache.set(code, count);
71
- return count;
72
- }
73
- async function batchCountTokens(codeSnippets, tokenCounter) {
74
- stats.batchTokenizations++;
75
- const results = [];
76
- const uncached = [];
77
- const uncachedIndices = [];
78
- for (let i = 0; i < codeSnippets.length; i++) {
79
- const code = codeSnippets[i];
80
- const cached = tokenCountCache.get(code);
81
- if (cached !== undefined) {
82
- stats.cacheHits++;
83
- results[i] = cached;
84
- }
85
- else {
86
- uncached.push(code);
87
- uncachedIndices.push(i);
88
- }
89
- }
90
- if (uncached.length > 0) {
91
- stats.actualTokenizations += uncached.length;
92
- const counts = await Promise.all(uncached.map(async (code) => {
93
- const result = tokenCounter(code);
94
- return result instanceof Promise ? await result : result;
95
- }));
96
- for (let i = 0; i < counts.length; i++) {
97
- const code = uncached[i];
98
- const count = counts[i];
99
- tokenCountCache.set(code, count);
100
- results[uncachedIndices[i]] = count;
101
- }
102
- }
103
- return results;
104
- }
105
- export async function analyzeCodeSize(code, limits, tokenCounter, allowEstimateForSkip = false) {
106
- stats.totalRequests++;
107
- const preFilter = preFilterByChars(code, limits);
108
- if (allowEstimateForSkip && preFilter.decision === 'too_large') {
109
- stats.charFilterSkips++;
110
- return {
111
- size: preFilter.estimate,
112
- decision: preFilter.decision,
113
- method: 'char_estimate'
114
- };
115
- }
116
- const actualSize = await countTokensWithCache(code, tokenCounter);
117
- let decision;
118
- if (actualSize < limits.min) {
119
- decision = 'too_small';
120
- }
121
- else if (actualSize > limits.max) {
122
- decision = 'too_large';
123
- }
124
- else if (actualSize <= limits.optimal) {
125
- decision = 'optimal';
126
- }
127
- else {
128
- decision = 'needs_tokenization';
129
- }
130
- return {
131
- size: actualSize,
132
- decision,
133
- method: 'tokenized'
134
- };
135
- }
136
- export async function batchAnalyzeCodeSize(codeSnippets, limits, tokenCounter, allowEstimateForSkip = false) {
137
- const results = [];
138
- const needsTokenization = [];
139
- const needsTokenizationIndices = [];
140
- for (let i = 0; i < codeSnippets.length; i++) {
141
- const code = codeSnippets[i];
142
- const preFilter = preFilterByChars(code, limits);
143
- if (allowEstimateForSkip && preFilter.decision === 'too_large') {
144
- stats.charFilterSkips++;
145
- results[i] = {
146
- size: preFilter.estimate,
147
- decision: preFilter.decision,
148
- method: 'char_estimate'
149
- };
150
- }
151
- else {
152
- needsTokenization.push(code);
153
- needsTokenizationIndices.push(i);
154
- }
155
- }
156
- if (needsTokenization.length > 0) {
157
- const tokenCounts = await batchCountTokens(needsTokenization, tokenCounter);
158
- for (let i = 0; i < tokenCounts.length; i++) {
159
- const actualSize = tokenCounts[i];
160
- const idx = needsTokenizationIndices[i];
161
- let decision;
162
- if (actualSize < limits.min) {
163
- decision = 'too_small';
164
- }
165
- else if (actualSize > limits.max) {
166
- decision = 'too_large';
167
- }
168
- else if (actualSize <= limits.optimal) {
169
- decision = 'optimal';
170
- }
171
- else {
172
- decision = 'needs_tokenization';
173
- }
174
- results[idx] = {
175
- size: actualSize,
176
- decision,
177
- method: 'tokenized'
178
- };
179
- }
180
- }
181
- return results;
182
- }
183
- export function getTokenCountStats() {
184
- return {
185
- ...stats,
186
- cacheHitRate: stats.totalRequests > 0
187
- ? (stats.cacheHits / stats.totalRequests * 100).toFixed(1) + '%'
188
- : '0%',
189
- charFilterRate: stats.totalRequests > 0
190
- ? (stats.charFilterSkips / stats.totalRequests * 100).toFixed(1) + '%'
191
- : '0%',
192
- tokenizationRate: stats.totalRequests > 0
193
- ? (stats.actualTokenizations / stats.totalRequests * 100).toFixed(1) + '%'
194
- : '0%'
195
- };
196
- }
197
- export function resetTokenCountStats() {
198
- stats.totalRequests = 0;
199
- stats.cacheHits = 0;
200
- stats.charFilterSkips = 0;
201
- stats.actualTokenizations = 0;
202
- stats.batchTokenizations = 0;
203
- }
204
- export function clearTokenCache() {
205
- tokenCountCache.clear();
206
- }
207
- //# sourceMappingURL=token-counter.js.map