@pleaseai/context-please-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/LICENSE +24 -0
  2. package/README.md +287 -0
  3. package/dist/.tsbuildinfo +1 -0
  4. package/dist/context.d.ts +276 -0
  5. package/dist/context.d.ts.map +1 -0
  6. package/dist/context.js +1072 -0
  7. package/dist/context.js.map +1 -0
  8. package/dist/embedding/base-embedding.d.ts +51 -0
  9. package/dist/embedding/base-embedding.d.ts.map +1 -0
  10. package/dist/embedding/base-embedding.js +36 -0
  11. package/dist/embedding/base-embedding.js.map +1 -0
  12. package/dist/embedding/gemini-embedding.d.ts +53 -0
  13. package/dist/embedding/gemini-embedding.d.ts.map +1 -0
  14. package/dist/embedding/gemini-embedding.js +152 -0
  15. package/dist/embedding/gemini-embedding.js.map +1 -0
  16. package/dist/embedding/index.d.ts +6 -0
  17. package/dist/embedding/index.d.ts.map +1 -0
  18. package/dist/embedding/index.js +24 -0
  19. package/dist/embedding/index.js.map +1 -0
  20. package/dist/embedding/ollama-embedding.d.ts +55 -0
  21. package/dist/embedding/ollama-embedding.d.ts.map +1 -0
  22. package/dist/embedding/ollama-embedding.js +192 -0
  23. package/dist/embedding/ollama-embedding.js.map +1 -0
  24. package/dist/embedding/openai-embedding.d.ts +36 -0
  25. package/dist/embedding/openai-embedding.d.ts.map +1 -0
  26. package/dist/embedding/openai-embedding.js +159 -0
  27. package/dist/embedding/openai-embedding.js.map +1 -0
  28. package/dist/embedding/voyageai-embedding.d.ts +44 -0
  29. package/dist/embedding/voyageai-embedding.d.ts.map +1 -0
  30. package/dist/embedding/voyageai-embedding.js +227 -0
  31. package/dist/embedding/voyageai-embedding.js.map +1 -0
  32. package/dist/index.d.ts +8 -0
  33. package/dist/index.d.ts.map +1 -0
  34. package/dist/index.js +24 -0
  35. package/dist/index.js.map +1 -0
  36. package/dist/splitter/ast-splitter.d.ts +22 -0
  37. package/dist/splitter/ast-splitter.d.ts.map +1 -0
  38. package/dist/splitter/ast-splitter.js +234 -0
  39. package/dist/splitter/ast-splitter.js.map +1 -0
  40. package/dist/splitter/index.d.ts +41 -0
  41. package/dist/splitter/index.d.ts.map +1 -0
  42. package/dist/splitter/index.js +27 -0
  43. package/dist/splitter/index.js.map +1 -0
  44. package/dist/splitter/langchain-splitter.d.ts +13 -0
  45. package/dist/splitter/langchain-splitter.d.ts.map +1 -0
  46. package/dist/splitter/langchain-splitter.js +118 -0
  47. package/dist/splitter/langchain-splitter.js.map +1 -0
  48. package/dist/sync/merkle.d.ts +26 -0
  49. package/dist/sync/merkle.d.ts.map +1 -0
  50. package/dist/sync/merkle.js +112 -0
  51. package/dist/sync/merkle.js.map +1 -0
  52. package/dist/sync/synchronizer.d.ts +30 -0
  53. package/dist/sync/synchronizer.d.ts.map +1 -0
  54. package/dist/sync/synchronizer.js +339 -0
  55. package/dist/sync/synchronizer.js.map +1 -0
  56. package/dist/types.d.ts +14 -0
  57. package/dist/types.d.ts.map +1 -0
  58. package/dist/types.js +3 -0
  59. package/dist/types.js.map +1 -0
  60. package/dist/utils/env-manager.d.ts +19 -0
  61. package/dist/utils/env-manager.d.ts.map +1 -0
  62. package/dist/utils/env-manager.js +125 -0
  63. package/dist/utils/env-manager.js.map +1 -0
  64. package/dist/utils/index.d.ts +2 -0
  65. package/dist/utils/index.d.ts.map +1 -0
  66. package/dist/utils/index.js +7 -0
  67. package/dist/utils/index.js.map +1 -0
  68. package/dist/vectordb/base/base-vector-database.d.ts +58 -0
  69. package/dist/vectordb/base/base-vector-database.d.ts.map +1 -0
  70. package/dist/vectordb/base/base-vector-database.js +32 -0
  71. package/dist/vectordb/base/base-vector-database.js.map +1 -0
  72. package/dist/vectordb/factory.d.ts +80 -0
  73. package/dist/vectordb/factory.d.ts.map +1 -0
  74. package/dist/vectordb/factory.js +89 -0
  75. package/dist/vectordb/factory.js.map +1 -0
  76. package/dist/vectordb/index.d.ts +12 -0
  77. package/dist/vectordb/index.d.ts.map +1 -0
  78. package/dist/vectordb/index.js +27 -0
  79. package/dist/vectordb/index.js.map +1 -0
  80. package/dist/vectordb/milvus-restful-vectordb.d.ts +75 -0
  81. package/dist/vectordb/milvus-restful-vectordb.d.ts.map +1 -0
  82. package/dist/vectordb/milvus-restful-vectordb.js +707 -0
  83. package/dist/vectordb/milvus-restful-vectordb.js.map +1 -0
  84. package/dist/vectordb/milvus-vectordb.d.ts +59 -0
  85. package/dist/vectordb/milvus-vectordb.d.ts.map +1 -0
  86. package/dist/vectordb/milvus-vectordb.js +641 -0
  87. package/dist/vectordb/milvus-vectordb.js.map +1 -0
  88. package/dist/vectordb/qdrant-vectordb.d.ts +124 -0
  89. package/dist/vectordb/qdrant-vectordb.d.ts.map +1 -0
  90. package/dist/vectordb/qdrant-vectordb.js +582 -0
  91. package/dist/vectordb/qdrant-vectordb.js.map +1 -0
  92. package/dist/vectordb/sparse/index.d.ts +4 -0
  93. package/dist/vectordb/sparse/index.d.ts.map +1 -0
  94. package/dist/vectordb/sparse/index.js +23 -0
  95. package/dist/vectordb/sparse/index.js.map +1 -0
  96. package/dist/vectordb/sparse/simple-bm25.d.ts +104 -0
  97. package/dist/vectordb/sparse/simple-bm25.d.ts.map +1 -0
  98. package/dist/vectordb/sparse/simple-bm25.js +189 -0
  99. package/dist/vectordb/sparse/simple-bm25.js.map +1 -0
  100. package/dist/vectordb/sparse/sparse-vector-generator.d.ts +54 -0
  101. package/dist/vectordb/sparse/sparse-vector-generator.d.ts.map +1 -0
  102. package/dist/vectordb/sparse/sparse-vector-generator.js +3 -0
  103. package/dist/vectordb/sparse/sparse-vector-generator.js.map +1 -0
  104. package/dist/vectordb/sparse/types.d.ts +38 -0
  105. package/dist/vectordb/sparse/types.d.ts.map +1 -0
  106. package/dist/vectordb/sparse/types.js +3 -0
  107. package/dist/vectordb/sparse/types.js.map +1 -0
  108. package/dist/vectordb/types.d.ts +120 -0
  109. package/dist/vectordb/types.d.ts.map +1 -0
  110. package/dist/vectordb/types.js +9 -0
  111. package/dist/vectordb/types.js.map +1 -0
  112. package/dist/vectordb/zilliz-utils.d.ts +135 -0
  113. package/dist/vectordb/zilliz-utils.d.ts.map +1 -0
  114. package/dist/vectordb/zilliz-utils.js +192 -0
  115. package/dist/vectordb/zilliz-utils.js.map +1 -0
  116. package/package.json +61 -0
@@ -0,0 +1,227 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.VoyageAIEmbedding = void 0;
4
+ const voyageai_1 = require("voyageai");
5
+ const base_embedding_1 = require("./base-embedding");
6
/**
 * Embedding provider that produces vectors through the VoyageAI API.
 *
 * Per-model metadata (output dimension and context length) comes from the
 * static table returned by `getSupportedModels()`. Models that are not listed
 * there fall back to a dimension of 1024 and a context length of 32000.
 */
class VoyageAIEmbedding extends base_embedding_1.Embedding {
    /**
     * @param config Provider configuration. `config.apiKey` is handed to the
     *               VoyageAI client; `config.model` is optional and defaults
     *               to 'voyage-code-3'.
     */
    constructor(config) {
        super();
        this.dimension = 1024; // Default dimension for voyage-code-3
        this.inputType = 'document';
        this.maxTokens = 32000; // Default max tokens
        this.config = config;
        this.client = new voyageai_1.VoyageAIClient({
            apiKey: config.apiKey,
        });
        // Set dimension and context length based on the selected model
        this.updateModelSettings(config.model || 'voyage-code-3');
    }
    /**
     * Resolve the dimension and context length to use for a model name.
     *
     * Only the table's own keys are consulted, so names such as 'constructor'
     * or 'toString' (inherited from Object.prototype) are treated as unknown
     * models instead of yielding undefined settings.
     *
     * @param model Model name
     * @returns `{ dimension, maxTokens }` for the model, or the defaults
     *          (1024 / 32000) when the model is not in the table
     */
    static resolveModelSettings(model) {
        const supportedModels = VoyageAIEmbedding.getSupportedModels();
        const modelInfo = Object.prototype.hasOwnProperty.call(supportedModels, model)
            ? supportedModels[model]
            : undefined;
        if (!modelInfo) {
            // Use default dimension and context length for unknown models
            return { dimension: 1024, maxTokens: 32000 };
        }
        return {
            // A non-numeric dimension (a descriptive string) marks a
            // variable-dimension model; use its default of 1024.
            dimension: typeof modelInfo.dimension === 'number' ? modelInfo.dimension : 1024,
            maxTokens: modelInfo.contextLength,
        };
    }
    /**
     * Update both `dimension` and `maxTokens` for the given model.
     * @param model Model name
     */
    updateModelSettings(model) {
        const settings = VoyageAIEmbedding.resolveModelSettings(model);
        this.dimension = settings.dimension;
        this.maxTokens = settings.maxTokens;
    }
    /**
     * Update only `dimension` for the given model (`maxTokens` is left as is).
     * Not called from within this class; kept for compatibility.
     * @param model Model name
     */
    updateDimensionForModel(model) {
        this.dimension = VoyageAIEmbedding.resolveModelSettings(model).dimension;
    }
    /**
     * @returns The configured dimension; no request is made to the API.
     */
    async detectDimension() {
        // VoyageAI doesn't need dynamic detection, return configured dimension
        return this.dimension;
    }
    /**
     * Embed a single text.
     * @param text Text to embed (run through `preprocessText` first)
     * @returns `{ vector, dimension }`
     * @throws Error when the response carries no embedding
     */
    async embed(text) {
        const processedText = this.preprocessText(text);
        const model = this.config.model || 'voyage-code-3';
        const response = await this.client.embed({
            input: processedText,
            model: model,
            inputType: this.inputType,
        });
        if (!response.data || !response.data[0] || !response.data[0].embedding) {
            throw new Error('VoyageAI API returned invalid response');
        }
        return {
            vector: response.data[0].embedding,
            dimension: this.dimension
        };
    }
    /**
     * Embed several texts with one API request.
     * @param texts Texts to embed (run through `preprocessTexts` first)
     * @returns One `{ vector, dimension }` per input; `[]` for an empty input array
     * @throws Error when the response is missing data, has an entry without an
     *         embedding, or contains a different number of embeddings than inputs
     */
    async embedBatch(texts) {
        // Nothing to embed: skip the network round trip entirely.
        if (Array.isArray(texts) && texts.length === 0) {
            return [];
        }
        const processedTexts = this.preprocessTexts(texts);
        const model = this.config.model || 'voyage-code-3';
        const response = await this.client.embed({
            input: processedTexts,
            model: model,
            inputType: this.inputType,
        });
        if (!response.data) {
            throw new Error('VoyageAI API returned invalid response');
        }
        // Results are paired with inputs by position, so a count mismatch
        // would silently attach vectors to the wrong texts.
        if (Array.isArray(processedTexts) && response.data.length !== processedTexts.length) {
            throw new Error(`VoyageAI API returned ${response.data.length} embeddings for ${processedTexts.length} inputs`);
        }
        return response.data.map((item) => {
            if (!item.embedding) {
                throw new Error('VoyageAI API returned invalid embedding data');
            }
            return {
                vector: item.embedding,
                dimension: this.dimension
            };
        });
    }
    /**
     * @returns The dimension currently configured for the selected model
     */
    getDimension() {
        return this.dimension;
    }
    /**
     * @returns The provider name, 'VoyageAI'
     */
    getProvider() {
        return 'VoyageAI';
    }
    /**
     * Set model type
     * @param model Model name
     */
    setModel(model) {
        this.config.model = model;
        this.updateModelSettings(model);
    }
    /**
     * Set input type (VoyageAI specific feature)
     * @param inputType Input type: 'document' | 'query'
     */
    setInputType(inputType) {
        this.inputType = inputType;
    }
    /**
     * Get client instance (for advanced usage)
     */
    getClient() {
        return this.client;
    }
    /**
     * Get list of supported models
     *
     * `dimension` is a number for fixed-size models and a descriptive string
     * for models that support several output dimensions.
     */
    static getSupportedModels() {
        return {
            // Latest recommended models
            'voyage-3-large': {
                dimension: '1024 (default), 256, 512, 2048',
                contextLength: 32000,
                description: 'The best general-purpose and multilingual retrieval quality'
            },
            'voyage-3.5': {
                dimension: '1024 (default), 256, 512, 2048',
                contextLength: 32000,
                description: 'Optimized for general-purpose and multilingual retrieval quality'
            },
            'voyage-3.5-lite': {
                dimension: '1024 (default), 256, 512, 2048',
                contextLength: 32000,
                description: 'Optimized for latency and cost'
            },
            'voyage-code-3': {
                dimension: '1024 (default), 256, 512, 2048',
                contextLength: 32000,
                description: 'Optimized for code retrieval (recommended for code)'
            },
            // Professional domain models
            'voyage-finance-2': {
                dimension: 1024,
                contextLength: 32000,
                description: 'Optimized for finance retrieval and RAG'
            },
            'voyage-law-2': {
                dimension: 1024,
                contextLength: 16000,
                description: 'Optimized for legal retrieval and RAG'
            },
            'voyage-multilingual-2': {
                dimension: 1024,
                contextLength: 32000,
                description: 'Legacy: Use voyage-3.5 for multilingual tasks'
            },
            'voyage-large-2-instruct': {
                dimension: 1024,
                contextLength: 16000,
                description: 'Legacy: Use voyage-3.5 instead'
            },
            // Legacy models
            'voyage-large-2': {
                dimension: 1536,
                contextLength: 16000,
                description: 'Legacy: Use voyage-3.5 instead'
            },
            'voyage-code-2': {
                dimension: 1536,
                contextLength: 16000,
                description: 'Previous generation of code embeddings'
            },
            'voyage-3': {
                dimension: 1024,
                contextLength: 32000,
                description: 'Legacy: Use voyage-3.5 instead'
            },
            'voyage-3-lite': {
                dimension: 512,
                contextLength: 32000,
                description: 'Legacy: Use voyage-3.5-lite instead'
            },
            'voyage-2': {
                dimension: 1024,
                contextLength: 4000,
                description: 'Legacy: Use voyage-3.5-lite instead'
            },
            // Other legacy models
            'voyage-02': {
                dimension: 1024,
                contextLength: 4000,
                description: 'Legacy model'
            },
            'voyage-01': {
                dimension: 1024,
                contextLength: 4000,
                description: 'Legacy model'
            },
            'voyage-lite-01': {
                dimension: 1024,
                contextLength: 4000,
                description: 'Legacy model'
            },
            'voyage-lite-01-instruct': {
                dimension: 1024,
                contextLength: 4000,
                description: 'Legacy model'
            },
            'voyage-lite-02-instruct': {
                dimension: 1024,
                contextLength: 4000,
                description: 'Legacy model'
            }
        };
    }
}
226
+ exports.VoyageAIEmbedding = VoyageAIEmbedding;
227
+ //# sourceMappingURL=voyageai-embedding.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"voyageai-embedding.js","sourceRoot":"","sources":["../../src/embedding/voyageai-embedding.ts"],"names":[],"mappings":";;;AAAA,uCAA0C;AAC1C,qDAA8D;AAO9D,MAAa,iBAAkB,SAAQ,0BAAS;IAO5C,YAAY,MAA+B;QACvC,KAAK,EAAE,CAAC;QALJ,cAAS,GAAW,IAAI,CAAC,CAAC,sCAAsC;QAChE,cAAS,GAAyB,UAAU,CAAC;QAC3C,cAAS,GAAW,KAAK,CAAC,CAAC,qBAAqB;QAItD,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,MAAM,GAAG,IAAI,yBAAc,CAAC;YAC7B,MAAM,EAAE,MAAM,CAAC,MAAM;SACxB,CAAC,CAAC;QAEH,6DAA6D;QAC7D,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,KAAK,IAAI,eAAe,CAAC,CAAC;IAC9D,CAAC;IAEO,mBAAmB,CAAC,KAAa;QACrC,MAAM,eAAe,GAAG,iBAAiB,CAAC,kBAAkB,EAAE,CAAC;QAC/D,MAAM,SAAS,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAEzC,IAAI,SAAS,EAAE,CAAC;YACZ,mFAAmF;YACnF,IAAI,OAAO,SAAS,CAAC,SAAS,KAAK,QAAQ,EAAE,CAAC;gBAC1C,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,CAAC,oBAAoB;YAC/C,CAAC;iBAAM,CAAC;gBACJ,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC,SAAS,CAAC;YACzC,CAAC;YACD,iDAAiD;YACjD,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC,aAAa,CAAC;QAC7C,CAAC;aAAM,CAAC;YACJ,8DAA8D;YAC9D,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;YACtB,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;QAC3B,CAAC;IACL,CAAC;IAEO,uBAAuB,CAAC,KAAa;QACzC,MAAM,eAAe,GAAG,iBAAiB,CAAC,kBAAkB,EAAE,CAAC;QAC/D,MAAM,SAAS,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAEzC,IAAI,SAAS,EAAE,CAAC;YACZ,mFAAmF;YACnF,IAAI,OAAO,SAAS,CAAC,SAAS,KAAK,QAAQ,EAAE,CAAC;gBAC1C,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,CAAC,oBAAoB;YAC/C,CAAC;iBAAM,CAAC;gBACJ,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC,SAAS,CAAC;YACzC,CAAC;QACL,CAAC;aAAM,CAAC;YACJ,2CAA2C;YAC3C,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QAC1B,CAAC;IACL,CAAC;IAED,KAAK,CAAC,eAAe;QACjB,uEAAuE;QACvE,OAAO,IAAI,CAAC,SAAS,CAAC;IAC1B,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAY;QACpB,MAAM,aAAa,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;QAChD,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,IAAI,eAAe,CAAC;QAEnD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC;YACrC,KAAK,EAAE,aAAa;YACpB,KAAK,EAAE,KAAK;YACZ,SAAS,EAAE,IAAI,CAAC,SAAS;SAC5B,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC;YACrE,MAAM,I
AAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;QAC9D,CAAC;QAED,OAAO;YACH,MAAM,EAAE,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS;YAClC,SAAS,EAAE,IAAI,CAAC,SAAS;SAC5B,CAAC;IACN,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAe;QAC5B,MAAM,cAAc,GAAG,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC;QACnD,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,IAAI,eAAe,CAAC;QAEnD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC;YACrC,KAAK,EAAE,cAAc;YACrB,KAAK,EAAE,KAAK;YACZ,SAAS,EAAE,IAAI,CAAC,SAAS;SAC5B,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;QAC9D,CAAC;QAED,OAAO,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;YAC9B,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;gBAClB,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;YACpE,CAAC;YACD,OAAO;gBACH,MAAM,EAAE,IAAI,CAAC,SAAS;gBACtB,SAAS,EAAE,IAAI,CAAC,SAAS;aAC5B,CAAC;QACN,CAAC,CAAC,CAAC;IACP,CAAC;IAED,YAAY;QACR,OAAO,IAAI,CAAC,SAAS,CAAC;IAC1B,CAAC;IAED,WAAW;QACP,OAAO,UAAU,CAAC;IACtB,CAAC;IAED;;;OAGG;IACH,QAAQ,CAAC,KAAa;QAClB,IAAI,CAAC,MAAM,CAAC,KAAK,GAAG,KAAK,CAAC;QAC1B,IAAI,CAAC,mBAAmB,CAAC,KAAK,CAAC,CAAC;IACpC,CAAC;IAED;;;OAGG;IACH,YAAY,CAAC,SAA+B;QACxC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC/B,CAAC;IAED;;OAEG;IACH,SAAS;QACL,OAAO,IAAI,CAAC,MAAM,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,kBAAkB;QACrB,OAAO;YACH,4BAA4B;YAC5B,gBAAgB,EAAE;gBACd,SAAS,EAAE,gCAAgC;gBAC3C,aAAa,EAAE,KAAK;gBACpB,WAAW,EAAE,6DAA6D;aAC7E;YACD,YAAY,EAAE;gBACV,SAAS,EAAE,gCAAgC;gBAC3C,aAAa,EAAE,KAAK;gBACpB,WAAW,EAAE,kEAAkE;aAClF;YACD,iBAAiB,EAAE;gBACf,SAAS,EAAE,gCAAgC;gBAC3C,aAAa,EAAE,KAAK;gBACpB,WAAW,EAAE,gCAAgC;aAChD;YACD,eAAe,EAAE;gBACb,SAAS,EAAE,gCAAgC;gBAC3C,aAAa,EAAE,KAAK;gBACpB,WAAW,EAAE,qDAAqD;aACrE;YACD,6BAA6B;YAC7B,kBAAkB,EAAE;gBAChB,SAAS,EAAE,IAAI;gBACf,aAAa,EAAE,KAAK;gBACpB,WAAW,EAAE,yCAAyC;aACzD;YACD,cAAc,EAAE;gBACZ,SAAS,EAAE,IAAI;gBACf,aAAa,EAAE,KAAK;gBACpB,WAAW,EAAE,uCAAuC;aACvD;YACD,uBAAuB,EAAE;gBACrB,SAAS,EAAE,IAAI;gBACf,aAAa,EAAE,KAAK;gBACpB,WAAW,EAAE,+CAA+C;aAC/D;YACD,yBAAyB,EAAE;gBACvB,SAAS,EAAE,IAAI;gBACf,aAAa,EAAE,KAAK;gBACpB,WAAW,EAAE,gCAAgC;aAChD;YACD,gBAAgB;YAChB,gBAAgB,EAAE
;gBACd,SAAS,EAAE,IAAI;gBACf,aAAa,EAAE,KAAK;gBACpB,WAAW,EAAE,gCAAgC;aAChD;YACD,eAAe,EAAE;gBACb,SAAS,EAAE,IAAI;gBACf,aAAa,EAAE,KAAK;gBACpB,WAAW,EAAE,wCAAwC;aACxD;YACD,UAAU,EAAE;gBACR,SAAS,EAAE,IAAI;gBACf,aAAa,EAAE,KAAK;gBACpB,WAAW,EAAE,gCAAgC;aAChD;YACD,eAAe,EAAE;gBACb,SAAS,EAAE,GAAG;gBACd,aAAa,EAAE,KAAK;gBACpB,WAAW,EAAE,qCAAqC;aACrD;YACD,UAAU,EAAE;gBACR,SAAS,EAAE,IAAI;gBACf,aAAa,EAAE,IAAI;gBACnB,WAAW,EAAE,qCAAqC;aACrD;YACD,sBAAsB;YACtB,WAAW,EAAE;gBACT,SAAS,EAAE,IAAI;gBACf,aAAa,EAAE,IAAI;gBACnB,WAAW,EAAE,cAAc;aAC9B;YACD,WAAW,EAAE;gBACT,SAAS,EAAE,IAAI;gBACf,aAAa,EAAE,IAAI;gBACnB,WAAW,EAAE,cAAc;aAC9B;YACD,gBAAgB,EAAE;gBACd,SAAS,EAAE,IAAI;gBACf,aAAa,EAAE,IAAI;gBACnB,WAAW,EAAE,cAAc;aAC9B;YACD,yBAAyB,EAAE;gBACvB,SAAS,EAAE,IAAI;gBACf,aAAa,EAAE,IAAI;gBACnB,WAAW,EAAE,cAAc;aAC9B;YACD,yBAAyB,EAAE;gBACvB,SAAS,EAAE,IAAI;gBACf,aAAa,EAAE,IAAI;gBACnB,WAAW,EAAE,cAAc;aAC9B;SACJ,CAAC;IACN,CAAC;CACJ;AA9OD,8CA8OC"}
@@ -0,0 +1,8 @@
1
+ export * from './splitter';
2
+ export * from './embedding';
3
+ export * from './vectordb';
4
+ export * from './types';
5
+ export * from './context';
6
+ export * from './sync/synchronizer';
7
+ export * from './utils';
8
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,cAAc,aAAa,CAAC;AAC5B,cAAc,YAAY,CAAC;AAC3B,cAAc,SAAS,CAAC;AACxB,cAAc,WAAW,CAAC;AAC1B,cAAc,qBAAqB,CAAC;AACpC,cAAc,SAAS,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,24 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // Re-export helper: exposes m[k] on o under the name k2; reuses an existing this.__createBinding when one is present
3
+ if (k2 === undefined) k2 = k; // target name defaults to the source name
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { // source descriptor is not reused in these cases; a forwarding getter is installed instead
6
+ desc = { enumerable: true, get: function() { return m[k]; } }; // getter reads m[k] on every access, so later changes on m stay visible
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) { // fallback when Object.create is unavailable: plain value copy
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __exportStar = (this && this.__exportStar) || function(m, exports) { // Implements `export * from`: binds m's keys onto exports
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p); // skips "default" and any name exports already owns
15
+ };
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ __exportStar(require("./splitter"), exports);
18
+ __exportStar(require("./embedding"), exports);
19
+ __exportStar(require("./vectordb"), exports);
20
+ __exportStar(require("./types"), exports);
21
+ __exportStar(require("./context"), exports);
22
+ __exportStar(require("./sync/synchronizer"), exports);
23
+ __exportStar(require("./utils"), exports);
24
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,6CAA2B;AAC3B,8CAA4B;AAC5B,6CAA2B;AAC3B,0CAAwB;AACxB,4CAA0B;AAC1B,sDAAoC;AACpC,0CAAwB"}
@@ -0,0 +1,22 @@
1
+ import { Splitter, CodeChunk } from './index';
2
+ export declare class AstCodeSplitter implements Splitter { // Splits code into chunks at tree-sitter syntax nodes; delegates to a LangChain-based splitter when AST splitting is not possible
3
+ private chunkSize; // maximum chunk length in characters before a chunk is split further (implementation default: 2500)
4
+ private chunkOverlap; // number of trailing characters of the previous chunk prepended to each chunk (implementation default: 300)
5
+ private parser; // tree-sitter parser instance; its language is set on every split() call
6
+ private langchainFallback; // fallback splitter used for unsupported languages and on parse failure
7
+ constructor(chunkSize?: number, chunkOverlap?: number);
8
+ split(code: string, language: string, filePath?: string): Promise<CodeChunk[]>; // resolves to chunks carrying startLine/endLine/language/filePath metadata
9
+ setChunkSize(chunkSize: number): void; // also forwarded to the fallback splitter
10
+ setChunkOverlap(chunkOverlap: number): void; // also forwarded to the fallback splitter
11
+ private getLanguageConfig; // maps a language name or alias to its parser and splittable node types
12
+ private extractChunks; // walks the syntax tree and emits one chunk per splittable node
13
+ private refineChunks; // splits chunks longer than chunkSize, then applies overlap
14
+ private splitLargeChunk; // line-based splitting of a single oversized chunk
15
+ private addOverlap; // prepends the tail of the previous chunk to each later chunk
16
+ private getLineCount; // number of lines in a string
17
+ /**
18
+ * Check if AST splitting is supported for the given language
19
+ */
20
+ static isLanguageSupported(language: string): boolean;
21
+ }
22
+ //# sourceMappingURL=ast-splitter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ast-splitter.d.ts","sourceRoot":"","sources":["../../src/splitter/ast-splitter.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AA0B9C,qBAAa,eAAgB,YAAW,QAAQ;IAC5C,OAAO,CAAC,SAAS,CAAgB;IACjC,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,iBAAiB,CAAM;gBAEnB,SAAS,CAAC,EAAE,MAAM,EAAE,YAAY,CAAC,EAAE,MAAM;IAU/C,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;IAgCpF,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI;IAKrC,eAAe,CAAC,YAAY,EAAE,MAAM,GAAG,IAAI;IAK3C,OAAO,CAAC,iBAAiB;IAuBzB,OAAO,CAAC,aAAa;YAuDP,YAAY;IAgB1B,OAAO,CAAC,eAAe;IAgDvB,OAAO,CAAC,UAAU;IA4BlB,OAAO,CAAC,YAAY;IAIpB;;OAEG;IACH,MAAM,CAAC,mBAAmB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO;CAOxD"}
@@ -0,0 +1,234 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.AstCodeSplitter = void 0;
7
+ const tree_sitter_1 = __importDefault(require("tree-sitter"));
8
+ // Language parsers
9
+ const JavaScript = require('tree-sitter-javascript');
10
+ const TypeScript = require('tree-sitter-typescript').typescript;
11
+ const Python = require('tree-sitter-python');
12
+ const Java = require('tree-sitter-java');
13
+ const Cpp = require('tree-sitter-cpp');
14
+ const Go = require('tree-sitter-go');
15
+ const Rust = require('tree-sitter-rust');
16
+ const CSharp = require('tree-sitter-c-sharp');
17
+ const Scala = require('tree-sitter-scala');
18
+ // Node types that represent logical code units
19
+ const SPLITTABLE_NODE_TYPES = {
20
+ javascript: ['function_declaration', 'arrow_function', 'class_declaration', 'method_definition', 'export_statement'],
21
+ typescript: ['function_declaration', 'arrow_function', 'class_declaration', 'method_definition', 'export_statement', 'interface_declaration', 'type_alias_declaration'],
22
+ python: ['function_definition', 'class_definition', 'decorated_definition', 'async_function_definition'],
23
+ java: ['method_declaration', 'class_declaration', 'interface_declaration', 'constructor_declaration'],
24
+ cpp: ['function_definition', 'class_specifier', 'namespace_definition', 'declaration'],
25
+ go: ['function_declaration', 'method_declaration', 'type_declaration', 'var_declaration', 'const_declaration'],
26
+ rust: ['function_item', 'impl_item', 'struct_item', 'enum_item', 'trait_item', 'mod_item'],
27
+ csharp: ['method_declaration', 'class_declaration', 'interface_declaration', 'struct_declaration', 'enum_declaration'],
28
+ scala: ['method_declaration', 'class_declaration', 'interface_declaration', 'constructor_declaration']
29
+ };
30
+ class AstCodeSplitter {
31
+ constructor(chunkSize, chunkOverlap) {
32
+ this.chunkSize = 2500;
33
+ this.chunkOverlap = 300;
34
+ if (chunkSize)
35
+ this.chunkSize = chunkSize;
36
+ if (chunkOverlap)
37
+ this.chunkOverlap = chunkOverlap;
38
+ this.parser = new tree_sitter_1.default();
39
+ // Initialize fallback splitter
40
+ const { LangChainCodeSplitter } = require('./langchain-splitter');
41
+ this.langchainFallback = new LangChainCodeSplitter(chunkSize, chunkOverlap);
42
+ }
43
+ async split(code, language, filePath) {
44
+ // Check if language is supported by AST splitter
45
+ const langConfig = this.getLanguageConfig(language);
46
+ if (!langConfig) {
47
+ console.log(`📝 Language ${language} not supported by AST, using LangChain splitter for: ${filePath || 'unknown'}`);
48
+ return await this.langchainFallback.split(code, language, filePath);
49
+ }
50
+ try {
51
+ console.log(`🌳 Using AST splitter for ${language} file: ${filePath || 'unknown'}`);
52
+ this.parser.setLanguage(langConfig.parser);
53
+ const tree = this.parser.parse(code);
54
+ if (!tree.rootNode) {
55
+ console.warn(`[ASTSplitter] ⚠️ Failed to parse AST for ${language}, falling back to LangChain: ${filePath || 'unknown'}`);
56
+ return await this.langchainFallback.split(code, language, filePath);
57
+ }
58
+ // Extract chunks based on AST nodes
59
+ const chunks = this.extractChunks(tree.rootNode, code, langConfig.nodeTypes, language, filePath);
60
+ // If chunks are too large, split them further
61
+ const refinedChunks = await this.refineChunks(chunks, code);
62
+ return refinedChunks;
63
+ }
64
+ catch (error) {
65
+ console.warn(`[ASTSplitter] ⚠️ AST splitter failed for ${language}, falling back to LangChain: ${error}`);
66
+ return await this.langchainFallback.split(code, language, filePath);
67
+ }
68
+ }
69
+ setChunkSize(chunkSize) {
70
+ this.chunkSize = chunkSize;
71
+ this.langchainFallback.setChunkSize(chunkSize);
72
+ }
73
+ setChunkOverlap(chunkOverlap) {
74
+ this.chunkOverlap = chunkOverlap;
75
+ this.langchainFallback.setChunkOverlap(chunkOverlap);
76
+ }
77
+ getLanguageConfig(language) {
78
+ const langMap = {
79
+ 'javascript': { parser: JavaScript, nodeTypes: SPLITTABLE_NODE_TYPES.javascript },
80
+ 'js': { parser: JavaScript, nodeTypes: SPLITTABLE_NODE_TYPES.javascript },
81
+ 'typescript': { parser: TypeScript, nodeTypes: SPLITTABLE_NODE_TYPES.typescript },
82
+ 'ts': { parser: TypeScript, nodeTypes: SPLITTABLE_NODE_TYPES.typescript },
83
+ 'python': { parser: Python, nodeTypes: SPLITTABLE_NODE_TYPES.python },
84
+ 'py': { parser: Python, nodeTypes: SPLITTABLE_NODE_TYPES.python },
85
+ 'java': { parser: Java, nodeTypes: SPLITTABLE_NODE_TYPES.java },
86
+ 'cpp': { parser: Cpp, nodeTypes: SPLITTABLE_NODE_TYPES.cpp },
87
+ 'c++': { parser: Cpp, nodeTypes: SPLITTABLE_NODE_TYPES.cpp },
88
+ 'c': { parser: Cpp, nodeTypes: SPLITTABLE_NODE_TYPES.cpp },
89
+ 'go': { parser: Go, nodeTypes: SPLITTABLE_NODE_TYPES.go },
90
+ 'rust': { parser: Rust, nodeTypes: SPLITTABLE_NODE_TYPES.rust },
91
+ 'rs': { parser: Rust, nodeTypes: SPLITTABLE_NODE_TYPES.rust },
92
+ 'cs': { parser: CSharp, nodeTypes: SPLITTABLE_NODE_TYPES.csharp },
93
+ 'csharp': { parser: CSharp, nodeTypes: SPLITTABLE_NODE_TYPES.csharp },
94
+ 'scala': { parser: Scala, nodeTypes: SPLITTABLE_NODE_TYPES.scala }
95
+ };
96
+ return langMap[language.toLowerCase()] || null;
97
+ }
98
+ extractChunks(node, code, splittableTypes, language, filePath) {
99
+ const chunks = [];
100
+ const codeLines = code.split('\n');
101
+ const traverse = (currentNode) => {
102
+ // Check if this node type should be split into a chunk
103
+ if (splittableTypes.includes(currentNode.type)) {
104
+ const startLine = currentNode.startPosition.row + 1;
105
+ const endLine = currentNode.endPosition.row + 1;
106
+ const nodeText = code.slice(currentNode.startIndex, currentNode.endIndex);
107
+ // Only create chunk if it has meaningful content
108
+ if (nodeText.trim().length > 0) {
109
+ chunks.push({
110
+ content: nodeText,
111
+ metadata: {
112
+ startLine,
113
+ endLine,
114
+ language,
115
+ filePath,
116
+ }
117
+ });
118
+ }
119
+ }
120
+ // Continue traversing child nodes
121
+ for (const child of currentNode.children) {
122
+ traverse(child);
123
+ }
124
+ };
125
+ traverse(node);
126
+ // If no meaningful chunks found, create a single chunk with the entire code
127
+ if (chunks.length === 0) {
128
+ chunks.push({
129
+ content: code,
130
+ metadata: {
131
+ startLine: 1,
132
+ endLine: codeLines.length,
133
+ language,
134
+ filePath,
135
+ }
136
+ });
137
+ }
138
+ return chunks;
139
+ }
140
+ async refineChunks(chunks, originalCode) {
141
+ const refinedChunks = [];
142
+ for (const chunk of chunks) {
143
+ if (chunk.content.length <= this.chunkSize) {
144
+ refinedChunks.push(chunk);
145
+ }
146
+ else {
147
+ // Split large chunks using character-based splitting
148
+ const subChunks = this.splitLargeChunk(chunk, originalCode);
149
+ refinedChunks.push(...subChunks);
150
+ }
151
+ }
152
+ return this.addOverlap(refinedChunks);
153
+ }
154
+ splitLargeChunk(chunk, originalCode) {
155
+ const lines = chunk.content.split('\n');
156
+ const subChunks = [];
157
+ let currentChunk = '';
158
+ let currentStartLine = chunk.metadata.startLine;
159
+ let currentLineCount = 0;
160
+ for (let i = 0; i < lines.length; i++) {
161
+ const line = lines[i];
162
+ const lineWithNewline = i === lines.length - 1 ? line : line + '\n';
163
+ if (currentChunk.length + lineWithNewline.length > this.chunkSize && currentChunk.length > 0) {
164
+ // Create a sub-chunk
165
+ subChunks.push({
166
+ content: currentChunk.trim(),
167
+ metadata: {
168
+ startLine: currentStartLine,
169
+ endLine: currentStartLine + currentLineCount - 1,
170
+ language: chunk.metadata.language,
171
+ filePath: chunk.metadata.filePath,
172
+ }
173
+ });
174
+ currentChunk = lineWithNewline;
175
+ currentStartLine = chunk.metadata.startLine + i;
176
+ currentLineCount = 1;
177
+ }
178
+ else {
179
+ currentChunk += lineWithNewline;
180
+ currentLineCount++;
181
+ }
182
+ }
183
+ // Add the last sub-chunk
184
+ if (currentChunk.trim().length > 0) {
185
+ subChunks.push({
186
+ content: currentChunk.trim(),
187
+ metadata: {
188
+ startLine: currentStartLine,
189
+ endLine: currentStartLine + currentLineCount - 1,
190
+ language: chunk.metadata.language,
191
+ filePath: chunk.metadata.filePath,
192
+ }
193
+ });
194
+ }
195
+ return subChunks;
196
+ }
197
+ addOverlap(chunks) {
198
+ if (chunks.length <= 1 || this.chunkOverlap <= 0) {
199
+ return chunks;
200
+ }
201
+ const overlappedChunks = [];
202
+ for (let i = 0; i < chunks.length; i++) {
203
+ let content = chunks[i].content;
204
+ const metadata = { ...chunks[i].metadata };
205
+ // Add overlap from previous chunk
206
+ if (i > 0 && this.chunkOverlap > 0) {
207
+ const prevChunk = chunks[i - 1];
208
+ const overlapText = prevChunk.content.slice(-this.chunkOverlap);
209
+ content = overlapText + '\n' + content;
210
+ metadata.startLine = Math.max(1, metadata.startLine - this.getLineCount(overlapText));
211
+ }
212
+ overlappedChunks.push({
213
+ content,
214
+ metadata
215
+ });
216
+ }
217
+ return overlappedChunks;
218
+ }
219
+ getLineCount(text) {
220
+ return text.split('\n').length;
221
+ }
222
+ /**
223
+ * Check if AST splitting is supported for the given language
224
+ */
225
+ static isLanguageSupported(language) {
226
+ const supportedLanguages = [
227
+ 'javascript', 'js', 'typescript', 'ts', 'python', 'py',
228
+ 'java', 'cpp', 'c++', 'c', 'go', 'rust', 'rs', 'cs', 'csharp', 'scala'
229
+ ];
230
+ return supportedLanguages.includes(language.toLowerCase());
231
+ }
232
+ }
233
+ exports.AstCodeSplitter = AstCodeSplitter;
234
+ //# sourceMappingURL=ast-splitter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ast-splitter.js","sourceRoot":"","sources":["../../src/splitter/ast-splitter.ts"],"names":[],"mappings":";;;;;;AAAA,8DAAiC;AAGjC,mBAAmB;AACnB,MAAM,UAAU,GAAG,OAAO,CAAC,wBAAwB,CAAC,CAAC;AACrD,MAAM,UAAU,GAAG,OAAO,CAAC,wBAAwB,CAAC,CAAC,UAAU,CAAC;AAChE,MAAM,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAC;AAC7C,MAAM,IAAI,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;AACzC,MAAM,GAAG,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAAC;AACvC,MAAM,EAAE,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAAC;AACrC,MAAM,IAAI,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;AACzC,MAAM,MAAM,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAAC;AAC9C,MAAM,KAAK,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;AAE3C,+CAA+C;AAC/C,MAAM,qBAAqB,GAAG;IAC1B,UAAU,EAAE,CAAC,sBAAsB,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,mBAAmB,EAAE,kBAAkB,CAAC;IACpH,UAAU,EAAE,CAAC,sBAAsB,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,uBAAuB,EAAE,wBAAwB,CAAC;IACvK,MAAM,EAAE,CAAC,qBAAqB,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,2BAA2B,CAAC;IACxG,IAAI,EAAE,CAAC,oBAAoB,EAAE,mBAAmB,EAAE,uBAAuB,EAAE,yBAAyB,CAAC;IACrG,GAAG,EAAE,CAAC,qBAAqB,EAAE,iBAAiB,EAAE,sBAAsB,EAAE,aAAa,CAAC;IACtF,EAAE,EAAE,CAAC,sBAAsB,EAAE,oBAAoB,EAAE,kBAAkB,EAAE,iBAAiB,EAAE,mBAAmB,CAAC;IAC9G,IAAI,EAAE,CAAC,eAAe,EAAE,WAAW,EAAE,aAAa,EAAE,WAAW,EAAE,YAAY,EAAE,UAAU,CAAC;IAC1F,MAAM,EAAE,CAAC,oBAAoB,EAAE,mBAAmB,EAAE,uBAAuB,EAAE,oBAAoB,EAAE,kBAAkB,CAAC;IACtH,KAAK,EAAE,CAAC,oBAAoB,EAAE,mBAAmB,EAAE,uBAAuB,EAAE,yBAAyB,CAAC;CACzG,CAAC;AAEF,MAAa,eAAe;IAMxB,YAAY,SAAkB,EAAE,YAAqB;QAL7C,cAAS,GAAW,IAAI,CAAC;QACzB,iBAAY,GAAW,GAAG,CAAC;QAK/B,IAAI,SAAS;YAAE,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC1C,IAAI,YAAY;YAAE,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACnD,IAAI,CAAC,MAAM,GAAG,IAAI,qBAAM,EAAE,CAAC;QAE3B,+BAA+B;QAC/B,MAAM,EAAE,qBAAqB,EAAE,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAAC;QAClE,IAAI,CAAC,iBAAiB,GAAG,IAAI,qBAAqB,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;IAChF,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,QAAgB,EAAE,QAAiB;QACzD,iDAAiD;QACjD,MAAM,UAAU,GAAG,IAAI,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QACpD,IAAI,CAAC,UAAU,EAAE,CAAC;YACd,OAAO,CAAC,GAAG,CAAC,eAAe,QAAQ,wDAAwD,QAAQ,IAAI,SAAS,EAAE,CAAC,CAAC;YACpH,OAAO,MAAM,IAAI,CAA
C,iBAAiB,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;QACxE,CAAC;QAED,IAAI,CAAC;YACD,OAAO,CAAC,GAAG,CAAC,6BAA6B,QAAQ,UAAU,QAAQ,IAAI,SAAS,EAAE,CAAC,CAAC;YAEpF,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;YAC3C,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAErC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACjB,OAAO,CAAC,IAAI,CAAC,6CAA6C,QAAQ,gCAAgC,QAAQ,IAAI,SAAS,EAAE,CAAC,CAAC;gBAC3H,OAAO,MAAM,IAAI,CAAC,iBAAiB,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;YACxE,CAAC;YAED,oCAAoC;YACpC,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,EAAE,UAAU,CAAC,SAAS,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;YAEjG,8CAA8C;YAC9C,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YAE5D,OAAO,aAAa,CAAC;QACzB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACb,OAAO,CAAC,IAAI,CAAC,6CAA6C,QAAQ,gCAAgC,KAAK,EAAE,CAAC,CAAC;YAC3G,OAAO,MAAM,IAAI,CAAC,iBAAiB,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;QACxE,CAAC;IACL,CAAC;IAED,YAAY,CAAC,SAAiB;QAC1B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,iBAAiB,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;IACnD,CAAC;IAED,eAAe,CAAC,YAAoB;QAChC,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACjC,IAAI,CAAC,iBAAiB,CAAC,eAAe,CAAC,YAAY,CAAC,CAAC;IACzD,CAAC;IAEO,iBAAiB,CAAC,QAAgB;QACtC,MAAM,OAAO,GAAyD;YAClE,YAAY,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,qBAAqB,CAAC,UAAU,EAAE;YACjF,IAAI,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,qBAAqB,CAAC,UAAU,EAAE;YACzE,YAAY,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,qBAAqB,CAAC,UAAU,EAAE;YACjF,IAAI,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,qBAAqB,CAAC,UAAU,EAAE;YACzE,QAAQ,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,qBAAqB,CAAC,MAAM,EAAE;YACrE,IAAI,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,qBAAqB,CAAC,MAAM,EAAE;YACjE,MAAM,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,qBAAqB,CAAC,IAAI,EAAE;YAC/D,KAAK,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,SAAS,EAAE,qBAAqB,CAAC,GAAG,EAAE;YAC5D,KAAK,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,SAAS,EAAE,qBAAqB,CAAC,GAAG,EAAE;YAC5D,GAAG,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,SAAS,EAAE,qBAAqB,CAAC,GAAG,EAAE;YAC1D,IAAI,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,SAAS,EAAE
,qBAAqB,CAAC,EAAE,EAAE;YACzD,MAAM,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,qBAAqB,CAAC,IAAI,EAAE;YAC/D,IAAI,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,qBAAqB,CAAC,IAAI,EAAE;YAC7D,IAAI,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,qBAAqB,CAAC,MAAM,EAAE;YACjE,QAAQ,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,qBAAqB,CAAC,MAAM,EAAE;YACrE,OAAO,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,qBAAqB,CAAC,KAAK,EAAE;SACrE,CAAC;QAEF,OAAO,OAAO,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,IAAI,IAAI,CAAC;IACnD,CAAC;IAEO,aAAa,CACjB,IAAuB,EACvB,IAAY,EACZ,eAAyB,EACzB,QAAgB,EAChB,QAAiB;QAEjB,MAAM,MAAM,GAAgB,EAAE,CAAC;QAC/B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEnC,MAAM,QAAQ,GAAG,CAAC,WAA8B,EAAE,EAAE;YAChD,uDAAuD;YACvD,IAAI,eAAe,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC7C,MAAM,SAAS,GAAG,WAAW,CAAC,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC;gBACpD,MAAM,OAAO,GAAG,WAAW,CAAC,WAAW,CAAC,GAAG,GAAG,CAAC,CAAC;gBAChD,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,UAAU,EAAE,WAAW,CAAC,QAAQ,CAAC,CAAC;gBAE1E,iDAAiD;gBACjD,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC7B,MAAM,CAAC,IAAI,CAAC;wBACR,OAAO,EAAE,QAAQ;wBACjB,QAAQ,EAAE;4BACN,SAAS;4BACT,OAAO;4BACP,QAAQ;4BACR,QAAQ;yBACX;qBACJ,CAAC,CAAC;gBACP,CAAC;YACL,CAAC;YAED,kCAAkC;YAClC,KAAK,MAAM,KAAK,IAAI,WAAW,CAAC,QAAQ,EAAE,CAAC;gBACvC,QAAQ,CAAC,KAAK,CAAC,CAAC;YACpB,CAAC;QACL,CAAC,CAAC;QAEF,QAAQ,CAAC,IAAI,CAAC,CAAC;QAEf,4EAA4E;QAC5E,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,MAAM,CAAC,IAAI,CAAC;gBACR,OAAO,EAAE,IAAI;gBACb,QAAQ,EAAE;oBACN,SAAS,EAAE,CAAC;oBACZ,OAAO,EAAE,SAAS,CAAC,MAAM;oBACzB,QAAQ;oBACR,QAAQ;iBACX;aACJ,CAAC,CAAC;QACP,CAAC;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,MAAmB,EAAE,YAAoB;QAChE,MAAM,aAAa,GAAgB,EAAE,CAAC;QAEtC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YACzB,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;gBACzC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAC9B,CAAC;iBAAM,CAAC;gBACJ,qDAAqD;gBACrD,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,YAAY,CAAC,CAAC;gBAC5D,aAAa,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;YACrC,CAAC;QACL,CAAC;QAED,OAAO,IAAI,CA
AC,UAAU,CAAC,aAAa,CAAC,CAAC;IAC1C,CAAC;IAEO,eAAe,CAAC,KAAgB,EAAE,YAAoB;QAC1D,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,SAAS,GAAgB,EAAE,CAAC;QAClC,IAAI,YAAY,GAAG,EAAE,CAAC;QACtB,IAAI,gBAAgB,GAAG,KAAK,CAAC,QAAQ,CAAC,SAAS,CAAC;QAChD,IAAI,gBAAgB,GAAG,CAAC,CAAC;QAEzB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACpC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,MAAM,eAAe,GAAG,CAAC,KAAK,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,GAAG,IAAI,CAAC;YAEpE,IAAI,YAAY,CAAC,MAAM,GAAG,eAAe,CAAC,MAAM,GAAG,IAAI,CAAC,SAAS,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3F,qBAAqB;gBACrB,SAAS,CAAC,IAAI,CAAC;oBACX,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE;oBAC5B,QAAQ,EAAE;wBACN,SAAS,EAAE,gBAAgB;wBAC3B,OAAO,EAAE,gBAAgB,GAAG,gBAAgB,GAAG,CAAC;wBAChD,QAAQ,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ;wBACjC,QAAQ,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ;qBACpC;iBACJ,CAAC,CAAC;gBAEH,YAAY,GAAG,eAAe,CAAC;gBAC/B,gBAAgB,GAAG,KAAK,CAAC,QAAQ,CAAC,SAAS,GAAG,CAAC,CAAC;gBAChD,gBAAgB,GAAG,CAAC,CAAC;YACzB,CAAC;iBAAM,CAAC;gBACJ,YAAY,IAAI,eAAe,CAAC;gBAChC,gBAAgB,EAAE,CAAC;YACvB,CAAC;QACL,CAAC;QAED,yBAAyB;QACzB,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjC,SAAS,CAAC,IAAI,CAAC;gBACX,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE;gBAC5B,QAAQ,EAAE;oBACN,SAAS,EAAE,gBAAgB;oBAC3B,OAAO,EAAE,gBAAgB,GAAG,gBAAgB,GAAG,CAAC;oBAChD,QAAQ,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ;oBACjC,QAAQ,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ;iBACpC;aACJ,CAAC,CAAC;QACP,CAAC;QAED,OAAO,SAAS,CAAC;IACrB,CAAC;IAEO,UAAU,CAAC,MAAmB;QAClC,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,IAAI,IAAI,CAAC,YAAY,IAAI,CAAC,EAAE,CAAC;YAC/C,OAAO,MAAM,CAAC;QAClB,CAAC;QAED,MAAM,gBAAgB,GAAgB,EAAE,CAAC;QAEzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACrC,IAAI,OAAO,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YAChC,MAAM,QAAQ,GAAG,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;YAE3C,kCAAkC;YAClC,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,YAAY,GAAG,CAAC,EAAE,CAAC;gBACjC,MAAM,SAAS,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;gBAChC,MA
AM,WAAW,GAAG,SAAS,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;gBAChE,OAAO,GAAG,WAAW,GAAG,IAAI,GAAG,OAAO,CAAC;gBACvC,QAAQ,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC,CAAC;YAC1F,CAAC;YAED,gBAAgB,CAAC,IAAI,CAAC;gBAClB,OAAO;gBACP,QAAQ;aACX,CAAC,CAAC;QACP,CAAC;QAED,OAAO,gBAAgB,CAAC;IAC5B,CAAC;IAEO,YAAY,CAAC,IAAY;QAC7B,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;IACnC,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,mBAAmB,CAAC,QAAgB;QACvC,MAAM,kBAAkB,GAAG;YACvB,YAAY,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI;YACtD,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO;SACzE,CAAC;QACF,OAAO,kBAAkB,CAAC,QAAQ,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;IAC/D,CAAC;CACJ;AAlPD,0CAkPC"}
@@ -0,0 +1,41 @@
1
+ export interface CodeChunk { // One piece of split code plus where it came from; the element type returned by Splitter.split.
2
+ content: string; // Text of the chunk.
3
+ metadata: { // Location and origin details for the chunk.
4
+ startLine: number; // First line covered by the chunk (AstCodeSplitter clamps it to a minimum of 1).
5
+ endLine: number; // Last line covered by the chunk; presumably inclusive — confirm against each splitter.
6
+ language?: string; // Optional language name associated with the chunk.
7
+ filePath?: string; // Optional path of the file the chunk was taken from.
8
+ };
9
+ }
10
+ export declare enum SplitterType { // Splitter type enumeration; members are string-valued.
11
+ LANGCHAIN = "langchain", // Presumably selects LangChainCodeSplitter — confirm at the call site.
12
+ AST = "ast" // Presumably selects AstCodeSplitter — confirm at the call site.
13
+ }
14
+ export interface SplitterConfig { // Optional settings for a splitter; every field may be omitted.
15
+ type?: SplitterType; // Which splitter kind is requested.
16
+ chunkSize?: number; // Chunk size; presumably measured in characters — TODO confirm.
17
+ chunkOverlap?: number; // Chunk overlap size; AstCodeSplitter applies it as a character count taken from the previous chunk.
18
+ }
19
+ export interface Splitter { // Contract for code splitters; LangChainCodeSplitter is declared as implementing it.
19
+ /**
20
+ * Split code into code chunks
21
+ * @param code Code content
22
+ * @param language Programming language
23
+ * @param filePath File path (optional)
24
+ * @returns Promise resolving to the array of code chunks
25
+ */
26
+ split(code: string, language: string, filePath?: string): Promise<CodeChunk[]>;
27
+ /**
28
+ * Set chunk size
29
+ * @param chunkSize Chunk size
30
+ */
31
+ setChunkSize(chunkSize: number): void;
32
+ /**
33
+ * Set chunk overlap size
34
+ * @param chunkOverlap Chunk overlap size
35
+ */
36
+ setChunkOverlap(chunkOverlap: number): void;
37
+ }
39
+ export * from './langchain-splitter';
40
+ export * from './ast-splitter';
41
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/splitter/index.ts"],"names":[],"mappings":"AACA,MAAM,WAAW,SAAS;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE;QACN,SAAS,EAAE,MAAM,CAAC;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;CACL;AAGD,oBAAY,YAAY;IACpB,SAAS,cAAc;IACvB,GAAG,QAAQ;CACd;AAGD,MAAM,WAAW,cAAc;IAC3B,IAAI,CAAC,EAAE,YAAY,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACrB;;;;;;OAMG;IACH,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC,CAAC;IAE/E;;;OAGG;IACH,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IAEtC;;;OAGG;IACH,eAAe,CAAC,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;CAC/C;AAGD,cAAc,sBAAsB,CAAC;AACrC,cAAc,gBAAgB,CAAC"}
@@ -0,0 +1,27 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // Reuse an existing helper on `this` if present; otherwise define one that exposes m[k] on o as k2.
3
+ if (k2 === undefined) k2 = k; // Target name defaults to the source name.
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { // No descriptor, an accessor on a module not flagged __esModule, or a writable/configurable data property.
6
+ desc = { enumerable: true, get: function() { return m[k]; } }; // Replace it with a getter that reads m[k] on every access.
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) { // Fallback when Object.create is unavailable: copy the current value by plain assignment.
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __exportStar = (this && this.__exportStar) || function(m, exports) { // Reuse an existing helper on `this` if present; otherwise re-export the members of m onto exports.
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p); // Skips "default" and any name exports already owns.
15
+ };
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ exports.SplitterType = void 0;
18
+ // Splitter type enumeration: a plain object mapping member names to their string values.
19
+ var SplitterType;
20
+ (function (SplitterType) { // Fills in the object passed as the argument below.
21
+ SplitterType["LANGCHAIN"] = "langchain";
22
+ SplitterType["AST"] = "ast";
23
+ })(SplitterType || (exports.SplitterType = SplitterType = {})); // Creates the object on first use and publishes the same object as exports.SplitterType.
24
+ // Implementation class exports
25
+ __exportStar(require("./langchain-splitter"), exports);
26
+ __exportStar(require("./ast-splitter"), exports);
27
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/splitter/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;AAWA,4BAA4B;AAC5B,IAAY,YAGX;AAHD,WAAY,YAAY;IACpB,uCAAuB,CAAA;IACvB,2BAAW,CAAA;AACf,CAAC,EAHW,YAAY,4BAAZ,YAAY,QAGvB;AAgCD,+BAA+B;AAC/B,uDAAqC;AACrC,iDAA+B"}
@@ -0,0 +1,13 @@
1
+ import { Splitter, CodeChunk } from './index';
2
+ export declare class LangChainCodeSplitter implements Splitter { // Splitter implementation; this file only declares its shape, the bodies live in langchain-splitter.js.
3
+ private chunkSize; // Private member; its type is not exposed in this declaration.
4
+ private chunkOverlap; // Private member; its type is not exposed in this declaration.
5
+ constructor(chunkSize?: number, chunkOverlap?: number); // Both arguments are optional; any defaults are set by the implementation.
6
+ split(code: string, language: string, filePath?: string): Promise<CodeChunk[]>; // Implements Splitter.split.
7
+ setChunkSize(chunkSize: number): void; // Implements Splitter.setChunkSize.
8
+ setChunkOverlap(chunkOverlap: number): void; // Implements Splitter.setChunkOverlap.
9
+ private mapLanguage; // Private helper; signature not exposed here.
10
+ private fallbackSplit; // Private helper; signature not exposed here.
11
+ private estimateLines; // Private helper; signature not exposed here.
12
+ }
13
+ //# sourceMappingURL=langchain-splitter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"langchain-splitter.d.ts","sourceRoot":"","sources":["../../src/splitter/langchain-splitter.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAK9C,qBAAa,qBAAsB,YAAW,QAAQ;IAClD,OAAO,CAAC,SAAS,CAAgB;IACjC,OAAO,CAAC,YAAY,CAAe;gBAEvB,SAAS,CAAC,EAAE,MAAM,EAAE,YAAY,CAAC,EAAE,MAAM;IAK/C,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;IAwCpF,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI;IAIrC,eAAe,CAAC,YAAY,EAAE,MAAM,GAAG,IAAI;IAI3C,OAAO,CAAC,WAAW;YA4BL,aAAa;IAuB3B,OAAO,CAAC,aAAa;CAiBxB"}