@rws-framework/ai-tools 3.9.1 → 3.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@rws-framework/ai-tools",
3
3
  "private": false,
4
- "version": "3.9.1",
4
+ "version": "3.10.0",
5
5
  "description": "",
6
6
  "main": "src/index.ts",
7
7
  "scripts": {},
@@ -87,14 +87,16 @@ export class LangChainEmbeddingService {
87
87
  this.ensureInitialized();
88
88
 
89
89
  if (this.config.rateLimiting) {
90
+ let doneFiles = 0;
90
91
  return await this.rateLimitingService.executeWithRateLimit(
91
92
  docs,
92
93
  async (batch: Document[]) => {
93
94
  const embeddings = await this.embeddings.embedDocuments(batch.map(d => d.pageContent));
94
95
 
95
96
  if(batchCallback){
96
- const fragments = batch.map(d => d.pageContent);
97
- const percentage = (batch.length / docs.length) * 100;
97
+ const fragments = batch.map(d => d.pageContent);
98
+ doneFiles += batch.length;
99
+ const percentage = (doneFiles / docs.length) * 100;
98
100
  await batchCallback(fragments, embeddings, percentage);
99
101
  }
100
102
 
@@ -109,14 +111,15 @@ export class LangChainEmbeddingService {
109
111
 
110
112
  async embedTexts(texts: string[], batchCallback?: (fragments:string[], batch: number[][], percentage: number) => Promise<void>): Promise<number[][]> {
111
113
  this.ensureInitialized();
112
-
114
+ let doneTexts = 0;
113
115
  if (this.config.rateLimiting) {
114
116
  return await this.rateLimitingService.executeWithRateLimit(
115
117
  texts,
116
118
  async (batch: string[]) => {
117
119
  const embeddings = await this.embeddings.embedDocuments(batch);
118
120
  if (batchCallback) {
119
- const percentage = (batch.length / texts.length) * 100;
121
+ doneTexts += batch.length;
122
+ const percentage = (doneTexts / texts.length) * 100;
120
123
  await batchCallback(batch, embeddings, percentage);
121
124
  }
122
125
  return embeddings;
@@ -165,16 +168,7 @@ export class LangChainEmbeddingService {
165
168
  return TextChunker.chunkText(text, maxTokens, overlap, separators);
166
169
  }
167
170
 
168
- async chunkCSV(rows: Record<string, any>[], ragOverride?: IChunkConfig): Promise<Document[]> {
169
- // Use safe token limits - the TextChunker handles token estimation internally
170
- const maxTokens = ragOverride ? ragOverride.chunkSize : (this.chunkConfig?.chunkSize || 450); // Safe token limit for embedding models
171
- const overlap = ragOverride ? ragOverride.chunkOverlap : (this.chunkConfig?.chunkOverlap || 50); // Character overlap, not token
172
-
173
- const splitter = new RecursiveCharacterTextSplitter({
174
- chunkSize: maxTokens,
175
- chunkOverlap: overlap
176
- });
177
-
171
+ async chunkCSV(rows: Record<string, any>[], ragOverride?: IChunkConfig): Promise<Document[]> {
178
172
  const docs = rows.map((row, i) => {
179
173
  const text = Object.entries(row)
180
174
  .map(([k, v]) => `${k}: ${v}`)
@@ -186,7 +180,7 @@ export class LangChainEmbeddingService {
186
180
  });
187
181
  });
188
182
 
189
- return await splitter.splitDocuments(docs);
183
+ return docs;
190
184
  }
191
185
 
192
186
  /**