@rws-framework/ai-tools 3.9.1 → 3.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -87,14 +87,16 @@ export class LangChainEmbeddingService {
|
|
|
87
87
|
this.ensureInitialized();
|
|
88
88
|
|
|
89
89
|
if (this.config.rateLimiting) {
|
|
90
|
+
let doneFiles = 0;
|
|
90
91
|
return await this.rateLimitingService.executeWithRateLimit(
|
|
91
92
|
docs,
|
|
92
93
|
async (batch: Document[]) => {
|
|
93
94
|
const embeddings = await this.embeddings.embedDocuments(batch.map(d => d.pageContent));
|
|
94
95
|
|
|
95
96
|
if(batchCallback){
|
|
96
|
-
const fragments = batch.map(d => d.pageContent);
|
|
97
|
-
|
|
97
|
+
const fragments = batch.map(d => d.pageContent);
|
|
98
|
+
doneFiles += batch.length;
|
|
99
|
+
const percentage = (doneFiles / docs.length) * 100;
|
|
98
100
|
await batchCallback(fragments, embeddings, percentage);
|
|
99
101
|
}
|
|
100
102
|
|
|
@@ -109,14 +111,15 @@ export class LangChainEmbeddingService {
|
|
|
109
111
|
|
|
110
112
|
async embedTexts(texts: string[], batchCallback?: (fragments:string[], batch: number[][], percentage: number) => Promise<void>): Promise<number[][]> {
|
|
111
113
|
this.ensureInitialized();
|
|
112
|
-
|
|
114
|
+
let doneTexts = 0;
|
|
113
115
|
if (this.config.rateLimiting) {
|
|
114
116
|
return await this.rateLimitingService.executeWithRateLimit(
|
|
115
117
|
texts,
|
|
116
118
|
async (batch: string[]) => {
|
|
117
119
|
const embeddings = await this.embeddings.embedDocuments(batch);
|
|
118
120
|
if (batchCallback) {
|
|
119
|
-
|
|
121
|
+
doneTexts += batch.length;
|
|
122
|
+
const percentage = (doneTexts / texts.length) * 100;
|
|
120
123
|
await batchCallback(batch, embeddings, percentage);
|
|
121
124
|
}
|
|
122
125
|
return embeddings;
|
|
@@ -165,16 +168,7 @@ export class LangChainEmbeddingService {
|
|
|
165
168
|
return TextChunker.chunkText(text, maxTokens, overlap, separators);
|
|
166
169
|
}
|
|
167
170
|
|
|
168
|
-
async chunkCSV(rows: Record<string, any>[], ragOverride?: IChunkConfig): Promise<Document[]> {
|
|
169
|
-
// Use safe token limits - the TextChunker handles token estimation internally
|
|
170
|
-
const maxTokens = ragOverride ? ragOverride.chunkSize : (this.chunkConfig?.chunkSize || 450); // Safe token limit for embedding models
|
|
171
|
-
const overlap = ragOverride ? ragOverride.chunkOverlap : (this.chunkConfig?.chunkOverlap || 50); // Character overlap, not token
|
|
172
|
-
|
|
173
|
-
const splitter = new RecursiveCharacterTextSplitter({
|
|
174
|
-
chunkSize: maxTokens,
|
|
175
|
-
chunkOverlap: overlap
|
|
176
|
-
});
|
|
177
|
-
|
|
171
|
+
async chunkCSV(rows: Record<string, any>[], ragOverride?: IChunkConfig): Promise<Document[]> {
|
|
178
172
|
const docs = rows.map((row, i) => {
|
|
179
173
|
const text = Object.entries(row)
|
|
180
174
|
.map(([k, v]) => `${k}: ${v}`)
|
|
@@ -186,7 +180,7 @@ export class LangChainEmbeddingService {
|
|
|
186
180
|
});
|
|
187
181
|
});
|
|
188
182
|
|
|
189
|
-
return
|
|
183
|
+
return docs;
|
|
190
184
|
}
|
|
191
185
|
|
|
192
186
|
/**
|