@rws-framework/ai-tools 3.9.2 → 3.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -168,16 +168,7 @@ export class LangChainEmbeddingService {
|
|
|
168
168
|
return TextChunker.chunkText(text, maxTokens, overlap, separators);
|
|
169
169
|
}
|
|
170
170
|
|
|
171
|
-
async chunkCSV(rows: Record<string, any>[], ragOverride?: IChunkConfig): Promise<Document[]> {
|
|
172
|
-
// Use safe token limits - the TextChunker handles token estimation internally
|
|
173
|
-
const maxTokens = ragOverride ? ragOverride.chunkSize : (this.chunkConfig?.chunkSize || 450); // Safe token limit for embedding models
|
|
174
|
-
const overlap = ragOverride ? ragOverride.chunkOverlap : (this.chunkConfig?.chunkOverlap || 50); // Character overlap, not token
|
|
175
|
-
|
|
176
|
-
const splitter = new RecursiveCharacterTextSplitter({
|
|
177
|
-
chunkSize: maxTokens,
|
|
178
|
-
chunkOverlap: overlap
|
|
179
|
-
});
|
|
180
|
-
|
|
171
|
+
async chunkCSV(rows: Record<string, any>[], ragOverride?: IChunkConfig): Promise<Document[]> {
|
|
181
172
|
const docs = rows.map((row, i) => {
|
|
182
173
|
const text = Object.entries(row)
|
|
183
174
|
.map(([k, v]) => `${k}: ${v}`)
|
|
@@ -189,7 +180,7 @@ export class LangChainEmbeddingService {
|
|
|
189
180
|
});
|
|
190
181
|
});
|
|
191
182
|
|
|
192
|
-
return
|
|
183
|
+
return docs;
|
|
193
184
|
}
|
|
194
185
|
|
|
195
186
|
/**
|