npm - modelfusion - Versions diffs - 0.5.0 → 0.7.0 - Mend

modelfusion 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

package/text-chunk/split/splitRecursively.d.ts CHANGED Viewed

@@ -1,9 +1,18 @@
 import { FullTokenizer } from "../../model-function/tokenize-text/Tokenizer.js";
 import { SplitFunction } from "./SplitFunction.js";
-export declare const splitRecursivelyAtCharacter: ({ maxChunkSize }: {
-    maxChunkSize: number;
+/**
+ * Splits text recursively until the resulting chunks are smaller than the `maxCharactersPerChunk`.
+ * The text is recursively split in the middle, so that all chunks are roughly the same size.
+ */
+export declare const splitAtCharacter: ({ maxCharactersPerChunk, }: {
+    maxCharactersPerChunk: number;
 }) => SplitFunction;
-export declare const splitRecursivelyAtToken: ({ tokenizer, maxChunkSize, }: {
+/**
+ * Splits text recursively until the resulting chunks are smaller than the `maxTokensPerChunk`,
+ * while respecting the token boundaries.
+ * The text is recursively split in the middle, so that all chunks are roughly the same size.
+ */
+export declare const splitAtToken: ({ tokenizer, maxTokensPerChunk, }: {
     tokenizer: FullTokenizer;
-    maxChunkSize: number;
+    maxTokensPerChunk: number;
 }) => SplitFunction;

package/text-chunk/split/splitRecursively.js CHANGED Viewed

@@ -17,11 +17,20 @@ function splitRecursively({ maxChunkSize, segments, }) {
         }),
     ];
 }
-export const splitRecursivelyAtCharacter = ({ maxChunkSize }) => async ({ text }) => splitRecursively({
-    maxChunkSize,
+/**
+ * Splits text recursively until the resulting chunks are smaller than the `maxCharactersPerChunk`.
+ * The text is recursively split in the middle, so that all chunks are roughly the same size.
+ */
+export const splitAtCharacter = ({ maxCharactersPerChunk, }) => async ({ text }) => splitRecursively({
+    maxChunkSize: maxCharactersPerChunk,
     segments: text,
 });
-export const splitRecursivelyAtToken = ({ tokenizer, maxChunkSize, }) => async ({ text }) => splitRecursively({
-    maxChunkSize,
+/**
+ * Splits text recursively until the resulting chunks are smaller than the `maxTokensPerChunk`,
+ * while respecting the token boundaries.
+ * The text is recursively split in the middle, so that all chunks are roughly the same size.
+ */
+export const splitAtToken = ({ tokenizer, maxTokensPerChunk, }) => async ({ text }) => splitRecursively({
+    maxChunkSize: maxTokensPerChunk,
     segments: (await tokenizer.tokenizeWithTexts(text)).tokenTexts,
 });

package/text-chunk/split/splitTextChunks.cjs CHANGED Viewed

@@ -1,14 +1,16 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.splitTextChunks = void 0;
+exports.splitTextChunk = exports.splitTextChunks = void 0;
 async function splitTextChunks(splitFunction, inputs) {
-    const pageChunks = await Promise.all(inputs.map(async (input) => {
-        const parts = await splitFunction(input);
-        return parts.map((text) => ({
-            ...input,
-            text,
-        }));
-    }));
+    const pageChunks = await Promise.all(inputs.map((input) => splitTextChunk(splitFunction, input)));
     return pageChunks.flat();
 }
 exports.splitTextChunks = splitTextChunks;
+async function splitTextChunk(splitFunction, input) {
+    const parts = await splitFunction(input);
+    return parts.map((text) => ({
+        ...input,
+        text,
+    }));
+}
+exports.splitTextChunk = splitTextChunk;

package/text-chunk/split/splitTextChunks.d.ts CHANGED Viewed

@@ -1,3 +1,4 @@
 import { TextChunk } from "../TextChunk.js";
 import { SplitFunction } from "./SplitFunction.js";
 export declare function splitTextChunks<CHUNK extends TextChunk>(splitFunction: SplitFunction, inputs: CHUNK[]): Promise<CHUNK[]>;
+export declare function splitTextChunk<CHUNK extends TextChunk>(splitFunction: SplitFunction, input: CHUNK): Promise<CHUNK[]>;

package/text-chunk/split/splitTextChunks.js CHANGED Viewed

@@ -1,10 +1,11 @@
 export async function splitTextChunks(splitFunction, inputs) {
-    const pageChunks = await Promise.all(inputs.map(async (input) => {
-        const parts = await splitFunction(input);
-        return parts.map((text) => ({
-            ...input,
-            text,
-        }));
-    }));
+    const pageChunks = await Promise.all(inputs.map((input) => splitTextChunk(splitFunction, input)));
     return pageChunks.flat();
 }
+export async function splitTextChunk(splitFunction, input) {
+    const parts = await splitFunction(input);
+    return parts.map((text) => ({
+        ...input,
+        text,
+    }));
+}

package/text-chunk/upsertTextChunks.cjs CHANGED Viewed

@@ -5,7 +5,7 @@ const nanoid_1 = require("nanoid");
 const embedText_js_1 = require("../model-function/embed-text/embedText.cjs");
 async function upsertTextChunks({ vectorIndex, embeddingModel, generateId = nanoid_1.nanoid, chunks, ids, }, options) {
     // many embedding models support bulk embedding, so we first embed all texts:
-    const { embeddings } = await (0, embedText_js_1.embedTexts)(embeddingModel, chunks.map((chunk) => chunk.text), options);
+    const embeddings = await (0, embedText_js_1.embedTexts)(embeddingModel, chunks.map((chunk) => chunk.text), options);
     await vectorIndex.upsertMany(chunks.map((chunk, i) => ({
         id: ids?.[i] ?? generateId(),
         vector: embeddings[i],

package/text-chunk/upsertTextChunks.js CHANGED Viewed

@@ -2,7 +2,7 @@ import { nanoid as createId } from "nanoid";
 import { embedTexts } from "../model-function/embed-text/embedText.js";
 export async function upsertTextChunks({ vectorIndex, embeddingModel, generateId = createId, chunks, ids, }, options) {
     // many embedding models support bulk embedding, so we first embed all texts:
-    const { embeddings } = await embedTexts(embeddingModel, chunks.map((chunk) => chunk.text), options);
+    const embeddings = await embedTexts(embeddingModel, chunks.map((chunk) => chunk.text), options);
     await vectorIndex.upsertMany(chunks.map((chunk, i) => ({
         id: ids?.[i] ?? generateId(),
         vector: embeddings[i],