modelfusion 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -8
- package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.cjs +2 -2
- package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.js +3 -3
- package/package.json +2 -2
- package/text-chunk/split/splitOnSeparator.cjs +7 -9
- package/text-chunk/split/splitOnSeparator.d.ts +5 -6
- package/text-chunk/split/splitOnSeparator.js +6 -7
- package/text-chunk/split/splitRecursively.cjs +16 -7
- package/text-chunk/split/splitRecursively.d.ts +13 -4
- package/text-chunk/split/splitRecursively.js +13 -4
- package/text-chunk/split/splitTextChunks.cjs +10 -8
- package/text-chunk/split/splitTextChunks.d.ts +1 -0
- package/text-chunk/split/splitTextChunks.js +8 -7
    
        package/README.md
    CHANGED
    
    | @@ -343,9 +343,9 @@ const { chunks } = await retrieveTextChunks( | |
| 343 343 | 
             
              - [Transcribe Audio](https://modelfusion.dev/guide/function/transcribe-audio)
         | 
| 344 344 | 
             
              - [Generate images](https://modelfusion.dev/guide/function/generate-image)
         | 
| 345 345 | 
             
            - Summarize text
         | 
| 346 | 
            -
            - Split text
         | 
| 347 346 | 
             
            - [Tools](https://modelfusion.dev/guide/tools)
         | 
| 348 | 
            -
            - [Text Chunks](https://modelfusion.dev/guide/text- | 
| 347 | 
            +
            - [Text Chunks](https://modelfusion.dev/guide/text-chunk/)
         | 
| 348 | 
            +
              - [Split Text](https://modelfusion.dev/guide/text-chunk/split)
         | 
| 349 349 | 
             
            - [Run abstraction](https://modelfusion.dev/guide/run/)
         | 
| 350 350 | 
             
              - [Abort signals](https://modelfusion.dev/guide/run/abort)
         | 
| 351 351 | 
             
              - [Cost calculation](https://modelfusion.dev/guide/run/cost-calculation)
         | 
| @@ -416,6 +416,12 @@ Examples for the individual functions and objects. | |
| 416 416 |  | 
| 417 417 | 
             
            A web chat with an AI assistant, implemented as a Next.js app.
         | 
| 418 418 |  | 
| 419 | 
            +
            ### [Chat with PDF](https://github.com/lgrammel/modelfusion/tree/main/examples/pdf-chat-terminal)
         | 
| 420 | 
            +
             | 
| 421 | 
            +
            > _terminal app_, _PDF parsing_, _in memory vector indices_, _retrieval augmented generation_, _hypothetical document embedding_
         | 
| 422 | 
            +
             | 
| 423 | 
            +
            Ask questions about a PDF document and get answers from the document.
         | 
| 424 | 
            +
             | 
| 419 425 | 
             
            ### [Image generator (Next.js)](https://github.com/lgrammel/modelfusion/tree/main/examples/image-generator-next-js)
         | 
| 420 426 |  | 
| 421 427 | 
             
            > _Next.js app_, _Stability AI image generation_
         | 
| @@ -440,12 +446,6 @@ TypeScript implementation of the classic [BabyAGI](https://github.com/yoheinakaj | |
| 440 446 |  | 
| 441 447 | 
             
            Small agent that solves middle school math problems. It uses a calculator tool to solve the problems.
         | 
| 442 448 |  | 
| 443 | 
            -
            ### [Chat with PDF](https://github.com/lgrammel/modelfusion/tree/main/examples/pdf-chat-terminal)
         | 
| 444 | 
            -
             | 
| 445 | 
            -
            > _terminal app_, _PDF parsing_, _in memory vector indices_, _retrieval augmented generation_, _hypothetical document embedding_
         | 
| 446 | 
            -
             | 
| 447 | 
            -
            Ask questions about a PDF document and get answers from the document.
         | 
| 448 | 
            -
             | 
| 449 449 | 
             
            ### [PDF to Tweet](https://github.com/lgrammel/modelfusion/tree/main/examples/pdf-to-tweet)
         | 
| 450 450 |  | 
| 451 451 | 
             
            > _terminal app_, _PDF parsing_, _recursive information extraction_, _in memory vector index_, _style example retrieval_, _OpenAI GPT-4_, _cost calculation_
         | 
    
        package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.cjs
    CHANGED
    
    | @@ -13,9 +13,9 @@ async function summarizeRecursivelyWithTextGenerationAndTokenSplitting({ text, m | |
| 13 13 | 
             
                (model.maxCompletionTokens ?? model.contextWindowSize / 4), join, }, options) {
         | 
| 14 14 | 
             
                const emptyPromptTokens = await model.countPromptTokens(await prompt({ text: "" }));
         | 
| 15 15 | 
             
                return (0, summarizeRecursively_js_1.summarizeRecursively)({
         | 
| 16 | 
            -
                    split: (0, splitRecursively_js_1. | 
| 16 | 
            +
                    split: (0, splitRecursively_js_1.splitAtToken)({
         | 
| 17 17 | 
             
                        tokenizer: model.tokenizer,
         | 
| 18 | 
            -
                         | 
| 18 | 
            +
                        maxTokensPerChunk: tokenLimit - emptyPromptTokens,
         | 
| 19 19 | 
             
                    }),
         | 
| 20 20 | 
             
                    summarize: async (input) => {
         | 
| 21 21 | 
             
                        const { text } = await (0, generateText_js_1.generateText)(model, await prompt(input), options);
         | 
    
        package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.js
    CHANGED
    
    | @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            import { generateText } from "../../model-function/generate-text/generateText.js";
         | 
| 2 | 
            -
            import {  | 
| 2 | 
            +
            import { splitAtToken } from "../../text-chunk/split/splitRecursively.js";
         | 
| 3 3 | 
             
            import { summarizeRecursively } from "./summarizeRecursively.js";
         | 
| 4 4 | 
             
            /**
         | 
| 5 5 | 
             
             * Recursively summarizes a text using a text generation model, e.g. for summarization or text extraction.
         | 
| @@ -10,9 +10,9 @@ export async function summarizeRecursivelyWithTextGenerationAndTokenSplitting({ | |
| 10 10 | 
             
                (model.maxCompletionTokens ?? model.contextWindowSize / 4), join, }, options) {
         | 
| 11 11 | 
             
                const emptyPromptTokens = await model.countPromptTokens(await prompt({ text: "" }));
         | 
| 12 12 | 
             
                return summarizeRecursively({
         | 
| 13 | 
            -
                    split:  | 
| 13 | 
            +
                    split: splitAtToken({
         | 
| 14 14 | 
             
                        tokenizer: model.tokenizer,
         | 
| 15 | 
            -
                         | 
| 15 | 
            +
                        maxTokensPerChunk: tokenLimit - emptyPromptTokens,
         | 
| 16 16 | 
             
                    }),
         | 
| 17 17 | 
             
                    summarize: async (input) => {
         | 
| 18 18 | 
             
                        const { text } = await generateText(model, await prompt(input), options);
         | 
    
        package/package.json
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            {
         | 
| 2 2 | 
             
              "name": "modelfusion",
         | 
| 3 3 | 
             
              "description": "Build AI applications, chatbots, and agents with JavaScript and TypeScript.",
         | 
| 4 | 
            -
              "version": "0. | 
| 4 | 
            +
              "version": "0.6.0",
         | 
| 5 5 | 
             
              "author": "Lars Grammel",
         | 
| 6 6 | 
             
              "license": "MIT",
         | 
| 7 7 | 
             
              "keywords": [
         | 
| @@ -65,7 +65,7 @@ | |
| 65 65 | 
             
                "@typescript-eslint/parser": "^6.1.0",
         | 
| 66 66 | 
             
                "copyfiles": "2.4.1",
         | 
| 67 67 | 
             
                "eslint": "^8.45.0",
         | 
| 68 | 
            -
                "eslint-config-prettier": " | 
| 68 | 
            +
                "eslint-config-prettier": "9.0.0",
         | 
| 69 69 | 
             
                "husky": "^8.0.3",
         | 
| 70 70 | 
             
                "lint-staged": "13.2.3",
         | 
| 71 71 | 
             
                "prettier": "3.0.1",
         | 
| @@ -1,12 +1,10 @@ | |
| 1 1 | 
             
            "use strict";
         | 
| 2 2 | 
             
            Object.defineProperty(exports, "__esModule", { value: true });
         | 
| 3 | 
            -
            exports. | 
| 4 | 
            -
             | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 3 | 
            +
            exports.splitOnSeparator = void 0;
         | 
| 4 | 
            +
            /**
         | 
| 5 | 
            +
             * Splits text on a separator string.
         | 
| 6 | 
            +
             */
         | 
| 7 | 
            +
            function splitOnSeparator({ separator, }) {
         | 
| 8 | 
            +
                return async ({ text }) => text.split(separator);
         | 
| 9 | 
            +
            }
         | 
| 7 10 | 
             
            exports.splitOnSeparator = splitOnSeparator;
         | 
| 8 | 
            -
            const splitOnSeparatorAsSplitFunction = ({ separator }) => async ({ text }) => (0, exports.splitOnSeparator)({
         | 
| 9 | 
            -
                separator,
         | 
| 10 | 
            -
                text,
         | 
| 11 | 
            -
            });
         | 
| 12 | 
            -
            exports.splitOnSeparatorAsSplitFunction = splitOnSeparatorAsSplitFunction;
         | 
| @@ -1,8 +1,7 @@ | |
| 1 1 | 
             
            import { SplitFunction } from "./SplitFunction.js";
         | 
| 2 | 
            -
             | 
| 2 | 
            +
            /**
         | 
| 3 | 
            +
             * Splits text on a separator string.
         | 
| 4 | 
            +
             */
         | 
| 5 | 
            +
            export declare function splitOnSeparator({ separator, }: {
         | 
| 3 6 | 
             
                separator: string;
         | 
| 4 | 
            -
             | 
| 5 | 
            -
            }) => Promise<string[]>;
         | 
| 6 | 
            -
            export declare const splitOnSeparatorAsSplitFunction: ({ separator }: {
         | 
| 7 | 
            -
                separator: string;
         | 
| 8 | 
            -
            }) => SplitFunction;
         | 
| 7 | 
            +
            }): SplitFunction;
         | 
| @@ -1,7 +1,6 @@ | |
| 1 | 
            -
             | 
| 2 | 
            -
             | 
| 3 | 
            -
             | 
| 4 | 
            -
            export  | 
| 5 | 
            -
                separator | 
| 6 | 
            -
             | 
| 7 | 
            -
            });
         | 
| 1 | 
            +
            /**
         | 
| 2 | 
            +
             * Splits text on a separator string.
         | 
| 3 | 
            +
             */
         | 
| 4 | 
            +
            export function splitOnSeparator({ separator, }) {
         | 
| 5 | 
            +
                return async ({ text }) => text.split(separator);
         | 
| 6 | 
            +
            }
         | 
| @@ -1,6 +1,6 @@ | |
| 1 1 | 
             
            "use strict";
         | 
| 2 2 | 
             
            Object.defineProperty(exports, "__esModule", { value: true });
         | 
| 3 | 
            -
            exports. | 
| 3 | 
            +
            exports.splitAtToken = exports.splitAtCharacter = void 0;
         | 
| 4 4 | 
             
            // when segments is a string, it splits by character, otherwise according to the provided segments
         | 
| 5 5 | 
             
            function splitRecursively({ maxChunkSize, segments, }) {
         | 
| 6 6 | 
             
                if (segments.length < maxChunkSize) {
         | 
| @@ -20,13 +20,22 @@ function splitRecursively({ maxChunkSize, segments, }) { | |
| 20 20 | 
             
                    }),
         | 
| 21 21 | 
             
                ];
         | 
| 22 22 | 
             
            }
         | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 23 | 
            +
            /**
         | 
| 24 | 
            +
             * Splits text recursively until the resulting chunks are smaller than the `maxCharactersPerChunk`.
         | 
| 25 | 
            +
             * The text is recursively split in the middle, so that all chunks are roughly the same size.
         | 
| 26 | 
            +
             */
         | 
| 27 | 
            +
            const splitAtCharacter = ({ maxCharactersPerChunk, }) => async ({ text }) => splitRecursively({
         | 
| 28 | 
            +
                maxChunkSize: maxCharactersPerChunk,
         | 
| 25 29 | 
             
                segments: text,
         | 
| 26 30 | 
             
            });
         | 
| 27 | 
            -
            exports. | 
| 28 | 
            -
             | 
| 29 | 
            -
             | 
| 31 | 
            +
            exports.splitAtCharacter = splitAtCharacter;
         | 
| 32 | 
            +
            /**
         | 
| 33 | 
            +
             * Splits text recursively until the resulting chunks are smaller than the `maxTokensPerChunk`,
         | 
| 34 | 
            +
             * while respecting the token boundaries.
         | 
| 35 | 
            +
             * The text is recursively split in the middle, so that all chunks are roughly the same size.
         | 
| 36 | 
            +
             */
         | 
| 37 | 
            +
            const splitAtToken = ({ tokenizer, maxTokensPerChunk, }) => async ({ text }) => splitRecursively({
         | 
| 38 | 
            +
                maxChunkSize: maxTokensPerChunk,
         | 
| 30 39 | 
             
                segments: (await tokenizer.tokenizeWithTexts(text)).tokenTexts,
         | 
| 31 40 | 
             
            });
         | 
| 32 | 
            -
            exports. | 
| 41 | 
            +
            exports.splitAtToken = splitAtToken;
         | 
| @@ -1,9 +1,18 @@ | |
| 1 1 | 
             
            import { FullTokenizer } from "../../model-function/tokenize-text/Tokenizer.js";
         | 
| 2 2 | 
             
            import { SplitFunction } from "./SplitFunction.js";
         | 
| 3 | 
            -
             | 
| 4 | 
            -
             | 
| 3 | 
            +
            /**
         | 
| 4 | 
            +
             * Splits text recursively until the resulting chunks are smaller than the `maxCharactersPerChunk`.
         | 
| 5 | 
            +
             * The text is recursively split in the middle, so that all chunks are roughly the same size.
         | 
| 6 | 
            +
             */
         | 
| 7 | 
            +
            export declare const splitAtCharacter: ({ maxCharactersPerChunk, }: {
         | 
| 8 | 
            +
                maxCharactersPerChunk: number;
         | 
| 5 9 | 
             
            }) => SplitFunction;
         | 
| 6 | 
            -
             | 
| 10 | 
            +
            /**
         | 
| 11 | 
            +
             * Splits text recursively until the resulting chunks are smaller than the `maxTokensPerChunk`,
         | 
| 12 | 
            +
             * while respecting the token boundaries.
         | 
| 13 | 
            +
             * The text is recursively split in the middle, so that all chunks are roughly the same size.
         | 
| 14 | 
            +
             */
         | 
| 15 | 
            +
            export declare const splitAtToken: ({ tokenizer, maxTokensPerChunk, }: {
         | 
| 7 16 | 
             
                tokenizer: FullTokenizer;
         | 
| 8 | 
            -
                 | 
| 17 | 
            +
                maxTokensPerChunk: number;
         | 
| 9 18 | 
             
            }) => SplitFunction;
         | 
| @@ -17,11 +17,20 @@ function splitRecursively({ maxChunkSize, segments, }) { | |
| 17 17 | 
             
                    }),
         | 
| 18 18 | 
             
                ];
         | 
| 19 19 | 
             
            }
         | 
| 20 | 
            -
             | 
| 21 | 
            -
             | 
| 20 | 
            +
            /**
         | 
| 21 | 
            +
             * Splits text recursively until the resulting chunks are smaller than the `maxCharactersPerChunk`.
         | 
| 22 | 
            +
             * The text is recursively split in the middle, so that all chunks are roughly the same size.
         | 
| 23 | 
            +
             */
         | 
| 24 | 
            +
            export const splitAtCharacter = ({ maxCharactersPerChunk, }) => async ({ text }) => splitRecursively({
         | 
| 25 | 
            +
                maxChunkSize: maxCharactersPerChunk,
         | 
| 22 26 | 
             
                segments: text,
         | 
| 23 27 | 
             
            });
         | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 28 | 
            +
            /**
         | 
| 29 | 
            +
             * Splits text recursively until the resulting chunks are smaller than the `maxTokensPerChunk`,
         | 
| 30 | 
            +
             * while respecting the token boundaries.
         | 
| 31 | 
            +
             * The text is recursively split in the middle, so that all chunks are roughly the same size.
         | 
| 32 | 
            +
             */
         | 
| 33 | 
            +
            export const splitAtToken = ({ tokenizer, maxTokensPerChunk, }) => async ({ text }) => splitRecursively({
         | 
| 34 | 
            +
                maxChunkSize: maxTokensPerChunk,
         | 
| 26 35 | 
             
                segments: (await tokenizer.tokenizeWithTexts(text)).tokenTexts,
         | 
| 27 36 | 
             
            });
         | 
| @@ -1,14 +1,16 @@ | |
| 1 1 | 
             
            "use strict";
         | 
| 2 2 | 
             
            Object.defineProperty(exports, "__esModule", { value: true });
         | 
| 3 | 
            -
            exports.splitTextChunks = void 0;
         | 
| 3 | 
            +
            exports.splitTextChunk = exports.splitTextChunks = void 0;
         | 
| 4 4 | 
             
            async function splitTextChunks(splitFunction, inputs) {
         | 
| 5 | 
            -
                const pageChunks = await Promise.all(inputs.map( | 
| 6 | 
            -
                    const parts = await splitFunction(input);
         | 
| 7 | 
            -
                    return parts.map((text) => ({
         | 
| 8 | 
            -
                        ...input,
         | 
| 9 | 
            -
                        text,
         | 
| 10 | 
            -
                    }));
         | 
| 11 | 
            -
                }));
         | 
| 5 | 
            +
                const pageChunks = await Promise.all(inputs.map((input) => splitTextChunk(splitFunction, input)));
         | 
| 12 6 | 
             
                return pageChunks.flat();
         | 
| 13 7 | 
             
            }
         | 
| 14 8 | 
             
            exports.splitTextChunks = splitTextChunks;
         | 
| 9 | 
            +
            async function splitTextChunk(splitFunction, input) {
         | 
| 10 | 
            +
                const parts = await splitFunction(input);
         | 
| 11 | 
            +
                return parts.map((text) => ({
         | 
| 12 | 
            +
                    ...input,
         | 
| 13 | 
            +
                    text,
         | 
| 14 | 
            +
                }));
         | 
| 15 | 
            +
            }
         | 
| 16 | 
            +
            exports.splitTextChunk = splitTextChunk;
         | 
| @@ -1,3 +1,4 @@ | |
| 1 1 | 
             
            import { TextChunk } from "../TextChunk.js";
         | 
| 2 2 | 
             
            import { SplitFunction } from "./SplitFunction.js";
         | 
| 3 3 | 
             
            export declare function splitTextChunks<CHUNK extends TextChunk>(splitFunction: SplitFunction, inputs: CHUNK[]): Promise<CHUNK[]>;
         | 
| 4 | 
            +
            export declare function splitTextChunk<CHUNK extends TextChunk>(splitFunction: SplitFunction, input: CHUNK): Promise<CHUNK[]>;
         | 
| @@ -1,10 +1,11 @@ | |
| 1 1 | 
             
            export async function splitTextChunks(splitFunction, inputs) {
         | 
| 2 | 
            -
                const pageChunks = await Promise.all(inputs.map( | 
| 3 | 
            -
                    const parts = await splitFunction(input);
         | 
| 4 | 
            -
                    return parts.map((text) => ({
         | 
| 5 | 
            -
                        ...input,
         | 
| 6 | 
            -
                        text,
         | 
| 7 | 
            -
                    }));
         | 
| 8 | 
            -
                }));
         | 
| 2 | 
            +
                const pageChunks = await Promise.all(inputs.map((input) => splitTextChunk(splitFunction, input)));
         | 
| 9 3 | 
             
                return pageChunks.flat();
         | 
| 10 4 | 
             
            }
         | 
| 5 | 
            +
            export async function splitTextChunk(splitFunction, input) {
         | 
| 6 | 
            +
                const parts = await splitFunction(input);
         | 
| 7 | 
            +
                return parts.map((text) => ({
         | 
| 8 | 
            +
                    ...input,
         | 
| 9 | 
            +
                    text,
         | 
| 10 | 
            +
                }));
         | 
| 11 | 
            +
            }
         |