modelfusion 0.0.44
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +429 -0
- package/composed-function/index.cjs +22 -0
- package/composed-function/index.d.ts +6 -0
- package/composed-function/index.js +6 -0
- package/composed-function/summarize/SummarizationFunction.cjs +2 -0
- package/composed-function/summarize/SummarizationFunction.d.ts +4 -0
- package/composed-function/summarize/SummarizationFunction.js +1 -0
- package/composed-function/summarize/summarizeRecursively.cjs +19 -0
- package/composed-function/summarize/summarizeRecursively.d.ts +11 -0
- package/composed-function/summarize/summarizeRecursively.js +15 -0
- package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.cjs +29 -0
- package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.d.ts +24 -0
- package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.js +25 -0
- package/composed-function/use-tool/NoSuchToolError.cjs +17 -0
- package/composed-function/use-tool/NoSuchToolError.d.ts +4 -0
- package/composed-function/use-tool/NoSuchToolError.js +13 -0
- package/composed-function/use-tool/Tool.cjs +43 -0
- package/composed-function/use-tool/Tool.d.ts +15 -0
- package/composed-function/use-tool/Tool.js +39 -0
- package/composed-function/use-tool/useTool.cjs +59 -0
- package/composed-function/use-tool/useTool.d.ts +36 -0
- package/composed-function/use-tool/useTool.js +54 -0
- package/cost/Cost.cjs +38 -0
- package/cost/Cost.d.ts +16 -0
- package/cost/Cost.js +34 -0
- package/cost/CostCalculator.cjs +2 -0
- package/cost/CostCalculator.d.ts +8 -0
- package/cost/CostCalculator.js +1 -0
- package/cost/calculateCost.cjs +28 -0
- package/cost/calculateCost.d.ts +7 -0
- package/cost/calculateCost.js +24 -0
- package/cost/index.cjs +19 -0
- package/cost/index.d.ts +3 -0
- package/cost/index.js +3 -0
- package/index.cjs +25 -0
- package/index.d.ts +9 -0
- package/index.js +9 -0
- package/model-function/AbstractModel.cjs +22 -0
- package/model-function/AbstractModel.d.ts +12 -0
- package/model-function/AbstractModel.js +18 -0
- package/model-function/FunctionOptions.cjs +2 -0
- package/model-function/FunctionOptions.d.ts +6 -0
- package/model-function/FunctionOptions.js +1 -0
- package/model-function/Model.cjs +2 -0
- package/model-function/Model.d.ts +23 -0
- package/model-function/Model.js +1 -0
- package/model-function/ModelCallEvent.cjs +2 -0
- package/model-function/ModelCallEvent.d.ts +18 -0
- package/model-function/ModelCallEvent.js +1 -0
- package/model-function/ModelCallEventSource.cjs +42 -0
- package/model-function/ModelCallEventSource.d.ts +13 -0
- package/model-function/ModelCallEventSource.js +38 -0
- package/model-function/ModelCallObserver.cjs +2 -0
- package/model-function/ModelCallObserver.d.ts +5 -0
- package/model-function/ModelCallObserver.js +1 -0
- package/model-function/ModelInformation.cjs +2 -0
- package/model-function/ModelInformation.d.ts +4 -0
- package/model-function/ModelInformation.js +1 -0
- package/model-function/SuccessfulModelCall.cjs +22 -0
- package/model-function/SuccessfulModelCall.d.ts +9 -0
- package/model-function/SuccessfulModelCall.js +18 -0
- package/model-function/embed-text/TextEmbeddingEvent.cjs +2 -0
- package/model-function/embed-text/TextEmbeddingEvent.d.ts +23 -0
- package/model-function/embed-text/TextEmbeddingEvent.js +1 -0
- package/model-function/embed-text/TextEmbeddingModel.cjs +2 -0
- package/model-function/embed-text/TextEmbeddingModel.d.ts +18 -0
- package/model-function/embed-text/TextEmbeddingModel.js +1 -0
- package/model-function/embed-text/embedText.cjs +90 -0
- package/model-function/embed-text/embedText.d.ts +33 -0
- package/model-function/embed-text/embedText.js +85 -0
- package/model-function/executeCall.cjs +60 -0
- package/model-function/executeCall.d.ts +27 -0
- package/model-function/executeCall.js +56 -0
- package/model-function/generate-image/ImageGenerationEvent.cjs +2 -0
- package/model-function/generate-image/ImageGenerationEvent.d.ts +22 -0
- package/model-function/generate-image/ImageGenerationEvent.js +1 -0
- package/model-function/generate-image/ImageGenerationModel.cjs +2 -0
- package/model-function/generate-image/ImageGenerationModel.d.ts +8 -0
- package/model-function/generate-image/ImageGenerationModel.js +1 -0
- package/model-function/generate-image/generateImage.cjs +63 -0
- package/model-function/generate-image/generateImage.d.ts +23 -0
- package/model-function/generate-image/generateImage.js +59 -0
- package/model-function/generate-json/GenerateJsonModel.cjs +2 -0
- package/model-function/generate-json/GenerateJsonModel.d.ts +10 -0
- package/model-function/generate-json/GenerateJsonModel.js +1 -0
- package/model-function/generate-json/GenerateJsonOrTextModel.cjs +2 -0
- package/model-function/generate-json/GenerateJsonOrTextModel.d.ts +18 -0
- package/model-function/generate-json/GenerateJsonOrTextModel.js +1 -0
- package/model-function/generate-json/JsonGenerationEvent.cjs +2 -0
- package/model-function/generate-json/JsonGenerationEvent.d.ts +22 -0
- package/model-function/generate-json/JsonGenerationEvent.js +1 -0
- package/model-function/generate-json/NoSuchSchemaError.cjs +17 -0
- package/model-function/generate-json/NoSuchSchemaError.d.ts +4 -0
- package/model-function/generate-json/NoSuchSchemaError.js +13 -0
- package/model-function/generate-json/SchemaDefinition.cjs +2 -0
- package/model-function/generate-json/SchemaDefinition.d.ts +6 -0
- package/model-function/generate-json/SchemaDefinition.js +1 -0
- package/model-function/generate-json/SchemaValidationError.cjs +36 -0
- package/model-function/generate-json/SchemaValidationError.d.ts +11 -0
- package/model-function/generate-json/SchemaValidationError.js +32 -0
- package/model-function/generate-json/generateJson.cjs +61 -0
- package/model-function/generate-json/generateJson.d.ts +9 -0
- package/model-function/generate-json/generateJson.js +57 -0
- package/model-function/generate-json/generateJsonOrText.cjs +74 -0
- package/model-function/generate-json/generateJsonOrText.d.ts +25 -0
- package/model-function/generate-json/generateJsonOrText.js +70 -0
- package/model-function/generate-text/AsyncQueue.cjs +66 -0
- package/model-function/generate-text/AsyncQueue.d.ts +17 -0
- package/model-function/generate-text/AsyncQueue.js +62 -0
- package/model-function/generate-text/DeltaEvent.cjs +2 -0
- package/model-function/generate-text/DeltaEvent.d.ts +7 -0
- package/model-function/generate-text/DeltaEvent.js +1 -0
- package/model-function/generate-text/TextDeltaEventSource.cjs +54 -0
- package/model-function/generate-text/TextDeltaEventSource.d.ts +5 -0
- package/model-function/generate-text/TextDeltaEventSource.js +46 -0
- package/model-function/generate-text/TextGenerationEvent.cjs +2 -0
- package/model-function/generate-text/TextGenerationEvent.d.ts +22 -0
- package/model-function/generate-text/TextGenerationEvent.js +1 -0
- package/model-function/generate-text/TextGenerationModel.cjs +2 -0
- package/model-function/generate-text/TextGenerationModel.d.ts +42 -0
- package/model-function/generate-text/TextGenerationModel.js +1 -0
- package/model-function/generate-text/TextStreamingEvent.cjs +2 -0
- package/model-function/generate-text/TextStreamingEvent.d.ts +22 -0
- package/model-function/generate-text/TextStreamingEvent.js +1 -0
- package/model-function/generate-text/extractTextDeltas.cjs +23 -0
- package/model-function/generate-text/extractTextDeltas.d.ts +7 -0
- package/model-function/generate-text/extractTextDeltas.js +19 -0
- package/model-function/generate-text/generateText.cjs +67 -0
- package/model-function/generate-text/generateText.d.ts +20 -0
- package/model-function/generate-text/generateText.js +63 -0
- package/model-function/generate-text/parseEventSourceReadableStream.cjs +30 -0
- package/model-function/generate-text/parseEventSourceReadableStream.d.ts +8 -0
- package/model-function/generate-text/parseEventSourceReadableStream.js +26 -0
- package/model-function/generate-text/streamText.cjs +115 -0
- package/model-function/generate-text/streamText.d.ts +11 -0
- package/model-function/generate-text/streamText.js +111 -0
- package/model-function/index.cjs +47 -0
- package/model-function/index.d.ts +31 -0
- package/model-function/index.js +31 -0
- package/model-function/tokenize-text/Tokenizer.cjs +2 -0
- package/model-function/tokenize-text/Tokenizer.d.ts +19 -0
- package/model-function/tokenize-text/Tokenizer.js +1 -0
- package/model-function/tokenize-text/countTokens.cjs +10 -0
- package/model-function/tokenize-text/countTokens.d.ts +5 -0
- package/model-function/tokenize-text/countTokens.js +6 -0
- package/model-function/transcribe-audio/TranscriptionEvent.cjs +2 -0
- package/model-function/transcribe-audio/TranscriptionEvent.d.ts +22 -0
- package/model-function/transcribe-audio/TranscriptionEvent.js +1 -0
- package/model-function/transcribe-audio/TranscriptionModel.cjs +2 -0
- package/model-function/transcribe-audio/TranscriptionModel.d.ts +8 -0
- package/model-function/transcribe-audio/TranscriptionModel.js +1 -0
- package/model-function/transcribe-audio/transcribe.cjs +62 -0
- package/model-function/transcribe-audio/transcribe.d.ts +22 -0
- package/model-function/transcribe-audio/transcribe.js +58 -0
- package/model-provider/automatic1111/Automatic1111Error.cjs +39 -0
- package/model-provider/automatic1111/Automatic1111Error.d.ts +31 -0
- package/model-provider/automatic1111/Automatic1111Error.js +31 -0
- package/model-provider/automatic1111/Automatic1111ImageGenerationModel.cjs +76 -0
- package/model-provider/automatic1111/Automatic1111ImageGenerationModel.d.ts +54 -0
- package/model-provider/automatic1111/Automatic1111ImageGenerationModel.js +72 -0
- package/model-provider/automatic1111/index.cjs +20 -0
- package/model-provider/automatic1111/index.d.ts +2 -0
- package/model-provider/automatic1111/index.js +2 -0
- package/model-provider/cohere/CohereError.cjs +36 -0
- package/model-provider/cohere/CohereError.d.ts +22 -0
- package/model-provider/cohere/CohereError.js +28 -0
- package/model-provider/cohere/CohereTextEmbeddingModel.cjs +172 -0
- package/model-provider/cohere/CohereTextEmbeddingModel.d.ts +119 -0
- package/model-provider/cohere/CohereTextEmbeddingModel.js +165 -0
- package/model-provider/cohere/CohereTextGenerationModel.cjs +283 -0
- package/model-provider/cohere/CohereTextGenerationModel.d.ts +203 -0
- package/model-provider/cohere/CohereTextGenerationModel.js +276 -0
- package/model-provider/cohere/CohereTokenizer.cjs +136 -0
- package/model-provider/cohere/CohereTokenizer.d.ts +118 -0
- package/model-provider/cohere/CohereTokenizer.js +129 -0
- package/model-provider/cohere/index.cjs +22 -0
- package/model-provider/cohere/index.d.ts +4 -0
- package/model-provider/cohere/index.js +4 -0
- package/model-provider/huggingface/HuggingFaceError.cjs +52 -0
- package/model-provider/huggingface/HuggingFaceError.d.ts +22 -0
- package/model-provider/huggingface/HuggingFaceError.js +44 -0
- package/model-provider/huggingface/HuggingFaceTextGenerationModel.cjs +174 -0
- package/model-provider/huggingface/HuggingFaceTextGenerationModel.d.ts +75 -0
- package/model-provider/huggingface/HuggingFaceTextGenerationModel.js +167 -0
- package/model-provider/huggingface/index.cjs +20 -0
- package/model-provider/huggingface/index.d.ts +2 -0
- package/model-provider/huggingface/index.js +2 -0
- package/model-provider/index.cjs +22 -0
- package/model-provider/index.d.ts +6 -0
- package/model-provider/index.js +6 -0
- package/model-provider/llamacpp/LlamaCppError.cjs +52 -0
- package/model-provider/llamacpp/LlamaCppError.d.ts +22 -0
- package/model-provider/llamacpp/LlamaCppError.js +44 -0
- package/model-provider/llamacpp/LlamaCppTextEmbeddingModel.cjs +96 -0
- package/model-provider/llamacpp/LlamaCppTextEmbeddingModel.d.ts +40 -0
- package/model-provider/llamacpp/LlamaCppTextEmbeddingModel.js +89 -0
- package/model-provider/llamacpp/LlamaCppTextGenerationModel.cjs +245 -0
- package/model-provider/llamacpp/LlamaCppTextGenerationModel.d.ts +399 -0
- package/model-provider/llamacpp/LlamaCppTextGenerationModel.js +238 -0
- package/model-provider/llamacpp/LlamaCppTokenizer.cjs +64 -0
- package/model-provider/llamacpp/LlamaCppTokenizer.d.ts +38 -0
- package/model-provider/llamacpp/LlamaCppTokenizer.js +57 -0
- package/model-provider/llamacpp/index.cjs +22 -0
- package/model-provider/llamacpp/index.d.ts +4 -0
- package/model-provider/llamacpp/index.js +4 -0
- package/model-provider/openai/OpenAICostCalculator.cjs +71 -0
- package/model-provider/openai/OpenAICostCalculator.d.ts +6 -0
- package/model-provider/openai/OpenAICostCalculator.js +67 -0
- package/model-provider/openai/OpenAIError.cjs +50 -0
- package/model-provider/openai/OpenAIError.d.ts +47 -0
- package/model-provider/openai/OpenAIError.js +42 -0
- package/model-provider/openai/OpenAIImageGenerationModel.cjs +124 -0
- package/model-provider/openai/OpenAIImageGenerationModel.d.ts +113 -0
- package/model-provider/openai/OpenAIImageGenerationModel.js +119 -0
- package/model-provider/openai/OpenAIModelSettings.cjs +2 -0
- package/model-provider/openai/OpenAIModelSettings.d.ts +8 -0
- package/model-provider/openai/OpenAIModelSettings.js +1 -0
- package/model-provider/openai/OpenAITextEmbeddingModel.cjs +171 -0
- package/model-provider/openai/OpenAITextEmbeddingModel.d.ts +122 -0
- package/model-provider/openai/OpenAITextEmbeddingModel.js +162 -0
- package/model-provider/openai/OpenAITextGenerationModel.cjs +326 -0
- package/model-provider/openai/OpenAITextGenerationModel.d.ts +254 -0
- package/model-provider/openai/OpenAITextGenerationModel.js +317 -0
- package/model-provider/openai/OpenAITranscriptionModel.cjs +195 -0
- package/model-provider/openai/OpenAITranscriptionModel.d.ts +196 -0
- package/model-provider/openai/OpenAITranscriptionModel.js +187 -0
- package/model-provider/openai/TikTokenTokenizer.cjs +86 -0
- package/model-provider/openai/TikTokenTokenizer.d.ts +35 -0
- package/model-provider/openai/TikTokenTokenizer.js +82 -0
- package/model-provider/openai/chat/OpenAIChatMessage.cjs +24 -0
- package/model-provider/openai/chat/OpenAIChatMessage.d.ts +26 -0
- package/model-provider/openai/chat/OpenAIChatMessage.js +21 -0
- package/model-provider/openai/chat/OpenAIChatModel.cjs +288 -0
- package/model-provider/openai/chat/OpenAIChatModel.d.ts +344 -0
- package/model-provider/openai/chat/OpenAIChatModel.js +279 -0
- package/model-provider/openai/chat/OpenAIChatPrompt.cjs +143 -0
- package/model-provider/openai/chat/OpenAIChatPrompt.d.ts +108 -0
- package/model-provider/openai/chat/OpenAIChatPrompt.js +135 -0
- package/model-provider/openai/chat/OpenAIChatStreamIterable.cjs +112 -0
- package/model-provider/openai/chat/OpenAIChatStreamIterable.d.ts +19 -0
- package/model-provider/openai/chat/OpenAIChatStreamIterable.js +105 -0
- package/model-provider/openai/chat/countOpenAIChatMessageTokens.cjs +28 -0
- package/model-provider/openai/chat/countOpenAIChatMessageTokens.d.ts +20 -0
- package/model-provider/openai/chat/countOpenAIChatMessageTokens.js +23 -0
- package/model-provider/openai/index.cjs +31 -0
- package/model-provider/openai/index.d.ts +13 -0
- package/model-provider/openai/index.js +12 -0
- package/model-provider/stability/StabilityError.cjs +36 -0
- package/model-provider/stability/StabilityError.d.ts +22 -0
- package/model-provider/stability/StabilityError.js +28 -0
- package/model-provider/stability/StabilityImageGenerationModel.cjs +133 -0
- package/model-provider/stability/StabilityImageGenerationModel.d.ts +95 -0
- package/model-provider/stability/StabilityImageGenerationModel.js +129 -0
- package/model-provider/stability/index.cjs +20 -0
- package/model-provider/stability/index.d.ts +2 -0
- package/model-provider/stability/index.js +2 -0
- package/package.json +87 -0
- package/prompt/InstructionPrompt.cjs +2 -0
- package/prompt/InstructionPrompt.d.ts +7 -0
- package/prompt/InstructionPrompt.js +1 -0
- package/prompt/Llama2PromptMapping.cjs +56 -0
- package/prompt/Llama2PromptMapping.d.ts +10 -0
- package/prompt/Llama2PromptMapping.js +51 -0
- package/prompt/OpenAIChatPromptMapping.cjs +62 -0
- package/prompt/OpenAIChatPromptMapping.d.ts +6 -0
- package/prompt/OpenAIChatPromptMapping.js +57 -0
- package/prompt/PromptMapping.cjs +2 -0
- package/prompt/PromptMapping.d.ts +7 -0
- package/prompt/PromptMapping.js +1 -0
- package/prompt/PromptMappingTextGenerationModel.cjs +88 -0
- package/prompt/PromptMappingTextGenerationModel.d.ts +26 -0
- package/prompt/PromptMappingTextGenerationModel.js +84 -0
- package/prompt/TextPromptMapping.cjs +50 -0
- package/prompt/TextPromptMapping.d.ts +14 -0
- package/prompt/TextPromptMapping.js +45 -0
- package/prompt/chat/ChatPrompt.cjs +2 -0
- package/prompt/chat/ChatPrompt.d.ts +33 -0
- package/prompt/chat/ChatPrompt.js +1 -0
- package/prompt/chat/trimChatPrompt.cjs +50 -0
- package/prompt/chat/trimChatPrompt.d.ts +19 -0
- package/prompt/chat/trimChatPrompt.js +46 -0
- package/prompt/chat/validateChatPrompt.cjs +36 -0
- package/prompt/chat/validateChatPrompt.d.ts +8 -0
- package/prompt/chat/validateChatPrompt.js +31 -0
- package/prompt/index.cjs +25 -0
- package/prompt/index.d.ts +9 -0
- package/prompt/index.js +9 -0
- package/run/ConsoleLogger.cjs +12 -0
- package/run/ConsoleLogger.d.ts +6 -0
- package/run/ConsoleLogger.js +8 -0
- package/run/DefaultRun.cjs +78 -0
- package/run/DefaultRun.d.ts +24 -0
- package/run/DefaultRun.js +74 -0
- package/run/IdMetadata.cjs +2 -0
- package/run/IdMetadata.d.ts +7 -0
- package/run/IdMetadata.js +1 -0
- package/run/Run.cjs +2 -0
- package/run/Run.d.ts +27 -0
- package/run/Run.js +1 -0
- package/run/RunFunction.cjs +2 -0
- package/run/RunFunction.d.ts +13 -0
- package/run/RunFunction.js +1 -0
- package/run/Vector.cjs +2 -0
- package/run/Vector.d.ts +5 -0
- package/run/Vector.js +1 -0
- package/run/index.cjs +22 -0
- package/run/index.d.ts +6 -0
- package/run/index.js +6 -0
- package/text-chunk/TextChunk.cjs +2 -0
- package/text-chunk/TextChunk.d.ts +3 -0
- package/text-chunk/TextChunk.js +1 -0
- package/text-chunk/index.cjs +22 -0
- package/text-chunk/index.d.ts +6 -0
- package/text-chunk/index.js +6 -0
- package/text-chunk/retrieve-text-chunks/TextChunkRetriever.cjs +2 -0
- package/text-chunk/retrieve-text-chunks/TextChunkRetriever.d.ts +8 -0
- package/text-chunk/retrieve-text-chunks/TextChunkRetriever.js +1 -0
- package/text-chunk/retrieve-text-chunks/retrieveTextChunks.cjs +10 -0
- package/text-chunk/retrieve-text-chunks/retrieveTextChunks.d.ts +6 -0
- package/text-chunk/retrieve-text-chunks/retrieveTextChunks.js +6 -0
- package/text-chunk/split/SplitFunction.cjs +2 -0
- package/text-chunk/split/SplitFunction.d.ts +4 -0
- package/text-chunk/split/SplitFunction.js +1 -0
- package/text-chunk/split/splitOnSeparator.cjs +12 -0
- package/text-chunk/split/splitOnSeparator.d.ts +8 -0
- package/text-chunk/split/splitOnSeparator.js +7 -0
- package/text-chunk/split/splitRecursively.cjs +41 -0
- package/text-chunk/split/splitRecursively.d.ts +22 -0
- package/text-chunk/split/splitRecursively.js +33 -0
- package/util/DurationMeasurement.cjs +42 -0
- package/util/DurationMeasurement.d.ts +5 -0
- package/util/DurationMeasurement.js +38 -0
- package/util/ErrorHandler.cjs +2 -0
- package/util/ErrorHandler.d.ts +1 -0
- package/util/ErrorHandler.js +1 -0
- package/util/SafeResult.cjs +2 -0
- package/util/SafeResult.d.ts +8 -0
- package/util/SafeResult.js +1 -0
- package/util/api/AbortError.cjs +9 -0
- package/util/api/AbortError.d.ts +3 -0
- package/util/api/AbortError.js +5 -0
- package/util/api/ApiCallError.cjs +45 -0
- package/util/api/ApiCallError.d.ts +15 -0
- package/util/api/ApiCallError.js +41 -0
- package/util/api/RetryError.cjs +24 -0
- package/util/api/RetryError.d.ts +10 -0
- package/util/api/RetryError.js +20 -0
- package/util/api/RetryFunction.cjs +2 -0
- package/util/api/RetryFunction.d.ts +1 -0
- package/util/api/RetryFunction.js +1 -0
- package/util/api/ThrottleFunction.cjs +2 -0
- package/util/api/ThrottleFunction.d.ts +1 -0
- package/util/api/ThrottleFunction.js +1 -0
- package/util/api/callWithRetryAndThrottle.cjs +7 -0
- package/util/api/callWithRetryAndThrottle.d.ts +7 -0
- package/util/api/callWithRetryAndThrottle.js +3 -0
- package/util/api/postToApi.cjs +103 -0
- package/util/api/postToApi.d.ts +29 -0
- package/util/api/postToApi.js +96 -0
- package/util/api/retryNever.cjs +8 -0
- package/util/api/retryNever.d.ts +4 -0
- package/util/api/retryNever.js +4 -0
- package/util/api/retryWithExponentialBackoff.cjs +48 -0
- package/util/api/retryWithExponentialBackoff.d.ts +10 -0
- package/util/api/retryWithExponentialBackoff.js +44 -0
- package/util/api/throttleMaxConcurrency.cjs +65 -0
- package/util/api/throttleMaxConcurrency.d.ts +7 -0
- package/util/api/throttleMaxConcurrency.js +61 -0
- package/util/api/throttleUnlimitedConcurrency.cjs +8 -0
- package/util/api/throttleUnlimitedConcurrency.d.ts +5 -0
- package/util/api/throttleUnlimitedConcurrency.js +4 -0
- package/util/cosineSimilarity.cjs +26 -0
- package/util/cosineSimilarity.d.ts +11 -0
- package/util/cosineSimilarity.js +22 -0
- package/util/index.cjs +26 -0
- package/util/index.d.ts +10 -0
- package/util/index.js +10 -0
- package/util/never.cjs +6 -0
- package/util/never.d.ts +1 -0
- package/util/never.js +2 -0
- package/util/runSafe.cjs +15 -0
- package/util/runSafe.d.ts +2 -0
- package/util/runSafe.js +11 -0
- package/vector-index/VectorIndex.cjs +2 -0
- package/vector-index/VectorIndex.d.ts +18 -0
- package/vector-index/VectorIndex.js +1 -0
- package/vector-index/VectorIndexSimilarTextChunkRetriever.cjs +57 -0
- package/vector-index/VectorIndexSimilarTextChunkRetriever.d.ts +20 -0
- package/vector-index/VectorIndexSimilarTextChunkRetriever.js +53 -0
- package/vector-index/VectorIndexTextChunkStore.cjs +77 -0
- package/vector-index/VectorIndexTextChunkStore.d.ts +35 -0
- package/vector-index/VectorIndexTextChunkStore.js +73 -0
- package/vector-index/index.cjs +22 -0
- package/vector-index/index.d.ts +6 -0
- package/vector-index/index.js +6 -0
- package/vector-index/memory/MemoryVectorIndex.cjs +63 -0
- package/vector-index/memory/MemoryVectorIndex.d.ts +31 -0
- package/vector-index/memory/MemoryVectorIndex.js +56 -0
- package/vector-index/pinecone/PineconeVectorIndex.cjs +66 -0
- package/vector-index/pinecone/PineconeVectorIndex.d.ts +29 -0
- package/vector-index/pinecone/PineconeVectorIndex.js +62 -0
- package/vector-index/upsertTextChunks.cjs +15 -0
- package/vector-index/upsertTextChunks.d.ts +11 -0
- package/vector-index/upsertTextChunks.js +11 -0
package/model-provider/llamacpp/LlamaCppTextGenerationModel.d.ts
@@ -0,0 +1,399 @@
+import z from "zod";
+import { AbstractModel } from "../../model-function/AbstractModel.js";
+import { FunctionOptions } from "../../model-function/FunctionOptions.js";
+import { DeltaEvent } from "../../model-function/generate-text/DeltaEvent.js";
+import { TextGenerationModel, TextGenerationModelSettings } from "../../model-function/generate-text/TextGenerationModel.js";
+import { PromptMapping } from "../../prompt/PromptMapping.js";
+import { PromptMappingTextGenerationModel } from "../../prompt/PromptMappingTextGenerationModel.js";
+import { RetryFunction } from "../../util/api/RetryFunction.js";
+import { ThrottleFunction } from "../../util/api/ThrottleFunction.js";
+import { ResponseHandler } from "../../util/api/postToApi.js";
+import { LlamaCppTokenizer } from "./LlamaCppTokenizer.js";
+export interface LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE extends number | undefined> extends TextGenerationModelSettings {
+    baseUrl?: string;
+    retry?: RetryFunction;
+    throttle?: ThrottleFunction;
+    tokenizerSettings?: {
+        retry?: RetryFunction;
+        throttle?: ThrottleFunction;
+    };
+    /**
+     * Specify the context window size of the model that you have loaded in your
+     * Llama.cpp server.
+     */
+    contextWindowSize?: CONTEXT_WINDOW_SIZE;
+    temperature?: number;
+    topK?: number;
+    topP?: number;
+    nPredict?: number;
+    nKeep?: number;
+    stop?: string[];
+    tfsZ?: number;
+    typicalP?: number;
+    repeatPenalty?: number;
+    repeatLastN?: number;
+    penalizeNl?: boolean;
+    mirostat?: number;
+    mirostatTau?: number;
+    mirostatEta?: number;
+    seed?: number;
+    ignoreEos?: boolean;
+    logitBias?: Array<[number, number | false]>;
+}
+export declare class LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE extends number | undefined> extends AbstractModel<LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>> implements TextGenerationModel<string, LlamaCppTextGenerationResponse, LlamaCppTextGenerationDelta, LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>> {
+    constructor(settings?: LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>);
+    readonly provider = "llamacpp";
+    get modelName(): null;
+    get contextWindowSize(): CONTEXT_WINDOW_SIZE;
+    readonly tokenizer: LlamaCppTokenizer;
+    callAPI<RESPONSE>(prompt: string, options: {
+        responseFormat: LlamaCppTextGenerationResponseFormatType<RESPONSE>;
+    } & FunctionOptions<LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>>): Promise<RESPONSE>;
+    countPromptTokens(prompt: string): Promise<number>;
+    generateTextResponse(prompt: string, options?: FunctionOptions<LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>>): Promise<{
+        model: string;
+        prompt: string;
+        content: string;
+        stop: true;
+        generation_settings: {
+            model: string;
+            stream: boolean;
+            seed: number;
+            stop: string[];
+            mirostat: number;
+            frequency_penalty: number;
+            ignore_eos: boolean;
+            logit_bias: number[];
+            mirostat_eta: number;
+            mirostat_tau: number;
+            n_ctx: number;
+            n_keep: number;
+            n_predict: number;
+            n_probs: number;
+            penalize_nl: boolean;
+            presence_penalty: number;
+            repeat_last_n: number;
+            repeat_penalty: number;
+            temp: number;
+            tfs_z: number;
+            top_k: number;
+            top_p: number;
+            typical_p: number;
+        };
+        stopped_eos: boolean;
+        stopped_limit: boolean;
+        stopped_word: boolean;
+        stopping_word: string;
+        timings: {
+            predicted_ms: number;
+            predicted_n: number;
+            predicted_per_second: number | null;
+            predicted_per_token_ms: number | null;
+            prompt_ms: number | null;
+            prompt_n: number;
+            prompt_per_second: number | null;
+            prompt_per_token_ms: number | null;
+        };
+        tokens_cached: number;
+        tokens_evaluated: number;
+        tokens_predicted: number;
+        truncated: boolean;
+    }>;
+    extractText(response: LlamaCppTextGenerationResponse): string;
+    generateDeltaStreamResponse(prompt: string, options?: FunctionOptions<LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>>): Promise<AsyncIterable<DeltaEvent<LlamaCppTextGenerationDelta>>>;
+    extractTextDelta(fullDelta: LlamaCppTextGenerationDelta): string | undefined;
+    mapPrompt<INPUT_PROMPT>(promptMapping: PromptMapping<INPUT_PROMPT, string>): PromptMappingTextGenerationModel<INPUT_PROMPT, string, LlamaCppTextGenerationResponse, LlamaCppTextGenerationDelta, LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>, this>;
+    withSettings(additionalSettings: Partial<LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>>): this;
+    get maxCompletionTokens(): number | undefined;
+    withMaxCompletionTokens(maxCompletionTokens: number): this;
+    withStopTokens(stopTokens: string[]): this;
+}
+declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
+    content: z.ZodString;
+    stop: z.ZodLiteral<true>;
+    generation_settings: z.ZodObject<{
+        frequency_penalty: z.ZodNumber;
+        ignore_eos: z.ZodBoolean;
+        logit_bias: z.ZodArray<z.ZodNumber, "many">;
+        mirostat: z.ZodNumber;
+        mirostat_eta: z.ZodNumber;
+        mirostat_tau: z.ZodNumber;
+        model: z.ZodString;
+        n_ctx: z.ZodNumber;
+        n_keep: z.ZodNumber;
+        n_predict: z.ZodNumber;
+        n_probs: z.ZodNumber;
+        penalize_nl: z.ZodBoolean;
+        presence_penalty: z.ZodNumber;
+        repeat_last_n: z.ZodNumber;
+        repeat_penalty: z.ZodNumber;
+        seed: z.ZodNumber;
+        stop: z.ZodArray<z.ZodString, "many">;
+        stream: z.ZodBoolean;
+        temp: z.ZodNumber;
+        tfs_z: z.ZodNumber;
+        top_k: z.ZodNumber;
+        top_p: z.ZodNumber;
+        typical_p: z.ZodNumber;
+    }, "strip", z.ZodTypeAny, {
+        model: string;
+        stream: boolean;
+        seed: number;
+        stop: string[];
+        mirostat: number;
+        frequency_penalty: number;
+        ignore_eos: boolean;
+        logit_bias: number[];
+        mirostat_eta: number;
+        mirostat_tau: number;
+        n_ctx: number;
+        n_keep: number;
+        n_predict: number;
+        n_probs: number;
+        penalize_nl: boolean;
+        presence_penalty: number;
+        repeat_last_n: number;
+        repeat_penalty: number;
+        temp: number;
+        tfs_z: number;
+        top_k: number;
+        top_p: number;
+        typical_p: number;
+    }, {
+        model: string;
+        stream: boolean;
+        seed: number;
+        stop: string[];
+        mirostat: number;
+        frequency_penalty: number;
+        ignore_eos: boolean;
+        logit_bias: number[];
+        mirostat_eta: number;
+        mirostat_tau: number;
+        n_ctx: number;
+        n_keep: number;
+        n_predict: number;
+        n_probs: number;
+        penalize_nl: boolean;
+        presence_penalty: number;
+        repeat_last_n: number;
+        repeat_penalty: number;
+        temp: number;
+        tfs_z: number;
+        top_k: number;
+        top_p: number;
+        typical_p: number;
+    }>;
+    model: z.ZodString;
+    prompt: z.ZodString;
+    stopped_eos: z.ZodBoolean;
+    stopped_limit: z.ZodBoolean;
+    stopped_word: z.ZodBoolean;
+    stopping_word: z.ZodString;
+    timings: z.ZodObject<{
+        predicted_ms: z.ZodNumber;
+        predicted_n: z.ZodNumber;
+        predicted_per_second: z.ZodNullable<z.ZodNumber>;
+        predicted_per_token_ms: z.ZodNullable<z.ZodNumber>;
+        prompt_ms: z.ZodNullable<z.ZodNumber>;
+        prompt_n: z.ZodNumber;
+        prompt_per_second: z.ZodNullable<z.ZodNumber>;
+        prompt_per_token_ms: z.ZodNullable<z.ZodNumber>;
+    }, "strip", z.ZodTypeAny, {
+        predicted_ms: number;
+        predicted_n: number;
+        predicted_per_second: number | null;
+        predicted_per_token_ms: number | null;
+        prompt_ms: number | null;
+        prompt_n: number;
+        prompt_per_second: number | null;
+        prompt_per_token_ms: number | null;
+    }, {
+        predicted_ms: number;
+        predicted_n: number;
+        predicted_per_second: number | null;
+        predicted_per_token_ms: number | null;
+        prompt_ms: number | null;
+        prompt_n: number;
+        prompt_per_second: number | null;
+        prompt_per_token_ms: number | null;
+    }>;
+    tokens_cached: z.ZodNumber;
+    tokens_evaluated: z.ZodNumber;
+    tokens_predicted: z.ZodNumber;
+    truncated: z.ZodBoolean;
+}, "strip", z.ZodTypeAny, {
+    model: string;
+    prompt: string;
+    content: string;
+    stop: true;
+    generation_settings: {
+        model: string;
+        stream: boolean;
+        seed: number;
+        stop: string[];
+        mirostat: number;
+        frequency_penalty: number;
+        ignore_eos: boolean;
+        logit_bias: number[];
+        mirostat_eta: number;
+        mirostat_tau: number;
+        n_ctx: number;
+        n_keep: number;
+        n_predict: number;
+        n_probs: number;
+        penalize_nl: boolean;
+        presence_penalty: number;
+        repeat_last_n: number;
+        repeat_penalty: number;
+        temp: number;
+        tfs_z: number;
+        top_k: number;
+        top_p: number;
+        typical_p: number;
+    };
+    stopped_eos: boolean;
+    stopped_limit: boolean;
+    stopped_word: boolean;
+    stopping_word: string;
+    timings: {
+        predicted_ms: number;
+        predicted_n: number;
+        predicted_per_second: number | null;
+        predicted_per_token_ms: number | null;
+        prompt_ms: number | null;
+        prompt_n: number;
+        prompt_per_second: number | null;
+        prompt_per_token_ms: number | null;
+    };
+    tokens_cached: number;
+    tokens_evaluated: number;
+    tokens_predicted: number;
+    truncated: boolean;
+}, {
+    model: string;
+    prompt: string;
+    content: string;
+    stop: true;
+    generation_settings: {
+        model: string;
+        stream: boolean;
+        seed: number;
+        stop: string[];
+        mirostat: number;
+        frequency_penalty: number;
+        ignore_eos: boolean;
+        logit_bias: number[];
+        mirostat_eta: number;
+        mirostat_tau: number;
+        n_ctx: number;
+        n_keep: number;
+        n_predict: number;
+        n_probs: number;
+        penalize_nl: boolean;
+        presence_penalty: number;
+        repeat_last_n: number;
+        repeat_penalty: number;
+        temp: number;
+        tfs_z: number;
+        top_k: number;
+        top_p: number;
+        typical_p: number;
+    };
+    stopped_eos: boolean;
+    stopped_limit: boolean;
+    stopped_word: boolean;
+    stopping_word: string;
+    timings: {
+        predicted_ms: number;
+        predicted_n: number;
+        predicted_per_second: number | null;
+        predicted_per_token_ms: number | null;
+        prompt_ms: number | null;
+        prompt_n: number;
+        prompt_per_second: number | null;
+        prompt_per_token_ms: number | null;
+    };
+    tokens_cached: number;
+    tokens_evaluated: number;
+    tokens_predicted: number;
+    truncated: boolean;
+}>;
+export type LlamaCppTextGenerationResponse = z.infer<typeof llamaCppTextGenerationResponseSchema>;
+export type LlamaCppTextGenerationDelta = {
+    content: string;
+    isComplete: boolean;
+    delta: string;
+};
+export type LlamaCppTextGenerationResponseFormatType<T> = {
+    stream: boolean;
+    handler: ResponseHandler<T>;
+};
+export declare const LlamaCppTextGenerationResponseFormat: {
+    /**
+     * Returns the response as a JSON object.
+     */
+    json: {
+        stream: false;
+        handler: ResponseHandler<{
+            model: string;
+            prompt: string;
+            content: string;
+            stop: true;
+            generation_settings: {
+                model: string;
+                stream: boolean;
+                seed: number;
+                stop: string[];
+                mirostat: number;
+                frequency_penalty: number;
+                ignore_eos: boolean;
+                logit_bias: number[];
+                mirostat_eta: number;
+                mirostat_tau: number;
+                n_ctx: number;
+                n_keep: number;
+                n_predict: number;
+                n_probs: number;
+                penalize_nl: boolean;
+                presence_penalty: number;
+                repeat_last_n: number;
+                repeat_penalty: number;
+                temp: number;
+                tfs_z: number;
+                top_k: number;
+                top_p: number;
+                typical_p: number;
+            };
+            stopped_eos: boolean;
+            stopped_limit: boolean;
+            stopped_word: boolean;
+            stopping_word: string;
+            timings: {
+                predicted_ms: number;
+                predicted_n: number;
+                predicted_per_second: number | null;
+                predicted_per_token_ms: number | null;
+                prompt_ms: number | null;
+                prompt_n: number;
+                prompt_per_second: number | null;
+                prompt_per_token_ms: number | null;
+            };
+            tokens_cached: number;
+            tokens_evaluated: number;
+            tokens_predicted: number;
+            truncated: boolean;
+        }>;
+    };
+    /**
+     * Returns an async iterable over the full deltas (all choices, including full current state at time of event)
+     * of the response stream.
+     */
+    deltaIterable: {
+        stream: true;
+        handler: ({ response }: {
+            response: Response;
+        }) => Promise<AsyncIterable<DeltaEvent<LlamaCppTextGenerationDelta>>>;
+    };
+};
+export {};
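For orientation, here is a minimal usage sketch implied by the declarations above. It assumes a llama.cpp server running at the default baseUrl (http://127.0.0.1:8080) and that the class is re-exported from the package root; both are assumptions, not confirmed by this diff.

import { LlamaCppTextGenerationModel } from "modelfusion"; // assumed root export

async function main() {
  const model = new LlamaCppTextGenerationModel({
    contextWindowSize: 4096, // hypothetical value; must match the model loaded in the llama.cpp server
    temperature: 0.7,
    nPredict: 256, // surfaced as maxCompletionTokens
  });

  // generateTextResponse returns the full parsed /completion response;
  // extractText pulls the generated text out of it.
  const response = await model.generateTextResponse("Write a haiku about llamas:");
  const text = model.extractText(response);

  // countPromptTokens tokenizes via the attached LlamaCppTokenizer.
  const tokenCount = await model.countPromptTokens("Write a haiku about llamas:");

  console.log(text, tokenCount);
}

main();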
package/model-provider/llamacpp/LlamaCppTextGenerationModel.js
@@ -0,0 +1,238 @@
+import SecureJSON from "secure-json-parse";
+import z from "zod";
+import { AbstractModel } from "../../model-function/AbstractModel.js";
+import { AsyncQueue } from "../../model-function/generate-text/AsyncQueue.js";
+import { parseEventSourceReadableStream } from "../../model-function/generate-text/parseEventSourceReadableStream.js";
+import { PromptMappingTextGenerationModel } from "../../prompt/PromptMappingTextGenerationModel.js";
+import { callWithRetryAndThrottle } from "../../util/api/callWithRetryAndThrottle.js";
+import { createJsonResponseHandler, postJsonToApi, } from "../../util/api/postToApi.js";
+import { failedLlamaCppCallResponseHandler } from "./LlamaCppError.js";
+import { LlamaCppTokenizer } from "./LlamaCppTokenizer.js";
+export class LlamaCppTextGenerationModel extends AbstractModel {
+    constructor(settings = {}) {
+        super({ settings });
+        Object.defineProperty(this, "provider", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: "llamacpp"
+        });
+        Object.defineProperty(this, "tokenizer", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        this.tokenizer = new LlamaCppTokenizer({
+            baseUrl: this.settings.baseUrl,
+            retry: this.settings.tokenizerSettings?.retry,
+            throttle: this.settings.tokenizerSettings?.throttle,
+        });
+    }
+    get modelName() {
+        return null;
+    }
+    get contextWindowSize() {
+        return this.settings.contextWindowSize;
+    }
+    async callAPI(prompt, options) {
+        const { run, settings, responseFormat } = options;
+        const callSettings = Object.assign(this.settings, settings, {
+            abortSignal: run?.abortSignal,
+            prompt,
+            responseFormat,
+        });
+        return callWithRetryAndThrottle({
+            retry: this.settings.retry,
+            throttle: this.settings.throttle,
+            call: async () => callLlamaCppTextGenerationAPI(callSettings),
+        });
+    }
+    async countPromptTokens(prompt) {
+        const tokens = await this.tokenizer.tokenize(prompt);
+        return tokens.length;
+    }
+    generateTextResponse(prompt, options) {
+        return this.callAPI(prompt, {
+            ...options,
+            responseFormat: LlamaCppTextGenerationResponseFormat.json,
+        });
+    }
+    extractText(response) {
+        return response.content;
+    }
+    generateDeltaStreamResponse(prompt, options) {
+        return this.callAPI(prompt, {
+            ...options,
+            responseFormat: LlamaCppTextGenerationResponseFormat.deltaIterable,
+        });
+    }
+    extractTextDelta(fullDelta) {
+        return fullDelta.delta;
+    }
+    mapPrompt(promptMapping) {
+        return new PromptMappingTextGenerationModel({
+            model: this.withStopTokens(promptMapping.stopTokens),
+            promptMapping,
+        });
+    }
+    withSettings(additionalSettings) {
+        return new LlamaCppTextGenerationModel(Object.assign({}, this.settings, additionalSettings));
+    }
+    get maxCompletionTokens() {
+        return this.settings.nPredict;
+    }
+    withMaxCompletionTokens(maxCompletionTokens) {
+        return this.withSettings({ nPredict: maxCompletionTokens });
+    }
+    withStopTokens(stopTokens) {
+        return this.withSettings({ stop: stopTokens });
+    }
+}
+const llamaCppTextGenerationResponseSchema = z.object({
+    content: z.string(),
+    stop: z.literal(true),
+    generation_settings: z.object({
+        frequency_penalty: z.number(),
+        ignore_eos: z.boolean(),
+        logit_bias: z.array(z.number()),
+        mirostat: z.number(),
+        mirostat_eta: z.number(),
+        mirostat_tau: z.number(),
+        model: z.string(),
+        n_ctx: z.number(),
+        n_keep: z.number(),
+        n_predict: z.number(),
+        n_probs: z.number(),
+        penalize_nl: z.boolean(),
+        presence_penalty: z.number(),
+        repeat_last_n: z.number(),
+        repeat_penalty: z.number(),
+        seed: z.number(),
+        stop: z.array(z.string()),
+        stream: z.boolean(),
+        temp: z.number(),
+        tfs_z: z.number(),
+        top_k: z.number(),
+        top_p: z.number(),
+        typical_p: z.number(),
+    }),
+    model: z.string(),
+    prompt: z.string(),
+    stopped_eos: z.boolean(),
+    stopped_limit: z.boolean(),
+    stopped_word: z.boolean(),
+    stopping_word: z.string(),
+    timings: z.object({
+        predicted_ms: z.number(),
+        predicted_n: z.number(),
+        predicted_per_second: z.number().nullable(),
+        predicted_per_token_ms: z.number().nullable(),
+        prompt_ms: z.number().nullable(),
+        prompt_n: z.number(),
+        prompt_per_second: z.number().nullable(),
+        prompt_per_token_ms: z.number().nullable(),
+    }),
+    tokens_cached: z.number(),
+    tokens_evaluated: z.number(),
+    tokens_predicted: z.number(),
+    truncated: z.boolean(),
+});
+const llamaCppTextStreamingResponseSchema = z.discriminatedUnion("stop", [
+    z.object({
+        content: z.string(),
+        stop: z.literal(false),
+    }),
+    llamaCppTextGenerationResponseSchema,
+]);
+async function callLlamaCppTextGenerationAPI({ baseUrl = "http://127.0.0.1:8080", abortSignal, responseFormat, prompt, temperature, topK, topP, nPredict, nKeep, stop, tfsZ, typicalP, repeatPenalty, repeatLastN, penalizeNl, mirostat, mirostatTau, mirostatEta, seed, ignoreEos, logitBias, }) {
+    return postJsonToApi({
+        url: `${baseUrl}/completion`,
+        body: {
+            stream: responseFormat.stream,
+            prompt,
+            temperature,
+            top_k: topK,
+            top_p: topP,
+            n_predict: nPredict,
+            n_keep: nKeep,
+            stop,
+            tfs_z: tfsZ,
+            typical_p: typicalP,
+            repeat_penalty: repeatPenalty,
+            repeat_last_n: repeatLastN,
+            penalize_nl: penalizeNl,
+            mirostat,
+            mirostat_tau: mirostatTau,
+            mirostat_eta: mirostatEta,
+            seed,
+            ignore_eos: ignoreEos,
+            logit_bias: logitBias,
+        },
+        failedResponseHandler: failedLlamaCppCallResponseHandler,
+        successfulResponseHandler: responseFormat.handler,
+        abortSignal,
+    });
+}
+async function createLlamaCppFullDeltaIterableQueue(stream) {
+    const queue = new AsyncQueue();
+    let content = "";
+    // process the stream asynchronously (no 'await' on purpose):
+    parseEventSourceReadableStream({
+        stream,
+        callback: (event) => {
+            if (event.type !== "event") {
+                return;
+            }
+            const data = event.data;
+            try {
+                const json = SecureJSON.parse(data);
+                const parseResult = llamaCppTextStreamingResponseSchema.safeParse(json);
+                if (!parseResult.success) {
+                    queue.push({
+                        type: "error",
+                        error: parseResult.error,
+                    });
+                    queue.close();
+                    return;
+                }
+                const event = parseResult.data;
+                content += event.content;
+                queue.push({
+                    type: "delta",
+                    fullDelta: {
+                        content,
+                        isComplete: event.stop,
+                        delta: event.content,
+                    },
+                });
+                if (event.stop) {
+                    queue.close();
+                }
+            }
+            catch (error) {
+                queue.push({ type: "error", error });
+                queue.close();
+                return;
+            }
+        },
+    });
+    return queue;
+}
+export const LlamaCppTextGenerationResponseFormat = {
+    /**
+     * Returns the response as a JSON object.
+     */
+    json: {
+        stream: false,
+        handler: createJsonResponseHandler(llamaCppTextGenerationResponseSchema),
+    },
+    /**
+     * Returns an async iterable over the full deltas (all choices, including full current state at time of event)
+     * of the response stream.
+     */
+    deltaIterable: {
+        stream: true,
+        handler: async ({ response }) => createLlamaCppFullDeltaIterableQueue(response.body),
+    },
+};
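A corresponding sketch for streaming. The event shape ({ type: "delta", fullDelta } or { type: "error", error }) follows the queue.push calls in createLlamaCppFullDeltaIterableQueue above; the import path is again an assumption.

import { LlamaCppTextGenerationModel } from "modelfusion"; // assumed root export

async function streamCompletion(prompt: string) {
  const model = new LlamaCppTextGenerationModel({ nPredict: 128 });

  // Async iterable of DeltaEvent<LlamaCppTextGenerationDelta>:
  const deltaStream = await model.generateDeltaStreamResponse(prompt);

  for await (const event of deltaStream) {
    if (event.type === "error") {
      throw event.error;
    }
    // fullDelta.delta is the newest chunk; fullDelta.content is the accumulated text.
    process.stdout.write(model.extractTextDelta(event.fullDelta) ?? "");
  }
}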
package/model-provider/llamacpp/LlamaCppTokenizer.cjs
@@ -0,0 +1,64 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.LlamaCppTokenizer = void 0;
+const zod_1 = __importDefault(require("zod"));
+const callWithRetryAndThrottle_js_1 = require("../../util/api/callWithRetryAndThrottle.cjs");
+const postToApi_js_1 = require("../../util/api/postToApi.cjs");
+const LlamaCppError_js_1 = require("./LlamaCppError.cjs");
+/**
+ * Tokenizer for LlamaCpp.
+ *
+ * @example
+ * const tokenizer = new LlamaCppTokenizer();
+ *
+ * const text = "At first, Nox didn't know what to do with the pup.";
+ *
+ * const tokenCount = await countTokens(tokenizer, text);
+ * const tokens = await tokenizer.tokenize(text);
+ * const tokensAndTokenTexts = await tokenizer.tokenizeWithTexts(text);
+ * const reconstructedText = await tokenizer.detokenize(tokens);
+ */
+class LlamaCppTokenizer {
+    constructor(settings = {}) {
+        Object.defineProperty(this, "settings", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        this.settings = settings;
+    }
+    async callTokenizeAPI(text, context) {
+        return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
+            retry: this.settings.retry,
+            throttle: this.settings.throttle,
+            call: async () => callLlamaCppTokenizeAPI({
+                abortSignal: context?.abortSignal,
+                text,
+                ...this.settings,
+            }),
+        });
+    }
+    async tokenize(text) {
+        const response = await this.callTokenizeAPI(text);
+        return response.tokens;
+    }
+}
+exports.LlamaCppTokenizer = LlamaCppTokenizer;
+const llamaCppTokenizationResponseSchema = zod_1.default.object({
+    tokens: zod_1.default.array(zod_1.default.number()),
+});
+async function callLlamaCppTokenizeAPI({ baseUrl = "http://127.0.0.1:8080", abortSignal, text, }) {
+    return (0, postToApi_js_1.postJsonToApi)({
+        url: `${baseUrl}/tokenize`,
+        body: {
+            content: text,
+        },
+        failedResponseHandler: LlamaCppError_js_1.failedLlamaCppCallResponseHandler,
+        successfulResponseHandler: (0, postToApi_js_1.createJsonResponseHandler)(llamaCppTokenizationResponseSchema),
+        abortSignal,
+    });
+}
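Finally, the tokenizer's own docstring example, expanded into a self-contained sketch. countTokens corresponds to package/model-function/tokenize-text/countTokens in the file list; the root-export import paths are assumed, and only tokenize appears in the excerpt shown here (tokenizeWithTexts and detokenize from the docstring do not).

import { LlamaCppTokenizer, countTokens } from "modelfusion"; // assumed root exports

async function main() {
  // POSTs to the llama.cpp server's /tokenize endpoint (default http://127.0.0.1:8080).
  const tokenizer = new LlamaCppTokenizer();

  const text = "At first, Nox didn't know what to do with the pup.";

  const tokens = await tokenizer.tokenize(text); // number[] of token ids
  const tokenCount = await countTokens(tokenizer, text);

  console.log(tokenCount, tokens);
}

main();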