modelfusion 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -12
- package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.cjs +4 -5
- package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.d.ts +2 -2
- package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.js +4 -5
- package/model-function/generate-text/streamText.d.ts +1 -1
- package/model-provider/huggingface/HuggingFaceTextEmbeddingModel.cjs +138 -0
- package/model-provider/huggingface/HuggingFaceTextEmbeddingModel.d.ts +57 -0
- package/model-provider/huggingface/HuggingFaceTextEmbeddingModel.js +131 -0
- package/model-provider/huggingface/index.cjs +1 -0
- package/model-provider/huggingface/index.d.ts +1 -0
- package/model-provider/huggingface/index.js +1 -0
- package/model-provider/llamacpp/LlamaCppTextEmbeddingModel.cjs +2 -1
- package/model-provider/llamacpp/LlamaCppTextEmbeddingModel.d.ts +8 -7
- package/model-provider/llamacpp/LlamaCppTextEmbeddingModel.js +2 -1
- package/package.json +1 -1
- package/prompt/chat/trimChatPrompt.cjs +4 -1
- package/prompt/chat/trimChatPrompt.d.ts +3 -2
- package/prompt/chat/trimChatPrompt.js +4 -1
package/README.md
CHANGED
```diff
@@ -366,7 +366,7 @@ const { chunks } = await retrieveTextChunks(
 | [Stream text](https://modelfusion.dev/guide/function/generate-text) | ✅ | ✅ | ✅ | | | |
 | [Generate JSON](https://modelfusion.dev/guide/function/generate-json) | chat models | | | | | |
 | [Generate JSON or Text](https://modelfusion.dev/guide/function/generate-json-or-text) | chat models | | | | | |
-| [Embed text](https://modelfusion.dev/guide/function/embed-text) | ✅ | ✅ | ✅ |
+| [Embed text](https://modelfusion.dev/guide/function/embed-text) | ✅ | ✅ | ✅ | ✅ | | |
 | [Tokenize text](https://modelfusion.dev/guide/function/tokenize-text) | full | full | basic | | | |
 | [Generate image](https://modelfusion.dev/guide/function/generate-image) | ✅ | | | | ✅ | ✅ |
 | [Transcribe audio](https://modelfusion.dev/guide/function/transcribe-audio) | ✅ | | | | | |
@@ -406,17 +406,15 @@ Use higher level prompts that are mapped into model specific prompt formats.
 
 Examples for the individual functions and objects.
 
-### [
-
-> _terminal app_, _PDF parsing_, _recursive information extraction_, _in memory vector index, \_style example retrieval_, _OpenAI GPT-4_, _cost calculation_
+### [Chatbot (Terminal)](https://github.com/lgrammel/modelfusion/tree/main/examples/chatbot-terminal)
 
-
+> _Terminal app_, _chat_, _llama.cpp_
 
-### [
+### [Chatbot (Next.JS)](https://github.com/lgrammel/modelfusion/tree/main/examples/chatbot-next-js)
 
 > _Next.js app_, _OpenAI GPT-3.5-turbo_, _streaming_, _abort handling_
 
-A
+A web chat with an AI assistant, implemented as a Next.js app.
 
 ### [Image generator (Next.js)](https://github.com/lgrammel/modelfusion/tree/main/examples/image-generator-next-js)
 
@@ -430,20 +428,20 @@ Create an 19th century painting image for your input.
 
 Record audio with push-to-talk and transcribe it using Whisper, implemented as a Next.js app. The app shows a list of the transcriptions.
 
-### [BabyAGI
+### [BabyAGI Agent](https://github.com/lgrammel/modelfusion/tree/main/examples/babyagi-agent)
 
 > _terminal app_, _agent_, _BabyAGI_, _OpenAI text-davinci-003_
 
 TypeScript implementation of the classic [BabyAGI](https://github.com/yoheinakajima/babyagi/blob/main/classic/babyagi.py) by [@yoheinakajima](https://twitter.com/yoheinakajima) without embeddings.
 
-### [Middle school math](https://github.com/lgrammel/modelfusion/tree/main/examples/middle-school-math)
+### [Middle school math agent](https://github.com/lgrammel/modelfusion/tree/main/examples/middle-school-math-agent)
 
 > _terminal app_, _agent_, _tools_, _GPT-4_
 
 Small agent that solves middle school math problems. It uses a calculator tool to solve the problems.
 
-### [
+### [PDF to Tweet](https://github.com/lgrammel/modelfusion/tree/main/examples/pdf-to-tweet)
 
->
+> _terminal app_, _PDF parsing_, _recursive information extraction_, _in memory vector index, \_style example retrieval_, _OpenAI GPT-4_, _cost calculation_
 
-
+Extracts information about a topic from a PDF and writes a tweet in your own style about it.
```
package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.cjs
CHANGED
```diff
@@ -9,17 +9,16 @@ const summarizeRecursively_js_1 = require("./summarizeRecursively.cjs");
  * It automatically splits the text into optimal chunks that are small enough to be processed by the model,
  * while leaving enough space for the model to generate text.
  */
-async function summarizeRecursivelyWithTextGenerationAndTokenSplitting({ text, model, prompt,
+async function summarizeRecursivelyWithTextGenerationAndTokenSplitting({ text, model, prompt, tokenLimit = model.contextWindowSize -
+    (model.maxCompletionTokens ?? model.contextWindowSize / 4), join, }, options) {
     const emptyPromptTokens = await model.countPromptTokens(await prompt({ text: "" }));
     return (0, summarizeRecursively_js_1.summarizeRecursively)({
         split: (0, splitRecursively_js_1.splitRecursivelyAtTokenAsSplitFunction)({
             tokenizer: model.tokenizer,
-            maxChunkSize:
-                reservedCompletionTokens -
-                emptyPromptTokens,
+            maxChunkSize: tokenLimit - emptyPromptTokens,
         }),
         summarize: async (input) => {
-            const { text } = await (0, generateText_js_1.generateText)(model
+            const { text } = await (0, generateText_js_1.generateText)(model, await prompt(input), options);
             return text;
         },
         join,
```
package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.d.ts
CHANGED
```diff
@@ -6,7 +6,7 @@ import { Run } from "../../run/Run.js";
  * It automatically splits the text into optimal chunks that are small enough to be processed by the model,
  * while leaving enough space for the model to generate text.
  */
-export declare function summarizeRecursivelyWithTextGenerationAndTokenSplitting<PROMPT>({ text, model, prompt,
+export declare function summarizeRecursivelyWithTextGenerationAndTokenSplitting<PROMPT>({ text, model, prompt, tokenLimit, join, }: {
     text: string;
     model: TextGenerationModel<PROMPT, any, any, TextGenerationModelSettings> & {
         contextWindowSize: number;
@@ -16,7 +16,7 @@ export declare function summarizeRecursivelyWithTextGenerationAndTokenSplitting<
     prompt: (input: {
         text: string;
     }) => Promise<PROMPT>;
-
+    tokenLimit?: number;
     join?: (texts: Array<string>) => string;
 }, options?: {
     functionId?: string;
```
package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.js
CHANGED
```diff
@@ -6,17 +6,16 @@ import { summarizeRecursively } from "./summarizeRecursively.js";
  * It automatically splits the text into optimal chunks that are small enough to be processed by the model,
  * while leaving enough space for the model to generate text.
  */
-export async function summarizeRecursivelyWithTextGenerationAndTokenSplitting({ text, model, prompt,
+export async function summarizeRecursivelyWithTextGenerationAndTokenSplitting({ text, model, prompt, tokenLimit = model.contextWindowSize -
+    (model.maxCompletionTokens ?? model.contextWindowSize / 4), join, }, options) {
     const emptyPromptTokens = await model.countPromptTokens(await prompt({ text: "" }));
     return summarizeRecursively({
         split: splitRecursivelyAtTokenAsSplitFunction({
             tokenizer: model.tokenizer,
-            maxChunkSize:
-                reservedCompletionTokens -
-                emptyPromptTokens,
+            maxChunkSize: tokenLimit - emptyPromptTokens,
         }),
         summarize: async (input) => {
-            const { text } = await generateText(model
+            const { text } = await generateText(model, await prompt(input), options);
             return text;
         },
         join,
```
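For orientation, here is a minimal usage sketch of the changed `summarizeRecursivelyWithTextGenerationAndTokenSplitting` signature. The root `modelfusion` import path, the OpenAI model choice, and the prompt text are illustrative assumptions; only the new optional `tokenLimit` and its default come from this diff.

```ts
import {
  OpenAITextGenerationModel,
  summarizeRecursivelyWithTextGenerationAndTokenSplitting,
} from "modelfusion";

const longText = "..."; // placeholder for a text that exceeds the context window

// tokenLimit is now optional. When omitted, it defaults to
// model.contextWindowSize - (model.maxCompletionTokens ?? model.contextWindowSize / 4),
// i.e. a quarter of the context window is reserved for the completion when the
// model does not declare maxCompletionTokens.
const summary = await summarizeRecursivelyWithTextGenerationAndTokenSplitting({
  model: new OpenAITextGenerationModel({ model: "text-davinci-003" }),
  text: longText,
  prompt: async ({ text }) => `Summarize:\n\n${text}\n\nSummary:`,
  // tokenLimit: 2048, // an explicit override is still possible
});
```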
package/model-function/generate-text/streamText.d.ts
CHANGED
```diff
@@ -1,7 +1,7 @@
 import { FunctionOptions } from "../FunctionOptions.js";
+import { CallMetadata } from "../executeCall.js";
 import { DeltaEvent } from "./DeltaEvent.js";
 import { TextGenerationModel, TextGenerationModelSettings } from "./TextGenerationModel.js";
-import { CallMetadata } from "model-function/executeCall.js";
 export declare function streamText<PROMPT, FULL_DELTA, SETTINGS extends TextGenerationModelSettings>(model: TextGenerationModel<PROMPT, unknown, FULL_DELTA, SETTINGS> & {
     generateDeltaStreamResponse: (prompt: PROMPT, options: FunctionOptions<SETTINGS>) => PromiseLike<AsyncIterable<DeltaEvent<FULL_DELTA>>>;
     extractTextDelta: (fullDelta: FULL_DELTA) => string | undefined;
```
package/model-provider/huggingface/HuggingFaceTextEmbeddingModel.cjs
ADDED
```diff
@@ -0,0 +1,138 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.HuggingFaceTextEmbeddingModel = void 0;
+const zod_1 = __importDefault(require("zod"));
+const AbstractModel_js_1 = require("../../model-function/AbstractModel.cjs");
+const callWithRetryAndThrottle_js_1 = require("../../util/api/callWithRetryAndThrottle.cjs");
+const postToApi_js_1 = require("../../util/api/postToApi.cjs");
+const HuggingFaceError_js_1 = require("./HuggingFaceError.cjs");
+/**
+ * Create a text embedding model that calls a Hugging Face Inference API Feature Extraction Task.
+ *
+ * @see https://huggingface.co/docs/api-inference/detailed_parameters#feature-extraction-task
+ *
+ * @example
+ * const model = new HuggingFaceTextEmbeddingModel({
+ *   model: "intfloat/e5-base-v2",
+ *   maxTextsPerCall: 5,
+ *   retry: retryWithExponentialBackoff({ maxTries: 5 }),
+ * });
+ *
+ * const { embeddings } = await embedTexts(
+ *   model,
+ *   [
+ *     "At first, Nox didn't know what to do with the pup.",
+ *     "He keenly observed and absorbed everything around him, from the birds in the sky to the trees in the forest.",
+ *   ]
+ * );
+ */
+class HuggingFaceTextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
+    constructor(settings) {
+        super({ settings });
+        Object.defineProperty(this, "provider", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: "huggingface"
+        });
+        Object.defineProperty(this, "maxTextsPerCall", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "contextWindowSize", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
+        Object.defineProperty(this, "embeddingDimensions", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "tokenizer", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
+        Object.defineProperty(this, "countPromptTokens", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
+        // There is no limit documented in the HuggingFace API. Use 1024 as a reasonable default.
+        this.maxTextsPerCall = settings.maxTextsPerCall ?? 1024;
+        this.embeddingDimensions = settings.embeddingDimensions;
+    }
+    get modelName() {
+        return this.settings.model;
+    }
+    get apiKey() {
+        const apiKey = this.settings.apiKey ?? process.env.HUGGINGFACE_API_KEY;
+        if (apiKey == null) {
+            throw new Error("No Hugging Face API key provided. Pass it in the constructor or set the HUGGINGFACE_API_KEY environment variable.");
+        }
+        return apiKey;
+    }
+    async callAPI(texts, options) {
+        if (texts.length > this.maxTextsPerCall) {
+            throw new Error(`The HuggingFace feature extraction API is configured to only support ${this.maxTextsPerCall} texts per API call.`);
+        }
+        const run = options?.run;
+        const settings = options?.settings;
+        const callSettings = Object.assign({
+            apiKey: this.apiKey,
+            options: {
+                useCache: true,
+                waitForModel: true,
+            },
+        }, this.settings, settings, {
+            abortSignal: run?.abortSignal,
+            inputs: texts,
+        });
+        return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
+            retry: this.settings.retry,
+            throttle: this.settings.throttle,
+            call: async () => callHuggingFaceTextGenerationAPI(callSettings),
+        });
+    }
+    generateEmbeddingResponse(texts, options) {
+        return this.callAPI(texts, options);
+    }
+    extractEmbeddings(response) {
+        return response;
+    }
+    withSettings(additionalSettings) {
+        return new HuggingFaceTextEmbeddingModel(Object.assign({}, this.settings, additionalSettings));
+    }
+}
+exports.HuggingFaceTextEmbeddingModel = HuggingFaceTextEmbeddingModel;
+const huggingFaceTextEmbeddingResponseSchema = zod_1.default.array(zod_1.default.array(zod_1.default.number()));
+async function callHuggingFaceTextGenerationAPI({ baseUrl = "https://api-inference.huggingface.co/pipeline/feature-extraction", abortSignal, apiKey, model, inputs, options, }) {
+    return (0, postToApi_js_1.postJsonToApi)({
+        url: `${baseUrl}/${model}`,
+        headers: {
+            Authorization: `Bearer ${apiKey}`,
+        },
+        body: {
+            inputs,
+            options: options
+                ? {
+                    use_cache: options?.useCache,
+                    wait_for_model: options?.waitForModel,
+                }
+                : undefined,
+        },
+        failedResponseHandler: HuggingFaceError_js_1.failedHuggingFaceCallResponseHandler,
+        successfulResponseHandler: (0, postToApi_js_1.createJsonResponseHandler)(huggingFaceTextEmbeddingResponseSchema),
+        abortSignal,
+    });
+}
```
package/model-provider/huggingface/HuggingFaceTextEmbeddingModel.d.ts
ADDED
```diff
@@ -0,0 +1,57 @@
+import z from "zod";
+import { AbstractModel } from "../../model-function/AbstractModel.js";
+import { FunctionOptions } from "../../model-function/FunctionOptions.js";
+import { TextEmbeddingModel, TextEmbeddingModelSettings } from "../../model-function/embed-text/TextEmbeddingModel.js";
+import { RetryFunction } from "../../util/api/RetryFunction.js";
+import { ThrottleFunction } from "../../util/api/ThrottleFunction.js";
+export interface HuggingFaceTextEmbeddingModelSettings extends TextEmbeddingModelSettings {
+    model: string;
+    baseUrl?: string;
+    apiKey?: string;
+    maxTextsPerCall?: number;
+    embeddingDimensions?: number;
+    retry?: RetryFunction;
+    throttle?: ThrottleFunction;
+    options?: {
+        useCache?: boolean;
+        waitForModel?: boolean;
+    };
+}
+/**
+ * Create a text embedding model that calls a Hugging Face Inference API Feature Extraction Task.
+ *
+ * @see https://huggingface.co/docs/api-inference/detailed_parameters#feature-extraction-task
+ *
+ * @example
+ * const model = new HuggingFaceTextEmbeddingModel({
+ *   model: "intfloat/e5-base-v2",
+ *   maxTextsPerCall: 5,
+ *   retry: retryWithExponentialBackoff({ maxTries: 5 }),
+ * });
+ *
+ * const { embeddings } = await embedTexts(
+ *   model,
+ *   [
+ *     "At first, Nox didn't know what to do with the pup.",
+ *     "He keenly observed and absorbed everything around him, from the birds in the sky to the trees in the forest.",
+ *   ]
+ * );
+ */
+export declare class HuggingFaceTextEmbeddingModel extends AbstractModel<HuggingFaceTextEmbeddingModelSettings> implements TextEmbeddingModel<HuggingFaceTextEmbeddingResponse, HuggingFaceTextEmbeddingModelSettings> {
+    constructor(settings: HuggingFaceTextEmbeddingModelSettings);
+    readonly provider = "huggingface";
+    get modelName(): string;
+    readonly maxTextsPerCall: number;
+    readonly contextWindowSize: undefined;
+    readonly embeddingDimensions: number | undefined;
+    readonly tokenizer: undefined;
+    private get apiKey();
+    callAPI(texts: Array<string>, options?: FunctionOptions<HuggingFaceTextEmbeddingModelSettings>): Promise<HuggingFaceTextEmbeddingResponse>;
+    readonly countPromptTokens: undefined;
+    generateEmbeddingResponse(texts: string[], options?: FunctionOptions<HuggingFaceTextEmbeddingModelSettings>): Promise<number[][]>;
+    extractEmbeddings(response: HuggingFaceTextEmbeddingResponse): number[][];
+    withSettings(additionalSettings: Partial<HuggingFaceTextEmbeddingModelSettings>): this;
+}
+declare const huggingFaceTextEmbeddingResponseSchema: z.ZodArray<z.ZodArray<z.ZodNumber, "many">, "many">;
+export type HuggingFaceTextEmbeddingResponse = z.infer<typeof huggingFaceTextEmbeddingResponseSchema>;
+export {};
```
package/model-provider/huggingface/HuggingFaceTextEmbeddingModel.js
ADDED
```diff
@@ -0,0 +1,131 @@
+import z from "zod";
+import { AbstractModel } from "../../model-function/AbstractModel.js";
+import { callWithRetryAndThrottle } from "../../util/api/callWithRetryAndThrottle.js";
+import { createJsonResponseHandler, postJsonToApi, } from "../../util/api/postToApi.js";
+import { failedHuggingFaceCallResponseHandler } from "./HuggingFaceError.js";
+/**
+ * Create a text embedding model that calls a Hugging Face Inference API Feature Extraction Task.
+ *
+ * @see https://huggingface.co/docs/api-inference/detailed_parameters#feature-extraction-task
+ *
+ * @example
+ * const model = new HuggingFaceTextEmbeddingModel({
+ *   model: "intfloat/e5-base-v2",
+ *   maxTextsPerCall: 5,
+ *   retry: retryWithExponentialBackoff({ maxTries: 5 }),
+ * });
+ *
+ * const { embeddings } = await embedTexts(
+ *   model,
+ *   [
+ *     "At first, Nox didn't know what to do with the pup.",
+ *     "He keenly observed and absorbed everything around him, from the birds in the sky to the trees in the forest.",
+ *   ]
+ * );
+ */
+export class HuggingFaceTextEmbeddingModel extends AbstractModel {
+    constructor(settings) {
+        super({ settings });
+        Object.defineProperty(this, "provider", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: "huggingface"
+        });
+        Object.defineProperty(this, "maxTextsPerCall", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "contextWindowSize", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
+        Object.defineProperty(this, "embeddingDimensions", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "tokenizer", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
+        Object.defineProperty(this, "countPromptTokens", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
+        // There is no limit documented in the HuggingFace API. Use 1024 as a reasonable default.
+        this.maxTextsPerCall = settings.maxTextsPerCall ?? 1024;
+        this.embeddingDimensions = settings.embeddingDimensions;
+    }
+    get modelName() {
+        return this.settings.model;
+    }
+    get apiKey() {
+        const apiKey = this.settings.apiKey ?? process.env.HUGGINGFACE_API_KEY;
+        if (apiKey == null) {
+            throw new Error("No Hugging Face API key provided. Pass it in the constructor or set the HUGGINGFACE_API_KEY environment variable.");
+        }
+        return apiKey;
+    }
+    async callAPI(texts, options) {
+        if (texts.length > this.maxTextsPerCall) {
+            throw new Error(`The HuggingFace feature extraction API is configured to only support ${this.maxTextsPerCall} texts per API call.`);
+        }
+        const run = options?.run;
+        const settings = options?.settings;
+        const callSettings = Object.assign({
+            apiKey: this.apiKey,
+            options: {
+                useCache: true,
+                waitForModel: true,
+            },
+        }, this.settings, settings, {
+            abortSignal: run?.abortSignal,
+            inputs: texts,
+        });
+        return callWithRetryAndThrottle({
+            retry: this.settings.retry,
+            throttle: this.settings.throttle,
+            call: async () => callHuggingFaceTextGenerationAPI(callSettings),
+        });
+    }
+    generateEmbeddingResponse(texts, options) {
+        return this.callAPI(texts, options);
+    }
+    extractEmbeddings(response) {
+        return response;
+    }
+    withSettings(additionalSettings) {
+        return new HuggingFaceTextEmbeddingModel(Object.assign({}, this.settings, additionalSettings));
+    }
+}
+const huggingFaceTextEmbeddingResponseSchema = z.array(z.array(z.number()));
+async function callHuggingFaceTextGenerationAPI({ baseUrl = "https://api-inference.huggingface.co/pipeline/feature-extraction", abortSignal, apiKey, model, inputs, options, }) {
+    return postJsonToApi({
+        url: `${baseUrl}/${model}`,
+        headers: {
+            Authorization: `Bearer ${apiKey}`,
+        },
+        body: {
+            inputs,
+            options: options
+                ? {
+                    use_cache: options?.useCache,
+                    wait_for_model: options?.waitForModel,
+                }
+                : undefined,
+        },
+        failedResponseHandler: failedHuggingFaceCallResponseHandler,
+        successfulResponseHandler: createJsonResponseHandler(huggingFaceTextEmbeddingResponseSchema),
+        abortSignal,
+    });
+}
```
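Taken together, the three new files add a Hugging Face feature-extraction embedding model. The sketch below follows the `@example` in the new source; the root `modelfusion` import path and top-level `await` are assumptions made for brevity.

```ts
import { HuggingFaceTextEmbeddingModel, embedTexts } from "modelfusion";

// Calls the Hugging Face Inference API feature-extraction endpoint.
// The API key is read from HUGGINGFACE_API_KEY when not passed explicitly.
const model = new HuggingFaceTextEmbeddingModel({
  model: "intfloat/e5-base-v2",
  maxTextsPerCall: 5,
});

const { embeddings } = await embedTexts(model, [
  "At first, Nox didn't know what to do with the pup.",
  "He keenly observed and absorbed everything around him, from the birds in the sky to the trees in the forest.",
]);
```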
package/model-provider/huggingface/index.cjs
CHANGED
```diff
@@ -17,4 +17,5 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.HuggingFaceError = void 0;
 var HuggingFaceError_js_1 = require("./HuggingFaceError.cjs");
 Object.defineProperty(exports, "HuggingFaceError", { enumerable: true, get: function () { return HuggingFaceError_js_1.HuggingFaceError; } });
+__exportStar(require("./HuggingFaceTextEmbeddingModel.cjs"), exports);
 __exportStar(require("./HuggingFaceTextGenerationModel.cjs"), exports);
```
package/model-provider/llamacpp/LlamaCppTextEmbeddingModel.cjs
CHANGED
```diff
@@ -35,7 +35,7 @@ class LlamaCppTextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
             enumerable: true,
             configurable: true,
             writable: true,
-            value:
+            value: void 0
         });
         Object.defineProperty(this, "tokenizer", {
             enumerable: true,
@@ -48,6 +48,7 @@ class LlamaCppTextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
             retry: this.settings.tokenizerSettings?.retry,
             throttle: this.settings.tokenizerSettings?.throttle,
         });
+        this.embeddingDimensions = this.settings.embeddingDimensions;
     }
     get modelName() {
         return null;
```
package/model-provider/llamacpp/LlamaCppTextEmbeddingModel.d.ts
CHANGED
```diff
@@ -4,8 +4,9 @@ import { FunctionOptions } from "../../model-function/FunctionOptions.js";
 import { TextEmbeddingModel, TextEmbeddingModelSettings } from "../../model-function/embed-text/TextEmbeddingModel.js";
 import { RetryFunction } from "../../util/api/RetryFunction.js";
 import { ThrottleFunction } from "../../util/api/ThrottleFunction.js";
-export interface
+export interface LlamaCppTextEmbeddingModelSettings extends TextEmbeddingModelSettings {
     baseUrl?: string;
+    embeddingDimensions?: number;
     retry?: RetryFunction;
     throttle?: ThrottleFunction;
     tokenizerSettings?: {
@@ -13,21 +14,21 @@ export interface LlamaCppEmbeddingModelSettings extends TextEmbeddingModelSettin
         throttle?: ThrottleFunction;
     };
 }
-export declare class LlamaCppTextEmbeddingModel extends AbstractModel<
-    constructor(settings?:
+export declare class LlamaCppTextEmbeddingModel extends AbstractModel<LlamaCppTextEmbeddingModelSettings> implements TextEmbeddingModel<LlamaCppTextEmbeddingResponse, LlamaCppTextEmbeddingModelSettings> {
+    constructor(settings?: LlamaCppTextEmbeddingModelSettings);
     readonly provider: "llamacpp";
     get modelName(): null;
     readonly maxTextsPerCall = 1;
     readonly contextWindowSize: undefined;
-    readonly embeddingDimensions: undefined;
+    readonly embeddingDimensions: number | undefined;
     private readonly tokenizer;
     tokenize(text: string): Promise<number[]>;
-    callAPI(texts: Array<string>, options?: FunctionOptions<
-    generateEmbeddingResponse(texts: string[], options?: FunctionOptions<
+    callAPI(texts: Array<string>, options?: FunctionOptions<LlamaCppTextEmbeddingModelSettings>): Promise<LlamaCppTextEmbeddingResponse>;
+    generateEmbeddingResponse(texts: string[], options?: FunctionOptions<LlamaCppTextEmbeddingModelSettings>): Promise<{
         embedding: number[];
     }>;
     extractEmbeddings(response: LlamaCppTextEmbeddingResponse): number[][];
-    withSettings(additionalSettings: Partial<
+    withSettings(additionalSettings: Partial<LlamaCppTextEmbeddingModelSettings>): this;
 }
 declare const llamaCppTextEmbeddingResponseSchema: z.ZodObject<{
     embedding: z.ZodArray<z.ZodNumber, "many">;
```
package/model-provider/llamacpp/LlamaCppTextEmbeddingModel.js
CHANGED
```diff
@@ -29,7 +29,7 @@ export class LlamaCppTextEmbeddingModel extends AbstractModel {
             enumerable: true,
             configurable: true,
             writable: true,
-            value:
+            value: void 0
         });
         Object.defineProperty(this, "tokenizer", {
             enumerable: true,
@@ -42,6 +42,7 @@ export class LlamaCppTextEmbeddingModel extends AbstractModel {
             retry: this.settings.tokenizerSettings?.retry,
             throttle: this.settings.tokenizerSettings?.throttle,
         });
+        this.embeddingDimensions = this.settings.embeddingDimensions;
     }
     get modelName() {
         return null;
```
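The llama.cpp change is small: `embeddingDimensions` can now be declared in the settings and is copied onto the instance (previously the property was never assigned). A sketch, assuming the root `modelfusion` exports and a llama.cpp server running locally; the dimension value depends on the model loaded into that server.

```ts
import { LlamaCppTextEmbeddingModel, embedTexts } from "modelfusion";

// embeddingDimensions is optional metadata describing the embedding vectors
// returned by the llama.cpp server; 4096 is just an illustrative value.
const model = new LlamaCppTextEmbeddingModel({
  embeddingDimensions: 4096,
});

// The llama.cpp embedding endpoint accepts one text per call (maxTextsPerCall = 1).
const { embeddings } = await embedTexts(model, [
  "At first, Nox didn't know what to do with the pup.",
]);

console.log(model.embeddingDimensions); // 4096 (was always undefined before 0.4.x)
```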
package/package.json
CHANGED
package/prompt/chat/trimChatPrompt.cjs
CHANGED
```diff
@@ -9,8 +9,11 @@ const validateChatPrompt_js_1 = require("./validateChatPrompt.cjs");
  *
  * When the minimal chat prompt (system message + last user message) is already too long, it will only
  * return this minimal chat prompt.
+ *
+ * @see https://modelfusion.dev/guide/function/generate-text/prompt-mapping#limiting-the-chat-length
  */
-async function trimChatPrompt({ prompt, model, tokenLimit = model.contextWindowSize -
+async function trimChatPrompt({ prompt, model, tokenLimit = model.contextWindowSize -
+    (model.maxCompletionTokens ?? model.contextWindowSize / 4), }) {
     (0, validateChatPrompt_js_1.validateChatPrompt)(prompt);
     const startsWithSystemMessage = "system" in prompt[0];
     const systemMessage = startsWithSystemMessage ? [prompt[0]] : [];
```
package/prompt/chat/trimChatPrompt.d.ts
CHANGED
```diff
@@ -1,4 +1,4 @@
-import { TextGenerationModel } from "model-function/generate-text/TextGenerationModel.js";
+import { TextGenerationModel } from "../../model-function/generate-text/TextGenerationModel.js";
 import { ChatPrompt } from "./ChatPrompt.js";
 /**
  * Keeps only the most recent messages in the prompt, while leaving enough space for the completion.
@@ -7,12 +7,13 @@ import { ChatPrompt } from "./ChatPrompt.js";
  *
  * When the minimal chat prompt (system message + last user message) is already too long, it will only
  * return this minimal chat prompt.
+ *
+ * @see https://modelfusion.dev/guide/function/generate-text/prompt-mapping#limiting-the-chat-length
  */
 export declare function trimChatPrompt({ prompt, model, tokenLimit, }: {
     prompt: ChatPrompt;
     model: TextGenerationModel<ChatPrompt, any, any, any> & {
         contextWindowSize: number;
-        maxCompletionTokens: number;
         countPromptTokens: (prompt: ChatPrompt) => PromiseLike<number>;
     };
     tokenLimit?: number;
```
package/prompt/chat/trimChatPrompt.js
CHANGED
```diff
@@ -6,8 +6,11 @@ import { validateChatPrompt } from "./validateChatPrompt.js";
  *
  * When the minimal chat prompt (system message + last user message) is already too long, it will only
  * return this minimal chat prompt.
+ *
+ * @see https://modelfusion.dev/guide/function/generate-text/prompt-mapping#limiting-the-chat-length
  */
-export async function trimChatPrompt({ prompt, model, tokenLimit = model.contextWindowSize -
+export async function trimChatPrompt({ prompt, model, tokenLimit = model.contextWindowSize -
+    (model.maxCompletionTokens ?? model.contextWindowSize / 4), }) {
     validateChatPrompt(prompt);
     const startsWithSystemMessage = "system" in prompt[0];
     const systemMessage = startsWithSystemMessage ? [prompt[0]] : [];
```
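Finally, a sketch of how the `trimChatPrompt` change looks from the caller's side. The root `modelfusion` import path is an assumption, and the chat model is left abstract because constructing and prompt-mapping a concrete model is outside this diff; only the relaxed `maxCompletionTokens` requirement and the new default formula come from the change.

```ts
import { trimChatPrompt, type ChatPrompt } from "modelfusion";

// Assumed: a text generation model that accepts ChatPrompt and exposes
// contextWindowSize and countPromptTokens (e.g. a chat model wrapped with a
// ChatPrompt mapping). maxCompletionTokens is now optional on this type.
declare const model: Parameters<typeof trimChatPrompt>[0]["model"];

const prompt: ChatPrompt = [
  { system: "You are a helpful assistant." },
  { user: "Please summarize our conversation so far." },
];

// Without an explicit tokenLimit, the default is now
//   contextWindowSize - (maxCompletionTokens ?? contextWindowSize / 4)
// so models that do not declare maxCompletionTokens still get a sensible limit.
const trimmed = await trimChatPrompt({ model, prompt });
```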