modelfusion 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -366,7 +366,7 @@ const { chunks } = await retrieveTextChunks(
366
366
  | [Stream text](https://modelfusion.dev/guide/function/generate-text) | ✅ | ✅ | ✅ | | | |
367
367
  | [Generate JSON](https://modelfusion.dev/guide/function/generate-json) | chat models | | | | | |
368
368
  | [Generate JSON or Text](https://modelfusion.dev/guide/function/generate-json-or-text) | chat models | | | | | |
369
- | [Embed text](https://modelfusion.dev/guide/function/embed-text) | ✅ | ✅ | ✅ | | | |
369
+ | [Embed text](https://modelfusion.dev/guide/function/embed-text) | ✅ | ✅ | ✅ || | |
370
370
  | [Tokenize text](https://modelfusion.dev/guide/function/tokenize-text) | full | full | basic | | | |
371
371
  | [Generate image](https://modelfusion.dev/guide/function/generate-image) | ✅ | | | | ✅ | ✅ |
372
372
  | [Transcribe audio](https://modelfusion.dev/guide/function/transcribe-audio) | ✅ | | | | | |
@@ -406,17 +406,15 @@ Use higher level prompts that are mapped into model specific prompt formats.
406
406
 
407
407
  Examples for the individual functions and objects.
408
408
 
409
- ### [PDF to Tweet](https://github.com/lgrammel/modelfusion/tree/main/examples/pdf-to-tweet)
410
-
411
- > _terminal app_, _PDF parsing_, _recursive information extraction_, _in memory vector index_, _style example retrieval_, _OpenAI GPT-4_, _cost calculation_
409
+ ### [Chatbot (Terminal)](https://github.com/lgrammel/modelfusion/tree/main/examples/chatbot-terminal)
412
410
 
413
- Extracts information about a topic from a PDF and writes a tweet in your own style about it.
411
+ > _Terminal app_, _chat_, _llama.cpp_
414
412
 
415
- ### [AI Chat (Next.JS)](https://github.com/lgrammel/modelfusion/tree/main/examples/ai-chat-next-js)
413
+ ### [Chatbot (Next.JS)](https://github.com/lgrammel/modelfusion/tree/main/examples/chatbot-next-js)
416
414
 
417
415
  > _Next.js app_, _OpenAI GPT-3.5-turbo_, _streaming_, _abort handling_
418
416
 
419
- A basic web chat with an AI assistant, implemented as a Next.js app.
417
+ A web chat with an AI assistant, implemented as a Next.js app.
420
418
 
421
419
  ### [Image generator (Next.js)](https://github.com/lgrammel/modelfusion/tree/main/examples/image-generator-next-js)
422
420
 
@@ -430,20 +428,20 @@ Create an 19th century painting image for your input.
430
428
 
431
429
  Record audio with push-to-talk and transcribe it using Whisper, implemented as a Next.js app. The app shows a list of the transcriptions.
432
430
 
433
- ### [BabyAGI Classic](https://github.com/lgrammel/modelfusion/tree/main/examples/baby-agi)
431
+ ### [BabyAGI Agent](https://github.com/lgrammel/modelfusion/tree/main/examples/babyagi-agent)
434
432
 
435
433
  > _terminal app_, _agent_, _BabyAGI_, _OpenAI text-davinci-003_
436
434
 
437
435
  TypeScript implementation of the classic [BabyAGI](https://github.com/yoheinakajima/babyagi/blob/main/classic/babyagi.py) by [@yoheinakajima](https://twitter.com/yoheinakajima) without embeddings.
438
436
 
439
- ### [Middle school math](https://github.com/lgrammel/modelfusion/tree/main/examples/middle-school-math)
437
+ ### [Middle school math agent](https://github.com/lgrammel/modelfusion/tree/main/examples/middle-school-math-agent)
440
438
 
441
439
  > _terminal app_, _agent_, _tools_, _GPT-4_
442
440
 
443
441
  Small agent that solves middle school math problems. It uses a calculator tool to solve the problems.
444
442
 
445
- ### [Terminal Chat (llama.cpp)](https://github.com/lgrammel/modelfusion/tree/main/examples/terminal-chat-llamacpp)
443
+ ### [PDF to Tweet](https://github.com/lgrammel/modelfusion/tree/main/examples/pdf-to-tweet)
446
444
 
447
- > _Terminal app_, _chat_, _llama.cpp_
445
+ > _terminal app_, _PDF parsing_, _recursive information extraction_, _in memory vector index_, _style example retrieval_, _OpenAI GPT-4_, _cost calculation_
448
446
 
449
- A terminal chat with a Llama.cpp server backend.
447
+ Extracts information about a topic from a PDF and writes a tweet in your own style about it.
@@ -9,17 +9,16 @@ const summarizeRecursively_js_1 = require("./summarizeRecursively.cjs");
9
9
  * It automatically splits the text into optimal chunks that are small enough to be processed by the model,
10
10
  * while leaving enough space for the model to generate text.
11
11
  */
12
- async function summarizeRecursivelyWithTextGenerationAndTokenSplitting({ text, model, prompt, reservedCompletionTokens, join, }, options) {
12
+ async function summarizeRecursivelyWithTextGenerationAndTokenSplitting({ text, model, prompt, tokenLimit = model.contextWindowSize -
13
+ (model.maxCompletionTokens ?? model.contextWindowSize / 4), join, }, options) {
13
14
  const emptyPromptTokens = await model.countPromptTokens(await prompt({ text: "" }));
14
15
  return (0, summarizeRecursively_js_1.summarizeRecursively)({
15
16
  split: (0, splitRecursively_js_1.splitRecursivelyAtTokenAsSplitFunction)({
16
17
  tokenizer: model.tokenizer,
17
- maxChunkSize: model.contextWindowSize -
18
- reservedCompletionTokens -
19
- emptyPromptTokens,
18
+ maxChunkSize: tokenLimit - emptyPromptTokens,
20
19
  }),
21
20
  summarize: async (input) => {
22
- const { text } = await (0, generateText_js_1.generateText)(model.withMaxCompletionTokens(reservedCompletionTokens), await prompt(input), options);
21
+ const { text } = await (0, generateText_js_1.generateText)(model, await prompt(input), options);
23
22
  return text;
24
23
  },
25
24
  join,
@@ -6,7 +6,7 @@ import { Run } from "../../run/Run.js";
6
6
  * It automatically splits the text into optimal chunks that are small enough to be processed by the model,
7
7
  * while leaving enough space for the model to generate text.
8
8
  */
9
- export declare function summarizeRecursivelyWithTextGenerationAndTokenSplitting<PROMPT>({ text, model, prompt, reservedCompletionTokens, join, }: {
9
+ export declare function summarizeRecursivelyWithTextGenerationAndTokenSplitting<PROMPT>({ text, model, prompt, tokenLimit, join, }: {
10
10
  text: string;
11
11
  model: TextGenerationModel<PROMPT, any, any, TextGenerationModelSettings> & {
12
12
  contextWindowSize: number;
@@ -16,7 +16,7 @@ export declare function summarizeRecursivelyWithTextGenerationAndTokenSplitting<
16
16
  prompt: (input: {
17
17
  text: string;
18
18
  }) => Promise<PROMPT>;
19
- reservedCompletionTokens: number;
19
+ tokenLimit?: number;
20
20
  join?: (texts: Array<string>) => string;
21
21
  }, options?: {
22
22
  functionId?: string;
@@ -6,17 +6,16 @@ import { summarizeRecursively } from "./summarizeRecursively.js";
6
6
  * It automatically splits the text into optimal chunks that are small enough to be processed by the model,
7
7
  * while leaving enough space for the model to generate text.
8
8
  */
9
- export async function summarizeRecursivelyWithTextGenerationAndTokenSplitting({ text, model, prompt, reservedCompletionTokens, join, }, options) {
9
+ export async function summarizeRecursivelyWithTextGenerationAndTokenSplitting({ text, model, prompt, tokenLimit = model.contextWindowSize -
10
+ (model.maxCompletionTokens ?? model.contextWindowSize / 4), join, }, options) {
10
11
  const emptyPromptTokens = await model.countPromptTokens(await prompt({ text: "" }));
11
12
  return summarizeRecursively({
12
13
  split: splitRecursivelyAtTokenAsSplitFunction({
13
14
  tokenizer: model.tokenizer,
14
- maxChunkSize: model.contextWindowSize -
15
- reservedCompletionTokens -
16
- emptyPromptTokens,
15
+ maxChunkSize: tokenLimit - emptyPromptTokens,
17
16
  }),
18
17
  summarize: async (input) => {
19
- const { text } = await generateText(model.withMaxCompletionTokens(reservedCompletionTokens), await prompt(input), options);
18
+ const { text } = await generateText(model, await prompt(input), options);
20
19
  return text;
21
20
  },
22
21
  join,
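Taken together, the three summarize hunks above (CJS build, type declarations, ESM build) replace the required `reservedCompletionTokens` option with an optional `tokenLimit` that defaults to `model.contextWindowSize - (model.maxCompletionTokens ?? model.contextWindowSize / 4)`, and the summarize step now calls the model unchanged instead of wrapping it with `withMaxCompletionTokens`. A minimal usage sketch against the new signature; the OpenAI model setup, prompt wording, and input text are illustrative assumptions, not part of this diff:

```ts
import {
  OpenAITextGenerationModel,
  summarizeRecursivelyWithTextGenerationAndTokenSplitting,
} from "modelfusion";

const longText = "..."; // any text longer than the model's context window

const summary = await summarizeRecursivelyWithTextGenerationAndTokenSplitting({
  // assumed model setup; any TextGenerationModel with tokenizer,
  // contextWindowSize, and countPromptTokens can be used here
  model: new OpenAITextGenerationModel({
    model: "text-davinci-003",
    maxCompletionTokens: 512,
  }),
  text: longText,
  prompt: async ({ text }) =>
    `Summarize the following text:\n\n${text}\n\nSummary:`,
  // tokenLimit is now optional; when omitted it defaults to
  // model.contextWindowSize - (model.maxCompletionTokens ?? model.contextWindowSize / 4)
});
```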
@@ -1,7 +1,7 @@
1
1
  import { FunctionOptions } from "../FunctionOptions.js";
2
+ import { CallMetadata } from "../executeCall.js";
2
3
  import { DeltaEvent } from "./DeltaEvent.js";
3
4
  import { TextGenerationModel, TextGenerationModelSettings } from "./TextGenerationModel.js";
4
- import { CallMetadata } from "model-function/executeCall.js";
5
5
  export declare function streamText<PROMPT, FULL_DELTA, SETTINGS extends TextGenerationModelSettings>(model: TextGenerationModel<PROMPT, unknown, FULL_DELTA, SETTINGS> & {
6
6
  generateDeltaStreamResponse: (prompt: PROMPT, options: FunctionOptions<SETTINGS>) => PromiseLike<AsyncIterable<DeltaEvent<FULL_DELTA>>>;
7
7
  extractTextDelta: (fullDelta: FULL_DELTA) => string | undefined;
@@ -0,0 +1,138 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.HuggingFaceTextEmbeddingModel = void 0;
7
+ const zod_1 = __importDefault(require("zod"));
8
+ const AbstractModel_js_1 = require("../../model-function/AbstractModel.cjs");
9
+ const callWithRetryAndThrottle_js_1 = require("../../util/api/callWithRetryAndThrottle.cjs");
10
+ const postToApi_js_1 = require("../../util/api/postToApi.cjs");
11
+ const HuggingFaceError_js_1 = require("./HuggingFaceError.cjs");
12
+ /**
13
+ * Create a text embedding model that calls a Hugging Face Inference API Feature Extraction Task.
14
+ *
15
+ * @see https://huggingface.co/docs/api-inference/detailed_parameters#feature-extraction-task
16
+ *
17
+ * @example
18
+ * const model = new HuggingFaceTextEmbeddingModel({
19
+ * model: "intfloat/e5-base-v2",
20
+ * maxTextsPerCall: 5,
21
+ * retry: retryWithExponentialBackoff({ maxTries: 5 }),
22
+ * });
23
+ *
24
+ * const { embeddings } = await embedTexts(
25
+ * model,
26
+ * [
27
+ * "At first, Nox didn't know what to do with the pup.",
28
+ * "He keenly observed and absorbed everything around him, from the birds in the sky to the trees in the forest.",
29
+ * ]
30
+ * );
31
+ */
32
+ class HuggingFaceTextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
33
+ constructor(settings) {
34
+ super({ settings });
35
+ Object.defineProperty(this, "provider", {
36
+ enumerable: true,
37
+ configurable: true,
38
+ writable: true,
39
+ value: "huggingface"
40
+ });
41
+ Object.defineProperty(this, "maxTextsPerCall", {
42
+ enumerable: true,
43
+ configurable: true,
44
+ writable: true,
45
+ value: void 0
46
+ });
47
+ Object.defineProperty(this, "contextWindowSize", {
48
+ enumerable: true,
49
+ configurable: true,
50
+ writable: true,
51
+ value: undefined
52
+ });
53
+ Object.defineProperty(this, "embeddingDimensions", {
54
+ enumerable: true,
55
+ configurable: true,
56
+ writable: true,
57
+ value: void 0
58
+ });
59
+ Object.defineProperty(this, "tokenizer", {
60
+ enumerable: true,
61
+ configurable: true,
62
+ writable: true,
63
+ value: undefined
64
+ });
65
+ Object.defineProperty(this, "countPromptTokens", {
66
+ enumerable: true,
67
+ configurable: true,
68
+ writable: true,
69
+ value: undefined
70
+ });
71
+ // There is no limit documented in the HuggingFace API. Use 1024 as a reasonable default.
72
+ this.maxTextsPerCall = settings.maxTextsPerCall ?? 1024;
73
+ this.embeddingDimensions = settings.embeddingDimensions;
74
+ }
75
+ get modelName() {
76
+ return this.settings.model;
77
+ }
78
+ get apiKey() {
79
+ const apiKey = this.settings.apiKey ?? process.env.HUGGINGFACE_API_KEY;
80
+ if (apiKey == null) {
81
+ throw new Error("No Hugging Face API key provided. Pass it in the constructor or set the HUGGINGFACE_API_KEY environment variable.");
82
+ }
83
+ return apiKey;
84
+ }
85
+ async callAPI(texts, options) {
86
+ if (texts.length > this.maxTextsPerCall) {
87
+ throw new Error(`The HuggingFace feature extraction API is configured to only support ${this.maxTextsPerCall} texts per API call.`);
88
+ }
89
+ const run = options?.run;
90
+ const settings = options?.settings;
91
+ const callSettings = Object.assign({
92
+ apiKey: this.apiKey,
93
+ options: {
94
+ useCache: true,
95
+ waitForModel: true,
96
+ },
97
+ }, this.settings, settings, {
98
+ abortSignal: run?.abortSignal,
99
+ inputs: texts,
100
+ });
101
+ return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
102
+ retry: this.settings.retry,
103
+ throttle: this.settings.throttle,
104
+ call: async () => callHuggingFaceTextGenerationAPI(callSettings),
105
+ });
106
+ }
107
+ generateEmbeddingResponse(texts, options) {
108
+ return this.callAPI(texts, options);
109
+ }
110
+ extractEmbeddings(response) {
111
+ return response;
112
+ }
113
+ withSettings(additionalSettings) {
114
+ return new HuggingFaceTextEmbeddingModel(Object.assign({}, this.settings, additionalSettings));
115
+ }
116
+ }
117
+ exports.HuggingFaceTextEmbeddingModel = HuggingFaceTextEmbeddingModel;
118
+ const huggingFaceTextEmbeddingResponseSchema = zod_1.default.array(zod_1.default.array(zod_1.default.number()));
119
+ async function callHuggingFaceTextGenerationAPI({ baseUrl = "https://api-inference.huggingface.co/pipeline/feature-extraction", abortSignal, apiKey, model, inputs, options, }) {
120
+ return (0, postToApi_js_1.postJsonToApi)({
121
+ url: `${baseUrl}/${model}`,
122
+ headers: {
123
+ Authorization: `Bearer ${apiKey}`,
124
+ },
125
+ body: {
126
+ inputs,
127
+ options: options
128
+ ? {
129
+ use_cache: options?.useCache,
130
+ wait_for_model: options?.waitForModel,
131
+ }
132
+ : undefined,
133
+ },
134
+ failedResponseHandler: HuggingFaceError_js_1.failedHuggingFaceCallResponseHandler,
135
+ successfulResponseHandler: (0, postToApi_js_1.createJsonResponseHandler)(huggingFaceTextEmbeddingResponseSchema),
136
+ abortSignal,
137
+ });
138
+ }
@@ -0,0 +1,57 @@
1
+ import z from "zod";
2
+ import { AbstractModel } from "../../model-function/AbstractModel.js";
3
+ import { FunctionOptions } from "../../model-function/FunctionOptions.js";
4
+ import { TextEmbeddingModel, TextEmbeddingModelSettings } from "../../model-function/embed-text/TextEmbeddingModel.js";
5
+ import { RetryFunction } from "../../util/api/RetryFunction.js";
6
+ import { ThrottleFunction } from "../../util/api/ThrottleFunction.js";
7
+ export interface HuggingFaceTextEmbeddingModelSettings extends TextEmbeddingModelSettings {
8
+ model: string;
9
+ baseUrl?: string;
10
+ apiKey?: string;
11
+ maxTextsPerCall?: number;
12
+ embeddingDimensions?: number;
13
+ retry?: RetryFunction;
14
+ throttle?: ThrottleFunction;
15
+ options?: {
16
+ useCache?: boolean;
17
+ waitForModel?: boolean;
18
+ };
19
+ }
20
+ /**
21
+ * Create a text embedding model that calls a Hugging Face Inference API Feature Extraction Task.
22
+ *
23
+ * @see https://huggingface.co/docs/api-inference/detailed_parameters#feature-extraction-task
24
+ *
25
+ * @example
26
+ * const model = new HuggingFaceTextEmbeddingModel({
27
+ * model: "intfloat/e5-base-v2",
28
+ * maxTextsPerCall: 5,
29
+ * retry: retryWithExponentialBackoff({ maxTries: 5 }),
30
+ * });
31
+ *
32
+ * const { embeddings } = await embedTexts(
33
+ * model,
34
+ * [
35
+ * "At first, Nox didn't know what to do with the pup.",
36
+ * "He keenly observed and absorbed everything around him, from the birds in the sky to the trees in the forest.",
37
+ * ]
38
+ * );
39
+ */
40
+ export declare class HuggingFaceTextEmbeddingModel extends AbstractModel<HuggingFaceTextEmbeddingModelSettings> implements TextEmbeddingModel<HuggingFaceTextEmbeddingResponse, HuggingFaceTextEmbeddingModelSettings> {
41
+ constructor(settings: HuggingFaceTextEmbeddingModelSettings);
42
+ readonly provider = "huggingface";
43
+ get modelName(): string;
44
+ readonly maxTextsPerCall: number;
45
+ readonly contextWindowSize: undefined;
46
+ readonly embeddingDimensions: number | undefined;
47
+ readonly tokenizer: undefined;
48
+ private get apiKey();
49
+ callAPI(texts: Array<string>, options?: FunctionOptions<HuggingFaceTextEmbeddingModelSettings>): Promise<HuggingFaceTextEmbeddingResponse>;
50
+ readonly countPromptTokens: undefined;
51
+ generateEmbeddingResponse(texts: string[], options?: FunctionOptions<HuggingFaceTextEmbeddingModelSettings>): Promise<number[][]>;
52
+ extractEmbeddings(response: HuggingFaceTextEmbeddingResponse): number[][];
53
+ withSettings(additionalSettings: Partial<HuggingFaceTextEmbeddingModelSettings>): this;
54
+ }
55
+ declare const huggingFaceTextEmbeddingResponseSchema: z.ZodArray<z.ZodArray<z.ZodNumber, "many">, "many">;
56
+ export type HuggingFaceTextEmbeddingResponse = z.infer<typeof huggingFaceTextEmbeddingResponseSchema>;
57
+ export {};
@@ -0,0 +1,131 @@
1
+ import z from "zod";
2
+ import { AbstractModel } from "../../model-function/AbstractModel.js";
3
+ import { callWithRetryAndThrottle } from "../../util/api/callWithRetryAndThrottle.js";
4
+ import { createJsonResponseHandler, postJsonToApi, } from "../../util/api/postToApi.js";
5
+ import { failedHuggingFaceCallResponseHandler } from "./HuggingFaceError.js";
6
+ /**
7
+ * Create a text embedding model that calls a Hugging Face Inference API Feature Extraction Task.
8
+ *
9
+ * @see https://huggingface.co/docs/api-inference/detailed_parameters#feature-extraction-task
10
+ *
11
+ * @example
12
+ * const model = new HuggingFaceTextEmbeddingModel({
13
+ * model: "intfloat/e5-base-v2",
14
+ * maxTextsPerCall: 5,
15
+ * retry: retryWithExponentialBackoff({ maxTries: 5 }),
16
+ * });
17
+ *
18
+ * const { embeddings } = await embedTexts(
19
+ * model,
20
+ * [
21
+ * "At first, Nox didn't know what to do with the pup.",
22
+ * "He keenly observed and absorbed everything around him, from the birds in the sky to the trees in the forest.",
23
+ * ]
24
+ * );
25
+ */
26
+ export class HuggingFaceTextEmbeddingModel extends AbstractModel {
27
+ constructor(settings) {
28
+ super({ settings });
29
+ Object.defineProperty(this, "provider", {
30
+ enumerable: true,
31
+ configurable: true,
32
+ writable: true,
33
+ value: "huggingface"
34
+ });
35
+ Object.defineProperty(this, "maxTextsPerCall", {
36
+ enumerable: true,
37
+ configurable: true,
38
+ writable: true,
39
+ value: void 0
40
+ });
41
+ Object.defineProperty(this, "contextWindowSize", {
42
+ enumerable: true,
43
+ configurable: true,
44
+ writable: true,
45
+ value: undefined
46
+ });
47
+ Object.defineProperty(this, "embeddingDimensions", {
48
+ enumerable: true,
49
+ configurable: true,
50
+ writable: true,
51
+ value: void 0
52
+ });
53
+ Object.defineProperty(this, "tokenizer", {
54
+ enumerable: true,
55
+ configurable: true,
56
+ writable: true,
57
+ value: undefined
58
+ });
59
+ Object.defineProperty(this, "countPromptTokens", {
60
+ enumerable: true,
61
+ configurable: true,
62
+ writable: true,
63
+ value: undefined
64
+ });
65
+ // There is no limit documented in the HuggingFace API. Use 1024 as a reasonable default.
66
+ this.maxTextsPerCall = settings.maxTextsPerCall ?? 1024;
67
+ this.embeddingDimensions = settings.embeddingDimensions;
68
+ }
69
+ get modelName() {
70
+ return this.settings.model;
71
+ }
72
+ get apiKey() {
73
+ const apiKey = this.settings.apiKey ?? process.env.HUGGINGFACE_API_KEY;
74
+ if (apiKey == null) {
75
+ throw new Error("No Hugging Face API key provided. Pass it in the constructor or set the HUGGINGFACE_API_KEY environment variable.");
76
+ }
77
+ return apiKey;
78
+ }
79
+ async callAPI(texts, options) {
80
+ if (texts.length > this.maxTextsPerCall) {
81
+ throw new Error(`The HuggingFace feature extraction API is configured to only support ${this.maxTextsPerCall} texts per API call.`);
82
+ }
83
+ const run = options?.run;
84
+ const settings = options?.settings;
85
+ const callSettings = Object.assign({
86
+ apiKey: this.apiKey,
87
+ options: {
88
+ useCache: true,
89
+ waitForModel: true,
90
+ },
91
+ }, this.settings, settings, {
92
+ abortSignal: run?.abortSignal,
93
+ inputs: texts,
94
+ });
95
+ return callWithRetryAndThrottle({
96
+ retry: this.settings.retry,
97
+ throttle: this.settings.throttle,
98
+ call: async () => callHuggingFaceTextGenerationAPI(callSettings),
99
+ });
100
+ }
101
+ generateEmbeddingResponse(texts, options) {
102
+ return this.callAPI(texts, options);
103
+ }
104
+ extractEmbeddings(response) {
105
+ return response;
106
+ }
107
+ withSettings(additionalSettings) {
108
+ return new HuggingFaceTextEmbeddingModel(Object.assign({}, this.settings, additionalSettings));
109
+ }
110
+ }
111
+ const huggingFaceTextEmbeddingResponseSchema = z.array(z.array(z.number()));
112
+ async function callHuggingFaceTextGenerationAPI({ baseUrl = "https://api-inference.huggingface.co/pipeline/feature-extraction", abortSignal, apiKey, model, inputs, options, }) {
113
+ return postJsonToApi({
114
+ url: `${baseUrl}/${model}`,
115
+ headers: {
116
+ Authorization: `Bearer ${apiKey}`,
117
+ },
118
+ body: {
119
+ inputs,
120
+ options: options
121
+ ? {
122
+ use_cache: options?.useCache,
123
+ wait_for_model: options?.waitForModel,
124
+ }
125
+ : undefined,
126
+ },
127
+ failedResponseHandler: failedHuggingFaceCallResponseHandler,
128
+ successfulResponseHandler: createJsonResponseHandler(huggingFaceTextEmbeddingResponseSchema),
129
+ abortSignal,
130
+ });
131
+ }
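The three files above add a `HuggingFaceTextEmbeddingModel` (CJS build, type declarations, ESM build) that calls the Hugging Face Inference API feature-extraction task. A short usage sketch derived from the class's own doc comment; the `embeddingDimensions` value is an assumption about the chosen model, and the API key is read from `HUGGINGFACE_API_KEY` unless passed explicitly:

```ts
import { HuggingFaceTextEmbeddingModel, embedTexts } from "modelfusion";

const model = new HuggingFaceTextEmbeddingModel({
  model: "intfloat/e5-base-v2",
  embeddingDimensions: 768, // assumption: e5-base-v2 returns 768-dimensional vectors
});

const { embeddings } = await embedTexts(model, [
  "At first, Nox didn't know what to do with the pup.",
  "He keenly observed and absorbed everything around him.",
]);
```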
@@ -17,4 +17,5 @@ Object.defineProperty(exports, "__esModule", { value: true });
17
17
  exports.HuggingFaceError = void 0;
18
18
  var HuggingFaceError_js_1 = require("./HuggingFaceError.cjs");
19
19
  Object.defineProperty(exports, "HuggingFaceError", { enumerable: true, get: function () { return HuggingFaceError_js_1.HuggingFaceError; } });
20
+ __exportStar(require("./HuggingFaceTextEmbeddingModel.cjs"), exports);
20
21
  __exportStar(require("./HuggingFaceTextGenerationModel.cjs"), exports);
@@ -1,2 +1,3 @@
1
1
  export { HuggingFaceError, HuggingFaceErrorData } from "./HuggingFaceError.js";
2
+ export * from "./HuggingFaceTextEmbeddingModel.js";
2
3
  export * from "./HuggingFaceTextGenerationModel.js";
@@ -1,2 +1,3 @@
1
1
  export { HuggingFaceError } from "./HuggingFaceError.js";
2
+ export * from "./HuggingFaceTextEmbeddingModel.js";
2
3
  export * from "./HuggingFaceTextGenerationModel.js";
@@ -35,7 +35,7 @@ class LlamaCppTextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
35
35
  enumerable: true,
36
36
  configurable: true,
37
37
  writable: true,
38
- value: undefined
38
+ value: void 0
39
39
  });
40
40
  Object.defineProperty(this, "tokenizer", {
41
41
  enumerable: true,
@@ -48,6 +48,7 @@ class LlamaCppTextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
48
48
  retry: this.settings.tokenizerSettings?.retry,
49
49
  throttle: this.settings.tokenizerSettings?.throttle,
50
50
  });
51
+ this.embeddingDimensions = this.settings.embeddingDimensions;
51
52
  }
52
53
  get modelName() {
53
54
  return null;
@@ -4,8 +4,9 @@ import { FunctionOptions } from "../../model-function/FunctionOptions.js";
4
4
  import { TextEmbeddingModel, TextEmbeddingModelSettings } from "../../model-function/embed-text/TextEmbeddingModel.js";
5
5
  import { RetryFunction } from "../../util/api/RetryFunction.js";
6
6
  import { ThrottleFunction } from "../../util/api/ThrottleFunction.js";
7
- export interface LlamaCppEmbeddingModelSettings extends TextEmbeddingModelSettings {
7
+ export interface LlamaCppTextEmbeddingModelSettings extends TextEmbeddingModelSettings {
8
8
  baseUrl?: string;
9
+ embeddingDimensions?: number;
9
10
  retry?: RetryFunction;
10
11
  throttle?: ThrottleFunction;
11
12
  tokenizerSettings?: {
@@ -13,21 +14,21 @@ export interface LlamaCppEmbeddingModelSettings extends TextEmbeddingModelSettin
13
14
  throttle?: ThrottleFunction;
14
15
  };
15
16
  }
16
- export declare class LlamaCppTextEmbeddingModel extends AbstractModel<LlamaCppEmbeddingModelSettings> implements TextEmbeddingModel<LlamaCppTextEmbeddingResponse, LlamaCppEmbeddingModelSettings> {
17
- constructor(settings?: LlamaCppEmbeddingModelSettings);
17
+ export declare class LlamaCppTextEmbeddingModel extends AbstractModel<LlamaCppTextEmbeddingModelSettings> implements TextEmbeddingModel<LlamaCppTextEmbeddingResponse, LlamaCppTextEmbeddingModelSettings> {
18
+ constructor(settings?: LlamaCppTextEmbeddingModelSettings);
18
19
  readonly provider: "llamacpp";
19
20
  get modelName(): null;
20
21
  readonly maxTextsPerCall = 1;
21
22
  readonly contextWindowSize: undefined;
22
- readonly embeddingDimensions: undefined;
23
+ readonly embeddingDimensions: number | undefined;
23
24
  private readonly tokenizer;
24
25
  tokenize(text: string): Promise<number[]>;
25
- callAPI(texts: Array<string>, options?: FunctionOptions<LlamaCppEmbeddingModelSettings>): Promise<LlamaCppTextEmbeddingResponse>;
26
- generateEmbeddingResponse(texts: string[], options?: FunctionOptions<LlamaCppEmbeddingModelSettings>): Promise<{
26
+ callAPI(texts: Array<string>, options?: FunctionOptions<LlamaCppTextEmbeddingModelSettings>): Promise<LlamaCppTextEmbeddingResponse>;
27
+ generateEmbeddingResponse(texts: string[], options?: FunctionOptions<LlamaCppTextEmbeddingModelSettings>): Promise<{
27
28
  embedding: number[];
28
29
  }>;
29
30
  extractEmbeddings(response: LlamaCppTextEmbeddingResponse): number[][];
30
- withSettings(additionalSettings: Partial<LlamaCppEmbeddingModelSettings>): this;
31
+ withSettings(additionalSettings: Partial<LlamaCppTextEmbeddingModelSettings>): this;
31
32
  }
32
33
  declare const llamaCppTextEmbeddingResponseSchema: z.ZodObject<{
33
34
  embedding: z.ZodArray<z.ZodNumber, "many">;
@@ -29,7 +29,7 @@ export class LlamaCppTextEmbeddingModel extends AbstractModel {
29
29
  enumerable: true,
30
30
  configurable: true,
31
31
  writable: true,
32
- value: undefined
32
+ value: void 0
33
33
  });
34
34
  Object.defineProperty(this, "tokenizer", {
35
35
  enumerable: true,
@@ -42,6 +42,7 @@ export class LlamaCppTextEmbeddingModel extends AbstractModel {
42
42
  retry: this.settings.tokenizerSettings?.retry,
43
43
  throttle: this.settings.tokenizerSettings?.throttle,
44
44
  });
45
+ this.embeddingDimensions = this.settings.embeddingDimensions;
45
46
  }
46
47
  get modelName() {
47
48
  return null;
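The llama.cpp hunks rename the settings interface to `LlamaCppTextEmbeddingModelSettings` and add an optional `embeddingDimensions` setting that is copied onto the model instance. A minimal sketch of the new option; the dimension value and the locally running llama.cpp server are assumptions:

```ts
import { LlamaCppTextEmbeddingModel, embedTexts } from "modelfusion";

// Assumes a llama.cpp server is running on the default baseUrl with an
// embedding-capable model loaded; 4096 would match a Llama 2 7B model.
const model = new LlamaCppTextEmbeddingModel({ embeddingDimensions: 4096 });

// maxTextsPerCall is 1 for this model, so embed one text per call.
const { embeddings } = await embedTexts(model, [
  "At first, Nox didn't know what to do with the pup.",
]);
```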
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "modelfusion",
3
3
  "description": "Build AI applications, chatbots, and agents with JavaScript and TypeScript.",
4
- "version": "0.3.0",
4
+ "version": "0.4.1",
5
5
  "author": "Lars Grammel",
6
6
  "license": "MIT",
7
7
  "keywords": [
@@ -9,8 +9,11 @@ const validateChatPrompt_js_1 = require("./validateChatPrompt.cjs");
9
9
  *
10
10
  * When the minimal chat prompt (system message + last user message) is already too long, it will only
11
11
  * return this minimal chat prompt.
12
+ *
13
+ * @see https://modelfusion.dev/guide/function/generate-text/prompt-mapping#limiting-the-chat-length
12
14
  */
13
- async function trimChatPrompt({ prompt, model, tokenLimit = model.contextWindowSize - model.maxCompletionTokens, }) {
15
+ async function trimChatPrompt({ prompt, model, tokenLimit = model.contextWindowSize -
16
+ (model.maxCompletionTokens ?? model.contextWindowSize / 4), }) {
14
17
  (0, validateChatPrompt_js_1.validateChatPrompt)(prompt);
15
18
  const startsWithSystemMessage = "system" in prompt[0];
16
19
  const systemMessage = startsWithSystemMessage ? [prompt[0]] : [];
@@ -1,4 +1,4 @@
1
- import { TextGenerationModel } from "model-function/generate-text/TextGenerationModel.js";
1
+ import { TextGenerationModel } from "../../model-function/generate-text/TextGenerationModel.js";
2
2
  import { ChatPrompt } from "./ChatPrompt.js";
3
3
  /**
4
4
  * Keeps only the most recent messages in the prompt, while leaving enough space for the completion.
@@ -7,12 +7,13 @@ import { ChatPrompt } from "./ChatPrompt.js";
7
7
  *
8
8
  * When the minimal chat prompt (system message + last user message) is already too long, it will only
9
9
  * return this minimal chat prompt.
10
+ *
11
+ * @see https://modelfusion.dev/guide/function/generate-text/prompt-mapping#limiting-the-chat-length
10
12
  */
11
13
  export declare function trimChatPrompt({ prompt, model, tokenLimit, }: {
12
14
  prompt: ChatPrompt;
13
15
  model: TextGenerationModel<ChatPrompt, any, any, any> & {
14
16
  contextWindowSize: number;
15
- maxCompletionTokens: number;
16
17
  countPromptTokens: (prompt: ChatPrompt) => PromiseLike<number>;
17
18
  };
18
19
  tokenLimit?: number;
@@ -6,8 +6,11 @@ import { validateChatPrompt } from "./validateChatPrompt.js";
6
6
  *
7
7
  * When the minimal chat prompt (system message + last user message) is already too long, it will only
8
8
  * return this minimal chat prompt.
9
+ *
10
+ * @see https://modelfusion.dev/guide/function/generate-text/prompt-mapping#limiting-the-chat-length
9
11
  */
10
- export async function trimChatPrompt({ prompt, model, tokenLimit = model.contextWindowSize - model.maxCompletionTokens, }) {
12
+ export async function trimChatPrompt({ prompt, model, tokenLimit = model.contextWindowSize -
13
+ (model.maxCompletionTokens ?? model.contextWindowSize / 4), }) {
11
14
  validateChatPrompt(prompt);
12
15
  const startsWithSystemMessage = "system" in prompt[0];
13
16
  const systemMessage = startsWithSystemMessage ? [prompt[0]] : [];
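After these hunks, `trimChatPrompt` no longer requires the model to define `maxCompletionTokens`: the optional `tokenLimit` now falls back to reserving a quarter of the context window when `maxCompletionTokens` is unset. A hedged sketch of the call; the user/ai message shape and the concrete `model` are assumptions, since only the `system` check appears in this diff:

```ts
import { trimChatPrompt } from "modelfusion";

// `model` is assumed to be a chat-capable TextGenerationModel that exposes
// contextWindowSize and countPromptTokens for chat prompts.
declare const model: Parameters<typeof trimChatPrompt>[0]["model"];

const trimmedPrompt = await trimChatPrompt({
  model,
  prompt: [
    { system: "You are a helpful assistant." },
    { user: "Hello!" },
    { ai: "Hi! How can I help you today?" },
    { user: "Summarize our conversation so far." },
  ],
  // tokenLimit is optional; the default is
  // model.contextWindowSize - (model.maxCompletionTokens ?? model.contextWindowSize / 4)
});
```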