modelfusion 0.28.0 → 0.29.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/README.md +17 -1
  2. package/core/api/postToApi.d.ts +1 -1
  3. package/model-function/ModelCallEvent.d.ts +3 -2
  4. package/model-function/SuccessfulModelCall.d.ts +1 -1
  5. package/model-function/describe-image/ImageDescriptionEvent.cjs +2 -0
  6. package/model-function/describe-image/ImageDescriptionEvent.d.ts +18 -0
  7. package/model-function/describe-image/ImageDescriptionEvent.js +1 -0
  8. package/model-function/describe-image/ImageDescriptionModel.cjs +2 -0
  9. package/model-function/describe-image/ImageDescriptionModel.d.ts +8 -0
  10. package/model-function/describe-image/ImageDescriptionModel.js +1 -0
  11. package/model-function/describe-image/describeImage.cjs +20 -0
  12. package/model-function/describe-image/describeImage.d.ts +9 -0
  13. package/model-function/describe-image/describeImage.js +16 -0
  14. package/model-function/embed-text/TextEmbeddingModel.d.ts +4 -1
  15. package/model-function/embed-text/embedText.cjs +7 -2
  16. package/model-function/embed-text/embedText.js +7 -2
  17. package/model-function/index.cjs +3 -0
  18. package/model-function/index.d.ts +3 -0
  19. package/model-function/index.js +3 -0
  20. package/model-provider/huggingface/HuggingFaceError.cjs +4 -2
  21. package/model-provider/huggingface/HuggingFaceError.d.ts +3 -3
  22. package/model-provider/huggingface/HuggingFaceError.js +4 -2
  23. package/model-provider/huggingface/HuggingFaceImageDescriptionModel.cjs +81 -0
  24. package/model-provider/huggingface/HuggingFaceImageDescriptionModel.d.ts +37 -0
  25. package/model-provider/huggingface/HuggingFaceImageDescriptionModel.js +74 -0
  26. package/model-provider/huggingface/index.cjs +1 -0
  27. package/model-provider/huggingface/index.d.ts +1 -0
  28. package/model-provider/huggingface/index.js +1 -0
  29. package/model-provider/openai/OpenAITextEmbeddingModel.cjs +6 -8
  30. package/model-provider/openai/OpenAITextEmbeddingModel.d.ts +2 -2
  31. package/model-provider/openai/OpenAITextEmbeddingModel.js +6 -8
  32. package/package.json +1 -1
package/README.md CHANGED
@@ -314,6 +314,21 @@ const speech = await synthesizeSpeech(
 
  Providers: [Eleven Labs](https://modelfusion.dev/integration/model-provider/elevenlabs), [LMNT](https://modelfusion.dev/integration/model-provider/lmnt)
 
+ ### [Describe Image](https://modelfusion.dev/guide/function/describe-image)
+
+ Describe an image as text, e.g. for image captioning or OCR.
+
+ ```ts
+ const text = await describeImage(
+   new HuggingFaceImageDescriptionModel({
+     model: "nlpconnect/vit-gpt2-image-captioning",
+   }),
+   buffer
+ );
+ ```
+
+ Providers: [HuggingFace](https://modelfusion.dev/integration/model-provider/huggingface)
+
  ### [Generate Image](https://modelfusion.dev/guide/function/generate-image)
 
  Generate a base64-encoded image from a prompt.
@@ -412,7 +427,8 @@ Integrations: [Helicone](https://modelfusion.dev/integration/observability/helic
  - [Tokenize Text](https://modelfusion.dev/guide/function/tokenize-text)
  - [Transcribe Speech](https://modelfusion.dev/guide/function/transcribe-speech)
  - [Synthesize Speech](https://modelfusion.dev/guide/function/synthesize-speech)
- - [Generate images](https://modelfusion.dev/guide/function/generate-image)
+ - [Describe Image](https://modelfusion.dev/guide/function/describe-image)
+ - [Generate Image](https://modelfusion.dev/guide/function/generate-image)
  - [Tools](https://modelfusion.dev/guide/tools)
  - [Text Chunks](https://modelfusion.dev/guide/text-chunk/)
  - [Split Text](https://modelfusion.dev/guide/text-chunk/split)
package/core/api/postToApi.d.ts CHANGED
@@ -21,7 +21,7 @@ export declare const postToApi: <T>({ url, headers, body, successfulResponseHand
  url: string;
  headers?: Record<string, string> | undefined;
  body: {
-     content: string | FormData;
+     content: string | FormData | Buffer;
      values: unknown;
  };
  failedResponseHandler: ResponseHandler<Error>;
package/model-function/ModelCallEvent.d.ts CHANGED
@@ -1,5 +1,6 @@
  import { BaseFunctionFinishedEvent, BaseFunctionStartedEvent } from "../core/FunctionEvent.js";
  import { ModelInformation } from "./ModelInformation.js";
+ import { ImageDescriptionFinishedEvent, ImageDescriptionStartedEvent } from "./describe-image/ImageDescriptionEvent.js";
  import { TextEmbeddingFinishedEvent, TextEmbeddingStartedEvent } from "./embed-text/TextEmbeddingEvent.js";
  import { ImageGenerationFinishedEvent, ImageGenerationStartedEvent } from "./generate-image/ImageGenerationEvent.js";
  import { JsonGenerationFinishedEvent, JsonGenerationStartedEvent } from "./generate-json/JsonGenerationEvent.js";
@@ -47,5 +48,5 @@ export interface BaseModelCallFinishedEvent extends BaseFunctionFinishedEvent {
   */
  result: BaseModelCallFinishedEventResult;
  }
- export type ModelCallStartedEvent = ImageGenerationStartedEvent | JsonGenerationStartedEvent | SpeechSynthesisStartedEvent | TextEmbeddingStartedEvent | TextGenerationStartedEvent | TextStreamingStartedEvent | TranscriptionStartedEvent;
- export type ModelCallFinishedEvent = ImageGenerationFinishedEvent | JsonGenerationFinishedEvent | SpeechSynthesisFinishedEvent | TextEmbeddingFinishedEvent | TextGenerationFinishedEvent | TextStreamingFinishedEvent | TranscriptionFinishedEvent;
+ export type ModelCallStartedEvent = ImageDescriptionStartedEvent | ImageGenerationStartedEvent | JsonGenerationStartedEvent | SpeechSynthesisStartedEvent | TextEmbeddingStartedEvent | TextGenerationStartedEvent | TextStreamingStartedEvent | TranscriptionStartedEvent;
+ export type ModelCallFinishedEvent = ImageDescriptionFinishedEvent | ImageGenerationFinishedEvent | JsonGenerationFinishedEvent | SpeechSynthesisFinishedEvent | TextEmbeddingFinishedEvent | TextGenerationFinishedEvent | TextStreamingFinishedEvent | TranscriptionFinishedEvent;
package/model-function/SuccessfulModelCall.d.ts CHANGED
@@ -1,7 +1,7 @@
  import { FunctionEvent } from "../core/FunctionEvent.js";
  import { ModelInformation } from "./ModelInformation.js";
  export type SuccessfulModelCall = {
-     type: "image-generation" | "json-generation" | "json-or-text-generation" | "speech-synthesis" | "text-embedding" | "text-generation" | "text-streaming" | "transcription";
+     type: "image-description" | "image-generation" | "json-generation" | "json-or-text-generation" | "speech-synthesis" | "text-embedding" | "text-generation" | "text-streaming" | "transcription";
      model: ModelInformation;
      settings: unknown;
      response: unknown;
package/model-function/describe-image/ImageDescriptionEvent.cjs ADDED
@@ -0,0 +1,2 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
package/model-function/describe-image/ImageDescriptionEvent.d.ts ADDED
@@ -0,0 +1,18 @@
+ import { BaseModelCallFinishedEvent, BaseModelCallStartedEvent } from "../ModelCallEvent.js";
+ export interface ImageDescriptionStartedEvent extends BaseModelCallStartedEvent {
+     functionType: "image-description";
+ }
+ export type ImageDescriptionFinishedEventResult = {
+     status: "success";
+     response: unknown;
+     output: string;
+ } | {
+     status: "error";
+     error: unknown;
+ } | {
+     status: "abort";
+ };
+ export interface ImageDescriptionFinishedEvent extends BaseModelCallFinishedEvent {
+     functionType: "image-description";
+     result: ImageDescriptionFinishedEventResult;
+ }
package/model-function/describe-image/ImageDescriptionEvent.js ADDED
@@ -0,0 +1 @@
+ export {};
package/model-function/describe-image/ImageDescriptionModel.cjs ADDED
@@ -0,0 +1,2 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
package/model-function/describe-image/ImageDescriptionModel.d.ts ADDED
@@ -0,0 +1,8 @@
+ import { ModelFunctionOptions } from "../ModelFunctionOptions.js";
+ import { Model, ModelSettings } from "../Model.js";
+ export interface ImageDescriptionModelSettings extends ModelSettings {
+ }
+ export interface ImageDescriptionModel<DATA, RESPONSE, SETTINGS extends ImageDescriptionModelSettings> extends Model<SETTINGS> {
+     generateImageDescriptionResponse: (data: DATA, options?: ModelFunctionOptions<SETTINGS>) => PromiseLike<RESPONSE>;
+     extractImageDescription: (response: RESPONSE) => string;
+ }
package/model-function/describe-image/ImageDescriptionModel.js ADDED
@@ -0,0 +1 @@
+ export {};
package/model-function/describe-image/describeImage.cjs ADDED
@@ -0,0 +1,20 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.describeImage = void 0;
+ const executeCall_js_1 = require("../executeCall.cjs");
+ /**
+  * Describe an image as text.
+  *
+  * Depending on the model, this can be used for image captioning, for describing the contents of an image, or for OCR.
+  */
+ function describeImage(model, data, options) {
+     return (0, executeCall_js_1.executeCall)({
+         functionType: "image-description",
+         input: data,
+         model,
+         options,
+         generateResponse: (options) => model.generateImageDescriptionResponse(data, options),
+         extractOutputValue: model.extractImageDescription,
+     });
+ }
+ exports.describeImage = describeImage;
package/model-function/describe-image/describeImage.d.ts ADDED
@@ -0,0 +1,9 @@
+ import { ModelFunctionOptions } from "../ModelFunctionOptions.js";
+ import { ModelFunctionPromise } from "../executeCall.js";
+ import { ImageDescriptionModel, ImageDescriptionModelSettings } from "./ImageDescriptionModel.js";
+ /**
+  * Describe an image as text.
+  *
+  * Depending on the model, this can be used for image captioning, for describing the contents of an image, or for OCR.
+  */
+ export declare function describeImage<DATA, RESPONSE, SETTINGS extends ImageDescriptionModelSettings>(model: ImageDescriptionModel<DATA, RESPONSE, SETTINGS>, data: DATA, options?: ModelFunctionOptions<SETTINGS>): ModelFunctionPromise<string, RESPONSE>;
package/model-function/describe-image/describeImage.js ADDED
@@ -0,0 +1,16 @@
+ import { executeCall } from "../executeCall.js";
+ /**
+  * Describe an image as text.
+  *
+  * Depending on the model, this can be used for image captioning, for describing the contents of an image, or for OCR.
+  */
+ export function describeImage(model, data, options) {
+     return executeCall({
+         functionType: "image-description",
+         input: data,
+         model,
+         options,
+         generateResponse: (options) => model.generateImageDescriptionResponse(data, options),
+         extractOutputValue: model.extractImageDescription,
+     });
+ }
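For orientation, a minimal usage sketch of the new function. The file path and the caption output are illustrative, and it assumes a Hugging Face API key is configured in the environment:

```ts
import fs from "node:fs/promises";
import { describeImage, HuggingFaceImageDescriptionModel } from "modelfusion";

// Load an image into a Buffer (path is a placeholder):
const image = await fs.readFile("example.png");

// describeImage(model, data, options?) resolves to the extracted text:
const text = await describeImage(
  new HuggingFaceImageDescriptionModel({
    model: "nlpconnect/vit-gpt2-image-captioning",
  }),
  image
);

console.log(text); // e.g. "a cat sitting on top of a couch"
```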
package/model-function/embed-text/TextEmbeddingModel.d.ts CHANGED
@@ -12,7 +12,10 @@ export interface TextEmbeddingModel<RESPONSE, SETTINGS extends TextEmbeddingMode
   * The size of the embedding vector.
   */
  readonly embeddingDimensions: number | undefined;
- readonly maxTextsPerCall: number;
+ /**
+  * Limit of how many texts can be sent in a single API call.
+  */
+ readonly maxTextsPerCall: number | undefined;
  generateEmbeddingResponse(texts: string[], options?: ModelFunctionOptions<SETTINGS>): PromiseLike<RESPONSE>;
  extractEmbeddings(response: RESPONSE): Vector[];
  }
package/model-function/embed-text/embedText.cjs CHANGED
@@ -24,8 +24,13 @@ function embedTexts(model, texts, options) {
  // split the texts into groups that are small enough to be sent in one call:
  const maxTextsPerCall = model.maxTextsPerCall;
  const textGroups = [];
- for (let i = 0; i < texts.length; i += maxTextsPerCall) {
-     textGroups.push(texts.slice(i, i + maxTextsPerCall));
+ if (maxTextsPerCall == null) {
+     textGroups.push(texts);
+ }
+ else {
+     for (let i = 0; i < texts.length; i += maxTextsPerCall) {
+         textGroups.push(texts.slice(i, i + maxTextsPerCall));
+     }
  }
  return Promise.all(textGroups.map((textGroup) => model.generateEmbeddingResponse(textGroup, options)));
  },
package/model-function/embed-text/embedText.js CHANGED
@@ -21,8 +21,13 @@ export function embedTexts(model, texts, options) {
  // split the texts into groups that are small enough to be sent in one call:
  const maxTextsPerCall = model.maxTextsPerCall;
  const textGroups = [];
- for (let i = 0; i < texts.length; i += maxTextsPerCall) {
-     textGroups.push(texts.slice(i, i + maxTextsPerCall));
+ if (maxTextsPerCall == null) {
+     textGroups.push(texts);
+ }
+ else {
+     for (let i = 0; i < texts.length; i += maxTextsPerCall) {
+         textGroups.push(texts.slice(i, i + maxTextsPerCall));
+     }
  }
  return Promise.all(textGroups.map((textGroup) => model.generateEmbeddingResponse(textGroup, options)));
  },
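Since `maxTextsPerCall` may now be `undefined`, the grouping logic branches on the no-limit case instead of doing arithmetic with a missing value. A standalone sketch of the same algorithm, runnable outside the library:

```ts
// Group texts into batches of at most maxTextsPerCall items;
// with no limit, everything goes out in a single call.
function groupTexts(
  texts: string[],
  maxTextsPerCall: number | undefined
): string[][] {
  const textGroups: string[][] = [];
  if (maxTextsPerCall == null) {
    textGroups.push(texts);
  } else {
    for (let i = 0; i < texts.length; i += maxTextsPerCall) {
      textGroups.push(texts.slice(i, i + maxTextsPerCall));
    }
  }
  return textGroups;
}

console.log(groupTexts(["a", "b", "c"], 2)); // [["a", "b"], ["c"]]
console.log(groupTexts(["a", "b", "c"], undefined)); // [["a", "b", "c"]]
```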
package/model-function/index.cjs CHANGED
@@ -19,6 +19,9 @@ __exportStar(require("./ModelCallEvent.cjs"), exports);
  __exportStar(require("./ModelFunctionOptions.cjs"), exports);
  __exportStar(require("./ModelInformation.cjs"), exports);
  __exportStar(require("./SuccessfulModelCall.cjs"), exports);
+ __exportStar(require("./describe-image/ImageDescriptionEvent.cjs"), exports);
+ __exportStar(require("./describe-image/ImageDescriptionModel.cjs"), exports);
+ __exportStar(require("./describe-image/describeImage.cjs"), exports);
  __exportStar(require("./embed-text/TextEmbeddingEvent.cjs"), exports);
  __exportStar(require("./embed-text/TextEmbeddingModel.cjs"), exports);
  __exportStar(require("./embed-text/embedText.cjs"), exports);
package/model-function/index.d.ts CHANGED
@@ -3,6 +3,9 @@ export * from "./ModelCallEvent.js";
  export * from "./ModelFunctionOptions.js";
  export * from "./ModelInformation.js";
  export * from "./SuccessfulModelCall.js";
+ export * from "./describe-image/ImageDescriptionEvent.js";
+ export * from "./describe-image/ImageDescriptionModel.js";
+ export * from "./describe-image/describeImage.js";
  export * from "./embed-text/TextEmbeddingEvent.js";
  export * from "./embed-text/TextEmbeddingModel.js";
  export * from "./embed-text/embedText.js";
package/model-function/index.js CHANGED
@@ -3,6 +3,9 @@ export * from "./ModelCallEvent.js";
  export * from "./ModelFunctionOptions.js";
  export * from "./ModelInformation.js";
  export * from "./SuccessfulModelCall.js";
+ export * from "./describe-image/ImageDescriptionEvent.js";
+ export * from "./describe-image/ImageDescriptionModel.js";
+ export * from "./describe-image/describeImage.js";
  export * from "./embed-text/TextEmbeddingEvent.js";
  export * from "./embed-text/TextEmbeddingModel.js";
  export * from "./embed-text/embedText.js";
package/model-provider/huggingface/HuggingFaceError.cjs CHANGED
@@ -8,10 +8,12 @@ const secure_json_parse_1 = __importDefault(require("secure-json-parse"));
  const zod_1 = require("zod");
  const ApiCallError_js_1 = require("../../core/api/ApiCallError.cjs");
  exports.huggingFaceErrorDataSchema = zod_1.z.object({
-     error: zod_1.z.array(zod_1.z.string()),
+     error: zod_1.z.array(zod_1.z.string()).or(zod_1.z.string()),
  });
  class HuggingFaceError extends ApiCallError_js_1.ApiCallError {
-     constructor({ data, statusCode, url, requestBodyValues, message = data.error.join("\n\n"), }) {
+     constructor({ data, statusCode, url, requestBodyValues, message = typeof data.error === "string"
+         ? data.error
+         : data.error.join("\n\n"), }) {
          super({ message, statusCode, requestBodyValues, url });
          Object.defineProperty(this, "data", {
              enumerable: true,
package/model-provider/huggingface/HuggingFaceError.d.ts CHANGED
@@ -2,11 +2,11 @@ import { z } from "zod";
  import { ApiCallError } from "../../core/api/ApiCallError.js";
  import { ResponseHandler } from "../../core/api/postToApi.js";
  export declare const huggingFaceErrorDataSchema: z.ZodObject<{
-     error: z.ZodArray<z.ZodString, "many">;
+     error: z.ZodUnion<[z.ZodArray<z.ZodString, "many">, z.ZodString]>;
  }, "strip", z.ZodTypeAny, {
-     error: string[];
+     error: (string | string[]) & (string | string[] | undefined);
  }, {
-     error: string[];
+     error: (string | string[]) & (string | string[] | undefined);
  }>;
  export type HuggingFaceErrorData = z.infer<typeof huggingFaceErrorDataSchema>;
  export declare class HuggingFaceError extends ApiCallError {
package/model-provider/huggingface/HuggingFaceError.js CHANGED
@@ -2,10 +2,12 @@ import SecureJSON from "secure-json-parse";
  import { z } from "zod";
  import { ApiCallError } from "../../core/api/ApiCallError.js";
  export const huggingFaceErrorDataSchema = z.object({
-     error: z.array(z.string()),
+     error: z.array(z.string()).or(z.string()),
  });
  export class HuggingFaceError extends ApiCallError {
-     constructor({ data, statusCode, url, requestBodyValues, message = data.error.join("\n\n"), }) {
+     constructor({ data, statusCode, url, requestBodyValues, message = typeof data.error === "string"
+         ? data.error
+         : data.error.join("\n\n"), }) {
          super({ message, statusCode, requestBodyValues, url });
          Object.defineProperty(this, "data", {
              enumerable: true,
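The widened schema accommodates Hugging Face responses where `error` is a plain string rather than an array of strings. A self-contained sketch showing both shapes validating and the same message-building logic as the new constructor default (the schema is restated locally and the sample payloads are illustrative):

```ts
import { z } from "zod";

// Mirrors the widened huggingFaceErrorDataSchema from the diff above:
const errorDataSchema = z.object({
  error: z.array(z.string()).or(z.string()),
});

// Both payload shapes now validate:
const single = errorDataSchema.parse({ error: "Model is currently loading" });
const multiple = errorDataSchema.parse({ error: ["error 1", "error 2"] });

// Same message default as the new constructor:
const toMessage = (error: string | string[]) =>
  typeof error === "string" ? error : error.join("\n\n");

console.log(toMessage(single.error)); // "Model is currently loading"
console.log(toMessage(multiple.error)); // "error 1\n\nerror 2"
```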
package/model-provider/huggingface/HuggingFaceImageDescriptionModel.cjs ADDED
@@ -0,0 +1,81 @@
+ "use strict";
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+     return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.HuggingFaceImageDescriptionModel = void 0;
+ const zod_1 = __importDefault(require("zod"));
+ const callWithRetryAndThrottle_js_1 = require("../../core/api/callWithRetryAndThrottle.cjs");
+ const postToApi_js_1 = require("../../core/api/postToApi.cjs");
+ const AbstractModel_js_1 = require("../../model-function/AbstractModel.cjs");
+ const HuggingFaceApiConfiguration_js_1 = require("./HuggingFaceApiConfiguration.cjs");
+ const HuggingFaceError_js_1 = require("./HuggingFaceError.cjs");
+ /**
+  * Create an image to text model that calls a Hugging Face Image-to-Text Inference API.
+  *
+  * @see https://huggingface.co/tasks/image-to-text
+  */
+ class HuggingFaceImageDescriptionModel extends AbstractModel_js_1.AbstractModel {
+     constructor(settings) {
+         super({ settings });
+         Object.defineProperty(this, "provider", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: "huggingface"
+         });
+         Object.defineProperty(this, "countPromptTokens", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: undefined
+         });
+     }
+     get modelName() {
+         return this.settings.model;
+     }
+     async callAPI(data, options) {
+         const run = options?.run;
+         const settings = options?.settings;
+         const callSettings = {
+             ...this.settings,
+             ...settings,
+             abortSignal: run?.abortSignal,
+             data,
+         };
+         return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
+             retry: callSettings.api?.retry,
+             throttle: callSettings.api?.throttle,
+             call: async () => callHuggingFaceImageDescriptionAPI(callSettings),
+         });
+     }
+     get settingsForEvent() {
+         return {};
+     }
+     generateImageDescriptionResponse(data, options) {
+         return this.callAPI(data, options);
+     }
+     extractImageDescription(response) {
+         return response[0].generated_text;
+     }
+     withSettings(additionalSettings) {
+         return new HuggingFaceImageDescriptionModel(Object.assign({}, this.settings, additionalSettings));
+     }
+ }
+ exports.HuggingFaceImageDescriptionModel = HuggingFaceImageDescriptionModel;
+ const huggingFaceImageDescriptionResponseSchema = zod_1.default.array(zod_1.default.object({
+     generated_text: zod_1.default.string(),
+ }));
+ async function callHuggingFaceImageDescriptionAPI({ api = new HuggingFaceApiConfiguration_js_1.HuggingFaceApiConfiguration(), abortSignal, model, data, }) {
+     return (0, postToApi_js_1.postToApi)({
+         url: api.assembleUrl(`/${model}`),
+         headers: api.headers,
+         body: {
+             content: data,
+             values: {},
+         },
+         failedResponseHandler: HuggingFaceError_js_1.failedHuggingFaceCallResponseHandler,
+         successfulResponseHandler: (0, postToApi_js_1.createJsonResponseHandler)(huggingFaceImageDescriptionResponseSchema),
+         abortSignal,
+     });
+ }
package/model-provider/huggingface/HuggingFaceImageDescriptionModel.d.ts ADDED
@@ -0,0 +1,37 @@
+ /// <reference types="node" resolution-mode="require"/>
+ import z from "zod";
+ import { ApiConfiguration } from "../../core/api/ApiConfiguration.js";
+ import { AbstractModel } from "../../model-function/AbstractModel.js";
+ import { ModelFunctionOptions } from "../../model-function/ModelFunctionOptions.js";
+ import { ImageDescriptionModel, ImageDescriptionModelSettings } from "../../model-function/describe-image/ImageDescriptionModel.js";
+ export interface HuggingFaceImageDescriptionModelSettings extends ImageDescriptionModelSettings {
+     api?: ApiConfiguration;
+     model: string;
+ }
+ /**
+  * Create an image to text model that calls a Hugging Face Image-to-Text Inference API.
+  *
+  * @see https://huggingface.co/tasks/image-to-text
+  */
+ export declare class HuggingFaceImageDescriptionModel extends AbstractModel<HuggingFaceImageDescriptionModelSettings> implements ImageDescriptionModel<Buffer, HuggingFaceImageDescriptionResponse, HuggingFaceImageDescriptionModelSettings> {
+     constructor(settings: HuggingFaceImageDescriptionModelSettings);
+     readonly provider = "huggingface";
+     get modelName(): string;
+     callAPI(data: Buffer, options?: ModelFunctionOptions<HuggingFaceImageDescriptionModelSettings>): Promise<HuggingFaceImageDescriptionResponse>;
+     get settingsForEvent(): Partial<HuggingFaceImageDescriptionModelSettings>;
+     readonly countPromptTokens: undefined;
+     generateImageDescriptionResponse(data: Buffer, options?: ModelFunctionOptions<HuggingFaceImageDescriptionModelSettings>): Promise<{
+         generated_text: string;
+     }[]>;
+     extractImageDescription(response: HuggingFaceImageDescriptionResponse): string;
+     withSettings(additionalSettings: Partial<HuggingFaceImageDescriptionModelSettings>): this;
+ }
+ declare const huggingFaceImageDescriptionResponseSchema: z.ZodArray<z.ZodObject<{
+     generated_text: z.ZodString;
+ }, "strip", z.ZodTypeAny, {
+     generated_text: string;
+ }, {
+     generated_text: string;
+ }>, "many">;
+ export type HuggingFaceImageDescriptionResponse = z.infer<typeof huggingFaceImageDescriptionResponseSchema>;
+ export {};
package/model-provider/huggingface/HuggingFaceImageDescriptionModel.js ADDED
@@ -0,0 +1,74 @@
+ import z from "zod";
+ import { callWithRetryAndThrottle } from "../../core/api/callWithRetryAndThrottle.js";
+ import { createJsonResponseHandler, postToApi, } from "../../core/api/postToApi.js";
+ import { AbstractModel } from "../../model-function/AbstractModel.js";
+ import { HuggingFaceApiConfiguration } from "./HuggingFaceApiConfiguration.js";
+ import { failedHuggingFaceCallResponseHandler } from "./HuggingFaceError.js";
+ /**
+  * Create an image to text model that calls a Hugging Face Image-to-Text Inference API.
+  *
+  * @see https://huggingface.co/tasks/image-to-text
+  */
+ export class HuggingFaceImageDescriptionModel extends AbstractModel {
+     constructor(settings) {
+         super({ settings });
+         Object.defineProperty(this, "provider", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: "huggingface"
+         });
+         Object.defineProperty(this, "countPromptTokens", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: undefined
+         });
+     }
+     get modelName() {
+         return this.settings.model;
+     }
+     async callAPI(data, options) {
+         const run = options?.run;
+         const settings = options?.settings;
+         const callSettings = {
+             ...this.settings,
+             ...settings,
+             abortSignal: run?.abortSignal,
+             data,
+         };
+         return callWithRetryAndThrottle({
+             retry: callSettings.api?.retry,
+             throttle: callSettings.api?.throttle,
+             call: async () => callHuggingFaceImageDescriptionAPI(callSettings),
+         });
+     }
+     get settingsForEvent() {
+         return {};
+     }
+     generateImageDescriptionResponse(data, options) {
+         return this.callAPI(data, options);
+     }
+     extractImageDescription(response) {
+         return response[0].generated_text;
+     }
+     withSettings(additionalSettings) {
+         return new HuggingFaceImageDescriptionModel(Object.assign({}, this.settings, additionalSettings));
+     }
+ }
+ const huggingFaceImageDescriptionResponseSchema = z.array(z.object({
+     generated_text: z.string(),
+ }));
+ async function callHuggingFaceImageDescriptionAPI({ api = new HuggingFaceApiConfiguration(), abortSignal, model, data, }) {
+     return postToApi({
+         url: api.assembleUrl(`/${model}`),
+         headers: api.headers,
+         body: {
+             content: data,
+             values: {},
+         },
+         failedResponseHandler: failedHuggingFaceCallResponseHandler,
+         successfulResponseHandler: createJsonResponseHandler(huggingFaceImageDescriptionResponseSchema),
+         abortSignal,
+     });
+ }
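The Inference API responds with an array of `{ generated_text }` objects, and `extractImageDescription` takes the first entry. A standalone sketch of that validate-and-extract step (schema restated locally, payload illustrative):

```ts
import { z } from "zod";

// Mirrors huggingFaceImageDescriptionResponseSchema from the diff above:
const responseSchema = z.array(z.object({ generated_text: z.string() }));

// Hypothetical raw API payload for illustration:
const raw: unknown = [{ generated_text: "a dog playing in the grass" }];

// Validate, then extract the first generated text like extractImageDescription does:
const response = responseSchema.parse(raw);
console.log(response[0].generated_text); // "a dog playing in the grass"
```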
package/model-provider/huggingface/index.cjs CHANGED
@@ -18,5 +18,6 @@ exports.HuggingFaceError = void 0;
  __exportStar(require("./HuggingFaceApiConfiguration.cjs"), exports);
  var HuggingFaceError_js_1 = require("./HuggingFaceError.cjs");
  Object.defineProperty(exports, "HuggingFaceError", { enumerable: true, get: function () { return HuggingFaceError_js_1.HuggingFaceError; } });
+ __exportStar(require("./HuggingFaceImageDescriptionModel.cjs"), exports);
  __exportStar(require("./HuggingFaceTextEmbeddingModel.cjs"), exports);
  __exportStar(require("./HuggingFaceTextGenerationModel.cjs"), exports);
package/model-provider/huggingface/index.d.ts CHANGED
@@ -1,4 +1,5 @@
  export * from "./HuggingFaceApiConfiguration.js";
  export { HuggingFaceError, HuggingFaceErrorData } from "./HuggingFaceError.js";
+ export * from "./HuggingFaceImageDescriptionModel.js";
  export * from "./HuggingFaceTextEmbeddingModel.js";
  export * from "./HuggingFaceTextGenerationModel.js";
package/model-provider/huggingface/index.js CHANGED
@@ -1,4 +1,5 @@
  export * from "./HuggingFaceApiConfiguration.js";
  export { HuggingFaceError } from "./HuggingFaceError.js";
+ export * from "./HuggingFaceImageDescriptionModel.js";
  export * from "./HuggingFaceTextEmbeddingModel.js";
  export * from "./HuggingFaceTextGenerationModel.js";
package/model-provider/openai/OpenAITextEmbeddingModel.cjs CHANGED
@@ -58,7 +58,7 @@ class OpenAITextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
      enumerable: true,
      configurable: true,
      writable: true,
-     value: 1
+     value: 2048
  });
  Object.defineProperty(this, "embeddingDimensions", {
      enumerable: true,
@@ -90,7 +90,7 @@ class OpenAITextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
  async countTokens(input) {
      return (0, countTokens_js_1.countTokens)(this.tokenizer, input);
  }
- async callAPI(text, options) {
+ async callAPI(texts, options) {
      const run = options?.run;
      const settings = options?.settings;
      const combinedSettings = {
@@ -103,7 +103,7 @@ class OpenAITextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
      ...combinedSettings,
      // other settings:
      abortSignal: run?.abortSignal,
-     input: text,
+     input: texts,
  };
  return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
      retry: callSettings.api?.retry,
@@ -118,7 +118,7 @@ class OpenAITextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
  if (texts.length > this.maxTextsPerCall) {
      throw new Error(`The OpenAI embedding API only supports ${this.maxTextsPerCall} texts per API call.`);
  }
- return this.callAPI(texts[0], options);
+ return this.callAPI(texts, options);
  }
  extractEmbeddings(response) {
      return [response.data[0].embedding];
@@ -130,13 +130,11 @@ class OpenAITextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
  exports.OpenAITextEmbeddingModel = OpenAITextEmbeddingModel;
  const openAITextEmbeddingResponseSchema = zod_1.default.object({
      object: zod_1.default.literal("list"),
-     data: zod_1.default
-         .array(zod_1.default.object({
+     data: zod_1.default.array(zod_1.default.object({
          object: zod_1.default.literal("embedding"),
          embedding: zod_1.default.array(zod_1.default.number()),
          index: zod_1.default.number(),
-     }))
-         .length(1),
+     })),
      model: zod_1.default.string(),
      usage: zod_1.default.object({
          prompt_tokens: zod_1.default.number(),
package/model-provider/openai/OpenAITextEmbeddingModel.d.ts CHANGED
@@ -40,12 +40,12 @@ export declare class OpenAITextEmbeddingModel extends AbstractModel<OpenAITextEm
  constructor(settings: OpenAITextEmbeddingModelSettings);
  readonly provider: "openai";
  get modelName(): "text-embedding-ada-002";
- readonly maxTextsPerCall = 1;
+ readonly maxTextsPerCall = 2048;
  readonly embeddingDimensions: number;
  readonly tokenizer: TikTokenTokenizer;
  readonly contextWindowSize: number;
  countTokens(input: string): Promise<number>;
- callAPI(text: string, options?: ModelFunctionOptions<OpenAITextEmbeddingModelSettings>): Promise<OpenAITextEmbeddingResponse>;
+ callAPI(texts: Array<string>, options?: ModelFunctionOptions<OpenAITextEmbeddingModelSettings>): Promise<OpenAITextEmbeddingResponse>;
  get settingsForEvent(): Partial<OpenAITextEmbeddingModelSettings>;
  generateEmbeddingResponse(texts: string[], options?: ModelFunctionOptions<OpenAITextEmbeddingModelSettings>): Promise<{
      object: "list";
package/model-provider/openai/OpenAITextEmbeddingModel.js CHANGED
@@ -50,7 +50,7 @@ export class OpenAITextEmbeddingModel extends AbstractModel {
      enumerable: true,
      configurable: true,
      writable: true,
-     value: 1
+     value: 2048
  });
  Object.defineProperty(this, "embeddingDimensions", {
      enumerable: true,
@@ -82,7 +82,7 @@ export class OpenAITextEmbeddingModel extends AbstractModel {
  async countTokens(input) {
      return countTokens(this.tokenizer, input);
  }
- async callAPI(text, options) {
+ async callAPI(texts, options) {
      const run = options?.run;
      const settings = options?.settings;
      const combinedSettings = {
@@ -95,7 +95,7 @@ export class OpenAITextEmbeddingModel extends AbstractModel {
      ...combinedSettings,
      // other settings:
      abortSignal: run?.abortSignal,
-     input: text,
+     input: texts,
  };
  return callWithRetryAndThrottle({
      retry: callSettings.api?.retry,
@@ -110,7 +110,7 @@ export class OpenAITextEmbeddingModel extends AbstractModel {
  if (texts.length > this.maxTextsPerCall) {
      throw new Error(`The OpenAI embedding API only supports ${this.maxTextsPerCall} texts per API call.`);
  }
- return this.callAPI(texts[0], options);
+ return this.callAPI(texts, options);
  }
  extractEmbeddings(response) {
      return [response.data[0].embedding];
@@ -121,13 +121,11 @@ export class OpenAITextEmbeddingModel extends AbstractModel {
  }
  const openAITextEmbeddingResponseSchema = z.object({
      object: z.literal("list"),
-     data: z
-         .array(z.object({
+     data: z.array(z.object({
          object: z.literal("embedding"),
          embedding: z.array(z.number()),
          index: z.number(),
-     }))
-         .length(1),
+     })),
      model: z.string(),
      usage: z.object({
          prompt_tokens: z.number(),
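Together, these changes let a single API call embed up to 2048 texts instead of exactly one. A minimal usage sketch, assuming `OPENAI_API_KEY` is set and that `embedTexts` keeps the `(model, texts)` shape shown in the embed-text changes above:

```ts
import { embedTexts, OpenAITextEmbeddingModel } from "modelfusion";

// Both texts now go out in a single embeddings request
// instead of one request per text:
const embeddings = await embedTexts(
  new OpenAITextEmbeddingModel({ model: "text-embedding-ada-002" }),
  ["sunny beach with palm trees", "rainy city street at night"]
);
```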
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
      "name": "modelfusion",
      "description": "Build AI applications, chatbots, and agents with JavaScript and TypeScript.",
-     "version": "0.28.0",
+     "version": "0.29.1",
      "author": "Lars Grammel",
      "license": "MIT",
      "keywords": [