modelfusion 0.28.0 → 0.29.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -1
- package/core/api/postToApi.d.ts +1 -1
- package/model-function/ModelCallEvent.d.ts +3 -2
- package/model-function/SuccessfulModelCall.d.ts +1 -1
- package/model-function/describe-image/ImageDescriptionEvent.cjs +2 -0
- package/model-function/describe-image/ImageDescriptionEvent.d.ts +18 -0
- package/model-function/describe-image/ImageDescriptionEvent.js +1 -0
- package/model-function/describe-image/ImageDescriptionModel.cjs +2 -0
- package/model-function/describe-image/ImageDescriptionModel.d.ts +8 -0
- package/model-function/describe-image/ImageDescriptionModel.js +1 -0
- package/model-function/describe-image/describeImage.cjs +20 -0
- package/model-function/describe-image/describeImage.d.ts +9 -0
- package/model-function/describe-image/describeImage.js +16 -0
- package/model-function/embed-text/TextEmbeddingModel.d.ts +4 -1
- package/model-function/embed-text/embedText.cjs +7 -2
- package/model-function/embed-text/embedText.js +7 -2
- package/model-function/index.cjs +3 -0
- package/model-function/index.d.ts +3 -0
- package/model-function/index.js +3 -0
- package/model-provider/huggingface/HuggingFaceError.cjs +4 -2
- package/model-provider/huggingface/HuggingFaceError.d.ts +3 -3
- package/model-provider/huggingface/HuggingFaceError.js +4 -2
- package/model-provider/huggingface/HuggingFaceImageDescriptionModel.cjs +81 -0
- package/model-provider/huggingface/HuggingFaceImageDescriptionModel.d.ts +37 -0
- package/model-provider/huggingface/HuggingFaceImageDescriptionModel.js +74 -0
- package/model-provider/huggingface/index.cjs +1 -0
- package/model-provider/huggingface/index.d.ts +1 -0
- package/model-provider/huggingface/index.js +1 -0
- package/model-provider/openai/OpenAITextEmbeddingModel.cjs +6 -8
- package/model-provider/openai/OpenAITextEmbeddingModel.d.ts +2 -2
- package/model-provider/openai/OpenAITextEmbeddingModel.js +6 -8
- package/package.json +1 -1
package/README.md
CHANGED
@@ -314,6 +314,21 @@ const speech = await synthesizeSpeech(
 
 Providers: [Eleven Labs](https://modelfusion.dev/integration/model-provider/elevenlabs), [LMNT](https://modelfusion.dev/integration/model-provider/lmnt)
 
+### [Describe Image](https://modelfusion.dev/guide/function/describe-image)
+
+Describe an image as text, e.g. for image captioning or OCR.
+
+```ts
+const text = await describeImage(
+  new HuggingFaceImageDescriptionModel({
+    model: "nlpconnect/vit-gpt2-image-captioning",
+  }),
+  data: buffer
+);
+```
+
+Providers: [HuggingFace](/integration/model-provider/huggingface)
+
 ### [Generate Image](https://modelfusion.dev/guide/function/generate-image)
 
 Generate a base64-encoded image from a prompt.
@@ -412,7 +427,8 @@ Integrations: [Helicone](https://modelfusion.dev/integration/observability/helic
 - [Tokenize Text](https://modelfusion.dev/guide/function/tokenize-text)
 - [Transcribe Speech](https://modelfusion.dev/guide/function/transcribe-speech)
 - [Synthesize Speech](https://modelfusion.dev/guide/function/synthesize-speech)
-- [
+- [Describe Image](https://modelfusion.dev/guide/function/describe-image)
+- [Generate Image](https://modelfusion.dev/guide/function/generate-image)
 - [Tools](https://modelfusion.dev/guide/tools)
 - [Text Chunks](https://modelfusion.dev/guide/text-chunk/)
 - [Split Text](https://modelfusion.dev/guide/text-chunk/split)
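For context, a minimal end-to-end sketch of the new describe-image call, based on the `describeImage(model, data, options?)` signature added in this release. It assumes `describeImage` and `HuggingFaceImageDescriptionModel` are re-exported from the package root like the other model functions, and that a Hugging Face API token is available to the default `HuggingFaceApiConfiguration`; the Hugging Face model expects the image as a `Buffer`:

```ts
import { readFile } from "node:fs/promises";
// Assumption: both are re-exported from the package root, as with the other model functions.
import { describeImage, HuggingFaceImageDescriptionModel } from "modelfusion";

async function main() {
  // The Hugging Face image description model takes the raw image bytes as a Buffer.
  const image = await readFile("example-image.png");

  const description = await describeImage(
    new HuggingFaceImageDescriptionModel({
      model: "nlpconnect/vit-gpt2-image-captioning",
    }),
    image
  );

  console.log(description); // e.g. a short caption of the image
}

main().catch(console.error);
```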
package/core/api/postToApi.d.ts
CHANGED
@@ -21,7 +21,7 @@ export declare const postToApi: <T>({ url, headers, body, successfulResponseHand
     url: string;
     headers?: Record<string, string> | undefined;
     body: {
-        content: string | FormData;
+        content: string | FormData | Buffer;
         values: unknown;
     };
     failedResponseHandler: ResponseHandler<Error>;
package/model-function/ModelCallEvent.d.ts
CHANGED
@@ -1,5 +1,6 @@
 import { BaseFunctionFinishedEvent, BaseFunctionStartedEvent } from "../core/FunctionEvent.js";
 import { ModelInformation } from "./ModelInformation.js";
+import { ImageDescriptionFinishedEvent, ImageDescriptionStartedEvent } from "./describe-image/ImageDescriptionEvent.js";
 import { TextEmbeddingFinishedEvent, TextEmbeddingStartedEvent } from "./embed-text/TextEmbeddingEvent.js";
 import { ImageGenerationFinishedEvent, ImageGenerationStartedEvent } from "./generate-image/ImageGenerationEvent.js";
 import { JsonGenerationFinishedEvent, JsonGenerationStartedEvent } from "./generate-json/JsonGenerationEvent.js";
@@ -47,5 +48,5 @@ export interface BaseModelCallFinishedEvent extends BaseFunctionFinishedEvent {
      */
     result: BaseModelCallFinishedEventResult;
 }
-export type ModelCallStartedEvent = ImageGenerationStartedEvent | JsonGenerationStartedEvent | SpeechSynthesisStartedEvent | TextEmbeddingStartedEvent | TextGenerationStartedEvent | TextStreamingStartedEvent | TranscriptionStartedEvent;
-export type ModelCallFinishedEvent = ImageGenerationFinishedEvent | JsonGenerationFinishedEvent | SpeechSynthesisFinishedEvent | TextEmbeddingFinishedEvent | TextGenerationFinishedEvent | TextStreamingFinishedEvent | TranscriptionFinishedEvent;
+export type ModelCallStartedEvent = ImageDescriptionStartedEvent | ImageGenerationStartedEvent | JsonGenerationStartedEvent | SpeechSynthesisStartedEvent | TextEmbeddingStartedEvent | TextGenerationStartedEvent | TextStreamingStartedEvent | TranscriptionStartedEvent;
+export type ModelCallFinishedEvent = ImageDescriptionFinishedEvent | ImageGenerationFinishedEvent | JsonGenerationFinishedEvent | SpeechSynthesisFinishedEvent | TextEmbeddingFinishedEvent | TextGenerationFinishedEvent | TextStreamingFinishedEvent | TranscriptionFinishedEvent;
package/model-function/SuccessfulModelCall.d.ts
CHANGED
@@ -1,7 +1,7 @@
 import { FunctionEvent } from "../core/FunctionEvent.js";
 import { ModelInformation } from "./ModelInformation.js";
 export type SuccessfulModelCall = {
-    type: "image-generation" | "json-generation" | "json-or-text-generation" | "speech-synthesis" | "text-embedding" | "text-generation" | "text-streaming" | "transcription";
+    type: "image-description" | "image-generation" | "json-generation" | "json-or-text-generation" | "speech-synthesis" | "text-embedding" | "text-generation" | "text-streaming" | "transcription";
     model: ModelInformation;
     settings: unknown;
     response: unknown;
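Since both unions above stay discriminated by `functionType`, the new `"image-description"` events can be narrowed like the existing ones. A type-level sketch only; it assumes the event types are re-exported from the package root, and the observer wiring itself is unchanged by this release:

```ts
import type { ModelCallStartedEvent, ModelCallFinishedEvent } from "modelfusion";

// Narrowing on the functionType discriminant picks out the new event members.
function logModelCall(event: ModelCallStartedEvent | ModelCallFinishedEvent) {
  if (event.functionType === "image-description") {
    // Here the type is ImageDescriptionStartedEvent | ImageDescriptionFinishedEvent.
    console.log("image description call:", event);
  }
}
```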
package/model-function/describe-image/ImageDescriptionEvent.d.ts
ADDED
@@ -0,0 +1,18 @@
+import { BaseModelCallFinishedEvent, BaseModelCallStartedEvent } from "../ModelCallEvent.js";
+export interface ImageDescriptionStartedEvent extends BaseModelCallStartedEvent {
+    functionType: "image-description";
+}
+export type ImageDescriptionFinishedEventResult = {
+    status: "success";
+    response: unknown;
+    output: string;
+} | {
+    status: "error";
+    error: unknown;
+} | {
+    status: "abort";
+};
+export interface ImageDescriptionFinishedEvent extends BaseModelCallFinishedEvent {
+    functionType: "image-description";
+    result: ImageDescriptionFinishedEventResult;
+}
package/model-function/describe-image/ImageDescriptionEvent.js
ADDED
@@ -0,0 +1 @@
+export {};
package/model-function/describe-image/ImageDescriptionModel.d.ts
ADDED
@@ -0,0 +1,8 @@
+import { ModelFunctionOptions } from "../ModelFunctionOptions.js";
+import { Model, ModelSettings } from "../Model.js";
+export interface ImageDescriptionModelSettings extends ModelSettings {
+}
+export interface ImageDescriptionModel<DATA, RESPONSE, SETTINGS extends ImageDescriptionModelSettings> extends Model<SETTINGS> {
+    generateImageDescriptionResponse: (data: DATA, options?: ModelFunctionOptions<SETTINGS>) => PromiseLike<RESPONSE>;
+    extractImageDescription: (response: RESPONSE) => string;
+}
package/model-function/describe-image/ImageDescriptionModel.js
ADDED
@@ -0,0 +1 @@
+export {};
package/model-function/describe-image/describeImage.cjs
ADDED
@@ -0,0 +1,20 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.describeImage = void 0;
+const executeCall_js_1 = require("../executeCall.cjs");
+/**
+ * Describe an image as text.
+ *
+ * Depending on the model, this can be used for image captioning, for describing the contents of an image, or for OCR.
+ */
+function describeImage(model, data, options) {
+    return (0, executeCall_js_1.executeCall)({
+        functionType: "image-description",
+        input: data,
+        model,
+        options,
+        generateResponse: (options) => model.generateImageDescriptionResponse(data, options),
+        extractOutputValue: model.extractImageDescription,
+    });
+}
+exports.describeImage = describeImage;
package/model-function/describe-image/describeImage.d.ts
ADDED
@@ -0,0 +1,9 @@
+import { ModelFunctionOptions } from "../ModelFunctionOptions.js";
+import { ModelFunctionPromise } from "../executeCall.js";
+import { ImageDescriptionModel, ImageDescriptionModelSettings } from "./ImageDescriptionModel.js";
+/**
+ * Describe an image as text.
+ *
+ * Depending on the model, this can be used for image captioning, for describing the contents of an image, or for OCR.
+ */
+export declare function describeImage<DATA, RESPONSE, SETTINGS extends ImageDescriptionModelSettings>(model: ImageDescriptionModel<DATA, RESPONSE, SETTINGS>, data: DATA, options?: ModelFunctionOptions<SETTINGS>): ModelFunctionPromise<string, RESPONSE>;
package/model-function/describe-image/describeImage.js
ADDED
@@ -0,0 +1,16 @@
+import { executeCall } from "../executeCall.js";
+/**
+ * Describe an image as text.
+ *
+ * Depending on the model, this can be used for image captioning, for describing the contents of an image, or for OCR.
+ */
+export function describeImage(model, data, options) {
+    return executeCall({
+        functionType: "image-description",
+        input: data,
+        model,
+        options,
+        generateResponse: (options) => model.generateImageDescriptionResponse(data, options),
+        extractOutputValue: model.extractImageDescription,
+    });
+}
package/model-function/embed-text/TextEmbeddingModel.d.ts
CHANGED
@@ -12,7 +12,10 @@ export interface TextEmbeddingModel<RESPONSE, SETTINGS extends TextEmbeddingMode
      * The size of the embedding vector.
      */
     readonly embeddingDimensions: number | undefined;
-
+    /**
+     * Limit of how many texts can be sent in a single API call.
+     */
+    readonly maxTextsPerCall: number | undefined;
     generateEmbeddingResponse(texts: string[], options?: ModelFunctionOptions<SETTINGS>): PromiseLike<RESPONSE>;
     extractEmbeddings(response: RESPONSE): Vector[];
 }
package/model-function/embed-text/embedText.cjs
CHANGED
@@ -24,8 +24,13 @@ function embedTexts(model, texts, options) {
         // split the texts into groups that are small enough to be sent in one call:
         const maxTextsPerCall = model.maxTextsPerCall;
         const textGroups = [];
-
-        textGroups.push(texts
+        if (maxTextsPerCall == null) {
+            textGroups.push(texts);
+        }
+        else {
+            for (let i = 0; i < texts.length; i += maxTextsPerCall) {
+                textGroups.push(texts.slice(i, i + maxTextsPerCall));
+            }
         }
         return Promise.all(textGroups.map((textGroup) => model.generateEmbeddingResponse(textGroup, options)));
     },
package/model-function/embed-text/embedText.js
CHANGED
@@ -21,8 +21,13 @@ export function embedTexts(model, texts, options) {
         // split the texts into groups that are small enough to be sent in one call:
         const maxTextsPerCall = model.maxTextsPerCall;
         const textGroups = [];
-
-        textGroups.push(texts
+        if (maxTextsPerCall == null) {
+            textGroups.push(texts);
+        }
+        else {
+            for (let i = 0; i < texts.length; i += maxTextsPerCall) {
+                textGroups.push(texts.slice(i, i + maxTextsPerCall));
+            }
         }
         return Promise.all(textGroups.map((textGroup) => model.generateEmbeddingResponse(textGroup, options)));
     },
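The batching logic above is the core of this change: when a model reports no `maxTextsPerCall`, all texts go out in a single request; otherwise the input is sliced into chunks of at most that size. A standalone sketch of just that grouping step (hypothetical helper name, not part of the package API):

```ts
// Hypothetical helper mirroring the grouping step inside embedTexts().
function groupTexts(texts: string[], maxTextsPerCall: number | undefined): string[][] {
  const textGroups: string[][] = [];
  if (maxTextsPerCall == null) {
    // No limit reported by the model: send all texts in one call.
    textGroups.push(texts);
  } else {
    // Otherwise slice into chunks of at most maxTextsPerCall texts.
    for (let i = 0; i < texts.length; i += maxTextsPerCall) {
      textGroups.push(texts.slice(i, i + maxTextsPerCall));
    }
  }
  return textGroups;
}

console.log(groupTexts(["a", "b", "c"], 2)); // [["a", "b"], ["c"]]
console.log(groupTexts(["a", "b", "c"], undefined)); // [["a", "b", "c"]]
```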
package/model-function/index.cjs
CHANGED
@@ -19,6 +19,9 @@ __exportStar(require("./ModelCallEvent.cjs"), exports);
 __exportStar(require("./ModelFunctionOptions.cjs"), exports);
 __exportStar(require("./ModelInformation.cjs"), exports);
 __exportStar(require("./SuccessfulModelCall.cjs"), exports);
+__exportStar(require("./describe-image/ImageDescriptionEvent.cjs"), exports);
+__exportStar(require("./describe-image/ImageDescriptionModel.cjs"), exports);
+__exportStar(require("./describe-image/describeImage.cjs"), exports);
 __exportStar(require("./embed-text/TextEmbeddingEvent.cjs"), exports);
 __exportStar(require("./embed-text/TextEmbeddingModel.cjs"), exports);
 __exportStar(require("./embed-text/embedText.cjs"), exports);
package/model-function/index.d.ts
CHANGED
@@ -3,6 +3,9 @@ export * from "./ModelCallEvent.js";
 export * from "./ModelFunctionOptions.js";
 export * from "./ModelInformation.js";
 export * from "./SuccessfulModelCall.js";
+export * from "./describe-image/ImageDescriptionEvent.js";
+export * from "./describe-image/ImageDescriptionModel.js";
+export * from "./describe-image/describeImage.js";
 export * from "./embed-text/TextEmbeddingEvent.js";
 export * from "./embed-text/TextEmbeddingModel.js";
 export * from "./embed-text/embedText.js";
package/model-function/index.js
CHANGED
@@ -3,6 +3,9 @@ export * from "./ModelCallEvent.js";
 export * from "./ModelFunctionOptions.js";
 export * from "./ModelInformation.js";
 export * from "./SuccessfulModelCall.js";
+export * from "./describe-image/ImageDescriptionEvent.js";
+export * from "./describe-image/ImageDescriptionModel.js";
+export * from "./describe-image/describeImage.js";
 export * from "./embed-text/TextEmbeddingEvent.js";
 export * from "./embed-text/TextEmbeddingModel.js";
 export * from "./embed-text/embedText.js";
package/model-provider/huggingface/HuggingFaceError.cjs
CHANGED
@@ -8,10 +8,12 @@ const secure_json_parse_1 = __importDefault(require("secure-json-parse"));
 const zod_1 = require("zod");
 const ApiCallError_js_1 = require("../../core/api/ApiCallError.cjs");
 exports.huggingFaceErrorDataSchema = zod_1.z.object({
-    error: zod_1.z.array(zod_1.z.string()),
+    error: zod_1.z.array(zod_1.z.string()).or(zod_1.z.string()),
 });
 class HuggingFaceError extends ApiCallError_js_1.ApiCallError {
-    constructor({ data, statusCode, url, requestBodyValues, message = data.error
+    constructor({ data, statusCode, url, requestBodyValues, message = typeof data.error === "string"
+        ? data.error
+        : data.error.join("\n\n"), }) {
         super({ message, statusCode, requestBodyValues, url });
         Object.defineProperty(this, "data", {
             enumerable: true,
package/model-provider/huggingface/HuggingFaceError.d.ts
CHANGED
@@ -2,11 +2,11 @@ import { z } from "zod";
 import { ApiCallError } from "../../core/api/ApiCallError.js";
 import { ResponseHandler } from "../../core/api/postToApi.js";
 export declare const huggingFaceErrorDataSchema: z.ZodObject<{
-    error: z.ZodArray<z.ZodString, "many">;
+    error: z.ZodUnion<[z.ZodArray<z.ZodString, "many">, z.ZodString]>;
 }, "strip", z.ZodTypeAny, {
-    error: string[];
+    error: (string | string[]) & (string | string[] | undefined);
 }, {
-    error: string[];
+    error: (string | string[]) & (string | string[] | undefined);
 }>;
 export type HuggingFaceErrorData = z.infer<typeof huggingFaceErrorDataSchema>;
 export declare class HuggingFaceError extends ApiCallError {
package/model-provider/huggingface/HuggingFaceError.js
CHANGED
@@ -2,10 +2,12 @@ import SecureJSON from "secure-json-parse";
 import { z } from "zod";
 import { ApiCallError } from "../../core/api/ApiCallError.js";
 export const huggingFaceErrorDataSchema = z.object({
-    error: z.array(z.string()),
+    error: z.array(z.string()).or(z.string()),
 });
 export class HuggingFaceError extends ApiCallError {
-    constructor({ data, statusCode, url, requestBodyValues, message = data.error
+    constructor({ data, statusCode, url, requestBodyValues, message = typeof data.error === "string"
+        ? data.error
+        : data.error.join("\n\n"), }) {
         super({ message, statusCode, requestBodyValues, url });
         Object.defineProperty(this, "data", {
             enumerable: true,
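The schema change means the Hugging Face `error` field may now be either a single string or an array of strings, and the constructor's default message handles both. A small sketch of that default-message logic in isolation (standalone types, not the package's internals):

```ts
// Standalone sketch of the new default-message behavior.
type HuggingFaceErrorPayload = { error: string | string[] };

function defaultErrorMessage(data: HuggingFaceErrorPayload): string {
  return typeof data.error === "string"
    ? data.error // a single error string is used as-is
    : data.error.join("\n\n"); // multiple errors are joined, as before
}

console.log(defaultErrorMessage({ error: "Model is currently loading" }));
console.log(defaultErrorMessage({ error: ["first error", "second error"] }));
```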
package/model-provider/huggingface/HuggingFaceImageDescriptionModel.cjs
ADDED
@@ -0,0 +1,81 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.HuggingFaceImageDescriptionModel = void 0;
+const zod_1 = __importDefault(require("zod"));
+const callWithRetryAndThrottle_js_1 = require("../../core/api/callWithRetryAndThrottle.cjs");
+const postToApi_js_1 = require("../../core/api/postToApi.cjs");
+const AbstractModel_js_1 = require("../../model-function/AbstractModel.cjs");
+const HuggingFaceApiConfiguration_js_1 = require("./HuggingFaceApiConfiguration.cjs");
+const HuggingFaceError_js_1 = require("./HuggingFaceError.cjs");
+/**
+ * Create an image to text model that calls a Hugging Face Image-to-Text Inference API.
+ *
+ * @see https://huggingface.co/tasks/image-to-text
+ */
+class HuggingFaceImageDescriptionModel extends AbstractModel_js_1.AbstractModel {
+    constructor(settings) {
+        super({ settings });
+        Object.defineProperty(this, "provider", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: "huggingface"
+        });
+        Object.defineProperty(this, "countPromptTokens", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
+    }
+    get modelName() {
+        return this.settings.model;
+    }
+    async callAPI(data, options) {
+        const run = options?.run;
+        const settings = options?.settings;
+        const callSettings = {
+            ...this.settings,
+            ...settings,
+            abortSignal: run?.abortSignal,
+            data,
+        };
+        return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
+            retry: callSettings.api?.retry,
+            throttle: callSettings.api?.throttle,
+            call: async () => callHuggingFaceImageDescriptionAPI(callSettings),
+        });
+    }
+    get settingsForEvent() {
+        return {};
+    }
+    generateImageDescriptionResponse(data, options) {
+        return this.callAPI(data, options);
+    }
+    extractImageDescription(response) {
+        return response[0].generated_text;
+    }
+    withSettings(additionalSettings) {
+        return new HuggingFaceImageDescriptionModel(Object.assign({}, this.settings, additionalSettings));
+    }
+}
+exports.HuggingFaceImageDescriptionModel = HuggingFaceImageDescriptionModel;
+const huggingFaceImageDescriptionResponseSchema = zod_1.default.array(zod_1.default.object({
+    generated_text: zod_1.default.string(),
+}));
+async function callHuggingFaceImageDescriptionAPI({ api = new HuggingFaceApiConfiguration_js_1.HuggingFaceApiConfiguration(), abortSignal, model, data, }) {
+    return (0, postToApi_js_1.postToApi)({
+        url: api.assembleUrl(`/${model}`),
+        headers: api.headers,
+        body: {
+            content: data,
+            values: {},
+        },
+        failedResponseHandler: HuggingFaceError_js_1.failedHuggingFaceCallResponseHandler,
+        successfulResponseHandler: (0, postToApi_js_1.createJsonResponseHandler)(huggingFaceImageDescriptionResponseSchema),
+        abortSignal,
+    });
+}
package/model-provider/huggingface/HuggingFaceImageDescriptionModel.d.ts
ADDED
@@ -0,0 +1,37 @@
+/// <reference types="node" resolution-mode="require"/>
+import z from "zod";
+import { ApiConfiguration } from "../../core/api/ApiConfiguration.js";
+import { AbstractModel } from "../../model-function/AbstractModel.js";
+import { ModelFunctionOptions } from "../../model-function/ModelFunctionOptions.js";
+import { ImageDescriptionModel, ImageDescriptionModelSettings } from "../../model-function/describe-image/ImageDescriptionModel.js";
+export interface HuggingFaceImageDescriptionModelSettings extends ImageDescriptionModelSettings {
+    api?: ApiConfiguration;
+    model: string;
+}
+/**
+ * Create an image to text model that calls a Hugging Face Image-to-Text Inference API.
+ *
+ * @see https://huggingface.co/tasks/image-to-text
+ */
+export declare class HuggingFaceImageDescriptionModel extends AbstractModel<HuggingFaceImageDescriptionModelSettings> implements ImageDescriptionModel<Buffer, HuggingFaceImageDescriptionResponse, HuggingFaceImageDescriptionModelSettings> {
+    constructor(settings: HuggingFaceImageDescriptionModelSettings);
+    readonly provider = "huggingface";
+    get modelName(): string;
+    callAPI(data: Buffer, options?: ModelFunctionOptions<HuggingFaceImageDescriptionModelSettings>): Promise<HuggingFaceImageDescriptionResponse>;
+    get settingsForEvent(): Partial<HuggingFaceImageDescriptionModelSettings>;
+    readonly countPromptTokens: undefined;
+    generateImageDescriptionResponse(data: Buffer, options?: ModelFunctionOptions<HuggingFaceImageDescriptionModelSettings>): Promise<{
+        generated_text: string;
+    }[]>;
+    extractImageDescription(response: HuggingFaceImageDescriptionResponse): string;
+    withSettings(additionalSettings: Partial<HuggingFaceImageDescriptionModelSettings>): this;
+}
+declare const huggingFaceImageDescriptionResponseSchema: z.ZodArray<z.ZodObject<{
+    generated_text: z.ZodString;
+}, "strip", z.ZodTypeAny, {
+    generated_text: string;
+}, {
+    generated_text: string;
+}>, "many">;
+export type HuggingFaceImageDescriptionResponse = z.infer<typeof huggingFaceImageDescriptionResponseSchema>;
+export {};
package/model-provider/huggingface/HuggingFaceImageDescriptionModel.js
ADDED
@@ -0,0 +1,74 @@
+import z from "zod";
+import { callWithRetryAndThrottle } from "../../core/api/callWithRetryAndThrottle.js";
+import { createJsonResponseHandler, postToApi, } from "../../core/api/postToApi.js";
+import { AbstractModel } from "../../model-function/AbstractModel.js";
+import { HuggingFaceApiConfiguration } from "./HuggingFaceApiConfiguration.js";
+import { failedHuggingFaceCallResponseHandler } from "./HuggingFaceError.js";
+/**
+ * Create an image to text model that calls a Hugging Face Image-to-Text Inference API.
+ *
+ * @see https://huggingface.co/tasks/image-to-text
+ */
+export class HuggingFaceImageDescriptionModel extends AbstractModel {
+    constructor(settings) {
+        super({ settings });
+        Object.defineProperty(this, "provider", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: "huggingface"
+        });
+        Object.defineProperty(this, "countPromptTokens", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
+    }
+    get modelName() {
+        return this.settings.model;
+    }
+    async callAPI(data, options) {
+        const run = options?.run;
+        const settings = options?.settings;
+        const callSettings = {
+            ...this.settings,
+            ...settings,
+            abortSignal: run?.abortSignal,
+            data,
+        };
+        return callWithRetryAndThrottle({
+            retry: callSettings.api?.retry,
+            throttle: callSettings.api?.throttle,
+            call: async () => callHuggingFaceImageDescriptionAPI(callSettings),
+        });
+    }
+    get settingsForEvent() {
+        return {};
+    }
+    generateImageDescriptionResponse(data, options) {
+        return this.callAPI(data, options);
+    }
+    extractImageDescription(response) {
+        return response[0].generated_text;
+    }
+    withSettings(additionalSettings) {
+        return new HuggingFaceImageDescriptionModel(Object.assign({}, this.settings, additionalSettings));
+    }
+}
+const huggingFaceImageDescriptionResponseSchema = z.array(z.object({
+    generated_text: z.string(),
+}));
+async function callHuggingFaceImageDescriptionAPI({ api = new HuggingFaceApiConfiguration(), abortSignal, model, data, }) {
+    return postToApi({
+        url: api.assembleUrl(`/${model}`),
+        headers: api.headers,
+        body: {
+            content: data,
+            values: {},
+        },
+        failedResponseHandler: failedHuggingFaceCallResponseHandler,
+        successfulResponseHandler: createJsonResponseHandler(huggingFaceImageDescriptionResponseSchema),
+        abortSignal,
+    });
+}
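A hedged usage sketch of the new model class on its own, based on the declarations above: `generateImageDescriptionResponse` returns the raw `[{ generated_text }]` array and `extractImageDescription` picks the first entry. It assumes a Hugging Face API token is available to the default `HuggingFaceApiConfiguration` and that the class is re-exported from the package root:

```ts
import { readFile } from "node:fs/promises";
// Assumption: re-exported from the package root via the huggingface index files above.
import { HuggingFaceImageDescriptionModel } from "modelfusion";

async function caption(path: string): Promise<string> {
  const model = new HuggingFaceImageDescriptionModel({
    model: "nlpconnect/vit-gpt2-image-captioning",
  });

  // Raw response: an array of { generated_text: string } objects.
  const response = await model.generateImageDescriptionResponse(await readFile(path));

  // extractImageDescription returns response[0].generated_text.
  return model.extractImageDescription(response);
}

caption("example-image.png").then(console.log).catch(console.error);
```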
package/model-provider/huggingface/index.cjs
CHANGED
@@ -18,5 +18,6 @@ exports.HuggingFaceError = void 0;
 __exportStar(require("./HuggingFaceApiConfiguration.cjs"), exports);
 var HuggingFaceError_js_1 = require("./HuggingFaceError.cjs");
 Object.defineProperty(exports, "HuggingFaceError", { enumerable: true, get: function () { return HuggingFaceError_js_1.HuggingFaceError; } });
+__exportStar(require("./HuggingFaceImageDescriptionModel.cjs"), exports);
 __exportStar(require("./HuggingFaceTextEmbeddingModel.cjs"), exports);
 __exportStar(require("./HuggingFaceTextGenerationModel.cjs"), exports);
package/model-provider/huggingface/index.d.ts
CHANGED
@@ -1,4 +1,5 @@
 export * from "./HuggingFaceApiConfiguration.js";
 export { HuggingFaceError, HuggingFaceErrorData } from "./HuggingFaceError.js";
+export * from "./HuggingFaceImageDescriptionModel.js";
 export * from "./HuggingFaceTextEmbeddingModel.js";
 export * from "./HuggingFaceTextGenerationModel.js";
package/model-provider/huggingface/index.js
CHANGED
@@ -1,4 +1,5 @@
 export * from "./HuggingFaceApiConfiguration.js";
 export { HuggingFaceError } from "./HuggingFaceError.js";
+export * from "./HuggingFaceImageDescriptionModel.js";
 export * from "./HuggingFaceTextEmbeddingModel.js";
 export * from "./HuggingFaceTextGenerationModel.js";
package/model-provider/openai/OpenAITextEmbeddingModel.cjs
CHANGED
@@ -58,7 +58,7 @@ class OpenAITextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
             enumerable: true,
             configurable: true,
             writable: true,
-            value:
+            value: 2048
         });
         Object.defineProperty(this, "embeddingDimensions", {
             enumerable: true,
@@ -90,7 +90,7 @@ class OpenAITextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
     async countTokens(input) {
        return (0, countTokens_js_1.countTokens)(this.tokenizer, input);
     }
-    async callAPI(
+    async callAPI(texts, options) {
         const run = options?.run;
         const settings = options?.settings;
         const combinedSettings = {
@@ -103,7 +103,7 @@ class OpenAITextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
             ...combinedSettings,
             // other settings:
             abortSignal: run?.abortSignal,
-            input:
+            input: texts,
         };
         return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
             retry: callSettings.api?.retry,
@@ -118,7 +118,7 @@ class OpenAITextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
         if (texts.length > this.maxTextsPerCall) {
             throw new Error(`The OpenAI embedding API only supports ${this.maxTextsPerCall} texts per API call.`);
         }
-        return this.callAPI(texts
+        return this.callAPI(texts, options);
     }
     extractEmbeddings(response) {
         return [response.data[0].embedding];
@@ -130,13 +130,11 @@ class OpenAITextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
 exports.OpenAITextEmbeddingModel = OpenAITextEmbeddingModel;
 const openAITextEmbeddingResponseSchema = zod_1.default.object({
     object: zod_1.default.literal("list"),
-    data: zod_1.default
-        .array(zod_1.default.object({
+    data: zod_1.default.array(zod_1.default.object({
         object: zod_1.default.literal("embedding"),
         embedding: zod_1.default.array(zod_1.default.number()),
         index: zod_1.default.number(),
-    }))
-        .length(1),
+    })),
     model: zod_1.default.string(),
     usage: zod_1.default.object({
         prompt_tokens: zod_1.default.number(),
package/model-provider/openai/OpenAITextEmbeddingModel.d.ts
CHANGED
@@ -40,12 +40,12 @@ export declare class OpenAITextEmbeddingModel extends AbstractModel<OpenAITextEm
     constructor(settings: OpenAITextEmbeddingModelSettings);
     readonly provider: "openai";
     get modelName(): "text-embedding-ada-002";
-    readonly maxTextsPerCall =
+    readonly maxTextsPerCall = 2048;
     readonly embeddingDimensions: number;
     readonly tokenizer: TikTokenTokenizer;
     readonly contextWindowSize: number;
     countTokens(input: string): Promise<number>;
-    callAPI(
+    callAPI(texts: Array<string>, options?: ModelFunctionOptions<OpenAITextEmbeddingModelSettings>): Promise<OpenAITextEmbeddingResponse>;
     get settingsForEvent(): Partial<OpenAITextEmbeddingModelSettings>;
     generateEmbeddingResponse(texts: string[], options?: ModelFunctionOptions<OpenAITextEmbeddingModelSettings>): Promise<{
         object: "list";
package/model-provider/openai/OpenAITextEmbeddingModel.js
CHANGED
@@ -50,7 +50,7 @@ export class OpenAITextEmbeddingModel extends AbstractModel {
             enumerable: true,
             configurable: true,
             writable: true,
-            value:
+            value: 2048
         });
         Object.defineProperty(this, "embeddingDimensions", {
             enumerable: true,
@@ -82,7 +82,7 @@ export class OpenAITextEmbeddingModel extends AbstractModel {
     async countTokens(input) {
         return countTokens(this.tokenizer, input);
     }
-    async callAPI(
+    async callAPI(texts, options) {
         const run = options?.run;
         const settings = options?.settings;
         const combinedSettings = {
@@ -95,7 +95,7 @@ export class OpenAITextEmbeddingModel extends AbstractModel {
             ...combinedSettings,
             // other settings:
             abortSignal: run?.abortSignal,
-            input:
+            input: texts,
         };
         return callWithRetryAndThrottle({
             retry: callSettings.api?.retry,
@@ -110,7 +110,7 @@ export class OpenAITextEmbeddingModel extends AbstractModel {
         if (texts.length > this.maxTextsPerCall) {
             throw new Error(`The OpenAI embedding API only supports ${this.maxTextsPerCall} texts per API call.`);
         }
-        return this.callAPI(texts
+        return this.callAPI(texts, options);
     }
     extractEmbeddings(response) {
         return [response.data[0].embedding];
@@ -121,13 +121,11 @@ export class OpenAITextEmbeddingModel extends AbstractModel {
 }
 const openAITextEmbeddingResponseSchema = z.object({
     object: z.literal("list"),
-    data: z
-        .array(z.object({
+    data: z.array(z.object({
         object: z.literal("embedding"),
         embedding: z.array(z.number()),
         index: z.number(),
-    }))
-        .length(1),
+    })),
     model: z.string(),
     usage: z.object({
         prompt_tokens: z.number(),
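Taken together with the `embedTexts` batching change above, the OpenAI model now advertises `maxTextsPerCall = 2048`, `callAPI(texts, options?)` accepts the full text array, and the response schema no longer requires exactly one embedding. A usage sketch under the assumptions that `OPENAI_API_KEY` is configured for the default API configuration, that the class is exported from the package root, and that its settings take the usual `{ model: "text-embedding-ada-002" }` shape:

```ts
import { OpenAITextEmbeddingModel } from "modelfusion";

async function main() {
  const model = new OpenAITextEmbeddingModel({
    model: "text-embedding-ada-002", // assumed settings shape
  });

  // Exposed per the .d.ts change above.
  console.log(model.maxTextsPerCall); // 2048

  // callAPI now takes the array of texts plus optional function options.
  const response = await model.callAPI(["first text", "second text"]);

  // response.data is an array of { object: "embedding", embedding: number[], index: number }.
  console.log(response.data.map((entry) => entry.embedding.length));
}

main().catch(console.error);
```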