unrag 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +616 -174
- package/package.json +12 -6
- package/registry/config/unrag.config.ts +9 -8
- package/registry/connectors/google-drive/client.ts +171 -0
- package/registry/connectors/google-drive/index.ts +10 -0
- package/registry/connectors/google-drive/mime.ts +76 -0
- package/registry/connectors/google-drive/sync.ts +528 -0
- package/registry/connectors/google-drive/types.ts +127 -0
- package/registry/core/context-engine.ts +66 -2
- package/registry/core/types.ts +44 -2
- package/registry/docs/unrag.md +6 -1
- package/registry/embedding/_shared.ts +20 -0
- package/registry/embedding/ai.ts +6 -65
- package/registry/embedding/azure.ts +79 -0
- package/registry/embedding/bedrock.ts +79 -0
- package/registry/embedding/cohere.ts +79 -0
- package/registry/embedding/google.ts +93 -0
- package/registry/embedding/mistral.ts +62 -0
- package/registry/embedding/ollama.ts +75 -0
- package/registry/embedding/openai.ts +79 -0
- package/registry/embedding/openrouter.ts +85 -0
- package/registry/embedding/together.ts +67 -0
- package/registry/embedding/vertex.ts +102 -0
- package/registry/embedding/voyage.ts +159 -0
- package/registry/manifest.json +346 -0
- package/registry/store/drizzle-postgres-pgvector/store.ts +5 -2
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import { embed, embedMany } from "ai";
|
|
2
|
+
import type { EmbeddingProvider } from "../core/types";
|
|
3
|
+
import { requireOptional } from "./_shared";
|
|
4
|
+
|
|
5
|
+
/**
 * Options accepted by `createOllamaEmbeddingProvider`. Every field is optional.
 */
export type OllamaEmbeddingConfig = {
  /**
   * Embedding model id. When omitted, the factory reads
   * `process.env.OLLAMA_EMBEDDING_MODEL` and then a built-in default.
   */
  model?: string;

  /** When truthy, each embed call is aborted after this many milliseconds. */
  timeoutMs?: number;

  /** When set, a dedicated Ollama provider instance is created with this base URL. */
  baseURL?: string;

  /** When set, a dedicated Ollama provider instance is created with these headers. */
  headers?: Record<string, string>;
};
|
|
11
|
+
|
|
12
|
+
/** Model used when neither `config.model` nor `OLLAMA_EMBEDDING_MODEL` is set. */
const DEFAULT_TEXT_MODEL = "nomic-embed-text";
|
|
13
|
+
|
|
14
|
+
/**
 * Picks the Ollama provider object that embedding models are created from.
 *
 * The optional `ollama-ai-provider-v2` package is loaded through
 * `requireOptional`. Its shared `ollama` export is returned unless the caller
 * supplied a `baseURL` and/or `headers`, in which case `createOllama` is
 * called with only the settings that were actually provided.
 */
const resolveProvider = (config: OllamaEmbeddingConfig) => {
  const ollamaModule = requireOptional<any>({
    id: "ollama-ai-provider-v2",
    installHint: "bun add ollama-ai-provider-v2",
    providerName: "ollama",
  });

  const { baseURL, headers } = config;

  // No customisation requested: reuse the package's default instance.
  if (!baseURL && !headers) {
    return ollamaModule.ollama;
  }

  const settings: { baseURL?: string; headers?: Record<string, string> } = {};
  if (baseURL) {
    settings.baseURL = baseURL;
  }
  if (headers) {
    settings.headers = headers;
  }
  return ollamaModule.createOllama(settings);
};
|
|
28
|
+
|
|
29
|
+
/**
 * Creates an `EmbeddingProvider` backed by Ollama.
 *
 * The model id is resolved from `config.model`, then
 * `process.env.OLLAMA_EMBEDDING_MODEL`, then `DEFAULT_TEXT_MODEL`.
 *
 * @param config Optional overrides; see `OllamaEmbeddingConfig`.
 * @returns A provider named `ollama:<model>` with `dimensions` left undefined.
 * @throws Error from `embed` / `embedMany` when the SDK result carries no
 *   embedding(s).
 */
export const createOllamaEmbeddingProvider = (
  config: OllamaEmbeddingConfig = {}
): EmbeddingProvider => {
  const model =
    config.model ?? process.env.OLLAMA_EMBEDDING_MODEL ?? DEFAULT_TEXT_MODEL;
  const timeoutMs = config.timeoutMs;
  const provider = resolveProvider(config);

  // Prefer `embeddingModel` when the provider exposes it and fall back to
  // `textEmbeddingModel` otherwise, so the factory works with either naming.
  const embeddingModel =
    typeof provider.embeddingModel === "function"
      ? provider.embeddingModel(model)
      : provider.textEmbeddingModel(model);

  // A fresh signal per call; a falsy timeout (undefined or 0) means no timeout.
  const createAbortSignal = () =>
    timeoutMs ? AbortSignal.timeout(timeoutMs) : undefined;

  return {
    name: `ollama:${model}`,
    dimensions: undefined,
    embed: async ({ text }) => {
      const abortSignal = createAbortSignal();

      const result = await embed({
        model: embeddingModel,
        value: text,
        ...(abortSignal ? { abortSignal } : {}),
      });

      if (!result.embedding) {
        throw new Error("Embedding missing from Ollama response");
      }

      return result.embedding;
    },
    embedMany: async (inputs) => {
      const values = inputs.map((i) => i.text);
      const abortSignal = createAbortSignal();

      const result = await embedMany({
        model: embeddingModel,
        values,
        ...(abortSignal ? { abortSignal } : {}),
      });

      const { embeddings } = result;
      if (!Array.isArray(embeddings)) {
        throw new Error("Embeddings missing from Ollama embedMany response");
      }
      return embeddings;
    },
  };
};
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { embed, embedMany } from "ai";
|
|
2
|
+
import type { EmbeddingProvider } from "../core/types";
|
|
3
|
+
import { requireOptional } from "./_shared";
|
|
4
|
+
|
|
5
|
+
/**
 * Options accepted by `createOpenAiEmbeddingProvider`. Every field is optional.
 */
export type OpenAiEmbeddingConfig = {
  /**
   * Embedding model id. When omitted, the factory reads
   * `process.env.OPENAI_EMBEDDING_MODEL` and then a built-in default.
   */
  model?: string;

  /** When truthy, each embed call is aborted after this many milliseconds. */
  timeoutMs?: number;

  /**
   * Forwarded as the `openai.dimensions` provider option and reported as the
   * provider's `dimensions`.
   */
  dimensions?: number;

  /** Forwarded as the `openai.user` provider option when non-empty. */
  user?: string;
};
|
|
11
|
+
|
|
12
|
+
/** Model used when neither `config.model` nor `OPENAI_EMBEDDING_MODEL` is set. */
const DEFAULT_TEXT_MODEL = "text-embedding-3-small";
|
|
13
|
+
|
|
14
|
+
/**
 * Builds the `providerOptions` payload for OpenAI embedding calls.
 *
 * `dimensions` is forwarded whenever it is defined (including `0`); `user` is
 * forwarded only when it is a non-empty string.
 *
 * @returns `{ openai: {...} }` when at least one option is forwarded,
 *   otherwise `undefined` so callers can omit `providerOptions` entirely.
 */
const buildProviderOptions = (config: OpenAiEmbeddingConfig) => {
  const openai: Pick<OpenAiEmbeddingConfig, "dimensions" | "user"> = {};

  if (config.dimensions !== undefined) {
    openai.dimensions = config.dimensions;
  }
  if (config.user) {
    openai.user = config.user;
  }

  // Decide on what was actually collected, so an empty `user` string cannot
  // produce a hollow `{ openai: {} }` payload.
  return Object.keys(openai).length > 0 ? { openai } : undefined;
};
|
|
25
|
+
|
|
26
|
+
/**
 * Creates an `EmbeddingProvider` backed by the `@ai-sdk/openai` package.
 *
 * The model id is resolved from `config.model`, then
 * `process.env.OPENAI_EMBEDDING_MODEL`, then `DEFAULT_TEXT_MODEL`.
 *
 * @param config Optional overrides; see `OpenAiEmbeddingConfig`.
 * @returns A provider named `openai:<model>` whose `dimensions` mirrors
 *   `config.dimensions`.
 * @throws Error from `embed` / `embedMany` when the SDK result carries no
 *   embedding(s).
 */
export const createOpenAiEmbeddingProvider = (
  config: OpenAiEmbeddingConfig = {}
): EmbeddingProvider => {
  const openaiModule = requireOptional<any>({
    id: "@ai-sdk/openai",
    installHint: "bun add @ai-sdk/openai",
    providerName: "openai",
  });
  const { openai } = openaiModule;

  const model =
    config.model ?? process.env.OPENAI_EMBEDDING_MODEL ?? DEFAULT_TEXT_MODEL;
  const { timeoutMs } = config;
  const providerOptions = buildProviderOptions(config);
  const embeddingModel = openai.embedding(model);

  // Per-call extras shared by both methods; each key is only present when set.
  const callExtras = () => {
    const abortSignal = timeoutMs ? AbortSignal.timeout(timeoutMs) : undefined;
    return {
      ...(providerOptions ? { providerOptions } : {}),
      ...(abortSignal ? { abortSignal } : {}),
    };
  };

  return {
    name: `openai:${model}`,
    dimensions: config.dimensions,
    embed: async ({ text }) => {
      const { embedding } = await embed({
        model: embeddingModel,
        value: text,
        ...callExtras(),
      });

      if (!embedding) {
        throw new Error("Embedding missing from OpenAI response");
      }
      return embedding;
    },
    embedMany: async (inputs) => {
      const values = inputs.map((input) => input.text);

      const { embeddings } = await embedMany({
        model: embeddingModel,
        values,
        ...callExtras(),
      });

      if (!Array.isArray(embeddings)) {
        throw new Error("Embeddings missing from OpenAI embedMany response");
      }
      return embeddings;
    },
  };
};
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import type { EmbeddingProvider } from "../core/types";
|
|
2
|
+
import { requireOptional } from "./_shared";
|
|
3
|
+
|
|
4
|
+
/**
 * Options accepted by `createOpenRouterEmbeddingProvider`. Every field is
 * optional.
 */
export type OpenRouterEmbeddingConfig = {
  /**
   * Embedding model id. When omitted, the factory reads
   * `process.env.OPENROUTER_EMBEDDING_MODEL` and then a built-in default.
   */
  model?: string;

  /** When truthy, each request is aborted after this many milliseconds. */
  timeoutMs?: number;

  /** API key; when omitted, the factory reads `process.env.OPENROUTER_API_KEY`. */
  apiKey?: string;

  /** Passed to the OpenRouter client as `baseURL` when set. */
  baseURL?: string;

  /** Extra headers passed to the OpenRouter client. */
  headers?: Record<string, string>;

  /** Sent as the `HTTP-Referer` header when non-empty. */
  referer?: string;

  /** Sent as the `X-Title` header when non-empty. */
  title?: string;
};
|
|
13
|
+
|
|
14
|
+
/** Model used when neither `config.model` nor `OPENROUTER_EMBEDDING_MODEL` is set. */
const DEFAULT_TEXT_MODEL = "text-embedding-3-small";
|
|
15
|
+
|
|
16
|
+
/**
 * Assembles the header map handed to the OpenRouter client.
 *
 * Starts from a shallow copy of `config.headers` (the caller's object is never
 * mutated), then sets `HTTP-Referer` and `X-Title` from `config.referer` and
 * `config.title` when those are non-empty, overriding same-named entries.
 */
const buildHeaders = (config: OpenRouterEmbeddingConfig) => {
  const { headers: baseHeaders, referer, title } = config;
  const merged: Record<string, string> = { ...baseHeaders };

  if (referer) {
    merged["HTTP-Referer"] = referer;
  }
  if (title) {
    merged["X-Title"] = title;
  }

  return merged;
};
|
|
22
|
+
|
|
23
|
+
/**
 * Creates an `EmbeddingProvider` that calls OpenRouter through the
 * `@openrouter/sdk` client (`client.embeddings.generate`).
 *
 * The model id is resolved from `config.model`, then
 * `process.env.OPENROUTER_EMBEDDING_MODEL`, then `DEFAULT_TEXT_MODEL`. The API
 * key comes from `config.apiKey`, then `process.env.OPENROUTER_API_KEY`, and
 * finally an empty string.
 *
 * @param config Optional overrides; see `OpenRouterEmbeddingConfig`.
 * @returns A provider named `openrouter:<model>` with `dimensions` left
 *   undefined.
 * @throws Error from `embed` / `embedMany` when no embedding can be read from
 *   the response.
 */
export const createOpenRouterEmbeddingProvider = (
  config: OpenRouterEmbeddingConfig = {}
): EmbeddingProvider => {
  const sdk = requireOptional<any>({
    id: "@openrouter/sdk",
    installHint: "bun add @openrouter/sdk",
    providerName: "openrouter",
  });
  const { OpenRouter } = sdk;

  const model =
    config.model ?? process.env.OPENROUTER_EMBEDDING_MODEL ?? DEFAULT_TEXT_MODEL;
  const { timeoutMs } = config;
  const headers = buildHeaders(config);
  const hasHeaders = Object.keys(headers).length > 0;

  const client = new OpenRouter({
    apiKey: config.apiKey ?? process.env.OPENROUTER_API_KEY ?? "",
    ...(config.baseURL ? { baseURL: config.baseURL } : {}),
    ...(hasHeaders ? { headers } : {}),
  } as any);

  // Second argument for `generate`: carries the timeout signal, or is
  // `undefined` when no timeout is configured.
  const requestOptions = () => {
    const abortSignal = timeoutMs ? AbortSignal.timeout(timeoutMs) : undefined;
    return abortSignal ? { fetchOptions: { signal: abortSignal } } : undefined;
  };

  return {
    name: `openrouter:${model}`,
    dimensions: undefined,
    embed: async ({ text }) => {
      const options = requestOptions();
      const response = await (client as any).embeddings.generate(
        { input: text, model },
        options
      );

      // Accept the embedding from any of the response shapes probed here.
      const payload = response as any;
      const embedding =
        payload?.data?.[0]?.embedding ??
        payload?.embedding ??
        payload?.data?.embedding;

      if (!embedding) {
        throw new Error("Embedding missing from OpenRouter response");
      }
      return embedding as number[];
    },
    embedMany: async (inputs) => {
      const values = inputs.map((input) => input.text);
      const options = requestOptions();

      const response = await (client as any).embeddings.generate(
        { input: values, model },
        options
      );

      const embeddings = (response as any)?.data?.map(
        (item: { embedding?: number[] }) => item.embedding
      );

      const allPresent =
        Array.isArray(embeddings) && embeddings.every((e) => Array.isArray(e));
      if (!allPresent) {
        throw new Error("Embeddings missing from OpenRouter response");
      }

      return embeddings as number[][];
    },
  };
};
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { embed, embedMany } from "ai";
|
|
2
|
+
import type { EmbeddingProvider } from "../core/types";
|
|
3
|
+
import { requireOptional } from "./_shared";
|
|
4
|
+
|
|
5
|
+
/**
 * Options accepted by `createTogetherEmbeddingProvider`. Every field is
 * optional.
 */
export type TogetherEmbeddingConfig = {
  /**
   * Embedding model id. When omitted, the factory reads
   * `process.env.TOGETHER_AI_EMBEDDING_MODEL` and then a built-in default.
   */
  model?: string;

  /** When truthy, each embed call is aborted after this many milliseconds. */
  timeoutMs?: number;
};
|
|
9
|
+
|
|
10
|
+
/** Model used when neither `config.model` nor `TOGETHER_AI_EMBEDDING_MODEL` is set. */
const DEFAULT_TEXT_MODEL = "togethercomputer/m2-bert-80M-2k-retrieval";
|
|
11
|
+
|
|
12
|
+
/**
 * Creates an `EmbeddingProvider` backed by the `@ai-sdk/togetherai` package.
 *
 * The model id is resolved from `config.model`, then
 * `process.env.TOGETHER_AI_EMBEDDING_MODEL`, then `DEFAULT_TEXT_MODEL`.
 *
 * @param config Optional overrides; see `TogetherEmbeddingConfig`.
 * @returns A provider named `together:<model>` with `dimensions` left
 *   undefined.
 * @throws Error from `embed` / `embedMany` when the SDK result carries no
 *   embedding(s).
 */
export const createTogetherEmbeddingProvider = (
  config: TogetherEmbeddingConfig = {}
): EmbeddingProvider => {
  const togetherModule = requireOptional<any>({
    id: "@ai-sdk/togetherai",
    installHint: "bun add @ai-sdk/togetherai",
    providerName: "together",
  });
  const together = togetherModule.togetherai as any;

  const model =
    config.model ??
    process.env.TOGETHER_AI_EMBEDDING_MODEL ??
    DEFAULT_TEXT_MODEL;
  const { timeoutMs } = config;

  // Use `embeddingModel` when the provider has it, else `textEmbeddingModel`.
  const factoryName =
    "embeddingModel" in together ? "embeddingModel" : "textEmbeddingModel";
  const embeddingModel = together[factoryName](model);

  // Spreadable `{ abortSignal }` when a timeout is configured, else `{}`.
  const signalOptions = () => {
    const abortSignal = timeoutMs ? AbortSignal.timeout(timeoutMs) : undefined;
    return abortSignal ? { abortSignal } : {};
  };

  return {
    name: `together:${model}`,
    dimensions: undefined,
    embed: async ({ text }) => {
      const { embedding } = await embed({
        model: embeddingModel,
        value: text,
        ...signalOptions(),
      });

      if (!embedding) {
        throw new Error("Embedding missing from Together.ai response");
      }
      return embedding;
    },
    embedMany: async (inputs) => {
      const values = inputs.map((input) => input.text);

      const { embeddings } = await embedMany({
        model: embeddingModel,
        values,
        ...signalOptions(),
      });

      if (!Array.isArray(embeddings)) {
        throw new Error("Embeddings missing from Together.ai embedMany response");
      }
      return embeddings;
    },
  };
};
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { embed, embedMany } from "ai";
|
|
2
|
+
import type { EmbeddingProvider } from "../core/types";
|
|
3
|
+
import { requireOptional } from "./_shared";
|
|
4
|
+
|
|
5
|
+
/**
 * Task-type hints that can be forwarded to Vertex embedding requests via
 * `VertexEmbeddingConfig.taskType`.
 */
export type VertexEmbeddingTaskType =
  | "SEMANTIC_SIMILARITY"
  | "CLASSIFICATION"
  | "CLUSTERING"
  | "RETRIEVAL_DOCUMENT"
  | "RETRIEVAL_QUERY"
  | "QUESTION_ANSWERING"
  | "FACT_VERIFICATION"
  | "CODE_RETRIEVAL_QUERY";
|
|
14
|
+
|
|
15
|
+
/**
 * Options accepted by `createVertexEmbeddingProvider`. Every field is optional.
 */
export type VertexEmbeddingConfig = {
  /**
   * Embedding model id. When omitted, the factory reads
   * `process.env.GOOGLE_VERTEX_EMBEDDING_MODEL` and then a built-in default.
   */
  model?: string;

  /** When truthy, each embed call is aborted after this many milliseconds. */
  timeoutMs?: number;

  /**
   * Forwarded as the `google.outputDimensionality` provider option and
   * reported as the provider's `dimensions`.
   */
  outputDimensionality?: number;

  /** Forwarded as the `google.taskType` provider option when set. */
  taskType?: VertexEmbeddingTaskType;

  /** Forwarded as the `google.title` provider option when non-empty. */
  title?: string;

  /** Forwarded as the `google.autoTruncate` provider option when defined. */
  autoTruncate?: boolean;
};
|
|
23
|
+
|
|
24
|
+
/** Model used when neither `config.model` nor `GOOGLE_VERTEX_EMBEDDING_MODEL` is set. */
const DEFAULT_TEXT_MODEL = "text-embedding-004";
|
|
25
|
+
|
|
26
|
+
/**
 * Builds the `providerOptions` payload for Vertex embedding calls.
 *
 * `outputDimensionality` and `autoTruncate` are forwarded whenever they are
 * defined; `taskType` and `title` are forwarded only when truthy.
 *
 * @returns `{ google: {...} }` when at least one option is forwarded,
 *   otherwise `undefined`.
 */
const buildProviderOptions = (config: VertexEmbeddingConfig) => {
  const { outputDimensionality, taskType, autoTruncate, title } = config;
  const google: Pick<
    VertexEmbeddingConfig,
    "outputDimensionality" | "taskType" | "autoTruncate" | "title"
  > = {};

  if (outputDimensionality !== undefined) {
    google.outputDimensionality = outputDimensionality;
  }
  if (taskType) {
    google.taskType = taskType;
  }
  if (autoTruncate !== undefined) {
    google.autoTruncate = autoTruncate;
  }
  if (title) {
    google.title = title;
  }

  if (Object.keys(google).length === 0) {
    return undefined;
  }
  return { google };
};
|
|
46
|
+
|
|
47
|
+
/**
 * Creates an `EmbeddingProvider` backed by the `@ai-sdk/google-vertex` package.
 *
 * The model id is resolved from `config.model`, then
 * `process.env.GOOGLE_VERTEX_EMBEDDING_MODEL`, then `DEFAULT_TEXT_MODEL`.
 *
 * @param config Optional overrides; see `VertexEmbeddingConfig`.
 * @returns A provider named `vertex:<model>` whose `dimensions` mirrors
 *   `config.outputDimensionality`.
 * @throws Error from `embed` / `embedMany` when the SDK result carries no
 *   embedding(s).
 */
export const createVertexEmbeddingProvider = (
  config: VertexEmbeddingConfig = {}
): EmbeddingProvider => {
  const { vertex } = requireOptional<any>({
    id: "@ai-sdk/google-vertex",
    installHint: "bun add @ai-sdk/google-vertex",
    providerName: "vertex",
  });
  const model =
    config.model ??
    process.env.GOOGLE_VERTEX_EMBEDDING_MODEL ??
    DEFAULT_TEXT_MODEL;
  const timeoutMs = config.timeoutMs;
  const providerOptions = buildProviderOptions(config);

  // Prefer `embeddingModel`; fall back to `textEmbeddingModel` for provider
  // versions that only expose the older method name.
  const embeddingModel =
    typeof vertex.embeddingModel === "function"
      ? vertex.embeddingModel(model)
      : vertex.textEmbeddingModel(model);

  // A fresh signal per call; a falsy timeout (undefined or 0) means no timeout.
  const createAbortSignal = () =>
    timeoutMs ? AbortSignal.timeout(timeoutMs) : undefined;

  return {
    name: `vertex:${model}`,
    dimensions: config.outputDimensionality,
    embed: async ({ text }) => {
      const abortSignal = createAbortSignal();

      const result = await embed({
        model: embeddingModel,
        value: text,
        ...(providerOptions ? { providerOptions } : {}),
        ...(abortSignal ? { abortSignal } : {}),
      });

      if (!result.embedding) {
        throw new Error("Embedding missing from Vertex response");
      }

      return result.embedding;
    },
    embedMany: async (inputs) => {
      const values = inputs.map((i) => i.text);
      const abortSignal = createAbortSignal();

      const result = await embedMany({
        model: embeddingModel,
        values,
        ...(providerOptions ? { providerOptions } : {}),
        ...(abortSignal ? { abortSignal } : {}),
      });

      const { embeddings } = result;
      if (!Array.isArray(embeddings)) {
        throw new Error("Embeddings missing from Vertex embedMany response");
      }
      return embeddings;
    },
  };
};
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
import { embed, embedMany, type EmbeddingModel } from "ai";
|
|
2
|
+
import type { EmbeddingProvider, ImageEmbeddingInput } from "../core/types";
|
|
3
|
+
import { requireOptional } from "./_shared";
|
|
4
|
+
|
|
5
|
+
/** Fields shared by every variant of `VoyageEmbeddingConfig`. */
type BaseConfig = {
  /**
   * Embedding model id. When omitted, the factory reads
   * `process.env.VOYAGE_MODEL` and then a mode-specific built-in default.
   */
  model?: string;

  /** When truthy, each embed call is aborted after this many milliseconds. */
  timeoutMs?: number;
};
|
|
9
|
+
|
|
10
|
+
/**
 * Options accepted by `createVoyageEmbeddingProvider`.
 *
 * The `type` field selects the mode: omitted or `"text"` for text-only
 * embeddings, `"multimodal"` to also expose image embedding.
 */
export type VoyageEmbeddingConfig =
  | (BaseConfig & {
      /** Text-only mode (the default when `type` is omitted). */
      type?: "text";
    })
  | (BaseConfig & {
      /** Multimodal mode. */
      type: "multimodal";
      text?: {
        /** Custom mapping from input text to the value passed to the model. */
        value?: (text: string) => unknown;
      };
      image?: {
        /** Custom mapping from an image input to the value passed to the model. */
        value?: (input: ImageEmbeddingInput) => unknown;
      };
    });
|
|
23
|
+
|
|
24
|
+
/** Text-mode model used when neither `config.model` nor `VOYAGE_MODEL` is set. */
const DEFAULT_TEXT_MODEL = "voyage-3.5-lite";
|
|
25
|
+
/** Multimodal-mode model used when neither `config.model` nor `VOYAGE_MODEL` is set. */
const DEFAULT_MULTIMODAL_MODEL = "voyage-multimodal-3";
|
|
26
|
+
|
|
27
|
+
/**
 * Encodes raw bytes as a base64 `data:` URL.
 *
 * @param bytes Binary payload to encode.
 * @param mediaType Media type placed in the URL header, e.g. `image/png`.
 * @returns A string of the form `data:<mediaType>;base64,<payload>`.
 */
const bytesToDataUrl = (bytes: Uint8Array, mediaType: string) => {
  const encoded = Buffer.from(bytes).toString("base64");
  return ["data:", mediaType, ";base64,", encoded].join("");
};
|
|
31
|
+
|
|
32
|
+
/**
 * Default multimodal wrapping for text: the string is placed in a one-element
 * array under the `text` key.
 */
const defaultTextValue = (text: string) => {
  return { text: [text] };
};
|
|
35
|
+
|
|
36
|
+
/**
 * Default multimodal wrapping for an image input.
 *
 * String data is passed through unchanged; binary data is converted to a
 * base64 data URL using `input.mediaType`, or `image/jpeg` when that is
 * missing. The result is a one-element array under the `image` key.
 */
const defaultImageValue = (input: ImageEmbeddingInput) => {
  if (typeof input.data === "string") {
    return { image: [input.data] };
  }
  return {
    image: [bytesToDataUrl(input.data, input.mediaType ?? "image/jpeg")],
  };
};
|
|
43
|
+
|
|
44
|
+
/**
 * Creates an `EmbeddingProvider` backed by the `voyage-ai-provider` package.
 *
 * In text mode (`config.type` omitted or `"text"`) the provider embeds plain
 * strings. In multimodal mode it embeds wrapped text values and additionally
 * exposes `embedImage`. The model id is resolved from `config.model`, then
 * `process.env.VOYAGE_MODEL`, then the default for the selected mode.
 *
 * @param config Optional overrides; see `VoyageEmbeddingConfig`.
 * @returns A provider named `voyage:<model>` with `dimensions` left undefined;
 *   `embedImage` is present only in multimodal mode.
 * @throws Error from the embed methods when the SDK result carries no
 *   embedding(s).
 */
export const createVoyageEmbeddingProvider = (
  config: VoyageEmbeddingConfig = {}
): EmbeddingProvider => {
  const { voyage } = requireOptional<any>({
    id: "voyage-ai-provider",
    installHint: "bun add voyage-ai-provider",
    providerName: "voyage",
  });
  const voyageProvider = voyage as any;

  const isMultimodal = config.type === "multimodal";
  const fallbackModel = isMultimodal
    ? DEFAULT_MULTIMODAL_MODEL
    : DEFAULT_TEXT_MODEL;
  const model = config.model ?? process.env.VOYAGE_MODEL ?? fallbackModel;
  const { timeoutMs } = config;

  // Exactly one model handle is created, depending on the mode.
  let textEmbeddingModel: any;
  let multimodalEmbeddingModel: any;
  if (isMultimodal) {
    multimodalEmbeddingModel = voyageProvider.multimodalEmbeddingModel(model);
  } else if (typeof voyageProvider.embeddingModel === "function") {
    textEmbeddingModel = voyageProvider.embeddingModel(model);
  } else {
    textEmbeddingModel = voyageProvider.textEmbeddingModel(model);
  }

  // AI SDK 6 types only accept string inputs; cast multimodal models/values.
  const multimodalModel = multimodalEmbeddingModel as unknown as EmbeddingModel;

  // The custom mappers are looked up at call time and only in multimodal mode.
  const resolveTextValue = (text: string) =>
    isMultimodal && config.text?.value
      ? config.text.value(text)
      : defaultTextValue(text);

  const resolveImageValue = (input: ImageEmbeddingInput) =>
    isMultimodal && config.image?.value
      ? config.image.value(input)
      : defaultImageValue(input);

  // Spreadable `{ abortSignal }` when a timeout is configured, else `{}`.
  const signalOptions = () => {
    const abortSignal = timeoutMs ? AbortSignal.timeout(timeoutMs) : undefined;
    return abortSignal ? { abortSignal } : {};
  };

  const provider: EmbeddingProvider = {
    name: `voyage:${model}`,
    dimensions: undefined,
    embed: async ({ text }) => {
      const extras = signalOptions();

      const result = isMultimodal
        ? await embed({
            model: multimodalModel,
            value: resolveTextValue(text) as unknown as string,
            ...extras,
          })
        : await embed({
            model: textEmbeddingModel,
            value: text,
            ...extras,
          });

      if (!result.embedding) {
        throw new Error("Embedding missing from Voyage response");
      }
      return result.embedding;
    },
    embedMany: async (inputs) => {
      const extras = signalOptions();

      const result = isMultimodal
        ? await embedMany({
            model: multimodalModel,
            values: inputs.map((input) =>
              resolveTextValue(input.text)
            ) as unknown as string[],
            ...extras,
          })
        : await embedMany({
            model: textEmbeddingModel,
            values: inputs.map((input) => input.text),
            ...extras,
          });

      const { embeddings } = result;
      if (!Array.isArray(embeddings)) {
        throw new Error("Embeddings missing from Voyage embedMany response");
      }
      return embeddings;
    },
  };

  if (!isMultimodal) {
    return provider;
  }

  return {
    ...provider,
    embedImage: async (input: ImageEmbeddingInput) => {
      const extras = signalOptions();
      const value = resolveImageValue(input);

      const result = await embed({
        model: multimodalModel,
        value: value as unknown as string,
        ...extras,
      });

      if (!result.embedding) {
        throw new Error("Embedding missing from Voyage response");
      }
      return result.embedding;
    },
  };
};
|