unrag 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -6,18 +6,24 @@
  "bin": {
  "unrag": "./dist/cli/index.js"
  },
- "version": "0.2.5",
+ "version": "0.2.6",
  "private": false,
  "license": "Apache-2.0",
  "devDependencies": {
+ "@ai-sdk/amazon-bedrock": "^3.0.72",
+ "@ai-sdk/cohere": "^3.0.1",
+ "@ai-sdk/google": "^3.0.1",
+ "@ai-sdk/openai": "^3.0.1",
+ "@openrouter/sdk": "^0.3.10",
+ "@prisma/client": "^6.0.0",
  "@types/bun": "latest",
  "@types/pg": "^8.16.0",
- "@prisma/client": "^6.0.0",
- "prisma": "^6.0.0",
- "drizzle-orm": "^0.45.1",
+ "ai": "^6.0.3",
  "drizzle-kit": "^0.31.8",
- "ai": "^5.0.113",
- "pg": "^8.16.3"
+ "drizzle-orm": "^0.45.1",
+ "pg": "^8.16.3",
+ "prisma": "^6.0.0",
+ "voyage-ai-provider": "^3.0.0"
  },
  "dependencies": {
  "@clack/prompts": "^0.11.0",
@@ -18,19 +18,18 @@
  export const unrag = defineUnragConfig({
  defaults: {
  chunking: {
- chunkSize: 200,
- chunkOverlap: 40,
+ chunkSize: 200, // __UNRAG_DEFAULT_chunkSize__
+ chunkOverlap: 40, // __UNRAG_DEFAULT_chunkOverlap__
  },
  retrieval: {
- topK: 8,
+ topK: 8, // __UNRAG_DEFAULT_topK__
  },
  },
  embedding: {
  provider: "ai",
  config: {
- type: "text", // __UNRAG_EMBEDDING_TYPE__
  model: "openai/text-embedding-3-small", // __UNRAG_EMBEDDING_MODEL__
- timeoutMs: 15_000,
+ timeoutMs: 15_000, // __UNRAG_EMBEDDING_TIMEOUT__
  },
  },
  engine: {
@@ -41,8 +40,8 @@ export const unrag = defineUnragConfig({
  * - storeDocumentContent: whether the full original document text is stored in `documents.content`.
  */
  storage: {
- storeChunkContent: true,
- storeDocumentContent: true,
+ storeChunkContent: true, // __UNRAG_STORAGE_storeChunkContent__
+ storeDocumentContent: true, // __UNRAG_STORAGE_storeDocumentContent__
  },
  /**
  * Optional extractor modules that can process non-text assets into text outputs.
@@ -62,9 +61,10 @@ export const unrag = defineUnragConfig({
  *
  * Notes:
  * - This generated config is cost-safe by default (all extraction is off).
- * - `unrag init` can enable rich media + multimodal embeddings for you.
+ * - `unrag init --rich-media` can enable rich media ingestion for you (extractors + assetProcessing flags).
  * - Tighten fetch allowlists/limits in production if you ingest URL-based assets.
  */
+ // __UNRAG_ASSET_PROCESSING_BLOCK_START__
  assetProcessing: {
  onUnsupportedAsset: "skip",
  onError: "skip",
@@ -181,6 +181,7 @@ export const unrag = defineUnragConfig({
  },
  },
  },
+ // __UNRAG_ASSET_PROCESSING_BLOCK_END__
  },
  } as const);
@@ -3,6 +3,17 @@ import { ingest, planIngest } from "./ingest";
  import { retrieve } from "./retrieve";
  import { defineConfig, resolveConfig } from "./config";
  import { createAiEmbeddingProvider } from "../embedding/ai";
+ import { createOpenAiEmbeddingProvider } from "../embedding/openai";
+ import { createGoogleEmbeddingProvider } from "../embedding/google";
+ import { createOpenRouterEmbeddingProvider } from "../embedding/openrouter";
+ import { createAzureEmbeddingProvider } from "../embedding/azure";
+ import { createVertexEmbeddingProvider } from "../embedding/vertex";
+ import { createBedrockEmbeddingProvider } from "../embedding/bedrock";
+ import { createCohereEmbeddingProvider } from "../embedding/cohere";
+ import { createMistralEmbeddingProvider } from "../embedding/mistral";
+ import { createTogetherEmbeddingProvider } from "../embedding/together";
+ import { createOllamaEmbeddingProvider } from "../embedding/ollama";
+ import { createVoyageEmbeddingProvider } from "../embedding/voyage";
  import type {
  AssetExtractor,
  ContextEngineConfig,
@@ -70,6 +81,61 @@ export const defineUnragConfig = <T extends DefineUnragConfigInput>(config: T) =
  return embeddingProvider;
  }

+ if (config.embedding.provider === "openai") {
+ embeddingProvider = createOpenAiEmbeddingProvider(config.embedding.config);
+ return embeddingProvider;
+ }
+
+ if (config.embedding.provider === "google") {
+ embeddingProvider = createGoogleEmbeddingProvider(config.embedding.config);
+ return embeddingProvider;
+ }
+
+ if (config.embedding.provider === "openrouter") {
+ embeddingProvider = createOpenRouterEmbeddingProvider(config.embedding.config);
+ return embeddingProvider;
+ }
+
+ if (config.embedding.provider === "azure") {
+ embeddingProvider = createAzureEmbeddingProvider(config.embedding.config);
+ return embeddingProvider;
+ }
+
+ if (config.embedding.provider === "vertex") {
+ embeddingProvider = createVertexEmbeddingProvider(config.embedding.config);
+ return embeddingProvider;
+ }
+
+ if (config.embedding.provider === "bedrock") {
+ embeddingProvider = createBedrockEmbeddingProvider(config.embedding.config);
+ return embeddingProvider;
+ }
+
+ if (config.embedding.provider === "cohere") {
+ embeddingProvider = createCohereEmbeddingProvider(config.embedding.config);
+ return embeddingProvider;
+ }
+
+ if (config.embedding.provider === "mistral") {
+ embeddingProvider = createMistralEmbeddingProvider(config.embedding.config);
+ return embeddingProvider;
+ }
+
+ if (config.embedding.provider === "together") {
+ embeddingProvider = createTogetherEmbeddingProvider(config.embedding.config);
+ return embeddingProvider;
+ }
+
+ if (config.embedding.provider === "ollama") {
+ embeddingProvider = createOllamaEmbeddingProvider(config.embedding.config);
+ return embeddingProvider;
+ }
+
+ if (config.embedding.provider === "voyage") {
+ embeddingProvider = createVoyageEmbeddingProvider(config.embedding.config);
+ return embeddingProvider;
+ }
+
  embeddingProvider = config.embedding.create();
  return embeddingProvider;
  };
@@ -104,5 +170,3 @@ export const defineUnragConfig = <T extends DefineUnragConfigInput>(config: T) =
  new ContextEngine(createEngineConfig(runtime)),
  };
  };
-
-
@@ -670,6 +670,50 @@ export type UnragEmbeddingConfig =
  provider: "ai";
  config?: import("../embedding/ai").AiEmbeddingConfig;
  }
+ | {
+ provider: "openai";
+ config?: import("../embedding/openai").OpenAiEmbeddingConfig;
+ }
+ | {
+ provider: "google";
+ config?: import("../embedding/google").GoogleEmbeddingConfig;
+ }
+ | {
+ provider: "openrouter";
+ config?: import("../embedding/openrouter").OpenRouterEmbeddingConfig;
+ }
+ | {
+ provider: "azure";
+ config?: import("../embedding/azure").AzureEmbeddingConfig;
+ }
+ | {
+ provider: "vertex";
+ config?: import("../embedding/vertex").VertexEmbeddingConfig;
+ }
+ | {
+ provider: "bedrock";
+ config?: import("../embedding/bedrock").BedrockEmbeddingConfig;
+ }
+ | {
+ provider: "cohere";
+ config?: import("../embedding/cohere").CohereEmbeddingConfig;
+ }
+ | {
+ provider: "mistral";
+ config?: import("../embedding/mistral").MistralEmbeddingConfig;
+ }
+ | {
+ provider: "together";
+ config?: import("../embedding/together").TogetherEmbeddingConfig;
+ }
+ | {
+ provider: "ollama";
+ config?: import("../embedding/ollama").OllamaEmbeddingConfig;
+ }
+ | {
+ provider: "voyage";
+ config?: import("../embedding/voyage").VoyageEmbeddingConfig;
+ }
  | {
  provider: "custom";
  /**
@@ -736,5 +780,3 @@ export type ResolvedContextEngineConfig = {
  storage: ContentStorageConfig;
  assetProcessing: AssetProcessingConfig;
  };
-
-
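Taken together with the provider dispatch added to `defineUnragConfig` above, the widened `UnragEmbeddingConfig` union lets a config pick any of the new providers by name and get that provider's config type. A minimal sketch, assuming the union is imported from the types module shown in this hunk (the import path and the literal option values are illustrative; the field names come from `GoogleEmbeddingConfig` later in this diff):

```ts
import type { UnragEmbeddingConfig } from "./types"; // hypothetical path to the module in this hunk

// Illustrative values only; each union member pairs a provider name with that
// provider's optional config type (GoogleEmbeddingConfig here).
export const embedding: UnragEmbeddingConfig = {
  provider: "google",
  config: {
    model: "gemini-embedding-001",  // default in the new google provider
    taskType: "RETRIEVAL_DOCUMENT", // forwarded as providerOptions.google.taskType
    outputDimensionality: 768,      // also surfaced as the provider's `dimensions`
  },
};
```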
@@ -8,9 +8,14 @@ Unrag installs a small RAG module into your codebase with:

  Add these to your environment:
  - `DATABASE_URL` (Postgres connection string)
- - `AI_GATEWAY_API_KEY` (required by the `ai` SDK when using Vercel AI Gateway)
+ - (Embedding) set the environment variables required by your selected provider.
+
+ If you used the default provider (Vercel AI Gateway):
+ - `AI_GATEWAY_API_KEY`
  - Optional: `AI_GATEWAY_MODEL` (defaults to `openai/text-embedding-3-small`)

+ If you picked a different provider (OpenAI / Google / Voyage / etc.), see the installed provider docs under your Unrag docs site (`/docs/providers/*`).
+
  ## Database requirements

  Enable pgvector:
@@ -0,0 +1,20 @@
+ import { createRequire } from "node:module";
+
+ const require = createRequire(import.meta.url);
+
+ export function requireOptional<T = any>(args: {
+ id: string;
+ installHint: string;
+ providerName: string;
+ }): T {
+ try {
+ return require(args.id) as T;
+ } catch {
+ throw new Error(
+ `Unrag embedding provider "${args.providerName}" requires "${args.id}" to be installed.\n` +
+ `Install it with: ${args.installHint}`
+ );
+ }
+ }
+
+
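Each new provider below calls this helper so its AI SDK package stays an optional peer dependency. A minimal sketch of the call pattern (it mirrors the azure provider in the next hunk; the error text is produced by `requireOptional` itself):

```ts
import { requireOptional } from "./_shared";

// Resolves the optional peer package at runtime. If "@ai-sdk/azure" is not
// installed, this throws:
//   Unrag embedding provider "azure" requires "@ai-sdk/azure" to be installed.
//   Install it with: bun add @ai-sdk/azure
const { azure } = requireOptional<any>({
  id: "@ai-sdk/azure",
  installHint: "bun add @ai-sdk/azure",
  providerName: "azure",
});
```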
@@ -1,5 +1,5 @@
  import { embed, embedMany } from "ai";
- import type { EmbeddingProvider, ImageEmbeddingInput } from "../core/types";
+ import type { EmbeddingProvider } from "../core/types";

  type BaseConfig = {
  /**
@@ -9,52 +9,18 @@ type BaseConfig = {
  timeoutMs?: number;
  };

- export type AiEmbeddingConfig =
- | (BaseConfig & {
- /**
- * Defaults to "text" for backwards compatibility.
- * - "text": only supports embedding strings
- * - "multimodal": additionally enables `embedImage()` for image assets (when the model supports it)
- */
- type?: "text";
- })
- | (BaseConfig & {
- type: "multimodal";
- /**
- * Controls how images are translated into AI SDK `embed()` values.
- * Different providers use different shapes; this is the escape hatch.
- */
- image?: {
- value?: (input: ImageEmbeddingInput) => unknown;
- };
- });
+ /**
+ * Text-only embedding config for the AI SDK provider.
+ */
+ export type AiEmbeddingConfig = BaseConfig;

  const DEFAULT_TEXT_MODEL = "openai/text-embedding-3-small";
- const DEFAULT_MULTIMODAL_MODEL = "cohere/embed-v4.0";
-
- const bytesToDataUrl = (bytes: Uint8Array, mediaType: string) => {
- const base64 = Buffer.from(bytes).toString("base64");
- return `data:${mediaType};base64,${base64}`;
- };
-
- const defaultImageValue = (input: ImageEmbeddingInput) => {
- const v =
- typeof input.data === "string"
- ? input.data
- : bytesToDataUrl(input.data, input.mediaType ?? "image/jpeg");
- // This matches common AI Gateway multimodal embedding inputs where
- // the embedding value is an object containing one or more images.
- return { image: [v] };
- };

  export const createAiEmbeddingProvider = (
  config: AiEmbeddingConfig = {}
  ): EmbeddingProvider => {
- const type = (config as any).type ?? "text";
  const model =
- config.model ??
- process.env.AI_GATEWAY_MODEL ??
- (type === "multimodal" ? DEFAULT_MULTIMODAL_MODEL : DEFAULT_TEXT_MODEL);
+ config.model ?? process.env.AI_GATEWAY_MODEL ?? DEFAULT_TEXT_MODEL;
  const timeoutMs = config.timeoutMs;

  return {
@@ -93,31 +59,6 @@ export const createAiEmbeddingProvider = (
  }
  return embeddings;
  },
- ...(type === "multimodal"
- ? {
- embedImage: async (input: ImageEmbeddingInput) => {
- const abortSignal = timeoutMs
- ? AbortSignal.timeout(timeoutMs)
- : undefined;
-
- const imageValue =
- (config as any)?.image?.value?.(input) ?? defaultImageValue(input);
-
- const result = await embed({
- model,
- value: imageValue,
- ...(abortSignal ? { abortSignal } : {}),
- });
-
- if (!result.embedding) {
- throw new Error("Embedding missing from AI SDK response");
- }
-
- return result.embedding;
- },
- }
- : {}),
  };
  };

-
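With the multimodal branch removed, the `ai` provider now exposes only text embedding. A minimal usage sketch of the remaining surface (the relative import path and the literal option values are illustrative):

```ts
import { createAiEmbeddingProvider } from "../embedding/ai"; // path as imported by the engine above

const provider = createAiEmbeddingProvider({
  model: "openai/text-embedding-3-small", // falls back to AI_GATEWAY_MODEL, then DEFAULT_TEXT_MODEL
  timeoutMs: 15_000,                      // applied per call via AbortSignal.timeout()
});

const one = await provider.embed({ text: "hello unrag" });
const many = await provider.embedMany([{ text: "first chunk" }, { text: "second chunk" }]);
```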
@@ -0,0 +1,79 @@
+ import { embed, embedMany } from "ai";
+ import type { EmbeddingProvider } from "../core/types";
+ import { requireOptional } from "./_shared";
+
+ export type AzureEmbeddingConfig = {
+ model?: string;
+ timeoutMs?: number;
+ dimensions?: number;
+ user?: string;
+ };
+
+ const DEFAULT_TEXT_MODEL = "text-embedding-3-small";
+
+ const buildProviderOptions = (config: AzureEmbeddingConfig) => {
+ if (config.dimensions === undefined && config.user === undefined) {
+ return undefined;
+ }
+ return {
+ openai: {
+ ...(config.dimensions !== undefined ? { dimensions: config.dimensions } : {}),
+ ...(config.user ? { user: config.user } : {}),
+ },
+ };
+ };
+
+ export const createAzureEmbeddingProvider = (
+ config: AzureEmbeddingConfig = {}
+ ): EmbeddingProvider => {
+ const { azure } = requireOptional<any>({
+ id: "@ai-sdk/azure",
+ installHint: "bun add @ai-sdk/azure",
+ providerName: "azure",
+ });
+ const model =
+ config.model ?? process.env.AZURE_EMBEDDING_MODEL ?? DEFAULT_TEXT_MODEL;
+ const timeoutMs = config.timeoutMs;
+ const providerOptions = buildProviderOptions(config);
+ const embeddingModel = azure.embedding(model);
+
+ return {
+ name: `azure:${model}`,
+ dimensions: config.dimensions,
+ embed: async ({ text }) => {
+ const abortSignal = timeoutMs
+ ? AbortSignal.timeout(timeoutMs)
+ : undefined;
+
+ const result = await embed({
+ model: embeddingModel,
+ value: text,
+ ...(providerOptions ? { providerOptions } : {}),
+ ...(abortSignal ? { abortSignal } : {}),
+ });
+
+ if (!result.embedding) {
+ throw new Error("Embedding missing from Azure OpenAI response");
+ }
+
+ return result.embedding;
+ },
+ embedMany: async (inputs) => {
+ const values = inputs.map((i) => i.text);
+ const abortSignal = timeoutMs ? AbortSignal.timeout(timeoutMs) : undefined;
+
+ const result = await embedMany({
+ model: embeddingModel,
+ values,
+ ...(providerOptions ? { providerOptions } : {}),
+ ...(abortSignal ? { abortSignal } : {}),
+ });
+
+ const { embeddings } = result;
+ if (!Array.isArray(embeddings)) {
+ throw new Error("Embeddings missing from Azure OpenAI embedMany response");
+ }
+ return embeddings;
+ },
+ };
+ };
@@ -0,0 +1,79 @@
+ import { embed, embedMany } from "ai";
+ import type { EmbeddingProvider } from "../core/types";
+ import { requireOptional } from "./_shared";
+
+ export type BedrockEmbeddingConfig = {
+ model?: string;
+ timeoutMs?: number;
+ dimensions?: number;
+ normalize?: boolean;
+ };
+
+ const DEFAULT_TEXT_MODEL = "amazon.titan-embed-text-v2:0";
+
+ const buildProviderOptions = (config: BedrockEmbeddingConfig) => {
+ if (config.dimensions === undefined && config.normalize === undefined) {
+ return undefined;
+ }
+ return {
+ bedrock: {
+ ...(config.dimensions !== undefined ? { dimensions: config.dimensions } : {}),
+ ...(config.normalize !== undefined ? { normalize: config.normalize } : {}),
+ },
+ };
+ };
+
+ export const createBedrockEmbeddingProvider = (
+ config: BedrockEmbeddingConfig = {}
+ ): EmbeddingProvider => {
+ const { bedrock } = requireOptional<any>({
+ id: "@ai-sdk/amazon-bedrock",
+ installHint: "bun add @ai-sdk/amazon-bedrock",
+ providerName: "bedrock",
+ });
+ const model =
+ config.model ?? process.env.BEDROCK_EMBEDDING_MODEL ?? DEFAULT_TEXT_MODEL;
+ const timeoutMs = config.timeoutMs;
+ const providerOptions = buildProviderOptions(config);
+ const embeddingModel = bedrock.embedding(model);
+
+ return {
+ name: `bedrock:${model}`,
+ dimensions: config.dimensions,
+ embed: async ({ text }) => {
+ const abortSignal = timeoutMs
+ ? AbortSignal.timeout(timeoutMs)
+ : undefined;
+
+ const result = await embed({
+ model: embeddingModel,
+ value: text,
+ ...(providerOptions ? { providerOptions } : {}),
+ ...(abortSignal ? { abortSignal } : {}),
+ });
+
+ if (!result.embedding) {
+ throw new Error("Embedding missing from Bedrock response");
+ }
+
+ return result.embedding;
+ },
+ embedMany: async (inputs) => {
+ const values = inputs.map((i) => i.text);
+ const abortSignal = timeoutMs ? AbortSignal.timeout(timeoutMs) : undefined;
+
+ const result = await embedMany({
+ model: embeddingModel,
+ values,
+ ...(providerOptions ? { providerOptions } : {}),
+ ...(abortSignal ? { abortSignal } : {}),
+ });
+
+ const { embeddings } = result;
+ if (!Array.isArray(embeddings)) {
+ throw new Error("Embeddings missing from Bedrock embedMany response");
+ }
+ return embeddings;
+ },
+ };
+ };
@@ -0,0 +1,79 @@
+ import { embed, embedMany } from "ai";
+ import type { EmbeddingProvider } from "../core/types";
+ import { requireOptional } from "./_shared";
+
+ export type CohereEmbeddingConfig = {
+ model?: string;
+ timeoutMs?: number;
+ inputType?: "search_document" | "search_query" | "classification" | "clustering";
+ truncate?: "NONE" | "START" | "END";
+ };
+
+ const DEFAULT_TEXT_MODEL = "embed-english-v3.0";
+
+ const buildProviderOptions = (config: CohereEmbeddingConfig) => {
+ if (!config.inputType && !config.truncate) {
+ return undefined;
+ }
+ return {
+ cohere: {
+ ...(config.inputType ? { inputType: config.inputType } : {}),
+ ...(config.truncate ? { truncate: config.truncate } : {}),
+ },
+ };
+ };
+
+ export const createCohereEmbeddingProvider = (
+ config: CohereEmbeddingConfig = {}
+ ): EmbeddingProvider => {
+ const { cohere } = requireOptional<any>({
+ id: "@ai-sdk/cohere",
+ installHint: "bun add @ai-sdk/cohere",
+ providerName: "cohere",
+ });
+ const model =
+ config.model ?? process.env.COHERE_EMBEDDING_MODEL ?? DEFAULT_TEXT_MODEL;
+ const timeoutMs = config.timeoutMs;
+ const providerOptions = buildProviderOptions(config);
+ const embeddingModel = cohere.embedding(model);
+
+ return {
+ name: `cohere:${model}`,
+ dimensions: undefined,
+ embed: async ({ text }) => {
+ const abortSignal = timeoutMs
+ ? AbortSignal.timeout(timeoutMs)
+ : undefined;
+
+ const result = await embed({
+ model: embeddingModel,
+ value: text,
+ ...(providerOptions ? { providerOptions } : {}),
+ ...(abortSignal ? { abortSignal } : {}),
+ });
+
+ if (!result.embedding) {
+ throw new Error("Embedding missing from Cohere response");
+ }
+
+ return result.embedding;
+ },
+ embedMany: async (inputs) => {
+ const values = inputs.map((i) => i.text);
+ const abortSignal = timeoutMs ? AbortSignal.timeout(timeoutMs) : undefined;
+
+ const result = await embedMany({
+ model: embeddingModel,
+ values,
+ ...(providerOptions ? { providerOptions } : {}),
+ ...(abortSignal ? { abortSignal } : {}),
+ });
+
+ const { embeddings } = result;
+ if (!Array.isArray(embeddings)) {
+ throw new Error("Embeddings missing from Cohere embedMany response");
+ }
+ return embeddings;
+ },
+ };
+ };
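A minimal usage sketch of the Cohere factory above, assuming `@ai-sdk/cohere` is installed as suggested by its `installHint` (the import path and the literal option values are illustrative; the field names come from `CohereEmbeddingConfig`):

```ts
import { createCohereEmbeddingProvider } from "../embedding/cohere"; // path as imported by the engine above

const provider = createCohereEmbeddingProvider({
  model: "embed-english-v3.0",  // default model; also overridable via COHERE_EMBEDDING_MODEL
  inputType: "search_document", // forwarded as providerOptions.cohere.inputType
  timeoutMs: 15_000,
});

const vector = await provider.embed({ text: "pgvector + unrag" });
```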
@@ -0,0 +1,93 @@
+ import { embed, embedMany } from "ai";
+ import type { EmbeddingProvider } from "../core/types";
+ import { requireOptional } from "./_shared";
+
+ export type GoogleEmbeddingTaskType =
+ | "SEMANTIC_SIMILARITY"
+ | "CLASSIFICATION"
+ | "CLUSTERING"
+ | "RETRIEVAL_DOCUMENT"
+ | "RETRIEVAL_QUERY"
+ | "QUESTION_ANSWERING"
+ | "FACT_VERIFICATION"
+ | "CODE_RETRIEVAL_QUERY";
+
+ export type GoogleEmbeddingConfig = {
+ model?: string;
+ timeoutMs?: number;
+ outputDimensionality?: number;
+ taskType?: GoogleEmbeddingTaskType;
+ };
+
+ const DEFAULT_TEXT_MODEL = "gemini-embedding-001";
+
+ const buildProviderOptions = (config: GoogleEmbeddingConfig) => {
+ if (config.outputDimensionality === undefined && config.taskType === undefined) {
+ return undefined;
+ }
+ return {
+ google: {
+ ...(config.outputDimensionality !== undefined
+ ? { outputDimensionality: config.outputDimensionality }
+ : {}),
+ ...(config.taskType ? { taskType: config.taskType } : {}),
+ },
+ };
+ };
+
+ export const createGoogleEmbeddingProvider = (
+ config: GoogleEmbeddingConfig = {}
+ ): EmbeddingProvider => {
+ const { google } = requireOptional<any>({
+ id: "@ai-sdk/google",
+ installHint: "bun add @ai-sdk/google",
+ providerName: "google",
+ });
+ const model =
+ config.model ??
+ process.env.GOOGLE_GENERATIVE_AI_EMBEDDING_MODEL ??
+ DEFAULT_TEXT_MODEL;
+ const timeoutMs = config.timeoutMs;
+ const providerOptions = buildProviderOptions(config);
+ const embeddingModel = google.embedding(model);
+
+ return {
+ name: `google:${model}`,
+ dimensions: config.outputDimensionality,
+ embed: async ({ text }) => {
+ const abortSignal = timeoutMs
+ ? AbortSignal.timeout(timeoutMs)
+ : undefined;
+
+ const result = await embed({
+ model: embeddingModel,
+ value: text,
+ ...(providerOptions ? { providerOptions } : {}),
+ ...(abortSignal ? { abortSignal } : {}),
+ });
+
+ if (!result.embedding) {
+ throw new Error("Embedding missing from Google response");
+ }
+
+ return result.embedding;
+ },
+ embedMany: async (inputs) => {
+ const values = inputs.map((i) => i.text);
+ const abortSignal = timeoutMs ? AbortSignal.timeout(timeoutMs) : undefined;
+
+ const result = await embedMany({
+ model: embeddingModel,
+ values,
+ ...(providerOptions ? { providerOptions } : {}),
+ ...(abortSignal ? { abortSignal } : {}),
+ });
+
+ const { embeddings } = result;
+ if (!Array.isArray(embeddings)) {
+ throw new Error("Embeddings missing from Google embedMany response");
+ }
+ return embeddings;
+ },
+ };
+ };