unrag 0.2.6 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/registry/connectors/google-drive/_api-types.ts +60 -0
- package/registry/connectors/google-drive/client.ts +99 -38
- package/registry/connectors/google-drive/sync.ts +97 -69
- package/registry/connectors/google-drive/types.ts +76 -37
- package/registry/connectors/notion/client.ts +12 -3
- package/registry/connectors/notion/render.ts +62 -23
- package/registry/connectors/notion/sync.ts +30 -23
- package/registry/core/assets.ts +11 -10
- package/registry/core/config.ts +10 -25
- package/registry/core/context-engine.ts +5 -0
- package/registry/core/deep-merge.ts +45 -0
- package/registry/core/ingest.ts +117 -44
- package/registry/core/types.ts +52 -0
- package/registry/embedding/_shared.ts +6 -1
- package/registry/embedding/ai.ts +2 -3
- package/registry/embedding/azure.ts +11 -2
- package/registry/embedding/bedrock.ts +11 -2
- package/registry/embedding/cohere.ts +11 -2
- package/registry/embedding/google.ts +11 -2
- package/registry/embedding/mistral.ts +11 -2
- package/registry/embedding/ollama.ts +18 -3
- package/registry/embedding/openai.ts +11 -2
- package/registry/embedding/openrouter.ts +53 -11
- package/registry/embedding/together.ts +15 -5
- package/registry/embedding/vertex.ts +11 -2
- package/registry/embedding/voyage.ts +16 -6
- package/registry/extractors/audio-transcribe/index.ts +39 -23
- package/registry/extractors/file-docx/index.ts +8 -1
- package/registry/extractors/file-pptx/index.ts +22 -1
- package/registry/extractors/file-xlsx/index.ts +24 -1
- package/registry/extractors/image-caption-llm/index.ts +8 -3
- package/registry/extractors/image-ocr/index.ts +9 -4
- package/registry/extractors/pdf-llm/index.ts +9 -4
- package/registry/extractors/pdf-text-layer/index.ts +23 -2
- package/registry/extractors/video-frames/index.ts +8 -3
- package/registry/extractors/video-transcribe/index.ts +40 -24
- package/registry/manifest.json +6 -6
- package/registry/store/drizzle-postgres-pgvector/store.ts +24 -7
package/registry/core/ingest.ts
CHANGED
|
@@ -9,37 +9,13 @@ import type {
|
|
|
9
9
|
IngestInput,
|
|
10
10
|
IngestResult,
|
|
11
11
|
IngestWarning,
|
|
12
|
+
Metadata,
|
|
12
13
|
ResolvedContextEngineConfig,
|
|
13
14
|
} from "./types";
|
|
15
|
+
import { mergeDeep } from "./deep-merge";
|
|
14
16
|
|
|
15
17
|
const now = () => performance.now();
|
|
16
18
|
|
|
17
|
-
const mergeDeep = <T extends Record<string, any>>(
|
|
18
|
-
base: T,
|
|
19
|
-
overrides: any | undefined
|
|
20
|
-
): T => {
|
|
21
|
-
if (!overrides) return base;
|
|
22
|
-
const out: any = Array.isArray(base) ? [...base] : { ...base };
|
|
23
|
-
for (const key of Object.keys(overrides)) {
|
|
24
|
-
const nextVal = overrides[key];
|
|
25
|
-
if (nextVal === undefined) continue;
|
|
26
|
-
const baseVal = (base as any)[key];
|
|
27
|
-
if (
|
|
28
|
-
baseVal &&
|
|
29
|
-
typeof baseVal === "object" &&
|
|
30
|
-
!Array.isArray(baseVal) &&
|
|
31
|
-
nextVal &&
|
|
32
|
-
typeof nextVal === "object" &&
|
|
33
|
-
!Array.isArray(nextVal)
|
|
34
|
-
) {
|
|
35
|
-
out[key] = mergeDeep(baseVal, nextVal);
|
|
36
|
-
} else {
|
|
37
|
-
out[key] = nextVal;
|
|
38
|
-
}
|
|
39
|
-
}
|
|
40
|
-
return out as T;
|
|
41
|
-
};
|
|
42
|
-
|
|
43
19
|
const asMessage = (err: unknown) => {
|
|
44
20
|
if (err instanceof Error) return err.message;
|
|
45
21
|
try {
|
|
@@ -123,7 +99,7 @@ export const ingest = async (
|
|
|
123
99
|
|
|
124
100
|
const assets: AssetInput[] = Array.isArray(input.assets) ? input.assets : [];
|
|
125
101
|
type PreparedChunkSpec = Omit<Chunk, "id" | "index"> & {
|
|
126
|
-
metadata:
|
|
102
|
+
metadata: Metadata;
|
|
127
103
|
embed:
|
|
128
104
|
| { kind: "text"; text: string }
|
|
129
105
|
| { kind: "image"; data: Uint8Array | string; mediaType?: string; assetId?: string };
|
|
@@ -140,7 +116,7 @@ export const ingest = async (
|
|
|
140
116
|
|
|
141
117
|
const runExtractors = async (args: {
|
|
142
118
|
asset: AssetInput;
|
|
143
|
-
assetMeta:
|
|
119
|
+
assetMeta: Metadata;
|
|
144
120
|
assetUri?: string;
|
|
145
121
|
assetMediaType?: string;
|
|
146
122
|
extractors: AssetExtractor[];
|
|
@@ -528,14 +504,41 @@ export const ingest = async (
|
|
|
528
504
|
const chunkingMs = now() - chunkingStart;
|
|
529
505
|
const embeddingStart = now();
|
|
530
506
|
|
|
531
|
-
const embeddedChunks =
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
507
|
+
const embeddedChunks: Chunk[] = new Array(prepared.length);
|
|
508
|
+
|
|
509
|
+
const textSpecs: Array<{
|
|
510
|
+
idx: number;
|
|
511
|
+
chunk: Chunk;
|
|
512
|
+
input: {
|
|
513
|
+
text: string;
|
|
514
|
+
metadata: Metadata;
|
|
515
|
+
position: number;
|
|
516
|
+
sourceId: string;
|
|
517
|
+
documentId: string;
|
|
518
|
+
};
|
|
519
|
+
}> = [];
|
|
520
|
+
|
|
521
|
+
const imageSpecs: Array<{
|
|
522
|
+
idx: number;
|
|
523
|
+
chunk: Chunk;
|
|
524
|
+
input: {
|
|
525
|
+
data: Uint8Array | string;
|
|
526
|
+
mediaType?: string;
|
|
527
|
+
metadata: Metadata;
|
|
528
|
+
position: number;
|
|
529
|
+
sourceId: string;
|
|
530
|
+
documentId: string;
|
|
531
|
+
assetId?: string;
|
|
532
|
+
};
|
|
533
|
+
}> = [];
|
|
534
|
+
|
|
535
|
+
for (let i = 0; i < prepared.length; i++) {
|
|
536
|
+
const { chunk, embed } = prepared[i]!;
|
|
537
|
+
if (embed.kind === "image") {
|
|
538
|
+
imageSpecs.push({
|
|
539
|
+
idx: i,
|
|
540
|
+
chunk,
|
|
541
|
+
input: {
|
|
539
542
|
data: embed.data,
|
|
540
543
|
mediaType: embed.mediaType,
|
|
541
544
|
metadata: chunk.metadata,
|
|
@@ -543,21 +546,91 @@ export const ingest = async (
|
|
|
543
546
|
sourceId: chunk.sourceId,
|
|
544
547
|
documentId: chunk.documentId,
|
|
545
548
|
assetId: embed.assetId,
|
|
546
|
-
}
|
|
547
|
-
|
|
548
|
-
|
|
549
|
+
},
|
|
550
|
+
});
|
|
551
|
+
continue;
|
|
552
|
+
}
|
|
549
553
|
|
|
550
|
-
|
|
554
|
+
textSpecs.push({
|
|
555
|
+
idx: i,
|
|
556
|
+
chunk,
|
|
557
|
+
input: {
|
|
551
558
|
text: embed.text,
|
|
552
559
|
metadata: chunk.metadata,
|
|
553
560
|
position: chunk.index,
|
|
554
561
|
sourceId: chunk.sourceId,
|
|
555
562
|
documentId: chunk.documentId,
|
|
556
|
-
}
|
|
563
|
+
},
|
|
564
|
+
});
|
|
565
|
+
}
|
|
557
566
|
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
)
|
|
567
|
+
const concurrency = config.embeddingProcessing.concurrency;
|
|
568
|
+
|
|
569
|
+
// Text embeddings (prefer batch when supported).
|
|
570
|
+
if (textSpecs.length > 0) {
|
|
571
|
+
const embedMany = config.embedding.embedMany;
|
|
572
|
+
if (embedMany) {
|
|
573
|
+
const batchSize = Math.max(1, Math.floor(config.embeddingProcessing.batchSize || 1));
|
|
574
|
+
const batches: Array<typeof textSpecs> = [];
|
|
575
|
+
for (let i = 0; i < textSpecs.length; i += batchSize) {
|
|
576
|
+
batches.push(textSpecs.slice(i, i + batchSize));
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
const batchEmbeddings = await mapWithConcurrency(
|
|
580
|
+
batches,
|
|
581
|
+
concurrency,
|
|
582
|
+
async (batch) => {
|
|
583
|
+
const embeddings = await embedMany(batch.map((b) => b.input));
|
|
584
|
+
if (!Array.isArray(embeddings) || embeddings.length !== batch.length) {
|
|
585
|
+
throw new Error(
|
|
586
|
+
`embedMany() returned ${Array.isArray(embeddings) ? embeddings.length : "non-array"} embeddings for a batch of ${batch.length}`
|
|
587
|
+
);
|
|
588
|
+
}
|
|
589
|
+
return embeddings;
|
|
590
|
+
}
|
|
591
|
+
);
|
|
592
|
+
|
|
593
|
+
let batchIdx = 0;
|
|
594
|
+
for (const batch of batches) {
|
|
595
|
+
const embeddings = batchEmbeddings[batchIdx++]!;
|
|
596
|
+
for (let i = 0; i < batch.length; i++) {
|
|
597
|
+
const spec = batch[i]!;
|
|
598
|
+
embeddedChunks[spec.idx] = { ...spec.chunk, embedding: embeddings[i]! };
|
|
599
|
+
}
|
|
600
|
+
}
|
|
601
|
+
} else {
|
|
602
|
+
const embeddings = await mapWithConcurrency(textSpecs, concurrency, async (spec) =>
|
|
603
|
+
config.embedding.embed(spec.input)
|
|
604
|
+
);
|
|
605
|
+
for (let i = 0; i < textSpecs.length; i++) {
|
|
606
|
+
const spec = textSpecs[i]!;
|
|
607
|
+
embeddedChunks[spec.idx] = { ...spec.chunk, embedding: embeddings[i]! };
|
|
608
|
+
}
|
|
609
|
+
}
|
|
610
|
+
}
|
|
611
|
+
|
|
612
|
+
// Image embeddings (bounded concurrency).
|
|
613
|
+
if (imageSpecs.length > 0) {
|
|
614
|
+
const embedImage = config.embedding.embedImage;
|
|
615
|
+
if (!embedImage) {
|
|
616
|
+
throw new Error("Image embedding requested but provider does not support embedImage()");
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
const embeddings = await mapWithConcurrency(imageSpecs, concurrency, async (spec) =>
|
|
620
|
+
embedImage(spec.input)
|
|
621
|
+
);
|
|
622
|
+
for (let i = 0; i < imageSpecs.length; i++) {
|
|
623
|
+
const spec = imageSpecs[i]!;
|
|
624
|
+
embeddedChunks[spec.idx] = { ...spec.chunk, embedding: embeddings[i]! };
|
|
625
|
+
}
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
// Safety check: ensure all chunks got an embedding.
|
|
629
|
+
for (let i = 0; i < embeddedChunks.length; i++) {
|
|
630
|
+
if (!embeddedChunks[i]) {
|
|
631
|
+
throw new Error("Internal error: missing embedding for one or more chunks");
|
|
632
|
+
}
|
|
633
|
+
}
|
|
561
634
|
|
|
562
635
|
const embeddingMs = now() - embeddingStart;
|
|
563
636
|
const storageStart = now();
|
package/registry/core/types.ts
CHANGED
|
@@ -5,6 +5,30 @@ export type Metadata = Record<
|
|
|
5
5
|
MetadataValue | MetadataValue[] | undefined
|
|
6
6
|
>;
|
|
7
7
|
|
|
8
|
+
/**
|
|
9
|
+
* Standard fields for asset-related metadata.
|
|
10
|
+
* These are added to chunk metadata when chunks are derived from assets.
|
|
11
|
+
*/
|
|
12
|
+
export interface AssetMetadataFields {
|
|
13
|
+
assetKind?: "image" | "pdf" | "audio" | "video" | "file";
|
|
14
|
+
assetId?: string;
|
|
15
|
+
assetUri?: string;
|
|
16
|
+
assetMediaType?: string;
|
|
17
|
+
extractor?: string;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Type guard for checking if metadata contains required asset fields.
|
|
22
|
+
*/
|
|
23
|
+
export function hasAssetMetadata(
|
|
24
|
+
metadata: Metadata
|
|
25
|
+
): metadata is Metadata & Required<Pick<AssetMetadataFields, "assetKind" | "assetId">> {
|
|
26
|
+
return (
|
|
27
|
+
typeof metadata.assetKind === "string" &&
|
|
28
|
+
typeof metadata.assetId === "string"
|
|
29
|
+
);
|
|
30
|
+
}
|
|
31
|
+
|
|
8
32
|
export type Chunk = {
|
|
9
33
|
id: string;
|
|
10
34
|
documentId: string;
|
|
@@ -31,6 +55,24 @@ export type ContentStorageConfig = {
|
|
|
31
55
|
storeDocumentContent: boolean;
|
|
32
56
|
};
|
|
33
57
|
|
|
58
|
+
/**
|
|
59
|
+
* Controls performance characteristics of embedding during ingest.
|
|
60
|
+
*
|
|
61
|
+
* These defaults are intentionally conservative to reduce rate-limit risk.
|
|
62
|
+
*/
|
|
63
|
+
export type EmbeddingProcessingConfig = {
|
|
64
|
+
/**
|
|
65
|
+
* Maximum number of concurrent embedding requests.
|
|
66
|
+
* This applies to both text embedding (embed/embedMany) and image embedding (embedImage).
|
|
67
|
+
*/
|
|
68
|
+
concurrency: number;
|
|
69
|
+
/**
|
|
70
|
+
* Max number of text chunks per embedMany batch (when embedMany is supported).
|
|
71
|
+
* Ignored when the provider does not implement embedMany().
|
|
72
|
+
*/
|
|
73
|
+
batchSize: number;
|
|
74
|
+
};
|
|
75
|
+
|
|
34
76
|
export type ChunkText = {
|
|
35
77
|
index: number;
|
|
36
78
|
content: string;
|
|
@@ -655,6 +697,11 @@ export type RetrieveResult = {
|
|
|
655
697
|
*/
|
|
656
698
|
export type UnragDefaultsConfig = {
|
|
657
699
|
chunking?: Partial<ChunkingOptions>;
|
|
700
|
+
/**
|
|
701
|
+
* Embedding performance defaults (batching + concurrency).
|
|
702
|
+
* These map to the engine's `embeddingProcessing` config.
|
|
703
|
+
*/
|
|
704
|
+
embedding?: Partial<EmbeddingProcessingConfig>;
|
|
658
705
|
retrieval?: {
|
|
659
706
|
topK?: number;
|
|
660
707
|
};
|
|
@@ -768,6 +815,10 @@ export type ContextEngineConfig = {
|
|
|
768
815
|
* captions, which can still be ingested via `assets[].text` if you choose).
|
|
769
816
|
*/
|
|
770
817
|
assetProcessing?: DeepPartial<AssetProcessingConfig>;
|
|
818
|
+
/**
|
|
819
|
+
* Embedding performance defaults for ingest (batching + concurrency).
|
|
820
|
+
*/
|
|
821
|
+
embeddingProcessing?: DeepPartial<EmbeddingProcessingConfig>;
|
|
771
822
|
};
|
|
772
823
|
|
|
773
824
|
export type ResolvedContextEngineConfig = {
|
|
@@ -779,4 +830,5 @@ export type ResolvedContextEngineConfig = {
|
|
|
779
830
|
extractors: AssetExtractor[];
|
|
780
831
|
storage: ContentStorageConfig;
|
|
781
832
|
assetProcessing: AssetProcessingConfig;
|
|
833
|
+
embeddingProcessing: EmbeddingProcessingConfig;
|
|
782
834
|
};
|
|
@@ -2,7 +2,12 @@ import { createRequire } from "node:module";
|
|
|
2
2
|
|
|
3
3
|
const require = createRequire(import.meta.url);
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
/**
|
|
6
|
+
* Dynamically require an optional dependency with type-safe return.
|
|
7
|
+
*
|
|
8
|
+
* @template T - The expected module type (callers must define this)
|
|
9
|
+
*/
|
|
10
|
+
export function requireOptional<T>(args: {
|
|
6
11
|
id: string;
|
|
7
12
|
installHint: string;
|
|
8
13
|
providerName: string;
|
package/registry/embedding/ai.ts
CHANGED
|
@@ -53,11 +53,10 @@ export const createAiEmbeddingProvider = (
|
|
|
53
53
|
...(abortSignal ? { abortSignal } : {}),
|
|
54
54
|
});
|
|
55
55
|
|
|
56
|
-
|
|
57
|
-
if (!embeddings) {
|
|
56
|
+
if (!result.embeddings || result.embeddings.length === 0) {
|
|
58
57
|
throw new Error("Embeddings missing from AI SDK embedMany response");
|
|
59
58
|
}
|
|
60
|
-
return embeddings;
|
|
59
|
+
return result.embeddings;
|
|
61
60
|
},
|
|
62
61
|
};
|
|
63
62
|
};
|
|
@@ -1,7 +1,16 @@
|
|
|
1
|
-
import { embed, embedMany } from "ai";
|
|
1
|
+
import { embed, embedMany, type EmbeddingModel } from "ai";
|
|
2
2
|
import type { EmbeddingProvider } from "../core/types";
|
|
3
3
|
import { requireOptional } from "./_shared";
|
|
4
4
|
|
|
5
|
+
/**
|
|
6
|
+
* Azure OpenAI provider module interface.
|
|
7
|
+
*/
|
|
8
|
+
interface AzureModule {
|
|
9
|
+
azure: {
|
|
10
|
+
embedding: (model: string) => EmbeddingModel<string>;
|
|
11
|
+
};
|
|
12
|
+
}
|
|
13
|
+
|
|
5
14
|
export type AzureEmbeddingConfig = {
|
|
6
15
|
model?: string;
|
|
7
16
|
timeoutMs?: number;
|
|
@@ -26,7 +35,7 @@ const buildProviderOptions = (config: AzureEmbeddingConfig) => {
|
|
|
26
35
|
export const createAzureEmbeddingProvider = (
|
|
27
36
|
config: AzureEmbeddingConfig = {}
|
|
28
37
|
): EmbeddingProvider => {
|
|
29
|
-
const { azure } = requireOptional<
|
|
38
|
+
const { azure } = requireOptional<AzureModule>({
|
|
30
39
|
id: "@ai-sdk/azure",
|
|
31
40
|
installHint: "bun add @ai-sdk/azure",
|
|
32
41
|
providerName: "azure",
|
|
@@ -1,7 +1,16 @@
|
|
|
1
|
-
import { embed, embedMany } from "ai";
|
|
1
|
+
import { embed, embedMany, type EmbeddingModel } from "ai";
|
|
2
2
|
import type { EmbeddingProvider } from "../core/types";
|
|
3
3
|
import { requireOptional } from "./_shared";
|
|
4
4
|
|
|
5
|
+
/**
|
|
6
|
+
* Amazon Bedrock provider module interface.
|
|
7
|
+
*/
|
|
8
|
+
interface BedrockModule {
|
|
9
|
+
bedrock: {
|
|
10
|
+
embedding: (model: string) => EmbeddingModel<string>;
|
|
11
|
+
};
|
|
12
|
+
}
|
|
13
|
+
|
|
5
14
|
export type BedrockEmbeddingConfig = {
|
|
6
15
|
model?: string;
|
|
7
16
|
timeoutMs?: number;
|
|
@@ -26,7 +35,7 @@ const buildProviderOptions = (config: BedrockEmbeddingConfig) => {
|
|
|
26
35
|
export const createBedrockEmbeddingProvider = (
|
|
27
36
|
config: BedrockEmbeddingConfig = {}
|
|
28
37
|
): EmbeddingProvider => {
|
|
29
|
-
const { bedrock } = requireOptional<
|
|
38
|
+
const { bedrock } = requireOptional<BedrockModule>({
|
|
30
39
|
id: "@ai-sdk/amazon-bedrock",
|
|
31
40
|
installHint: "bun add @ai-sdk/amazon-bedrock",
|
|
32
41
|
providerName: "bedrock",
|
|
@@ -1,7 +1,16 @@
|
|
|
1
|
-
import { embed, embedMany } from "ai";
|
|
1
|
+
import { embed, embedMany, type EmbeddingModel } from "ai";
|
|
2
2
|
import type { EmbeddingProvider } from "../core/types";
|
|
3
3
|
import { requireOptional } from "./_shared";
|
|
4
4
|
|
|
5
|
+
/**
|
|
6
|
+
* Cohere provider module interface.
|
|
7
|
+
*/
|
|
8
|
+
interface CohereModule {
|
|
9
|
+
cohere: {
|
|
10
|
+
embedding: (model: string) => EmbeddingModel<string>;
|
|
11
|
+
};
|
|
12
|
+
}
|
|
13
|
+
|
|
5
14
|
export type CohereEmbeddingConfig = {
|
|
6
15
|
model?: string;
|
|
7
16
|
timeoutMs?: number;
|
|
@@ -26,7 +35,7 @@ const buildProviderOptions = (config: CohereEmbeddingConfig) => {
|
|
|
26
35
|
export const createCohereEmbeddingProvider = (
|
|
27
36
|
config: CohereEmbeddingConfig = {}
|
|
28
37
|
): EmbeddingProvider => {
|
|
29
|
-
const { cohere } = requireOptional<
|
|
38
|
+
const { cohere } = requireOptional<CohereModule>({
|
|
30
39
|
id: "@ai-sdk/cohere",
|
|
31
40
|
installHint: "bun add @ai-sdk/cohere",
|
|
32
41
|
providerName: "cohere",
|
|
@@ -1,7 +1,16 @@
|
|
|
1
|
-
import { embed, embedMany } from "ai";
|
|
1
|
+
import { embed, embedMany, type EmbeddingModel } from "ai";
|
|
2
2
|
import type { EmbeddingProvider } from "../core/types";
|
|
3
3
|
import { requireOptional } from "./_shared";
|
|
4
4
|
|
|
5
|
+
/**
|
|
6
|
+
* Google AI provider module interface.
|
|
7
|
+
*/
|
|
8
|
+
interface GoogleModule {
|
|
9
|
+
google: {
|
|
10
|
+
embedding: (model: string) => EmbeddingModel<string>;
|
|
11
|
+
};
|
|
12
|
+
}
|
|
13
|
+
|
|
5
14
|
export type GoogleEmbeddingTaskType =
|
|
6
15
|
| "SEMANTIC_SIMILARITY"
|
|
7
16
|
| "CLASSIFICATION"
|
|
@@ -38,7 +47,7 @@ const buildProviderOptions = (config: GoogleEmbeddingConfig) => {
|
|
|
38
47
|
export const createGoogleEmbeddingProvider = (
|
|
39
48
|
config: GoogleEmbeddingConfig = {}
|
|
40
49
|
): EmbeddingProvider => {
|
|
41
|
-
const { google } = requireOptional<
|
|
50
|
+
const { google } = requireOptional<GoogleModule>({
|
|
42
51
|
id: "@ai-sdk/google",
|
|
43
52
|
installHint: "bun add @ai-sdk/google",
|
|
44
53
|
providerName: "google",
|
|
@@ -1,7 +1,16 @@
|
|
|
1
|
-
import { embed, embedMany } from "ai";
|
|
1
|
+
import { embed, embedMany, type EmbeddingModel } from "ai";
|
|
2
2
|
import type { EmbeddingProvider } from "../core/types";
|
|
3
3
|
import { requireOptional } from "./_shared";
|
|
4
4
|
|
|
5
|
+
/**
|
|
6
|
+
* Mistral provider module interface.
|
|
7
|
+
*/
|
|
8
|
+
interface MistralModule {
|
|
9
|
+
mistral: {
|
|
10
|
+
embedding: (model: string) => EmbeddingModel<string>;
|
|
11
|
+
};
|
|
12
|
+
}
|
|
13
|
+
|
|
5
14
|
export type MistralEmbeddingConfig = {
|
|
6
15
|
model?: string;
|
|
7
16
|
timeoutMs?: number;
|
|
@@ -12,7 +21,7 @@ const DEFAULT_TEXT_MODEL = "mistral-embed";
|
|
|
12
21
|
export const createMistralEmbeddingProvider = (
|
|
13
22
|
config: MistralEmbeddingConfig = {}
|
|
14
23
|
): EmbeddingProvider => {
|
|
15
|
-
const { mistral } = requireOptional<
|
|
24
|
+
const { mistral } = requireOptional<MistralModule>({
|
|
16
25
|
id: "@ai-sdk/mistral",
|
|
17
26
|
installHint: "bun add @ai-sdk/mistral",
|
|
18
27
|
providerName: "mistral",
|
|
@@ -1,7 +1,22 @@
|
|
|
1
|
-
import { embed, embedMany } from "ai";
|
|
1
|
+
import { embed, embedMany, type EmbeddingModel } from "ai";
|
|
2
2
|
import type { EmbeddingProvider } from "../core/types";
|
|
3
3
|
import { requireOptional } from "./_shared";
|
|
4
4
|
|
|
5
|
+
/**
|
|
6
|
+
* Ollama provider instance interface.
|
|
7
|
+
*/
|
|
8
|
+
interface OllamaProvider {
|
|
9
|
+
textEmbeddingModel: (model: string) => EmbeddingModel<string>;
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* Ollama provider module interface.
|
|
14
|
+
*/
|
|
15
|
+
interface OllamaModule {
|
|
16
|
+
createOllama: (config: { baseURL?: string; headers?: Record<string, string> }) => OllamaProvider;
|
|
17
|
+
ollama: OllamaProvider;
|
|
18
|
+
}
|
|
19
|
+
|
|
5
20
|
export type OllamaEmbeddingConfig = {
|
|
6
21
|
model?: string;
|
|
7
22
|
timeoutMs?: number;
|
|
@@ -11,8 +26,8 @@ export type OllamaEmbeddingConfig = {
|
|
|
11
26
|
|
|
12
27
|
const DEFAULT_TEXT_MODEL = "nomic-embed-text";
|
|
13
28
|
|
|
14
|
-
const resolveProvider = (config: OllamaEmbeddingConfig) => {
|
|
15
|
-
const { createOllama, ollama } = requireOptional<
|
|
29
|
+
const resolveProvider = (config: OllamaEmbeddingConfig): OllamaProvider => {
|
|
30
|
+
const { createOllama, ollama } = requireOptional<OllamaModule>({
|
|
16
31
|
id: "ollama-ai-provider-v2",
|
|
17
32
|
installHint: "bun add ollama-ai-provider-v2",
|
|
18
33
|
providerName: "ollama",
|
|
@@ -1,7 +1,16 @@
|
|
|
1
|
-
import { embed, embedMany } from "ai";
|
|
1
|
+
import { embed, embedMany, type EmbeddingModel } from "ai";
|
|
2
2
|
import type { EmbeddingProvider } from "../core/types";
|
|
3
3
|
import { requireOptional } from "./_shared";
|
|
4
4
|
|
|
5
|
+
/**
|
|
6
|
+
* OpenAI provider module interface.
|
|
7
|
+
*/
|
|
8
|
+
interface OpenAiModule {
|
|
9
|
+
openai: {
|
|
10
|
+
embedding: (model: string) => EmbeddingModel<string>;
|
|
11
|
+
};
|
|
12
|
+
}
|
|
13
|
+
|
|
5
14
|
export type OpenAiEmbeddingConfig = {
|
|
6
15
|
model?: string;
|
|
7
16
|
timeoutMs?: number;
|
|
@@ -26,7 +35,7 @@ const buildProviderOptions = (config: OpenAiEmbeddingConfig) => {
|
|
|
26
35
|
export const createOpenAiEmbeddingProvider = (
|
|
27
36
|
config: OpenAiEmbeddingConfig = {}
|
|
28
37
|
): EmbeddingProvider => {
|
|
29
|
-
const { openai } = requireOptional<
|
|
38
|
+
const { openai } = requireOptional<OpenAiModule>({
|
|
30
39
|
id: "@ai-sdk/openai",
|
|
31
40
|
installHint: "bun add @ai-sdk/openai",
|
|
32
41
|
providerName: "openai",
|
|
@@ -11,6 +11,49 @@ export type OpenRouterEmbeddingConfig = {
|
|
|
11
11
|
title?: string;
|
|
12
12
|
};
|
|
13
13
|
|
|
14
|
+
/**
|
|
15
|
+
* OpenRouter embedding result item.
|
|
16
|
+
*/
|
|
17
|
+
interface EmbeddingDataItem {
|
|
18
|
+
embedding?: number[];
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
/**
|
|
22
|
+
* OpenRouter embedding response.
|
|
23
|
+
*/
|
|
24
|
+
interface EmbeddingResponse {
|
|
25
|
+
data?: EmbeddingDataItem[];
|
|
26
|
+
embedding?: number[];
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* OpenRouter client embeddings interface.
|
|
31
|
+
*/
|
|
32
|
+
interface EmbeddingsClient {
|
|
33
|
+
generate(
|
|
34
|
+
params: { input: string | string[]; model: string },
|
|
35
|
+
options?: { fetchOptions?: { signal?: AbortSignal } }
|
|
36
|
+
): Promise<EmbeddingResponse>;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
/**
|
|
40
|
+
* OpenRouter client interface.
|
|
41
|
+
*/
|
|
42
|
+
interface OpenRouterClient {
|
|
43
|
+
embeddings: EmbeddingsClient;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* OpenRouter SDK module interface.
|
|
48
|
+
*/
|
|
49
|
+
interface OpenRouterModule {
|
|
50
|
+
OpenRouter: new (config: {
|
|
51
|
+
apiKey: string;
|
|
52
|
+
baseURL?: string;
|
|
53
|
+
headers?: Record<string, string>;
|
|
54
|
+
}) => OpenRouterClient;
|
|
55
|
+
}
|
|
56
|
+
|
|
14
57
|
const DEFAULT_TEXT_MODEL = "text-embedding-3-small";
|
|
15
58
|
|
|
16
59
|
const buildHeaders = (config: OpenRouterEmbeddingConfig) => {
|
|
@@ -23,7 +66,7 @@ const buildHeaders = (config: OpenRouterEmbeddingConfig) => {
|
|
|
23
66
|
export const createOpenRouterEmbeddingProvider = (
|
|
24
67
|
config: OpenRouterEmbeddingConfig = {}
|
|
25
68
|
): EmbeddingProvider => {
|
|
26
|
-
const { OpenRouter } = requireOptional<
|
|
69
|
+
const { OpenRouter } = requireOptional<OpenRouterModule>({
|
|
27
70
|
id: "@openrouter/sdk",
|
|
28
71
|
installHint: "bun add @openrouter/sdk",
|
|
29
72
|
providerName: "openrouter",
|
|
@@ -37,7 +80,7 @@ export const createOpenRouterEmbeddingProvider = (
|
|
|
37
80
|
apiKey: config.apiKey ?? process.env.OPENROUTER_API_KEY ?? "",
|
|
38
81
|
...(config.baseURL ? { baseURL: config.baseURL } : {}),
|
|
39
82
|
...(Object.keys(headers).length ? { headers } : {}),
|
|
40
|
-
}
|
|
83
|
+
});
|
|
41
84
|
|
|
42
85
|
return {
|
|
43
86
|
name: `openrouter:${model}`,
|
|
@@ -47,35 +90,34 @@ export const createOpenRouterEmbeddingProvider = (
|
|
|
47
90
|
? AbortSignal.timeout(timeoutMs)
|
|
48
91
|
: undefined;
|
|
49
92
|
|
|
50
|
-
const result = await
|
|
93
|
+
const result = await client.embeddings.generate(
|
|
51
94
|
{ input: text, model },
|
|
52
95
|
abortSignal ? { fetchOptions: { signal: abortSignal } } : undefined
|
|
53
96
|
);
|
|
54
97
|
|
|
55
98
|
const embedding =
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
(result as any)?.data?.embedding;
|
|
99
|
+
result.data?.[0]?.embedding ??
|
|
100
|
+
result.embedding;
|
|
59
101
|
if (!embedding) {
|
|
60
102
|
throw new Error("Embedding missing from OpenRouter response");
|
|
61
103
|
}
|
|
62
104
|
|
|
63
|
-
return embedding
|
|
105
|
+
return embedding;
|
|
64
106
|
},
|
|
65
107
|
embedMany: async (inputs) => {
|
|
66
108
|
const values = inputs.map((i) => i.text);
|
|
67
109
|
const abortSignal = timeoutMs ? AbortSignal.timeout(timeoutMs) : undefined;
|
|
68
110
|
|
|
69
|
-
const result = await
|
|
111
|
+
const result = await client.embeddings.generate(
|
|
70
112
|
{ input: values, model },
|
|
71
113
|
abortSignal ? { fetchOptions: { signal: abortSignal } } : undefined
|
|
72
114
|
);
|
|
73
115
|
|
|
74
|
-
const embeddings =
|
|
75
|
-
(item
|
|
116
|
+
const embeddings = result.data?.map(
|
|
117
|
+
(item) => item.embedding
|
|
76
118
|
);
|
|
77
119
|
|
|
78
|
-
if (!
|
|
120
|
+
if (!embeddings || embeddings.some((e) => !Array.isArray(e))) {
|
|
79
121
|
throw new Error("Embeddings missing from OpenRouter response");
|
|
80
122
|
}
|
|
81
123
|
|
|
@@ -1,7 +1,17 @@
|
|
|
1
|
-
import { embed, embedMany } from "ai";
|
|
1
|
+
import { embed, embedMany, type EmbeddingModel } from "ai";
|
|
2
2
|
import type { EmbeddingProvider } from "../core/types";
|
|
3
3
|
import { requireOptional } from "./_shared";
|
|
4
4
|
|
|
5
|
+
/**
|
|
6
|
+
* Together AI provider module interface.
|
|
7
|
+
*/
|
|
8
|
+
interface TogetherAiModule {
|
|
9
|
+
togetherai: {
|
|
10
|
+
embeddingModel?: (model: string) => EmbeddingModel<string>;
|
|
11
|
+
textEmbeddingModel?: (model: string) => EmbeddingModel<string>;
|
|
12
|
+
};
|
|
13
|
+
}
|
|
14
|
+
|
|
5
15
|
export type TogetherEmbeddingConfig = {
|
|
6
16
|
model?: string;
|
|
7
17
|
timeoutMs?: number;
|
|
@@ -12,7 +22,7 @@ const DEFAULT_TEXT_MODEL = "togethercomputer/m2-bert-80M-2k-retrieval";
|
|
|
12
22
|
export const createTogetherEmbeddingProvider = (
|
|
13
23
|
config: TogetherEmbeddingConfig = {}
|
|
14
24
|
): EmbeddingProvider => {
|
|
15
|
-
const { togetherai } = requireOptional<
|
|
25
|
+
const { togetherai } = requireOptional<TogetherAiModule>({
|
|
16
26
|
id: "@ai-sdk/togetherai",
|
|
17
27
|
installHint: "bun add @ai-sdk/togetherai",
|
|
18
28
|
providerName: "together",
|
|
@@ -23,9 +33,9 @@ export const createTogetherEmbeddingProvider = (
|
|
|
23
33
|
DEFAULT_TEXT_MODEL;
|
|
24
34
|
const timeoutMs = config.timeoutMs;
|
|
25
35
|
const embeddingModel =
|
|
26
|
-
|
|
27
|
-
?
|
|
28
|
-
:
|
|
36
|
+
typeof togetherai.embeddingModel === "function"
|
|
37
|
+
? togetherai.embeddingModel(model)
|
|
38
|
+
: togetherai.textEmbeddingModel?.(model);
|
|
29
39
|
|
|
30
40
|
return {
|
|
31
41
|
name: `together:${model}`,
|