unrag 0.2.5 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/dist/cli/index.js +611 -174
  2. package/package.json +12 -6
  3. package/registry/config/unrag.config.ts +9 -8
  4. package/registry/connectors/google-drive/_api-types.ts +60 -0
  5. package/registry/connectors/google-drive/client.ts +99 -38
  6. package/registry/connectors/google-drive/sync.ts +97 -69
  7. package/registry/connectors/google-drive/types.ts +76 -37
  8. package/registry/connectors/notion/client.ts +12 -3
  9. package/registry/connectors/notion/render.ts +62 -23
  10. package/registry/connectors/notion/sync.ts +30 -23
  11. package/registry/core/assets.ts +11 -10
  12. package/registry/core/config.ts +10 -25
  13. package/registry/core/context-engine.ts +71 -2
  14. package/registry/core/deep-merge.ts +45 -0
  15. package/registry/core/ingest.ts +117 -44
  16. package/registry/core/types.ts +96 -2
  17. package/registry/docs/unrag.md +6 -1
  18. package/registry/embedding/_shared.ts +25 -0
  19. package/registry/embedding/ai.ts +8 -68
  20. package/registry/embedding/azure.ts +88 -0
  21. package/registry/embedding/bedrock.ts +88 -0
  22. package/registry/embedding/cohere.ts +88 -0
  23. package/registry/embedding/google.ts +102 -0
  24. package/registry/embedding/mistral.ts +71 -0
  25. package/registry/embedding/ollama.ts +90 -0
  26. package/registry/embedding/openai.ts +88 -0
  27. package/registry/embedding/openrouter.ts +127 -0
  28. package/registry/embedding/together.ts +77 -0
  29. package/registry/embedding/vertex.ts +111 -0
  30. package/registry/embedding/voyage.ts +169 -0
  31. package/registry/extractors/audio-transcribe/index.ts +39 -23
  32. package/registry/extractors/file-docx/index.ts +8 -1
  33. package/registry/extractors/file-pptx/index.ts +22 -1
  34. package/registry/extractors/file-xlsx/index.ts +24 -1
  35. package/registry/extractors/image-caption-llm/index.ts +8 -3
  36. package/registry/extractors/image-ocr/index.ts +9 -4
  37. package/registry/extractors/pdf-llm/index.ts +9 -4
  38. package/registry/extractors/pdf-text-layer/index.ts +23 -2
  39. package/registry/extractors/video-frames/index.ts +8 -3
  40. package/registry/extractors/video-transcribe/index.ts +40 -24
  41. package/registry/manifest.json +346 -0
  42. package/registry/store/drizzle-postgres-pgvector/store.ts +26 -6
@@ -0,0 +1,111 @@
1
+ import { embed, embedMany, type EmbeddingModel } from "ai";
2
+ import type { EmbeddingProvider } from "../core/types";
3
+ import { requireOptional } from "./_shared";
4
+
5
+ /**
6
+ * Vertex AI provider module interface.
7
+ */
8
+ interface VertexModule {
9
+ vertex: {
10
+ embeddingModel: (model: string) => EmbeddingModel<string>;
11
+ };
12
+ }
13
+
14
+ export type VertexEmbeddingTaskType =
15
+ | "SEMANTIC_SIMILARITY"
16
+ | "CLASSIFICATION"
17
+ | "CLUSTERING"
18
+ | "RETRIEVAL_DOCUMENT"
19
+ | "RETRIEVAL_QUERY"
20
+ | "QUESTION_ANSWERING"
21
+ | "FACT_VERIFICATION"
22
+ | "CODE_RETRIEVAL_QUERY";
23
+
24
+ export type VertexEmbeddingConfig = {
25
+ model?: string;
26
+ timeoutMs?: number;
27
+ outputDimensionality?: number;
28
+ taskType?: VertexEmbeddingTaskType;
29
+ title?: string;
30
+ autoTruncate?: boolean;
31
+ };
32
+
33
+ const DEFAULT_TEXT_MODEL = "text-embedding-004";
34
+
35
+ const buildProviderOptions = (config: VertexEmbeddingConfig) => {
36
+ if (
37
+ config.outputDimensionality === undefined &&
38
+ !config.taskType &&
39
+ config.autoTruncate === undefined &&
40
+ !config.title
41
+ ) {
42
+ return undefined;
43
+ }
44
+ return {
45
+ google: {
46
+ ...(config.outputDimensionality !== undefined
47
+ ? { outputDimensionality: config.outputDimensionality }
48
+ : {}),
49
+ ...(config.taskType ? { taskType: config.taskType } : {}),
50
+ ...(config.autoTruncate !== undefined ? { autoTruncate: config.autoTruncate } : {}),
51
+ ...(config.title ? { title: config.title } : {}),
52
+ },
53
+ };
54
+ };
55
+
56
+ export const createVertexEmbeddingProvider = (
57
+ config: VertexEmbeddingConfig = {}
58
+ ): EmbeddingProvider => {
59
+ const { vertex } = requireOptional<VertexModule>({
60
+ id: "@ai-sdk/google-vertex",
61
+ installHint: "bun add @ai-sdk/google-vertex",
62
+ providerName: "vertex",
63
+ });
64
+ const model =
65
+ config.model ??
66
+ process.env.GOOGLE_VERTEX_EMBEDDING_MODEL ??
67
+ DEFAULT_TEXT_MODEL;
68
+ const timeoutMs = config.timeoutMs;
69
+ const providerOptions = buildProviderOptions(config);
70
+ const embeddingModel = vertex.embeddingModel(model);
71
+
72
+ return {
73
+ name: `vertex:${model}`,
74
+ dimensions: config.outputDimensionality,
75
+ embed: async ({ text }) => {
76
+ const abortSignal = timeoutMs
77
+ ? AbortSignal.timeout(timeoutMs)
78
+ : undefined;
79
+
80
+ const result = await embed({
81
+ model: embeddingModel,
82
+ value: text,
83
+ ...(providerOptions ? { providerOptions } : {}),
84
+ ...(abortSignal ? { abortSignal } : {}),
85
+ });
86
+
87
+ if (!result.embedding) {
88
+ throw new Error("Embedding missing from Vertex response");
89
+ }
90
+
91
+ return result.embedding;
92
+ },
93
+ embedMany: async (inputs) => {
94
+ const values = inputs.map((i) => i.text);
95
+ const abortSignal = timeoutMs ? AbortSignal.timeout(timeoutMs) : undefined;
96
+
97
+ const result = await embedMany({
98
+ model: embeddingModel,
99
+ values,
100
+ ...(providerOptions ? { providerOptions } : {}),
101
+ ...(abortSignal ? { abortSignal } : {}),
102
+ });
103
+
104
+ const { embeddings } = result;
105
+ if (!Array.isArray(embeddings)) {
106
+ throw new Error("Embeddings missing from Vertex embedMany response");
107
+ }
108
+ return embeddings;
109
+ },
110
+ };
111
+ };
@@ -0,0 +1,169 @@
1
+ import { embed, embedMany, type EmbeddingModel } from "ai";
2
+ import type { EmbeddingProvider, ImageEmbeddingInput } from "../core/types";
3
+ import { requireOptional } from "./_shared";
4
+
5
+ /**
6
+ * Voyage AI provider module interface.
7
+ */
8
+ interface VoyageModule {
9
+ voyage: {
10
+ embeddingModel?: (model: string) => EmbeddingModel<string>;
11
+ textEmbeddingModel?: (model: string) => EmbeddingModel<string>;
12
+ multimodalEmbeddingModel?: (model: string) => EmbeddingModel<unknown>;
13
+ };
14
+ }
15
+
16
+ type BaseConfig = {
17
+ model?: string;
18
+ timeoutMs?: number;
19
+ };
20
+
21
+ export type VoyageEmbeddingConfig =
22
+ | (BaseConfig & {
23
+ type?: "text";
24
+ })
25
+ | (BaseConfig & {
26
+ type: "multimodal";
27
+ text?: {
28
+ value?: (text: string) => unknown;
29
+ };
30
+ image?: {
31
+ value?: (input: ImageEmbeddingInput) => unknown;
32
+ };
33
+ });
34
+
35
+ const DEFAULT_TEXT_MODEL = "voyage-3.5-lite";
36
+ const DEFAULT_MULTIMODAL_MODEL = "voyage-multimodal-3";
37
+
38
+ const bytesToDataUrl = (bytes: Uint8Array, mediaType: string) => {
39
+ const base64 = Buffer.from(bytes).toString("base64");
40
+ return `data:${mediaType};base64,${base64}`;
41
+ };
42
+
43
+ const defaultTextValue = (text: string) => ({
44
+ text: [text],
45
+ });
46
+
47
+ const defaultImageValue = (input: ImageEmbeddingInput) => {
48
+ const v =
49
+ typeof input.data === "string"
50
+ ? input.data
51
+ : bytesToDataUrl(input.data, input.mediaType ?? "image/jpeg");
52
+ return { image: [v] };
53
+ };
54
+
55
+ export const createVoyageEmbeddingProvider = (
56
+ config: VoyageEmbeddingConfig = {}
57
+ ): EmbeddingProvider => {
58
+ const { voyage } = requireOptional<VoyageModule>({
59
+ id: "voyage-ai-provider",
60
+ installHint: "bun add voyage-ai-provider",
61
+ providerName: "voyage",
62
+ });
63
+ const type = config.type ?? "text";
64
+ const isMultimodal = config.type === "multimodal";
65
+ const model =
66
+ config.model ??
67
+ process.env.VOYAGE_MODEL ??
68
+ (type === "multimodal" ? DEFAULT_MULTIMODAL_MODEL : DEFAULT_TEXT_MODEL);
69
+ const timeoutMs = config.timeoutMs;
70
+
71
+ const textEmbeddingModel =
72
+ type === "multimodal"
73
+ ? undefined
74
+ : typeof voyage.embeddingModel === "function"
75
+ ? voyage.embeddingModel(model)
76
+ : voyage.textEmbeddingModel?.(model);
77
+ const multimodalEmbeddingModel =
78
+ type === "multimodal" ? voyage.multimodalEmbeddingModel?.(model) : undefined;
79
+
80
+ // AI SDK 6 types only accept string inputs; cast multimodal models/values.
81
+ const multimodalModel = multimodalEmbeddingModel as unknown as EmbeddingModel;
82
+
83
+ const resolveTextValue = (text: string) => {
84
+ if (isMultimodal && config.text?.value) {
85
+ return config.text.value(text);
86
+ }
87
+ return defaultTextValue(text);
88
+ };
89
+
90
+ const resolveImageValue = (input: ImageEmbeddingInput) => {
91
+ if (isMultimodal && config.image?.value) {
92
+ return config.image.value(input);
93
+ }
94
+ return defaultImageValue(input);
95
+ };
96
+
97
+ return {
98
+ name: `voyage:${model}`,
99
+ dimensions: undefined,
100
+ embed: async ({ text }) => {
101
+ const abortSignal = timeoutMs
102
+ ? AbortSignal.timeout(timeoutMs)
103
+ : undefined;
104
+
105
+ const result =
106
+ type === "multimodal"
107
+ ? await embed({
108
+ model: multimodalModel,
109
+ value: resolveTextValue(text) as unknown as string,
110
+ ...(abortSignal ? { abortSignal } : {}),
111
+ })
112
+ : await embed({
113
+ model: textEmbeddingModel!,
114
+ value: text,
115
+ ...(abortSignal ? { abortSignal } : {}),
116
+ });
117
+
118
+ if (!result.embedding) {
119
+ throw new Error("Embedding missing from Voyage response");
120
+ }
121
+
122
+ return result.embedding;
123
+ },
124
+ embedMany: async (inputs) => {
125
+ const abortSignal = timeoutMs ? AbortSignal.timeout(timeoutMs) : undefined;
126
+ const result =
127
+ type === "multimodal"
128
+ ? await embedMany({
129
+ model: multimodalModel,
130
+ values: inputs.map((i) => resolveTextValue(i.text)) as unknown as string[],
131
+ ...(abortSignal ? { abortSignal } : {}),
132
+ })
133
+ : await embedMany({
134
+ model: textEmbeddingModel!,
135
+ values: inputs.map((i) => i.text),
136
+ ...(abortSignal ? { abortSignal } : {}),
137
+ });
138
+
139
+ const { embeddings } = result;
140
+ if (!Array.isArray(embeddings)) {
141
+ throw new Error("Embeddings missing from Voyage embedMany response");
142
+ }
143
+ return embeddings;
144
+ },
145
+ ...(type === "multimodal"
146
+ ? {
147
+ embedImage: async (input: ImageEmbeddingInput) => {
148
+ const abortSignal = timeoutMs
149
+ ? AbortSignal.timeout(timeoutMs)
150
+ : undefined;
151
+
152
+ const value = resolveImageValue(input);
153
+
154
+ const result = await embed({
155
+ model: multimodalModel,
156
+ value: value as unknown as string,
157
+ ...(abortSignal ? { abortSignal } : {}),
158
+ });
159
+
160
+ if (!result.embedding) {
161
+ throw new Error("Embedding missing from Voyage response");
162
+ }
163
+
164
+ return result.embedding;
165
+ },
166
+ }
167
+ : {}),
168
+ };
169
+ };
@@ -1,7 +1,21 @@
1
- import { experimental_transcribe as transcribe } from "ai";
2
- import type { AssetExtractor } from "../../core/types";
1
+ import { experimental_transcribe as transcribe, type TranscriptionModel } from "ai";
2
+ import type { AssetExtractor, ExtractedTextItem } from "../../core/types";
3
3
  import { getAssetBytes } from "../_shared/fetch";
4
4
 
5
+ /**
6
+ * Model reference type that accepts both string gateway IDs and TranscriptionModel instances.
7
+ */
8
+ type TranscriptionModelRef = string | TranscriptionModel;
9
+
10
+ /**
11
+ * Transcription segment from the AI SDK.
12
+ */
13
+ interface TranscriptionSegment {
14
+ text?: string;
15
+ startSecond?: number;
16
+ endSecond?: number;
17
+ }
18
+
5
19
  /**
6
20
  * Audio transcription via the AI SDK `transcribe()` API.
7
21
  */
@@ -25,43 +39,45 @@ export function createAudioTranscribeExtractor(): AssetExtractor {
25
39
  const abortSignal = AbortSignal.timeout(cfg.timeoutMs);
26
40
 
27
41
  const result = await transcribe({
28
- model: cfg.model as any,
42
+ model: cfg.model as TranscriptionModelRef,
29
43
  audio: bytes,
30
44
  abortSignal,
31
45
  });
32
46
 
33
- const segments: any[] = Array.isArray((result as any)?.segments)
34
- ? (result as any).segments
47
+ const segments: TranscriptionSegment[] = Array.isArray(result.segments)
48
+ ? result.segments
35
49
  : [];
36
50
 
37
51
  if (segments.length > 0) {
52
+ const textItems: ExtractedTextItem[] = segments
53
+ .map((s, i) => {
54
+ const t = String(s?.text ?? "").trim();
55
+ if (!t) return null;
56
+ const start = Number(s?.startSecond ?? NaN);
57
+ const end = Number(s?.endSecond ?? NaN);
58
+ return {
59
+ label: `segment-${i + 1}`,
60
+ content: t,
61
+ ...(Number.isFinite(start) && Number.isFinite(end)
62
+ ? { timeRangeSec: [start, end] as [number, number] }
63
+ : {}),
64
+ };
65
+ })
66
+ .filter((item): item is ExtractedTextItem => item !== null);
67
+
38
68
  return {
39
- texts: segments
40
- .map((s, i) => {
41
- const t = String(s?.text ?? "").trim();
42
- if (!t) return null;
43
- const start = Number(s?.startSecond ?? NaN);
44
- const end = Number(s?.endSecond ?? NaN);
45
- return {
46
- label: `segment-${i + 1}`,
47
- content: t,
48
- ...(Number.isFinite(start) && Number.isFinite(end)
49
- ? { timeRangeSec: [start, end] as [number, number] }
50
- : {}),
51
- };
52
- })
53
- .filter(Boolean) as any,
69
+ texts: textItems,
54
70
  diagnostics: {
55
71
  model: cfg.model,
56
72
  seconds:
57
- typeof (result as any)?.durationInSeconds === "number"
58
- ? (result as any).durationInSeconds
73
+ typeof result.durationInSeconds === "number"
74
+ ? result.durationInSeconds
59
75
  : undefined,
60
76
  },
61
77
  };
62
78
  }
63
79
 
64
- const text = String((result as any)?.text ?? "").trim();
80
+ const text = (result.text ?? "").trim();
65
81
  if (!text) return { texts: [], diagnostics: { model: cfg.model } };
66
82
 
67
83
  return {
@@ -3,6 +3,13 @@ import { getAssetBytes } from "../_shared/fetch";
3
3
  import { extFromFilename, normalizeMediaType } from "../_shared/media";
4
4
  import { capText } from "../_shared/text";
5
5
 
6
+ /**
7
+ * Minimal mammoth module interface.
8
+ */
9
+ interface MammothModule {
10
+ extractRawText(options: { arrayBuffer: ArrayBuffer }): Promise<{ value?: string }>;
11
+ }
12
+
6
13
  const DOCX_MEDIA =
7
14
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
8
15
 
@@ -33,7 +40,7 @@ export function createFileDocxExtractor(): AssetExtractor {
33
40
  });
34
41
 
35
42
  // Dynamic import so the core package can be used without mammoth unless this extractor is installed.
36
- const mammoth: any = await import("mammoth");
43
+ const mammoth = (await import("mammoth")) as MammothModule;
37
44
  const arrayBuffer = bytes.buffer.slice(
38
45
  bytes.byteOffset,
39
46
  bytes.byteOffset + bytes.byteLength
@@ -3,6 +3,27 @@ import { getAssetBytes } from "../_shared/fetch";
3
3
  import { extFromFilename, normalizeMediaType } from "../_shared/media";
4
4
  import { capText } from "../_shared/text";
5
5
 
6
+ /**
7
+ * Zip file entry interface.
8
+ */
9
+ interface ZipFile {
10
+ async(type: "string"): Promise<string>;
11
+ }
12
+
13
+ /**
14
+ * JSZip instance interface.
15
+ */
16
+ interface JSZipInstance {
17
+ files: Record<string, ZipFile>;
18
+ }
19
+
20
+ /**
21
+ * JSZip constructor interface.
22
+ */
23
+ interface JSZipConstructor {
24
+ loadAsync(data: Uint8Array): Promise<JSZipInstance>;
25
+ }
26
+
6
27
  const PPTX_MEDIA =
7
28
  "application/vnd.openxmlformats-officedocument.presentationml.presentation";
8
29
 
@@ -41,7 +62,7 @@ export function createFilePptxExtractor(): AssetExtractor {
41
62
  });
42
63
 
43
64
  // Dynamic import to avoid hard dependency unless installed.
44
- const JSZip: any = (await import("jszip")).default;
65
+ const JSZip = (await import("jszip")).default as unknown as JSZipConstructor;
45
66
  const zip = await JSZip.loadAsync(bytes);
46
67
 
47
68
  const slidePaths = Object.keys(zip.files).filter((p) =>
@@ -3,6 +3,29 @@ import { getAssetBytes } from "../_shared/fetch";
3
3
  import { extFromFilename, normalizeMediaType } from "../_shared/media";
4
4
  import { capText } from "../_shared/text";
5
5
 
6
+ /**
7
+ * XLSX sheet interface.
8
+ */
9
+ type XLSXSheet = unknown;
10
+
11
+ /**
12
+ * XLSX workbook interface.
13
+ */
14
+ interface XLSXWorkbook {
15
+ SheetNames?: string[];
16
+ Sheets?: Record<string, XLSXSheet>;
17
+ }
18
+
19
+ /**
20
+ * Minimal xlsx module interface.
21
+ */
22
+ interface XLSXModule {
23
+ read(data: Buffer, options: { type: string }): XLSXWorkbook;
24
+ utils: {
25
+ sheet_to_csv(sheet: XLSXSheet): string;
26
+ };
27
+ }
28
+
6
29
  const XLSX_MEDIA =
7
30
  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
8
31
 
@@ -32,7 +55,7 @@ export function createFileXlsxExtractor(): AssetExtractor {
32
55
  defaultMediaType: XLSX_MEDIA,
33
56
  });
34
57
 
35
- const xlsx: any = await import("xlsx");
58
+ const xlsx = (await import("xlsx")) as XLSXModule;
36
59
  const wb = xlsx.read(Buffer.from(bytes), { type: "buffer" });
37
60
 
38
61
  const parts: string[] = [];
@@ -1,9 +1,14 @@
1
- import { generateText } from "ai";
1
+ import { generateText, type LanguageModel } from "ai";
2
2
  import type { AssetExtractor } from "../../core/types";
3
3
  import { getAssetBytes } from "../_shared/fetch";
4
4
  import { normalizeMediaType } from "../_shared/media";
5
5
  import { capText } from "../_shared/text";
6
6
 
7
+ /**
8
+ * Model reference type that accepts both string gateway IDs and LanguageModel instances.
9
+ */
10
+ type ModelRef = string | LanguageModel;
11
+
7
12
  /**
8
13
  * Caption generation for images via a vision-capable LLM.
9
14
  *
@@ -29,7 +34,7 @@ export function createImageCaptionLlmExtractor(): AssetExtractor {
29
34
  const abortSignal = AbortSignal.timeout(cfg.timeoutMs);
30
35
 
31
36
  const result = await generateText({
32
- model: cfg.model as any,
37
+ model: cfg.model as ModelRef,
33
38
  abortSignal,
34
39
  messages: [
35
40
  {
@@ -46,7 +51,7 @@ export function createImageCaptionLlmExtractor(): AssetExtractor {
46
51
  ],
47
52
  });
48
53
 
49
- const caption = String((result as any)?.text ?? "").trim();
54
+ const caption = (result.text ?? "").trim();
50
55
  if (!caption) return { texts: [], diagnostics: { model: cfg.model } };
51
56
 
52
57
  return {
@@ -1,13 +1,18 @@
1
- import { generateText } from "ai";
1
+ import { generateText, type LanguageModel } from "ai";
2
2
  import type { AssetExtractor } from "../../core/types";
3
3
  import { getAssetBytes } from "../_shared/fetch";
4
4
  import { normalizeMediaType } from "../_shared/media";
5
5
  import { capText } from "../_shared/text";
6
6
 
7
+ /**
8
+ * Model reference type that accepts both string gateway IDs and LanguageModel instances.
9
+ */
10
+ type ModelRef = string | LanguageModel;
11
+
7
12
  /**
8
13
  * Image OCR via a vision-capable LLM.
9
14
  *
10
- * This extractor is intended for screenshots, charts, diagrams, and any image with embedded text.
15
+ * This extractor is intended for screenshots, charts, diagrams, and images with embedded text.
11
16
  */
12
17
  export function createImageOcrExtractor(): AssetExtractor {
13
18
  return {
@@ -29,7 +34,7 @@ export function createImageOcrExtractor(): AssetExtractor {
29
34
  const abortSignal = AbortSignal.timeout(cfg.timeoutMs);
30
35
 
31
36
  const result = await generateText({
32
- model: cfg.model as any,
37
+ model: cfg.model as ModelRef,
33
38
  abortSignal,
34
39
  messages: [
35
40
  {
@@ -46,7 +51,7 @@ export function createImageOcrExtractor(): AssetExtractor {
46
51
  ],
47
52
  });
48
53
 
49
- const text = String((result as any)?.text ?? "").trim();
54
+ const text = (result.text ?? "").trim();
50
55
  if (!text) return { texts: [], diagnostics: { model: cfg.model } };
51
56
 
52
57
  return {
@@ -1,9 +1,14 @@
1
- import { generateText } from "ai";
1
+ import { generateText, type LanguageModel } from "ai";
2
2
  import type { AssetData, AssetExtractor, AssetFetchConfig } from "../../core/types";
3
3
  import { getAssetBytes } from "../_shared/fetch";
4
4
  import { normalizeMediaType } from "../_shared/media";
5
5
  import { capText } from "../_shared/text";
6
6
 
7
+ /**
8
+ * Model reference type that accepts both string gateway IDs and LanguageModel instances.
9
+ */
10
+ type ModelRef = string | LanguageModel;
11
+
7
12
  async function getPdfBytes(args: {
8
13
  data: AssetData;
9
14
  fetchConfig: AssetFetchConfig;
@@ -49,8 +54,8 @@ export function createPdfLlmExtractor(): AssetExtractor {
49
54
  const abortSignal = AbortSignal.timeout(llm.timeoutMs);
50
55
 
51
56
  const result = await generateText({
52
- // Intentionally allow string model ids for AI Gateway usage.
53
- model: llm.model as any,
57
+ // String model IDs are supported for AI Gateway routing.
58
+ model: llm.model as ModelRef,
54
59
  abortSignal,
55
60
  messages: [
56
61
  {
@@ -68,7 +73,7 @@ export function createPdfLlmExtractor(): AssetExtractor {
68
73
  ],
69
74
  });
70
75
 
71
- const text = String((result as any)?.text ?? "").trim();
76
+ const text = (result.text ?? "").trim();
72
77
  if (!text) return { texts: [], diagnostics: { model: llm.model } };
73
78
 
74
79
  const capped = capText(text, llm.maxOutputChars);
@@ -2,6 +2,27 @@ import type { AssetExtractor } from "../../core/types";
2
2
  import { getAssetBytes } from "../_shared/fetch";
3
3
  import { capText } from "../_shared/text";
4
4
 
5
+ /**
6
+ * Text content item from pdfjs-dist.
7
+ */
8
+ interface PdfTextItem {
9
+ str?: string;
10
+ }
11
+
12
+ /**
13
+ * Minimal pdfjs-dist module interface.
14
+ */
15
+ interface PdfJsModule {
16
+ getDocument(params: { data: Uint8Array }): {
17
+ promise: Promise<{
18
+ numPages: number;
19
+ getPage(pageNum: number): Promise<{
20
+ getTextContent(): Promise<{ items?: PdfTextItem[] }>;
21
+ }>;
22
+ }>;
23
+ };
24
+ }
25
+
5
26
  /**
6
27
  * Fast/cheap PDF extraction using the PDF's built-in text layer.
7
28
  *
@@ -29,7 +50,7 @@ export function createPdfTextLayerExtractor(): AssetExtractor {
29
50
  });
30
51
 
31
52
  // Dynamic import so the core package can be used without pdfjs unless this extractor is installed.
32
- const pdfjs: any = await import("pdfjs-dist/legacy/build/pdf.mjs");
53
+ const pdfjs = (await import("pdfjs-dist/legacy/build/pdf.mjs")) as PdfJsModule;
33
54
 
34
55
  const doc = await pdfjs.getDocument({ data: bytes }).promise;
35
56
  const totalPages: number = Number(doc?.numPages ?? 0);
@@ -42,7 +63,7 @@ export function createPdfTextLayerExtractor(): AssetExtractor {
42
63
  for (let pageNum = 1; pageNum <= maxPages; pageNum++) {
43
64
  const page = await doc.getPage(pageNum);
44
65
  const textContent = await page.getTextContent();
45
- const items: any[] = Array.isArray(textContent?.items)
66
+ const items: PdfTextItem[] = Array.isArray(textContent?.items)
46
67
  ? textContent.items
47
68
  : [];
48
69
  const pageText = items
@@ -1,4 +1,4 @@
1
- import { generateText } from "ai";
1
+ import { generateText, type LanguageModel } from "ai";
2
2
  import { spawn } from "node:child_process";
3
3
  import { mkdir, readdir, readFile, rm, writeFile } from "node:fs/promises";
4
4
  import os from "node:os";
@@ -7,6 +7,11 @@ import type { AssetExtractor } from "../../core/types";
7
7
  import { getAssetBytes } from "../_shared/fetch";
8
8
  import { capText } from "../_shared/text";
9
9
 
10
+ /**
11
+ * Model reference type that accepts both string gateway IDs and LanguageModel instances.
12
+ */
13
+ type ModelRef = string | LanguageModel;
14
+
10
15
  const run = async (cmd: string, args: string[], opts: { cwd: string }) => {
11
16
  return await new Promise<{ stdout: string; stderr: string }>((resolve, reject) => {
12
17
  const child = spawn(cmd, args, { cwd: opts.cwd, stdio: ["ignore", "pipe", "pipe"] });
@@ -87,7 +92,7 @@ export function createVideoFramesExtractor(): AssetExtractor {
87
92
 
88
93
  const imgBytes = await readFile(path.join(tmpDir, f));
89
94
  const result = await generateText({
90
- model: cfg.model as any,
95
+ model: cfg.model as ModelRef,
91
96
  abortSignal: abortPerFrame(cfg.timeoutMs),
92
97
  messages: [
93
98
  {
@@ -100,7 +105,7 @@ export function createVideoFramesExtractor(): AssetExtractor {
100
105
  ],
101
106
  });
102
107
 
103
- const t = String((result as any)?.text ?? "").trim();
108
+ const t = (result.text ?? "").trim();
104
109
  if (!t) continue;
105
110
 
106
111
  const capped = capText(t, cfg.maxOutputChars - totalChars);