unrag 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/package.json +1 -1
  2. package/registry/connectors/google-drive/_api-types.ts +60 -0
  3. package/registry/connectors/google-drive/client.ts +99 -38
  4. package/registry/connectors/google-drive/sync.ts +97 -69
  5. package/registry/connectors/google-drive/types.ts +76 -37
  6. package/registry/connectors/notion/client.ts +12 -3
  7. package/registry/connectors/notion/render.ts +62 -23
  8. package/registry/connectors/notion/sync.ts +30 -23
  9. package/registry/core/assets.ts +11 -10
  10. package/registry/core/config.ts +10 -25
  11. package/registry/core/context-engine.ts +5 -0
  12. package/registry/core/deep-merge.ts +45 -0
  13. package/registry/core/ingest.ts +117 -44
  14. package/registry/core/types.ts +52 -0
  15. package/registry/embedding/_shared.ts +6 -1
  16. package/registry/embedding/ai.ts +2 -3
  17. package/registry/embedding/azure.ts +11 -2
  18. package/registry/embedding/bedrock.ts +11 -2
  19. package/registry/embedding/cohere.ts +11 -2
  20. package/registry/embedding/google.ts +11 -2
  21. package/registry/embedding/mistral.ts +11 -2
  22. package/registry/embedding/ollama.ts +18 -3
  23. package/registry/embedding/openai.ts +11 -2
  24. package/registry/embedding/openrouter.ts +53 -11
  25. package/registry/embedding/together.ts +15 -5
  26. package/registry/embedding/vertex.ts +11 -2
  27. package/registry/embedding/voyage.ts +16 -6
  28. package/registry/extractors/audio-transcribe/index.ts +39 -23
  29. package/registry/extractors/file-docx/index.ts +8 -1
  30. package/registry/extractors/file-pptx/index.ts +22 -1
  31. package/registry/extractors/file-xlsx/index.ts +24 -1
  32. package/registry/extractors/image-caption-llm/index.ts +8 -3
  33. package/registry/extractors/image-ocr/index.ts +9 -4
  34. package/registry/extractors/pdf-llm/index.ts +9 -4
  35. package/registry/extractors/pdf-text-layer/index.ts +23 -2
  36. package/registry/extractors/video-frames/index.ts +8 -3
  37. package/registry/extractors/video-transcribe/index.ts +40 -24
  38. package/registry/manifest.json +6 -6
  39. package/registry/store/drizzle-postgres-pgvector/store.ts +24 -7
package/package.json CHANGED
@@ -6,7 +6,7 @@
6
6
  "bin": {
7
7
  "unrag": "./dist/cli/index.js"
8
8
  },
9
- "version": "0.2.6",
9
+ "version": "0.2.7",
10
10
  "private": false,
11
11
  "license": "Apache-2.0",
12
12
  "devDependencies": {
package/registry/connectors/google-drive/_api-types.ts ADDED
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Structural types for the Google Drive API.
3
+ *
4
+ * These are minimal interfaces that match the googleapis API structure,
5
+ * allowing the connector to work without depending on googleapis types at compile time.
6
+ */
7
+
8
+ export interface DriveFile {
9
+ id?: string | null;
10
+ name?: string | null;
11
+ mimeType?: string | null;
12
+ size?: string | null;
13
+ webViewLink?: string | null;
14
+ modifiedTime?: string | null;
15
+ parents?: string[] | null;
16
+ shortcutDetails?: {
17
+ targetId?: string | null;
18
+ targetMimeType?: string | null;
19
+ } | null;
20
+ }
21
+
22
+ export interface DriveFileList {
23
+ files?: DriveFile[];
24
+ nextPageToken?: string | null;
25
+ }
26
+
27
+ export interface DriveFilesResource {
28
+ get(params: {
29
+ fileId: string;
30
+ fields?: string;
31
+ alt?: string;
32
+ supportsAllDrives?: boolean;
33
+ }): Promise<{ data: DriveFile | ArrayBuffer | string }>;
34
+
35
+ list(params: {
36
+ q?: string;
37
+ fields?: string;
38
+ pageToken?: string;
39
+ pageSize?: number;
40
+ supportsAllDrives?: boolean;
41
+ includeItemsFromAllDrives?: boolean;
42
+ }): Promise<{ data: DriveFileList }>;
43
+
44
+ export(params: {
45
+ fileId: string;
46
+ mimeType: string;
47
+ }): Promise<{ data: ArrayBuffer | string }>;
48
+ }
49
+
50
+ export interface DriveClient {
51
+ files: DriveFilesResource;
52
+ }
53
+
54
+ /**
55
+ * Auth client interface - minimal subset used by the connector.
56
+ */
57
+ export interface AuthClient {
58
+ getAccessToken?(): Promise<{ token?: string | null }>;
59
+ }
60
+
package/registry/connectors/google-drive/client.ts CHANGED
@@ -1,4 +1,11 @@
1
- import type { GoogleDriveAuth } from "./types";
1
+ import type { DriveClient, AuthClient } from "./_api-types";
2
+ import type {
3
+ GoogleDriveAuth,
4
+ GoogleDriveOAuthAuth,
5
+ GoogleDriveServiceAccountAuth,
6
+ GoogleDriveGoogleAuthAuth,
7
+ ServiceAccountCredentials,
8
+ } from "./types";
2
9
 
3
10
  export const DEFAULT_DRIVE_SCOPES = [
4
11
  "https://www.googleapis.com/auth/drive.readonly",
@@ -17,36 +24,51 @@ type NormalizedAuth =
17
24
  }
18
25
  | {
19
26
  kind: "service_account";
20
- credentials: Record<string, any>;
27
+ credentials: ServiceAccountCredentials;
21
28
  subject?: string;
22
29
  }
23
30
  | { kind: "google_auth"; auth: unknown };
24
31
 
32
+ /**
33
+ * Type guard for service account auth.
34
+ */
35
+ function isServiceAccountAuth(auth: GoogleDriveAuth): auth is GoogleDriveServiceAccountAuth {
36
+ return auth.kind === "service_account";
37
+ }
38
+
39
+ /**
40
+ * Type guard for google auth.
41
+ */
42
+ function isGoogleAuth(auth: GoogleDriveAuth): auth is GoogleDriveGoogleAuthAuth {
43
+ return auth.kind === "google_auth";
44
+ }
45
+
46
+ /**
47
+ * Type guard for oauth.
48
+ */
49
+ function isOAuthAuth(auth: GoogleDriveAuth): auth is GoogleDriveOAuthAuth {
50
+ return auth.kind === "oauth";
51
+ }
52
+
25
53
  export function normalizeGoogleDriveAuth(auth: GoogleDriveAuth): NormalizedAuth {
26
54
  if (!auth || typeof auth !== "object") {
27
55
  throw new Error("Google Drive auth is required");
28
56
  }
29
57
 
30
- const kind = (auth as any).kind;
31
- if (kind !== "oauth" && kind !== "service_account" && kind !== "google_auth") {
32
- throw new Error(`Unknown Google Drive auth kind: ${String(kind)}`);
33
- }
34
-
35
- if (kind === "google_auth") {
36
- const a = (auth as any).auth;
37
- if (!a) throw new Error('Google Drive auth.kind="google_auth" requires auth');
38
- return { kind: "google_auth", auth: a };
58
+ if (isGoogleAuth(auth)) {
59
+ if (!auth.auth) throw new Error('Google Drive auth.kind="google_auth" requires auth');
60
+ return { kind: "google_auth", auth: auth.auth };
39
61
  }
40
62
 
41
- if (kind === "service_account") {
42
- const raw = (auth as any).credentialsJson;
63
+ if (isServiceAccountAuth(auth)) {
64
+ const raw = auth.credentialsJson;
43
65
  if (!raw) {
44
66
  throw new Error(
45
67
  'Google Drive auth.kind="service_account" requires credentialsJson'
46
68
  );
47
69
  }
48
- const credentials =
49
- typeof raw === "string" ? (JSON.parse(raw) as Record<string, any>) : (raw as any);
70
+ const credentials: ServiceAccountCredentials =
71
+ typeof raw === "string" ? (JSON.parse(raw) as ServiceAccountCredentials) : raw;
50
72
  if (!credentials?.client_email || !credentials?.private_key) {
51
73
  throw new Error(
52
74
  'Google Drive service account credentials must include "client_email" and "private_key".'
@@ -55,29 +77,33 @@ export function normalizeGoogleDriveAuth(auth: GoogleDriveAuth): NormalizedAuth
55
77
  return {
56
78
  kind: "service_account",
57
79
  credentials,
58
- subject: (auth as any).subject ? String((auth as any).subject) : undefined,
80
+ subject: auth.subject ? String(auth.subject) : undefined,
59
81
  };
60
82
  }
61
83
 
62
- // oauth
63
- if ((auth as any).oauthClient) {
64
- return { kind: "oauth_client", oauthClient: (auth as any).oauthClient };
65
- }
84
+ if (isOAuthAuth(auth)) {
85
+ // oauth
86
+ if (auth.oauthClient) {
87
+ return { kind: "oauth_client", oauthClient: auth.oauthClient };
88
+ }
66
89
 
67
- const { clientId, clientSecret, redirectUri, refreshToken, accessToken } = auth as any;
68
- if (!clientId || !clientSecret || !redirectUri || !refreshToken) {
69
- throw new Error(
70
- 'Google Drive auth.kind="oauth" requires either oauthClient or { clientId, clientSecret, redirectUri, refreshToken }'
71
- );
90
+ const { clientId, clientSecret, redirectUri, refreshToken, accessToken } = auth;
91
+ if (!clientId || !clientSecret || !redirectUri || !refreshToken) {
92
+ throw new Error(
93
+ 'Google Drive auth.kind="oauth" requires either oauthClient or { clientId, clientSecret, redirectUri, refreshToken }'
94
+ );
95
+ }
96
+ return {
97
+ kind: "oauth_config",
98
+ clientId: String(clientId),
99
+ clientSecret: String(clientSecret),
100
+ redirectUri: String(redirectUri),
101
+ refreshToken: String(refreshToken),
102
+ ...(accessToken ? { accessToken: String(accessToken) } : {}),
103
+ };
72
104
  }
73
- return {
74
- kind: "oauth_config",
75
- clientId: String(clientId),
76
- clientSecret: String(clientSecret),
77
- redirectUri: String(redirectUri),
78
- refreshToken: String(refreshToken),
79
- ...(accessToken ? { accessToken: String(accessToken) } : {}),
80
- };
105
+
106
+ throw new Error(`Unknown Google Drive auth kind: ${String((auth as Record<string, unknown>).kind)}`);
81
107
  }
82
108
 
83
109
  const asMessage = (err: unknown) => {
@@ -89,6 +115,41 @@ const asMessage = (err: unknown) => {
89
115
  }
90
116
  };
91
117
 
118
+ /**
119
+ * Google Auth Library module shape for dynamic import.
120
+ */
121
+ interface GoogleAuthLibraryModule {
122
+ OAuth2Client?: new (
123
+ clientId: string,
124
+ clientSecret: string,
125
+ redirectUri: string
126
+ ) => {
127
+ setCredentials(credentials: Record<string, string>): void;
128
+ };
129
+ OAuth2?: new (
130
+ clientId: string,
131
+ clientSecret: string,
132
+ redirectUri: string
133
+ ) => {
134
+ setCredentials(credentials: Record<string, string>): void;
135
+ };
136
+ JWT?: new (options: {
137
+ email: string;
138
+ key: string;
139
+ scopes: string[];
140
+ subject?: string;
141
+ }) => unknown;
142
+ }
143
+
144
+ /**
145
+ * Googleapis module shape for dynamic import.
146
+ */
147
+ interface GoogleApisModule {
148
+ google: {
149
+ drive(options: { version: string; auth: unknown }): DriveClient;
150
+ };
151
+ }
152
+
92
153
  /**
93
154
  * Creates a Google Drive API client from a plug-and-play auth input.
94
155
  *
@@ -98,11 +159,11 @@ const asMessage = (err: unknown) => {
98
159
  export async function createGoogleDriveClient(args: {
99
160
  auth: GoogleDriveAuth;
100
161
  scopes?: string[];
101
- }): Promise<{ drive: any; authClient: any }> {
162
+ }): Promise<{ drive: DriveClient; authClient: AuthClient }> {
102
163
  const normalized = normalizeGoogleDriveAuth(args.auth);
103
164
  const scopes = (args.scopes?.length ? args.scopes : DEFAULT_DRIVE_SCOPES) as string[];
104
165
 
105
- let authClient: any;
166
+ let authClient: unknown;
106
167
 
107
168
  try {
108
169
  if (normalized.kind === "oauth_client") {
@@ -111,7 +172,7 @@ export async function createGoogleDriveClient(args: {
111
172
  authClient = normalized.auth;
112
173
  } else {
113
174
  // google-auth-library (dynamic)
114
- const gal: any = await import("google-auth-library");
175
+ const gal = (await import("google-auth-library")) as GoogleAuthLibraryModule;
115
176
 
116
177
  if (normalized.kind === "oauth_config") {
117
178
  const OAuth2Client = gal.OAuth2Client ?? gal.OAuth2;
@@ -143,7 +204,7 @@ export async function createGoogleDriveClient(args: {
143
204
  }
144
205
  }
145
206
 
146
- const { google }: any = await import("googleapis");
207
+ const { google } = (await import("googleapis")) as GoogleApisModule;
147
208
  if (!google?.drive) {
148
209
  throw new Error("googleapis.google.drive not found");
149
210
  }
@@ -153,7 +214,7 @@ export async function createGoogleDriveClient(args: {
153
214
  auth: authClient,
154
215
  });
155
216
 
156
- return { drive, authClient };
217
+ return { drive, authClient: authClient as AuthClient };
157
218
  } catch (err) {
158
219
  const msg = asMessage(err);
159
220
  if (
package/registry/connectors/google-drive/sync.ts CHANGED
@@ -1,5 +1,6 @@
1
- import type { IngestResult } from "../../core";
1
+ import type { IngestResult, Metadata } from "../../core";
2
2
  import type { AssetInput } from "../../core/types";
3
+ import type { DriveClient, DriveFile } from "./_api-types";
3
4
  import { createGoogleDriveClient } from "./client";
4
5
  import {
5
6
  assetKindFromMediaType,
@@ -17,6 +18,25 @@ import type {
17
18
 
18
19
  const DEFAULT_MAX_BYTES = 15 * 1024 * 1024; // 15MB
19
20
 
21
+ /**
22
+ * Internal metadata type for Google Drive documents.
23
+ */
24
+ interface GoogleDriveMetadata extends Metadata {
25
+ connector: "google-drive";
26
+ kind: "file" | "folder" | "shortcut";
27
+ fileId: string;
28
+ name?: string;
29
+ mimeType?: string;
30
+ size?: number;
31
+ googleNativeKind?: string;
32
+ unsupportedGoogleMime?: boolean;
33
+ skippedTooLarge?: boolean;
34
+ exportedTooLarge?: boolean;
35
+ shortcutUnresolved?: boolean;
36
+ exportMimeType?: string;
37
+ exportFallback?: string;
38
+ }
39
+
20
40
  const joinPrefix = (prefix: string | undefined, rest: string) => {
21
41
  const p = (prefix ?? "").trim();
22
42
  if (!p) return rest;
@@ -44,10 +64,10 @@ const asMessage = (err: unknown) => {
44
64
  }
45
65
  };
46
66
 
47
- const toUint8Array = (data: any): Uint8Array => {
67
+ const toUint8Array = (data: unknown): Uint8Array => {
48
68
  if (!data) return new Uint8Array();
49
69
  if (data instanceof Uint8Array) return data;
50
- if (typeof Buffer !== "undefined" && data instanceof Buffer) {
70
+ if (typeof Buffer !== "undefined" && Buffer.isBuffer(data)) {
51
71
  return new Uint8Array(data);
52
72
  }
53
73
  if (data instanceof ArrayBuffer) return new Uint8Array(data);
@@ -61,51 +81,54 @@ const toUint8Array = (data: any): Uint8Array => {
61
81
  return new Uint8Array();
62
82
  };
63
83
 
64
- const bytesToText = (bytes: Uint8Array) => {
84
+ const bytesToText = (bytes: Uint8Array): string => {
65
85
  return new TextDecoder("utf-8", { fatal: false }).decode(bytes);
66
86
  };
67
87
 
68
- const isNotFound = (err: any, treatForbiddenAsNotFound: boolean) => {
88
+ const isNotFound = (err: unknown, treatForbiddenAsNotFound: boolean): boolean => {
89
+ if (typeof err !== "object" || err === null) return false;
90
+ const e = err as Record<string, unknown>;
91
+ const response = e.response as Record<string, unknown> | undefined;
69
92
  const status =
70
- Number(err?.code ?? err?.status ?? err?.response?.status ?? err?.statusCode) ||
71
- Number(err?.response?.status);
93
+ Number(e.code ?? e.status ?? response?.status ?? e.statusCode ?? 0);
72
94
  if (status === 404) return true;
73
95
  if (treatForbiddenAsNotFound && status === 403) return true;
74
96
  return false;
75
97
  };
76
98
 
77
- async function getFileMetadata(drive: any, fileId: string) {
99
+ async function getFileMetadata(drive: DriveClient, fileId: string): Promise<DriveFile> {
78
100
  const res = await drive.files.get({
79
101
  fileId,
80
102
  supportsAllDrives: true,
81
103
  fields:
82
104
  "id,name,mimeType,size,md5Checksum,modifiedTime,webViewLink,webContentLink,iconLink,shortcutDetails,driveId",
83
105
  });
84
- return res?.data ?? {};
106
+ return (res?.data ?? {}) as DriveFile;
85
107
  }
86
108
 
87
- async function downloadFileBytes(drive: any, fileId: string): Promise<Uint8Array> {
88
- const res = await drive.files.get(
89
- { fileId, alt: "media", supportsAllDrives: true },
90
- { responseType: "arraybuffer" }
91
- );
109
+ async function downloadFileBytes(drive: DriveClient, fileId: string): Promise<Uint8Array> {
110
+ const res = await drive.files.get({
111
+ fileId,
112
+ alt: "media",
113
+ supportsAllDrives: true,
114
+ });
92
115
  return toUint8Array(res?.data);
93
116
  }
94
117
 
95
118
  async function exportFileBytes(
96
- drive: any,
119
+ drive: DriveClient,
97
120
  fileId: string,
98
121
  mimeType: string
99
122
  ): Promise<Uint8Array> {
100
- const res = await drive.files.export(
101
- { fileId, mimeType },
102
- { responseType: "arraybuffer" }
103
- );
123
+ const res = await drive.files.export({
124
+ fileId,
125
+ mimeType,
126
+ });
104
127
  return toUint8Array(res?.data);
105
128
  }
106
129
 
107
130
  export async function loadGoogleDriveFileDocument(args: {
108
- drive: any;
131
+ drive: DriveClient;
109
132
  fileId: string;
110
133
  sourceIdPrefix?: string;
111
134
  options?: {
@@ -128,21 +151,22 @@ export async function loadGoogleDriveFileDocument(args: {
128
151
 
129
152
  // Handle folders: return a document shape but with no content/assets; callers typically skip.
130
153
  if (classification.kind === "folder") {
154
+ const folderMetadata: GoogleDriveMetadata = {
155
+ connector: "google-drive",
156
+ kind: "folder",
157
+ fileId,
158
+ name,
159
+ mimeType: DRIVE_MIME.folder,
160
+ ...(meta?.webViewLink ? { webViewLink: String(meta.webViewLink) } : {}),
161
+ ...(meta?.modifiedTime ? { modifiedTime: String(meta.modifiedTime) } : {}),
162
+ };
131
163
  return buildGoogleDriveFileIngestInput({
132
164
  fileId,
133
165
  sourceIdPrefix: args.sourceIdPrefix,
134
166
  content: "",
135
167
  assets: [],
136
- metadata: {
137
- connector: "google-drive",
138
- kind: "folder",
139
- fileId,
140
- name,
141
- mimeType: DRIVE_MIME.folder,
142
- ...(meta?.webViewLink ? { webViewLink: String(meta.webViewLink) } : {}),
143
- ...(meta?.modifiedTime ? { modifiedTime: String(meta.modifiedTime) } : {}),
144
- },
145
- }) as any;
168
+ metadata: folderMetadata,
169
+ });
146
170
  }
147
171
 
148
172
  // Shortcuts: resolve to target if possible (1-level), otherwise let caller decide.
@@ -150,20 +174,21 @@ export async function loadGoogleDriveFileDocument(args: {
150
174
  const visited = args._visited ?? new Set<string>();
151
175
  if (visited.has(fileId)) {
152
176
  // cycle
177
+ const cycleMetadata: GoogleDriveMetadata = {
178
+ connector: "google-drive",
179
+ kind: "shortcut",
180
+ fileId,
181
+ name,
182
+ mimeType: DRIVE_MIME.shortcut,
183
+ shortcutUnresolved: true,
184
+ };
153
185
  return buildGoogleDriveFileIngestInput({
154
186
  fileId,
155
187
  sourceIdPrefix: args.sourceIdPrefix,
156
188
  content: "",
157
189
  assets: [],
158
- metadata: {
159
- connector: "google-drive",
160
- kind: "shortcut",
161
- fileId,
162
- name,
163
- mimeType: DRIVE_MIME.shortcut,
164
- shortcutUnresolved: true,
165
- },
166
- }) as any;
190
+ metadata: cycleMetadata,
191
+ });
167
192
  }
168
193
  visited.add(fileId);
169
194
 
@@ -172,20 +197,21 @@ export async function loadGoogleDriveFileDocument(args: {
172
197
  : "";
173
198
 
174
199
  if (!targetId) {
200
+ const unresolvedMetadata: GoogleDriveMetadata = {
201
+ connector: "google-drive",
202
+ kind: "shortcut",
203
+ fileId,
204
+ name,
205
+ mimeType: DRIVE_MIME.shortcut,
206
+ shortcutUnresolved: true,
207
+ };
175
208
  return buildGoogleDriveFileIngestInput({
176
209
  fileId,
177
210
  sourceIdPrefix: args.sourceIdPrefix,
178
211
  content: "",
179
212
  assets: [],
180
- metadata: {
181
- connector: "google-drive",
182
- kind: "shortcut",
183
- fileId,
184
- name,
185
- mimeType: DRIVE_MIME.shortcut,
186
- shortcutUnresolved: true,
187
- },
188
- }) as any;
213
+ metadata: unresolvedMetadata,
214
+ });
189
215
  }
190
216
 
191
217
  // Resolve target content/assets but keep sourceId stable to the shortcut file id.
@@ -209,7 +235,7 @@ export async function loadGoogleDriveFileDocument(args: {
209
235
  };
210
236
  }
211
237
 
212
- const baseMetadata = {
238
+ const baseMetadata: Record<string, unknown> = {
213
239
  connector: "google-drive",
214
240
  kind: "file",
215
241
  fileId,
@@ -222,7 +248,7 @@ export async function loadGoogleDriveFileDocument(args: {
222
248
  ...(meta?.webContentLink ? { webContentLink: String(meta.webContentLink) } : {}),
223
249
  ...(meta?.iconLink ? { iconLink: String(meta.iconLink) } : {}),
224
250
  ...(meta?.driveId ? { driveId: String(meta.driveId) } : {}),
225
- } as const;
251
+ };
226
252
 
227
253
  // Google-native export path
228
254
  if (classification.kind === "google_native") {
@@ -238,7 +264,7 @@ export async function loadGoogleDriveFileDocument(args: {
238
264
  googleNativeKind: classification.nativeKind,
239
265
  unsupportedGoogleMime: true,
240
266
  },
241
- }) as any;
267
+ });
242
268
  }
243
269
 
244
270
  // For content export, enforce maxBytesPerFile by bytes length.
@@ -252,7 +278,7 @@ export async function loadGoogleDriveFileDocument(args: {
252
278
  content: "",
253
279
  assets: [],
254
280
  metadata: { ...baseMetadata, exportedTooLarge: true },
255
- }) as any;
281
+ });
256
282
  }
257
283
  const content = bytesToText(bytes).trim();
258
284
  return buildGoogleDriveFileIngestInput({
@@ -261,7 +287,7 @@ export async function loadGoogleDriveFileDocument(args: {
261
287
  content,
262
288
  assets: [],
263
289
  metadata: { ...baseMetadata, googleNativeKind: classification.nativeKind, exportMimeType: plan.mimeType },
264
- }) as any;
290
+ });
265
291
  } catch (err) {
266
292
  // Slides can fail to export as text; fallback to PPTX unless strict.
267
293
  if (classification.nativeKind === "slides" && !strictNativeExport) {
@@ -274,7 +300,7 @@ export async function loadGoogleDriveFileDocument(args: {
274
300
  content: "",
275
301
  assets: [],
276
302
  metadata: { ...baseMetadata, exportedTooLarge: true },
277
- }) as any;
303
+ });
278
304
  }
279
305
  const asset: AssetInput = {
280
306
  assetId: fileId,
@@ -286,7 +312,7 @@ export async function loadGoogleDriveFileDocument(args: {
286
312
  filename: name ? `${name}.pptx` : undefined,
287
313
  },
288
314
  uri: meta?.webViewLink ? String(meta.webViewLink) : undefined,
289
- metadata: { connector: "google-drive", fileId, exportMimeType: EXPORT_MIME.pptx } as any,
315
+ metadata: { connector: "google-drive", fileId, exportMimeType: EXPORT_MIME.pptx },
290
316
  };
291
317
  return buildGoogleDriveFileIngestInput({
292
318
  fileId,
@@ -294,7 +320,7 @@ export async function loadGoogleDriveFileDocument(args: {
294
320
  content: "",
295
321
  assets: [asset],
296
322
  metadata: { ...baseMetadata, googleNativeKind: "slides", exportFallback: "pptx" },
297
- }) as any;
323
+ });
298
324
  } catch {
299
325
  // fall through to strict error
300
326
  }
@@ -314,7 +340,7 @@ export async function loadGoogleDriveFileDocument(args: {
314
340
  content: "",
315
341
  assets: [],
316
342
  metadata: { ...baseMetadata, exportedTooLarge: true },
317
- }) as any;
343
+ });
318
344
  }
319
345
 
320
346
  const filename = name && plan.filenameExt ? `${name}.${plan.filenameExt}` : name || undefined;
@@ -323,7 +349,7 @@ export async function loadGoogleDriveFileDocument(args: {
323
349
  kind: plan.assetKind,
324
350
  data: { kind: "bytes", bytes, mediaType: plan.mimeType, ...(filename ? { filename } : {}) },
325
351
  uri: meta?.webViewLink ? String(meta.webViewLink) : undefined,
326
- metadata: { connector: "google-drive", fileId, exportMimeType: plan.mimeType } as any,
352
+ metadata: { connector: "google-drive", fileId, exportMimeType: plan.mimeType },
327
353
  };
328
354
 
329
355
  return buildGoogleDriveFileIngestInput({
@@ -332,7 +358,7 @@ export async function loadGoogleDriveFileDocument(args: {
332
358
  content: "",
333
359
  assets: [asset],
334
360
  metadata: { ...baseMetadata, googleNativeKind: classification.nativeKind, exportMimeType: plan.mimeType },
335
- }) as any;
361
+ });
336
362
  }
337
363
  }
338
364
 
@@ -344,7 +370,7 @@ export async function loadGoogleDriveFileDocument(args: {
344
370
  content: "",
345
371
  assets: [],
346
372
  metadata: { ...baseMetadata, skippedTooLarge: true },
347
- }) as any;
373
+ });
348
374
  }
349
375
 
350
376
  const bytes = await downloadFileBytes(args.drive, fileId);
@@ -355,7 +381,7 @@ export async function loadGoogleDriveFileDocument(args: {
355
381
  content: "",
356
382
  assets: [],
357
383
  metadata: { ...baseMetadata, skippedTooLarge: true },
358
- }) as any;
384
+ });
359
385
  }
360
386
 
361
387
  const assetKind = assetKindFromMediaType(mimeType);
@@ -370,7 +396,7 @@ export async function loadGoogleDriveFileDocument(args: {
370
396
  ...(filename ? { filename } : {}),
371
397
  },
372
398
  uri: meta?.webViewLink ? String(meta.webViewLink) : undefined,
373
- metadata: { connector: "google-drive", fileId, name, mimeType } as any,
399
+ metadata: { connector: "google-drive", fileId, name, mimeType },
374
400
  };
375
401
 
376
402
  // For pure binaries, keep content empty; extraction occurs via engine asset processing + extractors.
@@ -379,8 +405,8 @@ export async function loadGoogleDriveFileDocument(args: {
379
405
  sourceIdPrefix: args.sourceIdPrefix,
380
406
  content: "",
381
407
  assets: [asset],
382
- metadata: baseMetadata as any,
383
- }) as any;
408
+ metadata: baseMetadata,
409
+ });
384
410
  }
385
411
 
386
412
  export async function syncGoogleDriveFiles(
@@ -434,8 +460,10 @@ export async function syncGoogleDriveFiles(
434
460
  },
435
461
  });
436
462
 
463
+ const meta = doc.metadata as Record<string, unknown>;
464
+
437
465
  // Skip folders explicitly (v1).
438
- if ((doc.metadata as any)?.kind === "folder") {
466
+ if (meta.kind === "folder") {
439
467
  emit({
440
468
  type: "file:skipped",
441
469
  fileId,
@@ -446,7 +474,7 @@ export async function syncGoogleDriveFiles(
446
474
  continue;
447
475
  }
448
476
 
449
- if ((doc.metadata as any)?.unsupportedGoogleMime) {
477
+ if (meta.unsupportedGoogleMime) {
450
478
  emit({
451
479
  type: "file:skipped",
452
480
  fileId,
@@ -458,7 +486,7 @@ export async function syncGoogleDriveFiles(
458
486
  continue;
459
487
  }
460
488
 
461
- if ((doc.metadata as any)?.skippedTooLarge || (doc.metadata as any)?.exportedTooLarge) {
489
+ if (meta.skippedTooLarge || meta.exportedTooLarge) {
462
490
  emit({
463
491
  type: "file:skipped",
464
492
  fileId,
@@ -469,7 +497,7 @@ export async function syncGoogleDriveFiles(
469
497
  continue;
470
498
  }
471
499
 
472
- if ((doc.metadata as any)?.shortcutUnresolved) {
500
+ if (meta.shortcutUnresolved) {
473
501
  emit({
474
502
  type: "file:skipped",
475
503
  fileId,
@@ -484,7 +512,7 @@ export async function syncGoogleDriveFiles(
484
512
  sourceId: doc.sourceId,
485
513
  content: doc.content,
486
514
  assets: doc.assets,
487
- metadata: doc.metadata as any,
515
+ metadata: doc.metadata,
488
516
  });
489
517
 
490
518
  succeeded += 1;