unrag 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -455,6 +455,10 @@ function depsForConnector(connector) {
455
455
  if (connector === "notion") {
456
456
  deps["@notionhq/client"] = "^2.2.16";
457
457
  }
458
+ if (connector === "google-drive") {
459
+ deps["googleapis"] = "^148.0.0";
460
+ deps["google-auth-library"] = "^10.0.0";
461
+ }
458
462
  return { deps, devDeps };
459
463
  }
460
464
  function depsForExtractor(extractor) {
@@ -894,6 +898,7 @@ var AVAILABLE_EXTRACTORS2 = [
894
898
  "file-pptx",
895
899
  "file-xlsx"
896
900
  ];
901
+ var AVAILABLE_CONNECTORS = ["notion", "google-drive"];
897
902
  var parseAddArgs = (args) => {
898
903
  const out = {};
899
904
  for (let i = 0;i < args.length; i++) {
@@ -932,7 +937,7 @@ async function addCommand(args) {
932
937
  " unrag add <connector>",
933
938
  " unrag add extractor <name>",
934
939
  "",
935
- "Available connectors: notion",
940
+ `Available connectors: ${AVAILABLE_CONNECTORS.join(", ")}`,
936
941
  `Available extractors: ${AVAILABLE_EXTRACTORS2.join(", ")}`
937
942
  ].join(`
938
943
  `));
@@ -952,10 +957,10 @@ async function addCommand(args) {
952
957
  const pkg = await readPackageJson(root);
953
958
  if (kind === "connector") {
954
959
  const connector = name;
955
- if (connector !== "notion") {
960
+ if (!connector || !AVAILABLE_CONNECTORS.includes(connector)) {
956
961
  outro2(`Unknown connector: ${name}
957
962
 
958
- Available connectors: notion`);
963
+ Available connectors: ${AVAILABLE_CONNECTORS.join(", ")}`);
959
964
  return;
960
965
  }
961
966
  await copyConnectorFiles({
@@ -979,7 +984,7 @@ Available connectors: notion`);
979
984
  `- Docs: ${docsUrl(`/docs/connectors/${connector}`)}`,
980
985
  "",
981
986
  merged2.changes.length > 0 ? `Added deps: ${merged2.changes.map((c) => c.name).join(", ")}` : "Added deps: none",
982
- nonInteractive ? "" : "Tip: keep NOTION_TOKEN server-side only (env var)."
987
+ nonInteractive ? "" : connector === "notion" ? "Tip: keep NOTION_TOKEN server-side only (env var)." : connector === "google-drive" ? "Tip: keep Google OAuth refresh tokens and service account keys server-side only." : ""
983
988
  ].filter(Boolean).join(`
984
989
  `));
985
990
  return;
package/package.json CHANGED
@@ -6,7 +6,7 @@
6
6
  "bin": {
7
7
  "unrag": "./dist/cli/index.js"
8
8
  },
9
- "version": "0.2.4",
9
+ "version": "0.2.5",
10
10
  "private": false,
11
11
  "license": "Apache-2.0",
12
12
  "devDependencies": {
@@ -0,0 +1,171 @@
1
+ import type { GoogleDriveAuth } from "./types";
2
+
3
/**
 * Scopes used by `createGoogleDriveClient` when the caller supplies none.
 * Both entries are read-only Drive scopes.
 */
export const DEFAULT_DRIVE_SCOPES = [
  "https://www.googleapis.com/auth/drive.readonly",
  "https://www.googleapis.com/auth/drive.metadata.readonly",
] as const;
7
+
8
/**
 * Internal, validated form of the public auth input. The public `"oauth"` kind
 * is split into `"oauth_client"` (caller-supplied client instance) and
 * `"oauth_config"` (raw credentials the connector turns into a client).
 */
type NormalizedAuth =
  | { kind: "oauth_client"; oauthClient: unknown }
  | {
      kind: "oauth_config";
      clientId: string;
      clientSecret: string;
      redirectUri: string;
      refreshToken: string;
      accessToken?: string;
    }
  | {
      kind: "service_account";
      credentials: Record<string, any>;
      subject?: string;
    }
  | { kind: "google_auth"; auth: unknown };

/**
 * Validates a Google Drive auth input and converts it to its normalized form.
 *
 * @param auth Auth input; inspected structurally so untyped callers get clear errors.
 * @returns The normalized auth description.
 * @throws Error when `auth` is missing, has an unknown `kind`, lacks the fields
 *   its kind requires, or carries a `credentialsJson` string that is not valid JSON.
 */
export function normalizeGoogleDriveAuth(auth: GoogleDriveAuth): NormalizedAuth {
  if (!auth || typeof auth !== "object") {
    throw new Error("Google Drive auth is required");
  }

  // Runtime validation: the value may not actually match the declared type.
  const input = auth as any;
  const kind = input.kind;
  if (kind !== "oauth" && kind !== "service_account" && kind !== "google_auth") {
    throw new Error(`Unknown Google Drive auth kind: ${String(kind)}`);
  }

  if (kind === "google_auth") {
    if (!input.auth) throw new Error('Google Drive auth.kind="google_auth" requires auth');
    return { kind: "google_auth", auth: input.auth };
  }

  if (kind === "service_account") {
    const raw = input.credentialsJson;
    if (!raw) {
      throw new Error(
        'Google Drive auth.kind="service_account" requires credentialsJson'
      );
    }

    let credentials: any = raw;
    if (typeof raw === "string") {
      try {
        credentials = JSON.parse(raw);
      } catch {
        // Deliberately omit the parser's message: it may quote a fragment of
        // the input, and the input here is secret key material.
        throw new Error(
          'Google Drive auth.kind="service_account" credentialsJson is not valid JSON'
        );
      }
    }

    if (!credentials?.client_email || !credentials?.private_key) {
      throw new Error(
        'Google Drive service account credentials must include "client_email" and "private_key".'
      );
    }
    return {
      kind: "service_account",
      credentials,
      subject: input.subject ? String(input.subject) : undefined,
    };
  }

  // kind === "oauth": a ready-made client wins over raw credentials.
  if (input.oauthClient) {
    return { kind: "oauth_client", oauthClient: input.oauthClient };
  }

  const { clientId, clientSecret, redirectUri, refreshToken, accessToken } = input;
  if (!clientId || !clientSecret || !redirectUri || !refreshToken) {
    throw new Error(
      'Google Drive auth.kind="oauth" requires either oauthClient or { clientId, clientSecret, redirectUri, refreshToken }'
    );
  }
  return {
    kind: "oauth_config",
    clientId: String(clientId),
    clientSecret: String(clientSecret),
    redirectUri: String(redirectUri),
    refreshToken: String(refreshToken),
    ...(accessToken ? { accessToken: String(accessToken) } : {}),
  };
}
82
+
83
/**
 * Best-effort conversion of an arbitrary thrown value into a message string.
 * Always returns a string, so callers can safely run string checks on it.
 */
const asMessage = (err: unknown): string => {
  if (err instanceof Error) return err.message;
  if (typeof err === "string") return err;
  try {
    // JSON.stringify yields undefined (not a string) for undefined, functions
    // and symbols; fall back to String() in that case.
    return JSON.stringify(err) ?? String(err);
  } catch {
    // JSON.stringify throws on circular structures and BigInt values.
    return String(err);
  }
};
91
+
92
/**
 * Creates a Google Drive API (v3) client from a plug-and-play auth input.
 *
 * Note: This uses dynamic imports so the core Unrag package does not require
 * Google dependencies unless the connector is installed into a user project.
 *
 * @param args.auth Auth input; validated via `normalizeGoogleDriveAuth`.
 * @param args.scopes Scopes passed to the JWT client built for service-account
 *   auth. Defaults to `DEFAULT_DRIVE_SCOPES` when omitted or empty. Not applied
 *   to OAuth or caller-supplied clients.
 * @returns The Drive client plus the auth client it was configured with.
 * @throws Error when auth validation fails, when the expected exports are not
 *   found in the Google packages, or (with an install hint) when `googleapis` /
 *   `google-auth-library` cannot be resolved. Other errors are rethrown as-is.
 */
export async function createGoogleDriveClient(args: {
  auth: GoogleDriveAuth;
  scopes?: string[];
}): Promise<{ drive: any; authClient: any }> {
  const normalized = normalizeGoogleDriveAuth(args.auth);
  const scopes: string[] = args.scopes?.length ? args.scopes : [...DEFAULT_DRIVE_SCOPES];

  let authClient: any;

  try {
    if (normalized.kind === "oauth_client") {
      authClient = normalized.oauthClient;
    } else if (normalized.kind === "google_auth") {
      authClient = normalized.auth;
    } else {
      // google-auth-library (dynamic)
      const gal: any = await import("google-auth-library");

      if (normalized.kind === "oauth_config") {
        const OAuth2Client = gal.OAuth2Client ?? gal.OAuth2;
        if (!OAuth2Client) {
          throw new Error("OAuth2Client not found in google-auth-library");
        }
        const client = new OAuth2Client(
          normalized.clientId,
          normalized.clientSecret,
          normalized.redirectUri
        );
        client.setCredentials({
          refresh_token: normalized.refreshToken,
          ...(normalized.accessToken ? { access_token: normalized.accessToken } : {}),
        });
        authClient = client;
      } else {
        const JWT = gal.JWT;
        if (!JWT) {
          throw new Error("JWT not found in google-auth-library");
        }
        const c = normalized.credentials;
        authClient = new JWT({
          email: c.client_email,
          key: c.private_key,
          scopes,
          ...(normalized.subject ? { subject: normalized.subject } : {}),
        });
      }
    }

    const { google }: any = await import("googleapis");
    if (!google?.drive) {
      throw new Error("googleapis.google.drive not found");
    }

    const drive = google.drive({
      version: "v3",
      auth: authClient,
    });

    return { drive, authClient };
  } catch (err) {
    // Guard against a non-string result so the checks below cannot throw.
    const msg = asMessage(err) ?? String(err);
    if (isMissingGoogleDependency(err, msg)) {
      throw new Error(
        `Missing Google Drive connector dependencies. Ensure you've installed the connector via \`unrag add google-drive\` (which adds "googleapis" and "google-auth-library"). Original error: ${msg}`
      );
    }
    throw err;
  }
}

/**
 * Reports whether an import failure is about one of the Google packages.
 * Covers both CommonJS resolution failures ("Cannot find module",
 * MODULE_NOT_FOUND) and ESM ones ("Cannot find package", ERR_MODULE_NOT_FOUND).
 */
function isMissingGoogleDependency(err: unknown, msg: string): boolean {
  const code = (err as any)?.code;
  const isResolutionFailure =
    code === "MODULE_NOT_FOUND" ||
    code === "ERR_MODULE_NOT_FOUND" ||
    msg.includes("Cannot find module") ||
    msg.includes("Cannot find package");
  return (
    isResolutionFailure &&
    (msg.includes("googleapis") || msg.includes("google-auth-library"))
  );
}
170
+
171
+
@@ -0,0 +1,10 @@
1
+ export { createGoogleDriveClient } from "./client";
2
+ export {
3
+ loadGoogleDriveFileDocument,
4
+ syncGoogleDriveFiles,
5
+ buildGoogleDriveFileIngestInput,
6
+ } from "./sync";
7
+ export * from "./types";
8
+ export * from "./mime";
9
+
10
+
@@ -0,0 +1,76 @@
1
+ import type { AssetKind } from "../../core/types";
2
+
3
/** MIME types Drive uses for folders, shortcuts and its Google-native file types. */
export const DRIVE_MIME = {
  // Containers and pointers
  folder: "application/vnd.google-apps.folder",
  shortcut: "application/vnd.google-apps.shortcut",
  // Google-native file types
  doc: "application/vnd.google-apps.document",
  sheet: "application/vnd.google-apps.spreadsheet",
  slides: "application/vnd.google-apps.presentation",
  drawing: "application/vnd.google-apps.drawing",
} as const;
11
+
12
/** Target MIME types used when exporting Google-native files. */
export const EXPORT_MIME = {
  text: "text/plain",
  csv: "text/csv",
  pptx: "application/vnd.openxmlformats-officedocument.presentationml.presentation",
  png: "image/png",
} as const;
19
+
20
/** Google-native file types that `classifyDriveMimeType` recognizes. */
export type DriveGoogleNativeKind = "doc" | "sheet" | "slides" | "drawing";

/** Coarse category of a Drive file, derived from its MIME type alone. */
export type DriveMimeClassification =
  | { kind: "folder" }
  | { kind: "shortcut" }
  | { kind: "google_native"; nativeKind: DriveGoogleNativeKind }
  | { kind: "binary" };

/**
 * Maps a Drive MIME type to a classification.
 *
 * @param mimeType MIME type from Drive; surrounding whitespace is ignored.
 * @returns "folder", "shortcut" or "google_native" for the MIME types listed in
 *   `DRIVE_MIME`; "binary" for everything else, including a missing/empty value.
 */
export function classifyDriveMimeType(mimeType: string | undefined): DriveMimeClassification {
  const normalized = String(mimeType ?? "").trim();

  switch (normalized) {
    case DRIVE_MIME.folder:
      return { kind: "folder" };
    case DRIVE_MIME.shortcut:
      return { kind: "shortcut" };
    case DRIVE_MIME.doc:
      return { kind: "google_native", nativeKind: "doc" };
    case DRIVE_MIME.sheet:
      return { kind: "google_native", nativeKind: "sheet" };
    case DRIVE_MIME.slides:
      return { kind: "google_native", nativeKind: "slides" };
    case DRIVE_MIME.drawing:
      return { kind: "google_native", nativeKind: "drawing" };
    default:
      // Empty, unrecognized and ordinary file MIME types all land here.
      return { kind: "binary" };
  }
}
42
+
43
/** How a Google-native file should be exported: as text content, as an asset, or not at all. */
export type DriveNativeExportPlan =
  | { kind: "content"; mimeType: string }
  | {
      kind: "asset";
      assetKind: AssetKind;
      mimeType: string;
      filenameExt?: string;
    }
  | { kind: "unsupported" };

/**
 * Default behavior (Notion-like): Google-native files are exported to text-ish content.
 * Drawings are exported as PNG image assets (no good text representation).
 *
 * @param nativeKind The Google-native file type.
 * @returns The export plan; `{ kind: "unsupported" }` for any value outside the known kinds.
 */
export function getNativeExportPlan(nativeKind: DriveGoogleNativeKind): DriveNativeExportPlan {
  switch (nativeKind) {
    case "doc":
    case "slides":
      return { kind: "content", mimeType: EXPORT_MIME.text };
    case "sheet":
      return { kind: "content", mimeType: EXPORT_MIME.csv };
    case "drawing":
      return { kind: "asset", assetKind: "image", mimeType: EXPORT_MIME.png, filenameExt: "png" };
    default:
      return { kind: "unsupported" };
  }
}
66
+
67
/**
 * Derives the asset kind from a media (MIME) type.
 *
 * @param mediaType Media type; compared case-insensitively after trimming.
 * @returns "pdf" for `application/pdf`; "image", "audio" or "video" for the
 *   matching `type/` prefix; "file" otherwise (including missing input).
 */
export function assetKindFromMediaType(mediaType: string | undefined): AssetKind {
  const normalized = String(mediaType ?? "").trim().toLowerCase();
  if (normalized === "application/pdf") return "pdf";

  const families = ["image", "audio", "video"] as const;
  const family = families.find((candidate) => normalized.startsWith(`${candidate}/`));
  return family ?? "file";
}
75
+
76
+
@@ -0,0 +1,528 @@
1
+ import type { IngestResult } from "../../core";
2
+ import type { AssetInput } from "../../core/types";
3
+ import { createGoogleDriveClient } from "./client";
4
+ import {
5
+ assetKindFromMediaType,
6
+ classifyDriveMimeType,
7
+ EXPORT_MIME,
8
+ getNativeExportPlan,
9
+ DRIVE_MIME,
10
+ } from "./mime";
11
+ import type {
12
+ BuildGoogleDriveFileIngestInputArgs,
13
+ GoogleDriveFileDocument,
14
+ GoogleDriveSyncProgressEvent,
15
+ SyncGoogleDriveFilesInput,
16
+ } from "./types";
17
+
18
/** Default per-file byte limit (15 MiB) applied when `maxBytesPerFile` is not provided. */
const DEFAULT_MAX_BYTES = 15 * 1024 ** 2;
19
+
20
/**
 * Prepends an optional namespace prefix to `rest`, ensuring exactly one ":"
 * sits between them. A missing or whitespace-only prefix yields `rest` unchanged.
 */
const joinPrefix = (prefix: string | undefined, rest: string) => {
  const trimmed = (prefix ?? "").trim();
  if (trimmed === "") return rest;

  const separator = trimmed.endsWith(":") ? "" : ":";
  return `${trimmed}${separator}${rest}`;
};
25
+
26
/**
 * Assembles the ingest input for a Drive file.
 *
 * The source id is `gdrive:file:<fileId>`, optionally namespaced with
 * `args.sourceIdPrefix`. Missing `metadata` / `assets` default to `{}` / `[]`.
 */
export function buildGoogleDriveFileIngestInput(
  args: BuildGoogleDriveFileIngestInputArgs
) {
  const { fileId, sourceIdPrefix, content } = args;
  const metadata = args.metadata ?? {};
  const assets = args.assets ?? [];

  return {
    sourceId: joinPrefix(sourceIdPrefix, `gdrive:file:${fileId}`),
    content,
    metadata,
    assets,
  };
}
37
+
38
/**
 * Best-effort conversion of an arbitrary thrown value into a message string.
 * Always returns a string.
 */
const asMessage = (err: unknown): string => {
  if (err instanceof Error) return err.message;
  if (typeof err === "string") return err;
  try {
    // JSON.stringify yields undefined (not a string) for undefined, functions
    // and symbols; fall back to String() in that case.
    return JSON.stringify(err) ?? String(err);
  } catch {
    // JSON.stringify throws on circular structures and BigInt values.
    return String(err);
  }
};
46
+
47
/**
 * Coerces a response payload into a `Uint8Array`.
 *
 * - `Uint8Array` input (which includes Node `Buffer`, a subclass) is returned as-is.
 * - `ArrayBuffer` and other typed-array / `DataView` input is wrapped without copying.
 * - Strings are encoded as UTF-8.
 * - Anything else, including null/undefined/empty values, yields an empty array.
 */
const toUint8Array = (data: any): Uint8Array => {
  if (!data) return new Uint8Array();
  if (data instanceof Uint8Array) return data;
  if (data instanceof ArrayBuffer) return new Uint8Array(data);
  if (ArrayBuffer.isView(data)) {
    // Respect the view's window; the underlying buffer may be larger.
    return new Uint8Array(data.buffer, data.byteOffset, data.byteLength);
  }
  // Axios can hand back a string for some responseTypes; treat as utf-8 bytes.
  if (typeof data === "string") {
    return new TextEncoder().encode(data);
  }
  return new Uint8Array();
};
63
+
64
/** Decodes bytes as UTF-8 in non-fatal mode, so malformed input does not throw. */
const bytesToText = (bytes: Uint8Array) => {
  const decoder = new TextDecoder("utf-8", { fatal: false });
  return decoder.decode(bytes);
};
67
+
68
/**
 * Reports whether an error looks like an HTTP "not found" response.
 *
 * Every place a status may be carried (`code`, `status`, `response.status`,
 * `statusCode`) is checked, so a non-numeric `code` such as an error-name
 * string cannot hide a numeric status stored on another field.
 *
 * @param err The thrown value; may be anything.
 * @param treatForbiddenAsNotFound When true, 403 counts as not-found as well.
 * @returns True for 404, or for 403 when `treatForbiddenAsNotFound` is set.
 */
const isNotFound = (err: any, treatForbiddenAsNotFound: boolean) => {
  const statuses = [
    err?.code,
    err?.status,
    err?.response?.status,
    err?.statusCode,
  ].map((candidate) => Number(candidate));

  if (statuses.includes(404)) return true;
  return treatForbiddenAsNotFound && statuses.includes(403);
};
76
+
77
/**
 * Fetches a fixed set of metadata fields for a file via `drive.files.get`.
 * Returns the response's `data`, or `{}` when the response carries none.
 */
async function getFileMetadata(drive: any, fileId: string) {
  const requestedFields = [
    "id",
    "name",
    "mimeType",
    "size",
    "md5Checksum",
    "modifiedTime",
    "webViewLink",
    "webContentLink",
    "iconLink",
    "shortcutDetails",
    "driveId",
  ].join(",");

  const response = await drive.files.get({
    fileId,
    supportsAllDrives: true,
    fields: requestedFields,
  });
  return response?.data ?? {};
}
86
+
87
/** Downloads a file's content (`alt: "media"`) and returns it as bytes. */
async function downloadFileBytes(drive: any, fileId: string): Promise<Uint8Array> {
  const params = { fileId, alt: "media", supportsAllDrives: true };
  const requestOptions = { responseType: "arraybuffer" };

  const response = await drive.files.get(params, requestOptions);
  return toUint8Array(response?.data);
}
94
+
95
/** Exports a file to the given MIME type via `drive.files.export` and returns the bytes. */
async function exportFileBytes(
  drive: any,
  fileId: string,
  mimeType: string
): Promise<Uint8Array> {
  const params = { fileId, mimeType };
  const requestOptions = { responseType: "arraybuffer" };

  const response = await drive.files.export(params, requestOptions);
  return toUint8Array(response?.data);
}
106
+
107
/** Prefix shared by Google Workspace ("Google-native") MIME types. */
const GOOGLE_APPS_MIME_PREFIX = "application/vnd.google-apps.";

/**
 * Loads a single Drive file and converts it into an ingestable document.
 *
 * Behavior by file type:
 * - Folders: metadata-only document (`metadata.kind === "folder"`).
 * - Shortcuts: the target is loaded recursively, but `sourceId` stays bound to
 *   the shortcut's own id. Cycles and shortcuts without a target yield a
 *   metadata-only document flagged `shortcutUnresolved`.
 * - Docs / Sheets / Slides: exported to text or CSV and returned as `content`.
 *   If the Slides text export fails and `strictNativeExport` is false, a PPTX
 *   export is attached as an asset instead.
 * - Drawings: exported as a PNG image asset.
 * - Other Google Workspace types (no export plan): metadata-only document
 *   flagged `unsupportedGoogleMime`; no download is attempted because Drive
 *   serves `alt=media` only for files with binary content.
 * - Everything else: downloaded and attached as a single asset.
 *
 * Payloads larger than `maxBytesPerFile` yield a metadata-only document flagged
 * `exportedTooLarge` (exports) or `skippedTooLarge` (downloads).
 *
 * @param args.drive Drive API client (as returned by `createGoogleDriveClient`).
 * @param args.fileId Id of the file to load.
 * @param args.sourceIdPrefix Optional namespace prefix for the source id.
 * @param args.options.maxBytesPerFile Per-file byte limit (default 15MB).
 * @param args.options.strictNativeExport Disable the Slides -> PPTX fallback.
 * @returns The document to ingest; callers inspect `metadata` flags to decide
 *   whether to skip it.
 * @throws Whatever the Drive client throws for metadata, export or download
 *   requests (other than a Slides text-export failure that the fallback covers).
 */
export async function loadGoogleDriveFileDocument(args: {
  drive: any;
  fileId: string;
  sourceIdPrefix?: string;
  options?: {
    maxBytesPerFile?: number;
    strictNativeExport?: boolean;
  };
  /** internal: recursion guard for shortcuts */
  _visited?: Set<string>;
}): Promise<GoogleDriveFileDocument> {
  const maxBytesPerFile = args.options?.maxBytesPerFile ?? DEFAULT_MAX_BYTES;
  const strictNativeExport = Boolean(args.options?.strictNativeExport ?? false);

  const meta = await getFileMetadata(args.drive, args.fileId);
  const fileId = String(meta?.id ?? args.fileId);
  const name = String(meta?.name ?? "");
  const mimeType = String(meta?.mimeType ?? "");
  const size = meta?.size !== undefined ? Number(meta.size) : undefined;
  const uri = meta?.webViewLink ? String(meta.webViewLink) : undefined;

  // Every result shares fileId/sourceIdPrefix; most early exits carry metadata only.
  const buildDoc = (
    metadata: Record<string, unknown>,
    assets: AssetInput[] = [],
    content = ""
  ): GoogleDriveFileDocument =>
    buildGoogleDriveFileIngestInput({
      fileId,
      sourceIdPrefix: args.sourceIdPrefix,
      content,
      assets,
      metadata,
    }) as GoogleDriveFileDocument;

  const classification = classifyDriveMimeType(mimeType);

  // Handle folders: return a document shape but with no content/assets; callers typically skip.
  if (classification.kind === "folder") {
    return buildDoc({
      connector: "google-drive",
      kind: "folder",
      fileId,
      name,
      mimeType: DRIVE_MIME.folder,
      ...(meta?.webViewLink ? { webViewLink: String(meta.webViewLink) } : {}),
      ...(meta?.modifiedTime ? { modifiedTime: String(meta.modifiedTime) } : {}),
    });
  }

  // Shortcuts: resolve to the target if possible, otherwise let the caller decide.
  if (classification.kind === "shortcut") {
    const unresolvedShortcut = () =>
      buildDoc({
        connector: "google-drive",
        kind: "shortcut",
        fileId,
        name,
        mimeType: DRIVE_MIME.shortcut,
        shortcutUnresolved: true,
      });

    // The visited set is threaded through the recursion to break shortcut cycles.
    const visited = args._visited ?? new Set<string>();
    if (visited.has(fileId)) return unresolvedShortcut();
    visited.add(fileId);

    const targetId = meta?.shortcutDetails?.targetId
      ? String(meta.shortcutDetails.targetId)
      : "";
    if (!targetId) return unresolvedShortcut();

    // Resolve target content/assets but keep sourceId stable to the shortcut file id.
    const targetDoc = await loadGoogleDriveFileDocument({
      drive: args.drive,
      fileId: targetId,
      sourceIdPrefix: args.sourceIdPrefix,
      options: args.options,
      _visited: visited,
    });

    return {
      ...targetDoc,
      sourceId: joinPrefix(args.sourceIdPrefix, `gdrive:file:${fileId}`),
      metadata: {
        ...(targetDoc.metadata ?? {}),
        connector: "google-drive",
        shortcutFileId: fileId,
        shortcutTargetId: targetId,
      },
    };
  }

  const baseMetadata = {
    connector: "google-drive",
    kind: "file",
    fileId,
    name,
    mimeType,
    ...(Number.isFinite(size) ? { size } : {}),
    ...(meta?.md5Checksum ? { md5Checksum: String(meta.md5Checksum) } : {}),
    ...(meta?.modifiedTime ? { modifiedTime: String(meta.modifiedTime) } : {}),
    ...(meta?.webViewLink ? { webViewLink: String(meta.webViewLink) } : {}),
    ...(meta?.webContentLink ? { webContentLink: String(meta.webContentLink) } : {}),
    ...(meta?.iconLink ? { iconLink: String(meta.iconLink) } : {}),
    ...(meta?.driveId ? { driveId: String(meta.driveId) } : {}),
  };

  // Google-native export path
  if (classification.kind === "google_native") {
    const plan = getNativeExportPlan(classification.nativeKind);
    if (plan.kind === "unsupported") {
      return buildDoc({
        ...baseMetadata,
        googleNativeKind: classification.nativeKind,
        unsupportedGoogleMime: true,
      });
    }

    // For content export, enforce maxBytesPerFile by bytes length.
    if (plan.kind === "content") {
      try {
        const bytes = await exportFileBytes(args.drive, fileId, plan.mimeType);
        if (bytes.byteLength > maxBytesPerFile) {
          return buildDoc({ ...baseMetadata, exportedTooLarge: true });
        }
        return buildDoc(
          {
            ...baseMetadata,
            googleNativeKind: classification.nativeKind,
            exportMimeType: plan.mimeType,
          },
          [],
          bytesToText(bytes).trim()
        );
      } catch (err) {
        // Slides can fail to export as text; fallback to PPTX unless strict.
        if (classification.nativeKind === "slides" && !strictNativeExport) {
          try {
            const bytes = await exportFileBytes(args.drive, fileId, EXPORT_MIME.pptx);
            if (bytes.byteLength > maxBytesPerFile) {
              return buildDoc({ ...baseMetadata, exportedTooLarge: true });
            }
            const asset: AssetInput = {
              assetId: fileId,
              kind: "file",
              data: {
                kind: "bytes",
                bytes,
                mediaType: EXPORT_MIME.pptx,
                filename: name ? `${name}.pptx` : undefined,
              },
              uri,
              metadata: { connector: "google-drive", fileId, exportMimeType: EXPORT_MIME.pptx } as any,
            };
            return buildDoc(
              { ...baseMetadata, googleNativeKind: "slides", exportFallback: "pptx" },
              [asset]
            );
          } catch {
            // The fallback failed too; surface the original text-export error below.
          }
        }

        throw err;
      }
    }

    // Asset export path (drawings -> PNG image)
    if (plan.kind === "asset") {
      const bytes = await exportFileBytes(args.drive, fileId, plan.mimeType);
      if (bytes.byteLength > maxBytesPerFile) {
        return buildDoc({ ...baseMetadata, exportedTooLarge: true });
      }

      const filename = name && plan.filenameExt ? `${name}.${plan.filenameExt}` : name || undefined;
      const asset: AssetInput = {
        assetId: fileId,
        kind: plan.assetKind,
        data: { kind: "bytes", bytes, mediaType: plan.mimeType, ...(filename ? { filename } : {}) },
        uri,
        metadata: { connector: "google-drive", fileId, exportMimeType: plan.mimeType } as any,
      };

      return buildDoc(
        {
          ...baseMetadata,
          googleNativeKind: classification.nativeKind,
          exportMimeType: plan.mimeType,
        },
        [asset]
      );
    }
  }

  // Google Workspace types with no export plan (Forms, Sites, Maps, ...) have no
  // binary content, so an alt=media download would be rejected by Drive. Flag
  // them as unsupported instead of letting that rejection look like a missing file.
  if (mimeType.startsWith(GOOGLE_APPS_MIME_PREFIX)) {
    return buildDoc({ ...baseMetadata, unsupportedGoogleMime: true });
  }

  // Binary download path: reject by reported size first to avoid the download.
  if (Number.isFinite(size) && (size as number) > maxBytesPerFile) {
    return buildDoc({ ...baseMetadata, skippedTooLarge: true });
  }

  const bytes = await downloadFileBytes(args.drive, fileId);
  // Re-check against the actual payload; the reported size may be absent.
  if (bytes.byteLength > maxBytesPerFile) {
    return buildDoc({ ...baseMetadata, skippedTooLarge: true });
  }

  const assetKind = assetKindFromMediaType(mimeType);
  const filename = name || undefined;
  const asset: AssetInput = {
    assetId: fileId,
    kind: assetKind,
    data: {
      kind: "bytes",
      bytes,
      mediaType: mimeType || "application/octet-stream",
      ...(filename ? { filename } : {}),
    },
    uri,
    metadata: { connector: "google-drive", fileId, name, mimeType } as any,
  };

  // For pure binaries, keep content empty; extraction occurs via engine asset processing + extractors.
  return buildDoc(baseMetadata, [asset]);
}
385
+
386
/**
 * Loads each listed Drive file and ingests it into the engine, one at a time
 * and in the order given.
 *
 * Per file:
 * - Blank ids are ignored without emitting events.
 * - Folders, unsupported Google-native types, oversized files and unresolved
 *   shortcuts emit `file:skipped` and are not ingested.
 * - If loading from Drive fails with a not-found status (404, or 403 when
 *   `options.treatForbiddenAsNotFound` is true, the default), `file:not-found`
 *   is emitted and, with `deleteOnNotFound`, the matching source id is deleted
 *   from the engine.
 * - Any other load failure, and any failure thrown by `engine.ingest`, is
 *   recorded in `errors` and emitted as `file:error`. Ingest failures are never
 *   treated as not-found, so they cannot trigger a delete.
 *
 * Exceptions thrown by `onProgress` are ignored.
 *
 * @param input Engine, auth, file ids and options.
 * @returns Counters and collected errors. `fileCount` is the length of
 *   `input.fileIds`; skipped and not-found files are counted in none of
 *   `succeeded` / `failed`, and `deleted` counts successful deletes only.
 * @throws Errors from `createGoogleDriveClient` (invalid auth, missing dependencies).
 */
export async function syncGoogleDriveFiles(
  input: SyncGoogleDriveFilesInput
): Promise<{
  fileCount: number;
  succeeded: number;
  failed: number;
  deleted: number;
  errors: Array<{ fileId: string; sourceId: string; error: unknown }>;
}> {
  const deleteOnNotFound = input.deleteOnNotFound ?? false;
  const options = input.options ?? {};
  const maxBytesPerFile = options.maxBytesPerFile ?? DEFAULT_MAX_BYTES;
  const treatForbiddenAsNotFound = options.treatForbiddenAsNotFound ?? true;

  const { drive } = await createGoogleDriveClient({
    auth: input.auth,
    scopes: options.scopes,
  });

  const errors: Array<{ fileId: string; sourceId: string; error: unknown }> = [];
  let succeeded = 0;
  let failed = 0;
  let deleted = 0;

  // Progress reporting must never break the sync itself.
  const emit = (event: GoogleDriveSyncProgressEvent) => {
    try {
      input.onProgress?.(event);
    } catch {
      // ignore progress handler errors
    }
  };

  for (const fileIdRaw of input.fileIds) {
    const fileId = String(fileIdRaw ?? "").trim();
    if (!fileId) continue;

    const sourceId = joinPrefix(input.sourceIdPrefix, `gdrive:file:${fileId}`);

    const recordFailure = (error: unknown) => {
      failed += 1;
      errors.push({ fileId, sourceId, error });
      emit({ type: "file:error", fileId, sourceId, error });
    };

    emit({ type: "file:start", fileId, sourceId });

    try {
      const doc = await loadGoogleDriveFileDocument({
        drive,
        fileId,
        sourceIdPrefix: input.sourceIdPrefix,
        options: {
          maxBytesPerFile,
          strictNativeExport: options.strictNativeExport,
        },
      });
      const flags = doc.metadata as any;

      // Skip folders explicitly (v1).
      if (flags?.kind === "folder") {
        emit({
          type: "file:skipped",
          fileId,
          sourceId,
          reason: "is_folder",
          message: "Skipping folder (v1: files-only connector).",
        });
        continue;
      }

      if (flags?.unsupportedGoogleMime) {
        emit({
          type: "file:skipped",
          fileId,
          sourceId,
          reason: "unsupported_google_mime",
          message:
            "Skipping Google-native file type because it has no supported export plan.",
        });
        continue;
      }

      if (flags?.skippedTooLarge || flags?.exportedTooLarge) {
        emit({
          type: "file:skipped",
          fileId,
          sourceId,
          reason: "too_large",
          message: `Skipping file because it exceeds maxBytesPerFile (${maxBytesPerFile}).`,
        });
        continue;
      }

      if (flags?.shortcutUnresolved) {
        emit({
          type: "file:skipped",
          fileId,
          sourceId,
          reason: "shortcut_unresolved",
          message: "Skipping shortcut because target could not be resolved.",
        });
        continue;
      }

      // Ingest failures are handled here so they can never reach the
      // not-found handling below, which may delete the stored document.
      let result: IngestResult;
      try {
        result = await input.engine.ingest({
          sourceId: doc.sourceId,
          content: doc.content,
          assets: doc.assets,
          metadata: doc.metadata as any,
        });
      } catch (ingestErr) {
        recordFailure(ingestErr);
        continue;
      }

      succeeded += 1;
      emit({
        type: "file:success",
        fileId,
        sourceId,
        chunkCount: result.chunkCount,
      });
    } catch (err) {
      if (isNotFound(err, Boolean(treatForbiddenAsNotFound))) {
        emit({ type: "file:not-found", fileId, sourceId });
        if (deleteOnNotFound) {
          try {
            await input.engine.delete({ sourceId });
            deleted += 1;
          } catch (deleteErr) {
            recordFailure(deleteErr);
          }
        }
        continue;
      }

      recordFailure(err);
    }
  }

  return {
    fileCount: input.fileIds.length,
    succeeded,
    failed,
    deleted,
    errors,
  };
}
527
+
528
+
@@ -0,0 +1,127 @@
1
+ import type { ContextEngine, AssetInput, IngestInput } from "../../core";
2
+
3
+ /**
4
+ * A plug-and-play auth input for Google Drive.
5
+ *
6
+ * This is intentionally structural (no hard dependency on google-auth-library types),
7
+ * because the connector code is vendored into user projects and dependencies are added
8
+ * by the CLI (`unrag add google-drive`).
9
+ */
10
+ export type GoogleDriveAuth =
11
+ | {
12
+ /** Use an existing OAuth2 client instance (recommended if your app already has one). */
13
+ kind: "oauth";
14
+ oauthClient: unknown;
15
+ }
16
+ | {
17
+ /**
18
+ * Convenience form for OAuth2: the connector will construct an OAuth2 client
19
+ * and set credentials including the refresh token.
20
+ */
21
+ kind: "oauth";
22
+ clientId: string;
23
+ clientSecret: string;
24
+ redirectUri: string;
25
+ refreshToken: string;
26
+ /** Optional access token if you already have one. */
27
+ accessToken?: string;
28
+ }
29
+ | {
30
+ /**
31
+ * Service account credentials. This supports both:
32
+ * - direct service-account access (files must be shared to the service account)
33
+ * - Workspace domain-wide delegation (DWD) when `subject` is provided
34
+ */
35
+ kind: "service_account";
36
+ credentialsJson: string | Record<string, unknown>;
37
+ /**
38
+ * DWD impersonation subject email (Workspace only).
39
+ * When provided, the service account will impersonate this user.
40
+ */
41
+ subject?: string;
42
+ }
43
+ | {
44
+ /** Escape hatch: provide a pre-configured GoogleAuth (or equivalent) instance. */
45
+ kind: "google_auth";
46
+ auth: unknown;
47
+ };
48
+
49
+ export type GoogleDriveSyncProgressEvent =
50
+ | { type: "file:start"; fileId: string; sourceId: string }
51
+ | {
52
+ type: "file:success";
53
+ fileId: string;
54
+ sourceId: string;
55
+ chunkCount: number;
56
+ }
57
+ | {
58
+ type: "file:skipped";
59
+ fileId: string;
60
+ sourceId: string;
61
+ reason:
62
+ | "is_folder"
63
+ | "unsupported_google_mime"
64
+ | "too_large"
65
+ | "shortcut_unresolved";
66
+ message: string;
67
+ }
68
+ | { type: "file:not-found"; fileId: string; sourceId: string }
69
+ | { type: "file:error"; fileId: string; sourceId: string; error: unknown };
70
+
71
+ export type GoogleDriveFileDocument = {
72
+ sourceId: string;
73
+ content: string;
74
+ metadata: Record<string, unknown>;
75
+ assets: AssetInput[];
76
+ };
77
+
78
+ export type BuildGoogleDriveFileIngestInputArgs = {
79
+ fileId: string;
80
+ content: string;
81
+ assets?: AssetInput[];
82
+ metadata?: Record<string, unknown>;
83
+ sourceIdPrefix?: string;
84
+ };
85
+
86
+ export type BuildGoogleDriveFileIngestInputResult = IngestInput;
87
+
88
+ export type SyncGoogleDriveFilesInput = {
89
+ engine: ContextEngine;
90
+ auth: GoogleDriveAuth;
91
+ /** Explicit Drive file IDs (Notion-like v1 behavior). */
92
+ fileIds: string[];
93
+ /**
94
+ * Optional namespace prefix, useful for multi-tenant apps:
95
+ * `tenant:acme:` -> `tenant:acme:gdrive:file:<id>`
96
+ */
97
+ sourceIdPrefix?: string;
98
+ /**
99
+ * When true, if a file is not found/accessible, delete the previously ingested
100
+ * document for that file (exact sourceId).
101
+ */
102
+ deleteOnNotFound?: boolean;
103
+ /** Optional progress callback. */
104
+ onProgress?: (event: GoogleDriveSyncProgressEvent) => void;
105
+ /** Optional connector-level knobs. */
106
+ options?: SyncGoogleDriveFilesOptions;
107
+ };
108
+
109
+ export type SyncGoogleDriveFilesOptions = {
110
+ /** Max bytes to download/export per file. Default: 15MB. */
111
+ maxBytesPerFile?: number;
112
+ /**
113
+ * If true, treat 403 (forbidden) as not-found for cleanup purposes.
114
+ * Default: true.
115
+ */
116
+ treatForbiddenAsNotFound?: boolean;
117
+ /**
118
+ * If true, failures to export Google-native files (e.g., Slides -> text)
119
+ * will be surfaced as errors (the file is not ingested) instead of falling back to a binary export.
120
+ * Default: false (best-effort fallback).
121
+ */
122
+ strictNativeExport?: boolean;
123
+ /** Override Drive API scopes if desired. */
124
+ scopes?: string[];
125
+ };
126
+
127
+