unrag 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +9 -4
- package/package.json +1 -1
- package/registry/connectors/google-drive/client.ts +171 -0
- package/registry/connectors/google-drive/index.ts +10 -0
- package/registry/connectors/google-drive/mime.ts +76 -0
- package/registry/connectors/google-drive/sync.ts +528 -0
- package/registry/connectors/google-drive/types.ts +127 -0
package/dist/cli/index.js
CHANGED
|
@@ -455,6 +455,10 @@ function depsForConnector(connector) {
|
|
|
455
455
|
if (connector === "notion") {
|
|
456
456
|
deps["@notionhq/client"] = "^2.2.16";
|
|
457
457
|
}
|
|
458
|
+
if (connector === "google-drive") {
|
|
459
|
+
deps["googleapis"] = "^148.0.0";
|
|
460
|
+
deps["google-auth-library"] = "^10.0.0";
|
|
461
|
+
}
|
|
458
462
|
return { deps, devDeps };
|
|
459
463
|
}
|
|
460
464
|
function depsForExtractor(extractor) {
|
|
@@ -894,6 +898,7 @@ var AVAILABLE_EXTRACTORS2 = [
|
|
|
894
898
|
"file-pptx",
|
|
895
899
|
"file-xlsx"
|
|
896
900
|
];
|
|
901
|
+
var AVAILABLE_CONNECTORS = ["notion", "google-drive"];
|
|
897
902
|
var parseAddArgs = (args) => {
|
|
898
903
|
const out = {};
|
|
899
904
|
for (let i = 0;i < args.length; i++) {
|
|
@@ -932,7 +937,7 @@ async function addCommand(args) {
|
|
|
932
937
|
" unrag add <connector>",
|
|
933
938
|
" unrag add extractor <name>",
|
|
934
939
|
"",
|
|
935
|
-
|
|
940
|
+
`Available connectors: ${AVAILABLE_CONNECTORS.join(", ")}`,
|
|
936
941
|
`Available extractors: ${AVAILABLE_EXTRACTORS2.join(", ")}`
|
|
937
942
|
].join(`
|
|
938
943
|
`));
|
|
@@ -952,10 +957,10 @@ async function addCommand(args) {
|
|
|
952
957
|
const pkg = await readPackageJson(root);
|
|
953
958
|
if (kind === "connector") {
|
|
954
959
|
const connector = name;
|
|
955
|
-
if (connector
|
|
960
|
+
if (!connector || !AVAILABLE_CONNECTORS.includes(connector)) {
|
|
956
961
|
outro2(`Unknown connector: ${name}
|
|
957
962
|
|
|
958
|
-
Available connectors:
|
|
963
|
+
Available connectors: ${AVAILABLE_CONNECTORS.join(", ")}`);
|
|
959
964
|
return;
|
|
960
965
|
}
|
|
961
966
|
await copyConnectorFiles({
|
|
@@ -979,7 +984,7 @@ Available connectors: notion`);
|
|
|
979
984
|
`- Docs: ${docsUrl(`/docs/connectors/${connector}`)}`,
|
|
980
985
|
"",
|
|
981
986
|
merged2.changes.length > 0 ? `Added deps: ${merged2.changes.map((c) => c.name).join(", ")}` : "Added deps: none",
|
|
982
|
-
nonInteractive ? "" : "Tip: keep NOTION_TOKEN server-side only (env var)."
|
|
987
|
+
nonInteractive ? "" : connector === "notion" ? "Tip: keep NOTION_TOKEN server-side only (env var)." : connector === "google-drive" ? "Tip: keep Google OAuth refresh tokens and service account keys server-side only." : ""
|
|
983
988
|
].filter(Boolean).join(`
|
|
984
989
|
`));
|
|
985
990
|
return;
|
package/package.json
CHANGED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import type { GoogleDriveAuth } from "./types";
|
|
2
|
+
|
|
3
|
+
/**
 * Scopes used when the caller does not pass a non-empty `scopes` list.
 * Both entries are read-only Drive scopes.
 */
export const DEFAULT_DRIVE_SCOPES = [
  "https://www.googleapis.com/auth/drive.readonly",
  "https://www.googleapis.com/auth/drive.metadata.readonly",
] as const;
|
|
7
|
+
|
|
8
|
+
/**
 * Validated, internal form of the public auth input. The public "oauth" kind is
 * split in two: a ready-made client ("oauth_client") or raw credentials that
 * still need a client built from them ("oauth_config").
 */
type NormalizedAuth =
  | { kind: "oauth_client"; oauthClient: unknown }
  | {
      kind: "oauth_config";
      clientId: string;
      clientSecret: string;
      redirectUri: string;
      refreshToken: string;
      accessToken?: string;
    }
  | {
      kind: "service_account";
      credentials: Record<string, any>;
      subject?: string;
    }
  | { kind: "google_auth"; auth: unknown };

/**
 * Validates a Google Drive auth input at runtime and converts it to `NormalizedAuth`.
 *
 * @param auth - Auth input whose `kind` is "oauth", "service_account" or "google_auth".
 * @returns The normalized auth description.
 * @throws Error when `auth` is missing, has an unknown `kind`, lacks a field its
 *   kind requires, or carries a `credentialsJson` string that is not valid JSON.
 */
export function normalizeGoogleDriveAuth(auth: GoogleDriveAuth): NormalizedAuth {
  if (!auth || typeof auth !== "object") {
    throw new Error("Google Drive auth is required");
  }

  // Fields are checked structurally at runtime, so read them through a loose view.
  const input = auth as any;
  const kind = input.kind;

  switch (kind) {
    case "google_auth": {
      if (!input.auth) {
        throw new Error('Google Drive auth.kind="google_auth" requires auth');
      }
      return { kind: "google_auth", auth: input.auth };
    }

    case "service_account": {
      const raw = input.credentialsJson;
      if (!raw) {
        throw new Error(
          'Google Drive auth.kind="service_account" requires credentialsJson'
        );
      }

      let credentials: any;
      if (typeof raw === "string") {
        try {
          credentials = JSON.parse(raw);
        } catch {
          // Deliberately drop the parser's own message: recent runtimes quote a
          // snippet of the input in it, and the input here is a credentials file.
          throw new Error(
            'Google Drive auth.kind="service_account" credentialsJson is not valid JSON'
          );
        }
      } else {
        credentials = raw;
      }

      if (!credentials?.client_email || !credentials?.private_key) {
        throw new Error(
          'Google Drive service account credentials must include "client_email" and "private_key".'
        );
      }

      return {
        kind: "service_account",
        credentials,
        subject: input.subject ? String(input.subject) : undefined,
      };
    }

    case "oauth": {
      // A caller-supplied client wins over raw credentials.
      if (input.oauthClient) {
        return { kind: "oauth_client", oauthClient: input.oauthClient };
      }

      const { clientId, clientSecret, redirectUri, refreshToken, accessToken } = input;
      if (!clientId || !clientSecret || !redirectUri || !refreshToken) {
        throw new Error(
          'Google Drive auth.kind="oauth" requires either oauthClient or { clientId, clientSecret, redirectUri, refreshToken }'
        );
      }
      return {
        kind: "oauth_config",
        clientId: String(clientId),
        clientSecret: String(clientSecret),
        redirectUri: String(redirectUri),
        refreshToken: String(refreshToken),
        ...(accessToken ? { accessToken: String(accessToken) } : {}),
      };
    }

    default:
      throw new Error(`Unknown Google Drive auth kind: ${String(kind)}`);
  }
}
|
|
82
|
+
|
|
83
|
+
/**
 * Best-effort conversion of any thrown value into a message string.
 *
 * Always returns a string, so callers can safely run string methods on the result.
 *
 * @param err - The caught value (an `Error`, a string, or anything else).
 * @returns `err.message` for errors, the string itself for strings, otherwise a
 *   JSON rendering, falling back to `String(err)`.
 */
const asMessage = (err: unknown): string => {
  if (err instanceof Error) return err.message;
  if (typeof err === "string") return err;
  try {
    // JSON.stringify yields undefined (not a string) for undefined, functions
    // and symbols, so fall back to String() in that case.
    return JSON.stringify(err) ?? String(err);
  } catch {
    // JSON.stringify throws on circular structures and BigInt values.
    return String(err);
  }
};
|
|
91
|
+
|
|
92
|
+
/**
 * Creates a Google Drive API client from a plug-and-play auth input.
 *
 * Note: This uses dynamic imports so the core Unrag package does not require
 * Google dependencies unless the connector is installed into a user project.
 *
 * @param args.auth - Auth input; validated through `normalizeGoogleDriveAuth`.
 * @param args.scopes - Optional scopes. An empty or missing list falls back to
 *   `DEFAULT_DRIVE_SCOPES`. Scopes are only passed to the service-account (JWT)
 *   client; the other auth kinds use the client or credentials as given.
 * @returns The Drive v3 client together with the auth client it was built with.
 * @throws Error with an install hint when "googleapis" or "google-auth-library"
 *   cannot be resolved; any other failure is rethrown unchanged.
 */
export async function createGoogleDriveClient(args: {
  auth: GoogleDriveAuth;
  scopes?: string[];
}): Promise<{ drive: any; authClient: any }> {
  const normalized = normalizeGoogleDriveAuth(args.auth);
  // Copy the defaults so the shared readonly constant is never handed out mutably.
  const scopes: string[] = args.scopes?.length ? args.scopes : [...DEFAULT_DRIVE_SCOPES];

  let authClient: any;

  try {
    if (normalized.kind === "oauth_client") {
      authClient = normalized.oauthClient;
    } else if (normalized.kind === "google_auth") {
      authClient = normalized.auth;
    } else {
      // google-auth-library (dynamic)
      const gal: any = await import("google-auth-library");

      if (normalized.kind === "oauth_config") {
        const OAuth2Client = gal.OAuth2Client ?? gal.OAuth2;
        if (!OAuth2Client) {
          throw new Error("OAuth2Client not found in google-auth-library");
        }
        const client = new OAuth2Client(
          normalized.clientId,
          normalized.clientSecret,
          normalized.redirectUri
        );
        client.setCredentials({
          refresh_token: normalized.refreshToken,
          ...(normalized.accessToken ? { access_token: normalized.accessToken } : {}),
        });
        authClient = client;
      } else {
        const JWT = gal.JWT;
        if (!JWT) {
          throw new Error("JWT not found in google-auth-library");
        }
        const c = normalized.credentials;
        authClient = new JWT({
          email: c.client_email,
          key: c.private_key,
          scopes,
          ...(normalized.subject ? { subject: normalized.subject } : {}),
        });
      }
    }

    const { google }: any = await import("googleapis");
    if (!google?.drive) {
      throw new Error("googleapis.google.drive not found");
    }

    const drive = google.drive({
      version: "v3",
      auth: authClient,
    });

    return { drive, authClient };
  } catch (err) {
    // String() guards against a non-string message for exotic thrown values.
    const msg = String(asMessage(err));
    const code = (err as { code?: unknown } | null | undefined)?.code;

    // CommonJS resolution reports "Cannot find module" / MODULE_NOT_FOUND, while
    // ESM dynamic import reports "Cannot find package" / ERR_MODULE_NOT_FOUND.
    const moduleMissing =
      code === "ERR_MODULE_NOT_FOUND" ||
      code === "MODULE_NOT_FOUND" ||
      msg.includes("Cannot find module") ||
      msg.includes("Cannot find package");

    if (
      moduleMissing &&
      (msg.includes("googleapis") || msg.includes("google-auth-library"))
    ) {
      throw new Error(
        `Missing Google Drive connector dependencies. Ensure you've installed the connector via \`unrag add google-drive\` (which adds "googleapis" and "google-auth-library"). Original error: ${msg}`
      );
    }
    throw err;
  }
}
|
|
170
|
+
|
|
171
|
+
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import type { AssetKind } from "../../core/types";
|
|
2
|
+
|
|
3
|
+
/** Drive-specific MIME types that this connector handles explicitly. */
export const DRIVE_MIME = {
  folder: "application/vnd.google-apps.folder",
  shortcut: "application/vnd.google-apps.shortcut",
  doc: "application/vnd.google-apps.document",
  sheet: "application/vnd.google-apps.spreadsheet",
  slides: "application/vnd.google-apps.presentation",
  drawing: "application/vnd.google-apps.drawing",
} as const;

/** MIME types requested when exporting Google-native files. */
export const EXPORT_MIME = {
  text: "text/plain",
  csv: "text/csv",
  pptx:
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
  png: "image/png",
} as const;

/**
 * Google-native file families. "other" covers any remaining
 * `application/vnd.google-apps.*` type that has no export plan.
 */
export type DriveGoogleNativeKind = "doc" | "sheet" | "slides" | "drawing" | "other";

export type DriveMimeClassification =
  | { kind: "folder" }
  | { kind: "shortcut" }
  | { kind: "google_native"; nativeKind: DriveGoogleNativeKind }
  | { kind: "binary" };

/** Prefix shared by Drive's own (non-blob) MIME types. */
const GOOGLE_APPS_MIME_PREFIX = "application/vnd.google-apps.";

/** Exportable Google-native MIME types and the family each belongs to. */
const NATIVE_KIND_BY_MIME = new Map<string, DriveGoogleNativeKind>([
  [DRIVE_MIME.doc, "doc"],
  [DRIVE_MIME.sheet, "sheet"],
  [DRIVE_MIME.slides, "slides"],
  [DRIVE_MIME.drawing, "drawing"],
]);

/**
 * Classifies a Drive file by its MIME type.
 *
 * @param mimeType - The file's MIME type; surrounding whitespace is ignored.
 * @returns "folder" or "shortcut" for those Drive types, "google_native" for
 *   Google Workspace types, and "binary" for everything else, including a
 *   missing or blank MIME type.
 */
export function classifyDriveMimeType(mimeType: string | undefined): DriveMimeClassification {
  const mt = String(mimeType ?? "").trim();
  if (!mt) return { kind: "binary" };

  if (mt === DRIVE_MIME.folder) return { kind: "folder" };
  if (mt === DRIVE_MIME.shortcut) return { kind: "shortcut" };

  const nativeKind = NATIVE_KIND_BY_MIME.get(mt);
  if (nativeKind) return { kind: "google_native", nativeKind };

  // Remaining Google-apps types (forms, sites, ...) have no export plan here.
  // Reporting them as native lets callers skip them as unsupported instead of
  // attempting a binary download. NOTE(review): assumes such files have no
  // downloadable blob content - confirm against the Drive MIME type list.
  if (mt.startsWith(GOOGLE_APPS_MIME_PREFIX)) {
    return { kind: "google_native", nativeKind: "other" };
  }

  return { kind: "binary" };
}
|
|
42
|
+
|
|
43
|
+
/** How a Google-native file should be brought into the ingest pipeline. */
export type DriveNativeExportPlan =
  | { kind: "content"; mimeType: string }
  | {
      kind: "asset";
      assetKind: AssetKind;
      mimeType: string;
      filenameExt?: string;
    }
  | { kind: "unsupported" };

/**
 * Picks the export strategy for a Google-native file family.
 *
 * Docs and Slides export as plain text, Sheets as CSV, and Drawings as a PNG
 * image asset (they have no good text representation). Any other value yields
 * an "unsupported" plan.
 */
export function getNativeExportPlan(nativeKind: DriveGoogleNativeKind): DriveNativeExportPlan {
  switch (nativeKind) {
    case "doc":
    case "slides":
      return { kind: "content", mimeType: EXPORT_MIME.text };
    case "sheet":
      return { kind: "content", mimeType: EXPORT_MIME.csv };
    case "drawing":
      return {
        kind: "asset",
        assetKind: "image",
        mimeType: EXPORT_MIME.png,
        filenameExt: "png",
      };
    default:
      return { kind: "unsupported" };
  }
}
|
|
66
|
+
|
|
67
|
+
/**
 * Maps a media (MIME) type to an asset kind.
 *
 * Matching ignores case and surrounding whitespace. PDFs map to "pdf"; types
 * under `image/`, `audio/` and `video/` map to their family name; everything
 * else, including a missing type, maps to "file".
 */
export function assetKindFromMediaType(mediaType: string | undefined): AssetKind {
  const normalized = String(mediaType ?? "").trim().toLowerCase();
  if (normalized === "application/pdf") return "pdf";

  const families = ["image", "audio", "video"] as const;
  const family = families.find((candidate) => normalized.startsWith(`${candidate}/`));
  return family ?? "file";
}
|
|
75
|
+
|
|
76
|
+
|
|
@@ -0,0 +1,528 @@
|
|
|
1
|
+
import type { IngestResult } from "../../core";
|
|
2
|
+
import type { AssetInput } from "../../core/types";
|
|
3
|
+
import { createGoogleDriveClient } from "./client";
|
|
4
|
+
import {
|
|
5
|
+
assetKindFromMediaType,
|
|
6
|
+
classifyDriveMimeType,
|
|
7
|
+
EXPORT_MIME,
|
|
8
|
+
getNativeExportPlan,
|
|
9
|
+
DRIVE_MIME,
|
|
10
|
+
} from "./mime";
|
|
11
|
+
import type {
|
|
12
|
+
BuildGoogleDriveFileIngestInputArgs,
|
|
13
|
+
GoogleDriveFileDocument,
|
|
14
|
+
GoogleDriveSyncProgressEvent,
|
|
15
|
+
SyncGoogleDriveFilesInput,
|
|
16
|
+
} from "./types";
|
|
17
|
+
|
|
18
|
+
/** Per-file size cap in bytes (15 MiB), used when `maxBytesPerFile` is not provided. */
const DEFAULT_MAX_BYTES = 15 * 1024 * 1024;
|
|
19
|
+
|
|
20
|
+
/**
 * Prepends an optional prefix to `rest`, separated by exactly one ":".
 * A missing or blank prefix returns `rest` unchanged; a prefix that already
 * ends in ":" does not get a second separator.
 */
const joinPrefix = (prefix: string | undefined, rest: string) => {
  const trimmed = (prefix ?? "").trim();
  if (trimmed === "") return rest;

  const separator = trimmed.endsWith(":") ? "" : ":";
  return `${trimmed}${separator}${rest}`;
};
|
|
25
|
+
|
|
26
|
+
/**
 * Assembles an ingest input for one Drive file.
 *
 * The source id is `gdrive:file:<fileId>`, optionally prefixed with
 * `sourceIdPrefix`. Missing metadata defaults to `{}` and missing assets to `[]`.
 */
export function buildGoogleDriveFileIngestInput(
  args: BuildGoogleDriveFileIngestInputArgs
) {
  const { fileId, sourceIdPrefix, content, metadata, assets } = args;

  return {
    sourceId: joinPrefix(sourceIdPrefix, `gdrive:file:${fileId}`),
    content,
    metadata: metadata ?? {},
    assets: assets ?? [],
  };
}
|
|
37
|
+
|
|
38
|
+
/**
 * Best-effort conversion of any thrown value into a message string.
 *
 * Always returns a string, so callers can safely run string methods on the result.
 *
 * @param err - The caught value (an `Error`, a string, or anything else).
 * @returns `err.message` for errors, the string itself for strings, otherwise a
 *   JSON rendering, falling back to `String(err)`.
 */
const asMessage = (err: unknown): string => {
  if (err instanceof Error) return err.message;
  if (typeof err === "string") return err;
  try {
    // JSON.stringify yields undefined (not a string) for undefined, functions
    // and symbols, so fall back to String() in that case.
    return JSON.stringify(err) ?? String(err);
  } catch {
    // JSON.stringify throws on circular structures and BigInt values.
    return String(err);
  }
};
|
|
46
|
+
|
|
47
|
+
/**
 * Coerces a response payload into a `Uint8Array`.
 *
 * @param data - A `Uint8Array` (returned as-is), an `ArrayBuffer`, any other
 *   typed-array/`DataView` view, or a string (encoded as UTF-8).
 * @returns The bytes; empty for falsy input and for unrecognized payload types.
 */
const toUint8Array = (data: any): Uint8Array => {
  if (!data) return new Uint8Array();

  // Node's Buffer is a Uint8Array subclass, so this branch covers it too.
  if (data instanceof Uint8Array) return data;
  if (data instanceof ArrayBuffer) return new Uint8Array(data);

  if (ArrayBuffer.isView(data)) {
    // Respect the view's window rather than exposing the whole backing buffer.
    return new Uint8Array(data.buffer, data.byteOffset, data.byteLength);
  }

  // Axios can hand back a string for some responseTypes; treat as utf-8 bytes.
  if (typeof data === "string") {
    return new TextEncoder().encode(data);
  }

  return new Uint8Array();
};
|
|
63
|
+
|
|
64
|
+
/**
 * Decodes bytes as UTF-8 text. Decoding is non-fatal, so malformed sequences
 * do not throw.
 */
const bytesToText = (bytes: Uint8Array) => {
  const decoder = new TextDecoder("utf-8", { fatal: false });
  return decoder.decode(bytes);
};
|
|
67
|
+
|
|
68
|
+
/**
 * 403 reasons that mean "retry later" or "cannot fetch this way", not "no access".
 * NOTE(review): reason strings are taken from Google's Drive error documentation;
 * confirm against the current list.
 */
const NON_ACCESS_FORBIDDEN_REASONS = new Set<string>([
  "rateLimitExceeded",
  "userRateLimitExceeded",
  "dailyLimitExceeded",
  "sharingRateLimitExceeded",
  "downloadQuotaExceeded",
  "exportSizeLimitExceeded",
  "fileNotDownloadable",
]);

/** Returns the first field of `err` that holds a plausible HTTP status code. */
const httpStatusOf = (err: any): number | undefined => {
  // `code` is checked first, as before, but a non-numeric code such as
  // "ECONNRESET" no longer hides the status fields that follow it.
  const candidates = [err?.code, err?.status, err?.response?.status, err?.statusCode];
  for (const candidate of candidates) {
    const value =
      typeof candidate === "number"
        ? candidate
        : typeof candidate === "string" && candidate.trim() !== ""
          ? Number(candidate)
          : NaN;
    if (Number.isInteger(value) && value >= 100 && value <= 599) return value;
  }
  return undefined;
};

/** Collects the `reason` strings from the error lists, if any are present. */
const errorReasonsOf = (err: any): string[] => {
  // NOTE(review): assumes reasons live on `err.errors` or in the response body
  // at `error.errors` - confirm against the client library in use.
  const lists = [err?.errors, err?.response?.data?.error?.errors];
  const reasons: string[] = [];
  for (const list of lists) {
    if (!Array.isArray(list)) continue;
    for (const entry of list) {
      if (typeof entry?.reason === "string") reasons.push(entry.reason);
    }
  }
  return reasons;
};

/**
 * Decides whether an error means the file is gone or inaccessible.
 *
 * @param err - The caught value from a Drive call.
 * @param treatForbiddenAsNotFound - When true, a 403 also counts as not-found,
 *   unless its reason is a rate-limit, quota or export/download limitation.
 * @returns true for 404, and for qualifying 403s when enabled.
 */
const isNotFound = (err: any, treatForbiddenAsNotFound: boolean) => {
  const status = httpStatusOf(err);
  if (status === 404) return true;

  if (status === 403 && treatForbiddenAsNotFound) {
    // A throttled or oversized request says nothing about whether the file exists.
    return !errorReasonsOf(err).some((reason) => NON_ACCESS_FORBIDDEN_REASONS.has(reason));
  }

  return false;
};
|
|
76
|
+
|
|
77
|
+
/**
 * Fetches the metadata fields the connector uses for one file.
 * Resolves to `{}` when the response carries no data.
 */
async function getFileMetadata(drive: any, fileId: string) {
  const request = {
    fileId,
    supportsAllDrives: true,
    fields:
      "id,name,mimeType,size,md5Checksum,modifiedTime,webViewLink,webContentLink,iconLink,shortcutDetails,driveId",
  };
  const response = await drive.files.get(request);
  return response?.data ?? {};
}
|
|
86
|
+
|
|
87
|
+
/** Downloads a file's content (`alt: "media"`) and returns it as bytes. */
async function downloadFileBytes(drive: any, fileId: string): Promise<Uint8Array> {
  const params = { fileId, alt: "media", supportsAllDrives: true };
  const requestOptions = { responseType: "arraybuffer" };

  const response = await drive.files.get(params, requestOptions);
  return toUint8Array(response?.data);
}
|
|
94
|
+
|
|
95
|
+
/** Exports a file to the given MIME type and returns the exported bytes. */
async function exportFileBytes(
  drive: any,
  fileId: string,
  mimeType: string
): Promise<Uint8Array> {
  const params = { fileId, mimeType };
  const requestOptions = { responseType: "arraybuffer" };

  const response = await drive.files.export(params, requestOptions);
  return toUint8Array(response?.data);
}
|
|
106
|
+
|
|
107
|
+
/**
 * Loads one Drive file and shapes it into an ingestable document.
 *
 * - Folders yield a metadata-only document with `kind: "folder"`.
 * - Shortcuts are resolved to their target; the result keeps the shortcut's
 *   source id. Cyclic or target-less shortcuts yield `shortcutUnresolved: true`.
 * - Google-native files are exported according to `getNativeExportPlan`; a
 *   failed Slides text export falls back to a PPTX asset unless
 *   `strictNativeExport` is set.
 * - Everything else is downloaded and attached as a single asset.
 *
 * Files larger than `maxBytesPerFile` yield a metadata-only document flagged
 * `exportedTooLarge` (exports) or `skippedTooLarge` (downloads).
 */
export async function loadGoogleDriveFileDocument(args: {
  drive: any;
  fileId: string;
  sourceIdPrefix?: string;
  options?: {
    maxBytesPerFile?: number;
    strictNativeExport?: boolean;
  };
  /** internal: recursion guard for shortcuts */
  _visited?: Set<string>;
}): Promise<GoogleDriveFileDocument> {
  const { drive, sourceIdPrefix, options } = args;
  const maxBytesPerFile = options?.maxBytesPerFile ?? DEFAULT_MAX_BYTES;
  const strictNativeExport = Boolean(options?.strictNativeExport ?? false);

  const meta = await getFileMetadata(drive, args.fileId);
  const fileId = String(meta?.id ?? args.fileId);
  const name = String(meta?.name ?? "");
  const mimeType = String(meta?.mimeType ?? "");
  const size = meta?.size !== undefined ? Number(meta.size) : undefined;
  const webViewLink = meta?.webViewLink ? String(meta.webViewLink) : undefined;

  // Document with metadata only: no text content and no assets.
  const metadataOnly = (metadata: any) =>
    buildGoogleDriveFileIngestInput({
      fileId,
      sourceIdPrefix,
      content: "",
      assets: [],
      metadata,
    }) as any;

  const classification = classifyDriveMimeType(mimeType);

  // Folders carry no content; callers typically skip them.
  if (classification.kind === "folder") {
    return metadataOnly({
      connector: "google-drive",
      kind: "folder",
      fileId,
      name,
      mimeType: DRIVE_MIME.folder,
      ...(webViewLink !== undefined ? { webViewLink } : {}),
      ...(meta?.modifiedTime ? { modifiedTime: String(meta.modifiedTime) } : {}),
    });
  }

  // Shortcuts: follow to the target when possible, otherwise flag as unresolved.
  if (classification.kind === "shortcut") {
    const unresolvedShortcut = () =>
      metadataOnly({
        connector: "google-drive",
        kind: "shortcut",
        fileId,
        name,
        mimeType: DRIVE_MIME.shortcut,
        shortcutUnresolved: true,
      });

    const visited = args._visited ?? new Set<string>();
    if (visited.has(fileId)) {
      // Already seen on this resolution chain: a cycle.
      return unresolvedShortcut();
    }
    visited.add(fileId);

    const rawTargetId = meta?.shortcutDetails?.targetId;
    const targetId = rawTargetId ? String(rawTargetId) : "";
    if (!targetId) {
      return unresolvedShortcut();
    }

    // Take the target's content/assets but keep the source id tied to the shortcut.
    const targetDoc = await loadGoogleDriveFileDocument({
      drive,
      fileId: targetId,
      sourceIdPrefix,
      options,
      _visited: visited,
    });

    return {
      ...targetDoc,
      sourceId: joinPrefix(sourceIdPrefix, `gdrive:file:${fileId}`),
      metadata: {
        ...(targetDoc.metadata ?? {}),
        connector: "google-drive",
        shortcutFileId: fileId,
        shortcutTargetId: targetId,
      },
    };
  }

  const baseMetadata = {
    connector: "google-drive",
    kind: "file",
    fileId,
    name,
    mimeType,
    ...(Number.isFinite(size) ? { size } : {}),
    ...(meta?.md5Checksum ? { md5Checksum: String(meta.md5Checksum) } : {}),
    ...(meta?.modifiedTime ? { modifiedTime: String(meta.modifiedTime) } : {}),
    ...(webViewLink !== undefined ? { webViewLink } : {}),
    ...(meta?.webContentLink ? { webContentLink: String(meta.webContentLink) } : {}),
    ...(meta?.iconLink ? { iconLink: String(meta.iconLink) } : {}),
    ...(meta?.driveId ? { driveId: String(meta.driveId) } : {}),
  } as const;

  // Google-native files have to be exported rather than downloaded.
  if (classification.kind === "google_native") {
    const { nativeKind } = classification;
    const plan = getNativeExportPlan(nativeKind);
    const exportedTooLarge = () => metadataOnly({ ...baseMetadata, exportedTooLarge: true });

    if (plan.kind === "unsupported") {
      return metadataOnly({
        ...baseMetadata,
        googleNativeKind: nativeKind,
        unsupportedGoogleMime: true,
      });
    }

    // Text-like export; the size cap is enforced on the exported bytes.
    if (plan.kind === "content") {
      try {
        const exported = await exportFileBytes(drive, fileId, plan.mimeType);
        if (exported.byteLength > maxBytesPerFile) {
          return exportedTooLarge();
        }
        return buildGoogleDriveFileIngestInput({
          fileId,
          sourceIdPrefix,
          content: bytesToText(exported).trim(),
          assets: [],
          metadata: {
            ...baseMetadata,
            googleNativeKind: nativeKind,
            exportMimeType: plan.mimeType,
          },
        }) as any;
      } catch (err) {
        // Slides can fail to export as text; fall back to PPTX unless strict.
        const mayFallBack = nativeKind === "slides" && !strictNativeExport;
        if (mayFallBack) {
          try {
            const pptxBytes = await exportFileBytes(drive, fileId, EXPORT_MIME.pptx);
            if (pptxBytes.byteLength > maxBytesPerFile) {
              return exportedTooLarge();
            }
            const pptxAsset: AssetInput = {
              assetId: fileId,
              kind: "file",
              data: {
                kind: "bytes",
                bytes: pptxBytes,
                mediaType: EXPORT_MIME.pptx,
                filename: name ? `${name}.pptx` : undefined,
              },
              uri: webViewLink,
              metadata: {
                connector: "google-drive",
                fileId,
                exportMimeType: EXPORT_MIME.pptx,
              } as any,
            };
            return buildGoogleDriveFileIngestInput({
              fileId,
              sourceIdPrefix,
              content: "",
              assets: [pptxAsset],
              metadata: { ...baseMetadata, googleNativeKind: "slides", exportFallback: "pptx" },
            }) as any;
          } catch {
            // Fallback failed as well; surface the original export error below.
          }
        }

        throw err;
      }
    }

    // Asset export (drawings -> PNG image).
    if (plan.kind === "asset") {
      const exported = await exportFileBytes(drive, fileId, plan.mimeType);
      if (exported.byteLength > maxBytesPerFile) {
        return exportedTooLarge();
      }

      const exportFilename =
        name && plan.filenameExt ? `${name}.${plan.filenameExt}` : name || undefined;
      const exportedAsset: AssetInput = {
        assetId: fileId,
        kind: plan.assetKind,
        data: {
          kind: "bytes",
          bytes: exported,
          mediaType: plan.mimeType,
          ...(exportFilename ? { filename: exportFilename } : {}),
        },
        uri: webViewLink,
        metadata: { connector: "google-drive", fileId, exportMimeType: plan.mimeType } as any,
      };

      return buildGoogleDriveFileIngestInput({
        fileId,
        sourceIdPrefix,
        content: "",
        assets: [exportedAsset],
        metadata: {
          ...baseMetadata,
          googleNativeKind: nativeKind,
          exportMimeType: plan.mimeType,
        },
      }) as any;
    }
  }

  // Binary download path.
  const skippedTooLarge = () => metadataOnly({ ...baseMetadata, skippedTooLarge: true });

  // Use the reported size first so oversized files are never downloaded.
  if (Number.isFinite(size) && (size as number) > maxBytesPerFile) {
    return skippedTooLarge();
  }

  const downloaded = await downloadFileBytes(drive, fileId);
  // Re-check on the actual bytes, since the reported size may be absent.
  if (downloaded.byteLength > maxBytesPerFile) {
    return skippedTooLarge();
  }

  const downloadFilename = name || undefined;
  const downloadedAsset: AssetInput = {
    assetId: fileId,
    kind: assetKindFromMediaType(mimeType),
    data: {
      kind: "bytes",
      bytes: downloaded,
      mediaType: mimeType || "application/octet-stream",
      ...(downloadFilename ? { filename: downloadFilename } : {}),
    },
    uri: webViewLink,
    metadata: { connector: "google-drive", fileId, name, mimeType } as any,
  };

  // Pure binaries keep empty content; text extraction happens on the asset downstream.
  return buildGoogleDriveFileIngestInput({
    fileId,
    sourceIdPrefix,
    content: "",
    assets: [downloadedAsset],
    metadata: baseMetadata as any,
  }) as any;
}
|
|
385
|
+
|
|
386
|
+
/**
 * Loads each listed Drive file and ingests it through `input.engine`.
 *
 * Files are processed one at a time, and one file's failure does not stop the
 * rest. Progress is reported through `input.onProgress`; exceptions thrown by
 * that handler are ignored.
 *
 * Not-found detection applies only to the Drive load step. An error thrown by
 * `engine.ingest` is always recorded as a failure, even when it carries a
 * 403/404 status, so it can never trigger `deleteOnNotFound`.
 *
 * @param input - Engine, auth, file ids and options for the sync run.
 * @returns Counters for the run plus the collected per-file errors.
 *   `fileCount` is the length of `input.fileIds`, blank ids included.
 */
export async function syncGoogleDriveFiles(
  input: SyncGoogleDriveFilesInput
): Promise<{
  fileCount: number;
  succeeded: number;
  failed: number;
  deleted: number;
  errors: Array<{ fileId: string; sourceId: string; error: unknown }>;
}> {
  const deleteOnNotFound = input.deleteOnNotFound ?? false;
  const options = input.options ?? {};
  const maxBytesPerFile = options.maxBytesPerFile ?? DEFAULT_MAX_BYTES;
  const treatForbiddenAsNotFound = options.treatForbiddenAsNotFound ?? true;

  const { drive } = await createGoogleDriveClient({
    auth: input.auth,
    scopes: options.scopes,
  });

  const errors: Array<{ fileId: string; sourceId: string; error: unknown }> = [];
  let succeeded = 0;
  let failed = 0;
  let deleted = 0;

  // A faulty progress handler must never break the sync itself.
  const emit = (event: GoogleDriveSyncProgressEvent) => {
    try {
      input.onProgress?.(event);
    } catch {
      // ignore progress handler errors
    }
  };

  // Counts, stores and reports one per-file failure.
  const recordFailure = (fileId: string, sourceId: string, error: unknown) => {
    failed += 1;
    errors.push({ fileId, sourceId, error });
    emit({ type: "file:error", fileId, sourceId, error });
  };

  // Maps the marker flags set by the loader to a skip reason, if any applies.
  const skipInfoFor = (metadata: any) => {
    if (metadata?.kind === "folder") {
      return {
        reason: "is_folder",
        message: "Skipping folder (v1: files-only connector).",
      } as const;
    }
    if (metadata?.unsupportedGoogleMime) {
      return {
        reason: "unsupported_google_mime",
        message:
          "Skipping Google-native file type because it has no supported export plan.",
      } as const;
    }
    if (metadata?.skippedTooLarge || metadata?.exportedTooLarge) {
      return {
        reason: "too_large",
        message: `Skipping file because it exceeds maxBytesPerFile (${maxBytesPerFile}).`,
      } as const;
    }
    if (metadata?.shortcutUnresolved) {
      return {
        reason: "shortcut_unresolved",
        message: "Skipping shortcut because target could not be resolved.",
      } as const;
    }
    return undefined;
  };

  for (const fileIdRaw of input.fileIds) {
    const fileId = String(fileIdRaw ?? "").trim();
    if (!fileId) continue;

    const sourceId = joinPrefix(input.sourceIdPrefix, `gdrive:file:${fileId}`);
    emit({ type: "file:start", fileId, sourceId });

    // Step 1: load from Drive. Only errors from this step may mean "not found".
    let doc: GoogleDriveFileDocument;
    try {
      doc = await loadGoogleDriveFileDocument({
        drive,
        fileId,
        sourceIdPrefix: input.sourceIdPrefix,
        options: {
          maxBytesPerFile,
          strictNativeExport: options.strictNativeExport,
        },
      });
    } catch (err) {
      if (!isNotFound(err, Boolean(treatForbiddenAsNotFound))) {
        recordFailure(fileId, sourceId, err);
        continue;
      }

      emit({ type: "file:not-found", fileId, sourceId });
      if (deleteOnNotFound) {
        try {
          await input.engine.delete({ sourceId });
          deleted += 1;
        } catch (deleteErr) {
          recordFailure(fileId, sourceId, deleteErr);
        }
      }
      continue;
    }

    // Step 2: skip documents the loader flagged as not ingestable.
    const skip = skipInfoFor(doc.metadata as any);
    if (skip) {
      emit({ type: "file:skipped", fileId, sourceId, ...skip });
      continue;
    }

    // Step 3: ingest. Failures here are never interpreted as a missing file.
    try {
      const result: IngestResult = await input.engine.ingest({
        sourceId: doc.sourceId,
        content: doc.content,
        assets: doc.assets,
        metadata: doc.metadata as any,
      });

      succeeded += 1;
      emit({
        type: "file:success",
        fileId,
        sourceId,
        chunkCount: result.chunkCount,
      });
    } catch (err) {
      recordFailure(fileId, sourceId, err);
    }
  }

  return {
    fileCount: input.fileIds.length,
    succeeded,
    failed,
    deleted,
    errors,
  };
}
|
|
527
|
+
|
|
528
|
+
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import type { ContextEngine, AssetInput, IngestInput } from "../../core";
|
|
2
|
+
|
|
3
|
+
/**
 * OAuth2 through a client instance the host application already owns
 * (recommended if your app already has one).
 */
type GoogleDriveOAuthClientAuth = {
  kind: "oauth";
  /** Kept as `unknown` so this file carries no google-auth-library types. */
  oauthClient: unknown;
};

/**
 * OAuth2 convenience form: the connector will construct an OAuth2 client
 * and set credentials including the refresh token.
 */
type GoogleDriveOAuthCredentialsAuth = {
  kind: "oauth";
  clientId: string;
  clientSecret: string;
  redirectUri: string;
  refreshToken: string;
  /** Optional access token if you already have one. */
  accessToken?: string;
};

/**
 * Service account credentials. This covers both:
 * - direct service-account access (files must be shared to the service account)
 * - Workspace domain-wide delegation (DWD) when `subject` is provided
 */
type GoogleDriveServiceAccountAuth = {
  kind: "service_account";
  /** Either the raw JSON text or the already-parsed credentials object. */
  credentialsJson: string | Record<string, unknown>;
  /**
   * DWD impersonation subject email (Workspace only).
   * When provided, the service account will impersonate this user.
   */
  subject?: string;
};

/** Escape hatch: a pre-configured GoogleAuth (or equivalent) instance. */
type GoogleDriveAuthInstance = {
  kind: "google_auth";
  auth: unknown;
};

/**
 * A plug-and-play auth input for Google Drive.
 *
 * The shape is intentionally structural (no hard dependency on
 * google-auth-library types), because the connector code is vendored into
 * user projects and dependencies are added by the CLI
 * (`unrag add google-drive`).
 *
 * Note that both OAuth variants carry `kind: "oauth"`; they differ only in
 * which fields are present (`oauthClient` versus the client id/secret set).
 */
export type GoogleDriveAuth =
  | GoogleDriveOAuthClientAuth
  | GoogleDriveOAuthCredentialsAuth
  | GoogleDriveServiceAccountAuth
  | GoogleDriveAuthInstance;
|
|
48
|
+
|
|
49
|
+
/** Why a file was skipped rather than ingested. */
type GoogleDriveSkipReason =
  | "is_folder"
  | "unsupported_google_mime"
  | "too_large"
  | "shortcut_unresolved";

/** Emitted before a file is loaded. */
type GoogleDriveFileStartEvent = {
  type: "file:start";
  fileId: string;
  sourceId: string;
};

/** Emitted after a file was ingested; `chunkCount` comes from the ingest result. */
type GoogleDriveFileSuccessEvent = {
  type: "file:success";
  fileId: string;
  sourceId: string;
  chunkCount: number;
};

/** Emitted when a file is deliberately not ingested. */
type GoogleDriveFileSkippedEvent = {
  type: "file:skipped";
  fileId: string;
  sourceId: string;
  reason: GoogleDriveSkipReason;
  /** Human-readable explanation of the skip. */
  message: string;
};

/** Emitted when the failure is classified as "file not found". */
type GoogleDriveFileNotFoundEvent = {
  type: "file:not-found";
  fileId: string;
  sourceId: string;
};

/** Emitted for any other failure, including a failed cleanup delete. */
type GoogleDriveFileErrorEvent = {
  type: "file:error";
  fileId: string;
  sourceId: string;
  error: unknown;
};

/**
 * Progress notifications delivered to `onProgress` during a sync run.
 *
 * Discriminated on `type`; every variant carries the Drive `fileId` and the
 * `sourceId` the file is stored under.
 */
export type GoogleDriveSyncProgressEvent =
  | GoogleDriveFileStartEvent
  | GoogleDriveFileSuccessEvent
  | GoogleDriveFileSkippedEvent
  | GoogleDriveFileNotFoundEvent
  | GoogleDriveFileErrorEvent;
|
|
70
|
+
|
|
71
|
+
/**
 * A Drive file in the form the sync loop hands to the engine: its
 * `sourceId`, `content`, `assets` and `metadata` are passed to `ingest`.
 */
export type GoogleDriveFileDocument = {
  /** Identifier the document is ingested under. */
  sourceId: string;
  /** Text content of the file. */
  content: string;
  /** Assets that accompany the text content. */
  assets: AssetInput[];
  /**
   * Free-form metadata. The sync loop also reads skip markers from it
   * (`kind === "folder"`, `unsupportedGoogleMime`, `skippedTooLarge`,
   * `exportedTooLarge`, `shortcutUnresolved`).
   */
  metadata: Record<string, unknown>;
};
|
|
77
|
+
|
|
78
|
+
/**
 * Arguments for turning one Drive file into an engine ingest input.
 * Only `fileId` and `content` are required.
 */
export type BuildGoogleDriveFileIngestInputArgs = {
  /** Drive file ID. */
  fileId: string;
  /** Text content of the file. */
  content: string;
  /** Optional namespace prefix for the resulting source id. */
  sourceIdPrefix?: string;
  /** Optional metadata to attach. */
  metadata?: Record<string, unknown>;
  /** Optional assets to attach. */
  assets?: AssetInput[];
};
|
|
85
|
+
|
|
86
|
+
/**
 * Alias of the core `IngestInput` type.
 * NOTE(review): presumably the return type of the helper that consumes
 * `BuildGoogleDriveFileIngestInputArgs`; that helper is not in this file — confirm.
 */
export type BuildGoogleDriveFileIngestInputResult = IngestInput;
|
|
87
|
+
|
|
88
|
+
/** Arguments for a sync run over an explicit list of Drive files. */
export type SyncGoogleDriveFilesInput = {
  /** Explicit Drive file IDs (Notion-like v1 behavior). */
  fileIds: string[];
  /** Authentication input; see `GoogleDriveAuth` for the accepted forms. */
  auth: GoogleDriveAuth;
  /** Engine that documents are ingested into (and deleted from on cleanup). */
  engine: ContextEngine;
  /**
   * Optional namespace prefix, useful for multi-tenant apps:
   * `tenant:acme:` -> `tenant:acme:gdrive:file:<id>`
   */
  sourceIdPrefix?: string;
  /**
   * When true, if a file is not found/accessible, delete the previously
   * ingested document for that file (exact sourceId).
   */
  deleteOnNotFound?: boolean;
  /**
   * Optional progress callback. Exceptions thrown by the callback are
   * ignored by the sync loop.
   */
  onProgress?: (event: GoogleDriveSyncProgressEvent) => void;
  /** Optional connector-level knobs. */
  options?: SyncGoogleDriveFilesOptions;
};
|
|
108
|
+
|
|
109
|
+
/** Connector-level tuning knobs for a Google Drive sync; every field is optional. */
export type SyncGoogleDriveFilesOptions = {
  /** Override Drive API scopes if desired. */
  scopes?: string[];
  /** Max bytes to download/export per file. Default: 15MB. */
  maxBytesPerFile?: number;
  /**
   * If true, failures to export Google-native files (e.g., Slides -> text)
   * will cause the file to be skipped instead of falling back to a binary export.
   * Default: false (best-effort fallback).
   */
  strictNativeExport?: boolean;
  /**
   * If true, treat 403 (forbidden) as not-found for cleanup purposes.
   * Default: true.
   */
  treatForbiddenAsNotFound?: boolean;
};
|
|
126
|
+
|
|
127
|
+
|