@credal/actions 0.2.142 → 0.2.144
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/actions/providers/google-oauth/getDriveFileContentById.js +10 -9
- package/dist/actions/providers/google-oauth/searchDriveByKeywordsAndGetFileContent.js +40 -7
- package/dist/actions/providers/google-oauth/searchDriveByQuery.js +5 -5
- package/dist/utils/google.js +53 -28
- package/package.json +1 -1
|
@@ -14,19 +14,20 @@ import { extractTextFromPdf } from "../../../utils/pdf.js";
|
|
|
14
14
|
import { getGoogleDocContent, getGoogleSheetContent, getGoogleSlidesContent } from "../../../utils/google.js";
|
|
15
15
|
import { read, utils } from "xlsx";
|
|
16
16
|
import officeParser from "officeparser";
|
|
17
|
+
const BASE_WEB_URL = "https://drive.google.com/file/d/";
|
|
18
|
+
const BASE_API_URL = "https://www.googleapis.com/drive/v3/files/";
|
|
17
19
|
const getDriveFileContentById = (_a) => __awaiter(void 0, [_a], void 0, function* ({ params, authParams, }) {
|
|
18
20
|
var _b, _c, _d, _e;
|
|
19
21
|
if (!authParams.authToken) {
|
|
20
22
|
return { success: false, error: MISSING_AUTH_TOKEN };
|
|
21
23
|
}
|
|
22
|
-
const BASE_URL = "https://www.googleapis.com/drive/v3/files/";
|
|
23
24
|
const headers = { Authorization: `Bearer ${authParams.authToken}` };
|
|
24
25
|
const { limit: charLimit, fileId } = params;
|
|
25
26
|
const timeoutLimit = params.timeoutLimit !== undefined && params.timeoutLimit > 0 ? params.timeoutLimit * 1000 : 15000;
|
|
26
27
|
const axiosClient = createAxiosClientWithTimeout(timeoutLimit);
|
|
27
28
|
// helper to fetch drive metadata with fields we need (incl. shortcut details)
|
|
28
29
|
const fetchMeta = (fid) => __awaiter(void 0, void 0, void 0, function* () {
|
|
29
|
-
const metaUrl = `${
|
|
30
|
+
const metaUrl = `${BASE_API_URL}${encodeURIComponent(fid)}` +
|
|
30
31
|
`?fields=name,mimeType,size,driveId,parents,` +
|
|
31
32
|
`shortcutDetails(targetId,targetMimeType)` +
|
|
32
33
|
`&supportsAllDrives=true`;
|
|
@@ -63,7 +64,7 @@ const getDriveFileContentById = (_a) => __awaiter(void 0, [_a], void 0, function
|
|
|
63
64
|
content = yield getGoogleSlidesContent(params.fileId, authParams.authToken, axiosClient, sharedDriveParam);
|
|
64
65
|
}
|
|
65
66
|
else if (mimeType === "application/pdf") {
|
|
66
|
-
const downloadUrl = `${
|
|
67
|
+
const downloadUrl = `${BASE_API_URL}${encodeURIComponent(params.fileId)}?alt=media${sharedDriveParam}`;
|
|
67
68
|
const downloadRes = yield axiosClient.get(downloadUrl, {
|
|
68
69
|
headers,
|
|
69
70
|
responseType: "arraybuffer",
|
|
@@ -80,7 +81,7 @@ const getDriveFileContentById = (_a) => __awaiter(void 0, [_a], void 0, function
|
|
|
80
81
|
}
|
|
81
82
|
else if (mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document" ||
|
|
82
83
|
mimeType === "application/msword") {
|
|
83
|
-
const downloadUrl = `${
|
|
84
|
+
const downloadUrl = `${BASE_API_URL}${encodeURIComponent(params.fileId)}?alt=media${sharedDriveParam}`;
|
|
84
85
|
const downloadRes = yield axiosClient.get(downloadUrl, {
|
|
85
86
|
headers,
|
|
86
87
|
responseType: "arraybuffer",
|
|
@@ -97,7 +98,7 @@ const getDriveFileContentById = (_a) => __awaiter(void 0, [_a], void 0, function
|
|
|
97
98
|
}
|
|
98
99
|
}
|
|
99
100
|
else if (mimeType === "text/plain" || mimeType === "text/html" || mimeType === "application/rtf") {
|
|
100
|
-
const downloadUrl = `${
|
|
101
|
+
const downloadUrl = `${BASE_API_URL}${encodeURIComponent(params.fileId)}?alt=media${sharedDriveParam}`;
|
|
101
102
|
const downloadRes = yield axiosClient.get(downloadUrl, {
|
|
102
103
|
headers,
|
|
103
104
|
responseType: "text",
|
|
@@ -109,7 +110,7 @@ const getDriveFileContentById = (_a) => __awaiter(void 0, [_a], void 0, function
|
|
|
109
110
|
mimeType === "text/tab-separated-values" ||
|
|
110
111
|
mimeType === "application/rtf" ||
|
|
111
112
|
mimeType === "application/json") {
|
|
112
|
-
const downloadUrl = `${
|
|
113
|
+
const downloadUrl = `${BASE_API_URL}${encodeURIComponent(params.fileId)}?alt=media${sharedDriveParam}`;
|
|
113
114
|
const downloadRes = yield axiosClient.get(downloadUrl, {
|
|
114
115
|
headers,
|
|
115
116
|
responseType: "arraybuffer",
|
|
@@ -120,7 +121,7 @@ const getDriveFileContentById = (_a) => __awaiter(void 0, [_a], void 0, function
|
|
|
120
121
|
}
|
|
121
122
|
else if (mimeType === "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ||
|
|
122
123
|
mimeType === "application/vnd.ms-excel") {
|
|
123
|
-
const downloadUrl = `${
|
|
124
|
+
const downloadUrl = `${BASE_API_URL}${encodeURIComponent(params.fileId)}?alt=media${sharedDriveParam}`;
|
|
124
125
|
const downloadRes = yield axiosClient.get(downloadUrl, {
|
|
125
126
|
headers,
|
|
126
127
|
responseType: "arraybuffer",
|
|
@@ -139,7 +140,7 @@ const getDriveFileContentById = (_a) => __awaiter(void 0, [_a], void 0, function
|
|
|
139
140
|
}
|
|
140
141
|
else if (mimeType === "application/vnd.openxmlformats-officedocument.presentationml.presentation") {
|
|
141
142
|
// Handle modern PowerPoint files (.pptx only)
|
|
142
|
-
const downloadUrl = `${
|
|
143
|
+
const downloadUrl = `${BASE_API_URL}${encodeURIComponent(params.fileId)}?alt=media${sharedDriveParam}`;
|
|
143
144
|
const downloadRes = yield axiosClient.get(downloadUrl, {
|
|
144
145
|
headers,
|
|
145
146
|
responseType: "arraybuffer",
|
|
@@ -174,7 +175,7 @@ const getDriveFileContentById = (_a) => __awaiter(void 0, [_a], void 0, function
|
|
|
174
175
|
results: [
|
|
175
176
|
{
|
|
176
177
|
name: fileName,
|
|
177
|
-
url: `${
|
|
178
|
+
url: `${BASE_WEB_URL}${params.fileId}`,
|
|
178
179
|
contents: { content, fileName, fileLength: originalLength },
|
|
179
180
|
},
|
|
180
181
|
],
|
|
@@ -10,6 +10,20 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
10
10
|
import { MISSING_AUTH_TOKEN } from "../../util/missingAuthConstants.js";
|
|
11
11
|
import searchDriveByQuery from "./searchDriveByQuery.js";
|
|
12
12
|
import getDriveFileContentById from "./getDriveFileContentById.js";
|
|
13
|
+
// Helper function to process files in batches with concurrency control
|
|
14
|
+
/**
 * Runs `processor` over `items` in consecutive batches and collects the fulfilled results.
 *
 * Each batch is handed to `Promise.allSettled`, so every call in a batch is started
 * before any of them is awaited, and the next batch starts only once the current one
 * has fully settled. Rejected calls are dropped from the output; results of fulfilled
 * calls are returned in input order.
 *
 * @param {Array} items - Values to process.
 * @param {Function} processor - Invoked via `Array.prototype.map`, so it receives
 *   `(item, indexWithinBatch, batch)`; expected to return a promise or a plain value.
 * @param {number} [batchSize=3] - Maximum number of items per batch. Values that are
 *   not a positive number (0, negatives, NaN) are treated as 1.
 * @returns {Promise<Array>} Resolved values of the fulfilled processor calls.
 */
const processBatch = (items_1, processor_1, ...args_1) => __awaiter(void 0, [items_1, processor_1, ...args_1], void 0, function* (items, processor, batchSize = 3) {
    // A zero or negative batch size would never move the index past items.length and the
    // loop would spin forever; NaN would end the loop after one empty batch. Normalise to
    // a whole number of at least 1 so every item is always visited exactly once.
    const step = Math.max(1, Math.floor(batchSize) || 1);
    const results = [];
    for (let i = 0; i < items.length; i += step) {
        const batch = items.slice(i, i + step);
        const batchResults = yield Promise.allSettled(batch.map(processor));
        for (const result of batchResults) {
            // Best-effort by design: a failed item is skipped rather than failing the whole run.
            if (result.status === "fulfilled") {
                results.push(result.value);
            }
        }
    }
    return results;
});
|
|
13
27
|
const searchDriveByKeywordsAndGetFileContent = (_a) => __awaiter(void 0, [_a], void 0, function* ({ params, authParams, }) {
|
|
14
28
|
var _b;
|
|
15
29
|
if (!authParams.authToken) {
|
|
@@ -29,13 +43,33 @@ const searchDriveByKeywordsAndGetFileContent = (_a) => __awaiter(void 0, [_a], v
|
|
|
29
43
|
if (!searchResult.success) {
|
|
30
44
|
return { success: false, error: searchResult.error };
|
|
31
45
|
}
|
|
32
|
-
// For each file, fetch its content in parallel
|
|
33
46
|
const files = (_b = searchResult.files) !== null && _b !== void 0 ? _b : [];
|
|
34
|
-
|
|
47
|
+
// File types that are likely to fail or have no useful text content
|
|
48
|
+
const problematicMimeTypes = new Set([
|
|
49
|
+
"application/vnd.google-apps.form",
|
|
50
|
+
"application/vnd.google-apps.site",
|
|
51
|
+
"application/vnd.google-apps.map",
|
|
52
|
+
"application/vnd.google-apps.drawing",
|
|
53
|
+
"application/vnd.openxmlformats-officedocument.presentationml.presentation", // PowerPoint
|
|
54
|
+
"application/vnd.ms-powerpoint",
|
|
55
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", // Excel (not supported yet)
|
|
56
|
+
"application/vnd.ms-excel",
|
|
57
|
+
]);
|
|
58
|
+
// Filter out problematic files BEFORE processing to avoid wasting resources
|
|
59
|
+
const validFiles = files
|
|
60
|
+
.slice(0, limit)
|
|
61
|
+
.filter(file => file.id && file.name && !problematicMimeTypes.has(file.mimeType));
|
|
62
|
+
// Process only valid files in smaller batches to avoid overwhelming the API
|
|
63
|
+
const filesWithContent = yield processBatch(validFiles, (file) => __awaiter(void 0, void 0, void 0, function* () {
|
|
35
64
|
var _a, _b, _c;
|
|
36
65
|
try {
|
|
66
|
+
// Add timeout for individual file content requests with shorter timeout
|
|
37
67
|
const contentResult = yield getDriveFileContentById({
|
|
38
|
-
params: {
|
|
68
|
+
params: {
|
|
69
|
+
fileId: file.id,
|
|
70
|
+
limit: maxChars,
|
|
71
|
+
timeoutLimit: 2,
|
|
72
|
+
},
|
|
39
73
|
authParams,
|
|
40
74
|
});
|
|
41
75
|
return {
|
|
@@ -46,17 +80,16 @@ const searchDriveByKeywordsAndGetFileContent = (_a) => __awaiter(void 0, [_a], v
|
|
|
46
80
|
content: contentResult.success ? (_c = (_b = (_a = contentResult.results) === null || _a === void 0 ? void 0 : _a[0]) === null || _b === void 0 ? void 0 : _b.contents) === null || _c === void 0 ? void 0 : _c.content : undefined,
|
|
47
81
|
};
|
|
48
82
|
}
|
|
49
|
-
catch (
|
|
50
|
-
console.error(`Error fetching content for file ${file.id}:`, error);
|
|
83
|
+
catch (_d) {
|
|
51
84
|
return {
|
|
52
85
|
id: file.id,
|
|
53
86
|
name: file.name,
|
|
54
87
|
mimeType: file.mimeType,
|
|
55
88
|
url: file.url,
|
|
89
|
+
content: undefined, // Gracefully handle errors
|
|
56
90
|
};
|
|
57
91
|
}
|
|
58
|
-
}));
|
|
59
|
-
const filesWithContent = yield Promise.all(contentPromises);
|
|
92
|
+
}), 5);
|
|
60
93
|
// Return combined results
|
|
61
94
|
return {
|
|
62
95
|
success: true,
|
|
@@ -37,14 +37,14 @@ const searchDriveByQuery = (_a) => __awaiter(void 0, [_a], void 0, function* ({
|
|
|
37
37
|
});
|
|
38
38
|
// Original search method - search all drives at once
|
|
39
39
|
const searchAllDrivesAtOnce = (query, authToken, limit, orderByQuery) => __awaiter(void 0, void 0, void 0, function* () {
|
|
40
|
-
const allDrivesUrl = `https://www.googleapis.com/drive/v3/files?q=${encodeURIComponent(query)}&fields=files(id,name,mimeType,webViewLink)&supportsAllDrives=true&includeItemsFromAllDrives=true&corpora=allDrives&pageSize=
|
|
40
|
+
const allDrivesUrl = `https://www.googleapis.com/drive/v3/files?q=${encodeURIComponent(query)}&fields=files(id,name,mimeType,webViewLink)&supportsAllDrives=true&includeItemsFromAllDrives=true&corpora=allDrives&pageSize=100${orderByQuery ? `&orderBy=${encodeURIComponent(orderByQuery)}` : ""}`;
|
|
41
41
|
const allDrivesRes = axiosClient.get(allDrivesUrl, {
|
|
42
42
|
headers: {
|
|
43
43
|
Authorization: `Bearer ${authToken}`,
|
|
44
44
|
},
|
|
45
45
|
});
|
|
46
46
|
// need to search domain wide separately because the allDrives search doesn't include domain wide files
|
|
47
|
-
const orgWideUrl = `https://www.googleapis.com/drive/v3/files?q=${encodeURIComponent(query)}&fields=files(id,name,mimeType,webViewLink)&corpora=domain&pageSize=
|
|
47
|
+
const orgWideUrl = `https://www.googleapis.com/drive/v3/files?q=${encodeURIComponent(query)}&fields=files(id,name,mimeType,webViewLink)&corpora=domain&pageSize=100${orderByQuery ? `&orderBy=${encodeURIComponent(orderByQuery)}` : ""}`;
|
|
48
48
|
const orgWideRes = axiosClient.get(orgWideUrl, {
|
|
49
49
|
headers: {
|
|
50
50
|
Authorization: `Bearer ${authToken}`,
|
|
@@ -76,7 +76,7 @@ const searchAllDrivesIndividually = (query, authToken, limit, orderByQuery) => _
|
|
|
76
76
|
`q=${encodeURIComponent(query)}&` +
|
|
77
77
|
`fields=files(id,name,mimeType,webViewLink),nextPageToken&` +
|
|
78
78
|
`corpora=domain&` +
|
|
79
|
-
`pageSize=
|
|
79
|
+
`pageSize=100${orderByQuery ? `&orderBy=${encodeURIComponent(orderByQuery)}` : ""}`;
|
|
80
80
|
const domainDriveFunction = () => __awaiter(void 0, void 0, void 0, function* () {
|
|
81
81
|
var _a, _b;
|
|
82
82
|
const domainRes = yield axiosClient.get(domainUrl, {
|
|
@@ -148,11 +148,11 @@ const searchSingleDrive = (query, driveId, authToken, orderByQuery) => __awaiter
|
|
|
148
148
|
let url;
|
|
149
149
|
if (driveId === "root") {
|
|
150
150
|
// Search in user's personal drive
|
|
151
|
-
url = `https://www.googleapis.com/drive/v3/files?q=${encodeURIComponent(query)}&fields=files(id,name,mimeType,webViewLink),nextPageToken&pageSize=
|
|
151
|
+
url = `https://www.googleapis.com/drive/v3/files?q=${encodeURIComponent(query)}&fields=files(id,name,mimeType,webViewLink),nextPageToken&pageSize=100${orderByQuery ? `&orderBy=${encodeURIComponent(orderByQuery)}` : ""}${nextPageToken ? `&pageToken=${nextPageToken}` : ""}`;
|
|
152
152
|
}
|
|
153
153
|
else {
|
|
154
154
|
// Search in specific shared drive
|
|
155
|
-
url = `https://www.googleapis.com/drive/v3/files?q=${encodeURIComponent(`${query} and parents in '${driveId}'`)}&fields=files(id,name,mimeType,webViewLink),nextPageToken&supportsAllDrives=true&includeItemsFromAllDrives=true&corpora=drive&driveId=${driveId}&pageSize=
|
|
155
|
+
url = `https://www.googleapis.com/drive/v3/files?q=${encodeURIComponent(`${query} and parents in '${driveId}'`)}&fields=files(id,name,mimeType,webViewLink),nextPageToken&supportsAllDrives=true&includeItemsFromAllDrives=true&corpora=drive&driveId=${driveId}&pageSize=100${orderByQuery ? `&orderBy=${encodeURIComponent(orderByQuery)}` : ""}${nextPageToken ? `&pageToken=${nextPageToken}` : ""}`;
|
|
156
156
|
}
|
|
157
157
|
const res = yield axiosClient.get(url, {
|
|
158
158
|
headers: {
|
package/dist/utils/google.js
CHANGED
|
@@ -263,45 +263,70 @@ export function getGoogleDocContent(fileId, authToken, axiosClient, sharedDriveP
|
|
|
263
263
|
}
|
|
264
264
|
else {
|
|
265
265
|
console.log("Error using Google Docs API", docsError);
|
|
266
|
-
//
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
266
|
+
// Check if it's a 404 or permission error - don't retry these
|
|
267
|
+
if (docsError && typeof docsError === "object" && "status" in docsError) {
|
|
268
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
269
|
+
const status = docsError.status;
|
|
270
|
+
if (status === 404 || status === 403) {
|
|
271
|
+
throw new Error(`File not accessible (${status}): ${fileId}`);
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
try {
|
|
275
|
+
// Fallback to Drive API export if Docs API fails
|
|
276
|
+
const exportUrl = `${GDRIVE_BASE_URL}${encodeURIComponent(fileId)}/export?mimeType=text/plain${sharedDriveParams}`;
|
|
277
|
+
const exportRes = yield axiosClient.get(exportUrl, {
|
|
278
|
+
headers: { Authorization: `Bearer ${authToken}` },
|
|
279
|
+
responseType: "text",
|
|
280
|
+
});
|
|
281
|
+
return exportRes.data;
|
|
282
|
+
}
|
|
283
|
+
catch (_a) {
|
|
284
|
+
throw new Error(`Unable to access document content: ${fileId}`);
|
|
285
|
+
}
|
|
273
286
|
}
|
|
274
287
|
}
|
|
275
288
|
});
|
|
276
289
|
}
|
|
277
290
|
export function getGoogleSheetContent(fileId, authToken, axiosClient, sharedDriveParams) {
|
|
278
291
|
return __awaiter(this, void 0, void 0, function* () {
|
|
292
|
+
// Use CSV export as primary method - it's much faster and more reliable for large sheets
|
|
293
|
+
// The Sheets API with includeGridData can timeout on large spreadsheets
|
|
279
294
|
try {
|
|
280
|
-
const
|
|
281
|
-
const
|
|
282
|
-
headers: {
|
|
283
|
-
|
|
284
|
-
},
|
|
295
|
+
const exportUrl = `${GDRIVE_BASE_URL}${encodeURIComponent(fileId)}/export?mimeType=text/csv${sharedDriveParams}`;
|
|
296
|
+
const exportRes = yield axiosClient.get(exportUrl, {
|
|
297
|
+
headers: { Authorization: `Bearer ${authToken}` },
|
|
298
|
+
responseType: "text",
|
|
285
299
|
});
|
|
286
|
-
return
|
|
300
|
+
return exportRes.data
|
|
301
|
+
.split("\n")
|
|
302
|
+
.map((line) => line.replace(/,+$/, ""))
|
|
303
|
+
.map((line) => line.replace(/,{2,}/g, ","))
|
|
304
|
+
.join("\n");
|
|
287
305
|
}
|
|
288
|
-
catch (
|
|
289
|
-
if
|
|
290
|
-
|
|
291
|
-
|
|
306
|
+
catch (exportError) {
|
|
307
|
+
// Check if it's a 404 or permission error
|
|
308
|
+
if (exportError && typeof exportError === "object" && "status" in exportError) {
|
|
309
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
310
|
+
const status = exportError.status;
|
|
311
|
+
if (status === 404 || status === 403) {
|
|
312
|
+
throw new Error(`Spreadsheet not accessible (${status}): ${fileId}`);
|
|
313
|
+
}
|
|
292
314
|
}
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
const
|
|
296
|
-
const
|
|
297
|
-
headers: {
|
|
298
|
-
|
|
315
|
+
// If CSV export fails, try the Sheets API as fallback (but this is slower)
|
|
316
|
+
try {
|
|
317
|
+
const sheetsUrl = `https://sheets.googleapis.com/v4/spreadsheets/${fileId}?includeGridData=true`;
|
|
318
|
+
const sheetsRes = yield axiosClient.get(sheetsUrl, {
|
|
319
|
+
headers: {
|
|
320
|
+
Authorization: `Bearer ${authToken}`,
|
|
321
|
+
},
|
|
299
322
|
});
|
|
300
|
-
return
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
323
|
+
return parseGoogleSheetsFromRawContentToPlainText(sheetsRes.data);
|
|
324
|
+
}
|
|
325
|
+
catch (sheetsError) {
|
|
326
|
+
if (isAxiosTimeoutError(sheetsError)) {
|
|
327
|
+
throw new Error("Request timed out using Google Sheets API");
|
|
328
|
+
}
|
|
329
|
+
throw new Error(`Unable to access spreadsheet content: ${fileId}`);
|
|
305
330
|
}
|
|
306
331
|
}
|
|
307
332
|
});
|