@credal/actions 0.2.142 → 0.2.144

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,19 +14,20 @@ import { extractTextFromPdf } from "../../../utils/pdf.js";
14
14
  import { getGoogleDocContent, getGoogleSheetContent, getGoogleSlidesContent } from "../../../utils/google.js";
15
15
  import { read, utils } from "xlsx";
16
16
  import officeParser from "officeparser";
17
+ const BASE_WEB_URL = "https://drive.google.com/file/d/";
18
+ const BASE_API_URL = "https://www.googleapis.com/drive/v3/files/";
17
19
  const getDriveFileContentById = (_a) => __awaiter(void 0, [_a], void 0, function* ({ params, authParams, }) {
18
20
  var _b, _c, _d, _e;
19
21
  if (!authParams.authToken) {
20
22
  return { success: false, error: MISSING_AUTH_TOKEN };
21
23
  }
22
- const BASE_URL = "https://www.googleapis.com/drive/v3/files/";
23
24
  const headers = { Authorization: `Bearer ${authParams.authToken}` };
24
25
  const { limit: charLimit, fileId } = params;
25
26
  const timeoutLimit = params.timeoutLimit !== undefined && params.timeoutLimit > 0 ? params.timeoutLimit * 1000 : 15000;
26
27
  const axiosClient = createAxiosClientWithTimeout(timeoutLimit);
27
28
  // helper to fetch drive metadata with fields we need (incl. shortcut details)
28
29
  const fetchMeta = (fid) => __awaiter(void 0, void 0, void 0, function* () {
29
- const metaUrl = `${BASE_URL}${encodeURIComponent(fid)}` +
30
+ const metaUrl = `${BASE_API_URL}${encodeURIComponent(fid)}` +
30
31
  `?fields=name,mimeType,size,driveId,parents,` +
31
32
  `shortcutDetails(targetId,targetMimeType)` +
32
33
  `&supportsAllDrives=true`;
@@ -63,7 +64,7 @@ const getDriveFileContentById = (_a) => __awaiter(void 0, [_a], void 0, function
63
64
  content = yield getGoogleSlidesContent(params.fileId, authParams.authToken, axiosClient, sharedDriveParam);
64
65
  }
65
66
  else if (mimeType === "application/pdf") {
66
- const downloadUrl = `${BASE_URL}${encodeURIComponent(params.fileId)}?alt=media${sharedDriveParam}`;
67
+ const downloadUrl = `${BASE_API_URL}${encodeURIComponent(params.fileId)}?alt=media${sharedDriveParam}`;
67
68
  const downloadRes = yield axiosClient.get(downloadUrl, {
68
69
  headers,
69
70
  responseType: "arraybuffer",
@@ -80,7 +81,7 @@ const getDriveFileContentById = (_a) => __awaiter(void 0, [_a], void 0, function
80
81
  }
81
82
  else if (mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document" ||
82
83
  mimeType === "application/msword") {
83
- const downloadUrl = `${BASE_URL}${encodeURIComponent(params.fileId)}?alt=media${sharedDriveParam}`;
84
+ const downloadUrl = `${BASE_API_URL}${encodeURIComponent(params.fileId)}?alt=media${sharedDriveParam}`;
84
85
  const downloadRes = yield axiosClient.get(downloadUrl, {
85
86
  headers,
86
87
  responseType: "arraybuffer",
@@ -97,7 +98,7 @@ const getDriveFileContentById = (_a) => __awaiter(void 0, [_a], void 0, function
97
98
  }
98
99
  }
99
100
  else if (mimeType === "text/plain" || mimeType === "text/html" || mimeType === "application/rtf") {
100
- const downloadUrl = `${BASE_URL}${encodeURIComponent(params.fileId)}?alt=media${sharedDriveParam}`;
101
+ const downloadUrl = `${BASE_API_URL}${encodeURIComponent(params.fileId)}?alt=media${sharedDriveParam}`;
101
102
  const downloadRes = yield axiosClient.get(downloadUrl, {
102
103
  headers,
103
104
  responseType: "text",
@@ -109,7 +110,7 @@ const getDriveFileContentById = (_a) => __awaiter(void 0, [_a], void 0, function
109
110
  mimeType === "text/tab-separated-values" ||
110
111
  mimeType === "application/rtf" ||
111
112
  mimeType === "application/json") {
112
- const downloadUrl = `${BASE_URL}${encodeURIComponent(params.fileId)}?alt=media${sharedDriveParam}`;
113
+ const downloadUrl = `${BASE_API_URL}${encodeURIComponent(params.fileId)}?alt=media${sharedDriveParam}`;
113
114
  const downloadRes = yield axiosClient.get(downloadUrl, {
114
115
  headers,
115
116
  responseType: "arraybuffer",
@@ -120,7 +121,7 @@ const getDriveFileContentById = (_a) => __awaiter(void 0, [_a], void 0, function
120
121
  }
121
122
  else if (mimeType === "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ||
122
123
  mimeType === "application/vnd.ms-excel") {
123
- const downloadUrl = `${BASE_URL}${encodeURIComponent(params.fileId)}?alt=media${sharedDriveParam}`;
124
+ const downloadUrl = `${BASE_API_URL}${encodeURIComponent(params.fileId)}?alt=media${sharedDriveParam}`;
124
125
  const downloadRes = yield axiosClient.get(downloadUrl, {
125
126
  headers,
126
127
  responseType: "arraybuffer",
@@ -139,7 +140,7 @@ const getDriveFileContentById = (_a) => __awaiter(void 0, [_a], void 0, function
139
140
  }
140
141
  else if (mimeType === "application/vnd.openxmlformats-officedocument.presentationml.presentation") {
141
142
  // Handle modern PowerPoint files (.pptx only)
142
- const downloadUrl = `${BASE_URL}${encodeURIComponent(params.fileId)}?alt=media${sharedDriveParam}`;
143
+ const downloadUrl = `${BASE_API_URL}${encodeURIComponent(params.fileId)}?alt=media${sharedDriveParam}`;
143
144
  const downloadRes = yield axiosClient.get(downloadUrl, {
144
145
  headers,
145
146
  responseType: "arraybuffer",
@@ -174,7 +175,7 @@ const getDriveFileContentById = (_a) => __awaiter(void 0, [_a], void 0, function
174
175
  results: [
175
176
  {
176
177
  name: fileName,
177
- url: `${BASE_URL}${encodeURIComponent(params.fileId)}`,
178
+ url: `${BASE_WEB_URL}${params.fileId}`,
178
179
  contents: { content, fileName, fileLength: originalLength },
179
180
  },
180
181
  ],
@@ -10,6 +10,20 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
10
10
  import { MISSING_AUTH_TOKEN } from "../../util/missingAuthConstants.js";
11
11
  import searchDriveByQuery from "./searchDriveByQuery.js";
12
12
  import getDriveFileContentById from "./getDriveFileContentById.js";
13
/**
 * Runs `processor` over `items` in consecutive batches so that at most
 * `batchSize` calls are started before the previous batch has settled.
 *
 * Each batch is awaited with `Promise.allSettled`, so a rejected promise from
 * one item does not abort the run. Only fulfilled values are kept, in input
 * order; rejected items are dropped, which means the returned array can be
 * shorter than `items`.
 *
 * @param {Array} items - Values to process.
 * @param {Function} processor - Invoked through `Array.prototype.map`, so it
 *   receives `(item, indexWithinBatch, batch)`.
 * @param {number} [batchSize=3] - Number of items per batch; must be greater than 0.
 * @returns {Promise<Array>} Fulfilled results, in input order.
 * @throws {RangeError} (as a rejection) when `batchSize` is not greater than 0.
 */
const processBatch = async (items, processor, batchSize = 3) => {
    // A zero, negative or null step never advances the loop below (it would spin
    // forever), and NaN ends it after one empty batch, silently returning [].
    if (!(batchSize > 0)) {
        throw new RangeError(`batchSize must be greater than 0, received ${String(batchSize)}`);
    }
    const results = [];
    for (let start = 0; start < items.length; start += batchSize) {
        const batch = items.slice(start, start + batchSize);
        const settled = await Promise.allSettled(batch.map(processor));
        for (const outcome of settled) {
            // Best-effort: rejected items are skipped rather than failing the whole run.
            if (outcome.status === "fulfilled") {
                results.push(outcome.value);
            }
        }
    }
    return results;
};
13
27
  const searchDriveByKeywordsAndGetFileContent = (_a) => __awaiter(void 0, [_a], void 0, function* ({ params, authParams, }) {
14
28
  var _b;
15
29
  if (!authParams.authToken) {
@@ -29,13 +43,33 @@ const searchDriveByKeywordsAndGetFileContent = (_a) => __awaiter(void 0, [_a], v
29
43
  if (!searchResult.success) {
30
44
  return { success: false, error: searchResult.error };
31
45
  }
32
- // For each file, fetch its content in parallel
33
46
  const files = (_b = searchResult.files) !== null && _b !== void 0 ? _b : [];
34
- const contentPromises = files.map((file) => __awaiter(void 0, void 0, void 0, function* () {
47
+ // File types that are likely to fail or have no useful text content
48
+ const problematicMimeTypes = new Set([
49
+ "application/vnd.google-apps.form",
50
+ "application/vnd.google-apps.site",
51
+ "application/vnd.google-apps.map",
52
+ "application/vnd.google-apps.drawing",
53
+ "application/vnd.openxmlformats-officedocument.presentationml.presentation", // PowerPoint
54
+ "application/vnd.ms-powerpoint",
55
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", // Excel (not supported yet)
56
+ "application/vnd.ms-excel",
57
+ ]);
58
+ // Filter out problematic files BEFORE processing to avoid wasting resources
59
+ const validFiles = files
60
+ .slice(0, limit)
61
+ .filter(file => file.id && file.name && !problematicMimeTypes.has(file.mimeType));
62
+ // Process only valid files in smaller batches to avoid overwhelming the API
63
+ const filesWithContent = yield processBatch(validFiles, (file) => __awaiter(void 0, void 0, void 0, function* () {
35
64
  var _a, _b, _c;
36
65
  try {
66
+ // Add timeout for individual file content requests with shorter timeout
37
67
  const contentResult = yield getDriveFileContentById({
38
- params: { fileId: file.id, limit: maxChars },
68
+ params: {
69
+ fileId: file.id,
70
+ limit: maxChars,
71
+ timeoutLimit: 2,
72
+ },
39
73
  authParams,
40
74
  });
41
75
  return {
@@ -46,17 +80,16 @@ const searchDriveByKeywordsAndGetFileContent = (_a) => __awaiter(void 0, [_a], v
46
80
  content: contentResult.success ? (_c = (_b = (_a = contentResult.results) === null || _a === void 0 ? void 0 : _a[0]) === null || _b === void 0 ? void 0 : _b.contents) === null || _c === void 0 ? void 0 : _c.content : undefined,
47
81
  };
48
82
  }
49
- catch (error) {
50
- console.error(`Error fetching content for file ${file.id}:`, error);
83
+ catch (_d) {
51
84
  return {
52
85
  id: file.id,
53
86
  name: file.name,
54
87
  mimeType: file.mimeType,
55
88
  url: file.url,
89
+ content: undefined, // Gracefully handle errors
56
90
  };
57
91
  }
58
- }));
59
- const filesWithContent = yield Promise.all(contentPromises);
92
+ }), 5);
60
93
  // Return combined results
61
94
  return {
62
95
  success: true,
@@ -37,14 +37,14 @@ const searchDriveByQuery = (_a) => __awaiter(void 0, [_a], void 0, function* ({
37
37
  });
38
38
  // Original search method - search all drives at once
39
39
  const searchAllDrivesAtOnce = (query, authToken, limit, orderByQuery) => __awaiter(void 0, void 0, void 0, function* () {
40
- const allDrivesUrl = `https://www.googleapis.com/drive/v3/files?q=${encodeURIComponent(query)}&fields=files(id,name,mimeType,webViewLink)&supportsAllDrives=true&includeItemsFromAllDrives=true&corpora=allDrives&pageSize=1000${orderByQuery ? `&orderBy=${encodeURIComponent(orderByQuery)}` : ""}`;
40
+ const allDrivesUrl = `https://www.googleapis.com/drive/v3/files?q=${encodeURIComponent(query)}&fields=files(id,name,mimeType,webViewLink)&supportsAllDrives=true&includeItemsFromAllDrives=true&corpora=allDrives&pageSize=100${orderByQuery ? `&orderBy=${encodeURIComponent(orderByQuery)}` : ""}`;
41
41
  const allDrivesRes = axiosClient.get(allDrivesUrl, {
42
42
  headers: {
43
43
  Authorization: `Bearer ${authToken}`,
44
44
  },
45
45
  });
46
46
  // need to search domain wide separately because the allDrives search doesn't include domain wide files
47
- const orgWideUrl = `https://www.googleapis.com/drive/v3/files?q=${encodeURIComponent(query)}&fields=files(id,name,mimeType,webViewLink)&corpora=domain&pageSize=1000${orderByQuery ? `&orderBy=${encodeURIComponent(orderByQuery)}` : ""}`;
47
+ const orgWideUrl = `https://www.googleapis.com/drive/v3/files?q=${encodeURIComponent(query)}&fields=files(id,name,mimeType,webViewLink)&corpora=domain&pageSize=100${orderByQuery ? `&orderBy=${encodeURIComponent(orderByQuery)}` : ""}`;
48
48
  const orgWideRes = axiosClient.get(orgWideUrl, {
49
49
  headers: {
50
50
  Authorization: `Bearer ${authToken}`,
@@ -76,7 +76,7 @@ const searchAllDrivesIndividually = (query, authToken, limit, orderByQuery) => _
76
76
  `q=${encodeURIComponent(query)}&` +
77
77
  `fields=files(id,name,mimeType,webViewLink),nextPageToken&` +
78
78
  `corpora=domain&` +
79
- `pageSize=1000${orderByQuery ? `&orderBy=${encodeURIComponent(orderByQuery)}` : ""}`;
79
+ `pageSize=100${orderByQuery ? `&orderBy=${encodeURIComponent(orderByQuery)}` : ""}`;
80
80
  const domainDriveFunction = () => __awaiter(void 0, void 0, void 0, function* () {
81
81
  var _a, _b;
82
82
  const domainRes = yield axiosClient.get(domainUrl, {
@@ -148,11 +148,11 @@ const searchSingleDrive = (query, driveId, authToken, orderByQuery) => __awaiter
148
148
  let url;
149
149
  if (driveId === "root") {
150
150
  // Search in user's personal drive
151
- url = `https://www.googleapis.com/drive/v3/files?q=${encodeURIComponent(query)}&fields=files(id,name,mimeType,webViewLink),nextPageToken&pageSize=1000${orderByQuery ? `&orderBy=${encodeURIComponent(orderByQuery)}` : ""}${nextPageToken ? `&pageToken=${nextPageToken}` : ""}`;
151
+ url = `https://www.googleapis.com/drive/v3/files?q=${encodeURIComponent(query)}&fields=files(id,name,mimeType,webViewLink),nextPageToken&pageSize=100${orderByQuery ? `&orderBy=${encodeURIComponent(orderByQuery)}` : ""}${nextPageToken ? `&pageToken=${nextPageToken}` : ""}`;
152
152
  }
153
153
  else {
154
154
  // Search in specific shared drive
155
- url = `https://www.googleapis.com/drive/v3/files?q=${encodeURIComponent(`${query} and parents in '${driveId}'`)}&fields=files(id,name,mimeType,webViewLink),nextPageToken&supportsAllDrives=true&includeItemsFromAllDrives=true&corpora=drive&driveId=${driveId}&pageSize=1000${orderByQuery ? `&orderBy=${encodeURIComponent(orderByQuery)}` : ""}${nextPageToken ? `&pageToken=${nextPageToken}` : ""}`;
155
+ url = `https://www.googleapis.com/drive/v3/files?q=${encodeURIComponent(`${query} and parents in '${driveId}'`)}&fields=files(id,name,mimeType,webViewLink),nextPageToken&supportsAllDrives=true&includeItemsFromAllDrives=true&corpora=drive&driveId=${driveId}&pageSize=100${orderByQuery ? `&orderBy=${encodeURIComponent(orderByQuery)}` : ""}${nextPageToken ? `&pageToken=${nextPageToken}` : ""}`;
156
156
  }
157
157
  const res = yield axiosClient.get(url, {
158
158
  headers: {
@@ -263,45 +263,70 @@ export function getGoogleDocContent(fileId, authToken, axiosClient, sharedDriveP
263
263
  }
264
264
  else {
265
265
  console.log("Error using Google Docs API", docsError);
266
- // Fallback to Drive API export if Docs API fails
267
- const exportUrl = `${GDRIVE_BASE_URL}${encodeURIComponent(fileId)}/export?mimeType=text/plain${sharedDriveParams}`;
268
- const exportRes = yield axiosClient.get(exportUrl, {
269
- headers: { Authorization: `Bearer ${authToken}` },
270
- responseType: "text",
271
- });
272
- return exportRes.data;
266
+ // Check if it's a 404 or permission error - don't retry these
267
+ if (docsError && typeof docsError === "object" && "status" in docsError) {
268
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
269
+ const status = docsError.status;
270
+ if (status === 404 || status === 403) {
271
+ throw new Error(`File not accessible (${status}): ${fileId}`);
272
+ }
273
+ }
274
+ try {
275
+ // Fallback to Drive API export if Docs API fails
276
+ const exportUrl = `${GDRIVE_BASE_URL}${encodeURIComponent(fileId)}/export?mimeType=text/plain${sharedDriveParams}`;
277
+ const exportRes = yield axiosClient.get(exportUrl, {
278
+ headers: { Authorization: `Bearer ${authToken}` },
279
+ responseType: "text",
280
+ });
281
+ return exportRes.data;
282
+ }
283
+ catch (_a) {
284
+ throw new Error(`Unable to access document content: ${fileId}`);
285
+ }
273
286
  }
274
287
  }
275
288
  });
276
289
  }
277
290
  export function getGoogleSheetContent(fileId, authToken, axiosClient, sharedDriveParams) {
278
291
  return __awaiter(this, void 0, void 0, function* () {
292
+ // Use CSV export as primary method - it's much faster and more reliable for large sheets
293
+ // The Sheets API with includeGridData can timeout on large spreadsheets
279
294
  try {
280
- const sheetsUrl = `https://sheets.googleapis.com/v4/spreadsheets/${fileId}?includeGridData=true`;
281
- const sheetsRes = yield axiosClient.get(sheetsUrl, {
282
- headers: {
283
- Authorization: `Bearer ${authToken}`,
284
- },
295
+ const exportUrl = `${GDRIVE_BASE_URL}${encodeURIComponent(fileId)}/export?mimeType=text/csv${sharedDriveParams}`;
296
+ const exportRes = yield axiosClient.get(exportUrl, {
297
+ headers: { Authorization: `Bearer ${authToken}` },
298
+ responseType: "text",
285
299
  });
286
- return parseGoogleSheetsFromRawContentToPlainText(sheetsRes.data);
300
+ return exportRes.data
301
+ .split("\n")
302
+ .map((line) => line.replace(/,+$/, ""))
303
+ .map((line) => line.replace(/,{2,}/g, ","))
304
+ .join("\n");
287
305
  }
288
- catch (sheetsError) {
289
- if (isAxiosTimeoutError(sheetsError)) {
290
- console.log("Request timed out using Google Sheets API - dont retry");
291
- throw new Error("Request timed out using Google Sheets API");
306
+ catch (exportError) {
307
+ // Check if it's a 404 or permission error
308
+ if (exportError && typeof exportError === "object" && "status" in exportError) {
309
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
310
+ const status = exportError.status;
311
+ if (status === 404 || status === 403) {
312
+ throw new Error(`Spreadsheet not accessible (${status}): ${fileId}`);
313
+ }
292
314
  }
293
- else {
294
- console.log("Error using Google Sheets API", sheetsError);
295
- const exportUrl = `${GDRIVE_BASE_URL}${encodeURIComponent(fileId)}/export?mimeType=text/csv${sharedDriveParams}`;
296
- const exportRes = yield axiosClient.get(exportUrl, {
297
- headers: { Authorization: `Bearer ${authToken}` },
298
- responseType: "text",
315
+ // If CSV export fails, try the Sheets API as fallback (but this is slower)
316
+ try {
317
+ const sheetsUrl = `https://sheets.googleapis.com/v4/spreadsheets/${fileId}?includeGridData=true`;
318
+ const sheetsRes = yield axiosClient.get(sheetsUrl, {
319
+ headers: {
320
+ Authorization: `Bearer ${authToken}`,
321
+ },
299
322
  });
300
- return exportRes.data
301
- .split("\n")
302
- .map((line) => line.replace(/,+$/, ""))
303
- .map((line) => line.replace(/,{2,}/g, ","))
304
- .join("\n");
323
+ return parseGoogleSheetsFromRawContentToPlainText(sheetsRes.data);
324
+ }
325
+ catch (sheetsError) {
326
+ if (isAxiosTimeoutError(sheetsError)) {
327
+ throw new Error("Request timed out using Google Sheets API");
328
+ }
329
+ throw new Error(`Unable to access spreadsheet content: ${fileId}`);
305
330
  }
306
331
  }
307
332
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@credal/actions",
3
- "version": "0.2.142",
3
+ "version": "0.2.144",
4
4
  "type": "module",
5
5
  "description": "AI Actions by Credal AI",
6
6
  "sideEffects": false,