@credal/actions 0.2.36 → 0.2.37

This diff reflects the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
@@ -0,0 +1,110 @@
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+ return new (P || (P = Promise))(function (resolve, reject) {
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
+ });
+ };
+ import { axiosClient } from "../../util/axiosClient.js";
+ import { MISSING_AUTH_TOKEN } from "../../util/missingAuthConstants.js";
+ import extractContentFromDriveFileId from "./utils/extractContentFromDriveFileId.js";
+ import { normalizeText } from "../../../utils/string.js";
+ /** Intelligently selects a section of text around the median occurrence of keywords */
+ const intelligentSelectByMedianSection = (text, keywords, limit) => {
+ if (!text || text.length <= limit)
+ return text;
+ if (!(keywords === null || keywords === void 0 ? void 0 : keywords.length))
+ return text.substring(0, limit);
+ // Find all keyword positions (case-insensitive, limited to first 1000 matches)
+ const positions = [];
+ const normalizedText = normalizeText(text);
+ for (const keyword of keywords) {
+ if (keyword.length < 3)
+ continue; // Skip very short keywords
+ let pos = -1;
+ const normalizedKeyword = normalizeText(keyword);
+ while ((pos = normalizedText.indexOf(normalizedKeyword, pos + 1)) !== -1 && positions.length < 1000) {
+ positions.push(pos);
+ }
+ }
+ if (!positions.length)
+ return text.substring(0, limit);
+ // Find median position (using sort for simplicity, still fast for 1000 elements)
+ positions.sort((a, b) => a - b);
+ const medianPos = positions[Math.floor(positions.length / 2)];
+ // Return window around median
+ const half = Math.floor(limit / 2);
+ const start = Math.max(0, medianPos - half);
+ const end = Math.min(text.length, start + limit);
+ return text.substring(start, end);
+ };
+ const searchDriveAndGetContentByKeywords = (_a) => __awaiter(void 0, [_a], void 0, function* ({ params, authParams, }) {
+ var _b;
+ if (!authParams.authToken) {
+ return { success: false, error: MISSING_AUTH_TOKEN, files: [] };
+ }
+ const { keywords, fileLimit, fileSizeLimit } = params;
+ let files = [];
+ // 1. Search for files and get their metadata
+ // Build the query: fullText contains 'keyword1' or fullText contains 'keyword2' ...
+ const query = keywords.map(kw => `fullText contains '${kw.replace(/'/g, "\\'")}'`).join(" or ");
+ const url = `https://www.googleapis.com/drive/v3/files?q=${encodeURIComponent(query)}&fields=files(id,name,mimeType,webViewLink)&supportsAllDrives=true&includeItemsFromAllDrives=true`;
+ try {
+ const res = yield axiosClient.get(url, {
+ headers: {
+ Authorization: `Bearer ${authParams.authToken}`,
+ },
+ });
+ files =
+ ((_b = res.data.files) === null || _b === void 0 ? void 0 : _b.map((file) => ({
+ id: file.id,
+ name: file.name,
+ mimeType: file.mimeType,
+ url: file.webViewLink,
+ }))) || [];
+ }
+ catch (error) {
+ console.error("Error searching Google Drive", error);
+ return {
+ success: false,
+ error: error instanceof Error ? error.message : "Unknown error",
+ files: [],
+ };
+ }
+ files = fileLimit ? files.splice(0, fileLimit) : files;
+ // 2. Extract content from files and do some smart range selection
+ const processedFiles = yield Promise.all(files
+ .filter((file) => file.id && file.mimeType)
+ .map((file) => __awaiter(void 0, void 0, void 0, function* () {
+ const content = yield extractContentFromDriveFileId({
+ params: { fileId: file.id, mimeType: file.mimeType },
+ authParams,
+ });
+ if (content.success) {
+ let selectedContent = content.content;
+ if (fileSizeLimit && selectedContent && selectedContent.length > fileSizeLimit) {
+ selectedContent = intelligentSelectByMedianSection(selectedContent, keywords, fileSizeLimit);
+ }
+ return {
+ id: file.id || "",
+ name: file.name || "",
+ mimeType: file.mimeType || "",
+ url: file.url || "",
+ content: selectedContent,
+ };
+ }
+ else {
+ return {
+ id: file.id || "",
+ name: file.name || "",
+ mimeType: file.mimeType || "",
+ url: file.url || "",
+ error: content.error,
+ };
+ }
+ })));
+ return { success: true, files: processedFiles };
+ });
+ export default searchDriveAndGetContentByKeywords;
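
The hunk above adds a new compiled action, searchDriveAndGetContentByKeywords, which ORs the keywords into a single Drive fullText query, caps the result list at fileLimit, extracts each file's content, and trims it to fileSizeLimit characters around the median keyword occurrence. A minimal caller sketch in TypeScript (the import path and environment variable are hypothetical; the parameter and result shapes mirror the compiled code shown):

// Sketch only: import path and GOOGLE_OAUTH_TOKEN are assumptions, not documented API.
import searchDriveAndGetContentByKeywords from "./searchDriveAndGetContentByKeywords.js"; // hypothetical path

const result = await searchDriveAndGetContentByKeywords({
  params: {
    keywords: ["quarterly report", "revenue"], // becomes: fullText contains 'quarterly report' or fullText contains 'revenue'
    fileLimit: 5,           // keep at most the first 5 matching files
    fileSizeLimit: 20000,   // trim each file's content to ~20k chars around the median keyword hit
  },
  authParams: {
    authToken: process.env.GOOGLE_OAUTH_TOKEN ?? "", // Google Drive OAuth access token
  },
});

if (result.success) {
  for (const file of result.files) {
    // Each entry carries id, name, mimeType, url, and either content or a per-file error.
    console.log(file.name, file.url, file.content?.length ?? 0, file.error ?? "");
  }
}
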
@@ -1,12 +1,11 @@
  import type { AuthParamsType } from "../../../autogen/types.js";
  export type getDriveFileContentParams = {
  fileId: string;
- keywords?: string[];
+ mimeType: string;
  };
  export type getDriveFileContentOutput = {
  success: boolean;
  content?: string;
- fileName?: string;
  error?: string;
  };
  declare const extractContentFromDriveFileId: ({ params, authParams, }: {
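
Per the type changes above, getDriveFileContentParams now requires mimeType and no longer accepts keywords, and getDriveFileContentOutput no longer includes fileName. A hedged sketch of the updated call shape (the deep import path is hypothetical; the shapes follow the .d.ts above):

// Sketch only: import path and placeholder values are assumptions.
import extractContentFromDriveFileId from "./utils/extractContentFromDriveFileId.js"; // hypothetical path

const output = await extractContentFromDriveFileId({
  params: {
    fileId: "FILE_ID",                                 // Drive file ID (placeholder value)
    mimeType: "application/vnd.google-apps.document",  // now required; previously derived from file metadata
  },
  authParams: { authToken: "OAUTH_TOKEN" },            // placeholder token
});
// output is { success: boolean; content?: string; error?: string } — fileName is no longer returned.
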
@@ -9,31 +9,14 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
  };
  import { axiosClient } from "../../../util/axiosClient.js";
  import mammoth from "mammoth";
- import PDFParser from "pdf2json";
  import { MISSING_AUTH_TOKEN } from "../../../util/missingAuthConstants.js";
  const extractContentFromDriveFileId = (_a) => __awaiter(void 0, [_a], void 0, function* ({ params, authParams, }) {
  if (!authParams.authToken) {
  return { success: false, error: MISSING_AUTH_TOKEN };
  }
- const { fileId } = params;
+ const { fileId, mimeType } = params;
+ let content = "";
  try {
- // First, get file metadata to determine the file type
- const metadataUrl = `https://www.googleapis.com/drive/v3/files/${encodeURIComponent(fileId)}?fields=name,mimeType,size`;
- const metadataRes = yield axiosClient.get(metadataUrl, {
- headers: {
- Authorization: `Bearer ${authParams.authToken}`,
- },
- });
- const { name: fileName, mimeType, size } = metadataRes.data;
- // Check if file is too large (50MB limit for safety)
- const maxFileSize = 50 * 1024 * 1024;
- if (size && parseInt(size) > maxFileSize) {
- return {
- success: false,
- error: "File too large (>50MB)",
- };
- }
- let content = "";
  // Handle different file types - read content directly
  if (mimeType === "application/vnd.google-apps.document") {
  // Google Docs - download as plain text
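
The removed lines above show that extractContentFromDriveFileId no longer looks up the file's name, mimeType, and size itself (and, with that, the 50MB size guard is gone); the caller now passes mimeType in, as the new search action does from its file list response. If only a fileId is available, the mimeType can still be fetched from the same Drive v3 metadata endpoint the removed code used. A sketch, assuming plain axios rather than the package's internal axiosClient:

// Sketch: fetch the mimeType (and optionally size, to reinstate a caller-side size cap)
// from the Drive v3 files.get metadata endpoint used by the removed lines above.
import axios from "axios";

async function getDriveMimeType(fileId: string, authToken: string): Promise<string> {
  const url = `https://www.googleapis.com/drive/v3/files/${encodeURIComponent(fileId)}?fields=mimeType,size`;
  const res = await axios.get(url, {
    headers: { Authorization: `Bearer ${authToken}` },
  });
  return res.data.mimeType;
}
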
@@ -74,37 +57,10 @@ const extractContentFromDriveFileId = (_a) => __awaiter(void 0, [_a], void 0, fu
  content = exportRes.data;
  }
  else if (mimeType === "application/pdf") {
- // PDF files - use pdf2json
- const downloadUrl = `https://www.googleapis.com/drive/v3/files/${encodeURIComponent(fileId)}?alt=media`;
- const downloadRes = yield axiosClient.get(downloadUrl, {
- headers: {
- Authorization: `Bearer ${authParams.authToken}`,
- },
- responseType: "arraybuffer",
- });
- try {
- const pdfParser = new PDFParser(null); // null context, 1 for text extraction
- // Create a promise to handle the async PDF parsing
- const pdfContent = yield new Promise((resolve, reject) => {
- pdfParser.on("pdfParser_dataError", errData => {
- reject(new Error(`PDF parsing error: ${errData.parserError}`));
- });
- pdfParser.on("pdfParser_dataReady", pdfData => {
- // Extract text from all pages
- const textContent = pdfData.Pages.map(page => page.Texts.map(text => text.R.map(run => decodeURIComponent(run.T)).join("")).join("")).join("\n");
- resolve(textContent);
- });
- // Parse the PDF buffer
- pdfParser.parseBuffer(Buffer.from(downloadRes.data));
- });
- content = pdfContent;
- }
- catch (pdfError) {
- return {
- success: false,
- error: `Failed to parse PDF: ${pdfError instanceof Error ? pdfError.message : "Unknown PDF error"}`,
- };
- }
+ return {
+ success: false,
+ error: "PDF files are not supported for text extraction",
+ };
  }
  else if (mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document" ||
  mimeType === "application/msword") {
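
The hunk above drops the pdf2json path entirely: as of 0.2.37, application/pdf files return { success: false, error: "PDF files are not supported for text extraction" } instead of parsed text. A caller-side guard, as a hedged sketch (import path hypothetical):

// Sketch only: skip PDFs before calling the extractor, since 0.2.37 no longer parses them.
import extractContentFromDriveFileId from "./utils/extractContentFromDriveFileId.js"; // hypothetical path

async function extractNonPdf(fileId: string, mimeType: string, authToken: string) {
  if (mimeType === "application/pdf") {
    // Route PDFs to a separate pipeline; the action itself would now return
    // { success: false, error: "PDF files are not supported for text extraction" }.
    return { success: false, error: "PDF skipped by caller" };
  }
  return extractContentFromDriveFileId({ params: { fileId, mimeType }, authParams: { authToken } });
}
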
@@ -160,7 +116,6 @@ const extractContentFromDriveFileId = (_a) => __awaiter(void 0, [_a], void 0, fu
  return {
  success: true,
  content,
- fileName,
  };
  }
  catch (error) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@credal/actions",
- "version": "0.2.36",
+ "version": "0.2.37",
  "type": "module",
  "description": "AI Actions by Credal AI",
  "sideEffects": false,