@credal/actions 0.2.36 → 0.2.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/actions/autogen/templates.js +106 -106
- package/dist/actions/autogen/types.js +101 -117
- package/dist/actions/providers/google-oauth/searchDriveAndGetContentByKeywords.d.ts +3 -0
- package/dist/actions/providers/google-oauth/searchDriveAndGetContentByKeywords.js +110 -0
- package/dist/actions/providers/google-oauth/utils/extractContentFromDriveFileId.d.ts +1 -2
- package/dist/actions/providers/google-oauth/utils/extractContentFromDriveFileId.js +6 -51
- package/package.json +1 -1
@@ -0,0 +1,110 @@
|
|
1
|
+
/**
 * Down-level async helper: runs a generator function to completion and
 * returns a promise for its return value.
 *
 * Each value the generator yields is awaited; the settled value (or rejection
 * reason) is fed back into the generator via `next` / `throw`.
 *
 * @param {*} thisArg - `this` binding used when invoking the generator function.
 * @param {ArrayLike<*>|undefined} _arguments - Arguments for the generator function (defaults to none).
 * @param {PromiseConstructor|undefined} P - Promise constructor to use; falls back to the global `Promise`.
 * @param {GeneratorFunction} generator - Generator function to drive.
 * @returns {Promise<*>} Resolves with the generator's return value, rejects with anything it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Fall back to the native Promise when no constructor was supplied.
    if (!P) {
        P = Promise;
    }
    // Wrap plain values so that every yielded value can be chained with `.then`.
    const toPromise = (value) => {
        if (value instanceof P) {
            return value;
        }
        return new P((resolve) => {
            resolve(value);
        });
    };
    return new P((resolve, reject) => {
        // Settle the outer promise when the generator finishes, otherwise wait on the yielded value.
        const advance = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            toPromise(result.value).then(onFulfilled, onRejected);
        };
        const onFulfilled = (value) => {
            try {
                advance(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        const onRejected = (reason) => {
            try {
                advance(generator["throw"](reason));
            }
            catch (e) {
                reject(e);
            }
        };
        // Instantiate the generator, then kick off the first step.
        generator = generator.apply(thisArg, _arguments || []);
        advance(generator.next());
    });
};
|
10
|
+
import { axiosClient } from "../../util/axiosClient.js";
|
11
|
+
import { MISSING_AUTH_TOKEN } from "../../util/missingAuthConstants.js";
|
12
|
+
import extractContentFromDriveFileId from "./utils/extractContentFromDriveFileId.js";
|
13
|
+
import { normalizeText } from "../../../utils/string.js";
|
14
|
+
/**
 * Selects at most `limit` characters of `text`, centred on the median
 * occurrence of the given keywords.
 *
 * - Falsy text, or text that already fits in `limit`, is returned unchanged.
 * - With no keywords, or no keyword matches, the first `limit` characters are returned.
 * - Otherwise a `limit`-sized window around the median match offset is returned.
 *   The window is shifted left when the median lies near the end of the text,
 *   so the full `limit` budget is used whenever the text is long enough.
 *
 * Matching is done on `normalizeText(...)` output for both text and keywords;
 * match offsets are then applied to the original `text`.
 * NOTE(review): this assumes `normalizeText` preserves string length — confirm
 * against its implementation. The start clamp below keeps the window inside
 * `text` even if it does not.
 *
 * @param {string} text - Full document text.
 * @param {string[]} keywords - Keywords to locate; entries shorter than 3 characters are ignored.
 * @param {number} limit - Maximum number of characters to return.
 * @returns {string} The selected section of `text`.
 */
const intelligentSelectByMedianSection = (text, keywords, limit) => {
    if (!text || text.length <= limit)
        return text;
    if (!keywords || !keywords.length)
        return text.substring(0, limit);
    // Cap the number of recorded matches so very repetitive documents stay cheap to process.
    const MAX_MATCHES = 1000;
    const positions = [];
    const normalizedText = normalizeText(text);
    for (const keyword of keywords) {
        if (positions.length >= MAX_MATCHES)
            break; // Cap reached: further scanning cannot add positions
        if (keyword.length < 3)
            continue; // Skip very short keywords
        const normalizedKeyword = normalizeText(keyword);
        if (!normalizedKeyword)
            continue; // An empty needle matches at every offset and would skew the median
        let pos = -1;
        while (positions.length < MAX_MATCHES &&
            (pos = normalizedText.indexOf(normalizedKeyword, pos + 1)) !== -1) {
            positions.push(pos);
        }
    }
    if (!positions.length)
        return text.substring(0, limit);
    // Median match offset (a sort is simple and cheap for at most MAX_MATCHES entries)
    positions.sort((a, b) => a - b);
    const medianPos = positions[Math.floor(positions.length / 2)];
    // Centre the window on the median, then clamp it so it never runs past the
    // end of the text; text.length > limit is guaranteed by the first guard.
    const half = Math.floor(limit / 2);
    const lastStart = text.length - limit;
    const start = Math.max(0, Math.min(medianPos - half, lastStart));
    return text.substring(start, start + limit);
};
|
43
|
+
/**
 * Searches Google Drive for files whose full text contains any of the given
 * keywords, then extracts each file's text content.
 *
 * @param {object} _a
 * @param {object} _a.params
 * @param {string[]} _a.params.keywords - Keywords combined with `or` in a `fullText contains` query.
 * @param {number} [_a.params.fileLimit] - When truthy, only the first `fileLimit` search results are processed.
 * @param {number} [_a.params.fileSizeLimit] - When truthy, content longer than this is reduced to a
 *   keyword-centred section via `intelligentSelectByMedianSection`.
 * @param {object} _a.authParams - Must carry `authToken`, sent as a Bearer token.
 * @returns {Promise<{success: boolean, error?: string, files: object[]}>}
 *   On success each file entry has `id`, `name`, `mimeType`, `url` and either `content`
 *   or a per-file `error`. A missing token or a failed search yields `success: false`
 *   with an empty `files` list.
 */
const searchDriveAndGetContentByKeywords = (_a) => __awaiter(void 0, [_a], void 0, function* ({ params, authParams, }) {
    if (!authParams.authToken) {
        return { success: false, error: MISSING_AUTH_TOKEN, files: [] };
    }
    const { keywords, fileLimit, fileSizeLimit } = params;
    // 1. Search for files and get their metadata
    // Escape backslashes BEFORE quotes: otherwise a keyword containing or ending in "\"
    // would swallow the closing quote (or un-escape an escaped quote) and corrupt the query.
    const escapeQueryLiteral = (value) => value.replace(/\\/g, "\\\\").replace(/'/g, "\\'");
    // Build the query: fullText contains 'keyword1' or fullText contains 'keyword2' ...
    const query = keywords.map(kw => `fullText contains '${escapeQueryLiteral(kw)}'`).join(" or ");
    const url = `https://www.googleapis.com/drive/v3/files?q=${encodeURIComponent(query)}&fields=files(id,name,mimeType,webViewLink)&supportsAllDrives=true&includeItemsFromAllDrives=true`;
    let files = [];
    try {
        const res = yield axiosClient.get(url, {
            headers: {
                Authorization: `Bearer ${authParams.authToken}`,
            },
        });
        const found = res.data.files;
        // A response without a `files` list is treated as "no results".
        files = found === null || found === undefined
            ? []
            : found.map((file) => ({
                id: file.id,
                name: file.name,
                mimeType: file.mimeType,
                url: file.webViewLink,
            }));
    }
    catch (error) {
        console.error("Error searching Google Drive", error);
        return {
            success: false,
            error: error instanceof Error ? error.message : "Unknown error",
            files: [],
        };
    }
    // Keep only the first `fileLimit` results (non-mutating; a negative limit keeps none, as before).
    if (fileLimit) {
        files = files.slice(0, Math.max(0, fileLimit));
    }
    // 2. Extract content from files and do some smart range selection
    const processedFiles = yield Promise.all(files
        .filter((file) => file.id && file.mimeType)
        .map((file) => __awaiter(void 0, void 0, void 0, function* () {
        const content = yield extractContentFromDriveFileId({
            params: { fileId: file.id, mimeType: file.mimeType },
            authParams,
        });
        const summary = {
            id: file.id || "",
            name: file.name || "",
            mimeType: file.mimeType || "",
            url: file.url || "",
        };
        if (!content.success) {
            // Per-file failures are reported in place; they do not fail the whole search.
            return Object.assign(summary, { error: content.error });
        }
        let selectedContent = content.content;
        if (fileSizeLimit && selectedContent && selectedContent.length > fileSizeLimit) {
            selectedContent = intelligentSelectByMedianSection(selectedContent, keywords, fileSizeLimit);
        }
        return Object.assign(summary, { content: selectedContent });
    })));
    return { success: true, files: processedFiles };
});
|
110
|
+
export default searchDriveAndGetContentByKeywords;
|
@@ -1,12 +1,11 @@
|
|
1
1
|
import type { AuthParamsType } from "../../../autogen/types.js";
|
2
2
|
export type getDriveFileContentParams = {
|
3
3
|
fileId: string;
|
4
|
-
|
4
|
+
mimeType: string;
|
5
5
|
};
|
6
6
|
export type getDriveFileContentOutput = {
|
7
7
|
success: boolean;
|
8
8
|
content?: string;
|
9
|
-
fileName?: string;
|
10
9
|
error?: string;
|
11
10
|
};
|
12
11
|
declare const extractContentFromDriveFileId: ({ params, authParams, }: {
|
@@ -9,31 +9,14 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
9
9
|
};
|
10
10
|
import { axiosClient } from "../../../util/axiosClient.js";
|
11
11
|
import mammoth from "mammoth";
|
12
|
-
import PDFParser from "pdf2json";
|
13
12
|
import { MISSING_AUTH_TOKEN } from "../../../util/missingAuthConstants.js";
|
14
13
|
const extractContentFromDriveFileId = (_a) => __awaiter(void 0, [_a], void 0, function* ({ params, authParams, }) {
|
15
14
|
if (!authParams.authToken) {
|
16
15
|
return { success: false, error: MISSING_AUTH_TOKEN };
|
17
16
|
}
|
18
|
-
const { fileId } = params;
|
17
|
+
const { fileId, mimeType } = params;
|
18
|
+
let content = "";
|
19
19
|
try {
|
20
|
-
// First, get file metadata to determine the file type
|
21
|
-
const metadataUrl = `https://www.googleapis.com/drive/v3/files/${encodeURIComponent(fileId)}?fields=name,mimeType,size`;
|
22
|
-
const metadataRes = yield axiosClient.get(metadataUrl, {
|
23
|
-
headers: {
|
24
|
-
Authorization: `Bearer ${authParams.authToken}`,
|
25
|
-
},
|
26
|
-
});
|
27
|
-
const { name: fileName, mimeType, size } = metadataRes.data;
|
28
|
-
// Check if file is too large (50MB limit for safety)
|
29
|
-
const maxFileSize = 50 * 1024 * 1024;
|
30
|
-
if (size && parseInt(size) > maxFileSize) {
|
31
|
-
return {
|
32
|
-
success: false,
|
33
|
-
error: "File too large (>50MB)",
|
34
|
-
};
|
35
|
-
}
|
36
|
-
let content = "";
|
37
20
|
// Handle different file types - read content directly
|
38
21
|
if (mimeType === "application/vnd.google-apps.document") {
|
39
22
|
// Google Docs - download as plain text
|
@@ -74,37 +57,10 @@ const extractContentFromDriveFileId = (_a) => __awaiter(void 0, [_a], void 0, fu
|
|
74
57
|
content = exportRes.data;
|
75
58
|
}
|
76
59
|
else if (mimeType === "application/pdf") {
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
Authorization: `Bearer ${authParams.authToken}`,
|
82
|
-
},
|
83
|
-
responseType: "arraybuffer",
|
84
|
-
});
|
85
|
-
try {
|
86
|
-
const pdfParser = new PDFParser(null); // null context, 1 for text extraction
|
87
|
-
// Create a promise to handle the async PDF parsing
|
88
|
-
const pdfContent = yield new Promise((resolve, reject) => {
|
89
|
-
pdfParser.on("pdfParser_dataError", errData => {
|
90
|
-
reject(new Error(`PDF parsing error: ${errData.parserError}`));
|
91
|
-
});
|
92
|
-
pdfParser.on("pdfParser_dataReady", pdfData => {
|
93
|
-
// Extract text from all pages
|
94
|
-
const textContent = pdfData.Pages.map(page => page.Texts.map(text => text.R.map(run => decodeURIComponent(run.T)).join("")).join("")).join("\n");
|
95
|
-
resolve(textContent);
|
96
|
-
});
|
97
|
-
// Parse the PDF buffer
|
98
|
-
pdfParser.parseBuffer(Buffer.from(downloadRes.data));
|
99
|
-
});
|
100
|
-
content = pdfContent;
|
101
|
-
}
|
102
|
-
catch (pdfError) {
|
103
|
-
return {
|
104
|
-
success: false,
|
105
|
-
error: `Failed to parse PDF: ${pdfError instanceof Error ? pdfError.message : "Unknown PDF error"}`,
|
106
|
-
};
|
107
|
-
}
|
60
|
+
return {
|
61
|
+
success: false,
|
62
|
+
error: "PDF files are not supported for text extraction",
|
63
|
+
};
|
108
64
|
}
|
109
65
|
else if (mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document" ||
|
110
66
|
mimeType === "application/msword") {
|
@@ -160,7 +116,6 @@ const extractContentFromDriveFileId = (_a) => __awaiter(void 0, [_a], void 0, fu
|
|
160
116
|
return {
|
161
117
|
success: true,
|
162
118
|
content,
|
163
|
-
fileName,
|
164
119
|
};
|
165
120
|
}
|
166
121
|
catch (error) {
|