@sybilion/uilib 1.3.14 → 1.3.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/esm/components/ui/Chat/ChatChrome/ChatChrome.js +3 -0
- package/dist/esm/components/ui/Chat/ChatMessage/UserTextFileAttachmentBubble.js +9 -2
- package/dist/esm/components/ui/Chat/ChatPrompt/ChatPromptAttachments.js +7 -1
- package/dist/esm/components/ui/Chat/buildChatSendMessagePayload.js +4 -0
- package/dist/esm/components/ui/Chat/chatAttachmentAccept.js +20 -1
- package/dist/esm/components/ui/Chat/chatAttachmentExtract.js +11 -1
- package/dist/esm/components/ui/Chat/chatDocxExtract.js +17 -0
- package/dist/esm/components/ui/Chat/chatXlsxExtract.js +34 -0
- package/dist/esm/types/src/components/ui/Chat/Chat.types.d.ts +2 -2
- package/dist/esm/types/src/components/ui/Chat/chatAttachmentAccept.d.ts +3 -1
- package/dist/esm/types/src/components/ui/Chat/chatAttachmentAccept.test.d.ts +1 -0
- package/dist/esm/types/src/components/ui/Chat/chatDocxExtract.d.ts +2 -0
- package/dist/esm/types/src/components/ui/Chat/chatDocxExtract.test.d.ts +1 -0
- package/dist/esm/types/src/components/ui/Chat/chatXlsxExtract.d.ts +2 -0
- package/dist/esm/types/src/components/ui/Chat/chatXlsxExtract.test.d.ts +1 -0
- package/dist/esm/types/tests/setup.d.ts +1 -0
- package/package.json +4 -2
- package/src/components/ui/Chat/Chat.types.ts +2 -2
- package/src/components/ui/Chat/ChatChrome/ChatChrome.tsx +3 -0
- package/src/components/ui/Chat/ChatMessage/UserTextFileAttachmentBubble.tsx +6 -2
- package/src/components/ui/Chat/ChatPrompt/ChatPromptAttachments.tsx +9 -1
- package/src/components/ui/Chat/buildChatSendMessagePayload.test.ts +15 -1
- package/src/components/ui/Chat/buildChatSendMessagePayload.ts +2 -0
- package/src/components/ui/Chat/chatAttachmentAccept.test.ts +78 -0
- package/src/components/ui/Chat/chatAttachmentAccept.ts +25 -0
- package/src/components/ui/Chat/chatAttachmentExtract.ts +13 -1
- package/src/components/ui/Chat/chatDocxExtract.test.ts +40 -0
- package/src/components/ui/Chat/chatDocxExtract.ts +19 -0
- package/src/components/ui/Chat/chatXlsxExtract.test.ts +72 -0
- package/src/components/ui/Chat/chatXlsxExtract.ts +43 -0
- package/src/docs/pages/ChatAttachmentsDropzonePage.tsx +14 -20
|
@@ -30,6 +30,9 @@ function ChatChrome({ showResizeHandle, resizeHandle, onClose, isEmpty, renderPr
|
|
|
30
30
|
if (items.length > 0) {
|
|
31
31
|
setPendingAttachments(prev => [...prev, ...items]);
|
|
32
32
|
}
|
|
33
|
+
})
|
|
34
|
+
.catch(() => {
|
|
35
|
+
// Extraction failed (parse error, size limit, etc.); skip staging.
|
|
33
36
|
})
|
|
34
37
|
.finally(() => setIsExtractingAttachments(false));
|
|
35
38
|
}, [allowPdfAttachments, promptBusy]);
|
|
@@ -8,6 +8,8 @@ function formatFromFilename(filename) {
|
|
|
8
8
|
return 'csv';
|
|
9
9
|
if (lower.endsWith('.pdf'))
|
|
10
10
|
return 'pdf';
|
|
11
|
+
if (lower.endsWith('.xlsx'))
|
|
12
|
+
return 'text';
|
|
11
13
|
return 'text';
|
|
12
14
|
}
|
|
13
15
|
function mimeForFormat(format) {
|
|
@@ -17,16 +19,21 @@ function mimeForFormat(format) {
|
|
|
17
19
|
return 'application/pdf';
|
|
18
20
|
return 'text/plain;charset=utf-8';
|
|
19
21
|
}
|
|
20
|
-
function hintForFormat(format) {
|
|
22
|
+
function hintForFormat(format, filename) {
|
|
23
|
+
const lower = filename.toLowerCase();
|
|
21
24
|
if (format === 'csv')
|
|
22
25
|
return 'Download .CSV file';
|
|
23
26
|
if (format === 'pdf')
|
|
24
27
|
return 'Download file';
|
|
28
|
+
if (lower.endsWith('.docx'))
|
|
29
|
+
return 'Download Word document';
|
|
30
|
+
if (lower.endsWith('.xlsx'))
|
|
31
|
+
return 'Download spreadsheet';
|
|
25
32
|
return 'Download text file';
|
|
26
33
|
}
|
|
27
34
|
function UserTextFileAttachmentBubble({ attachment, }) {
|
|
28
35
|
const format = formatFromFilename(attachment.filename);
|
|
29
|
-
return (jsx(FileChip, { name: attachment.displayName, format: format, hint: hintForFormat(format), onClick: () => downloadTextFile(attachment.content, attachment.filename, mimeForFormat(format)) }));
|
|
36
|
+
return (jsx(FileChip, { name: attachment.displayName, format: format, hint: hintForFormat(format, attachment.filename), onClick: () => downloadTextFile(attachment.content, attachment.filename, mimeForFormat(format)) }));
|
|
30
37
|
}
|
|
31
38
|
|
|
32
39
|
export { UserTextFileAttachmentBubble };
|
|
@@ -5,7 +5,13 @@ import S from './ChatPrompt.styl.js';
|
|
|
5
5
|
function ChatPromptAttachments({ attachments, onRemove, disabled = false, }) {
|
|
6
6
|
if (attachments.length === 0)
|
|
7
7
|
return null;
|
|
8
|
-
return (jsx("div", { className: S.attachments, children: attachments.map((item, index) => (jsx(FileChip, { className: S.attachmentItem, name: item.file.name, format: item.kind === 'pdf' ? 'pdf' : 'text', hint: item.kind === 'pdf'
|
|
8
|
+
return (jsx("div", { className: S.attachments, children: attachments.map((item, index) => (jsx(FileChip, { className: S.attachmentItem, name: item.file.name, format: item.kind === 'pdf' ? 'pdf' : 'text', hint: item.kind === 'pdf'
|
|
9
|
+
? 'PDF'
|
|
10
|
+
: item.kind === 'docx'
|
|
11
|
+
? 'Word document'
|
|
12
|
+
: item.kind === 'xlsx'
|
|
13
|
+
? 'Spreadsheet'
|
|
14
|
+
: 'Text file', onRemove: () => onRemove(index), disabled: disabled }, `${item.file.name}-${index}`))) }));
|
|
9
15
|
}
|
|
10
16
|
|
|
11
17
|
export { ChatPromptAttachments };
|
|
@@ -4,6 +4,10 @@ function defaultExtForAttachment(item) {
|
|
|
4
4
|
const name = item.file.name.toLowerCase();
|
|
5
5
|
if (item.kind === 'pdf' || name.endsWith('.pdf'))
|
|
6
6
|
return 'pdf';
|
|
7
|
+
if (item.kind === 'docx' || name.endsWith('.docx'))
|
|
8
|
+
return 'docx';
|
|
9
|
+
if (item.kind === 'xlsx' || name.endsWith('.xlsx'))
|
|
10
|
+
return 'xlsx';
|
|
7
11
|
if (name.endsWith('.csv'))
|
|
8
12
|
return 'csv';
|
|
9
13
|
if (name.endsWith('.json'))
|
|
@@ -24,6 +24,10 @@ const TEXT_ATTACHMENT_ACCEPT_PARTS = [
|
|
|
24
24
|
'.tsv',
|
|
25
25
|
'text/calendar',
|
|
26
26
|
'.ics',
|
|
27
|
+
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
|
28
|
+
'.docx',
|
|
29
|
+
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
|
30
|
+
'.xlsx',
|
|
27
31
|
];
|
|
28
32
|
const PDF_ATTACHMENT_ACCEPT_PARTS = ['application/pdf', '.pdf'];
|
|
29
33
|
const TEXT_ATTACHMENT_ACCEPT_SET = new Set(TEXT_ATTACHMENT_ACCEPT_PARTS.map(part => part.toLowerCase()));
|
|
@@ -46,9 +50,24 @@ function isPdfFile(file) {
|
|
|
46
50
|
return true;
|
|
47
51
|
return file.name.toLowerCase().endsWith('.pdf');
|
|
48
52
|
}
|
|
53
|
+
function isDocxFile(file) {
|
|
54
|
+
const type = file.type.toLowerCase();
|
|
55
|
+
if (type ===
|
|
56
|
+
'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
|
|
57
|
+
return true;
|
|
58
|
+
}
|
|
59
|
+
return file.name.toLowerCase().endsWith('.docx');
|
|
60
|
+
}
|
|
61
|
+
function isXlsxFile(file) {
|
|
62
|
+
const type = file.type.toLowerCase();
|
|
63
|
+
if (type === 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet') {
|
|
64
|
+
return true;
|
|
65
|
+
}
|
|
66
|
+
return file.name.toLowerCase().endsWith('.xlsx');
|
|
67
|
+
}
|
|
49
68
|
function isAttachmentsDropzoneEnabled(allowedAttachments, allowPdfAttachments) {
|
|
50
69
|
return (filterToTextAttachments(allowedAttachments).length > 0 ||
|
|
51
70
|
Boolean(allowPdfAttachments));
|
|
52
71
|
}
|
|
53
72
|
|
|
54
|
-
export { PDF_ATTACHMENT_ACCEPT_PARTS, TEXT_ATTACHMENT_ACCEPT_PARTS, buildAcceptAttr, filterToTextAttachments, isAttachmentsDropzoneEnabled, isPdfFile };
|
|
73
|
+
export { PDF_ATTACHMENT_ACCEPT_PARTS, TEXT_ATTACHMENT_ACCEPT_PARTS, buildAcceptAttr, filterToTextAttachments, isAttachmentsDropzoneEnabled, isDocxFile, isPdfFile, isXlsxFile };
|
|
@@ -1,5 +1,7 @@
|
|
|
1
|
-
import { isPdfFile } from './chatAttachmentAccept.js';
|
|
1
|
+
import { isPdfFile, isDocxFile, isXlsxFile } from './chatAttachmentAccept.js';
|
|
2
|
+
import { extractDocxFileToText } from './chatDocxExtract.js';
|
|
2
3
|
import { extractPdfFileToText } from './chatPdfExtract.js';
|
|
4
|
+
import { extractXlsxFileToText } from './chatXlsxExtract.js';
|
|
3
5
|
|
|
4
6
|
function readTextFile(file) {
|
|
5
7
|
return new Promise((resolve, reject) => {
|
|
@@ -17,6 +19,14 @@ async function extractChatAttachmentItems(files, allowPdfAttachments) {
|
|
|
17
19
|
const text = await extractPdfFileToText(file);
|
|
18
20
|
return { file, text, kind: 'pdf' };
|
|
19
21
|
}
|
|
22
|
+
if (isDocxFile(file)) {
|
|
23
|
+
const text = await extractDocxFileToText(file);
|
|
24
|
+
return { file, text, kind: 'docx' };
|
|
25
|
+
}
|
|
26
|
+
if (isXlsxFile(file)) {
|
|
27
|
+
const text = await extractXlsxFileToText(file);
|
|
28
|
+
return { file, text, kind: 'xlsx' };
|
|
29
|
+
}
|
|
20
30
|
const text = await readTextFile(file);
|
|
21
31
|
return { file, text, kind: 'text' };
|
|
22
32
|
}));
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/** Best-effort plain text from DOCX via mammoth (loaded on demand). */
|
|
2
|
+
async function extractDocxFileToText(file) {
|
|
3
|
+
const mammoth = await import('mammoth');
|
|
4
|
+
const result = await mammoth.extractRawText({
|
|
5
|
+
arrayBuffer: await file.arrayBuffer(),
|
|
6
|
+
});
|
|
7
|
+
const errors = result.messages.filter(m => m.type === 'error');
|
|
8
|
+
if (errors.length > 0) {
|
|
9
|
+
const detail = errors.map(m => m.message).join('; ');
|
|
10
|
+
throw new Error(detail
|
|
11
|
+
? `Failed to read ${file.name}: ${detail}`
|
|
12
|
+
: `Failed to read ${file.name}`);
|
|
13
|
+
}
|
|
14
|
+
return result.value.trim();
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
export { extractDocxFileToText };
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
const MAX_FILE_BYTES = 10 * 1024 * 1024;
|
|
2
|
+
const MAX_SHEETS = 20;
|
|
3
|
+
const MAX_CSV_CHARS_PER_SHEET = 500_000;
|
|
4
|
+
function truncateCsv(csv) {
|
|
5
|
+
if (csv.length <= MAX_CSV_CHARS_PER_SHEET)
|
|
6
|
+
return csv;
|
|
7
|
+
return `${csv.slice(0, MAX_CSV_CHARS_PER_SHEET).trimEnd()}…`;
|
|
8
|
+
}
|
|
9
|
+
/** Best-effort plain text from XLSX; one CSV block per sheet (xlsx loaded on demand). */
|
|
10
|
+
async function extractXlsxFileToText(file) {
|
|
11
|
+
const buffer = new Uint8Array(await file.arrayBuffer());
|
|
12
|
+
if (buffer.byteLength > MAX_FILE_BYTES) {
|
|
13
|
+
throw new Error(`${file.name} is too large (max ${MAX_FILE_BYTES / (1024 * 1024)} MB)`);
|
|
14
|
+
}
|
|
15
|
+
const XLSX = await import('xlsx');
|
|
16
|
+
const workbook = XLSX.read(buffer, { type: 'array' });
|
|
17
|
+
const sheetNames = workbook.SheetNames.slice(0, MAX_SHEETS);
|
|
18
|
+
const sheetTexts = [];
|
|
19
|
+
for (const sheetName of sheetNames) {
|
|
20
|
+
const sheet = workbook.Sheets[sheetName];
|
|
21
|
+
if (!sheet)
|
|
22
|
+
continue;
|
|
23
|
+
const csv = truncateCsv(XLSX.utils.sheet_to_csv(sheet, { blankrows: false }).trim());
|
|
24
|
+
if (csv) {
|
|
25
|
+
sheetTexts.push(`## Sheet ${sheetName}\n\n${csv}`);
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
if (workbook.SheetNames.length > MAX_SHEETS) {
|
|
29
|
+
sheetTexts.push(`_(Only the first ${MAX_SHEETS} of ${workbook.SheetNames.length} sheets were included.)_`);
|
|
30
|
+
}
|
|
31
|
+
return sheetTexts.join('\n\n');
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export { extractXlsxFileToText };
|
|
@@ -59,9 +59,9 @@ export type ScriptCompletePayload = {
|
|
|
59
59
|
};
|
|
60
60
|
export type ChatAttachmentDropItem = {
|
|
61
61
|
file: File;
|
|
62
|
-
/** UTF-8 text for native text files; PDF
|
|
62
|
+
/** UTF-8 text for native text files; PDF/DOCX/XLSX yield extracted text. */
|
|
63
63
|
text: string;
|
|
64
|
-
kind: 'text' | 'pdf';
|
|
64
|
+
kind: 'text' | 'pdf' | 'docx' | 'xlsx';
|
|
65
65
|
};
|
|
66
66
|
export interface ChatPromptProps {
|
|
67
67
|
className?: string;
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
/** MIME types and extensions accepted for chat text attachments. */
|
|
2
|
-
export declare const TEXT_ATTACHMENT_ACCEPT_PARTS: readonly ["text/plain", ".txt", "text/csv", ".csv", "text/markdown", ".md", ".markdown", "application/json", ".json", "text/html", ".html", ".htm", "text/xml", "application/xml", ".xml", "text/yaml", "application/yaml", "application/x-yaml", ".yaml", ".yml", "text/tab-separated-values", ".tsv", "text/calendar", ".ics"];
|
|
2
|
+
export declare const TEXT_ATTACHMENT_ACCEPT_PARTS: readonly ["text/plain", ".txt", "text/csv", ".csv", "text/markdown", ".md", ".markdown", "application/json", ".json", "text/html", ".html", ".htm", "text/xml", "application/xml", ".xml", "text/yaml", "application/yaml", "application/x-yaml", ".yaml", ".yml", "text/tab-separated-values", ".tsv", "text/calendar", ".ics", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".docx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ".xlsx"];
|
|
3
3
|
export declare const PDF_ATTACHMENT_ACCEPT_PARTS: readonly ["application/pdf", ".pdf"];
|
|
4
4
|
/** Keep only tokens from `parts` that appear in the text attachment allowlist. */
|
|
5
5
|
export declare function filterToTextAttachments(parts: readonly string[] | undefined): string[];
|
|
6
6
|
export declare function buildAcceptAttr(filteredTextParts: readonly string[], allowPdf: boolean): string;
|
|
7
7
|
export declare function isPdfFile(file: File): boolean;
|
|
8
|
+
export declare function isDocxFile(file: File): boolean;
|
|
9
|
+
export declare function isXlsxFile(file: File): boolean;
|
|
8
10
|
export declare function isAttachmentsDropzoneEnabled(allowedAttachments: readonly string[] | undefined, allowPdfAttachments: boolean | undefined): boolean;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sybilion/uilib",
|
|
3
|
-
"version": "1.3.14",
|
|
3
|
+
"version": "1.3.15",
|
|
4
4
|
"description": "Sybilion Design System — React UI components (Webpack + Stylus)",
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "public",
|
|
@@ -102,6 +102,7 @@
|
|
|
102
102
|
"classnames": "^2.3.2",
|
|
103
103
|
"lightweight-charts": "^5.0.9",
|
|
104
104
|
"lucide-react": "^0.546.0",
|
|
105
|
+
"mammoth": "^1.9.0",
|
|
105
106
|
"motion": "^12.23.12",
|
|
106
107
|
"pdfjs-dist": "^4.10.38",
|
|
107
108
|
"recharts": "^3.2.1",
|
|
@@ -109,7 +110,8 @@
|
|
|
109
110
|
"style-inject": "^0.3.0",
|
|
110
111
|
"tailwindcss": "^4.2.2",
|
|
111
112
|
"tslib": "^2.8.1",
|
|
112
|
-
"vaul": "^1.1.2"
|
|
113
|
+
"vaul": "^1.1.2",
|
|
114
|
+
"xlsx": "^0.18.5"
|
|
113
115
|
},
|
|
114
116
|
"peerDependencies": {
|
|
115
117
|
"@auth0/auth0-react": "^2.3.1",
|
|
@@ -69,9 +69,9 @@ export type ScriptCompletePayload = {
|
|
|
69
69
|
|
|
70
70
|
export type ChatAttachmentDropItem = {
|
|
71
71
|
file: File;
|
|
72
|
-
/** UTF-8 text for native text files; PDF
|
|
72
|
+
/** UTF-8 text for native text files; PDF/DOCX/XLSX yield extracted text. */
|
|
73
73
|
text: string;
|
|
74
|
-
kind: 'text' | 'pdf';
|
|
74
|
+
kind: 'text' | 'pdf' | 'docx' | 'xlsx';
|
|
75
75
|
};
|
|
76
76
|
|
|
77
77
|
export interface ChatPromptProps {
|
|
@@ -86,6 +86,9 @@ export function ChatChrome({
|
|
|
86
86
|
setPendingAttachments(prev => [...prev, ...items]);
|
|
87
87
|
}
|
|
88
88
|
})
|
|
89
|
+
.catch(() => {
|
|
90
|
+
// Extraction failed (parse error, size limit, etc.); skip staging.
|
|
91
|
+
})
|
|
89
92
|
.finally(() => setIsExtractingAttachments(false));
|
|
90
93
|
},
|
|
91
94
|
[allowPdfAttachments, promptBusy],
|
|
@@ -7,6 +7,7 @@ function formatFromFilename(filename: string): FileChipFormat {
|
|
|
7
7
|
const lower = filename.toLowerCase();
|
|
8
8
|
if (lower.endsWith('.csv')) return 'csv';
|
|
9
9
|
if (lower.endsWith('.pdf')) return 'pdf';
|
|
10
|
+
if (lower.endsWith('.xlsx')) return 'text';
|
|
10
11
|
return 'text';
|
|
11
12
|
}
|
|
12
13
|
|
|
@@ -16,9 +17,12 @@ function mimeForFormat(format: FileChipFormat): string {
|
|
|
16
17
|
return 'text/plain;charset=utf-8';
|
|
17
18
|
}
|
|
18
19
|
|
|
19
|
-
function hintForFormat(format: FileChipFormat): string {
|
|
20
|
+
function hintForFormat(format: FileChipFormat, filename: string): string {
|
|
21
|
+
const lower = filename.toLowerCase();
|
|
20
22
|
if (format === 'csv') return 'Download .CSV file';
|
|
21
23
|
if (format === 'pdf') return 'Download file';
|
|
24
|
+
if (lower.endsWith('.docx')) return 'Download Word document';
|
|
25
|
+
if (lower.endsWith('.xlsx')) return 'Download spreadsheet';
|
|
22
26
|
return 'Download text file';
|
|
23
27
|
}
|
|
24
28
|
|
|
@@ -33,7 +37,7 @@ export function UserTextFileAttachmentBubble({
|
|
|
33
37
|
<FileChip
|
|
34
38
|
name={attachment.displayName}
|
|
35
39
|
format={format}
|
|
36
|
-
hint={hintForFormat(format)}
|
|
40
|
+
hint={hintForFormat(format, attachment.filename)}
|
|
37
41
|
onClick={() =>
|
|
38
42
|
downloadTextFile(
|
|
39
43
|
attachment.content,
|
|
@@ -24,7 +24,15 @@ export function ChatPromptAttachments({
|
|
|
24
24
|
className={S.attachmentItem}
|
|
25
25
|
name={item.file.name}
|
|
26
26
|
format={item.kind === 'pdf' ? 'pdf' : 'text'}
|
|
27
|
-
hint={
|
|
27
|
+
hint={
|
|
28
|
+
item.kind === 'pdf'
|
|
29
|
+
? 'PDF'
|
|
30
|
+
: item.kind === 'docx'
|
|
31
|
+
? 'Word document'
|
|
32
|
+
: item.kind === 'xlsx'
|
|
33
|
+
? 'Spreadsheet'
|
|
34
|
+
: 'Text file'
|
|
35
|
+
}
|
|
28
36
|
onRemove={() => onRemove(index)}
|
|
29
37
|
disabled={disabled}
|
|
30
38
|
/>
|
|
@@ -8,7 +8,7 @@ import {
|
|
|
8
8
|
function makeDropItem(
|
|
9
9
|
name: string,
|
|
10
10
|
text: string,
|
|
11
|
-
kind: '
|
|
11
|
+
kind: ChatAttachmentDropItem['kind'] = 'text',
|
|
12
12
|
): ChatAttachmentDropItem {
|
|
13
13
|
return {
|
|
14
14
|
file: { name } as File,
|
|
@@ -50,6 +50,20 @@ describe('buildChatSendMessagePayload', () => {
|
|
|
50
50
|
expect(result.userTextFileAttachments?.[0].filename).toMatch(/\.pdf$/i);
|
|
51
51
|
});
|
|
52
52
|
|
|
53
|
+
it('uses docx and xlsx extensions from kind', () => {
|
|
54
|
+
const docx = buildChatSendMessagePayload('', [
|
|
55
|
+
makeDropItem('brief.docx', 'word body', 'docx'),
|
|
56
|
+
]);
|
|
57
|
+
const xlsx = buildChatSendMessagePayload('', [
|
|
58
|
+
makeDropItem('sheet.xlsx', 'a,b', 'xlsx'),
|
|
59
|
+
]);
|
|
60
|
+
if (typeof docx === 'string' || typeof xlsx === 'string') {
|
|
61
|
+
throw new Error('expected payload object');
|
|
62
|
+
}
|
|
63
|
+
expect(docx.userTextFileAttachments?.[0].filename).toMatch(/\.docx$/i);
|
|
64
|
+
expect(xlsx.userTextFileAttachments?.[0].filename).toMatch(/\.xlsx$/i);
|
|
65
|
+
});
|
|
66
|
+
|
|
53
67
|
it('maps multiple attachments', () => {
|
|
54
68
|
const result = buildChatSendMessagePayload('Hi', [
|
|
55
69
|
makeDropItem('one.txt', 'first'),
|
|
@@ -8,6 +8,8 @@ import { sanitizeAttachmentFilename } from './sanitizeAttachmentFilename';
|
|
|
8
8
|
function defaultExtForAttachment(item: ChatAttachmentDropItem): string {
|
|
9
9
|
const name = item.file.name.toLowerCase();
|
|
10
10
|
if (item.kind === 'pdf' || name.endsWith('.pdf')) return 'pdf';
|
|
11
|
+
if (item.kind === 'docx' || name.endsWith('.docx')) return 'docx';
|
|
12
|
+
if (item.kind === 'xlsx' || name.endsWith('.xlsx')) return 'xlsx';
|
|
11
13
|
if (name.endsWith('.csv')) return 'csv';
|
|
12
14
|
if (name.endsWith('.json')) return 'json';
|
|
13
15
|
if (name.endsWith('.md') || name.endsWith('.markdown')) return 'md';
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import {
|
|
2
|
+
TEXT_ATTACHMENT_ACCEPT_PARTS,
|
|
3
|
+
buildAcceptAttr,
|
|
4
|
+
filterToTextAttachments,
|
|
5
|
+
isDocxFile,
|
|
6
|
+
isPdfFile,
|
|
7
|
+
isXlsxFile,
|
|
8
|
+
} from './chatAttachmentAccept';
|
|
9
|
+
|
|
10
|
+
function makeFile(name: string, type = ''): File {
|
|
11
|
+
return { name, type } as File;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
describe('TEXT_ATTACHMENT_ACCEPT_PARTS', () => {
|
|
15
|
+
it('includes docx and xlsx MIME types and extensions', () => {
|
|
16
|
+
expect(TEXT_ATTACHMENT_ACCEPT_PARTS).toContain('.docx');
|
|
17
|
+
expect(TEXT_ATTACHMENT_ACCEPT_PARTS).toContain('.xlsx');
|
|
18
|
+
expect(TEXT_ATTACHMENT_ACCEPT_PARTS).toContain(
|
|
19
|
+
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
|
20
|
+
);
|
|
21
|
+
expect(TEXT_ATTACHMENT_ACCEPT_PARTS).toContain(
|
|
22
|
+
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
|
23
|
+
);
|
|
24
|
+
});
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
describe('isDocxFile', () => {
|
|
28
|
+
it('detects by extension and MIME type', () => {
|
|
29
|
+
expect(
|
|
30
|
+
isDocxFile(
|
|
31
|
+
makeFile(
|
|
32
|
+
'notes.docx',
|
|
33
|
+
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
|
34
|
+
),
|
|
35
|
+
),
|
|
36
|
+
).toBe(true);
|
|
37
|
+
expect(isDocxFile(makeFile('notes.docx'))).toBe(true);
|
|
38
|
+
expect(isDocxFile(makeFile('notes.txt'))).toBe(false);
|
|
39
|
+
});
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
describe('isXlsxFile', () => {
|
|
43
|
+
it('detects by extension and MIME type', () => {
|
|
44
|
+
expect(
|
|
45
|
+
isXlsxFile(
|
|
46
|
+
makeFile(
|
|
47
|
+
'data.xlsx',
|
|
48
|
+
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
|
49
|
+
),
|
|
50
|
+
),
|
|
51
|
+
).toBe(true);
|
|
52
|
+
expect(isXlsxFile(makeFile('data.xlsx'))).toBe(true);
|
|
53
|
+
expect(isXlsxFile(makeFile('data.csv'))).toBe(false);
|
|
54
|
+
});
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
describe('isPdfFile', () => {
|
|
58
|
+
it('does not treat docx or xlsx as pdf', () => {
|
|
59
|
+
expect(isPdfFile(makeFile('file.docx'))).toBe(false);
|
|
60
|
+
expect(isPdfFile(makeFile('file.xlsx'))).toBe(false);
|
|
61
|
+
});
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
describe('filterToTextAttachments', () => {
|
|
65
|
+
it('keeps docx and xlsx tokens from the allowlist', () => {
|
|
66
|
+
expect(
|
|
67
|
+
filterToTextAttachments(['.docx', '.xlsx', 'application/pdf']),
|
|
68
|
+
).toEqual(['.docx', '.xlsx']);
|
|
69
|
+
});
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
describe('buildAcceptAttr', () => {
|
|
73
|
+
it('includes filtered text parts and optional pdf', () => {
|
|
74
|
+
expect(buildAcceptAttr(['.docx', '.txt'], false)).toBe('.docx,.txt');
|
|
75
|
+
expect(buildAcceptAttr(['.docx'], true)).toContain('.docx');
|
|
76
|
+
expect(buildAcceptAttr(['.docx'], true)).toContain('.pdf');
|
|
77
|
+
});
|
|
78
|
+
});
|
|
@@ -24,6 +24,10 @@ export const TEXT_ATTACHMENT_ACCEPT_PARTS = [
|
|
|
24
24
|
'.tsv',
|
|
25
25
|
'text/calendar',
|
|
26
26
|
'.ics',
|
|
27
|
+
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
|
28
|
+
'.docx',
|
|
29
|
+
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
|
30
|
+
'.xlsx',
|
|
27
31
|
] as const;
|
|
28
32
|
|
|
29
33
|
export const PDF_ATTACHMENT_ACCEPT_PARTS = ['application/pdf', '.pdf'] as const;
|
|
@@ -59,6 +63,27 @@ export function isPdfFile(file: File): boolean {
|
|
|
59
63
|
return file.name.toLowerCase().endsWith('.pdf');
|
|
60
64
|
}
|
|
61
65
|
|
|
66
|
+
export function isDocxFile(file: File): boolean {
|
|
67
|
+
const type = file.type.toLowerCase();
|
|
68
|
+
if (
|
|
69
|
+
type ===
|
|
70
|
+
'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
|
|
71
|
+
) {
|
|
72
|
+
return true;
|
|
73
|
+
}
|
|
74
|
+
return file.name.toLowerCase().endsWith('.docx');
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
export function isXlsxFile(file: File): boolean {
|
|
78
|
+
const type = file.type.toLowerCase();
|
|
79
|
+
if (
|
|
80
|
+
type === 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
|
|
81
|
+
) {
|
|
82
|
+
return true;
|
|
83
|
+
}
|
|
84
|
+
return file.name.toLowerCase().endsWith('.xlsx');
|
|
85
|
+
}
|
|
86
|
+
|
|
62
87
|
export function isAttachmentsDropzoneEnabled(
|
|
63
88
|
allowedAttachments: readonly string[] | undefined,
|
|
64
89
|
allowPdfAttachments: boolean | undefined,
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import type { ChatAttachmentDropItem } from './Chat.types';
|
|
2
|
-
import { isPdfFile } from './chatAttachmentAccept';
|
|
2
|
+
import { isDocxFile, isPdfFile, isXlsxFile } from './chatAttachmentAccept';
|
|
3
|
+
import { extractDocxFileToText } from './chatDocxExtract';
|
|
3
4
|
import { extractPdfFileToText } from './chatPdfExtract';
|
|
5
|
+
import { extractXlsxFileToText } from './chatXlsxExtract';
|
|
4
6
|
|
|
5
7
|
function readTextFile(file: File): Promise<string> {
|
|
6
8
|
return new Promise((resolve, reject) => {
|
|
@@ -24,6 +26,16 @@ export async function extractChatAttachmentItems(
|
|
|
24
26
|
return { file, text, kind: 'pdf' as const };
|
|
25
27
|
}
|
|
26
28
|
|
|
29
|
+
if (isDocxFile(file)) {
|
|
30
|
+
const text = await extractDocxFileToText(file);
|
|
31
|
+
return { file, text, kind: 'docx' as const };
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
if (isXlsxFile(file)) {
|
|
35
|
+
const text = await extractXlsxFileToText(file);
|
|
36
|
+
return { file, text, kind: 'xlsx' as const };
|
|
37
|
+
}
|
|
38
|
+
|
|
27
39
|
const text = await readTextFile(file);
|
|
28
40
|
return { file, text, kind: 'text' as const };
|
|
29
41
|
}),
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { extractDocxFileToText } from './chatDocxExtract';
|
|
2
|
+
|
|
3
|
+
const extractRawText = jest.fn();
|
|
4
|
+
|
|
5
|
+
jest.mock('mammoth', () => ({
|
|
6
|
+
extractRawText: (...args: unknown[]) => extractRawText(...args),
|
|
7
|
+
}));
|
|
8
|
+
|
|
9
|
+
describe('extractDocxFileToText', () => {
|
|
10
|
+
beforeEach(() => {
|
|
11
|
+
extractRawText.mockReset();
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
it('returns trimmed text from mammoth', async () => {
|
|
15
|
+
extractRawText.mockResolvedValue({
|
|
16
|
+
value: ' Hello from Word ',
|
|
17
|
+
messages: [],
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
const file = new File([new Uint8Array(8)], 'doc.docx', {
|
|
21
|
+
type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
await expect(extractDocxFileToText(file)).resolves.toBe('Hello from Word');
|
|
25
|
+
expect(extractRawText).toHaveBeenCalledTimes(1);
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
it('throws when mammoth reports errors', async () => {
|
|
29
|
+
extractRawText.mockResolvedValue({
|
|
30
|
+
value: '',
|
|
31
|
+
messages: [{ type: 'error', message: 'corrupt file' }],
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
const file = new File([new Uint8Array(8)], 'bad.docx', {
|
|
35
|
+
type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
await expect(extractDocxFileToText(file)).rejects.toThrow(/corrupt file/i);
|
|
39
|
+
});
|
|
40
|
+
});
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/** Best-effort plain text from DOCX via mammoth (loaded on demand). */
|
|
2
|
+
export async function extractDocxFileToText(file: File): Promise<string> {
|
|
3
|
+
const mammoth = await import('mammoth');
|
|
4
|
+
const result = await mammoth.extractRawText({
|
|
5
|
+
arrayBuffer: await file.arrayBuffer(),
|
|
6
|
+
});
|
|
7
|
+
|
|
8
|
+
const errors = result.messages.filter(m => m.type === 'error');
|
|
9
|
+
if (errors.length > 0) {
|
|
10
|
+
const detail = errors.map(m => m.message).join('; ');
|
|
11
|
+
throw new Error(
|
|
12
|
+
detail
|
|
13
|
+
? `Failed to read ${file.name}: ${detail}`
|
|
14
|
+
: `Failed to read ${file.name}`,
|
|
15
|
+
);
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
return result.value.trim();
|
|
19
|
+
}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import * as XLSX from 'xlsx';
|
|
2
|
+
|
|
3
|
+
import { extractXlsxFileToText } from './chatXlsxExtract';
|
|
4
|
+
|
|
5
|
+
function makeXlsxFile(
|
|
6
|
+
sheets: Record<string, (string | number)[][]>,
|
|
7
|
+
name = 'test.xlsx',
|
|
8
|
+
): File {
|
|
9
|
+
const workbook = XLSX.utils.book_new();
|
|
10
|
+
for (const [sheetName, rows] of Object.entries(sheets)) {
|
|
11
|
+
XLSX.utils.book_append_sheet(
|
|
12
|
+
workbook,
|
|
13
|
+
XLSX.utils.aoa_to_sheet(rows),
|
|
14
|
+
sheetName,
|
|
15
|
+
);
|
|
16
|
+
}
|
|
17
|
+
const buffer = new Uint8Array(
|
|
18
|
+
XLSX.write(workbook, { type: 'array', bookType: 'xlsx' }),
|
|
19
|
+
);
|
|
20
|
+
return new File([buffer], name, {
|
|
21
|
+
type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
|
22
|
+
});
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
describe('extractXlsxFileToText', () => {
|
|
26
|
+
it('extracts non-empty sheets as CSV with headings', async () => {
|
|
27
|
+
const file = makeXlsxFile({
|
|
28
|
+
Data: [
|
|
29
|
+
['name', 'value'],
|
|
30
|
+
['a', 1],
|
|
31
|
+
],
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
const text = await extractXlsxFileToText(file);
|
|
35
|
+
expect(text).toContain('## Sheet Data');
|
|
36
|
+
expect(text).toContain('name,value');
|
|
37
|
+
expect(text).toContain('a,1');
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
it('skips empty sheets', async () => {
|
|
41
|
+
const workbook = XLSX.utils.book_new();
|
|
42
|
+
XLSX.utils.book_append_sheet(
|
|
43
|
+
workbook,
|
|
44
|
+
XLSX.utils.aoa_to_sheet([]),
|
|
45
|
+
'Empty',
|
|
46
|
+
);
|
|
47
|
+
XLSX.utils.book_append_sheet(
|
|
48
|
+
workbook,
|
|
49
|
+
XLSX.utils.aoa_to_sheet([['x']]),
|
|
50
|
+
'Filled',
|
|
51
|
+
);
|
|
52
|
+
const buffer = new Uint8Array(
|
|
53
|
+
XLSX.write(workbook, { type: 'array', bookType: 'xlsx' }),
|
|
54
|
+
);
|
|
55
|
+
const file = new File([buffer], 'mixed.xlsx', {
|
|
56
|
+
type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
const text = await extractXlsxFileToText(file);
|
|
60
|
+
expect(text).not.toContain('## Sheet Empty');
|
|
61
|
+
expect(text).toContain('## Sheet Filled');
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
it('rejects files over the size limit', async () => {
|
|
65
|
+
const huge = new Uint8Array(10 * 1024 * 1024 + 1);
|
|
66
|
+
const file = new File([huge], 'huge.xlsx', {
|
|
67
|
+
type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
await expect(extractXlsxFileToText(file)).rejects.toThrow(/too large/i);
|
|
71
|
+
});
|
|
72
|
+
});
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
const MAX_FILE_BYTES = 10 * 1024 * 1024;
|
|
2
|
+
const MAX_SHEETS = 20;
|
|
3
|
+
const MAX_CSV_CHARS_PER_SHEET = 500_000;
|
|
4
|
+
|
|
5
|
+
function truncateCsv(csv: string): string {
|
|
6
|
+
if (csv.length <= MAX_CSV_CHARS_PER_SHEET) return csv;
|
|
7
|
+
return `${csv.slice(0, MAX_CSV_CHARS_PER_SHEET).trimEnd()}…`;
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
/** Best-effort plain text from XLSX; one CSV block per sheet (xlsx loaded on demand). */
|
|
11
|
+
export async function extractXlsxFileToText(file: File): Promise<string> {
|
|
12
|
+
const buffer = new Uint8Array(await file.arrayBuffer());
|
|
13
|
+
if (buffer.byteLength > MAX_FILE_BYTES) {
|
|
14
|
+
throw new Error(
|
|
15
|
+
`${file.name} is too large (max ${MAX_FILE_BYTES / (1024 * 1024)} MB)`,
|
|
16
|
+
);
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
const XLSX = await import('xlsx');
|
|
20
|
+
const workbook = XLSX.read(buffer, { type: 'array' });
|
|
21
|
+
const sheetNames = workbook.SheetNames.slice(0, MAX_SHEETS);
|
|
22
|
+
const sheetTexts: string[] = [];
|
|
23
|
+
|
|
24
|
+
for (const sheetName of sheetNames) {
|
|
25
|
+
const sheet = workbook.Sheets[sheetName];
|
|
26
|
+
if (!sheet) continue;
|
|
27
|
+
|
|
28
|
+
const csv = truncateCsv(
|
|
29
|
+
XLSX.utils.sheet_to_csv(sheet, { blankrows: false }).trim(),
|
|
30
|
+
);
|
|
31
|
+
if (csv) {
|
|
32
|
+
sheetTexts.push(`## Sheet ${sheetName}\n\n${csv}`);
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
if (workbook.SheetNames.length > MAX_SHEETS) {
|
|
37
|
+
sheetTexts.push(
|
|
38
|
+
`_(Only the first ${MAX_SHEETS} of ${workbook.SheetNames.length} sheets were included.)_`,
|
|
39
|
+
);
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
return sheetTexts.join('\n\n');
|
|
43
|
+
}
|
|
@@ -97,16 +97,16 @@ export default function ChatAttachmentsDropzonePage() {
|
|
|
97
97
|
<AppPageHeader
|
|
98
98
|
breadcrumbs={[{ label: 'Chat' }, { label: 'Attachments dropzone' }]}
|
|
99
99
|
title="Chat — attachments dropzone"
|
|
100
|
-
subheader="Drop text files onto the chat shell; they appear on the prompt until you send."
|
|
100
|
+
subheader="Drop text, Office, or PDF files onto the chat shell; they appear on the prompt until you send."
|
|
101
101
|
actions={<DocsHeaderActions />}
|
|
102
102
|
/>
|
|
103
103
|
<PageContentSection>
|
|
104
104
|
<p style={{ marginBottom: 16, fontSize: 14, lineHeight: 1.5 }}>
|
|
105
|
-
Drop a <code>.txt</code>, <code>.csv</code>, <code>.md</code>,
|
|
106
|
-
<code>.
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
<code>allowPdfAttachments</code>.
|
|
105
|
+
Drop a <code>.txt</code>, <code>.csv</code>, <code>.md</code>,{' '}
|
|
106
|
+
<code>.docx</code>, <code>.xlsx</code>, or <code>.pdf</code> file
|
|
107
|
+
anywhere on the chat panel. The file shows above the composer; press
|
|
108
|
+
send to post it. Office and PDF parsers load on demand when you attach
|
|
109
|
+
those types; PDF also requires <code>allowPdfAttachments</code>.
|
|
110
110
|
</p>
|
|
111
111
|
<ChatChrome
|
|
112
112
|
showResizeHandle={false}
|
|
@@ -133,25 +133,19 @@ export default function ChatAttachmentsDropzonePage() {
|
|
|
133
133
|
effectiveScopeId="docs-chat-attachments-dropzone"
|
|
134
134
|
onPromptSubmit={onPromptSubmit}
|
|
135
135
|
onChatDeleted={() => {}}
|
|
136
|
-
allowedAttachments={
|
|
137
|
-
'text/plain',
|
|
138
|
-
'.txt',
|
|
139
|
-
'text/csv',
|
|
140
|
-
'.csv',
|
|
141
|
-
'text/markdown',
|
|
142
|
-
'.md',
|
|
143
|
-
'application/json',
|
|
144
|
-
'.json',
|
|
145
|
-
]}
|
|
136
|
+
allowedAttachments={TEXT_ATTACHMENT_ACCEPT_PARTS}
|
|
146
137
|
allowPdfAttachments
|
|
147
138
|
emptyState={{
|
|
148
|
-
title: 'Drop a text
|
|
139
|
+
title: 'Drop a text, Office, or PDF file',
|
|
149
140
|
description:
|
|
150
|
-
'Drag a file onto this panel, review it above the composer, then send.',
|
|
141
|
+
'Drag a file onto this panel, review it above the composer, then send. DOCX and XLSX are parsed in the browser.',
|
|
151
142
|
additionalContent: (
|
|
152
143
|
<p style={{ fontSize: 13, opacity: 0.85 }}>
|
|
153
|
-
|
|
154
|
-
{
|
|
144
|
+
Accepted types include <code>.txt</code>, <code>.csv</code>,{' '}
|
|
145
|
+
<code>.md</code>, <code>.json</code>, <code>.docx</code>,{' '}
|
|
146
|
+
<code>.xlsx</code>, and more (
|
|
147
|
+
{TEXT_ATTACHMENT_ACCEPT_PARTS.length} entries in the text
|
|
148
|
+
allowlist).
|
|
155
149
|
</p>
|
|
156
150
|
),
|
|
157
151
|
}}
|