wikimem 0.8.0 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/init.d.ts.map +1 -1
- package/dist/cli/commands/init.js +97 -8
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/core/connectors.d.ts +1 -1
- package/dist/core/connectors.d.ts.map +1 -1
- package/dist/core/git.d.ts +1 -1
- package/dist/core/git.d.ts.map +1 -1
- package/dist/core/git.js.map +1 -1
- package/dist/core/ingest.d.ts.map +1 -1
- package/dist/core/ingest.js +74 -3
- package/dist/core/ingest.js.map +1 -1
- package/dist/core/lint.d.ts.map +1 -1
- package/dist/core/lint.js +23 -4
- package/dist/core/lint.js.map +1 -1
- package/dist/core/oauth-defaults.d.ts +31 -0
- package/dist/core/oauth-defaults.d.ts.map +1 -0
- package/dist/core/oauth-defaults.js +79 -0
- package/dist/core/oauth-defaults.js.map +1 -0
- package/dist/core/observer.d.ts +24 -1
- package/dist/core/observer.d.ts.map +1 -1
- package/dist/core/observer.js +146 -4
- package/dist/core/observer.js.map +1 -1
- package/dist/core/sync/gdrive.d.ts +14 -0
- package/dist/core/sync/gdrive.d.ts.map +1 -0
- package/dist/core/sync/gdrive.js +205 -0
- package/dist/core/sync/gdrive.js.map +1 -0
- package/dist/core/sync/github.d.ts +20 -0
- package/dist/core/sync/github.d.ts.map +1 -0
- package/dist/core/sync/github.js +206 -0
- package/dist/core/sync/github.js.map +1 -0
- package/dist/core/sync/gmail.d.ts +15 -0
- package/dist/core/sync/gmail.d.ts.map +1 -0
- package/dist/core/sync/gmail.js +159 -0
- package/dist/core/sync/gmail.js.map +1 -0
- package/dist/core/sync/index.d.ts +47 -0
- package/dist/core/sync/index.d.ts.map +1 -0
- package/dist/core/sync/index.js +100 -0
- package/dist/core/sync/index.js.map +1 -0
- package/dist/core/sync/jira.d.ts +15 -0
- package/dist/core/sync/jira.d.ts.map +1 -0
- package/dist/core/sync/jira.js +176 -0
- package/dist/core/sync/jira.js.map +1 -0
- package/dist/core/sync/linear.d.ts +15 -0
- package/dist/core/sync/linear.d.ts.map +1 -0
- package/dist/core/sync/linear.js +111 -0
- package/dist/core/sync/linear.js.map +1 -0
- package/dist/core/sync/notion.d.ts +14 -0
- package/dist/core/sync/notion.d.ts.map +1 -0
- package/dist/core/sync/notion.js +168 -0
- package/dist/core/sync/notion.js.map +1 -0
- package/dist/core/sync/rss.d.ts +20 -0
- package/dist/core/sync/rss.d.ts.map +1 -0
- package/dist/core/sync/rss.js +165 -0
- package/dist/core/sync/rss.js.map +1 -0
- package/dist/core/sync/scheduler.d.ts +31 -0
- package/dist/core/sync/scheduler.d.ts.map +1 -0
- package/dist/core/sync/scheduler.js +129 -0
- package/dist/core/sync/scheduler.js.map +1 -0
- package/dist/core/sync/slack.d.ts +16 -0
- package/dist/core/sync/slack.d.ts.map +1 -0
- package/dist/core/sync/slack.js +173 -0
- package/dist/core/sync/slack.js.map +1 -0
- package/dist/core/vault.d.ts +22 -0
- package/dist/core/vault.d.ts.map +1 -1
- package/dist/core/vault.js +65 -0
- package/dist/core/vault.js.map +1 -1
- package/dist/core/webhooks.d.ts +13 -0
- package/dist/core/webhooks.d.ts.map +1 -0
- package/dist/core/webhooks.js +206 -0
- package/dist/core/webhooks.js.map +1 -0
- package/dist/mcp-server.d.ts +11 -6
- package/dist/mcp-server.d.ts.map +1 -1
- package/dist/mcp-server.js +99 -6
- package/dist/mcp-server.js.map +1 -1
- package/dist/mcp-tools-extended.d.ts +15 -0
- package/dist/mcp-tools-extended.d.ts.map +1 -0
- package/dist/mcp-tools-extended.js +277 -0
- package/dist/mcp-tools-extended.js.map +1 -0
- package/dist/processors/csv.d.ts +18 -0
- package/dist/processors/csv.d.ts.map +1 -0
- package/dist/processors/csv.js +230 -0
- package/dist/processors/csv.js.map +1 -0
- package/dist/processors/image.d.ts.map +1 -1
- package/dist/processors/image.js +55 -27
- package/dist/processors/image.js.map +1 -1
- package/dist/processors/pdf.d.ts.map +1 -1
- package/dist/processors/pdf.js +5 -1
- package/dist/processors/pdf.js.map +1 -1
- package/dist/processors/pptx.d.ts +3 -1
- package/dist/processors/pptx.d.ts.map +1 -1
- package/dist/processors/pptx.js +236 -95
- package/dist/processors/pptx.js.map +1 -1
- package/dist/processors/xlsx.d.ts +2 -0
- package/dist/processors/xlsx.d.ts.map +1 -1
- package/dist/processors/xlsx.js +182 -46
- package/dist/processors/xlsx.js.map +1 -1
- package/dist/templates/source-types.d.ts +33 -0
- package/dist/templates/source-types.d.ts.map +1 -0
- package/dist/templates/source-types.js +178 -0
- package/dist/templates/source-types.js.map +1 -0
- package/dist/web/public/index.html +1785 -103
- package/dist/web/server.d.ts.map +1 -1
- package/dist/web/server.js +746 -38
- package/dist/web/server.js.map +1 -1
- package/package.json +4 -1
- package/src/web/public/index.html +1785 -103
- package/templates/source-types/article.md +21 -0
- package/templates/source-types/book.md +21 -0
- package/templates/source-types/paper.md +23 -0
- package/templates/source-types/podcast.md +21 -0
- package/templates/source-types/raw-notes.md +17 -0
- package/templates/source-types/tweet-thread.md +19 -0
- package/templates/source-types/video.md +21 -0
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CSV (.csv, .tsv) processor.
|
|
3
|
+
* Pure Node.js parser — no external dependencies.
|
|
4
|
+
* Handles: quoted fields, commas in values, newlines in quotes, BOM, TSV.
|
|
5
|
+
*/
|
|
6
|
+
import { readFileSync } from 'node:fs';
|
|
7
|
+
import { basename, extname } from 'node:path';
|
|
8
|
+
const MAX_DISPLAY_ROWS = 100;
|
|
9
|
+
const MAX_DISPLAY_COLS = 10;
|
|
10
|
+
/**
 * Report whether a path has a CSV or TSV extension.
 *
 * The extension is compared case-insensitively; only `.csv` and `.tsv` match.
 *
 * @param filePath Path (or bare file name) to inspect.
 * @returns `true` when the extension is `.csv` or `.tsv`.
 */
export function isCsvFile(filePath) {
    const suffix = extname(filePath).toLowerCase();
    return ['.csv', '.tsv'].includes(suffix);
}
|
|
15
|
+
/**
 * Process a CSV or TSV file into structured output with a markdown table.
 *
 * The file is read synchronously as UTF-8, a leading BOM is stripped, and the
 * delimiter is a tab for `.tsv` files or sniffed from the content otherwise.
 * The first parsed row is treated as the header row.
 *
 * @param filePath Path of the CSV/TSV file to read.
 * @returns An object with `title` (file name without its extension),
 *   `content` (the markdown table, or a placeholder when nothing was parsed),
 *   `markdown` (the full document), `rowCount` (data rows, header excluded),
 *   `columnCount` (widest row) and `sourcePath`.
 */
export async function processCsv(filePath) {
    const rawExt = extname(filePath);
    const ext = rawExt.toLowerCase();
    // Strip the extension exactly as it is spelled in the path: basename()'s
    // suffix match is case-sensitive, so the lower-cased form would leave
    // "DATA.CSV" untouched.
    const title = basename(filePath, rawExt);
    const raw = readFileSync(filePath, 'utf-8');
    const cleaned = stripBom(raw);
    const delimiter = ext === '.tsv' ? '\t' : detectDelimiter(cleaned);
    const rows = parseRows(cleaned, delimiter);
    if (rows.length === 0) {
        return {
            title,
            content: `[CSV — no data extracted from ${basename(filePath)}]`,
            markdown: buildMarkdown(title, filePath, '', 0, 0, []),
            rowCount: 0,
            columnCount: 0,
            sourcePath: filePath,
        };
    }
    // Fold instead of Math.max(...lengths): spreading one argument per row
    // exceeds the engine's argument limit on very large files.
    const columnCount = rows.reduce((widest, r) => Math.max(widest, r.length), 0);
    const rowCount = rows.length - 1; // exclude header row
    const columnTypes = detectColumnTypes(rows);
    const tableContent = buildTable(rows, columnCount);
    return {
        title,
        content: tableContent,
        markdown: buildMarkdown(title, filePath, tableContent, rowCount, columnCount, columnTypes),
        rowCount,
        columnCount,
        sourcePath: filePath,
    };
}
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
// CSV parser — handles RFC 4180 (quoted fields, embedded commas/newlines)
|
|
48
|
+
// ---------------------------------------------------------------------------
|
|
49
|
+
/**
 * Remove a leading byte-order mark (U+FEFF) from decoded text.
 *
 * @param text Decoded file contents.
 * @returns `text` without its first character when that character is U+FEFF,
 *   otherwise `text` unchanged.
 */
function stripBom(text) {
    if (text.charCodeAt(0) !== 0xfeff) {
        return text;
    }
    return text.slice(1);
}
|
|
52
|
+
/**
 * Sniff the most likely delimiter from the first 2000 characters.
 *
 * Candidates are comma, tab, semicolon and pipe. Occurrences inside
 * double-quoted fields are ignored, so quoted text such as `"a;b;c",1` does
 * not outvote the real delimiter. A quote opens a quoted field only at the
 * start of a field (start of text, or right after a candidate delimiter or a
 * line break); a doubled quote (`""`) inside a quoted field is an escape.
 *
 * @param text CSV text (BOM already removed by the caller).
 * @returns The candidate with the highest count; ties go to the earlier
 *   candidate in the order `,`, tab, `;`, `|`. Returns `,` when none occur.
 */
function detectDelimiter(text) {
    const sample = text.substring(0, 2000);
    const candidates = [',', '\t', ';', '|'];
    const counts = candidates.map(() => 0);
    let inQuotes = false;
    let atFieldStart = true;
    for (let i = 0; i < sample.length; i++) {
        const ch = sample[i];
        if (inQuotes) {
            if (ch === '"') {
                if (sample[i + 1] === '"') {
                    i++; // escaped quote ("") — still inside the quoted field
                }
                else {
                    inQuotes = false;
                }
            }
            continue;
        }
        if (ch === '"' && atFieldStart) {
            inQuotes = true;
            atFieldStart = false;
            continue;
        }
        const idx = candidates.indexOf(ch);
        if (idx !== -1) {
            counts[idx] += 1;
            atFieldStart = true;
            continue;
        }
        atFieldStart = ch === '\n' || ch === '\r';
    }
    // Strict ">" keeps the earliest candidate on ties and falls back to ','.
    let best = ',';
    let bestCount = 0;
    for (let k = 0; k < candidates.length; k++) {
        if (counts[k] > bestCount) {
            best = candidates[k];
            bestCount = counts[k];
        }
    }
    return best;
}
|
|
69
|
+
/**
 * Parse delimited text into a 2-D array of strings, respecting quoted fields.
 *
 * Rules implemented here:
 * - A `"` starts a quoted field only when the current field is still empty.
 * - Inside quotes, `""` yields a literal `"`; delimiters and line breaks are
 *   kept as field content.
 * - Rows end at `\r`, `\n` or `\r\n`.
 * - Every field is trimmed, and rows whose fields are all empty are dropped.
 *
 * @param text Text to parse.
 * @param delimiter Single-character field separator.
 * @returns Array of rows, each an array of trimmed field strings.
 */
function parseRows(text, delimiter) {
    const parsed = [];
    let cells = [];
    let buffer = '';
    let quoted = false;
    // Close the field being accumulated and start a new one.
    const endField = () => {
        cells.push(buffer.trim());
        buffer = '';
    };
    // Close the current row, keeping it only if some field is non-empty.
    const endRow = () => {
        endField();
        if (cells.some((cell) => cell.length > 0)) {
            parsed.push(cells);
        }
        cells = [];
    };
    for (let pos = 0; pos < text.length; pos++) {
        const ch = text[pos];
        if (quoted) {
            if (ch !== '"') {
                buffer += ch;
            }
            else if (text[pos + 1] === '"') {
                // Escaped quote ("")
                buffer += '"';
                pos++;
            }
            else {
                // End of quoted field
                quoted = false;
            }
            continue;
        }
        if (ch === '"' && buffer.length === 0) {
            quoted = true;
        }
        else if (ch === delimiter) {
            endField();
        }
        else if (ch === '\r' || ch === '\n') {
            endRow();
            // Treat CRLF as a single line break.
            if (ch === '\r' && text[pos + 1] === '\n') {
                pos++;
            }
        }
        else {
            buffer += ch;
        }
    }
    // Flush a final row that was not terminated by a line break.
    if (buffer.length > 0 || cells.length > 0) {
        endRow();
    }
    return parsed;
}
|
|
142
|
+
/**
 * Guess a type for each header column from a sample of the data rows.
 *
 * The first row supplies column names; up to the next 50 rows are sampled.
 * Empty cells are ignored. A column is labelled with the first of `number`,
 * `boolean`, `date` whose pattern matches strictly more than 80% of the
 * non-empty sampled values; otherwise (or with no values) it is `text`.
 * Blank headers are named `col_<1-based index>`.
 *
 * @param rows Parsed rows, header first.
 * @returns One `{ name, type }` entry per header cell, or `[]` when there is
 *   no data row.
 */
function detectColumnTypes(rows) {
    if (rows.length < 2) {
        return [];
    }
    const headerRow = rows[0];
    if (!headerRow) {
        return [];
    }
    const sampled = rows.slice(1, Math.min(rows.length, 51)); // sample up to 50 data rows
    // Checked in this order; the first rule above the threshold wins.
    const rules = [
        {
            type: 'number',
            matches: (v) => /^-?[\d,]+\.?\d*$/.test(v.replace(/,/g, '')),
        },
        {
            type: 'boolean',
            matches: (v) => ['true', 'false', 'yes', 'no', '0', '1'].includes(v.toLowerCase()),
        },
        {
            type: 'date',
            matches: (v) => /^\d{4}[-/]\d{1,2}[-/]\d{1,2}/.test(v) || /^\d{1,2}[-/]\d{1,2}[-/]\d{2,4}/.test(v),
        },
    ];
    return headerRow.map((header, colIdx) => {
        const name = header || `col_${colIdx + 1}`;
        const present = [];
        for (const row of sampled) {
            const cell = row[colIdx] ?? '';
            if (cell.length > 0) {
                present.push(cell);
            }
        }
        let type = 'text';
        if (present.length > 0) {
            for (const rule of rules) {
                const hits = present.filter(rule.matches).length;
                if (hits / present.length > 0.8) {
                    type = rule.type;
                    break;
                }
            }
        }
        return { name, type };
    });
}
|
|
174
|
+
// ---------------------------------------------------------------------------
|
|
175
|
+
// Markdown table builder
|
|
176
|
+
// ---------------------------------------------------------------------------
|
|
177
|
+
/**
 * Render parsed rows as a markdown table.
 *
 * The first row becomes the header and is followed by a `---` separator row.
 * At most `MAX_DISPLAY_ROWS` data rows and `MAX_DISPLAY_COLS` columns are
 * rendered; extra columns are replaced by a trailing `...` cell and extra rows
 * by a blockquote note giving the number of omitted rows. Short rows are
 * padded with empty cells.
 *
 * @param rows Parsed rows, header first.
 * @param totalCols Width of the widest row.
 * @returns The table as a newline-joined string, or `''` for no rows.
 */
function buildTable(rows, totalCols) {
    if (rows.length === 0)
        return '';
    const truncateCols = totalCols > MAX_DISPLAY_COLS;
    const displayCols = truncateCols ? MAX_DISPLAY_COLS : totalCols;
    const lines = [];
    for (let i = 0; i < Math.min(rows.length, MAX_DISPLAY_ROWS + 1); i++) {
        const row = rows[i];
        if (!row)
            continue;
        const cells = [];
        for (let j = 0; j < displayCols; j++) {
            const cell = row[j] ?? '';
            // Escape pipes and collapse every kind of line break (CRLF, CR, LF)
            // for table safety: quoted CSV fields may contain "\r\n", and a
            // stray "\r" would still end the table row in markdown.
            cells.push(cell.replace(/\|/g, '\\|').replace(/\r\n|\r|\n/g, ' '));
        }
        if (truncateCols) {
            cells.push('...');
        }
        lines.push(`| ${cells.join(' | ')} |`);
        // Separator after header row
        if (i === 0) {
            const sep = cells.map(() => '---');
            lines.push(`| ${sep.join(' | ')} |`);
        }
    }
    if (rows.length > MAX_DISPLAY_ROWS + 1) {
        const remaining = rows.length - MAX_DISPLAY_ROWS - 1;
        lines.push(`\n> _...and ${remaining} more rows (truncated)_`);
    }
    return lines.join('\n');
}
|
|
209
|
+
// ---------------------------------------------------------------------------
|
|
210
|
+
// Full markdown output
|
|
211
|
+
// ---------------------------------------------------------------------------
|
|
212
|
+
/**
 * Assemble the full markdown document for a processed CSV file.
 *
 * The document has a title heading, a blockquote metadata header (source
 * link, type, row and column counts, column types, processing date taken from
 * the current UTC date), and a "Data" section holding `content`.
 *
 * @param title Heading text.
 * @param filePath Source path; its base name is the link label and the path
 *   itself is the link target.
 * @param content Pre-rendered table markdown (may be empty).
 * @param rowCount Number of data rows reported in the header.
 * @param columnCount Number of columns reported in the header.
 * @param columnTypes `{ name, type }` entries; an empty list renders as `N/A`.
 * @returns The markdown document, ending with a newline.
 */
function buildMarkdown(title, filePath, content, rowCount, columnCount, columnTypes) {
    let typesSummary = 'N/A';
    if (columnTypes.length > 0) {
        const labels = columnTypes.map((c) => `\`${c.name}\` (${c.type})`);
        typesSummary = labels.join(', ');
    }
    return `# ${title}

> **Source:** [${basename(filePath)}](${filePath})
> **Type:** CSV
> **Rows:** ${rowCount}
> **Columns:** ${columnCount}
> **Column types:** ${typesSummary}
> **Processed:** ${new Date().toISOString().split('T')[0]}

## Data

${content}
`;
}
|
|
230
|
+
//# sourceMappingURL=csv.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"csv.js","sourceRoot":"","sources":["../../src/processors/csv.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAW9C,MAAM,gBAAgB,GAAG,GAAG,CAAC;AAC7B,MAAM,gBAAgB,GAAG,EAAE,CAAC;AAE5B,oDAAoD;AACpD,MAAM,UAAU,SAAS,CAAC,QAAgB;IACxC,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,OAAO,GAAG,KAAK,MAAM,IAAI,GAAG,KAAK,MAAM,CAAC;AAC1C,CAAC;AAED,4EAA4E;AAC5E,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC/C,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAEtC,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAC5C,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;IAE9B,MAAM,SAAS,GAAG,GAAG,KAAK,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;IACnE,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IAE3C,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO;YACL,KAAK;YACL,OAAO,EAAE,iCAAiC,QAAQ,CAAC,QAAQ,CAAC,GAAG;YAC/D,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACtD,QAAQ,EAAE,CAAC;YACX,WAAW,EAAE,CAAC;YACd,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;IAC3D,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,qBAAqB;IACvD,MAAM,WAAW,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC;IAC5C,MAAM,YAAY,GAAG,UAAU,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;IAEnD,OAAO;QACL,KAAK;QACL,OAAO,EAAE,YAAY;QACrB,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,QAAQ,EAAE,WAAW,EAAE,WAAW,CAAC;QAC1F,QAAQ;QACR,WAAW;QACX,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,0EAA0E;AAC1E,8EAA8E;AAE9E,SAAS,QAAQ,CAAC,IAAY;IAC5B,OAAO,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AAC9D,CAAC;AAED,gEAAgE;AAChE,SAAS,eAAe,CAAC,IAAY;IACnC,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IACvC,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAA
C,CAAC,MAAM,CAAC;IACjD,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IAChD,MAAM,UAAU,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IACrD,MAAM,KAAK,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IAEjD,MAAM,MAAM,GAA4B;QACtC,CAAC,GAAG,EAAE,MAAM,CAAC;QACb,CAAC,IAAI,EAAE,IAAI,CAAC;QACZ,CAAC,GAAG,EAAE,UAAU,CAAC;QACjB,CAAC,GAAG,EAAE,KAAK,CAAC;KACb,CAAC;IACF,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACnC,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IACvB,OAAO,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AAC7C,CAAC;AAED,4EAA4E;AAC5E,SAAS,SAAS,CAAC,IAAY,EAAE,SAAiB;IAChD,MAAM,IAAI,GAAe,EAAE,CAAC;IAC5B,IAAI,UAAU,GAAa,EAAE,CAAC;IAC9B,IAAI,KAAK,GAAG,EAAE,CAAC;IACf,IAAI,QAAQ,GAAG,KAAK,CAAC;IACrB,IAAI,CAAC,GAAG,CAAC,CAAC;IAEV,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACvB,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QAEnB,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;gBACf,qBAAqB;gBACrB,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;oBAC/C,KAAK,IAAI,GAAG,CAAC;oBACb,CAAC,IAAI,CAAC,CAAC;oBACP,SAAS;gBACX,CAAC;gBACD,sBAAsB;gBACtB,QAAQ,GAAG,KAAK,CAAC;gBACjB,CAAC,EAAE,CAAC;gBACJ,SAAS;YACX,CAAC;YACD,KAAK,IAAI,EAAE,CAAC;YACZ,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,oBAAoB;QACpB,IAAI,EAAE,KAAK,GAAG,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACrC,QAAQ,GAAG,IAAI,CAAC;YAChB,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,IAAI,EAAE,KAAK,SAAS,EAAE,CAAC;YACrB,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;YAC9B,KAAK,GAAG,EAAE,CAAC;YACX,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,IAAI,EAAE,KAAK,IAAI,EAAE,CAAC;YAChB,aAAa;YACb,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;YAC9B,KAAK,GAAG,EAAE,CAAC;YACX,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC;gBACzC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YACxB,CAAC;YACD,UAAU,GAAG,EAA
E,CAAC;YAChB,CAAC,EAAE,CAAC;YACJ,IAAI,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,IAAI;gBAAE,CAAC,EAAE,CAAC;YAC7C,SAAS;QACX,CAAC;QAED,IAAI,EAAE,KAAK,IAAI,EAAE,CAAC;YAChB,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;YAC9B,KAAK,GAAG,EAAE,CAAC;YACX,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC;gBACzC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YACxB,CAAC;YACD,UAAU,GAAG,EAAE,CAAC;YAChB,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,KAAK,IAAI,EAAE,CAAC;QACZ,CAAC,EAAE,CAAC;IACN,CAAC;IAED,uBAAuB;IACvB,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC9C,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;QAC9B,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC;YACzC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACxB,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAWD,SAAS,iBAAiB,CAAC,IAAgB;IACzC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,EAAE,CAAC;IAE/B,MAAM,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IACxB,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IAExB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,4BAA4B;IAEvF,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,MAAM,EAAc,EAAE;QAChD,MAAM,MAAM,GAAG,QAAQ;aACpB,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;aAC/B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAE/B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;QAC/D,CAAC;QAED,eAAe;QACf,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QAC9F,IAAI,WAAW,GAAG,MAAM,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACtC,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;QACjE,CAAC;QAED,gBAAgB;QAChB,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CACpC,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,CA
AC,CAAC,QAAQ,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CACnE,CAAC,MAAM,CAAC;QACT,IAAI,SAAS,GAAG,MAAM,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACpC,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QAClE,CAAC;QAED,aAAa;QACb,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CACpC,8BAA8B,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,gCAAgC,CAAC,IAAI,CAAC,CAAC,CAAC,CACnF,CAAC,MAAM,CAAC;QACT,IAAI,SAAS,GAAG,MAAM,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACpC,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;QAC/D,CAAC;QAED,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;IAC/D,CAAC,CAAC,CAAC;AACL,CAAC;AAED,8EAA8E;AAC9E,yBAAyB;AACzB,8EAA8E;AAE9E,SAAS,UAAU,CAAC,IAAgB,EAAE,SAAiB;IACrD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEjC,MAAM,YAAY,GAAG,SAAS,GAAG,gBAAgB,CAAC;IAClD,MAAM,WAAW,GAAG,YAAY,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC,CAAC,SAAS,CAAC;IAEhE,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,gBAAgB,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QACrE,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,CAAC,GAAG;YAAE,SAAS;QAEnB,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;YACrC,MAAM,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC1B,sDAAsD;YACtD,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC;QAC7D,CAAC;QACD,IAAI,YAAY,EAAE,CAAC;YACjB,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACpB,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEvC,6BAA6B;QAC7B,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YACZ,MAAM,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC;YACnC,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACvC,CAAC;IACH,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,GAAG,gBAAgB,GAAG,CAAC,EAAE,CAAC;QACvC,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,GAAG,gBAAgB,GAAG,CAAC,CAAC;QACrD,KAAK,CAAC,IAAI,CAAC,eAAe,SAAS,yBAAyB,CAAC,CAAC;IAChE,C
AAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,8EAA8E;AAC9E,uBAAuB;AACvB,8EAA8E;AAE9E,SAAS,aAAa,CACpB,KAAa,EACb,QAAgB,EAChB,OAAe,EACf,QAAgB,EAChB,WAAmB,EACnB,WAAyB;IAEzB,MAAM,YAAY,GAChB,WAAW,CAAC,MAAM,GAAG,CAAC;QACpB,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,OAAO,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;QAChE,CAAC,CAAC,KAAK,CAAC;IAEZ,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;;cAElC,QAAQ;iBACL,WAAW;sBACN,YAAY;mBACf,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;;;EAIvD,OAAO;CACR,CAAC;AACF,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"image.d.ts","sourceRoot":"","sources":["../../src/processors/image.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;CACpB;AAID,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAErD;AAED,wBAAsB,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,
|
|
1
|
+
{"version":3,"file":"image.d.ts","sourceRoot":"","sources":["../../src/processors/image.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;CACpB;AAID,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAErD;AAED,wBAAsB,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAgCzE"}
|
package/dist/processors/image.js
CHANGED
|
@@ -25,42 +25,70 @@ export async function processImage(filePath) {
|
|
|
25
25
|
sourcePath: filePath,
|
|
26
26
|
};
|
|
27
27
|
}
|
|
28
|
+
const description = await describeWithVision(apiKey, base64, mediaType);
|
|
29
|
+
return {
|
|
30
|
+
title,
|
|
31
|
+
description,
|
|
32
|
+
markdown: buildMarkdown(title, filePath, description),
|
|
33
|
+
sourcePath: filePath,
|
|
34
|
+
};
|
|
35
|
+
}
|
|
36
|
+
async function describeWithVision(apiKey, base64, mediaType) {
|
|
28
37
|
const client = new Anthropic({ apiKey });
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
{
|
|
37
|
-
type: 'image',
|
|
38
|
-
source: { type: 'base64', media_type: mediaType, data: base64 },
|
|
39
|
-
},
|
|
38
|
+
// Try Claude Vision — retry once on transient failures
|
|
39
|
+
for (let attempt = 0; attempt < 2; attempt++) {
|
|
40
|
+
try {
|
|
41
|
+
const response = await client.messages.create({
|
|
42
|
+
model: 'claude-sonnet-4-20250514',
|
|
43
|
+
max_tokens: 2048,
|
|
44
|
+
messages: [
|
|
40
45
|
{
|
|
41
|
-
|
|
42
|
-
|
|
46
|
+
role: 'user',
|
|
47
|
+
content: [
|
|
48
|
+
{
|
|
49
|
+
type: 'image',
|
|
50
|
+
source: {
|
|
51
|
+
type: 'base64',
|
|
52
|
+
media_type: mediaType,
|
|
53
|
+
data: base64,
|
|
54
|
+
},
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
type: 'text',
|
|
58
|
+
text: `Describe this image in detail for a knowledge base. Include:
|
|
43
59
|
1. What the image shows (objects, people, text, diagrams, charts)
|
|
44
60
|
2. Key information or data visible
|
|
45
|
-
3. Any text content (OCR)
|
|
61
|
+
3. Any text content (OCR — extract ALL visible text verbatim)
|
|
46
62
|
4. Context and significance
|
|
47
63
|
|
|
48
64
|
Be thorough but concise. This description will represent the image in a markdown wiki where agents need to understand its content without seeing it directly.`,
|
|
65
|
+
},
|
|
66
|
+
],
|
|
49
67
|
},
|
|
50
68
|
],
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
69
|
+
});
|
|
70
|
+
return response.content
|
|
71
|
+
.filter((block) => block.type === 'text')
|
|
72
|
+
.map((block) => block.text)
|
|
73
|
+
.join('');
|
|
74
|
+
}
|
|
75
|
+
catch (err) {
|
|
76
|
+
const isRetryable = err instanceof Error &&
|
|
77
|
+
(err.message.includes('rate_limit') ||
|
|
78
|
+
err.message.includes('overloaded') ||
|
|
79
|
+
err.message.includes('529') ||
|
|
80
|
+
err.message.includes('timeout'));
|
|
81
|
+
if (isRetryable && attempt === 0) {
|
|
82
|
+
// Wait 2s and retry once
|
|
83
|
+
await new Promise((r) => setTimeout(r, 2000));
|
|
84
|
+
continue;
|
|
85
|
+
}
|
|
86
|
+
// Non-retryable or second failure — return fallback description
|
|
87
|
+
const sizeKB = Math.round(Buffer.from(base64, 'base64').length / 1024);
|
|
88
|
+
return `[Image — Claude Vision analysis failed: ${err instanceof Error ? err.message : 'unknown error'}]\n\n_File size: ${sizeKB} KB. Set ANTHROPIC_API_KEY and ensure API access to enable image description._`;
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
return '[Image — description unavailable]';
|
|
64
92
|
}
|
|
65
93
|
function buildMarkdown(title, filePath, description) {
|
|
66
94
|
return `# ${title}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"image.js","sourceRoot":"","sources":["../../src/processors/image.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC9C,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAS1C,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC;AAEzF,MAAM,UAAU,WAAW,CAAC,QAAgB;IAC1C,OAAO,oBAAoB,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;AACnE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,QAAgB;IACjD,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAEtC,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;QACnC,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,gBAAgB,CAAC,GAAG,oBAAoB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC1G,CAAC;IAED,MAAM,SAAS,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAC5C,MAAM,SAAS,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;IAEpC,0CAA0C;IAC1C,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IAChD,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,qEAAqE;QACrE,OAAO;YACL,KAAK;YACL,WAAW,EAAE,eAAe,QAAQ,CAAC,QAAQ,CAAC,EAAE;YAChD,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,yDAAyD,CAAC;YACnG,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IAEzC,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"image.js","sourceRoot":"","sources":["../../src/processors/image.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC9C,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAS1C,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC;AAEzF,MAAM,UAAU,WAAW,CAAC,QAAgB;IAC1C,OAAO,oBAAoB,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;AACnE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,QAAgB;IACjD,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAEtC,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;QACnC,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,gBAAgB,CAAC,GAAG,oBAAoB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC1G,CAAC;IAED,MAAM,SAAS,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAC5C,MAAM,SAAS,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;IAEpC,0CAA0C;IAC1C,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IAChD,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,qEAAqE;QACrE,OAAO;YACL,KAAK;YACL,WAAW,EAAE,eAAe,QAAQ,CAAC,QAAQ,CAAC,EAAE;YAChD,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,yDAAyD,CAAC;YACnG,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAED,MAAM,WAAW,GAAG,MAAM,kBAAkB,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;IAExE,OAAO;QACL,KAAK;QACL,WAAW;QACX,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,WAAW,CAAC;QACrD,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,kBAAkB,CAAC,MAAc,EAAE,MAAc,EAAE,SAAiB;IACjF,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IAEzC,uDAAuD;IACvD,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,GAAG,CAAC,EAAE,OAAO,EAAE,EAAE,CAAC;QAC7C,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAC5C,KAAK,EAAE,0BAA0B;gBACjC,UAAU,EAAE,IAAI;gBAChB,QAAQ,EAAE;oBACR;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE;4BACP;gCACE,IAAI,EAAE,OAAO;gCACb,MAAM,EAAE;oCACN,IAAI,EAAE,QAAQ;oCACd,UAAU,EAAE,SAAoE;oCAChF,IAAI,EAAE,MAAM;iCACb;6BACF;4BACD;gCACE,IAAI,EAAE,MAAM;gCACZ,IAAI,EAAE;;;;;;8JAMwI;6BAC/I;yBACF
;qBACF;iBACF;aACF,CAAC,CAAC;YAEH,OAAO,QAAQ,CAAC,OAAO;iBACpB,MAAM,CAAC,CAAC,KAAK,EAAgC,EAAE,CAAC,KAAK,CAAC,IAAI,KAAK,MAAM,CAAC;iBACtE,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC;iBAC1B,IAAI,CAAC,EAAE,CAAC,CAAC;QACd,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,WAAW,GACf,GAAG,YAAY,KAAK;gBACpB,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC;oBACjC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC;oBAClC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC;oBAC3B,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;YAErC,IAAI,WAAW,IAAI,OAAO,KAAK,CAAC,EAAE,CAAC;gBACjC,yBAAyB;gBACzB,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;gBAC9C,SAAS;YACX,CAAC;YAED,gEAAgE;YAChE,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;YACvE,OAAO,2CAA2C,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,oBAAoB,MAAM,gFAAgF,CAAC;QACnN,CAAC;IACH,CAAC;IAED,OAAO,mCAAmC,CAAC;AAC7C,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,WAAmB;IACzE,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;;mBAE7B,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;IAErD,KAAK,KAAK,QAAQ;;;;EAIpB,WAAW;CACZ,CAAC;AACF,CAAC;AAED,SAAS,YAAY,CAAC,GAAW;IAC/B,QAAQ,GAAG,EAAE,CAAC;QACZ,KAAK,MAAM,CAAC;QACZ,KAAK,OAAO;YACV,OAAO,YAAY,CAAC;QACtB,KAAK,MAAM;YACT,OAAO,WAAW,CAAC;QACrB,KAAK,MAAM;YACT,OAAO,WAAW,CAAC;QACrB,KAAK,OAAO;YACV,OAAO,YAAY,CAAC;QACtB;YACE,OAAO,YAAY,CAAC;IACxB,CAAC;AACH,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pdf.d.ts","sourceRoot":"","sources":["../../src/processors/pdf.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,SAAS;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAEnD;AAED,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,
|
|
1
|
+
{"version":3,"file":"pdf.d.ts","sourceRoot":"","sources":["../../src/processors/pdf.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,SAAS;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAEnD;AAED,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,CA6BrE"}
|
package/dist/processors/pdf.js
CHANGED
|
@@ -7,7 +7,11 @@ export async function processPdf(filePath) {
|
|
|
7
7
|
const title = basename(filePath, '.pdf');
|
|
8
8
|
const buffer = readFileSync(filePath);
|
|
9
9
|
try {
|
|
10
|
-
|
|
10
|
+
// Import from lib/ directly to avoid pdf-parse's index.js self-test bug
|
|
11
|
+
// (index.js tries to open ./test/data/05-versions-space.pdf on import)
|
|
12
|
+
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
|
|
13
|
+
// @ts-expect-error — pdf-parse/lib has no type declarations
|
|
14
|
+
const pdfParseModule = await import('pdf-parse/lib/pdf-parse.js');
|
|
11
15
|
const pdfParse = (pdfParseModule.default ?? pdfParseModule);
|
|
12
16
|
const data = await pdfParse(buffer);
|
|
13
17
|
const content = data.text.trim();
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pdf.js","sourceRoot":"","sources":["../../src/processors/pdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAUrC,MAAM,UAAU,SAAS,CAAC,QAAgB;IACxC,OAAO,QAAQ,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;AACjD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC/C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IAEtC,IAAI,CAAC;QACH,MAAM,cAAc,GAAG,MAAM,MAAM,CAAC,
|
|
1
|
+
{"version":3,"file":"pdf.js","sourceRoot":"","sources":["../../src/processors/pdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAUrC,MAAM,UAAU,SAAS,CAAC,QAAgB;IACxC,OAAO,QAAQ,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;AACjD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC/C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IAEtC,IAAI,CAAC;QACH,wEAAwE;QACxE,uEAAuE;QACvE,6DAA6D;QAC7D,4DAA4D;QAC5D,MAAM,cAAc,GAAG,MAAM,MAAM,CAAC,4BAA4B,CAAC,CAAC;QAClE,MAAM,QAAQ,GAAG,CAAC,cAAc,CAAC,OAAO,IAAI,cAAc,CAAgG,CAAC;QAC3J,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;QACpC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAEjC,OAAO;YACL,KAAK;YACL,OAAO;YACP,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC;YAChE,SAAS,EAAE,IAAI,CAAC,QAAQ;YACxB,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,KAAK;YACL,OAAO,EAAE,SAAS,KAAK,4BAA4B;YACnD,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,0DAA0D,CAAC;YACpG,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,OAAe,EAAE,SAAkB;IACzF,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;iBAC/B,SAAS,CAAC,CAAC,CAAC,kBAAkB,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE;mBAC5C,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;;;EAIvD,OAAO;CACR,CAAC;AACF,CAAC"}
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* PowerPoint (.pptx) processor.
|
|
3
|
-
* Extracts
|
|
3
|
+
* Extracts slides via zip (adm-zip) + XML parsing.
|
|
4
|
+
* Features: slide titles, body text with bullet/numbered list structure,
|
|
5
|
+
* speaker notes, image alt-text, proper paragraph grouping.
|
|
4
6
|
*/
|
|
5
7
|
export interface PptxResult {
|
|
6
8
|
title: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pptx.d.ts","sourceRoot":"","sources":["../../src/processors/pptx.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"pptx.d.ts","sourceRoot":"","sources":["../../src/processors/pptx.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAKH,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAiBD,wBAAsB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CAgCvE"}
|