wikimem 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/dist/cli/commands/init.d.ts.map +1 -1
  2. package/dist/cli/commands/init.js +97 -8
  3. package/dist/cli/commands/init.js.map +1 -1
  4. package/dist/core/connectors.d.ts +1 -1
  5. package/dist/core/connectors.d.ts.map +1 -1
  6. package/dist/core/git.d.ts +1 -1
  7. package/dist/core/git.d.ts.map +1 -1
  8. package/dist/core/git.js.map +1 -1
  9. package/dist/core/ingest.d.ts.map +1 -1
  10. package/dist/core/ingest.js +74 -3
  11. package/dist/core/ingest.js.map +1 -1
  12. package/dist/core/lint.d.ts.map +1 -1
  13. package/dist/core/lint.js +23 -4
  14. package/dist/core/lint.js.map +1 -1
  15. package/dist/core/oauth-defaults.d.ts +31 -0
  16. package/dist/core/oauth-defaults.d.ts.map +1 -0
  17. package/dist/core/oauth-defaults.js +79 -0
  18. package/dist/core/oauth-defaults.js.map +1 -0
  19. package/dist/core/observer.d.ts +24 -1
  20. package/dist/core/observer.d.ts.map +1 -1
  21. package/dist/core/observer.js +146 -4
  22. package/dist/core/observer.js.map +1 -1
  23. package/dist/core/sync/gdrive.d.ts +14 -0
  24. package/dist/core/sync/gdrive.d.ts.map +1 -0
  25. package/dist/core/sync/gdrive.js +205 -0
  26. package/dist/core/sync/gdrive.js.map +1 -0
  27. package/dist/core/sync/github.d.ts +20 -0
  28. package/dist/core/sync/github.d.ts.map +1 -0
  29. package/dist/core/sync/github.js +206 -0
  30. package/dist/core/sync/github.js.map +1 -0
  31. package/dist/core/sync/gmail.d.ts +15 -0
  32. package/dist/core/sync/gmail.d.ts.map +1 -0
  33. package/dist/core/sync/gmail.js +159 -0
  34. package/dist/core/sync/gmail.js.map +1 -0
  35. package/dist/core/sync/index.d.ts +47 -0
  36. package/dist/core/sync/index.d.ts.map +1 -0
  37. package/dist/core/sync/index.js +100 -0
  38. package/dist/core/sync/index.js.map +1 -0
  39. package/dist/core/sync/jira.d.ts +15 -0
  40. package/dist/core/sync/jira.d.ts.map +1 -0
  41. package/dist/core/sync/jira.js +176 -0
  42. package/dist/core/sync/jira.js.map +1 -0
  43. package/dist/core/sync/linear.d.ts +15 -0
  44. package/dist/core/sync/linear.d.ts.map +1 -0
  45. package/dist/core/sync/linear.js +111 -0
  46. package/dist/core/sync/linear.js.map +1 -0
  47. package/dist/core/sync/notion.d.ts +14 -0
  48. package/dist/core/sync/notion.d.ts.map +1 -0
  49. package/dist/core/sync/notion.js +168 -0
  50. package/dist/core/sync/notion.js.map +1 -0
  51. package/dist/core/sync/rss.d.ts +20 -0
  52. package/dist/core/sync/rss.d.ts.map +1 -0
  53. package/dist/core/sync/rss.js +165 -0
  54. package/dist/core/sync/rss.js.map +1 -0
  55. package/dist/core/sync/scheduler.d.ts +31 -0
  56. package/dist/core/sync/scheduler.d.ts.map +1 -0
  57. package/dist/core/sync/scheduler.js +129 -0
  58. package/dist/core/sync/scheduler.js.map +1 -0
  59. package/dist/core/sync/slack.d.ts +16 -0
  60. package/dist/core/sync/slack.d.ts.map +1 -0
  61. package/dist/core/sync/slack.js +173 -0
  62. package/dist/core/sync/slack.js.map +1 -0
  63. package/dist/core/vault.d.ts +22 -0
  64. package/dist/core/vault.d.ts.map +1 -1
  65. package/dist/core/vault.js +65 -0
  66. package/dist/core/vault.js.map +1 -1
  67. package/dist/core/webhooks.d.ts +13 -0
  68. package/dist/core/webhooks.d.ts.map +1 -0
  69. package/dist/core/webhooks.js +206 -0
  70. package/dist/core/webhooks.js.map +1 -0
  71. package/dist/mcp-server.d.ts +11 -6
  72. package/dist/mcp-server.d.ts.map +1 -1
  73. package/dist/mcp-server.js +99 -6
  74. package/dist/mcp-server.js.map +1 -1
  75. package/dist/mcp-tools-extended.d.ts +15 -0
  76. package/dist/mcp-tools-extended.d.ts.map +1 -0
  77. package/dist/mcp-tools-extended.js +277 -0
  78. package/dist/mcp-tools-extended.js.map +1 -0
  79. package/dist/processors/csv.d.ts +18 -0
  80. package/dist/processors/csv.d.ts.map +1 -0
  81. package/dist/processors/csv.js +230 -0
  82. package/dist/processors/csv.js.map +1 -0
  83. package/dist/processors/image.d.ts.map +1 -1
  84. package/dist/processors/image.js +55 -27
  85. package/dist/processors/image.js.map +1 -1
  86. package/dist/processors/pdf.d.ts.map +1 -1
  87. package/dist/processors/pdf.js +5 -1
  88. package/dist/processors/pdf.js.map +1 -1
  89. package/dist/processors/pptx.d.ts +3 -1
  90. package/dist/processors/pptx.d.ts.map +1 -1
  91. package/dist/processors/pptx.js +236 -95
  92. package/dist/processors/pptx.js.map +1 -1
  93. package/dist/processors/xlsx.d.ts +2 -0
  94. package/dist/processors/xlsx.d.ts.map +1 -1
  95. package/dist/processors/xlsx.js +182 -46
  96. package/dist/processors/xlsx.js.map +1 -1
  97. package/dist/templates/source-types.d.ts +33 -0
  98. package/dist/templates/source-types.d.ts.map +1 -0
  99. package/dist/templates/source-types.js +178 -0
  100. package/dist/templates/source-types.js.map +1 -0
  101. package/dist/web/public/index.html +1785 -103
  102. package/dist/web/server.d.ts.map +1 -1
  103. package/dist/web/server.js +746 -38
  104. package/dist/web/server.js.map +1 -1
  105. package/package.json +4 -1
  106. package/src/web/public/index.html +1785 -103
  107. package/templates/source-types/article.md +21 -0
  108. package/templates/source-types/book.md +21 -0
  109. package/templates/source-types/paper.md +23 -0
  110. package/templates/source-types/podcast.md +21 -0
  111. package/templates/source-types/raw-notes.md +17 -0
  112. package/templates/source-types/tweet-thread.md +19 -0
  113. package/templates/source-types/video.md +21 -0
@@ -0,0 +1,230 @@
1
+ /**
2
+ * CSV (.csv, .tsv) processor.
3
+ * Pure Node.js parser — no external dependencies.
4
+ * Handles: quoted fields, commas in values, newlines in quotes, BOM, TSV.
5
+ */
6
+ import { readFileSync } from 'node:fs';
7
+ import { basename, extname } from 'node:path';
8
// Cap on data rows rendered into the markdown table; the header row is emitted in addition to these.
+ const MAX_DISPLAY_ROWS = 100;
9
// Cap on columns rendered per table row; wider tables get a trailing "..." cell.
+ const MAX_DISPLAY_COLS = 10;
10
/**
 * Report whether a path names a CSV or TSV file.
 *
 * Only the extension is inspected (case-insensitively); the file itself is
 * never opened.
 *
 * @param filePath Path or bare file name to classify.
 * @returns `true` when the extension is `.csv` or `.tsv`.
 */
export function isCsvFile(filePath) {
  const suffix = extname(filePath).toLowerCase();
  return ['.csv', '.tsv'].includes(suffix);
}
15
/**
 * Process a CSV or TSV file into structured output with a markdown table.
 *
 * The file is read synchronously as UTF-8, a leading BOM is removed, and the
 * delimiter is forced to a tab for `.tsv` files or sniffed from the content
 * otherwise. The first parsed row is treated as the header.
 *
 * @param filePath Path of the file to read.
 * @returns A promise of `{ title, content, markdown, rowCount, columnCount, sourcePath }`.
 *   `rowCount` excludes the header row; `columnCount` is the widest row seen.
 *   When no rows are parsed, `content` is a placeholder and both counts are 0.
 */
export async function processCsv(filePath) {
  // Keep the extension exactly as written: basename() matches its suffix
  // case-sensitively, so stripping a lower-cased copy would leave "Data.CSV"
  // with the title "Data.CSV" instead of "Data".
  const rawExt = extname(filePath);
  const ext = rawExt.toLowerCase();
  const title = basename(filePath, rawExt);

  const cleaned = stripBom(readFileSync(filePath, 'utf-8'));
  const delimiter = ext === '.tsv' ? '\t' : detectDelimiter(cleaned);
  const rows = parseRows(cleaned, delimiter);

  if (rows.length === 0) {
    return {
      title,
      content: `[CSV — no data extracted from ${basename(filePath)}]`,
      markdown: buildMarkdown(title, filePath, '', 0, 0, []),
      rowCount: 0,
      columnCount: 0,
      sourcePath: filePath,
    };
  }

  // Fold instead of Math.max(...lengths): spreading one argument per row can
  // exceed the engine's argument limit and throw RangeError on large files.
  const columnCount = rows.reduce((widest, row) => Math.max(widest, row.length), 0);
  const rowCount = rows.length - 1; // exclude header row
  const columnTypes = detectColumnTypes(rows);
  const tableContent = buildTable(rows, columnCount);

  return {
    title,
    content: tableContent,
    markdown: buildMarkdown(title, filePath, tableContent, rowCount, columnCount, columnTypes),
    rowCount,
    columnCount,
    sourcePath: filePath,
  };
}
46
+ // ---------------------------------------------------------------------------
47
+ // CSV parser — handles RFC 4180 (quoted fields, embedded commas/newlines)
48
+ // ---------------------------------------------------------------------------
49
/**
 * Drop a leading U+FEFF byte-order mark, if present.
 *
 * @param text Decoded file contents.
 * @returns The text without its first character when that character is the
 *   BOM; otherwise the text unchanged.
 */
function stripBom(text) {
  if (text.startsWith('\uFEFF')) {
    return text.substring(1);
  }
  return text;
}
52
/**
 * Sniff the most likely delimiter from the first 2000 characters of the text.
 *
 * Candidates are comma, tab, semicolon and pipe. Only occurrences outside
 * double-quoted fields are counted, so punctuation inside quoted values
 * (e.g. `"a;b;c",x`) cannot outvote the real delimiter.
 *
 * @param text File contents (BOM already removed).
 * @returns The candidate with the highest count. Ties go to the earlier
 *   candidate in the order `,` `\t` `;` `|`; a sample with no candidates at
 *   all yields `,`.
 */
function detectDelimiter(text) {
  const sample = text.substring(0, 2000);
  const candidates = [',', '\t', ';', '|'];
  const tally = new Map(candidates.map((c) => [c, 0]));

  let quoted = false;
  for (const ch of sample) {
    if (ch === '"') {
      // An escaped quote ("") toggles twice and so leaves the state unchanged.
      quoted = !quoted;
      continue;
    }
    if (!quoted && tally.has(ch)) {
      tally.set(ch, (tally.get(ch) ?? 0) + 1);
    }
  }

  let best = ',';
  let bestCount = 0;
  for (const candidate of candidates) {
    const count = tally.get(candidate) ?? 0;
    // Strictly greater keeps the earlier candidate on ties.
    if (count > bestCount) {
      best = candidate;
      bestCount = count;
    }
  }
  return best;
}
69
/**
 * Parse CSV text into a 2-D array of strings, respecting quoted fields.
 *
 * Rules implemented here:
 * - A double quote opens a quoted field when nothing but whitespace precedes
 *   it in the current field; that padding is discarded, so `a, "b,c"` yields
 *   `['a', 'b,c']` rather than splitting at the embedded delimiter.
 * - Inside quotes, `""` is a literal quote, and delimiters / line breaks are
 *   kept as field content.
 * - Rows end at CR, LF or CRLF. Every field is trimmed, and rows whose fields
 *   are all empty are dropped.
 * - An unterminated quote consumes the rest of the input as one field.
 *
 * @param text File contents (BOM already removed).
 * @param delimiter Single-character field separator.
 * @returns One array of trimmed field strings per non-empty row.
 */
function parseRows(text, delimiter) {
  const rows = [];
  let currentRow = [];
  let field = '';
  let inQuotes = false;

  // Close the field being accumulated and start a fresh one.
  const endField = () => {
    currentRow.push(field.trim());
    field = '';
  };
  // Close the current row, keeping it only if some field has content.
  const endRow = () => {
    endField();
    if (currentRow.some((c) => c.length > 0)) {
      rows.push(currentRow);
    }
    currentRow = [];
  };

  for (let i = 0; i < text.length; i++) {
    const ch = text[i];

    if (inQuotes) {
      if (ch !== '"') {
        field += ch;
      } else if (text[i + 1] === '"') {
        // Escaped quote ("")
        field += '"';
        i++;
      } else {
        // End of quoted field
        inQuotes = false;
      }
      continue;
    }

    if (ch === '"' && field.trim().length === 0) {
      // Whitespace before the opening quote would be trimmed anyway; drop it
      // so the quoted content is recognised.
      field = '';
      inQuotes = true;
    } else if (ch === delimiter) {
      endField();
    } else if (ch === '\r' || ch === '\n') {
      // Treat CRLF as a single line break.
      if (ch === '\r' && text[i + 1] === '\n') {
        i++;
      }
      endRow();
    } else {
      field += ch;
    }
  }

  // Flush last field/row when the input does not end with a line break.
  if (field.length > 0 || currentRow.length > 0) {
    endRow();
  }
  return rows;
}
142
/**
 * Guess a display type for each header column from a sample of the data.
 *
 * Up to 50 data rows (the rows after the header) are sampled. Empty cells are
 * ignored. A column gets the first of `number`, `boolean`, `date` whose
 * pattern matches more than 80% of its non-empty sampled values; otherwise it
 * is `text`.
 *
 * - number: optional leading `-`, digits with an optional fraction, or a
 *   leading-dot decimal such as `.5`; commas are removed before matching.
 * - boolean: `true`/`false`/`yes`/`no`/`0`/`1`, case-insensitive.
 * - date: starts with `YYYY-M-D` or `D-M-YY(YY)`, using `-` or `/`.
 *
 * @param rows Parsed rows, header first.
 * @returns One `{ name, type }` per header cell (empty header names become
 *   `col_<n>`, 1-based), or `[]` when there is no header or no data row.
 */
function detectColumnTypes(rows) {
  if (rows.length < 2) {
    return [];
  }
  const headers = rows[0];
  if (!headers) {
    return [];
  }

  const sampleRows = rows.slice(1, 51); // sample up to 50 data rows
  const booleanWords = ['true', 'false', 'yes', 'no', '0', '1'];
  // Checked in order; the first detector above the threshold wins.
  const detectors = [
    {
      type: 'number',
      // `\.\d+` also accepts leading-dot decimals (".5", "-.5").
      matches: (v) => /^-?(?:\d+\.?\d*|\.\d+)$/.test(v.replace(/,/g, '')),
    },
    {
      type: 'boolean',
      matches: (v) => booleanWords.includes(v.toLowerCase()),
    },
    {
      type: 'date',
      matches: (v) => /^\d{4}[-/]\d{1,2}[-/]\d{1,2}/.test(v) || /^\d{1,2}[-/]\d{1,2}[-/]\d{2,4}/.test(v),
    },
  ];

  return headers.map((header, colIdx) => {
    const name = header || `col_${colIdx + 1}`;
    const values = sampleRows
      .map((row) => row[colIdx] ?? '')
      .filter((v) => v.length > 0);
    if (values.length === 0) {
      return { name, type: 'text' };
    }
    const hit = detectors.find((d) => values.filter(d.matches).length / values.length > 0.8);
    return { name, type: hit ? hit.type : 'text' };
  });
}
174
+ // ---------------------------------------------------------------------------
175
+ // Markdown table builder
176
+ // ---------------------------------------------------------------------------
177
+ function buildTable(rows, totalCols) {
178
+ if (rows.length === 0)
179
+ return '';
180
+ const truncateCols = totalCols > MAX_DISPLAY_COLS;
181
+ const displayCols = truncateCols ? MAX_DISPLAY_COLS : totalCols;
182
+ const lines = [];
183
+ for (let i = 0; i < Math.min(rows.length, MAX_DISPLAY_ROWS + 1); i++) {
184
+ const row = rows[i];
185
+ if (!row)
186
+ continue;
187
+ const cells = [];
188
+ for (let j = 0; j < displayCols; j++) {
189
+ const cell = row[j] ?? '';
190
+ // Escape pipes and collapse newlines for table safety
191
+ cells.push(cell.replace(/\|/g, '\\|').replace(/\n/g, ' '));
192
+ }
193
+ if (truncateCols) {
194
+ cells.push('...');
195
+ }
196
+ lines.push(`| ${cells.join(' | ')} |`);
197
+ // Separator after header row
198
+ if (i === 0) {
199
+ const sep = cells.map(() => '---');
200
+ lines.push(`| ${sep.join(' | ')} |`);
201
+ }
202
+ }
203
+ if (rows.length > MAX_DISPLAY_ROWS + 1) {
204
+ const remaining = rows.length - MAX_DISPLAY_ROWS - 1;
205
+ lines.push(`\n> _...and ${remaining} more rows (truncated)_`);
206
+ }
207
+ return lines.join('\n');
208
+ }
209
+ // ---------------------------------------------------------------------------
210
+ // Full markdown output
211
+ // ---------------------------------------------------------------------------
212
/**
 * Assemble the full markdown document for a processed CSV file.
 *
 * The document is a `# title` heading, a blockquote of metadata (source
 * link, type, row and column counts, detected column types, processing date
 * as `YYYY-MM-DD` in UTC), then a `## Data` section holding `content`.
 *
 * @param title Document heading.
 * @param filePath Source path; its base name is the link text and the full
 *   path is the link target.
 * @param content Pre-rendered table markdown (may be empty).
 * @param rowCount Number of data rows.
 * @param columnCount Number of columns.
 * @param columnTypes `{ name, type }` entries; an empty list renders as `N/A`.
 * @returns The markdown document, ending with a newline.
 */
function buildMarkdown(title, filePath, content, rowCount, columnCount, columnTypes) {
  let typesSummary = 'N/A';
  if (columnTypes.length > 0) {
    typesSummary = columnTypes.map(({ name, type }) => `\`${name}\` (${type})`).join(', ');
  }
  const processedOn = new Date().toISOString().split('T')[0];
  const metadata = [
    `> **Source:** [${basename(filePath)}](${filePath})`,
    `> **Type:** CSV`,
    `> **Rows:** ${rowCount}`,
    `> **Columns:** ${columnCount}`,
    `> **Column types:** ${typesSummary}`,
    `> **Processed:** ${processedOn}`,
  ];
  // The trailing '' gives the document its final newline.
  return [`# ${title}`, '', ...metadata, '', '## Data', '', `${content}`, ''].join('\n');
}
230
+ //# sourceMappingURL=csv.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"csv.js","sourceRoot":"","sources":["../../src/processors/csv.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAW9C,MAAM,gBAAgB,GAAG,GAAG,CAAC;AAC7B,MAAM,gBAAgB,GAAG,EAAE,CAAC;AAE5B,oDAAoD;AACpD,MAAM,UAAU,SAAS,CAAC,QAAgB;IACxC,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,OAAO,GAAG,KAAK,MAAM,IAAI,GAAG,KAAK,MAAM,CAAC;AAC1C,CAAC;AAED,4EAA4E;AAC5E,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC/C,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAEtC,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAC5C,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;IAE9B,MAAM,SAAS,GAAG,GAAG,KAAK,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;IACnE,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IAE3C,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO;YACL,KAAK;YACL,OAAO,EAAE,iCAAiC,QAAQ,CAAC,QAAQ,CAAC,GAAG;YAC/D,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACtD,QAAQ,EAAE,CAAC;YACX,WAAW,EAAE,CAAC;YACd,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;IAC3D,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,qBAAqB;IACvD,MAAM,WAAW,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC;IAC5C,MAAM,YAAY,GAAG,UAAU,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;IAEnD,OAAO;QACL,KAAK;QACL,OAAO,EAAE,YAAY;QACrB,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,QAAQ,EAAE,WAAW,EAAE,WAAW,CAAC;QAC1F,QAAQ;QACR,WAAW;QACX,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,0EAA0E;AAC1E,8EAA8E;AAE9E,SAAS,QAAQ,CAAC,IAAY;IAC5B,OAAO,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AAC9D,CAAC;AAED,gEAAgE;AAChE,SAAS,eAAe,CAAC,IAAY;IACnC,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IACvC,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,C
AAC,CAAC,MAAM,CAAC;IACjD,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IAChD,MAAM,UAAU,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IACrD,MAAM,KAAK,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IAEjD,MAAM,MAAM,GAA4B;QACtC,CAAC,GAAG,EAAE,MAAM,CAAC;QACb,CAAC,IAAI,EAAE,IAAI,CAAC;QACZ,CAAC,GAAG,EAAE,UAAU,CAAC;QACjB,CAAC,GAAG,EAAE,KAAK,CAAC;KACb,CAAC;IACF,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACnC,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IACvB,OAAO,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AAC7C,CAAC;AAED,4EAA4E;AAC5E,SAAS,SAAS,CAAC,IAAY,EAAE,SAAiB;IAChD,MAAM,IAAI,GAAe,EAAE,CAAC;IAC5B,IAAI,UAAU,GAAa,EAAE,CAAC;IAC9B,IAAI,KAAK,GAAG,EAAE,CAAC;IACf,IAAI,QAAQ,GAAG,KAAK,CAAC;IACrB,IAAI,CAAC,GAAG,CAAC,CAAC;IAEV,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACvB,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QAEnB,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;gBACf,qBAAqB;gBACrB,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;oBAC/C,KAAK,IAAI,GAAG,CAAC;oBACb,CAAC,IAAI,CAAC,CAAC;oBACP,SAAS;gBACX,CAAC;gBACD,sBAAsB;gBACtB,QAAQ,GAAG,KAAK,CAAC;gBACjB,CAAC,EAAE,CAAC;gBACJ,SAAS;YACX,CAAC;YACD,KAAK,IAAI,EAAE,CAAC;YACZ,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,oBAAoB;QACpB,IAAI,EAAE,KAAK,GAAG,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACrC,QAAQ,GAAG,IAAI,CAAC;YAChB,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,IAAI,EAAE,KAAK,SAAS,EAAE,CAAC;YACrB,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;YAC9B,KAAK,GAAG,EAAE,CAAC;YACX,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,IAAI,EAAE,KAAK,IAAI,EAAE,CAAC;YAChB,aAAa;YACb,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;YAC9B,KAAK,GAAG,EAAE,CAAC;YACX,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC;gBACzC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YACxB,CAAC;YACD,UAAU,GAAG,E
AAE,CAAC;YAChB,CAAC,EAAE,CAAC;YACJ,IAAI,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,IAAI;gBAAE,CAAC,EAAE,CAAC;YAC7C,SAAS;QACX,CAAC;QAED,IAAI,EAAE,KAAK,IAAI,EAAE,CAAC;YAChB,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;YAC9B,KAAK,GAAG,EAAE,CAAC;YACX,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC;gBACzC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YACxB,CAAC;YACD,UAAU,GAAG,EAAE,CAAC;YAChB,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,KAAK,IAAI,EAAE,CAAC;QACZ,CAAC,EAAE,CAAC;IACN,CAAC;IAED,uBAAuB;IACvB,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC9C,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;QAC9B,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC;YACzC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACxB,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAWD,SAAS,iBAAiB,CAAC,IAAgB;IACzC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,EAAE,CAAC;IAE/B,MAAM,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IACxB,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IAExB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,4BAA4B;IAEvF,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,MAAM,EAAc,EAAE;QAChD,MAAM,MAAM,GAAG,QAAQ;aACpB,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;aAC/B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAE/B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;QAC/D,CAAC;QAED,eAAe;QACf,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QAC9F,IAAI,WAAW,GAAG,MAAM,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACtC,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;QACjE,CAAC;QAED,gBAAgB;QAChB,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CACpC,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,
CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CACnE,CAAC,MAAM,CAAC;QACT,IAAI,SAAS,GAAG,MAAM,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACpC,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QAClE,CAAC;QAED,aAAa;QACb,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CACpC,8BAA8B,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,gCAAgC,CAAC,IAAI,CAAC,CAAC,CAAC,CACnF,CAAC,MAAM,CAAC;QACT,IAAI,SAAS,GAAG,MAAM,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACpC,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;QAC/D,CAAC;QAED,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;IAC/D,CAAC,CAAC,CAAC;AACL,CAAC;AAED,8EAA8E;AAC9E,yBAAyB;AACzB,8EAA8E;AAE9E,SAAS,UAAU,CAAC,IAAgB,EAAE,SAAiB;IACrD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEjC,MAAM,YAAY,GAAG,SAAS,GAAG,gBAAgB,CAAC;IAClD,MAAM,WAAW,GAAG,YAAY,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC,CAAC,SAAS,CAAC;IAEhE,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,gBAAgB,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QACrE,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,CAAC,GAAG;YAAE,SAAS;QAEnB,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;YACrC,MAAM,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC1B,sDAAsD;YACtD,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC;QAC7D,CAAC;QACD,IAAI,YAAY,EAAE,CAAC;YACjB,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACpB,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEvC,6BAA6B;QAC7B,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YACZ,MAAM,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC;YACnC,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACvC,CAAC;IACH,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,GAAG,gBAAgB,GAAG,CAAC,EAAE,CAAC;QACvC,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,GAAG,gBAAgB,GAAG,CAAC,CAAC;QACrD,KAAK,CAAC,IAAI,CAAC,eAAe,SAAS,yBAAyB,CAAC,CAAC;IAChE
,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,8EAA8E;AAC9E,uBAAuB;AACvB,8EAA8E;AAE9E,SAAS,aAAa,CACpB,KAAa,EACb,QAAgB,EAChB,OAAe,EACf,QAAgB,EAChB,WAAmB,EACnB,WAAyB;IAEzB,MAAM,YAAY,GAChB,WAAW,CAAC,MAAM,GAAG,CAAC;QACpB,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,OAAO,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;QAChE,CAAC,CAAC,KAAK,CAAC;IAEZ,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;;cAElC,QAAQ;iBACL,WAAW;sBACN,YAAY;mBACf,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;;;EAIvD,OAAO;CACR,CAAC;AACF,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"image.d.ts","sourceRoot":"","sources":["../../src/processors/image.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;CACpB;AAID,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAErD;AAED,wBAAsB,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CA+DzE"}
1
+ {"version":3,"file":"image.d.ts","sourceRoot":"","sources":["../../src/processors/image.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;CACpB;AAID,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAErD;AAED,wBAAsB,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAgCzE"}
@@ -25,42 +25,70 @@ export async function processImage(filePath) {
25
25
  sourcePath: filePath,
26
26
  };
27
27
  }
28
+ const description = await describeWithVision(apiKey, base64, mediaType);
29
+ return {
30
+ title,
31
+ description,
32
+ markdown: buildMarkdown(title, filePath, description),
33
+ sourcePath: filePath,
34
+ };
35
+ }
36
+ async function describeWithVision(apiKey, base64, mediaType) {
28
37
  const client = new Anthropic({ apiKey });
29
- const response = await client.messages.create({
30
- model: 'claude-sonnet-4-20250514',
31
- max_tokens: 2048,
32
- messages: [
33
- {
34
- role: 'user',
35
- content: [
36
- {
37
- type: 'image',
38
- source: { type: 'base64', media_type: mediaType, data: base64 },
39
- },
38
+ // Try Claude Vision — retry once on transient failures
39
+ for (let attempt = 0; attempt < 2; attempt++) {
40
+ try {
41
+ const response = await client.messages.create({
42
+ model: 'claude-sonnet-4-20250514',
43
+ max_tokens: 2048,
44
+ messages: [
40
45
  {
41
- type: 'text',
42
- text: `Describe this image in detail for a knowledge base. Include:
46
+ role: 'user',
47
+ content: [
48
+ {
49
+ type: 'image',
50
+ source: {
51
+ type: 'base64',
52
+ media_type: mediaType,
53
+ data: base64,
54
+ },
55
+ },
56
+ {
57
+ type: 'text',
58
+ text: `Describe this image in detail for a knowledge base. Include:
43
59
  1. What the image shows (objects, people, text, diagrams, charts)
44
60
  2. Key information or data visible
45
- 3. Any text content (OCR)
61
+ 3. Any text content (OCR — extract ALL visible text verbatim)
46
62
  4. Context and significance
47
63
 
48
64
  Be thorough but concise. This description will represent the image in a markdown wiki where agents need to understand its content without seeing it directly.`,
65
+ },
66
+ ],
49
67
  },
50
68
  ],
51
- },
52
- ],
53
- });
54
- const description = response.content
55
- .filter((block) => block.type === 'text')
56
- .map((block) => block.text)
57
- .join('');
58
- return {
59
- title,
60
- description,
61
- markdown: buildMarkdown(title, filePath, description),
62
- sourcePath: filePath,
63
- };
69
+ });
70
+ return response.content
71
+ .filter((block) => block.type === 'text')
72
+ .map((block) => block.text)
73
+ .join('');
74
+ }
75
+ catch (err) {
76
+ const isRetryable = err instanceof Error &&
77
+ (err.message.includes('rate_limit') ||
78
+ err.message.includes('overloaded') ||
79
+ err.message.includes('529') ||
80
+ err.message.includes('timeout'));
81
+ if (isRetryable && attempt === 0) {
82
+ // Wait 2s and retry once
83
+ await new Promise((r) => setTimeout(r, 2000));
84
+ continue;
85
+ }
86
+ // Non-retryable or second failure — return fallback description
87
+ const sizeKB = Math.round(Buffer.from(base64, 'base64').length / 1024);
88
+ return `[Image — Claude Vision analysis failed: ${err instanceof Error ? err.message : 'unknown error'}]\n\n_File size: ${sizeKB} KB. Set ANTHROPIC_API_KEY and ensure API access to enable image description._`;
89
+ }
90
+ }
91
+ return '[Image — description unavailable]';
64
92
  }
65
93
  function buildMarkdown(title, filePath, description) {
66
94
  return `# ${title}
@@ -1 +1 @@
1
- {"version":3,"file":"image.js","sourceRoot":"","sources":["../../src/processors/image.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC9C,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAS1C,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC;AAEzF,MAAM,UAAU,WAAW,CAAC,QAAgB;IAC1C,OAAO,oBAAoB,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;AACnE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,QAAgB;IACjD,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAEtC,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;QACnC,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,gBAAgB,CAAC,GAAG,oBAAoB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC1G,CAAC;IAED,MAAM,SAAS,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAC5C,MAAM,SAAS,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;IAEpC,0CAA0C;IAC1C,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IAChD,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,qEAAqE;QACrE,OAAO;YACL,KAAK;YACL,WAAW,EAAE,eAAe,QAAQ,CAAC,QAAQ,CAAC,EAAE;YAChD,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,yDAAyD,CAAC;YACnG,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IAEzC,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;QAC5C,KAAK,EAAE,0BAA0B;QACjC,UAAU,EAAE,IAAI;QAChB,QAAQ,EAAE;YACR;gBACE,IAAI,EAAE,MAAM;gBACZ,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,OAAO;wBACb,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,UAAU,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE;qBAChE;oBACD;wBACE,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE;;;;;;8JAM4I;qBACnJ;iBACF;aACF;SACF;KACF,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO;SACjC,MAAM,CAAC,CAAC,KAAK,EAAgC,EAAE,CAAC,KAAK,CAAC,IAAI,KAAK,MAAM,CAAC;SACtE,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC;SAC1B,IAAI,CAAC,EAAE,CAAC,CAAC;IAEZ,OAAO;QACL,KAAK;QACL,WAAW;QACX,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,WAAW,CAAC;QACrD,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC;AAED,SAAS,aAA
a,CAAC,KAAa,EAAE,QAAgB,EAAE,WAAmB;IACzE,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;;mBAE7B,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;IAErD,KAAK,KAAK,QAAQ;;;;EAIpB,WAAW;CACZ,CAAC;AACF,CAAC;AAED,SAAS,YAAY,CAAC,GAAW;IAC/B,QAAQ,GAAG,EAAE,CAAC;QACZ,KAAK,MAAM,CAAC;QACZ,KAAK,OAAO;YACV,OAAO,YAAY,CAAC;QACtB,KAAK,MAAM;YACT,OAAO,WAAW,CAAC;QACrB,KAAK,MAAM;YACT,OAAO,WAAW,CAAC;QACrB,KAAK,OAAO;YACV,OAAO,YAAY,CAAC;QACtB;YACE,OAAO,YAAY,CAAC;IACxB,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"image.js","sourceRoot":"","sources":["../../src/processors/image.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC9C,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAS1C,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC;AAEzF,MAAM,UAAU,WAAW,CAAC,QAAgB;IAC1C,OAAO,oBAAoB,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;AACnE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,QAAgB;IACjD,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAEtC,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;QACnC,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,gBAAgB,CAAC,GAAG,oBAAoB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC1G,CAAC;IAED,MAAM,SAAS,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAC5C,MAAM,SAAS,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;IAEpC,0CAA0C;IAC1C,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IAChD,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,qEAAqE;QACrE,OAAO;YACL,KAAK;YACL,WAAW,EAAE,eAAe,QAAQ,CAAC,QAAQ,CAAC,EAAE;YAChD,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,yDAAyD,CAAC;YACnG,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAED,MAAM,WAAW,GAAG,MAAM,kBAAkB,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;IAExE,OAAO;QACL,KAAK;QACL,WAAW;QACX,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,WAAW,CAAC;QACrD,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,kBAAkB,CAAC,MAAc,EAAE,MAAc,EAAE,SAAiB;IACjF,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IAEzC,uDAAuD;IACvD,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,GAAG,CAAC,EAAE,OAAO,EAAE,EAAE,CAAC;QAC7C,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAC5C,KAAK,EAAE,0BAA0B;gBACjC,UAAU,EAAE,IAAI;gBAChB,QAAQ,EAAE;oBACR;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE;4BACP;gCACE,IAAI,EAAE,OAAO;gCACb,MAAM,EAAE;oCACN,IAAI,EAAE,QAAQ;oCACd,UAAU,EAAE,SAAoE;oCAChF,IAAI,EAAE,MAAM;iCACb;6BACF;4BACD;gCACE,IAAI,EAAE,MAAM;gCACZ,IAAI,EAAE;;;;;;8JAMwI;6BAC/I;yBA
CF;qBACF;iBACF;aACF,CAAC,CAAC;YAEH,OAAO,QAAQ,CAAC,OAAO;iBACpB,MAAM,CAAC,CAAC,KAAK,EAAgC,EAAE,CAAC,KAAK,CAAC,IAAI,KAAK,MAAM,CAAC;iBACtE,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC;iBAC1B,IAAI,CAAC,EAAE,CAAC,CAAC;QACd,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,WAAW,GACf,GAAG,YAAY,KAAK;gBACpB,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC;oBACjC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC;oBAClC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC;oBAC3B,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;YAErC,IAAI,WAAW,IAAI,OAAO,KAAK,CAAC,EAAE,CAAC;gBACjC,yBAAyB;gBACzB,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;gBAC9C,SAAS;YACX,CAAC;YAED,gEAAgE;YAChE,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;YACvE,OAAO,2CAA2C,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,oBAAoB,MAAM,gFAAgF,CAAC;QACnN,CAAC;IACH,CAAC;IAED,OAAO,mCAAmC,CAAC;AAC7C,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,WAAmB;IACzE,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;;mBAE7B,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;IAErD,KAAK,KAAK,QAAQ;;;;EAIpB,WAAW;CACZ,CAAC;AACF,CAAC;AAED,SAAS,YAAY,CAAC,GAAW;IAC/B,QAAQ,GAAG,EAAE,CAAC;QACZ,KAAK,MAAM,CAAC;QACZ,KAAK,OAAO;YACV,OAAO,YAAY,CAAC;QACtB,KAAK,MAAM;YACT,OAAO,WAAW,CAAC;QACrB,KAAK,MAAM;YACT,OAAO,WAAW,CAAC;QACrB,KAAK,OAAO;YACV,OAAO,YAAY,CAAC;QACtB;YACE,OAAO,YAAY,CAAC;IACxB,CAAC;AACH,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"pdf.d.ts","sourceRoot":"","sources":["../../src/processors/pdf.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,SAAS;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAEnD;AAED,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,CAyBrE"}
1
+ {"version":3,"file":"pdf.d.ts","sourceRoot":"","sources":["../../src/processors/pdf.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,SAAS;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAEnD;AAED,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,CA6BrE"}
@@ -7,7 +7,11 @@ export async function processPdf(filePath) {
7
7
  const title = basename(filePath, '.pdf');
8
8
  const buffer = readFileSync(filePath);
9
9
  try {
10
- const pdfParseModule = await import('pdf-parse');
10
+ // Import from lib/ directly to avoid pdf-parse's index.js self-test bug
11
+ // (index.js tries to open ./test/data/05-versions-space.pdf on import)
12
+ // eslint-disable-next-line @typescript-eslint/ban-ts-comment
13
+ // @ts-expect-error — pdf-parse/lib has no type declarations
14
+ const pdfParseModule = await import('pdf-parse/lib/pdf-parse.js');
11
15
  const pdfParse = (pdfParseModule.default ?? pdfParseModule);
12
16
  const data = await pdfParse(buffer);
13
17
  const content = data.text.trim();
@@ -1 +1 @@
1
- {"version":3,"file":"pdf.js","sourceRoot":"","sources":["../../src/processors/pdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAUrC,MAAM,UAAU,SAAS,CAAC,QAAgB;IACxC,OAAO,QAAQ,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;AACjD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC/C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IAEtC,IAAI,CAAC;QACH,MAAM,cAAc,GAAG,MAAM,MAAM,CAAC,WAAW,CAAC,CAAC;QACjD,MAAM,QAAQ,GAAG,CAAC,cAAc,CAAC,OAAO,IAAI,cAAc,CAAgG,CAAC;QAC3J,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;QACpC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAEjC,OAAO;YACL,KAAK;YACL,OAAO;YACP,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC;YAChE,SAAS,EAAE,IAAI,CAAC,QAAQ;YACxB,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,KAAK;YACL,OAAO,EAAE,SAAS,KAAK,4BAA4B;YACnD,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,0DAA0D,CAAC;YACpG,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,OAAe,EAAE,SAAkB;IACzF,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;iBAC/B,SAAS,CAAC,CAAC,CAAC,kBAAkB,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE;mBAC5C,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;;;EAIvD,OAAO;CACR,CAAC;AACF,CAAC"}
1
+ {"version":3,"file":"pdf.js","sourceRoot":"","sources":["../../src/processors/pdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAUrC,MAAM,UAAU,SAAS,CAAC,QAAgB;IACxC,OAAO,QAAQ,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;AACjD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC/C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IAEtC,IAAI,CAAC;QACH,wEAAwE;QACxE,uEAAuE;QACvE,6DAA6D;QAC7D,4DAA4D;QAC5D,MAAM,cAAc,GAAG,MAAM,MAAM,CAAC,4BAA4B,CAAC,CAAC;QAClE,MAAM,QAAQ,GAAG,CAAC,cAAc,CAAC,OAAO,IAAI,cAAc,CAAgG,CAAC;QAC3J,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;QACpC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAEjC,OAAO;YACL,KAAK;YACL,OAAO;YACP,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC;YAChE,SAAS,EAAE,IAAI,CAAC,QAAQ;YACxB,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,KAAK;YACL,OAAO,EAAE,SAAS,KAAK,4BAA4B;YACnD,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,0DAA0D,CAAC;YACpG,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,OAAe,EAAE,SAAkB;IACzF,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;iBAC/B,SAAS,CAAC,CAAC,CAAC,kBAAkB,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE;mBAC5C,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;;;EAIvD,OAAO;CACR,CAAC;AACF,CAAC"}
@@ -1,6 +1,8 @@
1
1
  /**
2
2
  * PowerPoint (.pptx) processor.
3
- * Extracts slide text and speaker notes from raw XML (no external deps).
3
+ * Extracts slides via zip (adm-zip) + XML parsing.
4
+ * Features: slide titles, body text with bullet/numbered list structure,
5
+ * speaker notes, image alt-text, proper paragraph grouping.
4
6
  */
5
7
  export interface PptxResult {
6
8
  title: string;
@@ -1 +1 @@
1
- {"version":3,"file":"pptx.d.ts","sourceRoot":"","sources":["../../src/processors/pptx.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAQD,wBAAsB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CAsBvE"}
1
+ {"version":3,"file":"pptx.d.ts","sourceRoot":"","sources":["../../src/processors/pptx.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAKH,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAiBD,wBAAsB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CAgCvE"}