clementine-agent 1.0.69 → 1.0.70

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,18 +13,24 @@ export async function* parsePdf(filePath) {
13
13
  try {
14
14
  buf = readFileSync(filePath);
15
15
  }
16
- catch {
17
- return;
16
+ catch (err) {
17
+ throw new Error(`Failed to read PDF ${path.basename(filePath)}: ${err instanceof Error ? err.message : String(err)}`);
18
18
  }
19
19
  let result;
20
20
  try {
21
21
  result = await pdfParse(buf);
22
22
  }
23
- catch {
24
- return;
23
+ catch (err) {
24
+ const msg = err instanceof Error ? err.message : String(err);
25
+ const hint = /password/i.test(msg) ? ' (looks password-protected)' : '';
26
+ throw new Error(`Failed to parse PDF ${path.basename(filePath)}${hint}: ${msg}`);
25
27
  }
26
28
  const hint = path.basename(filePath, path.extname(filePath));
27
29
  const pages = splitPages(result.text);
30
+ const hasAnyText = pages.some((p) => p.trim().length > 0);
31
+ if (!hasAnyText) {
32
+ throw new Error(`PDF ${path.basename(filePath)} has no extractable text — likely image-only (OCR is not supported). Re-export with a text layer or transcribe it first.`);
33
+ }
28
34
  for (let i = 0; i < pages.length; i++) {
29
35
  const pageText = pages[i].trim();
30
36
  if (!pageText)
@@ -75,32 +75,41 @@ export async function runIngestion(opts) {
75
75
  const samples = [];
76
76
  const pendingStructured = [];
77
77
  for (const iter of recordIterators) {
78
- for await (const record of iter) {
79
- recordsIn += 1;
80
- if (opts.limit && recordsIn > opts.limit)
81
- break;
82
- const flowPath = classifyRecord(record, intelligenceMode);
83
- if (flowPath === 'structured') {
84
- if (!schemaMapping && samples.length < SAMPLE_SIZE) {
85
- samples.push(record);
86
- pendingStructured.push(record);
87
- continue;
88
- }
89
- if (!schemaMapping && samples.length >= SAMPLE_SIZE) {
90
- schemaMapping = await inferSchema(samples, source.slug);
91
- await applyStructuredColumns(schemaMapping);
92
- for (const s of pendingStructured) {
93
- await processStructured(s, schemaMapping, source, opts, store, report, plannedRecords, errors, writtenSummaries, counters());
78
+ // Wrap so one unreadable file doesn't abort an otherwise-good folder
79
+ // ingest — record the adapter error and move on to the next iterator.
80
+ try {
81
+ for await (const record of iter) {
82
+ recordsIn += 1;
83
+ if (opts.limit && recordsIn > opts.limit)
84
+ break;
85
+ const flowPath = classifyRecord(record, intelligenceMode);
86
+ if (flowPath === 'structured') {
87
+ if (!schemaMapping && samples.length < SAMPLE_SIZE) {
88
+ samples.push(record);
89
+ pendingStructured.push(record);
90
+ continue;
91
+ }
92
+ if (!schemaMapping && samples.length >= SAMPLE_SIZE) {
93
+ schemaMapping = await inferSchema(samples, source.slug);
94
+ await applyStructuredColumns(schemaMapping);
95
+ for (const s of pendingStructured) {
96
+ await processStructured(s, schemaMapping, source, opts, store, report, plannedRecords, errors, writtenSummaries, counters());
97
+ }
98
+ pendingStructured.length = 0;
99
+ }
100
+ if (schemaMapping) {
101
+ await processStructured(record, schemaMapping, source, opts, store, report, plannedRecords, errors, writtenSummaries, counters());
94
102
  }
95
- pendingStructured.length = 0;
96
103
  }
97
- if (schemaMapping) {
98
- await processStructured(record, schemaMapping, source, opts, store, report, plannedRecords, errors, writtenSummaries, counters());
104
+ else {
105
+ await processFreeForm(record, source, opts, store, report, plannedRecords, errors, writtenSummaries, counters());
99
106
  }
100
107
  }
101
- else {
102
- await processFreeForm(record, source, opts, store, report, plannedRecords, errors, writtenSummaries, counters());
103
- }
108
+ }
109
+ catch (err) {
110
+ const msg = err instanceof Error ? err.message : String(err);
111
+ errors.push({ error: msg });
112
+ report('parsing', msg);
104
113
  }
105
114
  }
106
115
  // Flush structured records that never reached the schema-infer threshold
@@ -207,11 +216,22 @@ async function processStructured(record, mapping, source, opts, store, _report,
207
216
  }
208
217
  async function processFreeForm(record, source, opts, store, report, planned, _errors, _writtenSummaries, counters) {
209
218
  try {
210
- report('distilling');
211
219
  const chunks = chunkContent(record.content, 3000);
212
- const distillations = [];
213
- for (const chunk of chunks) {
214
- distillations.push(await distillChunk(chunk, record.metadata ?? {}));
220
+ report('distilling', chunks.length > 1 ? `chunk 0/${chunks.length}` : undefined);
221
+ // Parallelize per-chunk Haiku calls in small batches — one chunk at a
222
+ // time on a 30KB PDF adds up to 60–90s; 5-way concurrency cuts it to
223
+ // ~15s without pushing the API's rate limits.
224
+ const CONCURRENCY = 5;
225
+ const distillations = new Array(chunks.length);
226
+ let completed = 0;
227
+ for (let i = 0; i < chunks.length; i += CONCURRENCY) {
228
+ const batch = chunks.slice(i, i + CONCURRENCY);
229
+ const results = await Promise.all(batch.map((chunk) => distillChunk(chunk, record.metadata ?? {})));
230
+ for (let j = 0; j < results.length; j++)
231
+ distillations[i + j] = results[j];
232
+ completed += results.length;
233
+ if (chunks.length > 1)
234
+ report('distilling', `chunk ${completed}/${chunks.length}`);
215
235
  }
216
236
  const targetFolder = sanitizeFolder(source.targetFolder || `04-Ingest/${source.slug}`, source.slug);
217
237
  const partial = combineDistillations(record, distillations, source.slug, targetFolder);
@@ -2483,6 +2483,8 @@ export async function cmdDashboard(opts) {
2483
2483
  title: r.title, tags: r.tags, targetRelPath: r.targetRelPath,
2484
2484
  body: (r.body || '').slice(0, 800),
2485
2485
  })),
2486
+ recordsIn: result.recordsIn,
2487
+ errors: result.errors.slice(0, 10),
2486
2488
  });
2487
2489
  }
2488
2490
  catch (err) {
@@ -10210,14 +10212,25 @@ if('serviceWorker' in navigator){navigator.serviceWorker.getRegistrations().then
10210
10212
 
10211
10213
  const manifest = manifestData.manifest;
10212
10214
  const preview = finalData.preview || [];
10215
+ const errorsList = finalData.errors || [];
10213
10216
  const manifestRows = Object.entries(manifest.formats || {})
10214
10217
  .map(([fmt, n]) => '<tr><td>' + escapeHtml(fmt) + '</td><td>' + n + '</td></tr>').join('');
10218
+ const warnBlock = errorsList.length
10219
+ ? '<div style="margin-top:10px;padding:10px;background:#fff3cd;border:1px solid #f0c36d;border-radius:6px;color:#8a5a00;font-size:13px">' +
10220
+ '<div style="font-weight:600;margin-bottom:4px">' + errorsList.length + ' file(s) could not be ingested</div>' +
10221
+ errorsList.map((e) => '<div style="font-family:monospace;font-size:12px">• ' + escapeHtml(e.error) + '</div>').join('') +
10222
+ '</div>'
10223
+ : '';
10224
+ const emptyNote = (preview.length === 0 && !errorsList.length)
10225
+ ? '<div style="margin-top:10px;padding:10px;background:#fff3cd;border:1px solid #f0c36d;border-radius:6px;color:#8a5a00;font-size:13px">No records extracted. The file may be empty or in an unsupported format.</div>'
10226
+ : '';
10215
10227
  manifestEl.innerHTML =
10216
10228
  '<div class="card" style="padding:12px"><div style="font-weight:600;margin-bottom:8px">Manifest</div>' +
10217
10229
  '<div style="color:var(--muted);font-size:13px;margin-bottom:8px">' +
10218
10230
  manifest.totalFiles + ' file(s), ' + brainHumanBytes(manifest.totalBytes) +
10219
10231
  ' · scanned in ' + Math.floor((Date.now() - progress.startedAt) / 1000) + 's</div>' +
10220
- '<table class="data-table"><thead><tr><th>Format</th><th>Count</th></tr></thead><tbody>' + manifestRows + '</tbody></table></div>';
10232
+ '<table class="data-table"><thead><tr><th>Format</th><th>Count</th></tr></thead><tbody>' + manifestRows + '</tbody></table>' +
10233
+ warnBlock + emptyNote + '</div>';
10221
10234
  if (preview.length) {
10222
10235
  const previewHtml = preview.slice(0, 10).map((p, i) =>
10223
10236
  '<div class="card" style="padding:12px;margin-bottom:8px">' +
@@ -10280,14 +10293,27 @@ if('serviceWorker' in navigator){navigator.serviceWorker.getRegistrations().then
10280
10293
  return;
10281
10294
  }
10282
10295
  const elapsed = Math.floor((Date.now() - progress.startedAt) / 1000);
10296
+ const errList = finalData.errors || [];
10297
+ const headerColor = (finalData.recordsWritten > 0) ? '#4ade80' : '#e5a84a';
10298
+ const headerIcon = (finalData.recordsWritten > 0) ? '✓' : '⚠';
10299
+ const headerText = (finalData.recordsWritten > 0)
10300
+ ? 'Ingestion complete'
10301
+ : 'Ingestion finished, but nothing was written';
10302
+ const errBlock = errList.length
10303
+ ? '<div style="margin-top:10px;padding:10px;background:#fff3cd;border:1px solid #f0c36d;border-radius:6px;color:#8a5a00;font-size:13px">' +
10304
+ '<div style="font-weight:600;margin-bottom:4px">' + errList.length + ' error(s)</div>' +
10305
+ errList.map((e) => '<div style="font-family:monospace;font-size:12px">• ' + escapeHtml(e.error) + '</div>').join('') +
10306
+ '</div>'
10307
+ : '';
10283
10308
  progEl.innerHTML =
10284
10309
  '<div class="card" style="padding:12px">' +
10285
- '<div style="font-weight:600;color:#4ade80">✓ Ingestion complete · ' + elapsed + 's</div>' +
10310
+ '<div style="font-weight:600;color:' + headerColor + '">' + headerIcon + ' ' + headerText + ' · ' + elapsed + 's</div>' +
10286
10311
  '<div>Records in: ' + finalData.recordsIn + '</div>' +
10287
10312
  '<div>Records written: ' + finalData.recordsWritten + '</div>' +
10288
10313
  '<div>Records skipped: ' + finalData.recordsSkipped + '</div>' +
10289
10314
  '<div>Records failed: ' + finalData.recordsFailed + '</div>' +
10290
10315
  (finalData.overviewNotePath ? '<div style="margin-top:8px">Overview note: <code>' + escapeHtml(finalData.overviewNotePath) + '</code></div>' : '') +
10316
+ errBlock +
10291
10317
  '</div>';
10292
10318
  }
10293
10319
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.0.69",
3
+ "version": "1.0.70",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",