anymd 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "anymd",
3
- "version": "0.0.4",
3
+ "version": "0.0.6",
4
4
  "description": "Convert any document (PDF, DOC, DOCX) to clean Markdown for RAG",
5
5
  "keywords": [
6
6
  "markdown",
package/src/bootstrap.ts CHANGED
@@ -9,8 +9,8 @@ interface BootstrapCallbacks {
9
9
  onStep: (message: string) => void
10
10
  }
11
11
 
12
- const REQUIRED_PACKAGES = ['marker', 'markitdown', 'mlx_vlm', 'pypdfium2']
13
- const PIP_PACKAGES = ['marker-pdf', 'markitdown', 'mlx-vlm', 'pypdfium2']
12
+ const REQUIRED_PACKAGES = ['marker', 'markitdown', 'mammoth', 'mlx_vlm', 'pypdfium2', 'torchvision']
13
+ const PIP_PACKAGES = ['marker-pdf', 'markitdown[docx]', 'mlx-vlm', 'pypdfium2', 'torchvision']
14
14
  const CHANDRA_MODEL_ID = 'mlx-community/chandra-8bit'
15
15
 
16
16
  const checkImportable = async (py: string, pkg: string): Promise<boolean> => {
package/src/tui-data.ts CHANGED
@@ -6,6 +6,8 @@ import pMap from 'p-map'
6
6
  import { loadConfig } from '~/config'
7
7
  import { getPaths } from '~/paths'
8
8
 
9
+ const stripAnsi = (s: string): string => s.replaceAll(new RegExp(`${String.fromCodePoint(0x1b)}\\[[0-9;]*m`, 'gu'), '')
10
+
9
11
  interface Classification {
10
12
  errors: number
11
13
  files: { mixed: string[]; native: string[]; scanned: string[] }
@@ -223,7 +225,7 @@ const runClassify = async (onProgress: (p: ClassifyProgress) => void): Promise<C
223
225
  total: results.length
224
226
  }
225
227
 
226
- mkdirSync(getPaths().dataDir, { recursive: true })
228
+ mkdirSync(getPaths().outputDir, { recursive: true })
227
229
  await writeFile(getPaths().classification, `${JSON.stringify(classification, null, 2)}\n`)
228
230
  return classification
229
231
  }
@@ -540,7 +542,7 @@ const fetchStepData = async (): Promise<AllStepsData> => {
540
542
 
541
543
  const appendPipelineLog = async (line: string): Promise<void> => {
542
544
  try {
543
- await appendFile(getPaths().pipelineLog, `${line}\n`)
545
+ await appendFile(getPaths().pipelineLog, `${stripAnsi(line)}\n`)
544
546
  } catch {
545
547
  /* Empty */
546
548
  }
@@ -557,7 +559,7 @@ const clearPipelineLog = async (): Promise<void> => {
557
559
  const appendErrorLog = async (step: string, message: string): Promise<void> => {
558
560
  try {
559
561
  const ts = new Date().toISOString()
560
- await appendFile(getPaths().errorsLog, `[${ts}] [${step}] ${message}\n`)
562
+ await appendFile(getPaths().errorsLog, `[${ts}] [${step}] ${stripAnsi(message)}\n`)
561
563
  } catch {
562
564
  /* Empty */
563
565
  }
package/tui.tsx CHANGED
@@ -25,7 +25,10 @@ import {
25
25
  writeNativeFileList
26
26
  } from '~/tui-data'
27
27
 
28
+ const stripAnsi = (s: string): string => s.replaceAll(new RegExp(`${String.fromCodePoint(0x1b)}\\[[0-9;]*m`, 'gu'), '')
29
+
28
30
  const DIM = '#888888'
31
+ const SIDEBAR_WIDTH = 38
29
32
 
30
33
  const setTerminalTitle = (title: string): void => {
31
34
  process.stdout.write(`\u001B]0;${title}\u0007`)
@@ -109,7 +112,7 @@ const initialState: AppState = {
109
112
  const reducer = (state: AppState, action: Action): AppState => {
110
113
  switch (action.type) {
111
114
  case 'APPEND_OUTPUT':
112
- return { ...state, runningLines: [...state.runningLines.slice(-MAX_OUTPUT_LINES), action.line] }
115
+ return { ...state, runningLines: [...state.runningLines.slice(-MAX_OUTPUT_LINES), stripAnsi(action.line)] }
113
116
  case 'CLEAR_FAILURE':
114
117
  return { ...state, backgroundOcr: false, failed: false, runningCommand: null }
115
118
  case 'COMMAND_DONE': {
@@ -136,7 +139,7 @@ const reducer = (state: AppState, action: Action): AppState => {
136
139
  case 'SET_DATASET_RESULT':
137
140
  return { ...state, datasetResult: action.result }
138
141
  case 'SET_LOG':
139
- return { ...state, logLines: action.lines }
142
+ return { ...state, logLines: action.lines.map(stripAnsi) }
140
143
  case 'SET_PREFLIGHT':
141
144
  return { ...state, preflightErrors: action.errors, preflightWarnings: action.warnings }
142
145
  case 'SET_RUNNING_STATUS':
@@ -195,10 +198,10 @@ const SpinnerDots = ({ fg }: { fg?: string }) => {
195
198
  return <text fg={fg ?? 'yellow'}>{SPINNER_FRAMES[frame]} </text>
196
199
  }
197
200
 
198
- const PROGRESS_FULL = '\u2588'
199
- const PROGRESS_EMPTY = '\u2591'
201
+ const PROGRESS_FULL = ''
202
+ const PROGRESS_EMPTY = ''
200
203
 
201
- const ProgressBarSimple = ({ value, width = 20 }: { value: number; width?: number }) => {
204
+ const ProgressBarSimple = ({ value, width = 12 }: { value: number; width?: number }) => {
202
205
  const filled = Math.round((value / 100) * width)
203
206
  const empty = width - filled
204
207
  return (
@@ -227,20 +230,19 @@ const ElapsedTimer = ({ startedAt }: { startedAt: number }) => {
227
230
  }
228
231
 
229
232
  const RecentFiles = ({ files }: { files: { duration: number; name: string; pages: number; per_page: number }[] }) => {
230
- const display = files.slice(-5)
233
+ const display = files.slice(-3)
231
234
  return (
232
235
  <box flexDirection='column'>
233
236
  <text fg={DIM}>
234
- <b>─── recent ───</b>
237
+ <b>── recent ──</b>
235
238
  </text>
236
239
  {display.map((f, i) => (
237
240
  // eslint-disable-next-line react/no-array-index-key
238
241
  <text key={i}>
239
- <span fg='green'>✓ </span>
240
- <span fg='cyan'>{f.name.slice(0, 30).padEnd(30)}</span>
241
- <span> {f.pages.toString().padStart(3)}p </span>
242
- <span fg='yellow'>{formatDuration(f.duration).padStart(7)}</span>
243
- <span fg={DIM}> ({formatDuration(f.per_page)}/p)</span>
242
+ <span fg='green'>✓</span>
243
+ <span fg='cyan'> {f.name.slice(0, 18)}</span>
244
+ <span> {f.pages.toString().padStart(2)}p</span>
245
+ <span fg='yellow'> {formatDuration(f.duration)}</span>
244
246
  </text>
245
247
  ))}
246
248
  </box>
@@ -265,41 +267,41 @@ const OcrLiveInfo = ({ progress }: { progress: OcrProgress }) => {
265
267
  }, [progress.current_file, progress.current_file_started])
266
268
 
267
269
  return (
268
- <box flexDirection='column' paddingLeft={6}>
270
+ <box flexDirection='column' paddingLeft={2}>
269
271
  {progress.current_file === '-' ? null : (
270
272
  <text>
271
- <span fg={DIM}>Current: </span>
272
- <b fg='cyan'>{progress.current_file}</b>
273
- {progress.current_pages_total ? <span fg={DIM}> ({progress.current_pages_total}p)</span> : null}
274
- <span fg={DIM}> page </span>
275
- <span>{progress.current_page}</span>
276
- <span fg={DIM}>{' '}elapsed: </span>
277
- <span fg='yellow'>{formatDuration(elapsed)}</span>
273
+ <span fg={DIM}>Now: </span>
274
+ <b fg='cyan'>{progress.current_file.slice(0, 20)}</b>
278
275
  </text>
279
276
  )}
280
- <box gap={2}>
281
- <text>
282
- <span fg={DIM}>ETA: </span>
283
- <span fg='yellow'>{progress.eta}</span>
284
- </text>
285
- <text>
286
- <span fg={DIM}>avg: </span>
287
- <span>{progress.avg_per_file}/file</span>
288
- </text>
277
+ {progress.current_file === '-' ? null : (
289
278
  <text>
290
- <span fg={DIM}>errors: </span>
291
- <span fg={progress.errors > 0 ? 'red' : 'green'}>{progress.errors}</span>
279
+ <span fg={DIM}>p</span>
280
+ <span>{progress.current_page}</span>
281
+ {progress.current_pages_total ? <span fg={DIM}>/{progress.current_pages_total}</span> : null}
282
+ <span fg={DIM}> </span>
283
+ <span fg='yellow'>{formatDuration(elapsed)}</span>
292
284
  </text>
293
- </box>
285
+ )}
286
+ <text>
287
+ <span fg={DIM}>ETA </span>
288
+ <span fg='yellow'>{progress.eta}</span>
289
+ <span fg={DIM}> avg </span>
290
+ <span>{progress.avg_per_file}</span>
291
+ </text>
292
+ <text>
293
+ <span fg={DIM}>err </span>
294
+ <span fg={progress.errors > 0 ? 'red' : 'green'}>{progress.errors}</span>
295
+ </text>
294
296
  {progress.recent_files?.length ? <RecentFiles files={progress.recent_files} /> : null}
295
297
  </box>
296
298
  )
297
299
  }
298
300
 
299
301
  const getStepIcon = (isDone: boolean, isFailed: boolean): string => {
300
- if (isFailed) return '\u2717'
301
- if (isDone) return '\u2713'
302
- return ' '
302
+ if (isFailed) return ''
303
+ if (isDone) return ''
304
+ return '·'
303
305
  }
304
306
 
305
307
  const getStepColor = (isRunning: boolean, isDone: boolean, isFailed: boolean): string => {
@@ -328,100 +330,7 @@ const StepIcon = ({
328
330
  )
329
331
  }
330
332
 
331
- const StepHeader = ({
332
- color,
333
- completedDuration,
334
- done,
335
- failedCount,
336
- isActive,
337
- isDone,
338
- isRunning,
339
- pct,
340
- step,
341
- stepStartedAt,
342
- total
343
- }: {
344
- color: string
345
- completedDuration?: number
346
- done: number
347
- failedCount?: number
348
- isActive: boolean
349
- isDone: boolean
350
- isRunning: boolean
351
- pct: number
352
- step: StepConfig
353
- stepStartedAt: number
354
- total: number
355
- }) => {
356
- const fgColor = isActive ? (color === 'gray' ? undefined : color) : DIM
357
- return (
358
- <box gap={1}>
359
- <box width={3}>
360
- <StepIcon color={color} isDone={isDone} isFailed={false} isRunning={isRunning} />
361
- </box>
362
- <text fg={color}>
363
- <b>[{step.stepNum}]</b>
364
- </text>
365
- <text fg={fgColor}>{isActive ? <b>{step.name}</b> : step.name}</text>
366
- {isRunning ? <ElapsedTimer startedAt={stepStartedAt} /> : null}
367
- {isDone && !isRunning && completedDuration !== undefined ? (
368
- <text fg={DIM}>{formatDuration(completedDuration)}</text>
369
- ) : null}
370
- <box width={20}>
371
- <ProgressBarSimple value={pct} />
372
- </box>
373
- <text>
374
- <span fg='green'>{done}</span>
375
- <span fg={DIM}>
376
- /{total > 0 ? total : '?'} {step.unit}
377
- </span>
378
- {failedCount && failedCount > 0 ? <span fg='red'> ({failedCount} failed)</span> : null}
379
- </text>
380
- </box>
381
- )
382
- }
383
-
384
- const StepDetails = ({
385
- isRunning,
386
- ocrProgress,
387
- requires,
388
- runningStatus,
389
- showOcr,
390
- texts
391
- }: {
392
- isRunning: boolean
393
- ocrProgress?: null | OcrProgress
394
- requires?: string
395
- runningStatus: string
396
- showOcr: boolean
397
- texts: string[]
398
- }) => (
399
- <>
400
- {texts.length > 0 ? (
401
- <box paddingLeft={6}>
402
- {texts.map((d, i) => (
403
- // eslint-disable-next-line react/no-array-index-key
404
- <text fg={DIM} key={i}>
405
- {d}
406
- </text>
407
- ))}
408
- </box>
409
- ) : null}
410
- {isRunning && runningStatus !== '' ? (
411
- <box paddingLeft={6}>
412
- <text fg='cyan'>{runningStatus}</text>
413
- </box>
414
- ) : null}
415
- {requires ? (
416
- <box paddingLeft={6}>
417
- <text fg='yellow'>{`\u26A0 Requires: ${requires}`}</text>
418
- </box>
419
- ) : null}
420
- {showOcr && ocrProgress ? <OcrLiveInfo progress={ocrProgress} /> : null}
421
- </>
422
- )
423
-
424
- const StepCard = ({
333
+ const SidebarStep = ({
425
334
  completedDuration,
426
335
  failedCount,
427
336
  isFailed,
@@ -441,6 +350,7 @@ const StepCard = ({
441
350
  step: StepConfig
442
351
  stepData: StepData | undefined
443
352
  stepStartedAt: number
353
+ // eslint-disable-next-line complexity, max-statements
444
354
  }) => {
445
355
  const done = stepData?.done ?? 0
446
356
  const total = stepData?.total ?? 0
@@ -452,67 +362,52 @@ const StepCard = ({
452
362
  const showOcr = isRunning && step.command === 'ocr'
453
363
 
454
364
  return (
455
- <box flexDirection='column' paddingLeft={1}>
456
- <StepHeader
457
- color={color}
458
- completedDuration={completedDuration}
459
- done={done}
460
- failedCount={failedCount}
461
- isActive={isActive}
462
- isDone={isDone}
463
- isRunning={isRunning}
464
- pct={pct}
465
- step={step}
466
- stepStartedAt={stepStartedAt}
467
- total={total}
468
- />
469
- <StepDetails
470
- isRunning={isRunning}
471
- ocrProgress={ocrProgress}
472
- requires={showRequires ? stepData?.requires : undefined}
473
- runningStatus={runningStatus}
474
- showOcr={showOcr}
475
- texts={stepData?.details ?? []}
476
- />
365
+ <box flexDirection='column'>
366
+ <box>
367
+ <box width={2}>
368
+ <StepIcon color={color} isDone={isDone} isFailed={false} isRunning={isRunning} />
369
+ </box>
370
+ <text fg={isActive ? color : DIM}>{step.name}</text>
371
+ {isRunning ? <ElapsedTimer startedAt={stepStartedAt} /> : null}
372
+ {isDone && !isRunning && completedDuration !== undefined ? (
373
+ <text fg={DIM}> {formatDuration(completedDuration)}</text>
374
+ ) : null}
375
+ </box>
376
+ <box paddingLeft={2}>
377
+ <ProgressBarSimple value={pct} />
378
+ <text>
379
+ <span fg='green'> {done}</span>
380
+ <span fg={DIM}>/{total > 0 ? total : '?'}</span>
381
+ {failedCount && failedCount > 0 ? <span fg='red'> {failedCount}✗</span> : null}
382
+ </text>
383
+ </box>
384
+ {stepData?.details?.length ? (
385
+ <box flexDirection='column' paddingLeft={2}>
386
+ {stepData.details.map((d, i) => (
387
+ // eslint-disable-next-line react/no-array-index-key
388
+ <text fg={DIM} key={i}>
389
+ {d}
390
+ </text>
391
+ ))}
392
+ </box>
393
+ ) : null}
394
+ {isRunning && runningStatus !== '' ? (
395
+ <box paddingLeft={2}>
396
+ <text fg='cyan'>{runningStatus}</text>
397
+ </box>
398
+ ) : null}
399
+ {showRequires ? (
400
+ <box paddingLeft={2}>
401
+ <text fg='yellow'>⚠ {stepData?.requires}</text>
402
+ </box>
403
+ ) : null}
404
+ {showOcr && ocrProgress ? <OcrLiveInfo progress={ocrProgress} /> : null}
477
405
  </box>
478
406
  )
479
407
  }
480
408
 
481
409
  const ERROR_PATTERN = /\b(?:ERROR|Error:|Failed:|failed|FAILED|\u2716|exception|traceback)/iu
482
410
 
483
- const OutputBox = ({ lines, status }: { lines: string[]; status: string }) => {
484
- if (lines.length === 0 && status === '') return null
485
- return (
486
- <box
487
- border
488
- borderColor='gray'
489
- borderStyle='rounded'
490
- flexDirection='column'
491
- marginTop={1}
492
- paddingLeft={1}
493
- paddingRight={1}>
494
- <text fg={DIM}>
495
- <b>Output</b>
496
- </text>
497
- <box flexDirection='column'>
498
- {lines.length > 0 ? (
499
- lines.map((line, i) => {
500
- const isError = ERROR_PATTERN.test(line)
501
- return (
502
- // eslint-disable-next-line react/no-array-index-key
503
- <text fg={isError ? 'red' : DIM} key={i}>
504
- {line}
505
- </text>
506
- )
507
- })
508
- ) : (
509
- <text fg='cyan'>{status}</text>
510
- )}
511
- </box>
512
- </box>
513
- )
514
- }
515
-
516
411
  const PreflightBanner = ({ errors, warnings }: { errors: string[]; warnings: string[] }) => {
517
412
  if (errors.length === 0 && warnings.length === 0) return null
518
413
  return (
@@ -555,98 +450,20 @@ const PreflightBanner = ({ errors, warnings }: { errors: string[]; warnings: str
555
450
  )
556
451
  }
557
452
 
558
- const LogOverlay = ({ lines }: { lines: string[] }) => {
559
- const { height } = useTerminalDimensions()
560
- const logHeight = Math.max(5, height - 18)
561
-
562
- return (
563
- <box
564
- border
565
- borderColor='magenta'
566
- borderStyle='rounded'
567
- flexDirection='column'
568
- marginTop={1}
569
- paddingLeft={1}
570
- paddingRight={1}>
571
- <box justifyContent='space-between'>
572
- <text fg='magenta'>
573
- <b>Log</b>
574
- </text>
575
- <text fg={DIM}>L/ESC close · ↑↓ scroll</text>
576
- </box>
577
- <scrollbox focused height={logHeight} marginTop={1} stickyScroll>
578
- {lines.length > 0 ? (
579
- lines.map((line, i) => (
580
- // eslint-disable-next-line react/no-array-index-key
581
- <text fg={DIM} key={i}>
582
- {line}
583
- </text>
584
- ))
585
- ) : (
586
- <text fg={DIM}>No log entries yet</text>
587
- )}
588
- </scrollbox>
589
- </box>
590
- )
591
- }
592
-
593
- const TitleBar = ({ allDone, failed }: { allDone: boolean; failed: boolean }) => (
594
- <box justifyContent='space-between' paddingLeft={1} paddingRight={1}>
595
- <text fg='cyan'>
596
- <b>Document Pipeline</b>
597
- </text>
598
- <box gap={2}>
599
- {failed ? (
600
- <>
601
- <text>
602
- <b fg='yellow'>[R]</b>
603
- <span fg={DIM}> Retry</span>
604
- </text>
605
- <text>
606
- <b fg='yellow'>[S]</b>
607
- <span fg={DIM}> Skip</span>
608
- </text>
609
- </>
610
- ) : null}
611
- {allDone ? (
612
- <text fg='green'>
613
- <b>✓ Pipeline complete</b>
614
- </text>
615
- ) : null}
616
- <text>
617
- <b fg='magenta'>[L]</b>
618
- <span fg={DIM}> Log</span>
619
- </text>
620
- <text>
621
- <b fg='red'>[Q]</b>
622
- <span fg={DIM}> Quit</span>
623
- </text>
624
- </box>
625
- </box>
626
- )
627
-
628
- const formatChars = (chars: number): string => {
629
- if (chars >= 1_000_000) return `${(chars / 1_000_000).toFixed(1)}M`
630
- if (chars >= 1000) return `${(chars / 1000).toFixed(1)}K`
631
- return String(chars)
632
- }
633
-
634
453
  // eslint-disable-next-line max-statements
635
- const PipelineSummary = ({
454
+ const TitleBarTop = ({
636
455
  allDone,
637
456
  backgroundOcr,
638
- datasetResult,
457
+ failed,
639
458
  pipelineStartedAt,
640
459
  runningCommand,
641
- stepDurations,
642
460
  stepsData
643
461
  }: {
644
462
  allDone: boolean
645
463
  backgroundOcr: boolean
646
- datasetResult: DatasetResult | null
464
+ failed: boolean
647
465
  pipelineStartedAt: number
648
466
  runningCommand: CommandKey | null
649
- stepDurations: Partial<Record<CommandKey, number>>
650
467
  stepsData: AllStepsData | null
651
468
  }) => {
652
469
  const [elapsed, setElapsed] = useState(0)
@@ -661,83 +478,148 @@ const PipelineSummary = ({
661
478
  return () => clearInterval(timer)
662
479
  }, [pipelineStartedAt])
663
480
 
664
- if (!stepsData) return null
665
- const elapsedStr = formatDuration(elapsed)
481
+ let statusText = ''
482
+ if (allDone) statusText = '✓ Complete'
483
+ else if (failed) statusText = '✗ Failed'
484
+ else if (runningCommand && stepsData) {
485
+ let currentIdx = 0
486
+ let currentName = ''
487
+ for (const s of STEPS)
488
+ if (s.command === runningCommand) {
489
+ currentIdx = s.stepNum
490
+ currentName = s.name
491
+ break
492
+ }
493
+ if (currentIdx > 0) {
494
+ const parallelLabel = backgroundOcr && runningCommand === 'pipeline' ? '2+3/5 Convert+OCR' : null
495
+ statusText = parallelLabel ?? `${currentIdx}/5 ${currentName}`
496
+ }
497
+ }
498
+
499
+ const statusColor = allDone ? 'green' : failed ? 'red' : 'cyan'
666
500
 
667
- if (allDone)
668
- return (
669
- <box flexDirection='column' paddingLeft={1} paddingRight={1}>
670
- <box justifyContent='space-between'>
671
- <text fg='green'>
672
- <b>✓ All 5 steps complete</b>
501
+ return (
502
+ <box justifyContent='space-between' paddingLeft={1} paddingRight={1}>
503
+ <text fg='#e0a040'>
504
+ <b>anymd</b>
505
+ </text>
506
+ <box gap={2}>
507
+ {statusText === '' ? null : (
508
+ <text fg={statusColor}>
509
+ <b>{statusText}</b>
673
510
  </text>
674
- <text fg={DIM}>total: {elapsedStr}</text>
675
- </box>
676
- {datasetResult ? (
677
- <box gap={3} paddingLeft={2}>
511
+ )}
512
+ {pipelineStartedAt > 0 ? <text fg={DIM}>{formatDuration(elapsed)}</text> : null}
513
+ </box>
514
+ </box>
515
+ )
516
+ }
517
+
518
+ const formatChars = (chars: number): string => {
519
+ if (chars >= 1_000_000) return `${(chars / 1_000_000).toFixed(1)}M`
520
+ if (chars >= 1000) return `${(chars / 1000).toFixed(1)}K`
521
+ return String(chars)
522
+ }
523
+
524
+ const SidebarSummary = ({
525
+ allDone,
526
+ datasetResult,
527
+ stepDurations
528
+ }: {
529
+ allDone: boolean
530
+ datasetResult: DatasetResult | null
531
+ stepDurations: Partial<Record<CommandKey, number>>
532
+ }) => {
533
+ if (!allDone) return null
534
+ return (
535
+ <box flexDirection='column' paddingLeft={1} paddingTop={1}>
536
+ <text fg='green'>
537
+ <b>✓ All 5 steps complete</b>
538
+ </text>
539
+ {datasetResult ? (
540
+ <box flexDirection='column' paddingLeft={1}>
541
+ <text>
542
+ <span fg={DIM}>entries </span>
543
+ <b fg='green'>{datasetResult.entries.toLocaleString()}</b>
544
+ </text>
545
+ <text>
546
+ <span fg={DIM}>chars </span>
547
+ <b>{formatChars(datasetResult.totalChars)}</b>
548
+ </text>
549
+ {datasetResult.skipped > 0 ? (
678
550
  <text>
679
- <span fg={DIM}>entries: </span>
680
- <b fg='green'>{datasetResult.entries.toLocaleString()}</b>
551
+ <span fg={DIM}>skipped </span>
552
+ <span fg='yellow'>{datasetResult.skipped}</span>
681
553
  </text>
554
+ ) : null}
555
+ {datasetResult.duplicates > 0 ? (
682
556
  <text>
683
- <span fg={DIM}>chars: </span>
684
- <b>{formatChars(datasetResult.totalChars)}</b>
557
+ <span fg={DIM}>deduped </span>
558
+ <span fg='yellow'>{datasetResult.duplicates}</span>
685
559
  </text>
686
- {datasetResult.skipped > 0 ? (
687
- <text>
688
- <span fg={DIM}>skipped: </span>
689
- <span fg='yellow'>{datasetResult.skipped}</span>
690
- </text>
691
- ) : null}
692
- {datasetResult.duplicates > 0 ? (
693
- <text>
694
- <span fg={DIM}>deduped: </span>
695
- <span fg='yellow'>{datasetResult.duplicates}</span>
696
- </text>
697
- ) : null}
698
- </box>
699
- ) : null}
700
- <box gap={2} paddingLeft={2}>
701
- {STEPS.map(step => {
702
- const dur = stepDurations[step.command]
703
- if (dur === undefined) return null
704
- return (
705
- <text key={step.command}>
706
- <span fg={DIM}>{step.name}: </span>
707
- <span fg='cyan'>{formatDuration(dur)}</span>
708
- </text>
709
- )
710
- })}
560
+ ) : null}
711
561
  </box>
562
+ ) : null}
563
+ <box flexDirection='column' paddingLeft={1}>
564
+ {STEPS.map(step => {
565
+ const dur = stepDurations[step.command]
566
+ if (dur === undefined) return null
567
+ return (
568
+ <text key={step.command}>
569
+ <span fg={DIM}>{step.name.slice(0, 14).padEnd(14)} </span>
570
+ <span fg='cyan'>{formatDuration(dur)}</span>
571
+ </text>
572
+ )
573
+ })}
712
574
  </box>
713
- )
714
-
715
- let currentIdx = 0
716
- let currentName = ''
717
- for (const s of STEPS)
718
- if (s.command === runningCommand) {
719
- currentIdx = s.stepNum
720
- currentName = s.name
721
- break
722
- }
723
-
724
- if (currentIdx === 0) return null
575
+ </box>
576
+ )
577
+ }
725
578
 
726
- const parallelLabel = backgroundOcr && runningCommand === 'pipeline' ? 'Steps 2+3/5 \u2014 Convert + OCR' : null
579
+ const HelpDialog = ({ height, width }: { height: number; width: number }) => {
580
+ const boxW = 40
581
+ const boxH = 12
582
+ const left = Math.max(0, Math.floor((width - boxW) / 2))
583
+ const top = Math.max(0, Math.floor((height - boxH) / 2))
727
584
 
728
585
  return (
729
- <box justifyContent='space-between' paddingLeft={1} paddingRight={1}>
586
+ <box
587
+ border
588
+ borderColor='#e0a040'
589
+ borderStyle='rounded'
590
+ flexDirection='column'
591
+ height={boxH}
592
+ marginLeft={left}
593
+ marginTop={top}
594
+ paddingLeft={2}
595
+ paddingRight={2}
596
+ width={boxW}>
597
+ <text fg='#e0a040'>
598
+ <b>Keybinds</b>
599
+ </text>
600
+ <text> </text>
730
601
  <text>
731
- {parallelLabel ? (
732
- <b fg='cyan'>{parallelLabel}</b>
733
- ) : (
734
- <>
735
- <b fg='cyan'>Step {currentIdx}/5</b>
736
- <span fg={DIM}> {currentName}</span>
737
- </>
738
- )}
602
+ <b fg='cyan'>Q</b>
603
+ <span fg={DIM}>{' '}Quit</span>
604
+ </text>
605
+ <text>
606
+ <b fg='cyan'>L</b>
607
+ <span fg={DIM}>{' '}Toggle log / output view</span>
608
+ </text>
609
+ <text>
610
+ <b fg='cyan'>R</b>
611
+ <span fg={DIM}>{' '}Retry failed step</span>
612
+ </text>
613
+ <text>
614
+ <b fg='cyan'>S</b>
615
+ <span fg={DIM}>{' '}Skip failed step</span>
616
+ </text>
617
+ <text>
618
+ <b fg='cyan'>?</b>
619
+ <span fg={DIM}>{' '}Toggle this help</span>
739
620
  </text>
740
- <text fg={DIM}>elapsed: {elapsedStr}</text>
621
+ <text> </text>
622
+ <text fg={DIM}>Press ? or Esc to close</text>
741
623
  </box>
742
624
  )
743
625
  }
@@ -757,13 +639,6 @@ const computeTerminalTitle = (s: AppState): string => {
757
639
  return `Doc Pipeline \u2014 ${label}`
758
640
  }
759
641
 
760
- const RunningFooter = () => (
761
- <box justifyContent='space-between' marginTop={1} paddingLeft={1} paddingRight={1}>
762
- <text fg={DIM}>Ctrl+C safe — progress saved, re-run to resume</text>
763
- <text fg={DIM}>↻ auto-refresh 2s</text>
764
- </box>
765
- )
766
-
767
642
  // eslint-disable-next-line max-statements
768
643
  const readStream = async (stream: ReadableStream<Uint8Array>, onLine: (line: string) => void): Promise<void> => {
769
644
  const reader = stream.getReader()
@@ -792,6 +667,8 @@ const App = () => {
792
667
  const ocrProcRef = useRef<null | ReturnType<typeof Bun.spawn>>(null)
793
668
  const busyRef = useRef(false)
794
669
  const errorLogClearedRef = useRef(false)
670
+ const [showHelp, setShowHelp] = useState(false)
671
+ const { height, width } = useTerminalDimensions()
795
672
 
796
673
  useEffect(() => {
797
674
  setTerminalTitle(computeTerminalTitle(state))
@@ -1093,9 +970,13 @@ const App = () => {
1093
970
 
1094
971
  // eslint-disable-next-line complexity, max-statements
1095
972
  useKeyboard(key => {
1096
- if (state.showLog) {
1097
- if (key.name === 'l' || key.name === 'escape') dispatch({ type: 'TOGGLE_LOG' })
973
+ if (showHelp) {
974
+ if (key.name === '?' || key.name === 'escape') setShowHelp(false)
975
+ return
976
+ }
1098
977
 
978
+ if (key.name === '?') {
979
+ setShowHelp(true)
1099
980
  return
1100
981
  }
1101
982
 
@@ -1129,55 +1010,76 @@ const App = () => {
1129
1010
  }
1130
1011
  })
1131
1012
 
1013
+ const logHeight = Math.max(5, height - 3)
1014
+ const displayLines = state.showLog ? state.logLines : state.runningLines
1015
+
1132
1016
  return (
1133
- <box flexDirection='column'>
1134
- <TitleBar allDone={state.allDone} failed={state.failed} />
1135
- <PipelineSummary
1017
+ <box flexDirection='column' height={height}>
1018
+ <TitleBarTop
1136
1019
  allDone={state.allDone}
1137
1020
  backgroundOcr={state.backgroundOcr}
1138
- datasetResult={state.datasetResult}
1021
+ failed={state.failed}
1139
1022
  pipelineStartedAt={state.pipelineStartedAt}
1140
1023
  runningCommand={state.runningCommand}
1141
- stepDurations={state.stepDurations}
1142
1024
  stepsData={state.stepsData}
1143
1025
  />
1144
-
1145
- {state.stepsData ? (
1146
- <box flexDirection='column'>
1147
- {STEPS.map(step => {
1148
- const isFg = state.runningCommand === step.command
1149
- const isBgOcr = step.command === 'ocr' && state.backgroundOcr
1150
- const isActive = isFg || isBgOcr
1151
- const sd = state.stepsData ? state.stepsData[step.command] : undefined
1152
- const failures = sd?.failed ?? state.stepFailures[step.command]
1153
- return (
1154
- <StepCard
1155
- completedDuration={state.stepDurations[step.command]}
1156
- failedCount={failures}
1157
- isFailed={state.failed ? isFg : false}
1158
- isRunning={!state.failed && isActive}
1159
- key={step.command}
1160
- ocrProgress={step.command === 'ocr' ? state.stepsData?.ocr.progress : undefined}
1161
- runningStatus={isFg ? state.runningStatus : ''}
1162
- step={step}
1163
- stepData={sd}
1164
- stepStartedAt={isActive ? state.stepStartedAt : 0}
1165
- />
1166
- )
1167
- })}
1026
+ <box flexGrow={1}>
1027
+ <box flexDirection='column' paddingLeft={1} width={SIDEBAR_WIDTH}>
1028
+ {state.stepsData ? (
1029
+ <box flexDirection='column'>
1030
+ {STEPS.map(step => {
1031
+ const isFg = state.runningCommand === step.command
1032
+ const isBgOcr = step.command === 'ocr' && state.backgroundOcr
1033
+ const isActive = isFg || isBgOcr
1034
+ const sd = state.stepsData ? state.stepsData[step.command] : undefined
1035
+ const failures = sd?.failed ?? state.stepFailures[step.command]
1036
+ return (
1037
+ <SidebarStep
1038
+ completedDuration={state.stepDurations[step.command]}
1039
+ failedCount={failures}
1040
+ isFailed={state.failed ? isFg : false}
1041
+ isRunning={!state.failed && isActive}
1042
+ key={step.command}
1043
+ ocrProgress={step.command === 'ocr' ? state.stepsData?.ocr.progress : undefined}
1044
+ runningStatus={isFg ? state.runningStatus : ''}
1045
+ step={step}
1046
+ stepData={sd}
1047
+ stepStartedAt={isActive ? state.stepStartedAt : 0}
1048
+ />
1049
+ )
1050
+ })}
1051
+ </box>
1052
+ ) : (
1053
+ <text fg={DIM}>Loading...</text>
1054
+ )}
1055
+ <SidebarSummary
1056
+ allDone={state.allDone}
1057
+ datasetResult={state.datasetResult}
1058
+ stepDurations={state.stepDurations}
1059
+ />
1168
1060
  </box>
1169
- ) : (
1170
- <text fg={DIM}>Loading...</text>
1171
- )}
1172
-
1173
- <PreflightBanner errors={state.preflightErrors} warnings={state.preflightWarnings} />
1174
-
1175
- {state.runningCommand || state.runningLines.length > 0 ? (
1176
- <OutputBox lines={state.runningLines} status={state.runningStatus} />
1177
- ) : null}
1178
- {state.runningCommand && !state.failed ? <RunningFooter /> : null}
1179
-
1180
- {state.showLog ? <LogOverlay lines={state.logLines} /> : null}
1061
+ <box flexDirection='column' flexGrow={1} paddingLeft={2}>
1062
+ {state.preflightErrors.length > 0 || state.preflightWarnings.length > 0 ? (
1063
+ <PreflightBanner errors={state.preflightErrors} warnings={state.preflightWarnings} />
1064
+ ) : null}
1065
+ <scrollbox focused height={logHeight} paddingLeft={1} stickyScroll>
1066
+ {displayLines.length > 0 ? (
1067
+ displayLines.map((line, i) => {
1068
+ const isError = ERROR_PATTERN.test(line)
1069
+ return (
1070
+ // eslint-disable-next-line react/no-array-index-key
1071
+ <text fg={isError ? 'red' : DIM} key={i}>
1072
+ {line}
1073
+ </text>
1074
+ )
1075
+ })
1076
+ ) : (
1077
+ <text fg={DIM}>Waiting for output...</text>
1078
+ )}
1079
+ </scrollbox>
1080
+ </box>
1081
+ </box>
1082
+ {showHelp ? <HelpDialog height={height} width={width} /> : null}
1181
1083
  </box>
1182
1084
  )
1183
1085
  }