anymd 0.0.4 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/bootstrap.ts +2 -2
- package/src/tui-data.ts +5 -3
- package/tui.tsx +281 -379
package/package.json
CHANGED
package/src/bootstrap.ts
CHANGED
|
@@ -9,8 +9,8 @@ interface BootstrapCallbacks {
|
|
|
9
9
|
onStep: (message: string) => void
|
|
10
10
|
}
|
|
11
11
|
|
|
12
|
-
const REQUIRED_PACKAGES = ['marker', 'markitdown', 'mlx_vlm', 'pypdfium2']
|
|
13
|
-
const PIP_PACKAGES = ['marker-pdf', 'markitdown', 'mlx-vlm', 'pypdfium2']
|
|
12
|
+
// Python-side names this bootstrap module requires — presumably import names probed via checkImportable (TODO confirm against the caller).
// NOTE(review): this list has six entries while PIP_PACKAGES has five ('mammoth' has no counterpart there), so the two arrays are not index-aligned — confirm nothing pairs them by position.
const REQUIRED_PACKAGES = ['marker', 'markitdown', 'mammoth', 'mlx_vlm', 'pypdfium2', 'torchvision']
|
|
13
|
+
// pip requirement specifiers — presumably what gets installed when a required module is missing (TODO confirm against the install call).
// NOTE(review): 'mammoth' from REQUIRED_PACKAGES is not listed here; presumably it is expected to arrive through the markitdown[docx] extra — verify.
const PIP_PACKAGES = ['marker-pdf', 'markitdown[docx]', 'mlx-vlm', 'pypdfium2', 'torchvision']
|
|
14
14
|
const CHANDRA_MODEL_ID = 'mlx-community/chandra-8bit'
|
|
15
15
|
|
|
16
16
|
const checkImportable = async (py: string, pkg: string): Promise<boolean> => {
|
package/src/tui-data.ts
CHANGED
|
@@ -6,6 +6,8 @@ import pMap from 'p-map'
|
|
|
6
6
|
import { loadConfig } from '~/config'
|
|
7
7
|
import { getPaths } from '~/paths'
|
|
8
8
|
|
|
9
|
+
/**
 * Matches any ANSI CSI escape sequence: ESC `[`, optional parameter bytes (0x30–0x3F),
 * optional intermediate bytes (0x20–0x2F) and one final byte (0x40–0x7E).
 * ESC is produced with String.fromCodePoint so no control character appears in the source.
 * Built once at module load instead of on every call.
 */
const ANSI_CSI_PATTERN = new RegExp(`${String.fromCodePoint(0x1b)}\\[[0-?]*[ -/]*[@-~]`, 'gu')

/**
 * Removes ANSI CSI escape sequences from `s` so the text can be written to plain log files.
 *
 * Besides colour/style sequences (final byte `m`) this also strips other CSI sequences
 * such as cursor movement (`ESC[1A`) and line erasure (`ESC[2K`), which would otherwise
 * be left in the output as stray escape bytes.
 *
 * @param s - Text that may contain escape sequences.
 * @returns `s` with every CSI sequence removed; text without an ESC byte is returned unchanged.
 */
// The global flag makes replace() substitute every occurrence.
const stripAnsi = (s: string): string => s.replace(ANSI_CSI_PATTERN, '')
|
|
10
|
+
|
|
9
11
|
interface Classification {
|
|
10
12
|
errors: number
|
|
11
13
|
files: { mixed: string[]; native: string[]; scanned: string[] }
|
|
@@ -223,7 +225,7 @@ const runClassify = async (onProgress: (p: ClassifyProgress) => void): Promise<C
|
|
|
223
225
|
total: results.length
|
|
224
226
|
}
|
|
225
227
|
|
|
226
|
-
mkdirSync(getPaths().
|
|
228
|
+
mkdirSync(getPaths().outputDir, { recursive: true })
|
|
227
229
|
await writeFile(getPaths().classification, `${JSON.stringify(classification, null, 2)}\n`)
|
|
228
230
|
return classification
|
|
229
231
|
}
|
|
@@ -540,7 +542,7 @@ const fetchStepData = async (): Promise<AllStepsData> => {
|
|
|
540
542
|
|
|
541
543
|
const appendPipelineLog = async (line: string): Promise<void> => {
|
|
542
544
|
try {
|
|
543
|
-
await appendFile(getPaths().pipelineLog, `${line}\n`)
|
|
545
|
+
await appendFile(getPaths().pipelineLog, `${stripAnsi(line)}\n`)
|
|
544
546
|
} catch {
|
|
545
547
|
/* Empty */
|
|
546
548
|
}
|
|
@@ -557,7 +559,7 @@ const clearPipelineLog = async (): Promise<void> => {
|
|
|
557
559
|
const appendErrorLog = async (step: string, message: string): Promise<void> => {
|
|
558
560
|
try {
|
|
559
561
|
const ts = new Date().toISOString()
|
|
560
|
-
await appendFile(getPaths().errorsLog, `[${ts}] [${step}] ${message}\n`)
|
|
562
|
+
await appendFile(getPaths().errorsLog, `[${ts}] [${step}] ${stripAnsi(message)}\n`)
|
|
561
563
|
} catch {
|
|
562
564
|
/* Empty */
|
|
563
565
|
}
|
package/tui.tsx
CHANGED
|
@@ -25,7 +25,10 @@ import {
|
|
|
25
25
|
writeNativeFileList
|
|
26
26
|
} from '~/tui-data'
|
|
27
27
|
|
|
28
|
+
/**
 * Matches any ANSI CSI escape sequence: ESC `[`, optional parameter bytes (0x30–0x3F),
 * optional intermediate bytes (0x20–0x2F) and one final byte (0x40–0x7E).
 * ESC is produced with String.fromCodePoint so no control character appears in the source.
 * Built once at module load instead of on every call.
 */
const ANSI_CSI_PATTERN = new RegExp(`${String.fromCodePoint(0x1b)}\\[[0-?]*[ -/]*[@-~]`, 'gu')

/**
 * Removes ANSI CSI escape sequences from `s` before the text is stored in UI state.
 *
 * Besides colour/style sequences (final byte `m`) this also strips other CSI sequences
 * such as cursor movement (`ESC[1A`) and line erasure (`ESC[2K`), which would otherwise
 * be passed through into the rendered text.
 *
 * NOTE(review): an identical helper is defined in src/tui-data.ts; consider exporting it
 * from there and importing it here so the two copies cannot drift apart.
 *
 * @param s - Text that may contain escape sequences.
 * @returns `s` with every CSI sequence removed; text without an ESC byte is returned unchanged.
 */
// The global flag makes replace() substitute every occurrence.
const stripAnsi = (s: string): string => s.replace(ANSI_CSI_PATTERN, '')
|
|
29
|
+
|
|
28
30
|
const DIM = '#888888'
|
|
31
|
+
const SIDEBAR_WIDTH = 38
|
|
29
32
|
|
|
30
33
|
const setTerminalTitle = (title: string): void => {
|
|
31
34
|
process.stdout.write(`\u001B]0;${title}\u0007`)
|
|
@@ -109,7 +112,7 @@ const initialState: AppState = {
|
|
|
109
112
|
const reducer = (state: AppState, action: Action): AppState => {
|
|
110
113
|
switch (action.type) {
|
|
111
114
|
case 'APPEND_OUTPUT':
|
|
112
|
-
return { ...state, runningLines: [...state.runningLines.slice(-MAX_OUTPUT_LINES), action.line] }
|
|
115
|
+
return { ...state, runningLines: [...state.runningLines.slice(-MAX_OUTPUT_LINES), stripAnsi(action.line)] }
|
|
113
116
|
case 'CLEAR_FAILURE':
|
|
114
117
|
return { ...state, backgroundOcr: false, failed: false, runningCommand: null }
|
|
115
118
|
case 'COMMAND_DONE': {
|
|
@@ -136,7 +139,7 @@ const reducer = (state: AppState, action: Action): AppState => {
|
|
|
136
139
|
case 'SET_DATASET_RESULT':
|
|
137
140
|
return { ...state, datasetResult: action.result }
|
|
138
141
|
case 'SET_LOG':
|
|
139
|
-
return { ...state, logLines: action.lines }
|
|
142
|
+
return { ...state, logLines: action.lines.map(stripAnsi) }
|
|
140
143
|
case 'SET_PREFLIGHT':
|
|
141
144
|
return { ...state, preflightErrors: action.errors, preflightWarnings: action.warnings }
|
|
142
145
|
case 'SET_RUNNING_STATUS':
|
|
@@ -195,10 +198,10 @@ const SpinnerDots = ({ fg }: { fg?: string }) => {
|
|
|
195
198
|
return <text fg={fg ?? 'yellow'}>{SPINNER_FRAMES[frame]} </text>
|
|
196
199
|
}
|
|
197
200
|
|
|
198
|
-
const PROGRESS_FULL = '
|
|
199
|
-
const PROGRESS_EMPTY = '
|
|
201
|
+
const PROGRESS_FULL = '█'
|
|
202
|
+
const PROGRESS_EMPTY = '░'
|
|
200
203
|
|
|
201
|
-
const ProgressBarSimple = ({ value, width =
|
|
204
|
+
const ProgressBarSimple = ({ value, width = 12 }: { value: number; width?: number }) => {
|
|
202
205
|
const filled = Math.round((value / 100) * width)
|
|
203
206
|
const empty = width - filled
|
|
204
207
|
return (
|
|
@@ -227,20 +230,19 @@ const ElapsedTimer = ({ startedAt }: { startedAt: number }) => {
|
|
|
227
230
|
}
|
|
228
231
|
|
|
229
232
|
const RecentFiles = ({ files }: { files: { duration: number; name: string; pages: number; per_page: number }[] }) => {
|
|
230
|
-
const display = files.slice(-
|
|
233
|
+
const display = files.slice(-3)
|
|
231
234
|
return (
|
|
232
235
|
<box flexDirection='column'>
|
|
233
236
|
<text fg={DIM}>
|
|
234
|
-
<b
|
|
237
|
+
<b>── recent ──</b>
|
|
235
238
|
</text>
|
|
236
239
|
{display.map((f, i) => (
|
|
237
240
|
// eslint-disable-next-line react/no-array-index-key
|
|
238
241
|
<text key={i}>
|
|
239
|
-
<span fg='green'
|
|
240
|
-
<span fg='cyan'>{f.name.slice(0,
|
|
241
|
-
<span> {f.pages.toString().padStart(
|
|
242
|
-
<span fg='yellow'>{formatDuration(f.duration)
|
|
243
|
-
<span fg={DIM}> ({formatDuration(f.per_page)}/p)</span>
|
|
242
|
+
<span fg='green'>✓</span>
|
|
243
|
+
<span fg='cyan'> {f.name.slice(0, 18)}</span>
|
|
244
|
+
<span> {f.pages.toString().padStart(2)}p</span>
|
|
245
|
+
<span fg='yellow'> {formatDuration(f.duration)}</span>
|
|
244
246
|
</text>
|
|
245
247
|
))}
|
|
246
248
|
</box>
|
|
@@ -265,41 +267,41 @@ const OcrLiveInfo = ({ progress }: { progress: OcrProgress }) => {
|
|
|
265
267
|
}, [progress.current_file, progress.current_file_started])
|
|
266
268
|
|
|
267
269
|
return (
|
|
268
|
-
<box flexDirection='column' paddingLeft={
|
|
270
|
+
<box flexDirection='column' paddingLeft={2}>
|
|
269
271
|
{progress.current_file === '-' ? null : (
|
|
270
272
|
<text>
|
|
271
|
-
<span fg={DIM}>
|
|
272
|
-
<b fg='cyan'>{progress.current_file}</b>
|
|
273
|
-
{progress.current_pages_total ? <span fg={DIM}> ({progress.current_pages_total}p)</span> : null}
|
|
274
|
-
<span fg={DIM}> page </span>
|
|
275
|
-
<span>{progress.current_page}</span>
|
|
276
|
-
<span fg={DIM}>{' '}elapsed: </span>
|
|
277
|
-
<span fg='yellow'>{formatDuration(elapsed)}</span>
|
|
273
|
+
<span fg={DIM}>Now: </span>
|
|
274
|
+
<b fg='cyan'>{progress.current_file.slice(0, 20)}</b>
|
|
278
275
|
</text>
|
|
279
276
|
)}
|
|
280
|
-
|
|
281
|
-
<text>
|
|
282
|
-
<span fg={DIM}>ETA: </span>
|
|
283
|
-
<span fg='yellow'>{progress.eta}</span>
|
|
284
|
-
</text>
|
|
285
|
-
<text>
|
|
286
|
-
<span fg={DIM}>avg: </span>
|
|
287
|
-
<span>{progress.avg_per_file}/file</span>
|
|
288
|
-
</text>
|
|
277
|
+
{progress.current_file === '-' ? null : (
|
|
289
278
|
<text>
|
|
290
|
-
<span fg={DIM}>
|
|
291
|
-
<span
|
|
279
|
+
<span fg={DIM}>p</span>
|
|
280
|
+
<span>{progress.current_page}</span>
|
|
281
|
+
{progress.current_pages_total ? <span fg={DIM}>/{progress.current_pages_total}</span> : null}
|
|
282
|
+
<span fg={DIM}> </span>
|
|
283
|
+
<span fg='yellow'>{formatDuration(elapsed)}</span>
|
|
292
284
|
</text>
|
|
293
|
-
|
|
285
|
+
)}
|
|
286
|
+
<text>
|
|
287
|
+
<span fg={DIM}>ETA </span>
|
|
288
|
+
<span fg='yellow'>{progress.eta}</span>
|
|
289
|
+
<span fg={DIM}> avg </span>
|
|
290
|
+
<span>{progress.avg_per_file}</span>
|
|
291
|
+
</text>
|
|
292
|
+
<text>
|
|
293
|
+
<span fg={DIM}>err </span>
|
|
294
|
+
<span fg={progress.errors > 0 ? 'red' : 'green'}>{progress.errors}</span>
|
|
295
|
+
</text>
|
|
294
296
|
{progress.recent_files?.length ? <RecentFiles files={progress.recent_files} /> : null}
|
|
295
297
|
</box>
|
|
296
298
|
)
|
|
297
299
|
}
|
|
298
300
|
|
|
299
301
|
const getStepIcon = (isDone: boolean, isFailed: boolean): string => {
|
|
300
|
-
if (isFailed) return '
|
|
301
|
-
if (isDone) return '
|
|
302
|
-
return '
|
|
302
|
+
if (isFailed) return '✗'
|
|
303
|
+
if (isDone) return '✓'
|
|
304
|
+
return '·'
|
|
303
305
|
}
|
|
304
306
|
|
|
305
307
|
const getStepColor = (isRunning: boolean, isDone: boolean, isFailed: boolean): string => {
|
|
@@ -328,100 +330,7 @@ const StepIcon = ({
|
|
|
328
330
|
)
|
|
329
331
|
}
|
|
330
332
|
|
|
331
|
-
const
|
|
332
|
-
color,
|
|
333
|
-
completedDuration,
|
|
334
|
-
done,
|
|
335
|
-
failedCount,
|
|
336
|
-
isActive,
|
|
337
|
-
isDone,
|
|
338
|
-
isRunning,
|
|
339
|
-
pct,
|
|
340
|
-
step,
|
|
341
|
-
stepStartedAt,
|
|
342
|
-
total
|
|
343
|
-
}: {
|
|
344
|
-
color: string
|
|
345
|
-
completedDuration?: number
|
|
346
|
-
done: number
|
|
347
|
-
failedCount?: number
|
|
348
|
-
isActive: boolean
|
|
349
|
-
isDone: boolean
|
|
350
|
-
isRunning: boolean
|
|
351
|
-
pct: number
|
|
352
|
-
step: StepConfig
|
|
353
|
-
stepStartedAt: number
|
|
354
|
-
total: number
|
|
355
|
-
}) => {
|
|
356
|
-
const fgColor = isActive ? (color === 'gray' ? undefined : color) : DIM
|
|
357
|
-
return (
|
|
358
|
-
<box gap={1}>
|
|
359
|
-
<box width={3}>
|
|
360
|
-
<StepIcon color={color} isDone={isDone} isFailed={false} isRunning={isRunning} />
|
|
361
|
-
</box>
|
|
362
|
-
<text fg={color}>
|
|
363
|
-
<b>[{step.stepNum}]</b>
|
|
364
|
-
</text>
|
|
365
|
-
<text fg={fgColor}>{isActive ? <b>{step.name}</b> : step.name}</text>
|
|
366
|
-
{isRunning ? <ElapsedTimer startedAt={stepStartedAt} /> : null}
|
|
367
|
-
{isDone && !isRunning && completedDuration !== undefined ? (
|
|
368
|
-
<text fg={DIM}>{formatDuration(completedDuration)}</text>
|
|
369
|
-
) : null}
|
|
370
|
-
<box width={20}>
|
|
371
|
-
<ProgressBarSimple value={pct} />
|
|
372
|
-
</box>
|
|
373
|
-
<text>
|
|
374
|
-
<span fg='green'>{done}</span>
|
|
375
|
-
<span fg={DIM}>
|
|
376
|
-
/{total > 0 ? total : '?'} {step.unit}
|
|
377
|
-
</span>
|
|
378
|
-
{failedCount && failedCount > 0 ? <span fg='red'> ({failedCount} failed)</span> : null}
|
|
379
|
-
</text>
|
|
380
|
-
</box>
|
|
381
|
-
)
|
|
382
|
-
}
|
|
383
|
-
|
|
384
|
-
const StepDetails = ({
|
|
385
|
-
isRunning,
|
|
386
|
-
ocrProgress,
|
|
387
|
-
requires,
|
|
388
|
-
runningStatus,
|
|
389
|
-
showOcr,
|
|
390
|
-
texts
|
|
391
|
-
}: {
|
|
392
|
-
isRunning: boolean
|
|
393
|
-
ocrProgress?: null | OcrProgress
|
|
394
|
-
requires?: string
|
|
395
|
-
runningStatus: string
|
|
396
|
-
showOcr: boolean
|
|
397
|
-
texts: string[]
|
|
398
|
-
}) => (
|
|
399
|
-
<>
|
|
400
|
-
{texts.length > 0 ? (
|
|
401
|
-
<box paddingLeft={6}>
|
|
402
|
-
{texts.map((d, i) => (
|
|
403
|
-
// eslint-disable-next-line react/no-array-index-key
|
|
404
|
-
<text fg={DIM} key={i}>
|
|
405
|
-
{d}
|
|
406
|
-
</text>
|
|
407
|
-
))}
|
|
408
|
-
</box>
|
|
409
|
-
) : null}
|
|
410
|
-
{isRunning && runningStatus !== '' ? (
|
|
411
|
-
<box paddingLeft={6}>
|
|
412
|
-
<text fg='cyan'>{runningStatus}</text>
|
|
413
|
-
</box>
|
|
414
|
-
) : null}
|
|
415
|
-
{requires ? (
|
|
416
|
-
<box paddingLeft={6}>
|
|
417
|
-
<text fg='yellow'>{`\u26A0 Requires: ${requires}`}</text>
|
|
418
|
-
</box>
|
|
419
|
-
) : null}
|
|
420
|
-
{showOcr && ocrProgress ? <OcrLiveInfo progress={ocrProgress} /> : null}
|
|
421
|
-
</>
|
|
422
|
-
)
|
|
423
|
-
|
|
424
|
-
const StepCard = ({
|
|
333
|
+
const SidebarStep = ({
|
|
425
334
|
completedDuration,
|
|
426
335
|
failedCount,
|
|
427
336
|
isFailed,
|
|
@@ -441,6 +350,7 @@ const StepCard = ({
|
|
|
441
350
|
step: StepConfig
|
|
442
351
|
stepData: StepData | undefined
|
|
443
352
|
stepStartedAt: number
|
|
353
|
+
// eslint-disable-next-line complexity, max-statements
|
|
444
354
|
}) => {
|
|
445
355
|
const done = stepData?.done ?? 0
|
|
446
356
|
const total = stepData?.total ?? 0
|
|
@@ -452,67 +362,52 @@ const StepCard = ({
|
|
|
452
362
|
const showOcr = isRunning && step.command === 'ocr'
|
|
453
363
|
|
|
454
364
|
return (
|
|
455
|
-
<box flexDirection='column'
|
|
456
|
-
<
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
365
|
+
<box flexDirection='column'>
|
|
366
|
+
<box>
|
|
367
|
+
<box width={2}>
|
|
368
|
+
<StepIcon color={color} isDone={isDone} isFailed={false} isRunning={isRunning} />
|
|
369
|
+
</box>
|
|
370
|
+
<text fg={isActive ? color : DIM}>{step.name}</text>
|
|
371
|
+
{isRunning ? <ElapsedTimer startedAt={stepStartedAt} /> : null}
|
|
372
|
+
{isDone && !isRunning && completedDuration !== undefined ? (
|
|
373
|
+
<text fg={DIM}> {formatDuration(completedDuration)}</text>
|
|
374
|
+
) : null}
|
|
375
|
+
</box>
|
|
376
|
+
<box paddingLeft={2}>
|
|
377
|
+
<ProgressBarSimple value={pct} />
|
|
378
|
+
<text>
|
|
379
|
+
<span fg='green'> {done}</span>
|
|
380
|
+
<span fg={DIM}>/{total > 0 ? total : '?'}</span>
|
|
381
|
+
{failedCount && failedCount > 0 ? <span fg='red'> {failedCount}✗</span> : null}
|
|
382
|
+
</text>
|
|
383
|
+
</box>
|
|
384
|
+
{stepData?.details?.length ? (
|
|
385
|
+
<box flexDirection='column' paddingLeft={2}>
|
|
386
|
+
{stepData.details.map((d, i) => (
|
|
387
|
+
// eslint-disable-next-line react/no-array-index-key
|
|
388
|
+
<text fg={DIM} key={i}>
|
|
389
|
+
{d}
|
|
390
|
+
</text>
|
|
391
|
+
))}
|
|
392
|
+
</box>
|
|
393
|
+
) : null}
|
|
394
|
+
{isRunning && runningStatus !== '' ? (
|
|
395
|
+
<box paddingLeft={2}>
|
|
396
|
+
<text fg='cyan'>{runningStatus}</text>
|
|
397
|
+
</box>
|
|
398
|
+
) : null}
|
|
399
|
+
{showRequires ? (
|
|
400
|
+
<box paddingLeft={2}>
|
|
401
|
+
<text fg='yellow'>⚠ {stepData?.requires}</text>
|
|
402
|
+
</box>
|
|
403
|
+
) : null}
|
|
404
|
+
{showOcr && ocrProgress ? <OcrLiveInfo progress={ocrProgress} /> : null}
|
|
477
405
|
</box>
|
|
478
406
|
)
|
|
479
407
|
}
|
|
480
408
|
|
|
481
409
|
/**
 * Heuristic (case-insensitive) for lines that look like errors.
 *
 * The keyword alternatives must start at a word boundary, so "terror" does not match
 * while "errors" and "3 files failed" do. The heavy cross mark (U+2716) is matched
 * anywhere in the line: it is not a word character, so placing it behind `\b` would
 * only match when a letter or digit directly precedes it and would miss a line that
 * starts with the symbol or has it after a space.
 *
 * The pattern has no global/sticky flag, so repeated `.test()` calls are stateless.
 */
const ERROR_PATTERN = /\b(?:ERROR|Error:|Failed:|failed|FAILED|exception|traceback)|\u2716/iu
|
|
482
410
|
|
|
483
|
-
const OutputBox = ({ lines, status }: { lines: string[]; status: string }) => {
|
|
484
|
-
if (lines.length === 0 && status === '') return null
|
|
485
|
-
return (
|
|
486
|
-
<box
|
|
487
|
-
border
|
|
488
|
-
borderColor='gray'
|
|
489
|
-
borderStyle='rounded'
|
|
490
|
-
flexDirection='column'
|
|
491
|
-
marginTop={1}
|
|
492
|
-
paddingLeft={1}
|
|
493
|
-
paddingRight={1}>
|
|
494
|
-
<text fg={DIM}>
|
|
495
|
-
<b>Output</b>
|
|
496
|
-
</text>
|
|
497
|
-
<box flexDirection='column'>
|
|
498
|
-
{lines.length > 0 ? (
|
|
499
|
-
lines.map((line, i) => {
|
|
500
|
-
const isError = ERROR_PATTERN.test(line)
|
|
501
|
-
return (
|
|
502
|
-
// eslint-disable-next-line react/no-array-index-key
|
|
503
|
-
<text fg={isError ? 'red' : DIM} key={i}>
|
|
504
|
-
{line}
|
|
505
|
-
</text>
|
|
506
|
-
)
|
|
507
|
-
})
|
|
508
|
-
) : (
|
|
509
|
-
<text fg='cyan'>{status}</text>
|
|
510
|
-
)}
|
|
511
|
-
</box>
|
|
512
|
-
</box>
|
|
513
|
-
)
|
|
514
|
-
}
|
|
515
|
-
|
|
516
411
|
const PreflightBanner = ({ errors, warnings }: { errors: string[]; warnings: string[] }) => {
|
|
517
412
|
if (errors.length === 0 && warnings.length === 0) return null
|
|
518
413
|
return (
|
|
@@ -555,98 +450,20 @@ const PreflightBanner = ({ errors, warnings }: { errors: string[]; warnings: str
|
|
|
555
450
|
)
|
|
556
451
|
}
|
|
557
452
|
|
|
558
|
-
const LogOverlay = ({ lines }: { lines: string[] }) => {
|
|
559
|
-
const { height } = useTerminalDimensions()
|
|
560
|
-
const logHeight = Math.max(5, height - 18)
|
|
561
|
-
|
|
562
|
-
return (
|
|
563
|
-
<box
|
|
564
|
-
border
|
|
565
|
-
borderColor='magenta'
|
|
566
|
-
borderStyle='rounded'
|
|
567
|
-
flexDirection='column'
|
|
568
|
-
marginTop={1}
|
|
569
|
-
paddingLeft={1}
|
|
570
|
-
paddingRight={1}>
|
|
571
|
-
<box justifyContent='space-between'>
|
|
572
|
-
<text fg='magenta'>
|
|
573
|
-
<b>Log</b>
|
|
574
|
-
</text>
|
|
575
|
-
<text fg={DIM}>L/ESC close · ↑↓ scroll</text>
|
|
576
|
-
</box>
|
|
577
|
-
<scrollbox focused height={logHeight} marginTop={1} stickyScroll>
|
|
578
|
-
{lines.length > 0 ? (
|
|
579
|
-
lines.map((line, i) => (
|
|
580
|
-
// eslint-disable-next-line react/no-array-index-key
|
|
581
|
-
<text fg={DIM} key={i}>
|
|
582
|
-
{line}
|
|
583
|
-
</text>
|
|
584
|
-
))
|
|
585
|
-
) : (
|
|
586
|
-
<text fg={DIM}>No log entries yet</text>
|
|
587
|
-
)}
|
|
588
|
-
</scrollbox>
|
|
589
|
-
</box>
|
|
590
|
-
)
|
|
591
|
-
}
|
|
592
|
-
|
|
593
|
-
const TitleBar = ({ allDone, failed }: { allDone: boolean; failed: boolean }) => (
|
|
594
|
-
<box justifyContent='space-between' paddingLeft={1} paddingRight={1}>
|
|
595
|
-
<text fg='cyan'>
|
|
596
|
-
<b>Document Pipeline</b>
|
|
597
|
-
</text>
|
|
598
|
-
<box gap={2}>
|
|
599
|
-
{failed ? (
|
|
600
|
-
<>
|
|
601
|
-
<text>
|
|
602
|
-
<b fg='yellow'>[R]</b>
|
|
603
|
-
<span fg={DIM}> Retry</span>
|
|
604
|
-
</text>
|
|
605
|
-
<text>
|
|
606
|
-
<b fg='yellow'>[S]</b>
|
|
607
|
-
<span fg={DIM}> Skip</span>
|
|
608
|
-
</text>
|
|
609
|
-
</>
|
|
610
|
-
) : null}
|
|
611
|
-
{allDone ? (
|
|
612
|
-
<text fg='green'>
|
|
613
|
-
<b>✓ Pipeline complete</b>
|
|
614
|
-
</text>
|
|
615
|
-
) : null}
|
|
616
|
-
<text>
|
|
617
|
-
<b fg='magenta'>[L]</b>
|
|
618
|
-
<span fg={DIM}> Log</span>
|
|
619
|
-
</text>
|
|
620
|
-
<text>
|
|
621
|
-
<b fg='red'>[Q]</b>
|
|
622
|
-
<span fg={DIM}> Quit</span>
|
|
623
|
-
</text>
|
|
624
|
-
</box>
|
|
625
|
-
</box>
|
|
626
|
-
)
|
|
627
|
-
|
|
628
|
-
const formatChars = (chars: number): string => {
|
|
629
|
-
if (chars >= 1_000_000) return `${(chars / 1_000_000).toFixed(1)}M`
|
|
630
|
-
if (chars >= 1000) return `${(chars / 1000).toFixed(1)}K`
|
|
631
|
-
return String(chars)
|
|
632
|
-
}
|
|
633
|
-
|
|
634
453
|
// eslint-disable-next-line max-statements
|
|
635
|
-
const
|
|
454
|
+
const TitleBarTop = ({
|
|
636
455
|
allDone,
|
|
637
456
|
backgroundOcr,
|
|
638
|
-
|
|
457
|
+
failed,
|
|
639
458
|
pipelineStartedAt,
|
|
640
459
|
runningCommand,
|
|
641
|
-
stepDurations,
|
|
642
460
|
stepsData
|
|
643
461
|
}: {
|
|
644
462
|
allDone: boolean
|
|
645
463
|
backgroundOcr: boolean
|
|
646
|
-
|
|
464
|
+
failed: boolean
|
|
647
465
|
pipelineStartedAt: number
|
|
648
466
|
runningCommand: CommandKey | null
|
|
649
|
-
stepDurations: Partial<Record<CommandKey, number>>
|
|
650
467
|
stepsData: AllStepsData | null
|
|
651
468
|
}) => {
|
|
652
469
|
const [elapsed, setElapsed] = useState(0)
|
|
@@ -661,83 +478,148 @@ const PipelineSummary = ({
|
|
|
661
478
|
return () => clearInterval(timer)
|
|
662
479
|
}, [pipelineStartedAt])
|
|
663
480
|
|
|
664
|
-
|
|
665
|
-
|
|
481
|
+
let statusText = ''
|
|
482
|
+
if (allDone) statusText = '✓ Complete'
|
|
483
|
+
else if (failed) statusText = '✗ Failed'
|
|
484
|
+
else if (runningCommand && stepsData) {
|
|
485
|
+
let currentIdx = 0
|
|
486
|
+
let currentName = ''
|
|
487
|
+
for (const s of STEPS)
|
|
488
|
+
if (s.command === runningCommand) {
|
|
489
|
+
currentIdx = s.stepNum
|
|
490
|
+
currentName = s.name
|
|
491
|
+
break
|
|
492
|
+
}
|
|
493
|
+
if (currentIdx > 0) {
|
|
494
|
+
const parallelLabel = backgroundOcr && runningCommand === 'pipeline' ? '2+3/5 Convert+OCR' : null
|
|
495
|
+
statusText = parallelLabel ?? `${currentIdx}/5 ${currentName}`
|
|
496
|
+
}
|
|
497
|
+
}
|
|
498
|
+
|
|
499
|
+
const statusColor = allDone ? 'green' : failed ? 'red' : 'cyan'
|
|
666
500
|
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
<
|
|
670
|
-
<
|
|
671
|
-
|
|
672
|
-
|
|
501
|
+
return (
|
|
502
|
+
<box justifyContent='space-between' paddingLeft={1} paddingRight={1}>
|
|
503
|
+
<text fg='#e0a040'>
|
|
504
|
+
<b>anymd</b>
|
|
505
|
+
</text>
|
|
506
|
+
<box gap={2}>
|
|
507
|
+
{statusText === '' ? null : (
|
|
508
|
+
<text fg={statusColor}>
|
|
509
|
+
<b>{statusText}</b>
|
|
673
510
|
</text>
|
|
674
|
-
|
|
675
|
-
</
|
|
676
|
-
|
|
677
|
-
|
|
511
|
+
)}
|
|
512
|
+
{pipelineStartedAt > 0 ? <text fg={DIM}>{formatDuration(elapsed)}</text> : null}
|
|
513
|
+
</box>
|
|
514
|
+
</box>
|
|
515
|
+
)
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
const formatChars = (chars: number): string => {
|
|
519
|
+
if (chars >= 1_000_000) return `${(chars / 1_000_000).toFixed(1)}M`
|
|
520
|
+
if (chars >= 1000) return `${(chars / 1000).toFixed(1)}K`
|
|
521
|
+
return String(chars)
|
|
522
|
+
}
|
|
523
|
+
|
|
524
|
+
const SidebarSummary = ({
|
|
525
|
+
allDone,
|
|
526
|
+
datasetResult,
|
|
527
|
+
stepDurations
|
|
528
|
+
}: {
|
|
529
|
+
allDone: boolean
|
|
530
|
+
datasetResult: DatasetResult | null
|
|
531
|
+
stepDurations: Partial<Record<CommandKey, number>>
|
|
532
|
+
}) => {
|
|
533
|
+
if (!allDone) return null
|
|
534
|
+
return (
|
|
535
|
+
<box flexDirection='column' paddingLeft={1} paddingTop={1}>
|
|
536
|
+
<text fg='green'>
|
|
537
|
+
<b>✓ All 5 steps complete</b>
|
|
538
|
+
</text>
|
|
539
|
+
{datasetResult ? (
|
|
540
|
+
<box flexDirection='column' paddingLeft={1}>
|
|
541
|
+
<text>
|
|
542
|
+
<span fg={DIM}>entries </span>
|
|
543
|
+
<b fg='green'>{datasetResult.entries.toLocaleString()}</b>
|
|
544
|
+
</text>
|
|
545
|
+
<text>
|
|
546
|
+
<span fg={DIM}>chars </span>
|
|
547
|
+
<b>{formatChars(datasetResult.totalChars)}</b>
|
|
548
|
+
</text>
|
|
549
|
+
{datasetResult.skipped > 0 ? (
|
|
678
550
|
<text>
|
|
679
|
-
<span fg={DIM}>
|
|
680
|
-
<
|
|
551
|
+
<span fg={DIM}>skipped </span>
|
|
552
|
+
<span fg='yellow'>{datasetResult.skipped}</span>
|
|
681
553
|
</text>
|
|
554
|
+
) : null}
|
|
555
|
+
{datasetResult.duplicates > 0 ? (
|
|
682
556
|
<text>
|
|
683
|
-
<span fg={DIM}>
|
|
684
|
-
<
|
|
557
|
+
<span fg={DIM}>deduped </span>
|
|
558
|
+
<span fg='yellow'>{datasetResult.duplicates}</span>
|
|
685
559
|
</text>
|
|
686
|
-
|
|
687
|
-
<text>
|
|
688
|
-
<span fg={DIM}>skipped: </span>
|
|
689
|
-
<span fg='yellow'>{datasetResult.skipped}</span>
|
|
690
|
-
</text>
|
|
691
|
-
) : null}
|
|
692
|
-
{datasetResult.duplicates > 0 ? (
|
|
693
|
-
<text>
|
|
694
|
-
<span fg={DIM}>deduped: </span>
|
|
695
|
-
<span fg='yellow'>{datasetResult.duplicates}</span>
|
|
696
|
-
</text>
|
|
697
|
-
) : null}
|
|
698
|
-
</box>
|
|
699
|
-
) : null}
|
|
700
|
-
<box gap={2} paddingLeft={2}>
|
|
701
|
-
{STEPS.map(step => {
|
|
702
|
-
const dur = stepDurations[step.command]
|
|
703
|
-
if (dur === undefined) return null
|
|
704
|
-
return (
|
|
705
|
-
<text key={step.command}>
|
|
706
|
-
<span fg={DIM}>{step.name}: </span>
|
|
707
|
-
<span fg='cyan'>{formatDuration(dur)}</span>
|
|
708
|
-
</text>
|
|
709
|
-
)
|
|
710
|
-
})}
|
|
560
|
+
) : null}
|
|
711
561
|
</box>
|
|
562
|
+
) : null}
|
|
563
|
+
<box flexDirection='column' paddingLeft={1}>
|
|
564
|
+
{STEPS.map(step => {
|
|
565
|
+
const dur = stepDurations[step.command]
|
|
566
|
+
if (dur === undefined) return null
|
|
567
|
+
return (
|
|
568
|
+
<text key={step.command}>
|
|
569
|
+
<span fg={DIM}>{step.name.slice(0, 14).padEnd(14)} </span>
|
|
570
|
+
<span fg='cyan'>{formatDuration(dur)}</span>
|
|
571
|
+
</text>
|
|
572
|
+
)
|
|
573
|
+
})}
|
|
712
574
|
</box>
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
let currentName = ''
|
|
717
|
-
for (const s of STEPS)
|
|
718
|
-
if (s.command === runningCommand) {
|
|
719
|
-
currentIdx = s.stepNum
|
|
720
|
-
currentName = s.name
|
|
721
|
-
break
|
|
722
|
-
}
|
|
723
|
-
|
|
724
|
-
if (currentIdx === 0) return null
|
|
575
|
+
</box>
|
|
576
|
+
)
|
|
577
|
+
}
|
|
725
578
|
|
|
726
|
-
|
|
579
|
+
const HelpDialog = ({ height, width }: { height: number; width: number }) => {
|
|
580
|
+
const boxW = 40
|
|
581
|
+
const boxH = 12
|
|
582
|
+
const left = Math.max(0, Math.floor((width - boxW) / 2))
|
|
583
|
+
const top = Math.max(0, Math.floor((height - boxH) / 2))
|
|
727
584
|
|
|
728
585
|
return (
|
|
729
|
-
<box
|
|
586
|
+
<box
|
|
587
|
+
border
|
|
588
|
+
borderColor='#e0a040'
|
|
589
|
+
borderStyle='rounded'
|
|
590
|
+
flexDirection='column'
|
|
591
|
+
height={boxH}
|
|
592
|
+
marginLeft={left}
|
|
593
|
+
marginTop={top}
|
|
594
|
+
paddingLeft={2}
|
|
595
|
+
paddingRight={2}
|
|
596
|
+
width={boxW}>
|
|
597
|
+
<text fg='#e0a040'>
|
|
598
|
+
<b>Keybinds</b>
|
|
599
|
+
</text>
|
|
600
|
+
<text> </text>
|
|
730
601
|
<text>
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
602
|
+
<b fg='cyan'>Q</b>
|
|
603
|
+
<span fg={DIM}>{' '}Quit</span>
|
|
604
|
+
</text>
|
|
605
|
+
<text>
|
|
606
|
+
<b fg='cyan'>L</b>
|
|
607
|
+
<span fg={DIM}>{' '}Toggle log / output view</span>
|
|
608
|
+
</text>
|
|
609
|
+
<text>
|
|
610
|
+
<b fg='cyan'>R</b>
|
|
611
|
+
<span fg={DIM}>{' '}Retry failed step</span>
|
|
612
|
+
</text>
|
|
613
|
+
<text>
|
|
614
|
+
<b fg='cyan'>S</b>
|
|
615
|
+
<span fg={DIM}>{' '}Skip failed step</span>
|
|
616
|
+
</text>
|
|
617
|
+
<text>
|
|
618
|
+
<b fg='cyan'>?</b>
|
|
619
|
+
<span fg={DIM}>{' '}Toggle this help</span>
|
|
739
620
|
</text>
|
|
740
|
-
<text
|
|
621
|
+
<text> </text>
|
|
622
|
+
<text fg={DIM}>Press ? or Esc to close</text>
|
|
741
623
|
</box>
|
|
742
624
|
)
|
|
743
625
|
}
|
|
@@ -757,13 +639,6 @@ const computeTerminalTitle = (s: AppState): string => {
|
|
|
757
639
|
return `Doc Pipeline \u2014 ${label}`
|
|
758
640
|
}
|
|
759
641
|
|
|
760
|
-
const RunningFooter = () => (
|
|
761
|
-
<box justifyContent='space-between' marginTop={1} paddingLeft={1} paddingRight={1}>
|
|
762
|
-
<text fg={DIM}>Ctrl+C safe — progress saved, re-run to resume</text>
|
|
763
|
-
<text fg={DIM}>↻ auto-refresh 2s</text>
|
|
764
|
-
</box>
|
|
765
|
-
)
|
|
766
|
-
|
|
767
642
|
// eslint-disable-next-line max-statements
|
|
768
643
|
const readStream = async (stream: ReadableStream<Uint8Array>, onLine: (line: string) => void): Promise<void> => {
|
|
769
644
|
const reader = stream.getReader()
|
|
@@ -792,6 +667,8 @@ const App = () => {
|
|
|
792
667
|
const ocrProcRef = useRef<null | ReturnType<typeof Bun.spawn>>(null)
|
|
793
668
|
const busyRef = useRef(false)
|
|
794
669
|
const errorLogClearedRef = useRef(false)
|
|
670
|
+
const [showHelp, setShowHelp] = useState(false)
|
|
671
|
+
const { height, width } = useTerminalDimensions()
|
|
795
672
|
|
|
796
673
|
useEffect(() => {
|
|
797
674
|
setTerminalTitle(computeTerminalTitle(state))
|
|
@@ -1093,9 +970,13 @@ const App = () => {
|
|
|
1093
970
|
|
|
1094
971
|
// eslint-disable-next-line complexity, max-statements
|
|
1095
972
|
useKeyboard(key => {
|
|
1096
|
-
if (
|
|
1097
|
-
if (key.name === '
|
|
973
|
+
if (showHelp) {
|
|
974
|
+
if (key.name === '?' || key.name === 'escape') setShowHelp(false)
|
|
975
|
+
return
|
|
976
|
+
}
|
|
1098
977
|
|
|
978
|
+
if (key.name === '?') {
|
|
979
|
+
setShowHelp(true)
|
|
1099
980
|
return
|
|
1100
981
|
}
|
|
1101
982
|
|
|
@@ -1129,55 +1010,76 @@ const App = () => {
|
|
|
1129
1010
|
}
|
|
1130
1011
|
})
|
|
1131
1012
|
|
|
1013
|
+
const logHeight = Math.max(5, height - 3)
|
|
1014
|
+
const displayLines = state.showLog ? state.logLines : state.runningLines
|
|
1015
|
+
|
|
1132
1016
|
return (
|
|
1133
|
-
<box flexDirection='column'>
|
|
1134
|
-
<
|
|
1135
|
-
<PipelineSummary
|
|
1017
|
+
<box flexDirection='column' height={height}>
|
|
1018
|
+
<TitleBarTop
|
|
1136
1019
|
allDone={state.allDone}
|
|
1137
1020
|
backgroundOcr={state.backgroundOcr}
|
|
1138
|
-
|
|
1021
|
+
failed={state.failed}
|
|
1139
1022
|
pipelineStartedAt={state.pipelineStartedAt}
|
|
1140
1023
|
runningCommand={state.runningCommand}
|
|
1141
|
-
stepDurations={state.stepDurations}
|
|
1142
1024
|
stepsData={state.stepsData}
|
|
1143
1025
|
/>
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1026
|
+
<box flexGrow={1}>
|
|
1027
|
+
<box flexDirection='column' paddingLeft={1} width={SIDEBAR_WIDTH}>
|
|
1028
|
+
{state.stepsData ? (
|
|
1029
|
+
<box flexDirection='column'>
|
|
1030
|
+
{STEPS.map(step => {
|
|
1031
|
+
const isFg = state.runningCommand === step.command
|
|
1032
|
+
const isBgOcr = step.command === 'ocr' && state.backgroundOcr
|
|
1033
|
+
const isActive = isFg || isBgOcr
|
|
1034
|
+
const sd = state.stepsData ? state.stepsData[step.command] : undefined
|
|
1035
|
+
const failures = sd?.failed ?? state.stepFailures[step.command]
|
|
1036
|
+
return (
|
|
1037
|
+
<SidebarStep
|
|
1038
|
+
completedDuration={state.stepDurations[step.command]}
|
|
1039
|
+
failedCount={failures}
|
|
1040
|
+
isFailed={state.failed ? isFg : false}
|
|
1041
|
+
isRunning={!state.failed && isActive}
|
|
1042
|
+
key={step.command}
|
|
1043
|
+
ocrProgress={step.command === 'ocr' ? state.stepsData?.ocr.progress : undefined}
|
|
1044
|
+
runningStatus={isFg ? state.runningStatus : ''}
|
|
1045
|
+
step={step}
|
|
1046
|
+
stepData={sd}
|
|
1047
|
+
stepStartedAt={isActive ? state.stepStartedAt : 0}
|
|
1048
|
+
/>
|
|
1049
|
+
)
|
|
1050
|
+
})}
|
|
1051
|
+
</box>
|
|
1052
|
+
) : (
|
|
1053
|
+
<text fg={DIM}>Loading...</text>
|
|
1054
|
+
)}
|
|
1055
|
+
<SidebarSummary
|
|
1056
|
+
allDone={state.allDone}
|
|
1057
|
+
datasetResult={state.datasetResult}
|
|
1058
|
+
stepDurations={state.stepDurations}
|
|
1059
|
+
/>
|
|
1168
1060
|
</box>
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1061
|
+
<box flexDirection='column' flexGrow={1} paddingLeft={2}>
|
|
1062
|
+
{state.preflightErrors.length > 0 || state.preflightWarnings.length > 0 ? (
|
|
1063
|
+
<PreflightBanner errors={state.preflightErrors} warnings={state.preflightWarnings} />
|
|
1064
|
+
) : null}
|
|
1065
|
+
<scrollbox focused height={logHeight} paddingLeft={1} stickyScroll>
|
|
1066
|
+
{displayLines.length > 0 ? (
|
|
1067
|
+
displayLines.map((line, i) => {
|
|
1068
|
+
const isError = ERROR_PATTERN.test(line)
|
|
1069
|
+
return (
|
|
1070
|
+
// eslint-disable-next-line react/no-array-index-key
|
|
1071
|
+
<text fg={isError ? 'red' : DIM} key={i}>
|
|
1072
|
+
{line}
|
|
1073
|
+
</text>
|
|
1074
|
+
)
|
|
1075
|
+
})
|
|
1076
|
+
) : (
|
|
1077
|
+
<text fg={DIM}>Waiting for output...</text>
|
|
1078
|
+
)}
|
|
1079
|
+
</scrollbox>
|
|
1080
|
+
</box>
|
|
1081
|
+
</box>
|
|
1082
|
+
{showHelp ? <HelpDialog height={height} width={width} /> : null}
|
|
1181
1083
|
</box>
|
|
1182
1084
|
)
|
|
1183
1085
|
}
|