anymd 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/package.json +1 -1
  2. package/src/bootstrap.ts +104 -39
  3. package/tui.tsx +18 -19
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "anymd",
3
- "version": "0.0.2",
3
+ "version": "0.0.3",
4
4
  "description": "Convert any document (PDF, DOC, DOCX) to clean Markdown for RAG",
5
5
  "keywords": [
6
6
  "markdown",
package/src/bootstrap.ts CHANGED
@@ -11,38 +11,76 @@ interface BootstrapCallbacks {
11
11
 
12
12
  const REQUIRED_PACKAGES = ['marker', 'markitdown', 'mlx_vlm', 'pypdfium2']
13
13
  const PIP_PACKAGES = ['marker-pdf', 'markitdown', 'mlx-vlm', 'pypdfium2']
14
+ const CHANDRA_MODEL_ID = 'mlx-community/chandra-8bit'
14
15
 
15
- const checkPackageImportable = async (pythonPath: string, pkg: string): Promise<boolean> => {
16
+ const checkImportable = async (py: string, pkg: string): Promise<boolean> => {
16
17
  try {
17
- const proc = Bun.spawn([pythonPath, '-c', `import ${pkg}`], { stderr: 'pipe', stdout: 'pipe' })
18
- const code = await proc.exited
19
- return code === 0
18
+ const proc = Bun.spawn([py, '-c', `import ${pkg}`], { stderr: 'pipe', stdout: 'pipe' })
19
+ return (await proc.exited) === 0
20
20
  } catch {
21
21
  return false
22
22
  }
23
23
  }
24
24
 
25
- const allPackagesInstalled = async (pythonPath: string): Promise<boolean> => {
26
- const results = await Promise.all(REQUIRED_PACKAGES.map(async pkg => checkPackageImportable(pythonPath, pkg)))
25
+ const allPackagesInstalled = async (py: string): Promise<boolean> => {
26
+ const results = await Promise.all(REQUIRED_PACKAGES.map(async pkg => checkImportable(py, pkg)))
27
27
  for (const r of results) if (!r) return false
28
28
  return true
29
29
  }
30
30
 
31
- const runCommand = async (args: string[]): Promise<{ ok: boolean; stderr: string }> => {
32
- const proc = Bun.spawn(args, { stderr: 'pipe', stdout: 'pipe' })
33
- const code = await proc.exited
34
- const stderrText = await new Response(proc.stderr).text()
35
- return { ok: code === 0, stderr: stderrText }
31
+ const runQuiet = async (args: string[]): Promise<{ ok: boolean; stderr: string; stdout: string }> => {
32
+ try {
33
+ const proc = Bun.spawn(args, { stderr: 'pipe', stdout: 'pipe' })
34
+ const code = await proc.exited
35
+ const [stdout, stderr] = await Promise.all([new Response(proc.stdout).text(), new Response(proc.stderr).text()])
36
+ return { ok: code === 0, stderr, stdout }
37
+ } catch {
38
+ return { ok: false, stderr: 'command not found', stdout: '' }
39
+ }
40
+ }
41
+
42
+ const emitLines = (chunk: string, onLine: (l: string) => void): string => {
43
+ const lines = chunk.split('\n')
44
+ const remainder = lines.pop() ?? ''
45
+ for (const line of lines) {
46
+ const t = line.replaceAll('\r', '').trim()
47
+ if (t.length > 0) onLine(t)
48
+ }
49
+ return remainder
50
+ }
51
+
52
+ // eslint-disable-next-line max-statements
53
+ const streamLines = async (stream: ReadableStream<Uint8Array>, onLine: (l: string) => void): Promise<void> => {
54
+ const reader = stream.getReader()
55
+ const decoder = new TextDecoder()
56
+ let buf = ''
57
+ for (;;) {
58
+ /** biome-ignore lint/performance/noAwaitInLoops: sequential stream reads */
59
+ const { done, value } = await reader.read() // eslint-disable-line no-await-in-loop
60
+ if (done) break
61
+ buf = emitLines(buf + decoder.decode(value, { stream: true }), onLine)
62
+ }
63
+ emitLines(`${buf}\n`, onLine)
64
+ }
65
+
66
+ const runStreaming = async (args: string[], onLine: (l: string) => void): Promise<boolean> => {
67
+ try {
68
+ const proc = Bun.spawn(args, { stderr: 'pipe', stdout: 'pipe' })
69
+ await Promise.all([
70
+ streamLines(proc.stdout as ReadableStream<Uint8Array>, onLine),
71
+ streamLines(proc.stderr as ReadableStream<Uint8Array>, onLine)
72
+ ])
73
+ return (await proc.exited) === 0
74
+ } catch {
75
+ return false
76
+ }
36
77
  }
37
78
 
38
79
  const UV_CANDIDATES = ['uv', '/opt/homebrew/bin/uv', join(homedir(), '.local', 'bin', 'uv')]
39
80
 
40
81
  const findUv = async (): Promise<string | undefined> => {
41
- const checks = await Promise.all(
42
- UV_CANDIDATES.map(async bin => ({ bin, ok: (await runCommand([bin, '--version'])).ok }))
43
- )
44
- const found = checks.find(c => c.ok)
45
- return found?.bin
82
+ const checks = await Promise.all(UV_CANDIDATES.map(async bin => ({ bin, ok: (await runQuiet([bin, '--version'])).ok })))
83
+ return checks.find(c => c.ok)?.bin
46
84
  }
47
85
 
48
86
  interface VenvOpts {
@@ -53,39 +91,56 @@ interface VenvOpts {
53
91
  }
54
92
 
55
93
  const createVenv = async (opts: VenvOpts): Promise<boolean> => {
56
- opts.cbs.onStep('Creating Python virtual environment via uv...')
57
- const mkdirResult = await runCommand(['mkdir', '-p', opts.cacheDir])
58
- if (!mkdirResult.ok) return false
59
- const venvResult = await runCommand([opts.uv, 'venv', '--python', '3.13', opts.venvDir])
60
- if (!venvResult.ok) {
61
- opts.cbs.onStep(`Failed to create venv: ${venvResult.stderr}`)
94
+ opts.cbs.onStep('Creating Python 3.13 virtual environment...')
95
+ if (!(await runQuiet(['mkdir', '-p', opts.cacheDir])).ok) return false
96
+ const ok = await runStreaming([opts.uv, 'venv', '--python', '3.13', opts.venvDir], opts.cbs.onStep)
97
+ if (!ok) {
98
+ opts.cbs.onStep('Failed to create venv.')
62
99
  return false
63
100
  }
64
- opts.cbs.onStep('Virtual environment created (Python 3.13).')
101
+ opts.cbs.onStep('Virtual environment created.')
65
102
  return true
66
103
  }
67
104
 
68
- const installPackages = async (venvDir: string, uv: string, cbs: BootstrapCallbacks): Promise<boolean> => {
105
+ const installPkgs = async (venvDir: string, uv: string, cbs: BootstrapCallbacks): Promise<boolean> => {
69
106
  cbs.onStep(`Installing ${PIP_PACKAGES.join(', ')}...`)
70
- const result = await runCommand([uv, 'pip', 'install', '--python', `${venvDir}/bin/python`, ...PIP_PACKAGES])
71
- if (!result.ok) {
72
- cbs.onStep(`uv pip install failed: ${result.stderr}`)
107
+ const ok = await runStreaming([uv, 'pip', 'install', '--python', `${venvDir}/bin/python`, ...PIP_PACKAGES], cbs.onStep)
108
+ if (!ok) {
109
+ cbs.onStep('Package installation failed.')
73
110
  return false
74
111
  }
75
- cbs.onStep('Packages installed successfully.')
112
+ cbs.onStep('All packages installed.')
76
113
  return true
77
114
  }
78
115
 
79
- const ensureVenvExists = async (
80
- uv: string,
81
- cbs: BootstrapCallbacks
82
- ): Promise<null | { skip: boolean; venvDir: string }> => {
116
+ const downloadMarkerModels = async (py: string, cbs: BootstrapCallbacks): Promise<void> => {
117
+ cbs.onStep('Downloading marker PDF models (first run only)...')
118
+ const ok = await runStreaming([py, '-c', 'from marker.models import create_model_dict; create_model_dict()'], cbs.onStep)
119
+ cbs.onStep(ok ? 'Marker models ready.' : 'Marker model download failed (will retry on first convert).')
120
+ }
121
+
122
+ const downloadChandraModel = async (py: string, cbs: BootstrapCallbacks): Promise<void> => {
123
+ cbs.onStep(`Downloading OCR model ${CHANDRA_MODEL_ID} (first run only)...`)
124
+ const ok = await runStreaming([py, '-c', `from mlx_vlm import load; load("${CHANDRA_MODEL_ID}")`], cbs.onStep)
125
+ cbs.onStep(ok ? 'OCR model ready.' : 'OCR model download failed (will retry on first OCR).')
126
+ }
127
+
128
+ const chandraModelCached = async (py: string): Promise<boolean> => {
129
+ const r = await runQuiet([
130
+ py,
131
+ '-c',
132
+ `from huggingface_hub import scan_cache_dir; print(any(r.repo_id == "${CHANDRA_MODEL_ID}" for r in scan_cache_dir().repos))`
133
+ ])
134
+ return r.ok && r.stdout.trim() === 'True'
135
+ }
136
+
137
+ const ensureVenv = async (uv: string, cbs: BootstrapCallbacks): Promise<null | { skip: boolean; venvDir: string }> => {
83
138
  const { cacheDir, venvPython } = getPaths()
84
139
  const venvDir = `${cacheDir}/.venv`
85
140
  if (existsSync(venvPython)) {
86
141
  const installed = await allPackagesInstalled(venvPython)
87
142
  if (installed) return { skip: true, venvDir }
88
- cbs.onStep('Some packages missing, installing...')
143
+ cbs.onStep('Some packages missing, reinstalling...')
89
144
  return { skip: false, venvDir }
90
145
  }
91
146
  const created = await createVenv({ cacheDir, cbs, uv, venvDir })
@@ -98,19 +153,29 @@ const requireUv = async (cbs: BootstrapCallbacks): Promise<string | undefined> =
98
153
  return uv
99
154
  }
100
155
 
101
- const setupAndInstall = async (uv: string, cbs: BootstrapCallbacks): Promise<boolean> => {
102
- const result = await ensureVenvExists(uv, cbs)
156
+ const ensurePackages = async (uv: string, cbs: BootstrapCallbacks): Promise<boolean> => {
157
+ const result = await ensureVenv(uv, cbs)
103
158
  if (!result) return false
104
159
  if (result.skip) return true
105
- return installPackages(result.venvDir, uv, cbs)
160
+ return installPkgs(result.venvDir, uv, cbs)
161
+ }
162
+
163
+ const ensureModels = async (cbs: BootstrapCallbacks): Promise<void> => {
164
+ const py = getPaths().venvPython
165
+ const cached = await chandraModelCached(py)
166
+ if (cached) return
167
+ await downloadMarkerModels(py, cbs)
168
+ await downloadChandraModel(py, cbs)
106
169
  }
107
170
 
108
171
  const bootstrapPython = async (cbs: BootstrapCallbacks): Promise<boolean> => {
109
172
  const uv = await requireUv(cbs)
110
173
  if (!uv) return false
111
- const ok = await setupAndInstall(uv, cbs)
112
- if (ok) cbs.onDone()
113
- return ok
174
+ const ok = await ensurePackages(uv, cbs)
175
+ if (!ok) return false
176
+ await ensureModels(cbs)
177
+ cbs.onDone()
178
+ return true
114
179
  }
115
180
 
116
181
  export { bootstrapPython }
package/tui.tsx CHANGED
@@ -1003,25 +1003,22 @@ const App = () => {
1003
1003
  await clearErrorLog()
1004
1004
  }
1005
1005
 
1006
- const { existsSync } = await import('node:fs')
1007
- if (!existsSync(getPaths().venvPython)) {
1008
- dispatch({ line: 'Setting up Python environment...', type: 'APPEND_OUTPUT' })
1009
- const ok = await bootstrapPython({
1010
- onDone: () => {
1011
- dispatch({ line: 'Python environment ready.', type: 'APPEND_OUTPUT' })
1012
- },
1013
- onStep: (msg: string) => {
1014
- dispatch({ line: msg, type: 'APPEND_OUTPUT' })
1015
- }
1016
- })
1017
- if (!ok) {
1018
- dispatch({
1019
- errors: ['Python bootstrap failed. Install python3 and try again.'],
1020
- type: 'SET_PREFLIGHT',
1021
- warnings: []
1022
- })
1023
- return
1006
+ dispatch({ line: 'Checking Python environment...', type: 'APPEND_OUTPUT' })
1007
+ const ok = await bootstrapPython({
1008
+ onDone: () => {
1009
+ dispatch({ line: 'Python environment ready.', type: 'APPEND_OUTPUT' })
1010
+ },
1011
+ onStep: (msg: string) => {
1012
+ dispatch({ line: msg, type: 'APPEND_OUTPUT' })
1024
1013
  }
1014
+ })
1015
+ if (!ok) {
1016
+ dispatch({
1017
+ errors: ['Python bootstrap failed. Install uv and try again.'],
1018
+ type: 'SET_PREFLIGHT',
1019
+ warnings: []
1020
+ })
1021
+ return
1025
1022
  }
1026
1023
 
1027
1024
  const preflight = await runPreflight()
@@ -1175,7 +1172,9 @@ const App = () => {
1175
1172
 
1176
1173
  <PreflightBanner errors={state.preflightErrors} warnings={state.preflightWarnings} />
1177
1174
 
1178
- {state.runningCommand ? <OutputBox lines={state.runningLines} status={state.runningStatus} /> : null}
1175
+ {state.runningCommand || state.runningLines.length > 0 ? (
1176
+ <OutputBox lines={state.runningLines} status={state.runningStatus} />
1177
+ ) : null}
1179
1178
  {state.runningCommand && !state.failed ? <RunningFooter /> : null}
1180
1179
 
1181
1180
  {state.showLog ? <LogOverlay lines={state.logLines} /> : null}