@alta-foundation/plaud-extractor 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. package/.env.example +9 -0
  2. package/.github/workflows/ci.yml +33 -0
  3. package/.github/workflows/publish.yml +46 -0
  4. package/CLAUDE.md +53 -0
  5. package/README.md +318 -0
  6. package/dist/PlaudExtractor.d.ts +61 -0
  7. package/dist/PlaudExtractor.d.ts.map +1 -0
  8. package/dist/PlaudExtractor.js +236 -0
  9. package/dist/PlaudExtractor.js.map +1 -0
  10. package/dist/auth/browser-auth.d.ts +10 -0
  11. package/dist/auth/browser-auth.d.ts.map +1 -0
  12. package/dist/auth/browser-auth.js +220 -0
  13. package/dist/auth/browser-auth.js.map +1 -0
  14. package/dist/auth/token-store.d.ts +9 -0
  15. package/dist/auth/token-store.d.ts.map +1 -0
  16. package/dist/auth/token-store.js +74 -0
  17. package/dist/auth/token-store.js.map +1 -0
  18. package/dist/auth/types.d.ts +266 -0
  19. package/dist/auth/types.d.ts.map +1 -0
  20. package/dist/auth/types.js +32 -0
  21. package/dist/auth/types.js.map +1 -0
  22. package/dist/cli/bin.d.ts +3 -0
  23. package/dist/cli/bin.d.ts.map +1 -0
  24. package/dist/cli/bin.js +30 -0
  25. package/dist/cli/bin.js.map +1 -0
  26. package/dist/cli/commands/auth.d.ts +3 -0
  27. package/dist/cli/commands/auth.d.ts.map +1 -0
  28. package/dist/cli/commands/auth.js +22 -0
  29. package/dist/cli/commands/auth.js.map +1 -0
  30. package/dist/cli/commands/backfill.d.ts +3 -0
  31. package/dist/cli/commands/backfill.d.ts.map +1 -0
  32. package/dist/cli/commands/backfill.js +59 -0
  33. package/dist/cli/commands/backfill.js.map +1 -0
  34. package/dist/cli/commands/sync.d.ts +3 -0
  35. package/dist/cli/commands/sync.d.ts.map +1 -0
  36. package/dist/cli/commands/sync.js +55 -0
  37. package/dist/cli/commands/sync.js.map +1 -0
  38. package/dist/cli/commands/verify.d.ts +3 -0
  39. package/dist/cli/commands/verify.d.ts.map +1 -0
  40. package/dist/cli/commands/verify.js +28 -0
  41. package/dist/cli/commands/verify.js.map +1 -0
  42. package/dist/cli/exit-codes.d.ts +8 -0
  43. package/dist/cli/exit-codes.d.ts.map +1 -0
  44. package/dist/cli/exit-codes.js +16 -0
  45. package/dist/cli/exit-codes.js.map +1 -0
  46. package/dist/cli/options.d.ts +31 -0
  47. package/dist/cli/options.d.ts.map +1 -0
  48. package/dist/cli/options.js +11 -0
  49. package/dist/cli/options.js.map +1 -0
  50. package/dist/client/endpoints.d.ts +26 -0
  51. package/dist/client/endpoints.d.ts.map +1 -0
  52. package/dist/client/endpoints.js +54 -0
  53. package/dist/client/endpoints.js.map +1 -0
  54. package/dist/client/http.d.ts +17 -0
  55. package/dist/client/http.d.ts.map +1 -0
  56. package/dist/client/http.js +92 -0
  57. package/dist/client/http.js.map +1 -0
  58. package/dist/client/plaud-client.d.ts +14 -0
  59. package/dist/client/plaud-client.d.ts.map +1 -0
  60. package/dist/client/plaud-client.js +216 -0
  61. package/dist/client/plaud-client.js.map +1 -0
  62. package/dist/client/types.d.ts +154 -0
  63. package/dist/client/types.d.ts.map +1 -0
  64. package/dist/client/types.js +41 -0
  65. package/dist/client/types.js.map +1 -0
  66. package/dist/errors.d.ts +24 -0
  67. package/dist/errors.d.ts.map +1 -0
  68. package/dist/errors.js +51 -0
  69. package/dist/errors.js.map +1 -0
  70. package/dist/index.d.ts +7 -0
  71. package/dist/index.d.ts.map +1 -0
  72. package/dist/index.js +5 -0
  73. package/dist/index.js.map +1 -0
  74. package/dist/logger.d.ts +9 -0
  75. package/dist/logger.d.ts.map +1 -0
  76. package/dist/logger.js +37 -0
  77. package/dist/logger.js.map +1 -0
  78. package/dist/mcp/job-tools.d.ts +3 -0
  79. package/dist/mcp/job-tools.d.ts.map +1 -0
  80. package/dist/mcp/job-tools.js +108 -0
  81. package/dist/mcp/job-tools.js.map +1 -0
  82. package/dist/mcp/read-tools.d.ts +3 -0
  83. package/dist/mcp/read-tools.d.ts.map +1 -0
  84. package/dist/mcp/read-tools.js +173 -0
  85. package/dist/mcp/read-tools.js.map +1 -0
  86. package/dist/mcp/server.d.ts +3 -0
  87. package/dist/mcp/server.d.ts.map +1 -0
  88. package/dist/mcp/server.js +32 -0
  89. package/dist/mcp/server.js.map +1 -0
  90. package/dist/storage/atomic.d.ts +5 -0
  91. package/dist/storage/atomic.d.ts.map +1 -0
  92. package/dist/storage/atomic.js +51 -0
  93. package/dist/storage/atomic.js.map +1 -0
  94. package/dist/storage/checksums.d.ts +15 -0
  95. package/dist/storage/checksums.d.ts.map +1 -0
  96. package/dist/storage/checksums.js +56 -0
  97. package/dist/storage/checksums.js.map +1 -0
  98. package/dist/storage/dataset-writer.d.ts +21 -0
  99. package/dist/storage/dataset-writer.d.ts.map +1 -0
  100. package/dist/storage/dataset-writer.js +52 -0
  101. package/dist/storage/dataset-writer.js.map +1 -0
  102. package/dist/storage/paths.d.ts +9 -0
  103. package/dist/storage/paths.d.ts.map +1 -0
  104. package/dist/storage/paths.js +38 -0
  105. package/dist/storage/paths.js.map +1 -0
  106. package/dist/storage/recording-store.d.ts +24 -0
  107. package/dist/storage/recording-store.d.ts.map +1 -0
  108. package/dist/storage/recording-store.js +161 -0
  109. package/dist/storage/recording-store.js.map +1 -0
  110. package/dist/sync/download-queue.d.ts +21 -0
  111. package/dist/sync/download-queue.d.ts.map +1 -0
  112. package/dist/sync/download-queue.js +82 -0
  113. package/dist/sync/download-queue.js.map +1 -0
  114. package/dist/sync/incremental.d.ts +21 -0
  115. package/dist/sync/incremental.d.ts.map +1 -0
  116. package/dist/sync/incremental.js +96 -0
  117. package/dist/sync/incremental.js.map +1 -0
  118. package/dist/sync/sync-engine.d.ts +6 -0
  119. package/dist/sync/sync-engine.d.ts.map +1 -0
  120. package/dist/sync/sync-engine.js +135 -0
  121. package/dist/sync/sync-engine.js.map +1 -0
  122. package/dist/sync/types.d.ts +130 -0
  123. package/dist/sync/types.d.ts.map +1 -0
  124. package/dist/sync/types.js +17 -0
  125. package/dist/sync/types.js.map +1 -0
  126. package/dist/transcript/formatter.d.ts +4 -0
  127. package/dist/transcript/formatter.d.ts.map +1 -0
  128. package/dist/transcript/formatter.js +88 -0
  129. package/dist/transcript/formatter.js.map +1 -0
  130. package/package.json +41 -0
  131. package/src/PlaudExtractor.ts +275 -0
  132. package/src/auth/browser-auth.ts +248 -0
  133. package/src/auth/token-store.ts +79 -0
  134. package/src/auth/types.ts +41 -0
  135. package/src/cli/bin.ts +30 -0
  136. package/src/cli/commands/auth.ts +27 -0
  137. package/src/cli/commands/backfill.ts +77 -0
  138. package/src/cli/commands/sync.ts +71 -0
  139. package/src/cli/commands/verify.ts +31 -0
  140. package/src/cli/exit-codes.ts +14 -0
  141. package/src/cli/options.ts +10 -0
  142. package/src/client/endpoints.ts +62 -0
  143. package/src/client/http.ts +110 -0
  144. package/src/client/plaud-client.ts +268 -0
  145. package/src/client/types.ts +62 -0
  146. package/src/errors.ts +57 -0
  147. package/src/index.ts +17 -0
  148. package/src/logger.ts +49 -0
  149. package/src/mcp/job-tools.ts +156 -0
  150. package/src/mcp/read-tools.ts +204 -0
  151. package/src/mcp/server.ts +39 -0
  152. package/src/storage/atomic.ts +51 -0
  153. package/src/storage/checksums.ts +76 -0
  154. package/src/storage/dataset-writer.ts +74 -0
  155. package/src/storage/paths.ts +44 -0
  156. package/src/storage/recording-store.ts +182 -0
  157. package/src/sync/download-queue.ts +102 -0
  158. package/src/sync/incremental.ts +111 -0
  159. package/src/sync/sync-engine.ts +183 -0
  160. package/src/sync/types.ts +64 -0
  161. package/src/transcript/formatter.ts +91 -0
  162. package/tsconfig.build.json +8 -0
  163. package/tsconfig.json +19 -0
@@ -0,0 +1,76 @@
1
+ import crypto from 'node:crypto'
2
+ import fs from 'node:fs/promises'
3
+ import path from 'node:path'
4
+ import { writeFileAtomic } from './atomic.js'
5
+ import { ChecksumMismatchError } from '../errors.js'
6
+
7
/** Digest and size recorded for one file inside a checksum manifest. */
export interface FileChecksum {
  /** Hex-encoded SHA-256 digest of the file contents. */
  sha256: string
  /** Size of the file in bytes at the time the manifest was written. */
  sizeBytes: number
}
11
+
12
/** Shape of the `checksums.json` file stored in a recording directory. */
export interface ChecksumManifest {
  /** Manifest format version; the only value defined here is 1. */
  schemaVersion: 1
  /** Recording identifier supplied when the manifest was written. */
  recordingId: string
  /** ISO-8601 timestamp taken when the manifest was built. */
  computedAt: string
  /** Checksums keyed by file name within the recording directory. */
  files: Record<string, FileChecksum>
}
18
+
19
/**
 * Compute the hex-encoded SHA-256 digest of a file.
 *
 * The file is hashed in fixed-size chunks instead of being loaded whole, so
 * memory use stays flat for large audio files and files bigger than the
 * maximum Buffer size can still be hashed.
 *
 * @param filePath - Path of the file to hash.
 * @returns Lower-case hex SHA-256 digest of the file contents.
 * @throws Rejects with the underlying file-system error when the file cannot
 *   be opened or read.
 */
export async function sha256File(filePath: string): Promise<string> {
  const handle = await fs.open(filePath, 'r')
  try {
    const hash = crypto.createHash('sha256')
    const chunk = Buffer.allocUnsafe(64 * 1024)
    for (;;) {
      // position `null` reads from the handle's current offset and advances it
      const { bytesRead } = await handle.read(chunk, 0, chunk.length, null)
      if (bytesRead === 0) break
      // update() consumes the bytes synchronously, so reusing `chunk` is safe
      hash.update(chunk.subarray(0, bytesRead))
    }
    return hash.digest('hex')
  } finally {
    // Always release the descriptor, including when a read fails
    await handle.close()
  }
}
23
+
24
/**
 * Hash every regular file directly inside `dir` (except `checksums.json`
 * itself) and write the result to `dir/checksums.json`.
 *
 * @param dir - Recording directory to scan; sub-directories are skipped.
 * @param recordingId - Identifier stored in the manifest.
 * @returns The manifest that was written.
 */
export async function writeChecksumManifest(
  dir: string,
  recordingId: string,
): Promise<ChecksumManifest> {
  const entries = await fs.readdir(dir)
  const manifest: ChecksumManifest = {
    schemaVersion: 1,
    recordingId,
    computedAt: new Date().toISOString(),
    files: {},
  }

  for (const name of entries) {
    // The manifest never lists itself
    if (name === 'checksums.json') continue

    const fullPath = path.join(dir, name)
    const info = await fs.stat(fullPath)
    if (info.isFile()) {
      const digest = await sha256File(fullPath)
      manifest.files[name] = { sha256: digest, sizeBytes: info.size }
    }
  }

  const manifestPath = path.join(dir, 'checksums.json')
  await writeFileAtomic(manifestPath, JSON.stringify(manifest, null, 2))
  return manifest
}
49
+
50
/**
 * Re-hash every file listed in `dir/checksums.json` and report the ones that
 * no longer match.
 *
 * - A missing manifest means nothing has been recorded yet, so verification
 *   is skipped and `[]` is returned.
 * - A manifest that exists but cannot be read or parsed is reported as a
 *   mismatch on the manifest itself instead of silently passing.
 * - A listed file that cannot be hashed is reported with actual value
 *   `'MISSING'`.
 *
 * @param dir - Recording directory containing `checksums.json`.
 * @returns One `ChecksumMismatchError` per problem found; empty when all
 *   listed files match.
 */
export async function verifyChecksums(dir: string): Promise<ChecksumMismatchError[]> {
  const manifestPath = path.join(dir, 'checksums.json')

  let raw: string
  try {
    raw = await fs.readFile(manifestPath, 'utf8')
  } catch (err: unknown) {
    if ((err as NodeJS.ErrnoException | null)?.code === 'ENOENT') {
      return [] // No manifest yet — skip verification
    }
    // Present but unreadable (permissions, I/O error): integrity is unknown
    return [new ChecksumMismatchError(manifestPath, 'READABLE_MANIFEST', 'UNREADABLE')]
  }

  let files: Record<string, FileChecksum>
  try {
    const manifest = JSON.parse(raw) as Partial<ChecksumManifest> | null
    if (manifest === null || typeof manifest.files !== 'object' || manifest.files === null) {
      throw new TypeError('checksums.json has no "files" map')
    }
    files = manifest.files
  } catch {
    // Truncated or hand-edited manifest: report it rather than pass verification
    return [new ChecksumMismatchError(manifestPath, 'VALID_MANIFEST', 'CORRUPT')]
  }

  const mismatches: ChecksumMismatchError[] = []

  for (const [file, expected] of Object.entries(files)) {
    const fPath = path.join(dir, file)
    try {
      const actual = await sha256File(fPath)
      if (actual !== expected.sha256) {
        mismatches.push(new ChecksumMismatchError(fPath, expected.sha256, actual))
      }
    } catch {
      mismatches.push(new ChecksumMismatchError(fPath, expected.sha256, 'MISSING'))
    }
  }

  return mismatches
}
@@ -0,0 +1,74 @@
1
+ import fs from 'node:fs/promises'
2
+ import { createWriteStream, type WriteStream } from 'node:fs'
3
+ import path from 'node:path'
4
+ import { datasetPath, recordingDir } from './paths.js'
5
+ import type { PlaudRecording, PlaudTranscript } from '../client/types.js'
6
+ import { StorageError } from '../errors.js'
7
+
8
/** One JSON line of the transcript dataset file written by `DatasetWriter`. */
export interface DatasetEntry {
  /** Recording id prefixed with `plaud:`. */
  id: string
  /** Recording title, or `null` when the recording has none. */
  title: string | null
  /** The recording's `recordedAt` value. */
  recorded_at: string
  /** The recording's `duration` value. */
  duration_seconds: number
  /** Recording language, or `null` when unknown. */
  language: string | null
  /** Full transcript text. */
  text: string
  /** Path of the recording's `transcript.txt`, relative to the output directory. */
  path: string
  /** Number of segments in the transcript. */
  segment_count: number
}
18
+
19
/**
 * Appends one JSON line per recording to the dataset file under an output
 * directory.
 *
 * Lifecycle: `open()` → any number of `append()` calls → `close()`.
 */
export class DatasetWriter {
  private readonly filePath: string
  private stream: WriteStream | null = null

  /** @param outDir - Output directory; the dataset path is derived from it via `datasetPath`. */
  constructor(outDir: string) {
    this.filePath = datasetPath(outDir)
  }

  /**
   * Create the parent directory if needed and open the dataset file in
   * append mode. Calling `open()` on an already-open writer is a no-op.
   *
   * @throws Rejects with the stream error when the file cannot be opened; the
   *   writer then stays in the "not opened" state.
   */
  async open(): Promise<void> {
    // Re-opening would orphan the current stream and leak its descriptor
    if (this.stream) return

    await fs.mkdir(path.dirname(this.filePath), { recursive: true })
    const stream = createWriteStream(this.filePath, { flags: 'a', encoding: 'utf8' })
    await new Promise<void>((resolve, reject) => {
      stream.once('open', () => resolve())
      stream.once('error', reject)
    })
    // Publish the stream only once it is usable, so a failed open() cannot
    // leave a dead stream behind for append() to write into
    this.stream = stream
  }

  /**
   * Serialise one recording/transcript pair as a JSON line and wait until the
   * stream has accepted it.
   *
   * @param outDir - Output directory used to compute the relative transcript path.
   * @param recording - Recording the transcript belongs to.
   * @param transcript - Transcript whose text and segment count are stored.
   * @throws StorageError when the writer has not been opened.
   * @throws Rejects with the stream error when the write fails.
   */
  async append(outDir: string, recording: PlaudRecording, transcript: PlaudTranscript): Promise<void> {
    const stream = this.stream
    if (!stream) throw new StorageError('DatasetWriter not opened', this.filePath)

    const relPath = path.relative(
      outDir,
      path.join(recordingDir(outDir, recording.recordedAt, recording.id), 'transcript.txt'),
    )

    const entry: DatasetEntry = {
      id: `plaud:${recording.id}`,
      title: recording.title ?? null,
      recorded_at: recording.recordedAt,
      duration_seconds: recording.duration,
      language: recording.language ?? null,
      text: transcript.fullText,
      path: relPath,
      segment_count: transcript.segments.length,
    }

    const line = JSON.stringify(entry) + '\n'

    await new Promise<void>((resolve, reject) => {
      stream.write(line, err => (err ? reject(err) : resolve()))
    })
  }

  /**
   * Flush and close the dataset file. Does nothing when the writer is not open.
   * The writer returns to the "not opened" state even if closing fails.
   */
  async close(): Promise<void> {
    const stream = this.stream
    if (!stream) return
    // Clear first so a failing end() cannot leave a finished stream assigned
    this.stream = null
    await new Promise<void>((resolve, reject) => {
      stream.once('error', reject)
      stream.end(() => resolve())
    })
  }

  /** Absolute or relative path of the dataset file, as derived from `outDir`. */
  get path(): string {
    return this.filePath
  }
}
@@ -0,0 +1,44 @@
1
+ import path from 'node:path'
2
+ import os from 'node:os'
3
+
4
/**
 * Resolve the default output directory.
 *
 * @returns The absolute form of `ALTA_DATA_DIR` when that environment
 *   variable is set and non-empty, otherwise `<home>/alta/data/plaud`.
 */
export function defaultOutDir(): string {
  const override = process.env['ALTA_DATA_DIR']
  return override
    ? path.resolve(override)
    : path.join(os.homedir(), 'alta', 'data', 'plaud')
}
9
+
10
/**
 * Build the directory for one recording:
 * `<outDir>/recordings/<UTC year>/<UTC month, 2 digits>/<timestamp>__plaud_<recordingId>`,
 * where `<timestamp>` is produced by `formatISOCompact` from `recordedAt`.
 *
 * @param outDir - Root output directory.
 * @param recordedAt - Date string parsed with `new Date(...)`.
 * @param recordingId - Recording identifier appended to the directory name.
 */
export function recordingDir(outDir: string, recordedAt: string, recordingId: string): string {
  const when = new Date(recordedAt)
  const yearPart = when.getUTCFullYear().toString()
  const monthPart = `${when.getUTCMonth() + 1}`.padStart(2, '0')
  const leaf = [formatISOCompact(when), `plaud_${recordingId}`].join('__')
  return path.join(outDir, 'recordings', yearPart, monthPart, leaf)
}
19
+
20
/**
 * Format a date as a compact UTC timestamp, e.g. `2026-02-24T083012Z`:
 * the ISO date kept as-is, the time without colons or milliseconds, and a
 * single trailing `Z`.
 *
 * NOTE(review): the previous implementation returned `20260224T083012ZZ`
 * (dashes stripped, doubled `Z`); directories created with it keep that name.
 *
 * @param dt - Date to format.
 * @returns The compact timestamp.
 * @throws RangeError when `dt` is an invalid date (from `toISOString`).
 */
function formatISOCompact(dt: Date): string {
  const iso = dt.toISOString() // 2026-02-24T08:30:12.000Z
  const split = iso.indexOf('T')
  const datePart = iso.slice(0, split) // 2026-02-24
  const timePart = iso.slice(split + 1, split + 9).replace(/:/g, '') // 083012
  return `${datePart}T${timePart}Z`
}
25
+
26
/** Directory under `outDir` that holds sync bookkeeping files (`_state`). */
export function stateDir(outDir: string): string {
  const segments = [outDir, '_state']
  return path.join(...segments)
}
29
+
30
/** Path of `sync_state.json` inside the state directory of `outDir`. */
export function syncStatePath(outDir: string): string {
  const dir = stateDir(outDir)
  return path.join(dir, 'sync_state.json')
}
33
+
34
/** Path of `run_logs.ndjson` inside the state directory of `outDir`. */
export function runLogsPath(outDir: string): string {
  const dir = stateDir(outDir)
  return path.join(dir, 'run_logs.ndjson')
}
37
+
38
/** Path of the transcript dataset file: `<outDir>/datasets/plaud_transcripts.jsonl`. */
export function datasetPath(outDir: string): string {
  const segments = [outDir, 'datasets', 'plaud_transcripts.jsonl']
  return path.join(...segments)
}
41
+
42
/** Path of the auth file in the user's home directory: `<home>/.alta/plaud-auth.json`. */
export function authTokenPath(): string {
  const home = os.homedir()
  return path.join(home, '.alta', 'plaud-auth.json')
}
@@ -0,0 +1,182 @@
1
+ import fs from 'node:fs/promises'
2
+ import path from 'node:path'
3
+ import { writeFileAtomic, writeStreamAtomic } from './atomic.js'
4
+ import { writeChecksumManifest, verifyChecksums } from './checksums.js'
5
+ import { toPlainText, toMarkdown } from '../transcript/formatter.js'
6
+ import { recordingDir } from './paths.js'
7
+ import type { PlaudRecording, PlaudTranscript } from '../client/types.js'
8
+ import type { ChecksumMismatchError } from '../errors.js'
9
+ import type { HttpClient } from '../client/http.js'
10
+ import { getLogger } from '../logger.js'
11
+
12
/** Transcript file formats that `RecordingStore.writeTranscript` can write. */
export type TranscriptFormat =
  | 'json'
  | 'txt'
  | 'md'
13
+
14
/**
 * Summary of what was written for one recording.
 * NOTE(review): not referenced by the code visible in this file — field
 * meanings below are inferred from the names; confirm against callers.
 */
export interface RecordingWriteResult {
  /** Presumably the recording's directory. */
  dir: string
  /** Presumably whether an audio file was written. */
  hasAudio: boolean
  /** Presumably whether transcript files were written. */
  hasTranscript: boolean
}
19
+
20
/**
 * Writes the files that belong to individual recordings (metadata,
 * transcripts, audio, checksums) beneath a single output directory.
 */
export class RecordingStore {
  /** @param outDir - Root output directory all recording directories live under. */
  constructor(private readonly outDir: string) {}

  /** Resolve the directory that holds every file of `recording`. */
  recordingDir(recording: PlaudRecording): string {
    const { recordedAt, id } = recording
    return recordingDir(this.outDir, recordedAt, id)
  }

  /**
   * Write the recording's metadata to `meta.json`.
   *
   * @returns The recording directory the file was written to.
   */
  async writeMetadata(recording: PlaudRecording): Promise<string> {
    const targetDir = this.recordingDir(recording)
    const body = JSON.stringify(buildMetaJson(recording), null, 2)
    await writeFileAtomic(path.join(targetDir, 'meta.json'), body)
    getLogger().debug({ recordingId: recording.id, dir: targetDir }, 'Wrote meta.json')
    return targetDir
  }

  /**
   * Write the transcript in each requested format
   * (`transcript.json`, `transcript.txt`, `transcript.md`).
   *
   * @param recording - Recording the transcript belongs to.
   * @param transcript - Transcript to serialise.
   * @param formats - Formats to write; defaults to all three.
   */
  async writeTranscript(
    recording: PlaudRecording,
    transcript: PlaudTranscript,
    formats: TranscriptFormat[] = ['json', 'txt', 'md'],
  ): Promise<void> {
    const targetDir = this.recordingDir(recording)
    const wants = (format: TranscriptFormat): boolean => formats.includes(format)

    if (wants('json')) {
      const { recordingId, language, duration, segments } = transcript
      const payload = { recordingId, language, duration, segments }
      await writeFileAtomic(
        path.join(targetDir, 'transcript.json'),
        JSON.stringify(payload, null, 2),
      )
    }

    if (wants('txt')) {
      await writeFileAtomic(path.join(targetDir, 'transcript.txt'), toPlainText(transcript))
    }

    if (wants('md')) {
      await writeFileAtomic(path.join(targetDir, 'transcript.md'), toMarkdown(transcript, recording))
    }

    getLogger().debug({ recordingId: recording.id, formats }, 'Wrote transcript files')
  }

  /**
   * Download the recording's audio via `http.getStream`.
   *
   * `getAudioUrl` is currently a stub that returns `null`, so this method
   * returns `false` without downloading; use `writeAudioFromUrl` instead.
   *
   * @returns `true` when the audio file was written, `false` when no URL is
   *   available or the download failed (failures are logged, not thrown).
   */
  async writeAudio(recording: PlaudRecording, http: HttpClient): Promise<boolean> {
    const sourceUrl = await this.getAudioUrl(recording, http)
    if (!sourceUrl) return false

    const extension = guessAudioExtension(recording.mimeType)
    const destPath = path.join(this.recordingDir(recording), `audio.${extension}`)

    try {
      const body = await http.getStream(sourceUrl)
      await writeStreamAtomic(destPath, body)
    } catch (err) {
      getLogger().warn({ recordingId: recording.id, err }, 'Failed to download audio')
      return false
    }

    getLogger().debug({ recordingId: recording.id, path: destPath }, 'Wrote audio file')
    return true
  }

  /** Stub: always resolves to `null`. */
  private async getAudioUrl(recording: PlaudRecording, http: HttpClient): Promise<string | null> {
    // This is a stub — the PlaudApiClient resolves the actual URL
    // RecordingStore receives the already-resolved URL via writeAudioFromUrl
    return null
  }

  /**
   * Download audio from an already-resolved URL into the recording directory
   * as `audio.<ext>`, with the extension guessed from the recording's MIME type.
   *
   * @returns `true` on success, `false` when the download or write failed
   *   (the failure is logged, not thrown).
   */
  async writeAudioFromUrl(recording: PlaudRecording, url: string, http: HttpClient): Promise<boolean> {
    const extension = guessAudioExtension(recording.mimeType)
    const destPath = path.join(this.recordingDir(recording), `audio.${extension}`)

    try {
      // S3 presigned URLs must be fetched without Plaud auth headers
      const body = await http.downloadExternalUrl(url)
      await writeStreamAtomic(destPath, body)
    } catch (err) {
      getLogger().warn({ recordingId: recording.id, err }, 'Failed to download audio')
      return false
    }

    getLogger().debug({ recordingId: recording.id, path: destPath }, 'Wrote audio file')
    return true
  }

  /** Write `checksums.json` for the recording directory. */
  async writeChecksums(recording: PlaudRecording): Promise<void> {
    await writeChecksumManifest(this.recordingDir(recording), recording.id)
    getLogger().debug({ recordingId: recording.id }, 'Wrote checksums.json')
  }

  /** Verify the recording directory against its checksum manifest. */
  async verify(recording: PlaudRecording): Promise<ChecksumMismatchError[]> {
    return verifyChecksums(this.recordingDir(recording))
  }

  /** Whether the recording directory is accessible on disk. */
  async exists(recording: PlaudRecording): Promise<boolean> {
    return fs.access(this.recordingDir(recording)).then(
      () => true,
      () => false,
    )
  }

  /** Whether any required file (currently only `meta.json`) is inaccessible. */
  async hasMissingFiles(recording: PlaudRecording): Promise<boolean> {
    const targetDir = this.recordingDir(recording)
    for (const name of ['meta.json']) {
      const present = await fs.access(path.join(targetDir, name)).then(
        () => true,
        () => false,
      )
      if (!present) return true
    }
    return false
  }
}
141
+
142
/**
 * Build the object serialised to `meta.json` for a recording.
 *
 * The `audio` section is `null` when the recording has no (truthy) file size.
 * `imported_at` is the time of this call.
 */
function buildMetaJson(recording: PlaudRecording): object {
  const { id, recordedAt, title, duration, language, fileSize, mimeType } = recording
  const importedAt = new Date().toISOString()

  const audio = fileSize
    ? {
        filename: `audio.${guessAudioExtension(mimeType)}`,
        mime: mimeType,
        bytes: fileSize,
      }
    : null

  const transcript = {
    has_timestamps: true, // will be updated after writing
    format: 'segments',
    filename_json: 'transcript.json',
    filename_txt: 'transcript.txt',
    filename_md: 'transcript.md',
  }

  return {
    source: 'plaud',
    source_recording_id: id,
    recorded_at: recordedAt,
    imported_at: importedAt,
    title,
    duration_seconds: duration,
    language,
    audio,
    transcript,
    integrity: { dedupe_key: `plaud:${id}` },
    tags: recording.tags,
    folder_id: recording.folderId,
    device_id: recording.deviceId,
    summary: recording.summary,
  }
}
174
+
175
/**
 * Map an audio MIME type to a file extension.
 *
 * Matching is case-insensitive and substring-based, so parameters and
 * vendor prefixes (`audio/x-wav`, `audio/ogg; codecs=opus`) are tolerated.
 * The standard MP3 media type `audio/mpeg` is recognised explicitly because
 * it does not contain the text `mp3`.
 *
 * @param mimeType - MIME type reported for the recording.
 * @returns `m4a`, `wav`, `mp3`, `ogg` or `webm`; `m4a` when nothing matches.
 */
function guessAudioExtension(mimeType: string): string {
  const normalized = mimeType.toLowerCase()
  // Bare media type without parameters: "audio/mpeg; q=1" -> "audio/mpeg"
  const bareType = (normalized.split(';')[0] ?? '').trim()

  if (normalized.includes('m4a') || normalized.includes('mp4')) return 'm4a'
  if (normalized.includes('wav')) return 'wav'
  if (normalized.includes('mp3') || bareType === 'audio/mpeg') return 'mp3'
  if (normalized.includes('ogg')) return 'ogg'
  if (normalized.includes('webm')) return 'webm'
  return 'm4a' // default
}
@@ -0,0 +1,102 @@
1
+ import { ApiError } from '../errors.js'
2
+ import { getLogger } from '../logger.js'
3
+
4
/** Outcome of a `processQueue` run. */
export interface QueueResult<T> {
  /** Items whose processor call resolved. */
  succeeded: T[]
  /** Items whose processor call threw or rejected, paired with the error. */
  failed: { item: T; error: Error }[]
}
8
+
9
/**
 * Process items with bounded concurrency — no extra dependencies.
 *
 * A fixed pool of workers pulls items from a shared cursor, so items are
 * started in input order and at most `concurrency` processors run at once.
 * A failing item never aborts the run; it is recorded in `failed`.
 *
 * @param items - Items to process.
 * @param processor - Async handler invoked once per item.
 * @param concurrency - Maximum number of processors in flight. Fractions are
 *   floored; NaN, zero and negative values are treated as 1 so the queue
 *   always drains.
 * @returns Items split into `succeeded` and `failed`, each in completion
 *   order. Non-`Error` rejections are wrapped in an `Error`.
 */
export async function processQueue<T>(
  items: T[],
  processor: (item: T) => Promise<void>,
  concurrency: number,
): Promise<QueueResult<T>> {
  const succeeded: T[] = []
  const failed: Array<{ item: T; error: Error }> = []

  const limit = Number.isNaN(concurrency) ? 1 : Math.max(1, Math.floor(concurrency))
  const workerCount = Math.min(limit, items.length)

  // Shared cursor: reading and incrementing happen in one synchronous step,
  // so no two workers can claim the same item.
  let next = 0

  const worker = async (): Promise<void> => {
    while (next < items.length) {
      const item = items[next++] as T
      try {
        await processor(item)
        succeeded.push(item)
      } catch (err) {
        failed.push({ item, error: err instanceof Error ? err : new Error(String(err)) })
      }
    }
  }

  // Workers never reject (errors are captured above), so Promise.all simply
  // waits for the queue to drain — there is no wake-up signal to miss.
  await Promise.all(Array.from({ length: workerCount }, () => worker()))

  return { succeeded, failed }
}
57
+
58
/**
 * Retry a function with exponential backoff.
 *
 * Only errors accepted by `isRetryableError` are retried; any other error is
 * rethrown immediately. Waits are 1 s, 4 s and 16 s after the first, second
 * and third failure, and 16 s after every later one.
 *
 * @param fn - Operation to run; called at least once.
 * @param opts.maxAttempts - Total number of attempts (default 4). Fractions
 *   are floored and values below 1 are treated as 1; NaN falls back to 4.
 * @param opts.label - Optional label included in log entries.
 * @returns The value `fn` resolves with.
 * @throws The last error thrown by `fn` once attempts are exhausted or the
 *   error is not retryable.
 */
export async function retryWithBackoff<T>(
  fn: () => Promise<T>,
  opts: { maxAttempts?: number; label?: string } = {},
): Promise<T> {
  const requested = opts.maxAttempts ?? 4
  // Normalise to a whole number >= 1 so `fn` always runs and the final
  // attempt is detectable with a strict equality check.
  const maxAttempts = Number.isNaN(requested) ? 4 : Math.max(1, Math.floor(requested))
  const delays = [0, 1000, 4000, 16000]
  const log = getLogger()

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      return await fn()
    } catch (err) {
      const isLast = attempt === maxAttempts
      const retryable = isRetryableError(err)

      if (isLast || !retryable) {
        // Only log when at least one retry actually happened
        if (attempt > 1) {
          log.warn({ attempt, label: opts.label, err }, 'Giving up after retries')
        }
        throw err
      }

      // Indexed by the attempt that just failed; index 0 is never used
      const delay = delays[attempt] ?? 16000
      log.debug({ attempt, delay, label: opts.label }, 'Retrying after delay')
      await sleep(delay)
    }
  }

  // Unreachable — TypeScript needs this
  throw new Error('Retry loop exhausted')
}
93
+
94
/**
 * Decide whether a failed operation is worth retrying.
 *
 * An `ApiError` answers for itself through `isRetryable`; any other `Error`
 * is retryable only when its message contains `429`.
 */
function isRetryableError(err: unknown): boolean {
  if (err instanceof ApiError) {
    return err.isRetryable
  }
  return err instanceof Error && err.message.includes('429')
}
99
+
100
/** Resolve after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>(done => {
    setTimeout(done, ms)
  })
}
@@ -0,0 +1,111 @@
1
+ import fs from 'node:fs/promises'
2
+ import crypto from 'node:crypto'
3
+ import { syncStatePath } from '../storage/paths.js'
4
+ import { writeFileAtomic } from '../storage/atomic.js'
5
+ import { SyncStateSchema, type SyncState, type RecordingState } from './types.js'
6
+ import type { PlaudRecording } from '../client/types.js'
7
+ import { getLogger } from '../logger.js'
8
+
9
/**
 * Keeps the incremental-sync bookkeeping: which recordings have been
 * downloaded, a content hash for each, and when the last sync succeeded.
 * State lives in memory and is read from / written to the sync-state file.
 */
export class IncrementalTracker {
  private state: SyncState = { schemaVersion: 1, recordings: {} }

  /**
   * Load the persisted state for `outDir`.
   * A missing file is silently ignored; an unreadable, unparsable or
   * schema-invalid file is logged and the current state is kept.
   */
  async load(outDir: string): Promise<void> {
    const filePath = syncStatePath(outDir)
    try {
      const contents = await fs.readFile(filePath, 'utf8')
      const parsed = SyncStateSchema.safeParse(JSON.parse(contents))
      if (!parsed.success) {
        getLogger().warn({ issues: parsed.error.issues }, 'Sync state schema invalid — starting fresh')
        return
      }
      this.state = parsed.data
      getLogger().debug(
        { recordingCount: Object.keys(this.state.recordings).length },
        'Loaded sync state',
      )
    } catch (err: unknown) {
      const code = (err as NodeJS.ErrnoException).code
      if (code === 'ENOENT') return
      getLogger().warn({ err }, 'Failed to read sync state — starting fresh')
    }
  }

  /** Stamp `lastAttemptAt` with the current time and write the state to disk. */
  async persist(outDir: string): Promise<void> {
    this.state.lastAttemptAt = new Date().toISOString()
    const serialized = JSON.stringify(this.state, null, 2)
    await writeFileAtomic(syncStatePath(outDir), serialized)
  }

  /** Record the current time as the last successful sync. */
  markSuccessfulSync(): void {
    const now = new Date()
    this.state.lastSuccessfulSyncAt = now.toISOString()
  }

  /** Time of the last successful sync, or `undefined` when there has been none. */
  getSince(): Date | undefined {
    const stamp = this.state.lastSuccessfulSyncAt
    return stamp ? new Date(stamp) : undefined
  }

  /**
   * Whether `recording` must be (re-)downloaded: it is unknown, its content
   * hash changed, a transcript has become available since the last download,
   * or no download time was recorded.
   */
  needsDownload(recording: PlaudRecording): boolean {
    const known = this.state.recordings[recording.id]
    if (!known) return true
    if (known.contentHash !== this.computeContentHash(recording)) return true

    // Re-download if key files are missing
    const transcriptAppeared = !known.hasTranscript && recording.hasTranscript
    return Boolean(transcriptAppeared) || !known.downloadedAt
  }

  /**
   * Short fingerprint of the recording fields that trigger a re-download:
   * the first 16 hex characters of the SHA-256 of their JSON serialisation.
   */
  computeContentHash(recording: PlaudRecording): string {
    const { id, updatedAt, hasTranscript, transcriptStatus, duration, title } = recording
    const fingerprint = JSON.stringify({ id, updatedAt, hasTranscript, transcriptStatus, duration, title })
    const digest = crypto.createHash('sha256').update(fingerprint).digest('hex')
    return digest.slice(0, 16)
  }

  /**
   * Record a finished download, replacing any previous entry for the
   * recording and resetting it to unverified.
   */
  markComplete(
    recordingId: string,
    recordedAt: string,
    opts: { hasAudio: boolean; hasTranscript: boolean; contentHash: string },
  ): void {
    const { contentHash, hasAudio, hasTranscript } = opts
    this.state.recordings[recordingId] = {
      recordedAt,
      contentHash,
      downloadedAt: new Date().toISOString(),
      hasAudio,
      hasTranscript,
      verified: false,
    }
  }

  /** Flag a known recording as verified now; unknown ids are ignored. */
  markVerified(recordingId: string): void {
    const entry = this.state.recordings[recordingId]
    if (!entry) return
    entry.verified = true
    entry.verifiedAt = new Date().toISOString()
  }

  /** Stored state for one recording, or `undefined` when it is unknown. */
  getRecordingState(recordingId: string): RecordingState | undefined {
    return this.state.recordings[recordingId]
  }

  /** Ids of every recording present in the state. */
  getAllRecordingIds(): string[] {
    return Object.keys(this.state.recordings)
  }

  /** ISO timestamp of the last successful sync, if any. */
  get lastSuccessfulSyncAt(): string | undefined {
    return this.state.lastSuccessfulSyncAt
  }
}