@alta-foundation/plaud-extractor 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. package/.env.example +9 -0
  2. package/.github/workflows/ci.yml +33 -0
  3. package/.github/workflows/publish.yml +46 -0
  4. package/CLAUDE.md +53 -0
  5. package/README.md +318 -0
  6. package/dist/PlaudExtractor.d.ts +61 -0
  7. package/dist/PlaudExtractor.d.ts.map +1 -0
  8. package/dist/PlaudExtractor.js +236 -0
  9. package/dist/PlaudExtractor.js.map +1 -0
  10. package/dist/auth/browser-auth.d.ts +10 -0
  11. package/dist/auth/browser-auth.d.ts.map +1 -0
  12. package/dist/auth/browser-auth.js +220 -0
  13. package/dist/auth/browser-auth.js.map +1 -0
  14. package/dist/auth/token-store.d.ts +9 -0
  15. package/dist/auth/token-store.d.ts.map +1 -0
  16. package/dist/auth/token-store.js +74 -0
  17. package/dist/auth/token-store.js.map +1 -0
  18. package/dist/auth/types.d.ts +266 -0
  19. package/dist/auth/types.d.ts.map +1 -0
  20. package/dist/auth/types.js +32 -0
  21. package/dist/auth/types.js.map +1 -0
  22. package/dist/cli/bin.d.ts +3 -0
  23. package/dist/cli/bin.d.ts.map +1 -0
  24. package/dist/cli/bin.js +30 -0
  25. package/dist/cli/bin.js.map +1 -0
  26. package/dist/cli/commands/auth.d.ts +3 -0
  27. package/dist/cli/commands/auth.d.ts.map +1 -0
  28. package/dist/cli/commands/auth.js +22 -0
  29. package/dist/cli/commands/auth.js.map +1 -0
  30. package/dist/cli/commands/backfill.d.ts +3 -0
  31. package/dist/cli/commands/backfill.d.ts.map +1 -0
  32. package/dist/cli/commands/backfill.js +59 -0
  33. package/dist/cli/commands/backfill.js.map +1 -0
  34. package/dist/cli/commands/sync.d.ts +3 -0
  35. package/dist/cli/commands/sync.d.ts.map +1 -0
  36. package/dist/cli/commands/sync.js +55 -0
  37. package/dist/cli/commands/sync.js.map +1 -0
  38. package/dist/cli/commands/verify.d.ts +3 -0
  39. package/dist/cli/commands/verify.d.ts.map +1 -0
  40. package/dist/cli/commands/verify.js +28 -0
  41. package/dist/cli/commands/verify.js.map +1 -0
  42. package/dist/cli/exit-codes.d.ts +8 -0
  43. package/dist/cli/exit-codes.d.ts.map +1 -0
  44. package/dist/cli/exit-codes.js +16 -0
  45. package/dist/cli/exit-codes.js.map +1 -0
  46. package/dist/cli/options.d.ts +31 -0
  47. package/dist/cli/options.d.ts.map +1 -0
  48. package/dist/cli/options.js +11 -0
  49. package/dist/cli/options.js.map +1 -0
  50. package/dist/client/endpoints.d.ts +26 -0
  51. package/dist/client/endpoints.d.ts.map +1 -0
  52. package/dist/client/endpoints.js +54 -0
  53. package/dist/client/endpoints.js.map +1 -0
  54. package/dist/client/http.d.ts +17 -0
  55. package/dist/client/http.d.ts.map +1 -0
  56. package/dist/client/http.js +92 -0
  57. package/dist/client/http.js.map +1 -0
  58. package/dist/client/plaud-client.d.ts +14 -0
  59. package/dist/client/plaud-client.d.ts.map +1 -0
  60. package/dist/client/plaud-client.js +216 -0
  61. package/dist/client/plaud-client.js.map +1 -0
  62. package/dist/client/types.d.ts +154 -0
  63. package/dist/client/types.d.ts.map +1 -0
  64. package/dist/client/types.js +41 -0
  65. package/dist/client/types.js.map +1 -0
  66. package/dist/errors.d.ts +24 -0
  67. package/dist/errors.d.ts.map +1 -0
  68. package/dist/errors.js +51 -0
  69. package/dist/errors.js.map +1 -0
  70. package/dist/index.d.ts +7 -0
  71. package/dist/index.d.ts.map +1 -0
  72. package/dist/index.js +5 -0
  73. package/dist/index.js.map +1 -0
  74. package/dist/logger.d.ts +9 -0
  75. package/dist/logger.d.ts.map +1 -0
  76. package/dist/logger.js +37 -0
  77. package/dist/logger.js.map +1 -0
  78. package/dist/mcp/job-tools.d.ts +3 -0
  79. package/dist/mcp/job-tools.d.ts.map +1 -0
  80. package/dist/mcp/job-tools.js +108 -0
  81. package/dist/mcp/job-tools.js.map +1 -0
  82. package/dist/mcp/read-tools.d.ts +3 -0
  83. package/dist/mcp/read-tools.d.ts.map +1 -0
  84. package/dist/mcp/read-tools.js +173 -0
  85. package/dist/mcp/read-tools.js.map +1 -0
  86. package/dist/mcp/server.d.ts +3 -0
  87. package/dist/mcp/server.d.ts.map +1 -0
  88. package/dist/mcp/server.js +32 -0
  89. package/dist/mcp/server.js.map +1 -0
  90. package/dist/storage/atomic.d.ts +5 -0
  91. package/dist/storage/atomic.d.ts.map +1 -0
  92. package/dist/storage/atomic.js +51 -0
  93. package/dist/storage/atomic.js.map +1 -0
  94. package/dist/storage/checksums.d.ts +15 -0
  95. package/dist/storage/checksums.d.ts.map +1 -0
  96. package/dist/storage/checksums.js +56 -0
  97. package/dist/storage/checksums.js.map +1 -0
  98. package/dist/storage/dataset-writer.d.ts +21 -0
  99. package/dist/storage/dataset-writer.d.ts.map +1 -0
  100. package/dist/storage/dataset-writer.js +52 -0
  101. package/dist/storage/dataset-writer.js.map +1 -0
  102. package/dist/storage/paths.d.ts +9 -0
  103. package/dist/storage/paths.d.ts.map +1 -0
  104. package/dist/storage/paths.js +38 -0
  105. package/dist/storage/paths.js.map +1 -0
  106. package/dist/storage/recording-store.d.ts +24 -0
  107. package/dist/storage/recording-store.d.ts.map +1 -0
  108. package/dist/storage/recording-store.js +161 -0
  109. package/dist/storage/recording-store.js.map +1 -0
  110. package/dist/sync/download-queue.d.ts +21 -0
  111. package/dist/sync/download-queue.d.ts.map +1 -0
  112. package/dist/sync/download-queue.js +82 -0
  113. package/dist/sync/download-queue.js.map +1 -0
  114. package/dist/sync/incremental.d.ts +21 -0
  115. package/dist/sync/incremental.d.ts.map +1 -0
  116. package/dist/sync/incremental.js +96 -0
  117. package/dist/sync/incremental.js.map +1 -0
  118. package/dist/sync/sync-engine.d.ts +6 -0
  119. package/dist/sync/sync-engine.d.ts.map +1 -0
  120. package/dist/sync/sync-engine.js +135 -0
  121. package/dist/sync/sync-engine.js.map +1 -0
  122. package/dist/sync/types.d.ts +130 -0
  123. package/dist/sync/types.d.ts.map +1 -0
  124. package/dist/sync/types.js +17 -0
  125. package/dist/sync/types.js.map +1 -0
  126. package/dist/transcript/formatter.d.ts +4 -0
  127. package/dist/transcript/formatter.d.ts.map +1 -0
  128. package/dist/transcript/formatter.js +88 -0
  129. package/dist/transcript/formatter.js.map +1 -0
  130. package/package.json +41 -0
  131. package/src/PlaudExtractor.ts +275 -0
  132. package/src/auth/browser-auth.ts +248 -0
  133. package/src/auth/token-store.ts +79 -0
  134. package/src/auth/types.ts +41 -0
  135. package/src/cli/bin.ts +30 -0
  136. package/src/cli/commands/auth.ts +27 -0
  137. package/src/cli/commands/backfill.ts +77 -0
  138. package/src/cli/commands/sync.ts +71 -0
  139. package/src/cli/commands/verify.ts +31 -0
  140. package/src/cli/exit-codes.ts +14 -0
  141. package/src/cli/options.ts +10 -0
  142. package/src/client/endpoints.ts +62 -0
  143. package/src/client/http.ts +110 -0
  144. package/src/client/plaud-client.ts +268 -0
  145. package/src/client/types.ts +62 -0
  146. package/src/errors.ts +57 -0
  147. package/src/index.ts +17 -0
  148. package/src/logger.ts +49 -0
  149. package/src/mcp/job-tools.ts +156 -0
  150. package/src/mcp/read-tools.ts +204 -0
  151. package/src/mcp/server.ts +39 -0
  152. package/src/storage/atomic.ts +51 -0
  153. package/src/storage/checksums.ts +76 -0
  154. package/src/storage/dataset-writer.ts +74 -0
  155. package/src/storage/paths.ts +44 -0
  156. package/src/storage/recording-store.ts +182 -0
  157. package/src/sync/download-queue.ts +102 -0
  158. package/src/sync/incremental.ts +111 -0
  159. package/src/sync/sync-engine.ts +183 -0
  160. package/src/sync/types.ts +64 -0
  161. package/src/transcript/formatter.ts +91 -0
  162. package/tsconfig.build.json +8 -0
  163. package/tsconfig.json +19 -0
@@ -0,0 +1,275 @@
1
+ import path from 'node:path'
2
+ import fs from 'node:fs/promises'
3
+ import os from 'node:os'
4
+ import { createLogger, setLogger, type Logger } from './logger.js'
5
+ import { loadCredentials, saveCredentials, isExpired } from './auth/token-store.js'
6
+ import { runBrowserAuth, type BrowserAuthOptions } from './auth/browser-auth.js'
7
+ import { PlaudApiClient } from './client/plaud-client.js'
8
+ import { SyncEngine } from './sync/sync-engine.js'
9
+ import { IncrementalTracker } from './sync/incremental.js'
10
+ import { RecordingStore } from './storage/recording-store.js'
11
+ import { verifyChecksums } from './storage/checksums.js'
12
+ import { recordingDir, defaultOutDir } from './storage/paths.js'
13
+ import { AuthError } from './errors.js'
14
+ import type { SyncOptions, SyncResult, BackfillOptions, VerifyResult } from './sync/types.js'
15
+
16
/** Construction-time settings for `PlaudExtractor`; every field may be omitted. */
export interface PlaudExtractorConfig {
  /** Directory that receives recordings. Default: ~/alta/data/plaud */
  outDir?: string
  /** Custom pino logger to install instead of the built-in one (e.g., from Alta CORE) */
  logger?: Logger
  /** Turn on verbose logging (passed to the built-in logger) */
  verbose?: boolean
  /** Redact tokens from log output (passed to the built-in logger) */
  redact?: boolean
}
26
+
27
/**
 * Facade that ties together authentication, sync/backfill, checksum
 * verification and dataset export for a local Plaud archive.
 */
export class PlaudExtractor {
  private readonly outDir: string
  private readonly engine: SyncEngine

  /**
   * @param config Optional settings. When `config.logger` is given it is installed
   *   via `setLogger`; otherwise a logger is created for the resolved output directory.
   */
  constructor(config: PlaudExtractorConfig = {}) {
    this.outDir = config.outDir
      ? path.resolve(PlaudExtractor.expandHome(config.outDir))
      : defaultOutDir()

    if (config.logger) {
      setLogger(config.logger)
    } else {
      createLogger(this.outDir, { verbose: config.verbose, redact: config.redact })
    }

    this.engine = new SyncEngine()
  }

  /**
   * Expand a leading `~` to the current user's home directory.
   *
   * Only a bare `~` or a `~/` (`~\`) prefix is expanded. Paths such as `~other/x`
   * are returned untouched instead of being glued onto the home directory, and the
   * home path is never interpreted as a `String.replace` pattern.
   */
  private static expandHome(p: string): string {
    if (p === '~') return os.homedir()
    if (p.startsWith('~/') || p.startsWith('~\\')) {
      return path.join(os.homedir(), p.slice(2))
    }
    return p
  }

  /**
   * Launch browser for authentication.
   * Saves credentials to ~/.alta/plaud-auth.json.
   */
  async authenticate(opts: BrowserAuthOptions = {}): Promise<void> {
    const session = await runBrowserAuth(opts)
    await saveCredentials(session)
  }

  /**
   * Check if credentials exist and are not expired.
   */
  async isAuthenticated(): Promise<boolean> {
    const creds = await loadCredentials()
    return creds !== null && !isExpired(creds)
  }

  /**
   * Incremental sync: only download new or changed recordings since last run.
   * If the token expires mid-sync, re-authenticates automatically and retries once.
   */
  async sync(opts: Partial<SyncOptions> = {}): Promise<SyncResult> {
    return this.runWithReauth(opts, 'sync')
  }

  /**
   * Full backfill: re-evaluate all recordings regardless of sync state.
   * If the token expires mid-backfill, re-authenticates automatically and retries once.
   */
  async backfill(opts: Partial<BackfillOptions> = {}): Promise<SyncResult> {
    return this.runWithReauth(opts, 'backfill')
  }

  /**
   * Run sync/backfill, and if an AuthError occurs (while building the client or
   * mid-run), automatically re-authenticate and retry once. Any other error, and
   * any error from the retry, propagates to the caller.
   */
  private async runWithReauth(
    opts: Partial<SyncOptions>,
    mode: 'sync' | 'backfill',
  ): Promise<SyncResult> {
    try {
      const client = await this.buildClient()
      return await this.engine.run(client, this.buildSyncOptions(opts), mode)
    } catch (err) {
      if (!(err instanceof AuthError)) throw err

      // Token missing, expired or rejected — re-authenticate and try once more
      console.error('\nSession expired during sync. Re-authenticating...')
      await this.authenticate()
      console.log('Re-authenticated. Resuming sync...\n')

      const client = await this.buildClient()
      return this.engine.run(client, this.buildSyncOptions(opts), mode)
    }
  }

  /**
   * Walk all tracked recordings and verify their checksums.
   *
   * NOTE: `repair` does not re-download anything yet (see the TODO below); it only
   * requires valid credentials up front.
   *
   * @returns Counters plus one issue entry per mismatching file or failed recording.
   * @throws AuthError when `repair` is set and credentials are missing or expired.
   */
  async verify(opts: { repair?: boolean } = {}): Promise<VerifyResult> {
    // Fail fast on bad credentials when a repair was requested.
    if (opts.repair) await this.buildClient()

    const tracker = new IncrementalTracker()
    await tracker.load(this.outDir)

    const result: VerifyResult = { scanned: 0, ok: 0, failed: 0, repaired: 0, issues: [] }

    for (const id of tracker.getAllRecordingIds()) {
      const state = tracker.getRecordingState(id)
      if (!state) continue

      const dir = recordingDir(this.outDir, state.recordedAt, id)
      result.scanned++

      try {
        const mismatches = await verifyChecksums(dir)
        if (mismatches.length === 0) {
          result.ok++
          tracker.markVerified(id)
        } else {
          result.failed++
          for (const m of mismatches) {
            result.issues.push({
              recordingId: id,
              file: path.basename(m.filePath),
              issue: `checksum mismatch (expected: ${m.expected.slice(0, 8)}..., got: ${m.actual === 'MISSING' ? 'MISSING' : m.actual.slice(0, 8) + '...'})`,
            })
          }

          // TODO: repair support requires re-fetching the recording object
          // For now, only report the mismatch
        }
      } catch (err) {
        // A recording whose verification itself blew up counts as failed.
        result.failed++
        result.issues.push({ recordingId: id, file: '', issue: String(err) })
      }
    }

    await tracker.persist(this.outDir)
    return result
  }

  /**
   * Export all local recordings to a JSONL dataset file.
   * Returns the path to the generated file.
   *
   * `opts.format` is accepted for forward compatibility but is not read yet.
   */
  async exportDataset(opts: { format?: 'jsonl' } = {}): Promise<string> {
    const { DatasetWriter } = await import('./storage/dataset-writer.js')

    const datasetWriter = new DatasetWriter(this.outDir)
    await datasetWriter.open()

    // Re-generate from existing meta.json / transcript.json files on disk
    const recordingsBase = path.join(this.outDir, 'recordings')
    try {
      await this.walkAndExport(recordingsBase, datasetWriter)
    } finally {
      // Always release the writer, even when the walk fails part-way.
      await datasetWriter.close()
    }

    return datasetWriter.path
  }

  /** List a directory's entries, treating an unreadable or missing directory as empty. */
  private async listDir(dir: string): Promise<string[]> {
    try {
      return await fs.readdir(dir)
    } catch {
      return []
    }
  }

  /**
   * Walk `<recordingsBase>/<year>/<month>/<recording>` and append every recording
   * that has a readable, schema-valid meta.json + transcript.json to `dataset`.
   * Recordings with missing or invalid files are skipped.
   */
  private async walkAndExport(
    recordingsBase: string,
    dataset: InstanceType<typeof import('./storage/dataset-writer.js').DatasetWriter>,
  ): Promise<void> {
    const { PlaudRecordingSchema, PlaudTranscriptSchema } = await import('./client/types.js')

    for (const year of await this.listDir(recordingsBase)) {
      const yearPath = path.join(recordingsBase, year)

      for (const month of await this.listDir(yearPath)) {
        const monthPath = path.join(yearPath, month)

        for (const recDir of await this.listDir(monthPath)) {
          const recPath = path.join(monthPath, recDir)
          try {
            const metaRaw = await fs.readFile(path.join(recPath, 'meta.json'), 'utf8')
            const transcriptRaw = await fs.readFile(path.join(recPath, 'transcript.json'), 'utf8')
            const meta = JSON.parse(metaRaw) as Record<string, unknown>
            const transcriptData = JSON.parse(transcriptRaw) as Record<string, unknown>

            // Reconstruct minimal PlaudRecording from meta.json; only the recording
            // timestamp is available, so it doubles as created/updated time.
            const recording = PlaudRecordingSchema.parse({
              id: meta['source_recording_id'],
              title: meta['title'],
              duration: meta['duration_seconds'],
              recordedAt: meta['recorded_at'],
              createdAt: meta['recorded_at'],
              updatedAt: meta['recorded_at'],
              hasTranscript: true,
              _raw: meta,
            })

            const fullText = ((transcriptData['segments'] ?? []) as Array<{ text?: string }>)
              .map(s => s.text ?? '')
              .filter(Boolean)
              .join('\n\n')

            const transcript = PlaudTranscriptSchema.parse({
              recordingId: String(meta['source_recording_id'] ?? ''),
              duration: Number(meta['duration_seconds'] ?? 0),
              segments: transcriptData['segments'] ?? [],
              fullText,
              _raw: transcriptData,
            })

            await dataset.append(this.outDir, recording, transcript)
          } catch {
            // Skip recordings with missing/invalid files
          }
        }
      }
    }
  }

  /**
   * Build an API client from the stored credentials.
   * @throws AuthError when no credentials are stored or they are expired.
   */
  private async buildClient(): Promise<PlaudApiClient> {
    const creds = await loadCredentials()
    if (!creds) {
      throw new AuthError("No credentials found — run 'alta-plaud auth' to authenticate")
    }
    if (isExpired(creds)) {
      throw new AuthError("Credentials expired — run 'alta-plaud auth' to re-authenticate")
    }
    return new PlaudApiClient(creds)
  }

  /** Fill in defaults for every option the caller left out; `outDir` always comes from this instance. */
  private buildSyncOptions(partial: Partial<SyncOptions>): SyncOptions {
    return {
      outDir: this.outDir,
      since: partial.since,
      limit: partial.limit,
      concurrency: partial.concurrency ?? 3,
      formats: partial.formats ?? ['json', 'txt', 'md'],
      includeDataset: partial.includeDataset ?? true,
      dryRun: partial.dryRun ?? false,
    }
  }

  /** Absolute output directory used by this instance. */
  get dataDir(): string {
    return this.outDir
  }
}
@@ -0,0 +1,248 @@
1
+ import { execSync } from 'node:child_process'
2
+ import { chromium, type Page, type Request as PWRequest, type BrowserContext } from 'playwright'
3
+ import { AuthError } from '../errors.js'
4
+ import { getLogger } from '../logger.js'
5
+ import { loadCredentials } from './token-store.js'
6
+ import { extractRegionalBaseUrl } from '../client/endpoints.js'
7
+ import type { AuthSession, EndpointMap } from './types.js'
8
+
9
/** Entry URL of the Plaud web app that the authentication browser navigates to. */
const PLAUD_APP_URL = 'https://web.plaud.ai'
10
+
11
/** Options accepted by `runBrowserAuth`. */
export interface BrowserAuthOptions {
  /** Launch the browser headless. Treated as false when omitted. */
  headless?: boolean
  /** Account email; automated login is attempted only when both email and password are set. */
  email?: string
  /** Account password; see `email`. */
  password?: string
  /** How long to wait for the user to log in (ms). Default: 5 minutes. */
  loginTimeoutMs?: number
}
18
+
19
/**
 * Launch a browser, preferring system Chrome and falling back to Playwright's
 * bundled Chromium (installing it on first use if necessary).
 */
function launchBrowser(
  headless: boolean,
  log: ReturnType<typeof getLogger>,
): ReturnType<typeof chromium.launch> {
  const launchOpts = {
    channel: 'chrome' as const,
    headless,
    args: ['--disable-blink-features=AutomationControlled'],
  }

  return chromium.launch(launchOpts).catch(async err => {
    const msg = String(err)
    if (msg.includes("Executable doesn't exist") || msg.includes('not found')) {
      log.warn('System Chrome not found, falling back to Playwright Chromium (Google OAuth may be blocked)')
      return chromium.launch({ headless }).catch(err2 => {
        if (String(err2).includes("Executable doesn't exist")) {
          log.info('Installing Playwright Chromium (one-time setup)...')
          execSync('npx playwright install chromium', { stdio: 'inherit' })
          return chromium.launch({ headless })
        }
        throw err2
      })
    }
    throw err
  })
}

/**
 * Open the Plaud web app in a browser, wait until an API request carrying a
 * Bearer token is observed (after automated or manual login, or immediately when
 * restored cookies are still valid), and return the captured session.
 *
 * @param opts See {@link BrowserAuthOptions}.
 * @returns Cookies, bearer token, regional API base URL and endpoint map.
 * @throws AuthError when no token is captured within the login timeout;
 *   rethrows any browser/navigation error after closing the browser.
 */
export async function runBrowserAuth(opts: BrowserAuthOptions = {}): Promise<AuthSession> {
  const log = getLogger()
  const browser = await launchBrowser(opts.headless ?? false, log)

  // Everything after launch runs inside the try so the browser is closed on any failure,
  // including failures while creating the context/page.
  try {
    const context = await browser.newContext({ userAgent: undefined })
    const page = await context.newPage()

    // Remove webdriver property that Google checks for automation detection
    await page.addInitScript(
      'Object.defineProperty(navigator, "webdriver", { get: () => undefined })',
    )

    // Inject existing plaud.ai cookies so we don't need a fresh login if session is still valid
    await injectExistingCookies(context)

    log.info('Opening Plaud...')

    // Set up Bearer token capture BEFORE navigation — the SPA fires API calls on load
    const loginTimeoutMs = opts.loginTimeoutMs ?? 5 * 60_000
    const bearerTokenCapture = captureBearerToken(page, loginTimeoutMs, log)
    // If navigation/login below throws, nobody awaits the capture promise and its
    // eventual timeout would surface as an unhandled rejection. Mark it as observed;
    // the `await` further down still sees the rejection.
    void bearerTokenCapture.catch(() => {})

    await page.goto(PLAUD_APP_URL, { waitUntil: 'domcontentloaded' })
    // Give SPA time to initialize and run its auth check (may redirect to /login)
    await page.waitForLoadState('networkidle', { timeout: 10_000 }).catch(() => {})

    if (opts.email && opts.password) {
      await automatedLogin(page, opts.email, opts.password)
    } else if (isLoginUrl(page.url())) {
      // Not logged in — prompt user and wait
      console.log('\n──────────────────────────────────────────────────────────')
      console.log('  Log in to Plaud in the browser window.')
      console.log('  The browser will close automatically once connected.')
      console.log(`  (Waiting up to ${Math.round(loginTimeoutMs / 60_000)} minutes)`)
      console.log('──────────────────────────────────────────────────────────\n')
    } else {
      log.info('Already connected — capturing token...')
    }

    // Wait for Bearer token from any API request (fires on page load if session is active,
    // or after login if the user needed to authenticate)
    const authToken = await bearerTokenCapture
    log.info('Bearer token captured — closing browser')

    const cookies = await context.cookies()
    // Close browser without blocking — Chrome can take a long time to flush its profile
    void browser.close().catch(() => {})

    // Discover the correct regional API base URL (e.g. api-euc1.plaud.ai for EU users)
    const apiBaseUrl = await discoverApiRegion(authToken)
    log.info({ apiBaseUrl }, 'Regional API base URL discovered')

    return {
      cookies: cookies.map(c => ({
        name: c.name,
        value: c.value,
        domain: c.domain,
        path: c.path,
        httpOnly: c.httpOnly,
        secure: c.secure,
        sameSite: c.sameSite as 'Strict' | 'Lax' | 'None' | undefined,
        // Non-positive expiry values are stored as "no expiry".
        expires: c.expires && c.expires > 0 ? c.expires : undefined,
      })),
      authToken,
      apiBaseUrl,
      capturedAt: new Date().toISOString(),
      endpointMap: buildEndpointMap(apiBaseUrl),
    }
  } catch (err) {
    await browser.close().catch(() => {})
    throw err
  }
}
112
+
113
+ // ─── Helpers ──────────────────────────────────────────────────────────────────
114
+
115
/**
 * Seed the browser context with plaud.ai cookies saved by a previous auth run,
 * so a still-valid session is picked up without a new login.
 * Best-effort: missing credentials or a rejected cookie set are not errors.
 */
async function injectExistingCookies(context: BrowserContext): Promise<void> {
  const log = getLogger()
  const stored = await loadCredentials().catch(() => null)
  if (!stored?.cookies?.length) return

  const isPlaudDomain = (domain: string): boolean =>
    domain === 'web.plaud.ai' || domain.endsWith('.plaud.ai') || domain === 'plaud.ai'

  const plaudCookies = stored.cookies.filter(cookie => isPlaudDomain(cookie.domain))
  if (plaudCookies.length === 0) return

  // Stored cookies may lack sameSite/expires; fall back to 'Lax' and -1 when adding them.
  const payload = plaudCookies.map(cookie => ({
    name: cookie.name,
    value: cookie.value,
    domain: cookie.domain,
    path: cookie.path,
    httpOnly: cookie.httpOnly,
    secure: cookie.secure,
    sameSite: (cookie.sameSite ?? 'Lax') as 'Strict' | 'Lax' | 'None',
    expires: cookie.expires ?? -1,
  }))

  try {
    await context.addCookies(payload)
    log.debug({ count: plaudCookies.length }, 'Injected existing session cookies')
  } catch (err) {
    log.debug({ err }, 'Could not inject existing cookies — fresh login required')
  }
}
147
+
148
/**
 * Wait for the first API request that carries a JWT in its Authorization header.
 * This fires automatically when:
 * - The page loads with an existing authenticated session (cookies restored)
 * - The user completes login via Google OAuth or email
 *
 * The header may be `bearer <jwt>` (any case) or the bare JWT. Headers using
 * another scheme (e.g. `Basic ...`) are ignored even if their value happens to
 * contain two dots, as are values with an empty header or payload segment.
 *
 * @param page Page whose outgoing requests are observed.
 * @param timeoutMs How long to wait before giving up.
 * @param log Logger used for a debug line when the token is found.
 * @returns Resolves with the raw token string (without "bearer " prefix).
 * @throws AuthError (as a rejection) when no token is seen within `timeoutMs`.
 */
function captureBearerToken(page: Page, timeoutMs: number, log: ReturnType<typeof getLogger>): Promise<string> {
  // Three dot-separated segments, no whitespace; the signature segment may be empty.
  const jwtShape = /^[^\s.]+\.[^\s.]+\.[^\s.]*$/

  return new Promise((resolve, reject) => {
    const handler = (req: PWRequest) => {
      const headers = req.headers()
      const auth = headers['authorization'] ?? headers['Authorization']
      if (!auth) return

      const token = auth.replace(/^bearer\s+/i, '').trim()
      if (!jwtShape.test(token)) return

      clearTimeout(timer)
      page.off('request', handler)
      log.debug({ url: req.url() }, 'Bearer token found in request')
      resolve(token)
    }

    const timer = setTimeout(() => {
      page.off('request', handler)
      reject(new AuthError(`Login timeout after ${Math.round(timeoutMs / 60_000)} minutes — no token captured`))
    }, timeoutMs)

    page.on('request', handler)
  })
}
179
+
180
/**
 * Discover the correct regional API base URL (e.g. https://api-euc1.plaud.ai).
 *
 * Asks the global endpoint for the user profile and uses the regional base URL
 * extracted from the response, if any. Every failure (network error, timeout,
 * non-JSON body, no region in the response) falls back to the global API, so
 * this function never rejects.
 *
 * @param token Raw bearer token (without the "bearer " prefix).
 * @param timeoutMs Upper bound for the discovery request, so a stalled
 *   connection cannot hang authentication indefinitely.
 */
async function discoverApiRegion(token: string, timeoutMs: number = 15_000): Promise<string> {
  const globalBase = 'https://api.plaud.ai'
  const log = getLogger()
  try {
    // The global endpoint returns a region-redirect response pointing to the right server
    const res = await fetch(`${globalBase}/user/me`, {
      headers: {
        'Authorization': `bearer ${token}`,
        'app-platform': 'web',
        'Origin': 'https://web.plaud.ai',
      },
      signal: AbortSignal.timeout(timeoutMs),
    })
    const body: unknown = await res.json()
    const regional = extractRegionalBaseUrl(body)
    if (regional) return regional
    // No redirect: either the global endpoint served the user data directly or the
    // response was not understood — in both cases the global base is used.
  } catch (err) {
    log.debug({ err }, 'Region discovery failed — using global API')
  }
  return globalBase
}
203
+
204
/**
 * Build the complete endpoint map from the known regional API base URL.
 *
 * Trailing slashes on `apiBaseUrl` are dropped first so the resulting endpoints
 * never contain a double slash (e.g. `https://host//file/list`).
 */
function buildEndpointMap(apiBaseUrl: string): EndpointMap {
  const base = apiBaseUrl.replace(/\/+$/, '')
  return {
    listRecordings: `${base}/file/simple/web`,
    batchDetail: `${base}/file/list`,
    getAudioUrl: `${base}/file/temp-url`,
    userProfile: `${base}/user/me`,
    apiBaseUrl: base,
  }
}
214
+
215
/**
 * Report whether `url` points at a login-type route, i.e. its path begins with
 * `/login`, `/signin` or `/auth`. Strings that do not parse as a URL yield false.
 */
function isLoginUrl(url: string): boolean {
  let pathname: string
  try {
    pathname = new URL(url).pathname
  } catch {
    return false
  }
  const loginPrefixes = ['/login', '/signin', '/auth']
  return loginPrefixes.some(prefix => pathname.startsWith(prefix))
}
223
+
224
/**
 * Fill the login form with the given credentials and submit it.
 *
 * For the email and the password field a list of candidate selectors is tried
 * in order and the first one present on the page is filled; if none is present
 * the field is left untouched. After clicking submit, waits up to 15 s for the
 * network to go idle and ignores a timeout of that wait.
 */
async function automatedLogin(page: Page, email: string, password: string): Promise<void> {
  const log = getLogger()
  log.info('Attempting automated login...')

  // Fill the first selector that matches at least one element, then stop.
  const fillFirstPresent = async (candidates: readonly string[], value: string): Promise<void> => {
    for (const selector of candidates) {
      const matches = await page.locator(selector).count()
      if (matches > 0) {
        await page.fill(selector, value)
        return
      }
    }
  }

  await fillFirstPresent(
    [
      'input[type="email"]',
      'input[name="email"]',
      'input[name="username"]',
      '[data-testid="email"]',
      '#email',
    ],
    email,
  )
  await fillFirstPresent(
    [
      'input[type="password"]',
      'input[name="password"]',
      '[data-testid="password"]',
      '#password',
    ],
    password,
  )

  const submitSelector =
    'button[type="submit"], [type="submit"], button:has-text("Login"), button:has-text("Sign in")'
  await page.click(submitSelector)
  await page.waitForLoadState('networkidle', { timeout: 15_000 }).catch(() => undefined)
}
@@ -0,0 +1,79 @@
1
+ import fs from 'node:fs/promises'
2
+ import { authTokenPath } from '../storage/paths.js'
3
+ import { StoredCredentialsSchema, type AuthSession, type StoredCredentials } from './types.js'
4
+ import { writeFileAtomic } from '../storage/atomic.js'
5
+ import { getLogger } from '../logger.js'
6
+
7
/**
 * Read and validate the stored credentials file.
 *
 * @returns The parsed credentials, or null when the file does not exist, cannot
 *   be read or parsed, or does not match `StoredCredentialsSchema`. A missing
 *   file is silent; every other failure is logged as a warning.
 */
export async function loadCredentials(): Promise<StoredCredentials | null> {
  const tokenPath = authTokenPath()
  try {
    const fileText = await fs.readFile(tokenPath, 'utf8')
    const validation = StoredCredentialsSchema.safeParse(JSON.parse(fileText))
    if (validation.success) {
      return validation.data
    }
    getLogger().warn({ issues: validation.error.issues }, 'Stored credentials failed schema validation — re-authenticate')
    return null
  } catch (err: unknown) {
    const fileMissing = (err as NodeJS.ErrnoException).code === 'ENOENT'
    if (!fileMissing) {
      getLogger().warn({ err }, 'Failed to read credentials file')
    }
    return null
  }
}
24
+
25
/**
 * Persist an auth session to the credentials file, stamped with `schemaVersion: 1`.
 *
 * The file holds a bearer token and session cookies, so after writing it is
 * restricted to owner read/write (0600). The permission change is best-effort:
 * a failure is logged and does not fail the save.
 *
 * @throws Whatever `writeFileAtomic` throws when the file cannot be written.
 */
export async function saveCredentials(session: AuthSession): Promise<void> {
  const tokenPath = authTokenPath()
  const stored: StoredCredentials = { ...session, schemaVersion: 1 }
  await writeFileAtomic(tokenPath, JSON.stringify(stored, null, 2))

  try {
    await fs.chmod(tokenPath, 0o600)
  } catch (err) {
    getLogger().warn({ err, path: tokenPath }, 'Could not restrict permissions on credentials file')
  }

  getLogger().info({ path: tokenPath }, 'Auth credentials saved')
}
31
+
32
/**
 * Returns true if the stored credentials are expired.
 *
 * Signals are consulted in this order; the first one that applies decides:
 * 1. an explicit, parseable `expiresAt`;
 * 2. the `exp` claim of a JWT `authToken`;
 * 3. plaud.ai cookies with an expiry — expired as soon as the earliest one has passed;
 * 4. age since `capturedAt` — expired after 30 days.
 *
 * Unparseable timestamps never mean "valid forever": a bad `expiresAt` is
 * skipped in favour of the remaining signals, and a bad `capturedAt` counts as expired.
 */
export function isExpired(creds: StoredCredentials): boolean {
  const now = Date.now()

  // Explicit expiresAt takes precedence (when it is a usable date)
  if (creds.expiresAt) {
    const expiresAtMs = new Date(creds.expiresAt).getTime()
    if (!Number.isNaN(expiresAtMs)) return now > expiresAtMs
  }

  // If we have a JWT bearer token, decode the exp claim (most reliable)
  if (creds.authToken) {
    const jwtExp = decodeJwtExp(creds.authToken)
    if (jwtExp !== null) return now > jwtExp * 1000
  }

  // Fallback: check only plaud.ai session cookies (ignore analytics/CDN cookies
  // which have short TTLs and would cause false "expired" readings)
  const plaudCookies = creds.cookies.filter(
    c => c.expires && c.expires > 0 && (c.domain.endsWith('.plaud.ai') || c.domain === 'plaud.ai'),
  )
  if (plaudCookies.length > 0) {
    // Cookie expiry is in seconds; compare in milliseconds.
    const minExpiry = Math.min(...plaudCookies.map(c => (c.expires ?? 0) * 1000))
    if (minExpiry > 0 && now > minExpiry) return true
  }

  // Last resort: treat as expired after 30 days
  const capturedAt = new Date(creds.capturedAt).getTime()
  if (Number.isNaN(capturedAt)) return true
  return now - capturedAt > 30 * 24 * 60 * 60 * 1000
}

/**
 * Decode the `exp` claim from a JWT (no signature verification — just decode).
 *
 * @returns The claim in seconds, or null when the token is not a three-part JWT,
 *   its payload is not a JSON object, or `exp` is not a finite number.
 */
function decodeJwtExp(token: string): number | null {
  const parts = token.split('.')
  if (parts.length !== 3) return null
  const payloadPart = parts[1]
  if (payloadPart === undefined) return null

  try {
    const payload: unknown = JSON.parse(Buffer.from(payloadPart, 'base64url').toString('utf8'))
    if (typeof payload !== 'object' || payload === null) return null
    const exp = (payload as Record<string, unknown>)['exp']
    return typeof exp === 'number' && Number.isFinite(exp) ? exp : null
  } catch {
    return null
  }
}
74
+
75
/**
 * Serialize all stored cookies into a single `name=value; name=value` string,
 * in stored order. Yields an empty string when there are no cookies.
 */
export function cookieHeader(creds: StoredCredentials): string {
  const pairs: string[] = []
  for (const { name, value } of creds.cookies) {
    pairs.push(`${name}=${value}`)
  }
  return pairs.join('; ')
}
78
+
79
+ export { authTokenPath }
@@ -0,0 +1,41 @@
1
+ import { z } from 'zod'
2
+
3
/** Shape of one browser cookie as persisted in the credentials file. */
export const CookieSchema = z.object({
  name: z.string(),
  value: z.string(),
  domain: z.string(),
  path: z.string(),
  httpOnly: z.boolean(),
  secure: z.boolean(),
  /** Absent when the cookie carries no SameSite attribute. */
  sameSite: z.enum(['Strict', 'Lax', 'None']).optional(),
  /** Expiry timestamp; absent for cookies without an expiry. */
  expires: z.number().optional(),
})
13
+
14
/** Absolute URLs of the API endpoints in use; every entry is optional. */
export const EndpointMapSchema = z.object({
  /** GET /file/simple/web */
  listRecordings: z.string().optional(),
  /** POST /file/list */
  batchDetail: z.string().optional(),
  /** GET /file/temp-url/<id> */
  getAudioUrl: z.string().optional(),
  /** GET /user/me */
  userProfile: z.string().optional(),
  /** Base URL the endpoints above were built from. */
  apiBaseUrl: z.string().optional(),
  /** @deprecated — transcript is embedded in the recording, not a separate endpoint */
  getTranscript: z.string().optional(),
})

/** Static type inferred from {@link EndpointMapSchema}. */
export type EndpointMap = z.infer<typeof EndpointMapSchema>
25
+
26
/** Result of one authentication run: cookies, optional bearer token and API location. */
export const AuthSessionSchema = z.object({
  cookies: z.array(CookieSchema),
  /** Bearer token, when one was captured. */
  authToken: z.string().optional(),
  apiBaseUrl: z.string(),
  /** ISO datetime at which the session was captured. */
  capturedAt: z.string().datetime(),
  /** Optional explicit ISO datetime after which the session is considered expired. */
  expiresAt: z.string().datetime().optional(),
  endpointMap: EndpointMapSchema.optional(),
})

/** Static type inferred from {@link AuthSessionSchema}. */
export type AuthSession = z.infer<typeof AuthSessionSchema>
36
+
37
/** On-disk form of an auth session: {@link AuthSessionSchema} plus a schema version tag. */
export const StoredCredentialsSchema = AuthSessionSchema.extend({
  /** Version of the persisted format; only version 1 is accepted. */
  schemaVersion: z.literal(1),
})

/** Static type inferred from {@link StoredCredentialsSchema}. */
export type StoredCredentials = z.infer<typeof StoredCredentialsSchema>
package/src/cli/bin.ts ADDED
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env node
2
+ import { Command } from 'commander'
3
+ import { registerAuthCommand } from './commands/auth.js'
4
+ import { registerSyncCommand } from './commands/sync.js'
5
+ import { registerBackfillCommand } from './commands/backfill.js'
6
+ import { registerVerifyCommand } from './commands/verify.js'
7
+ import { ExitCode, toExitCode } from './exit-codes.js'
8
+
9
// CLI entry point: declare the program, attach the sub-commands, then parse argv.
const program = new Command()
  .name('alta-plaud')
  .description('Export recordings, transcripts, and metadata from Plaud')
  .version('1.0.0')
  .helpOption('-h, --help', 'Show help')

registerAuthCommand(program)
registerSyncCommand(program)
registerBackfillCommand(program)
registerVerifyCommand(program)

/**
 * Report a fatal error on stderr and terminate with the matching exit code.
 * The stack trace is printed only when the DEBUG environment variable is set.
 * This is the only place in the codebase where process.exit() is called.
 */
const exitWithError = (err: unknown): never => {
  const code = toExitCode(err)
  if (!(err instanceof Error)) {
    console.error(`\nUnexpected error: ${String(err)}`)
  } else {
    console.error(`\nError: ${err.message}`)
    if (process.env['DEBUG']) console.error(err.stack)
  }
  process.exit(code)
}

program.parseAsync(process.argv).catch(exitWithError)