@alta-foundation/plaud-extractor 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +9 -0
- package/.github/workflows/ci.yml +33 -0
- package/.github/workflows/publish.yml +46 -0
- package/CLAUDE.md +53 -0
- package/README.md +318 -0
- package/dist/PlaudExtractor.d.ts +61 -0
- package/dist/PlaudExtractor.d.ts.map +1 -0
- package/dist/PlaudExtractor.js +236 -0
- package/dist/PlaudExtractor.js.map +1 -0
- package/dist/auth/browser-auth.d.ts +10 -0
- package/dist/auth/browser-auth.d.ts.map +1 -0
- package/dist/auth/browser-auth.js +220 -0
- package/dist/auth/browser-auth.js.map +1 -0
- package/dist/auth/token-store.d.ts +9 -0
- package/dist/auth/token-store.d.ts.map +1 -0
- package/dist/auth/token-store.js +74 -0
- package/dist/auth/token-store.js.map +1 -0
- package/dist/auth/types.d.ts +266 -0
- package/dist/auth/types.d.ts.map +1 -0
- package/dist/auth/types.js +32 -0
- package/dist/auth/types.js.map +1 -0
- package/dist/cli/bin.d.ts +3 -0
- package/dist/cli/bin.d.ts.map +1 -0
- package/dist/cli/bin.js +30 -0
- package/dist/cli/bin.js.map +1 -0
- package/dist/cli/commands/auth.d.ts +3 -0
- package/dist/cli/commands/auth.d.ts.map +1 -0
- package/dist/cli/commands/auth.js +22 -0
- package/dist/cli/commands/auth.js.map +1 -0
- package/dist/cli/commands/backfill.d.ts +3 -0
- package/dist/cli/commands/backfill.d.ts.map +1 -0
- package/dist/cli/commands/backfill.js +59 -0
- package/dist/cli/commands/backfill.js.map +1 -0
- package/dist/cli/commands/sync.d.ts +3 -0
- package/dist/cli/commands/sync.d.ts.map +1 -0
- package/dist/cli/commands/sync.js +55 -0
- package/dist/cli/commands/sync.js.map +1 -0
- package/dist/cli/commands/verify.d.ts +3 -0
- package/dist/cli/commands/verify.d.ts.map +1 -0
- package/dist/cli/commands/verify.js +28 -0
- package/dist/cli/commands/verify.js.map +1 -0
- package/dist/cli/exit-codes.d.ts +8 -0
- package/dist/cli/exit-codes.d.ts.map +1 -0
- package/dist/cli/exit-codes.js +16 -0
- package/dist/cli/exit-codes.js.map +1 -0
- package/dist/cli/options.d.ts +31 -0
- package/dist/cli/options.d.ts.map +1 -0
- package/dist/cli/options.js +11 -0
- package/dist/cli/options.js.map +1 -0
- package/dist/client/endpoints.d.ts +26 -0
- package/dist/client/endpoints.d.ts.map +1 -0
- package/dist/client/endpoints.js +54 -0
- package/dist/client/endpoints.js.map +1 -0
- package/dist/client/http.d.ts +17 -0
- package/dist/client/http.d.ts.map +1 -0
- package/dist/client/http.js +92 -0
- package/dist/client/http.js.map +1 -0
- package/dist/client/plaud-client.d.ts +14 -0
- package/dist/client/plaud-client.d.ts.map +1 -0
- package/dist/client/plaud-client.js +216 -0
- package/dist/client/plaud-client.js.map +1 -0
- package/dist/client/types.d.ts +154 -0
- package/dist/client/types.d.ts.map +1 -0
- package/dist/client/types.js +41 -0
- package/dist/client/types.js.map +1 -0
- package/dist/errors.d.ts +24 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +51 -0
- package/dist/errors.js.map +1 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +5 -0
- package/dist/index.js.map +1 -0
- package/dist/logger.d.ts +9 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +37 -0
- package/dist/logger.js.map +1 -0
- package/dist/mcp/job-tools.d.ts +3 -0
- package/dist/mcp/job-tools.d.ts.map +1 -0
- package/dist/mcp/job-tools.js +108 -0
- package/dist/mcp/job-tools.js.map +1 -0
- package/dist/mcp/read-tools.d.ts +3 -0
- package/dist/mcp/read-tools.d.ts.map +1 -0
- package/dist/mcp/read-tools.js +173 -0
- package/dist/mcp/read-tools.js.map +1 -0
- package/dist/mcp/server.d.ts +3 -0
- package/dist/mcp/server.d.ts.map +1 -0
- package/dist/mcp/server.js +32 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/storage/atomic.d.ts +5 -0
- package/dist/storage/atomic.d.ts.map +1 -0
- package/dist/storage/atomic.js +51 -0
- package/dist/storage/atomic.js.map +1 -0
- package/dist/storage/checksums.d.ts +15 -0
- package/dist/storage/checksums.d.ts.map +1 -0
- package/dist/storage/checksums.js +56 -0
- package/dist/storage/checksums.js.map +1 -0
- package/dist/storage/dataset-writer.d.ts +21 -0
- package/dist/storage/dataset-writer.d.ts.map +1 -0
- package/dist/storage/dataset-writer.js +52 -0
- package/dist/storage/dataset-writer.js.map +1 -0
- package/dist/storage/paths.d.ts +9 -0
- package/dist/storage/paths.d.ts.map +1 -0
- package/dist/storage/paths.js +38 -0
- package/dist/storage/paths.js.map +1 -0
- package/dist/storage/recording-store.d.ts +24 -0
- package/dist/storage/recording-store.d.ts.map +1 -0
- package/dist/storage/recording-store.js +161 -0
- package/dist/storage/recording-store.js.map +1 -0
- package/dist/sync/download-queue.d.ts +21 -0
- package/dist/sync/download-queue.d.ts.map +1 -0
- package/dist/sync/download-queue.js +82 -0
- package/dist/sync/download-queue.js.map +1 -0
- package/dist/sync/incremental.d.ts +21 -0
- package/dist/sync/incremental.d.ts.map +1 -0
- package/dist/sync/incremental.js +96 -0
- package/dist/sync/incremental.js.map +1 -0
- package/dist/sync/sync-engine.d.ts +6 -0
- package/dist/sync/sync-engine.d.ts.map +1 -0
- package/dist/sync/sync-engine.js +135 -0
- package/dist/sync/sync-engine.js.map +1 -0
- package/dist/sync/types.d.ts +130 -0
- package/dist/sync/types.d.ts.map +1 -0
- package/dist/sync/types.js +17 -0
- package/dist/sync/types.js.map +1 -0
- package/dist/transcript/formatter.d.ts +4 -0
- package/dist/transcript/formatter.d.ts.map +1 -0
- package/dist/transcript/formatter.js +88 -0
- package/dist/transcript/formatter.js.map +1 -0
- package/package.json +41 -0
- package/src/PlaudExtractor.ts +275 -0
- package/src/auth/browser-auth.ts +248 -0
- package/src/auth/token-store.ts +79 -0
- package/src/auth/types.ts +41 -0
- package/src/cli/bin.ts +30 -0
- package/src/cli/commands/auth.ts +27 -0
- package/src/cli/commands/backfill.ts +77 -0
- package/src/cli/commands/sync.ts +71 -0
- package/src/cli/commands/verify.ts +31 -0
- package/src/cli/exit-codes.ts +14 -0
- package/src/cli/options.ts +10 -0
- package/src/client/endpoints.ts +62 -0
- package/src/client/http.ts +110 -0
- package/src/client/plaud-client.ts +268 -0
- package/src/client/types.ts +62 -0
- package/src/errors.ts +57 -0
- package/src/index.ts +17 -0
- package/src/logger.ts +49 -0
- package/src/mcp/job-tools.ts +156 -0
- package/src/mcp/read-tools.ts +204 -0
- package/src/mcp/server.ts +39 -0
- package/src/storage/atomic.ts +51 -0
- package/src/storage/checksums.ts +76 -0
- package/src/storage/dataset-writer.ts +74 -0
- package/src/storage/paths.ts +44 -0
- package/src/storage/recording-store.ts +182 -0
- package/src/sync/download-queue.ts +102 -0
- package/src/sync/incremental.ts +111 -0
- package/src/sync/sync-engine.ts +183 -0
- package/src/sync/types.ts +64 -0
- package/src/transcript/formatter.ts +91 -0
- package/tsconfig.build.json +8 -0
- package/tsconfig.json +19 -0
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
import path from 'node:path'
|
|
2
|
+
import fs from 'node:fs/promises'
|
|
3
|
+
import os from 'node:os'
|
|
4
|
+
import { createLogger, setLogger, type Logger } from './logger.js'
|
|
5
|
+
import { loadCredentials, saveCredentials, isExpired } from './auth/token-store.js'
|
|
6
|
+
import { runBrowserAuth, type BrowserAuthOptions } from './auth/browser-auth.js'
|
|
7
|
+
import { PlaudApiClient } from './client/plaud-client.js'
|
|
8
|
+
import { SyncEngine } from './sync/sync-engine.js'
|
|
9
|
+
import { IncrementalTracker } from './sync/incremental.js'
|
|
10
|
+
import { RecordingStore } from './storage/recording-store.js'
|
|
11
|
+
import { verifyChecksums } from './storage/checksums.js'
|
|
12
|
+
import { recordingDir, defaultOutDir } from './storage/paths.js'
|
|
13
|
+
import { AuthError } from './errors.js'
|
|
14
|
+
import type { SyncOptions, SyncResult, BackfillOptions, VerifyResult } from './sync/types.js'
|
|
15
|
+
|
|
16
|
+
/**
 * Constructor configuration for {@link PlaudExtractor}. All fields are
 * optional; omitted fields fall back to the defaults noted per field.
 * Note: `verbose` and `redact` only take effect when no `logger` is injected
 * (an injected logger is used as-is).
 */
export interface PlaudExtractorConfig {
  /** Output directory for recordings. Default: ~/alta/data/plaud */
  outDir?: string
  /** Inject a custom pino logger (e.g., from Alta CORE) */
  logger?: Logger
  /** Verbose logging */
  verbose?: boolean
  /** Redact tokens from logs */
  redact?: boolean
}
|
|
26
|
+
|
|
27
|
+
export class PlaudExtractor {
|
|
28
|
+
private readonly outDir: string
|
|
29
|
+
private readonly engine: SyncEngine
|
|
30
|
+
|
|
31
|
+
constructor(config: PlaudExtractorConfig = {}) {
|
|
32
|
+
this.outDir = config.outDir
|
|
33
|
+
? path.resolve(config.outDir.replace(/^~/, os.homedir()))
|
|
34
|
+
: defaultOutDir()
|
|
35
|
+
|
|
36
|
+
if (config.logger) {
|
|
37
|
+
setLogger(config.logger)
|
|
38
|
+
} else {
|
|
39
|
+
createLogger(this.outDir, { verbose: config.verbose, redact: config.redact })
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
this.engine = new SyncEngine()
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Launch browser for authentication.
|
|
47
|
+
* Saves credentials to ~/.alta/plaud-auth.json.
|
|
48
|
+
*/
|
|
49
|
+
async authenticate(opts: BrowserAuthOptions = {}): Promise<void> {
|
|
50
|
+
const session = await runBrowserAuth(opts)
|
|
51
|
+
await saveCredentials(session)
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Check if credentials exist and are not expired.
|
|
56
|
+
*/
|
|
57
|
+
async isAuthenticated(): Promise<boolean> {
|
|
58
|
+
const creds = await loadCredentials()
|
|
59
|
+
if (!creds) return false
|
|
60
|
+
if (isExpired(creds)) return false
|
|
61
|
+
return true
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* Incremental sync: only download new or changed recordings since last run.
|
|
66
|
+
* If the token expires mid-sync, re-authenticates automatically and retries once.
|
|
67
|
+
*/
|
|
68
|
+
async sync(opts: Partial<SyncOptions> = {}): Promise<SyncResult> {
|
|
69
|
+
return this.runWithReauth(opts, 'sync')
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/**
|
|
73
|
+
* Full backfill: re-evaluate all recordings regardless of sync state.
|
|
74
|
+
* If the token expires mid-backfill, re-authenticates automatically and retries once.
|
|
75
|
+
*/
|
|
76
|
+
async backfill(opts: Partial<BackfillOptions> = {}): Promise<SyncResult> {
|
|
77
|
+
return this.runWithReauth(opts, 'backfill')
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Run sync/backfill, and if a token-expired AuthError occurs mid-run,
|
|
82
|
+
* automatically re-authenticate and retry once.
|
|
83
|
+
*/
|
|
84
|
+
private async runWithReauth(
|
|
85
|
+
opts: Partial<SyncOptions>,
|
|
86
|
+
mode: 'sync' | 'backfill',
|
|
87
|
+
): Promise<SyncResult> {
|
|
88
|
+
try {
|
|
89
|
+
const client = await this.buildClient()
|
|
90
|
+
return await this.engine.run(client, this.buildSyncOptions(opts), mode)
|
|
91
|
+
} catch (err) {
|
|
92
|
+
if (!(err instanceof AuthError)) throw err
|
|
93
|
+
|
|
94
|
+
// Token expired or rejected mid-run — re-authenticate and try once more
|
|
95
|
+
console.error('\nSession expired during sync. Re-authenticating...')
|
|
96
|
+
await this.authenticate()
|
|
97
|
+
console.log('Re-authenticated. Resuming sync...\n')
|
|
98
|
+
|
|
99
|
+
const client = await this.buildClient()
|
|
100
|
+
return this.engine.run(client, this.buildSyncOptions(opts), mode)
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
/**
|
|
105
|
+
* Walk all recording folders and verify checksums.
|
|
106
|
+
* With repair=true, re-download any file with a mismatch.
|
|
107
|
+
*/
|
|
108
|
+
async verify(opts: { repair?: boolean } = {}): Promise<VerifyResult> {
|
|
109
|
+
const client = opts.repair ? await this.buildClient() : null
|
|
110
|
+
const tracker = new IncrementalTracker()
|
|
111
|
+
await tracker.load(this.outDir)
|
|
112
|
+
|
|
113
|
+
const result: VerifyResult = { scanned: 0, ok: 0, failed: 0, repaired: 0, issues: [] }
|
|
114
|
+
const recordingIds = tracker.getAllRecordingIds()
|
|
115
|
+
|
|
116
|
+
for (const id of recordingIds) {
|
|
117
|
+
const state = tracker.getRecordingState(id)
|
|
118
|
+
if (!state) continue
|
|
119
|
+
|
|
120
|
+
const dir = recordingDir(this.outDir, state.recordedAt, id)
|
|
121
|
+
result.scanned++
|
|
122
|
+
|
|
123
|
+
try {
|
|
124
|
+
const mismatches = await verifyChecksums(dir)
|
|
125
|
+
if (mismatches.length === 0) {
|
|
126
|
+
result.ok++
|
|
127
|
+
tracker.markVerified(id)
|
|
128
|
+
} else {
|
|
129
|
+
result.failed++
|
|
130
|
+
for (const m of mismatches) {
|
|
131
|
+
result.issues.push({
|
|
132
|
+
recordingId: id,
|
|
133
|
+
file: path.basename(m.filePath),
|
|
134
|
+
issue: `checksum mismatch (expected: ${m.expected.slice(0, 8)}..., got: ${m.actual === 'MISSING' ? 'MISSING' : m.actual.slice(0, 8) + '...'})`,
|
|
135
|
+
})
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
// TODO: repair support requires re-fetching the recording object
|
|
139
|
+
// For now, log the mismatch
|
|
140
|
+
}
|
|
141
|
+
} catch (err) {
|
|
142
|
+
result.failed++
|
|
143
|
+
result.issues.push({ recordingId: id, file: '', issue: String(err) })
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
await tracker.persist(this.outDir)
|
|
148
|
+
return result
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
/**
|
|
152
|
+
* Export all local recordings to a JSONL dataset file.
|
|
153
|
+
* Returns the path to the generated file.
|
|
154
|
+
*/
|
|
155
|
+
async exportDataset(opts: { format?: 'jsonl' } = {}): Promise<string> {
|
|
156
|
+
const { DatasetWriter } = await import('./storage/dataset-writer.js')
|
|
157
|
+
const { default: fsSync } = await import('node:fs')
|
|
158
|
+
|
|
159
|
+
// Walk recordings dir and collect existing transcript data
|
|
160
|
+
const datasetWriter = new DatasetWriter(this.outDir)
|
|
161
|
+
await datasetWriter.open()
|
|
162
|
+
|
|
163
|
+
// Re-generate from existing transcript.json files on disk
|
|
164
|
+
const recordingsBase = path.join(this.outDir, 'recordings')
|
|
165
|
+
try {
|
|
166
|
+
await this.walkAndExport(recordingsBase, datasetWriter)
|
|
167
|
+
} finally {
|
|
168
|
+
await datasetWriter.close()
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
return datasetWriter.path
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
private async walkAndExport(
|
|
175
|
+
recordingsBase: string,
|
|
176
|
+
dataset: InstanceType<typeof import('./storage/dataset-writer.js').DatasetWriter>,
|
|
177
|
+
): Promise<void> {
|
|
178
|
+
const { PlaudRecordingSchema } = await import('./client/types.js')
|
|
179
|
+
const { PlaudTranscriptSchema } = await import('./client/types.js')
|
|
180
|
+
|
|
181
|
+
// Walk year/month/dir structure
|
|
182
|
+
let yearDirs: string[]
|
|
183
|
+
try {
|
|
184
|
+
yearDirs = await fs.readdir(recordingsBase)
|
|
185
|
+
} catch {
|
|
186
|
+
return
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
for (const year of yearDirs) {
|
|
190
|
+
const yearPath = path.join(recordingsBase, year)
|
|
191
|
+
let monthDirs: string[]
|
|
192
|
+
try {
|
|
193
|
+
monthDirs = await fs.readdir(yearPath)
|
|
194
|
+
} catch {
|
|
195
|
+
continue
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
for (const month of monthDirs) {
|
|
199
|
+
const monthPath = path.join(yearPath, month)
|
|
200
|
+
let recDirs: string[]
|
|
201
|
+
try {
|
|
202
|
+
recDirs = await fs.readdir(monthPath)
|
|
203
|
+
} catch {
|
|
204
|
+
continue
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
for (const recDir of recDirs) {
|
|
208
|
+
const recPath = path.join(monthPath, recDir)
|
|
209
|
+
try {
|
|
210
|
+
const metaRaw = await fs.readFile(path.join(recPath, 'meta.json'), 'utf8')
|
|
211
|
+
const transcriptRaw = await fs.readFile(path.join(recPath, 'transcript.json'), 'utf8')
|
|
212
|
+
const meta = JSON.parse(metaRaw) as Record<string, unknown>
|
|
213
|
+
const transcriptData = JSON.parse(transcriptRaw) as Record<string, unknown>
|
|
214
|
+
|
|
215
|
+
// Reconstruct minimal PlaudRecording from meta.json
|
|
216
|
+
const recording = PlaudRecordingSchema.parse({
|
|
217
|
+
id: meta['source_recording_id'],
|
|
218
|
+
title: meta['title'],
|
|
219
|
+
duration: meta['duration_seconds'],
|
|
220
|
+
recordedAt: meta['recorded_at'],
|
|
221
|
+
createdAt: meta['recorded_at'],
|
|
222
|
+
updatedAt: meta['recorded_at'],
|
|
223
|
+
hasTranscript: true,
|
|
224
|
+
_raw: meta,
|
|
225
|
+
})
|
|
226
|
+
|
|
227
|
+
const fullText = ((transcriptData['segments'] ?? []) as Array<{ text?: string }>)
|
|
228
|
+
.map(s => s.text ?? '')
|
|
229
|
+
.filter(Boolean)
|
|
230
|
+
.join('\n\n')
|
|
231
|
+
|
|
232
|
+
const transcript = PlaudTranscriptSchema.parse({
|
|
233
|
+
recordingId: String(meta['source_recording_id'] ?? ''),
|
|
234
|
+
duration: Number(meta['duration_seconds'] ?? 0),
|
|
235
|
+
segments: transcriptData['segments'] ?? [],
|
|
236
|
+
fullText,
|
|
237
|
+
_raw: transcriptData,
|
|
238
|
+
})
|
|
239
|
+
|
|
240
|
+
await dataset.append(this.outDir, recording, transcript)
|
|
241
|
+
} catch {
|
|
242
|
+
// Skip recordings with missing/invalid files
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
private async buildClient(): Promise<PlaudApiClient> {
|
|
250
|
+
const creds = await loadCredentials()
|
|
251
|
+
if (!creds) {
|
|
252
|
+
throw new AuthError("No credentials found — run 'alta-plaud auth' to authenticate")
|
|
253
|
+
}
|
|
254
|
+
if (isExpired(creds)) {
|
|
255
|
+
throw new AuthError("Credentials expired — run 'alta-plaud auth' to re-authenticate")
|
|
256
|
+
}
|
|
257
|
+
return new PlaudApiClient(creds)
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
private buildSyncOptions(partial: Partial<SyncOptions>): SyncOptions {
|
|
261
|
+
return {
|
|
262
|
+
outDir: this.outDir,
|
|
263
|
+
since: partial.since,
|
|
264
|
+
limit: partial.limit,
|
|
265
|
+
concurrency: partial.concurrency ?? 3,
|
|
266
|
+
formats: partial.formats ?? ['json', 'txt', 'md'],
|
|
267
|
+
includeDataset: partial.includeDataset ?? true,
|
|
268
|
+
dryRun: partial.dryRun ?? false,
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
get dataDir(): string {
|
|
273
|
+
return this.outDir
|
|
274
|
+
}
|
|
275
|
+
}
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
import { execSync } from 'node:child_process'
|
|
2
|
+
import { chromium, type Page, type Request as PWRequest, type BrowserContext } from 'playwright'
|
|
3
|
+
import { AuthError } from '../errors.js'
|
|
4
|
+
import { getLogger } from '../logger.js'
|
|
5
|
+
import { loadCredentials } from './token-store.js'
|
|
6
|
+
import { extractRegionalBaseUrl } from '../client/endpoints.js'
|
|
7
|
+
import type { AuthSession, EndpointMap } from './types.js'
|
|
8
|
+
|
|
9
|
+
/** Plaud web app — the SPA this module drives to capture an authenticated session. */
const PLAUD_APP_URL = 'https://web.plaud.ai'
|
|
10
|
+
|
|
11
|
+
/** Options for the interactive/automated browser auth flow. */
export interface BrowserAuthOptions {
  /** Run the browser headless. Default: false — a visible window is needed for manual login. */
  headless?: boolean
  /** Email for automated form login. Both email and password must be set to attempt it. */
  email?: string
  /** Password for automated form login. */
  password?: string
  /** How long to wait for the user to log in (ms). Default: 5 minutes. */
  loginTimeoutMs?: number
}
|
|
18
|
+
|
|
19
|
+
/**
 * Drive a real browser through the Plaud web app and capture an authenticated
 * session: the Bearer token sniffed from the SPA's API requests, the browser
 * cookies, and the discovered regional API base URL.
 *
 * Launch strategy: prefer system Chrome (channel 'chrome'); on failure fall
 * back to Playwright Chromium, installing it on first use if missing.
 * Cookies from a previous session are injected up front so a still-valid
 * session can complete without the user logging in again.
 *
 * @throws AuthError if no Bearer token is captured within the login timeout.
 */
export async function runBrowserAuth(opts: BrowserAuthOptions = {}): Promise<AuthSession> {
  const log = getLogger()
  const launchOpts = {
    channel: 'chrome' as const,
    headless: opts.headless ?? false,
    // Hides the most common automation fingerprint from login providers.
    args: ['--disable-blink-features=AutomationControlled'],
  }

  const browser = await chromium.launch(launchOpts).catch(async err => {
    const msg = String(err)
    if (msg.includes("Executable doesn't exist") || msg.includes('not found')) {
      log.warn('System Chrome not found, falling back to Playwright Chromium (Google OAuth may be blocked)')
      return chromium.launch({ headless: opts.headless ?? false }).catch(err2 => {
        if (String(err2).includes("Executable doesn't exist")) {
          log.info('Installing Playwright Chromium (one-time setup)...')
          execSync('npx playwright install chromium', { stdio: 'inherit' })
          return chromium.launch({ headless: opts.headless ?? false })
        }
        throw err2
      })
    }
    throw err
  })

  const context = await browser.newContext({ userAgent: undefined })
  const page = await context.newPage()

  // Remove webdriver property that Google checks for automation detection
  await page.addInitScript(
    'Object.defineProperty(navigator, "webdriver", { get: () => undefined })',
  )

  // Inject existing plaud.ai cookies so we don't need a fresh login if session is still valid
  await injectExistingCookies(context)

  try {
    log.info('Opening Plaud...')

    // Set up Bearer token capture BEFORE navigation — the SPA fires API calls on load
    const loginTimeoutMs = opts.loginTimeoutMs ?? 5 * 60_000
    const bearerTokenCapture = captureBearerToken(page, loginTimeoutMs, log)

    await page.goto(PLAUD_APP_URL, { waitUntil: 'domcontentloaded' })
    // Give SPA time to initialize and run its auth check (may redirect to /login)
    await page.waitForLoadState('networkidle', { timeout: 10_000 }).catch(() => {})

    if (opts.email && opts.password) {
      await automatedLogin(page, opts.email, opts.password)
    } else if (isLoginUrl(page.url())) {
      // Not logged in — prompt user and wait
      console.log('\n──────────────────────────────────────────────────────────')
      console.log(' Log in to Plaud in the browser window.')
      console.log(' The browser will close automatically once connected.')
      console.log(` (Waiting up to ${Math.round(loginTimeoutMs / 60_000)} minutes)`)
      console.log('──────────────────────────────────────────────────────────\n')
    } else {
      log.info('Already connected — capturing token...')
    }

    // Wait for Bearer token from any API request (fires on page load if session is active,
    // or after login if the user needed to authenticate)
    const authToken = await bearerTokenCapture
    log.info('Bearer token captured — closing browser')

    const cookies = await context.cookies()
    // Close browser without blocking — Chrome can take a long time to flush its profile
    void browser.close().catch(() => {})

    // Discover the correct regional API base URL (e.g. api-euc1.plaud.ai for EU users)
    const apiBaseUrl = await discoverApiRegion(authToken)
    log.info({ apiBaseUrl }, 'Regional API base URL discovered')

    return {
      // Normalize Playwright cookies into the stored-credential shape;
      // a non-positive expires means "session cookie" and is stored as undefined.
      cookies: cookies.map(c => ({
        name: c.name,
        value: c.value,
        domain: c.domain,
        path: c.path,
        httpOnly: c.httpOnly,
        secure: c.secure,
        sameSite: c.sameSite as 'Strict' | 'Lax' | 'None' | undefined,
        expires: c.expires && c.expires > 0 ? c.expires : undefined,
      })),
      authToken,
      apiBaseUrl,
      capturedAt: new Date().toISOString(),
      endpointMap: buildEndpointMap(apiBaseUrl),
    }
  } catch (err) {
    await browser.close().catch(() => {})
    throw err
  }
}
|
|
112
|
+
|
|
113
|
+
// ─── Helpers ──────────────────────────────────────────────────────────────────
|
|
114
|
+
|
|
115
|
+
/**
|
|
116
|
+
* Inject plaud.ai cookies from the previous auth session so the browser picks up
|
|
117
|
+
* an existing session without requiring the user to log in again.
|
|
118
|
+
*/
|
|
119
|
+
async function injectExistingCookies(context: BrowserContext): Promise<void> {
|
|
120
|
+
const log = getLogger()
|
|
121
|
+
const existing = await loadCredentials().catch(() => null)
|
|
122
|
+
if (!existing?.cookies?.length) return
|
|
123
|
+
|
|
124
|
+
const plaudCookies = existing.cookies.filter(
|
|
125
|
+
c => c.domain === 'web.plaud.ai' || c.domain.endsWith('.plaud.ai') || c.domain === 'plaud.ai',
|
|
126
|
+
)
|
|
127
|
+
if (plaudCookies.length === 0) return
|
|
128
|
+
|
|
129
|
+
try {
|
|
130
|
+
await context.addCookies(
|
|
131
|
+
plaudCookies.map(c => ({
|
|
132
|
+
name: c.name,
|
|
133
|
+
value: c.value,
|
|
134
|
+
domain: c.domain,
|
|
135
|
+
path: c.path,
|
|
136
|
+
httpOnly: c.httpOnly,
|
|
137
|
+
secure: c.secure,
|
|
138
|
+
sameSite: (c.sameSite ?? 'Lax') as 'Strict' | 'Lax' | 'None',
|
|
139
|
+
expires: c.expires ?? -1,
|
|
140
|
+
})),
|
|
141
|
+
)
|
|
142
|
+
log.debug({ count: plaudCookies.length }, 'Injected existing session cookies')
|
|
143
|
+
} catch (err) {
|
|
144
|
+
log.debug({ err }, 'Could not inject existing cookies — fresh login required')
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
/**
|
|
149
|
+
* Wait for the first API request that carries a Bearer token.
|
|
150
|
+
* This fires automatically when:
|
|
151
|
+
* - The page loads with an existing authenticated session (cookies restored)
|
|
152
|
+
* - The user completes login via Google OAuth or email
|
|
153
|
+
*
|
|
154
|
+
* Resolves with the raw token string (without "bearer " prefix).
|
|
155
|
+
*/
|
|
156
|
+
function captureBearerToken(page: Page, timeoutMs: number, log: ReturnType<typeof getLogger>): Promise<string> {
|
|
157
|
+
return new Promise((resolve, reject) => {
|
|
158
|
+
const timer = setTimeout(() => {
|
|
159
|
+
page.off('request', handler)
|
|
160
|
+
reject(new AuthError(`Login timeout after ${Math.round(timeoutMs / 60_000)} minutes — no token captured`))
|
|
161
|
+
}, timeoutMs)
|
|
162
|
+
|
|
163
|
+
const handler = (req: PWRequest) => {
|
|
164
|
+
const auth = req.headers()['authorization'] ?? req.headers()['Authorization']
|
|
165
|
+
if (!auth) return
|
|
166
|
+
const token = auth.replace(/^bearer\s+/i, '').trim()
|
|
167
|
+
// Basic sanity check: JWT has 3 parts separated by dots
|
|
168
|
+
if (token.split('.').length === 3) {
|
|
169
|
+
clearTimeout(timer)
|
|
170
|
+
page.off('request', handler)
|
|
171
|
+
log.debug({ url: req.url() }, 'Bearer token found in request')
|
|
172
|
+
resolve(token)
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
page.on('request', handler)
|
|
177
|
+
})
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
/** Discover the correct regional API base URL (e.g. https://api-euc1.plaud.ai). */
|
|
181
|
+
async function discoverApiRegion(token: string): Promise<string> {
|
|
182
|
+
const log = getLogger()
|
|
183
|
+
try {
|
|
184
|
+
// The global endpoint returns a region-redirect response pointing to the right server
|
|
185
|
+
const res = await fetch('https://api.plaud.ai/user/me', {
|
|
186
|
+
headers: {
|
|
187
|
+
'Authorization': `bearer ${token}`,
|
|
188
|
+
'app-platform': 'web',
|
|
189
|
+
'Origin': 'https://web.plaud.ai',
|
|
190
|
+
},
|
|
191
|
+
})
|
|
192
|
+
const body = await res.json()
|
|
193
|
+
const regional = extractRegionalBaseUrl(body)
|
|
194
|
+
if (regional) return regional
|
|
195
|
+
|
|
196
|
+
// If the global endpoint returns user data directly (no redirect), it IS the right base
|
|
197
|
+
if ((body as Record<string, unknown>)?.data_user) return 'https://api.plaud.ai'
|
|
198
|
+
} catch (err) {
|
|
199
|
+
log.debug({ err }, 'Region discovery failed — using global API')
|
|
200
|
+
}
|
|
201
|
+
return 'https://api.plaud.ai'
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
/** Build the complete endpoint map from the known regional API base URL. */
|
|
205
|
+
function buildEndpointMap(apiBaseUrl: string): EndpointMap {
|
|
206
|
+
return {
|
|
207
|
+
listRecordings: `${apiBaseUrl}/file/simple/web`,
|
|
208
|
+
batchDetail: `${apiBaseUrl}/file/list`,
|
|
209
|
+
getAudioUrl: `${apiBaseUrl}/file/temp-url`,
|
|
210
|
+
userProfile: `${apiBaseUrl}/user/me`,
|
|
211
|
+
apiBaseUrl,
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
function isLoginUrl(url: string): boolean {
|
|
216
|
+
try {
|
|
217
|
+
const p = new URL(url).pathname
|
|
218
|
+
return p.startsWith('/login') || p.startsWith('/signin') || p.startsWith('/auth')
|
|
219
|
+
} catch {
|
|
220
|
+
return false
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
async function automatedLogin(page: Page, email: string, password: string): Promise<void> {
|
|
225
|
+
const log = getLogger()
|
|
226
|
+
log.info('Attempting automated login...')
|
|
227
|
+
|
|
228
|
+
const emailSelectors = [
|
|
229
|
+
'input[type="email"]', 'input[name="email"]',
|
|
230
|
+
'input[name="username"]', '[data-testid="email"]', '#email',
|
|
231
|
+
]
|
|
232
|
+
const passwordSelectors = [
|
|
233
|
+
'input[type="password"]', 'input[name="password"]',
|
|
234
|
+
'[data-testid="password"]', '#password',
|
|
235
|
+
]
|
|
236
|
+
|
|
237
|
+
for (const sel of emailSelectors) {
|
|
238
|
+
if (await page.locator(sel).count() > 0) { await page.fill(sel, email); break }
|
|
239
|
+
}
|
|
240
|
+
for (const sel of passwordSelectors) {
|
|
241
|
+
if (await page.locator(sel).count() > 0) { await page.fill(sel, password); break }
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
await page.click(
|
|
245
|
+
'button[type="submit"], [type="submit"], button:has-text("Login"), button:has-text("Sign in")',
|
|
246
|
+
)
|
|
247
|
+
await page.waitForLoadState('networkidle', { timeout: 15_000 }).catch(() => undefined)
|
|
248
|
+
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import fs from 'node:fs/promises'
|
|
2
|
+
import { authTokenPath } from '../storage/paths.js'
|
|
3
|
+
import { StoredCredentialsSchema, type AuthSession, type StoredCredentials } from './types.js'
|
|
4
|
+
import { writeFileAtomic } from '../storage/atomic.js'
|
|
5
|
+
import { getLogger } from '../logger.js'
|
|
6
|
+
|
|
7
|
+
export async function loadCredentials(): Promise<StoredCredentials | null> {
|
|
8
|
+
const tokenPath = authTokenPath()
|
|
9
|
+
try {
|
|
10
|
+
const raw = await fs.readFile(tokenPath, 'utf8')
|
|
11
|
+
const json = JSON.parse(raw)
|
|
12
|
+
const result = StoredCredentialsSchema.safeParse(json)
|
|
13
|
+
if (!result.success) {
|
|
14
|
+
getLogger().warn({ issues: result.error.issues }, 'Stored credentials failed schema validation — re-authenticate')
|
|
15
|
+
return null
|
|
16
|
+
}
|
|
17
|
+
return result.data
|
|
18
|
+
} catch (err: unknown) {
|
|
19
|
+
if ((err as NodeJS.ErrnoException).code === 'ENOENT') return null
|
|
20
|
+
getLogger().warn({ err }, 'Failed to read credentials file')
|
|
21
|
+
return null
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
export async function saveCredentials(session: AuthSession): Promise<void> {
|
|
26
|
+
const tokenPath = authTokenPath()
|
|
27
|
+
const stored: StoredCredentials = { ...session, schemaVersion: 1 }
|
|
28
|
+
await writeFileAtomic(tokenPath, JSON.stringify(stored, null, 2))
|
|
29
|
+
getLogger().info({ path: tokenPath }, 'Auth credentials saved')
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/** Returns true if the stored credentials are expired. */
|
|
33
|
+
/**
 * Returns true if the stored credentials are expired.
 *
 * Precedence of signals, most to least authoritative:
 *  1. An explicit `expiresAt` timestamp, when present.
 *  2. The `exp` claim decoded from the JWT bearer token.
 *  3. The earliest expiry among plaud.ai session cookies.
 *  4. A hard 30-day cap measured from `capturedAt`.
 */
export function isExpired(creds: StoredCredentials): boolean {
  const now = Date.now()

  // Explicit expiresAt takes precedence
  if (creds.expiresAt) {
    return now > new Date(creds.expiresAt).getTime()
  }

  // If we have a JWT bearer token, decode the exp claim (most reliable)
  if (creds.authToken) {
    const jwtExp = decodeJwtExp(creds.authToken)
    // NOTE(review): jwt exp is unix seconds, hence the *1000 — standard JWT semantics
    if (jwtExp !== null) return now > jwtExp * 1000
  }

  // Fallback: check only plaud.ai session cookies (ignore analytics/CDN cookies
  // which have short TTLs and would cause false "expired" readings)
  const plaudCookies = creds.cookies.filter(
    c => c.expires && c.expires > 0 && (c.domain.endsWith('.plaud.ai') || c.domain === 'plaud.ai')
  )
  if (plaudCookies.length > 0) {
    // Cookie expires is assumed to be unix seconds (Playwright's convention)
    const minExpiry = Math.min(...plaudCookies.map(c => (c.expires ?? 0) * 1000))
    if (minExpiry > 0 && now > minExpiry) return true
    // If cookies haven't expired we still fall through to the 30-day cap below.
  }

  // Last resort: treat as expired after 30 days
  const capturedAt = new Date(creds.capturedAt).getTime()
  return now - capturedAt > 30 * 24 * 60 * 60 * 1000
}
|
|
61
|
+
|
|
62
|
+
/** Decode the `exp` claim from a JWT (no signature verification — just decode). */
|
|
63
|
+
function decodeJwtExp(token: string): number | null {
|
|
64
|
+
try {
|
|
65
|
+
const parts = token.split('.')
|
|
66
|
+
if (parts.length !== 3) return null
|
|
67
|
+
const payload = JSON.parse(Buffer.from(parts[1]!, 'base64url').toString('utf8')) as Record<string, unknown>
|
|
68
|
+
const exp = payload['exp']
|
|
69
|
+
return typeof exp === 'number' ? exp : null
|
|
70
|
+
} catch {
|
|
71
|
+
return null
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
export function cookieHeader(creds: StoredCredentials): string {
|
|
76
|
+
return creds.cookies.map(c => `${c.name}=${c.value}`).join('; ')
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
export { authTokenPath }
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { z } from 'zod'
|
|
2
|
+
|
|
3
|
+
/** Shape of a single browser cookie persisted with the auth session. */
export const CookieSchema = z.object({
  name: z.string(),
  value: z.string(),
  domain: z.string(),
  path: z.string(),
  httpOnly: z.boolean(),
  secure: z.boolean(),
  sameSite: z.enum(['Strict', 'Lax', 'None']).optional(),
  // Expiry as Unix time in seconds (token-store multiplies by 1000 for ms);
  // absent for session cookies.
  expires: z.number().optional(),
})
|
|
13
|
+
|
|
14
|
+
/**
 * Known Plaud API endpoint URLs. Every field is optional — presumably
 * discovery is best-effort during auth capture (TODO confirm producer).
 */
export const EndpointMapSchema = z.object({
  listRecordings: z.string().optional(), // GET /file/simple/web
  batchDetail: z.string().optional(), // POST /file/list
  getAudioUrl: z.string().optional(), // GET /file/temp-url/<id>
  userProfile: z.string().optional(), // GET /user/me
  apiBaseUrl: z.string().optional(),
  /** @deprecated — transcript is embedded in the recording, not a separate endpoint */
  getTranscript: z.string().optional(),
})

export type EndpointMap = z.infer<typeof EndpointMapSchema>
|
|
25
|
+
|
|
26
|
+
/** A captured authentication session: cookies, optional bearer token, endpoints. */
export const AuthSessionSchema = z.object({
  cookies: z.array(CookieSchema),
  // Optional JWT bearer token; token-store decodes its `exp` claim for expiry.
  authToken: z.string().optional(),
  apiBaseUrl: z.string(),
  // ISO 8601 datetime strings (enforced by `.datetime()`).
  capturedAt: z.string().datetime(),
  expiresAt: z.string().datetime().optional(),
  endpointMap: EndpointMapSchema.optional(),
})

export type AuthSession = z.infer<typeof AuthSessionSchema>
|
|
36
|
+
|
|
37
|
+
/**
 * On-disk credential format: an AuthSession plus a literal schema version.
 * `z.literal(1)` makes loading reject files written by any other version.
 */
export const StoredCredentialsSchema = AuthSessionSchema.extend({
  schemaVersion: z.literal(1),
})

export type StoredCredentials = z.infer<typeof StoredCredentialsSchema>
|
package/src/cli/bin.ts
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { Command } from 'commander'
|
|
3
|
+
import { registerAuthCommand } from './commands/auth.js'
|
|
4
|
+
import { registerSyncCommand } from './commands/sync.js'
|
|
5
|
+
import { registerBackfillCommand } from './commands/backfill.js'
|
|
6
|
+
import { registerVerifyCommand } from './commands/verify.js'
|
|
7
|
+
import { ExitCode, toExitCode } from './exit-codes.js'
|
|
8
|
+
|
|
9
|
+
const program = new Command()
|
|
10
|
+
.name('alta-plaud')
|
|
11
|
+
.description('Export recordings, transcripts, and metadata from Plaud')
|
|
12
|
+
.version('1.0.0')
|
|
13
|
+
.helpOption('-h, --help', 'Show help')
|
|
14
|
+
|
|
15
|
+
registerAuthCommand(program)
|
|
16
|
+
registerSyncCommand(program)
|
|
17
|
+
registerBackfillCommand(program)
|
|
18
|
+
registerVerifyCommand(program)
|
|
19
|
+
|
|
20
|
+
program.parseAsync(process.argv).catch((err: unknown) => {
|
|
21
|
+
// This is the only place in the codebase where process.exit() is called.
|
|
22
|
+
const code = toExitCode(err)
|
|
23
|
+
if (err instanceof Error) {
|
|
24
|
+
console.error(`\nError: ${err.message}`)
|
|
25
|
+
if (process.env['DEBUG']) console.error(err.stack)
|
|
26
|
+
} else {
|
|
27
|
+
console.error(`\nUnexpected error: ${String(err)}`)
|
|
28
|
+
}
|
|
29
|
+
process.exit(code)
|
|
30
|
+
})
|