@alta-foundation/plaud-extractor 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. package/.env.example +9 -0
  2. package/.github/workflows/ci.yml +33 -0
  3. package/.github/workflows/publish.yml +46 -0
  4. package/CLAUDE.md +53 -0
  5. package/README.md +318 -0
  6. package/dist/PlaudExtractor.d.ts +61 -0
  7. package/dist/PlaudExtractor.d.ts.map +1 -0
  8. package/dist/PlaudExtractor.js +236 -0
  9. package/dist/PlaudExtractor.js.map +1 -0
  10. package/dist/auth/browser-auth.d.ts +10 -0
  11. package/dist/auth/browser-auth.d.ts.map +1 -0
  12. package/dist/auth/browser-auth.js +220 -0
  13. package/dist/auth/browser-auth.js.map +1 -0
  14. package/dist/auth/token-store.d.ts +9 -0
  15. package/dist/auth/token-store.d.ts.map +1 -0
  16. package/dist/auth/token-store.js +74 -0
  17. package/dist/auth/token-store.js.map +1 -0
  18. package/dist/auth/types.d.ts +266 -0
  19. package/dist/auth/types.d.ts.map +1 -0
  20. package/dist/auth/types.js +32 -0
  21. package/dist/auth/types.js.map +1 -0
  22. package/dist/cli/bin.d.ts +3 -0
  23. package/dist/cli/bin.d.ts.map +1 -0
  24. package/dist/cli/bin.js +30 -0
  25. package/dist/cli/bin.js.map +1 -0
  26. package/dist/cli/commands/auth.d.ts +3 -0
  27. package/dist/cli/commands/auth.d.ts.map +1 -0
  28. package/dist/cli/commands/auth.js +22 -0
  29. package/dist/cli/commands/auth.js.map +1 -0
  30. package/dist/cli/commands/backfill.d.ts +3 -0
  31. package/dist/cli/commands/backfill.d.ts.map +1 -0
  32. package/dist/cli/commands/backfill.js +59 -0
  33. package/dist/cli/commands/backfill.js.map +1 -0
  34. package/dist/cli/commands/sync.d.ts +3 -0
  35. package/dist/cli/commands/sync.d.ts.map +1 -0
  36. package/dist/cli/commands/sync.js +55 -0
  37. package/dist/cli/commands/sync.js.map +1 -0
  38. package/dist/cli/commands/verify.d.ts +3 -0
  39. package/dist/cli/commands/verify.d.ts.map +1 -0
  40. package/dist/cli/commands/verify.js +28 -0
  41. package/dist/cli/commands/verify.js.map +1 -0
  42. package/dist/cli/exit-codes.d.ts +8 -0
  43. package/dist/cli/exit-codes.d.ts.map +1 -0
  44. package/dist/cli/exit-codes.js +16 -0
  45. package/dist/cli/exit-codes.js.map +1 -0
  46. package/dist/cli/options.d.ts +31 -0
  47. package/dist/cli/options.d.ts.map +1 -0
  48. package/dist/cli/options.js +11 -0
  49. package/dist/cli/options.js.map +1 -0
  50. package/dist/client/endpoints.d.ts +26 -0
  51. package/dist/client/endpoints.d.ts.map +1 -0
  52. package/dist/client/endpoints.js +54 -0
  53. package/dist/client/endpoints.js.map +1 -0
  54. package/dist/client/http.d.ts +17 -0
  55. package/dist/client/http.d.ts.map +1 -0
  56. package/dist/client/http.js +92 -0
  57. package/dist/client/http.js.map +1 -0
  58. package/dist/client/plaud-client.d.ts +14 -0
  59. package/dist/client/plaud-client.d.ts.map +1 -0
  60. package/dist/client/plaud-client.js +216 -0
  61. package/dist/client/plaud-client.js.map +1 -0
  62. package/dist/client/types.d.ts +154 -0
  63. package/dist/client/types.d.ts.map +1 -0
  64. package/dist/client/types.js +41 -0
  65. package/dist/client/types.js.map +1 -0
  66. package/dist/errors.d.ts +24 -0
  67. package/dist/errors.d.ts.map +1 -0
  68. package/dist/errors.js +51 -0
  69. package/dist/errors.js.map +1 -0
  70. package/dist/index.d.ts +7 -0
  71. package/dist/index.d.ts.map +1 -0
  72. package/dist/index.js +5 -0
  73. package/dist/index.js.map +1 -0
  74. package/dist/logger.d.ts +9 -0
  75. package/dist/logger.d.ts.map +1 -0
  76. package/dist/logger.js +37 -0
  77. package/dist/logger.js.map +1 -0
  78. package/dist/mcp/job-tools.d.ts +3 -0
  79. package/dist/mcp/job-tools.d.ts.map +1 -0
  80. package/dist/mcp/job-tools.js +108 -0
  81. package/dist/mcp/job-tools.js.map +1 -0
  82. package/dist/mcp/read-tools.d.ts +3 -0
  83. package/dist/mcp/read-tools.d.ts.map +1 -0
  84. package/dist/mcp/read-tools.js +173 -0
  85. package/dist/mcp/read-tools.js.map +1 -0
  86. package/dist/mcp/server.d.ts +3 -0
  87. package/dist/mcp/server.d.ts.map +1 -0
  88. package/dist/mcp/server.js +32 -0
  89. package/dist/mcp/server.js.map +1 -0
  90. package/dist/storage/atomic.d.ts +5 -0
  91. package/dist/storage/atomic.d.ts.map +1 -0
  92. package/dist/storage/atomic.js +51 -0
  93. package/dist/storage/atomic.js.map +1 -0
  94. package/dist/storage/checksums.d.ts +15 -0
  95. package/dist/storage/checksums.d.ts.map +1 -0
  96. package/dist/storage/checksums.js +56 -0
  97. package/dist/storage/checksums.js.map +1 -0
  98. package/dist/storage/dataset-writer.d.ts +21 -0
  99. package/dist/storage/dataset-writer.d.ts.map +1 -0
  100. package/dist/storage/dataset-writer.js +52 -0
  101. package/dist/storage/dataset-writer.js.map +1 -0
  102. package/dist/storage/paths.d.ts +9 -0
  103. package/dist/storage/paths.d.ts.map +1 -0
  104. package/dist/storage/paths.js +38 -0
  105. package/dist/storage/paths.js.map +1 -0
  106. package/dist/storage/recording-store.d.ts +24 -0
  107. package/dist/storage/recording-store.d.ts.map +1 -0
  108. package/dist/storage/recording-store.js +161 -0
  109. package/dist/storage/recording-store.js.map +1 -0
  110. package/dist/sync/download-queue.d.ts +21 -0
  111. package/dist/sync/download-queue.d.ts.map +1 -0
  112. package/dist/sync/download-queue.js +82 -0
  113. package/dist/sync/download-queue.js.map +1 -0
  114. package/dist/sync/incremental.d.ts +21 -0
  115. package/dist/sync/incremental.d.ts.map +1 -0
  116. package/dist/sync/incremental.js +96 -0
  117. package/dist/sync/incremental.js.map +1 -0
  118. package/dist/sync/sync-engine.d.ts +6 -0
  119. package/dist/sync/sync-engine.d.ts.map +1 -0
  120. package/dist/sync/sync-engine.js +135 -0
  121. package/dist/sync/sync-engine.js.map +1 -0
  122. package/dist/sync/types.d.ts +130 -0
  123. package/dist/sync/types.d.ts.map +1 -0
  124. package/dist/sync/types.js +17 -0
  125. package/dist/sync/types.js.map +1 -0
  126. package/dist/transcript/formatter.d.ts +4 -0
  127. package/dist/transcript/formatter.d.ts.map +1 -0
  128. package/dist/transcript/formatter.js +88 -0
  129. package/dist/transcript/formatter.js.map +1 -0
  130. package/package.json +41 -0
  131. package/src/PlaudExtractor.ts +275 -0
  132. package/src/auth/browser-auth.ts +248 -0
  133. package/src/auth/token-store.ts +79 -0
  134. package/src/auth/types.ts +41 -0
  135. package/src/cli/bin.ts +30 -0
  136. package/src/cli/commands/auth.ts +27 -0
  137. package/src/cli/commands/backfill.ts +77 -0
  138. package/src/cli/commands/sync.ts +71 -0
  139. package/src/cli/commands/verify.ts +31 -0
  140. package/src/cli/exit-codes.ts +14 -0
  141. package/src/cli/options.ts +10 -0
  142. package/src/client/endpoints.ts +62 -0
  143. package/src/client/http.ts +110 -0
  144. package/src/client/plaud-client.ts +268 -0
  145. package/src/client/types.ts +62 -0
  146. package/src/errors.ts +57 -0
  147. package/src/index.ts +17 -0
  148. package/src/logger.ts +49 -0
  149. package/src/mcp/job-tools.ts +156 -0
  150. package/src/mcp/read-tools.ts +204 -0
  151. package/src/mcp/server.ts +39 -0
  152. package/src/storage/atomic.ts +51 -0
  153. package/src/storage/checksums.ts +76 -0
  154. package/src/storage/dataset-writer.ts +74 -0
  155. package/src/storage/paths.ts +44 -0
  156. package/src/storage/recording-store.ts +182 -0
  157. package/src/sync/download-queue.ts +102 -0
  158. package/src/sync/incremental.ts +111 -0
  159. package/src/sync/sync-engine.ts +183 -0
  160. package/src/sync/types.ts +64 -0
  161. package/src/transcript/formatter.ts +91 -0
  162. package/tsconfig.build.json +8 -0
  163. package/tsconfig.json +19 -0
@@ -0,0 +1,27 @@
1
+ import type { Command } from 'commander'
2
+ import { PlaudExtractor } from '../../PlaudExtractor.js'
3
+ import { AuthError } from '../../errors.js'
4
+ import { authTokenPath } from '../../auth/token-store.js'
5
+
6
/**
 * Registers the `auth` subcommand on the given commander program.
 *
 * When invoked, the command builds a `PlaudExtractor`, calls its
 * `authenticate` method with the `--headless` flag and the `PLAUD_EMAIL` /
 * `PLAUD_PASSWORD` environment variables, and then prints where the
 * credentials were stored (as reported by `authTokenPath()`).
 *
 * @param program - The commander program to attach the subcommand to.
 */
export function registerAuthCommand(program: Command): void {
  const runAuth = async ({ headless, out }: { headless: boolean; out?: string }): Promise<void> => {
    const extractor = new PlaudExtractor({ outDir: out })

    console.log('Launching browser to authenticate with Plaud...')

    // Credentials are only read from the environment, never from CLI flags.
    const email = process.env['PLAUD_EMAIL']
    const password = process.env['PLAUD_PASSWORD']
    await extractor.authenticate({ headless, email, password })

    const successLines = [
      `\nAuthentication successful!`,
      `Credentials saved to: ${authTokenPath()}`,
      `\nYou can now run: alta-plaud sync`,
    ]
    for (const line of successLines) {
      console.log(line)
    }
  }

  const authCommand = program.command('auth')
  authCommand.description('Authenticate with Plaud by launching a browser (required before first sync)')
  authCommand.option('--headless', 'Run browser in headless mode (requires PLAUD_EMAIL + PLAUD_PASSWORD env vars)', false)
  authCommand.option('--out <dir>', 'Data directory for logs', undefined)
  authCommand.action(runAuth)
}
@@ -0,0 +1,77 @@
1
+ import type { Command } from 'commander'
2
+ import { PlaudExtractor } from '../../PlaudExtractor.js'
3
+ import { defaultOutDir } from '../../storage/paths.js'
4
+ import type { TranscriptFormat } from '../../storage/recording-store.js'
5
+
6
/**
 * Registers the `backfill` subcommand on the given commander program.
 *
 * The command validates `--since`, asks for confirmation (unless `--yes` or
 * `--dry-run` is given), calls `PlaudExtractor.backfill` with the parsed
 * options and prints a short summary of the result.
 *
 * @param program - The commander program to attach the subcommand to.
 * @throws Error from the option parser / action when `--limit`,
 *   `--concurrency` or `--since` cannot be parsed.
 */
export function registerBackfillCommand(program: Command): void {
  // Commander invokes custom option parsers as (value, previous). Passing the
  // global `parseInt` directly turns `previous` (e.g. the default 3) into the
  // radix, so `--concurrency 5` would parse to NaN. Always parse in base 10.
  const parseInteger = (value: string): number => {
    const parsed = Number.parseInt(value, 10)
    if (Number.isNaN(parsed)) {
      throw new Error(`Expected an integer but received "${value}"`)
    }
    return parsed
  }

  program
    .command('backfill')
    .description('Download all recordings from scratch (ignores incremental state)')
    .option('--out <dir>', 'Output directory', defaultOutDir())
    .option('--since <iso>', 'Only backfill recordings after this ISO date')
    .option('--limit <n>', 'Max number of recordings to process', parseInteger)
    .option('--concurrency <n>', 'Parallel downloads (default: 3)', parseInteger, 3)
    .option('--formats <list>', 'Transcript formats: json,txt,md (default: all)', 'json,txt,md')
    .option('--dataset', 'Append to datasets/plaud_transcripts.jsonl (default: on)', true)
    .option('--no-dataset', 'Skip dataset output')
    .option('--dry-run', 'Print plan without downloading', false)
    .option('--verbose', 'Verbose logging', false)
    .option('--yes', 'Skip confirmation prompt', false)
    .action(async (opts: {
      out: string
      since?: string
      limit?: number
      concurrency: number
      formats: string
      dataset: boolean
      dryRun: boolean
      verbose: boolean
      yes: boolean
    }) => {
      // Reject an unparseable date up front: an Invalid Date compares false
      // against everything, which would silently disable the filter.
      let since: Date | undefined
      if (opts.since) {
        since = new Date(opts.since)
        if (Number.isNaN(since.getTime())) {
          throw new Error(`Invalid --since date: "${opts.since}" (expected an ISO 8601 date)`)
        }
      }

      if (!opts.yes && !opts.dryRun) {
        const confirmed = await confirm(
          'Backfill will re-evaluate all recordings and may overwrite existing files. Continue? (y/N) '
        )
        if (!confirmed) {
          console.log('Aborted.')
          return
        }
      }

      const extractor = new PlaudExtractor({ outDir: opts.out, verbose: opts.verbose })
      const formats = parseFormats(opts.formats)

      const result = await extractor.backfill({
        since,
        limit: opts.limit,
        concurrency: opts.concurrency,
        formats,
        includeDataset: opts.dataset,
        dryRun: opts.dryRun,
      })

      const durationSec = (result.durationMs / 1000).toFixed(1)
      console.log(`\nBackfill complete (${durationSec}s)`)
      console.log(` Downloaded: ${result.succeeded}`)
      console.log(` Skipped: ${result.skipped}`)
      console.log(` Failed: ${result.failed}`)
      if (result.datasetPath) console.log(` Dataset: ${result.datasetPath}`)
    })
}
61
+
62
/**
 * Parses a comma-separated `--formats` value into the list of supported
 * transcript formats.
 *
 * Entries are trimmed and lower-cased before matching, so `"json, TXT"` is
 * accepted. Unknown entries are ignored and duplicates are collapsed; the
 * order of first appearance is preserved.
 *
 * @param str - Raw option value, e.g. `"json,txt,md"`.
 * @returns The recognised formats (possibly empty).
 */
function parseFormats(str: string): TranscriptFormat[] {
  const valid: readonly TranscriptFormat[] = ['json', 'txt', 'md']
  const selected = new Set<TranscriptFormat>()
  for (const part of str.split(',')) {
    const candidate = part.trim().toLowerCase() as TranscriptFormat
    if (valid.includes(candidate)) selected.add(candidate)
  }
  return Array.from(selected)
}
66
+
67
/**
 * Prompts on stdout and reads one line from stdin.
 *
 * @param message - Prompt text written before waiting for input.
 * @returns `true` when the trimmed, case-insensitive answer is `y` or `yes`;
 *   `false` for any other answer or when stdin closes without an answer.
 */
async function confirm(message: string): Promise<boolean> {
  const { createInterface } = await import('node:readline')
  const rl = createInterface({ input: process.stdin, output: process.stdout })

  return new Promise<boolean>(resolve => {
    // If stdin ends (EOF / piped empty input) no answer ever arrives; treat
    // that as "no" instead of leaving the promise pending forever. After an
    // answer has resolved the promise this extra resolve is a no-op.
    rl.once('close', () => resolve(false))

    rl.question(message, (answer: string) => {
      const normalized = answer.trim().toLowerCase()
      // Resolve before closing: close() emits 'close' synchronously.
      resolve(normalized === 'y' || normalized === 'yes')
      rl.close()
    })
  })
}
@@ -0,0 +1,71 @@
1
+ import type { Command } from 'commander'
2
+ import { PlaudExtractor } from '../../PlaudExtractor.js'
3
+ import { defaultOutDir } from '../../storage/paths.js'
4
+ import type { TranscriptFormat } from '../../storage/recording-store.js'
5
+
6
/**
 * Registers the `sync` subcommand on the given commander program.
 *
 * The command validates `--since`, builds a `PlaudExtractor` from the CLI
 * options, calls its `sync` method and prints the result via
 * `printSyncSummary`.
 *
 * @param program - The commander program to attach the subcommand to.
 * @throws Error from the option parser / action when `--limit`,
 *   `--concurrency` or `--since` cannot be parsed.
 */
export function registerSyncCommand(program: Command): void {
  // Commander invokes custom option parsers as (value, previous). Passing the
  // global `parseInt` directly turns `previous` (e.g. the default 3) into the
  // radix, so `--concurrency 5` would parse to NaN. Always parse in base 10.
  const parseInteger = (value: string): number => {
    const parsed = Number.parseInt(value, 10)
    if (Number.isNaN(parsed)) {
      throw new Error(`Expected an integer but received "${value}"`)
    }
    return parsed
  }

  program
    .command('sync')
    .description('Pull new or updated recordings from Plaud (incremental)')
    .option('--out <dir>', 'Output directory', defaultOutDir())
    .option('--since <iso>', 'Only sync recordings after this ISO date (overrides last-sync state)')
    .option('--limit <n>', 'Max number of recordings to process', parseInteger)
    .option('--concurrency <n>', 'Parallel downloads (default: 3)', parseInteger, 3)
    .option('--formats <list>', 'Transcript formats: json,txt,md (default: all)', 'json,txt,md')
    .option('--dataset', 'Append to datasets/plaud_transcripts.jsonl (default: on)', true)
    .option('--no-dataset', 'Skip dataset output')
    .option('--dry-run', 'Print plan without downloading', false)
    .option('--verbose', 'Verbose logging', false)
    .option('--redact', 'Redact tokens from logs', false)
    .action(async (opts: {
      out: string
      since?: string
      limit?: number
      concurrency: number
      formats: string
      dataset: boolean
      dryRun: boolean
      verbose: boolean
      redact: boolean
    }) => {
      // Reject an unparseable date up front: an Invalid Date compares false
      // against everything, which would silently disable the filter.
      let since: Date | undefined
      if (opts.since) {
        since = new Date(opts.since)
        if (Number.isNaN(since.getTime())) {
          throw new Error(`Invalid --since date: "${opts.since}" (expected an ISO 8601 date)`)
        }
      }

      const extractor = new PlaudExtractor({
        outDir: opts.out,
        verbose: opts.verbose,
        redact: opts.redact,
      })

      const formats = parseFormats(opts.formats)
      const result = await extractor.sync({
        since,
        limit: opts.limit,
        concurrency: opts.concurrency,
        formats,
        includeDataset: opts.dataset,
        dryRun: opts.dryRun,
      })

      printSyncSummary(result)
    })
}
50
+
51
/**
 * Parses a comma-separated `--formats` value into the list of supported
 * transcript formats.
 *
 * Entries are trimmed and lower-cased before matching, so `"json, TXT"` is
 * accepted. Unknown entries are ignored and duplicates are collapsed; the
 * order of first appearance is preserved.
 *
 * @param str - Raw option value, e.g. `"json,txt,md"`.
 * @returns The recognised formats (possibly empty).
 */
function parseFormats(str: string): TranscriptFormat[] {
  const valid: readonly TranscriptFormat[] = ['json', 'txt', 'md']
  const selected = new Set<TranscriptFormat>()
  for (const part of str.split(',')) {
    const candidate = part.trim().toLowerCase() as TranscriptFormat
    if (valid.includes(candidate)) selected.add(candidate)
  }
  return Array.from(selected)
}
55
+
56
/**
 * Prints a human-readable summary of a sync run.
 *
 * Counts (and the dataset path, when present) go to stdout; the list of
 * failed recordings, if any, goes to stderr.
 *
 * @param result - The result object returned by the sync run.
 */
function printSyncSummary(result: import('../../sync/types.js').SyncResult): void {
  const seconds = (result.durationMs / 1000).toFixed(1)

  const summaryLines = [
    `\nSync complete (${seconds}s)`,
    ` Downloaded: ${result.succeeded}`,
    ` Skipped: ${result.skipped}`,
    ` Failed: ${result.failed}`,
  ]
  if (result.datasetPath) {
    summaryLines.push(` Dataset: ${result.datasetPath}`)
  }
  summaryLines.forEach(line => {
    console.log(line)
  })

  // Nothing more to report when every recording succeeded.
  if (result.errors.length === 0) return

  console.error(`\nFailed recordings:`)
  result.errors.forEach(({ recordingId, error }) => {
    console.error(` ${recordingId}: ${error.message}`)
  })
}
@@ -0,0 +1,31 @@
1
+ import type { Command } from 'commander'
2
+ import { PlaudExtractor } from '../../PlaudExtractor.js'
3
+ import { defaultOutDir } from '../../storage/paths.js'
4
+
5
/**
 * Registers the `verify` subcommand on the given commander program.
 *
 * The command calls `PlaudExtractor.verify` (optionally with `repair`) and
 * prints the scan counts to stdout and any reported issues to stderr.
 *
 * @param program - The commander program to attach the subcommand to.
 */
export function registerVerifyCommand(program: Command): void {
  const runVerify = async (opts: { out: string; repair: boolean; verbose: boolean }): Promise<void> => {
    const { out, repair, verbose } = opts
    const extractor = new PlaudExtractor({ outDir: out, verbose })

    console.log(`Verifying recordings in ${out}...`)
    const report = await extractor.verify({ repair })

    console.log(`\nVerify complete`)
    console.log(` Scanned: ${report.scanned}`)
    console.log(` OK: ${report.ok}`)
    console.log(` Failed: ${report.failed}`)
    // The repaired count is only shown when a repair was requested.
    if (repair) {
      console.log(` Repaired: ${report.repaired}`)
    }

    if (report.issues.length === 0) return

    console.error(`\nIssues found:`)
    for (const entry of report.issues) {
      console.error(` ${entry.recordingId}/${entry.file}: ${entry.issue}`)
    }
  }

  const verifyCommand = program.command('verify')
  verifyCommand.description('Verify checksums for all downloaded recordings')
  verifyCommand.option('--out <dir>', 'Output directory', defaultOutDir())
  verifyCommand.option('--repair', 'Re-download files with checksum mismatches', false)
  verifyCommand.option('--verbose', 'Verbose logging', false)
  verifyCommand.action(runVerify)
}
@@ -0,0 +1,14 @@
1
+ import { AuthError, StorageError } from '../errors.js'
2
+
3
/** Process exit codes used by the CLI. */
export enum ExitCode {
  /** No error. */
  Success = 0,
  /** Fallback code for any error that is neither an auth nor a storage error. */
  PartialFailure = 1,
  /** Returned by `toExitCode` for `AuthError`. */
  AuthFailure = 2,
  /** Returned by `toExitCode` for `StorageError`. */
  StorageError = 3,
}
9
+
10
/**
 * Maps a thrown value to the CLI exit code.
 *
 * @param err - Any caught value.
 * @returns `AuthFailure` for `AuthError`, `StorageError` for `StorageError`,
 *   and `PartialFailure` for everything else.
 */
export function toExitCode(err: unknown): ExitCode {
  // AuthError is checked first, matching the original precedence.
  return err instanceof AuthError
    ? ExitCode.AuthFailure
    : err instanceof StorageError
      ? ExitCode.StorageError
      : ExitCode.PartialFailure
}
@@ -0,0 +1,10 @@
1
/**
 * Shared commander option definitions (flag syntax plus help text), kept in
 * one place so commands can reuse them if needed in future.
 */
export const COMMON_OPTIONS = {
  out: {
    flags: '--out <dir>',
    description: 'Output directory',
  },
  verbose: {
    flags: '--verbose',
    description: 'Verbose logging',
  },
  redact: {
    flags: '--redact',
    description: 'Redact auth tokens from logs',
  },
  dryRun: {
    flags: '--dry-run',
    description: 'Print plan without downloading',
  },
  concurrency: {
    flags: '--concurrency <n>',
    description: 'Parallel downloads (default: 3)',
  },
  formats: {
    flags: '--formats <list>',
    description: 'Transcript formats: json,txt,md',
  },
  limit: {
    flags: '--limit <n>',
    description: 'Max number of recordings to process',
  },
}
@@ -0,0 +1,62 @@
1
+ import type { EndpointMap } from '../auth/types.js'
2
+
3
+ export type { EndpointMap }
4
+
5
+ // ---------------------------------------------------------------------------
6
+ // Real Plaud API paths (discovered from arbuzmell/plaud-api reference client)
7
+ // The user's account lives on a regional endpoint (e.g. api-euc1.plaud.ai for EU).
8
+ // Hit api.plaud.ai/user/me to discover the correct regional base URL.
9
+ // ---------------------------------------------------------------------------
10
+
11
/** Returns the map's `apiBaseUrl` when set, otherwise the global Plaud API host. */
const API_BASE = (map: EndpointMap): string => {
  const { apiBaseUrl } = map
  return apiBaseUrl ?? 'https://api.plaud.ai'
}
12
+
13
/**
 * Builds the recording-list URL with skip/limit pagination, e.g.
 * GET /file/simple/web?skip=N&limit=50&is_trash=0&sort_by=start_time&is_desc=true
 *
 * @param map - Endpoint map; `listRecordings` overrides the default path.
 * @param skip - Number of items to skip.
 * @param limit - Page size (defaults to 50).
 * @returns The absolute URL including the query string.
 */
export function buildListUrl(map: EndpointMap, skip: number, limit = 50): string {
  const template = map.listRecordings ?? `${API_BASE(map)}/file/simple/web`
  // A configured endpoint may carry a trailing `/{id}` placeholder; drop it.
  const listUrl = new URL(template.replace('/{id}', ''))

  const query: Array<[string, string]> = [
    ['skip', String(skip)],
    ['limit', String(limit)],
    ['is_trash', '0'],
    ['sort_by', 'start_time'],
    ['is_desc', 'true'],
  ]
  for (const [key, value] of query) {
    listUrl.searchParams.set(key, value)
  }

  return listUrl.toString()
}
27
+
28
/**
 * Returns the URL for POST /file/list — the batch detail fetch that includes
 * trans_result.
 * Body: ["file_id_1", "file_id_2"]
 * Response: { data_file_list: [...full recording objects with trans_result...] }
 *
 * @param map - Endpoint map; `batchDetail` overrides the default path.
 */
export function buildBatchDetailUrl(map: EndpointMap): string {
  const { batchDetail } = map
  if (batchDetail != null) return batchDetail
  return `${API_BASE(map)}/file/list`
}
36
+
37
/**
 * Builds the URL for GET /file/temp-url/<id>, which returns a presigned S3
 * audio download URL.
 * Response: { temp_url: "https://s3.amazonaws.com/...?X-Amz-..." }
 *
 * If the configured endpoint contains an `{id}` placeholder the id is
 * substituted in place; otherwise it is appended as the last path segment.
 * The id is always percent-encoded so it cannot alter the path or query.
 *
 * @param map - Endpoint map; `getAudioUrl` overrides the default path.
 * @param id - Recording identifier.
 * @returns The absolute URL for the temp-url request.
 */
export function buildAudioTempUrl(map: EndpointMap, id: string): string {
  const template = map.getAudioUrl ?? `${API_BASE(map)}/file/temp-url`
  const safeId = encodeURIComponent(id)

  if (template.includes('{id}')) {
    // Function replacer avoids any `$`-pattern interpretation in the id.
    return template.replace('{id}', () => safeId)
  }

  // No placeholder: append the id, tolerating a trailing slash on the base.
  return `${template.replace(/\/+$/, '')}/${safeId}`
}
45
+
46
/**
 * Returns the user-profile URL: the map's `userProfile` when set, otherwise
 * `/user/me` on the resolved API base.
 */
export function buildProfileUrl(map: EndpointMap): string {
  const { userProfile } = map
  if (userProfile != null) return userProfile
  return `${API_BASE(map)}/user/me`
}
49
+
50
/**
 * Extracts the regional API base URL from a region-redirect response.
 * The global api.plaud.ai answers with a body of the form:
 * { status: -302, data: { domains: { api: "https://api-euc1.plaud.ai" } } }
 *
 * @param response - Parsed response body of unknown shape.
 * @returns `data.domains.api` when `status` is -302 and the value is a
 *   string; otherwise `null`.
 */
export function extractRegionalBaseUrl(response: unknown): string | null {
  const body = response as Record<string, unknown> | null | undefined
  // Only a -302 status marks a region redirect.
  if (body?.status !== -302) return null

  const data = body.data as Record<string, unknown> | null | undefined
  const domains = data?.domains as Record<string, unknown> | null | undefined
  const regionalApi = domains?.api
  return typeof regionalApi === 'string' ? regionalApi : null
}
@@ -0,0 +1,110 @@
1
+ import { fetch, type RequestInit, type Response } from 'undici'
2
+ import { ApiError, AuthError } from '../errors.js'
3
+ import { getLogger } from '../logger.js'
4
+ import type { StoredCredentials } from '../auth/types.js'
5
+ import { cookieHeader } from '../auth/token-store.js'
6
+
7
/**
 * Browser-like headers sent with every authenticated Plaud API request.
 * `app-platform` and `edit-from` are custom headers that Plaud's API
 * validates and requires.
 */
const STATIC_HEADERS = {
  'Accept': 'application/json, */*',
  'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8',
  'Origin': 'https://web.plaud.ai',
  'Referer': 'https://web.plaud.ai/',
  // Required custom headers.
  'app-platform': 'web',
  'edit-from': 'web',
  'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
}
19
+
20
/**
 * Small HTTP wrapper around undici `fetch` for the Plaud API.
 *
 * Authenticated requests carry the static browser-like headers, the stored
 * cookies and (when present) a bearer token. Non-2xx responses are turned
 * into `AuthError` (401/403) or `ApiError` (everything else).
 */
export class HttpClient {
  constructor(private readonly creds: StoredCredentials) {}

  /** Builds the header set sent with every authenticated Plaud request. */
  private buildHeaders(): Record<string, string> {
    const headers: Record<string, string> = {
      'Content-Type': 'application/json',
      ...STATIC_HEADERS,
      'Cookie': cookieHeader(this.creds),
    }
    if (this.creds.authToken) {
      // Plaud API expects lowercase 'bearer' (per arbuzmell/plaud-api reference client)
      headers['Authorization'] = `bearer ${this.creds.authToken}`
    }
    return headers
  }

  /**
   * Performs an authenticated GET and parses the response as JSON.
   *
   * @param url - Absolute request URL.
   * @param init - Extra fetch options; its headers override the defaults.
   * @throws AuthError on 401/403, ApiError on any other non-2xx status.
   */
  async get<T>(url: string, init?: RequestInit): Promise<T> {
    const log = getLogger()
    log.debug({ url }, 'GET')

    const res = await fetch(url, {
      ...init,
      method: 'GET',
      headers: { ...this.buildHeaders(), ...(init?.headers as Record<string, string> | undefined) },
    })

    await this.assertOk(res, url)
    return res.json() as Promise<T>
  }

  /**
   * Performs an authenticated POST with a JSON-serialised body and parses the
   * response as JSON.
   *
   * @param url - Absolute request URL.
   * @param body - Value passed through `JSON.stringify`.
   * @param init - Extra fetch options; its headers override the defaults.
   * @throws AuthError on 401/403, ApiError on any other non-2xx status.
   */
  async post<T>(url: string, body: unknown, init?: RequestInit): Promise<T> {
    const log = getLogger()
    log.debug({ url }, 'POST')

    const res = await fetch(url, {
      ...init,
      method: 'POST',
      headers: { ...this.buildHeaders(), ...(init?.headers as Record<string, string> | undefined) },
      body: JSON.stringify(body),
    })

    await this.assertOk(res, url)
    return res.json() as Promise<T>
  }

  /**
   * Performs an authenticated GET and returns the raw response body stream.
   *
   * @throws AuthError / ApiError on non-2xx, ApiError when there is no body.
   */
  async getStream(url: string): Promise<AsyncIterable<Uint8Array>> {
    const log = getLogger()
    log.debug({ url }, 'GET (stream)')

    const res = await fetch(url, {
      method: 'GET',
      headers: this.buildHeaders(),
    })

    await this.assertOk(res, url)

    if (!res.body) {
      throw new ApiError(`No response body from ${url}`, res.status)
    }

    return res.body as unknown as AsyncIterable<Uint8Array>
  }

  /**
   * Download from an external URL (e.g. presigned S3) without Plaud auth headers.
   * S3 presigned URLs sign only the `host` header — sending extra headers breaks the request.
   *
   * The query string (which carries the presigned signature) is stripped from
   * both the debug log and any error message.
   *
   * @throws AuthError / ApiError on non-2xx, ApiError when there is no body.
   */
  async downloadExternalUrl(url: string): Promise<AsyncIterable<Uint8Array>> {
    const log = getLogger()
    const redactedUrl = url.split('?')[0] ?? url
    log.debug({ url: redactedUrl }, 'GET (external)')

    const res = await fetch(url, { method: 'GET' })
    // Report failures against the redacted URL so the signature never ends
    // up in an error message.
    await this.assertOk(res, redactedUrl)

    if (!res.body) {
      throw new ApiError(`No response body from external URL`, res.status)
    }

    return res.body as unknown as AsyncIterable<Uint8Array>
  }

  /**
   * Throws for non-2xx responses.
   *
   * @param res - The fetch response to check.
   * @param url - URL used in the error message.
   * @throws AuthError on 401/403, ApiError on any other non-2xx status.
   */
  private async assertOk(res: Response, url: string): Promise<void> {
    if (res.ok) return

    // The body of a failed response is never read; cancel it so the
    // underlying connection is released. Cancellation errors are irrelevant.
    await res.body?.cancel().catch(() => undefined)

    if (res.status === 401 || res.status === 403) {
      throw new AuthError(`Auth failed for ${url} (${res.status}) — run 'alta-plaud auth' to re-authenticate`)
    }

    throw new ApiError(`HTTP ${res.status} for ${url}`, res.status)
  }
}
@@ -0,0 +1,268 @@
1
+ import { ApiError, AuthError } from '../errors.js'
2
+ import { getLogger } from '../logger.js'
3
+ import type { StoredCredentials } from '../auth/types.js'
4
+ import {
5
+ buildListUrl,
6
+ buildBatchDetailUrl,
7
+ buildAudioTempUrl,
8
+ buildProfileUrl,
9
+ extractRegionalBaseUrl,
10
+ type EndpointMap,
11
+ } from './endpoints.js'
12
+ import { HttpClient } from './http.js'
13
+ import {
14
+ PlaudRecordingSchema,
15
+ PlaudTranscriptSchema,
16
+ type PlaudRecording,
17
+ type PlaudTranscript,
18
+ type ListOptions,
19
+ type PlaudClient,
20
+ } from './types.js'
21
+
22
/**
 * `PlaudClient` implementation backed by `HttpClient`.
 *
 * Endpoint URLs come from the stored credentials' `endpointMap`, with
 * `apiBaseUrl` taken from the credentials themselves.
 */
export class PlaudApiClient implements PlaudClient {
  private readonly http: HttpClient
  private endpoints: EndpointMap

  constructor(creds: StoredCredentials) {
    this.http = new HttpClient(creds)
    this.endpoints = {
      ...creds.endpointMap,
      apiBaseUrl: creds.apiBaseUrl,
    }
  }

  /**
   * Checks the stored credentials by requesting the profile endpoint.
   *
   * @returns `true` when the response is a region redirect, has `status === 0`
   *   or carries `data_user`; `false` otherwise, including on any request
   *   error.
   */
  async isAuthenticated(): Promise<boolean> {
    try {
      const profileUrl = buildProfileUrl(this.endpoints)
      const raw = await this.http.get<unknown>(profileUrl)
      // A region-redirect response still means we're authenticated
      if (extractRegionalBaseUrl(raw) !== null) return true
      // Check for error status in response body
      const r = raw as Record<string, unknown>
      return r?.status === 0 || r?.data_user != null
    } catch (err) {
      const isAuthFailure =
        err instanceof AuthError ||
        (err instanceof ApiError && (err.statusCode === 401 || err.statusCode === 403))
      // Auth failures are the expected "not authenticated" signal. Anything
      // else (network error, bad JSON, ...) still yields false, but is logged
      // so it can be told apart from an expired session.
      if (!isAuthFailure) {
        getLogger().debug({ err }, 'Could not verify authentication')
      }
      return false
    }
  }

  /**
   * Iterates over recordings, fetching pages of 50 via skip/limit pagination.
   *
   * @param opts - Optional `since` (client-side filter on `recordedAt`) and
   *   `limit` (maximum number of recordings yielded).
   * @throws Whatever `HttpClient.get` or `PlaudRecordingSchema.parse` throw.
   */
  async *listRecordings(opts?: ListOptions): AsyncGenerator<PlaudRecording> {
    const log = getLogger()
    let skip = 0
    const limit = 50
    let count = 0

    while (true) {
      const url = buildListUrl(this.endpoints, skip, limit)
      const raw = await this.http.get<unknown>(url)
      const items = extractFileList(raw)

      if (items.length === 0) break

      for (const item of items) {
        const recording = normalizeRecording(item)
        const parsed = PlaudRecordingSchema.parse(recording)

        // Apply since filter (no server-side date filtering)
        if (opts?.since && new Date(parsed.recordedAt) < opts.since) continue

        yield parsed
        count++

        if (opts?.limit && count >= opts.limit) return
      }

      log.debug({ skip, fetched: items.length }, 'Fetched recording page')

      // Stop when we get fewer items than the limit (last page)
      if (items.length < limit) break
      skip += items.length
    }
  }

  /**
   * Fetches the transcript for one recording.
   *
   * Transcript data lives inside the recording object (trans_result field),
   * so this posts the single id to the batch-detail endpoint and normalises
   * the first returned item.
   *
   * @throws ApiError (404) when the response contains no items.
   */
  async getTranscript(recordingId: string): Promise<PlaudTranscript> {
    const url = buildBatchDetailUrl(this.endpoints)
    const raw = await this.http.post<unknown>(url, [recordingId])
    const items = extractFileList(raw)

    if (items.length === 0) {
      throw new ApiError(`Recording ${recordingId} not found`, 404)
    }

    const recording = items[0] as Record<string, unknown>
    const normalized = normalizeTranscript(recording, recordingId)
    return PlaudTranscriptSchema.parse(normalized)
  }

  /**
   * Requests a temporary audio download URL for a recording.
   *
   * Best-effort: any error is logged at debug level and reported as `null`.
   *
   * @returns The URL, or `null` when it is missing or the request failed.
   */
  async getAudioDownloadUrl(recordingId: string): Promise<string | null> {
    try {
      const url = buildAudioTempUrl(this.endpoints, recordingId)
      const raw = await this.http.get<unknown>(url)
      return extractTempUrl(raw)
    } catch (err) {
      getLogger().debug({ recordingId, err }, 'Could not get audio download URL')
      return null
    }
  }

  /** Exposes the underlying HTTP client. */
  getHttpClient(): HttpClient {
    return this.http
  }
}
115
+
116
+ // ─── Adaptation Layer ─────────────────────────────────────────────────────────
117
+ //
118
+ // Real Plaud API shapes (from arbuzmell/plaud-api reference client):
119
+ //
120
+ // GET /file/simple/web response:
121
+ // { data_file_list: [{ id, filename, duration_ms, created_at, has_transcription,
122
+ // filesize, filetag_id_list, has_summary }] }
123
+ //
124
+ // POST /file/list response (full detail):
125
+ // { data_file_list: [{ ...above... + trans_result: [...segments...], ai_content: {...} }] }
126
+ //
127
+ // trans_result segment shape:
128
+ // { speaker: string, text: string, start_time_ms: number, end_time_ms: number }
129
+ //
130
+ // GET /file/temp-url/<id> response:
131
+ // { temp_url: "https://s3.amazonaws.com/...?X-Amz-..." }
132
+
133
+ type AnyObject = Record<string, unknown>
134
+
135
/**
 * Pulls the item array out of a list-style response.
 *
 * Checks, in order, the `data_file_list` (primary shape), `data` and `list`
 * properties, then the value itself.
 *
 * @param raw - Parsed response body of unknown shape.
 * @returns The first array found, or an empty array when none matches.
 */
function extractFileList(raw: unknown): unknown[] {
  const envelope = raw as AnyObject
  for (const key of ['data_file_list', 'data', 'list']) {
    const candidate = envelope?.[key]
    if (Array.isArray(candidate)) return candidate as unknown[]
  }
  // Last resort: the body itself is the list.
  return Array.isArray(raw) ? (raw as unknown[]) : []
}
145
+
146
/**
 * Maps a raw list item onto the recording shape expected by
 * `PlaudRecordingSchema`.
 *
 * Field handling follows the existing conventions of this file: `duration`
 * is treated as milliseconds and converted to seconds, `start_time` and
 * `version_ms` as Unix milliseconds, and `edit_time` as Unix seconds.
 * Missing timestamps fall back to the recording time (or to "now" when
 * `start_time` itself is missing).
 *
 * @param raw - One item from the list response.
 * @returns The normalised recording, with the untouched input under `_raw`.
 */
function normalizeRecording(raw: unknown): Omit<PlaudRecording, 'mimeType'> & { mimeType?: string } {
  const source = raw as AnyObject

  // Milliseconds -> seconds.
  const duration = Number(source['duration'] ?? source['duration_ms'] ?? 0) / 1000

  // Actual recording start (Unix ms); "now" when absent.
  const startedMs = Number(source['start_time'] ?? 0)
  let recordedAt: string
  if (startedMs > 0) {
    recordedAt = new Date(startedMs).toISOString()
  } else {
    recordedAt = new Date().toISOString()
  }

  // version_ms: last sync (ms); edit_time: seconds.
  const syncedMs = Number(source['version_ms'] ?? 0)
  const editedSec = Number(source['edit_time'] ?? 0)
  const updatedAt = syncedMs > 0 ? new Date(syncedMs).toISOString() : recordedAt
  const createdAt = editedSec > 0 ? new Date(editedSec * 1000).toISOString() : recordedAt

  // Mime type is derived from the extension of `fullname`.
  const mimeByExtension = new Map<string, string>([
    ['ogg', 'audio/ogg'],
    ['m4a', 'audio/m4a'],
    ['mp3', 'audio/mpeg'],
    ['opus', 'audio/ogg; codecs=opus'],
  ])
  const extension = String(source['fullname'] ?? '').split('.').pop()?.toLowerCase()
  const mimeType = mimeByExtension.get(extension ?? '') ?? 'audio/mp4'

  const transcribed = source['is_trans']

  return {
    id: String(source['id'] ?? ''),
    title: stringOrUndefined(source['filename'] ?? source['name'] ?? source['title']),
    duration,
    recordedAt,
    createdAt,
    updatedAt,
    fileSize: numberOrUndefined(source['filesize'] ?? source['file_size']),
    mimeType,
    hasTranscript: Boolean(transcribed ?? source['has_transcription'] ?? source['hasNote']),
    transcriptStatus: transcribed ? 'completed' : undefined,
    language: stringOrUndefined(source['language'] ?? source['lang']),
    deviceId: stringOrUndefined(source['serial_number'] ?? source['device_id'] ?? source['deviceId']),
    tags: arrayOfStrings(source['filetag_id_list'] ?? source['tags']),
    folderId: undefined,
    summary: extractSummaryText(source['ai_content']),
    _raw: source,
  }
}
192
+
193
/**
 * Maps a raw detail item onto the transcript shape expected by
 * `PlaudTranscriptSchema`.
 *
 * Segments are read from `trans_result` (an empty list when it is not an
 * array); `fullText` joins the non-empty segment texts with blank lines.
 * The duration is treated as milliseconds and converted to seconds.
 *
 * @param raw - One item from the batch-detail response.
 * @param recordingId - Id stored on the resulting transcript.
 * @returns The normalised transcript, with the untouched input under `_raw`.
 */
function normalizeTranscript(raw: unknown, recordingId: string): PlaudTranscript {
  const r = raw as AnyObject
  const transResult = r['trans_result']
  const segmentsRaw = Array.isArray(transResult) ? transResult as AnyObject[] : []

  const segments = segmentsRaw.map((s, i) => ({
    index: i,
    startMs: Number(s['start_time_ms'] ?? s['startMs'] ?? s['startTime'] ?? 0),
    endMs: Number(s['end_time_ms'] ?? s['endMs'] ?? s['endTime'] ?? 0),
    speaker: stringOrUndefined(s['speaker']),
    text: String(s['text'] ?? '').trim(),
    confidence: undefined,
  }))

  const fullText = segments.map(s => s.text).filter(Boolean).join('\n\n')
  // duration field from POST /file/list is in milliseconds — convert to seconds
  const durationMs = Number(r['duration_ms'] ?? r['duration'] ?? 0)
  const duration = durationMs / 1000

  // Accept both string and numeric timestamps: `toIso` handles either, so a
  // numeric Unix timestamp must not be discarded before it gets there.
  const createdRaw = r['created_at'] ?? r['createTime']
  const hasCreated =
    (typeof createdRaw === 'string' && createdRaw.length > 0) ||
    (typeof createdRaw === 'number' && Number.isFinite(createdRaw) && createdRaw > 0)

  return {
    recordingId,
    language: stringOrUndefined(r['language'] ?? r['lang']),
    duration,
    segments,
    fullText,
    createdAt: hasCreated ? toIso(createdRaw) : undefined,
    _raw: r as Record<string, unknown>,
  }
}
222
+
223
/**
 * Reads the download URL from a temp-url response.
 *
 * @param raw - Parsed response body of unknown shape.
 * @returns The first non-nullish of `temp_url`, `url`, `downloadUrl` when it
 *   is a non-empty string; otherwise `null`.
 */
function extractTempUrl(raw: unknown): string | null {
  const body = raw as AnyObject
  const candidate = body?.['temp_url'] ?? body?.['url'] ?? body?.['downloadUrl']
  const url = stringOrUndefined(candidate)
  return url === undefined ? null : url
}
227
+
228
/**
 * Extracts summary text from an `ai_content` value.
 *
 * @param aiContent - The raw `ai_content` field.
 * @returns The first non-nullish of `summary`, `text`, `content` when it is a
 *   non-empty string; `undefined` when the input is not an object or no such
 *   string exists.
 */
function extractSummaryText(aiContent: unknown): string | undefined {
  const isObject = Boolean(aiContent) && typeof aiContent === 'object'
  if (!isObject) return undefined

  // ai_content can have various summary fields
  const { summary, text, content } = aiContent as AnyObject
  return stringOrUndefined(summary ?? text ?? content)
}
235
+
236
+ // ─── Utilities ────────────────────────────────────────────────────────────────
237
+
238
/**
 * Converts a timestamp-like value to an ISO-8601 string.
 *
 * Strings containing `T` or `-` are parsed as dates; other strings and
 * numbers are treated as Unix timestamps (values above 1e12 as milliseconds,
 * smaller ones as seconds). Anything that cannot be converted — including
 * unparseable date strings and non-finite numbers — falls back to the
 * current time instead of throwing.
 *
 * @param val - Value of unknown type to convert.
 * @returns An ISO-8601 timestamp.
 */
function toIso(val: unknown): string {
  // Converts a Unix timestamp (seconds or milliseconds) when it is usable.
  const fromEpoch = (n: number): string | undefined => {
    if (!Number.isFinite(n) || n <= 0) return undefined
    const date = new Date(n > 1e12 ? n : n * 1000)
    return Number.isNaN(date.getTime()) ? undefined : date.toISOString()
  }

  if (typeof val === 'string' && val.length > 0) {
    if (val.includes('T') || val.includes('-')) {
      // Looks like a date string; only use it when it actually parses, since
      // toISOString() throws a RangeError on an invalid Date.
      const parsed = new Date(val)
      if (!Number.isNaN(parsed.getTime())) return parsed.toISOString()
    } else {
      // Unix timestamp as string
      const iso = fromEpoch(Number(val))
      if (iso !== undefined) return iso
    }
  } else if (typeof val === 'number') {
    const iso = fromEpoch(val)
    if (iso !== undefined) return iso
  }

  return new Date().toISOString()
}
253
+
254
/**
 * Returns the value when it is a non-empty string, otherwise `undefined`.
 *
 * @param val - Value of unknown type.
 */
function stringOrUndefined(val: unknown): string | undefined {
  return typeof val === 'string' && val.length > 0 ? val : undefined
}
258
+
259
/**
 * Converts a value to a finite, non-negative number.
 *
 * Only numbers and non-blank numeric strings are accepted. `null`, empty or
 * blank strings and other types yield `undefined` rather than being coerced
 * to 0 by `Number()`.
 *
 * @param val - Value of unknown type.
 * @returns The number, or `undefined` when it is missing, not numeric,
 *   non-finite or negative.
 */
function numberOrUndefined(val: unknown): number | undefined {
  let n: number
  if (typeof val === 'number') {
    n = val
  } else if (typeof val === 'string' && val.trim().length > 0) {
    n = Number(val)
  } else {
    return undefined
  }
  return Number.isFinite(n) && n >= 0 ? n : undefined
}
263
+
264
/**
 * Keeps only the string entries of an array.
 *
 * @param val - Value of unknown type.
 * @returns The string entries in their original order, or `undefined` when
 *   the input is not an array or contains no strings.
 */
function arrayOfStrings(val: unknown): string[] | undefined {
  if (!Array.isArray(val)) return undefined

  const strings: string[] = []
  for (const entry of val) {
    if (typeof entry === 'string') strings.push(entry)
  }
  return strings.length === 0 ? undefined : strings
}