@alta-foundation/plaud-extractor 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +9 -0
- package/.github/workflows/ci.yml +33 -0
- package/.github/workflows/publish.yml +46 -0
- package/CLAUDE.md +53 -0
- package/README.md +318 -0
- package/dist/PlaudExtractor.d.ts +61 -0
- package/dist/PlaudExtractor.d.ts.map +1 -0
- package/dist/PlaudExtractor.js +236 -0
- package/dist/PlaudExtractor.js.map +1 -0
- package/dist/auth/browser-auth.d.ts +10 -0
- package/dist/auth/browser-auth.d.ts.map +1 -0
- package/dist/auth/browser-auth.js +220 -0
- package/dist/auth/browser-auth.js.map +1 -0
- package/dist/auth/token-store.d.ts +9 -0
- package/dist/auth/token-store.d.ts.map +1 -0
- package/dist/auth/token-store.js +74 -0
- package/dist/auth/token-store.js.map +1 -0
- package/dist/auth/types.d.ts +266 -0
- package/dist/auth/types.d.ts.map +1 -0
- package/dist/auth/types.js +32 -0
- package/dist/auth/types.js.map +1 -0
- package/dist/cli/bin.d.ts +3 -0
- package/dist/cli/bin.d.ts.map +1 -0
- package/dist/cli/bin.js +30 -0
- package/dist/cli/bin.js.map +1 -0
- package/dist/cli/commands/auth.d.ts +3 -0
- package/dist/cli/commands/auth.d.ts.map +1 -0
- package/dist/cli/commands/auth.js +22 -0
- package/dist/cli/commands/auth.js.map +1 -0
- package/dist/cli/commands/backfill.d.ts +3 -0
- package/dist/cli/commands/backfill.d.ts.map +1 -0
- package/dist/cli/commands/backfill.js +59 -0
- package/dist/cli/commands/backfill.js.map +1 -0
- package/dist/cli/commands/sync.d.ts +3 -0
- package/dist/cli/commands/sync.d.ts.map +1 -0
- package/dist/cli/commands/sync.js +55 -0
- package/dist/cli/commands/sync.js.map +1 -0
- package/dist/cli/commands/verify.d.ts +3 -0
- package/dist/cli/commands/verify.d.ts.map +1 -0
- package/dist/cli/commands/verify.js +28 -0
- package/dist/cli/commands/verify.js.map +1 -0
- package/dist/cli/exit-codes.d.ts +8 -0
- package/dist/cli/exit-codes.d.ts.map +1 -0
- package/dist/cli/exit-codes.js +16 -0
- package/dist/cli/exit-codes.js.map +1 -0
- package/dist/cli/options.d.ts +31 -0
- package/dist/cli/options.d.ts.map +1 -0
- package/dist/cli/options.js +11 -0
- package/dist/cli/options.js.map +1 -0
- package/dist/client/endpoints.d.ts +26 -0
- package/dist/client/endpoints.d.ts.map +1 -0
- package/dist/client/endpoints.js +54 -0
- package/dist/client/endpoints.js.map +1 -0
- package/dist/client/http.d.ts +17 -0
- package/dist/client/http.d.ts.map +1 -0
- package/dist/client/http.js +92 -0
- package/dist/client/http.js.map +1 -0
- package/dist/client/plaud-client.d.ts +14 -0
- package/dist/client/plaud-client.d.ts.map +1 -0
- package/dist/client/plaud-client.js +216 -0
- package/dist/client/plaud-client.js.map +1 -0
- package/dist/client/types.d.ts +154 -0
- package/dist/client/types.d.ts.map +1 -0
- package/dist/client/types.js +41 -0
- package/dist/client/types.js.map +1 -0
- package/dist/errors.d.ts +24 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +51 -0
- package/dist/errors.js.map +1 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +5 -0
- package/dist/index.js.map +1 -0
- package/dist/logger.d.ts +9 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +37 -0
- package/dist/logger.js.map +1 -0
- package/dist/mcp/job-tools.d.ts +3 -0
- package/dist/mcp/job-tools.d.ts.map +1 -0
- package/dist/mcp/job-tools.js +108 -0
- package/dist/mcp/job-tools.js.map +1 -0
- package/dist/mcp/read-tools.d.ts +3 -0
- package/dist/mcp/read-tools.d.ts.map +1 -0
- package/dist/mcp/read-tools.js +173 -0
- package/dist/mcp/read-tools.js.map +1 -0
- package/dist/mcp/server.d.ts +3 -0
- package/dist/mcp/server.d.ts.map +1 -0
- package/dist/mcp/server.js +32 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/storage/atomic.d.ts +5 -0
- package/dist/storage/atomic.d.ts.map +1 -0
- package/dist/storage/atomic.js +51 -0
- package/dist/storage/atomic.js.map +1 -0
- package/dist/storage/checksums.d.ts +15 -0
- package/dist/storage/checksums.d.ts.map +1 -0
- package/dist/storage/checksums.js +56 -0
- package/dist/storage/checksums.js.map +1 -0
- package/dist/storage/dataset-writer.d.ts +21 -0
- package/dist/storage/dataset-writer.d.ts.map +1 -0
- package/dist/storage/dataset-writer.js +52 -0
- package/dist/storage/dataset-writer.js.map +1 -0
- package/dist/storage/paths.d.ts +9 -0
- package/dist/storage/paths.d.ts.map +1 -0
- package/dist/storage/paths.js +38 -0
- package/dist/storage/paths.js.map +1 -0
- package/dist/storage/recording-store.d.ts +24 -0
- package/dist/storage/recording-store.d.ts.map +1 -0
- package/dist/storage/recording-store.js +161 -0
- package/dist/storage/recording-store.js.map +1 -0
- package/dist/sync/download-queue.d.ts +21 -0
- package/dist/sync/download-queue.d.ts.map +1 -0
- package/dist/sync/download-queue.js +82 -0
- package/dist/sync/download-queue.js.map +1 -0
- package/dist/sync/incremental.d.ts +21 -0
- package/dist/sync/incremental.d.ts.map +1 -0
- package/dist/sync/incremental.js +96 -0
- package/dist/sync/incremental.js.map +1 -0
- package/dist/sync/sync-engine.d.ts +6 -0
- package/dist/sync/sync-engine.d.ts.map +1 -0
- package/dist/sync/sync-engine.js +135 -0
- package/dist/sync/sync-engine.js.map +1 -0
- package/dist/sync/types.d.ts +130 -0
- package/dist/sync/types.d.ts.map +1 -0
- package/dist/sync/types.js +17 -0
- package/dist/sync/types.js.map +1 -0
- package/dist/transcript/formatter.d.ts +4 -0
- package/dist/transcript/formatter.d.ts.map +1 -0
- package/dist/transcript/formatter.js +88 -0
- package/dist/transcript/formatter.js.map +1 -0
- package/package.json +41 -0
- package/src/PlaudExtractor.ts +275 -0
- package/src/auth/browser-auth.ts +248 -0
- package/src/auth/token-store.ts +79 -0
- package/src/auth/types.ts +41 -0
- package/src/cli/bin.ts +30 -0
- package/src/cli/commands/auth.ts +27 -0
- package/src/cli/commands/backfill.ts +77 -0
- package/src/cli/commands/sync.ts +71 -0
- package/src/cli/commands/verify.ts +31 -0
- package/src/cli/exit-codes.ts +14 -0
- package/src/cli/options.ts +10 -0
- package/src/client/endpoints.ts +62 -0
- package/src/client/http.ts +110 -0
- package/src/client/plaud-client.ts +268 -0
- package/src/client/types.ts +62 -0
- package/src/errors.ts +57 -0
- package/src/index.ts +17 -0
- package/src/logger.ts +49 -0
- package/src/mcp/job-tools.ts +156 -0
- package/src/mcp/read-tools.ts +204 -0
- package/src/mcp/server.ts +39 -0
- package/src/storage/atomic.ts +51 -0
- package/src/storage/checksums.ts +76 -0
- package/src/storage/dataset-writer.ts +74 -0
- package/src/storage/paths.ts +44 -0
- package/src/storage/recording-store.ts +182 -0
- package/src/sync/download-queue.ts +102 -0
- package/src/sync/incremental.ts +111 -0
- package/src/sync/sync-engine.ts +183 -0
- package/src/sync/types.ts +64 -0
- package/src/transcript/formatter.ts +91 -0
- package/tsconfig.build.json +8 -0
- package/tsconfig.json +19 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import type { Command } from 'commander'
|
|
2
|
+
import { PlaudExtractor } from '../../PlaudExtractor.js'
|
|
3
|
+
import { AuthError } from '../../errors.js'
|
|
4
|
+
import { authTokenPath } from '../../auth/token-store.js'
|
|
5
|
+
|
|
6
|
+
export function registerAuthCommand(program: Command): void {
|
|
7
|
+
program
|
|
8
|
+
.command('auth')
|
|
9
|
+
.description('Authenticate with Plaud by launching a browser (required before first sync)')
|
|
10
|
+
.option('--headless', 'Run browser in headless mode (requires PLAUD_EMAIL + PLAUD_PASSWORD env vars)', false)
|
|
11
|
+
.option('--out <dir>', 'Data directory for logs', undefined)
|
|
12
|
+
.action(async (opts: { headless: boolean; out?: string }) => {
|
|
13
|
+
const extractor = new PlaudExtractor({ outDir: opts.out })
|
|
14
|
+
|
|
15
|
+
console.log('Launching browser to authenticate with Plaud...')
|
|
16
|
+
|
|
17
|
+
await extractor.authenticate({
|
|
18
|
+
headless: opts.headless,
|
|
19
|
+
email: process.env['PLAUD_EMAIL'],
|
|
20
|
+
password: process.env['PLAUD_PASSWORD'],
|
|
21
|
+
})
|
|
22
|
+
|
|
23
|
+
console.log(`\nAuthentication successful!`)
|
|
24
|
+
console.log(`Credentials saved to: ${authTokenPath()}`)
|
|
25
|
+
console.log(`\nYou can now run: alta-plaud sync`)
|
|
26
|
+
})
|
|
27
|
+
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import type { Command } from 'commander'
|
|
2
|
+
import { PlaudExtractor } from '../../PlaudExtractor.js'
|
|
3
|
+
import { defaultOutDir } from '../../storage/paths.js'
|
|
4
|
+
import type { TranscriptFormat } from '../../storage/recording-store.js'
|
|
5
|
+
|
|
6
|
+
export function registerBackfillCommand(program: Command): void {
|
|
7
|
+
program
|
|
8
|
+
.command('backfill')
|
|
9
|
+
.description('Download all recordings from scratch (ignores incremental state)')
|
|
10
|
+
.option('--out <dir>', 'Output directory', defaultOutDir())
|
|
11
|
+
.option('--since <iso>', 'Only backfill recordings after this ISO date')
|
|
12
|
+
.option('--limit <n>', 'Max number of recordings to process', parseInt)
|
|
13
|
+
.option('--concurrency <n>', 'Parallel downloads (default: 3)', parseInt, 3)
|
|
14
|
+
.option('--formats <list>', 'Transcript formats: json,txt,md (default: all)', 'json,txt,md')
|
|
15
|
+
.option('--dataset', 'Append to datasets/plaud_transcripts.jsonl (default: on)', true)
|
|
16
|
+
.option('--no-dataset', 'Skip dataset output')
|
|
17
|
+
.option('--dry-run', 'Print plan without downloading', false)
|
|
18
|
+
.option('--verbose', 'Verbose logging', false)
|
|
19
|
+
.option('--yes', 'Skip confirmation prompt', false)
|
|
20
|
+
.action(async (opts: {
|
|
21
|
+
out: string
|
|
22
|
+
since?: string
|
|
23
|
+
limit?: number
|
|
24
|
+
concurrency: number
|
|
25
|
+
formats: string
|
|
26
|
+
dataset: boolean
|
|
27
|
+
dryRun: boolean
|
|
28
|
+
verbose: boolean
|
|
29
|
+
yes: boolean
|
|
30
|
+
}) => {
|
|
31
|
+
if (!opts.yes && !opts.dryRun) {
|
|
32
|
+
const confirmed = await confirm(
|
|
33
|
+
'Backfill will re-evaluate all recordings and may overwrite existing files. Continue? (y/N) '
|
|
34
|
+
)
|
|
35
|
+
if (!confirmed) {
|
|
36
|
+
console.log('Aborted.')
|
|
37
|
+
return
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
const extractor = new PlaudExtractor({ outDir: opts.out, verbose: opts.verbose })
|
|
42
|
+
const formats = parseFormats(opts.formats)
|
|
43
|
+
|
|
44
|
+
const result = await extractor.backfill({
|
|
45
|
+
since: opts.since ? new Date(opts.since) : undefined,
|
|
46
|
+
limit: opts.limit,
|
|
47
|
+
concurrency: opts.concurrency,
|
|
48
|
+
formats,
|
|
49
|
+
includeDataset: opts.dataset,
|
|
50
|
+
dryRun: opts.dryRun,
|
|
51
|
+
})
|
|
52
|
+
|
|
53
|
+
const durationSec = (result.durationMs / 1000).toFixed(1)
|
|
54
|
+
console.log(`\nBackfill complete (${durationSec}s)`)
|
|
55
|
+
console.log(` Downloaded: ${result.succeeded}`)
|
|
56
|
+
console.log(` Skipped: ${result.skipped}`)
|
|
57
|
+
console.log(` Failed: ${result.failed}`)
|
|
58
|
+
if (result.datasetPath) console.log(` Dataset: ${result.datasetPath}`)
|
|
59
|
+
})
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
function parseFormats(str: string): TranscriptFormat[] {
|
|
63
|
+
const valid: TranscriptFormat[] = ['json', 'txt', 'md']
|
|
64
|
+
return str.split(',').filter((f): f is TranscriptFormat => valid.includes(f as TranscriptFormat))
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
async function confirm(message: string): Promise<boolean> {
|
|
68
|
+
const { createInterface } = await import('node:readline')
|
|
69
|
+
process.stdout.write(message)
|
|
70
|
+
return new Promise(resolve => {
|
|
71
|
+
const rl = createInterface({ input: process.stdin, output: process.stdout })
|
|
72
|
+
rl.once('line', (answer: string) => {
|
|
73
|
+
rl.close()
|
|
74
|
+
resolve(answer.toLowerCase() === 'y' || answer.toLowerCase() === 'yes')
|
|
75
|
+
})
|
|
76
|
+
})
|
|
77
|
+
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import type { Command } from 'commander'
|
|
2
|
+
import { PlaudExtractor } from '../../PlaudExtractor.js'
|
|
3
|
+
import { defaultOutDir } from '../../storage/paths.js'
|
|
4
|
+
import type { TranscriptFormat } from '../../storage/recording-store.js'
|
|
5
|
+
|
|
6
|
+
export function registerSyncCommand(program: Command): void {
|
|
7
|
+
program
|
|
8
|
+
.command('sync')
|
|
9
|
+
.description('Pull new or updated recordings from Plaud (incremental)')
|
|
10
|
+
.option('--out <dir>', 'Output directory', defaultOutDir())
|
|
11
|
+
.option('--since <iso>', 'Only sync recordings after this ISO date (overrides last-sync state)')
|
|
12
|
+
.option('--limit <n>', 'Max number of recordings to process', parseInt)
|
|
13
|
+
.option('--concurrency <n>', 'Parallel downloads (default: 3)', parseInt, 3)
|
|
14
|
+
.option('--formats <list>', 'Transcript formats: json,txt,md (default: all)', 'json,txt,md')
|
|
15
|
+
.option('--dataset', 'Append to datasets/plaud_transcripts.jsonl (default: on)', true)
|
|
16
|
+
.option('--no-dataset', 'Skip dataset output')
|
|
17
|
+
.option('--dry-run', 'Print plan without downloading', false)
|
|
18
|
+
.option('--verbose', 'Verbose logging', false)
|
|
19
|
+
.option('--redact', 'Redact tokens from logs', false)
|
|
20
|
+
.action(async (opts: {
|
|
21
|
+
out: string
|
|
22
|
+
since?: string
|
|
23
|
+
limit?: number
|
|
24
|
+
concurrency: number
|
|
25
|
+
formats: string
|
|
26
|
+
dataset: boolean
|
|
27
|
+
dryRun: boolean
|
|
28
|
+
verbose: boolean
|
|
29
|
+
redact: boolean
|
|
30
|
+
}) => {
|
|
31
|
+
const extractor = new PlaudExtractor({
|
|
32
|
+
outDir: opts.out,
|
|
33
|
+
verbose: opts.verbose,
|
|
34
|
+
redact: opts.redact,
|
|
35
|
+
})
|
|
36
|
+
|
|
37
|
+
const formats = parseFormats(opts.formats)
|
|
38
|
+
const result = await extractor.sync({
|
|
39
|
+
since: opts.since ? new Date(opts.since) : undefined,
|
|
40
|
+
limit: opts.limit,
|
|
41
|
+
concurrency: opts.concurrency,
|
|
42
|
+
formats,
|
|
43
|
+
includeDataset: opts.dataset,
|
|
44
|
+
dryRun: opts.dryRun,
|
|
45
|
+
})
|
|
46
|
+
|
|
47
|
+
printSyncSummary(result)
|
|
48
|
+
})
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
function parseFormats(str: string): TranscriptFormat[] {
|
|
52
|
+
const valid: TranscriptFormat[] = ['json', 'txt', 'md']
|
|
53
|
+
return str.split(',').filter((f): f is TranscriptFormat => valid.includes(f as TranscriptFormat))
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
function printSyncSummary(result: import('../../sync/types.js').SyncResult): void {
|
|
57
|
+
const durationSec = (result.durationMs / 1000).toFixed(1)
|
|
58
|
+
console.log(`\nSync complete (${durationSec}s)`)
|
|
59
|
+
console.log(` Downloaded: ${result.succeeded}`)
|
|
60
|
+
console.log(` Skipped: ${result.skipped}`)
|
|
61
|
+
console.log(` Failed: ${result.failed}`)
|
|
62
|
+
if (result.datasetPath) {
|
|
63
|
+
console.log(` Dataset: ${result.datasetPath}`)
|
|
64
|
+
}
|
|
65
|
+
if (result.errors.length > 0) {
|
|
66
|
+
console.error(`\nFailed recordings:`)
|
|
67
|
+
for (const { recordingId, error } of result.errors) {
|
|
68
|
+
console.error(` ${recordingId}: ${error.message}`)
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type { Command } from 'commander'
|
|
2
|
+
import { PlaudExtractor } from '../../PlaudExtractor.js'
|
|
3
|
+
import { defaultOutDir } from '../../storage/paths.js'
|
|
4
|
+
|
|
5
|
+
export function registerVerifyCommand(program: Command): void {
|
|
6
|
+
program
|
|
7
|
+
.command('verify')
|
|
8
|
+
.description('Verify checksums for all downloaded recordings')
|
|
9
|
+
.option('--out <dir>', 'Output directory', defaultOutDir())
|
|
10
|
+
.option('--repair', 'Re-download files with checksum mismatches', false)
|
|
11
|
+
.option('--verbose', 'Verbose logging', false)
|
|
12
|
+
.action(async (opts: { out: string; repair: boolean; verbose: boolean }) => {
|
|
13
|
+
const extractor = new PlaudExtractor({ outDir: opts.out, verbose: opts.verbose })
|
|
14
|
+
|
|
15
|
+
console.log(`Verifying recordings in ${opts.out}...`)
|
|
16
|
+
const result = await extractor.verify({ repair: opts.repair })
|
|
17
|
+
|
|
18
|
+
console.log(`\nVerify complete`)
|
|
19
|
+
console.log(` Scanned: ${result.scanned}`)
|
|
20
|
+
console.log(` OK: ${result.ok}`)
|
|
21
|
+
console.log(` Failed: ${result.failed}`)
|
|
22
|
+
if (opts.repair) console.log(` Repaired: ${result.repaired}`)
|
|
23
|
+
|
|
24
|
+
if (result.issues.length > 0) {
|
|
25
|
+
console.error(`\nIssues found:`)
|
|
26
|
+
for (const issue of result.issues) {
|
|
27
|
+
console.error(` ${issue.recordingId}/${issue.file}: ${issue.issue}`)
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
})
|
|
31
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { AuthError, StorageError } from '../errors.js'
|
|
2
|
+
|
|
3
|
+
/**
 * Process exit codes for the CLI.
 * The numeric values are part of the CLI's external contract (scripts and CI
 * branch on them), so they must not be renumbered.
 */
export enum ExitCode {
  /** All requested work completed. */
  Success = 0,
  /** Some items failed, or an unclassified error occurred. */
  PartialFailure = 1,
  /** Authentication failed — re-run `alta-plaud auth`. */
  AuthFailure = 2,
  /** Local storage/filesystem failure. */
  StorageError = 3,
}
|
|
9
|
+
|
|
10
|
+
export function toExitCode(err: unknown): ExitCode {
|
|
11
|
+
if (err instanceof AuthError) return ExitCode.AuthFailure
|
|
12
|
+
if (err instanceof StorageError) return ExitCode.StorageError
|
|
13
|
+
return ExitCode.PartialFailure
|
|
14
|
+
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
// Shared option definitions (reused across commands if needed in future)
|
|
2
|
+
export const COMMON_OPTIONS = {
|
|
3
|
+
out: { flags: '--out <dir>', description: 'Output directory' },
|
|
4
|
+
verbose: { flags: '--verbose', description: 'Verbose logging' },
|
|
5
|
+
redact: { flags: '--redact', description: 'Redact auth tokens from logs' },
|
|
6
|
+
dryRun: { flags: '--dry-run', description: 'Print plan without downloading' },
|
|
7
|
+
concurrency: { flags: '--concurrency <n>', description: 'Parallel downloads (default: 3)' },
|
|
8
|
+
formats: { flags: '--formats <list>', description: 'Transcript formats: json,txt,md' },
|
|
9
|
+
limit: { flags: '--limit <n>', description: 'Max number of recordings to process' },
|
|
10
|
+
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import type { EndpointMap } from '../auth/types.js'
|
|
2
|
+
|
|
3
|
+
export type { EndpointMap }
|
|
4
|
+
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
// Real Plaud API paths (discovered from arbuzmell/plaud-api reference client)
|
|
7
|
+
// The user's account lives on a regional endpoint (e.g. api-euc1.plaud.ai for EU).
|
|
8
|
+
// Hit api.plaud.ai/user/me to discover the correct regional base URL.
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
|
|
11
|
+
const API_BASE = (map: EndpointMap) => map.apiBaseUrl ?? 'https://api.plaud.ai'
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* Build list URL using skip/limit pagination.
|
|
15
|
+
* GET /file/simple/web?skip=N&limit=50&is_trash=0&sort_by=start_time&is_desc=true
|
|
16
|
+
*/
|
|
17
|
+
export function buildListUrl(map: EndpointMap, skip: number, limit = 50): string {
|
|
18
|
+
const base = map.listRecordings ?? `${API_BASE(map)}/file/simple/web`
|
|
19
|
+
const url = new URL(base.replace('/{id}', ''))
|
|
20
|
+
url.searchParams.set('skip', String(skip))
|
|
21
|
+
url.searchParams.set('limit', String(limit))
|
|
22
|
+
url.searchParams.set('is_trash', '0')
|
|
23
|
+
url.searchParams.set('sort_by', 'start_time')
|
|
24
|
+
url.searchParams.set('is_desc', 'true')
|
|
25
|
+
return url.toString()
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Build URL for POST /file/list — batch detail fetch that includes trans_result.
|
|
30
|
+
* Body: ["file_id_1", "file_id_2"]
|
|
31
|
+
* Response: { data_file_list: [...full recording objects with trans_result...] }
|
|
32
|
+
*/
|
|
33
|
+
export function buildBatchDetailUrl(map: EndpointMap): string {
|
|
34
|
+
return map.batchDetail ?? `${API_BASE(map)}/file/list`
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Build URL for GET /file/temp-url/<id> — returns a presigned S3 audio download URL.
|
|
39
|
+
* Response: { temp_url: "https://s3.amazonaws.com/...?X-Amz-..." }
|
|
40
|
+
*/
|
|
41
|
+
export function buildAudioTempUrl(map: EndpointMap, id: string): string {
|
|
42
|
+
const base = map.getAudioUrl ?? `${API_BASE(map)}/file/temp-url`
|
|
43
|
+
return `${base.replace('/{id}', '')}/${id}`
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
export function buildProfileUrl(map: EndpointMap): string {
|
|
47
|
+
return map.userProfile ?? `${API_BASE(map)}/user/me`
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* Discover the correct regional API base URL by hitting the global endpoint.
|
|
52
|
+
* The global api.plaud.ai returns a region-redirect response:
|
|
53
|
+
* { status: -302, data: { domains: { api: "https://api-euc1.plaud.ai" } } }
|
|
54
|
+
*/
|
|
55
|
+
export function extractRegionalBaseUrl(response: unknown): string | null {
|
|
56
|
+
const r = response as Record<string, unknown>
|
|
57
|
+
if (r?.status === -302) {
|
|
58
|
+
const api = (r?.data as Record<string, unknown>)?.domains as Record<string, unknown>
|
|
59
|
+
if (typeof api?.api === 'string') return api.api
|
|
60
|
+
}
|
|
61
|
+
return null
|
|
62
|
+
}
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import { fetch, type RequestInit, type Response } from 'undici'
|
|
2
|
+
import { ApiError, AuthError } from '../errors.js'
|
|
3
|
+
import { getLogger } from '../logger.js'
|
|
4
|
+
import type { StoredCredentials } from '../auth/types.js'
|
|
5
|
+
import { cookieHeader } from '../auth/token-store.js'
|
|
6
|
+
|
|
7
|
+
// Browser-like headers that Plaud's API validates.
|
|
8
|
+
// app-platform and edit-from are required custom headers.
|
|
9
|
+
// Browser-like headers that Plaud's API validates.
// app-platform and edit-from are required custom headers.
// NOTE(review): the User-Agent pins a specific Chrome build to match the
// browser the reference client mimics — presumably any modern UA works, but
// confirm against the API before changing it.
const STATIC_HEADERS = {
  'Accept': 'application/json, */*',
  'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8',
  // Plaud's web app origin; requests appear to come from web.plaud.ai.
  'Origin': 'https://web.plaud.ai',
  'Referer': 'https://web.plaud.ai/',
  'app-platform': 'web',
  'edit-from': 'web',
  'User-Agent':
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
}
|
|
19
|
+
|
|
20
|
+
/**
 * Minimal HTTP client for the Plaud API built on undici's fetch.
 * Every request carries the session cookie, the browser-like STATIC_HEADERS,
 * and (when present) the stored bearer token.
 */
export class HttpClient {
  // Credentials are injected once and treated as read-only for the client's lifetime.
  constructor(private readonly creds: StoredCredentials) {}

  /**
   * Assemble the default header set for an authenticated Plaud API request.
   * Callers of get/post may override individual headers per request.
   */
  private buildHeaders(): Record<string, string> {
    const headers: Record<string, string> = {
      'Content-Type': 'application/json',
      ...STATIC_HEADERS,
      'Cookie': cookieHeader(this.creds),
    }
    if (this.creds.authToken) {
      // Plaud API expects lowercase 'bearer' (per arbuzmell/plaud-api reference client)
      headers['Authorization'] = `bearer ${this.creds.authToken}`
    }
    return headers
  }

  /**
   * GET `url` and parse the JSON response as T (no runtime validation here —
   * callers validate with their own schemas).
   * @throws AuthError on 401/403, ApiError on any other non-2xx status.
   */
  async get<T>(url: string, init?: RequestInit): Promise<T> {
    const log = getLogger()
    log.debug({ url }, 'GET')

    const res = await fetch(url, {
      ...init,
      method: 'GET',
      // Spread order: caller-supplied headers win over the defaults.
      headers: { ...this.buildHeaders(), ...(init?.headers as Record<string, string> | undefined) },
    })

    await this.assertOk(res, url)
    return res.json() as Promise<T>
  }

  /**
   * POST a JSON-serialized `body` to `url` and parse the JSON response as T.
   * @throws AuthError on 401/403, ApiError on any other non-2xx status.
   */
  async post<T>(url: string, body: unknown, init?: RequestInit): Promise<T> {
    const log = getLogger()
    log.debug({ url }, 'POST')

    const res = await fetch(url, {
      ...init,
      method: 'POST',
      headers: { ...this.buildHeaders(), ...(init?.headers as Record<string, string> | undefined) },
      body: JSON.stringify(body),
    })

    await this.assertOk(res, url)
    return res.json() as Promise<T>
  }

  /**
   * GET `url` with auth headers and return the raw response body as a byte
   * stream (for large downloads that should not be buffered in memory).
   * @throws ApiError when the response has no body.
   */
  async getStream(url: string): Promise<AsyncIterable<Uint8Array>> {
    const log = getLogger()
    log.debug({ url }, 'GET (stream)')

    const res = await fetch(url, {
      method: 'GET',
      headers: this.buildHeaders(),
    })

    await this.assertOk(res, url)

    if (!res.body) {
      throw new ApiError(`No response body from ${url}`, res.status)
    }

    // undici's ReadableStream is async-iterable at runtime; the cast only
    // bridges the missing type-level declaration.
    return res.body as unknown as AsyncIterable<Uint8Array>
  }

  /**
   * Download from an external URL (e.g. presigned S3) without Plaud auth headers.
   * S3 presigned URLs sign only the `host` header — sending extra headers breaks the request.
   */
  async downloadExternalUrl(url: string): Promise<AsyncIterable<Uint8Array>> {
    const log = getLogger()
    // Log only the path — the query string carries the presigned signature.
    log.debug({ url: url.split('?')[0] }, 'GET (external)')

    const res = await fetch(url, { method: 'GET' })
    await this.assertOk(res, url)

    if (!res.body) {
      throw new ApiError(`No response body from external URL`, res.status)
    }

    return res.body as unknown as AsyncIterable<Uint8Array>
  }

  /**
   * Throw a typed error for non-2xx responses. 401/403 become AuthError so
   * callers can tell the user to re-authenticate; everything else is ApiError.
   */
  private async assertOk(res: Response, url: string): Promise<void> {
    if (res.ok) return

    if (res.status === 401 || res.status === 403) {
      throw new AuthError(`Auth failed for ${url} (${res.status}) — run 'alta-plaud auth' to re-authenticate`)
    }

    throw new ApiError(`HTTP ${res.status} for ${url}`, res.status)
  }
}
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
import { ApiError, AuthError } from '../errors.js'
|
|
2
|
+
import { getLogger } from '../logger.js'
|
|
3
|
+
import type { StoredCredentials } from '../auth/types.js'
|
|
4
|
+
import {
|
|
5
|
+
buildListUrl,
|
|
6
|
+
buildBatchDetailUrl,
|
|
7
|
+
buildAudioTempUrl,
|
|
8
|
+
buildProfileUrl,
|
|
9
|
+
extractRegionalBaseUrl,
|
|
10
|
+
type EndpointMap,
|
|
11
|
+
} from './endpoints.js'
|
|
12
|
+
import { HttpClient } from './http.js'
|
|
13
|
+
import {
|
|
14
|
+
PlaudRecordingSchema,
|
|
15
|
+
PlaudTranscriptSchema,
|
|
16
|
+
type PlaudRecording,
|
|
17
|
+
type PlaudTranscript,
|
|
18
|
+
type ListOptions,
|
|
19
|
+
type PlaudClient,
|
|
20
|
+
} from './types.js'
|
|
21
|
+
|
|
22
|
+
/**
 * Concrete PlaudClient backed by the real Plaud HTTP API.
 * Endpoint URLs come from the stored credentials' endpoint map, with the
 * defaults from endpoints.ts as fallback.
 */
export class PlaudApiClient implements PlaudClient {
  private readonly http: HttpClient
  // Mutable on purpose? It is only assigned in the constructor here —
  // NOTE(review): could be `readonly` unless region rediscovery mutates it elsewhere.
  private endpoints: EndpointMap

  constructor(creds: StoredCredentials) {
    this.http = new HttpClient(creds)
    // Merge the persisted endpoint map with the discovered regional base URL.
    this.endpoints = {
      ...creds.endpointMap,
      apiBaseUrl: creds.apiBaseUrl,
    }
  }

  /**
   * Probe the profile endpoint to check whether stored credentials are still
   * valid. Any error — including plain network failures — reads as
   * "not authenticated".
   */
  async isAuthenticated(): Promise<boolean> {
    try {
      const profileUrl = buildProfileUrl(this.endpoints)
      const raw = await this.http.get<unknown>(profileUrl)
      // A region-redirect response still means we're authenticated
      if (extractRegionalBaseUrl(raw) !== null) return true
      // Check for error status in response body
      const r = raw as Record<string, unknown>
      return r?.status === 0 || r?.data_user != null
    } catch (err) {
      if (err instanceof AuthError) return false
      if (err instanceof ApiError && (err.statusCode === 401 || err.statusCode === 403)) return false
      // NOTE(review): network errors also land here and are reported as
      // "not authenticated" — confirm this best-effort behavior is intended.
      return false
    }
  }

  /**
   * Yield all non-trashed recordings, paging through the list endpoint 50 at
   * a time (sorted by start_time descending per buildListUrl).
   * `opts.since` is filtered client-side; `opts.limit` caps yielded items.
   */
  async *listRecordings(opts?: ListOptions): AsyncGenerator<PlaudRecording> {
    const log = getLogger()
    let skip = 0
    const limit = 50
    let count = 0

    while (true) {
      const url = buildListUrl(this.endpoints, skip, limit)
      const raw = await this.http.get<unknown>(url)
      const items = extractFileList(raw)

      if (items.length === 0) break

      for (const item of items) {
        const recording = normalizeRecording(item)
        // Schema validation throws on malformed items rather than skipping them.
        const parsed = PlaudRecordingSchema.parse(recording)

        // Apply since filter (no server-side date filtering)
        if (opts?.since && new Date(parsed.recordedAt) < opts.since) continue

        yield parsed
        count++

        if (opts?.limit && count >= opts.limit) return
      }

      log.debug({ skip, fetched: items.length }, 'Fetched recording page')

      // Stop when we get fewer items than the limit (last page)
      if (items.length < limit) break
      skip += items.length
    }
  }

  /**
   * Fetch the transcript for a single recording.
   * @throws ApiError(404) when the recording does not exist.
   */
  async getTranscript(recordingId: string): Promise<PlaudTranscript> {
    // Transcript data lives inside the recording object (trans_result field).
    // Fetch it via POST /file/list with the single recording ID.
    const url = buildBatchDetailUrl(this.endpoints)
    const raw = await this.http.post<unknown>(url, [recordingId])
    const items = extractFileList(raw)

    if (items.length === 0) {
      throw new ApiError(`Recording ${recordingId} not found`, 404)
    }

    const recording = items[0] as Record<string, unknown>
    const normalized = normalizeTranscript(recording, recordingId)
    return PlaudTranscriptSchema.parse(normalized)
  }

  /**
   * Resolve a short-lived presigned download URL for a recording's audio.
   * Best-effort: any failure is logged at debug level and yields null.
   */
  async getAudioDownloadUrl(recordingId: string): Promise<string | null> {
    try {
      const url = buildAudioTempUrl(this.endpoints, recordingId)
      const raw = await this.http.get<unknown>(url)
      return extractTempUrl(raw)
    } catch (err) {
      getLogger().debug({ recordingId, err }, 'Could not get audio download URL')
      return null
    }
  }

  /** Expose the underlying HTTP client (used by the download queue for raw streaming). */
  getHttpClient(): HttpClient {
    return this.http
  }
}
|
|
115
|
+
|
|
116
|
+
// ─── Adaptation Layer ─────────────────────────────────────────────────────────
|
|
117
|
+
//
|
|
118
|
+
// Real Plaud API shapes (from arbuzmell/plaud-api reference client):
|
|
119
|
+
//
|
|
120
|
+
// GET /file/simple/web response:
|
|
121
|
+
// { data_file_list: [{ id, filename, duration_ms, created_at, has_transcription,
|
|
122
|
+
// filesize, filetag_id_list, has_summary }] }
|
|
123
|
+
//
|
|
124
|
+
// POST /file/list response (full detail):
|
|
125
|
+
// { data_file_list: [{ ...above... + trans_result: [...segments...], ai_content: {...} }] }
|
|
126
|
+
//
|
|
127
|
+
// trans_result segment shape:
|
|
128
|
+
// { speaker: string, text: string, start_time_ms: number, end_time_ms: number }
|
|
129
|
+
//
|
|
130
|
+
// GET /file/temp-url/<id> response:
|
|
131
|
+
// { temp_url: "https://s3.amazonaws.com/...?X-Amz-..." }
|
|
132
|
+
|
|
133
|
+
type AnyObject = Record<string, unknown>
|
|
134
|
+
|
|
135
|
+
function extractFileList(raw: unknown): unknown[] {
|
|
136
|
+
const obj = raw as AnyObject
|
|
137
|
+
// Primary response shape: { data_file_list: [...] }
|
|
138
|
+
if (Array.isArray(obj?.data_file_list)) return obj.data_file_list as unknown[]
|
|
139
|
+
// Fallback shapes
|
|
140
|
+
if (Array.isArray(obj?.data)) return obj.data as unknown[]
|
|
141
|
+
if (Array.isArray(obj?.list)) return obj.list as unknown[]
|
|
142
|
+
if (Array.isArray(raw)) return raw as unknown[]
|
|
143
|
+
return []
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
/**
 * Map a raw Plaud API recording object onto the library's PlaudRecording
 * shape. Field names follow the reference client's observed API shapes; the
 * original raw object is preserved under `_raw`.
 */
function normalizeRecording(raw: unknown): Omit<PlaudRecording, 'mimeType'> & { mimeType?: string } {
  const r = raw as AnyObject

  // duration is in milliseconds — convert to seconds
  const durationMs = Number(r['duration'] ?? r['duration_ms'] ?? 0)
  const duration = durationMs / 1000

  // start_time is Unix milliseconds (the actual recording start time)
  const startTimeMs = Number(r['start_time'] ?? 0)
  // NOTE(review): when start_time is missing, recordedAt falls back to "now",
  // which is nondeterministic across runs — confirm this is acceptable for
  // incremental-sync comparisons.
  const recordedAt = startTimeMs > 0 ? new Date(startTimeMs).toISOString() : new Date().toISOString()

  // version_ms is when the record was last synced; edit_time is seconds
  const versionMs = Number(r['version_ms'] ?? 0)
  const editTimeSec = Number(r['edit_time'] ?? 0)
  const updatedAt = versionMs > 0 ? new Date(versionMs).toISOString() : recordedAt
  // NOTE(review): createdAt is derived from edit_time — presumably the closest
  // available proxy for creation time; verify against the API.
  const createdAt = editTimeSec > 0 ? new Date(editTimeSec * 1000).toISOString() : recordedAt

  // Determine mime type from the fullname file extension
  const fullname = String(r['fullname'] ?? '')
  const ext = fullname.split('.').pop()?.toLowerCase()
  // Unknown/missing extensions default to audio/mp4.
  const mimeType =
    ext === 'ogg' ? 'audio/ogg' :
    ext === 'm4a' ? 'audio/m4a' :
    ext === 'mp3' ? 'audio/mpeg' :
    ext === 'opus' ? 'audio/ogg; codecs=opus' :
    'audio/mp4'

  return {
    id: String(r['id'] ?? ''),
    title: stringOrUndefined(r['filename'] ?? r['name'] ?? r['title']),
    duration,
    recordedAt,
    createdAt,
    updatedAt,
    fileSize: numberOrUndefined(r['filesize'] ?? r['file_size']),
    mimeType,
    hasTranscript: Boolean(r['is_trans'] ?? r['has_transcription'] ?? r['hasNote']),
    // NOTE(review): transcriptStatus only inspects is_trans, while
    // hasTranscript also accepts has_transcription/hasNote — an item can be
    // hasTranscript=true with transcriptStatus undefined. Confirm intended.
    transcriptStatus: r['is_trans'] ? 'completed' : undefined,
    language: stringOrUndefined(r['language'] ?? r['lang']),
    deviceId: stringOrUndefined(r['serial_number'] ?? r['device_id'] ?? r['deviceId']),
    tags: arrayOfStrings(r['filetag_id_list'] ?? r['tags']),
    // Folders are not exposed by this endpoint; always undefined here.
    folderId: undefined,
    summary: extractSummaryText(r['ai_content']),
    _raw: r,
  }
}
|
|
192
|
+
|
|
193
|
+
function normalizeTranscript(raw: unknown, recordingId: string): PlaudTranscript {
|
|
194
|
+
const r = raw as AnyObject
|
|
195
|
+
const transResult = r['trans_result']
|
|
196
|
+
const segmentsRaw = Array.isArray(transResult) ? transResult as AnyObject[] : []
|
|
197
|
+
|
|
198
|
+
const segments = segmentsRaw.map((s, i) => ({
|
|
199
|
+
index: i,
|
|
200
|
+
startMs: Number(s['start_time_ms'] ?? s['startMs'] ?? s['startTime'] ?? 0),
|
|
201
|
+
endMs: Number(s['end_time_ms'] ?? s['endMs'] ?? s['endTime'] ?? 0),
|
|
202
|
+
speaker: stringOrUndefined(s['speaker']),
|
|
203
|
+
text: String(s['text'] ?? '').trim(),
|
|
204
|
+
confidence: undefined,
|
|
205
|
+
}))
|
|
206
|
+
|
|
207
|
+
const fullText = segments.map(s => s.text).filter(Boolean).join('\n\n')
|
|
208
|
+
// duration field from POST /file/list is in milliseconds — convert to seconds
|
|
209
|
+
const durationMs = Number(r['duration_ms'] ?? r['duration'] ?? 0)
|
|
210
|
+
const duration = durationMs / 1000
|
|
211
|
+
|
|
212
|
+
return {
|
|
213
|
+
recordingId,
|
|
214
|
+
language: stringOrUndefined(r['language'] ?? r['lang']),
|
|
215
|
+
duration,
|
|
216
|
+
segments,
|
|
217
|
+
fullText,
|
|
218
|
+
createdAt: stringOrUndefined(r['created_at'] ?? r['createTime']) ? toIso(r['created_at'] ?? r['createTime']) : undefined,
|
|
219
|
+
_raw: r as Record<string, unknown>,
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
function extractTempUrl(raw: unknown): string | null {
|
|
224
|
+
const obj = raw as AnyObject
|
|
225
|
+
return stringOrUndefined(obj?.['temp_url'] ?? obj?.['url'] ?? obj?.['downloadUrl']) ?? null
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
function extractSummaryText(aiContent: unknown): string | undefined {
|
|
229
|
+
if (!aiContent || typeof aiContent !== 'object') return undefined
|
|
230
|
+
const obj = aiContent as AnyObject
|
|
231
|
+
// ai_content can have various summary fields
|
|
232
|
+
const text = obj['summary'] ?? obj['text'] ?? obj['content']
|
|
233
|
+
return stringOrUndefined(text)
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
// ─── Utilities ────────────────────────────────────────────────────────────────
|
|
237
|
+
|
|
238
|
+
function toIso(val: unknown): string {
|
|
239
|
+
if (typeof val === 'string' && val.length > 0) {
|
|
240
|
+
// Already ISO string
|
|
241
|
+
if (val.includes('T') || val.includes('-')) return new Date(val).toISOString()
|
|
242
|
+
// Unix ms as string
|
|
243
|
+
const n = Number(val)
|
|
244
|
+
if (isFinite(n) && n > 0) {
|
|
245
|
+
return new Date(n > 1e12 ? n : n * 1000).toISOString()
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
if (typeof val === 'number' && val > 0) {
|
|
249
|
+
return new Date(val > 1e12 ? val : val * 1000).toISOString()
|
|
250
|
+
}
|
|
251
|
+
return new Date().toISOString()
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
function stringOrUndefined(val: unknown): string | undefined {
|
|
255
|
+
if (typeof val === 'string' && val.length > 0) return val
|
|
256
|
+
return undefined
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
function numberOrUndefined(val: unknown): number | undefined {
|
|
260
|
+
const n = Number(val)
|
|
261
|
+
return isFinite(n) && n >= 0 ? n : undefined
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
function arrayOfStrings(val: unknown): string[] | undefined {
|
|
265
|
+
if (!Array.isArray(val)) return undefined
|
|
266
|
+
const result = val.filter(v => typeof v === 'string') as string[]
|
|
267
|
+
return result.length > 0 ? result : undefined
|
|
268
|
+
}
|