@alta-foundation/plaud-extractor 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +9 -0
- package/.github/workflows/ci.yml +33 -0
- package/.github/workflows/publish.yml +46 -0
- package/CLAUDE.md +53 -0
- package/README.md +318 -0
- package/dist/PlaudExtractor.d.ts +61 -0
- package/dist/PlaudExtractor.d.ts.map +1 -0
- package/dist/PlaudExtractor.js +236 -0
- package/dist/PlaudExtractor.js.map +1 -0
- package/dist/auth/browser-auth.d.ts +10 -0
- package/dist/auth/browser-auth.d.ts.map +1 -0
- package/dist/auth/browser-auth.js +220 -0
- package/dist/auth/browser-auth.js.map +1 -0
- package/dist/auth/token-store.d.ts +9 -0
- package/dist/auth/token-store.d.ts.map +1 -0
- package/dist/auth/token-store.js +74 -0
- package/dist/auth/token-store.js.map +1 -0
- package/dist/auth/types.d.ts +266 -0
- package/dist/auth/types.d.ts.map +1 -0
- package/dist/auth/types.js +32 -0
- package/dist/auth/types.js.map +1 -0
- package/dist/cli/bin.d.ts +3 -0
- package/dist/cli/bin.d.ts.map +1 -0
- package/dist/cli/bin.js +30 -0
- package/dist/cli/bin.js.map +1 -0
- package/dist/cli/commands/auth.d.ts +3 -0
- package/dist/cli/commands/auth.d.ts.map +1 -0
- package/dist/cli/commands/auth.js +22 -0
- package/dist/cli/commands/auth.js.map +1 -0
- package/dist/cli/commands/backfill.d.ts +3 -0
- package/dist/cli/commands/backfill.d.ts.map +1 -0
- package/dist/cli/commands/backfill.js +59 -0
- package/dist/cli/commands/backfill.js.map +1 -0
- package/dist/cli/commands/sync.d.ts +3 -0
- package/dist/cli/commands/sync.d.ts.map +1 -0
- package/dist/cli/commands/sync.js +55 -0
- package/dist/cli/commands/sync.js.map +1 -0
- package/dist/cli/commands/verify.d.ts +3 -0
- package/dist/cli/commands/verify.d.ts.map +1 -0
- package/dist/cli/commands/verify.js +28 -0
- package/dist/cli/commands/verify.js.map +1 -0
- package/dist/cli/exit-codes.d.ts +8 -0
- package/dist/cli/exit-codes.d.ts.map +1 -0
- package/dist/cli/exit-codes.js +16 -0
- package/dist/cli/exit-codes.js.map +1 -0
- package/dist/cli/options.d.ts +31 -0
- package/dist/cli/options.d.ts.map +1 -0
- package/dist/cli/options.js +11 -0
- package/dist/cli/options.js.map +1 -0
- package/dist/client/endpoints.d.ts +26 -0
- package/dist/client/endpoints.d.ts.map +1 -0
- package/dist/client/endpoints.js +54 -0
- package/dist/client/endpoints.js.map +1 -0
- package/dist/client/http.d.ts +17 -0
- package/dist/client/http.d.ts.map +1 -0
- package/dist/client/http.js +92 -0
- package/dist/client/http.js.map +1 -0
- package/dist/client/plaud-client.d.ts +14 -0
- package/dist/client/plaud-client.d.ts.map +1 -0
- package/dist/client/plaud-client.js +216 -0
- package/dist/client/plaud-client.js.map +1 -0
- package/dist/client/types.d.ts +154 -0
- package/dist/client/types.d.ts.map +1 -0
- package/dist/client/types.js +41 -0
- package/dist/client/types.js.map +1 -0
- package/dist/errors.d.ts +24 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +51 -0
- package/dist/errors.js.map +1 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +5 -0
- package/dist/index.js.map +1 -0
- package/dist/logger.d.ts +9 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +37 -0
- package/dist/logger.js.map +1 -0
- package/dist/mcp/job-tools.d.ts +3 -0
- package/dist/mcp/job-tools.d.ts.map +1 -0
- package/dist/mcp/job-tools.js +108 -0
- package/dist/mcp/job-tools.js.map +1 -0
- package/dist/mcp/read-tools.d.ts +3 -0
- package/dist/mcp/read-tools.d.ts.map +1 -0
- package/dist/mcp/read-tools.js +173 -0
- package/dist/mcp/read-tools.js.map +1 -0
- package/dist/mcp/server.d.ts +3 -0
- package/dist/mcp/server.d.ts.map +1 -0
- package/dist/mcp/server.js +32 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/storage/atomic.d.ts +5 -0
- package/dist/storage/atomic.d.ts.map +1 -0
- package/dist/storage/atomic.js +51 -0
- package/dist/storage/atomic.js.map +1 -0
- package/dist/storage/checksums.d.ts +15 -0
- package/dist/storage/checksums.d.ts.map +1 -0
- package/dist/storage/checksums.js +56 -0
- package/dist/storage/checksums.js.map +1 -0
- package/dist/storage/dataset-writer.d.ts +21 -0
- package/dist/storage/dataset-writer.d.ts.map +1 -0
- package/dist/storage/dataset-writer.js +52 -0
- package/dist/storage/dataset-writer.js.map +1 -0
- package/dist/storage/paths.d.ts +9 -0
- package/dist/storage/paths.d.ts.map +1 -0
- package/dist/storage/paths.js +38 -0
- package/dist/storage/paths.js.map +1 -0
- package/dist/storage/recording-store.d.ts +24 -0
- package/dist/storage/recording-store.d.ts.map +1 -0
- package/dist/storage/recording-store.js +161 -0
- package/dist/storage/recording-store.js.map +1 -0
- package/dist/sync/download-queue.d.ts +21 -0
- package/dist/sync/download-queue.d.ts.map +1 -0
- package/dist/sync/download-queue.js +82 -0
- package/dist/sync/download-queue.js.map +1 -0
- package/dist/sync/incremental.d.ts +21 -0
- package/dist/sync/incremental.d.ts.map +1 -0
- package/dist/sync/incremental.js +96 -0
- package/dist/sync/incremental.js.map +1 -0
- package/dist/sync/sync-engine.d.ts +6 -0
- package/dist/sync/sync-engine.d.ts.map +1 -0
- package/dist/sync/sync-engine.js +135 -0
- package/dist/sync/sync-engine.js.map +1 -0
- package/dist/sync/types.d.ts +130 -0
- package/dist/sync/types.d.ts.map +1 -0
- package/dist/sync/types.js +17 -0
- package/dist/sync/types.js.map +1 -0
- package/dist/transcript/formatter.d.ts +4 -0
- package/dist/transcript/formatter.d.ts.map +1 -0
- package/dist/transcript/formatter.js +88 -0
- package/dist/transcript/formatter.js.map +1 -0
- package/package.json +41 -0
- package/src/PlaudExtractor.ts +275 -0
- package/src/auth/browser-auth.ts +248 -0
- package/src/auth/token-store.ts +79 -0
- package/src/auth/types.ts +41 -0
- package/src/cli/bin.ts +30 -0
- package/src/cli/commands/auth.ts +27 -0
- package/src/cli/commands/backfill.ts +77 -0
- package/src/cli/commands/sync.ts +71 -0
- package/src/cli/commands/verify.ts +31 -0
- package/src/cli/exit-codes.ts +14 -0
- package/src/cli/options.ts +10 -0
- package/src/client/endpoints.ts +62 -0
- package/src/client/http.ts +110 -0
- package/src/client/plaud-client.ts +268 -0
- package/src/client/types.ts +62 -0
- package/src/errors.ts +57 -0
- package/src/index.ts +17 -0
- package/src/logger.ts +49 -0
- package/src/mcp/job-tools.ts +156 -0
- package/src/mcp/read-tools.ts +204 -0
- package/src/mcp/server.ts +39 -0
- package/src/storage/atomic.ts +51 -0
- package/src/storage/checksums.ts +76 -0
- package/src/storage/dataset-writer.ts +74 -0
- package/src/storage/paths.ts +44 -0
- package/src/storage/recording-store.ts +182 -0
- package/src/sync/download-queue.ts +102 -0
- package/src/sync/incremental.ts +111 -0
- package/src/sync/sync-engine.ts +183 -0
- package/src/sync/types.ts +64 -0
- package/src/transcript/formatter.ts +91 -0
- package/tsconfig.build.json +8 -0
- package/tsconfig.json +19 -0
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import crypto from 'node:crypto'
|
|
2
|
+
import fs from 'node:fs/promises'
|
|
3
|
+
import path from 'node:path'
|
|
4
|
+
import { writeFileAtomic } from './atomic.js'
|
|
5
|
+
import { ChecksumMismatchError } from '../errors.js'
|
|
6
|
+
|
|
7
|
+
/** Integrity record stored for a single file in a checksum manifest. */
export interface FileChecksum {
  /** Hex-encoded SHA-256 digest of the file contents. */
  sha256: string
  /** Size of the file in bytes. */
  sizeBytes: number
}
|
|
11
|
+
|
|
12
|
+
/** On-disk shape of `checksums.json` for one recording directory. */
export interface ChecksumManifest {
  /** Manifest format version; currently always `1`. */
  schemaVersion: 1
  /** Identifier of the recording the manifest belongs to. */
  recordingId: string
  /** ISO-8601 timestamp of when the manifest was computed. */
  computedAt: string
  /** Per-file checksums, keyed by file name within the directory. */
  files: Record<string, FileChecksum>
}
|
|
18
|
+
|
|
19
|
+
/**
 * Compute the SHA-256 digest of a file.
 *
 * The file is hashed in fixed-size chunks rather than loaded whole, so memory
 * use stays constant even for large audio files.
 *
 * @param filePath - Path of the file to hash.
 * @returns The digest as a lowercase hex string.
 * @throws The underlying file-system error if the file cannot be opened or read
 *   (for example `ENOENT` when it does not exist).
 */
export async function sha256File(filePath: string): Promise<string> {
  const hash = crypto.createHash('sha256')
  const handle = await fs.open(filePath, 'r')
  try {
    // One reusable buffer: hash.update() consumes the bytes synchronously,
    // so overwriting the buffer on the next read is safe.
    const chunk = Buffer.allocUnsafe(64 * 1024)
    for (;;) {
      const { bytesRead } = await handle.read(chunk, 0, chunk.length, null)
      if (bytesRead === 0) break
      hash.update(chunk.subarray(0, bytesRead))
    }
  } finally {
    // Always release the descriptor, including when a read fails.
    await handle.close()
  }
  return hash.digest('hex')
}
|
|
23
|
+
|
|
24
|
+
/**
 * Hash every regular file directly inside `dir` and persist the result as
 * `checksums.json` in that same directory.
 *
 * An existing `checksums.json` is never included in the manifest, and
 * entries that are not regular files (per `fs.stat`) are skipped.
 *
 * @param dir - Directory whose files are checksummed.
 * @param recordingId - Recording identifier stored in the manifest.
 * @returns The manifest that was written.
 */
export async function writeChecksumManifest(
  dir: string,
  recordingId: string,
): Promise<ChecksumManifest> {
  const entryNames = await fs.readdir(dir)

  const manifest: ChecksumManifest = {
    schemaVersion: 1,
    recordingId,
    computedAt: new Date().toISOString(),
    files: {},
  }

  for (const name of entryNames) {
    if (name === 'checksums.json') continue

    const fullPath = path.join(dir, name)
    const info = await fs.stat(fullPath)
    if (info.isFile()) {
      const digest = await sha256File(fullPath)
      manifest.files[name] = { sha256: digest, sizeBytes: info.size }
    }
  }

  const manifestPath = path.join(dir, 'checksums.json')
  await writeFileAtomic(manifestPath, JSON.stringify(manifest, null, 2))
  return manifest
}
|
|
49
|
+
|
|
50
|
+
/**
 * Re-hash the files listed in `dir/checksums.json` and report discrepancies.
 *
 * - No manifest (`ENOENT`): nothing to verify, returns an empty list.
 * - Manifest exists but cannot be read or parsed, or has no `files` map:
 *   returns a single mismatch for the manifest path itself (actual value
 *   `'UNREADABLE'` or `'INVALID'`), so a corrupt manifest is never reported
 *   as a clean verification.
 * - A listed file whose digest differs yields a mismatch with the actual digest.
 * - A listed file that cannot be hashed yields a mismatch with actual `'MISSING'`.
 *
 * @param dir - Recording directory containing `checksums.json`.
 * @returns One `ChecksumMismatchError` per problem found; empty when all match.
 */
export async function verifyChecksums(dir: string): Promise<ChecksumMismatchError[]> {
  const manifestPath = path.join(dir, 'checksums.json')

  let raw: string
  try {
    raw = await fs.readFile(manifestPath, 'utf8')
  } catch (err: unknown) {
    if ((err as NodeJS.ErrnoException).code === 'ENOENT') {
      return [] // No manifest yet — skip verification
    }
    // The manifest is there but unreadable: surface it instead of passing silently.
    return [new ChecksumMismatchError(manifestPath, 'VALID_MANIFEST', 'UNREADABLE')]
  }

  let files: ChecksumManifest['files']
  try {
    const parsed: unknown = JSON.parse(raw)
    const candidate =
      parsed !== null && typeof parsed === 'object'
        ? (parsed as Partial<ChecksumManifest>).files
        : undefined
    if (candidate === null || typeof candidate !== 'object') {
      throw new Error('checksums.json has no "files" map')
    }
    files = candidate
  } catch {
    return [new ChecksumMismatchError(manifestPath, 'VALID_MANIFEST', 'INVALID')]
  }

  const mismatches: ChecksumMismatchError[] = []

  for (const [file, expected] of Object.entries(files)) {
    const fPath = path.join(dir, file)
    try {
      const actual = await sha256File(fPath)
      if (actual !== expected.sha256) {
        mismatches.push(new ChecksumMismatchError(fPath, expected.sha256, actual))
      }
    } catch {
      mismatches.push(new ChecksumMismatchError(fPath, expected.sha256, 'MISSING'))
    }
  }

  return mismatches
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import fs from 'node:fs/promises'
|
|
2
|
+
import { createWriteStream, type WriteStream } from 'node:fs'
|
|
3
|
+
import path from 'node:path'
|
|
4
|
+
import { datasetPath, recordingDir } from './paths.js'
|
|
5
|
+
import type { PlaudRecording, PlaudTranscript } from '../client/types.js'
|
|
6
|
+
import { StorageError } from '../errors.js'
|
|
7
|
+
|
|
8
|
+
/** One JSON line of the transcript dataset file. */
export interface DatasetEntry {
  /** Dataset-wide identifier, written as `plaud:<recording id>`. */
  id: string
  /** Recording title, or `null` when the recording has none. */
  title: string | null
  /** Recording timestamp, copied from the recording's `recordedAt`. */
  recorded_at: string
  /** Recording duration, copied from the recording's `duration`. */
  duration_seconds: number
  /** Recording language, or `null` when unknown. */
  language: string | null
  /** Full transcript text. */
  text: string
  /** Path of the recording's `transcript.txt`, relative to the output directory. */
  path: string
  /** Number of transcript segments. */
  segment_count: number
}
|
|
18
|
+
|
|
19
|
+
/**
 * Appends transcript entries, one JSON object per line, to the dataset file
 * located by `datasetPath(outDir)`.
 *
 * Lifecycle: `open()` → any number of `append()` calls → `close()`.
 */
export class DatasetWriter {
  private readonly filePath: string
  private stream: WriteStream | null = null

  /** @param outDir - Output root; the dataset file location is derived from it. */
  constructor(outDir: string) {
    this.filePath = datasetPath(outDir)
  }

  /**
   * Create the parent directory if needed and open the dataset file in append mode.
   *
   * Calling `open()` on an already-open writer is a no-op, so a second call
   * does not leak the first stream. If opening fails the writer is left in
   * the "not opened" state and the error is rethrown.
   */
  async open(): Promise<void> {
    if (this.stream) return

    await fs.mkdir(path.dirname(this.filePath), { recursive: true })

    const stream = createWriteStream(this.filePath, { flags: 'a', encoding: 'utf8' })
    this.stream = stream
    try {
      await new Promise<void>((resolve, reject) => {
        stream.once('open', () => resolve())
        stream.once('error', reject)
      })
    } catch (err) {
      // Do not keep a reference to a stream that never opened.
      if (this.stream === stream) this.stream = null
      stream.destroy()
      throw err
    }
  }

  /**
   * Append one dataset line for a recording and its transcript.
   *
   * @param outDir - Output root used to compute the relative transcript path.
   * @param recording - Recording the entry describes.
   * @param transcript - Transcript whose full text and segment count are stored.
   * @throws StorageError if the writer has not been opened.
   */
  async append(outDir: string, recording: PlaudRecording, transcript: PlaudTranscript): Promise<void> {
    const stream = this.stream
    if (!stream) throw new StorageError('DatasetWriter not opened', this.filePath)

    const relPath = path.relative(
      outDir,
      path.join(recordingDir(outDir, recording.recordedAt, recording.id), 'transcript.txt'),
    )

    const entry: DatasetEntry = {
      id: `plaud:${recording.id}`,
      title: recording.title ?? null,
      recorded_at: recording.recordedAt,
      duration_seconds: recording.duration,
      language: recording.language ?? null,
      text: transcript.fullText,
      path: relPath,
      segment_count: transcript.segments.length,
    }

    const line = JSON.stringify(entry) + '\n'

    // Resolve only once the write callback fires, so failures reach the caller.
    await new Promise<void>((resolve, reject) => {
      stream.write(line, err => (err ? reject(err) : resolve()))
    })
  }

  /**
   * Flush and close the dataset file. A no-op when the writer is not open.
   *
   * The stream reference is cleared before waiting, so the writer can be
   * reopened even if closing fails.
   */
  async close(): Promise<void> {
    const stream = this.stream
    if (!stream) return
    this.stream = null

    await new Promise<void>((resolve, reject) => {
      stream.once('error', reject)
      // The end callback may receive an error; do not treat that as success.
      stream.end((err?: Error | null) => (err ? reject(err) : resolve()))
    })
  }

  /** Absolute or relative path of the dataset file, as derived from `outDir`. */
  get path(): string {
    return this.filePath
  }
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import path from 'node:path'
|
|
2
|
+
import os from 'node:os'
|
|
3
|
+
|
|
4
|
+
/**
 * Resolve the default output directory.
 *
 * @returns The absolute form of `ALTA_DATA_DIR` when that environment variable
 *   is set to a non-empty value; otherwise `<home>/alta/data/plaud`.
 */
export function defaultOutDir(): string {
  const override = process.env['ALTA_DATA_DIR']
  return override
    ? path.resolve(override)
    : path.join(os.homedir(), 'alta', 'data', 'plaud')
}
|
|
9
|
+
|
|
10
|
+
/**
 * Build the directory path for one recording:
 * `<outDir>/recordings/<YYYY>/<MM>/<timestamp>__plaud_<recordingId>`.
 *
 * Year, month and timestamp are all taken from `recordedAt` in UTC; the
 * timestamp part is produced by `formatISOCompact`.
 *
 * @param outDir - Output root directory.
 * @param recordedAt - Recording time, in any form `new Date()` can parse.
 * @param recordingId - Recording identifier embedded in the directory name.
 */
export function recordingDir(outDir: string, recordedAt: string, recordingId: string): string {
  const recorded = new Date(recordedAt)
  const yearSegment = `${recorded.getUTCFullYear()}`
  const monthSegment = `${recorded.getUTCMonth() + 1}`.padStart(2, '0')
  const leaf = [formatISOCompact(recorded), `plaud_${recordingId}`].join('__')
  return path.join(outDir, 'recordings', yearSegment, monthSegment, leaf)
}
|
|
19
|
+
|
|
20
|
+
/**
 * Format a date as a compact, filesystem-friendly UTC timestamp:
 * `2026-02-24T083012Z` (date keeps its hyphens, time drops its colons and
 * milliseconds).
 *
 * The previous implementation stripped the hyphens as well and appended a
 * second `Z`, producing `20260224T083012ZZ`.
 *
 * @param dt - Date to format.
 * @returns The compact timestamp.
 * @throws RangeError if `dt` is an invalid date (from `toISOString`).
 */
function formatISOCompact(dt: Date): string {
  const iso = dt.toISOString() // 2026-02-24T08:30:12.000Z
  const splitAt = iso.indexOf('T')
  const datePart = iso.slice(0, splitAt) // 2026-02-24
  const timePart = iso.slice(splitAt + 1, splitAt + 9).replace(/:/g, '') // 083012
  return `${datePart}T${timePart}Z`
}
|
|
25
|
+
|
|
26
|
+
/**
 * Directory holding sync bookkeeping files.
 *
 * @param outDir - Output root directory.
 * @returns `<outDir>/_state`.
 */
export function stateDir(outDir: string): string {
  const stateFolder = '_state'
  return path.join(outDir, stateFolder)
}
|
|
29
|
+
|
|
30
|
+
/**
 * Location of the persisted sync state.
 *
 * @param outDir - Output root directory.
 * @returns Path of `sync_state.json` inside the state directory.
 */
export function syncStatePath(outDir: string): string {
  const stateRoot = stateDir(outDir)
  return path.join(stateRoot, 'sync_state.json')
}
|
|
33
|
+
|
|
34
|
+
/**
 * Location of the run log file.
 *
 * @param outDir - Output root directory.
 * @returns Path of `run_logs.ndjson` inside the state directory.
 */
export function runLogsPath(outDir: string): string {
  const stateRoot = stateDir(outDir)
  return path.join(stateRoot, 'run_logs.ndjson')
}
|
|
37
|
+
|
|
38
|
+
/**
 * Location of the transcript dataset file.
 *
 * @param outDir - Output root directory.
 * @returns `<outDir>/datasets/plaud_transcripts.jsonl`.
 */
export function datasetPath(outDir: string): string {
  const segments = ['datasets', 'plaud_transcripts.jsonl']
  return path.join(outDir, ...segments)
}
|
|
41
|
+
|
|
42
|
+
/**
 * Location of the stored auth file.
 *
 * @returns `<home>/.alta/plaud-auth.json`, using the current user's home directory.
 */
export function authTokenPath(): string {
  const home = os.homedir()
  return path.join(home, '.alta', 'plaud-auth.json')
}
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
import fs from 'node:fs/promises'
|
|
2
|
+
import path from 'node:path'
|
|
3
|
+
import { writeFileAtomic, writeStreamAtomic } from './atomic.js'
|
|
4
|
+
import { writeChecksumManifest, verifyChecksums } from './checksums.js'
|
|
5
|
+
import { toPlainText, toMarkdown } from '../transcript/formatter.js'
|
|
6
|
+
import { recordingDir } from './paths.js'
|
|
7
|
+
import type { PlaudRecording, PlaudTranscript } from '../client/types.js'
|
|
8
|
+
import type { ChecksumMismatchError } from '../errors.js'
|
|
9
|
+
import type { HttpClient } from '../client/http.js'
|
|
10
|
+
import { getLogger } from '../logger.js'
|
|
11
|
+
|
|
12
|
+
/** Transcript output formats that can be written for a recording. */
export type TranscriptFormat =
  | 'json'
  | 'txt'
  | 'md'
|
|
13
|
+
|
|
14
|
+
/**
 * Outcome of writing one recording to disk.
 * NOTE(review): not referenced in the visible code — field meanings below are
 * inferred from their names; confirm against the caller.
 */
export interface RecordingWriteResult {
  /** Directory associated with the recording. */
  dir: string
  /** Presumably whether an audio file was written. */
  hasAudio: boolean
  /** Presumably whether transcript files were written. */
  hasTranscript: boolean
}
|
|
19
|
+
|
|
20
|
+
/**
 * Writes the on-disk artefacts of a recording (metadata, transcripts, audio,
 * checksum manifest) under a per-recording directory below `outDir`.
 */
export class RecordingStore {
  /** @param outDir - Output root under which recording directories live. */
  constructor(private readonly outDir: string) {}

  /** Directory for the given recording, derived from its timestamp and id. */
  recordingDir(recording: PlaudRecording): string {
    const { recordedAt, id } = recording
    return recordingDir(this.outDir, recordedAt, id)
  }

  /**
   * Write `meta.json` for the recording.
   *
   * @returns The recording directory the file was written to.
   */
  async writeMetadata(recording: PlaudRecording): Promise<string> {
    const targetDir = this.recordingDir(recording)
    const metaFile = path.join(targetDir, 'meta.json')
    await writeFileAtomic(metaFile, JSON.stringify(buildMetaJson(recording), null, 2))
    getLogger().debug({ recordingId: recording.id, dir: targetDir }, 'Wrote meta.json')
    return targetDir
  }

  /**
   * Write the transcript in each requested format
   * (`transcript.json`, `transcript.txt`, `transcript.md`).
   *
   * @param formats - Formats to emit; all three by default.
   */
  async writeTranscript(
    recording: PlaudRecording,
    transcript: PlaudTranscript,
    formats: TranscriptFormat[] = ['json', 'txt', 'md'],
  ): Promise<void> {
    const targetDir = this.recordingDir(recording)
    const wants = (format: TranscriptFormat): boolean => formats.includes(format)

    if (wants('json')) {
      const { recordingId, language, duration, segments } = transcript
      const payload = { recordingId, language, duration, segments }
      await writeFileAtomic(path.join(targetDir, 'transcript.json'), JSON.stringify(payload, null, 2))
    }

    if (wants('txt')) {
      await writeFileAtomic(path.join(targetDir, 'transcript.txt'), toPlainText(transcript))
    }

    if (wants('md')) {
      await writeFileAtomic(path.join(targetDir, 'transcript.md'), toMarkdown(transcript, recording))
    }

    getLogger().debug({ recordingId: recording.id, formats }, 'Wrote transcript files')
  }

  /**
   * Download the recording's audio via the authenticated client.
   *
   * Currently always returns `false`: the URL lookup (`getAudioUrl`) is a stub
   * that yields `null`. Download failures are logged and reported as `false`.
   */
  async writeAudio(recording: PlaudRecording, http: HttpClient): Promise<boolean> {
    const url = await this.getAudioUrl(recording, http)
    if (!url) return false

    const destPath = this.audioPath(recording)
    try {
      const body = await http.getStream(url)
      await writeStreamAtomic(destPath, body)
    } catch (err) {
      getLogger().warn({ recordingId: recording.id, err }, 'Failed to download audio')
      return false
    }
    getLogger().debug({ recordingId: recording.id, path: destPath }, 'Wrote audio file')
    return true
  }

  private async getAudioUrl(recording: PlaudRecording, http: HttpClient): Promise<string | null> {
    // This is a stub — the PlaudApiClient resolves the actual URL
    // RecordingStore receives the already-resolved URL via writeAudioFromUrl
    return null
  }

  /**
   * Download audio from an already-resolved URL into the recording directory.
   *
   * @returns `true` when the file was written; `false` (after logging a
   *   warning) when the download or write failed.
   */
  async writeAudioFromUrl(recording: PlaudRecording, url: string, http: HttpClient): Promise<boolean> {
    const destPath = this.audioPath(recording)
    try {
      // S3 presigned URLs must be fetched without Plaud auth headers
      const body = await http.downloadExternalUrl(url)
      await writeStreamAtomic(destPath, body)
    } catch (err) {
      getLogger().warn({ recordingId: recording.id, err }, 'Failed to download audio')
      return false
    }
    getLogger().debug({ recordingId: recording.id, path: destPath }, 'Wrote audio file')
    return true
  }

  /** Compute and write `checksums.json` for the recording directory. */
  async writeChecksums(recording: PlaudRecording): Promise<void> {
    await writeChecksumManifest(this.recordingDir(recording), recording.id)
    getLogger().debug({ recordingId: recording.id }, 'Wrote checksums.json')
  }

  /** Verify the recording directory against its checksum manifest. */
  async verify(recording: PlaudRecording): Promise<ChecksumMismatchError[]> {
    return verifyChecksums(this.recordingDir(recording))
  }

  /** Whether the recording directory is accessible on disk. */
  async exists(recording: PlaudRecording): Promise<boolean> {
    const targetDir = this.recordingDir(recording)
    return fs.access(targetDir).then(
      () => true,
      () => false,
    )
  }

  /** Whether any required file (currently only `meta.json`) is inaccessible. */
  async hasMissingFiles(recording: PlaudRecording): Promise<boolean> {
    const targetDir = this.recordingDir(recording)
    for (const name of ['meta.json']) {
      const present = await fs.access(path.join(targetDir, name)).then(
        () => true,
        () => false,
      )
      if (!present) return true
    }
    return false
  }

  /** Destination path of the audio file: `audio.<ext>` inside the recording directory. */
  private audioPath(recording: PlaudRecording): string {
    const targetDir = this.recordingDir(recording)
    const extension = guessAudioExtension(recording.mimeType)
    return path.join(targetDir, `audio.${extension}`)
  }
}
|
|
141
|
+
|
|
142
|
+
/**
 * Assemble the object serialized as `meta.json` for a recording.
 *
 * The `audio` section is `null` when the recording reports no (or zero) file
 * size. `imported_at` is the time this function runs.
 */
function buildMetaJson(recording: PlaudRecording): object {
  const importedAt = new Date().toISOString()
  const dedupeKey = `plaud:${recording.id}`

  let audio: { filename: string; mime: typeof recording.mimeType; bytes: typeof recording.fileSize } | null = null
  if (recording.fileSize) {
    audio = {
      filename: `audio.${guessAudioExtension(recording.mimeType)}`,
      mime: recording.mimeType,
      bytes: recording.fileSize,
    }
  }

  const transcript = {
    has_timestamps: true, // will be updated after writing
    format: 'segments',
    filename_json: 'transcript.json',
    filename_txt: 'transcript.txt',
    filename_md: 'transcript.md',
  }

  return {
    source: 'plaud',
    source_recording_id: recording.id,
    recorded_at: recording.recordedAt,
    imported_at: importedAt,
    title: recording.title,
    duration_seconds: recording.duration,
    language: recording.language,
    audio,
    transcript,
    integrity: { dedupe_key: dedupeKey },
    tags: recording.tags,
    folder_id: recording.folderId,
    device_id: recording.deviceId,
    summary: recording.summary,
  }
}
|
|
174
|
+
|
|
175
|
+
/**
 * Pick a file extension for an audio MIME type.
 *
 * Matching is case-insensitive (media types are case-insensitive) and
 * recognises `audio/mpeg`, the registered media type for MP3, which the
 * substring `mp3` alone does not match.
 *
 * @param mimeType - Media type reported for the recording, e.g. `audio/mp4`.
 * @returns One of `m4a`, `wav`, `mp3`, `ogg`, `webm`; `m4a` when nothing matches.
 */
function guessAudioExtension(mimeType: string): string {
  const mime = mimeType.toLowerCase()
  if (mime.includes('m4a') || mime.includes('mp4')) return 'm4a'
  if (mime.includes('wav')) return 'wav'
  // \bmpeg\b matches audio/mpeg and audio/x-mpeg-3 but not audio/mpeg4-generic
  if (mime.includes('mp3') || /\bmpeg\b/.test(mime)) return 'mp3'
  if (mime.includes('ogg')) return 'ogg'
  if (mime.includes('webm')) return 'webm'
  return 'm4a' // default
}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { ApiError } from '../errors.js'
|
|
2
|
+
import { getLogger } from '../logger.js'
|
|
3
|
+
|
|
4
|
+
/** Outcome of running a batch of items through `processQueue`. */
export interface QueueResult<T> {
  /** Items whose processor call completed without throwing. */
  succeeded: T[]
  /** Items whose processor call threw, paired with the error. */
  failed: { item: T; error: Error }[]
}
|
|
8
|
+
|
|
9
|
+
/**
 * Process items with bounded concurrency, collecting successes and failures.
 *
 * Implemented as a fixed pool of workers that each pull the next unclaimed
 * item until none remain — no extra dependencies. The earlier wake-up based
 * loop could stall forever when every in-flight task settled before the
 * coordinator started waiting (for example with processors that resolve or
 * reject immediately) or when `concurrency` was zero or negative.
 *
 * @param items - Items to process; claimed in array order.
 * @param processor - Async work for one item. A throw or rejection is recorded
 *   in `failed` and never aborts the batch.
 * @param concurrency - Maximum number of processor calls in flight. Values that
 *   are not greater than zero (including `NaN`) are treated as 1.
 * @returns Succeeded items and failed items (with errors), each in completion order.
 */
export async function processQueue<T>(
  items: T[],
  processor: (item: T) => Promise<void>,
  concurrency: number,
): Promise<QueueResult<T>> {
  const succeeded: T[] = []
  const failed: Array<{ item: T; error: Error }> = []

  // Never start more workers than items, and always at least one when there is work.
  const limit = concurrency > 0 ? Math.ceil(concurrency) : 1
  const workerCount = Math.min(items.length, limit)

  let next = 0

  const worker = async (): Promise<void> => {
    // Claiming an index is synchronous, so two workers never take the same item.
    while (next < items.length) {
      const item = items[next++] as T
      try {
        await processor(item)
        succeeded.push(item)
      } catch (err) {
        failed.push({ item, error: err instanceof Error ? err : new Error(String(err)) })
      }
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => worker()))

  return { succeeded, failed }
}
|
|
57
|
+
|
|
58
|
+
/**
 * Call `fn`, retrying with increasing delays while it keeps failing with a
 * retryable error (as judged by `isRetryableError`).
 *
 * Delays before the 2nd, 3rd and 4th attempt are 1 s, 4 s and 16 s; any later
 * attempt waits 16 s. The last error is rethrown unchanged when attempts run
 * out or the error is not retryable.
 *
 * @param fn - Operation to run.
 * @param opts - `maxAttempts` (default 4) and an optional `label` added to log entries.
 * @returns Whatever `fn` resolves to.
 */
export async function retryWithBackoff<T>(
  fn: () => Promise<T>,
  opts: { maxAttempts?: number; label?: string } = {},
): Promise<T> {
  const attemptLimit = opts.maxAttempts ?? 4
  const backoffMs = [0, 1000, 4000, 16000]
  const logger = getLogger()

  let attempt = 1
  while (attempt <= attemptLimit) {
    try {
      return await fn()
    } catch (err) {
      const outOfAttempts = attempt === attemptLimit
      const canRetry = isRetryableError(err)

      if (outOfAttempts || !canRetry) {
        // Only mention "giving up" when at least one retry actually happened.
        if (attempt > 1) {
          logger.warn({ attempt, label: opts.label, err }, 'Giving up after retries')
        }
        throw err
      }

      const delay = backoffMs[attempt] ?? 16000
      logger.debug({ attempt, delay, label: opts.label }, 'Retrying after delay')
      await sleep(delay)
    }
    attempt += 1
  }

  // Only reached when the loop body never ran (attemptLimit below 1).
  throw new Error('Retry loop exhausted')
}
|
|
93
|
+
|
|
94
|
+
/**
 * Decide whether a failure is worth retrying.
 *
 * An `ApiError` answers through its own `isRetryable` flag; any other `Error`
 * is retryable only when its message contains `429`. Everything else is not.
 */
function isRetryableError(err: unknown): boolean {
  if (err instanceof ApiError) {
    return err.isRetryable
  }
  return err instanceof Error && err.message.includes('429')
}
|
|
99
|
+
|
|
100
|
+
/**
 * Wait for the given number of milliseconds.
 *
 * @param ms - Delay passed to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>(done => {
    setTimeout(done, ms)
  })
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import fs from 'node:fs/promises'
|
|
2
|
+
import crypto from 'node:crypto'
|
|
3
|
+
import { syncStatePath } from '../storage/paths.js'
|
|
4
|
+
import { writeFileAtomic } from '../storage/atomic.js'
|
|
5
|
+
import { SyncStateSchema, type SyncState, type RecordingState } from './types.js'
|
|
6
|
+
import type { PlaudRecording } from '../client/types.js'
|
|
7
|
+
import { getLogger } from '../logger.js'
|
|
8
|
+
|
|
9
|
+
/**
 * Keeps the per-recording sync state (what was downloaded, with which content
 * hash, and whether it was verified) and persists it as JSON at
 * `syncStatePath(outDir)`.
 */
export class IncrementalTracker {
  private state: SyncState = {
    schemaVersion: 1,
    recordings: {},
  }

  /**
   * Load persisted state from disk.
   *
   * A missing file is silently ignored. A file that cannot be read or parsed,
   * or that fails schema validation, is logged as a warning; in every such
   * case the in-memory state is left as it was.
   */
  async load(outDir: string): Promise<void> {
    const statePath = syncStatePath(outDir)
    try {
      const text = await fs.readFile(statePath, 'utf8')
      const outcome = SyncStateSchema.safeParse(JSON.parse(text))
      if (!outcome.success) {
        getLogger().warn({ issues: outcome.error.issues }, 'Sync state schema invalid — starting fresh')
        return
      }
      this.state = outcome.data
      getLogger().debug(
        { recordingCount: Object.keys(this.state.recordings).length },
        'Loaded sync state',
      )
    } catch (err: unknown) {
      const code = (err as NodeJS.ErrnoException).code
      if (code === 'ENOENT') return
      getLogger().warn({ err }, 'Failed to read sync state — starting fresh')
    }
  }

  /** Stamp `lastAttemptAt` with the current time and write the state to disk. */
  async persist(outDir: string): Promise<void> {
    this.state.lastAttemptAt = new Date().toISOString()
    const serialized = JSON.stringify(this.state, null, 2)
    await writeFileAtomic(syncStatePath(outDir), serialized)
  }

  /** Record the current time as the last successful sync. */
  markSuccessfulSync(): void {
    this.state.lastSuccessfulSyncAt = new Date().toISOString()
  }

  /** Time of the last successful sync, or `undefined` if there has been none. */
  getSince(): Date | undefined {
    const last = this.state.lastSuccessfulSyncAt
    return last ? new Date(last) : undefined
  }

  /**
   * Decide whether a recording must be (re)downloaded: it is unknown, its
   * content hash changed, a transcript became available that was not stored
   * before, or no download time was ever recorded.
   */
  needsDownload(recording: PlaudRecording): boolean {
    const known = this.state.recordings[recording.id]
    if (!known) return true

    if (known.contentHash !== this.computeContentHash(recording)) return true

    // Re-download if key files are missing
    if (!known.hasTranscript && recording.hasTranscript) return true

    return !known.downloadedAt
  }

  /**
   * Fingerprint of the recording fields that indicate a change: id, updatedAt,
   * hasTranscript, transcriptStatus, duration and title.
   *
   * @returns The first 16 hex characters of the SHA-256 of those fields as JSON.
   */
  computeContentHash(recording: PlaudRecording): string {
    const { id, updatedAt, hasTranscript, transcriptStatus, duration, title } = recording
    const fingerprintSource = JSON.stringify({
      id,
      updatedAt,
      hasTranscript,
      transcriptStatus,
      duration,
      title,
    })
    const digest = crypto.createHash('sha256').update(fingerprintSource).digest('hex')
    return digest.slice(0, 16)
  }

  /** Record a completed download; the entry starts out unverified. */
  markComplete(
    recordingId: string,
    recordedAt: string,
    opts: { hasAudio: boolean; hasTranscript: boolean; contentHash: string },
  ): void {
    const { contentHash, hasAudio, hasTranscript } = opts
    this.state.recordings[recordingId] = {
      recordedAt,
      contentHash,
      downloadedAt: new Date().toISOString(),
      hasAudio,
      hasTranscript,
      verified: false,
    }
  }

  /** Flag a known recording as verified now; unknown ids are ignored. */
  markVerified(recordingId: string): void {
    const entry = this.state.recordings[recordingId]
    if (!entry) return
    entry.verified = true
    entry.verifiedAt = new Date().toISOString()
  }

  /** Stored state for one recording, if any. */
  getRecordingState(recordingId: string): RecordingState | undefined {
    return this.state.recordings[recordingId]
  }

  /** Ids of every recording present in the state. */
  getAllRecordingIds(): string[] {
    return Object.keys(this.state.recordings)
  }

  /** Raw timestamp string of the last successful sync, if any. */
  get lastSuccessfulSyncAt(): string | undefined {
    return this.state.lastSuccessfulSyncAt
  }
}
|