@alta-foundation/plaud-extractor 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +9 -0
- package/.github/workflows/ci.yml +33 -0
- package/.github/workflows/publish.yml +46 -0
- package/CLAUDE.md +53 -0
- package/README.md +318 -0
- package/dist/PlaudExtractor.d.ts +61 -0
- package/dist/PlaudExtractor.d.ts.map +1 -0
- package/dist/PlaudExtractor.js +236 -0
- package/dist/PlaudExtractor.js.map +1 -0
- package/dist/auth/browser-auth.d.ts +10 -0
- package/dist/auth/browser-auth.d.ts.map +1 -0
- package/dist/auth/browser-auth.js +220 -0
- package/dist/auth/browser-auth.js.map +1 -0
- package/dist/auth/token-store.d.ts +9 -0
- package/dist/auth/token-store.d.ts.map +1 -0
- package/dist/auth/token-store.js +74 -0
- package/dist/auth/token-store.js.map +1 -0
- package/dist/auth/types.d.ts +266 -0
- package/dist/auth/types.d.ts.map +1 -0
- package/dist/auth/types.js +32 -0
- package/dist/auth/types.js.map +1 -0
- package/dist/cli/bin.d.ts +3 -0
- package/dist/cli/bin.d.ts.map +1 -0
- package/dist/cli/bin.js +30 -0
- package/dist/cli/bin.js.map +1 -0
- package/dist/cli/commands/auth.d.ts +3 -0
- package/dist/cli/commands/auth.d.ts.map +1 -0
- package/dist/cli/commands/auth.js +22 -0
- package/dist/cli/commands/auth.js.map +1 -0
- package/dist/cli/commands/backfill.d.ts +3 -0
- package/dist/cli/commands/backfill.d.ts.map +1 -0
- package/dist/cli/commands/backfill.js +59 -0
- package/dist/cli/commands/backfill.js.map +1 -0
- package/dist/cli/commands/sync.d.ts +3 -0
- package/dist/cli/commands/sync.d.ts.map +1 -0
- package/dist/cli/commands/sync.js +55 -0
- package/dist/cli/commands/sync.js.map +1 -0
- package/dist/cli/commands/verify.d.ts +3 -0
- package/dist/cli/commands/verify.d.ts.map +1 -0
- package/dist/cli/commands/verify.js +28 -0
- package/dist/cli/commands/verify.js.map +1 -0
- package/dist/cli/exit-codes.d.ts +8 -0
- package/dist/cli/exit-codes.d.ts.map +1 -0
- package/dist/cli/exit-codes.js +16 -0
- package/dist/cli/exit-codes.js.map +1 -0
- package/dist/cli/options.d.ts +31 -0
- package/dist/cli/options.d.ts.map +1 -0
- package/dist/cli/options.js +11 -0
- package/dist/cli/options.js.map +1 -0
- package/dist/client/endpoints.d.ts +26 -0
- package/dist/client/endpoints.d.ts.map +1 -0
- package/dist/client/endpoints.js +54 -0
- package/dist/client/endpoints.js.map +1 -0
- package/dist/client/http.d.ts +17 -0
- package/dist/client/http.d.ts.map +1 -0
- package/dist/client/http.js +92 -0
- package/dist/client/http.js.map +1 -0
- package/dist/client/plaud-client.d.ts +14 -0
- package/dist/client/plaud-client.d.ts.map +1 -0
- package/dist/client/plaud-client.js +216 -0
- package/dist/client/plaud-client.js.map +1 -0
- package/dist/client/types.d.ts +154 -0
- package/dist/client/types.d.ts.map +1 -0
- package/dist/client/types.js +41 -0
- package/dist/client/types.js.map +1 -0
- package/dist/errors.d.ts +24 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +51 -0
- package/dist/errors.js.map +1 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +5 -0
- package/dist/index.js.map +1 -0
- package/dist/logger.d.ts +9 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +37 -0
- package/dist/logger.js.map +1 -0
- package/dist/mcp/job-tools.d.ts +3 -0
- package/dist/mcp/job-tools.d.ts.map +1 -0
- package/dist/mcp/job-tools.js +108 -0
- package/dist/mcp/job-tools.js.map +1 -0
- package/dist/mcp/read-tools.d.ts +3 -0
- package/dist/mcp/read-tools.d.ts.map +1 -0
- package/dist/mcp/read-tools.js +173 -0
- package/dist/mcp/read-tools.js.map +1 -0
- package/dist/mcp/server.d.ts +3 -0
- package/dist/mcp/server.d.ts.map +1 -0
- package/dist/mcp/server.js +32 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/storage/atomic.d.ts +5 -0
- package/dist/storage/atomic.d.ts.map +1 -0
- package/dist/storage/atomic.js +51 -0
- package/dist/storage/atomic.js.map +1 -0
- package/dist/storage/checksums.d.ts +15 -0
- package/dist/storage/checksums.d.ts.map +1 -0
- package/dist/storage/checksums.js +56 -0
- package/dist/storage/checksums.js.map +1 -0
- package/dist/storage/dataset-writer.d.ts +21 -0
- package/dist/storage/dataset-writer.d.ts.map +1 -0
- package/dist/storage/dataset-writer.js +52 -0
- package/dist/storage/dataset-writer.js.map +1 -0
- package/dist/storage/paths.d.ts +9 -0
- package/dist/storage/paths.d.ts.map +1 -0
- package/dist/storage/paths.js +38 -0
- package/dist/storage/paths.js.map +1 -0
- package/dist/storage/recording-store.d.ts +24 -0
- package/dist/storage/recording-store.d.ts.map +1 -0
- package/dist/storage/recording-store.js +161 -0
- package/dist/storage/recording-store.js.map +1 -0
- package/dist/sync/download-queue.d.ts +21 -0
- package/dist/sync/download-queue.d.ts.map +1 -0
- package/dist/sync/download-queue.js +82 -0
- package/dist/sync/download-queue.js.map +1 -0
- package/dist/sync/incremental.d.ts +21 -0
- package/dist/sync/incremental.d.ts.map +1 -0
- package/dist/sync/incremental.js +96 -0
- package/dist/sync/incremental.js.map +1 -0
- package/dist/sync/sync-engine.d.ts +6 -0
- package/dist/sync/sync-engine.d.ts.map +1 -0
- package/dist/sync/sync-engine.js +135 -0
- package/dist/sync/sync-engine.js.map +1 -0
- package/dist/sync/types.d.ts +130 -0
- package/dist/sync/types.d.ts.map +1 -0
- package/dist/sync/types.js +17 -0
- package/dist/sync/types.js.map +1 -0
- package/dist/transcript/formatter.d.ts +4 -0
- package/dist/transcript/formatter.d.ts.map +1 -0
- package/dist/transcript/formatter.js +88 -0
- package/dist/transcript/formatter.js.map +1 -0
- package/package.json +41 -0
- package/src/PlaudExtractor.ts +275 -0
- package/src/auth/browser-auth.ts +248 -0
- package/src/auth/token-store.ts +79 -0
- package/src/auth/types.ts +41 -0
- package/src/cli/bin.ts +30 -0
- package/src/cli/commands/auth.ts +27 -0
- package/src/cli/commands/backfill.ts +77 -0
- package/src/cli/commands/sync.ts +71 -0
- package/src/cli/commands/verify.ts +31 -0
- package/src/cli/exit-codes.ts +14 -0
- package/src/cli/options.ts +10 -0
- package/src/client/endpoints.ts +62 -0
- package/src/client/http.ts +110 -0
- package/src/client/plaud-client.ts +268 -0
- package/src/client/types.ts +62 -0
- package/src/errors.ts +57 -0
- package/src/index.ts +17 -0
- package/src/logger.ts +49 -0
- package/src/mcp/job-tools.ts +156 -0
- package/src/mcp/read-tools.ts +204 -0
- package/src/mcp/server.ts +39 -0
- package/src/storage/atomic.ts +51 -0
- package/src/storage/checksums.ts +76 -0
- package/src/storage/dataset-writer.ts +74 -0
- package/src/storage/paths.ts +44 -0
- package/src/storage/recording-store.ts +182 -0
- package/src/sync/download-queue.ts +102 -0
- package/src/sync/incremental.ts +111 -0
- package/src/sync/sync-engine.ts +183 -0
- package/src/sync/types.ts +64 -0
- package/src/transcript/formatter.ts +91 -0
- package/tsconfig.build.json +8 -0
- package/tsconfig.json +19 -0
package/.env.example
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# Plaud credentials (for automated/headless login)
|
|
2
|
+
PLAUD_EMAIL=you@example.com
|
|
3
|
+
PLAUD_PASSWORD=your-password
|
|
4
|
+
|
|
5
|
+
# Override default data directory (default: ~/alta/data/plaud)
|
|
6
|
+
ALTA_DATA_DIR=
|
|
7
|
+
|
|
8
|
+
# Log level: debug | info | warn | error (default: info)
|
|
9
|
+
LOG_LEVEL=info
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
build:
|
|
11
|
+
name: Typecheck & Build
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- uses: pnpm/action-setup@v4
|
|
18
|
+
with:
|
|
19
|
+
version: latest
|
|
20
|
+
|
|
21
|
+
- uses: actions/setup-node@v4
|
|
22
|
+
with:
|
|
23
|
+
node-version: 20
|
|
24
|
+
cache: pnpm
|
|
25
|
+
|
|
26
|
+
- name: Install dependencies
|
|
27
|
+
run: pnpm install --frozen-lockfile
|
|
28
|
+
|
|
29
|
+
- name: Typecheck
|
|
30
|
+
run: pnpm typecheck
|
|
31
|
+
|
|
32
|
+
- name: Build
|
|
33
|
+
run: pnpm build
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
name: Publish
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*'
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
publish:
|
|
10
|
+
name: Build & Publish to npm
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
permissions:
|
|
13
|
+
contents: read
|
|
14
|
+
id-token: write # required for provenance
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- uses: pnpm/action-setup@v4
|
|
20
|
+
with:
|
|
21
|
+
version: latest
|
|
22
|
+
|
|
23
|
+
- uses: actions/setup-node@v4
|
|
24
|
+
with:
|
|
25
|
+
node-version: 20
|
|
26
|
+
cache: pnpm
|
|
27
|
+
registry-url: https://registry.npmjs.org
|
|
28
|
+
|
|
29
|
+
- name: Install dependencies
|
|
30
|
+
run: pnpm install --frozen-lockfile
|
|
31
|
+
|
|
32
|
+
- name: Typecheck
|
|
33
|
+
run: pnpm typecheck
|
|
34
|
+
|
|
35
|
+
- name: Build
|
|
36
|
+
run: pnpm build
|
|
37
|
+
|
|
38
|
+
- name: Set version from tag
|
|
39
|
+
run: |
|
|
40
|
+
VERSION=${GITHUB_REF_NAME#v}
|
|
41
|
+
pnpm version "$VERSION" --no-git-tag-version
|
|
42
|
+
|
|
43
|
+
- name: Publish
|
|
44
|
+
run: pnpm publish --no-git-checks --access public --provenance
|
|
45
|
+
env:
|
|
46
|
+
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
package/CLAUDE.md
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# plaud-extractor — Claude Code guide
|
|
2
|
+
|
|
3
|
+
## What this project is
|
|
4
|
+
SDK + CLI to pull all recordings from a Plaud account into a local-first dataset.
|
|
5
|
+
Used as a building block for Alta | CORE (imported as `@alta-foundation/plaud-extractor`).
|
|
6
|
+
|
|
7
|
+
## Key architecture rules
|
|
8
|
+
- **SDK-first**: `PlaudExtractor` class in `src/PlaudExtractor.ts` is the public API. All core logic lives there and in sub-modules. The CLI (`src/cli/bin.ts`) is a thin wrapper — the **only** place `process.exit()` is called.
|
|
9
|
+
- **No side effects at import**: library code never writes to `process.stdout` or calls `process.exit()`.
|
|
10
|
+
- **Atomic writes**: all file writes go through `src/storage/atomic.ts:writeFileAtomic`.
|
|
11
|
+
- **Adaptation layer**: `src/client/plaud-client.ts` contains `normalizeRecording()` and `normalizeTranscript()`. These translate raw Plaud API shapes to the canonical schema. **Update these first** when real API response shapes are observed.
|
|
12
|
+
|
|
13
|
+
## Dev commands (pnpm)
|
|
14
|
+
```bash
|
|
15
|
+
pnpm dev -- auth # first-time auth (launches browser)
|
|
16
|
+
pnpm dev -- sync --dry-run # preview what would be downloaded
|
|
17
|
+
pnpm dev -- sync --out ~/alta/data/plaud # incremental sync
|
|
18
|
+
pnpm dev -- backfill --limit 5 # download first 5 recordings
|
|
19
|
+
pnpm dev -- verify --repair # check + fix checksums
|
|
20
|
+
pnpm build # compile to dist/
|
|
21
|
+
pnpm typecheck # type-check without emitting
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## File locations
|
|
25
|
+
| Purpose | Path |
|
|
26
|
+
|---|---|
|
|
27
|
+
| Auth credentials | `~/.alta/plaud-auth.json` |
|
|
28
|
+
| Data output (default) | `~/alta/data/plaud/` |
|
|
29
|
+
| Sync state | `<out>/_state/sync_state.json` |
|
|
30
|
+
| Run logs | `<out>/_state/run_logs.ndjson` |
|
|
31
|
+
| Dataset | `<out>/datasets/plaud_transcripts.jsonl` |
|
|
32
|
+
|
|
33
|
+
## Reading run logs
|
|
34
|
+
```bash
|
|
35
|
+
tail -f ~/alta/data/plaud/_state/run_logs.ndjson | jq .
|
|
36
|
+
jq 'select(.recordingId == "abc123")' ~/alta/data/plaud/_state/run_logs.ndjson
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## How Alta CORE uses this SDK
|
|
40
|
+
```typescript
|
|
41
|
+
import { PlaudExtractor } from '@alta-foundation/plaud-extractor'
|
|
42
|
+
|
|
43
|
+
const extractor = new PlaudExtractor({ outDir: '~/alta/data/plaud' })
|
|
44
|
+
const result = await extractor.sync({ since: new Date('2026-01-01') })
|
|
45
|
+
console.log(result.succeeded, 'new recordings downloaded')
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## After first real auth capture
|
|
49
|
+
1. Run `pnpm dev -- auth` and log in manually
|
|
50
|
+
2. Inspect `~/.alta/plaud-auth.json` → check `endpointMap`
|
|
51
|
+
3. Capture a real recording JSON response and update `normalizeRecording()` in `src/client/plaud-client.ts` to match its shape
|
|
52
|
+
4. Capture a real transcript JSON response and update `normalizeTranscript()` in `src/client/plaud-client.ts` to match its shape
|
|
53
|
+
5. Run `pnpm dev -- sync --dry-run --limit 1` to verify parsing
|
package/README.md
ADDED
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
# @alta-foundation/plaud-extractor
|
|
2
|
+
|
|
3
|
+
SDK + CLI to pull all recordings, transcripts, and metadata from a [Plaud](https://www.plaud.ai) account into a structured local-first dataset. Designed as a building block for Alta | CORE.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Contents
|
|
8
|
+
|
|
9
|
+
- [Prerequisites](#prerequisites)
|
|
10
|
+
- [Installation](#installation)
|
|
11
|
+
- [Authentication](#authentication)
|
|
12
|
+
- [CLI](#cli)
|
|
13
|
+
- [SDK](#sdk)
|
|
14
|
+
- [MCP Server](#mcp-server)
|
|
15
|
+
- [Output Structure](#output-structure)
|
|
16
|
+
- [Environment Variables](#environment-variables)
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Prerequisites
|
|
21
|
+
|
|
22
|
+
- Node.js 20+
|
|
23
|
+
- pnpm (or npm/yarn)
|
|
24
|
+
- Google Chrome installed on the machine (used for auth — bypasses Google's automation detection)
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
# As a CLI tool
|
|
32
|
+
pnpm add -g @alta-foundation/plaud-extractor
|
|
33
|
+
|
|
34
|
+
# As an SDK dependency
|
|
35
|
+
pnpm add @alta-foundation/plaud-extractor
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## Authentication
|
|
41
|
+
|
|
42
|
+
Authentication is required once: the `auth` command opens a real Chrome browser, you log in to Plaud, and the session is saved to `~/.alta/plaud-auth.json`.
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
alta-plaud auth
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
The browser closes automatically once logged in. Credentials include a JWT valid for ~1 year — you won't need to repeat this often.
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## CLI
|
|
53
|
+
|
|
54
|
+
### `alta-plaud sync`
|
|
55
|
+
|
|
56
|
+
Incremental sync — only downloads new or changed recordings since the last run.
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
alta-plaud sync [options]
|
|
60
|
+
|
|
61
|
+
Options:
|
|
62
|
+
--out <dir> Output directory (default: ~/alta/data/plaud)
|
|
63
|
+
--since <iso> Only sync recordings after this ISO date
|
|
64
|
+
--limit <n> Max number of recordings to process
|
|
65
|
+
--concurrency <n> Parallel downloads (default: 3)
|
|
66
|
+
--formats <list> Transcript formats: json,txt,md (default: all)
|
|
67
|
+
--no-dataset Skip appending to datasets/plaud_transcripts.jsonl
|
|
68
|
+
--dry-run Print what would be downloaded without doing it
|
|
69
|
+
--verbose Verbose logging
|
|
70
|
+
--redact Redact tokens from logs
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
# Preview what would sync
|
|
75
|
+
alta-plaud sync --dry-run
|
|
76
|
+
|
|
77
|
+
# Sync last 7 days only (BSD/macOS `date` syntax; on GNU/Linux use: date -d '7 days ago' +%Y-%m-%d)
|
|
78
|
+
alta-plaud sync --since $(date -v-7d +%Y-%m-%d)
|
|
79
|
+
|
|
80
|
+
# Sync to a custom directory
|
|
81
|
+
alta-plaud sync --out ~/my-recordings
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
If the session token expires mid-sync, re-authentication is triggered automatically and the sync is retried once.
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
### `alta-plaud backfill`
|
|
89
|
+
|
|
90
|
+
Full re-evaluation of all recordings, regardless of incremental state. Useful after a schema change or to repair gaps.
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
alta-plaud backfill [options]
|
|
94
|
+
|
|
95
|
+
Options:
|
|
96
|
+
--out <dir> Output directory (default: ~/alta/data/plaud)
|
|
97
|
+
--since <iso> Only backfill recordings after this ISO date
|
|
98
|
+
--limit <n> Max number of recordings to process
|
|
99
|
+
--concurrency <n> Parallel downloads (default: 3)
|
|
100
|
+
--formats <list> Transcript formats: json,txt,md (default: all)
|
|
101
|
+
--no-dataset Skip appending to datasets/plaud_transcripts.jsonl
|
|
102
|
+
--dry-run Print plan without downloading
|
|
103
|
+
--yes Skip confirmation prompt
|
|
104
|
+
--verbose Verbose logging
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
# Backfill first 10 recordings (useful for testing)
|
|
109
|
+
alta-plaud backfill --limit 10 --yes
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
### `alta-plaud verify`
|
|
115
|
+
|
|
116
|
+
Walk all downloaded recordings and verify checksums. Detects missing or corrupted files.
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
alta-plaud verify [options]
|
|
120
|
+
|
|
121
|
+
Options:
|
|
122
|
+
--out <dir> Output directory (default: ~/alta/data/plaud)
|
|
123
|
+
--repair Re-download files with checksum mismatches
|
|
124
|
+
--verbose Verbose logging
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
### `alta-plaud auth`
|
|
130
|
+
|
|
131
|
+
Launch browser for (re-)authentication.
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
alta-plaud auth [options]
|
|
135
|
+
|
|
136
|
+
Options:
|
|
137
|
+
--out <dir> Data directory for logs
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## SDK
|
|
143
|
+
|
|
144
|
+
```typescript
|
|
145
|
+
import { PlaudExtractor } from '@alta-foundation/plaud-extractor'
|
|
146
|
+
|
|
147
|
+
const extractor = new PlaudExtractor({
|
|
148
|
+
outDir: '~/alta/data/plaud', // default
|
|
149
|
+
verbose: false,
|
|
150
|
+
redact: false,
|
|
151
|
+
})
|
|
152
|
+
|
|
153
|
+
// One-time auth (opens browser)
|
|
154
|
+
await extractor.authenticate()
|
|
155
|
+
|
|
156
|
+
// Incremental sync
|
|
157
|
+
const result = await extractor.sync({ since: new Date('2026-01-01') })
|
|
158
|
+
console.log(`${result.succeeded} downloaded, ${result.failed} failed`)
|
|
159
|
+
|
|
160
|
+
// Full backfill
|
|
161
|
+
await extractor.backfill({ limit: 50 })
|
|
162
|
+
|
|
163
|
+
// Verify checksums
|
|
164
|
+
const verify = await extractor.verify({ repair: false })
|
|
165
|
+
|
|
166
|
+
// Export JSONL dataset
|
|
167
|
+
const datasetPath = await extractor.exportDataset()
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
**Types available:**
|
|
171
|
+
|
|
172
|
+
```typescript
|
|
173
|
+
import type {
|
|
174
|
+
SyncOptions,
|
|
175
|
+
SyncResult,
|
|
176
|
+
BackfillOptions,
|
|
177
|
+
VerifyResult,
|
|
178
|
+
PlaudRecording,
|
|
179
|
+
PlaudTranscript,
|
|
180
|
+
AuthSession,
|
|
181
|
+
} from '@alta-foundation/plaud-extractor'
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
**Error handling:**
|
|
185
|
+
|
|
186
|
+
```typescript
|
|
187
|
+
import { AuthError, ApiError, StorageError } from '@alta-foundation/plaud-extractor'
|
|
188
|
+
|
|
189
|
+
try {
|
|
190
|
+
await extractor.sync()
|
|
191
|
+
} catch (err) {
|
|
192
|
+
if (err instanceof AuthError) {
|
|
193
|
+
// Token expired or invalid — re-authenticate
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
## MCP Server
|
|
201
|
+
|
|
202
|
+
The MCP server exposes Plaud data as tools for Claude (or any MCP client). Read operations (list, transcript) are synchronous and work offline from local files. Sync/backfill run in the background and return a job ID for polling.
|
|
203
|
+
|
|
204
|
+
### Tools
|
|
205
|
+
|
|
206
|
+
| Tool | Type | Description |
|
|
207
|
+
|---|---|---|
|
|
208
|
+
| `plaud_status` | sync | Auth status, last sync time, recording count |
|
|
209
|
+
| `plaud_list_recordings` | sync | List local recordings — filter by date or title |
|
|
210
|
+
| `plaud_get_transcript` | sync | Full transcript by recording ID or partial title |
|
|
211
|
+
| `plaud_sync` | async | Start incremental sync → returns `jobId` |
|
|
212
|
+
| `plaud_backfill` | async | Start full backfill → returns `jobId` |
|
|
213
|
+
| `plaud_job_status` | sync | Poll status of a background job |
|
|
214
|
+
|
|
215
|
+
### Configuration
|
|
216
|
+
|
|
217
|
+
Add to Claude Code settings (`~/.claude/settings.json`) or your MCP client config:
|
|
218
|
+
|
|
219
|
+
```json
|
|
220
|
+
{
|
|
221
|
+
"mcpServers": {
|
|
222
|
+
"alta-plaud": {
|
|
223
|
+
"command": "alta-plaud-mcp",
|
|
224
|
+
"env": {
|
|
225
|
+
"ALTA_DATA_DIR": "/Users/you/alta/data/plaud"
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
If running from source:
|
|
233
|
+
|
|
234
|
+
```json
|
|
235
|
+
{
|
|
236
|
+
"mcpServers": {
|
|
237
|
+
"alta-plaud": {
|
|
238
|
+
"command": "node",
|
|
239
|
+
"args": ["/path/to/plaud-extractor/dist/mcp/server.js"],
|
|
240
|
+
"env": {
|
|
241
|
+
"ALTA_DATA_DIR": "/Users/you/alta/data/plaud"
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
### Async job flow
|
|
249
|
+
|
|
250
|
+
```
|
|
251
|
+
Claude: plaud_sync({ since: "2026-02-01" })
|
|
252
|
+
→ { jobId: "sync_20260226_a1b2c3", status: "running" }
|
|
253
|
+
|
|
254
|
+
Claude: plaud_job_status({ jobId: "sync_20260226_a1b2c3" })
|
|
255
|
+
→ { status: "completed", result: { succeeded: 12, failed: 0, ... } }
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
---
|
|
259
|
+
|
|
260
|
+
## Output Structure
|
|
261
|
+
|
|
262
|
+
```
|
|
263
|
+
~/alta/data/plaud/
|
|
264
|
+
├── recordings/
|
|
265
|
+
│ └── 2026/
|
|
266
|
+
│ └── 02/
|
|
267
|
+
│ └── 20260224T083012Z__plaud_<id>/
|
|
268
|
+
│ ├── meta.json # Recording metadata
|
|
269
|
+
│ ├── transcript.json # Structured transcript with segments
|
|
270
|
+
│ ├── transcript.txt # Plain text transcript
|
|
271
|
+
│ ├── transcript.md # Markdown with YAML frontmatter + timestamps
|
|
272
|
+
│ ├── audio.ogg # Original audio file
|
|
273
|
+
│ └── checksums.json # SHA-256 hashes for all files
|
|
274
|
+
├── datasets/
|
|
275
|
+
│ └── plaud_transcripts.jsonl # All recordings as JSONL (append-only)
|
|
276
|
+
└── _state/
|
|
277
|
+
├── sync_state.json # Incremental sync state
|
|
278
|
+
├── run_logs.ndjson # Structured logs (pino NDJSON)
|
|
279
|
+
└── jobs/
|
|
280
|
+
└── sync_<id>.json # Background job state (MCP async)
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
---
|
|
284
|
+
|
|
285
|
+
## Environment Variables
|
|
286
|
+
|
|
287
|
+
| Variable | Description |
|
|
288
|
+
|---|---|
|
|
289
|
+
| `ALTA_DATA_DIR` | Override default output directory (`~/alta/data/plaud`) |
|
|
290
|
+
| `LOG_LEVEL` | Pino log level: `debug`, `info`, `warn`, `error` (default: `info`) |
|
|
291
|
+
| `DEBUG` | Print full stack traces on CLI errors |
|
|
292
|
+
|
|
293
|
+
---
|
|
294
|
+
|
|
295
|
+
## Development
|
|
296
|
+
|
|
297
|
+
```bash
|
|
298
|
+
# Install dependencies
|
|
299
|
+
pnpm install
|
|
300
|
+
|
|
301
|
+
# First-time auth
|
|
302
|
+
pnpm dev -- auth
|
|
303
|
+
|
|
304
|
+
# Preview sync
|
|
305
|
+
pnpm dev -- sync --dry-run
|
|
306
|
+
|
|
307
|
+
# Sync with verbose logging
|
|
308
|
+
pnpm dev -- sync --verbose
|
|
309
|
+
|
|
310
|
+
# Run MCP server locally
|
|
311
|
+
pnpm dev:mcp
|
|
312
|
+
|
|
313
|
+
# Build to dist/
|
|
314
|
+
pnpm build
|
|
315
|
+
|
|
316
|
+
# Type-check without emitting
|
|
317
|
+
pnpm typecheck
|
|
318
|
+
```
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { type Logger } from './logger.js';
|
|
2
|
+
import { type BrowserAuthOptions } from './auth/browser-auth.js';
|
|
3
|
+
import type { SyncOptions, SyncResult, BackfillOptions, VerifyResult } from './sync/types.js';
|
|
4
|
+
export interface PlaudExtractorConfig {
|
|
5
|
+
/** Output directory for recordings. Default: ~/alta/data/plaud */
|
|
6
|
+
outDir?: string;
|
|
7
|
+
/** Inject a custom pino logger (e.g., from Alta CORE) */
|
|
8
|
+
logger?: Logger;
|
|
9
|
+
/** Verbose logging */
|
|
10
|
+
verbose?: boolean;
|
|
11
|
+
/** Redact tokens from logs */
|
|
12
|
+
redact?: boolean;
|
|
13
|
+
}
|
|
14
|
+
export declare class PlaudExtractor {
|
|
15
|
+
private readonly outDir;
|
|
16
|
+
private readonly engine;
|
|
17
|
+
constructor(config?: PlaudExtractorConfig);
|
|
18
|
+
/**
|
|
19
|
+
* Launch browser for authentication.
|
|
20
|
+
* Saves credentials to ~/.alta/plaud-auth.json.
|
|
21
|
+
*/
|
|
22
|
+
authenticate(opts?: BrowserAuthOptions): Promise<void>;
|
|
23
|
+
/**
|
|
24
|
+
* Check if credentials exist and are not expired.
|
|
25
|
+
*/
|
|
26
|
+
isAuthenticated(): Promise<boolean>;
|
|
27
|
+
/**
|
|
28
|
+
* Incremental sync: only download new or changed recordings since last run.
|
|
29
|
+
* If the token expires mid-sync, re-authenticates automatically and retries once.
|
|
30
|
+
*/
|
|
31
|
+
sync(opts?: Partial<SyncOptions>): Promise<SyncResult>;
|
|
32
|
+
/**
|
|
33
|
+
* Full backfill: re-evaluate all recordings regardless of sync state.
|
|
34
|
+
* If the token expires mid-backfill, re-authenticates automatically and retries once.
|
|
35
|
+
*/
|
|
36
|
+
backfill(opts?: Partial<BackfillOptions>): Promise<SyncResult>;
|
|
37
|
+
/**
|
|
38
|
+
* Run sync/backfill, and if a token-expired AuthError occurs mid-run,
|
|
39
|
+
* automatically re-authenticate and retry once.
|
|
40
|
+
*/
|
|
41
|
+
private runWithReauth;
|
|
42
|
+
/**
|
|
43
|
+
* Walk all recording folders and verify checksums.
|
|
44
|
+
* With repair=true, re-download any file with a mismatch.
|
|
45
|
+
*/
|
|
46
|
+
verify(opts?: {
|
|
47
|
+
repair?: boolean;
|
|
48
|
+
}): Promise<VerifyResult>;
|
|
49
|
+
/**
|
|
50
|
+
* Export all local recordings to a JSONL dataset file.
|
|
51
|
+
* Returns the path to the generated file.
|
|
52
|
+
*/
|
|
53
|
+
exportDataset(opts?: {
|
|
54
|
+
format?: 'jsonl';
|
|
55
|
+
}): Promise<string>;
|
|
56
|
+
private walkAndExport;
|
|
57
|
+
private buildClient;
|
|
58
|
+
private buildSyncOptions;
|
|
59
|
+
get dataDir(): string;
|
|
60
|
+
}
|
|
61
|
+
//# sourceMappingURL=PlaudExtractor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PlaudExtractor.d.ts","sourceRoot":"","sources":["../src/PlaudExtractor.ts"],"names":[],"mappings":"AAGA,OAAO,EAA2B,KAAK,MAAM,EAAE,MAAM,aAAa,CAAA;AAElE,OAAO,EAAkB,KAAK,kBAAkB,EAAE,MAAM,wBAAwB,CAAA;AAQhF,OAAO,KAAK,EAAE,WAAW,EAAE,UAAU,EAAE,eAAe,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAA;AAE7F,MAAM,WAAW,oBAAoB;IACnC,kEAAkE;IAClE,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,sBAAsB;IACtB,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,8BAA8B;IAC9B,MAAM,CAAC,EAAE,OAAO,CAAA;CACjB;AAED,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAQ;IAC/B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAY;gBAEvB,MAAM,GAAE,oBAAyB;IAc7C;;;OAGG;IACG,YAAY,CAAC,IAAI,GAAE,kBAAuB,GAAG,OAAO,CAAC,IAAI,CAAC;IAKhE;;OAEG;IACG,eAAe,IAAI,OAAO,CAAC,OAAO,CAAC;IAOzC;;;OAGG;IACG,IAAI,CAAC,IAAI,GAAE,OAAO,CAAC,WAAW,CAAM,GAAG,OAAO,CAAC,UAAU,CAAC;IAIhE;;;OAGG;IACG,QAAQ,CAAC,IAAI,GAAE,OAAO,CAAC,eAAe,CAAM,GAAG,OAAO,CAAC,UAAU,CAAC;IAIxE;;;OAGG;YACW,aAAa;IAoB3B;;;OAGG;IACG,MAAM,CAAC,IAAI,GAAE;QAAE,MAAM,CAAC,EAAE,OAAO,CAAA;KAAO,GAAG,OAAO,CAAC,YAAY,CAAC;IA2CpE;;;OAGG;IACG,aAAa,CAAC,IAAI,GAAE;QAAE,MAAM,CAAC,EAAE,OAAO,CAAA;KAAO,GAAG,OAAO,CAAC,MAAM,CAAC;YAmBvD,aAAa;YA2Eb,WAAW;IAWzB,OAAO,CAAC,gBAAgB;IAYxB,IAAI,OAAO,IAAI,MAAM,CAEpB;CACF"}
|