bloby-bot 0.50.1 → 0.50.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/worker/index.ts +0 -113
- package/workspace/skills/plaud/SKILL.md +245 -140
- package/workspace/skills/plaud/skill.json +3 -3
package/package.json
CHANGED
package/worker/index.ts
CHANGED
|
@@ -1002,119 +1002,6 @@ app.post('/api/whisper/transcribe', express.json({ limit: '10mb' }), async (req,
|
|
|
1002
1002
|
}
|
|
1003
1003
|
});
|
|
1004
1004
|
|
|
1005
|
-
// Transcribe an audio file already on disk under workspace/files/.
|
|
1006
|
-
// Body: { path, saveTranscriptNext?, language? }. `path` is interpreted
|
|
1007
|
-
// relative to workspace/files/ ("files/" prefix is tolerated).
|
|
1008
|
-
app.post('/api/whisper/transcribe-file', express.json({ limit: '1mb' }), async (req, res) => {
|
|
1009
|
-
const whisperEnabled = getSetting('whisper_enabled');
|
|
1010
|
-
const whisperKey = getSetting('whisper_key');
|
|
1011
|
-
|
|
1012
|
-
if (whisperEnabled !== 'true' || !whisperKey) {
|
|
1013
|
-
res.status(400).json({ error: 'Whisper not enabled or API key missing' });
|
|
1014
|
-
return;
|
|
1015
|
-
}
|
|
1016
|
-
|
|
1017
|
-
const { path: relPath, saveTranscriptNext, language } = req.body as {
|
|
1018
|
-
path?: string;
|
|
1019
|
-
saveTranscriptNext?: boolean;
|
|
1020
|
-
language?: string;
|
|
1021
|
-
};
|
|
1022
|
-
|
|
1023
|
-
if (!relPath || typeof relPath !== 'string') {
|
|
1024
|
-
res.status(400).json({ error: 'Missing path' });
|
|
1025
|
-
return;
|
|
1026
|
-
}
|
|
1027
|
-
|
|
1028
|
-
const normalized = relPath.replace(/^\/+/, '').replace(/^files\//, '');
|
|
1029
|
-
const absPath = path.resolve(paths.files, normalized);
|
|
1030
|
-
if (absPath !== paths.files && !absPath.startsWith(paths.files + path.sep)) {
|
|
1031
|
-
res.status(400).json({ error: 'Path escapes workspace/files/' });
|
|
1032
|
-
return;
|
|
1033
|
-
}
|
|
1034
|
-
if (!fs.existsSync(absPath) || !fs.statSync(absPath).isFile()) {
|
|
1035
|
-
res.status(404).json({ error: 'File not found' });
|
|
1036
|
-
return;
|
|
1037
|
-
}
|
|
1038
|
-
|
|
1039
|
-
try {
|
|
1040
|
-
const audioBuffer = fs.readFileSync(absPath);
|
|
1041
|
-
const filename = path.basename(absPath);
|
|
1042
|
-
const ext = path.extname(filename).toLowerCase().slice(1);
|
|
1043
|
-
const contentTypes: Record<string, string> = {
|
|
1044
|
-
mp3: 'audio/mpeg',
|
|
1045
|
-
m4a: 'audio/mp4',
|
|
1046
|
-
mp4: 'audio/mp4',
|
|
1047
|
-
wav: 'audio/wav',
|
|
1048
|
-
webm: 'audio/webm',
|
|
1049
|
-
ogg: 'audio/ogg',
|
|
1050
|
-
opus: 'audio/ogg',
|
|
1051
|
-
flac: 'audio/flac',
|
|
1052
|
-
};
|
|
1053
|
-
const contentType = contentTypes[ext] || 'application/octet-stream';
|
|
1054
|
-
|
|
1055
|
-
const boundary = '----WhisperBoundary' + Date.now();
|
|
1056
|
-
const CRLF = '\r\n';
|
|
1057
|
-
const parts: Buffer[] = [];
|
|
1058
|
-
|
|
1059
|
-
parts.push(Buffer.from(
|
|
1060
|
-
`--${boundary}${CRLF}` +
|
|
1061
|
-
`Content-Disposition: form-data; name="file"; filename="${filename}"${CRLF}` +
|
|
1062
|
-
`Content-Type: ${contentType}${CRLF}${CRLF}`
|
|
1063
|
-
));
|
|
1064
|
-
parts.push(audioBuffer);
|
|
1065
|
-
parts.push(Buffer.from(CRLF));
|
|
1066
|
-
|
|
1067
|
-
parts.push(Buffer.from(
|
|
1068
|
-
`--${boundary}${CRLF}` +
|
|
1069
|
-
`Content-Disposition: form-data; name="model"${CRLF}${CRLF}` +
|
|
1070
|
-
`whisper-1${CRLF}`
|
|
1071
|
-
));
|
|
1072
|
-
|
|
1073
|
-
if (language && typeof language === 'string') {
|
|
1074
|
-
parts.push(Buffer.from(
|
|
1075
|
-
`--${boundary}${CRLF}` +
|
|
1076
|
-
`Content-Disposition: form-data; name="language"${CRLF}${CRLF}` +
|
|
1077
|
-
`${language}${CRLF}`
|
|
1078
|
-
));
|
|
1079
|
-
}
|
|
1080
|
-
|
|
1081
|
-
parts.push(Buffer.from(`--${boundary}--${CRLF}`));
|
|
1082
|
-
|
|
1083
|
-
const body = Buffer.concat(parts);
|
|
1084
|
-
|
|
1085
|
-
const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
|
|
1086
|
-
method: 'POST',
|
|
1087
|
-
headers: {
|
|
1088
|
-
'Authorization': `Bearer ${whisperKey}`,
|
|
1089
|
-
'Content-Type': `multipart/form-data; boundary=${boundary}`,
|
|
1090
|
-
},
|
|
1091
|
-
body,
|
|
1092
|
-
});
|
|
1093
|
-
|
|
1094
|
-
if (!response.ok) {
|
|
1095
|
-
const errText = await response.text();
|
|
1096
|
-
log.warn(`Whisper API error: ${response.status} ${errText}`);
|
|
1097
|
-
res.status(502).json({ error: 'Whisper API error', detail: errText.slice(0, 500) });
|
|
1098
|
-
return;
|
|
1099
|
-
}
|
|
1100
|
-
|
|
1101
|
-
const result = await response.json() as { text: string };
|
|
1102
|
-
const transcript = result.text;
|
|
1103
|
-
|
|
1104
|
-
let transcriptPath: string | undefined;
|
|
1105
|
-
if (saveTranscriptNext) {
|
|
1106
|
-
const txtAbs = absPath + '.txt';
|
|
1107
|
-
fs.writeFileSync(txtAbs, transcript, 'utf8');
|
|
1108
|
-
transcriptPath = path.relative(paths.files, txtAbs).split(path.sep).join('/');
|
|
1109
|
-
}
|
|
1110
|
-
|
|
1111
|
-
res.json({ transcript, ...(transcriptPath ? { transcriptPath } : {}) });
|
|
1112
|
-
} catch (err: any) {
|
|
1113
|
-
log.warn(`Whisper transcribe-file failed: ${err.message}`);
|
|
1114
|
-
res.status(500).json({ error: 'Transcription failed' });
|
|
1115
|
-
}
|
|
1116
|
-
});
|
|
1117
|
-
|
|
1118
1005
|
// Serve stored files (audio, images, documents)
|
|
1119
1006
|
app.use('/api/files', express.static(paths.files));
|
|
1120
1007
|
|
|
@@ -4,30 +4,53 @@
|
|
|
4
4
|
|
|
5
5
|
A channel for getting **recordings off the user's Plaud Note device** and into your workspace as `(audio file, transcript)` pairs you can read and act on.
|
|
6
6
|
|
|
7
|
-
Plaud is a tiny voice recorder
|
|
7
|
+
Plaud is a tiny voice recorder. When the user records something — a meeting, a lecture, a thought on a walk — the device syncs to Plaud's cloud over Bluetooth/Wi-Fi. **You don't talk to the device.** You talk to Plaud's cloud, pull the audio, and transcribe it — either via the Bloby Marketplace service or your own provider.
|
|
8
8
|
|
|
9
9
|
There is **no Plaud CLI, no Plaud webhook, no official Plaud API.** Plaud's mobile/web app uses an undocumented HTTP API. This skill uses the same one — same shape OpenPlaud uses (`https://github.com/openplaud/openplaud`).
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Two parts to this skill
|
|
14
|
+
|
|
15
|
+
1. **Pulling audio from Plaud** — same for everyone. OTP / paste-token, list, download.
|
|
16
|
+
2. **Transcribing the audio** — you have a choice (see "Transcription — pick a path" below).
|
|
12
17
|
|
|
13
18
|
---
|
|
14
19
|
|
|
15
|
-
## What Bloby Gives You (
|
|
20
|
+
## What Bloby Gives You (plumbing)
|
|
16
21
|
|
|
17
22
|
| Thing | Where | How you use it |
|
|
18
23
|
|---|---|---|
|
|
19
|
-
|
|
|
20
|
-
|
|
|
21
|
-
| Workspace files dir | `workspace/files/audio/plaud/` | Drop downloaded audio here. The supervisor serves it at `/api/files/audio/plaud/<name>`. |
|
|
24
|
+
| Workspace files dir | `workspace/files/audio/plaud/` | Drop downloaded audio here. Supervisor serves it at `/api/files/audio/plaud/<name>`. |
|
|
25
|
+
| Workspace file tools | `Read` / `Write` / `Edit` | Store Plaud auth state in `workspace/.plaud.json`. Save transcripts as `<id>.mp3.txt` next to the audio. |
|
|
22
26
|
| Scheduling | `workspace/CRONS.json` or `workspace/PULSE.json` | Run sync periodically. See "Cadence" below. |
|
|
27
|
+
| Relay token | `~/.bloby/config.json` → `relay.token` | Use as `X-Bloby-Token` header when calling marketplace services. |
|
|
28
|
+
|
|
29
|
+
### State file: `workspace/.plaud.json`
|
|
30
|
+
|
|
31
|
+
You manage all Plaud connection state in a single JSON file at workspace root. Read with `Read`, write with `Write`. Shape:
|
|
32
|
+
|
|
33
|
+
```json
|
|
34
|
+
{
|
|
35
|
+
"email": "bruno@example.com",
|
|
36
|
+
"apiBase": "https://api.plaud.ai",
|
|
37
|
+
"userToken": "eyJ...",
|
|
38
|
+
"workspaceId": "ws_xxxxx",
|
|
39
|
+
"workspaceToken": "eyJ...",
|
|
40
|
+
"workspaceTokenMintedAt": "2026-05-22T19:30:00.000Z",
|
|
41
|
+
"authMethod": "otp",
|
|
42
|
+
"lastSyncVersionMs": 0,
|
|
43
|
+
"transcriptionMode": "marketplace"
|
|
44
|
+
}
|
|
45
|
+
```
|
|
23
46
|
|
|
24
|
-
|
|
47
|
+
`transcriptionMode` is your record of which transcription path the human picked. One of: `"marketplace"`, `"groq"`, `"openai"`, `"mistral"`, `"local"`, or whatever they configured. If `workspace/.plaud.json` doesn't exist yet, create it as an empty object (`{}`) and fill in fields as you go.
|
|
25
48
|
|
|
26
49
|
---
|
|
27
50
|
|
|
28
51
|
## Plaud's API in 60 seconds
|
|
29
52
|
|
|
30
|
-
Three regions. Pick one when pairing.
|
|
53
|
+
Three regions. Pick one when pairing. A token from one region won't work on another.
|
|
31
54
|
|
|
32
55
|
| Region | Base URL |
|
|
33
56
|
|---|---|
|
|
@@ -35,9 +58,14 @@ Three regions. Pick one when pairing. Token from one region won't work on anothe
|
|
|
35
58
|
| EU | `https://api-euc1.plaud.ai` |
|
|
36
59
|
| Asia-Pacific | `https://api-apse1.plaud.ai` |
|
|
37
60
|
|
|
38
|
-
If
|
|
61
|
+
If `POST /auth/otp-send-code` returns `status: -302` with `data.domains.api`, retry against that base. Save whichever base actually succeeded.
|
|
62
|
+
|
|
63
|
+
**Two token kinds — the part that bites everyone:**
|
|
64
|
+
|
|
65
|
+
- **User Token (UT)** — what `/auth/otp-login` returns. Authenticates `/user/me`, workspace-list, workspace-token mint. **Does NOT authenticate recording endpoints.** Calling `/file/simple/web` or `/device/list` with a UT silently returns HTTP 200 + empty list.
|
|
66
|
+
- **Workspace Token (WT)** — minted from the UT. Required on recording endpoints. ~24h lifetime. Re-mint when expired.
|
|
39
67
|
|
|
40
|
-
**User-Agent matters.** Plaud blocks some defaults. Always send
|
|
68
|
+
**User-Agent matters.** Plaud blocks some defaults. Always send:
|
|
41
69
|
|
|
42
70
|
```
|
|
43
71
|
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36
|
|
@@ -47,18 +75,17 @@ User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,
|
|
|
47
75
|
|
|
48
76
|
## Pairing (first time)
|
|
49
77
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
### Step 1 — Ask for their Plaud email
|
|
78
|
+
### Step 1 — Ask for their Plaud email AND how they signed up
|
|
53
79
|
|
|
54
80
|
```
|
|
55
|
-
Bloby: Which email do you use on plaud.ai?
|
|
56
|
-
Human: bruno@example.com
|
|
81
|
+
Bloby: Which email do you use on plaud.ai? And did you sign up with email+password, or "Continue with Google" / "Continue with Apple"?
|
|
57
82
|
```
|
|
58
83
|
|
|
59
|
-
If
|
|
84
|
+
**If they signed up with Google or Apple**, skip OTP entirely and go to "Paste-token fallback". Don't try OTP first — Plaud will silently create a parallel empty account at the same email, you'll mint a WT successfully, and recording endpoints will return empty. The symptom looks like "auth worked but no recordings" but it's two different identities at the same email.
|
|
85
|
+
|
|
86
|
+
If unsure, run OTP and lean on the Step 8 ghost-account check below.
|
|
60
87
|
|
|
61
|
-
### Step 2 — Send
|
|
88
|
+
### Step 2 — Send OTP
|
|
62
89
|
|
|
63
90
|
```bash
|
|
64
91
|
curl -s -X POST 'https://api.plaud.ai/auth/otp-send-code' \
|
|
@@ -67,12 +94,12 @@ curl -s -X POST 'https://api.plaud.ai/auth/otp-send-code' \
|
|
|
67
94
|
-d '{"username":"<EMAIL>"}'
|
|
68
95
|
```
|
|
69
96
|
|
|
70
|
-
Expected `status: 0` and a `token` field.
|
|
97
|
+
Expected `status: 0` and a `token` field. Save the `token` for Step 4.
|
|
71
98
|
|
|
72
99
|
### Step 3 — Ask for the code
|
|
73
100
|
|
|
74
101
|
```
|
|
75
|
-
Bloby: Check your inbox — Plaud sent
|
|
102
|
+
Bloby: Check your inbox — Plaud sent a 6-digit code. What is it?
|
|
76
103
|
```
|
|
77
104
|
|
|
78
105
|
### Step 4 — Verify
|
|
@@ -84,242 +111,320 @@ curl -s -X POST '<apiBase>/auth/otp-login' \
|
|
|
84
111
|
-d '{"code":"<6 DIGITS>","token":"<OTP TOKEN FROM STEP 2>"}'
|
|
85
112
|
```
|
|
86
113
|
|
|
87
|
-
|
|
114
|
+
Save `access_token` as `userToken` in `.plaud.json`.
|
|
115
|
+
|
|
116
|
+
> ⚠️ `is_new_user: true` in the response is just an informational flag — it does NOT mean Plaud created a new account. Real account check happens in Step 8.
|
|
88
117
|
|
|
89
|
-
### Step 5 —
|
|
118
|
+
### Step 5 — Initial state
|
|
119
|
+
|
|
120
|
+
Write to `workspace/.plaud.json`:
|
|
121
|
+
|
|
122
|
+
```json
|
|
123
|
+
{
|
|
124
|
+
"email": "<EMAIL>",
|
|
125
|
+
"apiBase": "<BASE>",
|
|
126
|
+
"userToken": "<UT>",
|
|
127
|
+
"authMethod": "otp"
|
|
128
|
+
}
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Step 6 — Smoke test the UT
|
|
90
132
|
|
|
91
133
|
```bash
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
-H '
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
134
|
+
curl -s '<BASE>/user/me' \
|
|
135
|
+
-H 'Authorization: Bearer <UT>' \
|
|
136
|
+
-H 'User-Agent: Mozilla/5.0 ...'
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
Should return the user's profile. If 401, UT is bad — restart.
|
|
140
|
+
|
|
141
|
+
### Step 7 — Mint the Workspace Token (REQUIRED)
|
|
142
|
+
|
|
143
|
+
**7a. List workspaces** (auth: UT):
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
curl -s '<BASE>/team-app/workspaces/list?need_personal_workspace=true' \
|
|
147
|
+
-H 'Authorization: Bearer <UT>' \
|
|
148
|
+
-H 'User-Agent: Mozilla/5.0 ...'
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
Pick the personal workspace (`workspace_type === "0"`, or first if none). Save its `workspace_id` as `workspaceId`.
|
|
152
|
+
|
|
153
|
+
**7b. Mint a WT** (auth: UT, body literally `{}`):
|
|
154
|
+
|
|
155
|
+
```bash
|
|
156
|
+
curl -s -X POST '<BASE>/user-app/auth/workspace/token/<WORKSPACE_ID>' \
|
|
157
|
+
-H 'Authorization: Bearer <UT>' \
|
|
100
158
|
-H 'Content-Type: application/json' \
|
|
101
|
-
-
|
|
159
|
+
-H 'User-Agent: Mozilla/5.0 ...' \
|
|
160
|
+
-d '{}'
|
|
102
161
|
```
|
|
103
162
|
|
|
104
|
-
|
|
163
|
+
Save `workspace_token` as `workspaceToken` and `workspaceTokenMintedAt: <now ISO 8601>` in `.plaud.json`.
|
|
105
164
|
|
|
106
|
-
### Step
|
|
165
|
+
### Step 8 — Real smoke test + ghost-account check
|
|
107
166
|
|
|
108
167
|
```bash
|
|
109
168
|
curl -s '<BASE>/device/list' \
|
|
110
|
-
-H 'Authorization: Bearer <
|
|
169
|
+
-H 'Authorization: Bearer <WT>' \
|
|
170
|
+
-H 'User-Agent: Mozilla/5.0 ...'
|
|
171
|
+
|
|
172
|
+
curl -s '<BASE>/file/simple/web?skip=0&limit=10&is_trash=0' \
|
|
173
|
+
-H 'Authorization: Bearer <WT>' \
|
|
111
174
|
-H 'User-Agent: Mozilla/5.0 ...'
|
|
112
175
|
```
|
|
113
176
|
|
|
114
|
-
|
|
177
|
+
| `data_devices` | `data_file_list` | Meaning | Action |
|
|
178
|
+
|---|---|---|---|
|
|
179
|
+
| has entries | has entries | Real account paired | Continue to "Transcription — pick a path" |
|
|
180
|
+
| empty | has entries | Devices haven't checked in lately | Treat as success |
|
|
181
|
+
| **empty** | **empty** | **Google/Apple ghost-account case** | **Stop.** Tell the human, switch to paste-token (next section) |
|
|
182
|
+
|
|
183
|
+
### Ghost-account recovery
|
|
184
|
+
|
|
185
|
+
If empty/empty:
|
|
186
|
+
|
|
187
|
+
1. Tell the human plainly:
|
|
188
|
+
> *"OTP succeeded, but you have zero recordings on this Plaud account. Most likely your real Plaud account is signed in with Google or Apple, and the OTP I just ran created a separate empty account at the same email. Can you grab a token from web.plaud.ai DevTools so I can talk to the real account?"*
|
|
189
|
+
2. Walk them through paste-token (next section).
|
|
190
|
+
3. Once paste-token works and you see recordings, overwrite `userToken` and set `"authMethod": "paste"` in `.plaud.json` so next sync skips OTP.
|
|
115
191
|
|
|
116
192
|
---
|
|
117
193
|
|
|
118
194
|
## Paste-token fallback (Google/Apple Plaud accounts)
|
|
119
195
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
1. Open [web.plaud.ai](https://web.plaud.ai) in a browser and sign in with Google/Apple normally.
|
|
123
|
-
2. Open DevTools (F12 or Cmd+Option+I) → Network tab → refresh.
|
|
196
|
+
1. Open [web.plaud.ai](https://web.plaud.ai), sign in with Google/Apple normally.
|
|
197
|
+
2. DevTools (F12 or Cmd+Option+I) → Network tab → refresh.
|
|
124
198
|
3. Click any request to `api.plaud.ai`, `api-euc1.plaud.ai`, or `api-apse1.plaud.ai`.
|
|
125
|
-
4.
|
|
126
|
-
5.
|
|
127
|
-
|
|
128
|
-
JWTs from this path expire too. The skill behaviour on 401 is the same (see "Re-auth" below).
|
|
199
|
+
4. Request Headers → `Authorization` → copy everything after `Bearer ` (long `eyJ...`).
|
|
200
|
+
5. Human pastes to you. Save as `userToken`, set `apiBase` to whichever host they pulled it from, `"authMethod": "paste"`.
|
|
201
|
+
6. **Still run Step 7** — paste-token gives a UT, WT must still be minted.
|
|
129
202
|
|
|
130
203
|
---
|
|
131
204
|
|
|
132
205
|
## Syncing recordings
|
|
133
206
|
|
|
134
|
-
The shape of a sync run:
|
|
135
|
-
|
|
136
207
|
```
|
|
137
|
-
GET /file/simple/web → list
|
|
208
|
+
GET /file/simple/web → list [auth: WT]
|
|
138
209
|
for each new one:
|
|
139
|
-
GET /file/temp-url/<id>?is_opus=0 →
|
|
140
|
-
curl -o workspace/files/audio/plaud/<id>.mp3 → download
|
|
141
|
-
|
|
210
|
+
GET /file/temp-url/<id>?is_opus=0 → signed mp3 URL [auth: WT]
|
|
211
|
+
curl -o workspace/files/audio/plaud/<id>.mp3 → download (signed URL, no auth)
|
|
212
|
+
<transcription path> → produces <id>.mp3.txt
|
|
142
213
|
```
|
|
143
214
|
|
|
144
|
-
###
|
|
215
|
+
### Pre-sync: check WT freshness
|
|
216
|
+
|
|
217
|
+
Read `.plaud.json`. If `workspaceToken` is missing or `workspaceTokenMintedAt` is more than ~20 hours old, re-mint (Step 7b) before starting.
|
|
218
|
+
|
|
219
|
+
### List recordings (auth: WT)
|
|
145
220
|
|
|
146
221
|
```bash
|
|
147
222
|
curl -s '<BASE>/file/simple/web?skip=0&limit=50&is_trash=0&sort_by=edit_time&is_desc=true' \
|
|
148
|
-
-H 'Authorization: Bearer <
|
|
223
|
+
-H 'Authorization: Bearer <WT>' \
|
|
149
224
|
-H 'User-Agent: Mozilla/5.0 ...'
|
|
150
225
|
```
|
|
151
226
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
| Field | Use |
|
|
155
|
-
|---|---|
|
|
156
|
-
| `id` | Plaud's file id. Use as the local filename. |
|
|
157
|
-
| `filename` | Human label the user gave it (or auto-generated). Sanitise before using as a filename. |
|
|
158
|
-
| `duration` | Seconds. |
|
|
159
|
-
| `start_time` / `end_time` | When the recording happened. |
|
|
160
|
-
| `version_ms` | Bumps if the user edits the recording. Track this to know when to re-download. |
|
|
161
|
-
| `serial_number` | Which Plaud device. |
|
|
162
|
-
| `is_trash` | Skip if 1. |
|
|
163
|
-
|
|
164
|
-
Page with `skip=` (the API also accepts a huge `limit`, but page through 50-at-a-time politely).
|
|
227
|
+
`data_file_list` fields you'll care about: `id`, `filename`, `duration`, `start_time`, `end_time`, `version_ms`, `serial_number`, `is_trash`. Page with `skip=`.
|
|
165
228
|
|
|
166
229
|
### Dedup
|
|
167
230
|
|
|
168
|
-
|
|
231
|
+
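Dedup using either the filesystem (skip a recording if `workspace/files/audio/plaud/<id>.mp3` already exists) or the `lastSyncVersionMs` cursor in `.plaud.json` (save the newest `version_ms` you've seen; on the next sync, skip anything `<=` that value). If `version_ms` changed on a recording you already downloaded, the user edited the file — re-fetch and overwrite.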
|
|
169
232
|
|
|
170
|
-
|
|
171
|
-
- **Cursor**: save the newest `version_ms` you've seen as `plaud_last_sync` setting. On next sync, skip anything `<=` that cursor. Faster — no `ls` needed.
|
|
172
|
-
|
|
173
|
-
If `version_ms` changed on a recording you already downloaded, the user edited the filename or trimmed it. Re-fetch and overwrite.
|
|
174
|
-
|
|
175
|
-
### Get the download URL
|
|
233
|
+
### Get the download URL (auth: WT)
|
|
176
234
|
|
|
177
235
|
```bash
|
|
178
236
|
curl -s '<BASE>/file/temp-url/<FILE_ID>?is_opus=0' \
|
|
179
|
-
-H 'Authorization: Bearer <
|
|
237
|
+
-H 'Authorization: Bearer <WT>' \
|
|
180
238
|
-H 'User-Agent: Mozilla/5.0 ...'
|
|
181
239
|
```
|
|
182
240
|
|
|
183
|
-
`is_opus=0` returns
|
|
184
|
-
|
|
185
|
-
Response: `{ "temp_url": "https://<s3...>" }`. The URL expires in a few minutes. Download immediately.
|
|
241
|
+
`is_opus=0` returns mp3 in `temp_url`. Use mp3 — Whisper handles it everywhere.
|
|
186
242
|
|
|
187
|
-
### Download
|
|
243
|
+
### Download (no auth — signed URL)
|
|
188
244
|
|
|
189
245
|
```bash
|
|
190
246
|
mkdir -p workspace/files/audio/plaud
|
|
191
247
|
curl -s -o "workspace/files/audio/plaud/<FILE_ID>.mp3" '<TEMP URL>'
|
|
192
248
|
```
|
|
193
249
|
|
|
194
|
-
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
## Transcription — pick a path
|
|
253
|
+
|
|
254
|
+
Once the audio is on disk, you need text. **Ask the human once** which path they want, then save it as `transcriptionMode` in `.plaud.json` so you don't re-ask every sync.
|
|
255
|
+
|
|
256
|
+
### Path A — Bloby Marketplace `audio-to-text` (easiest, pay-per-minute)
|
|
257
|
+
|
|
258
|
+
If the bloby is registered with the relay (Quick Tunnel mode → there's a token at `~/.bloby/config.json` → `relay.token`), just POST the file. No API key to manage, no provider account.
|
|
195
259
|
|
|
196
260
|
```bash
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
261
|
+
TOKEN=$(jq -r '.relay.token' ~/.bloby/config.json)
|
|
262
|
+
|
|
263
|
+
curl -s -X POST 'https://api.bloby.bot/api/services/audio-to-text/use' \
|
|
264
|
+
-H "X-Bloby-Token: $TOKEN" \
|
|
265
|
+
-F "file=@workspace/files/audio/plaud/<FILE_ID>.mp3" \
|
|
266
|
+
-F "language=en" # optional
|
|
200
267
|
```
|
|
201
268
|
|
|
202
|
-
Returns
|
|
269
|
+
Returns JSON:
|
|
203
270
|
|
|
204
|
-
|
|
271
|
+
```json
|
|
272
|
+
{
|
|
273
|
+
"transcript": "...",
|
|
274
|
+
"language": "en",
|
|
275
|
+
"estimatedMinutes": 5,
|
|
276
|
+
"priceUsd": 0.0185,
|
|
277
|
+
"paidVia": "balance",
|
|
278
|
+
"groqDurationSec": 275.4,
|
|
279
|
+
"model": "whisper-large-v3-turbo"
|
|
280
|
+
}
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
- **Pricing:** $0.0037 per estimated minute, rounded up (~$0.22/hr).
|
|
284
|
+
- **How duration is estimated:** file size ÷ assumed 32kbps bitrate. Plaud-sourced mp3 matches this assumption well. High-bitrate files from other sources would be over-charged proportionally — for those, switch to Path B.
|
|
285
|
+
- **Paid from:** account balance first; falls back to MPP (Tempo USDC) or Base (use `/use-base` instead). Make sure the bloby's account has funds OR its wallet is funded on the matching network.
|
|
286
|
+
- **Limits:** 25MB per file. Mp3 from Plaud comfortably fits — observed 1MB ≈ 4½min.
|
|
205
287
|
|
|
206
|
-
|
|
288
|
+
Write the response's `transcript` to `workspace/files/audio/plaud/<FILE_ID>.mp3.txt`.
|
|
207
289
|
|
|
208
|
-
###
|
|
290
|
+
### Path B — Bring your own API key (DIY)
|
|
209
291
|
|
|
210
|
-
|
|
292
|
+
Pick a provider, ask the human for their key, store it as a workspace secret (`workspace/.env` is fine — the backend reloads on .env change). Then call directly from Bash.
|
|
211
293
|
|
|
294
|
+
**Groq Whisper** — cheapest, fastest. Same model the marketplace uses under the hood. Free tier exists.
|
|
212
295
|
```bash
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
296
|
+
curl -s -X POST 'https://api.groq.com/openai/v1/audio/transcriptions' \
|
|
297
|
+
-H "Authorization: Bearer $GROQ_API_KEY" \
|
|
298
|
+
-F "file=@workspace/files/audio/plaud/<FILE_ID>.mp3" \
|
|
299
|
+
-F "model=whisper-large-v3-turbo" \
|
|
300
|
+
-F "response_format=json"
|
|
217
301
|
```
|
|
218
302
|
|
|
219
|
-
|
|
303
|
+
**OpenAI Whisper** — the human may already have an OpenAI key from the Bloby wizard. Read it from the settings table directly:
|
|
304
|
+
```bash
|
|
305
|
+
WHISPER_KEY=$(sqlite3 ~/.bloby/memory.db "SELECT value FROM settings WHERE key='whisper_key';")
|
|
306
|
+
curl -s -X POST 'https://api.openai.com/v1/audio/transcriptions' \
|
|
307
|
+
-H "Authorization: Bearer $WHISPER_KEY" \
|
|
308
|
+
-F "file=@workspace/files/audio/plaud/<FILE_ID>.mp3" \
|
|
309
|
+
-F "model=whisper-1"
|
|
310
|
+
```
|
|
220
311
|
|
|
221
|
-
|
|
312
|
+
**Mistral Voxtral**:
|
|
313
|
+
```bash
|
|
314
|
+
curl -s -X POST 'https://api.mistral.ai/v1/audio/transcriptions' \
|
|
315
|
+
-H "Authorization: Bearer $MISTRAL_API_KEY" \
|
|
316
|
+
-F "file=@workspace/files/audio/plaud/<FILE_ID>.mp3" \
|
|
317
|
+
-F "model=voxtral-mini-latest"
|
|
318
|
+
```
|
|
319
|
+
|
|
320
|
+
**Local — no API, no cost, fully private:**
|
|
321
|
+
- [whisper.cpp](https://github.com/ggerganov/whisper.cpp) — C++ binary, CPU or Metal/CUDA. Install once, transcribe forever.
|
|
322
|
+
- [faster-whisper](https://github.com/SYSTRAN/faster-whisper) — Python, ~4× faster than reference whisper.
|
|
323
|
+
- The human installs one of these themselves. The bloby invokes the CLI from Bash.
|
|
324
|
+
|
|
325
|
+
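Whichever Path B provider you use, extract the `text` field from the JSON response (for local tools, use the transcript the CLI outputs) and write it to `workspace/files/audio/plaud/<FILE_ID>.mp3.txt`. Note this differs from Path A, where the field is named `transcript`.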
|
|
326
|
+
|
|
327
|
+
### Choosing for the human
|
|
328
|
+
|
|
329
|
+
If they don't have a preference, recommend **Path A (Marketplace)**:
|
|
330
|
+
- No key setup.
|
|
331
|
+
- Already integrated with the bloby's payment.
|
|
332
|
+
- Pay-as-you-go — no monthly minimum.
|
|
333
|
+
- If their account has any balance from other marketplace use, it just works.
|
|
334
|
+
|
|
335
|
+
Recommend **Path B** if:
|
|
336
|
+
- They're transcribing a lot and want to use a free tier or flat-rate plan.
|
|
337
|
+
- They want 100% local for privacy reasons.
|
|
338
|
+
- They already have a preferred provider.
|
|
222
339
|
|
|
223
340
|
---
|
|
224
341
|
|
|
225
342
|
## Cadence — CRON or PULSE?
|
|
226
343
|
|
|
227
|
-
**
|
|
344
|
+
**No automatic schedule installed by this skill.** The human picks.
|
|
228
345
|
|
|
229
346
|
### Pattern A — CRON every N minutes
|
|
230
347
|
|
|
231
|
-
|
|
348
|
+
Add to `workspace/CRONS.json`:
|
|
232
349
|
|
|
233
350
|
```json
|
|
234
351
|
{
|
|
235
352
|
"id": "plaud-sync",
|
|
236
353
|
"schedule": "*/15 * * * *",
|
|
237
|
-
"task": "Run a Plaud sync: list new recordings, download
|
|
354
|
+
"task": "Run a Plaud sync per the plaud skill: refresh WT if needed, list new recordings, download into workspace/files/audio/plaud/, and transcribe via the configured transcriptionMode in .plaud.json. If new recordings were found, summarise to the human in chat. If nothing new, stay silent.",
|
|
238
355
|
"enabled": true,
|
|
239
356
|
"oneShot": false
|
|
240
357
|
}
|
|
241
358
|
```
|
|
242
359
|
|
|
243
|
-
Tune `*/15` to taste. `*/5` for aggressive, `0 * * * *` (top of every hour) for quiet.
|
|
244
|
-
|
|
245
360
|
### Pattern B — PULSE memo
|
|
246
361
|
|
|
247
|
-
|
|
362
|
+
Add one line to `MYSELF.md` or `MEMORY.md`:
|
|
248
363
|
|
|
249
364
|
```
|
|
250
|
-
- Each pulse, briefly check Plaud for new recordings via the plaud skill.
|
|
365
|
+
- Each pulse, briefly check Plaud for new recordings via the plaud skill. Transcribe with whatever transcriptionMode is set in workspace/.plaud.json. If new, decide whether to surface. If nothing new, move on silently.
|
|
251
366
|
```
|
|
252
367
|
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
### Or: don't auto-sync at all
|
|
368
|
+
### Or: manual only
|
|
256
369
|
|
|
257
|
-
|
|
370
|
+
No CRON, no pulse memo. Sync when asked.
|
|
258
371
|
|
|
259
|
-
**
|
|
372
|
+
**Default to Pattern B for new installs** unless the human says otherwise.
|
|
260
373
|
|
|
261
374
|
---
|
|
262
375
|
|
|
263
376
|
## Re-auth (401 handling)
|
|
264
377
|
|
|
265
|
-
|
|
378
|
+
| Endpoint that 401'd | What expired | Fix |
|
|
379
|
+
|---|---|---|
|
|
380
|
+
| `/file/simple/web`, `/file/temp-url/*`, `/device/list` (WT) | Workspace token | Re-mint a WT from cached UT (Step 7b). Silent — don't bother the human. |
|
|
381
|
+
| `/user-app/auth/workspace/token/...`, `/team-app/workspaces/list`, `/user/me` (UT) | User token | Tell the human. If `authMethod === "otp"`, re-OTP. If `"paste"`, walk them through DevTools again. |
|
|
382
|
+
| `POST /api/services/audio-to-text/use` (relay) | Marketplace account empty / wallet unfunded | Tell the human. Suggest topping up or switching to Path B. |
|
|
266
383
|
|
|
267
|
-
|
|
268
|
-
2. If they say yes, re-run the OTP flow from Step 1. Overwrite the `plaud_token` setting.
|
|
269
|
-
3. If they signed up with Google/Apple originally, prompt for the paste-token fallback instead.
|
|
270
|
-
4. Don't keep retrying with the dead token — pause the sync until re-paired.
|
|
384
|
+
If you can't tell which token expired, assume UT is dead → re-auth.
|
|
271
385
|
|
|
272
386
|
---
|
|
273
387
|
|
|
274
388
|
## Disconnect
|
|
275
389
|
|
|
276
390
|
```bash
|
|
277
|
-
|
|
278
|
-
-H 'Content-Type: application/json' \
|
|
279
|
-
-d '{"key":"plaud_token","value":""}'
|
|
280
|
-
curl -s -X POST 'http://localhost:7400/api/settings' \
|
|
281
|
-
-H 'Content-Type: application/json' \
|
|
282
|
-
-d '{"key":"plaud_api_base","value":""}'
|
|
391
|
+
rm -f workspace/.plaud.json
|
|
283
392
|
```
|
|
284
393
|
|
|
285
|
-
Recordings
|
|
394
|
+
Recordings on disk stay. Disable the CRON entry / remove from `CRONS.json` separately.
|
|
286
395
|
|
|
287
396
|
---
|
|
288
397
|
|
|
289
|
-
##
|
|
290
|
-
|
|
291
|
-
Plaud's "workspace" is their multi-account team feature. Personal accounts don't usually need to worry about this — the API responds correctly without a workspace token. If a human ever reports recordings missing that they can see in the Plaud app, it's likely a workspace-scoped recording.
|
|
398
|
+
## Quick Reference
|
|
292
399
|
|
|
293
|
-
|
|
400
|
+
| Action | curl | Auth |
|
|
401
|
+
|---|---|---|
|
|
402
|
+
| Send OTP | `POST <base>/auth/otp-send-code` body `{username}` | none |
|
|
403
|
+
| Verify OTP → UT | `POST <base>/auth/otp-login` body `{code, token}` | none |
|
|
404
|
+
| Profile | `GET <base>/user/me` | UT |
|
|
405
|
+
| List workspaces | `GET <base>/team-app/workspaces/list?need_personal_workspace=true` | UT |
|
|
406
|
+
| Mint WT | `POST <base>/user-app/auth/workspace/token/<workspaceId>` body `{}` | UT |
|
|
407
|
+
| List devices | `GET <base>/device/list` | **WT** |
|
|
408
|
+
| List recordings | `GET <base>/file/simple/web?skip=0&limit=50&is_trash=0&sort_by=edit_time&is_desc=true` | **WT** |
|
|
409
|
+
| Download URL | `GET <base>/file/temp-url/<id>?is_opus=0` | **WT** |
|
|
410
|
+
| Download audio | `GET <temp_url>` | none (signed) |
|
|
411
|
+
| Transcribe (marketplace) | `POST https://api.bloby.bot/api/services/audio-to-text/use` multipart `file=@...` | `X-Bloby-Token: $relay_token` |
|
|
412
|
+
| Transcribe (Groq) | `POST https://api.groq.com/openai/v1/audio/transcriptions` multipart | Bearer GROQ_API_KEY |
|
|
413
|
+
| Transcribe (OpenAI) | `POST https://api.openai.com/v1/audio/transcriptions` multipart | Bearer OpenAI key (`whisper_key` from the settings table) |
|
|
414
|
+
|
|
415
|
+
State file: `workspace/.plaud.json`. Plaud requests need a browser-style `User-Agent`.
|
|
294
416
|
|
|
295
417
|
---
|
|
296
418
|
|
|
297
419
|
## What This Skill Does NOT Do
|
|
298
420
|
|
|
299
|
-
- **No
|
|
300
|
-
- **No dashboard.** OpenPlaud has a
|
|
301
|
-
- **No push from Plaud.** No webhooks exist
|
|
302
|
-
- **No
|
|
303
|
-
- **No real-time streaming.** Plaud syncs to its cloud *after* the recording finishes. Expect a lag of seconds-to-minutes between "user stopped recording" and "file appears in `/file/simple/web`."
|
|
304
|
-
|
|
305
|
-
---
|
|
306
|
-
|
|
307
|
-
## Quick Reference
|
|
308
|
-
|
|
309
|
-
| Action | curl |
|
|
310
|
-
|---|---|
|
|
311
|
-
| Send OTP | `POST <base>/auth/otp-send-code` body `{username}` |
|
|
312
|
-
| Verify OTP | `POST <base>/auth/otp-login` body `{code, token}` |
|
|
313
|
-
| List devices | `GET <base>/device/list` |
|
|
314
|
-
| List recordings | `GET <base>/file/simple/web?skip=0&limit=50&is_trash=0&sort_by=edit_time&is_desc=true` |
|
|
315
|
-
| Get download URL | `GET <base>/file/temp-url/<id>?is_opus=0` |
|
|
316
|
-
| Transcribe local file | `POST http://localhost:7400/api/whisper/transcribe-file` body `{path, saveTranscriptNext}` |
|
|
317
|
-
| Save setting | `POST http://localhost:7400/api/settings` body `{key, value}` |
|
|
318
|
-
|
|
319
|
-
All Plaud requests need `Authorization: Bearer <JWT>` + a browser-style `User-Agent`.
|
|
421
|
+
- **No automatic schedule.** The human + bloby pick CRON vs PULSE vs manual.
|
|
422
|
+
- **No dashboard.** OpenPlaud has a UI; we don't. The bloby's job is to *read* transcripts and act on them via normal workspace tools. If the human wants a UI, build one into `workspace/client/`.
|
|
423
|
+
- **No push from Plaud.** No webhooks exist; you only know about new recordings when you ask.
|
|
424
|
+
- **No real-time streaming.** Plaud syncs *after* the recording finishes. Lag is seconds-to-minutes between "user stopped recording" and "file appears in `/file/simple/web`."
|
|
320
425
|
|
|
321
426
|
---
|
|
322
427
|
|
|
323
428
|
## Credit
|
|
324
429
|
|
|
325
|
-
Plaud API shape is the same one [OpenPlaud](https://github.com/openplaud/openplaud) uses — they did the reverse-engineering work. This skill reimplements just the parts a bloby needs.
|
|
430
|
+
Plaud API shape is the same one [OpenPlaud](https://github.com/openplaud/openplaud) uses — they did the reverse-engineering work, including the painful workspace-token discovery (their issue #66) and the Google/Apple identity gotcha (issue #65). This skill reimplements just the parts a bloby needs, and routes transcription either through Bloby's marketplace or a provider of the human's choice.
|
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
"bloby_human": "Bruno Bertapeli",
|
|
6
6
|
"bloby": "bloby-bruno",
|
|
7
7
|
"author": "newbot-official",
|
|
8
|
-
"description": "Plaud Note integration. Pairs the user's Plaud account
|
|
8
|
+
"description": "Plaud Note integration. Pairs the user's Plaud account (email OTP or paste-token for Google/Apple identities), pulls recordings into workspace/files/audio/plaud/, and routes transcription through either the Bloby Marketplace audio-to-text service (pay-per-minute) or the human's own provider (Groq / OpenAI Whisper / Mistral Voxtral / local).",
|
|
9
9
|
"depends": [],
|
|
10
10
|
"env_keys": [],
|
|
11
11
|
"has_telemetry": false,
|
|
12
|
-
"size": "
|
|
12
|
+
"size": "12KB",
|
|
13
13
|
"contains_binaries": false,
|
|
14
|
-
"tags": ["plaud", "transcription", "audio", "recorder", "meeting"]
|
|
14
|
+
"tags": ["plaud", "transcription", "audio", "recorder", "meeting", "groq", "whisper"]
|
|
15
15
|
}
|