bloby-bot 0.50.1 → 0.50.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bloby-bot",
3
- "version": "0.50.1",
3
+ "version": "0.50.3",
4
4
  "releaseNotes": [
5
5
  "1. Something great..",
6
6
  "2. ",
package/worker/index.ts CHANGED
@@ -1002,119 +1002,6 @@ app.post('/api/whisper/transcribe', express.json({ limit: '10mb' }), async (req,
1002
1002
  }
1003
1003
  });
1004
1004
 
1005
- // Transcribe an audio file already on disk under workspace/files/.
1006
- // Body: { path, saveTranscriptNext?, language? }. `path` is interpreted
1007
- // relative to workspace/files/ ("files/" prefix is tolerated).
1008
- app.post('/api/whisper/transcribe-file', express.json({ limit: '1mb' }), async (req, res) => {
1009
- const whisperEnabled = getSetting('whisper_enabled');
1010
- const whisperKey = getSetting('whisper_key');
1011
-
1012
- if (whisperEnabled !== 'true' || !whisperKey) {
1013
- res.status(400).json({ error: 'Whisper not enabled or API key missing' });
1014
- return;
1015
- }
1016
-
1017
- const { path: relPath, saveTranscriptNext, language } = req.body as {
1018
- path?: string;
1019
- saveTranscriptNext?: boolean;
1020
- language?: string;
1021
- };
1022
-
1023
- if (!relPath || typeof relPath !== 'string') {
1024
- res.status(400).json({ error: 'Missing path' });
1025
- return;
1026
- }
1027
-
1028
- const normalized = relPath.replace(/^\/+/, '').replace(/^files\//, '');
1029
- const absPath = path.resolve(paths.files, normalized);
1030
- if (absPath !== paths.files && !absPath.startsWith(paths.files + path.sep)) {
1031
- res.status(400).json({ error: 'Path escapes workspace/files/' });
1032
- return;
1033
- }
1034
- if (!fs.existsSync(absPath) || !fs.statSync(absPath).isFile()) {
1035
- res.status(404).json({ error: 'File not found' });
1036
- return;
1037
- }
1038
-
1039
- try {
1040
- const audioBuffer = fs.readFileSync(absPath);
1041
- const filename = path.basename(absPath);
1042
- const ext = path.extname(filename).toLowerCase().slice(1);
1043
- const contentTypes: Record<string, string> = {
1044
- mp3: 'audio/mpeg',
1045
- m4a: 'audio/mp4',
1046
- mp4: 'audio/mp4',
1047
- wav: 'audio/wav',
1048
- webm: 'audio/webm',
1049
- ogg: 'audio/ogg',
1050
- opus: 'audio/ogg',
1051
- flac: 'audio/flac',
1052
- };
1053
- const contentType = contentTypes[ext] || 'application/octet-stream';
1054
-
1055
- const boundary = '----WhisperBoundary' + Date.now();
1056
- const CRLF = '\r\n';
1057
- const parts: Buffer[] = [];
1058
-
1059
- parts.push(Buffer.from(
1060
- `--${boundary}${CRLF}` +
1061
- `Content-Disposition: form-data; name="file"; filename="${filename}"${CRLF}` +
1062
- `Content-Type: ${contentType}${CRLF}${CRLF}`
1063
- ));
1064
- parts.push(audioBuffer);
1065
- parts.push(Buffer.from(CRLF));
1066
-
1067
- parts.push(Buffer.from(
1068
- `--${boundary}${CRLF}` +
1069
- `Content-Disposition: form-data; name="model"${CRLF}${CRLF}` +
1070
- `whisper-1${CRLF}`
1071
- ));
1072
-
1073
- if (language && typeof language === 'string') {
1074
- parts.push(Buffer.from(
1075
- `--${boundary}${CRLF}` +
1076
- `Content-Disposition: form-data; name="language"${CRLF}${CRLF}` +
1077
- `${language}${CRLF}`
1078
- ));
1079
- }
1080
-
1081
- parts.push(Buffer.from(`--${boundary}--${CRLF}`));
1082
-
1083
- const body = Buffer.concat(parts);
1084
-
1085
- const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
1086
- method: 'POST',
1087
- headers: {
1088
- 'Authorization': `Bearer ${whisperKey}`,
1089
- 'Content-Type': `multipart/form-data; boundary=${boundary}`,
1090
- },
1091
- body,
1092
- });
1093
-
1094
- if (!response.ok) {
1095
- const errText = await response.text();
1096
- log.warn(`Whisper API error: ${response.status} ${errText}`);
1097
- res.status(502).json({ error: 'Whisper API error', detail: errText.slice(0, 500) });
1098
- return;
1099
- }
1100
-
1101
- const result = await response.json() as { text: string };
1102
- const transcript = result.text;
1103
-
1104
- let transcriptPath: string | undefined;
1105
- if (saveTranscriptNext) {
1106
- const txtAbs = absPath + '.txt';
1107
- fs.writeFileSync(txtAbs, transcript, 'utf8');
1108
- transcriptPath = path.relative(paths.files, txtAbs).split(path.sep).join('/');
1109
- }
1110
-
1111
- res.json({ transcript, ...(transcriptPath ? { transcriptPath } : {}) });
1112
- } catch (err: any) {
1113
- log.warn(`Whisper transcribe-file failed: ${err.message}`);
1114
- res.status(500).json({ error: 'Transcription failed' });
1115
- }
1116
- });
1117
-
1118
1005
  // Serve stored files (audio, images, documents)
1119
1006
  app.use('/api/files', express.static(paths.files));
1120
1007
 
@@ -4,30 +4,53 @@
4
4
 
5
5
  A channel for getting **recordings off the user's Plaud Note device** and into your workspace as `(audio file, transcript)` pairs you can read and act on.
6
6
 
7
- Plaud is a tiny voice recorder (button on the case, magnet sticks to a phone). When the user records something — a meeting, a lecture, a thought on a walk — the device syncs to Plaud's cloud over Bluetooth/Wi-Fi. **You don't talk to the device.** You talk to Plaud's cloud, pull the audio, and transcribe it yourself.
7
+ Plaud is a tiny voice recorder. When the user records something — a meeting, a lecture, a thought on a walk — the device syncs to Plaud's cloud over Bluetooth/Wi-Fi. **You don't talk to the device.** You talk to Plaud's cloud, pull the audio, and transcribe it — either via the Bloby Marketplace service or your own provider.
8
8
 
9
9
  There is **no Plaud CLI, no Plaud webhook, no official Plaud API.** Plaud's mobile/web app uses an undocumented HTTP API. This skill uses the same one — same shape OpenPlaud uses (`https://github.com/openplaud/openplaud`).
10
10
 
11
- The user already pays Plaud $0 if they don't want Plaud's transcription subscription. We do transcription locally via Whisper using the OpenAI key the user added during the Bloby wizard. No new key, no new subscription.
11
+ ---
12
+
13
+ ## Two parts to this skill
14
+
15
+ 1. **Pulling audio from Plaud** — same for everyone. OTP / paste-token, list, download.
16
+ 2. **Transcribing the audio** — you have a choice (see "Transcription — pick a path" below).
12
17
 
13
18
  ---
14
19
 
15
- ## What Bloby Gives You (already-built plumbing)
20
+ ## What Bloby Gives You (plumbing)
16
21
 
17
22
  | Thing | Where | How you use it |
18
23
  |---|---|---|
19
- | Whisper-on-disk endpoint | `POST http://localhost:7400/api/whisper/transcribe-file` | Send a path under `workspace/files/`, get a transcript back. Optional `saveTranscriptNext: true` writes `foo.mp3.txt` next to `foo.mp3`. |
20
- | Settings k/v store | `GET/POST/PUT http://localhost:7400/api/settings` | Store/retrieve the Plaud JWT, region, workspace ID, last-sync cursor. |
21
- | Workspace files dir | `workspace/files/audio/plaud/` | Drop downloaded audio here. The supervisor serves it at `/api/files/audio/plaud/<name>`. |
24
+ | Workspace files dir | `workspace/files/audio/plaud/` | Drop downloaded audio here. Supervisor serves it at `/api/files/audio/plaud/<name>`. |
25
+ | Workspace file tools | `Read` / `Write` / `Edit` | Store Plaud auth state in `workspace/.plaud.json`. Save transcripts as `<id>.mp3.txt` next to the audio. |
22
26
  | Scheduling | `workspace/CRONS.json` or `workspace/PULSE.json` | Run sync periodically. See "Cadence" below. |
27
+ | Relay token | `~/.bloby/config.json` → `relay.token` | Use as `X-Bloby-Token` header when calling marketplace services. |
28
+
29
+ ### State file: `workspace/.plaud.json`
30
+
31
+ You manage all Plaud connection state in a single JSON file at workspace root. Read with `Read`, write with `Write`. Shape:
32
+
33
+ ```json
34
+ {
35
+ "email": "bruno@example.com",
36
+ "apiBase": "https://api.plaud.ai",
37
+ "userToken": "eyJ...",
38
+ "workspaceId": "ws_xxxxx",
39
+ "workspaceToken": "eyJ...",
40
+ "workspaceTokenMintedAt": "2026-05-22T19:30:00.000Z",
41
+ "authMethod": "otp",
42
+ "lastSyncVersionMs": 0,
43
+ "transcriptionMode": "marketplace"
44
+ }
45
+ ```
23
46
 
24
- Use `http://localhost:7400` from Bash. Auth is the same Bearer token you already have in your session for the worker; for skill-internal calls running inside the supervisor's own bloby session, `/api/settings` and `/api/whisper/transcribe-file` work the same way `/api/whisper/transcribe` does.
47
+ `transcriptionMode` is your record of which transcription path the human picked. One of: `"marketplace"`, `"groq"`, `"openai"`, `"mistral"`, `"local"`, or whatever they configured. Initialize empty (`{}`) if the file doesn't exist.
25
48
 
26
49
  ---
27
50
 
28
51
  ## Plaud's API in 60 seconds
29
52
 
30
- Three regions. Pick one when pairing. Token from one region won't work on another.
53
+ Three regions. Pick one when pairing. A token from one region won't work on another.
31
54
 
32
55
  | Region | Base URL |
33
56
  |---|---|
@@ -35,9 +58,14 @@ Three regions. Pick one when pairing. Token from one region won't work on anothe
35
58
  | EU | `https://api-euc1.plaud.ai` |
36
59
  | Asia-Pacific | `https://api-apse1.plaud.ai` |
37
60
 
38
- If the user doesn't know their region, start with Global. If `POST /auth/otp-send-code` returns `status: -302` with `data.domains.api`, redirect to that base instead — the user's account lives in a different region. Save whichever base actually succeeded.
61
+ If `POST /auth/otp-send-code` returns `status: -302` with `data.domains.api`, retry against that base. Save whichever base actually succeeded.
62
+
63
+ **Two token kinds — the part that bites everyone:**
64
+
65
+ - **User Token (UT)** — what `/auth/otp-login` returns. Authenticates `/user/me`, workspace-list, workspace-token mint. **Does NOT authenticate recording endpoints.** Calling `/file/simple/web` or `/device/list` with a UT silently returns HTTP 200 + empty list.
66
+ - **Workspace Token (WT)** — minted from the UT. Required on recording endpoints. ~24h lifetime. Re-mint when expired.
39
67
 
40
- **User-Agent matters.** Plaud blocks some defaults. Always send a normal browser UA. Example:
68
+ **User-Agent matters.** Plaud blocks some defaults. Always send:
41
69
 
42
70
  ```
43
71
  User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36
@@ -47,18 +75,17 @@ User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,
47
75
 
48
76
  ## Pairing (first time)
49
77
 
50
- Walk the human through it conversationally. They don't see any UI for this — just chat with you.
51
-
52
- ### Step 1 — Ask for their Plaud email
78
+ ### Step 1 — Ask for their Plaud email AND how they signed up
53
79
 
54
80
  ```
55
- Bloby: Which email do you use on plaud.ai? I'll have them send you a 6-digit code.
56
- Human: bruno@example.com
81
+ Bloby: Which email do you use on plaud.ai? And did you sign up with email+password, or "Continue with Google" / "Continue with Apple"?
57
82
  ```
58
83
 
59
- If the human mentions they signed up with **Google or Apple**, jump to the "Paste-token fallback" section instead. OTP only works for accounts that were created with an email+password identity on Plaud's side.
84
+ **If they signed up with Google or Apple**, skip OTP entirely and go to "Paste-token fallback". Don't try OTP first — Plaud will silently create a parallel empty account at the same email, you'll mint a WT successfully, and recording endpoints will return empty. The symptom looks like "auth worked but no recordings", but it's two different identities at the same email.
85
+
86
+ If unsure, run OTP and lean on the Step 8 ghost-account check below.
60
87
 
61
- ### Step 2 — Send the OTP
88
+ ### Step 2 — Send OTP
62
89
 
63
90
  ```bash
64
91
  curl -s -X POST 'https://api.plaud.ai/auth/otp-send-code' \
@@ -67,12 +94,12 @@ curl -s -X POST 'https://api.plaud.ai/auth/otp-send-code' \
67
94
  -d '{"username":"<EMAIL>"}'
68
95
  ```
69
96
 
70
- Expected `status: 0` and a `token` field. **Save the `token`** — you need it for verify. If you see `status: -302`, switch `apiBase` to `data.domains.api` and retry once.
97
+ Expected `status: 0` and a `token` field. Save the `token` for Step 4.
71
98
 
72
99
  ### Step 3 — Ask for the code
73
100
 
74
101
  ```
75
- Bloby: Check your inbox — Plaud sent you a 6-digit code. What is it?
102
+ Bloby: Check your inbox — Plaud sent a 6-digit code. What is it?
76
103
  ```
77
104
 
78
105
  ### Step 4 — Verify
@@ -84,242 +111,320 @@ curl -s -X POST '<apiBase>/auth/otp-login' \
84
111
  -d '{"code":"<6 DIGITS>","token":"<OTP TOKEN FROM STEP 2>"}'
85
112
  ```
86
113
 
87
- Expected `access_token` (a long `eyJ...` JWT). **This is the long-lived token. Store it.**
114
+ Save `access_token` as `userToken` in `.plaud.json`.
115
+
116
+ > ⚠️ `is_new_user: true` in the response is just an informational flag — it does NOT mean Plaud created a new account. Real account check happens in Step 8.
88
117
 
89
- ### Step 5 — Store the connection
118
+ ### Step 5 — Initial state
119
+
120
+ Write to `workspace/.plaud.json`:
121
+
122
+ ```json
123
+ {
124
+ "email": "<EMAIL>",
125
+ "apiBase": "<BASE>",
126
+ "userToken": "<UT>",
127
+ "authMethod": "otp"
128
+ }
129
+ ```
130
+
131
+ ### Step 6 — Smoke test the UT
90
132
 
91
133
  ```bash
92
- # Each setting saved separately. Replace <TOKEN> and <BASE>.
93
- curl -s -X POST 'http://localhost:7400/api/settings' \
94
- -H 'Content-Type: application/json' \
95
- -d '{"key":"plaud_token","value":"<JWT>"}'
96
- curl -s -X POST 'http://localhost:7400/api/settings' \
97
- -H 'Content-Type: application/json' \
98
- -d '{"key":"plaud_api_base","value":"<BASE>"}'
99
- curl -s -X POST 'http://localhost:7400/api/settings' \
134
+ curl -s '<BASE>/user/me' \
135
+ -H 'Authorization: Bearer <UT>' \
136
+ -H 'User-Agent: Mozilla/5.0 ...'
137
+ ```
138
+
139
+ Should return the user's profile. If 401, UT is bad — restart.
140
+
141
+ ### Step 7 — Mint the Workspace Token (REQUIRED)
142
+
143
+ **7a. List workspaces** (auth: UT):
144
+
145
+ ```bash
146
+ curl -s '<BASE>/team-app/workspaces/list?need_personal_workspace=true' \
147
+ -H 'Authorization: Bearer <UT>' \
148
+ -H 'User-Agent: Mozilla/5.0 ...'
149
+ ```
150
+
151
+ Pick the personal workspace (`workspace_type === "0"`, or first if none). Save its `workspace_id` as `workspaceId`.
152
+
153
+ **7b. Mint a WT** (auth: UT, body literally `{}`):
154
+
155
+ ```bash
156
+ curl -s -X POST '<BASE>/user-app/auth/workspace/token/<WORKSPACE_ID>' \
157
+ -H 'Authorization: Bearer <UT>' \
100
158
  -H 'Content-Type: application/json' \
101
- -d '{"key":"plaud_email","value":"<EMAIL>"}'
159
+ -H 'User-Agent: Mozilla/5.0 ...' \
160
+ -d '{}'
102
161
  ```
103
162
 
104
- You can also save `plaud_workspace_id` once you discover it (see "Workspaces" below).
163
+ Save `workspace_token` as `workspaceToken` and `workspaceTokenMintedAt: <now ISO 8601>` in `.plaud.json`.
105
164
 
106
- ### Step 6 — Smoke test
165
+ ### Step 8 — Real smoke test + ghost-account check
107
166
 
108
167
  ```bash
109
168
  curl -s '<BASE>/device/list' \
110
- -H 'Authorization: Bearer <JWT>' \
169
+ -H 'Authorization: Bearer <WT>' \
170
+ -H 'User-Agent: Mozilla/5.0 ...'
171
+
172
+ curl -s '<BASE>/file/simple/web?skip=0&limit=10&is_trash=0' \
173
+ -H 'Authorization: Bearer <WT>' \
111
174
  -H 'User-Agent: Mozilla/5.0 ...'
112
175
  ```
113
176
 
114
- Should return a JSON object listing the user's Plaud devices (each has a `serial_number`). If you get `401`, the OTP didn't grant a usable token — start over. If `200`, tell the human: *"Paired. Your Plaud (serial ending ...XXXX) is connected. Want me to pull in everything you've recorded so far?"*
177
+ | `data_devices` | `data_file_list` | Meaning | Action |
178
+ |---|---|---|---|
179
+ | has entries | has entries | Real account paired | Continue to "Transcription — pick a path" |
180
+ | empty | has entries | Devices haven't checked in lately | Treat as success |
181
+ | **empty** | **empty** | **Google/Apple ghost-account case** | **Stop.** Tell the human, switch to paste-token (next section) |
182
+
183
+ ### Ghost-account recovery
184
+
185
+ If empty/empty:
186
+
187
+ 1. Tell the human plainly:
188
+ > *"OTP succeeded, but you have zero recordings on this Plaud account. Most likely your real Plaud account is signed in with Google or Apple, and the OTP I just ran created a separate empty account at the same email. Can you grab a token from web.plaud.ai DevTools so I can talk to the real account?"*
189
+ 2. Walk them through paste-token (next section).
190
+ 3. Once paste-token works and you see recordings, overwrite `userToken` and set `"authMethod": "paste"` in `.plaud.json` so next sync skips OTP.
115
191
 
116
192
  ---
117
193
 
118
194
  ## Paste-token fallback (Google/Apple Plaud accounts)
119
195
 
120
- If OTP just won't work and the human signed up with Google or Apple, get the bearer manually:
121
-
122
- 1. Open [web.plaud.ai](https://web.plaud.ai) in a browser and sign in with Google/Apple normally.
123
- 2. Open DevTools (F12 or Cmd+Option+I) → Network tab → refresh.
196
+ 1. Open [web.plaud.ai](https://web.plaud.ai), sign in with Google/Apple normally.
197
+ 2. DevTools (F12 or Cmd+Option+I) → Network tab → refresh.
124
198
  3. Click any request to `api.plaud.ai`, `api-euc1.plaud.ai`, or `api-apse1.plaud.ai`.
125
- 4. Under **Request Headers**, find `Authorization`. Copy everything after `Bearer ` (the long `eyJ...`).
126
- 5. Tell the bloby in chat. The bloby saves it via the same `plaud_token` setting key, plus the matching `plaud_api_base`.
127
-
128
- JWTs from this path expire too. The skill behaviour on 401 is the same (see "Re-auth" below).
199
+ 4. Request Headers → `Authorization` → copy everything after `Bearer ` (long `eyJ...`).
200
+ 5. Human pastes to you. Save as `userToken`, set `apiBase` to whichever host they pulled it from, `"authMethod": "paste"`.
201
+ 6. **Still run Step 7** — paste-token gives a UT, WT must still be minted.
129
202
 
130
203
  ---
131
204
 
132
205
  ## Syncing recordings
133
206
 
134
- The shape of a sync run:
135
-
136
207
  ```
137
- GET /file/simple/web → list recent recordings (paginated)
208
+ GET /file/simple/web → list [auth: WT]
138
209
  for each new one:
139
- GET /file/temp-url/<id>?is_opus=0 → get a short-lived S3 link (request the mp3, not opus)
140
- curl -o workspace/files/audio/plaud/<id>.mp3 → download
141
- POST /api/whisper/transcribe-file → produces <id>.mp3.txt alongside
210
+ GET /file/temp-url/<id>?is_opus=0 → signed mp3 URL [auth: WT]
211
+ curl -o workspace/files/audio/plaud/<id>.mp3 → download (signed URL, no auth)
212
+ <transcription path> → produces <id>.mp3.txt
142
213
  ```
143
214
 
144
- ### List recordings
215
+ ### Pre-sync: check WT freshness
216
+
217
+ Read `.plaud.json`. If `workspaceToken` is missing or `workspaceTokenMintedAt` is more than ~20 hours old, re-mint (Step 7b) before starting.
218
+
219
+ ### List recordings (auth: WT)
145
220
 
146
221
  ```bash
147
222
  curl -s '<BASE>/file/simple/web?skip=0&limit=50&is_trash=0&sort_by=edit_time&is_desc=true' \
148
- -H 'Authorization: Bearer <JWT>' \
223
+ -H 'Authorization: Bearer <WT>' \
149
224
  -H 'User-Agent: Mozilla/5.0 ...'
150
225
  ```
151
226
 
152
- The response has `data_file_list` — an array of recording objects. Fields you'll care about:
153
-
154
- | Field | Use |
155
- |---|---|
156
- | `id` | Plaud's file id. Use as the local filename. |
157
- | `filename` | Human label the user gave it (or auto-generated). Sanitise before using as a filename. |
158
- | `duration` | Seconds. |
159
- | `start_time` / `end_time` | When the recording happened. |
160
- | `version_ms` | Bumps if the user edits the recording. Track this to know when to re-download. |
161
- | `serial_number` | Which Plaud device. |
162
- | `is_trash` | Skip if 1. |
163
-
164
- Page with `skip=` (the API also accepts a huge `limit`, but page through 50-at-a-time politely).
227
+ `data_file_list` fields you'll care about: `id`, `filename`, `duration`, `start_time`, `end_time`, `version_ms`, `serial_number`, `is_trash`. Page with `skip=`.
165
228
 
166
229
  ### Dedup
167
230
 
168
- You don't want to re-download what you already have. Two ways, pick one:
231
+ Either filesystem (skip if `workspace/files/audio/plaud/<id>.mp3` exists) or `lastSyncVersionMs` cursor in `.plaud.json`. If `version_ms` changed on a recording you already downloaded, the user edited the file — re-fetch and overwrite.
169
232
 
170
- - **Filesystem**: if `workspace/files/audio/plaud/<id>.mp3` exists, skip it.
171
- - **Cursor**: save the newest `version_ms` you've seen as `plaud_last_sync` setting. On next sync, skip anything `<=` that cursor. Faster — no `ls` needed.
172
-
173
- If `version_ms` changed on a recording you already downloaded, the user edited the filename or trimmed it. Re-fetch and overwrite.
174
-
175
- ### Get the download URL
233
+ ### Get the download URL (auth: WT)
176
234
 
177
235
  ```bash
178
236
  curl -s '<BASE>/file/temp-url/<FILE_ID>?is_opus=0' \
179
- -H 'Authorization: Bearer <JWT>' \
237
+ -H 'Authorization: Bearer <WT>' \
180
238
  -H 'User-Agent: Mozilla/5.0 ...'
181
239
  ```
182
240
 
183
- `is_opus=0` returns the mp3 variant (`temp_url`). `is_opus=1` returns opus in `temp_url_opus`. **Use mp3** — Whisper handles it cleanly, opus needs ffmpeg.
184
-
185
- Response: `{ "temp_url": "https://<s3...>" }`. The URL expires in a few minutes. Download immediately.
241
+ `is_opus=0` returns mp3 in `temp_url`. Use mp3 — Whisper handles it everywhere.
186
242
 
187
- ### Download
243
+ ### Download (no auth — signed URL)
188
244
 
189
245
  ```bash
190
246
  mkdir -p workspace/files/audio/plaud
191
247
  curl -s -o "workspace/files/audio/plaud/<FILE_ID>.mp3" '<TEMP URL>'
192
248
  ```
193
249
 
194
- ### Transcribe
250
+ ---
251
+
252
+ ## Transcription — pick a path
253
+
254
+ Once the audio is on disk, you need text. **Ask the human once** which path they want, then save it as `transcriptionMode` in `.plaud.json` so you don't re-ask every sync.
255
+
256
+ ### Path A — Bloby Marketplace `audio-to-text` (easiest, pay-per-minute)
257
+
258
+ If the bloby is registered with the relay (Quick Tunnel mode → there's a token at `~/.bloby/config.json → relay.token`), just POST the file. No API key to manage, no provider account.
195
259
 
196
260
  ```bash
197
- curl -s -X POST 'http://localhost:7400/api/whisper/transcribe-file' \
198
- -H 'Content-Type: application/json' \
199
- -d '{"path":"audio/plaud/<FILE_ID>.mp3","saveTranscriptNext":true}'
261
+ TOKEN=$(jq -r '.relay.token' ~/.bloby/config.json)
262
+
263
+ curl -s -X POST 'https://api.bloby.bot/api/services/audio-to-text/use' \
264
+ -H "X-Bloby-Token: $TOKEN" \
265
+ -F "file=@workspace/files/audio/plaud/<FILE_ID>.mp3" \
266
+ -F "language=en" # optional
200
267
  ```
201
268
 
202
- Returns `{ "transcript": "...", "transcriptPath": "audio/plaud/<FILE_ID>.mp3.txt" }`. The `.txt` file is now sitting next to the audio. You can read it with `Read` like any other file.
269
+ Returns JSON:
203
270
 
204
- The user's `whisper_key` from the wizard is what powers this — you don't need to know or handle the OpenAI key.
271
+ ```json
272
+ {
273
+ "transcript": "...",
274
+ "language": "en",
275
+ "estimatedMinutes": 5,
276
+ "priceUsd": 0.0185,
277
+ "paidVia": "balance",
278
+ "groqDurationSec": 275.4,
279
+ "model": "whisper-large-v3-turbo"
280
+ }
281
+ ```
282
+
283
+ - **Pricing:** $0.0037 per estimated minute, rounded up (~$0.22/hr).
284
+ - **How duration is estimated:** file size ÷ assumed 32kbps bitrate. Plaud-sourced mp3 matches this assumption well. High-bitrate files from other sources would be over-charged proportionally — for those, switch to Path B.
285
+ - **Paid from:** account balance first; falls back to MPP (Tempo USDC) or Base (use `/use-base` instead). Make sure the bloby's account has funds OR its wallet is funded on the matching network.
286
+ - **Limits:** 25MB per file. Mp3 from Plaud comfortably fits — observed 1MB ≈ 4½min.
205
287
 
206
- If transcription fails (e.g. file >25MB, Whisper API's own hard limit), leave the audio in place and skip the `.txt`. The human can ask you to split/compress later.
288
+ Write the response's `transcript` to `workspace/files/audio/plaud/<FILE_ID>.mp3.txt`.
207
289
 
208
- ### Pretty filenames (optional)
290
+ ### Path B — Bring your own API key (DIY)
209
291
 
210
- Tell the human you can keep the raw `<FILE_ID>.mp3` filenames, OR you can also rename to something human-readable. If they want pretty names:
292
+ Pick a provider, ask the human for their key, store it as a workspace secret (`workspace/.env` is fine — the backend reloads on .env change). Then call directly from Bash.
211
293
 
294
+ **Groq Whisper** — cheapest, fastest. Same model the marketplace uses under the hood. Free tier exists.
212
295
  ```bash
213
- # After successful transcribe, also write a symlink or copy with a nicer name:
214
- NICE="$(date -d "<start_time>" +%Y-%m-%d_%H%M)_<sanitised filename>"
215
- ln -s "<FILE_ID>.mp3" "workspace/files/audio/plaud/${NICE}.mp3"
216
- ln -s "<FILE_ID>.mp3.txt" "workspace/files/audio/plaud/${NICE}.txt"
296
+ curl -s -X POST 'https://api.groq.com/openai/v1/audio/transcriptions' \
297
+ -H "Authorization: Bearer $GROQ_API_KEY" \
298
+ -F "file=@workspace/files/audio/plaud/<FILE_ID>.mp3" \
299
+ -F "model=whisper-large-v3-turbo" \
300
+ -F "response_format=json"
217
301
  ```
218
302
 
219
- (Sanitise `filename` by stripping `/\\:*?"<>|`.)
303
+ **OpenAI Whisper** — the human may already have an OpenAI key from the Bloby wizard. Read it from the settings table directly:
304
+ ```bash
305
+ WHISPER_KEY=$(sqlite3 ~/.bloby/memory.db "SELECT value FROM settings WHERE key='whisper_key';")
306
+ curl -s -X POST 'https://api.openai.com/v1/audio/transcriptions' \
307
+ -H "Authorization: Bearer $WHISPER_KEY" \
308
+ -F "file=@workspace/files/audio/plaud/<FILE_ID>.mp3" \
309
+ -F "model=whisper-1"
310
+ ```
220
311
 
221
- Don't rename the originals — keep `<id>.mp3` as the canonical name so dedup keeps working.
312
+ **Mistral Voxtral**:
313
+ ```bash
314
+ curl -s -X POST 'https://api.mistral.ai/v1/audio/transcriptions' \
315
+ -H "Authorization: Bearer $MISTRAL_API_KEY" \
316
+ -F "file=@workspace/files/audio/plaud/<FILE_ID>.mp3" \
317
+ -F "model=voxtral-mini-latest"
318
+ ```
319
+
320
+ **Local — no API, no cost, fully private:**
321
+ - [whisper.cpp](https://github.com/ggerganov/whisper.cpp) — C++ binary, CPU or Metal/CUDA. Install once, transcribe forever.
322
+ - [faster-whisper](https://github.com/SYSTRAN/faster-whisper) — Python, ~4× faster than reference whisper.
323
+ - The human installs one of these themselves. The bloby invokes the CLI from Bash.
324
+
325
+ After whichever path, extract the `text` field and write it to `workspace/files/audio/plaud/<FILE_ID>.mp3.txt`.
326
+
327
+ ### Choosing for the human
328
+
329
+ If they don't have a preference, recommend **Path A (Marketplace)**:
330
+ - No key setup.
331
+ - Already integrated with the bloby's payment.
332
+ - Pay-as-you-go — no monthly minimum.
333
+ - If their account has any balance from other marketplace use, it just works.
334
+
335
+ Recommend **Path B** if:
336
+ - They're transcribing a lot and want to use a free tier or flat-rate plan.
337
+ - They want 100% local for privacy reasons.
338
+ - They already have a preferred provider.
222
339
 
223
340
  ---
224
341
 
225
342
  ## Cadence — CRON or PULSE?
226
343
 
227
- **There is no automatic cron set up by this skill.** You and your human decide together. Two reasonable patterns:
344
+ **No automatic schedule installed by this skill.** The human picks.
228
345
 
229
346
  ### Pattern A — CRON every N minutes
230
347
 
231
- When the human wants near-real-time freshness ("any time I record something, you should know about it within 15 minutes"), add an entry to `workspace/CRONS.json`:
348
+ Add to `workspace/CRONS.json`:
232
349
 
233
350
  ```json
234
351
  {
235
352
  "id": "plaud-sync",
236
353
  "schedule": "*/15 * * * *",
237
- "task": "Run a Plaud sync: list new recordings, download any new ones into workspace/files/audio/plaud/, and transcribe them via /api/whisper/transcribe-file. After, summarise to the human in chat IF there were new recordings — otherwise stay silent.",
354
+ "task": "Run a Plaud sync per the plaud skill: refresh WT if needed, list new recordings, download into workspace/files/audio/plaud/, and transcribe via the configured transcriptionMode in .plaud.json. If new recordings were found, summarise to the human in chat. If nothing new, stay silent.",
238
355
  "enabled": true,
239
356
  "oneShot": false
240
357
  }
241
358
  ```
242
359
 
243
- Tune `*/15` to taste. `*/5` for aggressive, `0 * * * *` (top of every hour) for quiet.
244
-
245
360
  ### Pattern B — PULSE memo
246
361
 
247
- When the human prefers their bloby just *check* during normal pulse wake-ups, add one line to your `MYSELF.md` or `MEMORY.md`:
362
+ Add one line to `MYSELF.md` or `MEMORY.md`:
248
363
 
249
364
  ```
250
- - Each pulse, briefly check Plaud for new recordings via the plaud skill. If there's something new, transcribe and decide whether to surface it. If nothing new, move on silently.
365
+ - Each pulse, briefly check Plaud for new recordings via the plaud skill. Transcribe with whatever transcriptionMode is set in workspace/.plaud.json. If new, decide whether to surface. If nothing new, move on silently.
251
366
  ```
252
367
 
253
- Pulse runs every 30 min by default. No CRON entry needed. Less aggressive than Pattern A, fits naturally with whatever else you're doing at pulse time.
254
-
255
- ### Or: don't auto-sync at all
368
+ ### Or: manual only
256
369
 
257
- Some humans only want manual control: *"Bloby, pull anything new from Plaud."* That's also fine — just keep the skill installed, no CRON, no pulse memo, you sync when asked.
370
+ No CRON, no pulse memo. Sync when asked.
258
371
 
259
- **Always check with the human first.** Default to Pattern B for new installs unless they tell you otherwise.
372
+ **Default to Pattern B for new installs** unless the human says otherwise.
260
373
 
261
374
  ---
262
375
 
263
376
  ## Re-auth (401 handling)
264
377
 
265
- When any Plaud call returns 401:
378
+ | Endpoint that 401'd | What expired | Fix |
379
+ |---|---|---|
380
+ | `/file/simple/web`, `/file/temp-url/*`, `/device/list` (WT) | Workspace token | Re-mint a WT from cached UT (Step 7b). Silent — don't bother the human. |
381
+ | `/user-app/auth/workspace/token/...`, `/team-app/workspaces/list`, `/user/me` (UT) | User token | Tell the human. If `authMethod === "otp"`, re-OTP. If `"paste"`, walk them through DevTools again. |
382
+ | `POST /api/services/audio-to-text/use` (relay) | Marketplace account empty / wallet unfunded | Tell the human. Suggest topping up or switching to Path B. |
266
383
 
267
- 1. Tell the human in chat: *"Your Plaud connection expired. Want me to re-pair?"* Don't silently fail.
268
- 2. If they say yes, re-run the OTP flow from Step 1. Overwrite the `plaud_token` setting.
269
- 3. If they signed up with Google/Apple originally, prompt for the paste-token fallback instead.
270
- 4. Don't keep retrying with the dead token — pause the sync until re-paired.
384
+ If you can't tell which token expired, assume UT is dead — re-auth.
271
385
 
272
386
  ---
273
387
 
274
388
  ## Disconnect
275
389
 
276
390
  ```bash
277
- curl -s -X POST 'http://localhost:7400/api/settings' \
278
- -H 'Content-Type: application/json' \
279
- -d '{"key":"plaud_token","value":""}'
280
- curl -s -X POST 'http://localhost:7400/api/settings' \
281
- -H 'Content-Type: application/json' \
282
- -d '{"key":"plaud_api_base","value":""}'
391
+ rm -f workspace/.plaud.json
283
392
  ```
284
393
 
285
- Recordings already on disk stay. The user can also disable the CRON entry (or remove it from `CRONS.json`).
394
+ Recordings on disk stay. Disable the CRON entry / remove from `CRONS.json` separately.
286
395
 
287
396
  ---
288
397
 
289
- ## Workspaces (advanced)
290
-
291
- Plaud's "workspace" is their multi-account team feature. Personal accounts don't usually need to worry about this — the API responds correctly without a workspace token. If a human ever reports recordings missing that they can see in the Plaud app, it's likely a workspace-scoped recording.
398
+ ## Quick Reference
292
399
 
293
- To resolve a workspace token: there's an undocumented `/workspace/...` endpoint that mints a workspace-scoped token. OpenPlaud's `src/lib/plaud/workspace.ts` is the reference if you ever need it. Don't bother unless the human hits this case.
400
+ | Action | curl | Auth |
401
+ |---|---|---|
402
+ | Send OTP | `POST <base>/auth/otp-send-code` body `{username}` | none |
403
+ | Verify OTP → UT | `POST <base>/auth/otp-login` body `{code, token}` | none |
404
+ | Profile | `GET <base>/user/me` | UT |
405
+ | List workspaces | `GET <base>/team-app/workspaces/list?need_personal_workspace=true` | UT |
406
+ | Mint WT | `POST <base>/user-app/auth/workspace/token/<workspaceId>` body `{}` | UT |
407
+ | List devices | `GET <base>/device/list` | **WT** |
408
+ | List recordings | `GET <base>/file/simple/web?skip=0&limit=50&is_trash=0&sort_by=edit_time&is_desc=true` | **WT** |
409
+ | Download URL | `GET <base>/file/temp-url/<id>?is_opus=0` | **WT** |
410
+ | Download audio | `GET <temp_url>` | none (signed) |
411
+ | Transcribe (marketplace) | `POST https://api.bloby.bot/api/services/audio-to-text/use` multipart `file=@...` | `X-Bloby-Token: $relay_token` |
412
+ | Transcribe (Groq) | `POST https://api.groq.com/openai/v1/audio/transcriptions` multipart | Bearer GROQ_API_KEY |
413
+ | Transcribe (OpenAI) | `POST https://api.openai.com/v1/audio/transcriptions` multipart | Bearer OPENAI_API_KEY |
414
+
415
+ State file: `workspace/.plaud.json`. Plaud requests need a browser-style `User-Agent`.
294
416
 
295
417
  ---
296
418
 
297
419
  ## What This Skill Does NOT Do
298
420
 
299
- - **No Plaud transcription.** We transcribe ourselves with Whisper. Plaud's own AI subscription is bypassed entirely.
300
- - **No dashboard.** OpenPlaud has a slick UI for browsing recordings. We don't. The bloby's job is to *read* the transcripts and act on them — summaries, action items, emails — using the normal workspace tools. If the human wants a UI, build one into `workspace/client/` as a normal workspace app.
301
- - **No push from Plaud.** No webhooks exist. You only know about new recordings when you ask.
302
- - **No editing recordings.** The Plaud API technically supports `PATCH /file/<id>` to rename. We don't expose it here — keep canonical `<id>.mp3` names.
303
- - **No real-time streaming.** Plaud syncs to its cloud *after* the recording finishes. Expect a lag of seconds-to-minutes between "user stopped recording" and "file appears in `/file/simple/web`."
304
-
305
- ---
306
-
307
- ## Quick Reference
308
-
309
- | Action | curl |
310
- |---|---|
311
- | Send OTP | `POST <base>/auth/otp-send-code` body `{username}` |
312
- | Verify OTP | `POST <base>/auth/otp-login` body `{code, token}` |
313
- | List devices | `GET <base>/device/list` |
314
- | List recordings | `GET <base>/file/simple/web?skip=0&limit=50&is_trash=0&sort_by=edit_time&is_desc=true` |
315
- | Get download URL | `GET <base>/file/temp-url/<id>?is_opus=0` |
316
- | Transcribe local file | `POST http://localhost:7400/api/whisper/transcribe-file` body `{path, saveTranscriptNext}` |
317
- | Save setting | `POST http://localhost:7400/api/settings` body `{key, value}` |
318
-
319
- All Plaud requests need `Authorization: Bearer <JWT>` + a browser-style `User-Agent`.
421
+ - **No automatic schedule.** The human + bloby pick CRON vs PULSE vs manual.
422
+ - **No dashboard.** OpenPlaud has a UI; we don't. The bloby's job is to *read* transcripts and act on them via normal workspace tools. If the human wants a UI, build one into `workspace/client/`.
423
+ - **No push from Plaud.** No webhooks exist; you only know about new recordings when you ask.
424
+ - **No real-time streaming.** Plaud syncs *after* the recording finishes. Lag is seconds-to-minutes between "user stopped recording" and "file appears in `/file/simple/web`."
320
425
 
321
426
  ---
322
427
 
323
428
  ## Credit
324
429
 
325
- Plaud API shape is the same one [OpenPlaud](https://github.com/openplaud/openplaud) uses — they did the reverse-engineering work. This skill reimplements just the parts a bloby needs.
430
+ Plaud API shape is the same one [OpenPlaud](https://github.com/openplaud/openplaud) uses — they did the reverse-engineering work, including the painful workspace-token discovery (their issue #66) and the Google/Apple identity gotcha (issue #65). This skill reimplements just the parts a bloby needs, and routes transcription either through Bloby's marketplace or a provider of the human's choice.
@@ -5,11 +5,11 @@
5
5
  "bloby_human": "Bruno Bertapeli",
6
6
  "bloby": "bloby-bruno",
7
7
  "author": "newbot-official",
8
- "description": "Plaud Note integration. Pairs the user's Plaud account via email OTP, polls Plaud's cloud for new recordings, downloads the audio into workspace/files/audio/plaud/, and transcribes it via the user's Whisper key. Cadence (CRON vs PULSE memo) is chosen by the human and their bloby together.",
8
+ "description": "Plaud Note integration. Pairs the user's Plaud account (email OTP or paste-token for Google/Apple identities), pulls recordings into workspace/files/audio/plaud/, and routes transcription through either the Bloby Marketplace audio-to-text service (pay-per-minute) or the human's own provider (Groq / OpenAI Whisper / Mistral Voxtral / local).",
9
9
  "depends": [],
10
10
  "env_keys": [],
11
11
  "has_telemetry": false,
12
- "size": "8KB",
12
+ "size": "12KB",
13
13
  "contains_binaries": false,
14
- "tags": ["plaud", "transcription", "audio", "recorder", "meeting"]
14
+ "tags": ["plaud", "transcription", "audio", "recorder", "meeting", "groq", "whisper"]
15
15
  }