bloby-bot 0.50.2 → 0.50.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bloby-bot",
3
- "version": "0.50.2",
3
+ "version": "0.50.3",
4
4
  "releaseNotes": [
5
5
  "1. Something great..",
6
6
  "2. ",
@@ -392,7 +392,6 @@ export async function startSupervisor() {
392
392
  'POST /api/channels/whatsapp/react',
393
393
  'POST /api/channels/send',
394
394
  'POST /api/channels/alexa/handle',
395
- 'POST /api/whisper/transcribe-file',
396
395
  ];
397
396
 
398
397
  function isExemptRoute(method: string, url: string): boolean {
package/worker/index.ts CHANGED
@@ -1002,119 +1002,6 @@ app.post('/api/whisper/transcribe', express.json({ limit: '10mb' }), async (req,
1002
1002
  }
1003
1003
  });
1004
1004
 
1005
- // Transcribe an audio file already on disk under workspace/files/.
1006
- // Body: { path, saveTranscriptNext?, language? }. `path` is interpreted
1007
- // relative to workspace/files/ ("files/" prefix is tolerated).
1008
- app.post('/api/whisper/transcribe-file', express.json({ limit: '1mb' }), async (req, res) => {
1009
- const whisperEnabled = getSetting('whisper_enabled');
1010
- const whisperKey = getSetting('whisper_key');
1011
-
1012
- if (whisperEnabled !== 'true' || !whisperKey) {
1013
- res.status(400).json({ error: 'Whisper not enabled or API key missing' });
1014
- return;
1015
- }
1016
-
1017
- const { path: relPath, saveTranscriptNext, language } = req.body as {
1018
- path?: string;
1019
- saveTranscriptNext?: boolean;
1020
- language?: string;
1021
- };
1022
-
1023
- if (!relPath || typeof relPath !== 'string') {
1024
- res.status(400).json({ error: 'Missing path' });
1025
- return;
1026
- }
1027
-
1028
- const normalized = relPath.replace(/^\/+/, '').replace(/^files\//, '');
1029
- const absPath = path.resolve(paths.files, normalized);
1030
- if (absPath !== paths.files && !absPath.startsWith(paths.files + path.sep)) {
1031
- res.status(400).json({ error: 'Path escapes workspace/files/' });
1032
- return;
1033
- }
1034
- if (!fs.existsSync(absPath) || !fs.statSync(absPath).isFile()) {
1035
- res.status(404).json({ error: 'File not found' });
1036
- return;
1037
- }
1038
-
1039
- try {
1040
- const audioBuffer = fs.readFileSync(absPath);
1041
- const filename = path.basename(absPath);
1042
- const ext = path.extname(filename).toLowerCase().slice(1);
1043
- const contentTypes: Record<string, string> = {
1044
- mp3: 'audio/mpeg',
1045
- m4a: 'audio/mp4',
1046
- mp4: 'audio/mp4',
1047
- wav: 'audio/wav',
1048
- webm: 'audio/webm',
1049
- ogg: 'audio/ogg',
1050
- opus: 'audio/ogg',
1051
- flac: 'audio/flac',
1052
- };
1053
- const contentType = contentTypes[ext] || 'application/octet-stream';
1054
-
1055
- const boundary = '----WhisperBoundary' + Date.now();
1056
- const CRLF = '\r\n';
1057
- const parts: Buffer[] = [];
1058
-
1059
- parts.push(Buffer.from(
1060
- `--${boundary}${CRLF}` +
1061
- `Content-Disposition: form-data; name="file"; filename="${filename}"${CRLF}` +
1062
- `Content-Type: ${contentType}${CRLF}${CRLF}`
1063
- ));
1064
- parts.push(audioBuffer);
1065
- parts.push(Buffer.from(CRLF));
1066
-
1067
- parts.push(Buffer.from(
1068
- `--${boundary}${CRLF}` +
1069
- `Content-Disposition: form-data; name="model"${CRLF}${CRLF}` +
1070
- `whisper-1${CRLF}`
1071
- ));
1072
-
1073
- if (language && typeof language === 'string') {
1074
- parts.push(Buffer.from(
1075
- `--${boundary}${CRLF}` +
1076
- `Content-Disposition: form-data; name="language"${CRLF}${CRLF}` +
1077
- `${language}${CRLF}`
1078
- ));
1079
- }
1080
-
1081
- parts.push(Buffer.from(`--${boundary}--${CRLF}`));
1082
-
1083
- const body = Buffer.concat(parts);
1084
-
1085
- const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
1086
- method: 'POST',
1087
- headers: {
1088
- 'Authorization': `Bearer ${whisperKey}`,
1089
- 'Content-Type': `multipart/form-data; boundary=${boundary}`,
1090
- },
1091
- body,
1092
- });
1093
-
1094
- if (!response.ok) {
1095
- const errText = await response.text();
1096
- log.warn(`Whisper API error: ${response.status} ${errText}`);
1097
- res.status(502).json({ error: 'Whisper API error', detail: errText.slice(0, 500) });
1098
- return;
1099
- }
1100
-
1101
- const result = await response.json() as { text: string };
1102
- const transcript = result.text;
1103
-
1104
- let transcriptPath: string | undefined;
1105
- if (saveTranscriptNext) {
1106
- const txtAbs = absPath + '.txt';
1107
- fs.writeFileSync(txtAbs, transcript, 'utf8');
1108
- transcriptPath = path.relative(paths.files, txtAbs).split(path.sep).join('/');
1109
- }
1110
-
1111
- res.json({ transcript, ...(transcriptPath ? { transcriptPath } : {}) });
1112
- } catch (err: any) {
1113
- log.warn(`Whisper transcribe-file failed: ${err.message}`);
1114
- res.status(500).json({ error: 'Transcription failed' });
1115
- }
1116
- });
1117
-
1118
1005
  // Serve stored files (audio, images, documents)
1119
1006
  app.use('/api/files', express.static(paths.files));
1120
1007
 
@@ -4,24 +4,27 @@
4
4
 
5
5
  A channel for getting **recordings off the user's Plaud Note device** and into your workspace as `(audio file, transcript)` pairs you can read and act on.
6
6
 
7
- Plaud is a tiny voice recorder. When the user records something — a meeting, a lecture, a thought on a walk — the device syncs to Plaud's cloud over Bluetooth/Wi-Fi. **You don't talk to the device.** You talk to Plaud's cloud, pull the audio, and transcribe it yourself.
7
+ Plaud is a tiny voice recorder. When the user records something — a meeting, a lecture, a thought on a walk — the device syncs to Plaud's cloud over Bluetooth/Wi-Fi. **You don't talk to the device.** You talk to Plaud's cloud, pull the audio, and transcribe it — either via the Bloby Marketplace service or your own provider.
8
8
 
9
9
  There is **no Plaud CLI, no Plaud webhook, no official Plaud API.** Plaud's mobile/web app uses an undocumented HTTP API. This skill uses the same one — same shape OpenPlaud uses (`https://github.com/openplaud/openplaud`).
10
10
 
11
- The user already has Whisper enabled via the Bloby wizard. We use that OpenAI key — no new key, no new subscription, no Plaud AI plan needed.
11
+ ---
12
+
13
+ ## Two parts to this skill
14
+
15
+ 1. **Pulling audio from Plaud** — same for everyone. OTP / paste-token, list, download.
16
+ 2. **Transcribing the audio** — you have a choice (see "Transcription — pick a path" below).
12
17
 
13
18
  ---
14
19
 
15
- ## What Bloby Gives You (already-built plumbing)
20
+ ## What Bloby Gives You (plumbing)
16
21
 
17
22
  | Thing | Where | How you use it |
18
23
  |---|---|---|
19
- | Whisper-on-disk endpoint | `POST http://localhost:7400/api/whisper/transcribe-file` | Send a path under `workspace/files/`, get a transcript back. Optional `saveTranscriptNext: true` writes `foo.mp3.txt` next to `foo.mp3`. Auth-exempt, no Bearer needed. |
20
24
  | Workspace files dir | `workspace/files/audio/plaud/` | Drop downloaded audio here. Supervisor serves it at `/api/files/audio/plaud/<name>`. |
21
- | Workspace file tools | `Read` / `Write` / `Edit` | Store Plaud auth state in `workspace/.plaud.json` (see below). No `/api/settings` calls — that endpoint requires a portal Bearer token the skill can't easily produce. |
25
+ | Workspace file tools | `Read` / `Write` / `Edit` | Store Plaud auth state in `workspace/.plaud.json`. Save transcripts as `<id>.mp3.txt` next to the audio. |
22
26
  | Scheduling | `workspace/CRONS.json` or `workspace/PULSE.json` | Run sync periodically. See "Cadence" below. |
23
-
24
- Use `http://localhost:7400` from Bash for the Whisper endpoint. Everything else is the open internet (Plaud's API) or your own filesystem.
27
+ | Relay token | `~/.bloby/config.json` → `relay.token` | Use as `X-Bloby-Token` header when calling marketplace services. |
25
28
 
26
29
  ### State file: `workspace/.plaud.json`
27
30
 
@@ -29,17 +32,19 @@ You manage all Plaud connection state in a single JSON file at workspace root. R
29
32
 
30
33
  ```json
31
34
  {
32
- "email": "bruno@bertapeli.com",
35
+ "email": "bruno@example.com",
33
36
  "apiBase": "https://api.plaud.ai",
34
37
  "userToken": "eyJ...",
35
38
  "workspaceId": "ws_xxxxx",
36
39
  "workspaceToken": "eyJ...",
37
40
  "workspaceTokenMintedAt": "2026-05-22T19:30:00.000Z",
38
- "lastSyncVersionMs": 1716412800000
41
+ "authMethod": "otp",
42
+ "lastSyncVersionMs": 0,
43
+ "transcriptionMode": "marketplace"
39
44
  }
40
45
  ```
41
46
 
42
- Initialize empty (`{}`) if the file doesn't exist. Never commit secrets — `.plaud.json` is gitignored by default (starts with `.`).
47
+ `transcriptionMode` is your record of which transcription path the human picked. One of: `"marketplace"`, `"groq"`, `"openai"`, `"mistral"`, `"local"`, or whatever they configured. Initialize empty (`{}`) if the file doesn't exist.
43
48
 
44
49
  ---
45
50
 
@@ -53,16 +58,14 @@ Three regions. Pick one when pairing. A token from one region won't work on anot
53
58
  | EU | `https://api-euc1.plaud.ai` |
54
59
  | Asia-Pacific | `https://api-apse1.plaud.ai` |
55
60
 
56
- If the user doesn't know their region, start with Global. If `POST /auth/otp-send-code` returns `status: -302` with `data.domains.api`, retry against that base — the user's account lives in a different region. Save whichever base actually succeeded.
61
+ If `POST /auth/otp-send-code` returns `status: -302` with `data.domains.api`, retry against that base. Save whichever base actually succeeded.
57
62
 
58
- **Two token kinds.** This is the part that bites everyone:
63
+ **Two token kinds — the part that bites everyone:**
59
64
 
60
- - **User Token (UT)** — what `/auth/otp-login` returns. Authenticates `/user/me`, the workspace-list endpoint, and the workspace-token mint endpoint. **It does NOT authenticate recording endpoints.** Calling `/file/simple/web` or `/device/list` with a UT silently returns HTTP 200 + empty list. This is exactly the "I have no recordings but my Plaud app shows 3 files" symptom.
61
- - **Workspace Token (WT)** — minted from the UT. Required on all recording endpoints. ~24h lifetime. Re-mint when expired.
65
+ - **User Token (UT)** — what `/auth/otp-login` returns. Authenticates `/user/me`, workspace-list, workspace-token mint. **Does NOT authenticate recording endpoints.** Calling `/file/simple/web` or `/device/list` with a UT silently returns HTTP 200 + empty list.
66
+ - **Workspace Token (WT)** — minted from the UT. Required on recording endpoints. ~24h lifetime. Re-mint when expired.
62
67
 
63
- **You always need both.** UT lives long, WT is short-lived. Workflow: OTP → UT → list workspaces (with UT) → mint WT for the personal workspace (with UT) → use WT for everything recording-related.
64
-
65
- **User-Agent matters.** Plaud blocks some defaults. Always send a normal browser UA:
68
+ **User-Agent matters.** Plaud blocks some defaults. Always send:
66
69
 
67
70
  ```
68
71
  User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36
@@ -72,18 +75,17 @@ User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,
72
75
 
73
76
  ## Pairing (first time)
74
77
 
75
- Walk the human through it conversationally. They don't see any UI for this — just chat with you.
76
-
77
- ### Step 1 — Ask for their Plaud email
78
+ ### Step 1 — Ask for their Plaud email AND how they signed up
78
79
 
79
80
  ```
80
- Bloby: Which email do you use on plaud.ai? I'll have them send you a 6-digit code.
81
- Human: bruno@example.com
81
+ Bloby: Which email do you use on plaud.ai? And did you sign up with email+password, or "Continue with Google" / "Continue with Apple"?
82
82
  ```
83
83
 
84
- If the human mentions they signed up with **Google or Apple**, jump to the "Paste-token fallback" section — OTP only works for email+password Plaud identities.
84
+ **If they signed up with Google or Apple**, skip OTP entirely and go to "Paste-token fallback". Don't try OTP first — Plaud will silently create a parallel empty account at the same email, you'll mint a WT successfully, and recording endpoints will return empty. The symptom looks like "auth worked but no recordings", but it's really two different identities at the same email.
85
85
 
86
- ### Step 2 — Send the OTP
86
+ If unsure, run OTP and lean on the Step 8 ghost-account check below.
87
+
88
+ ### Step 2 — Send OTP
87
89
 
88
90
  ```bash
89
91
  curl -s -X POST 'https://api.plaud.ai/auth/otp-send-code' \
@@ -92,12 +94,12 @@ curl -s -X POST 'https://api.plaud.ai/auth/otp-send-code' \
92
94
  -d '{"username":"<EMAIL>"}'
93
95
  ```
94
96
 
95
- Expected `status: 0` and a `token` field. **Save the `token`** — you need it for verify. If you see `status: -302`, switch `apiBase` to `data.domains.api` and retry once.
97
+ Expected `status: 0` and a `token` field. Save the `token` for Step 4.
96
98
 
97
99
  ### Step 3 — Ask for the code
98
100
 
99
101
  ```
100
- Bloby: Check your inbox — Plaud sent you a 6-digit code. What is it?
102
+ Bloby: Check your inbox — Plaud sent a 6-digit code. What is it?
101
103
  ```
102
104
 
103
105
  ### Step 4 — Verify
@@ -109,23 +111,24 @@ curl -s -X POST '<apiBase>/auth/otp-login' \
109
111
  -d '{"code":"<6 DIGITS>","token":"<OTP TOKEN FROM STEP 2>"}'
110
112
  ```
111
113
 
112
- Expected `access_token` (a long `eyJ...` JWT). **This is the User Token (UT). Save it as `userToken`.**
114
+ Save `access_token` as `userToken` in `.plaud.json`.
113
115
 
114
- > ⚠️ **Don't be misled by `is_new_user: true`** in this response. It's an informational flag for the Plaud client — it does NOT mean Plaud just created a fresh account for you. Your real account is intact. The empty `data_devices: []` you'll see next is because UT can't read recording/device endpoints — that's the workspace-token issue, not "wrong account."
116
+ > ⚠️ `is_new_user: true` in the response is just an informational flag — it does NOT mean Plaud created a new account. Real account check happens in Step 8.
115
117
 
116
- ### Step 5 — Write initial state to `workspace/.plaud.json`
118
+ ### Step 5 — Initial state
117
119
 
118
- Use the `Write` tool. No `/api/settings` calls.
120
+ Write to `workspace/.plaud.json`:
119
121
 
120
122
  ```json
121
123
  {
122
124
  "email": "<EMAIL>",
123
- "apiBase": "<BASE THAT WORKED>",
124
- "userToken": "<UT FROM STEP 4>"
125
+ "apiBase": "<BASE>",
126
+ "userToken": "<UT>",
127
+ "authMethod": "otp"
125
128
  }
126
129
  ```
127
130
 
128
- ### Step 6 — Smoke test the UT (don't try `/device/list` yet)
131
+ ### Step 6 — Smoke test the UT
129
132
 
130
133
  ```bash
131
134
  curl -s '<BASE>/user/me' \
@@ -133,12 +136,10 @@ curl -s '<BASE>/user/me' \
133
136
  -H 'User-Agent: Mozilla/5.0 ...'
134
137
  ```
135
138
 
136
- Should return the user's profile (email matches the one you used to pair). If 401, the UT is bad — start over. If 200 but the email is different from what the human gave you, the OTP went to a different identity (Google/Apple collision) — explain and go to paste-token fallback.
139
+ Should return the user's profile. If 401, UT is bad — restart.
137
140
 
138
141
  ### Step 7 — Mint the Workspace Token (REQUIRED)
139
142
 
140
- This is the step that makes the difference between "0 recordings" and "all 3 of my recordings."
141
-
142
143
  **7a. List workspaces** (auth: UT):
143
144
 
144
145
  ```bash
@@ -147,11 +148,9 @@ curl -s '<BASE>/team-app/workspaces/list?need_personal_workspace=true' \
147
148
  -H 'User-Agent: Mozilla/5.0 ...'
148
149
  ```
149
150
 
150
- Response shape: `{ status: 0, data: { workspaces: [{ workspace_id, workspace_type, ... }] } }`.
151
+ Pick the personal workspace (`workspace_type === "0"`, or first if none). Save its `workspace_id` as `workspaceId`.
151
152
 
152
- Pick the **personal** workspace — the one where `workspace_type === "0"`. If no workspace has type `"0"` (rare), use the first entry. Save its `workspace_id` as `workspaceId`.
153
-
154
- **7b. Mint a WT for that workspace** (auth: UT, body is literally `{}`):
153
+ **7b. Mint a WT** (auth: UT, body literally `{}`):
155
154
 
156
155
  ```bash
157
156
  curl -s -X POST '<BASE>/user-app/auth/workspace/token/<WORKSPACE_ID>' \
@@ -161,60 +160,61 @@ curl -s -X POST '<BASE>/user-app/auth/workspace/token/<WORKSPACE_ID>' \
161
160
  -d '{}'
162
161
  ```
163
162
 
164
- Response: `{ status: 0, data: { workspace_token: "eyJ..." } }`.
165
-
166
- **Save it** as `workspaceToken` and `workspaceTokenMintedAt: <now ISO 8601>` in `.plaud.json`. Now Update the file via `Write`.
163
+ Save `workspace_token` as `workspaceToken` and `workspaceTokenMintedAt: <now ISO 8601>` in `.plaud.json`.
167
164
 
168
- ### Step 8 — Real smoke test (with WT)
165
+ ### Step 8 — Real smoke test + ghost-account check
169
166
 
170
167
  ```bash
171
168
  curl -s '<BASE>/device/list' \
172
169
  -H 'Authorization: Bearer <WT>' \
173
170
  -H 'User-Agent: Mozilla/5.0 ...'
174
- ```
175
-
176
- Now you should see devices. Tell the human: *"Paired. Your Plaud (serial ending ...XXXX) is connected. Want me to pull in everything you've recorded so far?"*
177
171
 
178
- If `data_devices` is still empty here — odd, but possible for accounts that haven't synced any device in a while. Try the recordings list directly:
179
-
180
- ```bash
181
172
  curl -s '<BASE>/file/simple/web?skip=0&limit=10&is_trash=0' \
182
173
  -H 'Authorization: Bearer <WT>' \
183
174
  -H 'User-Agent: Mozilla/5.0 ...'
184
175
  ```
185
176
 
186
- If `data_file_list` has entries, you're good — devices list can be empty even when recordings exist.
177
+ | `data_devices` | `data_file_list` | Meaning | Action |
178
+ |---|---|---|---|
179
+ | has entries | has entries | Real account paired | Continue to "Transcription — pick a path" |
180
+ | empty | has entries | Devices haven't checked in lately | Treat as success |
181
+ | **empty** | **empty** | **Google/Apple ghost-account case** | **Stop.** Tell the human, switch to paste-token (next section) |
182
+
183
+ ### Ghost-account recovery
184
+
185
+ If empty/empty:
186
+
187
+ 1. Tell the human plainly:
188
+ > *"OTP succeeded, but you have zero recordings on this Plaud account. Most likely your real Plaud account is signed in with Google or Apple, and the OTP I just ran created a separate empty account at the same email. Can you grab a token from web.plaud.ai DevTools so I can talk to the real account?"*
189
+ 2. Walk them through paste-token (next section).
190
+ 3. Once paste-token works and you see recordings, overwrite `userToken` and set `"authMethod": "paste"` in `.plaud.json` so next sync skips OTP.
187
191
 
188
192
  ---
189
193
 
190
194
  ## Paste-token fallback (Google/Apple Plaud accounts)
191
195
 
192
- If OTP just won't work and the human signed up with Google or Apple, get the bearer manually:
193
-
194
- 1. Open [web.plaud.ai](https://web.plaud.ai) in a browser and sign in with Google/Apple normally.
195
- 2. Open DevTools (F12 or Cmd+Option+I) → Network tab → refresh.
196
+ 1. Open [web.plaud.ai](https://web.plaud.ai), sign in with Google/Apple normally.
197
+ 2. DevTools (F12 or Cmd+Option+I) → Network tab → refresh.
196
198
  3. Click any request to `api.plaud.ai`, `api-euc1.plaud.ai`, or `api-apse1.plaud.ai`.
197
- 4. Under **Request Headers**, find `Authorization`. Copy everything after `Bearer ` (the long `eyJ...`).
198
- 5. The human pastes it to you in chat. Save it as `userToken` and set `apiBase` to whichever host they pulled it from.
199
- 6. **Still run Step 7** — mint a workspace token. The paste-token gives you a UT, same as OTP. WT is still required.
199
+ 4. Request Headers → `Authorization` → copy everything after `Bearer ` (long `eyJ...`).
200
+ 5. Human pastes to you. Save as `userToken`, set `apiBase` to whichever host they pulled it from, `"authMethod": "paste"`.
201
+ 6. **Still run Step 7** — paste-token gives a UT, WT must still be minted.
200
202
 
201
203
  ---
202
204
 
203
205
  ## Syncing recordings
204
206
 
205
- The shape of a sync run:
206
-
207
207
  ```
208
- GET /file/simple/web → list recent recordings (paginated) [auth: WT]
208
+ GET /file/simple/web → list [auth: WT]
209
209
  for each new one:
210
- GET /file/temp-url/<id>?is_opus=0 → get a short-lived S3 link [auth: WT]
211
- curl -o workspace/files/audio/plaud/<id>.mp3 → download (no auth, signed URL)
212
- POST /api/whisper/transcribe-file → produces <id>.mp3.txt alongside
210
+ GET /file/temp-url/<id>?is_opus=0 → signed mp3 URL [auth: WT]
211
+ curl -o workspace/files/audio/plaud/<id>.mp3 → download (signed URL, no auth)
212
+ <transcription path> → produces <id>.mp3.txt
213
213
  ```
214
214
 
215
215
  ### Pre-sync: check WT freshness
216
216
 
217
- Read `.plaud.json`. If `workspaceToken` is missing or `workspaceTokenMintedAt` is more than ~20 hours old, re-mint (Step 7b above) and update the file before starting the sync. WT lifetime is ~24h; refresh defensively.
217
+ Read `.plaud.json`. If `workspaceToken` is missing or `workspaceTokenMintedAt` is more than ~20 hours old, re-mint (Step 7b) before starting.
218
218
 
219
219
  ### List recordings (auth: WT)
220
220
 
@@ -224,28 +224,11 @@ curl -s '<BASE>/file/simple/web?skip=0&limit=50&is_trash=0&sort_by=edit_time&is_
224
224
  -H 'User-Agent: Mozilla/5.0 ...'
225
225
  ```
226
226
 
227
- The response has `data_file_list` — an array of recording objects. Fields you'll care about:
228
-
229
- | Field | Use |
230
- |---|---|
231
- | `id` | Plaud's file id. Use as the local filename. |
232
- | `filename` | Human label the user gave it (or auto-generated). Sanitise before using as a filename. |
233
- | `duration` | Seconds. |
234
- | `start_time` / `end_time` | When the recording happened. |
235
- | `version_ms` | Bumps if the user edits the recording. Track this to know when to re-download. |
236
- | `serial_number` | Which Plaud device. |
237
- | `is_trash` | Skip if 1. |
238
-
239
- Page with `skip=`; do 50 at a time. Stop when a page comes back smaller than `limit` or empty.
227
+ `data_file_list` fields you'll care about: `id`, `filename`, `duration`, `start_time`, `end_time`, `version_ms`, `serial_number`, `is_trash`. Page with `skip=`.
240
228
 
241
229
  ### Dedup
242
230
 
243
- You don't want to re-download what you already have. Two ways, pick one:
244
-
245
- - **Filesystem**: if `workspace/files/audio/plaud/<id>.mp3` exists, skip it.
246
- - **Cursor**: save the newest `version_ms` you've seen as `lastSyncVersionMs` in `.plaud.json`. Skip anything `<=` that cursor next time.
247
-
248
- If `version_ms` changed on a recording you already downloaded, the user edited the filename or trimmed it. Re-fetch and overwrite.
231
+ Either filesystem (skip if `workspace/files/audio/plaud/<id>.mp3` exists) or `lastSyncVersionMs` cursor in `.plaud.json`. If `version_ms` changed on a recording you already downloaded, the user edited the file — re-fetch and overwrite.
249
232
 
250
233
  ### Get the download URL (auth: WT)
251
234
 
@@ -255,115 +238,160 @@ curl -s '<BASE>/file/temp-url/<FILE_ID>?is_opus=0' \
255
238
  -H 'User-Agent: Mozilla/5.0 ...'
256
239
  ```
257
240
 
258
- `is_opus=0` returns mp3 in `temp_url`. `is_opus=1` returns opus in `temp_url_opus`. **Use mp3** — Whisper handles it natively, opus would need ffmpeg.
259
-
260
- The URL expires in minutes. Download immediately.
241
+ `is_opus=0` returns mp3 in `temp_url`. Use mp3 — Whisper handles it everywhere.
261
242
 
262
- ### Download (no auth — URL is signed)
243
+ ### Download (no auth — signed URL)
263
244
 
264
245
  ```bash
265
246
  mkdir -p workspace/files/audio/plaud
266
247
  curl -s -o "workspace/files/audio/plaud/<FILE_ID>.mp3" '<TEMP URL>'
267
248
  ```
268
249
 
269
- ### Transcribe (no auth — endpoint is exempt)
250
+ ---
251
+
252
+ ## Transcription — pick a path
253
+
254
+ Once the audio is on disk, you need text. **Ask the human once** which path they want, then save it as `transcriptionMode` in `.plaud.json` so you don't re-ask every sync.
255
+
256
+ ### Path A — Bloby Marketplace `audio-to-text` (easiest, pay-per-minute)
257
+
258
+ If the bloby is registered with the relay (Quick Tunnel mode → there's a token at `~/.bloby/config.json → relay.token`), just POST the file. No API key to manage, no provider account.
270
259
 
271
260
  ```bash
272
- curl -s -X POST 'http://localhost:7400/api/whisper/transcribe-file' \
273
- -H 'Content-Type: application/json' \
274
- -d '{"path":"audio/plaud/<FILE_ID>.mp3","saveTranscriptNext":true}'
261
+ TOKEN=$(jq -r '.relay.token' ~/.bloby/config.json)
262
+
263
+ curl -s -X POST 'https://api.bloby.bot/api/services/audio-to-text/use' \
264
+ -H "X-Bloby-Token: $TOKEN" \
265
+ -F "file=@workspace/files/audio/plaud/<FILE_ID>.mp3" \
266
+ -F "language=en" # optional
267
+ ```
268
+
269
+ Returns JSON:
270
+
271
+ ```json
272
+ {
273
+ "transcript": "...",
274
+ "language": "en",
275
+ "estimatedMinutes": 5,
276
+ "priceUsd": 0.0185,
277
+ "paidVia": "balance",
278
+ "groqDurationSec": 275.4,
279
+ "model": "whisper-large-v3-turbo"
280
+ }
275
281
  ```
276
282
 
277
- Returns `{ "transcript": "...", "transcriptPath": "audio/plaud/<FILE_ID>.mp3.txt" }`. The `.txt` file is sitting next to the audio. Read it with the `Read` tool like any other file.
283
+ - **Pricing:** $0.0037 per estimated minute, rounded up (~$0.22/hr).
284
+ - **How duration is estimated:** file size ÷ assumed 32kbps bitrate. Plaud-sourced mp3 matches this assumption well. High-bitrate files from other sources would be over-charged proportionally — for those, switch to Path B.
285
+ - **Paid from:** account balance first; falls back to MPP (Tempo USDC) or Base (use `/use-base` instead). Make sure the bloby's account has funds OR its wallet is funded on the matching network.
286
+ - **Limits:** 25MB per file. Mp3 from Plaud comfortably fits — observed 1MB ≈ 4½min.
278
287
 
279
- If Whisper fails (file >25MB is Whisper's own hard cap; rate-limit; network), leave the audio in place and skip the `.txt`. The human can ask you to split/compress later.
288
+ Write the response's `transcript` to `workspace/files/audio/plaud/<FILE_ID>.mp3.txt`.
280
289
 
281
- ### Pretty filenames (optional)
290
+ ### Path B — Bring your own API key (DIY)
282
291
 
283
- Tell the human you can keep raw `<id>.mp3` filenames or also create human-readable copies. If they want pretty names:
292
+ Pick a provider, ask the human for their key, store it as a workspace secret (`workspace/.env` is fine — the backend reloads on .env change). Then call directly from Bash.
284
293
 
294
+ **Groq Whisper** — cheapest, fastest. Same model the marketplace uses under the hood. Free tier exists.
285
295
  ```bash
286
- NICE="$(date -d "<start_time>" +%Y-%m-%d_%H%M)_<sanitised filename>"
287
- ln -s "<FILE_ID>.mp3" "workspace/files/audio/plaud/${NICE}.mp3"
288
- ln -s "<FILE_ID>.mp3.txt" "workspace/files/audio/plaud/${NICE}.txt"
296
+ curl -s -X POST 'https://api.groq.com/openai/v1/audio/transcriptions' \
297
+ -H "Authorization: Bearer $GROQ_API_KEY" \
298
+ -F "file=@workspace/files/audio/plaud/<FILE_ID>.mp3" \
299
+ -F "model=whisper-large-v3-turbo" \
300
+ -F "response_format=json"
289
301
  ```
290
302
 
291
- (Sanitise `filename` by stripping `/\\:*?"<>|`.)
303
+ **OpenAI Whisper** — the human may already have an OpenAI key from the Bloby wizard. Read it from the settings table directly:
304
+ ```bash
305
+ WHISPER_KEY=$(sqlite3 ~/.bloby/memory.db "SELECT value FROM settings WHERE key='whisper_key';")
306
+ curl -s -X POST 'https://api.openai.com/v1/audio/transcriptions' \
307
+ -H "Authorization: Bearer $WHISPER_KEY" \
308
+ -F "file=@workspace/files/audio/plaud/<FILE_ID>.mp3" \
309
+ -F "model=whisper-1"
310
+ ```
311
+
312
+ **Mistral Voxtral**:
313
+ ```bash
314
+ curl -s -X POST 'https://api.mistral.ai/v1/audio/transcriptions' \
315
+ -H "Authorization: Bearer $MISTRAL_API_KEY" \
316
+ -F "file=@workspace/files/audio/plaud/<FILE_ID>.mp3" \
317
+ -F "model=voxtral-mini-latest"
318
+ ```
319
+
320
+ **Local — no API, no cost, fully private:**
321
+ - [whisper.cpp](https://github.com/ggerganov/whisper.cpp) — C++ binary, CPU or Metal/CUDA. Install once, transcribe forever.
322
+ - [faster-whisper](https://github.com/SYSTRAN/faster-whisper) — Python, ~4× faster than reference whisper.
323
+ - The human installs one of these themselves. The bloby invokes the CLI from Bash.
324
+
325
+ Whichever Path B provider you use, extract the `text` field from its JSON response (Path A returns `transcript` instead) and write it to `workspace/files/audio/plaud/<FILE_ID>.mp3.txt`.
292
326
 
293
- Don't rename originals — `<id>.mp3` stays canonical so dedup keeps working.
327
+ ### Choosing for the human
328
+
329
+ If they don't have a preference, recommend **Path A (Marketplace)**:
330
+ - No key setup.
331
+ - Already integrated with the bloby's payment.
332
+ - Pay-as-you-go — no monthly minimum.
333
+ - If their account has any balance from other marketplace use, it just works.
334
+
335
+ Recommend **Path B** if:
336
+ - They're transcribing a lot and want to use a free tier or flat-rate plan.
337
+ - They want 100% local for privacy reasons.
338
+ - They already have a preferred provider.
294
339
 
295
340
  ---
296
341
 
297
342
  ## Cadence — CRON or PULSE?
298
343
 
299
- **This skill installs no automatic schedule.** You and your human decide together.
344
+ **No automatic schedule installed by this skill.** The human picks.
300
345
 
301
346
  ### Pattern A — CRON every N minutes
302
347
 
303
- When the human wants near-real-time freshness, add an entry to `workspace/CRONS.json`:
348
+ Add to `workspace/CRONS.json`:
304
349
 
305
350
  ```json
306
351
  {
307
352
  "id": "plaud-sync",
308
353
  "schedule": "*/15 * * * *",
309
- "task": "Run a Plaud sync per the plaud skill: refresh WT if needed, list new recordings, download into workspace/files/audio/plaud/, and transcribe via /api/whisper/transcribe-file. If new recordings were found, summarise to the human in chat. If nothing new, stay silent.",
354
+ "task": "Run a Plaud sync per the plaud skill: refresh WT if needed, list new recordings, download into workspace/files/audio/plaud/, and transcribe via the configured transcriptionMode in .plaud.json. If new recordings were found, summarise to the human in chat. If nothing new, stay silent.",
310
355
  "enabled": true,
311
356
  "oneShot": false
312
357
  }
313
358
  ```
314
359
 
315
- Tune `*/15` to taste. `*/5` for aggressive, `0 * * * *` for quiet.
316
-
317
360
  ### Pattern B — PULSE memo
318
361
 
319
- When the human prefers their bloby just *check* during normal pulse wake-ups, add one line to `MYSELF.md` or `MEMORY.md`:
362
+ Add one line to `MYSELF.md` or `MEMORY.md`:
320
363
 
321
364
  ```
322
- - Each pulse, briefly check Plaud for new recordings via the plaud skill. If there's something new, transcribe and decide whether to surface it. If nothing new, move on silently.
365
+ - Each pulse, briefly check Plaud for new recordings via the plaud skill. Transcribe with whatever transcriptionMode is set in workspace/.plaud.json. If new, decide whether to surface. If nothing new, move on silently.
323
366
  ```
324
367
 
325
- Pulse runs every 30 min by default.
326
-
327
- ### Or: don't auto-sync at all
368
+ ### Or: manual only
328
369
 
329
- Manual only. Keep the skill installed, no CRON, no pulse memo, sync when asked.
370
+ No CRON, no pulse memo. Sync when asked.
330
371
 
331
- **Default to Pattern B for new installs unless the human says otherwise.**
372
+ **Default to Pattern B for new installs** unless the human says otherwise.
332
373
 
333
374
  ---
334
375
 
335
376
  ## Re-auth (401 handling)
336
377
 
337
- Two different 401s, two different fixes.
338
-
339
378
  | Endpoint that 401'd | What expired | Fix |
340
379
  |---|---|---|
341
- | `/file/simple/web`, `/file/temp-url/*`, `/device/list` (auth: WT) | Workspace token expired | Re-mint a WT from the cached UT (Step 7b). Don't bother the human. |
342
- | `/user-app/auth/workspace/token/...`, `/team-app/workspaces/list`, `/user/me` (auth: UT) | User token expired | Tell the human, re-run OTP from Step 1. |
380
+ | `/file/simple/web`, `/file/temp-url/*`, `/device/list` (WT) | Workspace token | Re-mint a WT from cached UT (Step 7b). Silent — don't bother the human. |
381
+ | `/user-app/auth/workspace/token/...`, `/team-app/workspaces/list`, `/user/me` (UT) | User token | Tell the human. If `authMethod === "otp"`, re-OTP. If `"paste"`, walk them through DevTools again. |
382
+ | `POST /api/services/audio-to-text/use` (relay) | Marketplace account empty / wallet unfunded | Tell the human. Suggest topping up or switching to Path B. |
343
383
 
344
- If you can't tell which token expired (e.g. you tried to mint a WT and got 401), assume UT is dead → re-OTP.
384
+ If you can't tell which token expired, assume UT is dead → re-auth.
345
385
 
346
386
  ---
347
387
 
348
388
  ## Disconnect
349
389
 
350
- Delete the state file:
351
-
352
390
  ```bash
353
391
  rm -f workspace/.plaud.json
354
392
  ```
355
393
 
356
- Recordings already on disk stay. The human can also disable the CRON entry / remove it from `CRONS.json`.
357
-
358
- ---
359
-
360
- ## What This Skill Does NOT Do
361
-
362
- - **No Plaud transcription.** We transcribe ourselves with Whisper. Plaud's own AI subscription is bypassed entirely.
363
- - **No dashboard.** OpenPlaud has a slick UI for browsing recordings. We don't. The bloby's job is to *read* the transcripts and act on them — summaries, action items, emails — using the normal workspace tools. If the human wants a UI, build one into `workspace/client/` as a normal workspace app.
364
- - **No push from Plaud.** No webhooks exist. You only know about new recordings when you ask.
365
- - **No editing recordings.** The Plaud API technically supports `PATCH /file/<id>` to rename. We don't expose it — keep canonical `<id>.mp3` names.
366
- - **No real-time streaming.** Plaud syncs to its cloud *after* the recording finishes. Expect seconds-to-minutes of lag between "user stopped recording" and "file appears in `/file/simple/web`."
394
+ Recordings on disk stay. Disable the CRON entry / remove from `CRONS.json` separately.
367
395
 
368
396
  ---
369
397
 
@@ -378,16 +406,25 @@ Recordings already on disk stay. The human can also disable the CRON entry / rem
378
406
  | Mint WT | `POST <base>/user-app/auth/workspace/token/<workspaceId>` body `{}` | UT |
379
407
  | List devices | `GET <base>/device/list` | **WT** |
380
408
  | List recordings | `GET <base>/file/simple/web?skip=0&limit=50&is_trash=0&sort_by=edit_time&is_desc=true` | **WT** |
381
- | Get download URL | `GET <base>/file/temp-url/<id>?is_opus=0` | **WT** |
409
+ | Download URL | `GET <base>/file/temp-url/<id>?is_opus=0` | **WT** |
382
410
  | Download audio | `GET <temp_url>` | none (signed) |
383
- | Transcribe local file | `POST http://localhost:7400/api/whisper/transcribe-file` body `{path, saveTranscriptNext}` | none (exempt) |
411
+ | Transcribe (marketplace) | `POST https://api.bloby.bot/api/services/audio-to-text/use` multipart `file=@...` | `X-Bloby-Token: $relay_token` |
412
+ | Transcribe (Groq) | `POST https://api.groq.com/openai/v1/audio/transcriptions` multipart | Bearer GROQ_API_KEY |
413
+ | Transcribe (OpenAI) | `POST https://api.openai.com/v1/audio/transcriptions` multipart | Bearer OPENAI_API_KEY |
414
+
415
+ State file: `workspace/.plaud.json`. Plaud requests need a browser-style `User-Agent`.
384
416
 
385
- State file: `workspace/.plaud.json` — read/write with `Read` / `Write`. **No `/api/settings` calls** — that endpoint requires a portal Bearer token the skill can't easily produce.
417
+ ---
418
+
419
+ ## What This Skill Does NOT Do
386
420
 
387
- All Plaud requests need a browser-style `User-Agent`.
421
+ - **No automatic schedule.** The human and their bloby choose among CRON, PULSE, and manual runs.
422
+ - **No dashboard.** OpenPlaud has a UI; we don't. The bloby's job is to *read* transcripts and act on them via normal workspace tools. If the human wants a UI, build one into `workspace/client/`.
423
+ - **No push from Plaud.** No webhooks exist; you only know about new recordings when you ask.
424
+ - **No real-time streaming.** Plaud syncs *after* the recording finishes. Lag is seconds-to-minutes between "user stopped recording" and "file appears in `/file/simple/web`."
388
425
 
389
426
  ---
390
427
 
391
428
  ## Credit
392
429
 
393
- Plaud API shape is the same one [OpenPlaud](https://github.com/openplaud/openplaud) uses — they did the reverse-engineering work, including the painful workspace-token discovery (their issue #66). This skill reimplements just the parts a bloby needs.
430
+ Plaud API shape is the same one [OpenPlaud](https://github.com/openplaud/openplaud) uses — they did the reverse-engineering work, including the painful workspace-token discovery (their issue #66) and the Google/Apple identity gotcha (issue #65). This skill reimplements just the parts a bloby needs, and routes transcription either through Bloby's marketplace or a provider of the human's choice.
@@ -5,11 +5,11 @@
5
5
  "bloby_human": "Bruno Bertapeli",
6
6
  "bloby": "bloby-bruno",
7
7
  "author": "newbot-official",
8
- "description": "Plaud Note integration. Pairs the user's Plaud account via email OTP, polls Plaud's cloud for new recordings, downloads the audio into workspace/files/audio/plaud/, and transcribes it via the user's Whisper key. Cadence (CRON vs PULSE memo) is chosen by the human and their bloby together.",
8
+ "description": "Plaud Note integration. Pairs the user's Plaud account (email OTP or paste-token for Google/Apple identities), pulls recordings into workspace/files/audio/plaud/, and routes transcription through either the Bloby Marketplace audio-to-text service (pay-per-minute) or the human's own provider (Groq / OpenAI Whisper / Mistral Voxtral / local).",
9
9
  "depends": [],
10
10
  "env_keys": [],
11
11
  "has_telemetry": false,
12
- "size": "8KB",
12
+ "size": "12KB",
13
13
  "contains_binaries": false,
14
- "tags": ["plaud", "transcription", "audio", "recorder", "meeting"]
14
+ "tags": ["plaud", "transcription", "audio", "recorder", "meeting", "groq", "whisper"]
15
15
  }