bloby-bot 0.50.2 → 0.50.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/supervisor/index.ts +0 -1
- package/worker/index.ts +0 -113
- package/workspace/skills/plaud/SKILL.md +173 -136
- package/workspace/skills/plaud/skill.json +3 -3
package/package.json
CHANGED
package/supervisor/index.ts
CHANGED
|
@@ -392,7 +392,6 @@ export async function startSupervisor() {
|
|
|
392
392
|
'POST /api/channels/whatsapp/react',
|
|
393
393
|
'POST /api/channels/send',
|
|
394
394
|
'POST /api/channels/alexa/handle',
|
|
395
|
-
'POST /api/whisper/transcribe-file',
|
|
396
395
|
];
|
|
397
396
|
|
|
398
397
|
function isExemptRoute(method: string, url: string): boolean {
|
package/worker/index.ts
CHANGED
|
@@ -1002,119 +1002,6 @@ app.post('/api/whisper/transcribe', express.json({ limit: '10mb' }), async (req,
|
|
|
1002
1002
|
}
|
|
1003
1003
|
});
|
|
1004
1004
|
|
|
1005
|
-
// Transcribe an audio file already on disk under workspace/files/.
|
|
1006
|
-
// Body: { path, saveTranscriptNext?, language? }. `path` is interpreted
|
|
1007
|
-
// relative to workspace/files/ ("files/" prefix is tolerated).
|
|
1008
|
-
app.post('/api/whisper/transcribe-file', express.json({ limit: '1mb' }), async (req, res) => {
|
|
1009
|
-
const whisperEnabled = getSetting('whisper_enabled');
|
|
1010
|
-
const whisperKey = getSetting('whisper_key');
|
|
1011
|
-
|
|
1012
|
-
if (whisperEnabled !== 'true' || !whisperKey) {
|
|
1013
|
-
res.status(400).json({ error: 'Whisper not enabled or API key missing' });
|
|
1014
|
-
return;
|
|
1015
|
-
}
|
|
1016
|
-
|
|
1017
|
-
const { path: relPath, saveTranscriptNext, language } = req.body as {
|
|
1018
|
-
path?: string;
|
|
1019
|
-
saveTranscriptNext?: boolean;
|
|
1020
|
-
language?: string;
|
|
1021
|
-
};
|
|
1022
|
-
|
|
1023
|
-
if (!relPath || typeof relPath !== 'string') {
|
|
1024
|
-
res.status(400).json({ error: 'Missing path' });
|
|
1025
|
-
return;
|
|
1026
|
-
}
|
|
1027
|
-
|
|
1028
|
-
const normalized = relPath.replace(/^\/+/, '').replace(/^files\//, '');
|
|
1029
|
-
const absPath = path.resolve(paths.files, normalized);
|
|
1030
|
-
if (absPath !== paths.files && !absPath.startsWith(paths.files + path.sep)) {
|
|
1031
|
-
res.status(400).json({ error: 'Path escapes workspace/files/' });
|
|
1032
|
-
return;
|
|
1033
|
-
}
|
|
1034
|
-
if (!fs.existsSync(absPath) || !fs.statSync(absPath).isFile()) {
|
|
1035
|
-
res.status(404).json({ error: 'File not found' });
|
|
1036
|
-
return;
|
|
1037
|
-
}
|
|
1038
|
-
|
|
1039
|
-
try {
|
|
1040
|
-
const audioBuffer = fs.readFileSync(absPath);
|
|
1041
|
-
const filename = path.basename(absPath);
|
|
1042
|
-
const ext = path.extname(filename).toLowerCase().slice(1);
|
|
1043
|
-
const contentTypes: Record<string, string> = {
|
|
1044
|
-
mp3: 'audio/mpeg',
|
|
1045
|
-
m4a: 'audio/mp4',
|
|
1046
|
-
mp4: 'audio/mp4',
|
|
1047
|
-
wav: 'audio/wav',
|
|
1048
|
-
webm: 'audio/webm',
|
|
1049
|
-
ogg: 'audio/ogg',
|
|
1050
|
-
opus: 'audio/ogg',
|
|
1051
|
-
flac: 'audio/flac',
|
|
1052
|
-
};
|
|
1053
|
-
const contentType = contentTypes[ext] || 'application/octet-stream';
|
|
1054
|
-
|
|
1055
|
-
const boundary = '----WhisperBoundary' + Date.now();
|
|
1056
|
-
const CRLF = '\r\n';
|
|
1057
|
-
const parts: Buffer[] = [];
|
|
1058
|
-
|
|
1059
|
-
parts.push(Buffer.from(
|
|
1060
|
-
`--${boundary}${CRLF}` +
|
|
1061
|
-
`Content-Disposition: form-data; name="file"; filename="${filename}"${CRLF}` +
|
|
1062
|
-
`Content-Type: ${contentType}${CRLF}${CRLF}`
|
|
1063
|
-
));
|
|
1064
|
-
parts.push(audioBuffer);
|
|
1065
|
-
parts.push(Buffer.from(CRLF));
|
|
1066
|
-
|
|
1067
|
-
parts.push(Buffer.from(
|
|
1068
|
-
`--${boundary}${CRLF}` +
|
|
1069
|
-
`Content-Disposition: form-data; name="model"${CRLF}${CRLF}` +
|
|
1070
|
-
`whisper-1${CRLF}`
|
|
1071
|
-
));
|
|
1072
|
-
|
|
1073
|
-
if (language && typeof language === 'string') {
|
|
1074
|
-
parts.push(Buffer.from(
|
|
1075
|
-
`--${boundary}${CRLF}` +
|
|
1076
|
-
`Content-Disposition: form-data; name="language"${CRLF}${CRLF}` +
|
|
1077
|
-
`${language}${CRLF}`
|
|
1078
|
-
));
|
|
1079
|
-
}
|
|
1080
|
-
|
|
1081
|
-
parts.push(Buffer.from(`--${boundary}--${CRLF}`));
|
|
1082
|
-
|
|
1083
|
-
const body = Buffer.concat(parts);
|
|
1084
|
-
|
|
1085
|
-
const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
|
|
1086
|
-
method: 'POST',
|
|
1087
|
-
headers: {
|
|
1088
|
-
'Authorization': `Bearer ${whisperKey}`,
|
|
1089
|
-
'Content-Type': `multipart/form-data; boundary=${boundary}`,
|
|
1090
|
-
},
|
|
1091
|
-
body,
|
|
1092
|
-
});
|
|
1093
|
-
|
|
1094
|
-
if (!response.ok) {
|
|
1095
|
-
const errText = await response.text();
|
|
1096
|
-
log.warn(`Whisper API error: ${response.status} ${errText}`);
|
|
1097
|
-
res.status(502).json({ error: 'Whisper API error', detail: errText.slice(0, 500) });
|
|
1098
|
-
return;
|
|
1099
|
-
}
|
|
1100
|
-
|
|
1101
|
-
const result = await response.json() as { text: string };
|
|
1102
|
-
const transcript = result.text;
|
|
1103
|
-
|
|
1104
|
-
let transcriptPath: string | undefined;
|
|
1105
|
-
if (saveTranscriptNext) {
|
|
1106
|
-
const txtAbs = absPath + '.txt';
|
|
1107
|
-
fs.writeFileSync(txtAbs, transcript, 'utf8');
|
|
1108
|
-
transcriptPath = path.relative(paths.files, txtAbs).split(path.sep).join('/');
|
|
1109
|
-
}
|
|
1110
|
-
|
|
1111
|
-
res.json({ transcript, ...(transcriptPath ? { transcriptPath } : {}) });
|
|
1112
|
-
} catch (err: any) {
|
|
1113
|
-
log.warn(`Whisper transcribe-file failed: ${err.message}`);
|
|
1114
|
-
res.status(500).json({ error: 'Transcription failed' });
|
|
1115
|
-
}
|
|
1116
|
-
});
|
|
1117
|
-
|
|
1118
1005
|
// Serve stored files (audio, images, documents)
|
|
1119
1006
|
app.use('/api/files', express.static(paths.files));
|
|
1120
1007
|
|
|
@@ -4,24 +4,27 @@
|
|
|
4
4
|
|
|
5
5
|
A channel for getting **recordings off the user's Plaud Note device** and into your workspace as `(audio file, transcript)` pairs you can read and act on.
|
|
6
6
|
|
|
7
|
-
Plaud is a tiny voice recorder. When the user records something — a meeting, a lecture, a thought on a walk — the device syncs to Plaud's cloud over Bluetooth/Wi-Fi. **You don't talk to the device.** You talk to Plaud's cloud, pull the audio, and transcribe it
|
|
7
|
+
Plaud is a tiny voice recorder. When the user records something — a meeting, a lecture, a thought on a walk — the device syncs to Plaud's cloud over Bluetooth/Wi-Fi. **You don't talk to the device.** You talk to Plaud's cloud, pull the audio, and transcribe it — either via the Bloby Marketplace service or your own provider.
|
|
8
8
|
|
|
9
9
|
There is **no Plaud CLI, no Plaud webhook, no official Plaud API.** Plaud's mobile/web app uses an undocumented HTTP API. This skill uses the same one — same shape OpenPlaud uses (`https://github.com/openplaud/openplaud`).
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Two parts to this skill
|
|
14
|
+
|
|
15
|
+
1. **Pulling audio from Plaud** — same for everyone. OTP / paste-token, list, download.
|
|
16
|
+
2. **Transcribing the audio** — you have a choice (see "Transcription — pick a path" below).
|
|
12
17
|
|
|
13
18
|
---
|
|
14
19
|
|
|
15
|
-
## What Bloby Gives You (
|
|
20
|
+
## What Bloby Gives You (plumbing)
|
|
16
21
|
|
|
17
22
|
| Thing | Where | How you use it |
|
|
18
23
|
|---|---|---|
|
|
19
|
-
| Whisper-on-disk endpoint | `POST http://localhost:7400/api/whisper/transcribe-file` | Send a path under `workspace/files/`, get a transcript back. Optional `saveTranscriptNext: true` writes `foo.mp3.txt` next to `foo.mp3`. Auth-exempt, no Bearer needed. |
|
|
20
24
|
| Workspace files dir | `workspace/files/audio/plaud/` | Drop downloaded audio here. Supervisor serves it at `/api/files/audio/plaud/<name>`. |
|
|
21
|
-
| Workspace file tools | `Read` / `Write` / `Edit` | Store Plaud auth state in `workspace/.plaud.json
|
|
25
|
+
| Workspace file tools | `Read` / `Write` / `Edit` | Store Plaud auth state in `workspace/.plaud.json`. Save transcripts as `<id>.mp3.txt` next to the audio. |
|
|
22
26
|
| Scheduling | `workspace/CRONS.json` or `workspace/PULSE.json` | Run sync periodically. See "Cadence" below. |
|
|
23
|
-
|
|
24
|
-
Use `http://localhost:7400` from Bash for the Whisper endpoint. Everything else is the open internet (Plaud's API) or your own filesystem.
|
|
27
|
+
| Relay token | `~/.bloby/config.json` → `relay.token` | Use as `X-Bloby-Token` header when calling marketplace services. |
|
|
25
28
|
|
|
26
29
|
### State file: `workspace/.plaud.json`
|
|
27
30
|
|
|
@@ -29,17 +32,19 @@ You manage all Plaud connection state in a single JSON file at workspace root. R
|
|
|
29
32
|
|
|
30
33
|
```json
|
|
31
34
|
{
|
|
32
|
-
"email": "bruno@
|
|
35
|
+
"email": "bruno@example.com",
|
|
33
36
|
"apiBase": "https://api.plaud.ai",
|
|
34
37
|
"userToken": "eyJ...",
|
|
35
38
|
"workspaceId": "ws_xxxxx",
|
|
36
39
|
"workspaceToken": "eyJ...",
|
|
37
40
|
"workspaceTokenMintedAt": "2026-05-22T19:30:00.000Z",
|
|
38
|
-
"
|
|
41
|
+
"authMethod": "otp",
|
|
42
|
+
"lastSyncVersionMs": 0,
|
|
43
|
+
"transcriptionMode": "marketplace"
|
|
39
44
|
}
|
|
40
45
|
```
|
|
41
46
|
|
|
42
|
-
Initialize empty (`{}`) if the file doesn't exist.
|
|
47
|
+
`transcriptionMode` is your record of which transcription path the human picked. One of: `"marketplace"`, `"groq"`, `"openai"`, `"mistral"`, `"local"`, or whatever they configured. If the state file doesn't exist yet, initialize it as an empty object (`{}`).
|
|
43
48
|
|
|
44
49
|
---
|
|
45
50
|
|
|
@@ -53,16 +58,14 @@ Three regions. Pick one when pairing. A token from one region won't work on anot
|
|
|
53
58
|
| EU | `https://api-euc1.plaud.ai` |
|
|
54
59
|
| Asia-Pacific | `https://api-apse1.plaud.ai` |
|
|
55
60
|
|
|
56
|
-
If
|
|
61
|
+
If `POST /auth/otp-send-code` returns `status: -302` with `data.domains.api`, retry against that base. Save whichever base actually succeeded.
|
|
57
62
|
|
|
58
|
-
**Two token kinds
|
|
63
|
+
**Two token kinds — the part that bites everyone:**
|
|
59
64
|
|
|
60
|
-
- **User Token (UT)** — what `/auth/otp-login` returns. Authenticates `/user/me`,
|
|
61
|
-
- **Workspace Token (WT)** — minted from the UT. Required on
|
|
65
|
+
- **User Token (UT)** — what `/auth/otp-login` returns. Authenticates `/user/me`, workspace-list, workspace-token mint. **Does NOT authenticate recording endpoints.** Calling `/file/simple/web` or `/device/list` with a UT silently returns HTTP 200 + empty list.
|
|
66
|
+
- **Workspace Token (WT)** — minted from the UT. Required on recording endpoints. ~24h lifetime. Re-mint when expired.
|
|
62
67
|
|
|
63
|
-
**
|
|
64
|
-
|
|
65
|
-
**User-Agent matters.** Plaud blocks some defaults. Always send a normal browser UA:
|
|
68
|
+
**User-Agent matters.** Plaud blocks some defaults. Always send:
|
|
66
69
|
|
|
67
70
|
```
|
|
68
71
|
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36
|
|
@@ -72,18 +75,17 @@ User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,
|
|
|
72
75
|
|
|
73
76
|
## Pairing (first time)
|
|
74
77
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
### Step 1 — Ask for their Plaud email
|
|
78
|
+
### Step 1 — Ask for their Plaud email AND how they signed up
|
|
78
79
|
|
|
79
80
|
```
|
|
80
|
-
Bloby: Which email do you use on plaud.ai?
|
|
81
|
-
Human: bruno@example.com
|
|
81
|
+
Bloby: Which email do you use on plaud.ai? And did you sign up with email+password, or "Continue with Google" / "Continue with Apple"?
|
|
82
82
|
```
|
|
83
83
|
|
|
84
|
-
If
|
|
84
|
+
**If they signed up with Google or Apple**, skip OTP entirely and go to "Paste-token fallback". Don't try OTP first — Plaud will silently create a parallel empty account at the same email, you'll mint a WT successfully, and recording endpoints will return empty. The symptom looks like "auth worked but no recordings" but it's two different identities at the same email.
|
|
85
85
|
|
|
86
|
-
|
|
86
|
+
If unsure, run OTP and lean on the Step 8 ghost-account check below.
|
|
87
|
+
|
|
88
|
+
### Step 2 — Send OTP
|
|
87
89
|
|
|
88
90
|
```bash
|
|
89
91
|
curl -s -X POST 'https://api.plaud.ai/auth/otp-send-code' \
|
|
@@ -92,12 +94,12 @@ curl -s -X POST 'https://api.plaud.ai/auth/otp-send-code' \
|
|
|
92
94
|
-d '{"username":"<EMAIL>"}'
|
|
93
95
|
```
|
|
94
96
|
|
|
95
|
-
Expected `status: 0` and a `token` field.
|
|
97
|
+
Expected `status: 0` and a `token` field. Save the `token` for Step 4.
|
|
96
98
|
|
|
97
99
|
### Step 3 — Ask for the code
|
|
98
100
|
|
|
99
101
|
```
|
|
100
|
-
Bloby: Check your inbox — Plaud sent
|
|
102
|
+
Bloby: Check your inbox — Plaud sent a 6-digit code. What is it?
|
|
101
103
|
```
|
|
102
104
|
|
|
103
105
|
### Step 4 — Verify
|
|
@@ -109,23 +111,24 @@ curl -s -X POST '<apiBase>/auth/otp-login' \
|
|
|
109
111
|
-d '{"code":"<6 DIGITS>","token":"<OTP TOKEN FROM STEP 2>"}'
|
|
110
112
|
```
|
|
111
113
|
|
|
112
|
-
|
|
114
|
+
Save `access_token` as `userToken` in `.plaud.json`.
|
|
113
115
|
|
|
114
|
-
> ⚠️
|
|
116
|
+
> ⚠️ `is_new_user: true` in the response is just an informational flag — it does NOT mean Plaud created a new account. Real account check happens in Step 8.
|
|
115
117
|
|
|
116
|
-
### Step 5 —
|
|
118
|
+
### Step 5 — Initial state
|
|
117
119
|
|
|
118
|
-
|
|
120
|
+
Write to `workspace/.plaud.json`:
|
|
119
121
|
|
|
120
122
|
```json
|
|
121
123
|
{
|
|
122
124
|
"email": "<EMAIL>",
|
|
123
|
-
"apiBase": "<BASE
|
|
124
|
-
"userToken": "<UT
|
|
125
|
+
"apiBase": "<BASE>",
|
|
126
|
+
"userToken": "<UT>",
|
|
127
|
+
"authMethod": "otp"
|
|
125
128
|
}
|
|
126
129
|
```
|
|
127
130
|
|
|
128
|
-
### Step 6 — Smoke test the UT
|
|
131
|
+
### Step 6 — Smoke test the UT
|
|
129
132
|
|
|
130
133
|
```bash
|
|
131
134
|
curl -s '<BASE>/user/me' \
|
|
@@ -133,12 +136,10 @@ curl -s '<BASE>/user/me' \
|
|
|
133
136
|
-H 'User-Agent: Mozilla/5.0 ...'
|
|
134
137
|
```
|
|
135
138
|
|
|
136
|
-
Should return the user's profile
|
|
139
|
+
Should return the user's profile. If 401, UT is bad — restart.
|
|
137
140
|
|
|
138
141
|
### Step 7 — Mint the Workspace Token (REQUIRED)
|
|
139
142
|
|
|
140
|
-
This is the step that makes the difference between "0 recordings" and "all 3 of my recordings."
|
|
141
|
-
|
|
142
143
|
**7a. List workspaces** (auth: UT):
|
|
143
144
|
|
|
144
145
|
```bash
|
|
@@ -147,11 +148,9 @@ curl -s '<BASE>/team-app/workspaces/list?need_personal_workspace=true' \
|
|
|
147
148
|
-H 'User-Agent: Mozilla/5.0 ...'
|
|
148
149
|
```
|
|
149
150
|
|
|
150
|
-
|
|
151
|
+
Pick the personal workspace (`workspace_type === "0"`, or first if none). Save its `workspace_id` as `workspaceId`.
|
|
151
152
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
**7b. Mint a WT for that workspace** (auth: UT, body is literally `{}`):
|
|
153
|
+
**7b. Mint a WT** (auth: UT, body literally `{}`):
|
|
155
154
|
|
|
156
155
|
```bash
|
|
157
156
|
curl -s -X POST '<BASE>/user-app/auth/workspace/token/<WORKSPACE_ID>' \
|
|
@@ -161,60 +160,61 @@ curl -s -X POST '<BASE>/user-app/auth/workspace/token/<WORKSPACE_ID>' \
|
|
|
161
160
|
-d '{}'
|
|
162
161
|
```
|
|
163
162
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
**Save it** as `workspaceToken` and `workspaceTokenMintedAt: <now ISO 8601>` in `.plaud.json`. Now Update the file via `Write`.
|
|
163
|
+
Save `workspace_token` as `workspaceToken` and `workspaceTokenMintedAt: <now ISO 8601>` in `.plaud.json`.
|
|
167
164
|
|
|
168
|
-
### Step 8 — Real smoke test
|
|
165
|
+
### Step 8 — Real smoke test + ghost-account check
|
|
169
166
|
|
|
170
167
|
```bash
|
|
171
168
|
curl -s '<BASE>/device/list' \
|
|
172
169
|
-H 'Authorization: Bearer <WT>' \
|
|
173
170
|
-H 'User-Agent: Mozilla/5.0 ...'
|
|
174
|
-
```
|
|
175
|
-
|
|
176
|
-
Now you should see devices. Tell the human: *"Paired. Your Plaud (serial ending ...XXXX) is connected. Want me to pull in everything you've recorded so far?"*
|
|
177
171
|
|
|
178
|
-
If `data_devices` is still empty here — odd, but possible for accounts that haven't synced any device in a while. Try the recordings list directly:
|
|
179
|
-
|
|
180
|
-
```bash
|
|
181
172
|
curl -s '<BASE>/file/simple/web?skip=0&limit=10&is_trash=0' \
|
|
182
173
|
-H 'Authorization: Bearer <WT>' \
|
|
183
174
|
-H 'User-Agent: Mozilla/5.0 ...'
|
|
184
175
|
```
|
|
185
176
|
|
|
186
|
-
|
|
177
|
+
| `data_devices` | `data_file_list` | Meaning | Action |
|
|
178
|
+
|---|---|---|---|
|
|
179
|
+
| has entries | has entries | Real account paired | Continue to "Transcription — pick a path" |
|
|
180
|
+
| empty | has entries | Devices haven't checked in lately | Treat as success |
|
|
181
|
+
| **empty** | **empty** | **Google/Apple ghost-account case** | **Stop.** Tell the human, switch to paste-token (next section) |
|
|
182
|
+
|
|
183
|
+
### Ghost-account recovery
|
|
184
|
+
|
|
185
|
+
If empty/empty:
|
|
186
|
+
|
|
187
|
+
1. Tell the human plainly:
|
|
188
|
+
> *"OTP succeeded, but you have zero recordings on this Plaud account. Most likely your real Plaud account is signed in with Google or Apple, and the OTP I just ran created a separate empty account at the same email. Can you grab a token from web.plaud.ai DevTools so I can talk to the real account?"*
|
|
189
|
+
2. Walk them through paste-token (next section).
|
|
190
|
+
3. Once paste-token works and you see recordings, overwrite `userToken` and set `"authMethod": "paste"` in `.plaud.json` so next sync skips OTP.
|
|
187
191
|
|
|
188
192
|
---
|
|
189
193
|
|
|
190
194
|
## Paste-token fallback (Google/Apple Plaud accounts)
|
|
191
195
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
1. Open [web.plaud.ai](https://web.plaud.ai) in a browser and sign in with Google/Apple normally.
|
|
195
|
-
2. Open DevTools (F12 or Cmd+Option+I) → Network tab → refresh.
|
|
196
|
+
1. Open [web.plaud.ai](https://web.plaud.ai), sign in with Google/Apple normally.
|
|
197
|
+
2. DevTools (F12 or Cmd+Option+I) → Network tab → refresh.
|
|
196
198
|
3. Click any request to `api.plaud.ai`, `api-euc1.plaud.ai`, or `api-apse1.plaud.ai`.
|
|
197
|
-
4.
|
|
198
|
-
5.
|
|
199
|
-
6. **Still run Step 7** —
|
|
199
|
+
4. Request Headers → `Authorization` → copy everything after `Bearer ` (long `eyJ...`).
|
|
200
|
+
5. The human pastes the token to you. Save it as `userToken`, set `apiBase` to whichever host they pulled it from, and set `"authMethod": "paste"`.
|
|
201
|
+
6. **Still run Step 7** — paste-token gives a UT, WT must still be minted.
|
|
200
202
|
|
|
201
203
|
---
|
|
202
204
|
|
|
203
205
|
## Syncing recordings
|
|
204
206
|
|
|
205
|
-
The shape of a sync run:
|
|
206
|
-
|
|
207
207
|
```
|
|
208
|
-
GET /file/simple/web → list
|
|
208
|
+
GET /file/simple/web → list [auth: WT]
|
|
209
209
|
for each new one:
|
|
210
|
-
GET /file/temp-url/<id>?is_opus=0 →
|
|
211
|
-
curl -o workspace/files/audio/plaud/<id>.mp3 → download (
|
|
212
|
-
|
|
210
|
+
GET /file/temp-url/<id>?is_opus=0 → signed mp3 URL [auth: WT]
|
|
211
|
+
curl -o workspace/files/audio/plaud/<id>.mp3 → download (signed URL, no auth)
|
|
212
|
+
<transcription path> → produces <id>.mp3.txt
|
|
213
213
|
```
|
|
214
214
|
|
|
215
215
|
### Pre-sync: check WT freshness
|
|
216
216
|
|
|
217
|
-
Read `.plaud.json`. If `workspaceToken` is missing or `workspaceTokenMintedAt` is more than ~20 hours old, re-mint (Step 7b
|
|
217
|
+
Read `.plaud.json`. If `workspaceToken` is missing or `workspaceTokenMintedAt` is more than ~20 hours old, re-mint (Step 7b) before starting.
|
|
218
218
|
|
|
219
219
|
### List recordings (auth: WT)
|
|
220
220
|
|
|
@@ -224,28 +224,11 @@ curl -s '<BASE>/file/simple/web?skip=0&limit=50&is_trash=0&sort_by=edit_time&is_
|
|
|
224
224
|
-H 'User-Agent: Mozilla/5.0 ...'
|
|
225
225
|
```
|
|
226
226
|
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
| Field | Use |
|
|
230
|
-
|---|---|
|
|
231
|
-
| `id` | Plaud's file id. Use as the local filename. |
|
|
232
|
-
| `filename` | Human label the user gave it (or auto-generated). Sanitise before using as a filename. |
|
|
233
|
-
| `duration` | Seconds. |
|
|
234
|
-
| `start_time` / `end_time` | When the recording happened. |
|
|
235
|
-
| `version_ms` | Bumps if the user edits the recording. Track this to know when to re-download. |
|
|
236
|
-
| `serial_number` | Which Plaud device. |
|
|
237
|
-
| `is_trash` | Skip if 1. |
|
|
238
|
-
|
|
239
|
-
Page with `skip=`; do 50 at a time. Stop when a page comes back smaller than `limit` or empty.
|
|
227
|
+
`data_file_list` fields you'll care about: `id`, `filename`, `duration`, `start_time`, `end_time`, `version_ms`, `serial_number`, `is_trash`. Page with `skip=`.
|
|
240
228
|
|
|
241
229
|
### Dedup
|
|
242
230
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
- **Filesystem**: if `workspace/files/audio/plaud/<id>.mp3` exists, skip it.
|
|
246
|
-
- **Cursor**: save the newest `version_ms` you've seen as `lastSyncVersionMs` in `.plaud.json`. Skip anything `<=` that cursor next time.
|
|
247
|
-
|
|
248
|
-
If `version_ms` changed on a recording you already downloaded, the user edited the filename or trimmed it. Re-fetch and overwrite.
|
|
231
|
+
Dedup either by filesystem (skip if `workspace/files/audio/plaud/<id>.mp3` exists) or by the `lastSyncVersionMs` cursor in `.plaud.json` (skip anything whose `version_ms` is at or below the cursor). If `version_ms` changed on a recording you already downloaded, the user edited the file — re-fetch and overwrite.
|
|
249
232
|
|
|
250
233
|
### Get the download URL (auth: WT)
|
|
251
234
|
|
|
@@ -255,115 +238,160 @@ curl -s '<BASE>/file/temp-url/<FILE_ID>?is_opus=0' \
|
|
|
255
238
|
-H 'User-Agent: Mozilla/5.0 ...'
|
|
256
239
|
```
|
|
257
240
|
|
|
258
|
-
`is_opus=0` returns mp3 in `temp_url`.
|
|
259
|
-
|
|
260
|
-
The URL expires in minutes. Download immediately.
|
|
241
|
+
`is_opus=0` returns mp3 in `temp_url`. Use mp3 — Whisper handles it everywhere.
|
|
261
242
|
|
|
262
|
-
### Download (no auth — URL
|
|
243
|
+
### Download (no auth — signed URL)
|
|
263
244
|
|
|
264
245
|
```bash
|
|
265
246
|
mkdir -p workspace/files/audio/plaud
|
|
266
247
|
curl -s -o "workspace/files/audio/plaud/<FILE_ID>.mp3" '<TEMP URL>'
|
|
267
248
|
```
|
|
268
249
|
|
|
269
|
-
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
## Transcription — pick a path
|
|
253
|
+
|
|
254
|
+
Once the audio is on disk, you need text. **Ask the human once** which path they want, then save it as `transcriptionMode` in `.plaud.json` so you don't re-ask every sync.
|
|
255
|
+
|
|
256
|
+
### Path A — Bloby Marketplace `audio-to-text` (easiest, pay-per-minute)
|
|
257
|
+
|
|
258
|
+
If the bloby is registered with the relay (Quick Tunnel mode → there's a token at `~/.bloby/config.json → relay.token`), just POST the file. No API key to manage, no provider account.
|
|
270
259
|
|
|
271
260
|
```bash
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
261
|
+
TOKEN=$(jq -r '.relay.token' ~/.bloby/config.json)
|
|
262
|
+
|
|
263
|
+
curl -s -X POST 'https://api.bloby.bot/api/services/audio-to-text/use' \
|
|
264
|
+
-H "X-Bloby-Token: $TOKEN" \
|
|
265
|
+
-F "file=@workspace/files/audio/plaud/<FILE_ID>.mp3" \
|
|
266
|
+
-F "language=en" # optional
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
Returns JSON:
|
|
270
|
+
|
|
271
|
+
```json
|
|
272
|
+
{
|
|
273
|
+
"transcript": "...",
|
|
274
|
+
"language": "en",
|
|
275
|
+
"estimatedMinutes": 5,
|
|
276
|
+
"priceUsd": 0.0185,
|
|
277
|
+
"paidVia": "balance",
|
|
278
|
+
"groqDurationSec": 275.4,
|
|
279
|
+
"model": "whisper-large-v3-turbo"
|
|
280
|
+
}
|
|
275
281
|
```
|
|
276
282
|
|
|
277
|
-
|
|
283
|
+
- **Pricing:** $0.0037 per estimated minute, rounded up (~$0.22/hr).
|
|
284
|
+
- **How duration is estimated:** file size ÷ assumed 32kbps bitrate. Plaud-sourced mp3 matches this assumption well. High-bitrate files from other sources would be over-charged proportionally — for those, switch to Path B.
|
|
285
|
+
- **Paid from:** account balance first; falls back to MPP (Tempo USDC) or Base (use `/use-base` instead). Make sure the bloby's account has funds OR its wallet is funded on the matching network.
|
|
286
|
+
- **Limits:** 25MB per file. Mp3 from Plaud comfortably fits — observed 1MB ≈ 4½min.
|
|
278
287
|
|
|
279
|
-
|
|
288
|
+
Write the response's `transcript` to `workspace/files/audio/plaud/<FILE_ID>.mp3.txt`.
|
|
280
289
|
|
|
281
|
-
###
|
|
290
|
+
### Path B — Bring your own API key (DIY)
|
|
282
291
|
|
|
283
|
-
|
|
292
|
+
Pick a provider, ask the human for their key, store it as a workspace secret (`workspace/.env` is fine — the backend reloads on .env change). Then call directly from Bash.
|
|
284
293
|
|
|
294
|
+
**Groq Whisper** — cheapest, fastest. Same model the marketplace uses under the hood. Free tier exists.
|
|
285
295
|
```bash
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
296
|
+
curl -s -X POST 'https://api.groq.com/openai/v1/audio/transcriptions' \
|
|
297
|
+
-H "Authorization: Bearer $GROQ_API_KEY" \
|
|
298
|
+
-F "file=@workspace/files/audio/plaud/<FILE_ID>.mp3" \
|
|
299
|
+
-F "model=whisper-large-v3-turbo" \
|
|
300
|
+
-F "response_format=json"
|
|
289
301
|
```
|
|
290
302
|
|
|
291
|
-
|
|
303
|
+
**OpenAI Whisper** — the human may already have an OpenAI key from the Bloby wizard. Read it from the settings table directly:
|
|
304
|
+
```bash
|
|
305
|
+
WHISPER_KEY=$(sqlite3 ~/.bloby/memory.db "SELECT value FROM settings WHERE key='whisper_key';")
|
|
306
|
+
curl -s -X POST 'https://api.openai.com/v1/audio/transcriptions' \
|
|
307
|
+
-H "Authorization: Bearer $WHISPER_KEY" \
|
|
308
|
+
-F "file=@workspace/files/audio/plaud/<FILE_ID>.mp3" \
|
|
309
|
+
-F "model=whisper-1"
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
**Mistral Voxtral**:
|
|
313
|
+
```bash
|
|
314
|
+
curl -s -X POST 'https://api.mistral.ai/v1/audio/transcriptions' \
|
|
315
|
+
-H "Authorization: Bearer $MISTRAL_API_KEY" \
|
|
316
|
+
-F "file=@workspace/files/audio/plaud/<FILE_ID>.mp3" \
|
|
317
|
+
-F "model=voxtral-mini-latest"
|
|
318
|
+
```
|
|
319
|
+
|
|
320
|
+
**Local — no API, no cost, fully private:**
|
|
321
|
+
- [whisper.cpp](https://github.com/ggerganov/whisper.cpp) — C++ binary, CPU or Metal/CUDA. Install once, transcribe forever.
|
|
322
|
+
- [faster-whisper](https://github.com/SYSTRAN/faster-whisper) — Python, ~4× faster than reference whisper.
|
|
323
|
+
- The human installs one of these themselves. The bloby invokes the CLI from Bash.
|
|
324
|
+
|
|
325
|
+
Whichever provider you use, extract the `text` field from the JSON response (for a local CLI, use its plain-text output) and write it to `workspace/files/audio/plaud/<FILE_ID>.mp3.txt`.
|
|
292
326
|
|
|
293
|
-
|
|
327
|
+
### Choosing for the human
|
|
328
|
+
|
|
329
|
+
If they don't have a preference, recommend **Path A (Marketplace)**:
|
|
330
|
+
- No key setup.
|
|
331
|
+
- Already integrated with the bloby's payment.
|
|
332
|
+
- Pay-as-you-go — no monthly minimum.
|
|
333
|
+
- If their account has any balance from other marketplace use, it just works.
|
|
334
|
+
|
|
335
|
+
Recommend **Path B** if:
|
|
336
|
+
- They're transcribing a lot and want to use a free tier or flat-rate plan.
|
|
337
|
+
- They want 100% local for privacy reasons.
|
|
338
|
+
- They already have a preferred provider.
|
|
294
339
|
|
|
295
340
|
---
|
|
296
341
|
|
|
297
342
|
## Cadence — CRON or PULSE?
|
|
298
343
|
|
|
299
|
-
**
|
|
344
|
+
**No automatic schedule installed by this skill.** The human picks.
|
|
300
345
|
|
|
301
346
|
### Pattern A — CRON every N minutes
|
|
302
347
|
|
|
303
|
-
|
|
348
|
+
Add to `workspace/CRONS.json`:
|
|
304
349
|
|
|
305
350
|
```json
|
|
306
351
|
{
|
|
307
352
|
"id": "plaud-sync",
|
|
308
353
|
"schedule": "*/15 * * * *",
|
|
309
|
-
"task": "Run a Plaud sync per the plaud skill: refresh WT if needed, list new recordings, download into workspace/files/audio/plaud/, and transcribe via
|
|
354
|
+
"task": "Run a Plaud sync per the plaud skill: refresh WT if needed, list new recordings, download into workspace/files/audio/plaud/, and transcribe via the configured transcriptionMode in .plaud.json. If new recordings were found, summarise to the human in chat. If nothing new, stay silent.",
|
|
310
355
|
"enabled": true,
|
|
311
356
|
"oneShot": false
|
|
312
357
|
}
|
|
313
358
|
```
|
|
314
359
|
|
|
315
|
-
Tune `*/15` to taste. `*/5` for aggressive, `0 * * * *` for quiet.
|
|
316
|
-
|
|
317
360
|
### Pattern B — PULSE memo
|
|
318
361
|
|
|
319
|
-
|
|
362
|
+
Add one line to `MYSELF.md` or `MEMORY.md`:
|
|
320
363
|
|
|
321
364
|
```
|
|
322
|
-
- Each pulse, briefly check Plaud for new recordings via the plaud skill.
|
|
365
|
+
- Each pulse, briefly check Plaud for new recordings via the plaud skill. Transcribe with whatever transcriptionMode is set in workspace/.plaud.json. If new, decide whether to surface. If nothing new, move on silently.
|
|
323
366
|
```
|
|
324
367
|
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
### Or: don't auto-sync at all
|
|
368
|
+
### Or: manual only
|
|
328
369
|
|
|
329
|
-
|
|
370
|
+
No CRON, no pulse memo. Sync when asked.
|
|
330
371
|
|
|
331
|
-
**Default to Pattern B for new installs unless the human says otherwise
|
|
372
|
+
**Default to Pattern B for new installs** unless the human says otherwise.
|
|
332
373
|
|
|
333
374
|
---
|
|
334
375
|
|
|
335
376
|
## Re-auth (401 handling)
|
|
336
377
|
|
|
337
|
-
Two different 401s, two different fixes.
|
|
338
|
-
|
|
339
378
|
| Endpoint that 401'd | What expired | Fix |
|
|
340
379
|
|---|---|---|
|
|
341
|
-
| `/file/simple/web`, `/file/temp-url/*`, `/device/list` (
|
|
342
|
-
| `/user-app/auth/workspace/token/...`, `/team-app/workspaces/list`, `/user/me` (
|
|
380
|
+
| `/file/simple/web`, `/file/temp-url/*`, `/device/list` (WT) | Workspace token | Re-mint a WT from cached UT (Step 7b). Silent — don't bother the human. |
|
|
381
|
+
| `/user-app/auth/workspace/token/...`, `/team-app/workspaces/list`, `/user/me` (UT) | User token | Tell the human. If `authMethod === "otp"`, re-OTP. If `"paste"`, walk them through DevTools again. |
|
|
382
|
+
| `POST /api/services/audio-to-text/use` (relay) | Marketplace account empty / wallet unfunded | Tell the human. Suggest topping up or switching to Path B. |
|
|
343
383
|
|
|
344
|
-
If you can't tell which token expired
|
|
384
|
+
If you can't tell which token expired, assume UT is dead → re-auth.
|
|
345
385
|
|
|
346
386
|
---
|
|
347
387
|
|
|
348
388
|
## Disconnect
|
|
349
389
|
|
|
350
|
-
Delete the state file:
|
|
351
|
-
|
|
352
390
|
```bash
|
|
353
391
|
rm -f workspace/.plaud.json
|
|
354
392
|
```
|
|
355
393
|
|
|
356
|
-
Recordings
|
|
357
|
-
|
|
358
|
-
---
|
|
359
|
-
|
|
360
|
-
## What This Skill Does NOT Do
|
|
361
|
-
|
|
362
|
-
- **No Plaud transcription.** We transcribe ourselves with Whisper. Plaud's own AI subscription is bypassed entirely.
|
|
363
|
-
- **No dashboard.** OpenPlaud has a slick UI for browsing recordings. We don't. The bloby's job is to *read* the transcripts and act on them — summaries, action items, emails — using the normal workspace tools. If the human wants a UI, build one into `workspace/client/` as a normal workspace app.
|
|
364
|
-
- **No push from Plaud.** No webhooks exist. You only know about new recordings when you ask.
|
|
365
|
-
- **No editing recordings.** The Plaud API technically supports `PATCH /file/<id>` to rename. We don't expose it — keep canonical `<id>.mp3` names.
|
|
366
|
-
- **No real-time streaming.** Plaud syncs to its cloud *after* the recording finishes. Expect seconds-to-minutes of lag between "user stopped recording" and "file appears in `/file/simple/web`."
|
|
394
|
+
Recordings on disk stay. Disable the CRON entry / remove from `CRONS.json` separately.
|
|
367
395
|
|
|
368
396
|
---
|
|
369
397
|
|
|
@@ -378,16 +406,25 @@ Recordings already on disk stay. The human can also disable the CRON entry / rem
|
|
|
378
406
|
| Mint WT | `POST <base>/user-app/auth/workspace/token/<workspaceId>` body `{}` | UT |
|
|
379
407
|
| List devices | `GET <base>/device/list` | **WT** |
|
|
380
408
|
| List recordings | `GET <base>/file/simple/web?skip=0&limit=50&is_trash=0&sort_by=edit_time&is_desc=true` | **WT** |
|
|
381
|
-
|
|
|
409
|
+
| Download URL | `GET <base>/file/temp-url/<id>?is_opus=0` | **WT** |
|
|
382
410
|
| Download audio | `GET <temp_url>` | none (signed) |
|
|
383
|
-
| Transcribe
|
|
411
|
+
| Transcribe (marketplace) | `POST https://api.bloby.bot/api/services/audio-to-text/use` multipart `file=@...` | `X-Bloby-Token: $relay_token` |
|
|
412
|
+
| Transcribe (Groq) | `POST https://api.groq.com/openai/v1/audio/transcriptions` multipart | Bearer GROQ_API_KEY |
|
|
413
|
+
| Transcribe (OpenAI) | `POST https://api.openai.com/v1/audio/transcriptions` multipart | Bearer OpenAI API key (e.g. the `whisper_key` setting) |
|
|
414
|
+
|
|
415
|
+
State file: `workspace/.plaud.json`. Plaud requests need a browser-style `User-Agent`.
|
|
384
416
|
|
|
385
|
-
|
|
417
|
+
---
|
|
418
|
+
|
|
419
|
+
## What This Skill Does NOT Do
|
|
386
420
|
|
|
387
|
-
|
|
421
|
+
- **No automatic schedule.** The human + bloby pick CRON vs PULSE vs manual.
|
|
422
|
+
- **No dashboard.** OpenPlaud has a UI; we don't. The bloby's job is to *read* transcripts and act on them via normal workspace tools. If the human wants a UI, build one into `workspace/client/`.
|
|
423
|
+
- **No push from Plaud.** No webhooks exist; you only know about new recordings when you ask.
|
|
424
|
+
- **No real-time streaming.** Plaud syncs *after* the recording finishes. Lag is seconds-to-minutes between "user stopped recording" and "file appears in `/file/simple/web`."
|
|
388
425
|
|
|
389
426
|
---
|
|
390
427
|
|
|
391
428
|
## Credit
|
|
392
429
|
|
|
393
|
-
Plaud API shape is the same one [OpenPlaud](https://github.com/openplaud/openplaud) uses — they did the reverse-engineering work, including the painful workspace-token discovery (their issue #66). This skill reimplements just the parts a bloby needs.
|
|
430
|
+
Plaud API shape is the same one [OpenPlaud](https://github.com/openplaud/openplaud) uses — they did the reverse-engineering work, including the painful workspace-token discovery (their issue #66) and the Google/Apple identity gotcha (issue #65). This skill reimplements just the parts a bloby needs, and routes transcription either through Bloby's marketplace or a provider of the human's choice.
|
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
"bloby_human": "Bruno Bertapeli",
|
|
6
6
|
"bloby": "bloby-bruno",
|
|
7
7
|
"author": "newbot-official",
|
|
8
|
-
"description": "Plaud Note integration. Pairs the user's Plaud account
|
|
8
|
+
"description": "Plaud Note integration. Pairs the user's Plaud account (email OTP or paste-token for Google/Apple identities), pulls recordings into workspace/files/audio/plaud/, and routes transcription through either the Bloby Marketplace audio-to-text service (pay-per-minute) or the human's own provider (Groq / OpenAI Whisper / Mistral Voxtral / local).",
|
|
9
9
|
"depends": [],
|
|
10
10
|
"env_keys": [],
|
|
11
11
|
"has_telemetry": false,
|
|
12
|
-
"size": "
|
|
12
|
+
"size": "12KB",
|
|
13
13
|
"contains_binaries": false,
|
|
14
|
-
"tags": ["plaud", "transcription", "audio", "recorder", "meeting"]
|
|
14
|
+
"tags": ["plaud", "transcription", "audio", "recorder", "meeting", "groq", "whisper"]
|
|
15
15
|
}
|