wispy-cli 2.7.8 → 2.7.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/wispy.mjs +163 -0
- package/core/browser.mjs +327 -0
- package/core/engine.mjs +239 -0
- package/core/memory.mjs +12 -0
- package/core/secrets.mjs +251 -0
- package/core/subagents.mjs +24 -1
- package/core/task-decomposer.mjs +375 -0
- package/core/task-router.mjs +2 -2
- package/core/tools.mjs +59 -0
- package/core/tts.mjs +194 -0
- package/package.json +1 -1
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* core/task-decomposer.mjs — Task Decomposition Engine for Wispy
|
|
3
|
+
*
|
|
4
|
+
* Splits complex tasks into parallel subtasks, routes each to the best model,
|
|
5
|
+
* executes concurrently, and synthesizes results.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { routeTask, getCheapDecomposerModel, classifyTask } from "./task-router.mjs";
|
|
9
|
+
|
|
10
|
+
// ── Subtask counter ───────────────────────────────────────────────────────────
|
|
11
|
+
|
|
12
|
+
// Monotonically increasing counter used to mint sequential subtask IDs.
// Reset by decomposeTask() so each plan starts at st-01.
let _subtaskCounter = 0;

/** Return the next zero-padded subtask id, e.g. "st-01", "st-02". */
function makeSubtaskId() {
  _subtaskCounter += 1;
  return `st-${String(_subtaskCounter).padStart(2, "0")}`;
}
|
|
16
|
+
|
|
17
|
+
// ── Decompose task using LLM ─────────────────────────────────────────────────
|
|
18
|
+
|
|
19
|
+
/**
 * Decompose a complex task into parallel subtasks using a cheap LLM.
 *
 * @param {string} task - The task description
 * @param {object} [options]
 * @param {number} [options.maxSubtasks=5] - Max number of subtasks
 * @param {string} [options.costPreference="balanced"] - "minimize" | "balanced" | "maximize-quality"
 * @param {object} [options.engine] - WispyEngine instance (for LLM calls)
 * @returns {Promise<{
 *   subtasks: Array<{id,task,type,dependencies,priority}>,
 *   parallelGroups: string[][],
 *   estimatedCost: string,
 *   estimatedTime: string
 * }>}
 */
export async function decomposeTask(task, options = {}) {
  const maxSubtasks = options.maxSubtasks ?? 5;
  const costPreference = options.costPreference ?? "balanced";
  const engine = options.engine ?? null;

  // Each decomposition mints its own st-NN sequence.
  _subtaskCounter = 0;

  // Fast path: unless the task is both complex AND parallelizable, run it as
  // a single subtask — no decomposition round-trip needed.
  const classification = classifyTask(task);
  const isDecomposable =
    classification.complexity === "complex" && classification.parallelizable;
  if (!isDecomposable) {
    const singleId = makeSubtaskId();
    const onlySubtask = {
      id: singleId,
      task,
      type: classification.type,
      dependencies: [],
      priority: 1,
    };
    return {
      subtasks: [onlySubtask],
      parallelGroups: [[singleId]],
      estimatedCost: costPreference === "minimize" ? "very-low" : "low",
      estimatedTime: classification.complexity === "simple" ? "<1min" : "1-2min",
    };
  }

  // Prefer LLM-driven decomposition when an engine is available; on any
  // failure fall through to the heuristic splitter below.
  if (engine) {
    try {
      return await _llmDecompose(task, maxSubtasks, costPreference, engine);
    } catch (err) {
      if (process.env.WISPY_DEBUG) {
        console.error(`[task-decomposer] LLM decompose failed: ${err.message}, falling back to heuristic`);
      }
    }
  }

  // Heuristic decomposition (no LLM)
  return _heuristicDecompose(task, maxSubtasks, costPreference, classification);
}
|
|
75
|
+
|
|
76
|
+
/**
 * Decompose using an LLM call (cheap model).
 *
 * Asks a cheap decomposer model for a JSON plan, re-issues canonical
 * subtask IDs, rewrites dependency references, and derives the parallel
 * groups from each subtask's priority value.
 */
async function _llmDecompose(task, maxSubtasks, costPreference, engine) {
  const { model } = getCheapDecomposerModel();

  const systemPrompt = `You are a task decomposition expert. Split complex tasks into independent subtasks that can be parallelized.
Reply with ONLY valid JSON. No markdown, no explanation.`;

  const userPrompt = `Decompose this task into at most ${maxSubtasks} subtasks.
Task: "${task}"

Requirements:
- Identify subtasks that can run in parallel (no dependencies)
- Identify subtasks that need results from others (add dependency IDs)
- Classify each subtask type: coding, research, analysis, design, review, summarize, format, or general
- Set priority: 1 = first group (parallel), 2 = waits for priority 1, etc.

Respond with ONLY this JSON (no markdown):
{
  "subtasks": [
    {"id": "st-01", "task": "...", "type": "coding", "dependencies": [], "priority": 1},
    {"id": "st-02", "task": "...", "type": "research", "dependencies": [], "priority": 1},
    {"id": "st-03", "task": "...", "type": "review", "dependencies": ["st-01","st-02"], "priority": 2}
  ],
  "estimatedTime": "2-5min",
  "estimatedCost": "low"
}`;

  const response = await engine.providers.chat(
    [
      { role: "system", content: systemPrompt },
      { role: "user", content: userPrompt },
    ],
    [],
    { model },
  );
  const text = response.type === "text" ? response.text : JSON.stringify(response);

  // Pull the JSON object out of the reply — models sometimes wrap it in prose.
  const jsonMatch = text.match(/\{[\s\S]*"subtasks"[\s\S]*\}/);
  if (!jsonMatch) throw new Error("LLM did not return valid decomposition JSON");

  const parsed = JSON.parse(jsonMatch[0]);
  const subtasks = (parsed.subtasks ?? []).slice(0, maxSubtasks);

  // Replace model-invented IDs with canonical sequential ones, remembering
  // old → new so dependency references can be rewritten afterwards.
  const idMap = new Map();
  for (const st of subtasks) {
    const canonical = makeSubtaskId();
    idMap.set(st.id, canonical);
    st.id = canonical;
  }

  // Rewrite dependencies; drop any that point outside the retained plan.
  const knownIds = new Set(subtasks.map(s => s.id));
  for (const st of subtasks) {
    st.dependencies = (st.dependencies ?? [])
      .map(dep => idMap.get(dep) ?? dep)
      .filter(dep => knownIds.has(dep));
  }

  // Group subtask IDs by priority; lower priority numbers execute first.
  const byPriority = new Map();
  for (const st of subtasks) {
    const key = String(st.priority ?? 1);
    if (!byPriority.has(key)) byPriority.set(key, []);
    byPriority.get(key).push(st.id);
  }
  const parallelGroups = [...byPriority.keys()]
    .sort((a, b) => Number(a) - Number(b))
    .map(key => byPriority.get(key));

  return {
    subtasks,
    parallelGroups,
    estimatedCost: parsed.estimatedCost ?? _estimateCost(costPreference, subtasks.length),
    estimatedTime: parsed.estimatedTime ?? _estimateTime(subtasks.length),
  };
}
|
|
149
|
+
|
|
150
|
+
/**
 * Heuristic decomposition (no LLM required).
 *
 * Splits the task text on newlines, semicolons, and certain "and"
 * conjunctions, then classifies each fragment independently. All fragments
 * are treated as independent (a single parallel group).
 */
function _heuristicDecompose(task, maxSubtasks, costPreference, classification) {
  const fragments = task
    .split(/\n|;|\band\b(?=[^,]*,|\s+\w+\s+the\s)/)
    .map(piece => piece.trim())
    .filter(piece => piece.length > 10); // ignore trivially short fragments

  const subtasks = [];
  for (const fragment of fragments.slice(0, maxSubtasks)) {
    subtasks.push({
      id: makeSubtaskId(),
      task: fragment,
      type: classifyTask(fragment).type,
      dependencies: [],
      priority: 1,
    });
  }

  // Nothing useful split off — keep the whole task as one subtask.
  if (subtasks.length === 0) {
    const id = makeSubtaskId();
    return {
      subtasks: [{ id, task, type: classification.type, dependencies: [], priority: 1 }],
      parallelGroups: [[id]],
      estimatedCost: _estimateCost(costPreference, 1),
      estimatedTime: "1-3min",
    };
  }

  return {
    subtasks,
    parallelGroups: [subtasks.map(st => st.id)],
    estimatedCost: _estimateCost(costPreference, subtasks.length),
    estimatedTime: _estimateTime(subtasks.length),
  };
}
|
|
186
|
+
|
|
187
|
+
/**
 * Rough cost label for a plan.
 * @param {string} costPreference - "minimize" | "balanced" | "maximize-quality"
 * @param {number} numSubtasks - Fan-out of the plan
 * @returns {"very-low"|"low"|"medium"|"high"}
 */
function _estimateCost(costPreference, numSubtasks) {
  switch (costPreference) {
    case "minimize":
      return "very-low";
    case "maximize-quality":
      return numSubtasks > 3 ? "high" : "medium";
    default:
      // "balanced" (and anything unrecognized)
      return numSubtasks > 3 ? "medium" : "low";
  }
}
|
|
192
|
+
|
|
193
|
+
/**
 * Rough wall-clock estimate for a plan with the given fan-out.
 * @param {number} numSubtasks
 * @returns {"<1min"|"1-3min"|"2-5min"}
 */
function _estimateTime(numSubtasks) {
  if (numSubtasks > 3) return "2-5min";
  return numSubtasks <= 1 ? "<1min" : "1-3min";
}
|
|
198
|
+
|
|
199
|
+
// ── Execute decomposed plan ──────────────────────────────────────────────────
|
|
200
|
+
|
|
201
|
+
/**
 * Execute an execution plan, running parallel groups concurrently.
 *
 * Groups are executed strictly in order: every subtask in a group finishes
 * (or fails after retries) before the next group starts, so later subtasks
 * can read their dependencies' results out of `completedResults`.
 *
 * @param {object} plan - Result from decomposeTask()
 * @param {object} engine - WispyEngine instance
 * @param {object} [opts]
 * @param {string} [opts.costPreference="balanced"]
 * @param {Function} [opts.onSubtaskStart] - (subtask) => void
 * @param {Function} [opts.onSubtaskComplete] - (subtask, result) => void
 * @param {Function} [opts.onSubtaskFail] - (subtask, error) => void
 * @returns {Promise<{ results: object[], synthesized: string, errors: object[] }>}
 */
export async function executeDecomposedPlan(plan, engine, opts = {}) {
  const costPreference = opts.costPreference ?? "balanced";
  const completedResults = {}; // id → result (shared across groups; filled as subtasks finish)
  const errors = [];
  const MAX_RETRIES = 1; // one retry per subtask before recording a failure

  for (const group of plan.parallelGroups) {
    // Filter to subtasks in this group (skip if all deps not satisfied)
    const groupSubtasks = group
      .map(id => plan.subtasks.find(s => s.id === id))
      .filter(Boolean);

    // Run group in parallel
    const groupPromises = groupSubtasks.map(async (subtask) => {
      // Route to best model. estimatedTokens is a rough chars/4 heuristic
      // plus headroom for the system prompt and response.
      const routing = routeTask(
        { type: subtask.type, complexity: "medium", estimatedTokens: Math.ceil(subtask.task.length / 4) + 800, parallelizable: false },
        null,
        { costPreference }
      );

      opts.onSubtaskStart?.(subtask);

      let attempt = 0;
      while (attempt <= MAX_RETRIES) {
        try {
          // Build context from dependencies. Missing dependency results
          // (e.g. dropped subtasks) are silently skipped via filter(Boolean).
          const depContext = subtask.dependencies
            .map(depId => completedResults[depId])
            .filter(Boolean)
            .map((r, i) => `### Dependency ${i + 1} result:\n${r}`)
            .join("\n\n");

          const fullTask = depContext
            ? `${subtask.task}\n\n---\nContext from previous steps:\n${depContext}`
            : subtask.task;

          // Use sub-agent manager if available, else direct provider call
          let result;
          if (engine.subagents) {
            const agent = await engine.subagents.spawn({
              task: fullTask,
              label: `decomposed-${subtask.id}`,
              model: routing.model,
              timeout: 120, // seconds
              workstream: engine._activeWorkstream,
            });

            // Wait for completion (120s cap, matching the spawn timeout)
            result = await engine.subagents.waitFor(agent.id, 120_000);
            result = result.result ?? result.error ?? "(no result)";
          } else {
            // Direct provider call
            const messages = [
              { role: "system", content: `You are a focused worker agent. Complete only this specific subtask. Be concise.` },
              { role: "user", content: fullTask },
            ];
            const response = await engine.providers.chat(messages, [], { model: routing.model });
            result = response.type === "text" ? response.text : JSON.stringify(response);
          }

          completedResults[subtask.id] = result;
          opts.onSubtaskComplete?.(subtask, result);
          return { id: subtask.id, result, routing, success: true };
        } catch (err) {
          attempt++;
          if (attempt > MAX_RETRIES) {
            const error = { id: subtask.id, error: err.message, subtask };
            errors.push(error);
            opts.onSubtaskFail?.(subtask, err);
            // Non-critical: continue with empty result. The placeholder
            // string is recognized (and filtered out) by synthesizeResults.
            completedResults[subtask.id] = `[subtask ${subtask.id} failed: ${err.message}]`;
            return { id: subtask.id, result: null, error: err.message, success: false };
          }
          // Retry with linear backoff (1s, 2s, …)
          await new Promise(r => setTimeout(r, 1000 * attempt));
        }
      }
    });

    // Wait for all in group before proceeding to next group
    await Promise.all(groupPromises);
  }

  // Collect results in subtask order
  const orderedResults = plan.subtasks.map(st => ({
    id: st.id,
    task: st.task,
    type: st.type,
    result: completedResults[st.id] ?? null,
  }));

  // Synthesize
  let synthesized;
  try {
    synthesized = await synthesizeResults(orderedResults, engine);
  } catch (err) {
    // Fallback: concatenate results
    synthesized = orderedResults
      .filter(r => r.result)
      .map(r => `**${r.type.toUpperCase()}**: ${r.result}`)
      .join("\n\n---\n\n");
  }

  return {
    results: orderedResults,
    synthesized,
    errors,
  };
}
|
|
323
|
+
|
|
324
|
+
// ── Synthesize results ────────────────────────────────────────────────────────
|
|
325
|
+
|
|
326
|
+
/**
 * Synthesize multiple subtask results into a coherent response.
 *
 * With no engine (or if the LLM call fails) the results are concatenated
 * under a markdown header instead of being merged by a model.
 *
 * @param {Array<{id, task, type, result}>} subtaskResults
 * @param {object} [engine] - WispyEngine instance (for LLM synthesis)
 * @returns {Promise<string>}
 */
export async function synthesizeResults(subtaskResults, engine) {
  // Failed subtasks are recorded as "[subtask …]" placeholder strings — skip them.
  const validResults = subtaskResults.filter(
    (entry) => entry.result && !entry.result.startsWith("[subtask")
  );

  if (validResults.length === 0) return "All subtasks failed to produce results.";
  if (validResults.length === 1) return validResults[0].result;

  // Build synthesis prompt; task text and results are truncated to bound its size.
  const sections = [];
  validResults.forEach((r, i) => {
    sections.push(`### Subtask ${i + 1} (${r.type}): ${r.task.slice(0, 100)}\n${r.result.slice(0, 2000)}`);
  });
  const parts = sections.join("\n\n---\n\n");

  // If no engine, concatenate
  if (!engine) {
    return `## Combined Results\n\n${parts}`;
  }

  const { model } = getCheapDecomposerModel();

  const messages = [
    {
      role: "system",
      content: `You are a synthesis agent. Merge multiple subtask outputs into a single, coherent, well-structured response.
Remove redundancy. Resolve conflicts by noting them. Maintain all important information.`,
    },
    {
      role: "user",
      content: `Synthesize these ${validResults.length} subtask results into one coherent response:\n\n${parts}`,
    },
  ];

  try {
    const reply = await engine.providers.chat(messages, [], { model });
    return reply.type === "text" ? reply.text : JSON.stringify(reply);
  } catch (err) {
    // Fallback: plain concatenation if the synthesis call fails
    return `## Synthesized Results\n\n${parts}`;
  }
}
|
package/core/task-router.mjs
CHANGED
|
@@ -42,7 +42,7 @@ export const MODEL_CAPABILITIES = {
|
|
|
42
42
|
|
|
43
43
|
// Claude family
|
|
44
44
|
"claude-opus-4-20250514": {
|
|
45
|
-
strengths: ["architecture", "reasoning", "writing", "analysis"],
|
|
45
|
+
strengths: ["architecture", "reasoning", "writing", "analysis", "design"],
|
|
46
46
|
speed: "slow",
|
|
47
47
|
cost: "very-high",
|
|
48
48
|
contextWindow: 200000,
|
|
@@ -327,7 +327,7 @@ export function routeTask(task, availableModels, opts = {}) {
|
|
|
327
327
|
score -= costScore(model) * 2;
|
|
328
328
|
score -= speedScore(model);
|
|
329
329
|
} else if (costPreference === "maximize-quality") {
|
|
330
|
-
score +=
|
|
330
|
+
score += costScore(model) * 2; // prefer expensive (high quality)
|
|
331
331
|
score -= speedScore(model) * 0.5;
|
|
332
332
|
} else {
|
|
333
333
|
// balanced: for complex tasks lean toward quality, simple tasks lean toward speed+cost
|
package/core/tools.mjs
CHANGED
|
@@ -275,6 +275,57 @@ export class ToolRegistry {
|
|
|
275
275
|
required: ["id", "message"],
|
|
276
276
|
},
|
|
277
277
|
},
|
|
278
|
+
// ── Browser tools ────────────────────────────────────────────────────────
|
|
279
|
+
{
|
|
280
|
+
name: "browser_status",
|
|
281
|
+
description: "Check browser bridge health and current session status",
|
|
282
|
+
parameters: { type: "object", properties: {} },
|
|
283
|
+
},
|
|
284
|
+
{
|
|
285
|
+
name: "browser_tabs",
|
|
286
|
+
description: "List all open browser tabs",
|
|
287
|
+
parameters: {
|
|
288
|
+
type: "object",
|
|
289
|
+
properties: {
|
|
290
|
+
browser: { type: "string", enum: ["safari", "chrome"] },
|
|
291
|
+
},
|
|
292
|
+
},
|
|
293
|
+
},
|
|
294
|
+
{
|
|
295
|
+
name: "browser_navigate",
|
|
296
|
+
description: "Navigate the active browser tab to a URL",
|
|
297
|
+
parameters: {
|
|
298
|
+
type: "object",
|
|
299
|
+
properties: { url: { type: "string" } },
|
|
300
|
+
required: ["url"],
|
|
301
|
+
},
|
|
302
|
+
},
|
|
303
|
+
{
|
|
304
|
+
name: "browser_screenshot",
|
|
305
|
+
description: "Take a screenshot of the active browser tab",
|
|
306
|
+
parameters: { type: "object", properties: {} },
|
|
307
|
+
},
|
|
308
|
+
{
|
|
309
|
+
name: "browser_front_tab",
|
|
310
|
+
description: "Get info about the currently active browser tab (URL, title)",
|
|
311
|
+
parameters: { type: "object", properties: {} },
|
|
312
|
+
},
|
|
313
|
+
{
|
|
314
|
+
name: "browser_activate",
|
|
315
|
+
description: "Bring the browser tab to front / focus it",
|
|
316
|
+
parameters: { type: "object", properties: {} },
|
|
317
|
+
},
|
|
318
|
+
{
|
|
319
|
+
name: "browser_attach",
|
|
320
|
+
description: "Attach to a browser for control. Auto-selects the best available browser if no args given.",
|
|
321
|
+
parameters: {
|
|
322
|
+
type: "object",
|
|
323
|
+
properties: {
|
|
324
|
+
browser: { type: "string" },
|
|
325
|
+
mode: { type: "string" },
|
|
326
|
+
},
|
|
327
|
+
},
|
|
328
|
+
},
|
|
278
329
|
];
|
|
279
330
|
|
|
280
331
|
for (const def of builtins) {
|
|
@@ -594,6 +645,14 @@ export class ToolRegistry {
|
|
|
594
645
|
case "get_subagent_result":
|
|
595
646
|
case "kill_subagent":
|
|
596
647
|
case "steer_subagent":
|
|
648
|
+
// Browser tools — handled at engine level
|
|
649
|
+
case "browser_status":
|
|
650
|
+
case "browser_tabs":
|
|
651
|
+
case "browser_navigate":
|
|
652
|
+
case "browser_screenshot":
|
|
653
|
+
case "browser_front_tab":
|
|
654
|
+
case "browser_activate":
|
|
655
|
+
case "browser_attach":
|
|
597
656
|
return { success: false, error: `Tool "${name}" requires engine context. Call via WispyEngine.processMessage().` };
|
|
598
657
|
|
|
599
658
|
default:
|
package/core/tts.mjs
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* core/tts.mjs — Text-to-Speech Manager for Wispy
|
|
3
|
+
*
|
|
4
|
+
* Auto-detects available TTS provider:
|
|
5
|
+
* 1. OpenAI TTS API (best quality, requires OPENAI_API_KEY)
|
|
6
|
+
* 2. macOS native `say` command (free, always available on macOS)
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* const tts = new TTSManager(secretsManager);
|
|
10
|
+
* const result = await tts.speak("Hello world");
|
|
11
|
+
* // result.path → audio file path
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import os from "node:os";
|
|
15
|
+
import path from "node:path";
|
|
16
|
+
import { writeFile } from "node:fs/promises";
|
|
17
|
+
|
|
18
|
+
export class TTSManager {
  /**
   * @param {object} secretsManager - Must expose `resolve(name)` → Promise<string|null>.
   */
  constructor(secretsManager) {
    this.secrets = secretsManager;
    this._provider = null; // cached auto-detected provider ("openai" | "macos")
  }

  /**
   * Auto-detect available TTS provider.
   * Order: OpenAI (best quality) → macOS say (free) → null
   * The result is cached for the lifetime of this manager.
   * @returns {Promise<"openai"|"macos"|null>}
   */
  async detectProvider() {
    if (this._provider) return this._provider;

    const openaiKey = await this.secrets.resolve("OPENAI_API_KEY");
    if (openaiKey) {
      this._provider = "openai";
      return "openai";
    }

    if (process.platform === "darwin") {
      this._provider = "macos";
      return "macos";
    }

    return null;
  }

  /**
   * Generate speech from text.
   *
   * @param {string} text - Text to speak
   * @param {object} options
   * @param {string} [options.provider] - "openai" | "macos" | "auto"
   * @param {string} [options.voice] - Voice name
   * @param {string} [options.model] - OpenAI TTS model
   * @param {string} [options.format] - Output format (openai: mp3/opus/aac/flac, macos: aiff)
   * @param {number} [options.rate] - Speech rate (macOS only)
   * @returns {Promise<{provider, path, format, voice}|{error}>}
   */
  async speak(text, options = {}) {
    const providerOpt = options.provider ?? "auto";
    const provider = providerOpt === "auto"
      ? await this.detectProvider()
      : providerOpt;

    switch (provider) {
      case "openai":
        return this._openaiTTS(text, options);
      case "macos":
        return this._macosTTS(text, options);
      default:
        // Unknown or unavailable provider — report instead of throwing.
        return { error: "No TTS provider available. Set OPENAI_API_KEY or use macOS." };
    }
  }

  /**
   * OpenAI TTS API
   * https://platform.openai.com/docs/api-reference/audio/createSpeech
   * Writes the returned audio to a temp file and returns its path.
   */
  async _openaiTTS(text, {
    voice = "alloy",
    model = "tts-1",
    format = "mp3",
  } = {}) {
    const apiKey = await this.secrets.resolve("OPENAI_API_KEY");
    if (!apiKey) {
      return { error: "OPENAI_API_KEY not found" };
    }

    const response = await fetch("https://api.openai.com/v1/audio/speech", {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model,
        input: text,
        voice,
        response_format: format,
      }),
    });

    if (!response.ok) {
      // Best-effort read of the error body; never mask the HTTP status.
      const errText = await response.text().catch(() => "unknown error");
      return { error: `OpenAI TTS failed: ${response.status} ${errText}` };
    }

    const buffer = Buffer.from(await response.arrayBuffer());
    const outputPath = path.join(os.tmpdir(), `wispy-tts-${Date.now()}.${format}`);
    await writeFile(outputPath, buffer);

    return { provider: "openai", path: outputPath, format, voice };
  }

  /**
   * macOS native TTS using the `say` command.
   * Always produces AIFF output in the system temp directory.
   */
  async _macosTTS(text, {
    voice = "Samantha",
    rate = 200,
  } = {}) {
    if (process.platform !== "darwin") {
      return { error: "macOS TTS is only available on macOS" };
    }

    const outputPath = path.join(os.tmpdir(), `wispy-tts-${Date.now()}.aiff`);
    const exec = await this._execFile();

    try {
      // execFile (not exec) — text is passed as an argv element, no shell injection.
      await exec("say", ["-v", voice, "-r", String(rate), "-o", outputPath, text], {
        timeout: 30000,
      });
    } catch (err) {
      return { error: `macOS TTS failed: ${err.message}` };
    }

    return { provider: "macos", path: outputPath, format: "aiff", voice };
  }

  /**
   * Lazily load a promisified child_process.execFile.
   * child_process is only needed for the macOS paths, so it is imported on demand.
   */
  async _execFile() {
    const { execFile } = await import("node:child_process");
    const { promisify } = await import("node:util");
    return promisify(execFile);
  }

  /**
   * List available macOS voices.
   * @returns {Promise<Array<{name: string, locale: string}>>} empty off-macOS or on error
   */
  async listMacOSVoices() {
    if (process.platform !== "darwin") return [];
    try {
      const exec = await this._execFile();
      const { stdout } = await exec("say", ["-v", "?"], { timeout: 5000 });
      return stdout.trim().split("\n").map(line => {
        // Each line looks like: "<name> <locale>  # <sample text>".
        // Voice names may contain spaces (e.g. "Bad News", "Good News"), so a
        // plain whitespace split would truncate them — anchor on the "#" instead.
        const m = line.match(/^(.+?)\s+(\S+)\s*#/);
        if (m) return { name: m[1].trim(), locale: m[2] };
        // Fallback for lines without a sample comment.
        const parts = line.trim().split(/\s+/);
        return { name: parts[0], locale: parts[1] };
      });
    } catch {
      return [];
    }
  }
}
|
|
160
|
+
|
|
161
|
+
/**
 * Tool definition for ToolRegistry integration.
 *
 * JSON-schema-style declaration of the "text_to_speech" tool contract only;
 * this object contains no behavior — execution is wired up elsewhere.
 * Only `text` is required; all other parameters fall back to the defaults
 * documented in TTSManager.speak().
 */
export const TTS_TOOL_DEFINITION = {
  name: "text_to_speech",
  description: "Convert text to speech audio file. Returns the path to the generated audio file.",
  parameters: {
    type: "object",
    properties: {
      text: {
        type: "string",
        description: "Text to convert to speech",
      },
      voice: {
        type: "string",
        description: "Voice name (openai: alloy/echo/fable/onyx/nova/shimmer, macos: Samantha/Alex/Victoria/etc)",
      },
      provider: {
        type: "string",
        enum: ["openai", "macos", "auto"],
        description: "TTS provider to use (default: auto-detect)",
      },
      model: {
        type: "string",
        description: "TTS model (OpenAI only: tts-1 or tts-1-hd)",
      },
      rate: {
        type: "number",
        description: "Speech rate in words per minute (macOS only, default: 200)",
      },
    },
    required: ["text"],
  },
};
|