autopreso 0.1.1 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -16
- package/package.json +9 -3
- package/public/app.js +141 -42
- package/public/style.css +64 -11
- package/src/cli.js +2 -2
- package/src/openai-transcription.js +37 -1
- package/src/server.js +70 -20
- package/src/settings-store.js +10 -0
- package/src/transcript-turn-queue.js +1 -1
- package/src/whiteboard-keywords.js +43 -0
- package/src/whiteboard-session.js +7 -1
package/README.md
CHANGED
|
@@ -29,13 +29,11 @@ Stage a few seed elements, hit start, and present.
|
|
|
29
29
|
```sh
|
|
30
30
|
$ npx autopreso # boots the server, opens the browser
|
|
31
31
|
autopreso listening at http://127.0.0.1:3210
|
|
32
|
-
whiteboard agent: openai gpt-5.5
|
|
33
|
-
settings file: /Users/you/.config/autopreso/settings.json
|
|
34
32
|
|
|
35
33
|
# In the browser:
|
|
36
34
|
# 1. Drop reference materials onto the staging canvas (title, agenda, etc).
|
|
37
|
-
# 2. Pick your microphone,
|
|
38
|
-
# 3. Click "Start
|
|
35
|
+
# 2. Pick your microphone, transcription model, agent model, and optional Agent instructions.
|
|
36
|
+
# 3. Click "Start Preso" and start talking.
|
|
39
37
|
```
|
|
40
38
|
|
|
41
39
|
## Install
|
|
@@ -80,10 +78,10 @@ npm start
|
|
|
80
78
|
└────────────────┘
|
|
81
79
|
```
|
|
82
80
|
|
|
83
|
-
- **Two modes** - "staging" lets you sketch seed content client-side; "live" hands the canvas over to the agent and starts streaming transcripts.
|
|
81
|
+
- **Two modes** - "staging" lets you sketch seed content client-side; "live" hands the canvas over to the agent, biases OpenAI Realtime transcription toward staging text and labels, and starts streaming transcripts.
|
|
84
82
|
- **Local server, local network only** - the Express + WebSocket server binds to 127.0.0.1; nothing is exposed beyond your machine.
|
|
85
|
-
- **Persistent settings** - models, API keys,
|
|
86
|
-
- **Warmup loop** - after you hit start the agent primes itself against your staging content so the first sentence you say doesn't get a cold model.
|
|
83
|
+
- **Persistent settings** - models, API keys, STT engine choices, and Agent instructions live in `~/.config/autopreso/settings.json` and survive restarts.
|
|
84
|
+
- **Warmup loop** - after you hit start the agent primes itself against your staging content and Agent instructions so the first sentence you say doesn't get a cold model.
|
|
87
85
|
|
|
88
86
|
## CLI Reference
|
|
89
87
|
|
|
@@ -102,6 +100,7 @@ npm start
|
|
|
102
100
|
## Configuration
|
|
103
101
|
|
|
104
102
|
Settings persist at `~/.config/autopreso/settings.json` and are managed from the in-app status panel.
|
|
103
|
+
Agent instructions are saved automatically from staging, can be up to 100,000 characters, and take effect on the next Start Preso.
|
|
105
104
|
|
|
106
105
|
### Defaults on first run
|
|
107
106
|
|
|
@@ -119,22 +118,24 @@ Auto-detection precedence: **Codex CLI auth wins over `OLLAMA_MODEL` wins over `
|
|
|
119
118
|
|
|
120
119
|
### Environment variables
|
|
121
120
|
|
|
122
|
-
|
|
121
|
+
Provider variables only seed `settings.json` on first run. Once the file exists, they're ignored - edit the file or use the in-app panel. Log path variables are read on each process start.
|
|
123
122
|
|
|
124
|
-
| Variable
|
|
125
|
-
|
|
|
126
|
-
| `PORT`
|
|
127
|
-
| `OPENAI_API_KEY`
|
|
128
|
-
| `OPENAI_MODEL`
|
|
129
|
-
| `CODEX_MODEL`
|
|
130
|
-
| `OLLAMA_MODEL`
|
|
123
|
+
| Variable | Purpose |
|
|
124
|
+
| ---------------------- | ----------------------------------------------------- |
|
|
125
|
+
| `PORT` | Port to listen on. Default: `3210`. |
|
|
126
|
+
| `OPENAI_API_KEY` | Seeds the OpenAI key for both agent and Realtime STT. |
|
|
127
|
+
| `OPENAI_MODEL` | Seeds the OpenAI agent model. |
|
|
128
|
+
| `CODEX_MODEL` | Seeds the Codex model. |
|
|
129
|
+
| `OLLAMA_MODEL` | Seeds the Ollama model. |
|
|
130
|
+
| `AUTOPRESO_CACHE_LOG` | Cache usage log path. Default: `~/.config/autopreso/logs/cache.log`. |
|
|
131
|
+
| `AUTOPRESO_DEBUG_LOG` | Agent debug log path. Default: `~/.config/autopreso/logs/debug.log`. |
|
|
131
132
|
|
|
132
133
|
Local Moonshine transcription ships as an optional native sidecar for `darwin-arm64` and `darwin-x64`. On other platforms, choose OpenAI Realtime in the STT panel.
|
|
133
134
|
|
|
134
135
|
## Credits
|
|
135
136
|
|
|
136
137
|
- [Excalidraw](https://github.com/excalidraw/excalidraw) - the whiteboard canvas, scene model, and rendering.
|
|
137
|
-
- [Moonshine](https://github.com/
|
|
138
|
+
- [Moonshine](https://github.com/moonshine-ai/moonshine) the local speech-to-text model that makes the offline path possible.
|
|
138
139
|
- [Vercel AI SDK](https://github.com/vercel/ai) - tool-calling agent loop and provider abstraction.
|
|
139
140
|
|
|
140
141
|
## Development
|
|
@@ -142,6 +143,7 @@ Local Moonshine transcription ships as an optional native sidecar for `darwin-ar
|
|
|
142
143
|
```sh
|
|
143
144
|
npm install # install deps
|
|
144
145
|
npm run dev # run the CLI from source
|
|
146
|
+
npm run typecheck # tsc --noEmit
|
|
145
147
|
npm test # node --test
|
|
146
148
|
npm run build:moonshine-sidecars # build the Python sidecar binaries
|
|
147
149
|
```
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "autopreso",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.4",
|
|
4
4
|
"description": "Realtime speech to presentation. Let the whiteboard whiteboard itself.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Kun Chen <kun@kunchenguid.com>",
|
|
@@ -41,7 +41,8 @@
|
|
|
41
41
|
"dev": "node ./src/cli.js",
|
|
42
42
|
"prepare:release-packages": "node ./scripts/prepare-release-packages.js",
|
|
43
43
|
"test": "node --test",
|
|
44
|
-
"start": "node ./src/cli.js"
|
|
44
|
+
"start": "node ./src/cli.js",
|
|
45
|
+
"typecheck": "tsc --noEmit"
|
|
45
46
|
},
|
|
46
47
|
"dependencies": {
|
|
47
48
|
"@ai-sdk/openai": "^3.0.63",
|
|
@@ -55,5 +56,10 @@
|
|
|
55
56
|
"@autopreso/moonshine-darwin-arm64": "0.1.1",
|
|
56
57
|
"@autopreso/moonshine-darwin-x64": "0.1.1"
|
|
57
58
|
},
|
|
58
|
-
"devDependencies": {
|
|
59
|
+
"devDependencies": {
|
|
60
|
+
"@types/express": "^5.0.6",
|
|
61
|
+
"@types/node": "^25.6.2",
|
|
62
|
+
"@types/ws": "^8.18.1",
|
|
63
|
+
"typescript": "^6.0.3"
|
|
64
|
+
}
|
|
59
65
|
}
|
package/public/app.js
CHANGED
|
@@ -13,8 +13,27 @@ const MOONSHINE_MODELS = ["tiny", "small", "medium"];
|
|
|
13
13
|
const MIC_STORAGE_KEY = "autopreso.mic";
|
|
14
14
|
|
|
15
15
|
const STARTER_STAGING_ELEMENTS = [];
|
|
16
|
-
|
|
17
|
-
|
|
16
|
+
|
|
17
|
+
function fullscreenIcon(isFullscreen) {
|
|
18
|
+
const paths = isFullscreen
|
|
19
|
+
? ["M3 6 H6 V3", "M10 3 V6 H13", "M13 10 H10 V13", "M6 13 V10 H3"]
|
|
20
|
+
: ["M3 6 V3 H6", "M10 3 H13 V6", "M13 10 V13 H10", "M6 13 H3 V10"];
|
|
21
|
+
return React.createElement(
|
|
22
|
+
"svg",
|
|
23
|
+
{
|
|
24
|
+
width: "1em",
|
|
25
|
+
height: "1em",
|
|
26
|
+
viewBox: "0 0 16 16",
|
|
27
|
+
fill: "none",
|
|
28
|
+
stroke: "currentColor",
|
|
29
|
+
strokeWidth: 1.8,
|
|
30
|
+
strokeLinecap: "round",
|
|
31
|
+
strokeLinejoin: "round",
|
|
32
|
+
"aria-hidden": "true",
|
|
33
|
+
},
|
|
34
|
+
...paths.map((d, i) => React.createElement("path", { key: i, d })),
|
|
35
|
+
);
|
|
36
|
+
}
|
|
18
37
|
|
|
19
38
|
function loadStoredMic() {
|
|
20
39
|
try {
|
|
@@ -50,6 +69,7 @@ function App() {
|
|
|
50
69
|
const [resetting, setResetting] = React.useState(false);
|
|
51
70
|
// warmupState: { state: "idle"|"running"|"confirmed"|"exhausted"|"cancelled", attempt, maxAttempts }
|
|
52
71
|
const [warmupState, setWarmupState] = React.useState({ state: "idle", attempt: 0, maxAttempts: 8 });
|
|
72
|
+
const [agentInstructions, setAgentInstructionsValue] = React.useState("");
|
|
53
73
|
const audioSessionRef = React.useRef(null);
|
|
54
74
|
const apiRef = React.useRef(null);
|
|
55
75
|
const wsRef = React.useRef(null);
|
|
@@ -63,6 +83,11 @@ function App() {
|
|
|
63
83
|
const userElementsSyncTimerRef = React.useRef(null);
|
|
64
84
|
const lastSyncedElementsHashRef = React.useRef("");
|
|
65
85
|
const listeningRef = React.useRef(false);
|
|
86
|
+
// Seed the textarea once from settings, then let the user own it locally so
|
|
87
|
+
// their keystrokes don't fight the WS settings broadcast we trigger on save.
|
|
88
|
+
const agentInstructionsSeededRef = React.useRef(false);
|
|
89
|
+
const agentInstructionsSaveTimerRef = React.useRef(null);
|
|
90
|
+
const agentInstructionsSavePromiseRef = React.useRef(Promise.resolve());
|
|
66
91
|
|
|
67
92
|
React.useEffect(() => { listeningRef.current = listening; }, [listening]);
|
|
68
93
|
const [isFullscreen, setIsFullscreen] = React.useState(false);
|
|
@@ -91,9 +116,34 @@ function App() {
|
|
|
91
116
|
clearTimeout(captionTimerRef.current);
|
|
92
117
|
clearTimeout(resetConfirmTimerRef.current);
|
|
93
118
|
clearTimeout(userElementsSyncTimerRef.current);
|
|
119
|
+
clearTimeout(agentInstructionsSaveTimerRef.current);
|
|
94
120
|
};
|
|
95
121
|
}, []);
|
|
96
122
|
|
|
123
|
+
React.useEffect(() => {
|
|
124
|
+
if (agentInstructionsSeededRef.current) return;
|
|
125
|
+
if (!settings || typeof settings.agentInstructions !== "string") return;
|
|
126
|
+
setAgentInstructionsValue(settings.agentInstructions);
|
|
127
|
+
agentInstructionsSeededRef.current = true;
|
|
128
|
+
}, [settings]);
|
|
129
|
+
|
|
130
|
+
function handleAgentInstructionsChange(value) {
|
|
131
|
+
setAgentInstructionsValue(value);
|
|
132
|
+
clearTimeout(agentInstructionsSaveTimerRef.current);
|
|
133
|
+
agentInstructionsSaveTimerRef.current = setTimeout(() => {
|
|
134
|
+
agentInstructionsSaveTimerRef.current = null;
|
|
135
|
+
agentInstructionsSavePromiseRef.current = saveSettings({ agentInstructions: value }).catch((err) => setError(err.message));
|
|
136
|
+
}, 600);
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
async function flushAgentInstructionsSave() {
|
|
140
|
+
clearTimeout(agentInstructionsSaveTimerRef.current);
|
|
141
|
+
agentInstructionsSaveTimerRef.current = null;
|
|
142
|
+
await agentInstructionsSavePromiseRef.current;
|
|
143
|
+
agentInstructionsSavePromiseRef.current = saveSettings({ agentInstructions });
|
|
144
|
+
await agentInstructionsSavePromiseRef.current;
|
|
145
|
+
}
|
|
146
|
+
|
|
97
147
|
function handleExcalidrawChange(elements) {
|
|
98
148
|
// Only push user edits to the server while in live mode. In staging the
|
|
99
149
|
// canvas is a client-side scratchpad; the server doesn't need to know.
|
|
@@ -322,6 +372,7 @@ function App() {
|
|
|
322
372
|
setError("");
|
|
323
373
|
setPresoStarting(true);
|
|
324
374
|
try {
|
|
375
|
+
await flushAgentInstructionsSave();
|
|
325
376
|
// Snapshot what the user has on the staging canvas right now.
|
|
326
377
|
const stagingNative = excalidrawAPI.getSceneElements().map((el) => ({ ...el }));
|
|
327
378
|
stagingSceneRef.current = stagingNative;
|
|
@@ -486,14 +537,15 @@ function App() {
|
|
|
486
537
|
const canvas = document.querySelector("canvas.excalidraw__canvas.static");
|
|
487
538
|
if (!canvas) return null;
|
|
488
539
|
const blob = await canvasToBlob(canvas);
|
|
489
|
-
|
|
540
|
+
const downscaled = await downscaleBlobByHalf(blob);
|
|
541
|
+
return await blobToDataUrl(downscaled);
|
|
490
542
|
}
|
|
491
543
|
|
|
492
544
|
async function captureStagingSceneAsImage(excalidrawAPI, elements) {
|
|
493
545
|
if (!Array.isArray(elements) || elements.length === 0) {
|
|
494
|
-
// Empty staging - no scene to render.
|
|
495
|
-
//
|
|
496
|
-
return
|
|
546
|
+
// Empty staging - no scene to render. Skip the image entirely; the
|
|
547
|
+
// server's primer already drops the image part when this is falsy.
|
|
548
|
+
return null;
|
|
497
549
|
}
|
|
498
550
|
try {
|
|
499
551
|
const appState = excalidrawAPI.getAppState();
|
|
@@ -504,7 +556,8 @@ function App() {
|
|
|
504
556
|
files,
|
|
505
557
|
mimeType: "image/png",
|
|
506
558
|
});
|
|
507
|
-
|
|
559
|
+
const downscaled = await downscaleBlobByHalf(blob);
|
|
560
|
+
return await blobToDataUrl(downscaled);
|
|
508
561
|
} catch (error) {
|
|
509
562
|
console.warn("Failed to export staging scene, falling back to viewport canvas:", error);
|
|
510
563
|
return captureCanvasDataUrl();
|
|
@@ -549,18 +602,6 @@ function App() {
|
|
|
549
602
|
React.createElement(
|
|
550
603
|
"aside",
|
|
551
604
|
{ className: "panel" },
|
|
552
|
-
isLive
|
|
553
|
-
? React.createElement(
|
|
554
|
-
"button",
|
|
555
|
-
{
|
|
556
|
-
className: "fullscreen-toggle",
|
|
557
|
-
onClick: toggleFullscreen,
|
|
558
|
-
title: isFullscreen ? "Exit fullscreen (Esc)" : "Fullscreen for screen sharing",
|
|
559
|
-
"aria-label": isFullscreen ? "Exit fullscreen" : "Enter fullscreen",
|
|
560
|
-
},
|
|
561
|
-
isFullscreen ? "⤓" : "⤢",
|
|
562
|
-
)
|
|
563
|
-
: null,
|
|
564
605
|
React.createElement(
|
|
565
606
|
"div",
|
|
566
607
|
{ className: "brand" },
|
|
@@ -620,7 +661,7 @@ function App() {
|
|
|
620
661
|
onClick: startPreso,
|
|
621
662
|
disabled: presoStarting,
|
|
622
663
|
},
|
|
623
|
-
presoStarting ? "Starting..." : "Start
|
|
664
|
+
presoStarting ? "Starting..." : "Start Preso →",
|
|
624
665
|
)
|
|
625
666
|
: null,
|
|
626
667
|
isLive
|
|
@@ -628,26 +669,40 @@ function App() {
|
|
|
628
669
|
"div",
|
|
629
670
|
{ className: "listen-controls" },
|
|
630
671
|
React.createElement(
|
|
631
|
-
"
|
|
632
|
-
{
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
: warmupState.state === "
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
? "
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
672
|
+
"div",
|
|
673
|
+
{ className: "listen-row" },
|
|
674
|
+
React.createElement(
|
|
675
|
+
"button",
|
|
676
|
+
{
|
|
677
|
+
className: `record-toggle ${listening ? "recording" : ""}`,
|
|
678
|
+
onClick: toggleListening,
|
|
679
|
+
disabled: starting || (warmupState.state === "running" && !listening),
|
|
680
|
+
title: warmupState.state === "running"
|
|
681
|
+
? "Waiting for prompt cache to warm up"
|
|
682
|
+
: warmupState.state === "exhausted"
|
|
683
|
+
? "Cache didn't fully prime; first turn may be slower"
|
|
684
|
+
: undefined,
|
|
685
|
+
},
|
|
686
|
+
React.createElement("span", { className: "record-icon" }, listening ? "■" : "●"),
|
|
687
|
+
" ",
|
|
688
|
+
listening
|
|
689
|
+
? "Stop"
|
|
690
|
+
: starting
|
|
691
|
+
? "Starting..."
|
|
692
|
+
: warmupState.state === "running"
|
|
693
|
+
? `Warming up... (${warmupState.attempt} / ${warmupState.maxAttempts})`
|
|
694
|
+
: "Start Talking",
|
|
695
|
+
),
|
|
696
|
+
React.createElement(
|
|
697
|
+
"button",
|
|
698
|
+
{
|
|
699
|
+
className: "fullscreen-toggle",
|
|
700
|
+
onClick: toggleFullscreen,
|
|
701
|
+
title: isFullscreen ? "Exit fullscreen (Esc)" : "Fullscreen for screen sharing",
|
|
702
|
+
"aria-label": isFullscreen ? "Exit fullscreen" : "Enter fullscreen",
|
|
703
|
+
},
|
|
704
|
+
fullscreenIcon(isFullscreen),
|
|
705
|
+
),
|
|
651
706
|
),
|
|
652
707
|
warmupState.state === "running" && !listening
|
|
653
708
|
? React.createElement(
|
|
@@ -657,7 +712,7 @@ function App() {
|
|
|
657
712
|
onClick: startAnyway,
|
|
658
713
|
title: "Skip warmup and start listening now. The first turn may be slower.",
|
|
659
714
|
},
|
|
660
|
-
"Start
|
|
715
|
+
"Start Anyway →",
|
|
661
716
|
)
|
|
662
717
|
: null,
|
|
663
718
|
warmupState.state === "exhausted" && !listening
|
|
@@ -681,7 +736,7 @@ function App() {
|
|
|
681
736
|
},
|
|
682
737
|
resetting ? "Resetting..." : resetConfirming
|
|
683
738
|
? "Click again to reset"
|
|
684
|
-
: mode === "staging" ? "Reset
|
|
739
|
+
: mode === "staging" ? "Reset Staging" : "Reset Session",
|
|
685
740
|
),
|
|
686
741
|
),
|
|
687
742
|
React.createElement(
|
|
@@ -734,6 +789,27 @@ function App() {
|
|
|
734
789
|
}) : null,
|
|
735
790
|
}),
|
|
736
791
|
),
|
|
792
|
+
mode === "staging"
|
|
793
|
+
? React.createElement(
|
|
794
|
+
"div",
|
|
795
|
+
{ className: "agent-instructions" },
|
|
796
|
+
React.createElement("label", { className: "agent-instructions-label", htmlFor: "agent-instructions-input" }, "Agent instructions"),
|
|
797
|
+
React.createElement("textarea", {
|
|
798
|
+
id: "agent-instructions-input",
|
|
799
|
+
className: "agent-instructions-input",
|
|
800
|
+
value: agentInstructions,
|
|
801
|
+
onChange: (e) => handleAgentInstructionsChange(e.target.value),
|
|
802
|
+
placeholder: "Optional. Tell the agent your preferences - e.g. 'Use a tight 4-color palette', 'Prefer drawings over text', 'Be funny'.",
|
|
803
|
+
rows: 4,
|
|
804
|
+
spellCheck: true,
|
|
805
|
+
}),
|
|
806
|
+
React.createElement(
|
|
807
|
+
"p",
|
|
808
|
+
{ className: "agent-instructions-hint" },
|
|
809
|
+
"Saved automatically. Takes effect on next Start Preso.",
|
|
810
|
+
),
|
|
811
|
+
)
|
|
812
|
+
: null,
|
|
737
813
|
error ? React.createElement("div", { className: "error" }, error) : null,
|
|
738
814
|
),
|
|
739
815
|
);
|
|
@@ -1229,4 +1305,27 @@ function canvasToBlob(canvas) {
|
|
|
1229
1305
|
});
|
|
1230
1306
|
}
|
|
1231
1307
|
|
|
1308
|
+
// Halve each dimension before sending to the agent. ~4x fewer pixels means
|
|
1309
|
+
// ~4x fewer image tokens and a smaller WS payload, while shapes and labels
|
|
1310
|
+
// stay legible enough for the model to do visual sanity checks.
|
|
1311
|
+
async function downscaleBlobByHalf(blob) {
|
|
1312
|
+
try {
|
|
1313
|
+
const bitmap = await createImageBitmap(blob);
|
|
1314
|
+
const w = Math.max(1, Math.floor(bitmap.width / 2));
|
|
1315
|
+
const h = Math.max(1, Math.floor(bitmap.height / 2));
|
|
1316
|
+
const canvas = document.createElement("canvas");
|
|
1317
|
+
canvas.width = w;
|
|
1318
|
+
canvas.height = h;
|
|
1319
|
+
const ctx = canvas.getContext("2d");
|
|
1320
|
+
ctx.imageSmoothingEnabled = true;
|
|
1321
|
+
ctx.imageSmoothingQuality = "high";
|
|
1322
|
+
ctx.drawImage(bitmap, 0, 0, w, h);
|
|
1323
|
+
bitmap.close?.();
|
|
1324
|
+
return await canvasToBlob(canvas);
|
|
1325
|
+
} catch (error) {
|
|
1326
|
+
console.warn("Image downscale failed, sending original:", error);
|
|
1327
|
+
return blob;
|
|
1328
|
+
}
|
|
1329
|
+
}
|
|
1330
|
+
|
|
1232
1331
|
createRoot(document.getElementById("app")).render(React.createElement(App));
|
package/public/style.css
CHANGED
|
@@ -113,17 +113,63 @@ body {
|
|
|
113
113
|
line-height: 1.4;
|
|
114
114
|
}
|
|
115
115
|
|
|
116
|
+
.agent-instructions {
|
|
117
|
+
display: flex;
|
|
118
|
+
flex-direction: column;
|
|
119
|
+
gap: 6px;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
.agent-instructions-label {
|
|
123
|
+
font-size: 13px;
|
|
124
|
+
font-weight: 600;
|
|
125
|
+
color: #1e1e1e;
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
.agent-instructions-input {
|
|
129
|
+
width: 100%;
|
|
130
|
+
resize: vertical;
|
|
131
|
+
min-height: 84px;
|
|
132
|
+
font-family: inherit;
|
|
133
|
+
font-size: 13px;
|
|
134
|
+
line-height: 1.45;
|
|
135
|
+
color: #1e1e1e;
|
|
136
|
+
background: #fffdf8;
|
|
137
|
+
border: 1px solid #dedbd2;
|
|
138
|
+
border-radius: 6px;
|
|
139
|
+
padding: 9px 10px;
|
|
140
|
+
outline: none;
|
|
141
|
+
transition: border-color 120ms ease, box-shadow 120ms ease;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
.agent-instructions-input:focus {
|
|
145
|
+
border-color: #1f6feb;
|
|
146
|
+
box-shadow: 0 0 0 3px rgba(31, 111, 235, 0.18);
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
.agent-instructions-input::placeholder {
|
|
150
|
+
color: #a39b8e;
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
.agent-instructions-hint {
|
|
154
|
+
margin: 0;
|
|
155
|
+
font-size: 12px;
|
|
156
|
+
color: #6d675e;
|
|
157
|
+
}
|
|
158
|
+
|
|
116
159
|
.controls {
|
|
117
160
|
display: flex;
|
|
118
161
|
flex-direction: column;
|
|
119
162
|
gap: 8px;
|
|
120
163
|
}
|
|
121
164
|
|
|
165
|
+
.controls button {
|
|
166
|
+
font-size: 15px;
|
|
167
|
+
padding: 13px 14px;
|
|
168
|
+
}
|
|
169
|
+
|
|
122
170
|
.start-preso {
|
|
123
171
|
background: #1f6feb;
|
|
124
172
|
border-color: #1f6feb;
|
|
125
|
-
font-size: 15px;
|
|
126
|
-
padding: 13px 14px;
|
|
127
173
|
letter-spacing: 0.01em;
|
|
128
174
|
}
|
|
129
175
|
|
|
@@ -144,19 +190,25 @@ body {
|
|
|
144
190
|
background: #f3f0e7;
|
|
145
191
|
}
|
|
146
192
|
|
|
193
|
+
.listen-row {
|
|
194
|
+
display: flex;
|
|
195
|
+
gap: 6px;
|
|
196
|
+
align-items: stretch;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
.listen-row .record-toggle {
|
|
200
|
+
flex: 1;
|
|
201
|
+
min-width: 0;
|
|
202
|
+
}
|
|
203
|
+
|
|
147
204
|
.fullscreen-toggle {
|
|
148
|
-
position: absolute;
|
|
149
|
-
top: 14px;
|
|
150
|
-
right: 14px;
|
|
151
|
-
z-index: 10;
|
|
152
205
|
background: transparent;
|
|
153
206
|
color: #6d675e;
|
|
154
|
-
border:
|
|
155
|
-
|
|
156
|
-
font-size: 16px;
|
|
207
|
+
border: 1px solid #dedbd2;
|
|
208
|
+
font-size: 22px;
|
|
157
209
|
line-height: 1;
|
|
158
|
-
|
|
159
|
-
|
|
210
|
+
flex: 0 0 auto;
|
|
211
|
+
aspect-ratio: 1;
|
|
160
212
|
}
|
|
161
213
|
|
|
162
214
|
.fullscreen-toggle:hover:not(:disabled) {
|
|
@@ -179,6 +231,7 @@ body {
|
|
|
179
231
|
.mode-toggle {
|
|
180
232
|
display: inline-flex;
|
|
181
233
|
align-items: stretch;
|
|
234
|
+
margin-left: auto;
|
|
182
235
|
padding: 2px;
|
|
183
236
|
gap: 0;
|
|
184
237
|
border-radius: 999px;
|
package/src/cli.js
CHANGED
|
@@ -54,8 +54,6 @@ async function main() {
|
|
|
54
54
|
});
|
|
55
55
|
|
|
56
56
|
console.log(`autopreso listening at ${url}`);
|
|
57
|
-
console.log(`whiteboard agent: ${agentProvider.provider} ${agentProvider.requestedModel ?? agentProvider.model}`);
|
|
58
|
-
console.log(`settings file: ${SETTINGS_PATH}`);
|
|
59
57
|
|
|
60
58
|
if (options.openBrowser) {
|
|
61
59
|
await open(url);
|
|
@@ -84,6 +82,8 @@ Environment:
|
|
|
84
82
|
CODEX_BASE_URL Seeds the Codex backend URL on first run
|
|
85
83
|
OLLAMA_MODEL Seeds the Ollama model on first run
|
|
86
84
|
OLLAMA_BASE_URL Seeds the Ollama base URL on first run
|
|
85
|
+
AUTOPRESO_CACHE_LOG Cache usage log path. Default: ~/.config/autopreso/logs/cache.log
|
|
86
|
+
AUTOPRESO_DEBUG_LOG Agent debug log path. Default: ~/.config/autopreso/logs/debug.log
|
|
87
87
|
|
|
88
88
|
Models and providers are configured in the UI after launch. Settings persist at:
|
|
89
89
|
${SETTINGS_PATH}
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import { WebSocket } from "ws";
|
|
2
2
|
|
|
3
|
+
import { buildTranscriptionVocabularyPrompt } from "./whiteboard-keywords.js";
|
|
4
|
+
|
|
3
5
|
const REALTIME_URL = "wss://api.openai.com/v1/realtime?intent=transcription";
|
|
4
6
|
|
|
5
7
|
export function createOpenAITranscription({
|
|
@@ -8,6 +10,7 @@ export function createOpenAITranscription({
|
|
|
8
10
|
options,
|
|
9
11
|
env = process.env,
|
|
10
12
|
createWebSocket = (url, protocols, init) => new WebSocket(url, protocols, init),
|
|
13
|
+
log = console,
|
|
11
14
|
}) {
|
|
12
15
|
let socket = null;
|
|
13
16
|
let readyPromise = null;
|
|
@@ -17,6 +20,7 @@ export function createOpenAITranscription({
|
|
|
17
20
|
let pendingAudio = [];
|
|
18
21
|
let partialText = "";
|
|
19
22
|
let bufferedSinceCommit = false;
|
|
23
|
+
let vocabularyPrompt = "";
|
|
20
24
|
|
|
21
25
|
function ensureSocket() {
|
|
22
26
|
if (socket) return socket;
|
|
@@ -39,6 +43,8 @@ export function createOpenAITranscription({
|
|
|
39
43
|
|
|
40
44
|
socket.on("open", () => {
|
|
41
45
|
configured = true;
|
|
46
|
+
const transcription = { model: options.openaiTranscriptionModel };
|
|
47
|
+
if (vocabularyPrompt) transcription.prompt = vocabularyPrompt;
|
|
42
48
|
socket.send(JSON.stringify({
|
|
43
49
|
type: "session.update",
|
|
44
50
|
session: {
|
|
@@ -46,7 +52,7 @@ export function createOpenAITranscription({
|
|
|
46
52
|
audio: {
|
|
47
53
|
input: {
|
|
48
54
|
format: { type: "audio/pcm", rate: 24000 },
|
|
49
|
-
transcription
|
|
55
|
+
transcription,
|
|
50
56
|
},
|
|
51
57
|
},
|
|
52
58
|
},
|
|
@@ -115,6 +121,36 @@ export function createOpenAITranscription({
|
|
|
115
121
|
connection.send(JSON.stringify({ type: "input_audio_buffer.append", audio }));
|
|
116
122
|
bufferedSinceCommit = true;
|
|
117
123
|
},
|
|
124
|
+
/** @param {{ keywords?: string[] | null }} [ctx] */
|
|
125
|
+
setSessionContext: (ctx) => {
|
|
126
|
+
const keywords = ctx?.keywords ?? [];
|
|
127
|
+
const prompt = buildTranscriptionVocabularyPrompt(keywords);
|
|
128
|
+
// Empty input + nothing to clear: bail. Empty input + a previously
|
|
129
|
+
// pushed prompt: fall through and emit a clearing session.update.
|
|
130
|
+
if (!prompt && !vocabularyPrompt) return;
|
|
131
|
+
if (prompt === vocabularyPrompt) return;
|
|
132
|
+
vocabularyPrompt = prompt;
|
|
133
|
+
if (prompt) {
|
|
134
|
+
log.debug?.(`[openai-transcription] vocabulary prompt set (${keywords.length} terms, ${prompt.length} chars)`);
|
|
135
|
+
} else {
|
|
136
|
+
log.debug?.(`[openai-transcription] vocabulary prompt cleared`);
|
|
137
|
+
}
|
|
138
|
+
if (!socket || !configured) return;
|
|
139
|
+
socket.send(JSON.stringify({
|
|
140
|
+
type: "session.update",
|
|
141
|
+
session: {
|
|
142
|
+
type: "transcription",
|
|
143
|
+
audio: {
|
|
144
|
+
input: {
|
|
145
|
+
transcription: {
|
|
146
|
+
model: options.openaiTranscriptionModel,
|
|
147
|
+
prompt: vocabularyPrompt,
|
|
148
|
+
},
|
|
149
|
+
},
|
|
150
|
+
},
|
|
151
|
+
},
|
|
152
|
+
}));
|
|
153
|
+
},
|
|
118
154
|
stop: () => {
|
|
119
155
|
if (!socket || !configured) return;
|
|
120
156
|
// If server-side VAD already auto-committed (or no audio was sent), skip the manual
|
package/src/server.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { createHash } from "node:crypto";
|
|
2
|
-
import { appendFileSync } from "node:fs";
|
|
2
|
+
import { appendFileSync, mkdirSync } from "node:fs";
|
|
3
3
|
import { createServer as createHttpServer } from "node:http";
|
|
4
|
+
import os from "node:os";
|
|
4
5
|
import path from "node:path";
|
|
5
6
|
import { fileURLToPath } from "node:url";
|
|
6
7
|
|
|
@@ -16,8 +17,10 @@ import {
|
|
|
16
17
|
} from "./agent-provider.js";
|
|
17
18
|
import { createMoonshineTranscription as createDefaultMoonshineTranscription } from "./moonshine-transcription.js";
|
|
18
19
|
import { createOpenAITranscription as createDefaultOpenAITranscription } from "./openai-transcription.js";
|
|
20
|
+
import { validateAgentInstructions } from "./settings-store.js";
|
|
19
21
|
import { broadcast, createWhiteboardSession } from "./whiteboard-session.js";
|
|
20
22
|
import { detectMalformedLayoutWarnings, normalizeWhiteboardElements } from "./whiteboard-elements.js";
|
|
23
|
+
import { extractWhiteboardKeywords } from "./whiteboard-keywords.js";
|
|
21
24
|
import { applyWhiteboardEditOperations, formatLineNumberedWhiteboard } from "./whiteboard-tools.js";
|
|
22
25
|
|
|
23
26
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
@@ -66,17 +69,33 @@ export async function startServer(options) {
|
|
|
66
69
|
|
|
67
70
|
app.post("/api/session/reset", (_req, res) => {
|
|
68
71
|
state.reset();
|
|
72
|
+
transcription.setSessionContext({ keywords: [] });
|
|
69
73
|
broadcast(wss, { type: "whiteboard:update", elements: state.elements });
|
|
70
74
|
res.json({ ok: true });
|
|
71
75
|
});
|
|
72
76
|
|
|
73
|
-
app.post("/api/preso/start", (req, res) => {
|
|
77
|
+
app.post("/api/preso/start", async (req, res) => {
|
|
74
78
|
const { stagingElements, stagingScreenshot } = req.body ?? {};
|
|
75
79
|
if (!Array.isArray(stagingElements)) {
|
|
76
80
|
return res.status(400).json({ error: "stagingElements (array) is required." });
|
|
77
81
|
}
|
|
82
|
+
// Snapshot the user's free-form Agent instructions at start so the cached
|
|
83
|
+
// system-prompt prefix stays stable for the whole preso. Edits made to
|
|
84
|
+
// the textarea after Start Preso land on disk but only take effect on the
|
|
85
|
+
// next Start Preso.
|
|
86
|
+
let settings;
|
|
87
|
+
try {
|
|
88
|
+
settings = options.settingsStore ? await options.settingsStore.load() : null;
|
|
89
|
+
validateAgentInstructions(settings?.agentInstructions);
|
|
90
|
+
} catch (error) {
|
|
91
|
+
return res.status(400).json({ error: error.message });
|
|
92
|
+
}
|
|
93
|
+
const agentInstructions = typeof settings?.agentInstructions === "string" ? settings.agentInstructions : "";
|
|
78
94
|
const primerMessage = buildStagingPrimerMessage({ stagingElements, stagingScreenshot });
|
|
79
|
-
|
|
95
|
+
const keywords = extractWhiteboardKeywords(stagingElements);
|
|
96
|
+
console.log(`[autopreso] preso/start: ${keywords.length} staging keyword(s) for transcription bias`);
|
|
97
|
+
transcription.setSessionContext({ keywords });
|
|
98
|
+
state.startPreso({ primerMessage, agentInstructions });
|
|
80
99
|
state.startWarmupLoop({
|
|
81
100
|
runOnce: ({ attempt }) =>
|
|
82
101
|
runWhiteboardWarmupOnce({
|
|
@@ -109,6 +128,7 @@ export async function startServer(options) {
|
|
|
109
128
|
|
|
110
129
|
app.post("/api/preso/back-to-staging", (_req, res) => {
|
|
111
130
|
state.backToStaging();
|
|
131
|
+
transcription.setSessionContext({ keywords: [] });
|
|
112
132
|
broadcast(wss, { type: "mode", mode: state.mode });
|
|
113
133
|
res.json({ ok: true });
|
|
114
134
|
});
|
|
@@ -189,7 +209,7 @@ export async function startServer(options) {
|
|
|
189
209
|
});
|
|
190
210
|
});
|
|
191
211
|
|
|
192
|
-
await new Promise((resolve) => httpServer.listen(options.port, options.host, resolve));
|
|
212
|
+
await new Promise((resolve) => httpServer.listen(options.port, options.host, () => resolve(undefined)));
|
|
193
213
|
const address = httpServer.address();
|
|
194
214
|
const port = typeof address === "object" && address ? address.port : options.port;
|
|
195
215
|
return {
|
|
@@ -203,6 +223,8 @@ export async function startServer(options) {
|
|
|
203
223
|
async function createTranscriptionManager({ options, wss, queueTranscript }) {
|
|
204
224
|
let current = null;
|
|
205
225
|
let label = "";
|
|
226
|
+
let sessionContext = null;
|
|
227
|
+
let hasSessionContext = false;
|
|
206
228
|
|
|
207
229
|
const sendTranscript = (message) => broadcast(wss, message);
|
|
208
230
|
|
|
@@ -243,15 +265,16 @@ async function createTranscriptionManager({ options, wss, queueTranscript }) {
|
|
|
243
265
|
const factoryOptions = buildOptionsForFactory(settings);
|
|
244
266
|
const factory = pickFactory(settings);
|
|
245
267
|
label = newLabel;
|
|
246
|
-
options.onStatus?.(`
|
|
268
|
+
options.onStatus?.(`Preparing ${label} transcription model...`);
|
|
247
269
|
current = factory({
|
|
248
270
|
sendTranscript,
|
|
249
271
|
queueTranscript,
|
|
250
272
|
options: factoryOptions,
|
|
251
273
|
env: factoryOptions.env,
|
|
252
274
|
});
|
|
275
|
+
if (hasSessionContext) current.setSessionContext?.(sessionContext);
|
|
253
276
|
await current.ready();
|
|
254
|
-
options.onStatus?.(`${label} transcription model
|
|
277
|
+
options.onStatus?.(`${label} transcription model ready.`);
|
|
255
278
|
}
|
|
256
279
|
|
|
257
280
|
await applyCurrent();
|
|
@@ -260,6 +283,11 @@ async function createTranscriptionManager({ options, wss, queueTranscript }) {
|
|
|
260
283
|
sendAudio: (audio) => current?.sendAudio(audio),
|
|
261
284
|
stop: () => current?.stop(),
|
|
262
285
|
close: () => current?.close(),
|
|
286
|
+
setSessionContext: (ctx) => {
|
|
287
|
+
sessionContext = ctx;
|
|
288
|
+
hasSessionContext = true;
|
|
289
|
+
current?.setSessionContext?.(ctx);
|
|
290
|
+
},
|
|
263
291
|
getLabel: () => label,
|
|
264
292
|
applyCurrent,
|
|
265
293
|
};
|
|
@@ -306,7 +334,7 @@ export async function runWhiteboardAgent({ transcript, state, wss, options, gene
|
|
|
306
334
|
// are text-only across these APIs. This keeps the staging context as a
|
|
307
335
|
// first-class system instruction rather than a stale early user message.
|
|
308
336
|
const primerText = extractPrimerText(state.agentHistory?.[0]);
|
|
309
|
-
const effectiveSystem = buildEffectiveSystemPrompt(baseSystem, primerText);
|
|
337
|
+
const effectiveSystem = buildEffectiveSystemPrompt(baseSystem, primerText, state.agentInstructions);
|
|
310
338
|
const messages = primerText ? reshapeMessagesForCodex(rawMessages) : rawMessages;
|
|
311
339
|
options.onAgentEvent?.({ type: "model:start", transcript, system: effectiveSystem, messages, timestamp: new Date().toISOString() });
|
|
312
340
|
const codexInstructions = agentProvider.provider === "codex" ? effectiveSystem : null;
|
|
@@ -475,7 +503,7 @@ export async function runWhiteboardWarmupOnce({ state, options, attempt = 1, gen
|
|
|
475
503
|
? resolveAgentProviderFromSettings({ settings: await options.settingsStore.load(), env: options.env ?? process.env })
|
|
476
504
|
: defaultWhiteboardAgentProvider(options));
|
|
477
505
|
const primerText = extractPrimerText(state.agentHistory[0]);
|
|
478
|
-
const effectiveSystem = buildEffectiveSystemPrompt(baseSystem, primerText);
|
|
506
|
+
const effectiveSystem = buildEffectiveSystemPrompt(baseSystem, primerText, state.agentInstructions);
|
|
479
507
|
|
|
480
508
|
// Each warmup attempt sends the IDENTICAL prefix [primer, WARMUP_USER_MESSAGE]
|
|
481
509
|
// so attempt N hits the cache that attempt N-1 wrote. We must NOT mutate
|
|
@@ -566,8 +594,22 @@ function summarizeAgentResult(result) {
|
|
|
566
594
|
);
|
|
567
595
|
}
|
|
568
596
|
|
|
569
|
-
const
|
|
570
|
-
const
|
|
597
|
+
const DEFAULT_LOG_DIR = path.join(os.homedir(), ".config", "autopreso", "logs");
const CACHE_USAGE_LOG_PATH = process.env.AUTOPRESO_CACHE_LOG ?? path.join(DEFAULT_LOG_DIR, "cache.log");
const DEBUG_LOG_PATH = process.env.AUTOPRESO_DEBUG_LOG ?? path.join(DEFAULT_LOG_DIR, "debug.log");

// Tracks whether directory creation has already been attempted so the mkdir
// work runs at most once per process.
let logDirsEnsured = false;

/**
 * Best-effort, one-time creation of the parent directories for both log
 * files. mkdir failures are deliberately swallowed here: if a directory truly
 * cannot be created, the appendFileSync call that writes the log entry will
 * raise a meaningful error at the point of use.
 */
function ensureLogDirs() {
  if (logDirsEnsured) return;
  const tryMkdir = (logFile) => {
    try {
      mkdirSync(path.dirname(logFile), { recursive: true });
    } catch {
      // Swallowed on purpose; appendFileSync surfaces any real failure.
    }
  };
  tryMkdir(CACHE_USAGE_LOG_PATH);
  tryMkdir(DEBUG_LOG_PATH);
  logDirsEnsured = true;
}
|
|
571
613
|
|
|
572
614
|
function summarizeMessageForDump(message) {
|
|
573
615
|
if (typeof message?.content === "string") {
|
|
@@ -593,7 +635,9 @@ function summarizeMessageForDump(message) {
|
|
|
593
635
|
return { role: message?.role, content: message?.content };
|
|
594
636
|
}
|
|
595
637
|
|
|
596
|
-
export function dumpAgentRequest(label,
|
|
638
|
+
export function dumpAgentRequest(label, args) {
|
|
639
|
+
const { system, messages, instructions, primerText } = args ?? {};
|
|
640
|
+
ensureLogDirs();
|
|
597
641
|
try {
|
|
598
642
|
const record = {
|
|
599
643
|
ts: new Date().toISOString(),
|
|
@@ -617,6 +661,7 @@ export function dumpAgentRequest(label, { system, messages, instructions, primer
|
|
|
617
661
|
}
|
|
618
662
|
|
|
619
663
|
export function dumpToolCall(toolName, input, sceneIds, result) {
|
|
664
|
+
ensureLogDirs();
|
|
620
665
|
try {
|
|
621
666
|
const record = {
|
|
622
667
|
ts: new Date().toISOString(),
|
|
@@ -681,11 +726,7 @@ function toolDefinitionFingerprintInput(tools) {
|
|
|
681
726
|
export function logAgentUsage(label, result, extras = {}) {
|
|
682
727
|
const { input, cached, output, reasoning } = extractAgentUsage(result);
|
|
683
728
|
const cachePct = input > 0 ? Math.round((cached / input) * 100) : 0;
|
|
684
|
-
|
|
685
|
-
? ` system=${extras.fingerprints.system} primer=${extras.fingerprints.primer} tools=${extras.fingerprints.tools}`
|
|
686
|
-
: "";
|
|
687
|
-
const line = `[cache] ${label.padEnd(7)} input=${input} cached=${cached} (${cachePct}%) output=${output}${reasoning ? ` reasoning=${reasoning}` : ""}${fingerprintsSuffix}`;
|
|
688
|
-
console.log(line);
|
|
729
|
+
ensureLogDirs();
|
|
689
730
|
try {
|
|
690
731
|
const record = {
|
|
691
732
|
ts: new Date().toISOString(),
|
|
@@ -695,6 +736,7 @@ export function logAgentUsage(label, result, extras = {}) {
|
|
|
695
736
|
cachePct,
|
|
696
737
|
output,
|
|
697
738
|
reasoning,
|
|
739
|
+
rawUsage: result?.usage ?? null,
|
|
698
740
|
...extras,
|
|
699
741
|
};
|
|
700
742
|
appendFileSync(CACHE_USAGE_LOG_PATH, JSON.stringify(record) + "\n");
|
|
@@ -719,9 +761,16 @@ function createWhiteboardAgentProviderOptions(agentProvider, effectiveSystem) {
|
|
|
719
761
|
};
|
|
720
762
|
}
|
|
721
763
|
|
|
722
|
-
export function buildEffectiveSystemPrompt(systemPrompt, primerText) {
|
|
723
|
-
|
|
724
|
-
|
|
764
|
+
/**
 * Compose the system prompt sent to the whiteboard agent.
 *
 * Starts from the base system prompt, then appends (in order, each separated
 * by a blank line): the user's free-form "Agent instructions" (trimmed and
 * prefixed with a "User instructions:" header, only when non-empty) and the
 * staging primer text (only when present).
 *
 * @param {string} systemPrompt - Base system prompt; returned unchanged when
 *   there is nothing to append.
 * @param {string} primerText - Staging-canvas primer text, may be falsy.
 * @param {string} [userInstructions=""] - Free-form user instructions;
 *   non-string values are treated as absent.
 * @returns {string} The combined system prompt.
 */
export function buildEffectiveSystemPrompt(systemPrompt, primerText, userInstructions = "") {
  const extras = [];
  const instructions = typeof userInstructions === "string" ? userInstructions.trim() : "";
  if (instructions) {
    extras.push(`User instructions:\n${instructions}`);
  }
  if (primerText) {
    extras.push(primerText);
  }
  // Fold each extra section onto the base prompt with a blank-line separator.
  return extras.reduce((prompt, section) => `${prompt}\n\n${section}`, systemPrompt);
}
|
|
726
775
|
|
|
727
776
|
export function extractPrimerText(primerMessage) {
|
|
@@ -762,7 +811,7 @@ function withTimeout(promise, timeoutMs, message) {
|
|
|
762
811
|
return Promise.race([promise, timeoutPromise]).finally(() => clearTimeout(timeout));
|
|
763
812
|
}
|
|
764
813
|
|
|
765
|
-
export function buildWhiteboardAgentMessages({ agentHistory, elements, latestScreenshot, transcript }) {
|
|
814
|
+
export function buildWhiteboardAgentMessages({ agentHistory, elements, latestScreenshot = null, transcript }) {
|
|
766
815
|
return [
|
|
767
816
|
...agentHistory,
|
|
768
817
|
{ role: "user", content: formatSpeakerTurn(transcript) },
|
|
@@ -872,6 +921,7 @@ CRITICAL: one tool call per turn.
|
|
|
872
921
|
- If you only need to move the viewport (no edits), pass just viewport. If you only need to edit (no viewport change), pass just operations. If you need both, pass both.
|
|
873
922
|
|
|
874
923
|
You receive a screenshot of the audience's CURRENT VIEWPORT (not the entire infinite canvas) on each turn. Use it to verify your edits actually rendered well: look for clipped labels, overlapping shapes, arrows that miss their targets, and check that the right region is visible. The line-numbered text content is authoritative for positions; the screenshot is for visual sanity checking.
|
|
924
|
+
Attached images (both the staging primer and the per-turn viewport screenshot) are downscaled 2x in each dimension (4x fewer pixels) to save tokens. Do NOT read pixel dimensions off the image as if they were the canvas's real size; trust the line-numbered text for coordinates and only use the image for visual sanity checks.
|
|
875
925
|
The audience's viewport is whatever you last set it to. They cannot see anything outside it. So:
|
|
876
926
|
- After every meaningful canvas update, pass viewport with action "scroll_to_content" AND a focus_ids list naming the 1-5 elements that represent the active talking point. The viewport will center on exactly those IDs. Pass the IDs of what the speaker is talking about RIGHT NOW, not the whole diagram.
|
|
877
927
|
- When the speaker shifts topic to a different region of the canvas, send a new whiteboard_apply with viewport scroll_to_content and the new region's focus_ids.
|
package/src/settings-store.js
CHANGED
|
@@ -3,6 +3,8 @@ import path from "node:path";
|
|
|
3
3
|
|
|
4
4
|
import { readCodexCliAuthSync } from "./codex-auth.js";
|
|
5
5
|
|
|
6
|
+
export const MAX_AGENT_INSTRUCTIONS_CHARS = 100_000;
|
|
7
|
+
|
|
6
8
|
export const DEFAULT_SETTINGS = Object.freeze({
|
|
7
9
|
agent: {
|
|
8
10
|
provider: "openai",
|
|
@@ -18,6 +20,7 @@ export const DEFAULT_SETTINGS = Object.freeze({
|
|
|
18
20
|
apiKeys: {
|
|
19
21
|
openai: "",
|
|
20
22
|
},
|
|
23
|
+
agentInstructions: "",
|
|
21
24
|
});
|
|
22
25
|
|
|
23
26
|
export function createSettingsStore({ filePath, env = process.env, readCodexAuth = readCodexCliAuthSync }) {
|
|
@@ -56,6 +59,7 @@ export function createSettingsStore({ filePath, env = process.env, readCodexAuth
|
|
|
56
59
|
|
|
57
60
|
async function save(partial) {
|
|
58
61
|
if (!cached) await load();
|
|
62
|
+
validateAgentInstructions(partial?.agentInstructions);
|
|
59
63
|
cached = deepMerge(cached, partial);
|
|
60
64
|
await writeToDisk(cached);
|
|
61
65
|
return cached;
|
|
@@ -135,3 +139,9 @@ function trimOrEmpty(value) {
|
|
|
135
139
|
if (typeof value !== "string") return "";
|
|
136
140
|
return value.trim();
|
|
137
141
|
}
|
|
142
|
+
|
|
143
|
+
/**
 * Reject oversized agent instructions before they are persisted.
 *
 * Non-string values (including undefined when the field is absent from a
 * partial settings update) pass through untouched; only a string longer than
 * MAX_AGENT_INSTRUCTIONS_CHARS is rejected.
 *
 * @param {unknown} value - Candidate agentInstructions value.
 * @throws {Error} When value is a string exceeding the character limit.
 */
export function validateAgentInstructions(value) {
  if (typeof value !== "string") return;
  if (value.length <= MAX_AGENT_INSTRUCTIONS_CHARS) return;
  throw new Error(`Agent instructions must be ${MAX_AGENT_INSTRUCTIONS_CHARS} characters or fewer.`);
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export function createTranscriptTurnQueue({ runTurn, debounceMs = 150, isReady = () => true }) {
|
|
1
|
+
export function createTranscriptTurnQueue({ runTurn, debounceMs = 150, isReady = (_text) => true }) {
|
|
2
2
|
let running = false;
|
|
3
3
|
let buffered = [];
|
|
4
4
|
let current = Promise.resolve();
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
const MIN_TERM_LENGTH = 3;
const DEFAULT_MAX_PROMPT_CHARS = 500;
const PROMPT_PREFIX = "Domain vocabulary that may appear: ";
const PROMPT_SUFFIX = ".";

/**
 * Extract candidate vocabulary terms from whiteboard elements.
 *
 * Reads the text of `text` elements and the label text of labeled shapes,
 * splits each on newlines, and keeps trimmed lines that are at least
 * MIN_TERM_LENGTH characters long and contain at least one ASCII letter.
 * Terms are de-duplicated case-insensitively (first spelling wins).
 *
 * @param {Array<object>|*} elements - Whiteboard elements; non-arrays yield [].
 * @returns {string[]} Unique terms sorted longest-first, so the prompt
 *   builder can greedily pack the most specific terms into its budget.
 */
export function extractWhiteboardKeywords(elements) {
  if (!Array.isArray(elements)) return [];

  // lowercase key -> original spelling (first occurrence wins)
  const termsByKey = new Map();
  const recordLine = (rawLine) => {
    const term = rawLine.trim();
    if (term.length < MIN_TERM_LENGTH) return;
    if (!/[a-zA-Z]/.test(term)) return;
    const key = term.toLowerCase();
    if (!termsByKey.has(key)) termsByKey.set(key, term);
  };

  for (const element of elements) {
    if (!element || typeof element !== "object") continue;
    const texts = [];
    if (element.type === "text" && typeof element.text === "string") {
      texts.push(element.text);
    }
    if (element.label && typeof element.label.text === "string") {
      texts.push(element.label.text);
    }
    for (const text of texts) {
      text.split(/\r?\n/).forEach(recordLine);
    }
  }

  return [...termsByKey.values()].sort((a, b) => b.length - a.length);
}
|
|
32
|
+
|
|
33
|
+
/**
 * Build a short vocabulary-biasing prompt for the transcription model.
 *
 * Terms are appended greedily in the given order (callers pass them
 * longest-first). A term that would push the final prompt past `maxChars`
 * is skipped rather than stopping the loop, so shorter terms later in the
 * list can still consume the remaining budget.
 *
 * @param {string[]} keywords - Candidate terms, typically longest-first.
 * @param {{maxChars?: number}} [options] - Overall character budget for the
 *   assembled prompt (prefix + terms + suffix).
 * @returns {string} The assembled prompt, or "" when nothing fits.
 */
export function buildTranscriptionVocabularyPrompt(keywords, { maxChars = DEFAULT_MAX_PROMPT_CHARS } = {}) {
  if (!Array.isArray(keywords) || keywords.length === 0) return "";

  let joined = "";
  for (const keyword of keywords) {
    const candidate = joined ? `${joined}, ${keyword}` : keyword;
    if (PROMPT_PREFIX.length + candidate.length + PROMPT_SUFFIX.length > maxChars) continue;
    joined = candidate;
  }

  if (!joined) return "";
  return `${PROMPT_PREFIX}${joined}${PROMPT_SUFFIX}`;
}
|
|
@@ -40,6 +40,11 @@ export function createWhiteboardSession({ options, wss, runAgent }) {
|
|
|
40
40
|
agentBusy: false,
|
|
41
41
|
warmupBusy: false,
|
|
42
42
|
latestScreenshot: undefined,
|
|
43
|
+
// Snapshot of the user's free-form "Agent instructions" textarea taken at
|
|
44
|
+
// /api/preso/start. Frozen for the duration of the preso so the cached
|
|
45
|
+
// system-prompt prefix the warmup loop primes stays stable; mid-preso edits
|
|
46
|
+
// to the textarea only take effect on the next Start Preso.
|
|
47
|
+
agentInstructions: "",
|
|
43
48
|
warmupPromise: Promise.resolve(),
|
|
44
49
|
// Snapshot of the warmup loop state, also broadcast to clients via WS.
|
|
45
50
|
warmupState: { state: "idle", attempt: 0, maxAttempts: DEFAULT_WARMUP_MAX_ATTEMPTS },
|
|
@@ -102,11 +107,12 @@ export function createWhiteboardSession({ options, wss, runAgent }) {
|
|
|
102
107
|
state.agentHistory = [];
|
|
103
108
|
state.latestScreenshot = undefined;
|
|
104
109
|
};
|
|
105
|
-
state.startPreso = ({ primerMessage }) => {
|
|
110
|
+
state.startPreso = ({ primerMessage, agentInstructions = "" }) => {
|
|
106
111
|
state.mode = "live";
|
|
107
112
|
state.elements = seedElements();
|
|
108
113
|
state.latestScreenshot = undefined;
|
|
109
114
|
state.agentHistory = [primerMessage];
|
|
115
|
+
state.agentInstructions = typeof agentInstructions === "string" ? agentInstructions : "";
|
|
110
116
|
state.warmupPromise = Promise.resolve();
|
|
111
117
|
state.canvasDirtyForAgent = false;
|
|
112
118
|
// Reset warmup state for this preso. The startWarmupLoop call that follows
|