@ishlabs/cli 0.24.1 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/ask.js +3 -3
- package/dist/commands/iteration.js +1 -1
- package/dist/commands/study-analyze.js +1 -1
- package/dist/commands/study-run.js +80 -12
- package/dist/commands/study.js +11 -7
- package/dist/lib/alias-store.js +1 -1
- package/dist/lib/api-client.d.ts +2 -0
- package/dist/lib/docs.js +57 -42
- package/dist/lib/local-sim/actions.d.ts +10 -2
- package/dist/lib/local-sim/actions.js +16 -11
- package/dist/lib/local-sim/adb.d.ts +103 -0
- package/dist/lib/local-sim/adb.js +352 -0
- package/dist/lib/local-sim/android.d.ts +111 -0
- package/dist/lib/local-sim/android.js +499 -0
- package/dist/lib/local-sim/apk-manifest.d.ts +22 -0
- package/dist/lib/local-sim/apk-manifest.js +210 -0
- package/dist/lib/local-sim/browser.d.ts +22 -0
- package/dist/lib/local-sim/browser.js +65 -0
- package/dist/lib/local-sim/coordinates.d.ts +69 -0
- package/dist/lib/local-sim/coordinates.js +59 -0
- package/dist/lib/local-sim/device.d.ts +143 -0
- package/dist/lib/local-sim/device.js +152 -0
- package/dist/lib/local-sim/ios.d.ts +168 -0
- package/dist/lib/local-sim/ios.js +546 -0
- package/dist/lib/local-sim/loop.d.ts +14 -2
- package/dist/lib/local-sim/loop.js +166 -73
- package/dist/lib/local-sim/native-a11y.d.ts +97 -0
- package/dist/lib/local-sim/native-a11y.js +384 -0
- package/dist/lib/local-sim/simctl.d.ts +85 -0
- package/dist/lib/local-sim/simctl.js +273 -0
- package/dist/lib/local-sim/types.d.ts +37 -2
- package/dist/lib/local-sim/upload.d.ts +1 -1
- package/dist/lib/local-sim/upload.js +9 -6
- package/dist/lib/output.js +58 -12
- package/dist/lib/skill-content.js +10 -9
- package/package.json +2 -1
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Thin async wrappers over `xcrun simctl` + `idb` for the native-iOS sim path.
|
|
3
|
+
*
|
|
4
|
+
* Two tools, two jobs:
|
|
5
|
+
* - `xcrun simctl` drives the simulator LIFECYCLE (boot detection, install,
|
|
6
|
+
* terminate, launch) and the SCREENSHOT.
|
|
7
|
+
* - `idb` drives UI INPUT (tap/swipe/text/key) and reports the screen
|
|
8
|
+
* geometry (pixels, points, and the scale between them).
|
|
9
|
+
*
|
|
10
|
+
* COORDINATE SPACES (the key difference from Android, where screencap and tap
|
|
11
|
+
* share one pixel space):
|
|
12
|
+
* - `simctl io booted screenshot` writes a PNG in PIXELS (e.g. 1179x2556 @3x).
|
|
13
|
+
* - `idb ui tap/swipe` take POINTS (e.g. 393x852) — pixels / scale.
|
|
14
|
+
* The native sim TAPS in points (de-normalize 0-1000 against the POINT size)
|
|
15
|
+
* but RECORDS in PIXELS: dimensions() returns the pixel size so the loop's
|
|
16
|
+
* round-trip is exact. Recording in points would drift — the point grid (393)
|
|
17
|
+
* is coarser than the 0-1000 normalized grid, so it double-rounds. See
|
|
18
|
+
* IOSDevice for the full derivation.
|
|
19
|
+
*/
|
|
20
|
+
import { execFile } from "node:child_process";
|
|
21
|
+
import { existsSync } from "node:fs";
|
|
22
|
+
import { mkdtemp, readFile, rm } from "node:fs/promises";
|
|
23
|
+
import { tmpdir } from "node:os";
|
|
24
|
+
import { join } from "node:path";
|
|
25
|
+
import { promisify } from "node:util";
|
|
26
|
+
const execFileAsync = promisify(execFile);
|
|
27
|
+
// idb installs to ~/.local/bin via pip; resolve an explicit path so we don't
// depend on the caller's PATH. Override with ISH_IDB.
/**
 * Pick the `idb` executable to spawn.
 *
 * Candidates are probed in priority order and the first one that exists on
 * disk wins:
 *   1. `$ISH_IDB` (explicit override; ignored when that path does not exist),
 *   2. `$HOME/.local/bin/idb`,
 *   3. `/opt/homebrew/bin/idb`.
 * When no candidate exists, the bare command name `"idb"` is returned and the
 * lookup is left to whoever spawns it.
 *
 * @returns {string} Path (or bare command name) of the idb executable.
 */
function resolveIdb() {
    const home = process.env.HOME;
    const candidates = [
        process.env.ISH_IDB,
        // With HOME unset there is no per-user install location to probe; skip
        // the candidate rather than testing "/.local/bin/idb" at the fs root.
        home ? `${home}/.local/bin/idb` : undefined,
        "/opt/homebrew/bin/idb",
    ];
    for (const candidate of candidates) {
        if (candidate && existsSync(candidate)) {
            return candidate;
        }
    }
    return "idb";
}
|
|
41
|
+
// System tools are addressed by absolute path.
const XCRUN = "/usr/bin/xcrun";
const PLUTIL = "/usr/bin/plutil";
// idb has no fixed location; it is resolved once, when this module loads.
const IDB = resolveIdb();
// Timeouts, in milliseconds, handed to execFile.
const DEFAULT_TIMEOUT_MS = 30_000;
const SCREENSHOT_TIMEOUT_MS = 30_000;
|
|
46
|
+
/**
 * Error type thrown by the iOS simulator helpers in this module, so callers
 * can tell tool/simulator failures apart from other errors via `instanceof`
 * or `err.name === "IosError"`.
 */
export class IosError extends Error {
    /**
     * @param {string} message Human-readable failure description.
     * @param {{ cause?: unknown }} [options] Standard `Error` options; pass
     *   `{ cause }` to keep the underlying error attached.
     */
    constructor(message, options) {
        // Forward the options so `cause` is installed by the base class.
        super(message, options);
        this.name = "IosError";
    }
}
|
|
52
|
+
/**
 * Run `xcrun simctl <args>` and return trimmed stdout.
 *
 * @param {string[]} args Arguments placed after `simctl`.
 * @param {number} [timeoutMs] Passed to execFile as `timeout`.
 * @returns {Promise<string>} The command's stdout with surrounding whitespace removed.
 * @throws {IosError} When the command cannot be run or fails; the message
 *   names the attempted command and the original error is kept on `cause`.
 */
export async function simctl(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
    try {
        const { stdout } = await execFileAsync(XCRUN, ["simctl", ...args], {
            timeout: timeoutMs,
            // Cap on captured output (4 MiB).
            maxBuffer: 4 * 1024 * 1024,
        });
        return stdout.trim();
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        const wrapped = new IosError(`xcrun simctl ${args.join(" ")} failed: ${msg}`);
        // Keep the underlying error (exit code, signal, stderr) reachable
        // instead of flattening it to a string. Non-enumerable, like the
        // built-in `cause`.
        Object.defineProperty(wrapped, "cause", {
            value: err,
            writable: true,
            configurable: true,
            enumerable: false,
        });
        throw wrapped;
    }
}
|
|
66
|
+
/**
 * Run `idb <args>` and return trimmed stdout.
 *
 * @param {string[]} args Arguments passed to the idb executable.
 * @param {number} [timeoutMs] Passed to execFile as `timeout`.
 * @returns {Promise<string>} The command's stdout with surrounding whitespace removed.
 * @throws {IosError} When the command cannot be run or fails; the message
 *   names the attempted command and the original error is kept on `cause`.
 */
export async function idb(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
    try {
        const { stdout } = await execFileAsync(IDB, args, {
            timeout: timeoutMs,
            // Cap on captured output (8 MiB).
            maxBuffer: 8 * 1024 * 1024,
        });
        return stdout.trim();
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        const wrapped = new IosError(`idb ${args.join(" ")} failed: ${msg}`);
        // Keep the underlying error (exit code, signal, stderr) reachable
        // instead of flattening it to a string. Non-enumerable, like the
        // built-in `cause`.
        Object.defineProperty(wrapped, "cause", {
            value: err,
            writable: true,
            configurable: true,
            enumerable: false,
        });
        throw wrapped;
    }
}
|
|
80
|
+
// --- Device state ---
|
|
81
|
+
/**
 * Resolve the udid of the single booted simulator.
 *
 * Lists booted devices through `simctl list devices booted -j`, keeps the
 * entries whose `state` is "Booted", and insists on exactly one. Zero or
 * several booted simulators are rejected.
 *
 * @returns {Promise<string>} udid of the one booted simulator.
 * @throws {IosError} If simctl cannot be run, its output cannot be parsed, or
 *   the number of booted simulators is not exactly one.
 */
export async function requireOneBootedSimulator() {
    const listing = await simctl(["list", "devices", "booted", "-j"]).catch((err) => {
        const msg = err instanceof Error ? err.message : String(err);
        throw new IosError(`Could not run xcrun simctl. Is Xcode installed and a simulator booted? ${msg}`);
    });
    const running = [];
    try {
        // `devices` is an object of device lists; flatten them into one sequence.
        const { devices } = JSON.parse(listing);
        for (const sim of Object.values(devices).flatMap((group) => group)) {
            if (sim.state === "Booted") {
                running.push({ udid: sim.udid, name: sim.name });
            }
        }
    }
    catch {
        throw new IosError("Could not parse `simctl list devices booted -j` output.");
    }
    if (running.length === 0) {
        throw new IosError("No iOS simulator booted. Boot one first (e.g. `xcrun simctl boot <udid>` or open Simulator.app).");
    }
    if (running.length > 1) {
        const names = running.map((sim) => sim.name).join(", ");
        throw new IosError(`Expected exactly one booted simulator, found ${running.length} (${names}). ` +
            "Shut down the extras (the sim drives a single device).");
    }
    const [only] = running;
    return only.udid;
}
|
|
115
|
+
/**
 * Read the screen geometry reported by `idb describe --json`.
 *
 * @param {string} udid Simulator udid passed to idb via `--udid`.
 * @returns {Promise<{pixelWidth: number, pixelHeight: number, pointWidth: number, pointHeight: number, density: number}>}
 *   Pixel size, point size, and density; density falls back to
 *   `width / width_points` when idb does not report one.
 * @throws {IosError} If the output is not parseable JSON or any of the four
 *   size fields of `screen_dimensions` is missing or zero.
 */
export async function describeScreen(udid) {
    const raw = await idb(["describe", "--json", "--udid", udid]);
    let screen;
    try {
        screen = JSON.parse(raw).screen_dimensions;
    }
    catch {
        throw new IosError("Could not parse `idb describe --json` output.");
    }
    const required = [screen?.width_points, screen?.height_points, screen?.width, screen?.height];
    if (required.some((value) => !value)) {
        throw new IosError(`idb describe returned no usable screen_dimensions: ${raw.slice(0, 200)}`);
    }
    const { width, height, width_points: pointWidth, height_points: pointHeight, density } = screen;
    return {
        pixelWidth: width,
        pixelHeight: height,
        pointWidth,
        pointHeight,
        density: density ?? width / pointWidth,
    };
}
|
|
136
|
+
// --- Screenshot (PIXELS) ---
|
|
137
|
+
/**
 * Grab the booted simulator's screen as PNG bytes.
 *
 * `simctl io booted screenshot` is given a file path to write to, so the
 * capture goes through a fresh temp directory that is removed afterwards,
 * whether or not the capture succeeded.
 *
 * @returns {Promise<Buffer>} Contents of the PNG file simctl wrote.
 * @throws {IosError} If the simctl screenshot command fails.
 */
export async function screenshotPng() {
    const scratchDir = await mkdtemp(join(tmpdir(), "ish-ios-shot-"));
    try {
        const target = join(scratchDir, "shot.png");
        await simctl(["io", "booted", "screenshot", target], SCREENSHOT_TIMEOUT_MS);
        const png = await readFile(target);
        return png;
    }
    finally {
        // Best-effort cleanup: a failed removal must not mask the result.
        await rm(scratchDir, { recursive: true, force: true }).catch(() => { });
    }
}
|
|
153
|
+
// --- UI input via idb (POINTS) ---
|
|
154
|
+
/**
 * Tap once at (x, y) via `idb ui tap`. Coordinates are rounded to integers
 * before being passed on the command line.
 *
 * @param {string} udid Simulator udid.
 * @param {number} x Horizontal coordinate.
 * @param {number} y Vertical coordinate.
 */
export async function uiTap(udid, x, y) {
    const [col, row] = [x, y].map((value) => String(Math.round(value)));
    await idb(["ui", "tap", "--udid", udid, col, row]);
}
|
|
157
|
+
/**
 * Press and hold at (x, y) via `idb ui tap --duration`.
 *
 * @param {string} udid Simulator udid.
 * @param {number} x Horizontal coordinate (rounded to an integer).
 * @param {number} y Vertical coordinate (rounded to an integer).
 * @param {number} [durationMs=600] Hold time in milliseconds.
 */
export async function uiLongPress(udid, x, y, durationMs = 600) {
    // idb expects SECONDS; convert from ms and keep two decimals.
    const holdSeconds = (durationMs / 1000).toFixed(2);
    const [col, row] = [x, y].map((value) => String(Math.round(value)));
    await idb(["ui", "tap", "--udid", udid, "--duration", holdSeconds, col, row]);
}
|
|
165
|
+
/**
 * Swipe from (x1, y1) to (x2, y2) via `idb ui swipe`.
 *
 * @param {string} udid Simulator udid.
 * @param {number} x1 Start x (rounded to an integer).
 * @param {number} y1 Start y (rounded to an integer).
 * @param {number} x2 End x (rounded to an integer).
 * @param {number} y2 End y (rounded to an integer).
 * @param {number} [durationMs=300] Swipe duration in milliseconds; passed to
 *   idb as seconds with two decimals.
 */
export async function uiSwipe(udid, x1, y1, x2, y2, durationMs = 300) {
    const seconds = (durationMs / 1000).toFixed(2);
    const path = [x1, y1, x2, y2].map((value) => String(Math.round(value)));
    await idb(["ui", "swipe", "--udid", udid, "--duration", seconds, ...path]);
}
|
|
173
|
+
/**
 * Type `text` into the currently focused field through `idb ui text`.
 *
 * The string is handed to idb as a single argument. Per the original note,
 * `idb ui text` copes with spaces/unicode/quotes, unlike Android's
 * `adb shell input text`, so no helper IME is involved.
 *
 * @param {string} udid Simulator udid.
 * @param {string} text Text to type.
 */
export async function uiText(udid, text) {
    const argv = ["ui", "text", "--udid", udid, text];
    await idb(argv);
}
|
|
181
|
+
/**
 * Send a hardware key press, identified by its HID usage code, through
 * `idb ui key`. Code 40 (see HID_KEY_RETURN) is Return/Enter, used to submit
 * a text field.
 *
 * @param {string} udid Simulator udid.
 * @param {number} keycode HID usage code of the key.
 */
export async function uiKey(udid, keycode) {
    const argv = ["ui", "key", "--udid", udid, String(keycode)];
    await idb(argv);
}
/** HID usage code for the Return/Enter key. */
export const HID_KEY_RETURN = 40;
|
|
190
|
+
// --- Accessibility tree (idb describe-all) ---
|
|
191
|
+
/**
 * Capture the current accessibility tree as `idb ui describe-all` JSON (a flat
 * array of elements, each with a POINT frame) and return the raw JSON string.
 *
 * Per the original note this mirrors the oracle's `ios_describe`: right after
 * a tap the tree can be mid-transition and come back empty/partial, so the
 * call is retried until the output parses as an array with at least two
 * entries (more than just the root application node).
 *
 * @param {string} udid Simulator udid.
 * @param {{ attempts?: number, retryDelayMs?: number }} [options]
 *   `attempts` is the maximum number of idb invocations (default 5);
 *   `retryDelayMs` is the pause between attempts (default 800).
 * @returns {Promise<string>} The JSON text of the first non-trivial tree.
 * @throws {IosError} If every attempt fails or yields a trivial tree, so the
 *   caller can degrade to the vision path. The message carries the first 200
 *   characters of the last output or error.
 */
export async function describeAll(udid, { attempts = 5, retryDelayMs = 800 } = {}) {
    // Holds the last idb output, or the last error message if the call threw.
    let lastOutcome = "";
    for (let attempt = 1; attempt <= attempts; attempt++) {
        try {
            const json = await idb(["ui", "describe-all", "--udid", udid]);
            lastOutcome = json;
            // A valid non-trivial tree has more than just the root application node.
            if (countJsonArray(json) >= 2) {
                return json;
            }
        }
        catch (err) {
            lastOutcome = err instanceof Error ? err.message : String(err);
        }
        // Pause only when another attempt follows; sleeping after the final
        // failure would just postpone the error.
        if (attempt < attempts) {
            await delay(retryDelayMs);
        }
    }
    throw new IosError(`idb ui describe-all returned a trivial/empty tree after retries (last: ${lastOutcome.slice(0, 200)})`);
}
|
|
216
|
+
/**
 * Count the elements of a JSON-encoded array.
 *
 * @param {string} json Text expected to hold a JSON array.
 * @returns {number} The array's length, or 0 when the text is not valid JSON
 *   or its top-level value is not an array.
 */
function countJsonArray(json) {
    let value;
    try {
        value = JSON.parse(json);
    }
    catch {
        return 0;
    }
    if (!Array.isArray(value)) {
        return 0;
    }
    return value.length;
}
|
|
226
|
+
/**
 * Promise-based sleep.
 *
 * @param {number} ms Milliseconds handed to setTimeout.
 * @returns {Promise<void>} Resolves when the timer fires.
 */
function delay(ms) {
    return new Promise((resolve) => {
        setTimeout(resolve, ms);
    });
}
|
|
229
|
+
// --- App lifecycle (simctl) ---
|
|
230
|
+
/**
 * Stop an app on the simulator via `simctl terminate`, best-effort.
 *
 * Terminating an app that isn't running exits non-zero ("found nothing to
 * terminate"); that is fine for a reset, so any failure of the command is
 * deliberately ignored and this function never rejects because of it.
 *
 * @param {string} udid Simulator udid.
 * @param {string} bundleId Bundle identifier of the app to stop.
 */
export async function terminateApp(udid, bundleId) {
    // not running — nothing to stop
    await simctl(["terminate", udid, bundleId]).catch(() => { });
}
|
|
240
|
+
/**
 * Launch an installed app via `simctl launch`.
 *
 * No extra error handling here: per the original note, simctl exits non-zero
 * with a clear message when the bundle id isn't installed, and the simctl
 * wrapper turns that into a thrown error.
 *
 * @param {string} udid Simulator udid.
 * @param {string} bundleId Bundle identifier of the app to start.
 */
export async function launchApp(udid, bundleId) {
    const argv = ["launch", udid, bundleId];
    await simctl(argv);
}
|
|
245
|
+
/**
 * Install a `.app` bundle onto the simulator via `simctl install`, with a
 * 180-second timeout instead of the default one.
 *
 * Per the original note, simulator builds aren't code-signed and
 * `simctl install` just stages the .app.
 *
 * @param {string} udid Simulator udid.
 * @param {string} appPath Path to the `.app` to install.
 */
export async function installApp(udid, appPath) {
    const installTimeoutMs = 180_000;
    await simctl(["install", udid, appPath], installTimeoutMs);
}
|
|
249
|
+
/**
 * Report whether a bundle id shows up in `simctl listapps` output.
 *
 * This is a plain substring test on the listing text: it looks for the bundle
 * id wrapped in double quotes, or for a `CFBundleIdentifier = "<id>"` entry.
 *
 * @param {string} udid Simulator udid.
 * @param {string} bundleId Bundle identifier to look for.
 * @returns {Promise<boolean>} True when either pattern occurs in the listing.
 */
export async function isAppInstalled(udid, bundleId) {
    const listing = await simctl(["listapps", udid], 60_000);
    const needles = [`"${bundleId}"`, `CFBundleIdentifier = "${bundleId}"`];
    return needles.some((needle) => listing.includes(needle));
}
|
|
255
|
+
/**
 * Look up CFBundleIdentifier in a local `.app`'s Info.plist using `plutil`,
 * so a just-installed app can be terminated/launched without diffing the app
 * list.
 *
 * @param {string} appPath Path to the `.app` directory.
 * @returns {Promise<string | null>} The bundle id, or null when Info.plist is
 *   missing, plutil fails, or plutil prints nothing.
 */
export async function bundleIdFromApp(appPath) {
    const infoPlist = join(appPath, "Info.plist");
    if (!existsSync(infoPlist)) {
        return null;
    }
    const argv = ["-extract", "CFBundleIdentifier", "raw", "-o", "-", infoPlist];
    let bundleId;
    try {
        const result = await execFileAsync(PLUTIL, argv, { timeout: 10_000 });
        bundleId = result.stdout.trim();
    }
    catch {
        // Any plutil failure is reported as "unknown bundle id".
        return null;
    }
    return bundleId === "" ? null : bundleId;
}
|
|
@@ -44,6 +44,15 @@ export interface ContextValue {
|
|
|
44
44
|
value: string | null;
|
|
45
45
|
description?: string;
|
|
46
46
|
}
|
|
47
|
+
/**
|
|
48
|
+
* Per-turn assignment status the agent can emit. Mirrors the backend's
|
|
49
|
+
* AssignmentStatus enum (app/db/schemas/enums/study.py) restricted to the
|
|
50
|
+
* LLM-emittable values (_LLM_EMITTABLE_STATUSES in
|
|
51
|
+
* app/interactive/instructions/output.py). The harness-only values
|
|
52
|
+
* (pending / max_steps_reached / failed) are NOT emittable per turn; the
|
|
53
|
+
* run-level AssignmentStatusUpdate sends those terminal values instead.
|
|
54
|
+
*/
|
|
55
|
+
export type AssignmentStatus = "in_progress" | "completed" | "abandoned";
|
|
47
56
|
export interface HistoryEntry {
|
|
48
57
|
comment: string;
|
|
49
58
|
action_description: string;
|
|
@@ -104,6 +113,18 @@ export interface LocalStepAction {
|
|
|
104
113
|
modifiers: string[] | null;
|
|
105
114
|
key: string | null;
|
|
106
115
|
tab_id: string | null;
|
|
116
|
+
orientation: string | null;
|
|
117
|
+
scale: number | null;
|
|
118
|
+
coordinates: {
|
|
119
|
+
x: number;
|
|
120
|
+
y: number;
|
|
121
|
+
} | null;
|
|
122
|
+
drag: {
|
|
123
|
+
startX: number;
|
|
124
|
+
startY: number;
|
|
125
|
+
endX: number;
|
|
126
|
+
endY: number;
|
|
127
|
+
} | null;
|
|
107
128
|
}
|
|
108
129
|
/** Raw backend step response — output is nested, actions are separate. */
|
|
109
130
|
export interface LocalSimStepResponseRaw {
|
|
@@ -114,7 +135,7 @@ export interface LocalSimStepResponseRaw {
|
|
|
114
135
|
sentiment_intensity?: number;
|
|
115
136
|
current_location: string;
|
|
116
137
|
effort_seconds: number;
|
|
117
|
-
|
|
138
|
+
assignment_status: AssignmentStatus;
|
|
118
139
|
action: {
|
|
119
140
|
actions: Array<{
|
|
120
141
|
type: string;
|
|
@@ -135,6 +156,17 @@ export interface LocalSimStepResponseRaw {
|
|
|
135
156
|
modifiers?: string[];
|
|
136
157
|
key?: string;
|
|
137
158
|
tab_id?: string;
|
|
159
|
+
orientation?: string;
|
|
160
|
+
scale?: number;
|
|
161
|
+
coordinates?: {
|
|
162
|
+
x: number;
|
|
163
|
+
y: number;
|
|
164
|
+
} | {
|
|
165
|
+
startX: number;
|
|
166
|
+
startY: number;
|
|
167
|
+
endX: number;
|
|
168
|
+
endY: number;
|
|
169
|
+
};
|
|
138
170
|
}>;
|
|
139
171
|
};
|
|
140
172
|
};
|
|
@@ -149,6 +181,7 @@ export interface LocalSimStepResponse {
|
|
|
149
181
|
sentiment_intensity: number;
|
|
150
182
|
current_location: string;
|
|
151
183
|
effort_seconds: number;
|
|
184
|
+
assignment_status: AssignmentStatus;
|
|
152
185
|
assignment_completed: boolean;
|
|
153
186
|
actions: LocalStepAction[];
|
|
154
187
|
loop_detected: boolean;
|
|
@@ -174,6 +207,8 @@ export interface RecordInteraction {
|
|
|
174
207
|
assignment_id: string;
|
|
175
208
|
screenshot_base64?: string;
|
|
176
209
|
screenshot_url?: string;
|
|
210
|
+
screen_width?: number;
|
|
211
|
+
screen_height?: number;
|
|
177
212
|
frame_version_id?: string;
|
|
178
213
|
timestamp_ms: number;
|
|
179
214
|
comment: string | null;
|
|
@@ -181,7 +216,7 @@ export interface RecordInteraction {
|
|
|
181
216
|
sentiment: SentimentData;
|
|
182
217
|
actions: ActionData[];
|
|
183
218
|
current_location: string | null;
|
|
184
|
-
|
|
219
|
+
assignment_status: AssignmentStatus;
|
|
185
220
|
tabs?: LocalTabInfo[];
|
|
186
221
|
}
|
|
187
222
|
export interface AssignmentStatusUpdate {
|
|
@@ -3,4 +3,4 @@ export interface ScreenshotUploadResult {
|
|
|
3
3
|
screenshotUrl: string;
|
|
4
4
|
screenshotId: string;
|
|
5
5
|
}
|
|
6
|
-
export declare function uploadScreenshot(client: ApiClient, productId: string,
|
|
6
|
+
export declare function uploadScreenshot(client: ApiClient, productId: string, imageBuffer: Buffer, contentType?: "image/jpeg" | "image/png"): Promise<ScreenshotUploadResult>;
|
|
@@ -1,20 +1,23 @@
|
|
|
1
1
|
import { randomUUID } from "node:crypto";
|
|
2
|
-
export async function uploadScreenshot(client, productId,
|
|
2
|
+
export async function uploadScreenshot(client, productId, imageBuffer,
|
|
3
|
+
// Browser captures JPEG; native (adb screencap) emits PNG. The signed-URL
|
|
4
|
+
// request and the PUT header MUST agree so storage serves correct bytes.
|
|
5
|
+
contentType = "image/jpeg") {
|
|
3
6
|
const screenshotId = randomUUID();
|
|
4
7
|
// Step 1: Get signed URL from backend
|
|
5
8
|
const resp = await client.localSimScreenshotUpload({
|
|
6
9
|
product_id: productId,
|
|
7
10
|
screenshot_id: screenshotId,
|
|
8
|
-
content_type:
|
|
11
|
+
content_type: contentType,
|
|
9
12
|
});
|
|
10
|
-
// Step 2: PUT raw
|
|
13
|
+
// Step 2: PUT raw image bytes directly to Supabase Storage
|
|
11
14
|
const putResp = await fetch(resp.upload_info.signed_upload_url, {
|
|
12
15
|
method: "PUT",
|
|
13
16
|
headers: {
|
|
14
|
-
"Content-Type":
|
|
15
|
-
"Content-Length": String(
|
|
17
|
+
"Content-Type": contentType,
|
|
18
|
+
"Content-Length": String(imageBuffer.byteLength),
|
|
16
19
|
},
|
|
17
|
-
body:
|
|
20
|
+
body: imageBuffer,
|
|
18
21
|
signal: AbortSignal.timeout(30_000),
|
|
19
22
|
});
|
|
20
23
|
if (!putResp.ok) {
|
package/dist/lib/output.js
CHANGED
|
@@ -503,7 +503,7 @@ function suggestionsForError(err) {
|
|
|
503
503
|
"If you didn't pass the resource explicitly, your saved active workspace/study/ask may be stale — run `ish status` to check, then `ish workspace use --clear` (or `ish study use --clear` / `ish ask use --clear`) to reset.",
|
|
504
504
|
];
|
|
505
505
|
case "insufficient_credits":
|
|
506
|
-
return ["
|
|
506
|
+
return ["Get more credits at https://app.ishlabs.io"];
|
|
507
507
|
case "usage_limit_reached": {
|
|
508
508
|
const d = structuredDetail(err);
|
|
509
509
|
const upgradeUrl = typeof d?.upgrade_url === "string" ? d.upgrade_url : "https://app.ishlabs.io/billing";
|
|
@@ -625,7 +625,7 @@ export function outputError(err, json) {
|
|
|
625
625
|
}
|
|
626
626
|
else {
|
|
627
627
|
if (err.status === 402) {
|
|
628
|
-
console.error("Error: Insufficient credits.
|
|
628
|
+
console.error("Error: Insufficient credits. Get more credits at https://app.ishlabs.io");
|
|
629
629
|
}
|
|
630
630
|
else {
|
|
631
631
|
console.error(`Error: ${remapEntityName(err.message)}`);
|
|
@@ -1222,7 +1222,7 @@ export function formatStudyResults(study, participants, json) {
|
|
|
1222
1222
|
console.log(` ${alias} (${t.name}): ${truncate(t.errorMessage, 200)}`);
|
|
1223
1223
|
}
|
|
1224
1224
|
}
|
|
1225
|
-
console.log("\nRun `ish participant
|
|
1225
|
+
console.log("\nRun `ish study participant <id> --json` for full interaction details.");
|
|
1226
1226
|
}
|
|
1227
1227
|
}
|
|
1228
1228
|
/**
|
|
@@ -1668,7 +1668,7 @@ export function formatSimulationPoll(results, json, isMedia = false) {
|
|
|
1668
1668
|
// Pattern A (cli half): list per-participant error_message under the table so
|
|
1669
1669
|
// agents see why a simulation failed without re-fetching every participant.
|
|
1670
1670
|
// Truncate to 200 chars; full text is available via --json or
|
|
1671
|
-
// `ish study participant
|
|
1671
|
+
// `ish study participant <id>`.
|
|
1672
1672
|
const failedRows = results.filter((r) => {
|
|
1673
1673
|
const status = String(r.status || "").toLowerCase();
|
|
1674
1674
|
return (status === "failed" || status === "errored") && r.error_message;
|
|
@@ -1689,6 +1689,43 @@ function variantLetter(index) {
|
|
|
1689
1689
|
return String.fromCharCode(65 + index);
|
|
1690
1690
|
return `V${index + 1}`;
|
|
1691
1691
|
}
|
|
1692
|
+
/**
 * Build one stable letter per variant id across all rounds of an ask. The
 * original note describes this as the CLI mirror of the backend's
 * `app/asks/variant_loader.py:build_ask_label_map`.
 *
 * D2 fix (per the original note): the LLM letters variants GLOBALLY across
 * the ask, so comments and round summaries say `[[A]]/[[B]]` in round 1 and
 * `[[C]]/[[D]]` in round 2, while the CLI table used to re-letter every round
 * positionally. Keying letters by variant id makes the table, the aggregates,
 * and the cross-round columns agree with those `[[token]]` letters.
 *
 * Rounds are visited in ascending `order_index` (a non-numeric index counts
 * as 0). Each variant id receives the next letter the first time it is seen.
 * Variants whose `id` is not a non-empty string are left out of the map; the
 * call site then falls back to the round-local positional letter.
 *
 * @param {Array<object>} rounds The ask's rounds; the array is not mutated.
 * @returns {Map<string, string>} Variant id → letter.
 */
function buildAskLabelMap(rounds) {
    const orderOf = (round) => (typeof round.order_index === "number" ? round.order_index : 0);
    const letters = new Map();
    // Sort a copy so the caller's array keeps its order.
    const byOrder = [...rounds].sort((left, right) => orderOf(left) - orderOf(right));
    for (const round of byOrder) {
        if (!Array.isArray(round.variants)) {
            continue;
        }
        for (const variant of round.variants) {
            const variantId = variant.id;
            const usable = typeof variantId === "string" && variantId.length > 0;
            if (usable && !letters.has(variantId)) {
                // The map's current size is the index of the next unused letter.
                letters.set(variantId, variantLetter(letters.size));
            }
        }
    }
    return letters;
}
|
|
1692
1729
|
export function formatAskList(asks, json) {
|
|
1693
1730
|
injectAliases(asks, ALIAS_PREFIX.ask);
|
|
1694
1731
|
if (json) {
|
|
@@ -1852,13 +1889,17 @@ export function formatRoundDetail(round, json) {
|
|
|
1852
1889
|
console.log(` ${summary.comment}`);
|
|
1853
1890
|
}
|
|
1854
1891
|
}
|
|
1855
|
-
function computeVariantStats(round) {
|
|
1892
|
+
function computeVariantStats(round, labelMap) {
|
|
1856
1893
|
const variants = Array.isArray(round.variants) ? round.variants : [];
|
|
1857
1894
|
const responses = Array.isArray(round.responses) ? round.responses : [];
|
|
1858
1895
|
const stats = variants.map((v, i) => {
|
|
1859
1896
|
const variant = v;
|
|
1897
|
+
const id = typeof variant.id === "string" ? variant.id : undefined;
|
|
1860
1898
|
return {
|
|
1861
|
-
|
|
1899
|
+
// D2: prefer the ask-global letter (id-keyed, matches the LLM's
|
|
1900
|
+
// `[[token]]` letters) and only fall back to the round-local positional
|
|
1901
|
+
// letter when no map entry exists (single-round / mapless callers).
|
|
1902
|
+
letter: (id && labelMap?.get(id)) || variantLetter(i),
|
|
1862
1903
|
label: variant.label ? String(variant.label) : undefined,
|
|
1863
1904
|
kind: String(variant.kind || "-"),
|
|
1864
1905
|
pickCount: 0,
|
|
@@ -2087,13 +2128,13 @@ export function deriveWinnerConfidence(args) {
|
|
|
2087
2128
|
return "medium";
|
|
2088
2129
|
return "high";
|
|
2089
2130
|
}
|
|
2090
|
-
function buildCrossRoundSummary(rounds) {
|
|
2131
|
+
function buildCrossRoundSummary(rounds, labelMap) {
|
|
2091
2132
|
if (rounds.length < 2)
|
|
2092
2133
|
return undefined;
|
|
2093
2134
|
const entries = [];
|
|
2094
2135
|
for (const round of rounds) {
|
|
2095
2136
|
const idx = typeof round.order_index === "number" ? round.order_index : 0;
|
|
2096
|
-
const stats = computeVariantStats(round);
|
|
2137
|
+
const stats = computeVariantStats(round, labelMap);
|
|
2097
2138
|
const aggregates = buildAggregates(round, stats);
|
|
2098
2139
|
const entry = {
|
|
2099
2140
|
round_number: idx + 1,
|
|
@@ -2128,12 +2169,17 @@ export function formatAskResults(ask, json, roundFilter) {
|
|
|
2128
2169
|
const filtered = roundFilter !== undefined
|
|
2129
2170
|
? rounds.filter((r) => (typeof r.order_index === "number" ? r.order_index : 0) === roundFilter - 1)
|
|
2130
2171
|
: rounds;
|
|
2172
|
+
// D2: build the ask-global variant→letter map from the FULL round list (not
|
|
2173
|
+
// `filtered`) so that even `--round 2` still letters its variants C/D — the
|
|
2174
|
+
// letter a variant earned when it debuted, matching the `[[token]]` letters
|
|
2175
|
+
// in the LLM's comments and round summaries.
|
|
2176
|
+
const labelMap = buildAskLabelMap(rounds);
|
|
2131
2177
|
if (json) {
|
|
2132
2178
|
let total = 0;
|
|
2133
2179
|
let complete = 0;
|
|
2134
2180
|
let errored = 0;
|
|
2135
2181
|
const enrichedRounds = filtered.map((round) => {
|
|
2136
|
-
const stats = computeVariantStats(round);
|
|
2182
|
+
const stats = computeVariantStats(round, labelMap);
|
|
2137
2183
|
const aggregates = buildAggregates(round, stats);
|
|
2138
2184
|
const decorated = denormalizeRoundCounts(round);
|
|
2139
2185
|
total += decorated.responses_total ?? 0;
|
|
@@ -2158,7 +2204,7 @@ export function formatAskResults(ask, json, roundFilter) {
|
|
|
2158
2204
|
}
|
|
2159
2205
|
// Pattern H2: include cross-round summary when 2+ rounds exist so agents
|
|
2160
2206
|
// don't have to diff two `ask results` calls themselves.
|
|
2161
|
-
const crossRound = buildCrossRoundSummary(filtered);
|
|
2207
|
+
const crossRound = buildCrossRoundSummary(filtered, labelMap);
|
|
2162
2208
|
if (crossRound)
|
|
2163
2209
|
payload.cross_round_summary = crossRound;
|
|
2164
2210
|
console.log(jsonOutput(payload));
|
|
@@ -2175,7 +2221,7 @@ export function formatAskResults(ask, json, roundFilter) {
|
|
|
2175
2221
|
const completed = responses.filter((r) => r.status === "completed");
|
|
2176
2222
|
console.log(`\nRound ${idx + 1} [${round.status || "-"}] · ${completed.length}/${responses.length} responded`);
|
|
2177
2223
|
console.log(` Prompt: "${truncate(String(round.prompt || ""), 100)}"`);
|
|
2178
|
-
const stats = computeVariantStats(round);
|
|
2224
|
+
const stats = computeVariantStats(round, labelMap);
|
|
2179
2225
|
if (stats.length > 0 && (round.wants_pick || round.wants_ratings)) {
|
|
2180
2226
|
const hasPick = !!round.wants_pick;
|
|
2181
2227
|
const hasRatings = !!round.wants_ratings;
|
|
@@ -2222,7 +2268,7 @@ export function formatAskResults(ask, json, roundFilter) {
|
|
|
2222
2268
|
}
|
|
2223
2269
|
// Pattern H2: cross-round picks comparison when 2+ rounds exist. Saves
|
|
2224
2270
|
// agents from re-running results twice and diffing aggregates by hand.
|
|
2225
|
-
const crossRound = buildCrossRoundSummary(filtered);
|
|
2271
|
+
const crossRound = buildCrossRoundSummary(filtered, labelMap);
|
|
2226
2272
|
if (crossRound) {
|
|
2227
2273
|
console.log("\nCross-round summary:");
|
|
2228
2274
|
const letters = new Set();
|
|
@@ -174,7 +174,7 @@ Examples below use MCP shape; for CLI, kebab-case the tool name (\`ask_run\` →
|
|
|
174
174
|
|
|
175
175
|
- **Input**: a \`description\`, a \`count\`, and optionally \`sources\` (transcripts / audio / images / docs that seed persona generation — for "make profiles that feel like these real customers"). Local files force CLI (binary upload constraint).
|
|
176
176
|
- **Output**: a list of \`person_ids\` to pass into \`ask_run\` or \`study_run\`.
|
|
177
|
-
- **
|
|
177
|
+
- **Usage**: slow (~30-120s) + draws credits. Reuse profiles via \`profile_list\` when possible. Sensible defaults: \`count: 5-10\` for ad-hoc tests, \`count: 20+\` for studies where you want statistical signal.
|
|
178
178
|
- **Growing a group of people**: build only the delta — don't rebuild. Concat the new \`person_ids\` with the existing ones for the next run. The "audience is a query" framing means there's no audience entity to update.
|
|
179
179
|
- **Shapes**:
|
|
180
180
|
\`\`\`
|
|
@@ -216,7 +216,7 @@ To hand a study to someone **without an ish account** — a prospect, a stakehol
|
|
|
216
216
|
- **Cold start on free plan**: \`workspace_create\` returns \`usage_limit_reached\` at the free-plan cap (1 workspace). Always inspect with \`workspace_list\` first. **MCP-only recipe** (no \`--ensure\` available): \`workspace_list\` → if non-empty, use the first; if empty, \`workspace_create\`; if \`workspace_create\` returns \`usage_limit_reached\`, re-call \`workspace_list\` (a workspace exists you didn't see — possibly created by another session). **CLI shortcut**: \`ish workspace create --name <name> --ensure\` is idempotent by name.
|
|
217
217
|
- **Ask participants vs variants** — see Lifecycle table for the re-use vs new-ask decision.
|
|
218
218
|
- **Study iterations are immutable once they have results** — see Lifecycle table for new-iteration vs new-study.
|
|
219
|
-
- **Credit
|
|
219
|
+
- **Credit usage**: \`ask_run\`, \`study_run\`, and \`group_build\` draw credits — this is the normal, expected way to use ish, so run them without hesitation. Credits are a usage allowance (paid plans refill monthly; the free tier is a one-time signup grant), not a per-call bill. Check \`workspace_get\`'s \`credits\` headroom before dispatching large runs. For free-plan ad-hoc tests, default \`count: 5-8\` participants + 2 variants comfortably fits the signup grant.
|
|
220
220
|
- **\`group_build\` may return fewer profiles than requested** if the description is over-constrained. Always read the returned \`person_ids\` count, don't trust the requested \`count\` blindly.
|
|
221
221
|
- **Variants of wildly different length** (one-line vs paragraph) can skew picks toward the longer one. Keep variants comparable in shape.
|
|
222
222
|
- **Chatbot endpoint response-shape mismatch**: \`chat_endpoint_test\` succeeds shallowly if the bot responds at all, but a wrong response path (e.g. bot returns \`{ data: { reply } }\` instead of \`{ reply }\`) produces empty transcripts on the actual run. Inspect one full test response before dispatching participants.
|
|
@@ -869,12 +869,13 @@ ish study results <study-id> --transcript <participant-id> --json
|
|
|
869
869
|
|
|
870
870
|
## 9. Stage an ask for human review, then dispatch
|
|
871
871
|
|
|
872
|
-
Goal: prepare
|
|
873
|
-
people + prompt before any credits are
|
|
874
|
-
DRAFT status in between.
|
|
872
|
+
Goal: prepare an A/B but let the user inspect and approve the
|
|
873
|
+
people + prompt before any credits are drawn. Two-step flow with a
|
|
874
|
+
DRAFT status in between. (Drawing credits to run an ask is normal — the
|
|
875
|
+
draft step is for human review, not to avoid the credit usage.)
|
|
875
876
|
|
|
876
877
|
\`\`\`bash
|
|
877
|
-
# 1. Stage. No worker enqueued, no
|
|
878
|
+
# 1. Stage. No worker enqueued, no credits drawn. Audience flags are still
|
|
878
879
|
# required — participants materialize at create time.
|
|
879
880
|
ASK=$(ish ask create --name "tagline AB" \\
|
|
880
881
|
--prompt "Which sounds better?" \\
|
|
@@ -888,7 +889,7 @@ ASK=$(ish ask create --name "tagline AB" \\
|
|
|
888
889
|
# ish ask get "$ASK" # status: draft
|
|
889
890
|
# ish ask get "$ASK" --json | jq '.participants | length'
|
|
890
891
|
|
|
891
|
-
# 2. Dispatch once approved (
|
|
892
|
+
# 2. Dispatch once approved (draws credits). Idempotent: a non-DRAFT ask
|
|
892
893
|
# returns 409 mapped to exit 2, so re-running is safe.
|
|
893
894
|
ish ask dispatch "$ASK" --wait
|
|
894
895
|
\`\`\`
|
|
@@ -971,7 +972,7 @@ Rules to remember:
|
|
|
971
972
|
untouched. Get the new id from \`.participant_id\` / \`.participant_alias\` on
|
|
972
973
|
\`--json\`.
|
|
973
974
|
- \`--add-steps\` is **only** the extra budget; it does NOT include the
|
|
974
|
-
source's original cap. Credits
|
|
975
|
+
source's original cap. Credits draw per
|
|
975
976
|
\`max(1, round(additional_steps / 10))\` — same formula as
|
|
976
977
|
\`study run\` interactive, just scoped to the extension.
|
|
977
978
|
- \`--instruction\` accepts three input shapes (matching the rest of
|
|
@@ -982,7 +983,7 @@ Rules to remember:
|
|
|
982
983
|
\`study run\`. Extend always inherits the source's iteration config.
|
|
983
984
|
|
|
984
985
|
See \`ish docs get-page concepts/extending-a-simulation\` for the full
|
|
985
|
-
mental model (cancel + extend as a pair, error envelopes,
|
|
986
|
+
mental model (cancel + extend as a pair, error envelopes, credit model).
|
|
986
987
|
|
|
987
988
|
## 12. Slice study results by frame / segment / turn / sentiment
|
|
988
989
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ishlabs/cli",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.25.0",
|
|
4
4
|
"description": "The command-line interface for ish",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
"verify:skills-parity": "npm run build && node scripts/verify-skills-parity.mjs",
|
|
15
15
|
"dev": "tsc --watch",
|
|
16
16
|
"test": "npm run build && node --test --test-concurrency=1 tests/*.test.mjs",
|
|
17
|
+
"mobile-e2e": "./scripts/mobile-e2e/run.sh all",
|
|
17
18
|
"prepublishOnly": "npm test"
|
|
18
19
|
},
|
|
19
20
|
"engines": {
|