@ishlabs/cli 0.24.1 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/dist/commands/ask.js +3 -3
  2. package/dist/commands/doctor.d.ts +26 -0
  3. package/dist/commands/doctor.js +334 -0
  4. package/dist/commands/iteration.js +1 -1
  5. package/dist/commands/study-analyze.js +1 -1
  6. package/dist/commands/study-run.js +80 -12
  7. package/dist/commands/study.js +11 -7
  8. package/dist/index.js +2 -0
  9. package/dist/lib/alias-store.js +1 -1
  10. package/dist/lib/api-client.d.ts +2 -0
  11. package/dist/lib/docs.js +57 -42
  12. package/dist/lib/local-sim/actions.d.ts +10 -2
  13. package/dist/lib/local-sim/actions.js +18 -11
  14. package/dist/lib/local-sim/adb.d.ts +113 -0
  15. package/dist/lib/local-sim/adb.js +366 -0
  16. package/dist/lib/local-sim/android.d.ts +111 -0
  17. package/dist/lib/local-sim/android.js +504 -0
  18. package/dist/lib/local-sim/apk-manifest.d.ts +22 -0
  19. package/dist/lib/local-sim/apk-manifest.js +210 -0
  20. package/dist/lib/local-sim/browser.d.ts +22 -0
  21. package/dist/lib/local-sim/browser.js +65 -0
  22. package/dist/lib/local-sim/coordinates.d.ts +69 -0
  23. package/dist/lib/local-sim/coordinates.js +59 -0
  24. package/dist/lib/local-sim/device.d.ts +143 -0
  25. package/dist/lib/local-sim/device.js +152 -0
  26. package/dist/lib/local-sim/ios.d.ts +185 -0
  27. package/dist/lib/local-sim/ios.js +599 -0
  28. package/dist/lib/local-sim/loop.d.ts +14 -2
  29. package/dist/lib/local-sim/loop.js +168 -73
  30. package/dist/lib/local-sim/native-a11y.d.ts +111 -0
  31. package/dist/lib/local-sim/native-a11y.js +419 -0
  32. package/dist/lib/local-sim/simctl.d.ts +55 -0
  33. package/dist/lib/local-sim/simctl.js +144 -0
  34. package/dist/lib/local-sim/types.d.ts +39 -2
  35. package/dist/lib/local-sim/upload.d.ts +1 -1
  36. package/dist/lib/local-sim/upload.js +9 -6
  37. package/dist/lib/local-sim/xcuitest.d.ts +60 -0
  38. package/dist/lib/local-sim/xcuitest.js +303 -0
  39. package/dist/lib/output.js +58 -12
  40. package/dist/lib/paths.d.ts +8 -0
  41. package/dist/lib/paths.js +12 -0
  42. package/dist/lib/skill-content.js +10 -9
  43. package/package.json +2 -1
@@ -0,0 +1,152 @@
1
+ /**
2
+ * SimulationDevice — the target a local simulation drives.
3
+ *
4
+ * The observe → reason (remote) → act (local) loop in `loop.ts` used to be
5
+ * hardwired to a Playwright `Page`. This interface abstracts exactly what the
6
+ * loop needs from a target so a native device (Android driven by `adb`, iOS
7
+ * driven by `simctl` + WebDriverAgent) can slot in next to the browser.
8
+ * `BrowserDevice` (below) wraps the existing Playwright path in `browser.ts`/
9
+ * `actions.ts`/`tabs.ts`; `AndroidDevice` and `IOSDevice` (loaded on demand by `createDevice`) implement the same surface.
10
+ *
11
+ * Multi-tab handling is browser-specific and stays hidden behind the
12
+ * interface — the loop never touches a `Page` or `TabManager` directly.
13
+ */
14
+ import { launchBrowser, createTab, captureObservation, takeScreenshot, takeScreenshotJpeg, takeFullPageJpeg, navigateWithRetry, closeBrowser, } from "./browser.js";
15
+ import { executeAction } from "./actions.js";
16
+ import { TabManager } from "./tabs.js";
17
+ import { debugObservation } from "./debug.js";
18
+ /**
19
+ * Browser implementation backed by Playwright. Delegates to the existing
20
+ * `browser.ts`/`actions.ts`/`tabs.ts` helpers — no logic is rewritten here.
21
+ *
22
+ * Owns a `BrowserSession` plus a `TabManager`; the active page can swap when a
23
+ * popup auto-focuses or the LLM issues switch_tab/close_tab, so every method
24
+ * re-reads `tabs.activePage()` before acting (matching the previous loop).
25
+ */
26
+ export class BrowserDevice {
27
+ session; // browser session handed to the constructor; its context/page seed the TabManager
28
+ tabs; // TabManager created in the constructor; every method asks it for the active page
29
+ opts; // browser options; `opts.viewport` is the fallback when the page reports no viewport size
30
+ contextValues; // forwarded as-is to executeAction() from actions.js
31
+ /** When false this device shares a browser process and only closes its tab. */
32
+ ownsBrowser;
33
+ /** CDP node map from the last observe(), needed to resolve actions. */
34
+ lastTreeData = null;
35
+ constructor(session, opts, contextValues, ownsBrowser) { // stores its arguments and wraps the session in a TabManager
36
+ this.session = session;
37
+ this.opts = opts;
38
+ this.contextValues = contextValues;
39
+ this.ownsBrowser = ownsBrowser;
40
+ this.tabs = new TabManager(session.context, session.page);
41
+ }
42
+ async launchOrReset(target) { // navigates the active tab to `target` through navigateWithRetry()
43
+ await navigateWithRetry(this.tabs.activePage(), target);
44
+ }
45
+ async observe() { // captures the active tab, caches its tree data, and returns the loop-facing observation
46
+ const page = this.tabs.activePage();
47
+ const obs = await captureObservation(page);
48
+ this.lastTreeData = obs.treeData; // kept so the next executeAction() resolves against this tree
49
+ debugObservation(obs);
50
+ const tabsSnapshot = await this.tabs.list();
51
+ return {
52
+ screenshot: obs.screenshot,
53
+ accessibilityTree: obs.treeData.simplified, // only the simplified tree is returned; the full treeData stays in lastTreeData
54
+ url: obs.url,
55
+ width: obs.viewportWidth,
56
+ height: obs.viewportHeight,
57
+ documentHeight: obs.documentHeight,
58
+ tabs: tabsSnapshot,
59
+ };
60
+ }
61
+ async captureScreenshot() { // screenshot of the active tab via takeScreenshot()
62
+ return takeScreenshot(this.tabs.activePage());
63
+ }
64
+ async captureScreenshotJpeg() { // JPEG variant via takeScreenshotJpeg()
65
+ return takeScreenshotJpeg(this.tabs.activePage());
66
+ }
67
+ async captureFullPageJpeg(opts) { // full-page JPEG of the active tab; returns the `base64` field of takeFullPageJpeg()'s result
68
+ const page = this.tabs.activePage();
69
+ const viewportWidth = page.viewportSize()?.width ?? this.opts.viewport.width; // prefer the live viewport width, else the configured one
70
+ const fullPage = await takeFullPageJpeg(page, {
71
+ documentHeight: opts.documentHeight,
72
+ cap: opts.cap,
73
+ viewportWidth,
74
+ });
75
+ return fullPage.base64;
76
+ }
77
+ dimensions() { // live viewport size of the active tab, or the configured viewport when none is reported
78
+ const page = this.tabs.activePage();
79
+ return page.viewportSize() ?? this.opts.viewport;
80
+ }
81
+ async executeAction(action) { // runs one action against the active tab and reports whether a tap opened a new tab
82
+ // Pick up popup auto-switch / explicit tab switch from prior actions.
83
+ let page = this.tabs.activePage();
84
+ const treeData = this.lastTreeData ?? { simplified: "", nodeMap: new Map() }; // lastTreeData still null (no observe() yet) → empty tree and node map
85
+ const tabsBefore = (await this.tabs.list()).length; // tab count before the action, compared below
86
+ const result = await executeAction(page, action, treeData, this.contextValues, this.tabs);
87
+ // The action may have flipped the active tab — re-read.
88
+ page = this.tabs.activePage(); // NOTE(review): `page` is not read again in this method, so this looks like a dead store — confirm before removing
89
+ const tabsAfter = (await this.tabs.list()).length;
90
+ const openedNewTab = action.type === "tap" && tabsAfter > tabsBefore; // only a `tap` that raised the tab count counts as opening a new tab
91
+ return {
92
+ success: result.success,
93
+ elementName: result.elementName,
94
+ coordinates: result.coordinates,
95
+ openedNewTab,
96
+ };
97
+ }
98
+ currentUrl() { // URL of the active tab
99
+ return this.tabs.activePage().url();
100
+ }
101
+ async close() { // closes the whole browser when owned, otherwise only the session's own page
102
+ if (this.ownsBrowser) {
103
+ await closeBrowser(this.session);
104
+ }
105
+ else {
106
+ // Shared mode: close just the tab, not the context or browser.
107
+ try {
108
+ await this.session.page.close(); // NOTE(review): only session.page is closed; tabs opened during the run are not touched here — confirm that is intended
109
+ }
110
+ catch {
111
+ // already closed
112
+ }
113
+ }
114
+ }
115
+ }
116
+ /**
117
+ * Build the device for a platform. `web`/`browser`/`""` → Playwright
118
+ * `BrowserDevice`; `android` → `AndroidDevice` (adb); `ios` → `IOSDevice`
119
+ * (simctl + WebDriverAgent). The native cases are dynamically imported so the browser path
120
+ * never pulls in the adb/simctl modules.
121
+ */
122
+ export async function createDevice(platform, opts) { // resolves to a BrowserDevice, AndroidDevice or IOSDevice; throws for any other platform
123
+ switch (platform) {
124
+ case "web":
125
+ case "browser":
126
+ case "": { // an empty platform string selects the browser as well
127
+ const ownsBrowser = !opts.sharedBrowser; // no shared browser supplied → the device owns (and later closes) its own
128
+ const session = opts.sharedBrowser
129
+ ? await createTab(opts.sharedBrowser, opts.browserOpts) // shared: createTab() on the supplied browser
130
+ : await launchBrowser(opts.browserOpts); // standalone: launchBrowser() with the same options
131
+ return new BrowserDevice(session, opts.browserOpts, opts.contextValues, ownsBrowser);
132
+ }
133
+ case "android": {
134
+ const { AndroidDevice } = await import("./android.js"); // dynamic import keeps the adb code off the browser path
135
+ return new AndroidDevice({
136
+ appPath: opts.appPath,
137
+ contextValues: opts.contextValues,
138
+ log: opts.log,
139
+ });
140
+ }
141
+ case "ios": {
142
+ const { IOSDevice } = await import("./ios.js"); // dynamic import keeps the simctl/WDA code off the browser path
143
+ return new IOSDevice({ // only appPath, contextValues and log are passed; no bundleId is supplied here
144
+ appPath: opts.appPath,
145
+ contextValues: opts.contextValues,
146
+ log: opts.log,
147
+ });
148
+ }
149
+ default: // any other value, including undefined, is rejected
150
+ throw new Error(`Unsupported platform for local simulation: "${platform}"`);
151
+ }
152
+ }
@@ -0,0 +1,185 @@
1
+ /**
2
+ * IOSDevice — drives a local iOS simulator via `xcrun simctl` (lifecycle +
3
+ * screenshot) and WebDriverAgent/XCUITest (UI + a11y; see xcuitest.ts),
4
+ * implementing the SimulationDevice surface the loop expects. Mirrors
5
+ * AndroidDevice; the one substantive difference is the coordinate space.
6
+ *
7
+ * Two resolution paths, mirroring the browser:
8
+ * - ELEMENT (preferred): observe() reads WDA's `/source` a11y tree, serializes
9
+ * it to the `[id] role "label"` string the backend DOMLocator reasons over,
10
+ * and keeps a local `shortId → bounds` map (bounds in POINTS). The backend
11
+ * returns a `node_id`; executeAction() looks the bounds up and taps the
12
+ * element's CENTER.
13
+ * - VISION (fallback): when the tree is empty/sparse, observe() returns an
14
+ * empty tree so the backend takes its vision branch and returns NORMALIZED
15
+ * 0-1000 coordinates. Also taken per-action whenever node_id is absent.
16
+ *
17
+ * COORDINATE SPACE — two spaces, the key difference from Android (where
18
+ * screencap and tap share one pixel space):
19
+ * `simctl io booted screenshot` is in PIXELS (e.g. 1179x2556 @3x), but
20
+ * WDA taps/swipes AND the `/source` a11y frames are POINTS (393x852).
21
+ * The invariant in BOTH paths: TAP in points, RECORD in pixels, because the
22
+ * loop re-normalizes the recorded coord against dimensions() (PIXELS).
23
+ * - VISION: tap pt = round(n/1000 * pointSize); record px = round(n/1000 * pixelSize).
24
+ * - ELEMENT: tap = bounds-center (already POINTS); record = that center
25
+ * scaled POINTS→PIXELS via pointToPixel() (the @Nx scale).
26
+ * dimensions() returns the PIXEL size, so the loop re-normalizes the recorded
27
+ * px back to a stable 0-1000. Recording in points would drift: the point grid
28
+ * (393) is coarser than the 0-1000 grid, so a points round-trip double-rounds
29
+ * (500→197→501). Pixels (1179 > 1000) are finer → identity. The vision model
30
+ * is resolution-independent (0-1000 is a fraction of the image), so the
31
+ * backend never converts coords with screen_width/height.
32
+ */
33
+ import type { LocalStepAction, ContextValue } from "./types.js";
34
+ import type { SimulationDevice, DeviceObservation, DeviceActionResult } from "./device.js";
35
+ export interface IosDeviceOptions { // constructor options for IOSDevice
36
+ /** Bundle id to terminate/relaunch between participants. Derived from --app when a .app is given. */
37
+ bundleId?: string;
38
+ /** Local .app path to install before the run, or a bundle id to launch. */
39
+ appPath?: string;
40
+ contextValues: ContextValue[]; // required; element type comes from ./types.js (how the values are consumed is not visible in this declaration)
41
+ log?: (msg: string) => void; // optional callback that receives log messages as strings
42
+ }
43
+ export declare class IOSDevice implements SimulationDevice { // declaration only: private members appear without types, bodies live in ios.js
44
+ private readonly contextValues; // presumably IosDeviceOptions.contextValues — confirm in ios.js
45
+ private readonly log; // presumably IosDeviceOptions.log (or a default) — confirm in ios.js
46
+ private bundleId; // not readonly; see resolveBundleId() below for how the id is determined
47
+ private readonly appPath; // presumably IosDeviceOptions.appPath — confirm in ios.js
48
+ /** udid of the single booted simulator we drive. */
49
+ private udid;
50
+ /** Set once the WebDriverAgent runner is up, so the startup note logs once. */
51
+ private wdaStarted;
52
+ /** POINT size — what idb ui tap/swipe consume (de-normalization basis for TAPS). NOTE(review): the file header and toPoints() say WDA taps/swipes take points; the "idb" wording here looks stale — confirm. */
53
+ private pointWidth;
54
+ private pointHeight;
55
+ /**
56
+ * PIXEL size — the screenshot resolution and the RECORDED coord space.
57
+ * Recording in pixels (not points) keeps the loop's round-trip exact: the
58
+ * point grid (e.g. 393) is coarser than the 0-1000 normalized grid, so a
59
+ * points round-trip double-rounds and drifts; pixels (e.g. 1179 > 1000) are
60
+ * finer, so de-normalize-then-re-normalize is an identity.
61
+ */
62
+ private pixelWidth;
63
+ private pixelHeight;
64
+ /**
65
+ * shortId → bounds (POINTS — idb describe-all frames) from the last observe(),
66
+ * the local counterpart of BrowserDevice.lastTreeData. executeAction()
67
+ * resolves a backend `node_id` against this; the bounds-center is the POINT
68
+ * tap target (recorded in pixels via pointToPixel). NOTE(review): the file header says the bounds come from WDA's `/source` tree, not idb describe-all — confirm.
69
+ */
70
+ private lastNodeMap;
71
+ constructor(opts: IosDeviceOptions);
72
+ launchOrReset(target: string): Promise<void>; // presumably installs/relaunches the app for a fresh participant (see IosDeviceOptions.bundleId) — confirm in ios.js
73
+ /**
74
+ * Resolve the bundle id to drive, returning a non-null id or throwing.
75
+ * Installs a local `.app` first and reads its CFBundleIdentifier from
76
+ * Info.plist (no list-diff needed — a .app carries its id). A non-.app local
77
+ * value is treated as an already-installed bundle id.
78
+ */
79
+ private resolveBundleId;
80
+ private refreshScreen; // presumably refreshes the cached point/pixel sizes above — confirm in ios.js
81
+ observe(): Promise<DeviceObservation>; // per the file header: returns the serialized a11y tree, or an empty tree so the backend falls back to vision
82
+ /**
83
+ * Read + serialize WDA's /source a11y tree (bounds in POINTS). Any
84
+ * failure (retries exhausted on a trivial tree, parse error) degrades to an
85
+ * empty tree so the backend falls back to vision — a missing tree must never
86
+ * abort the observation.
87
+ */
88
+ private dumpTree;
89
+ captureScreenshot(): Promise<string>; // resolves to a string (presumably base64-encoded image data — confirm in ios.js)
90
+ captureScreenshotJpeg(): Promise<Buffer>; // resolves to a Buffer holding the JPEG
91
+ dimensions(): { // PIXEL size per the file header; the loop re-normalizes recorded coords against it
92
+ width: number;
93
+ height: number;
94
+ };
95
+ /** Normalized 0-1000 → POINT space (WDA taps/swipes take points). */
96
+ private toPoints;
97
+ /** Normalized 0-1000 → PIXEL space (the recorded/reported coord). */
98
+ private toPixels;
99
+ /**
100
+ * Resolve the POINT tap target + PIXEL record coord for a positional action.
101
+ * ELEMENT path (node_id): the bounds-center is the POINT tap; the recorded
102
+ * pixel coord is that center scaled POINTS→PIXELS so it round-trips against
103
+ * dimensions() (pixels). VISION path: de-normalize the 0-1000 coord into both
104
+ * spaces. Returns {stale:true} for a node_id with no bounds (tree moved); the
105
+ * caller fails the action so the loop forwards DOM_ELEMENT_NOT_FOUND.
106
+ */
107
+ private resolveTarget;
108
+ executeAction(action: LocalStepAction): Promise<DeviceActionResult>; // public action entry point; positional targets are resolved as described on resolveTarget
109
+ private failNoCoords; // presumably builds the failure result when an action carries no usable coordinates — confirm in ios.js
110
+ private failStaleNode; // presumably builds the failure result for the {stale:true} case described on resolveTarget — confirm in ios.js
111
+ private typeText; // private input helper; signature is erased in this declaration file
112
+ private scroll; // private gesture helper; signature is erased in this declaration file
113
+ private swipe; // private gesture helper; signature is erased in this declaration file
114
+ /**
115
+ * Perform a drag: press the GRABBED element, move to the drop point, release.
116
+ * A drag is "click an element and let it go", so the press lands element-
117
+ * center (the resolved `grab` in POINTS — node_id bounds center, or the
118
+ * vision coordinate when the tree is blind), NOT the backend's vision-
119
+ * estimated start. The release point is the drag END (drag.endX/endY). A
120
+ * ~0.8s idb swipe reads as a drag, not a flick. Returns the grab point scaled
121
+ * to PIXELS (pointToPixel) to record so it round-trips against dimensions()
122
+ * (pixels), or null if there's no end to drag toward.
123
+ *
124
+ * idb LIMITATION: `idb ui swipe` only exposes --duration/--delta — it has no
125
+ * press-and-HOLD-then-move primitive (unlike Android's `input draganddrop`).
126
+ * So this drives the immediate-drag surfaces (sliders, drag-to-dismiss, drag
127
+ * handles that pick up on touch-move) but does NOT trigger a long-press
128
+ * pickup (home-screen jiggle mode, in-app reorder that needs a hold first) —
129
+ * verified on-device: a long uiSwipe leaves home-screen icons unmoved. The
130
+ * grab/release SEMANTICS are still correct; the gap is purely the missing
131
+ * hold, which idb can't perform in one continuous gesture. NOTE(review): this comment describes idb, while the file header says UI is driven through WebDriverAgent — confirm whether the limitation still applies.
132
+ */
133
+ private drag;
134
+ /**
135
+ * iOS has no hardware back. The system interactive-pop (left-edge swipe) is
136
+ * NOT reliably triggerable through idb's synthetic touch — verified on the
137
+ * simulator: no edge-swipe variant (start x, travel, duration, delta) pops
138
+ * the view. So we resolve and TAP the nav-bar back button instead: iOS HIG
139
+ * places "back" as the LEADING (leftmost) button in the top nav bar of any
140
+ * pushed view, so the leftmost button in the nav-bar band is it — verified to
141
+ * pop a Settings sub-screen back to root. The left-edge swipe remains a
142
+ * best-effort fallback for real devices (where idb sends real HID events that
143
+ * do drive the system gesture) when no back button is visible.
144
+ */
145
+ private navigateBack;
146
+ /**
147
+ * Best-effort open of an iOS system panel by swiping down from the top edge.
148
+ * iOS has no `cmd statusbar` equivalent, so on a Face-ID layout:
149
+ * - notifications → Notification Center: swipe down from the top-CENTER.
150
+ * - quick_settings → Control Center: swipe down from the top-RIGHT corner.
151
+ * Coordinates are POINTS (idb consumes points; see toPoints()/the swipe()
152
+ * helper). This is FLAKY on the simulator — idb's synthetic touch frequently
153
+ * doesn't trigger the system edge gesture (the same limitation navigateBack's
154
+ * edge-swipe hits). We compare a before/after screenshot and log LOUDLY when
155
+ * the screen didn't change, rather than silently reporting success, so a
156
+ * no-op is visible in the run. The executeAction caller still returns
157
+ * success:true (the gesture was attempted); the loud log is the signal. NOTE(review): "idb consumes points" conflicts with toPoints(), which names WDA — confirm which driver sends the swipe.
158
+ */
159
+ private openSystemPanel;
160
+ /**
161
+ * The nav-bar back button: the leading (leftmost) actionable button in the
162
+ * top nav-bar band. iOS HIG guarantees "back" is the leading nav item in a
163
+ * pushed view, so the leftmost button high on the screen is it. Returns null
164
+ * on root screens (no leading back item) so the caller can fall back.
165
+ *
166
+ * The geometry alone (leftmost-top) would mis-fire on a modal whose LEADING
167
+ * item is Cancel/Close, or a root with a leading Edit/menu — and tapping
168
+ * Cancel/Close can DISCARD work. A stock back button is labeled with the
169
+ * PARENT screen's title (e.g. "Settings"), not "Back", so there's no reliable
170
+ * positive label signal; instead we exclude the known non-back leading
171
+ * labels. If every leading button is one of those, we return null and let the
172
+ * caller fall back rather than tap a destructive control.
173
+ *
174
+ * Known limitation: a glyph-only leading button with NO accessible label
175
+ * (e.g. a hamburger/avatar/logo) isn't in the deny-list, so on a screen whose
176
+ * leading control is an unlabeled non-back icon this can tap the wrong control
177
+ * (silently — it returns success). Acceptable for the common case (stock nav
178
+ * bars have a labeled back button), but it's why pushed views, not root/menu
179
+ * screens, are where navigate_back is reliable.
180
+ */
181
+ private findBackButton;
182
+ private failUnsupported; // presumably builds the failure result for action types this device does not implement — confirm in ios.js
183
+ currentUrl(): string; // returns a string; what a native app reports as its "URL" is defined in ios.js
184
+ close(): Promise<void>; // async teardown; what gets released is defined in ios.js
185
+ }