mobile-debug-mcp 0.22.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,35 @@
1
/**
 * Deterministic, side-effect-free classifier for the outcome of the most
 * recent action. Rules are evaluated top to bottom and the first match wins,
 * so identical inputs always produce an identical result.
 *
 * @param {object} input
 * @param {boolean} input.uiChanged - Truthy when the UI changed after the action.
 * @param {boolean|null} [input.expectedElementVisible] - Only a strict `true` counts as a positive signal.
 * @param {Array<{endpoint: string, status: string}>|null} [input.networkRequests] -
 *   `null`/`undefined` means network activity has not been fetched yet.
 * @param {boolean|null} [input.hasLogErrors] - Only annotates the `no_op` reasoning.
 * @returns {{outcome: string, reasoning: string, nextAction?: string}}
 */
export function classifyActionOutcome(input) {
    const elementSeen = input.expectedElementVisible === true;
    const requests = input.networkRequests;

    // Rule 1: any positive UI signal means the action worked.
    if (input.uiChanged || elementSeen) {
        const reasoning = elementSeen ? 'expected element is visible' : 'UI changed after action';
        return { outcome: 'success', reasoning };
    }

    // Rule 2: without UI change the network signal is mandatory; ask for it.
    if (requests === null || requests === undefined) {
        return {
            outcome: 'unknown',
            reasoning: 'UI did not change; get_network_activity must be called before classification can proceed',
            nextAction: 'call_get_network_activity'
        };
    }

    // Rule 3: the first failed or retryable request blames the backend.
    const isFailed = (request) => request.status === 'failure' || request.status === 'retryable';
    const firstFailure = requests.find(isFailed);
    if (firstFailure) {
        return {
            outcome: 'backend_failure',
            reasoning: `network request ${firstFailure.endpoint} returned ${firstFailure.status}`
        };
    }

    // Rule 4: nothing happened at all, neither on screen nor on the wire.
    if (requests.length === 0) {
        const suffix = input.hasLogErrors ? ' (log errors present)' : '';
        return { outcome: 'no_op', reasoning: `no UI change and no network activity${suffix}` };
    }

    // Rule 5: the backend answered fine, so the UI failed to react.
    const allSucceeded = requests.every((request) => request.status === 'success');
    if (allSucceeded) {
        return { outcome: 'ui_failure', reasoning: 'network requests succeeded but UI did not change' };
    }

    // Rule 6: requests carry statuses outside the known set.
    return { outcome: 'unknown', reasoning: 'signals are inconclusive' };
}
@@ -0,0 +1,232 @@
1
+ import { execAdb, parseLogLine } from '../utils/android/utils.js';
2
+ import { execCommand } from '../utils/ios/utils.js';
3
+ // ─── Module state ─────────────────────────────────────────────────────────────
4
// lastActionTimestamp: epoch milliseconds, set when an action tool fires (tap, swipe, etc.);
// stays 0 until the first action is reported.
// lastConsumedTimestamp: epoch milliseconds, advanced after each get_network_activity call to
// prevent duplicates; reset to 0 whenever a new action starts.
let lastActionTimestamp = 0;
let lastConsumedTimestamp = 0;
8
/**
 * Records that a new action has just started: stamps the action time with the
 * current clock and clears the "already consumed" marker.
 */
export function notifyActionStart() {
    lastConsumedTimestamp = 0;
    lastActionTimestamp = Date.now();
}
12
/**
 * Exposed for unit tests only: overwrites both module-level timestamps.
 *
 * @param {number} actionTs - value stored as the last action timestamp.
 * @param {number} consumedTs - value stored as the last consumed timestamp.
 */
export function _setTimestampsForTests(actionTs, consumedTs) {
    [lastActionTimestamp, lastConsumedTimestamp] = [actionTs, consumedTs];
}
17
+ // ─── Parsing constants ────────────────────────────────────────────────────────
18
// First absolute http/https URL; stops at whitespace, quotes, brackets and angle brackets.
const URL_RE = /https?:\/\/[^\s"'\]\)><]+/;
// First slash-separated path made of at least two segments.
const PATH_RE = /\/[a-zA-Z0-9_.-]+(?:\/[a-zA-Z0-9_.-]+)+/;
// Upper-case HTTP verb appearing as a whole word.
const METHOD_RE = /\b(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)\b/;

// Checked in order; the first pattern that matches decides the error code.
const NETWORK_ERROR_PATTERNS = [
    {
        re: /timed?\s*out|timeout/i,
        code: 'timeout'
    },
    {
        re: /dns|name[\s_]resolution|host\s*not\s*found|nodename/i,
        code: 'dns_error'
    },
    {
        re: /\btls\b|\bssl\b|certificate|handshake/i,
        code: 'tls_error'
    },
    {
        re: /connection\s*refused/i,
        code: 'connection_refused'
    },
    {
        re: /connection\s*reset|reset\s*by\s*peer/i,
        code: 'connection_reset'
    },
];

// Endpoint fragments and file extensions treated as background traffic.
const BACKGROUND_TOKENS = [
    '/analytics', '/metrics', '/tracking', '/log',
    '/events', '/telemetry', '/ping', '/beacon',
];
const BACKGROUND_EXTENSIONS = [
    '.png', '.jpg', '.jpeg', '.gif', '.webp', '.css',
    '.js', '.svg', '.ico', '.woff', '.ttf',
];

// Path prefixes and extensions that indicate a filesystem path rather than an HTTP endpoint.
const FILESYSTEM_PREFIXES = [
    '/data/', '/system/', '/apex/', '/proc/', '/dev/',
    '/vendor/', '/product/', '/storage/', '/sdcard/', '/mnt/',
    '/odm/', '/cache/', '/metadata/', '/acct/', '/sys/',
];
const FILESYSTEM_EXTENSIONS = [
    '.apk', '.apex', '.odex', '.vdex', '.dex', '.so', '.jar', '.bin', '.img', '.db', '.sqlite',
    '.c', '.cc', '.cpp', '.cxx', '.h', '.hpp', '.m', '.mm', '.kt', '.java', '.swift',
];
32
+ // ─── Parsing helpers ─────────────────────────────────────────────────────────
33
/**
 * Returns the first http/https URL found in `text`, or null when there is none.
 *
 * @param {string} text
 * @returns {string|null}
 */
function extractUrl(text) {
    return text.match(URL_RE)?.[0] ?? null;
}
37
/**
 * Decides whether a slash-separated path looks like an HTTP endpoint rather
 * than a filesystem or source-file path. The comparison is case-insensitive.
 *
 * @param {string} path
 * @returns {boolean}
 */
function isPlausibleEndpointPath(path) {
    const candidate = path.toLowerCase();
    const hasFilesystemPrefix = FILESYSTEM_PREFIXES.some((prefix) => candidate.startsWith(prefix));
    const hasFilesystemExtension = FILESYSTEM_EXTENSIONS.some((ext) => candidate.endsWith(ext));
    return candidate.startsWith('/') && !hasFilesystemPrefix && !hasFilesystemExtension;
}
47
/**
 * Returns the first multi-segment path in `text` when it looks like an HTTP
 * endpoint; returns null when no path is found or the first one is implausible.
 *
 * @param {string} text
 * @returns {string|null}
 */
function extractPath(text) {
    const candidate = text.match(PATH_RE)?.[0];
    if (candidate === undefined) {
        return null;
    }
    return isPlausibleEndpointPath(candidate) ? candidate : null;
}
53
/**
 * Converts a captured string into an HTTP status code.
 *
 * @param {string|undefined} value
 * @returns {number|null} the numeric code when it lies in 100..599, otherwise null.
 */
function toStatusCode(value) {
    // A missing capture becomes NaN, which fails both range comparisons below.
    const parsed = value ? Number(value) : Number.NaN;
    const inRange = parsed >= 100 && parsed <= 599;
    return inRange ? parsed : null;
}
59
/**
 * Backslash-escapes every regular-expression metacharacter in `value` so the
 * result can be embedded literally in a `RegExp` source.
 *
 * @param {string} value
 * @returns {string}
 */
function escapeRegExp(value) {
    return value.replace(/[.*+?^${}()|[\]\\]/g, (special) => `\\${special}`);
}
62
/**
 * Extracts an HTTP status code from a single log line.
 *
 * Heuristics, tried in order:
 *   1. An explicit protocol marker: "HTTP/1.1 404" or "HTTP 404".
 *   2. A labelled code ("status: 500", "response code=201"), accepted only when
 *      the line also carries an endpoint (URL or path).
 *   3. A three-digit code on the same line before or after the endpoint text.
 *
 * The former "method followed by any three-digit number" fallback was removed.
 * Any number outside the endpoint is already caught by step 3, so that fallback
 * could only match digits inside the endpoint itself. That turned resource ids
 * into status codes, e.g. "GET /users/123" was read as status 123.
 *
 * @param {string} text - the log line.
 * @param {string|null} url - URL previously extracted from `text`, if any.
 * @param {string|null} path - path previously extracted from `text`, if any.
 * @param {string|null} method - HTTP method previously extracted from `text`; kept for
 *   interface compatibility, it cannot produce a status code on its own.
 * @returns {number|null} a code in 100..599, or null when none is credible.
 */
function extractStatusCode(text, url, path, method) {
    const directHttpMatch = text.match(/\bHTTP\/\d(?:\.\d)?\s+([1-5]\d{2})\b/i) || text.match(/\bHTTP\s+([1-5]\d{2})\b/i);
    if (directHttpMatch) {
        return toStatusCode(directHttpMatch[1]);
    }

    const endpointToken = url || path;
    const hasEndpointContext = endpointToken !== null;
    if (!hasEndpointContext && method === null) {
        return null;
    }

    const labeledMatch = text.match(/\b(?:status(?:\s*code)?|response(?:\s*code)?)\s*[:=]?\s*([1-5]\d{2})\b/i);
    if (labeledMatch && hasEndpointContext) {
        return toStatusCode(labeledMatch[1]);
    }

    if (endpointToken) {
        // The endpoint is user-controlled text, so escape it before building a pattern.
        const escapedEndpoint = escapeRegExp(endpointToken);
        const endpointThenCode = new RegExp(`${escapedEndpoint}[^\\n]*?\\b([1-5]\\d{2})\\b`, 'i');
        const codeThenEndpoint = new RegExp(`\\b([1-5]\\d{2})\\b[^\\n]*?${escapedEndpoint}`, 'i');
        const contextualMatch = text.match(endpointThenCode) || text.match(codeThenEndpoint);
        if (contextualMatch) {
            return toStatusCode(contextualMatch[1]);
        }
    }

    return null;
}
88
/**
 * Returns the first upper-case HTTP verb that appears as a whole word in
 * `text`, or null when none is present.
 *
 * @param {string} text
 * @returns {string|null}
 */
function extractMethod(text) {
    return text.match(METHOD_RE)?.[1] ?? null;
}
92
/**
 * Maps a log line to a network error code using the ordered pattern table.
 *
 * @param {string} text
 * @returns {string|null} the code of the first matching pattern, or null.
 */
function detectNetworkError(text) {
    const firstHit = NETWORK_ERROR_PATTERNS.find((pattern) => pattern.re.test(text));
    return firstHit ? firstHit.code : null;
}
99
/**
 * Reduces a URL or path to a canonical endpoint key: lower-cased path only,
 * without trailing slashes. An empty result becomes "/".
 *
 * @param {string} raw - absolute URL, or a path starting with "/".
 * @returns {string}
 */
export function normalizeEndpoint(raw) {
    let pathname;
    try {
        // A bare path is given a dummy origin so the URL parser can handle it.
        const absolute = raw.startsWith('/') ? `https://x${raw}` : raw;
        pathname = new URL(absolute).pathname.toLowerCase();
    }
    catch {
        // Not parseable as a URL: fall back to stripping the query string by hand.
        pathname = raw.toLowerCase().replace(/\?.*$/, '');
    }
    return pathname.replace(/\/+$/, '') || '/';
}
109
/**
 * Maps a status code plus optional network error onto a request status.
 *
 * - any network error               -> 'retryable'
 * - no status code, or 200..299     -> 'success'
 * - 400..499                        -> 'failure'
 * - everything else (1xx, 3xx, 5xx) -> 'retryable'
 *
 * @param {number|null} statusCode
 * @param {string|null} networkError
 * @returns {'success'|'failure'|'retryable'}
 */
export function classifyStatus(statusCode, networkError) {
    if (networkError !== null) {
        return 'retryable';
    }
    // A detected request without any failure signal is treated as successful.
    const isSuccess = statusCode === null || (statusCode >= 200 && statusCode <= 299);
    if (isSuccess) {
        return 'success';
    }
    const isClientError = statusCode >= 400 && statusCode <= 499;
    return isClientError ? 'failure' : 'retryable';
}
120
/**
 * Decides whether a parsed log line is credible enough to be reported as a
 * network event. Any one of the following suffices:
 *   1. a full http/https URL,
 *   2. a valid HTTP status code,
 *   3. an HTTP method together with a path.
 *
 * @param {string|null} url
 * @param {string|null} path
 * @param {number|null} statusCode
 * @param {string|null} method
 * @returns {boolean}
 */
function meetsEmissionCriteria(url, path, statusCode, method) {
    const hasUrl = url !== null;
    const hasStatus = statusCode !== null;
    const hasMethodAndPath = method !== null && path !== null;
    return hasUrl || hasStatus || hasMethodAndPath;
}
129
/**
 * Classifies an endpoint as 'background' (telemetry, static assets) or
 * 'primary' (everything else). The comparison is case-insensitive.
 *
 * A background token only counts when it ends at a path boundary: the end of
 * the string, "/", "?" or "#". Plain substring matching treated "/login" and
 * "/logout" as "/log" and "/pingpong" as "/ping", so real user-facing requests
 * were filtered out as background noise. As a consequence, variants such as
 * "/logs" are no longer matched implicitly; add them to BACKGROUND_TOKENS if
 * they should count as background.
 *
 * @param {string} endpoint - normalized endpoint or raw URL.
 * @returns {'primary'|'background'}
 */
function classifyEventType(endpoint) {
    const lower = endpoint.toLowerCase();
    // Checks every occurrence of the token, because an early partial hit
    // ("/login") must not hide a later whole-segment hit ("/login/log").
    const matchesWholeSegment = (token) => {
        for (let at = lower.indexOf(token); at !== -1; at = lower.indexOf(token, at + 1)) {
            const end = at + token.length;
            if (end === lower.length || '/?#'.includes(lower[end])) {
                return true;
            }
        }
        return false;
    };
    if (BACKGROUND_TOKENS.some(matchesWholeSegment)) {
        return 'background';
    }
    if (BACKGROUND_EXTENSIONS.some((ext) => lower.endsWith(ext))) {
        return 'background';
    }
    return 'primary';
}
137
/**
 * Keeps only primary events when at least one exists; otherwise returns the
 * input array untouched, so background-only traffic is still reported.
 *
 * @param {Array<{endpoint: string}>} events
 * @returns {Array<{endpoint: string}>}
 */
function filterToSignificantEvents(events) {
    const primaryOnly = events.filter((event) => classifyEventType(event.endpoint) === 'primary');
    return primaryOnly.length > 0 ? primaryOnly : events;
}
143
/**
 * Turns one log message into a network event, or returns null when the message
 * carries no credible network signal. Exported for unit testing.
 *
 * A path is only looked for when no full URL is present. `durationMs` is
 * always reported as 0.
 *
 * @param {string} message
 * @returns {{endpoint: string, method: string, statusCode: number|null,
 *   networkError: string|null, status: string, durationMs: number}|null}
 */
export function parseMessageToEvent(message) {
    const url = extractUrl(message);
    const path = url ? null : extractPath(message);
    const method = extractMethod(message);
    const statusCode = extractStatusCode(message, url, path, method);
    const networkError = detectNetworkError(message);

    const credible = meetsEmissionCriteria(url, path, statusCode, method);
    if (!credible) {
        return null;
    }

    const endpoint = normalizeEndpoint(url || path || 'unknown');
    const status = classifyStatus(statusCode, networkError);
    return {
        endpoint,
        method: method || 'unknown',
        statusCode,
        networkError,
        status,
        durationMs: 0
    };
}
162
+ // ─── Android ─────────────────────────────────────────────────────────────────
163
/**
 * Dumps the most recent logcat lines and converts those newer than `sinceMs`
 * into network events. Lines without a parseable timestamp are kept. Any
 * failure (adb or parsing) yields an empty list.
 *
 * @param {number} sinceMs - epoch milliseconds; entries at or before it are skipped.
 * @param {string} [deviceId] - forwarded to execAdb.
 * @returns {Promise<Array<object>>}
 */
async function getAndroidEvents(sinceMs, deviceId) {
    // True only for a valid, positive timestamp that is not newer than the cut-off.
    const isStale = (iso) => {
        if (!iso) {
            return false;
        }
        const ts = new Date(iso).getTime();
        return ts > 0 && ts <= sinceMs;
    };
    try {
        const stdout = await execAdb(['logcat', '-d', '-v', 'threadtime', '*:V', '-t', '2000'], deviceId);
        const lines = (stdout || '').split(/\r?\n/).filter(Boolean);
        return lines.flatMap((line) => {
            const parsed = parseLogLine(line);
            if (isStale(parsed._iso)) {
                return [];
            }
            const event = parseMessageToEvent(parsed.message || line);
            return event ? [event] : [];
        });
    }
    catch {
        return [];
    }
}
185
+ // ─── iOS ─────────────────────────────────────────────────────────────────────
186
/**
 * Reads recent simulator logs through `simctl ... log show` and converts lines
 * newer than `sinceMs` into network events. Lines without a leading timestamp
 * are kept. Any failure yields an empty list.
 *
 * @param {number} sinceMs - epoch milliseconds; entries at or before it are skipped.
 * @param {string} [deviceId='booted'] - simulator identifier passed to simctl.
 * @returns {Promise<Array<object>>}
 */
async function getIOSEvents(sinceMs, deviceId = 'booted') {
    // True only when the line starts with a timestamp that is not newer than the cut-off.
    const isStale = (line) => {
        const stamp = line.match(/^(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})/);
        if (!stamp) {
            return false;
        }
        const ts = new Date(stamp[1]).getTime();
        return ts > 0 && ts <= sinceMs;
    };
    try {
        // Look back over the elapsed time plus a 5 s margin, never less than 15 s.
        const elapsedSeconds = Math.ceil((Date.now() - sinceMs) / 1000);
        const lookbackSeconds = Math.max(15, elapsedSeconds + 5);
        const result = await execCommand([
            'simctl', 'spawn', deviceId, 'log', 'show',
            '--last', `${lookbackSeconds}s`,
            '--style', 'syslog',
            '--predicate', 'eventMessage contains "http" OR eventMessage contains "URLSession" OR eventMessage contains "Task <" OR eventMessage contains "HTTP/"'
        ], deviceId);
        const lines = result.output ? result.output.split(/\r?\n/).filter(Boolean) : [];
        return lines.flatMap((line) => {
            if (isStale(line)) {
                return [];
            }
            const event = parseMessageToEvent(line);
            return event ? [event] : [];
        });
    }
    catch {
        return [];
    }
}
215
+ // ─── Public API ───────────────────────────────────────────────────────────────
216
/**
 * Public entry points of the network module.
 */
export class ToolsNetwork {
    /** Marks the start of a new action; delegates to the module-level function. */
    static notifyActionStart() {
        notifyActionStart();
    }

    /**
     * Collects network events observed since the last action (or since the
     * previous call, whichever is later) and keeps only the significant ones.
     *
     * @param {{platform: string, deviceId?: string}} params - 'android' selects
     *   the Android reader; any other platform value uses the iOS reader.
     * @returns {Promise<{requests: Array<object>, count: number}>}
     */
    static async getNetworkActivity(params) {
        const { platform, deviceId } = params;

        let sinceMs;
        if (lastConsumedTimestamp > lastActionTimestamp) {
            // Events up to the previous call were already handed out.
            sinceMs = lastConsumedTimestamp;
        }
        else if (lastActionTimestamp > 0) {
            sinceMs = lastActionTimestamp;
        }
        else {
            // No action recorded yet: look at the last 30 seconds.
            sinceMs = Date.now() - 30000;
        }

        const collect = platform === 'android' ? getAndroidEvents : getIOSEvents;
        const requests = filterToSignificantEvents(await collect(sinceMs, deviceId));
        lastConsumedTimestamp = Date.now();
        return { requests, count: requests.length };
    }
}
@@ -3,6 +3,8 @@ import { ListToolsRequestSchema, CallToolRequestSchema } from '@modelcontextprot
3
3
  import { ToolsManage } from './manage/index.js';
4
4
  import { ToolsInteract } from './interact/index.js';
5
5
  import { ToolsObserve } from './observe/index.js';
6
+ import { classifyActionOutcome } from './interact/classify.js';
7
+ import { ToolsNetwork } from './network/index.js';
6
8
  import { AndroidManage } from './manage/index.js';
7
9
  import { iOSManage } from './manage/index.js';
8
10
  import { getSystemStatus } from './system/index.js';
@@ -478,10 +480,99 @@ export const toolDefinitions = [
478
480
  }
479
481
  }
480
482
  }
483
+ },
484
+ {
485
+ name: 'classify_action_outcome',
486
+ description: `Classify the outcome of the most recent action into exactly one of: success, no_op, backend_failure, ui_failure, unknown.
487
+
488
+ MUST be called after every action (tap, swipe, type_text, press_back, start_app, etc). Never skip.
489
+
490
+ HOW TO GATHER INPUTS before calling:
491
+ 1. Call wait_for_screen_change or compare get_screen_fingerprint before/after — set uiChanged accordingly.
492
+ 2. If you checked for a specific element with wait_for_ui, set expectedElementVisible.
493
+ 3. Do NOT call get_network_activity yet — omit networkRequests on the first call.
494
+
495
+ RULES (applied in order — stop at first match):
496
+ 1. If uiChanged=true OR expectedElementVisible=true → outcome=success
497
+ 2. Otherwise this tool returns nextAction="call_get_network_activity" — you MUST call get_network_activity once, then call classify_action_outcome again with the results in networkRequests.
498
+ 3. If any request has status=failure or retryable → outcome=backend_failure
499
+ 4. If no requests returned → outcome=no_op
500
+ 5. If all requests succeeded → outcome=ui_failure
501
+ 6. Otherwise → outcome=unknown
502
+
503
+ BEHAVIOUR after outcome:
504
+ - success → continue
505
+ - no_op → retry the action once or re-resolve the element
506
+ - backend_failure → stop and report the failing endpoint
507
+ - ui_failure → stop and report failure
508
+ - unknown → take one recovery step (e.g. capture_debug_snapshot), then stop`,
509
+ inputSchema: {
510
+ type: 'object',
511
+ properties: {
512
+ uiChanged: {
513
+ type: 'boolean',
514
+ description: 'true if the screen fingerprint or activity changed after the action. Use wait_for_screen_change or compare get_screen_fingerprint before and after.'
515
+ },
516
+ expectedElementVisible: {
517
+ type: 'boolean',
518
+ description: 'true if the element you expected to appear is now visible (from wait_for_ui). Omit if you did not check for a specific element.'
519
+ },
520
+ networkRequests: {
521
+ type: 'array',
522
+ description: 'Pass this only after calling get_network_activity as instructed by nextAction. Map each request to endpoint + status.',
523
+ items: {
524
+ type: 'object',
525
+ properties: {
526
+ endpoint: { type: 'string', description: 'Request endpoint or full URL' },
527
+ status: { type: 'string', enum: ['success', 'failure', 'retryable'], description: 'Outcome of the request' }
528
+ },
529
+ required: ['endpoint', 'status']
530
+ }
531
+ },
532
+ hasLogErrors: {
533
+ type: 'boolean',
534
+ description: 'true if structured log errors were observed (e.g. from read_log_stream). Optional — include if you have already read logs.'
535
+ }
536
+ },
537
+ required: ['uiChanged']
538
+ }
539
+ },
540
+ {
541
+ name: 'get_network_activity',
542
+ description: `Returns structured network events captured from platform logs since the last action.
543
+
544
+ Call this only when classify_action_outcome returns nextAction="call_get_network_activity".
545
+ Do not call more than once per action.
546
+
547
+ Events are filtered to significant (non-background) requests only.
548
+ Each event includes endpoint, method, statusCode, networkError, status, and durationMs.
549
+
550
+ status values:
551
+ - success: HTTP 2xx or request detected with no error signal
552
+ - failure: HTTP 4xx
553
+ - retryable: HTTP 5xx, network error (timeout, dns_error, tls_error, etc.)
554
+
555
+ Returns { requests: [], count: 0 } when no credible network signals are found.`,
556
+ inputSchema: {
557
+ type: 'object',
558
+ properties: {
559
+ platform: {
560
+ type: 'string',
561
+ enum: ['android', 'ios'],
562
+ description: 'Platform to read network logs from'
563
+ },
564
+ deviceId: {
565
+ type: 'string',
566
+ description: 'Device Serial (Android) or UDID (iOS). Defaults to connected/booted device.'
567
+ }
568
+ },
569
+ required: ['platform']
570
+ }
481
571
  }
482
572
  ];
483
573
  async function handleStartApp(args) {
484
574
  const { platform, appId, deviceId } = args;
575
+ ToolsNetwork.notifyActionStart();
485
576
  const res = await (platform === 'android' ? new AndroidManage().startApp(appId, deviceId) : new iOSManage().startApp(appId, deviceId));
486
577
  const response = {
487
578
  device: res.device,
@@ -498,6 +589,7 @@ async function handleTerminateApp(args) {
498
589
  }
499
590
  async function handleRestartApp(args) {
500
591
  const { platform, appId, deviceId } = args;
592
+ ToolsNetwork.notifyActionStart();
501
593
  const res = await (platform === 'android' ? new AndroidManage().restartApp(appId, deviceId) : new iOSManage().restartApp(appId, deviceId));
502
594
  const response = { device: res.device, appRestarted: res.appRestarted, launchTimeMs: res.launchTimeMs };
503
595
  return wrapResponse(response);
@@ -605,31 +697,37 @@ async function handleFindElement(args) {
605
697
  }
606
698
  async function handleTap(args) {
607
699
  const { platform, x, y, deviceId } = args;
700
+ ToolsNetwork.notifyActionStart();
608
701
  const res = await ToolsInteract.tapHandler({ platform, x, y, deviceId });
609
702
  return wrapResponse(res);
610
703
  }
611
704
  async function handleTapElement(args) {
612
705
  const { elementId } = args;
706
+ ToolsNetwork.notifyActionStart();
613
707
  const res = await ToolsInteract.tapElementHandler({ elementId });
614
708
  return wrapResponse(res);
615
709
  }
616
710
  async function handleSwipe(args) {
617
711
  const { platform = 'android', x1, y1, x2, y2, duration, deviceId } = args;
712
+ ToolsNetwork.notifyActionStart();
618
713
  const res = await ToolsInteract.swipeHandler({ platform, x1, y1, x2, y2, duration, deviceId });
619
714
  return wrapResponse(res);
620
715
  }
621
716
  async function handleScrollToElement(args) {
622
717
  const { platform, selector, direction, maxScrolls, scrollAmount, deviceId } = args;
718
+ ToolsNetwork.notifyActionStart();
623
719
  const res = await ToolsInteract.scrollToElementHandler({ platform, selector, direction, maxScrolls, scrollAmount, deviceId });
624
720
  return wrapResponse(res);
625
721
  }
626
722
  async function handleTypeText(args) {
627
723
  const { text, deviceId } = args;
724
+ ToolsNetwork.notifyActionStart();
628
725
  const res = await ToolsInteract.typeTextHandler({ text, deviceId });
629
726
  return wrapResponse(res);
630
727
  }
631
728
  async function handlePressBack(args) {
632
729
  const { deviceId } = args;
730
+ ToolsNetwork.notifyActionStart();
633
731
  const res = await ToolsInteract.pressBackHandler({ deviceId });
634
732
  return wrapResponse(res);
635
733
  }
@@ -648,6 +746,21 @@ async function handleStopLogStream(args) {
648
746
  const res = await ToolsObserve.stopLogStreamHandler({ platform, sessionId });
649
747
  return wrapResponse(res);
650
748
  }
749
/**
 * Tool handler for `classify_action_outcome`: normalizes the raw tool
 * arguments, runs the classifier and wraps the result.
 *
 * `uiChanged` is coerced to a boolean; every omitted optional field is passed
 * to the classifier as null.
 *
 * @param {object} args - raw tool arguments.
 * @returns {Promise<object>} resolved promise holding the wrapped classification.
 */
function handleClassifyActionOutcome(args) {
    const classifierInput = {
        uiChanged: Boolean(args.uiChanged),
        expectedElementVisible: args.expectedElementVisible ?? null,
        networkRequests: args.networkRequests ?? null,
        hasLogErrors: args.hasLogErrors ?? null
    };
    const classification = classifyActionOutcome(classifierInput);
    return Promise.resolve(wrapResponse(classification));
}
759
/**
 * Tool handler for `get_network_activity`: forwards platform and device to the
 * network module and wraps whatever it returns.
 *
 * @param {{platform: string, deviceId?: string}} args - raw tool arguments.
 * @returns {Promise<object>} wrapped network activity result.
 */
async function handleGetNetworkActivity(args) {
    const activity = await ToolsNetwork.getNetworkActivity({
        platform: args.platform,
        deviceId: args.deviceId
    });
    return wrapResponse(activity);
}
651
764
  const toolHandlers = {
652
765
  start_app: handleStartApp,
653
766
  terminate_app: handleTerminateApp,
@@ -675,7 +788,9 @@ const toolHandlers = {
675
788
  press_back: handlePressBack,
676
789
  start_log_stream: handleStartLogStream,
677
790
  read_log_stream: handleReadLogStream,
678
- stop_log_stream: handleStopLogStream
791
+ stop_log_stream: handleStopLogStream,
792
+ classify_action_outcome: handleClassifyActionOutcome,
793
+ get_network_activity: handleGetNetworkActivity
679
794
  };
680
795
  export async function handleToolCall(name, args = {}) {
681
796
  const handler = toolHandlers[name];
package/docs/CHANGELOG.md CHANGED
@@ -2,6 +2,10 @@
2
2
 
3
3
  All notable changes to the **Mobile Debug MCP** project will be documented in this file.
4
4
 
5
+ ## [0.23.0]
6
+ - Added network monitoring
7
+ - Added the `classify_action_outcome` tool for deterministic action-outcome classification
8
+
5
9
  ## [0.22.0]
6
10
  - Added a portable `test-authoring` skill package and documented the repository's vendor-neutral skill format
7
11
  - Added `AGENTS.md` as a top-level cold-start guide for autonomous agents entering the public repository
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mobile-debug-mcp",
3
- "version": "0.22.0",
3
+ "version": "0.23.0",
4
4
  "description": "MCP server for mobile app debugging (Android + iOS), with focus on security and reliability",
5
5
  "type": "module",
6
6
  "bin": {
@@ -0,0 +1,64 @@
1
/** The five verdicts classifyActionOutcome can return. */
export type ActionOutcome = 'success' | 'no_op' | 'backend_failure' | 'ui_failure' | 'unknown'
/** Status of a single network request as consumed by the classifier. */
export type NetworkRequestStatus = 'success' | 'failure' | 'retryable'

/** The part of a network request the classifier looks at. */
export interface NetworkRequest {
  /** Echoed in the reasoning text when this request is the first failing one. */
  endpoint: string
  status: NetworkRequestStatus
}

/** Signals gathered by the caller after performing an action. */
export interface ClassifyActionOutcomeInput {
  /** Truthy when the UI changed after the action. */
  uiChanged: boolean
  /** Only a strict `true` is treated as a positive signal. */
  expectedElementVisible?: boolean | null
  /** null = get_network_activity has not been called yet */
  networkRequests?: NetworkRequest[] | null
  /** Only used to annotate the reasoning of a `no_op` outcome. */
  hasLogErrors?: boolean | null
}

/** Result of a classification. */
export interface ClassifyActionOutcomeResult {
  outcome: ActionOutcome
  /** Human-readable explanation of which rule matched. */
  reasoning: string
  /** Present when the caller must call get_network_activity before a final classification is possible */
  nextAction?: 'call_get_network_activity'
}
23
+
24
/**
 * Deterministic, side-effect-free classifier for the outcome of the most
 * recent action. Rules are evaluated top to bottom and the first match wins,
 * so identical inputs always produce an identical result.
 */
export function classifyActionOutcome(input: ClassifyActionOutcomeInput): ClassifyActionOutcomeResult {
  const elementSeen = input.expectedElementVisible === true
  const requests = input.networkRequests

  // Rule 1: any positive UI signal means the action worked.
  if (input.uiChanged || elementSeen) {
    const reasoning = elementSeen ? 'expected element is visible' : 'UI changed after action'
    return { outcome: 'success', reasoning }
  }

  // Rule 2: without UI change the network signal is mandatory; ask for it.
  if (requests === null || requests === undefined) {
    return {
      outcome: 'unknown',
      reasoning: 'UI did not change; get_network_activity must be called before classification can proceed',
      nextAction: 'call_get_network_activity'
    }
  }

  // Rule 3: the first failed or retryable request blames the backend.
  const firstFailure = requests.find((request) => request.status === 'failure' || request.status === 'retryable')
  if (firstFailure) {
    return {
      outcome: 'backend_failure',
      reasoning: `network request ${firstFailure.endpoint} returned ${firstFailure.status}`
    }
  }

  // Rule 4: nothing happened at all, neither on screen nor on the wire.
  if (requests.length === 0) {
    const suffix = input.hasLogErrors ? ' (log errors present)' : ''
    return { outcome: 'no_op', reasoning: `no UI change and no network activity${suffix}` }
  }

  // Rule 5: the backend answered fine, so the UI failed to react.
  const allSucceeded = requests.every((request) => request.status === 'success')
  if (allSucceeded) {
    return { outcome: 'ui_failure', reasoning: 'network requests succeeded but UI did not change' }
  }

  // Rule 6: requests carry statuses outside the known set.
  return { outcome: 'unknown', reasoning: 'signals are inconclusive' }
}
@@ -0,0 +1,268 @@
1
+ import { execAdb, parseLogLine } from '../utils/android/utils.js'
2
+ import { execCommand } from '../utils/ios/utils.js'
3
+
4
/** Transport-level failure categories attached to a network event. */
export type NetworkErrorCode =
  | 'timeout'
  | 'dns_error'
  | 'tls_error'
  | 'connection_refused'
  | 'connection_reset'
  | 'unknown_network_error'

/** Coarse status of a request as computed by classifyStatus. */
export type NetworkActivityStatus = 'success' | 'failure' | 'retryable'

/** One network request reconstructed from a platform log line. */
export interface NetworkEvent {
  /** Normalized endpoint (see normalizeEndpoint). */
  endpoint: string
  /** HTTP verb, or 'unknown' when the log line did not contain one. */
  method: string
  /** HTTP status code, or null when none could be extracted. */
  statusCode: number | null
  /** Detected transport error, or null. */
  networkError: NetworkErrorCode | null
  status: NetworkActivityStatus
  /** parseMessageToEvent always sets this to 0. */
  durationMs: number
}

/** Payload returned by ToolsNetwork.getNetworkActivity. */
export interface GetNetworkActivityResult {
  requests: NetworkEvent[]
  /** Always equal to requests.length. */
  count: number
}
27
+
28
+ // ─── Module state ─────────────────────────────────────────────────────────────
29
// lastActionTimestamp: epoch milliseconds, set when an action tool fires (tap, swipe, etc.);
// stays 0 until the first action is reported.
// lastConsumedTimestamp: epoch milliseconds, advanced after each get_network_activity call to
// prevent duplicates; reset to 0 whenever a new action starts.
let lastActionTimestamp = 0
let lastConsumedTimestamp = 0
33
+
34
/**
 * Records that a new action has just started: stamps the action time with the
 * current clock and clears the "already consumed" marker.
 */
export function notifyActionStart(): void {
  lastConsumedTimestamp = 0
  lastActionTimestamp = Date.now()
}
38
+
39
/**
 * Exposed for unit tests only: overwrites both module-level timestamps.
 *
 * @param actionTs value stored as the last action timestamp
 * @param consumedTs value stored as the last consumed timestamp
 */
export function _setTimestampsForTests(actionTs: number, consumedTs: number): void {
  lastConsumedTimestamp = consumedTs
  lastActionTimestamp = actionTs
}
44
+
45
+ // ─── Parsing constants ────────────────────────────────────────────────────────
46
// First absolute http/https URL; stops at whitespace, quotes, brackets and angle brackets.
const URL_RE = /https?:\/\/[^\s"'\]\)><]+/
// First slash-separated path made of at least two segments.
const PATH_RE = /\/[a-zA-Z0-9_.-]+(?:\/[a-zA-Z0-9_.-]+)+/
// Upper-case HTTP verb appearing as a whole word.
const METHOD_RE = /\b(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)\b/

// Checked in order; the first pattern that matches decides the error code.
const NETWORK_ERROR_PATTERNS: Array<{ re: RegExp; code: NetworkErrorCode }> = [
  {
    re: /timed?\s*out|timeout/i,
    code: 'timeout'
  },
  {
    re: /dns|name[\s_]resolution|host\s*not\s*found|nodename/i,
    code: 'dns_error'
  },
  {
    re: /\btls\b|\bssl\b|certificate|handshake/i,
    code: 'tls_error'
  },
  {
    re: /connection\s*refused/i,
    code: 'connection_refused'
  },
  {
    re: /connection\s*reset|reset\s*by\s*peer/i,
    code: 'connection_reset'
  },
]

// Endpoint fragments and file extensions treated as background traffic.
const BACKGROUND_TOKENS = [
  '/analytics', '/metrics', '/tracking', '/log',
  '/events', '/telemetry', '/ping', '/beacon',
]
const BACKGROUND_EXTENSIONS = [
  '.png', '.jpg', '.jpeg', '.gif', '.webp', '.css',
  '.js', '.svg', '.ico', '.woff', '.ttf',
]

// Path prefixes and extensions that indicate a filesystem path rather than an HTTP endpoint.
const FILESYSTEM_PREFIXES = [
  '/data/', '/system/', '/apex/', '/proc/', '/dev/',
  '/vendor/', '/product/', '/storage/', '/sdcard/', '/mnt/',
  '/odm/', '/cache/', '/metadata/', '/acct/', '/sys/',
]
const FILESYSTEM_EXTENSIONS = [
  '.apk', '.apex', '.odex', '.vdex', '.dex', '.so', '.jar', '.bin', '.img', '.db', '.sqlite',
  '.c', '.cc', '.cpp', '.cxx', '.h', '.hpp', '.m', '.mm', '.kt', '.java', '.swift',
]
62
+
63
+ // ─── Parsing helpers ─────────────────────────────────────────────────────────
64
+
65
/** Returns the first http/https URL found in `text`, or null when there is none. */
function extractUrl(text: string): string | null {
  return text.match(URL_RE)?.[0] ?? null
}
69
+
70
/**
 * Decides whether a slash-separated path looks like an HTTP endpoint rather
 * than a filesystem or source-file path. The comparison is case-insensitive.
 */
function isPlausibleEndpointPath(path: string): boolean {
  const candidate = path.toLowerCase()
  const hasFilesystemPrefix = FILESYSTEM_PREFIXES.some((prefix) => candidate.startsWith(prefix))
  const hasFilesystemExtension = FILESYSTEM_EXTENSIONS.some((ext) => candidate.endsWith(ext))
  return candidate.startsWith('/') && !hasFilesystemPrefix && !hasFilesystemExtension
}
77
+
78
/**
 * Returns the first multi-segment path in `text` when it looks like an HTTP
 * endpoint; returns null when no path is found or the first one is implausible.
 */
function extractPath(text: string): string | null {
  const candidate = text.match(PATH_RE)?.[0]
  if (candidate === undefined) {
    return null
  }
  return isPlausibleEndpointPath(candidate) ? candidate : null
}
83
+
84
/**
 * Converts a captured string into an HTTP status code.
 * Returns the numeric code when it lies in 100..599, otherwise null.
 */
function toStatusCode(value: string | undefined): number | null {
  // A missing capture becomes NaN, which fails both range comparisons below.
  const parsed = value ? Number(value) : Number.NaN
  const inRange = parsed >= 100 && parsed <= 599
  return inRange ? parsed : null
}
89
+
90
/**
 * Backslash-escapes every regular-expression metacharacter in `value` so the
 * result can be embedded literally in a `RegExp` source.
 */
function escapeRegExp(value: string): string {
  return value.replace(/[.*+?^${}()|[\]\\]/g, (special) => `\\${special}`)
}
93
+
94
/**
 * Extracts an HTTP status code from a single log line.
 *
 * Heuristics, tried in order:
 *   1. An explicit protocol marker: "HTTP/1.1 404" or "HTTP 404".
 *   2. A labelled code ("status: 500", "response code=201"), accepted only when
 *      the line also carries an endpoint (URL or path).
 *   3. A three-digit code on the same line before or after the endpoint text.
 *
 * The former "method followed by any three-digit number" fallback was removed.
 * Any number outside the endpoint is already caught by step 3, so that fallback
 * could only match digits inside the endpoint itself. That turned resource ids
 * into status codes, e.g. "GET /users/123" was read as status 123.
 *
 * @param text the log line
 * @param url URL previously extracted from `text`, if any
 * @param path path previously extracted from `text`, if any
 * @param method HTTP method previously extracted from `text`; kept for interface
 *   compatibility, it cannot produce a status code on its own
 * @returns a code in 100..599, or null when none is credible
 */
function extractStatusCode(text: string, url: string | null, path: string | null, method: string | null): number | null {
  const directHttpMatch = text.match(/\bHTTP\/\d(?:\.\d)?\s+([1-5]\d{2})\b/i) || text.match(/\bHTTP\s+([1-5]\d{2})\b/i)
  if (directHttpMatch) return toStatusCode(directHttpMatch[1])

  const endpointToken = url || path
  const hasEndpointContext = endpointToken !== null
  if (!hasEndpointContext && method === null) return null

  const labeledMatch = text.match(/\b(?:status(?:\s*code)?|response(?:\s*code)?)\s*[:=]?\s*([1-5]\d{2})\b/i)
  if (labeledMatch && hasEndpointContext) return toStatusCode(labeledMatch[1])

  if (endpointToken) {
    // The endpoint is user-controlled text, so escape it before building a pattern.
    const escapedEndpoint = escapeRegExp(endpointToken)
    const endpointThenCode = new RegExp(`${escapedEndpoint}[^\\n]*?\\b([1-5]\\d{2})\\b`, 'i')
    const codeThenEndpoint = new RegExp(`\\b([1-5]\\d{2})\\b[^\\n]*?${escapedEndpoint}`, 'i')
    const contextualMatch = text.match(endpointThenCode) || text.match(codeThenEndpoint)
    if (contextualMatch) return toStatusCode(contextualMatch[1])
  }

  return null
}
120
+
121
/**
 * Returns the first upper-case HTTP verb that appears as a whole word in
 * `text`, or null when none is present.
 */
function extractMethod(text: string): string | null {
  return text.match(METHOD_RE)?.[1] ?? null
}
125
+
126
/**
 * Maps a log line to a network error code using the ordered pattern table;
 * returns the code of the first matching pattern, or null.
 */
function detectNetworkError(text: string): NetworkErrorCode | null {
  const firstHit = NETWORK_ERROR_PATTERNS.find((pattern) => pattern.re.test(text))
  return firstHit ? firstHit.code : null
}
132
+
133
/**
 * Reduces a URL or path to a canonical endpoint key: lower-cased path only,
 * without trailing slashes. An empty result becomes "/".
 *
 * @param raw absolute URL, or a path starting with "/"
 */
export function normalizeEndpoint(raw: string): string {
  let pathname: string
  try {
    // A bare path is given a dummy origin so the URL parser can handle it.
    const absolute = raw.startsWith('/') ? `https://x${raw}` : raw
    pathname = new URL(absolute).pathname.toLowerCase()
  } catch {
    // Not parseable as a URL: fall back to stripping the query string by hand.
    pathname = raw.toLowerCase().replace(/\?.*$/, '')
  }
  return pathname.replace(/\/+$/, '') || '/'
}
142
+
143
/**
 * Maps a status code plus optional network error onto a request status.
 *
 * - any network error               -> 'retryable'
 * - no status code, or 200..299     -> 'success'
 * - 400..499                        -> 'failure'
 * - everything else (1xx, 3xx, 5xx) -> 'retryable'
 */
export function classifyStatus(statusCode: number | null, networkError: NetworkErrorCode | null): NetworkActivityStatus {
  if (networkError !== null) {
    return 'retryable'
  }
  // A detected request without any failure signal is treated as successful.
  if (statusCode === null || (statusCode >= 200 && statusCode <= 299)) {
    return 'success'
  }
  const isClientError = statusCode >= 400 && statusCode <= 499
  return isClientError ? 'failure' : 'retryable'
}
150
+
151
/**
 * Decides whether a parsed log line is credible enough to be reported as a
 * network event. Any one of the following suffices:
 *   1. a full http/https URL,
 *   2. a valid HTTP status code,
 *   3. an HTTP method together with a path.
 */
function meetsEmissionCriteria(url: string | null, path: string | null, statusCode: number | null, method: string | null): boolean {
  const hasUrl = url !== null
  const hasStatus = statusCode !== null
  const hasMethodAndPath = method !== null && path !== null
  return hasUrl || hasStatus || hasMethodAndPath
}
157
+
158
/**
 * Classifies an endpoint as 'background' (telemetry, static assets) or
 * 'primary' (everything else). The comparison is case-insensitive.
 *
 * A background token only counts when it ends at a path boundary: the end of
 * the string, "/", "?" or "#". Plain substring matching treated "/login" and
 * "/logout" as "/log" and "/pingpong" as "/ping", so real user-facing requests
 * were filtered out as background noise. As a consequence, variants such as
 * "/logs" are no longer matched implicitly; add them to BACKGROUND_TOKENS if
 * they should count as background.
 */
function classifyEventType(endpoint: string): 'primary' | 'background' {
  const lower = endpoint.toLowerCase()
  // Checks every occurrence of the token, because an early partial hit
  // ("/login") must not hide a later whole-segment hit ("/login/log").
  const matchesWholeSegment = (token: string): boolean => {
    for (let at = lower.indexOf(token); at !== -1; at = lower.indexOf(token, at + 1)) {
      const end = at + token.length
      if (end === lower.length || '/?#'.includes(lower[end])) return true
    }
    return false
  }
  if (BACKGROUND_TOKENS.some(matchesWholeSegment)) return 'background'
  if (BACKGROUND_EXTENSIONS.some((ext) => lower.endsWith(ext))) return 'background'
  return 'primary'
}
164
+
165
/**
 * Keeps only primary events when at least one exists; otherwise returns the
 * input array untouched, so background-only traffic is still reported.
 */
function filterToSignificantEvents(events: NetworkEvent[]): NetworkEvent[] {
  const primaryOnly = events.filter((event) => classifyEventType(event.endpoint) === 'primary')
  return primaryOnly.length > 0 ? primaryOnly : events
}
170
+
171
/**
 * Turns one log message into a network event, or returns null when the message
 * carries no credible network signal. Exported for unit testing.
 *
 * A path is only looked for when no full URL is present. `durationMs` is
 * always reported as 0.
 */
export function parseMessageToEvent(message: string): NetworkEvent | null {
  const url = extractUrl(message)
  const path = url ? null : extractPath(message)
  const method = extractMethod(message)
  const statusCode = extractStatusCode(message, url, path, method)
  const networkError = detectNetworkError(message)

  const credible = meetsEmissionCriteria(url, path, statusCode, method)
  if (!credible) {
    return null
  }

  const endpoint = normalizeEndpoint(url || path || 'unknown')
  const status = classifyStatus(statusCode, networkError)
  return {
    endpoint,
    method: method || 'unknown',
    statusCode,
    networkError,
    status,
    durationMs: 0
  }
}
191
+
192
+ // ─── Android ─────────────────────────────────────────────────────────────────
193
+
194
/**
 * Dumps the most recent logcat lines and converts those newer than `sinceMs`
 * into network events. Lines without a parseable timestamp are kept. Any
 * failure (adb or parsing) yields an empty list.
 *
 * @param sinceMs epoch milliseconds; entries at or before it are skipped
 * @param deviceId forwarded to execAdb
 */
async function getAndroidEvents(sinceMs: number, deviceId?: string): Promise<NetworkEvent[]> {
  try {
    const stdout = await execAdb(['logcat', '-d', '-v', 'threadtime', '*:V', '-t', '2000'], deviceId)
    const lines = stdout ? stdout.split(/\r?\n/).filter(Boolean) : []

    return lines.flatMap((line): NetworkEvent[] => {
      const parsed = parseLogLine(line)
      if (parsed._iso) {
        const ts = new Date(parsed._iso).getTime()
        // Skip only valid, positive timestamps that are not newer than the cut-off.
        if (ts > 0 && ts <= sinceMs) return []
      }
      const event = parseMessageToEvent(parsed.message || line)
      return event ? [event] : []
    })
  } catch {
    return []
  }
}
214
+
215
+ // ─── iOS ─────────────────────────────────────────────────────────────────────
216
+
217
/**
 * Reads recent simulator logs through `simctl ... log show` and converts lines
 * newer than `sinceMs` into network events. Lines without a leading timestamp
 * are kept. Any failure yields an empty list.
 *
 * @param sinceMs epoch milliseconds; entries at or before it are skipped
 * @param deviceId simulator identifier passed to simctl (defaults to 'booted')
 */
async function getIOSEvents(sinceMs: number, deviceId = 'booted'): Promise<NetworkEvent[]> {
  try {
    // Look back over the elapsed time plus a 5 s margin, never less than 15 s.
    const elapsedSeconds = Math.ceil((Date.now() - sinceMs) / 1000)
    const lookbackSeconds = Math.max(15, elapsedSeconds + 5)
    const result = await execCommand([
      'simctl', 'spawn', deviceId, 'log', 'show',
      '--last', `${lookbackSeconds}s`,
      '--style', 'syslog',
      '--predicate', 'eventMessage contains "http" OR eventMessage contains "URLSession" OR eventMessage contains "Task <" OR eventMessage contains "HTTP/"'
    ], deviceId)
    const lines = result.output ? result.output.split(/\r?\n/).filter(Boolean) : []

    return lines.flatMap((line): NetworkEvent[] => {
      const stamp = line.match(/^(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})/)
      if (stamp) {
        const ts = new Date(stamp[1]).getTime()
        // Skip only valid, positive timestamps that are not newer than the cut-off.
        if (ts > 0 && ts <= sinceMs) return []
      }
      const event = parseMessageToEvent(line)
      return event ? [event] : []
    })
  } catch {
    return []
  }
}
244
+
245
+ // ─── Public API ───────────────────────────────────────────────────────────────
246
+
247
/**
 * Public entry points of the network module.
 */
export class ToolsNetwork {
  /** Marks the start of a new action; delegates to the module-level function. */
  static notifyActionStart(): void {
    notifyActionStart()
  }

  /**
   * Collects network events observed since the last action (or since the
   * previous call, whichever is later) and keeps only the significant ones.
   * 'android' selects the Android reader; any other platform value uses the
   * iOS reader.
   */
  static async getNetworkActivity(params: { platform: string; deviceId?: string }): Promise<GetNetworkActivityResult> {
    const { platform, deviceId } = params

    let sinceMs: number
    if (lastConsumedTimestamp > lastActionTimestamp) {
      // Events up to the previous call were already handed out.
      sinceMs = lastConsumedTimestamp
    } else if (lastActionTimestamp > 0) {
      sinceMs = lastActionTimestamp
    } else {
      // No action recorded yet: look at the last 30 seconds.
      sinceMs = Date.now() - 30000
    }

    const collect = platform === 'android' ? getAndroidEvents : getIOSEvents
    const requests = filterToSignificantEvents(await collect(sinceMs, deviceId))
    lastConsumedTimestamp = Date.now()

    return { requests, count: requests.length }
  }
}
@@ -16,6 +16,8 @@ import {
16
16
  import { ToolsManage } from './manage/index.js'
17
17
  import { ToolsInteract } from './interact/index.js'
18
18
  import { ToolsObserve } from './observe/index.js'
19
+ import { classifyActionOutcome } from './interact/classify.js'
20
+ import { ToolsNetwork } from './network/index.js'
19
21
  import { AndroidManage } from './manage/index.js'
20
22
  import { iOSManage } from './manage/index.js'
21
23
  import { getSystemStatus } from './system/index.js'
@@ -494,6 +496,94 @@ export const toolDefinitions = [
494
496
  }
495
497
  }
496
498
  }
499
+ },
500
+ {
501
+ name: 'classify_action_outcome',
502
+ description: `Classify the outcome of the most recent action into exactly one of: success, no_op, backend_failure, ui_failure, unknown.
503
+
504
+ MUST be called after every action (tap, swipe, type_text, press_back, start_app, etc). Never skip.
505
+
506
+ HOW TO GATHER INPUTS before calling:
507
+ 1. Call wait_for_screen_change or compare get_screen_fingerprint before/after — set uiChanged accordingly.
508
+ 2. If you checked for a specific element with wait_for_ui, set expectedElementVisible.
509
+ 3. Do NOT call get_network_activity yet — omit networkRequests on the first call.
510
+
511
+ RULES (applied in order — stop at first match):
512
+ 1. If uiChanged=true OR expectedElementVisible=true → outcome=success
513
+ 2. Otherwise this tool returns nextAction="call_get_network_activity" — you MUST call get_network_activity once, then call classify_action_outcome again with the results in networkRequests.
514
+ 3. If any request has status=failure or retryable → outcome=backend_failure
515
+ 4. If no requests returned → outcome=no_op
516
+ 5. If all requests succeeded → outcome=ui_failure
517
+ 6. Otherwise → outcome=unknown
518
+
519
+ BEHAVIOUR after outcome:
520
+ - success → continue
521
+ - no_op → retry the action once or re-resolve the element
522
+ - backend_failure → stop and report the failing endpoint
523
+ - ui_failure → stop and report failure
524
+ - unknown → take one recovery step (e.g. capture_debug_snapshot), then stop`,
525
+ inputSchema: {
526
+ type: 'object',
527
+ properties: {
528
+ uiChanged: {
529
+ type: 'boolean',
530
+ description: 'true if the screen fingerprint or activity changed after the action. Use wait_for_screen_change or compare get_screen_fingerprint before and after.'
531
+ },
532
+ expectedElementVisible: {
533
+ type: 'boolean',
534
+ description: 'true if the element you expected to appear is now visible (from wait_for_ui). Omit if you did not check for a specific element.'
535
+ },
536
+ networkRequests: {
537
+ type: 'array',
538
+ description: 'Pass this only after calling get_network_activity as instructed by nextAction. Map each request to endpoint + status.',
539
+ items: {
540
+ type: 'object',
541
+ properties: {
542
+ endpoint: { type: 'string', description: 'Request endpoint or full URL' },
543
+ status: { type: 'string', enum: ['success', 'failure', 'retryable'], description: 'Outcome of the request' }
544
+ },
545
+ required: ['endpoint', 'status']
546
+ }
547
+ },
548
+ hasLogErrors: {
549
+ type: 'boolean',
550
+ description: 'true if structured log errors were observed (e.g. from read_log_stream). Optional — include if you have already read logs.'
551
+ }
552
+ },
553
+ required: ['uiChanged']
554
+ }
555
+ },
556
+ {
557
+ name: 'get_network_activity',
558
+ description: `Returns structured network events captured from platform logs since the last action.
559
+
560
+ Call this only when classify_action_outcome returns nextAction="call_get_network_activity".
561
+ Do not call more than once per action.
562
+
563
+ Events are filtered to significant (non-background) requests only.
564
+ Each event includes endpoint, method, statusCode, networkError, status, and durationMs.
565
+
566
+ status values:
567
+ - success: HTTP 2xx or request detected with no error signal
568
+ - failure: HTTP 4xx
569
+ - retryable: HTTP 5xx, network error (timeout, dns_error, tls_error, etc.)
570
+
571
+ Returns { requests: [], count: 0 } when no credible network signals are found.`,
572
+ inputSchema: {
573
+ type: 'object',
574
+ properties: {
575
+ platform: {
576
+ type: 'string',
577
+ enum: ['android', 'ios'],
578
+ description: 'Platform to read network logs from'
579
+ },
580
+ deviceId: {
581
+ type: 'string',
582
+ description: 'Device Serial (Android) or UDID (iOS). Defaults to connected/booted device.'
583
+ }
584
+ },
585
+ required: ['platform']
586
+ }
497
587
  }
498
588
  ]
499
589
 
@@ -503,6 +593,7 @@ type ToolHandler = (args: ToolCallArgs) => Promise<ToolCallResult>
503
593
 
504
594
  async function handleStartApp(args: ToolCallArgs) {
505
595
  const { platform, appId, deviceId } = args as any
596
+ ToolsNetwork.notifyActionStart()
506
597
  const res = await (platform === 'android' ? new AndroidManage().startApp(appId, deviceId) : new iOSManage().startApp(appId, deviceId))
507
598
  const response: StartAppResponse = {
508
599
  device: res.device,
@@ -521,6 +612,7 @@ async function handleTerminateApp(args: ToolCallArgs) {
521
612
 
522
613
  async function handleRestartApp(args: ToolCallArgs) {
523
614
  const { platform, appId, deviceId } = args as any
615
+ ToolsNetwork.notifyActionStart()
524
616
  const res = await (platform === 'android' ? new AndroidManage().restartApp(appId, deviceId) : new iOSManage().restartApp(appId, deviceId))
525
617
  const response: RestartAppResponse = { device: res.device, appRestarted: res.appRestarted, launchTimeMs: res.launchTimeMs }
526
618
  return wrapResponse(response)
@@ -644,36 +736,42 @@ async function handleFindElement(args: ToolCallArgs) {
644
736
 
645
737
/**
 * Tool handler for a coordinate tap: notifies the network module that an
 * action is starting, forwards the tap to ToolsInteract.tapHandler and wraps
 * its result.
 */
async function handleTap(args: ToolCallArgs) {
  const { platform, x, y, deviceId } = args as any
  const tapRequest = { platform, x, y, deviceId }
  // Signal the action start before the tap is dispatched.
  ToolsNetwork.notifyActionStart()
  return wrapResponse(await ToolsInteract.tapHandler(tapRequest))
}
650
743
 
651
744
/**
 * Tool handler for tapping a previously resolved element: notifies the network
 * module that an action is starting, forwards the element id to
 * ToolsInteract.tapElementHandler and wraps its result.
 */
async function handleTapElement(args: ToolCallArgs) {
  const { elementId } = args as any
  // Signal the action start before the tap is dispatched.
  ToolsNetwork.notifyActionStart()
  const tapOutcome = await ToolsInteract.tapElementHandler({ elementId })
  return wrapResponse(tapOutcome)
}
656
750
 
657
751
/**
 * Tool handler for a swipe gesture. `platform` falls back to 'android' when
 * the caller omits it. Notifies the network module that an action is starting,
 * forwards the gesture to ToolsInteract.swipeHandler and wraps its result.
 */
async function handleSwipe(args: ToolCallArgs) {
  const { platform = 'android', x1, y1, x2, y2, duration, deviceId } = args as any
  const gesture = { platform, x1, y1, x2, y2, duration, deviceId }
  // Signal the action start before the swipe is dispatched.
  ToolsNetwork.notifyActionStart()
  return wrapResponse(await ToolsInteract.swipeHandler(gesture))
}
662
757
 
663
758
/**
 * Tool handler for scrolling until an element is found: notifies the network
 * module that an action is starting, forwards the scroll parameters to
 * ToolsInteract.scrollToElementHandler and wraps its result.
 */
async function handleScrollToElement(args: ToolCallArgs) {
  const { platform, selector, direction, maxScrolls, scrollAmount, deviceId } = args as any
  const scrollRequest = { platform, selector, direction, maxScrolls, scrollAmount, deviceId }
  // Signal the action start before scrolling begins.
  ToolsNetwork.notifyActionStart()
  return wrapResponse(await ToolsInteract.scrollToElementHandler(scrollRequest))
}
668
764
 
669
765
/**
 * Tool handler for typing text: notifies the network module that an action is
 * starting, forwards the text to ToolsInteract.typeTextHandler and wraps its
 * result.
 */
async function handleTypeText(args: ToolCallArgs) {
  const { text, deviceId } = args as any
  // Signal the action start before the text is sent.
  ToolsNetwork.notifyActionStart()
  const typeOutcome = await ToolsInteract.typeTextHandler({ text, deviceId })
  return wrapResponse(typeOutcome)
}
674
771
 
675
772
/**
 * Tool handler for the back action: notifies the network module that an action
 * is starting, forwards the request to ToolsInteract.pressBackHandler and
 * wraps its result.
 */
async function handlePressBack(args: ToolCallArgs) {
  const { deviceId } = args as any
  // Signal the action start before the back press is dispatched.
  ToolsNetwork.notifyActionStart()
  const backOutcome = await ToolsInteract.pressBackHandler({ deviceId })
  return wrapResponse(backOutcome)
}
@@ -696,6 +794,23 @@ async function handleStopLogStream(args: ToolCallArgs) {
696
794
  return wrapResponse(res)
697
795
  }
698
796
 
797
/**
 * Tool handler for `classify_action_outcome`.
 *
 * Normalises the raw tool arguments and passes them to classifyActionOutcome:
 * `uiChanged` is coerced to a boolean, and the optional inputs
 * (`expectedElementVisible`, `networkRequests`, `hasLogErrors`) become null
 * when they are missing.
 *
 * Declared `async` like the other handlers so that any error thrown while
 * reading the arguments or classifying surfaces as a rejected promise, which
 * is what the ToolHandler signature promises, instead of a synchronous throw.
 *
 * @param args - Raw tool-call arguments.
 * @returns The classifier result wrapped by wrapResponse.
 */
async function handleClassifyActionOutcome(args: ToolCallArgs) {
  const { uiChanged, expectedElementVisible, networkRequests, hasLogErrors } = args as any
  const result = classifyActionOutcome({
    uiChanged: Boolean(uiChanged),
    expectedElementVisible: expectedElementVisible ?? null,
    networkRequests: networkRequests ?? null,
    hasLogErrors: hasLogErrors ?? null
  })
  return wrapResponse(result)
}
807
+
808
/**
 * Tool handler for `get_network_activity`: forwards platform and device id to
 * ToolsNetwork.getNetworkActivity and wraps the result.
 */
async function handleGetNetworkActivity(args: ToolCallArgs) {
  const { platform, deviceId } = args as any
  const activity = await ToolsNetwork.getNetworkActivity({ platform, deviceId })
  return wrapResponse(activity)
}
813
+
699
814
  const toolHandlers: Record<string, ToolHandler> = {
700
815
  start_app: handleStartApp,
701
816
  terminate_app: handleTerminateApp,
@@ -723,7 +838,9 @@ const toolHandlers: Record<string, ToolHandler> = {
723
838
  press_back: handlePressBack,
724
839
  start_log_stream: handleStartLogStream,
725
840
  read_log_stream: handleReadLogStream,
726
- stop_log_stream: handleStopLogStream
841
+ stop_log_stream: handleStopLogStream,
842
+ classify_action_outcome: handleClassifyActionOutcome,
843
+ get_network_activity: handleGetNetworkActivity
727
844
  }
728
845
 
729
846
  export async function handleToolCall(name: string, args: ToolCallArgs = {}) {
@@ -0,0 +1,110 @@
1
+ import assert from 'assert'
2
+ import { classifyActionOutcome } from '../../../src/interact/classify.js'
3
+
4
/**
 * Exercises classifyActionOutcome rule by rule, in the order the classifier
 * applies them. Throws on the first failed assertion.
 */
function run() {
  // Rule 1: a UI change alone means success and requests no follow-up.
  const uiChangedOnly = classifyActionOutcome({ uiChanged: true })
  assert.strictEqual(uiChangedOnly.outcome, 'success')
  assert.ok(uiChangedOnly.reasoning.length > 0)
  assert.strictEqual(uiChangedOnly.nextAction, undefined)

  // Rule 1: a visible expected element means success even without a UI change.
  const elementVisible = classifyActionOutcome({ uiChanged: false, expectedElementVisible: true })
  assert.strictEqual(elementVisible.outcome, 'success')
  assert.strictEqual(elementVisible.reasoning, 'expected element is visible')
  assert.strictEqual(elementVisible.nextAction, undefined)

  // Rule 1: both positive UI signals together are still success.
  const bothSignals = classifyActionOutcome({ uiChanged: true, expectedElementVisible: true })
  assert.strictEqual(bothSignals.outcome, 'success')

  // Rule 2: without a UI change and without network data (omitted, then
  // explicitly null) the classifier asks for get_network_activity.
  const missingNetwork = classifyActionOutcome({ uiChanged: false })
  assert.strictEqual(missingNetwork.outcome, 'unknown')
  assert.strictEqual(missingNetwork.nextAction, 'call_get_network_activity')

  const nullNetwork = classifyActionOutcome({ uiChanged: false, expectedElementVisible: null, networkRequests: null })
  assert.strictEqual(nullNetwork.outcome, 'unknown')
  assert.strictEqual(nullNetwork.nextAction, 'call_get_network_activity')

  // Rule 3: a request with status "failure" yields backend_failure and the
  // reasoning names both the endpoint and the status.
  const failedLogin = classifyActionOutcome({
    uiChanged: false,
    networkRequests: [{ endpoint: '/login', status: 'failure' }]
  })
  assert.strictEqual(failedLogin.outcome, 'backend_failure')
  assert.ok(failedLogin.reasoning.includes('/login'))
  assert.ok(failedLogin.reasoning.includes('failure'))

  // Rule 3: a "retryable" request counts as a backend failure even when
  // another request succeeded.
  const retryableSubmit = classifyActionOutcome({
    uiChanged: false,
    networkRequests: [
      { endpoint: '/api/submit', status: 'retryable' },
      { endpoint: '/api/other', status: 'success' }
    ]
  })
  assert.strictEqual(retryableSubmit.outcome, 'backend_failure')
  assert.ok(retryableSubmit.reasoning.includes('/api/submit'))

  // Rule 4: an empty request list yields no_op.
  const nothingHappened = classifyActionOutcome({ uiChanged: false, networkRequests: [] })
  assert.strictEqual(nothingHappened.outcome, 'no_op')
  assert.ok(nothingHappened.reasoning.includes('no UI change'))
  assert.ok(nothingHappened.reasoning.includes('no network activity'))

  // Rule 4: log errors do not change the outcome but are noted in the reasoning.
  const nothingButLogErrors = classifyActionOutcome({ uiChanged: false, networkRequests: [], hasLogErrors: true })
  assert.strictEqual(nothingButLogErrors.outcome, 'no_op')
  assert.ok(nothingButLogErrors.reasoning.includes('log errors'))

  // Rule 5: every request succeeded but the UI stayed the same.
  const silentUi = classifyActionOutcome({
    uiChanged: false,
    networkRequests: [
      { endpoint: '/api/save', status: 'success' },
      { endpoint: '/api/refresh', status: 'success' }
    ]
  })
  assert.strictEqual(silentUi.outcome, 'ui_failure')
  assert.ok(silentUi.reasoning.includes('network requests succeeded'))

  // Rule 1 outranks the network rules: a UI change wins over a failed request.
  const uiWinsOverFailure = classifyActionOutcome({
    uiChanged: true,
    networkRequests: [{ endpoint: '/api/log', status: 'failure' }]
  })
  assert.strictEqual(uiWinsOverFailure.outcome, 'success')

  console.log('classify_action_outcome tests passed')
}

// Run the suite; on any failure print the error and exit with status 1.
try {
  run()
} catch (failure) {
  console.error(failure)
  process.exit(1)
}
@@ -0,0 +1,181 @@
1
+ import assert from 'assert'
2
+ import {
3
+ parseMessageToEvent,
4
+ normalizeEndpoint,
5
+ classifyStatus,
6
+ _setTimestampsForTests,
7
+ ToolsNetwork
8
+ } from '../../../src/network/index.js'
9
+
10
/**
 * Exercises the network module helpers: normalizeEndpoint, classifyStatus,
 * parseMessageToEvent and the timestamp handling of
 * ToolsNetwork.getNetworkActivity. Throws on the first failed assertion.
 */
function run() {
  // Parses `line`, asserts that an event was emitted, then checks the listed
  // fields in the order given.
  const expectEvent = (line: string, emitMessage: string, expectedFields: Record<string, unknown>) => {
    const event = parseMessageToEvent(line)
    assert.ok(event !== null, emitMessage)
    for (const [field, value] of Object.entries(expectedFields)) {
      assert.strictEqual((event as any)[field], value)
    }
  }

  // ─── normalizeEndpoint ──────────────────────────────────────────────────────

  const endpointCases = [
    ['https://api.example.com/v1/users?page=2', '/v1/users', 'full URL'],
    ['/api/login/', '/api/login', 'path trailing slash'],
    ['/Api/Login', '/api/login', 'uppercase']
  ] as const
  for (const [input, expected, label] of endpointCases) {
    const actual = normalizeEndpoint(input)
    assert.strictEqual(actual, expected, `normalizeEndpoint ${label}: expected ${expected}, got ${actual}`)
  }

  // ─── classifyStatus ─────────────────────────────────────────────────────────

  const statusCases = [
    [200, null, 'success', `200 should be success`],
    [404, null, 'failure', `404 should be failure`],
    [500, null, 'retryable', `500 should be retryable`],
    [null, 'timeout', 'retryable', `networkError should override to retryable`],
    [200, 'connection_reset', 'retryable', `networkError beats statusCode`],
    [null, null, 'success', `null/null (request detected, no error) = success`]
  ] as const
  for (const [statusCode, networkError, expected, message] of statusCases) {
    assert.strictEqual(classifyStatus(statusCode, networkError), expected, message)
  }

  // ─── parseMessageToEvent — emission criteria ─────────────────────────────────

  // Condition 1: full URL
  expectEvent('OkHttp: GET https://api.example.com/v1/login 200', 'full URL line should emit', {
    endpoint: '/v1/login',
    method: 'GET',
    statusCode: 200,
    status: 'success'
  })

  // Condition 2: explicit HTTP status line
  expectEvent('HTTP/1.1 404 Not Found', 'HTTP status line should emit', {
    statusCode: 404,
    status: 'failure'
  })

  // Condition 3: method + path (no error signal, so status is success)
  expectEvent('Sending POST /api/register HTTP/1.1', 'method+path line should emit', {
    method: 'POST',
    endpoint: '/api/register',
    statusCode: null,
    status: 'success'
  })

  // Lines that meet none of the criteria must not produce an event.
  const noiseCases = [
    ['HTTP connection pool initialised', 'keyword-only line should not emit'],
    ['Request interceptor registered', 'generic Request line should not emit'],
    ['Task 200 completed', 'bare status-like numbers should not emit'],
    ['Response code: 404', 'labeled status without endpoint or HTTP context should not emit'],
    ['GetBestInfo: /data/app/~~pkg/base.apk status=447', 'filesystem paths should not emit as network endpoints'],
    ['system/gd/hci/le_address_manager.cc:576 GetNextPrivateAddressIntervalRange', 'source file paths should not emit as network endpoints']
  ] as const
  for (const [line, message] of noiseCases) {
    assert.strictEqual(parseMessageToEvent(line), null, message)
  }

  expectEvent('status=503 for /api/session/generate', 'status with plausible endpoint should emit', {
    endpoint: '/api/session/generate',
    statusCode: 503,
    status: 'retryable'
  })

  // Network error detection
  expectEvent('java.net.SocketTimeoutException: POST /api/data timed out after 30s', 'timeout error should emit', {
    networkError: 'timeout',
    status: 'retryable'
  })

  expectEvent('SSL handshake failed for https://api.example.com/v1/auth', 'TLS error should emit', {
    networkError: 'tls_error',
    status: 'retryable'
  })

  expectEvent('DNS resolution failed: GET /api/users', 'DNS error should emit', {
    networkError: 'dns_error',
    status: 'retryable'
  })

  // 5xx → retryable even without networkError
  expectEvent('Response 503 for https://api.example.com/v1/data', '5xx should emit', {
    statusCode: 503,
    status: 'retryable'
  })

  // ─── lastConsumedTimestamp dedupe ────────────────────────────────────────────

  // Simulate: action happened 1000ms ago, last consumed 500ms ago → use consumed
  _setTimestampsForTests(Date.now() - 1000, Date.now() - 500)
  // The chosen sinceMs is not observable from outside without deep mocking,
  // so this only confirms that getNetworkActivity hands back a Promise.
  const pendingActivity = ToolsNetwork.getNetworkActivity({ platform: 'android' })
  assert.ok(pendingActivity instanceof Promise, 'getNetworkActivity should return a Promise')
  // Let the promise settle quietly (logcat may fail in the test environment — that's fine)
  pendingActivity.catch(() => {})

  console.log('get_network_activity tests passed')
}

// Run the suite; on any failure print the error and exit with status 1.
try {
  run()
} catch (failure) {
  console.error(failure)
  process.exit(1)
}