gpt-driver-node 1.0.9 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +79 -9
- package/dist/index.d.cts +2 -0
- package/dist/index.mjs +79 -9
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -684,6 +684,7 @@ async function executeSmartLoop(ctx, params) {
|
|
|
684
684
|
screenshotResolution = await getImageDimensions(screenshot2);
|
|
685
685
|
}
|
|
686
686
|
let aiCommands = [];
|
|
687
|
+
let reasoning = [];
|
|
687
688
|
let actionDescription = [];
|
|
688
689
|
if (!isCacheHit) {
|
|
689
690
|
anyCacheMiss = true;
|
|
@@ -727,7 +728,9 @@ async function executeSmartLoop(ctx, params) {
|
|
|
727
728
|
actionDescription = gptCommands.slice(actionDescriptionIndex, actionDescriptionIndex + 1);
|
|
728
729
|
}
|
|
729
730
|
const reasoningIndex = gptCommands.findIndex((entry) => entry.startsWith("reasoning:"));
|
|
731
|
+
reasoning = [];
|
|
730
732
|
if (reasoningIndex !== -1) {
|
|
733
|
+
reasoning = gptCommands.at(reasoningIndex).split("reasoning:").at(1).trim().split("- ");
|
|
731
734
|
const parsedCommands = gptCommands.slice(reasoningIndex);
|
|
732
735
|
const rememberCommands = parsedCommands.filter((cmd) => isRememberCommand(cmd));
|
|
733
736
|
if (rememberCommands.length > 0) {
|
|
@@ -742,7 +745,13 @@ async function executeSmartLoop(ctx, params) {
|
|
|
742
745
|
screenshot,
|
|
743
746
|
commands
|
|
744
747
|
});
|
|
745
|
-
const logPromise = ctx.
|
|
748
|
+
const logPromise = ctx.logAIExecution({
|
|
749
|
+
screenshot,
|
|
750
|
+
prompt: params.instruction,
|
|
751
|
+
commands: aiCommands,
|
|
752
|
+
reasoning,
|
|
753
|
+
fromCache: isCacheHit
|
|
754
|
+
});
|
|
746
755
|
if (ctx.pendingLogPromises) {
|
|
747
756
|
ctx.pendingLogPromises.push(logPromise);
|
|
748
757
|
}
|
|
@@ -1113,7 +1122,10 @@ class GptDriver {
|
|
|
1113
1122
|
device_config: {
|
|
1114
1123
|
platform: this.appiumSessionConfig?.platform ?? this.gptDriverCloudConfig?.platform,
|
|
1115
1124
|
device: this.appiumSessionConfig?.deviceName ?? this.gptDriverCloudConfig?.deviceName,
|
|
1116
|
-
os: this.appiumSessionConfig?.platformVersion ?? this.gptDriverCloudConfig?.platformVersion
|
|
1125
|
+
os: this.appiumSessionConfig?.platformVersion ?? this.gptDriverCloudConfig?.platformVersion,
|
|
1126
|
+
...this.appiumSessionConfig?.size && {
|
|
1127
|
+
screenResolution: `${this.appiumSessionConfig.size.width}x${this.appiumSessionConfig.size.height}`
|
|
1128
|
+
}
|
|
1117
1129
|
},
|
|
1118
1130
|
use_internal_virtual_device: this.useGptDriverCloud,
|
|
1119
1131
|
build_id: this.buildId,
|
|
@@ -1236,7 +1248,7 @@ ${"=".repeat(50)}`);
|
|
|
1236
1248
|
performSwipe: (params) => this.performSwipe(params),
|
|
1237
1249
|
performType: (text) => this.performType(text),
|
|
1238
1250
|
performPressEnter: () => this.performPressEnter(),
|
|
1239
|
-
|
|
1251
|
+
logAIExecution: async (params) => this.logAIExecution(params),
|
|
1240
1252
|
organisationId: this.organisationId,
|
|
1241
1253
|
middleLayerAssertFn: options?.middleLayerAssertFn,
|
|
1242
1254
|
pendingLogPromises: options?.pendingLogPromises
|
|
@@ -2194,7 +2206,13 @@ ${issues}`);
|
|
|
2194
2206
|
case "type": {
|
|
2195
2207
|
const text = this.interpolateTemplate(step.text, params);
|
|
2196
2208
|
globalLogger.info(`${prefix}: Type text`);
|
|
2197
|
-
this.
|
|
2209
|
+
this.pendingLogPromises.push(
|
|
2210
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2211
|
+
prompt: `type: text=${text}`,
|
|
2212
|
+
commands: ["Local type execution"],
|
|
2213
|
+
reasoning: ["Local type execution"]
|
|
2214
|
+
})
|
|
2215
|
+
);
|
|
2198
2216
|
await this.performType(text);
|
|
2199
2217
|
if (isFromFileRef) {
|
|
2200
2218
|
fileRefStepNumber++;
|
|
@@ -2205,7 +2223,13 @@ ${issues}`);
|
|
|
2205
2223
|
}
|
|
2206
2224
|
case "enter": {
|
|
2207
2225
|
globalLogger.info(`${prefix}: Press Enter`);
|
|
2208
|
-
this.
|
|
2226
|
+
this.pendingLogPromises.push(
|
|
2227
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2228
|
+
prompt: `press: Enter`,
|
|
2229
|
+
commands: ["Local press Enter execution"],
|
|
2230
|
+
reasoning: ["Local press Enter execution"]
|
|
2231
|
+
})
|
|
2232
|
+
);
|
|
2209
2233
|
await this.performPressEnter();
|
|
2210
2234
|
if (step.delayNextStep) {
|
|
2211
2235
|
await this._delay(step.delayNextStep);
|
|
@@ -2242,7 +2266,13 @@ ${issues}`);
|
|
|
2242
2266
|
}
|
|
2243
2267
|
} else {
|
|
2244
2268
|
globalLogger.info(`${prefix}: ${step.type} ${step.direction}`);
|
|
2245
|
-
this.
|
|
2269
|
+
this.pendingLogPromises.push(
|
|
2270
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2271
|
+
prompt: `${step.type}: direction=${step.direction}`,
|
|
2272
|
+
commands: [`Local ${step.type} execution`],
|
|
2273
|
+
reasoning: [`Local ${step.type} execution`]
|
|
2274
|
+
})
|
|
2275
|
+
);
|
|
2246
2276
|
await this.performSwipe({
|
|
2247
2277
|
direction: step.direction,
|
|
2248
2278
|
x1: step.x1,
|
|
@@ -2263,7 +2293,13 @@ ${issues}`);
|
|
|
2263
2293
|
}
|
|
2264
2294
|
case "zoom": {
|
|
2265
2295
|
globalLogger.info(`${prefix}: Zoom ${step.direction}`);
|
|
2266
|
-
this.
|
|
2296
|
+
this.pendingLogPromises.push(
|
|
2297
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2298
|
+
prompt: `zoom: direction=${step.direction}`,
|
|
2299
|
+
commands: [`Local zoom execution`],
|
|
2300
|
+
reasoning: [`Local zoom execution`]
|
|
2301
|
+
})
|
|
2302
|
+
);
|
|
2267
2303
|
if (isFromFileRef) {
|
|
2268
2304
|
fileRefStepNumber++;
|
|
2269
2305
|
} else {
|
|
@@ -2274,7 +2310,13 @@ ${issues}`);
|
|
|
2274
2310
|
case "scrollUntil": {
|
|
2275
2311
|
const interpolatedText = step.text != null ? this.interpolateTemplate(step.text, params) : void 0;
|
|
2276
2312
|
globalLogger.info(`${prefix}: Scroll until ${interpolatedText ?? step.elementId}`);
|
|
2277
|
-
this.
|
|
2313
|
+
this.pendingLogPromises.push(
|
|
2314
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2315
|
+
prompt: `scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`,
|
|
2316
|
+
commands: [`Local scrollUntil execution`],
|
|
2317
|
+
reasoning: [`Local scrollUntil execution`]
|
|
2318
|
+
})
|
|
2319
|
+
);
|
|
2278
2320
|
await this.performScrollUntil({
|
|
2279
2321
|
direction: step.direction,
|
|
2280
2322
|
text: interpolatedText,
|
|
@@ -2293,7 +2335,13 @@ ${issues}`);
|
|
|
2293
2335
|
const bundleId = params["bundleId"];
|
|
2294
2336
|
const url = this.interpolateTemplate(step.url, params);
|
|
2295
2337
|
globalLogger.info(`${prefix}: Open deeplink ${url}`);
|
|
2296
|
-
this.
|
|
2338
|
+
this.pendingLogPromises.push(
|
|
2339
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2340
|
+
prompt: `openDeepLinkUrl: url=${url}`,
|
|
2341
|
+
commands: [`Local openDeepLinkUrl execution`],
|
|
2342
|
+
reasoning: [`Local openDeepLinkUrl execution`]
|
|
2343
|
+
})
|
|
2344
|
+
);
|
|
2297
2345
|
await this.openDeepLinkUrl({ url, package: pkg, bundleId });
|
|
2298
2346
|
break;
|
|
2299
2347
|
}
|
|
@@ -2414,6 +2462,28 @@ ${"=".repeat(50)}`);
|
|
|
2414
2462
|
globalLogger.error("Failed to log code execution", e);
|
|
2415
2463
|
}
|
|
2416
2464
|
}
|
|
2465
|
+
async logAIExecution(params) {
|
|
2466
|
+
try {
|
|
2467
|
+
await axios.post(`${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/log_ai_execution`, {
|
|
2468
|
+
api_key: this.apiKey,
|
|
2469
|
+
base64_screenshot: params.screenshot,
|
|
2470
|
+
prompt: params.prompt,
|
|
2471
|
+
reasoning: params.reasoning,
|
|
2472
|
+
commands: params.commands,
|
|
2473
|
+
from_cache: !!params.fromCache
|
|
2474
|
+
});
|
|
2475
|
+
} catch (e) {
|
|
2476
|
+
globalLogger.error("Failed to log code execution", e);
|
|
2477
|
+
}
|
|
2478
|
+
}
|
|
2479
|
+
async takeScreenshotAndLogAIExecution(params) {
|
|
2480
|
+
try {
|
|
2481
|
+
const screenshot = await this.getScreenshot(this.appiumSessionConfig);
|
|
2482
|
+
await this.logAIExecution({ ...params, screenshot });
|
|
2483
|
+
} catch (e) {
|
|
2484
|
+
globalLogger.error("Failed to log code execution", e);
|
|
2485
|
+
}
|
|
2486
|
+
}
|
|
2417
2487
|
async takeScreenshotAndLogCodeExecution(command) {
|
|
2418
2488
|
try {
|
|
2419
2489
|
const screenshot = await this.getScreenshot(this.appiumSessionConfig);
|
package/dist/index.d.cts
CHANGED
package/dist/index.mjs
CHANGED
|
@@ -682,6 +682,7 @@ async function executeSmartLoop(ctx, params) {
|
|
|
682
682
|
screenshotResolution = await getImageDimensions(screenshot2);
|
|
683
683
|
}
|
|
684
684
|
let aiCommands = [];
|
|
685
|
+
let reasoning = [];
|
|
685
686
|
let actionDescription = [];
|
|
686
687
|
if (!isCacheHit) {
|
|
687
688
|
anyCacheMiss = true;
|
|
@@ -725,7 +726,9 @@ async function executeSmartLoop(ctx, params) {
|
|
|
725
726
|
actionDescription = gptCommands.slice(actionDescriptionIndex, actionDescriptionIndex + 1);
|
|
726
727
|
}
|
|
727
728
|
const reasoningIndex = gptCommands.findIndex((entry) => entry.startsWith("reasoning:"));
|
|
729
|
+
reasoning = [];
|
|
728
730
|
if (reasoningIndex !== -1) {
|
|
731
|
+
reasoning = gptCommands.at(reasoningIndex).split("reasoning:").at(1).trim().split("- ");
|
|
729
732
|
const parsedCommands = gptCommands.slice(reasoningIndex);
|
|
730
733
|
const rememberCommands = parsedCommands.filter((cmd) => isRememberCommand(cmd));
|
|
731
734
|
if (rememberCommands.length > 0) {
|
|
@@ -740,7 +743,13 @@ async function executeSmartLoop(ctx, params) {
|
|
|
740
743
|
screenshot,
|
|
741
744
|
commands
|
|
742
745
|
});
|
|
743
|
-
const logPromise = ctx.
|
|
746
|
+
const logPromise = ctx.logAIExecution({
|
|
747
|
+
screenshot,
|
|
748
|
+
prompt: params.instruction,
|
|
749
|
+
commands: aiCommands,
|
|
750
|
+
reasoning,
|
|
751
|
+
fromCache: isCacheHit
|
|
752
|
+
});
|
|
744
753
|
if (ctx.pendingLogPromises) {
|
|
745
754
|
ctx.pendingLogPromises.push(logPromise);
|
|
746
755
|
}
|
|
@@ -1111,7 +1120,10 @@ class GptDriver {
|
|
|
1111
1120
|
device_config: {
|
|
1112
1121
|
platform: this.appiumSessionConfig?.platform ?? this.gptDriverCloudConfig?.platform,
|
|
1113
1122
|
device: this.appiumSessionConfig?.deviceName ?? this.gptDriverCloudConfig?.deviceName,
|
|
1114
|
-
os: this.appiumSessionConfig?.platformVersion ?? this.gptDriverCloudConfig?.platformVersion
|
|
1123
|
+
os: this.appiumSessionConfig?.platformVersion ?? this.gptDriverCloudConfig?.platformVersion,
|
|
1124
|
+
...this.appiumSessionConfig?.size && {
|
|
1125
|
+
screenResolution: `${this.appiumSessionConfig.size.width}x${this.appiumSessionConfig.size.height}`
|
|
1126
|
+
}
|
|
1115
1127
|
},
|
|
1116
1128
|
use_internal_virtual_device: this.useGptDriverCloud,
|
|
1117
1129
|
build_id: this.buildId,
|
|
@@ -1234,7 +1246,7 @@ ${"=".repeat(50)}`);
|
|
|
1234
1246
|
performSwipe: (params) => this.performSwipe(params),
|
|
1235
1247
|
performType: (text) => this.performType(text),
|
|
1236
1248
|
performPressEnter: () => this.performPressEnter(),
|
|
1237
|
-
|
|
1249
|
+
logAIExecution: async (params) => this.logAIExecution(params),
|
|
1238
1250
|
organisationId: this.organisationId,
|
|
1239
1251
|
middleLayerAssertFn: options?.middleLayerAssertFn,
|
|
1240
1252
|
pendingLogPromises: options?.pendingLogPromises
|
|
@@ -2192,7 +2204,13 @@ ${issues}`);
|
|
|
2192
2204
|
case "type": {
|
|
2193
2205
|
const text = this.interpolateTemplate(step.text, params);
|
|
2194
2206
|
globalLogger.info(`${prefix}: Type text`);
|
|
2195
|
-
this.
|
|
2207
|
+
this.pendingLogPromises.push(
|
|
2208
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2209
|
+
prompt: `type: text=${text}`,
|
|
2210
|
+
commands: ["Local type execution"],
|
|
2211
|
+
reasoning: ["Local type execution"]
|
|
2212
|
+
})
|
|
2213
|
+
);
|
|
2196
2214
|
await this.performType(text);
|
|
2197
2215
|
if (isFromFileRef) {
|
|
2198
2216
|
fileRefStepNumber++;
|
|
@@ -2203,7 +2221,13 @@ ${issues}`);
|
|
|
2203
2221
|
}
|
|
2204
2222
|
case "enter": {
|
|
2205
2223
|
globalLogger.info(`${prefix}: Press Enter`);
|
|
2206
|
-
this.
|
|
2224
|
+
this.pendingLogPromises.push(
|
|
2225
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2226
|
+
prompt: `press: Enter`,
|
|
2227
|
+
commands: ["Local press Enter execution"],
|
|
2228
|
+
reasoning: ["Local press Enter execution"]
|
|
2229
|
+
})
|
|
2230
|
+
);
|
|
2207
2231
|
await this.performPressEnter();
|
|
2208
2232
|
if (step.delayNextStep) {
|
|
2209
2233
|
await this._delay(step.delayNextStep);
|
|
@@ -2240,7 +2264,13 @@ ${issues}`);
|
|
|
2240
2264
|
}
|
|
2241
2265
|
} else {
|
|
2242
2266
|
globalLogger.info(`${prefix}: ${step.type} ${step.direction}`);
|
|
2243
|
-
this.
|
|
2267
|
+
this.pendingLogPromises.push(
|
|
2268
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2269
|
+
prompt: `${step.type}: direction=${step.direction}`,
|
|
2270
|
+
commands: [`Local ${step.type} execution`],
|
|
2271
|
+
reasoning: [`Local ${step.type} execution`]
|
|
2272
|
+
})
|
|
2273
|
+
);
|
|
2244
2274
|
await this.performSwipe({
|
|
2245
2275
|
direction: step.direction,
|
|
2246
2276
|
x1: step.x1,
|
|
@@ -2261,7 +2291,13 @@ ${issues}`);
|
|
|
2261
2291
|
}
|
|
2262
2292
|
case "zoom": {
|
|
2263
2293
|
globalLogger.info(`${prefix}: Zoom ${step.direction}`);
|
|
2264
|
-
this.
|
|
2294
|
+
this.pendingLogPromises.push(
|
|
2295
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2296
|
+
prompt: `zoom: direction=${step.direction}`,
|
|
2297
|
+
commands: [`Local zoom execution`],
|
|
2298
|
+
reasoning: [`Local zoom execution`]
|
|
2299
|
+
})
|
|
2300
|
+
);
|
|
2265
2301
|
if (isFromFileRef) {
|
|
2266
2302
|
fileRefStepNumber++;
|
|
2267
2303
|
} else {
|
|
@@ -2272,7 +2308,13 @@ ${issues}`);
|
|
|
2272
2308
|
case "scrollUntil": {
|
|
2273
2309
|
const interpolatedText = step.text != null ? this.interpolateTemplate(step.text, params) : void 0;
|
|
2274
2310
|
globalLogger.info(`${prefix}: Scroll until ${interpolatedText ?? step.elementId}`);
|
|
2275
|
-
this.
|
|
2311
|
+
this.pendingLogPromises.push(
|
|
2312
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2313
|
+
prompt: `scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`,
|
|
2314
|
+
commands: [`Local scrollUntil execution`],
|
|
2315
|
+
reasoning: [`Local scrollUntil execution`]
|
|
2316
|
+
})
|
|
2317
|
+
);
|
|
2276
2318
|
await this.performScrollUntil({
|
|
2277
2319
|
direction: step.direction,
|
|
2278
2320
|
text: interpolatedText,
|
|
@@ -2291,7 +2333,13 @@ ${issues}`);
|
|
|
2291
2333
|
const bundleId = params["bundleId"];
|
|
2292
2334
|
const url = this.interpolateTemplate(step.url, params);
|
|
2293
2335
|
globalLogger.info(`${prefix}: Open deeplink ${url}`);
|
|
2294
|
-
this.
|
|
2336
|
+
this.pendingLogPromises.push(
|
|
2337
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2338
|
+
prompt: `openDeepLinkUrl: url=${url}`,
|
|
2339
|
+
commands: [`Local openDeepLinkUrl execution`],
|
|
2340
|
+
reasoning: [`Local openDeepLinkUrl execution`]
|
|
2341
|
+
})
|
|
2342
|
+
);
|
|
2295
2343
|
await this.openDeepLinkUrl({ url, package: pkg, bundleId });
|
|
2296
2344
|
break;
|
|
2297
2345
|
}
|
|
@@ -2412,6 +2460,28 @@ ${"=".repeat(50)}`);
|
|
|
2412
2460
|
globalLogger.error("Failed to log code execution", e);
|
|
2413
2461
|
}
|
|
2414
2462
|
}
|
|
2463
|
+
async logAIExecution(params) {
|
|
2464
|
+
try {
|
|
2465
|
+
await axios.post(`${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/log_ai_execution`, {
|
|
2466
|
+
api_key: this.apiKey,
|
|
2467
|
+
base64_screenshot: params.screenshot,
|
|
2468
|
+
prompt: params.prompt,
|
|
2469
|
+
reasoning: params.reasoning,
|
|
2470
|
+
commands: params.commands,
|
|
2471
|
+
from_cache: !!params.fromCache
|
|
2472
|
+
});
|
|
2473
|
+
} catch (e) {
|
|
2474
|
+
globalLogger.error("Failed to log code execution", e);
|
|
2475
|
+
}
|
|
2476
|
+
}
|
|
2477
|
+
async takeScreenshotAndLogAIExecution(params) {
|
|
2478
|
+
try {
|
|
2479
|
+
const screenshot = await this.getScreenshot(this.appiumSessionConfig);
|
|
2480
|
+
await this.logAIExecution({ ...params, screenshot });
|
|
2481
|
+
} catch (e) {
|
|
2482
|
+
globalLogger.error("Failed to log code execution", e);
|
|
2483
|
+
}
|
|
2484
|
+
}
|
|
2415
2485
|
async takeScreenshotAndLogCodeExecution(command) {
|
|
2416
2486
|
try {
|
|
2417
2487
|
const screenshot = await this.getScreenshot(this.appiumSessionConfig);
|