gpt-driver-node 1.0.10 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +75 -8
- package/dist/index.d.cts +2 -0
- package/dist/index.mjs +75 -8
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -684,6 +684,7 @@ async function executeSmartLoop(ctx, params) {
|
|
|
684
684
|
screenshotResolution = await getImageDimensions(screenshot2);
|
|
685
685
|
}
|
|
686
686
|
let aiCommands = [];
|
|
687
|
+
let reasoning = [];
|
|
687
688
|
let actionDescription = [];
|
|
688
689
|
if (!isCacheHit) {
|
|
689
690
|
anyCacheMiss = true;
|
|
@@ -727,7 +728,9 @@ async function executeSmartLoop(ctx, params) {
|
|
|
727
728
|
actionDescription = gptCommands.slice(actionDescriptionIndex, actionDescriptionIndex + 1);
|
|
728
729
|
}
|
|
729
730
|
const reasoningIndex = gptCommands.findIndex((entry) => entry.startsWith("reasoning:"));
|
|
731
|
+
reasoning = [];
|
|
730
732
|
if (reasoningIndex !== -1) {
|
|
733
|
+
reasoning = gptCommands.at(reasoningIndex).split("reasoning:").at(1).trim().split("- ");
|
|
731
734
|
const parsedCommands = gptCommands.slice(reasoningIndex);
|
|
732
735
|
const rememberCommands = parsedCommands.filter((cmd) => isRememberCommand(cmd));
|
|
733
736
|
if (rememberCommands.length > 0) {
|
|
@@ -742,7 +745,13 @@ async function executeSmartLoop(ctx, params) {
|
|
|
742
745
|
screenshot,
|
|
743
746
|
commands
|
|
744
747
|
});
|
|
745
|
-
const logPromise = ctx.
|
|
748
|
+
const logPromise = ctx.logAIExecution({
|
|
749
|
+
screenshot,
|
|
750
|
+
prompt: params.instruction,
|
|
751
|
+
commands: aiCommands,
|
|
752
|
+
reasoning,
|
|
753
|
+
fromCache: isCacheHit
|
|
754
|
+
});
|
|
746
755
|
if (ctx.pendingLogPromises) {
|
|
747
756
|
ctx.pendingLogPromises.push(logPromise);
|
|
748
757
|
}
|
|
@@ -1239,7 +1248,7 @@ ${"=".repeat(50)}`);
|
|
|
1239
1248
|
performSwipe: (params) => this.performSwipe(params),
|
|
1240
1249
|
performType: (text) => this.performType(text),
|
|
1241
1250
|
performPressEnter: () => this.performPressEnter(),
|
|
1242
|
-
|
|
1251
|
+
logAIExecution: async (params) => this.logAIExecution(params),
|
|
1243
1252
|
organisationId: this.organisationId,
|
|
1244
1253
|
middleLayerAssertFn: options?.middleLayerAssertFn,
|
|
1245
1254
|
pendingLogPromises: options?.pendingLogPromises
|
|
@@ -2197,7 +2206,13 @@ ${issues}`);
|
|
|
2197
2206
|
case "type": {
|
|
2198
2207
|
const text = this.interpolateTemplate(step.text, params);
|
|
2199
2208
|
globalLogger.info(`${prefix}: Type text`);
|
|
2200
|
-
this.
|
|
2209
|
+
this.pendingLogPromises.push(
|
|
2210
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2211
|
+
prompt: `type: text=${text}`,
|
|
2212
|
+
commands: ["Local type execution"],
|
|
2213
|
+
reasoning: ["Local type execution"]
|
|
2214
|
+
})
|
|
2215
|
+
);
|
|
2201
2216
|
await this.performType(text);
|
|
2202
2217
|
if (isFromFileRef) {
|
|
2203
2218
|
fileRefStepNumber++;
|
|
@@ -2208,7 +2223,13 @@ ${issues}`);
|
|
|
2208
2223
|
}
|
|
2209
2224
|
case "enter": {
|
|
2210
2225
|
globalLogger.info(`${prefix}: Press Enter`);
|
|
2211
|
-
this.
|
|
2226
|
+
this.pendingLogPromises.push(
|
|
2227
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2228
|
+
prompt: `press: Enter`,
|
|
2229
|
+
commands: ["Local press Enter execution"],
|
|
2230
|
+
reasoning: ["Local press Enter execution"]
|
|
2231
|
+
})
|
|
2232
|
+
);
|
|
2212
2233
|
await this.performPressEnter();
|
|
2213
2234
|
if (step.delayNextStep) {
|
|
2214
2235
|
await this._delay(step.delayNextStep);
|
|
@@ -2245,7 +2266,13 @@ ${issues}`);
|
|
|
2245
2266
|
}
|
|
2246
2267
|
} else {
|
|
2247
2268
|
globalLogger.info(`${prefix}: ${step.type} ${step.direction}`);
|
|
2248
|
-
this.
|
|
2269
|
+
this.pendingLogPromises.push(
|
|
2270
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2271
|
+
prompt: `${step.type}: direction=${step.direction}`,
|
|
2272
|
+
commands: [`Local ${step.type} execution`],
|
|
2273
|
+
reasoning: [`Local ${step.type} execution`]
|
|
2274
|
+
})
|
|
2275
|
+
);
|
|
2249
2276
|
await this.performSwipe({
|
|
2250
2277
|
direction: step.direction,
|
|
2251
2278
|
x1: step.x1,
|
|
@@ -2266,7 +2293,13 @@ ${issues}`);
|
|
|
2266
2293
|
}
|
|
2267
2294
|
case "zoom": {
|
|
2268
2295
|
globalLogger.info(`${prefix}: Zoom ${step.direction}`);
|
|
2269
|
-
this.
|
|
2296
|
+
this.pendingLogPromises.push(
|
|
2297
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2298
|
+
prompt: `zoom: direction=${step.direction}`,
|
|
2299
|
+
commands: [`Local zoom execution`],
|
|
2300
|
+
reasoning: [`Local zoom execution`]
|
|
2301
|
+
})
|
|
2302
|
+
);
|
|
2270
2303
|
if (isFromFileRef) {
|
|
2271
2304
|
fileRefStepNumber++;
|
|
2272
2305
|
} else {
|
|
@@ -2277,7 +2310,13 @@ ${issues}`);
|
|
|
2277
2310
|
case "scrollUntil": {
|
|
2278
2311
|
const interpolatedText = step.text != null ? this.interpolateTemplate(step.text, params) : void 0;
|
|
2279
2312
|
globalLogger.info(`${prefix}: Scroll until ${interpolatedText ?? step.elementId}`);
|
|
2280
|
-
this.
|
|
2313
|
+
this.pendingLogPromises.push(
|
|
2314
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2315
|
+
prompt: `scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`,
|
|
2316
|
+
commands: [`Local scrollUntil execution`],
|
|
2317
|
+
reasoning: [`Local scrollUntil execution`]
|
|
2318
|
+
})
|
|
2319
|
+
);
|
|
2281
2320
|
await this.performScrollUntil({
|
|
2282
2321
|
direction: step.direction,
|
|
2283
2322
|
text: interpolatedText,
|
|
@@ -2296,7 +2335,13 @@ ${issues}`);
|
|
|
2296
2335
|
const bundleId = params["bundleId"];
|
|
2297
2336
|
const url = this.interpolateTemplate(step.url, params);
|
|
2298
2337
|
globalLogger.info(`${prefix}: Open deeplink ${url}`);
|
|
2299
|
-
this.
|
|
2338
|
+
this.pendingLogPromises.push(
|
|
2339
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2340
|
+
prompt: `openDeepLinkUrl: url=${url}`,
|
|
2341
|
+
commands: [`Local openDeepLinkUrl execution`],
|
|
2342
|
+
reasoning: [`Local openDeepLinkUrl execution`]
|
|
2343
|
+
})
|
|
2344
|
+
);
|
|
2300
2345
|
await this.openDeepLinkUrl({ url, package: pkg, bundleId });
|
|
2301
2346
|
break;
|
|
2302
2347
|
}
|
|
@@ -2417,6 +2462,28 @@ ${"=".repeat(50)}`);
|
|
|
2417
2462
|
globalLogger.error("Failed to log code execution", e);
|
|
2418
2463
|
}
|
|
2419
2464
|
}
|
|
2465
|
+
async logAIExecution(params) {
|
|
2466
|
+
try {
|
|
2467
|
+
await axios.post(`${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/log_ai_execution`, {
|
|
2468
|
+
api_key: this.apiKey,
|
|
2469
|
+
base64_screenshot: params.screenshot,
|
|
2470
|
+
prompt: params.prompt,
|
|
2471
|
+
reasoning: params.reasoning,
|
|
2472
|
+
commands: params.commands,
|
|
2473
|
+
from_cache: !!params.fromCache
|
|
2474
|
+
});
|
|
2475
|
+
} catch (e) {
|
|
2476
|
+
globalLogger.error("Failed to log code execution", e);
|
|
2477
|
+
}
|
|
2478
|
+
}
|
|
2479
|
+
async takeScreenshotAndLogAIExecution(params) {
|
|
2480
|
+
try {
|
|
2481
|
+
const screenshot = await this.getScreenshot(this.appiumSessionConfig);
|
|
2482
|
+
await this.logAIExecution({ ...params, screenshot });
|
|
2483
|
+
} catch (e) {
|
|
2484
|
+
globalLogger.error("Failed to log code execution", e);
|
|
2485
|
+
}
|
|
2486
|
+
}
|
|
2420
2487
|
async takeScreenshotAndLogCodeExecution(command) {
|
|
2421
2488
|
try {
|
|
2422
2489
|
const screenshot = await this.getScreenshot(this.appiumSessionConfig);
|
package/dist/index.d.cts
CHANGED
package/dist/index.mjs
CHANGED
|
@@ -682,6 +682,7 @@ async function executeSmartLoop(ctx, params) {
|
|
|
682
682
|
screenshotResolution = await getImageDimensions(screenshot2);
|
|
683
683
|
}
|
|
684
684
|
let aiCommands = [];
|
|
685
|
+
let reasoning = [];
|
|
685
686
|
let actionDescription = [];
|
|
686
687
|
if (!isCacheHit) {
|
|
687
688
|
anyCacheMiss = true;
|
|
@@ -725,7 +726,9 @@ async function executeSmartLoop(ctx, params) {
|
|
|
725
726
|
actionDescription = gptCommands.slice(actionDescriptionIndex, actionDescriptionIndex + 1);
|
|
726
727
|
}
|
|
727
728
|
const reasoningIndex = gptCommands.findIndex((entry) => entry.startsWith("reasoning:"));
|
|
729
|
+
reasoning = [];
|
|
728
730
|
if (reasoningIndex !== -1) {
|
|
731
|
+
reasoning = gptCommands.at(reasoningIndex).split("reasoning:").at(1).trim().split("- ");
|
|
729
732
|
const parsedCommands = gptCommands.slice(reasoningIndex);
|
|
730
733
|
const rememberCommands = parsedCommands.filter((cmd) => isRememberCommand(cmd));
|
|
731
734
|
if (rememberCommands.length > 0) {
|
|
@@ -740,7 +743,13 @@ async function executeSmartLoop(ctx, params) {
|
|
|
740
743
|
screenshot,
|
|
741
744
|
commands
|
|
742
745
|
});
|
|
743
|
-
const logPromise = ctx.
|
|
746
|
+
const logPromise = ctx.logAIExecution({
|
|
747
|
+
screenshot,
|
|
748
|
+
prompt: params.instruction,
|
|
749
|
+
commands: aiCommands,
|
|
750
|
+
reasoning,
|
|
751
|
+
fromCache: isCacheHit
|
|
752
|
+
});
|
|
744
753
|
if (ctx.pendingLogPromises) {
|
|
745
754
|
ctx.pendingLogPromises.push(logPromise);
|
|
746
755
|
}
|
|
@@ -1237,7 +1246,7 @@ ${"=".repeat(50)}`);
|
|
|
1237
1246
|
performSwipe: (params) => this.performSwipe(params),
|
|
1238
1247
|
performType: (text) => this.performType(text),
|
|
1239
1248
|
performPressEnter: () => this.performPressEnter(),
|
|
1240
|
-
|
|
1249
|
+
logAIExecution: async (params) => this.logAIExecution(params),
|
|
1241
1250
|
organisationId: this.organisationId,
|
|
1242
1251
|
middleLayerAssertFn: options?.middleLayerAssertFn,
|
|
1243
1252
|
pendingLogPromises: options?.pendingLogPromises
|
|
@@ -2195,7 +2204,13 @@ ${issues}`);
|
|
|
2195
2204
|
case "type": {
|
|
2196
2205
|
const text = this.interpolateTemplate(step.text, params);
|
|
2197
2206
|
globalLogger.info(`${prefix}: Type text`);
|
|
2198
|
-
this.
|
|
2207
|
+
this.pendingLogPromises.push(
|
|
2208
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2209
|
+
prompt: `type: text=${text}`,
|
|
2210
|
+
commands: ["Local type execution"],
|
|
2211
|
+
reasoning: ["Local type execution"]
|
|
2212
|
+
})
|
|
2213
|
+
);
|
|
2199
2214
|
await this.performType(text);
|
|
2200
2215
|
if (isFromFileRef) {
|
|
2201
2216
|
fileRefStepNumber++;
|
|
@@ -2206,7 +2221,13 @@ ${issues}`);
|
|
|
2206
2221
|
}
|
|
2207
2222
|
case "enter": {
|
|
2208
2223
|
globalLogger.info(`${prefix}: Press Enter`);
|
|
2209
|
-
this.
|
|
2224
|
+
this.pendingLogPromises.push(
|
|
2225
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2226
|
+
prompt: `press: Enter`,
|
|
2227
|
+
commands: ["Local press Enter execution"],
|
|
2228
|
+
reasoning: ["Local press Enter execution"]
|
|
2229
|
+
})
|
|
2230
|
+
);
|
|
2210
2231
|
await this.performPressEnter();
|
|
2211
2232
|
if (step.delayNextStep) {
|
|
2212
2233
|
await this._delay(step.delayNextStep);
|
|
@@ -2243,7 +2264,13 @@ ${issues}`);
|
|
|
2243
2264
|
}
|
|
2244
2265
|
} else {
|
|
2245
2266
|
globalLogger.info(`${prefix}: ${step.type} ${step.direction}`);
|
|
2246
|
-
this.
|
|
2267
|
+
this.pendingLogPromises.push(
|
|
2268
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2269
|
+
prompt: `${step.type}: direction=${step.direction}`,
|
|
2270
|
+
commands: [`Local ${step.type} execution`],
|
|
2271
|
+
reasoning: [`Local ${step.type} execution`]
|
|
2272
|
+
})
|
|
2273
|
+
);
|
|
2247
2274
|
await this.performSwipe({
|
|
2248
2275
|
direction: step.direction,
|
|
2249
2276
|
x1: step.x1,
|
|
@@ -2264,7 +2291,13 @@ ${issues}`);
|
|
|
2264
2291
|
}
|
|
2265
2292
|
case "zoom": {
|
|
2266
2293
|
globalLogger.info(`${prefix}: Zoom ${step.direction}`);
|
|
2267
|
-
this.
|
|
2294
|
+
this.pendingLogPromises.push(
|
|
2295
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2296
|
+
prompt: `zoom: direction=${step.direction}`,
|
|
2297
|
+
commands: [`Local zoom execution`],
|
|
2298
|
+
reasoning: [`Local zoom execution`]
|
|
2299
|
+
})
|
|
2300
|
+
);
|
|
2268
2301
|
if (isFromFileRef) {
|
|
2269
2302
|
fileRefStepNumber++;
|
|
2270
2303
|
} else {
|
|
@@ -2275,7 +2308,13 @@ ${issues}`);
|
|
|
2275
2308
|
case "scrollUntil": {
|
|
2276
2309
|
const interpolatedText = step.text != null ? this.interpolateTemplate(step.text, params) : void 0;
|
|
2277
2310
|
globalLogger.info(`${prefix}: Scroll until ${interpolatedText ?? step.elementId}`);
|
|
2278
|
-
this.
|
|
2311
|
+
this.pendingLogPromises.push(
|
|
2312
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2313
|
+
prompt: `scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`,
|
|
2314
|
+
commands: [`Local scrollUntil execution`],
|
|
2315
|
+
reasoning: [`Local scrollUntil execution`]
|
|
2316
|
+
})
|
|
2317
|
+
);
|
|
2279
2318
|
await this.performScrollUntil({
|
|
2280
2319
|
direction: step.direction,
|
|
2281
2320
|
text: interpolatedText,
|
|
@@ -2294,7 +2333,13 @@ ${issues}`);
|
|
|
2294
2333
|
const bundleId = params["bundleId"];
|
|
2295
2334
|
const url = this.interpolateTemplate(step.url, params);
|
|
2296
2335
|
globalLogger.info(`${prefix}: Open deeplink ${url}`);
|
|
2297
|
-
this.
|
|
2336
|
+
this.pendingLogPromises.push(
|
|
2337
|
+
this.takeScreenshotAndLogAIExecution({
|
|
2338
|
+
prompt: `openDeepLinkUrl: url=${url}`,
|
|
2339
|
+
commands: [`Local openDeepLinkUrl execution`],
|
|
2340
|
+
reasoning: [`Local openDeepLinkUrl execution`]
|
|
2341
|
+
})
|
|
2342
|
+
);
|
|
2298
2343
|
await this.openDeepLinkUrl({ url, package: pkg, bundleId });
|
|
2299
2344
|
break;
|
|
2300
2345
|
}
|
|
@@ -2415,6 +2460,28 @@ ${"=".repeat(50)}`);
|
|
|
2415
2460
|
globalLogger.error("Failed to log code execution", e);
|
|
2416
2461
|
}
|
|
2417
2462
|
}
|
|
2463
|
+
async logAIExecution(params) {
|
|
2464
|
+
try {
|
|
2465
|
+
await axios.post(`${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/log_ai_execution`, {
|
|
2466
|
+
api_key: this.apiKey,
|
|
2467
|
+
base64_screenshot: params.screenshot,
|
|
2468
|
+
prompt: params.prompt,
|
|
2469
|
+
reasoning: params.reasoning,
|
|
2470
|
+
commands: params.commands,
|
|
2471
|
+
from_cache: !!params.fromCache
|
|
2472
|
+
});
|
|
2473
|
+
} catch (e) {
|
|
2474
|
+
globalLogger.error("Failed to log code execution", e);
|
|
2475
|
+
}
|
|
2476
|
+
}
|
|
2477
|
+
async takeScreenshotAndLogAIExecution(params) {
|
|
2478
|
+
try {
|
|
2479
|
+
const screenshot = await this.getScreenshot(this.appiumSessionConfig);
|
|
2480
|
+
await this.logAIExecution({ ...params, screenshot });
|
|
2481
|
+
} catch (e) {
|
|
2482
|
+
globalLogger.error("Failed to log code execution", e);
|
|
2483
|
+
}
|
|
2484
|
+
}
|
|
2418
2485
|
async takeScreenshotAndLogCodeExecution(command) {
|
|
2419
2486
|
try {
|
|
2420
2487
|
const screenshot = await this.getScreenshot(this.appiumSessionConfig);
|