gpt-driver-node 1.0.10 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -684,6 +684,7 @@ async function executeSmartLoop(ctx, params) {
684
684
  screenshotResolution = await getImageDimensions(screenshot2);
685
685
  }
686
686
  let aiCommands = [];
687
+ let reasoning = [];
687
688
  let actionDescription = [];
688
689
  if (!isCacheHit) {
689
690
  anyCacheMiss = true;
@@ -727,7 +728,9 @@ async function executeSmartLoop(ctx, params) {
727
728
  actionDescription = gptCommands.slice(actionDescriptionIndex, actionDescriptionIndex + 1);
728
729
  }
729
730
  const reasoningIndex = gptCommands.findIndex((entry) => entry.startsWith("reasoning:"));
731
+ reasoning = [];
730
732
  if (reasoningIndex !== -1) {
733
+ reasoning = gptCommands.at(reasoningIndex).split("reasoning:").at(1).trim().split("- ");
731
734
  const parsedCommands = gptCommands.slice(reasoningIndex);
732
735
  const rememberCommands = parsedCommands.filter((cmd) => isRememberCommand(cmd));
733
736
  if (rememberCommands.length > 0) {
@@ -742,7 +745,13 @@ async function executeSmartLoop(ctx, params) {
742
745
  screenshot,
743
746
  commands
744
747
  });
745
- const logPromise = ctx.logCodeExecution(screenshot, commands.join("\n"), isCacheHit);
748
+ const logPromise = ctx.logAIExecution({
749
+ screenshot,
750
+ prompt: params.instruction,
751
+ commands: aiCommands,
752
+ reasoning,
753
+ fromCache: isCacheHit
754
+ });
746
755
  if (ctx.pendingLogPromises) {
747
756
  ctx.pendingLogPromises.push(logPromise);
748
757
  }
@@ -1239,7 +1248,7 @@ ${"=".repeat(50)}`);
1239
1248
  performSwipe: (params) => this.performSwipe(params),
1240
1249
  performType: (text) => this.performType(text),
1241
1250
  performPressEnter: () => this.performPressEnter(),
1242
- logCodeExecution: async (screenshot, command, isCacheHit) => this.logCodeExecution(screenshot, command, isCacheHit),
1251
+ logAIExecution: async (params) => this.logAIExecution(params),
1243
1252
  organisationId: this.organisationId,
1244
1253
  middleLayerAssertFn: options?.middleLayerAssertFn,
1245
1254
  pendingLogPromises: options?.pendingLogPromises
@@ -2197,7 +2206,13 @@ ${issues}`);
2197
2206
  case "type": {
2198
2207
  const text = this.interpolateTemplate(step.text, params);
2199
2208
  globalLogger.info(`${prefix}: Type text`);
2200
- this.takeScreenshotAndLogCodeExecution(`type: text=${text}`);
2209
+ this.pendingLogPromises.push(
2210
+ this.takeScreenshotAndLogAIExecution({
2211
+ prompt: `type: text=${text}`,
2212
+ commands: ["Local type execution"],
2213
+ reasoning: ["Local type execution"]
2214
+ })
2215
+ );
2201
2216
  await this.performType(text);
2202
2217
  if (isFromFileRef) {
2203
2218
  fileRefStepNumber++;
@@ -2208,7 +2223,13 @@ ${issues}`);
2208
2223
  }
2209
2224
  case "enter": {
2210
2225
  globalLogger.info(`${prefix}: Press Enter`);
2211
- this.takeScreenshotAndLogCodeExecution(`press: Enter`);
2226
+ this.pendingLogPromises.push(
2227
+ this.takeScreenshotAndLogAIExecution({
2228
+ prompt: `press: Enter`,
2229
+ commands: ["Local press Enter execution"],
2230
+ reasoning: ["Local press Enter execution"]
2231
+ })
2232
+ );
2212
2233
  await this.performPressEnter();
2213
2234
  if (step.delayNextStep) {
2214
2235
  await this._delay(step.delayNextStep);
@@ -2245,7 +2266,13 @@ ${issues}`);
2245
2266
  }
2246
2267
  } else {
2247
2268
  globalLogger.info(`${prefix}: ${step.type} ${step.direction}`);
2248
- this.takeScreenshotAndLogCodeExecution(`${step.type}: direction=${step.direction}`);
2269
+ this.pendingLogPromises.push(
2270
+ this.takeScreenshotAndLogAIExecution({
2271
+ prompt: `${step.type}: direction=${step.direction}`,
2272
+ commands: [`Local ${step.type} execution`],
2273
+ reasoning: [`Local ${step.type} execution`]
2274
+ })
2275
+ );
2249
2276
  await this.performSwipe({
2250
2277
  direction: step.direction,
2251
2278
  x1: step.x1,
@@ -2266,7 +2293,13 @@ ${issues}`);
2266
2293
  }
2267
2294
  case "zoom": {
2268
2295
  globalLogger.info(`${prefix}: Zoom ${step.direction}`);
2269
- this.takeScreenshotAndLogCodeExecution(`zoom: direction=${step.direction}`);
2296
+ this.pendingLogPromises.push(
2297
+ this.takeScreenshotAndLogAIExecution({
2298
+ prompt: `zoom: direction=${step.direction}`,
2299
+ commands: [`Local zoom execution`],
2300
+ reasoning: [`Local zoom execution`]
2301
+ })
2302
+ );
2270
2303
  if (isFromFileRef) {
2271
2304
  fileRefStepNumber++;
2272
2305
  } else {
@@ -2277,7 +2310,13 @@ ${issues}`);
2277
2310
  case "scrollUntil": {
2278
2311
  const interpolatedText = step.text != null ? this.interpolateTemplate(step.text, params) : void 0;
2279
2312
  globalLogger.info(`${prefix}: Scroll until ${interpolatedText ?? step.elementId}`);
2280
- this.takeScreenshotAndLogCodeExecution(`scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`);
2313
+ this.pendingLogPromises.push(
2314
+ this.takeScreenshotAndLogAIExecution({
2315
+ prompt: `scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`,
2316
+ commands: [`Local scrollUntil execution`],
2317
+ reasoning: [`Local scrollUntil execution`]
2318
+ })
2319
+ );
2281
2320
  await this.performScrollUntil({
2282
2321
  direction: step.direction,
2283
2322
  text: interpolatedText,
@@ -2296,7 +2335,13 @@ ${issues}`);
2296
2335
  const bundleId = params["bundleId"];
2297
2336
  const url = this.interpolateTemplate(step.url, params);
2298
2337
  globalLogger.info(`${prefix}: Open deeplink ${url}`);
2299
- this.takeScreenshotAndLogCodeExecution(`openDeepLinkUrl: url=${url}`);
2338
+ this.pendingLogPromises.push(
2339
+ this.takeScreenshotAndLogAIExecution({
2340
+ prompt: `openDeepLinkUrl: url=${url}`,
2341
+ commands: [`Local openDeepLinkUrl execution`],
2342
+ reasoning: [`Local openDeepLinkUrl execution`]
2343
+ })
2344
+ );
2300
2345
  await this.openDeepLinkUrl({ url, package: pkg, bundleId });
2301
2346
  break;
2302
2347
  }
@@ -2417,6 +2462,28 @@ ${"=".repeat(50)}`);
2417
2462
  globalLogger.error("Failed to log code execution", e);
2418
2463
  }
2419
2464
  }
2465
+ async logAIExecution(params) {
2466
+ try {
2467
+ await axios.post(`${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/log_ai_execution`, {
2468
+ api_key: this.apiKey,
2469
+ base64_screenshot: params.screenshot,
2470
+ prompt: params.prompt,
2471
+ reasoning: params.reasoning,
2472
+ commands: params.commands,
2473
+ from_cache: !!params.fromCache
2474
+ });
2475
+ } catch (e) {
2476
+ globalLogger.error("Failed to log code execution", e);
2477
+ }
2478
+ }
2479
+ async takeScreenshotAndLogAIExecution(params) {
2480
+ try {
2481
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
2482
+ await this.logAIExecution({ ...params, screenshot });
2483
+ } catch (e) {
2484
+ globalLogger.error("Failed to log code execution", e);
2485
+ }
2486
+ }
2420
2487
  async takeScreenshotAndLogCodeExecution(command) {
2421
2488
  try {
2422
2489
  const screenshot = await this.getScreenshot(this.appiumSessionConfig);
package/dist/index.d.cts CHANGED
@@ -877,6 +877,8 @@ declare class GptDriver {
877
877
  private gptHandler;
878
878
  private executeCommand;
879
879
  private logCodeExecution;
880
+ private logAIExecution;
881
+ private takeScreenshotAndLogAIExecution;
880
882
  private takeScreenshotAndLogCodeExecution;
881
883
  }
882
884
 
package/dist/index.mjs CHANGED
@@ -682,6 +682,7 @@ async function executeSmartLoop(ctx, params) {
682
682
  screenshotResolution = await getImageDimensions(screenshot2);
683
683
  }
684
684
  let aiCommands = [];
685
+ let reasoning = [];
685
686
  let actionDescription = [];
686
687
  if (!isCacheHit) {
687
688
  anyCacheMiss = true;
@@ -725,7 +726,9 @@ async function executeSmartLoop(ctx, params) {
725
726
  actionDescription = gptCommands.slice(actionDescriptionIndex, actionDescriptionIndex + 1);
726
727
  }
727
728
  const reasoningIndex = gptCommands.findIndex((entry) => entry.startsWith("reasoning:"));
729
+ reasoning = [];
728
730
  if (reasoningIndex !== -1) {
731
+ reasoning = gptCommands.at(reasoningIndex).split("reasoning:").at(1).trim().split("- ");
729
732
  const parsedCommands = gptCommands.slice(reasoningIndex);
730
733
  const rememberCommands = parsedCommands.filter((cmd) => isRememberCommand(cmd));
731
734
  if (rememberCommands.length > 0) {
@@ -740,7 +743,13 @@ async function executeSmartLoop(ctx, params) {
740
743
  screenshot,
741
744
  commands
742
745
  });
743
- const logPromise = ctx.logCodeExecution(screenshot, commands.join("\n"), isCacheHit);
746
+ const logPromise = ctx.logAIExecution({
747
+ screenshot,
748
+ prompt: params.instruction,
749
+ commands: aiCommands,
750
+ reasoning,
751
+ fromCache: isCacheHit
752
+ });
744
753
  if (ctx.pendingLogPromises) {
745
754
  ctx.pendingLogPromises.push(logPromise);
746
755
  }
@@ -1237,7 +1246,7 @@ ${"=".repeat(50)}`);
1237
1246
  performSwipe: (params) => this.performSwipe(params),
1238
1247
  performType: (text) => this.performType(text),
1239
1248
  performPressEnter: () => this.performPressEnter(),
1240
- logCodeExecution: async (screenshot, command, isCacheHit) => this.logCodeExecution(screenshot, command, isCacheHit),
1249
+ logAIExecution: async (params) => this.logAIExecution(params),
1241
1250
  organisationId: this.organisationId,
1242
1251
  middleLayerAssertFn: options?.middleLayerAssertFn,
1243
1252
  pendingLogPromises: options?.pendingLogPromises
@@ -2195,7 +2204,13 @@ ${issues}`);
2195
2204
  case "type": {
2196
2205
  const text = this.interpolateTemplate(step.text, params);
2197
2206
  globalLogger.info(`${prefix}: Type text`);
2198
- this.takeScreenshotAndLogCodeExecution(`type: text=${text}`);
2207
+ this.pendingLogPromises.push(
2208
+ this.takeScreenshotAndLogAIExecution({
2209
+ prompt: `type: text=${text}`,
2210
+ commands: ["Local type execution"],
2211
+ reasoning: ["Local type execution"]
2212
+ })
2213
+ );
2199
2214
  await this.performType(text);
2200
2215
  if (isFromFileRef) {
2201
2216
  fileRefStepNumber++;
@@ -2206,7 +2221,13 @@ ${issues}`);
2206
2221
  }
2207
2222
  case "enter": {
2208
2223
  globalLogger.info(`${prefix}: Press Enter`);
2209
- this.takeScreenshotAndLogCodeExecution(`press: Enter`);
2224
+ this.pendingLogPromises.push(
2225
+ this.takeScreenshotAndLogAIExecution({
2226
+ prompt: `press: Enter`,
2227
+ commands: ["Local press Enter execution"],
2228
+ reasoning: ["Local press Enter execution"]
2229
+ })
2230
+ );
2210
2231
  await this.performPressEnter();
2211
2232
  if (step.delayNextStep) {
2212
2233
  await this._delay(step.delayNextStep);
@@ -2243,7 +2264,13 @@ ${issues}`);
2243
2264
  }
2244
2265
  } else {
2245
2266
  globalLogger.info(`${prefix}: ${step.type} ${step.direction}`);
2246
- this.takeScreenshotAndLogCodeExecution(`${step.type}: direction=${step.direction}`);
2267
+ this.pendingLogPromises.push(
2268
+ this.takeScreenshotAndLogAIExecution({
2269
+ prompt: `${step.type}: direction=${step.direction}`,
2270
+ commands: [`Local ${step.type} execution`],
2271
+ reasoning: [`Local ${step.type} execution`]
2272
+ })
2273
+ );
2247
2274
  await this.performSwipe({
2248
2275
  direction: step.direction,
2249
2276
  x1: step.x1,
@@ -2264,7 +2291,13 @@ ${issues}`);
2264
2291
  }
2265
2292
  case "zoom": {
2266
2293
  globalLogger.info(`${prefix}: Zoom ${step.direction}`);
2267
- this.takeScreenshotAndLogCodeExecution(`zoom: direction=${step.direction}`);
2294
+ this.pendingLogPromises.push(
2295
+ this.takeScreenshotAndLogAIExecution({
2296
+ prompt: `zoom: direction=${step.direction}`,
2297
+ commands: [`Local zoom execution`],
2298
+ reasoning: [`Local zoom execution`]
2299
+ })
2300
+ );
2268
2301
  if (isFromFileRef) {
2269
2302
  fileRefStepNumber++;
2270
2303
  } else {
@@ -2275,7 +2308,13 @@ ${issues}`);
2275
2308
  case "scrollUntil": {
2276
2309
  const interpolatedText = step.text != null ? this.interpolateTemplate(step.text, params) : void 0;
2277
2310
  globalLogger.info(`${prefix}: Scroll until ${interpolatedText ?? step.elementId}`);
2278
- this.takeScreenshotAndLogCodeExecution(`scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`);
2311
+ this.pendingLogPromises.push(
2312
+ this.takeScreenshotAndLogAIExecution({
2313
+ prompt: `scrollUntil: text=${interpolatedText}, elementId=${step.elementId}`,
2314
+ commands: [`Local scrollUntil execution`],
2315
+ reasoning: [`Local scrollUntil execution`]
2316
+ })
2317
+ );
2279
2318
  await this.performScrollUntil({
2280
2319
  direction: step.direction,
2281
2320
  text: interpolatedText,
@@ -2294,7 +2333,13 @@ ${issues}`);
2294
2333
  const bundleId = params["bundleId"];
2295
2334
  const url = this.interpolateTemplate(step.url, params);
2296
2335
  globalLogger.info(`${prefix}: Open deeplink ${url}`);
2297
- this.takeScreenshotAndLogCodeExecution(`openDeepLinkUrl: url=${url}`);
2336
+ this.pendingLogPromises.push(
2337
+ this.takeScreenshotAndLogAIExecution({
2338
+ prompt: `openDeepLinkUrl: url=${url}`,
2339
+ commands: [`Local openDeepLinkUrl execution`],
2340
+ reasoning: [`Local openDeepLinkUrl execution`]
2341
+ })
2342
+ );
2298
2343
  await this.openDeepLinkUrl({ url, package: pkg, bundleId });
2299
2344
  break;
2300
2345
  }
@@ -2415,6 +2460,28 @@ ${"=".repeat(50)}`);
2415
2460
  globalLogger.error("Failed to log code execution", e);
2416
2461
  }
2417
2462
  }
2463
+ async logAIExecution(params) {
2464
+ try {
2465
+ await axios.post(`${this.gptDriverBaseUrl}/sessions/${this.gptDriverSessionId}/log_ai_execution`, {
2466
+ api_key: this.apiKey,
2467
+ base64_screenshot: params.screenshot,
2468
+ prompt: params.prompt,
2469
+ reasoning: params.reasoning,
2470
+ commands: params.commands,
2471
+ from_cache: !!params.fromCache
2472
+ });
2473
+ } catch (e) {
2474
+ globalLogger.error("Failed to log code execution", e);
2475
+ }
2476
+ }
2477
+ async takeScreenshotAndLogAIExecution(params) {
2478
+ try {
2479
+ const screenshot = await this.getScreenshot(this.appiumSessionConfig);
2480
+ await this.logAIExecution({ ...params, screenshot });
2481
+ } catch (e) {
2482
+ globalLogger.error("Failed to log code execution", e);
2483
+ }
2484
+ }
2418
2485
  async takeScreenshotAndLogCodeExecution(command) {
2419
2486
  try {
2420
2487
  const screenshot = await this.getScreenshot(this.appiumSessionConfig);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gpt-driver-node",
3
- "version": "1.0.10",
3
+ "version": "1.0.11",
4
4
  "main": "./dist/index.cjs",
5
5
  "module": "./dist/index.mjs",
6
6
  "types": "./dist/index.d.cts",