explorbot 0.1.17 → 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/bin/explorbot-cli.ts +2 -0
  2. package/boat/doc-collector/bin/doc-collector-cli.ts +5 -0
  3. package/boat/doc-collector/package.json +24 -0
  4. package/boat/doc-collector/src/ai/documentarian.ts +184 -0
  5. package/boat/doc-collector/src/cli.ts +119 -0
  6. package/boat/doc-collector/src/config.ts +162 -0
  7. package/boat/doc-collector/src/docbot.ts +391 -0
  8. package/boat/doc-collector/src/docs-renderer.ts +187 -0
  9. package/boat/doc-collector/src/path-filter.ts +46 -0
  10. package/boat/doc-collector/src/research-navigation.ts +90 -0
  11. package/dist/bin/explorbot-cli.js +2 -0
  12. package/dist/boat/doc-collector/bin/doc-collector-cli.js +4 -0
  13. package/dist/boat/doc-collector/src/ai/documentarian.js +157 -0
  14. package/dist/boat/doc-collector/src/cli.js +104 -0
  15. package/dist/boat/doc-collector/src/config.js +129 -0
  16. package/dist/boat/doc-collector/src/docbot.js +326 -0
  17. package/dist/boat/doc-collector/src/docs-renderer.js +141 -0
  18. package/dist/boat/doc-collector/src/path-filter.js +35 -0
  19. package/dist/boat/doc-collector/src/research-navigation.js +71 -0
  20. package/dist/package.json +4 -1
  21. package/dist/src/ai/researcher/coordinates.js +1 -1
  22. package/dist/src/ai/researcher/parser.js +3 -0
  23. package/dist/src/ai/researcher.js +2 -1
  24. package/dist/src/config.js +10 -3
  25. package/dist/src/explorer.js +14 -1
  26. package/dist/src/state-manager.js +3 -0
  27. package/dist/src/utils/url-matcher.js +5 -3
  28. package/dist/src/utils/web-element.js +3 -2
  29. package/package.json +4 -1
  30. package/src/ai/researcher/coordinates.ts +1 -1
  31. package/src/ai/researcher/parser.ts +3 -0
  32. package/src/ai/researcher.ts +2 -1
  33. package/src/config.ts +13 -3
  34. package/src/explorbot.ts +1 -0
  35. package/src/explorer.ts +12 -1
  36. package/src/state-manager.ts +4 -0
  37. package/src/utils/url-matcher.ts +5 -2
  38. package/src/utils/web-element.ts +3 -2
@@ -189,7 +189,20 @@ class Explorer {
189
189
  }
190
190
  await this.connectOrLaunchBrowser();
191
191
  const hasSession = this.options?.session && existsSync(this.options.session);
192
- const contextOptions = hasSession ? { storageState: this.options.session } : undefined;
192
+ const helperOptions = this.playwrightHelper.options || {};
193
+ // CodeceptJS skips _createContextPage when sessions/storageState are involved, so we
194
+ // build contextOptions ourselves. Most keys share a name with Playwright's
195
+ // BrowserContextOptions and are copied as-is; `emulate` must be flattened, `basicAuth`
196
+ // renamed to `httpCredentials`, and `storageState` comes from the --session flag.
197
+ const contextOptions = {
198
+ ...helperOptions,
199
+ };
200
+ if (helperOptions.emulate)
201
+ Object.assign(contextOptions, helperOptions.emulate);
202
+ if (helperOptions.basicAuth)
203
+ contextOptions.httpCredentials = helperOptions.basicAuth;
204
+ if (hasSession)
205
+ contextOptions.storageState = this.options.session;
193
206
  await this.playwrightHelper._createContextPage(contextOptions);
194
207
  await this.playwrightRecorder.start(this.playwrightHelper.browserContext);
195
208
  this.setupXhrCapture();
@@ -416,6 +416,9 @@ export class StateManager {
416
416
  }
417
417
  }
418
418
  export function normalizeUrl(url) {
419
+ if (url.startsWith('/')) {
420
+ return url.replace(/^\/+/, '').replace(/\/+$/g, '');
421
+ }
419
422
  try {
420
423
  const parsed = new URL(url, 'http://localhost');
421
424
  const path = parsed.pathname.replace(/^\/+|\/+$/g, '');
@@ -90,11 +90,13 @@ export function matchesUrl(pattern, path) {
90
90
  }
91
91
  }
92
92
  export function extractStatePath(url) {
93
- if (url.startsWith('/'))
94
- return url;
93
+ if (url.startsWith('/')) {
94
+ return `/${url.replace(/^\/+/, '')}`;
95
+ }
95
96
  try {
96
97
  const urlObj = new URL(url);
97
- return `${urlObj.pathname}${urlObj.search}${urlObj.hash}`;
98
+ const normalizedPathname = `/${urlObj.pathname.replace(/^\/+/, '')}`;
99
+ return `${normalizedPathname}${urlObj.search}${urlObj.hash}`;
98
100
  }
99
101
  catch {
100
102
  return url;
@@ -109,7 +109,8 @@ export class WebElement {
109
109
  return WebElement.fromPlaywrightLocator(page.locator(`[${EXPLORBOT_ATTRS.eidx}="${eidx}"]`));
110
110
  }
111
111
  static async fromEidxList(page, eidxList) {
112
- if (eidxList.length === 0)
112
+ const validEidxList = eidxList.filter((eidx) => /^e\d+$/i.test(eidx));
113
+ if (validEidxList.length === 0)
113
114
  return [];
114
115
  const rawList = await page.evaluate(([list, extractFnStr, config]) => {
115
116
  const extract = new Function(`return ${extractFnStr}`)();
@@ -123,7 +124,7 @@ export class WebElement {
123
124
  results.push(data);
124
125
  }
125
126
  return results;
126
- }, [eidxList, getElementDataExtractorSource(), ELEMENT_EXTRACTION_CONFIG]);
127
+ }, [validEidxList, getElementDataExtractorSource(), ELEMENT_EXTRACTION_CONFIG]);
127
128
  return rawList.map((d) => WebElement.fromRawData(d));
128
129
  }
129
130
  static async findByXPath(html, xpath) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "explorbot",
3
- "version": "0.1.17",
3
+ "version": "0.1.18",
4
4
  "description": "CLI app built with React Ink, CodeceptJS, and Playwright",
5
5
  "license": "Elastic-2.0",
6
6
  "type": "module",
@@ -20,6 +20,9 @@
20
20
  "src/**/*.tsx",
21
21
  "bin/**/*.ts",
22
22
  "boat/api-tester/src/**/*.ts",
23
+ "boat/doc-collector/src/**/*.ts",
24
+ "boat/doc-collector/bin/**/*.ts",
25
+ "boat/doc-collector/package.json",
23
26
  "rules/",
24
27
  "assets/sample-files/"
25
28
  ],
@@ -198,7 +198,7 @@ export function WithCoordinates<T extends Constructor>(Base: T) {
198
198
  const eidxWithoutCoords: string[] = [];
199
199
  for (const section of sections) {
200
200
  for (const el of section.elements) {
201
- if (el.eidx && !el.coordinates) eidxWithoutCoords.push(el.eidx);
201
+ if (el.eidx && /^e\d+$/i.test(el.eidx) && !el.coordinates) eidxWithoutCoords.push(el.eidx);
202
202
  }
203
203
  }
204
204
  if (eidxWithoutCoords.length === 0) return;
@@ -64,6 +64,9 @@ export function mapRowToElement(row: Record<string, string>): ResearchElement |
64
64
 
65
65
  let eidxRaw = (colMap.eidx || '').trim();
66
66
  if (eidxRaw && /^\d+$/.test(eidxRaw)) eidxRaw = `e${eidxRaw}`;
67
+ if (eidxRaw && !/^e\d+$/i.test(eidxRaw)) {
68
+ eidxRaw = '';
69
+ }
67
70
 
68
71
  const aria = parseAriaLocator(colMap.aria || '-');
69
72
 
@@ -121,7 +121,8 @@ export class Researcher extends ResearcherBase implements Agent {
121
121
 
122
122
  const sessionName = `researcher: ${state.url}`;
123
123
  return Observability.run(sessionName, { tags: ['researcher'], sessionId: stateHash }, async () => {
124
- tag('info').log(`Researching ${state.url} to understand the context...`);
124
+ const displayUrl = state.fullUrl || state.url;
125
+ tag('info').log(`Researching ${displayUrl} to understand the context...`);
125
126
  setActivity(`${this.emoji} Researching...`, 'action');
126
127
 
127
128
  await this.ensureNavigated(state.url, screenshot && this.provider.hasVision());
package/src/config.ts CHANGED
@@ -266,6 +266,7 @@ export class ConfigParser {
266
266
  private static instance: ConfigParser;
267
267
  private config: ExplorbotConfig | null = null;
268
268
  private configPath: string | null = null;
269
+ private runtimeBaseUrlOverride: string | null = null;
269
270
 
270
271
  private constructor() {}
271
272
 
@@ -285,8 +286,9 @@ export class ConfigParser {
285
286
  public async loadConfig(options?: {
286
287
  config?: string;
287
288
  path?: string;
289
+ baseUrl?: string;
288
290
  }): Promise<ExplorbotConfig> {
289
- if (this.config && !options?.config && !options?.path) {
291
+ if (this.config && !options?.config && !options?.path && this.runtimeBaseUrlOverride === (options?.baseUrl || null)) {
290
292
  return this.config;
291
293
  }
292
294
 
@@ -317,7 +319,8 @@ export class ConfigParser {
317
319
  throw new Error('Configuration file is empty or invalid');
318
320
  }
319
321
 
320
- this.config = this.resolveConfig(loadedConfig as ExplorbotConfig);
322
+ this.config = this.resolveConfig(loadedConfig as ExplorbotConfig, options);
323
+ this.runtimeBaseUrlOverride = options?.baseUrl || null;
321
324
  this.configPath = resolvedPath;
322
325
 
323
326
  log(`Configuration loaded from: ${resolvedPath}`);
@@ -372,6 +375,7 @@ export class ConfigParser {
372
375
  if (ConfigParser.instance) {
373
376
  ConfigParser.instance.config = null;
374
377
  ConfigParser.instance.configPath = null;
378
+ ConfigParser.instance.runtimeBaseUrlOverride = null;
375
379
  }
376
380
  }
377
381
 
@@ -455,11 +459,17 @@ export class ConfigParser {
455
459
  }
456
460
  }
457
461
 
458
- private resolveConfig(config: ExplorbotConfig): ExplorbotConfig {
462
+ private resolveConfig(config: ExplorbotConfig, options?: { baseUrl?: string }): ExplorbotConfig {
459
463
  if (config.web?.url && !config.playwright?.url) {
460
464
  config.playwright = config.playwright || { browser: 'chromium', url: '' };
461
465
  config.playwright.url = config.web.url;
462
466
  }
467
+
468
+ if (options?.baseUrl) {
469
+ config.playwright = config.playwright || { browser: 'chromium', url: '' };
470
+ config.playwright.url = options.baseUrl;
471
+ }
472
+
463
473
  return config;
464
474
  }
465
475
 
package/src/explorbot.ts CHANGED
@@ -34,6 +34,7 @@ import { sanitizeFilename } from './utils/strings.ts';
34
34
 
35
35
  export interface ExplorBotOptions {
36
36
  from?: string;
37
+ baseUrl?: string;
37
38
  verbose?: boolean;
38
39
  config?: string;
39
40
  path?: string;
package/src/explorer.ts CHANGED
@@ -8,6 +8,7 @@ import { createTest } from 'codeceptjs/lib/mocha/test';
8
8
  import { ActionResult } from './action-result.ts';
9
9
  import Action from './action.js';
10
10
  import { AIProvider } from './ai/provider.js';
11
+ import type { BrowserContextOptions } from 'playwright';
11
12
  import { visuallyAnnotateContainers } from './ai/researcher/coordinates.ts';
12
13
  import { RequestStore } from './api/request-store.ts';
13
14
  import { XhrCapture } from './api/xhr-capture.ts';
@@ -238,7 +239,17 @@ class Explorer {
238
239
  }
239
240
  await this.connectOrLaunchBrowser();
240
241
  const hasSession = this.options?.session && existsSync(this.options.session);
241
- const contextOptions = hasSession ? { storageState: this.options!.session } : undefined;
242
+ const helperOptions = this.playwrightHelper.options || {};
243
+ // CodeceptJS skips _createContextPage when sessions/storageState are involved, so we
244
+ // build contextOptions ourselves. Most keys share a name with Playwright's
245
+ // BrowserContextOptions and are copied as-is; `emulate` must be flattened, `basicAuth`
246
+ // renamed to `httpCredentials`, and `storageState` comes from the --session flag.
247
+ const contextOptions: BrowserContextOptions = {
248
+ ...helperOptions,
249
+ };
250
+ if (helperOptions.emulate) Object.assign(contextOptions, helperOptions.emulate);
251
+ if (helperOptions.basicAuth) contextOptions.httpCredentials = helperOptions.basicAuth;
252
+ if (hasSession) contextOptions.storageState = this.options!.session;
242
253
  await this.playwrightHelper._createContextPage(contextOptions);
243
254
  await this.playwrightRecorder.start(this.playwrightHelper.browserContext);
244
255
  this.setupXhrCapture();
@@ -547,6 +547,10 @@ export class StateManager {
547
547
  }
548
548
 
549
549
  export function normalizeUrl(url: string): string {
550
+ if (url.startsWith('/')) {
551
+ return url.replace(/^\/+/, '').replace(/\/+$/g, '');
552
+ }
553
+
550
554
  try {
551
555
  const parsed = new URL(url, 'http://localhost');
552
556
  const path = parsed.pathname.replace(/^\/+|\/+$/g, '');
@@ -82,10 +82,13 @@ export function matchesUrl(pattern: string, path: string): boolean {
82
82
  }
83
83
 
84
84
  export function extractStatePath(url: string): string {
85
- if (url.startsWith('/')) return url;
85
+ if (url.startsWith('/')) {
86
+ return `/${url.replace(/^\/+/, '')}`;
87
+ }
86
88
  try {
87
89
  const urlObj = new URL(url);
88
- return `${urlObj.pathname}${urlObj.search}${urlObj.hash}`;
90
+ const normalizedPathname = `/${urlObj.pathname.replace(/^\/+/, '')}`;
91
+ return `${normalizedPathname}${urlObj.search}${urlObj.hash}`;
89
92
  } catch {
90
93
  return url;
91
94
  }
@@ -122,7 +122,8 @@ export class WebElement {
122
122
  }
123
123
 
124
124
  static async fromEidxList(page: any, eidxList: string[]): Promise<WebElement[]> {
125
- if (eidxList.length === 0) return [];
125
+ const validEidxList = eidxList.filter((eidx) => /^e\d+$/i.test(eidx));
126
+ if (validEidxList.length === 0) return [];
126
127
 
127
128
  const rawList: RawElementData[] = await page.evaluate(
128
129
  ([list, extractFnStr, config]: [string[], string, ElementExtractionConfig]) => {
@@ -136,7 +137,7 @@ export class WebElement {
136
137
  }
137
138
  return results;
138
139
  },
139
- [eidxList, getElementDataExtractorSource(), ELEMENT_EXTRACTION_CONFIG] as [string[], string, ElementExtractionConfig]
140
+ [validEidxList, getElementDataExtractorSource(), ELEMENT_EXTRACTION_CONFIG] as [string[], string, ElementExtractionConfig]
140
141
  );
141
142
 
142
143
  return rawList.map((d) => WebElement.fromRawData(d));