gologin-web-access 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/CHANGELOG.md +19 -0
  2. package/LICENSE +21 -0
  3. package/README.md +344 -0
  4. package/dist/cli.js +173 -0
  5. package/dist/commands/back.js +13 -0
  6. package/dist/commands/batch.js +81 -0
  7. package/dist/commands/batchChangeTrack.js +99 -0
  8. package/dist/commands/batchExtract.js +97 -0
  9. package/dist/commands/batchScrape.js +140 -0
  10. package/dist/commands/changeTrack.js +65 -0
  11. package/dist/commands/check.js +14 -0
  12. package/dist/commands/click.js +14 -0
  13. package/dist/commands/close.js +19 -0
  14. package/dist/commands/configInit.js +77 -0
  15. package/dist/commands/configShow.js +23 -0
  16. package/dist/commands/cookies.js +22 -0
  17. package/dist/commands/cookiesClear.js +13 -0
  18. package/dist/commands/cookiesImport.js +14 -0
  19. package/dist/commands/crawl.js +71 -0
  20. package/dist/commands/crawlErrors.js +20 -0
  21. package/dist/commands/crawlResult.js +27 -0
  22. package/dist/commands/crawlStart.js +56 -0
  23. package/dist/commands/crawlStatus.js +25 -0
  24. package/dist/commands/current.js +14 -0
  25. package/dist/commands/dblclick.js +14 -0
  26. package/dist/commands/eval.js +20 -0
  27. package/dist/commands/extract.js +44 -0
  28. package/dist/commands/fill.js +15 -0
  29. package/dist/commands/find.js +16 -0
  30. package/dist/commands/focus.js +14 -0
  31. package/dist/commands/forward.js +13 -0
  32. package/dist/commands/get.js +15 -0
  33. package/dist/commands/hover.js +14 -0
  34. package/dist/commands/jobs.js +47 -0
  35. package/dist/commands/map.js +61 -0
  36. package/dist/commands/open.js +22 -0
  37. package/dist/commands/parseDocument.js +34 -0
  38. package/dist/commands/pdf.js +14 -0
  39. package/dist/commands/press.js +15 -0
  40. package/dist/commands/read.js +51 -0
  41. package/dist/commands/reload.js +13 -0
  42. package/dist/commands/run.js +76 -0
  43. package/dist/commands/scrape.js +19 -0
  44. package/dist/commands/scrapeJson.js +24 -0
  45. package/dist/commands/scrapeMarkdown.js +37 -0
  46. package/dist/commands/scrapeScreenshot.js +65 -0
  47. package/dist/commands/scrapeText.js +37 -0
  48. package/dist/commands/screenshot.js +23 -0
  49. package/dist/commands/scroll.js +23 -0
  50. package/dist/commands/scrollIntoView.js +14 -0
  51. package/dist/commands/search.js +39 -0
  52. package/dist/commands/searchBrowser.js +28 -0
  53. package/dist/commands/select.js +15 -0
  54. package/dist/commands/sessions.js +14 -0
  55. package/dist/commands/shared.js +102 -0
  56. package/dist/commands/snapshot.js +18 -0
  57. package/dist/commands/storageClear.js +18 -0
  58. package/dist/commands/storageExport.js +26 -0
  59. package/dist/commands/storageImport.js +23 -0
  60. package/dist/commands/tabClose.js +18 -0
  61. package/dist/commands/tabFocus.js +15 -0
  62. package/dist/commands/tabOpen.js +19 -0
  63. package/dist/commands/tabs.js +13 -0
  64. package/dist/commands/type.js +15 -0
  65. package/dist/commands/uncheck.js +14 -0
  66. package/dist/commands/upload.js +15 -0
  67. package/dist/commands/wait.js +27 -0
  68. package/dist/config.js +260 -0
  69. package/dist/doctor.js +86 -0
  70. package/dist/internal-agent/cli.js +336 -0
  71. package/dist/internal-agent/commands/back.js +12 -0
  72. package/dist/internal-agent/commands/check.js +17 -0
  73. package/dist/internal-agent/commands/click.js +17 -0
  74. package/dist/internal-agent/commands/close.js +12 -0
  75. package/dist/internal-agent/commands/cookies.js +23 -0
  76. package/dist/internal-agent/commands/cookiesClear.js +12 -0
  77. package/dist/internal-agent/commands/cookiesImport.js +18 -0
  78. package/dist/internal-agent/commands/current.js +9 -0
  79. package/dist/internal-agent/commands/dblclick.js +17 -0
  80. package/dist/internal-agent/commands/doctor.js +53 -0
  81. package/dist/internal-agent/commands/eval.js +30 -0
  82. package/dist/internal-agent/commands/fill.js +18 -0
  83. package/dist/internal-agent/commands/find.js +86 -0
  84. package/dist/internal-agent/commands/focus.js +17 -0
  85. package/dist/internal-agent/commands/forward.js +12 -0
  86. package/dist/internal-agent/commands/get.js +19 -0
  87. package/dist/internal-agent/commands/hover.js +17 -0
  88. package/dist/internal-agent/commands/open.js +67 -0
  89. package/dist/internal-agent/commands/pdf.js +18 -0
  90. package/dist/internal-agent/commands/press.js +19 -0
  91. package/dist/internal-agent/commands/reload.js +12 -0
  92. package/dist/internal-agent/commands/screenshot.js +22 -0
  93. package/dist/internal-agent/commands/scroll.js +25 -0
  94. package/dist/internal-agent/commands/scrollIntoView.js +17 -0
  95. package/dist/internal-agent/commands/select.js +18 -0
  96. package/dist/internal-agent/commands/sessions.js +15 -0
  97. package/dist/internal-agent/commands/shared.js +51 -0
  98. package/dist/internal-agent/commands/snapshot.js +16 -0
  99. package/dist/internal-agent/commands/storageClear.js +13 -0
  100. package/dist/internal-agent/commands/storageExport.js +24 -0
  101. package/dist/internal-agent/commands/storageImport.js +20 -0
  102. package/dist/internal-agent/commands/tabClose.js +21 -0
  103. package/dist/internal-agent/commands/tabFocus.js +21 -0
  104. package/dist/internal-agent/commands/tabOpen.js +13 -0
  105. package/dist/internal-agent/commands/tabs.js +17 -0
  106. package/dist/internal-agent/commands/type.js +18 -0
  107. package/dist/internal-agent/commands/uncheck.js +17 -0
  108. package/dist/internal-agent/commands/upload.js +18 -0
  109. package/dist/internal-agent/commands/wait.js +41 -0
  110. package/dist/internal-agent/daemon/browser.js +818 -0
  111. package/dist/internal-agent/daemon/refStore.js +26 -0
  112. package/dist/internal-agent/daemon/server.js +330 -0
  113. package/dist/internal-agent/daemon/sessionManager.js +684 -0
  114. package/dist/internal-agent/daemon/snapshot.js +285 -0
  115. package/dist/internal-agent/lib/config.js +59 -0
  116. package/dist/internal-agent/lib/daemon.js +300 -0
  117. package/dist/internal-agent/lib/errors.js +63 -0
  118. package/dist/internal-agent/lib/types.js +2 -0
  119. package/dist/internal-agent/lib/utils.js +165 -0
  120. package/dist/jobRunner.js +56 -0
  121. package/dist/lib/agentCli.js +158 -0
  122. package/dist/lib/browserRead.js +125 -0
  123. package/dist/lib/browserStructured.js +77 -0
  124. package/dist/lib/changeTracking.js +117 -0
  125. package/dist/lib/cloudApi.js +41 -0
  126. package/dist/lib/concurrency.js +15 -0
  127. package/dist/lib/crawl.js +313 -0
  128. package/dist/lib/document.js +170 -0
  129. package/dist/lib/errors.js +55 -0
  130. package/dist/lib/extract.js +65 -0
  131. package/dist/lib/extractRunner.js +22 -0
  132. package/dist/lib/jobRegistry.js +164 -0
  133. package/dist/lib/output.js +122 -0
  134. package/dist/lib/readSource.js +297 -0
  135. package/dist/lib/runbooks.js +193 -0
  136. package/dist/lib/search.js +727 -0
  137. package/dist/lib/selfCli.js +136 -0
  138. package/dist/lib/structuredScrape.js +83 -0
  139. package/dist/lib/types.js +2 -0
  140. package/dist/lib/unlocker.js +383 -0
  141. package/package.json +67 -0
@@ -0,0 +1,71 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildCrawlCommand = buildCrawlCommand;
4
+ const commander_1 = require("commander");
5
+ const config_1 = require("../config");
6
+ const crawl_1 = require("../lib/crawl");
7
+ const output_1 = require("../lib/output");
8
/**
 * Builds the `crawl` subcommand.
 *
 * When invoked, the action loads the config, requires a Web Unlocker key,
 * validates `--format`, calls `crawlSite` with the normalized options and
 * prints the result as JSON. `process.exitCode` is set to 1 when the result
 * status is "failed", or when `--strict` is set and `result.failed > 0`.
 *
 * @returns {object} The configured commander `Command` instance.
 */
function buildCrawlCommand() {
    // [flags, description, default?] — listed in registration order.
    const optionSpecs = [
        ["--format <format>", "html, markdown, text, or json", "markdown"],
        ["--limit <count>", "Maximum number of pages to visit", "25"],
        ["--max-depth <depth>", "Maximum link depth from the root URL", "2"],
        ["--concurrency <count>", "Number of concurrent requests", "4"],
        ["--include-subdomains", "Include subdomains inside the crawl scope"],
        ["--include <patterns>", "Comma-separated URL patterns to include"],
        ["--exclude <patterns>", "Comma-separated URL patterns to exclude"],
        ["--ignore-query", "Normalize URLs without query parameters"],
        ["--sitemap <mode>", "include, only, or skip", "include"],
        ["--only-main-content", "For html, markdown, or text output, isolate the most readable content block on each page"],
        ["--strict", "Exit non-zero when any page fails during crawling"],
    ];
    const handleCrawl = async (url, options) => {
        const config = await (0, config_1.loadConfig)();
        const apiKey = (0, config_1.requireWebUnlockerKey)(config);
        const format = normalizeFormat(options.format);
        const crawlOptions = {
            limit: normalizePositiveInt(options.limit, 25),
            maxDepth: normalizeNonNegativeInt(options.maxDepth, 2),
            concurrency: normalizePositiveInt(options.concurrency, 4),
            includeSubdomains: Boolean(options.includeSubdomains),
            includePatterns: splitPatterns(options.include),
            excludePatterns: splitPatterns(options.exclude),
            ignoreQueryParameters: Boolean(options.ignoreQuery),
            sitemapMode: normalizeSitemapMode(options.sitemap),
            onlyMainContent: Boolean(options.onlyMainContent),
        };
        const result = await (0, crawl_1.crawlSite)(url, apiKey, format, crawlOptions);
        (0, output_1.printJson)(result);
        const shouldFail = result.status === "failed" || (options.strict && result.failed > 0);
        if (shouldFail) {
            process.exitCode = 1;
        }
    };
    const command = new commander_1.Command("crawl");
    command.description("Crawl a website through Gologin Web Unlocker and return per-page extracted content.");
    command.argument("<url>", "Root website URL to crawl");
    for (const spec of optionSpecs) {
        command.option(...spec);
    }
    command.action(handleCrawl);
    return command;
}
44
/**
 * Splits a comma-separated pattern list into trimmed, non-empty entries.
 *
 * @param {string|undefined} value - Raw option value; any falsy value yields `[]`.
 * @returns {string[]} Trimmed patterns in their original order.
 */
function splitPatterns(value) {
    if (!value) {
        return [];
    }
    const patterns = [];
    for (const piece of value.split(",")) {
        const trimmed = piece.trim();
        if (trimmed) {
            patterns.push(trimmed);
        }
    }
    return patterns;
}
52
/**
 * Validates a `--sitemap` value.
 *
 * @param {*} value - Candidate mode.
 * @returns {string} `value` unchanged when it is "include", "only", or "skip".
 * @throws {Error} For any other value.
 */
function normalizeSitemapMode(value) {
    switch (value) {
        case "include":
        case "only":
        case "skip":
            return value;
        default:
            throw new Error(`Unsupported sitemap mode: ${value}`);
    }
}
58
/**
 * Validates a `--format` value for the crawl command.
 *
 * @param {*} value - Candidate format.
 * @returns {string} `value` unchanged when it is "html", "markdown", "text", or "json".
 * @throws {Error} For any other value.
 */
function normalizeFormat(value) {
    const supported = ["html", "markdown", "text", "json"];
    if (!supported.includes(value)) {
        throw new Error(`Unsupported crawl format: ${value}`);
    }
    return value;
}
64
/**
 * Coerces an option value to a positive integer.
 *
 * The value is converted with `Number()` and floored *before* the range
 * check, so a fraction such as "0.5" (which floors to 0) falls back instead
 * of yielding a non-positive result.
 *
 * @param {*} value - Raw option value, typically a string from the CLI.
 * @param {number} fallback - Returned when `value` is not a finite number >= 1 after flooring.
 * @returns {number} The floored value, or `fallback`.
 */
function normalizePositiveInt(value, fallback) {
    const floored = Math.floor(Number(value));
    return Number.isFinite(floored) && floored >= 1 ? floored : fallback;
}
68
/**
 * Coerces an option value to a non-negative integer.
 *
 * `Number("")` and `Number(null)` are both 0, so missing or blank input is
 * rejected up front; otherwise an empty `--max-depth ""` would silently
 * become depth 0 instead of the fallback.
 *
 * @param {*} value - Raw option value, typically a string from the CLI.
 * @param {number} fallback - Returned when `value` is missing, blank, not finite, or negative.
 * @returns {number} The floored value, or `fallback`.
 */
function normalizeNonNegativeInt(value, fallback) {
    const isBlank = value == null || (typeof value === "string" && value.trim() === "");
    if (isBlank) {
        return fallback;
    }
    const parsed = Number(value);
    return Number.isFinite(parsed) && parsed >= 0 ? Math.floor(parsed) : fallback;
}
@@ -0,0 +1,20 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildCrawlErrorsCommand = buildCrawlErrorsCommand;
4
+ const commander_1 = require("commander");
5
+ const config_1 = require("../config");
6
+ const jobRegistry_1 = require("../lib/jobRegistry");
7
+ const output_1 = require("../lib/output");
8
/**
 * Builds the `crawl-errors` subcommand.
 *
 * The action looks the job up with `getJob` and prints whatever
 * `readJobErrors` returns for it; an unknown id raises an error.
 *
 * @returns {object} The configured commander `Command` instance.
 */
function buildCrawlErrorsCommand() {
    const printJobErrors = async (jobId) => {
        const config = await (0, config_1.loadConfig)();
        const job = await (0, jobRegistry_1.getJob)(config, jobId);
        if (job) {
            const captured = await (0, jobRegistry_1.readJobErrors)(job);
            (0, output_1.printText)(captured);
            return;
        }
        throw new Error(`Job ${jobId} not found`);
    };
    const command = new commander_1.Command("crawl-errors");
    command.description("Print stderr captured for a detached crawl job.");
    command.argument("<jobId>", "Crawl job identifier");
    command.action(printJobErrors);
    return command;
}
@@ -0,0 +1,27 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildCrawlResultCommand = buildCrawlResultCommand;
4
+ const commander_1 = require("commander");
5
+ const config_1 = require("../config");
6
+ const jobRegistry_1 = require("../lib/jobRegistry");
7
+ const output_1 = require("../lib/output");
8
/**
 * Builds the `crawl-result` subcommand.
 *
 * With `--json` and a defined stored result, the result is printed as JSON.
 * Otherwise the job output from `readJobOutput` is printed; when that output
 * is empty the stored result is pretty-printed instead, or an empty string
 * if there is no result.
 *
 * @returns {object} The configured commander `Command` instance.
 */
function buildCrawlResultCommand() {
    const showResult = async (jobId, options) => {
        const config = await (0, config_1.loadConfig)();
        const job = await (0, jobRegistry_1.getJob)(config, jobId);
        if (!job) {
            throw new Error(`Job ${jobId} not found`);
        }
        const result = await (0, jobRegistry_1.readJobResult)(config, job);
        const hasResult = result !== undefined;
        if (options.json && hasResult) {
            (0, output_1.printJson)(result);
            return;
        }
        let text = await (0, jobRegistry_1.readJobOutput)(job);
        if (!text) {
            // Fall back to the structured result when no raw output was captured.
            text = hasResult ? JSON.stringify(result, null, 2) : "";
        }
        (0, output_1.printText)(text);
    };
    const command = new commander_1.Command("crawl-result");
    command.description("Print the stored result of a detached crawl job.");
    command.argument("<jobId>", "Crawl job identifier");
    command.option("--json", "Print structured JSON result when available");
    command.action(showResult);
    return command;
}
@@ -0,0 +1,56 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildCrawlStartCommand = buildCrawlStartCommand;
4
+ const commander_1 = require("commander");
5
+ const config_1 = require("../config");
6
+ const jobRegistry_1 = require("../lib/jobRegistry");
7
+ const selfCli_1 = require("../lib/selfCli");
8
/**
 * Builds the `crawl-start` subcommand.
 *
 * The action requires a Web Unlocker key, registers a job of kind "crawl"
 * through `createJob`, hands the job id plus the equivalent `crawl` argv to
 * `spawnDetachedNodeInvocation("jobRunner", ...)`, and writes the job id to
 * stdout followed by a newline.
 *
 * @returns {object} The configured commander `Command` instance.
 */
function buildCrawlStartCommand() {
    // [flags, description, default?] — listed in registration order.
    const optionSpecs = [
        ["--format <format>", "html, markdown, text, or json", "markdown"],
        ["--limit <count>", "Maximum number of pages to visit", "25"],
        ["--max-depth <depth>", "Maximum link depth from the root URL", "2"],
        ["--concurrency <count>", "Number of concurrent requests", "4"],
        ["--include-subdomains", "Include subdomains inside the crawl scope"],
        ["--include <patterns>", "Comma-separated URL patterns to include"],
        ["--exclude <patterns>", "Comma-separated URL patterns to exclude"],
        ["--ignore-query", "Normalize URLs without query parameters"],
        ["--sitemap <mode>", "include, only, or skip", "include"],
        ["--only-main-content", "For html, markdown, or text output, isolate the most readable content block on each page"],
    ];
    const startDetachedCrawl = async (url, options) => {
        const config = await (0, config_1.loadConfig)();
        // Called only for its check; the returned key is not used here.
        (0, config_1.requireWebUnlockerKey)(config);
        const args = buildCrawlArgs(url, options);
        const jobSpec = { kind: "crawl", name: url, cwd: process.cwd(), args };
        const job = await (0, jobRegistry_1.createJob)(config, jobSpec);
        const runnerArgs = [job.jobId, ...args];
        await (0, selfCli_1.spawnDetachedNodeInvocation)("jobRunner", runnerArgs, {
            cwd: process.cwd(),
        });
        process.stdout.write(`${job.jobId}\n`);
    };
    const command = new commander_1.Command("crawl-start");
    command.description("Start a detached crawl job and return a local job id.");
    command.argument("<url>", "Root website URL to crawl");
    for (const spec of optionSpecs) {
        command.option(...spec);
    }
    command.action(startDetachedCrawl);
    return command;
}
38
/**
 * Translates parsed `crawl-start` options back into an argv for `crawl`.
 *
 * Valued options (format, limit, max-depth, concurrency, sitemap) are always
 * emitted; boolean flags and the include/exclude patterns are appended only
 * when truthy.
 *
 * @param {string} url - Root URL to crawl.
 * @param {object} options - Parsed commander options.
 * @returns {Array} Argument vector starting with "crawl" and the URL.
 */
function buildCrawlArgs(url, options) {
    const argv = ["crawl", url];
    const valued = [
        ["--format", options.format],
        ["--limit", options.limit],
        ["--max-depth", options.maxDepth],
        ["--concurrency", options.concurrency],
        ["--sitemap", options.sitemap],
    ];
    for (const [flag, optionValue] of valued) {
        argv.push(flag, optionValue);
    }
    if (options.includeSubdomains) {
        argv.push("--include-subdomains");
    }
    for (const [flag, patterns] of [["--include", options.include], ["--exclude", options.exclude]]) {
        if (patterns) {
            argv.push(flag, patterns);
        }
    }
    if (options.ignoreQuery) {
        argv.push("--ignore-query");
    }
    if (options.onlyMainContent) {
        argv.push("--only-main-content");
    }
    return argv;
}
@@ -0,0 +1,25 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildCrawlStatusCommand = buildCrawlStatusCommand;
4
+ const commander_1 = require("commander");
5
+ const config_1 = require("../config");
6
+ const jobRegistry_1 = require("../lib/jobRegistry");
7
+ const output_1 = require("../lib/output");
8
/**
 * Builds the `crawl-status` subcommand.
 *
 * Prints the job record as JSON with `--json`, otherwise as four
 * `key=value` lines (job, kind, status, name). An unknown id raises an error.
 *
 * @returns {object} The configured commander `Command` instance.
 */
function buildCrawlStatusCommand() {
    const showStatus = async (jobId, options) => {
        const config = await (0, config_1.loadConfig)();
        const job = await (0, jobRegistry_1.getJob)(config, jobId);
        if (!job) {
            throw new Error(`Job ${jobId} not found`);
        }
        if (options.json) {
            (0, output_1.printJson)(job);
            return;
        }
        const lines = [
            `job=${job.jobId}`,
            `kind=${job.kind}`,
            `status=${job.status}`,
            `name=${job.name}`,
        ];
        (0, output_1.printText)(lines.join("\n"));
    };
    const command = new commander_1.Command("crawl-status");
    command.description("Show the status of a detached crawl job.");
    command.argument("<jobId>", "Crawl job identifier");
    command.option("--json", "Print JSON output");
    command.action(showStatus);
    return command;
}
@@ -0,0 +1,14 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildCurrentCommand = buildCurrentCommand;
4
+ const commander_1 = require("commander");
5
+ const config_1 = require("../config");
6
+ const agentCli_1 = require("../lib/agentCli");
7
/**
 * Builds the `current` subcommand, which loads the config and runs the
 * agent command `["current"]` with it.
 *
 * @returns {object} The configured commander `Command` instance.
 */
function buildCurrentCommand() {
    const showCurrent = async () => {
        const config = await (0, config_1.loadConfig)();
        await (0, agentCli_1.runAgentCommand)(["current"], config);
    };
    const command = new commander_1.Command("current");
    command.description("Show the current daemon-backed browser session.");
    command.action(showCurrent);
    return command;
}
@@ -0,0 +1,14 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildDoubleClickCommand = buildDoubleClickCommand;
4
+ const commander_1 = require("commander");
5
+ const shared_1 = require("./shared");
6
/**
 * Builds the `dblclick` subcommand, which forwards `dblclick <ref>` to
 * `runBrowserCommand` together with the `session` option value.
 *
 * @returns {object} The value returned by `addSessionOption` for the command.
 */
function buildDoubleClickCommand() {
    const dblclick = new commander_1.Command("dblclick");
    dblclick.description("Double-click an element by snapshot ref.");
    dblclick.argument("<ref>", "Snapshot ref, for example e3");
    dblclick.action(async (ref, options) => {
        const browserArgs = ["dblclick", ref];
        await (0, shared_1.runBrowserCommand)(browserArgs, { session: options.session });
    });
    return (0, shared_1.addSessionOption)(dblclick);
}
@@ -0,0 +1,20 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildEvalCommand = buildEvalCommand;
4
+ const commander_1 = require("commander");
5
+ const shared_1 = require("./shared");
6
/**
 * Builds the `eval` subcommand (alias `js`).
 *
 * The variadic expression words are joined with single spaces and forwarded
 * as `eval <expression>`; `--json` is appended when the flag is set.
 *
 * @returns {object} The value returned by `addSessionOption` for the command.
 */
function buildEvalCommand() {
    const evaluate = async (expression, options) => {
        const script = expression.join(" ");
        const browserArgs = options.json ? ["eval", script, "--json"] : ["eval", script];
        await (0, shared_1.runBrowserCommand)(browserArgs, { session: options.session });
    };
    const command = new commander_1.Command("eval");
    command.alias("js");
    command.description("Evaluate a JavaScript expression in the active Cloud Browser tab.");
    command.argument("<expression...>", "JavaScript expression");
    command.option("--json", "Print the evaluated value as JSON");
    command.action(evaluate);
    return (0, shared_1.addSessionOption)(command);
}
@@ -0,0 +1,44 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.buildExtractCommand = buildExtractCommand;
7
+ const fs_1 = require("fs");
8
+ const path_1 = __importDefault(require("path"));
9
+ const commander_1 = require("commander");
10
+ const config_1 = require("../config");
11
+ const extractRunner_1 = require("../lib/extractRunner");
12
+ const output_1 = require("../lib/output");
13
+ const readSource_1 = require("../lib/readSource");
14
+ const shared_1 = require("./shared");
15
/**
 * Builds the `extract` subcommand.
 *
 * The action resolves the read source, requires a Web Unlocker key unless the
 * source is "browser", loads the JSON schema, and runs `extractUrlWithSchema`.
 * With `--output`, only `result.extracted` is written to the file and the
 * resolved path is printed; otherwise the full result is printed as JSON.
 *
 * @returns {object} The value returned by `addUnlockerRequestOptions` for the command.
 */
function buildExtractCommand() {
    const runExtract = async (url, options) => {
        const config = await (0, config_1.loadConfig)();
        const source = (0, readSource_1.normalizeReadSourceMode)(options.source, "auto");
        // Browser-only reads use an empty key instead of requiring one.
        const apiKey = source === "browser" ? "" : (0, config_1.requireWebUnlockerKey)(config);
        const schemaPath = path_1.default.resolve(options.schema);
        const schema = await readSchema(schemaPath);
        const result = await (0, extractRunner_1.extractUrlWithSchema)(url, config, apiKey, schema, {
            source,
            request: (0, shared_1.normalizeUnlockerRequestOptions)(options),
        });
        if (!options.output) {
            (0, output_1.printJson)(result);
            return;
        }
        const outputPath = path_1.default.resolve(options.output);
        const serialized = `${JSON.stringify(result.extracted, null, 2)}\n`;
        await fs_1.promises.mkdir(path_1.default.dirname(outputPath), { recursive: true });
        await fs_1.promises.writeFile(outputPath, serialized, "utf8");
        process.stdout.write(`${outputPath}\n`);
    };
    const command = new commander_1.Command("extract");
    command.description("Extract structured data from a page using a selector schema.");
    command.argument("<url>", "Target URL");
    command.requiredOption("--schema <path>", "Path to a JSON extraction schema");
    command.option("--output <path>", "Write extracted JSON to a file");
    command.option("--source <source>", "Read source: auto, unlocker, or browser", "auto");
    command.action(runExtract);
    return (0, shared_1.addUnlockerRequestOptions)(command);
}
41
/**
 * Reads and parses a JSON extraction schema from disk.
 *
 * @param {string} schemaPath - Path of the schema file, read as UTF-8.
 * @returns {Promise<*>} The parsed JSON value.
 * @throws {SyntaxError} When the file is not valid JSON; the message names
 *   the file so a bad `--schema` argument can be identified.
 * @throws {Error} Any error raised by `readFile` is propagated unchanged.
 */
async function readSchema(schemaPath) {
    const raw = await fs_1.promises.readFile(schemaPath, "utf8");
    try {
        return JSON.parse(raw);
    }
    catch (error) {
        // Keep the SyntaxError type so existing handlers still match, but say
        // which file was malformed.
        const detail = error instanceof Error ? error.message : String(error);
        throw new SyntaxError(`Invalid JSON in extraction schema ${schemaPath}: ${detail}`);
    }
}
@@ -0,0 +1,15 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildFillCommand = buildFillCommand;
4
+ const commander_1 = require("commander");
5
+ const shared_1 = require("./shared");
6
/**
 * Builds the `fill` subcommand, which forwards `fill <ref> <text>` to
 * `runBrowserCommand` together with the `session` option value.
 *
 * @returns {object} The value returned by `addSessionOption` for the command.
 */
function buildFillCommand() {
    const fill = new commander_1.Command("fill");
    fill.description("Fill text into an element by snapshot ref.");
    fill.argument("<ref>", "Snapshot ref, for example e2");
    fill.argument("<text>", "Text to fill");
    fill.action(async (ref, text, options) => {
        const browserArgs = ["fill", ref, text];
        await (0, shared_1.runBrowserCommand)(browserArgs, { session: options.session });
    });
    return (0, shared_1.addSessionOption)(fill);
}
@@ -0,0 +1,16 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildFindCommand = buildFindCommand;
4
+ const commander_1 = require("commander");
5
+ const shared_1 = require("./shared");
6
/**
 * Builds the `find` subcommand.
 *
 * Unknown options are allowed on this command, and the collected arguments
 * are forwarded verbatim after a leading "find".
 *
 * @returns {object} The value returned by `addSessionOption` for the command.
 */
function buildFindCommand() {
    const forwardFind = async (args, options) => {
        const browserArgs = ["find"];
        browserArgs.push(...args);
        await (0, shared_1.runBrowserCommand)(browserArgs, { session: options.session });
    };
    const command = new commander_1.Command("find");
    command.description("Find elements semantically and act on them through the browser session.");
    command.allowUnknownOption(true);
    command.argument("<args...>", "Arguments forwarded to gologin-agent-browser find");
    command.action(forwardFind);
    return (0, shared_1.addSessionOption)(command);
}
@@ -0,0 +1,14 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildFocusCommand = buildFocusCommand;
4
+ const commander_1 = require("commander");
5
+ const shared_1 = require("./shared");
6
/**
 * Builds the `focus` subcommand, which forwards `focus <ref>` to
 * `runBrowserCommand` together with the `session` option value.
 *
 * @returns {object} The value returned by `addSessionOption` for the command.
 */
function buildFocusCommand() {
    const focus = new commander_1.Command("focus");
    focus.description("Focus an element by snapshot ref.");
    focus.argument("<ref>", "Snapshot ref, for example e2");
    focus.action(async (ref, options) => {
        const browserArgs = ["focus", ref];
        await (0, shared_1.runBrowserCommand)(browserArgs, { session: options.session });
    });
    return (0, shared_1.addSessionOption)(focus);
}
@@ -0,0 +1,13 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildForwardCommand = buildForwardCommand;
4
+ const commander_1 = require("commander");
5
+ const shared_1 = require("./shared");
6
/**
 * Builds the `forward` subcommand, which forwards `forward` to
 * `runBrowserCommand` together with the `session` option value.
 *
 * @returns {object} The value returned by `addSessionOption` for the command.
 */
function buildForwardCommand() {
    const goForward = async (options) => {
        const sessionOptions = { session: options.session };
        await (0, shared_1.runBrowserCommand)(["forward"], sessionOptions);
    };
    const command = new commander_1.Command("forward");
    command.description("Navigate forward in the active Cloud Browser tab history.");
    command.action(goForward);
    return (0, shared_1.addSessionOption)(command);
}
@@ -0,0 +1,15 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildGetCommand = buildGetCommand;
4
+ const commander_1 = require("commander");
5
+ const shared_1 = require("./shared");
6
/**
 * Builds the `get` subcommand.
 *
 * Forwards `get <kind>` to `runBrowserCommand`, appending the target only
 * when one was supplied.
 *
 * @returns {object} The value returned by `addSessionOption` for the command.
 */
function buildGetCommand() {
    const runGet = async (kind, target, options) => {
        const browserArgs = ["get", kind];
        if (target) {
            browserArgs.push(target);
        }
        await (0, shared_1.runBrowserCommand)(browserArgs, { session: options.session });
    };
    const command = new commander_1.Command("get");
    command.description("Get text, value, html, title, or url from the current page or a target.");
    command.argument("<kind>", "text, value, html, title, or url");
    command.argument("[target]", "Optional snapshot ref target");
    command.action(runGet);
    return (0, shared_1.addSessionOption)(command);
}
@@ -0,0 +1,14 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildHoverCommand = buildHoverCommand;
4
+ const commander_1 = require("commander");
5
+ const shared_1 = require("./shared");
6
/**
 * Builds the `hover` subcommand, which forwards `hover <ref>` to
 * `runBrowserCommand` together with the `session` option value.
 *
 * @returns {object} The value returned by `addSessionOption` for the command.
 */
function buildHoverCommand() {
    const hover = new commander_1.Command("hover");
    hover.description("Hover an element by snapshot ref.");
    hover.argument("<ref>", "Snapshot ref, for example e2");
    hover.action(async (ref, options) => {
        const browserArgs = ["hover", ref];
        await (0, shared_1.runBrowserCommand)(browserArgs, { session: options.session });
    });
    return (0, shared_1.addSessionOption)(hover);
}
@@ -0,0 +1,47 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildJobsCommand = buildJobsCommand;
4
+ exports.buildJobCommand = buildJobCommand;
5
+ const commander_1 = require("commander");
6
+ const config_1 = require("../config");
7
+ const jobRegistry_1 = require("../lib/jobRegistry");
8
+ const output_1 = require("../lib/output");
9
/**
 * Builds the `jobs` subcommand.
 *
 * Prints the list from `listJobs` as JSON with `--json`; otherwise prints
 * "No jobs." for an empty list, or one `jobId kind status name` line per job.
 *
 * @returns {object} The configured commander `Command` instance.
 */
function buildJobsCommand() {
    const listAll = async (options) => {
        const config = await (0, config_1.loadConfig)();
        const jobs = await (0, jobRegistry_1.listJobs)(config);
        if (options.json) {
            (0, output_1.printJson)(jobs);
            return;
        }
        if (jobs.length === 0) {
            (0, output_1.printText)("No jobs.");
            return;
        }
        const rows = jobs.map(({ jobId, kind, status, name }) => [jobId, kind, status, name].join(" "));
        (0, output_1.printText)(rows.join("\n"));
    };
    const command = new commander_1.Command("jobs");
    command.description("List local web-access jobs such as crawl, run, and batch executions.");
    command.option("--json", "Print JSON output");
    command.action(listAll);
    return command;
}
29
/**
 * Builds the `job` subcommand.
 *
 * Merges the job record with its stored result and prints it: through
 * `printJson` with `--json`, otherwise as pretty-printed JSON text through
 * `printText`. An unknown id raises an error.
 *
 * @returns {object} The configured commander `Command` instance.
 */
function buildJobCommand() {
    const inspectJob = async (jobId, options) => {
        const config = await (0, config_1.loadConfig)();
        const job = await (0, jobRegistry_1.getJob)(config, jobId);
        if (!job) {
            throw new Error(`Job ${jobId} not found`);
        }
        const result = await (0, jobRegistry_1.readJobResult)(config, job);
        const detail = { ...job, result };
        if (options.json) {
            (0, output_1.printJson)(detail);
        }
        else {
            (0, output_1.printText)(JSON.stringify(detail, null, 2));
        }
    };
    const command = new commander_1.Command("job");
    command.description("Inspect a specific local web-access job.");
    command.argument("<jobId>", "Job identifier");
    command.option("--json", "Print JSON output");
    command.action(inspectJob);
    return command;
}
@@ -0,0 +1,61 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildMapCommand = buildMapCommand;
4
+ const commander_1 = require("commander");
5
+ const config_1 = require("../config");
6
+ const output_1 = require("../lib/output");
7
+ const crawl_1 = require("../lib/crawl");
8
/**
 * Builds the `map` subcommand.
 *
 * The action loads the config, requires a Web Unlocker key, calls `mapSite`
 * with the normalized options and prints the result as JSON.
 * `process.exitCode` is set to 1 when the result status is "failed", or when
 * `--strict` is set and `result.failed > 0`.
 *
 * @returns {object} The configured commander `Command` instance.
 */
function buildMapCommand() {
    // [flags, description, default?] — listed in registration order.
    const optionSpecs = [
        ["--limit <count>", "Maximum number of pages to visit", "100"],
        ["--max-depth <depth>", "Maximum link depth from the root URL", "2"],
        ["--concurrency <count>", "Number of concurrent requests", "4"],
        ["--include-subdomains", "Include subdomains inside the crawl scope"],
        ["--include <patterns>", "Comma-separated URL patterns to include"],
        ["--exclude <patterns>", "Comma-separated URL patterns to exclude"],
        ["--ignore-query", "Normalize URLs without query parameters"],
        ["--sitemap <mode>", "include, only, or skip", "include"],
        ["--strict", "Exit non-zero when any page fails during mapping"],
    ];
    const handleMap = async (url, options) => {
        const config = await (0, config_1.loadConfig)();
        const apiKey = (0, config_1.requireWebUnlockerKey)(config);
        const mapOptions = {
            limit: normalizePositiveInt(options.limit, 100),
            maxDepth: normalizeNonNegativeInt(options.maxDepth, 2),
            concurrency: normalizePositiveInt(options.concurrency, 4),
            includeSubdomains: Boolean(options.includeSubdomains),
            includePatterns: splitPatterns(options.include),
            excludePatterns: splitPatterns(options.exclude),
            ignoreQueryParameters: Boolean(options.ignoreQuery),
            sitemapMode: normalizeSitemapMode(options.sitemap),
        };
        const result = await (0, crawl_1.mapSite)(url, apiKey, mapOptions);
        (0, output_1.printJson)(result);
        const shouldFail = result.status === "failed" || (options.strict && result.failed > 0);
        if (shouldFail) {
            process.exitCode = 1;
        }
    };
    const command = new commander_1.Command("map");
    command.description("Discover internal website links through Gologin Web Unlocker.");
    command.argument("<url>", "Root website URL to map");
    for (const spec of optionSpecs) {
        command.option(...spec);
    }
    command.action(handleMap);
    return command;
}
40
/**
 * Splits a comma-separated pattern list into trimmed, non-empty entries.
 *
 * @param {string|undefined} value - Raw option value; any falsy value yields `[]`.
 * @returns {string[]} Trimmed patterns in their original order.
 */
function splitPatterns(value) {
    const kept = [];
    if (value) {
        value.split(",").forEach((raw) => {
            const pattern = raw.trim();
            if (pattern.length > 0) {
                kept.push(pattern);
            }
        });
    }
    return kept;
}
48
/**
 * Validates a `--sitemap` value.
 *
 * @param {*} value - Candidate mode.
 * @returns {string} `value` unchanged when it is "include", "only", or "skip".
 * @throws {Error} For any other value.
 */
function normalizeSitemapMode(value) {
    const supported = ["include", "only", "skip"];
    if (!supported.includes(value)) {
        throw new Error(`Unsupported sitemap mode: ${value}`);
    }
    return value;
}
54
/**
 * Coerces an option value to a positive integer.
 *
 * The value is converted with `Number()` and floored *before* the range
 * check, so a fraction such as "0.5" (which floors to 0) falls back instead
 * of yielding a non-positive result.
 *
 * @param {*} value - Raw option value, typically a string from the CLI.
 * @param {number} fallback - Returned when `value` is not a finite number >= 1 after flooring.
 * @returns {number} The floored value, or `fallback`.
 */
function normalizePositiveInt(value, fallback) {
    const floored = Math.floor(Number(value));
    return Number.isFinite(floored) && floored >= 1 ? floored : fallback;
}
58
/**
 * Coerces an option value to a non-negative integer.
 *
 * `Number("")` and `Number(null)` are both 0, so missing or blank input is
 * rejected up front; otherwise an empty `--max-depth ""` would silently
 * become depth 0 instead of the fallback.
 *
 * @param {*} value - Raw option value, typically a string from the CLI.
 * @param {number} fallback - Returned when `value` is missing, blank, not finite, or negative.
 * @returns {number} The floored value, or `fallback`.
 */
function normalizeNonNegativeInt(value, fallback) {
    const isBlank = value == null || (typeof value === "string" && value.trim() === "");
    if (isBlank) {
        return fallback;
    }
    const parsed = Number(value);
    return Number.isFinite(parsed) && parsed >= 0 ? Math.floor(parsed) : fallback;
}
@@ -0,0 +1,22 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildOpenCommand = buildOpenCommand;
4
+ const commander_1 = require("commander");
5
+ const shared_1 = require("./shared");
6
/**
 * Builds the `open` subcommand.
 *
 * The action passes the URL and the parsed options to `runOpenLikeCommand`.
 * After the options declared here are registered, the command is passed
 * through `addProfileOption` and then `addSessionOption`.
 *
 * @returns {object} The value returned by `addSessionOption` for the command.
 */
function buildOpenCommand() {
    // [flags, description] — listed in registration order.
    const optionSpecs = [
        ["--idle-timeout-ms <ms>", "Idle timeout for the browser session"],
        ["--proxy-country <country>", "Request a country-based proxy when supported"],
        ["--proxy-mode <mode>", "Custom proxy mode: http, socks4, or socks5"],
        ["--proxy-host <host>", "Custom proxy host"],
        ["--proxy-port <port>", "Custom proxy port"],
        ["--proxy-user <username>", "Custom proxy username"],
        ["--proxy-pass <password>", "Custom proxy password"],
    ];
    const open = new commander_1.Command("open");
    open.description("Open a URL in Gologin Cloud Browser and create a daemon-backed session.");
    open.argument("<url>", "URL to open");
    for (const [flags, description] of optionSpecs) {
        open.option(flags, description);
    }
    open.action(async (url, options) => {
        await (0, shared_1.runOpenLikeCommand)(url, options);
    });
    (0, shared_1.addProfileOption)(open);
    return (0, shared_1.addSessionOption)(open);
}
@@ -0,0 +1,34 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.buildParseDocumentCommand = buildParseDocumentCommand;
7
+ const fs_1 = require("fs");
8
+ const path_1 = __importDefault(require("path"));
9
+ const commander_1 = require("commander");
10
+ const document_1 = require("../lib/document");
11
+ const output_1 = require("../lib/output");
12
/**
 * Builds the `parse-document` subcommand.
 *
 * The action parses the source with `parseDocumentSource`. With `--output`,
 * the payload (pretty JSON with `--json`, otherwise `parsed.text`) plus a
 * trailing newline is written to the file and the resolved path is printed.
 * Without `--output`, the result goes to stdout as JSON or plain text.
 *
 * @returns {object} The configured commander `Command` instance.
 */
function buildParseDocumentCommand() {
    const parseAndEmit = async (source, options) => {
        const parsed = await (0, document_1.parseDocumentSource)(source);
        const wantsJson = options.json;
        if (options.output) {
            const outputPath = path_1.default.resolve(options.output);
            await fs_1.promises.mkdir(path_1.default.dirname(outputPath), { recursive: true });
            let payload;
            if (wantsJson) {
                payload = JSON.stringify(parsed, null, 2);
            }
            else {
                payload = parsed.text;
            }
            await fs_1.promises.writeFile(outputPath, `${payload}\n`, "utf8");
            process.stdout.write(`${outputPath}\n`);
            return;
        }
        if (wantsJson) {
            (0, output_1.printJson)(parsed);
        }
        else {
            (0, output_1.printText)(parsed.text);
        }
    };
    const command = new commander_1.Command("parse-document");
    command.description("Parse PDF, DOCX, XLSX, HTML, or text-like documents from a URL or local path.");
    command.argument("<source>", "Document URL or local file path");
    command.option("--json", "Print structured JSON output");
    command.option("--output <path>", "Write parsed output to a file");
    command.action(parseAndEmit);
    return command;
}
@@ -0,0 +1,14 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildPdfCommand = buildPdfCommand;
4
+ const commander_1 = require("commander");
5
+ const shared_1 = require("./shared");
6
/**
 * Build the `pdf` CLI command, which forwards a `pdf` browser command with
 * the resolved target path for the selected session.
 *
 * @returns {*} Whatever `addSessionOption` returns for the configured command.
 */
function buildPdfCommand() {
    const savePdf = async (targetPath, options) => {
        const browserArgs = ["pdf", (0, shared_1.resolveOutputPath)(targetPath)];
        await (0, shared_1.runBrowserCommand)(browserArgs, { session: options.session });
    };
    const pdfCommand = new commander_1.Command("pdf")
        .description("Save the current page as a PDF.")
        .argument("<targetPath>", "Where to write the PDF file")
        .action(savePdf);
    return (0, shared_1.addSessionOption)(pdfCommand);
}
@@ -0,0 +1,15 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildPressCommand = buildPressCommand;
4
+ const commander_1 = require("commander");
5
+ const shared_1 = require("./shared");
6
/**
 * Build the `press` CLI command, which forwards a `press` browser command
 * for the given key; the target ref is appended only when one is supplied.
 *
 * @returns {*} Whatever `addSessionOption` returns for the configured command.
 */
function buildPressCommand() {
    const pressKey = async (key, target, options) => {
        const browserArgs = ["press", key];
        if (target) {
            browserArgs.push(target);
        }
        await (0, shared_1.runBrowserCommand)(browserArgs, { session: options.session });
    };
    const pressCommand = new commander_1.Command("press")
        .description("Press a keyboard key, optionally against a target ref.")
        .argument("<key>", "Keyboard key, for example Enter or ArrowDown")
        .argument("[target]", "Optional snapshot ref target")
        .action(pressKey);
    return (0, shared_1.addSessionOption)(pressCommand);
}
@@ -0,0 +1,51 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildReadCommand = buildReadCommand;
4
+ const commander_1 = require("commander");
5
+ const config_1 = require("../config");
6
+ const readSource_1 = require("../lib/readSource");
7
+ const output_1 = require("../lib/output");
8
+ const shared_1 = require("./shared");
9
/**
 * Build the `read` CLI command.
 *
 * The action loads the config, validates `--format` and `--source`, picks the
 * reader matching the format (html, markdown, or text), emits any fallback
 * notice on stderr, and prints the resulting content.
 *
 * @returns {*} The command after the unlocker-request and profile options
 *   have been attached by the shared helpers.
 */
function buildReadCommand() {
    const handleRead = async (url, options) => {
        const config = await (0, config_1.loadConfig)();
        const format = normalizeReadFormat(options.format);
        const source = (0, readSource_1.normalizeReadSourceMode)(options.source, "auto");
        // The unlocker key is only resolved when the source is not "browser".
        let apiKey = "";
        if (source !== "browser") {
            apiKey = (0, config_1.requireWebUnlockerKey)(config);
        }
        const readOptions = {
            source,
            profile: options.profile,
            request: (0, shared_1.normalizeUnlockerRequestOptions)(options),
        };
        let result;
        switch (format) {
            case "html":
                result = await (0, readSource_1.readHtmlContent)(url, config, apiKey, readOptions);
                break;
            case "markdown":
                result = await (0, readSource_1.readMarkdownContent)(url, config, apiKey, readOptions);
                break;
            default:
                result = await (0, readSource_1.readTextContent)(url, config, apiKey, readOptions);
                break;
        }
        emitReadNotice(result.fallbackAttempted, result.fallbackUsed, result.fallbackReason);
        (0, output_1.printText)(result.content);
    };
    const readCommand = new commander_1.Command("read")
        .description("Read the main content of a docs page or article with automatic fallback to Cloud Browser when needed.")
        .argument("<url>", "URL to read")
        .option("--format <format>", "Output format: html, markdown, or text", "text")
        .option("--source <source>", "Read source: auto, unlocker, or browser", "auto")
        .action(handleRead);
    const withRequestOptions = (0, shared_1.addUnlockerRequestOptions)(readCommand);
    return (0, shared_1.addProfileOption)(withRequestOptions);
}
34
/**
 * Validate the `--format` option of the `read` command.
 *
 * @param {string | null | undefined} value - Raw format value from the CLI.
 * @returns {"text" | "markdown" | "html"} The validated format; `"text"` when
 *   no value (or an empty one) was supplied.
 * @throws {Error} When `value` is non-empty but not a supported format.
 */
function normalizeReadFormat(value) {
    // Any missing or empty value means "use the default". Return the literal
    // default instead of echoing the input, so an empty string can never leak
    // out as if it were a valid format.
    if (!value) {
        return "text";
    }
    if (value === "text" || value === "markdown" || value === "html") {
        return value;
    }
    throw new Error(`Unsupported read format: ${value}`);
}
40
/**
 * Write a fallback notice to stderr, if there is anything to report.
 *
 * Nothing is written unless a fallback was attempted. When the fallback was
 * used, the "JS-rendered page" message is written with the reason appended
 * (or nothing appended when the reason is null/undefined). Otherwise the
 * reason alone is written, provided it is truthy.
 *
 * @param {boolean} fallbackAttempted - Whether a fallback was attempted.
 * @param {boolean} fallbackUsed - Whether the fallback result was used.
 * @param {string | null | undefined} fallbackReason - Optional explanation.
 * @returns {void}
 */
function emitReadNotice(fallbackAttempted, fallbackUsed, fallbackReason) {
    if (fallbackAttempted && fallbackUsed) {
        const reasonSuffix = fallbackReason ?? "";
        process.stderr.write(`JS-rendered page detected, retrying with browser. ${reasonSuffix}\n`);
    }
    else if (fallbackAttempted && fallbackReason) {
        process.stderr.write(`${fallbackReason}\n`);
    }
}