gologin-web-access 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/LICENSE +21 -0
- package/README.md +344 -0
- package/dist/cli.js +173 -0
- package/dist/commands/back.js +13 -0
- package/dist/commands/batch.js +81 -0
- package/dist/commands/batchChangeTrack.js +99 -0
- package/dist/commands/batchExtract.js +97 -0
- package/dist/commands/batchScrape.js +140 -0
- package/dist/commands/changeTrack.js +65 -0
- package/dist/commands/check.js +14 -0
- package/dist/commands/click.js +14 -0
- package/dist/commands/close.js +19 -0
- package/dist/commands/configInit.js +77 -0
- package/dist/commands/configShow.js +23 -0
- package/dist/commands/cookies.js +22 -0
- package/dist/commands/cookiesClear.js +13 -0
- package/dist/commands/cookiesImport.js +14 -0
- package/dist/commands/crawl.js +71 -0
- package/dist/commands/crawlErrors.js +20 -0
- package/dist/commands/crawlResult.js +27 -0
- package/dist/commands/crawlStart.js +56 -0
- package/dist/commands/crawlStatus.js +25 -0
- package/dist/commands/current.js +14 -0
- package/dist/commands/dblclick.js +14 -0
- package/dist/commands/eval.js +20 -0
- package/dist/commands/extract.js +44 -0
- package/dist/commands/fill.js +15 -0
- package/dist/commands/find.js +16 -0
- package/dist/commands/focus.js +14 -0
- package/dist/commands/forward.js +13 -0
- package/dist/commands/get.js +15 -0
- package/dist/commands/hover.js +14 -0
- package/dist/commands/jobs.js +47 -0
- package/dist/commands/map.js +61 -0
- package/dist/commands/open.js +22 -0
- package/dist/commands/parseDocument.js +34 -0
- package/dist/commands/pdf.js +14 -0
- package/dist/commands/press.js +15 -0
- package/dist/commands/read.js +51 -0
- package/dist/commands/reload.js +13 -0
- package/dist/commands/run.js +76 -0
- package/dist/commands/scrape.js +19 -0
- package/dist/commands/scrapeJson.js +24 -0
- package/dist/commands/scrapeMarkdown.js +37 -0
- package/dist/commands/scrapeScreenshot.js +65 -0
- package/dist/commands/scrapeText.js +37 -0
- package/dist/commands/screenshot.js +23 -0
- package/dist/commands/scroll.js +23 -0
- package/dist/commands/scrollIntoView.js +14 -0
- package/dist/commands/search.js +39 -0
- package/dist/commands/searchBrowser.js +28 -0
- package/dist/commands/select.js +15 -0
- package/dist/commands/sessions.js +14 -0
- package/dist/commands/shared.js +102 -0
- package/dist/commands/snapshot.js +18 -0
- package/dist/commands/storageClear.js +18 -0
- package/dist/commands/storageExport.js +26 -0
- package/dist/commands/storageImport.js +23 -0
- package/dist/commands/tabClose.js +18 -0
- package/dist/commands/tabFocus.js +15 -0
- package/dist/commands/tabOpen.js +19 -0
- package/dist/commands/tabs.js +13 -0
- package/dist/commands/type.js +15 -0
- package/dist/commands/uncheck.js +14 -0
- package/dist/commands/upload.js +15 -0
- package/dist/commands/wait.js +27 -0
- package/dist/config.js +260 -0
- package/dist/doctor.js +86 -0
- package/dist/internal-agent/cli.js +336 -0
- package/dist/internal-agent/commands/back.js +12 -0
- package/dist/internal-agent/commands/check.js +17 -0
- package/dist/internal-agent/commands/click.js +17 -0
- package/dist/internal-agent/commands/close.js +12 -0
- package/dist/internal-agent/commands/cookies.js +23 -0
- package/dist/internal-agent/commands/cookiesClear.js +12 -0
- package/dist/internal-agent/commands/cookiesImport.js +18 -0
- package/dist/internal-agent/commands/current.js +9 -0
- package/dist/internal-agent/commands/dblclick.js +17 -0
- package/dist/internal-agent/commands/doctor.js +53 -0
- package/dist/internal-agent/commands/eval.js +30 -0
- package/dist/internal-agent/commands/fill.js +18 -0
- package/dist/internal-agent/commands/find.js +86 -0
- package/dist/internal-agent/commands/focus.js +17 -0
- package/dist/internal-agent/commands/forward.js +12 -0
- package/dist/internal-agent/commands/get.js +19 -0
- package/dist/internal-agent/commands/hover.js +17 -0
- package/dist/internal-agent/commands/open.js +67 -0
- package/dist/internal-agent/commands/pdf.js +18 -0
- package/dist/internal-agent/commands/press.js +19 -0
- package/dist/internal-agent/commands/reload.js +12 -0
- package/dist/internal-agent/commands/screenshot.js +22 -0
- package/dist/internal-agent/commands/scroll.js +25 -0
- package/dist/internal-agent/commands/scrollIntoView.js +17 -0
- package/dist/internal-agent/commands/select.js +18 -0
- package/dist/internal-agent/commands/sessions.js +15 -0
- package/dist/internal-agent/commands/shared.js +51 -0
- package/dist/internal-agent/commands/snapshot.js +16 -0
- package/dist/internal-agent/commands/storageClear.js +13 -0
- package/dist/internal-agent/commands/storageExport.js +24 -0
- package/dist/internal-agent/commands/storageImport.js +20 -0
- package/dist/internal-agent/commands/tabClose.js +21 -0
- package/dist/internal-agent/commands/tabFocus.js +21 -0
- package/dist/internal-agent/commands/tabOpen.js +13 -0
- package/dist/internal-agent/commands/tabs.js +17 -0
- package/dist/internal-agent/commands/type.js +18 -0
- package/dist/internal-agent/commands/uncheck.js +17 -0
- package/dist/internal-agent/commands/upload.js +18 -0
- package/dist/internal-agent/commands/wait.js +41 -0
- package/dist/internal-agent/daemon/browser.js +818 -0
- package/dist/internal-agent/daemon/refStore.js +26 -0
- package/dist/internal-agent/daemon/server.js +330 -0
- package/dist/internal-agent/daemon/sessionManager.js +684 -0
- package/dist/internal-agent/daemon/snapshot.js +285 -0
- package/dist/internal-agent/lib/config.js +59 -0
- package/dist/internal-agent/lib/daemon.js +300 -0
- package/dist/internal-agent/lib/errors.js +63 -0
- package/dist/internal-agent/lib/types.js +2 -0
- package/dist/internal-agent/lib/utils.js +165 -0
- package/dist/jobRunner.js +56 -0
- package/dist/lib/agentCli.js +158 -0
- package/dist/lib/browserRead.js +125 -0
- package/dist/lib/browserStructured.js +77 -0
- package/dist/lib/changeTracking.js +117 -0
- package/dist/lib/cloudApi.js +41 -0
- package/dist/lib/concurrency.js +15 -0
- package/dist/lib/crawl.js +313 -0
- package/dist/lib/document.js +170 -0
- package/dist/lib/errors.js +55 -0
- package/dist/lib/extract.js +65 -0
- package/dist/lib/extractRunner.js +22 -0
- package/dist/lib/jobRegistry.js +164 -0
- package/dist/lib/output.js +122 -0
- package/dist/lib/readSource.js +297 -0
- package/dist/lib/runbooks.js +193 -0
- package/dist/lib/search.js +727 -0
- package/dist/lib/selfCli.js +136 -0
- package/dist/lib/structuredScrape.js +83 -0
- package/dist/lib/types.js +2 -0
- package/dist/lib/unlocker.js +383 -0
- package/package.json +67 -0
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.AppError = void 0;
|
|
4
|
+
exports.isDaemonErrorPayload = isDaemonErrorPayload;
|
|
5
|
+
exports.serializeError = serializeError;
|
|
6
|
+
exports.fromDaemonError = fromDaemonError;
|
|
7
|
+
exports.formatErrorLine = formatErrorLine;
|
|
8
|
+
/**
 * Error carrying a machine-readable code, a numeric status and optional
 * details alongside the usual message.
 */
class AppError extends Error {
    code;
    status;
    details;
    /**
     * @param {string} code - Machine-readable error identifier.
     * @param {string} message - Human-readable description.
     * @param {number} [status=500] - Numeric status associated with the error.
     * @param {*} [details] - Optional extra payload, stored as given.
     */
    constructor(code, message, status = 500, details) {
        super(message);
        this.name = "AppError";
        Object.assign(this, { code, status, details });
    }
}
|
|
20
|
+
exports.AppError = AppError;
|
|
21
|
+
/**
 * Tells whether a value has the shape of a daemon error payload: a non-null
 * object with string `code`, string `message` and numeric `status`.
 *
 * @param {*} value - Anything.
 * @returns {boolean}
 */
function isDaemonErrorPayload(value) {
    if (value === null || typeof value !== "object") {
        return false;
    }
    const expectedTypes = [
        ["code", "string"],
        ["message", "string"],
        ["status", "number"],
    ];
    return expectedTypes.every(([key, type]) => typeof value[key] === type);
}
|
|
30
|
+
/**
 * Converts any thrown value into a plain `{ code, message, status }` object.
 * AppError instances keep their own code/status and also carry `details`;
 * every other value is reported as INTERNAL_ERROR with status 500.
 *
 * @param {*} error - The thrown value.
 * @returns {{code: string, message: string, status: number, details?: *}}
 */
function serializeError(error) {
    if (!(error instanceof Error)) {
        return { code: "INTERNAL_ERROR", message: String(error), status: 500 };
    }
    if (!(error instanceof AppError)) {
        return { code: "INTERNAL_ERROR", message: error.message, status: 500 };
    }
    return {
        code: error.code,
        message: error.message,
        status: error.status,
        details: error.details
    };
}
|
|
52
|
+
/**
 * Rebuilds an AppError from a daemon error payload.
 *
 * @param {{code: string, message: string, status: number, details?: *}} payload
 * @returns {AppError}
 */
function fromDaemonError(payload) {
    const fields = [payload.code, payload.message, payload.status, payload.details];
    return new AppError(...fields);
}
|
|
55
|
+
/**
 * Renders a thrown value as a single `LABEL: message` line. The label is the
 * AppError code, the Error name, or INTERNAL_ERROR for non-Error values.
 *
 * @param {*} error - The thrown value.
 * @returns {string}
 */
function formatErrorLine(error) {
    if (!(error instanceof Error)) {
        return `INTERNAL_ERROR: ${String(error)}`;
    }
    const label = error instanceof AppError ? error.code : error.name;
    return `${label}: ${error.message}`;
}
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.parseArgs = parseArgs;
|
|
7
|
+
exports.getFlagString = getFlagString;
|
|
8
|
+
exports.getFlagBoolean = getFlagBoolean;
|
|
9
|
+
exports.generateSessionId = generateSessionId;
|
|
10
|
+
exports.ensureAbsolutePath = ensureAbsolutePath;
|
|
11
|
+
exports.isRefTarget = isRefTarget;
|
|
12
|
+
exports.isNumericToken = isNumericToken;
|
|
13
|
+
exports.formatSnapshotItem = formatSnapshotItem;
|
|
14
|
+
exports.formatProxyLabel = formatProxyLabel;
|
|
15
|
+
exports.formatSessionLine = formatSessionLine;
|
|
16
|
+
exports.formatCurrentLine = formatCurrentLine;
|
|
17
|
+
exports.writeJsonResponse = writeJsonResponse;
|
|
18
|
+
exports.readJsonBody = readJsonBody;
|
|
19
|
+
exports.appendLog = appendLog;
|
|
20
|
+
exports.buildRefDisplayText = buildRefDisplayText;
|
|
21
|
+
exports.makeTransportLabel = makeTransportLabel;
|
|
22
|
+
exports.isDaemonErrorResponse = isDaemonErrorResponse;
|
|
23
|
+
const node_fs_1 = __importDefault(require("node:fs"));
|
|
24
|
+
const node_path_1 = __importDefault(require("node:path"));
|
|
25
|
+
const errors_1 = require("./errors");
|
|
26
|
+
/**
 * Splits an argv-style token list into positional arguments and flags.
 *
 * - `-i` is shorthand for the boolean flag `interactive`.
 * - Tokens not starting with `--` are positional.
 * - Flags named in the boolean set take no value and are stored as `true`.
 * - Every other `--name` consumes the following token as its string value.
 *
 * An explicitly empty value (`--text ""`) is accepted; only a truly absent
 * value, or a following token that is itself a `--flag`, is an error.
 *
 * @param {string[]} argv - Tokens to parse.
 * @returns {{positional: string[], flags: Object<string, string|boolean>}}
 * @throws {AppError} BAD_REQUEST (400) when a value flag has no value.
 */
function parseArgs(argv) {
    const positional = [];
    const flags = {};
    const booleanFlags = new Set(["interactive", "exact", "annotate", "press-escape", "json", "clear"]);
    for (let index = 0; index < argv.length; index += 1) {
        const token = argv[index];
        if (token === "-i") {
            flags.interactive = true;
            continue;
        }
        if (!token.startsWith("--")) {
            positional.push(token);
            continue;
        }
        const flagName = token.slice(2);
        if (booleanFlags.has(flagName)) {
            flags[flagName] = true;
            continue;
        }
        const next = argv[index + 1];
        // Compare against undefined rather than testing truthiness, so that an
        // empty string is kept as a legitimate value.
        if (next === undefined || next.startsWith("--")) {
            throw new errors_1.AppError("BAD_REQUEST", `Missing value for ${token}`, 400);
        }
        flags[flagName] = next;
        index += 1; // skip the value token just consumed
    }
    return { positional, flags };
}
|
|
54
|
+
/**
 * Reads a flag from a parsed-args object, but only if it holds a string.
 *
 * @param {{flags: Object}} parsed - Result of parseArgs.
 * @param {string} name - Flag name without the leading dashes.
 * @returns {string|undefined} The value, or undefined when absent or not a string.
 */
function getFlagString(parsed, name) {
    const candidate = parsed.flags[name];
    if (typeof candidate !== "string") {
        return undefined;
    }
    return candidate;
}
|
|
58
|
+
/**
 * Reports whether a flag was set as a boolean switch.
 *
 * @param {{flags: Object}} parsed - Result of parseArgs.
 * @param {string} name - Flag name without the leading dashes.
 * @returns {boolean} True only when the stored value is exactly `true`.
 */
function getFlagBoolean(parsed, name) {
    const { flags } = parsed;
    return flags[name] === true;
}
|
|
61
|
+
/**
 * Picks the lowest-numbered id of the form `s<N>` (N starting at 1) that is
 * not already present in `existingIds`.
 *
 * @param {Iterable<string>} existingIds - Ids currently in use.
 * @returns {string}
 */
function generateSessionId(existingIds) {
    const taken = new Set(existingIds);
    for (let n = 1;; n += 1) {
        const candidate = `s${n}`;
        if (!taken.has(candidate)) {
            return candidate;
        }
    }
}
|
|
69
|
+
/**
 * Returns `targetPath` untouched when it is already absolute; otherwise
 * resolves it against `baseDir`.
 *
 * @param {string} baseDir - Directory relative paths are resolved from.
 * @param {string} targetPath - Absolute or relative path.
 * @returns {string}
 */
function ensureAbsolutePath(baseDir, targetPath) {
    const pathApi = node_path_1.default;
    if (pathApi.isAbsolute(targetPath)) {
        return targetPath;
    }
    return pathApi.resolve(baseDir, targetPath);
}
|
|
72
|
+
/**
 * Checks whether a target string is a ref of the form `@e<digits>`.
 *
 * @param {string} target
 * @returns {boolean}
 */
function isRefTarget(target) {
    const refPattern = /^@e\d+$/;
    return refPattern.test(target);
}
|
|
75
|
+
/**
 * Checks whether a token is a base-10 integer literal with an optional
 * leading minus sign.
 *
 * @param {string} token
 * @returns {boolean}
 */
function isNumericToken(token) {
    const integerPattern = /^-?\d+$/;
    return integerPattern.test(token);
}
|
|
78
|
+
/**
 * Formats one snapshot entry as `- <kind> "<text>" [flag]... [ref=<ref>]`.
 * The bracketed flag list is omitted when the item has no flags.
 *
 * @param {{kind: string, text: string, ref: string, flags?: string[]}} item
 * @returns {string}
 */
function formatSnapshotItem(item) {
    let flagSuffix = "";
    if (item.flags && item.flags.length > 0) {
        const rendered = item.flags.map((flag) => `[${flag}]`);
        flagSuffix = ` ${rendered.join(" ")}`;
    }
    return `- ${item.kind} "${item.text}"${flagSuffix} [ref=${item.ref}]`;
}
|
|
82
|
+
/**
 * Builds a short label for a proxy configuration.
 *
 * - no proxy, or mode "none": undefined
 * - mode "gologin": "gologin", suffixed with ":<country>" when a country is set
 * - any other mode: "<mode>:<host>:<port>" when both host and port are set,
 *   otherwise just the mode
 *
 * @param {{mode: string, country?: string, host?: string, port?: number|string}|undefined} proxy
 * @returns {string|undefined}
 */
function formatProxyLabel(proxy) {
    if (!proxy || proxy.mode === "none") {
        return undefined;
    }
    const { mode } = proxy;
    if (mode === "gologin") {
        return proxy.country ? `gologin:${proxy.country}` : "gologin";
    }
    const hasEndpoint = proxy.host && proxy.port;
    return hasEndpoint ? `${mode}:${proxy.host}:${proxy.port}` : mode;
}
|
|
97
|
+
/**
 * Formats one row of a session listing: the same text as formatCurrentLine,
 * prefixed with `* ` for the active session and `- ` for all others.
 *
 * @param {object} session - Session summary; see formatCurrentLine for the
 *   fields that are read, plus `active`.
 * @returns {string}
 */
function formatSessionLine(session) {
    const prefix = session.active ? "*" : "-";
    // Delegate so the two renderings cannot drift apart.
    return `${prefix} ${formatCurrentLine(session)}`;
}
/**
 * Formats a session summary as space-separated `key=value` tokens.
 *
 * Always present: `session`, `url`, `snapshot` (none | fresh | stale).
 * Present only when the corresponding field is set: `profile`, `proxy`,
 * `idleTimeoutMs`, `liveview`, `shot`, `pdf`.
 *
 * @param {object} session - Reads sessionId, url, profileId, hasSnapshot,
 *   staleSnapshot, proxy, idleTimeoutMs, liveViewUrl, lastScreenshotPath and
 *   lastPdfPath.
 * @returns {string}
 */
function formatCurrentLine(session) {
    const profile = session.profileId ? ` profile=${session.profileId}` : "";
    const snapshotState = session.hasSnapshot ? (session.staleSnapshot ? "stale" : "fresh") : "none";
    const proxy = formatProxyLabel(session.proxy);
    const proxyToken = proxy ? ` proxy=${proxy}` : "";
    // `!= null` keeps a legitimate 0 but drops both undefined and null, so a
    // null value is never printed as "idleTimeoutMs=null".
    const idleTimeout = session.idleTimeoutMs != null ? ` idleTimeoutMs=${session.idleTimeoutMs}` : "";
    const liveView = session.liveViewUrl ? ` liveview=${session.liveViewUrl}` : "";
    const screenshot = session.lastScreenshotPath ? ` shot=${session.lastScreenshotPath}` : "";
    const pdf = session.lastPdfPath ? ` pdf=${session.lastPdfPath}` : "";
    return `session=${session.sessionId}${profile} url=${session.url} snapshot=${snapshotState}${proxyToken}${idleTimeout}${liveView}${screenshot}${pdf}`;
}
|
|
120
|
+
/**
 * Sends `payload` as a JSON response: sets the status code and a UTF-8 JSON
 * content-type header, then ends the response with the serialized body.
 *
 * @param {object} response - Object exposing `statusCode`, `setHeader` and `end`.
 * @param {number} statusCode - Status code to assign.
 * @param {*} payload - Value passed to JSON.stringify.
 * @returns {void}
 */
function writeJsonResponse(response, statusCode, payload) {
    response.statusCode = statusCode;
    response.setHeader("content-type", "application/json; charset=utf-8");
    const body = JSON.stringify(payload);
    response.end(body);
}
|
|
125
|
+
/**
 * Collects a request's body chunks and parses them as JSON.
 *
 * @param {object} request - Emitter of "data", "end" and "error" events.
 * @returns {Promise<*>} The parsed body, or undefined when no chunk arrived.
 *   Rejects with a BAD_REQUEST (400) AppError when the body is not valid
 *   JSON, and with the emitted error when the request emits "error".
 */
function readJsonBody(request) {
    return new Promise((resolve, reject) => {
        const received = [];
        const finish = () => {
            if (received.length === 0) {
                resolve(undefined);
                return;
            }
            let parsed;
            try {
                parsed = JSON.parse(Buffer.concat(received).toString("utf8"));
            }
            catch {
                reject(new errors_1.AppError("BAD_REQUEST", "Request body must be valid JSON", 400));
                return;
            }
            resolve(parsed);
        };
        request.on("data", (chunk) => {
            received.push(chunk);
        });
        request.on("end", finish);
        request.on("error", reject);
    });
}
|
|
144
|
+
/**
 * Appends one line to a log file, prefixed with the current ISO-8601
 * timestamp. The log file's parent directories are created when missing.
 *
 * @param {string} logPath - Path of the log file.
 * @param {string} line - Text to write after the timestamp.
 * @returns {void}
 */
function appendLog(logPath, line) {
    const fsApi = node_fs_1.default;
    const directory = node_path_1.default.dirname(logPath);
    fsApi.mkdirSync(directory, { recursive: true });
    const entry = `${new Date().toISOString()} ${line}\n`;
    fsApi.appendFileSync(logPath, entry, "utf8");
}
|
|
148
|
+
/**
 * Picks the display text for a ref descriptor: the first of accessibleName,
 * text, placeholder and name that is neither null nor undefined, or an empty
 * string when none qualifies.
 *
 * @param {{accessibleName?: string, text?: string, placeholder?: string, name?: string}} descriptor
 * @returns {string}
 */
function buildRefDisplayText(descriptor) {
    const preference = ["accessibleName", "text", "placeholder", "name"];
    for (const key of preference) {
        const value = descriptor[key];
        if (value !== undefined && value !== null) {
            return value;
        }
    }
    return "";
}
|
|
151
|
+
/**
 * Describes a transport as `socket:<path>` for socket transports and as
 * `http://<host>:<port>` for everything else.
 *
 * @param {{kind: string, socketPath?: string, host?: string, port?: number}} transport
 * @returns {string}
 */
function makeTransportLabel(transport) {
    return transport.kind === "socket"
        ? `socket:${transport.socketPath}`
        : `http://${transport.host}:${transport.port}`;
}
|
|
157
|
+
/**
 * Tells whether a response payload has the shape of a daemon error: a
 * non-null object with string `code`, string `message` and numeric `status`.
 *
 * @param {*} payload - Anything.
 * @returns {boolean}
 */
function isDaemonErrorResponse(payload) {
    const isObject = Boolean(payload) && typeof payload === "object";
    if (!isObject) {
        return false;
    }
    if (typeof payload.code !== "string") {
        return false;
    }
    if (typeof payload.message !== "string") {
        return false;
    }
    return typeof payload.status === "number";
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const config_1 = require("./config");
|
|
4
|
+
const jobRegistry_1 = require("./lib/jobRegistry");
|
|
5
|
+
const selfCli_1 = require("./lib/selfCli");
|
|
6
|
+
/**
 * Entry point of the job runner. Reads `<jobId> <command> [args...]` from the
 * process arguments, marks the job as running, runs the command through
 * runSelfCommandCapture and records the outcome with finalizeJob.
 *
 * A zero exit code is recorded as "ok" (with stdout parsed as JSON when
 * possible). Any other exit code is recorded as "failed", after which the
 * process exits with that same code.
 *
 * @returns {Promise<void>}
 * @throws {Error} When jobId or command is missing.
 */
async function main() {
    const [jobId, command, ...args] = process.argv.slice(2);
    const hasRequiredArgs = Boolean(jobId) && Boolean(command);
    if (!hasRequiredArgs) {
        throw new Error("Usage: jobRunner <jobId> <command> [args...]");
    }
    const config = await (0, config_1.loadConfig)();
    await (0, jobRegistry_1.markJobRunning)(config, jobId);
    const outcome = await (0, selfCli_1.runSelfCommandCapture)([command, ...args]);
    if (outcome.exitCode !== 0) {
        // Prefer stderr, then stdout, then a generic message for the error text.
        await (0, jobRegistry_1.finalizeJob)(config, jobId, {
            status: "failed",
            output: outcome.stdout,
            errorOutput: outcome.stderr,
            error: outcome.stderr.trim() || outcome.stdout.trim() || `Command exited with code ${outcome.exitCode}`
        });
        process.exit(outcome.exitCode);
        return;
    }
    await (0, jobRegistry_1.finalizeJob)(config, jobId, {
        status: "ok",
        output: outcome.stdout,
        errorOutput: outcome.stderr,
        result: tryParseJson(outcome.stdout)
    });
}
|
|
31
|
+
/**
 * Parses a string as JSON without throwing.
 *
 * @param {string} value - Raw text.
 * @returns {*} The parsed value, or undefined when the trimmed text is empty
 *   or is not valid JSON.
 */
function tryParseJson(value) {
    const text = value.trim();
    if (text.length === 0) {
        return undefined;
    }
    let parsed;
    try {
        parsed = JSON.parse(text);
    }
    catch {
        parsed = undefined;
    }
    return parsed;
}
|
|
43
|
+
// Run the job. On any failure, make a best-effort attempt to record the job
// as failed (errors while loading the config or finalizing are ignored),
// write the error to stderr and exit with code 1.
void main().catch(async (error) => {
    const jobId = process.argv[2];
    const config = jobId
        ? await (0, config_1.loadConfig)().catch(() => undefined)
        : undefined;
    if (config) {
        await (0, jobRegistry_1.finalizeJob)(config, jobId, {
            status: "failed",
            error: error instanceof Error ? error.message : String(error)
        }).catch(() => undefined);
    }
    const report = error instanceof Error ? error.stack ?? error.message : String(error);
    process.stderr.write(`${report}\n`);
    process.exit(1);
});
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.runAgentCommand = runAgentCommand;
|
|
7
|
+
exports.runAgentCommandCapture = runAgentCommandCapture;
|
|
8
|
+
exports.isDaemonReachable = isDaemonReachable;
|
|
9
|
+
exports.agentCliAvailable = agentCliAvailable;
|
|
10
|
+
exports.inspectAgentCli = inspectAgentCli;
|
|
11
|
+
const child_process_1 = require("child_process");
|
|
12
|
+
const fs_1 = require("fs");
|
|
13
|
+
const path_1 = __importDefault(require("path"));
|
|
14
|
+
const errors_1 = require("./errors");
|
|
15
|
+
/**
 * Runs the agent CLI with the given arguments and waits for it to finish.
 *
 * @param {string[]} args - Arguments appended to the resolved CLI invocation.
 * @param {object} config - Configuration handed to the spawn helper.
 * @returns {Promise<void>}
 * @throws {SilentExitError} Carrying the exit code when it is non-zero.
 */
async function runAgentCommand(args, config) {
    const cli = await resolveAgentCliInvocation();
    const status = await spawnAndWait(cli, args, config);
    if (status === 0) {
        return;
    }
    throw new errors_1.SilentExitError(status);
}
|
|
22
|
+
/**
 * Runs the agent CLI with the given arguments and returns whatever the
 * capturing spawn helper resolves with.
 *
 * @param {string[]} args - Arguments appended to the resolved CLI invocation.
 * @param {object} config - Configuration handed to the spawn helper.
 * @returns {Promise<*>} Result of spawnAndCapture.
 */
async function runAgentCommandCapture(args, config) {
    return spawnAndCapture(await resolveAgentCliInvocation(), args, config);
}
|
|
26
|
+
/**
 * Probes the daemon's `/health` endpoint on 127.0.0.1.
 *
 * The request is aborted after `timeoutMs`, so a port that accepts the
 * connection but never answers is reported as unreachable rather than
 * stalling the caller.
 *
 * @param {number} port - Port the daemon is expected to listen on.
 * @param {number} [timeoutMs=2000] - Maximum time to wait for a response.
 * @returns {Promise<boolean>} True when the endpoint answers with an OK
 *   status; false on any other status, request error or timeout.
 */
async function isDaemonReachable(port, timeoutMs = 2000) {
    try {
        const response = await fetch(`http://127.0.0.1:${port}/health`, {
            signal: AbortSignal.timeout(timeoutMs),
        });
        return response.ok;
    }
    catch {
        // Connection refused, aborted on timeout, or any other fetch failure.
        return false;
    }
}
|
|
35
|
+
/**
 * Reports whether an agent CLI invocation can be resolved.
 *
 * @returns {Promise<boolean>} False when resolution fails for any reason.
 */
async function agentCliAvailable() {
    let available = true;
    try {
        await resolveAgentCliInvocation();
    }
    catch {
        available = false;
    }
    return available;
}
|
|
44
|
+
/**
 * Resolves the agent CLI invocation without throwing.
 *
 * @returns {Promise<object|undefined>} The invocation descriptor, or
 *   undefined when resolution fails for any reason.
 */
async function inspectAgentCli() {
    let invocation;
    try {
        invocation = await resolveAgentCliInvocation();
    }
    catch {
        invocation = undefined;
    }
    return invocation;
}
|
|
52
|
+
/**
 * Works out how to launch the internal agent CLI.
 *
 * The compiled `dist/internal-agent/cli.js` is preferred. Failing that, the
 * TypeScript source is run through the locally installed `tsx` CLI, provided
 * both files exist. Both forms are launched with the current Node binary.
 *
 * @returns {Promise<{command: string, args: string[], cwd: string, source: string, version: (string|undefined)}>}
 * @throws {CliError} When neither form of the CLI is present.
 */
async function resolveAgentCliInvocation() {
    const projectRoot = resolveProjectRoot();
    const inProject = (...segments) => path_1.default.join(projectRoot, ...segments);
    const describe = async (source, args) => ({
        command: process.execPath,
        args,
        cwd: projectRoot,
        source,
        version: await readPackageVersion(projectRoot),
    });
    const distCli = inProject("dist", "internal-agent", "cli.js");
    if (await exists(distCli)) {
        return describe("internal-bundled", [distCli]);
    }
    const tsxCli = inProject("node_modules", "tsx", "dist", "cli.mjs");
    const srcCli = inProject("src", "internal-agent", "cli.ts");
    if ((await exists(tsxCli)) && (await exists(srcCli))) {
        return describe("internal-source", [tsxCli, srcCli]);
    }
    throw new errors_1.CliError("Gologin Agent CLI is not available.", 1, `Internal browser runtime is missing from this gologin-web-access install at ${projectRoot}. Reinstall the package and rebuild it.`);
}
|
|
77
|
+
/**
 * Returns the absolute path two directories above this module's directory.
 *
 * @returns {string}
 */
function resolveProjectRoot() {
    const parentHops = ["..", ".."];
    return path_1.default.resolve(__dirname, ...parentHops);
}
|
|
80
|
+
/**
 * Launches the agent CLI with the parent's stdio and waits for it to exit.
 *
 * @param {{command: string, args: string[], cwd: string}} invocation - How to start the CLI.
 * @param {string[]} args - Extra arguments appended after the invocation's own.
 * @param {object} config - Configuration used to build the child environment.
 * @returns {Promise<number>} The exit code, or 1 when none is reported.
 *   Rejects with the spawn error, or with a CliError when the child was
 *   terminated by a signal.
 */
function spawnAndWait(invocation, args, config) {
    return new Promise((resolve, reject) => {
        const argv = [...invocation.args, ...args];
        const spawnOptions = {
            cwd: invocation.cwd,
            env: buildAgentEnv(config),
            stdio: "inherit",
        };
        const child = (0, child_process_1.spawn)(invocation.command, argv, spawnOptions);
        child.on("error", reject);
        child.on("exit", (code, signal) => {
            if (!signal) {
                resolve(code ?? 1);
                return;
            }
            reject(new errors_1.CliError(`Gologin Agent CLI terminated by signal ${signal}.`));
        });
    });
}
|
|
97
|
+
/**
 * Launches the agent CLI with stdin ignored and stdout/stderr captured.
 *
 * The result is produced on the child's "close" event, which fires only
 * after the stdio streams have been closed, so no trailing output is lost.
 * Both streams are decoded as UTF-8 by the stream itself, which keeps
 * multi-byte characters intact when they straddle two chunks.
 *
 * @param {{command: string, args: string[], cwd: string}} invocation - How to start the CLI.
 * @param {string[]} args - Extra arguments appended after the invocation's own.
 * @param {object} config - Configuration used to build the child environment.
 * @returns {Promise<{exitCode: number, stdout: string, stderr: string}>}
 *   `exitCode` falls back to 1 when none is reported. Rejects with the spawn
 *   error, or with a CliError when the child was terminated by a signal.
 */
function spawnAndCapture(invocation, args, config) {
    return new Promise((resolve, reject) => {
        const child = (0, child_process_1.spawn)(invocation.command, [...invocation.args, ...args], {
            cwd: invocation.cwd,
            env: buildAgentEnv(config),
            stdio: ["ignore", "pipe", "pipe"],
        });
        let stdout = "";
        let stderr = "";
        // Let the streams decode: a per-chunk toString("utf8") would turn a
        // character split across two chunks into replacement characters.
        child.stdout.setEncoding("utf8");
        child.stderr.setEncoding("utf8");
        child.stdout.on("data", (chunk) => {
            stdout += chunk;
        });
        child.stderr.on("data", (chunk) => {
            stderr += chunk;
        });
        child.on("error", reject);
        // "exit" can fire while the pipes still hold unread data; "close"
        // waits for the stdio streams to finish.
        child.on("close", (code, signal) => {
            if (signal) {
                reject(new errors_1.CliError(`Gologin Agent CLI terminated by signal ${signal}.`));
                return;
            }
            resolve({
                exitCode: code ?? 1,
                stdout,
                stderr,
            });
        });
    });
}
|
|
126
|
+
/**
 * Builds the environment for the agent CLI child process: a copy of the
 * current environment plus GOLOGIN_DAEMON_PORT, and GOLOGIN_TOKEN and
 * GOLOGIN_PROFILE_ID when the config provides them.
 *
 * @param {{daemonPort: number, cloudToken?: string, defaultProfileId?: string}} config
 * @returns {Object<string, string>}
 */
function buildAgentEnv(config) {
    const { daemonPort, cloudToken, defaultProfileId } = config;
    return {
        ...process.env,
        GOLOGIN_DAEMON_PORT: String(daemonPort),
        ...(cloudToken ? { GOLOGIN_TOKEN: cloudToken } : {}),
        ...(defaultProfileId ? { GOLOGIN_PROFILE_ID: defaultProfileId } : {}),
    };
}
|
|
139
|
+
/**
 * Checks whether a path can be accessed.
 *
 * @param {string} targetPath - Path to probe.
 * @returns {Promise<boolean>} False when the access check fails for any reason.
 */
async function exists(targetPath) {
    let accessible = true;
    try {
        await fs_1.promises.access(targetPath);
    }
    catch {
        accessible = false;
    }
    return accessible;
}
|
|
148
|
+
/**
 * Reads the `version` field from `<packageRoot>/package.json`.
 *
 * @param {string} packageRoot - Directory containing package.json.
 * @returns {Promise<string|undefined>} The version string, or undefined when
 *   the file cannot be read or parsed, or the field is not a string.
 */
async function readPackageVersion(packageRoot) {
    const manifestPath = path_1.default.join(packageRoot, "package.json");
    try {
        const manifest = JSON.parse(await fs_1.promises.readFile(manifestPath, "utf8"));
        const version = manifest.version;
        if (typeof version === "string") {
            return version;
        }
    }
    catch {
        // Unreadable or malformed manifest: fall through to "no version".
    }
    return undefined;
}
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.scrapeReadableContentViaBrowser = scrapeReadableContentViaBrowser;
|
|
4
|
+
exports.scrapeRenderedHtmlViaBrowser = scrapeRenderedHtmlViaBrowser;
|
|
5
|
+
const crypto_1 = require("crypto");
|
|
6
|
+
const config_1 = require("../config");
|
|
7
|
+
const agentCli_1 = require("./agentCli");
|
|
8
|
+
const errors_1 = require("./errors");
|
|
9
|
+
/**
 * Opens `url` in a temporary browser session, evaluates the readable-content
 * extraction expression there and returns the parsed JSON result.
 *
 * @param {string} url - Page to open.
 * @param {object} config - Configuration passed to the agent CLI helpers.
 * @param {object} [options={}] - Options forwarded to withBrowserSession.
 * @returns {Promise<*>} The JSON value printed by the eval command.
 */
async function scrapeReadableContentViaBrowser(url, config, options = {}) {
    const readFromSession = async (sessionId) => {
        const command = ["eval", buildReadableExtractionExpression(), "--json", "--session", sessionId];
        const evaluated = await (0, agentCli_1.runAgentCommandCapture)(command, config);
        ensureBrowserCommandOk("eval", evaluated, url);
        return JSON.parse(evaluated.stdout.trim());
    };
    return withBrowserSession(url, config, options, readFromSession);
}
|
|
16
|
+
/**
 * Opens `url` in a temporary browser session and returns the document's
 * outer HTML as evaluated in the page.
 *
 * @param {string} url - Page to open.
 * @param {object} config - Configuration passed to the agent CLI helpers.
 * @param {object} [options={}] - Options forwarded to withBrowserSession.
 * @returns {Promise<{html: *}>} The JSON value printed by the eval command,
 *   wrapped under `html`.
 */
async function scrapeRenderedHtmlViaBrowser(url, config, options = {}) {
    const readHtmlFromSession = async (sessionId) => {
        const command = ["eval", "document.documentElement?.outerHTML || ''", "--json", "--session", sessionId];
        const evaluated = await (0, agentCli_1.runAgentCommandCapture)(command, config);
        ensureBrowserCommandOk("eval", evaluated, url);
        const html = JSON.parse(evaluated.stdout.trim());
        return { html };
    };
    return withBrowserSession(url, config, options, readHtmlFromSession);
}
|
|
25
|
+
/**
 * Runs `handler` against a freshly opened browser session and always attempts
 * to close that session afterwards.
 *
 * The session id is `read-` plus the first eight characters of a random UUID.
 * A `--profile` argument is added to the open command when a profile id
 * resolves. Failures of the close command are ignored.
 *
 * @param {string} url - Page to open.
 * @param {object} config - Configuration passed to the agent CLI helpers.
 * @param {{profile?: string}} options - Optional profile override.
 * @param {function(string): Promise<*>} handler - Receives the session id.
 * @returns {Promise<*>} Whatever `handler` resolves with.
 */
async function withBrowserSession(url, config, options, handler) {
    const sessionId = `read-${(0, crypto_1.randomUUID)().slice(0, 8)}`;
    const profileId = (0, config_1.resolveProfileId)(config, options.profile);
    const openArgs = [
        "open",
        url,
        "--session",
        sessionId,
        ...(profileId ? ["--profile", profileId] : []),
    ];
    const opened = await (0, agentCli_1.runAgentCommandCapture)(openArgs, config);
    ensureBrowserCommandOk("open", opened, url);
    try {
        return await handler(sessionId);
    }
    finally {
        const closeArgs = ["close", "--session", sessionId];
        await (0, agentCli_1.runAgentCommandCapture)(closeArgs, config).catch(() => undefined);
    }
}
|
|
41
|
+
/**
 * Throws when a captured browser command did not exit with code 0.
 *
 * @param {string} step - Name of the step, used in the error message.
 * @param {{exitCode: number, stdout: string, stderr: string}} response - Captured result.
 * @param {string} url - Page being read, used in the fallback detail text.
 * @returns {void}
 * @throws {CliError} With the trimmed stderr, else the trimmed stdout, else a
 *   generic message as its third argument.
 */
function ensureBrowserCommandOk(step, response, url) {
    if (response.exitCode !== 0) {
        const detail = response.stderr.trim() || response.stdout.trim() || `Browser command failed for ${url}`;
        throw new errors_1.CliError(`Browser read ${step} failed.`, 1, detail);
    }
}
|
|
48
|
+
/**
 * Returns the source of a self-invoking JavaScript expression that, when
 * evaluated inside a page, picks the element most likely to hold the main
 * readable content and returns `{ selector, html, text }` for it.
 *
 * How the in-page script chooses:
 *  1. Candidates are checked in a fixed order. The first non-body candidate
 *     with at least 600 characters of normalized text and either one heading
 *     (h1-h3) or three paragraph/list items wins outright.
 *  2. Otherwise every distinct candidate node is scored from its text length
 *     (capped at 12000), heading, paragraph and code-block counts, minus a
 *     small per-link penalty, with a bonus for content-like selectors. The
 *     highest score wins and ties keep the earlier candidate.
 *  3. If no candidate has any text, the raw body HTML is returned unmodified.
 *
 * The chosen node is cloned and stripped of scripts, styles, navigation,
 * forms, buttons and similar chrome before its HTML and text are read.
 *
 * The strip selector and the clone-and-strip step are defined once inside the
 * script, heading/paragraph counts are returned from the scoring helper, and
 * the fallback pass clones only the final winner.
 *
 * @returns {string} JavaScript source of the expression.
 */
function buildReadableExtractionExpression() {
    return `(() => {
    const STRIP_SELECTOR = "script, style, nav, aside, form, button, svg, dialog, [role='button'], [aria-label='More actions'], .sr-only";
    const EMPTY_SCORE = { score: -Infinity, text: "", headings: 0, paragraphs: 0 };

    const candidates = [
      ["#content-area", document.querySelector("#content-area")],
      ["main article", document.querySelector("main article")],
      ["article", document.querySelector("article")],
      ["main .prose", document.querySelector("main .prose")],
      ["main", document.querySelector("main")],
      ["[role='main']", document.querySelector("[role='main']")],
      [".mintlify-content", document.querySelector(".mintlify-content")],
      [".docs-content", document.querySelector(".docs-content")],
      [".content", document.querySelector(".content")],
      [".prose", document.querySelector(".prose")],
      ["body", document.body],
    ];

    function normalizeText(value) {
      return (value || "").replace(/\\s+/g, " ").trim();
    }

    function scoreNode(node, selector) {
      if (!node) return EMPTY_SCORE;
      const text = normalizeText(node.innerText);
      if (!text) return EMPTY_SCORE;
      const headings = node.querySelectorAll("h1, h2, h3").length;
      const paragraphs = node.querySelectorAll("p, li").length;
      const codeBlocks = node.querySelectorAll("pre, code").length;
      const links = node.querySelectorAll("a[href]").length;
      let score = Math.min(text.length, 12000) + headings * 180 + paragraphs * 120 + codeBlocks * 80 - links * 8;
      if (/^(#content-area|article|main|\\[role='main'\\])/.test(selector)) {
        score += 400;
      }
      return { score, text, headings, paragraphs };
    }

    function extractCleaned(selector, node) {
      const clone = node.cloneNode(true);
      clone.querySelectorAll(STRIP_SELECTOR).forEach((element) => element.remove());
      return {
        selector,
        html: clone.outerHTML || "",
        text: normalizeText(clone.innerText),
      };
    }

    for (const [selector, node] of candidates) {
      if (!node || selector === "body") continue;
      const scored = scoreNode(node, selector);
      if (scored.text.length >= 600 && (scored.headings >= 1 || scored.paragraphs >= 3)) {
        return extractCleaned(selector, node);
      }
    }

    const seen = new Set();
    let bestSelector = "body";
    let bestNode = null;
    let bestScore = -Infinity;

    for (const [selector, node] of candidates) {
      if (!node || seen.has(node)) continue;
      seen.add(node);
      const scored = scoreNode(node, selector);
      if (scored.score > bestScore) {
        bestSelector = selector;
        bestNode = node;
        bestScore = scored.score;
      }
    }

    if (bestNode) {
      return extractCleaned(bestSelector, bestNode);
    }

    return {
      selector: "body",
      html: document.body?.outerHTML || "",
      text: normalizeText(document.body?.innerText),
    };
})()`;
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.scrapeJsonViaBrowser = scrapeJsonViaBrowser;
|
|
4
|
+
const crypto_1 = require("crypto");
|
|
5
|
+
const config_1 = require("../config");
|
|
6
|
+
const agentCli_1 = require("./agentCli");
|
|
7
|
+
const errors_1 = require("./errors");
|
|
8
|
+
// Upper bounds on how many heading and anchor nodes the in-page structured
// extraction script walks.
const MAX_HEADINGS = 50;
const MAX_LINKS = 100;
|
|
10
|
+
/**
 * Opens `url` in a temporary browser session, evaluates the structured
 * extraction expression there and returns the parsed JSON result. The session
 * is always closed afterwards, and failures of the close command are ignored.
 *
 * The session id is `structured-` plus the first eight characters of a random
 * UUID. A `--profile` argument is added when a profile id resolves.
 *
 * @param {string} url - Page to open.
 * @param {object} config - Configuration passed to the agent CLI helpers.
 * @param {{profile?: string}} [options={}] - Optional profile override.
 * @returns {Promise<*>} The JSON value printed by the eval command.
 */
async function scrapeJsonViaBrowser(url, config, options = {}) {
    const runCaptured = (args) => (0, agentCli_1.runAgentCommandCapture)(args, config);
    const sessionId = `structured-${(0, crypto_1.randomUUID)().slice(0, 8)}`;
    const profileId = (0, config_1.resolveProfileId)(config, options.profile);
    const openArgs = [
        "open",
        url,
        "--session",
        sessionId,
        ...(profileId ? ["--profile", profileId] : []),
    ];
    const opened = await runCaptured(openArgs);
    ensureBrowserCommandOk("open", opened, url);
    try {
        const evalArgs = ["eval", buildStructuredExtractionExpression(), "--json", "--session", sessionId];
        const evaluated = await runCaptured(evalArgs);
        ensureBrowserCommandOk("eval", evaluated, url);
        return JSON.parse(evaluated.stdout.trim());
    }
    finally {
        await runCaptured(["close", "--session", sessionId]).catch(() => undefined);
    }
}
|
|
28
|
+
/**
 * Throws when a captured browser command did not exit with code 0.
 *
 * @param {string} step - Name of the step, used in the error message.
 * @param {{exitCode: number, stdout: string, stderr: string}} response - Captured result.
 * @param {string} url - Page being scraped, used in the fallback detail text.
 * @returns {void}
 * @throws {CliError} With the trimmed stderr, else the trimmed stdout, else a
 *   generic message as its third argument.
 */
function ensureBrowserCommandOk(step, response, url) {
    const { exitCode } = response;
    if (exitCode === 0) {
        return;
    }
    let message = response.stderr.trim();
    if (!message) {
        message = response.stdout.trim();
    }
    if (!message) {
        message = `Browser command failed for ${url}`;
    }
    throw new errors_1.CliError(`Browser structured extraction ${step} failed.`, 1, message);
}
|
|
35
|
+
/**
 * Returns the source of a self-invoking JavaScript expression that, when
 * evaluated inside a page, collects structured metadata:
 *
 * - `title`: document.title, or null
 * - `description`: the `description` meta tag, else `og:description`, else null
 * - `canonical`: href of `link[rel="canonical"]`, or null
 * - `meta`: every named/property meta tag with non-empty content (trimmed)
 * - `headings` / `headingsByLevel`: whitespace-normalized, non-empty heading
 *   texts taken from the first MAX_HEADINGS heading nodes
 * - `links`: `{ href, text }` for the first MAX_LINKS anchors that have a
 *   non-empty href
 *
 * @returns {string} JavaScript source of the expression.
 */
function buildStructuredExtractionExpression() {
    const expression = `(() => {
    const meta = {};
    for (const node of Array.from(document.querySelectorAll("meta[name], meta[property]"))) {
      const name = node.getAttribute("name") || node.getAttribute("property");
      const content = node.getAttribute("content");
      if (!name || !content) continue;
      meta[name] = content.trim();
    }

    const headingsByLevel = { h1: [], h2: [], h3: [], h4: [], h5: [], h6: [] };
    const headings = [];
    for (const node of Array.from(document.querySelectorAll("h1, h2, h3, h4, h5, h6")).slice(0, ${MAX_HEADINGS})) {
      const text = (node.textContent || "").replace(/\\s+/g, " ").trim();
      if (!text) continue;
      const level = node.tagName.toLowerCase();
      if (headingsByLevel[level]) {
        headingsByLevel[level].push(text);
      }
      headings.push(text);
    }

    const links = [];
    for (const node of Array.from(document.querySelectorAll("a[href]")).slice(0, ${MAX_LINKS})) {
      const href = (node.href || node.getAttribute("href") || "").trim();
      if (!href) continue;
      const text = (node.textContent || "").replace(/\\s+/g, " ").trim();
      links.push({ href, text });
    }

    const canonicalHref = document.querySelector('link[rel="canonical"]')?.getAttribute("href") || null;

    return {
      title: document.title || null,
      description: meta.description || meta["og:description"] || null,
      canonical: canonicalHref,
      meta,
      headings,
      headingsByLevel,
      links
    };
})()`;
    return expression;
}
|