@nikx/dory-worker 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env +10 -0
- package/dist/config.d.ts +2 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +2 -0
- package/dist/config.js.map +1 -1
- package/dist/docker.d.ts +18 -1
- package/dist/docker.d.ts.map +1 -1
- package/dist/docker.js +31 -3
- package/dist/docker.js.map +1 -1
- package/dist/processor.d.ts.map +1 -1
- package/dist/processor.js +25 -9
- package/dist/processor.js.map +1 -1
- package/dist/test-harness.d.ts +18 -0
- package/dist/test-harness.d.ts.map +1 -0
- package/dist/test-harness.js +211 -0
- package/dist/test-harness.js.map +1 -0
- package/package.json +2 -1
- package/src/config.ts +11 -0
- package/src/docker.ts +63 -3
- package/src/processor.ts +56 -15
- package/src/test-harness.ts +218 -0
- package/test-image/Dockerfile +3 -0
package/.env
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
REDIS_HOST=localhost
|
|
2
|
+
REDIS_PORT=6379
|
|
3
|
+
API_BASE_URL=http://localhost:4500
|
|
4
|
+
GCS_BUCKET=dory-results
|
|
5
|
+
GCP_PROJECT_ID=dory-scraper-local
|
|
6
|
+
STORAGE_EMULATOR_HOST=http://localhost:4443
|
|
7
|
+
DOCKER_IMAGE=dory-actor:latest
|
|
8
|
+
MAX_CONCURRENT_RUNS=2
|
|
9
|
+
LOG_LEVEL=debug
|
|
10
|
+
WORKER_ID=test-worker-01
|
package/dist/config.d.ts
CHANGED
|
@@ -9,6 +9,8 @@ export interface WorkerConfig {
|
|
|
9
9
|
/** fake-gcs-server URL for local dev (e.g. http://localhost:9199) */
|
|
10
10
|
storageEmulatorHost?: string;
|
|
11
11
|
dockerImage?: string;
|
|
12
|
+
crawlerRedisUrl?: string;
|
|
13
|
+
containerCount: number;
|
|
12
14
|
maxConcurrentRuns: number;
|
|
13
15
|
workerId: string;
|
|
14
16
|
logLevel: string;
|
package/dist/config.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,YAAY;IAE3B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,CAAC;IAIvB,UAAU,EAAE,MAAM,CAAC;IAKnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,qEAAqE;IACrE,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAG7B,WAAW,CAAC,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,YAAY;IAE3B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,CAAC;IAIvB,UAAU,EAAE,MAAM,CAAC;IAKnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,qEAAqE;IACrE,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAG7B,WAAW,CAAC,EAAE,MAAM,CAAC;IAKrB,eAAe,CAAC,EAAE,MAAM,CAAC;IAGzB,cAAc,EAAE,MAAM,CAAC;IAGvB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAID,wBAAgB,UAAU,IAAI,YAAY,CAoCzC"}
|
package/dist/config.js
CHANGED
|
@@ -21,6 +21,8 @@ function loadConfig() {
|
|
|
21
21
|
gcpProjectId: process.env.GCP_PROJECT_ID,
|
|
22
22
|
storageEmulatorHost: process.env.STORAGE_EMULATOR_HOST,
|
|
23
23
|
dockerImage: process.env.DOCKER_IMAGE,
|
|
24
|
+
crawlerRedisUrl: process.env.CRAWLER_REDIS_URL,
|
|
25
|
+
containerCount: parseInt(process.env.CONTAINER_COUNT ?? "1", 10),
|
|
24
26
|
maxConcurrentRuns: parseInt(process.env.MAX_CONCURRENT_RUNS ?? "2", 10),
|
|
25
27
|
workerId: process.env.WORKER_ID ?? `dory-worker-${process.pid}`,
|
|
26
28
|
logLevel: process.env.LOG_LEVEL ?? "info",
|
package/dist/config.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":";;
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":";;AAsCA,gCAoCC;AAtCD,MAAM,aAAa,GAAG,CAAC,cAAc,CAAU,CAAC;AAEhD,SAAgB,UAAU;IACxB,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAC7D,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,MAAM,IAAI,KAAK,CACb,2CAA2C,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAChE,CAAC;IACJ,CAAC;IAED,wDAAwD;IACxD,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,SAAS,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC;QACtD,MAAM,IAAI,KAAK,CACb,kEAAkE,CACnE,CAAC;IACJ,CAAC;IAED,OAAO;QACL,QAAQ,EAAE,OAAO,CAAC,GAAG,CAAC,SAAS;QAC/B,SAAS,EAAE,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,WAAW;QAChD,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,MAAM,EAAE,EAAE,CAAC;QACzD,aAAa,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;QAEzC,UAAU,EAAE,OAAO,CAAC,GAAG,CAAC,YAAa;QAErC,SAAS,EAAE,OAAO,CAAC,GAAG,CAAC,UAAU;QACjC,YAAY,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;QACxC,mBAAmB,EAAE,OAAO,CAAC,GAAG,CAAC,qBAAqB;QAEtD,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,YAAY;QAErC,eAAe,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB;QAC9C,cAAc,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,GAAG,EAAE,EAAE,CAAC;QAEhE,iBAAiB,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,GAAG,EAAE,EAAE,CAAC;QACvE,QAAQ,EAAE,OAAO,CAAC,GAAG,CAAC,SAAS,IAAI,eAAe,OAAO,CAAC,GAAG,EAAE;QAC/D,QAAQ,EAAE,OAAO,CAAC,GAAG,CAAC,SAAS,IAAI,MAAM;KAC1C,CAAC;AACJ,CAAC"}
|
package/dist/docker.d.ts
CHANGED
|
@@ -5,6 +5,15 @@ interface ActorConfigEnvelope {
|
|
|
5
5
|
actorInstanceId: string;
|
|
6
6
|
userInput: Record<string, unknown>;
|
|
7
7
|
}
|
|
8
|
+
/** Extra env vars injected when running in distributed (multi-container) mode. */
|
|
9
|
+
interface DistributedOpts {
|
|
10
|
+
/** Full Redis URL for the per-run crawler queue, e.g. redis://host:6379 */
|
|
11
|
+
crawlerRedisUrl: string;
|
|
12
|
+
/** Redis key namespace — typically the runId */
|
|
13
|
+
queueName: string;
|
|
14
|
+
/** Human-readable container label, e.g. "worker-1" */
|
|
15
|
+
workerId: string;
|
|
16
|
+
}
|
|
8
17
|
/**
|
|
9
18
|
* Start the dory-core scraping container in detached mode.
|
|
10
19
|
* Returns the container ID.
|
|
@@ -12,7 +21,15 @@ interface ActorConfigEnvelope {
|
|
|
12
21
|
* @param dockerImage Image to run — sourced from GET /api/runs/:id/config
|
|
13
22
|
* so the API controls what version runs everywhere.
|
|
14
23
|
*/
|
|
15
|
-
export declare function startContainer(config: WorkerConfig, actorConfig: ActorConfigEnvelope, memoryLimit: number, dockerImage: string): Promise<string>;
|
|
24
|
+
export declare function startContainer(config: WorkerConfig, actorConfig: ActorConfigEnvelope, memoryLimit: number, dockerImage: string, distributed?: DistributedOpts): Promise<string>;
|
|
25
|
+
/**
|
|
26
|
+
* Spawn `count` containers for a distributed run.
|
|
27
|
+
* All containers share the same Redis queue (`queueName`) so they
|
|
28
|
+
* collaborate on the same crawl without duplicating work.
|
|
29
|
+
*
|
|
30
|
+
* Returns the list of container IDs in order (worker-1 … worker-N).
|
|
31
|
+
*/
|
|
32
|
+
export declare function startContainersForRun(config: WorkerConfig, actorConfig: ActorConfigEnvelope, memoryLimit: number, dockerImage: string, crawlerRedisUrl: string, queueName: string, count: number): Promise<string[]>;
|
|
16
33
|
/**
|
|
17
34
|
* Block until the container exits and return its exit code.
|
|
18
35
|
* If the container exceeds `timeoutSecs`, it is force-killed.
|
package/dist/docker.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"docker.d.ts","sourceRoot":"","sources":["../src/docker.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAK7C,UAAU,mBAAmB;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,eAAe,EAAE,MAAM,CAAC;IACxB,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;
|
|
1
|
+
{"version":3,"file":"docker.d.ts","sourceRoot":"","sources":["../src/docker.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAK7C,UAAU,mBAAmB;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,eAAe,EAAE,MAAM,CAAC;IACxB,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED,kFAAkF;AAClF,UAAU,eAAe;IACvB,2EAA2E;IAC3E,eAAe,EAAE,MAAM,CAAC;IACxB,gDAAgD;IAChD,SAAS,EAAE,MAAM,CAAC;IAClB,sDAAsD;IACtD,QAAQ,EAAE,MAAM,CAAC;CAClB;AAgDD;;;;;;GAMG;AACH,wBAAsB,cAAc,CAClC,MAAM,EAAE,YAAY,EACpB,WAAW,EAAE,mBAAmB,EAChC,WAAW,EAAE,MAAM,EACnB,WAAW,EAAE,MAAM,EACnB,WAAW,CAAC,EAAE,eAAe,GAC5B,OAAO,CAAC,MAAM,CAAC,CAoBjB;AAED;;;;;;GAMG;AACH,wBAAsB,qBAAqB,CACzC,MAAM,EAAE,YAAY,EACpB,WAAW,EAAE,mBAAmB,EAChC,WAAW,EAAE,MAAM,EACnB,WAAW,EAAE,MAAM,EACnB,eAAe,EAAE,MAAM,EACvB,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,MAAM,EAAE,CAAC,CAqBnB;AAED;;;GAGG;AACH,wBAAsB,gBAAgB,CACpC,WAAW,EAAE,MAAM,EACnB,WAAW,EAAE,MAAM,GAClB,OAAO,CAAC,MAAM,CAAC,CAiBjB"}
|
package/dist/docker.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.startContainer = startContainer;
|
|
4
|
+
exports.startContainersForRun = startContainersForRun;
|
|
4
5
|
exports.waitForContainer = waitForContainer;
|
|
5
6
|
const child_process_1 = require("child_process");
|
|
6
7
|
const util_1 = require("util");
|
|
@@ -15,7 +16,7 @@ function toDockerHost(url) {
|
|
|
15
16
|
.replace("http://localhost", "http://host.docker.internal")
|
|
16
17
|
.replace("https://localhost", "https://host.docker.internal");
|
|
17
18
|
}
|
|
18
|
-
function buildDockerEnvArgs(config, actorConfig, memoryLimit) {
|
|
19
|
+
function buildDockerEnvArgs(config, actorConfig, memoryLimit, distributed) {
|
|
19
20
|
const configBase64 = Buffer.from(JSON.stringify(actorConfig)).toString("base64");
|
|
20
21
|
const args = [
|
|
21
22
|
`-e ACTOR_CONFIG=${configBase64}`,
|
|
@@ -30,6 +31,12 @@ function buildDockerEnvArgs(config, actorConfig, memoryLimit) {
|
|
|
30
31
|
if (config.storageEmulatorHost) {
|
|
31
32
|
args.push(`-e STORAGE_EMULATOR_HOST=${toDockerHost(config.storageEmulatorHost)}`);
|
|
32
33
|
}
|
|
34
|
+
// Distributed mode — wire the container to the shared crawler Redis queue
|
|
35
|
+
if (distributed) {
|
|
36
|
+
args.push(`-e REDIS_URL=${distributed.crawlerRedisUrl}`);
|
|
37
|
+
args.push(`-e QUEUE_NAME=${distributed.queueName}`);
|
|
38
|
+
args.push(`-e WORKER_ID=${distributed.workerId}`);
|
|
39
|
+
}
|
|
33
40
|
return args;
|
|
34
41
|
}
|
|
35
42
|
/**
|
|
@@ -39,8 +46,8 @@ function buildDockerEnvArgs(config, actorConfig, memoryLimit) {
|
|
|
39
46
|
* @param dockerImage Image to run — sourced from GET /api/runs/:id/config
|
|
40
47
|
* so the API controls what version runs everywhere.
|
|
41
48
|
*/
|
|
42
|
-
async function startContainer(config, actorConfig, memoryLimit, dockerImage) {
|
|
43
|
-
const envArgs = buildDockerEnvArgs(config, actorConfig, memoryLimit).join(" ");
|
|
49
|
+
async function startContainer(config, actorConfig, memoryLimit, dockerImage, distributed) {
|
|
50
|
+
const envArgs = buildDockerEnvArgs(config, actorConfig, memoryLimit, distributed).join(" ");
|
|
44
51
|
const memoryStr = `${memoryLimit}m`;
|
|
45
52
|
const cmd = [
|
|
46
53
|
"docker run -d --rm",
|
|
@@ -53,6 +60,27 @@ async function startContainer(config, actorConfig, memoryLimit, dockerImage) {
|
|
|
53
60
|
const { stdout } = await execAsync(cmd);
|
|
54
61
|
return stdout.trim();
|
|
55
62
|
}
|
|
63
|
+
/**
|
|
64
|
+
* Spawn `count` containers for a distributed run.
|
|
65
|
+
* All containers share the same Redis queue (`queueName`) so they
|
|
66
|
+
* collaborate on the same crawl without duplicating work.
|
|
67
|
+
*
|
|
68
|
+
* Returns the list of container IDs in order (worker-1 … worker-N).
|
|
69
|
+
*/
|
|
70
|
+
async function startContainersForRun(config, actorConfig, memoryLimit, dockerImage, crawlerRedisUrl, queueName, count) {
|
|
71
|
+
const containerIds = [];
|
|
72
|
+
for (let i = 1; i <= count; i++) {
|
|
73
|
+
const workerId = `worker-${i}`;
|
|
74
|
+
const id = await startContainer(config, actorConfig, memoryLimit, dockerImage, {
|
|
75
|
+
crawlerRedisUrl,
|
|
76
|
+
queueName,
|
|
77
|
+
workerId,
|
|
78
|
+
});
|
|
79
|
+
logger_1.logger.info(`[${queueName}] ${workerId} container started: ${id.slice(0, 12)}`);
|
|
80
|
+
containerIds.push(id);
|
|
81
|
+
}
|
|
82
|
+
return containerIds;
|
|
83
|
+
}
|
|
56
84
|
/**
|
|
57
85
|
* Block until the container exits and return its exit code.
|
|
58
86
|
* If the container exceeds `timeoutSecs`, it is force-killed.
|
package/dist/docker.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"docker.js","sourceRoot":"","sources":["../src/docker.ts"],"names":[],"mappings":";;
|
|
1
|
+
{"version":3,"file":"docker.js","sourceRoot":"","sources":["../src/docker.ts"],"names":[],"mappings":";;AA6EA,wCA0BC;AASD,sDA6BC;AAMD,4CAoBC;AAvKD,iDAAqC;AACrC,+BAAiC;AAEjC,qCAAkC;AAElC,MAAM,SAAS,GAAG,IAAA,gBAAS,EAAC,oBAAI,CAAC,CAAC;AAmBlC;;;GAGG;AACH,SAAS,YAAY,CAAC,GAAW;IAC/B,OAAO,GAAG;SACP,OAAO,CAAC,kBAAkB,EAAE,6BAA6B,CAAC;SAC1D,OAAO,CAAC,mBAAmB,EAAE,8BAA8B,CAAC,CAAC;AAClE,CAAC;AAED,SAAS,kBAAkB,CACzB,MAAoB,EACpB,WAAgC,EAChC,WAAmB,EACnB,WAA6B;IAE7B,MAAM,YAAY,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CACpE,QAAQ,CACT,CAAC;IAEF,MAAM,IAAI,GAAG;QACX,mBAAmB,YAAY,EAAE;QACjC,mBAAmB,YAAY,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE;QACpD,4BAA4B,WAAW,EAAE;KAC1C,CAAC;IAEF,wEAAwE;IACxE,IAAI,MAAM,CAAC,SAAS;QAAE,IAAI,CAAC,IAAI,CAAC,iBAAiB,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;IACrE,IAAI,MAAM,CAAC,YAAY;QACrB,IAAI,CAAC,IAAI,CAAC,qBAAqB,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC;IACxD,IAAI,MAAM,CAAC,mBAAmB,EAAE,CAAC;QAC/B,IAAI,CAAC,IAAI,CACP,4BAA4B,YAAY,CAAC,MAAM,CAAC,mBAAmB,CAAC,EAAE,CACvE,CAAC;IACJ,CAAC;IAED,0EAA0E;IAC1E,IAAI,WAAW,EAAE,CAAC;QAChB,IAAI,CAAC,IAAI,CAAC,gBAAgB,WAAW,CAAC,eAAe,EAAE,CAAC,CAAC;QACzD,IAAI,CAAC,IAAI,CAAC,iBAAiB,WAAW,CAAC,SAAS,EAAE,CAAC,CAAC;QACpD,IAAI,CAAC,IAAI,CAAC,gBAAgB,WAAW,CAAC,QAAQ,EAAE,CAAC,CAAC;IACpD,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;GAMG;AACI,KAAK,UAAU,cAAc,CAClC,MAAoB,EACpB,WAAgC,EAChC,WAAmB,EACnB,WAAmB,EACnB,WAA6B;IAE7B,MAAM,OAAO,GAAG,kBAAkB,CAChC,MAAM,EACN,WAAW,EACX,WAAW,EACX,WAAW,CACZ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACZ,MAAM,SAAS,GAAG,GAAG,WAAW,GAAG,CAAC;IAEpC,MAAM,GAAG,GAAG;QACV,oBAAoB;QACpB,YAAY,SAAS,EAAE;QACvB,8CAA8C;QAC9C,OAAO;QACP,WAAW;KACZ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAEZ,eAAM,CAAC,KAAK,CAAC,eAAe,GAAG,EAAE,CAAC,CAAC;IACnC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,SAAS,CAAC,GAAG,CAAC,CAAC;IACxC,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;AACvB,CAAC;AAED;;;;;;GAMG;AACI,KAAK,UAAU,qBAAqB,CACzC,MAAoB,EACpB,WAAgC,EAChC,WAAmB,EACnB,WAAmB,EACnB,eAAuB,EACvB,SAAiB,EACjB,KAAa;IAEb,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,KAAK,EAAE,CAAC,EAAE
,EAAE,CAAC;QAChC,MAAM,QAAQ,GAAG,UAAU,CAAC,EAAE,CAAC;QAC/B,MAAM,EAAE,GAAG,MAAM,cAAc,CAC7B,MAAM,EACN,WAAW,EACX,WAAW,EACX,WAAW,EACX;YACE,eAAe;YACf,SAAS;YACT,QAAQ;SACT,CACF,CAAC;QACF,eAAM,CAAC,IAAI,CACT,IAAI,SAAS,KAAK,QAAQ,uBAAuB,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CACnE,CAAC;QACF,YAAY,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACxB,CAAC;IACD,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;;GAGG;AACI,KAAK,UAAU,gBAAgB,CACpC,WAAmB,EACnB,WAAmB;IAEnB,IAAI,CAAC;QACH,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,SAAS,CAAC,eAAe,WAAW,EAAE,EAAE;YAC/D,OAAO,EAAE,CAAC,WAAW,GAAG,EAAE,CAAC,GAAG,KAAK;SACpC,CAAC,CAAC;QACH,OAAO,QAAQ,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC;IACrC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,eAAM,CAAC,IAAI,CACT,6BAA6B,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,KAAM,GAAa,CAAC,OAAO,kBAAkB,CACnG,CAAC;QACF,IAAI,CAAC;YACH,MAAM,SAAS,CAAC,eAAe,WAAW,EAAE,CAAC,CAAC;QAChD,CAAC;QAAC,MAAM,CAAC;YACP,kCAAkC;QACpC,CAAC;QACD,OAAO,GAAG,CAAC,CAAC,oBAAoB;IAClC,CAAC;AACH,CAAC"}
|
package/dist/processor.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"processor.d.ts","sourceRoot":"","sources":["../src/processor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,QAAQ,CAAC;AAClC,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;
|
|
1
|
+
{"version":3,"file":"processor.d.ts","sourceRoot":"","sources":["../src/processor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,QAAQ,CAAC;AAClC,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAkD7C;;;;;;;;;;GAUG;AACH,wBAAsB,aAAa,CACjC,MAAM,EAAE,YAAY,EACpB,GAAG,EAAE,GAAG,CAAC;IAAE,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,GAC1B,OAAO,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAAC,CAuI5C"}
|
package/dist/processor.js
CHANGED
|
@@ -54,22 +54,36 @@ async function processRunJob(config, job) {
|
|
|
54
54
|
runId,
|
|
55
55
|
status: "running",
|
|
56
56
|
});
|
|
57
|
-
// ── Step 3: Start container
|
|
57
|
+
// ── Step 3: Start container(s) ────────────────────────────────────────
|
|
58
58
|
const dockerImage = runConfig.dockerImage || config.dockerImage;
|
|
59
59
|
if (!dockerImage) {
|
|
60
60
|
throw new Error(`No docker image: API did not return one and DOCKER_IMAGE env var is not set`);
|
|
61
61
|
}
|
|
62
|
-
|
|
62
|
+
// API-returned containerCount takes precedence over the worker env var
|
|
63
|
+
const containerCount = runConfig.containerCount ?? config.containerCount;
|
|
64
|
+
const isDistributed = !!config.crawlerRedisUrl || containerCount > 1;
|
|
65
|
+
let containerIds;
|
|
63
66
|
try {
|
|
64
|
-
|
|
65
|
-
|
|
67
|
+
if (isDistributed) {
|
|
68
|
+
if (!config.crawlerRedisUrl) {
|
|
69
|
+
throw new Error("CONTAINER_COUNT > 1 but CRAWLER_REDIS_URL is not set — containers cannot share a queue");
|
|
70
|
+
}
|
|
71
|
+
containerIds = await (0, docker_1.startContainersForRun)(config, runConfig.actorConfig, runConfig.memoryLimitMb, dockerImage, config.crawlerRedisUrl, runId, // QUEUE_NAME = runId for job isolation
|
|
72
|
+
containerCount);
|
|
73
|
+
logger_1.logger.info(`[${runId}] ${containerIds.length} containers started (distributed mode, queue: ${runId})`);
|
|
74
|
+
}
|
|
75
|
+
else {
|
|
76
|
+
const id = await (0, docker_1.startContainer)(config, runConfig.actorConfig, runConfig.memoryLimitMb, dockerImage);
|
|
77
|
+
containerIds = [id];
|
|
78
|
+
logger_1.logger.info(`[${runId}] Container started: ${id.slice(0, 12)}`);
|
|
79
|
+
}
|
|
66
80
|
}
|
|
67
81
|
catch (err) {
|
|
68
|
-
logger_1.logger.error(`[${runId}] Failed to start container: ${err.message}`);
|
|
82
|
+
logger_1.logger.error(`[${runId}] Failed to start container(s): ${err.message}`);
|
|
69
83
|
await apiPatch(config.apiBaseUrl, `/api/runs/${runId}/status`, {
|
|
70
84
|
runId,
|
|
71
85
|
status: "failed",
|
|
72
|
-
error: `Failed to start container: ${err.message}`,
|
|
86
|
+
error: `Failed to start container(s): ${err.message}`,
|
|
73
87
|
});
|
|
74
88
|
throw err;
|
|
75
89
|
}
|
|
@@ -85,11 +99,13 @@ async function processRunJob(config, job) {
|
|
|
85
99
|
}
|
|
86
100
|
}, 120_000);
|
|
87
101
|
}
|
|
88
|
-
// ── Step 5: Wait for
|
|
102
|
+
// ── Step 5: Wait for all containers ──────────────────────────────────
|
|
89
103
|
let exitCode = -1;
|
|
90
104
|
try {
|
|
91
|
-
|
|
92
|
-
|
|
105
|
+
const exitCodes = await Promise.all(containerIds.map((id) => (0, docker_1.waitForContainer)(id, runConfig.actorTimeoutSecs)));
|
|
106
|
+
// Use the worst exit code — if any container failed, the run failed
|
|
107
|
+
exitCode = exitCodes.reduce((worst, code) => (code !== 0 ? code : worst), 0);
|
|
108
|
+
logger_1.logger.info(`[${runId}] All ${containerIds.length} container(s) exited — codes: [${exitCodes.join(", ")}]`);
|
|
93
109
|
}
|
|
94
110
|
finally {
|
|
95
111
|
if (lockTimer)
|
package/dist/processor.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"processor.js","sourceRoot":"","sources":["../src/processor.ts"],"names":[],"mappings":";;
|
|
1
|
+
{"version":3,"file":"processor.js","sourceRoot":"","sources":["../src/processor.ts"],"names":[],"mappings":";;AA8DA,sCA0IC;AAtMD,qCAIkB;AAClB,qCAAkC;AAiBlC,KAAK,UAAU,MAAM,CAAI,UAAkB,EAAE,IAAY;IACvD,MAAM,GAAG,GAAG,GAAG,UAAU,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC;IACvD,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;IAC7B,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;QACZ,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;QAC9C,MAAM,IAAI,KAAK,CAAC,OAAO,IAAI,WAAW,GAAG,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC,CAAC;IAC/D,CAAC;IACD,OAAO,GAAG,CAAC,IAAI,EAAgB,CAAC;AAClC,CAAC;AAED,KAAK,UAAU,QAAQ,CACrB,UAAkB,EAClB,IAAY,EACZ,IAA6B;IAE7B,MAAM,GAAG,GAAG,GAAG,UAAU,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC;IACvD,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;QAC3B,MAAM,EAAE,MAAM;QACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;QAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;KAC3B,CAAC,CAAC;IACH,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;QACZ,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;QAC9C,eAAM,CAAC,IAAI,CAAC,QAAQ,IAAI,WAAW,GAAG,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC,CAAC;IAC5D,CAAC;AACH,CAAC;AAED;;;;;;;;;;GAUG;AACI,KAAK,UAAU,aAAa,CACjC,MAAoB,EACpB,GAA2B;IAE3B,MAAM,EAAE,KAAK,EAAE,GAAG,GAAG,CAAC,IAAI,CAAC;IAE3B,IAAI,CAAC,KAAK;QAAE,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;IAEtD,eAAM,CAAC,IAAI,CACT,IAAI,KAAK,oBAAoB,GAAG,CAAC,EAAE,aAAa,GAAG,CAAC,YAAY,GAAG,CAAC,GAAG,CACxE,CAAC;IAEF,0EAA0E;IAC1E,IAAI,SAA4B,CAAC;IACjC,IAAI,CAAC;QACH,SAAS,GAAG,MAAM,MAAM,CACtB,MAAM,CAAC,UAAU,EACjB,aAAa,KAAK,SAAS,CAC5B,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,eAAM,CAAC,KAAK,CACV,IAAI,KAAK,iCAAkC,GAAa,CAAC,OAAO,EAAE,CACnE,CAAC;QACF,MAAM,GAAG,CAAC,CAAC,oBAAoB;IACjC,CAAC;IAED,0EAA0E;IAC1E,MAAM,QAAQ,CAAC,MAAM,CAAC,UAAU,EAAE,aAAa,KAAK,SAAS,EAAE;QAC7D,KAAK;QACL,MAAM,EAAE,SAAS;KAClB,CAAC,CAAC;IAEH,yEAAyE;IACzE,MAAM,WAAW,GAAG,SAAS,CAAC,WAAW,IAAI,MAAM,CAAC,WAAW,CAAC;IAChE,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CACb,6EAA6E,CAC9E,CAAC;IACJ,CAAC;IAED,uEAAuE;IACvE,MA
AM,cAAc,GAAG,SAAS,CAAC,cAAc,IAAI,MAAM,CAAC,cAAc,CAAC;IACzE,MAAM,aAAa,GAAG,CAAC,CAAC,MAAM,CAAC,eAAe,IAAI,cAAc,GAAG,CAAC,CAAC;IAErE,IAAI,YAAsB,CAAC;IAC3B,IAAI,CAAC;QACH,IAAI,aAAa,EAAE,CAAC;YAClB,IAAI,CAAC,MAAM,CAAC,eAAe,EAAE,CAAC;gBAC5B,MAAM,IAAI,KAAK,CACb,wFAAwF,CACzF,CAAC;YACJ,CAAC;YACD,YAAY,GAAG,MAAM,IAAA,8BAAqB,EACxC,MAAM,EACN,SAAS,CAAC,WAAW,EACrB,SAAS,CAAC,aAAa,EACvB,WAAW,EACX,MAAM,CAAC,eAAe,EACtB,KAAK,EAAE,uCAAuC;YAC9C,cAAc,CACf,CAAC;YACF,eAAM,CAAC,IAAI,CACT,IAAI,KAAK,KAAK,YAAY,CAAC,MAAM,iDAAiD,KAAK,GAAG,CAC3F,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,MAAM,EAAE,GAAG,MAAM,IAAA,uBAAc,EAC7B,MAAM,EACN,SAAS,CAAC,WAAW,EACrB,SAAS,CAAC,aAAa,EACvB,WAAW,CACZ,CAAC;YACF,YAAY,GAAG,CAAC,EAAE,CAAC,CAAC;YACpB,eAAM,CAAC,IAAI,CAAC,IAAI,KAAK,wBAAwB,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,eAAM,CAAC,KAAK,CACV,IAAI,KAAK,mCAAoC,GAAa,CAAC,OAAO,EAAE,CACrE,CAAC;QACF,MAAM,QAAQ,CAAC,MAAM,CAAC,UAAU,EAAE,aAAa,KAAK,SAAS,EAAE;YAC7D,KAAK;YACL,MAAM,EAAE,QAAQ;YAChB,KAAK,EAAE,iCAAkC,GAAa,CAAC,OAAO,EAAE;SACjE,CAAC,CAAC;QACH,MAAM,GAAG,CAAC;IACZ,CAAC;IAED,2EAA2E;IAC3E,IAAI,SAAqD,CAAC;IAC1D,IAAI,GAAG,CAAC,KAAK,EAAE,CAAC;QACd,SAAS,GAAG,WAAW,CAAC,KAAK,IAAI,EAAE;YACjC,IAAI,CAAC;gBACH,MAAM,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,KAAM,EAAE,OAAO,CAAC,CAAC;YAC5C,CAAC;YAAC,OAAO,MAAM,EAAE,CAAC;gBAChB,eAAM,CAAC,IAAI,CACT,IAAI,KAAK,4BAA6B,MAAgB,CAAC,OAAO,EAAE,CACjE,CAAC;YACJ,CAAC;QACH,CAAC,EAAE,OAAO,CAAC,CAAC;IACd,CAAC;IAED,wEAAwE;IACxE,IAAI,QAAQ,GAAG,CAAC,CAAC,CAAC;IAClB,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,GAAG,CACjC,YAAY,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CACtB,IAAA,yBAAgB,EAAC,EAAE,EAAE,SAAS,CAAC,gBAAgB,CAAC,CACjD,CACF,CAAC;QACF,oEAAoE;QACpE,QAAQ,GAAG,SAAS,CAAC,MAAM,CACzB,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,EAC5C,CAAC,CACF,CAAC;QACF,eAAM,CAAC,IAAI,CACT,IAAI,KAAK,SAAS,YAAY,CAAC,MAAM,kCAAkC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAC/F,CAAC;IACJ,CAAC;YAAS,CAAC;QACT,IAAI,SAAS;YAAE,aAAa,CAAC,SAAS,CAAC,C
AAC;IAC1C,CAAC;IAED,yEAAyE;IACzE,yEAAyE;IACzE,oEAAoE;IACpE,wEAAwE;IACxE,8DAA8D;IAC9D,MAAM,WAAW,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC;IAC5D,MAAM,QAAQ,CAAC,MAAM,CAAC,UAAU,EAAE,aAAa,KAAK,SAAS,EAAE;QAC7D,KAAK;QACL,MAAM,EAAE,WAAW;QACnB,QAAQ;QACR,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,GAAG,CAAC,QAAQ,KAAK,CAAC,IAAI;YACpB,KAAK,EAAE,8BAA8B,QAAQ,iCAAiC;SAC/E,CAAC;KACH,CAAC,CAAC;IAEH,eAAM,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,GAAG,CAAC,EAAE,mBAAmB,WAAW,EAAE,CAAC,CAAC;IACtE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;AACxC,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Standalone test harness — no dory-api needed.
|
|
3
|
+
*
|
|
4
|
+
* What it does:
|
|
5
|
+
* 1. Starts a mock HTTP server that emulates the two dory-api endpoints
|
|
6
|
+
* the worker uses: GET /api/runs/:id/config and POST /api/runs/:id/status
|
|
7
|
+
* 2. Pushes a real BullMQ job into local Redis
|
|
8
|
+
* 3. Starts the real worker (same code that runs in production)
|
|
9
|
+
* 4. Waits for the job to finish and reports pass / fail
|
|
10
|
+
*
|
|
11
|
+
* Docker image used: "alpine" with command override via a tiny wrapper image
|
|
12
|
+
* built on-the-fly, or just "hello-world" which exits 0 immediately.
|
|
13
|
+
*
|
|
14
|
+
* Usage:
|
|
15
|
+
* npx ts-node --project tsconfig.json src/test-harness.ts
|
|
16
|
+
*/
|
|
17
|
+
export {};
|
|
18
|
+
//# sourceMappingURL=test-harness.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"test-harness.d.ts","sourceRoot":"","sources":["../src/test-harness.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG"}
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Standalone test harness — no dory-api needed.
|
|
4
|
+
*
|
|
5
|
+
* What it does:
|
|
6
|
+
* 1. Starts a mock HTTP server that emulates the two dory-api endpoints
|
|
7
|
+
* the worker uses: GET /api/runs/:id/config and POST /api/runs/:id/status
|
|
8
|
+
* 2. Pushes a real BullMQ job into local Redis
|
|
9
|
+
* 3. Starts the real worker (same code that runs in production)
|
|
10
|
+
* 4. Waits for the job to finish and reports pass / fail
|
|
11
|
+
*
|
|
12
|
+
* Docker image used: "alpine" with command override via a tiny wrapper image
|
|
13
|
+
* built on-the-fly, or just "hello-world" which exits 0 immediately.
|
|
14
|
+
*
|
|
15
|
+
* Usage:
|
|
16
|
+
* npx ts-node --project tsconfig.json src/test-harness.ts
|
|
17
|
+
*/
|
|
18
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
19
|
+
if (k2 === undefined) k2 = k;
|
|
20
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
21
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
22
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
23
|
+
}
|
|
24
|
+
Object.defineProperty(o, k2, desc);
|
|
25
|
+
}) : (function(o, m, k, k2) {
|
|
26
|
+
if (k2 === undefined) k2 = k;
|
|
27
|
+
o[k2] = m[k];
|
|
28
|
+
}));
|
|
29
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
30
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
31
|
+
}) : function(o, v) {
|
|
32
|
+
o["default"] = v;
|
|
33
|
+
});
|
|
34
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
35
|
+
var ownKeys = function(o) {
|
|
36
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
37
|
+
var ar = [];
|
|
38
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
39
|
+
return ar;
|
|
40
|
+
};
|
|
41
|
+
return ownKeys(o);
|
|
42
|
+
};
|
|
43
|
+
return function (mod) {
|
|
44
|
+
if (mod && mod.__esModule) return mod;
|
|
45
|
+
var result = {};
|
|
46
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
47
|
+
__setModuleDefault(result, mod);
|
|
48
|
+
return result;
|
|
49
|
+
};
|
|
50
|
+
})();
|
|
51
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
52
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
53
|
+
};
|
|
54
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
55
|
+
const http = __importStar(require("http"));
|
|
56
|
+
const bullmq_1 = require("bullmq");
|
|
57
|
+
const ioredis_1 = __importDefault(require("ioredis"));
|
|
58
|
+
const processor_1 = require("./processor");
|
|
59
|
+
const logger_1 = require("./logger");
|
|
60
|
+
// ── Config ────────────────────────────────────────────────────────────────────
|
|
61
|
+
const MOCK_API_PORT = 14500;
|
|
62
|
+
const REDIS_HOST = process.env.REDIS_HOST ?? "localhost";
|
|
63
|
+
const REDIS_PORT = parseInt(process.env.REDIS_PORT ?? "6379", 10);
|
|
64
|
+
const RUN_ID = `test-run-${Date.now()}`;
|
|
65
|
+
// Dedicated test queue — avoids interference from any running dory-api workers
|
|
66
|
+
// which consume from the real "run-execution" queue.
|
|
67
|
+
const QUEUE_NAME = "dory-worker-test";
|
|
68
|
+
// "dory-actor:local" is already pulled on this machine and exits quickly
|
|
69
|
+
// when given an empty/invalid config (no startUrls → no work → exits 0).
|
|
70
|
+
// Override with TEST_DOCKER_IMAGE env var to test a different image.
|
|
71
|
+
const TEST_DOCKER_IMAGE = process.env.TEST_DOCKER_IMAGE ?? "dory-worker-test:fake";
|
|
72
|
+
const statusUpdates = [];
|
|
73
|
+
let configRequests = 0;
|
|
74
|
+
function startMockApi() {
|
|
75
|
+
const actorConfig = {
|
|
76
|
+
jobId: `job-${RUN_ID}`,
|
|
77
|
+
runId: RUN_ID,
|
|
78
|
+
actorInstanceId: "test-actor-001",
|
|
79
|
+
userInput: {
|
|
80
|
+
crawlerType: "cheerio",
|
|
81
|
+
handlers: [],
|
|
82
|
+
startUrls: ["https://example.com"],
|
|
83
|
+
},
|
|
84
|
+
};
|
|
85
|
+
const server = http.createServer((req, res) => {
|
|
86
|
+
const url = req.url ?? "";
|
|
87
|
+
logger_1.logger.debug(`[mock-api] ${req.method} ${url}`);
|
|
88
|
+
if (req.method === "GET" && url.includes("/config")) {
|
|
89
|
+
configRequests++;
|
|
90
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
91
|
+
res.end(JSON.stringify({
|
|
92
|
+
runId: RUN_ID,
|
|
93
|
+
actorConfig,
|
|
94
|
+
memoryLimitMb: 256,
|
|
95
|
+
actorTimeoutSecs: 60,
|
|
96
|
+
dockerImage: TEST_DOCKER_IMAGE,
|
|
97
|
+
}));
|
|
98
|
+
}
|
|
99
|
+
else if (req.method === "POST" && url.includes("/status")) {
|
|
100
|
+
let body = "";
|
|
101
|
+
req.on("data", (chunk) => (body += chunk));
|
|
102
|
+
req.on("end", () => {
|
|
103
|
+
const data = JSON.parse(body || "{}");
|
|
104
|
+
statusUpdates.push(data);
|
|
105
|
+
logger_1.logger.info(`[mock-api] status callback → status=${data.status}${data.exitCode !== undefined ? ` exitCode=${data.exitCode}` : ""}`);
|
|
106
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
107
|
+
res.end(JSON.stringify({ message: "ok" }));
|
|
108
|
+
});
|
|
109
|
+
}
|
|
110
|
+
else {
|
|
111
|
+
res.writeHead(404);
|
|
112
|
+
res.end();
|
|
113
|
+
}
|
|
114
|
+
});
|
|
115
|
+
return new Promise((resolve) => {
|
|
116
|
+
server.listen(MOCK_API_PORT, "0.0.0.0", () => {
|
|
117
|
+
logger_1.logger.info(`[mock-api] Listening on :${MOCK_API_PORT}`);
|
|
118
|
+
resolve(server);
|
|
119
|
+
});
|
|
120
|
+
});
|
|
121
|
+
}
|
|
122
|
+
// ── Main ─────────────────────────────────────────────────────────────────────
|
|
123
|
+
async function sleep(ms) {
|
|
124
|
+
return new Promise((r) => setTimeout(r, ms));
|
|
125
|
+
}
|
|
126
|
+
/**
 * Run the standalone end-to-end check.
 *
 * Starts the mock API, enqueues one BullMQ job for RUN_ID, runs the real
 * processor against it, waits up to 90 s for a completed/failed event,
 * prints a report and exits the process with 0 (pass) or 1 (fail).
 */
async function main() {
    (0, logger_1.setLogLevel)("debug");
    logger_1.logger.info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
    logger_1.logger.info(" dory-worker standalone test harness");
    logger_1.logger.info(` Run ID : ${RUN_ID}`);
    logger_1.logger.info(` Test image : ${TEST_DOCKER_IMAGE}`);
    logger_1.logger.info(` Redis : ${REDIS_HOST}:${REDIS_PORT}`);
    logger_1.logger.info(` Mock API : http://localhost:${MOCK_API_PORT}`);
    logger_1.logger.info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
    // ── 1. Start mock API ─────────────────────────────────────────────────
    const server = await startMockApi();
    // ── 2. Connect to Redis ───────────────────────────────────────────────
    const redis = new ioredis_1.default({
        host: REDIS_HOST,
        port: REDIS_PORT,
        maxRetriesPerRequest: null,
    });
    // ── 3. Push test job to BullMQ queue ──────────────────────────────────
    const queue = new bullmq_1.Queue(QUEUE_NAME, { connection: redis });
    // The queue is dedicated to this harness, so wipe anything a previous
    // aborted run left behind; a stale job would be processed first and
    // break the "exactly one config request" expectation below.
    await queue.obliterate({ force: true });
    const job = await queue.add("execute-run", { runId: RUN_ID });
    logger_1.logger.info(`[test] Job enqueued → id=${job.id}`);
    // ── 4. Build worker config pointing at mock API ───────────────────────
    const config = {
        redisHost: REDIS_HOST,
        redisPort: REDIS_PORT,
        apiBaseUrl: `http://localhost:${MOCK_API_PORT}`,
        maxConcurrentRuns: 1,
        workerId: "test-worker",
        logLevel: "debug",
        // GCS pass-through not needed for hello-world test
        gcsBucket: undefined,
        gcpProjectId: undefined,
        dockerImage: TEST_DOCKER_IMAGE,
        containerCount: 1,
    };
    // ── 5. Start worker ───────────────────────────────────────────────────
    logger_1.logger.info("[test] Starting worker...");
    let jobCompleted = false;
    let jobFailed = false;
    const worker = new bullmq_1.Worker(QUEUE_NAME, async (j) => (0, processor_1.processRunJob)(config, j), {
        connection: redis,
        concurrency: 1,
        lockDuration: 300_000,
    });
    worker.on("completed", (j) => {
        logger_1.logger.info(`[test] ✓ Worker completed job ${j.id}`);
        jobCompleted = true;
    });
    worker.on("failed", (j, err) => {
        logger_1.logger.error(`[test] ✗ Worker failed job ${j?.id}: ${err.message}`);
        jobFailed = true;
    });
    // ── 6. Wait for result (max 90s) ──────────────────────────────────────
    const deadline = Date.now() + 90_000;
    while (!jobCompleted && !jobFailed && Date.now() < deadline) {
        await sleep(500);
    }
    // Neither event fired before the deadline: the job is still in flight.
    const timedOut = !jobCompleted && !jobFailed;
    // ── 7. Report ─────────────────────────────────────────────────────────
    logger_1.logger.info("");
    logger_1.logger.info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
    logger_1.logger.info(" TEST RESULTS");
    logger_1.logger.info(` Config requests : ${configRequests} (expected 1)`);
    logger_1.logger.info(` Status callbacks : ${statusUpdates.length} (expected ≥1)`);
    statusUpdates.forEach((u, i) => logger_1.logger.info(` [${i + 1}] status=${u.status}${u.exitCode !== undefined ? ` exitCode=${u.exitCode}` : ""}`));
    logger_1.logger.info(` Job completed : ${jobCompleted}`);
    logger_1.logger.info(` Job failed : ${jobFailed}`);
    logger_1.logger.info(` Timed out : ${timedOut}`);
    const passed = jobCompleted &&
        !jobFailed &&
        configRequests === 1 &&
        statusUpdates.length >= 1 &&
        statusUpdates.some((u) => ["running", "completed"].includes(u.status));
    logger_1.logger.info("");
    logger_1.logger.info(passed ? " ✓ ALL CHECKS PASSED" : " ✗ SOME CHECKS FAILED");
    logger_1.logger.info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
    // ── Cleanup ───────────────────────────────────────────────────────────
    // After a timeout, force-close so shutdown does not block on the job
    // that is still running; otherwise close gracefully as before.
    await worker.close(timedOut);
    await queue.close();
    await redis.quit();
    server.close();
    process.exit(passed ? 0 : 1);
}
|
|
207
|
+
// Entry point: any error escaping main() is printed and turned into exit code 1.
main().catch((fatal) => {
    console.error("[test-harness] Fatal:", fatal);
    process.exit(1);
});
|
|
211
|
+
//# sourceMappingURL=test-harness.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"test-harness.js","sourceRoot":"","sources":["../src/test-harness.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;GAeG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,2CAA6B;AAC7B,mCAAuC;AACvC,sDAA8B;AAE9B,2CAA4C;AAC5C,qCAA+C;AAE/C,iFAAiF;AAEjF,MAAM,aAAa,GAAG,KAAK,CAAC;AAC5B,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,WAAW,CAAC;AACzD,MAAM,UAAU,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,MAAM,EAAE,EAAE,CAAC,CAAC;AAClE,MAAM,MAAM,GAAG,YAAY,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;AACxC,+EAA+E;AAC/E,qDAAqD;AACrD,MAAM,UAAU,GAAG,kBAAkB,CAAC;AAEtC,yEAAyE;AACzE,yEAAyE;AACzE,qEAAqE;AACrE,MAAM,iBAAiB,GACrB,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,uBAAuB,CAAC;AAU3D,MAAM,aAAa,GAAmB,EAAE,CAAC;AACzC,IAAI,cAAc,GAAG,CAAC,CAAC;AAEvB,SAAS,YAAY;IACnB,MAAM,WAAW,GAAG;QAClB,KAAK,EAAE,OAAO,MAAM,EAAE;QACtB,KAAK,EAAE,MAAM;QACb,eAAe,EAAE,gBAAgB;QACjC,SAAS,EAAE;YACT,WAAW,EAAE,SAAS;YACtB,QAAQ,EAAE,EAAE;YACZ,SAAS,EAAE,CAAC,qBAAqB,CAAC;SACnC;KACF,CAAC;IAEF,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;QAC5C,MAAM,GAAG,GAAG,GAAG,CAAC,GAAG,IAAI,EAAE,CAAC;QAC1B,eAAM,CAAC,KAAK,CAAC,cAAc,GAAG,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC,CAAC;QAEhD,IAAI,GAAG,CAAC,MAAM,KAAK,KAAK,IAAI,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;YACpD,cAAc,EAAE,CAAC;YACjB,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE,CAAC,CAAC;YAC3D,GAAG,CAAC,GAAG,CACL,IAAI,CAAC,SAAS,CAAC;gBACb,KAAK,EAAE,MAAM;gBACb,WAAW;gBACX,aAAa,EAAE,GAAG;gBAClB,gBAAgB,EAAE,EAAE;gBACpB,WAAW,EAAE,iBAAiB;aAC/B,CAAC,CACH,CAAC;QACJ,CAAC;aAAM,IAAI,GAAG,CAAC,MAAM,KAAK,MAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;YAC5D,IAAI,IAAI,GAAG,EAAE,CAAC;YACd,GAAG,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,IAAI,IAAI,KAAK,CAAC,CAAC,CAAC;YAC3C,GAAG,CAAC,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE;gBACjB,MAAM,IAAI,GAAiB,IAAI,CAAC,KAAK,CAAC,IAAI,IAAI,IAAI,CAAC,CAAC;gBACpD,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACzB,eAAM,CAAC,IAAI,CACT,uCAAuC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,aAAa,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CACvH,CAAC;gBACF,
GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE,CAAC,CAAC;gBAC3D,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;YAC7C,CAAC,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;YACnB,GAAG,CAAC,GAAG,EAAE,CAAC;QACZ,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,MAAM,CAAC,MAAM,CAAC,aAAa,EAAE,SAAS,EAAE,GAAG,EAAE;YAC3C,eAAM,CAAC,IAAI,CAAC,4BAA4B,aAAa,EAAE,CAAC,CAAC;YACzD,OAAO,CAAC,MAAM,CAAC,CAAC;QAClB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAED,gFAAgF;AAEhF,KAAK,UAAU,KAAK,CAAC,EAAU;IAC7B,OAAO,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;AAC/C,CAAC;AAED,KAAK,UAAU,IAAI;IACjB,IAAA,oBAAW,EAAC,OAAO,CAAC,CAAC;IAErB,eAAM,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;IAC5D,eAAM,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC;IACrD,eAAM,CAAC,IAAI,CAAC,mBAAmB,MAAM,EAAE,CAAC,CAAC;IACzC,eAAM,CAAC,IAAI,CAAC,mBAAmB,iBAAiB,EAAE,CAAC,CAAC;IACpD,eAAM,CAAC,IAAI,CAAC,mBAAmB,UAAU,IAAI,UAAU,EAAE,CAAC,CAAC;IAC3D,eAAM,CAAC,IAAI,CAAC,oCAAoC,aAAa,EAAE,CAAC,CAAC;IACjE,eAAM,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;IAE5D,yEAAyE;IACzE,MAAM,MAAM,GAAG,MAAM,YAAY,EAAE,CAAC;IAEpC,yEAAyE;IACzE,MAAM,KAAK,GAAG,IAAI,iBAAO,CAAC;QACxB,IAAI,EAAE,UAAU;QAChB,IAAI,EAAE,UAAU;QAChB,oBAAoB,EAAE,IAAI;KAC3B,CAAC,CAAC;IAEH,yEAAyE;IACzE,MAAM,KAAK,GAAG,IAAI,cAAK,CAAC,UAAU,EAAE,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,CAAC;IAC3D,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,aAAa,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;IAC9D,eAAM,CAAC,IAAI,CAAC,4BAA4B,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC;IAElD,yEAAyE;IACzE,MAAM,MAAM,GAAiB;QAC3B,SAAS,EAAE,UAAU;QACrB,SAAS,EAAE,UAAU;QACrB,UAAU,EAAE,oBAAoB,aAAa,EAAE;QAC/C,iBAAiB,EAAE,CAAC;QACpB,QAAQ,EAAE,aAAa;QACvB,QAAQ,EAAE,OAAO;QACjB,mDAAmD;QACnD,SAAS,EAAE,SAAS;QACpB,YAAY,EAAE,SAAS;QACvB,WAAW,EAAE,iBAAiB;QAC9B,cAAc,EAAE,CAAC;KAClB,CAAC;IAEF,yEAAyE;IACzE,eAAM,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;IAEzC,IAAI,YAAY,GAAG,KAAK,CAAC;IACzB,IAAI,SAAS,GAAG,KAAK,CAAC;IAEtB,MAAM,MAAM,GAAG,IAAI,eAAM,CAAC,UAAU,EAAE,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC,IAA
A,yBAAa,EAAC,MAAM,EAAE,CAAC,CAAC,EAAE;QAC3E,UAAU,EAAE,KAAK;QACjB,WAAW,EAAE,CAAC;QACd,YAAY,EAAE,OAAO;KACtB,CAAC,CAAC;IAEH,MAAM,CAAC,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,EAAE;QAC3B,eAAM,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACrD,YAAY,GAAG,IAAI,CAAC;IACtB,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,EAAE,CAAC,QAAQ,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE;QAC7B,eAAM,CAAC,KAAK,CAAC,8BAA8B,CAAC,EAAE,EAAE,KAAK,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;QACpE,SAAS,GAAG,IAAI,CAAC;IACnB,CAAC,CAAC,CAAC;IAEH,yEAAyE;IACzE,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,MAAM,CAAC;IACrC,OAAO,CAAC,YAAY,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,QAAQ,EAAE,CAAC;QAC5D,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;IACnB,CAAC;IAED,yEAAyE;IACzE,eAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAChB,eAAM,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;IAC5D,eAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;IAC9B,eAAM,CAAC,IAAI,CAAC,0BAA0B,cAAc,eAAe,CAAC,CAAC;IACrE,eAAM,CAAC,IAAI,CAAC,0BAA0B,aAAa,CAAC,MAAM,gBAAgB,CAAC,CAAC;IAC5E,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAC7B,eAAM,CAAC,IAAI,CACT,QAAQ,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAChG,CACF,CAAC;IACF,eAAM,CAAC,IAAI,CAAC,0BAA0B,YAAY,EAAE,CAAC,CAAC;IACtD,eAAM,CAAC,IAAI,CAAC,0BAA0B,SAAS,EAAE,CAAC,CAAC;IAEnD,MAAM,MAAM,GACV,YAAY;QACZ,CAAC,SAAS;QACV,cAAc,KAAK,CAAC;QACpB,aAAa,CAAC,MAAM,IAAI,CAAC;QACzB,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;IAEzE,eAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAChB,eAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,uBAAuB,CAAC,CAAC,CAAC,wBAAwB,CAAC,CAAC;IACzE,eAAM,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;IAE5D,yEAAyE;IACzE,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;IACrB,MAAM,KAAK,CAAC,KAAK,EAAE,CAAC;IACpB,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;IACnB,MAAM,CAAC,KAAK,EAAE,CAAC;IAEf,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAC/B,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACnB,OAAO,CAAC,KAAK,CAAC,uBAAuB,EAAE,GAAG,CAAC,CAAC;IAC5C
,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@nikx/dory-worker",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.2",
|
|
4
4
|
"description": "Standalone BullMQ worker for Dory – runs on any machine with Docker (including Raspberry Pi)",
|
|
5
5
|
"main": "dist/cli.js",
|
|
6
6
|
"bin": {
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
"build": "tsc",
|
|
11
11
|
"start": "node dist/cli.js",
|
|
12
12
|
"dev": "npx ts-node --project tsconfig.json src/cli.ts",
|
|
13
|
+
"test": "npx ts-node --project tsconfig.json src/test-harness.ts",
|
|
13
14
|
"prepare": "npm run build"
|
|
14
15
|
},
|
|
15
16
|
"keywords": [
|
package/src/config.ts
CHANGED
|
@@ -20,6 +20,14 @@ export interface WorkerConfig {
|
|
|
20
20
|
// Fallback docker image if the API doesn't return one in /config
|
|
21
21
|
dockerImage?: string;
|
|
22
22
|
|
|
23
|
+
// Distributed crawling — separate Redis for the per-run crawler queue.
|
|
24
|
+
// When set, the worker spawns `containerCount` containers per job and
|
|
25
|
+
// wires them all to the same Redis queue (QUEUE_NAME = runId).
|
|
26
|
+
crawlerRedisUrl?: string;
|
|
27
|
+
|
|
28
|
+
// How many containers to spawn per job (default 1 = single-container mode).
|
|
29
|
+
containerCount: number;
|
|
30
|
+
|
|
23
31
|
// Worker behaviour
|
|
24
32
|
maxConcurrentRuns: number;
|
|
25
33
|
workerId: string;
|
|
@@ -57,6 +65,9 @@ export function loadConfig(): WorkerConfig {
|
|
|
57
65
|
|
|
58
66
|
dockerImage: process.env.DOCKER_IMAGE,
|
|
59
67
|
|
|
68
|
+
crawlerRedisUrl: process.env.CRAWLER_REDIS_URL,
|
|
69
|
+
containerCount: parseInt(process.env.CONTAINER_COUNT ?? "1", 10),
|
|
70
|
+
|
|
60
71
|
maxConcurrentRuns: parseInt(process.env.MAX_CONCURRENT_RUNS ?? "2", 10),
|
|
61
72
|
workerId: process.env.WORKER_ID ?? `dory-worker-${process.pid}`,
|
|
62
73
|
logLevel: process.env.LOG_LEVEL ?? "info",
|
package/src/docker.ts
CHANGED
|
@@ -12,6 +12,16 @@ interface ActorConfigEnvelope {
|
|
|
12
12
|
userInput: Record<string, unknown>;
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
+
/**
 * Settings for one container of a distributed (multi-container) run.
 * Each field is forwarded to the container as an environment variable.
 */
interface DistributedOpts {
  /** Full URL of the Redis instance backing the per-run crawler queue, e.g. redis://host:6379 */
  crawlerRedisUrl: string;
  /** Namespace for the run's Redis keys — typically the runId */
  queueName: string;
  /** Readable label identifying this container, e.g. "worker-1" */
  workerId: string;
}
|
|
24
|
+
|
|
15
25
|
/**
|
|
16
26
|
* Rewrite localhost URLs to host.docker.internal so containers can reach
|
|
17
27
|
* the host machine. Public URLs (Railway, GCS, etc.) are left unchanged.
|
|
@@ -26,6 +36,7 @@ function buildDockerEnvArgs(
|
|
|
26
36
|
config: WorkerConfig,
|
|
27
37
|
actorConfig: ActorConfigEnvelope,
|
|
28
38
|
memoryLimit: number,
|
|
39
|
+
distributed?: DistributedOpts,
|
|
29
40
|
): string[] {
|
|
30
41
|
const configBase64 = Buffer.from(JSON.stringify(actorConfig)).toString(
|
|
31
42
|
"base64",
|
|
@@ -47,6 +58,13 @@ function buildDockerEnvArgs(
|
|
|
47
58
|
);
|
|
48
59
|
}
|
|
49
60
|
|
|
61
|
+
// Distributed mode — wire the container to the shared crawler Redis queue
|
|
62
|
+
if (distributed) {
|
|
63
|
+
args.push(`-e REDIS_URL=${distributed.crawlerRedisUrl}`);
|
|
64
|
+
args.push(`-e QUEUE_NAME=${distributed.queueName}`);
|
|
65
|
+
args.push(`-e WORKER_ID=${distributed.workerId}`);
|
|
66
|
+
}
|
|
67
|
+
|
|
50
68
|
return args;
|
|
51
69
|
}
|
|
52
70
|
|
|
@@ -62,10 +80,14 @@ export async function startContainer(
|
|
|
62
80
|
actorConfig: ActorConfigEnvelope,
|
|
63
81
|
memoryLimit: number,
|
|
64
82
|
dockerImage: string,
|
|
83
|
+
distributed?: DistributedOpts,
|
|
65
84
|
): Promise<string> {
|
|
66
|
-
const envArgs = buildDockerEnvArgs(
|
|
67
|
-
|
|
68
|
-
|
|
85
|
+
const envArgs = buildDockerEnvArgs(
|
|
86
|
+
config,
|
|
87
|
+
actorConfig,
|
|
88
|
+
memoryLimit,
|
|
89
|
+
distributed,
|
|
90
|
+
).join(" ");
|
|
69
91
|
const memoryStr = `${memoryLimit}m`;
|
|
70
92
|
|
|
71
93
|
const cmd = [
|
|
@@ -81,6 +103,44 @@ export async function startContainer(
|
|
|
81
103
|
return stdout.trim();
|
|
82
104
|
}
|
|
83
105
|
|
|
106
|
+
/**
 * Spawn `count` containers for a distributed run.
 * All containers share the same Redis queue (`queueName`) so they
 * collaborate on the same crawl without duplicating work.
 *
 * Containers are started one after another and labelled worker-1 … worker-N.
 *
 * @param config          Worker configuration forwarded to `startContainer`.
 * @param actorConfig     Actor config envelope forwarded to every container.
 * @param memoryLimit     Memory limit forwarded to `startContainer`.
 * @param dockerImage     Image to run.
 * @param crawlerRedisUrl Redis URL shared by all containers of the run.
 * @param queueName       Shared queue name (the caller passes the runId).
 * @param count           Number of containers to start; must be a positive integer.
 * @returns The container IDs in start order (worker-1 … worker-N).
 * @throws Error when `count` is not a positive integer, or when
 *         `startContainer` rejects. NOTE(review): containers started before
 *         a failing one are not stopped here — confirm the caller cleans up.
 */
export async function startContainersForRun(
  config: WorkerConfig,
  actorConfig: ActorConfigEnvelope,
  memoryLimit: number,
  dockerImage: string,
  crawlerRedisUrl: string,
  queueName: string,
  count: number,
): Promise<string[]> {
  // A NaN/zero/negative count (e.g. from an unparsable CONTAINER_COUNT) would
  // skip the loop entirely and hand back an empty list, which the caller
  // would then treat as a run whose containers all exited cleanly.
  if (!Number.isInteger(count) || count < 1) {
    throw new Error(
      `Invalid container count "${count}" — expected a positive integer`,
    );
  }

  const containerIds: string[] = [];
  for (let i = 1; i <= count; i++) {
    const workerId = `worker-${i}`;
    const id = await startContainer(
      config,
      actorConfig,
      memoryLimit,
      dockerImage,
      {
        crawlerRedisUrl,
        queueName,
        workerId,
      },
    );
    logger.info(
      `[${queueName}] ${workerId} container started: ${id.slice(0, 12)}`,
    );
    containerIds.push(id);
  }
  return containerIds;
}
|
|
143
|
+
|
|
84
144
|
/**
|
|
85
145
|
* Block until the container exits and return its exit code.
|
|
86
146
|
* If the container exceeds `timeoutSecs`, it is force-killed.
|
package/src/processor.ts
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
import type { Job } from "bullmq";
|
|
2
2
|
import type { WorkerConfig } from "./config";
|
|
3
|
-
import {
|
|
3
|
+
import {
|
|
4
|
+
startContainer,
|
|
5
|
+
startContainersForRun,
|
|
6
|
+
waitForContainer,
|
|
7
|
+
} from "./docker";
|
|
4
8
|
import { logger } from "./logger";
|
|
5
9
|
|
|
6
10
|
interface RunConfigResponse {
|
|
@@ -14,6 +18,8 @@ interface RunConfigResponse {
|
|
|
14
18
|
memoryLimitMb: number;
|
|
15
19
|
actorTimeoutSecs: number;
|
|
16
20
|
dockerImage: string;
|
|
21
|
+
/** Per-actor parallelism set in dory-api. Falls back to CONTAINER_COUNT env var (default 1). */
|
|
22
|
+
containerCount?: number;
|
|
17
23
|
}
|
|
18
24
|
|
|
19
25
|
async function apiGet<T>(apiBaseUrl: string, path: string): Promise<T> {
|
|
@@ -86,30 +92,56 @@ export async function processRunJob(
|
|
|
86
92
|
status: "running",
|
|
87
93
|
});
|
|
88
94
|
|
|
89
|
-
// ── Step 3: Start container
|
|
95
|
+
// ── Step 3: Start container(s) ────────────────────────────────────────
|
|
90
96
|
const dockerImage = runConfig.dockerImage || config.dockerImage;
|
|
91
97
|
if (!dockerImage) {
|
|
92
98
|
throw new Error(
|
|
93
99
|
`No docker image: API did not return one and DOCKER_IMAGE env var is not set`,
|
|
94
100
|
);
|
|
95
101
|
}
|
|
96
|
-
|
|
102
|
+
|
|
103
|
+
// API-returned containerCount takes precedence over the worker env var
|
|
104
|
+
const containerCount = runConfig.containerCount ?? config.containerCount;
|
|
105
|
+
const isDistributed = !!config.crawlerRedisUrl || containerCount > 1;
|
|
106
|
+
|
|
107
|
+
let containerIds: string[];
|
|
97
108
|
try {
|
|
98
|
-
|
|
99
|
-
config
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
109
|
+
if (isDistributed) {
|
|
110
|
+
if (!config.crawlerRedisUrl) {
|
|
111
|
+
throw new Error(
|
|
112
|
+
"CONTAINER_COUNT > 1 but CRAWLER_REDIS_URL is not set — containers cannot share a queue",
|
|
113
|
+
);
|
|
114
|
+
}
|
|
115
|
+
containerIds = await startContainersForRun(
|
|
116
|
+
config,
|
|
117
|
+
runConfig.actorConfig,
|
|
118
|
+
runConfig.memoryLimitMb,
|
|
119
|
+
dockerImage,
|
|
120
|
+
config.crawlerRedisUrl,
|
|
121
|
+
runId, // QUEUE_NAME = runId for job isolation
|
|
122
|
+
containerCount,
|
|
123
|
+
);
|
|
124
|
+
logger.info(
|
|
125
|
+
`[${runId}] ${containerIds.length} containers started (distributed mode, queue: ${runId})`,
|
|
126
|
+
);
|
|
127
|
+
} else {
|
|
128
|
+
const id = await startContainer(
|
|
129
|
+
config,
|
|
130
|
+
runConfig.actorConfig,
|
|
131
|
+
runConfig.memoryLimitMb,
|
|
132
|
+
dockerImage,
|
|
133
|
+
);
|
|
134
|
+
containerIds = [id];
|
|
135
|
+
logger.info(`[${runId}] Container started: ${id.slice(0, 12)}`);
|
|
136
|
+
}
|
|
105
137
|
} catch (err) {
|
|
106
138
|
logger.error(
|
|
107
|
-
`[${runId}] Failed to start container: ${(err as Error).message}`,
|
|
139
|
+
`[${runId}] Failed to start container(s): ${(err as Error).message}`,
|
|
108
140
|
);
|
|
109
141
|
await apiPatch(config.apiBaseUrl, `/api/runs/${runId}/status`, {
|
|
110
142
|
runId,
|
|
111
143
|
status: "failed",
|
|
112
|
-
error: `Failed to start container: ${(err as Error).message}`,
|
|
144
|
+
error: `Failed to start container(s): ${(err as Error).message}`,
|
|
113
145
|
});
|
|
114
146
|
throw err;
|
|
115
147
|
}
|
|
@@ -128,12 +160,21 @@ export async function processRunJob(
|
|
|
128
160
|
}, 120_000);
|
|
129
161
|
}
|
|
130
162
|
|
|
131
|
-
// ── Step 5: Wait for
|
|
163
|
+
// ── Step 5: Wait for all containers ──────────────────────────────────
|
|
132
164
|
let exitCode = -1;
|
|
133
165
|
try {
|
|
134
|
-
|
|
166
|
+
const exitCodes = await Promise.all(
|
|
167
|
+
containerIds.map((id) =>
|
|
168
|
+
waitForContainer(id, runConfig.actorTimeoutSecs),
|
|
169
|
+
),
|
|
170
|
+
);
|
|
171
|
+
// Use the worst exit code — if any container failed, the run failed
|
|
172
|
+
exitCode = exitCodes.reduce(
|
|
173
|
+
(worst, code) => (code !== 0 ? code : worst),
|
|
174
|
+
0,
|
|
175
|
+
);
|
|
135
176
|
logger.info(
|
|
136
|
-
`[${runId}]
|
|
177
|
+
`[${runId}] All ${containerIds.length} container(s) exited — codes: [${exitCodes.join(", ")}]`,
|
|
137
178
|
);
|
|
138
179
|
} finally {
|
|
139
180
|
if (lockTimer) clearInterval(lockTimer);
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Standalone test harness — no dory-api needed.
|
|
3
|
+
*
|
|
4
|
+
* What it does:
|
|
5
|
+
* 1. Starts a mock HTTP server that emulates the two dory-api endpoints
|
|
6
|
+
* the worker uses: GET /api/runs/:id/config and POST /api/runs/:id/status
|
|
7
|
+
* 2. Pushes a real BullMQ job into local Redis
|
|
8
|
+
* 3. Starts the real worker (same code that runs in production)
|
|
9
|
+
* 4. Waits for the job to finish and reports pass / fail
|
|
10
|
+
*
|
|
11
|
+
* Docker image used: whatever TEST_DOCKER_IMAGE names, defaulting to
* "dory-worker-test:fake"; any image that exits 0 quickly will do.
|
|
13
|
+
*
|
|
14
|
+
* Usage:
|
|
15
|
+
* npx ts-node --project tsconfig.json src/test-harness.ts
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import * as http from "http";
|
|
19
|
+
import { Queue, Worker } from "bullmq";
|
|
20
|
+
import IORedis from "ioredis";
|
|
21
|
+
import type { WorkerConfig } from "./config";
|
|
22
|
+
import { processRunJob } from "./processor";
|
|
23
|
+
import { logger, setLogLevel } from "./logger";
|
|
24
|
+
|
|
25
|
+
// ── Config ────────────────────────────────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
const MOCK_API_PORT = 14500;
|
|
28
|
+
const REDIS_HOST = process.env.REDIS_HOST ?? "localhost";
|
|
29
|
+
const REDIS_PORT = parseInt(process.env.REDIS_PORT ?? "6379", 10);
|
|
30
|
+
const RUN_ID = `test-run-${Date.now()}`;
|
|
31
|
+
// Dedicated test queue — avoids interference from any running dory-api workers
|
|
32
|
+
// which consume from the real "run-execution" queue.
|
|
33
|
+
const QUEUE_NAME = "dory-worker-test";
|
|
34
|
+
|
|
35
|
+
// The default image, "dory-worker-test:fake", must already exist locally and is expected to exit quickly
|
|
36
|
+
// when given an empty/invalid config (no startUrls → no work → exits 0).
|
|
37
|
+
// Override with TEST_DOCKER_IMAGE env var to test a different image.
|
|
38
|
+
const TEST_DOCKER_IMAGE =
|
|
39
|
+
process.env.TEST_DOCKER_IMAGE ?? "dory-worker-test:fake";
|
|
40
|
+
|
|
41
|
+
// ── Mock API server ───────────────────────────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
/** JSON body of one status callback received by the mock API. */
interface StatusUpdate {
  /** Reported run state, e.g. "running" or "completed". */
  status: string;
  /** Exit code, present only when the callback includes one. */
  exitCode?: number;
  /** Error description, present only when the callback includes one. */
  error?: string;
}
|
|
48
|
+
|
|
49
|
+
const statusUpdates: StatusUpdate[] = [];
|
|
50
|
+
let configRequests = 0;
|
|
51
|
+
|
|
52
|
+
/**
 * Start the in-process HTTP server that stands in for dory-api.
 *
 * Routes:
 *  - GET  …/config → canned run config for RUN_ID (increments `configRequests`)
 *  - POST or PATCH …/status → body is recorded in `statusUpdates`
 *  - anything else → 404
 *
 * @returns A promise that resolves with the server once it is listening on
 *          MOCK_API_PORT, or rejects if binding the port fails.
 */
function startMockApi(): Promise<http.Server> {
  const actorConfig = {
    jobId: `job-${RUN_ID}`,
    runId: RUN_ID,
    actorInstanceId: "test-actor-001",
    userInput: {
      crawlerType: "cheerio",
      handlers: [],
      startUrls: ["https://example.com"],
    },
  };

  const server = http.createServer((req, res) => {
    const url = req.url ?? "";
    logger.debug(`[mock-api] ${req.method} ${url}`);

    if (req.method === "GET" && url.includes("/config")) {
      configRequests++;
      res.writeHead(200, { "Content-Type": "application/json" });
      res.end(
        JSON.stringify({
          runId: RUN_ID,
          actorConfig,
          memoryLimitMb: 256,
          actorTimeoutSecs: 60,
          dockerImage: TEST_DOCKER_IMAGE,
        }),
      );
      return;
    }

    // The worker reports status through its `apiPatch` helper, so PATCH is
    // accepted alongside POST.
    // NOTE(review): confirm which verb apiPatch actually sends.
    const isStatusWrite =
      (req.method === "POST" || req.method === "PATCH") &&
      url.includes("/status");

    if (isStatusWrite) {
      let body = "";
      req.on("data", (chunk) => (body += chunk));
      req.on("end", () => {
        let data: StatusUpdate;
        try {
          data = JSON.parse(body || "{}");
        } catch {
          // A malformed body must not throw inside the event handler and
          // take the whole harness down; answer 400 and record nothing.
          res.writeHead(400, { "Content-Type": "application/json" });
          res.end(JSON.stringify({ message: "invalid JSON body" }));
          return;
        }
        statusUpdates.push(data);
        logger.info(
          `[mock-api] status callback → status=${data.status}${data.exitCode !== undefined ? ` exitCode=${data.exitCode}` : ""}`,
        );
        res.writeHead(200, { "Content-Type": "application/json" });
        res.end(JSON.stringify({ message: "ok" }));
      });
      return;
    }

    res.writeHead(404);
    res.end();
  });

  return new Promise((resolve, reject) => {
    // Surface bind failures (e.g. port already in use) through the promise.
    server.once("error", reject);
    server.listen(MOCK_API_PORT, "0.0.0.0", () => {
      // Listening succeeded; stop routing later server errors into a
      // promise that has already settled.
      server.off("error", reject);
      logger.info(`[mock-api] Listening on :${MOCK_API_PORT}`);
      resolve(server);
    });
  });
}
|
|
105
|
+
|
|
106
|
+
// ── Main ─────────────────────────────────────────────────────────────────────
|
|
107
|
+
|
|
108
|
+
/** Pause the caller for roughly `ms` milliseconds; the timer-backed promise never rejects. */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
111
|
+
|
|
112
|
+
/**
 * Run the standalone end-to-end check.
 *
 * Starts the mock API, enqueues one BullMQ job for RUN_ID, runs the real
 * processor against it, waits up to 90 s for a completed/failed event,
 * prints a report and exits the process with 0 (pass) or 1 (fail).
 */
async function main(): Promise<void> {
  setLogLevel("debug");

  logger.info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
  logger.info(" dory-worker standalone test harness");
  logger.info(` Run ID : ${RUN_ID}`);
  logger.info(` Test image : ${TEST_DOCKER_IMAGE}`);
  logger.info(` Redis : ${REDIS_HOST}:${REDIS_PORT}`);
  logger.info(` Mock API : http://localhost:${MOCK_API_PORT}`);
  logger.info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");

  // ── 1. Start mock API ─────────────────────────────────────────────────
  const server = await startMockApi();

  // ── 2. Connect to Redis ───────────────────────────────────────────────
  const redis = new IORedis({
    host: REDIS_HOST,
    port: REDIS_PORT,
    maxRetriesPerRequest: null,
  });

  // ── 3. Push test job to BullMQ queue ──────────────────────────────────
  const queue = new Queue(QUEUE_NAME, { connection: redis });
  // The queue is dedicated to this harness, so wipe anything a previous
  // aborted run left behind; a stale job would be processed first and
  // break the "exactly one config request" expectation below.
  await queue.obliterate({ force: true });
  const job = await queue.add("execute-run", { runId: RUN_ID });
  logger.info(`[test] Job enqueued → id=${job.id}`);

  // ── 4. Build worker config pointing at mock API ───────────────────────
  const config: WorkerConfig = {
    redisHost: REDIS_HOST,
    redisPort: REDIS_PORT,
    apiBaseUrl: `http://localhost:${MOCK_API_PORT}`,
    maxConcurrentRuns: 1,
    workerId: "test-worker",
    logLevel: "debug",
    // GCS pass-through not needed for hello-world test
    gcsBucket: undefined,
    gcpProjectId: undefined,
    dockerImage: TEST_DOCKER_IMAGE,
    containerCount: 1,
  };

  // ── 5. Start worker ───────────────────────────────────────────────────
  logger.info("[test] Starting worker...");

  let jobCompleted = false;
  let jobFailed = false;

  const worker = new Worker(QUEUE_NAME, async (j) => processRunJob(config, j), {
    connection: redis,
    concurrency: 1,
    lockDuration: 300_000,
  });

  worker.on("completed", (j) => {
    logger.info(`[test] ✓ Worker completed job ${j.id}`);
    jobCompleted = true;
  });

  worker.on("failed", (j, err) => {
    logger.error(`[test] ✗ Worker failed job ${j?.id}: ${err.message}`);
    jobFailed = true;
  });

  // ── 6. Wait for result (max 90s) ──────────────────────────────────────
  const deadline = Date.now() + 90_000;
  while (!jobCompleted && !jobFailed && Date.now() < deadline) {
    await sleep(500);
  }
  // Neither event fired before the deadline: the job is still in flight.
  const timedOut = !jobCompleted && !jobFailed;

  // ── 7. Report ─────────────────────────────────────────────────────────
  logger.info("");
  logger.info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
  logger.info(" TEST RESULTS");
  logger.info(` Config requests : ${configRequests} (expected 1)`);
  logger.info(` Status callbacks : ${statusUpdates.length} (expected ≥1)`);
  statusUpdates.forEach((u, i) =>
    logger.info(
      ` [${i + 1}] status=${u.status}${u.exitCode !== undefined ? ` exitCode=${u.exitCode}` : ""}`,
    ),
  );
  logger.info(` Job completed : ${jobCompleted}`);
  logger.info(` Job failed : ${jobFailed}`);
  logger.info(` Timed out : ${timedOut}`);

  const passed =
    jobCompleted &&
    !jobFailed &&
    configRequests === 1 &&
    statusUpdates.length >= 1 &&
    statusUpdates.some((u) => ["running", "completed"].includes(u.status));

  logger.info("");
  logger.info(passed ? " ✓ ALL CHECKS PASSED" : " ✗ SOME CHECKS FAILED");
  logger.info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");

  // ── Cleanup ───────────────────────────────────────────────────────────
  // After a timeout, force-close so shutdown does not block on the job
  // that is still running; otherwise close gracefully as before.
  await worker.close(timedOut);
  await queue.close();
  await redis.quit();
  server.close();

  process.exit(passed ? 0 : 1);
}
|
|
214
|
+
|
|
215
|
+
// Entry point: any error escaping main() is printed and turned into exit code 1.
main().catch((fatal) => {
  console.error("[test-harness] Fatal:", fatal);
  process.exit(1);
});
|