@nikx/dory-worker 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/docker.d.ts +3 -1
- package/dist/docker.d.ts.map +1 -1
- package/dist/docker.js +6 -3
- package/dist/docker.js.map +1 -1
- package/dist/processor.d.ts.map +1 -1
- package/dist/processor.js +5 -1
- package/dist/processor.js.map +1 -1
- package/dist/test-harness.js +48 -15
- package/dist/test-harness.js.map +1 -1
- package/package.json +2 -1
- package/src/docker.ts +8 -2
- package/src/processor.ts +8 -0
- package/src/test-harness.ts +54 -15
package/dist/docker.d.ts
CHANGED
|
@@ -13,6 +13,8 @@ interface DistributedOpts {
|
|
|
13
13
|
queueName: string;
|
|
14
14
|
/** Human-readable container label, e.g. "worker-1" */
|
|
15
15
|
workerId: string;
|
|
16
|
+
/** Seconds to wait with an empty queue before the container shuts down */
|
|
17
|
+
idleTimeoutSecs: number;
|
|
16
18
|
}
|
|
17
19
|
/**
|
|
18
20
|
* Start the dory-core scraping container in detached mode.
|
|
@@ -29,7 +31,7 @@ export declare function startContainer(config: WorkerConfig, actorConfig: ActorC
|
|
|
29
31
|
*
|
|
30
32
|
* Returns the list of container IDs in order (worker-1 … worker-N).
|
|
31
33
|
*/
|
|
32
|
-
export declare function startContainersForRun(config: WorkerConfig, actorConfig: ActorConfigEnvelope, memoryLimit: number, dockerImage: string, crawlerRedisUrl: string, queueName: string, count: number): Promise<string[]>;
|
|
34
|
+
export declare function startContainersForRun(config: WorkerConfig, actorConfig: ActorConfigEnvelope, memoryLimit: number, dockerImage: string, crawlerRedisUrl: string, queueName: string, count: number, idleTimeoutSecs: number): Promise<string[]>;
|
|
33
35
|
/**
|
|
34
36
|
* Block until the container exits and return its exit code.
|
|
35
37
|
* If the container exceeds `timeoutSecs`, it is force-killed.
|
package/dist/docker.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"docker.d.ts","sourceRoot":"","sources":["../src/docker.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAK7C,UAAU,mBAAmB;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,eAAe,EAAE,MAAM,CAAC;IACxB,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED,kFAAkF;AAClF,UAAU,eAAe;IACvB,2EAA2E;IAC3E,eAAe,EAAE,MAAM,CAAC;IACxB,gDAAgD;IAChD,SAAS,EAAE,MAAM,CAAC;IAClB,sDAAsD;IACtD,QAAQ,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"docker.d.ts","sourceRoot":"","sources":["../src/docker.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAK7C,UAAU,mBAAmB;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,eAAe,EAAE,MAAM,CAAC;IACxB,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED,kFAAkF;AAClF,UAAU,eAAe;IACvB,2EAA2E;IAC3E,eAAe,EAAE,MAAM,CAAC;IACxB,gDAAgD;IAChD,SAAS,EAAE,MAAM,CAAC;IAClB,sDAAsD;IACtD,QAAQ,EAAE,MAAM,CAAC;IACjB,0EAA0E;IAC1E,eAAe,EAAE,MAAM,CAAC;CACzB;AAkDD;;;;;;GAMG;AACH,wBAAsB,cAAc,CAClC,MAAM,EAAE,YAAY,EACpB,WAAW,EAAE,mBAAmB,EAChC,WAAW,EAAE,MAAM,EACnB,WAAW,EAAE,MAAM,EACnB,WAAW,CAAC,EAAE,eAAe,GAC5B,OAAO,CAAC,MAAM,CAAC,CAoBjB;AAED;;;;;;GAMG;AACH,wBAAsB,qBAAqB,CACzC,MAAM,EAAE,YAAY,EACpB,WAAW,EAAE,mBAAmB,EAChC,WAAW,EAAE,MAAM,EACnB,WAAW,EAAE,MAAM,EACnB,eAAe,EAAE,MAAM,EACvB,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,MAAM,EACb,eAAe,EAAE,MAAM,GACtB,OAAO,CAAC,MAAM,EAAE,CAAC,CAsBnB;AAED;;;GAGG;AACH,wBAAsB,gBAAgB,CACpC,WAAW,EAAE,MAAM,EACnB,WAAW,EAAE,MAAM,GAClB,OAAO,CAAC,MAAM,CAAC,CAiBjB"}
|
package/dist/docker.js
CHANGED
|
@@ -14,7 +14,8 @@ const execAsync = (0, util_1.promisify)(child_process_1.exec);
|
|
|
14
14
|
function toDockerHost(url) {
|
|
15
15
|
return url
|
|
16
16
|
.replace("http://localhost", "http://host.docker.internal")
|
|
17
|
-
.replace("https://localhost", "https://host.docker.internal")
|
|
17
|
+
.replace("https://localhost", "https://host.docker.internal")
|
|
18
|
+
.replace("redis://localhost", "redis://host.docker.internal");
|
|
18
19
|
}
|
|
19
20
|
function buildDockerEnvArgs(config, actorConfig, memoryLimit, distributed) {
|
|
20
21
|
const configBase64 = Buffer.from(JSON.stringify(actorConfig)).toString("base64");
|
|
@@ -33,9 +34,10 @@ function buildDockerEnvArgs(config, actorConfig, memoryLimit, distributed) {
|
|
|
33
34
|
}
|
|
34
35
|
// Distributed mode — wire the container to the shared crawler Redis queue
|
|
35
36
|
if (distributed) {
|
|
36
|
-
args.push(`-e REDIS_URL=${distributed.crawlerRedisUrl}`);
|
|
37
|
+
args.push(`-e REDIS_URL=${toDockerHost(distributed.crawlerRedisUrl)}`);
|
|
37
38
|
args.push(`-e QUEUE_NAME=${distributed.queueName}`);
|
|
38
39
|
args.push(`-e WORKER_ID=${distributed.workerId}`);
|
|
40
|
+
args.push(`-e IDLE_TIMEOUT_SECS=${distributed.idleTimeoutSecs}`);
|
|
39
41
|
}
|
|
40
42
|
return args;
|
|
41
43
|
}
|
|
@@ -67,7 +69,7 @@ async function startContainer(config, actorConfig, memoryLimit, dockerImage, dis
|
|
|
67
69
|
*
|
|
68
70
|
* Returns the list of container IDs in order (worker-1 … worker-N).
|
|
69
71
|
*/
|
|
70
|
-
async function startContainersForRun(config, actorConfig, memoryLimit, dockerImage, crawlerRedisUrl, queueName, count) {
|
|
72
|
+
async function startContainersForRun(config, actorConfig, memoryLimit, dockerImage, crawlerRedisUrl, queueName, count, idleTimeoutSecs) {
|
|
71
73
|
const containerIds = [];
|
|
72
74
|
for (let i = 1; i <= count; i++) {
|
|
73
75
|
const workerId = `worker-${i}`;
|
|
@@ -75,6 +77,7 @@ async function startContainersForRun(config, actorConfig, memoryLimit, dockerIma
|
|
|
75
77
|
crawlerRedisUrl,
|
|
76
78
|
queueName,
|
|
77
79
|
workerId,
|
|
80
|
+
idleTimeoutSecs,
|
|
78
81
|
});
|
|
79
82
|
logger_1.logger.info(`[${queueName}] ${workerId} container started: ${id.slice(0, 12)}`);
|
|
80
83
|
containerIds.push(id);
|
package/dist/docker.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"docker.js","sourceRoot":"","sources":["../src/docker.ts"],"names":[],"mappings":";;
|
|
1
|
+
{"version":3,"file":"docker.js","sourceRoot":"","sources":["../src/docker.ts"],"names":[],"mappings":";;AAiFA,wCA0BC;AASD,sDA+BC;AAMD,4CAoBC;AA7KD,iDAAqC;AACrC,+BAAiC;AAEjC,qCAAkC;AAElC,MAAM,SAAS,GAAG,IAAA,gBAAS,EAAC,oBAAI,CAAC,CAAC;AAqBlC;;;GAGG;AACH,SAAS,YAAY,CAAC,GAAW;IAC/B,OAAO,GAAG;SACP,OAAO,CAAC,kBAAkB,EAAE,6BAA6B,CAAC;SAC1D,OAAO,CAAC,mBAAmB,EAAE,8BAA8B,CAAC;SAC5D,OAAO,CAAC,mBAAmB,EAAE,8BAA8B,CAAC,CAAC;AAClE,CAAC;AAED,SAAS,kBAAkB,CACzB,MAAoB,EACpB,WAAgC,EAChC,WAAmB,EACnB,WAA6B;IAE7B,MAAM,YAAY,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CACpE,QAAQ,CACT,CAAC;IAEF,MAAM,IAAI,GAAG;QACX,mBAAmB,YAAY,EAAE;QACjC,mBAAmB,YAAY,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE;QACpD,4BAA4B,WAAW,EAAE;KAC1C,CAAC;IAEF,wEAAwE;IACxE,IAAI,MAAM,CAAC,SAAS;QAAE,IAAI,CAAC,IAAI,CAAC,iBAAiB,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;IACrE,IAAI,MAAM,CAAC,YAAY;QACrB,IAAI,CAAC,IAAI,CAAC,qBAAqB,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC;IACxD,IAAI,MAAM,CAAC,mBAAmB,EAAE,CAAC;QAC/B,IAAI,CAAC,IAAI,CACP,4BAA4B,YAAY,CAAC,MAAM,CAAC,mBAAmB,CAAC,EAAE,CACvE,CAAC;IACJ,CAAC;IAED,0EAA0E;IAC1E,IAAI,WAAW,EAAE,CAAC;QAChB,IAAI,CAAC,IAAI,CAAC,gBAAgB,YAAY,CAAC,WAAW,CAAC,eAAe,CAAC,EAAE,CAAC,CAAC;QACvE,IAAI,CAAC,IAAI,CAAC,iBAAiB,WAAW,CAAC,SAAS,EAAE,CAAC,CAAC;QACpD,IAAI,CAAC,IAAI,CAAC,gBAAgB,WAAW,CAAC,QAAQ,EAAE,CAAC,CAAC;QAClD,IAAI,CAAC,IAAI,CAAC,wBAAwB,WAAW,CAAC,eAAe,EAAE,CAAC,CAAC;IACnE,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;GAMG;AACI,KAAK,UAAU,cAAc,CAClC,MAAoB,EACpB,WAAgC,EAChC,WAAmB,EACnB,WAAmB,EACnB,WAA6B;IAE7B,MAAM,OAAO,GAAG,kBAAkB,CAChC,MAAM,EACN,WAAW,EACX,WAAW,EACX,WAAW,CACZ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACZ,MAAM,SAAS,GAAG,GAAG,WAAW,GAAG,CAAC;IAEpC,MAAM,GAAG,GAAG;QACV,oBAAoB;QACpB,YAAY,SAAS,EAAE;QACvB,8CAA8C;QAC9C,OAAO;QACP,WAAW;KACZ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAEZ,eAAM,CAAC,KAAK,CAAC,eAAe,GAAG,EAAE,CAAC,CAAC;IACnC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,SAAS,CAAC,GAAG,CAAC,CAAC;IACxC,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;AACvB,CAAC;AAED;;;;;;GAMG;AACI,KAAK,UAAU,qBAAqB,CACzC,MAAoB,EACpB,WAAgC,EAChC,WAAmB,EACnB,WAAmB,EACnB,eAAuB,
EACvB,SAAiB,EACjB,KAAa,EACb,eAAuB;IAEvB,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;QAChC,MAAM,QAAQ,GAAG,UAAU,CAAC,EAAE,CAAC;QAC/B,MAAM,EAAE,GAAG,MAAM,cAAc,CAC7B,MAAM,EACN,WAAW,EACX,WAAW,EACX,WAAW,EACX;YACE,eAAe;YACf,SAAS;YACT,QAAQ;YACR,eAAe;SAChB,CACF,CAAC;QACF,eAAM,CAAC,IAAI,CACT,IAAI,SAAS,KAAK,QAAQ,uBAAuB,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CACnE,CAAC;QACF,YAAY,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACxB,CAAC;IACD,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;;GAGG;AACI,KAAK,UAAU,gBAAgB,CACpC,WAAmB,EACnB,WAAmB;IAEnB,IAAI,CAAC;QACH,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,SAAS,CAAC,eAAe,WAAW,EAAE,EAAE;YAC/D,OAAO,EAAE,CAAC,WAAW,GAAG,EAAE,CAAC,GAAG,KAAK;SACpC,CAAC,CAAC;QACH,OAAO,QAAQ,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC;IACrC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,eAAM,CAAC,IAAI,CACT,6BAA6B,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,KAAM,GAAa,CAAC,OAAO,kBAAkB,CACnG,CAAC;QACF,IAAI,CAAC;YACH,MAAM,SAAS,CAAC,eAAe,WAAW,EAAE,CAAC,CAAC;QAChD,CAAC;QAAC,MAAM,CAAC;YACP,kCAAkC;QACpC,CAAC;QACD,OAAO,GAAG,CAAC,CAAC,oBAAoB;IAClC,CAAC;AACH,CAAC"}
|
package/dist/processor.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"processor.d.ts","sourceRoot":"","sources":["../src/processor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,QAAQ,CAAC;AAClC,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAkD7C;;;;;;;;;;GAUG;AACH,wBAAsB,aAAa,CACjC,MAAM,EAAE,YAAY,EACpB,GAAG,EAAE,GAAG,CAAC;IAAE,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,GAC1B,OAAO,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAAC,
|
|
1
|
+
{"version":3,"file":"processor.d.ts","sourceRoot":"","sources":["../src/processor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,QAAQ,CAAC;AAClC,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAkD7C;;;;;;;;;;GAUG;AACH,wBAAsB,aAAa,CACjC,MAAM,EAAE,YAAY,EACpB,GAAG,EAAE,GAAG,CAAC;IAAE,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,GAC1B,OAAO,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAAC,CA+I5C"}
|
package/dist/processor.js
CHANGED
|
@@ -62,6 +62,10 @@ async function processRunJob(config, job) {
|
|
|
62
62
|
// API-returned containerCount takes precedence over the worker env var
|
|
63
63
|
const containerCount = runConfig.containerCount ?? config.containerCount;
|
|
64
64
|
const isDistributed = !!config.crawlerRedisUrl || containerCount > 1;
|
|
65
|
+
// Idle timeout: cap at 60s so containers don't outlast the run if the queue drains early.
|
|
66
|
+
// For longer runs (actorTimeoutSecs > 120), still cap at 60s — containers should shut
|
|
67
|
+
// down promptly once there's nothing left to crawl.
|
|
68
|
+
const idleTimeoutSecs = Math.min(60, Math.floor(runConfig.actorTimeoutSecs / 2));
|
|
65
69
|
let containerIds;
|
|
66
70
|
try {
|
|
67
71
|
if (isDistributed) {
|
|
@@ -69,7 +73,7 @@ async function processRunJob(config, job) {
|
|
|
69
73
|
throw new Error("CONTAINER_COUNT > 1 but CRAWLER_REDIS_URL is not set — containers cannot share a queue");
|
|
70
74
|
}
|
|
71
75
|
containerIds = await (0, docker_1.startContainersForRun)(config, runConfig.actorConfig, runConfig.memoryLimitMb, dockerImage, config.crawlerRedisUrl, runId, // QUEUE_NAME = runId for job isolation
|
|
72
|
-
containerCount);
|
|
76
|
+
containerCount, idleTimeoutSecs);
|
|
73
77
|
logger_1.logger.info(`[${runId}] ${containerIds.length} containers started (distributed mode, queue: ${runId})`);
|
|
74
78
|
}
|
|
75
79
|
else {
|
package/dist/processor.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"processor.js","sourceRoot":"","sources":["../src/processor.ts"],"names":[],"mappings":";;AA8DA,
|
|
1
|
+
{"version":3,"file":"processor.js","sourceRoot":"","sources":["../src/processor.ts"],"names":[],"mappings":";;AA8DA,sCAkJC;AA9MD,qCAIkB;AAClB,qCAAkC;AAiBlC,KAAK,UAAU,MAAM,CAAI,UAAkB,EAAE,IAAY;IACvD,MAAM,GAAG,GAAG,GAAG,UAAU,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC;IACvD,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;IAC7B,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;QACZ,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;QAC9C,MAAM,IAAI,KAAK,CAAC,OAAO,IAAI,WAAW,GAAG,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC,CAAC;IAC/D,CAAC;IACD,OAAO,GAAG,CAAC,IAAI,EAAgB,CAAC;AAClC,CAAC;AAED,KAAK,UAAU,QAAQ,CACrB,UAAkB,EAClB,IAAY,EACZ,IAA6B;IAE7B,MAAM,GAAG,GAAG,GAAG,UAAU,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC;IACvD,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;QAC3B,MAAM,EAAE,MAAM;QACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;QAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;KAC3B,CAAC,CAAC;IACH,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;QACZ,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;QAC9C,eAAM,CAAC,IAAI,CAAC,QAAQ,IAAI,WAAW,GAAG,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC,CAAC;IAC5D,CAAC;AACH,CAAC;AAED;;;;;;;;;;GAUG;AACI,KAAK,UAAU,aAAa,CACjC,MAAoB,EACpB,GAA2B;IAE3B,MAAM,EAAE,KAAK,EAAE,GAAG,GAAG,CAAC,IAAI,CAAC;IAE3B,IAAI,CAAC,KAAK;QAAE,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;IAEtD,eAAM,CAAC,IAAI,CACT,IAAI,KAAK,oBAAoB,GAAG,CAAC,EAAE,aAAa,GAAG,CAAC,YAAY,GAAG,CAAC,GAAG,CACxE,CAAC;IAEF,0EAA0E;IAC1E,IAAI,SAA4B,CAAC;IACjC,IAAI,CAAC;QACH,SAAS,GAAG,MAAM,MAAM,CACtB,MAAM,CAAC,UAAU,EACjB,aAAa,KAAK,SAAS,CAC5B,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,eAAM,CAAC,KAAK,CACV,IAAI,KAAK,iCAAkC,GAAa,CAAC,OAAO,EAAE,CACnE,CAAC;QACF,MAAM,GAAG,CAAC,CAAC,oBAAoB;IACjC,CAAC;IAED,0EAA0E;IAC1E,MAAM,QAAQ,CAAC,MAAM,CAAC,UAAU,EAAE,aAAa,KAAK,SAAS,EAAE;QAC7D,KAAK;QACL,MAAM,EAAE,SAAS;KAClB,CAAC,CAAC;IAEH,yEAAyE;IACzE,MAAM,WAAW,GAAG,SAAS,CAAC,WAAW,IAAI,MAAM,CAAC,WAAW,CAAC;IAChE,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CACb,6EAA6E,CAC9E,CAAC;IACJ,CAAC;IAED,uEAAuE;IACvE,MA
AM,cAAc,GAAG,SAAS,CAAC,cAAc,IAAI,MAAM,CAAC,cAAc,CAAC;IACzE,MAAM,aAAa,GAAG,CAAC,CAAC,MAAM,CAAC,eAAe,IAAI,cAAc,GAAG,CAAC,CAAC;IACrE,0FAA0F;IAC1F,sFAAsF;IACtF,oDAAoD;IACpD,MAAM,eAAe,GAAG,IAAI,CAAC,GAAG,CAC9B,EAAE,EACF,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,gBAAgB,GAAG,CAAC,CAAC,CAC3C,CAAC;IAEF,IAAI,YAAsB,CAAC;IAC3B,IAAI,CAAC;QACH,IAAI,aAAa,EAAE,CAAC;YAClB,IAAI,CAAC,MAAM,CAAC,eAAe,EAAE,CAAC;gBAC5B,MAAM,IAAI,KAAK,CACb,wFAAwF,CACzF,CAAC;YACJ,CAAC;YACD,YAAY,GAAG,MAAM,IAAA,8BAAqB,EACxC,MAAM,EACN,SAAS,CAAC,WAAW,EACrB,SAAS,CAAC,aAAa,EACvB,WAAW,EACX,MAAM,CAAC,eAAe,EACtB,KAAK,EAAE,uCAAuC;YAC9C,cAAc,EACd,eAAe,CAChB,CAAC;YACF,eAAM,CAAC,IAAI,CACT,IAAI,KAAK,KAAK,YAAY,CAAC,MAAM,iDAAiD,KAAK,GAAG,CAC3F,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,MAAM,EAAE,GAAG,MAAM,IAAA,uBAAc,EAC7B,MAAM,EACN,SAAS,CAAC,WAAW,EACrB,SAAS,CAAC,aAAa,EACvB,WAAW,CACZ,CAAC;YACF,YAAY,GAAG,CAAC,EAAE,CAAC,CAAC;YACpB,eAAM,CAAC,IAAI,CAAC,IAAI,KAAK,wBAAwB,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,eAAM,CAAC,KAAK,CACV,IAAI,KAAK,mCAAoC,GAAa,CAAC,OAAO,EAAE,CACrE,CAAC;QACF,MAAM,QAAQ,CAAC,MAAM,CAAC,UAAU,EAAE,aAAa,KAAK,SAAS,EAAE;YAC7D,KAAK;YACL,MAAM,EAAE,QAAQ;YAChB,KAAK,EAAE,iCAAkC,GAAa,CAAC,OAAO,EAAE;SACjE,CAAC,CAAC;QACH,MAAM,GAAG,CAAC;IACZ,CAAC;IAED,2EAA2E;IAC3E,IAAI,SAAqD,CAAC;IAC1D,IAAI,GAAG,CAAC,KAAK,EAAE,CAAC;QACd,SAAS,GAAG,WAAW,CAAC,KAAK,IAAI,EAAE;YACjC,IAAI,CAAC;gBACH,MAAM,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,KAAM,EAAE,OAAO,CAAC,CAAC;YAC5C,CAAC;YAAC,OAAO,MAAM,EAAE,CAAC;gBAChB,eAAM,CAAC,IAAI,CACT,IAAI,KAAK,4BAA6B,MAAgB,CAAC,OAAO,EAAE,CACjE,CAAC;YACJ,CAAC;QACH,CAAC,EAAE,OAAO,CAAC,CAAC;IACd,CAAC;IAED,wEAAwE;IACxE,IAAI,QAAQ,GAAG,CAAC,CAAC,CAAC;IAClB,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,GAAG,CACjC,YAAY,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CACtB,IAAA,yBAAgB,EAAC,EAAE,EAAE,SAAS,CAAC,gBAAgB,CAAC,CACjD,CACF,CAAC;QACF,oEAAoE;QACpE,QAAQ,GAAG,SAAS,CAAC,MAAM,CACzB,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,EAC5C,CAAC,CACF,CAAC;QACF,
eAAM,CAAC,IAAI,CACT,IAAI,KAAK,SAAS,YAAY,CAAC,MAAM,kCAAkC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAC/F,CAAC;IACJ,CAAC;YAAS,CAAC;QACT,IAAI,SAAS;YAAE,aAAa,CAAC,SAAS,CAAC,CAAC;IAC1C,CAAC;IAED,yEAAyE;IACzE,yEAAyE;IACzE,oEAAoE;IACpE,wEAAwE;IACxE,8DAA8D;IAC9D,MAAM,WAAW,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC;IAC5D,MAAM,QAAQ,CAAC,MAAM,CAAC,UAAU,EAAE,aAAa,KAAK,SAAS,EAAE;QAC7D,KAAK;QACL,MAAM,EAAE,WAAW;QACnB,QAAQ;QACR,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,GAAG,CAAC,QAAQ,KAAK,CAAC,IAAI;YACpB,KAAK,EAAE,8BAA8B,QAAQ,iCAAiC;SAC/E,CAAC;KACH,CAAC,CAAC;IAEH,eAAM,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,GAAG,CAAC,EAAE,mBAAmB,WAAW,EAAE,CAAC,CAAC;IACtE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;AACxC,CAAC"}
|
package/dist/test-harness.js
CHANGED
|
@@ -65,10 +65,34 @@ const RUN_ID = `test-run-${Date.now()}`;
|
|
|
65
65
|
// Dedicated test queue — avoids interference from any running dory-api workers
|
|
66
66
|
// which consume from the real "run-execution" queue.
|
|
67
67
|
const QUEUE_NAME = "dory-worker-test";
|
|
68
|
-
//
|
|
69
|
-
//
|
|
70
|
-
|
|
71
|
-
const
|
|
68
|
+
// Set DISTRIBUTED=true to run a real multi-container test using dory-core:v2.
|
|
69
|
+
// Single-container mode uses a lightweight fake image that exits 0 immediately.
|
|
70
|
+
const DISTRIBUTED = process.env.DISTRIBUTED === "true";
|
|
71
|
+
const CONTAINER_COUNT = parseInt(process.env.CONTAINER_COUNT ?? "2", 10);
|
|
72
|
+
const CRAWLER_REDIS_URL = process.env.CRAWLER_REDIS_URL ?? `redis://${REDIS_HOST}:${REDIS_PORT}`;
|
|
73
|
+
const DEFAULT_SINGLE_IMAGE = "dory-worker-test:fake";
|
|
74
|
+
const DEFAULT_DISTRIBUTED_IMAGE = "dory-core:v2";
|
|
75
|
+
const TEST_DOCKER_IMAGE = process.env.TEST_DOCKER_IMAGE ??
|
|
76
|
+
(DISTRIBUTED ? DEFAULT_DISTRIBUTED_IMAGE : DEFAULT_SINGLE_IMAGE);
|
|
77
|
+
// ── Actor config for distributed mode (real cheerio scrape) ──────────────────
|
|
78
|
+
const DISTRIBUTED_ACTOR_CONFIG = {
|
|
79
|
+
crawlerType: "cheerio",
|
|
80
|
+
startUrls: ["https://quotes.toscrape.com/page/1/"],
|
|
81
|
+
maxPagesPerCrawl: 10,
|
|
82
|
+
handlers: [
|
|
83
|
+
{
|
|
84
|
+
label: "DEFAULT",
|
|
85
|
+
code: `async ({ $, request, pushData, enqueueLinks }) => {
|
|
86
|
+
const quotes = [];
|
|
87
|
+
$(".quote").each((_, el) => {
|
|
88
|
+
quotes.push($(".text", el).text());
|
|
89
|
+
});
|
|
90
|
+
await pushData({ url: request.url, quotes });
|
|
91
|
+
await enqueueLinks({ selector: ".next a" });
|
|
92
|
+
}`,
|
|
93
|
+
},
|
|
94
|
+
],
|
|
95
|
+
};
|
|
72
96
|
const statusUpdates = [];
|
|
73
97
|
let configRequests = 0;
|
|
74
98
|
function startMockApi() {
|
|
@@ -76,11 +100,13 @@ function startMockApi() {
|
|
|
76
100
|
jobId: `job-${RUN_ID}`,
|
|
77
101
|
runId: RUN_ID,
|
|
78
102
|
actorInstanceId: "test-actor-001",
|
|
79
|
-
userInput:
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
103
|
+
userInput: DISTRIBUTED
|
|
104
|
+
? DISTRIBUTED_ACTOR_CONFIG
|
|
105
|
+
: {
|
|
106
|
+
crawlerType: "cheerio",
|
|
107
|
+
handlers: [],
|
|
108
|
+
startUrls: ["https://example.com"],
|
|
109
|
+
},
|
|
84
110
|
};
|
|
85
111
|
const server = http.createServer((req, res) => {
|
|
86
112
|
const url = req.url ?? "";
|
|
@@ -91,9 +117,10 @@ function startMockApi() {
|
|
|
91
117
|
res.end(JSON.stringify({
|
|
92
118
|
runId: RUN_ID,
|
|
93
119
|
actorConfig,
|
|
94
|
-
memoryLimitMb:
|
|
95
|
-
actorTimeoutSecs:
|
|
120
|
+
memoryLimitMb: 512,
|
|
121
|
+
actorTimeoutSecs: 120,
|
|
96
122
|
dockerImage: TEST_DOCKER_IMAGE,
|
|
123
|
+
containerCount: DISTRIBUTED ? CONTAINER_COUNT : 1,
|
|
97
124
|
}));
|
|
98
125
|
}
|
|
99
126
|
else if (req.method === "POST" && url.includes("/status")) {
|
|
@@ -127,10 +154,14 @@ async function main() {
|
|
|
127
154
|
(0, logger_1.setLogLevel)("debug");
|
|
128
155
|
logger_1.logger.info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
|
|
129
156
|
logger_1.logger.info(" dory-worker standalone test harness");
|
|
157
|
+
logger_1.logger.info(` Mode : ${DISTRIBUTED ? `distributed (${CONTAINER_COUNT} containers)` : "single-container"}`);
|
|
130
158
|
logger_1.logger.info(` Run ID : ${RUN_ID}`);
|
|
131
159
|
logger_1.logger.info(` Test image : ${TEST_DOCKER_IMAGE}`);
|
|
132
160
|
logger_1.logger.info(` Redis : ${REDIS_HOST}:${REDIS_PORT}`);
|
|
133
161
|
logger_1.logger.info(` Mock API : http://localhost:${MOCK_API_PORT}`);
|
|
162
|
+
if (DISTRIBUTED) {
|
|
163
|
+
logger_1.logger.info(` Crawler Redis: ${CRAWLER_REDIS_URL}`);
|
|
164
|
+
}
|
|
134
165
|
logger_1.logger.info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
|
|
135
166
|
// ── 1. Start mock API ─────────────────────────────────────────────────
|
|
136
167
|
const server = await startMockApi();
|
|
@@ -152,11 +183,12 @@ async function main() {
|
|
|
152
183
|
maxConcurrentRuns: 1,
|
|
153
184
|
workerId: "test-worker",
|
|
154
185
|
logLevel: "debug",
|
|
155
|
-
// GCS pass-through not needed for
|
|
186
|
+
// GCS pass-through not needed for E2E test
|
|
156
187
|
gcsBucket: undefined,
|
|
157
188
|
gcpProjectId: undefined,
|
|
158
189
|
dockerImage: TEST_DOCKER_IMAGE,
|
|
159
|
-
containerCount: 1,
|
|
190
|
+
containerCount: DISTRIBUTED ? CONTAINER_COUNT : 1,
|
|
191
|
+
crawlerRedisUrl: DISTRIBUTED ? CRAWLER_REDIS_URL : undefined,
|
|
160
192
|
};
|
|
161
193
|
// ── 5. Start worker ───────────────────────────────────────────────────
|
|
162
194
|
logger_1.logger.info("[test] Starting worker...");
|
|
@@ -175,8 +207,9 @@ async function main() {
|
|
|
175
207
|
logger_1.logger.error(`[test] ✗ Worker failed job ${j?.id}: ${err.message}`);
|
|
176
208
|
jobFailed = true;
|
|
177
209
|
});
|
|
178
|
-
// ── 6. Wait for result (max 90s)
|
|
179
|
-
const
|
|
210
|
+
// ── 6. Wait for result (max 90s single-container, 180s distributed) ───
|
|
211
|
+
const timeoutMs = DISTRIBUTED ? 180_000 : 90_000;
|
|
212
|
+
const deadline = Date.now() + timeoutMs;
|
|
180
213
|
while (!jobCompleted && !jobFailed && Date.now() < deadline) {
|
|
181
214
|
await sleep(500);
|
|
182
215
|
}
|
package/dist/test-harness.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"test-harness.js","sourceRoot":"","sources":["../src/test-harness.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;GAeG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,2CAA6B;AAC7B,mCAAuC;AACvC,sDAA8B;AAE9B,2CAA4C;AAC5C,qCAA+C;AAE/C,iFAAiF;AAEjF,MAAM,aAAa,GAAG,KAAK,CAAC;AAC5B,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,WAAW,CAAC;AACzD,MAAM,UAAU,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,MAAM,EAAE,EAAE,CAAC,CAAC;AAClE,MAAM,MAAM,GAAG,YAAY,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;AACxC,+EAA+E;AAC/E,qDAAqD;AACrD,MAAM,UAAU,GAAG,kBAAkB,CAAC;AAEtC,
|
|
1
|
+
{"version":3,"file":"test-harness.js","sourceRoot":"","sources":["../src/test-harness.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;GAeG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,2CAA6B;AAC7B,mCAAuC;AACvC,sDAA8B;AAE9B,2CAA4C;AAC5C,qCAA+C;AAE/C,iFAAiF;AAEjF,MAAM,aAAa,GAAG,KAAK,CAAC;AAC5B,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,WAAW,CAAC;AACzD,MAAM,UAAU,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,MAAM,EAAE,EAAE,CAAC,CAAC;AAClE,MAAM,MAAM,GAAG,YAAY,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;AACxC,+EAA+E;AAC/E,qDAAqD;AACrD,MAAM,UAAU,GAAG,kBAAkB,CAAC;AAEtC,8EAA8E;AAC9E,gFAAgF;AAChF,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,KAAK,MAAM,CAAC;AACvD,MAAM,eAAe,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;AACzE,MAAM,iBAAiB,GACrB,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,WAAW,UAAU,IAAI,UAAU,EAAE,CAAC;AAEzE,MAAM,oBAAoB,GAAG,uBAAuB,CAAC;AACrD,MAAM,yBAAyB,GAAG,cAAc,CAAC;AACjD,MAAM,iBAAiB,GACrB,OAAO,CAAC,GAAG,CAAC,iBAAiB;IAC7B,CAAC,WAAW,CAAC,CAAC,CAAC,yBAAyB,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC;AAEnE,gFAAgF;AAEhF,MAAM,wBAAwB,GAAG;IAC/B,WAAW,EAAE,SAAS;IACtB,SAAS,EAAE,CAAC,qCAAqC,CAAC;IAClD,gBAAgB,EAAE,EAAE;IACpB,QAAQ,EAAE;QACR;YACE,KAAK,EAAE,SAAS;YAChB,IAAI,EAAE;;;;;;;EAOV;SACG;KACF;CACF,CAAC;AAUF,MAAM,aAAa,GAAmB,EAAE,CAAC;AACzC,IAAI,cAAc,GAAG,CAAC,CAAC;AAEvB,SAAS,YAAY;IACnB,MAAM,WAAW,GAAG;QAClB,KAAK,EAAE,OAAO,MAAM,EAAE;QACtB,KAAK,EAAE,MAAM;QACb,eAAe,EAAE,gBAAgB;QACjC,SAAS,EAAE,WAAW;YACpB,CAAC,CAAC,wBAAwB;YAC1B,CAAC,CAAC;gBACE,WAAW,EAAE,SAAS;gBACtB,QAAQ,EAAE,EAAE;gBACZ,SAAS,EAAE,CAAC,qBAAqB,CAAC;aACnC;KACN,CAAC;IAEF,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;QAC5C,MAAM,GAAG,GAAG,GAAG,CAAC,GAAG,IAAI,EAAE,CAAC;QAC1B,eAAM,CAAC,KAAK,CAAC,cAAc,GAAG,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC,CAAC;QAEhD,IAAI,GAAG,CAAC,MAAM,KAAK,KAAK,IAAI,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;YACpD,cAAc,EAAE,CAAC;YACjB,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE,CAAC,CAAC;YAC3D,GAAG,CAAC,GAAG,CACL,IAAI,CAAC,SAAS,CAAC;gBACb,KAAK,EAAE,MAAM;gBACb,WAAW;gBACX,aAAa,EAAE,GAAG;gBAClB,gBAAgB,
EAAE,GAAG;gBACrB,WAAW,EAAE,iBAAiB;gBAC9B,cAAc,EAAE,WAAW,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;aAClD,CAAC,CACH,CAAC;QACJ,CAAC;aAAM,IAAI,GAAG,CAAC,MAAM,KAAK,MAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;YAC5D,IAAI,IAAI,GAAG,EAAE,CAAC;YACd,GAAG,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,IAAI,IAAI,KAAK,CAAC,CAAC,CAAC;YAC3C,GAAG,CAAC,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE;gBACjB,MAAM,IAAI,GAAiB,IAAI,CAAC,KAAK,CAAC,IAAI,IAAI,IAAI,CAAC,CAAC;gBACpD,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACzB,eAAM,CAAC,IAAI,CACT,uCAAuC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,aAAa,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CACvH,CAAC;gBACF,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE,CAAC,CAAC;gBAC3D,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;YAC7C,CAAC,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;YACnB,GAAG,CAAC,GAAG,EAAE,CAAC;QACZ,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,MAAM,CAAC,MAAM,CAAC,aAAa,EAAE,SAAS,EAAE,GAAG,EAAE;YAC3C,eAAM,CAAC,IAAI,CAAC,4BAA4B,aAAa,EAAE,CAAC,CAAC;YACzD,OAAO,CAAC,MAAM,CAAC,CAAC;QAClB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAED,gFAAgF;AAEhF,KAAK,UAAU,KAAK,CAAC,EAAU;IAC7B,OAAO,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;AAC/C,CAAC;AAED,KAAK,UAAU,IAAI;IACjB,IAAA,oBAAW,EAAC,OAAO,CAAC,CAAC;IAErB,eAAM,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;IAC5D,eAAM,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC;IACrD,eAAM,CAAC,IAAI,CACT,mBAAmB,WAAW,CAAC,CAAC,CAAC,gBAAgB,eAAe,cAAc,CAAC,CAAC,CAAC,kBAAkB,EAAE,CACtG,CAAC;IACF,eAAM,CAAC,IAAI,CAAC,mBAAmB,MAAM,EAAE,CAAC,CAAC;IACzC,eAAM,CAAC,IAAI,CAAC,mBAAmB,iBAAiB,EAAE,CAAC,CAAC;IACpD,eAAM,CAAC,IAAI,CAAC,mBAAmB,UAAU,IAAI,UAAU,EAAE,CAAC,CAAC;IAC3D,eAAM,CAAC,IAAI,CAAC,oCAAoC,aAAa,EAAE,CAAC,CAAC;IACjE,IAAI,WAAW,EAAE,CAAC;QAChB,eAAM,CAAC,IAAI,CAAC,oBAAoB,iBAAiB,EAAE,CAAC,CAAC;IACvD,CAAC;IACD,eAAM,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;IAE5D,yEAAyE;IACzE,MAAM,MAAM,GAAG,MAAM,YAAY,EAAE,CAAC;IAEpC,yEAAy
E;IACzE,MAAM,KAAK,GAAG,IAAI,iBAAO,CAAC;QACxB,IAAI,EAAE,UAAU;QAChB,IAAI,EAAE,UAAU;QAChB,oBAAoB,EAAE,IAAI;KAC3B,CAAC,CAAC;IAEH,yEAAyE;IACzE,MAAM,KAAK,GAAG,IAAI,cAAK,CAAC,UAAU,EAAE,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,CAAC;IAC3D,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,aAAa,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;IAC9D,eAAM,CAAC,IAAI,CAAC,4BAA4B,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC;IAElD,yEAAyE;IACzE,MAAM,MAAM,GAAiB;QAC3B,SAAS,EAAE,UAAU;QACrB,SAAS,EAAE,UAAU;QACrB,UAAU,EAAE,oBAAoB,aAAa,EAAE;QAC/C,iBAAiB,EAAE,CAAC;QACpB,QAAQ,EAAE,aAAa;QACvB,QAAQ,EAAE,OAAO;QACjB,2CAA2C;QAC3C,SAAS,EAAE,SAAS;QACpB,YAAY,EAAE,SAAS;QACvB,WAAW,EAAE,iBAAiB;QAC9B,cAAc,EAAE,WAAW,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QACjD,eAAe,EAAE,WAAW,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,CAAC,SAAS;KAC7D,CAAC;IAEF,yEAAyE;IACzE,eAAM,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;IAEzC,IAAI,YAAY,GAAG,KAAK,CAAC;IACzB,IAAI,SAAS,GAAG,KAAK,CAAC;IAEtB,MAAM,MAAM,GAAG,IAAI,eAAM,CAAC,UAAU,EAAE,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC,IAAA,yBAAa,EAAC,MAAM,EAAE,CAAC,CAAC,EAAE;QAC3E,UAAU,EAAE,KAAK;QACjB,WAAW,EAAE,CAAC;QACd,YAAY,EAAE,OAAO;KACtB,CAAC,CAAC;IAEH,MAAM,CAAC,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,EAAE;QAC3B,eAAM,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACrD,YAAY,GAAG,IAAI,CAAC;IACtB,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,EAAE,CAAC,QAAQ,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE;QAC7B,eAAM,CAAC,KAAK,CAAC,8BAA8B,CAAC,EAAE,EAAE,KAAK,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;QACpE,SAAS,GAAG,IAAI,CAAC;IACnB,CAAC,CAAC,CAAC;IAEH,yEAAyE;IACzE,MAAM,SAAS,GAAG,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC;IACjD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;IACxC,OAAO,CAAC,YAAY,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,QAAQ,EAAE,CAAC;QAC5D,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;IACnB,CAAC;IAED,yEAAyE;IACzE,eAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAChB,eAAM,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;IAC5D,eAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;IAC9B,eAAM,CAAC,IAAI,CAAC,0BAA0B,cAAc,eAAe,CAAC,CAAC;IACrE,eAAM,CAAC,IAAI,CAAC,0BAA0B,aAAa,CAAC,MAAM,gBAAgB,CAAC,CAAC;IAC5E,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAA
E,EAAE,CAC7B,eAAM,CAAC,IAAI,CACT,QAAQ,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAChG,CACF,CAAC;IACF,eAAM,CAAC,IAAI,CAAC,0BAA0B,YAAY,EAAE,CAAC,CAAC;IACtD,eAAM,CAAC,IAAI,CAAC,0BAA0B,SAAS,EAAE,CAAC,CAAC;IAEnD,MAAM,MAAM,GACV,YAAY;QACZ,CAAC,SAAS;QACV,cAAc,KAAK,CAAC;QACpB,aAAa,CAAC,MAAM,IAAI,CAAC;QACzB,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;IAEzE,eAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAChB,eAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,uBAAuB,CAAC,CAAC,CAAC,wBAAwB,CAAC,CAAC;IACzE,eAAM,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;IAE5D,yEAAyE;IACzE,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;IACrB,MAAM,KAAK,CAAC,KAAK,EAAE,CAAC;IACpB,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;IACnB,MAAM,CAAC,KAAK,EAAE,CAAC;IAEf,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAC/B,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACnB,OAAO,CAAC,KAAK,CAAC,uBAAuB,EAAE,GAAG,CAAC,CAAC;IAC5C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@nikx/dory-worker",
|
|
3
|
-
"version": "1.0.2",
|
|
3
|
+
"version": "1.0.3",
|
|
4
4
|
"description": "Standalone BullMQ worker for Dory – runs on any machine with Docker (including Raspberry Pi)",
|
|
5
5
|
"main": "dist/cli.js",
|
|
6
6
|
"bin": {
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
"start": "node dist/cli.js",
|
|
12
12
|
"dev": "npx ts-node --project tsconfig.json src/cli.ts",
|
|
13
13
|
"test": "npx ts-node --project tsconfig.json src/test-harness.ts",
|
|
14
|
+
"test:distributed": "DISTRIBUTED=true npx ts-node --project tsconfig.json src/test-harness.ts",
|
|
14
15
|
"prepare": "npm run build"
|
|
15
16
|
},
|
|
16
17
|
"keywords": [
|
package/src/docker.ts
CHANGED
|
@@ -20,6 +20,8 @@ interface DistributedOpts {
|
|
|
20
20
|
queueName: string;
|
|
21
21
|
/** Human-readable container label, e.g. "worker-1" */
|
|
22
22
|
workerId: string;
|
|
23
|
+
/** Seconds to wait with an empty queue before the container shuts down */
|
|
24
|
+
idleTimeoutSecs: number;
|
|
23
25
|
}
|
|
24
26
|
|
|
25
27
|
/**
|
|
@@ -29,7 +31,8 @@ interface DistributedOpts {
|
|
|
29
31
|
function toDockerHost(url: string): string {
|
|
30
32
|
return url
|
|
31
33
|
.replace("http://localhost", "http://host.docker.internal")
|
|
32
|
-
.replace("https://localhost", "https://host.docker.internal")
|
|
34
|
+
.replace("https://localhost", "https://host.docker.internal")
|
|
35
|
+
.replace("redis://localhost", "redis://host.docker.internal");
|
|
33
36
|
}
|
|
34
37
|
|
|
35
38
|
function buildDockerEnvArgs(
|
|
@@ -60,9 +63,10 @@ function buildDockerEnvArgs(
|
|
|
60
63
|
|
|
61
64
|
// Distributed mode — wire the container to the shared crawler Redis queue
|
|
62
65
|
if (distributed) {
|
|
63
|
-
args.push(`-e REDIS_URL=${distributed.crawlerRedisUrl}`);
|
|
66
|
+
args.push(`-e REDIS_URL=${toDockerHost(distributed.crawlerRedisUrl)}`);
|
|
64
67
|
args.push(`-e QUEUE_NAME=${distributed.queueName}`);
|
|
65
68
|
args.push(`-e WORKER_ID=${distributed.workerId}`);
|
|
69
|
+
args.push(`-e IDLE_TIMEOUT_SECS=${distributed.idleTimeoutSecs}`);
|
|
66
70
|
}
|
|
67
71
|
|
|
68
72
|
return args;
|
|
@@ -118,6 +122,7 @@ export async function startContainersForRun(
|
|
|
118
122
|
crawlerRedisUrl: string,
|
|
119
123
|
queueName: string,
|
|
120
124
|
count: number,
|
|
125
|
+
idleTimeoutSecs: number,
|
|
121
126
|
): Promise<string[]> {
|
|
122
127
|
const containerIds: string[] = [];
|
|
123
128
|
for (let i = 1; i <= count; i++) {
|
|
@@ -131,6 +136,7 @@ export async function startContainersForRun(
|
|
|
131
136
|
crawlerRedisUrl,
|
|
132
137
|
queueName,
|
|
133
138
|
workerId,
|
|
139
|
+
idleTimeoutSecs,
|
|
134
140
|
},
|
|
135
141
|
);
|
|
136
142
|
logger.info(
|
package/src/processor.ts
CHANGED
|
@@ -103,6 +103,13 @@ export async function processRunJob(
|
|
|
103
103
|
// API-returned containerCount takes precedence over the worker env var
|
|
104
104
|
const containerCount = runConfig.containerCount ?? config.containerCount;
|
|
105
105
|
const isDistributed = !!config.crawlerRedisUrl || containerCount > 1;
|
|
106
|
+
// Idle timeout: half the actor timeout, capped at 60s, so containers don't outlast the run if the queue drains early.
|
|
107
|
+
// For longer runs (actorTimeoutSecs > 120), still cap at 60s — containers should shut
|
|
108
|
+
// down promptly once there's nothing left to crawl.
|
|
109
|
+
const idleTimeoutSecs = Math.min(
|
|
110
|
+
60,
|
|
111
|
+
Math.floor(runConfig.actorTimeoutSecs / 2),
|
|
112
|
+
);
|
|
106
113
|
|
|
107
114
|
let containerIds: string[];
|
|
108
115
|
try {
|
|
@@ -120,6 +127,7 @@ export async function processRunJob(
|
|
|
120
127
|
config.crawlerRedisUrl,
|
|
121
128
|
runId, // QUEUE_NAME = runId for job isolation
|
|
122
129
|
containerCount,
|
|
130
|
+
idleTimeoutSecs,
|
|
123
131
|
);
|
|
124
132
|
logger.info(
|
|
125
133
|
`[${runId}] ${containerIds.length} containers started (distributed mode, queue: ${runId})`,
|
package/src/test-harness.ts
CHANGED
|
@@ -32,11 +32,39 @@ const RUN_ID = `test-run-${Date.now()}`;
|
|
|
32
32
|
// which consume from the real "run-execution" queue.
|
|
33
33
|
const QUEUE_NAME = "dory-worker-test";
|
|
34
34
|
|
|
35
|
-
//
|
|
36
|
-
//
|
|
37
|
-
|
|
35
|
+
// Set DISTRIBUTED=true to run a real multi-container test using dory-core:v2.
|
|
36
|
+
// Single-container mode uses a lightweight fake image that exits 0 immediately.
|
|
37
|
+
const DISTRIBUTED = process.env.DISTRIBUTED === "true";
|
|
38
|
+
const CONTAINER_COUNT = parseInt(process.env.CONTAINER_COUNT ?? "2", 10);
|
|
39
|
+
const CRAWLER_REDIS_URL =
|
|
40
|
+
process.env.CRAWLER_REDIS_URL ?? `redis://${REDIS_HOST}:${REDIS_PORT}`;
|
|
41
|
+
|
|
42
|
+
const DEFAULT_SINGLE_IMAGE = "dory-worker-test:fake";
|
|
43
|
+
const DEFAULT_DISTRIBUTED_IMAGE = "dory-core:v2";
|
|
38
44
|
const TEST_DOCKER_IMAGE =
|
|
39
|
-
process.env.TEST_DOCKER_IMAGE ??
|
|
45
|
+
process.env.TEST_DOCKER_IMAGE ??
|
|
46
|
+
(DISTRIBUTED ? DEFAULT_DISTRIBUTED_IMAGE : DEFAULT_SINGLE_IMAGE);
|
|
47
|
+
|
|
48
|
+
// ── Actor config for distributed mode (real cheerio scrape) ──────────────────
|
|
49
|
+
|
|
50
|
+
const DISTRIBUTED_ACTOR_CONFIG = {
|
|
51
|
+
crawlerType: "cheerio",
|
|
52
|
+
startUrls: ["https://quotes.toscrape.com/page/1/"],
|
|
53
|
+
maxPagesPerCrawl: 10,
|
|
54
|
+
handlers: [
|
|
55
|
+
{
|
|
56
|
+
label: "DEFAULT",
|
|
57
|
+
code: `async ({ $, request, pushData, enqueueLinks }) => {
|
|
58
|
+
const quotes = [];
|
|
59
|
+
$(".quote").each((_, el) => {
|
|
60
|
+
quotes.push($(".text", el).text());
|
|
61
|
+
});
|
|
62
|
+
await pushData({ url: request.url, quotes });
|
|
63
|
+
await enqueueLinks({ selector: ".next a" });
|
|
64
|
+
}`,
|
|
65
|
+
},
|
|
66
|
+
],
|
|
67
|
+
};
|
|
40
68
|
|
|
41
69
|
// ── Mock API server ───────────────────────────────────────────────────────────
|
|
42
70
|
|
|
@@ -54,11 +82,13 @@ function startMockApi(): Promise<http.Server> {
|
|
|
54
82
|
jobId: `job-${RUN_ID}`,
|
|
55
83
|
runId: RUN_ID,
|
|
56
84
|
actorInstanceId: "test-actor-001",
|
|
57
|
-
userInput:
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
85
|
+
userInput: DISTRIBUTED
|
|
86
|
+
? DISTRIBUTED_ACTOR_CONFIG
|
|
87
|
+
: {
|
|
88
|
+
crawlerType: "cheerio",
|
|
89
|
+
handlers: [],
|
|
90
|
+
startUrls: ["https://example.com"],
|
|
91
|
+
},
|
|
62
92
|
};
|
|
63
93
|
|
|
64
94
|
const server = http.createServer((req, res) => {
|
|
@@ -72,9 +102,10 @@ function startMockApi(): Promise<http.Server> {
|
|
|
72
102
|
JSON.stringify({
|
|
73
103
|
runId: RUN_ID,
|
|
74
104
|
actorConfig,
|
|
75
|
-
memoryLimitMb:
|
|
76
|
-
actorTimeoutSecs:
|
|
105
|
+
memoryLimitMb: 512,
|
|
106
|
+
actorTimeoutSecs: 120,
|
|
77
107
|
dockerImage: TEST_DOCKER_IMAGE,
|
|
108
|
+
containerCount: DISTRIBUTED ? CONTAINER_COUNT : 1,
|
|
78
109
|
}),
|
|
79
110
|
);
|
|
80
111
|
} else if (req.method === "POST" && url.includes("/status")) {
|
|
@@ -114,10 +145,16 @@ async function main(): Promise<void> {
|
|
|
114
145
|
|
|
115
146
|
logger.info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
|
|
116
147
|
logger.info(" dory-worker standalone test harness");
|
|
148
|
+
logger.info(
|
|
149
|
+
` Mode : ${DISTRIBUTED ? `distributed (${CONTAINER_COUNT} containers)` : "single-container"}`,
|
|
150
|
+
);
|
|
117
151
|
logger.info(` Run ID : ${RUN_ID}`);
|
|
118
152
|
logger.info(` Test image : ${TEST_DOCKER_IMAGE}`);
|
|
119
153
|
logger.info(` Redis : ${REDIS_HOST}:${REDIS_PORT}`);
|
|
120
154
|
logger.info(` Mock API : http://localhost:${MOCK_API_PORT}`);
|
|
155
|
+
if (DISTRIBUTED) {
|
|
156
|
+
logger.info(` Crawler Redis: ${CRAWLER_REDIS_URL}`);
|
|
157
|
+
}
|
|
121
158
|
logger.info("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
|
|
122
159
|
|
|
123
160
|
// ── 1. Start mock API ─────────────────────────────────────────────────
|
|
@@ -143,11 +180,12 @@ async function main(): Promise<void> {
|
|
|
143
180
|
maxConcurrentRuns: 1,
|
|
144
181
|
workerId: "test-worker",
|
|
145
182
|
logLevel: "debug",
|
|
146
|
-
// GCS pass-through not needed for
|
|
183
|
+
// GCS pass-through not needed for E2E test
|
|
147
184
|
gcsBucket: undefined,
|
|
148
185
|
gcpProjectId: undefined,
|
|
149
186
|
dockerImage: TEST_DOCKER_IMAGE,
|
|
150
|
-
containerCount: 1,
|
|
187
|
+
containerCount: DISTRIBUTED ? CONTAINER_COUNT : 1,
|
|
188
|
+
crawlerRedisUrl: DISTRIBUTED ? CRAWLER_REDIS_URL : undefined,
|
|
151
189
|
};
|
|
152
190
|
|
|
153
191
|
// ── 5. Start worker ───────────────────────────────────────────────────
|
|
@@ -172,8 +210,9 @@ async function main(): Promise<void> {
|
|
|
172
210
|
jobFailed = true;
|
|
173
211
|
});
|
|
174
212
|
|
|
175
|
-
// ── 6. Wait for result (max 90s)
|
|
176
|
-
const
|
|
213
|
+
// ── 6. Wait for result (max 90s single-container, 180s distributed) ───
|
|
214
|
+
const timeoutMs = DISTRIBUTED ? 180_000 : 90_000;
|
|
215
|
+
const deadline = Date.now() + timeoutMs;
|
|
177
216
|
while (!jobCompleted && !jobFailed && Date.now() < deadline) {
|
|
178
217
|
await sleep(500);
|
|
179
218
|
}
|