@msm-core/jobs 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +66 -0
- package/LICENSE +21 -0
- package/dist/approval/engine.d.ts +0 -1
- package/dist/approval/engine.js +0 -1
- package/dist/approval/policy.d.ts +0 -1
- package/dist/approval/policy.js +0 -1
- package/dist/config.d.ts +0 -1
- package/dist/config.js +0 -1
- package/dist/enums.d.ts +0 -1
- package/dist/enums.js +0 -1
- package/dist/index.d.ts +0 -1
- package/dist/index.js +1 -1
- package/dist/mission/cron.d.ts +0 -1
- package/dist/mission/cron.js +0 -1
- package/dist/mission/scheduler.d.ts +11 -2
- package/dist/mission/scheduler.js +22 -1
- package/dist/orchestrator.d.ts +0 -1
- package/dist/orchestrator.js +312 -222
- package/dist/port.d.ts +0 -1
- package/dist/port.js +0 -1
- package/dist/reconciler.d.ts +0 -1
- package/dist/reconciler.js +0 -1
- package/dist/resume-token.d.ts +0 -1
- package/dist/resume-token.js +0 -1
- package/dist/types.d.ts +12 -1
- package/dist/types.js +0 -1
- package/dist/workflow/registry.d.ts +0 -1
- package/dist/workflow/registry.js +0 -1
- package/package.json +8 -7
- package/dist/approval/engine.d.ts.map +0 -1
- package/dist/approval/engine.js.map +0 -1
- package/dist/approval/policy.d.ts.map +0 -1
- package/dist/approval/policy.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/enums.d.ts.map +0 -1
- package/dist/enums.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/mission/cron.d.ts.map +0 -1
- package/dist/mission/cron.js.map +0 -1
- package/dist/mission/scheduler.d.ts.map +0 -1
- package/dist/mission/scheduler.js.map +0 -1
- package/dist/orchestrator.d.ts.map +0 -1
- package/dist/orchestrator.js.map +0 -1
- package/dist/port.d.ts.map +0 -1
- package/dist/port.js.map +0 -1
- package/dist/reconciler.d.ts.map +0 -1
- package/dist/reconciler.js.map +0 -1
- package/dist/resume-token.d.ts.map +0 -1
- package/dist/resume-token.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
- package/dist/workflow/registry.d.ts.map +0 -1
- package/dist/workflow/registry.js.map +0 -1
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Changelog — @msm-core/jobs
|
|
2
|
+
|
|
3
|
+
All notable changes are documented here. Follows [Semantic Versioning](https://semver.org/).
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## [0.6.0] — 2026-06-30
|
|
8
|
+
|
|
9
|
+
Durability hardening from the 2026-06 SDK audit (the two `jobs` criticals + two highs).
|
|
10
|
+
Every fix ships with a reproduction test.
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
|
|
14
|
+
- **(C1) Exactly-once is now crash-safe.** The default step idempotency key embedded the
|
|
15
|
+
job `version`, which the claim increments on every delivery — so a crash *between*
|
|
16
|
+
`insertStep` and `finalizeStep` made a re-delivery compute a **different** key, miss the
|
|
17
|
+
already-recorded step, and **re-run the side effect** (reproduced: `execCount=2`). The key
|
|
18
|
+
is now stable per logical step (`${jobId}:step:${currentStep}`, independent of version and
|
|
19
|
+
trigger reason). To make recovery correct rather than merely de-duplicated, each step now
|
|
20
|
+
records the exact transition it decided (`JobStepRecord.finalize`), and a re-delivery that
|
|
21
|
+
finds an un-finalized step **re-applies that transition** to advance the job — without
|
|
22
|
+
re-running the side effect. New tests: `engine.test.ts › crash-safety`.
|
|
23
|
+
- **(H4) A transient infra error no longer force-fails a healthy job.** The step body was
|
|
24
|
+
wrapped in one broad `try/catch` that routed *any* throw — including a `finalizeStep`,
|
|
25
|
+
`enqueue`, or token-issue blip — into `markFailed`, permanently failing an otherwise-healthy
|
|
26
|
+
job (sometimes after its side effect had already landed). Failure handling is now scoped:
|
|
27
|
+
only a throw from the workflow resolver or the step's own `stepExecutor.execute` fails the
|
|
28
|
+
job; infra throws **propagate** so the queue retries the tick (the stable key above makes the
|
|
29
|
+
retry safe). New tests cover both the transient-insert and the genuine-step-failure paths.
|
|
30
|
+
- **(H3) The mission scheduler no longer double-spawns.** `countActiveByMission → createJob →
|
|
31
|
+
markTriggered` was an unguarded read-modify-write; two concurrent ticks could both pass the
|
|
32
|
+
concurrency check and both spawn, overrunning `maxConcurrentJobs`. Each mission's spawn is now
|
|
33
|
+
wrapped in a per-mission `LockPort` single-flight (`jobs:mission_spawn:<tenant>:<mission>`),
|
|
34
|
+
supplied automatically by `createJobEngine`. New tests: `mission-approval.test.ts`.
|
|
35
|
+
|
|
36
|
+
### Added
|
|
37
|
+
|
|
38
|
+
- **Resume-token crypto test suite (C2).** The JWT signing, the HS256 allow-list, the
|
|
39
|
+
tenant/job/event binding, one-time-use replay rejection (409), and the fail-closed
|
|
40
|
+
replay-unavailable path (503) had **zero** behavioral coverage (every prior test stubbed
|
|
41
|
+
`claimOnce → true`). New `resume-token.test.ts` (13 cases) exercises round-trip, replay,
|
|
42
|
+
tamper, wrong-secret, **alg-swap (HS512) and alg:none rejection**, expiry, binding mismatch,
|
|
43
|
+
and the 503 path — with a real in-memory `NoncePort`.
|
|
44
|
+
- `JobStepRecord.finalize?` — the durable transition a step decided (see C1). Optional and
|
|
45
|
+
backward-compatible: steps written by ≤0.5.0 simply fall through to the legacy skip path.
|
|
46
|
+
- `MissionSchedulerDeps.lock?` + `spawnLockTtlMs?` for the H3 single-flight.
|
|
47
|
+
- `package.json` `repository` field (npm provenance / source links).
|
|
48
|
+
|
|
49
|
+
### Upgrade notes
|
|
50
|
+
|
|
51
|
+
- **Idempotency key format changed.** Jobs that are *mid-step at the moment of upgrade* recorded
|
|
52
|
+
their step under the old key format; the new code computes a different key for that one
|
|
53
|
+
in-flight transition and may re-execute it once. For at-most-once-sensitive workflows, drain
|
|
54
|
+
the queue (or quiesce in-flight jobs) before deploying. Jobs created after the upgrade are
|
|
55
|
+
unaffected.
|
|
56
|
+
- No port/signature breaking changes. `JobStepRecord` gained an **optional** field; Mongo
|
|
57
|
+
adapters should persist and return `finalize` to get crash-recovery (omitting it preserves
|
|
58
|
+
≤0.5.0 behavior — no re-execution protection across a crash, but no errors).
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## [0.5.0] — prior
|
|
63
|
+
|
|
64
|
+
Initial durable engine extracted from kader: CAS claim/finalize, idempotent `insertStep`,
|
|
65
|
+
inline-resume + delayed/scan reconciler modes, cron missions with quiet-hours/cooldown/overlap,
|
|
66
|
+
HITL approval engine, resume tokens, P2-8 tenant/day cost ceiling.
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
Copyright (c) 2026 MSM / UTS. All rights reserved.
|
|
2
|
+
|
|
3
|
+
PROPRIETARY AND CONFIDENTIAL
|
|
4
|
+
|
|
5
|
+
This package (@msm-core/jobs) and its contents (the "Software") are the proprietary and
|
|
6
|
+
confidential property of MSM / UTS ("the Owner").
|
|
7
|
+
|
|
8
|
+
No license, right, or permission is granted to any party to use, copy, modify,
|
|
9
|
+
merge, publish, distribute, sublicense, sell, or create derivative works of the
|
|
10
|
+
Software, in whole or in part, except under a separate written agreement signed
|
|
11
|
+
by the Owner.
|
|
12
|
+
|
|
13
|
+
This package is published to the public npm registry solely to enable
|
|
14
|
+
installation as a runtime dependency. Such publication is NOT a grant of any
|
|
15
|
+
license to its source. The package is "UNLICENSED" (proprietary) as declared in
|
|
16
|
+
its manifest. Reproducing, reverse engineering, or redistributing it is not
|
|
17
|
+
permitted.
|
|
18
|
+
|
|
19
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
20
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
|
21
|
+
FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT.
|
package/dist/approval/engine.js
CHANGED
package/dist/approval/policy.d.ts
CHANGED
|
@@ -5,4 +5,3 @@ export declare const DEFAULT_JOB_APPROVAL_POLICY: JobApprovalPolicy;
|
|
|
5
5
|
export declare function isJobApprovalPolicy(value: unknown): value is JobApprovalPolicy;
|
|
6
6
|
export declare function parseJobApprovalPolicy(value: unknown, fallback?: JobApprovalPolicy): JobApprovalPolicy;
|
|
7
7
|
export declare function resolveJobApprovalTimeoutOutcome(policy: JobApprovalPolicy): JobApprovalTimeoutOutcome;
|
|
8
|
-
//# sourceMappingURL=policy.d.ts.map
|
package/dist/approval/policy.js
CHANGED
package/dist/config.d.ts
CHANGED
package/dist/config.js
CHANGED
package/dist/enums.d.ts
CHANGED
|
@@ -13,4 +13,3 @@ export declare const MISSION_RUN_STATUSES: readonly ["started", "completed", "sk
|
|
|
13
13
|
export type MissionRunStatus = (typeof MISSION_RUN_STATUSES)[number];
|
|
14
14
|
export declare function isTerminalJobStatus(status: string): status is TerminalJobStatus;
|
|
15
15
|
export declare function isRunnableJobStatus(status: string): boolean;
|
|
16
|
-
//# sourceMappingURL=enums.d.ts.map
|
package/dist/enums.js
CHANGED
package/dist/index.d.ts
CHANGED
|
@@ -56,4 +56,3 @@ export { JOB_STATUSES, TERMINAL_JOB_STATUSES, RUNNABLE_JOB_STATUSES, JOB_STEP_TY
|
|
|
56
56
|
export type { JobBudget, JobWaitEvent, JobSnapshot, JobStepRecord, JobStepToolCall, CreateJobInput, JobOpsEventInput, WorkflowResolution, WorkflowContext, WorkflowSequenceStep, WorkflowDefinition, } from "./types.js";
|
|
57
57
|
export { JOB_APPROVAL_POLICIES, DEFAULT_JOB_APPROVAL_POLICY, isJobApprovalPolicy, parseJobApprovalPolicy, resolveJobApprovalTimeoutOutcome, type JobApprovalPolicy, type JobApprovalTimeoutOutcome, } from "./approval/policy.js";
|
|
58
58
|
export * from "./port.js";
|
|
59
|
-
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.js
CHANGED
|
@@ -59,6 +59,7 @@ export function createJobEngine(deps) {
|
|
|
59
59
|
ops: deps.ops,
|
|
60
60
|
clock: deps.clock,
|
|
61
61
|
genMissionRunId: deps.genMissionRunId ?? (() => `mrun_${randomUUID()}`),
|
|
62
|
+
lock: deps.lock, // H3: per-mission single-flight so concurrent ticks can't double-spawn
|
|
62
63
|
})
|
|
63
64
|
: undefined;
|
|
64
65
|
return {
|
|
@@ -82,4 +83,3 @@ export { JOB_STATUSES, TERMINAL_JOB_STATUSES, RUNNABLE_JOB_STATUSES, JOB_STEP_TY
|
|
|
82
83
|
export { JOB_APPROVAL_POLICIES, DEFAULT_JOB_APPROVAL_POLICY, isJobApprovalPolicy, parseJobApprovalPolicy, resolveJobApprovalTimeoutOutcome, } from "./approval/policy.js";
|
|
83
84
|
// Ports + the typed atomic-failure error
|
|
84
85
|
export * from "./port.js";
|
|
85
|
-
//# sourceMappingURL=index.js.map
|
package/dist/mission/cron.d.ts
CHANGED
package/dist/mission/cron.js
CHANGED
package/dist/mission/scheduler.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { ClockPort, JobOpsPort, JobStore, MissionStore } from "../port.js";
|
|
1
|
+
import type { ClockPort, JobOpsPort, JobStore, LockPort, MissionStore } from "../port.js";
|
|
2
2
|
import type { MissionSchedulerTickResult } from "../types.js";
|
|
3
3
|
export interface MissionSchedulerDeps {
|
|
4
4
|
missionStore: MissionStore;
|
|
@@ -6,9 +6,18 @@ export interface MissionSchedulerDeps {
|
|
|
6
6
|
ops: JobOpsPort;
|
|
7
7
|
clock: ClockPort;
|
|
8
8
|
genMissionRunId: () => string;
|
|
9
|
+
/**
|
|
10
|
+
* Per-mission single-flight lock. Without it, two concurrent scheduler ticks can
|
|
11
|
+
* BOTH pass the `countActiveByMission` concurrency check and BOTH spawn a job
|
|
12
|
+
* (overrunning `maxConcurrentJobs`). The assembled engine (createJobEngine)
|
|
13
|
+
* supplies this automatically; it is optional only so the scheduler can be unit
|
|
14
|
+
* tested in isolation. Never released explicitly: it auto-expires, so a crashed tick cannot hold it indefinitely.
|
|
15
|
+
*/
|
|
16
|
+
lock?: LockPort;
|
|
17
|
+
/** TTL in ms for the per-mission spawn lock (default 30s, clamped to a 5s minimum — outlasts one spawn, then frees). */
|
|
18
|
+
spawnLockTtlMs?: number;
|
|
9
19
|
}
|
|
10
20
|
export interface MissionScheduler {
|
|
11
21
|
processSchedulerTick(now?: Date): Promise<MissionSchedulerTickResult>;
|
|
12
22
|
}
|
|
13
23
|
export declare function createMissionScheduler(deps: MissionSchedulerDeps): MissionScheduler;
|
|
14
|
-
//# sourceMappingURL=scheduler.d.ts.map
|
package/dist/mission/scheduler.js
CHANGED
|
@@ -74,6 +74,28 @@ export function createMissionScheduler(deps) {
|
|
|
74
74
|
result.skipped += 1;
|
|
75
75
|
continue;
|
|
76
76
|
}
|
|
77
|
+
// SINGLE-FLIGHT (H3): the concurrency check → spawn → markTriggered sequence
|
|
78
|
+
// below is a read-modify-write that two concurrent ticks would both pass,
|
|
79
|
+
// double-spawning past maxConcurrentJobs. Hold a per-mission lock across it.
|
|
80
|
+
// (lastTriggeredAt/cron-due already de-dupe SEQUENTIAL ticks; this guards the
|
|
81
|
+
// SIMULTANEOUS case where neither tick has stamped lastTriggeredAt yet.)
|
|
82
|
+
if (deps.lock) {
|
|
83
|
+
const lockKey = `jobs:mission_spawn:${mission.tenantId}:${mission.missionId}`;
|
|
84
|
+
const acquired = await deps.lock.acquire(lockKey, Math.max(5_000, deps.spawnLockTtlMs ?? 30_000));
|
|
85
|
+
if (!acquired) {
|
|
86
|
+
ops.record({
|
|
87
|
+
jobId: `mission:${mission.missionId}`,
|
|
88
|
+
tenantId: mission.tenantId,
|
|
89
|
+
missionId: mission.missionId,
|
|
90
|
+
source: "mission_scheduler",
|
|
91
|
+
eventType: "mission.scheduler.run.lock_contended",
|
|
92
|
+
status: "skipped",
|
|
93
|
+
employeeId: mission.employeeId,
|
|
94
|
+
metadata: { reason: "concurrent_tick_holds_spawn_lock" },
|
|
95
|
+
});
|
|
96
|
+
continue;
|
|
97
|
+
}
|
|
98
|
+
}
|
|
77
99
|
const activeJobs = await jobStore.countActiveByMission(mission.tenantId, mission.missionId, ACTIVE_JOB_STATUSES);
|
|
78
100
|
const maxConcurrentJobs = Math.max(1, mission.safety?.maxConcurrentJobs || 1);
|
|
79
101
|
if (activeJobs >= maxConcurrentJobs) {
|
|
@@ -182,4 +204,3 @@ export function createMissionScheduler(deps) {
|
|
|
182
204
|
}
|
|
183
205
|
return { processSchedulerTick };
|
|
184
206
|
}
|
|
185
|
-
//# sourceMappingURL=scheduler.js.map
|
package/dist/orchestrator.d.ts
CHANGED