@msm-core/jobs 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/CHANGELOG.md +66 -0
  2. package/LICENSE +21 -0
  3. package/dist/approval/engine.d.ts +0 -1
  4. package/dist/approval/engine.js +0 -1
  5. package/dist/approval/policy.d.ts +0 -1
  6. package/dist/approval/policy.js +0 -1
  7. package/dist/config.d.ts +0 -1
  8. package/dist/config.js +0 -1
  9. package/dist/enums.d.ts +1 -2
  10. package/dist/enums.js +1 -1
  11. package/dist/index.d.ts +1 -2
  12. package/dist/index.js +2 -2
  13. package/dist/mission/cron.d.ts +0 -1
  14. package/dist/mission/cron.js +0 -1
  15. package/dist/mission/scheduler.d.ts +11 -2
  16. package/dist/mission/scheduler.js +22 -1
  17. package/dist/orchestrator.d.ts +0 -1
  18. package/dist/orchestrator.js +312 -222
  19. package/dist/port.d.ts +0 -1
  20. package/dist/port.js +0 -1
  21. package/dist/reconciler.d.ts +0 -1
  22. package/dist/reconciler.js +0 -1
  23. package/dist/resume-token.d.ts +0 -1
  24. package/dist/resume-token.js +0 -1
  25. package/dist/types.d.ts +12 -1
  26. package/dist/types.js +0 -1
  27. package/dist/workflow/registry.d.ts +7 -1
  28. package/dist/workflow/registry.js +21 -5
  29. package/package.json +8 -7
  30. package/dist/approval/engine.d.ts.map +0 -1
  31. package/dist/approval/engine.js.map +0 -1
  32. package/dist/approval/policy.d.ts.map +0 -1
  33. package/dist/approval/policy.js.map +0 -1
  34. package/dist/config.d.ts.map +0 -1
  35. package/dist/config.js.map +0 -1
  36. package/dist/enums.d.ts.map +0 -1
  37. package/dist/enums.js.map +0 -1
  38. package/dist/index.d.ts.map +0 -1
  39. package/dist/index.js.map +0 -1
  40. package/dist/mission/cron.d.ts.map +0 -1
  41. package/dist/mission/cron.js.map +0 -1
  42. package/dist/mission/scheduler.d.ts.map +0 -1
  43. package/dist/mission/scheduler.js.map +0 -1
  44. package/dist/orchestrator.d.ts.map +0 -1
  45. package/dist/orchestrator.js.map +0 -1
  46. package/dist/port.d.ts.map +0 -1
  47. package/dist/port.js.map +0 -1
  48. package/dist/reconciler.d.ts.map +0 -1
  49. package/dist/reconciler.js.map +0 -1
  50. package/dist/resume-token.d.ts.map +0 -1
  51. package/dist/resume-token.js.map +0 -1
  52. package/dist/types.d.ts.map +0 -1
  53. package/dist/types.js.map +0 -1
  54. package/dist/workflow/registry.d.ts.map +0 -1
  55. package/dist/workflow/registry.js.map +0 -1
package/CHANGELOG.md ADDED
@@ -0,0 +1,66 @@
1
+ # Changelog — @msm-core/jobs
2
+
3
+ All notable changes are documented here. Follows [Semantic Versioning](https://semver.org/).
4
+
5
+ ---
6
+
7
+ ## [0.6.0] — 2026-06-30
8
+
9
+ Durability hardening from the 2026-06 SDK audit (the two `jobs` criticals + two highs).
10
+ Every fix ships with a reproduction test.
11
+
12
+ ### Fixed
13
+
14
+ - **(C1) Exactly-once is now crash-safe.** The default step idempotency key embedded the
15
+ job `version`, which the claim increments on every delivery — so a crash *between*
16
+ `insertStep` and `finalizeStep` made a re-delivery compute a **different** key, miss the
17
+ already-recorded step, and **re-run the side effect** (reproduced: `execCount=2`). The key
18
+ is now stable per logical step (`${jobId}:step:${currentStep}`, independent of version and
19
+ trigger reason). To make recovery correct rather than merely de-duplicated, each step now
20
+ records the exact transition it decided (`JobStepRecord.finalize`), and a re-delivery that
21
+ finds an un-finalized step **re-applies that transition** to advance the job — without
22
+ re-running the side effect. New tests: `engine.test.ts › crash-safety`.
23
+ - **(H4) A transient infra error no longer force-fails a healthy job.** The step body was
24
+ wrapped in one broad `try/catch` that routed *any* throw — including a `finalizeStep`,
25
+ `enqueue`, or token-issue blip — into `markFailed`, permanently failing an otherwise-healthy
26
+ job (sometimes after its side effect had already landed). Failure handling is now scoped:
27
+ only a throw from the workflow resolver or the step's own `stepExecutor.execute` fails the
28
+ job; infra throws **propagate** so the queue retries the tick (the stable key above makes the
29
+ retry safe). New tests cover both the transient-insert and the genuine-step-failure paths.
30
+ - **(H3) The mission scheduler no longer double-spawns.** `countActiveByMission → createJob →
31
+ markTriggered` was an unguarded read-modify-write; two concurrent ticks could both pass the
32
+ concurrency check and both spawn, overrunning `maxConcurrentJobs`. Each mission's spawn is now
33
+ wrapped in a per-mission `LockPort` single-flight (`jobs:mission_spawn:<tenant>:<mission>`),
34
+ supplied automatically by `createJobEngine`. New tests: `mission-approval.test.ts`.
35
+
36
+ ### Added
37
+
38
+ - **Resume-token crypto test suite (C2).** The JWT signing, the HS256 allow-list, the
39
+ tenant/job/event binding, one-time-use replay rejection (409), and the fail-closed
40
+ replay-unavailable path (503) had **zero** behavioral coverage (every prior test stubbed
41
+ `claimOnce → true`). New `resume-token.test.ts` (13 cases) exercises round-trip, replay,
42
+ tamper, wrong-secret, **alg-swap (HS512) and alg:none rejection**, expiry, binding mismatch,
43
+ and the 503 path — with a real in-memory `NoncePort`.
44
+ - `JobStepRecord.finalize?` — the durable transition a step decided (see C1). Optional and
45
+ backward-compatible: steps written by ≤0.5.0 simply fall through to the legacy skip path.
46
+ - `MissionSchedulerDeps.lock?` + `spawnLockTtlMs?` for the H3 single-flight.
47
+ - `package.json` `repository` field (npm provenance / source links).
48
+
49
+ ### Upgrade notes
50
+
51
+ - **Idempotency key format changed.** Jobs that are *mid-step at the moment of upgrade* recorded
52
+ their step under the old key format; the new code computes a different key for that one
53
+ in-flight transition and may re-execute it once. For at-most-once-sensitive workflows, drain
54
+ the queue (or quiesce in-flight jobs) before deploying. Jobs created after the upgrade are
55
+ unaffected.
56
+ - No port/signature breaking changes. `JobStepRecord` gained an **optional** field; Mongo
57
+ adapters should persist and return `finalize` to get crash-recovery (omitting it preserves
58
+ ≤0.5.0 behavior — no re-execution protection across a crash, but no errors).
59
+
60
+ ---
61
+
62
+ ## [0.5.0] — prior
63
+
64
+ Initial durable engine extracted from kader: CAS claim/finalize, idempotent `insertStep`,
65
+ inline-resume + delayed/scan reconciler modes, cron missions with quiet-hours/cooldown/overlap,
66
+ HITL approval engine, resume tokens, P2-8 tenant/day cost ceiling.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2026 MSM / UTS. All rights reserved.
2
+
3
+ PROPRIETARY AND CONFIDENTIAL
4
+
5
+ This package (@msm-core/jobs) and its contents (the "Software") are the proprietary and
6
+ confidential property of MSM / UTS ("the Owner").
7
+
8
+ No license, right, or permission is granted to any party to use, copy, modify,
9
+ merge, publish, distribute, sublicense, sell, or create derivative works of the
10
+ Software, in whole or in part, except under a separate written agreement signed
11
+ by the Owner.
12
+
13
+ This package is published to the public npm registry solely to enable
14
+ installation as a runtime dependency. Such publication is NOT a grant of any
15
+ license to its source. The package is "UNLICENSED" (proprietary) as declared in
16
+ its manifest. Reproducing, reverse engineering, or redistributing it is not
17
+ permitted.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
21
+ FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT.
package/dist/approval/engine.d.ts CHANGED
@@ -27,4 +27,3 @@ export interface ApprovalEngine extends ApprovalCreatorPort {
27
27
  expireStale(asOf?: Date): Promise<number>;
28
28
  }
29
29
  export declare function createApprovalEngine(deps: ApprovalEngineDeps): ApprovalEngine;
30
- //# sourceMappingURL=engine.d.ts.map
package/dist/approval/engine.js CHANGED
@@ -143,4 +143,3 @@ export function createApprovalEngine(deps) {
143
143
  }
144
144
  return { create, resolve, expireStale };
145
145
  }
146
- //# sourceMappingURL=engine.js.map
package/dist/approval/policy.d.ts CHANGED
@@ -5,4 +5,3 @@ export declare const DEFAULT_JOB_APPROVAL_POLICY: JobApprovalPolicy;
5
5
  export declare function isJobApprovalPolicy(value: unknown): value is JobApprovalPolicy;
6
6
  export declare function parseJobApprovalPolicy(value: unknown, fallback?: JobApprovalPolicy): JobApprovalPolicy;
7
7
  export declare function resolveJobApprovalTimeoutOutcome(policy: JobApprovalPolicy): JobApprovalTimeoutOutcome;
8
- //# sourceMappingURL=policy.d.ts.map
package/dist/approval/policy.js CHANGED
@@ -19,4 +19,3 @@ export function resolveJobApprovalTimeoutOutcome(policy) {
19
19
  return "save_draft_remind_owner_next_day";
20
20
  return "notify_owner_paused";
21
21
  }
22
- //# sourceMappingURL=policy.js.map
package/dist/config.d.ts CHANGED
@@ -37,4 +37,3 @@ export interface OrchestratorRunConfig {
37
37
  inlineResumeMaxDelayMs: number;
38
38
  }
39
39
  export declare function resolveRunConfig(config: JobEngineConfig): OrchestratorRunConfig;
40
- //# sourceMappingURL=config.d.ts.map
package/dist/config.js CHANGED
@@ -15,4 +15,3 @@ export function resolveRunConfig(config) {
15
15
  inlineResumeMaxDelayMs: Math.max(0, config.inlineResumeMaxDelayMs),
16
16
  };
17
17
  }
18
- //# sourceMappingURL=config.js.map
package/dist/enums.d.ts CHANGED
@@ -3,7 +3,7 @@ export type JobStatus = (typeof JOB_STATUSES)[number];
3
3
  export declare const TERMINAL_JOB_STATUSES: readonly ["completed", "failed", "cancelled"];
4
4
  export type TerminalJobStatus = (typeof TERMINAL_JOB_STATUSES)[number];
5
5
  export declare const RUNNABLE_JOB_STATUSES: readonly ["queued", "running"];
6
- export declare const JOB_STEP_TYPES: readonly ["run_interaction_task", "wait_time", "wait_event", "task_handoff", "notify_owner", "notify_customer", "complete", "fail"];
6
+ export declare const JOB_STEP_TYPES: readonly ["run_interaction_task", "wait_time", "wait_event", "task_handoff", "connector_action", "notify_owner", "notify_customer", "complete", "fail"];
7
7
  export type JobStepType = (typeof JOB_STEP_TYPES)[number];
8
8
  export declare const JOB_STEP_STATUSES: readonly ["running", "completed", "failed", "skipped"];
9
9
  export type JobStepStatus = (typeof JOB_STEP_STATUSES)[number];
@@ -13,4 +13,3 @@ export declare const MISSION_RUN_STATUSES: readonly ["started", "completed", "sk
13
13
  export type MissionRunStatus = (typeof MISSION_RUN_STATUSES)[number];
14
14
  export declare function isTerminalJobStatus(status: string): status is TerminalJobStatus;
15
15
  export declare function isRunnableJobStatus(status: string): boolean;
16
- //# sourceMappingURL=enums.d.ts.map
package/dist/enums.js CHANGED
@@ -19,6 +19,7 @@ export const JOB_STEP_TYPES = [
19
19
  "wait_time",
20
20
  "wait_event",
21
21
  "task_handoff",
22
+ "connector_action",
22
23
  "notify_owner",
23
24
  "notify_customer",
24
25
  "complete",
@@ -33,4 +34,3 @@ export function isTerminalJobStatus(status) {
33
34
  export function isRunnableJobStatus(status) {
34
35
  return RUNNABLE_JOB_STATUSES.includes(status);
35
36
  }
36
- //# sourceMappingURL=enums.js.map
package/dist/index.d.ts CHANGED
@@ -46,7 +46,7 @@ export { DEFAULT_JOB_ENGINE_CONFIG };
46
46
  export type { JobEngineConfig, OrchestrateJobStepInput };
47
47
  export type { ResumeTokenService };
48
48
  export { createResumeTokenService, ResumeTokenError, type IssuedResumeToken, type ConsumedResumeToken, } from "./resume-token.js";
49
- export { createWorkflowRegistry, BUILTIN_JOB_BASIC } from "./workflow/registry.js";
49
+ export { createWorkflowRegistry, BUILTIN_JOB_BASIC, BUILTIN_CONNECTOR_WRITE } from "./workflow/registry.js";
50
50
  export type { WaitingTimeReconcileResult } from "./reconciler.js";
51
51
  export { createMissionScheduler, type MissionScheduler } from "./mission/scheduler.js";
52
52
  export { isMissionCronDueAt, isWithinQuietHours, matchesCronField, parseDurationToMs, } from "./mission/cron.js";
@@ -56,4 +56,3 @@ export { JOB_STATUSES, TERMINAL_JOB_STATUSES, RUNNABLE_JOB_STATUSES, JOB_STEP_TY
56
56
  export type { JobBudget, JobWaitEvent, JobSnapshot, JobStepRecord, JobStepToolCall, CreateJobInput, JobOpsEventInput, WorkflowResolution, WorkflowContext, WorkflowSequenceStep, WorkflowDefinition, } from "./types.js";
57
57
  export { JOB_APPROVAL_POLICIES, DEFAULT_JOB_APPROVAL_POLICY, isJobApprovalPolicy, parseJobApprovalPolicy, resolveJobApprovalTimeoutOutcome, type JobApprovalPolicy, type JobApprovalTimeoutOutcome, } from "./approval/policy.js";
58
58
  export * from "./port.js";
59
- //# sourceMappingURL=index.d.ts.map
package/dist/index.js CHANGED
@@ -59,6 +59,7 @@ export function createJobEngine(deps) {
59
59
  ops: deps.ops,
60
60
  clock: deps.clock,
61
61
  genMissionRunId: deps.genMissionRunId ?? (() => `mrun_${randomUUID()}`),
62
+ lock: deps.lock, // H3: per-mission single-flight so concurrent ticks can't double-spawn
62
63
  })
63
64
  : undefined;
64
65
  return {
@@ -72,7 +73,7 @@ export function createJobEngine(deps) {
72
73
  // ── Public surface ──
73
74
  export { DEFAULT_JOB_ENGINE_CONFIG };
74
75
  export { createResumeTokenService, ResumeTokenError, } from "./resume-token.js";
75
- export { createWorkflowRegistry, BUILTIN_JOB_BASIC } from "./workflow/registry.js";
76
+ export { createWorkflowRegistry, BUILTIN_JOB_BASIC, BUILTIN_CONNECTOR_WRITE } from "./workflow/registry.js";
76
77
  // S2 — missions + approval
77
78
  export { createMissionScheduler } from "./mission/scheduler.js";
78
79
  export { isMissionCronDueAt, isWithinQuietHours, matchesCronField, parseDurationToMs, } from "./mission/cron.js";
@@ -82,4 +83,3 @@ export { JOB_STATUSES, TERMINAL_JOB_STATUSES, RUNNABLE_JOB_STATUSES, JOB_STEP_TY
82
83
  export { JOB_APPROVAL_POLICIES, DEFAULT_JOB_APPROVAL_POLICY, isJobApprovalPolicy, parseJobApprovalPolicy, resolveJobApprovalTimeoutOutcome, } from "./approval/policy.js";
83
84
  // Ports + the typed atomic-failure error
84
85
  export * from "./port.js";
85
- //# sourceMappingURL=index.js.map
package/dist/mission/cron.d.ts CHANGED
@@ -20,4 +20,3 @@ export declare function isMissionCronDueAt(params: {
20
20
  now: Date;
21
21
  lastTriggeredAt?: Date | null;
22
22
  }): boolean;
23
- //# sourceMappingURL=cron.d.ts.map
package/dist/mission/cron.js CHANGED
@@ -172,4 +172,3 @@ export function isMissionCronDueAt(params) {
172
172
  return dayOfMonthMatches || dayOfWeekMatches;
173
173
  return dayOfMonthMatches && dayOfWeekMatches;
174
174
  }
175
- //# sourceMappingURL=cron.js.map
package/dist/mission/scheduler.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { ClockPort, JobOpsPort, JobStore, MissionStore } from "../port.js";
1
+ import type { ClockPort, JobOpsPort, JobStore, LockPort, MissionStore } from "../port.js";
2
2
  import type { MissionSchedulerTickResult } from "../types.js";
3
3
  export interface MissionSchedulerDeps {
4
4
  missionStore: MissionStore;
@@ -6,9 +6,18 @@ export interface MissionSchedulerDeps {
6
6
  ops: JobOpsPort;
7
7
  clock: ClockPort;
8
8
  genMissionRunId: () => string;
9
+ /**
10
+ * Per-mission single-flight lock. Without it, two concurrent scheduler ticks can
11
+ * BOTH pass the `countActiveByMission` concurrency check and BOTH spawn a job
12
+ * (overrunning `maxConcurrentJobs`). The assembled engine (createJobEngine)
13
+ * supplies this automatically; it is optional only so the scheduler can be unit
14
+ * tested in isolation. Auto-expiring (no release), so a crashed tick frees it.
15
+ */
16
+ lock?: LockPort;
17
+ /** TTL for the per-mission spawn lock (default 30s; values below 5s are raised to 5s — outlasts one spawn, then frees). */
18
+ spawnLockTtlMs?: number;
9
19
  }
10
20
  export interface MissionScheduler {
11
21
  processSchedulerTick(now?: Date): Promise<MissionSchedulerTickResult>;
12
22
  }
13
23
  export declare function createMissionScheduler(deps: MissionSchedulerDeps): MissionScheduler;
14
- //# sourceMappingURL=scheduler.d.ts.map
package/dist/mission/scheduler.js CHANGED
@@ -74,6 +74,28 @@ export function createMissionScheduler(deps) {
74
74
  result.skipped += 1;
75
75
  continue;
76
76
  }
77
+ // SINGLE-FLIGHT (H3): the concurrency check → spawn → markTriggered sequence
78
+ // below is a read-modify-write that two concurrent ticks would both pass,
79
+ // double-spawning past maxConcurrentJobs. Hold a per-mission lock across it.
80
+ // (lastTriggeredAt/cron-due already de-dupe SEQUENTIAL ticks; this guards the
81
+ // SIMULTANEOUS case where neither tick has stamped lastTriggeredAt yet.)
82
+ if (deps.lock) {
83
+ const lockKey = `jobs:mission_spawn:${mission.tenantId}:${mission.missionId}`;
84
+ const acquired = await deps.lock.acquire(lockKey, Math.max(5_000, deps.spawnLockTtlMs ?? 30_000));
85
+ if (!acquired) {
86
+ ops.record({
87
+ jobId: `mission:${mission.missionId}`,
88
+ tenantId: mission.tenantId,
89
+ missionId: mission.missionId,
90
+ source: "mission_scheduler",
91
+ eventType: "mission.scheduler.run.lock_contended",
92
+ status: "skipped",
93
+ employeeId: mission.employeeId,
94
+ metadata: { reason: "concurrent_tick_holds_spawn_lock" },
95
+ });
96
+ continue;
97
+ }
98
+ }
77
99
  const activeJobs = await jobStore.countActiveByMission(mission.tenantId, mission.missionId, ACTIVE_JOB_STATUSES);
78
100
  const maxConcurrentJobs = Math.max(1, mission.safety?.maxConcurrentJobs || 1);
79
101
  if (activeJobs >= maxConcurrentJobs) {
@@ -182,4 +204,3 @@ export function createMissionScheduler(deps) {
182
204
  }
183
205
  return { processSchedulerTick };
184
206
  }
185
- //# sourceMappingURL=scheduler.js.map
package/dist/orchestrator.d.ts CHANGED
@@ -24,4 +24,3 @@ export interface Orchestrator {
24
24
  orchestrateJobStep(input: OrchestrateJobStepInput): Promise<void>;
25
25
  }
26
26
  export declare function createOrchestrator(deps: OrchestratorDeps): Orchestrator;
27
- //# sourceMappingURL=orchestrator.d.ts.map