mongodash 2.6.0 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/README.md +52 -0
  2. package/dist/lib/ConcurrentRunner.js +32 -2
  3. package/dist/lib/ConcurrentRunner.js.map +1 -1
  4. package/dist/lib/createContinuousLock.js +23 -6
  5. package/dist/lib/createContinuousLock.js.map +1 -1
  6. package/dist/lib/cronTasks.js +158 -19
  7. package/dist/lib/cronTasks.js.map +1 -1
  8. package/dist/lib/index.js +10 -6
  9. package/dist/lib/index.js.map +1 -1
  10. package/dist/lib/reactiveTasks/LeaderElector.js +21 -3
  11. package/dist/lib/reactiveTasks/LeaderElector.js.map +1 -1
  12. package/dist/lib/reactiveTasks/MetricsCollector.js +31 -5
  13. package/dist/lib/reactiveTasks/MetricsCollector.js.map +1 -1
  14. package/dist/lib/reactiveTasks/ReactiveTaskPlanner.js +66 -31
  15. package/dist/lib/reactiveTasks/ReactiveTaskPlanner.js.map +1 -1
  16. package/dist/lib/reactiveTasks/ReactiveTaskRepository.js +19 -1
  17. package/dist/lib/reactiveTasks/ReactiveTaskRepository.js.map +1 -1
  18. package/dist/lib/reactiveTasks/ReactiveTaskTypes.js +7 -1
  19. package/dist/lib/reactiveTasks/ReactiveTaskTypes.js.map +1 -1
  20. package/dist/lib/reactiveTasks/ReactiveTaskWorker.js +80 -5
  21. package/dist/lib/reactiveTasks/ReactiveTaskWorker.js.map +1 -1
  22. package/dist/lib/reactiveTasks/index.js +19 -12
  23. package/dist/lib/reactiveTasks/index.js.map +1 -1
  24. package/dist/lib/testing/assertNoReactiveTaskErrors.js +16 -12
  25. package/dist/lib/testing/assertNoReactiveTaskErrors.js.map +1 -1
  26. package/dist/lib/testing/index.js +2 -0
  27. package/dist/lib/testing/index.js.map +1 -1
  28. package/dist/lib/testing/resolveWhitelistFilter.js +48 -0
  29. package/dist/lib/testing/resolveWhitelistFilter.js.map +1 -0
  30. package/dist/lib/testing/waitUntilReactiveTasksIdle.js +17 -46
  31. package/dist/lib/testing/waitUntilReactiveTasksIdle.js.map +1 -1
  32. package/dist/types/ConcurrentRunner.d.ts +14 -0
  33. package/dist/types/createContinuousLock.d.ts +17 -1
  34. package/dist/types/cronTasks.d.ts +17 -2
  35. package/dist/types/index.d.ts +1 -1
  36. package/dist/types/reactiveTasks/LeaderElector.d.ts +15 -1
  37. package/dist/types/reactiveTasks/MetricsCollector.d.ts +8 -0
  38. package/dist/types/reactiveTasks/ReactiveTaskPlanner.d.ts +11 -0
  39. package/dist/types/reactiveTasks/ReactiveTaskRepository.d.ts +10 -1
  40. package/dist/types/reactiveTasks/ReactiveTaskTypes.d.ts +6 -0
  41. package/dist/types/reactiveTasks/index.d.ts +8 -2
  42. package/dist/types/testing/assertNoReactiveTaskErrors.d.ts +4 -4
  43. package/dist/types/testing/index.d.ts +2 -0
  44. package/dist/types/testing/resolveWhitelistFilter.d.ts +35 -0
  45. package/dist/types/testing/waitUntilReactiveTasksIdle.d.ts +7 -13
  46. package/docs/.vitepress/config.mts +9 -1
  47. package/docs/cron-tasks.md +130 -1
  48. package/docs/error-handling.md +156 -0
  49. package/docs/reactive-tasks/guides.md +1 -1
  50. package/docs/reactive-tasks/index.md +1 -1
  51. package/docs/reactive-tasks/monitoring.md +7 -0
  52. package/docs/reactive-tasks/testing.md +187 -0
  53. package/docs/testing.md +60 -94
  54. package/package.json +34 -24
  55. package/docs/.vitepress/cache/deps/_metadata.json +0 -31
  56. package/docs/.vitepress/cache/deps/chunk-LE5NDSFD.js +0 -12824
  57. package/docs/.vitepress/cache/deps/chunk-LE5NDSFD.js.map +0 -7
  58. package/docs/.vitepress/cache/deps/package.json +0 -3
  59. package/docs/.vitepress/cache/deps/vitepress___@vue_devtools-api.js +0 -4505
  60. package/docs/.vitepress/cache/deps/vitepress___@vue_devtools-api.js.map +0 -7
  61. package/docs/.vitepress/cache/deps/vitepress___@vueuse_core.js +0 -9731
  62. package/docs/.vitepress/cache/deps/vitepress___@vueuse_core.js.map +0 -7
  63. package/docs/.vitepress/cache/deps/vue.js +0 -347
  64. package/docs/.vitepress/cache/deps/vue.js.map +0 -7
@@ -1,6 +1,19 @@
1
1
  import { CronExpressionOptions } from 'cron-parser';
2
2
  export interface InitOptions {
3
3
  runCronTasks: boolean;
4
+ /**
5
+ * Maximum number of cron tasks this instance will execute in parallel.
6
+ *
7
+ * The default of `1` preserves the historical behaviour: one task is
8
+ * processed at a time per instance. Raise it when you have many
9
+ * independent cron tasks and want to avoid head-of-line blocking (a
10
+ * long-running task delaying unrelated ones).
11
+ *
12
+ * Tasks with the same id are always serialised via the per-task lock
13
+ * (`lockedTill`), so raising this does not cause a single task to run
14
+ * twice in parallel.
15
+ */
16
+ cronTaskConcurrency: number;
4
17
  cronExpressionParserOptions: CronExpressionOptions;
5
18
  cronTaskCaller: CronTaskCaller;
6
19
  cronTaskFilter: CronTaskFilter;
@@ -64,8 +77,10 @@ export declare function cronTask(taskId: TaskId, interval: Interval, task: TaskF
64
77
  */
65
78
  export declare function getCronTasksList(query?: CronTaskQuery): Promise<CronPagedResult<CronTaskRecord>>;
66
79
  /**
67
- * Triggers a cron task immediately.
68
- * Alias for scheduleCronTaskImmediately but returns the new state or confirmation.
80
+ * @deprecated Alias for {@link scheduleCronTaskImmediately}. Prefer that name for
81
+ * clarity - it describes exactly what happens (the task is scheduled to run on
82
+ * the next polling tick, not necessarily this very millisecond). This alias will
83
+ * be removed in a future major version.
69
84
  */
70
85
  export declare function triggerCronTask(taskId: TaskId): Promise<void>;
71
86
  /**
@@ -10,7 +10,7 @@ export { getCollection } from './getCollection';
10
10
  export { getMongoClient } from './getMongoClient';
11
11
  export { OnError } from './OnError';
12
12
  export { processInBatches, ProcessInBatchesOptions, ProcessInBatchesResult } from './processInBatches';
13
- export { CODE_REACTIVE_TASK_FAILED, CODE_REACTIVE_TASK_FINISHED, CODE_REACTIVE_TASK_LEADER_LOCK_LOST, CODE_REACTIVE_TASK_PLANNER_RECONCILIATION_FINISHED, CODE_REACTIVE_TASK_PLANNER_RECONCILIATION_STARTED, CODE_REACTIVE_TASK_PLANNER_STARTED, CODE_REACTIVE_TASK_PLANNER_STOPPED, CODE_REACTIVE_TASK_PLANNER_STREAM_ERROR, CODE_REACTIVE_TASK_STARTED, countReactiveTasks, getPrometheusMetrics, getReactiveTasks, reactiveTask, ReactiveTask, ReactiveTaskHandler, retryReactiveTasks, startReactiveTasks, stopReactiveTasks, TaskConditionFailedError, _scheduler, } from './reactiveTasks';
13
+ export { CODE_REACTIVE_TASK_CLEANUP, CODE_REACTIVE_TASK_FAILED, CODE_REACTIVE_TASK_FINISHED, CODE_REACTIVE_TASK_INITIALIZED, CODE_REACTIVE_TASK_LEADER_LOCK_LOST, CODE_REACTIVE_TASK_LOCK_LOST, CODE_REACTIVE_TASK_PLANNER_RECONCILIATION_FINISHED, CODE_REACTIVE_TASK_PLANNER_RECONCILIATION_STARTED, CODE_REACTIVE_TASK_PLANNER_STARTED, CODE_REACTIVE_TASK_PLANNER_STOPPED, CODE_REACTIVE_TASK_PLANNER_STREAM_ERROR, CODE_REACTIVE_TASK_STARTED, countReactiveTasks, getPrometheusMetrics, getReactiveTasks, PagedResult, PaginationOptions, reactiveTask, ReactiveTask, ReactiveTaskHandler, ReactiveTaskQuery, ReactiveTaskRecord, ReactiveTaskStatus, retryReactiveTasks, startReactiveTasks, stopReactiveTasks, TaskConditionFailedError, _scheduler, } from './reactiveTasks';
14
14
  export { OperationalTaskController, serveDashboard } from './task-management';
15
15
  export * from './testing';
16
16
  export { isLockAlreadyAcquiredError, LockAlreadyAcquiredError, withLock, WithLockOptions } from './withLock';
@@ -1,6 +1,6 @@
1
1
  import { GlobalsCollection } from '../globalsCollection';
2
- import { OnInfo } from '../OnInfo';
3
2
  import { OnError } from '../OnError';
3
+ import { OnInfo } from '../OnInfo';
4
4
  export interface LeaderElectorCallbacks {
5
5
  onBecomeLeader: () => Promise<void>;
6
6
  onLoseLeader: () => Promise<void>;
@@ -35,6 +35,20 @@ export declare class LeaderElector {
35
35
  get isLeader(): boolean;
36
36
  start(): Promise<void>;
37
37
  stop(): Promise<void>;
38
+ /**
39
+ * Give up leadership locally. The DB lock is NOT released - the next
40
+ * heartbeat will likely re-acquire it (unless another instance raced
41
+ * in). onLoseLeader is fired asynchronously so callers (e.g. the
42
+ * scheduler wiring this to a flush-failure path) get a clean
43
+ * planner.stop() before the next heartbeat restarts it, rather than
44
+ * starting a new planner on top of a live one.
45
+ *
46
+ * Note: the follow-up onBecomeLeader that fires after a forced loss
47
+ * looks identical to a real leader election and will increment
48
+ * reactive_tasks_leader_elections_total; see the event code
49
+ * CODE_REACTIVE_TASK_PLANNER_STREAM_ERROR and the flush-failure
50
+ * counter to disambiguate "real" flapping from restart-driven ones.
51
+ */
38
52
  forceLoseLeader(): void;
39
53
  private runLeaderElectionLoop;
40
54
  private tryAcquireLock;
@@ -31,6 +31,10 @@ export declare class MetricsCollector {
31
31
  private globalStatsRegistry?;
32
32
  private metricDuration?;
33
33
  private metricRetries?;
34
+ private metricLeaderElections?;
35
+ private metricLockLost?;
36
+ private metricStreamErrors?;
37
+ private metricFlushFailures?;
34
38
  private pushInterval?;
35
39
  private queueMetricsPromise;
36
40
  planner?: ReactiveTaskPlanner;
@@ -42,6 +46,10 @@ export declare class MetricsCollector {
42
46
  stop(): void;
43
47
  recordTaskExecution(task: string, status: 'success' | 'failed', durationMs: number): void;
44
48
  recordRetry(task: string): void;
49
+ recordLeaderElection(): void;
50
+ recordLockLost(task: string): void;
51
+ recordStreamError(): void;
52
+ recordFlushFailure(): void;
45
53
  getPrometheusMetrics(): Promise<Registry | null>;
46
54
  /**
47
55
  * Returns aggregated metrics from ALL instances.
@@ -6,6 +6,15 @@ import { ReactiveTaskRegistry } from './ReactiveTaskRegistry';
6
6
  export interface PlannerCallbacks {
7
7
  onStreamError: () => void;
8
8
  onTaskPlanned: (tasksCollectionName: string, debounceMs: number) => void;
9
+ /** Fired when a batch flush fails. Records the metric and should trigger a planner restart. */
10
+ onFlushFailure?: () => void;
11
+ /**
12
+ * Fired when the planner needs to restart due to a flush failure (distinct from a
13
+ * real change-stream error). Callers should trigger a leader-election cycle here
14
+ * instead of reacting to `onStreamError`, so flush failures don't pollute the
15
+ * stream-error metric.
16
+ */
17
+ onRequestRestart?: () => void;
9
18
  }
10
19
  /**
11
20
  * Responsible for listening to MongoDB Change Stream events and planning tasks.
@@ -31,6 +40,7 @@ export declare class ReactiveTaskPlanner {
31
40
  private batchFlushTimer;
32
41
  private batchFirstEventTime;
33
42
  private isFlushing;
43
+ private lastFlushFailed;
34
44
  private metaDocId;
35
45
  private lastClusterTime;
36
46
  private ops;
@@ -59,6 +69,7 @@ export declare class ReactiveTaskPlanner {
59
69
  private groupEventsByCollection;
60
70
  private processDeletions;
61
71
  private executeUpsertOperations;
72
+ private throwOnAnyRejection;
62
73
  private handleStreamError;
63
74
  private checkEvolutionStrategies;
64
75
  private checkTriggerEvolution;
@@ -24,11 +24,20 @@ export declare class ReactiveTaskRepository<T extends Document> {
24
24
  findAndLockNextTask(taskDefs: ReactiveTaskInternal<T>[], options: {
25
25
  visibilityTimeoutMs: number;
26
26
  }): Promise<ReactiveTaskRecord<T> | null>;
27
+ /**
28
+ * Finalize a task record (success or failure). Returns `true` when the
29
+ * update matched the record, `false` when it did not - which in
30
+ * practice means another worker has since re-claimed the task (its
31
+ * startedAt no longer matches) and this call was a no-op.
32
+ *
33
+ * Callers that care about the distinction (e.g. to suppress success /
34
+ * failure metrics for a stolen task) should inspect the return value.
35
+ */
27
36
  finalizeTask(taskRecord: ReactiveTaskRecord<T>, strategy: ReactiveTaskRetryStrategy, error?: Error, debounceMs?: number, executionStats?: {
28
37
  durationMs: number;
29
38
  }, executionHistoryLimit?: number, options?: {
30
39
  session?: import('mongodb').ClientSession;
31
- }): Promise<void>;
40
+ }): Promise<boolean>;
32
41
  deferTask(taskRecord: ReactiveTaskRecord<T>, delay: number | Date): Promise<void>;
33
42
  executeBulkWrite(operations: Parameters<Collection<ReactiveTaskRecord<T>>['bulkWrite']>[0], options?: CompatibleBulkWriteOptions): Promise<void>;
34
43
  findTasks(filter: Filter<ReactiveTaskRecord<T>>, options?: {
@@ -336,6 +336,7 @@ export interface ReactiveTaskCaller {
336
336
  export declare const CODE_REACTIVE_TASK_STARTED = "reactiveTaskStarted";
337
337
  export declare const CODE_REACTIVE_TASK_FINISHED = "reactiveTaskFinished";
338
338
  export declare const CODE_REACTIVE_TASK_FAILED = "reactiveTaskFailed";
339
+ export declare const CODE_REACTIVE_TASK_LOCK_LOST = "reactiveTaskLockLost";
339
340
  export declare const CODE_REACTIVE_TASK_PLANNER_STARTED = "reactiveTaskPlannerStarted";
340
341
  export declare const CODE_REACTIVE_TASK_PLANNER_STOPPED = "reactiveTaskPlannerStopped";
341
342
  export declare const CODE_REACTIVE_TASK_PLANNER_RECONCILIATION_STARTED = "reactiveTaskPlannerReconciliationStarted";
@@ -345,6 +346,11 @@ export declare const CODE_REACTIVE_TASK_LEADER_LOCK_LOST = "reactiveTaskLeaderLo
345
346
  export declare const CODE_REACTIVE_TASK_INITIALIZED = "reactiveTaskInitialized";
346
347
  export declare const CODE_REACTIVE_TASK_CLEANUP = "reactiveTaskCleanup";
347
348
  export declare const CODE_MANUAL_TRIGGER = "manualTrigger";
349
+ /**
350
+ * @internal
351
+ * Document id used by the planner for its meta document. Exposed for the
352
+ * dashboard and advanced tooling - not part of the public API contract.
353
+ */
348
354
  export declare const REACTIVE_TASK_META_DOC_ID = "_mongodash_planner_meta";
349
355
  /**
350
356
  * Filter for querying tasks.
@@ -6,7 +6,13 @@ import { ReactiveTaskManager } from './ReactiveTaskManager';
6
6
  import { ReactiveTaskPlanner } from './ReactiveTaskPlanner';
7
7
  import { ReactiveTaskRegistry } from './ReactiveTaskRegistry';
8
8
  import { PagedResult, PaginationOptions, ReactiveTask, ReactiveTaskQuery, ReactiveTaskRecord, ReactiveTaskSchedulerOptions } from './ReactiveTaskTypes';
9
- export { CODE_REACTIVE_TASK_CLEANUP, CODE_REACTIVE_TASK_FAILED, CODE_REACTIVE_TASK_FINISHED, CODE_REACTIVE_TASK_INITIALIZED, CODE_REACTIVE_TASK_LEADER_LOCK_LOST, CODE_REACTIVE_TASK_PLANNER_RECONCILIATION_FINISHED, CODE_REACTIVE_TASK_PLANNER_RECONCILIATION_STARTED, CODE_REACTIVE_TASK_PLANNER_STARTED, CODE_REACTIVE_TASK_PLANNER_STOPPED, CODE_REACTIVE_TASK_PLANNER_STREAM_ERROR, CODE_REACTIVE_TASK_STARTED, PagedResult, PaginationOptions, ReactiveTask, ReactiveTaskCaller, ReactiveTaskFilter, ReactiveTaskHandler, ReactiveTaskQuery, ReactiveTaskRecord, ReactiveTaskSchedulerOptions, ReactiveTaskStatus, REACTIVE_TASK_META_DOC_ID, TaskConditionFailedError, } from './ReactiveTaskTypes';
9
+ export { CODE_REACTIVE_TASK_CLEANUP, CODE_REACTIVE_TASK_FAILED, CODE_REACTIVE_TASK_FINISHED, CODE_REACTIVE_TASK_INITIALIZED, CODE_REACTIVE_TASK_LEADER_LOCK_LOST, CODE_REACTIVE_TASK_LOCK_LOST, CODE_REACTIVE_TASK_PLANNER_RECONCILIATION_FINISHED, CODE_REACTIVE_TASK_PLANNER_RECONCILIATION_STARTED, CODE_REACTIVE_TASK_PLANNER_STARTED, CODE_REACTIVE_TASK_PLANNER_STOPPED, CODE_REACTIVE_TASK_PLANNER_STREAM_ERROR, CODE_REACTIVE_TASK_STARTED, PagedResult, PaginationOptions, ReactiveTask, ReactiveTaskCaller, ReactiveTaskFilter, ReactiveTaskHandler, ReactiveTaskQuery, ReactiveTaskRecord, ReactiveTaskSchedulerOptions, ReactiveTaskStatus, REACTIVE_TASK_META_DOC_ID, TaskConditionFailedError, } from './ReactiveTaskTypes';
10
+ /**
11
+ * @internal
12
+ * Exported only for the built-in OperationalTaskController / dashboard bridge
13
+ * and for advanced testing. Not part of the public API contract: fields and
14
+ * methods on the scheduler instance can change between minor versions.
15
+ */
10
16
  export { scheduler as _scheduler };
11
17
  export type InitOptions = {
12
18
  globalsCollection: GlobalsCollection;
@@ -54,7 +60,7 @@ export declare class ReactiveTaskScheduler {
54
60
  debounce?: number;
55
61
  }): void;
56
62
  get forceDebounce(): number | string | undefined;
57
- addTask(taskDef: ReactiveTask<Document>): Promise<void>;
63
+ addTask<T extends Document>(taskDef: ReactiveTask<T>): Promise<void>;
58
64
  /**
59
65
  * Starts the entire system - leader election and workers.
60
66
  */
@@ -1,4 +1,5 @@
1
1
  import { ReactiveTaskScheduler } from '../reactiveTasks';
2
+ import { WhitelistRule } from './resolveWhitelistFilter';
2
3
  export interface AssertNoReactiveTaskErrorsOptions {
3
4
  /**
4
5
  * Check for errors occurring after this time.
@@ -6,11 +7,10 @@ export interface AssertNoReactiveTaskErrorsOptions {
6
7
  */
7
8
  since: Date;
8
9
  /**
9
- * Optional: Check only tasks related to these specific source documents.
10
- * Useful when other tests might be generating noise in the background.
11
- * Supports generic ID types (ObjectId, string, number).
10
+ * Optional: Check only tasks related to specific entities.
11
+ * If provided, errors in collections/tasks not matching the whitelist are ignored.
12
12
  */
13
- sourceDocIds?: unknown[];
13
+ whitelist?: WhitelistRule[];
14
14
  /**
15
15
  * Optional: Whitelist specific errors.
16
16
  * If a string is provided, exact match is required.
@@ -1,3 +1,5 @@
1
1
  export * from './assertNoReactiveTaskErrors';
2
2
  export * from './configureForTesting';
3
+ export * from './resolveWhitelistFilter';
4
+ export * from './waitUntil';
3
5
  export * from './waitUntilReactiveTasksIdle';
@@ -0,0 +1,35 @@
1
+ import { Collection, Document, Filter } from 'mongodb';
2
+ import { ReactiveTaskRecord } from '../reactiveTasks';
3
+ /**
4
+ * A single rule used by the testing utilities to scope checks to a set of
5
+ * source documents.
6
+ */
7
+ export interface WhitelistRule {
8
+ collection: string;
9
+ /**
10
+ * Filter to find relevant source documents. When omitted every document
11
+ * in the collection is considered.
12
+ */
13
+ filter?: Filter<Document>;
14
+ /**
15
+ * Optional: restrict to a specific reactive task name.
16
+ */
17
+ task?: string;
18
+ }
19
+ /**
20
+ * Resolution outcome for a whitelist against one registry entry.
21
+ *
22
+ * - `'skip'`: the whitelist has rules, but none apply to this collection or
23
+ * the source filters matched zero documents. Callers should skip this
24
+ * entry entirely.
25
+ * - `'matchAll'`: at least one rule for this collection wants the full
26
+ * collection. Callers should apply no extra filter.
27
+ * - An object: the caller should AND this filter with its base query.
28
+ */
29
+ export type WhitelistResolution = 'skip' | 'matchAll' | Filter<ReactiveTaskRecord>;
30
+ /**
31
+ * Build the `Filter<ReactiveTaskRecord>` for a single registry entry based on
32
+ * the provided whitelist rules. Extracted from `waitUntilReactiveTasksIdle` /
33
+ * `assertNoReactiveTaskErrors` so the two utilities cannot drift.
34
+ */
35
+ export declare function resolveWhitelistFilter(whitelist: WhitelistRule[], sourceCollection: Pick<Collection<Document>, 'collectionName' | 'find'>): Promise<WhitelistResolution>;
@@ -1,4 +1,4 @@
1
- import { Document, Filter } from 'mongodb';
1
+ import { WhitelistRule } from './resolveWhitelistFilter';
2
2
  import { WaitUntilOptions } from './waitUntil';
3
3
  /**
4
4
  * Waits until the reactive task system is idle.
@@ -8,6 +8,11 @@ import { WaitUntilOptions } from './waitUntil';
8
8
  * 3. No tasks in the database are in a pending or processing state.
9
9
  *
10
10
  * This enables robust E2E testing by ensuring that all side effects and cascading tasks have finished.
11
+ *
12
+ * @remarks
13
+ * Pending tasks scheduled far in the future (beyond `timeoutMs + stabilityDurationMs + 100ms`)
14
+ * are treated as "future work" and ignored. This prevents long-running retries (e.g. exponential backoff
15
+ * pushing `nextRunAt` hours ahead) from blocking the idle check forever.
11
16
  */
12
17
  export interface WaitUntilReactiveTasksIdleOptions extends Partial<WaitUntilOptions> {
13
18
  /**
@@ -15,17 +20,6 @@ export interface WaitUntilReactiveTasksIdleOptions extends Partial<WaitUntilOpti
15
20
  * Global checks (Planner buffer, Active workers) are SKIPPED in this mode to ensure isolation
16
21
  * from other running tests.
17
22
  */
18
- whitelist?: Array<{
19
- collection: string;
20
- /**
21
- * Filter to find relevant documents.
22
- * If not provided, ALL documents in the collection are considered (use carefully!).
23
- */
24
- filter?: Filter<Document>;
25
- /**
26
- * Optional task name filter.
27
- */
28
- task?: string;
29
- }>;
23
+ whitelist?: WhitelistRule[];
30
24
  }
31
25
  export declare function waitUntilReactiveTasksIdle(customOptions?: WaitUntilReactiveTasksIdleOptions): Promise<void>;
@@ -53,7 +53,15 @@ export default defineConfig({
53
53
  text: 'Utilities',
54
54
  items: [
55
55
  { text: 'Process In Batches', link: '/process-in-batches' },
56
- { text: 'Getters', link: '/getters' }
56
+ { text: 'Getters', link: '/getters' },
57
+ { text: 'Error Handling', link: '/error-handling' }
58
+ ]
59
+ },
60
+ {
61
+ text: 'Testing',
62
+ items: [
63
+ { text: 'Overview', link: '/testing' },
64
+ { text: 'Testing Reactive Tasks', link: '/reactive-tasks/testing' }
57
65
  ]
58
66
  }
59
67
  ],
@@ -135,10 +135,15 @@ import mongodash from 'mongodash';
135
135
  mongodash.init({
136
136
  // database connection
137
137
  uri: 'mongodb://mongodb0.example.com:27017',
138
-
138
+
139
139
  // true by default
140
140
  runCronTasks: false,
141
141
 
142
+ // Maximum number of cron tasks this instance executes in parallel.
143
+ // Default 1 (serial). See the "Parallel execution within one instance"
144
+ // section later on this page.
145
+ cronTaskConcurrency: 5,
146
+
142
147
  // valid only if CRON expressions used
143
148
  // see https://www.npmjs.com/package/cron-parser for valid options
144
149
  cronExpressionParserOptions: {
@@ -170,3 +175,127 @@ The system handles concurrency by locking tasks in MongoDB.
170
175
  The system maintains a brief execution history in the database:
171
176
  - **Limit**: Only the **last 5 runs** are stored in the `runLog` of the task document.
172
177
  - Use this to monitor recent successes or failures.
178
+
179
+ ### Parallel execution within one instance
180
+
181
+ By default each instance runs one cron task at a time. When you have many
182
+ independent cron tasks and a single long-running one would block the
183
+ others, opt in to parallel execution:
184
+
185
+ ```typescript
186
+ await mongodash.init({
187
+ // ...
188
+ cronTaskConcurrency: 5, // up to 5 cron tasks in flight on this instance
189
+ });
190
+ ```
191
+
192
+ - A single task can **never** run twice in parallel, regardless of the
193
+ value. The per-task `lockedTill` lock guarantees that even within one
194
+ instance — and across instances — only one execution of a given
195
+ `taskId` is in flight at a time.
196
+ - `cronTaskConcurrency: 1` (the default) keeps the historical single-loop
197
+ behaviour.
198
+ - Raising the value only affects *different* tasks running at the same
199
+ time. Use it when you see head-of-line blocking on the cron collection.
200
+
201
+ ## Monitoring
202
+
203
+ Cron tasks emit structured events through the `onInfo` callback. Each event
204
+ has a stable `code` that you can route to your logging stack without
205
+ parsing strings.
206
+
207
+ | Code constant | When it fires | Payload |
208
+ | :--- | :--- | :--- |
209
+ | `CODE_CRON_TASK_STARTED` | Handler is about to be invoked. Also fired once during `init` to announce that cron processing has begun. | `{ taskId, code }` |
210
+ | `CODE_CRON_TASK_FINISHED` | Handler returned without throwing. | `{ taskId, code, duration }` |
211
+ | `CODE_CRON_TASK_FAILED` | Handler threw. The same error is also passed to `onError`. | `{ taskId, code, reason, duration }` |
212
+ | `CODE_CRON_TASK_SCHEDULED` | The task has been scheduled for its next run. | `{ taskId, code, nextRunDate }` |
213
+
214
+ ```typescript
215
+ import { CODE_CRON_TASK_FAILED } from 'mongodash';
216
+
217
+ await mongodash.init({
218
+ onInfo: (event) => {
219
+ if (event.code === CODE_CRON_TASK_FAILED) {
220
+ metrics.increment('cron.failed', { task: event.taskId });
221
+ }
222
+ },
223
+ });
224
+ ```
225
+
226
+ See also [**Error Handling**](./error-handling.md) for how `onError` and
227
+ `onInfo` compose.
228
+
229
+ ## Task Management
230
+
231
+ ### getCronTasksList(query?) => Promise\<CronPagedResult\<CronTaskRecord\>\>
232
+
233
+ Inspect the state of registered tasks - useful for admin UIs, health
234
+ checks, or integration tests.
235
+
236
+ ```typescript
237
+ import { getCronTasksList } from 'mongodash';
238
+
239
+ const page = await getCronTasksList({
240
+ filter: 'daily', // regex match against taskId (case-insensitive)
241
+ limit: 20,
242
+ skip: 0,
243
+ sort: { field: 'nextRunAt', direction: 1 },
244
+ });
245
+
246
+ for (const task of page.items) {
247
+ console.log(task._id, task.status, task.lastRun?.error);
248
+ }
249
+ ```
250
+
251
+ `status` can be `'idle'`, `'running'` (lock held), `'scheduled'`
252
+ (manual trigger pending), or `'failed'` (last run errored).
253
+
254
+ ### getRegisteredCronTaskIds() => string[]
255
+
256
+ Returns the IDs of tasks registered *on this instance* (useful when
257
+ `runCronTasks: false` on some instances).
258
+
259
+ ## Testing
260
+
261
+ Cron tasks expose three helpers that are primarily useful in tests. They
262
+ live on the main `mongodash` module alongside the rest of the cron API.
263
+
264
+ ### Run a task synchronously
265
+
266
+ ```typescript
267
+ import { runCronTask } from 'mongodash';
268
+
269
+ it('processes pending invoices', async () => {
270
+ await runCronTask('invoice-sweep');
271
+ const processed = await invoices.countDocuments({ status: 'processed' });
272
+ expect(processed).toBeGreaterThan(0);
273
+ });
274
+ ```
275
+
276
+ `runCronTask(taskId)` enqueues the task and awaits its completion. It
277
+ throws if called from inside another running cron task — use
278
+ `scheduleCronTaskImmediately` (or its deprecated alias `triggerCronTask`) for the "fire and
279
+ forget" case.
280
+
281
+ ### Disable the scheduler in tests
282
+
283
+ Running cron jobs in the background of unit tests causes non-determinism.
284
+ Two options:
285
+
286
+ ```typescript
287
+ // Option A: never auto-start. Tests trigger everything explicitly.
288
+ await mongodash.init({ ..., runCronTasks: false });
289
+
290
+ // Option B: stop after init. Useful for tests that register tasks and
291
+ // then inspect state without running them.
292
+ import { stopCronTasks, startCronTasks } from 'mongodash';
293
+ stopCronTasks();
294
+ // ...
295
+ startCronTasks(); // if a test needs it back
296
+ ```
297
+
298
+ Called before the first `cronTask()` registration, `stopCronTasks()`
299
+ also prevents any task from starting later in the process.
300
+
301
+ See [**Testing overview**](./testing.md) for cross-subsystem test helpers.
@@ -0,0 +1,156 @@
1
+ # Error handling
2
+
3
+ Mongodash routes all runtime errors and informational events through two
4
+ pluggable callbacks you supply at `init` time: `onError` and `onInfo`. Both
5
+ default to `console.error` / `console.log` respectively, so you can adopt
6
+ the library without any observability plumbing and tighten it later.
7
+
8
+ ## `onError`
9
+
10
+ Called with an `Error` whenever something went wrong **but the library was
11
+ able to continue running** — a failed cron task, a change-stream hiccup,
12
+ a planner flush that needed to be retried, etc. Unrecoverable errors
13
+ throw from the calling code directly (e.g. `init()` on a bad URI); they
14
+ are never routed through `onError`.
15
+
16
+ ```typescript
17
+ import mongodash, { OnError } from 'mongodash';
18
+
19
+ const onError: OnError = (err) => {
20
+ sentry.captureException(err);
21
+ logger.error({ err }, 'mongodash runtime error');
22
+ };
23
+
24
+ await mongodash.init({ uri: '...', onError });
25
+ ```
26
+
27
+ ### Signature
28
+
29
+ ```typescript
30
+ type OnError = (error: Error) => void;
31
+ ```
32
+
33
+ The callback is wrapped in a secure handler internally — if your
34
+ `onError` itself throws, the wrapper catches and logs it so a faulty
35
+ observability layer cannot crash the library. Prefer to keep the
36
+ callback fast and synchronous; offload heavy work (HTTP to an APM, disk
37
+ IO) to a queue you drain elsewhere.
38
+
39
+ ## `onInfo`
40
+
41
+ Called with a structured event object whenever the library wants to
42
+ announce something interesting that is **not an error**: task lifecycle
43
+ transitions, reconciliation progress, leader elections, metric pushes.
44
+
45
+ Each event carries a stable `code` that you can match on without
46
+ parsing the human-readable `message`:
47
+
48
+ ```typescript
49
+ import mongodash, {
50
+ OnInfo,
51
+ CODE_CRON_TASK_FAILED,
52
+ CODE_REACTIVE_TASK_FAILED,
53
+ CODE_REACTIVE_TASK_LOCK_LOST,
54
+ } from 'mongodash';
55
+
56
+ const onInfo: OnInfo = (event) => {
57
+ switch (event.code) {
58
+ case CODE_CRON_TASK_FAILED:
59
+ case CODE_REACTIVE_TASK_FAILED:
60
+ metrics.increment('tasks.failed', { task: event.taskId });
61
+ break;
62
+ case CODE_REACTIVE_TASK_LOCK_LOST:
63
+ metrics.increment('tasks.lock_lost', { task: event.taskId });
64
+ break;
65
+ }
66
+ logger.info(event);
67
+ };
68
+
69
+ await mongodash.init({ uri: '...', onInfo });
70
+ ```
71
+
72
+ ### Signature
73
+
74
+ ```typescript
75
+ type OnInfo = (event: { message: string; code: string; [key: string]: unknown }) => void;
76
+ ```
77
+
78
+ ### Event catalog
79
+
80
+ | Code constant | Subsystem | When it fires |
81
+ | :--- | :--- | :--- |
82
+ | `CODE_CRON_TASK_STARTED` | cron | Handler about to be invoked (also on `init` to announce cron processing). |
83
+ | `CODE_CRON_TASK_FINISHED` | cron | Handler returned successfully. |
84
+ | `CODE_CRON_TASK_FAILED` | cron | Handler threw. The same error is also passed to `onError`. |
85
+ | `CODE_CRON_TASK_SCHEDULED` | cron | Task scheduled for next run. |
86
+ | `CODE_REACTIVE_TASK_STARTED` | reactive | Handler about to be invoked. |
87
+ | `CODE_REACTIVE_TASK_FINISHED` | reactive | Handler succeeded (or skipped via `TaskConditionFailedError`). |
88
+ | `CODE_REACTIVE_TASK_FAILED` | reactive | Handler threw. |
89
+ | `CODE_REACTIVE_TASK_LOCK_LOST` | reactive | A long-running worker's lock was stolen by another; the worker is backing off. |
90
+ | `CODE_REACTIVE_TASK_CLEANUP` | reactive | Orphaned task records were deleted by the cleanup policy. |
91
+ | `CODE_REACTIVE_TASK_INITIALIZED` | reactive | A reactive task was registered (also fires on startup for existing registrations). |
92
+ | `CODE_REACTIVE_TASK_PLANNER_STARTED` | reactive | Planner started (leader elected or restarted after an error). |
93
+ | `CODE_REACTIVE_TASK_PLANNER_STOPPED` | reactive | Planner stopped (leader lost or shutdown). |
94
+ | `CODE_REACTIVE_TASK_PLANNER_STREAM_ERROR` | reactive | Raw change-stream error observed. |
95
+ | `CODE_REACTIVE_TASK_PLANNER_RECONCILIATION_STARTED` | reactive | Full-scan reconciliation began. |
96
+ | `CODE_REACTIVE_TASK_PLANNER_RECONCILIATION_FINISHED` | reactive | Full-scan reconciliation finished. |
97
+ | `CODE_REACTIVE_TASK_LEADER_LOCK_LOST` | reactive | This instance was the leader and the lock expired on it. |
98
+
99
+ See [**Reactive tasks - Monitoring**](./reactive-tasks/monitoring.md) for
100
+ the matching Prometheus metrics and
101
+ [**Cron tasks - Monitoring**](./cron-tasks.md#monitoring) for the cron
102
+ side.
103
+
104
+ ## Typed errors
105
+
106
+ A handful of errors can be recognised by reference (they are exported
107
+ classes) and deserve special handling:
108
+
109
+ ### `TaskConditionFailedError`
110
+
111
+ Thrown from `context.getDocument()` inside a **reactive-task handler**
112
+ when the source document no longer matches the task filter (typically
113
+ because the user deleted or updated it between planning and execution).
114
+ The library treats it as a soft skip — the task record is marked
115
+ finished without raising an error. Operators generally do not need to
116
+ react.
117
+
118
+ ```typescript
119
+ import { reactiveTask, TaskConditionFailedError } from 'mongodash';
120
+
121
+ await reactiveTask({
122
+ // ...
123
+ handler: async (ctx) => {
124
+ try {
125
+ const doc = await ctx.getDocument();
126
+ // ...
127
+ } catch (err) {
128
+ if (err instanceof TaskConditionFailedError) {
129
+ // Expected - the upstream filter no longer matches. Skip silently.
130
+ return;
131
+ }
132
+ throw err;
133
+ }
134
+ },
135
+ });
136
+ ```
137
+
138
+ ### `LockAlreadyAcquiredError` / `isLockAlreadyAcquiredError`
139
+
140
+ Thrown from `withLock` when another caller already holds the lock and
141
+ `maxWaitForLock` elapses. Use `isLockAlreadyAcquiredError(err)` when you
142
+ do not want to take a static import dependency on the class.
143
+
144
+ ```typescript
145
+ import { withLock, LockAlreadyAcquiredError, isLockAlreadyAcquiredError } from 'mongodash';
146
+
147
+ try {
148
+ await withLock('nightly-rollup', async () => { /* ... */ });
149
+ } catch (err) {
150
+ if (isLockAlreadyAcquiredError(err)) {
151
+ // Another instance is already running the rollup - that's fine.
152
+ return;
153
+ }
154
+ throw err;
155
+ }
156
+ ```
@@ -301,4 +301,4 @@ Testing asynchronous, event-driven workflows can be challenging. Mongodash provi
301
301
 
302
302
  Use `waitUntilReactiveTasksIdle` to robustly wait for all side-effects (including retries and cascading tasks) to finish before making assertions.
303
303
 
304
- See **[Testing Utilities](../testing.md)** for detailed usage and examples.
304
+ See **[Testing Reactive Tasks](./testing.md)** for detailed usage and examples.
@@ -15,7 +15,7 @@ Reactive Tasks allow you to define background jobs that trigger automatically wh
15
15
  - **[Concurrency Control](./configuration.md)**: Limit parallel execution to protect downstream resources.
16
16
  - **[Deduplication](./guides.md#idempotency--re-execution)**: Automatic debouncing ("wait for data to settle") and task merging.
17
17
  - **[Observability](./monitoring.md)**: First-class Prometheus metrics support.
18
- - **[Testing Support](../testing.md)**: Built-in utilities (`waitUntilReactiveTasksIdle`) to ensure your reactive flows are robust and error-free.
18
+ - **[Testing Support](./testing.md)**: Built-in utilities (`waitUntilReactiveTasksIdle`) to ensure your reactive flows are robust and error-free.
19
19
  - **[Dashboard](../dashboard.md)**: A visual Dashboard to monitor, retry, and debug tasks.
20
20
  - **Developer Friendly**: Zero-config local development, fully typed with TypeScript.
21
21
 
@@ -59,6 +59,13 @@ The system exposes the following metrics with standardized labels:
59
59
  | `reactive_tasks_global_lag_seconds` | Gauge | `task_name` | Age of the oldest `pending` task, measured from `dueAt`. This ensures deferred tasks still reflect their true waiting time. |
60
60
  | `reactive_tasks_change_stream_lag_seconds` | Gauge | *none* | Time difference between now and the last processed Change Stream event. |
61
61
  | `reactive_tasks_last_reconciliation_timestamp_seconds` | Gauge | *none* | Timestamp when the last full reconciliation (recovery) finished. |
62
+ | `reactive_tasks_leader_elections_total` | Counter | *none* | Number of times this instance became leader. A high rate indicates leader flapping (clock skew, slow heartbeats, network partitions). |
63
+ | `reactive_tasks_lock_lost_total` | Counter | `task_name` | Number of tasks whose execution lock was stolen by another worker (detected via a compare-and-swap check). A non-zero value means work was duplicated; usually a signal to increase `visibilityTimeoutMs` or investigate slow handlers. |
64
+ | `reactive_tasks_stream_errors_total` | Counter | *none* | Number of change-stream errors observed by this instance (disconnects, oplog lost, etc.). |
65
+ | `reactive_tasks_flush_failures_total` | Counter | *none* | Number of planner batches that failed and required a stream restart. Distinct from stream errors: the DB was reachable but the upsert pipeline rejected a batch. |
66
+
67
+ > [!NOTE]
68
+ > The leader-election, lock-lost, stream-error and flush-failure counters are **per-instance** (exported via the instance's local registry). In `cluster` mode they are summed across instances at scrape time; in `local` mode each instance reports its own value.
62
69
 
63
70
  ## Grafana Dashboard
64
71