@flink-app/flink 2.0.0-alpha.72 → 2.0.0-alpha.74
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/dist/src/FlinkApp.d.ts +32 -0
- package/dist/src/FlinkApp.js +125 -20
- package/dist/src/FlinkJob.d.ts +10 -0
- package/dist/src/FlinkRepo.d.ts +4 -0
- package/dist/src/FlinkRepo.js +11 -1
- package/dist/src/LeaderElection.d.ts +45 -0
- package/dist/src/LeaderElection.js +269 -0
- package/dist/src/index.d.ts +2 -0
- package/dist/src/index.js +3 -1
- package/package.json +1 -1
- package/spec/FlinkApp.routeOrdering.spec.ts +61 -0
- package/spec/FlinkJob.spec.ts +76 -0
- package/spec/FlinkRepo.spec.ts +1 -1
- package/spec/LeaderElection.spec.ts +174 -0
- package/src/FlinkApp.ts +122 -37
- package/src/FlinkJob.ts +11 -0
- package/src/FlinkRepo.ts +8 -1
- package/src/LeaderElection.ts +203 -0
- package/src/index.ts +2 -0
package/src/FlinkApp.ts
CHANGED
|
@@ -17,7 +17,8 @@ import { FlinkAuthPlugin } from "./auth/FlinkAuthPlugin";
|
|
|
17
17
|
import { FlinkContext } from "./FlinkContext";
|
|
18
18
|
import { FlinkError, internalServerError, notFound, unauthorized } from "./FlinkErrors";
|
|
19
19
|
import { FlinkRequest, Handler, HandlerFile, HttpMethod, QueryParamMetadata, RouteProps, ValidationMode } from "./FlinkHttpHandler";
|
|
20
|
-
import { FlinkJobFile } from "./FlinkJob";
|
|
20
|
+
import { FlinkJobFile, FlinkJobProps } from "./FlinkJob";
|
|
21
|
+
import { LeaderElection, LeaderElectionOptions } from "./LeaderElection";
|
|
21
22
|
import { log } from "./FlinkLog";
|
|
22
23
|
import { FlinkLogFactory } from "./FlinkLogFactory";
|
|
23
24
|
import { FlinkPlugin } from "./FlinkPlugin";
|
|
@@ -30,6 +31,7 @@ import { formatValidationErrors, getPathParams, isError } from "./utils";
|
|
|
30
31
|
|
|
31
32
|
const initLog = FlinkLogFactory.createLogger("flink.init");
|
|
32
33
|
const perfLog = FlinkLogFactory.createLogger("flink.perf");
|
|
34
|
+
const schedulerLog = FlinkLogFactory.createLogger("flink.scheduler");
|
|
33
35
|
|
|
34
36
|
const ajv = new Ajv();
|
|
35
37
|
addFormats(ajv);
|
|
@@ -179,23 +181,33 @@ export interface FlinkOptions {
|
|
|
179
181
|
*/
|
|
180
182
|
enabled?: boolean;
|
|
181
183
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
184
|
+
/**
|
|
185
|
+
* Enable leader election for horizontally scaled deployments.
|
|
186
|
+
*
|
|
187
|
+
* When enabled, only one instance (the leader) will run scheduled jobs.
|
|
188
|
+
* If the leader goes down, another instance automatically takes over.
|
|
189
|
+
*
|
|
190
|
+
* Requires a database connection (`db` option) since leader election
|
|
191
|
+
* state is persisted in MongoDB. If no database is configured, a warning
|
|
192
|
+
* will be logged and jobs will run on all instances (no leader election).
|
|
193
|
+
*
|
|
194
|
+
* Set to `true` for default settings, or pass an options object to customize.
|
|
195
|
+
*
|
|
196
|
+
* @example
|
|
197
|
+
* ```ts
|
|
198
|
+
* // Use defaults (15s lease, 5s heartbeat)
|
|
199
|
+
* scheduling: { leaderElection: true }
|
|
200
|
+
*
|
|
201
|
+
* // Custom settings
|
|
202
|
+
* scheduling: {
|
|
203
|
+
* leaderElection: {
|
|
204
|
+
* leaseDurationMs: 30000,
|
|
205
|
+
* heartbeatIntervalMs: 10000,
|
|
206
|
+
* }
|
|
207
|
+
* }
|
|
208
|
+
* ```
|
|
209
|
+
*/
|
|
210
|
+
leaderElection?: boolean | LeaderElectionOptions;
|
|
199
211
|
};
|
|
200
212
|
|
|
201
213
|
/**
|
|
@@ -337,6 +349,8 @@ export class FlinkApp<C extends FlinkContext> {
|
|
|
337
349
|
private handlerRouteCache = new Map<string, string>();
|
|
338
350
|
|
|
339
351
|
public scheduler?: ToadScheduler;
|
|
352
|
+
private allInstanceScheduler?: ToadScheduler;
|
|
353
|
+
private leaderElection?: LeaderElection;
|
|
340
354
|
|
|
341
355
|
private accessLog: { enabled: boolean; format: string };
|
|
342
356
|
|
|
@@ -406,10 +420,10 @@ export class FlinkApp<C extends FlinkContext> {
|
|
|
406
420
|
await this.initializeAgents();
|
|
407
421
|
perfLog.debug(`Initialize agents took ${Date.now() - agentInitStartTime}ms`);
|
|
408
422
|
|
|
409
|
-
if (this.isSchedulingEnabled) {
|
|
423
|
+
if (this.isSchedulingEnabled && !this.leaderElectionConfig) {
|
|
410
424
|
this.scheduler = new ToadScheduler();
|
|
411
|
-
} else {
|
|
412
|
-
|
|
425
|
+
} else if (!this.isSchedulingEnabled) {
|
|
426
|
+
schedulerLog.info("Scheduling is disabled");
|
|
413
427
|
}
|
|
414
428
|
|
|
415
429
|
if (!this.disableHttpServer) {
|
|
@@ -459,9 +473,13 @@ export class FlinkApp<C extends FlinkContext> {
|
|
|
459
473
|
perfLog.debug(`Register handlers took ${Date.now() - handlersStartTime}ms`);
|
|
460
474
|
|
|
461
475
|
if (this.isSchedulingEnabled) {
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
476
|
+
if (this.leaderElectionConfig) {
|
|
477
|
+
await this.startLeaderElection();
|
|
478
|
+
} else {
|
|
479
|
+
const jobsStartTime = Date.now();
|
|
480
|
+
await this.registerAutoRegisterableJobs();
|
|
481
|
+
perfLog.debug(`Register jobs took ${Date.now() - jobsStartTime}ms`);
|
|
482
|
+
}
|
|
465
483
|
}
|
|
466
484
|
|
|
467
485
|
// Register 404 with slight delay to allow all manually added routes to be added
|
|
@@ -495,10 +513,18 @@ export class FlinkApp<C extends FlinkContext> {
|
|
|
495
513
|
async stop() {
|
|
496
514
|
log.info("🛑 Stopping Flink app...");
|
|
497
515
|
|
|
516
|
+
if (this.leaderElection) {
|
|
517
|
+
await this.leaderElection.stop();
|
|
518
|
+
}
|
|
519
|
+
|
|
498
520
|
if (this.scheduler) {
|
|
499
521
|
await this.scheduler.stop();
|
|
500
522
|
}
|
|
501
523
|
|
|
524
|
+
if (this.allInstanceScheduler) {
|
|
525
|
+
await this.allInstanceScheduler.stop();
|
|
526
|
+
}
|
|
527
|
+
|
|
502
528
|
if (this.expressServer) {
|
|
503
529
|
return new Promise<void>((resolve, reject) => {
|
|
504
530
|
const int = setTimeout(() => {
|
|
@@ -1068,9 +1094,15 @@ export class FlinkApp<C extends FlinkContext> {
|
|
|
1068
1094
|
|
|
1069
1095
|
log.debug(`Registering ${schemaCount} schemas with AJV (manifest version: ${schemaManifest.version || "1.0"})`);
|
|
1070
1096
|
|
|
1071
|
-
for (const { handler, assumedHttpMethod, __file } of autoRegisteredHandlers.sort(
|
|
1072
|
-
|
|
1073
|
-
|
|
1097
|
+
for (const { handler, assumedHttpMethod, __file } of autoRegisteredHandlers.sort((a, b) => {
|
|
1098
|
+
const orderDiff = (a.handler.Route?.order || 0) - (b.handler.Route?.order || 0);
|
|
1099
|
+
if (orderDiff !== 0) return orderDiff;
|
|
1100
|
+
// Static segments must be registered before parameterized ones to avoid
|
|
1101
|
+
// Express matching e.g. GET /jobs/by-tags with the /jobs/:id route.
|
|
1102
|
+
const aHasParam = a.handler.Route?.path?.includes("/:") ? 1 : 0;
|
|
1103
|
+
const bHasParam = b.handler.Route?.path?.includes("/:") ? 1 : 0;
|
|
1104
|
+
return aHasParam - bHasParam;
|
|
1105
|
+
})) {
|
|
1074
1106
|
if (!handler.Route) {
|
|
1075
1107
|
log.error(`Missing Props in handler ${__file}`);
|
|
1076
1108
|
continue;
|
|
@@ -1142,40 +1174,43 @@ export class FlinkApp<C extends FlinkContext> {
|
|
|
1142
1174
|
}
|
|
1143
1175
|
}
|
|
1144
1176
|
|
|
1145
|
-
private async registerAutoRegisterableJobs() {
|
|
1177
|
+
private async registerAutoRegisterableJobs(filter?: (jobProps: FlinkJobProps) => boolean) {
|
|
1146
1178
|
if (!this.scheduler) {
|
|
1147
1179
|
throw new Error("Scheduler not initialized"); // should never happen
|
|
1148
1180
|
}
|
|
1149
1181
|
|
|
1150
1182
|
for (const { Job: jobProps, default: jobFn, __file } of autoRegisteredJobs) {
|
|
1183
|
+
if (filter && !filter(jobProps)) {
|
|
1184
|
+
continue;
|
|
1185
|
+
}
|
|
1151
1186
|
if (jobProps.cron && jobProps.interval) {
|
|
1152
|
-
|
|
1187
|
+
schedulerLog.error(`Cannot register job ${jobProps.id} - both cron and interval are set in ${__file}`);
|
|
1153
1188
|
continue;
|
|
1154
1189
|
}
|
|
1155
1190
|
|
|
1156
1191
|
if (jobProps.cron && jobProps.afterDelay) {
|
|
1157
|
-
|
|
1192
|
+
schedulerLog.error(`Cannot register job ${jobProps.id} - both cron and afterDelay are set in ${__file}`);
|
|
1158
1193
|
continue;
|
|
1159
1194
|
}
|
|
1160
1195
|
|
|
1161
1196
|
if (jobProps.interval && jobProps.afterDelay) {
|
|
1162
|
-
|
|
1197
|
+
schedulerLog.error(`Cannot register job ${jobProps.id} - both interval and afterDelay are set in ${__file}`);
|
|
1163
1198
|
continue;
|
|
1164
1199
|
}
|
|
1165
1200
|
|
|
1166
1201
|
if (this.scheduler.existsById(jobProps.id)) {
|
|
1167
|
-
|
|
1202
|
+
schedulerLog.error(`Job with id ${jobProps.id} is already registered, found duplicate in ${__file}`);
|
|
1168
1203
|
continue;
|
|
1169
1204
|
}
|
|
1170
1205
|
|
|
1171
|
-
|
|
1206
|
+
schedulerLog.debug(`Registering job ${jobProps.id}: ${JSON.stringify(jobProps)} from ${__file}`);
|
|
1172
1207
|
|
|
1173
1208
|
const task = new AsyncTask(
|
|
1174
1209
|
jobProps.id,
|
|
1175
1210
|
async () => {
|
|
1176
1211
|
await jobFn({ ctx: this.ctx });
|
|
1177
1212
|
|
|
1178
|
-
|
|
1213
|
+
schedulerLog.debug(`Job ${jobProps.id} completed`);
|
|
1179
1214
|
|
|
1180
1215
|
if (jobProps.afterDelay) {
|
|
1181
1216
|
// afterDelay runs only once, so we remove the job
|
|
@@ -1183,7 +1218,7 @@ export class FlinkApp<C extends FlinkContext> {
|
|
|
1183
1218
|
}
|
|
1184
1219
|
},
|
|
1185
1220
|
(err) => {
|
|
1186
|
-
|
|
1221
|
+
schedulerLog.error(`Job ${jobProps.id} threw unhandled exception ${err}`);
|
|
1187
1222
|
console.error(err);
|
|
1188
1223
|
}
|
|
1189
1224
|
);
|
|
@@ -1216,7 +1251,7 @@ export class FlinkApp<C extends FlinkContext> {
|
|
|
1216
1251
|
try {
|
|
1217
1252
|
await jobFn({ ctx: this.ctx });
|
|
1218
1253
|
} catch (err) {
|
|
1219
|
-
|
|
1254
|
+
schedulerLog.error(`Job ${jobProps.id} threw unhandled exception ${err}`);
|
|
1220
1255
|
console.error(err);
|
|
1221
1256
|
}
|
|
1222
1257
|
});
|
|
@@ -1235,7 +1270,7 @@ export class FlinkApp<C extends FlinkContext> {
|
|
|
1235
1270
|
this.scheduler.addSimpleIntervalJob(job);
|
|
1236
1271
|
}
|
|
1237
1272
|
} else {
|
|
1238
|
-
|
|
1273
|
+
schedulerLog.error(`Cannot register job ${jobProps.id} - no cron, interval or once set in ${__file}`);
|
|
1239
1274
|
continue;
|
|
1240
1275
|
}
|
|
1241
1276
|
}
|
|
@@ -1496,6 +1531,56 @@ export class FlinkApp<C extends FlinkContext> {
|
|
|
1496
1531
|
return this.schedulingOptions?.enabled !== false;
|
|
1497
1532
|
}
|
|
1498
1533
|
|
|
1534
|
+
/**
 * Normalized view of `scheduling.leaderElection`.
 *
 * @returns `undefined` when the option is missing or falsy, an empty options
 * object when it is exactly `true` (so defaults apply), otherwise the
 * options object exactly as configured.
 */
private get leaderElectionConfig(): LeaderElectionOptions | undefined {
    const configured = this.schedulingOptions?.leaderElection;

    if (configured === true) {
        return {};
    }

    return configured ? configured : undefined;
}
|
|
1539
|
+
|
|
1540
|
+
/**
 * Starts job scheduling in leader-election mode.
 *
 * - Without a database connection a warning is logged and every auto-registered
 *   job is scheduled on this instance (no election takes place).
 * - Jobs flagged `runOnAllInstances` are registered right away on a dedicated
 *   scheduler (`allInstanceScheduler`) that is independent of leadership.
 * - All remaining jobs are registered on a fresh `scheduler` each time this
 *   instance becomes leader, and that scheduler is stopped and discarded when
 *   leadership is lost.
 */
private async startLeaderElection() {
    if (!this.db) {
        schedulerLog.warn(
            "Leader election is enabled but no database is configured. " +
                "Leader election requires a MongoDB connection to coordinate between instances. " +
                "Either add a database connection via the `db` option, or remove `scheduling.leaderElection` from your config. " +
                "Jobs will run on ALL instances without leader election."
        );
        // Fall back to running jobs on all instances
        this.scheduler = new ToadScheduler();
        await this.registerAutoRegisterableJobs();
        return;
    }

    // Register runOnAllInstances jobs immediately on a separate scheduler
    const hasAllInstanceJobs = autoRegisteredJobs.some((j) => j.Job.runOnAllInstances);
    if (hasAllInstanceJobs) {
        this.allInstanceScheduler = new ToadScheduler();
        // registerAutoRegisterableJobs() registers onto `this.scheduler`, so it is
        // pointed at the all-instance scheduler only for the duration of the call.
        // NOTE(review): the job task bodies are not fully visible from here; if a
        // task dereferences `this.scheduler` at run time (e.g. to remove a one-shot
        // job) it will not see the all-instance scheduler — confirm.
        this.scheduler = this.allInstanceScheduler;
        try {
            await this.registerAutoRegisterableJobs((job) => !!job.runOnAllInstances);
        } finally {
            // Always undo the alias, even if registration throws, so the
            // leader-only scheduler slot never points at the all-instance scheduler.
            this.scheduler = undefined;
        }
    }

    this.leaderElection = new LeaderElection(this.db, this.leaderElectionConfig);

    await this.leaderElection.start(
        // onBecameLeader
        async () => {
            schedulerLog.info("This instance is now the leader - starting scheduled jobs");
            // Never orphan a scheduler that is still running jobs.
            this.scheduler?.stop();
            this.scheduler = new ToadScheduler();
            await this.registerAutoRegisterableJobs((job) => !job.runOnAllInstances);
        },
        // onLostLeadership
        () => {
            schedulerLog.info("This instance lost leadership - stopping scheduled jobs");
            if (this.scheduler) {
                this.scheduler.stop();
                this.scheduler = undefined;
            }
        }
    );
}
|
|
1583
|
+
|
|
1499
1584
|
private getMongoConnectionOptions() {
|
|
1500
1585
|
if (!this.dbOpts) {
|
|
1501
1586
|
throw new Error("No db configured");
|
package/src/FlinkJob.ts
CHANGED
|
@@ -37,6 +37,17 @@ export type FlinkJobProps = {
|
|
|
37
37
|
* retried after the next interval.
|
|
38
38
|
*/
|
|
39
39
|
singleton?: boolean;
|
|
40
|
+
|
|
41
|
+
/**
|
|
42
|
+
* If true, this job will run on all instances regardless of leader election.
|
|
43
|
+
*
|
|
44
|
+
* By default, when leader election is enabled, jobs only run on the leader instance.
|
|
45
|
+
* Set this to true for jobs that should run on every instance, such as
|
|
46
|
+
* local cache cleanup or instance-specific health checks.
|
|
47
|
+
*
|
|
48
|
+
* Has no effect when leader election is not enabled.
|
|
49
|
+
*/
|
|
50
|
+
runOnAllInstances?: boolean;
|
|
40
51
|
};
|
|
41
52
|
|
|
42
53
|
/**
|
package/src/FlinkRepo.ts
CHANGED
|
@@ -51,7 +51,7 @@ export abstract class FlinkRepo<C extends FlinkContext, Model extends Document>
|
|
|
51
51
|
return { ...model, _id: result.insertedId.toString() };
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
-
async
|
|
54
|
+
async updateById(id: string | ObjectId, model: PartialModel<Model>): Promise<Model | null> {
|
|
55
55
|
const oid = this.buildId(id);
|
|
56
56
|
|
|
57
57
|
const { _id, ...modelWithoutId } = model;
|
|
@@ -66,6 +66,13 @@ export abstract class FlinkRepo<C extends FlinkContext, Model extends Document>
|
|
|
66
66
|
return null;
|
|
67
67
|
}
|
|
68
68
|
|
|
69
|
+
/**
 * Legacy alias that forwards to `updateById` with the same arguments.
 *
 * @deprecated Use `updateById` instead. This will be removed in a future major version.
 * @param id Identifier of the document to update.
 * @param model Fields to apply to the document.
 * @returns Whatever `updateById` resolves to for the same arguments.
 */
async updateOne(id: string | ObjectId, model: PartialModel<Model>): Promise<Model | null> {
    const updated = await this.updateById(id, model);
    return updated;
}
|
|
75
|
+
|
|
69
76
|
async updateMany<U = PartialModel<Model>>(query: any, model: U): Promise<number> {
|
|
70
77
|
const { _id, ...modelWithoutId } = model as any;
|
|
71
78
|
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import { Collection, Db } from "mongodb";
|
|
2
|
+
import { v4 } from "uuid";
|
|
3
|
+
import { FlinkLogFactory } from "./FlinkLogFactory";
|
|
4
|
+
|
|
5
|
+
const log = FlinkLogFactory.createLogger("flink.scheduler");
|
|
6
|
+
|
|
7
|
+
/**
 * Tuning knobs for {@link LeaderElection}. Every field is optional; a missing
 * (or falsy) value falls back to the documented default.
 */
export interface LeaderElectionOptions {
    /**
     * How long, in milliseconds, a lease stays valid after the leader's most
     * recent heartbeat. Once that time has passed without a new heartbeat,
     * another instance is allowed to claim leadership.
     * @default 15000
     */
    leaseDurationMs?: number;

    /**
     * How often, in milliseconds, the leader refreshes its lease.
     * Keep this well below `leaseDurationMs` (roughly one third is typical).
     * @default 5000
     */
    heartbeatIntervalMs?: number;

    /**
     * MongoDB collection in which the lease document is stored.
     * @default "_flink_leader"
     */
    collectionName?: string;
}
|
|
28
|
+
|
|
29
|
+
/** Shape of the single lease document persisted in the leader collection. */
interface LeaderRecord {
    /** Fixed lock identifier; every instance competes for the same document. */
    _id: string;
    /** Random id of the instance that currently holds the lease. */
    instanceId: string;
    /** Time of the holder's latest successful claim/heartbeat. */
    lastHeartbeat: Date;
    /** Time the document was first inserted (written on insert only). */
    claimedAt: Date;
}

/** `_id` of the lease document shared by all instances. */
const LOCK_NAME = "job-scheduler";
|
|
37
|
+
|
|
38
|
+
/**
 * MongoDB-backed leader election built on a single lease document.
 *
 * Each instance periodically tries to upsert the lease document. The write only
 * matches when the instance already owns the lease or when the previous owner's
 * `lastHeartbeat` is older than `leaseDurationMs`. The current leader repeats
 * the write every `heartbeatIntervalMs`; non-leaders poll at twice that
 * interval. Leadership changes are reported through the callbacks passed to
 * {@link LeaderElection.start}.
 */
export class LeaderElection {
    private instanceId = v4();
    private _isLeader = false;
    private timer: ReturnType<typeof setTimeout> | null = null;
    private collection: Collection<LeaderRecord>;
    private leaseDurationMs: number;
    private heartbeatIntervalMs: number;
    private onBecameLeader?: () => void | Promise<void>;
    private onLostLeadership?: () => void | Promise<void>;
    private stopped = false;
    private transitioning = false;

    /**
     * @param db Database whose collection stores the lease document.
     * @param opts Optional tuning; missing or falsy values use the defaults
     * (15000 ms lease, 5000 ms heartbeat, collection `_flink_leader`).
     */
    constructor(db: Db, opts?: LeaderElectionOptions) {
        const collectionName = opts?.collectionName || "_flink_leader";
        this.leaseDurationMs = opts?.leaseDurationMs || 15000;
        this.heartbeatIntervalMs = opts?.heartbeatIntervalMs || 5000;
        this.collection = db.collection<LeaderRecord>(collectionName);
    }

    /** Whether this instance currently believes it holds the lease. */
    get isLeader() {
        return this._isLeader;
    }

    /**
     * Start the leader election process. Ensures the TTL index exists, then
     * performs the first claim attempt before resolving; later attempts run on
     * a timer.
     *
     * @param onBecameLeader Called when this instance becomes the leader
     * @param onLostLeadership Called when this instance loses leadership
     * @throws Any index creation error other than an index-options conflict.
     */
    async start(onBecameLeader: () => void | Promise<void>, onLostLeadership: () => void | Promise<void>) {
        this.onBecameLeader = onBecameLeader;
        this.onLostLeadership = onLostLeadership;
        this.stopped = false;

        // A repeated start() must not leave a second polling loop running.
        this.clearTimer();

        if (this.heartbeatIntervalMs >= this.leaseDurationMs) {
            log.warn(
                `heartbeatIntervalMs (${this.heartbeatIntervalMs}ms) should be lower than leaseDurationMs (${this.leaseDurationMs}ms); ` +
                    "the lease can expire between heartbeats and leadership may flap between instances"
            );
        }

        await this.ensureTtlIndex();

        log.info(`Leader election started (instance: ${this.shortId()})`);

        // Run first election attempt immediately
        await this.tryClaimLeadership();
    }

    /**
     * Stop the leader election and release leadership if held.
     * Callbacks are not invoked from here; the caller is expected to tear down
     * its own leader-only work.
     */
    async stop() {
        this.stopped = true;
        this.clearTimer();

        if (this._isLeader) {
            await this.releaseLock();
            this._isLeader = false;
        }
    }

    /** First eight characters of the instance id, used in log messages. */
    private shortId(): string {
        return this.instanceId.substring(0, 8);
    }

    /** Cancels the pending claim/heartbeat timer, if any. */
    private clearTimer(): void {
        if (this.timer) {
            clearTimeout(this.timer);
            this.timer = null;
        }
    }

    /**
     * Ensures a TTL index on `lastHeartbeat` (twice the lease duration) so that
     * an abandoned lease document is eventually removed by MongoDB. If an index
     * with different options already exists it is dropped and recreated.
     */
    private async ensureTtlIndex(): Promise<void> {
        const ttlSeconds = Math.ceil((this.leaseDurationMs * 2) / 1000);
        try {
            await this.collection.createIndex({ lastHeartbeat: 1 }, { expireAfterSeconds: ttlSeconds });
        } catch (err: any) {
            if (err?.codeName === "IndexOptionsConflict" || err?.code === 85) {
                log.debug("TTL index options changed, recreating index");
                await this.collection.dropIndex("lastHeartbeat_1");
                await this.collection.createIndex({ lastHeartbeat: 1 }, { expireAfterSeconds: ttlSeconds });
            } else {
                throw err;
            }
        }
    }

    /** Deletes the lease document if (and only if) it is owned by this instance. */
    private async releaseLock(): Promise<void> {
        try {
            await this.collection.deleteOne({
                _id: LOCK_NAME as any,
                instanceId: this.instanceId,
            });
            log.info("Leadership released on shutdown");
        } catch (err) {
            log.error(`Failed to release leadership on shutdown: ${err}`);
        }
    }

    /**
     * Runs a leadership callback with the `transitioning` flag raised.
     * Callback errors are logged and never propagate.
     */
    private async runTransition(name: string, callback?: () => void | Promise<void>): Promise<void> {
        this.transitioning = true;
        try {
            await callback?.call(this);
        } catch (err) {
            log.error(`Error in ${name} callback: ${err}`);
        } finally {
            this.transitioning = false;
        }
    }

    /** Marks this instance as no longer leader and notifies the owner. */
    private async demote(): Promise<void> {
        this._isLeader = false;
        await this.runTransition("onLostLeadership", this.onLostLeadership);
    }

    /**
     * One election round: try to claim or refresh the lease, fire the matching
     * callback on a state change, then schedule the next round.
     */
    private async tryClaimLeadership() {
        if (this.stopped || this.transitioning) return;

        const now = new Date();
        // Leases whose last heartbeat is older than this are up for grabs.
        const leaseExpiry = new Date(now.getTime() - this.leaseDurationMs);

        try {
            const raw: any = await this.collection.findOneAndUpdate(
                {
                    _id: LOCK_NAME as any,
                    $or: [{ instanceId: this.instanceId }, { lastHeartbeat: { $lt: leaseExpiry } }],
                },
                {
                    $set: {
                        instanceId: this.instanceId,
                        lastHeartbeat: now,
                    },
                    $setOnInsert: {
                        claimedAt: now,
                    },
                },
                // When another instance holds a live lease the filter matches
                // nothing and the upsert collides on _id (E11000, handled below).
                { upsert: true, returnDocument: "after" }
            );

            // NOTE(review): the driver version is not visible from this file.
            // Older drivers wrap the document as `{ value, ok, ... }`, newer ones
            // return the document itself, so accept both shapes.
            const record: LeaderRecord | null | undefined = raw && "value" in raw && !("instanceId" in raw) ? raw.value : raw;
            const gotLock = !!record && record.instanceId === this.instanceId;

            if (this.stopped) {
                // stop() ran while the write was in flight. Do not fire callbacks
                // after shutdown, and do not leave a lease behind that would block
                // other instances until it expires.
                if (gotLock) {
                    await this.releaseLock();
                }
                this._isLeader = false;
                return;
            }

            if (gotLock && !this._isLeader) {
                log.info(`This instance became the leader (instance: ${this.shortId()})`);
                this._isLeader = true;
                await this.runTransition("onBecameLeader", this.onBecameLeader);
            } else if (!gotLock && this._isLeader) {
                log.warn(`This instance lost leadership (instance: ${this.shortId()})`);
                await this.demote();
            }
        } catch (err: any) {
            if (this.stopped) {
                // Shutting down: stop() owns the cleanup, no callbacks from here.
                return;
            }

            if (err?.code === 11000) {
                // Duplicate key - another instance claimed first
                if (this._isLeader) {
                    log.warn(`This instance lost leadership (instance: ${this.shortId()})`);
                    await this.demote();
                }
            } else {
                log.error(`Leader election error: ${err}`);
                // On error, assume we lost leadership to be safe
                if (this._isLeader) {
                    await this.demote();
                }
            }
        }

        // Schedule next attempt
        if (!this.stopped) {
            const nextInterval = this._isLeader ? this.heartbeatIntervalMs : this.heartbeatIntervalMs * 2;

            this.timer = setTimeout(() => {
                this.tryClaimLeadership().catch((err) => log.error(`Leader election error: ${err}`));
            }, nextInterval);
        }
    }
}
|
package/src/index.ts
CHANGED
|
@@ -10,6 +10,8 @@ export * from "./FlinkRequestContext";
|
|
|
10
10
|
export * from "./FlinkErrors";
|
|
11
11
|
export * from "./FlinkPlugin";
|
|
12
12
|
export * from "./FlinkJob";
|
|
13
|
+
export { LeaderElection } from "./LeaderElection";
|
|
14
|
+
export type { LeaderElectionOptions } from "./LeaderElection";
|
|
13
15
|
export * from "./auth/FlinkAuthUser";
|
|
14
16
|
export * from "./auth/FlinkAuthPlugin";
|
|
15
17
|
export * from "./ai/FlinkTool";
|