@flink-app/flink 2.0.0-alpha.73 → 2.0.0-alpha.75

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # @flink-app/flink
2
2
 
3
+ ## 2.0.0-alpha.75
4
+
5
+ ## 2.0.0-alpha.74
6
+
7
+ ### Patch Changes
8
+
9
+ - feat: add leader election for horizontally scaled job scheduling
10
+
11
+ Adds MongoDB-based leader election so that when multiple instances of a Flink app are running, only one (the leader) executes scheduled jobs. If the leader goes down, another instance takes over automatically. Includes dedicated scheduler logger and improved robustness.
12
+
13
+ - fix: add updateById method to FlinkRepo for convenient document updates by ID
14
+
3
15
  ## 2.0.0-alpha.73
4
16
 
5
17
  ### Patch Changes
@@ -12,6 +12,7 @@ import { FlinkContext } from "./FlinkContext";
12
12
  import { FlinkError } from "./FlinkErrors";
13
13
  import { FlinkRequest, HandlerFile, HttpMethod, QueryParamMetadata, RouteProps } from "./FlinkHttpHandler";
14
14
  import { FlinkJobFile } from "./FlinkJob";
15
+ import { LeaderElectionOptions } from "./LeaderElection";
15
16
  import { FlinkPlugin } from "./FlinkPlugin";
16
17
  import { FlinkRepo } from "./FlinkRepo";
17
18
  import { FlinkResponse } from "./FlinkResponse";
@@ -132,6 +133,33 @@ export interface FlinkOptions {
132
133
  * Defaults to true.
133
134
  */
134
135
  enabled?: boolean;
136
+ /**
137
+ * Enable leader election for horizontally scaled deployments.
138
+ *
139
+ * When enabled, only one instance (the leader) will run scheduled jobs.
140
+ * If the leader goes down, another instance automatically takes over.
141
+ *
142
+ * Requires a database connection (`db` option) since leader election
143
+ * state is persisted in MongoDB. If no database is configured, a warning
144
+ * will be logged and jobs will run on all instances (no leader election).
145
+ *
146
+ * Set to `true` for default settings, or pass an options object to customize.
147
+ *
148
+ * @example
149
+ * ```ts
150
+ * // Use defaults (15s lease, 5s heartbeat)
151
+ * scheduling: { leaderElection: true }
152
+ *
153
+ * // Custom settings
154
+ * scheduling: {
155
+ * leaderElection: {
156
+ * leaseDurationMs: 30000,
157
+ * heartbeatIntervalMs: 10000,
158
+ * }
159
+ * }
160
+ * ```
161
+ */
162
+ leaderElection?: boolean | LeaderElectionOptions;
135
163
  };
136
164
  /**
137
165
  * AI configuration for agents and tools
@@ -260,6 +288,8 @@ export declare class FlinkApp<C extends FlinkContext> {
260
288
  */
261
289
  private handlerRouteCache;
262
290
  scheduler?: ToadScheduler;
291
+ private allInstanceScheduler?;
292
+ private leaderElection?;
263
293
  private accessLog;
264
294
  constructor(opts: FlinkOptions);
265
295
  get ctx(): C;
@@ -346,5 +376,7 @@ export declare class FlinkApp<C extends FlinkContext> {
346
376
  private authenticate;
347
377
  getRegisteredRoutes(): string[];
348
378
  private get isSchedulingEnabled();
379
+ private get leaderElectionConfig();
380
+ private startLeaderElection;
349
381
  private getMongoConnectionOptions;
350
382
  }
@@ -63,6 +63,7 @@ var toad_scheduler_1 = require("toad-scheduler");
63
63
  var uuid_1 = require("uuid");
64
64
  var FlinkErrors_1 = require("./FlinkErrors");
65
65
  var FlinkHttpHandler_1 = require("./FlinkHttpHandler");
66
+ var LeaderElection_1 = require("./LeaderElection");
66
67
  var FlinkLog_1 = require("./FlinkLog");
67
68
  var FlinkLogFactory_1 = require("./FlinkLogFactory");
68
69
  var FlinkRequestContext_1 = require("./FlinkRequestContext");
@@ -71,6 +72,7 @@ var mock_data_generator_1 = __importDefault(require("./mock-data-generator"));
71
72
  var utils_1 = require("./utils");
72
73
  var initLog = FlinkLogFactory_1.FlinkLogFactory.createLogger("flink.init");
73
74
  var perfLog = FlinkLogFactory_1.FlinkLogFactory.createLogger("flink.perf");
75
+ var schedulerLog = FlinkLogFactory_1.FlinkLogFactory.createLogger("flink.scheduler");
74
76
  var ajv = new ajv_1.default();
75
77
  (0, ajv_formats_1.default)(ajv);
76
78
  var defaultCorsOptions = {
@@ -197,11 +199,11 @@ var FlinkApp = /** @class */ (function () {
197
199
  case 5:
198
200
  _e.sent();
199
201
  perfLog.debug("Initialize agents took ".concat(Date.now() - agentInitStartTime, "ms"));
200
- if (this.isSchedulingEnabled) {
202
+ if (this.isSchedulingEnabled && !this.leaderElectionConfig) {
201
203
  this.scheduler = new toad_scheduler_1.ToadScheduler();
202
204
  }
203
- else {
204
- initLog.info("🚫 Scheduling is disabled");
205
+ else if (!this.isSchedulingEnabled) {
206
+ schedulerLog.info("Scheduling is disabled");
205
207
  }
206
208
  if (!this.disableHttpServer) {
207
209
  this.expressApp = (0, express_1.default)();
@@ -254,14 +256,20 @@ var FlinkApp = /** @class */ (function () {
254
256
  case 13:
255
257
  _e.sent();
256
258
  perfLog.debug("Register handlers took ".concat(Date.now() - handlersStartTime, "ms"));
257
- if (!this.isSchedulingEnabled) return [3 /*break*/, 15];
259
+ if (!this.isSchedulingEnabled) return [3 /*break*/, 17];
260
+ if (!this.leaderElectionConfig) return [3 /*break*/, 15];
261
+ return [4 /*yield*/, this.startLeaderElection()];
262
+ case 14:
263
+ _e.sent();
264
+ return [3 /*break*/, 17];
265
+ case 15:
258
266
  jobsStartTime = Date.now();
259
267
  return [4 /*yield*/, this.registerAutoRegisterableJobs()];
260
- case 14:
268
+ case 16:
261
269
  _e.sent();
262
270
  perfLog.debug("Register jobs took ".concat(Date.now() - jobsStartTime, "ms"));
263
- _e.label = 15;
264
- case 15:
271
+ _e.label = 17;
272
+ case 17:
265
273
  // Register 404 with slight delay to allow all manually added routes to be added
266
274
  // TODO: Is there a better solution to force this handler to always run last?
267
275
  setTimeout(function () {
@@ -296,12 +304,24 @@ var FlinkApp = /** @class */ (function () {
296
304
  switch (_a.label) {
297
305
  case 0:
298
306
  FlinkLog_1.log.info("🛑 Stopping Flink app...");
299
- if (!this.scheduler) return [3 /*break*/, 2];
300
- return [4 /*yield*/, this.scheduler.stop()];
307
+ if (!this.leaderElection) return [3 /*break*/, 2];
308
+ return [4 /*yield*/, this.leaderElection.stop()];
301
309
  case 1:
302
310
  _a.sent();
303
311
  _a.label = 2;
304
312
  case 2:
313
+ if (!this.scheduler) return [3 /*break*/, 4];
314
+ return [4 /*yield*/, this.scheduler.stop()];
315
+ case 3:
316
+ _a.sent();
317
+ _a.label = 4;
318
+ case 4:
319
+ if (!this.allInstanceScheduler) return [3 /*break*/, 6];
320
+ return [4 /*yield*/, this.allInstanceScheduler.stop()];
321
+ case 5:
322
+ _a.sent();
323
+ _a.label = 6;
324
+ case 6:
305
325
  if (this.expressServer) {
306
326
  return [2 /*return*/, new Promise(function (resolve, reject) {
307
327
  var int = setTimeout(function () {
@@ -861,7 +881,7 @@ var FlinkApp = /** @class */ (function () {
861
881
  });
862
882
  });
863
883
  };
864
- FlinkApp.prototype.registerAutoRegisterableJobs = function () {
884
+ FlinkApp.prototype.registerAutoRegisterableJobs = function (filter) {
865
885
  return __awaiter(this, void 0, void 0, function () {
866
886
  var _loop_1, this_1, _i, autoRegisteredJobs_1, _a, jobProps, jobFn, __file;
867
887
  var _this = this;
@@ -870,30 +890,33 @@ var FlinkApp = /** @class */ (function () {
870
890
  throw new Error("Scheduler not initialized"); // should never happen
871
891
  }
872
892
  _loop_1 = function (jobProps, jobFn, __file) {
893
+ if (filter && !filter(jobProps)) {
894
+ return "continue";
895
+ }
873
896
  if (jobProps.cron && jobProps.interval) {
874
- FlinkLog_1.log.error("Cannot register job ".concat(jobProps.id, " - both cron and interval are set in ").concat(__file));
897
+ schedulerLog.error("Cannot register job ".concat(jobProps.id, " - both cron and interval are set in ").concat(__file));
875
898
  return "continue";
876
899
  }
877
900
  if (jobProps.cron && jobProps.afterDelay) {
878
- FlinkLog_1.log.error("Cannot register job ".concat(jobProps.id, " - both cron and afterDelay are set in ").concat(__file));
901
+ schedulerLog.error("Cannot register job ".concat(jobProps.id, " - both cron and afterDelay are set in ").concat(__file));
879
902
  return "continue";
880
903
  }
881
904
  if (jobProps.interval && jobProps.afterDelay) {
882
- FlinkLog_1.log.error("Cannot register job ".concat(jobProps.id, " - both interval and afterDelay are set in ").concat(__file));
905
+ schedulerLog.error("Cannot register job ".concat(jobProps.id, " - both interval and afterDelay are set in ").concat(__file));
883
906
  return "continue";
884
907
  }
885
908
  if (this_1.scheduler.existsById(jobProps.id)) {
886
- FlinkLog_1.log.error("Job with id ".concat(jobProps.id, " is already registered, found duplicate in ").concat(__file));
909
+ schedulerLog.error("Job with id ".concat(jobProps.id, " is already registered, found duplicate in ").concat(__file));
887
910
  return "continue";
888
911
  }
889
- FlinkLog_1.log.debug("Registering job ".concat(jobProps.id, ": ").concat(JSON.stringify(jobProps), " from ").concat(__file));
912
+ schedulerLog.debug("Registering job ".concat(jobProps.id, ": ").concat(JSON.stringify(jobProps), " from ").concat(__file));
890
913
  var task = new toad_scheduler_1.AsyncTask(jobProps.id, function () { return __awaiter(_this, void 0, void 0, function () {
891
914
  return __generator(this, function (_a) {
892
915
  switch (_a.label) {
893
916
  case 0: return [4 /*yield*/, jobFn({ ctx: this.ctx })];
894
917
  case 1:
895
918
  _a.sent();
896
- FlinkLog_1.log.debug("Job ".concat(jobProps.id, " completed"));
919
+ schedulerLog.debug("Job ".concat(jobProps.id, " completed"));
897
920
  if (jobProps.afterDelay) {
898
921
  // afterDelay runs only once, so we remove the job
899
922
  this.scheduler.removeById(jobProps.id);
@@ -902,7 +925,7 @@ var FlinkApp = /** @class */ (function () {
902
925
  }
903
926
  });
904
927
  }); }, function (err) {
905
- FlinkLog_1.log.error("Job ".concat(jobProps.id, " threw unhandled exception ").concat(err));
928
+ schedulerLog.error("Job ".concat(jobProps.id, " threw unhandled exception ").concat(err));
906
929
  console.error(err);
907
930
  });
908
931
  if (jobProps.cron) {
@@ -937,7 +960,7 @@ var FlinkApp = /** @class */ (function () {
937
960
  return [3 /*break*/, 3];
938
961
  case 2:
939
962
  err_2 = _a.sent();
940
- FlinkLog_1.log.error("Job ".concat(jobProps.id, " threw unhandled exception ").concat(err_2));
963
+ schedulerLog.error("Job ".concat(jobProps.id, " threw unhandled exception ").concat(err_2));
941
964
  console.error(err_2);
942
965
  return [3 /*break*/, 3];
943
966
  case 3: return [2 /*return*/];
@@ -957,7 +980,7 @@ var FlinkApp = /** @class */ (function () {
957
980
  }
958
981
  }
959
982
  else {
960
- FlinkLog_1.log.error("Cannot register job ".concat(jobProps.id, " - no cron, interval or once set in ").concat(__file));
983
+ schedulerLog.error("Cannot register job ".concat(jobProps.id, " - no cron, interval or once set in ").concat(__file));
961
984
  return "continue";
962
985
  }
963
986
  };
@@ -1253,6 +1276,78 @@ var FlinkApp = /** @class */ (function () {
1253
1276
  enumerable: false,
1254
1277
  configurable: true
1255
1278
  });
1279
+ Object.defineProperty(FlinkApp.prototype, "leaderElectionConfig", {
1280
+ get: function () {
1281
+ var _a;
1282
+ var opt = (_a = this.schedulingOptions) === null || _a === void 0 ? void 0 : _a.leaderElection;
1283
+ if (!opt)
1284
+ return undefined;
1285
+ return opt === true ? {} : opt;
1286
+ },
1287
+ enumerable: false,
1288
+ configurable: true
1289
+ });
1290
+ FlinkApp.prototype.startLeaderElection = function () {
1291
+ return __awaiter(this, void 0, void 0, function () {
1292
+ var hasAllInstanceJobs, opts;
1293
+ var _this = this;
1294
+ return __generator(this, function (_a) {
1295
+ switch (_a.label) {
1296
+ case 0:
1297
+ if (!!this.db) return [3 /*break*/, 2];
1298
+ schedulerLog.warn("Leader election is enabled but no database is configured. " +
1299
+ "Leader election requires a MongoDB connection to coordinate between instances. " +
1300
+ "Either add a database connection via the `db` option, or remove `scheduling.leaderElection` from your config. " +
1301
+ "Jobs will run on ALL instances without leader election.");
1302
+ // Fall back to running jobs on all instances
1303
+ this.scheduler = new toad_scheduler_1.ToadScheduler();
1304
+ return [4 /*yield*/, this.registerAutoRegisterableJobs()];
1305
+ case 1:
1306
+ _a.sent();
1307
+ return [2 /*return*/];
1308
+ case 2:
1309
+ hasAllInstanceJobs = exports.autoRegisteredJobs.some(function (j) { return j.Job.runOnAllInstances; });
1310
+ if (!hasAllInstanceJobs) return [3 /*break*/, 4];
1311
+ this.allInstanceScheduler = new toad_scheduler_1.ToadScheduler();
1312
+ this.scheduler = this.allInstanceScheduler;
1313
+ return [4 /*yield*/, this.registerAutoRegisterableJobs(function (job) { return !!job.runOnAllInstances; })];
1314
+ case 3:
1315
+ _a.sent();
1316
+ this.scheduler = undefined;
1317
+ _a.label = 4;
1318
+ case 4:
1319
+ opts = this.leaderElectionConfig;
1320
+ this.leaderElection = new LeaderElection_1.LeaderElection(this.db, opts);
1321
+ return [4 /*yield*/, this.leaderElection.start(
1322
+ // onBecameLeader
1323
+ function () { return __awaiter(_this, void 0, void 0, function () {
1324
+ return __generator(this, function (_a) {
1325
+ switch (_a.label) {
1326
+ case 0:
1327
+ schedulerLog.info("This instance is now the leader - starting scheduled jobs");
1328
+ this.scheduler = new toad_scheduler_1.ToadScheduler();
1329
+ return [4 /*yield*/, this.registerAutoRegisterableJobs(function (job) { return !job.runOnAllInstances; })];
1330
+ case 1:
1331
+ _a.sent();
1332
+ return [2 /*return*/];
1333
+ }
1334
+ });
1335
+ }); },
1336
+ // onLostLeadership
1337
+ function () {
1338
+ schedulerLog.info("This instance lost leadership - stopping scheduled jobs");
1339
+ if (_this.scheduler) {
1340
+ _this.scheduler.stop();
1341
+ _this.scheduler = undefined;
1342
+ }
1343
+ })];
1344
+ case 5:
1345
+ _a.sent();
1346
+ return [2 /*return*/];
1347
+ }
1348
+ });
1349
+ });
1350
+ };
1256
1351
  FlinkApp.prototype.getMongoConnectionOptions = function () {
1257
1352
  if (!this.dbOpts) {
1258
1353
  throw new Error("No db configured");
@@ -31,6 +31,16 @@ export type FlinkJobProps = {
31
31
  * retried after the next interval.
32
32
  */
33
33
  singleton?: boolean;
34
+ /**
35
+ * If true, this job will run on all instances regardless of leader election.
36
+ *
37
+ * By default, when leader election is enabled, jobs only run on the leader instance.
38
+ * Set this to true for jobs that should run on every instance, such as
39
+ * local cache cleanup or instance-specific health checks.
40
+ *
41
+ * Has no effect when leader election is not enabled.
42
+ */
43
+ runOnAllInstances?: boolean;
34
44
  };
35
45
  /**
36
46
  * Type for Flink job function. This function should be default exported from
@@ -22,6 +22,10 @@ export declare abstract class FlinkRepo<C extends FlinkContext, Model extends Do
22
22
  create<C = Omit<Model, "_id">>(model: C): Promise<C & {
23
23
  _id: string;
24
24
  }>;
25
+ updateById(id: string | ObjectId, model: PartialModel<Model>): Promise<Model | null>;
26
+ /**
27
+ * @deprecated Use `updateById` instead. This will be removed in a future major version.
28
+ */
25
29
  updateOne(id: string | ObjectId, model: PartialModel<Model>): Promise<Model | null>;
26
30
  updateMany<U = PartialModel<Model>>(query: any, model: U): Promise<number>;
27
31
  deleteById(id: string | ObjectId): Promise<number>;
@@ -33,6 +37,10 @@ export declare abstract class FlinkRepo<C extends FlinkContext, Model extends Do
33
37
  * @returns
34
38
  */
35
39
  buildId(id: string | ObjectId): ObjectId;
36
- private objectIdToString;
40
+ protected objectIdToString<T>(doc: T & {
41
+ _id?: any;
42
+ }): T & {
43
+ _id?: any;
44
+ };
37
45
  }
38
46
  export {};
@@ -139,7 +139,7 @@ var FlinkRepo = /** @class */ (function () {
139
139
  });
140
140
  });
141
141
  };
142
- FlinkRepo.prototype.updateOne = function (id, model) {
142
+ FlinkRepo.prototype.updateById = function (id, model) {
143
143
  return __awaiter(this, void 0, void 0, function () {
144
144
  var oid, _id, modelWithoutId, res;
145
145
  return __generator(this, function (_a) {
@@ -161,6 +161,16 @@ var FlinkRepo = /** @class */ (function () {
161
161
  });
162
162
  });
163
163
  };
164
+ /**
165
+ * @deprecated Use `updateById` instead. This will be removed in a future major version.
166
+ */
167
+ FlinkRepo.prototype.updateOne = function (id, model) {
168
+ return __awaiter(this, void 0, void 0, function () {
169
+ return __generator(this, function (_a) {
170
+ return [2 /*return*/, this.updateById(id, model)];
171
+ });
172
+ });
173
+ };
164
174
  FlinkRepo.prototype.updateMany = function (query, model) {
165
175
  return __awaiter(this, void 0, void 0, function () {
166
176
  var _a, _id, modelWithoutId, modifiedCount;
@@ -0,0 +1,45 @@
1
+ import { Db } from "mongodb";
2
+ export interface LeaderElectionOptions {
3
+ /**
4
+ * Duration in milliseconds before a leader's lease expires.
5
+ * If the leader fails to heartbeat within this time, another instance can take over.
6
+ * @default 15000
7
+ */
8
+ leaseDurationMs?: number;
9
+ /**
10
+ * Interval in milliseconds between heartbeats sent by the leader.
11
+ * Should be significantly less than leaseDurationMs (typically 1/3).
12
+ * @default 5000
13
+ */
14
+ heartbeatIntervalMs?: number;
15
+ /**
16
+ * Name of the MongoDB collection used for leader election.
17
+ * @default "_flink_leader"
18
+ */
19
+ collectionName?: string;
20
+ }
21
+ export declare class LeaderElection {
22
+ private instanceId;
23
+ private _isLeader;
24
+ private timer;
25
+ private collection;
26
+ private leaseDurationMs;
27
+ private heartbeatIntervalMs;
28
+ private onBecameLeader?;
29
+ private onLostLeadership?;
30
+ private stopped;
31
+ private transitioning;
32
+ constructor(db: Db, opts?: LeaderElectionOptions);
33
+ get isLeader(): boolean;
34
+ /**
35
+ * Start the leader election process.
36
+ * @param onBecameLeader Called when this instance becomes the leader
37
+ * @param onLostLeadership Called when this instance loses leadership
38
+ */
39
+ start(onBecameLeader: () => void | Promise<void>, onLostLeadership: () => void | Promise<void>): Promise<void>;
40
+ /**
41
+ * Stop the leader election and release leadership if held.
42
+ */
43
+ stop(): Promise<void>;
44
+ private tryClaimLeadership;
45
+ }