@flink-app/flink 2.0.0-alpha.72 → 2.0.0-alpha.74

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,21 @@
1
1
  # @flink-app/flink
2
2
 
3
+ ## 2.0.0-alpha.74
4
+
5
+ ### Patch Changes
6
+
7
+ - feat: add leader election for horizontally scaled job scheduling
8
+
9
+ Adds MongoDB-based leader election so that when multiple instances of a Flink app are running, only one (the leader) executes scheduled jobs. If the leader goes down, another instance takes over automatically. Includes dedicated scheduler logger and improved robustness.
10
+
11
+ - fix: add updateById method to FlinkRepo for convenient document updates by ID
12
+
13
+ ## 2.0.0-alpha.73
14
+
15
+ ### Patch Changes
16
+
17
+ - fix(flink): register static routes before parameterized routes to prevent e.g. GET /jobs/by-tags being matched by GET /jobs/:id
18
+
3
19
  ## 2.0.0-alpha.72
4
20
 
5
21
  ### Patch Changes
@@ -12,6 +12,7 @@ import { FlinkContext } from "./FlinkContext";
12
12
  import { FlinkError } from "./FlinkErrors";
13
13
  import { FlinkRequest, HandlerFile, HttpMethod, QueryParamMetadata, RouteProps } from "./FlinkHttpHandler";
14
14
  import { FlinkJobFile } from "./FlinkJob";
15
+ import { LeaderElectionOptions } from "./LeaderElection";
15
16
  import { FlinkPlugin } from "./FlinkPlugin";
16
17
  import { FlinkRepo } from "./FlinkRepo";
17
18
  import { FlinkResponse } from "./FlinkResponse";
@@ -132,6 +133,33 @@ export interface FlinkOptions {
132
133
  * Defaults to true.
133
134
  */
134
135
  enabled?: boolean;
136
+ /**
137
+ * Enable leader election for horizontally scaled deployments.
138
+ *
139
+ * When enabled, only one instance (the leader) will run scheduled jobs.
140
+ * If the leader goes down, another instance automatically takes over.
141
+ *
142
+ * Requires a database connection (`db` option) since leader election
143
+ * state is persisted in MongoDB. If no database is configured, a warning
144
+ * will be logged and jobs will run on all instances (no leader election).
145
+ *
146
+ * Set to `true` for default settings, or pass an options object to customize.
147
+ *
148
+ * @example
149
+ * ```ts
150
+ * // Use defaults (15s lease, 5s heartbeat)
151
+ * scheduling: { leaderElection: true }
152
+ *
153
+ * // Custom settings
154
+ * scheduling: {
155
+ * leaderElection: {
156
+ * leaseDurationMs: 30000,
157
+ * heartbeatIntervalMs: 10000,
158
+ * }
159
+ * }
160
+ * ```
161
+ */
162
+ leaderElection?: boolean | LeaderElectionOptions;
135
163
  };
136
164
  /**
137
165
  * AI configuration for agents and tools
@@ -260,6 +288,8 @@ export declare class FlinkApp<C extends FlinkContext> {
260
288
  */
261
289
  private handlerRouteCache;
262
290
  scheduler?: ToadScheduler;
291
+ private allInstanceScheduler?;
292
+ private leaderElection?;
263
293
  private accessLog;
264
294
  constructor(opts: FlinkOptions);
265
295
  get ctx(): C;
@@ -346,5 +376,7 @@ export declare class FlinkApp<C extends FlinkContext> {
346
376
  private authenticate;
347
377
  getRegisteredRoutes(): string[];
348
378
  private get isSchedulingEnabled();
379
+ private get leaderElectionConfig();
380
+ private startLeaderElection;
349
381
  private getMongoConnectionOptions;
350
382
  }
@@ -63,6 +63,7 @@ var toad_scheduler_1 = require("toad-scheduler");
63
63
  var uuid_1 = require("uuid");
64
64
  var FlinkErrors_1 = require("./FlinkErrors");
65
65
  var FlinkHttpHandler_1 = require("./FlinkHttpHandler");
66
+ var LeaderElection_1 = require("./LeaderElection");
66
67
  var FlinkLog_1 = require("./FlinkLog");
67
68
  var FlinkLogFactory_1 = require("./FlinkLogFactory");
68
69
  var FlinkRequestContext_1 = require("./FlinkRequestContext");
@@ -71,6 +72,7 @@ var mock_data_generator_1 = __importDefault(require("./mock-data-generator"));
71
72
  var utils_1 = require("./utils");
72
73
  var initLog = FlinkLogFactory_1.FlinkLogFactory.createLogger("flink.init");
73
74
  var perfLog = FlinkLogFactory_1.FlinkLogFactory.createLogger("flink.perf");
75
+ var schedulerLog = FlinkLogFactory_1.FlinkLogFactory.createLogger("flink.scheduler");
74
76
  var ajv = new ajv_1.default();
75
77
  (0, ajv_formats_1.default)(ajv);
76
78
  var defaultCorsOptions = {
@@ -197,11 +199,11 @@ var FlinkApp = /** @class */ (function () {
197
199
  case 5:
198
200
  _e.sent();
199
201
  perfLog.debug("Initialize agents took ".concat(Date.now() - agentInitStartTime, "ms"));
200
- if (this.isSchedulingEnabled) {
202
+ if (this.isSchedulingEnabled && !this.leaderElectionConfig) {
201
203
  this.scheduler = new toad_scheduler_1.ToadScheduler();
202
204
  }
203
- else {
204
- initLog.info("🚫 Scheduling is disabled");
205
+ else if (!this.isSchedulingEnabled) {
206
+ schedulerLog.info("Scheduling is disabled");
205
207
  }
206
208
  if (!this.disableHttpServer) {
207
209
  this.expressApp = (0, express_1.default)();
@@ -254,14 +256,20 @@ var FlinkApp = /** @class */ (function () {
254
256
  case 13:
255
257
  _e.sent();
256
258
  perfLog.debug("Register handlers took ".concat(Date.now() - handlersStartTime, "ms"));
257
- if (!this.isSchedulingEnabled) return [3 /*break*/, 15];
259
+ if (!this.isSchedulingEnabled) return [3 /*break*/, 17];
260
+ if (!this.leaderElectionConfig) return [3 /*break*/, 15];
261
+ return [4 /*yield*/, this.startLeaderElection()];
262
+ case 14:
263
+ _e.sent();
264
+ return [3 /*break*/, 17];
265
+ case 15:
258
266
  jobsStartTime = Date.now();
259
267
  return [4 /*yield*/, this.registerAutoRegisterableJobs()];
260
- case 14:
268
+ case 16:
261
269
  _e.sent();
262
270
  perfLog.debug("Register jobs took ".concat(Date.now() - jobsStartTime, "ms"));
263
- _e.label = 15;
264
- case 15:
271
+ _e.label = 17;
272
+ case 17:
265
273
  // Register 404 with slight delay to allow all manually added routes to be added
266
274
  // TODO: Is there a better solution to force this handler to always run last?
267
275
  setTimeout(function () {
@@ -296,12 +304,24 @@ var FlinkApp = /** @class */ (function () {
296
304
  switch (_a.label) {
297
305
  case 0:
298
306
  FlinkLog_1.log.info("🛑 Stopping Flink app...");
299
- if (!this.scheduler) return [3 /*break*/, 2];
300
- return [4 /*yield*/, this.scheduler.stop()];
307
+ if (!this.leaderElection) return [3 /*break*/, 2];
308
+ return [4 /*yield*/, this.leaderElection.stop()];
301
309
  case 1:
302
310
  _a.sent();
303
311
  _a.label = 2;
304
312
  case 2:
313
+ if (!this.scheduler) return [3 /*break*/, 4];
314
+ return [4 /*yield*/, this.scheduler.stop()];
315
+ case 3:
316
+ _a.sent();
317
+ _a.label = 4;
318
+ case 4:
319
+ if (!this.allInstanceScheduler) return [3 /*break*/, 6];
320
+ return [4 /*yield*/, this.allInstanceScheduler.stop()];
321
+ case 5:
322
+ _a.sent();
323
+ _a.label = 6;
324
+ case 6:
305
325
  if (this.expressServer) {
306
326
  return [2 /*return*/, new Promise(function (resolve, reject) {
307
327
  var int = setTimeout(function () {
@@ -797,7 +817,17 @@ var FlinkApp = /** @class */ (function () {
797
817
  schemaManifest = this.loadSchemaManifest();
798
818
  schemaCount = schemaManifest.version === "2.0" ? Object.keys(schemaManifest.schemas || {}).length : Object.keys(schemaManifest.definitions || {}).length;
799
819
  FlinkLog_1.log.debug("Registering ".concat(schemaCount, " schemas with AJV (manifest version: ").concat(schemaManifest.version || "1.0", ")"));
800
- for (_i = 0, _a = exports.autoRegisteredHandlers.sort(function (a, b) { var _a, _b; return (((_a = a.handler.Route) === null || _a === void 0 ? void 0 : _a.order) || 0) - (((_b = b.handler.Route) === null || _b === void 0 ? void 0 : _b.order) || 0); }); _i < _a.length; _i++) {
820
+ for (_i = 0, _a = exports.autoRegisteredHandlers.sort(function (a, b) {
821
+ var _a, _b, _c, _d, _e, _f;
822
+ var orderDiff = (((_a = a.handler.Route) === null || _a === void 0 ? void 0 : _a.order) || 0) - (((_b = b.handler.Route) === null || _b === void 0 ? void 0 : _b.order) || 0);
823
+ if (orderDiff !== 0)
824
+ return orderDiff;
825
+ // Static segments must be registered before parameterized ones to avoid
826
+ // Express matching e.g. GET /jobs/by-tags with the /jobs/:id route.
827
+ var aHasParam = ((_d = (_c = a.handler.Route) === null || _c === void 0 ? void 0 : _c.path) === null || _d === void 0 ? void 0 : _d.includes("/:")) ? 1 : 0;
828
+ var bHasParam = ((_f = (_e = b.handler.Route) === null || _e === void 0 ? void 0 : _e.path) === null || _f === void 0 ? void 0 : _f.includes("/:")) ? 1 : 0;
829
+ return aHasParam - bHasParam;
830
+ }); _i < _a.length; _i++) {
801
831
  _b = _a[_i], handler = _b.handler, assumedHttpMethod = _b.assumedHttpMethod, __file = _b.__file;
802
832
  if (!handler.Route) {
803
833
  FlinkLog_1.log.error("Missing Props in handler ".concat(__file));
@@ -851,7 +881,7 @@ var FlinkApp = /** @class */ (function () {
851
881
  });
852
882
  });
853
883
  };
854
- FlinkApp.prototype.registerAutoRegisterableJobs = function () {
884
+ FlinkApp.prototype.registerAutoRegisterableJobs = function (filter) {
855
885
  return __awaiter(this, void 0, void 0, function () {
856
886
  var _loop_1, this_1, _i, autoRegisteredJobs_1, _a, jobProps, jobFn, __file;
857
887
  var _this = this;
@@ -860,30 +890,33 @@ var FlinkApp = /** @class */ (function () {
860
890
  throw new Error("Scheduler not initialized"); // should never happen
861
891
  }
862
892
  _loop_1 = function (jobProps, jobFn, __file) {
893
+ if (filter && !filter(jobProps)) {
894
+ return "continue";
895
+ }
863
896
  if (jobProps.cron && jobProps.interval) {
864
- FlinkLog_1.log.error("Cannot register job ".concat(jobProps.id, " - both cron and interval are set in ").concat(__file));
897
+ schedulerLog.error("Cannot register job ".concat(jobProps.id, " - both cron and interval are set in ").concat(__file));
865
898
  return "continue";
866
899
  }
867
900
  if (jobProps.cron && jobProps.afterDelay) {
868
- FlinkLog_1.log.error("Cannot register job ".concat(jobProps.id, " - both cron and afterDelay are set in ").concat(__file));
901
+ schedulerLog.error("Cannot register job ".concat(jobProps.id, " - both cron and afterDelay are set in ").concat(__file));
869
902
  return "continue";
870
903
  }
871
904
  if (jobProps.interval && jobProps.afterDelay) {
872
- FlinkLog_1.log.error("Cannot register job ".concat(jobProps.id, " - both interval and afterDelay are set in ").concat(__file));
905
+ schedulerLog.error("Cannot register job ".concat(jobProps.id, " - both interval and afterDelay are set in ").concat(__file));
873
906
  return "continue";
874
907
  }
875
908
  if (this_1.scheduler.existsById(jobProps.id)) {
876
- FlinkLog_1.log.error("Job with id ".concat(jobProps.id, " is already registered, found duplicate in ").concat(__file));
909
+ schedulerLog.error("Job with id ".concat(jobProps.id, " is already registered, found duplicate in ").concat(__file));
877
910
  return "continue";
878
911
  }
879
- FlinkLog_1.log.debug("Registering job ".concat(jobProps.id, ": ").concat(JSON.stringify(jobProps), " from ").concat(__file));
912
+ schedulerLog.debug("Registering job ".concat(jobProps.id, ": ").concat(JSON.stringify(jobProps), " from ").concat(__file));
880
913
  var task = new toad_scheduler_1.AsyncTask(jobProps.id, function () { return __awaiter(_this, void 0, void 0, function () {
881
914
  return __generator(this, function (_a) {
882
915
  switch (_a.label) {
883
916
  case 0: return [4 /*yield*/, jobFn({ ctx: this.ctx })];
884
917
  case 1:
885
918
  _a.sent();
886
- FlinkLog_1.log.debug("Job ".concat(jobProps.id, " completed"));
919
+ schedulerLog.debug("Job ".concat(jobProps.id, " completed"));
887
920
  if (jobProps.afterDelay) {
888
921
  // afterDelay runs only once, so we remove the job
889
922
  this.scheduler.removeById(jobProps.id);
@@ -892,7 +925,7 @@ var FlinkApp = /** @class */ (function () {
892
925
  }
893
926
  });
894
927
  }); }, function (err) {
895
- FlinkLog_1.log.error("Job ".concat(jobProps.id, " threw unhandled exception ").concat(err));
928
+ schedulerLog.error("Job ".concat(jobProps.id, " threw unhandled exception ").concat(err));
896
929
  console.error(err);
897
930
  });
898
931
  if (jobProps.cron) {
@@ -927,7 +960,7 @@ var FlinkApp = /** @class */ (function () {
927
960
  return [3 /*break*/, 3];
928
961
  case 2:
929
962
  err_2 = _a.sent();
930
- FlinkLog_1.log.error("Job ".concat(jobProps.id, " threw unhandled exception ").concat(err_2));
963
+ schedulerLog.error("Job ".concat(jobProps.id, " threw unhandled exception ").concat(err_2));
931
964
  console.error(err_2);
932
965
  return [3 /*break*/, 3];
933
966
  case 3: return [2 /*return*/];
@@ -947,7 +980,7 @@ var FlinkApp = /** @class */ (function () {
947
980
  }
948
981
  }
949
982
  else {
950
- FlinkLog_1.log.error("Cannot register job ".concat(jobProps.id, " - no cron, interval or once set in ").concat(__file));
983
+ schedulerLog.error("Cannot register job ".concat(jobProps.id, " - no cron, interval or once set in ").concat(__file));
951
984
  return "continue";
952
985
  }
953
986
  };
@@ -1243,6 +1276,78 @@ var FlinkApp = /** @class */ (function () {
1243
1276
  enumerable: false,
1244
1277
  configurable: true
1245
1278
  });
1279
+ Object.defineProperty(FlinkApp.prototype, "leaderElectionConfig", {
1280
+ get: function () {
1281
+ var _a;
1282
+ var opt = (_a = this.schedulingOptions) === null || _a === void 0 ? void 0 : _a.leaderElection;
1283
+ if (!opt)
1284
+ return undefined;
1285
+ return opt === true ? {} : opt;
1286
+ },
1287
+ enumerable: false,
1288
+ configurable: true
1289
+ });
1290
+ FlinkApp.prototype.startLeaderElection = function () {
1291
+ return __awaiter(this, void 0, void 0, function () {
1292
+ var hasAllInstanceJobs, opts;
1293
+ var _this = this;
1294
+ return __generator(this, function (_a) {
1295
+ switch (_a.label) {
1296
+ case 0:
1297
+ if (!!this.db) return [3 /*break*/, 2];
1298
+ schedulerLog.warn("Leader election is enabled but no database is configured. " +
1299
+ "Leader election requires a MongoDB connection to coordinate between instances. " +
1300
+ "Either add a database connection via the `db` option, or remove `scheduling.leaderElection` from your config. " +
1301
+ "Jobs will run on ALL instances without leader election.");
1302
+ // Fall back to running jobs on all instances
1303
+ this.scheduler = new toad_scheduler_1.ToadScheduler();
1304
+ return [4 /*yield*/, this.registerAutoRegisterableJobs()];
1305
+ case 1:
1306
+ _a.sent();
1307
+ return [2 /*return*/];
1308
+ case 2:
1309
+ hasAllInstanceJobs = exports.autoRegisteredJobs.some(function (j) { return j.Job.runOnAllInstances; });
1310
+ if (!hasAllInstanceJobs) return [3 /*break*/, 4];
1311
+ this.allInstanceScheduler = new toad_scheduler_1.ToadScheduler();
1312
+ this.scheduler = this.allInstanceScheduler;
1313
+ return [4 /*yield*/, this.registerAutoRegisterableJobs(function (job) { return !!job.runOnAllInstances; })];
1314
+ case 3:
1315
+ _a.sent();
1316
+ this.scheduler = undefined;
1317
+ _a.label = 4;
1318
+ case 4:
1319
+ opts = this.leaderElectionConfig;
1320
+ this.leaderElection = new LeaderElection_1.LeaderElection(this.db, opts);
1321
+ return [4 /*yield*/, this.leaderElection.start(
1322
+ // onBecameLeader
1323
+ function () { return __awaiter(_this, void 0, void 0, function () {
1324
+ return __generator(this, function (_a) {
1325
+ switch (_a.label) {
1326
+ case 0:
1327
+ schedulerLog.info("This instance is now the leader - starting scheduled jobs");
1328
+ this.scheduler = new toad_scheduler_1.ToadScheduler();
1329
+ return [4 /*yield*/, this.registerAutoRegisterableJobs(function (job) { return !job.runOnAllInstances; })];
1330
+ case 1:
1331
+ _a.sent();
1332
+ return [2 /*return*/];
1333
+ }
1334
+ });
1335
+ }); },
1336
+ // onLostLeadership
1337
+ function () {
1338
+ schedulerLog.info("This instance lost leadership - stopping scheduled jobs");
1339
+ if (_this.scheduler) {
1340
+ _this.scheduler.stop();
1341
+ _this.scheduler = undefined;
1342
+ }
1343
+ })];
1344
+ case 5:
1345
+ _a.sent();
1346
+ return [2 /*return*/];
1347
+ }
1348
+ });
1349
+ });
1350
+ };
1246
1351
  FlinkApp.prototype.getMongoConnectionOptions = function () {
1247
1352
  if (!this.dbOpts) {
1248
1353
  throw new Error("No db configured");
@@ -31,6 +31,16 @@ export type FlinkJobProps = {
31
31
  * retried after the next interval.
32
32
  */
33
33
  singleton?: boolean;
34
+ /**
35
+ * If true, this job will run on all instances regardless of leader election.
36
+ *
37
+ * By default, when leader election is enabled, jobs only run on the leader instance.
38
+ * Set this to true for jobs that should run on every instance, such as
39
+ * local cache cleanup or instance-specific health checks.
40
+ *
41
+ * Has no effect when leader election is not enabled.
42
+ */
43
+ runOnAllInstances?: boolean;
34
44
  };
35
45
  /**
36
46
  * Type for Flink job function. This function should be default exported from
@@ -22,6 +22,10 @@ export declare abstract class FlinkRepo<C extends FlinkContext, Model extends Do
22
22
  create<C = Omit<Model, "_id">>(model: C): Promise<C & {
23
23
  _id: string;
24
24
  }>;
25
+ updateById(id: string | ObjectId, model: PartialModel<Model>): Promise<Model | null>;
26
+ /**
27
+ * @deprecated Use `updateById` instead. This will be removed in a future major version.
28
+ */
25
29
  updateOne(id: string | ObjectId, model: PartialModel<Model>): Promise<Model | null>;
26
30
  updateMany<U = PartialModel<Model>>(query: any, model: U): Promise<number>;
27
31
  deleteById(id: string | ObjectId): Promise<number>;
@@ -139,7 +139,7 @@ var FlinkRepo = /** @class */ (function () {
139
139
  });
140
140
  });
141
141
  };
142
- FlinkRepo.prototype.updateOne = function (id, model) {
142
+ FlinkRepo.prototype.updateById = function (id, model) {
143
143
  return __awaiter(this, void 0, void 0, function () {
144
144
  var oid, _id, modelWithoutId, res;
145
145
  return __generator(this, function (_a) {
@@ -161,6 +161,16 @@ var FlinkRepo = /** @class */ (function () {
161
161
  });
162
162
  });
163
163
  };
164
+ /**
165
+ * @deprecated Use `updateById` instead. This will be removed in a future major version.
166
+ */
167
+ FlinkRepo.prototype.updateOne = function (id, model) {
168
+ return __awaiter(this, void 0, void 0, function () {
169
+ return __generator(this, function (_a) {
170
+ return [2 /*return*/, this.updateById(id, model)];
171
+ });
172
+ });
173
+ };
164
174
  FlinkRepo.prototype.updateMany = function (query, model) {
165
175
  return __awaiter(this, void 0, void 0, function () {
166
176
  var _a, _id, modelWithoutId, modifiedCount;
@@ -0,0 +1,45 @@
1
+ import { Db } from "mongodb";
2
+ export interface LeaderElectionOptions {
3
+ /**
4
+ * Duration in milliseconds before a leader's lease expires.
5
+ * If the leader fails to heartbeat within this time, another instance can take over.
6
+ * @default 15000
7
+ */
8
+ leaseDurationMs?: number;
9
+ /**
10
+ * Interval in milliseconds between heartbeats sent by the leader.
11
+ * Should be significantly less than leaseDurationMs (typically 1/3).
12
+ * @default 5000
13
+ */
14
+ heartbeatIntervalMs?: number;
15
+ /**
16
+ * Name of the MongoDB collection used for leader election.
17
+ * @default "_flink_leader"
18
+ */
19
+ collectionName?: string;
20
+ }
21
+ export declare class LeaderElection {
22
+ private instanceId;
23
+ private _isLeader;
24
+ private timer;
25
+ private collection;
26
+ private leaseDurationMs;
27
+ private heartbeatIntervalMs;
28
+ private onBecameLeader?;
29
+ private onLostLeadership?;
30
+ private stopped;
31
+ private transitioning;
32
+ constructor(db: Db, opts?: LeaderElectionOptions);
33
+ get isLeader(): boolean;
34
+ /**
35
+ * Start the leader election process.
36
+ * @param onBecameLeader Called when this instance becomes the leader
37
+ * @param onLostLeadership Called when this instance loses leadership
38
+ */
39
+ start(onBecameLeader: () => void | Promise<void>, onLostLeadership: () => void | Promise<void>): Promise<void>;
40
+ /**
41
+ * Stop the leader election and release leadership if held.
42
+ */
43
+ stop(): Promise<void>;
44
+ private tryClaimLeadership;
45
+ }