@openfn/ws-worker 1.12.1 → 1.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,27 @@
1
1
  # ws-worker
2
2
 
3
+ ## 1.13.1
4
+
5
+ ### Patch Changes
6
+
7
+ - b83d13c: Added DEFAULT_MESSAGE_TIMEOUT_SECONDS env var and tweaked some error handling around Lightning messaging
8
+ - 0bd4adf: Include pod name in logs when claiming
9
+
10
+ ## 1.13.0
11
+
12
+ ### Minor Changes
13
+
14
+ - ce5022a: Added sentry notifications for server and websocket errors
15
+
16
+ ### Patch Changes
17
+
18
+ - 0a176aa: Ignore empty log lines (don't send them to lightning)
19
+ - Updated dependencies [0a176aa]
20
+ - @openfn/logger@1.0.5
21
+ - @openfn/engine-multi@1.6.2
22
+ - @openfn/lexicon@1.2.0
23
+ - @openfn/runtime@1.6.4
24
+
3
25
  ## 1.12.1
4
26
 
5
27
  ### Patch Changes
package/dist/index.d.ts CHANGED
@@ -53,6 +53,7 @@ type WorkerRunOptions = ExecuteOptions & {
53
53
  };
54
54
 
55
55
  type Context = {
56
+ id: string;
56
57
  channel: Channel;
57
58
  state: RunState;
58
59
  logger: Logger;
@@ -73,7 +74,10 @@ type ServerOptions = {
73
74
  min?: number;
74
75
  max?: number;
75
76
  };
77
+ sentryDsn?: string;
78
+ sentryEnv?: string;
76
79
  socketTimeoutSeconds?: number;
80
+ messageTimeoutSeconds?: number;
77
81
  payloadLimitMb?: number;
78
82
  collectionsVersion?: string;
79
83
  collectionsUrl?: string;
package/dist/index.js CHANGED
@@ -2,6 +2,7 @@
2
2
  import { EventEmitter as EventEmitter2 } from "node:events";
3
3
  import { promisify } from "node:util";
4
4
  import { exec as _exec } from "node:child_process";
5
+ import * as Sentry5 from "@sentry/node";
5
6
  import Koa from "koa";
6
7
  import bodyParser from "koa-bodyparser";
7
8
  import koaLogger from "koa-logger";
@@ -123,9 +124,11 @@ var verifyToken = async (token, publicKey) => {
123
124
  return true;
124
125
  }
125
126
  };
127
+ var { DEPLOYED_POD_NAME } = process.env;
126
128
  var claim = (app, logger = mockLogger, options = {}) => {
127
129
  return new Promise((resolve, reject) => {
128
130
  const { maxWorkers = 5 } = options;
131
+ const podName = DEPLOYED_POD_NAME ? `[${DEPLOYED_POD_NAME}] ` : "";
129
132
  const activeWorkers = Object.keys(app.workflows).length;
130
133
  if (activeWorkers >= maxWorkers) {
131
134
  app.workloop?.stop(`server at capacity (${activeWorkers}/${maxWorkers})`);
@@ -137,10 +140,10 @@ var claim = (app, logger = mockLogger, options = {}) => {
137
140
  }
138
141
  logger.debug(`requesting run (capacity ${activeWorkers}/${maxWorkers})`);
139
142
  const start = Date.now();
140
- app.queueChannel.push(CLAIM, { demand: 1 }).receive("ok", ({ runs }) => {
143
+ app.queueChannel.push(CLAIM, { demand: 1, pod_name: DEPLOYED_POD_NAME }).receive("ok", ({ runs }) => {
141
144
  const duration = Date.now() - start;
142
145
  logger.debug(
143
- `claimed ${runs.length} runs in ${duration}ms (${runs.length ? runs.map((r) => r.id).join(",") : "-"})`
146
+ `${podName}claimed ${runs.length} runs in ${duration}ms (${runs.length ? runs.map((r) => r.id).join(",") : "-"})`
144
147
  );
145
148
  if (!runs?.length) {
146
149
  return reject(new Error("No runs returned"));
@@ -160,7 +163,7 @@ var claim = (app, logger = mockLogger, options = {}) => {
160
163
  } else {
161
164
  logger.debug("skipping run token validation for", run.id);
162
165
  }
163
- logger.debug("starting run", run.id);
166
+ logger.debug(`${podName} starting run ${run.id}`);
164
167
  app.execute(run);
165
168
  resolve();
166
169
  });
@@ -210,6 +213,9 @@ var startWorkloop = (app, logger, minBackoff, maxBackoff, maxWorkers) => {
210
213
  };
211
214
  var workloop_default = startWorkloop;
212
215
 
216
+ // src/api/execute.ts
217
+ import * as Sentry2 from "@sentry/node";
218
+
213
219
  // src/util/convert-lightning-plan.ts
214
220
  import crypto2 from "node:crypto";
215
221
  import path from "node:path";
@@ -387,26 +393,6 @@ var convert_lightning_plan_default = (run, options = {}) => {
387
393
  };
388
394
  };
389
395
 
390
- // src/util/get-with-reply.ts
391
- var get_with_reply_default = (channel, event, payload) => new Promise((resolve, reject) => {
392
- channel.push(event, payload).receive("ok", (evt) => {
393
- resolve(evt);
394
- }).receive("error", (e) => {
395
- reject(e);
396
- }).receive("timeout", (e) => {
397
- reject(e);
398
- });
399
- });
400
-
401
- // src/util/stringify.ts
402
- import stringify from "fast-safe-stringify";
403
- var stringify_default = (obj) => stringify(obj, (_key, value) => {
404
- if (value instanceof Uint8Array) {
405
- return Array.from(value);
406
- }
407
- return value;
408
- });
409
-
410
396
  // src/util/create-run-state.ts
411
397
  var create_run_state_default = (plan, input) => {
412
398
  const state = {
@@ -440,6 +426,67 @@ var create_run_state_default = (plan, input) => {
440
426
  return state;
441
427
  };
442
428
 
429
+ // src/util/send-event.ts
430
+ import * as Sentry from "@sentry/node";
431
+
432
+ // src/errors.ts
433
+ var LightningSocketError = class extends Error {
434
+ constructor(event, message) {
435
+ super(`[${event}] ${message}`);
436
+ this.name = "LightningSocketError";
437
+ this.event = "";
438
+ this.rejectMessage = "";
439
+ this.event = event;
440
+ this.rejectMessage = message;
441
+ }
442
+ };
443
+ var LightningTimeoutError = class extends Error {
444
+ constructor(event) {
445
+ super(`[${event}] timeout`);
446
+ this.name = "LightningTimeoutError";
447
+ }
448
+ };
449
+
450
+ // src/util/send-event.ts
451
+ var sendEvent = (context, event, payload) => {
452
+ const { channel, logger, id: runId = "<unknown run>" } = context;
453
+ return new Promise((resolve, reject) => {
454
+ const report = (error) => {
455
+ logger.error(`${runId} :: ${event} :: ERR: ${error.message || error}`);
456
+ const context2 = {
457
+ run_id: runId,
458
+ event
459
+ };
460
+ const extras = {};
461
+ if (error.rejectMessage) {
462
+ extras.rejection_reason = error.rejectMessage;
463
+ }
464
+ Sentry.captureException(error, (scope) => {
465
+ scope.setContext("run", context2);
466
+ scope.setExtras(extras);
467
+ return scope;
468
+ });
469
+ error.reportedToSentry = true;
470
+ reject(error);
471
+ };
472
+ channel.push(event, payload).receive("error", (message) => {
473
+ report(new LightningSocketError(event, message));
474
+ }).receive("timeout", () => {
475
+ report(new LightningTimeoutError(event));
476
+ }).receive("ok", resolve);
477
+ });
478
+ };
479
+ var send_event_default = sendEvent;
480
+
481
+ // src/util/stringify.ts
482
+ import stringify from "fast-safe-stringify";
483
+ var stringify_default = (obj) => stringify(obj, (_key, value) => {
484
+ if (value instanceof Uint8Array) {
485
+ return Array.from(value);
486
+ }
487
+ return value;
488
+ });
489
+
443
490
  // src/util/throttle.ts
444
491
  var createThrottler = () => {
445
492
  const q = [];
@@ -525,7 +572,7 @@ function getVersion() {
525
572
 
526
573
  // src/events/run-start.ts
527
574
  async function onRunStart(context, event) {
528
- const { channel, state, options = {} } = context;
575
+ const { state, options = {} } = context;
529
576
  const time = (timestamp() - BigInt(1e7)).toString();
530
577
  const versionLogContext = {
531
578
  ...context,
@@ -538,7 +585,7 @@ async function onRunStart(context, event) {
538
585
  worker: await getVersion(),
539
586
  ...event.versions
540
587
  };
541
- await sendEvent(channel, RUN_START, {
588
+ await sendEvent(context, RUN_START, {
542
589
  versions,
543
590
  /// use the engine time in run start
544
591
  timestamp: timeInMicroseconds(event.time)
@@ -600,7 +647,7 @@ var calculateRunExitReason = (state) => {
600
647
 
601
648
  // src/events/step-complete.ts
602
649
  async function onStepComplete(context, event, error) {
603
- const { channel, state, options } = context;
650
+ const { state, options } = context;
604
651
  const dataclipId = crypto3.randomUUID();
605
652
  const step_id = state.activeStep;
606
653
  const job_id = state.activeJob;
@@ -645,13 +692,13 @@ async function onStepComplete(context, event, error) {
645
692
  const reason = calculateJobExitReason(job_id, event.state, error);
646
693
  state.reasons[job_id] = reason;
647
694
  Object.assign(evt, reason);
648
- return sendEvent(channel, STEP_COMPLETE, evt);
695
+ return sendEvent(context, STEP_COMPLETE, evt);
649
696
  }
650
697
 
651
698
  // src/events/step-start.ts
652
699
  import crypto4 from "node:crypto";
653
700
  async function onStepStart(context, event) {
654
- const { channel, state } = context;
701
+ const { state } = context;
655
702
  state.activeStep = crypto4.randomUUID();
656
703
  state.activeJob = event.jobId;
657
704
  const input_dataclip_id = state.inputDataclips[event.jobId];
@@ -663,7 +710,7 @@ async function onStepStart(context, event) {
663
710
  if (!state.withheldDataclips[input_dataclip_id]) {
664
711
  evt.input_dataclip_id = input_dataclip_id;
665
712
  }
666
- await sendEvent(channel, STEP_START, evt);
713
+ await sendEvent(context, STEP_START, evt);
667
714
  }
668
715
 
669
716
  // src/util/log-final-reason.ts
@@ -685,12 +732,12 @@ ${reason.error_type}: ${reason.error_message || "unknown"}`;
685
732
 
686
733
  // src/events/run-complete.ts
687
734
  async function onWorkflowComplete(context, event) {
688
- const { state, channel, onFinish, logger } = context;
735
+ const { state, onFinish, logger } = context;
689
736
  const result = state.dataclips[state.lastDataclipId];
690
737
  const reason = calculateRunExitReason(state);
691
738
  await log_final_reason_default(context, reason);
692
739
  try {
693
- await sendEvent(channel, RUN_COMPLETE, {
740
+ await sendEvent(context, RUN_COMPLETE, {
694
741
  final_dataclip_id: state.lastDataclipId,
695
742
  timestamp: timeInMicroseconds(event.time),
696
743
  ...reason
@@ -706,14 +753,14 @@ async function onWorkflowComplete(context, event) {
706
753
 
707
754
  // src/events/run-error.ts
708
755
  async function onRunError(context, event) {
709
- const { state, channel, logger, onFinish } = context;
756
+ const { state, logger, onFinish } = context;
710
757
  try {
711
758
  const reason = calculateJobExitReason("", { data: {} }, event);
712
759
  if (state.activeJob) {
713
760
  await onJobError(context, { error: event });
714
761
  }
715
762
  await log_final_reason_default(context, reason);
716
- await sendEvent(channel, RUN_COMPLETE, {
763
+ await sendEvent(context, RUN_COMPLETE, {
717
764
  final_dataclip_id: state.lastDataclipId,
718
765
  ...reason
719
766
  });
@@ -739,6 +786,7 @@ function execute(channel, engine, logger, plan, input, options = {}, onFinish =
739
786
  logger.info("executing ", plan.id);
740
787
  const state = create_run_state_default(plan, input);
741
788
  const context = {
789
+ id: plan.id,
742
790
  channel,
743
791
  state,
744
792
  logger,
@@ -746,76 +794,104 @@ function execute(channel, engine, logger, plan, input, options = {}, onFinish =
746
794
  options,
747
795
  onFinish
748
796
  };
749
- const throttle = throttle_default();
750
- const addEvent = (eventName, handler) => {
751
- const wrappedFn = async (event) => {
752
- const lightningEvent = eventMap[eventName] ?? eventName;
753
- try {
754
- await handler(context, event);
755
- logger.info(`${plan.id} :: ${lightningEvent} :: OK`);
756
- } catch (e) {
757
- logger.error(
758
- `${plan.id} :: ${lightningEvent} :: ERR: ${e.message || e.toString()}`
759
- );
760
- logger.error(e);
797
+ Sentry2.withIsolationScope(async () => {
798
+ Sentry2.addBreadcrumb({
799
+ category: "run",
800
+ message: "Executing run: loading metadata",
801
+ level: "info",
802
+ data: {
803
+ runId: plan.id
761
804
  }
805
+ });
806
+ const throttle = throttle_default();
807
+ const addEvent = (eventName, handler) => {
808
+ const wrappedFn = async (event) => {
809
+ if (eventName !== "workflow-log") {
810
+ Sentry2.addBreadcrumb({
811
+ category: "event",
812
+ message: eventName,
813
+ level: "info"
814
+ });
815
+ }
816
+ const lightningEvent = eventMap[eventName] ?? eventName;
817
+ try {
818
+ await handler(context, event);
819
+ logger.info(`${plan.id} :: ${lightningEvent} :: OK`);
820
+ } catch (e) {
821
+ if (!e.reportedToSentry) {
822
+ Sentry2.captureException(e);
823
+ logger.error(e);
824
+ }
825
+ }
826
+ };
827
+ return {
828
+ [eventName]: wrappedFn
829
+ };
762
830
  };
763
- return {
764
- [eventName]: wrappedFn
831
+ const listeners = Object.assign(
832
+ {},
833
+ addEvent("workflow-start", throttle(onRunStart)),
834
+ addEvent("job-start", throttle(onStepStart)),
835
+ addEvent("job-complete", throttle(onStepComplete)),
836
+ addEvent("job-error", throttle(onJobError)),
837
+ addEvent("workflow-log", throttle(onJobLog)),
838
+ // This will also resolve the promise
839
+ addEvent("workflow-complete", throttle(onWorkflowComplete)),
840
+ addEvent("workflow-error", throttle(onRunError))
841
+ // TODO send autoinstall logs
842
+ );
843
+ engine.listen(plan.id, listeners);
844
+ const resolvers = {
845
+ credential: (id) => loadCredential(context, id)
846
+ // TODO not supported right now
847
+ // dataclip: (id: string) => loadDataclip(channel, id),
765
848
  };
766
- };
767
- const listeners = Object.assign(
768
- {},
769
- addEvent("workflow-start", throttle(onRunStart)),
770
- addEvent("job-start", throttle(onStepStart)),
771
- addEvent("job-complete", throttle(onStepComplete)),
772
- addEvent("job-error", throttle(onJobError)),
773
- addEvent("workflow-log", throttle(onJobLog)),
774
- // This will also resolve the promise
775
- addEvent("workflow-complete", throttle(onWorkflowComplete)),
776
- addEvent("workflow-error", throttle(onRunError))
777
- // TODO send autoinstall logs
778
- );
779
- engine.listen(plan.id, listeners);
780
- const resolvers = {
781
- credential: (id) => loadCredential(channel, id)
782
- // TODO not supported right now
783
- // dataclip: (id: string) => loadDataclip(channel, id),
784
- };
785
- setTimeout(async () => {
786
- let loadedInput = input;
787
- if (typeof input === "string") {
788
- logger.debug("loading dataclip", input);
849
+ setTimeout(async () => {
850
+ let loadedInput = input;
851
+ if (typeof input === "string") {
852
+ logger.debug("loading dataclip", input);
853
+ try {
854
+ loadedInput = await loadDataclip(context, input);
855
+ logger.success("dataclip loaded");
856
+ } catch (e) {
857
+ return onRunError(context, {
858
+ workflowId: plan.id,
859
+ message: `Failed to load dataclip ${input}${e.message ? `: ${e.message}` : ""}`,
860
+ type: "DataClipError",
861
+ severity: "exception"
862
+ });
863
+ }
864
+ }
789
865
  try {
790
- loadedInput = await loadDataclip(channel, input);
791
- logger.success("dataclip loaded");
866
+ Sentry2.addBreadcrumb({
867
+ category: "run",
868
+ message: "run metadata loaded: starting run",
869
+ level: "info",
870
+ data: {
871
+ runId: plan.id
872
+ }
873
+ });
874
+ engine.execute(plan, loadedInput, { resolvers, ...options });
792
875
  } catch (e) {
793
- return onRunError(context, {
876
+ Sentry2.addBreadcrumb({
877
+ category: "run",
878
+ message: "exception in run",
879
+ level: "info",
880
+ data: {
881
+ runId: plan.id
882
+ }
883
+ });
884
+ onRunError(context, {
794
885
  workflowId: plan.id,
795
- message: `Failed to load dataclip ${input}${e.message ? `: ${e.message}` : ""}`,
796
- type: "DataClipError",
797
- severity: "exception"
886
+ message: e.message,
887
+ type: e.type,
888
+ severity: e.severity
798
889
  });
799
890
  }
800
- }
801
- try {
802
- engine.execute(plan, loadedInput, { resolvers, ...options });
803
- } catch (e) {
804
- onRunError(context, {
805
- workflowId: plan.id,
806
- message: e.message,
807
- type: e.type,
808
- severity: e.severity
809
- });
810
- }
891
+ });
811
892
  });
812
893
  return context;
813
894
  }
814
- var sendEvent = (channel, event, payload) => new Promise((resolve, reject) => {
815
- channel.push(event, payload).receive("error", reject).receive("timeout", () => {
816
- reject(new Error("timeout"));
817
- }).receive("ok", resolve);
818
- });
819
895
  function onJobError(context, event) {
820
896
  const { state, error, jobId } = event;
821
897
  if (state?.errors?.[jobId]?.message === error.message) {
@@ -824,7 +900,8 @@ function onJobError(context, event) {
824
900
  return onStepComplete(context, event, event.error);
825
901
  }
826
902
  }
827
- function onJobLog({ channel, state, options }, event) {
903
+ function onJobLog(context, event) {
904
+ const { state, options } = context;
828
905
  let message = event.message;
829
906
  if (event.redacted) {
830
907
  message = [
@@ -844,17 +921,17 @@ function onJobLog({ channel, state, options }, event) {
844
921
  if (state.activeStep) {
845
922
  log.step_id = state.activeStep;
846
923
  }
847
- return sendEvent(channel, RUN_LOG, log);
924
+ return sendEvent(context, RUN_LOG, log);
848
925
  }
849
- async function loadDataclip(channel, stateId) {
850
- const result = await get_with_reply_default(channel, GET_DATACLIP, {
926
+ async function loadDataclip(context, stateId) {
927
+ const result = await sendEvent(context, GET_DATACLIP, {
851
928
  id: stateId
852
929
  });
853
930
  const str = enc.decode(new Uint8Array(result));
854
931
  return JSON.parse(str);
855
932
  }
856
- async function loadCredential(channel, credentialId) {
857
- return get_with_reply_default(channel, GET_CREDENTIAL, { id: credentialId });
933
+ async function loadCredential(context, credentialId) {
934
+ return sendEvent(context, GET_CREDENTIAL, { id: credentialId });
858
935
  }
859
936
 
860
937
  // src/middleware/healthcheck.ts
@@ -863,24 +940,33 @@ var healthcheck_default = (ctx) => {
863
940
  };
864
941
 
865
942
  // src/channels/run.ts
866
- var joinRunChannel = (socket, token, runId, logger) => {
943
+ import * as Sentry3 from "@sentry/node";
944
+ var joinRunChannel = (socket, token, runId, logger, timeout = 30) => {
867
945
  return new Promise((resolve, reject) => {
868
946
  let didReceiveOk = false;
869
947
  const channelName = `run:${runId}`;
870
- logger.debug("connecting to ", channelName);
948
+ logger.info(`JOINING ${channelName}`);
949
+ logger.debug(`connecting to ${channelName} with timeout ${timeout}s`);
871
950
  const channel = socket.channel(channelName, { token });
872
- channel.join().receive("ok", async (e) => {
951
+ channel.join(timeout * 1e3).receive("ok", async (e) => {
873
952
  if (!didReceiveOk) {
874
953
  didReceiveOk = true;
875
954
  logger.success(`connected to ${channelName}`, e);
876
- const run = await get_with_reply_default(channel, GET_PLAN);
955
+ const run = await send_event_default(
956
+ { channel, logger, id: runId },
957
+ GET_PLAN
958
+ );
877
959
  resolve({ channel, run });
878
960
  }
879
961
  }).receive("error", (err) => {
962
+ Sentry3.captureException(err);
880
963
  logger.error(`error connecting to ${channelName}`, err);
964
+ channel?.leave();
881
965
  reject(err);
882
966
  }).receive("timeout", (err) => {
967
+ Sentry3.captureException(err);
883
968
  logger.error(`Timeout for ${channelName}`, err);
969
+ channel?.leave();
884
970
  reject(err);
885
971
  });
886
972
  channel.onClose(() => {
@@ -896,6 +982,7 @@ var run_default = joinRunChannel;
896
982
 
897
983
  // src/channels/worker-queue.ts
898
984
  import EventEmitter from "node:events";
985
+ import * as Sentry4 from "@sentry/node";
899
986
  import { Socket as PhxSocket } from "phoenix";
900
987
  import { WebSocket } from "ws";
901
988
  import { API_VERSION } from "@openfn/lexicon/lightning";
@@ -924,7 +1011,17 @@ var worker_token_default = generateWorkerToken;
924
1011
  // src/channels/worker-queue.ts
925
1012
  var connectToWorkerQueue = (endpoint, serverId, secret, timeout = 10, logger, SocketConstructor = PhxSocket) => {
926
1013
  const events = new EventEmitter();
1014
+ Sentry4.addBreadcrumb({
1015
+ category: "lifecycle",
1016
+ message: "Connecting to worker queue",
1017
+ level: "info"
1018
+ });
927
1019
  worker_token_default(secret, serverId, logger).then(async (token) => {
1020
+ Sentry4.addBreadcrumb({
1021
+ category: "lifecycle",
1022
+ message: "Worker token generated",
1023
+ level: "info"
1024
+ });
928
1025
  const params = {
929
1026
  token,
930
1027
  api_version: API_VERSION,
@@ -933,11 +1030,19 @@ var connectToWorkerQueue = (endpoint, serverId, secret, timeout = 10, logger, So
933
1030
  const socket = new SocketConstructor(endpoint, {
934
1031
  params,
935
1032
  transport: WebSocket,
936
- timeout: timeout * 1e3
1033
+ timeout: timeout * 1e3,
1034
+ reconnectAfterMs: (tries) => Math.max(tries * 1e3)
937
1035
  });
938
1036
  let didOpen = false;
1037
+ let shouldReportConnectionError = true;
939
1038
  socket.onOpen(() => {
1039
+ Sentry4.addBreadcrumb({
1040
+ category: "lifecycle",
1041
+ message: "Web socket connected",
1042
+ level: "info"
1043
+ });
940
1044
  didOpen = true;
1045
+ shouldReportConnectionError = true;
941
1046
  const channel = socket.channel("worker:queue");
942
1047
  channel.onMessage = (ev, load) => {
943
1048
  events.emit("message", ev, load);
@@ -957,6 +1062,16 @@ var connectToWorkerQueue = (endpoint, serverId, secret, timeout = 10, logger, So
957
1062
  events.emit("disconnect");
958
1063
  });
959
1064
  socket.onError((e) => {
1065
+ Sentry4.addBreadcrumb({
1066
+ category: "lifecycle",
1067
+ message: "Error in web socket connection",
1068
+ level: "info"
1069
+ });
1070
+ if (shouldReportConnectionError) {
1071
+ logger.debug("Reporting connection error to sentry");
1072
+ shouldReportConnectionError = false;
1073
+ Sentry4.captureException(e);
1074
+ }
960
1075
  if (!didOpen) {
961
1076
  events.emit("error", e.message);
962
1077
  didOpen = false;
@@ -1061,6 +1176,13 @@ function createServer(engine, options = {}) {
1061
1176
  const router = new Router();
1062
1177
  app.events = new EventEmitter2();
1063
1178
  app.engine = engine;
1179
+ if (options.sentryDsn) {
1180
+ Sentry5.init({
1181
+ environment: options.sentryEnv,
1182
+ dsn: options.sentryDsn
1183
+ });
1184
+ Sentry5.setupKoaErrorHandler(app);
1185
+ }
1064
1186
  app.use(bodyParser());
1065
1187
  app.use(
1066
1188
  koaLogger((str, _args) => {
@@ -1099,7 +1221,8 @@ function createServer(engine, options = {}) {
1099
1221
  app.socket,
1100
1222
  token,
1101
1223
  id,
1102
- logger
1224
+ logger,
1225
+ app.options.messageTimeoutSeconds
1103
1226
  );
1104
1227
  const { plan, options: options2, input } = convert_lightning_plan_default(run, {
1105
1228
  collectionsVersion: app.options.collectionsVersion,
@@ -1136,6 +1259,8 @@ function createServer(engine, options = {}) {
1136
1259
  );
1137
1260
  app.workflows[id] = context;
1138
1261
  } catch (e) {
1262
+ delete app.workflows[id];
1263
+ app.resumeWorkloop();
1139
1264
  logger.error(`Unexpected error executing ${id}`);
1140
1265
  logger.error(e);
1141
1266
  }
package/dist/start.js CHANGED
@@ -142,6 +142,7 @@ var runtime_engine_default = createMock;
142
142
  import { EventEmitter as EventEmitter3 } from "node:events";
143
143
  import { promisify } from "node:util";
144
144
  import { exec as _exec } from "node:child_process";
145
+ import * as Sentry5 from "@sentry/node";
145
146
  import Koa from "koa";
146
147
  import bodyParser from "koa-bodyparser";
147
148
  import koaLogger from "koa-logger";
@@ -263,9 +264,11 @@ var verifyToken = async (token, publicKey) => {
263
264
  return true;
264
265
  }
265
266
  };
267
+ var { DEPLOYED_POD_NAME } = process.env;
266
268
  var claim = (app, logger2 = mockLogger, options = {}) => {
267
269
  return new Promise((resolve5, reject) => {
268
270
  const { maxWorkers = 5 } = options;
271
+ const podName = DEPLOYED_POD_NAME ? `[${DEPLOYED_POD_NAME}] ` : "";
269
272
  const activeWorkers = Object.keys(app.workflows).length;
270
273
  if (activeWorkers >= maxWorkers) {
271
274
  app.workloop?.stop(`server at capacity (${activeWorkers}/${maxWorkers})`);
@@ -277,10 +280,10 @@ var claim = (app, logger2 = mockLogger, options = {}) => {
277
280
  }
278
281
  logger2.debug(`requesting run (capacity ${activeWorkers}/${maxWorkers})`);
279
282
  const start = Date.now();
280
- app.queueChannel.push(CLAIM, { demand: 1 }).receive("ok", ({ runs }) => {
283
+ app.queueChannel.push(CLAIM, { demand: 1, pod_name: DEPLOYED_POD_NAME }).receive("ok", ({ runs }) => {
281
284
  const duration = Date.now() - start;
282
285
  logger2.debug(
283
- `claimed ${runs.length} runs in ${duration}ms (${runs.length ? runs.map((r) => r.id).join(",") : "-"})`
286
+ `${podName}claimed ${runs.length} runs in ${duration}ms (${runs.length ? runs.map((r) => r.id).join(",") : "-"})`
284
287
  );
285
288
  if (!runs?.length) {
286
289
  return reject(new Error("No runs returned"));
@@ -300,7 +303,7 @@ var claim = (app, logger2 = mockLogger, options = {}) => {
300
303
  } else {
301
304
  logger2.debug("skipping run token validation for", run2.id);
302
305
  }
303
- logger2.debug("starting run", run2.id);
306
+ logger2.debug(`${podName} starting run ${run2.id}`);
304
307
  app.execute(run2);
305
308
  resolve5();
306
309
  });
@@ -350,6 +353,9 @@ var startWorkloop = (app, logger2, minBackoff2, maxBackoff2, maxWorkers) => {
350
353
  };
351
354
  var workloop_default = startWorkloop;
352
355
 
356
+ // src/api/execute.ts
357
+ import * as Sentry2 from "@sentry/node";
358
+
353
359
  // src/util/convert-lightning-plan.ts
354
360
  import crypto3 from "node:crypto";
355
361
  import path from "node:path";
@@ -527,26 +533,6 @@ var convert_lightning_plan_default = (run2, options = {}) => {
527
533
  };
528
534
  };
529
535
 
530
- // src/util/get-with-reply.ts
531
- var get_with_reply_default = (channel, event, payload) => new Promise((resolve5, reject) => {
532
- channel.push(event, payload).receive("ok", (evt) => {
533
- resolve5(evt);
534
- }).receive("error", (e) => {
535
- reject(e);
536
- }).receive("timeout", (e) => {
537
- reject(e);
538
- });
539
- });
540
-
541
- // src/util/stringify.ts
542
- import stringify from "fast-safe-stringify";
543
- var stringify_default = (obj) => stringify(obj, (_key, value) => {
544
- if (value instanceof Uint8Array) {
545
- return Array.from(value);
546
- }
547
- return value;
548
- });
549
-
550
536
  // src/util/create-run-state.ts
551
537
  var create_run_state_default = (plan, input) => {
552
538
  const state = {
@@ -580,6 +566,67 @@ var create_run_state_default = (plan, input) => {
580
566
  return state;
581
567
  };
582
568
 
569
+ // src/util/send-event.ts
570
+ import * as Sentry from "@sentry/node";
571
+
572
+ // src/errors.ts
573
+ var LightningSocketError = class extends Error {
574
+ constructor(event, message) {
575
+ super(`[${event}] ${message}`);
576
+ this.name = "LightningSocketError";
577
+ this.event = "";
578
+ this.rejectMessage = "";
579
+ this.event = event;
580
+ this.rejectMessage = message;
581
+ }
582
+ };
583
+ var LightningTimeoutError = class extends Error {
584
+ constructor(event) {
585
+ super(`[${event}] timeout`);
586
+ this.name = "LightningTimeoutError";
587
+ }
588
+ };
589
+
590
+ // src/util/send-event.ts
591
+ var sendEvent = (context, event, payload) => {
592
+ const { channel, logger: logger2, id: runId = "<unknown run>" } = context;
593
+ return new Promise((resolve5, reject) => {
594
+ const report = (error) => {
595
+ logger2.error(`${runId} :: ${event} :: ERR: ${error.message || error}`);
596
+ const context2 = {
597
+ run_id: runId,
598
+ event
599
+ };
600
+ const extras = {};
601
+ if (error.rejectMessage) {
602
+ extras.rejection_reason = error.rejectMessage;
603
+ }
604
+ Sentry.captureException(error, (scope) => {
605
+ scope.setContext("run", context2);
606
+ scope.setExtras(extras);
607
+ return scope;
608
+ });
609
+ error.reportedToSentry = true;
610
+ reject(error);
611
+ };
612
+ channel.push(event, payload).receive("error", (message) => {
613
+ report(new LightningSocketError(event, message));
614
+ }).receive("timeout", () => {
615
+ report(new LightningTimeoutError(event));
616
+ }).receive("ok", resolve5);
617
+ });
618
+ };
619
+ var send_event_default = sendEvent;
620
+
621
+ // src/util/stringify.ts
622
+ import stringify from "fast-safe-stringify";
623
+ var stringify_default = (obj) => stringify(obj, (_key, value) => {
624
+ if (value instanceof Uint8Array) {
625
+ return Array.from(value);
626
+ }
627
+ return value;
628
+ });
629
+
583
630
  // src/util/throttle.ts
584
631
  var createThrottler = () => {
585
632
  const q = [];
@@ -665,7 +712,7 @@ function getVersion() {
665
712
 
666
713
  // src/events/run-start.ts
667
714
  async function onRunStart(context, event) {
668
- const { channel, state, options = {} } = context;
715
+ const { state, options = {} } = context;
669
716
  const time = (timestamp() - BigInt(1e7)).toString();
670
717
  const versionLogContext = {
671
718
  ...context,
@@ -678,7 +725,7 @@ async function onRunStart(context, event) {
678
725
  worker: await getVersion(),
679
726
  ...event.versions
680
727
  };
681
- await sendEvent(channel, RUN_START, {
728
+ await sendEvent(context, RUN_START, {
682
729
  versions,
683
730
  /// use the engine time in run start
684
731
  timestamp: timeInMicroseconds(event.time)
@@ -740,7 +787,7 @@ var calculateRunExitReason = (state) => {
740
787
 
741
788
  // src/events/step-complete.ts
742
789
  async function onStepComplete(context, event, error) {
743
- const { channel, state, options } = context;
790
+ const { state, options } = context;
744
791
  const dataclipId = crypto4.randomUUID();
745
792
  const step_id = state.activeStep;
746
793
  const job_id = state.activeJob;
@@ -785,13 +832,13 @@ async function onStepComplete(context, event, error) {
785
832
  const reason = calculateJobExitReason(job_id, event.state, error);
786
833
  state.reasons[job_id] = reason;
787
834
  Object.assign(evt, reason);
788
- return sendEvent(channel, STEP_COMPLETE, evt);
835
+ return sendEvent(context, STEP_COMPLETE, evt);
789
836
  }
790
837
 
791
838
  // src/events/step-start.ts
792
839
  import crypto5 from "node:crypto";
793
840
  async function onStepStart(context, event) {
794
- const { channel, state } = context;
841
+ const { state } = context;
795
842
  state.activeStep = crypto5.randomUUID();
796
843
  state.activeJob = event.jobId;
797
844
  const input_dataclip_id = state.inputDataclips[event.jobId];
@@ -803,7 +850,7 @@ async function onStepStart(context, event) {
803
850
  if (!state.withheldDataclips[input_dataclip_id]) {
804
851
  evt.input_dataclip_id = input_dataclip_id;
805
852
  }
806
- await sendEvent(channel, STEP_START, evt);
853
+ await sendEvent(context, STEP_START, evt);
807
854
  }
808
855
 
809
856
  // src/util/log-final-reason.ts
@@ -825,12 +872,12 @@ ${reason.error_type}: ${reason.error_message || "unknown"}`;
825
872
 
826
873
  // src/events/run-complete.ts
827
874
  async function onWorkflowComplete(context, event) {
828
- const { state, channel, onFinish, logger: logger2 } = context;
875
+ const { state, onFinish, logger: logger2 } = context;
829
876
  const result = state.dataclips[state.lastDataclipId];
830
877
  const reason = calculateRunExitReason(state);
831
878
  await log_final_reason_default(context, reason);
832
879
  try {
833
- await sendEvent(channel, RUN_COMPLETE, {
880
+ await sendEvent(context, RUN_COMPLETE, {
834
881
  final_dataclip_id: state.lastDataclipId,
835
882
  timestamp: timeInMicroseconds(event.time),
836
883
  ...reason
@@ -846,14 +893,14 @@ async function onWorkflowComplete(context, event) {
846
893
 
847
894
  // src/events/run-error.ts
848
895
  async function onRunError(context, event) {
849
- const { state, channel, logger: logger2, onFinish } = context;
896
+ const { state, logger: logger2, onFinish } = context;
850
897
  try {
851
898
  const reason = calculateJobExitReason("", { data: {} }, event);
852
899
  if (state.activeJob) {
853
900
  await onJobError(context, { error: event });
854
901
  }
855
902
  await log_final_reason_default(context, reason);
856
- await sendEvent(channel, RUN_COMPLETE, {
903
+ await sendEvent(context, RUN_COMPLETE, {
857
904
  final_dataclip_id: state.lastDataclipId,
858
905
  ...reason
859
906
  });
@@ -879,6 +926,7 @@ function execute(channel, engine, logger2, plan, input, options = {}, onFinish =
879
926
  logger2.info("executing ", plan.id);
880
927
  const state = create_run_state_default(plan, input);
881
928
  const context = {
929
+ id: plan.id,
882
930
  channel,
883
931
  state,
884
932
  logger: logger2,
@@ -886,76 +934,104 @@ function execute(channel, engine, logger2, plan, input, options = {}, onFinish =
886
934
  options,
887
935
  onFinish
888
936
  };
889
- const throttle = throttle_default();
890
- const addEvent = (eventName, handler) => {
891
- const wrappedFn = async (event) => {
892
- const lightningEvent = eventMap[eventName] ?? eventName;
893
- try {
894
- await handler(context, event);
895
- logger2.info(`${plan.id} :: ${lightningEvent} :: OK`);
896
- } catch (e) {
897
- logger2.error(
898
- `${plan.id} :: ${lightningEvent} :: ERR: ${e.message || e.toString()}`
899
- );
900
- logger2.error(e);
937
+ Sentry2.withIsolationScope(async () => {
938
+ Sentry2.addBreadcrumb({
939
+ category: "run",
940
+ message: "Executing run: loading metadata",
941
+ level: "info",
942
+ data: {
943
+ runId: plan.id
901
944
  }
945
+ });
946
+ const throttle = throttle_default();
947
+ const addEvent = (eventName, handler) => {
948
+ const wrappedFn = async (event) => {
949
+ if (eventName !== "workflow-log") {
950
+ Sentry2.addBreadcrumb({
951
+ category: "event",
952
+ message: eventName,
953
+ level: "info"
954
+ });
955
+ }
956
+ const lightningEvent = eventMap[eventName] ?? eventName;
957
+ try {
958
+ await handler(context, event);
959
+ logger2.info(`${plan.id} :: ${lightningEvent} :: OK`);
960
+ } catch (e) {
961
+ if (!e.reportedToSentry) {
962
+ Sentry2.captureException(e);
963
+ logger2.error(e);
964
+ }
965
+ }
966
+ };
967
+ return {
968
+ [eventName]: wrappedFn
969
+ };
902
970
  };
903
- return {
904
- [eventName]: wrappedFn
971
+ const listeners = Object.assign(
972
+ {},
973
+ addEvent("workflow-start", throttle(onRunStart)),
974
+ addEvent("job-start", throttle(onStepStart)),
975
+ addEvent("job-complete", throttle(onStepComplete)),
976
+ addEvent("job-error", throttle(onJobError)),
977
+ addEvent("workflow-log", throttle(onJobLog)),
978
+ // This will also resolve the promise
979
+ addEvent("workflow-complete", throttle(onWorkflowComplete)),
980
+ addEvent("workflow-error", throttle(onRunError))
981
+ // TODO send autoinstall logs
982
+ );
983
+ engine.listen(plan.id, listeners);
984
+ const resolvers = {
985
+ credential: (id) => loadCredential(context, id)
986
+ // TODO not supported right now
987
+ // dataclip: (id: string) => loadDataclip(channel, id),
905
988
  };
906
- };
907
- const listeners = Object.assign(
908
- {},
909
- addEvent("workflow-start", throttle(onRunStart)),
910
- addEvent("job-start", throttle(onStepStart)),
911
- addEvent("job-complete", throttle(onStepComplete)),
912
- addEvent("job-error", throttle(onJobError)),
913
- addEvent("workflow-log", throttle(onJobLog)),
914
- // This will also resolve the promise
915
- addEvent("workflow-complete", throttle(onWorkflowComplete)),
916
- addEvent("workflow-error", throttle(onRunError))
917
- // TODO send autoinstall logs
918
- );
919
- engine.listen(plan.id, listeners);
920
- const resolvers = {
921
- credential: (id) => loadCredential(channel, id)
922
- // TODO not supported right now
923
- // dataclip: (id: string) => loadDataclip(channel, id),
924
- };
925
- setTimeout(async () => {
926
- let loadedInput = input;
927
- if (typeof input === "string") {
928
- logger2.debug("loading dataclip", input);
989
+ setTimeout(async () => {
990
+ let loadedInput = input;
991
+ if (typeof input === "string") {
992
+ logger2.debug("loading dataclip", input);
993
+ try {
994
+ loadedInput = await loadDataclip(context, input);
995
+ logger2.success("dataclip loaded");
996
+ } catch (e) {
997
+ return onRunError(context, {
998
+ workflowId: plan.id,
999
+ message: `Failed to load dataclip ${input}${e.message ? `: ${e.message}` : ""}`,
1000
+ type: "DataClipError",
1001
+ severity: "exception"
1002
+ });
1003
+ }
1004
+ }
929
1005
  try {
930
- loadedInput = await loadDataclip(channel, input);
931
- logger2.success("dataclip loaded");
1006
+ Sentry2.addBreadcrumb({
1007
+ category: "run",
1008
+ message: "run metadata loaded: starting run",
1009
+ level: "info",
1010
+ data: {
1011
+ runId: plan.id
1012
+ }
1013
+ });
1014
+ engine.execute(plan, loadedInput, { resolvers, ...options });
932
1015
  } catch (e) {
933
- return onRunError(context, {
1016
+ Sentry2.addBreadcrumb({
1017
+ category: "run",
1018
+ message: "exception in run",
1019
+ level: "info",
1020
+ data: {
1021
+ runId: plan.id
1022
+ }
1023
+ });
1024
+ onRunError(context, {
934
1025
  workflowId: plan.id,
935
- message: `Failed to load dataclip ${input}${e.message ? `: ${e.message}` : ""}`,
936
- type: "DataClipError",
937
- severity: "exception"
1026
+ message: e.message,
1027
+ type: e.type,
1028
+ severity: e.severity
938
1029
  });
939
1030
  }
940
- }
941
- try {
942
- engine.execute(plan, loadedInput, { resolvers, ...options });
943
- } catch (e) {
944
- onRunError(context, {
945
- workflowId: plan.id,
946
- message: e.message,
947
- type: e.type,
948
- severity: e.severity
949
- });
950
- }
1031
+ });
951
1032
  });
952
1033
  return context;
953
1034
  }
954
- var sendEvent = (channel, event, payload) => new Promise((resolve5, reject) => {
955
- channel.push(event, payload).receive("error", reject).receive("timeout", () => {
956
- reject(new Error("timeout"));
957
- }).receive("ok", resolve5);
958
- });
959
1035
  function onJobError(context, event) {
960
1036
  const { state, error, jobId } = event;
961
1037
  if (state?.errors?.[jobId]?.message === error.message) {
@@ -964,7 +1040,8 @@ function onJobError(context, event) {
964
1040
  return onStepComplete(context, event, event.error);
965
1041
  }
966
1042
  }
967
- function onJobLog({ channel, state, options }, event) {
1043
+ function onJobLog(context, event) {
1044
+ const { state, options } = context;
968
1045
  let message = event.message;
969
1046
  if (event.redacted) {
970
1047
  message = [
@@ -984,17 +1061,17 @@ function onJobLog({ channel, state, options }, event) {
984
1061
  if (state.activeStep) {
985
1062
  log.step_id = state.activeStep;
986
1063
  }
987
- return sendEvent(channel, RUN_LOG, log);
1064
+ return sendEvent(context, RUN_LOG, log);
988
1065
  }
989
- async function loadDataclip(channel, stateId) {
990
- const result = await get_with_reply_default(channel, GET_DATACLIP, {
1066
+ async function loadDataclip(context, stateId) {
1067
+ const result = await sendEvent(context, GET_DATACLIP, {
991
1068
  id: stateId
992
1069
  });
993
1070
  const str = enc.decode(new Uint8Array(result));
994
1071
  return JSON.parse(str);
995
1072
  }
996
- async function loadCredential(channel, credentialId) {
997
- return get_with_reply_default(channel, GET_CREDENTIAL, { id: credentialId });
1073
+ async function loadCredential(context, credentialId) {
1074
+ return sendEvent(context, GET_CREDENTIAL, { id: credentialId });
998
1075
  }
999
1076
 
1000
1077
  // src/middleware/healthcheck.ts
@@ -1003,24 +1080,33 @@ var healthcheck_default = (ctx) => {
1003
1080
  };
1004
1081
 
1005
1082
  // src/channels/run.ts
1006
- var joinRunChannel = (socket, token, runId, logger2) => {
1083
+ import * as Sentry3 from "@sentry/node";
1084
+ var joinRunChannel = (socket, token, runId, logger2, timeout = 30) => {
1007
1085
  return new Promise((resolve5, reject) => {
1008
1086
  let didReceiveOk = false;
1009
1087
  const channelName = `run:${runId}`;
1010
- logger2.debug("connecting to ", channelName);
1088
+ logger2.info(`JOINING ${channelName}`);
1089
+ logger2.debug(`connecting to ${channelName} with timeout ${timeout}s`);
1011
1090
  const channel = socket.channel(channelName, { token });
1012
- channel.join().receive("ok", async (e) => {
1091
+ channel.join(timeout * 1e3).receive("ok", async (e) => {
1013
1092
  if (!didReceiveOk) {
1014
1093
  didReceiveOk = true;
1015
1094
  logger2.success(`connected to ${channelName}`, e);
1016
- const run2 = await get_with_reply_default(channel, GET_PLAN);
1095
+ const run2 = await send_event_default(
1096
+ { channel, logger: logger2, id: runId },
1097
+ GET_PLAN
1098
+ );
1017
1099
  resolve5({ channel, run: run2 });
1018
1100
  }
1019
1101
  }).receive("error", (err) => {
1102
+ Sentry3.captureException(err);
1020
1103
  logger2.error(`error connecting to ${channelName}`, err);
1104
+ channel?.leave();
1021
1105
  reject(err);
1022
1106
  }).receive("timeout", (err) => {
1107
+ Sentry3.captureException(err);
1023
1108
  logger2.error(`Timeout for ${channelName}`, err);
1109
+ channel?.leave();
1024
1110
  reject(err);
1025
1111
  });
1026
1112
  channel.onClose(() => {
@@ -1036,6 +1122,7 @@ var run_default = joinRunChannel;
1036
1122
 
1037
1123
  // src/channels/worker-queue.ts
1038
1124
  import EventEmitter2 from "node:events";
1125
+ import * as Sentry4 from "@sentry/node";
1039
1126
  import { Socket as PhxSocket } from "phoenix";
1040
1127
  import { WebSocket } from "ws";
1041
1128
  import { API_VERSION } from "@openfn/lexicon/lightning";
@@ -1064,7 +1151,17 @@ var worker_token_default = generateWorkerToken;
1064
1151
  // src/channels/worker-queue.ts
1065
1152
  var connectToWorkerQueue = (endpoint, serverId, secret, timeout = 10, logger2, SocketConstructor = PhxSocket) => {
1066
1153
  const events = new EventEmitter2();
1154
+ Sentry4.addBreadcrumb({
1155
+ category: "lifecycle",
1156
+ message: "Connecting to worker queue",
1157
+ level: "info"
1158
+ });
1067
1159
  worker_token_default(secret, serverId, logger2).then(async (token) => {
1160
+ Sentry4.addBreadcrumb({
1161
+ category: "lifecycle",
1162
+ message: "Worker token generated",
1163
+ level: "info"
1164
+ });
1068
1165
  const params = {
1069
1166
  token,
1070
1167
  api_version: API_VERSION,
@@ -1073,11 +1170,19 @@ var connectToWorkerQueue = (endpoint, serverId, secret, timeout = 10, logger2, S
1073
1170
  const socket = new SocketConstructor(endpoint, {
1074
1171
  params,
1075
1172
  transport: WebSocket,
1076
- timeout: timeout * 1e3
1173
+ timeout: timeout * 1e3,
1174
+ reconnectAfterMs: (tries) => Math.max(tries * 1e3)
1077
1175
  });
1078
1176
  let didOpen = false;
1177
+ let shouldReportConnectionError = true;
1079
1178
  socket.onOpen(() => {
1179
+ Sentry4.addBreadcrumb({
1180
+ category: "lifecycle",
1181
+ message: "Web socket connected",
1182
+ level: "info"
1183
+ });
1080
1184
  didOpen = true;
1185
+ shouldReportConnectionError = true;
1081
1186
  const channel = socket.channel("worker:queue");
1082
1187
  channel.onMessage = (ev, load) => {
1083
1188
  events.emit("message", ev, load);
@@ -1097,6 +1202,16 @@ var connectToWorkerQueue = (endpoint, serverId, secret, timeout = 10, logger2, S
1097
1202
  events.emit("disconnect");
1098
1203
  });
1099
1204
  socket.onError((e) => {
1205
+ Sentry4.addBreadcrumb({
1206
+ category: "lifecycle",
1207
+ message: "Error in web socket connection",
1208
+ level: "info"
1209
+ });
1210
+ if (shouldReportConnectionError) {
1211
+ logger2.debug("Reporting connection error to sentry");
1212
+ shouldReportConnectionError = false;
1213
+ Sentry4.captureException(e);
1214
+ }
1100
1215
  if (!didOpen) {
1101
1216
  events.emit("error", e.message);
1102
1217
  didOpen = false;
@@ -1201,6 +1316,13 @@ function createServer(engine, options = {}) {
1201
1316
  const router = new Router();
1202
1317
  app.events = new EventEmitter3();
1203
1318
  app.engine = engine;
1319
+ if (options.sentryDsn) {
1320
+ Sentry5.init({
1321
+ environment: options.sentryEnv,
1322
+ dsn: options.sentryDsn
1323
+ });
1324
+ Sentry5.setupKoaErrorHandler(app);
1325
+ }
1204
1326
  app.use(bodyParser());
1205
1327
  app.use(
1206
1328
  koaLogger((str, _args) => {
@@ -1239,7 +1361,8 @@ function createServer(engine, options = {}) {
1239
1361
  app.socket,
1240
1362
  token,
1241
1363
  id,
1242
- logger2
1364
+ logger2,
1365
+ app.options.messageTimeoutSeconds
1243
1366
  );
1244
1367
  const { plan, options: options2, input } = convert_lightning_plan_default(run2, {
1245
1368
  collectionsVersion: app.options.collectionsVersion,
@@ -1276,6 +1399,8 @@ function createServer(engine, options = {}) {
1276
1399
  );
1277
1400
  app.workflows[id] = context;
1278
1401
  } catch (e) {
1402
+ delete app.workflows[id];
1403
+ app.resumeWorkloop();
1279
1404
  logger2.error(`Unexpected error executing ${id}`);
1280
1405
  logger2.error(e);
1281
1406
  }
@@ -6185,6 +6310,7 @@ var yargs_default = Yargs;
6185
6310
  var DEFAULT_PORT2 = 2222;
6186
6311
  var DEFAULT_WORKER_CAPACITY = 5;
6187
6312
  var DEFAULT_SOCKET_TIMEOUT_SECONDS = 10;
6313
+ var DEFAULT_MESSAGE_TIMEOUT_SECONDS = 30;
6188
6314
  function setArg(argValue, envValue, defaultValue) {
6189
6315
  if (Array.isArray(defaultValue) && !argValue && typeof envValue === "string") {
6190
6316
  return envValue.split(",");
@@ -6196,22 +6322,25 @@ function setArg(argValue, envValue, defaultValue) {
6196
6322
  }
6197
6323
  function parseArgs(argv) {
6198
6324
  const {
6325
+ OPENFN_ADAPTORS_REPO,
6199
6326
  WORKER_BACKOFF,
6200
6327
  WORKER_CAPACITY,
6201
- WORKER_COLLECTIONS_VERSION,
6202
6328
  WORKER_COLLECTIONS_URL,
6329
+ WORKER_COLLECTIONS_VERSION,
6203
6330
  WORKER_LIGHTNING_PUBLIC_KEY,
6204
6331
  WORKER_LIGHTNING_SERVICE_URL,
6205
6332
  WORKER_LOG_LEVEL,
6206
6333
  WORKER_MAX_PAYLOAD_MB,
6207
6334
  WORKER_MAX_RUN_DURATION_SECONDS,
6208
6335
  WORKER_MAX_RUN_MEMORY_MB,
6336
+ WORKER_MESSAGE_TIMEOUT_SECONDS,
6209
6337
  WORKER_PORT,
6210
6338
  WORKER_REPO_DIR,
6211
6339
  WORKER_SECRET,
6212
- WORKER_STATE_PROPS_TO_REMOVE,
6340
+ WORKER_SENTRY_DSN,
6341
+ WORKER_SENTRY_ENV,
6213
6342
  WORKER_SOCKET_TIMEOUT_SECONDS,
6214
- OPENFN_ADAPTORS_REPO
6343
+ WORKER_STATE_PROPS_TO_REMOVE
6215
6344
  } = process.env;
6216
6345
  const parser2 = yargs_default(hideBin(argv)).command("server", "Start a ws-worker server").option("port", {
6217
6346
  alias: "p",
@@ -6225,12 +6354,19 @@ function parseArgs(argv) {
6225
6354
  description: "Path to the runtime repo (where modules will be installed). Env: WORKER_REPO_DIR"
6226
6355
  }).option("monorepo-dir", {
6227
6356
  alias: "m",
6228
- description: "Path to the adaptors mono repo, from where @local adaptors will be loaded. Env: OPENFN_ADAPTORS_REPO"
6357
+ description: "Path to the adaptors monorepo, from where @local adaptors will be loaded. Env: OPENFN_ADAPTORS_REPO"
6229
6358
  }).option("secret", {
6230
6359
  alias: "s",
6231
6360
  description: "Worker secret. (comes from WORKER_SECRET by default). Env: WORKER_SECRET"
6361
+ }).option("sentry-dsn", {
6362
+ alias: ["dsn"],
6363
+ description: "Sentry DSN. Env: WORKER_SENTRY_DSN"
6364
+ }).option("sentry-env", {
6365
+ description: "Sentry environment. Defaults to 'dev'. Env: WORKER_SENTRY_ENV"
6232
6366
  }).option("socket-timeout", {
6233
- description: `Timeout for websockets to Lighting, in seconds. Defaults to 10.`
6367
+ description: `Timeout for websockets to Lightning, in seconds. Defaults to 10.Env: WORKER_SOCKET_TIMEOUT_SECONDS`
6368
+ }).option("message-timeout", {
6369
+ description: `Timeout for messages in the run channel in seconds. Defaults to 1. Env: WORKER_MESSAGE_TIMEOUT_SECONDS`
6234
6370
  }).option("lightning-public-key", {
6235
6371
  description: "Base64-encoded public key. Used to verify run tokens. Env: WORKER_LIGHTNING_PUBLIC_KEY"
6236
6372
  }).option("log", {
@@ -6280,6 +6416,8 @@ function parseArgs(argv) {
6280
6416
  repoDir: setArg(args2.repoDir, WORKER_REPO_DIR),
6281
6417
  monorepoDir: setArg(args2.monorepoDir, OPENFN_ADAPTORS_REPO),
6282
6418
  secret: setArg(args2.secret, WORKER_SECRET),
6419
+ sentryDsn: setArg(args2.sentryDsn, WORKER_SENTRY_DSN),
6420
+ sentryEnv: setArg(args2.sentryEnv, WORKER_SENTRY_ENV, "dev"),
6283
6421
  lightningPublicKey: setArg(
6284
6422
  args2.lightningPublicKey,
6285
6423
  WORKER_LIGHTNING_PUBLIC_KEY
@@ -6304,6 +6442,11 @@ function parseArgs(argv) {
6304
6442
  WORKER_SOCKET_TIMEOUT_SECONDS,
6305
6443
  DEFAULT_SOCKET_TIMEOUT_SECONDS
6306
6444
  ),
6445
+ messageTimeoutSeconds: setArg(
6446
+ args2.messageTimeoutSeconds,
6447
+ WORKER_MESSAGE_TIMEOUT_SECONDS,
6448
+ DEFAULT_MESSAGE_TIMEOUT_SECONDS
6449
+ ),
6307
6450
  collectionsVersion: setArg(
6308
6451
  args2.collectionsVersion,
6309
6452
  WORKER_COLLECTIONS_VERSION
@@ -6333,6 +6476,8 @@ function engineReady(engine) {
6333
6476
  lightning: args.lightning,
6334
6477
  logger,
6335
6478
  secret: args.secret,
6479
+ sentryDsn: args.sentryDsn,
6480
+ sentryEnv: args.sentryEnv,
6336
6481
  noLoop: !args.loop,
6337
6482
  // TODO need to feed this through properly
6338
6483
  backoff: {
@@ -6343,7 +6488,9 @@ function engineReady(engine) {
6343
6488
  payloadLimitMb: args.payloadMemory,
6344
6489
  collectionsVersion: args.collectionsVersion,
6345
6490
  collectionsUrl: args.collectionsUrl,
6346
- monorepoDir: args.monorepoDir
6491
+ monorepoDir: args.monorepoDir,
6492
+ messageTimeoutSeconds: args.messageTimeoutSeconds,
6493
+ socketTimeoutSeconds: args.socketTimeoutSeconds
6347
6494
  };
6348
6495
  if (args.lightningPublicKey) {
6349
6496
  logger.info(
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@openfn/ws-worker",
3
- "version": "1.12.1",
3
+ "version": "1.13.1",
4
4
  "description": "A Websocket Worker to connect Lightning to a Runtime Engine",
5
5
  "main": "dist/index.js",
6
6
  "type": "module",
@@ -11,6 +11,7 @@
11
11
  "license": "ISC",
12
12
  "dependencies": {
13
13
  "@koa/router": "^12.0.0",
14
+ "@sentry/node": "^9.5.0",
14
15
  "@types/koa-logger": "^3.1.2",
15
16
  "@types/ws": "^8.5.6",
16
17
  "fast-safe-stringify": "^2.1.1",
@@ -22,10 +23,10 @@
22
23
  "koa-logger": "^3.2.1",
23
24
  "phoenix": "1.7.10",
24
25
  "ws": "^8.18.0",
25
- "@openfn/engine-multi": "1.6.1",
26
- "@openfn/logger": "1.0.4",
27
- "@openfn/runtime": "1.6.3",
28
- "@openfn/lexicon": "^1.2.0"
26
+ "@openfn/lexicon": "^1.2.0",
27
+ "@openfn/logger": "1.0.5",
28
+ "@openfn/runtime": "1.6.4",
29
+ "@openfn/engine-multi": "1.6.2"
29
30
  },
30
31
  "devDependencies": {
31
32
  "@types/koa": "^2.13.5",
@@ -37,11 +38,12 @@
37
38
  "@types/yargs": "^17.0.12",
38
39
  "ava": "5.1.0",
39
40
  "nodemon": "3.0.1",
41
+ "sentry-testkit": "^6.1.0",
40
42
  "tslib": "^2.4.0",
41
43
  "tsup": "^6.2.3",
42
44
  "typescript": "^4.6.4",
43
45
  "yargs": "^17.6.2",
44
- "@openfn/lightning-mock": "2.1.3"
46
+ "@openfn/lightning-mock": "2.1.4"
45
47
  },
46
48
  "files": [
47
49
  "dist",