@514labs/moose-lib 0.6.256 → 0.6.257

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -39,6 +39,22 @@ function isTruthy(value) {
39
39
  return false;
40
40
  }
41
41
  }
42
+ function createProducerConfig(maxMessageBytes) {
43
+ return {
44
+ kafkaJS: {
45
+ idempotent: false,
46
+ // Not needed for at-least-once delivery
47
+ acks: ACKs,
48
+ retry: {
49
+ retries: MAX_RETRIES_PRODUCER,
50
+ maxRetryTime: MAX_RETRY_TIME_MS
51
+ }
52
+ },
53
+ "linger.ms": 0,
54
+ // This is to make sure at least once delivery with immediate feedback on the send
55
+ ...maxMessageBytes && { "message.max.bytes": maxMessageBytes }
56
+ };
57
+ }
42
58
  var import_http, import_client, import_kafka_javascript, Kafka, compilerLog, getClickhouseClient, cliLog, MAX_RETRIES, MAX_RETRY_TIME_MS, RETRY_INITIAL_TIME_MS, MAX_RETRIES_PRODUCER, ACKs, parseBrokerString, logError, buildSaslConfig, getKafkaClient;
43
59
  var init_commons = __esm({
44
60
  "src/commons.ts"() {
@@ -1816,156 +1832,7 @@ var MAX_RETRIES_CONSUMER = 150;
1816
1832
  var SESSION_TIMEOUT_CONSUMER = 3e4;
1817
1833
  var HEARTBEAT_INTERVAL_CONSUMER = 3e3;
1818
1834
  var DEFAULT_MAX_STREAMING_CONCURRENCY = 100;
1819
- var KAFKAJS_BYTE_MESSAGE_OVERHEAD = 500;
1820
- var isMessageTooLargeError = (error) => {
1821
- if (import_kafka_javascript2.KafkaJS.isKafkaJSError && error instanceof Error && import_kafka_javascript2.KafkaJS.isKafkaJSError(error)) {
1822
- return error.type === "ERR_MSG_SIZE_TOO_LARGE" || error.code === 10 || error.cause !== void 0 && isMessageTooLargeError(error.cause);
1823
- }
1824
- if (error && typeof error === "object") {
1825
- const err = error;
1826
- return err.type === "ERR_MSG_SIZE_TOO_LARGE" || err.code === 10 || err.cause !== void 0 && isMessageTooLargeError(err.cause);
1827
- }
1828
- return false;
1829
- };
1830
- var splitBatch = (messages, maxChunkSize) => {
1831
- if (messages.length <= 1) {
1832
- return [messages];
1833
- }
1834
- const chunks = [];
1835
- let currentChunk = [];
1836
- let currentSize = 0;
1837
- for (const message of messages) {
1838
- const messageSize = import_node_buffer.Buffer.byteLength(message.value, "utf8") + KAFKAJS_BYTE_MESSAGE_OVERHEAD;
1839
- if (currentSize + messageSize > maxChunkSize && currentChunk.length > 0) {
1840
- chunks.push(currentChunk);
1841
- currentChunk = [message];
1842
- currentSize = messageSize;
1843
- } else {
1844
- currentChunk.push(message);
1845
- currentSize += messageSize;
1846
- }
1847
- }
1848
- if (currentChunk.length > 0) {
1849
- chunks.push(currentChunk);
1850
- }
1851
- return chunks;
1852
- };
1853
- var sendChunkWithRetry = async (logger2, targetTopic, producer, messages, currentMaxSize, maxRetries = 3) => {
1854
- const currentMessages = messages;
1855
- let attempts = 0;
1856
- while (attempts < maxRetries) {
1857
- try {
1858
- await producer.send({
1859
- topic: targetTopic.name,
1860
- messages: currentMessages
1861
- });
1862
- logger2.log(
1863
- `Successfully sent ${currentMessages.length} messages to ${targetTopic.name}`
1864
- );
1865
- return;
1866
- } catch (error) {
1867
- if (isMessageTooLargeError(error) && currentMessages.length > 1) {
1868
- logger2.warn(
1869
- `Got MESSAGE_TOO_LARGE error, splitting batch of ${currentMessages.length} messages and retrying (${maxRetries - attempts} attempts left)`
1870
- );
1871
- const newMaxSize = Math.floor(currentMaxSize / 2);
1872
- const splitChunks = splitBatch(currentMessages, newMaxSize);
1873
- for (const chunk of splitChunks) {
1874
- await sendChunkWithRetry(
1875
- logger2,
1876
- targetTopic,
1877
- producer,
1878
- chunk,
1879
- newMaxSize,
1880
- // this error does not count as one failed attempt
1881
- maxRetries - attempts
1882
- );
1883
- }
1884
- return;
1885
- } else {
1886
- attempts++;
1887
- if (attempts >= maxRetries) {
1888
- let messagesHandledByDLQ = 0;
1889
- let messagesWithoutDLQ = 0;
1890
- const dlqErrors = [];
1891
- for (const failedMessage of currentMessages) {
1892
- const dlqTopic = failedMessage.dlq;
1893
- if (dlqTopic && failedMessage.originalValue) {
1894
- const dlqTopicName = dlqTopic.name;
1895
- const deadLetterRecord = {
1896
- originalRecord: {
1897
- ...failedMessage.originalValue,
1898
- // Include original Kafka message metadata
1899
- __sourcePartition: failedMessage.originalMessage.partition,
1900
- __sourceOffset: failedMessage.originalMessage.offset,
1901
- __sourceTimestamp: failedMessage.originalMessage.timestamp
1902
- },
1903
- errorMessage: error instanceof Error ? error.message : String(error),
1904
- errorType: error instanceof Error ? error.constructor.name : "Unknown",
1905
- failedAt: /* @__PURE__ */ new Date(),
1906
- source: "transform"
1907
- };
1908
- cliLog({
1909
- action: "DeadLetter",
1910
- message: `Sending failed message to DLQ ${dlqTopicName}: ${error instanceof Error ? error.message : String(error)}`,
1911
- message_type: "Error"
1912
- });
1913
- try {
1914
- await producer.send({
1915
- topic: dlqTopicName,
1916
- messages: [{ value: JSON.stringify(deadLetterRecord) }]
1917
- });
1918
- logger2.log(`Sent failed message to DLQ ${dlqTopicName}`);
1919
- messagesHandledByDLQ++;
1920
- } catch (dlqError) {
1921
- const errorMsg = `Failed to send message to DLQ: ${dlqError}`;
1922
- logger2.error(errorMsg);
1923
- dlqErrors.push(errorMsg);
1924
- }
1925
- } else if (!dlqTopic) {
1926
- messagesWithoutDLQ++;
1927
- logger2.warn(
1928
- `Cannot send to DLQ: no DLQ configured for message (batch has mixed DLQ configurations)`
1929
- );
1930
- } else {
1931
- messagesWithoutDLQ++;
1932
- logger2.warn(
1933
- `Cannot send to DLQ: original message value not available`
1934
- );
1935
- }
1936
- }
1937
- const allMessagesHandled = messagesHandledByDLQ === currentMessages.length && messagesWithoutDLQ === 0 && dlqErrors.length === 0;
1938
- if (allMessagesHandled) {
1939
- logger2.log(
1940
- `All ${messagesHandledByDLQ} failed message(s) sent to DLQ, not throwing original error`
1941
- );
1942
- return;
1943
- }
1944
- if (messagesWithoutDLQ > 0) {
1945
- logger2.error(
1946
- `Cannot handle batch failure: ${messagesWithoutDLQ} message(s) have no DLQ configured`
1947
- );
1948
- }
1949
- if (dlqErrors.length > 0) {
1950
- logger2.error(
1951
- `Some messages failed to send to DLQ: ${dlqErrors.join(", ")}`
1952
- );
1953
- }
1954
- if (messagesHandledByDLQ > 0) {
1955
- logger2.warn(
1956
- `Partial DLQ success: ${messagesHandledByDLQ}/${currentMessages.length} message(s) sent to DLQ, but throwing due to incomplete batch handling`
1957
- );
1958
- }
1959
- throw error;
1960
- }
1961
- logger2.warn(
1962
- `Send ${currentMessages.length} messages failed (attempt ${attempts}/${maxRetries}), retrying: ${error}`
1963
- );
1964
- await new Promise((resolve2) => setTimeout(resolve2, 100 * attempts));
1965
- }
1966
- }
1967
- }
1968
- };
1835
+ var CONSUMER_MAX_BATCH_SIZE = 1e3;
1969
1836
  var MAX_STREAMING_CONCURRENCY = process3.env.MAX_STREAMING_CONCURRENCY ? parseInt(process3.env.MAX_STREAMING_CONCURRENCY, 10) : DEFAULT_MAX_STREAMING_CONCURRENCY;
1970
1837
  var metricsLog = (log) => {
1971
1838
  const req = http3.request({
@@ -2111,57 +1978,95 @@ var handleMessage = async (logger2, streamingFunctionWithConfigList, message, pr
2111
1978
  }
2112
1979
  return void 0;
2113
1980
  };
2114
- var sendMessages = async (logger2, metrics, targetTopic, producer, messages) => {
2115
- try {
2116
- let chunk = [];
2117
- let chunkSize = 0;
2118
- const maxMessageSize = targetTopic.max_message_bytes || 1024 * 1024;
2119
- for (const message of messages) {
2120
- const messageSize = import_node_buffer.Buffer.byteLength(message.value, "utf8") + KAFKAJS_BYTE_MESSAGE_OVERHEAD;
2121
- if (chunkSize + messageSize > maxMessageSize) {
2122
- logger2.log(
2123
- `Sending ${chunkSize} bytes of a transformed record batch to ${targetTopic.name}`
2124
- );
2125
- await sendChunkWithRetry(
2126
- logger2,
2127
- targetTopic,
2128
- producer,
2129
- chunk,
2130
- maxMessageSize
2131
- );
2132
- logger2.log(
2133
- `Sent ${chunk.length} transformed records to ${targetTopic.name}`
2134
- );
2135
- chunk = [message];
2136
- chunkSize = messageSize;
2137
- } else {
2138
- chunk.push(message);
2139
- metrics.bytes += import_node_buffer.Buffer.byteLength(message.value, "utf8");
2140
- chunkSize += messageSize;
1981
+ var handleDLQForFailedMessages = async (logger2, producer, messages, error) => {
1982
+ let messagesHandledByDLQ = 0;
1983
+ let messagesWithoutDLQ = 0;
1984
+ let dlqErrors = 0;
1985
+ for (const msg of messages) {
1986
+ if (msg.dlq && msg.originalValue) {
1987
+ const deadLetterRecord = {
1988
+ originalRecord: {
1989
+ ...msg.originalValue,
1990
+ // Include original Kafka message metadata
1991
+ __sourcePartition: msg.originalMessage.partition,
1992
+ __sourceOffset: msg.originalMessage.offset,
1993
+ __sourceTimestamp: msg.originalMessage.timestamp
1994
+ },
1995
+ errorMessage: error instanceof Error ? error.message : String(error),
1996
+ errorType: error instanceof Error ? error.constructor.name : "Unknown",
1997
+ failedAt: /* @__PURE__ */ new Date(),
1998
+ source: "transform"
1999
+ };
2000
+ cliLog({
2001
+ action: "DeadLetter",
2002
+ message: `Sending failed message to DLQ ${msg.dlq.name}: ${error instanceof Error ? error.message : String(error)}`,
2003
+ message_type: "Error"
2004
+ });
2005
+ try {
2006
+ await producer.send({
2007
+ topic: msg.dlq.name,
2008
+ messages: [{ value: JSON.stringify(deadLetterRecord) }]
2009
+ });
2010
+ logger2.log(`Sent failed message to DLQ ${msg.dlq.name}`);
2011
+ messagesHandledByDLQ++;
2012
+ } catch (dlqError) {
2013
+ logger2.error(`Failed to send to DLQ: ${dlqError}`);
2014
+ dlqErrors++;
2141
2015
  }
2016
+ } else if (!msg.dlq) {
2017
+ messagesWithoutDLQ++;
2018
+ logger2.warn(`Cannot send to DLQ: no DLQ configured for message`);
2019
+ } else {
2020
+ messagesWithoutDLQ++;
2021
+ logger2.warn(`Cannot send to DLQ: original message value not available`);
2142
2022
  }
2143
- metrics.count_out += chunk.length;
2144
- if (chunk.length > 0) {
2145
- logger2.log(
2146
- `Sending ${chunkSize} bytes of a transformed record batch to ${targetTopic.name}`
2147
- );
2148
- await sendChunkWithRetry(
2149
- logger2,
2150
- targetTopic,
2151
- producer,
2152
- chunk,
2153
- maxMessageSize
2154
- );
2155
- logger2.log(
2156
- `Sent final ${chunk.length} transformed data to ${targetTopic.name}`
2023
+ }
2024
+ const allMessagesHandled = messagesHandledByDLQ === messages.length && messagesWithoutDLQ === 0 && dlqErrors === 0;
2025
+ if (allMessagesHandled) {
2026
+ logger2.log(
2027
+ `All ${messagesHandledByDLQ} failed message(s) sent to DLQ, suppressing original error`
2028
+ );
2029
+ } else if (messagesHandledByDLQ > 0) {
2030
+ logger2.warn(
2031
+ `Partial DLQ success: ${messagesHandledByDLQ}/${messages.length} message(s) sent to DLQ`
2032
+ );
2033
+ if (messagesWithoutDLQ > 0) {
2034
+ logger2.error(
2035
+ `Cannot handle batch failure: ${messagesWithoutDLQ} message(s) have no DLQ configured or missing original value`
2157
2036
  );
2158
2037
  }
2038
+ if (dlqErrors > 0) {
2039
+ logger2.error(`${dlqErrors} message(s) failed to send to DLQ`);
2040
+ }
2041
+ }
2042
+ return allMessagesHandled;
2043
+ };
2044
+ var sendMessages = async (logger2, metrics, targetTopic, producer, messages) => {
2045
+ if (messages.length === 0) return;
2046
+ try {
2047
+ await producer.send({
2048
+ topic: targetTopic.name,
2049
+ messages
2050
+ });
2051
+ for (const msg of messages) {
2052
+ metrics.bytes += import_node_buffer.Buffer.byteLength(msg.value, "utf8");
2053
+ }
2054
+ metrics.count_out += messages.length;
2055
+ logger2.log(`Sent ${messages.length} messages to ${targetTopic.name}`);
2159
2056
  } catch (e) {
2160
2057
  logger2.error(`Failed to send transformed data`);
2161
2058
  if (e instanceof Error) {
2162
2059
  logError(logger2, e);
2163
2060
  }
2164
- throw e;
2061
+ const allHandledByDLQ = await handleDLQForFailedMessages(
2062
+ logger2,
2063
+ producer,
2064
+ messages,
2065
+ e
2066
+ );
2067
+ if (!allHandledByDLQ) {
2068
+ throw e;
2069
+ }
2165
2070
  }
2166
2071
  };
2167
2072
  var sendMessageMetrics = (logger2, metrics) => {
@@ -2401,18 +2306,13 @@ var runStreamingFunctions = async (args) => {
2401
2306
  autoCommit: true,
2402
2307
  autoCommitInterval: AUTO_COMMIT_INTERVAL_MS,
2403
2308
  fromBeginning: true
2404
- }
2405
- });
2406
- const producer = kafka.producer({
2407
- kafkaJS: {
2408
- idempotent: true,
2409
- acks: ACKs,
2410
- retry: {
2411
- retries: MAX_RETRIES_PRODUCER,
2412
- maxRetryTime: MAX_RETRY_TIME_MS
2413
- }
2414
- }
2309
+ },
2310
+ "js.consumer.max.batch.size": CONSUMER_MAX_BATCH_SIZE
2415
2311
  });
2312
+ const maxMessageBytes = args.targetTopic?.max_message_bytes || 1024 * 1024;
2313
+ const producer = kafka.producer(
2314
+ createProducerConfig(maxMessageBytes)
2315
+ );
2416
2316
  try {
2417
2317
  logger2.log("Starting producer...");
2418
2318
  await startProducer(logger2, producer);