@514labs/moose-lib 0.6.252-ci-1-g901efb04 → 0.6.252-ci-3-g37e54b29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,22 +26,6 @@ function isTruthy(value) {
  return false;
  }
  }
- function createProducerConfig(maxMessageBytes) {
- return {
- kafkaJS: {
- idempotent: false,
- // Not needed for at-least-once delivery
- acks: ACKs,
- retry: {
- retries: MAX_RETRIES_PRODUCER,
- maxRetryTime: MAX_RETRY_TIME_MS
- }
- },
- "linger.ms": 0,
- // Send immediately - batching happens at application level
- ...maxMessageBytes && { "message.max.bytes": maxMessageBytes }
- };
- }
  var Kafka, compilerLog, getClickhouseClient, cliLog, MAX_RETRIES, MAX_RETRY_TIME_MS, RETRY_INITIAL_TIME_MS, MAX_RETRIES_PRODUCER, ACKs, parseBrokerString, logError, buildSaslConfig, getKafkaClient;
  var init_commons = __esm({
  "src/commons.ts"() {
@@ -1819,6 +1803,156 @@ var MAX_RETRIES_CONSUMER = 150;
  var SESSION_TIMEOUT_CONSUMER = 3e4;
  var HEARTBEAT_INTERVAL_CONSUMER = 3e3;
  var DEFAULT_MAX_STREAMING_CONCURRENCY = 100;
+ var KAFKAJS_BYTE_MESSAGE_OVERHEAD = 500;
+ var isMessageTooLargeError = (error) => {
+ if (KafkaJS2.isKafkaJSError && error instanceof Error && KafkaJS2.isKafkaJSError(error)) {
+ return error.type === "ERR_MSG_SIZE_TOO_LARGE" || error.code === 10 || error.cause !== void 0 && isMessageTooLargeError(error.cause);
+ }
+ if (error && typeof error === "object") {
+ const err = error;
+ return err.type === "ERR_MSG_SIZE_TOO_LARGE" || err.code === 10 || err.cause !== void 0 && isMessageTooLargeError(err.cause);
+ }
+ return false;
+ };
+ var splitBatch = (messages, maxChunkSize) => {
+ if (messages.length <= 1) {
+ return [messages];
+ }
+ const chunks = [];
+ let currentChunk = [];
+ let currentSize = 0;
+ for (const message of messages) {
+ const messageSize = Buffer2.byteLength(message.value, "utf8") + KAFKAJS_BYTE_MESSAGE_OVERHEAD;
+ if (currentSize + messageSize > maxChunkSize && currentChunk.length > 0) {
+ chunks.push(currentChunk);
+ currentChunk = [message];
+ currentSize = messageSize;
+ } else {
+ currentChunk.push(message);
+ currentSize += messageSize;
+ }
+ }
+ if (currentChunk.length > 0) {
+ chunks.push(currentChunk);
+ }
+ return chunks;
+ };
+ var sendChunkWithRetry = async (logger2, targetTopic, producer, messages, currentMaxSize, maxRetries = 3) => {
+ const currentMessages = messages;
+ let attempts = 0;
+ while (attempts < maxRetries) {
+ try {
+ await producer.send({
+ topic: targetTopic.name,
+ messages: currentMessages
+ });
+ logger2.log(
+ `Successfully sent ${currentMessages.length} messages to ${targetTopic.name}`
+ );
+ return;
+ } catch (error) {
+ if (isMessageTooLargeError(error) && currentMessages.length > 1) {
+ logger2.warn(
+ `Got MESSAGE_TOO_LARGE error, splitting batch of ${currentMessages.length} messages and retrying (${maxRetries - attempts} attempts left)`
+ );
+ const newMaxSize = Math.floor(currentMaxSize / 2);
+ const splitChunks = splitBatch(currentMessages, newMaxSize);
+ for (const chunk of splitChunks) {
+ await sendChunkWithRetry(
+ logger2,
+ targetTopic,
+ producer,
+ chunk,
+ newMaxSize,
+ // this error does not count as one failed attempt
+ maxRetries - attempts
+ );
+ }
+ return;
+ } else {
+ attempts++;
+ if (attempts >= maxRetries) {
+ let messagesHandledByDLQ = 0;
+ let messagesWithoutDLQ = 0;
+ const dlqErrors = [];
+ for (const failedMessage of currentMessages) {
+ const dlqTopic = failedMessage.dlq;
+ if (dlqTopic && failedMessage.originalValue) {
+ const dlqTopicName = dlqTopic.name;
+ const deadLetterRecord = {
+ originalRecord: {
+ ...failedMessage.originalValue,
+ // Include original Kafka message metadata
+ __sourcePartition: failedMessage.originalMessage.partition,
+ __sourceOffset: failedMessage.originalMessage.offset,
+ __sourceTimestamp: failedMessage.originalMessage.timestamp
+ },
+ errorMessage: error instanceof Error ? error.message : String(error),
+ errorType: error instanceof Error ? error.constructor.name : "Unknown",
+ failedAt: /* @__PURE__ */ new Date(),
+ source: "transform"
+ };
+ cliLog({
+ action: "DeadLetter",
+ message: `Sending failed message to DLQ ${dlqTopicName}: ${error instanceof Error ? error.message : String(error)}`,
+ message_type: "Error"
+ });
+ try {
+ await producer.send({
+ topic: dlqTopicName,
+ messages: [{ value: JSON.stringify(deadLetterRecord) }]
+ });
+ logger2.log(`Sent failed message to DLQ ${dlqTopicName}`);
+ messagesHandledByDLQ++;
+ } catch (dlqError) {
+ const errorMsg = `Failed to send message to DLQ: ${dlqError}`;
+ logger2.error(errorMsg);
+ dlqErrors.push(errorMsg);
+ }
+ } else if (!dlqTopic) {
+ messagesWithoutDLQ++;
+ logger2.warn(
+ `Cannot send to DLQ: no DLQ configured for message (batch has mixed DLQ configurations)`
+ );
+ } else {
+ messagesWithoutDLQ++;
+ logger2.warn(
+ `Cannot send to DLQ: original message value not available`
+ );
+ }
+ }
+ const allMessagesHandled = messagesHandledByDLQ === currentMessages.length && messagesWithoutDLQ === 0 && dlqErrors.length === 0;
+ if (allMessagesHandled) {
+ logger2.log(
+ `All ${messagesHandledByDLQ} failed message(s) sent to DLQ, not throwing original error`
+ );
+ return;
+ }
+ if (messagesWithoutDLQ > 0) {
+ logger2.error(
+ `Cannot handle batch failure: ${messagesWithoutDLQ} message(s) have no DLQ configured`
+ );
+ }
+ if (dlqErrors.length > 0) {
+ logger2.error(
+ `Some messages failed to send to DLQ: ${dlqErrors.join(", ")}`
+ );
+ }
+ if (messagesHandledByDLQ > 0) {
+ logger2.warn(
+ `Partial DLQ success: ${messagesHandledByDLQ}/${currentMessages.length} message(s) sent to DLQ, but throwing due to incomplete batch handling`
+ );
+ }
+ throw error;
+ }
+ logger2.warn(
+ `Send ${currentMessages.length} messages failed (attempt ${attempts}/${maxRetries}), retrying: ${error}`
+ );
+ await new Promise((resolve2) => setTimeout(resolve2, 100 * attempts));
+ }
+ }
+ }
+ };
  var MAX_STREAMING_CONCURRENCY = process3.env.MAX_STREAMING_CONCURRENCY ? parseInt(process3.env.MAX_STREAMING_CONCURRENCY, 10) : DEFAULT_MAX_STREAMING_CONCURRENCY;
  var metricsLog = (log) => {
  const req = http3.request({
@@ -1964,71 +2098,56 @@ var handleMessage = async (logger2, streamingFunctionWithConfigList, message, pr
  }
  return void 0;
  };
- var handleDLQForFailedMessages = async (logger2, producer, messages, error) => {
- let messagesHandledByDLQ = 0;
- let messagesWithoutDLQ = 0;
- for (const msg of messages) {
- if (msg.dlq && msg.originalValue) {
- const deadLetterRecord = {
- originalRecord: {
- ...msg.originalValue,
- // Include original Kafka message metadata
- __sourcePartition: msg.originalMessage.partition,
- __sourceOffset: msg.originalMessage.offset,
- __sourceTimestamp: msg.originalMessage.timestamp
- },
- errorMessage: error instanceof Error ? error.message : String(error),
- errorType: error instanceof Error ? error.constructor.name : "Unknown",
- failedAt: /* @__PURE__ */ new Date(),
- source: "transform"
- };
- cliLog({
- action: "DeadLetter",
- message: `Sending failed message to DLQ ${msg.dlq.name}: ${error instanceof Error ? error.message : String(error)}`,
- message_type: "Error"
- });
- try {
- await producer.send({
- topic: msg.dlq.name,
- messages: [{ value: JSON.stringify(deadLetterRecord) }]
- });
- logger2.log(`Sent failed message to DLQ ${msg.dlq.name}`);
- messagesHandledByDLQ++;
- } catch (dlqError) {
- logger2.error(`Failed to send to DLQ: ${dlqError}`);
- }
- } else if (!msg.dlq) {
- messagesWithoutDLQ++;
- logger2.warn(`Cannot send to DLQ: no DLQ configured for message`);
- } else {
- messagesWithoutDLQ++;
- logger2.warn(`Cannot send to DLQ: original message value not available`);
- }
- }
- if (messagesHandledByDLQ > 0 && messagesWithoutDLQ > 0) {
- logger2.warn(
- `Partial DLQ success: ${messagesHandledByDLQ}/${messages.length} message(s) sent to DLQ`
- );
- }
- };
  var sendMessages = async (logger2, metrics, targetTopic, producer, messages) => {
- if (messages.length === 0) return;
- for (const msg of messages) {
- metrics.bytes += Buffer2.byteLength(msg.value, "utf8");
- }
- metrics.count_out += messages.length;
  try {
- await producer.send({
- topic: targetTopic.name,
- messages
- });
- logger2.log(`Sent ${messages.length} messages to ${targetTopic.name}`);
+ let chunk = [];
+ let chunkSize = 0;
+ const maxMessageSize = targetTopic.max_message_bytes || 1024 * 1024;
+ for (const message of messages) {
+ const messageSize = Buffer2.byteLength(message.value, "utf8") + KAFKAJS_BYTE_MESSAGE_OVERHEAD;
+ if (chunkSize + messageSize > maxMessageSize) {
+ logger2.log(
+ `Sending ${chunkSize} bytes of a transformed record batch to ${targetTopic.name}`
+ );
+ await sendChunkWithRetry(
+ logger2,
+ targetTopic,
+ producer,
+ chunk,
+ maxMessageSize
+ );
+ logger2.log(
+ `Sent ${chunk.length} transformed records to ${targetTopic.name}`
+ );
+ chunk = [message];
+ chunkSize = messageSize;
+ } else {
+ chunk.push(message);
+ metrics.bytes += Buffer2.byteLength(message.value, "utf8");
+ chunkSize += messageSize;
+ }
+ }
+ metrics.count_out += chunk.length;
+ if (chunk.length > 0) {
+ logger2.log(
+ `Sending ${chunkSize} bytes of a transformed record batch to ${targetTopic.name}`
+ );
+ await sendChunkWithRetry(
+ logger2,
+ targetTopic,
+ producer,
+ chunk,
+ maxMessageSize
+ );
+ logger2.log(
+ `Sent final ${chunk.length} transformed data to ${targetTopic.name}`
+ );
+ }
  } catch (e) {
  logger2.error(`Failed to send transformed data`);
  if (e instanceof Error) {
  logError(logger2, e);
  }
- await handleDLQForFailedMessages(logger2, producer, messages, e);
  throw e;
  }
  };
@@ -2271,10 +2390,16 @@ var runStreamingFunctions = async (args) => {
  fromBeginning: true
  }
  });
- const maxMessageBytes = args.targetTopic?.max_message_bytes || 1024 * 1024;
- const producer = kafka.producer(
- createProducerConfig(maxMessageBytes)
- );
+ const producer = kafka.producer({
+ kafkaJS: {
+ idempotent: true,
+ acks: ACKs,
+ retry: {
+ retries: MAX_RETRIES_PRODUCER,
+ maxRetryTime: MAX_RETRY_TIME_MS
+ }
+ }
+ });
  try {
  logger2.log("Starting producer...");
  await startProducer(logger2, producer);
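
Note on the behavioral change shown above: outgoing batches are now chunked by estimated byte size (UTF-8 payload length plus a fixed 500-byte per-message overhead), and a chunk the broker still rejects as MESSAGE_TOO_LARGE is split in half and retried recursively, falling back to the per-message DLQ only after the retries are exhausted. The following is a minimal standalone sketch of just the chunking rule; the names mirror the diff, but the snippet is illustrative, not the package code, and the message sizes are hypothetical.

// Illustrative sketch only; not part of @514labs/moose-lib.
const KAFKAJS_BYTE_MESSAGE_OVERHEAD = 500; // per-message overhead assumed in the diff

interface QueuedMessage {
  value: string;
}

// Greedily pack messages into size-bounded chunks; a single oversized message still gets its own chunk.
function splitBatch(messages: QueuedMessage[], maxChunkSize: number): QueuedMessage[][] {
  if (messages.length <= 1) return [messages];
  const chunks: QueuedMessage[][] = [];
  let current: QueuedMessage[] = [];
  let currentSize = 0;
  for (const message of messages) {
    const size = Buffer.byteLength(message.value, "utf8") + KAFKAJS_BYTE_MESSAGE_OVERHEAD;
    if (currentSize + size > maxChunkSize && current.length > 0) {
      chunks.push(current); // close the chunk that would overflow
      current = [message];  // start a new chunk with the current message
      currentSize = size;
    } else {
      current.push(message);
      currentSize += size;
    }
  }
  if (current.length > 0) chunks.push(current);
  return chunks;
}

// Example: three ~400 KiB payloads against the 1 MiB default limit split into two chunks (2 + 1),
// because each message is costed as payload bytes + 500.
const payload = "x".repeat(400 * 1024);
const chunks = splitBatch(Array.from({ length: 3 }, () => ({ value: payload })), 1024 * 1024);
console.log(chunks.map((c) => c.length)); // [2, 1]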