@514labs/moose-lib 0.6.256-ci-4-g0ca62054 → 0.6.256-ci-3-gafce5840

This diff compares the contents of two publicly released versions of this package as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registries.
@@ -26,22 +26,6 @@ function isTruthy(value) {
     return false;
   }
 }
-function createProducerConfig(maxMessageBytes) {
-  return {
-    kafkaJS: {
-      idempotent: false,
-      // Not needed for at-least-once delivery
-      acks: ACKs,
-      retry: {
-        retries: MAX_RETRIES_PRODUCER,
-        maxRetryTime: MAX_RETRY_TIME_MS
-      }
-    },
-    "linger.ms": 0,
-    // This is to make sure at least once delivery with immediate feedback on the send
-    ...maxMessageBytes && { "message.max.bytes": maxMessageBytes }
-  };
-}
 var Kafka, compilerLog, getClickhouseClient, cliLog, MAX_RETRIES, MAX_RETRY_TIME_MS, RETRY_INITIAL_TIME_MS, MAX_RETRIES_PRODUCER, ACKs, parseBrokerString, logError, buildSaslConfig, getKafkaClient;
 var init_commons = __esm({
   "src/commons.ts"() {
@@ -1162,7 +1146,9 @@ var moose_internal = {
   apis: /* @__PURE__ */ new Map(),
   sqlResources: /* @__PURE__ */ new Map(),
   workflows: /* @__PURE__ */ new Map(),
-  webApps: /* @__PURE__ */ new Map()
+  webApps: /* @__PURE__ */ new Map(),
+  materializedViews: /* @__PURE__ */ new Map(),
+  customViews: /* @__PURE__ */ new Map()
 };
 var defaultRetentionPeriod = 60 * 60 * 24 * 7;
 function isS3QueueConfig(config) {
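
A rough TypeScript sketch of the registry shape after the two new maps are added; the entry interfaces here are hypothetical stand-ins, with field names borrowed from the infra-map code later in this diff:

```ts
// Hypothetical entry types; the real definitions live in moose-lib's TypeScript source.
interface MaterializedViewEntry {
  name: string;
  selectSql: string;
  sourceTables: string[];
}
interface CustomViewEntry {
  name: string;
  selectSql: string;
  sourceTables: string[];
}

// Name-keyed Maps, so re-registering a view under the same name replaces the previous entry.
interface MooseInternalRegistry {
  apis: Map<string, unknown>;
  sqlResources: Map<string, unknown>;
  workflows: Map<string, unknown>;
  webApps: Map<string, unknown>;
  materializedViews: Map<string, MaterializedViewEntry>; // added in this version
  customViews: Map<string, CustomViewEntry>;             // added in this version
}

const materializedViews = new Map<string, MaterializedViewEntry>();
materializedViews.set("daily_totals", {
  name: "daily_totals",
  selectSql: "SELECT day, sum(v) AS total FROM source GROUP BY day",
  sourceTables: ["source"],
});
```
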
@@ -1360,6 +1346,8 @@ var toInfraMap = (registry) => {
   const sqlResources = {};
   const workflows = {};
   const webApps = {};
+  const materializedViews = {};
+  const customViews = {};
   registry.tables.forEach((table) => {
     const id = table.config.version ? `${table.name}_${table.config.version}` : table.name;
     let metadata = table.metadata;
@@ -1547,6 +1535,24 @@ var toInfraMap = (registry) => {
       metadata: webApp.config.metadata
     };
   });
+  registry.materializedViews.forEach((mv) => {
+    materializedViews[mv.name] = {
+      name: mv.name,
+      selectSql: mv.selectSql,
+      sourceTables: mv.sourceTables,
+      targetTable: mv.targetTable.name,
+      targetDatabase: mv.targetTable.config.database,
+      sourceFile: mv.sourceFile
+    };
+  });
+  registry.customViews.forEach((view) => {
+    customViews[view.name] = {
+      name: view.name,
+      selectSql: view.selectSql,
+      sourceTables: view.sourceTables,
+      sourceFile: view.sourceFile
+    };
+  });
   return {
     topics,
     tables,
@@ -1554,7 +1560,9 @@ var toInfraMap = (registry) => {
     apis,
     sqlResources,
     workflows,
-    webApps
+    webApps,
+    materializedViews,
+    customViews
   };
 };
 var getMooseInternal = () => globalThis.moose_internal;
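
The new `forEach` blocks above follow the same Map-to-keyed-record conversion used for the other resource types. A standalone sketch of that conversion, with a hypothetical `MaterializedView` type standing in for the package's own class:

```ts
// Hypothetical in-memory type standing in for the package's own materialized-view class.
interface MaterializedView {
  name: string;
  selectSql: string;
  sourceTables: string[];
  targetTable: { name: string; config: { database?: string } };
  sourceFile?: string;
}

// Convert the name-keyed Map into the plain-object map emitted for the infra plan,
// flattening the nested target table into its name plus database.
function toMaterializedViewMap(views: Map<string, MaterializedView>): Record<string, unknown> {
  const out: Record<string, unknown> = {};
  views.forEach((mv) => {
    out[mv.name] = {
      name: mv.name,
      selectSql: mv.selectSql,
      sourceTables: mv.sourceTables,
      targetTable: mv.targetTable.name,
      targetDatabase: mv.targetTable.config.database,
      sourceFile: mv.sourceFile,
    };
  });
  return out;
}
```
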
@@ -1578,6 +1586,8 @@ var loadIndex = () => {
   registry.sqlResources.clear();
   registry.workflows.clear();
   registry.webApps.clear();
+  registry.materializedViews.clear();
+  registry.customViews.clear();
   const appDir = `${process2.cwd()}/${getSourceDir()}`;
   Object.keys(__require.cache).forEach((key) => {
     if (key.startsWith(appDir)) {
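
The surrounding context clears each registry and then walks `require.cache`, evicting every compiled module under the app directory so the next `require` re-executes user code and repopulates the registries. A hedged, CommonJS-only sketch of that eviction pattern (paths and the `reload` wrapper are invented for illustration):

```ts
import path from "node:path";

// Evict every cached CommonJS module under appDir so the next require() re-runs it.
// This only affects require.cache; ES-module caches are not touched.
function evictAppModules(appDir: string): void {
  for (const key of Object.keys(require.cache)) {
    if (key.startsWith(appDir)) {
      delete require.cache[key];
    }
  }
}

// Usage sketch: clear state, evict modules, then re-require the app entry point,
// which repopulates the registries as a side effect of importing user code.
function reload(appDir: string, entry = "index.js"): void {
  evictAppModules(appDir);
  require(path.join(appDir, entry));
}
```
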
@@ -1819,7 +1829,156 @@ var MAX_RETRIES_CONSUMER = 150;
 var SESSION_TIMEOUT_CONSUMER = 3e4;
 var HEARTBEAT_INTERVAL_CONSUMER = 3e3;
 var DEFAULT_MAX_STREAMING_CONCURRENCY = 100;
-var CONSUMER_MAX_BATCH_SIZE = 1e3;
+var KAFKAJS_BYTE_MESSAGE_OVERHEAD = 500;
+var isMessageTooLargeError = (error) => {
+  if (KafkaJS2.isKafkaJSError && error instanceof Error && KafkaJS2.isKafkaJSError(error)) {
+    return error.type === "ERR_MSG_SIZE_TOO_LARGE" || error.code === 10 || error.cause !== void 0 && isMessageTooLargeError(error.cause);
+  }
+  if (error && typeof error === "object") {
+    const err = error;
+    return err.type === "ERR_MSG_SIZE_TOO_LARGE" || err.code === 10 || err.cause !== void 0 && isMessageTooLargeError(err.cause);
+  }
+  return false;
+};
+var splitBatch = (messages, maxChunkSize) => {
+  if (messages.length <= 1) {
+    return [messages];
+  }
+  const chunks = [];
+  let currentChunk = [];
+  let currentSize = 0;
+  for (const message of messages) {
+    const messageSize = Buffer2.byteLength(message.value, "utf8") + KAFKAJS_BYTE_MESSAGE_OVERHEAD;
+    if (currentSize + messageSize > maxChunkSize && currentChunk.length > 0) {
+      chunks.push(currentChunk);
+      currentChunk = [message];
+      currentSize = messageSize;
+    } else {
+      currentChunk.push(message);
+      currentSize += messageSize;
+    }
+  }
+  if (currentChunk.length > 0) {
+    chunks.push(currentChunk);
+  }
+  return chunks;
+};
+var sendChunkWithRetry = async (logger2, targetTopic, producer, messages, currentMaxSize, maxRetries = 3) => {
+  const currentMessages = messages;
+  let attempts = 0;
+  while (attempts < maxRetries) {
+    try {
+      await producer.send({
+        topic: targetTopic.name,
+        messages: currentMessages
+      });
+      logger2.log(
+        `Successfully sent ${currentMessages.length} messages to ${targetTopic.name}`
+      );
+      return;
+    } catch (error) {
+      if (isMessageTooLargeError(error) && currentMessages.length > 1) {
+        logger2.warn(
+          `Got MESSAGE_TOO_LARGE error, splitting batch of ${currentMessages.length} messages and retrying (${maxRetries - attempts} attempts left)`
+        );
+        const newMaxSize = Math.floor(currentMaxSize / 2);
+        const splitChunks = splitBatch(currentMessages, newMaxSize);
+        for (const chunk of splitChunks) {
+          await sendChunkWithRetry(
+            logger2,
+            targetTopic,
+            producer,
+            chunk,
+            newMaxSize,
+            // this error does not count as one failed attempt
+            maxRetries - attempts
+          );
+        }
+        return;
+      } else {
+        attempts++;
+        if (attempts >= maxRetries) {
+          let messagesHandledByDLQ = 0;
+          let messagesWithoutDLQ = 0;
+          const dlqErrors = [];
+          for (const failedMessage of currentMessages) {
+            const dlqTopic = failedMessage.dlq;
+            if (dlqTopic && failedMessage.originalValue) {
+              const dlqTopicName = dlqTopic.name;
+              const deadLetterRecord = {
+                originalRecord: {
+                  ...failedMessage.originalValue,
+                  // Include original Kafka message metadata
+                  __sourcePartition: failedMessage.originalMessage.partition,
+                  __sourceOffset: failedMessage.originalMessage.offset,
+                  __sourceTimestamp: failedMessage.originalMessage.timestamp
+                },
+                errorMessage: error instanceof Error ? error.message : String(error),
+                errorType: error instanceof Error ? error.constructor.name : "Unknown",
+                failedAt: /* @__PURE__ */ new Date(),
+                source: "transform"
+              };
+              cliLog({
+                action: "DeadLetter",
+                message: `Sending failed message to DLQ ${dlqTopicName}: ${error instanceof Error ? error.message : String(error)}`,
+                message_type: "Error"
+              });
+              try {
+                await producer.send({
+                  topic: dlqTopicName,
+                  messages: [{ value: JSON.stringify(deadLetterRecord) }]
+                });
+                logger2.log(`Sent failed message to DLQ ${dlqTopicName}`);
+                messagesHandledByDLQ++;
+              } catch (dlqError) {
+                const errorMsg = `Failed to send message to DLQ: ${dlqError}`;
+                logger2.error(errorMsg);
+                dlqErrors.push(errorMsg);
+              }
+            } else if (!dlqTopic) {
+              messagesWithoutDLQ++;
+              logger2.warn(
+                `Cannot send to DLQ: no DLQ configured for message (batch has mixed DLQ configurations)`
+              );
+            } else {
+              messagesWithoutDLQ++;
+              logger2.warn(
+                `Cannot send to DLQ: original message value not available`
+              );
+            }
+          }
+          const allMessagesHandled = messagesHandledByDLQ === currentMessages.length && messagesWithoutDLQ === 0 && dlqErrors.length === 0;
+          if (allMessagesHandled) {
+            logger2.log(
+              `All ${messagesHandledByDLQ} failed message(s) sent to DLQ, not throwing original error`
+            );
+            return;
+          }
+          if (messagesWithoutDLQ > 0) {
+            logger2.error(
+              `Cannot handle batch failure: ${messagesWithoutDLQ} message(s) have no DLQ configured`
+            );
+          }
+          if (dlqErrors.length > 0) {
+            logger2.error(
+              `Some messages failed to send to DLQ: ${dlqErrors.join(", ")}`
+            );
+          }
+          if (messagesHandledByDLQ > 0) {
+            logger2.warn(
+              `Partial DLQ success: ${messagesHandledByDLQ}/${currentMessages.length} message(s) sent to DLQ, but throwing due to incomplete batch handling`
+            );
+          }
+          throw error;
+        }
+        logger2.warn(
+          `Send ${currentMessages.length} messages failed (attempt ${attempts}/${maxRetries}), retrying: ${error}`
+        );
+        await new Promise((resolve2) => setTimeout(resolve2, 100 * attempts));
+      }
+    }
+  }
+};
 var MAX_STREAMING_CONCURRENCY = process3.env.MAX_STREAMING_CONCURRENCY ? parseInt(process3.env.MAX_STREAMING_CONCURRENCY, 10) : DEFAULT_MAX_STREAMING_CONCURRENCY;
 var metricsLog = (log) => {
   const req = http3.request({
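
The new `splitBatch` above greedily packs messages into chunks by estimated size, where each message is costed at its UTF-8 byte length plus a fixed 500-byte overhead, and `sendChunkWithRetry` halves that budget and recurses whenever the broker still reports `MESSAGE_TOO_LARGE`. Below is a self-contained sketch of the greedy splitter with a tiny worked example; it is re-declared locally so it runs on its own, and the constants are placeholders:

```ts
// Standalone restatement of the greedy size-based splitter; the 500-byte
// per-message overhead mirrors the constant above, but this copy exists only
// for illustration.
const PER_MESSAGE_OVERHEAD = 500;

interface KafkaMsg {
  value: string;
}

function splitBySize(messages: KafkaMsg[], maxChunkBytes: number): KafkaMsg[][] {
  if (messages.length <= 1) return [messages];
  const chunks: KafkaMsg[][] = [];
  let current: KafkaMsg[] = [];
  let size = 0;
  for (const m of messages) {
    const msgSize = Buffer.byteLength(m.value, "utf8") + PER_MESSAGE_OVERHEAD;
    if (size + msgSize > maxChunkBytes && current.length > 0) {
      chunks.push(current); // close the chunk that would overflow
      current = [m];
      size = msgSize;
    } else {
      current.push(m); // note: a single oversize message still forms its own chunk
      size += msgSize;
    }
  }
  if (current.length > 0) chunks.push(current);
  return chunks;
}

// Worked example: three ~601-byte messages against a 1500-byte budget -> chunks of [2, 1].
const msgs = Array.from({ length: 3 }, (_, i) => ({ value: "x".repeat(100) + i }));
console.log(splitBySize(msgs, 1500).map((c) => c.length)); // [2, 1]
```
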
@@ -1965,95 +2124,57 @@ var handleMessage = async (logger2, streamingFunctionWithConfigList, message, pr
   }
   return void 0;
 };
-var handleDLQForFailedMessages = async (logger2, producer, messages, error) => {
-  let messagesHandledByDLQ = 0;
-  let messagesWithoutDLQ = 0;
-  let dlqErrors = 0;
-  for (const msg of messages) {
-    if (msg.dlq && msg.originalValue) {
-      const deadLetterRecord = {
-        originalRecord: {
-          ...msg.originalValue,
-          // Include original Kafka message metadata
-          __sourcePartition: msg.originalMessage.partition,
-          __sourceOffset: msg.originalMessage.offset,
-          __sourceTimestamp: msg.originalMessage.timestamp
-        },
-        errorMessage: error instanceof Error ? error.message : String(error),
-        errorType: error instanceof Error ? error.constructor.name : "Unknown",
-        failedAt: /* @__PURE__ */ new Date(),
-        source: "transform"
-      };
-      cliLog({
-        action: "DeadLetter",
-        message: `Sending failed message to DLQ ${msg.dlq.name}: ${error instanceof Error ? error.message : String(error)}`,
-        message_type: "Error"
-      });
-      try {
-        await producer.send({
-          topic: msg.dlq.name,
-          messages: [{ value: JSON.stringify(deadLetterRecord) }]
-        });
-        logger2.log(`Sent failed message to DLQ ${msg.dlq.name}`);
-        messagesHandledByDLQ++;
-      } catch (dlqError) {
-        logger2.error(`Failed to send to DLQ: ${dlqError}`);
-        dlqErrors++;
+var sendMessages = async (logger2, metrics, targetTopic, producer, messages) => {
+  try {
+    let chunk = [];
+    let chunkSize = 0;
+    const maxMessageSize = targetTopic.max_message_bytes || 1024 * 1024;
+    for (const message of messages) {
+      const messageSize = Buffer2.byteLength(message.value, "utf8") + KAFKAJS_BYTE_MESSAGE_OVERHEAD;
+      if (chunkSize + messageSize > maxMessageSize) {
+        logger2.log(
+          `Sending ${chunkSize} bytes of a transformed record batch to ${targetTopic.name}`
+        );
+        await sendChunkWithRetry(
+          logger2,
+          targetTopic,
+          producer,
+          chunk,
+          maxMessageSize
+        );
+        logger2.log(
+          `Sent ${chunk.length} transformed records to ${targetTopic.name}`
+        );
+        chunk = [message];
+        chunkSize = messageSize;
+      } else {
+        chunk.push(message);
+        metrics.bytes += Buffer2.byteLength(message.value, "utf8");
+        chunkSize += messageSize;
       }
-    } else if (!msg.dlq) {
-      messagesWithoutDLQ++;
-      logger2.warn(`Cannot send to DLQ: no DLQ configured for message`);
-    } else {
-      messagesWithoutDLQ++;
-      logger2.warn(`Cannot send to DLQ: original message value not available`);
     }
-  }
-  const allMessagesHandled = messagesHandledByDLQ === messages.length && messagesWithoutDLQ === 0 && dlqErrors === 0;
-  if (allMessagesHandled) {
-    logger2.log(
-      `All ${messagesHandledByDLQ} failed message(s) sent to DLQ, suppressing original error`
-    );
-  } else if (messagesHandledByDLQ > 0) {
-    logger2.warn(
-      `Partial DLQ success: ${messagesHandledByDLQ}/${messages.length} message(s) sent to DLQ`
-    );
-    if (messagesWithoutDLQ > 0) {
-      logger2.error(
-        `Cannot handle batch failure: ${messagesWithoutDLQ} message(s) have no DLQ configured or missing original value`
+    metrics.count_out += chunk.length;
+    if (chunk.length > 0) {
+      logger2.log(
+        `Sending ${chunkSize} bytes of a transformed record batch to ${targetTopic.name}`
+      );
+      await sendChunkWithRetry(
+        logger2,
+        targetTopic,
+        producer,
+        chunk,
+        maxMessageSize
+      );
+      logger2.log(
+        `Sent final ${chunk.length} transformed data to ${targetTopic.name}`
       );
     }
-    if (dlqErrors > 0) {
-      logger2.error(`${dlqErrors} message(s) failed to send to DLQ`);
-    }
-  }
-  return allMessagesHandled;
-};
-var sendMessages = async (logger2, metrics, targetTopic, producer, messages) => {
-  if (messages.length === 0) return;
-  try {
-    await producer.send({
-      topic: targetTopic.name,
-      messages
-    });
-    for (const msg of messages) {
-      metrics.bytes += Buffer2.byteLength(msg.value, "utf8");
-    }
-    metrics.count_out += messages.length;
-    logger2.log(`Sent ${messages.length} messages to ${targetTopic.name}`);
   } catch (e) {
     logger2.error(`Failed to send transformed data`);
     if (e instanceof Error) {
       logError(logger2, e);
     }
-    const allHandledByDLQ = await handleDLQForFailedMessages(
-      logger2,
-      producer,
-      messages,
-      e
-    );
-    if (!allHandledByDLQ) {
-      throw e;
-    }
+    throw e;
   }
 };
 var sendMessageMetrics = (logger2, metrics) => {
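
The per-message dead-letter handling that previously lived in `handleDLQForFailedMessages` now sits inside `sendChunkWithRetry`, but the envelope written to the DLQ topic keeps the same shape. A hedged sketch of that shape, with field names taken from this diff and the wrapper type name invented for illustration:

```ts
// Illustrative envelope type; the field names mirror the diff, the type name does not.
interface DeadLetterEnvelope {
  originalRecord: Record<string, unknown> & {
    __sourcePartition: number; // partition of the source Kafka message
    __sourceOffset: string;    // offset within that partition
    __sourceTimestamp: string; // broker timestamp of the source message
  };
  errorMessage: string;
  errorType: string;           // Error subclass name, or "Unknown"
  failedAt: Date;
  source: "transform";
}

// Build one from a failed message's original payload and Kafka metadata.
function toDeadLetter(
  originalValue: Record<string, unknown>,
  originalMessage: { partition: number; offset: string; timestamp: string },
  error: unknown
): DeadLetterEnvelope {
  return {
    originalRecord: {
      ...originalValue,
      __sourcePartition: originalMessage.partition,
      __sourceOffset: originalMessage.offset,
      __sourceTimestamp: originalMessage.timestamp,
    },
    errorMessage: error instanceof Error ? error.message : String(error),
    errorType: error instanceof Error ? error.constructor.name : "Unknown",
    failedAt: new Date(),
    source: "transform",
  };
}
```
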
@@ -2293,13 +2414,18 @@ var runStreamingFunctions = async (args) => {
       autoCommit: true,
       autoCommitInterval: AUTO_COMMIT_INTERVAL_MS,
       fromBeginning: true
-    },
-    "js.consumer.max.batch.size": CONSUMER_MAX_BATCH_SIZE
+    }
+  });
+  const producer = kafka.producer({
+    kafkaJS: {
+      idempotent: true,
+      acks: ACKs,
+      retry: {
+        retries: MAX_RETRIES_PRODUCER,
+        maxRetryTime: MAX_RETRY_TIME_MS
+      }
+    }
   });
-  const maxMessageBytes = args.targetTopic?.max_message_bytes || 1024 * 1024;
-  const producer = kafka.producer(
-    createProducerConfig(maxMessageBytes)
-  );
   try {
     logger2.log("Starting producer...");
     await startProducer(logger2, producer);
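
For context on the inline producer options above: enabling idempotence makes broker-side retries safe against duplicates, and Kafka requires acknowledgements from all in-sync replicas for idempotent producers, which is why `acks` travels with the flag. A standalone restatement of the option object using placeholder values (the real `ACKs`, `MAX_RETRIES_PRODUCER`, and `MAX_RETRY_TIME_MS` constants are defined in `src/commons.ts` and are not reproduced here):

```ts
// Placeholder values; the real constants are defined in src/commons.ts.
const ACKS_ALL = -1;             // wait for all in-sync replicas
const PRODUCER_RETRIES = 10;
const MAX_RETRY_TIME_MS = 30000;

// Mirrors the option shape used above: an idempotent producer (no duplicates on
// broker-side retries) with bounded retry behaviour; it would be passed to
// kafka.producer(...) and then connected via startProducer(), as in the diff.
const producerConfig = {
  kafkaJS: {
    idempotent: true,
    acks: ACKS_ALL,
    retry: {
      retries: PRODUCER_RETRIES,
      maxRetryTime: MAX_RETRY_TIME_MS,
    },
  },
};
```
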