@514labs/moose-lib 0.6.256-ci-4-g0ca62054 → 0.6.256-ci-3-gafce5840

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -39,22 +39,6 @@ function isTruthy(value) {
39
39
  return false;
40
40
  }
41
41
  }
42
- function createProducerConfig(maxMessageBytes) {
43
- return {
44
- kafkaJS: {
45
- idempotent: false,
46
- // Not needed for at-least-once delivery
47
- acks: ACKs,
48
- retry: {
49
- retries: MAX_RETRIES_PRODUCER,
50
- maxRetryTime: MAX_RETRY_TIME_MS
51
- }
52
- },
53
- "linger.ms": 0,
54
- // This is to make sure at least once delivery with immediate feedback on the send
55
- ...maxMessageBytes && { "message.max.bytes": maxMessageBytes }
56
- };
57
- }
58
42
  var import_http, import_client, import_kafka_javascript, Kafka, compilerLog, getClickhouseClient, cliLog, MAX_RETRIES, MAX_RETRY_TIME_MS, RETRY_INITIAL_TIME_MS, MAX_RETRIES_PRODUCER, ACKs, parseBrokerString, logError, buildSaslConfig, getKafkaClient;
59
43
  var init_commons = __esm({
60
44
  "src/commons.ts"() {
@@ -1175,7 +1159,9 @@ var moose_internal = {
1175
1159
  apis: /* @__PURE__ */ new Map(),
1176
1160
  sqlResources: /* @__PURE__ */ new Map(),
1177
1161
  workflows: /* @__PURE__ */ new Map(),
1178
- webApps: /* @__PURE__ */ new Map()
1162
+ webApps: /* @__PURE__ */ new Map(),
1163
+ materializedViews: /* @__PURE__ */ new Map(),
1164
+ customViews: /* @__PURE__ */ new Map()
1179
1165
  };
1180
1166
  var defaultRetentionPeriod = 60 * 60 * 24 * 7;
1181
1167
  function isS3QueueConfig(config) {
@@ -1373,6 +1359,8 @@ var toInfraMap = (registry) => {
1373
1359
  const sqlResources = {};
1374
1360
  const workflows = {};
1375
1361
  const webApps = {};
1362
+ const materializedViews = {};
1363
+ const customViews = {};
1376
1364
  registry.tables.forEach((table) => {
1377
1365
  const id = table.config.version ? `${table.name}_${table.config.version}` : table.name;
1378
1366
  let metadata = table.metadata;
@@ -1560,6 +1548,24 @@ var toInfraMap = (registry) => {
1560
1548
  metadata: webApp.config.metadata
1561
1549
  };
1562
1550
  });
1551
+ registry.materializedViews.forEach((mv) => {
1552
+ materializedViews[mv.name] = {
1553
+ name: mv.name,
1554
+ selectSql: mv.selectSql,
1555
+ sourceTables: mv.sourceTables,
1556
+ targetTable: mv.targetTable.name,
1557
+ targetDatabase: mv.targetTable.config.database,
1558
+ sourceFile: mv.sourceFile
1559
+ };
1560
+ });
1561
+ registry.customViews.forEach((view) => {
1562
+ customViews[view.name] = {
1563
+ name: view.name,
1564
+ selectSql: view.selectSql,
1565
+ sourceTables: view.sourceTables,
1566
+ sourceFile: view.sourceFile
1567
+ };
1568
+ });
1563
1569
  return {
1564
1570
  topics,
1565
1571
  tables,
@@ -1567,7 +1573,9 @@ var toInfraMap = (registry) => {
1567
1573
  apis,
1568
1574
  sqlResources,
1569
1575
  workflows,
1570
- webApps
1576
+ webApps,
1577
+ materializedViews,
1578
+ customViews
1571
1579
  };
1572
1580
  };
1573
1581
  var getMooseInternal = () => globalThis.moose_internal;
@@ -1591,6 +1599,8 @@ var loadIndex = () => {
1591
1599
  registry.sqlResources.clear();
1592
1600
  registry.workflows.clear();
1593
1601
  registry.webApps.clear();
1602
+ registry.materializedViews.clear();
1603
+ registry.customViews.clear();
1594
1604
  const appDir = `${import_process.default.cwd()}/${getSourceDir()}`;
1595
1605
  Object.keys(require.cache).forEach((key) => {
1596
1606
  if (key.startsWith(appDir)) {
@@ -1832,7 +1842,156 @@ var MAX_RETRIES_CONSUMER = 150;
1832
1842
  var SESSION_TIMEOUT_CONSUMER = 3e4;
1833
1843
  var HEARTBEAT_INTERVAL_CONSUMER = 3e3;
1834
1844
  var DEFAULT_MAX_STREAMING_CONCURRENCY = 100;
1835
- var CONSUMER_MAX_BATCH_SIZE = 1e3;
1845
+ var KAFKAJS_BYTE_MESSAGE_OVERHEAD = 500;
1846
+ var isMessageTooLargeError = (error) => {
1847
+ if (import_kafka_javascript2.KafkaJS.isKafkaJSError && error instanceof Error && import_kafka_javascript2.KafkaJS.isKafkaJSError(error)) {
1848
+ return error.type === "ERR_MSG_SIZE_TOO_LARGE" || error.code === 10 || error.cause !== void 0 && isMessageTooLargeError(error.cause);
1849
+ }
1850
+ if (error && typeof error === "object") {
1851
+ const err = error;
1852
+ return err.type === "ERR_MSG_SIZE_TOO_LARGE" || err.code === 10 || err.cause !== void 0 && isMessageTooLargeError(err.cause);
1853
+ }
1854
+ return false;
1855
+ };
1856
+ var splitBatch = (messages, maxChunkSize) => {
1857
+ if (messages.length <= 1) {
1858
+ return [messages];
1859
+ }
1860
+ const chunks = [];
1861
+ let currentChunk = [];
1862
+ let currentSize = 0;
1863
+ for (const message of messages) {
1864
+ const messageSize = import_node_buffer.Buffer.byteLength(message.value, "utf8") + KAFKAJS_BYTE_MESSAGE_OVERHEAD;
1865
+ if (currentSize + messageSize > maxChunkSize && currentChunk.length > 0) {
1866
+ chunks.push(currentChunk);
1867
+ currentChunk = [message];
1868
+ currentSize = messageSize;
1869
+ } else {
1870
+ currentChunk.push(message);
1871
+ currentSize += messageSize;
1872
+ }
1873
+ }
1874
+ if (currentChunk.length > 0) {
1875
+ chunks.push(currentChunk);
1876
+ }
1877
+ return chunks;
1878
+ };
1879
+ var sendChunkWithRetry = async (logger2, targetTopic, producer, messages, currentMaxSize, maxRetries = 3) => {
1880
+ const currentMessages = messages;
1881
+ let attempts = 0;
1882
+ while (attempts < maxRetries) {
1883
+ try {
1884
+ await producer.send({
1885
+ topic: targetTopic.name,
1886
+ messages: currentMessages
1887
+ });
1888
+ logger2.log(
1889
+ `Successfully sent ${currentMessages.length} messages to ${targetTopic.name}`
1890
+ );
1891
+ return;
1892
+ } catch (error) {
1893
+ if (isMessageTooLargeError(error) && currentMessages.length > 1) {
1894
+ logger2.warn(
1895
+ `Got MESSAGE_TOO_LARGE error, splitting batch of ${currentMessages.length} messages and retrying (${maxRetries - attempts} attempts left)`
1896
+ );
1897
+ const newMaxSize = Math.floor(currentMaxSize / 2);
1898
+ const splitChunks = splitBatch(currentMessages, newMaxSize);
1899
+ for (const chunk of splitChunks) {
1900
+ await sendChunkWithRetry(
1901
+ logger2,
1902
+ targetTopic,
1903
+ producer,
1904
+ chunk,
1905
+ newMaxSize,
1906
+ // this error does not count as one failed attempt
1907
+ maxRetries - attempts
1908
+ );
1909
+ }
1910
+ return;
1911
+ } else {
1912
+ attempts++;
1913
+ if (attempts >= maxRetries) {
1914
+ let messagesHandledByDLQ = 0;
1915
+ let messagesWithoutDLQ = 0;
1916
+ const dlqErrors = [];
1917
+ for (const failedMessage of currentMessages) {
1918
+ const dlqTopic = failedMessage.dlq;
1919
+ if (dlqTopic && failedMessage.originalValue) {
1920
+ const dlqTopicName = dlqTopic.name;
1921
+ const deadLetterRecord = {
1922
+ originalRecord: {
1923
+ ...failedMessage.originalValue,
1924
+ // Include original Kafka message metadata
1925
+ __sourcePartition: failedMessage.originalMessage.partition,
1926
+ __sourceOffset: failedMessage.originalMessage.offset,
1927
+ __sourceTimestamp: failedMessage.originalMessage.timestamp
1928
+ },
1929
+ errorMessage: error instanceof Error ? error.message : String(error),
1930
+ errorType: error instanceof Error ? error.constructor.name : "Unknown",
1931
+ failedAt: /* @__PURE__ */ new Date(),
1932
+ source: "transform"
1933
+ };
1934
+ cliLog({
1935
+ action: "DeadLetter",
1936
+ message: `Sending failed message to DLQ ${dlqTopicName}: ${error instanceof Error ? error.message : String(error)}`,
1937
+ message_type: "Error"
1938
+ });
1939
+ try {
1940
+ await producer.send({
1941
+ topic: dlqTopicName,
1942
+ messages: [{ value: JSON.stringify(deadLetterRecord) }]
1943
+ });
1944
+ logger2.log(`Sent failed message to DLQ ${dlqTopicName}`);
1945
+ messagesHandledByDLQ++;
1946
+ } catch (dlqError) {
1947
+ const errorMsg = `Failed to send message to DLQ: ${dlqError}`;
1948
+ logger2.error(errorMsg);
1949
+ dlqErrors.push(errorMsg);
1950
+ }
1951
+ } else if (!dlqTopic) {
1952
+ messagesWithoutDLQ++;
1953
+ logger2.warn(
1954
+ `Cannot send to DLQ: no DLQ configured for message (batch has mixed DLQ configurations)`
1955
+ );
1956
+ } else {
1957
+ messagesWithoutDLQ++;
1958
+ logger2.warn(
1959
+ `Cannot send to DLQ: original message value not available`
1960
+ );
1961
+ }
1962
+ }
1963
+ const allMessagesHandled = messagesHandledByDLQ === currentMessages.length && messagesWithoutDLQ === 0 && dlqErrors.length === 0;
1964
+ if (allMessagesHandled) {
1965
+ logger2.log(
1966
+ `All ${messagesHandledByDLQ} failed message(s) sent to DLQ, not throwing original error`
1967
+ );
1968
+ return;
1969
+ }
1970
+ if (messagesWithoutDLQ > 0) {
1971
+ logger2.error(
1972
+ `Cannot handle batch failure: ${messagesWithoutDLQ} message(s) have no DLQ configured`
1973
+ );
1974
+ }
1975
+ if (dlqErrors.length > 0) {
1976
+ logger2.error(
1977
+ `Some messages failed to send to DLQ: ${dlqErrors.join(", ")}`
1978
+ );
1979
+ }
1980
+ if (messagesHandledByDLQ > 0) {
1981
+ logger2.warn(
1982
+ `Partial DLQ success: ${messagesHandledByDLQ}/${currentMessages.length} message(s) sent to DLQ, but throwing due to incomplete batch handling`
1983
+ );
1984
+ }
1985
+ throw error;
1986
+ }
1987
+ logger2.warn(
1988
+ `Send ${currentMessages.length} messages failed (attempt ${attempts}/${maxRetries}), retrying: ${error}`
1989
+ );
1990
+ await new Promise((resolve2) => setTimeout(resolve2, 100 * attempts));
1991
+ }
1992
+ }
1993
+ }
1994
+ };
1836
1995
  var MAX_STREAMING_CONCURRENCY = process3.env.MAX_STREAMING_CONCURRENCY ? parseInt(process3.env.MAX_STREAMING_CONCURRENCY, 10) : DEFAULT_MAX_STREAMING_CONCURRENCY;
1837
1996
  var metricsLog = (log) => {
1838
1997
  const req = http3.request({
@@ -1978,95 +2137,57 @@ var handleMessage = async (logger2, streamingFunctionWithConfigList, message, pr
1978
2137
  }
1979
2138
  return void 0;
1980
2139
  };
1981
- var handleDLQForFailedMessages = async (logger2, producer, messages, error) => {
1982
- let messagesHandledByDLQ = 0;
1983
- let messagesWithoutDLQ = 0;
1984
- let dlqErrors = 0;
1985
- for (const msg of messages) {
1986
- if (msg.dlq && msg.originalValue) {
1987
- const deadLetterRecord = {
1988
- originalRecord: {
1989
- ...msg.originalValue,
1990
- // Include original Kafka message metadata
1991
- __sourcePartition: msg.originalMessage.partition,
1992
- __sourceOffset: msg.originalMessage.offset,
1993
- __sourceTimestamp: msg.originalMessage.timestamp
1994
- },
1995
- errorMessage: error instanceof Error ? error.message : String(error),
1996
- errorType: error instanceof Error ? error.constructor.name : "Unknown",
1997
- failedAt: /* @__PURE__ */ new Date(),
1998
- source: "transform"
1999
- };
2000
- cliLog({
2001
- action: "DeadLetter",
2002
- message: `Sending failed message to DLQ ${msg.dlq.name}: ${error instanceof Error ? error.message : String(error)}`,
2003
- message_type: "Error"
2004
- });
2005
- try {
2006
- await producer.send({
2007
- topic: msg.dlq.name,
2008
- messages: [{ value: JSON.stringify(deadLetterRecord) }]
2009
- });
2010
- logger2.log(`Sent failed message to DLQ ${msg.dlq.name}`);
2011
- messagesHandledByDLQ++;
2012
- } catch (dlqError) {
2013
- logger2.error(`Failed to send to DLQ: ${dlqError}`);
2014
- dlqErrors++;
2140
+ var sendMessages = async (logger2, metrics, targetTopic, producer, messages) => {
2141
+ try {
2142
+ let chunk = [];
2143
+ let chunkSize = 0;
2144
+ const maxMessageSize = targetTopic.max_message_bytes || 1024 * 1024;
2145
+ for (const message of messages) {
2146
+ const messageSize = import_node_buffer.Buffer.byteLength(message.value, "utf8") + KAFKAJS_BYTE_MESSAGE_OVERHEAD;
2147
+ if (chunkSize + messageSize > maxMessageSize) {
2148
+ logger2.log(
2149
+ `Sending ${chunkSize} bytes of a transformed record batch to ${targetTopic.name}`
2150
+ );
2151
+ await sendChunkWithRetry(
2152
+ logger2,
2153
+ targetTopic,
2154
+ producer,
2155
+ chunk,
2156
+ maxMessageSize
2157
+ );
2158
+ logger2.log(
2159
+ `Sent ${chunk.length} transformed records to ${targetTopic.name}`
2160
+ );
2161
+ chunk = [message];
2162
+ chunkSize = messageSize;
2163
+ } else {
2164
+ chunk.push(message);
2165
+ metrics.bytes += import_node_buffer.Buffer.byteLength(message.value, "utf8");
2166
+ chunkSize += messageSize;
2015
2167
  }
2016
- } else if (!msg.dlq) {
2017
- messagesWithoutDLQ++;
2018
- logger2.warn(`Cannot send to DLQ: no DLQ configured for message`);
2019
- } else {
2020
- messagesWithoutDLQ++;
2021
- logger2.warn(`Cannot send to DLQ: original message value not available`);
2022
2168
  }
2023
- }
2024
- const allMessagesHandled = messagesHandledByDLQ === messages.length && messagesWithoutDLQ === 0 && dlqErrors === 0;
2025
- if (allMessagesHandled) {
2026
- logger2.log(
2027
- `All ${messagesHandledByDLQ} failed message(s) sent to DLQ, suppressing original error`
2028
- );
2029
- } else if (messagesHandledByDLQ > 0) {
2030
- logger2.warn(
2031
- `Partial DLQ success: ${messagesHandledByDLQ}/${messages.length} message(s) sent to DLQ`
2032
- );
2033
- if (messagesWithoutDLQ > 0) {
2034
- logger2.error(
2035
- `Cannot handle batch failure: ${messagesWithoutDLQ} message(s) have no DLQ configured or missing original value`
2169
+ metrics.count_out += chunk.length;
2170
+ if (chunk.length > 0) {
2171
+ logger2.log(
2172
+ `Sending ${chunkSize} bytes of a transformed record batch to ${targetTopic.name}`
2173
+ );
2174
+ await sendChunkWithRetry(
2175
+ logger2,
2176
+ targetTopic,
2177
+ producer,
2178
+ chunk,
2179
+ maxMessageSize
2180
+ );
2181
+ logger2.log(
2182
+ `Sent final ${chunk.length} transformed data to ${targetTopic.name}`
2036
2183
  );
2037
2184
  }
2038
- if (dlqErrors > 0) {
2039
- logger2.error(`${dlqErrors} message(s) failed to send to DLQ`);
2040
- }
2041
- }
2042
- return allMessagesHandled;
2043
- };
2044
- var sendMessages = async (logger2, metrics, targetTopic, producer, messages) => {
2045
- if (messages.length === 0) return;
2046
- try {
2047
- await producer.send({
2048
- topic: targetTopic.name,
2049
- messages
2050
- });
2051
- for (const msg of messages) {
2052
- metrics.bytes += import_node_buffer.Buffer.byteLength(msg.value, "utf8");
2053
- }
2054
- metrics.count_out += messages.length;
2055
- logger2.log(`Sent ${messages.length} messages to ${targetTopic.name}`);
2056
2185
  } catch (e) {
2057
2186
  logger2.error(`Failed to send transformed data`);
2058
2187
  if (e instanceof Error) {
2059
2188
  logError(logger2, e);
2060
2189
  }
2061
- const allHandledByDLQ = await handleDLQForFailedMessages(
2062
- logger2,
2063
- producer,
2064
- messages,
2065
- e
2066
- );
2067
- if (!allHandledByDLQ) {
2068
- throw e;
2069
- }
2190
+ throw e;
2070
2191
  }
2071
2192
  };
2072
2193
  var sendMessageMetrics = (logger2, metrics) => {
@@ -2306,13 +2427,18 @@ var runStreamingFunctions = async (args) => {
2306
2427
  autoCommit: true,
2307
2428
  autoCommitInterval: AUTO_COMMIT_INTERVAL_MS,
2308
2429
  fromBeginning: true
2309
- },
2310
- "js.consumer.max.batch.size": CONSUMER_MAX_BATCH_SIZE
2430
+ }
2431
+ });
2432
+ const producer = kafka.producer({
2433
+ kafkaJS: {
2434
+ idempotent: true,
2435
+ acks: ACKs,
2436
+ retry: {
2437
+ retries: MAX_RETRIES_PRODUCER,
2438
+ maxRetryTime: MAX_RETRY_TIME_MS
2439
+ }
2440
+ }
2311
2441
  });
2312
- const maxMessageBytes = args.targetTopic?.max_message_bytes || 1024 * 1024;
2313
- const producer = kafka.producer(
2314
- createProducerConfig(maxMessageBytes)
2315
- );
2316
2442
  try {
2317
2443
  logger2.log("Starting producer...");
2318
2444
  await startProducer(logger2, producer);