@514labs/moose-lib 0.6.276-ci-1-gfe86cd2c → 0.6.276-ci-3-g7a7bb18a

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -55,13 +55,13 @@ function createProducerConfig(maxMessageBytes) {
  ...maxMessageBytes && { "message.max.bytes": maxMessageBytes }
  };
  }
- var import_http, import_client, import_kafka_javascript, Kafka, compilerLog, getClickhouseClient, cliLog, MAX_RETRIES, MAX_RETRY_TIME_MS, RETRY_INITIAL_TIME_MS, MAX_RETRIES_PRODUCER, ACKs, parseBrokerString, logError, buildSaslConfig, getKafkaClient, buildNativeSaslConfig, createNativeKafkaConsumer;
+ var import_http, import_client, import_kafka_javascript, Kafka, compilerLog, getClickhouseClient, cliLog, MAX_RETRIES, MAX_RETRY_TIME_MS, RETRY_INITIAL_TIME_MS, MAX_RETRIES_PRODUCER, ACKs, parseBrokerString, logError, buildSaslConfig, getKafkaClient;
  var init_commons = __esm({
  "src/commons.ts"() {
  "use strict";
  import_http = __toESM(require("http"));
  import_client = require("@clickhouse/client");
- import_kafka_javascript = require("@confluentinc/kafka-javascript");
+ import_kafka_javascript = require("@514labs/kafka-javascript");
  ({ Kafka } = import_kafka_javascript.KafkaJS);
  compilerLog = (message) => {
  if (!isTruthy(process.env.MOOSE_DISABLE_COMPILER_LOGS)) {
@@ -152,59 +152,6 @@ var init_commons = __esm({
  }
  });
  };
- buildNativeSaslConfig = (logger2, cfg) => {
- if (!cfg.saslMechanism || !cfg.saslUsername || !cfg.saslPassword) {
- return {};
- }
- const mechanism = cfg.saslMechanism.toUpperCase();
- const validMechanisms = ["PLAIN", "SCRAM-SHA-256", "SCRAM-SHA-512"];
- if (!validMechanisms.includes(mechanism)) {
- logger2.warn(`Unsupported SASL mechanism: ${cfg.saslMechanism}`);
- return {};
- }
- return {
- "sasl.mechanisms": mechanism,
- "sasl.username": cfg.saslUsername,
- "sasl.password": cfg.saslPassword
- };
- };
- createNativeKafkaConsumer = (cfg, logger2, rebalanceCb) => {
- const brokers = parseBrokerString(cfg.broker || "");
- if (brokers.length === 0) {
- throw new Error(`No valid broker addresses found in: "${cfg.broker}"`);
- }
- logger2.log(
- `Creating native KafkaConsumer with brokers: ${brokers.join(", ")}`
- );
- logger2.log(`Security protocol: ${cfg.securityProtocol || "plaintext"}`);
- logger2.log(`Client ID: ${cfg.clientId}`);
- logger2.log(`Group ID: ${cfg.groupId}`);
- const saslConfig = buildNativeSaslConfig(logger2, cfg);
- const consumerConfig = {
- // Connection
- "bootstrap.servers": brokers.join(","),
- "client.id": cfg.clientId,
- // Group management
- "group.id": cfg.groupId,
- "session.timeout.ms": cfg.sessionTimeoutMs ?? 3e4,
- "heartbeat.interval.ms": cfg.heartbeatIntervalMs ?? 3e3,
- "max.poll.interval.ms": cfg.maxPollIntervalMs ?? 3e5,
- // Offset management
- "enable.auto.commit": cfg.autoCommit ?? true,
- "auto.commit.interval.ms": cfg.autoCommitIntervalMs ?? 5e3,
- // Security
- ...cfg.securityProtocol === "SASL_SSL" && {
- "security.protocol": "sasl_ssl"
- },
- ...saslConfig,
- // Rebalance callback
- ...rebalanceCb && { rebalance_cb: rebalanceCb }
- };
- const topicConfig = {
- "auto.offset.reset": cfg.autoOffsetReset ?? "earliest"
- };
- return new import_kafka_javascript.KafkaConsumer(consumerConfig, topicConfig);
- };
  }
  });

@@ -1924,7 +1871,7 @@ var runBlocks = async (config) => {

  // src/streaming-functions/runner.ts
  var import_node_stream2 = require("stream");
- var import_kafka_javascript2 = require("@confluentinc/kafka-javascript");
+ var import_kafka_javascript2 = require("@514labs/kafka-javascript");
  var import_node_buffer = require("buffer");
  var process3 = __toESM(require("process"));
  var http3 = __toESM(require("http"));
@@ -1932,6 +1879,8 @@ init_commons();
  var { Kafka: Kafka2 } = import_kafka_javascript2.KafkaJS;
  var HOSTNAME = process3.env.HOSTNAME;
  var AUTO_COMMIT_INTERVAL_MS = 5e3;
+ var PARTITIONS_CONSUMED_CONCURRENTLY = 3;
+ var MAX_RETRIES_CONSUMER = 150;
  var SESSION_TIMEOUT_CONSUMER = 3e4;
  var HEARTBEAT_INTERVAL_CONSUMER = 3e3;
  var DEFAULT_MAX_STREAMING_CONCURRENCY = 100;
@@ -1972,31 +1921,23 @@ var stopProducer = async (logger2, producer) => {
  var stopConsumer = async (logger2, consumer, sourceTopic) => {
  try {
  logger2.log("Pausing consumer...");
- const topicPartitions = Array.from(
+ const partitionNumbers = Array.from(
  { length: sourceTopic.partitions },
- (_, i) => ({
- topic: sourceTopic.name,
- partition: i
- })
+ (_, i) => i
  );
- consumer.pause(topicPartitions);
+ await consumer.pause([
+ {
+ topic: sourceTopic.name,
+ partitions: partitionNumbers
+ }
+ ]);
  logger2.log("Disconnecting consumer...");
- await new Promise((resolve2, reject) => {
- consumer.disconnect((err) => {
- if (err) {
- reject(err);
- } else {
- resolve2();
- }
- });
- });
+ await consumer.disconnect();
  logger2.log("Consumer is shutting down...");
  } catch (error) {
  logger2.error(`Error during consumer shutdown: ${error}`);
  try {
- await new Promise((resolve2) => {
- consumer.disconnect(() => resolve2());
- });
+ await consumer.disconnect();
  logger2.log("Consumer disconnected after error");
  } catch (disconnectError) {
  logger2.error(`Failed to disconnect consumer: ${disconnectError}`);
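The stopConsumer rewrite above replaces the callback-based native disconnect with the promise-based KafkaJS-style API. A minimal, illustrative sketch of that shutdown ordering, written against the open-source kafkajs package (whose API the bundled KafkaJS compatibility layer mirrors); the function and parameter names below are placeholders, not part of this package:

```ts
// Illustrative only — not this package's code. Assumes the open-source
// `kafkajs` API; topic/partition arguments are placeholders.
import type { Consumer, Producer } from "kafkajs";

async function shutdownGracefully(
  consumer: Consumer,
  producer: Producer,
  topic: string,
  partitionCount: number,
): Promise<void> {
  // Stop fetching new batches for every partition of the source topic.
  const partitions = Array.from({ length: partitionCount }, (_, i) => i);
  consumer.pause([{ topic, partitions }]);

  // Leave the consumer group cleanly, then disconnect the producer last so
  // in-flight transformed messages can still be delivered.
  await consumer.disconnect();
  await producer.disconnect();
}
```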
@@ -2236,15 +2177,7 @@ var startConsumer = async (args, logger2, metrics, _parallelism, consumer, produ
  }
  try {
  logger2.log("Connecting consumer...");
- await new Promise((resolve2, reject) => {
- consumer.connect({}, (err) => {
- if (err) {
- reject(err);
- } else {
- resolve2();
- }
- });
- });
+ await consumer.connect();
  logger2.log("Consumer connected successfully");
  } catch (error) {
  logger2.error("Failed to connect consumer:");
@@ -2269,94 +2202,61 @@ var startConsumer = async (args, logger2, metrics, _parallelism, consumer, produ
  streamingFunctions = [[loadStreamingFunction(args.functionFilePath), {}]];
  fieldMutations = void 0;
  }
- consumer.subscribe([args.sourceTopic.name]);
- consumer.setDefaultConsumeTimeout(1e3);
- let isRunning = true;
- const consumeLoop = async () => {
- while (isRunning && consumer.isConnected()) {
- try {
- const messages = await new Promise(
- (resolve2, reject) => {
- consumer.consume(CONSUMER_MAX_BATCH_SIZE, (err, messages2) => {
- if (err) {
- reject(err);
- } else {
- resolve2(messages2 || []);
- }
- });
- }
- );
- if (messages.length === 0) {
- continue;
- }
- metrics.count_in += messages.length;
- cliLog({
- action: "Received",
- message: `${logger2.logPrefix} ${messages.length} message(s)`
- });
- logger2.log(`Received ${messages.length} message(s)`);
- const readableStream = import_node_stream2.Readable.from(messages);
- const processedMessages = await readableStream.map(
- async (message) => {
- const kafkaMessage = {
- value: message.value,
- key: message.key,
- partition: message.partition,
- offset: message.offset,
- timestamp: message.timestamp,
- headers: message.headers
- };
- return handleMessage(
- logger2,
- streamingFunctions,
- kafkaMessage,
- producer,
- fieldMutations
- );
- },
- {
- concurrency: MAX_STREAMING_CONCURRENCY
+ await consumer.subscribe({
+ topics: [args.sourceTopic.name]
+ // Use full topic name for Kafka operations
+ });
+ await consumer.run({
+ eachBatchAutoResolve: true,
+ // Enable parallel processing of partitions
+ partitionsConsumedConcurrently: PARTITIONS_CONSUMED_CONCURRENTLY,
+ // To be adjusted
+ eachBatch: async ({ batch, heartbeat, isRunning, isStale }) => {
+ if (!isRunning() || isStale()) {
+ return;
+ }
+ metrics.count_in += batch.messages.length;
+ cliLog({
+ action: "Received",
+ message: `${logger2.logPrefix} ${batch.messages.length} message(s)`
+ });
+ logger2.log(`Received ${batch.messages.length} message(s)`);
+ let index = 0;
+ const readableStream = import_node_stream2.Readable.from(batch.messages);
+ const processedMessages = await readableStream.map(
+ async (message) => {
+ index++;
+ if (batch.messages.length > DEFAULT_MAX_STREAMING_CONCURRENCY && index % DEFAULT_MAX_STREAMING_CONCURRENCY || index - 1 === batch.messages.length) {
+ await heartbeat();
  }
- ).toArray();
- const filteredMessages = processedMessages.flat().filter((msg) => msg !== void 0 && msg.value !== void 0);
- if (args.targetTopic === void 0 || processedMessages.length === 0) {
- continue;
- }
- if (filteredMessages.length > 0) {
- await sendMessages(
+ return handleMessage(
  logger2,
- metrics,
- args.targetTopic,
+ streamingFunctions,
+ message,
  producer,
- filteredMessages
+ fieldMutations
  );
+ },
+ {
+ concurrency: MAX_STREAMING_CONCURRENCY
  }
- } catch (error) {
- if (error && typeof error === "object" && "code" in error) {
- const kafkaError = error;
- if (kafkaError.code === import_kafka_javascript2.CODES.ERRORS.ERR__TIMED_OUT) {
- continue;
- }
- if (kafkaError.code === import_kafka_javascript2.CODES.ERRORS.ERR__PARTITION_EOF) {
- continue;
- }
- }
- logger2.error(`Error consuming messages: ${error}`);
- if (error instanceof Error) {
- logError(logger2, error);
- }
- await new Promise((resolve2) => setTimeout(resolve2, 100));
+ ).toArray();
+ const filteredMessages = processedMessages.flat().filter((msg) => msg !== void 0 && msg.value !== void 0);
+ if (args.targetTopic === void 0 || processedMessages.length === 0) {
+ return;
+ }
+ await heartbeat();
+ if (filteredMessages.length > 0) {
+ await sendMessages(
+ logger2,
+ metrics,
+ args.targetTopic,
+ producer,
+ filteredMessages
+ );
  }
  }
- };
- consumeLoop().catch((err) => {
- logger2.error(`Consumer loop crashed: ${err}`);
- isRunning = false;
  });
- consumer._isRunning = isRunning;
- consumer._stopConsuming = () => {
- isRunning = false;
- };
  logger2.log("Consumer is running...");
  };
  var buildLogger = (args, workerId) => {
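For readers unfamiliar with the batch API adopted above: the hand-rolled consume() polling loop, with its manual timeout and partition-EOF handling, is replaced by consumer.run({ eachBatch }), which drives polling and rebalances internally. A self-contained sketch of the same pattern, again against the open-source kafkajs package rather than the bundled client; broker, topic, and group names are placeholders:

```ts
// Minimal, illustrative batch consumer in the style adopted by this release.
// Uses the open-source `kafkajs` API, which the bundle's KafkaJS layer mirrors.
import { Kafka } from "kafkajs";

const kafka = new Kafka({ clientId: "example-consumer", brokers: ["localhost:9092"] });
const consumer = kafka.consumer({ groupId: "example-group" });

async function main(): Promise<void> {
  await consumer.connect();
  await consumer.subscribe({ topics: ["source-topic"], fromBeginning: true });

  await consumer.run({
    eachBatchAutoResolve: true,          // resolve offsets automatically when the handler returns
    partitionsConsumedConcurrently: 3,   // process several partitions in parallel
    eachBatch: async ({ batch, heartbeat, isRunning, isStale }) => {
      if (!isRunning() || isStale()) return; // skip work after a stop or rebalance
      for (const message of batch.messages) {
        // ...transform and forward message.value here...
      }
      await heartbeat();                     // keep the group session alive between batches
    },
  });
}

main().catch((err) => {
  console.error("consumer crashed:", err);
  process.exit(1);
});
```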
@@ -2436,33 +2336,6 @@ var runStreamingFunctions = async (args) => {
  setTimeout(() => sendMessageMetrics(logger2, metrics), 1e3);
  const clientIdPrefix = HOSTNAME ? `${HOSTNAME}-` : "";
  const processId = `${clientIdPrefix}${streamingFuncId}-ts-${worker.id}`;
- const consumer = createNativeKafkaConsumer(
- {
- clientId: processId,
- broker: args.broker,
- groupId: streamingFuncId,
- securityProtocol: args.securityProtocol,
- saslUsername: args.saslUsername,
- saslPassword: args.saslPassword,
- saslMechanism: args.saslMechanism,
- sessionTimeoutMs: SESSION_TIMEOUT_CONSUMER,
- heartbeatIntervalMs: HEARTBEAT_INTERVAL_CONSUMER,
- autoCommit: true,
- autoCommitIntervalMs: AUTO_COMMIT_INTERVAL_MS,
- autoOffsetReset: "earliest",
- maxBatchSize: CONSUMER_MAX_BATCH_SIZE
- },
- logger2,
- (err, assignments) => {
- if (err.code === import_kafka_javascript2.CODES.ERRORS.ERR__ASSIGN_PARTITIONS) {
- logger2.log(`Assigned partitions: ${JSON.stringify(assignments)}`);
- } else if (err.code === import_kafka_javascript2.CODES.ERRORS.ERR__REVOKE_PARTITIONS) {
- logger2.log(`Revoked partitions: ${JSON.stringify(assignments)}`);
- } else {
- logger2.error(`Rebalance error: ${err.message}`);
- }
- }
- );
  const kafka = await getKafkaClient(
  {
  clientId: processId,
@@ -2474,6 +2347,20 @@ var runStreamingFunctions = async (args) => {
  },
  logger2
  );
+ const consumer = kafka.consumer({
+ kafkaJS: {
+ groupId: streamingFuncId,
+ sessionTimeout: SESSION_TIMEOUT_CONSUMER,
+ heartbeatInterval: HEARTBEAT_INTERVAL_CONSUMER,
+ retry: {
+ retries: MAX_RETRIES_CONSUMER
+ },
+ autoCommit: true,
+ autoCommitInterval: AUTO_COMMIT_INTERVAL_MS,
+ fromBeginning: true
+ },
+ "js.consumer.max.batch.size": CONSUMER_MAX_BATCH_SIZE
+ });
  const maxMessageBytes = args.targetTopic?.max_message_bytes || 1024 * 1024;
  const producer = kafka.producer(
  createProducerConfig(maxMessageBytes)
@@ -2510,9 +2397,6 @@ var runStreamingFunctions = async (args) => {
  },
  workerStop: async ([logger2, producer, consumer]) => {
  logger2.log(`Received SIGTERM, shutting down gracefully...`);
- if (consumer._stopConsuming) {
- consumer._stopConsuming();
- }
  logger2.log("Stopping consumer first...");
  await stopConsumer(logger2, consumer, args.sourceTopic);
  logger2.log("Waiting for in-flight messages to complete...");