alepha 0.9.3 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/batch.d.ts CHANGED
@@ -1,14 +1,280 @@
  import * as _alepha_core1 from "alepha";
- import * as _alepha_core0 from "alepha";
  import { Descriptor, KIND, Static, TSchema } from "alepha";
  import { DateTimeProvider, DurationLike } from "alepha/datetime";
+ import * as _alepha_logger0 from "alepha/logger";
  import * as _alepha_retry0 from "alepha/retry";
  import { RetryDescriptorOptions } from "alepha/retry";

  //#region src/descriptors/$batch.d.ts
+
  /**
- * Creates a batch processor. This is useful for grouping multiple operations
- * (like API calls or database writes) into a single one to improve performance.
+ * Creates a batch processing descriptor that groups multiple operations and processes them together efficiently.
+ *
+ * This descriptor collects individual items and processes them together in groups,
+ * improving performance by reducing per-operation overhead and enabling bulk operations.
+ * It supports partitioning, concurrent processing, automatic flushing, and configurable
+ * retry mechanisms for robust batch processing workflows.
+ *
+ * **Key Features**
+ *
+ * - **Intelligent Batching**: Groups items based on size and time thresholds
+ * - **Partitioning Support**: Process different types of items in separate batches
+ * - **Concurrent Processing**: Handle multiple batches simultaneously with configurable limits
+ * - **Automatic Flushing**: Time-based and size-based automatic batch execution
+ * - **Type Safety**: Full TypeScript support with schema validation using TypeBox
+ * - **Retry Logic**: Built-in retry mechanisms for failed batch operations
+ * - **Resource Management**: Automatic cleanup and graceful shutdown handling
+ *
+ * **Use Cases**
+ *
+ * Well suited to high-throughput operations such as:
+ * - Database bulk inserts and updates
+ * - API call batching and rate limit optimization
+ * - Log aggregation and bulk shipping
+ * - File processing and bulk uploads
+ * - Event processing and analytics ingestion
+ * - Notification delivery optimization
+ * - Cache invalidation batching
+ *
+ * @example
+ * **Basic database batch operations:**
+ * ```ts
+ * import { $batch } from "alepha/batch";
+ * import { t } from "alepha";
+ *
+ * class UserService {
+ *   userBatch = $batch({
+ *     schema: t.object({
+ *       id: t.string(),
+ *       name: t.string(),
+ *       email: t.string(),
+ *       createdAt: t.optional(t.string())
+ *     }),
+ *     maxSize: 50, // Process up to 50 users at once
+ *     maxDuration: [5, "seconds"], // Or flush every 5 seconds
+ *     handler: async (users) => {
+ *       // Bulk insert users - much faster than individual inserts
+ *       console.log(`Processing batch of ${users.length} users`);
+ *
+ *       const result = await this.database.users.insertMany(users.map(user => ({
+ *         ...user,
+ *         createdAt: user.createdAt || new Date().toISOString()
+ *       })));
+ *
+ *       console.log(`Successfully inserted ${result.length} users`);
+ *       return { inserted: result.length, userIds: result.map(r => r.id) };
+ *     }
+ *   });
+ *
+ *   async createUser(userData: { name: string; email: string }) {
+ *     // Individual calls are automatically batched
+ *     const result = await this.userBatch.push({
+ *       id: generateId(),
+ *       name: userData.name,
+ *       email: userData.email
+ *     });
+ *
+ *     return result; // Returns the batch result once the batch is processed
+ *   }
+ * }
+ * ```
+ *
+ * @example
+ * **API call batching with partitioning:**
+ * ```ts
+ * class NotificationService {
+ *   notificationBatch = $batch({
+ *     schema: t.object({
+ *       userId: t.string(),
+ *       type: t.enum(["email", "sms", "push"]),
+ *       message: t.string(),
+ *       priority: t.enum(["high", "normal", "low"])
+ *     }),
+ *     maxSize: 100,
+ *     maxDuration: [10, "seconds"],
+ *     // Partition by notification type for different processing
+ *     partitionBy: (notification) => notification.type,
+ *     concurrency: 3, // Process up to 3 different types simultaneously
+ *     handler: async (notifications) => {
+ *       const type = notifications[0].type; // All items in a batch share the same type
+ *       console.log(`Processing ${notifications.length} ${type} notifications`);
+ *
+ *       switch (type) {
+ *         case 'email':
+ *           return await this.emailProvider.sendBulk(notifications.map(n => ({
+ *             to: n.userId,
+ *             subject: 'Notification',
+ *             body: n.message,
+ *             priority: n.priority
+ *           })));
+ *
+ *         case 'sms':
+ *           return await this.smsProvider.sendBulk(notifications.map(n => ({
+ *             to: n.userId,
+ *             message: n.message
+ *           })));
+ *
+ *         case 'push':
+ *           return await this.pushProvider.sendBulk(notifications.map(n => ({
+ *             userId: n.userId,
+ *             title: 'Notification',
+ *             body: n.message,
+ *             priority: n.priority
+ *           })));
+ *       }
+ *     }
+ *   });
+ *
+ *   async sendNotification(userId: string, type: 'email' | 'sms' | 'push', message: string, priority: 'high' | 'normal' | 'low' = 'normal') {
+ *     // Notifications are automatically batched by type
+ *     return await this.notificationBatch.push({
+ *       userId,
+ *       type,
+ *       message,
+ *       priority
+ *     });
+ *   }
+ * }
+ * ```
+ *
+ * @example
+ * **Log aggregation with retry logic:**
+ * ```ts
+ * class LoggingService {
+ *   logBatch = $batch({
+ *     schema: t.object({
+ *       timestamp: t.number(),
+ *       level: t.enum(["info", "warn", "error"]),
+ *       message: t.string(),
+ *       metadata: t.optional(t.record(t.string(), t.any())),
+ *       source: t.string()
+ *     }),
+ *     maxSize: 1000, // Large batches for log efficiency
+ *     maxDuration: [30, "seconds"], // Longer duration for log aggregation
+ *     concurrency: 2, // Limit concurrent log shipments
+ *     retry: {
+ *       maxAttempts: 5,
+ *       delay: [2, "seconds"],
+ *       backoff: "exponential"
+ *     },
+ *     handler: async (logEntries) => {
+ *       console.log(`Shipping ${logEntries.length} log entries`);
+ *
+ *       try {
+ *         // Ship logs to an external service (e.g., Elasticsearch, Splunk)
+ *         const response = await this.logShipper.bulkIndex({
+ *           index: 'application-logs',
+ *           body: logEntries.map(entry => ([
+ *             { index: { _index: 'application-logs' } },
+ *             {
+ *               ...entry,
+ *               '@timestamp': new Date(entry.timestamp).toISOString()
+ *             }
+ *           ])).flat()
+ *         });
+ *
+ *         if (response.errors) {
+ *           console.error(`Some log entries failed to index`, response.errors);
+ *           // Retry will be triggered by throwing
+ *           throw new Error(`Failed to index ${response.errors.length} log entries`);
+ *         }
+ *
+ *         console.log(`Successfully shipped ${logEntries.length} log entries`);
+ *         return { shipped: logEntries.length, indexedAt: Date.now() };
+ *
+ *       } catch (error) {
+ *         console.error(`Failed to ship logs batch`, error);
+ *         throw error; // Trigger retry mechanism
+ *       }
+ *     }
+ *   });
+ *
+ *   async log(level: 'info' | 'warn' | 'error', message: string, metadata?: Record<string, any>, source: string = 'application') {
+ *     // Individual log calls are batched and shipped efficiently
+ *     return await this.logBatch.push({
+ *       timestamp: Date.now(),
+ *       level,
+ *       message,
+ *       metadata,
+ *       source
+ *     });
+ *   }
+ * }
+ * ```
+ *
+ * @example
+ * **File processing with dynamic partitioning:**
+ * ```ts
+ * class FileProcessingService {
+ *   fileProcessingBatch = $batch({
+ *     schema: t.object({
+ *       filePath: t.string(),
+ *       fileType: t.enum(["image", "video", "document"]),
+ *       processingOptions: t.object({
+ *         quality: t.optional(t.enum(["low", "medium", "high"])),
+ *         format: t.optional(t.string()),
+ *         compress: t.optional(t.boolean())
+ *       }),
+ *       priority: t.enum(["urgent", "normal", "background"])
+ *     }),
+ *     maxSize: 20, // Smaller batches for file processing
+ *     maxDuration: [2, "minutes"], // Reasonable time for file accumulation
+ *     // Partition by file type and priority for optimal resource usage
+ *     partitionBy: (file) => `${file.fileType}-${file.priority}`,
+ *     concurrency: 4, // Multiple concurrent processing pipelines
+ *     retry: {
+ *       maxAttempts: 3,
+ *       delay: [5, "seconds"]
+ *     },
+ *     handler: async (files) => {
+ *       const fileType = files[0].fileType;
+ *       const priority = files[0].priority;
+ *
+ *       console.log(`Processing ${files.length} ${fileType} files with ${priority} priority`);
+ *
+ *       try {
+ *         const results = [];
+ *
+ *         for (const file of files) {
+ *           const result = await this.processFile(file.filePath, file.fileType, file.processingOptions);
+ *           results.push({
+ *             originalPath: file.filePath,
+ *             processedPath: result.outputPath,
+ *             size: result.size,
+ *             duration: result.processingTime
+ *           });
+ *         }
+ *
+ *         // Update database with batch results
+ *         await this.updateProcessingStatus(results);
+ *
+ *         console.log(`Successfully processed ${files.length} ${fileType} files`);
+ *         return {
+ *           processed: files.length,
+ *           fileType,
+ *           priority,
+ *           totalSize: results.reduce((sum, r) => sum + r.size, 0),
+ *           results
+ *         };
+ *
+ *       } catch (error) {
+ *         console.error(`Batch file processing failed for ${fileType} files`, error);
+ *         throw error;
+ *       }
+ *     }
+ *   });
+ *
+ *   async submitFile(filePath: string, fileType: 'image' | 'video' | 'document', options: any, priority: 'urgent' | 'normal' | 'background' = 'normal') {
+ *     // Files are automatically batched by type and priority
+ *     return await this.fileProcessingBatch.push({
+ *       filePath,
+ *       fileType,
+ *       processingOptions: options,
+ *       priority
+ *     });
+ *   }
+ * }
+ * ```
  */
  declare const $batch: {
  <TItem extends TSchema, TResponse>(options: BatchDescriptorOptions<TItem, TResponse>): BatchDescriptor<TItem, TResponse>;
@@ -16,43 +282,253 @@ declare const $batch: {
  };
  interface BatchDescriptorOptions<TItem extends TSchema, TResponse = any> {
  /**
- * A TypeBox schema to validate each item pushed to the batch.
+ * TypeBox schema for validating each item added to the batch.
+ *
+ * This schema:
+ * - Validates every item pushed to the batch for data integrity
+ * - Provides full TypeScript type inference for batch items
+ * - Ensures type safety between item producers and batch handlers
+ * - Enables automatic serialization/deserialization if needed
+ *
+ * **Schema Design Guidelines**:
+ * - Keep schemas focused on the data needed for batch processing
+ * - Use optional fields for data that might not always be present
+ * - Include identifiers that might be needed for partitioning
+ * - Consider versioning for schema evolution in long-running systems
+ *
+ * @example
+ * ```ts
+ * t.object({
+ *   id: t.string(),
+ *   operation: t.enum(["create", "update"]),
+ *   data: t.record(t.string(), t.any()),
+ *   timestamp: t.optional(t.number()),
+ *   priority: t.optional(t.enum(["high", "normal"]))
+ * })
+ * ```
  */
  schema: TItem;
  /**
- * The handler function that processes a batch of items.
+ * The batch processing handler function that processes arrays of validated items.
+ *
+ * This handler:
+ * - Receives an array of validated items based on the schema
+ * - Should implement bulk operations for maximum efficiency
+ * - Can be async and perform any operations (database, API calls, etc.)
+ * - Should handle errors appropriately (retry logic is provided separately)
+ * - Has access to the full Alepha dependency injection container
+ * - Returns results that will be provided to all items in the batch
+ *
+ * **Handler Design Guidelines**:
+ * - Implement true bulk operations rather than loops of individual operations
+ * - Use transactions when processing related data for consistency
+ * - Log batch processing progress and results for monitoring
+ * - Handle partial failures gracefully when possible
+ * - Consider memory usage for large batch sizes
+ *
+ * **Performance Considerations**:
+ * - Batch operations should be significantly faster than individual operations
+ * - Use database bulk operations (INSERT, UPDATE, etc.) when available
+ * - Optimize for the expected batch size and data characteristics
+ * - Consider connection pooling and resource limits
+ *
+ * @param items - Array of validated items to process in this batch
+ * @returns Result that will be returned to all callers who contributed items to this batch
+ *
+ * @example
+ * ```ts
+ * handler: async (items) => {
+ *   console.log(`Processing batch of ${items.length} items`);
+ *
+ *   try {
+ *     // Use bulk operations for maximum efficiency
+ *     const results = await this.database.transaction(async (tx) => {
+ *       const insertResults = await tx.items.insertMany(items);
+ *
+ *       // Update related data in bulk
+ *       const updates = items.map(item => ({ id: item.id, processed: true }));
+ *       await tx.items.updateMany(updates);
+ *
+ *       return insertResults;
+ *     });
+ *
+ *     // Log successful processing
+ *     console.log(`Successfully processed ${items.length} items`);
+ *
+ *     return {
+ *       processed: items.length,
+ *       results: results.map(r => ({ id: r.id, status: 'success' })),
+ *       timestamp: Date.now()
+ *     };
+ *
+ *   } catch (error) {
+ *     console.error(`Batch processing failed for ${items.length} items`, error);
+ *     throw error; // Will trigger retry logic if configured
+ *   }
+ * }
+ * ```
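+ *
+ * @example
+ * **Caller-side result sharing (an illustrative sketch; `itemBatch` is a hypothetical descriptor whose items match the `schema` example above):**
+ * ```ts
+ * // Per the @returns contract, every push that lands in the same batch
+ * // resolves with the handler's single batch-level result.
+ * const [first, second] = await Promise.all([
+ *   this.itemBatch.push({ id: "1", operation: "create", data: {} }),
+ *   this.itemBatch.push({ id: "2", operation: "update", data: {} }),
+ * ]);
+ * // first and second are the same batch result, e.g. { processed: 2, ... }
+ * ```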
  */
  handler: (items: Static<TItem>[]) => TResponse;
  /**
- * The maximum number of items in a batch. When this size is reached,
- * the batch is flushed automatically.
+ * Maximum number of items to collect before automatically flushing the batch.
+ *
+ * When this threshold is reached, the batch will be processed immediately
+ * regardless of the time duration. This provides an upper bound on batch size
+ * and ensures processing doesn't wait indefinitely for more items.
+ *
+ * **Size Selection Guidelines**:
+ * - Database operations: 100-1000 items depending on record size
+ * - API calls: 10-100 items depending on rate limits and payload size
+ * - File operations: 10-50 items depending on processing complexity
+ * - Memory operations: 1000+ items for simple transformations
+ *
+ * **Trade-offs**:
+ * - Larger batches: Better efficiency, higher memory usage, longer latency
+ * - Smaller batches: Lower latency, less efficiency, more frequent processing
+ *
  * @default 10
+ *
+ * @example 50 // Good for database bulk operations
+ * @example 100 // Good for API batching with rate limits
+ * @example 1000 // Good for high-throughput log processing
  */
  maxSize?: number;
  /**
- * The maximum duration to wait before flushing a batch, even if it's not full.
- * Starts from the moment the first item is added to a partition.
+ * Maximum time to wait before flushing a batch, even if it hasn't reached maxSize.
+ *
+ * This timer starts when the first item is added to a partition and ensures
+ * that items don't wait indefinitely for a batch to fill up. It provides
+ * a maximum latency guarantee for batch processing.
+ *
+ * **Duration Selection Guidelines**:
+ * - Real-time systems: 100ms - 1 second for low latency
+ * - Background processing: 5 - 30 seconds for higher throughput
+ * - Bulk operations: 1 - 5 minutes for maximum efficiency
+ * - Log shipping: 30 seconds - 2 minutes for log aggregation
+ *
+ * **Latency Impact**:
+ * - Shorter durations: Lower latency, potentially smaller batches
+ * - Longer durations: Higher throughput, potentially better efficiency
+ *
  * @default [1, "second"]
+ *
+ * @example [500, "milliseconds"] // Low latency for real-time processing
+ * @example [10, "seconds"] // Balanced latency and throughput
+ * @example [2, "minutes"] // High throughput for bulk operations
  */
  maxDuration?: DurationLike;
  /**
- * A function to determine the partition key for an item. Items with the
- * same key are batched together. If not provided, all items are placed in a single, default partition.
+ * Function to determine partition keys for grouping items into separate batches.
+ *
+ * Items with the same partition key are batched together, while items with
+ * different keys are processed in separate batches. This enables:
+ * - Processing different types of items with different logic
+ * - Parallel processing of independent item groups
+ * - Resource optimization based on item characteristics
+ *
+ * **Partitioning Strategies**:
+ * - By type: Group similar operations together
+ * - By destination: Group items going to the same endpoint
+ * - By priority: Process high-priority items separately
+ * - By size/complexity: Group items with similar processing requirements
+ * - By tenant/user: Process items per customer or tenant
+ *
+ * **Partition Key Guidelines**:
+ * - Use descriptive, consistent naming
+ * - Keep key cardinality reasonable (avoid too many unique keys)
+ * - Consider memory impact of multiple active partitions
+ * - Balance between parallelism and resource usage
+ *
+ * If not provided, all items are placed in a single default partition.
+ *
+ * @param item - The validated item to determine partition for
+ * @returns String key identifying the partition this item belongs to
+ *
+ * @example
+ * ```ts
+ * // Partition by operation type
+ * partitionBy: (item) => item.operation,
+ *
+ * // Partition by priority and type
+ * partitionBy: (item) => `${item.priority}-${item.type}`,
+ *
+ * // Partition by destination service
+ * partitionBy: (item) => item.targetService,
+ *
+ * // Dynamic partitioning based on size
+ * partitionBy: (item) => {
+ *   const size = JSON.stringify(item).length;
+ *   return size > 1000 ? 'large' : 'small';
+ * }
+ * ```
  */
  partitionBy?: (item: Static<TItem>) => string;
  /**
- * The maximum number of concurrent `handler` executions.
+ * Maximum number of batch handlers that can execute simultaneously.
+ *
+ * This controls the level of parallelism for batch processing across
+ * all partitions. Higher concurrency can improve throughput but may
+ * increase resource usage and contention.
+ *
+ * **Concurrency Considerations**:
+ * - Database operations: Limit based on connection pool size
+ * - API calls: Consider rate limits and server capacity
+ * - CPU-intensive operations: Set to number of CPU cores
+ * - Memory-intensive operations: Consider available RAM
+ * - I/O operations: Can be higher than CPU count
+ *
+ * **Resource Planning**:
+ * - Each concurrent handler may use significant memory/connections
+ * - Monitor resource usage and adjust based on system capacity
+ * - Consider downstream system limits and capabilities
+ *
  * @default 1
+ *
+ * @example 1 // Sequential processing, lowest resource usage
+ * @example 4 // Moderate parallelism for balanced systems
+ * @example 10 // High concurrency for I/O-bound operations
  */
  concurrency?: number;
  /**
- * Retry options for the batch handler if it fails.
- * Leverages the `@alepha/retry` module.
+ * Retry configuration for failed batch processing operations.
+ *
+ * When batch handlers fail, this configuration determines how and when
+ * to retry the operation. Uses the `@alepha/retry` module for robust
+ * retry logic with exponential backoff, jitter, and other strategies.
+ *
+ * **Retry Strategies**:
+ * - Exponential backoff: Increasingly longer delays between attempts
+ * - Fixed delays: Consistent intervals between retries
+ * - Jitter: Random variation to avoid thundering herd problems
+ *
+ * **Failure Scenarios to Consider**:
+ * - Temporary network issues
+ * - Database connection problems
+ * - Rate limiting from external services
+ * - Resource exhaustion (memory, disk space)
+ * - Downstream service temporary unavailability
+ *
+ * **Retry Guidelines**:
+ * - Use exponential backoff for network-related failures
+ * - Set reasonable max attempts to avoid infinite loops
+ * - Consider the impact of retries on overall system performance
+ * - Monitor retry patterns to identify systemic issues
+ *
+ * @example
+ * ```ts
+ * retry: {
+ *   maxAttempts: 3,
+ *   delay: [1, "second"],
+ *   backoff: "exponential",
+ *   maxDelay: [30, "seconds"],
+ *   jitter: true
+ * }
+ * ```
  */
  retry?: Omit<RetryDescriptorOptions<() => Array<Static<TItem>>>, "handler">;
  }
  declare class BatchDescriptor<TItem extends TSchema, TResponse = any> extends Descriptor<BatchDescriptorOptions<TItem, TResponse>> {
- protected readonly log: _alepha_core1.Logger;
+ protected readonly log: _alepha_logger0.Logger;
  protected readonly dateTime: DateTimeProvider;
  protected readonly partitions: Map<any, any>;
  protected activeHandlers: PromiseWithResolvers<void>[];
@@ -68,7 +544,6 @@ declare class BatchDescriptor<TItem extends TSchema, TResponse = any> extends De
  protected flushPartition(partitionKey: string): Promise<void>;
  protected readonly dispose: _alepha_core1.HookDescriptor<"stop">;
  }
- //# sourceMappingURL=$batch.d.ts.map
  //#endregion
  //#region src/index.d.ts
  /**
@@ -105,9 +580,7 @@ declare class BatchDescriptor<TItem extends TSchema, TResponse = any> extends De
  * @see {@link $batch}
  * @module alepha.batch
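+ *
+ * @example
+ * **Registering the module (a minimal sketch; the `Alepha.create().with(...)` and `run` bootstrap calls are assumptions to verify against the core `alepha` docs):**
+ * ```ts
+ * import { Alepha, run } from "alepha"; // assumed core entry points
+ * import { AlephaBatch } from "alepha/batch";
+ *
+ * // Register the batch module so $batch descriptors are wired up.
+ * const alepha = Alepha.create().with(AlephaBatch);
+ * run(alepha);
+ * ```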
  */
- declare const AlephaBatch: _alepha_core0.Service<_alepha_core0.Module>;
- //# sourceMappingURL=index.d.ts.map
-
+ declare const AlephaBatch: _alepha_core1.Service<_alepha_core1.Module>;
  //#endregion
  export { $batch, AlephaBatch, BatchDescriptor, BatchDescriptorOptions };
  //# sourceMappingURL=index.d.ts.map